feat: 数据治理+深套解套方案

- data_governance.py: holding_strategies去重(1642→345)
  - 检查缺失策略的持仓(中际旭创已补)
  - 深套持仓统计
- 中际旭创(300308)技术面策略已生成
  - 止损1287 止盈1453 买入区1297~1350 RR=3.59
- 深套解套方向:
  丘钛科技 -48% → 反弹到9.7卖1/3
  万科企业 -53% → 反弹到2.8卖1/3
  紫金矿业 -36% → 反弹到30.4卖1/3
  比亚迪股份 -27% → 反弹到89.8卖1/3
  中科电气 -32% → 反弹到18.6卖1/3
This commit is contained in:
知微
2026-06-25 21:22:59 +08:00
parent b63475402e
commit fa3fc93f25
3 changed files with 108 additions and 20 deletions
+12 -17
View File
@@ -49,7 +49,10 @@ def load_no_agent_job_ids():
# 硬编码保底(如果 jobs.json 读不到)
SKIP_DIRS = {
"30908cdc44a8", # cron-推XMPP中继自身输出
"a231e9c39b4e", # 知识研究-日常(由莫荷负责推送)
"7bda62d24d22", # 梦境循环-知识库归并(由莫荷负责推送)
"health", # 健康检查输出
"b9fa4482dc1a", # 自成长知识库-22:10中继推送(莫荷的通道)
}
FROM = "zhiwei@yoin.fun"
@@ -81,6 +84,8 @@ def is_pure_script_output(content):
# 健康检查报告
if "MoFin 系统健康检查" in content:
return True
# [SILENT] 标记一概不拦 — 用户想看到报告结构,不想被静默
# 移除 [SILENT] 过滤,让报告始终送达
# 结构化数据标签(价格监控的机器数据)
if "<structured_data>" in content:
return True
@@ -146,21 +151,15 @@ def extract_body(path):
body = re.sub(r'\n?\s*<structured_data>.*?</structured_data>\s*', '', body, flags=re.DOTALL).strip()
body = re.sub(r'\*\*(.*?)\*\*', r'\1', body)
# 去掉agent的思考过程("Now let me...", "Let me...", "Now I have..."等开头)
body = re.sub(r'^(Now let me|Let me|I need|I will|First let me|First,? I|Now I have|Here.i|I.ll|I.m ).*?\n\n', '', body, flags=re.DOTALL).strip()
# 去掉末尾的思考尾巴
body = re.sub(r'\n\s*(Now I|This |I have |I used |The report|The data).*?$', '', body, flags=re.DOTALL).strip()
# 如果只剩"好的"、"收到"等短回应,丢弃
if re.match(r'^[\u4e00-\u9fff,。]{1,10}$', body):
if not body or len(body) < 20:
return None
if not body:
return None
# [SILENT] → 不推送(计数的逻辑在 scan() 中处理)
if "[SILENT]" in body:
# 只过滤内容是纯[SILENT]的报告
if body.strip() == "[SILENT]":
return None
# 正文中混了[SILENT]标记(LLM写了报告又在末尾加了这个)— 去掉标记保留正文
body = body.replace("[SILENT]", "").strip()
if len(body) < 20:
return None
@@ -308,19 +307,15 @@ def scan():
body = re.sub(r'\n?\s*<structured_data>.*?</structured_data>\s*', '', body, flags=re.DOTALL).strip()
body = re.sub(r'\*\*(.*?)\*\*', r'\1', body)
if not body:
if not body or len(body) < 20:
n_short += 1
continue
# SILENT → 拦截,记数(在长度检查之前,因为 [SILENT] 只有8字符)
# SILENT → 拦截,记数
if "[SILENT]" in body:
n_silent += 1
continue
if len(body) < 20:
n_short += 1
continue
# 格式校验 — 记录改进点,不拦截
ok_body, issues = validate_format(body)
+3 -3
View File
@@ -616,7 +616,7 @@
"date": "2026-06-25",
"high": 77.45,
"low": 74.2,
"close": 76.2
"close": 76.05
}
],
"01478": [
@@ -678,7 +678,7 @@
"date": "2026-06-25",
"high": 7.88,
"low": 7.11,
"close": 7.16
"close": 7.2
}
],
"01888": [
@@ -802,7 +802,7 @@
"date": "2026-06-25",
"high": 2.45,
"low": 2.21,
"close": 2.24
"close": 2.25
}
],
"02318": [
+93
View File
@@ -0,0 +1,93 @@
#!/usr/bin/env python3
"""data_governance.py — MoFin 数据治理 (no_agent)
1. holding_strategies 去重归档
2. 检查缺失策略的持仓
3. 报告数据健康状况
"""
import json, sqlite3
from pathlib import Path
from datetime import datetime
# Root directory of the MoFin deployment; the absolute path is hard-coded,
# so the script only works on a host where this directory exists.
BASE = Path("/home/hmo/MoFin")
# Directory that holds the data files referenced below.
DATA = BASE / "data"
# SQLite database file that main() opens.
DB_PATH = DATA / "mofin.db"
# JSON file from which main() reads the "decisions" list.
DECISIONS_PATH = DATA / "decisions.json"
def clean_holding_strategies(conn):
    """Archive stale strategies so each code keeps only its newest active row.

    A row is "active" while its ``superseded_at`` column is NULL. For every
    code that has more than one active row, all active rows older than the
    newest active ``created_at`` are stamped with the current timestamp.

    Args:
        conn: An open ``sqlite3`` connection to a database that contains the
            ``holding_strategies`` table. The function does not commit; the
            caller decides when to persist the changes.

    Returns:
        The number of rows actually archived by this call (0 when the table
        is already clean).
    """
    # Only active rows take part in the grouping. Counting archived rows too
    # would report the same "archived" figure on every re-run and could pick
    # an already-superseded row as the one to keep.
    duplicated = conn.execute(
        "SELECT code, MAX(created_at) AS latest "
        "FROM holding_strategies WHERE superseded_at IS NULL "
        "GROUP BY code HAVING COUNT(*) > 1"
    ).fetchall()

    # One timestamp for the whole pass, so every row archived together
    # carries the same marker.
    archived_at = datetime.now().isoformat()
    total_archived = 0
    for code, latest in duplicated:
        # Strict "<" keeps the newest row; rows that tie on created_at with
        # the newest one are left active as well.
        cursor = conn.execute(
            "UPDATE holding_strategies SET superseded_at=? "
            "WHERE code=? AND created_at<? AND superseded_at IS NULL",
            (archived_at, code, latest))
        # rowcount is the number of rows this UPDATE really changed.
        total_archived += cursor.rowcount
    return total_archived
def check_missing_strategies(conn, decisions_list):
    """Return the open holding decisions that have no active strategy row.

    A decision is considered when its ``type`` is ``"持仓策略"`` and its
    ``status`` is anything other than ``"closed"``. It is reported as missing
    when ``holding_strategies`` has no row for its ``code`` with
    ``superseded_at`` still NULL.

    Args:
        conn: An open ``sqlite3`` connection.
        decisions_list: Iterable of decision dicts; each considered entry
            must carry a ``"code"`` key.

    Returns:
        A list of the decision dicts lacking an active strategy, in their
        original order.
    """
    lookup_sql = "SELECT id FROM holding_strategies WHERE code=? AND superseded_at IS NULL LIMIT 1"

    def has_no_active_strategy(entry):
        # fetchone() yields None when no active row matches this code.
        return not conn.execute(lookup_sql, (entry["code"],)).fetchone()

    return [
        entry
        for entry in decisions_list
        if entry.get("type") == "持仓策略"
        and entry.get("status") != "closed"
        and has_no_active_strategy(entry)
    ]
def _find_deep_losses(decisions_list, threshold=-25):
    """Collect decisions whose unrealised loss is below ``threshold`` percent.

    Cost is taken from ``cost`` and falls back to ``avg_price``. Entries with
    a missing, null or non-positive cost or price are skipped.

    Returns:
        A list of ``(name, code, loss_percent, stop_loss)`` tuples.
    """
    deep = []
    for d in decisions_list:
        cost = d.get("cost", 0) or d.get("avg_price", 0) or 0
        # A present-but-null price would make "price > 0" raise TypeError,
        # so it is normalised to 0 the same way cost is.
        price = d.get("price", 0) or 0
        if cost > 0 and price > 0:
            loss = (price - cost) / cost * 100
            if loss < threshold:
                deep.append(
                    (d.get("name", ""), d.get("code", "?"), loss, d.get("stop_loss", 0))
                )
    return deep


def main():
    """Run the data-governance pass and print a plain-text report.

    Steps:
        1. Archive superseded ``holding_strategies`` rows and commit.
        2. List open holdings in the decisions file without an active strategy.
        3. List decisions whose loss exceeds 25%.

    The database connection is closed even when a step raises.
    """
    conn = sqlite3.connect(str(DB_PATH))
    try:
        # 1. Clean up holding_strategies.
        archived = clean_holding_strategies(conn)
        conn.commit()
        remaining = conn.execute(
            "SELECT COUNT(*) FROM holding_strategies WHERE superseded_at IS NULL").fetchone()[0]
        print(f"holding_strategies: 归档{archived}条过期记录 | 剩余活跃{remaining}")

        # 2. Holdings without a strategy record.
        # Explicit UTF-8: the file holds Chinese text and must not depend on
        # the locale's default encoding.
        decisions = json.loads(DECISIONS_PATH.read_text(encoding="utf-8"))
        decisions_list = decisions.get("decisions", [])
        missing = check_missing_strategies(conn, decisions_list)
        if missing:
            print(f"\n⚠️ {len(missing)}只持仓没有对应策略记录:")
            for m in missing:
                print(f" {m.get('name','?')}({m['code']}): {m.get('type','')}")
        else:
            print("✅ 所有持仓都有策略记录")

        # 3. Deep-loss statistics.
        # NOTE(review): this scans every decision, including closed ones and
        # types other than holdings — confirm that is intended.
        deep = _find_deep_losses(decisions_list)
        if deep:
            print(f"\n🔴 {len(deep)}只深套(>-25%):")
            for name, code, loss, sl in deep:
                print(f" {name}({code}): {loss:.0f}% 止损={sl}")
        else:
            print("\n✅ 无深套持仓")
    finally:
        conn.close()


if __name__ == "__main__":
    main()