feat: 数据治理+深套解套方案

- data_governance.py: holding_strategies去重(1642→345) - 检查缺失策略的持仓(中际旭创已补) - 深套持仓统计 - 中际旭创(300308)技术面策略已生成 - 止损1287 止盈1453 买入区1297~1350 RR=3.59 - 深套解套方向: 丘钛科技 -48% → 反弹到9.7卖1/3 万科企业 -53% → 反弹到2.8卖1/3 紫金矿业 -36% → 反弹到30.4卖1/3 比亚迪股份 -27% → 反弹到89.8卖1/3 中科电气 -32% → 反弹到18.6卖1/3
2026-06-25 21:22:59 +08:00
parent b63475402e
commit fa3fc93f25
3 changed files with 108 additions and 20 deletions
@@ -49,7 +49,10 @@ def load_no_agent_job_ids():
 # 硬编码保底（如果 jobs.json 读不到）
 SKIP_DIRS = {
    "30908cdc44a8",  # cron-推XMPP中继自身输出
    "a231e9c39b4e",  # 知识研究-日常（由莫荷负责推送）
    "7bda62d24d22",  # 梦境循环-知识库归并（由莫荷负责推送）
    "health",         # 健康检查输出
    "b9fa4482dc1a",  # 自成长知识库-22:10中继推送（莫荷的通道）
 }
 FROM = "zhiwei@yoin.fun"
@@ -81,6 +84,8 @@ def is_pure_script_output(content):
    # 健康检查报告
    if "MoFin 系统健康检查" in content:
        return True
    # [SILENT] 标记一概不拦 — 用户想看到报告结构，不想被静默
    # 移除 [SILENT] 过滤，让报告始终送达
    # 结构化数据标签（价格监控的机器数据）
    if "<structured_data>" in content:
        return True
@@ -146,21 +151,15 @@ def extract_body(path):
    body = re.sub(r'\n?\s*<structured_data>.*?</structured_data>\s*', '', body, flags=re.DOTALL).strip()
    body = re.sub(r'\*\*(.*?)\*\*', r'\1', body)
-    # 去掉agent的思考过程（"Now let me...", "Let me...", "Now I have..."等开头）
+    if not body or len(body) < 20:
    body = re.sub(r'^(Now let me|Let me|I need|I will|First let me|First,? I|Now I have|Here.i|I.ll|I.m ).*?\n\n', '', body, flags=re.DOTALL).strip()
    # 去掉末尾的思考尾巴
    body = re.sub(r'\n\s*(Now I|This |I have |I used |The report|The data).*?$', '', body, flags=re.DOTALL).strip()
    # 如果只剩"好的"、"收到"等短回应，丢弃
    if re.match(r'^[\u4e00-\u9fff，。]{1,10}$', body):
        return None
-    if not body:
+    # 只过滤内容是纯[SILENT]的报告
-        return None
+    if body.strip() == "[SILENT]":
    # [SILENT] → 不推送（计数的逻辑在 scan() 中处理）
    if "[SILENT]" in body:
        return None
    # 正文中混了[SILENT]标记（LLM写了报告又在末尾加了这个）— 去掉标记保留正文
    body = body.replace("[SILENT]", "").strip()
    if len(body) < 20:
        return None
@@ -308,19 +307,15 @@ def scan():
                body = re.sub(r'\n?\s*<structured_data>.*?</structured_data>\s*', '', body, flags=re.DOTALL).strip()
                body = re.sub(r'\*\*(.*?)\*\*', r'\1', body)
-                if not body:
+                if not body or len(body) < 20:
                    n_short += 1
                    continue
-                # SILENT → 拦截，记数（在长度检查之前，因为 [SILENT] 只有8字符）
+                # SILENT → 拦截，记数
                if "[SILENT]" in body:
                    n_silent += 1
                    continue
                if len(body) < 20:
                    n_short += 1
                    continue
                # 格式校验 — 记录改进点，不拦截
                ok_body, issues = validate_format(body)
@@ -616,7 +616,7 @@
      "date": "2026-06-25",
      "high": 77.45,
      "low": 74.2,
-      "close": 76.2
+      "close": 76.05
    }
  ],
  "01478": [
@@ -678,7 +678,7 @@
      "date": "2026-06-25",
      "high": 7.88,
      "low": 7.11,
-      "close": 7.16
+      "close": 7.2
    }
  ],
  "01888": [
@@ -802,7 +802,7 @@
      "date": "2026-06-25",
      "high": 2.45,
      "low": 2.21,
-      "close": 2.24
+      "close": 2.25
    }
  ],
  "02318": [
@@ -0,0 +1,93 @@
 #!/usr/bin/env python3
 """data_governance.py — MoFin data governance (no_agent)
 1. De-duplicate and archive holding_strategies
 2. Check for holdings that lack a strategy
 3. Report data health status
 """
 import json, sqlite3
 from pathlib import Path
 from datetime import datetime
 # Presumably the MoFin project root (TODO confirm); every path below derives from it.
 BASE = Path("/home/hmo/MoFin")
 DATA = BASE / "data"
 # SQLite database file opened by main(); this module queries its holding_strategies table.
 DB_PATH = DATA / "mofin.db"
 # JSON file read by main(); the list under its "decisions" key is what gets checked.
 DECISIONS_PATH = DATA / "decisions.json"
 def clean_holding_strategies(conn):
    """Archive outdated strategy rows, keeping the newest row per stock code.

    For every code that has more than one row in ``holding_strategies``,
    each still-active row (``superseded_at IS NULL``) whose ``created_at``
    is older than that code's newest ``created_at`` gets ``superseded_at``
    set to the current local time.  Rows that share the newest
    ``created_at`` are all left active.  Nothing is committed here; the
    caller owns the transaction.

    Args:
        conn: An open ``sqlite3`` connection to the database containing
            the ``holding_strategies`` table.

    Returns:
        The number of rows actually archived by this call.  Rows archived
        by an earlier run are not counted again, so a repeated run on
        already-clean data returns 0.
    """
    codes = conn.execute(
        "SELECT code, MAX(created_at) as latest "
        "FROM holding_strategies GROUP BY code HAVING COUNT(*) > 1"
    ).fetchall()
    # One timestamp for the whole run so every row archived together matches.
    archived_at = datetime.now().isoformat()
    total_archived = 0
    for code, latest in codes:
        cursor = conn.execute(
            "UPDATE holding_strategies SET superseded_at=? "
            "WHERE code=? AND created_at<? AND superseded_at IS NULL",
            (archived_at, code, latest))
        # Count what the UPDATE really touched instead of assuming cnt - 1:
        # the grouping also sees rows that were archived previously.
        total_archived += cursor.rowcount
    return total_archived
 def check_missing_strategies(conn, decisions_list):
    """Return the open holding decisions that have no active strategy row.

    A decision is considered when its ``type`` is "持仓策略" and its
    ``status`` is not "closed".  It is reported as missing when
    ``holding_strategies`` contains no row for its ``code`` with
    ``superseded_at IS NULL``.

    Args:
        conn: An open ``sqlite3`` connection holding ``holding_strategies``.
        decisions_list: Iterable of decision dicts; considered entries
            must carry a ``code`` key.

    Returns:
        The matching decision dicts, in their original order.
    """
    active_lookup = (
        "SELECT id FROM holding_strategies WHERE code=? AND superseded_at IS NULL LIMIT 1"
    )
    without_strategy = []
    for decision in decisions_list:
        # Only open holding-strategy decisions are relevant.
        if decision.get("type") != "持仓策略":
            continue
        if decision.get("status") == "closed":
            continue
        found = conn.execute(active_lookup, (decision["code"],)).fetchone()
        if found is None:
            without_strategy.append(decision)
    return without_strategy
 def _collect_deep_losses(decisions_list, threshold=-25):
    """Return (name, code, loss_pct, stop_loss) for decisions losing more than threshold percent."""
    deep = []
    for d in decisions_list:
        cost = d.get("cost", 0) or d.get("avg_price", 0) or 0
        # A JSON null price would otherwise make the comparison below raise.
        price = d.get("price", 0) or 0
        if cost > 0 and price > 0:
            loss = (price - cost) / cost * 100
            if loss < threshold:
                deep.append((d.get("name", ""), d.get("code", "?"), loss, d.get("stop_loss", 0)))
    return deep
 def main():
    """Run the data-governance job and print a report to stdout.

    Steps:
      1. Archive outdated ``holding_strategies`` rows and commit.
      2. List open holdings from the decisions file that have no active
         strategy row.
      3. List decisions whose price is more than 25% below cost.

    The database connection is closed even when a step raises.
    """
    conn = sqlite3.connect(str(DB_PATH))
    try:
        # 1. Clean up holding_strategies
        archived = clean_holding_strategies(conn)
        conn.commit()
        remaining = conn.execute(
            "SELECT COUNT(*) FROM holding_strategies WHERE superseded_at IS NULL").fetchone()[0]
        print(f"holding_strategies: 归档{archived}条过期记录 | 剩余活跃{remaining}条")
        # 2. Holdings without a strategy record
        # Decode explicitly: the file holds Chinese text and the locale
        # default encoding is not guaranteed to be UTF-8.
        decisions = json.loads(DECISIONS_PATH.read_text(encoding="utf-8"))
        decisions_list = decisions.get("decisions", [])
        missing = check_missing_strategies(conn, decisions_list)
        if missing:
            print(f"\n⚠️ {len(missing)}只持仓没有对应策略记录:")
            for m in missing:
                print(f"  {m.get('name','?')}({m['code']}): {m.get('type','')}")
        else:
            print("✅ 所有持仓都有策略记录")
        # 3. Deep-loss statistics
        deep = _collect_deep_losses(decisions_list)
        if deep:
            print(f"\n🔴 {len(deep)}只深套(>-25%):")
            for name, code, loss, sl in deep:
                print(f"  {name}({code}): {loss:.0f}%  止损={sl}")
        else:
            print("\n✅ 无深套持仓")
    finally:
        conn.close()
 # Run the governance job only when this file is executed as a script, not on import.
 if __name__ == "__main__":
    main()