diff --git a/cron_to_xmpp.py.disabled b/cron_to_xmpp.py similarity index 92% rename from cron_to_xmpp.py.disabled rename to cron_to_xmpp.py index 3a5cfca..3fa581d 100644 --- a/cron_to_xmpp.py.disabled +++ b/cron_to_xmpp.py @@ -49,7 +49,10 @@ def load_no_agent_job_ids(): # 硬编码保底(如果 jobs.json 读不到) SKIP_DIRS = { "30908cdc44a8", # cron-推XMPP中继自身输出 + "a231e9c39b4e", # 知识研究-日常(由莫荷负责推送) + "7bda62d24d22", # 梦境循环-知识库归并(由莫荷负责推送) "health", # 健康检查输出 + "b9fa4482dc1a", # 自成长知识库-22:10中继推送(莫荷的通道) } FROM = "zhiwei@yoin.fun" @@ -81,6 +84,8 @@ def is_pure_script_output(content): # 健康检查报告 if "MoFin 系统健康检查" in content: return True + # [SILENT] 标记一概不拦 — 用户想看到报告结构,不想被静默 + # 移除 [SILENT] 过滤,让报告始终送达 # 结构化数据标签(价格监控的机器数据) if "" in content: return True @@ -146,21 +151,15 @@ def extract_body(path): body = re.sub(r'\n?\s*.*?\s*', '', body, flags=re.DOTALL).strip() body = re.sub(r'\*\*(.*?)\*\*', r'\1', body) - # 去掉agent的思考过程("Now let me...", "Let me...", "Now I have..."等开头) - body = re.sub(r'^(Now let me|Let me|I need|I will|First let me|First,? I|Now I have|Here.i|I.ll|I.m ).*?\n\n', '', body, flags=re.DOTALL).strip() - # 去掉末尾的思考尾巴 - body = re.sub(r'\n\s*(Now I|This |I have |I used |The report|The data).*?$', '', body, flags=re.DOTALL).strip() - # 如果只剩"好的"、"收到"等短回应,丢弃 - if re.match(r'^[\u4e00-\u9fff,。]{1,10}$', body): + if not body or len(body) < 20: return None - if not body: - return None - - # [SILENT] → 不推送(计数的逻辑在 scan() 中处理) - if "[SILENT]" in body: + # 只过滤内容是纯[SILENT]的报告 + if body.strip() == "[SILENT]": return None + # 正文中混了[SILENT]标记(LLM写了报告又在末尾加了这个)— 去掉标记保留正文 + body = body.replace("[SILENT]", "").strip() if len(body) < 20: return None @@ -308,19 +307,15 @@ def scan(): body = re.sub(r'\n?\s*.*?\s*', '', body, flags=re.DOTALL).strip() body = re.sub(r'\*\*(.*?)\*\*', r'\1', body) - if not body: + if not body or len(body) < 20: n_short += 1 continue - # SILENT → 拦截,记数(在长度检查之前,因为 [SILENT] 只有8字符) + # SILENT → 拦截,记数 if "[SILENT]" in body: n_silent += 1 continue - if len(body) < 20: - n_short += 1 - continue - # 格式校验 — 记录改进点,不拦截 ok_body, issues = validate_format(body) diff --git a/data/price_history.json b/data/price_history.json index d97a3f8..e2bd420 100644 --- a/data/price_history.json +++ b/data/price_history.json @@ -616,7 +616,7 @@ "date": "2026-06-25", "high": 77.45, "low": 74.2, - "close": 76.2 + "close": 76.05 } ], "01478": [ @@ -678,7 +678,7 @@ "date": "2026-06-25", "high": 7.88, "low": 7.11, - "close": 7.16 + "close": 7.2 } ], "01888": [ @@ -802,7 +802,7 @@ "date": "2026-06-25", "high": 2.45, "low": 2.21, - "close": 2.24 + "close": 2.25 } ], "02318": [ diff --git a/scripts/data_governance.py b/scripts/data_governance.py new file mode 100644 index 0000000..6015811 --- /dev/null +++ b/scripts/data_governance.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 +"""data_governance.py — MoFin 数据治理 (no_agent) + +1. holding_strategies 去重归档 +2. 检查缺失策略的持仓 +3. 报告数据健康状况 +""" + +import json, sqlite3 +from pathlib import Path +from datetime import datetime + +BASE = Path("/home/hmo/MoFin") +DATA = BASE / "data" +DB_PATH = DATA / "mofin.db" +DECISIONS_PATH = DATA / "decisions.json" + + +def clean_holding_strategies(conn): + """归档旧策略,只保留每只股票最新一条""" + codes = conn.execute( + "SELECT code, COUNT(*) as cnt, MAX(created_at) as latest " + "FROM holding_strategies GROUP BY code HAVING cnt > 1" + ).fetchall() + + total_archived = 0 + for code, cnt, latest in codes: + # 标记除了最新一条以外的所有记录为已归档 + conn.execute( + "UPDATE holding_strategies SET superseded_at=? " + "WHERE code=? AND created_at 0 and price > 0: + loss = (price - cost) / cost * 100 + if loss < -25: + deep.append((d.get("name",""), d["code"], loss, d.get("stop_loss",0))) + if deep: + print(f"\n🔴 {len(deep)}只深套(>-25%):") + for name, code, loss, sl in deep: + print(f" {name}({code}): {loss:.0f}% 止损={sl}") + else: + print("\n✅ 无深套持仓") + + conn.close() + + +if __name__ == "__main__": + main()