#!/usr/bin/env python3 """cron_to_xmpp.py — 智能cron报告推送 只推送LLM驱动的分析报告(有实质内容),不推送纯脚本输出。 关键规则: 1. 跳过 no_agent 脚本的输出(价格监控、数据同步等机器数据) 2. 跳过自己的输出目录(30908cdc44a8),避免循环推送 3. 正文太短(<20字)或只有 [SILENT] 的不推 4. 超时自动跳过,不影响后续 """ import json import subprocess import re import sys from datetime import datetime from pathlib import Path # 使用绝对路径,不受 profile 环境变量影响 REAL_HOME = Path("/home/hmo") # 扫描目录 CRON_DIRS = [ REAL_HOME / ".hermes" / "cron" / "output", REAL_HOME / ".hermes" / "profiles" / "position-analyst" / "cron" / "output", ] JOURNAL = REAL_HOME / ".hermes" / "cron" / ".relay_journal.json" SILENT_STATS = REAL_HOME / ".hermes" / "cron" / ".silent_daily_count.json" MAX_AGE_HOURS = 6 # 只推送6小时内的报告,防止清journal后爆历史 def load_no_agent_job_ids(): """从两个profile的jobs.json中读取所有no_agent=true的job ID""" ids = set() for jobs_path in [ REAL_HOME / ".hermes" / "cron" / "jobs.json", REAL_HOME / ".hermes" / "profiles" / "position-analyst" / "cron" / "jobs.json", ]: try: with open(jobs_path) as f: data = json.load(f) for j in data.get("jobs", []): if j.get("no_agent"): ids.add(j["id"]) except: pass return ids # 硬编码保底(如果 jobs.json 读不到) SKIP_DIRS = { "30908cdc44a8", # cron-推XMPP中继自身输出 "health", # 健康检查输出 } FROM = "zhiwei@yoin.fun" TO = "hmo@yoin.fun" def load_journal(): try: return set(json.loads(JOURNAL.read_text())) except: return set() def save_journal(entries): JOURNAL.write_text(json.dumps(sorted(entries))) def is_pure_script_output(content): """判断文件是否是纯脚本的机器输出(不是LLM报告)""" # LLM报告的特征:有 ## Response 节(包含agent的回复) if "## Response" in content: return False # 以 # Cron Job: 开头但没有 ## Response 的可能是脚本输出 if content.startswith("# Cron Job:"): return True # 价格监控的触发输出 if content.startswith("🔔") and "⏱" in content: return True # 健康检查报告 if "MoFin 系统健康检查" in content: return True # [SILENT] 标记一概不拦 — 用户想看到报告结构,不想被静默 # 移除 [SILENT] 过滤,让报告始终送达 # 结构化数据标签(价格监控的机器数据) if "" in content: return True # no_agent 脚本的输出特征(Hermes自动添加的header) if "**Mode:** no_agent (script)" in content: return True return False def validate_report_body(body): """质量检查 — 不拦截,返回改进建议""" issues = [] text = body.strip() if "重点推荐操作" not in text: issues.append("缺少【重点推荐操作】区域(如无需操作可写「无」)") if "风险关注" not in text: issues.append("缺少【风险关注】区域(如无风险可写「无」)") if len(text) > 600: issues.append(f"报告偏长({len(text)}字),建议压缩到600字以内") fuzzy = re.findall(r"可关注|可考虑|建议观察|试试|谨慎关注|择机|根据情况", text) if fuzzy: issues.append(f"含模糊词: {', '.join(set(fuzzy))},建议替换为明确操作指令") if re.search(r"如果.*就.*如果.*就|若.*则.*若.*则", text): issues.append("含选择题句式,建议只给一个确定建议") return issues def send_feedback(issues, job_name): """发送质量反馈给知微自己""" from xml.sax.saxutils import escape feedback = f"[自我反馈] 报告质量检查发现以下问题,下次注意:\n" + "\n".join(f"• {i}" for i in issues) safe = escape(feedback) stanza = ( f"" f"{safe}" ) try: subprocess.run( ["docker", "exec", "ejabberd", "ejabberdctl", "send_stanza", FROM, FROM, stanza], capture_output=True, timeout=10, text=True, ) except: pass def extract_body(path): content = path.read_text(encoding="utf-8", errors="replace") if is_pure_script_output(content): return None parts = content.split("## Response") body = parts[1].strip() if len(parts) > 1 else content.strip() body = re.sub(r'^#.*?\n', '', body, flags=re.MULTILINE).strip() body = re.sub(r'\n?\s*.*?\s*', '', body, flags=re.DOTALL).strip() body = re.sub(r'\*\*(.*?)\*\*', r'\1', body) # 去掉agent的思考过程("Now let me...", "Let me...", "Now I have..."等开头) body = re.sub(r'^(Now let me|Let me|I need|I will|First let me|First,? I|Now I have|Here.i|I.ll|I.m ).*?\n\n', '', body, flags=re.DOTALL).strip() # 去掉末尾的思考尾巴 body = re.sub(r'\n\s*(Now I|This |I have |I used |The report|The data).*?$', '', body, flags=re.DOTALL).strip() # 如果只剩"好的"、"收到"等短回应,丢弃 if re.match(r'^[\u4e00-\u9fff,。]{1,10}$', body): return None if not body or len(body) < 20: return None # 再次检查正文中是否包含 [SILENT] if "[SILENT]" in body: return None return body def send(body): from xml.sax.saxutils import escape safe = escape(f"【知微】{body}") stanza = ( f"" f"{safe}" ) # 重试3次 for attempt in range(3): try: r = subprocess.run( ["docker", "exec", "ejabberd", "ejabberdctl", "send_stanza", FROM, TO, stanza], capture_output=True, timeout=10, text=True, ) if r.stderr and "error" in r.stderr.lower(): print(f"send error (attempt {attempt+1}): {r.stderr.strip()[:100]}", file=sys.stderr) if attempt < 2: continue return False return r.returncode == 0 except subprocess.TimeoutExpired: print(f"send timeout (attempt {attempt+1})", file=sys.stderr) if attempt < 2: continue return False except Exception as e: print(f"send err (attempt {attempt+1}): {e}", file=sys.stderr) if attempt < 2: continue return False return False def validate_format(body): """格式检查 — 只记录不拦截,标记改进点""" text = body.strip() issues = [] # 必含区域检查 has_key = "重点推荐操作" in text has_risk = "风险关注" in text has_rest = "其余持仓" in text or "今日关注" in text if not has_key: issues.append("缺【重点推荐操作】区域") if not has_risk: issues.append("缺【风险关注】区域") # 超长提醒 if len(text) > 600: issues.append(f"报告偏长({len(text)}字),建议压缩到600字内") # 模糊词提醒 fuzzy = re.findall(r"可关注|可考虑|建议观察|试试|谨慎关注|择机|根据情况", text) if fuzzy: issues.append(f"含模糊词({', '.join(list(set(fuzzy))[:3])}),应给唯一结论") # 选择题句式提醒 if re.search(r"如果.*就|若.*则|可以.*也可以", text): issues.append("含选择题句式,应给唯一建议") return text, issues # 始终通过,issues 为空就是干净 def load_silent_stats(): """加载当日静默统计""" try: return json.loads(SILENT_STATS.read_text()) except: return {"date": "", "silent": 0, "short": 0, "script": 0} def save_silent_stats(stats): SILENT_STATS.write_text(json.dumps(stats)) def send_silent_summary(stats): """发送当日静默报告汇总""" parts = [] if stats.get("silent", 0) > 0: parts.append(f"静默[SILENT] {stats['silent']}次") if stats.get("short", 0) > 0: parts.append(f"过短(<20字) {stats['short']}次") if stats.get("script", 0) > 0: parts.append(f"脚本输出 {stats['script']}次") if not parts: body = "【每日汇总】今日所有cron报告已正常送达,无被拦截的报告。" else: body = "【每日汇总】今日以下cron报告未送达(已拦截):\n" + "\n".join(f"• {p}" for p in parts) + "\n\n无操作信号的报告正常静默,有操作信号的都已送达。" send(body) def scan(): processed = load_journal() new = set() n_pushed = 0 n_silent = 0 n_short = 0 n_script = 0 no_agent_ids = load_no_agent_job_ids() skip_all = SKIP_DIRS | no_agent_ids for cron_dir in CRON_DIRS: if not cron_dir.exists(): continue for d in sorted(cron_dir.iterdir()): if not d.is_dir(): continue if d.name in skip_all: continue for f in sorted(d.iterdir()): if f.suffix != ".md": continue key = str(f.resolve()) if key in processed or key in new: continue new.add(key) # 跳过超过MAX_AGE_HOURS小时的旧文件 age_hours = (datetime.now() - datetime.fromtimestamp(f.stat().st_mtime)).total_seconds() / 3600 if age_hours > MAX_AGE_HOURS: continue content = f.read_text(encoding="utf-8", errors="replace") # 提前判断脚本输出 if is_pure_script_output(content): n_script += 1 continue parts = content.split("## Response") body = parts[1].strip() if len(parts) > 1 else content.strip() body = re.sub(r'^#.*?\n', '', body, flags=re.MULTILINE).strip() body = re.sub(r'\n?\s*.*?\s*', '', body, flags=re.DOTALL).strip() body = re.sub(r'\*\*(.*?)\*\*', r'\1', body) if not body or len(body) < 20: n_short += 1 continue # SILENT → 拦截,记数 if "[SILENT]" in body: n_silent += 1 continue # 格式校验 — 记录改进点,不拦截 ok_body, issues = validate_format(body) n_pushed += 1 ok_sent = send(body) if not ok_sent: print(f" {d.name}: send failed", file=sys.stderr) if issues: print(f" {d.name}/{f.name}: 改进建议: {'; '.join(issues)}", file=sys.stderr) if new: save_journal(processed | new) # 保存当日汇总到文件(供16:30汇总用) today = datetime.now().strftime("%Y-%m-%d") stats = load_silent_stats() if stats.get("date") != today: stats = {"date": today, "silent": 0, "short": 0, "script": 0} stats["silent"] += n_silent stats["short"] += n_short stats["script"] += n_script save_silent_stats(stats) # 16:30~16:35 发送当日汇总(收盘后) now = datetime.now() hhmm = now.hour * 60 + now.minute if 990 <= hhmm <= 995: # 16:30~16:35 send_silent_summary(stats) log = f"推送{n_pushed}份,静默拦截{n_silent}份,过短{n_short}份,跳过脚本{n_script}份" print(log, file=sys.stderr) return n_pushed if __name__ == "__main__": scan()