360 lines
12 KiB
Python
360 lines
12 KiB
Python
#!/usr/bin/env python3
|
||
"""cron_to_xmpp.py — 智能cron报告推送
|
||
|
||
只推送LLM驱动的分析报告(有实质内容),不推送纯脚本输出。
|
||
关键规则:
|
||
1. 跳过 no_agent 脚本的输出(价格监控、数据同步等机器数据)
|
||
2. 跳过自己的输出目录(30908cdc44a8),避免循环推送
|
||
3. 正文太短(<20字)或只有 [SILENT] 的不推
|
||
4. 超时自动跳过,不影响后续
|
||
"""
|
||
import json
|
||
import subprocess
|
||
import re
|
||
import sys
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
|
||
# 使用绝对路径,不受 profile 环境变量影响
|
||
REAL_HOME = Path("/home/hmo")
|
||
|
||
# 扫描目录
|
||
CRON_DIRS = [
|
||
REAL_HOME / ".hermes" / "cron" / "output",
|
||
REAL_HOME / ".hermes" / "profiles" / "position-analyst" / "cron" / "output",
|
||
]
|
||
JOURNAL = REAL_HOME / ".hermes" / "cron" / ".relay_journal.json"
|
||
SILENT_STATS = REAL_HOME / ".hermes" / "cron" / ".silent_daily_count.json"
|
||
MAX_AGE_HOURS = 6 # 只推送6小时内的报告,防止清journal后爆历史
|
||
|
||
|
||
def load_no_agent_job_ids():
|
||
"""从两个profile的jobs.json中读取所有no_agent=true的job ID"""
|
||
ids = set()
|
||
for jobs_path in [
|
||
REAL_HOME / ".hermes" / "cron" / "jobs.json",
|
||
REAL_HOME / ".hermes" / "profiles" / "position-analyst" / "cron" / "jobs.json",
|
||
]:
|
||
try:
|
||
with open(jobs_path) as f:
|
||
data = json.load(f)
|
||
for j in data.get("jobs", []):
|
||
if j.get("no_agent"):
|
||
ids.add(j["id"])
|
||
except:
|
||
pass
|
||
return ids
|
||
|
||
|
||
# 硬编码保底(如果 jobs.json 读不到)
|
||
SKIP_DIRS = {
|
||
"30908cdc44a8", # cron-推XMPP中继自身输出
|
||
"health", # 健康检查输出
|
||
}
|
||
|
||
FROM = "zhiwei@yoin.fun"
|
||
TO = "hmo@yoin.fun"
|
||
|
||
|
||
def load_journal():
|
||
try:
|
||
return set(json.loads(JOURNAL.read_text()))
|
||
except:
|
||
return set()
|
||
|
||
|
||
def save_journal(entries):
|
||
JOURNAL.write_text(json.dumps(sorted(entries)))
|
||
|
||
|
||
def is_pure_script_output(content):
|
||
"""判断文件是否是纯脚本的机器输出(不是LLM报告)"""
|
||
# LLM报告的特征:有 ## Response 节(包含agent的回复)
|
||
if "## Response" in content:
|
||
return False
|
||
# 以 # Cron Job: 开头但没有 ## Response 的可能是脚本输出
|
||
if content.startswith("# Cron Job:"):
|
||
return True
|
||
# 价格监控的触发输出
|
||
if content.startswith("🔔") and "⏱" in content:
|
||
return True
|
||
# 健康检查报告
|
||
if "MoFin 系统健康检查" in content:
|
||
return True
|
||
# 结构化数据标签(价格监控的机器数据)
|
||
if "<structured_data>" in content:
|
||
return True
|
||
# no_agent 脚本的输出特征(Hermes自动添加的header)
|
||
if "**Mode:** no_agent (script)" in content:
|
||
return True
|
||
return False
|
||
|
||
|
||
def validate_report_body(body):
|
||
"""质量检查 — 不拦截,返回改进建议"""
|
||
issues = []
|
||
text = body.strip()
|
||
|
||
if "重点推荐操作" not in text:
|
||
issues.append("缺少【重点推荐操作】区域(如无需操作可写「无」)")
|
||
|
||
if "风险关注" not in text:
|
||
issues.append("缺少【风险关注】区域(如无风险可写「无」)")
|
||
|
||
if len(text) > 600:
|
||
issues.append(f"报告偏长({len(text)}字),建议压缩到600字以内")
|
||
|
||
fuzzy = re.findall(r"可关注|可考虑|建议观察|试试|谨慎关注|择机|根据情况", text)
|
||
if fuzzy:
|
||
issues.append(f"含模糊词: {', '.join(set(fuzzy))},建议替换为明确操作指令")
|
||
|
||
if re.search(r"如果.*就.*如果.*就|若.*则.*若.*则", text):
|
||
issues.append("含选择题句式,建议只给一个确定建议")
|
||
|
||
return issues
|
||
|
||
|
||
def send_feedback(issues, job_name):
|
||
"""发送质量反馈给知微自己"""
|
||
from xml.sax.saxutils import escape
|
||
feedback = f"[自我反馈] 报告质量检查发现以下问题,下次注意:\n" + "\n".join(f"• {i}" for i in issues)
|
||
safe = escape(feedback)
|
||
stanza = (
|
||
f"<message from='{FROM}' to='{FROM}' "
|
||
f"type='chat' xml:lang='en'>"
|
||
f"<body>{safe}</body></message>"
|
||
)
|
||
try:
|
||
subprocess.run(
|
||
["docker", "exec", "ejabberd", "ejabberdctl",
|
||
"send_stanza", FROM, FROM, stanza],
|
||
capture_output=True, timeout=10, text=True,
|
||
)
|
||
except:
|
||
pass
|
||
|
||
|
||
def extract_body(path):
|
||
content = path.read_text(encoding="utf-8", errors="replace")
|
||
|
||
if is_pure_script_output(content):
|
||
return None
|
||
|
||
parts = content.split("## Response")
|
||
body = parts[1].strip() if len(parts) > 1 else content.strip()
|
||
body = re.sub(r'^#.*?\n', '', body, flags=re.MULTILINE).strip()
|
||
body = re.sub(r'\n?\s*<structured_data>.*?</structured_data>\s*', '', body, flags=re.DOTALL).strip()
|
||
body = re.sub(r'\*\*(.*?)\*\*', r'\1', body)
|
||
|
||
# 去掉agent的思考过程("Now let me...", "Let me...", "Now I have..."等开头)
|
||
body = re.sub(r'^(Now let me|Let me|I need|I will|First let me|First,? I|Now I have|Here.i|I.ll|I.m ).*?\n\n', '', body, flags=re.DOTALL).strip()
|
||
# 去掉末尾的思考尾巴
|
||
body = re.sub(r'\n\s*(Now I|This |I have |I used |The report|The data).*?$', '', body, flags=re.DOTALL).strip()
|
||
# 如果只剩"好的"、"收到"等短回应,丢弃
|
||
if re.match(r'^[\u4e00-\u9fff,。]{1,10}$', body):
|
||
return None
|
||
|
||
if not body:
|
||
return None
|
||
|
||
# [SILENT] → 不推送(计数的逻辑在 scan() 中处理)
|
||
if "[SILENT]" in body:
|
||
return None
|
||
|
||
if len(body) < 20:
|
||
return None
|
||
|
||
return body
|
||
|
||
|
||
def send(body):
|
||
from xml.sax.saxutils import escape
|
||
safe = escape(f"【知微】{body}")
|
||
stanza = (
|
||
f"<message from='{FROM}' to='{TO}' "
|
||
f"type='chat' xml:lang='en'>"
|
||
f"<body>{safe}</body></message>"
|
||
)
|
||
# 重试3次
|
||
for attempt in range(3):
|
||
try:
|
||
r = subprocess.run(
|
||
["docker", "exec", "ejabberd", "ejabberdctl",
|
||
"send_stanza", FROM, TO, stanza],
|
||
capture_output=True, timeout=10, text=True,
|
||
)
|
||
if r.stderr and "error" in r.stderr.lower():
|
||
print(f"send error (attempt {attempt+1}): {r.stderr.strip()[:100]}", file=sys.stderr)
|
||
if attempt < 2:
|
||
continue
|
||
return False
|
||
return r.returncode == 0
|
||
except subprocess.TimeoutExpired:
|
||
print(f"send timeout (attempt {attempt+1})", file=sys.stderr)
|
||
if attempt < 2:
|
||
continue
|
||
return False
|
||
except Exception as e:
|
||
print(f"send err (attempt {attempt+1}): {e}", file=sys.stderr)
|
||
if attempt < 2:
|
||
continue
|
||
return False
|
||
return False
|
||
|
||
|
||
def validate_format(body):
|
||
"""格式检查 — 只记录不拦截,标记改进点"""
|
||
text = body.strip()
|
||
issues = []
|
||
|
||
# 必含区域检查
|
||
has_key = "重点推荐操作" in text
|
||
has_risk = "风险关注" in text
|
||
has_rest = "其余持仓" in text or "今日关注" in text
|
||
if not has_key:
|
||
issues.append("缺【重点推荐操作】区域")
|
||
if not has_risk:
|
||
issues.append("缺【风险关注】区域")
|
||
|
||
# 超长提醒
|
||
if len(text) > 600:
|
||
issues.append(f"报告偏长({len(text)}字),建议压缩到600字内")
|
||
|
||
# 模糊词提醒
|
||
fuzzy = re.findall(r"可关注|可考虑|建议观察|试试|谨慎关注|择机|根据情况", text)
|
||
if fuzzy:
|
||
issues.append(f"含模糊词({', '.join(list(set(fuzzy))[:3])}),应给唯一结论")
|
||
|
||
# 选择题句式提醒
|
||
if re.search(r"如果.*就|若.*则|可以.*也可以", text):
|
||
issues.append("含选择题句式,应给唯一建议")
|
||
|
||
return text, issues # 始终通过,issues 为空就是干净
|
||
|
||
|
||
def load_silent_stats():
|
||
"""加载当日静默统计"""
|
||
try:
|
||
return json.loads(SILENT_STATS.read_text())
|
||
except:
|
||
return {"date": "", "silent": 0, "short": 0, "script": 0}
|
||
|
||
|
||
def save_silent_stats(stats):
|
||
SILENT_STATS.write_text(json.dumps(stats))
|
||
|
||
|
||
def send_silent_summary(stats):
|
||
"""发送当日静默报告汇总"""
|
||
parts = []
|
||
if stats.get("silent", 0) > 0:
|
||
parts.append(f"静默[SILENT] {stats['silent']}次")
|
||
if stats.get("short", 0) > 0:
|
||
parts.append(f"过短(<20字) {stats['short']}次")
|
||
if stats.get("script", 0) > 0:
|
||
parts.append(f"脚本输出 {stats['script']}次")
|
||
|
||
if not parts:
|
||
body = "【每日汇总】今日所有cron报告已正常送达,无被拦截的报告。"
|
||
else:
|
||
body = "【每日汇总】今日以下cron报告未送达(已拦截):\n" + "\n".join(f"• {p}" for p in parts) + "\n\n无操作信号的报告正常静默,有操作信号的都已送达。"
|
||
|
||
send(body)
|
||
|
||
|
||
def scan():
|
||
processed = load_journal()
|
||
new = set()
|
||
n_pushed = 0
|
||
n_silent = 0
|
||
n_short = 0
|
||
n_script = 0
|
||
no_agent_ids = load_no_agent_job_ids()
|
||
skip_all = SKIP_DIRS | no_agent_ids
|
||
|
||
for cron_dir in CRON_DIRS:
|
||
if not cron_dir.exists():
|
||
continue
|
||
|
||
for d in sorted(cron_dir.iterdir()):
|
||
if not d.is_dir():
|
||
continue
|
||
if d.name in skip_all:
|
||
continue
|
||
|
||
for f in sorted(d.iterdir()):
|
||
if f.suffix != ".md":
|
||
continue
|
||
key = str(f.resolve())
|
||
if key in processed or key in new:
|
||
continue
|
||
new.add(key)
|
||
|
||
# 跳过超过MAX_AGE_HOURS小时的旧文件
|
||
age_hours = (datetime.now() - datetime.fromtimestamp(f.stat().st_mtime)).total_seconds() / 3600
|
||
if age_hours > MAX_AGE_HOURS:
|
||
continue
|
||
|
||
content = f.read_text(encoding="utf-8", errors="replace")
|
||
|
||
# 提前判断脚本输出
|
||
if is_pure_script_output(content):
|
||
n_script += 1
|
||
continue
|
||
|
||
parts = content.split("## Response")
|
||
body = parts[1].strip() if len(parts) > 1 else content.strip()
|
||
body = re.sub(r'^#.*?\n', '', body, flags=re.MULTILINE).strip()
|
||
body = re.sub(r'\n?\s*<structured_data>.*?</structured_data>\s*', '', body, flags=re.DOTALL).strip()
|
||
body = re.sub(r'\*\*(.*?)\*\*', r'\1', body)
|
||
|
||
if not body:
|
||
n_short += 1
|
||
continue
|
||
|
||
# SILENT → 拦截,记数(在长度检查之前,因为 [SILENT] 只有8字符)
|
||
if "[SILENT]" in body:
|
||
n_silent += 1
|
||
continue
|
||
|
||
if len(body) < 20:
|
||
n_short += 1
|
||
continue
|
||
|
||
# 格式校验 — 记录改进点,不拦截
|
||
ok_body, issues = validate_format(body)
|
||
|
||
n_pushed += 1
|
||
ok_sent = send(body)
|
||
if not ok_sent:
|
||
print(f" {d.name}: send failed", file=sys.stderr)
|
||
if issues:
|
||
print(f" {d.name}/{f.name}: 改进建议: {'; '.join(issues)}", file=sys.stderr)
|
||
|
||
if new:
|
||
save_journal(processed | new)
|
||
|
||
# 保存当日汇总到文件(供16:30汇总用)
|
||
today = datetime.now().strftime("%Y-%m-%d")
|
||
stats = load_silent_stats()
|
||
if stats.get("date") != today:
|
||
stats = {"date": today, "silent": 0, "short": 0, "script": 0}
|
||
stats["silent"] += n_silent
|
||
stats["short"] += n_short
|
||
stats["script"] += n_script
|
||
save_silent_stats(stats)
|
||
|
||
# 16:30~16:35 发送当日汇总(收盘后)
|
||
now = datetime.now()
|
||
hhmm = now.hour * 60 + now.minute
|
||
if 990 <= hhmm <= 995: # 16:30~16:35
|
||
send_silent_summary(stats)
|
||
|
||
log = f"推送{n_pushed}份,静默拦截{n_silent}份,过短{n_short}份,跳过脚本{n_script}份"
|
||
print(log, file=sys.stderr)
|
||
return n_pushed
|
||
|
||
|
||
if __name__ == "__main__":
|
||
scan()
|