feat: 数据治理+深套解套方案
- data_governance.py: holding_strategies去重(1642→345) - 检查缺失策略的持仓(中际旭创已补) - 深套持仓统计 - 中际旭创(300308)技术面策略已生成 - 止损1287 止盈1453 买入区1297~1350 RR=3.59 - 深套解套方向: 丘钛科技 -48% → 反弹到9.7卖1/3 万科企业 -53% → 反弹到2.8卖1/3 紫金矿业 -36% → 反弹到30.4卖1/3 比亚迪股份 -27% → 反弹到89.8卖1/3 中科电气 -32% → 反弹到18.6卖1/3
This commit is contained in:
+354
@@ -0,0 +1,354 @@
|
||||
#!/usr/bin/env python3
|
||||
"""cron_to_xmpp.py — 智能cron报告推送
|
||||
|
||||
只推送LLM驱动的分析报告(有实质内容),不推送纯脚本输出。
|
||||
关键规则:
|
||||
1. 跳过 no_agent 脚本的输出(价格监控、数据同步等机器数据)
|
||||
2. 跳过自己的输出目录(30908cdc44a8),避免循环推送
|
||||
3. 正文太短(<20字)或只有 [SILENT] 的不推
|
||||
4. 超时自动跳过,不影响后续
|
||||
"""
|
||||
import json
|
||||
import subprocess
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
# 使用绝对路径,不受 profile 环境变量影响
|
||||
REAL_HOME = Path("/home/hmo")
|
||||
|
||||
# 扫描目录
|
||||
CRON_DIRS = [
|
||||
REAL_HOME / ".hermes" / "cron" / "output",
|
||||
REAL_HOME / ".hermes" / "profiles" / "position-analyst" / "cron" / "output",
|
||||
]
|
||||
JOURNAL = REAL_HOME / ".hermes" / "cron" / ".relay_journal.json"
|
||||
SILENT_STATS = REAL_HOME / ".hermes" / "cron" / ".silent_daily_count.json"
|
||||
MAX_AGE_HOURS = 6 # 只推送6小时内的报告,防止清journal后爆历史
|
||||
|
||||
|
||||
def load_no_agent_job_ids():
|
||||
"""从两个profile的jobs.json中读取所有no_agent=true的job ID"""
|
||||
ids = set()
|
||||
for jobs_path in [
|
||||
REAL_HOME / ".hermes" / "cron" / "jobs.json",
|
||||
REAL_HOME / ".hermes" / "profiles" / "position-analyst" / "cron" / "jobs.json",
|
||||
]:
|
||||
try:
|
||||
with open(jobs_path) as f:
|
||||
data = json.load(f)
|
||||
for j in data.get("jobs", []):
|
||||
if j.get("no_agent"):
|
||||
ids.add(j["id"])
|
||||
except:
|
||||
pass
|
||||
return ids
|
||||
|
||||
|
||||
# 硬编码保底(如果 jobs.json 读不到)
|
||||
SKIP_DIRS = {
|
||||
"30908cdc44a8", # cron-推XMPP中继自身输出
|
||||
"a231e9c39b4e", # 知识研究-日常(由莫荷负责推送)
|
||||
"7bda62d24d22", # 梦境循环-知识库归并(由莫荷负责推送)
|
||||
"health", # 健康检查输出
|
||||
"b9fa4482dc1a", # 自成长知识库-22:10中继推送(莫荷的通道)
|
||||
}
|
||||
|
||||
FROM = "zhiwei@yoin.fun"
|
||||
TO = "hmo@yoin.fun"
|
||||
|
||||
|
||||
def load_journal():
|
||||
try:
|
||||
return set(json.loads(JOURNAL.read_text()))
|
||||
except:
|
||||
return set()
|
||||
|
||||
|
||||
def save_journal(entries):
|
||||
JOURNAL.write_text(json.dumps(sorted(entries)))
|
||||
|
||||
|
||||
def is_pure_script_output(content):
|
||||
"""判断文件是否是纯脚本的机器输出(不是LLM报告)"""
|
||||
# LLM报告的特征:有 ## Response 节(包含agent的回复)
|
||||
if "## Response" in content:
|
||||
return False
|
||||
# 以 # Cron Job: 开头但没有 ## Response 的可能是脚本输出
|
||||
if content.startswith("# Cron Job:"):
|
||||
return True
|
||||
# 价格监控的触发输出
|
||||
if content.startswith("🔔") and "⏱" in content:
|
||||
return True
|
||||
# 健康检查报告
|
||||
if "MoFin 系统健康检查" in content:
|
||||
return True
|
||||
# [SILENT] 标记一概不拦 — 用户想看到报告结构,不想被静默
|
||||
# 移除 [SILENT] 过滤,让报告始终送达
|
||||
# 结构化数据标签(价格监控的机器数据)
|
||||
if "<structured_data>" in content:
|
||||
return True
|
||||
# no_agent 脚本的输出特征(Hermes自动添加的header)
|
||||
if "**Mode:** no_agent (script)" in content:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def validate_report_body(body):
|
||||
"""质量检查 — 不拦截,返回改进建议"""
|
||||
issues = []
|
||||
text = body.strip()
|
||||
|
||||
if "重点推荐操作" not in text:
|
||||
issues.append("缺少【重点推荐操作】区域(如无需操作可写「无」)")
|
||||
|
||||
if "风险关注" not in text:
|
||||
issues.append("缺少【风险关注】区域(如无风险可写「无」)")
|
||||
|
||||
if len(text) > 600:
|
||||
issues.append(f"报告偏长({len(text)}字),建议压缩到600字以内")
|
||||
|
||||
fuzzy = re.findall(r"可关注|可考虑|建议观察|试试|谨慎关注|择机|根据情况", text)
|
||||
if fuzzy:
|
||||
issues.append(f"含模糊词: {', '.join(set(fuzzy))},建议替换为明确操作指令")
|
||||
|
||||
if re.search(r"如果.*就.*如果.*就|若.*则.*若.*则", text):
|
||||
issues.append("含选择题句式,建议只给一个确定建议")
|
||||
|
||||
return issues
|
||||
|
||||
|
||||
def send_feedback(issues, job_name):
|
||||
"""发送质量反馈给知微自己"""
|
||||
from xml.sax.saxutils import escape
|
||||
feedback = f"[自我反馈] 报告质量检查发现以下问题,下次注意:\n" + "\n".join(f"• {i}" for i in issues)
|
||||
safe = escape(feedback)
|
||||
stanza = (
|
||||
f"<message from='{FROM}' to='{FROM}' "
|
||||
f"type='chat' xml:lang='en'>"
|
||||
f"<body>{safe}</body></message>"
|
||||
)
|
||||
try:
|
||||
subprocess.run(
|
||||
["docker", "exec", "ejabberd", "ejabberdctl",
|
||||
"send_stanza", FROM, FROM, stanza],
|
||||
capture_output=True, timeout=10, text=True,
|
||||
)
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
def extract_body(path):
|
||||
content = path.read_text(encoding="utf-8", errors="replace")
|
||||
|
||||
if is_pure_script_output(content):
|
||||
return None
|
||||
|
||||
parts = content.split("## Response")
|
||||
body = parts[1].strip() if len(parts) > 1 else content.strip()
|
||||
body = re.sub(r'^#.*?\n', '', body, flags=re.MULTILINE).strip()
|
||||
body = re.sub(r'\n?\s*<structured_data>.*?</structured_data>\s*', '', body, flags=re.DOTALL).strip()
|
||||
body = re.sub(r'\*\*(.*?)\*\*', r'\1', body)
|
||||
|
||||
if not body or len(body) < 20:
|
||||
return None
|
||||
|
||||
# 只过滤内容是纯[SILENT]的报告
|
||||
if body.strip() == "[SILENT]":
|
||||
return None
|
||||
|
||||
# 正文中混了[SILENT]标记(LLM写了报告又在末尾加了这个)— 去掉标记保留正文
|
||||
body = body.replace("[SILENT]", "").strip()
|
||||
if len(body) < 20:
|
||||
return None
|
||||
|
||||
return body
|
||||
|
||||
|
||||
def send(body):
|
||||
from xml.sax.saxutils import escape
|
||||
safe = escape(f"【知微】{body}")
|
||||
stanza = (
|
||||
f"<message from='{FROM}' to='{TO}' "
|
||||
f"type='chat' xml:lang='en'>"
|
||||
f"<body>{safe}</body></message>"
|
||||
)
|
||||
# 重试3次
|
||||
for attempt in range(3):
|
||||
try:
|
||||
r = subprocess.run(
|
||||
["docker", "exec", "ejabberd", "ejabberdctl",
|
||||
"send_stanza", FROM, TO, stanza],
|
||||
capture_output=True, timeout=10, text=True,
|
||||
)
|
||||
if r.stderr and "error" in r.stderr.lower():
|
||||
print(f"send error (attempt {attempt+1}): {r.stderr.strip()[:100]}", file=sys.stderr)
|
||||
if attempt < 2:
|
||||
continue
|
||||
return False
|
||||
return r.returncode == 0
|
||||
except subprocess.TimeoutExpired:
|
||||
print(f"send timeout (attempt {attempt+1})", file=sys.stderr)
|
||||
if attempt < 2:
|
||||
continue
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"send err (attempt {attempt+1}): {e}", file=sys.stderr)
|
||||
if attempt < 2:
|
||||
continue
|
||||
return False
|
||||
return False
|
||||
|
||||
|
||||
def validate_format(body):
|
||||
"""格式检查 — 只记录不拦截,标记改进点"""
|
||||
text = body.strip()
|
||||
issues = []
|
||||
|
||||
# 必含区域检查
|
||||
has_key = "重点推荐操作" in text
|
||||
has_risk = "风险关注" in text
|
||||
has_rest = "其余持仓" in text or "今日关注" in text
|
||||
if not has_key:
|
||||
issues.append("缺【重点推荐操作】区域")
|
||||
if not has_risk:
|
||||
issues.append("缺【风险关注】区域")
|
||||
|
||||
# 超长提醒
|
||||
if len(text) > 600:
|
||||
issues.append(f"报告偏长({len(text)}字),建议压缩到600字内")
|
||||
|
||||
# 模糊词提醒
|
||||
fuzzy = re.findall(r"可关注|可考虑|建议观察|试试|谨慎关注|择机|根据情况", text)
|
||||
if fuzzy:
|
||||
issues.append(f"含模糊词({', '.join(list(set(fuzzy))[:3])}),应给唯一结论")
|
||||
|
||||
# 选择题句式提醒
|
||||
if re.search(r"如果.*就|若.*则|可以.*也可以", text):
|
||||
issues.append("含选择题句式,应给唯一建议")
|
||||
|
||||
return text, issues # 始终通过,issues 为空就是干净
|
||||
|
||||
|
||||
def load_silent_stats():
|
||||
"""加载当日静默统计"""
|
||||
try:
|
||||
return json.loads(SILENT_STATS.read_text())
|
||||
except:
|
||||
return {"date": "", "silent": 0, "short": 0, "script": 0}
|
||||
|
||||
|
||||
def save_silent_stats(stats):
|
||||
SILENT_STATS.write_text(json.dumps(stats))
|
||||
|
||||
|
||||
def send_silent_summary(stats):
|
||||
"""发送当日静默报告汇总"""
|
||||
parts = []
|
||||
if stats.get("silent", 0) > 0:
|
||||
parts.append(f"静默[SILENT] {stats['silent']}次")
|
||||
if stats.get("short", 0) > 0:
|
||||
parts.append(f"过短(<20字) {stats['short']}次")
|
||||
if stats.get("script", 0) > 0:
|
||||
parts.append(f"脚本输出 {stats['script']}次")
|
||||
|
||||
if not parts:
|
||||
body = "【每日汇总】今日所有cron报告已正常送达,无被拦截的报告。"
|
||||
else:
|
||||
body = "【每日汇总】今日以下cron报告未送达(已拦截):\n" + "\n".join(f"• {p}" for p in parts) + "\n\n无操作信号的报告正常静默,有操作信号的都已送达。"
|
||||
|
||||
send(body)
|
||||
|
||||
|
||||
def scan():
|
||||
processed = load_journal()
|
||||
new = set()
|
||||
n_pushed = 0
|
||||
n_silent = 0
|
||||
n_short = 0
|
||||
n_script = 0
|
||||
no_agent_ids = load_no_agent_job_ids()
|
||||
skip_all = SKIP_DIRS | no_agent_ids
|
||||
|
||||
for cron_dir in CRON_DIRS:
|
||||
if not cron_dir.exists():
|
||||
continue
|
||||
|
||||
for d in sorted(cron_dir.iterdir()):
|
||||
if not d.is_dir():
|
||||
continue
|
||||
if d.name in skip_all:
|
||||
continue
|
||||
|
||||
for f in sorted(d.iterdir()):
|
||||
if f.suffix != ".md":
|
||||
continue
|
||||
key = str(f.resolve())
|
||||
if key in processed or key in new:
|
||||
continue
|
||||
new.add(key)
|
||||
|
||||
# 跳过超过MAX_AGE_HOURS小时的旧文件
|
||||
age_hours = (datetime.now() - datetime.fromtimestamp(f.stat().st_mtime)).total_seconds() / 3600
|
||||
if age_hours > MAX_AGE_HOURS:
|
||||
continue
|
||||
|
||||
content = f.read_text(encoding="utf-8", errors="replace")
|
||||
|
||||
# 提前判断脚本输出
|
||||
if is_pure_script_output(content):
|
||||
n_script += 1
|
||||
continue
|
||||
|
||||
parts = content.split("## Response")
|
||||
body = parts[1].strip() if len(parts) > 1 else content.strip()
|
||||
body = re.sub(r'^#.*?\n', '', body, flags=re.MULTILINE).strip()
|
||||
body = re.sub(r'\n?\s*<structured_data>.*?</structured_data>\s*', '', body, flags=re.DOTALL).strip()
|
||||
body = re.sub(r'\*\*(.*?)\*\*', r'\1', body)
|
||||
|
||||
if not body or len(body) < 20:
|
||||
n_short += 1
|
||||
continue
|
||||
|
||||
# SILENT → 拦截,记数
|
||||
if "[SILENT]" in body:
|
||||
n_silent += 1
|
||||
continue
|
||||
|
||||
# 格式校验 — 记录改进点,不拦截
|
||||
ok_body, issues = validate_format(body)
|
||||
|
||||
n_pushed += 1
|
||||
ok_sent = send(body)
|
||||
if not ok_sent:
|
||||
print(f" {d.name}: send failed", file=sys.stderr)
|
||||
if issues:
|
||||
print(f" {d.name}/{f.name}: 改进建议: {'; '.join(issues)}", file=sys.stderr)
|
||||
|
||||
if new:
|
||||
save_journal(processed | new)
|
||||
|
||||
# 保存当日汇总到文件(供16:30汇总用)
|
||||
today = datetime.now().strftime("%Y-%m-%d")
|
||||
stats = load_silent_stats()
|
||||
if stats.get("date") != today:
|
||||
stats = {"date": today, "silent": 0, "short": 0, "script": 0}
|
||||
stats["silent"] += n_silent
|
||||
stats["short"] += n_short
|
||||
stats["script"] += n_script
|
||||
save_silent_stats(stats)
|
||||
|
||||
# 16:30~16:35 发送当日汇总(收盘后)
|
||||
now = datetime.now()
|
||||
hhmm = now.hour * 60 + now.minute
|
||||
if 990 <= hhmm <= 995: # 16:30~16:35
|
||||
send_silent_summary(stats)
|
||||
|
||||
log = f"推送{n_pushed}份,静默拦截{n_silent}份,过短{n_short}份,跳过脚本{n_script}份"
|
||||
print(log, file=sys.stderr)
|
||||
return n_pushed
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
scan()
|
||||
Reference in New Issue
Block a user