feat: 三层自检+元自检+cron全局审计

- 盘中高频(每15分): XMPP/Gateway/Scanner/价格/信号管道
- 每日早检(8:00): 原有7层 + 新增cron全局审计 + 元自检
- cron审计: 检查所有启用的定时任务是否在24h内运行过
- 元自检: 昨日体检是否完成/checklist覆盖是否完整
- 自成长: auto_discovery自动追加新增cron到清单
This commit is contained in:
知微
2026-06-24 21:58:57 +08:00
parent 7a6fb103cb
commit 077f683878
2 changed files with 215 additions and 1 deletions
+69 -1
View File
@@ -442,8 +442,70 @@ def check_delivery_targets():
except Exception as e:
return True, f"skip({str(e)[:60]})"
# ── Cron audit & meta self-checks ──
def check_cron_audit():
    """Audit every enabled, script-backed cron job for a run in the last 24h.

    Reads ``jobs.json`` under ``HERMES_CRON_DIR`` and compares each job's
    ``last_run_at`` with ``ctx["started_at"]`` minus 24 hours.

    Returns:
        A ``(ok, detail)`` tuple. ``ok`` is False only when at least one
        audited job has never run or last ran before the 24h cutoff. A
        missing jobs file, or any unexpected error while auditing, yields
        ``(True, ...)`` so that the audit itself is best-effort.
    """
    try:
        cron_jobs_path = HERMES_CRON_DIR / "jobs.json"
        if not cron_jobs_path.exists():
            return True, "no_jobs_json"
        # JSON is UTF-8 by specification; do not depend on the locale encoding.
        data = json.loads(cron_jobs_path.read_text(encoding="utf-8"))
        cutoff = (ctx["started_at"] - timedelta(hours=24)).isoformat()[:19]

        stale = []
        audited = 0
        for job in data.get("jobs", []):
            # A job without an explicit "enabled" key counts as enabled.
            if not job.get("enabled", True) or not job.get("script", ""):
                continue
            # Count here so the summary uses the same filter as the audit.
            audited += 1

            name = job.get("name", "?")
            last_run = job.get("last_run_at", "")
            if not last_run:
                stale.append(f"{name}(从未运行)")
                continue

            # Timestamps are compared as second-precision ISO strings, so
            # normalise a "YYYY-MM-DD HH:MM:SS" separator to the "T" that
            # isoformat() produces; otherwise same-day values sort wrongly.
            # NOTE(review): assumes last_run_at and ctx["started_at"] share
            # the same timezone - confirm against the scheduler's writer.
            last_run_key = last_run[:19].replace(" ", "T")
            if last_run_key < cutoff:
                if job.get("last_status") == "ok":
                    stale.append(f"{name}(>24h未运行)")
                else:
                    stale.append(f"{name}(>24h+状态异常)")

        if stale:
            # Only the first five offenders are listed to keep the detail short.
            return False, f"{len(stale)}个cron异常: {'; '.join(stale[:5])}"
        return True, f"全部{audited}个cron正常"
    except Exception as e:
        return True, f"skip({str(e)[:60]})"
def check_meta_health_check_yesterday():
    """Meta check: report how yesterday's health check run ended.

    Looks through the last 30 entries of the history file at
    ``HISTORY_PATH`` for the first one whose ``timestamp`` falls on the day
    before ``ctx["started_at"]``.

    Returns:
        A ``(ok, detail)`` tuple. ``ok`` is always True: this check is
        informational, and a run that had errors is only reported in
        ``detail``. If the history cannot be read or parsed, ``detail`` is
        ``skip(<reason>)``.
    """
    try:
        history = []
        if HISTORY_PATH.exists():
            # JSON is UTF-8 by specification; do not depend on the locale.
            history = json.loads(HISTORY_PATH.read_text(encoding="utf-8"))
        yesterday = (ctx["started_at"] - timedelta(days=1)).strftime("%Y-%m-%d")
        for h in history[-30:]:
            ts = h.get("timestamp", "")
            # The first 10 characters of the timestamp are its date part.
            if ts[:10] == yesterday:
                if h.get("error", 0) == 0 and h.get("critical", 0) == 0:
                    return True, f"昨日体检通过({h.get('ok',0)}项正常)"
                return True, f"昨日体检有{h.get('error',0)}错误+{h.get('critical',0)}严重(已记录)"
        return True, "无昨日记录(首次运行)"
    except Exception as e:
        # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
        # The reason is included, as the other checks do.
        return True, f"skip({str(e)[:60]})"
def check_meta_checklist_completeness():
    """Meta check: report whether this run appended new items to the checklist.

    Reads ``ctx["auto_discovered_items"]`` (treated as empty when absent).

    Returns:
        A ``(ok, detail)`` tuple. ``ok`` is always True; ``detail`` states
        either how many items were appended or that the checklist is
        complete. On an unexpected error ``detail`` is ``skip(<reason>)``.
    """
    try:
        added = ctx.get("auto_discovered_items", [])
        if added:
            return True, f"自动发现并追加了{len(added)}个新组件到清单"
        return True, "清单覆盖完整"
    except Exception as e:
        # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
        # The reason is included, as the other checks do.
        return True, f"skip({str(e)[:60]})"
# ── 自动发现 ──
def self_discovery():
"""自动发现新增组件并更新checklist"""
discovered = []
@@ -536,6 +598,12 @@ def run_check(item):
ok, detail = check_cron_paused()
elif check_spec == "delivery:origin_targets":
ok, detail = check_delivery_targets()
elif check_spec == "cron_audit:all":
ok, detail = check_cron_audit()
elif check_spec == "meta:health_check_yesterday":
ok, detail = check_meta_health_check_yesterday()
elif check_spec == "meta:checklist_completeness":
ok, detail = check_meta_checklist_completeness()
elif check_spec == "pipeline:xiaoguo_signal_flow":
# 综合检查:小果有数据→被我处理
today_xiaoguo, d1 = check_db_table_count("signal_news", "created_at", None, "today", 0)