feat: 三层自检+元自检+cron全局审计
- 盘中高频(每15分): XMPP/Gateway/Scanner/价格/信号管道
- 每日早检(8:00): 原有7层 + 新增cron全局审计 + 元自检
- cron审计: 检查所有启用的定时任务是否在24h内运行过
- 元自检: 昨日体检是否完成/checklist覆盖是否完整
- 自成长: auto_discovery自动追加新增cron到清单
This commit is contained in:
@@ -442,8 +442,70 @@ def check_delivery_targets():
|
||||
except Exception as e:
|
||||
return True, f"skip({str(e)[:60]})"
|
||||
|
||||
# ── Cron audit and meta self-checks ──
|
||||
|
||||
def check_cron_audit():
    """Audit all cron jobs: flag audited jobs with no run in the last 24 hours.

    Reads ``jobs.json`` under ``HERMES_CRON_DIR`` and audits every job that is
    enabled (a missing ``enabled`` key counts as enabled) and has a non-empty
    ``script``.

    Returns:
        tuple[bool, str]: ``(ok, detail)``. ``ok`` is False only when at least
        one audited job has never run, or last ran more than 24 hours before
        ``ctx["started_at"]``. A missing jobs file and any unexpected error
        are both reported as ``(True, ...)`` so the audit is best-effort.
    """
    try:
        cron_jobs_path = HERMES_CRON_DIR / "jobs.json"
        if not cron_jobs_path.exists():
            return True, "no_jobs_json"
        data = json.loads(cron_jobs_path.read_text())

        # Timestamps are compared as ISO-8601 strings truncated to seconds.
        # NOTE(review): this presumes last_run_at is written in the same
        # timezone/format as ctx["started_at"] — confirm against the writer.
        cutoff = (ctx["started_at"] - timedelta(hours=24)).isoformat()[:19]

        # Build the audited set once so the loop and the final count always
        # agree on which jobs are in scope (missing "enabled" means enabled).
        audited = [
            job
            for job in data.get("jobs", [])
            if job.get("enabled", True) and job.get("script")
        ]

        stale = []
        for job in audited:
            name = job.get("name", "?")
            last_run = job.get("last_run_at", "")
            if not last_run:
                stale.append(f"{name}(从未运行)")
            elif last_run[:19] < cutoff:
                if job.get("last_status") == "ok":
                    stale.append(f"{name}(>24h未运行)")
                else:
                    stale.append(f"{name}(>24h+状态异常)")

        if stale:
            # Only the first five offenders are listed to keep the detail short.
            return False, f"{len(stale)}个cron异常: {'; '.join(stale[:5])}"
        return True, f"全部{len(audited)}个cron正常"
    except Exception as e:
        # Best-effort check: an unreadable or malformed jobs file must not
        # fail the whole health run, so report a truncated reason instead.
        return True, f"skip({str(e)[:60]})"
|
||||
|
||||
|
||||
def check_meta_health_check_yesterday():
    """Meta-check: report how yesterday's health check ended.

    Looks through the last 30 entries loaded from ``HISTORY_PATH`` for the
    first one whose ``timestamp`` starts with the date one day before
    ``ctx["started_at"]``.

    Returns:
        tuple[bool, str]: always ``(True, detail)``. The detail says whether
        yesterday's run passed, had errors/criticals, or has no record. Any
        unexpected error is reported as ``skip(...)`` with a truncated reason.
    """
    try:
        history = []
        if HISTORY_PATH.exists():
            history = json.loads(HISTORY_PATH.read_text())
        yesterday = (ctx["started_at"] - timedelta(days=1)).strftime("%Y-%m-%d")

        # NOTE(review): only the last 30 entries are scanned; if the history
        # also receives the high-frequency intraday runs, yesterday's entry
        # may fall outside this window — confirm what is written to it.
        for h in history[-30:]:
            # A null timestamp must not abort the scan of the other entries.
            ts = h.get("timestamp") or ""
            if ts[:10] != yesterday:
                continue
            errors = h.get("error", 0)
            criticals = h.get("critical", 0)
            if errors == 0 and criticals == 0:
                return True, f"昨日体检通过({h.get('ok',0)}项正常)"
            return True, f"昨日体检有{errors}错误+{criticals}严重(已记录)"
        return True, "无昨日记录(首次运行)"
    except Exception as e:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit still
        # propagate; the reason is reported like the other checks do.
        return True, f"skip({str(e)[:60]})"
|
||||
|
||||
|
||||
def check_meta_checklist_completeness():
    """Meta-check: report whether auto-discovery added items to the checklist.

    Reads ``auto_discovered_items`` from ``ctx``.

    Returns:
        tuple[bool, str]: always ``(True, detail)``. The detail gives the
        number of items added when the list is non-empty, otherwise states
        that the checklist coverage is complete. Any unexpected error is
        reported as ``skip(...)`` with a truncated reason.
    """
    try:
        # "or []" tolerates the key being present but set to None.
        added = ctx.get("auto_discovered_items") or []
        if added:
            return True, f"自动发现并追加了{len(added)}个新组件到清单"
        return True, "清单覆盖完整"
    except Exception as e:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit still
        # propagate; the reason is reported like the other checks do.
        return True, f"skip({str(e)[:60]})"
|
||||
|
||||
|
||||
# ── 自动发现 ──
|
||||
def self_discovery():
|
||||
"""自动发现新增组件并更新checklist"""
|
||||
discovered = []
|
||||
@@ -536,6 +598,12 @@ def run_check(item):
|
||||
ok, detail = check_cron_paused()
|
||||
elif check_spec == "delivery:origin_targets":
|
||||
ok, detail = check_delivery_targets()
|
||||
elif check_spec == "cron_audit:all":
|
||||
ok, detail = check_cron_audit()
|
||||
elif check_spec == "meta:health_check_yesterday":
|
||||
ok, detail = check_meta_health_check_yesterday()
|
||||
elif check_spec == "meta:checklist_completeness":
|
||||
ok, detail = check_meta_checklist_completeness()
|
||||
elif check_spec == "pipeline:xiaoguo_signal_flow":
|
||||
# 综合检查:小果有数据→被我处理
|
||||
today_xiaoguo, d1 = check_db_table_count("signal_news", "created_at", None, "today", 0)
|
||||
|
||||
Reference in New Issue
Block a user