cleanup: 历史记录从JSON迁移到DB(health_check_log表)
- 移除health_check_history.json → health_check_log表
- 移除health_known_issues.json(未使用)
- 移除TODO_PATH(已用DB替代)
- 更新文档注释
This commit is contained in:
@@ -2,16 +2,18 @@
|
||||
"""morning_health_check.py — MoFin 系统常规体检
|
||||
|
||||
每日开盘前(8:00)运行,全面扫描MoFin所有组件健康状况。
|
||||
当前8类48项(清单自动扩展)。
|
||||
输出格式化的体检报告,有问题才出声,没问题静默。
|
||||
|
||||
核心设计:
|
||||
1. 从 health_checklist.json 读检查清单
|
||||
2. 逐项检查,记录状态
|
||||
3. 报告异常项(只推异常,不推正常)
|
||||
4. 自动发现新增cron/脚本(通过 self_discovery 函数)
|
||||
5. 维护检查历史 (health_check_history.json)
|
||||
从 health_checklist.json 读检查清单
|
||||
逐项检查,记录状态
|
||||
报告异常项(只推异常,不推正常)
|
||||
自动发现新增cron/脚本(通过 self_discovery 函数)
|
||||
维护检查历史 (health_check_log 表)
|
||||
自动修复可修问题,不可修写TODO
|
||||
|
||||
新增组件自动发现机制:
|
||||
新增组件自动发现机制:
|
||||
- 对比当前cron list与checklist中记录的cron id
|
||||
- 发现新cron → 自动追加到checklist
|
||||
- 脚本修改 → 标记"需复核"
|
||||
@@ -34,10 +36,8 @@ DATA = BASE / "data"
|
||||
SCRIPTS_DIR = BASE / "scripts"
|
||||
PROFILE_SCRIPTS = Path("/home/hmo/.hermes/profiles/position-analyst/scripts")
|
||||
CHECKLIST_PATH = DATA / "health_checklist.json"
|
||||
HISTORY_PATH = DATA / "health_check_history.json"
|
||||
DB_PATH = DATA / "mofin.db"
|
||||
HERMES_CRON_DIR = Path("/home/hmo/.hermes/profiles/position-analyst/cron")
|
||||
TODO_PATH = Path("/home/hmo/.hermes/profiles/position-analyst/todo.json")
|
||||
|
||||
|
||||
def derive_fix_action(detail, msg):
|
||||
@@ -191,9 +191,6 @@ def write_todos_for_issues():
|
||||
except Exception as e:
|
||||
pass # TODO 写入失败不阻碍体检主流程
|
||||
|
||||
# 异常缓存(同一问题24h内不重复推)
|
||||
KNOWN_ISSUES_PATH = DATA / "health_known_issues.json"
|
||||
|
||||
# ── 上下文 ──
|
||||
ctx = {
|
||||
"report": [],
|
||||
@@ -479,16 +476,18 @@ def check_cron_audit():
|
||||
def check_meta_health_check_yesterday():
|
||||
"""元检:昨天体检是否正常完成"""
|
||||
try:
|
||||
history = []
|
||||
if HISTORY_PATH.exists():
|
||||
history = json.loads(HISTORY_PATH.read_text())
|
||||
conn = sqlite3.connect(str(DB_PATH))
|
||||
yesterday = (ctx["started_at"] - timedelta(days=1)).strftime("%Y-%m-%d")
|
||||
for h in history[-30:]:
|
||||
ts = h.get("timestamp", "")
|
||||
if ts[:10] == yesterday:
|
||||
if h.get("error", 0) == 0 and h.get("critical", 0) == 0:
|
||||
return True, f"昨日体检通过({h.get('ok',0)}项正常)"
|
||||
return True, f"昨日体检有{h.get('error',0)}错误+{h.get('critical',0)}严重(已记录)"
|
||||
row = conn.execute(
|
||||
"SELECT ok_count, error_count, critical_count FROM health_check_log "
|
||||
"WHERE date(created_at) = ? ORDER BY created_at DESC LIMIT 1",
|
||||
(yesterday,)
|
||||
).fetchone()
|
||||
conn.close()
|
||||
if row:
|
||||
if row[1] == 0 and row[2] == 0:
|
||||
return True, f"昨日体检通过({row[0]}项正常)"
|
||||
return True, f"昨日体检有{row[1]}错误+{row[2]}严重(已记录)"
|
||||
return True, "无昨日记录(首次运行)"
|
||||
except:
|
||||
return True, "skip"
|
||||
@@ -707,20 +706,16 @@ def main():
|
||||
|
||||
report = "\n".join(lines)
|
||||
|
||||
# 保存历史
|
||||
# 保存历史到DB
|
||||
try:
|
||||
history = []
|
||||
if HISTORY_PATH.exists():
|
||||
history = json.loads(HISTORY_PATH.read_text())
|
||||
history.append({
|
||||
"timestamp": ctx["started_at"].isoformat(),
|
||||
"ok": ok_count, "warn": warns, "error": errors, "critical": critical,
|
||||
"duration_s": round(time.time() - start_time, 1)
|
||||
})
|
||||
# 保留最近30天
|
||||
if len(history) > 90:
|
||||
history = history[-90:]
|
||||
HISTORY_PATH.write_text(json.dumps(history, ensure_ascii=False, indent=2))
|
||||
conn_hist = sqlite3.connect(str(DB_PATH))
|
||||
details = json.dumps([e for e in ctx["report"] if e["level"] in ("critical", "error")])
|
||||
conn_hist.execute(
|
||||
"INSERT INTO health_check_log (ok_count, warn_count, error_count, critical_count, duration_s, details) "
|
||||
"VALUES (?, ?, ?, ?, ?, ?)",
|
||||
(ok_count, warns, errors, critical, round(time.time()-start_time, 1), details))
|
||||
conn_hist.commit()
|
||||
conn_hist.close()
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
Reference in New Issue
Block a user