fix: health_check now queries /health endpoint before log analysis
If /health reports xmpp_connected=false, force a restart immediately instead of waiting 10 minutes for the message timeout. This catches the most common failure mode: the process is alive but the XMPP connection is dead.
This commit is contained in:
@@ -112,6 +112,25 @@ def main():
|
|||||||
wlog("=== end ===")
|
wlog("=== end ===")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# ── 0. Quick HTTP health check ──────────────────────────
|
||||||
|
# Check /health endpoint: if xmpp_connected=false, restart immediately.
|
||||||
|
# This catches "process alive but XMPP dead" — the most common failure mode.
|
||||||
|
try:
|
||||||
|
import urllib.request, json
|
||||||
|
req = urllib.request.Request("http://127.0.0.1:5802/health")
|
||||||
|
resp = json.loads(urllib.request.urlopen(req, timeout=5).read())
|
||||||
|
if not resp.get("xmpp_connected", True):
|
||||||
|
wlog("CRIT: xmpp_connected=false via /health — forcing restart")
|
||||||
|
kill_all_bots()
|
||||||
|
time.sleep(3)
|
||||||
|
start_bot()
|
||||||
|
wlog("=== end (restarted: XMPP disconnected) ===")
|
||||||
|
return
|
||||||
|
except Exception as e:
|
||||||
|
wlog(f"WARN: /health check failed: {e}")
|
||||||
|
# If /health is unreachable, the bot process might be dead or stuck
|
||||||
|
pass
|
||||||
|
|
||||||
# Activity analysis
|
# Activity analysis
|
||||||
recent = parse_log_tail(BOT_LOG, 100)
|
recent = parse_log_tail(BOT_LOG, 100)
|
||||||
alive = sum(1 for l in recent if "alive" in l)
|
alive = sum(1 for l in recent if "alive" in l)
|
||||||
|
|||||||
Reference in New Issue
Block a user