""" xmpp_watchdog.py — monitors xmpp_bot, auto-restarts on crash, reports status. Runs alongside xmpp_bot.py as a separate process. """ import os, sys, time, subprocess, json, threading PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__)) BOT_SCRIPT = os.path.join(PROJECT_ROOT, "xmpp_bot.py") LOG_DIR = os.path.join(os.path.dirname(PROJECT_ROOT), "logs") WATCHDOG_LOG = os.path.join(LOG_DIR, "watchdog.log") PID_FILE = os.path.join(os.path.dirname(PROJECT_ROOT), "temp", ".xmpp_watchdog.pid") BOT_PID_FILE = os.path.join(os.path.dirname(PROJECT_ROOT), "temp", ".xmpp_bot.pid") PYTHON = r"C:\Users\hmo\AppData\Local\Programs\Python\Python310\python.exe" CHECK_INTERVAL = 30 # seconds between health checks os.makedirs(LOG_DIR, exist_ok=True) def wlog(msg: str): ts = time.strftime("%Y-%m-%d %H:%M:%S") with open(WATCHDOG_LOG, "a", encoding="utf-8") as f: f.write(f"{ts} [watchdog] {msg}\n") print(f"[watchdog] {msg}", flush=True) def rotate_log(path: str, max_bytes: int = 5 * 1024 * 1024): """Rotate log file if it exceeds max_bytes. Keeps last 3 backups.""" try: if os.path.getsize(path) > max_bytes: # shift .2→.tmp, .1→.2, file→.1 bak2 = f"{path}.2" bak1 = f"{path}.1" if os.path.exists(bak2): os.remove(bak2) if os.path.exists(bak1): os.rename(bak1, bak2) os.rename(path, bak1) wlog(f"Rotated: {os.path.basename(path)}") except: pass def is_process_alive(pid: int) -> bool: """Check if a process with given PID is alive.""" try: proc = subprocess.run( ['tasklist', '/FI', f'PID eq {pid}', '/NH'], capture_output=True, text=True, timeout=5 ) return str(pid) in proc.stdout except: return False def kill_bot(): """Kill ALL existing xmpp_bot.py processes before starting a new one.""" killed = 0 try: r = subprocess.run( ['tasklist', '/FO', 'CSV', '/NH', '/FI', 'IMAGENAME eq python.exe'], capture_output=True, text=True, timeout=10 ) for line in r.stdout.splitlines(): parts = line.strip('"').split('","') if len(parts) >= 2 and parts[0] == 'python.exe': pid_str = parts[1].strip() try: wmi = subprocess.run( ['wmic', 'process', 'where', f'ProcessId={pid_str}', 'get', 'CommandLine', '/format:list'], capture_output=True, text=True, timeout=5 ) if 'xmpp_bot' in wmi.stdout and 'watchdog' not in wmi.stdout: subprocess.run(['taskkill', '/f', '/pid', pid_str], capture_output=True, timeout=5) killed += 1 wlog(f"Killed old bot (PID {pid_str})") except: pass except: pass if killed > 0: time.sleep(3) # wait for process cleanup def start_bot() -> int: """Start xmpp_bot.py and return its PID. Kills old instances first.""" kill_bot() wlog("Starting xmpp_bot...") proc = subprocess.Popen( [PYTHON, BOT_SCRIPT], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, creationflags=subprocess.CREATE_NO_WINDOW ) pid = proc.pid with open(BOT_PID_FILE, "w") as f: f.write(str(pid)) wlog(f"xmpp_bot started (PID {pid})") return pid def get_last_log_activity() -> float: """Get timestamp of last xmpp_bot.log modification.""" log_file = os.path.join(LOG_DIR, "xmpp_bot.log") try: return os.path.getmtime(log_file) except: return 0 def health_check(bot_pid: int, last_activity: float) -> tuple[bool, int, float]: """ Check bot health. Returns (is_alive, pid, last_activity). If dead, restarts bot. """ alive = is_process_alive(bot_pid) if not alive: wlog(f"Bot PID {bot_pid} is DEAD. Restarting...") bot_pid = start_bot() time.sleep(5) last_activity = get_last_log_activity() return (True, bot_pid, last_activity) # Check if bot has been active recently (last 5 minutes) current_activity = get_last_log_activity() if current_activity > last_activity: last_activity = current_activity # If no activity for 5 minutes but bot is alive, warn if time.time() - last_activity > 300: wlog(f"WARNING: Bot PID {bot_pid} alive but no activity for 5+ min") return (True, bot_pid, last_activity) if __name__ == "__main__": wlog("Watchdog started") # Start bot if not already running bot_pid = 0 if os.path.exists(BOT_PID_FILE): try: with open(BOT_PID_FILE) as f: bot_pid = int(f.read().strip()) if not is_process_alive(bot_pid): bot_pid = 0 except: bot_pid = 0 if bot_pid == 0: bot_pid = start_bot() last_activity = get_last_log_activity() wlog(f"Initial: bot PID {bot_pid}, log last activity: {time.ctime(last_activity)}") log_rotate_counter = 0 # Main monitoring loop while True: time.sleep(CHECK_INTERVAL) alive, bot_pid, last_activity = health_check(bot_pid, last_activity) # Log rotation (every 30 checks ≈ 15 min) log_rotate_counter += 1 if log_rotate_counter >= 30: log_rotate_counter = 0 bot_log = os.path.join(LOG_DIR, "xmpp_bot.log") bridge_log = os.path.join(LOG_DIR, "bridge.log") rotate_log(bot_log) rotate_log(bridge_log) # Every 5 minutes, report status if int(time.time()) % 300 < CHECK_INTERVAL: alive_str = "ALIVE" if alive else "RESTARTED" wlog(f"Status: bot PID {bot_pid} [{alive_str}]")