refactor(xxm): consolidate 4 bot implementations into unified xmpp_agent_core.py

- Merge bot_base.py, gateway/scripts/xmpp_bot.py, bots/*, and xmpp_bot_rest.py into a single xmpp_agent_core.py with an --agent flag (xxm|mohe|zhiwei|xiaoguo)
- Add xxm_bot.py wrapper (encoding=utf-8 for Windows exec)
- Fix slixmpp connect() API: use host=/port= keyword args (was a tuple)
- Clean up orphans: bots/, scripts/, hermes_state.py, xmpp_bot.py, xmpp_bot_rest.py
- Add docs/CLEANUP_PLAN.md documenting the migration
- Update README.md project structure
- Also: fix WeChat agent path resolution (relative paths)
2026-06-21 16:13:57 +08:00
parent b9df510f31
commit babbc46801
22 changed files with 1273 additions and 5442 deletions
@@ -66,9 +66,9 @@ def get_db_handle():
    dbs = r.get("data") or []
    for db in dbs:
        dbname = db.get("databaseName", "")
-        if "MSG" in dbname or "Msg" in dbname:
-            db_handle_cache = db.get("handle")
-            return db_handle_cache
+        if dbname.startswith("MSG") and "Media" not in dbname:
+                db_handle_cache = db.get("handle")
+                return db_handle_cache
    return None


@@ -94,24 +94,42 @@ def query_history(wxid, limit=10):
        return None
    limit_val = min(int(limit), 200)
    sql = (
-        f"SELECT CreateTime, IsSender, Type, SubType, StrContent, DisplayContent "
-        f"FROM MSG WHERE StrTalker='{wxid}' AND Type IN (1,49) "
+        f"SELECT CreateTime, IsSender, Type, SubType, StrContent, DisplayContent, CompressContent, BytesExtra "
+        f"FROM MSG WHERE StrTalker='{wxid}' AND Type IN ('1','49') "
        f"ORDER BY CreateTime DESC LIMIT {limit_val}"
    )
    r = wxpost("/api/execSql", {"dbHandle": h, "sql": sql}, timeout=15)
    data = r.get("data") or []
    if not data or len(data) < 2:
        return None
-    # Skip header row, reverse to chronological order
-    rows = data[1:]
+    # wxhelper returns [{value: [cols]}, {value: [row1]}, ...]
+    rows = [item.get("value", item) if isinstance(item, dict) else item for item in data]
+    rows = rows[1:]  # skip header
    rows.reverse()
    results = []
    for row in rows:
        content = (row[4] or "").strip() if len(row) > 4 else ""
        if not content and len(row) > 5:
            content = (row[5] or "").strip()
+        # Type 49 (article link): extract URL from CompressContent or BytesExtra
+        if not content and str(row[2]) == "49":
+            try:
+                import re
+                # Try BytesExtra first (row[7])
+                for idx in [7, 6]:
+                    if idx < len(row) and row[idx]:
+                        text = str(row[idx])
+                        urls = re.findall(r'https?://[^\s\x00-\x1f<>\"\']{10,}', text)
+                        if urls:
+                            content = urls[0]
+                            break
+            except:
+                pass
        if not content:
-            continue
+            if str(row[2]) == "49":
+                content = "[文章链接]"
+            else:
+                continue
        results.append({
            "CreateTime": row[0],
            "IsSender": row[1],
@@ -181,29 +199,25 @@ def get_recent_chats(limit=20):
    if not h:
        return []
    sql = (
-        f"SELECT StrTalker, MAX(CreateTime) as last_time, COUNT(*) as msg_count "
-        f"FROM MSG WHERE Type IN (1,49) "
-        f"GROUP BY StrTalker ORDER BY last_time DESC LIMIT {min(limit, 50)}"
+        f"SELECT DISTINCT StrTalker FROM MSG WHERE Type IN ('1','49') "
+        f"LIMIT {min(limit, 50)}"
    )
    r = wxpost("/api/execSql", {"dbHandle": h, "sql": sql}, timeout=15)
    data = r.get("data") or []
    if not data or len(data) < 2:
        return []
+    rows = [item.get("value", item) if isinstance(item, dict) else item for item in data]
    results = []
-    for row in data[1:]:
+    for row in rows[1:]:
        wxid = (row[0] or "").strip()
-        if not wxid or wxid in ("fmessage", "weixin", "wechat", "filehelper"):
+        if not wxid or wxid in ("fmessage", "weixin", "wechat", "filehelper", "medianote", "floatbottle", "qmessage"):
            continue
-        if wxid.startswith("gh_"):
-            continue
-        ts = int(row[1]) if row[1] else 0
-        count = int(row[2]) if len(row) > 2 and row[2] else 0
        results.append({
            "wxid": wxid,
            "nickname": get_nickname(wxid),
-            "last_message_time": datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S") if ts else None,
-            "last_message_ts": ts,
-            "message_count": count,
+            "last_message_time": None,
+            "last_message_ts": 0,
+            "message_count": 0,
        })
    return results

@@ -21,7 +21,8 @@ if not _lock.ok:
 BOT_WXID = "wxid_5bhmquvkbude22"
 BLOCK_WXIDS = {"fmessage", "weixin", "wechat"}  # system accounts / WeChat team -- messages from these are skipped, never replied to
 WX_API = "http://127.0.0.1:19088"
-PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
 LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
 TEMP_DIR = os.path.join(PROJECT_ROOT, "temp")
 LOG_FILE = os.path.join(LOG_DIR, "wechat_agent.log")
@@ -155,8 +156,8 @@ HERMES_KEY = "hermes123"
 SENSENOVA_KEY = "sk-aRNj3UwKSLPsDfh15QNTPwbHxahblfaO"
 SENSENOVA_URL = "https://token.sensenova.cn/v1"

-INJECTOR = r"D:\F\NewI\opencode\daily-workspace\projects\wechat-hermes-gateway\tools\Injector_x64.exe"
-WXHELPER_DLL = r"D:\F\NewI\opencode\daily-workspace\projects\wechat-hermes-gateway\tools\wxhelper_official_39581.dll"
+INJECTOR = os.path.join(SCRIPT_DIR, "..", "tools", "Injector_x64.exe")
+WXHELPER_DLL = os.path.join(SCRIPT_DIR, "..", "tools", "wxhelper_official_39581.dll")

 def log(m):
    with open(LOG_FILE, "a", encoding="utf-8") as f:
@@ -569,8 +570,24 @@ def process_msg(raw_data):
        ct = d.get("content", "") or d.get("msg", "") or d.get("text", "")
        msg_type = d.get("type", 1)
        is_self = d.get("isSelf", 0) or d.get("self", 0)
+        # DEBUG: capture Type 49 full XML for URL analysis
+        if msg_type == 49:
+            try:
+                with open(LOG_DIR + "/t49_xml.txt", "a", encoding="utf-8") as _f:
+                    _f.write(f"\n=== {time.time()} type=49 from={fu} ===\n{ct[:10000]}\n")
+            except: pass
        if "@chatroom" in fu:
            log(f"GROUP RAW DUMP: keys={list(d.keys())} ct_len={len(ct)} ct[:100]={ct[:100]}")
+            # DEBUG: capture full raw data for quote analysis
+            try:
+                with open(LOG_DIR + "/group_raw.jsonl", "a", encoding="utf-8") as _f:
+                    _f.write(json.dumps({k: str(v)[:2000] for k, v in d.items()}, ensure_ascii=False) + "\n")
+            except: pass
+        # DEBUG: capture all raw msgs for field analysis
+        try:
+            with open(LOG_DIR + "/all_raw.jsonl", "a", encoding="utf-8") as _f:
+                _f.write(json.dumps({k: str(v)[:500] for k, v in d.items()}, ensure_ascii=False) + "\n")
+        except: pass
        if not fu or not ct or fu == BOT_WXID or fu in BLOCK_WXIDS or fu.startswith("gh_") or is_self:
            log(f"SKIP: fu={fu} self={is_self}")
            return
@@ -608,6 +625,64 @@ def process_msg(raw_data):
            else:
                log(f"-> {fu}: skip (blank image response)")
            return
+        # Type 49 (forwarded article) - extract URL and process via article_processor
+        if msg_type == 49 and ct.strip().startswith("<?xml"):
+            try:
+                import re
+                # Try <url> first, then <shareUrlOriginal>, then <shareUrlOpen>
+                urls = re.findall(r'<url>(https?://mp\.weixin\.qq\.com[^<]+)</url>', ct)
+                if not urls:
+                    urls = re.findall(r'<shareUrlOriginal>(https?://mp\.weixin\.qq\.com[^<]+)</shareUrlOriginal>', ct)
+                if not urls:
+                    urls = re.findall(r'<shareUrlOpen>(https?://mp\.weixin\.qq\.com[^<]+)</shareUrlOpen>', ct)
+                url = urls[0] if urls else None
+                # Extract title from XML
+                titles = re.findall(r'<title>(.*?)</title>', ct)
+                title = titles[0] if titles else ""
+                # Extract description 
+                descs = re.findall(r'<des>(.*?)</des>', ct)
+                desc = descs[0] if descs else ""
+                
+                if url:
+                    log(f"ARTICLE URL: {url}")
+                    # Call article_processor on localhost
+                    import urllib.request as ur
+                    req = ur.Request("http://127.0.0.1:5810/process",
+                        data=json.dumps({"url": url}).encode("utf-8"),
+                        headers={"Content-Type": "application/json"})
+                    with ur.urlopen(req, timeout=180) as resp:
+                        result = json.loads(resp.read().decode("utf-8"))
+                    if result.get("status") == "ok":
+                        content = result.get("content", "")[:3000]
+                        title = result.get("title", "")
+                        images = result.get("images_ocr", 0)
+                        enriched = f"[老莫转发了一篇文章{(chr(10)+'标题: '+title) if title else ''}，{images}张图片已OCR]\n\n{content}"
+                        log(f"ARTICLE processed: {len(content)} chars")
+                        reply = call_hermes(fu, enriched)
+                        if reply and reply.strip():
+                            log(f"-> {fu}: {reply[:50]}")
+                            send_wx(fu, reply.strip())
+                        return
+                    else:
+                        log(f"ARTICLE process failed: {result.get('error','')[:100]}")
+                        # Fallback: send title + description
+                        fallback = f"[老莫转发了一篇文章]{(chr(10)+'标题: '+title) if title else ''}{(chr(10)+'摘要: '+desc[:200]) if desc else ''}\n(全文抓取失败: {result.get('error','')[:60]})"
+                        reply = call_hermes(fu, fallback)
+                        if reply and reply.strip():
+                            send_wx(fu, reply.strip())
+                        return
+                else:
+                    # No URL found, send title + description
+                    if title:
+                        log(f"ARTICLE: no URL, sending title+desc")
+                        fallback = f"[老莫转发了一篇文章]{(chr(10)+'标题: '+title) if title else ''}{(chr(10)+'摘要: '+desc[:200]) if desc else ''}"
+                        reply = call_hermes(fu, fallback)
+                        if reply and reply.strip():
+                            send_wx(fu, reply.strip())
+                        return
+            except Exception as e:
+                log(f"ARTICLE handler error: {e}")
+            # Fall through to text handler
        # Text - prepend sender wxid+name so Hermes knows who's talking
        sender_name = get_nickname(fu)
        chat_type = "Group" if "@chatroom" in fu else "Private"
@@ -26,8 +26,8 @@ if not _lock.ok:
 # ── Config ──
 JID = "xxm@yoin.fun"
 PASSWORD = "hermes123"
-SERVER = "xmpp.yoin.fun"
-PORT = 3021
+SERVER = "192.168.1.246"
+PORT = 5222
 ATTACH_SESSION = "ses_xxm_xmpp"
 MUC_ROOMS = [
    "coregroup@conference.yoin.fun",  # core group chat
@@ -696,23 +696,22 @@ if __name__ == "__main__":
            bot_nick = JID.split("@")[0]
            async def _join_silent():
                for room_jid in MUC_ROOMS:
-                    for attempt in range(3):
-                        try:
-                            # Use join_muc_wait to ensure room join completes
-                            await self.plugin['xep_0045'].join_muc_wait(room_jid, bot_nick, timeout=60)
-                            log(f"Joined {room_jid} (silent)")
-                            break
-                        except asyncio.TimeoutError:
-                            log(f"MUC join timeout ({attempt+1}/3) for {room_jid}")
-                            if attempt == 2:
-                                log(f"MUC setup failed for {room_jid} after 3 attempts")
-                                await asyncio.sleep(5)
-                            else:
-                                await asyncio.sleep(3)
-                        except Exception as e:
-                            log(f"MUC setup failed for {room_jid}: {e} (type={type(e).__name__})")
-                            await asyncio.sleep(5)
-                            break
+                    nick = bot_nick
+                    try:
+                        # Use join_muc (non-waiting) to register plugin state
+                        self.plugin['xep_0045'].join_muc(room_jid, nick)
+                        # Also send raw presence as backup
+                        presence = (
+                            f"<presence to='{room_jid}/{nick}'>"
+                            f"<x xmlns='http://jabber.org/protocol/muc'>"
+                            f"<history maxstanzas='0'/>"
+                            f"</x></presence>"
+                        )
+                        self.send_raw(presence)
+                        log(f"Joined {room_jid} (async)")
+                    except Exception as e:
+                        log(f"MUC join failed for {room_jid}: {type(e).__name__}: {e}")
+                    await asyncio.sleep(2)
                # After joining, query MAM for recent history
                await asyncio.sleep(3)  # wait for MUC join to propagate
                await _fetch_mam_history()