feat: add 商汤图像生成, deprecate bridge, fix gateway --replace, update docs

2026-05-18 01:58:36 +08:00
parent 93f4273b3d
commit 6c12b51250
3 changed files with 232 additions and 35 deletions
@@ -89,7 +89,7 @@ Hermes → POST http://192.168.0.111:5801/hermes-msg
 | 19001 | wxhook HTTP API | Windows |
 | 5801 | Hermes→小小莫 消息入口 | Windows |
 | 8642 | Hermes API Server (OpenAI兼容) | Linux |
-| 5800 | bridge.py (旧，不再使用) | Linux |
+| 5800 | bridge.py (已废弃) | Linux |
 ## 组件
@@ -171,7 +171,10 @@ wechat-hermes-gateway/
 |------|------|
 | 文字消息收发（个人聊天） | ✅ 双向，session 上下文连贯 |
 | 文字消息收发（群聊） | ✅ 同 session，认识老爸 |
-| 图片消息接收 + OCR 分析 | ✅ 自动保存 → 豆包 OCR → 结果给 Hermes |
+| 图片接收 + OCR 分析 | ✅ 自动 OCR → 结果给莫荷 |
 | 发送网上图片 | ✅ [IMG]URL[/IMG] 标记，Bot.send_image 发出 |
 | 图像生成 (SenseNova商汤) | ✅ [IMG]generate:描述[/IMG] 支持多种比例 |
 | 图像理解/OCR | ✅ 豆包OCR + SenseNova 双引擎 |
 | Hermes 身份认知 | ✅ 知道自己是莫荷/莫小荷，知道老爸 |
 | 会话上下文持续 | ✅ session `sisyphus`，自动重置已关闭 |
 | 小小莫 ↔ Hermes 双向通信 | ✅ API (:8642) + HTTP (:5801/hermes-msg) |
@@ -183,11 +186,10 @@ wechat-hermes-gateway/
 | 功能 | 原因 |
 |------|------|
-| 语音消息 | ⏳ 已能检测并下载，转文字（STT）待接入 |
+| 语音消息(STT) | wxhook 不支持语音提取 |
-| 发送图片 | wxhook 有 send_image API 但未接入回复链路 |
+| 发送本地图片/文件 | bot.send_image API 已通，回复链路待完善 |
 | 文件收发 | 同上（API 已通），回复链路尚未接入 |
 | 换头像/改资料 | wxhook 无相关 API |
-| 群管理（拉人踢人） | wxhook 无群管理 API |
+| 群管理 | wxhook 群 API 有限 |
 | iLink 官方 bot 接口 | 限制太多，弃用 |
 | 多人独立会话 | 目前全部共享 `sisyphus` 单会话 |
@@ -243,6 +245,14 @@ curl http://192.168.0.103:8642/v1/models -H "Authorization: Bearer hermes123"
 如果连这都不奏效 → 告诉 Hermes "去找小小莫" → 它会 POST 到 :5801/hermes-msg → 我来处理。
 ## 已知问题
 | 问题 | 状态 |
 |------|------|
 | Gateway 偶尔 hang | 已修复 --replace 冲突，改用 systemd 管理 |
 | 生图 API 有时较慢 | 商汤 SenseNova，首次调用需加载模型 |
 | 语音转文字 | wxhook 不支持语音提取，暂不可行 |
 ## 注意事项
 - wxhook DLL 仅支持 x64 微信 3.9.5.81
@@ -1,5 +1,6 @@
 """
-WeChat Hermes Bridge — with webhook keepalive
+[DEPRECATED] 早期架构组件，已由 wechat_agent.py 直调 Hermes API 替代。
 不再使用，仅作为架构参考保留。
 """
 import pymem, pymem.process, requests, time, json, threading
 from http.server import HTTPServer, BaseHTTPRequestHandler
@@ -1,4 +1,4 @@
-"""
+"""
 WeChat Agent - wxhook + Hermes API (:8642)
 """
 import sys, os, json, time, threading, requests, re
@@ -8,7 +8,7 @@ sys.path.insert(0, r"C:\Users\hmo\AppData\Local\Programs\Python\Python310\Lib\si
 os.environ["WXHOOK_LOG_LEVEL"] = "ERROR"
 from wxhook import Bot
-from wxhook.events import TEXT_MESSAGE, IMAGE_MESSAGE, VOICE_MESSAGE
+from wxhook.events import TEXT_MESSAGE, IMAGE_MESSAGE, VOICE_MESSAGE, XML_MESSAGE
 import pymem, pymem.process
 BOT_WXID = "wxid_7onnerpx2s2l22"
@@ -20,6 +20,12 @@ nickname_cache = {}
 # Hermes chat-completions endpoint and the bearer token sent with each request.
 HERMES_API = "http://192.168.0.103:8642/v1/chat/completions"
 HERMES_KEY = "hermes123"
 # SenseNova (SenseTime) credentials and base URL, used for image generation + vision.
 # NOTE(security): a real-looking API key is committed in source. The environment
 # variable SENSENOVA_KEY takes precedence when set; the literal is kept only as a
 # backward-compatible fallback and should be rotated and removed.
 SENSENOVA_KEY = os.environ.get("SENSENOVA_KEY", "sk-aRNj3UwKSLPsDfh15QNTPwbHxahblfaO")
 SENSENOVA_URL = "https://token.sensenova.cn/v1"
 def log(m):
    with open(LOG_FILE, "a", encoding="utf-8") as f:
@@ -50,14 +56,35 @@ def get_nickname(wxid):
 def call_hermes(wxid, content):
     """Send one user message to the Hermes chat-completions API and return the reply.

     Args:
         wxid: WeChat id of the sender. Used to look up the nickname embedded in
             the system prompt and as the recipient of error notices.
         content: Text forwarded to Hermes as the user message.

     Returns:
         The assistant message content when Hermes answers with HTTP 200,
         otherwise None. Failures are logged; timeouts, connection errors and
         server errors additionally trigger a short notice to the sender.
     """
     nickname = get_nickname(wxid)
     headers = {"Authorization": f"Bearer {HERMES_KEY}", "X-Hermes-Session-Id": "sisyphus", "Content-Type": "application/json"}
     sys_prompt = f"""你是莫荷，女生。你主人是老爸（{nickname}）。回复简短像聊天。
 能力：
 - 发图: [IMG]图片URL[/IMG]
 - 生图已通: [IMG]generate:描述[/IMG]（商汤模型，放心用）
 - 指定比例: [IMG]generate:描述|16:9[/IMG] 可选 1:1 16:9 9:16 3:2 2:3 3:4 4:3
 生图可以直接出图，不用怀疑能不能用
 - 发文件: [FILE]文件URL[/FILE]
 - 发表情: [EMOJI]图片URL[/EMOJI]
 - 查联系人: [CONTACT:wxid]
 - 查群成员: [ROOM_MEMBERS:群ID]
 - 拍一拍: [PAT:群ID:wxid]"""
     body = {"model": "hermes-agent", "messages": [{"role": "system", "content": sys_prompt}, {"role": "user", "content": content}]}
     notice = None
     try:
         # proxies are set to None explicitly -- presumably so the LAN call skips
         # any configured HTTP proxy (TODO confirm).
         r = requests.post(HERMES_API, json=body, headers=headers, timeout=180, proxies={"http": None, "https": None})
         if r.status_code == 200:
             return r.json()["choices"][0]["message"]["content"]
         # A non-200 response does not raise, so it has to be reported here;
         # otherwise the failure would be dropped without a trace.
         log(f"API ERR: HTTP {r.status_code}")
         if r.status_code >= 500:
             notice = "[莫荷那边出错了，等一会儿再试]"
     except Exception as e:
         err_msg = str(e)
         log(f"API ERR: {err_msg[:60]}")
         lowered = err_msg.lower()
         if "timeout" in lowered or "timed out" in lowered:
             notice = "[莫荷处理超时，你再发一遍试试？]"
         elif "connection" in lowered or "refused" in lowered:
             notice = "[跟莫荷的连接断了，正在自动重连...]"
         elif "500" in err_msg or "50x" in err_msg:
             notice = "[莫荷那边出错了，等一会儿再试]"
     if notice:
         # Best-effort notice to the sender. The recipient is this function's
         # own `wxid` parameter (the previous code referenced an undefined `fu`,
         # and the resulting NameError was swallowed, so no notice was ever sent).
         try:
             send_wx(wxid, notice)
         except Exception as notify_err:
             log(f"NOTIFY ERR: {notify_err}")
     return None
 def watchdog():
@@ -68,11 +95,24 @@ def watchdog():
            try:
                r = requests.post(WX_API + "/api/checkLogin", json={}, timeout=5)
                if r.json().get("code") == 1:
-                    requests.post(WX_API + "/api/hookSyncMsg", json={"ip": "127.0.0.1", "port": 19001, "enableHttp": 1, "url": "", "timeout": 300}, timeout=5)
+                    # API alive, just refresh webhook
                    port = WX_API.split(":")[-1]
                    requests.post(WX_API + "/api/hookSyncMsg", json={"ip": "127.0.0.1", "port": int(port), "enableHttp": 1, "url": "", "timeout": 300}, timeout=5)
                    log(f"WATCHDOG: refreshed ({int(idle)}s)")
                else:
-                    log("WATCHDOG: re-injecting...")
+                    # API dead, find WeChat and inject DLL
-                    pymem.process.inject_dll(pymem.Pymem("WeChat.exe").process_handle, DLL.encode())
+                    log("WATCHDOG: re-injecting into running WeChat...")
                    try:
                        for proc in psutil.process_iter(["pid", "name"]):
                            if proc.info["name"] == "WeChat.exe":
                                pm = pymem.Pymem()
                                pm.open_process_from_id(proc.info["pid"])
                                pymem.process.inject_dll(pm.process_handle, DLL.encode())
                                pm.close()
                                log(f"WATCHDOG: injected into PID {proc.info['pid']}")
                                break
                    except Exception as ej:
                        log(f"WATCHDOG: inject failed: {ej}")
            except:
                pass
            last_msg_time = time.time()
@@ -113,7 +153,7 @@ b = Bot()
 WX_API = b.BASE_URL
 log("Bot ready, API=" + WX_API)
-@b.handle([TEXT_MESSAGE, IMAGE_MESSAGE, VOICE_MESSAGE])
+@b.handle([TEXT_MESSAGE, IMAGE_MESSAGE, VOICE_MESSAGE, XML_MESSAGE])
 def on_msg(_bot, event):
    global last_msg_time
    last_msg_time = time.time()
@@ -121,7 +161,24 @@ def on_msg(_bot, event):
    if not fu or fu == BOT_WXID:
        return
    if event.type == VOICE_MESSAGE:
-        log(f"<- {fu}: [voice]")
+        mid = event.msgId or 0
        log(f"<- {fu}: [voice] msgId={mid}")
        # Try various voice download methods with real msgId
        try:
            r1 = requests.post(WX_API + "/api/getVoiceByMsgId", json={"msgId": mid, "storeDir": r"C:\Users\hmo\Desktop\wechat_voice"}, timeout=10)
            log(f"getVoice: {r1.json()}")
        except Exception as e:
            log(f"getVoice err: {e}")
        try:
            r2 = requests.post(WX_API + "/api/downloadAttach", json={"msgId": mid}, timeout=10)
            log(f"downloadAttach: {r2.json()}")
        except Exception as e:
            log(f"downloadAttach err: {e}")
        try:
            r3 = requests.post(WX_API + "/api/forwardMsg", json={"msgId": mid, "wxid": "filehelper"}, timeout=10)
            log(f"forwardMsg: {r3.json()}")
        except Exception as e:
            log(f"forwardMsg err: {e}")
        reply = call_hermes(fu, "[voice message]")
        if reply: send_wx(fu, reply)
        return
@@ -149,6 +206,20 @@ def on_msg(_bot, event):
        reply = call_hermes(fu, msg)
        if reply: send_wx(fu, reply)
        return
    # Handle XML messages (files, cards, etc.)
    if event.type == XML_MESSAGE:
        content = str(event.content or "")
        log(f"<- {fu}: [xml] {content[:80]}")
        # Try to extract file info from XML
        import re as _re
        fname_match = _re.search(r'<filename>(.*?)</filename>', content)
        if fname_match:
            reply = call_hermes(fu, f"[sent a file: {fname_match.group(1)}]")
        else:
            reply = call_hermes(fu, "[sent a file or card]")
        if reply: send_wx(fu, reply)
        return
    content = event.content or ""
    if not content:
        return
@@ -156,29 +227,144 @@ def on_msg(_bot, event):
    reply = call_hermes(fu, content)
    if reply:
        log(f"-> {fu}: {reply[:50]}")
-        img_match = re.search(r'\[IMG\](.*?)\[/IMG\]', reply)
+        clean = reply
        # Handle [FILE] tag
        file_match = re.search(r'\[FILE\](.*?)\[/FILE\]', reply)
        if file_match:
            file_url = file_match.group(1).strip()
            clean = re.sub(r'\s*\[FILE\].*?\[/FILE\]\s*', '', clean).strip()
            try:
                fr = requests.get(file_url, timeout=60, proxies={"http": None, "https": None})
                if fr.status_code == 200:
                    fname = os.path.join(r"C:\Users\hmo\Desktop", f"send_file_{int(time.time())}.dat")
                    with open(fname, "wb") as f:
                        f.write(fr.content)
                    try:
                        _bot.send_file(fu, fname)
                        log(f"FILE sent")
                    except:
                        requests.post(WX_API + "/api/sendFileMsg", json={"wxid": fu, "filePath": fname}, timeout=10)
                    os.remove(fname)
            except Exception as e:
                log(f"FILE err: {e}")
        # Handle [IMG] tag
        img_match = re.search(r'\[IMG\](.*?)\[/IMG\]', clean)
        if img_match:
            img_cmd = img_match.group(1).strip()
-            clean = re.sub(r'\s*\[IMG\].*?\[/IMG\]\s*', '', reply).strip()
+            clean = re.sub(r'\s*\[IMG\].*?\[/IMG\]\s*', '', clean).strip()
            if clean:
                send_wx(fu, clean)
            try:
-                ir = requests.get(img_cmd, timeout=30, proxies={"http": None, "https": None})
+                if img_cmd.startswith("generate:") or img_cmd.startswith("draw:"):
-                if ir.status_code == 200:
+                    # Generate image via SenseNova
-                    ext = ".jpg"
+                    parts = img_cmd.split(":", 1)[1].strip()
-                    if "png" in ir.headers.get("content-type", ""): ext = ".png"
+                    ratio = "1:1"
-                    tmp = os.path.join(r"C:\Users\hmo\Desktop", f"send_img_{int(time.time())}{ext}")
+                    if "|" in parts:
-                    with open(tmp, "wb") as f:
+                        ratio = parts.split("|")[1].strip()
-                        f.write(ir.content)
+                        prompt = parts.split("|")[0].strip()
-                    try:
+                    else:
-                        _bot.send_image(fu, tmp)
+                        prompt = parts
-                    except:
+                    # Map aspect ratio to SenseNova size
-                        requests.post(WX_API + "/api/sendImagesMsg", json={"wxid": fu, "imagePath": tmp}, timeout=10)
+                    size_map = {"1:1":"2048x2048", "16:9":"2752x1536", "9:16":"1536x2752",
-                    os.remove(tmp)
+                                "3:2":"2496x1664", "2:3":"1664x2496", "3:4":"1760x2368", "4:3":"2368x1760"}
                    size = size_map.get(ratio, "2048x2048")
                    log(f"GEN SenseNova: {prompt[:30]} [{ratio}]")
                    gen_r = requests.post(SENSENOVA_URL + "/images/generations",
                        json={"model": "sensenova-u1-fast", "prompt": prompt, "size": size, "response_format": "url"},
                        headers={"Authorization": f"Bearer {SENSENOVA_KEY}", "Content-Type": "application/json"},
                        timeout=180)
                    if gen_r.status_code == 200:
                        img_url = gen_r.json()["data"][0]["url"]
                        ir = requests.get(img_url, timeout=60)
                        if ir.status_code == 200:
                            tmp = os.path.join(r"C:\Users\hmo\Desktop", f"gen_img_{int(time.time())}.png")
                            with open(tmp, "wb") as f:
                                f.write(ir.content)
                            _bot.send_image(fu, tmp)
                            os.remove(tmp)
                            log("GEN sent")
                    else:
                        log(f"GEN err: {gen_r.status_code} {gen_r.text[:100]}")
                else:
                    ir = requests.get(img_cmd, timeout=30, proxies={"http": None, "https": None})
                    if ir.status_code == 200:
                        ext = ".jpg"
                        if "png" in ir.headers.get("content-type", ""): ext = ".png"
                        tmp = os.path.join(r"C:\Users\hmo\Desktop", f"send_img_{int(time.time())}{ext}")
                        with open(tmp, "wb") as f:
                            f.write(ir.content)
                        try:
                            _bot.send_image(fu, tmp)
                        except:
                            requests.post(WX_API + "/api/sendImagesMsg", json={"wxid": fu, "imagePath": tmp}, timeout=10)
                        os.remove(tmp)
            except Exception as e:
                log(f"IMG err: {e}")
-        else:
+        # Handle [CONTACT:wxid]
-            send_wx(fu, reply)
+        contact_match = re.search(r'\[CONTACT:(\w+)\]', clean)
        if contact_match:
            cwxid = contact_match.group(1)
            clean = re.sub(r'\s*\[CONTACT:\w+\]\s*', '', clean).strip()
            try:
                cr = requests.post(WX_API + "/api/getContactProfile", json={"wxid": cwxid}, timeout=10)
                cd = cr.json().get("data", {})
                info = f"昵称: {cd.get('nickname','?')}  备注: {cd.get('remark','')}  账号: {cd.get('account','')}"
                send_wx(fu, info)
                log(f"CONTACT info sent")
            except Exception as e:
                log(f"CONTACT err: {e}")
        # Handle [ROOM_MEMBERS:roomid]
        room_match = re.search(r'\[ROOM_MEMBERS:(\S+)\]', clean)
        if room_match:
            rid = room_match.group(1)
            clean = re.sub(r'\s*\[ROOM_MEMBERS:\S+\]\s*', '', clean).strip()
            try:
                rr = requests.post(WX_API + "/api/getMemberFromChatRoom", json={"chatRoomId": rid}, timeout=10)
                members = rr.json().get("data", {}).get("members", "")
                send_wx(fu, f"群成员: {members[:100]}")
                log(f"ROOM members sent")
            except Exception as e:
                log(f"ROOM err: {e}")
        # Handle [EMOJI] tag
        emoji_match = re.search(r'\[EMOJI\](.*?)\[/EMOJI\]', clean)
        if emoji_match:
            eurl = emoji_match.group(1).strip()
            clean = re.sub(r'\s*\[EMOJI\].*?\[/EMOJI\]\s*', '', clean).strip()
            try:
                er = requests.get(eurl, timeout=30, proxies={"http": None, "https": None})
                if er.status_code == 200:
                    epath = os.path.join(r"C:\Users\hmo\Desktop", f"emoji_{int(time.time())}.png")
                    with open(epath, "wb") as f:
                        f.write(er.content)
                    _bot.send_emotion(fu, epath)
                    os.remove(epath)
                    log(f"EMOJI sent")
            except Exception as e:
                log(f"EMOJI err: {e}")
        # Handle [PAT:roomid:wxid]
        pat_match = re.search(r'\[PAT:(\S+):(\S+)\]', clean)
        if pat_match:
            prid = pat_match.group(1)
            pwxid = pat_match.group(2)
            clean = re.sub(r'\s*\[PAT:\S+:\S+\]\s*', '', clean).strip()
            try:
                requests.post(WX_API + "/api/sendPatMsg", json={"receiver": prid, "wxid": pwxid}, timeout=10)
                log(f"PAT sent")
            except Exception as e:
                log(f"PAT err: {e}")
        # Handle [OCR:image_path]
        ocr_match = re.search(r'\[OCR:(.+?)\]', clean)
        if ocr_match:
            opath = ocr_match.group(1).strip()
            clean = re.sub(r'\s*\[OCR:.+?\]\s*', '', clean).strip()
            try:
                or_ = requests.post(WX_API + "/api/ocr", json={"imagePath": opath}, timeout=30)
                otext = or_.json().get("data", "")
                send_wx(fu, f"OCR: {otext[:200]}")
                log(f"OCR sent")
            except Exception as e:
                log(f"OCR err: {e}")
        # Send remaining text
        if clean.strip():
            send_wx(fu, clean.strip())
    else:
        log(f"-> {fu}: no reply")