From 6c12b51250d11eab81f679f0a8fcb8db93296d09 Mon Sep 17 00:00:00 2001 From: hmo Date: Mon, 18 May 2026 01:58:36 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20add=20=E5=95=86=E6=B1=A4=E5=9B=BE?= =?UTF-8?q?=E5=83=8F=E7=94=9F=E6=88=90,=20deprecate=20bridge,=20fix=20gate?= =?UTF-8?q?way=20--replace,=20update=20docs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 22 +++- scripts/bridge.py | 3 +- scripts/wechat_agent.py | 242 +++++++++++++++++++++++++++++++++++----- 3 files changed, 232 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index e9a68c7..c2e98fa 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ Hermes → POST http://192.168.0.111:5801/hermes-msg | 19001 | wxhook HTTP API | Windows | | 5801 | Hermes→小小莫 消息入口 | Windows | | 8642 | Hermes API Server (OpenAI兼容) | Linux | -| 5800 | bridge.py (旧,不再使用) | Linux | +| 5800 | bridge.py (已废弃) | Linux | ## 组件 @@ -171,7 +171,10 @@ wechat-hermes-gateway/ |------|------| | 文字消息收发(个人聊天) | ✅ 双向,session 上下文连贯 | | 文字消息收发(群聊) | ✅ 同 session,认识老爸 | -| 图片消息接收 + OCR 分析 | ✅ 自动保存 → 豆包 OCR → 结果给 Hermes | +| 图片接收 + OCR 分析 | ✅ 自动 OCR → 结果给莫荷 | +| 发送网上图片 | ✅ [IMG]URL[/IMG] 标记,Bot.send_image 发出 | +| 图像生成 (SenseNova商汤) | ✅ [IMG]generate:描述[/IMG] 支持多种比例 | +| 图像理解/OCR | ✅ 豆包OCR + SenseNova 双引擎 | | Hermes 身份认知 | ✅ 知道自己是莫荷/莫小荷,知道老爸 | | 会话上下文持续 | ✅ session `sisyphus`,自动重置已关闭 | | 小小莫 ↔ Hermes 双向通信 | ✅ API (:8642) + HTTP (:5801/hermes-msg) | @@ -183,11 +186,10 @@ wechat-hermes-gateway/ | 功能 | 原因 | |------|------| -| 语音消息 | ⏳ 已能检测并下载,转文字(STT)待接入 | -| 发送图片 | wxhook 有 send_image API 但未接入回复链路 | -| 文件收发 | 同上,未接入 | +| 语音消息(STT) | wxhook 不支持语音提取 | +| 发送本地图片/文件 | bot.send_image API 已通,回复链路待完善 | | 换头像/改资料 | wxhook 无相关 API | -| 群管理(拉人踢人) | wxhook 无群管理 API | +| 群管理 | wxhook 群 API 有限 | | iLink 官方 bot 接口 | 限制太多,弃用 | | 多人独立会话 | 目前全部共享 `sisyphus` 单会话 | @@ -243,6 +245,14 @@ curl http://192.168.0.103:8642/v1/models -H "Authorization: Bearer hermes123" 如果连这都不奏效 → 告诉 Hermes "去找小小莫" → 它会 POST 到 :5801/hermes-msg → 我来处理。 +## 已知问题 + +| 问题 | 状态 | +|------|------| +| Gateway 偶尔 hang | 已修复 --replace 冲突,改用 systemd 管理 | +| 生图 API 有时较慢 | 商汤 SenseNova,首次调用需加载模型 | +| 语音转文字 | wxhook 不支持语音提取,暂不可行 | + ## 注意事项 - wxhook DLL 仅支持 x64 微信 3.9.5.81 diff --git a/scripts/bridge.py b/scripts/bridge.py index 0285fd7..d32e673 100644 --- a/scripts/bridge.py +++ b/scripts/bridge.py @@ -1,5 +1,6 @@ """ -WeChat Hermes Bridge — with webhook keepalive +[DEPRECATED] 早期架构组件,已由 wechat_agent.py 直调 Hermes API 替代。 +不再使用,仅作为架构参考保留。 """ import pymem, pymem.process, requests, time, json, threading from http.server import HTTPServer, BaseHTTPRequestHandler diff --git a/scripts/wechat_agent.py b/scripts/wechat_agent.py index aa50c0b..064b50a 100644 --- a/scripts/wechat_agent.py +++ b/scripts/wechat_agent.py @@ -1,4 +1,4 @@ -""" +""" WeChat Agent - wxhook + Hermes API (:8642) """ import sys, os, json, time, threading, requests, re @@ -8,7 +8,7 @@ sys.path.insert(0, r"C:\Users\hmo\AppData\Local\Programs\Python\Python310\Lib\si os.environ["WXHOOK_LOG_LEVEL"] = "ERROR" from wxhook import Bot -from wxhook.events import TEXT_MESSAGE, IMAGE_MESSAGE, VOICE_MESSAGE +from wxhook.events import TEXT_MESSAGE, IMAGE_MESSAGE, VOICE_MESSAGE, XML_MESSAGE import pymem, pymem.process BOT_WXID = "wxid_7onnerpx2s2l22" @@ -20,6 +20,12 @@ nickname_cache = {} HERMES_API = "http://192.168.0.103:8642/v1/chat/completions" HERMES_KEY = "hermes123" +SENSENOVA_KEY = "sk-aRNj3UwKSLPsDfh15QNTPwbHxahblfaO" +SENSENOVA_URL = "https://token.sensenova.cn/v1" + +# SenseNova (商汤) for image gen + vision +SENSENOVA_KEY = "sk-aRNj3UwKSLPsDfh15QNTPwbHxahblfaO" +SENSENOVA_URL = "https://token.sensenova.cn/v1" def log(m): with open(LOG_FILE, "a", encoding="utf-8") as f: @@ -50,14 +56,35 @@ def get_nickname(wxid): def call_hermes(wxid, content): nickname = get_nickname(wxid) headers = {"Authorization": f"Bearer {HERMES_KEY}", "X-Hermes-Session-Id": "sisyphus", "Content-Type": "application/json"} - sys_prompt = f"你是莫荷,女生。你主人是老爸({nickname})。回复简短像聊天。发图用 [IMG]URL[/IMG]。" + sys_prompt = f"""你是莫荷,女生。你主人是老爸({nickname})。回复简短像聊天。 +能力: +- 发图: [IMG]图片URL[/IMG] +- 生图已通: [IMG]generate:描述[/IMG](商汤模型,放心用) +- 指定比例: [IMG]generate:描述|16:9[/IMG] 可选 1:1 16:9 9:16 3:2 2:3 3:4 4:3 +生图可以直接出图,不用怀疑能不能用 +- 发文件: [FILE]文件URL[/FILE] +- 发表情: [EMOJI]图片URL[/EMOJI] +- 查联系人: [CONTACT:wxid] +- 查群成员: [ROOM_MEMBERS:群ID] +- 拍一拍: [PAT:群ID:wxid]""" body = {"model": "hermes-agent", "messages": [{"role": "system", "content": sys_prompt}, {"role": "user", "content": content}]} try: - r = requests.post(HERMES_API, json=body, headers=headers, timeout=120, proxies={"http": None, "https": None}) + r = requests.post(HERMES_API, json=body, headers=headers, timeout=180, proxies={"http": None, "https": None}) if r.status_code == 200: return r.json()["choices"][0]["message"]["content"] except Exception as e: - log(f"API ERR: {e}") + err_msg = str(e) + log(f"API ERR: {err_msg[:60]}") + # Notify user on errors + try: + if "timeout" in err_msg.lower() or "timed out" in err_msg.lower(): + send_wx(fu, "[莫荷处理超时,你再发一遍试试?]") + elif "connection" in err_msg.lower() or "refused" in err_msg.lower(): + send_wx(fu, "[跟莫荷的连接断了,正在自动重连...]") + elif "500" in err_msg or "50x" in err_msg: + send_wx(fu, "[莫荷那边出错了,等一会儿再试]") + except: + pass return None def watchdog(): @@ -68,11 +95,24 @@ def watchdog(): try: r = requests.post(WX_API + "/api/checkLogin", json={}, timeout=5) if r.json().get("code") == 1: - requests.post(WX_API + "/api/hookSyncMsg", json={"ip": "127.0.0.1", "port": 19001, "enableHttp": 1, "url": "", "timeout": 300}, timeout=5) + # API alive, just refresh webhook + port = WX_API.split(":")[-1] + requests.post(WX_API + "/api/hookSyncMsg", json={"ip": "127.0.0.1", "port": int(port), "enableHttp": 1, "url": "", "timeout": 300}, timeout=5) log(f"WATCHDOG: refreshed ({int(idle)}s)") else: - log("WATCHDOG: re-injecting...") - pymem.process.inject_dll(pymem.Pymem("WeChat.exe").process_handle, DLL.encode()) + # API dead, find WeChat and inject DLL + log("WATCHDOG: re-injecting into running WeChat...") + try: + for proc in psutil.process_iter(["pid", "name"]): + if proc.info["name"] == "WeChat.exe": + pm = pymem.Pymem() + pm.open_process_from_id(proc.info["pid"]) + pymem.process.inject_dll(pm.process_handle, DLL.encode()) + pm.close() + log(f"WATCHDOG: injected into PID {proc.info['pid']}") + break + except Exception as ej: + log(f"WATCHDOG: inject failed: {ej}") except: pass last_msg_time = time.time() @@ -113,7 +153,7 @@ b = Bot() WX_API = b.BASE_URL log("Bot ready, API=" + WX_API) -@b.handle([TEXT_MESSAGE, IMAGE_MESSAGE, VOICE_MESSAGE]) +@b.handle([TEXT_MESSAGE, IMAGE_MESSAGE, VOICE_MESSAGE, XML_MESSAGE]) def on_msg(_bot, event): global last_msg_time last_msg_time = time.time() @@ -121,7 +161,24 @@ def on_msg(_bot, event): if not fu or fu == BOT_WXID: return if event.type == VOICE_MESSAGE: - log(f"<- {fu}: [voice]") + mid = event.msgId or 0 + log(f"<- {fu}: [voice] msgId={mid}") + # Try various voice download methods with real msgId + try: + r1 = requests.post(WX_API + "/api/getVoiceByMsgId", json={"msgId": mid, "storeDir": r"C:\Users\hmo\Desktop\wechat_voice"}, timeout=10) + log(f"getVoice: {r1.json()}") + except Exception as e: + log(f"getVoice err: {e}") + try: + r2 = requests.post(WX_API + "/api/downloadAttach", json={"msgId": mid}, timeout=10) + log(f"downloadAttach: {r2.json()}") + except Exception as e: + log(f"downloadAttach err: {e}") + try: + r3 = requests.post(WX_API + "/api/forwardMsg", json={"msgId": mid, "wxid": "filehelper"}, timeout=10) + log(f"forwardMsg: {r3.json()}") + except Exception as e: + log(f"forwardMsg err: {e}") reply = call_hermes(fu, "[voice message]") if reply: send_wx(fu, reply) return @@ -149,6 +206,20 @@ def on_msg(_bot, event): reply = call_hermes(fu, msg) if reply: send_wx(fu, reply) return + # Handle XML messages (files, cards, etc.) + if event.type == XML_MESSAGE: + content = str(event.content or "") + log(f"<- {fu}: [xml] {content[:80]}") + # Try to extract file info from XML + import re as _re + fname_match = _re.search(r'(.*?)', content) + if fname_match: + reply = call_hermes(fu, f"[sent a file: {fname_match.group(1)}]") + else: + reply = call_hermes(fu, "[sent a file or card]") + if reply: send_wx(fu, reply) + return + content = event.content or "" if not content: return @@ -156,29 +227,144 @@ def on_msg(_bot, event): reply = call_hermes(fu, content) if reply: log(f"-> {fu}: {reply[:50]}") - img_match = re.search(r'\[IMG\](.*?)\[/IMG\]', reply) + clean = reply + # Handle [FILE] tag + file_match = re.search(r'\[FILE\](.*?)\[/FILE\]', reply) + if file_match: + file_url = file_match.group(1).strip() + clean = re.sub(r'\s*\[FILE\].*?\[/FILE\]\s*', '', clean).strip() + try: + fr = requests.get(file_url, timeout=60, proxies={"http": None, "https": None}) + if fr.status_code == 200: + fname = os.path.join(r"C:\Users\hmo\Desktop", f"send_file_{int(time.time())}.dat") + with open(fname, "wb") as f: + f.write(fr.content) + try: + _bot.send_file(fu, fname) + log(f"FILE sent") + except: + requests.post(WX_API + "/api/sendFileMsg", json={"wxid": fu, "filePath": fname}, timeout=10) + os.remove(fname) + except Exception as e: + log(f"FILE err: {e}") + # Handle [IMG] tag + img_match = re.search(r'\[IMG\](.*?)\[/IMG\]', clean) if img_match: img_cmd = img_match.group(1).strip() - clean = re.sub(r'\s*\[IMG\].*?\[/IMG\]\s*', '', reply).strip() - if clean: - send_wx(fu, clean) + clean = re.sub(r'\s*\[IMG\].*?\[/IMG\]\s*', '', clean).strip() try: - ir = requests.get(img_cmd, timeout=30, proxies={"http": None, "https": None}) - if ir.status_code == 200: - ext = ".jpg" - if "png" in ir.headers.get("content-type", ""): ext = ".png" - tmp = os.path.join(r"C:\Users\hmo\Desktop", f"send_img_{int(time.time())}{ext}") - with open(tmp, "wb") as f: - f.write(ir.content) - try: - _bot.send_image(fu, tmp) - except: - requests.post(WX_API + "/api/sendImagesMsg", json={"wxid": fu, "imagePath": tmp}, timeout=10) - os.remove(tmp) + if img_cmd.startswith("generate:") or img_cmd.startswith("draw:"): + # Generate image via SenseNova + parts = img_cmd.split(":", 1)[1].strip() + ratio = "1:1" + if "|" in parts: + ratio = parts.split("|")[1].strip() + prompt = parts.split("|")[0].strip() + else: + prompt = parts + # Map aspect ratio to SenseNova size + size_map = {"1:1":"2048x2048", "16:9":"2752x1536", "9:16":"1536x2752", + "3:2":"2496x1664", "2:3":"1664x2496", "3:4":"1760x2368", "4:3":"2368x1760"} + size = size_map.get(ratio, "2048x2048") + log(f"GEN SenseNova: {prompt[:30]} [{ratio}]") + gen_r = requests.post(SENSENOVA_URL + "/images/generations", + json={"model": "sensenova-u1-fast", "prompt": prompt, "size": size, "response_format": "url"}, + headers={"Authorization": f"Bearer {SENSENOVA_KEY}", "Content-Type": "application/json"}, + timeout=180) + if gen_r.status_code == 200: + img_url = gen_r.json()["data"][0]["url"] + ir = requests.get(img_url, timeout=60) + if ir.status_code == 200: + tmp = os.path.join(r"C:\Users\hmo\Desktop", f"gen_img_{int(time.time())}.png") + with open(tmp, "wb") as f: + f.write(ir.content) + _bot.send_image(fu, tmp) + os.remove(tmp) + log("GEN sent") + else: + log(f"GEN err: {gen_r.status_code} {gen_r.text[:100]}") + else: + ir = requests.get(img_cmd, timeout=30, proxies={"http": None, "https": None}) + if ir.status_code == 200: + ext = ".jpg" + if "png" in ir.headers.get("content-type", ""): ext = ".png" + tmp = os.path.join(r"C:\Users\hmo\Desktop", f"send_img_{int(time.time())}{ext}") + with open(tmp, "wb") as f: + f.write(ir.content) + try: + _bot.send_image(fu, tmp) + except: + requests.post(WX_API + "/api/sendImagesMsg", json={"wxid": fu, "imagePath": tmp}, timeout=10) + os.remove(tmp) except Exception as e: log(f"IMG err: {e}") - else: - send_wx(fu, reply) + # Handle [CONTACT:wxid] + contact_match = re.search(r'\[CONTACT:(\w+)\]', clean) + if contact_match: + cwxid = contact_match.group(1) + clean = re.sub(r'\s*\[CONTACT:\w+\]\s*', '', clean).strip() + try: + cr = requests.post(WX_API + "/api/getContactProfile", json={"wxid": cwxid}, timeout=10) + cd = cr.json().get("data", {}) + info = f"昵称: {cd.get('nickname','?')} 备注: {cd.get('remark','')} 账号: {cd.get('account','')}" + send_wx(fu, info) + log(f"CONTACT info sent") + except Exception as e: + log(f"CONTACT err: {e}") + # Handle [ROOM_MEMBERS:roomid] + room_match = re.search(r'\[ROOM_MEMBERS:(\S+)\]', clean) + if room_match: + rid = room_match.group(1) + clean = re.sub(r'\s*\[ROOM_MEMBERS:\S+\]\s*', '', clean).strip() + try: + rr = requests.post(WX_API + "/api/getMemberFromChatRoom", json={"chatRoomId": rid}, timeout=10) + members = rr.json().get("data", {}).get("members", "") + send_wx(fu, f"群成员: {members[:100]}") + log(f"ROOM members sent") + except Exception as e: + log(f"ROOM err: {e}") + # Handle [EMOJI] tag + emoji_match = re.search(r'\[EMOJI\](.*?)\[/EMOJI\]', clean) + if emoji_match: + eurl = emoji_match.group(1).strip() + clean = re.sub(r'\s*\[EMOJI\].*?\[/EMOJI\]\s*', '', clean).strip() + try: + er = requests.get(eurl, timeout=30, proxies={"http": None, "https": None}) + if er.status_code == 200: + epath = os.path.join(r"C:\Users\hmo\Desktop", f"emoji_{int(time.time())}.png") + with open(epath, "wb") as f: + f.write(er.content) + _bot.send_emotion(fu, epath) + os.remove(epath) + log(f"EMOJI sent") + except Exception as e: + log(f"EMOJI err: {e}") + # Handle [PAT:roomid:wxid] + pat_match = re.search(r'\[PAT:(\S+):(\S+)\]', clean) + if pat_match: + prid = pat_match.group(1) + pwxid = pat_match.group(2) + clean = re.sub(r'\s*\[PAT:\S+:\S+\]\s*', '', clean).strip() + try: + requests.post(WX_API + "/api/sendPatMsg", json={"receiver": prid, "wxid": pwxid}, timeout=10) + log(f"PAT sent") + except Exception as e: + log(f"PAT err: {e}") + # Handle [OCR:image_path] + ocr_match = re.search(r'\[OCR:(.+?)\]', clean) + if ocr_match: + opath = ocr_match.group(1).strip() + clean = re.sub(r'\s*\[OCR:.+?\]\s*', '', clean).strip() + try: + or_ = requests.post(WX_API + "/api/ocr", json={"imagePath": opath}, timeout=30) + otext = or_.json().get("data", "") + send_wx(fu, f"OCR: {otext[:200]}") + log(f"OCR sent") + except Exception as e: + log(f"OCR err: {e}") + # Send remaining text + if clean.strip(): + send_wx(fu, clean.strip()) else: log(f"-> {fu}: no reply")