diff --git a/.gitignore b/.gitignore index 60c7edd..8311c92 100644 --- a/.gitignore +++ b/.gitignore @@ -13,4 +13,12 @@ Thumbs.db .idea/ # Logs +logs/ *.log + +# Temp +temp/ +*.dat + +# Large installer +WeChatSetup-*.exe diff --git a/README.md b/README.md index f0b7a4b..a8d8997 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Windows 微信机器人 ↔ Linux Hermes AI,全自动双向聊天。 │ Windows 192.168.0.111 │ │ │ │ ┌──────────────────┐ ┌───────────────────────────┐ │ -│ │ 微信 3.9.10.19 x64 │ │ 日常微信 WeChatAppEx 4.x │ │ +│ │ 微信 3.9.5.81 x64 │ │ 日常微信 WeChatAppEx 4.x │ │ │ │ 机器人号 modachen │ │ 老爸日常使用,互不干扰 │ │ │ │ wxhelper DLL 注入 │ └───────────────────────────┘ │ │ └────────┬─────────┘ │ @@ -23,6 +23,8 @@ Windows 微信机器人 ↔ Linux Hermes AI,全自动双向聊天。 │ │ → POST Hermes API :8642 │ ← sisyphus session │ │ │ ← 收回复 → wxhelper 发 │ │ │ │ HTTP :19088 收发消息 │ │ +│ │ 图片:downloadAttach │ ← 全尺寸原图 CDN 下载 │ +│ │ 图片:decodeImage │ ← 解密 .dat → OCR 识别 │ │ │ 看门狗自愈 │ │ │ └────────┬────────────────┘ │ └───────────┼─────────────────────────────────────────────────┘ @@ -92,13 +94,16 @@ Windows 微信机器人 ↔ Linux Hermes AI,全自动双向聊天。 → 老爸手机收到 ``` -### 图片消息 +### 图片消息(全尺寸 OCR) ``` 老爸发图片 - → WeChat 收到 → wxhelper 图片事件 - → wechat_agent.py 保存图片 → 调豆包OCR (VolcEngine) - → OCR 文字结果 + 通知 → POST Hermes API + → WeChat 收到 → wxhelper TCP 推送 (type=3, 含 msgId) + → wechat_agent.py 提取 msgId + → downloadAttach API → 从 CDN 下载 1.4MB+ 全尺寸原图 + → decodeImage API → 解密 .dat 加密文件 → JPEG + → VolcEngine doubao-seed-code OCR → 完整文字提取 + → OCR 结果 → POST Hermes API → Hermes 知道图片内容 → 回复老爸 ``` @@ -130,9 +135,11 @@ Hermes → POST http://192.168.0.111:5801/hermes-msg ## 组件 ### Windows 端(wechat_agent.py v2) -- **wxhelper DLL 注入** — ttttupup/wxhelper 3.9.10.19 x64 +- **wxhelper DLL 注入** — ttttupup/wxhelper 3.9.5.81 (官方 DLL, Injector_x64.exe 注入) - **TCP 接收消息** — :19099 收微信事件 - **HTTP 发送消息** — :19088 wxhelper API +- **全尺寸图片 OCR** — downloadAttach (CDN下载) + decodeImage (.dat解密) → VolcEngine OCR +- **空白响应过滤** — 空/白字符响应自动跳过,不发到微信群 - **Hermes API 调用** — 直接 POST 
:8642,session 固定 `sisyphus` - **回复服务** — 5801 端口收 Hermes 消息 - **看门狗** — 120s 无消息刷新 webhook;API 挂了才重注入 DLL @@ -148,20 +155,14 @@ Hermes → POST http://192.168.0.111:5801/hermes-msg ### Windows -```batch -cd D:\F\NewI\opencode\daily-workspace\projects\wechat-hermes-gateway -scripts\start_bridge.bat -``` - -或直接: +使用 Python 3.10(Miniconda3 Python 3.13 的 encodings 模块损坏): ```powershell -$env:PYTHONHOME='' -Start-Process -WindowStyle Hidden python.exe scripts\wechat_agent.py +cd D:\F\NewI\opencode\daily-workspace\projects\wechat-hermes-gateway +$python = "C:\Users\hmo\AppData\Local\Programs\Python\Python310\python.exe" +Start-Process -WindowStyle Hidden -FilePath $python -ArgumentList "scripts\wechat_agent.py" ``` -启动后需用修复过低工具扫码登录微信。 - ### Linux(如重启后) ```bash @@ -192,11 +193,20 @@ wechat-hermes-gateway/ │ └── history_api.py # History REST API :19001 ├── scripts/ │ ├── wechat_agent.py # 主力:微信机器人代理 -│ ├── start_bridge.bat # 微信桥接一键启动 -│ ├── start_history_api.bat # History API 一键启动 -│ ├── moho_view.py # 莫荷聊天记录查看器 -│ └── moho_chat.py # 莫荷聊天查看器(备选) -└── temp/ # 废弃/临时脚本 +│ └── start_history_api.bat # History API 一键启动 +├── tools/ +│ ├── Injector_x64.exe # DLL 注入器 (3.9.5.81) +│ ├── wxhelper_official_39581.dll # 官方 wxhelper 3.9.5.81 DLL +│ ├── WeChatSetup-3.9.5.81.exe # 微信 3.9.5.81 安装包 +│ ├── ConsoleApplication.exe # 旧注入器 (3.9.10.19 备份) +│ └── wxhelper_391019.dll # 旧 DLL 备份 +├── docs/ +│ ├── 通用架构-WeChat opencode 桥接.md +│ ├── 老莫消息路由设计.md +│ └── assets/ +│ └── architecture.png +├── logs/ # 运行时日志 +└── temp/ # 临时文件 (OCR 解码图等) ``` ## History REST API (:19001) @@ -345,6 +355,9 @@ curl -X POST http://localhost:19001/api/history -H "Content-Type: application/js 4. **session 自动重置** → 关闭 api_server 平台的重置策略 5. **群聊不认人** → session 固定 `sisyphus`,所有消息共享上下文 6. **Linux bridge 常挂** → 去掉 bridge.py,Windows 直接调 Hermes API +7. **3.9.10.19-v1 图片 API 不全** → 降级到 3.9.5.81,获得 downloadAttach + decodeImage 支持 +8. **ConsoleApplication.exe 注入器不兼容** → 改用 Injector_x64.exe(参数 `-n WeChat.exe -i dll_path`) +9. 
**缩略图 OCR 瞎编** → 全尺寸 downloadAttach → decodeImage → OCR,1.4MB 原图识别 1376 字符 ## 已实现的功能 @@ -352,14 +365,15 @@ curl -X POST http://localhost:19001/api/history -H "Content-Type: application/js |------|------| | 文字消息收发(个人聊天) | ✅ 双向,session 上下文连贯 | | 文字消息收发(群聊) | ✅ 同 session,认识老爸 | -| 图片接收 + OCR 分析 | ✅ 自动 OCR → 结果给莫荷 | +| 图片接收 + 全尺寸 OCR | ✅ downloadAttach → decodeImage → 1.4MB 原图 → 豆包OCR | | 发送网上图片 | ✅ [IMG]URL[/IMG] 标记,Bot.send_image 发出 | | 图像生成 (SenseNova商汤) | ✅ [IMG]generate:描述[/IMG] 支持多种比例 | -| 图像理解/OCR | ✅ 豆包OCR + SenseNova 双引擎 | +| 图像理解/OCR | ✅ 豆包 doubao-seed-code + 全尺寸原图,1376 字符实测通过 | +| 空白响应过滤 | ✅ 空/白字符响应自动跳过,不发微信群 | | Hermes 身份认知 | ✅ 知道自己是莫荷/莫小荷,知道老爸 | | 会话上下文持续 | ✅ session `sisyphus`,自动重置已关闭 | | 小小莫 ↔ Hermes 双向通信 | ✅ API (:8642) + HTTP (:5801/hermes-msg) | -| 看门狗自愈 | ✅ 120s 无消息刷新 webhook | +| 看门狗自愈 | ✅ 120s 无消息刷新 webhook,API 挂了自动重注入 DLL | | 昵称识别 | ✅ 从 getContactList 获取 | | 联系人列表查询 | ✅ wxhelper /api/getContactList | | 历史聊天记录查询 | ✅ [HISTORY:wxid:count] 标签 → MSG0.db SQL | @@ -386,10 +400,10 @@ curl -X POST http://localhost:19001/api/history -H "Content-Type: application/js → 启动 opencode serve(:4096,莫荷连接用) → 启动后台守护(自动清理僵尸连接) -2. 打开经典微信 3.9.10.19 +2. 打开经典微信 3.9.5.81(用修复过低工具扫码登录) → 扫码登录机器人号 modachenchen -3. wechat_agent.py 在登录后自动注入 DLL +3. 
wechat_agent.py 在登录后自动注入 wxhelper_official_39581.dll → 自动开始转发消息 → 日志在 projects/wechat-hermes-gateway/logs/ ``` @@ -435,9 +449,12 @@ curl http://192.168.0.103:8642/v1/models -H "Authorization: Bearer hermes123" ## 注意事项 -- wxhelper DLL 支持 3.9.10.19 x64 微信 +- wxhelper DLL 支持 3.9.5.81 x64 微信(使用 `wxhelper_official_39581.dll`) +- 注入器:使用 `Injector_x64.exe`(参数:`-n WeChat.exe -i dll_path`),不再是 ConsoleApplication.exe - 每次 WeChat 重启需重新登录 - 启动顺序:先开微信 → agent 自动注入 DLL - Hermes API 首次调用可能较慢(大模型冷启动) - 看门狗每 120s 刷新 webhook,API 挂了自动重注入 +- Python 请用 Python 3.10(Miniconda3 3.13 的 encodings 模块损坏) +- 全尺寸图片 OCR:依赖 `downloadAttach` + `decodeImage` API,仅 3.9.5.81+ 支持 - 如果微信登录后没反应,等 1-2 分钟看门狗会自动处理 diff --git a/docs/assets/architecture.png b/docs/assets/architecture.png new file mode 100644 index 0000000..ed3c9aa Binary files /dev/null and b/docs/assets/architecture.png differ diff --git a/docs/老莫消息路由设计.md b/docs/老莫消息路由设计.md new file mode 100644 index 0000000..018243c --- /dev/null +++ b/docs/老莫消息路由设计.md @@ -0,0 +1,153 @@ +# 老莫微信消息 → serve session 路由设计方案 + +> 2026-05-20 +> 目标:老莫给机器人号发微信,小小莫也能看到,不依赖莫荷转述 + +--- + +## 现状 + +``` +你微信 → wxhelper TCP (:19099) → wechat_agent.py + ↓ + Hermes API (:8642) + ↓ + 莫荷回复你 +``` + +莫荷独占所有微信消息。小小莫只能被动等她转述。 + +--- + +## 目标 + +``` +你微信 → wxhelper TCP → wechat_agent.py ─┬→ Hermes API → 莫荷 (不变) + └→ opencode serve (:4096) → 小小莫看到 +``` + +新增一条岔路:老莫的消息同时写入 serve session,小小莫可主动查看。 + +--- + +## 方案对比 + +### 方案 A:subprocess 调用 `opencode run --attach --message` + +**做法**:wechat_agent.py 的 `process_msg()` 里加一段: + +```python +def fork_to_session(fu, ct): + """将消息写入 opencode serve session (非阻塞)""" + if fu != "wxid_c0a6izmwd78y22": + return # 只转发老莫 + try: + import subprocess + subprocess.run( + ["opencode", "run", "--attach", "http://localhost:4096", + "--password", "hermes123", + "--session", SESSION_ID, + "--message", f"[老莫] {ct}"], + capture_output=True, timeout=10, + env={**os.environ, "PYTHONHOME": ""} + ) + except Exception as e: + log(f"FORK ERR (non-fatal): {e}") 
+``` + +| 方面 | 评估 | +|------|------| +| 复杂度 | ⭐ 低,~10 行代码 | +| 对现有链路影响 | ❌ **无**,fork 是独立线程,失败不影响 Hermes | +| session ID 稳定性 | ⚠️ serve 重启后 ses_xxx 会变 → 需想办法拿到当前 ID | +| 性能开销 | subprocess 每次约 1-2 秒,但独立线程不阻塞主流程 | +| serve 密码硬编码 | ⚠️ 已经在代码库(hermes123),无新增风险 | +| 可靠性 | subprocess 可能因 PATH/PYTHONHOME 问题失败 | + +### 方案 B:直接 HTTP POST 调用 serve API + +**做法**:抓包分析 `opencode run --attach` 的 HTTP 协议,直接用 `urllib.request` POST: + +```python +# 伪代码,serve API 协议未知,需逆向 +urllib.request.urlopen("http://localhost:4096/api/session/inject", + data=json.dumps({"session": SID, "message": "[老莫] xxx"})) +``` + +| 方面 | 评估 | +|------|------| +| 复杂度 | ⭐⭐⭐ 未知,需逆向 serve API | +| 性能 | ✅ 纯 HTTP,无 subprocess 开销 | +| 稳定性 | ⚠️ 非官方 API,版本更新可能不兼容 | + +### 方案 C:写 inbox 文件 + 小小莫轮询 + +**做法**:wechat_agent 写文件,小小莫定期读取 + +| 方面 | 评估 | +|------|------| +| 复杂度 | ⭐ 最低 | +| 即时性 | ❌ 需要轮询,无法实时 | + +--- + +## 关键风险 + +### 1. session ID 稳定性(最核心) + +`opencode run --attach` 需要 session ID。每次 serve 重启后,当前 TUI session 的 ID 可能变化: +- 如果 serve 重启 → 老 session 消失 → 新 session 新 ID → 需要更新 wechat_agent 里的配置 +- **解决思路**:用 session 名称而不是 ID,或每次启动时自动获取 + +### 2. 不对莫荷通信造成任何影响 + +**铁律**:fork 到 session 的代码必须: +- 在独立线程中运行 +- 捕获所有异常 +- 设置超时(≤10 秒) +- 永远不阻塞 `call_hermes()` 和 `send_wx()` + +### 3. 循环消息风暴 + +如果我不小心回了一条 `[老莫]` 到 session,wechat_agent 不能把它再 fork 一次。 +- 现有的 `is_self` 检查已经过滤自己发送的消息 +- 但如果 serve session 的消息被 serve 再推给 wechat_agent... 需要确认不会发生 + +### 4. `opencode` CLI 在 wechat_agent 环境中是否可用 + +wechat_agent 以 `$env:PYTHONHOME=''` 启动,`opencode.cmd` 可能也依赖 Python。 +- 需测试:从 Python subprocess 能否直接调用 `opencode run --attach --message` + +--- + +## 推荐方案 + +**方案 A**(subprocess)最稳妥: +1. 对现有链路零影响 +2. 改动最小 +3. 可以逐步优化(先 subprocess,后改 HTTP API) + +### 待确认事项 + +1. **serve session ID 如何维护?** + - 能否用固定名称?还是每次启动获取? + - 如果 serve 重启导致 ID 变了,wechat_agent 如何感知? + +2. **`opencode run --attach --message` 是否支持在 Python subprocess 中调用?** + - 需要验证 CLI 安装路径和调用方式 + +3. **是否需要前缀路由?** + - 是全部消息都 fork? + - 还是只有特定前缀(如 `[小小莫]` 开头的消息)才 fork? 
+ +--- + +## 验证清单 + +实现后验证: +- [ ] 老莫发微信 → 莫荷正常回复(链路不变) +- [ ] 老莫发微信 → serve session 能看到消息(新增) +- [ ] `opencode run --attach` 超时/失败 → 莫荷通信不受影响 +- [ ] 老莫连续发多条 → 都能看到 +- [ ] wechat_agent 重启后依然工作 +- [ ] serve 重启后 session ID 变化时能自动适配 \ No newline at end of file diff --git a/docs/通用架构-WeChat opencode 桥接.md b/docs/通用架构-WeChat opencode 桥接.md new file mode 100644 index 0000000..e16c2af --- /dev/null +++ b/docs/通用架构-WeChat opencode 桥接.md @@ -0,0 +1,184 @@ +# 通用架构:WeChat ↔ opencode 双向桥接 + +> 2026-05-20 +> 剥离 Hermes AI 依赖,建立 opencode serve 与微信账号之间的通用双向通道 + +--- + +## 核心理念 + +微信机器人本质是一套通用的消息路由系统: + +``` +微信 ←→ wxhelper ←→ 桥接代理 ←→ 任何 AI / 程序 +``` + +**不应该是"莫荷专属"**。当前架构把 Hermes API 硬编码在桥接逻辑中,限制了通用性。 + +--- + +## 架构对比 + +### 当前(莫荷专用,双机) + +``` +你微信 → wxhelper TCP → wechat_agent → Hermes API (:8642, Linux) → 莫荷回复 + ↑ + 消息路由硬编码,改一处都要动 agent +``` + +### 目标(通用,纯 Windows) + +``` +你微信 → wxhelper TCP → Bridge Agent ─┬→ opencode serve session (小小莫看到) + │ + └→ HTTP API (:5801) 供任何程序消费 + ↑ + serve 里的 AI / 外部程序 POST 回复 +``` + +**关键变化**:桥接代理不再"替 AI 做决定",而是变成**中立的消息通道**: + +| 功能 | 现状 (wechat_agent.py) | 目标 (Bridge Agent) | +|------|----------------------|-------------------| +| 收到微信消息 | 直接 POST Hermes API | **写入 serve session + 提供 HTTP 消费接口** | +| 消息路由 | 硬编码 (call_hermes) | 无业务逻辑,只负责转发 | +| AI 是谁 | 只能是莫荷 (Hermes) | 可以是 serve 里的任何人(Sisyphus、莫荷...) 
| +| 外部调用 | :5801 简陋收消息 | http API 收消息 | + +--- + +## 通用架构图 + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Windows 192.168.0.111 │ +│ │ +│ ┌──────────────────────┐ │ +│ │ 微信 3.9.10.19 机器人 │ │ +│ │ (wxid_xxxxxxxxx) │ │ +│ └────────┬─────────────┘ │ +│ │ wxhelper TCP (:19099) → 收消息 │ +│ │ wxhelper HTTP (:19088) → 发消息 │ +│ ▼ │ +│ ┌────────────────────────────────────────────┐ │ +│ │ Bridge Agent (bridge.py v3) │ │ +│ │ │ │ +│ │ ┌────────────────────────────────────┐ │ │ +│ │ │ 消息收 (TCP thread) │ │ │ +│ │ │ 收到微信消息 → 写入 serve session │ │ opencode │ +│ │ │ → 触发 webhook(可选) │──┼──→ serve :4096 │ +│ │ └────────────────────────────────────┘ │ │ +│ │ │ │ +│ │ ┌────────────────────────────────────┐ │ │ +│ │ │ HTTP API 服务 (:5801) │ │ │ +│ │ │ POST /send → 发微信 │ │ │ +│ │ │ POST /history → 查历史 │ │ │ +│ │ │ POST /inject → 写 serve 会话 │ │ │ +│ │ └────────────────────────────────────┘ │ │ +│ └────────────────────────────────────────────┘ │ +│ │ ▲ │ +│ │ serve session 里的 AI │ HTTP POST /send │ +│ ▼ │ │ +│ ┌────────────────────────────────────┐ │ │ +│ │ opencode serve TUI │ │ │ +│ │ (Sisyphus / 任何 Agent) │──┘ │ +│ │ │ │ +│ │ 「老莫:今天吃了吗」← 从 session 看到 │ │ +│ │ 「回复:[xxm] 吃了」→ POST :5801 │ │ +│ └────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 数据流 + +### 微信消息 → AI(上行) + +``` +1. 老莫发微信给机器人号 +2. wxhelper DLL 通过 TCP (:19099) 推送给 Bridge Agent +3. Bridge Agent 收到消息,写入 serve session: + subprocess.run(["opencode", "run", "--attach", + "--message", "[老莫] 消息内容"]) +4. 同时,消息可通过 :5801 HTTP API 被任何订阅者消费 +5. serve session 里的 AI 在 TUI 或通过 session_search 看到 +``` + +### AI 回复 → 微信(下行) + +``` +1. AI 决定回复 → POST http://localhost:5801/send + {"to": "wxid_xxx", "message": "回复内容"} +2. Bridge Agent 收到 → wxpost /api/sendTextMsg +3. 
wxhelper DLL 发送 → 老莫手机收到 +``` + +--- + +## Bridge Agent 接口规范 + +### HTTP API (:5801) + +| 方法 | 路径 | 用途 | Body | +|------|------|------|------| +| POST | `/send` | 发微信消息 | `{"to":"wxid","message":"text"}` | +| POST | `/history` | 查聊天记录 | `{"wxid":"...","count":20}` | +| POST | `/recent` | 最近联系人 | 无 | +| POST | `/inject` | 写 serve session | `{"message":"[xxm] 内容"}` | +| GET | `/health` | 健康检查 | 无 | + +### 输出到 serve session 的格式 + +``` +[老莫] 消息内容 → 来自微信的普通消息 +[老莫|昵称] 消息内容 → 带昵称 +[系统] 新联系人 xxx → 系统事件 +[session:] 启动签到 → Agent 上线通知 +``` + +--- + +## 与 Hermes AI 的关系 + +Hermes 不再是架构的核心组件,而是可选的消息消费者之一: + +``` + ┌─→ opencode serve session → Sisyphus (小小莫) +老莫微信 → Bridge ──┤ + └─→ 消费 HTTP API 的任意程序 + │ + ┌─────┴─────┐ + │ │ + Hermes AI 其他 AI + (莫荷) (未来) +``` + +**迁就策略**: +1. 第一阶段:Bridge Agent 同时写 serve session + POST Hermes API(现状保留) +2. 第二阶段:Bridge Agent 只写 serve session,Hermes 通过 serve session 接入 +3. 第三阶段:完全通用化,Hermes 只是 serve session 里的一个 AI 角色 + +--- + +## 实施原则 + +1. **不破坏现有链路** — Bridge Agent 改造期间,Hermes 消息路由不变 +2. **增量迁移** — 先加新功能,再逐步替换旧逻辑 +3. **session 为主** — 所有消息以 serve session 为中心,HTTP API 为辅助 +4. **最低依赖** — 纯 Windows 可运行,不需要 Linux 端 Hermes + +--- + +## 开放问题 + +1. **session ID 管理**:Bridge Agent 如何知道当前有效的 session ID? +2. **session write 方式**:subprocess (`opencode run --attach --message`) 是否有更轻量的替代? +3. **消息去重**:写 session + POST Hermes 可能导致重复处理? +4. **serve 重启恢复**:Bridge Agent 如何在 serve 重启后自动重连? +5. **历史消息**:AI 上线后能否拉取 session 中已有的消息历史? 
+ +--- + +*参考:projects/wechat-hermes-gateway/docs/老莫消息路由设计.md* \ No newline at end of file diff --git a/scripts/start_agent.bat b/scripts/start_agent.bat new file mode 100644 index 0000000..bf30a26 --- /dev/null +++ b/scripts/start_agent.bat @@ -0,0 +1,39 @@ +@echo off +title WeChat Agent + +set PROJECT_DIR=D:\F\NewI\opencode\daily-workspace\projects\wechat-hermes-gateway +set TOOLS_DIR=%PROJECT_DIR%\tools +set PYTHONW=C:\Users\hmo\AppData\Local\Programs\Python\Python310\pythonw.exe +set INJECTOR=%TOOLS_DIR%\Injector_x64.exe +set DLL=%TOOLS_DIR%\wxhelper_official_39581.dll +set LOG=%PROJECT_DIR%\logs\startup.log + +echo [1/4] Waiting for WeChat... +:wait_wechat +tasklist /fi "imagename eq WeChat.exe" 2>nul | find /i "WeChat.exe" >nul +if errorlevel 1 ( + timeout /t 2 /nobreak >nul + goto wait_wechat +) +echo [2/4] WeChat started, checking wxhelper... + +curl -s -m 3 -X POST http://127.0.0.1:19088/api/checkLogin -H "Content-Type: application/json" -d "{}" 2>nul | find "code" >nul +if not errorlevel 1 ( + echo [3/4] wxhelper OK, skipping inject + goto start_agent +) + +echo [3/4] Injecting wxhelper... +%INJECTOR% -n WeChat.exe -i "%DLL%" >> "%LOG%" 2>&1 + +echo [3/4] Waiting for wxhelper HTTP... +:wait_wxhelper +timeout /t 2 /nobreak >nul +curl -s -m 3 -X POST http://127.0.0.1:19088/api/checkLogin -H "Content-Type: application/json" -d "{}" 2>nul | find "code" >nul +if errorlevel 1 goto wait_wxhelper + +:start_agent +echo [4/4] Clearing cache and starting agent... +if exist "%PROJECT_DIR%\scripts\__pycache__" rmdir /s /q "%PROJECT_DIR%\scripts\__pycache__" +start "" "%PYTHONW%" "%PROJECT_DIR%\scripts\wechat_agent.py" +echo Done. 
diff --git a/scripts/wechat_agent.py b/scripts/wechat_agent.py index 8499570..4755791 100644 --- a/scripts/wechat_agent.py +++ b/scripts/wechat_agent.py @@ -1,14 +1,14 @@ -""" +""" WeChat Agent v2 - wxhelper DLL + Hermes API (:8642) """ -import os, json, time, threading, requests, re, socketserver, subprocess, urllib.request, urllib.error +import os, json, time, threading, requests, re, socketserver, subprocess, urllib.request, urllib.error, base64 os.environ["no_proxy"] = "*" os.environ["NO_PROXY"] = "*" from http.server import HTTPServer, BaseHTTPRequestHandler from urllib.parse import urlparse, parse_qs BOT_WXID = "wxid_7onnerpx2s2l22" -BLOCK_WXIDS = {"fmessage", "weixin", "wechat"} # 系统账号/微信团队,不回复 +BLOCK_WXIDS = {"fmessage", "weixin", "wechat"} # ϵͳ�˺�/΢���Ŷӣ����ظ� WX_API = "http://127.0.0.1:19088" PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) LOG_DIR = os.path.join(PROJECT_ROOT, "logs") @@ -26,8 +26,8 @@ HERMES_KEY = "hermes123" SENSENOVA_KEY = "sk-aRNj3UwKSLPsDfh15QNTPwbHxahblfaO" SENSENOVA_URL = "https://token.sensenova.cn/v1" -INJECTOR = r"D:\F\NewI\opencode\daily-workspace\projects\wechat-hermes-gateway\tools\ConsoleApplication.exe" -WXHELPER_DLL = r"D:\F\NewI\opencode\daily-workspace\projects\wechat-hermes-gateway\tools\wxhelper_391019.dll" +INJECTOR = r"D:\F\NewI\opencode\daily-workspace\projects\wechat-hermes-gateway\tools\Injector_x64.exe" +WXHELPER_DLL = r"D:\F\NewI\opencode\daily-workspace\projects\wechat-hermes-gateway\tools\wxhelper_official_39581.dll" def log(m): with open(LOG_FILE, "a", encoding="utf-8") as f: @@ -44,7 +44,7 @@ def wxpost(path, data=None, timeout=10): log(f"WX ERR: {e}") return {"code": -1} -# ── History Query (via MSG table in MSG*.db databases) ── +# ���� History Query (via MSG table in MSG*.db databases) ���� def get_db_handle(): """Get handle for database containing MSG table. 
Cached after first call.""" global db_handle_cache @@ -78,7 +78,7 @@ def get_db_handle(): return None # Message type labels -MSG_TYPES = {1: "文字", 3: "图片", 34: "语音", 43: "视频", 47: "表情", 49: "链接", 10000: "系统", 10002: "红包"} +MSG_TYPES = {1: "文字", 3: "图片", 34: "语音", 43: "视频", 47: "表情", 49: "链接", 10000: "系统", 10002: "红包"} def query_history(wxid, limit=10): """Query historical text messages with a contact from MSG table.""" @@ -110,7 +110,7 @@ def format_history(wxid, rows): """Format MSG rows into readable chat history text.""" sender_name = get_nickname(wxid) bot_name = get_nickname(BOT_WXID) - lines = [f"📜 最近与 {sender_name} 的聊天记录 ({len(rows)}条):"] + lines = [f"📜 最近与 {sender_name} 的聊天记录 ({len(rows)}条):"] for row in rows: ts = int(row.get("CreateTime", 0)) time_str = time.strftime("%m/%d %H:%M", time.localtime(ts)) if ts else "?" @@ -121,7 +121,7 @@ def format_history(wxid, rows): who = bot_name if is_sender else sender_name # Format content if msg_type == 49: - content = f"[链接] {content[:60]}" + content = f"[链接] {content[:60]}" else: content = content[:200] lines.append(f"[{time_str}] {who}: {content}") @@ -133,10 +133,10 @@ def handle_history(wxid, count): rows = query_history(wxid, count) if rows: return format_history(wxid, rows) - return f"暂无与 {get_nickname(wxid)} 的聊天记录" + return f"暂无与 {get_nickname(wxid)} 的聊天记录" except Exception as e: log(f"History ERR: {e}") - return "查询历史记录失败" + return "查询历史记录失败" def handle_history_json(wxid, count): """Query history and return JSON-serializable dict for HTTP API.""" @@ -176,7 +176,7 @@ def handle_history_json(wxid, count): def send_wx(wxid, msg): # Strip weixin:// URLs that WeChat interprets as commands import re as _re2 - msg = _re2.sub(r'weixin://[^\s]+', '[链接已过滤]', msg) + msg = _re2.sub(r'weixin://[^\s]+', '[链接已过滤]', msg) r = wxpost("/api/sendTextMsg", {"wxid": wxid, "msg": msg}) log(f"SEND {wxid}: {r.get('msg','')}") @@ -195,7 +195,7 @@ def get_nickname(wxid): def call_hermes(wxid, 
content): nickname = get_nickname(wxid) headers = {"Authorization": f"Bearer {HERMES_KEY}", "X-Hermes-Session-Id": "sisyphus", "Content-Type": "application/json"} - sys_prompt = "回复简短。" + sys_prompt = "回复简短。" body = {"model": "hermes-agent", "messages": [{"role": "system", "content": sys_prompt}, {"role": "user", "content": content}]} try: r = requests.post(HERMES_API, json=body, headers=headers, timeout=180, proxies={"http": None, "https": None}) @@ -208,7 +208,7 @@ def call_hermes(wxid, content): def inject_to_hermes_session(text): """Inject chat history / context directly into Hermes's sisyphus session for memory repair.""" headers = {"Authorization": f"Bearer {HERMES_KEY}", "X-Hermes-Session-Id": "sisyphus", "Content-Type": "application/json"} - sys_prompt = "📥 MEMORY INJECTION: Below is past chat history. Absorb this into your context for memory repair. Do NOT reply to this — just acknowledge with 'Memory synced.'" + sys_prompt = "📥 MEMORY INJECTION: Below is past chat history. Absorb this into your context for memory repair. 
Do NOT reply to this — just acknowledge with 'Memory synced.'" body = {"model": "hermes-agent", "messages": [ {"role": "system", "content": sys_prompt}, {"role": "user", "content": text} @@ -224,7 +224,7 @@ def inject_to_hermes_session(text): log(f"Inject history ERR: {e}") return False -# ── Inject wxhelper DLL ── +# ── Inject wxhelper DLL ── def inject_wxhelper(): try: r = wxpost("/api/checkLogin", timeout=5) @@ -233,9 +233,37 @@ def inject_wxhelper(): return True except: pass + # Also check if port 19088 is just listening (wxhelper HTTP server alive) try: - result = subprocess.run([INJECTOR, "-i", "WeChat.exe", "-p", WXHELPER_DLL], capture_output=True, text=True, timeout=30) - log(f"Inject: {result.stdout.strip()[:50]}") + import socket as _sock + s = _sock.create_connection(("127.0.0.1", 19088), timeout=2) + s.close() + r = wxpost("/api/checkLogin", timeout=5) + if r.get("code") == 1: + log("wxhelper HTTP server alive, login OK") + return True + except: + pass + # Wait a moment in case server is still starting + time.sleep(3) + try: + r = wxpost("/api/checkLogin", timeout=5) + if r.get("code") == 1: + log("wxhelper responding after wait") + return True + except: + pass + try: + # Injector_x64.exe: -n process_name -i dll_path + result = subprocess.run([INJECTOR, "-n", "WeChat.exe", "-i", WXHELPER_DLL], capture_output=True, text=True, timeout=30) + output = (result.stdout + result.stderr).strip() + log(f"Inject: {output[:100]}") + # Check if injection succeeded by looking for "success" in output + if "success" not in output.lower(): + log(f"Inject MAY HAVE FAILED (no 'success' in output), retrying...") + time.sleep(2) + result2 = subprocess.run([INJECTOR, "-n", "WeChat.exe", "-i", WXHELPER_DLL], capture_output=True, text=True, timeout=30) + log(f"Inject retry: {(result2.stdout+result2.stderr).strip()[:100]}") time.sleep(3) r = wxpost("/api/checkLogin", timeout=5) if r.get("code") == 1: @@ -247,7 +275,7 @@ def inject_wxhelper(): log(f"Inject FAIL: {e}") 
return False -# ── TCP Message Receiver ── +# ���� TCP Message Receiver ���� class MsgHandler(socketserver.BaseRequestHandler): def handle(self): try: @@ -265,6 +293,128 @@ class MsgHandler(socketserver.BaseRequestHandler): finally: self.request.close() +# ���� Image OCR ���� +WX_FILES_BASE = os.path.join(os.path.expanduser("~"), "Documents", "WeChat Files") +BOT_WX_DIR = os.path.join(WX_FILES_BASE, BOT_WXID, "wxhelper") + +def ocr_image(base64_data): + """OCR from in-memory base64 image data. Returns text or None.""" + try: + headers = {"Authorization": "Bearer b0359bed-09f2-49e2-a53c-32ba057412e3", "Content-Type": "application/json"} + payload = { + "model": "doubao-seed-code", + "messages": [{ + "role": "user", + "content": [ + {"type": "text", "text": "请识别这张图片中的所有中文和英文字符,保持原文输出,包括数字、表格、百分比的完整结构。严格逐行逐列输出所有数据,不要省略、不要总结。"}, + {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_data}"}} + ] + }] + } + r = requests.post( + "https://ark.cn-beijing.volces.com/api/coding/v3/chat/completions", + json=payload, headers=headers, timeout=60, + proxies={"http": None, "https": None} + ) + if r.status_code == 200: + text = r.json()["choices"][0]["message"]["content"].strip() + log(f"OCR OK ({len(text)} chars)") + return text + log(f"OCR HTTP {r.status_code}: {r.text[:200]}") + except Exception as e: + log(f"OCR ERR: {e}") + return None + +def ocr_image_file(image_path): + """OCR an image file on disk. Returns text or None.""" + try: + with open(image_path, "rb") as f: + b64 = base64.b64encode(f.read()).decode() + return ocr_image(b64) + except Exception as e: + log(f"ocr_image_file ERR: {e}") + return None + +# ���� Full Image Download & Decode (wxhelper 3.9.5.81+) ���� +def download_full_image(msg_id): + """Download full image from CDN via downloadAttach. Returns encrypted .dat path or None. + + Retries both the API call (wxhelper may return -2 transiently) + and file existence (async CDN download takes time). 
+ """ + try: + dat_path = os.path.join(BOT_WX_DIR, "image", f"{msg_id}.dat") + + # Phase 1: Retry API call (wxhelper may return -2 if msg not ready) + for api_attempt in range(10): + r = wxpost("/api/downloadAttach", {"msgId": int(msg_id)}, timeout=30) + code = r.get("code", -1) + if code >= 0: + break + log(f"downloadAttach attempt {api_attempt+1}: code={code} {r.get('msg','')}") + time.sleep(1) + else: + log(f"downloadAttach FAILED after 10 attempts, last code={code}") + return None + + # Phase 2: Wait for async CDN download + log(f"downloadAttach queued, waiting for file...") + for wait_attempt in range(20): + if os.path.exists(dat_path): + log(f"Download OK: {dat_path} ({os.path.getsize(dat_path)} bytes)") + return dat_path + time.sleep(1) + log(f"downloadAttach: .dat not found after 20s for msgId={msg_id}") + except Exception as e: + log(f"downloadAttach ERR: {e}") + return None + +def decode_image_file(dat_path): + """Decrypt encrypted .dat to viewable image. Returns decoded path or None. + + Some .dat files are already valid PNG/JPEG images (not encrypted). + Falls back to checking if .dat itself is a valid image. 
+ """ + try: + before_files = set(os.listdir(TEMP_DIR)) + r = wxpost("/api/decodeImage", {"filePath": dat_path, "storeDir": TEMP_DIR}, timeout=30) + if r.get("code", -1) > 0: + base = os.path.splitext(os.path.basename(dat_path))[0] + for ext in ['.jpg', '.jpeg', '.png', '.bmp']: + cand = os.path.join(TEMP_DIR, base + ext) + if os.path.exists(cand): + log(f"Decoded: {cand}") + return cand + for f in os.listdir(TEMP_DIR): + if f in before_files: continue + if f.lower().endswith(('.jpg', '.jpeg', '.png')): + cand = os.path.join(TEMP_DIR, f) + log(f"Decoded (new): {cand}") + return cand + log("decodeImage OK but no new image file found") + # Fallback: .dat file may already be a valid image (not encrypted) + with open(dat_path, "rb") as f: + header = f.read(4) + ext = None + if header[:2] == b'\xff\xd8': # JPEG + ext = '.jpg' + elif header[:4] == b'\x89PNG': # PNG + ext = '.png' + elif header[:4] == b'GIF8': # GIF + ext = '.gif' + elif header[:2] == b'BM': # BMP + ext = '.bmp' + if ext: + out_path = os.path.join(TEMP_DIR, os.path.splitext(os.path.basename(dat_path))[0] + ext) + import shutil + shutil.copy(dat_path, out_path) + log(f".dat is already {ext}, copied to {out_path}") + return out_path + log(f"decodeImage FAIL: code={r.get('code')} {r.get('msg','')}") + except Exception as e: + log(f"decodeImage ERR: {e}") + return None + def process_msg(raw_data): global last_msg_time last_msg_time = time.time() @@ -278,24 +428,52 @@ def process_msg(raw_data): if not fu or not ct or fu == BOT_WXID or fu in BLOCK_WXIDS or fu.startswith("gh_") or is_self: log(f"SKIP: fu={fu} self={is_self}") return - # Route by message type +# Route by message type if msg_type == 34: # Voice log(f"<- {fu}: [voice]") reply = call_hermes(fu, "[voice message]") - if reply: send_wx(fu, reply) + if reply and reply.strip(): + send_wx(fu, reply.strip()) return - if msg_type == 3: # Image - wxhelper sends image as separate event + if msg_type == 3: # Image + msg_id = d.get("msgId", 0) or d.get("svrid", 
0) + log(f"IMAGE: msgId={msg_id} b64_len={len(d.get('base64Img',''))}") + ocr_text = None + # Full-image OCR via wxhelper 3.9.5.81 APIs + if msg_id: + dat_path = download_full_image(msg_id) + if dat_path: + decoded = decode_image_file(dat_path) + if decoded: + log(f"Full image OCR on {decoded}") + ocr_text = ocr_image_file(decoded) + if ocr_text: + log(f"OCR result ({len(ocr_text)} chars): {ocr_text[:200]}") + reply = call_hermes(fu, f"[老莫发送了一张图片,OCR识别结果如下]\n{ocr_text}") + elif msg_id: + # Had msgId but full-image OCR failed - report error, don't use thumbnail + log("Full-image OCR failed, skipping thumbnail (useless at 84x210)") + reply = call_hermes(fu, "[老莫发送了一张图片,但全尺寸图片下载或OCR识别失败,无法读取内容]") + else: + # No msgId at all - rare, just report failure + log("No msgId available, cannot download full image") + reply = call_hermes(fu, "[老莫发送了一张图片,但无法获取图片ID,无法识别]") + if reply and reply.strip(): + log(f"-> {fu}: {reply[:50]}") + process_tags(reply, fu) + else: + log(f"-> {fu}: skip (blank image response)") return # Text - prepend sender wxid+name so Hermes knows who's talking sender_name = get_nickname(fu) msg_with_sender = f"[{fu}|{sender_name}] {ct}" log(f"<- {fu} ({sender_name}): {ct[:50]}") reply = call_hermes(fu, msg_with_sender) - if reply: + if reply and reply.strip(): log(f"-> {fu}: {reply[:50]}") process_tags(reply, fu) else: - log(f"-> {fu}: no reply") + log(f"-> {fu}: no reply (blank/empty)") except Exception as e: log(f"MSG ERR: {e}") import traceback @@ -325,7 +503,7 @@ def process_tags(reply, fu): clean = re.sub(r'\s*\[CONTACT:\w+\]\s*', '', clean).strip() r = wxpost("/api/getContactProfile", {"wxid": cm.group(1)}) cd = r.get("data", {}) - send_wx(fu, f"昵称: {cd.get('nickname','?')} 备注: {cd.get('remark','')}") + send_wx(fu, f"昵称: {cd.get('nickname','?')} 备注: {cd.get('remark','')}") # [ROOM_MEMBERS:roomid] rm = re.search(r'\[ROOM_MEMBERS:(\S+)\]', clean) if rm: @@ -333,7 +511,7 @@ def process_tags(reply, fu): r = wxpost("/api/getMemberFromChatRoom", 
{"chatRoomId": rm.group(1)}) members = (r.get("data") or {}).get("members", "") mlist = [m for m in members.split("\u0007") if m] - send_wx(fu, f"群成员 ({len(mlist)}): {','.join(mlist[:20])}") + send_wx(fu, f"群成员 ({len(mlist)}): {','.join(mlist[:20])}") # [HISTORY:wxid:count] - query chat history from MSG table hm = re.search(r'\[HISTORY:(\S+?):(\d+)\]', clean) if hm: @@ -400,7 +578,7 @@ def download_emoji(m, fu): wxpost("/api/sendCustomEmotion", {"wxid": fu, "filePath": tmp}) os.remove(tmp) -# ── Watchdog ── +# ── Watchdog ── def watchdog(): global last_msg_time while True: @@ -419,7 +597,7 @@ def watchdog(): last_msg_time = time.time() time.sleep(30) -# ── Start ── +# ── Start ── print("[Agent] starting...", flush=True) log("=== Agent v2 (wxhelper) ===") @@ -519,3 +697,4 @@ try: time.sleep(1) except KeyboardInterrupt: log("Bye") + diff --git a/tools/Injector_x64.exe b/tools/Injector_x64.exe new file mode 100644 index 0000000..6516db9 Binary files /dev/null and b/tools/Injector_x64.exe differ diff --git a/tools/wxhelper_official_39581.dll b/tools/wxhelper_official_39581.dll new file mode 100644 index 0000000..433d530 Binary files /dev/null and b/tools/wxhelper_official_39581.dll differ