feat: WeChat Linux bot via docker-wechatbot-webhook
- Docker container with auto-restart - systemd webhook receiver on :5804 - Full send/receive loop: WeChat ↔ Docker ↔ Hermes - Fixed login token for persistence - Firewall rules for container-host communication
This commit is contained in:
@@ -0,0 +1,56 @@
|
||||
# MoWeChat — 莫荷微信 Bot (Docker 版)
|
||||
|
||||
2026-06-24 凌晨 1:58 部署完成。
|
||||
|
||||
## 技术方案
|
||||
|
||||
**抛弃了 GDB Hook 和 /proc/PID/mem,改用 docker-wechatbot-webhook**
|
||||
|
||||
```
|
||||
微信手机 → WeChat服务器 → Docker Bot → Webhook(:5804) → Hermes Gateway
|
||||
↓
|
||||
微信手机 ← WeChat服务器 ← Docker Bot ← Webhook(:5804) ← Hermes回复
|
||||
```
|
||||
|
||||
## 部署组件
|
||||
|
||||
### 1. Docker 容器
|
||||
- 镜像: `dannicool/docker-wechatbot-webhook` (2174 stars)
|
||||
- 容器名: `wxBotWebhook`
|
||||
- 端口: 3001 (Web 管理 + API)
|
||||
- 自动重启: `--restart unless-stopped`
|
||||
- 固定 token: `LOCAL_LOGIN_API_TOKEN=mowechat_fixed_token_001`
|
||||
- Webhook: `RECVD_MSG_API=http://172.17.0.1:5804/`
|
||||
|
||||
### 2. Webhook 接收器 (systemd 服务)
|
||||
- 服务名: `wechat-webhook.service`
|
||||
- 端口: 5804
|
||||
- 脚本: `/home/hmo/projects/AgentsMeeting/gateway/linux/wechat_webhook.py`
|
||||
- 日志: `/home/hmo/projects/AgentsMeeting/gateway/linux/logs/webhook_service.log`
|
||||
|
||||
工作流程:
|
||||
1. 收到消息 → 解析 multipart/form-data → 提取发送者和内容
|
||||
2. 异步转发到 Hermes Gateway (绕过代理)
|
||||
3. 获取 Hermes 回复 → 通过 WeChat API 发回给用户
|
||||
|
||||
### 3. 联系信息
|
||||
- 老爸 (莫语不语): `wxid_c0a6izmwd78y22`
|
||||
- 莫荷: `wxid_7onnerpx2s2l22` (微信昵称: 莫小荷)
|
||||
|
||||
## 发送消息 API
|
||||
|
||||
```bash
|
||||
curl -X POST "http://localhost:3001/webhook/msg/v2?token=mowechat_fixed_token_001" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"to": "莫语不语", "data": {"content": "消息内容"}}'
|
||||
```
|
||||
|
||||
## 登录
|
||||
|
||||
容器重启后需要重新扫码登录:
|
||||
http://192.168.1.246:3001/login?token=mowechat_fixed_token_001
|
||||
|
||||
## 已知问题
|
||||
- Web 协议大约两天掉一次线,Docker 自动重启后需要重新扫码
|
||||
- 发图片/文件尚未实现(需要扩展 webhook 处理)
|
||||
- 群消息尚未测试
|
||||
@@ -0,0 +1,198 @@
|
||||
#!/usr/bin/env gdb
|
||||
"""
|
||||
GDB Hook script for WeChat Linux AppImage.
|
||||
Intercepts incoming messages from WeChat's NewSync_ProcessStashMsgList.
|
||||
|
||||
Based on Ajax's Blog methodology:
|
||||
https://aajax.top/2026/03/11/GettingLinuxWechatMessages/
|
||||
|
||||
Usage:
|
||||
gdb -p $(pidof wechat) -x hooks/gdb_hook_messages.py
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
# ── Configuration ──────────────────────────────────────────────
|
||||
|
||||
# WeChat binary base address (from /proc/PID/maps, first r--p entry)
|
||||
# Must be recalculated each run due to ASLR
|
||||
WECHAT_PID = None
|
||||
|
||||
# Breakpoint RVA (relative to binary base) for WeChat 4.1.x
|
||||
# From Ajax's IDA Pro analysis of 4.1.0.16:
|
||||
# NewSync_ProcessStashMsgList -> loop call at 0x4994BEB
|
||||
BP_RVA = 0x4994BEB
|
||||
|
||||
# Log file
|
||||
LOG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "logs")
|
||||
LOG_FILE = os.path.join(LOG_DIR, "wechat_messages.log")
|
||||
|
||||
# Message structure offsets (from Ajax's blog)
|
||||
OFF_TYPE = 0x14 # int: message type
|
||||
OFF_SVRID = 0x50 # unsigned long long: server message ID
|
||||
OFF_HOLDER = 0x20 # void*: holder pointer
|
||||
OFF_INNER = 0x08 # void*: inner pointer (relative to holder)
|
||||
OFF_CONTENT_PTR = 0x00 # char*: content string pointer (from inner+0)
|
||||
OFF_CONTENT_LEN = 0x10 # int: content string length (from inner+0x10)
|
||||
|
||||
|
||||
def log(msg):
|
||||
"""Write log to file and stdout."""
|
||||
with open(LOG_FILE, "a", encoding="utf-8") as f:
|
||||
f.write(f"{msg}\n")
|
||||
gdb.write(f"{msg}\n")
|
||||
|
||||
|
||||
class WechatMessageBreakpoint(gdb.Breakpoint):
|
||||
"""Breakpoint that fires on each incoming WeChat message."""
|
||||
|
||||
def __init__(self, address):
|
||||
super().__init__(f"*{address}")
|
||||
self.suppress = True # Don't print to stdout automatically
|
||||
|
||||
def stop(self):
|
||||
try:
|
||||
# Read registers
|
||||
msg_ptr = int(gdb.parse_and_eval("$rsi"))
|
||||
if msg_ptr == 0:
|
||||
return False
|
||||
|
||||
# Read message type
|
||||
raw_type = gdb.selected_inferior().read_memory(msg_ptr + OFF_TYPE, 4)
|
||||
msg_type = int.from_bytes(raw_type, byteorder='little', signed=True)
|
||||
|
||||
# Read server message ID
|
||||
raw_svrid = gdb.selected_inferior().read_memory(msg_ptr + OFF_SVRID, 8)
|
||||
svrid = int.from_bytes(raw_svrid, byteorder='little')
|
||||
|
||||
# Read holder pointer
|
||||
raw_holder = gdb.selected_inferior().read_memory(msg_ptr + OFF_HOLDER, 8)
|
||||
holder = int.from_bytes(raw_holder, byteorder='little', signed=False)
|
||||
if holder == 0:
|
||||
return False
|
||||
|
||||
# Read inner pointer
|
||||
raw_inner = gdb.selected_inferior().read_memory(holder + OFF_INNER, 8)
|
||||
inner = int.from_bytes(raw_inner, byteorder='little', signed=False)
|
||||
if inner == 0:
|
||||
return False
|
||||
|
||||
# Read content string length
|
||||
raw_len = gdb.selected_inferior().read_memory(inner + OFF_CONTENT_LEN, 4)
|
||||
content_len = int.from_bytes(raw_len, byteorder='little', signed=False)
|
||||
if content_len <= 0 or content_len > 100000:
|
||||
return False
|
||||
|
||||
# Read content string
|
||||
raw_content_ptr = gdb.selected_inferior().read_memory(inner + OFF_CONTENT_PTR, 8)
|
||||
content_ptr = int.from_bytes(raw_content_ptr, byteorder='little', signed=False)
|
||||
if content_ptr == 0:
|
||||
return False
|
||||
|
||||
raw_content = gdb.selected_inferior().read_memory(content_ptr, min(content_len * 2, 100000))
|
||||
|
||||
# Try to decode as UTF-16LE (WeChat internal encoding)
|
||||
try:
|
||||
content = raw_content.tobytes()[:content_len * 2].decode('utf-16le', errors='replace')
|
||||
except:
|
||||
content = str(raw_content)
|
||||
|
||||
# Read talker/sender info (different offset structure)
|
||||
# This is more complex — skip for initial test
|
||||
message = {
|
||||
"type": msg_type,
|
||||
"svrid": hex(svrid),
|
||||
"content": content[:500],
|
||||
"content_len": content_len,
|
||||
"holder": hex(holder),
|
||||
"inner": hex(inner),
|
||||
}
|
||||
|
||||
log(f"[WECHAT_MSG] type={msg_type} svrid={hex(svrid)}")
|
||||
log(f"[WECHAT_MSG] content: {content[:200]}")
|
||||
|
||||
# Forward to Hermes Gateway
|
||||
try:
|
||||
forward_to_hermes(message)
|
||||
except Exception as e:
|
||||
log(f"[WECHAT_MSG] forward error: {e}")
|
||||
|
||||
except Exception as e:
|
||||
log(f"[WECHAT_MSG] error: {e}")
|
||||
|
||||
return False # Don't stop execution
|
||||
|
||||
|
||||
def forward_to_hermes(msg):
|
||||
"""Forward message to Hermes Gateway."""
|
||||
import urllib.request
|
||||
|
||||
payload = json.dumps({
|
||||
"model": "nova-4",
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"You are a WeChat message handler. A new message arrived: type={msg.get('type')}, content={msg.get('content', '')[:100]}"
|
||||
}
|
||||
]
|
||||
}).encode('utf-8')
|
||||
|
||||
req = urllib.request.Request(
|
||||
"http://192.168.1.246:8642/v1/chat/completions",
|
||||
data=payload,
|
||||
headers={
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": "Bearer hermes123"
|
||||
},
|
||||
method="POST"
|
||||
)
|
||||
# Don't wait for response — fire and forget
|
||||
try:
|
||||
urllib.request.urlopen(req, timeout=2)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def detect_wechat_base():
|
||||
"""Detect WeChat binary base address from /proc/PID/maps."""
|
||||
pid = WECHAT_PID
|
||||
if pid is None:
|
||||
try:
|
||||
pid = gdb.selected_inferior().pid
|
||||
except:
|
||||
pass
|
||||
if pid is None:
|
||||
return None
|
||||
try:
|
||||
with open(f"/proc/{pid}/maps", "r") as f:
|
||||
for line in f:
|
||||
if "/opt/wechat/wechat" in line and "r--p" in line:
|
||||
addr = line.split("-")[0]
|
||||
return int(addr, 16)
|
||||
except Exception as e:
|
||||
log(f"[WECHAT_MSG] Failed to detect base: {e}")
|
||||
return None
|
||||
|
||||
|
||||
class HookWechatMessages(gdb.Command):
|
||||
"""Install WeChat message hook."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__("hook-wechat-messages", gdb.COMMAND_USER)
|
||||
|
||||
def invoke(self, arg, from_tty):
|
||||
base = detect_wechat_base()
|
||||
if base is None:
|
||||
log("[WECHAT_MSG] ERROR: Could not detect WeChat base address")
|
||||
return
|
||||
|
||||
addr = base + BP_RVA
|
||||
WechatMessageBreakpoint(addr)
|
||||
log(f"[WECHAT_MSG] Hook installed: base=0x{base:x} bp=0x{addr:x}")
|
||||
log(f"[WECHAT_MSG] Waiting for messages...")
|
||||
|
||||
|
||||
# Register the custom command
|
||||
HookWechatMessages()
|
||||
@@ -0,0 +1,152 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
GDB startup script for WeChat message hooking.
|
||||
Run with:
|
||||
gdb -x this_script.py --args ./WeChatLinux.AppImage --no-sandbox --disable-gpu
|
||||
|
||||
GDB disables ASLR by default, so wechat base = 0x555555554000
|
||||
"""
|
||||
|
||||
import gdb
|
||||
import os
|
||||
import time
|
||||
|
||||
# Breakpoint RVA from Ajax's blog (4.1.0.16, confirmed valid for 4.1.7)
|
||||
BP_RVA = 0x4994BEB
|
||||
# Fixed base when GDB starts process (ASLR disabled)
|
||||
FIXED_BASE = 0x555555554000
|
||||
|
||||
LOG_FILE = "/home/hmo/projects/AgentsMeeting/gateway/linux/logs/wechat_messages.log"
|
||||
|
||||
|
||||
def log(msg):
|
||||
with open(LOG_FILE, "a") as f:
|
||||
f.write(f"{msg}\n")
|
||||
gdb.write(f"{msg}\n")
|
||||
|
||||
|
||||
class WechatMessageBreakpoint(gdb.Breakpoint):
|
||||
"""Breakpoint that fires on each incoming WeChat message."""
|
||||
|
||||
def __init__(self, address):
|
||||
super().__init__(f"*{address}")
|
||||
self.suppress = True
|
||||
|
||||
def stop(self):
|
||||
try:
|
||||
msg_ptr = int(gdb.parse_and_eval("$rsi"))
|
||||
if msg_ptr == 0:
|
||||
return False
|
||||
|
||||
raw_type = gdb.selected_inferior().read_memory(msg_ptr + 0x14, 4)
|
||||
msg_type = int.from_bytes(raw_type, byteorder='little', signed=True)
|
||||
|
||||
raw_svrid = gdb.selected_inferior().read_memory(msg_ptr + 0x50, 8)
|
||||
svrid = int.from_bytes(raw_svrid, byteorder='little')
|
||||
|
||||
raw_holder = gdb.selected_inferior().read_memory(msg_ptr + 0x20, 8)
|
||||
holder = int.from_bytes(raw_holder, byteorder='little', signed=False)
|
||||
if holder == 0:
|
||||
return False
|
||||
|
||||
raw_inner = gdb.selected_inferior().read_memory(holder + 0x8, 8)
|
||||
inner = int.from_bytes(raw_inner, byteorder='little', signed=False)
|
||||
if inner == 0:
|
||||
return False
|
||||
|
||||
# Read content length from inner + 0x10
|
||||
raw_len = gdb.selected_inferior().read_memory(inner + 0x10, 4)
|
||||
content_len = int.from_bytes(raw_len, byteorder='little', signed=False)
|
||||
if content_len <= 0 or content_len > 100000:
|
||||
return False
|
||||
|
||||
# Read content pointer from inner + 0x0
|
||||
raw_cp = gdb.selected_inferior().read_memory(inner + 0x0, 8)
|
||||
content_ptr = int.from_bytes(raw_cp, byteorder='little', signed=False)
|
||||
if content_ptr == 0:
|
||||
return False
|
||||
|
||||
raw_content = gdb.selected_inferior().read_memory(content_ptr, min(content_len * 2, 100000))
|
||||
try:
|
||||
content = raw_content.tobytes()[:content_len * 2].decode('utf-16le', errors='replace')
|
||||
except:
|
||||
content = str(raw_content)
|
||||
|
||||
# Try to read sender info from msg_ptr + 0x38 (talker wxid)
|
||||
try:
|
||||
raw_talker = gdb.selected_inferior().read_memory(msg_ptr + 0x38, 8)
|
||||
talker_ptr = int.from_bytes(raw_talker, byteorder='little', signed=False)
|
||||
if talker_ptr:
|
||||
talker_data = gdb.selected_inferior().read_memory(talker_ptr, 64)
|
||||
talker = talker_data.tobytes().split(b'\x00')[0].decode('utf-8', errors='replace')
|
||||
else:
|
||||
talker = "unknown"
|
||||
except:
|
||||
talker = "unknown"
|
||||
|
||||
info = f"[WECHAT_MSG] type={msg_type} svrid={hex(svrid)} talker={talker}"
|
||||
log(info)
|
||||
log(f"[WECHAT_MSG] content: {content[:300]}")
|
||||
|
||||
# Forward to Hermes
|
||||
try:
|
||||
import urllib.request
|
||||
import json
|
||||
payload = json.dumps({
|
||||
"model": "nova-4",
|
||||
"messages": [
|
||||
{"role": "user", "content": f"[WeChat from {talker}] {content[:500]}"}
|
||||
]
|
||||
}).encode()
|
||||
req = urllib.request.Request(
|
||||
"http://192.168.1.246:8642/v1/chat/completions",
|
||||
data=payload,
|
||||
headers={"Content-Type": "application/json", "Authorization": "Bearer hermes123"},
|
||||
method="POST"
|
||||
)
|
||||
urllib.request.urlopen(req, timeout=2)
|
||||
except:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
log(f"[WECHAT_MSG] ERROR: {e}")
|
||||
|
||||
return False
|
||||
|
||||
|
||||
class AutoHookWechat(gdb.Command):
|
||||
"""Auto-hook WeChat messages on startup."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__("auto-hook-wechat", gdb.COMMAND_USER)
|
||||
|
||||
def invoke(self, arg, from_tty):
|
||||
bp_addr = FIXED_BASE + BP_RVA
|
||||
WechatMessageBreakpoint(bp_addr)
|
||||
log(f"[WECHAT_MSG] Breakpoint set at 0x{bp_addr:x}")
|
||||
|
||||
|
||||
class OnStart(gdb.Breakpoint):
|
||||
"""Breakpoint on _start to set up hooks after process loads."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__("_start")
|
||||
|
||||
def stop(self):
|
||||
gdb.execute("auto-hook-wechat")
|
||||
return True # Stop so user can continue
|
||||
|
||||
|
||||
# Configure GDB
|
||||
gdb.execute("set pagination off")
|
||||
gdb.execute("set confirm off")
|
||||
gdb.execute("handle SIG33 pass nostop noprint")
|
||||
gdb.execute("set follow-fork-mode child")
|
||||
|
||||
# Register our commands
|
||||
AutoHookWechat()
|
||||
|
||||
# Set breakpoint at _start so we hook after process loads
|
||||
OnStart()
|
||||
|
||||
log("[WECHAT_MSG] GDB startup script loaded. Type 'run' to start WeChat.")
|
||||
@@ -0,0 +1,296 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
MoWeChat Message Monitor — reads WeChat process memory to capture incoming messages.
|
||||
No GDB, no ptrace attach, no crash risk.
|
||||
Uses /proc/PID/mem to scan the heap for message patterns.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import hashlib
|
||||
import logging
|
||||
import argparse
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
|
||||
# ── Configuration ──────────────────────────────────────────────
|
||||
|
||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
|
||||
os.makedirs(LOG_DIR, exist_ok=True)
|
||||
|
||||
LOG_FILE = os.path.join(LOG_DIR, "wechat_msg_monitor.log")
|
||||
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_messages.json")
|
||||
|
||||
HERMES_API = "http://192.168.1.246:8642/v1/chat/completions"
|
||||
HERMES_KEY = "hermes123"
|
||||
|
||||
# Polling interval (seconds)
|
||||
POLL_INTERVAL = 2
|
||||
|
||||
# Minimum message content length to consider valid
|
||||
MIN_MSG_LEN = 2
|
||||
MAX_MSG_LEN = 10000
|
||||
|
||||
# WeChat binary marker in /proc/PID/maps
|
||||
WECHAT_BINARY_MARKER = "/opt/wechat/wechat"
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(message)s",
|
||||
handlers=[
|
||||
logging.FileHandler(LOG_FILE),
|
||||
logging.StreamHandler()
|
||||
]
|
||||
)
|
||||
log = logging.getLogger("mowechat")
|
||||
|
||||
|
||||
class WeChatMemoryMonitor:
|
||||
"""Monitors WeChat process memory for new messages."""
|
||||
|
||||
def __init__(self):
|
||||
self.pid = None
|
||||
self.seen = self._load_seen()
|
||||
self.heap_regions = []
|
||||
self.anon_regions = []
|
||||
self.wxid_pattern = re.compile(rb'wxid_[a-zA-Z0-9]{10,28}\x00')
|
||||
# Chinese characters and ASCII printable
|
||||
self.msg_pattern = re.compile(rb'[\x20-\x7e\x80-\xff]{2,}')
|
||||
|
||||
def _load_seen(self):
|
||||
"""Load previously seen message hash set."""
|
||||
try:
|
||||
with open(SAWN_FILE, 'r') as f:
|
||||
return set(json.load(f))
|
||||
except:
|
||||
return set()
|
||||
|
||||
def _save_seen(self):
|
||||
"""Save seen message hash set (trim to last 1000)."""
|
||||
trimmed = set(list(self.seen)[-1000:])
|
||||
try:
|
||||
with open(SAWN_FILE, 'w') as f:
|
||||
json.dump(list(trimmed), f)
|
||||
except:
|
||||
pass
|
||||
|
||||
def find_wechat(self):
|
||||
"""Find the main wechat process PID."""
|
||||
for p in os.listdir('/proc'):
|
||||
if not p.isdigit():
|
||||
continue
|
||||
try:
|
||||
with open(f'/proc/{p}/maps', 'r') as f:
|
||||
content = f.read(4096)
|
||||
if WECHAT_BINARY_MARKER in content:
|
||||
self.pid = int(p)
|
||||
return True
|
||||
except:
|
||||
continue
|
||||
return False
|
||||
|
||||
def update_memory_regions(self):
|
||||
"""Read /proc/PID/maps to find heap and anonymous regions."""
|
||||
self.heap_regions = []
|
||||
self.anon_regions = []
|
||||
|
||||
try:
|
||||
with open(f'/proc/{self.pid}/maps', 'r') as f:
|
||||
for line in f:
|
||||
parts = line.strip().split()
|
||||
if len(parts) < 5:
|
||||
continue
|
||||
addr_range = parts[0].split('-')
|
||||
start = int(addr_range[0], 16)
|
||||
end = int(addr_range[1], 16)
|
||||
perms = parts[1]
|
||||
name = parts[-1] if len(parts) > 4 else ''
|
||||
|
||||
size_mb = (end - start) / (1024 * 1024)
|
||||
if size_mb > 50: # Skip huge regions
|
||||
continue
|
||||
|
||||
if perms.startswith('rw'):
|
||||
if name == '[heap]':
|
||||
self.heap_regions.append((start, end))
|
||||
elif not name: # Anonymous mapping
|
||||
self.anon_regions.append((start, end))
|
||||
except Exception as e:
|
||||
log.error(f"Failed to read maps: {e}")
|
||||
|
||||
def scan_region(self, start, end, region_name=""):
|
||||
"""Scan a memory region for WeChat message patterns."""
|
||||
messages = []
|
||||
try:
|
||||
with open(f'/proc/{self.pid}/mem', 'rb') as mem:
|
||||
mem.seek(start)
|
||||
data = mem.read(end - start)
|
||||
|
||||
# Find all wxid patterns (likely message senders)
|
||||
for match in self.wxid_pattern.finditer(data):
|
||||
wxid = match.group(0).decode('utf-8', errors='replace').strip('\x00')
|
||||
pos = match.start()
|
||||
|
||||
# Look for message content after the wxid
|
||||
# Message content is typically within 256 bytes after the wxid
|
||||
content_start = pos + len(match.group())
|
||||
search_end = min(content_start + 512, len(data))
|
||||
|
||||
# Find the next null-terminated string that's not the wxid itself
|
||||
for cmatch in self.msg_pattern.finditer(data, content_start, search_end):
|
||||
content = cmatch.group(0).decode('utf-8', errors='replace').strip('\x00')
|
||||
# Filter out known non-message patterns
|
||||
if len(content) < MIN_MSG_LEN or len(content) > MAX_MSG_LEN:
|
||||
continue
|
||||
if content.startswith('wxid_') or content.startswith('http'):
|
||||
# Skip if it's another wxid or begins with a URL
|
||||
# Actually, messages CAN start with URLs, so only skip wxid
|
||||
if content.startswith('wxid_'):
|
||||
continue
|
||||
|
||||
# Create a unique hash for dedup
|
||||
msg_hash = hashlib.md5(f"{wxid}:{content}".encode()).hexdigest()
|
||||
|
||||
if msg_hash not in self.seen:
|
||||
self.seen.add(msg_hash)
|
||||
messages.append({
|
||||
'wxid': wxid,
|
||||
'content': content,
|
||||
'pos': hex(pos + start),
|
||||
'region': region_name,
|
||||
})
|
||||
break # Only one message per wxid
|
||||
|
||||
except (PermissionError, ProcessLookupError) as e:
|
||||
log.warning(f"Memory read failed: {e}")
|
||||
except Exception as e:
|
||||
log.error(f"Scan error at {region_name}: {e}")
|
||||
|
||||
return messages
|
||||
|
||||
def scan_all(self):
|
||||
"""Scan all relevant memory regions for new messages."""
|
||||
self.update_memory_regions()
|
||||
all_messages = []
|
||||
|
||||
for start, end in self.heap_regions:
|
||||
msgs = self.scan_region(start, end, "[heap]")
|
||||
all_messages.extend(msgs)
|
||||
|
||||
for start, end in self.anon_regions[:20]: # Limit anonymous regions
|
||||
size_mb = (end - start) / (1024 * 1024)
|
||||
if size_mb > 10: # Skip large anonymous maps
|
||||
continue
|
||||
msgs = self.scan_region(start, end, "[anon]")
|
||||
all_messages.extend(msgs)
|
||||
|
||||
if all_messages:
|
||||
self._save_seen()
|
||||
|
||||
return all_messages
|
||||
|
||||
def forward_to_hermes(self, msg):
|
||||
"""Forward a captured message to Hermes Gateway."""
|
||||
payload = json.dumps({
|
||||
"model": "nova-4",
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"You receive WeChat messages. "
|
||||
"Process this message according to the standard pipeline."
|
||||
)
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
f"[WeChat Message]\n"
|
||||
f"From: {msg['wxid']}\n"
|
||||
f"Content: {msg['content']}"
|
||||
)
|
||||
}
|
||||
]
|
||||
}).encode('utf-8')
|
||||
|
||||
try:
|
||||
req = urllib.request.Request(
|
||||
HERMES_API,
|
||||
data=payload,
|
||||
headers={
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {HERMES_KEY}"
|
||||
},
|
||||
method="POST"
|
||||
)
|
||||
urllib.request.urlopen(req, timeout=3)
|
||||
log.info(f"Forwarded: {msg['wxid']}: {msg['content'][:60]}")
|
||||
except Exception as e:
|
||||
log.warning(f"Forward failed: {e}")
|
||||
|
||||
def run(self, once=False):
|
||||
"""Main monitoring loop."""
|
||||
if not self.find_wechat():
|
||||
log.error("WeChat process not found!")
|
||||
return False
|
||||
|
||||
log.info(f"Found WeChat PID: {self.pid}")
|
||||
|
||||
if once:
|
||||
messages = self.scan_all()
|
||||
for msg in messages:
|
||||
log.info(f"Captured: [{msg['wxid']}] {msg['content'][:80]}")
|
||||
return messages
|
||||
|
||||
log.info(f"Starting monitor (poll every {POLL_INTERVAL}s)...")
|
||||
while True:
|
||||
try:
|
||||
# Check process is alive
|
||||
if not os.path.exists(f'/proc/{self.pid}'):
|
||||
log.warning("WeChat process died, re-finding...")
|
||||
if not self.find_wechat():
|
||||
log.error("Cannot find WeChat, sleeping 30s...")
|
||||
time.sleep(30)
|
||||
continue
|
||||
log.info(f"Re-attached to PID: {self.pid}")
|
||||
|
||||
messages = self.scan_all()
|
||||
for msg in messages:
|
||||
log.info(f"NEW: [{msg['wxid']}] {msg['content'][:80]}")
|
||||
# self.forward_to_hermes(msg)
|
||||
|
||||
time.sleep(POLL_INTERVAL)
|
||||
except KeyboardInterrupt:
|
||||
log.info("Monitor stopped.")
|
||||
break
|
||||
except Exception as e:
|
||||
log.error(f"Monitor error: {e}")
|
||||
time.sleep(10)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="MoWeChat Message Monitor")
|
||||
parser.add_argument("--once", action="store_true", help="Scan once and exit")
|
||||
parser.add_argument("--foreground", action="store_true", help="Run in foreground")
|
||||
args = parser.parse_args()
|
||||
|
||||
monitor = WeChatMemoryMonitor()
|
||||
|
||||
if args.once:
|
||||
msgs = monitor.run(once=True)
|
||||
if msgs:
|
||||
print(json.dumps(msgs, ensure_ascii=False, indent=2))
|
||||
else:
|
||||
print("No new messages found.")
|
||||
return
|
||||
|
||||
monitor.run()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,343 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
MoWeChat Message Monitor v2 — reads WeChat process memory to capture incoming messages.
|
||||
Improvements over v1:
|
||||
- Better message extraction: looks for content near known wxids with length-prefix structure
|
||||
- Content filtering: rejects binary garbage, only keeps real text
|
||||
- Tracks messages by content + wxid hash
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import hashlib
|
||||
import logging
|
||||
import argparse
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
|
||||
# ── Configuration ──────────────────────────────────────────────
|
||||
|
||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
|
||||
os.makedirs(LOG_DIR, exist_ok=True)
|
||||
|
||||
LOG_FILE = os.path.join(LOG_DIR, "wechat_msg_monitor.log")
|
||||
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_messages.json")
|
||||
|
||||
HERMES_API = "http://192.168.1.246:8642/v1/chat/completions"
|
||||
HERMES_KEY = "hermes123"
|
||||
|
||||
POLL_INTERVAL = 3
|
||||
WECHAT_BINARY_MARKER = "/opt/wechat/wechat"
|
||||
|
||||
# Known message sender wxids (populated during scanning)
|
||||
OWN_WXID = "wxid_c0a6izmwd78y22" # 老爸 (莫语不语)
|
||||
BOT_WXID = "wxid_7onnerpx2s2l22" # 莫荷自己
|
||||
INTERESTING_WXIDS = {OWN_WXID, BOT_WXID}
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(message)s",
|
||||
handlers=[
|
||||
logging.FileHandler(LOG_FILE),
|
||||
logging.StreamHandler()
|
||||
]
|
||||
)
|
||||
log = logging.getLogger("mowechat")
|
||||
|
||||
|
||||
def is_valid_text(s, min_ratio=0.5):
|
||||
"""Check if a string looks like real text (vs binary garbage)."""
|
||||
if len(s) < 2:
|
||||
return False
|
||||
# Count printable chars
|
||||
printable = 0
|
||||
for ch in s:
|
||||
if ch.isprintable() and (ch.isalpha() or ch.isspace() or ch.isdigit() or ch in '.,!?;:\'\"-()[]{}@#_/\\'):
|
||||
printable += 1
|
||||
return printable / max(len(s), 1) >= min_ratio
|
||||
|
||||
|
||||
def extract_strings(data, min_len=4):
|
||||
"""Extract readable strings from binary data."""
|
||||
result = []
|
||||
current = b''
|
||||
for b in data:
|
||||
if 32 <= b < 127 or b in (0x0a, 0x0d, 0x09):
|
||||
current += bytes([b])
|
||||
elif b >= 0x80: # Part of multi-byte UTF-8
|
||||
current += bytes([b])
|
||||
else:
|
||||
if len(current) >= min_len:
|
||||
try:
|
||||
decoded = current.decode('utf-8', errors='replace')
|
||||
if is_valid_text(decoded):
|
||||
result.append(decoded)
|
||||
except:
|
||||
pass
|
||||
current = b''
|
||||
if len(current) >= min_len:
|
||||
try:
|
||||
decoded = current.decode('utf-8', errors='replace')
|
||||
if is_valid_text(decoded):
|
||||
result.append(decoded)
|
||||
except:
|
||||
pass
|
||||
return result
|
||||
|
||||
|
||||
class WeChatMemoryMonitor:
|
||||
"""Monitors WeChat process memory for new messages."""
|
||||
|
||||
def __init__(self):
|
||||
self.pid = None
|
||||
self.seen = self._load_seen()
|
||||
self.heap_region = None
|
||||
self.wxid_pattern = re.compile(rb'wxid_[a-zA-Z0-9]{10,28}\x00')
|
||||
# Known message sources
|
||||
self.known_wxids = set(INTERESTING_WXIDS)
|
||||
|
||||
def _load_seen(self):
|
||||
try:
|
||||
with open(SAWN_FILE, 'r') as f:
|
||||
return set(json.load(f))
|
||||
except:
|
||||
return set()
|
||||
|
||||
def _save_seen(self):
|
||||
trimmed = set(list(self.seen)[-2000:])
|
||||
try:
|
||||
with open(SAWN_FILE, 'w') as f:
|
||||
json.dump(list(trimmed), f)
|
||||
except:
|
||||
pass
|
||||
|
||||
def find_wechat(self):
|
||||
"""Find the main wechat process PID and heap region."""
|
||||
for p in os.listdir('/proc'):
|
||||
if not p.isdigit():
|
||||
continue
|
||||
try:
|
||||
with open(f'/proc/{p}/maps', 'r') as f:
|
||||
content = f.read(8192)
|
||||
if WECHAT_BINARY_MARKER in content:
|
||||
self.pid = int(p)
|
||||
# Find heap
|
||||
for line in content.split('\n'):
|
||||
if '[heap]' in line:
|
||||
parts = line.split()
|
||||
addr_range = parts[0].split('-')
|
||||
self.heap_region = (int(addr_range[0], 16), int(addr_range[1], 16))
|
||||
return True
|
||||
except:
|
||||
continue
|
||||
return False
|
||||
|
||||
def scan_message(self, mem, wxid_bytes, wxid_pos, wxid_end):
|
||||
"""Try to extract a real message following a wxid in memory."""
|
||||
wxid_str = wxid_bytes.decode('utf-8', errors='replace').strip('\x00')
|
||||
|
||||
# Search within 512 bytes after the wxid for message content
|
||||
search_start = wxid_end
|
||||
search_end = min(search_start + 512, self.heap_region[1] - self.heap_region[0] if self.heap_region else search_start + 512)
|
||||
|
||||
try:
|
||||
mem.seek(search_start)
|
||||
data = mem.read(search_end - search_start)
|
||||
except:
|
||||
return None
|
||||
|
||||
# Look for null-terminated strings that look like messages
|
||||
messages = []
|
||||
current = b''
|
||||
for b in data:
|
||||
if b == 0:
|
||||
if len(current) >= 3:
|
||||
try:
|
||||
text = current.decode('utf-8', errors='replace')
|
||||
# Filter: must have real text content
|
||||
if is_valid_text(text) and len(text) >= 2 and not text.startswith('wxid_'):
|
||||
messages.append(text)
|
||||
except:
|
||||
pass
|
||||
current = b''
|
||||
else:
|
||||
current += bytes([b])
|
||||
|
||||
# Also try to find message by looking for it at a known offset pattern
|
||||
# In WeChat's structure: [msg_type(4)] [svr_id(8)] [content_ptr(8)] [content_len(4)] [content...]
|
||||
|
||||
if not messages:
|
||||
return None
|
||||
|
||||
# Pick the best candidate (longest, most printable)
|
||||
best = max(messages, key=lambda m: (len(m), sum(1 for c in m if c.isalpha() or c.isdigit())))
|
||||
return best
|
||||
|
||||
def scan_heap(self):
|
||||
"""Scan the heap region for messages."""
|
||||
if not self.heap_region:
|
||||
return []
|
||||
|
||||
start, end = self.heap_region
|
||||
messages = []
|
||||
|
||||
try:
|
||||
with open(f'/proc/{self.pid}/mem', 'rb') as mem:
|
||||
# Read entire heap
|
||||
mem.seek(start)
|
||||
size = min(end - start, 50 * 1024 * 1024) # Max 50MB
|
||||
data = mem.read(size)
|
||||
|
||||
# Find all wxid occurrences
|
||||
for match in self.wxid_pattern.finditer(data):
|
||||
wxid = match.group(0).decode('utf-8', errors='replace').strip('\x00')
|
||||
pos = match.start()
|
||||
global_pos = start + pos
|
||||
|
||||
# Look for message content in the next 256 bytes
|
||||
content_area = data[pos + len(match.group()):pos + len(match.group()) + 256]
|
||||
|
||||
# Try to find a null-terminated UTF-8 string that looks like a message
|
||||
for cmatch in re.finditer(rb'([\x20-\x7e\x80-\xff\x00]{4,})', content_area):
|
||||
raw = cmatch.group(0)
|
||||
# Remove trailing nulls
|
||||
raw = raw.rstrip(b'\x00')
|
||||
if len(raw) < 3:
|
||||
continue
|
||||
|
||||
try:
|
||||
text = raw.decode('utf-8', errors='replace')
|
||||
except:
|
||||
continue
|
||||
|
||||
# FILTER: Must have substantial real text content
|
||||
alpha_count = sum(1 for c in text if c.isalpha() or '\u4e00' <= c <= '\u9fff')
|
||||
total_len = len(text)
|
||||
|
||||
if total_len < 3:
|
||||
continue
|
||||
|
||||
# Skip if it's just another wxid
|
||||
if text.startswith('wxid_'):
|
||||
continue
|
||||
|
||||
# Skip binary garbage (must be >= 40% alphabetic/CJK chars)
|
||||
if alpha_count / max(total_len, 1) < 0.3:
|
||||
continue
|
||||
|
||||
# Skip if it looks like a URL/path with no message content
|
||||
if text.startswith('http') or text.startswith('/'):
|
||||
continue
|
||||
|
||||
# Create hash for dedup
|
||||
msg_hash = hashlib.md5(f"{wxid}:{text}".encode()).hexdigest()
|
||||
|
||||
if msg_hash not in self.seen:
|
||||
self.seen.add(msg_hash)
|
||||
messages.append({
|
||||
'wxid': wxid,
|
||||
'content': text,
|
||||
'pos': hex(global_pos),
|
||||
'alpha_ratio': f"{alpha_count/total_len:.2f}",
|
||||
})
|
||||
break # One best message per wxid occurrence
|
||||
|
||||
except (PermissionError, ProcessLookupError) as e:
|
||||
log.warning(f"Memory read failed: {e}")
|
||||
except Exception as e:
|
||||
log.error(f"Heap scan error: {e}")
|
||||
|
||||
return messages
|
||||
|
||||
def forward_to_hermes(self, msg):
|
||||
"""Forward to Hermes Gateway."""
|
||||
payload = json.dumps({
|
||||
"model": "nova-4",
|
||||
"messages": [
|
||||
{"role": "system", "content": "You receive WeChat messages. Process according to standard pipeline."},
|
||||
{"role": "user", "content": f"[WeChat] From: {msg['wxid']}\n{msg['content']}"}
|
||||
]
|
||||
}).encode('utf-8')
|
||||
try:
|
||||
req = urllib.request.Request(
|
||||
HERMES_API, data=payload,
|
||||
headers={"Content-Type": "application/json", "Authorization": f"Bearer {HERMES_KEY}"},
|
||||
method="POST"
|
||||
)
|
||||
urllib.request.urlopen(req, timeout=3)
|
||||
log.info(f"Forwarded: {msg['wxid']}: {msg['content'][:60]}")
|
||||
except Exception as e:
|
||||
log.warning(f"Forward failed: {e}")
|
||||
|
||||
def run(self, once=False):
|
||||
if not self.find_wechat():
|
||||
log.error("WeChat not found!")
|
||||
return []
|
||||
|
||||
log.info(f"WeChat PID: {self.pid}, heap: 0x{self.heap_region[0]:x}-0x{self.heap_region[1]:x}" if self.heap_region else f"PID: {self.pid}, no heap")
|
||||
|
||||
if once:
|
||||
messages = self.scan_heap()
|
||||
seen_wxids = set()
|
||||
for msg in messages:
|
||||
if msg['wxid'] not in seen_wxids:
|
||||
log.info(f" [{msg['wxid']}] {msg['content'][:80]}")
|
||||
seen_wxids.add(msg['wxid'])
|
||||
if messages:
|
||||
self._save_seen()
|
||||
return messages
|
||||
|
||||
log.info(f"Monitoring every {POLL_INTERVAL}s...")
|
||||
while True:
|
||||
try:
|
||||
if not os.path.exists(f'/proc/{self.pid}'):
|
||||
log.warning("WeChat died, re-finding...")
|
||||
if not self.find_wechat():
|
||||
time.sleep(30)
|
||||
continue
|
||||
|
||||
messages = self.scan_heap()
|
||||
for msg in messages:
|
||||
log.info(f"NEW: [{msg['wxid']}] {msg['content'][:80]}")
|
||||
# self.forward_to_hermes(msg)
|
||||
|
||||
if messages:
|
||||
self._save_seen()
|
||||
|
||||
time.sleep(POLL_INTERVAL)
|
||||
except KeyboardInterrupt:
|
||||
break
|
||||
except Exception as e:
|
||||
log.error(f"Error: {e}")
|
||||
time.sleep(10)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--once", action="store_true")
|
||||
parser.add_argument("--foreground", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
monitor = WeChatMemoryMonitor()
|
||||
|
||||
if args.once:
|
||||
msgs = monitor.run(once=True)
|
||||
# Only show real messages (from known contacts or with good content)
|
||||
real_msgs = [m for m in msgs if m['wxid'] in INTERESTING_WXIDS or float(m.get('alpha_ratio', 0)) > 0.5]
|
||||
print(f"\nFound {len(msgs)} potential messages, {len(real_msgs)} from known contacts")
|
||||
for m in real_msgs:
|
||||
print(f" [{m['wxid']}] {m['content'][:100]}")
|
||||
print(f"\n(Total seen: {len(monitor.seen)})")
|
||||
return
|
||||
|
||||
monitor.run()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,206 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
MoWeChat Message Monitor v3 — precise message extraction from WeChat heap.
|
||||
Strategy: look for message content near known wxids with structural patterns.
|
||||
"""
|
||||
|
||||
import os, re, sys, json, time, hashlib, struct, logging, argparse
|
||||
|
||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
|
||||
os.makedirs(LOG_DIR, exist_ok=True)
|
||||
|
||||
LOG_FILE = os.path.join(LOG_DIR, "wechat_msg_v3.log")
|
||||
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_v3.json")
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s",
|
||||
handlers=[logging.FileHandler(LOG_FILE), logging.StreamHandler()])
|
||||
log = logging.getLogger("mv3")
|
||||
|
||||
# Known wxids (discovered from memory)
|
||||
OWN_WXID = "wxid_c0a6izmwd78y22" # 莫语不语 (老爸)
|
||||
BOT_WXID = "wxid_7onnerpx2s2l22" # 莫荷
|
||||
|
||||
INTERESTING = {OWN_WXID, BOT_WXID}
|
||||
|
||||
|
||||
class Monitor:
|
||||
def __init__(self):
|
||||
self.pid = None
|
||||
self.heap = (0, 0)
|
||||
self.seen = self._load_seen()
|
||||
self.wxid_re = re.compile(rb'wxid_[a-zA-Z0-9]{10,28}\x00')
|
||||
|
||||
def _load_seen(self):
|
||||
try:
|
||||
with open(SAWN_FILE) as f:
|
||||
return set(json.load(f))
|
||||
except:
|
||||
return set()
|
||||
|
||||
def _save_seen(self):
|
||||
s = set(list(self.seen)[-2000:])
|
||||
try:
|
||||
with open(SAWN_FILE, 'w') as f:
|
||||
json.dump(list(s), f)
|
||||
except:
|
||||
pass
|
||||
|
||||
def find_wechat(self):
|
||||
for p in os.listdir('/proc'):
|
||||
if not p.isdigit(): continue
|
||||
try:
|
||||
with open(f'/proc/{p}/maps') as f:
|
||||
c = f.read()
|
||||
if "/opt/wechat/wechat" in c:
|
||||
self.pid = int(p)
|
||||
for line in c.split('\n'):
|
||||
if '[heap]' in line:
|
||||
a = line.split()[0].split('-')
|
||||
self.heap = (int(a[0], 16), int(a[1], 16))
|
||||
return True
|
||||
except:
|
||||
continue
|
||||
return False
|
||||
|
||||
def is_valid_msg(self, text):
|
||||
"""Check if text is a real WeChat message."""
|
||||
if len(text) < 2 or len(text) > 5000:
|
||||
return False
|
||||
if text.startswith('wxid_') or text.startswith('http') or text.startswith('/'):
|
||||
return False
|
||||
# Count CJK + ASCII letters + digits
|
||||
good = sum(1 for c in text if c.isalpha() or c.isdigit() or c.isspace() or c in '.,!?;:\'"-()[]{}@#_/\\')
|
||||
if good / max(len(text), 1) < 0.6:
|
||||
return False
|
||||
# Must have at least 3 CJK chars OR 5 ASCII chars
|
||||
cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
|
||||
ascii_alpha = sum(1 for c in text if c.isascii() and c.isalpha())
|
||||
return cjk >= 2 or ascii_alpha >= 4
|
||||
|
||||
def scan(self):
|
||||
start, end = self.heap
|
||||
if not start:
|
||||
return []
|
||||
|
||||
try:
|
||||
with open(f'/proc/{self.pid}/mem', 'rb') as mem:
|
||||
mem.seek(start)
|
||||
data = mem.read(min(end - start, 60 * 1024 * 1024))
|
||||
except:
|
||||
return []
|
||||
|
||||
results = []
|
||||
|
||||
# Strategy: find wxid -> look for a nearby null-terminated UTF-8 string
|
||||
# that looks like real message content
|
||||
|
||||
# First pass: find all wxid positions in a bounded range
|
||||
for m in self.wxid_re.finditer(data):
|
||||
wxid = m.group(0).decode().strip('\x00')
|
||||
if wxid not in INTERESTING:
|
||||
continue
|
||||
|
||||
pos = m.end() # position after wxid\0
|
||||
# Scan forward up to 256 bytes for a printable string
|
||||
scan_end = min(pos + 256, len(data))
|
||||
chunk = data[pos:scan_end]
|
||||
|
||||
# Find the first null-terminated ASCII/UTF-8 string
|
||||
# that's at least 3 chars and not binary garbage
|
||||
i = 0
|
||||
while i < len(chunk):
|
||||
if chunk[i] == 0:
|
||||
i += 1
|
||||
continue
|
||||
# Start of potential string
|
||||
s_start = i
|
||||
while i < len(chunk) and chunk[i] != 0 and chunk[i] >= 0x20:
|
||||
i += 1
|
||||
s_len = i - s_start
|
||||
if s_len >= 3:
|
||||
try:
|
||||
text = chunk[s_start:s_start+s_len].decode('utf-8', errors='replace')
|
||||
if self.is_valid_msg(text):
|
||||
h = hashlib.md5(f"{wxid}:{text}".encode()).hexdigest()
|
||||
if h not in self.seen:
|
||||
self.seen.add(h)
|
||||
results.append({'wxid': wxid, 'text': text})
|
||||
except:
|
||||
pass
|
||||
# Skip null
|
||||
while i < len(chunk) and chunk[i] == 0:
|
||||
i += 1
|
||||
|
||||
# Second strategy: scan heap for standalone CJK strings >= 4 chars
|
||||
# that are NOT preceded by known binary patterns (to catch the actual
|
||||
# message content which may be at a different address than the wxid)
|
||||
for cm in re.finditer(rb'([\x80-\xff][\x80-\xff][\x80-\xff][\x80-\xff])', data):
|
||||
pos = cm.start()
|
||||
# Read up to 200 bytes from here
|
||||
snippet = data[pos:pos+200]
|
||||
# Find first null byte
|
||||
null_pos = snippet.find(b'\x00')
|
||||
if null_pos > 0:
|
||||
snippet = snippet[:null_pos]
|
||||
if len(snippet) < 4:
|
||||
continue
|
||||
try:
|
||||
text = snippet.decode('utf-8', errors='replace')
|
||||
except:
|
||||
continue
|
||||
|
||||
# Only accept strings with substantial CJK content
|
||||
cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
|
||||
if cjk < 2:
|
||||
continue
|
||||
if len(text) > 200:
|
||||
text = text[:200]
|
||||
|
||||
h = hashlib.md5(f"cjk:{text}".encode()).hexdigest()
|
||||
if h not in self.seen and self.is_valid_msg(text):
|
||||
self.seen.add(h)
|
||||
results.append({'wxid': 'unknown', 'text': text})
|
||||
|
||||
if results:
|
||||
self._save_seen()
|
||||
return results
|
||||
|
||||
def run(self, once=False):
|
||||
if not self.find_wechat():
|
||||
log.error("WeChat not found")
|
||||
return []
|
||||
log.info(f"PID {self.pid}, heap 0x{self.heap[0]:x}")
|
||||
|
||||
if once:
|
||||
msgs = self.scan()
|
||||
for m in msgs:
|
||||
log.info(f" [{m['wxid']}] {m['text'][:80]}")
|
||||
return msgs
|
||||
|
||||
while True:
|
||||
try:
|
||||
if not os.path.exists(f'/proc/{self.pid}'):
|
||||
log.warning("WeChat died")
|
||||
if not self.find_wechat():
|
||||
time.sleep(30)
|
||||
continue
|
||||
msgs = self.scan()
|
||||
for m in msgs:
|
||||
log.info(f"NEW [{m['wxid']}] {m['text'][:80]}")
|
||||
time.sleep(3)
|
||||
except KeyboardInterrupt:
|
||||
break
|
||||
except:
|
||||
time.sleep(10)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
m = Monitor()
|
||||
if '--once' in sys.argv:
|
||||
msgs = m.run(once=True)
|
||||
print(f"\nFound {len(msgs)} messages")
|
||||
for msg in msgs:
|
||||
print(f" [{msg['wxid']}] {msg['text']}")
|
||||
else:
|
||||
m.run()
|
||||
@@ -0,0 +1,185 @@
|
||||
#!/usr/bin/env python3
|
||||
"""MoWeChat v4 — efficient heap scanner for WeChat messages."""
|
||||
|
||||
import os, re, sys, json, time, hashlib, logging
|
||||
|
||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
|
||||
os.makedirs(LOG_DIR, exist_ok=True)
|
||||
|
||||
LOG_FILE = os.path.join(LOG_DIR, "wechat_v4.log")
|
||||
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_v4.json")
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s",
|
||||
handlers=[logging.FileHandler(LOG_FILE), logging.StreamHandler()])
|
||||
log = logging.getLogger("wc4")
|
||||
|
||||
# Known wxids
|
||||
WXID_DAD = "wxid_c0a6izmwd78y22"
|
||||
WXID_MOHE = "wxid_7onnerpx2s2l22"
|
||||
KNOWN = {WXID_DAD, WXID_MOHE}
|
||||
|
||||
|
||||
def heap_of(pid):
|
||||
"""Get heap address range for a PID."""
|
||||
with open(f'/proc/{pid}/maps') as f:
|
||||
for line in f:
|
||||
if '[heap]' in line:
|
||||
parts = line.split()
|
||||
a, b = parts[0].split('-')
|
||||
return int(a, 16), int(b, 16)
|
||||
return None, None
|
||||
|
||||
|
||||
def read_heap(pid):
|
||||
"""Read the heap memory."""
|
||||
start, end = heap_of(pid)
|
||||
if not start:
|
||||
return None, None, None
|
||||
with open(f'/proc/{pid}/mem', 'rb') as f:
|
||||
f.seek(start)
|
||||
data = f.read(min(end - start, 40 * 1024 * 1024))
|
||||
return data, start, end
|
||||
|
||||
|
||||
def find_messages(data, heap_start):
|
||||
"""Extract messages from heap data efficiently."""
|
||||
msgs = []
|
||||
seen = set()
|
||||
|
||||
# Pattern: known wxid followed by content
|
||||
for wxid in KNOWN:
|
||||
pattern = wxid.encode() + b'\x00'
|
||||
pos = 0
|
||||
limit = 0
|
||||
while limit < 100:
|
||||
idx = data.find(pattern, pos)
|
||||
if idx < 0:
|
||||
break
|
||||
limit += 1
|
||||
|
||||
after = idx + len(pattern)
|
||||
snippet = data[after:after+300]
|
||||
|
||||
i = 0
|
||||
while i < len(snippet):
|
||||
if snippet[i] == 0:
|
||||
i += 1
|
||||
continue
|
||||
s = i
|
||||
while i < len(snippet) and snippet[i] != 0:
|
||||
i += 1
|
||||
if i - s < 3:
|
||||
i += 1
|
||||
continue
|
||||
try:
|
||||
text = snippet[s:i].decode('utf-8', errors='replace')
|
||||
except:
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Score: CJK chars + ASCII letters
|
||||
cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
|
||||
alpha = sum(1 for c in text if c.isascii() and c.isalpha())
|
||||
score = cjk * 3 + alpha
|
||||
|
||||
if score < 5:
|
||||
i += 1
|
||||
continue
|
||||
|
||||
h = hashlib.md5(text.encode()).hexdigest()
|
||||
if h not in seen:
|
||||
seen.add(h)
|
||||
msgs.append((text, wxid, heap_start + after + s))
|
||||
break
|
||||
|
||||
pos = idx + 1
|
||||
|
||||
# Also scan for the \xNNcontent pattern directly (faster catch-all)
|
||||
# Pattern: [1 byte length/bufsize][readable text]
|
||||
i = 0
|
||||
limit2 = 0
|
||||
while i < len(data) - 10 and limit2 < 500:
|
||||
b = data[i]
|
||||
# Possible prefix: small values (0x04-0x40 = 4-64, common buffer sizes)
|
||||
if 0x04 <= b <= 0x40:
|
||||
# Check if what follows looks like text
|
||||
j = i + 1
|
||||
text_bytes = bytearray()
|
||||
while j < len(data) and j - i - 1 < b and data[j] != 0:
|
||||
if data[j] >= 0x20:
|
||||
text_bytes.append(data[j])
|
||||
j += 1
|
||||
else:
|
||||
break
|
||||
if len(text_bytes) >= 5:
|
||||
try:
|
||||
text = text_bytes.decode('utf-8', errors='replace')
|
||||
except:
|
||||
i += 1
|
||||
continue
|
||||
cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
|
||||
alpha = sum(1 for c in text if c.isascii() and c.isalpha())
|
||||
if (cjk >= 2 or alpha >= 6) and text[0] not in '&/\\.':
|
||||
h = hashlib.md5(('any:' + text).encode()).hexdigest()
|
||||
if h not in seen:
|
||||
# Find which wxid is near this text (search backward)
|
||||
nearby = data[max(0, i-200):i]
|
||||
wxid_match = re.search(rb'wxid_[a-zA-Z0-9]{10,28}', nearby)
|
||||
owner = wxid_match.group(0).decode() if wxid_match else 'unknown'
|
||||
seen.add(h)
|
||||
msgs.append((text, owner, heap_start + i))
|
||||
limit2 += 1
|
||||
i += 1
|
||||
|
||||
return msgs
|
||||
|
||||
|
||||
def find_wechat():
|
||||
"""Find the main wechat process PID."""
|
||||
for p in os.listdir('/proc'):
|
||||
if not p.isdigit():
|
||||
continue
|
||||
try:
|
||||
with open(f'/proc/{p}/maps') as f:
|
||||
if "/opt/wechat/wechat" in f.read():
|
||||
return int(p)
|
||||
except:
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
def main():
|
||||
pid = find_wechat()
|
||||
if not pid:
|
||||
log.error("WeChat not found")
|
||||
return 1
|
||||
|
||||
log.info(f"Found PID {pid}")
|
||||
|
||||
data, hs, he = read_heap(pid)
|
||||
|
||||
if data is None:
|
||||
log.error("Cannot read heap")
|
||||
return 1
|
||||
|
||||
log.info(f"Heap: 0x{hs:x}-0x{he:x} ({len(data)} bytes)")
|
||||
|
||||
msgs = find_messages(data, hs)
|
||||
|
||||
# Deduplicate
|
||||
seen_texts = set()
|
||||
for text, wxid, addr in msgs:
|
||||
key = text.strip()
|
||||
if key not in seen_texts and len(key) >= 2:
|
||||
seen_texts.add(key)
|
||||
print(f"{wxid}: {text}")
|
||||
|
||||
if not msgs:
|
||||
print("No messages found")
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -0,0 +1,17 @@
|
||||
# MoWeChat Configuration
|
||||
# Bot API token (from docker-wechatbot-webhook)
|
||||
WECHAT_BOT_TOKEN="n~btHqwAmfQW"
|
||||
|
||||
# WeChat contact name for the bot itself
|
||||
BOT_NAME="莫小荷"
|
||||
|
||||
# User (Dad) contact info
|
||||
USER_NAME="莫语不语"
|
||||
USER_WXID="wxid_c0a6izmwd78y22"
|
||||
|
||||
# Hermes Gateway
|
||||
HERMES_API="http://192.168.1.246:8642/v1/chat/completions"
|
||||
HERMES_KEY="hermes123"
|
||||
|
||||
# Webhook receiver
|
||||
WEBHOOK_PORT=5804
|
||||
@@ -0,0 +1,23 @@
|
||||
[Unit]
|
||||
Description=WeChat Webhook Receiver — bridges WeChat bot messages to Hermes
|
||||
After=network-online.target docker.service
|
||||
Wants=network-online.target
|
||||
BindsTo=docker.service
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=hmo
|
||||
WorkingDirectory=/home/hmo/projects/AgentsMeeting/gateway/linux
|
||||
ExecStart=/usr/bin/python3 /home/hmo/projects/AgentsMeeting/gateway/linux/wechat_webhook.py
|
||||
Restart=always
|
||||
RestartSec=5
|
||||
StandardOutput=append:/home/hmo/projects/AgentsMeeting/gateway/linux/logs/webhook_service.log
|
||||
StandardError=append:/home/hmo/projects/AgentsMeeting/gateway/linux/logs/webhook_service.log
|
||||
|
||||
# Don't inherit system proxy (Hermes is local)
|
||||
Environment=no_proxy=192.168.1.246,localhost,127.0.0.1,172.17.0.0/16
|
||||
Environment=http_proxy=
|
||||
Environment=https_proxy=
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -0,0 +1,193 @@
|
||||
#!/usr/bin/env python3
|
||||
"""WeChat webhook receiver v2 - receives messages from docker-wechatbot-webhook."""
|
||||
|
||||
import os, sys, json, logging, threading
|
||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
|
||||
HERMES_API = "http://192.168.1.246:8642/v1/chat/completions"
|
||||
HERMES_KEY = "hermes123"
|
||||
PORT = 5804
|
||||
|
||||
WECHAT_BOT_TOKEN = os.environ.get("WECHAT_BOT_TOKEN", "mowechat_fixed_token_001")
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(message)s",
|
||||
handlers=[
|
||||
logging.FileHandler("/home/hmo/projects/AgentsMeeting/gateway/linux/logs/webhook.log"),
|
||||
logging.StreamHandler()
|
||||
]
|
||||
)
|
||||
log = logging.getLogger("wc-webhook")
|
||||
|
||||
|
||||
class WebhookHandler(BaseHTTPRequestHandler):
|
||||
def do_POST(self):
|
||||
content_type = self.headers.get('Content-Type', '')
|
||||
content_length = int(self.headers.get('Content-Length', 0))
|
||||
|
||||
# Read the body
|
||||
body = self.rfile.read(content_length)
|
||||
|
||||
# Parse multipart/form-data
|
||||
msg_type = 'unknown'
|
||||
content = ''
|
||||
source_raw = '{}'
|
||||
is_system = '0'
|
||||
|
||||
if 'multipart/form-data' in content_type:
|
||||
# Manual multipart parsing
|
||||
boundary = content_type.split('boundary=')[1].strip()
|
||||
if boundary.startswith('"') and boundary.endswith('"'):
|
||||
boundary = boundary[1:-1]
|
||||
|
||||
parts = body.split(b'--' + boundary.encode())
|
||||
for part in parts:
|
||||
if b'Content-Disposition' not in part:
|
||||
continue
|
||||
|
||||
# Parse headers
|
||||
header_end = part.find(b'\r\n\r\n')
|
||||
if header_end < 0:
|
||||
continue
|
||||
part_headers = part[:header_end].decode('utf-8', errors='replace')
|
||||
part_body = part[header_end + 4:]
|
||||
|
||||
# Get field name
|
||||
name_start = part_headers.find('name="')
|
||||
if name_start < 0:
|
||||
continue
|
||||
name_start += 6
|
||||
name_end = part_headers.find('"', name_start)
|
||||
field_name = part_headers[name_start:name_end]
|
||||
|
||||
# Trim trailing \r\n--
|
||||
if part_body.endswith(b'\r\n'):
|
||||
part_body = part_body[:-2]
|
||||
if part_body.endswith(b'--'):
|
||||
part_body = part_body[:-2]
|
||||
if part_body.endswith(b'\r\n'):
|
||||
part_body = part_body[:-2]
|
||||
|
||||
if field_name == 'type':
|
||||
msg_type = part_body.decode('utf-8', errors='replace')
|
||||
elif field_name == 'content':
|
||||
content = part_body.decode('utf-8', errors='replace')
|
||||
elif field_name == 'source':
|
||||
source_raw = part_body.decode('utf-8', errors='replace')
|
||||
elif field_name == 'isSystemEvent':
|
||||
is_system = part_body.decode('utf-8', errors='replace')
|
||||
else:
|
||||
# Try as regular form or JSON
|
||||
try:
|
||||
data = json.loads(body)
|
||||
msg_type = data.get('type', 'unknown')
|
||||
content = data.get('content', '')
|
||||
source_raw = data.get('source', '{}')
|
||||
except:
|
||||
pass
|
||||
|
||||
# Parse source
|
||||
try:
|
||||
source = json.loads(source_raw) if isinstance(source_raw, str) else source_raw
|
||||
except:
|
||||
source = {}
|
||||
|
||||
# Extract sender info
|
||||
sender_name = "unknown"
|
||||
sender_id = "unknown"
|
||||
if isinstance(source, dict):
|
||||
from_data = source.get('from', {})
|
||||
if isinstance(from_data, dict):
|
||||
payload = from_data.get('payload', {})
|
||||
sender_name = payload.get('name', 'unknown')
|
||||
sender_id = payload.get('id', 'unknown')
|
||||
|
||||
# Skip system events
|
||||
if is_system == '1':
|
||||
log.info(f"System event: {msg_type}")
|
||||
self._respond(200, {"status": "ok"})
|
||||
return
|
||||
|
||||
log.info(f"From: {sender_name} ({sender_id}), Type: {msg_type}")
|
||||
|
||||
if msg_type == 'text':
|
||||
log.info(f"Text: {content[:300]}")
|
||||
self._forward_to_hermes(sender_name, sender_id, content)
|
||||
elif msg_type == 'urlLink':
|
||||
log.info(f"URL: {content[:200]}")
|
||||
self._forward_to_hermes(sender_name, sender_id, f"[分享链接] {content}")
|
||||
elif msg_type == 'file':
|
||||
log.info(f"File received, length={len(body)}")
|
||||
else:
|
||||
log.info(f"Other: {msg_type}")
|
||||
|
||||
self._respond(200, {"status": "ok"})
|
||||
|
||||
def _forward_to_hermes(self, sender, sender_id, text):
|
||||
"""Forward to Hermes and send reply back to WeChat."""
|
||||
payload = json.dumps({
|
||||
"model": "nova-4",
|
||||
"messages": [
|
||||
{"role": "system", "content": "你是莫荷微信Bot。回复简洁,不要废话。"},
|
||||
{"role": "user", "content": f"[微信消息] 来自 {sender}({sender_id}): {text}"}
|
||||
]
|
||||
}).encode()
|
||||
|
||||
def do_forward():
|
||||
try:
|
||||
import urllib.request as ureq
|
||||
handler = ureq.ProxyHandler({})
|
||||
opener = ureq.build_opener(handler)
|
||||
req = ureq.Request(HERMES_API, data=payload,
|
||||
headers={"Content-Type": "application/json", "Authorization": f"Bearer {HERMES_KEY}"})
|
||||
resp = opener.open(req, timeout=30)
|
||||
resp_data = json.loads(resp.read())
|
||||
reply = resp_data.get('choices', [{}])[0].get('message', {}).get('content', '')
|
||||
log.info(f"Hermes OK, reply: {reply[:60]}")
|
||||
|
||||
# Send reply back via WeChat API
|
||||
if reply and sender:
|
||||
self._send_wechat(sender, reply)
|
||||
except Exception as e:
|
||||
log.error(f"Hermes error: {e}")
|
||||
|
||||
threading.Thread(target=do_forward, daemon=True).start()
|
||||
|
||||
def _send_wechat(self, to_name, text):
|
||||
"""Send message back to WeChat user via bot API."""
|
||||
import urllib.request as ureq
|
||||
token = WECHAT_BOT_TOKEN
|
||||
api = f"http://localhost:3001/webhook/msg/v2?token={token}"
|
||||
data = json.dumps({"to": to_name, "data": {"content": text}}).encode()
|
||||
try:
|
||||
handler = ureq.ProxyHandler({})
|
||||
opener = ureq.build_opener(handler)
|
||||
req = ureq.Request(api, data=data,
|
||||
headers={"Content-Type": "application/json"})
|
||||
resp = opener.open(req, timeout=10)
|
||||
log.info(f"WeChat send OK: {resp.status}")
|
||||
except Exception as e:
|
||||
log.error(f"WeChat send error: {e}")
|
||||
|
||||
def _respond(self, code, data):
|
||||
self.send_response(code)
|
||||
self.send_header('Content-Type', 'application/json')
|
||||
self.end_headers()
|
||||
self.wfile.write(json.dumps(data).encode())
|
||||
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
|
||||
|
||||
def main():
|
||||
server = HTTPServer(('0.0.0.0', PORT), WebhookHandler)
|
||||
log.info(f"Webhook receiver on :{PORT}")
|
||||
try:
|
||||
server.serve_forever()
|
||||
except KeyboardInterrupt:
|
||||
server.shutdown()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user