cleanup: archive unused scripts, finalize docs
- Moved old GDB hook and memory monitor scripts to archive/ - Updated README with final Docker solution - Updated .env.example, .gitignore - Added final-note.md documenting deployment decision - All research scripts preserved in archive/ for reference
This commit is contained in:
@@ -0,0 +1,198 @@
|
||||
#!/usr/bin/env gdb
|
||||
"""
|
||||
GDB Hook script for WeChat Linux AppImage.
|
||||
Intercepts incoming messages from WeChat's NewSync_ProcessStashMsgList.
|
||||
|
||||
Based on Ajax's Blog methodology:
|
||||
https://aajax.top/2026/03/11/GettingLinuxWechatMessages/
|
||||
|
||||
Usage:
|
||||
gdb -p $(pidof wechat) -x hooks/gdb_hook_messages.py
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
# ── Configuration ──────────────────────────────────────────────
|
||||
|
||||
# WeChat binary base address (from /proc/PID/maps, first r--p entry)
|
||||
# Must be recalculated each run due to ASLR
|
||||
WECHAT_PID = None
|
||||
|
||||
# Breakpoint RVA (relative to binary base) for WeChat 4.1.x
|
||||
# From Ajax's IDA Pro analysis of 4.1.0.16:
|
||||
# NewSync_ProcessStashMsgList -> loop call at 0x4994BEB
|
||||
BP_RVA = 0x4994BEB
|
||||
|
||||
# Log file
|
||||
LOG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "logs")
|
||||
LOG_FILE = os.path.join(LOG_DIR, "wechat_messages.log")
|
||||
|
||||
# Message structure offsets (from Ajax's blog)
|
||||
OFF_TYPE = 0x14 # int: message type
|
||||
OFF_SVRID = 0x50 # unsigned long long: server message ID
|
||||
OFF_HOLDER = 0x20 # void*: holder pointer
|
||||
OFF_INNER = 0x08 # void*: inner pointer (relative to holder)
|
||||
OFF_CONTENT_PTR = 0x00 # char*: content string pointer (from inner+0)
|
||||
OFF_CONTENT_LEN = 0x10 # int: content string length (from inner+0x10)
|
||||
|
||||
|
||||
def log(msg):
|
||||
"""Write log to file and stdout."""
|
||||
with open(LOG_FILE, "a", encoding="utf-8") as f:
|
||||
f.write(f"{msg}\n")
|
||||
gdb.write(f"{msg}\n")
|
||||
|
||||
|
||||
class WechatMessageBreakpoint(gdb.Breakpoint):
|
||||
"""Breakpoint that fires on each incoming WeChat message."""
|
||||
|
||||
def __init__(self, address):
|
||||
super().__init__(f"*{address}")
|
||||
self.suppress = True # Don't print to stdout automatically
|
||||
|
||||
def stop(self):
|
||||
try:
|
||||
# Read registers
|
||||
msg_ptr = int(gdb.parse_and_eval("$rsi"))
|
||||
if msg_ptr == 0:
|
||||
return False
|
||||
|
||||
# Read message type
|
||||
raw_type = gdb.selected_inferior().read_memory(msg_ptr + OFF_TYPE, 4)
|
||||
msg_type = int.from_bytes(raw_type, byteorder='little', signed=True)
|
||||
|
||||
# Read server message ID
|
||||
raw_svrid = gdb.selected_inferior().read_memory(msg_ptr + OFF_SVRID, 8)
|
||||
svrid = int.from_bytes(raw_svrid, byteorder='little')
|
||||
|
||||
# Read holder pointer
|
||||
raw_holder = gdb.selected_inferior().read_memory(msg_ptr + OFF_HOLDER, 8)
|
||||
holder = int.from_bytes(raw_holder, byteorder='little', signed=False)
|
||||
if holder == 0:
|
||||
return False
|
||||
|
||||
# Read inner pointer
|
||||
raw_inner = gdb.selected_inferior().read_memory(holder + OFF_INNER, 8)
|
||||
inner = int.from_bytes(raw_inner, byteorder='little', signed=False)
|
||||
if inner == 0:
|
||||
return False
|
||||
|
||||
# Read content string length
|
||||
raw_len = gdb.selected_inferior().read_memory(inner + OFF_CONTENT_LEN, 4)
|
||||
content_len = int.from_bytes(raw_len, byteorder='little', signed=False)
|
||||
if content_len <= 0 or content_len > 100000:
|
||||
return False
|
||||
|
||||
# Read content string
|
||||
raw_content_ptr = gdb.selected_inferior().read_memory(inner + OFF_CONTENT_PTR, 8)
|
||||
content_ptr = int.from_bytes(raw_content_ptr, byteorder='little', signed=False)
|
||||
if content_ptr == 0:
|
||||
return False
|
||||
|
||||
raw_content = gdb.selected_inferior().read_memory(content_ptr, min(content_len * 2, 100000))
|
||||
|
||||
# Try to decode as UTF-16LE (WeChat internal encoding)
|
||||
try:
|
||||
content = raw_content.tobytes()[:content_len * 2].decode('utf-16le', errors='replace')
|
||||
except:
|
||||
content = str(raw_content)
|
||||
|
||||
# Read talker/sender info (different offset structure)
|
||||
# This is more complex — skip for initial test
|
||||
message = {
|
||||
"type": msg_type,
|
||||
"svrid": hex(svrid),
|
||||
"content": content[:500],
|
||||
"content_len": content_len,
|
||||
"holder": hex(holder),
|
||||
"inner": hex(inner),
|
||||
}
|
||||
|
||||
log(f"[WECHAT_MSG] type={msg_type} svrid={hex(svrid)}")
|
||||
log(f"[WECHAT_MSG] content: {content[:200]}")
|
||||
|
||||
# Forward to Hermes Gateway
|
||||
try:
|
||||
forward_to_hermes(message)
|
||||
except Exception as e:
|
||||
log(f"[WECHAT_MSG] forward error: {e}")
|
||||
|
||||
except Exception as e:
|
||||
log(f"[WECHAT_MSG] error: {e}")
|
||||
|
||||
return False # Don't stop execution
|
||||
|
||||
|
||||
def forward_to_hermes(msg):
|
||||
"""Forward message to Hermes Gateway."""
|
||||
import urllib.request
|
||||
|
||||
payload = json.dumps({
|
||||
"model": "nova-4",
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"You are a WeChat message handler. A new message arrived: type={msg.get('type')}, content={msg.get('content', '')[:100]}"
|
||||
}
|
||||
]
|
||||
}).encode('utf-8')
|
||||
|
||||
req = urllib.request.Request(
|
||||
"http://192.168.1.246:8642/v1/chat/completions",
|
||||
data=payload,
|
||||
headers={
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": "Bearer hermes123"
|
||||
},
|
||||
method="POST"
|
||||
)
|
||||
# Don't wait for response — fire and forget
|
||||
try:
|
||||
urllib.request.urlopen(req, timeout=2)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def detect_wechat_base():
|
||||
"""Detect WeChat binary base address from /proc/PID/maps."""
|
||||
pid = WECHAT_PID
|
||||
if pid is None:
|
||||
try:
|
||||
pid = gdb.selected_inferior().pid
|
||||
except:
|
||||
pass
|
||||
if pid is None:
|
||||
return None
|
||||
try:
|
||||
with open(f"/proc/{pid}/maps", "r") as f:
|
||||
for line in f:
|
||||
if "/opt/wechat/wechat" in line and "r--p" in line:
|
||||
addr = line.split("-")[0]
|
||||
return int(addr, 16)
|
||||
except Exception as e:
|
||||
log(f"[WECHAT_MSG] Failed to detect base: {e}")
|
||||
return None
|
||||
|
||||
|
||||
class HookWechatMessages(gdb.Command):
|
||||
"""Install WeChat message hook."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__("hook-wechat-messages", gdb.COMMAND_USER)
|
||||
|
||||
def invoke(self, arg, from_tty):
|
||||
base = detect_wechat_base()
|
||||
if base is None:
|
||||
log("[WECHAT_MSG] ERROR: Could not detect WeChat base address")
|
||||
return
|
||||
|
||||
addr = base + BP_RVA
|
||||
WechatMessageBreakpoint(addr)
|
||||
log(f"[WECHAT_MSG] Hook installed: base=0x{base:x} bp=0x{addr:x}")
|
||||
log(f"[WECHAT_MSG] Waiting for messages...")
|
||||
|
||||
|
||||
# Register the custom command
|
||||
HookWechatMessages()
|
||||
@@ -0,0 +1,152 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
GDB startup script for WeChat message hooking.
|
||||
Run with:
|
||||
gdb -x this_script.py --args ./WeChatLinux.AppImage --no-sandbox --disable-gpu
|
||||
|
||||
GDB disables ASLR by default, so wechat base = 0x555555554000
|
||||
"""
|
||||
|
||||
import gdb
|
||||
import os
|
||||
import time
|
||||
|
||||
# Breakpoint RVA from Ajax's blog (4.1.0.16, confirmed valid for 4.1.7)
|
||||
BP_RVA = 0x4994BEB
|
||||
# Fixed base when GDB starts process (ASLR disabled)
|
||||
FIXED_BASE = 0x555555554000
|
||||
|
||||
LOG_FILE = "/home/hmo/projects/AgentsMeeting/gateway/linux/logs/wechat_messages.log"
|
||||
|
||||
|
||||
def log(msg):
|
||||
with open(LOG_FILE, "a") as f:
|
||||
f.write(f"{msg}\n")
|
||||
gdb.write(f"{msg}\n")
|
||||
|
||||
|
||||
class WechatMessageBreakpoint(gdb.Breakpoint):
|
||||
"""Breakpoint that fires on each incoming WeChat message."""
|
||||
|
||||
def __init__(self, address):
|
||||
super().__init__(f"*{address}")
|
||||
self.suppress = True
|
||||
|
||||
def stop(self):
|
||||
try:
|
||||
msg_ptr = int(gdb.parse_and_eval("$rsi"))
|
||||
if msg_ptr == 0:
|
||||
return False
|
||||
|
||||
raw_type = gdb.selected_inferior().read_memory(msg_ptr + 0x14, 4)
|
||||
msg_type = int.from_bytes(raw_type, byteorder='little', signed=True)
|
||||
|
||||
raw_svrid = gdb.selected_inferior().read_memory(msg_ptr + 0x50, 8)
|
||||
svrid = int.from_bytes(raw_svrid, byteorder='little')
|
||||
|
||||
raw_holder = gdb.selected_inferior().read_memory(msg_ptr + 0x20, 8)
|
||||
holder = int.from_bytes(raw_holder, byteorder='little', signed=False)
|
||||
if holder == 0:
|
||||
return False
|
||||
|
||||
raw_inner = gdb.selected_inferior().read_memory(holder + 0x8, 8)
|
||||
inner = int.from_bytes(raw_inner, byteorder='little', signed=False)
|
||||
if inner == 0:
|
||||
return False
|
||||
|
||||
# Read content length from inner + 0x10
|
||||
raw_len = gdb.selected_inferior().read_memory(inner + 0x10, 4)
|
||||
content_len = int.from_bytes(raw_len, byteorder='little', signed=False)
|
||||
if content_len <= 0 or content_len > 100000:
|
||||
return False
|
||||
|
||||
# Read content pointer from inner + 0x0
|
||||
raw_cp = gdb.selected_inferior().read_memory(inner + 0x0, 8)
|
||||
content_ptr = int.from_bytes(raw_cp, byteorder='little', signed=False)
|
||||
if content_ptr == 0:
|
||||
return False
|
||||
|
||||
raw_content = gdb.selected_inferior().read_memory(content_ptr, min(content_len * 2, 100000))
|
||||
try:
|
||||
content = raw_content.tobytes()[:content_len * 2].decode('utf-16le', errors='replace')
|
||||
except:
|
||||
content = str(raw_content)
|
||||
|
||||
# Try to read sender info from msg_ptr + 0x38 (talker wxid)
|
||||
try:
|
||||
raw_talker = gdb.selected_inferior().read_memory(msg_ptr + 0x38, 8)
|
||||
talker_ptr = int.from_bytes(raw_talker, byteorder='little', signed=False)
|
||||
if talker_ptr:
|
||||
talker_data = gdb.selected_inferior().read_memory(talker_ptr, 64)
|
||||
talker = talker_data.tobytes().split(b'\x00')[0].decode('utf-8', errors='replace')
|
||||
else:
|
||||
talker = "unknown"
|
||||
except:
|
||||
talker = "unknown"
|
||||
|
||||
info = f"[WECHAT_MSG] type={msg_type} svrid={hex(svrid)} talker={talker}"
|
||||
log(info)
|
||||
log(f"[WECHAT_MSG] content: {content[:300]}")
|
||||
|
||||
# Forward to Hermes
|
||||
try:
|
||||
import urllib.request
|
||||
import json
|
||||
payload = json.dumps({
|
||||
"model": "nova-4",
|
||||
"messages": [
|
||||
{"role": "user", "content": f"[WeChat from {talker}] {content[:500]}"}
|
||||
]
|
||||
}).encode()
|
||||
req = urllib.request.Request(
|
||||
"http://192.168.1.246:8642/v1/chat/completions",
|
||||
data=payload,
|
||||
headers={"Content-Type": "application/json", "Authorization": "Bearer hermes123"},
|
||||
method="POST"
|
||||
)
|
||||
urllib.request.urlopen(req, timeout=2)
|
||||
except:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
log(f"[WECHAT_MSG] ERROR: {e}")
|
||||
|
||||
return False
|
||||
|
||||
|
||||
class AutoHookWechat(gdb.Command):
|
||||
"""Auto-hook WeChat messages on startup."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__("auto-hook-wechat", gdb.COMMAND_USER)
|
||||
|
||||
def invoke(self, arg, from_tty):
|
||||
bp_addr = FIXED_BASE + BP_RVA
|
||||
WechatMessageBreakpoint(bp_addr)
|
||||
log(f"[WECHAT_MSG] Breakpoint set at 0x{bp_addr:x}")
|
||||
|
||||
|
||||
class OnStart(gdb.Breakpoint):
|
||||
"""Breakpoint on _start to set up hooks after process loads."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__("_start")
|
||||
|
||||
def stop(self):
|
||||
gdb.execute("auto-hook-wechat")
|
||||
return True # Stop so user can continue
|
||||
|
||||
|
||||
# Configure GDB
|
||||
gdb.execute("set pagination off")
|
||||
gdb.execute("set confirm off")
|
||||
gdb.execute("handle SIG33 pass nostop noprint")
|
||||
gdb.execute("set follow-fork-mode child")
|
||||
|
||||
# Register our commands
|
||||
AutoHookWechat()
|
||||
|
||||
# Set breakpoint at _start so we hook after process loads
|
||||
OnStart()
|
||||
|
||||
log("[WECHAT_MSG] GDB startup script loaded. Type 'run' to start WeChat.")
|
||||
@@ -0,0 +1,296 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
MoWeChat Message Monitor — reads WeChat process memory to capture incoming messages.
|
||||
No GDB, no ptrace attach, no crash risk.
|
||||
Uses /proc/PID/mem to scan the heap for message patterns.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import hashlib
|
||||
import logging
|
||||
import argparse
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
|
||||
# ── Configuration ──────────────────────────────────────────────
|
||||
|
||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
|
||||
os.makedirs(LOG_DIR, exist_ok=True)
|
||||
|
||||
LOG_FILE = os.path.join(LOG_DIR, "wechat_msg_monitor.log")
|
||||
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_messages.json")
|
||||
|
||||
HERMES_API = "http://192.168.1.246:8642/v1/chat/completions"
|
||||
HERMES_KEY = "hermes123"
|
||||
|
||||
# Polling interval (seconds)
|
||||
POLL_INTERVAL = 2
|
||||
|
||||
# Minimum message content length to consider valid
|
||||
MIN_MSG_LEN = 2
|
||||
MAX_MSG_LEN = 10000
|
||||
|
||||
# WeChat binary marker in /proc/PID/maps
|
||||
WECHAT_BINARY_MARKER = "/opt/wechat/wechat"
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(message)s",
|
||||
handlers=[
|
||||
logging.FileHandler(LOG_FILE),
|
||||
logging.StreamHandler()
|
||||
]
|
||||
)
|
||||
log = logging.getLogger("mowechat")
|
||||
|
||||
|
||||
class WeChatMemoryMonitor:
|
||||
"""Monitors WeChat process memory for new messages."""
|
||||
|
||||
def __init__(self):
|
||||
self.pid = None
|
||||
self.seen = self._load_seen()
|
||||
self.heap_regions = []
|
||||
self.anon_regions = []
|
||||
self.wxid_pattern = re.compile(rb'wxid_[a-zA-Z0-9]{10,28}\x00')
|
||||
# Chinese characters and ASCII printable
|
||||
self.msg_pattern = re.compile(rb'[\x20-\x7e\x80-\xff]{2,}')
|
||||
|
||||
def _load_seen(self):
|
||||
"""Load previously seen message hash set."""
|
||||
try:
|
||||
with open(SAWN_FILE, 'r') as f:
|
||||
return set(json.load(f))
|
||||
except:
|
||||
return set()
|
||||
|
||||
def _save_seen(self):
|
||||
"""Save seen message hash set (trim to last 1000)."""
|
||||
trimmed = set(list(self.seen)[-1000:])
|
||||
try:
|
||||
with open(SAWN_FILE, 'w') as f:
|
||||
json.dump(list(trimmed), f)
|
||||
except:
|
||||
pass
|
||||
|
||||
def find_wechat(self):
|
||||
"""Find the main wechat process PID."""
|
||||
for p in os.listdir('/proc'):
|
||||
if not p.isdigit():
|
||||
continue
|
||||
try:
|
||||
with open(f'/proc/{p}/maps', 'r') as f:
|
||||
content = f.read(4096)
|
||||
if WECHAT_BINARY_MARKER in content:
|
||||
self.pid = int(p)
|
||||
return True
|
||||
except:
|
||||
continue
|
||||
return False
|
||||
|
||||
def update_memory_regions(self):
|
||||
"""Read /proc/PID/maps to find heap and anonymous regions."""
|
||||
self.heap_regions = []
|
||||
self.anon_regions = []
|
||||
|
||||
try:
|
||||
with open(f'/proc/{self.pid}/maps', 'r') as f:
|
||||
for line in f:
|
||||
parts = line.strip().split()
|
||||
if len(parts) < 5:
|
||||
continue
|
||||
addr_range = parts[0].split('-')
|
||||
start = int(addr_range[0], 16)
|
||||
end = int(addr_range[1], 16)
|
||||
perms = parts[1]
|
||||
name = parts[-1] if len(parts) > 4 else ''
|
||||
|
||||
size_mb = (end - start) / (1024 * 1024)
|
||||
if size_mb > 50: # Skip huge regions
|
||||
continue
|
||||
|
||||
if perms.startswith('rw'):
|
||||
if name == '[heap]':
|
||||
self.heap_regions.append((start, end))
|
||||
elif not name: # Anonymous mapping
|
||||
self.anon_regions.append((start, end))
|
||||
except Exception as e:
|
||||
log.error(f"Failed to read maps: {e}")
|
||||
|
||||
def scan_region(self, start, end, region_name=""):
|
||||
"""Scan a memory region for WeChat message patterns."""
|
||||
messages = []
|
||||
try:
|
||||
with open(f'/proc/{self.pid}/mem', 'rb') as mem:
|
||||
mem.seek(start)
|
||||
data = mem.read(end - start)
|
||||
|
||||
# Find all wxid patterns (likely message senders)
|
||||
for match in self.wxid_pattern.finditer(data):
|
||||
wxid = match.group(0).decode('utf-8', errors='replace').strip('\x00')
|
||||
pos = match.start()
|
||||
|
||||
# Look for message content after the wxid
|
||||
# Message content is typically within 256 bytes after the wxid
|
||||
content_start = pos + len(match.group())
|
||||
search_end = min(content_start + 512, len(data))
|
||||
|
||||
# Find the next null-terminated string that's not the wxid itself
|
||||
for cmatch in self.msg_pattern.finditer(data, content_start, search_end):
|
||||
content = cmatch.group(0).decode('utf-8', errors='replace').strip('\x00')
|
||||
# Filter out known non-message patterns
|
||||
if len(content) < MIN_MSG_LEN or len(content) > MAX_MSG_LEN:
|
||||
continue
|
||||
if content.startswith('wxid_') or content.startswith('http'):
|
||||
# Skip if it's another wxid or begins with a URL
|
||||
# Actually, messages CAN start with URLs, so only skip wxid
|
||||
if content.startswith('wxid_'):
|
||||
continue
|
||||
|
||||
# Create a unique hash for dedup
|
||||
msg_hash = hashlib.md5(f"{wxid}:{content}".encode()).hexdigest()
|
||||
|
||||
if msg_hash not in self.seen:
|
||||
self.seen.add(msg_hash)
|
||||
messages.append({
|
||||
'wxid': wxid,
|
||||
'content': content,
|
||||
'pos': hex(pos + start),
|
||||
'region': region_name,
|
||||
})
|
||||
break # Only one message per wxid
|
||||
|
||||
except (PermissionError, ProcessLookupError) as e:
|
||||
log.warning(f"Memory read failed: {e}")
|
||||
except Exception as e:
|
||||
log.error(f"Scan error at {region_name}: {e}")
|
||||
|
||||
return messages
|
||||
|
||||
def scan_all(self):
|
||||
"""Scan all relevant memory regions for new messages."""
|
||||
self.update_memory_regions()
|
||||
all_messages = []
|
||||
|
||||
for start, end in self.heap_regions:
|
||||
msgs = self.scan_region(start, end, "[heap]")
|
||||
all_messages.extend(msgs)
|
||||
|
||||
for start, end in self.anon_regions[:20]: # Limit anonymous regions
|
||||
size_mb = (end - start) / (1024 * 1024)
|
||||
if size_mb > 10: # Skip large anonymous maps
|
||||
continue
|
||||
msgs = self.scan_region(start, end, "[anon]")
|
||||
all_messages.extend(msgs)
|
||||
|
||||
if all_messages:
|
||||
self._save_seen()
|
||||
|
||||
return all_messages
|
||||
|
||||
def forward_to_hermes(self, msg):
|
||||
"""Forward a captured message to Hermes Gateway."""
|
||||
payload = json.dumps({
|
||||
"model": "nova-4",
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"You receive WeChat messages. "
|
||||
"Process this message according to the standard pipeline."
|
||||
)
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
f"[WeChat Message]\n"
|
||||
f"From: {msg['wxid']}\n"
|
||||
f"Content: {msg['content']}"
|
||||
)
|
||||
}
|
||||
]
|
||||
}).encode('utf-8')
|
||||
|
||||
try:
|
||||
req = urllib.request.Request(
|
||||
HERMES_API,
|
||||
data=payload,
|
||||
headers={
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {HERMES_KEY}"
|
||||
},
|
||||
method="POST"
|
||||
)
|
||||
urllib.request.urlopen(req, timeout=3)
|
||||
log.info(f"Forwarded: {msg['wxid']}: {msg['content'][:60]}")
|
||||
except Exception as e:
|
||||
log.warning(f"Forward failed: {e}")
|
||||
|
||||
def run(self, once=False):
|
||||
"""Main monitoring loop."""
|
||||
if not self.find_wechat():
|
||||
log.error("WeChat process not found!")
|
||||
return False
|
||||
|
||||
log.info(f"Found WeChat PID: {self.pid}")
|
||||
|
||||
if once:
|
||||
messages = self.scan_all()
|
||||
for msg in messages:
|
||||
log.info(f"Captured: [{msg['wxid']}] {msg['content'][:80]}")
|
||||
return messages
|
||||
|
||||
log.info(f"Starting monitor (poll every {POLL_INTERVAL}s)...")
|
||||
while True:
|
||||
try:
|
||||
# Check process is alive
|
||||
if not os.path.exists(f'/proc/{self.pid}'):
|
||||
log.warning("WeChat process died, re-finding...")
|
||||
if not self.find_wechat():
|
||||
log.error("Cannot find WeChat, sleeping 30s...")
|
||||
time.sleep(30)
|
||||
continue
|
||||
log.info(f"Re-attached to PID: {self.pid}")
|
||||
|
||||
messages = self.scan_all()
|
||||
for msg in messages:
|
||||
log.info(f"NEW: [{msg['wxid']}] {msg['content'][:80]}")
|
||||
# self.forward_to_hermes(msg)
|
||||
|
||||
time.sleep(POLL_INTERVAL)
|
||||
except KeyboardInterrupt:
|
||||
log.info("Monitor stopped.")
|
||||
break
|
||||
except Exception as e:
|
||||
log.error(f"Monitor error: {e}")
|
||||
time.sleep(10)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="MoWeChat Message Monitor")
|
||||
parser.add_argument("--once", action="store_true", help="Scan once and exit")
|
||||
parser.add_argument("--foreground", action="store_true", help="Run in foreground")
|
||||
args = parser.parse_args()
|
||||
|
||||
monitor = WeChatMemoryMonitor()
|
||||
|
||||
if args.once:
|
||||
msgs = monitor.run(once=True)
|
||||
if msgs:
|
||||
print(json.dumps(msgs, ensure_ascii=False, indent=2))
|
||||
else:
|
||||
print("No new messages found.")
|
||||
return
|
||||
|
||||
monitor.run()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,343 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
MoWeChat Message Monitor v2 — reads WeChat process memory to capture incoming messages.
|
||||
Improvements over v1:
|
||||
- Better message extraction: looks for content near known wxids with length-prefix structure
|
||||
- Content filtering: rejects binary garbage, only keeps real text
|
||||
- Tracks messages by content + wxid hash
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import hashlib
|
||||
import logging
|
||||
import argparse
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
|
||||
# ── Configuration ──────────────────────────────────────────────
|
||||
|
||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
|
||||
os.makedirs(LOG_DIR, exist_ok=True)
|
||||
|
||||
LOG_FILE = os.path.join(LOG_DIR, "wechat_msg_monitor.log")
|
||||
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_messages.json")
|
||||
|
||||
HERMES_API = "http://192.168.1.246:8642/v1/chat/completions"
|
||||
HERMES_KEY = "hermes123"
|
||||
|
||||
POLL_INTERVAL = 3
|
||||
WECHAT_BINARY_MARKER = "/opt/wechat/wechat"
|
||||
|
||||
# Known message sender wxids (populated during scanning)
|
||||
OWN_WXID = "wxid_c0a6izmwd78y22" # 老爸 (莫语不语)
|
||||
BOT_WXID = "wxid_7onnerpx2s2l22" # 莫荷自己
|
||||
INTERESTING_WXIDS = {OWN_WXID, BOT_WXID}
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(message)s",
|
||||
handlers=[
|
||||
logging.FileHandler(LOG_FILE),
|
||||
logging.StreamHandler()
|
||||
]
|
||||
)
|
||||
log = logging.getLogger("mowechat")
|
||||
|
||||
|
||||
def is_valid_text(s, min_ratio=0.5):
|
||||
"""Check if a string looks like real text (vs binary garbage)."""
|
||||
if len(s) < 2:
|
||||
return False
|
||||
# Count printable chars
|
||||
printable = 0
|
||||
for ch in s:
|
||||
if ch.isprintable() and (ch.isalpha() or ch.isspace() or ch.isdigit() or ch in '.,!?;:\'\"-()[]{}@#_/\\'):
|
||||
printable += 1
|
||||
return printable / max(len(s), 1) >= min_ratio
|
||||
|
||||
|
||||
def extract_strings(data, min_len=4):
|
||||
"""Extract readable strings from binary data."""
|
||||
result = []
|
||||
current = b''
|
||||
for b in data:
|
||||
if 32 <= b < 127 or b in (0x0a, 0x0d, 0x09):
|
||||
current += bytes([b])
|
||||
elif b >= 0x80: # Part of multi-byte UTF-8
|
||||
current += bytes([b])
|
||||
else:
|
||||
if len(current) >= min_len:
|
||||
try:
|
||||
decoded = current.decode('utf-8', errors='replace')
|
||||
if is_valid_text(decoded):
|
||||
result.append(decoded)
|
||||
except:
|
||||
pass
|
||||
current = b''
|
||||
if len(current) >= min_len:
|
||||
try:
|
||||
decoded = current.decode('utf-8', errors='replace')
|
||||
if is_valid_text(decoded):
|
||||
result.append(decoded)
|
||||
except:
|
||||
pass
|
||||
return result
|
||||
|
||||
|
||||
class WeChatMemoryMonitor:
|
||||
"""Monitors WeChat process memory for new messages."""
|
||||
|
||||
def __init__(self):
|
||||
self.pid = None
|
||||
self.seen = self._load_seen()
|
||||
self.heap_region = None
|
||||
self.wxid_pattern = re.compile(rb'wxid_[a-zA-Z0-9]{10,28}\x00')
|
||||
# Known message sources
|
||||
self.known_wxids = set(INTERESTING_WXIDS)
|
||||
|
||||
def _load_seen(self):
|
||||
try:
|
||||
with open(SAWN_FILE, 'r') as f:
|
||||
return set(json.load(f))
|
||||
except:
|
||||
return set()
|
||||
|
||||
def _save_seen(self):
|
||||
trimmed = set(list(self.seen)[-2000:])
|
||||
try:
|
||||
with open(SAWN_FILE, 'w') as f:
|
||||
json.dump(list(trimmed), f)
|
||||
except:
|
||||
pass
|
||||
|
||||
def find_wechat(self):
|
||||
"""Find the main wechat process PID and heap region."""
|
||||
for p in os.listdir('/proc'):
|
||||
if not p.isdigit():
|
||||
continue
|
||||
try:
|
||||
with open(f'/proc/{p}/maps', 'r') as f:
|
||||
content = f.read(8192)
|
||||
if WECHAT_BINARY_MARKER in content:
|
||||
self.pid = int(p)
|
||||
# Find heap
|
||||
for line in content.split('\n'):
|
||||
if '[heap]' in line:
|
||||
parts = line.split()
|
||||
addr_range = parts[0].split('-')
|
||||
self.heap_region = (int(addr_range[0], 16), int(addr_range[1], 16))
|
||||
return True
|
||||
except:
|
||||
continue
|
||||
return False
|
||||
|
||||
def scan_message(self, mem, wxid_bytes, wxid_pos, wxid_end):
|
||||
"""Try to extract a real message following a wxid in memory."""
|
||||
wxid_str = wxid_bytes.decode('utf-8', errors='replace').strip('\x00')
|
||||
|
||||
# Search within 512 bytes after the wxid for message content
|
||||
search_start = wxid_end
|
||||
search_end = min(search_start + 512, self.heap_region[1] - self.heap_region[0] if self.heap_region else search_start + 512)
|
||||
|
||||
try:
|
||||
mem.seek(search_start)
|
||||
data = mem.read(search_end - search_start)
|
||||
except:
|
||||
return None
|
||||
|
||||
# Look for null-terminated strings that look like messages
|
||||
messages = []
|
||||
current = b''
|
||||
for b in data:
|
||||
if b == 0:
|
||||
if len(current) >= 3:
|
||||
try:
|
||||
text = current.decode('utf-8', errors='replace')
|
||||
# Filter: must have real text content
|
||||
if is_valid_text(text) and len(text) >= 2 and not text.startswith('wxid_'):
|
||||
messages.append(text)
|
||||
except:
|
||||
pass
|
||||
current = b''
|
||||
else:
|
||||
current += bytes([b])
|
||||
|
||||
# Also try to find message by looking for it at a known offset pattern
|
||||
# In WeChat's structure: [msg_type(4)] [svr_id(8)] [content_ptr(8)] [content_len(4)] [content...]
|
||||
|
||||
if not messages:
|
||||
return None
|
||||
|
||||
# Pick the best candidate (longest, most printable)
|
||||
best = max(messages, key=lambda m: (len(m), sum(1 for c in m if c.isalpha() or c.isdigit())))
|
||||
return best
|
||||
|
||||
def scan_heap(self):
|
||||
"""Scan the heap region for messages."""
|
||||
if not self.heap_region:
|
||||
return []
|
||||
|
||||
start, end = self.heap_region
|
||||
messages = []
|
||||
|
||||
try:
|
||||
with open(f'/proc/{self.pid}/mem', 'rb') as mem:
|
||||
# Read entire heap
|
||||
mem.seek(start)
|
||||
size = min(end - start, 50 * 1024 * 1024) # Max 50MB
|
||||
data = mem.read(size)
|
||||
|
||||
# Find all wxid occurrences
|
||||
for match in self.wxid_pattern.finditer(data):
|
||||
wxid = match.group(0).decode('utf-8', errors='replace').strip('\x00')
|
||||
pos = match.start()
|
||||
global_pos = start + pos
|
||||
|
||||
# Look for message content in the next 256 bytes
|
||||
content_area = data[pos + len(match.group()):pos + len(match.group()) + 256]
|
||||
|
||||
# Try to find a null-terminated UTF-8 string that looks like a message
|
||||
for cmatch in re.finditer(rb'([\x20-\x7e\x80-\xff\x00]{4,})', content_area):
|
||||
raw = cmatch.group(0)
|
||||
# Remove trailing nulls
|
||||
raw = raw.rstrip(b'\x00')
|
||||
if len(raw) < 3:
|
||||
continue
|
||||
|
||||
try:
|
||||
text = raw.decode('utf-8', errors='replace')
|
||||
except:
|
||||
continue
|
||||
|
||||
# FILTER: Must have substantial real text content
|
||||
alpha_count = sum(1 for c in text if c.isalpha() or '\u4e00' <= c <= '\u9fff')
|
||||
total_len = len(text)
|
||||
|
||||
if total_len < 3:
|
||||
continue
|
||||
|
||||
# Skip if it's just another wxid
|
||||
if text.startswith('wxid_'):
|
||||
continue
|
||||
|
||||
# Skip binary garbage (must be >= 40% alphabetic/CJK chars)
|
||||
if alpha_count / max(total_len, 1) < 0.3:
|
||||
continue
|
||||
|
||||
# Skip if it looks like a URL/path with no message content
|
||||
if text.startswith('http') or text.startswith('/'):
|
||||
continue
|
||||
|
||||
# Create hash for dedup
|
||||
msg_hash = hashlib.md5(f"{wxid}:{text}".encode()).hexdigest()
|
||||
|
||||
if msg_hash not in self.seen:
|
||||
self.seen.add(msg_hash)
|
||||
messages.append({
|
||||
'wxid': wxid,
|
||||
'content': text,
|
||||
'pos': hex(global_pos),
|
||||
'alpha_ratio': f"{alpha_count/total_len:.2f}",
|
||||
})
|
||||
break # One best message per wxid occurrence
|
||||
|
||||
except (PermissionError, ProcessLookupError) as e:
|
||||
log.warning(f"Memory read failed: {e}")
|
||||
except Exception as e:
|
||||
log.error(f"Heap scan error: {e}")
|
||||
|
||||
return messages
|
||||
|
||||
def forward_to_hermes(self, msg):
|
||||
"""Forward to Hermes Gateway."""
|
||||
payload = json.dumps({
|
||||
"model": "nova-4",
|
||||
"messages": [
|
||||
{"role": "system", "content": "You receive WeChat messages. Process according to standard pipeline."},
|
||||
{"role": "user", "content": f"[WeChat] From: {msg['wxid']}\n{msg['content']}"}
|
||||
]
|
||||
}).encode('utf-8')
|
||||
try:
|
||||
req = urllib.request.Request(
|
||||
HERMES_API, data=payload,
|
||||
headers={"Content-Type": "application/json", "Authorization": f"Bearer {HERMES_KEY}"},
|
||||
method="POST"
|
||||
)
|
||||
urllib.request.urlopen(req, timeout=3)
|
||||
log.info(f"Forwarded: {msg['wxid']}: {msg['content'][:60]}")
|
||||
except Exception as e:
|
||||
log.warning(f"Forward failed: {e}")
|
||||
|
||||
def run(self, once=False):
|
||||
if not self.find_wechat():
|
||||
log.error("WeChat not found!")
|
||||
return []
|
||||
|
||||
log.info(f"WeChat PID: {self.pid}, heap: 0x{self.heap_region[0]:x}-0x{self.heap_region[1]:x}" if self.heap_region else f"PID: {self.pid}, no heap")
|
||||
|
||||
if once:
|
||||
messages = self.scan_heap()
|
||||
seen_wxids = set()
|
||||
for msg in messages:
|
||||
if msg['wxid'] not in seen_wxids:
|
||||
log.info(f" [{msg['wxid']}] {msg['content'][:80]}")
|
||||
seen_wxids.add(msg['wxid'])
|
||||
if messages:
|
||||
self._save_seen()
|
||||
return messages
|
||||
|
||||
log.info(f"Monitoring every {POLL_INTERVAL}s...")
|
||||
while True:
|
||||
try:
|
||||
if not os.path.exists(f'/proc/{self.pid}'):
|
||||
log.warning("WeChat died, re-finding...")
|
||||
if not self.find_wechat():
|
||||
time.sleep(30)
|
||||
continue
|
||||
|
||||
messages = self.scan_heap()
|
||||
for msg in messages:
|
||||
log.info(f"NEW: [{msg['wxid']}] {msg['content'][:80]}")
|
||||
# self.forward_to_hermes(msg)
|
||||
|
||||
if messages:
|
||||
self._save_seen()
|
||||
|
||||
time.sleep(POLL_INTERVAL)
|
||||
except KeyboardInterrupt:
|
||||
break
|
||||
except Exception as e:
|
||||
log.error(f"Error: {e}")
|
||||
time.sleep(10)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--once", action="store_true")
|
||||
parser.add_argument("--foreground", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
monitor = WeChatMemoryMonitor()
|
||||
|
||||
if args.once:
|
||||
msgs = monitor.run(once=True)
|
||||
# Only show real messages (from known contacts or with good content)
|
||||
real_msgs = [m for m in msgs if m['wxid'] in INTERESTING_WXIDS or float(m.get('alpha_ratio', 0)) > 0.5]
|
||||
print(f"\nFound {len(msgs)} potential messages, {len(real_msgs)} from known contacts")
|
||||
for m in real_msgs:
|
||||
print(f" [{m['wxid']}] {m['content'][:100]}")
|
||||
print(f"\n(Total seen: {len(monitor.seen)})")
|
||||
return
|
||||
|
||||
monitor.run()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,206 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
MoWeChat Message Monitor v3 — precise message extraction from WeChat heap.
|
||||
Strategy: look for message content near known wxids with structural patterns.
|
||||
"""
|
||||
|
||||
import os, re, sys, json, time, hashlib, struct, logging, argparse
|
||||
|
||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
|
||||
os.makedirs(LOG_DIR, exist_ok=True)
|
||||
|
||||
LOG_FILE = os.path.join(LOG_DIR, "wechat_msg_v3.log")
|
||||
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_v3.json")
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s",
|
||||
handlers=[logging.FileHandler(LOG_FILE), logging.StreamHandler()])
|
||||
log = logging.getLogger("mv3")
|
||||
|
||||
# Known wxids (discovered from memory)
|
||||
OWN_WXID = "wxid_c0a6izmwd78y22" # 莫语不语 (老爸)
|
||||
BOT_WXID = "wxid_7onnerpx2s2l22" # 莫荷
|
||||
|
||||
INTERESTING = {OWN_WXID, BOT_WXID}
|
||||
|
||||
|
||||
class Monitor:
|
||||
def __init__(self):
|
||||
self.pid = None
|
||||
self.heap = (0, 0)
|
||||
self.seen = self._load_seen()
|
||||
self.wxid_re = re.compile(rb'wxid_[a-zA-Z0-9]{10,28}\x00')
|
||||
|
||||
def _load_seen(self):
|
||||
try:
|
||||
with open(SAWN_FILE) as f:
|
||||
return set(json.load(f))
|
||||
except:
|
||||
return set()
|
||||
|
||||
def _save_seen(self):
|
||||
s = set(list(self.seen)[-2000:])
|
||||
try:
|
||||
with open(SAWN_FILE, 'w') as f:
|
||||
json.dump(list(s), f)
|
||||
except:
|
||||
pass
|
||||
|
||||
def find_wechat(self):
|
||||
for p in os.listdir('/proc'):
|
||||
if not p.isdigit(): continue
|
||||
try:
|
||||
with open(f'/proc/{p}/maps') as f:
|
||||
c = f.read()
|
||||
if "/opt/wechat/wechat" in c:
|
||||
self.pid = int(p)
|
||||
for line in c.split('\n'):
|
||||
if '[heap]' in line:
|
||||
a = line.split()[0].split('-')
|
||||
self.heap = (int(a[0], 16), int(a[1], 16))
|
||||
return True
|
||||
except:
|
||||
continue
|
||||
return False
|
||||
|
||||
def is_valid_msg(self, text):
|
||||
"""Check if text is a real WeChat message."""
|
||||
if len(text) < 2 or len(text) > 5000:
|
||||
return False
|
||||
if text.startswith('wxid_') or text.startswith('http') or text.startswith('/'):
|
||||
return False
|
||||
# Count CJK + ASCII letters + digits
|
||||
good = sum(1 for c in text if c.isalpha() or c.isdigit() or c.isspace() or c in '.,!?;:\'"-()[]{}@#_/\\')
|
||||
if good / max(len(text), 1) < 0.6:
|
||||
return False
|
||||
# Must have at least 3 CJK chars OR 5 ASCII chars
|
||||
cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
|
||||
ascii_alpha = sum(1 for c in text if c.isascii() and c.isalpha())
|
||||
return cjk >= 2 or ascii_alpha >= 4
|
||||
|
||||
def scan(self):
|
||||
start, end = self.heap
|
||||
if not start:
|
||||
return []
|
||||
|
||||
try:
|
||||
with open(f'/proc/{self.pid}/mem', 'rb') as mem:
|
||||
mem.seek(start)
|
||||
data = mem.read(min(end - start, 60 * 1024 * 1024))
|
||||
except:
|
||||
return []
|
||||
|
||||
results = []
|
||||
|
||||
# Strategy: find wxid -> look for a nearby null-terminated UTF-8 string
|
||||
# that looks like real message content
|
||||
|
||||
# First pass: find all wxid positions in a bounded range
|
||||
for m in self.wxid_re.finditer(data):
|
||||
wxid = m.group(0).decode().strip('\x00')
|
||||
if wxid not in INTERESTING:
|
||||
continue
|
||||
|
||||
pos = m.end() # position after wxid\0
|
||||
# Scan forward up to 256 bytes for a printable string
|
||||
scan_end = min(pos + 256, len(data))
|
||||
chunk = data[pos:scan_end]
|
||||
|
||||
# Find the first null-terminated ASCII/UTF-8 string
|
||||
# that's at least 3 chars and not binary garbage
|
||||
i = 0
|
||||
while i < len(chunk):
|
||||
if chunk[i] == 0:
|
||||
i += 1
|
||||
continue
|
||||
# Start of potential string
|
||||
s_start = i
|
||||
while i < len(chunk) and chunk[i] != 0 and chunk[i] >= 0x20:
|
||||
i += 1
|
||||
s_len = i - s_start
|
||||
if s_len >= 3:
|
||||
try:
|
||||
text = chunk[s_start:s_start+s_len].decode('utf-8', errors='replace')
|
||||
if self.is_valid_msg(text):
|
||||
h = hashlib.md5(f"{wxid}:{text}".encode()).hexdigest()
|
||||
if h not in self.seen:
|
||||
self.seen.add(h)
|
||||
results.append({'wxid': wxid, 'text': text})
|
||||
except:
|
||||
pass
|
||||
# Skip null
|
||||
while i < len(chunk) and chunk[i] == 0:
|
||||
i += 1
|
||||
|
||||
# Second strategy: scan heap for standalone CJK strings >= 4 chars
|
||||
# that are NOT preceded by known binary patterns (to catch the actual
|
||||
# message content which may be at a different address than the wxid)
|
||||
for cm in re.finditer(rb'([\x80-\xff][\x80-\xff][\x80-\xff][\x80-\xff])', data):
|
||||
pos = cm.start()
|
||||
# Read up to 200 bytes from here
|
||||
snippet = data[pos:pos+200]
|
||||
# Find first null byte
|
||||
null_pos = snippet.find(b'\x00')
|
||||
if null_pos > 0:
|
||||
snippet = snippet[:null_pos]
|
||||
if len(snippet) < 4:
|
||||
continue
|
||||
try:
|
||||
text = snippet.decode('utf-8', errors='replace')
|
||||
except:
|
||||
continue
|
||||
|
||||
# Only accept strings with substantial CJK content
|
||||
cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
|
||||
if cjk < 2:
|
||||
continue
|
||||
if len(text) > 200:
|
||||
text = text[:200]
|
||||
|
||||
h = hashlib.md5(f"cjk:{text}".encode()).hexdigest()
|
||||
if h not in self.seen and self.is_valid_msg(text):
|
||||
self.seen.add(h)
|
||||
results.append({'wxid': 'unknown', 'text': text})
|
||||
|
||||
if results:
|
||||
self._save_seen()
|
||||
return results
|
||||
|
||||
def run(self, once=False):
|
||||
if not self.find_wechat():
|
||||
log.error("WeChat not found")
|
||||
return []
|
||||
log.info(f"PID {self.pid}, heap 0x{self.heap[0]:x}")
|
||||
|
||||
if once:
|
||||
msgs = self.scan()
|
||||
for m in msgs:
|
||||
log.info(f" [{m['wxid']}] {m['text'][:80]}")
|
||||
return msgs
|
||||
|
||||
while True:
|
||||
try:
|
||||
if not os.path.exists(f'/proc/{self.pid}'):
|
||||
log.warning("WeChat died")
|
||||
if not self.find_wechat():
|
||||
time.sleep(30)
|
||||
continue
|
||||
msgs = self.scan()
|
||||
for m in msgs:
|
||||
log.info(f"NEW [{m['wxid']}] {m['text'][:80]}")
|
||||
time.sleep(3)
|
||||
except KeyboardInterrupt:
|
||||
break
|
||||
except:
|
||||
time.sleep(10)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
m = Monitor()
|
||||
if '--once' in sys.argv:
|
||||
msgs = m.run(once=True)
|
||||
print(f"\nFound {len(msgs)} messages")
|
||||
for msg in msgs:
|
||||
print(f" [{msg['wxid']}] {msg['text']}")
|
||||
else:
|
||||
m.run()
|
||||
@@ -0,0 +1,185 @@
|
||||
#!/usr/bin/env python3
|
||||
"""MoWeChat v4 — efficient heap scanner for WeChat messages."""
|
||||
|
||||
import os, re, sys, json, time, hashlib, logging
|
||||
|
||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
|
||||
os.makedirs(LOG_DIR, exist_ok=True)
|
||||
|
||||
LOG_FILE = os.path.join(LOG_DIR, "wechat_v4.log")
|
||||
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_v4.json")
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s",
|
||||
handlers=[logging.FileHandler(LOG_FILE), logging.StreamHandler()])
|
||||
log = logging.getLogger("wc4")
|
||||
|
||||
# Known wxids
|
||||
WXID_DAD = "wxid_c0a6izmwd78y22"
|
||||
WXID_MOHE = "wxid_7onnerpx2s2l22"
|
||||
KNOWN = {WXID_DAD, WXID_MOHE}
|
||||
|
||||
|
||||
def heap_of(pid):
|
||||
"""Get heap address range for a PID."""
|
||||
with open(f'/proc/{pid}/maps') as f:
|
||||
for line in f:
|
||||
if '[heap]' in line:
|
||||
parts = line.split()
|
||||
a, b = parts[0].split('-')
|
||||
return int(a, 16), int(b, 16)
|
||||
return None, None
|
||||
|
||||
|
||||
def read_heap(pid):
|
||||
"""Read the heap memory."""
|
||||
start, end = heap_of(pid)
|
||||
if not start:
|
||||
return None, None, None
|
||||
with open(f'/proc/{pid}/mem', 'rb') as f:
|
||||
f.seek(start)
|
||||
data = f.read(min(end - start, 40 * 1024 * 1024))
|
||||
return data, start, end
|
||||
|
||||
|
||||
def find_messages(data, heap_start):
|
||||
"""Extract messages from heap data efficiently."""
|
||||
msgs = []
|
||||
seen = set()
|
||||
|
||||
# Pattern: known wxid followed by content
|
||||
for wxid in KNOWN:
|
||||
pattern = wxid.encode() + b'\x00'
|
||||
pos = 0
|
||||
limit = 0
|
||||
while limit < 100:
|
||||
idx = data.find(pattern, pos)
|
||||
if idx < 0:
|
||||
break
|
||||
limit += 1
|
||||
|
||||
after = idx + len(pattern)
|
||||
snippet = data[after:after+300]
|
||||
|
||||
i = 0
|
||||
while i < len(snippet):
|
||||
if snippet[i] == 0:
|
||||
i += 1
|
||||
continue
|
||||
s = i
|
||||
while i < len(snippet) and snippet[i] != 0:
|
||||
i += 1
|
||||
if i - s < 3:
|
||||
i += 1
|
||||
continue
|
||||
try:
|
||||
text = snippet[s:i].decode('utf-8', errors='replace')
|
||||
except:
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Score: CJK chars + ASCII letters
|
||||
cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
|
||||
alpha = sum(1 for c in text if c.isascii() and c.isalpha())
|
||||
score = cjk * 3 + alpha
|
||||
|
||||
if score < 5:
|
||||
i += 1
|
||||
continue
|
||||
|
||||
h = hashlib.md5(text.encode()).hexdigest()
|
||||
if h not in seen:
|
||||
seen.add(h)
|
||||
msgs.append((text, wxid, heap_start + after + s))
|
||||
break
|
||||
|
||||
pos = idx + 1
|
||||
|
||||
# Also scan for the \xNNcontent pattern directly (faster catch-all)
|
||||
# Pattern: [1 byte length/bufsize][readable text]
|
||||
i = 0
|
||||
limit2 = 0
|
||||
while i < len(data) - 10 and limit2 < 500:
|
||||
b = data[i]
|
||||
# Possible prefix: small values (0x04-0x40 = 4-64, common buffer sizes)
|
||||
if 0x04 <= b <= 0x40:
|
||||
# Check if what follows looks like text
|
||||
j = i + 1
|
||||
text_bytes = bytearray()
|
||||
while j < len(data) and j - i - 1 < b and data[j] != 0:
|
||||
if data[j] >= 0x20:
|
||||
text_bytes.append(data[j])
|
||||
j += 1
|
||||
else:
|
||||
break
|
||||
if len(text_bytes) >= 5:
|
||||
try:
|
||||
text = text_bytes.decode('utf-8', errors='replace')
|
||||
except:
|
||||
i += 1
|
||||
continue
|
||||
cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
|
||||
alpha = sum(1 for c in text if c.isascii() and c.isalpha())
|
||||
if (cjk >= 2 or alpha >= 6) and text[0] not in '&/\\.':
|
||||
h = hashlib.md5(('any:' + text).encode()).hexdigest()
|
||||
if h not in seen:
|
||||
# Find which wxid is near this text (search backward)
|
||||
nearby = data[max(0, i-200):i]
|
||||
wxid_match = re.search(rb'wxid_[a-zA-Z0-9]{10,28}', nearby)
|
||||
owner = wxid_match.group(0).decode() if wxid_match else 'unknown'
|
||||
seen.add(h)
|
||||
msgs.append((text, owner, heap_start + i))
|
||||
limit2 += 1
|
||||
i += 1
|
||||
|
||||
return msgs
|
||||
|
||||
|
||||
def find_wechat():
|
||||
"""Find the main wechat process PID."""
|
||||
for p in os.listdir('/proc'):
|
||||
if not p.isdigit():
|
||||
continue
|
||||
try:
|
||||
with open(f'/proc/{p}/maps') as f:
|
||||
if "/opt/wechat/wechat" in f.read():
|
||||
return int(p)
|
||||
except:
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
def main():
|
||||
pid = find_wechat()
|
||||
if not pid:
|
||||
log.error("WeChat not found")
|
||||
return 1
|
||||
|
||||
log.info(f"Found PID {pid}")
|
||||
|
||||
data, hs, he = read_heap(pid)
|
||||
|
||||
if data is None:
|
||||
log.error("Cannot read heap")
|
||||
return 1
|
||||
|
||||
log.info(f"Heap: 0x{hs:x}-0x{he:x} ({len(data)} bytes)")
|
||||
|
||||
msgs = find_messages(data, hs)
|
||||
|
||||
# Deduplicate
|
||||
seen_texts = set()
|
||||
for text, wxid, addr in msgs:
|
||||
key = text.strip()
|
||||
if key not in seen_texts and len(key) >= 2:
|
||||
seen_texts.add(key)
|
||||
print(f"{wxid}: {text}")
|
||||
|
||||
if not msgs:
|
||||
print("No messages found")
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -0,0 +1,17 @@
|
||||
[Unit]
|
||||
Description=MoWeChat — 莫荷微信 Bot (Linux iLink 版)
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=hmo
|
||||
WorkingDirectory=/home/hmo/projects/AgentsMeeting/gateway/linux
|
||||
ExecStart=/home/hmo/projects/AgentsMeeting/.venv/bin/python3 /home/hmo/projects/AgentsMeeting/gateway/linux/wechat_agent.py
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
StandardOutput=append:/home/hmo/projects/AgentsMeeting/logs/wechat_agent_linux.log
|
||||
StandardError=append:/home/hmo/projects/AgentsMeeting/logs/wechat_agent_linux.log
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -0,0 +1,703 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
MoWeChat — 莫荷微信 Bot (Linux iLink 版)
|
||||
|
||||
替换方案:将 Windows wxhelper DLL 注入方案替换为腾讯官方 iLink Bot API。
|
||||
架构:微信 → iLink Bot API (ilinkai.weixin.qq.com) → Hermes Gateway (:8642)
|
||||
|
||||
依赖:
|
||||
pip install weixin-bot-sdk aiohttp requests
|
||||
|
||||
首次运行:
|
||||
python3 wechat_agent.py
|
||||
终端显示二维码 → 用莫荷的手机微信扫码 → 凭证保存后自动运行
|
||||
|
||||
版本: 1.0
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from weixin_bot import WeixinBot, IncomingMessage
|
||||
|
||||
# ── Configuration ──────────────────────────────────────────────
|
||||
|
||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
PROJECT_ROOT = os.path.dirname(SCRIPT_DIR) # gateway/
|
||||
AGENTS_ROOT = os.path.dirname(PROJECT_ROOT) # AgentsMeeting/
|
||||
LOG_DIR = os.path.join(AGENTS_ROOT, "logs")
|
||||
TEMP_DIR = os.path.join(AGENTS_ROOT, "temp")
|
||||
|
||||
# Hermes Gateway — same endpoint as Windows wechat_agent.py uses
|
||||
HERMES_API = "http://192.168.1.246:8642/v1/chat/completions"
|
||||
HERMES_KEY = "hermes123"
|
||||
|
||||
# Mohe's iLink Bot user_id (set after login)
|
||||
MOHE_USER_ID = None
|
||||
|
||||
# ── Setup ──────────────────────────────────────────────────────
|
||||
|
||||
os.makedirs(LOG_DIR, exist_ok=True)
|
||||
os.makedirs(TEMP_DIR, exist_ok=True)
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(message)s",
|
||||
datefmt="%H:%M:%S",
|
||||
handlers=[
|
||||
logging.FileHandler(os.path.join(LOG_DIR, "wechat_agent_linux.log"), encoding="utf-8"),
|
||||
logging.StreamHandler(),
|
||||
],
|
||||
)
|
||||
log = logging.getLogger("mohe")
|
||||
|
||||
# ── PID lock ───────────────────────────────────────────────────
|
||||
|
||||
PID_FILE = os.path.join(TEMP_DIR, "wechat_agent_linux.pid")
|
||||
|
||||
|
||||
def acquire_pid_lock():
|
||||
"""Write PID file, exit if another instance is running."""
|
||||
if os.path.exists(PID_FILE):
|
||||
try:
|
||||
with open(PID_FILE) as f:
|
||||
old_pid = int(f.read().strip())
|
||||
# Check if process still exists
|
||||
os.kill(old_pid, 0)
|
||||
log.error(f"Another instance running (PID {old_pid}). Exiting.")
|
||||
sys.exit(1)
|
||||
except (ProcessLookupError, ValueError):
|
||||
pass # Stale PID file
|
||||
with open(PID_FILE, "w") as f:
|
||||
f.write(str(os.getpid()))
|
||||
|
||||
|
||||
def release_pid_lock():
|
||||
try:
|
||||
os.remove(PID_FILE)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
# ── Hermes Gateway API ─────────────────────────────────────────
|
||||
|
||||
import requests as _requests
|
||||
|
||||
|
||||
def call_hermes(wxid_or_user: str, content: str) -> str | None:
|
||||
"""
|
||||
Send message to Hermes Gateway, get reply.
|
||||
Returns the reply text, or None if silent/no reply.
|
||||
"""
|
||||
headers = {
|
||||
"Authorization": f"Bearer {HERMES_KEY}",
|
||||
"X-Hermes-Session-Id": "sisyphus",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
|
||||
is_group = "@chatroom" in wxid_or_user
|
||||
system_prompt = (
|
||||
"你是莫荷,女生。群聊中回复要短。"
|
||||
if is_group
|
||||
else "你是莫荷,女生。回复简短自然,像朋友聊天。"
|
||||
)
|
||||
|
||||
# Prefix with sender info like the Windows version does
|
||||
chat_type = "Group" if is_group else "Private"
|
||||
prefixed = f"[{chat_type}][{wxid_or_user}] {content}"
|
||||
|
||||
body = {
|
||||
"model": "hermes-agent",
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": prefixed},
|
||||
],
|
||||
}
|
||||
|
||||
try:
|
||||
r = _requests.post(
|
||||
HERMES_API,
|
||||
json=body,
|
||||
headers=headers,
|
||||
timeout=60,
|
||||
)
|
||||
if r.status_code == 200:
|
||||
data = r.json()
|
||||
choice = data["choices"][0]
|
||||
finish_reason = choice.get("finish_reason", "")
|
||||
if finish_reason == "silent":
|
||||
log.info("Hermes: __SILENT__ (group skip)")
|
||||
return None
|
||||
return choice["message"]["content"]
|
||||
log.warning(f"Hermes HTTP {r.status_code}: {r.text[:200]}")
|
||||
except Exception as e:
|
||||
log.error(f"Hermes API error: {e}")
|
||||
return None
|
||||
|
||||
|
||||
# ── Image Generation (SenseNova) ───────────────────────────────
|
||||
|
||||
SENSENOVA_KEY = "sk-aRNj3UwKSLPsDfh15QNTPwbHxahblfaO"
|
||||
SENSENOVA_URL = "https://token.sensenova.cn/v1"
|
||||
|
||||
|
||||
def generate_image(prompt: str, ratio: str = "1:1") -> str | None:
|
||||
"""Generate image via SenseNova API. Returns URL or None."""
|
||||
size_map = {
|
||||
"1:1": "2048x2048", "16:9": "2752x1536", "9:16": "1536x2752",
|
||||
"3:2": "2496x1664", "2:3": "1664x2496", "3:4": "1760x2368", "4:3": "2368x1760",
|
||||
}
|
||||
size = size_map.get(ratio, "2048x2048")
|
||||
try:
|
||||
r = _requests.post(
|
||||
SENSENOVA_URL + "/images/generations",
|
||||
json={
|
||||
"model": "sensenova-u1-fast",
|
||||
"prompt": prompt,
|
||||
"size": size,
|
||||
"response_format": "url",
|
||||
},
|
||||
headers={
|
||||
"Authorization": f"Bearer {SENSENOVA_KEY}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
timeout=180,
|
||||
)
|
||||
if r.status_code == 200:
|
||||
return r.json()["data"][0]["url"]
|
||||
log.warning(f"Image gen HTTP {r.status_code}: {r.text[:200]}")
|
||||
except Exception as e:
|
||||
log.error(f"Image gen error: {e}")
|
||||
return None
|
||||
|
||||
|
||||
# ── OCR via Doubao Vision API ──────────────────────────────────
|
||||
|
||||
DOUBAO_KEY = "b0359bed-09f2-49e2-a53c-32ba057412e3"
|
||||
|
||||
|
||||
def ocr_image_from_url(img_url: str) -> str | None:
|
||||
"""Download image from URL and OCR it. Returns text or None."""
|
||||
try:
|
||||
r = _requests.get(img_url, timeout=30)
|
||||
if r.status_code != 200:
|
||||
log.warning(f"Image download HTTP {r.status_code}")
|
||||
return None
|
||||
import base64
|
||||
b64 = base64.b64encode(r.content).decode()
|
||||
return _ocr_base64(b64)
|
||||
except Exception as e:
|
||||
log.error(f"Image download/OCR error: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def _ocr_base64(b64_data: str) -> str | None:
|
||||
"""OCR from base64-encoded image data."""
|
||||
try:
|
||||
r = _requests.post(
|
||||
"https://ark.cn-beijing.volces.com/api/coding/v3/chat/completions",
|
||||
json={
|
||||
"model": "doubao-seed-code",
|
||||
"messages": [{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "请识别这张图片中的所有中文和英文字符,保持原文输出,包括数字、表格、百分比的完整结构。严格逐行逐列输出所有数据,不要省略、不要总结。"},
|
||||
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64_data}"}},
|
||||
],
|
||||
}],
|
||||
},
|
||||
headers={"Authorization": f"Bearer {DOUBAO_KEY}", "Content-Type": "application/json"},
|
||||
timeout=60,
|
||||
)
|
||||
if r.status_code == 200:
|
||||
text = r.json()["choices"][0]["message"]["content"].strip()
|
||||
log.info(f"OCR OK ({len(text)} chars)")
|
||||
return text
|
||||
log.warning(f"OCR HTTP {r.status_code}: {r.text[:200]}")
|
||||
except Exception as e:
|
||||
log.error(f"OCR error: {e}")
|
||||
return None
|
||||
|
||||
|
||||
# ── Article Processor ──────────────────────────────────────────
|
||||
|
||||
def fetch_article(url: str) -> dict | None:
|
||||
"""Fetch article content via local article_processor (:5810)."""
|
||||
try:
|
||||
import urllib.request as ur
|
||||
req = ur.Request(
|
||||
"http://127.0.0.1:5810/process",
|
||||
data=json.dumps({"url": url}).encode("utf-8"),
|
||||
headers={"Content-Type": "application/json"},
|
||||
)
|
||||
with ur.urlopen(req, timeout=180) as resp:
|
||||
result = json.loads(resp.read().decode("utf-8"))
|
||||
if result.get("status") == "ok":
|
||||
return {
|
||||
"title": result.get("title", ""),
|
||||
"content": result.get("content", "")[:3000],
|
||||
"images": result.get("images_ocr", 0),
|
||||
}
|
||||
log.warning(f"Article processor error: {result.get('error','')[:100]}")
|
||||
except Exception as e:
|
||||
log.error(f"Article fetch error: {e}")
|
||||
return None
|
||||
|
||||
|
||||
# ── Main Message Handler ──────────────────────────────────────
|
||||
|
||||
def detect_group(msg: IncomingMessage) -> str:
|
||||
"""
|
||||
Detect if a message is from a group chat.
|
||||
Returns the group_id if it's a group, or empty string if private chat.
|
||||
"""
|
||||
raw = msg.raw
|
||||
# Check group_id field directly from protocol
|
||||
gid = raw.get("group_id", "") or ""
|
||||
if gid:
|
||||
return gid
|
||||
# Also check session_id for group patterns (contains 'chatroom' or multiple '@')
|
||||
session_id = raw.get("session_id", "") or ""
|
||||
if "@chatroom" in session_id:
|
||||
return session_id.split("#")[0] if "#" in session_id else session_id
|
||||
return ""
|
||||
|
||||
|
||||
def extract_article_url(text: str) -> str | None:
|
||||
"""Extract article URL from text. Supports WeChat public accounts, Xiaohongshu, and common share links."""
|
||||
patterns = [
|
||||
r"https?://mp\.weixin\.qq\.com[^\"'\s<)<>\[\]]+",
|
||||
r"https?://(?:www\.)?xiaohongshu\.com[^\"'\s<)<>\[\]]+",
|
||||
r"https?://xhslink\.com[^\"'\s<)<>\[\]]+",
|
||||
r"https?://(?:www\.)?zhihu\.com[^\"'\s<)<>\[\]]+",
|
||||
]
|
||||
for p in patterns:
|
||||
m = re.search(p, text)
|
||||
if m:
|
||||
return m.group(0)
|
||||
return None
|
||||
|
||||
|
||||
async def handle_incoming(msg: IncomingMessage, bot: WeixinBot):
|
||||
"""Process an incoming message from WeChat via iLink SDK."""
|
||||
log.info(f"[{msg.type}] {msg.user_id}: {(msg.text or '')[:80]}")
|
||||
|
||||
fu = msg.user_id
|
||||
content = msg.text or ""
|
||||
msg_type = msg.type
|
||||
|
||||
# ── Group chat detection ──
|
||||
group_id = detect_group(msg)
|
||||
if group_id:
|
||||
log.info(f"GROUP message from {group_id}, sender={fu}")
|
||||
# Use the group_id as the conversation identifier (like Windows version uses roomid)
|
||||
conv_id = group_id
|
||||
prefix = "[Group]"
|
||||
else:
|
||||
conv_id = fu
|
||||
prefix = "[Private]"
|
||||
|
||||
# ── TEXT message ──
|
||||
if msg_type == "text":
|
||||
handler_input = content
|
||||
|
||||
# Check for ref_msg (forwarded/quoted content like articles)
|
||||
ref_text = ""
|
||||
for item in msg.raw.get("item_list", []):
|
||||
ref_msg = item.get("ref_msg")
|
||||
if ref_msg:
|
||||
ref_title = ref_msg.get("title", "")
|
||||
ref_item = ref_msg.get("message_item", {})
|
||||
if isinstance(ref_item, dict) and ref_item.get("type") == 1:
|
||||
ref_text_data = ref_item.get("text_item", {}).get("text", "")
|
||||
if ref_text_data:
|
||||
ref_text = f"\n[引用消息] {ref_text_data[:500]}"
|
||||
if ref_title:
|
||||
handler_input = f"[老莫转发了一篇文章] 标题: {ref_title}\n\n{content}{ref_text}"
|
||||
|
||||
# Detect article URLs (WeChat public account, Xiaohongshu, etc.)
|
||||
article_url = extract_article_url(handler_input)
|
||||
if article_url:
|
||||
log.info(f"Article URL detected: {article_url[:80]}")
|
||||
article = fetch_article(article_url)
|
||||
if article:
|
||||
title = article.get("title", "")
|
||||
article_text = article.get("content", "")[:2000]
|
||||
images = article.get("images", 0)
|
||||
handler_input = (
|
||||
f"[老莫转发了一篇文章]\n标题: {title}\n"
|
||||
+ (f"({images}张图片已OCR)\n" if images else "")
|
||||
+ f"\n{article_text}"
|
||||
)
|
||||
else:
|
||||
# Article processor failed, send original content
|
||||
log.warning("Article processor returned no content, sending raw text")
|
||||
|
||||
reply = call_hermes(conv_id, handler_input)
|
||||
if reply and reply.strip():
|
||||
await process_reply(reply, conv_id, bot)
|
||||
|
||||
# ── IMAGE message ──
|
||||
elif msg_type == "image":
|
||||
log.info(f"Image from {conv_id}, attempting OCR...")
|
||||
|
||||
# Get image URL from the raw message
|
||||
img_url = None
|
||||
for item in msg.raw.get("item_list", []):
|
||||
if item.get("type") in (2,): # IMAGE
|
||||
img_item = item.get("image_item", {})
|
||||
img_url = img_item.get("url", "")
|
||||
if not img_url:
|
||||
# CDN media - download via CDN API
|
||||
media = img_item.get("media", {})
|
||||
if media:
|
||||
log.info("Image has CDN media, attempting CDN download...")
|
||||
img_url = download_cdn_image(media)
|
||||
break
|
||||
|
||||
ocr_text = None
|
||||
if img_url:
|
||||
ocr_text = ocr_image_from_url(img_url)
|
||||
if not ocr_text:
|
||||
log.warning("OCR returned no text from image URL")
|
||||
else:
|
||||
log.info("No image URL available, cannot OCR")
|
||||
|
||||
if ocr_text:
|
||||
handler_input = f"[老莫发送了一张图片,OCR识别结果如下]\n{ocr_text}"
|
||||
else:
|
||||
handler_input = "[老莫发送了一张图片,但无法识别图片内容]"
|
||||
|
||||
reply = call_hermes(conv_id, handler_input)
|
||||
if reply and reply.strip():
|
||||
await bot.reply(msg, reply.strip())
|
||||
|
||||
# ── VOICE message ──
|
||||
elif msg_type == "voice":
|
||||
reply = call_hermes(conv_id, "[voice message]")
|
||||
if reply and reply.strip():
|
||||
await bot.reply(msg, reply.strip())
|
||||
|
||||
# ── Unknown type ──
|
||||
else:
|
||||
log.info(f"Unhandled message type: {msg_type}")
|
||||
|
||||
|
||||
def download_cdn_image(media: dict) -> str | None:
|
||||
"""
|
||||
Download an image from WeChat CDN using the iLink media protocol.
|
||||
The media dict contains aes_key and encrypt_query_param for AES-128-ECB decryption.
|
||||
For now, this is a placeholder - CDN download requires AES decryption.
|
||||
"""
|
||||
logger = log
|
||||
logger.info(f"CDN media available but direct download not yet implemented")
|
||||
logger.debug(f"CDN media keys: {list(media.keys())}")
|
||||
return None
|
||||
|
||||
|
||||
async def process_reply(reply: str, fu: str, bot: WeixinBot):
|
||||
"""
|
||||
Process Hermes reply text, handling tags like [FILE], [IMG], [EMOJI].
|
||||
Uses bot.reply(msg) or bot.send(user_id, text) based on context.
|
||||
"""
|
||||
if not reply or not reply.strip():
|
||||
return
|
||||
|
||||
clean = reply
|
||||
|
||||
# ── [FILE] tag ──
|
||||
fm = re.search(r'\[FILE\](.*?)\[/FILE\]', clean)
|
||||
if fm:
|
||||
url = fm.group(1).strip()
|
||||
log.info(f"[FILE] URL: {url}")
|
||||
# iLink doesn't natively support file sending via simple URL.
|
||||
# Send as text with download link for now.
|
||||
clean = re.sub(r'\s*\[FILE\].*?\[/FILE\]\s*', '', clean).strip()
|
||||
extra = f"\n文件链接: {url}"
|
||||
if clean.strip():
|
||||
clean += extra
|
||||
else:
|
||||
clean = f"文件: {url}"
|
||||
|
||||
# ── [IMG] tag ──
|
||||
im = re.search(r'\[IMG\](.*?)\[/IMG\]', clean)
|
||||
if im:
|
||||
cmd = im.group(1).strip()
|
||||
clean = re.sub(r'\s*\[IMG\].*?\[/IMG\]\s*', '', clean).strip()
|
||||
|
||||
if cmd.startswith("generate:") or cmd.startswith("draw:"):
|
||||
parts = cmd.split(":", 1)[1].strip()
|
||||
ratio = "1:1"
|
||||
if "|" in parts:
|
||||
ratio = parts.split("|")[1].strip()
|
||||
prompt = parts.split("|")[0].strip()
|
||||
else:
|
||||
prompt = parts
|
||||
log.info(f"[IMG] generate: {prompt[:40]} [{ratio}]")
|
||||
img_url = generate_image(prompt, ratio)
|
||||
if img_url:
|
||||
# For images, we can't send via iLink natively yet.
|
||||
# Send the URL as a text message.
|
||||
extra = f"\n图片已生成: {img_url}"
|
||||
if clean.strip():
|
||||
clean += extra
|
||||
else:
|
||||
clean = f"图片已生成: {img_url}"
|
||||
else:
|
||||
extra = "\n[图片生成失败]"
|
||||
clean += extra if clean.strip() else extra.strip()
|
||||
else:
|
||||
# Regular image URL
|
||||
extra = f"\n图片: {cmd}"
|
||||
if clean.strip():
|
||||
clean += extra
|
||||
else:
|
||||
clean = f"图片: {cmd}"
|
||||
|
||||
# ── [EMOJI] tag ──
|
||||
em = re.search(r'\[EMOJI\](.*?)\[/EMOJI\]', clean)
|
||||
if em:
|
||||
url = em.group(1).strip()
|
||||
log.info(f"[EMOJI] URL: {url}")
|
||||
clean = re.sub(r'\s*\[EMOJI\].*?\[/EMOJI\]\s*', '', clean).strip()
|
||||
|
||||
# ── [CONTACT] tag — not supported via iLink ──
|
||||
clean = re.sub(r'\s*\[CONTACT:\w+\]\s*', '', clean)
|
||||
|
||||
# ── [ROOM_MEMBERS] tag — not supported via iLink ──
|
||||
clean = re.sub(r'\s*\[ROOM_MEMBERS:\S+\]\s*', '', clean)
|
||||
|
||||
# ── [HISTORY] tag — not supported via iLink ──
|
||||
clean = re.sub(r'\s*\[HISTORY:\S+?:\d+\]\s*', '', clean)
|
||||
|
||||
# ── [PAT] tag — not supported via iLink ──
|
||||
clean = re.sub(r'\s*\[PAT:\S+:\S+\]\s*', '', clean)
|
||||
|
||||
# ── Send remaining text ──
|
||||
clean = clean.strip()
|
||||
if clean:
|
||||
await bot.send(fu, clean)
|
||||
|
||||
|
||||
# ── 5801 HTTP Server ──────────────────────────────────────────
|
||||
# Receives messages from Hermes/xxm to forward to WeChat.
|
||||
# Uses asyncio queue to bridge sync HTTP → async iLink SDK.
|
||||
|
||||
_message_queue: asyncio.Queue[dict] = asyncio.Queue()
|
||||
_http_server_ready = threading.Event()
|
||||
|
||||
|
||||
class HermesMsgHandler(BaseHTTPRequestHandler):
|
||||
"""HTTP server that receives Hermes messages and queues them for iLink sending."""
|
||||
|
||||
def do_POST(self):
|
||||
body = self.rfile.read(int(self.headers.get("Content-Length", 0)))
|
||||
try:
|
||||
d = json.loads(body)
|
||||
log.info(f"5801 POST: {json.dumps(d, ensure_ascii=False)[:200]}")
|
||||
|
||||
to = d.get("to", "") or d.get("wxid", "")
|
||||
msg = d.get("message", "") or d.get("content", "")
|
||||
path = self.path
|
||||
|
||||
# History query endpoint
|
||||
if path in ("/history", "/api/chatHistory"):
|
||||
wxid = d.get("wxid", "") or d.get("to", "")
|
||||
count = d.get("count", 10) or d.get("limit", 10)
|
||||
self._send_json({
|
||||
"ok": True,
|
||||
"note": "iLink SDK does not support history query. Use Hermes session_search instead.",
|
||||
"messages": [],
|
||||
})
|
||||
return
|
||||
|
||||
# Stop endpoint
|
||||
if path == "/stop":
|
||||
_message_queue.put_nowait({"type": "stop"})
|
||||
self._send_json({"ok": True, "status": "stopped"})
|
||||
return
|
||||
|
||||
# Queue the message for async processing (queue consumed by the iLink message loop)
|
||||
_message_queue.put_nowait({
|
||||
"type": "outgoing",
|
||||
"to": to,
|
||||
"message": msg,
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
log.error(f"5801 handler error: {e}")
|
||||
self.send_response(200)
|
||||
self.end_headers()
|
||||
|
||||
def do_GET(self):
|
||||
parsed = urlparse(self.path)
|
||||
if parsed.path == "/health":
|
||||
self._send_json({
|
||||
"ok": True,
|
||||
"platform": "linux-ilink",
|
||||
"mohe_user_id": MOHE_USER_ID or "not_logged_in",
|
||||
})
|
||||
return
|
||||
if parsed.path in ("/history", "/api/chatHistory"):
|
||||
import urllib.parse as up
|
||||
params = up.parse_qs(parsed.query)
|
||||
wxid = params.get("wxid", [""])[0]
|
||||
count = params.get("count", ["10"])[0]
|
||||
self._send_json({
|
||||
"ok": True,
|
||||
"note": "iLink SDK does not support history query.",
|
||||
"messages": [],
|
||||
})
|
||||
return
|
||||
self.send_response(200)
|
||||
self.send_header("Content-Type", "application/json")
|
||||
self.end_headers()
|
||||
self.wfile.write(b'{"ok":true}')
|
||||
|
||||
def _send_json(self, data):
|
||||
body = json.dumps(data, ensure_ascii=False).encode("utf-8")
|
||||
self.send_response(200)
|
||||
self.send_header("Content-Type", "application/json; charset=utf-8")
|
||||
self.send_header("Content-Length", str(len(body)))
|
||||
self.end_headers()
|
||||
self.wfile.write(body)
|
||||
|
||||
def log_message(self, *a):
|
||||
pass
|
||||
|
||||
|
||||
def start_http_server():
|
||||
"""Start the sync HTTP server in a daemon thread."""
|
||||
server = HTTPServer(("0.0.0.0", 5801), HermesMsgHandler)
|
||||
t = threading.Thread(target=server.serve_forever, daemon=True)
|
||||
t.start()
|
||||
log.info("HTTP :5801 ready (Hermes message receiver)")
|
||||
_http_server_ready.set()
|
||||
return server
|
||||
|
||||
|
||||
# ── Queue Consumer ────────────────────────────────────────────
|
||||
# Consumes the 5801 message queue inside the iLink event loop.
|
||||
|
||||
async def consume_outgoing_queue(bot: WeixinBot):
|
||||
"""
|
||||
Async task: consumes messages from 5801 queue and sends via iLink.
|
||||
Runs inside the same event loop as the iLink bot.
|
||||
"""
|
||||
while True:
|
||||
try:
|
||||
item = await _message_queue.get()
|
||||
msg_type = item.get("type")
|
||||
|
||||
if msg_type == "stop":
|
||||
log.info("Queue consumer: stop signal received")
|
||||
bot.stop()
|
||||
break
|
||||
|
||||
if msg_type == "outgoing":
|
||||
to = item.get("to", "")
|
||||
msg = item.get("message", "")
|
||||
if to and msg:
|
||||
try:
|
||||
await bot.send(to, msg)
|
||||
log.info(f"5801 -> {to}: {msg[:80]}")
|
||||
except RuntimeError as e:
|
||||
log.warning(f"5801 send failed (no context token for {to}): {e}")
|
||||
except Exception as e:
|
||||
log.error(f"5801 send error: {e}")
|
||||
|
||||
except asyncio.CancelledError:
|
||||
break
|
||||
except Exception as e:
|
||||
log.error(f"Queue consumer error: {e}")
|
||||
|
||||
|
||||
# ── Outbound Message Router (replacement for session_router) ──
|
||||
# Handles messages from xxm/Hermes that need to be sent to WeChat.
|
||||
# In the iLink world, all outgoing messages go through bot.send().
|
||||
|
||||
async def route_hermes_message(msg_text: str, bot: WeixinBot):
|
||||
"""Route a message from Hermes to WeChat via iLink."""
|
||||
# For now, route to Dad's user_id if known.
|
||||
# In practice, the 5801 server with 'to' field is the primary path.
|
||||
log.info(f"Hermes message: {msg_text[:100]}")
|
||||
# The 5801 server handles the actual sending with explicit 'to' field.
|
||||
# This function is for messages without an explicit target.
|
||||
|
||||
|
||||
# ── Main ──────────────────────────────────────────────────────
|
||||
|
||||
def main():
|
||||
acquire_pid_lock()
|
||||
log.info("=== MoWeChat Agent (Linux/iLink v1.0) ===")
|
||||
|
||||
# Start 5801 HTTP server (in a thread)
|
||||
http_server = start_http_server()
|
||||
_http_server_ready.wait(timeout=5)
|
||||
|
||||
# Create and run the iLink bot
|
||||
bot = WeixinBot()
|
||||
|
||||
try:
|
||||
# Login (QR scan first time, then auto)
|
||||
log.info("Logging in to iLink Bot API...")
|
||||
creds = bot.login()
|
||||
global MOHE_USER_ID
|
||||
MOHE_USER_ID = creds.user_id
|
||||
log.info(f"Logged in as {MOHE_USER_ID}")
|
||||
except Exception as e:
|
||||
log.error(f"Login failed: {e}")
|
||||
release_pid_lock()
|
||||
sys.exit(1)
|
||||
|
||||
# Register message handler
|
||||
@bot.on_message
|
||||
async def handle(msg: IncomingMessage):
|
||||
await handle_incoming(msg, bot)
|
||||
|
||||
# Register queue consumer (runs inside bot's event loop)
|
||||
# We schedule this before bot.run() by patching into the _run_loop flow.
|
||||
# Since _run_loop is private, we use a different approach:
|
||||
# Override the start of _run_loop by wrapping it.
|
||||
|
||||
# Actually, bot.run() creates its own event loop with asyncio.run().
|
||||
# We need to hook into that loop. Let's save the original _run_loop
|
||||
# and wrap it to also start the queue consumer.
|
||||
|
||||
original_run_loop = bot._run_loop
|
||||
|
||||
async def wrapped_run_loop():
|
||||
# Start queue consumer as a background task
|
||||
asyncio.create_task(consume_outgoing_queue(bot))
|
||||
# Run the original loop
|
||||
await original_run_loop()
|
||||
|
||||
bot._run_loop = wrapped_run_loop
|
||||
|
||||
try:
|
||||
log.info("Bot starting. Press Ctrl+C to stop.")
|
||||
bot.run()
|
||||
except KeyboardInterrupt:
|
||||
log.info("Shutting down...")
|
||||
except Exception as e:
|
||||
log.error(f"Bot error: {e}")
|
||||
finally:
|
||||
bot.stop()
|
||||
http_server.shutdown()
|
||||
release_pid_lock()
|
||||
log.info("Stopped.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user