cleanup: archive unused scripts, finalize docs

- Moved old GDB hook and memory monitor scripts to archive/
- Updated README with final Docker solution
- Updated .env.example, .gitignore
- Added final-note.md documenting deployment decision
- All research scripts preserved in archive/ for reference
This commit is contained in:
2026-06-24 02:03:24 +08:00
parent f1630ebb03
commit b7993a54d5
12 changed files with 93 additions and 56 deletions
@@ -0,0 +1,198 @@
#!/usr/bin/env gdb
"""
GDB Hook script for WeChat Linux AppImage.
Intercepts incoming messages from WeChat's NewSync_ProcessStashMsgList.
Based on Ajax's Blog methodology:
https://aajax.top/2026/03/11/GettingLinuxWechatMessages/
Usage:
gdb -p $(pidof wechat) -x hooks/gdb_hook_messages.py
"""
import json
import os
import sys
# ── Configuration ──────────────────────────────────────────────
# WeChat binary base address (from /proc/PID/maps, first r--p entry)
# Must be recalculated each run due to ASLR
WECHAT_PID = None
# Breakpoint RVA (relative to binary base) for WeChat 4.1.x
# From Ajax's IDA Pro analysis of 4.1.0.16:
# NewSync_ProcessStashMsgList -> loop call at 0x4994BEB
BP_RVA = 0x4994BEB
# Log file
LOG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "logs")
LOG_FILE = os.path.join(LOG_DIR, "wechat_messages.log")
# Message structure offsets (from Ajax's blog)
OFF_TYPE = 0x14 # int: message type
OFF_SVRID = 0x50 # unsigned long long: server message ID
OFF_HOLDER = 0x20 # void*: holder pointer
OFF_INNER = 0x08 # void*: inner pointer (relative to holder)
OFF_CONTENT_PTR = 0x00 # char*: content string pointer (from inner+0)
OFF_CONTENT_LEN = 0x10 # int: content string length (from inner+0x10)
def log(msg):
"""Write log to file and stdout."""
with open(LOG_FILE, "a", encoding="utf-8") as f:
f.write(f"{msg}\n")
gdb.write(f"{msg}\n")
class WechatMessageBreakpoint(gdb.Breakpoint):
"""Breakpoint that fires on each incoming WeChat message."""
def __init__(self, address):
super().__init__(f"*{address}")
self.suppress = True # Don't print to stdout automatically
def stop(self):
try:
# Read registers
msg_ptr = int(gdb.parse_and_eval("$rsi"))
if msg_ptr == 0:
return False
# Read message type
raw_type = gdb.selected_inferior().read_memory(msg_ptr + OFF_TYPE, 4)
msg_type = int.from_bytes(raw_type, byteorder='little', signed=True)
# Read server message ID
raw_svrid = gdb.selected_inferior().read_memory(msg_ptr + OFF_SVRID, 8)
svrid = int.from_bytes(raw_svrid, byteorder='little')
# Read holder pointer
raw_holder = gdb.selected_inferior().read_memory(msg_ptr + OFF_HOLDER, 8)
holder = int.from_bytes(raw_holder, byteorder='little', signed=False)
if holder == 0:
return False
# Read inner pointer
raw_inner = gdb.selected_inferior().read_memory(holder + OFF_INNER, 8)
inner = int.from_bytes(raw_inner, byteorder='little', signed=False)
if inner == 0:
return False
# Read content string length
raw_len = gdb.selected_inferior().read_memory(inner + OFF_CONTENT_LEN, 4)
content_len = int.from_bytes(raw_len, byteorder='little', signed=False)
if content_len <= 0 or content_len > 100000:
return False
# Read content string
raw_content_ptr = gdb.selected_inferior().read_memory(inner + OFF_CONTENT_PTR, 8)
content_ptr = int.from_bytes(raw_content_ptr, byteorder='little', signed=False)
if content_ptr == 0:
return False
raw_content = gdb.selected_inferior().read_memory(content_ptr, min(content_len * 2, 100000))
# Try to decode as UTF-16LE (WeChat internal encoding)
try:
content = raw_content.tobytes()[:content_len * 2].decode('utf-16le', errors='replace')
except:
content = str(raw_content)
# Read talker/sender info (different offset structure)
# This is more complex — skip for initial test
message = {
"type": msg_type,
"svrid": hex(svrid),
"content": content[:500],
"content_len": content_len,
"holder": hex(holder),
"inner": hex(inner),
}
log(f"[WECHAT_MSG] type={msg_type} svrid={hex(svrid)}")
log(f"[WECHAT_MSG] content: {content[:200]}")
# Forward to Hermes Gateway
try:
forward_to_hermes(message)
except Exception as e:
log(f"[WECHAT_MSG] forward error: {e}")
except Exception as e:
log(f"[WECHAT_MSG] error: {e}")
return False # Don't stop execution
def forward_to_hermes(msg):
"""Forward message to Hermes Gateway."""
import urllib.request
payload = json.dumps({
"model": "nova-4",
"messages": [
{
"role": "system",
"content": f"You are a WeChat message handler. A new message arrived: type={msg.get('type')}, content={msg.get('content', '')[:100]}"
}
]
}).encode('utf-8')
req = urllib.request.Request(
"http://192.168.1.246:8642/v1/chat/completions",
data=payload,
headers={
"Content-Type": "application/json",
"Authorization": "Bearer hermes123"
},
method="POST"
)
# Don't wait for response — fire and forget
try:
urllib.request.urlopen(req, timeout=2)
except Exception:
pass
def detect_wechat_base():
"""Detect WeChat binary base address from /proc/PID/maps."""
pid = WECHAT_PID
if pid is None:
try:
pid = gdb.selected_inferior().pid
except:
pass
if pid is None:
return None
try:
with open(f"/proc/{pid}/maps", "r") as f:
for line in f:
if "/opt/wechat/wechat" in line and "r--p" in line:
addr = line.split("-")[0]
return int(addr, 16)
except Exception as e:
log(f"[WECHAT_MSG] Failed to detect base: {e}")
return None
class HookWechatMessages(gdb.Command):
"""Install WeChat message hook."""
def __init__(self):
super().__init__("hook-wechat-messages", gdb.COMMAND_USER)
def invoke(self, arg, from_tty):
base = detect_wechat_base()
if base is None:
log("[WECHAT_MSG] ERROR: Could not detect WeChat base address")
return
addr = base + BP_RVA
WechatMessageBreakpoint(addr)
log(f"[WECHAT_MSG] Hook installed: base=0x{base:x} bp=0x{addr:x}")
log(f"[WECHAT_MSG] Waiting for messages...")
# Register the custom command
HookWechatMessages()
+152
View File
@@ -0,0 +1,152 @@
#!/usr/bin/env python3
"""
GDB startup script for WeChat message hooking.
Run with:
gdb -x this_script.py --args ./WeChatLinux.AppImage --no-sandbox --disable-gpu
GDB disables ASLR by default, so wechat base = 0x555555554000
"""
import gdb
import os
import time
# Breakpoint RVA from Ajax's blog (4.1.0.16, confirmed valid for 4.1.7)
BP_RVA = 0x4994BEB
# Fixed base when GDB starts process (ASLR disabled)
FIXED_BASE = 0x555555554000
LOG_FILE = "/home/hmo/projects/AgentsMeeting/gateway/linux/logs/wechat_messages.log"
def log(msg):
with open(LOG_FILE, "a") as f:
f.write(f"{msg}\n")
gdb.write(f"{msg}\n")
class WechatMessageBreakpoint(gdb.Breakpoint):
"""Breakpoint that fires on each incoming WeChat message."""
def __init__(self, address):
super().__init__(f"*{address}")
self.suppress = True
def stop(self):
try:
msg_ptr = int(gdb.parse_and_eval("$rsi"))
if msg_ptr == 0:
return False
raw_type = gdb.selected_inferior().read_memory(msg_ptr + 0x14, 4)
msg_type = int.from_bytes(raw_type, byteorder='little', signed=True)
raw_svrid = gdb.selected_inferior().read_memory(msg_ptr + 0x50, 8)
svrid = int.from_bytes(raw_svrid, byteorder='little')
raw_holder = gdb.selected_inferior().read_memory(msg_ptr + 0x20, 8)
holder = int.from_bytes(raw_holder, byteorder='little', signed=False)
if holder == 0:
return False
raw_inner = gdb.selected_inferior().read_memory(holder + 0x8, 8)
inner = int.from_bytes(raw_inner, byteorder='little', signed=False)
if inner == 0:
return False
# Read content length from inner + 0x10
raw_len = gdb.selected_inferior().read_memory(inner + 0x10, 4)
content_len = int.from_bytes(raw_len, byteorder='little', signed=False)
if content_len <= 0 or content_len > 100000:
return False
# Read content pointer from inner + 0x0
raw_cp = gdb.selected_inferior().read_memory(inner + 0x0, 8)
content_ptr = int.from_bytes(raw_cp, byteorder='little', signed=False)
if content_ptr == 0:
return False
raw_content = gdb.selected_inferior().read_memory(content_ptr, min(content_len * 2, 100000))
try:
content = raw_content.tobytes()[:content_len * 2].decode('utf-16le', errors='replace')
except:
content = str(raw_content)
# Try to read sender info from msg_ptr + 0x38 (talker wxid)
try:
raw_talker = gdb.selected_inferior().read_memory(msg_ptr + 0x38, 8)
talker_ptr = int.from_bytes(raw_talker, byteorder='little', signed=False)
if talker_ptr:
talker_data = gdb.selected_inferior().read_memory(talker_ptr, 64)
talker = talker_data.tobytes().split(b'\x00')[0].decode('utf-8', errors='replace')
else:
talker = "unknown"
except:
talker = "unknown"
info = f"[WECHAT_MSG] type={msg_type} svrid={hex(svrid)} talker={talker}"
log(info)
log(f"[WECHAT_MSG] content: {content[:300]}")
# Forward to Hermes
try:
import urllib.request
import json
payload = json.dumps({
"model": "nova-4",
"messages": [
{"role": "user", "content": f"[WeChat from {talker}] {content[:500]}"}
]
}).encode()
req = urllib.request.Request(
"http://192.168.1.246:8642/v1/chat/completions",
data=payload,
headers={"Content-Type": "application/json", "Authorization": "Bearer hermes123"},
method="POST"
)
urllib.request.urlopen(req, timeout=2)
except:
pass
except Exception as e:
log(f"[WECHAT_MSG] ERROR: {e}")
return False
class AutoHookWechat(gdb.Command):
"""Auto-hook WeChat messages on startup."""
def __init__(self):
super().__init__("auto-hook-wechat", gdb.COMMAND_USER)
def invoke(self, arg, from_tty):
bp_addr = FIXED_BASE + BP_RVA
WechatMessageBreakpoint(bp_addr)
log(f"[WECHAT_MSG] Breakpoint set at 0x{bp_addr:x}")
class OnStart(gdb.Breakpoint):
"""Breakpoint on _start to set up hooks after process loads."""
def __init__(self):
super().__init__("_start")
def stop(self):
gdb.execute("auto-hook-wechat")
return True # Stop so user can continue
# Configure GDB
gdb.execute("set pagination off")
gdb.execute("set confirm off")
gdb.execute("handle SIG33 pass nostop noprint")
gdb.execute("set follow-fork-mode child")
# Register our commands
AutoHookWechat()
# Set breakpoint at _start so we hook after process loads
OnStart()
log("[WECHAT_MSG] GDB startup script loaded. Type 'run' to start WeChat.")
@@ -0,0 +1,296 @@
#!/usr/bin/env python3
"""
MoWeChat Message Monitor — reads WeChat process memory to capture incoming messages.
No GDB, no ptrace attach, no crash risk.
Uses /proc/PID/mem to scan the heap for message patterns.
"""
import os
import re
import sys
import json
import time
import hashlib
import logging
import argparse
import urllib.request
import urllib.error
# ── Configuration ──────────────────────────────────────────────
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
os.makedirs(LOG_DIR, exist_ok=True)
LOG_FILE = os.path.join(LOG_DIR, "wechat_msg_monitor.log")
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_messages.json")
HERMES_API = "http://192.168.1.246:8642/v1/chat/completions"
HERMES_KEY = "hermes123"
# Polling interval (seconds)
POLL_INTERVAL = 2
# Minimum message content length to consider valid
MIN_MSG_LEN = 2
MAX_MSG_LEN = 10000
# WeChat binary marker in /proc/PID/maps
WECHAT_BINARY_MARKER = "/opt/wechat/wechat"
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
handlers=[
logging.FileHandler(LOG_FILE),
logging.StreamHandler()
]
)
log = logging.getLogger("mowechat")
class WeChatMemoryMonitor:
"""Monitors WeChat process memory for new messages."""
def __init__(self):
self.pid = None
self.seen = self._load_seen()
self.heap_regions = []
self.anon_regions = []
self.wxid_pattern = re.compile(rb'wxid_[a-zA-Z0-9]{10,28}\x00')
# Chinese characters and ASCII printable
self.msg_pattern = re.compile(rb'[\x20-\x7e\x80-\xff]{2,}')
def _load_seen(self):
"""Load previously seen message hash set."""
try:
with open(SAWN_FILE, 'r') as f:
return set(json.load(f))
except:
return set()
def _save_seen(self):
"""Save seen message hash set (trim to last 1000)."""
trimmed = set(list(self.seen)[-1000:])
try:
with open(SAWN_FILE, 'w') as f:
json.dump(list(trimmed), f)
except:
pass
def find_wechat(self):
"""Find the main wechat process PID."""
for p in os.listdir('/proc'):
if not p.isdigit():
continue
try:
with open(f'/proc/{p}/maps', 'r') as f:
content = f.read(4096)
if WECHAT_BINARY_MARKER in content:
self.pid = int(p)
return True
except:
continue
return False
def update_memory_regions(self):
"""Read /proc/PID/maps to find heap and anonymous regions."""
self.heap_regions = []
self.anon_regions = []
try:
with open(f'/proc/{self.pid}/maps', 'r') as f:
for line in f:
parts = line.strip().split()
if len(parts) < 5:
continue
addr_range = parts[0].split('-')
start = int(addr_range[0], 16)
end = int(addr_range[1], 16)
perms = parts[1]
name = parts[-1] if len(parts) > 4 else ''
size_mb = (end - start) / (1024 * 1024)
if size_mb > 50: # Skip huge regions
continue
if perms.startswith('rw'):
if name == '[heap]':
self.heap_regions.append((start, end))
elif not name: # Anonymous mapping
self.anon_regions.append((start, end))
except Exception as e:
log.error(f"Failed to read maps: {e}")
def scan_region(self, start, end, region_name=""):
"""Scan a memory region for WeChat message patterns."""
messages = []
try:
with open(f'/proc/{self.pid}/mem', 'rb') as mem:
mem.seek(start)
data = mem.read(end - start)
# Find all wxid patterns (likely message senders)
for match in self.wxid_pattern.finditer(data):
wxid = match.group(0).decode('utf-8', errors='replace').strip('\x00')
pos = match.start()
# Look for message content after the wxid
# Message content is typically within 256 bytes after the wxid
content_start = pos + len(match.group())
search_end = min(content_start + 512, len(data))
# Find the next null-terminated string that's not the wxid itself
for cmatch in self.msg_pattern.finditer(data, content_start, search_end):
content = cmatch.group(0).decode('utf-8', errors='replace').strip('\x00')
# Filter out known non-message patterns
if len(content) < MIN_MSG_LEN or len(content) > MAX_MSG_LEN:
continue
if content.startswith('wxid_') or content.startswith('http'):
# Skip if it's another wxid or begins with a URL
# Actually, messages CAN start with URLs, so only skip wxid
if content.startswith('wxid_'):
continue
# Create a unique hash for dedup
msg_hash = hashlib.md5(f"{wxid}:{content}".encode()).hexdigest()
if msg_hash not in self.seen:
self.seen.add(msg_hash)
messages.append({
'wxid': wxid,
'content': content,
'pos': hex(pos + start),
'region': region_name,
})
break # Only one message per wxid
except (PermissionError, ProcessLookupError) as e:
log.warning(f"Memory read failed: {e}")
except Exception as e:
log.error(f"Scan error at {region_name}: {e}")
return messages
def scan_all(self):
"""Scan all relevant memory regions for new messages."""
self.update_memory_regions()
all_messages = []
for start, end in self.heap_regions:
msgs = self.scan_region(start, end, "[heap]")
all_messages.extend(msgs)
for start, end in self.anon_regions[:20]: # Limit anonymous regions
size_mb = (end - start) / (1024 * 1024)
if size_mb > 10: # Skip large anonymous maps
continue
msgs = self.scan_region(start, end, "[anon]")
all_messages.extend(msgs)
if all_messages:
self._save_seen()
return all_messages
def forward_to_hermes(self, msg):
"""Forward a captured message to Hermes Gateway."""
payload = json.dumps({
"model": "nova-4",
"messages": [
{
"role": "system",
"content": (
"You receive WeChat messages. "
"Process this message according to the standard pipeline."
)
},
{
"role": "user",
"content": (
f"[WeChat Message]\n"
f"From: {msg['wxid']}\n"
f"Content: {msg['content']}"
)
}
]
}).encode('utf-8')
try:
req = urllib.request.Request(
HERMES_API,
data=payload,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {HERMES_KEY}"
},
method="POST"
)
urllib.request.urlopen(req, timeout=3)
log.info(f"Forwarded: {msg['wxid']}: {msg['content'][:60]}")
except Exception as e:
log.warning(f"Forward failed: {e}")
def run(self, once=False):
"""Main monitoring loop."""
if not self.find_wechat():
log.error("WeChat process not found!")
return False
log.info(f"Found WeChat PID: {self.pid}")
if once:
messages = self.scan_all()
for msg in messages:
log.info(f"Captured: [{msg['wxid']}] {msg['content'][:80]}")
return messages
log.info(f"Starting monitor (poll every {POLL_INTERVAL}s)...")
while True:
try:
# Check process is alive
if not os.path.exists(f'/proc/{self.pid}'):
log.warning("WeChat process died, re-finding...")
if not self.find_wechat():
log.error("Cannot find WeChat, sleeping 30s...")
time.sleep(30)
continue
log.info(f"Re-attached to PID: {self.pid}")
messages = self.scan_all()
for msg in messages:
log.info(f"NEW: [{msg['wxid']}] {msg['content'][:80]}")
# self.forward_to_hermes(msg)
time.sleep(POLL_INTERVAL)
except KeyboardInterrupt:
log.info("Monitor stopped.")
break
except Exception as e:
log.error(f"Monitor error: {e}")
time.sleep(10)
return True
def main():
parser = argparse.ArgumentParser(description="MoWeChat Message Monitor")
parser.add_argument("--once", action="store_true", help="Scan once and exit")
parser.add_argument("--foreground", action="store_true", help="Run in foreground")
args = parser.parse_args()
monitor = WeChatMemoryMonitor()
if args.once:
msgs = monitor.run(once=True)
if msgs:
print(json.dumps(msgs, ensure_ascii=False, indent=2))
else:
print("No new messages found.")
return
monitor.run()
if __name__ == "__main__":
main()
@@ -0,0 +1,343 @@
#!/usr/bin/env python3
"""
MoWeChat Message Monitor v2 — reads WeChat process memory to capture incoming messages.
Improvements over v1:
- Better message extraction: looks for content near known wxids with length-prefix structure
- Content filtering: rejects binary garbage, only keeps real text
- Tracks messages by content + wxid hash
"""
import os
import re
import sys
import json
import time
import hashlib
import logging
import argparse
import urllib.request
import urllib.error
# ── Configuration ──────────────────────────────────────────────
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
os.makedirs(LOG_DIR, exist_ok=True)
LOG_FILE = os.path.join(LOG_DIR, "wechat_msg_monitor.log")
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_messages.json")
HERMES_API = "http://192.168.1.246:8642/v1/chat/completions"
HERMES_KEY = "hermes123"
POLL_INTERVAL = 3
WECHAT_BINARY_MARKER = "/opt/wechat/wechat"
# Known message sender wxids (populated during scanning)
OWN_WXID = "wxid_c0a6izmwd78y22" # 老爸 (莫语不语)
BOT_WXID = "wxid_7onnerpx2s2l22" # 莫荷自己
INTERESTING_WXIDS = {OWN_WXID, BOT_WXID}
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
handlers=[
logging.FileHandler(LOG_FILE),
logging.StreamHandler()
]
)
log = logging.getLogger("mowechat")
def is_valid_text(s, min_ratio=0.5):
"""Check if a string looks like real text (vs binary garbage)."""
if len(s) < 2:
return False
# Count printable chars
printable = 0
for ch in s:
if ch.isprintable() and (ch.isalpha() or ch.isspace() or ch.isdigit() or ch in '.,!?;:\'\"-()[]{}@#_/\\'):
printable += 1
return printable / max(len(s), 1) >= min_ratio
def extract_strings(data, min_len=4):
"""Extract readable strings from binary data."""
result = []
current = b''
for b in data:
if 32 <= b < 127 or b in (0x0a, 0x0d, 0x09):
current += bytes([b])
elif b >= 0x80: # Part of multi-byte UTF-8
current += bytes([b])
else:
if len(current) >= min_len:
try:
decoded = current.decode('utf-8', errors='replace')
if is_valid_text(decoded):
result.append(decoded)
except:
pass
current = b''
if len(current) >= min_len:
try:
decoded = current.decode('utf-8', errors='replace')
if is_valid_text(decoded):
result.append(decoded)
except:
pass
return result
class WeChatMemoryMonitor:
"""Monitors WeChat process memory for new messages."""
def __init__(self):
self.pid = None
self.seen = self._load_seen()
self.heap_region = None
self.wxid_pattern = re.compile(rb'wxid_[a-zA-Z0-9]{10,28}\x00')
# Known message sources
self.known_wxids = set(INTERESTING_WXIDS)
def _load_seen(self):
try:
with open(SAWN_FILE, 'r') as f:
return set(json.load(f))
except:
return set()
def _save_seen(self):
trimmed = set(list(self.seen)[-2000:])
try:
with open(SAWN_FILE, 'w') as f:
json.dump(list(trimmed), f)
except:
pass
def find_wechat(self):
"""Find the main wechat process PID and heap region."""
for p in os.listdir('/proc'):
if not p.isdigit():
continue
try:
with open(f'/proc/{p}/maps', 'r') as f:
content = f.read(8192)
if WECHAT_BINARY_MARKER in content:
self.pid = int(p)
# Find heap
for line in content.split('\n'):
if '[heap]' in line:
parts = line.split()
addr_range = parts[0].split('-')
self.heap_region = (int(addr_range[0], 16), int(addr_range[1], 16))
return True
except:
continue
return False
def scan_message(self, mem, wxid_bytes, wxid_pos, wxid_end):
"""Try to extract a real message following a wxid in memory."""
wxid_str = wxid_bytes.decode('utf-8', errors='replace').strip('\x00')
# Search within 512 bytes after the wxid for message content
search_start = wxid_end
search_end = min(search_start + 512, self.heap_region[1] - self.heap_region[0] if self.heap_region else search_start + 512)
try:
mem.seek(search_start)
data = mem.read(search_end - search_start)
except:
return None
# Look for null-terminated strings that look like messages
messages = []
current = b''
for b in data:
if b == 0:
if len(current) >= 3:
try:
text = current.decode('utf-8', errors='replace')
# Filter: must have real text content
if is_valid_text(text) and len(text) >= 2 and not text.startswith('wxid_'):
messages.append(text)
except:
pass
current = b''
else:
current += bytes([b])
# Also try to find message by looking for it at a known offset pattern
# In WeChat's structure: [msg_type(4)] [svr_id(8)] [content_ptr(8)] [content_len(4)] [content...]
if not messages:
return None
# Pick the best candidate (longest, most printable)
best = max(messages, key=lambda m: (len(m), sum(1 for c in m if c.isalpha() or c.isdigit())))
return best
def scan_heap(self):
"""Scan the heap region for messages."""
if not self.heap_region:
return []
start, end = self.heap_region
messages = []
try:
with open(f'/proc/{self.pid}/mem', 'rb') as mem:
# Read entire heap
mem.seek(start)
size = min(end - start, 50 * 1024 * 1024) # Max 50MB
data = mem.read(size)
# Find all wxid occurrences
for match in self.wxid_pattern.finditer(data):
wxid = match.group(0).decode('utf-8', errors='replace').strip('\x00')
pos = match.start()
global_pos = start + pos
# Look for message content in the next 256 bytes
content_area = data[pos + len(match.group()):pos + len(match.group()) + 256]
# Try to find a null-terminated UTF-8 string that looks like a message
for cmatch in re.finditer(rb'([\x20-\x7e\x80-\xff\x00]{4,})', content_area):
raw = cmatch.group(0)
# Remove trailing nulls
raw = raw.rstrip(b'\x00')
if len(raw) < 3:
continue
try:
text = raw.decode('utf-8', errors='replace')
except:
continue
# FILTER: Must have substantial real text content
alpha_count = sum(1 for c in text if c.isalpha() or '\u4e00' <= c <= '\u9fff')
total_len = len(text)
if total_len < 3:
continue
# Skip if it's just another wxid
if text.startswith('wxid_'):
continue
# Skip binary garbage (must be >= 40% alphabetic/CJK chars)
if alpha_count / max(total_len, 1) < 0.3:
continue
# Skip if it looks like a URL/path with no message content
if text.startswith('http') or text.startswith('/'):
continue
# Create hash for dedup
msg_hash = hashlib.md5(f"{wxid}:{text}".encode()).hexdigest()
if msg_hash not in self.seen:
self.seen.add(msg_hash)
messages.append({
'wxid': wxid,
'content': text,
'pos': hex(global_pos),
'alpha_ratio': f"{alpha_count/total_len:.2f}",
})
break # One best message per wxid occurrence
except (PermissionError, ProcessLookupError) as e:
log.warning(f"Memory read failed: {e}")
except Exception as e:
log.error(f"Heap scan error: {e}")
return messages
def forward_to_hermes(self, msg):
"""Forward to Hermes Gateway."""
payload = json.dumps({
"model": "nova-4",
"messages": [
{"role": "system", "content": "You receive WeChat messages. Process according to standard pipeline."},
{"role": "user", "content": f"[WeChat] From: {msg['wxid']}\n{msg['content']}"}
]
}).encode('utf-8')
try:
req = urllib.request.Request(
HERMES_API, data=payload,
headers={"Content-Type": "application/json", "Authorization": f"Bearer {HERMES_KEY}"},
method="POST"
)
urllib.request.urlopen(req, timeout=3)
log.info(f"Forwarded: {msg['wxid']}: {msg['content'][:60]}")
except Exception as e:
log.warning(f"Forward failed: {e}")
def run(self, once=False):
if not self.find_wechat():
log.error("WeChat not found!")
return []
log.info(f"WeChat PID: {self.pid}, heap: 0x{self.heap_region[0]:x}-0x{self.heap_region[1]:x}" if self.heap_region else f"PID: {self.pid}, no heap")
if once:
messages = self.scan_heap()
seen_wxids = set()
for msg in messages:
if msg['wxid'] not in seen_wxids:
log.info(f" [{msg['wxid']}] {msg['content'][:80]}")
seen_wxids.add(msg['wxid'])
if messages:
self._save_seen()
return messages
log.info(f"Monitoring every {POLL_INTERVAL}s...")
while True:
try:
if not os.path.exists(f'/proc/{self.pid}'):
log.warning("WeChat died, re-finding...")
if not self.find_wechat():
time.sleep(30)
continue
messages = self.scan_heap()
for msg in messages:
log.info(f"NEW: [{msg['wxid']}] {msg['content'][:80]}")
# self.forward_to_hermes(msg)
if messages:
self._save_seen()
time.sleep(POLL_INTERVAL)
except KeyboardInterrupt:
break
except Exception as e:
log.error(f"Error: {e}")
time.sleep(10)
return True
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--once", action="store_true")
parser.add_argument("--foreground", action="store_true")
args = parser.parse_args()
monitor = WeChatMemoryMonitor()
if args.once:
msgs = monitor.run(once=True)
# Only show real messages (from known contacts or with good content)
real_msgs = [m for m in msgs if m['wxid'] in INTERESTING_WXIDS or float(m.get('alpha_ratio', 0)) > 0.5]
print(f"\nFound {len(msgs)} potential messages, {len(real_msgs)} from known contacts")
for m in real_msgs:
print(f" [{m['wxid']}] {m['content'][:100]}")
print(f"\n(Total seen: {len(monitor.seen)})")
return
monitor.run()
if __name__ == "__main__":
main()
@@ -0,0 +1,206 @@
#!/usr/bin/env python3
"""
MoWeChat Message Monitor v3 — precise message extraction from WeChat heap.
Strategy: look for message content near known wxids with structural patterns.
"""
import os, re, sys, json, time, hashlib, struct, logging, argparse
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
os.makedirs(LOG_DIR, exist_ok=True)
LOG_FILE = os.path.join(LOG_DIR, "wechat_msg_v3.log")
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_v3.json")
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s",
handlers=[logging.FileHandler(LOG_FILE), logging.StreamHandler()])
log = logging.getLogger("mv3")
# Known wxids (discovered from memory)
OWN_WXID = "wxid_c0a6izmwd78y22" # 莫语不语 (老爸)
BOT_WXID = "wxid_7onnerpx2s2l22" # 莫荷
INTERESTING = {OWN_WXID, BOT_WXID}
class Monitor:
def __init__(self):
self.pid = None
self.heap = (0, 0)
self.seen = self._load_seen()
self.wxid_re = re.compile(rb'wxid_[a-zA-Z0-9]{10,28}\x00')
def _load_seen(self):
try:
with open(SAWN_FILE) as f:
return set(json.load(f))
except:
return set()
def _save_seen(self):
s = set(list(self.seen)[-2000:])
try:
with open(SAWN_FILE, 'w') as f:
json.dump(list(s), f)
except:
pass
def find_wechat(self):
for p in os.listdir('/proc'):
if not p.isdigit(): continue
try:
with open(f'/proc/{p}/maps') as f:
c = f.read()
if "/opt/wechat/wechat" in c:
self.pid = int(p)
for line in c.split('\n'):
if '[heap]' in line:
a = line.split()[0].split('-')
self.heap = (int(a[0], 16), int(a[1], 16))
return True
except:
continue
return False
def is_valid_msg(self, text):
"""Check if text is a real WeChat message."""
if len(text) < 2 or len(text) > 5000:
return False
if text.startswith('wxid_') or text.startswith('http') or text.startswith('/'):
return False
# Count CJK + ASCII letters + digits
good = sum(1 for c in text if c.isalpha() or c.isdigit() or c.isspace() or c in '.,!?;:\'"-()[]{}@#_/\\')
if good / max(len(text), 1) < 0.6:
return False
# Must have at least 3 CJK chars OR 5 ASCII chars
cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
ascii_alpha = sum(1 for c in text if c.isascii() and c.isalpha())
return cjk >= 2 or ascii_alpha >= 4
def scan(self):
start, end = self.heap
if not start:
return []
try:
with open(f'/proc/{self.pid}/mem', 'rb') as mem:
mem.seek(start)
data = mem.read(min(end - start, 60 * 1024 * 1024))
except:
return []
results = []
# Strategy: find wxid -> look for a nearby null-terminated UTF-8 string
# that looks like real message content
# First pass: find all wxid positions in a bounded range
for m in self.wxid_re.finditer(data):
wxid = m.group(0).decode().strip('\x00')
if wxid not in INTERESTING:
continue
pos = m.end() # position after wxid\0
# Scan forward up to 256 bytes for a printable string
scan_end = min(pos + 256, len(data))
chunk = data[pos:scan_end]
# Find the first null-terminated ASCII/UTF-8 string
# that's at least 3 chars and not binary garbage
i = 0
while i < len(chunk):
if chunk[i] == 0:
i += 1
continue
# Start of potential string
s_start = i
while i < len(chunk) and chunk[i] != 0 and chunk[i] >= 0x20:
i += 1
s_len = i - s_start
if s_len >= 3:
try:
text = chunk[s_start:s_start+s_len].decode('utf-8', errors='replace')
if self.is_valid_msg(text):
h = hashlib.md5(f"{wxid}:{text}".encode()).hexdigest()
if h not in self.seen:
self.seen.add(h)
results.append({'wxid': wxid, 'text': text})
except:
pass
# Skip null
while i < len(chunk) and chunk[i] == 0:
i += 1
# Second strategy: scan heap for standalone CJK strings >= 4 chars
# that are NOT preceded by known binary patterns (to catch the actual
# message content which may be at a different address than the wxid)
for cm in re.finditer(rb'([\x80-\xff][\x80-\xff][\x80-\xff][\x80-\xff])', data):
pos = cm.start()
# Read up to 200 bytes from here
snippet = data[pos:pos+200]
# Find first null byte
null_pos = snippet.find(b'\x00')
if null_pos > 0:
snippet = snippet[:null_pos]
if len(snippet) < 4:
continue
try:
text = snippet.decode('utf-8', errors='replace')
except:
continue
# Only accept strings with substantial CJK content
cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
if cjk < 2:
continue
if len(text) > 200:
text = text[:200]
h = hashlib.md5(f"cjk:{text}".encode()).hexdigest()
if h not in self.seen and self.is_valid_msg(text):
self.seen.add(h)
results.append({'wxid': 'unknown', 'text': text})
if results:
self._save_seen()
return results
def run(self, once=False):
if not self.find_wechat():
log.error("WeChat not found")
return []
log.info(f"PID {self.pid}, heap 0x{self.heap[0]:x}")
if once:
msgs = self.scan()
for m in msgs:
log.info(f" [{m['wxid']}] {m['text'][:80]}")
return msgs
while True:
try:
if not os.path.exists(f'/proc/{self.pid}'):
log.warning("WeChat died")
if not self.find_wechat():
time.sleep(30)
continue
msgs = self.scan()
for m in msgs:
log.info(f"NEW [{m['wxid']}] {m['text'][:80]}")
time.sleep(3)
except KeyboardInterrupt:
break
except:
time.sleep(10)
if __name__ == "__main__":
m = Monitor()
if '--once' in sys.argv:
msgs = m.run(once=True)
print(f"\nFound {len(msgs)} messages")
for msg in msgs:
print(f" [{msg['wxid']}] {msg['text']}")
else:
m.run()
+185
View File
@@ -0,0 +1,185 @@
#!/usr/bin/env python3
"""MoWeChat v4 — efficient heap scanner for WeChat messages."""
import os, re, sys, json, time, hashlib, logging
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
os.makedirs(LOG_DIR, exist_ok=True)
LOG_FILE = os.path.join(LOG_DIR, "wechat_v4.log")
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_v4.json")
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s",
handlers=[logging.FileHandler(LOG_FILE), logging.StreamHandler()])
log = logging.getLogger("wc4")
# Known wxids
WXID_DAD = "wxid_c0a6izmwd78y22"
WXID_MOHE = "wxid_7onnerpx2s2l22"
KNOWN = {WXID_DAD, WXID_MOHE}
def heap_of(pid):
"""Get heap address range for a PID."""
with open(f'/proc/{pid}/maps') as f:
for line in f:
if '[heap]' in line:
parts = line.split()
a, b = parts[0].split('-')
return int(a, 16), int(b, 16)
return None, None
def read_heap(pid):
"""Read the heap memory."""
start, end = heap_of(pid)
if not start:
return None, None, None
with open(f'/proc/{pid}/mem', 'rb') as f:
f.seek(start)
data = f.read(min(end - start, 40 * 1024 * 1024))
return data, start, end
def find_messages(data, heap_start):
"""Extract messages from heap data efficiently."""
msgs = []
seen = set()
# Pattern: known wxid followed by content
for wxid in KNOWN:
pattern = wxid.encode() + b'\x00'
pos = 0
limit = 0
while limit < 100:
idx = data.find(pattern, pos)
if idx < 0:
break
limit += 1
after = idx + len(pattern)
snippet = data[after:after+300]
i = 0
while i < len(snippet):
if snippet[i] == 0:
i += 1
continue
s = i
while i < len(snippet) and snippet[i] != 0:
i += 1
if i - s < 3:
i += 1
continue
try:
text = snippet[s:i].decode('utf-8', errors='replace')
except:
i += 1
continue
# Score: CJK chars + ASCII letters
cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
alpha = sum(1 for c in text if c.isascii() and c.isalpha())
score = cjk * 3 + alpha
if score < 5:
i += 1
continue
h = hashlib.md5(text.encode()).hexdigest()
if h not in seen:
seen.add(h)
msgs.append((text, wxid, heap_start + after + s))
break
pos = idx + 1
# Also scan for the \xNNcontent pattern directly (faster catch-all)
# Pattern: [1 byte length/bufsize][readable text]
i = 0
limit2 = 0
while i < len(data) - 10 and limit2 < 500:
b = data[i]
# Possible prefix: small values (0x04-0x40 = 4-64, common buffer sizes)
if 0x04 <= b <= 0x40:
# Check if what follows looks like text
j = i + 1
text_bytes = bytearray()
while j < len(data) and j - i - 1 < b and data[j] != 0:
if data[j] >= 0x20:
text_bytes.append(data[j])
j += 1
else:
break
if len(text_bytes) >= 5:
try:
text = text_bytes.decode('utf-8', errors='replace')
except:
i += 1
continue
cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
alpha = sum(1 for c in text if c.isascii() and c.isalpha())
if (cjk >= 2 or alpha >= 6) and text[0] not in '&/\\.':
h = hashlib.md5(('any:' + text).encode()).hexdigest()
if h not in seen:
# Find which wxid is near this text (search backward)
nearby = data[max(0, i-200):i]
wxid_match = re.search(rb'wxid_[a-zA-Z0-9]{10,28}', nearby)
owner = wxid_match.group(0).decode() if wxid_match else 'unknown'
seen.add(h)
msgs.append((text, owner, heap_start + i))
limit2 += 1
i += 1
return msgs
def find_wechat():
"""Find the main wechat process PID."""
for p in os.listdir('/proc'):
if not p.isdigit():
continue
try:
with open(f'/proc/{p}/maps') as f:
if "/opt/wechat/wechat" in f.read():
return int(p)
except:
continue
return None
def main():
pid = find_wechat()
if not pid:
log.error("WeChat not found")
return 1
log.info(f"Found PID {pid}")
data, hs, he = read_heap(pid)
if data is None:
log.error("Cannot read heap")
return 1
log.info(f"Heap: 0x{hs:x}-0x{he:x} ({len(data)} bytes)")
msgs = find_messages(data, hs)
# Deduplicate
seen_texts = set()
for text, wxid, addr in msgs:
key = text.strip()
if key not in seen_texts and len(key) >= 2:
seen_texts.add(key)
print(f"{wxid}: {text}")
if not msgs:
print("No messages found")
return 0
if __name__ == "__main__":
sys.exit(main())
+17
View File
@@ -0,0 +1,17 @@
[Unit]
Description=MoWeChat — 莫荷微信 Bot (Linux iLink 版)
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
User=hmo
WorkingDirectory=/home/hmo/projects/AgentsMeeting/gateway/linux
ExecStart=/home/hmo/projects/AgentsMeeting/.venv/bin/python3 /home/hmo/projects/AgentsMeeting/gateway/linux/wechat_agent.py
Restart=on-failure
RestartSec=5
StandardOutput=append:/home/hmo/projects/AgentsMeeting/logs/wechat_agent_linux.log
StandardError=append:/home/hmo/projects/AgentsMeeting/logs/wechat_agent_linux.log
[Install]
WantedBy=multi-user.target
+703
View File
@@ -0,0 +1,703 @@
#!/usr/bin/env python3
"""
MoWeChat — 莫荷微信 Bot (Linux iLink 版)
替换方案:将 Windows wxhelper DLL 注入方案替换为腾讯官方 iLink Bot API。
架构:微信 → iLink Bot API (ilinkai.weixin.qq.com) → Hermes Gateway (:8642)
依赖:
pip install weixin-bot-sdk aiohttp requests
首次运行:
python3 wechat_agent.py
终端显示二维码 → 用莫荷的手机微信扫码 → 凭证保存后自动运行
版本: 1.0
"""
from __future__ import annotations
import asyncio
import json
import logging
import os
import re
import sys
import threading
import time
import uuid
from http.server import HTTPServer, BaseHTTPRequestHandler
from urllib.parse import urlparse
from weixin_bot import WeixinBot, IncomingMessage
# ── Configuration ──────────────────────────────────────────────
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
PROJECT_ROOT = os.path.dirname(SCRIPT_DIR) # gateway/
AGENTS_ROOT = os.path.dirname(PROJECT_ROOT) # AgentsMeeting/
LOG_DIR = os.path.join(AGENTS_ROOT, "logs")
TEMP_DIR = os.path.join(AGENTS_ROOT, "temp")
# Hermes Gateway — same endpoint as Windows wechat_agent.py uses
HERMES_API = "http://192.168.1.246:8642/v1/chat/completions"
HERMES_KEY = "hermes123"
# Mohe's iLink Bot user_id (set after login)
MOHE_USER_ID = None
# ── Setup ──────────────────────────────────────────────────────
os.makedirs(LOG_DIR, exist_ok=True)
os.makedirs(TEMP_DIR, exist_ok=True)
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
datefmt="%H:%M:%S",
handlers=[
logging.FileHandler(os.path.join(LOG_DIR, "wechat_agent_linux.log"), encoding="utf-8"),
logging.StreamHandler(),
],
)
log = logging.getLogger("mohe")
# ── PID lock ───────────────────────────────────────────────────
PID_FILE = os.path.join(TEMP_DIR, "wechat_agent_linux.pid")
def acquire_pid_lock():
"""Write PID file, exit if another instance is running."""
if os.path.exists(PID_FILE):
try:
with open(PID_FILE) as f:
old_pid = int(f.read().strip())
# Check if process still exists
os.kill(old_pid, 0)
log.error(f"Another instance running (PID {old_pid}). Exiting.")
sys.exit(1)
except (ProcessLookupError, ValueError):
pass # Stale PID file
with open(PID_FILE, "w") as f:
f.write(str(os.getpid()))
def release_pid_lock():
try:
os.remove(PID_FILE)
except OSError:
pass
# ── Hermes Gateway API ─────────────────────────────────────────
import requests as _requests
def call_hermes(wxid_or_user: str, content: str) -> str | None:
"""
Send message to Hermes Gateway, get reply.
Returns the reply text, or None if silent/no reply.
"""
headers = {
"Authorization": f"Bearer {HERMES_KEY}",
"X-Hermes-Session-Id": "sisyphus",
"Content-Type": "application/json",
}
is_group = "@chatroom" in wxid_or_user
system_prompt = (
"你是莫荷,女生。群聊中回复要短。"
if is_group
else "你是莫荷,女生。回复简短自然,像朋友聊天。"
)
# Prefix with sender info like the Windows version does
chat_type = "Group" if is_group else "Private"
prefixed = f"[{chat_type}][{wxid_or_user}] {content}"
body = {
"model": "hermes-agent",
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": prefixed},
],
}
try:
r = _requests.post(
HERMES_API,
json=body,
headers=headers,
timeout=60,
)
if r.status_code == 200:
data = r.json()
choice = data["choices"][0]
finish_reason = choice.get("finish_reason", "")
if finish_reason == "silent":
log.info("Hermes: __SILENT__ (group skip)")
return None
return choice["message"]["content"]
log.warning(f"Hermes HTTP {r.status_code}: {r.text[:200]}")
except Exception as e:
log.error(f"Hermes API error: {e}")
return None
# ── Image Generation (SenseNova) ───────────────────────────────
SENSENOVA_KEY = "sk-aRNj3UwKSLPsDfh15QNTPwbHxahblfaO"
SENSENOVA_URL = "https://token.sensenova.cn/v1"
def generate_image(prompt: str, ratio: str = "1:1") -> str | None:
"""Generate image via SenseNova API. Returns URL or None."""
size_map = {
"1:1": "2048x2048", "16:9": "2752x1536", "9:16": "1536x2752",
"3:2": "2496x1664", "2:3": "1664x2496", "3:4": "1760x2368", "4:3": "2368x1760",
}
size = size_map.get(ratio, "2048x2048")
try:
r = _requests.post(
SENSENOVA_URL + "/images/generations",
json={
"model": "sensenova-u1-fast",
"prompt": prompt,
"size": size,
"response_format": "url",
},
headers={
"Authorization": f"Bearer {SENSENOVA_KEY}",
"Content-Type": "application/json",
},
timeout=180,
)
if r.status_code == 200:
return r.json()["data"][0]["url"]
log.warning(f"Image gen HTTP {r.status_code}: {r.text[:200]}")
except Exception as e:
log.error(f"Image gen error: {e}")
return None
# ── OCR via Doubao Vision API ──────────────────────────────────
DOUBAO_KEY = "b0359bed-09f2-49e2-a53c-32ba057412e3"
def ocr_image_from_url(img_url: str) -> str | None:
"""Download image from URL and OCR it. Returns text or None."""
try:
r = _requests.get(img_url, timeout=30)
if r.status_code != 200:
log.warning(f"Image download HTTP {r.status_code}")
return None
import base64
b64 = base64.b64encode(r.content).decode()
return _ocr_base64(b64)
except Exception as e:
log.error(f"Image download/OCR error: {e}")
return None
def _ocr_base64(b64_data: str) -> str | None:
"""OCR from base64-encoded image data."""
try:
r = _requests.post(
"https://ark.cn-beijing.volces.com/api/coding/v3/chat/completions",
json={
"model": "doubao-seed-code",
"messages": [{
"role": "user",
"content": [
{"type": "text", "text": "请识别这张图片中的所有中文和英文字符,保持原文输出,包括数字、表格、百分比的完整结构。严格逐行逐列输出所有数据,不要省略、不要总结。"},
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64_data}"}},
],
}],
},
headers={"Authorization": f"Bearer {DOUBAO_KEY}", "Content-Type": "application/json"},
timeout=60,
)
if r.status_code == 200:
text = r.json()["choices"][0]["message"]["content"].strip()
log.info(f"OCR OK ({len(text)} chars)")
return text
log.warning(f"OCR HTTP {r.status_code}: {r.text[:200]}")
except Exception as e:
log.error(f"OCR error: {e}")
return None
# ── Article Processor ──────────────────────────────────────────
def fetch_article(url: str) -> dict | None:
"""Fetch article content via local article_processor (:5810)."""
try:
import urllib.request as ur
req = ur.Request(
"http://127.0.0.1:5810/process",
data=json.dumps({"url": url}).encode("utf-8"),
headers={"Content-Type": "application/json"},
)
with ur.urlopen(req, timeout=180) as resp:
result = json.loads(resp.read().decode("utf-8"))
if result.get("status") == "ok":
return {
"title": result.get("title", ""),
"content": result.get("content", "")[:3000],
"images": result.get("images_ocr", 0),
}
log.warning(f"Article processor error: {result.get('error','')[:100]}")
except Exception as e:
log.error(f"Article fetch error: {e}")
return None
# ── Main Message Handler ──────────────────────────────────────
def detect_group(msg: IncomingMessage) -> str:
"""
Detect if a message is from a group chat.
Returns the group_id if it's a group, or empty string if private chat.
"""
raw = msg.raw
# Check group_id field directly from protocol
gid = raw.get("group_id", "") or ""
if gid:
return gid
# Also check session_id for group patterns (contains 'chatroom' or multiple '@')
session_id = raw.get("session_id", "") or ""
if "@chatroom" in session_id:
return session_id.split("#")[0] if "#" in session_id else session_id
return ""
def extract_article_url(text: str) -> str | None:
"""Extract article URL from text. Supports WeChat public accounts, Xiaohongshu, and common share links."""
patterns = [
r"https?://mp\.weixin\.qq\.com[^\"'\s<)<>\[\]]+",
r"https?://(?:www\.)?xiaohongshu\.com[^\"'\s<)<>\[\]]+",
r"https?://xhslink\.com[^\"'\s<)<>\[\]]+",
r"https?://(?:www\.)?zhihu\.com[^\"'\s<)<>\[\]]+",
]
for p in patterns:
m = re.search(p, text)
if m:
return m.group(0)
return None
async def handle_incoming(msg: IncomingMessage, bot: WeixinBot):
"""Process an incoming message from WeChat via iLink SDK."""
log.info(f"[{msg.type}] {msg.user_id}: {(msg.text or '')[:80]}")
fu = msg.user_id
content = msg.text or ""
msg_type = msg.type
# ── Group chat detection ──
group_id = detect_group(msg)
if group_id:
log.info(f"GROUP message from {group_id}, sender={fu}")
# Use the group_id as the conversation identifier (like Windows version uses roomid)
conv_id = group_id
prefix = "[Group]"
else:
conv_id = fu
prefix = "[Private]"
# ── TEXT message ──
if msg_type == "text":
handler_input = content
# Check for ref_msg (forwarded/quoted content like articles)
ref_text = ""
for item in msg.raw.get("item_list", []):
ref_msg = item.get("ref_msg")
if ref_msg:
ref_title = ref_msg.get("title", "")
ref_item = ref_msg.get("message_item", {})
if isinstance(ref_item, dict) and ref_item.get("type") == 1:
ref_text_data = ref_item.get("text_item", {}).get("text", "")
if ref_text_data:
ref_text = f"\n[引用消息] {ref_text_data[:500]}"
if ref_title:
handler_input = f"[老莫转发了一篇文章] 标题: {ref_title}\n\n{content}{ref_text}"
# Detect article URLs (WeChat public account, Xiaohongshu, etc.)
article_url = extract_article_url(handler_input)
if article_url:
log.info(f"Article URL detected: {article_url[:80]}")
article = fetch_article(article_url)
if article:
title = article.get("title", "")
article_text = article.get("content", "")[:2000]
images = article.get("images", 0)
handler_input = (
f"[老莫转发了一篇文章]\n标题: {title}\n"
+ (f"{images}张图片已OCR\n" if images else "")
+ f"\n{article_text}"
)
else:
# Article processor failed, send original content
log.warning("Article processor returned no content, sending raw text")
reply = call_hermes(conv_id, handler_input)
if reply and reply.strip():
await process_reply(reply, conv_id, bot)
# ── IMAGE message ──
elif msg_type == "image":
log.info(f"Image from {conv_id}, attempting OCR...")
# Get image URL from the raw message
img_url = None
for item in msg.raw.get("item_list", []):
if item.get("type") in (2,): # IMAGE
img_item = item.get("image_item", {})
img_url = img_item.get("url", "")
if not img_url:
# CDN media - download via CDN API
media = img_item.get("media", {})
if media:
log.info("Image has CDN media, attempting CDN download...")
img_url = download_cdn_image(media)
break
ocr_text = None
if img_url:
ocr_text = ocr_image_from_url(img_url)
if not ocr_text:
log.warning("OCR returned no text from image URL")
else:
log.info("No image URL available, cannot OCR")
if ocr_text:
handler_input = f"[老莫发送了一张图片,OCR识别结果如下]\n{ocr_text}"
else:
handler_input = "[老莫发送了一张图片,但无法识别图片内容]"
reply = call_hermes(conv_id, handler_input)
if reply and reply.strip():
await bot.reply(msg, reply.strip())
# ── VOICE message ──
elif msg_type == "voice":
reply = call_hermes(conv_id, "[voice message]")
if reply and reply.strip():
await bot.reply(msg, reply.strip())
# ── Unknown type ──
else:
log.info(f"Unhandled message type: {msg_type}")
def download_cdn_image(media: dict) -> str | None:
"""
Download an image from WeChat CDN using the iLink media protocol.
The media dict contains aes_key and encrypt_query_param for AES-128-ECB decryption.
For now, this is a placeholder - CDN download requires AES decryption.
"""
logger = log
logger.info(f"CDN media available but direct download not yet implemented")
logger.debug(f"CDN media keys: {list(media.keys())}")
return None
async def process_reply(reply: str, fu: str, bot: WeixinBot):
"""
Process Hermes reply text, handling tags like [FILE], [IMG], [EMOJI].
Uses bot.reply(msg) or bot.send(user_id, text) based on context.
"""
if not reply or not reply.strip():
return
clean = reply
# ── [FILE] tag ──
fm = re.search(r'\[FILE\](.*?)\[/FILE\]', clean)
if fm:
url = fm.group(1).strip()
log.info(f"[FILE] URL: {url}")
# iLink doesn't natively support file sending via simple URL.
# Send as text with download link for now.
clean = re.sub(r'\s*\[FILE\].*?\[/FILE\]\s*', '', clean).strip()
extra = f"\n文件链接: {url}"
if clean.strip():
clean += extra
else:
clean = f"文件: {url}"
# ── [IMG] tag ──
im = re.search(r'\[IMG\](.*?)\[/IMG\]', clean)
if im:
cmd = im.group(1).strip()
clean = re.sub(r'\s*\[IMG\].*?\[/IMG\]\s*', '', clean).strip()
if cmd.startswith("generate:") or cmd.startswith("draw:"):
parts = cmd.split(":", 1)[1].strip()
ratio = "1:1"
if "|" in parts:
ratio = parts.split("|")[1].strip()
prompt = parts.split("|")[0].strip()
else:
prompt = parts
log.info(f"[IMG] generate: {prompt[:40]} [{ratio}]")
img_url = generate_image(prompt, ratio)
if img_url:
# For images, we can't send via iLink natively yet.
# Send the URL as a text message.
extra = f"\n图片已生成: {img_url}"
if clean.strip():
clean += extra
else:
clean = f"图片已生成: {img_url}"
else:
extra = "\n[图片生成失败]"
clean += extra if clean.strip() else extra.strip()
else:
# Regular image URL
extra = f"\n图片: {cmd}"
if clean.strip():
clean += extra
else:
clean = f"图片: {cmd}"
# ── [EMOJI] tag ──
em = re.search(r'\[EMOJI\](.*?)\[/EMOJI\]', clean)
if em:
url = em.group(1).strip()
log.info(f"[EMOJI] URL: {url}")
clean = re.sub(r'\s*\[EMOJI\].*?\[/EMOJI\]\s*', '', clean).strip()
# ── [CONTACT] tag — not supported via iLink ──
clean = re.sub(r'\s*\[CONTACT:\w+\]\s*', '', clean)
# ── [ROOM_MEMBERS] tag — not supported via iLink ──
clean = re.sub(r'\s*\[ROOM_MEMBERS:\S+\]\s*', '', clean)
# ── [HISTORY] tag — not supported via iLink ──
clean = re.sub(r'\s*\[HISTORY:\S+?:\d+\]\s*', '', clean)
# ── [PAT] tag — not supported via iLink ──
clean = re.sub(r'\s*\[PAT:\S+:\S+\]\s*', '', clean)
# ── Send remaining text ──
clean = clean.strip()
if clean:
await bot.send(fu, clean)
# ── 5801 HTTP Server ──────────────────────────────────────────
# Receives messages from Hermes/xxm to forward to WeChat.
# Uses asyncio queue to bridge sync HTTP → async iLink SDK.
_message_queue: asyncio.Queue[dict] = asyncio.Queue()
_http_server_ready = threading.Event()
class HermesMsgHandler(BaseHTTPRequestHandler):
"""HTTP server that receives Hermes messages and queues them for iLink sending."""
def do_POST(self):
body = self.rfile.read(int(self.headers.get("Content-Length", 0)))
try:
d = json.loads(body)
log.info(f"5801 POST: {json.dumps(d, ensure_ascii=False)[:200]}")
to = d.get("to", "") or d.get("wxid", "")
msg = d.get("message", "") or d.get("content", "")
path = self.path
# History query endpoint
if path in ("/history", "/api/chatHistory"):
wxid = d.get("wxid", "") or d.get("to", "")
count = d.get("count", 10) or d.get("limit", 10)
self._send_json({
"ok": True,
"note": "iLink SDK does not support history query. Use Hermes session_search instead.",
"messages": [],
})
return
# Stop endpoint
if path == "/stop":
_message_queue.put_nowait({"type": "stop"})
self._send_json({"ok": True, "status": "stopped"})
return
# Queue the message for async processing (queue consumed by the iLink message loop)
_message_queue.put_nowait({
"type": "outgoing",
"to": to,
"message": msg,
})
except Exception as e:
log.error(f"5801 handler error: {e}")
self.send_response(200)
self.end_headers()
def do_GET(self):
parsed = urlparse(self.path)
if parsed.path == "/health":
self._send_json({
"ok": True,
"platform": "linux-ilink",
"mohe_user_id": MOHE_USER_ID or "not_logged_in",
})
return
if parsed.path in ("/history", "/api/chatHistory"):
import urllib.parse as up
params = up.parse_qs(parsed.query)
wxid = params.get("wxid", [""])[0]
count = params.get("count", ["10"])[0]
self._send_json({
"ok": True,
"note": "iLink SDK does not support history query.",
"messages": [],
})
return
self.send_response(200)
self.send_header("Content-Type", "application/json")
self.end_headers()
self.wfile.write(b'{"ok":true}')
def _send_json(self, data):
body = json.dumps(data, ensure_ascii=False).encode("utf-8")
self.send_response(200)
self.send_header("Content-Type", "application/json; charset=utf-8")
self.send_header("Content-Length", str(len(body)))
self.end_headers()
self.wfile.write(body)
def log_message(self, *a):
pass
def start_http_server():
"""Start the sync HTTP server in a daemon thread."""
server = HTTPServer(("0.0.0.0", 5801), HermesMsgHandler)
t = threading.Thread(target=server.serve_forever, daemon=True)
t.start()
log.info("HTTP :5801 ready (Hermes message receiver)")
_http_server_ready.set()
return server
# ── Queue Consumer ────────────────────────────────────────────
# Consumes the 5801 message queue inside the iLink event loop.
async def consume_outgoing_queue(bot: WeixinBot):
"""
Async task: consumes messages from 5801 queue and sends via iLink.
Runs inside the same event loop as the iLink bot.
"""
while True:
try:
item = await _message_queue.get()
msg_type = item.get("type")
if msg_type == "stop":
log.info("Queue consumer: stop signal received")
bot.stop()
break
if msg_type == "outgoing":
to = item.get("to", "")
msg = item.get("message", "")
if to and msg:
try:
await bot.send(to, msg)
log.info(f"5801 -> {to}: {msg[:80]}")
except RuntimeError as e:
log.warning(f"5801 send failed (no context token for {to}): {e}")
except Exception as e:
log.error(f"5801 send error: {e}")
except asyncio.CancelledError:
break
except Exception as e:
log.error(f"Queue consumer error: {e}")
# ── Outbound Message Router (replacement for session_router) ──
# Handles messages from xxm/Hermes that need to be sent to WeChat.
# In the iLink world, all outgoing messages go through bot.send().
async def route_hermes_message(msg_text: str, bot: WeixinBot):
"""Route a message from Hermes to WeChat via iLink."""
# For now, route to Dad's user_id if known.
# In practice, the 5801 server with 'to' field is the primary path.
log.info(f"Hermes message: {msg_text[:100]}")
# The 5801 server handles the actual sending with explicit 'to' field.
# This function is for messages without an explicit target.
# ── Main ──────────────────────────────────────────────────────
def main():
acquire_pid_lock()
log.info("=== MoWeChat Agent (Linux/iLink v1.0) ===")
# Start 5801 HTTP server (in a thread)
http_server = start_http_server()
_http_server_ready.wait(timeout=5)
# Create and run the iLink bot
bot = WeixinBot()
try:
# Login (QR scan first time, then auto)
log.info("Logging in to iLink Bot API...")
creds = bot.login()
global MOHE_USER_ID
MOHE_USER_ID = creds.user_id
log.info(f"Logged in as {MOHE_USER_ID}")
except Exception as e:
log.error(f"Login failed: {e}")
release_pid_lock()
sys.exit(1)
# Register message handler
@bot.on_message
async def handle(msg: IncomingMessage):
await handle_incoming(msg, bot)
# Register queue consumer (runs inside bot's event loop)
# We schedule this before bot.run() by patching into the _run_loop flow.
# Since _run_loop is private, we use a different approach:
# Override the start of _run_loop by wrapping it.
# Actually, bot.run() creates its own event loop with asyncio.run().
# We need to hook into that loop. Let's save the original _run_loop
# and wrap it to also start the queue consumer.
original_run_loop = bot._run_loop
async def wrapped_run_loop():
# Start queue consumer as a background task
asyncio.create_task(consume_outgoing_queue(bot))
# Run the original loop
await original_run_loop()
bot._run_loop = wrapped_run_loop
try:
log.info("Bot starting. Press Ctrl+C to stop.")
bot.run()
except KeyboardInterrupt:
log.info("Shutting down...")
except Exception as e:
log.error(f"Bot error: {e}")
finally:
bot.stop()
http_server.shutdown()
release_pid_lock()
log.info("Stopped.")
if __name__ == "__main__":
main()