feat: WeChat Linux bot via docker-wechatbot-webhook

- Docker container with auto-restart
- systemd webhook receiver on :5804
- Full send/receive loop: WeChat ↔ Docker ↔ Hermes
- Fixed login token for persistence
- Firewall rules for container-host communication
This commit is contained in:
2026-06-24 01:59:44 +08:00
parent 255729bb8c
commit f1630ebb03
11 changed files with 1736 additions and 0 deletions
+198
View File
@@ -0,0 +1,198 @@
#!/usr/bin/env gdb
"""
GDB Hook script for WeChat Linux AppImage.
Intercepts incoming messages from WeChat's NewSync_ProcessStashMsgList.
Based on Ajax's Blog methodology:
https://aajax.top/2026/03/11/GettingLinuxWechatMessages/
Usage:
gdb -p $(pidof wechat) -x hooks/gdb_hook_messages.py
"""
import json
import os
import sys
# ── Configuration ──────────────────────────────────────────────
# PID of the WeChat process whose /proc/PID/maps is inspected.
# None means "use the PID of the inferior GDB currently has selected"
# (see detect_wechat_base). The binary base address itself is looked up
# on every run because ASLR changes it.
WECHAT_PID = None
# Breakpoint RVA (relative to binary base) for WeChat 4.1.x
# From Ajax's IDA Pro analysis of 4.1.0.16:
# NewSync_ProcessStashMsgList -> loop call at 0x4994BEB
BP_RVA = 0x4994BEB
# Log file: <directory of this script>/../logs/wechat_messages.log
LOG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "logs")
LOG_FILE = os.path.join(LOG_DIR, "wechat_messages.log")
# Message structure offsets (from Ajax's blog).
# Pointer chain followed by the breakpoint handler:
#   msg (from $rsi) -> holder (msg + OFF_HOLDER) -> inner (holder + OFF_INNER)
OFF_TYPE = 0x14  # int: message type (relative to msg)
OFF_SVRID = 0x50  # unsigned long long: server message ID (relative to msg)
OFF_HOLDER = 0x20  # void*: holder pointer (relative to msg)
OFF_INNER = 0x08  # void*: inner pointer (relative to holder)
OFF_CONTENT_PTR = 0x00  # char*: content string pointer (from inner+0)
OFF_CONTENT_LEN = 0x10  # int: content string length (from inner+0x10)
def log(msg):
    """Append *msg* to the message log file and echo it on the GDB console.

    The log directory is created on demand: nothing else in this script
    creates ``LOG_DIR``, so without this the very first write would fail on a
    fresh checkout.

    Args:
        msg: Text to record; a trailing newline is added.
    """
    os.makedirs(LOG_DIR, exist_ok=True)
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(f"{msg}\n")
    gdb.write(f"{msg}\n")
class WechatMessageBreakpoint(gdb.Breakpoint):
    """Breakpoint that fires on each incoming WeChat message.

    On every hit the handler treats ``$rsi`` as a pointer to the message
    structure, follows ``msg -> holder -> inner`` using the ``OFF_*`` offsets,
    decodes the content buffer, logs it and forwards it to Hermes. The
    inferior is never left stopped: ``stop`` always returns ``False``.
    """

    def __init__(self, address):
        """Install the breakpoint at the absolute address *address*."""
        super().__init__(f"*{address}")
        # ``silent`` is the gdb.Breakpoint attribute that mutes the automatic
        # "Breakpoint N, ..." banner. The previously assigned ``suppress``
        # attribute is not part of the GDB Python API and had no effect.
        self.silent = True

    @staticmethod
    def _read_int(address, size, signed=False):
        """Read a little-endian integer of *size* bytes from the inferior."""
        raw = gdb.selected_inferior().read_memory(address, size)
        return int.from_bytes(bytes(raw), byteorder="little", signed=signed)

    def _extract_message(self):
        """Decode the message referenced by ``$rsi``.

        Returns:
            A dict with ``type``, ``svrid``, ``content``, ``content_len``,
            ``holder`` and ``inner`` keys, or ``None`` when a pointer in the
            chain is NULL or the length is outside ``1..100000``.
        """
        msg_ptr = int(gdb.parse_and_eval("$rsi"))
        if msg_ptr == 0:
            return None

        msg_type = self._read_int(msg_ptr + OFF_TYPE, 4, signed=True)
        svrid = self._read_int(msg_ptr + OFF_SVRID, 8)

        holder = self._read_int(msg_ptr + OFF_HOLDER, 8)
        if holder == 0:
            return None
        inner = self._read_int(holder + OFF_INNER, 8)
        if inner == 0:
            return None

        content_len = self._read_int(inner + OFF_CONTENT_LEN, 4)
        if content_len <= 0 or content_len > 100000:
            return None
        content_ptr = self._read_int(inner + OFF_CONTENT_PTR, 8)
        if content_ptr == 0:
            return None

        # The buffer is read as 2 bytes per character and decoded as UTF-16LE
        # NOTE(review): the encoding is taken from the original comment
        # ("WeChat internal encoding") - confirm against a live capture.
        raw_content = gdb.selected_inferior().read_memory(
            content_ptr, min(content_len * 2, 100000)
        )
        content = bytes(raw_content)[:content_len * 2].decode(
            "utf-16le", errors="replace"
        )

        # Talker/sender info lives in a different structure and is not read.
        return {
            "type": msg_type,
            "svrid": hex(svrid),
            "content": content[:500],
            "content_len": content_len,
            "holder": hex(holder),
            "inner": hex(inner),
        }

    def stop(self):
        """Handle one breakpoint hit; always lets the inferior continue."""
        try:
            message = self._extract_message()
            if message is not None:
                log(f"[WECHAT_MSG] type={message['type']} svrid={message['svrid']}")
                log(f"[WECHAT_MSG] content: {message['content'][:200]}")
                # Forwarding is best-effort: a gateway failure must not
                # prevent the message from being logged.
                try:
                    forward_to_hermes(message)
                except Exception as e:
                    log(f"[WECHAT_MSG] forward error: {e}")
        except Exception as e:
            # Top-level boundary inside a GDB callback: log, never raise.
            log(f"[WECHAT_MSG] error: {e}")
        return False  # Don't stop execution
def forward_to_hermes(msg, *, url="http://192.168.1.246:8642/v1/chat/completions",
                      api_key="hermes123", timeout=2):
    """Forward a captured message to the Hermes Gateway (best effort).

    The call is synchronous: it blocks until the gateway answers or *timeout*
    seconds elapse, so the debugged process stays paused for that long when
    this is invoked from a breakpoint handler. Network failures are ignored.

    Args:
        msg: Mapping with optional ``type`` and ``content`` keys.
        url: Chat-completions endpoint to POST to.
        api_key: Bearer token sent in the ``Authorization`` header.
        timeout: Socket timeout in seconds.
    """
    import http.client
    import urllib.request

    # NOTE(review): the default endpoint and bearer token are hard-coded
    # credentials; pass ``url``/``api_key`` explicitly once a config exists.
    content = msg.get('content') or ''  # tolerate a missing/None content
    payload = json.dumps({
        "model": "nova-4",
        "messages": [
            {
                "role": "system",
                "content": f"You are a WeChat message handler. A new message arrived: type={msg.get('type')}, content={content[:100]}"
            }
        ]
    }).encode('utf-8')
    req = urllib.request.Request(
        url,
        data=payload,
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}"
        },
        method="POST"
    )
    try:
        # The response body is not used; the context manager only makes sure
        # the connection is closed instead of leaking until garbage collection.
        with urllib.request.urlopen(req, timeout=timeout):
            pass
    except (OSError, http.client.HTTPException):
        # URLError, timeouts and connection resets are OSError subclasses.
        pass
def detect_wechat_base():
    """Detect the WeChat binary base address from ``/proc/PID/maps``.

    The PID comes from ``WECHAT_PID`` or, when that is ``None``, from the
    inferior currently selected in GDB.

    Returns:
        The start address of the first ``r--p`` mapping whose line contains
        ``/opt/wechat/wechat``, or ``None`` if no PID is available, the maps
        file cannot be read, or no such mapping exists.
    """
    pid = WECHAT_PID
    if pid is None:
        try:
            pid = gdb.selected_inferior().pid
        except Exception:
            pid = None
    # ``not pid`` also rejects 0, which is what GDB appears to report when the
    # selected inferior has no live process (TODO confirm against GDB docs);
    # the old ``is None`` test let 0 through to a pointless /proc/0/maps read.
    if not pid:
        return None
    try:
        with open(f"/proc/{pid}/maps", "r") as f:
            for line in f:
                if "/opt/wechat/wechat" in line and "r--p" in line:
                    # Line format: "start-end perms offset dev inode path".
                    return int(line.split("-", 1)[0], 16)
    except (OSError, ValueError) as e:
        log(f"[WECHAT_MSG] Failed to detect base: {e}")
    return None
class HookWechatMessages(gdb.Command):
    """GDB user command ``hook-wechat-messages``.

    Resolves the WeChat base address and plants a
    ``WechatMessageBreakpoint`` at ``base + BP_RVA``.
    """

    def __init__(self):
        super().__init__("hook-wechat-messages", gdb.COMMAND_USER)

    def invoke(self, arg, from_tty):
        """Run the command; *arg* and *from_tty* are not used."""
        base = detect_wechat_base()
        if base is not None:
            bp_address = base + BP_RVA
            WechatMessageBreakpoint(bp_address)
            log("[WECHAT_MSG] Hook installed: base=0x{:x} bp=0x{:x}".format(base, bp_address))
            log("[WECHAT_MSG] Waiting for messages...")
            return
        log("[WECHAT_MSG] ERROR: Could not detect WeChat base address")


# Instantiating the command class is what registers it with GDB.
HookWechatMessages()
+152
View File
@@ -0,0 +1,152 @@
#!/usr/bin/env python3
"""
GDB startup script for WeChat message hooking.
Run with:
gdb -x this_script.py --args ./WeChatLinux.AppImage --no-sandbox --disable-gpu
GDB disables ASLR by default, so wechat base = 0x555555554000
"""
import gdb
import os
import time
# Breakpoint RVA from Ajax's blog (4.1.0.16, confirmed valid for 4.1.7).
# Added to FIXED_BASE to obtain the absolute breakpoint address.
BP_RVA = 0x4994BEB
# Fixed base when GDB starts process (ASLR disabled)
# NOTE(review): this is only correct if the breakpoint is resolved in the
# process that actually maps the wechat binary at this address - confirm for
# the AppImage launch path described in the module docstring.
FIXED_BASE = 0x555555554000
# Absolute path of the log file written by log().
# NOTE(review): machine-specific path; adjust when running elsewhere.
LOG_FILE = "/home/hmo/projects/AgentsMeeting/gateway/linux/logs/wechat_messages.log"
def log(msg):
    """Append *msg* to ``LOG_FILE`` and echo it on the GDB console.

    The file is written as UTF-8 explicitly because message content is
    frequently non-ASCII; relying on the locale default can raise
    ``UnicodeEncodeError``. The parent directory is created if missing.

    Args:
        msg: Text to record; a trailing newline is added.
    """
    os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(f"{msg}\n")
    gdb.write(f"{msg}\n")
class WechatMessageBreakpoint(gdb.Breakpoint):
    """Breakpoint that fires on each incoming WeChat message.

    On every hit the handler treats ``$rsi`` as the message pointer, reads
    type, server id, content and talker from the inferior, logs them and
    POSTs the content to the Hermes gateway. ``stop`` always returns
    ``False`` so the inferior keeps running.
    """

    def __init__(self, address):
        """Install the breakpoint at the absolute address *address*."""
        super().__init__(f"*{address}")
        # ``silent`` is the real gdb.Breakpoint attribute for muting the hit
        # banner; the previously assigned ``suppress`` attribute is not part
        # of the GDB Python API and had no effect.
        self.silent = True

    @staticmethod
    def _read_int(address, size, signed=False):
        """Read a little-endian integer of *size* bytes from the inferior."""
        raw = gdb.selected_inferior().read_memory(address, size)
        return int.from_bytes(bytes(raw), byteorder='little', signed=signed)

    def _read_talker(self, msg_ptr):
        """Return the NUL-terminated string pointed to by ``msg_ptr + 0x38``.

        Falls back to ``"unknown"`` when the pointer is NULL or unreadable.
        """
        try:
            talker_ptr = self._read_int(msg_ptr + 0x38, 8)
            if not talker_ptr:
                return "unknown"
            talker_data = gdb.selected_inferior().read_memory(talker_ptr, 64)
            return bytes(talker_data).split(b'\x00')[0].decode('utf-8', errors='replace')
        except Exception:
            return "unknown"

    @staticmethod
    def _forward(talker, content):
        """POST the message to Hermes; blocks for at most 2 seconds."""
        import json
        import urllib.request

        payload = json.dumps({
            "model": "nova-4",
            "messages": [
                {"role": "user", "content": f"[WeChat from {talker}] {content[:500]}"}
            ]
        }).encode()
        req = urllib.request.Request(
            "http://192.168.1.246:8642/v1/chat/completions",
            data=payload,
            headers={"Content-Type": "application/json", "Authorization": "Bearer hermes123"},
            method="POST"
        )
        # Close the response explicitly instead of leaking the connection.
        with urllib.request.urlopen(req, timeout=2):
            pass

    def _handle_hit(self):
        """Decode, log and forward the message for the current hit."""
        msg_ptr = int(gdb.parse_and_eval("$rsi"))
        if msg_ptr == 0:
            return

        msg_type = self._read_int(msg_ptr + 0x14, 4, signed=True)
        svrid = self._read_int(msg_ptr + 0x50, 8)

        holder = self._read_int(msg_ptr + 0x20, 8)
        if holder == 0:
            return
        inner = self._read_int(holder + 0x8, 8)
        if inner == 0:
            return

        # Content length at inner + 0x10, content pointer at inner + 0x0.
        content_len = self._read_int(inner + 0x10, 4)
        if content_len <= 0 or content_len > 100000:
            return
        content_ptr = self._read_int(inner + 0x0, 8)
        if content_ptr == 0:
            return

        raw_content = gdb.selected_inferior().read_memory(
            content_ptr, min(content_len * 2, 100000)
        )
        content = bytes(raw_content)[:content_len * 2].decode('utf-16le', errors='replace')

        talker = self._read_talker(msg_ptr)

        log(f"[WECHAT_MSG] type={msg_type} svrid={hex(svrid)} talker={talker}")
        log(f"[WECHAT_MSG] content: {content[:300]}")

        # Forwarding is best-effort, but failures are now logged rather than
        # silently discarded.
        try:
            self._forward(talker, content)
        except Exception as e:
            log(f"[WECHAT_MSG] forward error: {e}")

    def stop(self):
        """Handle one breakpoint hit; always lets the inferior continue."""
        try:
            self._handle_hit()
        except Exception as e:
            # Top-level boundary inside a GDB callback: log, never raise.
            log(f"[WECHAT_MSG] ERROR: {e}")
        return False
class AutoHookWechat(gdb.Command):
    """GDB user command ``auto-hook-wechat``.

    Plants a ``WechatMessageBreakpoint`` at ``FIXED_BASE + BP_RVA``.
    """

    def __init__(self):
        super().__init__("auto-hook-wechat", gdb.COMMAND_USER)

    def invoke(self, arg, from_tty):
        """Run the command; *arg* and *from_tty* are not used."""
        target = FIXED_BASE + BP_RVA
        WechatMessageBreakpoint(target)
        log("[WECHAT_MSG] Breakpoint set at 0x{:x}".format(target))
class OnStart(gdb.Breakpoint):
    """Breakpoint on ``_start`` that installs the message hook when hit."""

    def __init__(self):
        super().__init__("_start")

    def stop(self):
        """Run ``auto-hook-wechat`` and leave the inferior stopped."""
        gdb.execute("auto-hook-wechat")
        # Returning True keeps the process halted so the user can inspect
        # state and resume manually.
        return True
# Configure GDB for unattended use: no pager prompts, no confirmation
# prompts, pass SIG33 straight through without stopping or printing, and
# follow the child process across fork.
gdb.execute("set pagination off")
gdb.execute("set confirm off")
gdb.execute("handle SIG33 pass nostop noprint")
gdb.execute("set follow-fork-mode child")
# Register our commands (instantiation registers "auto-hook-wechat")
AutoHookWechat()
# Set breakpoint at _start so we hook after process loads
OnStart()
log("[WECHAT_MSG] GDB startup script loaded. Type 'run' to start WeChat.")
+296
View File
@@ -0,0 +1,296 @@
#!/usr/bin/env python3
"""
MoWeChat Message Monitor — reads WeChat process memory to capture incoming messages.
No GDB, no ptrace attach, no crash risk.
Uses /proc/PID/mem to scan the heap for message patterns.
"""
import os
import re
import sys
import json
import time
import hashlib
import logging
import argparse
import urllib.request
import urllib.error
# ── Configuration ──────────────────────────────────────────────
# All output lives in <directory of this script>/../logs, created at import.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
os.makedirs(LOG_DIR, exist_ok=True)
LOG_FILE = os.path.join(LOG_DIR, "wechat_msg_monitor.log")
# JSON list of hashes of already-reported messages (name is spelled "SAWN").
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_messages.json")
# Hermes gateway endpoint and bearer token used by forward_to_hermes.
# NOTE(review): hard-coded credential; consider moving it out of the source.
HERMES_API = "http://192.168.1.246:8642/v1/chat/completions"
HERMES_KEY = "hermes123"
# Polling interval (seconds)
POLL_INTERVAL = 2
# Minimum/maximum decoded content length (in characters) to consider valid
MIN_MSG_LEN = 2
MAX_MSG_LEN = 10000
# WeChat binary marker in /proc/PID/maps
WECHAT_BINARY_MARKER = "/opt/wechat/wechat"
# Log to both LOG_FILE and the console.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler(LOG_FILE),
        logging.StreamHandler()
    ]
)
log = logging.getLogger("mowechat")
class WeChatMemoryMonitor:
    """Poll a WeChat process's memory through ``/proc/PID/mem`` for messages.

    Writable heap and anonymous mappings are scanned for ``wxid_...``
    identifiers; the first plausible printable string within 512 bytes after
    each identifier is reported as that sender's message. Reported messages
    are de-duplicated by an MD5 of ``"wxid:content"``.
    """

    # How many of the most recent dedup hashes are persisted to SAWN_FILE.
    MAX_SAVED_HASHES = 1000

    def __init__(self):
        self.pid = None
        # Hashes in first-seen order. A set has no order, so slicing it (as
        # the old code did) kept an arbitrary subset rather than the newest.
        self._seen_order = []
        self.seen = self._load_seen()
        self.heap_regions = []
        self.anon_regions = []
        self.wxid_pattern = re.compile(rb'wxid_[a-zA-Z0-9]{10,28}\x00')
        # Runs of printable ASCII and high (>= 0x80) bytes
        self.msg_pattern = re.compile(rb'[\x20-\x7e\x80-\xff]{2,}')

    def _load_seen(self):
        """Load previously seen message hashes; returns them as a set."""
        try:
            with open(SAWN_FILE, 'r') as f:
                hashes = [h for h in json.load(f) if isinstance(h, str)]
        except (OSError, ValueError, TypeError):
            # Missing, unreadable or malformed file: start with no history.
            hashes = []
        self._seen_order = hashes
        return set(hashes)

    def _remember(self, msg_hash):
        """Record *msg_hash*; return True only if it was not seen before."""
        if msg_hash in self.seen:
            return False
        self.seen.add(msg_hash)
        self._seen_order.append(msg_hash)
        return True

    def _save_seen(self):
        """Persist the most recent ``MAX_SAVED_HASHES`` hashes.

        The in-memory set is left untrimmed so that strings still present in
        the process heap are not re-reported during this run.
        """
        self._seen_order = self._seen_order[-self.MAX_SAVED_HASHES:]
        try:
            with open(SAWN_FILE, 'w') as f:
                json.dump(self._seen_order, f)
        except OSError as e:
            log.warning(f"Could not save seen-message hashes: {e}")

    def find_wechat(self):
        """Find a process mapping the WeChat binary and store its PID.

        Only the first 4096 characters of each ``/proc/PID/maps`` are
        inspected. Returns True when a process was found.
        """
        for p in os.listdir('/proc'):
            if not p.isdigit():
                continue
            try:
                with open(f'/proc/{p}/maps', 'r') as f:
                    content = f.read(4096)
            except (OSError, ValueError):
                # Process exited, access denied, or undecodable path.
                continue
            if WECHAT_BINARY_MARKER in content:
                self.pid = int(p)
                return True
        return False

    def update_memory_regions(self, maps_path=None):
        """Refresh ``heap_regions`` and ``anon_regions`` from the maps file.

        Only ``rw`` mappings of at most 50 MB are kept.

        Args:
            maps_path: Maps file to parse; defaults to
                ``/proc/<self.pid>/maps``.
        """
        self.heap_regions = []
        self.anon_regions = []
        if maps_path is None:
            maps_path = f'/proc/{self.pid}/maps'
        try:
            with open(maps_path, 'r') as f:
                for line in f:
                    # "start-end perms offset dev inode [pathname]"; splitting
                    # at most 5 times keeps a pathname with spaces intact.
                    parts = line.split(None, 5)
                    if len(parts) < 5:
                        continue
                    start_hex, _, end_hex = parts[0].partition('-')
                    start = int(start_hex, 16)
                    end = int(end_hex, 16)
                    perms = parts[1]
                    # Anonymous mappings have exactly five fields. Using
                    # parts[-1] there yields the inode ("0"), which is why
                    # anonymous regions were never detected before.
                    name = parts[5].strip() if len(parts) > 5 else ''
                    size_mb = (end - start) / (1024 * 1024)
                    if size_mb > 50:  # Skip huge regions
                        continue
                    if not perms.startswith('rw'):
                        continue
                    if name == '[heap]':
                        self.heap_regions.append((start, end))
                    elif not name:  # Anonymous mapping
                        self.anon_regions.append((start, end))
        except (OSError, ValueError) as e:
            log.error(f"Failed to read maps: {e}")

    def scan_region(self, start, end, region_name=""):
        """Scan ``[start, end)`` of the process for unseen messages.

        Returns:
            A list of dicts with ``wxid``, ``content``, ``pos`` (hex address
            of the wxid) and ``region`` keys. At most one message is reported
            per wxid occurrence.
        """
        messages = []
        try:
            with open(f'/proc/{self.pid}/mem', 'rb') as mem:
                mem.seek(start)
                data = mem.read(end - start)

            for match in self.wxid_pattern.finditer(data):
                wxid = match.group(0).decode('utf-8', errors='replace').strip('\x00')
                # Candidate content is searched in the 512 bytes after wxid\0.
                content_start = match.end()
                search_end = min(content_start + 512, len(data))
                for cmatch in self.msg_pattern.finditer(data, content_start, search_end):
                    content = cmatch.group(0).decode('utf-8', errors='replace').strip('\x00')
                    if len(content) < MIN_MSG_LEN or len(content) > MAX_MSG_LEN:
                        continue
                    # Another wxid is not a message; URLs are allowed.
                    if content.startswith('wxid_'):
                        continue
                    msg_hash = hashlib.md5(f"{wxid}:{content}".encode()).hexdigest()
                    if self._remember(msg_hash):
                        messages.append({
                            'wxid': wxid,
                            'content': content,
                            'pos': hex(match.start() + start),
                            'region': region_name,
                        })
                    break  # Only one message per wxid
        except (PermissionError, ProcessLookupError) as e:
            log.warning(f"Memory read failed: {e}")
        except Exception as e:
            log.error(f"Scan error at {region_name}: {e}")
        return messages

    def scan_all(self):
        """Scan the heap and up to 20 anonymous regions (each <= 10 MB)."""
        self.update_memory_regions()
        all_messages = []
        for start, end in self.heap_regions:
            all_messages.extend(self.scan_region(start, end, "[heap]"))
        for start, end in self.anon_regions[:20]:  # Limit anonymous regions
            size_mb = (end - start) / (1024 * 1024)
            if size_mb > 10:  # Skip large anonymous maps
                continue
            all_messages.extend(self.scan_region(start, end, "[anon]"))
        if all_messages:
            self._save_seen()
        return all_messages

    def forward_to_hermes(self, msg):
        """POST *msg* (dict with ``wxid`` and ``content``) to Hermes.

        Failures are logged as warnings and never raised.
        """
        payload = json.dumps({
            "model": "nova-4",
            "messages": [
                {
                    "role": "system",
                    "content": (
                        "You receive WeChat messages. "
                        "Process this message according to the standard pipeline."
                    )
                },
                {
                    "role": "user",
                    "content": (
                        f"[WeChat Message]\n"
                        f"From: {msg['wxid']}\n"
                        f"Content: {msg['content']}"
                    )
                }
            ]
        }).encode('utf-8')
        try:
            req = urllib.request.Request(
                HERMES_API,
                data=payload,
                headers={
                    "Content-Type": "application/json",
                    "Authorization": f"Bearer {HERMES_KEY}"
                },
                method="POST"
            )
            # Close the response instead of leaking the connection.
            with urllib.request.urlopen(req, timeout=3):
                pass
            log.info(f"Forwarded: {msg['wxid']}: {msg['content'][:60]}")
        except Exception as e:
            log.warning(f"Forward failed: {e}")

    def run(self, once=False):
        """Run the monitor.

        Args:
            once: When True, perform a single scan and return its messages.

        Returns:
            False if no WeChat process was found, the list of messages when
            *once* is True, otherwise True after the loop is interrupted.
        """
        if not self.find_wechat():
            log.error("WeChat process not found!")
            return False
        log.info(f"Found WeChat PID: {self.pid}")

        if once:
            messages = self.scan_all()
            for msg in messages:
                log.info(f"Captured: [{msg['wxid']}] {msg['content'][:80]}")
            return messages

        log.info(f"Starting monitor (poll every {POLL_INTERVAL}s)...")
        while True:
            try:
                # Re-discover the process if the tracked PID has gone away.
                if not os.path.exists(f'/proc/{self.pid}'):
                    log.warning("WeChat process died, re-finding...")
                    if not self.find_wechat():
                        log.error("Cannot find WeChat, sleeping 30s...")
                        time.sleep(30)
                        continue
                    log.info(f"Re-attached to PID: {self.pid}")
                for msg in self.scan_all():
                    log.info(f"NEW: [{msg['wxid']}] {msg['content'][:80]}")
                    # Forwarding is intentionally disabled here.
                time.sleep(POLL_INTERVAL)
            except KeyboardInterrupt:
                log.info("Monitor stopped.")
                break
            except Exception as e:
                log.error(f"Monitor error: {e}")
                time.sleep(10)
        return True
def main():
    """Command-line entry point for the v1 monitor.

    ``--once`` performs a single scan and prints the result as JSON (or a
    notice when nothing was found); without it the polling loop runs.
    ``--foreground`` is accepted but not consulted.
    """
    cli = argparse.ArgumentParser(description="MoWeChat Message Monitor")
    cli.add_argument("--once", action="store_true", help="Scan once and exit")
    cli.add_argument("--foreground", action="store_true", help="Run in foreground")
    options = cli.parse_args()

    monitor = WeChatMemoryMonitor()
    if not options.once:
        monitor.run()
        return

    found = monitor.run(once=True)
    if not found:
        print("No new messages found.")
    else:
        print(json.dumps(found, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()
+343
View File
@@ -0,0 +1,343 @@
#!/usr/bin/env python3
"""
MoWeChat Message Monitor v2 — reads WeChat process memory to capture incoming messages.
Improvements over v1:
- Better message extraction: looks for content near known wxids with length-prefix structure
- Content filtering: rejects binary garbage, only keeps real text
- Tracks messages by content + wxid hash
"""
import os
import re
import sys
import json
import time
import hashlib
import logging
import argparse
import urllib.request
import urllib.error
# ── Configuration ──────────────────────────────────────────────
# All output lives in <directory of this script>/../logs, created at import.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
os.makedirs(LOG_DIR, exist_ok=True)
LOG_FILE = os.path.join(LOG_DIR, "wechat_msg_monitor.log")
# JSON list of hashes of already-reported messages (name is spelled "SAWN").
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_messages.json")
# Hermes gateway endpoint and bearer token used by forward_to_hermes.
# NOTE(review): hard-coded credential; consider moving it out of the source.
HERMES_API = "http://192.168.1.246:8642/v1/chat/completions"
HERMES_KEY = "hermes123"
# Seconds between heap scans in the polling loop.
POLL_INTERVAL = 3
# Substring identifying the WeChat binary in /proc/PID/maps.
WECHAT_BINARY_MARKER = "/opt/wechat/wechat"
# Hard-coded wxids of interest (they are fixed here, not populated at runtime)
OWN_WXID = "wxid_c0a6izmwd78y22"  # Dad (nickname "Mo Yu Bu Yu")
BOT_WXID = "wxid_7onnerpx2s2l22"  # Mo He (the bot account itself)
INTERESTING_WXIDS = {OWN_WXID, BOT_WXID}
# Log to both LOG_FILE and the console.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler(LOG_FILE),
        logging.StreamHandler()
    ]
)
log = logging.getLogger("mowechat")
def is_valid_text(s, min_ratio=0.5):
    """Tell whether *s* looks like human-readable text rather than garbage.

    A character counts as "good" when it is printable and is a letter, digit,
    whitespace or one of a small set of punctuation marks. Strings shorter
    than two characters are always rejected.

    Args:
        s: Candidate string.
        min_ratio: Minimum fraction of good characters required.

    Returns:
        True when the fraction of good characters is at least *min_ratio*.
    """
    if len(s) < 2:
        return False

    punctuation = '.,!?;:\'\"-()[]{}@#_/\\'

    def is_good(ch):
        if not ch.isprintable():
            return False
        return ch.isalpha() or ch.isspace() or ch.isdigit() or ch in punctuation

    good = sum(1 for ch in s if is_good(ch))
    return good / max(len(s), 1) >= min_ratio
def extract_strings(data, min_len=4):
    """Extract readable strings from binary data.

    A string is a maximal run of printable ASCII, tab/CR/LF and bytes
    >= 0x80 (possible UTF-8 continuation/lead bytes). Runs of at least
    *min_len* bytes are decoded as UTF-8 (invalid sequences replaced) and
    kept when ``is_valid_text`` accepts them.

    Args:
        data: Bytes-like object to scan.
        min_len: Minimum run length in bytes.

    Returns:
        List of decoded strings in order of appearance.
    """
    result = []
    # A bytearray grows in place; the previous ``bytes +=`` per byte copied
    # the whole run on every append.
    current = bytearray()

    def flush():
        """Emit the pending run if it qualifies, then reset it."""
        if len(current) >= min_len:
            decoded = current.decode('utf-8', errors='replace')
            if is_valid_text(decoded):
                result.append(decoded)
        current.clear()

    for b in data:
        if 32 <= b < 127 or b in (0x0a, 0x0d, 0x09) or b >= 0x80:
            current.append(b)
        else:
            flush()
    flush()  # the final run has no terminating byte
    return result
class WeChatMemoryMonitor:
    """Poll the WeChat process heap through ``/proc/PID/mem`` for messages.

    The heap is searched for ``wxid_...`` identifiers and the bytes following
    each one are filtered for text that looks like a message. Reported
    messages are de-duplicated by an MD5 of ``"wxid:text"``.
    """

    # How many of the most recent dedup hashes are persisted to SAWN_FILE.
    MAX_SAVED_HASHES = 2000
    # Candidate byte runs after a wxid: printable ASCII, high bytes and NULs.
    _CANDIDATE_RE = re.compile(rb'([\x20-\x7e\x80-\xff\x00]{4,})')

    def __init__(self):
        self.pid = None
        # Hashes in first-seen order. A set has no order, so slicing it (as
        # the old code did) kept an arbitrary subset rather than the newest.
        self._seen_order = []
        self.seen = self._load_seen()
        self.heap_region = None
        self.wxid_pattern = re.compile(rb'wxid_[a-zA-Z0-9]{10,28}\x00')
        # Known message sources
        self.known_wxids = set(INTERESTING_WXIDS)

    def _load_seen(self):
        """Load previously seen message hashes; returns them as a set."""
        try:
            with open(SAWN_FILE, 'r') as f:
                hashes = [h for h in json.load(f) if isinstance(h, str)]
        except (OSError, ValueError, TypeError):
            # Missing, unreadable or malformed file: start with no history.
            hashes = []
        self._seen_order = hashes
        return set(hashes)

    def _remember(self, msg_hash):
        """Record *msg_hash*; return True only if it was not seen before."""
        if msg_hash in self.seen:
            return False
        self.seen.add(msg_hash)
        self._seen_order.append(msg_hash)
        return True

    def _save_seen(self):
        """Persist the most recent ``MAX_SAVED_HASHES`` hashes.

        The in-memory set is left untrimmed so that strings still present in
        the process heap are not re-reported during this run.
        """
        self._seen_order = self._seen_order[-self.MAX_SAVED_HASHES:]
        try:
            with open(SAWN_FILE, 'w') as f:
                json.dump(self._seen_order, f)
        except OSError as e:
            log.warning(f"Could not save seen-message hashes: {e}")

    def find_wechat(self):
        """Find a process mapping the WeChat binary; record PID and heap.

        Only the first 8192 characters of each maps file are inspected.
        Returns True when a process was found (``heap_region`` may still be
        None if no ``[heap]`` line was seen).
        """
        for p in os.listdir('/proc'):
            if not p.isdigit():
                continue
            try:
                with open(f'/proc/{p}/maps', 'r') as f:
                    content = f.read(8192)
            except (OSError, ValueError):
                # Process exited, access denied, or undecodable path.
                continue
            if WECHAT_BINARY_MARKER not in content:
                continue
            self.pid = int(p)
            # Drop any range left over from a previous PID before looking for
            # this process's heap; a stale range would be scanned otherwise.
            self.heap_region = None
            for line in content.split('\n'):
                if '[heap]' in line:
                    bounds = line.split()[0].split('-')
                    try:
                        self.heap_region = (int(bounds[0], 16), int(bounds[1], 16))
                    except (IndexError, ValueError):
                        # Line truncated by the 8192-character read.
                        self.heap_region = None
            return True
        return False

    def scan_message(self, mem, wxid_bytes, wxid_pos, wxid_end):
        """Try to extract a message from the bytes following a wxid.

        Args:
            mem: Seekable binary file object over the process memory.
            wxid_bytes: The matched wxid bytes (unused).
            wxid_pos: Position of the wxid (unused).
            wxid_end: Position just past the wxid; passed to ``mem.seek``, so
                it is assumed to be an absolute address.

        Returns:
            The longest acceptable NUL-terminated string within 512 bytes
            after the wxid (ties broken by alphanumeric count), or None.
        """
        search_start = wxid_end
        search_end = search_start + 512
        if self.heap_region:
            # Clamp to the absolute end of the heap. The old code clamped an
            # absolute address to the heap *size*, which could produce a
            # negative length and therefore an unbounded read.
            search_end = min(search_end, self.heap_region[1])
        if search_end <= search_start:
            return None
        try:
            mem.seek(search_start)
            data = mem.read(search_end - search_start)
        except (OSError, ValueError):
            return None

        candidates = []
        # Every piece except the last is NUL-terminated; the unterminated
        # tail is ignored.
        for raw in data.split(b'\x00')[:-1]:
            if len(raw) < 3:
                continue
            text = raw.decode('utf-8', errors='replace')
            if is_valid_text(text) and len(text) >= 2 and not text.startswith('wxid_'):
                candidates.append(text)
        if not candidates:
            return None
        return max(
            candidates,
            key=lambda m: (len(m), sum(1 for c in m if c.isalpha() or c.isdigit())),
        )

    def scan_heap(self):
        """Scan the heap (at most 50 MB) for unseen messages.

        Returns:
            A list of dicts with ``wxid``, ``content``, ``pos`` (hex address
            of the wxid) and ``alpha_ratio`` (two-decimal string) keys.
        """
        if not self.heap_region:
            return []
        start, end = self.heap_region
        messages = []
        try:
            with open(f'/proc/{self.pid}/mem', 'rb') as mem:
                mem.seek(start)
                data = mem.read(min(end - start, 50 * 1024 * 1024))

            for match in self.wxid_pattern.finditer(data):
                wxid = match.group(0).decode('utf-8', errors='replace').strip('\x00')
                global_pos = start + match.start()
                # Candidate content: the 256 bytes after wxid\0.
                content_area = data[match.end():match.end() + 256]
                # NOTE(review): the pattern admits NUL bytes, so a candidate
                # may contain leading/embedded NULs; only trailing ones are
                # stripped below.
                for cmatch in self._CANDIDATE_RE.finditer(content_area):
                    raw = cmatch.group(0).rstrip(b'\x00')
                    if len(raw) < 3:
                        continue
                    text = raw.decode('utf-8', errors='replace')
                    total_len = len(text)
                    if total_len < 3:
                        continue
                    # Skip if it's just another wxid
                    if text.startswith('wxid_'):
                        continue
                    alpha_count = sum(
                        1 for c in text if c.isalpha() or '\u4e00' <= c <= '\u9fff'
                    )
                    # Skip binary garbage: under 30% alphabetic/CJK chars.
                    if alpha_count / max(total_len, 1) < 0.3:
                        continue
                    # Skip URLs and paths
                    if text.startswith('http') or text.startswith('/'):
                        continue
                    msg_hash = hashlib.md5(f"{wxid}:{text}".encode()).hexdigest()
                    if self._remember(msg_hash):
                        messages.append({
                            'wxid': wxid,
                            'content': text,
                            'pos': hex(global_pos),
                            'alpha_ratio': f"{alpha_count/total_len:.2f}",
                        })
                    break  # First acceptable candidate per wxid occurrence
        except (PermissionError, ProcessLookupError) as e:
            log.warning(f"Memory read failed: {e}")
        except Exception as e:
            log.error(f"Heap scan error: {e}")
        return messages

    def forward_to_hermes(self, msg):
        """POST *msg* (dict with ``wxid`` and ``content``) to Hermes.

        Failures are logged as warnings and never raised.
        """
        payload = json.dumps({
            "model": "nova-4",
            "messages": [
                {"role": "system", "content": "You receive WeChat messages. Process according to standard pipeline."},
                {"role": "user", "content": f"[WeChat] From: {msg['wxid']}\n{msg['content']}"}
            ]
        }).encode('utf-8')
        try:
            req = urllib.request.Request(
                HERMES_API, data=payload,
                headers={"Content-Type": "application/json", "Authorization": f"Bearer {HERMES_KEY}"},
                method="POST"
            )
            # Close the response instead of leaking the connection.
            with urllib.request.urlopen(req, timeout=3):
                pass
            log.info(f"Forwarded: {msg['wxid']}: {msg['content'][:60]}")
        except Exception as e:
            log.warning(f"Forward failed: {e}")

    def run(self, once=False):
        """Run the monitor.

        Args:
            once: When True, perform a single scan and return its messages.

        Returns:
            ``[]`` if no WeChat process was found, the list of messages when
            *once* is True, otherwise True after the loop is interrupted.
        """
        if not self.find_wechat():
            log.error("WeChat not found!")
            return []
        log.info(f"WeChat PID: {self.pid}, heap: 0x{self.heap_region[0]:x}-0x{self.heap_region[1]:x}" if self.heap_region else f"PID: {self.pid}, no heap")

        if once:
            messages = self.scan_heap()
            # Log only the first message of each sender.
            seen_wxids = set()
            for msg in messages:
                if msg['wxid'] not in seen_wxids:
                    log.info(f"  [{msg['wxid']}] {msg['content'][:80]}")
                    seen_wxids.add(msg['wxid'])
            if messages:
                self._save_seen()
            return messages

        log.info(f"Monitoring every {POLL_INTERVAL}s...")
        while True:
            try:
                # Re-discover the process if the tracked PID has gone away.
                if not os.path.exists(f'/proc/{self.pid}'):
                    log.warning("WeChat died, re-finding...")
                    if not self.find_wechat():
                        time.sleep(30)
                        continue
                messages = self.scan_heap()
                for msg in messages:
                    log.info(f"NEW: [{msg['wxid']}] {msg['content'][:80]}")
                    # Forwarding is intentionally disabled here.
                if messages:
                    self._save_seen()
                time.sleep(POLL_INTERVAL)
            except KeyboardInterrupt:
                break
            except Exception as e:
                log.error(f"Error: {e}")
                time.sleep(10)
        return True
def main():
    """Command-line entry point for the v2 monitor.

    ``--once`` performs a single heap scan and prints the messages that come
    from a wxid in ``INTERESTING_WXIDS`` or whose ``alpha_ratio`` exceeds
    0.5; without it the polling loop runs. ``--foreground`` is accepted but
    not consulted.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--once", action="store_true")
    cli.add_argument("--foreground", action="store_true")
    options = cli.parse_args()

    monitor = WeChatMemoryMonitor()
    if not options.once:
        monitor.run()
        return

    msgs = monitor.run(once=True)
    # Keep messages from known contacts or with mostly alphabetic content.
    real_msgs = []
    for entry in msgs:
        from_known = entry['wxid'] in INTERESTING_WXIDS
        if from_known or float(entry.get('alpha_ratio', 0)) > 0.5:
            real_msgs.append(entry)

    print(f"\nFound {len(msgs)} potential messages, {len(real_msgs)} from known contacts")
    for entry in real_msgs:
        print(f"  [{entry['wxid']}] {entry['content'][:100]}")
    print(f"\n(Total seen: {len(monitor.seen)})")


if __name__ == "__main__":
    main()
+206
View File
@@ -0,0 +1,206 @@
#!/usr/bin/env python3
"""
MoWeChat Message Monitor v3 — precise message extraction from WeChat heap.
Strategy: look for message content near known wxids with structural patterns.
"""
import os, re, sys, json, time, hashlib, struct, logging, argparse
# All output lives in <directory of this script>/../logs, created at import.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
os.makedirs(LOG_DIR, exist_ok=True)
LOG_FILE = os.path.join(LOG_DIR, "wechat_msg_v3.log")
# JSON list of hashes of already-reported messages (name is spelled "SAWN").
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_v3.json")
# Log to both LOG_FILE and the console.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s",
    handlers=[logging.FileHandler(LOG_FILE), logging.StreamHandler()])
log = logging.getLogger("mv3")
# Known wxids (discovered from memory)
OWN_WXID = "wxid_c0a6izmwd78y22"  # "Mo Yu Bu Yu" (Dad)
BOT_WXID = "wxid_7onnerpx2s2l22"  # Mo He
# Only messages following one of these wxids are attributed to a sender.
INTERESTING = {OWN_WXID, BOT_WXID}
class Monitor:
    """Heap scanner (v3) that extracts WeChat messages near known wxids.

    Two strategies are applied to a snapshot of the heap: strings following
    an interesting wxid are attributed to that wxid, and stand-alone strings
    with CJK content are reported with wxid ``'unknown'``.
    """

    # How many of the most recent dedup hashes are persisted to SAWN_FILE.
    MAX_SAVED_HASHES = 2000

    def __init__(self, interesting=None):
        """Create a monitor.

        Args:
            interesting: Iterable of wxids whose messages are attributed;
                defaults to the module-level ``INTERESTING`` set.
        """
        self.pid = None
        self.heap = (0, 0)
        self.interesting = set(INTERESTING if interesting is None else interesting)
        # Hashes in first-seen order. A set has no order, so slicing it (as
        # the old code did) kept an arbitrary subset rather than the newest.
        self._seen_order = []
        self.seen = self._load_seen()
        self.wxid_re = re.compile(rb'wxid_[a-zA-Z0-9]{10,28}\x00')

    def _load_seen(self):
        """Load previously seen message hashes; returns them as a set."""
        try:
            with open(SAWN_FILE) as f:
                hashes = [h for h in json.load(f) if isinstance(h, str)]
        except (OSError, ValueError, TypeError):
            # Missing, unreadable or malformed file: start with no history.
            hashes = []
        self._seen_order = hashes
        return set(hashes)

    def _remember(self, msg_hash):
        """Record *msg_hash*; return True only if it was not seen before."""
        if msg_hash in self.seen:
            return False
        self.seen.add(msg_hash)
        self._seen_order.append(msg_hash)
        return True

    def _save_seen(self):
        """Persist the most recent ``MAX_SAVED_HASHES`` hashes."""
        self._seen_order = self._seen_order[-self.MAX_SAVED_HASHES:]
        try:
            with open(SAWN_FILE, 'w') as f:
                json.dump(self._seen_order, f)
        except OSError as e:
            log.warning(f"Could not save seen-message hashes: {e}")

    def find_wechat(self):
        """Find a process mapping the WeChat binary; record PID and heap.

        Returns True when a process was found.
        """
        for p in os.listdir('/proc'):
            if not p.isdigit():
                continue
            try:
                with open(f'/proc/{p}/maps') as f:
                    c = f.read()
            except (OSError, ValueError):
                # Process exited, access denied, or undecodable path.
                continue
            if "/opt/wechat/wechat" not in c:
                continue
            self.pid = int(p)
            # Forget a heap range that belonged to a previous PID.
            self.heap = (0, 0)
            for line in c.split('\n'):
                if '[heap]' in line:
                    a = line.split()[0].split('-')
                    self.heap = (int(a[0], 16), int(a[1], 16))
            return True
        return False

    def is_valid_msg(self, text):
        """Check whether *text* looks like a real WeChat message.

        Requirements: 2..5000 characters, not starting with ``wxid_``,
        ``http`` or ``/``, at least 60% letters/digits/whitespace/common
        punctuation, and at least 2 CJK characters or 4 ASCII letters.
        """
        if len(text) < 2 or len(text) > 5000:
            return False
        if text.startswith(('wxid_', 'http', '/')):
            return False
        good = sum(
            1 for c in text
            if c.isalpha() or c.isdigit() or c.isspace() or c in '.,!?;:\'"-()[]{}@#_/\\'
        )
        if good / max(len(text), 1) < 0.6:
            return False
        cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
        ascii_alpha = sum(1 for c in text if c.isascii() and c.isalpha())
        return cjk >= 2 or ascii_alpha >= 4

    def _scan_after_wxids(self, data, results):
        """Strategy 1: collect valid strings in the 256 bytes after a wxid."""
        for m in self.wxid_re.finditer(data):
            wxid = m.group(0).decode().strip('\x00')
            if wxid not in self.interesting:
                continue
            pos = m.end()  # position after wxid\0
            chunk = data[pos:min(pos + 256, len(data))]
            i = 0
            while i < len(chunk):
                # Consume a run of bytes >= 0x20 (this also excludes NUL).
                s_start = i
                while i < len(chunk) and chunk[i] >= 0x20:
                    i += 1
                if i - s_start >= 3:
                    text = chunk[s_start:i].decode('utf-8', errors='replace')
                    if self.is_valid_msg(text):
                        h = hashlib.md5(f"{wxid}:{text}".encode()).hexdigest()
                        if self._remember(h):
                            results.append({'wxid': wxid, 'text': text})
                # Skip NULs *and* other control bytes. The old loop skipped
                # only NULs, so a byte in 0x01..0x1f left ``i`` unchanged and
                # the scan never terminated.
                while i < len(chunk) and chunk[i] < 0x20:
                    i += 1

    def _scan_cjk_strings(self, data, results):
        """Strategy 2: collect stand-alone strings with CJK content."""
        for cm in re.finditer(rb'([\x80-\xff][\x80-\xff][\x80-\xff][\x80-\xff])', data):
            pos = cm.start()
            # Up to 200 bytes, cut at the first NUL if there is one.
            snippet = data[pos:pos + 200]
            null_pos = snippet.find(b'\x00')
            if null_pos > 0:
                snippet = snippet[:null_pos]
            if len(snippet) < 4:
                continue
            text = snippet.decode('utf-8', errors='replace')
            # Only accept strings with at least two CJK characters
            cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
            if cjk < 2:
                continue
            if len(text) > 200:
                text = text[:200]
            h = hashlib.md5(f"cjk:{text}".encode()).hexdigest()
            if h not in self.seen and self.is_valid_msg(text):
                self._remember(h)
                results.append({'wxid': 'unknown', 'text': text})

    def scan(self, data=None):
        """Scan heap bytes for unseen messages.

        Args:
            data: Heap snapshot to scan. When None, up to 60 MB are read from
                ``/proc/<pid>/mem`` at ``self.heap``.

        Returns:
            List of ``{'wxid': ..., 'text': ...}`` dicts; empty when the heap
            is unknown or unreadable.
        """
        if data is None:
            start, end = self.heap
            if not start:
                return []
            try:
                with open(f'/proc/{self.pid}/mem', 'rb') as mem:
                    mem.seek(start)
                    data = mem.read(min(end - start, 60 * 1024 * 1024))
            except (OSError, ValueError):
                return []
        results = []
        self._scan_after_wxids(data, results)
        self._scan_cjk_strings(data, results)
        if results:
            self._save_seen()
        return results

    def run(self, once=False):
        """Run the monitor.

        Args:
            once: When True, perform a single scan and return its messages.

        Returns:
            ``[]`` if WeChat was not found, the messages when *once* is True,
            otherwise None after the loop is interrupted.
        """
        if not self.find_wechat():
            log.error("WeChat not found")
            return []
        log.info(f"PID {self.pid}, heap 0x{self.heap[0]:x}")
        if once:
            msgs = self.scan()
            for m in msgs:
                log.info(f"  [{m['wxid']}] {m['text'][:80]}")
            return msgs
        while True:
            try:
                # Re-discover the process if the tracked PID has gone away.
                if not os.path.exists(f'/proc/{self.pid}'):
                    log.warning("WeChat died")
                    if not self.find_wechat():
                        time.sleep(30)
                        continue
                for m in self.scan():
                    log.info(f"NEW [{m['wxid']}] {m['text'][:80]}")
                time.sleep(3)
            except KeyboardInterrupt:
                break
            except Exception as e:
                # Previously swallowed silently; keep looping but record why.
                log.error(f"Monitor error: {e}")
                time.sleep(10)
if __name__ == "__main__":
    # "--once" anywhere on the command line selects a single scan with a
    # printed summary; otherwise the polling loop runs until interrupted.
    m = Monitor()
    if '--once' not in sys.argv:
        m.run()
    else:
        msgs = m.run(once=True)
        print(f"\nFound {len(msgs)} messages")
        for msg in msgs:
            print(f"  [{msg['wxid']}] {msg['text']}")
+185
View File
@@ -0,0 +1,185 @@
#!/usr/bin/env python3
"""MoWeChat v4 — efficient heap scanner for WeChat messages."""
import os, re, sys, json, time, hashlib, logging
# All output lives in <directory of this script>/../logs, created at import.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
os.makedirs(LOG_DIR, exist_ok=True)
LOG_FILE = os.path.join(LOG_DIR, "wechat_v4.log")
# NOTE(review): no code in the visible part of this script reads or writes
# SAWN_FILE - confirm whether it is still needed.
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_v4.json")
# Log to both LOG_FILE and the console.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s",
    handlers=[logging.FileHandler(LOG_FILE), logging.StreamHandler()])
log = logging.getLogger("wc4")
# Known wxids; find_messages searches the heap for each of these.
WXID_DAD = "wxid_c0a6izmwd78y22"
WXID_MOHE = "wxid_7onnerpx2s2l22"
KNOWN = {WXID_DAD, WXID_MOHE}
def heap_of(pid):
    """Return ``(start, end)`` of the ``[heap]`` mapping of process *pid*.

    The range is taken from the first line of ``/proc/<pid>/maps`` that
    contains ``[heap]``. ``(None, None)`` is returned when there is none.
    """
    with open(f'/proc/{pid}/maps') as maps:
        heap_line = next((entry for entry in maps if '[heap]' in entry), None)
    if heap_line is None:
        return None, None
    low, high = heap_line.split()[0].split('-')
    return int(low, 16), int(high, 16)
def read_heap(pid):
    """Read up to 40 MB of the heap of process *pid*.

    Returns:
        ``(data, start, end)`` where *data* is the bytes read from
        ``/proc/<pid>/mem`` starting at the heap start, or
        ``(None, None, None)`` when no heap range is available.
    """
    heap_start, heap_end = heap_of(pid)
    if not heap_start:
        return None, None, None
    max_bytes = 40 * 1024 * 1024
    with open(f'/proc/{pid}/mem', 'rb') as mem:
        mem.seek(heap_start)
        snapshot = mem.read(min(heap_end - heap_start, max_bytes))
    return snapshot, heap_start, heap_end
def _count_cjk_alpha(text):
    """Return ``(cjk, alpha)``: counts of CJK ideographs and ASCII letters."""
    cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
    alpha = sum(1 for c in text if c.isascii() and c.isalpha())
    return cjk, alpha


def find_messages(data, heap_start, known=None):
    """Extract candidate messages from a heap snapshot.

    Two passes are made:

    1. For each known wxid (up to 100 occurrences each), the first
       NUL-delimited string of at least 3 bytes within 300 bytes after
       ``wxid\\0`` whose score (3 per CJK char + 1 per ASCII letter) is at
       least 5 is taken as that wxid's message.
    2. A catch-all scan treats every byte in 0x04..0x40 as a possible length
       prefix and accepts the following text when it has at least 2 CJK
       characters or 6 ASCII letters; at most 500 such texts are collected
       and each is attributed to the closest wxid in the preceding 200 bytes.

    Args:
        data: Heap bytes.
        heap_start: Address of ``data[0]``, used to report absolute addresses.
        known: Iterable of wxids for pass 1; defaults to module-level
            ``KNOWN``.

    Returns:
        List of ``(text, wxid, address)`` tuples.
    """
    if known is None:
        known = KNOWN
    msgs = []
    seen = set()

    # Pass 1. Sorting makes the output order independent of set iteration
    # order, which varies between runs.
    for wxid in sorted(known):
        pattern = wxid.encode() + b'\x00'
        pos = 0
        for _ in range(100):
            idx = data.find(pattern, pos)
            if idx < 0:
                break
            after = idx + len(pattern)
            snippet = data[after:after + 300]
            for run in re.finditer(rb'[^\x00]+', snippet):
                if run.end() - run.start() < 3:
                    continue
                text = run.group(0).decode('utf-8', errors='replace')
                cjk, alpha = _count_cjk_alpha(text)
                if cjk * 3 + alpha < 5:
                    continue
                h = hashlib.md5(text.encode()).hexdigest()
                if h not in seen:
                    seen.add(h)
                    msgs.append((text, wxid, heap_start + after + run.start()))
                break  # first qualifying string per occurrence
            pos = idx + 1

    # Pass 2: [1 byte length/bufsize][readable text]
    wxid_re = re.compile(rb'wxid_[a-zA-Z0-9]{10,28}')
    i = 0
    found = 0
    scan_limit = len(data) - 10
    while i < scan_limit and found < 500:
        b = data[i]
        # Possible prefix: small values (0x04-0x40 = 4-64, common buffer sizes)
        if 0x04 <= b <= 0x40:
            j = i + 1
            stop = min(len(data), j + b)
            # Take at most ``b`` bytes, stopping at NUL or a control byte.
            while j < stop and data[j] >= 0x20:
                j += 1
            text_bytes = data[i + 1:j]
            if len(text_bytes) >= 5:
                text = text_bytes.decode('utf-8', errors='replace')
                cjk, alpha = _count_cjk_alpha(text)
                if (cjk >= 2 or alpha >= 6) and text[0] not in '&/\\.':
                    h = hashlib.md5(('any:' + text).encode()).hexdigest()
                    if h not in seen:
                        # Attribute to the wxid *closest* before the text:
                        # the last match in the window. ``re.search`` used to
                        # return the earliest one instead.
                        nearby = data[max(0, i - 200):i]
                        candidates = wxid_re.findall(nearby)
                        owner = candidates[-1].decode() if candidates else 'unknown'
                        seen.add(h)
                        msgs.append((text, owner, heap_start + i))
                        found += 1
        i += 1
    return msgs
def find_wechat(marker="/opt/wechat/wechat"):
    """Find a process that maps the WeChat binary.

    Args:
        marker: Substring looked for in each ``/proc/<pid>/maps``.

    Returns:
        The PID (int) of the first matching process in ``os.listdir('/proc')``
        order, or None when there is none.
    """
    for p in os.listdir('/proc'):
        if not p.isdigit():
            continue
        try:
            with open(f'/proc/{p}/maps') as f:
                if marker in f.read():
                    return int(p)
        except (OSError, ValueError):
            # Process exited, access denied, or undecodable path. A bare
            # ``except`` here used to swallow KeyboardInterrupt as well.
            continue
    return None
def main():
    """Scan the WeChat heap once and print each distinct message.

    Returns:
        Process exit status: 1 when WeChat or its heap is unavailable,
        otherwise 0.
    """
    pid = find_wechat()
    if not pid:
        log.error("WeChat not found")
        return 1
    log.info(f"Found PID {pid}")

    data, hs, he = read_heap(pid)
    if data is None:
        log.error("Cannot read heap")
        return 1
    log.info(f"Heap: 0x{hs:x}-0x{he:x} ({len(data)} bytes)")

    msgs = find_messages(data, hs)
    # Print each text once, comparing on the whitespace-stripped form.
    printed = set()
    for text, wxid, _addr in msgs:
        key = text.strip()
        if len(key) < 2 or key in printed:
            continue
        printed.add(key)
        print(f"{wxid}: {text}")
    if not msgs:
        print("No messages found")
    return 0


if __name__ == "__main__":
    sys.exit(main())