Files
AgentsMeeting/gateway/linux/hooks/memory_monitor_v3.py
T
zhiwei f1630ebb03 feat: WeChat Linux bot via docker-wechatbot-webhook
- Docker container with auto-restart
- systemd webhook receiver on :5804
- Full send/receive loop: WeChat ↔ Docker ↔ Hermes
- Fixed login token for persistence
- Firewall rules for container-host communication
2026-06-24 01:59:44 +08:00

207 lines
7.2 KiB
Python

#!/usr/bin/env python3
"""
MoWeChat Message Monitor v3 — precise message extraction from WeChat heap.
Strategy: look for message content near known wxids with structural patterns.
"""
import os, re, sys, json, time, hashlib, struct, logging, argparse
# --- Paths -----------------------------------------------------------------
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Log output and the de-duplication state live in "../logs" relative to this
# script; the directory is created on import if it does not exist yet.
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
os.makedirs(LOG_DIR, exist_ok=True)
LOG_FILE = os.path.join(LOG_DIR, "wechat_msg_v3.log")
# JSON list of message hashes that were already reported.
# NOTE: "SAWN" looks like a typo for "SEEN"; the name is kept unchanged
# because Monitor reads and writes the file through this constant.
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_v3.json")

# --- Logging ---------------------------------------------------------------
# Logged messages contain CJK text, so the log file is pinned to UTF-8 rather
# than inheriting the locale's default encoding (which may not be able to
# represent those characters).
LOG_HANDLERS = (
    logging.FileHandler(LOG_FILE, encoding="utf-8"),
    logging.StreamHandler(),
)
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s",
                    handlers=LOG_HANDLERS)
log = logging.getLogger("mv3")

# --- Known wxids (discovered from memory) ----------------------------------
OWN_WXID = "wxid_c0a6izmwd78y22"  # own account, nickname "Moyu Buyu" (Dad)
BOT_WXID = "wxid_7onnerpx2s2l22"  # bot account, nickname "Mohe"
# Only strings found next to one of these wxids are attributed to a sender.
INTERESTING = {OWN_WXID, BOT_WXID}
class Monitor:
    """Poll a running WeChat process's heap and extract message-like strings.

    The monitor locates the WeChat process through ``/proc``, reads (a bounded
    prefix of) its ``[heap]`` mapping from ``/proc/<pid>/mem`` and reports
    strings that look like chat messages. Each reported string is hashed and
    remembered so that it is reported only once; the most recent hashes are
    persisted to ``SAWN_FILE`` between runs.

    Attributes:
        pid: PID of the WeChat process, or ``None`` until ``find_wechat``
            succeeds.
        heap: ``(start, end)`` addresses of the ``[heap]`` mapping, or
            ``(0, 0)`` when unknown.
        seen: set of MD5 hex digests of already reported messages.
        wxid_re: compiled bytes pattern matching a NUL-terminated wxid.
    """

    # A wxid as it appears in memory: identifier followed by a NUL byte.
    wxid_re = re.compile(rb'wxid_[a-zA-Z0-9]{10,28}\x00')
    # Runs of >= 3 bytes that are neither NUL nor ASCII control characters.
    _PRINTABLE_RUN_RE = re.compile(rb'[\x20-\xff]{3,}')
    # Four consecutive non-ASCII bytes: a cheap hint for multi-byte UTF-8 text.
    _HIGH_BYTES_RE = re.compile(rb'[\x80-\xff]{4}')
    # Punctuation counted as "good" characters by is_valid_msg.
    _PUNCTUATION = frozenset('.,!?;:\'"-()[]{}@#_/\\')

    MAX_SCAN_BYTES = 60 * 1024 * 1024  # upper bound on heap bytes read per scan
    WXID_LOOKAHEAD = 256               # bytes inspected after each wxid
    SNIPPET_BYTES = 200                # max bytes taken for a standalone string
    MAX_SEEN = 2000                    # hashes persisted to SAWN_FILE

    def __init__(self):
        """Create a monitor and load previously persisted message hashes."""
        self.pid = None
        self.heap = (0, 0)
        # Insertion order is tracked separately because a set has no order and
        # _save_seen must keep the *most recent* hashes.
        self._seen_order = self._read_seen_file()
        self.seen = set(self._seen_order)

    def _read_seen_file(self):
        """Return the persisted hashes as a list (oldest first).

        A missing, unreadable or malformed file yields an empty list.
        """
        try:
            with open(SAWN_FILE) as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and decoding errors.
            return []
        if not isinstance(loaded, list):
            return []
        # dict.fromkeys drops duplicates while preserving order.
        return list(dict.fromkeys(h for h in loaded if isinstance(h, str)))

    def _load_seen(self):
        """Return the persisted hashes as a set (empty on any read error)."""
        return set(self._read_seen_file())

    def _recent_seen(self):
        """Return the newest ``MAX_SEEN`` hashes, oldest first."""
        return self._seen_order[-self.MAX_SEEN:]

    def _save_seen(self):
        """Persist the newest ``MAX_SEEN`` hashes to ``SAWN_FILE``.

        Saving is best effort: a write failure is logged and otherwise
        ignored. The in-memory ``seen`` set is not trimmed.
        """
        try:
            with open(SAWN_FILE, 'w') as f:
                json.dump(self._recent_seen(), f)
        except OSError as exc:
            log.warning("Could not save seen-hash file %s: %s", SAWN_FILE, exc)

    def _remember(self, key):
        """Record ``key`` as seen; return True only if it was new."""
        digest = hashlib.md5(key.encode()).hexdigest()
        if digest in self.seen:
            return False
        self.seen.add(digest)
        self._seen_order.append(digest)
        return True

    @staticmethod
    def _parse_heap(maps_text):
        """Return ``(start, end)`` of the first ``[heap]`` line in a maps dump.

        Returns ``(0, 0)`` when there is no parsable ``[heap]`` line.
        """
        for line in maps_text.splitlines():
            if '[heap]' not in line:
                continue
            try:
                low, high = line.split()[0].split('-')
                return int(low, 16), int(high, 16)
            except (IndexError, ValueError):
                continue
        return (0, 0)

    def find_wechat(self):
        """Locate the WeChat process and record its PID and heap range.

        A process qualifies when its ``/proc/<pid>/maps`` mentions
        ``/opt/wechat/wechat``. ``self.heap`` is always refreshed for the
        process found, so a stale range from an earlier PID is never kept.

        Returns:
            True if a matching process was found, False otherwise.
        """
        try:
            entries = os.listdir('/proc')
        except OSError:
            return False
        for entry in entries:
            if not entry.isdigit():
                continue
            try:
                with open(f'/proc/{entry}/maps') as f:
                    maps_text = f.read()
            except (OSError, UnicodeDecodeError):
                # Process exited meanwhile, or its maps are not readable.
                continue
            if "/opt/wechat/wechat" not in maps_text:
                continue
            self.pid = int(entry)
            self.heap = self._parse_heap(maps_text)
            return True
        return False

    def is_valid_msg(self, text):
        """Return True if ``text`` plausibly is a real WeChat message.

        A message must be 2..5000 characters long, must not start with
        ``wxid_``, ``http`` or ``/``, must consist of at least 60% letters,
        digits, whitespace or common punctuation, and must contain at least
        2 CJK characters or at least 4 ASCII letters.
        """
        if len(text) < 2 or len(text) > 5000:
            return False
        if text.startswith(('wxid_', 'http', '/')):
            return False
        # str.isalpha() is True for CJK characters as well as ASCII letters.
        good = sum(
            1 for c in text
            if c.isalpha() or c.isdigit() or c.isspace() or c in self._PUNCTUATION
        )
        if good / len(text) < 0.6:
            return False
        cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
        ascii_alpha = sum(1 for c in text if c.isascii() and c.isalpha())
        return cjk >= 2 or ascii_alpha >= 4

    def _extract_messages(self, data, interesting=None):
        """Extract not-yet-seen message candidates from a raw memory dump.

        Args:
            data: bytes read from the process heap.
            interesting: wxids whose neighbourhood is inspected; defaults to
                the module-level ``INTERESTING`` set.

        Returns:
            A list of ``{'wxid': ..., 'text': ...}`` dicts. ``wxid`` is
            ``'unknown'`` for strings found by the standalone CJK pass.
        """
        if interesting is None:
            interesting = INTERESTING
        results = []

        # Pass 1: printable strings in the bytes right after a known wxid.
        for match in self.wxid_re.finditer(data):
            wxid = match.group(0)[:-1].decode('ascii')  # drop trailing NUL
            if wxid not in interesting:
                continue
            window = data[match.end():match.end() + self.WXID_LOOKAHEAD]
            # NUL and control bytes both act as separators. Stepping with a
            # regex guarantees progress on every byte value (a hand-rolled
            # index loop that only skips NUL stalls on control bytes).
            for run in self._PRINTABLE_RUN_RE.finditer(window):
                text = run.group(0).decode('utf-8', errors='replace')
                if self.is_valid_msg(text) and self._remember(f"{wxid}:{text}"):
                    results.append({'wxid': wxid, 'text': text})

        # Pass 2: standalone CJK strings, which may live at a different
        # address than the wxid they belong to.
        covered_until = 0
        for match in self._HIGH_BYTES_RE.finditer(data):
            pos = match.start()
            if pos < covered_until:
                # Inside a string that was already accepted; starting here
                # would only yield a (possibly mid-character) suffix of it.
                continue
            snippet = data[pos:pos + self.SNIPPET_BYTES]
            nul = snippet.find(b'\x00')
            if nul > 0:
                snippet = snippet[:nul]
            text = snippet.decode('utf-8', errors='replace')
            # Only accept strings with substantial CJK content.
            if sum(1 for c in text if '\u4e00' <= c <= '\u9fff') < 2:
                continue
            if not self.is_valid_msg(text):
                continue
            covered_until = pos + len(snippet)
            if self._remember(f"cjk:{text}"):
                results.append({'wxid': 'unknown', 'text': text})
        return results

    def scan(self):
        """Read the heap once and return newly discovered messages.

        At most ``MAX_SCAN_BYTES`` from the start of the heap are read.
        Returns an empty list when the heap range is unknown or the memory
        cannot be read. Persists the seen hashes when something new is found.
        """
        start, end = self.heap
        if not start:
            return []
        try:
            with open(f'/proc/{self.pid}/mem', 'rb') as mem:
                mem.seek(start)
                data = mem.read(min(end - start, self.MAX_SCAN_BYTES))
        except (OSError, ValueError, OverflowError):
            # Process gone, insufficient permission, or unreadable range.
            return []
        results = self._extract_messages(data)
        if results:
            self._save_seen()
        return results

    def run(self, once=False):
        """Run the monitor.

        Args:
            once: when True, perform a single scan and return its results;
                otherwise poll every 3 seconds until interrupted.

        Returns:
            The list of messages when ``once`` is True, an empty list when
            WeChat is not running at start-up, otherwise ``None`` after the
            polling loop is interrupted with Ctrl-C.
        """
        if not self.find_wechat():
            log.error("WeChat not found")
            return []
        log.info(f"PID {self.pid}, heap 0x{self.heap[0]:x}")
        if once:
            msgs = self.scan()
            for m in msgs:
                log.info(f" [{m['wxid']}] {m['text'][:80]}")
            return msgs
        while True:
            try:
                if not os.path.exists(f'/proc/{self.pid}'):
                    log.warning("WeChat died")
                    if not self.find_wechat():
                        time.sleep(30)
                        continue
                for m in self.scan():
                    log.info(f"NEW [{m['wxid']}] {m['text'][:80]}")
                time.sleep(3)
            except KeyboardInterrupt:
                break
            except Exception:
                # Keep the monitor alive, but leave a trace of what went wrong.
                log.exception("Monitor loop iteration failed; retrying in 10s")
                time.sleep(10)
if __name__ == "__main__":
    # Script entry point: "--once" performs a single scan and prints the
    # findings; without it the monitor polls until interrupted.
    monitor = Monitor()
    if '--once' not in sys.argv:
        monitor.run()
    else:
        found = monitor.run(once=True)
        print(f"\nFound {len(found)} messages")
        for entry in found:
            print(f" [{entry['wxid']}] {entry['text']}")