Files
AgentsMeeting/gateway/linux/hooks/memory_monitor_v2.py
T
zhiwei f1630ebb03 feat: WeChat Linux bot via docker-wechatbot-webhook
- Docker container with auto-restart
- systemd webhook receiver on :5804
- Full send/receive loop: WeChat ↔ Docker ↔ Hermes
- Fixed login token for persistence
- Firewall rules for container-host communication
2026-06-24 01:59:44 +08:00

344 lines
12 KiB
Python

#!/usr/bin/env python3
"""
MoWeChat Message Monitor v2 — reads WeChat process memory to capture incoming messages.
Improvements over v1:
- Better message extraction: looks for content near known wxids with length-prefix structure
- Content filtering: rejects binary garbage, only keeps real text
- Tracks messages by content + wxid hash
"""
import os
import re
import sys
import json
import time
import hashlib
import logging
import argparse
import urllib.request
import urllib.error
# ── Configuration ──────────────────────────────────────────────
# Logs and the dedup state file live in ../logs relative to this script.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
os.makedirs(LOG_DIR, exist_ok=True)
LOG_FILE = os.path.join(LOG_DIR, "wechat_msg_monitor.log")
# JSON list of message hashes that were already reported.
# (The constant name is kept as-is because other code in this file uses it.)
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_messages.json")

# Hermes gateway endpoint and bearer token. Both can be overridden through
# the environment so the credential does not have to live in source control;
# the previous hard-coded values remain the defaults.
HERMES_API = os.environ.get(
    "HERMES_API", "http://192.168.1.246:8642/v1/chat/completions"
)
HERMES_KEY = os.environ.get("HERMES_KEY", "hermes123")

# Seconds between two heap scans in monitoring mode.
POLL_INTERVAL = 3
# Substring looked for in /proc/<pid>/maps to recognise the WeChat process.
WECHAT_BINARY_MARKER = "/opt/wechat/wechat"

# Known message sender wxids (static; nothing in this file adds to them).
OWN_WXID = "wxid_c0a6izmwd78y22"  # Dad ("Mo Yu Bu Yu")
BOT_WXID = "wxid_7onnerpx2s2l22"  # Mohe itself (the bot's own account)
INTERESTING_WXIDS = {OWN_WXID, BOT_WXID}

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        # Logged message previews contain CJK text, so pin the file encoding
        # instead of relying on the locale default.
        logging.FileHandler(LOG_FILE, encoding="utf-8"),
        logging.StreamHandler(),
    ],
)
log = logging.getLogger("mowechat")
def is_valid_text(s, min_ratio=0.5):
    """Decide whether *s* reads like human text rather than decoded noise.

    A character counts as "textual" when it is printable and is a letter,
    a digit, whitespace, or one of a small set of ASCII punctuation marks.
    Non-printable characters (for example newline or tab) never count.
    Strings shorter than two characters are always rejected.

    Args:
        s: The decoded string to judge.
        min_ratio: Minimum share of textual characters required.

    Returns:
        True when the share of textual characters is at least *min_ratio*.
    """
    if len(s) < 2:
        return False

    punctuation = '.,!?;:\'\"-()[]{}@#_/\\'
    textual = sum(
        1
        for ch in s
        if ch.isprintable()
        and (ch.isalpha() or ch.isspace() or ch.isdigit() or ch in punctuation)
    )
    return textual / max(len(s), 1) >= min_ratio
def extract_strings(data, min_len=4):
    """Extract readable strings from binary data.

    A candidate is a maximal run of bytes that are printable ASCII
    (0x20-0x7e), tab, LF, CR, or any byte >= 0x80 (possible UTF-8
    continuation/lead bytes). Runs of at least *min_len* bytes are decoded
    as UTF-8 (undecodable sequences become U+FFFD) and kept only when
    ``is_valid_text`` accepts them.

    The scan is done with one compiled regex instead of growing a ``bytes``
    object one byte at a time, which avoids quadratic copying on large
    buffers and removes the duplicated end-of-input flush.

    Args:
        data: A bytes-like object to scan.
        min_len: Minimum run length in bytes; values below 1 behave like 1
            (shorter strings are rejected by ``is_valid_text`` anyway).

    Returns:
        A list of decoded strings, in the order they occur in *data*.
    """
    run_pattern = re.compile(
        rb'[\x09\x0a\x0d\x20-\x7e\x80-\xff]{%d,}' % max(min_len, 1)
    )
    strings = []
    for run in run_pattern.finditer(data):
        # errors='replace' means decoding cannot raise here.
        decoded = run.group().decode('utf-8', errors='replace')
        if is_valid_text(decoded):
            strings.append(decoded)
    return strings
class WeChatMemoryMonitor:
"""Monitors WeChat process memory for new messages."""
def __init__(self):
self.pid = None
self.seen = self._load_seen()
self.heap_region = None
self.wxid_pattern = re.compile(rb'wxid_[a-zA-Z0-9]{10,28}\x00')
# Known message sources
self.known_wxids = set(INTERESTING_WXIDS)
def _load_seen(self):
try:
with open(SAWN_FILE, 'r') as f:
return set(json.load(f))
except:
return set()
def _save_seen(self):
trimmed = set(list(self.seen)[-2000:])
try:
with open(SAWN_FILE, 'w') as f:
json.dump(list(trimmed), f)
except:
pass
def find_wechat(self):
"""Find the main wechat process PID and heap region."""
for p in os.listdir('/proc'):
if not p.isdigit():
continue
try:
with open(f'/proc/{p}/maps', 'r') as f:
content = f.read(8192)
if WECHAT_BINARY_MARKER in content:
self.pid = int(p)
# Find heap
for line in content.split('\n'):
if '[heap]' in line:
parts = line.split()
addr_range = parts[0].split('-')
self.heap_region = (int(addr_range[0], 16), int(addr_range[1], 16))
return True
except:
continue
return False
def scan_message(self, mem, wxid_bytes, wxid_pos, wxid_end):
"""Try to extract a real message following a wxid in memory."""
wxid_str = wxid_bytes.decode('utf-8', errors='replace').strip('\x00')
# Search within 512 bytes after the wxid for message content
search_start = wxid_end
search_end = min(search_start + 512, self.heap_region[1] - self.heap_region[0] if self.heap_region else search_start + 512)
try:
mem.seek(search_start)
data = mem.read(search_end - search_start)
except:
return None
# Look for null-terminated strings that look like messages
messages = []
current = b''
for b in data:
if b == 0:
if len(current) >= 3:
try:
text = current.decode('utf-8', errors='replace')
# Filter: must have real text content
if is_valid_text(text) and len(text) >= 2 and not text.startswith('wxid_'):
messages.append(text)
except:
pass
current = b''
else:
current += bytes([b])
# Also try to find message by looking for it at a known offset pattern
# In WeChat's structure: [msg_type(4)] [svr_id(8)] [content_ptr(8)] [content_len(4)] [content...]
if not messages:
return None
# Pick the best candidate (longest, most printable)
best = max(messages, key=lambda m: (len(m), sum(1 for c in m if c.isalpha() or c.isdigit())))
return best
def scan_heap(self):
"""Scan the heap region for messages."""
if not self.heap_region:
return []
start, end = self.heap_region
messages = []
try:
with open(f'/proc/{self.pid}/mem', 'rb') as mem:
# Read entire heap
mem.seek(start)
size = min(end - start, 50 * 1024 * 1024) # Max 50MB
data = mem.read(size)
# Find all wxid occurrences
for match in self.wxid_pattern.finditer(data):
wxid = match.group(0).decode('utf-8', errors='replace').strip('\x00')
pos = match.start()
global_pos = start + pos
# Look for message content in the next 256 bytes
content_area = data[pos + len(match.group()):pos + len(match.group()) + 256]
# Try to find a null-terminated UTF-8 string that looks like a message
for cmatch in re.finditer(rb'([\x20-\x7e\x80-\xff\x00]{4,})', content_area):
raw = cmatch.group(0)
# Remove trailing nulls
raw = raw.rstrip(b'\x00')
if len(raw) < 3:
continue
try:
text = raw.decode('utf-8', errors='replace')
except:
continue
# FILTER: Must have substantial real text content
alpha_count = sum(1 for c in text if c.isalpha() or '\u4e00' <= c <= '\u9fff')
total_len = len(text)
if total_len < 3:
continue
# Skip if it's just another wxid
if text.startswith('wxid_'):
continue
# Skip binary garbage (must be >= 40% alphabetic/CJK chars)
if alpha_count / max(total_len, 1) < 0.3:
continue
# Skip if it looks like a URL/path with no message content
if text.startswith('http') or text.startswith('/'):
continue
# Create hash for dedup
msg_hash = hashlib.md5(f"{wxid}:{text}".encode()).hexdigest()
if msg_hash not in self.seen:
self.seen.add(msg_hash)
messages.append({
'wxid': wxid,
'content': text,
'pos': hex(global_pos),
'alpha_ratio': f"{alpha_count/total_len:.2f}",
})
break # One best message per wxid occurrence
except (PermissionError, ProcessLookupError) as e:
log.warning(f"Memory read failed: {e}")
except Exception as e:
log.error(f"Heap scan error: {e}")
return messages
def forward_to_hermes(self, msg):
"""Forward to Hermes Gateway."""
payload = json.dumps({
"model": "nova-4",
"messages": [
{"role": "system", "content": "You receive WeChat messages. Process according to standard pipeline."},
{"role": "user", "content": f"[WeChat] From: {msg['wxid']}\n{msg['content']}"}
]
}).encode('utf-8')
try:
req = urllib.request.Request(
HERMES_API, data=payload,
headers={"Content-Type": "application/json", "Authorization": f"Bearer {HERMES_KEY}"},
method="POST"
)
urllib.request.urlopen(req, timeout=3)
log.info(f"Forwarded: {msg['wxid']}: {msg['content'][:60]}")
except Exception as e:
log.warning(f"Forward failed: {e}")
def run(self, once=False):
if not self.find_wechat():
log.error("WeChat not found!")
return []
log.info(f"WeChat PID: {self.pid}, heap: 0x{self.heap_region[0]:x}-0x{self.heap_region[1]:x}" if self.heap_region else f"PID: {self.pid}, no heap")
if once:
messages = self.scan_heap()
seen_wxids = set()
for msg in messages:
if msg['wxid'] not in seen_wxids:
log.info(f" [{msg['wxid']}] {msg['content'][:80]}")
seen_wxids.add(msg['wxid'])
if messages:
self._save_seen()
return messages
log.info(f"Monitoring every {POLL_INTERVAL}s...")
while True:
try:
if not os.path.exists(f'/proc/{self.pid}'):
log.warning("WeChat died, re-finding...")
if not self.find_wechat():
time.sleep(30)
continue
messages = self.scan_heap()
for msg in messages:
log.info(f"NEW: [{msg['wxid']}] {msg['content'][:80]}")
# self.forward_to_hermes(msg)
if messages:
self._save_seen()
time.sleep(POLL_INTERVAL)
except KeyboardInterrupt:
break
except Exception as e:
log.error(f"Error: {e}")
time.sleep(10)
return True
def main():
    """Command-line entry point.

    Without flags the monitor runs until interrupted. With ``--once`` a
    single scan is made and a summary is printed: the messages whose wxid
    is in ``INTERESTING_WXIDS`` or whose ``alpha_ratio`` exceeds 0.5.
    ``--foreground`` is accepted but not read anywhere in this function.
    """
    cli = argparse.ArgumentParser()
    for flag in ("--once", "--foreground"):
        cli.add_argument(flag, action="store_true")
    options = cli.parse_args()

    watcher = WeChatMemoryMonitor()
    if not options.once:
        watcher.run()
        return

    found = watcher.run(once=True)
    # Only show real messages (from known contacts or with good content)
    shown = []
    for entry in found:
        from_known_contact = entry['wxid'] in INTERESTING_WXIDS
        if from_known_contact or float(entry.get('alpha_ratio', 0)) > 0.5:
            shown.append(entry)

    print(f"\nFound {len(found)} potential messages, {len(shown)} from known contacts")
    for entry in shown:
        print(f"  [{entry['wxid']}] {entry['content'][:100]}")
    print(f"\n(Total seen: {len(watcher.seen)})")


if __name__ == "__main__":
    main()