#!/usr/bin/env python3
"""MoWeChat v4 — efficient heap scanner for WeChat messages."""

import hashlib
import json
import logging
import os
import re
import sys
import time
from typing import List, Optional, Tuple

SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
LOG_DIR = os.path.join(SCRIPT_DIR, "..", "logs")
os.makedirs(LOG_DIR, exist_ok=True)
LOG_FILE = os.path.join(LOG_DIR, "wechat_v4.log")
# Path of a "seen messages" JSON file; nothing in this script reads or
# writes it. The name is kept as-is in case other code refers to it.
SAWN_FILE = os.path.join(LOG_DIR, "wechat_seen_v4.json")

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(message)s",
    handlers=[logging.FileHandler(LOG_FILE), logging.StreamHandler()],
)
log = logging.getLogger("wc4")

# Known wxids
WXID_DAD = "wxid_c0a6izmwd78y22"
WXID_MOHE = "wxid_7onnerpx2s2l22"
KNOWN = {WXID_DAD, WXID_MOHE}

# One extracted message: (text, owning wxid or 'unknown', absolute address).
Message = Tuple[str, str, int]

# Scanner tuning constants.
_DEFAULT_MAX_HEAP_BYTES = 40 * 1024 * 1024  # cap on bytes read from the heap
_MAX_HITS_PER_WXID = 100    # occurrences of each known wxid to inspect
_SNIPPET_BYTES = 300        # bytes examined after each wxid occurrence
_MAX_CATCH_ALL_HITS = 500   # messages recorded by the catch-all pass
_OWNER_WINDOW_BYTES = 200   # look-behind window for owner attribution
_MIN_PREFIX, _MAX_PREFIX = 0x04, 0x40  # accepted length-prefix byte values

_WXID_RE = re.compile(rb'wxid_[a-zA-Z0-9]{10,28}')
# Maximal runs of at least three non-NUL bytes.
_NUL_FREE_RUN_RE = re.compile(rb'[^\x00]{3,}')


def heap_of(pid):
    """Get heap address range for a PID.

    Scans ``/proc/<pid>/maps`` for the first line containing ``[heap]``.

    Args:
        pid: Process ID whose memory map is inspected.

    Returns:
        ``(start, end)`` as integers parsed from the hexadecimal address
        range of that line, or ``(None, None)`` if no such line exists.

    Raises:
        OSError: If the maps file cannot be opened or read.
    """
    with open(f'/proc/{pid}/maps') as f:
        for line in f:
            if '[heap]' in line:
                # First whitespace-separated field is "<start>-<end>" in hex.
                low, high = line.split()[0].split('-')
                return int(low, 16), int(high, 16)
    return None, None


def read_heap(pid, max_bytes=_DEFAULT_MAX_HEAP_BYTES):
    """Read the heap memory.

    Args:
        pid: Process ID to read from.
        max_bytes: Upper bound on the number of bytes read, counted from
            the start of the heap. Defaults to 40 MiB.

    Returns:
        ``(data, start, end)`` where ``data`` is the bytes read from
        ``/proc/<pid>/mem`` and ``start``/``end`` are the heap bounds
        reported by :func:`heap_of`; ``(None, None, None)`` when no heap
        mapping is found.

    Raises:
        OSError: If the maps or mem file cannot be opened or read.
    """
    start, end = heap_of(pid)
    if start is None:
        return None, None, None
    with open(f'/proc/{pid}/mem', 'rb') as f:
        f.seek(start)
        data = f.read(min(end - start, max_bytes))
    return data, start, end


def _count_cjk_and_alpha(text: str) -> Tuple[int, int]:
    """Return (CJK ideograph count, ASCII letter count) for *text*."""
    cjk = sum(1 for ch in text if '\u4e00' <= ch <= '\u9fff')
    alpha = sum(1 for ch in text if ch.isascii() and ch.isalpha())
    return cjk, alpha


def _first_scored_run(snippet: bytes) -> Optional[Tuple[int, str]]:
    """Return (offset, text) of the first NUL-free run that scores >= 5.

    Runs shorter than three bytes are ignored. The score is
    ``3 * cjk + ascii_letters``.
    """
    for run in _NUL_FREE_RUN_RE.finditer(snippet):
        # errors='replace' never raises; invalid bytes become U+FFFD.
        text = run.group().decode('utf-8', errors='replace')
        cjk, alpha = _count_cjk_and_alpha(text)
        if cjk * 3 + alpha >= 5:
            return run.start(), text
    return None


def _leading_text(data: bytes, start: int, max_len: int) -> bytes:
    """Return the bytes >= 0x20 at data[start:], at most *max_len* long."""
    stop = min(start + max_len, len(data))
    j = start
    # NUL and other control bytes (< 0x20) terminate the run.
    while j < stop and data[j] >= 0x20:
        j += 1
    return data[start:j]


def _nearest_wxid(data: bytes, index: int) -> str:
    """Return the wxid closest before *index*, or 'unknown'.

    Only the preceding 200 bytes are searched. The last match in that
    window is the one nearest to *index*.
    """
    window = data[max(0, index - _OWNER_WINDOW_BYTES):index]
    found = _WXID_RE.findall(window)
    return found[-1].decode() if found else 'unknown'


def find_messages(data, heap_start):
    """Extract messages from heap data efficiently.

    Two passes are made over *data*:

    1. For every known wxid (in sorted order, so output is reproducible),
       up to 100 occurrences of ``wxid + NUL`` are located. The first
       sufficiently text-like NUL-free run in the following 300 bytes is
       recorded with that wxid as owner.
    2. A catch-all pass treats each byte in 0x04..0x40 as a possible
       length prefix, reads up to that many following bytes (stopping at
       the first byte below 0x20), and records the result if it has at
       least two CJK characters or six ASCII letters and does not start
       with one of ``& / \\ .``. The owner is the nearest wxid within the
       preceding 200 bytes, else ``'unknown'``. The pass stops after 500
       recorded messages.

    Each pass records a given text at most once.

    Args:
        data: Raw heap bytes.
        heap_start: Address of ``data[0]``, added to offsets.

    Returns:
        List of ``(text, owner, address)`` tuples: pass-1 results first,
        then pass-2 results in ascending address order.
    """
    msgs: List[Message] = []
    seen = set()

    # Pass 1: known wxid followed by content.
    for wxid in sorted(KNOWN):
        marker = wxid.encode() + b'\x00'
        pos = 0
        for _ in range(_MAX_HITS_PER_WXID):
            idx = data.find(marker, pos)
            if idx < 0:
                break
            after = idx + len(marker)
            hit = _first_scored_run(data[after:after + _SNIPPET_BYTES])
            if hit is not None:
                offset, text = hit
                key = ('wxid', text)
                if key not in seen:
                    seen.add(key)
                    msgs.append((text, wxid, heap_start + after + offset))
            # Step one byte forward so overlapping occurrences are found.
            pos = idx + 1

    # Pass 2: [1 byte length/bufsize][readable text] catch-all.
    recorded = 0
    # The last ten bytes are never treated as a prefix position.
    for i in range(len(data) - 10):
        if recorded >= _MAX_CATCH_ALL_HITS:
            break
        prefix = data[i]
        if not _MIN_PREFIX <= prefix <= _MAX_PREFIX:
            continue
        raw = _leading_text(data, i + 1, prefix)
        if len(raw) < 5:
            continue
        text = raw.decode('utf-8', errors='replace')
        cjk, alpha = _count_cjk_and_alpha(text)
        if not (cjk >= 2 or alpha >= 6) or text[0] in '&/\\.':
            continue
        key = ('any', text)
        if key in seen:
            continue
        seen.add(key)
        msgs.append((text, _nearest_wxid(data, i), heap_start + i))
        recorded += 1

    return msgs


def find_wechat(binary_path="/opt/wechat/wechat"):
    """Find the main wechat process PID.

    Args:
        binary_path: Substring looked for in each process's
            ``/proc/<pid>/maps``.

    Returns:
        The PID (int) of the first process, in ``os.listdir('/proc')``
        order, whose maps file contains *binary_path*; ``None`` if no
        process matches.
    """
    for entry in os.listdir('/proc'):
        if not entry.isdigit():
            continue
        try:
            with open(f'/proc/{entry}/maps') as f:
                if binary_path in f.read():
                    return int(entry)
        except (OSError, UnicodeError):
            # Process exited, is not readable by us, or its maps file
            # holds undecodable bytes: skip it and keep looking.
            continue
    return None


def main():
    """Locate WeChat, scan its heap and print each distinct message.

    Returns:
        Process exit status: 0 on success, 1 if WeChat is not running or
        its heap cannot be read.
    """
    pid = find_wechat()
    if pid is None:
        log.error("WeChat not found")
        return 1

    log.info("Found PID %s", pid)
    try:
        data, hs, he = read_heap(pid)
    except OSError as exc:
        # Report a read failure the same way as a missing heap mapping
        # instead of aborting with a traceback.
        log.error("Cannot read heap: %s", exc)
        return 1
    if data is None:
        log.error("Cannot read heap")
        return 1

    log.info("Heap: 0x%x-0x%x (%d bytes)", hs, he, len(data))
    msgs = find_messages(data, hs)

    # Deduplicate on stripped text across both scanner passes.
    seen_texts = set()
    for text, wxid, _addr in msgs:
        key = text.strip()
        if key not in seen_texts and len(key) >= 2:
            seen_texts.add(key)
            print(f"{wxid}: {text}")

    if not msgs:
        print("No messages found")
    return 0


if __name__ == "__main__":
    sys.exit(main())