refactor(xxm): consolidate 4 bot implementations into unified xmpp_agent_core.py

- Merge bot_base.py, gateway/scripts/xmpp_bot.py, bots/*, xmpp_bot_rest.py
  into a single xmpp_agent_core.py with an --agent flag (xxm|mohe|zhiwei|xiaoguo)
- Add xxm_bot.py wrapper (encoding=utf-8 for Windows exec)
- Fix slixmpp connect() API: use host=/port= keyword args (was tuple)
- Clean up orphans: bots/, scripts/, hermes_state.py, xmpp_bot.py, xmpp_bot_rest.py
- Add docs/CLEANUP_PLAN.md documenting the migration
- Update README.md project structure
- Also: fix WeChat agent path resolution (tool paths are now built relative to the script directory instead of hard-coded absolute paths)
This commit is contained in:
hmo
2026-06-21 16:13:57 +08:00
parent b9df510f31
commit babbc46801
22 changed files with 1273 additions and 5442 deletions
+34 -20
View File
@@ -66,9 +66,9 @@ def get_db_handle():
dbs = r.get("data") or []
for db in dbs:
dbname = db.get("databaseName", "")
if "MSG" in dbname or "Msg" in dbname:
db_handle_cache = db.get("handle")
return db_handle_cache
if dbname.startswith("MSG") and "Media" not in dbname:
db_handle_cache = db.get("handle")
return db_handle_cache
return None
@@ -94,24 +94,42 @@ def query_history(wxid, limit=10):
return None
limit_val = min(int(limit), 200)
sql = (
f"SELECT CreateTime, IsSender, Type, SubType, StrContent, DisplayContent "
f"FROM MSG WHERE StrTalker='{wxid}' AND Type IN (1,49) "
f"SELECT CreateTime, IsSender, Type, SubType, StrContent, DisplayContent, CompressContent, BytesExtra "
f"FROM MSG WHERE StrTalker='{wxid}' AND Type IN ('1','49') "
f"ORDER BY CreateTime DESC LIMIT {limit_val}"
)
r = wxpost("/api/execSql", {"dbHandle": h, "sql": sql}, timeout=15)
data = r.get("data") or []
if not data or len(data) < 2:
return None
# Skip header row, reverse to chronological order
rows = data[1:]
# wxhelper returns [{value: [cols]}, {value: [row1]}, ...]
rows = [item.get("value", item) if isinstance(item, dict) else item for item in data]
rows = rows[1:] # skip header
rows.reverse()
results = []
for row in rows:
content = (row[4] or "").strip() if len(row) > 4 else ""
if not content and len(row) > 5:
content = (row[5] or "").strip()
# Type 49 (article link): extract URL from CompressContent or BytesExtra
if not content and str(row[2]) == "49":
try:
import re
# Try BytesExtra first (row[7])
for idx in [7, 6]:
if idx < len(row) and row[idx]:
text = str(row[idx])
urls = re.findall(r'https?://[^\s\x00-\x1f<>\"\']{10,}', text)
if urls:
content = urls[0]
break
except:
pass
if not content:
continue
if str(row[2]) == "49":
content = "[文章链接]"
else:
continue
results.append({
"CreateTime": row[0],
"IsSender": row[1],
@@ -181,29 +199,25 @@ def get_recent_chats(limit=20):
if not h:
return []
sql = (
f"SELECT StrTalker, MAX(CreateTime) as last_time, COUNT(*) as msg_count "
f"FROM MSG WHERE Type IN (1,49) "
f"GROUP BY StrTalker ORDER BY last_time DESC LIMIT {min(limit, 50)}"
f"SELECT DISTINCT StrTalker FROM MSG WHERE Type IN ('1','49') "
f"LIMIT {min(limit, 50)}"
)
r = wxpost("/api/execSql", {"dbHandle": h, "sql": sql}, timeout=15)
data = r.get("data") or []
if not data or len(data) < 2:
return []
rows = [item.get("value", item) if isinstance(item, dict) else item for item in data]
results = []
for row in data[1:]:
for row in rows[1:]:
wxid = (row[0] or "").strip()
if not wxid or wxid in ("fmessage", "weixin", "wechat", "filehelper"):
if not wxid or wxid in ("fmessage", "weixin", "wechat", "filehelper", "medianote", "floatbottle", "qmessage"):
continue
if wxid.startswith("gh_"):
continue
ts = int(row[1]) if row[1] else 0
count = int(row[2]) if len(row) > 2 and row[2] else 0
results.append({
"wxid": wxid,
"nickname": get_nickname(wxid),
"last_message_time": datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S") if ts else None,
"last_message_ts": ts,
"message_count": count,
"last_message_time": None,
"last_message_ts": 0,
"message_count": 0,
})
return results
+78 -3
View File
@@ -21,7 +21,8 @@ if not _lock.ok:
BOT_WXID = "wxid_5bhmquvkbude22"
BLOCK_WXIDS = {"fmessage", "weixin", "wechat"} # system accounts / WeChat team: never reply
WX_API = "http://127.0.0.1:19088"
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
TEMP_DIR = os.path.join(PROJECT_ROOT, "temp")
LOG_FILE = os.path.join(LOG_DIR, "wechat_agent.log")
@@ -155,8 +156,8 @@ HERMES_KEY = "hermes123"
SENSENOVA_KEY = "sk-aRNj3UwKSLPsDfh15QNTPwbHxahblfaO"
SENSENOVA_URL = "https://token.sensenova.cn/v1"
INJECTOR = r"D:\F\NewI\opencode\daily-workspace\projects\wechat-hermes-gateway\tools\Injector_x64.exe"
WXHELPER_DLL = r"D:\F\NewI\opencode\daily-workspace\projects\wechat-hermes-gateway\tools\wxhelper_official_39581.dll"
INJECTOR = os.path.join(SCRIPT_DIR, "..", "tools", "Injector_x64.exe")
WXHELPER_DLL = os.path.join(SCRIPT_DIR, "..", "tools", "wxhelper_official_39581.dll")
def log(m):
with open(LOG_FILE, "a", encoding="utf-8") as f:
@@ -569,8 +570,24 @@ def process_msg(raw_data):
ct = d.get("content", "") or d.get("msg", "") or d.get("text", "")
msg_type = d.get("type", 1)
is_self = d.get("isSelf", 0) or d.get("self", 0)
# DEBUG: capture Type 49 full XML for URL analysis
if msg_type == 49:
try:
with open(LOG_DIR + "/t49_xml.txt", "a", encoding="utf-8") as _f:
_f.write(f"\n=== {time.time()} type=49 from={fu} ===\n{ct[:10000]}\n")
except: pass
if "@chatroom" in fu:
log(f"GROUP RAW DUMP: keys={list(d.keys())} ct_len={len(ct)} ct[:100]={ct[:100]}")
# DEBUG: capture full raw data for quote analysis
try:
with open(LOG_DIR + "/group_raw.jsonl", "a", encoding="utf-8") as _f:
_f.write(json.dumps({k: str(v)[:2000] for k, v in d.items()}, ensure_ascii=False) + "\n")
except: pass
# DEBUG: capture all raw msgs for field analysis
try:
with open(LOG_DIR + "/all_raw.jsonl", "a", encoding="utf-8") as _f:
_f.write(json.dumps({k: str(v)[:500] for k, v in d.items()}, ensure_ascii=False) + "\n")
except: pass
if not fu or not ct or fu == BOT_WXID or fu in BLOCK_WXIDS or fu.startswith("gh_") or is_self:
log(f"SKIP: fu={fu} self={is_self}")
return
@@ -608,6 +625,64 @@ def process_msg(raw_data):
else:
log(f"-> {fu}: skip (blank image response)")
return
# Type 49 (forwarded article) - extract URL and process via article_processor
if msg_type == 49 and ct.strip().startswith("<?xml"):
try:
import re
# Try <url> first, then <shareUrlOriginal>, then <shareUrlOpen>
urls = re.findall(r'<url>(https?://mp\.weixin\.qq\.com[^<]+)</url>', ct)
if not urls:
urls = re.findall(r'<shareUrlOriginal>(https?://mp\.weixin\.qq\.com[^<]+)</shareUrlOriginal>', ct)
if not urls:
urls = re.findall(r'<shareUrlOpen>(https?://mp\.weixin\.qq\.com[^<]+)</shareUrlOpen>', ct)
url = urls[0] if urls else None
# Extract title from XML
titles = re.findall(r'<title>(.*?)</title>', ct)
title = titles[0] if titles else ""
# Extract description
descs = re.findall(r'<des>(.*?)</des>', ct)
desc = descs[0] if descs else ""
if url:
log(f"ARTICLE URL: {url}")
# Call article_processor on localhost
import urllib.request as ur
req = ur.Request("http://127.0.0.1:5810/process",
data=json.dumps({"url": url}).encode("utf-8"),
headers={"Content-Type": "application/json"})
with ur.urlopen(req, timeout=180) as resp:
result = json.loads(resp.read().decode("utf-8"))
if result.get("status") == "ok":
content = result.get("content", "")[:3000]
title = result.get("title", "")
images = result.get("images_ocr", 0)
enriched = f"[老莫转发了一篇文章{(chr(10)+'标题: '+title) if title else ''}{images}张图片已OCR]\n\n{content}"
log(f"ARTICLE processed: {len(content)} chars")
reply = call_hermes(fu, enriched)
if reply and reply.strip():
log(f"-> {fu}: {reply[:50]}")
send_wx(fu, reply.strip())
return
else:
log(f"ARTICLE process failed: {result.get('error','')[:100]}")
# Fallback: send title + description
fallback = f"[老莫转发了一篇文章]{(chr(10)+'标题: '+title) if title else ''}{(chr(10)+'摘要: '+desc[:200]) if desc else ''}\n(全文抓取失败: {result.get('error','')[:60]})"
reply = call_hermes(fu, fallback)
if reply and reply.strip():
send_wx(fu, reply.strip())
return
else:
# No URL found, send title + description
if title:
log(f"ARTICLE: no URL, sending title+desc")
fallback = f"[老莫转发了一篇文章]{(chr(10)+'标题: '+title) if title else ''}{(chr(10)+'摘要: '+desc[:200]) if desc else ''}"
reply = call_hermes(fu, fallback)
if reply and reply.strip():
send_wx(fu, reply.strip())
return
except Exception as e:
log(f"ARTICLE handler error: {e}")
# Fall through to text handler
# Text - prepend sender wxid+name so Hermes knows who's talking
sender_name = get_nickname(fu)
chat_type = "Group" if "@chatroom" in fu else "Private"
+18 -19
View File
@@ -26,8 +26,8 @@ if not _lock.ok:
# ── Config ──
JID = "xxm@yoin.fun"
PASSWORD = "hermes123"
SERVER = "xmpp.yoin.fun"
PORT = 3021
SERVER = "192.168.1.246"
PORT = 5222
ATTACH_SESSION = "ses_xxm_xmpp"
MUC_ROOMS = [
"coregroup@conference.yoin.fun", # core group chat
@@ -696,23 +696,22 @@ if __name__ == "__main__":
bot_nick = JID.split("@")[0]
async def _join_silent():
for room_jid in MUC_ROOMS:
for attempt in range(3):
try:
# Use join_muc_wait to ensure room join completes
await self.plugin['xep_0045'].join_muc_wait(room_jid, bot_nick, timeout=60)
log(f"Joined {room_jid} (silent)")
break
except asyncio.TimeoutError:
log(f"MUC join timeout ({attempt+1}/3) for {room_jid}")
if attempt == 2:
log(f"MUC setup failed for {room_jid} after 3 attempts")
await asyncio.sleep(5)
else:
await asyncio.sleep(3)
except Exception as e:
log(f"MUC setup failed for {room_jid}: {e} (type={type(e).__name__})")
await asyncio.sleep(5)
break
nick = bot_nick
try:
# Use join_muc (non-waiting) to register plugin state
self.plugin['xep_0045'].join_muc(room_jid, nick)
# Also send raw presence as backup
presence = (
f"<presence to='{room_jid}/{nick}'>"
f"<x xmlns='http://jabber.org/protocol/muc'>"
f"<history maxstanzas='0'/>"
f"</x></presence>"
)
self.send_raw(presence)
log(f"Joined {room_jid} (async)")
except Exception as e:
log(f"MUC join failed for {room_jid}: {type(e).__name__}: {e}")
await asyncio.sleep(2)
# After joining, query MAM for recent history
await asyncio.sleep(3) # wait for MUC join to propagate
await _fetch_mam_history()