From c6795bcb468b6f1c660cd04dba2d4f346898a800 Mon Sep 17 00:00:00 2001
From: hmo <staymo7777@gmail.com>
Date: Thu, 25 Jun 2026 01:20:34 +0800
Subject: [PATCH] fix: decode HTML entities in wechat article URL from XML

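WeChat XML uses &amp; to encode & in forwarded article URLs. Without html.unescape(), chksm and other query params were passed still entity-encoded to WeChat servers, causing a signature mismatch and a captcha block. The article title and description extracted from the same XML are unescaped as well.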

Ultraworked with Sisyphus

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
---
 gateway/scripts/wechat_agent.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/gateway/scripts/wechat_agent.py b/gateway/scripts/wechat_agent.py
index 5935a36..e56f3b1 100644
--- a/gateway/scripts/wechat_agent.py
+++ b/gateway/scripts/wechat_agent.py
@@ -636,12 +636,16 @@ def process_msg(raw_data):
                 if not urls:
                     urls = re.findall(r'<shareUrlOpen>(https?://mp\.weixin\.qq\.com[^<]+)</shareUrlOpen>', ct)
                 url = urls[0] if urls else None
+                # Decode HTML entities (&amp; → &) — WeChat XML uses &amp; in URLs
+                import html as _html  # unconditional: title/desc below use it even when url is None
+                if url:
+                    url = _html.unescape(url)
                 # Extract title from XML
                 titles = re.findall(r'<title>(.*?)</title>', ct)
-                title = titles[0] if titles else ""
+                title = _html.unescape(titles[0]) if titles else ""
                 # Extract description 
                 descs = re.findall(r'<des>(.*?)</des>', ct)
-                desc = descs[0] if descs else ""
+                desc = _html.unescape(descs[0]) if descs else ""
                 
                 if url:
                     log(f"ARTICLE URL: {url}")