From c6795bcb468b6f1c660cd04dba2d4f346898a800 Mon Sep 17 00:00:00 2001 From: hmo Date: Thu, 25 Jun 2026 01:20:34 +0800 Subject: [PATCH] fix: decode HTML entities in wechat article URL from XML WeChat XML uses &amp; to encode & in forwarded article URLs. Without html.unescape(), chksm and other query params were passed encoded to WeChat servers, causing signature mismatch and captcha block. Ultraworked with Sisyphus Co-authored-by: Sisyphus --- gateway/scripts/wechat_agent.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/gateway/scripts/wechat_agent.py b/gateway/scripts/wechat_agent.py index 5935a36..e56f3b1 100644 --- a/gateway/scripts/wechat_agent.py +++ b/gateway/scripts/wechat_agent.py @@ -636,12 +636,16 @@ def process_msg(raw_data): if not urls: urls = re.findall(r'(https?://mp\.weixin\.qq\.com[^<]+)', ct) url = urls[0] if urls else None + # Decode HTML entities (&amp; → &) — WeChat XML uses &amp; in URLs + if url: + import html as _html + url = _html.unescape(url) # Extract title from XML titles = re.findall(r'(.*?)', ct) - title = titles[0] if titles else "" + title = _html.unescape(titles[0]) if titles else "" # Extract description descs = re.findall(r'(.*?)', ct) - desc = descs[0] if descs else "" + desc = _html.unescape(descs[0]) if descs else "" if url: log(f"ARTICLE URL: {url}")