fix: decode HTML entities in wechat article URL from XML

WeChat XML uses &amp; to encode & in forwarded article URLs. Without html.unescape(), chksm and other query params were passed to WeChat servers still entity-encoded, causing a signature mismatch and a captcha block.

Ultraworked with Sisyphus

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
hmo
2026-06-25 01:20:34 +08:00
parent ef93f066e3
commit c6795bcb46
+6 -2
View File
@@ -636,12 +636,16 @@ def process_msg(raw_data):
if not urls: if not urls:
urls = re.findall(r'<shareUrlOpen>(https?://mp\.weixin\.qq\.com[^<]+)</shareUrlOpen>', ct) urls = re.findall(r'<shareUrlOpen>(https?://mp\.weixin\.qq\.com[^<]+)</shareUrlOpen>', ct)
url = urls[0] if urls else None url = urls[0] if urls else None
# Decode HTML entities (&amp; → &) — WeChat XML uses &amp; in URLs
if url:
import html as _html
url = _html.unescape(url)
# Extract title from XML # Extract title from XML
titles = re.findall(r'<title>(.*?)</title>', ct) titles = re.findall(r'<title>(.*?)</title>', ct)
title = titles[0] if titles else "" title = _html.unescape(titles[0]) if titles else ""
# Extract description # Extract description
descs = re.findall(r'<des>(.*?)</des>', ct) descs = re.findall(r'<des>(.*?)</des>', ct)
desc = descs[0] if descs else "" desc = _html.unescape(descs[0]) if descs else ""
if url: if url:
log(f"ARTICLE URL: {url}") log(f"ARTICLE URL: {url}")