fix: decode HTML entities in wechat article URL from XML
WeChat XML uses &amp; to encode & in forwarded article URLs. Without html.unescape(), chksm and other query params were passed still entity-encoded to WeChat servers, causing a signature mismatch and a captcha block. Ultraworked with Sisyphus. Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
@@ -636,12 +636,16 @@ def process_msg(raw_data):
|
||||
if not urls:
|
||||
urls = re.findall(r'<shareUrlOpen>(https?://mp\.weixin\.qq\.com[^<]+)</shareUrlOpen>', ct)
|
||||
url = urls[0] if urls else None
|
||||
# Decode HTML entities (&amp; → &) — WeChat XML uses &amp; in URLs
|
||||
if url:
|
||||
import html as _html
|
||||
url = _html.unescape(url)
|
||||
# Extract title from XML
|
||||
titles = re.findall(r'<title>(.*?)</title>', ct)
|
||||
title = titles[0] if titles else ""
|
||||
title = _html.unescape(titles[0]) if titles else ""
|
||||
# Extract description
|
||||
descs = re.findall(r'<des>(.*?)</des>', ct)
|
||||
desc = descs[0] if descs else ""
|
||||
desc = _html.unescape(descs[0]) if descs else ""
|
||||
|
||||
if url:
|
||||
log(f"ARTICLE URL: {url}")
|
||||
|
||||
Reference in New Issue
Block a user