fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
171 lines
5.5 KiB
Python
171 lines
5.5 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
Date: 2026/05/02
|
|
Desc: TapTap 游戏榜单数据接口
|
|
"""
|
|
import re
|
|
import time
|
|
from typing import Optional
|
|
|
|
import pandas as pd
|
|
import requests
|
|
|
|
|
|
# ============================================================
# Constant configuration
# ============================================================

# Ranking-list display name -> value sent as the ``type_name`` query parameter.
_TAPTAP_RANK_TYPE_MAP = {
    "热玩榜": "pop",
    "热门榜": "hot",
    "新品榜": "new",
    "预约榜": "reserve",
    "热卖榜": "sell",
}

# Internal tuning parameters (not exposed to callers).
_TAPTAP_MAX_LOOPS = 200  # upper bound on the number of page requests per call
_TAPTAP_PAGE_SIZE = 10  # value sent as ``limit``; also the offset increment
_TAPTAP_SLEEP = 0.4  # seconds to sleep between consecutive page requests
_TAPTAP_TIMEOUT = 15.0  # per-request timeout passed to ``requests``

# Endpoint that serves the ranking lists.
_TAPTAP_BASE_URL = "https://www.taptap.cn/webapiv2/app-top/v2/hits"

# Value sent as the ``X-UA`` query parameter with every request.
_TAPTAP_X_UA = (
    "V=1&PN=WebM&LANG=zh_CN&VN_CODE=102&LOC=CN&PLT=iOS&DS=Android"
    "&UID=12f0a48b-bd25-4dce-9d50-27924e83da1d&OS=iOS&OSV=18.5"
)

# Default HTTP headers installed on the session (mobile-Safari User-Agent).
_TAPTAP_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (iPhone; CPU iPhone OS 18_5 like Mac OS X) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) "
        "Version/18.0 Mobile/15E148 Safari/604.1"
    ),
    "Referer": "https://www.taptap.cn/",
    "Accept": "application/json, text/plain, */*",
}
|
|
|
|
|
|
def _clean_html(text: Optional[str]) -> str:
    """
    Strip HTML tags and decode HTML entities in a text fragment.

    ``<br>`` tags (any case, with or without attributes or a trailing slash)
    become newlines, every other tag is removed, entities are decoded, runs of
    three or more newlines collapse to two, and the result is stripped.

    :param text: raw text, possibly containing HTML; ``None`` and float NaN
        are treated as missing
    :type text: Optional[str]
    :return: cleaned plain text; ``""`` for missing input
    :rtype: str
    """
    import html  # local import keeps this block self-contained

    if text is None or (isinstance(text, float) and pd.isna(text)):
        return ""
    text = str(text)
    # Turn line-break tags into real newlines before the generic tag strip.
    text = re.sub(r"<br\b[^>]*>", "\n", text, flags=re.IGNORECASE)
    text = re.sub(r"<[^>]+>", "", text)
    # Decode entities in one pass *after* stripping tags, so escaped markup
    # such as "&lt;b&gt;" survives as literal text. A single pass also avoids
    # the double-decoding ("&amp;lt;" -> "<") that sequential replaces cause.
    text = html.unescape(text)
    # "&nbsp;" decodes to U+00A0; normalise it to a plain space.
    text = text.replace("\xa0", " ")
    return re.sub(r"\n{3,}", "\n\n", text).strip()
|
|
|
|
|
|
def game_hot_rank_taptap(symbol: str = "热玩榜") -> pd.DataFrame:
    """
    Fetch a TapTap game ranking list and return it as a DataFrame.

    https://www.taptap.cn/top/played

    The endpoint is requested page by page (``_TAPTAP_PAGE_SIZE`` items per
    request, at most ``_TAPTAP_MAX_LOOPS`` requests) until a page is empty or
    the number of collected items reaches the ``total`` reported by the first
    response. The collected items are flattened, renamed to Chinese column
    headers, type-converted, de-duplicated by game ID and given a 1-based
    "排名" column that reflects the order in which the items were returned.

    :param symbol: ranking list to fetch; choice of
        {"热玩榜", "热门榜", "新品榜", "预约榜", "热卖榜"}
    :type symbol: str
    :return: ranking data; an empty DataFrame when no items were returned
    :rtype: pandas.DataFrame
    :raises ValueError: if ``symbol`` is not a supported ranking list
    :raises RuntimeError: if a response's ``success`` field is falsy
    :raises requests.HTTPError: if a response has an error HTTP status
    """
    if symbol not in _TAPTAP_RANK_TYPE_MAP:
        raise ValueError(
            f"symbol 仅支持 {list(_TAPTAP_RANK_TYPE_MAP.keys())}, 当前传入: {symbol}"
        )
    type_name = _TAPTAP_RANK_TYPE_MAP[symbol]

    all_games = []
    total: Optional[int] = None
    offset = 0
    # The context manager closes the session's pooled connections even when a
    # request raises part-way through pagination.
    with requests.Session() as session:
        session.headers.update(_TAPTAP_HEADERS)
        for _ in range(_TAPTAP_MAX_LOOPS):
            params = {
                "from": offset,
                "limit": _TAPTAP_PAGE_SIZE,
                "type_name": type_name,
                "X-UA": _TAPTAP_X_UA,
            }
            r = session.get(
                _TAPTAP_BASE_URL, params=params, timeout=_TAPTAP_TIMEOUT
            )
            r.raise_for_status()
            js = r.json()
            if not js.get("success"):
                raise RuntimeError(f"TapTap 接口返回失败: {js}")
            # "data" / "list" may be present but null, hence the `or` fallbacks.
            data = js.get("data", {}) or {}
            page_list = data.get("list", []) or []
            if total is None:
                # Only the first response's total is used as the stop target.
                total = data.get("total", 0)
            if not page_list:
                break
            all_games.extend(page_list)
            if total and len(all_games) >= total:
                break
            offset += _TAPTAP_PAGE_SIZE
            time.sleep(_TAPTAP_SLEEP)  # pause between page requests

    if not all_games:
        return pd.DataFrame()

    df = pd.json_normalize(all_games)
    # Tags are a nested list that json_normalize leaves unflattened, so build
    # the comma-separated column by hand. A null "app", null "tags" or null
    # tag value is treated as empty instead of raising.
    df["标签"] = [
        ", ".join(
            str(tag.get("value") or "")
            for tag in ((item.get("app") or {}).get("tags") or [])
        )
        for item in all_games
    ]

    rename_map = {
        "app.id": "游戏ID",
        "app.title": "游戏名称",
        "app.icon.url": "图标链接",
        "app.stat.rating.score": "评分",
        "app.stat.hits_total": "总点击量",
        "app.stat.play_total": "游玩次数",
        "app.stat.review_count": "评论数",
        "app.stat.fans_count": "粉丝数",
        "app.description.text": "简介",
        "app.rec_text": "推荐语",
        "app.released_time": "发布时间戳",
    }
    df.rename(columns=rename_map, inplace=True)

    # Every conversion is guarded because any field may be absent from the
    # payload, in which case json_normalize produces no such column.
    if "评分" in df.columns:
        df["评分"] = pd.to_numeric(df["评分"], errors="coerce").astype("float64")
    for col in ["总点击量", "游玩次数", "评论数", "粉丝数"]:
        if col in df.columns:
            # Nullable Int64 keeps missing counts as <NA> rather than floats.
            df[col] = pd.to_numeric(df[col], errors="coerce").astype("Int64")
    if "发布时间戳" in df.columns:
        # The timestamp is interpreted as seconds since the Unix epoch.
        df["发布时间"] = pd.to_datetime(
            df["发布时间戳"], unit="s", errors="coerce"
        )
    for col in ["游戏名称", "游戏ID", "图标链接", "推荐语", "标签"]:
        if col in df.columns:
            df[col] = df[col].astype("string").str.strip()
    if "简介" in df.columns:
        df["简介"] = df["简介"].apply(_clean_html).astype("string")

    columns_order = [
        "游戏名称", "评分", "总点击量", "游玩次数", "评论数",
        "粉丝数", "标签", "推荐语", "发布时间",
        "游戏ID", "图标链接", "简介",
    ]
    df = df[[c for c in columns_order if c in df.columns]]
    # De-duplicate only when the ID column exists; otherwise drop_duplicates
    # would raise KeyError on the missing subset column.
    if "游戏ID" in df.columns:
        df = df.drop_duplicates(subset=["游戏ID"])
    df = df.reset_index(drop=True)
    df.insert(0, "排名", df.index + 1)
    return df
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo run: fetch and print each supported ranking list in turn.
    for _rank_name in ("热玩榜", "热门榜", "新品榜", "预约榜", "热卖榜"):
        game_hot_rank_taptap_df = game_hot_rank_taptap(symbol=_rank_name)
        print(game_hot_rank_taptap_df)
|