fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
78 lines
2.4 KiB
Python
78 lines
2.4 KiB
Python
#!/usr/bin/env python
|
|
"""
|
|
Date: 2025/3/10 18:00
|
|
Desc: 通用帮助函数
|
|
"""
|
|
|
|
import math
|
|
import random
|
|
import time
|
|
from typing import List, Dict
|
|
|
|
import pandas as pd
|
|
|
|
from akshare.utils.request import request_with_retry
|
|
from akshare.utils.tqdm import get_tqdm
|
|
|
|
|
|
def fetch_paginated_data(
    url: str, base_params: Dict, timeout: int = 15
) -> pd.DataFrame:
    """
    Eastmoney - fetch every page of a paginated listing and merge the results
    https://quote.eastmoney.com/f1.html?newcode=0.000001

    The first response is used to derive the page size (number of rows in
    ``data.diff``) and the page count (``data.total`` divided by that size,
    rounded up). The remaining pages are then requested one at a time by
    setting the ``pn`` query parameter, with a random pause before each request.

    :param url: URL of the paginated endpoint to request
    :type url: str
    :param base_params: base query parameters; copied, never modified
    :type base_params: dict
    :param timeout: request timeout forwarded to ``request_with_retry``
    :type timeout: int
    :return: all pages merged, sorted by ``f3`` in descending order, with a
        1-based ``index`` column prepended; an empty DataFrame (shape (0, 0))
        when the first page carries no rows
    :rtype: pandas.DataFrame
    """
    # Work on a copy so the "pn" updates below never leak into the caller's dict.
    params = base_params.copy()
    # The first page doubles as the source of the pagination information.
    r = request_with_retry(url, params=params, timeout=timeout)
    data_json = r.json()
    first_page = data_json["data"]
    # A null "data" payload or an empty "diff" list means there is nothing to
    # page through. Bail out here instead of failing on a None subscript or on
    # a division by zero in the page-count computation below.
    first_rows = first_page["diff"] if first_page else []
    per_page_num = len(first_rows)
    if per_page_num == 0:
        return pd.DataFrame()
    total_page = math.ceil(first_page["total"] / per_page_num)
    # Collect one DataFrame per page, starting with the page already fetched.
    temp_list = [pd.DataFrame(first_rows)]
    tqdm = get_tqdm()
    for page in tqdm(range(2, total_page + 1), leave=False):
        params.update({"pn": page})
        # Random delay so that consecutive requests are not fired back to back.
        time.sleep(random.uniform(0.5, 1.5))
        r = request_with_retry(url, params=params, timeout=timeout)
        data_json = r.json()
        # Later pages are deliberately not guarded: a missing payload here
        # would mean silently returning partial data, so let it raise.
        temp_list.append(pd.DataFrame(data_json["data"]["diff"]))
    # Merge all pages, then rank the rows by the numeric value of "f3".
    temp_df = pd.concat(temp_list, ignore_index=True)
    temp_df["f3"] = pd.to_numeric(temp_df["f3"], errors="coerce")
    temp_df.sort_values(by=["f3"], ascending=False, inplace=True, ignore_index=True)
    # Turn the fresh 0-based positional index into a 1-based "index" column.
    temp_df.reset_index(inplace=True)
    temp_df["index"] = temp_df["index"].astype(int) + 1
    return temp_df
|
|
|
|
|
|
def set_df_columns(df: pd.DataFrame, cols: List[str]) -> pd.DataFrame:
    """
    Apply column names to a pandas.DataFrame, covering the fully empty case

    A frame whose shape is exactly (0, 0) cannot take the new labels directly,
    so a new empty frame carrying the requested columns is returned instead.
    Any other frame has its ``columns`` attribute assigned in place and the
    same object is returned.

    :param df: the DataFrame whose columns should be named
    :type df: pandas.DataFrame
    :param cols: list of column names
    :type cols: list
    :return: the DataFrame with the requested column names
    :rtype: pandas.DataFrame
    """
    has_no_rows_and_no_columns = df.shape == (0, 0)
    if not has_no_rows_and_no_columns:
        # Regular case: relabel the caller's frame and hand the same object back.
        df.columns = cols
        return df
    # Fully empty input: build a fresh, empty frame with the wanted header.
    return pd.DataFrame(data=[], columns=cols)
|