fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
131 lines
4.3 KiB
Python
131 lines
4.3 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding:utf-8 -*-
|
|
"""
|
|
Date: 2026/4/2 22:00
|
|
Desc: 深圳证券交易所-基金规模日频数据
|
|
https://www.szse.cn/market/fund/volume/etf/index.html
|
|
"""
|
|
|
|
import io
|
|
import random
|
|
import warnings
|
|
from datetime import date, datetime
|
|
|
|
import pandas as pd
|
|
import requests
|
|
|
|
|
|
def _parse_date(date_str: str) -> date:
|
|
if len(date_str) != 8 or not date_str.isdigit():
|
|
raise ValueError("start_date 和 end_date 格式应为 YYYYMMDD")
|
|
try:
|
|
return datetime.strptime(date_str, "%Y%m%d").date()
|
|
except ValueError as err:
|
|
raise ValueError("start_date 和 end_date 应为有效日期") from err
|
|
|
|
|
|
def fund_scale_daily_szse(
    start_date: str = "20260401", end_date: str = "20260401", symbol: str = "ETF"
) -> pd.DataFrame:
    """
    Shenzhen Stock Exchange - fund products - fund scale - daily data.

    https://www.szse.cn/market/fund/volume/etf/index.html

    Downloads the exchange's xlsx report for the requested window and fund
    category and normalizes it to four columns.

    :param start_date: first day of the window, formatted like "20260401"
    :type start_date: str
    :param end_date: last day of the window, formatted like "20260401"
    :type end_date: str
    :param symbol: fund category, choice of {"ETF", "LOF", "REITS"}
    :type symbol: str
    :return: frame with the columns 日期, 基金代码, 基金简称, 基金份额; a
        header-only empty frame when the report holds no usable rows.
        NOTE(review): the original documentation states that a window longer
        than 6 months yields such an empty frame; this function performs no
        range check itself, so that presumably comes from the server - confirm.
    :rtype: pandas.DataFrame
    :raises ValueError: if ``symbol`` is not a supported category, if either
        date is malformed or invalid, or if ``start_date`` is after ``end_date``
    :raises requests.HTTPError: if the endpoint answers with an error status
    """
    columns = ["日期", "基金代码", "基金简称", "基金份额"]
    # Per category: the "jjlb" query value and the Referer page sent with it.
    symbol_map = {
        "ETF": {
            "jjlb": "ETF",
            "referer": "https://www.szse.cn/market/fund/volume/etf/index.html",
        },
        "LOF": {
            "jjlb": "LOF",
            "referer": "https://www.szse.cn/market/fund/volume/lof/index.html",
        },
        "REITS": {
            "jjlb": "不动产基金",
            "referer": "https://www.szse.cn/market/fund/volume/reits/index.html",
        },
    }
    if symbol not in symbol_map:
        raise ValueError("symbol 应为 {'ETF', 'LOF', 'REITS'}")
    start = _parse_date(start_date)
    end = _parse_date(end_date)
    if start > end:
        raise ValueError("start_date 不能大于 end_date")

    url = "https://www.szse.cn/api/report/ShowReport"
    headers = {
        "Host": "www.szse.cn",
        "Referer": symbol_map[symbol]["referer"],
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
    }
    params = {
        "SHOWTYPE": "xlsx",
        "CATALOGID": "scsj_fund_jjgm",
        "TABKEY": "tab1",
        "txtStart": start.strftime("%Y-%m-%d"),
        "txtEnd": end.strftime("%Y-%m-%d"),
        "jjlb": symbol_map[symbol]["jjlb"],
        "random": str(random.random()),
    }
    r = requests.get(url, params=params, headers=headers, timeout=15)
    r.raise_for_status()
    # Silence whatever warnings the workbook parser emits; the filter change
    # is undone when the context manager exits.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        temp_df = pd.read_excel(io.BytesIO(r.content), engine="openpyxl")

    temp_df = temp_df.dropna(how="all")
    if temp_df.empty:
        return pd.DataFrame(columns=columns)

    temp_df = temp_df.rename(columns={"基金规模(份)": "基金份额"})

    if "基金代码" in temp_df.columns:
        # Keep only rows whose code is numeric, then render the code as a
        # zero-padded six-character string.
        code_series = pd.to_numeric(temp_df["基金代码"], errors="coerce")
        has_code = code_series.notna()
        temp_df = temp_df[has_code].copy()
        temp_df["基金代码"] = (
            code_series[has_code].astype(int).astype(str).str.zfill(6)
        )

    if "日期" in temp_df.columns:
        # Drop rows whose date cannot be parsed. The explicit copy keeps the
        # later column assignments off a filtered slice of the previous frame
        # (the pattern behind pandas' SettingWithCopyWarning).
        parsed_dates = pd.to_datetime(temp_df["日期"], errors="coerce")
        has_date = parsed_dates.notna()
        temp_df = temp_df[has_date].copy()
        temp_df["日期"] = parsed_dates[has_date].dt.date

    if temp_df.empty:
        return pd.DataFrame(columns=columns)

    if "基金份额" in temp_df.columns:
        # Strip thousands separators before the numeric conversion.
        shares_text = temp_df["基金份额"].astype(str).str.replace(
            ",", "", regex=False
        )
        temp_df["基金份额"] = pd.to_numeric(shares_text, errors="coerce")

    # Guarantee the full output schema even if the report lacks a column.
    for item in columns:
        if item not in temp_df.columns:
            temp_df[item] = pd.NA
    temp_df = temp_df[columns]
    return temp_df
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: fetch a two-day window for each supported category
    # and print the category name followed by the resulting frame.
    for category in ("ETF", "LOF", "REITS"):
        category_df = fund_scale_daily_szse(
            start_date="20260401",
            end_date="20260402",
            symbol=category,
        )
        print(category)
        print(category_df)
|