fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
224 lines
7.4 KiB
Python
224 lines
7.4 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding:utf-8 -*-
|
|
"""
|
|
Date: 2024/6/17 14:00
|
|
Desc: 股票指数成份股数据, 新浪有两个接口, 这里使用老接口:
|
|
新接口:https://vip.stock.finance.sina.com.cn/mkt/#zhishu_000001
|
|
老接口:https://vip.stock.finance.sina.com.cn/corp/view/vII_NewestComponent.php?page=1&indexid=399639
|
|
"""
|
|
|
|
import math
|
|
from io import BytesIO, StringIO
|
|
|
|
import pandas as pd
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
from akshare.utils import demjson
|
|
|
|
|
|
def index_stock_cons_sina(symbol: str = "000300") -> pd.DataFrame:
    """
    Sina new-style index constituents page; only a small number of indexes
    are currently available through this interface.
    https://vip.stock.finance.sina.com.cn/mkt/#zhishu_000040

    :param symbol: index code; "000300" is mapped to Sina's "hs300" node and
        fetched page by page, any other code is requested as node
        "zhishu_{symbol}" in a single call
    :type symbol: str
    :return: constituents of the index (the decoded API payload as a frame)
    :rtype: pandas.DataFrame
    """
    api_root = "https://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php"

    if symbol == "000300":
        node = "hs300"
        page_size = 80

        # First ask how many records the node holds so the page count is known.
        r = requests.get(
            f"{api_root}/Market_Center.getHQNodeStockCountSimple",
            params={"node": node},
        )
        page_count = math.ceil(int(r.json()) / page_size)

        # Collect every page and concatenate once: re-concatenating a growing
        # frame inside the loop copies all earlier rows on each iteration and
        # goes through pandas' deprecated "empty entry" concat path.
        frames = []
        for page in range(1, page_count + 1):
            params = {
                "page": str(page),
                "num": str(page_size),
                "sort": "symbol",
                "asc": "1",
                "node": node,
                "symbol": "",
                "_s_r_a": "init",
            }
            r = requests.get(
                f"{api_root}/Market_Center.getHQNodeData", params=params
            )
            # NOTE(review): demjson is used instead of r.json(), presumably
            # because the payload is not strict JSON - confirm.
            frames.append(pd.DataFrame(demjson.decode(r.text)))

        if not frames:
            # Zero records reported: keep returning an empty frame.
            return pd.DataFrame()
        return pd.concat(objs=frames, ignore_index=True)

    # Every other index is requested in one call with a large page size.
    params = {
        "page": 1,
        "num": "3000",
        "sort": "symbol",
        "asc": "1",
        "node": f"zhishu_{symbol}",
        "_s_r_a": "setlen",
    }
    r = requests.get(
        f"{api_root}/Market_Center.getHQNodeDataSimple", params=params
    )
    return pd.DataFrame(demjson.decode(r.text))
|
|
|
|
|
|
def index_stock_info() -> pd.DataFrame:
    """
    JoinQuant - index data - index list
    https://www.joinquant.com/data/dict/indexData

    :return: frame with the columns index_code, display_name and publish_date
    :rtype: pandas.DataFrame
    """
    response = requests.get("https://www.joinquant.com/data/dict/indexData")
    response.encoding = "utf-8"

    # The first HTML table on the page is the index list.
    listing = pd.read_html(StringIO(response.text))[0]

    # Keep only the part of the code before the first "." separator.
    code_parts = listing["指数代码"].str.split(".", expand=True)
    listing["指数代码"] = code_parts[0]

    # Relabel all five columns; the last two are placeholders that get dropped.
    listing.columns = ["index_code", "display_name", "publish_date", "-", "-"]
    wanted = ["index_code", "display_name", "publish_date"]
    return listing[wanted].copy()
|
|
|
|
|
|
def index_stock_cons(symbol: str = "399639") -> pd.DataFrame:
    """
    Latest constituent list of a stock index (Sina legacy pages).
    https://vip.stock.finance.sina.com.cn/corp/view/vII_NewestComponent.php?page=1&indexid=399639

    :param symbol: index code, obtainable through ak.index_stock_info()
    :type symbol: str
    :return: first three columns of the constituent table, with the
        "品种代码" column left-padded with zeros to six characters
    :rtype: pandas.DataFrame
    """
    url = f"https://vip.stock.finance.sina.com.cn/corp/go.php/vII_NewestComponent/indexid/{symbol}.phtml"
    r = requests.get(url)
    r.encoding = "gb2312"
    soup = BeautifulSoup(r.text, "lxml")

    # The href of the last pager link carries the final page number; it is
    # "#" when that link does not point at another page.
    last_href = (
        soup.find(attrs={"class": "table2"}).find("td").find_all("a")[-1]["href"]
    )
    page_num = last_href.split("page=")[-1].split("&")[0]

    if page_num == "#":
        # Single-page index: the table is already in the response just fetched.
        # .copy() detaches the slice so the column assignment below is not a
        # chained assignment on a view.
        temp_df = (
            pd.read_html(StringIO(r.text), header=0, skiprows=1)[3]
            .iloc[:, :3]
            .copy()
        )
        temp_df["品种代码"] = temp_df["品种代码"].astype(str).str.zfill(6)
        return temp_df

    # Multi-page index: gather every page, then concatenate once instead of
    # re-copying a growing frame on each iteration.
    frames = []
    for page in range(1, int(page_num) + 1):
        url = f"https://vip.stock.finance.sina.com.cn/corp/view/vII_NewestComponent.php?page={page}&indexid={symbol}"
        r = requests.get(url)
        r.encoding = "gb2312"
        frames.append(pd.read_html(StringIO(r.text), header=1)[3])

    # pd.concat rejects an empty list; fall back to an empty frame so the
    # zero-page case behaves as it did before.
    temp_df = pd.concat(objs=frames, ignore_index=True) if frames else pd.DataFrame()
    temp_df = temp_df.iloc[:, :3].copy()
    temp_df["品种代码"] = temp_df["品种代码"].astype(str).str.zfill(6)
    return temp_df
|
|
|
|
|
|
def index_stock_cons_csindex(symbol: str = "000300") -> pd.DataFrame:
    """
    China Securities Index website - constituent list
    https://www.csindex.com.cn/zh-CN/indices/index-detail/000300

    :param symbol: index code, obtainable through ak.index_stock_info()
    :type symbol: str
    :return: latest constituents of the index
    :rtype: pandas.DataFrame
    """
    workbook_url = f"https://oss-ch.csindex.com.cn/static/html/csindex/public/uploads/file/autofile/cons/{symbol}cons.xls"
    response = requests.get(workbook_url)
    cons_df = pd.read_excel(BytesIO(response.content))

    # Replace the sheet's header row with fixed column labels.
    cons_df.columns = [
        "日期",
        "指数代码",
        "指数名称",
        "指数英文名称",
        "成分券代码",
        "成分券名称",
        "成分券英文名称",
        "交易所",
        "交易所英文名称",
    ]

    # Dates are parsed as YYYYMMDD; unparseable values are coerced, not raised.
    parsed_dates = pd.to_datetime(cons_df["日期"], format="%Y%m%d", errors="coerce")
    cons_df["日期"] = parsed_dates.dt.date

    # Zero-pad both code columns to six characters.
    for code_column in ("指数代码", "成分券代码"):
        cons_df[code_column] = cons_df[code_column].astype(str).str.zfill(6)
    return cons_df
|
|
|
|
|
|
def index_stock_cons_weight_csindex(symbol: str = "000300") -> pd.DataFrame:
    """
    China Securities Index website - sample weights
    https://www.csindex.com.cn/zh-CN/indices/index-detail/000300

    :param symbol: index code, obtainable through ak.index_stock_info()
    :type symbol: str
    :return: latest constituent weights of the index
    :rtype: pandas.DataFrame
    """
    workbook_url = f"https://oss-ch.csindex.com.cn/static/html/csindex/public/uploads/file/autofile/closeweight/{symbol}closeweight.xls"
    response = requests.get(workbook_url)
    weight_df = pd.read_excel(BytesIO(response.content))

    # Replace the sheet's header row with fixed column labels.
    weight_df.columns = [
        "日期",
        "指数代码",
        "指数名称",
        "指数英文名称",
        "成分券代码",
        "成分券名称",
        "成分券英文名称",
        "交易所",
        "交易所英文名称",
        "权重",
    ]

    # Dates are parsed as YYYYMMDD; unparseable values are coerced, not raised.
    parsed_dates = pd.to_datetime(weight_df["日期"], format="%Y%m%d", errors="coerce")
    weight_df["日期"] = parsed_dates.dt.date

    # Zero-pad both code columns to six characters.
    for code_column in ("指数代码", "成分券代码"):
        weight_df[code_column] = weight_df[code_column].astype(str).str.zfill(6)

    # Non-numeric weights are coerced rather than raising.
    weight_df["权重"] = pd.to_numeric(weight_df["权重"], errors="coerce")
    return weight_df
|
|
|
|
|
|
def stock_a_code_to_symbol(symbol: str = "000300") -> str:
    """
    Prefix a stock code with its market identifier.

    :param symbol: stock code
    :type symbol: str
    :return: "sh" + code when the code starts with "6" or "900",
        otherwise "sz" + code
    :rtype: str
    """
    market = "sh" if symbol.startswith(("6", "900")) else "sz"
    return f"{market}{symbol}"
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: fetch the "000300" constituents from each source in
    # turn and print the resulting frame.
    for fetch_constituents in (
        index_stock_cons_csindex,
        index_stock_cons_weight_csindex,
        index_stock_cons_sina,
        index_stock_cons,
    ):
        print(fetch_constituents(symbol="000300"))
|