Files
MoFin/venv/lib/python3.12/site-packages/akshare/index/index_cons.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

224 lines
7.4 KiB
Python

#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2024/6/17 14:00
Desc: 股票指数成份股数据, 新浪有两个接口, 这里使用老接口:
新接口:https://vip.stock.finance.sina.com.cn/mkt/#zhishu_000001
老接口:https://vip.stock.finance.sina.com.cn/corp/view/vII_NewestComponent.php?page=1&indexid=399639
"""
import math
from io import BytesIO, StringIO
import pandas as pd
import requests
from bs4 import BeautifulSoup
from akshare.utils import demjson
def index_stock_cons_sina(symbol: str = "000300") -> pd.DataFrame:
    """
    Fetch index constituents from the newer Sina Finance index page API.

    https://vip.stock.finance.sina.com.cn/mkt/#zhishu_000040

    The symbol ``"000300"`` is special-cased: it is queried through the
    ``hs300`` node, whose row count is requested first and whose rows are
    then downloaded 80 per page. Any other symbol is fetched with a single
    request against node ``zhishu_<symbol>`` asking for up to 3000 rows.

    :param symbol: index code
    :type symbol: str
    :return: constituents of the index as decoded from the response payload
    :rtype: pandas.DataFrame
    """
    if symbol == "000300":
        node = "hs300"
        page_size = 80
        count_url = (
            "https://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php"
            "/Market_Center.getHQNodeStockCountSimple"
        )
        r = requests.get(count_url, params={"node": node})
        # The count endpoint's JSON body is converted with int() to get the
        # number of rows, which determines how many pages must be fetched.
        page_count = math.ceil(int(r.json()) / page_size)
        data_url = (
            "https://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php"
            "/Market_Center.getHQNodeData"
        )
        # Collect every page and concatenate once; concatenating inside the
        # loop re-copies all previously accumulated rows on each iteration.
        frames = []
        for page in range(1, page_count + 1):
            params = {
                "page": str(page),
                "num": str(page_size),
                "sort": "symbol",
                "asc": "1",
                "node": node,
                "symbol": "",
                "_s_r_a": "init",
            }
            r = requests.get(data_url, params=params)
            # The payload is decoded with demjson rather than r.json().
            frames.append(pd.DataFrame(demjson.decode(r.text)))
        if not frames:
            # A zero count means no pages: keep returning an empty frame.
            return pd.DataFrame()
        return pd.concat(objs=frames, ignore_index=True)

    url = (
        "https://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php"
        "/Market_Center.getHQNodeDataSimple"
    )
    params = {
        "page": 1,
        "num": "3000",
        "sort": "symbol",
        "asc": "1",
        "node": f"zhishu_{symbol}",
        "_s_r_a": "setlen",
    }
    r = requests.get(url, params=params)
    return pd.DataFrame(demjson.decode(r.text))
def index_stock_info() -> pd.DataFrame:
    """
    Fetch the index list published in the JoinQuant data dictionary.

    https://www.joinquant.com/data/dict/indexData

    The first HTML table on the page is parsed, the part of each index code
    before the first ``.`` is kept, and the first three columns are returned
    under the names ``index_code``, ``display_name`` and ``publish_date``.

    :return: index code, display name and publish date for each index
    :rtype: pandas.DataFrame
    """
    response = requests.get("https://www.joinquant.com/data/dict/indexData")
    response.encoding = "utf-8"
    tables = pd.read_html(StringIO(response.text))
    raw_df = tables[0]
    # Drop everything from the first "." onwards in the code column.
    code_parts = raw_df["指数代码"].str.split(".", expand=True)
    raw_df["指数代码"] = code_parts[0]
    # The table is expected to have exactly five columns; the last two are
    # given placeholder names and discarded below.
    raw_df.columns = ["index_code", "display_name", "publish_date", "-", "-"]
    wanted = ["index_code", "display_name", "publish_date"]
    return raw_df[wanted].copy()
def index_stock_cons(symbol: str = "399639") -> pd.DataFrame:
    """
    Fetch the latest constituent list of a stock index from Sina Finance.

    https://vip.stock.finance.sina.com.cn/corp/view/vII_NewestComponent.php?page=1&indexid=399639

    The landing page is fetched first to read the pager; the constituent
    table is then taken from that page or from each numbered page. Only
    the first three columns are kept and the ``品种代码`` column is
    zero-padded to six characters.

    :param symbol: index code, obtainable via ak.index_stock_info()
    :type symbol: str
    :return: latest constituent list of the index
    :rtype: pandas.DataFrame
    """
    url = f"https://vip.stock.finance.sina.com.cn/corp/go.php/vII_NewestComponent/indexid/{symbol}.phtml"
    r = requests.get(url)
    r.encoding = "gb2312"
    soup = BeautifulSoup(r.text, "lxml")
    # The href of the last link in the first cell of the "table2" element
    # carries the page number in its "page=" query parameter.
    last_href = (
        soup.find(attrs={"class": "table2"}).find("td").find_all("a")[-1]["href"]
    )
    page_num = last_href.split("page=")[-1].split("&")[0]
    if page_num == "#":
        # NOTE(review): a "#" href presumably means the listing has a single
        # page; in that case the page already fetched is parsed directly.
        full_df = pd.read_html(StringIO(r.text), header=0, skiprows=1)[3]
    else:
        # Gather all pages and concatenate once instead of growing the frame
        # (and re-copying it) on every iteration.
        frames = []
        for page in range(1, int(page_num) + 1):
            url = f"https://vip.stock.finance.sina.com.cn/corp/view/vII_NewestComponent.php?page={page}&indexid={symbol}"
            r = requests.get(url)
            r.encoding = "gb2312"
            frames.append(pd.read_html(StringIO(r.text), header=1)[3])
        full_df = pd.concat(objs=frames, ignore_index=True) if frames else pd.DataFrame()
    # Take an explicit copy of the slice so the column assignment below
    # writes to an independent frame rather than to a slice of another one.
    temp_df = full_df.iloc[:, :3].copy()
    temp_df["品种代码"] = temp_df["品种代码"].astype(str).str.zfill(6)
    return temp_df
def index_stock_cons_csindex(symbol: str = "000300") -> pd.DataFrame:
    """
    Download the constituent list of an index from the China Securities Index site.

    https://www.csindex.com.cn/zh-CN/indices/index-detail/000300

    The ``<symbol>cons.xls`` workbook is read with pandas, its columns are
    renamed to nine fixed headers, the date column is converted to
    ``datetime.date`` values (unparseable entries are coerced rather than
    raising) and both code columns are zero-padded to six characters.

    :param symbol: index code, obtainable via ak.index_stock_info()
    :type symbol: str
    :return: latest constituents of the index
    :rtype: pandas.DataFrame
    """
    download_url = (
        f"https://oss-ch.csindex.com.cn/static/"
        f"html/csindex/public/uploads/file/autofile/cons/{symbol}cons.xls"
    )
    workbook = BytesIO(requests.get(download_url).content)
    cons_df = pd.read_excel(workbook)
    # Positional rename: the sheet is expected to have exactly nine columns.
    cons_df.columns = [
        "日期",
        "指数代码",
        "指数名称",
        "指数英文名称",
        "成分券代码",
        "成分券名称",
        "成分券英文名称",
        "交易所",
        "交易所英文名称",
    ]
    parsed_dates = pd.to_datetime(cons_df["日期"], format="%Y%m%d", errors="coerce")
    cons_df["日期"] = parsed_dates.dt.date
    # Restore leading zeros in both code columns by padding to six characters.
    for code_column in ("指数代码", "成分券代码"):
        cons_df[code_column] = cons_df[code_column].astype(str).str.zfill(6)
    return cons_df
def index_stock_cons_weight_csindex(symbol: str = "000300") -> pd.DataFrame:
    """
    Download the constituent weights of an index from the China Securities Index site.

    https://www.csindex.com.cn/zh-CN/indices/index-detail/000300

    The ``<symbol>closeweight.xls`` workbook is read with pandas, its
    columns are renamed to ten fixed headers, the date column is converted
    to ``datetime.date`` values, both code columns are zero-padded to six
    characters and the weight column is converted to numeric (unparseable
    dates and weights are coerced rather than raising).

    :param symbol: index code, obtainable via ak.index_stock_info()
    :type symbol: str
    :return: latest constituent weights of the index
    :rtype: pandas.DataFrame
    """
    download_url = (
        f"https://oss-ch.csindex.com.cn/static/html/csindex/"
        f"public/uploads/file/autofile/closeweight/{symbol}closeweight.xls"
    )
    workbook = BytesIO(requests.get(download_url).content)
    weight_df = pd.read_excel(workbook)
    # Positional rename: the sheet is expected to have exactly ten columns.
    weight_df.columns = [
        "日期",
        "指数代码",
        "指数名称",
        "指数英文名称",
        "成分券代码",
        "成分券名称",
        "成分券英文名称",
        "交易所",
        "交易所英文名称",
        "权重",
    ]
    parsed_dates = pd.to_datetime(weight_df["日期"], format="%Y%m%d", errors="coerce")
    weight_df["日期"] = parsed_dates.dt.date
    # Restore leading zeros in both code columns by padding to six characters.
    for code_column in ("指数代码", "成分券代码"):
        weight_df[code_column] = weight_df[code_column].astype(str).str.zfill(6)
    weight_df["权重"] = pd.to_numeric(weight_df["权重"], errors="coerce")
    return weight_df
def stock_a_code_to_symbol(symbol: str = "000300") -> str:
    """
    Prefix a stock code with its market tag.

    Codes beginning with ``6`` or ``900`` get the ``sh`` prefix; every
    other code gets ``sz``.

    :param symbol: stock code
    :type symbol: str
    :return: the code prefixed with ``sh`` or ``sz``
    :rtype: str
    """
    market = "sh" if symbol.startswith(("6", "900")) else "sz"
    return f"{market}{symbol}"
if __name__ == "__main__":
    # Manual smoke run: call each fetcher with index code 000300, in the
    # same order as before, and print the frame it returns.
    for fetcher in (
        index_stock_cons_csindex,
        index_stock_cons_weight_csindex,
        index_stock_cons_sina,
        index_stock_cons,
    ):
        print(fetcher(symbol="000300"))