Files
MoFin/venv/lib/python3.12/site-packages/akshare/futures/cot.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

1382 lines
57 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2025/7/21 15:00
Desc: 期货-中国-交易所-会员持仓数据接口
大连商品交易所、上海期货交易所、郑州商品交易所、中国金融期货交易所、广州期货交易所
采集前 20 会员持仓数据;
建议下午 16:30 以后采集当天数据, 避免交易所数据更新不稳定;
郑州商品交易所格式分为三类
大连商品交易所有具体合约的持仓排名, 通过 futures_dce_position_rank 获取
20171228
http://www.czce.com.cn/cn/DFSStaticFiles/Future/2020/20200727/FutureDataHolding.txt
20100825
http://www.czce.com.cn/cn/exchange/2014/datatradeholding/20140515.txt
"""
import datetime
import json
import re
import time
import warnings
import zipfile
from io import BytesIO
from io import StringIO
import pandas as pd
import requests
from bs4 import BeautifulSoup
from akshare.futures import cons
from akshare.futures.requests_fun import requests_link
from akshare.futures.symbol_var import symbol_varieties
calendar = cons.get_calendar()
rank_columns = [
"vol_party_name",
"vol",
"vol_chg",
"long_party_name",
"long_open_interest",
"long_open_interest_chg",
"short_party_name",
"short_open_interest",
"short_open_interest_chg",
]
intColumns = [
"vol",
"vol_chg",
"long_open_interest",
"long_open_interest_chg",
"short_open_interest",
"short_open_interest_chg",
]
def get_rank_sum_daily(
start_day: str = "20210510",
end_day: str = "20210510",
vars_list: list = cons.contract_symbols,
):
"""
采集四个期货交易所前 5、前 10、前 15、前 20 会员持仓排名数据
注1:由于上期所和中金所只公布每个品种内部的标的排名,没有公布品种的总排名;
所以函数输出的品种排名是由品种中的每个标的加总获得,并不是真实的品种排名列表
注2:大商所只公布了品种排名,未公布标的排名
:param start_day: 开始日期 formatYYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
:type start_day: str
:param end_day: 结束数据 formatYYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
:type end_day: str
:param vars_list: 合约品种如 ['RB''AL'] 等列表为空时为所有商品
:type vars_list: list
:return: 会员持仓排名数据
:rtype: pandas.DataFrame
symbol 标的合约 string
var 商品品种 string
vol_top5 成交量前5会员成交量总和 int
vol_chg_top5 成交量前5会员成交量变化总和 int
long_open_interest_top5 持多单前5会员持多单总和 int
long_open_interest_chg_top5 持多单前5会员持多单变化总和 int
short_open_interest_top5 持空单前5会员持空单总和 int
short_open_interest_chg_top5 持空单前5会员持空单变化总和 int
vol_top10 成交量前10会员成交量总和 int
"""
start_day = (
cons.convert_date(start_day) if start_day is not None else datetime.date.today()
)
end_day = (
cons.convert_date(end_day)
if end_day is not None
else cons.convert_date(cons.get_latest_data_date(datetime.datetime.now()))
)
records = pd.DataFrame()
while start_day <= end_day:
print(start_day)
if start_day.strftime("%Y%m%d") in calendar:
data = get_rank_sum(start_day, vars_list)
if data is False:
print(
f"{start_day.strftime('%Y-%m-%d')}日交易所数据连接失败,已超过20次,您的地址被网站墙了,请保存好返回数据,稍后从该日期起重试"
)
return records.reset_index(drop=True)
records = pd.concat(objs=[records, data], ignore_index=True)
else:
warnings.warn(f"{start_day.strftime('%Y%m%d')}非交易日")
start_day += datetime.timedelta(days=1)
return records.reset_index(drop=True)
def get_rank_sum(date: str = "20210525", vars_list: list = cons.contract_symbols):
"""
采集五个期货交易所前5、前10、前15、前20会员持仓排名数据
注1:由于上期所和中金所只公布每个品种内部的标的排名, 没有公布品种的总排名;
所以函数输出的品种排名是由品种中的每个标的加总获得, 并不是真实的品种排名列表
注2:大商所只公布了品种排名, 未公布标的排名
:param date: 日期 format: YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
:type date: date
:param vars_list: 合约品种如 ['RB', 'AL'] 等列表为空时为所有商品
:type vars_list: list
:return: 持仓排名数据
:rtype: pandas.DataFrame
symbol 标的合约 string
var 商品品种 string
vol_top5 成交量前5会员成交量总和 int
vol_chg_top5 成交量前5会员成交量变化总和 int
long_open_interest_top5 持多单前5会员持多单总和 int
long_open_interest_chg_top5 持多单前5会员持多单变化总和 int
short_open_interest_top5 持空单前5会员持空单总和 int
short_open_interest_chg_top5 持空单前5会员持空单变化总和 int
vol_top10 成交量前10会员成交量总和 int
"""
date = cons.convert_date(date) if date is not None else datetime.date.today()
if date.strftime("%Y%m%d") not in calendar:
warnings.warn("%s非交易日" % date.strftime("%Y%m%d"))
return None
dce_var = [i for i in vars_list if i in cons.market_exchange_symbols["dce"]]
shfe_var = [i for i in vars_list if i in cons.market_exchange_symbols["shfe"]]
czce_var = [i for i in vars_list if i in cons.market_exchange_symbols["czce"]]
cffex_var = [i for i in vars_list if i in cons.market_exchange_symbols["cffex"]]
gfex_var = [i for i in vars_list if i in cons.market_exchange_symbols["gfex"]]
big_dict = {}
if len(dce_var) > 0:
data = futures_dce_position_rank(date, dce_var)
if data is False:
return False
big_dict.update(data)
if len(shfe_var) > 0:
data = get_shfe_rank_table(date, shfe_var)
if data is False:
return False
big_dict.update(data)
if len(czce_var) > 0:
data = get_rank_table_czce(date)
if data is False:
return False
big_dict.update(data)
if len(cffex_var) > 0:
data = get_cffex_rank_table(date, cffex_var)
if data is False:
return False
big_dict.update(data)
if len(gfex_var) > 0:
data = futures_gfex_position_rank(date, gfex_var)
if data is False:
return False
big_dict.update(data)
records = pd.DataFrame()
for symbol, table in big_dict.items():
table = table.map(lambda x: 0 if x == "" else x)
for symbol_inner in set(table["symbol"]):
var = symbol_varieties(symbol_inner)
if var in vars_list:
if var in czce_var:
for col in [
item
for item in table.columns
if item.find("open_interest") > -1
] + ["vol", "vol_chg"]:
table[col] = [
float(value.replace(",", "")) if value != "-" else 0.0
for value in table[col]
]
table_cut = table[table["symbol"] == symbol_inner]
table_cut["rank"] = table_cut["rank"].astype("float")
table_cut_top5 = table_cut[table_cut["rank"] <= 5]
table_cut_top10 = table_cut[table_cut["rank"] <= 10]
table_cut_top15 = table_cut[table_cut["rank"] <= 15]
table_cut_top20 = table_cut[table_cut["rank"] <= 20]
big_dict = {
"symbol": symbol_inner,
"variety": var,
"vol_top5": table_cut_top5["vol"].sum(),
"vol_chg_top5": table_cut_top5["vol_chg"].sum(),
"long_open_interest_top5": table_cut_top5[
"long_open_interest"
].sum(),
"long_open_interest_chg_top5": table_cut_top5[
"long_open_interest_chg"
].sum(),
"short_open_interest_top5": table_cut_top5[
"short_open_interest"
].sum(),
"short_open_interest_chg_top5": table_cut_top5[
"short_open_interest_chg"
].sum(),
"vol_top10": table_cut_top10["vol"].sum(),
"vol_chg_top10": table_cut_top10["vol_chg"].sum(),
"long_open_interest_top10": table_cut_top10[
"long_open_interest"
].sum(),
"long_open_interest_chg_top10": table_cut_top10[
"long_open_interest_chg"
].sum(),
"short_open_interest_top10": table_cut_top10[
"short_open_interest"
].sum(),
"short_open_interest_chg_top10": table_cut_top10[
"short_open_interest_chg"
].sum(),
"vol_top15": table_cut_top15["vol"].sum(),
"vol_chg_top15": table_cut_top15["vol_chg"].sum(),
"long_open_interest_top15": table_cut_top15[
"long_open_interest"
].sum(),
"long_open_interest_chg_top15": table_cut_top15[
"long_open_interest_chg"
].sum(),
"short_open_interest_top15": table_cut_top15[
"short_open_interest"
].sum(),
"short_open_interest_chg_top15": table_cut_top15[
"short_open_interest_chg"
].sum(),
"vol_top20": table_cut_top20["vol"].sum(),
"vol_chg_top20": table_cut_top20["vol_chg"].sum(),
"long_open_interest_top20": table_cut_top20[
"long_open_interest"
].sum(),
"long_open_interest_chg_top20": table_cut_top20[
"long_open_interest_chg"
].sum(),
"short_open_interest_top20": table_cut_top20[
"short_open_interest"
].sum(),
"short_open_interest_chg_top20": table_cut_top20[
"short_open_interest_chg"
].sum(),
"date": date.strftime("%Y%m%d"),
}
records = pd.concat(
[records, pd.DataFrame(big_dict, index=[0])], ignore_index=True
)
if len(big_dict.items()) > 0:
add_vars = [
i
for i in cons.market_exchange_symbols["dce"]
+ cons.market_exchange_symbols["shfe"]
+ cons.market_exchange_symbols["cffex"]
if i in records["variety"].tolist()
]
for var in add_vars:
records_cut = records[records["variety"] == var]
var_record = pd.DataFrame(records_cut.sum()).T
var_record["date"] = date.strftime("%Y%m%d")
var_record.loc[:, ["variety", "symbol"]] = var
records = pd.concat([records, var_record], ignore_index=True)
return records.reset_index(drop=True)
def get_shfe_rank_table(
date: str = None, vars_list: list = cons.contract_symbols
) -> dict:
"""
上海期货交易所会员成交及持仓排名表
https://www.shfe.com.cn/
https://tsite.shfe.com.cn/statements/dataview.html?paramid=kx
注:该交易所只公布每个品种内部的标的排名,没有公布品种的总排名
数据从 20020107 开始,每交易日 16:30 左右更新数据
:param date: 交易日
:type date: str
:param vars_list: 合约品种如 RB、AL等列表; 为空时为所有商品
:type vars_list: list
:return: 上海期货交易所会员成交及持仓排名表
:rtype: dict
rank 排名 int
vol_party_name 成交量排序的当前名次会员 string(中文)
vol 该会员成交量 int
vol_chg 该会员成交量变化量 int
long_party_name 持多单排序的当前名次会员 string(中文)
long_open_interest 该会员持多单 int
long_open_interest_chg 该会员持多单变化量 int
short_party_name 持空单排序的当前名次会员 string(中文)
short_open_interest 该会员持空单 int
short_open_interest_chg 该会员持空单变化量 int
symbol 标的合约 string
var 品种 string
date 日期 string YYYYMMDD
"""
date = cons.convert_date(date) if date is not None else datetime.date.today()
if date < datetime.date(year=2002, month=1, day=7):
print("shfe数据源开始日期为 20020107,跳过")
return {}
if date.strftime("%Y%m%d") not in calendar:
warnings.warn("%s非交易日" % date.strftime("%Y%m%d"))
return {}
url = cons.SHFE_VOL_RANK_URL_20250701 % (date.strftime("%Y%m%d"))
r = requests_link(url, encoding="utf-8", headers=cons.shfe_headers)
try:
context = json.loads(r.text)
except: # noqa: E722
return {}
df = pd.DataFrame(context["o_cursor"])
df = df.rename(
columns={
"CJ1": "vol",
"CJ1_CHG": "vol_chg",
"CJ2": "long_open_interest",
"CJ2_CHG": "long_open_interest_chg",
"CJ3": "short_open_interest",
"CJ3_CHG": "short_open_interest_chg",
"PARTICIPANTABBR1": "vol_party_name",
"PARTICIPANTABBR2": "long_party_name",
"PARTICIPANTABBR3": "short_party_name",
"PRODUCTNAME": "product1",
"RANK": "rank",
"INSTRUMENTID": "symbol",
"PRODUCTSORTNO": "product2",
}
)
if len(df.columns) < 3:
return {}
df = df.map(lambda x: x.strip() if isinstance(x, str) else x)
df = df.map(lambda x: None if x == "" else x)
df["variety"] = df["symbol"].apply(lambda x: symbol_varieties(x))
df = df[df["rank"] > 0]
for col in [
"PARTICIPANTID1",
"PARTICIPANTID2",
"PARTICIPANTID3",
"product1",
"product2",
]:
try:
del df[col]
except: # noqa: E722
pass
get_vars = [var for var in vars_list if var in df["variety"].tolist()]
big_dict = {}
for var in get_vars:
df_var = df[df["variety"] == var]
for symbol in set(df_var["symbol"]):
df_symbol = df_var[df_var["symbol"] == symbol].copy()
df_symbol["symbol"] = df_symbol["symbol"].str.upper()
big_dict[symbol] = df_symbol.reset_index(drop=True)
return big_dict
def _czce_df_read(url, skip_rows, encoding="utf-8", header=0):
"""
郑州商品交易所的网页数据
:param header:
:type header:
:param url: 网站 string
:param skip_rows: 去掉前几行 int
:param encoding: utf-8 or gbk or gb2312
:return: pd.DataFrame
"""
headers = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,"
"image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/84.0.4147.89 Safari/537.36",
"Host": "www.czce.com.cn",
"Cookie": "XquW6dFMPxV380S=CAaD3sMkdXv3fUoaJlICIEv0MVegGq5EoMyBcxkOjCgSjmpuovYFuTLtYFcxTZGw; "
"XquW6dFMPxV380T=5QTTjUlA6f6WiDO7fMGmqNxHBWz.hKIc8lb_tc1o4nHrJM4nsXCAI9VHaKyV_jkHh4cIVvD25kGQAh."
"MvLL1SHRA20HCG9mVVHPhAzktNdPK3evjm0NYbTg2Gu_XGGtPhecxLvdFQ0."
"JlAxy_z0C15_KdO8kOI18i4K0rFERNPxjXq5qG1Gs.QiOm976wODY.pe8XCQtAsuLYJ."
"N4DpTgNfHJp04jhMl0SntHhr.jhh3dFjMXBx.JEHngXBzY6gQAhER7uSKAeSktruxFeuKlebse.vrPghHqWvJm4WPTEvDQ8q",
}
r = requests_link(url, encoding, headers=headers)
data = pd.read_html(
StringIO(r.text),
match=".+",
flavor=None,
header=header,
index_col=0,
skiprows=skip_rows,
attrs=None,
parse_dates=False,
thousands=", ",
encoding="gbk",
decimal=".",
converters=None,
na_values=None,
keep_default_na=True,
)
return data
def get_rank_table_czce(date: str = "20251103") -> dict:
"""
郑州商品交易所前 20 会员持仓排名数据明细
https://www.czce.com.cn/cn/jysj/ccpm/H077003004index_1.htm
注:该交易所既公布了品种排名, 也公布了标的排名
:param date: 日期 formatYYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
:return: 持仓排名数据明细
:rtype: pandas.DataFrame
返回值格式
rank 排名 int
vol_party_name 成交量排序的当前名次会员 string(中文)
vol 该会员成交量 int
vol_chg 该会员成交量变化量 int
long_party_name 持多单排序的当前名次会员 string(中文)
long_open_interest 该会员持多单 int
long_open_interest_chg 该会员持多单变化量 int
short_party_name 持空单排序的当前名次会员 string(中文)
short_open_interest 该会员持空单 int
short_open_interest_chg 该会员持空单变化量 int
symbol 标的合约 string
var 品种 string
date 日期 string YYYYMMDD
"""
date = cons.convert_date(date) if date is not None else datetime.date.today()
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/108.0.0.0 Safari/537.36"
}
temp_df = pd.DataFrame()
if date < datetime.date(2015, 10, 8):
print("CZCE可获取的数据源开始日期为 20151008, 请输入合适的日期参数")
return {}
if date.strftime("%Y%m%d") not in calendar:
warnings.warn("%s非交易日" % date.strftime("%Y%m%d"))
return {}
if date > datetime.date(year=2025, month=11, day=1):
url = (
f"http://www.czce.com.cn/cn/DFSStaticFiles/Future/{date.year}/"
f"{date.isoformat().replace('-', '')}/FutureDataHolding.xlsx"
)
else:
url = (
f"http://www.czce.com.cn/cn/DFSStaticFiles/Future/{date.year}/"
f"{date.isoformat().replace('-', '')}/FutureDataHolding.xls"
)
r = requests.get(url, headers=headers)
temp_df = pd.read_excel(BytesIO(r.content))
temp_pinzhong_index = [
item + 1
for item in temp_df[
temp_df.iloc[:, 0].str.contains("合计", na=False)
].index.to_list()
]
temp_pinzhong_index.insert(0, 0)
temp_pinzhong_index.pop()
temp_symbol_index = (
temp_df.iloc[temp_pinzhong_index, 0].str.split(" ", expand=True).iloc[:, 0]
)
symbol_list = [
re.compile(r"[0-9a-zA-Z_]+").findall(item)[0]
for item in temp_symbol_index.values
]
temp_symbol_index_list = temp_symbol_index.index.to_list()
big_dict = {}
for i in range(len(temp_symbol_index_list) - 1):
inner_temp_df = temp_df[
temp_symbol_index_list[i] + 2 : temp_symbol_index_list[i + 1] - 1
]
inner_temp_df.columns = [
"rank",
"vol_party_name",
"vol",
"vol_chg",
"long_party_name",
"long_open_interest",
"long_open_interest_chg",
"short_party_name",
"short_open_interest",
"short_open_interest_chg",
]
inner_temp_df.reset_index(inplace=True, drop=True)
big_dict[symbol_list[i]] = inner_temp_df
inner_temp_df = temp_df[temp_symbol_index_list[i + 1] + 2 : -1]
inner_temp_df.columns = [
"rank",
"vol_party_name",
"vol",
"vol_chg",
"long_party_name",
"long_open_interest",
"long_open_interest_chg",
"short_party_name",
"short_open_interest",
"short_open_interest_chg",
]
inner_temp_df.reset_index(inplace=True, drop=True)
big_dict[symbol_list[-1]] = inner_temp_df
new_big_dict = {}
for key, value in big_dict.items():
value = value.assign(symbol=key)
value = value.assign(variety=re.compile(r"[a-zA-Z_]+").findall(key)[0])
new_big_dict[key] = value
return new_big_dict
def _get_dce_contract_list(date, var):
"""
大连商品交易所取消了品种排名,只提供标的合约排名,需要获取标的合约列表
:param date: 日期 datetime.date 对象, 为空时为当天
:param var: 合约品种
:return: list 公布了持仓排名的合约列表
"""
url = "http://portal.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html"
headers = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;"
"q=0.8,application/signed-exchange;v=b3;q=0.9",
"Accept-Encoding": "gzip, deflate",
"Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
"Cache-Control": "no-cache",
"Connection": "close",
"Host": "www.dce.com.cn",
"Origin": "http://portal.dce.com.cn",
"Pragma": "no-cache",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/81.0.4044.138 Safari/537.36",
}
params = {
"memberDealPosiQuotes.variety": var.lower(),
"memberDealPosiQuotes.trade_type": "0",
"year": date.year,
"month": date.month - 1,
"day": date.day,
"contract.contract_id": "all",
"contract.variety_id": var.lower(),
"contract": "",
}
while 1:
try:
r = requests.post(url, params=params, headers=headers)
soup = BeautifulSoup(r.text, "lxml")
contract_list = [
re.findall(
r"\d+",
item["onclick"].strip("javascript:setContract_id('").strip("');"),
)[0]
for item in soup.find_all(attrs={"name": "contract"})
]
contract_list = [var.lower() + item for item in contract_list]
return contract_list # noqa: E722
except: # noqa: E722
time.sleep(5)
continue
def get_dce_rank_table(date: str = "20230706", vars_list=cons.contract_symbols) -> dict:
"""
大连商品交易所前 20 会员持仓排名数据明细, 由于交易所网站问题, 需要 20200720 之后才有数据
注: 该交易所只公布标的合约排名
:param date: 日期 formatYYYY-MM-DD 或 YYYYMMDD 或 datetime.date 对象, 为空时为当天
:param vars_list: 合约品种如 RB、AL 等列表为空时为所有商品, 数据从 20060104 开始,每交易日 16:30 左右更新数据
:return: 持仓排名
:rtype: pandas.DataFrame
返回值格式
rank 排名 int
vol_party_name 成交量排序的当前名次会员 string(中文)
vol 该会员成交量 int
vol_chg 该会员成交量变化量 int
long_party_name 持多单排序的当前名次会员 string(中文)
long_open_interest 该会员持多单 int
long_open_interest_chg 该会员持多单变化量 int
short_party_name 持空单排序的当前名次会员 string(中文)
short_open_interest 该会员持空单 int
short_open_interest_chg 该会员持空单变化量 int
symbol 标的合约 string
var 品种 string
date 日期 string YYYYMMDD
"""
print("如果本接口不可用,请使用 ak.futures_dce_position_rank() 接口")
date_string = date
date = cons.convert_date(date) if date is not None else datetime.date.today()
if date < datetime.date(2006, 1, 4):
print(Exception("大连商品交易所数据源开始日期为 20060104,跳过"))
return {}
if date.strftime("%Y%m%d") not in calendar:
warnings.warn("%s非交易日" % date.strftime("%Y%m%d"))
return {}
vars_list = [i for i in vars_list if i in cons.market_exchange_symbols["dce"]]
big_dict = {}
for var in vars_list:
# var = 'V'
symbol_list = _get_dce_contract_list(date, var)
for symbol in symbol_list:
# print(symbol)
url = cons.DCE_VOL_RANK_URL_1 % (
var.lower(),
symbol,
var.lower(),
date.year,
date.month - 1,
date.day,
)
try:
temp_df = pd.read_excel(url[:-3] + "excel", header=0, skiprows=3)
temp_df.dropna(how="any", axis=0, inplace=True)
temp_df = temp_df.map(lambda x: str(x).replace(",", ""))
del temp_df["名次.1"]
del temp_df["名次.2"]
temp_df.rename(
columns={
"名次": "rank",
"会员简称": "vol_party_name",
"成交量": "vol",
"增减": "vol_chg",
"会员简称.1": "long_party_name",
"持买单量": "long_open_interest",
"增减.1": "long_open_interest_chg",
"会员简称.2": "short_party_name",
"持卖单量": "short_open_interest",
"增减.2": "short_open_interest_chg",
},
inplace=True,
)
temp_df["symbol"] = symbol.upper()
temp_df["var"] = var
temp_df["date"] = date_string
temp_df = temp_df.map(
lambda x: str(x).replace("-", "0") if x == "-" else x
)
temp_df["rank"] = range(1, len(temp_df) + 1)
temp_df["vol"] = temp_df["vol"].astype(float)
temp_df["vol_chg"] = temp_df["vol_chg"].astype(float)
temp_df["long_open_interest"] = temp_df["long_open_interest"].astype(
float
)
temp_df["long_open_interest_chg"] = temp_df[
"long_open_interest_chg"
].astype(float)
temp_df["short_open_interest"] = temp_df["short_open_interest"].astype(
float
)
temp_df["short_open_interest_chg"] = temp_df[
"short_open_interest_chg"
].astype(float)
big_dict[symbol] = temp_df
except: # noqa: E722
temp_url = "http://portal.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html"
payload = {
"memberDealPosiQuotes.variety": var.lower(),
"memberDealPosiQuotes.trade_type": "0",
"year": date.year,
"month": date.month - 1,
"day": str(date.day).zfill(2),
"contract.contract_id": symbol,
"contract.variety_id": var.lower(),
"contract": "",
}
r = requests.post(temp_url, data=payload)
if r.status_code != 200:
big_dict[symbol] = {}
else:
temp_df = pd.read_html(StringIO(r.text))[1].iloc[:-1, :]
del temp_df["名次.1"]
del temp_df["名次.2"]
temp_df.rename(
columns={
"名次": "rank",
"会员简称": "vol_party_name",
"成交量": "vol",
"增减": "vol_chg",
"会员简称.1": "long_party_name",
"持买单量": "long_open_interest",
"增减.1": "long_open_interest_chg",
"会员简称.2": "short_party_name",
"持卖单量": "short_open_interest",
"增减.2": "short_open_interest_chg",
},
inplace=True,
)
temp_df["symbol"] = symbol.upper()
temp_df["var"] = var
temp_df["date"] = date_string
temp_df = temp_df.map(
lambda x: str(x).replace("-", "0") if x == "-" else x
)
temp_df["rank"] = range(1, len(temp_df) + 1)
temp_df["vol"] = temp_df["vol"].astype(float)
temp_df["vol_chg"] = temp_df["vol_chg"].astype(float)
temp_df["long_open_interest"] = temp_df[
"long_open_interest"
].astype(float)
temp_df["long_open_interest_chg"] = temp_df[
"long_open_interest_chg"
].astype(float)
temp_df["short_open_interest"] = temp_df[
"short_open_interest"
].astype(float)
temp_df["short_open_interest_chg"] = temp_df[
"short_open_interest_chg"
].astype(float)
big_dict[symbol] = temp_df
return big_dict
def get_cffex_rank_table(date: str = "20190805", vars_list=cons.contract_symbols):
"""
中国金融期货交易所前 20 会员持仓排名数据明细
http://www.cffex.com.cn/ccpm/
注:该交易所既公布品种排名,也公布标的排名
:param date: 日期 formatYYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
:param vars_list: 合约品种如RB、AL等列表 为空时为所有商品, 数据从20100416开始,每交易日16:30左右更新数据
:return: 持仓排名
:rtype: pandas.DataFrame
:rfield:
rank 排名 int
vol_party_name 成交量排序的当前名次会员 string(中文)
vol 该会员成交量 int
vol_chg 该会员成交量变化量 int
long_party_name 持多单排序的当前名次会员 string(中文)
long_open_interest 该会员持多单 int
long_open_interest_chg 该会员持多单变化量 int
short_party_name 持空单排序的当前名次会员 string(中文)
short_open_interest 该会员持空单 int
short_open_interest_chg 该会员持空单变化量 int
symbol 标的合约 string
var 品种 string
date 日期 string YYYYMMDD
"""
vars_list = [i for i in vars_list if i in cons.market_exchange_symbols["cffex"]]
date = cons.convert_date(date) if date is not None else datetime.date.today()
if date < datetime.date(2010, 4, 16):
print(Exception("CFFEX 数据源开始日期为 20100416,跳过"))
return {}
if date.strftime("%Y%m%d") not in calendar:
warnings.warn("%s非交易日" % date.strftime("%Y%m%d"))
return {}
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/81.0.4044.138 Safari/537.36",
}
big_dict = {}
for var in vars_list:
# print(var)
# var = "IF"
url = cons.CFFEX_VOL_RANK_URL % (
date.strftime("%Y%m"),
date.strftime("%d"),
var,
)
# url = 'http://www.cffex.com.cn/sj/ccpm/201908/05/IF_1.csv'
# url = 'http://www.cffex.com.cn/sj/ccpm/202308/08/IF_1.csv'
r = requests.get(url, headers=headers)
# 20200316 开始数据结构变化,统一格式
if r.status_code == 200:
try:
# 当所需要的合约没有数据时
temp_df = pd.read_table(BytesIO(r.content), encoding="gbk", header=None)
except: # noqa: E722
continue
need_index = temp_df.iloc[:, 0].str.contains("交易日")
if sum(need_index) > 2:
table = temp_df.iloc[temp_df[need_index].index[1] :, 0].str.split(
",", expand=True
)
table.columns = table.iloc[0, :]
table = table.iloc[2:, :].copy()
table.reset_index(inplace=True, drop=True)
else:
table = pd.read_csv(BytesIO(r.content), encoding="gbk")
else:
return
table = table.dropna(how="any")
table = table.map(lambda x: x.strip() if isinstance(x, str) else x)
del table["交易日"]
for symbol in set(table["合约"]):
table_cut = table[table["合约"] == symbol]
table_cut.columns = ["symbol", "rank"] + rank_columns
table_cut = _table_cut_cal(pd.DataFrame(table_cut), symbol)
big_dict[symbol] = table_cut.reset_index(drop=True)
return big_dict
def _table_cut_cal(table_cut, symbol):
"""
表格切分
:param table_cut: 需要切分的表格
:type table_cut: pandas.DataFrame
:param symbol: 具体合约的代码
:type symbol: str
:return: 表格切分后的结果
:rtype: pandas.DataFrame
"""
var = symbol_varieties(symbol)
table_cut[intColumns + ["rank"]] = table_cut[intColumns + ["rank"]].astype(int)
table_cut_sum = table_cut.sum()
table_cut_sum["rank"] = 999
for col in ["vol_party_name", "long_party_name", "short_party_name"]:
table_cut_sum[col] = None
table_cut = pd.concat([table_cut, pd.DataFrame(table_cut_sum).T], sort=True)
table_cut["symbol"] = symbol
table_cut["variety"] = var
table_cut[intColumns + ["rank"]] = table_cut[intColumns + ["rank"]].astype(int)
return table_cut
def futures_dce_position_rank(
date: str = "20160919", vars_list=cons.contract_symbols
) -> dict:
"""
大连商品交易所-每日持仓排名-具体合约
http://www.dce.com.cn/dalianshangpin/xqsj/tjsj26/rtj/rcjccpm/index.html
:param date: 指定交易日; e.g., "20200511"
:type date: str
:param vars_list: 品种列表
:type vars_list: list
:return: 指定日期的持仓排名数据
:rtype: pandas.DataFrame
"""
date = cons.convert_date(date) if date is not None else datetime.date.today()
if date.strftime("%Y%m%d") not in calendar:
warnings.warn("%s非交易日" % date.strftime("%Y%m%d"))
return {}
date_str = date.strftime("%Y%m%d")
url = "http://www.dce.com.cn/dcereport/publicweb/dailystat/memberDealPosi/batchDownload"
payload = {
"tradeDate": date_str,
"varietyId": "a",
"contractId": "a2601",
"tradeType": "1",
"lang": "zh",
}
r = requests.post(url, json=payload)
big_dict = dict()
with zipfile.ZipFile(BytesIO(r.content), mode="r") as z:
for i in z.namelist():
file_name = i
if not file_name.startswith(date_str):
continue
try:
data = pd.read_table(z.open(i), header=None, sep="\t")
if sum(data.iloc[:, 0].str.find("会员类别") == 0) > 0:
data = data.iloc[:-6]
if len(data) < 12: # 处理没有活跃合约的情况
big_dict[file_name.split("_")[1]] = pd.DataFrame()
continue
temp_filter = data[data.iloc[:, 0].str.find("名次") == 0].index.tolist()
if (
temp_filter[1] - temp_filter[0] < 5
): # 过滤有无成交量但是有买卖持仓的数据, 如 20201105_c2011_成交量_买持仓_卖持仓排名.txt
big_dict[file_name.split("_")[1]] = pd.DataFrame()
continue
start_list = data[data.iloc[:, 0].str.find("名次") == 0].index.tolist()
data = data.iloc[
start_list[0] :,
data.columns[data.iloc[start_list[0], :].notnull()],
]
data.reset_index(inplace=True, drop=True)
start_list = data[data.iloc[:, 0].str.find("名次") == 0].index.tolist()
end_list = data[
data.iloc[:, 0].str.contains(r"(?:总计|合计)", na=False)
].index.tolist()
part_one = data[start_list[0] : end_list[0]].iloc[1:, :]
part_two = data[start_list[1] : end_list[1]].iloc[1:, :]
part_three = data[start_list[2] : end_list[2]].iloc[1:, :]
temp_df = pd.concat(
objs=[
part_one.reset_index(drop=True),
part_two.reset_index(drop=True),
part_three.reset_index(drop=True),
],
axis=1,
ignore_index=True,
)
temp_df.columns = [
"名次",
"会员简称",
"成交量",
"增减",
"名次",
"会员简称",
"持买单量",
"增减",
"名次",
"会员简称",
"持卖单量",
"增减",
]
temp_df["rank"] = range(1, len(temp_df) + 1)
del temp_df["名次"]
temp_df.columns = [
"vol_party_name",
"vol",
"vol_chg",
"long_party_name",
"long_open_interest",
"long_open_interest_chg",
"short_party_name",
"short_open_interest",
"short_open_interest_chg",
"rank",
]
temp_df["symbol"] = file_name.split("_")[1].upper()
temp_df["variety"] = file_name.split("_")[1][:-4].upper()
temp_df = temp_df[
[
"long_open_interest",
"long_open_interest_chg",
"long_party_name",
"rank",
"short_open_interest",
"short_open_interest_chg",
"short_party_name",
"vol",
"vol_chg",
"vol_party_name",
"symbol",
"variety",
]
]
temp_df = temp_df.map(lambda x: str(x).replace(",", ""))
temp_df["long_open_interest"] = pd.to_numeric(
temp_df["long_open_interest"], errors="coerce"
)
temp_df["long_open_interest_chg"] = pd.to_numeric(
temp_df["long_open_interest_chg"], errors="coerce"
)
temp_df["rank"] = pd.to_numeric(temp_df["rank"], errors="coerce")
temp_df["short_open_interest"] = pd.to_numeric(
temp_df["short_open_interest"], errors="coerce"
)
temp_df["short_open_interest_chg"] = pd.to_numeric(
temp_df["short_open_interest_chg"], errors="coerce"
)
temp_df["vol"] = pd.to_numeric(temp_df["vol"], errors="coerce")
temp_df["vol_chg"] = pd.to_numeric(temp_df["vol_chg"], errors="coerce")
big_dict[file_name.split("_")[1]] = temp_df
except UnicodeDecodeError:
try:
data = pd.read_table(
z.open(i),
header=None,
sep="\\s+",
encoding="gb2312",
skiprows=3,
)
except: # noqa: E722
data = pd.read_table(
z.open(i),
header=None,
sep="\\s+",
encoding="gb2312",
skiprows=4,
)
start_list = data[data.iloc[:, 0].str.find("名次") == 0].index.tolist()
end_list = data[data.iloc[:, 0].str.find("总计") == 0].index.tolist()
part_one = data[start_list[0] : end_list[0]].iloc[1:, :]
part_two = data[start_list[1] : end_list[1]].iloc[1:, :]
part_three = data[start_list[2] : end_list[2]].iloc[1:, :]
temp_df = pd.concat(
objs=[
part_one.reset_index(drop=True),
part_two.reset_index(drop=True),
part_three.reset_index(drop=True),
],
axis=1,
ignore_index=True,
)
temp_df.columns = [
"名次",
"会员简称",
"成交量",
"增减",
"名次",
"会员简称",
"持买单量",
"增减",
"名次",
"会员简称",
"持卖单量",
"增减",
]
temp_df["rank"] = range(1, len(temp_df) + 1)
del temp_df["名次"]
temp_df.columns = [
"vol_party_name",
"vol",
"vol_chg",
"long_party_name",
"long_open_interest",
"long_open_interest_chg",
"short_party_name",
"short_open_interest",
"short_open_interest_chg",
"rank",
]
temp_df["symbol"] = file_name.split("_")[1].upper()
temp_df["variety"] = file_name.split("_")[1][:-4].upper()
temp_df = temp_df[
[
"long_open_interest",
"long_open_interest_chg",
"long_party_name",
"rank",
"short_open_interest",
"short_open_interest_chg",
"short_party_name",
"vol",
"vol_chg",
"vol_party_name",
"symbol",
"variety",
]
]
temp_df = temp_df.map(lambda x: str(x).replace(",", ""))
temp_df["long_open_interest"] = pd.to_numeric(
temp_df["long_open_interest"], errors="coerce"
)
temp_df["long_open_interest_chg"] = pd.to_numeric(
temp_df["long_open_interest_chg"], errors="coerce"
)
temp_df["rank"] = pd.to_numeric(temp_df["rank"], errors="coerce")
temp_df["short_open_interest"] = pd.to_numeric(
temp_df["short_open_interest"], errors="coerce"
)
temp_df["short_open_interest_chg"] = pd.to_numeric(
temp_df["short_open_interest_chg"], errors="coerce"
)
temp_df["vol"] = pd.to_numeric(temp_df["vol"], errors="coerce")
temp_df["vol_chg"] = pd.to_numeric(temp_df["vol_chg"], errors="coerce")
big_dict[file_name.split("_")[1]] = temp_df
dict_keys = list(big_dict.keys())
for item in dict_keys:
result = re.sub(r"\d", "", item)
if result.upper() not in vars_list:
del big_dict[item]
filtered_dict = {k: v for k, v in big_dict.items() if len(v) > 1}
return filtered_dict
def futures_dce_position_rank_other(date: str = "20160104"):
"""
大连商品交易所-每日持仓排名-具体合约-补充
http://www.dce.com.cn/dalianshangpin/xqsj/tjsj26/rtj/rcjccpm/index.html
:param date: 交易日
:type date: str
:return: 合约具体名称列表
:rtype: list
"""
date = cons.convert_date(date) if date is not None else datetime.date.today()
if date.strftime("%Y%m%d") not in calendar:
warnings.warn("%s非交易日" % date.strftime("%Y%m%d"))
return {}
url = "http://www.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html"
payload = {
"memberDealPosiQuotes.variety": "c",
"memberDealPosiQuotes.trade_type": "0",
"year": date.year,
"month": date.month - 1,
"day": date.day,
"contract.contract_id": "all",
"contract.variety_id": "c",
"contract": "",
}
r = requests.post(url, data=payload)
soup = BeautifulSoup(r.text, features="lxml")
symbol_list = [
item["onclick"].strip("javascript:setVariety(").strip("');")
for item in soup.find_all(attrs={"class": "selBox"})[-3].find_all("input")
]
big_df = dict()
for symbol in symbol_list:
payload = {
"memberDealPosiQuotes.variety": symbol,
"memberDealPosiQuotes.trade_type": "0",
"year": date.year,
"month": date.month - 1,
"day": date.day,
"contract.contract_id": "all",
"contract.variety_id": symbol,
"contract": "",
}
r = requests.post(url, data=payload)
soup = BeautifulSoup(r.text, features="lxml")
contract_list = [
item["onclick"].strip("javascript:setContract_id('").strip("');")
for item in soup.find_all(attrs={"name": "contract"})
]
if contract_list:
if len(contract_list[0]) == 4:
contract_list = [symbol + item for item in contract_list]
for contract in contract_list:
payload = {
"memberDealPosiQuotes.variety": symbol,
"memberDealPosiQuotes.trade_type": "0",
"year": date.year,
"month": date.month - 1,
"day": date.day,
"contract.contract_id": contract,
"contract.variety_id": symbol,
"contract": "",
}
r = requests.post(url, data=payload)
temp_df = pd.read_html(StringIO(r.text))[1].iloc[:-1, :]
temp_df.columns = [
"rank",
"vol_party_name",
"vol",
"vol_chg",
"_",
"long_party_name",
"long_open_interest",
"long_open_interest_chg",
"_",
"short_party_name",
"short_open_interest",
"short_open_interest_chg",
]
temp_df["variety"] = symbol.upper()
temp_df["symbol"] = contract
temp_df = temp_df[
[
"long_open_interest",
"long_open_interest_chg",
"long_party_name",
"rank",
"short_open_interest",
"short_open_interest_chg",
"short_party_name",
"vol",
"vol_chg",
"vol_party_name",
"symbol",
"variety",
]
]
big_df[contract] = temp_df
return big_df
def __futures_gfex_vars_list() -> list:
"""
广州期货交易所-合约品种名称列表
http://www.gfex.com.cn/gfex/rcjccpm/hqsj_tjsj.shtml
:return: 合约品种名称列表
:rtype: list
"""
url = "http://www.gfex.com.cn/u/interfacesWebVariety/loadList"
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/119.0.0.0 Safari/537.36"
}
r = requests.post(url=url, headers=headers)
data_json = r.json()
temp_df = pd.DataFrame(data_json["data"])
var_list = temp_df["varietyId"].tolist()
return var_list
def __futures_gfex_contract_list(symbol: str = "si", date: str = "20240729") -> list:
"""
广州期货交易所-合约具体名称列表
http://www.gfex.com.cn/gfex/rcjccpm/hqsj_tjsj.shtml
:param symbol: 品种
:type symbol: str
:param date: 交易日
:type date: str
:return: 合约具体名称列表
:rtype: list
"""
url = "http://www.gfex.com.cn/u/interfacesWebTiMemberDealPosiQuotes/loadListContract_id"
payload = {
"variety": symbol,
"trade_date": date,
}
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/119.0.0.0 Safari/537.36"
}
r = requests.post(url=url, data=payload, headers=headers)
data_json = r.json()
temp_df = pd.DataFrame(data_json["data"])
if temp_df.empty:
return []
contract_list = temp_df.iloc[:, 0].tolist()
return contract_list
def __futures_gfex_contract_data(
symbol: str = "si", contract_id: str = "si2312", date: str = "20231113"
) -> pd.DataFrame:
"""
广州期货交易所-合约具体数据
http://www.gfex.com.cn/gfex/rcjccpm/hqsj_tjsj.shtml
:param symbol: 品种
:type symbol: str
:param contract_id: 具体合约
:type contract_id: str
:param date: 交易日
:type date: str
:return: 合约具体数据
:rtype: pandas.DataFrame
"""
url = "http://www.gfex.com.cn/u/interfacesWebTiMemberDealPosiQuotes/loadList"
payload = {
"trade_date": date,
"trade_type": "0",
"variety": symbol,
"contract_id": contract_id,
"data_type": "1",
}
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/119.0.0.0 Safari/537.36"
}
big_df = pd.DataFrame()
for page in range(1, 4):
payload.update(
{
"data_type": page,
}
)
r = requests.post(url=url, data=payload, headers=headers)
data_json = r.json()
temp_df = pd.DataFrame(data_json["data"])
if "qtySub" in temp_df.columns:
temp_df.rename(
columns={
"abbr": "vol_party_name",
"todayQty": "vol",
"qtySub": "vol_chg",
},
inplace=True,
)
else:
temp_df.rename(
columns={
"abbr": "vol_party_name",
"todayQty": "vol",
"todayQtyChg": "vol_chg",
},
inplace=True,
)
temp_df = temp_df[["vol_party_name", "vol", "vol_chg"]]
big_df = pd.concat(objs=[big_df, temp_df], axis=1, ignore_index=True)
big_df.reset_index(inplace=True)
big_df["index"] = big_df["index"] + 1
big_df.columns = [
"rank",
"vol_party_name",
"vol",
"vol_chg",
"long_party_name",
"long_open_interest",
"long_open_interest_chg",
"short_party_name",
"short_open_interest",
"short_open_interest_chg",
]
big_df["symbol"] = contract_id.upper()
big_df["variety"] = symbol.upper()
big_df = big_df.iloc[:-1, :]
big_df["vol"] = pd.to_numeric(big_df["vol"], errors="coerce")
big_df["vol_chg"] = pd.to_numeric(big_df["vol_chg"], errors="coerce")
big_df["long_open_interest"] = pd.to_numeric(
big_df["long_open_interest"], errors="coerce"
)
big_df["long_open_interest_chg"] = pd.to_numeric(
big_df["long_open_interest_chg"], errors="coerce"
)
big_df["short_open_interest"] = pd.to_numeric(
big_df["short_open_interest"], errors="coerce"
)
big_df["short_open_interest_chg"] = pd.to_numeric(
big_df["short_open_interest_chg"], errors="coerce"
)
return big_df
def futures_gfex_position_rank(date: str = "20231113", vars_list: list = None):
"""
广州期货交易所-日成交持仓排名
http://www.gfex.com.cn/gfex/rcjccpm/hqsj_tjsj.shtml
:param date: 开始日期; 广州期货交易所的日成交持仓排名从 20231110 开始
:type date: str
:param vars_list: 商品代码列表
:type vars_list: list
:return: 日成交持仓排名
:rtype: pandas.DataFrame
"""
date = cons.convert_date(date) if date is not None else datetime.date.today()
if date.strftime("%Y%m%d") not in calendar:
warnings.warn("%s非交易日" % date.strftime("%Y%m%d"))
return {}
date = date.strftime("%Y%m%d")
if vars_list is None:
vars_list = __futures_gfex_vars_list()
else:
vars_list = [item.lower() for item in vars_list]
big_dict = {}
for item in vars_list:
try:
futures_contract_list = __futures_gfex_contract_list(
symbol=item.lower(), date=date
)
except: # noqa: E722
return big_dict
for name in futures_contract_list:
try:
temp_df = __futures_gfex_contract_data(
symbol=item.lower(), contract_id=name, date=date
)
big_dict[name] = temp_df
except: # noqa: E722
return big_dict
return big_dict
if __name__ == "__main__":
# 郑州商品交易所
get_rank_table_czce_df = get_rank_table_czce(date="20230109")
print(get_rank_table_czce_df)
get_rank_table_czce_df = get_rank_table_czce(date="20201026")
print(get_rank_table_czce_df)
# 中国金融期货交易所
get_cffex_rank_table_df = get_cffex_rank_table(date="20250721")
print(get_cffex_rank_table_df)
# 上海期货交易所
get_shfe_rank_table_df = get_shfe_rank_table(date="20240509")
print(get_shfe_rank_table_df)
# 大连商品交易所-老接口
get_dce_rank_table_first_df = get_dce_rank_table(date="20131227")
print(get_dce_rank_table_first_df)
get_dce_rank_table_second_df = get_dce_rank_table(date="20171227")
print(get_dce_rank_table_second_df)
get_dce_rank_table_third_df = get_dce_rank_table(date="20200929")
print(get_dce_rank_table_third_df)
get_dce_rank_table_third_df = get_dce_rank_table(date="20230706")
print(get_dce_rank_table_third_df)
get_dce_rank_table_fourth_df = get_dce_rank_table(date="20210517", vars_list=["V"])
print(get_dce_rank_table_fourth_df)
# 大连商品交易所-新接口
futures_dce_detail_dict = futures_dce_position_rank(date="20240517")
print(futures_dce_detail_dict)
futures_dce_position_rank_other_df = futures_dce_position_rank_other(
date="20200727"
)
print(futures_dce_position_rank_other_df)
# 广州期货交易所
futures_gfex_position_rank_df = futures_gfex_position_rank(date="20250718")
print(futures_gfex_position_rank_df)
# 总接口
get_rank_sum_daily_df = get_rank_sum_daily(
start_day="20251031",
end_day="20251103",
)
print(get_rank_sum_daily_df)