Files
MoFin/venv/lib/python3.12/site-packages/akshare/futures/futures_basis.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

387 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2024/12/12 17:00
Desc: 生意社网站采集大宗商品现货价格及相应基差数据, 数据时间段从 20110104-至今
备注:现期差 = 现货价格 - 期货价格(这里的期货价格为结算价)
黄金为 元/克, 白银为 元/千克, 玻璃现货为 元/平方米, 鸡蛋现货为 元/公斤, 鸡蛋期货为 元/500千克, 其余为 元/吨.
焦炭现货规格是: 一级冶金焦; 焦炭期货规格: 介于一级和二级之间, 焦炭现期差仅供参考.
铁矿石现货价格是: 湿吨, 铁矿石期货价格是: 干吨
网页地址: https://www.100ppi.com/sf/
历史数据可以通过修改 url 地址来获取, 比如: https://www.100ppi.com/sf/day-2017-09-12.html
发现生意社的 bugs:
1. 2018-09-12 周三 数据缺失是因为生意社源数据在该交易日缺失: https://www.100ppi.com/sf/day-2018-09-12.html
"""
import datetime
import re
import time
import warnings
from typing import List
import pandas as pd
from akshare.futures import cons
from akshare.futures.requests_fun import pandas_read_html_link
from akshare.futures.symbol_var import chinese_to_english
# Trading calendar fetched once, at import time, through cons.get_calendar().
# The functions in this module test ``YYYYMMDD`` strings for membership in it
# and treat any date that is not a member as a non-trading day.
calendar = cons.get_calendar()
def futures_spot_price_daily(
    start_day: str = "20210201",
    end_day: str = "20210208",
    vars_list: list = cons.contract_symbols,
):
    """
    Collect commodity spot prices and basis data for every day in a date range.

    Data source: https://www.100ppi.com/sf/

    The range is walked one calendar day at a time and ``futures_spot_price``
    is called for each day (it returns an empty frame for non-trading days).

    :param start_day: first day of the range, converted with
        ``cons.convert_date`` (documented upstream as accepting YYYY-MM-DD,
        YYYYMMDD or a ``datetime.date``); ``None`` means today.
        The default value is "20210201".
    :param end_day: last day of the range (inclusive), same formats;
        ``None`` means the date given by ``cons.get_latest_data_date``.
        The default value is "20210208".
    :param vars_list: contract symbols to keep, e.g. ["RB", "AL"];
        defaults to ``cons.contract_symbols`` (the list is only read).
    :return: the per-day frames concatenated with a fresh 0..n-1 index, or
        ``None`` when no frame was collected at all (kept for backward
        compatibility with existing callers).
        Columns, as built by ``_check_information``: date, symbol,
        spot_price, near_contract, near_contract_price, dominant_contract,
        dominant_contract_price, near_month, dominant_month, near_basis,
        dom_basis, near_basis_rate, dom_basis_rate.
    :rtype: pandas.DataFrame or None
    """
    start_day = (
        cons.convert_date(start_day) if start_day is not None else datetime.date.today()
    )
    end_day = (
        cons.convert_date(end_day)
        if end_day is not None
        else cons.convert_date(cons.get_latest_data_date(datetime.datetime.now()))
    )
    one_day = datetime.timedelta(days=1)
    daily_frames = []
    current_day = start_day
    while current_day <= end_day:
        day_df = futures_spot_price(current_day, vars_list)
        if day_df is False:
            # Legacy "give up" sentinel: stop early but keep what was gathered.
            # Falling through to the shared tail avoids calling pd.concat on
            # an empty list, which raises ValueError.
            break
        if day_df is not None:
            daily_frames.append(day_df)
        current_day += one_day
    if not daily_frames:
        return None
    return pd.concat(daily_frames).reset_index(drop=True)
def futures_spot_price(
    date: str = "20240430", vars_list: list = cons.contract_symbols
) -> pd.DataFrame:
    """
    Fetch commodity spot prices and basis data for a single trading day.

    Example page: https://www.100ppi.com/sf/day-2017-09-12.html

    The dated page is tried first, then the landing page. A page is accepted
    only when the date shown in its first table matches the requested day.
    Every failed attempt (exception or date mismatch) counts toward a limit
    of five, after which an empty frame is returned, so the function always
    terminates.

    :param date: requested day, converted with ``cons.convert_date``
        (documented upstream as accepting YYYY-MM-DD, YYYYMMDD or a
        ``datetime.date``); ``None`` means today.
    :param vars_list: contract symbols to keep, e.g. ["RB", "AL"];
        defaults to ``cons.contract_symbols`` (the list is only read).
    :return: one row per requested symbol present on the page, with the
        columns built by ``_check_information`` (date, symbol, spot_price,
        near_contract, near_contract_price, dominant_contract,
        dominant_contract_price, near_month, dominant_month, near_basis,
        dom_basis, near_basis_rate, dom_basis_rate); an empty frame for a
        non-trading day or when all attempts fail.
    :raises Exception: when the requested day is before 2011-01-04.
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2011, 1, 4):
        raise Exception(
            "数据源开始日期为 20110104, 请将获取数据时间点设置在 20110104 后"
        )
    date_compact = date.strftime("%Y%m%d")
    date_dashed = date.strftime("%Y-%m-%d")
    if date_compact not in calendar:
        warnings.warn(f"{date_compact}非交易日")
        return pd.DataFrame()

    # Dated archive page first, landing page as the fallback.
    urls = [
        f"https://www.100ppi.com/sf/day-{date_dashed}.html",
        "https://www.100ppi.com/sf/",
    ]
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
    }
    max_attempts = 5
    attempt = 1
    while True:
        for url in urls:
            try:
                tables = pandas_read_html_link(url, headers=headers)
                banner = tables[0].loc[1, 1]
                digits = "".join(re.findall(r"[0-9]", banner))
                # Digits 3..10 of the banner are compared with the requested
                # day; the three leading digits are skipped (presumably part
                # of the site name -- not verifiable from this file).
                if digits[3:11] == date_compact:
                    records = _check_information(tables[1], date)
                    records.index = records["symbol"]
                    wanted = [var for var in vars_list if var in records.index]
                    day_df = records.loc[wanted, :]
                    day_df.reset_index(drop=True, inplace=True)
                    return day_df
                # The page shows another day: pause before the next request.
                time.sleep(3)
            except Exception as e:
                # Deliberately broad: any fetch/parse problem is retried
                # on a best-effort basis.
                print(
                    f"{date_dashed}日生意社数据连接失败[错误信息:{e}],第{str(attempt)}次尝试,最多5次"
                )
            # Count date mismatches as well as exceptions; counting only
            # exceptions would let a persistent mismatch loop forever.
            attempt += 1
            if attempt > max_attempts:
                print(
                    f"{date_dashed}日生意社数据连接失败, 如果当前交易日是 2018-09-12, "
                    f"由于生意社源数据缺失, 无法访问, 否则为重复访问已超过5次,您的地址被网站墙了,"
                    f"请保存好返回数据,稍后从该日期起重试"
                )
                return pd.DataFrame()
def _format_contract_code(code, lower_case_symbols, czce_symbols):
    """Apply exchange naming rules to a ``<SYMBOL><4-digit month>`` code.

    Codes whose symbol is in ``lower_case_symbols`` are lower-cased; codes
    whose symbol is in ``czce_symbols`` lose the first of their four digits.
    The two checks run in this order, each on the result of the previous one.
    """
    if code[:-4] in lower_case_symbols:
        code = code.lower()
    if code[:-4] in czce_symbols:
        code = code[:-4] + code[-3:]
    return code


def _check_information(df_data, date):
    """
    Validate the scraped spot/futures table and compute the basis columns.

    :param df_data: pandas.DataFrame as scraped; the integer-labelled columns
        0, 1, 2, 3, 5, 6 are used as symbol name, spot price, near contract,
        near contract price, dominant contract and dominant contract price.
    :param date: datetime.date of the data; written to the ``date`` column
        as a YYYYMMDD string.
    :return: pandas.DataFrame with the columns date, symbol, spot_price,
        near_contract, near_contract_price, dominant_contract,
        dominant_contract_price, near_month, dominant_month, near_basis,
        dom_basis, near_basis_rate, dom_basis_rate.
        Basis is computed as futures price minus spot price, and the rate as
        futures price / spot price - 1.
        When no commodity row is found, an empty frame is returned that has
        only the six renamed input columns followed by near_basis, dom_basis,
        near_basis_rate, dom_basis_rate and date.
    """
    df_data = df_data.loc[:, [0, 1, 2, 3, 5, 6]]
    df_data.columns = [
        "symbol",
        "spot_price",
        "near_contract",
        "near_contract_price",
        "dominant_contract",
        "dominant_contract_price",
    ]

    # Cell texts that are table headers, exchange section titles or the
    # "no data" placeholder rather than commodity names.
    non_commodity_labels = [
        "商品",
        "价格",
        "上海期货交易所",
        "郑州商品交易所",
        "大连商品交易所",
        "广州期货交易所",
        # On some days (e.g. 20180912) the site has no data and shows this
        # placeholder; that is not the same as being blocked by the site.
        "暂无数据",
    ]
    # Spot prices quoted in a different unit than the futures price are
    # rescaled to the futures unit (see http://www.100ppi.com/sf/959.html):
    #   JD (eggs):      yuan/kg  -> yuan/500 kg
    #   FG (glass):     yuan/m^2 -> yuan/ton (x 80)
    #   LH (live hogs): yuan/kg  -> yuan/ton
    spot_unit_factors = {"JD": 500, "FG": 80, "LH": 1000}

    pieces = []
    for raw_name in df_data["symbol"].tolist():
        # Prefer the Chinese characters of the cell; fall back to the
        # stripped raw text for names without any (e.g. "PTA").
        name = "".join(re.findall(r"[\u4e00-\u9fa5]", raw_name))
        if name == "":
            name = raw_name.strip()
        if name == "" or name in non_commodity_labels:
            continue
        symbol = chinese_to_english(name)
        record = pd.DataFrame(df_data[df_data["symbol"] == raw_name])
        record.loc[:, "symbol"] = symbol
        record["spot_price"] = record["spot_price"].astype(float)
        factor = spot_unit_factors.get(symbol)
        if factor is not None:
            record.loc[:, "spot_price"] = float(record["spot_price"].iloc[0]) * factor
        pieces.append(record)

    # 20241129: when the day has no data, return an empty table right away.
    if not pieces:
        # Copy first so the column assignments below do not write to a slice.
        records = df_data.iloc[0:0].copy()
        records["near_basis"] = pd.Series(dtype="float")
        records["dom_basis"] = pd.Series(dtype="float")
        records["near_basis_rate"] = pd.Series(dtype="float")
        records["dom_basis_rate"] = pd.Series(dtype="float")
        records["date"] = pd.Series(dtype="object")
        return records

    # One concat over all rows instead of growing a frame inside the loop.
    records = pd.concat(pieces)

    price_columns = ["near_contract_price", "dominant_contract_price", "spot_price"]
    records[price_columns] = records[price_columns].astype("float")

    lower_case_symbols = (
        cons.market_exchange_symbols["shfe"] + cons.market_exchange_symbols["dce"]
    )
    czce_symbols = cons.market_exchange_symbols["czce"]
    for contract_col, month_col in (
        ("near_contract", "near_month"),
        ("dominant_contract", "dominant_month"),
    ):
        # Keep only the trailing digits of the scraped contract text.
        records[contract_col] = records[contract_col].replace(
            r"[^0-9]*(\d*)$", r"\g<1>", regex=True
        )
        records[month_col] = records.loc[:, contract_col]
        records[contract_col] = records["symbol"] + records.loc[
            :, contract_col
        ].astype("int").astype("str")
        records[contract_col] = records[contract_col].apply(
            lambda code: _format_contract_code(code, lower_case_symbols, czce_symbols)
        )

    records["near_basis"] = records["near_contract_price"] - records["spot_price"]
    records["dom_basis"] = records["dominant_contract_price"] - records["spot_price"]
    records["near_basis_rate"] = (
        records["near_contract_price"] / records["spot_price"] - 1
    )
    records["dom_basis_rate"] = (
        records["dominant_contract_price"] / records["spot_price"] - 1
    )
    records.insert(0, "date", date.strftime("%Y%m%d"))
    records.reset_index(inplace=True, drop=True)
    return records
def _join_head(content: pd.DataFrame) -> List:
headers = []
for s1, s2 in zip(content.iloc[0], content.iloc[1]):
if s1 != s2:
s = f"{s1}{s2}"
else:
s = s1
headers.append(s)
return headers
def futures_spot_price_previous(date: str = "20240430") -> pd.DataFrame:
    """
    Fetch spot prices and dominant-contract basis data for one trading day.

    Example page: https://www.100ppi.com/sf/day-2017-09-12.html
    (the request itself goes to the ``sf2`` variant of that address).

    :param date: trading day to query, a historical date; ``None`` means today
    :type date: str
    :return: spot price, dominant contract code and price, dominant basis and
        its percentage, and the 180-day max/min/mean of the dominant basis;
        an empty frame for a non-trading day
    :rtype: pandas.DataFrame
    :raises Exception: when the requested day is before 2011-01-04
    """
    if date is None:
        date = datetime.date.today()
    else:
        date = cons.convert_date(date)
    if date < datetime.date(2011, 1, 4):
        raise Exception(
            "数据源开始日期为 20110104, 请将获取数据时间点设置在 20110104 后"
        )
    day_key = date.strftime("%Y%m%d")
    if day_key not in calendar:
        warnings.warn(f"{day_key}非交易日")
        return pd.DataFrame()

    request_headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,"
        "image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
    }
    tables = pandas_read_html_link(
        date.strftime("https://www.100ppi.com/sf2/day-%Y-%m-%d.html"),
        headers=request_headers,
    )
    main_table = tables[1]

    # Column labels come from the first two rows of the main table.
    column_labels = _join_head(main_table)

    # Data rows are the ones whose column 4 ends with a percent sign.
    values = main_table[main_table[4].str.endswith("%")]
    values.columns = column_labels

    # The tables between the main one and the last one carry the basis
    # figures; on days without data there are none of them.
    basis_tables = tables[2:-1]
    if len(basis_tables) > 0:
        basis = pd.concat(basis_tables)
    else:
        basis = pd.DataFrame(columns=["主力合约基差", "主力合约基差(%)"])
    basis.columns = ["主力合约基差", "主力合约基差(%)"]

    # 20241125 (jasonudu): on some dates a commodity has several spot prices
    # (e.g. sugar, soybean meal and soybean oil on 20151125), so joining on
    # the commodity name would produce duplicates; rows are joined by index.
    basis.index = values.index
    spot_part = values[["商品", "现货价格", "主力合约代码", "主力合约价格"]]
    range_part = values[
        [
            "180日内主力基差最高",
            "180日内主力基差最低",
            "180日内主力基差平均",
        ]
    ]
    basis = spot_part.merge(basis, left_index=True, right_index=True).merge(
        range_part, left_index=True, right_index=True
    )
    basis.columns = [
        "商品",
        "现货价格",
        "主力合约代码",
        "主力合约价格",
        "主力合约基差",
        "主力合约变动百分比",
        "180日内主力基差最高",
        "180日内主力基差最低",
        "180日内主力基差平均",
    ]
    basis["主力合约变动百分比"] = basis["主力合约变动百分比"].str.strip("%")
    return basis.reset_index(drop=True)
if __name__ == "__main__":
    # Manual smoke run: call each public function of this module once and
    # print what it returns. The functions fetch their pages through
    # pandas_read_html_link.

    # Date-range query (a single day here) restricted to one symbol.
    futures_spot_price_daily_df = futures_spot_price_daily(
        start_day="20260303", end_day="20260303", vars_list=['PL']
    )
    print(futures_spot_price_daily_df)
    # Single-day query with the default symbol list.
    futures_spot_price_df = futures_spot_price(date="20260303")
    print(futures_spot_price_df)
    # Single-day query against the "sf2" page layout.
    futures_spot_price_previous_df = futures_spot_price_previous(date="20240430")
    print(futures_spot_price_previous_df)