Files
MoFin/venv/lib/python3.12/site-packages/akshare/cal/rv.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

174 lines
6.4 KiB
Python

"""
Yang-Zhang-s-Realized-Volatility-Automated-Estimation-in-Python
https://github.com/hugogobato/Yang-Zhang-s-Realized-Volatility-Automated-Estimation-in-Python
论文地址:https://www.jstor.org/stable/10.1086/209650
"""
import warnings
import numpy as np
import pandas as pd
def rv_from_stock_zh_a_hist_min_em(
    symbol: str = "000001",
    start_date: str = "2021-10-20 09:30:00",
    end_date: str = "2024-11-01 15:00:00",
    period: str = "1",
    adjust: str = "hfq",
) -> pd.DataFrame:
    """
    Fetch minute-level historical stock quotes from Eastmoney and reshape them
    into the layout expected by the Yang-Zhang realized volatility calculation
    https://quote.eastmoney.com/concept/sh603777.html?from=classic
    :param symbol: stock code, e.g. "000001"
    :type symbol: str
    :param start_date: start datetime, formatted "YYYY-MM-DD HH:MM:SS"
    :type start_date: str
    :param end_date: end datetime, formatted "YYYY-MM-DD HH:MM:SS"
    :type end_date: str
    :param period: bar size in minutes, one of {'1','5','15','30','60'}
    :type period: str
    :param adjust: price adjustment, one of {'', 'qfq' (forward), 'hfq' (backward)}
    :type adjust: str
    :return: minute bars indexed by Date with Open, High, Low and Close columns;
        any other columns returned upstream are passed through unchanged
    :rtype: pandas.DataFrame
    """
    # Imported lazily, as in the original, so importing this module does not
    # pull in the data-fetching module.
    from akshare.stock_feature.stock_hist_em import stock_zh_a_hist_min_em

    raw_df = stock_zh_a_hist_min_em(
        symbol=symbol,
        start_date=start_date,
        end_date=end_date,
        period=period,
        adjust=adjust,
    )
    temp_df = raw_df.rename(
        columns={
            "时间": "Date",
            "开盘": "Open",
            "最高": "High",
            "最低": "Low",
            "收盘": "Close",
        }
    )
    # Rows with a zero open price are discarded. The explicit copy makes the
    # filtered frame independent, so the column assignment below is not a
    # chained assignment on a derived frame (SettingWithCopyWarning).
    temp_df = temp_df[temp_df["Open"] != 0].copy()
    temp_df["Date"] = pd.to_datetime(temp_df["Date"])
    temp_df.set_index(keys="Date", inplace=True)
    return temp_df
def rv_from_futures_zh_minute_sina(
    symbol: str = "IF2008", period: str = "5"
) -> pd.DataFrame:
    """
    Fetch minute-level historical futures quotes from Sina Finance and reshape
    them into a Date-indexed OHLC frame
    https://vip.stock.finance.sina.com.cn/quotes_service/view/qihuohangqing.html#titlePos_3
    :param symbol: futures contract code, e.g. "IF2008" (CSI 300 index futures,
        August 2020 contract)
    :type symbol: str
    :param period: bar size in minutes, one of {'1','5','15','30','60'}
    :type period: str
    :return: minute bars indexed by Date with Open, High, Low and Close columns
    :rtype: pandas.DataFrame
    """
    from akshare.futures.futures_zh_sina import futures_zh_minute_sina

    # Upstream lower-case column names -> capitalised names used downstream.
    column_map = {
        "datetime": "Date",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
    }
    bars = futures_zh_minute_sina(symbol=symbol, period=period)
    bars = bars.rename(columns=column_map)
    bars["Date"] = pd.to_datetime(bars["Date"])
    return bars.set_index("Date")
def volatility_yz_rv(data: pd.DataFrame) -> pd.DataFrame:
    """
    Volatility - realized volatility - Yang-Zhang realized volatility
    https://github.com/hugogobato/Yang-Zhang-s-Realized-Volatility-Automated-Estimation-in-Python
    Paper: https://www.jstor.org/stable/10.1086/209650

    The estimate is computed per calendar day as
        RV^2 = Vo + k*Vc + (1-k)*Vrs
    where, for every bar i,
        ui = ln(Hi/Oi), ci = ln(Ci/Oi), di = ln(Li/Oi), oi = ln(Oi/C(i-1))
    and
        - Vo: sample variance (1/(n-1) normalisation) of oi within the day
        - Vc: sample variance (1/(n-1) normalisation) of ci within the day
        - Vrs: mean within the day of the Rogers-Satchell term
          ui*(ui-ci) + di*(di-ci)
        - k = 0.34/(1.34+(n+1)/(n-1)), with n the average number of bars per
          day (total usable bars divided by the number of days, truncated)
    Hi/Li/Ci/Oi are the high/low/close/open prices of bar i. The first bar
    has no previous close and is excluded from all statistics.

    The input frame is not modified, and warnings raised during the
    computation are suppressed only for the duration of this call.

    :param data: OHLC bars with columns Open, High, Low and Close; the index
        must be datetime-like because rows are grouped by calendar day
    :type data: pandas.DataFrame
    :return: one row per day with columns "date" (datetime.date) and "rv";
        an empty frame with those columns when there are fewer than two bars
        per day on average, since the estimator is undefined in that case
    :rtype: pandas.DataFrame
    :raises KeyError: if any of the Open/High/Low/Close columns is missing
    """
    result_columns = ["date", "rv"]
    if len(data) < 2:
        return pd.DataFrame(columns=result_columns)

    # Work on an independent copy so the caller's frame gains no extra columns.
    prices = data[["Open", "High", "Low", "Close"]].copy()

    # Silence warnings locally instead of installing a process-wide
    # "ignore" filter that would outlive this call.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        open_px = prices["Open"]
        ui = np.log(prices["High"] / open_px)
        ci = np.log(prices["Close"] / open_px)
        di = np.log(prices["Low"] / open_px)
        # shift(1) pairs each open with the PREVIOUS bar's close. Dividing two
        # offset slices of the same frame would be re-aligned on the index by
        # pandas and silently yield ln(O_t/C_t) instead.
        oi = np.log(open_px / prices["Close"].shift(1))

        terms = pd.DataFrame(
            {"oi": oi, "ci": ci, "RS": ui * (ui - ci) + di * (di - ci)}
        ).iloc[1:]

        rs_var = terms["RS"].groupby(pd.Grouper(freq="1D")).mean().dropna()
        vc_and_vo = (
            terms[["oi", "ci"]].groupby(pd.Grouper(freq="1D")).var().dropna()
        )
        if rs_var.empty:
            return pd.DataFrame(columns=result_columns)

        n = int(len(terms) / len(rs_var))
        if n < 2:
            # k divides by (n - 1); with one bar per day the daily sample
            # variances are undefined as well.
            return pd.DataFrame(columns=result_columns)
        k = 0.34 / (1.34 + (n + 1) / (n - 1))
        yang_zhang_rv = np.sqrt(
            (1 - k) * rs_var + vc_and_vo["oi"] + vc_and_vo["ci"] * k
        )

    yang_zhang_rv_df = (
        yang_zhang_rv.rename("rv").rename_axis("date").reset_index()
    )
    yang_zhang_rv_df["date"] = pd.to_datetime(
        yang_zhang_rv_df["date"], errors="coerce"
    ).dt.date
    return yang_zhang_rv_df
if __name__ == "__main__":
    # Demo 1: 1-minute bars of futures contract IF2008 -> daily YZ realized volatility.
    futures_minute_bars = rv_from_futures_zh_minute_sina(symbol="IF2008", period="1")
    print(volatility_yz_rv(data=futures_minute_bars))

    # Demo 2: unadjusted 5-minute bars of stock 000001 -> daily YZ realized volatility.
    stock_query = {
        "symbol": "000001",
        "start_date": "2021-10-20 09:30:00",
        "end_date": "2024-11-01 15:00:00",
        "period": "5",
        "adjust": "",
    }
    stock_minute_bars = rv_from_stock_zh_a_hist_min_em(**stock_query)
    print(volatility_yz_rv(data=stock_minute_bars))