Files
MoFin/venv/lib/python3.12/site-packages/akshare/article/ff_factor.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

167 lines
4.5 KiB
Python

#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2024/1/20 22:30
Desc: Fama-French (FF) factor data from Ken French's data library
https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
"""
from io import StringIO
import pandas as pd
import requests
from akshare.article.cons import ff_home_url
def _ff_split_values(cell: str) -> list:
    """
    Split a space-separated cell into its non-empty tokens
    :param cell: text of one table cell
    :type cell: str
    :return: tokens in their original order; empty strings produced by
        consecutive spaces are dropped
    :rtype: list
    """
    return [token for token in cell.split(" ") if token != ""]


def _ff_factor_table(
    indexed: pd.DataFrame, split_names: pd.DataFrame, row: int
) -> pd.DataFrame:
    """
    Build the transposed table for one of the first two rows of the page table
    :param indexed: page table parsed with its first column as the index
    :type indexed: pandas.DataFrame
    :param split_names: first-column text of the page table, split into one
        name per column
    :type split_names: pandas.DataFrame
    :param row: positional row to convert (0 or 1)
    :type row: int
    :return: one row per name, one column per column of ``indexed``
    :rtype: pandas.DataFrame
    """
    # The leading "-" is a placeholder so that each value list has the same
    # length as the name list built below.
    value_rows = [
        ["-"] + _ff_split_values(indexed.iloc[row, col]) for col in range(3)
    ]
    names = split_names.T[row].dropna().tolist()
    return pd.DataFrame(
        value_rows, index=indexed.columns.tolist(), columns=names
    ).T


def article_ff_crr() -> pd.DataFrame:
    """
    Fama-French (FF) multi-factor model
    https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
    :return: single table of the FF multi-factor model
    :rtype: pandas.DataFrame
    """
    res = requests.get(ff_home_url)
    # Parse the page once per layout instead of once per cell lookup; every
    # lookup below reads the table at position 4 of the parsed result.
    indexed = pd.read_html(StringIO(res.text), header=0, index_col=0)[4]
    plain = pd.read_html(StringIO(res.text), header=0)[4]
    # Names are split on a double space: a single-space split would break
    # multi-word names apart, so "Small Growth Big Value" could never be found
    # by the remove() call further down.
    # NOTE(review): assumes names are separated by two spaces in the parsed
    # cell text - confirm against the live page.
    split_names = plain.iloc[:, 0].str.split("  ", expand=True)

    # first table
    table_one = _ff_factor_table(indexed, split_names, 0)
    # second table
    table_two = _ff_factor_table(indexed, split_names, 1)

    # third table
    portfolio_row = indexed.iloc[2, :]
    name_list = split_names.iloc[2, :].tolist()
    value_lists = []
    for col in range(3):
        # Same tokenising as the first two tables, so repeated spaces between
        # values cannot shift the placeholder positions.
        values = _ff_split_values(portfolio_row.iloc[col])
        # "-" placeholders sit at the positions of the four heading labels
        # inserted into temp_list below.
        for position in (0, 1, 8, 15):
            values.insert(position, "-")
        value_lists.append(values)
    # This entry arrives as one token; replace it with its two names.
    name_list.remove("Small Growth Big Value")
    name_list.insert(5, "Small Growth")
    name_list.insert(6, "Big Value")
    # Drop every entry containing "Portfolios", then re-insert the headings at
    # fixed positions.
    temp_list = [item for item in name_list if "Portfolios" not in item]
    temp_list.insert(0, "Fama/French Research Portfolios")
    temp_list.insert(1, "Size and Book-to-Market Portfolios")
    temp_list.insert(8, "Size and Operating Profitability Portfolios")
    temp_list.insert(15, "Size and Investment Portfolios")
    temp_df = pd.DataFrame([temp_list] + value_lists).T
    temp_df.index = temp_df.iloc[:, 0]
    temp_df = temp_df.iloc[:, 1:]

    # concat
    temp_df.columns = table_two.columns
    all_df = pd.concat([table_one, table_two, temp_df])
    all_df = all_df.reset_index()
    all_df = all_df.rename(columns={"index": "item"})
    return all_df
if __name__ == "__main__":
    # Manual run: fetch the combined table and print it.
    print(article_ff_crr())