fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
376 lines
12 KiB
Python
376 lines
12 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding:utf-8 -*-
|
|
"""
|
|
Date: 2023/12/12 16:30
|
|
Desc: 基金评级
|
|
https://fund.eastmoney.com/data/fundrating.html
|
|
"""
|
|
|
|
import pandas as pd
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
def fund_rating_all() -> pd.DataFrame:
|
|
"""
|
|
天天基金网-基金评级-基金评级总汇
|
|
https://fund.eastmoney.com/data/fundrating.html
|
|
:return: 基金评级总汇
|
|
:rtype: pandas.DataFrame
|
|
"""
|
|
url = "https://fund.eastmoney.com/data/fundrating.html"
|
|
r = requests.get(url)
|
|
soup = BeautifulSoup(r.text, features="lxml")
|
|
data_text = soup.find(name="div", attrs={"id": "fundinfo"}).find("script").string
|
|
data_content = [
|
|
item.split("|")
|
|
for item in data_text.split("var")[6]
|
|
.split("=")[1]
|
|
.strip()
|
|
.strip(";")
|
|
.strip('"')
|
|
.strip("|")
|
|
.split("|_")
|
|
]
|
|
temp_df = pd.DataFrame(data_content)
|
|
temp_df.columns = [
|
|
"代码",
|
|
"简称",
|
|
"类型",
|
|
"基金经理",
|
|
"-",
|
|
"基金公司",
|
|
"-",
|
|
"5星评级家数",
|
|
"-",
|
|
"-",
|
|
"招商证券",
|
|
"-",
|
|
"上海证券",
|
|
"-",
|
|
"晨星评级",
|
|
"-",
|
|
"济安金信",
|
|
"-",
|
|
"手续费",
|
|
"-",
|
|
"-",
|
|
"-",
|
|
"-",
|
|
"-",
|
|
"-",
|
|
"-",
|
|
"-",
|
|
]
|
|
temp_df = temp_df[
|
|
[
|
|
"代码",
|
|
"简称",
|
|
"基金经理",
|
|
"基金公司",
|
|
"5星评级家数",
|
|
"上海证券",
|
|
"招商证券",
|
|
"济安金信",
|
|
"晨星评级",
|
|
"手续费",
|
|
"类型",
|
|
]
|
|
]
|
|
temp_df["5星评级家数"] = pd.to_numeric(temp_df["5星评级家数"], errors="coerce")
|
|
temp_df["上海证券"] = pd.to_numeric(temp_df["上海证券"], errors="coerce")
|
|
temp_df["招商证券"] = pd.to_numeric(temp_df["招商证券"], errors="coerce")
|
|
temp_df["济安金信"] = pd.to_numeric(temp_df["济安金信"], errors="coerce")
|
|
temp_df["晨星评级"] = pd.to_numeric(temp_df["晨星评级"], errors="coerce")
|
|
temp_df["手续费"] = (
|
|
pd.to_numeric(temp_df["手续费"].str.strip("%"), errors="coerce") / 100
|
|
)
|
|
return temp_df
|
|
|
|
|
|
def fund_rating_sh(date: str = "20230630") -> pd.DataFrame:
|
|
"""
|
|
天天基金网-基金评级-上海证券评级
|
|
https://fund.eastmoney.com/data/fundrating_3.html
|
|
:param date: 日期; https://fund.eastmoney.com/data/fundrating_3.html 获取查询日期
|
|
:type date: str
|
|
:return: 上海证券评级
|
|
:rtype: pandas.DataFrame
|
|
"""
|
|
url = "https://fund.eastmoney.com/data/fundrating_3.html"
|
|
r = requests.get(url)
|
|
soup = BeautifulSoup(r.text, "lxml")
|
|
date_list = [
|
|
item["value"] for item in soup.find("select", attrs={"id": "rqoptions"})
|
|
]
|
|
date_format = "-".join([date[:4], date[4:6], date[6:]])
|
|
if date_format not in date_list:
|
|
raise "请访问 https://fund.eastmoney.com/data/fundrating_3.html 获取查询日期"
|
|
url = f"https://fund.eastmoney.com/data/fundrating_3_{'-'.join([date[:4], date[4:6], date[6:]])}.html"
|
|
r = requests.get(url)
|
|
soup = BeautifulSoup(r.text, "lxml")
|
|
data_text = soup.find("div", attrs={"id": "fundinfo"}).find("script").string
|
|
data_content = [
|
|
item.split("|")
|
|
for item in data_text.split("var")[1]
|
|
.split("=")[1]
|
|
.strip()
|
|
.strip(";")
|
|
.strip('"')
|
|
.strip("|")
|
|
.split("|_")
|
|
]
|
|
temp_df = pd.DataFrame(data_content)
|
|
temp_df.columns = [
|
|
"代码",
|
|
"简称",
|
|
"类型",
|
|
"基金经理",
|
|
"_",
|
|
"基金公司",
|
|
"_",
|
|
"3年期评级-3年评级",
|
|
"3年期评级-较上期",
|
|
"5年期评级-5年评级",
|
|
"5年期评级-较上期",
|
|
"单位净值",
|
|
"日期",
|
|
"日增长率",
|
|
"近1年涨幅",
|
|
"近3年涨幅",
|
|
"近5年涨幅",
|
|
"手续费",
|
|
"_",
|
|
"_",
|
|
"_",
|
|
"_",
|
|
]
|
|
temp_df = temp_df[
|
|
[
|
|
"代码",
|
|
"简称",
|
|
"基金经理",
|
|
"基金公司",
|
|
"3年期评级-3年评级",
|
|
"3年期评级-较上期",
|
|
"5年期评级-5年评级",
|
|
"5年期评级-较上期",
|
|
"单位净值",
|
|
"日期",
|
|
"日增长率",
|
|
"近1年涨幅",
|
|
"近3年涨幅",
|
|
"近5年涨幅",
|
|
"手续费",
|
|
"类型",
|
|
]
|
|
]
|
|
temp_df["日期"] = pd.to_datetime(temp_df["日期"], errors="coerce").dt.date
|
|
temp_df["3年期评级-3年评级"] = pd.to_numeric(
|
|
temp_df["3年期评级-3年评级"], errors="coerce"
|
|
)
|
|
temp_df["3年期评级-较上期"] = pd.to_numeric(
|
|
temp_df["3年期评级-较上期"], errors="coerce"
|
|
)
|
|
temp_df["5年期评级-5年评级"] = pd.to_numeric(
|
|
temp_df["5年期评级-5年评级"], errors="coerce"
|
|
)
|
|
temp_df["5年期评级-较上期"] = pd.to_numeric(
|
|
temp_df["5年期评级-较上期"], errors="coerce"
|
|
)
|
|
temp_df["单位净值"] = pd.to_numeric(temp_df["单位净值"], errors="coerce")
|
|
temp_df["日增长率"] = pd.to_numeric(temp_df["日增长率"], errors="coerce")
|
|
temp_df["近1年涨幅"] = pd.to_numeric(temp_df["近1年涨幅"], errors="coerce")
|
|
temp_df["近3年涨幅"] = pd.to_numeric(temp_df["近3年涨幅"], errors="coerce")
|
|
temp_df["近5年涨幅"] = pd.to_numeric(temp_df["近5年涨幅"], errors="coerce")
|
|
return temp_df
|
|
|
|
|
|
def fund_rating_zs(date: str = "20230331") -> pd.DataFrame:
|
|
"""
|
|
天天基金网-基金评级-招商证券评级
|
|
https://fund.eastmoney.com/data/fundrating_2.html
|
|
:param date: 日期; https://fund.eastmoney.com/data/fundrating_2.html 获取查询日期
|
|
:type date: str
|
|
:return: 招商证券评级-混合型
|
|
:rtype: pandas.DataFrame
|
|
"""
|
|
url = "https://fund.eastmoney.com/data/fundrating_2.html"
|
|
r = requests.get(url)
|
|
soup = BeautifulSoup(r.text, "lxml")
|
|
date_list = [
|
|
item["value"] for item in soup.find("select", attrs={"id": "rqoptions"})
|
|
]
|
|
date_format = "-".join([date[:4], date[4:6], date[6:]])
|
|
if date_format not in date_list:
|
|
raise "请访问 https://fund.eastmoney.com/data/fundrating_2.html 获取查询日期"
|
|
url = f"https://fund.eastmoney.com/data/fundrating_2_{'-'.join([date[:4], date[4:6], date[6:]])}.html"
|
|
r = requests.get(url)
|
|
soup = BeautifulSoup(r.text, "lxml")
|
|
data_text = soup.find("div", attrs={"id": "fundinfo"}).find("script").string
|
|
data_content = [
|
|
item.split("|")
|
|
for item in data_text.split("var")[1]
|
|
.split("=")[1]
|
|
.strip()
|
|
.strip(";")
|
|
.strip('"')
|
|
.strip("|")
|
|
.split("|_")
|
|
]
|
|
temp_df = pd.DataFrame(data_content)
|
|
temp_df.columns = [
|
|
"代码",
|
|
"简称",
|
|
"_",
|
|
"基金经理",
|
|
"_",
|
|
"基金公司",
|
|
"_",
|
|
"3年期评级-3年评级",
|
|
"3年期评级-较上期",
|
|
"单位净值",
|
|
"日期",
|
|
"日增长率",
|
|
"近1年涨幅",
|
|
"近3年涨幅",
|
|
"近5年涨幅",
|
|
"手续费",
|
|
"_",
|
|
"_",
|
|
"_",
|
|
"_",
|
|
]
|
|
temp_df = temp_df[
|
|
[
|
|
"代码",
|
|
"简称",
|
|
"基金经理",
|
|
"基金公司",
|
|
"3年期评级-3年评级",
|
|
"3年期评级-较上期",
|
|
"单位净值",
|
|
"日期",
|
|
"日增长率",
|
|
"近1年涨幅",
|
|
"近3年涨幅",
|
|
"近5年涨幅",
|
|
"手续费",
|
|
]
|
|
]
|
|
temp_df["日期"] = pd.to_datetime(temp_df["日期"], errors="coerce").dt.date
|
|
temp_df["3年期评级-3年评级"] = pd.to_numeric(
|
|
temp_df["3年期评级-3年评级"], errors="coerce"
|
|
)
|
|
temp_df["3年期评级-较上期"] = pd.to_numeric(
|
|
temp_df["3年期评级-较上期"], errors="coerce"
|
|
)
|
|
temp_df["单位净值"] = pd.to_numeric(temp_df["单位净值"], errors="coerce")
|
|
temp_df["日增长率"] = pd.to_numeric(temp_df["日增长率"], errors="coerce")
|
|
temp_df["近1年涨幅"] = pd.to_numeric(temp_df["近1年涨幅"], errors="coerce")
|
|
temp_df["近3年涨幅"] = pd.to_numeric(temp_df["近3年涨幅"], errors="coerce")
|
|
temp_df["近5年涨幅"] = pd.to_numeric(temp_df["近5年涨幅"], errors="coerce")
|
|
return temp_df
|
|
|
|
|
|
def fund_rating_ja(date: str = "20230331") -> pd.DataFrame:
|
|
"""
|
|
天天基金网-基金评级-济安金信评级
|
|
https://fund.eastmoney.com/data/fundrating_4.html
|
|
:param date: 日期; https://fund.eastmoney.com/data/fundrating_4.html 获取查询日期
|
|
:type date: str
|
|
:return: 济安金信评级
|
|
:rtype: pandas.DataFrame
|
|
"""
|
|
url = "https://fund.eastmoney.com/data/fundrating_4.html"
|
|
r = requests.get(url)
|
|
soup = BeautifulSoup(r.text, "lxml")
|
|
date_list = [
|
|
item["value"] for item in soup.find("select", attrs={"id": "rqoptions"})
|
|
]
|
|
date_format = "-".join([date[:4], date[4:6], date[6:]])
|
|
if date_format not in date_list:
|
|
raise "请访问 http://fund.eastmoney.com/data/fundrating_4.html 获取查询日期"
|
|
url = f"https://fund.eastmoney.com/data/fundrating_4_{'-'.join([date[:4], date[4:6], date[6:]])}.html"
|
|
r = requests.get(url)
|
|
soup = BeautifulSoup(r.text, "lxml")
|
|
data_text = soup.find("div", attrs={"id": "fundinfo"}).find("script").string
|
|
data_content = [
|
|
item.split("|")
|
|
for item in data_text.split("var")[1]
|
|
.split("=")[1]
|
|
.strip()
|
|
.strip(";")
|
|
.strip('"')
|
|
.strip("|")
|
|
.split("|_")
|
|
]
|
|
temp_df = pd.DataFrame(data_content)
|
|
temp_df.columns = [
|
|
"代码",
|
|
"简称",
|
|
"类型",
|
|
"基金经理",
|
|
"_",
|
|
"基金公司",
|
|
"_",
|
|
"3年期评级-3年评级",
|
|
"3年期评级-较上期",
|
|
"单位净值",
|
|
"日期",
|
|
"日增长率",
|
|
"近1年涨幅",
|
|
"近3年涨幅",
|
|
"近5年涨幅",
|
|
"手续费",
|
|
"_",
|
|
"_",
|
|
"_",
|
|
"_",
|
|
]
|
|
temp_df = temp_df[
|
|
[
|
|
"代码",
|
|
"简称",
|
|
"基金经理",
|
|
"基金公司",
|
|
"3年期评级-3年评级",
|
|
"3年期评级-较上期",
|
|
"单位净值",
|
|
"日期",
|
|
"日增长率",
|
|
"近1年涨幅",
|
|
"近3年涨幅",
|
|
"近5年涨幅",
|
|
"手续费",
|
|
"类型",
|
|
]
|
|
]
|
|
temp_df["日期"] = pd.to_datetime(temp_df["日期"], errors="coerce").dt.date
|
|
temp_df["3年期评级-3年评级"] = pd.to_numeric(
|
|
temp_df["3年期评级-3年评级"], errors="coerce"
|
|
)
|
|
temp_df["3年期评级-较上期"] = pd.to_numeric(
|
|
temp_df["3年期评级-较上期"], errors="coerce"
|
|
)
|
|
temp_df["单位净值"] = pd.to_numeric(temp_df["单位净值"], errors="coerce")
|
|
temp_df["日增长率"] = pd.to_numeric(temp_df["日增长率"], errors="coerce")
|
|
temp_df["近1年涨幅"] = pd.to_numeric(temp_df["近1年涨幅"], errors="coerce")
|
|
temp_df["近3年涨幅"] = pd.to_numeric(temp_df["近3年涨幅"], errors="coerce")
|
|
temp_df["近5年涨幅"] = pd.to_numeric(temp_df["近5年涨幅"], errors="coerce")
|
|
return temp_df
|
|
|
|
|
|
if __name__ == "__main__":
|
|
fund_rating_all_df = fund_rating_all()
|
|
print(fund_rating_all_df)
|
|
|
|
fund_rating_sh_df = fund_rating_sh(date="20230630")
|
|
print(fund_rating_sh_df)
|
|
|
|
fund_rating_zs_df = fund_rating_zs(date="20230331")
|
|
print(fund_rating_zs_df)
|
|
|
|
fund_rating_ja_df = fund_rating_ja(date="20230331")
|
|
print(fund_rating_ja_df)
|