fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
279 lines
10 KiB
Python
279 lines
10 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding:utf-8 -*-
|
|
"""
|
|
Date: 2024/1/24 15:00
|
|
Desc: 申万宏源研究-申万指数-指数发布
|
|
乐咕乐股网
|
|
https://legulegu.com/stockdata/index-composition?industryCode=851921.SI
|
|
"""
|
|
|
|
from io import StringIO
|
|
|
|
import pandas as pd
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
from akshare.utils.cons import headers
|
|
|
|
|
|
def sw_index_first_info() -> pd.DataFrame:
    """
    Scrape the Shenwan level-1 industry overview from legulegu.com.

    https://legulegu.com/stockdata/sw-industry-overview

    The overview page is downloaded once and the ``div`` with id
    ``level1Items`` is read: one text per title node, one "name(count)" text
    per number node, and the first four ``span.value`` texts per value node.

    :return: frame with the columns 行业代码, 行业名称, 成份个数, 静态市盈率,
        TTM(滚动)市盈率, 市净率 and 静态股息率; every column except the first
        two is converted with ``pd.to_numeric(..., errors="coerce")``, so
        unparsable cells become NaN
    :rtype: pandas.DataFrame
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status
    """
    url = "https://legulegu.com/stockdata/sw-industry-overview"
    r = requests.get(url, headers=headers)
    # Fail on an error status here instead of parsing an error page below.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, features="lxml")

    # Look the level container up once; all item queries are scoped to it.
    container = soup.find(name="div", attrs={"id": "level1Items"})
    title_nodes = container.find_all(
        name="div", attrs={"class": "lg-industries-item-chinese-title"}
    )
    number_nodes = container.find_all(
        name="div", attrs={"class": "lg-industries-item-number"}
    )
    value_nodes = container.find_all(
        name="div", attrs={"class": "lg-sw-industries-item-value"}
    )

    code = [node.get_text() for node in title_nodes]
    # Each number node reads "<name>(<count>)": the part before "(" is the
    # name, the part between the parentheses is the constituent count.
    number_texts = [node.get_text() for node in number_nodes]
    name = [text.split("(")[0] for text in number_texts]
    num = [text.split("(")[1].split(")")[0] for text in number_texts]

    # Collect the value spans once per row, then slice out the four metrics
    # column by column (positions 0..3 map to the four ratio columns below).
    span_rows = [
        node.find_all("span", attrs={"class": "value"}) for node in value_nodes
    ]
    metric_columns = [
        [spans[position].get_text().strip() for spans in span_rows]
        for position in range(4)
    ]

    # Rows-of-columns transposed, as before, so ragged inputs pad with
    # missing values rather than raising.
    temp_df = pd.DataFrame([code, name, num, *metric_columns]).T
    temp_df.columns = [
        "行业代码",
        "行业名称",
        "成份个数",
        "静态市盈率",
        "TTM(滚动)市盈率",
        "市净率",
        "静态股息率",
    ]
    for column in (
        "成份个数",
        "静态市盈率",
        "TTM(滚动)市盈率",
        "市净率",
        "静态股息率",
    ):
        temp_df[column] = pd.to_numeric(temp_df[column], errors="coerce")
    return temp_df
|
|
|
|
|
|
def sw_index_second_info() -> pd.DataFrame:
    """
    Scrape the Shenwan level-2 industry overview from legulegu.com.

    https://legulegu.com/stockdata/sw-industry-overview

    The overview page is downloaded once and the ``div`` with id
    ``level2Items`` is read: one text per title node, one "name(count)" text
    plus a parent-industry ``span`` per number node, and the first four
    ``span.value`` texts per value node.

    :return: frame with the columns 行业代码, 行业名称, 上级行业, 成份个数,
        静态市盈率, TTM(滚动)市盈率, 市净率 and 静态股息率; every column
        except the first three is converted with
        ``pd.to_numeric(..., errors="coerce")``, so unparsable cells become NaN
    :rtype: pandas.DataFrame
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status
    """
    url = "https://legulegu.com/stockdata/sw-industry-overview"
    r = requests.get(url, headers=headers)
    # Fail on an error status here instead of parsing an error page below.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, features="lxml")

    # Look the level container up once; all item queries are scoped to it.
    container = soup.find(name="div", attrs={"id": "level2Items"})
    title_nodes = container.find_all(
        name="div", attrs={"class": "lg-industries-item-chinese-title"}
    )
    number_nodes = container.find_all(
        name="div", attrs={"class": "lg-industries-item-number"}
    )
    value_nodes = container.find_all(
        name="div", attrs={"class": "lg-sw-industries-item-value"}
    )

    code = [node.get_text() for node in title_nodes]
    # Each number node reads "<name>(<count>)": the part before "(" is the
    # name, the part between the parentheses is the constituent count.
    number_texts = [node.get_text() for node in number_nodes]
    name = [text.split("(")[0] for text in number_texts]
    # The first <span> inside the number node carries the parent industry;
    # [1:-1] drops its first and last character (presumably enclosing
    # brackets -- confirm against the page markup).
    parent_name = [
        node.find("span").get_text().split("(")[0][1:-1] for node in number_nodes
    ]
    num = [text.split("(")[1].split(")")[0] for text in number_texts]

    # Collect the value spans once per row, then slice out the four metrics
    # column by column (positions 0..3 map to the four ratio columns below).
    span_rows = [
        node.find_all("span", attrs={"class": "value"}) for node in value_nodes
    ]
    metric_columns = [
        [spans[position].get_text().strip() for spans in span_rows]
        for position in range(4)
    ]

    # Rows-of-columns transposed, as before, so ragged inputs pad with
    # missing values rather than raising.
    temp_df = pd.DataFrame([code, name, parent_name, num, *metric_columns]).T
    temp_df.columns = [
        "行业代码",
        "行业名称",
        "上级行业",
        "成份个数",
        "静态市盈率",
        "TTM(滚动)市盈率",
        "市净率",
        "静态股息率",
    ]
    for column in (
        "成份个数",
        "静态市盈率",
        "TTM(滚动)市盈率",
        "市净率",
        "静态股息率",
    ):
        temp_df[column] = pd.to_numeric(temp_df[column], errors="coerce")
    return temp_df
|
|
|
|
|
|
def sw_index_third_info() -> pd.DataFrame:
    """
    Scrape the Shenwan level-3 industry overview from legulegu.com.

    https://legulegu.com/stockdata/sw-industry-overview

    The overview page is downloaded once and the ``div`` with id
    ``level3Items`` is read: one text per title node, one "name(count)" text
    plus a parent-industry ``span`` per number node, and the first four
    ``span.value`` texts per value node.

    :return: frame with the columns 行业代码, 行业名称, 上级行业, 成份个数,
        静态市盈率, TTM(滚动)市盈率, 市净率 and 静态股息率; every column
        except the first three is converted with
        ``pd.to_numeric(..., errors="coerce")``, so unparsable cells become NaN
    :rtype: pandas.DataFrame
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status
    """
    url = "https://legulegu.com/stockdata/sw-industry-overview"
    r = requests.get(url, headers=headers)
    # Fail on an error status here instead of parsing an error page below.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, features="lxml")

    # Look the level container up once; all item queries are scoped to it.
    container = soup.find(name="div", attrs={"id": "level3Items"})
    title_nodes = container.find_all(
        name="div", attrs={"class": "lg-industries-item-chinese-title"}
    )
    number_nodes = container.find_all(
        name="div", attrs={"class": "lg-industries-item-number"}
    )
    value_nodes = container.find_all(
        name="div", attrs={"class": "lg-sw-industries-item-value"}
    )

    code = [node.get_text() for node in title_nodes]
    # Each number node reads "<name>(<count>)": the part before "(" is the
    # name, the part between the parentheses is the constituent count.
    number_texts = [node.get_text() for node in number_nodes]
    name = [text.split("(")[0] for text in number_texts]
    # The first <span> inside the number node carries the parent industry;
    # [1:-1] drops its first and last character (presumably enclosing
    # brackets -- confirm against the page markup).
    parent_name = [
        node.find("span").get_text().split("(")[0][1:-1] for node in number_nodes
    ]
    num = [text.split("(")[1].split(")")[0] for text in number_texts]

    # Collect the value spans once per row, then slice out the four metrics
    # column by column (positions 0..3 map to the four ratio columns below).
    span_rows = [
        node.find_all("span", attrs={"class": "value"}) for node in value_nodes
    ]
    metric_columns = [
        [spans[position].get_text().strip() for spans in span_rows]
        for position in range(4)
    ]

    # Rows-of-columns transposed, as before, so ragged inputs pad with
    # missing values rather than raising.
    temp_df = pd.DataFrame([code, name, parent_name, num, *metric_columns]).T
    temp_df.columns = [
        "行业代码",
        "行业名称",
        "上级行业",
        "成份个数",
        "静态市盈率",
        "TTM(滚动)市盈率",
        "市净率",
        "静态股息率",
    ]
    for column in (
        "成份个数",
        "静态市盈率",
        "TTM(滚动)市盈率",
        "市净率",
        "静态股息率",
    ):
        temp_df[column] = pd.to_numeric(temp_df[column], errors="coerce")
    return temp_df
|
|
|
|
|
|
def sw_index_third_cons(symbol: str = "801120.SI") -> pd.DataFrame:
    """
    Fetch the constituent stocks of one Shenwan industry from legulegu.com.

    https://legulegu.com/stockdata/index-composition?industryCode=801120.SI

    The first HTML table on the page is read with ``pd.read_html``, its
    columns are renamed to the 17 fixed names below, and the price, ratio,
    market-value and growth columns are converted to numbers.

    :param symbol: industry code placed in the ``industryCode`` query
        parameter, e.g. "801120.SI"
    :type symbol: str
    :return: constituents table; percent-formatted columns have their "%"
        sign removed, and every converted column uses
        ``pd.to_numeric(..., errors="coerce")``, so unparsable cells become NaN
    :rtype: pandas.DataFrame
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status
    """
    url = f"https://legulegu.com/stockdata/index-composition?industryCode={symbol}"
    r = requests.get(url, headers=headers)
    # Fail on an error status here instead of handing an error page to read_html.
    r.raise_for_status()
    temp_df = pd.read_html(StringIO(r.text))[0]
    temp_df.columns = [
        "序号",
        "股票代码",
        "股票简称",
        "纳入时间",
        "申万1级",
        "申万2级",
        "申万3级",
        "价格",
        "市盈率",
        "市盈率ttm",
        "市净率",
        "股息率",
        "市值",
        "归母净利润同比增长(09-30)",
        "归母净利润同比增长(06-30)",
        "营业收入同比增长(09-30)",
        "营业收入同比增长(06-30)",
    ]

    percent_columns = [
        "股息率",
        "归母净利润同比增长(09-30)",
        "归母净利润同比增长(06-30)",
        "营业收入同比增长(09-30)",
        "营业收入同比增长(06-30)",
    ]
    for column in percent_columns:
        # read_html may infer a non-string dtype (e.g. an all-empty column),
        # on which the .str accessor raises; casting to str first keeps the
        # strip safe, and "nan" is coerced back to NaN by to_numeric below.
        temp_df[column] = temp_df[column].astype(str).str.strip("%")

    numeric_columns = ["价格", "市盈率", "市盈率ttm", "市净率", "市值"] + percent_columns
    for column in numeric_columns:
        temp_df[column] = pd.to_numeric(temp_df[column], errors="coerce")
    return temp_df
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: print the three overview tables in level order,
    # then the constituents of one sample industry code.
    for fetch_overview in (
        sw_index_first_info,
        sw_index_second_info,
        sw_index_third_info,
    ):
        print(fetch_overview())

    print(sw_index_third_cons(symbol="850111.SI"))
|