# MoFin/venv/lib/python3.12/site-packages/akshare/index/index_sw.py

#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2024/1/24 15:00
Desc: 申万宏源研究-申万指数-指数发布
乐咕乐股网
https://legulegu.com/stockdata/index-composition?industryCode=851921.SI
"""

from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

from akshare.utils.cons import headers


def sw_index_first_info() -> pd.DataFrame:
    """
    Legulegu - Shenwan level-1 industries - classification overview
    https://legulegu.com/stockdata/sw-industry-overview#level1

    Downloads the overview page and reads the entries found inside the
    element with id ``level1Items``.

    :return: one row per entry with the columns 行业代码 (industry code),
        行业名称 (industry name), 成份个数 (constituent count), 静态市盈率
        (static P/E), TTM(滚动)市盈率 (rolling P/E), 市净率 (P/B) and
        静态股息率 (static dividend yield); every column except the code and
        the name is converted to numeric, unparsable cells becoming NaN
    :rtype: pandas.DataFrame
    """
    response = requests.get(
        "https://legulegu.com/stockdata/sw-industry-overview", headers=headers
    )
    page = BeautifulSoup(response.text, features="lxml")

    # The three element groups share one container, so resolve it once.
    container = page.find(name="div", attrs={"id": "level1Items"})
    title_divs = container.find_all(
        name="div", attrs={"class": "lg-industries-item-chinese-title"}
    )
    label_divs = container.find_all(
        name="div", attrs={"class": "lg-industries-item-number"}
    )
    value_divs = container.find_all(
        name="div", attrs={"class": "lg-sw-industries-item-value"}
    )

    codes = [div.get_text() for div in title_divs]

    # Each label is parsed as "<name>(<count>)": the name is the text before
    # the first "(", the count is the text between it and the next ")".
    labels = [div.get_text() for div in label_divs]
    names = [label.split("(")[0] for label in labels]
    counts = [label.split("(")[1].split(")")[0] for label in labels]

    # Read the "value" spans of every entry once; span positions 0-3 feed the
    # four metric columns in the order they appear in the header list below.
    span_texts = [
        [
            span.get_text().strip()
            for span in div.find_all("span", attrs={"class": "value"})
        ]
        for div in value_divs
    ]
    metrics = [[texts[position] for texts in span_texts] for position in range(4)]

    # Rows-then-transpose keeps the frame buildable even if the scraped lists
    # end up with different lengths.
    overview = pd.DataFrame([codes, names, counts, *metrics]).T
    overview.columns = [
        "行业代码",
        "行业名称",
        "成份个数",
        "静态市盈率",
        "TTM(滚动)市盈率",
        "市净率",
        "静态股息率",
    ]
    for column in (
        "成份个数",
        "静态市盈率",
        "TTM(滚动)市盈率",
        "市净率",
        "静态股息率",
    ):
        overview[column] = pd.to_numeric(overview[column], errors="coerce")
    return overview


def sw_index_second_info() -> pd.DataFrame:
    """
    Legulegu - Shenwan level-2 industries - classification overview
    https://legulegu.com/stockdata/sw-industry-overview

    Downloads the overview page and reads the entries found inside the
    element with id ``level2Items``.

    :return: one row per entry with the columns 行业代码 (industry code),
        行业名称 (industry name), 上级行业 (parent industry), 成份个数
        (constituent count), 静态市盈率 (static P/E), TTM(滚动)市盈率
        (rolling P/E), 市净率 (P/B) and 静态股息率 (static dividend yield);
        the count and the four metrics are converted to numeric, unparsable
        cells becoming NaN
    :rtype: pandas.DataFrame
    """
    response = requests.get(
        "https://legulegu.com/stockdata/sw-industry-overview", headers=headers
    )
    page = BeautifulSoup(response.text, features="lxml")

    # The three element groups share one container, so resolve it once.
    container = page.find(name="div", attrs={"id": "level2Items"})
    title_divs = container.find_all(
        name="div", attrs={"class": "lg-industries-item-chinese-title"}
    )
    label_divs = container.find_all(
        name="div", attrs={"class": "lg-industries-item-number"}
    )
    value_divs = container.find_all(
        name="div", attrs={"class": "lg-sw-industries-item-value"}
    )

    codes = [div.get_text() for div in title_divs]

    # The label text supplies the name (text before the first "(") and the
    # count (text between the first "(" and the next ")").
    labels = [div.get_text() for div in label_divs]
    names = [label.split("(")[0] for label in labels]
    # The parent comes from the first <span> of the label: its text up to the
    # first "(", minus the first and last character (presumably a pair of
    # surrounding delimiters; confirm against the page markup).
    parents = [
        div.find("span").get_text().split("(")[0][1:-1] for div in label_divs
    ]
    counts = [label.split("(")[1].split(")")[0] for label in labels]

    # Read the "value" spans of every entry once; span positions 0-3 feed the
    # four metric columns in the order they appear in the header list below.
    span_texts = [
        [
            span.get_text().strip()
            for span in div.find_all("span", attrs={"class": "value"})
        ]
        for div in value_divs
    ]
    metrics = [[texts[position] for texts in span_texts] for position in range(4)]

    # Rows-then-transpose keeps the frame buildable even if the scraped lists
    # end up with different lengths.
    overview = pd.DataFrame([codes, names, parents, counts, *metrics]).T
    overview.columns = [
        "行业代码",
        "行业名称",
        "上级行业",
        "成份个数",
        "静态市盈率",
        "TTM(滚动)市盈率",
        "市净率",
        "静态股息率",
    ]
    for column in (
        "成份个数",
        "静态市盈率",
        "TTM(滚动)市盈率",
        "市净率",
        "静态股息率",
    ):
        overview[column] = pd.to_numeric(overview[column], errors="coerce")
    return overview


def sw_index_third_info() -> pd.DataFrame:
    """
    Legulegu - Shenwan level-3 industries - classification overview
    https://legulegu.com/stockdata/sw-industry-overview

    Downloads the overview page and reads the entries found inside the
    element with id ``level3Items``.

    :return: one row per entry with the columns 行业代码 (industry code),
        行业名称 (industry name), 上级行业 (parent industry), 成份个数
        (constituent count), 静态市盈率 (static P/E), TTM(滚动)市盈率
        (rolling P/E), 市净率 (P/B) and 静态股息率 (static dividend yield);
        the count and the four metrics are converted to numeric, unparsable
        cells becoming NaN
    :rtype: pandas.DataFrame
    """
    response = requests.get(
        "https://legulegu.com/stockdata/sw-industry-overview", headers=headers
    )
    page = BeautifulSoup(response.text, features="lxml")

    # The three element groups share one container, so resolve it once.
    container = page.find(name="div", attrs={"id": "level3Items"})
    title_divs = container.find_all(
        name="div", attrs={"class": "lg-industries-item-chinese-title"}
    )
    label_divs = container.find_all(
        name="div", attrs={"class": "lg-industries-item-number"}
    )
    value_divs = container.find_all(
        name="div", attrs={"class": "lg-sw-industries-item-value"}
    )

    codes = [div.get_text() for div in title_divs]

    # The label text supplies the name (text before the first "(") and the
    # count (text between the first "(" and the next ")").
    labels = [div.get_text() for div in label_divs]
    names = [label.split("(")[0] for label in labels]
    # The parent comes from the first <span> of the label: its text up to the
    # first "(", minus the first and last character (presumably a pair of
    # surrounding delimiters; confirm against the page markup).
    parents = [
        div.find("span").get_text().split("(")[0][1:-1] for div in label_divs
    ]
    counts = [label.split("(")[1].split(")")[0] for label in labels]

    # Read the "value" spans of every entry once; span positions 0-3 feed the
    # four metric columns in the order they appear in the header list below.
    span_texts = [
        [
            span.get_text().strip()
            for span in div.find_all("span", attrs={"class": "value"})
        ]
        for div in value_divs
    ]
    metrics = [[texts[position] for texts in span_texts] for position in range(4)]

    # Rows-then-transpose keeps the frame buildable even if the scraped lists
    # end up with different lengths.
    overview = pd.DataFrame([codes, names, parents, counts, *metrics]).T
    overview.columns = [
        "行业代码",
        "行业名称",
        "上级行业",
        "成份个数",
        "静态市盈率",
        "TTM(滚动)市盈率",
        "市净率",
        "静态股息率",
    ]
    for column in (
        "成份个数",
        "静态市盈率",
        "TTM(滚动)市盈率",
        "市净率",
        "静态股息率",
    ):
        overview[column] = pd.to_numeric(overview[column], errors="coerce")
    return overview


def sw_index_third_cons(symbol: str = "801120.SI") -> pd.DataFrame:
    """
    Legulegu - Shenwan level-3 industries - index constituents
    https://legulegu.com/stockdata/index-composition?industryCode=801120.SI

    Downloads the index-composition page for ``symbol``, takes the first HTML
    table on it, renames its columns and converts the price, valuation and
    growth columns to numbers.

    :param symbol: industry code placed in the ``industryCode`` query
        parameter, e.g. "850111.SI"
    :type symbol: str
    :return: constituents table with 17 columns; 价格, 市盈率, 市盈率ttm,
        市净率 and 市值 are numeric, and 股息率 plus the four year-on-year
        growth columns are numeric percentages with the "%" sign removed
        (so "12.3%" becomes 12.3); cells that cannot be parsed become NaN
    :rtype: pandas.DataFrame
    :raises ValueError: raised by pandas when the page contains no table or
        when the table does not have exactly 17 columns
    """
    url = f"https://legulegu.com/stockdata/index-composition?industryCode={symbol}"
    r = requests.get(url, headers=headers)
    temp_df = pd.read_html(StringIO(r.text))[0]
    # NOTE(review): the "(09-30)" / "(06-30)" period labels are fixed here and
    # are not read from the page; confirm they match the periods it reports.
    temp_df.columns = [
        "序号",
        "股票代码",
        "股票简称",
        "纳入时间",
        "申万1级",
        "申万2级",
        "申万3级",
        "价格",
        "市盈率",
        "市盈率ttm",
        "市净率",
        "股息率",
        "市值",
        "归母净利润同比增长(09-30)",
        "归母净利润同比增长(06-30)",
        "营业收入同比增长(09-30)",
        "营业收入同比增长(06-30)",
    ]

    plain_columns = ("价格", "市盈率", "市盈率ttm", "市净率", "市值")
    percent_columns = (
        "股息率",
        "归母净利润同比增长(09-30)",
        "归母净利润同比增长(06-30)",
        "营业收入同比增长(09-30)",
        "营业收入同比增长(06-30)",
    )

    for column in plain_columns:
        temp_df[column] = pd.to_numeric(temp_df[column], errors="coerce")

    for column in percent_columns:
        # Cast to str before using the .str accessor: when pandas has parsed
        # the column as numeric (for example every cell is empty and becomes
        # float NaN), .str raises AttributeError. Missing cells turn into the
        # text "nan", which to_numeric maps back to NaN.
        without_sign = temp_df[column].astype(str).str.strip("%")
        temp_df[column] = pd.to_numeric(without_sign, errors="coerce")
    return temp_df


if __name__ == "__main__":
    # Manual smoke run: fetch and print each overview table in level order,
    # then the constituents of one sample industry code.
    for fetch_overview in (
        sw_index_first_info,
        sw_index_second_info,
        sw_index_third_info,
    ):
        print(fetch_overview())

    print(sw_index_third_cons(symbol="850111.SI"))