Files
MoFin/venv/lib/python3.12/site-packages/akshare/economic/macro_china_nbs.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

294 lines
9.5 KiB
Python

#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2024/6/30 22:00
Desc: 中国-国家统计局-宏观数据
https://data.stats.gov.cn/easyquery.htm
"""
import time
from functools import lru_cache
from typing import Union, Literal, List, Dict
import jsonpath as jp
import numpy as np
import pandas as pd
import requests
import urllib3
from urllib3.exceptions import InsecureRequestWarning
# 忽略InsecureRequestWarning警告
urllib3.disable_warnings(InsecureRequestWarning)
@lru_cache
def _get_nbs_tree(idcode: str, dbcode: str) -> List[Dict]:
    """
    Fetch the child nodes of one entry in the NBS indicator catalogue tree.

    The result is memoised by ``lru_cache`` on ``(idcode, dbcode)``, so the
    same object is handed back on repeated calls; callers should treat it as
    read-only.

    :param idcode: id of the catalogue node to expand (callers pass "zb" for the root)
    :param dbcode: database code, e.g. "hgyd"
    :return: decoded JSON body of the response
    :raises requests.exceptions.Timeout: if the server does not answer in time
    """
    url = "https://data.stats.gov.cn/easyquery.htm"
    params = {"id": idcode, "dbcode": dbcode, "wdcode": "zb", "m": "getTree"}
    # Certificate verification is disabled on purpose (the matching warning is
    # silenced at module level). A timeout is set so that a stalled connection
    # raises instead of blocking the caller forever.
    r = requests.post(
        url,
        params=params,
        verify=False,
        allow_redirects=True,
        timeout=30,
    )
    return r.json()
@lru_cache
def _get_nbs_wds_tree(idcode: str, dbcode: str, rowcode: str) -> List[Dict]:
    """
    Fetch the selectable dimension nodes for a regional-data indicator.

    The result is memoised by ``lru_cache`` on ``(idcode, dbcode, rowcode)``;
    callers should treat the returned list as read-only.

    :param idcode: indicator code
    :param dbcode: database code
    :param rowcode: "zb" returns the region codes, "reg" returns the codes of
        the selectable indicators
    :return: the ``nodes`` list of the first ``returndata`` entry
    :raises requests.exceptions.Timeout: if the server does not answer in time
    """
    url = "https://data.stats.gov.cn/easyquery.htm"
    params = {
        "m": "getOtherWds",
        "dbcode": dbcode,
        "rowcode": rowcode,
        "colcode": "sj",
        "wds": '[{"wdcode":"zb","valuecode":"%s"}]' % idcode,
        # First 13 digits of the nanosecond clock, i.e. a millisecond timestamp.
        "k1": str(time.time_ns())[:13],
    }
    # A timeout is set so that a stalled connection raises instead of blocking
    # the caller forever.
    r = requests.post(
        url,
        params=params,
        verify=False,
        allow_redirects=True,
        timeout=30,
    )
    data_json = r.json()
    return data_json["returndata"][0]["nodes"]
def _get_code_from_nbs_tree(tree: List[Dict], name: str, target: str = "id") -> str:
"""
根据指标名称从目录树中获取target编码
:param tree: 目录树
:param name: 指标名称
:param target: 指标编码属性名
:return: 指标编码
"""
expr = f'$[?(@.name == "{name}")].{target}'
ret = jp.jsonpath(tree, expr)
if ret is False:
raise ValueError("Please check if the data path or indicator is correct.")
return ret[0]
def macro_china_nbs_nation(
    kind: Literal["月度数据", "季度数据", "年度数据"], path: str, period: str = "LAST10"
) -> pd.DataFrame:
    """
    Generic accessor for nationwide data of the National Bureau of Statistics.
    https://data.stats.gov.cn/easyquery.htm

    :param kind: data category (monthly / quarterly / annual)
    :param path: catalogue path, levels separated by ">" (spaces are ignored)
    :param period: time range, e.g. 'LAST10', '2016-2023', '2016-'
    :return: table with one row per indicator and one column per period
    :rtype: pandas.DataFrame
    :raises KeyError: if ``kind`` is not one of the supported categories
    :raises ValueError: if a level of ``path`` cannot be found in the catalogue
    """
    # Resolve the database code.
    kind_code = {"月度数据": "hgyd", "季度数据": "hgjd", "年度数据": "hgnd"}
    dbcode = kind_code[kind]

    # Walk the catalogue from the root ("zb") down to the final indicator id.
    indicator_id = "zb"
    for level_name in path.replace(" ", "").split(">"):
        level_tree = _get_nbs_tree(indicator_id, dbcode)
        indicator_id = _get_code_from_nbs_tree(level_tree, level_name)

    # Request the data.
    url = "https://data.stats.gov.cn/easyquery.htm"
    params = {
        "m": "QueryData",
        "dbcode": dbcode,
        "rowcode": "zb",
        "colcode": "sj",
        "wds": "[]",
        "dfwds": '[{"wdcode":"zb","valuecode":"%s"}, '
        '{"wdcode":"sj","valuecode":"%s"}]' % (indicator_id, period),
        "k1": str(time.time_ns())[:13],
    }
    # The timeout keeps a stalled connection from blocking the caller forever.
    r = requests.get(
        url,
        params=params,
        verify=False,
        allow_redirects=True,
        timeout=30,
    )
    data_json = r.json()

    # Flatten the data nodes: keep the value where present, otherwise None.
    temp_df = pd.DataFrame(data_json["returndata"]["datanodes"])
    temp_df["data"] = temp_df["data"].apply(
        lambda x: x["data"] if x["hasdata"] else None
    )

    # One frame per dimension; "fname" is the display name plus "(unit)" if any.
    wn_df_list = [
        pd.DataFrame(wn["nodes"])
        .assign(
            funit=lambda df: df["unit"].apply(lambda x: "(" + x + ")" if x else x)
        )
        .assign(fname=lambda df: df["cname"] + df["funit"])
        for wn in data_json["returndata"]["wdnodes"]
    ]
    row_name, column_name = (
        wn_df_list[0]["fname"],
        wn_df_list[1]["fname"],
    )

    # Reshape the underlying ndarray rather than the Series itself.
    # NOTE(review): assumes datanodes arrive row by row (indicator outer,
    # period inner), as the original layout did - confirm against the API.
    data_ndarray = np.reshape(
        temp_df["data"].to_numpy(), (len(row_name), len(column_name))
    )
    data_df = pd.DataFrame(data=data_ndarray, columns=column_name, index=row_name)
    data_df.index.name = None
    data_df.columns.name = None
    return data_df
def macro_china_nbs_region(
    kind: Literal[
        "分省月度数据",
        "分省季度数据",
        "分省年度数据",
        "主要城市月度价格",
        "主要城市年度数据",
        "港澳台月度数据",
        "港澳台年度数据",
    ],
    path: str,
    indicator: Union[str, None],
    region: Union[str, None] = None,
    period: str = "LAST10",
) -> pd.DataFrame:
    """
    Generic accessor for regional data of the National Bureau of Statistics.
    https://data.stats.gov.cn/easyquery.htm

    :param kind: data category
    :param path: catalogue path, levels separated by ">" (spaces are ignored)
    :param indicator: indicator to select; may be None only when ``region`` is given
    :param region: region to select; when a region is given, set ``indicator``
        to None to get the values of all selectable indicators at once
    :param period: time range, e.g. 'LAST10', '2016-2023', '2016-'
    :return: table of regions x periods (``region`` is None) or of
        indicators x periods (``region`` given); ``columns.name`` holds the
        name of the fixed dimension
    :rtype: pandas.DataFrame
    :raises AssertionError: if both ``indicator`` and ``region`` are None
    :raises KeyError: if ``kind`` is not one of the supported categories
    :raises ValueError: if a path level, indicator or region cannot be found
    """
    if indicator is None and region is None:
        # Exception type kept as-is so existing callers keep working.
        raise AssertionError("The indicator and region parameters cannot both be None.")

    # Resolve the database code.
    kind_dict = {
        "分省月度数据": "fsyd",
        "分省季度数据": "fsjd",
        "分省年度数据": "fsnd",
        "主要城市月度价格": "csyd",
        "主要城市年度数据": "csnd",
        "港澳台月度数据": "gatyd",
        "港澳台年度数据": "gatnd",
    }
    dbcode = kind_dict[kind]

    # Walk the catalogue from the root ("zb") down to the final indicator id.
    indicator_id = "zb"
    for level_name in path.replace(" ", "").split(">"):
        level_tree = _get_nbs_tree(indicator_id, dbcode)
        indicator_id = _get_code_from_nbs_tree(level_tree, level_name)

    # Build the query dimensions.
    if region is None:
        # Fixed indicator, rows are regions.
        indicator_tree = _get_nbs_wds_tree(indicator_id, dbcode, "reg")
        indicator_id = _get_code_from_nbs_tree(indicator_tree, indicator, target="code")
        rowcode = "reg"
        colcode = "sj"
        wds = '[{"wdcode":"zb","valuecode":"%s"}]' % indicator_id
        dfwds = '[{"wdcode":"sj","valuecode":"%s"}]' % period
    else:
        # Fixed region, rows are indicators (one, or all when indicator is None).
        if indicator is not None:
            indicator_tree = _get_nbs_wds_tree(indicator_id, dbcode, "reg")
            indicator_id = _get_code_from_nbs_tree(
                indicator_tree, indicator, target="code"
            )
        region_tree = _get_nbs_wds_tree(indicator_id, dbcode, "zb")
        region_id = _get_code_from_nbs_tree(region_tree, region, target="code")
        rowcode = "zb"
        colcode = "sj"
        wds = '[{"wdcode":"reg","valuecode":"%s"}]' % region_id
        dfwds = (
            '[{"wdcode":"zb","valuecode":"%s"}, '
            '{"wdcode":"sj","valuecode":"%s"}]' % (indicator_id, period)
        )

    # Request the data.
    url = "https://data.stats.gov.cn/easyquery.htm"
    params = {
        "m": "QueryData",
        "dbcode": dbcode,
        "rowcode": rowcode,
        "colcode": colcode,
        "wds": wds,
        "dfwds": dfwds,
        "k1": str(time.time_ns())[:13],
    }
    # The timeout keeps a stalled connection from blocking the caller forever.
    r = requests.get(
        url,
        params=params,
        verify=False,
        allow_redirects=True,
        timeout=30,
    )
    data_json = r.json()

    # Flatten the data nodes: keep the value where present, otherwise None.
    temp_df = pd.DataFrame(data_json["returndata"]["datanodes"])
    temp_df["data"] = temp_df["data"].apply(
        lambda x: x["data"] if x["hasdata"] else None
    )

    # One frame per dimension; "fname" is the display name plus "(unit)" if any.
    wn_df_list = [
        pd.DataFrame(wn["nodes"])
        .assign(
            funit=lambda df: df["unit"].apply(lambda x: "(" + x + ")" if x else x)
        )
        .assign(fname=lambda df: df["cname"] + df["funit"])
        for wn in data_json["returndata"]["wdnodes"]
    ]
    # The code indexes wdnodes as [0]=indicator, [1]=region, [2]=time; the
    # dimension that was fixed in the query supplies the title.
    if region is None:
        row_name, column_name = wn_df_list[1]["fname"], wn_df_list[2]["fname"]
        title_name = wn_df_list[0]["fname"][0]
    else:
        row_name, column_name = wn_df_list[0]["fname"], wn_df_list[2]["fname"]
        title_name = wn_df_list[1]["fname"][0]

    # Reshape the underlying ndarray rather than the Series itself.
    # NOTE(review): assumes datanodes arrive row by row (row dimension outer,
    # period inner), as the original layout did - confirm against the API.
    data_ndarray = np.reshape(
        temp_df["data"].to_numpy(), (len(row_name), len(column_name))
    )
    data_df = pd.DataFrame(data=data_ndarray, columns=column_name, index=row_name)
    data_df.index.name = None
    data_df.columns.name = title_name
    return data_df
if __name__ == "__main__":
    # Demo 1: nationwide monthly series, last five periods.
    print(
        macro_china_nbs_nation(
            kind="月度数据",
            path="工业 > 工业分大类行业出口交货值(2018-至今) > 废弃资源综合利用业",
            period="LAST5",
        )
    )

    # Demo 2: every selectable indicator for a single region.
    print(
        macro_china_nbs_region(
            kind="分省季度数据",
            path="人民生活 > 居民人均可支配收入",
            indicator=None,
            region="北京市",
            period="2018-2022",
        )
    )

    # Demo 3: a single indicator across all regions.
    print(
        macro_china_nbs_region(
            kind="分省季度数据",
            path="国民经济核算 > 地区生产总值",
            indicator="地区生产总值_累计值(亿元)",
            period="2018-",
        )
    )