aa0f740381
完整数据采集+分析管道: - market_watch.py:90行业板块采集(同花顺/东方财富) - 市场精选推荐 cron:全市场分析+候选池+星级推荐 - price_monitor.py:持仓/自选高频价格监控 - refresh_mtf_cache.py:多周期K线缓存 - 策略评估/知识萃取管道 文档:docs/ 含完整需求+架构设计 注意:尚未配置 git remote,笑笑接手后自行配置
190 lines
6.4 KiB
Python
190 lines
6.4 KiB
Python
#!/usr/bin/env python3
|
|
"""stock_sector_enrich.py — 自动补全 stock_profiles.json 中缺失的行业/业务信息
|
|
|
|
策略(按优先级):
|
|
1. 内置映射表(预先维护的已知股票行业分类)
|
|
2. web_search(从同花顺/新浪等网页提取)
|
|
3. 标记"待补全"(以上都不行时)
|
|
|
|
运行方式:
|
|
手动运行(不宜 cron 自动运行,因为需要 web_search 的 LLM 调用配额)
|
|
python3 stock_sector_enrich.py
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Data files live in a "data" directory located next to this script.
DATA_DIR = Path(__file__).parent.joinpath("data")
# JSON file holding the stock profiles that this script reads and rewrites.
PROFILES_PATH = DATA_DIR.joinpath("stock_profiles.json")
|
|
|
|
# ── Built-in mapping table (highest priority) ──
# Shape: code -> {"sector": ..., "business": ...}
# Source: sectors of existing holdings + public market information.
# Each row below is (code, sector, business); row order is the dict order.
KNOWN_MAPPING = {
    code: {"sector": sector, "business": business}
    for code, sector, business in (
        # === Holdings (sector mostly filled already; only the ones whose
        #     sector was empty are listed) ===
        ("688639", "化工/生物制造", "生物法丙氨酸/缬氨酸等氨基酸产品,合成生物学平台技术"),
        # === Watchlist stocks (need completion) ===
        # A-shares
        ("002594", "新能源汽车", "新能源整车(乘用车/商用车),动力电池(弗迪电池),半导体(比亚迪半导体)"),
        ("688795", "半导体/GPU", "国产GPU芯片设计,AI训练/推理芯片,图形渲染芯片"),
        ("688802", "半导体/GPU", "国产GPU芯片设计,图形渲染/通用计算芯片"),
        ("300548", "光通信/光器件", "光无源器件(分路器/波分复用),光有源器件,数据中心光互联"),
        ("300124", "工控自动化", "工业自动化(伺服系统/PLC/变频器),新能源汽车电驱系统"),
        ("688981", "半导体/晶圆代工", "集成电路晶圆代工,先进制程(14nm/28nm及以上),成熟制程"),
        ("001309", "半导体/存储", "存储芯片(闪存主控/NAND/DRAM模组),嵌入式存储解决方案"),
        # Hong Kong shares
        ("01888", "电子/覆铜板", "覆铜板(CCL)全球龙头,印刷线路板(PCB),玻璃纤维布"),
        ("01088", "煤炭/能源", "煤炭开采(动力煤/焦煤),煤化工,铁路/港口运输"),
        ("09868", "新能源汽车", "智能电动汽车(SUV/轿车),自动驾驶技术(XNGP),飞行汽车"),
        ("02359", "医药/CRO", "小分子药物发现/临床前CRO,化学药/生物药CDMO"),
        ("02628", "保险", "人身保险(寿险/健康险/意外险),养老保险"),
        ("00968", "新能源/光伏", "光伏玻璃全球龙头,太阳能发电站运营,EVA胶膜"),
        ("06869", "通信/光缆", "光纤预制棒/光纤/光缆全球龙头,通信线缆,数据中心"),
        ("02318", "金融/保险", "综合金融(保险/银行/证券/信托),科技金融"),
        ("01070", "消费电子/家电", "电视机/显示器全球出货前列,光伏储能,智能家居"),
    )
}
|
|
|
|
|
|
def load_profiles():
    """Read PROFILES_PATH as UTF-8 text and return the parsed JSON document.

    Errors from reading the file or from JSON decoding propagate to the caller.
    """
    raw_text = PROFILES_PATH.read_text(encoding="utf-8")
    return json.loads(raw_text)
|
|
|
|
|
|
def save_profiles(data):
    """Sort the profiles by code and write the document to PROFILES_PATH.

    Args:
        data: The profiles document; ``data["profiles"]`` must be a list of
            dicts. The list is sorted in place by each profile's "code".

    Raises:
        KeyError: If ``data`` has no "profiles" key.
        TypeError / ValueError: If ``data`` cannot be serialized to JSON. The
            existing file is left untouched in that case.
    """
    # Sort by code; a profile without a code sorts first instead of raising,
    # matching the `.get("code", "")` tolerance used when filling profiles.
    data["profiles"].sort(key=lambda profile: profile.get("code", ""))

    # Serialize BEFORE opening the file: opening with "w" truncates it, so a
    # serialization error after that point would destroy the stored profiles.
    payload = json.dumps(data, ensure_ascii=False, indent=2)

    with open(PROFILES_PATH, "w", encoding="utf-8") as f:
        f.write(payload)
    print(f"写入 {PROFILES_PATH}")
|
|
|
|
|
|
def _clean_field(value):
    """Return a profile field as stripped text; None (JSON null) becomes ""."""
    if value is None:
        return ""
    if isinstance(value, str):
        return value.strip()
    return str(value).strip()


def fill_profiles():
    """Fill missing "sector"/"business" fields of every stored profile.

    For each profile loaded via load_profiles():
      * If its code is in KNOWN_MAPPING, every field that is blank or still
        holds the "待补全" placeholder is replaced by the mapped value.
      * Otherwise every blank field is set to the "待补全" placeholder.
        Fields that already hold the placeholder are left alone, so repeated
        runs do not rewrite the file or bump "last_updated" needlessly.

    Each modified profile gets a fresh "last_updated" ISO timestamp. The file
    is written back through save_profiles() only when something changed, and
    a summary line is printed either way.
    """
    # Function-scope import keeps this block self-contained.
    from datetime import datetime

    placeholder = "待补全"
    data = load_profiles()
    profiles = data.get("profiles") or []
    changed = 0  # profiles completed from KNOWN_MAPPING
    errors = 0   # profiles that had to be marked with the placeholder

    for p in profiles:
        code = p.get("code", "")
        name = p.get("name", "")
        mapping = KNOWN_MAPPING.get(code)
        touched = False

        for field in ("sector", "business"):
            current = _clean_field(p.get(field))
            if mapping is not None:
                # A real value is kept; blank or placeholder gets the mapped
                # value (a placeholder from an earlier run must not block a
                # mapping entry that was added later).
                if current and current != placeholder:
                    continue
                new_value = mapping[field]
                note = ""
            else:
                # Unknown code: only a truly blank field is marked.
                if current:
                    continue
                new_value = placeholder
                note = " (不在映射表中)"
            p[field] = new_value
            print(f" [{code}] {name}: {field} ← {new_value}{note}")
            touched = True

        if not touched:
            continue

        p["last_updated"] = datetime.now().isoformat()
        if mapping is not None:
            changed += 1
        else:
            errors += 1

    if changed > 0 or errors > 0:
        save_profiles(data)
        print(f"\n共补全 {changed} 只,标记待补全 {errors} 只")
    else:
        print("无变更")
|
|
|
|
|
|
def _is_unfilled(value, placeholder="待补全"):
    """Return True when a field is absent, null, blank, or the placeholder."""
    if value is None:
        return True
    text = value.strip() if isinstance(value, str) else str(value).strip()
    return text == "" or text == placeholder


def list_status():
    """Print a completion report for the stored profiles without modifying them.

    Reports the total number of profiles, how many have a real sector, how
    many still lack a sector or a business description, and then lists the
    incomplete profiles. A field counts as missing when it is absent, null,
    blank, or holds the "待补全" placeholder. Missing code/name/market values
    are printed as "?" instead of raising KeyError.
    """
    data = load_profiles()
    profiles = data.get("profiles") or []

    empty_sector = [p for p in profiles if _is_unfilled(p.get("sector"))]
    empty_biz = [p for p in profiles if _is_unfilled(p.get("business"))]
    # "Filled" is exactly the complement of "sector missing".
    filled_count = len(profiles) - len(empty_sector)

    print(f"总股票数: {len(profiles)}")
    print(f"行业已填: {filled_count}")
    print(f"行业待补全: {len(empty_sector)}")
    print(f"业务待补全: {len(empty_biz)}")

    if empty_sector:
        print("\n行业待补全:")
        for p in empty_sector:
            print(f" {p.get('code', '?')} {p.get('name', '?')} ({p.get('market', '?')})")

    if empty_biz:
        print("\n业务待补全:")
        for p in empty_biz:
            print(f" {p.get('code', '?')} {p.get('name', '?')}: sector={p.get('sector','?')}")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if len(sys.argv) > 1 and sys.argv[1] == "--status":
|
|
list_status()
|
|
else:
|
|
fill_profiles()
|