#!/usr/bin/env python3
"""stock_sector_enrich.py — fill in missing sector/business info in stock_profiles.json.

Strategy (in priority order):
1. Built-in mapping table (pre-maintained sector classification of known stocks)
2. web_search (extract from pages such as Tonghuashun / Sina)
3. Mark as "待补全" (pending) when none of the above works

NOTE(review): only steps 1 and 3 are implemented in the code of this file;
no web_search call is made here.

Usage:
  Run manually (not suited to an unattended cron job, because web_search
  needs LLM call quota).
    python3 stock_sector_enrich.py            # fill / mark profiles
    python3 stock_sector_enrich.py --status   # report only, no changes
"""

import json
import sys
from datetime import datetime
from pathlib import Path

DATA_DIR = Path(__file__).parent / "data"
PROFILES_PATH = DATA_DIR / "stock_profiles.json"

# Placeholder written into a field that could not be filled. It counts as
# "still missing" everywhere, so a later run can replace it.
PENDING = "待补全"

# Profile fields this script is responsible for.
ENRICHED_FIELDS = ("sector", "business")

# ── Built-in mapping table (highest priority) ──
# Format: code -> {sector, business}
# Source: sectors of existing holdings + public market information
KNOWN_MAPPING = {
    # === Holdings (only the ones whose sector was empty are listed) ===
    "688639": {
        "sector": "化工/生物制造",
        "business": "生物法丙氨酸/缬氨酸等氨基酸产品,合成生物学平台技术"
    },
    # === Watchlist stocks (need enrichment) ===
    # A-shares
    "002594": {
        "sector": "新能源汽车",
        "business": "新能源整车(乘用车/商用车),动力电池(弗迪电池),半导体(比亚迪半导体)"
    },
    "688795": {
        "sector": "半导体/GPU",
        "business": "国产GPU芯片设计,AI训练/推理芯片,图形渲染芯片"
    },
    "688802": {
        "sector": "半导体/GPU",
        "business": "国产GPU芯片设计,图形渲染/通用计算芯片"
    },
    "300548": {
        "sector": "光通信/光器件",
        "business": "光无源器件(分路器/波分复用),光有源器件,数据中心光互联"
    },
    "300124": {
        "sector": "工控自动化",
        "business": "工业自动化(伺服系统/PLC/变频器),新能源汽车电驱系统"
    },
    "688981": {
        "sector": "半导体/晶圆代工",
        "business": "集成电路晶圆代工,先进制程(14nm/28nm及以上),成熟制程"
    },
    "001309": {
        "sector": "半导体/存储",
        "business": "存储芯片(闪存主控/NAND/DRAM模组),嵌入式存储解决方案"
    },
    # Hong Kong stocks
    "01888": {
        "sector": "电子/覆铜板",
        "business": "覆铜板(CCL)全球龙头,印刷线路板(PCB),玻璃纤维布"
    },
    "01088": {
        "sector": "煤炭/能源",
        "business": "煤炭开采(动力煤/焦煤),煤化工,铁路/港口运输"
    },
    "09868": {
        "sector": "新能源汽车",
        "business": "智能电动汽车(SUV/轿车),自动驾驶技术(XNGP),飞行汽车"
    },
    "02359": {
        "sector": "医药/CRO",
        "business": "小分子药物发现/临床前CRO,化学药/生物药CDMO"
    },
    "02628": {
        "sector": "保险",
        "business": "人身保险(寿险/健康险/意外险),养老保险"
    },
    "00968": {
        "sector": "新能源/光伏",
        "business": "光伏玻璃全球龙头,太阳能发电站运营,EVA胶膜"
    },
    "06869": {
        "sector": "通信/光缆",
        "business": "光纤预制棒/光纤/光缆全球龙头,通信线缆,数据中心"
    },
    "02318": {
        "sector": "金融/保险",
        "business": "综合金融(保险/银行/证券/信托),科技金融"
    },
    "01070": {
        "sector": "消费电子/家电",
        "business": "电视机/显示器全球出货前列,光伏储能,智能家居"
    },
}


def _clean(value):
    """Return *value* stripped if it is a string, otherwise ''.

    JSON ``null`` (``None``) and other non-string values are treated as
    empty instead of raising ``AttributeError`` on ``.strip()``.
    """
    return value.strip() if isinstance(value, str) else ""


def _is_missing(value):
    """Return True when a field is empty or still holds the placeholder."""
    return _clean(value) in ("", PENDING)


def load_profiles():
    """Read and return the parsed JSON document at ``PROFILES_PATH``."""
    with open(PROFILES_PATH, "r", encoding="utf-8") as f:
        return json.load(f)


def save_profiles(data):
    """Sort ``data["profiles"]`` by code in place and write it to ``PROFILES_PATH``.

    Args:
        data: The document returned by :func:`load_profiles`. Its
            ``"profiles"`` list is sorted in place before writing.
    """
    # Sort by code; tolerate entries without a code, like the rest of the file.
    data.get("profiles", []).sort(key=lambda p: str(p.get("code") or ""))
    with open(PROFILES_PATH, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    print(f"写入 {PROFILES_PATH}")


def fill_profiles():
    """Fill missing ``sector`` / ``business`` fields and persist the result.

    For every profile with at least one missing field (empty, ``null`` or
    the "待补全" placeholder):

    * if its code is in ``KNOWN_MAPPING``, the missing fields are copied
      from the mapping and the profile counts as filled;
    * otherwise fields that are empty are set to the placeholder and the
      profile counts as marked. Fields that already hold the placeholder
      are left alone, so re-running the script is a no-op for them.

    ``last_updated`` is refreshed only on profiles that were modified, and
    the file is rewritten only when something changed.
    """
    data = load_profiles()
    profiles = data.get("profiles", [])
    changed = 0
    errors = 0

    for p in profiles:
        code = p.get("code", "")
        name = p.get("name", "")

        # A profile needs work when EITHER field is missing.
        missing = [field for field in ENRICHED_FIELDS if _is_missing(p.get(field))]
        if not missing:
            continue

        # Look up the built-in mapping first.
        mapping = KNOWN_MAPPING.get(code)
        if mapping is not None:
            for field in missing:
                p[field] = mapping[field]
                print(f" [{code}] {name}: {field} ← {mapping[field]}")
            p["last_updated"] = datetime.now().isoformat()
            changed += 1
            continue

        # Not in the mapping -> mark as pending, but only fields that are
        # not already marked, so repeated runs do not rewrite the file.
        newly_marked = [field for field in missing if _clean(p.get(field)) != PENDING]
        if not newly_marked:
            continue
        for field in newly_marked:
            p[field] = PENDING
            print(f" [{code}] {name}: {field} ← 待补全 (不在映射表中)")
        p["last_updated"] = datetime.now().isoformat()
        errors += 1

    if changed > 0 or errors > 0:
        save_profiles(data)
        print(f"\n共补全 {changed} 只,标记待补全 {errors} 只")
    else:
        print("无变更")


def list_status():
    """Print a fill-status report without modifying anything."""
    data = load_profiles()
    profiles = data.get("profiles", [])

    empty_sector = [p for p in profiles if _is_missing(p.get("sector"))]
    empty_biz = [p for p in profiles if _is_missing(p.get("business"))]
    filled_count = len(profiles) - len(empty_sector)

    print(f"总股票数: {len(profiles)}")
    print(f"行业已填: {filled_count}")
    print(f"行业待补全: {len(empty_sector)}")
    print(f"业务待补全: {len(empty_biz)}")

    if empty_sector:
        print("\n行业待补全:")
        for p in empty_sector:
            print(f" {p.get('code', '')} {p.get('name', '')} ({p.get('market', '')})")

    if empty_biz:
        print("\n业务待补全:")
        for p in empty_biz:
            print(f" {p.get('code', '')} {p.get('name', '')}: sector={p.get('sector', '?')}")


if __name__ == "__main__":
    if len(sys.argv) > 1 and sys.argv[1] == "--status":
        list_status()
    else:
        fill_profiles()