MoFin 初始提交
完整数据采集+分析管道: - market_watch.py:90行业板块采集(同花顺/东方财富) - 市场精选推荐 cron:全市场分析+候选池+星级推荐 - price_monitor.py:持仓/自选高频价格监控 - refresh_mtf_cache.py:多周期K线缓存 - 策略评估/知识萃取管道 文档:docs/ 含完整需求+架构设计 注意:尚未配置 git remote,笑笑接手后自行配置
This commit is contained in:
@@ -0,0 +1,189 @@
|
||||
#!/usr/bin/env python3
|
||||
"""stock_sector_enrich.py — 自动补全 stock_profiles.json 中缺失的行业/业务信息
|
||||
|
||||
策略(按优先级):
|
||||
1. 内置映射表(预先维护的已知股票行业分类)
|
||||
2. web_search(从同花顺/新浪等网页提取)
|
||||
3. 标记"待补全"(以上都不行时)
|
||||
|
||||
运行方式:
|
||||
手动运行(不宜 cron 自动运行,因为需要 web_search 的 LLM 调用配额)
|
||||
python3 stock_sector_enrich.py
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Data directory that sits next to this script.
DATA_DIR = Path(__file__).parent.joinpath("data")
# JSON file holding the stock profiles this script reads and rewrites.
PROFILES_PATH = DATA_DIR.joinpath("stock_profiles.json")
|
||||
|
||||
# ── Built-in mapping table (highest priority) ──
# Format: code -> {sector, business}
# Source: sectors of existing holdings + public market information
KNOWN_MAPPING = {
    # === Holdings (sector already filled in, no enrichment needed) ===
    # (only those whose sector is empty are listed)
    "688639": {
        "sector": "化工/生物制造",
        "business": "生物法丙氨酸/缬氨酸等氨基酸产品,合成生物学平台技术"
    },
    # === Watchlist stocks (need enrichment) ===
    # A-shares
    "002594": {
        "sector": "新能源汽车",
        "business": "新能源整车(乘用车/商用车),动力电池(弗迪电池),半导体(比亚迪半导体)"
    },
    "688795": {
        "sector": "半导体/GPU",
        "business": "国产GPU芯片设计,AI训练/推理芯片,图形渲染芯片"
    },
    "688802": {
        "sector": "半导体/GPU",
        "business": "国产GPU芯片设计,图形渲染/通用计算芯片"
    },
    "300548": {
        "sector": "光通信/光器件",
        "business": "光无源器件(分路器/波分复用),光有源器件,数据中心光互联"
    },
    "300124": {
        "sector": "工控自动化",
        "business": "工业自动化(伺服系统/PLC/变频器),新能源汽车电驱系统"
    },
    "688981": {
        "sector": "半导体/晶圆代工",
        "business": "集成电路晶圆代工,先进制程(14nm/28nm及以上),成熟制程"
    },
    "001309": {
        "sector": "半导体/存储",
        "business": "存储芯片(闪存主控/NAND/DRAM模组),嵌入式存储解决方案"
    },
    # Hong Kong stocks
    "01888": {
        "sector": "电子/覆铜板",
        "business": "覆铜板(CCL)全球龙头,印刷线路板(PCB),玻璃纤维布"
    },
    "01088": {
        "sector": "煤炭/能源",
        "business": "煤炭开采(动力煤/焦煤),煤化工,铁路/港口运输"
    },
    "09868": {
        "sector": "新能源汽车",
        "business": "智能电动汽车(SUV/轿车),自动驾驶技术(XNGP),飞行汽车"
    },
    "02359": {
        "sector": "医药/CRO",
        "business": "小分子药物发现/临床前CRO,化学药/生物药CDMO"
    },
    "02628": {
        "sector": "保险",
        "business": "人身保险(寿险/健康险/意外险),养老保险"
    },
    "00968": {
        "sector": "新能源/光伏",
        "business": "光伏玻璃全球龙头,太阳能发电站运营,EVA胶膜"
    },
    "06869": {
        "sector": "通信/光缆",
        "business": "光纤预制棒/光纤/光缆全球龙头,通信线缆,数据中心"
    },
    "02318": {
        "sector": "金融/保险",
        "business": "综合金融(保险/银行/证券/信托),科技金融"
    },
    "01070": {
        "sector": "消费电子/家电",
        "business": "电视机/显示器全球出货前列,光伏储能,智能家居"
    },
}
|
||||
|
||||
|
||||
def load_profiles():
    """Read PROFILES_PATH as UTF-8 text and return the parsed JSON document."""
    raw_text = PROFILES_PATH.read_text(encoding="utf-8")
    return json.loads(raw_text)
|
||||
|
||||
|
||||
def save_profiles(data):
    """Sort ``data["profiles"]`` by code in place and write ``data`` to PROFILES_PATH.

    The document is serialized as UTF-8 JSON with a 2-space indent and
    non-ASCII characters kept as-is; the target path is printed afterwards.
    """

    def by_code(profile):
        return profile["code"]

    # Keep the on-disk order stable: profiles are ordered by their code.
    data["profiles"].sort(key=by_code)

    with PROFILES_PATH.open("w", encoding="utf-8") as out:
        json.dump(data, out, ensure_ascii=False, indent=2)

    print(f"写入 {PROFILES_PATH}")
|
||||
|
||||
|
||||
def _profile_field_missing(value):
    """Return True when a profile field is absent, blank, or still the placeholder."""
    # Non-string values (e.g. JSON null) count as missing instead of crashing.
    text = value.strip() if isinstance(value, str) else ""
    return not text or text == "待补全"


def fill_profiles():
    """Fill in missing ``sector`` / ``business`` fields of the stored profiles.

    For every profile where at least one of the two fields is missing
    (absent, null, blank, or the "待补全" placeholder):

    * if the profile's code is in KNOWN_MAPPING, each missing field is
      copied from the mapping and the profile counts as "filled";
    * otherwise each missing field that is not already the placeholder is
      set to "待补全" and the profile counts as "marked".

    Fields that already carry a real value are never overwritten.
    ``last_updated`` is refreshed only on profiles that were actually
    modified, and the file is saved only when something changed. No web
    lookup is performed by this function.
    """
    # Local import: the file has no top-level datetime import.
    from datetime import datetime

    placeholder = "待补全"
    fields = ("sector", "business")

    data = load_profiles()
    profiles = data.get("profiles", [])
    changed = 0
    errors = 0

    for p in profiles:
        code = p.get("code", "")
        name = p.get("name", "")

        # Treat the placeholder as missing so profiles marked on an earlier
        # run are upgraded once KNOWN_MAPPING gains an entry for them.
        missing = [f for f in fields if _profile_field_missing(p.get(f))]
        if not missing:
            continue

        mapping = KNOWN_MAPPING.get(code)
        if mapping is not None:
            for field in missing:
                p[field] = mapping[field]
                print(f" [{code}] {name}: {field} ← {mapping[field]}")
            p["last_updated"] = datetime.now().isoformat()
            changed += 1
            continue

        # Not in the built-in mapping: mark as pending, but leave profiles
        # that are already marked untouched so re-runs stay idempotent.
        to_mark = [f for f in missing if p.get(f) != placeholder]
        if not to_mark:
            continue
        for field in to_mark:
            p[field] = placeholder
            print(f" [{code}] {name}: {field} ← 待补全 (不在映射表中)")
        p["last_updated"] = datetime.now().isoformat()
        errors += 1

    if changed > 0 or errors > 0:
        save_profiles(data)
        print(f"\n共补全 {changed} 只,标记待补全 {errors} 只")
    else:
        print("无变更")
|
||||
|
||||
|
||||
def list_status():
    """Print a fill-status report for the stored profiles without modifying them.

    A field counts as pending when it is absent, null, blank, or equal to
    the "待补全" placeholder. The report prints the total count, the number
    of profiles with a filled sector, the pending counts for sector and
    business, and then one line per pending profile. Missing ``code`` /
    ``name`` / ``market`` keys are printed as empty strings rather than
    raising ``KeyError``.
    """

    def pending(profile, field):
        value = profile.get(field)
        # JSON null or other non-string values are treated as blank.
        text = value if isinstance(value, str) else ""
        return not text.strip() or text == "待补全"

    data = load_profiles()
    profiles = data.get("profiles", [])

    empty_sector = [p for p in profiles if pending(p, "sector")]
    empty_biz = [p for p in profiles if pending(p, "business")]
    # "Filled" is exactly the complement of "sector pending".
    filled_count = len(profiles) - len(empty_sector)

    print(f"总股票数: {len(profiles)}")
    print(f"行业已填: {filled_count}")
    print(f"行业待补全: {len(empty_sector)}")
    print(f"业务待补全: {len(empty_biz)}")

    if empty_sector:
        print("\n行业待补全:")
        for p in empty_sector:
            print(f" {p.get('code', '')} {p.get('name', '')} ({p.get('market', '')})")

    if empty_biz:
        print("\n业务待补全:")
        for p in empty_biz:
            print(f" {p.get('code', '')} {p.get('name', '')}: sector={p.get('sector','?')}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # "--status" as the first CLI argument selects the read-only report;
    # anything else (or no argument) runs the fill pass.
    status_only = sys.argv[1:2] == ["--status"]
    if status_only:
        list_status()
    else:
        fill_profiles()
|
||||
Reference in New Issue
Block a user