MoFin 初始提交

完整数据采集+分析管道:
- market_watch.py:90行业板块采集(同花顺/东方财富)
- 市场精选推荐 cron:全市场分析+候选池+星级推荐
- price_monitor.py:持仓/自选高频价格监控
- refresh_mtf_cache.py:多周期K线缓存
- 策略评估/知识萃取管道

文档:docs/ 含完整需求+架构设计
注意:尚未配置 git remote,笑笑接手后自行配置
This commit is contained in:
知微 (MoFin)
2026-06-20 12:04:21 +08:00
commit aa0f740381
950 changed files with 189006 additions and 0 deletions
+189
View File
@@ -0,0 +1,189 @@
#!/usr/bin/env python3
"""stock_sector_enrich.py — fill in missing sector/business info in stock_profiles.json.

Strategy (in priority order):
1. Built-in mapping table (pre-maintained sector classification of known stocks)
2. web_search (extract from Tonghuashun / Sina and similar web pages)
3. Mark as "待补全" (pending) when neither of the above works

NOTE(review): no web_search step is visible in this script; profiles that are
not in the built-in mapping go straight to the "待补全" placeholder. Confirm
whether step 2 lives elsewhere or is still to be implemented.

How to run:
Run manually (not suited to unattended cron runs, because web_search consumes
LLM call quota):
python3 stock_sector_enrich.py            # fill missing fields and write the file
python3 stock_sector_enrich.py --status   # only print the fill status, no writes
"""
import json
import sys
from pathlib import Path
# Data directory that sits next to this script.
DATA_DIR = Path(__file__).parent / "data"
# JSON file that load_profiles() reads and save_profiles() overwrites.
PROFILES_PATH = DATA_DIR / "stock_profiles.json"
# ── Built-in mapping table (highest priority) ──
# Format: code -> {sector, business}
# Source: sectors of existing holdings + public market information
# Keys are stock-code strings (leading zeros preserved); fill_profiles() looks
# each profile's "code" value up here verbatim, so the key must match exactly.
KNOWN_MAPPING = {
# === Holdings (sector already filled in, no completion needed) ===
# (only the ones whose sector is empty are listed)
"688639": {
"sector": "化工/生物制造",
"business": "生物法丙氨酸/缬氨酸等氨基酸产品,合成生物学平台技术"
},
# === Watchlist stocks (need completion) ===
# A-shares
"002594": {
"sector": "新能源汽车",
"business": "新能源整车(乘用车/商用车),动力电池(弗迪电池),半导体(比亚迪半导体)"
},
"688795": {
"sector": "半导体/GPU",
"business": "国产GPU芯片设计,AI训练/推理芯片,图形渲染芯片"
},
"688802": {
"sector": "半导体/GPU",
"business": "国产GPU芯片设计,图形渲染/通用计算芯片"
},
"300548": {
"sector": "光通信/光器件",
"business": "光无源器件(分路器/波分复用),光有源器件,数据中心光互联"
},
"300124": {
"sector": "工控自动化",
"business": "工业自动化(伺服系统/PLC/变频器),新能源汽车电驱系统"
},
"688981": {
"sector": "半导体/晶圆代工",
"business": "集成电路晶圆代工,先进制程(14nm/28nm及以上),成熟制程"
},
"001309": {
"sector": "半导体/存储",
"business": "存储芯片(闪存主控/NAND/DRAM模组),嵌入式存储解决方案"
},
# Hong Kong stocks
"01888": {
"sector": "电子/覆铜板",
"business": "覆铜板(CCL)全球龙头,印刷线路板(PCB),玻璃纤维布"
},
"01088": {
"sector": "煤炭/能源",
"business": "煤炭开采(动力煤/焦煤),煤化工,铁路/港口运输"
},
"09868": {
"sector": "新能源汽车",
"business": "智能电动汽车(SUV/轿车),自动驾驶技术(XNGP),飞行汽车"
},
"02359": {
"sector": "医药/CRO",
"business": "小分子药物发现/临床前CRO,化学药/生物药CDMO"
},
"02628": {
"sector": "保险",
"business": "人身保险(寿险/健康险/意外险),养老保险"
},
"00968": {
"sector": "新能源/光伏",
"business": "光伏玻璃全球龙头,太阳能发电站运营,EVA胶膜"
},
"06869": {
"sector": "通信/光缆",
"business": "光纤预制棒/光纤/光缆全球龙头,通信线缆,数据中心"
},
"02318": {
"sector": "金融/保险",
"business": "综合金融(保险/银行/证券/信托),科技金融"
},
"01070": {
"sector": "消费电子/家电",
"business": "电视机/显示器全球出货前列,光伏储能,智能家居"
},
}
def load_profiles():
    """Read the profiles JSON file and return the parsed document.

    Returns:
        Whatever JSON value the file at ``PROFILES_PATH`` holds.

    Raises:
        FileNotFoundError: the profiles file does not exist.
        json.JSONDecodeError: the file is not valid JSON.
    """
    raw_text = PROFILES_PATH.read_text(encoding="utf-8")
    return json.loads(raw_text)
def save_profiles(data):
    """Sort the profiles by stock code and write the document to disk.

    The write goes to a temporary sibling file first and is then moved over
    ``PROFILES_PATH``. Opening the real file with mode "w" would truncate it
    before serialization starts, so a failure inside ``json.dump`` would
    destroy the existing data.

    Args:
        data: The full profiles document; ``data["profiles"]`` must be a list
            of dicts. The list is sorted in place.

    Raises:
        KeyError: ``data`` has no "profiles" key.
        OSError: the temporary file could not be written or moved.
        TypeError: ``data`` contains a value that JSON cannot serialize.
    """
    # Sort by code; entries with a missing or null code sort first instead of
    # raising, matching the tolerant `.get("code", "")` used by the fill step.
    data["profiles"].sort(key=lambda p: p.get("code") or "")

    tmp_path = PROFILES_PATH.with_name(PROFILES_PATH.name + ".tmp")
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        # Swap the finished file into place only after a complete write.
        tmp_path.replace(PROFILES_PATH)
    except Exception:
        # Do not leave a half-written temp file behind; the original file is
        # still untouched at this point.
        if tmp_path.exists():
            tmp_path.unlink()
        raise
    print(f"写入 {PROFILES_PATH}")
def fill_profiles():
    """Fill missing ``sector`` / ``business`` fields in the profiles file.

    A profile is processed when at least one of the two fields is missing
    (either one, not necessarily both):

    * If the stock code is in ``KNOWN_MAPPING``, every field that is absent,
      null, blank, or still holds the "待补全" placeholder is overwritten with
      the mapped value. Treating the placeholder as missing lets a stock that
      was marked pending on an earlier run be filled once it is added to the
      mapping table.
    * Otherwise every absent/null/blank field is set to the "待补全"
      placeholder. Fields that already hold the placeholder are left alone,
      so repeated runs do not rewrite the file or bump ``last_updated``.

    Each modified profile gets a fresh ``last_updated`` ISO timestamp (local,
    naive time). The file is saved only when something changed, and a summary
    is printed either way.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    from datetime import datetime

    pending = "待补全"

    data = load_profiles()
    changed = 0
    errors = 0
    for p in data.get("profiles", []):
        code = p.get("code", "")
        name = p.get("name", "")
        # `or ""` guards against JSON null, which `.get(key, "")` passes through.
        current_sector = (p.get("sector") or "").strip()
        current_business = (p.get("business") or "").strip()

        mapping = KNOWN_MAPPING.get(code)
        if mapping is not None:
            # Known stock: the placeholder counts as "still missing".
            need_sector = current_sector in ("", pending)
            need_business = current_business in ("", pending)
            if not (need_sector or need_business):
                continue
            if need_sector:
                p["sector"] = mapping["sector"]
                print(f" [{code}] {name}: sector ← {mapping['sector']}")
            if need_business:
                p["business"] = mapping["business"]
                print(f" [{code}] {name}: business ← {mapping['business']}")
            p["last_updated"] = datetime.now().isoformat()
            changed += 1
            continue

        # Unknown stock: only truly blank fields get the placeholder.
        if current_sector and current_business:
            continue
        if not current_sector:
            p["sector"] = pending
            print(f" [{code}] {name}: sector ← 待补全 (不在映射表中)")
        if not current_business:
            p["business"] = pending
            print(f" [{code}] {name}: business ← 待补全 (不在映射表中)")
        p["last_updated"] = datetime.now().isoformat()
        errors += 1

    if changed > 0 or errors > 0:
        save_profiles(data)
        print(f"\n共补全 {changed} 只,标记待补全 {errors}")
    else:
        print("无变更")
def list_status():
    """Print a fill-status summary of the profiles file without modifying it.

    A field counts as unfilled when it is absent, null, blank, or equal to
    the "待补全" placeholder (compared after stripping whitespace). The summary
    shows the total number of profiles, how many have a sector, and how many
    lack a sector or a business description, followed by one line per
    unfilled profile.
    """
    pending = "待补全"

    def is_unfilled(profile, field):
        # `or ""` guards against JSON null, which would otherwise break .strip().
        value = (profile.get(field) or "").strip()
        return not value or value == pending

    profiles = load_profiles().get("profiles", [])
    empty_sector = [p for p in profiles if is_unfilled(p, "sector")]
    empty_biz = [p for p in profiles if is_unfilled(p, "business")]
    # "Filled" is exactly the complement of "unfilled", so no second pass.
    filled_count = len(profiles) - len(empty_sector)

    print(f"总股票数: {len(profiles)}")
    print(f"行业已填: {filled_count}")
    print(f"行业待补全: {len(empty_sector)}")
    print(f"业务待补全: {len(empty_biz)}")
    if empty_sector:
        print("\n行业待补全:")
        for p in empty_sector:
            # .get with a "?" fallback so one incomplete entry cannot abort
            # the whole report.
            print(f" {p.get('code', '?')} {p.get('name', '?')} ({p.get('market', '?')})")
    if empty_biz:
        print("\n业务待补全:")
        for p in empty_biz:
            print(f" {p.get('code', '?')} {p.get('name', '?')}: sector={p.get('sector','?')}")
if __name__ == "__main__":
    # `--status` as the first CLI argument selects the read-only report;
    # anything else (including no arguments) runs the fill step.
    status_only = sys.argv[1:2] == ["--status"]
    if not status_only:
        fill_profiles()
    else:
        list_status()