aa0f740381
完整数据采集+分析管道: - market_watch.py:90行业板块采集(同花顺/东方财富) - 市场精选推荐 cron:全市场分析+候选池+星级推荐 - price_monitor.py:持仓/自选高频价格监控 - refresh_mtf_cache.py:多周期K线缓存 - 策略评估/知识萃取管道 文档:docs/ 含完整需求+架构设计 注意:尚未配置 git remote,笑笑接手后自行配置
366 lines
12 KiB
Python
366 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""collect_evaluation_data.py — 六维评估原始数据采集
|
|
|
|
纯数据收集脚本(no_agent),不做任何评估/判断/RR计算。
|
|
输出:data/evaluation_input.json — 供 21:00 LLM cron 使用。
|
|
|
|
采集内容:
|
|
D1 宏观环境 — 五大指数(上证/深证/恒生/恒科/A50)
|
|
D2 行业表现 — 持仓+自选按行业分组
|
|
D3 技术面(当前) — 今开/今高/今低/昨收/现价/成交量
|
|
D4 基本面 — PE/PB/总市值/52周高/52周低
|
|
D5 消息面 — (此脚本不采集,LLM cron web_search)
|
|
D6 资金面 — 成交额/换手率/量比
|
|
|
|
日期:2026-06-18 v1 — 初始版本
|
|
"""
|
|
|
|
import json
|
|
import urllib.request
|
|
import os
|
|
import sys
|
|
import re
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
# Directory with the JSON inputs and the generated output; it sits next to this script.
DATA_DIR = Path(__file__).parent / "data"
# Input: strategy decisions (read via the "decisions" key).
DECISIONS_PATH = DATA_DIR / "decisions.json"
# Input: current holdings (read via the "holdings" key).
PORTFOLIO_PATH = DATA_DIR / "portfolio.json"
# Input: per-stock profiles (read via the "profiles" key, or a bare list).
PROFILES_PATH = DATA_DIR / "stock_profiles.json"
# Output: the assembled evaluation input written at the end of run().
OUTPUT_PATH = DATA_DIR / "evaluation_input.json"

# User-Agent header sent with every quote request.
UA = "Mozilla/5.0"
|
|
|
|
|
|
def load_json(path, default=None):
    """Load a JSON document from *path*, falling back to a default value.

    Args:
        path: Location (str or Path) of a UTF-8 encoded JSON file.
        default: Value returned when the file cannot be used. When it is
            None an empty dict is returned instead.

    Returns:
        The decoded JSON value, or the fallback when the file is missing,
        unreadable, not valid UTF-8 or not valid JSON.
    """
    try:
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError):
        # OSError covers FileNotFoundError as well as permission and
        # "is a directory" failures; ValueError covers json.JSONDecodeError
        # and UnicodeDecodeError, so a corrupt file never aborts the run.
        return {} if default is None else default
|
|
|
|
|
|
def save_json(path, data):
    """Serialize *data* as pretty-printed UTF-8 JSON and store it at *path*.

    Missing parent directories are created. The document is first written to
    a sibling ``<name>.tmp`` file and then moved over the target, so a reader
    never sees a half-written file and a failed dump leaves the previous
    output untouched.

    Args:
        path: Destination file (str or Path).
        data: JSON-serializable object.

    Raises:
        TypeError: If *data* contains values ``json`` cannot serialize.
        OSError: If the file cannot be written or moved into place.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = target.with_name(target.name + ".tmp")
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        # Same-directory rename: replaces the old file in one step.
        os.replace(tmp_path, target)
    except BaseException:
        # Do not leave a partial temp file behind; the original error is
        # what the caller needs to see, so cleanup problems are ignored.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
|
|
|
|
|
|
def fetch_tencent_data(symbols):
    """Fetch quotes for several stocks from the Tencent endpoint in one request.

    Args:
        symbols: Collection of bare stock codes (strings). Five-character
            codes are queried with the ``hk`` prefix, codes starting with
            5/6/9 with ``sh`` and every other code with ``sz``.

    Returns:
        Dict mapping each original code to the list of ``~``-separated string
        fields of its quote line. Lines with fewer than 35 fields and lines
        for symbols that were not requested are dropped. An empty dict is
        returned when *symbols* is empty or when the request/decoding fails
        (the error is printed to stderr).
    """
    if not symbols:
        return {}

    # prefixed query symbol -> code exactly as supplied by the caller
    code_map = {}
    for code in symbols:
        if len(code) == 5:
            prefix = "hk"
        elif code.startswith(("5", "6", "9")):
            prefix = "sh"
        else:
            prefix = "sz"
        code_map[prefix + code] = code

    url = f"http://qt.gtimg.cn/q={','.join(code_map)}"
    try:
        req = urllib.request.Request(url, headers={"User-Agent": UA})
        # The context manager closes the connection even if read/decode fails.
        with urllib.request.urlopen(req, timeout=15) as resp:
            text = resp.read().decode("gbk")
    except Exception as e:
        # Best-effort collection: report the failure and continue without quotes.
        print(f"行情拉取失败: {e}", file=sys.stderr)
        return {}

    result = {}
    for line in text.strip().split("\n"):
        line = line.strip()
        if not line or "=" not in line:
            continue
        key, _, payload = line.partition("=")
        # Each line looks like  v_sz000001="f0~f1~...";  -- drop the trailing
        # semicolon BEFORE the quotes, otherwise the closing quote stays
        # glued to the last field.
        fields = payload.strip().rstrip(";").strip('"').split("~")
        if len(fields) < 35:
            continue
        symbol = key.strip()
        # Remove the literal "v_" prefix (lstrip would strip a character set).
        if symbol.startswith("v_"):
            symbol = symbol[2:]
        original_code = code_map.get(symbol)
        if original_code:
            # Raw field list; A-share and HK rows have different lengths.
            result[original_code] = fields
    return result
|
|
|
|
|
|
def fetch_indices():
    """Fetch the five market indices from the Tencent quote endpoint.

    Returns:
        Dict keyed by the index display name (or by the raw symbol when the
        response contains an unexpected one). Each value holds ``price``,
        ``prev_close``, ``change_pct``, ``high``, ``low`` (floats or None)
        and the raw ``timestamp`` string. Lines with fewer than 33 fields are
        skipped. An empty dict is returned when the request or decoding
        fails (the error is printed to stderr).
    """
    # Query symbol (already carrying its market prefix) -> display name.
    index_names = {
        "sh000001": "上证指数",
        "sz399001": "深证成指",
        "sz399006": "创业板指",
        "hkHSI": "恒生指数",
        "hkHSTECH": "恒生科技",
    }
    url = f"http://qt.gtimg.cn/q={','.join(index_names)}"
    try:
        req = urllib.request.Request(url, headers={"User-Agent": UA})
        # The context manager closes the connection even if read/decode fails.
        with urllib.request.urlopen(req, timeout=10) as resp:
            text = resp.read().decode("gbk")
    except Exception as e:
        # Best-effort collection: report the failure and continue without indices.
        print(f"指数拉取失败: {e}", file=sys.stderr)
        return {}

    result = {}
    for line in text.strip().split("\n"):
        line = line.strip()
        if not line or "=" not in line:
            continue
        key, _, payload = line.partition("=")
        # Drop the trailing semicolon before the quotes so the closing quote
        # does not remain attached to the last field.
        fields = payload.strip().rstrip(";").strip('"').split("~")
        if len(fields) < 33:
            continue

        def num(index, row=fields):
            # Rows with 33 or 34 fields pass the length check above but lack
            # the high/low positions; report those as None instead of
            # raising IndexError.
            return safe_float(row[index]) if index < len(row) else None

        symbol = key.strip()
        # Remove the literal "v_" prefix (lstrip would strip a character set).
        if symbol.startswith("v_"):
            symbol = symbol[2:]
        name = index_names.get(symbol, symbol)
        result[name] = {
            "price": num(3),
            "prev_close": num(4),
            "change_pct": num(32),
            "high": num(33),
            "low": num(34),
            # Always present: the row has at least 33 fields at this point.
            "timestamp": fields[30],
        }
    return result
|
|
|
|
|
|
def safe_float(v):
    """Convert *v* to a finite float, or return None when that is impossible.

    Args:
        v: Usually a string field from the quote feed; numbers are accepted.

    Returns:
        The float value. None is returned for None, for empty or
        whitespace-only strings, for anything ``float()`` rejects, and for
        NaN/infinity (which would otherwise end up as non-standard JSON).
        A numeric zero is a valid value and is returned as 0.0.
    """
    import math  # local import: the module header does not import math

    if v is None:
        return None
    if isinstance(v, str) and not v.strip():
        return None
    try:
        number = float(v)
    except (ValueError, TypeError, OverflowError):
        return None
    return number if math.isfinite(number) else None
|
|
|
|
|
|
def parse_stock_data(code, fields, is_hk=False):
    """Turn the raw field list of one Tencent quote line into a dict.

    Args:
        code: Stock code as used by the caller; copied into the result.
        fields: List of string fields of one quote line.
        is_hk: True for Hong Kong rows, which use a different layout for the
            extended fields and get no ``open`` value.

    Returns:
        Dict with ``code``, ``name``, ``price``, ``prev_close``, ``open``,
        ``change_pct``, ``high``, ``low`` and ``volume``. A-share rows with
        more than 46 fields additionally get ``turnover_rate``, ``pe``,
        ``total_market_cap``, ``circulating_market_cap``, ``high_52w``,
        ``low_52w`` and ``amplitude``; HK rows with more than 70 fields get
        ``pe``, ``total_market_cap``, ``high_52w`` and ``low_52w``. Numeric
        values are floats, or None when the field is empty, malformed or
        beyond the end of the row.
    """

    def num(index):
        # The length guards below do not cover every index that is read
        # (e.g. "> 46" vs. index 49), so out-of-range positions yield None
        # instead of raising IndexError.
        return safe_float(fields[index]) if index < len(fields) else None

    # NOTE(review): all field positions follow the original author's mapping
    # of the undocumented Tencent feed. Verify them against a live response;
    # some circulated field tables put the dynamic PE at 39 and the
    # limit-up/limit-down prices at 47/48 -- TODO confirm.
    data = {
        "code": code,
        "name": fields[1] if len(fields) > 1 else code,
        "price": num(3),
        "prev_close": num(4),
        "open": None if is_hk else num(5),
        "change_pct": num(32),
        "high": num(33),
        "low": num(34),
        "volume": num(6),  # described as a share count by the original author
    }

    # Extended A-share fields.
    if not is_hk and len(fields) > 46:
        data["turnover_rate"] = num(38)  # turnover rate, %
        data["pe"] = num(47)  # price/earnings (dynamic)
        data["total_market_cap"] = num(44)  # total market cap (100M units)
        data["circulating_market_cap"] = num(45)  # free-float market cap (100M units)
        data["high_52w"] = num(48)  # 52-week high
        data["low_52w"] = num(49)  # 52-week low
        data["amplitude"] = num(43)  # amplitude, %

    # Extended Hong Kong fields.
    if is_hk and len(fields) > 70:
        data["pe"] = num(71)  # the original author notes PE is "around" index 71
        data["total_market_cap"] = num(69)
        data["high_52w"] = num(48)
        data["low_52w"] = num(49)

    return data
|
|
|
|
|
|
def get_sector_mapping(profiles, decisions):
    """Build a ``{code: {name, sector, business, market, type}}`` lookup.

    Entries from the stock profiles take precedence; decisions only fill in
    codes that have no profile.

    Args:
        profiles: Either a dict with a ``"profiles"`` list or the list itself.
        decisions: Dict with a ``"decisions"`` list.

    Returns:
        Dict keyed by stock code. Entries without a code, entries that are
        not dicts, and null lists are ignored.
    """
    mapping = {}

    # 1) stock profiles
    profile_list = profiles.get("profiles", []) if isinstance(profiles, dict) else profiles
    if isinstance(profile_list, list):
        for p in profile_list:
            if not isinstance(p, dict):
                continue
            code = p.get("code", "")
            if code:
                mapping[code] = {
                    "name": p.get("name", ""),
                    "sector": p.get("sector", ""),
                    "business": p.get("business", ""),
                    "market": p.get("market", ""),
                    "type": p.get("type", ""),
                }

    # 2) decisions fill the gaps; "or" tolerates an explicit JSON null.
    for d in decisions.get("decisions") or []:
        if not isinstance(d, dict):
            continue
        code = d.get("code", "")
        if not code or code in mapping:
            continue
        trig = d.get("trigger") or {}
        mapping[code] = {
            "name": d.get("name", code),
            "sector": trig.get("sector_name", d.get("sector_name", "")),
            "business": "",
            # Five-character codes are treated as Hong Kong listings.
            "market": "港股" if len(code) == 5 else "A股",
            "type": d.get("type", "持仓策略"),
        }
    return mapping
|
|
|
|
|
|
def get_portfolio_info(portfolio):
    """Build a ``{code: {cost, shares, position_pct}}`` lookup from the holdings.

    Args:
        portfolio: Dict with a ``"holdings"`` list of holding dicts.

    Returns:
        Dict keyed by stock code; missing numeric fields default to 0.
        Holdings without a code are skipped so they do not produce a bogus
        ``""`` entry, as are non-dict entries and a null holdings list.
    """
    result = {}
    for h in portfolio.get("holdings") or []:
        if not isinstance(h, dict):
            continue
        code = h.get("code", "")
        if not code:
            continue
        result[code] = {
            "cost": h.get("cost", 0),
            "shares": h.get("shares", 0),
            "position_pct": h.get("position_pct", 0),
        }
    return result
|
|
|
|
|
|
def get_decisions_info(decisions):
    """Return the list stored under ``"decisions"``.

    Args:
        decisions: Dict loaded from the decisions file.

    Returns:
        The stored list itself, or an empty list when the key is missing or
        holds a JSON null.
    """
    items = decisions.get("decisions")
    return [] if items is None else items
|
|
|
|
|
|
def run():
    """Collect all raw evaluation data and write it to ``OUTPUT_PATH``.

    Steps: load decisions/portfolio/profiles, gather every stock code they
    mention, fetch A-share and Hong Kong quotes plus the indices, merge
    quotes with sector, holding and strategy information, group the stocks
    by sector, and save the result together with a small summary. A short
    report is printed to stdout.
    """
    decisions = load_json(DECISIONS_PATH, {"decisions": []})
    portfolio = load_json(PORTFOLIO_PATH, {"holdings": []})
    profiles = load_json(PROFILES_PATH, {"profiles": []})

    sector_mapping = get_sector_mapping(profiles, decisions)
    portfolio_info = get_portfolio_info(portfolio)

    # "or []" tolerates an explicit JSON null; non-dict entries are ignored.
    decision_list = [d for d in (decisions.get("decisions") or []) if isinstance(d, dict)]
    holding_list = [h for h in (portfolio.get("holdings") or []) if isinstance(h, dict)]

    # Every non-empty code mentioned by a decision or a holding.
    all_codes = {e["code"] for e in decision_list + holding_list if e.get("code")}

    # Five-character codes are treated as Hong Kong listings.
    a_codes = [c for c in all_codes if len(c) != 5]
    hk_codes = [c for c in all_codes if len(c) == 5]

    a_prices = fetch_tencent_data(a_codes) if a_codes else {}
    hk_prices = fetch_tencent_data(hk_codes) if hk_codes else {}
    index_data = fetch_indices()

    stock_data = {
        code: parse_stock_data(code, a_prices[code], is_hk=False)
        for code in a_codes
        if code in a_prices
    }
    stock_data.update(
        (code, parse_stock_data(code, hk_prices[code], is_hk=True))
        for code in hk_codes
        if code in hk_prices
    )

    strategies = _build_strategy_index(decision_list)
    stocks = [
        _build_stock_entry(
            code,
            stock_data.get(code, {}),
            sector_mapping.get(code, {}),
            portfolio_info.get(code, {}),
            strategies.get(code),
        )
        for code in sorted(all_codes)
    ]
    sector_groups = _group_by_sector(stocks)

    total = len(stocks)
    up_count = sum(1 for s in stocks if s["change_pct"] is not None and s["change_pct"] > 0)
    down_count = sum(1 for s in stocks if s["change_pct"] is not None and s["change_pct"] < 0)
    deep_loss = sum(1 for s in stocks if s["pnl_pct"] is not None and s["pnl_pct"] < -20)
    # Count only holdings that are part of the output, so that
    # holdings + watchlist always equals the total.
    holdings_count = sum(1 for s in stocks if s["code"] in portfolio_info)

    output = {
        "collected_at": datetime.now().isoformat(),
        "total_stocks": total,
        "summary": {
            "up_count": up_count,
            "down_count": down_count,
            "deep_loss_count": deep_loss,
            "holdings_count": holdings_count,
            "watchlist_count": total - holdings_count,
        },
        "index_data": index_data,
        "sector_groups": sector_groups,
        "stocks": stocks,
    }

    save_json(OUTPUT_PATH, output)
    print(f"数据收集完成: {total}只股票, {len(index_data)}个指数, {len(sector_groups)}个行业分组")
    print(f"  上涨{up_count} 下跌{down_count} 深套{deep_loss}")
    print(f"  输出: {OUTPUT_PATH}")


def _build_strategy_index(decision_list):
    """Map each stock code to the strategy summary of its first decision."""
    strategies = {}
    for d in decision_list:
        code = d.get("code")
        if not code or code in strategies:
            continue
        # A JSON null trigger is treated like a missing one.
        trig = d.get("trigger") or {}
        strategies[code] = {
            "action": trig.get("action", d.get("action", "")),
            "entry_zone": trig.get("entry_zone", ""),
            "stop_loss": trig.get("stop_loss", d.get("stop_loss", "")),
            "take_profit": trig.get("take_profit", d.get("take_profit", "")),
            "type": d.get("type", "持仓策略"),
            "tech_snapshot": trig.get("tech_snapshot", d.get("tech_snapshot", "")),
        }
    return strategies


def _compute_pnl_pct(cost, price):
    """Return the unrealised P&L in percent, or None without a positive cost and price."""
    # Quotes may carry price=None and JSON may carry cost=null; comparing
    # None with a number would raise TypeError.
    cost = cost or 0
    price = price or 0
    if cost > 0 and price > 0:
        return round((price - cost) / cost * 100, 2)
    return None


def _build_stock_entry(code, raw, sector_info, port, strategy):
    """Merge quote, sector, holding and strategy data of one stock into an output record."""
    entry = {
        "code": code,
        "name": raw.get("name", sector_info.get("name", code)),
        "market": "港股" if len(code) == 5 else "A股",
        "type": sector_info.get("type", "持仓策略"),
        "sector": sector_info.get("sector", ""),
        "business": sector_info.get("business", ""),
        # quote of the day
        "price": raw.get("price"),
        "prev_close": raw.get("prev_close"),
        "open": raw.get("open"),
        "high": raw.get("high"),
        "low": raw.get("low"),
        "change_pct": raw.get("change_pct"),
        "volume": raw.get("volume"),
        # fundamentals
        "pe": raw.get("pe"),
        "total_market_cap": raw.get("total_market_cap"),
        "high_52w": raw.get("high_52w"),
        "low_52w": raw.get("low_52w"),
        "turnover_rate": raw.get("turnover_rate"),
        "amplitude": raw.get("amplitude"),
        # holding
        "cost": port.get("cost", 0),
        "shares": port.get("shares", 0),
        "position_pct": port.get("position_pct", 0),
        # current strategy
        "strategy": strategy,
    }
    entry["pnl_pct"] = _compute_pnl_pct(port.get("cost"), raw.get("price"))
    return entry


def _group_by_sector(stocks):
    """Group compact per-stock summaries by sector name."""
    groups = {}
    for s in stocks:
        # The "sector" key always exists, so an empty value (not a missing
        # key) is what has to fall back to the "unclassified" bucket.
        sector = s.get("sector") or "未分类"
        groups.setdefault(sector, []).append({
            "code": s["code"],
            "name": s["name"],
            "change_pct": s["change_pct"],
            "pnl_pct": s["pnl_pct"],
            "type": s["type"],
        })
    return groups
|
|
|
|
|
|
# Run the collection only when executed as a script, not on import.
if __name__ == "__main__":
    run()
|