Files
MoFin/venv/lib/python3.12/site-packages/alphasift/audit.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

192 lines
8.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""Project and strategy audit helpers."""
from __future__ import annotations
from collections import Counter
from pathlib import Path
from alphasift.config import Config
from alphasift.filter import requires_daily_features
from alphasift.strategy import load_all_strategies
def audit_project(strategies_dir: Path | None = None) -> dict[str, object]:
"""Return a structured audit of current strategy coverage and known gaps."""
if strategies_dir is None:
strategies_dir = Config.from_env().strategies_dir
strategies = load_all_strategies(strategies_dir)
strategy_items = list(strategies.values())
categories = Counter(strategy.category for strategy in strategy_items)
profile_fields = [
"scoring_profile",
"risk_profile",
"portfolio_profile",
"scorecard_profile",
]
profile_coverage = {
field: {
"configured": sum(1 for strategy in strategy_items if getattr(strategy.screening, field)),
"missing": [
strategy.name
for strategy in strategy_items
if not getattr(strategy.screening, field)
],
}
for field in profile_fields
}
strategy_findings = []
for strategy in strategy_items:
strategy_findings.extend(_audit_strategy(strategy))
project_gaps = _known_project_gaps()
strengths = _current_strengths()
return {
"project": "alphasift",
"positioning": "全市场候选发现与 LLM 横向排序引擎",
"strategy_count": len(strategy_items),
"categories": dict(sorted(categories.items())),
"profile_coverage": profile_coverage,
"strengths": strengths,
"strategy_findings": strategy_findings,
"project_gaps": project_gaps,
"next_priorities": _next_priorities(strategy_findings, project_gaps),
}
def _audit_strategy(strategy) -> list[dict[str, str]]:
findings: list[dict[str, str]] = []
screening = strategy.screening
hard_filters = screening.hard_filters
uses_daily = requires_daily_features(hard_filters)
tags = {str(tag).lower() for tag in strategy.tags}
is_shape_strategy = strategy.category in {"trend", "pattern"} or bool(
tags & {"breakout", "pullback", "daily_k"}
)
if is_shape_strategy and not uses_daily:
findings.append({
"severity": "warn",
"strategy": strategy.name,
"area": "l1_shape_validation",
"message": "趋势/形态策略尚未声明日 K 硬条件,可能只是在快照上近似形态。",
"recommendation": "补充 require_ma_bullish、price_above_ma20、change_60d 或更严格的形态特征。",
})
if not screening.ranking_hints.strip():
findings.append({
"severity": "warn",
"strategy": strategy.name,
"area": "llm_ranking",
"message": "缺少 ranking_hintsLLM 无法获得策略级取舍偏好。",
"recommendation": "补充候选排序优先级、风险排除条件和组合分散要求。",
})
for field in ("scoring_profile", "risk_profile", "portfolio_profile", "scorecard_profile"):
if not getattr(screening, field):
findings.append({
"severity": "info",
"strategy": strategy.name,
"area": "configurability",
"message": f"未配置 {field},将使用默认规则。",
"recommendation": "如该策略风格与默认基线不同,应在 YAML 中显式配置。",
})
if screening.max_output > 10:
findings.append({
"severity": "info",
"strategy": strategy.name,
"area": "output",
"message": "max_output 较大,L3/LLM 输出可能变成列表而不是精选候选。",
"recommendation": "保持最终输出精简,更多候选可通过保存 run 和后续评估管理。",
})
return findings
def _current_strengths() -> list[dict[str, str]]:
return [
{
"area": "positioning",
"message": "定位在全市场候选发现,位于 DSA 单股深度分析和通知助手上游。",
},
{
"area": "llm",
"message": "LLM 只在候选池内做结构化横向排序,支持按 code 注入文件线索或抓取 Top K 新闻/公告/资金流,并输出 thesis、sector、theme、risk、watch_items 和 invalidators。",
},
{
"area": "risk",
"message": "风险层和组合分散层独立于 alpha 排序,扣分字段可审计。",
},
{
"area": "configurability",
"message": "策略 YAML 可覆盖评分曲线、风险阈值、组合风险桶和 scorecard 规则。",
},
{
"area": "evaluation",
"message": "支持保存运行、单次 T+N 最新快照评估、可选日 K 路径最大回撤/最大浮盈,以及按单只和等权组合维度聚合最近 runs。",
},
{
"area": "daily_shape",
"message": "日 K 增强已支持 20 日突破幅度、区间振幅、20 日量能比、实体强度、MA20 回踩距离和平台持续天数。",
},
{
"area": "data",
"message": "行业/概念锚点支持本地映射文件、可选 AkShare 板块反查、板块热度分、主题热度摘要,以及 history JSONL 滚动趋势、持续性、降温、状态回填和异常热度值过滤。",
},
{
"area": "shape_validation",
"message": "T+N 评估会对突破、MA20 回踩和平台整理候选打形态后验标签,用于发现突破延续或失败回落。",
},
]
def _known_project_gaps() -> list[dict[str, str]]:
return [
{
"severity": "gap",
"area": "data",
"message": "行业/概念和板块热度已能进入 LLM 上下文与 theme_heat 因子,industry-cache 会写 metadata/history sidecar 并回填滚动趋势、持续性和降温信号,但数据源仍偏本地/AkShare。",
"recommendation": "补多源映射、板块层级口径归一和更完整的数据质量报告。",
},
{
"severity": "gap",
"area": "news_context",
"message": "LLM 上下文支持候选级文件注入、Top K 抓取、缓存、基础去重、来源置信度、来源权重分、压缩摘要、公告类别和粗粒度事件/负面风险标签;策略可配置事件偏好和来源权重,但事件权重尚未进入可学习评估闭环。",
"recommendation": "将事件类型偏好纳入 T+N 归因统计,补更细公告分类和来源质量评估。",
},
{
"severity": "gap",
"area": "backtest",
"message": "当前 T+N 批量评估覆盖多维聚合、交易成本扣减、可选日 K 路径指标和等权组合摘要,但不是复权、持仓约束和调仓约束完整回测。",
"recommendation": "增加持仓约束和组合层面的逐日权益曲线;完整研究后端可接 Qlib/Backtrader。",
},
{
"severity": "gap",
"area": "market_scope",
"message": "当前主链只支持 A 股,尚未抽象港股/美股字段映射和交易日历。",
"recommendation": "在字段标准化稳定后再扩展多市场,不要先把 L1 规则泛化过度。",
},
{
"severity": "gap",
"area": "shape_validation",
"message": "突破、回踩等形态策略已有平台、20 日形态特征、T+N 后验标签和可选日 K 路径指标,但还没有前高压力密集度和盘中失效条件。",
"recommendation": "补充前高压力密集度和盘中止损约束。",
},
]
def _next_priorities(strategy_findings: list[dict[str, str]], project_gaps: list[dict[str, str]]) -> list[str]:
priorities = [
"给行业/概念 history JSONL 增加板块层级口径归一和更完整的数据质量报告。",
"将候选上下文事件类型偏好纳入 T+N 归因统计,补更细公告分类和来源质量评估。",
"给批量 T+N 评估补持仓约束和组合层面的逐日权益曲线。",
]
if any(item["area"] == "l1_shape_validation" for item in strategy_findings):
priorities.insert(0, "补强趋势/形态策略的日 K 结构验证,尤其是放量突破类策略。")
if any(item["area"] == "backtest" for item in project_gaps):
priorities.append("后续再考虑接入 backtrader/qlib 这类完整回测框架,而不是在主链里硬塞。")
return priorities