Files
MoFin/venv/lib/python3.12/site-packages/alphasift/pipeline.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

675 lines
29 KiB
Python

# -*- coding: utf-8 -*-
"""Main pipeline — orchestrates L1 → L2 → result."""
import logging
import uuid
from pathlib import Path
import pandas as pd
from alphasift.config import Config
from alphasift.candidate_context import collect_candidate_context
from alphasift.context import build_llm_context
from alphasift.daily import enrich_daily_features
from alphasift.dsa_provider import apply_dsa_provider_context
from alphasift.filter import (
apply_hard_filters,
hard_filter_rejection_summary,
hard_filter_waterfall,
requires_daily_features,
without_daily_filters,
)
from alphasift.industry import enrich_industry_concepts
from alphasift.models import Pick, ScreenResult
from alphasift.normalize import (
normalize_code,
safe_bool as _safe_bool,
safe_float as _safe_float,
safe_int as _safe_int,
safe_text,
)
from alphasift.post_analysis import normalize_post_analyzers, run_post_analyzers
from alphasift.ranker import rank_candidates_with_metadata
from alphasift.risk import apply_portfolio_overlay, apply_risk_overlay
from alphasift.scorer import compute_screen_scores, factor_score_columns
from alphasift.snapshot import fetch_snapshot_with_fallback
from alphasift.strategy import load_all_strategies
logger = logging.getLogger(__name__)
def screen(
    strategy: str,
    *,
    market: str = "cn",
    max_output: int | None = None,
    use_llm: bool = True,
    llm_context: str | None = None,
    llm_context_files: list[str | Path] | None = None,
    candidate_context_files: list[str | Path] | None = None,
    collect_llm_candidate_context: bool | None = None,
    candidate_context_max_candidates: int | None = None,
    candidate_context_providers: list[str] | None = None,
    industry_map_files: list[str | Path] | None = None,
    industry_provider: str | None = None,
    post_analyzers: list[str] | None = None,
    post_analysis_max_picks: int | None = None,
    daily_enrich: bool | None = None,
    daily_enrich_max_candidates: int | None = None,
    explain_filters: bool = False,
    deep_analysis: bool = False,
    deep_analysis_max_picks: int | None = None,
    context: dict[str, object] | None = None,
    config: Config | None = None,
) -> ScreenResult:
    """Execute stock screening with the given strategy.

    The pipeline loads the strategy, fetches a market snapshot, applies the
    hard filters (optionally enriching a narrowed pool with daily K-line
    features first), scores and sorts the survivors, optionally ranks the
    top candidates with an LLM, applies the risk and portfolio overlays,
    trims to the output count and finally runs any post analyzers.
    Non-fatal problems along the way are collected as human-readable notes
    in ``ScreenResult.degradation``.

    Args:
        strategy: Strategy name (matches a YAML file in strategies/).
        market: Market scope, either "cn" or "us"; the strategy must also
            list it in its ``market_scope``.
        max_output: Override max output count from strategy.
        use_llm: Whether to use LLM for L2 ranking.
        llm_context: Optional market/news/theme context supplied to the LLM ranker.
        llm_context_files: Optional text files appended to LLM context.
        candidate_context_files: Optional CSV/JSON/JSONL files keyed by code with candidate-level context.
        collect_llm_candidate_context: Whether to fetch Top-K candidate news/fund-flow context for LLM.
        candidate_context_max_candidates: Max candidates to fetch external context for.
        candidate_context_providers: Optional provider names: news, fund_flow, announcement.
        industry_map_files: Optional code->industry/concepts files used before L1/L2.
        industry_provider: Optional provider for board mapping, e.g. "akshare".
        post_analyzers: Optional L3 analyzers, e.g. ["scorecard", "dsa"].
        post_analysis_max_picks: Override max number of picks sent to post analyzers.
        daily_enrich: Whether to enrich shortlisted candidates with daily K-line features.
        daily_enrich_max_candidates: Max candidates to enrich after snapshot filtering.
        explain_filters: Whether to include sequential hard-filter waterfall diagnostics.
        deep_analysis: Backward-compatible alias for post_analyzers=["dsa"].
        deep_analysis_max_picks: Backward-compatible max-picks alias for DSA.
        context: Optional host runtime context. DSA may provide LLM settings and
            callable data providers under context["dsa"].
        config: Runtime config. Defaults to Config.from_env().

    Returns:
        ScreenResult with ranked picks. When the hard filters remove every
        candidate, a result without picks is returned and the reason is
        recorded in its ``degradation`` list.

    Raises:
        ValueError: If ``market`` is unsupported, ``strategy`` is unknown, or
            the strategy does not support ``market``.
        RuntimeError: If the strategy's hard filters need daily features and
            the daily K-line enrichment step fails.
    """
    if config is None:
        config = Config.from_env()
    if market not in ("cn", "us"):
        raise ValueError(f"Unsupported market: {market!r} (supported: cn, us)")
    run_id = uuid.uuid4().hex[:12]
    degradation: list[str] = []

    # 1. Load strategy
    strategies = load_all_strategies(config.strategies_dir)
    if strategy not in strategies:
        available = ", ".join(strategies.keys()) or "(none)"
        raise ValueError(f"Strategy '{strategy}' not found. Available: {available}")
    strat = strategies[strategy]
    screening = strat.screening
    if market not in screening.market_scope:
        raise ValueError(
            f"Strategy '{strategy}' does not support market '{market}'. "
            f"Supported: {', '.join(screening.market_scope)}"
        )
    output_count = max_output or screening.max_output
    analyzer_names = normalize_post_analyzers(
        post_analyzers if post_analyzers is not None else config.post_analyzers
    )
    if deep_analysis and "dsa" not in analyzer_names:
        analyzer_names.append("dsa")
    analyzer_max_picks = (
        post_analysis_max_picks
        or deep_analysis_max_picks
    )
    daily_needed = requires_daily_features(screening.hard_filters)
    daily_requested = config.daily_enrich_enabled if daily_enrich is None else daily_enrich
    daily_limit = daily_enrich_max_candidates or config.daily_enrich_max_candidates
    # Filters that depend on daily features cannot run on the raw snapshot,
    # so they are held back until after enrichment.
    snapshot_filters = without_daily_filters(screening.hard_filters) if daily_needed else screening.hard_filters

    # 2. Fetch snapshot
    snapshot_df = fetch_snapshot_with_fallback(
        config.snapshot_source_priority,
        required_columns=_required_snapshot_columns(snapshot_filters),
        fallback_snapshot_path=config.fallback_snapshot_path,
        fallback_max_age_hours=config.snapshot_fallback_max_age_hours,
        market=market,
    )
    effective_industry_map_files = (
        list(industry_map_files)
        if industry_map_files is not None
        else list(config.industry_map_files)
    )
    effective_industry_provider = (
        industry_provider
        if industry_provider is not None
        else config.industry_provider
    )
    effective_industry_provider = str(effective_industry_provider or "none").strip().lower()
    if effective_industry_map_files or effective_industry_provider not in {"", "none", "off", "false"}:
        snapshot_df, industry_notes = enrich_industry_concepts(
            snapshot_df,
            map_files=effective_industry_map_files,
            provider=effective_industry_provider,
            max_boards=config.industry_provider_max_boards,
            provider_cache_dir=config.industry_provider_cache_dir,
            provider_cache_ttl_hours=config.industry_provider_cache_ttl_hours,
        )
        degradation.extend(f"Industry/concepts enrichment: {item}" for item in industry_notes)
    snapshot_count = len(snapshot_df)
    snapshot_source = str(snapshot_df.attrs.get("snapshot_source", ""))
    source_errors = [str(item) for item in snapshot_df.attrs.get("source_errors", [])]
    degradation.extend(f"Snapshot source fallback: {item}" for item in source_errors)
    if bool(snapshot_df.attrs.get("fallback_used")):
        stale_age = snapshot_df.attrs.get("stale_age_hours")
        if stale_age is None:
            degradation.append("Snapshot source fallback: last_good_cache stale")
        else:
            degradation.append(
                f"Snapshot source fallback: last_good_cache stale_age_hours={stale_age}"
            )

    # 3. L1 hard filter. If a strategy needs daily features, first apply only
    # snapshot-safe filters, then enrich a narrowed candidate pool.
    if explain_filters:
        snapshot_waterfall = hard_filter_waterfall(snapshot_df, snapshot_filters)
        if snapshot_waterfall:
            degradation.append(
                "Snapshot hard-filter waterfall: "
                + _format_filter_waterfall(snapshot_waterfall)
            )
    df = apply_hard_filters(snapshot_df, snapshot_filters)
    after_filter_count = len(df)
    if df.empty:
        return ScreenResult(
            strategy=strategy,
            market=market,
            snapshot_count=snapshot_count,
            after_filter_count=0,
            run_id=run_id,
            degradation=[*degradation, "No candidates after hard filter"],
            snapshot_source=snapshot_source,
            source_errors=source_errors,
            strategy_version=strat.version,
            strategy_category=strat.category,
            post_analyzers=analyzer_names,
            daily_enriched=False,
            risk_enabled=config.risk_enabled,
            portfolio_diversity_enabled=config.portfolio_diversity_enabled,
        )
    daily_enriched = False
    daily_enrich_count = 0
    if daily_needed or daily_requested:
        # Score the snapshot-filtered pool first so only the best-ranked
        # candidates (up to daily_limit) are sent for daily enrichment.
        provisional = _sort_screened_candidates(compute_screen_scores(df, screening), screening)
        enrich_count = min(daily_limit, len(provisional))
        daily_candidates = provisional.head(enrich_count)
        try:
            enriched = enrich_daily_features(
                daily_candidates,
                max_rows=enrich_count,
                lookback_days=config.daily_lookback_days,
                source=config.daily_source,
                fetch_retries=config.daily_fetch_retries,
                max_workers=config.daily_fetch_max_workers,
            )
            daily_enriched = True
            daily_errors = [str(item) for item in enriched.attrs.get("daily_errors", [])]
            daily_enrich_count = int(enriched.attrs.get("daily_success_count", len(enriched)))
            daily_source_counts = dict(enriched.attrs.get("daily_source_counts", {}) or {})
            daily_quality_flag_counts = dict(enriched.attrs.get("daily_quality_flag_counts", {}) or {})
            daily_source_order_notes = [str(item) for item in enriched.attrs.get("daily_source_order_notes", [])]
            daily_source_health_notes = _daily_source_health_notes(
                dict(enriched.attrs.get("daily_source_health", {}) or {})
            )
            degradation.append(
                f"Daily K-line enrichment attempted {enrich_count} candidates, "
                f"succeeded {daily_enrich_count} of {after_filter_count} snapshot-filtered candidates"
            )
            if daily_source_counts:
                source_summary = ", ".join(
                    f"{name}={count}" for name, count in sorted(daily_source_counts.items())
                )
                degradation.append(f"Daily K-line sources: {source_summary}")
            if daily_quality_flag_counts:
                flag_summary = ", ".join(
                    f"{name}={count}" for name, count in sorted(daily_quality_flag_counts.items())
                )
                degradation.append(f"Daily K-line quality flags: {flag_summary}")
            if daily_source_order_notes:
                degradation.append("Daily K-line source ordering: " + " | ".join(daily_source_order_notes[:3]))
            if daily_source_health_notes:
                degradation.append("Daily K-line source health: " + "; ".join(daily_source_health_notes))
            if daily_errors:
                sample = " | ".join(daily_errors[:5])
                suffix = f" | +{len(daily_errors) - 5} more" if len(daily_errors) > 5 else ""
                degradation.append(f"Daily K-line enrichment row errors: {sample}{suffix}")
            if daily_needed:
                if explain_filters:
                    daily_waterfall = hard_filter_waterfall(enriched, screening.hard_filters)
                    if daily_waterfall:
                        degradation.append(
                            "Daily hard-filter waterfall: "
                            + _format_filter_waterfall(daily_waterfall)
                        )
                daily_filter_rejections = hard_filter_rejection_summary(
                    enriched,
                    screening.hard_filters,
                    limit=6,
                )
                # Re-apply the complete filter set now that daily columns exist.
                df = apply_hard_filters(enriched, screening.hard_filters)
                after_filter_count = len(df)
                if daily_filter_rejections:
                    degradation.append(
                        "Daily hard-filter rejections: "
                        + "; ".join(daily_filter_rejections)
                    )
            else:
                df = enriched
        except Exception as exc:
            if daily_needed:
                raise RuntimeError(
                    "Daily K-line enrichment is required by this strategy but failed: "
                    f"{exc}"
                ) from exc
            # Best-effort path: ``df`` is still the snapshot-filtered frame
            # (it is only replaced as the last step of the ``try`` body), so
            # the result must not claim enrichment even if the failure
            # happened after the fetch itself had succeeded.
            daily_enriched = False
            daily_enrich_count = 0
            degradation.append(f"Daily K-line enrichment skipped: {exc}")
        if df.empty:
            return ScreenResult(
                strategy=strategy,
                market=market,
                strategy_version=strat.version,
                strategy_category=strat.category,
                snapshot_count=snapshot_count,
                after_filter_count=0,
                run_id=run_id,
                degradation=[*degradation, "No candidates after daily hard filter"],
                snapshot_source=snapshot_source,
                source_errors=source_errors,
                post_analyzers=analyzer_names,
                daily_enriched=daily_enriched,
                daily_enrich_count=daily_enrich_count,
                risk_enabled=config.risk_enabled,
                portfolio_diversity_enabled=config.portfolio_diversity_enabled,
            )

    # 4. Compute screen_score
    df = _sort_screened_candidates(compute_screen_scores(df, screening), screening)

    # 5. Take Top K for LLM ranking
    top_k = min(
        max(output_count * config.llm_candidate_multiplier, output_count),
        config.llm_max_candidates,
        len(df),
    )
    df_top = df.head(top_k)

    # 6. Build Pick list
    picks = _df_to_picks(df_top)

    # 6.5. Host-provided candidate context, e.g. DSA realtime quote,
    # fundamentals, and news. This runs before LLM ranking so L2 can use it.
    degradation.extend(apply_dsa_provider_context(picks, context))

    # 7. L2 LLM ranking
    llm_ranked = False
    llm_market_view = ""
    llm_selection_logic = ""
    llm_portfolio_risk = ""
    llm_coverage: float | None = None
    llm_parse_errors: list[str] = []
    if use_llm and config.has_llm_config():
        candidate_context_rows: list[dict[str, object]] = []
        event_source_weights = _event_source_weights(screening.event_profile)
        should_collect_candidate_context = (
            config.llm_candidate_context_enabled
            if collect_llm_candidate_context is None
            else collect_llm_candidate_context
        )
        if should_collect_candidate_context:
            candidate_context_rows, candidate_context_errors = collect_candidate_context(
                df_top,
                max_rows=(
                    candidate_context_max_candidates
                    or config.llm_candidate_context_max_candidates
                ),
                providers=(
                    candidate_context_providers
                    if candidate_context_providers is not None
                    else config.llm_candidate_context_providers
                ),
                news_limit=config.llm_candidate_context_news_limit,
                announcement_limit=config.llm_candidate_context_announcement_limit,
                cache_dir=(
                    config.data_dir / "candidate_context"
                    if config.llm_candidate_context_cache_enabled
                    else None
                ),
                cache_ttl_hours=config.llm_candidate_context_cache_ttl_hours,
                source_weights=event_source_weights,
            )
            degradation.append(
                f"Candidate context collected rows={len(candidate_context_rows)}"
            )
            if candidate_context_errors:
                sample = " | ".join(candidate_context_errors[:5])
                suffix = (
                    f" | +{len(candidate_context_errors) - 5} more"
                    if len(candidate_context_errors) > 5
                    else ""
                )
                degradation.append(f"Candidate context row errors: {sample}{suffix}")
        llm_context_degradation: list[str] = []
        effective_context = build_llm_context(
            base_context=llm_context if llm_context is not None else config.llm_context,
            context_files=llm_context_files,
            candidate_context_files=candidate_context_files,
            candidate_context_rows=candidate_context_rows,
            snapshot_df=snapshot_df,
            candidate_df=df_top,
            event_profile=screening.event_profile,
            max_chars=config.llm_context_max_chars,
            degradation=llm_context_degradation,
        )
        degradation.extend(llm_context_degradation)
        llm_prompt_degradation: list[str] = []
        llm_result = rank_candidates_with_metadata(
            picks,
            screening.ranking_hints,
            config.llm_api_key,
            config.llm_model,
            config.llm_base_url,
            context=effective_context,
            rank_weight=config.llm_rank_weight,
            max_retries=config.llm_max_retries,
            min_coverage=config.llm_min_coverage,
            fallback_models=config.llm_fallback_models,
            temperature=config.llm_temperature,
            json_mode=config.llm_json_mode,
            silent=config.llm_silent,
            channels=config.llm_channels,
            config_path=str(config.llm_config_path or ""),
            timeout_sec=config.llm_timeout_sec,
            max_tokens=config.llm_max_tokens,
            degradation=llm_prompt_degradation,
        )
        degradation.extend(llm_prompt_degradation)
        picks = llm_result.picks
        llm_market_view = llm_result.market_view
        llm_selection_logic = llm_result.selection_logic
        llm_portfolio_risk = llm_result.portfolio_risk
        llm_coverage = llm_result.coverage
        llm_parse_errors = llm_result.errors
        # The ranking counts as successful when at least one pick got a score.
        llm_ranked = any(p.llm_score is not None for p in picks)
        if not llm_ranked:
            degradation.append("LLM ranking failed: fell back to screen_score")
            for i, p in enumerate(picks):
                p.rank = i + 1
                p.final_score = p.screen_score
    else:
        if use_llm and not config.has_llm_config():
            degradation.append("LLM ranking skipped: no LLM config")
        for i, p in enumerate(picks):
            p.rank = i + 1
            p.final_score = p.screen_score

    # 8. Independent risk overlay
    if config.risk_enabled:
        picks, risk_degradation = apply_risk_overlay(
            picks,
            max_penalty=config.risk_max_penalty,
            veto_high_risk=config.risk_veto_high,
            profile=screening.risk_profile,
        )
        degradation.extend(risk_degradation)

    # 9. LLM-driven portfolio overlay. This runs before trimming so an
    # over-crowded sector can make room for a comparable candidate elsewhere.
    portfolio_concentration_notes: list[str] = []
    if config.portfolio_diversity_enabled:
        picks, portfolio_concentration_notes = apply_portfolio_overlay(
            picks,
            max_same_sector=config.portfolio_max_same_llm_sector,
            concentration_penalty=config.portfolio_concentration_penalty,
            profile=screening.portfolio_profile,
        )

    # 10. Trim to max_output
    picks = picks[:output_count]

    # 11. Optional L3 post-analysis, DSA is only one possible analyzer.
    if analyzer_names:
        picks, post_degradation = run_post_analyzers(
            picks,
            analyzer_names=analyzer_names,
            run_id=run_id,
            config=config,
            max_picks=analyzer_max_picks,
            scorecard_profile=screening.scorecard_profile,
        )
        degradation.extend(post_degradation)
    return ScreenResult(
        strategy=strategy,
        market=market,
        strategy_version=strat.version,
        strategy_category=strat.category,
        snapshot_count=snapshot_count,
        after_filter_count=after_filter_count,
        picks=picks,
        run_id=run_id,
        llm_ranked=llm_ranked,
        llm_market_view=llm_market_view,
        llm_selection_logic=llm_selection_logic,
        llm_portfolio_risk=llm_portfolio_risk,
        llm_coverage=llm_coverage,
        llm_parse_errors=llm_parse_errors,
        degradation=degradation,
        snapshot_source=snapshot_source,
        source_errors=source_errors,
        deep_analysis_requested=("dsa" in analyzer_names),
        post_analyzers=analyzer_names,
        daily_enriched=daily_enriched,
        daily_enrich_count=daily_enrich_count,
        risk_enabled=config.risk_enabled,
        portfolio_diversity_enabled=config.portfolio_diversity_enabled,
        portfolio_concentration_notes=portfolio_concentration_notes,
    )
def _df_to_picks(df: pd.DataFrame) -> list[Pick]:
    """Build one Pick per DataFrame row, ranked by row order starting at 1.

    Core quote fields are read from the English column name first and fall
    back to the Chinese column names when the English one is absent.
    ``final_score`` starts out equal to ``screen_score``.
    """
    # Only factor-score columns that actually exist in the frame are read.
    present_factor_columns = {
        factor: column
        for factor, column in factor_score_columns().items()
        if column in df.columns
    }
    result: list[Pick] = []
    for position, (_index, row) in enumerate(df.iterrows(), start=1):
        get = row.get
        factor_scores = {}
        for factor, column in present_factor_columns.items():
            factor_scores[factor] = _safe_float(get(column)) or 0.0
        screen_score = float(get("screen_score", 0))
        pick = Pick(
            rank=position,
            code=normalize_code(get("code", get("代码", "")), allow_ticker=True),
            name=str(get("name", get("名称", get("股票名称", "")))),
            screen_score=screen_score,
            final_score=screen_score,
            price=float(get("price", get("最新价", 0)) or 0),
            change_pct=float(get("change_pct", get("涨跌幅", 0)) or 0),
            amount=float(get("amount", get("成交额", 0)) or 0),
            total_mv=_safe_float(get("total_mv", get("总市值"))),
            turnover_rate=_safe_float(get("turnover_rate", get("换手率"))),
            volume_ratio=_safe_float(get("volume_ratio", get("量比"))),
            pe_ratio=_safe_float(get("pe_ratio", get("市盈率"))),
            pb_ratio=_safe_float(get("pb_ratio", get("市净率"))),
            industry=_safe_text(get("industry", get("行业", get("所属行业", "")))),
            concepts=_safe_text(get("concepts", get("概念", get("概念题材", "")))),
            industry_rank=_safe_int(get("industry_rank")),
            industry_change_pct=_safe_float(get("industry_change_pct")),
            industry_heat_score=_safe_float(get("industry_heat_score")),
            concept_heat_score=_safe_float(get("concept_heat_score")),
            board_heat_score=_safe_float(get("board_heat_score")),
            board_heat_latest_score=_safe_float(get("board_heat_latest_score")),
            board_heat_trend_score=_safe_float(get("board_heat_trend_score")),
            board_heat_persistence_score=_safe_float(get("board_heat_persistence_score")),
            board_heat_cooling_score=_safe_float(get("board_heat_cooling_score")),
            board_heat_observations=_safe_int(get("board_heat_observations")),
            board_heat_state=_safe_text(get("board_heat_state")),
            board_heat_summary=_safe_text(get("board_heat_summary")),
            change_60d=_safe_float(get("change_60d")),
            signal_score=_safe_float(get("signal_score")),
            ma_bullish=_safe_bool(get("ma_bullish")),
            price_above_ma20=_safe_bool(get("price_above_ma20")),
            macd_status=str(get("macd_status", "") or ""),
            rsi_status=str(get("rsi_status", "") or ""),
            breakout_20d_pct=_safe_float(get("breakout_20d_pct")),
            range_20d_pct=_safe_float(get("range_20d_pct")),
            volume_ratio_20d=_safe_float(get("volume_ratio_20d")),
            body_pct=_safe_float(get("body_pct")),
            pullback_to_ma20_pct=_safe_float(get("pullback_to_ma20_pct")),
            consolidation_days_20d=_safe_int(get("consolidation_days_20d")),
            volatility_20d_pct=_safe_float(get("volatility_20d_pct")),
            max_drawdown_20d_pct=_safe_float(get("max_drawdown_20d_pct")),
            atr_20_pct=_safe_float(get("atr_20_pct")),
            daily_quality_score=_safe_float(get("daily_quality_score")),
            daily_quality_flags=_safe_text(get("daily_quality_flags")),
            daily_source=_safe_text(get("daily_source")),
            factor_scores=factor_scores,
        )
        result.append(pick)
    return result
def _sort_screened_candidates(df: pd.DataFrame, screening=None) -> pd.DataFrame:
"""Sort scored candidates deterministically with factor-aware tie breakers."""
factor_order = ["stability", "activity", "momentum", "value"]
if screening is not None and screening.factor_weights:
factor_order = [
factor
for factor, _weight in sorted(
screening.factor_weights.items(),
key=lambda item: (-float(item[1]), item[0]),
)
]
sort_columns = [
column
for column in ["screen_score"] + [f"factor_{factor}_score" for factor in factor_order]
if column in df.columns
]
ascending = [False] * len(sort_columns)
if "code" in df.columns:
sort_columns.append("code")
ascending.append(True)
if not sort_columns:
return df
return df.sort_values(sort_columns, ascending=ascending, kind="mergesort")
def _required_snapshot_columns(filters) -> list[str]:
columns: list[str] = []
if filters.exclude_st:
columns.append("name")
if filters.amount_min is not None:
columns.append("amount")
if filters.price_min is not None or filters.price_max is not None:
columns.append("price")
if filters.market_cap_min is not None or filters.market_cap_max is not None:
columns.append("total_mv")
if filters.pe_ttm_min is not None or filters.pe_ttm_max is not None:
columns.append("pe_ratio")
if filters.pb_min is not None or filters.pb_max is not None:
columns.append("pb_ratio")
if filters.volume_ratio_min is not None:
columns.append("volume_ratio")
if filters.turnover_rate_min is not None:
columns.append("turnover_rate")
if filters.change_pct_min is not None or filters.change_pct_max is not None:
columns.append("change_pct")
return list(dict.fromkeys(columns))
def _event_source_weights(event_profile: dict[str, object]) -> dict[str, float] | None:
value = (event_profile or {}).get("source_weights")
if not isinstance(value, dict):
return None
result: dict[str, float] = {}
for key, raw in value.items():
try:
result[str(key)] = float(raw)
except (TypeError, ValueError):
continue
return result or None
def _daily_source_health_notes(health: dict[str, object], *, limit: int = 4) -> list[str]:
    """Summarise degraded daily data sources as short notes, worst first.

    A source is reported when its state dict marks it ``disabled`` or shows a
    positive ``failures`` or ``total_failures`` count; entries whose state is
    not a dict are ignored. Disabled sources come first, then sources with
    current failures, then sources with only historical failures; within a
    group, higher failure counts come first and the source name breaks ties.

    Args:
        health: Mapping of source name to its state dict.
        limit: Maximum number of per-source notes; a value of 0 or less
            yields only the trailing "+N more" entry.

    Returns:
        Notes such as ``"<source> disabled,failures=3"``, followed by
        ``"+N more"`` when some degraded sources were not listed.
    """
    source_states: list[tuple[tuple[int, float, float, str], str, dict[object, object]]] = []
    for source, raw_state in health.items():
        if not isinstance(raw_state, dict):
            continue
        failures = _safe_float(raw_state.get("failures")) or 0.0
        total_failures = _safe_float(raw_state.get("total_failures")) or 0.0
        disabled = bool(raw_state.get("disabled"))
        if not disabled and failures <= 0 and total_failures <= 0:
            continue
        severity_key = (
            0 if disabled else 1 if failures > 0 else 2,
            -failures,
            -total_failures,
            str(source),
        )
        source_states.append((severity_key, str(source), raw_state))

    notes: list[str] = []
    # Sort on the severity key alone: comparing whole tuples would fall
    # through to the state dicts on a tie, and dicts do not support "<".
    for _severity_key, source, raw_state in sorted(source_states, key=lambda item: item[0]):
        # Check the cap before appending so a non-positive limit emits nothing.
        if len(notes) >= limit:
            break
        failures = _safe_float(raw_state.get("failures")) or 0.0
        total_failures = _safe_float(raw_state.get("total_failures")) or 0.0
        disabled = bool(raw_state.get("disabled"))
        last_rows = _safe_float(raw_state.get("last_rows")) or 0.0
        parts: list[str] = []
        if disabled:
            parts.append("disabled")
        if failures > 0:
            parts.append(f"failures={failures:g}")
        elif total_failures > 0:
            # Historical failures are only shown when there are no current ones.
            parts.append(f"total_failures={total_failures:g}")
        if last_rows > 0:
            parts.append(f"last_rows={last_rows:g}")
        if parts:
            notes.append(f"{source} " + ",".join(parts))
    hidden_count = len(source_states) - len(notes)
    if hidden_count > 0:
        notes.append(f"+{hidden_count} more")
    return notes
def _format_filter_waterfall(steps: list[dict[str, object]], *, limit: int = 8) -> str:
parts: list[str] = []
for step in steps[:limit]:
text = (
f"{step.get('filter')} {step.get('before')}->{step.get('after')} "
f"removed={step.get('removed')}"
)
samples = step.get("samples")
if isinstance(samples, list) and samples:
sample_names = [
str(item.get("name") or item.get("code") or item.get("value") or "")
for item in samples
if isinstance(item, dict)
]
sample_names = [item for item in sample_names if item]
if sample_names:
text += f" samples={','.join(sample_names[:3])}"
if step.get("suggestion"):
text += f" next={step.get('suggestion')}"
parts.append(text)
hidden = len(steps) - len(parts)
if hidden > 0:
parts.append(f"+{hidden} more")
return "; ".join(parts)
def _safe_text(v: object) -> str:
    """Normalise ``v`` through ``safe_text`` using this module's ``max_len=120``."""
    max_len = 120
    return safe_text(v, max_len=max_len)