Files
MoFin/venv/lib/python3.12/site-packages/alphasift/scorer.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

504 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""screen_score calculation."""
import pandas as pd
from alphasift.models import ScreeningConfig
# Maps each factor name to the DataFrame column under which its score is
# stored. compute_screen_scores() writes these columns, and
# _normalized_factor_weights() ignores any weight whose factor is not a key
# of this table.
_FACTOR_COLUMNS = {
    "value": "factor_value_score",
    "liquidity": "factor_liquidity_score",
    "momentum": "factor_momentum_score",
    "reversal": "factor_reversal_score",
    "activity": "factor_activity_score",
    "stability": "factor_stability_score",
    "size": "factor_size_score",
    "theme_heat": "factor_theme_heat_score",
    "topic_alignment": "factor_topic_alignment_score",
}
# Default tuning constants for the factor scorers. _scoring_profile() copies
# this table and only accepts overrides for keys that already exist here, so
# a key must be listed below before a config can override it.
_DEFAULT_SCORING_PROFILE = {
    # --- read by _compute_momentum_score -----------------------------------
    "momentum_base": 60.0,
    "momentum_intraday_slope": 5.0,
    "momentum_chase_start_pct": 5.0,
    "momentum_chase_penalty_slope": 10.0,
    "momentum_downside_start_pct": -2.0,
    "momentum_downside_penalty_slope": 3.0,
    "momentum_60d_base": 55.0,
    "momentum_60d_slope": 0.9,
    "momentum_60d_overheat_pct": 45.0,
    "momentum_60d_overheat_penalty_slope": 0.8,
    "momentum_60d_breakdown_pct": -20.0,
    "momentum_60d_breakdown_penalty_slope": 0.7,
    "macd_bullish_bonus": 6.0,
    "macd_bearish_penalty": 8.0,
    # --- read by _compute_reversal_score -----------------------------------
    "reversal_ideal_change_pct": -3.0,
    "reversal_distance_penalty_slope": 13.0,
    "reversal_collapse_start_pct": -8.0,
    "reversal_collapse_penalty_slope": 10.0,
    "reversal_chase_start_pct": 1.0,
    "reversal_chase_penalty_slope": 8.0,
    "rsi_oversold_bonus": 10.0,
    "rsi_overbought_penalty": 14.0,
    # --- read by _compute_activity_score -----------------------------------
    "activity_ideal_volume_ratio": 2.0,
    "activity_volume_ratio_distance_slope": 15.0,
    "activity_high_volume_ratio": 5.0,
    "activity_high_volume_ratio_penalty_slope": 8.0,
    "activity_ideal_turnover_rate": 4.0,
    "activity_turnover_distance_slope": 8.0,
    "activity_high_turnover_rate": 12.0,
    "activity_high_turnover_penalty_slope": 5.0,
    # --- read by _compute_stability_score ----------------------------------
    "stability_base": 78.0,
    "stability_change_abs_penalty_slope": 3.0,
    "stability_hot_change_pct": 7.0,
    "stability_hot_change_penalty_slope": 5.0,
    "stability_high_turnover_rate": 10.0,
    "stability_high_turnover_penalty_slope": 2.0,
    "stability_high_volume_ratio": 5.0,
    "stability_high_volume_ratio_penalty_slope": 4.0,
    "stability_invalid_pe_penalty": 18.0,
    "stability_high_volatility_pct": 45.0,
    "stability_high_volatility_penalty_slope": 0.45,
    "stability_max_drawdown_floor_pct": -12.0,
    "stability_drawdown_penalty_slope": 1.2,
    "stability_high_atr_pct": 6.0,
    "stability_high_atr_penalty_slope": 2.0,
    "stability_low_daily_quality_score": 80.0,
    "stability_low_daily_quality_penalty_slope": 0.35,
    "stability_bad_daily_quality_flag_penalty": 8.0,
    # --- read by _compute_theme_heat_score ---------------------------------
    "theme_heat_unknown_score": 50.0,
    "theme_heat_change_slope": 6.0,
    "theme_heat_rank_bonus": 10.0,
    "theme_heat_trend_min_observations": 2.0,
    "theme_heat_trend_slope": 0.8,
    "theme_heat_trend_bonus_cap": 10.0,
    "theme_heat_cooling_penalty_slope": 0.8,
    "theme_heat_cooling_penalty_cap": 12.0,
    "theme_heat_persistence_min_score": 60.0,
    "theme_heat_persistence_slope": 0.08,
    "theme_heat_persistence_bonus_cap": 6.0,
    "theme_heat_cooling_score_penalty_slope": 0.6,
    "theme_heat_cooling_score_penalty_cap": 10.0,
    "theme_heat_overheat_score": 88.0,
    "theme_heat_overheat_penalty_slope": 0.5,
    # --- read by _compute_topic_alignment_score ----------------------------
    "topic_alignment_unknown_score": 50.0,
    "topic_alignment_match_bonus": 25.0,
    "topic_alignment_heat_weight": 0.25,
    "topic_alignment_unmatched_penalty": 12.0,
}
def compute_screen_scores(df: pd.DataFrame, config: ScreeningConfig) -> pd.DataFrame:
    """Return a copy of ``df`` extended with factor columns and ``screen_score``.

    Each factor series is stored, rounded to four decimals, under its
    ``_FACTOR_COLUMNS`` name. ``screen_score`` is the weighted sum of the
    unrounded factor series, using the normalized weights derived from
    ``config``, clipped to 0-100 (higher is better). ``df`` itself is not
    modified.
    """
    scored = df.copy()
    factor_series = _compute_factor_scores(scored, config)
    for factor_name, values in factor_series.items():
        scored[_FACTOR_COLUMNS[factor_name]] = values.round(4)

    scored["screen_score"] = 0.0
    for factor_name, share in _normalized_factor_weights(config).items():
        # Weights for factors that were not computed contribute nothing.
        if factor_name not in factor_series:
            continue
        scored["screen_score"] += factor_series[factor_name] * share
    scored["screen_score"] = scored["screen_score"].clip(0, 100)
    return scored
def factor_score_columns() -> dict[str, str]:
    """Return the factor-name to score-column mapping used in Pick output.

    A fresh dict is built on every call, so callers may mutate the result
    without affecting the module-level table.
    """
    return {**_FACTOR_COLUMNS}
def _normalized_factor_weights(config: ScreeningConfig) -> dict[str, float]:
    """Return factor weights that sum to 1.0.

    Explicit ``config.factor_weights`` are used when present; otherwise a
    legacy default is derived from ``config.tech_weight`` (value / liquidity /
    stability share ``1 - tech_weight``; momentum / activity share
    ``tech_weight``).

    Weights for factors that are not keys of ``_FACTOR_COLUMNS`` are dropped.
    Negative weights and non-finite weights (NaN, +/-inf) count as 0.0. When
    no positive weight remains, a fixed fallback mix is returned.

    Raises:
        TypeError / ValueError: if a recognized weight cannot be converted
            with ``float()``.
    """
    import math

    fundamental_share = 1 - config.tech_weight
    raw_weights = config.factor_weights or {
        "value": fundamental_share * 0.50,
        "liquidity": fundamental_share * 0.25,
        "stability": fundamental_share * 0.25,
        "momentum": config.tech_weight * 0.55,
        "activity": config.tech_weight * 0.45,
    }

    weights: dict[str, float] = {}
    for factor, weight in raw_weights.items():
        if factor not in _FACTOR_COLUMNS:
            continue
        value = float(weight)
        # max(nan, 0.0) evaluates to nan, and inf / inf is nan, so a single
        # non-finite weight used to turn the normalized weights into NaN.
        # Treat such weights like negative ones: they contribute nothing.
        weights[factor] = value if math.isfinite(value) and value > 0.0 else 0.0

    total = sum(weights.values())
    if total <= 0:
        return {"value": 0.4, "liquidity": 0.2, "momentum": 0.2, "activity": 0.2}
    return {factor: weight / total for factor, weight in weights.items()}
def _compute_factor_scores(df: pd.DataFrame, config: ScreeningConfig | None = None) -> dict[str, pd.Series]:
    """Compute every factor score series for ``df``, keyed by factor name.

    A falsy ``config`` is replaced by a default ``ScreeningConfig()``. The
    scoring profile resolved from the config is passed to the scorers that
    take tunable parameters; value, liquidity and size take none.
    """
    active_config = config if config else ScreeningConfig()
    profile = _scoring_profile(active_config)

    scores: dict[str, pd.Series] = {}
    scores["value"] = _compute_value_score(df)
    scores["liquidity"] = _compute_liquidity_score(df)
    scores["momentum"] = _compute_momentum_score(df, profile)
    scores["reversal"] = _compute_reversal_score(df, profile)
    scores["activity"] = _compute_activity_score(df, profile)
    scores["stability"] = _compute_stability_score(df, profile)
    scores["size"] = _compute_size_score(df)
    scores["theme_heat"] = _compute_theme_heat_score(df, profile)
    scores["topic_alignment"] = _compute_topic_alignment_score(df, profile)
    return scores
def _scoring_profile(config: ScreeningConfig) -> dict[str, float]:
    """Merge ``config.scoring_profile`` overrides into the default profile.

    Only keys already present in ``_DEFAULT_SCORING_PROFILE`` are honoured;
    unknown keys are ignored. Accepted override values are converted with
    ``float()``.
    """
    overrides = config.scoring_profile or {}
    recognized = {
        key: float(value)
        for key, value in overrides.items()
        if key in _DEFAULT_SCORING_PROFILE
    }
    return {**_DEFAULT_SCORING_PROFILE, **recognized}
def _compute_snapshot_score(df: pd.DataFrame) -> pd.Series:
    """Blend the fundamental-style factor scores into one 0-100 series.

    The factors are computed with the default ``ScreeningConfig`` and mixed
    as value 50%, liquidity 25% and stability 25%; the result is clipped to
    0-100.
    """
    factors = _compute_factor_scores(df)
    value_part = factors["value"] * 0.50
    liquidity_part = factors["liquidity"] * 0.25
    stability_part = factors["stability"] * 0.25
    return (value_part + liquidity_part + stability_part).clip(0, 100)
def _compute_tech_score(df: pd.DataFrame) -> pd.Series:
    """Blend the technical-style factor scores into one 0-100 series.

    The factors are computed with the default ``ScreeningConfig`` and mixed
    as momentum 55% and activity 45%; the result is clipped to 0-100.
    """
    factors = _compute_factor_scores(df)
    momentum_part = factors["momentum"] * 0.55
    activity_part = factors["activity"] * 0.45
    return (momentum_part + activity_part).clip(0, 100)
def _compute_value_score(df: pd.DataFrame) -> pd.Series:
score = pd.Series(50.0, index=df.index)
if "pe_ratio" in df.columns:
pe = pd.to_numeric(df["pe_ratio"], errors="coerce")
pe_score = _rank_score(pe.where((pe > 0) & (pe < 500)), lower_is_better=True, na_score=25)
score = score * 0.35 + pe_score * 0.65
if "pb_ratio" in df.columns:
pb = pd.to_numeric(df["pb_ratio"], errors="coerce")
pb_score = _rank_score(pb.where((pb > 0) & (pb < 50)), lower_is_better=True, na_score=25)
score = score * 0.55 + pb_score * 0.45
return score.clip(0, 100)
def _compute_liquidity_score(df: pd.DataFrame) -> pd.Series:
if "amount" not in df.columns:
return pd.Series(50.0, index=df.index)
import numpy as np
amount = pd.to_numeric(df["amount"], errors="coerce")
log_amount = np.log10(amount.clip(lower=1))
return _rank_score(log_amount.where(amount > 0), lower_is_better=False, na_score=20)
def _compute_momentum_score(df: pd.DataFrame, profile: dict[str, float]) -> pd.Series:
score = pd.Series(50.0, index=df.index)
if "change_pct" in df.columns:
change = pd.to_numeric(df["change_pct"], errors="coerce").fillna(0)
# Prefer constructive positive moves, but penalize chase-risk near limit-up.
intraday_score = profile["momentum_base"] + change * profile["momentum_intraday_slope"]
intraday_score = intraday_score - (
change - profile["momentum_chase_start_pct"]
).clip(lower=0) * profile["momentum_chase_penalty_slope"]
intraday_score = intraday_score - (
-change + profile["momentum_downside_start_pct"]
).clip(lower=0) * profile["momentum_downside_penalty_slope"]
score = score * 0.35 + intraday_score.clip(5, 100) * 0.65
if "change_60d" in df.columns:
change_60d = pd.to_numeric(df["change_60d"], errors="coerce").fillna(0)
trend_score = profile["momentum_60d_base"] + change_60d * profile["momentum_60d_slope"]
trend_score = trend_score - (
change_60d - profile["momentum_60d_overheat_pct"]
).clip(lower=0) * profile["momentum_60d_overheat_penalty_slope"]
trend_score = trend_score - (
-change_60d + profile["momentum_60d_breakdown_pct"]
).clip(lower=0) * profile["momentum_60d_breakdown_penalty_slope"]
score = score * 0.60 + trend_score.clip(5, 100) * 0.40
if "signal_score" in df.columns:
signal = pd.to_numeric(df["signal_score"], errors="coerce").fillna(50)
score = score * 0.70 + signal.clip(0, 100) * 0.30
if "macd_status" in df.columns:
macd = df["macd_status"].astype(str)
score = score + macd.map({
"bullish": profile["macd_bullish_bonus"],
"bearish": -profile["macd_bearish_penalty"],
}).fillna(0)
return score.clip(5, 100)
def _compute_reversal_score(df: pd.DataFrame, profile: dict[str, float]) -> pd.Series:
if "change_pct" not in df.columns:
return pd.Series(50.0, index=df.index)
change = pd.to_numeric(df["change_pct"], errors="coerce").fillna(0)
# Reversal setups prefer controlled weakness, not collapse.
score = 100 - (
change - profile["reversal_ideal_change_pct"]
).abs() * profile["reversal_distance_penalty_slope"]
score = score - (
-change + profile["reversal_collapse_start_pct"]
).clip(lower=0) * profile["reversal_collapse_penalty_slope"]
score = score - (
change - profile["reversal_chase_start_pct"]
).clip(lower=0) * profile["reversal_chase_penalty_slope"]
if "rsi_status" in df.columns:
rsi = df["rsi_status"].astype(str)
score = score + rsi.map({
"oversold": profile["rsi_oversold_bonus"],
"overbought": -profile["rsi_overbought_penalty"],
}).fillna(0)
if "change_60d" in df.columns:
change_60d = pd.to_numeric(df["change_60d"], errors="coerce").fillna(0)
score = score - (change_60d - 35).clip(lower=0) * 0.5
score = score - (-change_60d - 35).clip(lower=0) * 0.8
return score.clip(5, 100)
def _compute_activity_score(df: pd.DataFrame, profile: dict[str, float]) -> pd.Series:
score = pd.Series(50.0, index=df.index)
if "volume_ratio" in df.columns:
volume_ratio = pd.to_numeric(df["volume_ratio"], errors="coerce").fillna(1.0)
vr_score = 100 - (
volume_ratio - profile["activity_ideal_volume_ratio"]
).abs() * profile["activity_volume_ratio_distance_slope"]
vr_score = vr_score - (
volume_ratio - profile["activity_high_volume_ratio"]
).clip(lower=0) * profile["activity_high_volume_ratio_penalty_slope"]
score = score * 0.45 + vr_score.clip(5, 100) * 0.55
if "turnover_rate" in df.columns:
turnover = pd.to_numeric(df["turnover_rate"], errors="coerce").fillna(0)
turnover_score = 100 - (
turnover - profile["activity_ideal_turnover_rate"]
).abs() * profile["activity_turnover_distance_slope"]
turnover_score = turnover_score - (
turnover - profile["activity_high_turnover_rate"]
).clip(lower=0) * profile["activity_high_turnover_penalty_slope"]
turnover_score = turnover_score.where(turnover > 0, 40)
score = score * 0.55 + turnover_score.clip(5, 100) * 0.45
return score.clip(0, 100)
def _compute_stability_score(df: pd.DataFrame, profile: dict[str, float]) -> pd.Series:
score = pd.Series(profile["stability_base"], index=df.index)
if "change_pct" in df.columns:
change = pd.to_numeric(df["change_pct"], errors="coerce").fillna(0)
score -= change.abs().clip(upper=10) * profile["stability_change_abs_penalty_slope"]
score -= (
change - profile["stability_hot_change_pct"]
).clip(lower=0) * profile["stability_hot_change_penalty_slope"]
if "turnover_rate" in df.columns:
turnover = pd.to_numeric(df["turnover_rate"], errors="coerce").fillna(0)
score -= (
turnover - profile["stability_high_turnover_rate"]
).clip(lower=0) * profile["stability_high_turnover_penalty_slope"]
if "volume_ratio" in df.columns:
volume_ratio = pd.to_numeric(df["volume_ratio"], errors="coerce").fillna(1)
score -= (
volume_ratio - profile["stability_high_volume_ratio"]
).clip(lower=0) * profile["stability_high_volume_ratio_penalty_slope"]
if "pe_ratio" in df.columns:
pe = pd.to_numeric(df["pe_ratio"], errors="coerce")
score = score.where((pe.isna()) | (pe > 0), score - profile["stability_invalid_pe_penalty"])
if "signal_score" in df.columns:
signal = pd.to_numeric(df["signal_score"], errors="coerce").fillna(50)
score = score + (signal - 50) * 0.12
if "volatility_20d_pct" in df.columns:
volatility = pd.to_numeric(df["volatility_20d_pct"], errors="coerce")
score -= (
volatility - profile["stability_high_volatility_pct"]
).clip(lower=0).fillna(0) * profile["stability_high_volatility_penalty_slope"]
if "max_drawdown_20d_pct" in df.columns:
drawdown = pd.to_numeric(df["max_drawdown_20d_pct"], errors="coerce")
score -= (
profile["stability_max_drawdown_floor_pct"] - drawdown
).clip(lower=0).fillna(0) * profile["stability_drawdown_penalty_slope"]
if "atr_20_pct" in df.columns:
atr = pd.to_numeric(df["atr_20_pct"], errors="coerce")
score -= (
atr - profile["stability_high_atr_pct"]
).clip(lower=0).fillna(0) * profile["stability_high_atr_penalty_slope"]
if "daily_quality_score" in df.columns:
quality = pd.to_numeric(df["daily_quality_score"], errors="coerce")
score -= (
profile["stability_low_daily_quality_score"] - quality
).clip(lower=0).fillna(0) * profile["stability_low_daily_quality_penalty_slope"]
if "daily_quality_flags" in df.columns:
flags = df["daily_quality_flags"].fillna("").astype(str)
severe_flags = flags.str.contains("invalid_ohlc|non_positive_price|negative_volume|stale_cache")
score -= severe_flags.astype(float) * profile["stability_bad_daily_quality_flag_penalty"]
return score.clip(0, 100)
def _compute_size_score(df: pd.DataFrame) -> pd.Series:
if "total_mv" not in df.columns:
return pd.Series(50.0, index=df.index)
import numpy as np
mv = pd.to_numeric(df["total_mv"], errors="coerce")
log_mv = np.log10(mv.clip(lower=1))
return _rank_score(log_mv.where(mv > 0), lower_is_better=False, na_score=35)
def _compute_theme_heat_score(df: pd.DataFrame, profile: dict[str, float]) -> pd.Series:
base = pd.Series(profile["theme_heat_unknown_score"], index=df.index)
if "board_heat_score" in df.columns:
score = pd.to_numeric(df["board_heat_score"], errors="coerce").fillna(base)
elif "industry_heat_score" in df.columns or "concept_heat_score" in df.columns:
industry = _numeric_column(df, "industry_heat_score")
concept = _numeric_column(df, "concept_heat_score")
score = pd.concat([industry, concept], axis=1).max(axis=1).fillna(base)
elif "industry_change_pct" in df.columns:
change = pd.to_numeric(df["industry_change_pct"], errors="coerce").fillna(0)
score = base + change * profile["theme_heat_change_slope"]
if "industry_rank" in df.columns:
rank = pd.to_numeric(df["industry_rank"], errors="coerce")
score += (
(profile["theme_heat_rank_bonus"] - rank.clip(lower=1, upper=10))
.clip(lower=0)
.fillna(0)
)
else:
return base.clip(0, 100)
if "board_heat_trend_score" in df.columns:
trend = pd.to_numeric(df["board_heat_trend_score"], errors="coerce").fillna(0)
if "board_heat_observations" in df.columns:
observations = pd.to_numeric(df["board_heat_observations"], errors="coerce").fillna(0)
else:
observations = pd.Series(profile["theme_heat_trend_min_observations"], index=df.index)
trend_is_reliable = observations >= profile["theme_heat_trend_min_observations"]
trend_bonus = (trend.clip(lower=0) * profile["theme_heat_trend_slope"]).clip(
upper=profile["theme_heat_trend_bonus_cap"]
)
cooling_penalty = ((-trend).clip(lower=0) * profile["theme_heat_cooling_penalty_slope"]).clip(
upper=profile["theme_heat_cooling_penalty_cap"]
)
score = score + (trend_bonus - cooling_penalty).where(trend_is_reliable, 0)
if "board_heat_persistence_score" in df.columns:
persistence = pd.to_numeric(df["board_heat_persistence_score"], errors="coerce").fillna(0)
persistence_bonus = (
(persistence - profile["theme_heat_persistence_min_score"]).clip(lower=0)
* profile["theme_heat_persistence_slope"]
).clip(upper=profile["theme_heat_persistence_bonus_cap"])
score = score + persistence_bonus
if "board_heat_cooling_score" in df.columns:
cooling = pd.to_numeric(df["board_heat_cooling_score"], errors="coerce").fillna(0)
cooling_penalty = (cooling * profile["theme_heat_cooling_score_penalty_slope"]).clip(
upper=profile["theme_heat_cooling_score_penalty_cap"]
)
score = score - cooling_penalty
overheat = (score - profile["theme_heat_overheat_score"]).clip(lower=0)
score = score - overheat * profile["theme_heat_overheat_penalty_slope"]
return score.clip(0, 100)
def _compute_topic_alignment_score(df: pd.DataFrame, profile: dict[str, float]) -> pd.Series:
"""Score whether industry/concept labels align with hotspot route summaries."""
base = pd.Series(profile["topic_alignment_unknown_score"], index=df.index)
if not {"industry", "concepts", "board_heat_summary"} & set(df.columns):
return base
scores = []
for _, row in df.iterrows():
candidate_topics = _topic_tokens(row.get("industry")) | _topic_tokens(row.get("concepts"))
route_topics = _topic_tokens(row.get("board_heat_summary"))
if not candidate_topics or not route_topics:
scores.append(float(profile["topic_alignment_unknown_score"]))
continue
overlap = candidate_topics & route_topics
score = float(profile["topic_alignment_unknown_score"])
if overlap:
score += float(profile["topic_alignment_match_bonus"])
heat = pd.to_numeric(row.get("board_heat_score"), errors="coerce")
if pd.notna(heat):
score += max(float(heat) - 50.0, 0.0) * float(profile["topic_alignment_heat_weight"])
else:
score -= float(profile["topic_alignment_unmatched_penalty"])
scores.append(score)
return pd.Series(scores, index=df.index).clip(0, 100)
def _topic_tokens(value: object) -> set[str]:
text = str(value or "").strip()
if not text or text.lower() in {"nan", "none", "<na>"}:
return set()
normalized = text
for sep in ["|", ",", "", ";", "", "/"]:
normalized = normalized.replace(sep, " ")
tokens = set()
for raw in normalized.split():
token = raw.split(":", 1)[0].strip()
if token and token.lower() not in {"rank", "nan", "none", "<na>"}:
tokens.add(token)
return tokens
def _numeric_column(df: pd.DataFrame, column: str) -> pd.Series:
if column not in df.columns:
return pd.Series(pd.NA, index=df.index)
return pd.to_numeric(df[column], errors="coerce")
def _rank_score(
series: pd.Series,
*,
lower_is_better: bool,
na_score: float = 50.0,
) -> pd.Series:
numeric = pd.to_numeric(series, errors="coerce")
if numeric.notna().sum() == 0:
return pd.Series(float(na_score), index=series.index)
ranks = numeric.rank(
ascending=not lower_is_better,
na_option="keep",
pct=True,
) * 100
return ranks.fillna(float(na_score)).clip(0, 100)