Files
MoFin/venv/lib/python3.12/site-packages/alphasift/doctor.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

231 lines
7.6 KiB
Python

# -*- coding: utf-8 -*-
"""Runtime diagnostic helpers for AlphaSift data sources."""
from __future__ import annotations
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from alphasift.config import Config
from alphasift.daily import daily_source_health_snapshot, fetch_daily_history
from alphasift.snapshot import (
fetch_snapshot_with_fallback,
snapshot_source_health_snapshot,
)
@dataclass
class SourceCheckResult:
    """Outcome of probing one family of data sources.

    Only ``status`` is required; every other field defaults to an empty or
    neutral value so that "skipped" and "failed" results can be built from
    just the pieces that are known.
    """

    # Verdict for this source family; this module emits "ok", "degraded",
    # "failed" and "skipped".
    status: str
    # Names of the sources that were considered for the check.
    sources: list[str] = field(default_factory=list)
    # Name of the source reported on the fetched data ("" when not reported).
    source: str = ""
    # Number of rows in the fetched data.
    rows: int = 0
    # Whether the check flagged the data as coming from a fallback path.
    fallback_used: bool = False
    # Whether the fetched data was flagged as stale.
    stale: bool = False
    # Age of the stale data in hours, when the fetcher reported one.
    stale_age_hours: float | None = None
    # Stringified errors collected while fetching.
    errors: list[str] = field(default_factory=list)
    # Health snapshot as returned by the source-health helpers.
    health: dict[str, dict[str, float | bool]] = field(default_factory=dict)
@dataclass
class DataSourcesDoctorResult:
    """Machine-readable data-source doctor report."""

    # Overall verdict across the checks that were run.
    status: str
    # Timestamp string recording when the report was produced.
    generated_at: str
    # Summary of the effective configuration used for the checks.
    config: dict[str, Any]
    # Result of the snapshot source check.
    snapshot: SourceCheckResult
    # Result of the daily K-line check, or None when that check was not run.
    daily: SourceCheckResult | None = None
    # Human-readable follow-up hints.
    recommendations: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the report as a plain dict, detached from this instance.

        The payload is ``dataclasses.asdict(self)`` plus a ``"source_health"``
        entry holding the snapshot and daily health maps side by side
        (``{}`` for daily when no daily check was run).

        Returns:
            A dict that shares no mutable state with the report: the health
            maps are deep-copied so callers may edit the payload freely.
        """
        import copy

        payload = asdict(self)
        daily_health = self.daily.health if self.daily is not None else {}
        # asdict() already deep-copies the fields; copy the health maps too so
        # the convenience view does not alias the live result objects.
        payload["source_health"] = {
            "snapshot": copy.deepcopy(self.snapshot.health),
            "daily": copy.deepcopy(daily_health),
        }
        return payload
def doctor_data_sources(
    config: Config,
    *,
    snapshot_sources: list[str] | None = None,
    daily_source: str | None = None,
    daily_code: str = "000001",
    run_live: bool = True,
    check_daily: bool = True,
) -> DataSourcesDoctorResult:
    """Run the snapshot (and optionally daily K-line) checks and build a report.

    Args:
        config: Runtime configuration supplying defaults and cache locations.
        snapshot_sources: Snapshot sources to probe; when empty or None,
            ``config.snapshot_source_priority`` is used.
        daily_source: Daily source to probe; when empty or None,
            ``config.daily_source`` is used.
        daily_code: Security code used for the daily K-line probe.
        run_live: When False, both checks report "skipped" instead of fetching.
        check_daily: When False, the daily check is omitted from the report.

    Returns:
        The aggregated report. The config summary records only whether a
        Tushare token is configured, never the token itself.
    """
    priority = list(snapshot_sources or config.snapshot_source_priority)
    chosen_daily_source = daily_source or config.daily_source

    snapshot_result = _check_snapshot_sources(
        config, sources=priority, run_live=run_live
    )

    daily_result = None
    if check_daily:
        daily_result = _check_daily_sources(
            config,
            source=chosen_daily_source,
            code=daily_code,
            run_live=run_live,
        )

    advice = _build_recommendations(snapshot_result, daily_result)

    # An omitted daily check counts as "skipped" for the overall verdict.
    daily_status = "skipped" if daily_result is None else daily_result.status
    overall = _overall_status([snapshot_result.status, daily_status])

    timestamp = datetime.now(timezone.utc).isoformat()
    config_summary = {
        "snapshot_source_priority": priority,
        "daily_source": chosen_daily_source,
        "daily_code": daily_code if check_daily else "",
        "fallback_snapshot_path": str(config.fallback_snapshot_path or ""),
        "daily_history_cache_dir": str(config.daily_history_cache_dir or ""),
        "tushare_configured": bool(_has_configured_tushare()),
        "live_checks": bool(run_live),
    }
    return DataSourcesDoctorResult(
        status=overall,
        generated_at=timestamp,
        config=config_summary,
        snapshot=snapshot_result,
        daily=daily_result,
        recommendations=advice,
    )
def _check_snapshot_sources(
    config: Config,
    *,
    sources: list[str],
    run_live: bool,
) -> SourceCheckResult:
    """Probe the snapshot sources and summarise the outcome.

    Args:
        config: Supplies the fallback snapshot path and its maximum age.
        sources: Snapshot source names handed to the fetcher.
        run_live: When False, no fetch happens and the result is "skipped".

    Returns:
        "skipped" when live checks are off, "failed" when the fetch raises,
        "degraded" when the fetched frame reports ``fallback_used``, and
        "ok" otherwise. The health snapshot is re-read after the fetch.
    """
    initial_health = snapshot_source_health_snapshot(sources)
    if not run_live:
        return SourceCheckResult(
            status="skipped", sources=sources, health=initial_health
        )

    try:
        frame = fetch_snapshot_with_fallback(
            sources,
            required_columns=["code", "name", "price"],
            fallback_snapshot_path=config.fallback_snapshot_path,
            fallback_max_age_hours=config.snapshot_fallback_max_age_hours,
            market="cn",
        )
    except Exception as exc:  # noqa: BLE001 - doctor must aggregate failures.
        return SourceCheckResult(
            status="failed",
            sources=sources,
            errors=[str(exc)],
            health=snapshot_source_health_snapshot(sources),
        )

    # The fetcher reports provenance and freshness through the frame's attrs.
    meta = frame.attrs
    used_fallback = bool(meta.get("fallback_used"))
    verdict = "degraded" if used_fallback else "ok"
    reported_errors = list(map(str, meta.get("source_errors", []) or []))
    return SourceCheckResult(
        status=verdict,
        sources=sources,
        source=str(meta.get("snapshot_source", "")),
        rows=int(len(frame)),
        fallback_used=used_fallback,
        stale=bool(meta.get("stale")),
        stale_age_hours=meta.get("stale_age_hours"),
        errors=reported_errors,
        health=snapshot_source_health_snapshot(sources),
    )
def _check_daily_sources(
    config: Config,
    *,
    source: str,
    code: str,
    run_live: bool,
) -> SourceCheckResult:
    """Probe the daily K-line source for one code and summarise the outcome.

    Args:
        config: Supplies the lookback window and the history cache settings.
        source: Daily source name handed to the fetcher.
        code: Security code whose history is fetched.
        run_live: When False, no fetch happens and the result is "skipped".

    Returns:
        "skipped" when live checks are off, "failed" when the fetch raises,
        "degraded" when the fetched frame reports ``daily_stale``, and "ok"
        otherwise. ``fallback_used`` is set whenever the frame carries any
        ``source_errors``. The health snapshot is re-read after the fetch.
    """
    initial_health = daily_source_health_snapshot()
    if not run_live:
        return SourceCheckResult(
            status="skipped", sources=[source], health=initial_health
        )

    try:
        frame = fetch_daily_history(
            code,
            lookback_days=config.daily_lookback_days,
            source=source,
            retries=0,
            cache_dir=config.daily_history_cache_dir,
            # The config stores the TTL in hours; the fetcher takes seconds.
            cache_ttl_seconds=config.daily_history_cache_ttl_hours * 3600,
        )
    except Exception as exc:  # noqa: BLE001 - doctor must aggregate failures.
        return SourceCheckResult(
            status="failed",
            sources=[source],
            errors=[str(exc)],
            health=daily_source_health_snapshot(),
        )

    # The fetcher reports provenance and freshness through the frame's attrs.
    meta = frame.attrs
    is_stale = bool(meta.get("daily_stale"))
    verdict = "degraded" if is_stale else "ok"
    reported_errors = list(map(str, meta.get("source_errors", []) or []))
    return SourceCheckResult(
        status=verdict,
        sources=[source],
        source=str(meta.get("daily_source", "")),
        rows=int(len(frame)),
        fallback_used=bool(meta.get("source_errors")),
        stale=is_stale,
        errors=reported_errors,
        health=daily_source_health_snapshot(),
    )
def _overall_status(statuses: list[str]) -> str:
active = [status for status in statuses if status != "skipped"]
if not active:
return "skipped"
if all(status == "ok" for status in active):
return "ok"
if any(status == "ok" for status in active) or any(
status == "degraded" for status in active
):
return "degraded"
return "failed"
def _build_recommendations(
snapshot: SourceCheckResult,
daily: SourceCheckResult | None,
) -> list[str]:
recommendations: list[str] = []
if snapshot.status == "failed":
recommendations.append(
"Snapshot failed: check network access and SNAPSHOT_SOURCE_PRIORITY; attach this doctor output to issue #18."
)
elif snapshot.fallback_used:
recommendations.append(
"Snapshot used last-good cache: live sources are degraded; inspect snapshot.errors for the failing provider."
)
if daily is not None:
if daily.status == "failed":
recommendations.append(
"Daily K-line failed: try DAILY_SOURCE=auto or verify TUSHARE_TOKEN/Tencent/Sina/Akshare connectivity."
)
elif daily.stale:
recommendations.append(
"Daily K-line used stale cache: refresh network-backed sources before relying on fresh technical filters."
)
if not recommendations:
recommendations.append("Data sources look usable for a basic AlphaSift run.")
return recommendations
def _has_configured_tushare() -> bool:
import os
return bool(
os.getenv("TUSHARE_TOKEN", "").strip()
or os.getenv("TUSHARE_API_TOKEN", "").strip()
)
def write_doctor_report(path: str | Path, result: DataSourcesDoctorResult) -> Path:
    """Serialise *result* to pretty-printed UTF-8 JSON at *path*.

    Missing parent directories are created first. Non-ASCII characters are
    written as-is rather than escaped.

    Returns:
        The destination as a ``Path``.
    """
    import json

    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(result.to_dict(), ensure_ascii=False, indent=2)
    target.write_text(serialized, encoding="utf-8")
    return target