# MoFin/venv/lib/python3.12/site-packages/alphasift/doctor.py

# -*- coding: utf-8 -*-
"""Runtime diagnostic helpers for AlphaSift data sources."""

from __future__ import annotations

from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

from alphasift.config import Config
from alphasift.daily import daily_source_health_snapshot, fetch_daily_history
from alphasift.snapshot import (
    fetch_snapshot_with_fallback,
    snapshot_source_health_snapshot,
)


@dataclass
class SourceCheckResult:
    """Outcome of probing one family of data sources (snapshot or daily).

    Attributes:
        status: Verdict for this family. The checks in this module produce
            ``"ok"``, ``"degraded"``, ``"failed"`` or ``"skipped"``.
        sources: Names of the sources that were requested for the check.
        source: Name of the source reported by the fetched frame; empty
            when nothing was fetched.
        rows: Number of rows in the fetched frame.
        fallback_used: Whether the fetch reported relying on a fallback.
        stale: Whether the fetched data was flagged as stale.
        stale_age_hours: Reported age of stale data in hours, if any.
        errors: Stringified error messages collected during the check.
        health: Mapping returned by the source-health snapshot helpers.
    """

    # Only ``status`` is mandatory; every other field defaults to "nothing
    # observed" so skipped and failed checks can be built tersely.
    status: str
    sources: list[str] = field(default_factory=list)
    source: str = ""
    rows: int = 0
    fallback_used: bool = False
    stale: bool = False
    stale_age_hours: float | None = None
    errors: list[str] = field(default_factory=list)
    health: dict[str, dict[str, float | bool]] = field(default_factory=dict)


@dataclass
class DataSourcesDoctorResult:
    """Complete doctor report covering every checked data-source family.

    Attributes:
        status: Overall verdict aggregated from the individual checks.
        generated_at: Timestamp string recording when the report was built.
        config: Settings that were in effect for the run.
        snapshot: Result of the snapshot-source check.
        daily: Result of the daily-history check, or ``None`` when that
            check was not requested.
        recommendations: Human-readable follow-up hints.
    """

    status: str
    generated_at: str
    config: dict[str, Any]
    snapshot: SourceCheckResult
    daily: SourceCheckResult | None = None
    recommendations: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the report as a plain dict with a ``source_health`` summary.

        The dataclass fields are converted with ``asdict``; an additional
        ``"source_health"`` key gathers the health mapping of each family in
        one place (an empty mapping stands in for a missing daily check).
        """
        daily_health = {} if self.daily is None else self.daily.health
        return {
            **asdict(self),
            "source_health": {
                "snapshot": self.snapshot.health,
                "daily": daily_health,
            },
        }


def doctor_data_sources(
    config: Config,
    *,
    snapshot_sources: list[str] | None = None,
    daily_source: str | None = None,
    daily_code: str = "000001",
    run_live: bool = True,
    check_daily: bool = True,
) -> DataSourcesDoctorResult:
    """Run the snapshot and (optionally) daily K-line checks and build a report.

    Args:
        config: Project configuration supplying defaults and cache paths.
        snapshot_sources: Snapshot sources to probe; falls back to
            ``config.snapshot_source_priority`` when empty or ``None``.
        daily_source: Daily-history source; falls back to
            ``config.daily_source`` when empty or ``None``.
        daily_code: Code used for the daily-history probe.
        run_live: When false, both checks are reported as ``"skipped"``
            without fetching data.
        check_daily: When false, the daily check is omitted entirely.

    Returns:
        The aggregated report. Tushare credentials are reported only as a
        configured/not-configured flag, never as the token value.
    """
    priority = list(snapshot_sources or config.snapshot_source_priority)
    chosen_daily_source = daily_source or config.daily_source

    snapshot_result = _check_snapshot_sources(
        config,
        sources=priority,
        run_live=run_live,
    )

    daily_result = None
    if check_daily:
        daily_result = _check_daily_sources(
            config,
            source=chosen_daily_source,
            code=daily_code,
            run_live=run_live,
        )

    advice = _build_recommendations(snapshot_result, daily_result)
    # An omitted daily check counts as "skipped" for the overall verdict.
    daily_status = "skipped" if daily_result is None else daily_result.status
    overall = _overall_status([snapshot_result.status, daily_status])

    # Timestamp is taken after the checks have finished.
    finished_at = datetime.now(timezone.utc).isoformat()
    effective_config = {
        "snapshot_source_priority": priority,
        "daily_source": chosen_daily_source,
        "daily_code": daily_code if check_daily else "",
        "fallback_snapshot_path": str(config.fallback_snapshot_path or ""),
        "daily_history_cache_dir": str(config.daily_history_cache_dir or ""),
        "tushare_configured": bool(_has_configured_tushare()),
        "live_checks": bool(run_live),
    }
    return DataSourcesDoctorResult(
        status=overall,
        generated_at=finished_at,
        config=effective_config,
        snapshot=snapshot_result,
        daily=daily_result,
        recommendations=advice,
    )


def _check_snapshot_sources(
    config: Config,
    *,
    sources: list[str],
    run_live: bool,
) -> SourceCheckResult:
    """Probe the snapshot source chain and summarise what happened.

    Args:
        config: Supplies the fallback snapshot path and its maximum age.
        sources: Snapshot source names to try.
        run_live: When false, no fetch is attempted and the result is
            ``"skipped"`` with the current health mapping attached.

    Returns:
        ``"failed"`` when the fetch raises, ``"degraded"`` when the fetched
        frame reports ``fallback_used``, otherwise ``"ok"``.
    """
    # NOTE(review): on the live path this first reading is not used; health
    # is read again after the fetch. The call is kept to preserve behavior.
    baseline_health = snapshot_source_health_snapshot(sources)
    if not run_live:
        return SourceCheckResult(
            status="skipped",
            sources=sources,
            health=baseline_health,
        )

    try:
        frame = fetch_snapshot_with_fallback(
            sources,
            required_columns=["code", "name", "price"],
            fallback_snapshot_path=config.fallback_snapshot_path,
            fallback_max_age_hours=config.snapshot_fallback_max_age_hours,
            market="cn",
        )
    except Exception as exc:  # noqa: BLE001 - doctor must aggregate failures.
        return SourceCheckResult(
            status="failed",
            sources=sources,
            errors=[str(exc)],
            health=snapshot_source_health_snapshot(sources),
        )

    # Fetch metadata travels on the frame's ``attrs`` mapping.
    meta = frame.attrs
    used_fallback = bool(meta.get("fallback_used"))
    reported_errors = meta.get("source_errors") or []
    return SourceCheckResult(
        status="degraded" if used_fallback else "ok",
        sources=sources,
        source=str(meta.get("snapshot_source", "")),
        rows=int(len(frame)),
        fallback_used=used_fallback,
        stale=bool(meta.get("stale")),
        stale_age_hours=meta.get("stale_age_hours"),
        errors=[str(entry) for entry in reported_errors],
        health=snapshot_source_health_snapshot(sources),
    )


def _check_daily_sources(
    config: Config,
    *,
    source: str,
    code: str,
    run_live: bool,
) -> SourceCheckResult:
    """Probe the daily K-line source for one code and summarise the outcome.

    Args:
        config: Supplies the lookback window and daily-history cache settings.
        source: Daily-history source name to request.
        code: Code whose history is fetched for the probe.
        run_live: When false, no fetch is attempted and the result is
            ``"skipped"`` with the current health mapping attached.

    Returns:
        ``"failed"`` when the fetch raises, ``"degraded"`` when the fetched
        frame reports ``daily_stale``, otherwise ``"ok"``.
    """
    # NOTE(review): on the live path this first reading is not used; health
    # is read again after the fetch. The call is kept to preserve behavior.
    baseline_health = daily_source_health_snapshot()
    if not run_live:
        return SourceCheckResult(
            status="skipped",
            sources=[source],
            health=baseline_health,
        )

    try:
        frame = fetch_daily_history(
            code,
            lookback_days=config.daily_lookback_days,
            source=source,
            retries=0,
            cache_dir=config.daily_history_cache_dir,
            # The config stores the TTL in hours; the fetcher takes seconds.
            cache_ttl_seconds=config.daily_history_cache_ttl_hours * 3600,
        )
    except Exception as exc:  # noqa: BLE001 - doctor must aggregate failures.
        return SourceCheckResult(
            status="failed",
            sources=[source],
            errors=[str(exc)],
            health=daily_source_health_snapshot(),
        )

    # Fetch metadata travels on the frame's ``attrs`` mapping.
    meta = frame.attrs
    is_stale = bool(meta.get("daily_stale"))
    reported_errors = meta.get("source_errors")
    return SourceCheckResult(
        status="degraded" if is_stale else "ok",
        sources=[source],
        source=str(meta.get("daily_source", "")),
        rows=int(len(frame)),
        # Any recorded source error is treated as evidence of a fallback.
        fallback_used=bool(reported_errors),
        stale=is_stale,
        errors=[str(entry) for entry in (reported_errors or [])],
        health=daily_source_health_snapshot(),
    )


def _overall_status(statuses: list[str]) -> str:
    """Collapse per-check statuses into one overall verdict.

    ``"skipped"`` entries are ignored. With nothing left the verdict is
    ``"skipped"``; if every remaining entry is ``"ok"`` it is ``"ok"``; if
    at least one is ``"ok"`` or ``"degraded"`` it is ``"degraded"``;
    anything else is ``"failed"``.
    """
    seen = {entry for entry in statuses if entry != "skipped"}
    if not seen:
        return "skipped"
    if seen == {"ok"}:
        return "ok"
    # A partial success or a degraded check keeps the run usable.
    if seen & {"ok", "degraded"}:
        return "degraded"
    return "failed"


def _build_recommendations(
    snapshot: SourceCheckResult,
    daily: SourceCheckResult | None,
) -> list[str]:
    """Turn check results into human-readable follow-up hints.

    Args:
        snapshot: Result of the snapshot-source check.
        daily: Result of the daily-history check, or ``None`` when it was
            not requested.

    Returns:
        One hint per detected problem. When no problem was found the list
        holds a single message: an "all clear" if at least one check ran,
        or a reminder to run live checks if everything was skipped.
    """
    recommendations: list[str] = []
    if snapshot.status == "failed":
        recommendations.append(
            "Snapshot failed: check network access and SNAPSHOT_SOURCE_PRIORITY; attach this doctor output to issue #18."
        )
    elif snapshot.fallback_used:
        recommendations.append(
            "Snapshot used last-good cache: live sources are degraded; inspect snapshot.errors for the failing provider."
        )
    if daily is not None:
        if daily.status == "failed":
            recommendations.append(
                "Daily K-line failed: try DAILY_SOURCE=auto or verify TUSHARE_TOKEN/Tencent/Sina/Akshare connectivity."
            )
        elif daily.stale:
            recommendations.append(
                "Daily K-line used stale cache: refresh network-backed sources before relying on fresh technical filters."
            )
    if recommendations:
        return recommendations

    # Skipped checks never set failure/fallback/stale flags, so an empty
    # list does not prove health. Do not claim usability when nothing ran.
    nothing_checked = snapshot.status == "skipped" and (
        daily is None or daily.status == "skipped"
    )
    if nothing_checked:
        return [
            "Live checks were skipped: re-run with live checks enabled to verify data-source connectivity."
        ]
    return ["Data sources look usable for a basic AlphaSift run."]


def _has_configured_tushare() -> bool:
    """Report whether a Tushare token is present in the environment.

    Checks ``TUSHARE_TOKEN`` and ``TUSHARE_API_TOKEN``; a value made only
    of whitespace counts as unset. The token itself is never returned.
    """
    import os

    token_variables = ("TUSHARE_TOKEN", "TUSHARE_API_TOKEN")
    return any(os.getenv(name, "").strip() for name in token_variables)


def write_doctor_report(path: str | Path, result: DataSourcesDoctorResult) -> Path:
    """Write the doctor report to ``path`` as indented UTF-8 JSON.

    Missing parent directories are created. Non-ASCII characters are
    written as-is rather than escaped.

    Returns:
        The destination as a ``Path``.
    """
    import json

    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(result.to_dict(), ensure_ascii=False, indent=2)
    target.write_text(serialized, encoding="utf-8")
    return target