fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
231 lines
7.6 KiB
Python
231 lines
7.6 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""Runtime diagnostic helpers for AlphaSift data sources."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import asdict, dataclass, field
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from alphasift.config import Config
|
|
from alphasift.daily import daily_source_health_snapshot, fetch_daily_history
|
|
from alphasift.snapshot import (
|
|
fetch_snapshot_with_fallback,
|
|
snapshot_source_health_snapshot,
|
|
)
|
|
|
|
|
|
@dataclass
class SourceCheckResult:
    """Outcome of probing one family of data sources.

    Only ``status`` is mandatory. Every other field defaults to an empty or
    falsy value, so skipped and failed checks can be constructed with just
    the details that are known.
    """

    # Check outcome; this module emits "ok", "degraded", "failed" or "skipped".
    status: str
    # Names of the sources that were considered for this check.
    sources: list[str] = field(default_factory=list)
    # Source name reported by the fetched frame ("" when nothing was fetched).
    source: str = ""
    # Number of rows in the fetched frame.
    rows: int = 0
    # True when the fetch did not come cleanly from the preferred source.
    fallback_used: bool = False
    # True when the fetched data was flagged as stale.
    stale: bool = False
    # Age of the stale data in hours, when the fetch layer reports one.
    stale_age_hours: float | None = None
    # Stringified errors collected while fetching.
    errors: list[str] = field(default_factory=list)
    # Health snapshot mapping (presumably keyed by source name - confirm
    # against the snapshot/daily health helpers).
    health: dict[str, dict[str, float | bool]] = field(default_factory=dict)
|
|
|
|
|
|
@dataclass
class DataSourcesDoctorResult:
    """Aggregate, serialisable report produced by the data-source doctor."""

    # Overall status derived from the individual check statuses.
    status: str
    # ISO-8601 timestamp of when the report was generated.
    generated_at: str
    # Effective settings the checks were run with.
    config: dict[str, Any]
    # Result of the snapshot source check.
    snapshot: SourceCheckResult
    # Result of the daily K-line check, or None when it was not requested.
    daily: SourceCheckResult | None = None
    # Human-readable follow-up suggestions.
    recommendations: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the report as a plain dictionary.

        The result is ``dataclasses.asdict`` of this object plus one extra
        ``"source_health"`` key that gathers the snapshot and daily health
        mappings in one place. Daily health is an empty dict when no daily
        check was run.
        """
        report = asdict(self)
        snapshot_health = self.snapshot.health
        daily_health = {} if self.daily is None else self.daily.health
        report["source_health"] = {
            "snapshot": snapshot_health,
            "daily": daily_health,
        }
        return report
|
|
|
|
|
|
def doctor_data_sources(
    config: Config,
    *,
    snapshot_sources: list[str] | None = None,
    daily_source: str | None = None,
    daily_code: str = "000001",
    run_live: bool = True,
    check_daily: bool = True,
) -> DataSourcesDoctorResult:
    """Check snapshot and daily K-line source health without exposing secrets.

    Args:
        config: Runtime configuration supplying defaults and cache locations.
        snapshot_sources: Snapshot source names to probe; falls back to
            ``config.snapshot_source_priority`` when empty or ``None``.
        daily_source: Daily K-line source name; falls back to
            ``config.daily_source`` when empty or ``None``.
        daily_code: Stock code used for the daily K-line probe.
        run_live: When false, both checks are reported as "skipped".
        check_daily: When false, the daily check is not run at all and
            ``daily`` is ``None`` in the report.

    Returns:
        A report holding the overall status, a UTC timestamp, the effective
        settings, the per-family results and recommendations. For Tushare
        only a boolean "configured" flag is recorded, never the token.
    """
    sources = list(snapshot_sources or config.snapshot_source_priority)
    daily_source_name = daily_source or config.daily_source

    snapshot_result = _check_snapshot_sources(
        config, sources=sources, run_live=run_live
    )

    daily_result = None
    if check_daily:
        daily_result = _check_daily_sources(
            config,
            source=daily_source_name,
            code=daily_code,
            run_live=run_live,
        )

    advice = _build_recommendations(snapshot_result, daily_result)

    # A daily check that was not requested counts as "skipped" for the
    # purpose of the overall status.
    daily_status = "skipped" if daily_result is None else daily_result.status
    overall = _overall_status([snapshot_result.status, daily_status])

    generated_at = datetime.now(timezone.utc).isoformat()
    settings = {
        "snapshot_source_priority": sources,
        "daily_source": daily_source_name,
        "daily_code": daily_code if check_daily else "",
        "fallback_snapshot_path": str(config.fallback_snapshot_path or ""),
        "daily_history_cache_dir": str(config.daily_history_cache_dir or ""),
        "tushare_configured": bool(_has_configured_tushare()),
        "live_checks": bool(run_live),
    }

    return DataSourcesDoctorResult(
        status=overall,
        generated_at=generated_at,
        config=settings,
        snapshot=snapshot_result,
        daily=daily_result,
        recommendations=advice,
    )
|
|
|
|
|
|
def _check_snapshot_sources(
    config: Config,
    *,
    sources: list[str],
    run_live: bool,
) -> SourceCheckResult:
    """Probe the snapshot sources and summarise the outcome.

    With ``run_live`` false no fetch is attempted and the result is
    "skipped", carrying only the current health snapshot. Otherwise a "cn"
    market snapshot requiring the ``code``, ``name`` and ``price`` columns is
    fetched through the fallback-aware helper. Any exception yields a
    "failed" result holding the exception text. A successful fetch is
    "degraded" when the frame's attrs flag ``fallback_used``, else "ok".
    """
    health = snapshot_source_health_snapshot(sources)
    if not run_live:
        return SourceCheckResult(status="skipped", sources=sources, health=health)

    try:
        frame = fetch_snapshot_with_fallback(
            sources,
            required_columns=["code", "name", "price"],
            fallback_snapshot_path=config.fallback_snapshot_path,
            fallback_max_age_hours=config.snapshot_fallback_max_age_hours,
            market="cn",
        )
    except Exception as exc:  # noqa: BLE001 - doctor must aggregate failures.
        message = str(exc)
        return SourceCheckResult(
            status="failed",
            sources=sources,
            errors=[message],
            # Health is re-read after the attempt rather than reusing the
            # snapshot taken before it.
            health=snapshot_source_health_snapshot(sources),
        )

    attrs = frame.attrs
    used_fallback = bool(attrs.get("fallback_used"))
    reported_errors = attrs.get("source_errors", []) or []
    return SourceCheckResult(
        status="degraded" if used_fallback else "ok",
        sources=sources,
        source=str(attrs.get("snapshot_source", "")),
        rows=len(frame),
        fallback_used=used_fallback,
        stale=bool(attrs.get("stale")),
        stale_age_hours=attrs.get("stale_age_hours"),
        errors=[str(entry) for entry in reported_errors],
        health=snapshot_source_health_snapshot(sources),
    )
|
|
|
|
|
|
def _check_daily_sources(
    config: Config,
    *,
    source: str,
    code: str,
    run_live: bool,
) -> SourceCheckResult:
    """Probe the daily K-line source for one stock code.

    With ``run_live`` false no fetch is attempted and the result is
    "skipped", carrying only the current health snapshot. Otherwise the
    history for ``code`` is fetched once (``retries=0``) using the lookback
    and cache settings from ``config``. Any exception yields a "failed"
    result holding the exception text. A successful fetch is "degraded" when
    the frame's attrs flag ``daily_stale``, else "ok". ``fallback_used`` is
    set whenever the frame reports any source errors.
    """
    health = daily_source_health_snapshot()
    if not run_live:
        return SourceCheckResult(status="skipped", sources=[source], health=health)

    try:
        frame = fetch_daily_history(
            code,
            lookback_days=config.daily_lookback_days,
            source=source,
            retries=0,
            cache_dir=config.daily_history_cache_dir,
            # Config stores the TTL in hours; the fetcher takes seconds.
            cache_ttl_seconds=config.daily_history_cache_ttl_hours * 3600,
        )
    except Exception as exc:  # noqa: BLE001 - doctor must aggregate failures.
        message = str(exc)
        return SourceCheckResult(
            status="failed",
            sources=[source],
            errors=[message],
            health=daily_source_health_snapshot(),
        )

    attrs = frame.attrs
    is_stale = bool(attrs.get("daily_stale"))
    reported_errors = attrs.get("source_errors")
    return SourceCheckResult(
        status="degraded" if is_stale else "ok",
        sources=[source],
        source=str(attrs.get("daily_source", "")),
        rows=len(frame),
        fallback_used=bool(reported_errors),
        stale=is_stale,
        errors=[str(entry) for entry in (reported_errors or [])],
        health=daily_source_health_snapshot(),
    )
|
|
|
|
|
|
def _overall_status(statuses: list[str]) -> str:
    """Collapse individual check statuses into a single overall status.

    "skipped" entries are ignored. The result is:

    * "skipped" when nothing else remains,
    * "ok" when every remaining status is "ok",
    * "degraded" when at least one remaining status is "ok" or "degraded",
    * "failed" otherwise.
    """
    seen = {entry for entry in statuses if entry != "skipped"}
    if not seen:
        return "skipped"
    if seen == {"ok"}:
        return "ok"
    # A partial success (some "ok") is reported the same way as an explicit
    # "degraded" result.
    if seen & {"ok", "degraded"}:
        return "degraded"
    return "failed"
|
|
|
|
|
|
def _build_recommendations(
    snapshot: SourceCheckResult,
    daily: SourceCheckResult | None,
) -> list[str]:
    """Translate check results into human-readable follow-up suggestions.

    Args:
        snapshot: Result of the snapshot source check.
        daily: Result of the daily K-line check, or ``None`` when that check
            was not requested.

    Returns:
        A non-empty list of messages. Problems are listed snapshot first,
        then daily. When there is nothing to report, the list contains one
        message: an "all good" note if at least one check actually ran, or a
        note that the checks were skipped if none did.
    """
    recommendations: list[str] = []

    if snapshot.status == "failed":
        recommendations.append(
            "Snapshot failed: check network access and SNAPSHOT_SOURCE_PRIORITY; attach this doctor output to issue #18."
        )
    elif snapshot.fallback_used:
        recommendations.append(
            "Snapshot used last-good cache: live sources are degraded; inspect snapshot.errors for the failing provider."
        )

    if daily is not None:
        if daily.status == "failed":
            recommendations.append(
                "Daily K-line failed: try DAILY_SOURCE=auto or verify TUSHARE_TOKEN/Tencent/Sina/Akshare connectivity."
            )
        elif daily.stale:
            recommendations.append(
                "Daily K-line used stale cache: refresh network-backed sources before relying on fresh technical filters."
            )

    if recommendations:
        return recommendations

    # Do not claim the sources are usable when no check actually ran: with
    # every result skipped (or absent) nothing has been verified.
    performed = [
        result
        for result in (snapshot, daily)
        if result is not None and result.status != "skipped"
    ]
    if not performed:
        return [
            "Live data-source checks were skipped: rerun with live checks enabled to verify the sources."
        ]
    return ["Data sources look usable for a basic AlphaSift run."]
|
|
|
|
|
|
def _has_configured_tushare() -> bool:
    """Report whether a Tushare token is set in the environment.

    Checks ``TUSHARE_TOKEN`` first, then ``TUSHARE_API_TOKEN``. A value that
    is empty or only whitespace does not count. The token itself is never
    returned.
    """
    import os

    candidate_names = ("TUSHARE_TOKEN", "TUSHARE_API_TOKEN")
    return any(os.getenv(name, "").strip() for name in candidate_names)
|
|
|
|
|
|
def write_doctor_report(path: str | Path, result: DataSourcesDoctorResult) -> Path:
    """Write a doctor report to ``path`` as UTF-8 JSON.

    Missing parent directories are created first. The report is serialised
    from ``result.to_dict()`` with a two-space indent and non-ASCII
    characters left unescaped.

    Returns:
        The destination as a ``Path``.
    """
    import json

    destination = Path(path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(result.to_dict(), ensure_ascii=False, indent=2)
    destination.write_text(serialized, encoding="utf-8")
    return destination
|