fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
105 lines
3.4 KiB
Python
105 lines
3.4 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""Shared normalization and safe parsing helpers."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import math
|
|
import re
|
|
|
|
# Lower-cased spellings that are treated as "no value" once text is stripped.
_NULL_TEXT_VALUES = {
    "",
    "nan",
    "none",
    "<na>",
    "na",
    "null",
}

# US-style ticker: starts with an ASCII letter or digit, followed by up to 19
# more letters/digits/dots/dashes (20 characters total). The lookahead demands
# at least one letter, because pure-digit strings are A-share codes.
_TICKER_RE = re.compile(
    r"^(?=.*[A-Za-z])[A-Za-z0-9][A-Za-z0-9.\-]{0,19}$"
)
|
|
|
|
|
|
def safe_text(value: object, *, max_len: int | None = None) -> str:
    """Convert *value* to stripped text, mapping null-like inputs to "".

    ``None``, a float NaN, and any value whose stripped string form matches
    (case-insensitively) an entry of ``_NULL_TEXT_VALUES`` all yield the
    empty string.

    Args:
        value: Any object; it is stringified with ``str()``.
        max_len: When given, the stripped text is cut to ``text[:max_len]``.

    Returns:
        The cleaned (and possibly truncated) text, or "" for null-like input.
    """
    is_missing = value is None or (isinstance(value, float) and math.isnan(value))
    if is_missing:
        return ""

    stripped = str(value).strip()
    if stripped.lower() in _NULL_TEXT_VALUES:
        return ""

    # Truncation happens after stripping, so surrounding whitespace never
    # counts against the limit.
    return stripped if max_len is None else stripped[:max_len]
|
|
|
|
|
|
def normalize_code(value: object, *, width: int = 6, allow_ticker: bool = False) -> str:
    """Normalize A-share style stock codes from numeric, prefixed, or suffixed text.

    A-share recognition always takes precedence and is unchanged. With
    ``allow_ticker=True``, text with no embedded A-share code that looks like
    a US-style ticker (AAPL, BRK-B) passes through uppercased instead of
    normalizing to "". Only opt in for structured code fields (snapshot rows,
    LLM ranking JSON, stored picks) — free-text mining paths must stay strict
    so garbage tokens keep dropping to "".

    Resolution order, after cleaning the input with ``safe_text`` (capped at
    80 characters):

    1. A trailing ".0" is dropped when everything before it is digits.
    2. All-digit text is left-padded with zeros to ``width`` and cut to its
       last ``width`` characters.
    3. Otherwise the first run of exactly six digits that is not adjacent to
       another digit is returned as-is. This pattern is fixed at six digits
       and does not follow ``width``.
    4. Otherwise, when ``allow_ticker`` is set and the text matches
       ``_TICKER_RE``, the upper-cased text is returned.
    5. Otherwise every digit character is concatenated and padded/cut as in
       step 2; text without any digit yields "".

    Args:
        value: Raw code in any loosely typed form.
        width: Target length for padded numeric codes.
        allow_ticker: Let ticker-shaped text through instead of dropping it.

    Returns:
        The normalized code, or "" when nothing usable is found.
    """

    def _fit(digits: str) -> str:
        # Zero-pad on the left, then keep only the rightmost ``width`` chars.
        return digits.zfill(width)[-width:]

    cleaned = safe_text(value, max_len=80)
    if not cleaned:
        return ""

    # Float-formatted integers such as "600000.0" lose the ".0" suffix.
    if cleaned.endswith(".0") and cleaned[:-2].isdigit():
        cleaned = cleaned[:-2]

    if cleaned.isdigit():
        return _fit(cleaned)

    embedded = re.search(r"(?<!\d)(\d{6})(?!\d)", cleaned)
    if embedded is not None:
        return embedded.group(1)

    if allow_ticker and _TICKER_RE.match(cleaned):
        return cleaned.upper()

    # Last resort: salvage whatever digits are scattered through the text.
    digits_only = "".join(filter(str.isdigit, cleaned))
    if not digits_only:
        return ""
    return _fit(digits_only)
|
|
|
|
|
|
def safe_float(value: object, default: float | None = None) -> float | None:
    """Parse a float from loose snapshot/provider values.

    The input is cleaned with ``safe_text``; "%" and "," characters are then
    removed before calling ``float()``. The percent sign is only stripped:
    the number is not divided by 100.

    Args:
        value: Raw value of any type.
        default: Returned when the input is empty/null-like, is the
            placeholder "-" or "--", cannot be parsed, or parses to NaN.

    Returns:
        The parsed float, or *default*. Infinite values are returned as
        parsed; only NaN is replaced by *default*.
    """
    raw = safe_text(value)
    if raw in {"", "-", "--"}:
        return default

    numeric = raw.replace("%", "").replace(",", "")
    try:
        number = float(numeric)
    except (TypeError, ValueError):
        return default

    # Text such as "NaN%" is not caught by safe_text but still parses to NaN.
    return default if math.isnan(number) else number
|
|
|
|
|
|
def safe_int(value: object, default: int | None = None) -> int | None:
    """Parse an int from loose numeric values.

    Parsing is delegated to ``safe_float``, so the same loose inputs are
    accepted; the resulting float is converted with ``int()``, which
    truncates toward zero. Because the value passes through a float, very
    large integers may lose precision.

    Args:
        value: Raw value of any type.
        default: Returned when ``safe_float`` yields ``None`` or when the
            parsed float cannot be represented as an int (infinity).

    Returns:
        The truncated integer, or *default*.
    """
    parsed = safe_float(value)
    if parsed is None:
        return default
    try:
        return int(parsed)
    except (OverflowError, ValueError):
        # safe_float filters NaN but lets +/-inf through (e.g. "inf",
        # "1e999"); int() raises OverflowError on those. A "safe" parser
        # must fall back to the default instead of propagating the error.
        return default
|
|
|
|
|
|
def safe_bool(value: object) -> bool | None:
    """Parse a bool when the input is present, otherwise return None.

    Real ``bool`` instances are returned unchanged. Any other input is
    cleaned with ``safe_text``; empty/null-like text gives ``None``, and the
    remaining text is true only if it equals "1", "true", "yes" or "on"
    (case-insensitive). Every other present value is ``False``, including
    numeric forms such as ``1.0``, whose text "1.0" is not in the accepted
    set.
    """
    # str(True) / str(False) are never null-like, so answering for bools
    # up front gives the same result as cleaning the text first.
    if isinstance(value, bool):
        return value

    token = safe_text(value)
    if token:
        return token.lower() in {"1", "true", "yes", "on"}
    return None
|
|
|
|
|
|
def bounded_float(value: object, *, low: float, high: float) -> float | None:
    """Parse a float and clamp it into the inclusive range [low, high].

    Args:
        value: Raw value, parsed with ``safe_float``.
        low: Lower bound applied to the parsed number.
        high: Upper bound applied to the parsed number.

    Returns:
        The clamped number, or ``None`` when ``safe_float`` cannot produce a
        value.
    """
    number = safe_float(value)
    if number is None:
        return None

    # Cap from above first, then lift to the floor.
    capped = min(number, high)
    return max(low, capped)
|
|
|
|
|
|
def safe_string_list(value: object, *, max_len: int = 80) -> list[str]:
    """Return a cleaned string list from list-like API payload fields.

    Only genuine ``list`` inputs are processed; anything else gives ``[]``.
    Each item is cleaned with ``safe_text`` (truncated to *max_len*), and
    items that clean to the empty string are dropped.

    Args:
        value: Payload field expected to hold a list.
        max_len: Per-item length cap forwarded to ``safe_text``.

    Returns:
        The non-empty cleaned strings, in their original order.
    """
    if not isinstance(value, list):
        return []

    cleaned: list[str] = []
    for item in value:
        entry = safe_text(item, max_len=max_len)
        if entry:
            cleaned.append(entry)
    return cleaned
|