Files
MoFin/venv/lib/python3.12/site-packages/alphasift/normalize.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

105 lines
3.4 KiB
Python

# -*- coding: utf-8 -*-
"""Shared normalization and safe parsing helpers."""
from __future__ import annotations
import math
import re
# Spellings treated as "no value". safe_text() lower-cases the stripped text
# before the membership test, so every entry here must be lower-case.
_NULL_TEXT_VALUES = {"", "nan", "none", "<na>", "na", "null"}
# US-style ticker: ASCII letters/digits with optional dot/dash separators,
# at least one letter (pure digits are A-share codes).
# The pattern accepts 1-20 characters in total; the first one must be
# alphanumeric, so a leading "." or "-" is rejected.
_TICKER_RE = re.compile(r"^(?=.*[A-Za-z])[A-Za-z0-9][A-Za-z0-9.\-]{0,19}$")
def safe_text(value: object, *, max_len: int | None = None) -> str:
    """Convert ``value`` to stripped text, mapping null-like inputs to "".

    ``None``, a float NaN, and any value whose stripped string form matches
    (case-insensitively) an entry of ``_NULL_TEXT_VALUES`` all produce the
    empty string. Any other value yields its stripped ``str()`` form, cut to
    the first ``max_len`` characters when ``max_len`` is given.
    """
    is_nan = isinstance(value, float) and math.isnan(value)
    if value is None or is_nan:
        return ""
    cleaned = str(value).strip()
    if cleaned.lower() in _NULL_TEXT_VALUES:
        return ""
    # Truncation happens after the null check, so the check always sees the
    # full stripped text.
    return cleaned if max_len is None else cleaned[:max_len]
def normalize_code(value: object, *, width: int = 6, allow_ticker: bool = False) -> str:
    """Reduce numeric, prefixed, or suffixed text to an A-share style code.

    Resolution order (the first rule that applies wins):

    1. Empty / null-like input (per ``safe_text``) returns "".
    2. A trailing ".0" after an all-digit prefix is dropped.
    3. All-digit text is zero-padded to ``width`` and cut to its last
       ``width`` characters.
    4. A standalone run of exactly six digits embedded in the text is
       returned as-is (this rule is fixed at six digits regardless of
       ``width``).
    5. With ``allow_ticker=True``, text matching ``_TICKER_RE`` (a US-style
       ticker such as AAPL or BRK-B) is returned uppercased.
    6. Otherwise every digit in the text is concatenated and padded/cut as
       in rule 3; text with no digits returns "".

    A-share recognition (rules 2-4) always runs before the ticker rule, so
    enabling ``allow_ticker`` never changes the result for inputs that carry
    an A-share code. Only opt in for structured code fields (snapshot rows,
    LLM ranking JSON, stored picks); free-text mining paths should stay
    strict so that garbage tokens keep collapsing to "".
    """
    raw = safe_text(value, max_len=80)
    if not raw:
        return ""

    # Float-rendered integers such as "600519.0" lose the trailing ".0".
    if raw.endswith(".0") and raw[:-2].isdigit():
        raw = raw[:-2]

    if raw.isdigit():
        return raw.zfill(width)[-width:]

    embedded = re.search(r"(?<!\d)(\d{6})(?!\d)", raw)
    if embedded is not None:
        return embedded.group(1)

    if allow_ticker and _TICKER_RE.match(raw):
        return raw.upper()

    # Last resort: keep only the digits and normalize their width.
    digits = "".join(filter(str.isdigit, raw))
    if not digits:
        return ""
    return digits.zfill(width)[-width:]
def safe_float(value: object, default: float | None = None) -> float | None:
    """Leniently parse a float out of snapshot/provider values.

    The input is cleaned with ``safe_text``; percent signs and thousands
    separators (",") are removed before conversion. ``default`` is returned
    when the cleaned text is empty, is a dash placeholder ("-" or "--"),
    cannot be converted by ``float()``, or converts to NaN.
    """
    text = safe_text(value)
    if text in {"", "-", "--"}:
        return default
    numeric_text = text.replace("%", "").replace(",", "")
    try:
        number = float(numeric_text)
    except (TypeError, ValueError):
        return default
    return default if math.isnan(number) else number
def safe_int(value: object, default: int | None = None) -> int | None:
    """Parse an int from loose numeric values.

    The value is first parsed with ``safe_float`` and then truncated toward
    zero by ``int()``.

    Args:
        value: Any loosely formatted numeric input accepted by ``safe_float``.
        default: Returned when ``value`` cannot be parsed or is not finite.

    Returns:
        The truncated integer, or ``default``.
    """
    parsed = safe_float(value)
    # safe_float lets +/-inf through (e.g. "inf" or an overflowing literal
    # like "1e400"); int(inf) raises OverflowError, so treat it as unparsable.
    if parsed is None or math.isinf(parsed):
        return default
    return int(parsed)
def safe_bool(value: object) -> bool | None:
    """Interpret ``value`` as a boolean, or return None when it is absent.

    Absent means ``safe_text`` reduces the input to "". Real ``bool``
    instances are returned unchanged. Anything else is true only when its
    cleaned, lower-cased text is one of "1", "true", "yes", or "on"; every
    other present value is false.
    """
    text = safe_text(value)
    if text == "":
        return None
    if isinstance(value, bool):
        return value
    truthy_spellings = ("1", "true", "yes", "on")
    return text.lower() in truthy_spellings
def bounded_float(value: object, *, low: float, high: float) -> float | None:
    """Parse ``value`` with ``safe_float`` and clamp it into [low, high].

    Returns None when the value cannot be parsed; otherwise the parsed
    number limited to the inclusive range.
    """
    number = safe_float(value)
    if number is None:
        return None
    # Cap at the upper bound first, then lift to the lower bound.
    capped = min(number, high)
    return max(low, capped)
def safe_string_list(value: object, *, max_len: int = 80) -> list[str]:
    """Clean a list payload field into a list of non-empty strings.

    Inputs that are not ``list`` instances yield an empty list. Each item is
    passed through ``safe_text`` (truncated to ``max_len`` characters) and
    items that clean down to "" are dropped; order is preserved.
    """
    if not isinstance(value, list):
        return []
    cleaned: list[str] = []
    for item in value:
        text = safe_text(item, max_len=max_len)
        if text:
            cleaned.append(text)
    return cleaned