Files
MoFin/venv/lib/python3.12/site-packages/huggingface_hub/utils/_parsing.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

112 lines
3.4 KiB
Python

# Copyright 2025-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Parsing helpers shared across modules."""
import re
import time
# One or more digits immediately followed by one or more ASCII letters
# (case-insensitive). Group 1 is the number, group 2 is the unit suffix.
RE_NUMBER_WITH_UNIT = re.compile(r"(\d+)([a-z]+)", flags=re.IGNORECASE)

# Size multipliers, in bytes. These are decimal (powers of 1000) multiples and
# the keys are single lowercase letters.
BYTE_UNITS: dict[str, int] = {
    "k": 10**3,
    "m": 10**6,
    "g": 10**9,
    "t": 10**12,
    "p": 10**15,
}

# Duration multipliers, in seconds. A month is approximated as 30 days and a
# year as 365 days. Note that "m" is minutes here; months are spelled "mo".
TIME_UNITS: dict[str, int] = {
    "s": 1,
    "m": 60,
    "h": 3_600,
    "d": 86_400,
    "w": 604_800,
    "mo": 2_592_000,
    "y": 31_536_000,
}
def parse_size(value: str) -> int:
    """Convert a size string such as `"10k"` or `"5G"` into a number of bytes.

    The value is either a plain integer (taken as bytes) or digits directly
    followed by one of the single-letter suffixes in `BYTE_UNITS`
    (case-insensitive). Multi-letter suffixes such as `"MB"` are not
    recognised and raise `ValueError`.

    Raises:
        ValueError: If the value is empty, malformed, or uses an unknown unit.
    """
    size_in_bytes = _parse_with_unit(value, units=BYTE_UNITS)
    return size_in_bytes
def parse_duration(value: str) -> int:
    """Convert a duration string such as `"10s"` or `"2h"` into a number of seconds.

    The value is either a plain integer (taken as seconds) or digits directly
    followed by one of the suffixes in `TIME_UNITS` (case-insensitive).

    Raises:
        ValueError: If the value is empty, malformed, or uses an unknown unit.
    """
    duration_in_seconds = _parse_with_unit(value, units=TIME_UNITS)
    return duration_in_seconds
def _parse_with_unit(value: str, units: dict[str, int]) -> int:
    """Turn `"<digits>"` or `"<digits><unit>"` into an integer.

    Args:
        value: Text to parse. Surrounding whitespace is ignored.
        units: Mapping from lowercase unit suffix to its multiplier.

    Returns:
        The integer itself when `value` is a plain number, otherwise the
        number multiplied by the multiplier of its unit.

    Raises:
        ValueError: If `value` is blank, does not look like digits followed by
            letters, or carries a suffix that is not a key of `units`.
    """
    text = value.strip()
    if text == "":
        raise ValueError("Value cannot be empty.")

    # Fast path: anything `int()` accepts on its own is returned unscaled.
    try:
        plain_number = int(value)
    except ValueError:
        plain_number = None
    if plain_number is not None:
        return plain_number

    parsed = RE_NUMBER_WITH_UNIT.fullmatch(text)
    if parsed is None:
        raise ValueError(f"Invalid value '{value}'. Must match pattern '\\d+[a-z]+' or be a plain number.")

    digits, suffix = parsed.groups()
    number = int(digits)
    unit = suffix.lower()  # unit lookup is case-insensitive
    if unit in units:
        return number * units[unit]
    raise ValueError(f"Unknown unit '{unit}'. Must be one of {list(units.keys())}.")
def format_duration(secs: int | None) -> str:
"""Format a duration in seconds as a short human-readable string (e.g. `"1m 32s"`, `"2h 15m"`, `"45s"`).
Returns `"--"` when `secs` is `None` so it can be used directly as a CLI table cell.
"""
if secs is None:
return "--"
secs = int(secs)
if secs < 60:
return f"{secs}s"
if secs < 3600:
return f"{secs // 60}m {secs % 60}s"
return f"{secs // 3600}h {(secs % 3600) // 60}m"
def format_timesince(ts: float) -> str:
    """Describe how long ago the timestamp `ts` (in seconds) was, relative to `time.time()`.

    Anything less than 20 seconds old (including timestamps in the future)
    is reported as `"a few seconds ago"`. Otherwise the age is rounded to a
    whole number of the first unit whose count does not exceed that unit's
    cap, falling back to years.

    Vaguely inspired by Django's `timesince` formatter.
    """
    # (unit name, seconds per unit, largest count still shown in this unit).
    # Months are approximated as 30 days and years as 365 days; years have no cap.
    ladder = (
        ("second", 1, 60),
        ("minute", 60, 60),
        ("hour", 3_600, 24),
        ("day", 86_400, 6),
        ("week", 7 * 86_400, 6),
        ("month", 30 * 86_400, 11),
        ("year", 365 * 86_400, None),
    )

    elapsed = time.time() - ts
    if elapsed < 20:
        return "a few seconds ago"

    for unit, seconds_per_unit, ceiling in ladder:  # noqa: B007
        count = round(elapsed / seconds_per_unit)
        if ceiling is None or count > ceiling:
            # Too large for this unit (or the uncapped last unit): move on.
            continue
        break

    plural = "s" if count > 1 else ""
    return f"{count} {unit}{plural} ago"