Files
MoFin/venv/lib/python3.12/site-packages/tldextract/suffix_list.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

119 lines
3.8 KiB
Python

"""tldextract helpers for testing and fetching remote resources."""
import logging
import pkgutil
import re
from collections.abc import Sequence
from typing import cast
import requests
from requests_file import FileAdapter
from .cache import DiskCache
# Logger for this module; find_first_response uses it to warn about URLs that
# fail to fetch.
LOG = logging.getLogger("tldextract")
# Matches one suffix rule per line: any leading ".", "*" or "!" characters,
# then a word character, then the rest of the non-whitespace run. With
# MULTILINE, "^" anchors at every line start, so blank lines and lines that
# begin with anything else (such as "//" comments) produce no match.
PUBLIC_SUFFIX_RE = re.compile(r"^(?P<suffix>[.*!]*\w[\S]*)", re.UNICODE | re.MULTILINE)
# Marker that extract_tlds_from_suffix_list splits on: text before it is
# parsed as public suffixes, text after it as private suffixes.
PUBLIC_PRIVATE_SUFFIX_SEPARATOR = "// ===BEGIN PRIVATE DOMAINS==="
class SuffixListNotFound(LookupError):  # noqa: N818
    """A recoverable error while looking up a suffix list.

    Raised by ``find_first_response`` when none of the candidate URLs could
    be fetched. Recoverable because you can specify backup URLs, or use this
    library's bundled snapshot (``_get_suffix_lists`` does the latter when
    ``fallback_to_snapshot`` is true).
    """
def find_first_response(
    cache: DiskCache,
    urls: Sequence[str],
    cache_fetch_timeout: float | int | None = None,
    session: requests.Session | None = None,
) -> str:
    """Return the text of the first successfully fetched URL.

    URLs are tried in order through ``cache.cached_fetch_url``. A URL that
    fails with a ``requests`` error is logged as a warning and skipped; any
    other exception propagates to the caller.

    Args:
        cache: Cache whose ``cached_fetch_url`` performs the fetch.
        urls: Candidate locations of the Public Suffix List, tried in order.
        cache_fetch_timeout: Passed as ``timeout`` to every fetch.
        session: Session to fetch with. When omitted, a temporary session
            with a ``FileAdapter`` mounted for ``file://`` URLs is created
            and closed before this function exits.

    Returns:
        The value returned by ``cache.cached_fetch_url`` for the first URL
        that succeeds.

    Raises:
        SuffixListNotFound: If ``urls`` is empty or every URL failed with a
            ``requests.exceptions.RequestException``.
    """
    owns_session = session is None
    if session is None:
        session = requests.Session()
        session.mount("file://", FileAdapter())

    try:
        for url in urls:
            try:
                return cache.cached_fetch_url(
                    session=session, url=url, timeout=cache_fetch_timeout
                )
            except requests.exceptions.RequestException:
                # One unreachable location is not fatal: record it and move
                # on to the next candidate.
                LOG.warning(
                    "Exception reading Public Suffix List url %s", url, exc_info=True
                )
    finally:
        # Close only a session created here; a caller-supplied session stays
        # open for the caller to manage.
        if owns_session:
            session.close()

    raise SuffixListNotFound(
        "No remote Public Suffix List found. Consider using a mirror, or avoid this"
        " fetch by constructing your TLDExtract with `suffix_list_urls=()`."
    )
def extract_tlds_from_suffix_list(suffix_list_text: str) -> tuple[list[str], list[str]]:
    """Parse the raw suffix list text for its different designations of suffixes.

    The text is split at the first ``PUBLIC_PRIVATE_SUFFIX_SEPARATOR``.
    Everything before the separator is scanned for public suffixes and
    everything after it for private ones, using ``PUBLIC_SUFFIX_RE``. When
    the separator is absent, the whole text counts as public and the private
    list is empty.

    Args:
        suffix_list_text: Full contents of a suffix list file.

    Returns:
        A ``(public_tlds, private_tlds)`` pair of suffix strings, each in the
        order the suffixes appear in the text.
    """
    public_text, _, private_text = suffix_list_text.partition(
        PUBLIC_PRIVATE_SUFFIX_SEPARATOR
    )

    public_tlds = [m.group("suffix") for m in PUBLIC_SUFFIX_RE.finditer(public_text)]
    private_tlds = [m.group("suffix") for m in PUBLIC_SUFFIX_RE.finditer(private_text)]
    return public_tlds, private_tlds
def get_suffix_lists(
    cache: DiskCache,
    urls: Sequence[str],
    cache_fetch_timeout: float | int | None,
    fallback_to_snapshot: bool,
    session: requests.Session | None = None,
) -> tuple[list[str], list[str]]:
    """Fetch, parse, and cache the suffix lists.

    Delegates to ``cache.run_and_cache``, which is handed ``_get_suffix_lists``
    together with every argument of this function, under the
    ``"publicsuffix.org-tlds"`` namespace.

    Args:
        cache: Cache used both for ``run_and_cache`` and, further down, for
            fetching the list itself.
        urls: Candidate locations of the Public Suffix List, tried in order.
        cache_fetch_timeout: Timeout forwarded to the fetch.
        fallback_to_snapshot: Whether the bundled snapshot may be used when
            no URL can be fetched.
        session: Optional session forwarded to the fetch.

    Returns:
        A ``(public_tlds, private_tlds)`` pair of suffix strings.
    """
    return cache.run_and_cache(
        func=_get_suffix_lists,
        namespace="publicsuffix.org-tlds",
        kwargs={
            "cache": cache,
            "urls": urls,
            "cache_fetch_timeout": cache_fetch_timeout,
            "fallback_to_snapshot": fallback_to_snapshot,
            "session": session,
        },
        # Only these two arguments are named here; presumably they are the
        # ones that determine the cache key -- confirm against
        # DiskCache.run_and_cache.
        hashed_argnames=["urls", "fallback_to_snapshot"],
    )
def _get_suffix_lists(
    cache: DiskCache,
    urls: Sequence[str],
    cache_fetch_timeout: float | int | None,
    fallback_to_snapshot: bool,
    session: requests.Session | None = None,
) -> tuple[list[str], list[str]]:
    """Fetch and parse the suffix lists, optionally falling back to the snapshot.

    The list text is obtained with ``find_first_response``. If that raises
    ``SuffixListNotFound`` and ``fallback_to_snapshot`` is true, the
    ``.tld_set_snapshot`` resource of the ``tldextract`` package is decoded
    as UTF-8 and parsed instead.

    Args:
        cache: Cache passed through to ``find_first_response``.
        urls: Candidate locations of the Public Suffix List, tried in order.
        cache_fetch_timeout: Timeout forwarded to the fetch.
        fallback_to_snapshot: Whether the bundled snapshot may be used when
            no URL can be fetched.
        session: Optional session forwarded to the fetch.

    Returns:
        A ``(public_tlds, private_tlds)`` pair of suffix strings.

    Raises:
        SuffixListNotFound: If no URL could be fetched and either
            ``fallback_to_snapshot`` is false or the bundled snapshot could
            not be loaded.
    """
    try:
        text = find_first_response(
            cache, urls, cache_fetch_timeout=cache_fetch_timeout, session=session
        )
    except SuffixListNotFound as exc:
        if not fallback_to_snapshot:
            # Propagate the original error unchanged.
            raise
        maybe_pkg_data = pkgutil.get_data("tldextract", ".tld_set_snapshot")
        if maybe_pkg_data is None:
            # pkgutil.get_data returns None when the package cannot be
            # located or its loader cannot serve data; report that clearly
            # instead of failing on ``None.decode``.
            raise SuffixListNotFound(
                "No remote Public Suffix List found, and the bundled snapshot"
                " could not be loaded."
            ) from exc
        text = maybe_pkg_data.decode("utf-8")

    public_tlds, private_tlds = extract_tlds_from_suffix_list(text)
    return public_tlds, private_tlds