fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
461 lines
18 KiB
Python
461 lines
18 KiB
Python
"""
|
|
Lazy registration for optional feature routers. Each LAZY_FEATURES entry
|
|
imports its module only on the first request matching its path prefix,
|
|
saving ~700 MB at idle for deployments that don't use these features.
|
|
First hit pays the import cost (1-3 s for heavy modules); /openapi.json
|
|
omits each feature's routes until the feature is warmed.
|
|
"""
|
|
|
|
import asyncio
|
|
import importlib
|
|
import sys
|
|
from dataclasses import dataclass, field
|
|
from typing import TYPE_CHECKING, Callable, Dict, Tuple
|
|
|
|
from starlette.types import Receive, Scope, Send
|
|
|
|
from litellm._logging import verbose_proxy_logger
|
|
|
|
if TYPE_CHECKING:
|
|
from fastapi import APIRouter, FastAPI
|
|
|
|
|
|
def _include_router(attr_name: str = "router") -> Callable[["FastAPI", object], None]:
|
|
def _register(app: "FastAPI", module: object) -> None:
|
|
app.include_router(getattr(module, attr_name))
|
|
|
|
return _register
|
|
|
|
|
|
def _mount_app(
|
|
prefix: str, attr_name: str = "app"
|
|
) -> Callable[["FastAPI", object], None]:
|
|
def _register(app: "FastAPI", module: object) -> None:
|
|
app.mount(path=prefix, app=getattr(module, attr_name))
|
|
|
|
return _register
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class LazyFeature:
|
|
name: str
|
|
module_path: str
|
|
path_prefixes: Tuple[str, ...]
|
|
register_fn: Callable[["FastAPI", object], None] = field(
|
|
default_factory=lambda: _include_router("router")
|
|
)
|
|
# For routes whose path has a leading parameter (e.g. /{server}/authorize)
|
|
# — startswith can't match those, so the matcher also checks endswith.
|
|
path_suffixes: Tuple[str, ...] = ()
|
|
# Keep the stub injected even after load — for mounted ASGI sub-apps
|
|
# whose routes don't appear in the parent app's openapi spec.
|
|
persistent_swagger_stub: bool = False
|
|
|
|
def matches(self, path: str) -> bool:
|
|
return any(path.startswith(p) for p in self.path_prefixes) or any(
|
|
path.endswith(s) for s in self.path_suffixes
|
|
)
|
|
|
|
|
|
LAZY_FEATURES: Tuple[LazyFeature, ...] = (
|
|
LazyFeature(
|
|
name="guardrails",
|
|
module_path="litellm.proxy.guardrails.guardrail_endpoints",
|
|
path_prefixes=(
|
|
"/guardrails",
|
|
"/v2/guardrails",
|
|
"/apply_guardrail",
|
|
"/policies/usage",
|
|
),
|
|
),
|
|
LazyFeature(
|
|
name="policies",
|
|
module_path="litellm.proxy.management_endpoints.policy_endpoints",
|
|
# Trailing slash to avoid matching /policies/... (policy_engine).
|
|
path_prefixes=("/policy/", "/utils/test_policies_and_guardrails"),
|
|
),
|
|
LazyFeature(
|
|
name="policy_engine",
|
|
module_path="litellm.proxy.policy_engine.policy_endpoints",
|
|
path_prefixes=("/policies",),
|
|
),
|
|
LazyFeature(
|
|
name="policy_resolve",
|
|
module_path="litellm.proxy.policy_engine.policy_resolve_endpoints",
|
|
path_prefixes=("/policies/resolve", "/policies/attachments/estimate-impact"),
|
|
),
|
|
LazyFeature(
|
|
name="agents",
|
|
module_path="litellm.proxy.agent_endpoints.endpoints",
|
|
path_prefixes=("/v1/agents", "/agents", "/agent/"),
|
|
),
|
|
LazyFeature(
|
|
name="gemini_agents",
|
|
module_path="litellm.proxy.google_endpoints.agents_endpoints",
|
|
path_prefixes=("/v1beta/agents",),
|
|
),
|
|
LazyFeature(
|
|
name="a2a",
|
|
module_path="litellm.proxy.agent_endpoints.a2a_endpoints",
|
|
# ``/v1/a2a/{agent_id}/message/send`` is caught via the suffix so the
|
|
# ``/v1/a2a`` prefix doesn't subsume the discover prefix below.
|
|
path_prefixes=("/a2a",),
|
|
path_suffixes=("/message/send",),
|
|
),
|
|
LazyFeature(
|
|
name="a2a_registration",
|
|
module_path="litellm.proxy.a2a.endpoints",
|
|
path_prefixes=("/v1/a2a/discover",),
|
|
),
|
|
LazyFeature(
|
|
name="vector_stores",
|
|
module_path="litellm.proxy.vector_store_endpoints.endpoints",
|
|
path_prefixes=("/v1/vector_stores", "/vector_stores", "/v1/indexes"),
|
|
),
|
|
LazyFeature(
|
|
name="vector_store_management",
|
|
module_path="litellm.proxy.vector_store_endpoints.management_endpoints",
|
|
# Trailing slash to avoid matching /vector_stores/... (vector_stores).
|
|
path_prefixes=("/vector_store/", "/v1/vector_store/"),
|
|
),
|
|
LazyFeature(
|
|
name="vector_store_files",
|
|
# Routes appear under both /v1/vector_stores/{id}/files and the
|
|
# un-versioned form, so both prefixes must trigger the load.
|
|
module_path="litellm.proxy.vector_store_files_endpoints.endpoints",
|
|
path_prefixes=("/v1/vector_stores", "/vector_stores"),
|
|
),
|
|
LazyFeature(
|
|
name="tools",
|
|
module_path="litellm.proxy.management_endpoints.tool_management_endpoints",
|
|
path_prefixes=("/v1/tool", "/tool"),
|
|
),
|
|
LazyFeature(
|
|
name="search_tools",
|
|
module_path="litellm.proxy.search_endpoints.search_tool_management",
|
|
path_prefixes=("/search_tools",),
|
|
),
|
|
# mcp_management owns most /v1/mcp/* admin routes; mcp_app is the mounted
|
|
# streaming sub-app at /mcp.
|
|
LazyFeature(
|
|
name="mcp_management",
|
|
module_path="litellm.proxy.management_endpoints.mcp_management_endpoints",
|
|
path_prefixes=("/v1/mcp/",),
|
|
),
|
|
LazyFeature(
|
|
# Also serves /.well-known/oauth-* (OAuth metadata discovery).
|
|
# No /mcp/oauth prefix here: the mounted /mcp sub-app would
|
|
# shadow it, and there are no actual routes there anyway.
|
|
name="mcp_byok_oauth",
|
|
module_path="litellm.proxy._experimental.mcp_server.byok_oauth_endpoints",
|
|
path_prefixes=("/v1/mcp/oauth", "/.well-known/oauth-"),
|
|
),
|
|
LazyFeature(
|
|
# Serves OAuth dance endpoints (/authorize, /token, /callback,
|
|
# /register) plus several /.well-known/ discovery URLs at the proxy
|
|
# root — needed for MCP-over-OAuth flows even before /mcp is hit.
|
|
name="mcp_discoverable",
|
|
module_path="litellm.proxy._experimental.mcp_server.discoverable_endpoints",
|
|
path_prefixes=(
|
|
"/.well-known/oauth-",
|
|
"/.well-known/openid-configuration",
|
|
"/.well-known/jwks.json",
|
|
"/authorize",
|
|
"/token",
|
|
"/callback",
|
|
"/register",
|
|
),
|
|
# Catches the /{mcp_server_name}/authorize|token|register variants.
|
|
path_suffixes=("/authorize", "/token", "/register"),
|
|
),
|
|
LazyFeature(
|
|
name="mcp_rest",
|
|
module_path="litellm.proxy._experimental.mcp_server.rest_endpoints",
|
|
path_prefixes=("/mcp-rest",),
|
|
),
|
|
LazyFeature(
|
|
# Hardcoded /mcp matches BASE_MCP_ROUTE; importing the constant
|
|
# here would defeat lazy loading.
|
|
name="mcp_app",
|
|
module_path="litellm.proxy._experimental.mcp_server.server",
|
|
path_prefixes=("/mcp",),
|
|
register_fn=_mount_app("/mcp", attr_name="app"),
|
|
persistent_swagger_stub=True,
|
|
),
|
|
LazyFeature(
|
|
name="config_overrides",
|
|
module_path="litellm.proxy.management_endpoints.config_override_endpoints",
|
|
path_prefixes=("/config_overrides",),
|
|
),
|
|
LazyFeature(
|
|
name="realtime",
|
|
module_path="litellm.proxy.realtime_endpoints.endpoints",
|
|
path_prefixes=("/openai/v1/realtime", "/v1/realtime", "/realtime"),
|
|
),
|
|
LazyFeature(
|
|
name="anthropic_passthrough",
|
|
module_path="litellm.proxy.anthropic_endpoints.endpoints",
|
|
path_prefixes=("/v1/messages", "/anthropic", "/api/event_logging"),
|
|
),
|
|
LazyFeature(
|
|
name="anthropic_skills",
|
|
module_path="litellm.proxy.anthropic_endpoints.skills_endpoints",
|
|
path_prefixes=("/v1/skills", "/skills"),
|
|
),
|
|
LazyFeature(
|
|
name="langfuse_passthrough",
|
|
module_path="litellm.proxy.vertex_ai_endpoints.langfuse_endpoints",
|
|
path_prefixes=("/langfuse",),
|
|
),
|
|
LazyFeature(
|
|
name="evals",
|
|
module_path="litellm.proxy.openai_evals_endpoints.endpoints",
|
|
path_prefixes=("/v1/evals", "/evals"),
|
|
),
|
|
LazyFeature(
|
|
name="claude_code_marketplace",
|
|
module_path="litellm.proxy.anthropic_endpoints.claude_code_endpoints",
|
|
path_prefixes=("/claude-code",),
|
|
register_fn=_include_router("claude_code_marketplace_router"),
|
|
),
|
|
LazyFeature(
|
|
name="scim",
|
|
module_path="litellm.proxy.management_endpoints.scim.scim_v2",
|
|
path_prefixes=("/scim",),
|
|
register_fn=_include_router("scim_router"),
|
|
),
|
|
LazyFeature(
|
|
name="cloudzero",
|
|
module_path="litellm.proxy.spend_tracking.cloudzero_endpoints",
|
|
path_prefixes=("/cloudzero",),
|
|
),
|
|
LazyFeature(
|
|
name="vantage",
|
|
module_path="litellm.proxy.spend_tracking.vantage_endpoints",
|
|
path_prefixes=("/vantage",),
|
|
),
|
|
LazyFeature(
|
|
name="usage_ai",
|
|
module_path="litellm.proxy.management_endpoints.usage_endpoints",
|
|
path_prefixes=("/usage/ai",),
|
|
),
|
|
LazyFeature(
|
|
name="prompts",
|
|
module_path="litellm.proxy.prompts.prompt_endpoints",
|
|
path_prefixes=("/prompts", "/utils/dotprompt_json_converter"),
|
|
),
|
|
LazyFeature(
|
|
name="jwt_mappings",
|
|
module_path="litellm.proxy.management_endpoints.jwt_key_mapping_endpoints",
|
|
path_prefixes=("/jwt/key/mapping",),
|
|
),
|
|
LazyFeature(
|
|
name="compliance",
|
|
module_path="litellm.proxy.management_endpoints.compliance_endpoints",
|
|
path_prefixes=("/compliance",),
|
|
),
|
|
LazyFeature(
|
|
name="access_groups",
|
|
module_path="litellm.proxy.management_endpoints.access_group_endpoints",
|
|
path_prefixes=("/access_group", "/v1/access_group", "/v1/unified_access_group"),
|
|
),
|
|
)
|
|
|
|
|
|
class LazyFeatureMiddleware:
|
|
"""ASGI middleware that imports + registers a feature router on first
|
|
matching request. Idempotent; once loaded, subsequent requests skip."""
|
|
|
|
def __init__(
|
|
self,
|
|
app,
|
|
fastapi_app: "FastAPI",
|
|
features: Tuple[LazyFeature, ...] = LAZY_FEATURES,
|
|
):
|
|
self.app = app
|
|
self._fastapi_app = fastapi_app
|
|
self._features = features
|
|
# SERVER_ROOT_PATH is a process-startup env var, cache the normalized
|
|
# form once instead of recomputing per request. Lazy import to avoid
|
|
# pulling proxy.utils into this module's import graph at startup
|
|
# (proxy_server imports both).
|
|
from litellm.proxy.utils import get_server_root_path
|
|
|
|
self._root_path = get_server_root_path().rstrip("/")
|
|
# Loaded set / per-feature locks live on app.state so the warm endpoint
|
|
# and the middleware share them — preventing duplicate registrations
|
|
# when both paths fire for the same feature.
|
|
if not hasattr(fastapi_app.state, "lazy_loaded"):
|
|
fastapi_app.state.lazy_loaded = set()
|
|
fastapi_app.state.lazy_locks = {}
|
|
|
|
@property
|
|
def _loaded(self) -> set:
|
|
return self._fastapi_app.state.lazy_loaded
|
|
|
|
async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
|
|
# Short-circuit once every feature has loaded.
|
|
if scope["type"] in ("http", "websocket") and len(self._loaded) < len(
|
|
self._features
|
|
):
|
|
path = scope.get("path", "")
|
|
# Strip SERVER_ROOT_PATH so prefix matching works under a server
|
|
# root path. Without this, requests like /api/v1/policies/... never
|
|
# match the registered prefixes (/policies/...) and lazy features
|
|
# stay unloaded — every endpoint under them returns 404. The
|
|
# `+ "/"` boundary prevents false-positive matches (e.g. /apiv2
|
|
# against root /api). If the path doesn't start with the prefix
|
|
# (e.g. a reverse proxy already stripped it), we leave it alone.
|
|
if self._root_path and path.startswith(self._root_path + "/"):
|
|
path = path[len(self._root_path) :]
|
|
for feat in self._features:
|
|
if feat.module_path in self._loaded:
|
|
continue
|
|
if feat.matches(path):
|
|
await _force_load(self._fastapi_app, feat)
|
|
await self.app(scope, receive, send)
|
|
|
|
|
|
async def _force_load(app: "FastAPI", feat: LazyFeature) -> bool:
|
|
"""Import + register a lazy feature exactly once per (app, module).
|
|
Shared by the middleware and the /lazy/warm endpoint."""
|
|
if not hasattr(app.state, "lazy_loaded"):
|
|
app.state.lazy_loaded = set()
|
|
app.state.lazy_locks = {}
|
|
lock = app.state.lazy_locks.setdefault(feat.module_path, asyncio.Lock())
|
|
async with lock:
|
|
if feat.module_path in app.state.lazy_loaded:
|
|
return False
|
|
try:
|
|
# Import on a thread (heavy modules take 1-3 s). register_fn
|
|
# mutates app.router.routes, so it stays on the loop thread.
|
|
loop = asyncio.get_running_loop()
|
|
module = await loop.run_in_executor(
|
|
None, importlib.import_module, feat.module_path
|
|
)
|
|
feat.register_fn(app, module)
|
|
app.state.lazy_loaded.add(feat.module_path)
|
|
app.openapi_schema = None
|
|
verbose_proxy_logger.info(
|
|
"Lazy-loaded optional feature %r (module: %s)",
|
|
feat.name,
|
|
feat.module_path,
|
|
)
|
|
return True
|
|
except Exception as exc:
|
|
# Mark loaded anyway so we don't retry on every request.
|
|
app.state.lazy_loaded.add(feat.module_path)
|
|
verbose_proxy_logger.warning(
|
|
"Failed to lazy-load optional feature %r (module: %s): %s. "
|
|
"This feature's endpoints will return 404 until restart.",
|
|
feat.name,
|
|
feat.module_path,
|
|
exc,
|
|
)
|
|
return False
|
|
|
|
|
|
def attach_lazy_features(app: "FastAPI") -> None:
|
|
app.include_router(_make_warmup_router(app))
|
|
app.add_middleware(LazyFeatureMiddleware, fastapi_app=app)
|
|
|
|
|
|
def _make_warmup_router(app: "FastAPI") -> "APIRouter":
|
|
"""POST /lazy/warm/{name}: load a feature and return its partial openapi
|
|
so the Swagger plugin can merge in-place without a full /openapi.json refetch.
|
|
Requires auth — anyone who can hit the proxy can already trigger the same
|
|
imports by sending a real request to a feature's prefix, but gating this
|
|
debug endpoint avoids unauthenticated callers forcing the import chain."""
|
|
from fastapi import APIRouter, Depends, HTTPException
|
|
from fastapi.openapi.utils import get_openapi
|
|
|
|
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
|
|
|
router = APIRouter()
|
|
|
|
@router.post(
|
|
"/lazy/warm/{name}",
|
|
include_in_schema=False,
|
|
dependencies=[Depends(user_api_key_auth)],
|
|
)
|
|
async def warm(name: str):
|
|
feat = next((f for f in LAZY_FEATURES if f.name == name), None)
|
|
if feat is None:
|
|
raise HTTPException(404, f"unknown lazy feature: {name}")
|
|
if feat.persistent_swagger_stub:
|
|
return {"stub_path": None, "paths": {}, "components": {"schemas": {}}}
|
|
|
|
await _force_load(app, feat)
|
|
|
|
feat_routes = [r for r in app.routes if feat.matches(getattr(r, "path", ""))]
|
|
full = get_openapi(title=app.title, version=app.version, routes=feat_routes)
|
|
# Force all operations under one tag so they group under a single Swagger
|
|
# section — many lazy modules tag routes inconsistently.
|
|
for path_ops in full.get("paths", {}).values():
|
|
for op in path_ops.values():
|
|
if isinstance(op, dict):
|
|
op["tags"] = [feat.name]
|
|
return {
|
|
"stub_path": feat.path_prefixes[0],
|
|
"paths": full.get("paths", {}),
|
|
"components": {"schemas": full.get("components", {}).get("schemas", {})},
|
|
}
|
|
|
|
return router
|
|
|
|
|
|
def inject_lazy_stubs(schema: Dict) -> Dict:
|
|
"""Inject openapi entries for unloaded features. Uses the snapshot file
|
|
when available (full route info), otherwise falls back to a single
|
|
placeholder per feature. Any failure logs and returns the schema unchanged
|
|
so /openapi.json never 500s on a cosmetic injection bug."""
|
|
try:
|
|
from litellm.proxy._lazy_openapi_snapshot import load_snapshot
|
|
|
|
snapshot = load_snapshot()
|
|
paths = schema.setdefault("paths", {})
|
|
schemas = schema.setdefault("components", {}).setdefault("schemas", {})
|
|
|
|
for feat in LAZY_FEATURES:
|
|
if feat.module_path in sys.modules and not feat.persistent_swagger_stub:
|
|
continue
|
|
|
|
fragment = (snapshot or {}).get(feat.name)
|
|
if fragment:
|
|
for p, ops in fragment.get("paths", {}).items():
|
|
paths.setdefault(p, ops)
|
|
for name, sch in (
|
|
fragment.get("components", {}).get("schemas", {}).items()
|
|
):
|
|
schemas.setdefault(name, sch)
|
|
continue
|
|
|
|
prefix = feat.path_prefixes[0]
|
|
if prefix in paths:
|
|
continue
|
|
paths[prefix] = {
|
|
"get": {
|
|
"tags": [feat.name],
|
|
"summary": feat.name,
|
|
"responses": {"200": {"description": "OK"}},
|
|
}
|
|
}
|
|
except Exception as exc:
|
|
verbose_proxy_logger.warning("inject_lazy_stubs failed: %s", exc)
|
|
return schema
|
|
|
|
|
|
def lazy_tag_to_prefix() -> Dict[str, str]:
|
|
"""feature.name -> first prefix, used by the Swagger warmup JS plugin.
|
|
Returns empty when the snapshot is loaded — the plugin is unnecessary
|
|
because /openapi.json already has full route info."""
|
|
from litellm.proxy._lazy_openapi_snapshot import load_snapshot
|
|
|
|
if load_snapshot():
|
|
return {}
|
|
return {
|
|
feat.name: feat.path_prefixes[0]
|
|
for feat in LAZY_FEATURES
|
|
if not feat.persistent_swagger_stub
|
|
}
|