fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
649 lines
25 KiB
Python
649 lines
25 KiB
Python
import asyncio
|
|
from typing import TYPE_CHECKING, Any, Literal, Optional
|
|
|
|
import httpx
|
|
from fastapi import HTTPException, status
|
|
|
|
import litellm
|
|
from litellm.proxy._types import UserAPIKeyAuth
|
|
|
|
# Router-internal mock_testing_* flag names — kept in sync with
|
|
# ``litellm.types.router.MockRouterTestingParams`` by the test
|
|
# ``test_mock_testing_kwarg_names_matches_dataclass``. Hardcoding (rather
|
|
# than deriving via ``dataclasses.fields(MockRouterTestingParams)`` at
|
|
# import time) avoids a cyclic import: ``litellm.types.router`` imports
|
|
# back into proxy modules before this module finishes loading.
|
|
_MOCK_TESTING_KWARG_NAMES: tuple = (
|
|
"mock_testing_fallbacks",
|
|
"mock_testing_context_fallbacks",
|
|
"mock_testing_content_policy_fallbacks",
|
|
)
|
|
|
|
# ``LitellmRouter`` is the annotation used for router parameters in this
# module. At runtime it is plain ``Any`` (so ``litellm.router`` is not imported
# here); static type checkers see the real ``Router`` class instead.
if not TYPE_CHECKING:
    LitellmRouter = Any
else:
    from litellm.router import Router as _Router

    LitellmRouter = _Router
|
|
|
|
|
|
def _route_user_config_request(data: dict, route_type: str):
    """Route a request through a temporary Router built from ``user_config``.

    Args:
        data: Request kwargs. The ``"user_config"`` entry is popped (so ``data``
            is mutated) and the remaining entries are forwarded to the route
            method as keyword arguments.
        route_type: Name of the Router method to invoke (e.g. ``"acompletion"``).

    Returns:
        Whatever ``getattr(user_router, route_type)(**data)`` returns. The
        value is returned as-is; it is not awaited here.

    Raises:
        KeyError: If ``data`` has no ``"user_config"`` entry.
        Exception: Anything raised while constructing the Router or while
            calling the route method is propagated unchanged.
    """
    router_config = data.pop("user_config")

    # Keep only arguments Router.__init__ accepts. This prevents a TypeError
    # when invalid parameters are stored in the database.
    valid_args = litellm.Router.get_valid_args()
    filtered_config = {k: v for k, v in router_config.items() if k in valid_args}

    user_router = litellm.Router(**filtered_config)
    try:
        return getattr(user_router, f"{route_type}")(**data)
    finally:
        # Always release the temporary router, including when the attribute
        # lookup or the call above raises; previously it was only discarded
        # on the success path.
        user_router.discard()
|
|
|
|
|
|
def _is_a2a_agent_model(model_name: Any) -> bool:
|
|
"""Check if the model name is for an A2A agent (a2a/ prefix)."""
|
|
return isinstance(model_name, str) and model_name.startswith("a2a/")
|
|
|
|
|
|
def _raise_if_model_fully_blocked(
|
|
llm_router: LitellmRouter, model_name: Any, team_id: Optional[str]
|
|
) -> None:
|
|
if not isinstance(model_name, str) or not model_name:
|
|
return
|
|
if not isinstance(llm_router, litellm.Router):
|
|
return
|
|
deployments = (
|
|
llm_router.get_model_list(model_name=model_name, team_id=team_id) or []
|
|
)
|
|
if llm_router._are_all_deployments_blocked(deployments):
|
|
raise litellm.PermissionDeniedError(
|
|
message="Model is blocked",
|
|
model=model_name,
|
|
llm_provider="",
|
|
response=httpx.Response(
|
|
status_code=403,
|
|
request=httpx.Request(
|
|
method="POST", url="https://github.com/BerriAI/litellm"
|
|
),
|
|
),
|
|
)
|
|
|
|
|
|
# Maps a ``route_type`` (the router / litellm method name) to the endpoint
# path reported in error messages. Route types missing from this table are
# reported under their own name by ``ROUTE_ENDPOINT_MAPPING.get(route_type,
# route_type)``.
ROUTE_ENDPOINT_MAPPING = {
    "acompletion": "/chat/completions",
    "atext_completion": "/completions",
    "aembedding": "/embeddings",
    # Was "/image/generations"; corrected to the plural form used by
    # "/images/edits" below and by the OpenAI Images API path.
    "aimage_generation": "/images/generations",
    "aspeech": "/audio/speech",
    "atranscription": "/audio/transcriptions",
    "amoderation": "/moderations",
    "arerank": "/rerank",
    "aresponses": "/responses",
    "_aresponses_websocket": "/responses",
    "alist_input_items": "/responses/{response_id}/input_items",
    "aimage_edit": "/images/edits",
    "acancel_responses": "/responses/{response_id}/cancel",
    "acompact_responses": "/responses/compact",
    "aocr": "/ocr",
    "asearch": "/search",
    "avideo_generation": "/videos",
    "avideo_list": "/videos",
    "avideo_status": "/videos/{video_id}",
    "avideo_content": "/videos/{video_id}/content",
    "avideo_remix": "/videos/{video_id}/remix",
    "avideo_create_character": "/videos/characters",
    "avideo_get_character": "/videos/characters/{character_id}",
    "avideo_edit": "/videos/edits",
    "avideo_extension": "/videos/extensions",
    "acreate_realtime_client_secret": "/realtime/client_secrets",
    "arealtime_calls": "/realtime/calls",
    "acreate_realtime_transcription_session": "/realtime/transcription_sessions",
    "acreate_container": "/containers",
    "alist_containers": "/containers",
    "aretrieve_container": "/containers/{container_id}",
    "adelete_container": "/containers/{container_id}",
    # Auto-generated container file routes
    "aupload_container_file": "/containers/{container_id}/files",
    "alist_container_files": "/containers/{container_id}/files",
    "aretrieve_container_file": "/containers/{container_id}/files/{file_id}",
    "adelete_container_file": "/containers/{container_id}/files/{file_id}",
    "aretrieve_container_file_content": "/containers/{container_id}/files/{file_id}/content",
    "acreate_skill": "/skills",
    "alist_skills": "/skills",
    "aget_skill": "/skills/{skill_id}",
    "adelete_skill": "/skills/{skill_id}",
    "aingest": "/rag/ingest",
    # Google Interactions API routes
    "acreate_interaction": "/interactions",
    "aget_interaction": "/interactions/{interaction_id}",
    "adelete_interaction": "/interactions/{interaction_id}",
    "acancel_interaction": "/interactions/{interaction_id}/cancel",
    # Google Managed Agents API routes
    "acreate_agent": "/v1beta/agents",
    "alist_agents": "/v1beta/agents",
    "aget_agent": "/v1beta/agents/{name}",
    "adelete_agent": "/v1beta/agents/{name}",
    "alist_agent_versions": "/v1beta/agents/{name}/versions",
    # OpenAI Evals API routes
    "acreate_eval": "/evals",
    "alist_evals": "/evals",
    "aget_eval": "/evals/{eval_id}",
    "aupdate_eval": "/evals/{eval_id}",
    "adelete_eval": "/evals/{eval_id}",
    "acancel_eval": "/evals/{eval_id}/cancel",
    # OpenAI Evals Runs API routes
    "acreate_run": "/evals/{eval_id}/runs",
    "alist_runs": "/evals/{eval_id}/runs",
    "aget_run": "/evals/{eval_id}/runs/{run_id}",
    "acancel_run": "/evals/{eval_id}/runs/{run_id}/cancel",
    "adelete_run": "/evals/{eval_id}/runs/{run_id}",
}
|
|
|
|
|
|
class ProxyModelNotFoundError(HTTPException):
    """HTTP 400 error raised when no route can be found for the requested model."""

    def __init__(self, route: str, model_name: str):
        """Build the error payload.

        Args:
            route: Endpoint path (or route name) shown at the start of the message.
            model_name: Model name that was passed in the request.
        """
        message = f"{route}: Invalid model name passed in model={model_name}. Call `/v1/models` to view available models for your key."
        super().__init__(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": message},
        )
|
|
|
|
|
|
def get_team_id_from_data(data: dict) -> Optional[str]:
    """
    Look up ``user_api_key_team_id`` in the request metadata.

    ``data["metadata"]`` is consulted first, then ``data["litellm_metadata"]``.
    A section is used only if it is present, is not None, and contains the
    ``user_api_key_team_id`` key; the first such section decides the result.

    Returns:
        The stored team id, or None when neither section carries the key.
    """
    for section in ("metadata", "litellm_metadata"):
        bucket = data.get(section)
        if bucket is not None and "user_api_key_team_id" in bucket:
            return bucket.get("user_api_key_team_id")
    return None
|
|
|
|
|
|
_shared_session_lock: Optional[asyncio.Lock] = None
|
|
|
|
|
|
def _get_shared_session_lock() -> asyncio.Lock:
|
|
"""Lazily create the shared session lock (must be called within a running event loop).
|
|
|
|
WARNING: Do not reset _shared_session_lock to None while any coroutine may be
|
|
executing the session-recovery path; doing so breaks the double-checked locking
|
|
guarantee and can cause duplicate session creation.
|
|
"""
|
|
global _shared_session_lock
|
|
if _shared_session_lock is None:
|
|
_shared_session_lock = asyncio.Lock()
|
|
return _shared_session_lock
|
|
|
|
|
|
async def add_shared_session_to_data(data: dict) -> None:
    """
    Attach the proxy's shared aiohttp session to ``data["shared_session"]``.

    Behaviour, based on ``proxy_server.shared_aiohttp_session``:

    * open session: it is attached to ``data``.
    * closed session: under the module lock the session is re-read; if it is
      still unusable a new one is requested from
      ``proxy_server._initialize_shared_aiohttp_session()``, stored back on
      ``proxy_server`` and attached. The lock keeps concurrent requests from
      each creating their own replacement.
    * no session (None): nothing is attached.

    This function never raises: import failures and any other unexpected
    errors are logged at debug level (when the logger can be imported) and the
    request continues without session reuse.

    Args:
        data: Dictionary to add the shared session to
    """
    try:
        import litellm.proxy.proxy_server as proxy_server
        from litellm._logging import verbose_proxy_logger

        current = proxy_server.shared_aiohttp_session

        # No session available at all: nothing to attach.
        if current is None:
            verbose_proxy_logger.info(
                "SESSION REUSE: No shared session available for this request"
            )
            return

        # Fast path: the session is open and can be reused directly.
        if not current.closed:
            data["shared_session"] = current
            verbose_proxy_logger.info(
                f"SESSION REUSE: Attached shared aiohttp session to request (ID: {id(current)})"
            )
            return

        # The session exists but is closed: recreate it while holding the lock
        # so concurrent requests do not each build (and leak) a replacement.
        async with _get_shared_session_lock():
            # Re-read under the lock; another coroutine may already have
            # swapped in a fresh session.
            current = proxy_server.shared_aiohttp_session
            if current is not None and not current.closed:
                data["shared_session"] = current
                return

            # Here the session is either None (reset by another coroutine) or
            # still closed; both cases need a new session.
            if current is None:
                verbose_proxy_logger.warning(
                    "SESSION REUSE: Shared aiohttp session is None after re-check, recreating..."
                )
            else:
                verbose_proxy_logger.warning(
                    f"SESSION REUSE: Shared aiohttp session is closed (ID: {id(current)}), recreating..."
                )

            try:
                replacement = await proxy_server._initialize_shared_aiohttp_session()
            except Exception:
                verbose_proxy_logger.exception(
                    "SESSION REUSE: Exception during shared session recreation"
                )
                replacement = None

            if replacement is None:
                verbose_proxy_logger.info(
                    "SESSION REUSE: Failed to recreate shared session, continuing without session reuse"
                )
                return

            proxy_server.shared_aiohttp_session = replacement
            data["shared_session"] = replacement
    except Exception:
        # Outer safety net: covers import failures and anything unexpected so
        # the request path is never broken by session setup. The recovery
        # logic above has its own, narrower exception handling.
        try:
            from litellm._logging import verbose_proxy_logger

            verbose_proxy_logger.debug(
                "SESSION REUSE: Unexpected error in session setup, continuing without reuse",
                exc_info=True,
            )
        except Exception:
            pass
|
|
|
|
|
|
async def route_request(
    data: dict,
    llm_router: Optional[LitellmRouter],
    user_model: Optional[str],
    route_type: Literal[
        "acompletion",
        "atext_completion",
        "aembedding",
        "aimage_generation",
        "aspeech",
        "atranscription",
        "amoderation",
        "arerank",
        "aresponses",
        "aget_responses",
        "adelete_responses",
        "acancel_responses",
        "acompact_responses",
        "acreate_response_reply",
        "alist_input_items",
        "_arealtime",  # private function for realtime API
        "acreate_realtime_client_secret",
        "arealtime_calls",
        "acreate_realtime_transcription_session",
        "_aresponses_websocket",  # private function for responses WebSocket mode
        "aimage_edit",
        "agenerate_content",
        "agenerate_content_stream",
        "allm_passthrough_route",
        "acreate_batch",
        "aretrieve_batch",
        "alist_batches",
        "afile_content",
        "afile_retrieve",
        "acreate_fine_tuning_job",
        "acancel_fine_tuning_job",
        "alist_fine_tuning_jobs",
        "aretrieve_fine_tuning_job",
        "avector_store_search",
        "avector_store_create",
        "avector_store_retrieve",
        "avector_store_list",
        "avector_store_update",
        "avector_store_delete",
        "avector_store_file_create",
        "avector_store_file_list",
        "avector_store_file_retrieve",
        "avector_store_file_content",
        "avector_store_file_update",
        "avector_store_file_delete",
        "aocr",
        "asearch",
        "avideo_generation",
        "avideo_list",
        "avideo_status",
        "avideo_content",
        "avideo_remix",
        "avideo_create_character",
        "avideo_get_character",
        "avideo_edit",
        "avideo_extension",
        "acreate_container",
        "alist_containers",
        "aretrieve_container",
        "adelete_container",
        "aupload_container_file",
        "alist_container_files",
        "aretrieve_container_file",
        "adelete_container_file",
        "aretrieve_container_file_content",
        "acreate_skill",
        "alist_skills",
        "aget_skill",
        "adelete_skill",
        "aingest",
        "anthropic_messages",
        "acreate_interaction",
        "aget_interaction",
        "adelete_interaction",
        "acancel_interaction",
        "acreate_agent",
        "alist_agents",
        "aget_agent",
        "adelete_agent",
        "alist_agent_versions",
        "asend_message",
        "call_mcp_tool",
        "acancel_batch",
        "afile_delete",
        "acreate_eval",
        "alist_evals",
        "aget_eval",
        "aupdate_eval",
        "adelete_eval",
        "acancel_eval",
        "acreate_run",
        "alist_runs",
        "aget_run",
        "acancel_run",
        "adelete_run",
    ],
    user_api_key_dict: Optional[UserAPIKeyAuth] = None,
):
    """
    Common helper to route the request.

    Chooses between the proxy router (``llm_router``) and the ``litellm``
    module, looks up the attribute named ``route_type`` on the chosen target
    and calls it with ``**data``.

    Precedence (first match wins):

    1. ``api_key`` / ``api_base`` present in ``data``: router if available,
       otherwise ``litellm``.
    2. ``acompletion`` with a comma-separated ``model`` string and a router:
       batch completion.
    3. ``user_config`` in ``data``: a temporary router built from that config.
    4. ``router_settings_override`` in ``data``: selected settings are merged
       into ``data`` (request values win), then router or ``litellm``.
    5. A router is configured: route-type shortcuts, then team model mapping,
       model name / id / alias match, then pass-through, wildcard, deployment
       name, model-optional and A2A fallbacks.
    6. No router but ``user_model`` is set, or the route is
       ``allm_passthrough_route``: ``litellm`` directly.

    Args:
        data: Request kwargs. Mutated in place: a shared session may be added,
            ``mock_testing_*`` flags are removed, and several branches pop or
            rewrite keys.
        llm_router: Proxy router, or None when no router is configured.
        user_model: Model configured for the proxy, if any.
        route_type: Name of the router / ``litellm`` attribute to call.
        user_api_key_dict: Auth info, forwarded only to the A2A routing helper.

    Returns:
        The value returned by the selected callable. Except for the A2A path,
        which awaits ``route_a2a_agent_request``, the call result is returned
        without being awaited here.

    Raises:
        ProxyModelNotFoundError: If no branch produced a route.
        litellm.PermissionDeniedError: Propagated from
            ``_raise_if_model_fully_blocked`` when the model is blocked.
    """
    await add_shared_session_to_data(data)

    # Strip router-internal mock_testing_* flags. Combined with an
    # unauthorized fallback in ``router_settings_override`` they let a
    # caller deterministically execute requests against restricted
    # models. VERIA-44.
    for _key in _MOCK_TESTING_KWARG_NAMES:
        data.pop(_key, None)

    team_id = get_team_id_from_data(data)
    router_model_names = llm_router.model_names if llm_router is not None else []

    # Attribute name looked up on the router / litellm module.
    route_attr = f"{route_type}"

    # Preprocess Google GenAI generate content requests
    if route_type in ["agenerate_content", "agenerate_content_stream"]:
        # Map generationConfig to config parameter for Google GenAI compatibility
        if "generationConfig" in data and "config" not in data:
            data["config"] = data.pop("generationConfig")

    if "api_key" in data or "api_base" in data:
        if llm_router is not None:
            return getattr(llm_router, route_attr)(**data)
        else:
            return getattr(litellm, route_attr)(**data)

    elif (
        route_type == "acompletion"
        # Only a string can be a comma-separated model list; the isinstance
        # guard avoids a TypeError on non-iterable model values.
        and isinstance(data.get("model"), str)
        and "," in data["model"]
        and llm_router is not None
    ):
        # Handle batch completions with comma-separated models BEFORE user_config check
        # This ensures batch completion logic is applied even when user_config is set
        if data.get("fastest_response", False):
            return llm_router.abatch_completion_fastest_response(**data)
        else:
            models = [model.strip() for model in data.pop("model").split(",")]
            return llm_router.abatch_completion(models=models, **data)

    elif "user_config" in data:
        return _route_user_config_request(data, route_type)

    elif "router_settings_override" in data:
        # Apply per-request router settings overrides from key/team config
        # Instead of creating a new Router (expensive), merge settings into kwargs
        # The Router already supports per-request overrides for these settings
        override_settings = data.pop("router_settings_override")

        # Settings that the Router accepts as per-request kwargs
        # These override the global router settings for this specific request
        per_request_settings = [
            "fallbacks",
            "context_window_fallbacks",
            "content_policy_fallbacks",
            "num_retries",
            "timeout",
            "model_group_retry_policy",
        ]

        # Merge override settings into data (only if not already set in request)
        for key in per_request_settings:
            if key in override_settings and key not in data:
                data[key] = override_settings[key]

        # Use main router with overridden kwargs
        if llm_router is not None:
            return getattr(llm_router, route_attr)(**data)
        else:
            return getattr(litellm, route_attr)(**data)

    elif llm_router is not None:
        _raise_if_model_fully_blocked(
            llm_router=llm_router, model_name=data.get("model"), team_id=team_id
        )

        # Evals, Evals-runs and realtime session routes: always call litellm
        # directly (not through the router), but pull the model's credentials
        # from the router when a model is provided.
        if route_type in [
            "acreate_eval",
            "alist_evals",
            "aget_eval",
            "aupdate_eval",
            "adelete_eval",
            "acancel_eval",
            "acreate_run",
            "alist_runs",
            "aget_run",
            "acancel_run",
            "adelete_run",
            "acreate_realtime_client_secret",
            "arealtime_calls",
            "acreate_realtime_transcription_session",
        ]:
            # If a model is provided, get its credentials from the router
            model = data.get("model")
            if model and llm_router:
                try:
                    # Try to get deployment credentials for this model id
                    deployment_creds = llm_router.get_deployment_credentials(
                        model_id=model
                    )
                    if not deployment_creds:
                        # Try by model group name
                        deployment = llm_router.get_deployment_by_model_group_name(
                            model_group_name=model
                        )
                        if (
                            deployment
                            and deployment.litellm_params
                            and not llm_router._is_deployment_blocked(deployment)
                        ):
                            deployment_creds = deployment.litellm_params.model_dump(
                                exclude_none=True
                            )

                    # Merge the credentials into data. NOTE: ``dict.update``
                    # overwrites keys that are already present in ``data``.
                    if deployment_creds:
                        data.update(deployment_creds)
                except Exception:
                    # Best effort: continue without deployment credentials,
                    # but leave a trace instead of swallowing silently.
                    from litellm._logging import verbose_proxy_logger

                    verbose_proxy_logger.debug(
                        "route_request: could not resolve deployment credentials, continuing without them",
                        exc_info=True,
                    )

            return getattr(litellm, route_attr)(**data)

        # Skip model-based routing for container operations
        if route_type in [
            "acreate_container",
            "alist_containers",
            "aretrieve_container",
            "adelete_container",
            "aupload_container_file",
            "alist_container_files",
            "aretrieve_container_file",
            "adelete_container_file",
            "aretrieve_container_file_content",
        ]:
            return getattr(llm_router, route_attr)(**data)

        # Interactions API: create with agent, get/delete/cancel don't need model routing
        if route_type in [
            "acreate_interaction",
            "aget_interaction",
            "adelete_interaction",
            "acancel_interaction",
        ]:
            return getattr(llm_router, route_attr)(**data)

        # Managed Agents API: these don't need model routing
        if route_type in [
            "acreate_agent",
            "alist_agents",
            "aget_agent",
            "adelete_agent",
            "alist_agent_versions",
        ]:
            return getattr(llm_router, route_attr)(**data)

        if route_type in [
            "avideo_list",
            "avideo_status",
            "avideo_content",
            "avideo_remix",
            "avideo_create_character",
            "avideo_get_character",
            "avideo_edit",
            "avideo_extension",
            "avector_store_file_list",
            "avector_store_file_retrieve",
            "avector_store_file_content",
            "avector_store_file_delete",
            "acreate_skill",
            "alist_skills",
            "aget_skill",
            "adelete_skill",
            "aingest",
        ] and (data.get("model") is None or data.get("model") == ""):
            # These endpoints don't need a model, use custom_llm_provider directly
            return getattr(litellm, route_attr)(**data)

        # A missing "model" key is treated like ``model=None`` (which the
        # branches below already handle) instead of raising KeyError.
        requested_model = data.get("model")

        team_model_name = (
            llm_router.map_team_model(requested_model, team_id)
            if team_id is not None
            else None
        )
        if team_model_name is not None:
            data["model"] = team_model_name
            return getattr(llm_router, route_attr)(**data)

        elif requested_model in router_model_names or llm_router.has_model_id(
            requested_model
        ):
            return getattr(llm_router, route_attr)(**data)

        elif (
            llm_router.model_group_alias is not None
            and requested_model in llm_router.model_group_alias
        ):
            return getattr(llm_router, route_attr)(**data)

        elif requested_model not in router_model_names:
            # Check wildcards before checking deployment_names
            # Priority: 1. Exact model_name match, 2. Wildcard match, 3. deployment_names match
            if llm_router.router_general_settings.pass_through_all_models:
                return getattr(litellm, route_attr)(**data)
            elif (
                llm_router.default_deployment is not None
                or len(llm_router.pattern_router.patterns) > 0
            ):
                return getattr(llm_router, route_attr)(**data)
            elif requested_model in llm_router.deployment_names:
                # Only match deployment_names if no wildcard matched
                return getattr(llm_router, route_attr)(
                    **data, specific_deployment=True
                )
            elif route_type in [
                "amoderation",
                "aget_responses",
                "adelete_responses",
                "acancel_responses",
                "alist_input_items",
                "avector_store_create",
                "avector_store_search",
                "avector_store_retrieve",
                "avector_store_list",
                "avector_store_update",
                "avector_store_delete",
                "avector_store_file_create",
                "avector_store_file_list",
                "avector_store_file_retrieve",
                "avector_store_file_content",
                "avector_store_file_update",
                "avector_store_file_delete",
                "asearch",
                "acreate_container",
                "alist_containers",
                "aretrieve_container",
                "adelete_container",
                "aupload_container_file",
                "alist_container_files",
                "aretrieve_container_file",
                "adelete_container_file",
                "aretrieve_container_file_content",
            ]:
                # These endpoints can work with or without model parameter
                return getattr(llm_router, route_attr)(**data)
            elif route_type in [
                "avideo_status",
                "avideo_content",
                "avideo_remix",
                "avideo_create_character",
                "avideo_get_character",
                "avideo_edit",
                "avideo_extension",
            ]:
                # Video endpoints: If model is provided (e.g., from decoded video_id or target_model_names),
                # try router first to allow for multi-deployment load balancing
                # NOTE(review): only errors raised synchronously by the call are
                # caught here; if the router method returns an awaitable,
                # failures that surface when the caller awaits it are not
                # covered by this fallback - confirm this is intended.
                try:
                    return getattr(llm_router, route_attr)(**data)
                except Exception:
                    # If router fails (e.g., model not found in router), fall back to direct call
                    return getattr(litellm, route_attr)(**data)
            elif _is_a2a_agent_model(data.get("model", "")):
                from litellm.proxy.agent_endpoints.a2a_routing import (
                    route_a2a_agent_request,
                )

                result = await route_a2a_agent_request(
                    data, route_type, user_api_key_dict=user_api_key_dict
                )
                if result is not None:
                    return result
                # Fall through to raise exception below if result is None

    elif user_model is not None:
        return getattr(litellm, route_attr)(**data)
    elif route_type == "allm_passthrough_route":
        return getattr(litellm, route_attr)(**data)

    # if no route found then it's a bad request
    route_name = ROUTE_ENDPOINT_MAPPING.get(route_type, route_type)
    raise ProxyModelNotFoundError(
        route=route_name,
        model_name=data.get("model", ""),
    )
|