import asyncio from typing import TYPE_CHECKING, Any, Literal, Optional import httpx from fastapi import HTTPException, status import litellm from litellm.proxy._types import UserAPIKeyAuth # Router-internal mock_testing_* flag names — kept in sync with # ``litellm.types.router.MockRouterTestingParams`` by the test # ``test_mock_testing_kwarg_names_matches_dataclass``. Hardcoding (rather # than deriving via ``dataclasses.fields(MockRouterTestingParams)`` at # import time) avoids a cyclic import: ``litellm.types.router`` imports # back into proxy modules before this module finishes loading. _MOCK_TESTING_KWARG_NAMES: tuple = ( "mock_testing_fallbacks", "mock_testing_context_fallbacks", "mock_testing_content_policy_fallbacks", ) if TYPE_CHECKING: from litellm.router import Router as _Router LitellmRouter = _Router else: LitellmRouter = Any def _route_user_config_request(data: dict, route_type: str): """Route a request using the user-provided router config.""" router_config = data.pop("user_config") # Filter router_config to only include valid Router.__init__ arguments # This prevents TypeError when invalid parameters are stored in the database valid_args = litellm.Router.get_valid_args() filtered_config = {k: v for k, v in router_config.items() if k in valid_args} user_router = litellm.Router(**filtered_config) ret_val = getattr(user_router, f"{route_type}")(**data) user_router.discard() return ret_val def _is_a2a_agent_model(model_name: Any) -> bool: """Check if the model name is for an A2A agent (a2a/ prefix).""" return isinstance(model_name, str) and model_name.startswith("a2a/") def _raise_if_model_fully_blocked( llm_router: LitellmRouter, model_name: Any, team_id: Optional[str] ) -> None: if not isinstance(model_name, str) or not model_name: return if not isinstance(llm_router, litellm.Router): return deployments = ( llm_router.get_model_list(model_name=model_name, team_id=team_id) or [] ) if llm_router._are_all_deployments_blocked(deployments): raise litellm.PermissionDeniedError( message="Model is blocked", model=model_name, llm_provider="", response=httpx.Response( status_code=403, request=httpx.Request( method="POST", url="https://github.com/BerriAI/litellm" ), ), ) ROUTE_ENDPOINT_MAPPING = { "acompletion": "/chat/completions", "atext_completion": "/completions", "aembedding": "/embeddings", "aimage_generation": "/image/generations", "aspeech": "/audio/speech", "atranscription": "/audio/transcriptions", "amoderation": "/moderations", "arerank": "/rerank", "aresponses": "/responses", "_aresponses_websocket": "/responses", "alist_input_items": "/responses/{response_id}/input_items", "aimage_edit": "/images/edits", "acancel_responses": "/responses/{response_id}/cancel", "acompact_responses": "/responses/compact", "aocr": "/ocr", "asearch": "/search", "avideo_generation": "/videos", "avideo_list": "/videos", "avideo_status": "/videos/{video_id}", "avideo_content": "/videos/{video_id}/content", "avideo_remix": "/videos/{video_id}/remix", "avideo_create_character": "/videos/characters", "avideo_get_character": "/videos/characters/{character_id}", "avideo_edit": "/videos/edits", "avideo_extension": "/videos/extensions", "acreate_realtime_client_secret": "/realtime/client_secrets", "arealtime_calls": "/realtime/calls", "acreate_realtime_transcription_session": "/realtime/transcription_sessions", "acreate_container": "/containers", "alist_containers": "/containers", "aretrieve_container": "/containers/{container_id}", "adelete_container": "/containers/{container_id}", # Auto-generated container file routes "aupload_container_file": "/containers/{container_id}/files", "alist_container_files": "/containers/{container_id}/files", "aretrieve_container_file": "/containers/{container_id}/files/{file_id}", "adelete_container_file": "/containers/{container_id}/files/{file_id}", "aretrieve_container_file_content": "/containers/{container_id}/files/{file_id}/content", "acreate_skill": "/skills", "alist_skills": "/skills", "aget_skill": "/skills/{skill_id}", "adelete_skill": "/skills/{skill_id}", "aingest": "/rag/ingest", # Google Interactions API routes "acreate_interaction": "/interactions", "aget_interaction": "/interactions/{interaction_id}", "adelete_interaction": "/interactions/{interaction_id}", "acancel_interaction": "/interactions/{interaction_id}/cancel", # Google Managed Agents API routes "acreate_agent": "/v1beta/agents", "alist_agents": "/v1beta/agents", "aget_agent": "/v1beta/agents/{name}", "adelete_agent": "/v1beta/agents/{name}", "alist_agent_versions": "/v1beta/agents/{name}/versions", # OpenAI Evals API routes "acreate_eval": "/evals", "alist_evals": "/evals", "aget_eval": "/evals/{eval_id}", "aupdate_eval": "/evals/{eval_id}", "adelete_eval": "/evals/{eval_id}", "acancel_eval": "/evals/{eval_id}/cancel", # OpenAI Evals Runs API routes "acreate_run": "/evals/{eval_id}/runs", "alist_runs": "/evals/{eval_id}/runs", "aget_run": "/evals/{eval_id}/runs/{run_id}", "acancel_run": "/evals/{eval_id}/runs/{run_id}/cancel", "adelete_run": "/evals/{eval_id}/runs/{run_id}", } class ProxyModelNotFoundError(HTTPException): def __init__(self, route: str, model_name: str): detail = { "error": f"{route}: Invalid model name passed in model={model_name}. Call `/v1/models` to view available models for your key." } super().__init__(status_code=status.HTTP_400_BAD_REQUEST, detail=detail) def get_team_id_from_data(data: dict) -> Optional[str]: """ Get the team id from the data's metadata or litellm_metadata params. """ if ( "metadata" in data and data["metadata"] is not None and "user_api_key_team_id" in data["metadata"] ): return data["metadata"].get("user_api_key_team_id") elif ( "litellm_metadata" in data and data["litellm_metadata"] is not None and "user_api_key_team_id" in data["litellm_metadata"] ): return data["litellm_metadata"].get("user_api_key_team_id") return None _shared_session_lock: Optional[asyncio.Lock] = None def _get_shared_session_lock() -> asyncio.Lock: """Lazily create the shared session lock (must be called within a running event loop). WARNING: Do not reset _shared_session_lock to None while any coroutine may be executing the session-recovery path; doing so breaks the double-checked locking guarantee and can cause duplicate session creation. """ global _shared_session_lock if _shared_session_lock is None: _shared_session_lock = asyncio.Lock() return _shared_session_lock async def add_shared_session_to_data(data: dict) -> None: """ Add shared aiohttp session for connection reuse (prevents cold starts). If the session was closed (e.g. due to network interruption or idle timeout), automatically recreates it so connection pooling is restored. Uses an asyncio.Lock to prevent race conditions where multiple concurrent requests could each create a new session, leaking intermediate ones. Silently continues without session reuse if import fails or session is unavailable. Args: data: Dictionary to add the shared session to """ try: import litellm.proxy.proxy_server as proxy_server from litellm._logging import verbose_proxy_logger session = proxy_server.shared_aiohttp_session if session is not None and not session.closed: data["shared_session"] = session verbose_proxy_logger.info( f"SESSION REUSE: Attached shared aiohttp session to request (ID: {id(session)})" ) elif session is not None and session.closed: # Session was created at startup but has since closed — recreate it # Use lock to prevent concurrent recreation (avoids session/connector leak) lock = _get_shared_session_lock() async with lock: # Double-check under lock — another coroutine may have already recreated it session = proxy_server.shared_aiohttp_session if session is not None and not session.closed: data["shared_session"] = session return # session could be None here (if another coroutine set it to None) # or closed — either way we need to recreate if session is not None: verbose_proxy_logger.warning( f"SESSION REUSE: Shared aiohttp session is closed (ID: {id(session)}), recreating..." ) else: verbose_proxy_logger.warning( "SESSION REUSE: Shared aiohttp session is None after re-check, recreating..." ) try: new_session = ( await proxy_server._initialize_shared_aiohttp_session() ) except Exception: verbose_proxy_logger.exception( "SESSION REUSE: Exception during shared session recreation" ) new_session = None if new_session is not None: proxy_server.shared_aiohttp_session = new_session data["shared_session"] = new_session else: verbose_proxy_logger.info( "SESSION REUSE: Failed to recreate shared session, continuing without session reuse" ) else: verbose_proxy_logger.info( "SESSION REUSE: No shared session available for this request" ) except Exception: # Continue without session reuse — this outer handler covers import failures # and other unexpected errors to avoid breaking the request path. # Inner recovery logic has its own specific exception handling. try: from litellm._logging import verbose_proxy_logger verbose_proxy_logger.debug( "SESSION REUSE: Unexpected error in session setup, continuing without reuse", exc_info=True, ) except Exception: pass async def route_request( data: dict, llm_router: Optional[LitellmRouter], user_model: Optional[str], route_type: Literal[ "acompletion", "atext_completion", "aembedding", "aimage_generation", "aspeech", "atranscription", "amoderation", "arerank", "aresponses", "aget_responses", "adelete_responses", "acancel_responses", "acompact_responses", "acreate_response_reply", "alist_input_items", "_arealtime", # private function for realtime API "acreate_realtime_client_secret", "arealtime_calls", "acreate_realtime_transcription_session", "_aresponses_websocket", # private function for responses WebSocket mode "aimage_edit", "agenerate_content", "agenerate_content_stream", "allm_passthrough_route", "acreate_batch", "aretrieve_batch", "alist_batches", "afile_content", "afile_retrieve", "acreate_fine_tuning_job", "acancel_fine_tuning_job", "alist_fine_tuning_jobs", "aretrieve_fine_tuning_job", "avector_store_search", "avector_store_create", "avector_store_retrieve", "avector_store_list", "avector_store_update", "avector_store_delete", "avector_store_file_create", "avector_store_file_list", "avector_store_file_retrieve", "avector_store_file_content", "avector_store_file_update", "avector_store_file_delete", "aocr", "asearch", "avideo_generation", "avideo_list", "avideo_status", "avideo_content", "avideo_remix", "avideo_create_character", "avideo_get_character", "avideo_edit", "avideo_extension", "acreate_container", "alist_containers", "aretrieve_container", "adelete_container", "aupload_container_file", "alist_container_files", "aretrieve_container_file", "adelete_container_file", "aretrieve_container_file_content", "acreate_skill", "alist_skills", "aget_skill", "adelete_skill", "aingest", "anthropic_messages", "acreate_interaction", "aget_interaction", "adelete_interaction", "acancel_interaction", "acreate_agent", "alist_agents", "aget_agent", "adelete_agent", "alist_agent_versions", "asend_message", "call_mcp_tool", "acancel_batch", "afile_delete", "acreate_eval", "alist_evals", "aget_eval", "aupdate_eval", "adelete_eval", "acancel_eval", "acreate_run", "alist_runs", "aget_run", "acancel_run", "adelete_run", ], user_api_key_dict: Optional[UserAPIKeyAuth] = None, ): """ Common helper to route the request """ await add_shared_session_to_data(data) # Strip router-internal mock_testing_* flags. Combined with an # unauthorized fallback in ``router_settings_override`` they let a # caller deterministically execute requests against restricted # models. VERIA-44. for _key in _MOCK_TESTING_KWARG_NAMES: data.pop(_key, None) team_id = get_team_id_from_data(data) router_model_names = llm_router.model_names if llm_router is not None else [] # Preprocess Google GenAI generate content requests if route_type in ["agenerate_content", "agenerate_content_stream"]: # Map generationConfig to config parameter for Google GenAI compatibility if "generationConfig" in data and "config" not in data: data["config"] = data.pop("generationConfig") if "api_key" in data or "api_base" in data: if llm_router is not None: return getattr(llm_router, f"{route_type}")(**data) else: return getattr(litellm, f"{route_type}")(**data) elif ( route_type == "acompletion" and data.get("model", "") is not None and "," in data.get("model", "") and llm_router is not None ): # Handle batch completions with comma-separated models BEFORE user_config check # This ensures batch completion logic is applied even when user_config is set if data.get("fastest_response", False): return llm_router.abatch_completion_fastest_response(**data) else: models = [model.strip() for model in data.pop("model").split(",")] return llm_router.abatch_completion(models=models, **data) elif "user_config" in data: return _route_user_config_request(data, route_type) elif "router_settings_override" in data: # Apply per-request router settings overrides from key/team config # Instead of creating a new Router (expensive), merge settings into kwargs # The Router already supports per-request overrides for these settings override_settings = data.pop("router_settings_override") # Settings that the Router accepts as per-request kwargs # These override the global router settings for this specific request per_request_settings = [ "fallbacks", "context_window_fallbacks", "content_policy_fallbacks", "num_retries", "timeout", "model_group_retry_policy", ] # Merge override settings into data (only if not already set in request) for key in per_request_settings: if key in override_settings and key not in data: data[key] = override_settings[key] # Use main router with overridden kwargs if llm_router is not None: return getattr(llm_router, f"{route_type}")(**data) else: return getattr(litellm, f"{route_type}")(**data) elif llm_router is not None: _raise_if_model_fully_blocked( llm_router=llm_router, model_name=data.get("model"), team_id=team_id ) # Evals API: always route to litellm directly (not through router) # But extract model credentials if a model is provided if route_type in [ "acreate_eval", "alist_evals", "aget_eval", "aupdate_eval", "adelete_eval", "acancel_eval", "acreate_run", "alist_runs", "aget_run", "acancel_run", "adelete_run", "acreate_realtime_client_secret", "arealtime_calls", "acreate_realtime_transcription_session", ]: # If a model is provided, get its credentials from the router model = data.get("model") if model and llm_router: try: # Try to get deployment credentials for this model deployment_creds = llm_router.get_deployment_credentials( model_id=model ) if not deployment_creds: # Try by model group name deployment = llm_router.get_deployment_by_model_group_name( model_group_name=model ) if ( deployment and deployment.litellm_params and not llm_router._is_deployment_blocked(deployment) ): deployment_creds = deployment.litellm_params.model_dump( exclude_none=True ) # If we found credentials, merge them into data (but don't override user-provided values) if deployment_creds: data.update(deployment_creds) except Exception: # If we can't get deployment creds, continue without them pass return getattr(litellm, f"{route_type}")(**data) # Skip model-based routing for container operations if route_type in [ "acreate_container", "alist_containers", "aretrieve_container", "adelete_container", "aupload_container_file", "alist_container_files", "aretrieve_container_file", "adelete_container_file", "aretrieve_container_file_content", ]: return getattr(llm_router, f"{route_type}")(**data) # Interactions API: create with agent, get/delete/cancel don't need model routing if route_type in [ "acreate_interaction", "aget_interaction", "adelete_interaction", "acancel_interaction", ]: return getattr(llm_router, f"{route_type}")(**data) # Managed Agents API: these don't need model routing if route_type in [ "acreate_agent", "alist_agents", "aget_agent", "adelete_agent", "alist_agent_versions", ]: return getattr(llm_router, f"{route_type}")(**data) if route_type in [ "avideo_list", "avideo_status", "avideo_content", "avideo_remix", "avideo_create_character", "avideo_get_character", "avideo_edit", "avideo_extension", "avector_store_file_list", "avector_store_file_retrieve", "avector_store_file_content", "avector_store_file_delete", "acreate_skill", "alist_skills", "aget_skill", "adelete_skill", "aingest", ] and (data.get("model") is None or data.get("model") == ""): # These endpoints don't need a model, use custom_llm_provider directly return getattr(litellm, f"{route_type}")(**data) team_model_name = ( llm_router.map_team_model(data["model"], team_id) if team_id is not None else None ) if team_model_name is not None: data["model"] = team_model_name return getattr(llm_router, f"{route_type}")(**data) elif data["model"] in router_model_names or llm_router.has_model_id( data["model"] ): return getattr(llm_router, f"{route_type}")(**data) elif ( llm_router.model_group_alias is not None and data["model"] in llm_router.model_group_alias ): return getattr(llm_router, f"{route_type}")(**data) elif data["model"] not in router_model_names: # Check wildcards before checking deployment_names # Priority: 1. Exact model_name match, 2. Wildcard match, 3. deployment_names match if llm_router.router_general_settings.pass_through_all_models: return getattr(litellm, f"{route_type}")(**data) elif ( llm_router.default_deployment is not None or len(llm_router.pattern_router.patterns) > 0 ): return getattr(llm_router, f"{route_type}")(**data) elif data["model"] in llm_router.deployment_names: # Only match deployment_names if no wildcard matched return getattr(llm_router, f"{route_type}")( **data, specific_deployment=True ) elif route_type in [ "amoderation", "aget_responses", "adelete_responses", "acancel_responses", "alist_input_items", "avector_store_create", "avector_store_search", "avector_store_retrieve", "avector_store_list", "avector_store_update", "avector_store_delete", "avector_store_file_create", "avector_store_file_list", "avector_store_file_retrieve", "avector_store_file_content", "avector_store_file_update", "avector_store_file_delete", "asearch", "acreate_container", "alist_containers", "aretrieve_container", "adelete_container", "aupload_container_file", "alist_container_files", "aretrieve_container_file", "adelete_container_file", "aretrieve_container_file_content", ]: # These endpoints can work with or without model parameter return getattr(llm_router, f"{route_type}")(**data) elif route_type in [ "avideo_status", "avideo_content", "avideo_remix", "avideo_create_character", "avideo_get_character", "avideo_edit", "avideo_extension", ]: # Video endpoints: If model is provided (e.g., from decoded video_id or target_model_names), # try router first to allow for multi-deployment load balancing try: return getattr(llm_router, f"{route_type}")(**data) except Exception: # If router fails (e.g., model not found in router), fall back to direct call return getattr(litellm, f"{route_type}")(**data) elif _is_a2a_agent_model(data.get("model", "")): from litellm.proxy.agent_endpoints.a2a_routing import ( route_a2a_agent_request, ) result = await route_a2a_agent_request( data, route_type, user_api_key_dict=user_api_key_dict ) if result is not None: return result # Fall through to raise exception below if result is None elif user_model is not None: return getattr(litellm, f"{route_type}")(**data) elif route_type == "allm_passthrough_route": return getattr(litellm, f"{route_type}")(**data) # if no route found then it's a bad request route_name = ROUTE_ENDPOINT_MAPPING.get(route_type, route_type) raise ProxyModelNotFoundError( route=route_name, model_name=data.get("model", ""), )