MoFin/venv/lib/python3.12/site-packages/litellm/llms/openai/realtime/handler.py

"""
This file contains the handler for calling OpenAI's `/v1/realtime` endpoint.

This requires websockets, and is currently only supported on LiteLLM Proxy.
"""

from typing import Any, Optional, cast

from litellm._logging import _redact_string, verbose_logger
from litellm.constants import REALTIME_WEBSOCKET_MAX_MESSAGE_SIZE_BYTES
from litellm.types.realtime import RealtimeQueryParams

from ....litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
from ....litellm_core_utils.realtime_streaming import (
    RealTimeStreaming,
    client_sent_openai_beta_realtime_header,
)
from ....llms.custom_httpx.http_handler import get_shared_realtime_ssl_context
from ..openai import OpenAIChatCompletion


class OpenAIRealtime(OpenAIChatCompletion):
    """
    Base handler for OpenAI-compatible realtime WebSocket connections.

    Subclasses can override template methods to customize:
    - _get_default_api_base(): Default API base URL
    - _get_additional_headers(): Extra headers beyond Authorization
    - _get_ssl_config(): SSL configuration for WebSocket connection
    """

    def _get_default_api_base(self) -> str:
        """
        Get the default API base URL for this provider.
        Override this in subclasses to set provider-specific defaults.

        Returns:
            The base URL used by ``async_realtime`` when the caller passes no
            ``api_base``.
        """
        return "https://api.openai.com/"

    def _get_additional_headers(
        self,
        api_key: str,
        *,
        openai_beta_realtime: bool = False,
    ) -> dict:
        """
        Headers for the upstream OpenAI Realtime WebSocket.

        When the client sent ``OpenAI-Beta: realtime=v1`` on the proxy WebSocket,
        ``openai_beta_realtime`` is True and the same header is forwarded upstream
        so the legacy beta API is used. GA clients omit that header on the client
        connection and must send GA-shaped ``session.update`` payloads.

        Args:
            api_key: Credential placed in the ``Authorization: Bearer ...`` header.
            openai_beta_realtime: When True, also add ``OpenAI-Beta: realtime=v1``.

        Returns:
            A new dict of header name -> value.
        """
        headers: dict = {"Authorization": f"Bearer {api_key}"}
        if openai_beta_realtime:
            headers["OpenAI-Beta"] = "realtime=v1"
        return headers

    def _get_ssl_config(self, url: str) -> Any:
        """
        Get SSL configuration for WebSocket connection.
        Override this in subclasses to customize SSL behavior.

        Args:
            url: WebSocket URL (ws:// or wss://)

        Returns:
            SSL configuration (None, True, or SSLContext)
        """
        # Plain-text WebSocket: no TLS configuration applies.
        if url.startswith("ws://"):
            return None

        # Use the shared SSL context which respects custom CA certs and SSL settings
        ssl_config = get_shared_realtime_ssl_context()

        # If ssl_config is False (ssl_verify=False), True is handed to the
        # websockets library instead, because passing False would fail.
        # NOTE(review): ``ssl=True`` normally selects a default TLS context, which
        # presumably still verifies certificates - confirm that this really
        # honours ssl_verify=False.
        if ssl_config is False:
            return True

        return ssl_config

    def _construct_url(self, api_base: str, query_params: RealtimeQueryParams) -> str:
        """
        Construct the backend websocket URL with all query parameters (including 'model').

        The ``https://`` / ``http://`` scheme of ``api_base`` is swapped for
        ``wss://`` / ``ws://``, the path is replaced with ``/v1/realtime`` and,
        when ``query_params`` is non-empty, it becomes the URL's query string.

        Args:
            api_base: Base URL of the upstream provider.
            query_params: Query parameters to attach to the URL.

        Returns:
            The backend WebSocket URL as a string.
        """
        from httpx import URL

        api_base = api_base.replace("https://", "wss://")
        api_base = api_base.replace("http://", "ws://")
        url = URL(api_base)
        # Set the correct path
        url = url.copy_with(path="/v1/realtime")
        # Include all query parameters including 'model'
        if query_params:
            url = url.copy_with(params=query_params)
        return str(url)

    async def async_realtime(
        self,
        model: str,
        websocket: Any,
        logging_obj: LiteLLMLogging,
        api_base: Optional[str] = None,
        api_key: Optional[str] = None,
        client: Optional[Any] = None,
        timeout: Optional[float] = None,
        query_params: Optional[RealtimeQueryParams] = None,
        user_api_key_dict: Optional[Any] = None,
        litellm_metadata: Optional[dict] = None,
        **kwargs: Any,
    ):
        """
        Connect to the upstream realtime WebSocket and relay traffic to the client.

        Builds the backend URL and headers, records the request through
        ``logging_obj.pre_call``, opens the backend connection and hands both
        sockets to ``RealTimeStreaming.bidirectional_forward()``. Failures inside
        that flow are reported to the client by closing ``websocket``.

        Args:
            model: Model name. Used as the ``model`` query parameter when
                ``query_params`` is None, and passed to ``RealTimeStreaming``.
            websocket: Client-side WebSocket; it is closed via
                ``close(code=..., reason=...)`` on failure.
            logging_obj: LiteLLM logging object used for the pre-call log and
                forwarded to ``RealTimeStreaming``.
            api_base: Upstream base URL; defaults to ``_get_default_api_base()``.
            api_key: Upstream credential (required).
            client: Accepted for interface compatibility; not used in this method.
            timeout: Accepted for interface compatibility; not used in this method.
            query_params: Query parameters for the upstream URL. When its
                ``intent`` is ``"transcription"``, ``model`` is passed to
                ``RealTimeStreaming`` as ``force_transcription_model``.
            user_api_key_dict: Forwarded to ``RealTimeStreaming``.
            litellm_metadata: Forwarded to ``RealTimeStreaming`` inside
                ``request_data`` (an empty dict is used when None).
            **kwargs: Accepted for interface compatibility; not used in this method.

        Raises:
            ValueError: If ``api_key`` is None.
            Exception: If closing the client WebSocket after a failure raises an
                unexpected ``RuntimeError`` (chained to that error).
        """
        import websockets
        from websockets.asyncio.client import ClientConnection

        if api_base is None:
            api_base = self._get_default_api_base()
        if api_key is None:
            raise ValueError("api_key is required for OpenAI realtime calls")

        # Use all query params if provided, else fallback to just model
        if query_params is None:
            query_params = {"model": model}
        url = self._construct_url(api_base, query_params)

        try:
            # Get provider-specific SSL configuration
            ssl_config = self._get_ssl_config(url)

            openai_beta_realtime = client_sent_openai_beta_realtime_header(websocket)
            if not openai_beta_realtime:
                verbose_logger.debug(
                    "OpenAI Realtime: connecting with GA protocol (no OpenAI-Beta header). "
                    "If your client expects beta event names, add 'OpenAI-Beta: realtime=v1' "
                    "to the WebSocket headers sent to the LiteLLM proxy."
                )
            headers = self._get_additional_headers(
                api_key, openai_beta_realtime=openai_beta_realtime
            )

            # Log a masked request preview consistent with other endpoints.
            logging_obj.pre_call(
                input=None,
                api_key=api_key,
                additional_args={
                    "api_base": url,
                    "headers": headers,
                    "complete_input_dict": {"query_params": query_params},
                },
            )
            async with websockets.connect(  # type: ignore
                url,
                additional_headers=headers,  # type: ignore
                max_size=REALTIME_WEBSOCKET_MAX_MESSAGE_SIZE_BYTES,
                ssl=ssl_config,
            ) as backend_ws:
                realtime_streaming = RealTimeStreaming(
                    websocket,
                    cast(ClientConnection, backend_ws),
                    logging_obj,
                    model=model,
                    user_api_key_dict=user_api_key_dict,
                    request_data={"litellm_metadata": litellm_metadata or {}},
                    force_transcription_model=(
                        model
                        if (query_params or {}).get("intent") == "transcription"
                        else None
                    ),
                )
                await realtime_streaming.bidirectional_forward()

        # NOTE(review): the upstream HTTP status is passed through as the
        # WebSocket close code; HTTP statuses lie outside the 1000-4999 close-code
        # range, and newer websockets clients may raise ``InvalidStatus`` rather
        # than ``InvalidStatusCode`` - confirm this branch behaves as intended.
        except websockets.exceptions.InvalidStatusCode as e:  # type: ignore
            await websocket.close(code=e.status_code, reason=_redact_string(str(e)))
        except Exception as e:
            try:
                await websocket.close(
                    code=1011, reason=_redact_string(f"Internal server error: {str(e)}")
                )
            except RuntimeError as close_error:
                close_error_text = str(close_error)
                if (
                    "already completed" in close_error_text
                    or "websocket.close" in close_error_text
                ):
                    # The WebSocket is already closed or the response is completed, so we can ignore this error
                    pass
                else:
                    # A different RuntimeError is unexpected: surface it, keeping
                    # the original close failure attached as the explicit cause.
                    raise Exception(
                        f"Unexpected error while closing WebSocket: {close_error}"
                    ) from close_error