fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
157 lines
5.7 KiB
Python
157 lines
5.7 KiB
Python
"""
|
|
This file contains the calling Azure OpenAI's `/openai/realtime` endpoint.
|
|
|
|
This requires websockets, and is currently only supported on LiteLLM Proxy.
|
|
"""
|
|
|
|
from typing import Any, Optional, cast
|
|
|
|
from litellm._logging import _redact_string, verbose_proxy_logger
|
|
from litellm.constants import REALTIME_WEBSOCKET_MAX_MESSAGE_SIZE_BYTES
|
|
from litellm.types.realtime import RealtimeQueryParams
|
|
|
|
from ....litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
|
|
from ....litellm_core_utils.realtime_streaming import RealTimeStreaming
|
|
from ....llms.custom_httpx.http_handler import get_shared_realtime_ssl_context
|
|
from ..azure import AzureChatCompletion
|
|
|
|
# BACKEND_WS_URL = "ws://localhost:8080/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01"
|
|
|
|
|
|
async def forward_messages(client_ws: Any, backend_ws: Any):
    """Relay every message received from ``backend_ws`` to ``client_ws``.

    Loops forever: each message awaited from ``backend_ws.recv()`` is passed
    to ``client_ws.send_text()``. The loop ends quietly once a
    ``websockets`` ``ConnectionClosed`` exception is raised.

    Args:
        client_ws: Object exposing an awaitable ``send_text(message)``.
        backend_ws: Object exposing an awaitable ``recv()``.
    """
    # Imported lazily so the module can be loaded without `websockets`.
    import websockets

    try:
        while True:
            payload = await backend_ws.recv()
            await client_ws.send_text(payload)
    except websockets.exceptions.ConnectionClosed:  # type: ignore
        # A closed connection is the normal way for the relay to finish.
        return
|
|
|
|
|
|
class AzureOpenAIRealtime(AzureChatCompletion):
    """Handler for Azure OpenAI's realtime (WebSocket) endpoint.

    Builds the backend WebSocket URL and relays traffic between the caller's
    websocket and the Azure backend through ``RealTimeStreaming``.
    """

    def _construct_url(
        self,
        api_base: str,
        model: str,
        api_version: Optional[str],
        realtime_protocol: Optional[str] = None,
        query_params: Optional[RealtimeQueryParams] = None,
    ) -> str:
        """
        Construct Azure realtime WebSocket URL.

        Args:
            api_base: Azure API base URL (will be converted from https:// to wss://)
            model: Model deployment name
            api_version: Azure API version (only used on the beta path)
            realtime_protocol: Protocol version to use (case-insensitive):
                - "GA" or "v1": Uses /openai/v1/realtime (GA path)
                - any other value or None: Uses /openai/realtime (beta path, default)
            query_params: Extra query params to forward (e.g. intent=transcription).

        Returns:
            WebSocket URL string

        Examples:
            beta/default: "wss://.../openai/realtime?api-version=2024-10-01-preview&deployment=gpt-4o-realtime-preview"
            GA/v1: "wss://.../openai/v1/realtime?model=gpt-realtime-deployment"
        """
        from urllib.parse import urlencode

        api_base = api_base.replace("https://", "wss://")

        # Determine path based on realtime_protocol (case-insensitive)
        _is_ga = realtime_protocol is not None and realtime_protocol.upper() in (
            "GA",
            "V1",
        )
        intent = (query_params or {}).get("intent")

        if _is_ga:
            path = "/openai/v1/realtime"
            query_parts = []
            # `model` is sent on the GA path only for non-transcription
            # sessions, and only when the caller passed no query params at all
            # or explicitly included a "model" key in them.
            if intent != "transcription" and (
                query_params is None or "model" in query_params
            ):
                query_parts.append(urlencode({"model": model}))
        else:
            # Default to beta path for backwards compatibility
            path = "/openai/realtime"
            query_parts = [urlencode({"api-version": api_version, "deployment": model})]

        # A truthy intent is forwarded on both paths.
        if intent:
            query_parts.append(urlencode({"intent": intent}))

        qs = "&".join(query_parts)
        return f"{api_base}{path}?{qs}" if qs else f"{api_base}{path}"

    async def async_realtime(
        self,
        model: str,
        websocket: Any,
        logging_obj: LiteLLMLogging,
        api_base: Optional[str] = None,
        api_key: Optional[str] = None,
        api_version: Optional[str] = None,
        azure_ad_token: Optional[str] = None,
        client: Optional[Any] = None,
        timeout: Optional[float] = None,
        realtime_protocol: Optional[str] = None,
        query_params: Optional[RealtimeQueryParams] = None,
        user_api_key_dict: Optional[Any] = None,
        litellm_metadata: Optional[dict] = None,
    ):
        """
        Open a WebSocket to the Azure realtime backend and relay traffic.

        Connects to the URL built by ``_construct_url`` (sending ``api_key`` in
        the ``api-key`` header), wraps both sockets in ``RealTimeStreaming``
        and awaits its ``bidirectional_forward()``.

        Args:
            model: Model deployment name.
            websocket: Caller-side websocket; it is closed with the backend's
                HTTP status when the backend rejects the handshake.
            logging_obj: LiteLLM logging object passed to ``RealTimeStreaming``.
            api_base: Azure API base URL. Required.
            api_key: Value sent in the ``api-key`` header.
            api_version: Azure API version. Required unless
                ``realtime_protocol`` is "GA" or "v1" (case-insensitive).
            azure_ad_token: Accepted for interface compatibility; not read here.
            client: Accepted for interface compatibility; not read here.
            timeout: Accepted for interface compatibility; not read here.
            realtime_protocol: Selects the GA or beta backend protocol; see
                ``_construct_url``.
            query_params: Extra query params; ``intent == "transcription"``
                also sets ``force_transcription_model`` to ``model``.
            user_api_key_dict: Passed through to ``RealTimeStreaming``.
            litellm_metadata: Passed to ``RealTimeStreaming`` inside
                ``request_data`` (an empty dict when not provided).

        Raises:
            ValueError: If ``api_base`` is None, or if ``api_version`` is None
                while the beta protocol is in use.
        """
        import websockets
        from websockets.asyncio.client import ClientConnection

        if api_base is None:
            raise ValueError("api_base is required for Azure OpenAI calls")
        backend_uses_beta_protocol = (
            realtime_protocol is None or realtime_protocol.upper() not in ("GA", "V1")
        )
        if api_version is None and backend_uses_beta_protocol:
            raise ValueError("api_version is required for Azure OpenAI calls")

        url = self._construct_url(
            api_base,
            model,
            api_version,
            realtime_protocol=realtime_protocol,
            query_params=query_params,
        )

        try:
            ssl_context = get_shared_realtime_ssl_context()
            async with websockets.connect(  # type: ignore
                url,
                additional_headers={
                    "api-key": api_key,  # type: ignore
                },
                max_size=REALTIME_WEBSOCKET_MAX_MESSAGE_SIZE_BYTES,
                ssl=ssl_context,
            ) as backend_ws:
                realtime_streaming = RealTimeStreaming(
                    websocket,
                    cast(ClientConnection, backend_ws),
                    logging_obj,
                    model=model,
                    user_api_key_dict=user_api_key_dict,
                    request_data={"litellm_metadata": litellm_metadata or {}},
                    backend_uses_beta_protocol=backend_uses_beta_protocol,
                    force_transcription_model=(
                        model
                        if (query_params or {}).get("intent") == "transcription"
                        else None
                    ),
                )
                await realtime_streaming.bidirectional_forward()

        except (
            websockets.exceptions.InvalidStatus,  # type: ignore
            websockets.exceptions.InvalidStatusCode,  # type: ignore
        ) as e:
            # The asyncio client (the one that accepts `additional_headers`)
            # reports a rejected handshake as InvalidStatus, whose status lives
            # on `e.response.status_code`; the legacy client raises
            # InvalidStatusCode with `e.status_code`. Handle both so the caller
            # is always told why the backend refused the connection.
            status_code = getattr(e, "status_code", None)
            if status_code is None:
                status_code = e.response.status_code
            # NOTE(review): an HTTP status is forwarded as the WebSocket close
            # code, as before - confirm the client side accepts such codes.
            await websocket.close(code=status_code, reason=_redact_string(str(e)))
        except Exception:
            # Best-effort boundary: log with traceback and swallow so a backend
            # failure does not propagate out of the realtime handler.
            verbose_proxy_logger.exception(
                "Error in AzureOpenAIRealtime.async_realtime"
            )
|