MoFin/venv/lib/python3.12/site-packages/litellm/constants.py

import os
import sys
from typing import List, Literal, Optional

from litellm.litellm_core_utils.env_utils import get_env_int

# General-purpose defaults. Every value below that reads the environment can be
# overridden through the environment variable sharing the constant's name.
DEFAULT_HEALTH_CHECK_PROMPT = os.environ.get(
    "DEFAULT_HEALTH_CHECK_PROMPT", "test from litellm"
)
AZURE_DEFAULT_RESPONSES_API_VERSION = os.environ.get(
    "AZURE_DEFAULT_RESPONSES_API_VERSION", "preview"
)
ROUTER_MAX_FALLBACKS = int(os.environ.get("ROUTER_MAX_FALLBACKS", 5))

# Batch sizes / flush intervals (generic, S3 and SQS variants).
DEFAULT_BATCH_SIZE = int(os.environ.get("DEFAULT_BATCH_SIZE", 512))
DEFAULT_FLUSH_INTERVAL_SECONDS = int(
    os.environ.get("DEFAULT_FLUSH_INTERVAL_SECONDS", 5)
)
DEFAULT_S3_FLUSH_INTERVAL_SECONDS = int(
    os.environ.get("DEFAULT_S3_FLUSH_INTERVAL_SECONDS", 10)
)
DEFAULT_S3_BATCH_SIZE = int(os.environ.get("DEFAULT_S3_BATCH_SIZE", 512))
DEFAULT_SQS_FLUSH_INTERVAL_SECONDS = int(
    os.environ.get("DEFAULT_SQS_FLUSH_INTERVAL_SECONDS", 10)
)
DEFAULT_NUM_WORKERS_LITELLM_PROXY = int(
    os.environ.get("DEFAULT_NUM_WORKERS_LITELLM_PROXY", 1)
)
DYNAMIC_RATE_LIMIT_ERROR_THRESHOLD_PER_MINUTE = int(
    os.environ.get("DYNAMIC_RATE_LIMIT_ERROR_THRESHOLD_PER_MINUTE", 1)
)
DEFAULT_SQS_BATCH_SIZE = int(os.environ.get("DEFAULT_SQS_BATCH_SIZE", 512))
SQS_SEND_MESSAGE_ACTION = "SendMessage"
SQS_API_VERSION = "2012-11-05"

# Retry / recursion / cooldown defaults.
DEFAULT_MAX_RETRIES = int(os.environ.get("DEFAULT_MAX_RETRIES", 2))
DEFAULT_MAX_RECURSE_DEPTH = int(os.environ.get("DEFAULT_MAX_RECURSE_DEPTH", 100))
DEFAULT_MAX_RECURSE_DEPTH_SENSITIVE_DATA_MASKER = int(
    os.environ.get("DEFAULT_MAX_RECURSE_DEPTH_SENSITIVE_DATA_MASKER", 10)
)
# By default a deployment is cooled down when 50% of its requests fail within
# a given minute.
DEFAULT_FAILURE_THRESHOLD_PERCENT = float(
    os.environ.get("DEFAULT_FAILURE_THRESHOLD_PERCENT", 0.5)
)
DEFAULT_MAX_TOKENS = int(os.environ.get("DEFAULT_MAX_TOKENS", 4096))
DEFAULT_ALLOWED_FAILS = int(os.environ.get("DEFAULT_ALLOWED_FAILS", 3))
DEFAULT_REDIS_SYNC_INTERVAL = int(os.environ.get("DEFAULT_REDIS_SYNC_INTERVAL", 1))
DEFAULT_COOLDOWN_TIME_SECONDS = int(
    os.environ.get("DEFAULT_COOLDOWN_TIME_SECONDS", 5)
)
DEFAULT_REPLICATE_POLLING_RETRIES = int(
    os.environ.get("DEFAULT_REPLICATE_POLLING_RETRIES", 5)
)
DEFAULT_REPLICATE_POLLING_DELAY_SECONDS = int(
    os.environ.get("DEFAULT_REPLICATE_POLLING_DELAY_SECONDS", 1)
)
DEFAULT_IMAGE_TOKEN_COUNT = int(os.environ.get("DEFAULT_IMAGE_TOKEN_COUNT", 250))

# Maximum wall-clock seconds a streaming response is allowed to run.
# Streams exceeding this duration are terminated with a Timeout error.
# None (default) = no limit.  Set env var to a number of seconds to enable globally.
# An empty or whitespace-only value is treated the same as "unset", so a bare
# `LITELLM_MAX_STREAMING_DURATION_SECONDS=` line in an env file does not raise
# ValueError while this module is being imported.
_max_stream_duration_env = os.getenv("LITELLM_MAX_STREAMING_DURATION_SECONDS", None)
LITELLM_MAX_STREAMING_DURATION_SECONDS = (
    float(_max_stream_duration_env)
    if _max_stream_duration_env is not None and _max_stream_duration_env.strip()
    else None
)

# Upper bound on the number of base64 characters retained in logging payloads.
# Data URIs longer than this are replaced with a size placeholder; a value of
# 0 switches truncation off.
MAX_BASE64_LENGTH_FOR_LOGGING = int(
    os.environ.get("MAX_BASE64_LENGTH_FOR_LOGGING", 64)
)

# Opt-in flag: only the env value "true" (any letter case) enables it.
# When enabled, detailed per-phase timing headers are added to responses:
# x-litellm-timing-{pre-processing,llm-api,post-processing,message-copy}-ms
LITELLM_DETAILED_TIMING = (
    "true" == os.environ.get("LITELLM_DETAILED_TIMING", "false").lower()
)

# Model cost map validation constants
# A fetched cost map must contain at least this many models to be considered valid.
MODEL_COST_MAP_MIN_MODEL_COUNT = int(
    os.environ.get("MODEL_COST_MAP_MIN_MODEL_COUNT", 50)
)
# Maximum allowed shrinkage ratio versus the local backup
# (0.5 = reject the fetched map if it is <50% of the backup).
MODEL_COST_MAP_MAX_SHRINK_RATIO = float(
    os.environ.get("MODEL_COST_MAP_MAX_SHRINK_RATIO", 0.5)
)

DEFAULT_IMAGE_WIDTH = int(os.environ.get("DEFAULT_IMAGE_WIDTH", 300))
DEFAULT_IMAGE_HEIGHT = int(os.environ.get("DEFAULT_IMAGE_HEIGHT", 300))

# Maximum size, in MB, for image URL downloads (default 50 MB, chosen to map to
# OpenAI's 50 MB payload limit). Guards against memory issues from very large
# images; requests with images exceeding this size will be rejected.
# NOTE(review): the earlier notes on this constant described a value of 0 both
# as "disable limit" and as "disable image URL handling entirely" - confirm
# which one the consuming code implements before relying on 0.
MAX_IMAGE_URL_DOWNLOAD_SIZE_MB = float(
    os.environ.get("MAX_IMAGE_URL_DOWNLOAD_SIZE_MB", 50)
)

# Per-item size cap for the in-memory cache, in KB (1024 KB = 1 MB).
MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = int(
    os.environ.get("MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB", 1024)
)
# Minimum number of requests to consider "reasonable traffic".
# Used for single-deployment cooldown logic.
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = int(
    os.environ.get("SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD", 1000)
)
# Minimum number of requests before the error-rate cooldown applies.
# Prevents the cooldown from triggering on the first failure.
DEFAULT_FAILURE_THRESHOLD_MINIMUM_REQUESTS = int(
    os.environ.get("DEFAULT_FAILURE_THRESHOLD_MINIMUM_REQUESTS", 5)
)

DEFAULT_REASONING_EFFORT_DISABLE_THINKING_BUDGET = int(
    os.environ.get("DEFAULT_REASONING_EFFORT_DISABLE_THINKING_BUDGET", 0)
)

# MCP Semantic Tool Filter Defaults
DEFAULT_MCP_SEMANTIC_FILTER_EMBEDDING_MODEL = os.environ.get(
    "DEFAULT_MCP_SEMANTIC_FILTER_EMBEDDING_MODEL", "text-embedding-3-small"
)
DEFAULT_MCP_SEMANTIC_FILTER_TOP_K = int(
    os.environ.get("DEFAULT_MCP_SEMANTIC_FILTER_TOP_K", 10)
)
DEFAULT_MCP_SEMANTIC_FILTER_SIMILARITY_THRESHOLD = float(
    os.environ.get("DEFAULT_MCP_SEMANTIC_FILTER_SIMILARITY_THRESHOLD", 0.3)
)
MAX_MCP_SEMANTIC_FILTER_TOOLS_HEADER_LENGTH = int(
    os.environ.get("MAX_MCP_SEMANTIC_FILTER_TOOLS_HEADER_LENGTH", 150)
)

# Semantic Guard Defaults
DEFAULT_SEMANTIC_GUARD_EMBEDDING_MODEL = os.environ.get(
    "DEFAULT_SEMANTIC_GUARD_EMBEDDING_MODEL", "text-embedding-3-small"
)
DEFAULT_SEMANTIC_GUARD_SIMILARITY_THRESHOLD = float(
    os.environ.get("DEFAULT_SEMANTIC_GUARD_SIMILARITY_THRESHOLD", 0.75)
)

# MCP OAuth2 Client Credentials Defaults
MCP_OAUTH2_TOKEN_EXPIRY_BUFFER_SECONDS = int(
    os.environ.get("MCP_OAUTH2_TOKEN_EXPIRY_BUFFER_SECONDS", "60")
)
MCP_OAUTH2_TOKEN_CACHE_MAX_SIZE = int(
    os.environ.get("MCP_OAUTH2_TOKEN_CACHE_MAX_SIZE", "200")
)
MCP_OAUTH2_TOKEN_CACHE_DEFAULT_TTL = int(
    os.environ.get("MCP_OAUTH2_TOKEN_CACHE_DEFAULT_TTL", "3600")
)

# Default npm cache directory for STDIO MCP servers.
# npm/npx needs a writable cache dir; in containers the default (~/.npm)
# may be missing or read-only, whereas /tmp is always writable.
MCP_NPM_CACHE_DIR = os.environ.get("MCP_NPM_CACHE_DIR", "/tmp/.npm_mcp_cache")
MCP_OAUTH2_TOKEN_CACHE_MIN_TTL = int(
    os.environ.get("MCP_OAUTH2_TOKEN_CACHE_MIN_TTL", "10")
)

# Per-user OAuth token Redis cache (for server-side token storage)
MCP_PER_USER_TOKEN_REDIS_KEY_PREFIX = "mcp:per_user_token"
# Default TTL of 43200 seconds = 12 hours.
MCP_PER_USER_TOKEN_DEFAULT_TTL = int(
    os.environ.get("MCP_PER_USER_TOKEN_DEFAULT_TTL", "43200")
)
MCP_PER_USER_TOKEN_EXPIRY_BUFFER_SECONDS = int(
    os.environ.get("MCP_PER_USER_TOKEN_EXPIRY_BUFFER_SECONDS", "60")
)

# MCP timeout defaults (seconds). Override via the LITELLM_MCP_* env vars for
# slow/custom MCP servers.
MCP_CLIENT_TIMEOUT = float(os.environ.get("LITELLM_MCP_CLIENT_TIMEOUT", "60.0"))
MCP_TOOL_LISTING_TIMEOUT = float(
    os.environ.get("LITELLM_MCP_TOOL_LISTING_TIMEOUT", "30.0")
)
MCP_METADATA_TIMEOUT = float(os.environ.get("LITELLM_MCP_METADATA_TIMEOUT", "10.0"))
MCP_HEALTH_CHECK_TIMEOUT = float(
    os.environ.get("LITELLM_MCP_HEALTH_CHECK_TIMEOUT", "10.0")
)

# Allowlist of commands permitted for MCP stdio transport.
# Prevents arbitrary command execution via /mcp-rest/test/* endpoints or server creation.
# Note: allowlisted runtimes can still execute code via args (e.g. python -c "...").
# This is an accepted residual risk since these endpoints require PROXY_ADMIN.
# Extend via LITELLM_MCP_STDIO_EXTRA_COMMANDS env var (comma-separated).
# Each extra entry is stripped of surrounding whitespace and blank entries are
# dropped, so "bun, my-tool" allows "bun" and "my-tool" (not " my-tool", which
# could never match a command name).
_MCP_STDIO_EXTRA_COMMANDS = os.getenv("LITELLM_MCP_STDIO_EXTRA_COMMANDS", "")
MCP_STDIO_ALLOWED_COMMANDS: frozenset = frozenset(
    {"npx", "uvx", "python", "python3", "node", "docker", "deno"}
    | {
        command.strip()
        for command in _MCP_STDIO_EXTRA_COMMANDS.split(",")
        if command.strip()
    }
)

# MCP OAuth2 Token Exchange (OBO) Defaults
MCP_TOKEN_EXCHANGE_CACHE_MAX_SIZE = int(
    os.environ.get("MCP_TOKEN_EXCHANGE_CACHE_MAX_SIZE", "500")
)

# Header names listed for the LiteLLM UI (all lowercase, `x-litellm-` prefixed).
LITELLM_UI_ALLOW_HEADERS = list(
    (
        "x-litellm-semantic-filter",
        "x-litellm-semantic-filter-tools",
        "x-litellm-adaptive-router-model",
    )
)

# Gemini model-specific minimal thinking budget constants
# (defaults: 2.5 Flash = 1, 2.5 Pro = 128, 2.5 Flash-Lite = 512).
DEFAULT_REASONING_EFFORT_MINIMAL_THINKING_BUDGET_GEMINI_2_5_FLASH = int(
    os.environ.get(
        "DEFAULT_REASONING_EFFORT_MINIMAL_THINKING_BUDGET_GEMINI_2_5_FLASH", 1
    )
)
DEFAULT_REASONING_EFFORT_MINIMAL_THINKING_BUDGET_GEMINI_2_5_PRO = int(
    os.environ.get(
        "DEFAULT_REASONING_EFFORT_MINIMAL_THINKING_BUDGET_GEMINI_2_5_PRO", 128
    )
)
DEFAULT_REASONING_EFFORT_MINIMAL_THINKING_BUDGET_GEMINI_2_5_FLASH_LITE = int(
    os.environ.get(
        "DEFAULT_REASONING_EFFORT_MINIMAL_THINKING_BUDGET_GEMINI_2_5_FLASH_LITE",
        512,
    )
)

# Maximum number of callbacks that can be registered
# This prevents callbacks from exponentially growing and consuming CPU resources
# Override with LITELLM_MAX_CALLBACKS env var for large deployments (e.g., many teams with guardrails)
# NOTE(review): unlike most constants in this module, this one is read through
# the `get_env_int` helper rather than `int(os.getenv(...))`; presumably the
# second argument (100) is the fallback when the variable is unset - confirm
# the helper's handling of invalid values in litellm_core_utils.env_utils.
MAX_CALLBACKS = get_env_int("LITELLM_MAX_CALLBACKS", 100)

# Metadata key that records which pre_call guardrails the proxy loop has
# already run, so the deployment-level hook does not re-run them for the same
# request.
PRE_CALL_EXECUTED_GUARDRAILS_KEY = "_pre_call_executed_guardrails"

# Generic fallback for unknown models
DEFAULT_REASONING_EFFORT_MINIMAL_THINKING_BUDGET = int(
    os.environ.get("DEFAULT_REASONING_EFFORT_MINIMAL_THINKING_BUDGET", 128)
)

# Provider-specific API base URLs
XAI_API_BASE = "https://api.x.ai/v1"

# Thinking budgets per reasoning-effort level; the defaults double at each
# step: low 1024, medium 2048, high 4096, xhigh 8192, max 16384.
DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET = int(
    os.environ.get("DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET", 1024)
)
DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET = int(
    os.environ.get("DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET", 2048)
)
DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET = int(
    os.environ.get("DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET", 4096)
)
DEFAULT_REASONING_EFFORT_XHIGH_THINKING_BUDGET = int(
    os.environ.get("DEFAULT_REASONING_EFFORT_XHIGH_THINKING_BUDGET", 8192)
)
DEFAULT_REASONING_EFFORT_MAX_THINKING_BUDGET = int(
    os.environ.get("DEFAULT_REASONING_EFFORT_MAX_THINKING_BUDGET", 16384)
)
# Maximum number of attempts to trim the message
MAX_TOKEN_TRIMMING_ATTEMPTS = int(os.environ.get("MAX_TOKEN_TRIMMING_ATTEMPTS", 10))

RUNWAYML_DEFAULT_API_VERSION = os.environ.get(
    "RUNWAYML_DEFAULT_API_VERSION", "2024-11-06"
)
# 600 seconds = 10 minutes default for image generation
RUNWAYML_POLLING_TIMEOUT = int(os.environ.get("RUNWAYML_POLLING_TIMEOUT", 600))

########## Networking constants ##############################################################
# Re-use the same httpx client for one hour (60 * 60 = 3600 seconds).
_DEFAULT_TTL_FOR_HTTPX_CLIENTS = 60 * 60

# Aiohttp connection pooling - prevents memory leaks from unbounded connection growth
# Set to 0 for unlimited (not recommended for production)
AIOHTTP_CONNECTOR_LIMIT = int(os.environ.get("AIOHTTP_CONNECTOR_LIMIT", 1000))
AIOHTTP_CONNECTOR_LIMIT_PER_HOST = int(
    os.environ.get("AIOHTTP_CONNECTOR_LIMIT_PER_HOST", 500)
)
AIOHTTP_KEEPALIVE_TIMEOUT = int(os.environ.get("AIOHTTP_KEEPALIVE_TIMEOUT", 120))
AIOHTTP_TTL_DNS_CACHE = int(os.environ.get("AIOHTTP_TTL_DNS_CACHE", 300))

# TCP keep-alive (SO_KEEPALIVE) is opt-in: only the env value "true" (any
# letter case) turns it on. It is required when running behind NAT/LBs whose
# idle timeout is shorter than provider response timeouts (e.g. AWS NAT
# Gateway: 350s vs OpenAI/Azure: 600s). Without it, the kernel sends nothing
# during a long provider call and the NAT reaps the flow before the response
# arrives. With SO_KEEPALIVE the kernel emits TCP probes that reset the NAT
# idle timer.
AIOHTTP_SO_KEEPALIVE = (
    "true" == os.environ.get("AIOHTTP_SO_KEEPALIVE", "False").lower()
)
AIOHTTP_TCP_KEEPIDLE = int(os.environ.get("AIOHTTP_TCP_KEEPIDLE", 60))
AIOHTTP_TCP_KEEPINTVL = int(os.environ.get("AIOHTTP_TCP_KEEPINTVL", 30))
AIOHTTP_TCP_KEEPCNT = int(os.environ.get("AIOHTTP_TCP_KEEPCNT", 5))

# enable_cleanup_closed is only needed for Python versions with the SSL leak bug
# Fixed in Python 3.12.7+ and 3.13.1+ (see https://github.com/python/cpython/pull/118960)
# Reference: https://github.com/aio-libs/aiohttp/blob/master/aiohttp/connector.py#L74-L78
# True for any interpreter older than 3.12.7, and for the 3.13.0 series.
AIOHTTP_NEEDS_CLEANUP_CLOSED = sys.version_info < (3, 12, 7) or (
    (3, 13, 0) <= sys.version_info < (3, 13, 1)
)

# WebSocket constants
# Default to None (unlimited) to match OpenAI's official agents SDK behavior
# https://github.com/openai/openai-agents-python/blob/cf1b933660e44fd37b4350c41febab8221801409/src/agents/realtime/openai_realtime.py#L235
# An empty or whitespace-only env value is treated as unset (None) so that a
# bare `REALTIME_WEBSOCKET_MAX_MESSAGE_SIZE_BYTES=` entry does not raise
# ValueError while this module is being imported.
_max_size_env = os.getenv("REALTIME_WEBSOCKET_MAX_MESSAGE_SIZE_BYTES")
REALTIME_WEBSOCKET_MAX_MESSAGE_SIZE_BYTES = (
    int(_max_size_env)
    if _max_size_env is not None and _max_size_env.strip()
    else None
)

# SSL/TLS cipher configuration for faster handshakes.
# Strategy: strongly prefer fast modern ciphers, but allow fallback to commonly
# supported ones, balancing performance with broad compatibility.
# LITELLM_SSL_CIPHERS replaces the whole list; otherwise the suites below are
# joined with ":" in the order shown (earlier = higher priority).
DEFAULT_SSL_CIPHERS = os.environ.get(
    "LITELLM_SSL_CIPHERS",
    ":".join(
        [
            # Priority 1: TLS 1.3 ciphers
            "TLS_AES_256_GCM_SHA384",
            "TLS_AES_128_GCM_SHA256",
            "TLS_CHACHA20_POLY1305_SHA256",
            # Priority 2: TLS 1.2 ECDHE+GCM (widely supported)
            "ECDHE-RSA-AES256-GCM-SHA384",
            "ECDHE-RSA-AES128-GCM-SHA256",
            "ECDHE-ECDSA-AES256-GCM-SHA384",
            "ECDHE-ECDSA-AES128-GCM-SHA256",
            # Priority 3: Additional modern ciphers
            "ECDHE-RSA-CHACHA20-POLY1305",
            "ECDHE-ECDSA-CHACHA20-POLY1305",
            # Priority 4: Widely compatible fallbacks
            "ECDHE-RSA-AES256-SHA384",
            "ECDHE-RSA-AES128-SHA256",
            "AES256-GCM-SHA384",  # Non-PFS fallback (compatibility)
            "AES128-GCM-SHA256",  # Last resort (maximum compatibility)
        ]
    ),
)

########### v2 Architecture constants for managing writing updates to the database ###########
# Redis keys for the spend-update buffers (overall and per-entity daily buffers).
REDIS_UPDATE_BUFFER_KEY = "litellm_spend_update_buffer"
REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_spend_update_buffer"
REDIS_DAILY_TEAM_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_team_spend_update_buffer"
REDIS_DAILY_ORG_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_org_spend_update_buffer"
REDIS_DAILY_END_USER_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_end_user_spend_update_buffer"
REDIS_DAILY_AGENT_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_agent_spend_update_buffer"
REDIS_DAILY_TAG_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_tag_spend_update_buffer"
MAX_REDIS_BUFFER_DEQUEUE_COUNT = int(
    os.environ.get("MAX_REDIS_BUFFER_DEQUEUE_COUNT", 100)
)
# Bounds asyncio.Queue() instances (log queues, spend update queues, etc.) to
# prevent unbounded memory growth.
LITELLM_ASYNCIO_QUEUE_MAXSIZE = int(
    os.environ.get("LITELLM_ASYNCIO_QUEUE_MAXSIZE", 1000)
)
TOOL_POLICY_CACHE_TTL_SECONDS = int(
    os.environ.get("TOOL_POLICY_CACHE_TTL_SECONDS", 60)
)
# Aggregation threshold: defaults to 80% of the asyncio queue maxsize so the
# check can always trigger. It must stay below LITELLM_ASYNCIO_QUEUE_MAXSIZE;
# if set higher, the aggregation logic will never fire. (The bound is not
# enforced here.)
MAX_SIZE_IN_MEMORY_QUEUE = int(
    os.environ.get(
        "MAX_SIZE_IN_MEMORY_QUEUE", int(LITELLM_ASYNCIO_QUEUE_MAXSIZE * 0.8)
    )
)
MAX_IN_MEMORY_QUEUE_FLUSH_COUNT = int(
    os.environ.get("MAX_IN_MEMORY_QUEUE_FLUSH_COUNT", 1000)
)
###############################################################################################
# Minimum number of tokens for Anthropic to cache a prompt.
MINIMUM_PROMPT_CACHE_TOKEN_COUNT = int(
    os.environ.get("MINIMUM_PROMPT_CACHE_TOKEN_COUNT", 1024)
)
# Default ratio of tokens to trim from the end of a prompt.
DEFAULT_TRIM_RATIO = float(os.environ.get("DEFAULT_TRIM_RATIO", 0.75))

# Calendar helpers. Note that the default month length is 28 days.
HOURS_IN_A_DAY = int(os.environ.get("HOURS_IN_A_DAY", 24))
DAYS_IN_A_WEEK = int(os.environ.get("DAYS_IN_A_WEEK", 7))
DAYS_IN_A_MONTH = int(os.environ.get("DAYS_IN_A_MONTH", 28))
DAYS_IN_A_YEAR = int(os.environ.get("DAYS_IN_A_YEAR", 365))

REPLICATE_MODEL_NAME_WITH_ID_LENGTH = int(
    os.environ.get("REPLICATE_MODEL_NAME_WITH_ID_LENGTH", 64)
)
#### TOKEN COUNTING ####
FUNCTION_DEFINITION_TOKEN_COUNT = int(
    os.environ.get("FUNCTION_DEFINITION_TOKEN_COUNT", 9)
)
SYSTEM_MESSAGE_TOKEN_COUNT = int(os.environ.get("SYSTEM_MESSAGE_TOKEN_COUNT", 4))
TOOL_CHOICE_OBJECT_TOKEN_COUNT = int(
    os.environ.get("TOOL_CHOICE_OBJECT_TOKEN_COUNT", 4)
)
DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT = int(
    os.environ.get("DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT", 10)
)
DEFAULT_MOCK_RESPONSE_COMPLETION_TOKEN_COUNT = int(
    os.environ.get("DEFAULT_MOCK_RESPONSE_COMPLETION_TOKEN_COUNT", 20)
)

# Image sizing limits (pixels) for high-resolution images and tiles.
MAX_SHORT_SIDE_FOR_IMAGE_HIGH_RES = int(
    os.environ.get("MAX_SHORT_SIDE_FOR_IMAGE_HIGH_RES", 768)
)
MAX_LONG_SIDE_FOR_IMAGE_HIGH_RES = int(
    os.environ.get("MAX_LONG_SIDE_FOR_IMAGE_HIGH_RES", 2000)
)
MAX_TILE_WIDTH = int(os.environ.get("MAX_TILE_WIDTH", 512))
MAX_TILE_HEIGHT = int(os.environ.get("MAX_TILE_HEIGHT", 512))

# Default evaluates to 2.5 / 1000 = 0.0025.
OPENAI_FILE_SEARCH_COST_PER_1K_CALLS = float(
    os.environ.get("OPENAI_FILE_SEARCH_COST_PER_1K_CALLS", 2.5 / 1000)
)

# Azure OpenAI Assistants feature costs
# Source: https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/
# $0.1 USD per 1 GB/Day
AZURE_FILE_SEARCH_COST_PER_GB_PER_DAY = float(
    os.environ.get("AZURE_FILE_SEARCH_COST_PER_GB_PER_DAY", 0.1)
)
# NOTE(review): the original notes quote $0.003 USD (input) and $0.012 USD
# (output) per 1K tokens, while the defaults are 3.0 and 12.0 - confirm the
# unit the cost calculator expects before changing either the value or the
# note.
AZURE_COMPUTER_USE_INPUT_COST_PER_1K_TOKENS = float(
    os.environ.get("AZURE_COMPUTER_USE_INPUT_COST_PER_1K_TOKENS", 3.0)
)
AZURE_COMPUTER_USE_OUTPUT_COST_PER_1K_TOKENS = float(
    os.environ.get("AZURE_COMPUTER_USE_OUTPUT_COST_PER_1K_TOKENS", 12.0)
)
# $0.1 USD per 1 GB/Day (same as file search)
AZURE_VECTOR_STORE_COST_PER_GB_PER_DAY = float(
    os.environ.get("AZURE_VECTOR_STORE_COST_PER_GB_PER_DAY", 0.1)
)
MIN_NON_ZERO_TEMPERATURE = float(os.environ.get("MIN_NON_ZERO_TEMPERATURE", 0.0001))
#### RELIABILITY ####
# Catches a model that starts looping the same chunk while streaming.
# The default is deliberately high to prevent false positives.
REPEATED_STREAMING_CHUNK_LIMIT = int(
    os.environ.get("REPEATED_STREAMING_CHUNK_LIMIT", 100)
)
# Shared maxsize for functools.lru_cache usage across hot paths.
# Defaulted to 64 to avoid cache thrash in multi-model production workloads.
DEFAULT_MAX_LRU_CACHE_SIZE = int(os.environ.get("DEFAULT_MAX_LRU_CACHE_SIZE", 64))
# Keep realtime helper caches bounded; workloads rarely exceed 1k models/intents
_REALTIME_BODY_CACHE_SIZE = 1000

# Retry back-off parameters.
INITIAL_RETRY_DELAY = float(os.environ.get("INITIAL_RETRY_DELAY", 0.5))
MAX_RETRY_DELAY = float(os.environ.get("MAX_RETRY_DELAY", 8.0))
JITTER = float(os.environ.get("JITTER", 0.75))

# Default time to live for the in-memory cache
DEFAULT_IN_MEMORY_TTL = int(os.environ.get("DEFAULT_IN_MEMORY_TTL", 5))
# Default max size for redis batch cache
DEFAULT_MAX_REDIS_BATCH_CACHE_SIZE = int(
    os.environ.get("DEFAULT_MAX_REDIS_BATCH_CACHE_SIZE", 1000)
)
# Default polling interval for the scheduler
DEFAULT_POLLING_INTERVAL = float(os.environ.get("DEFAULT_POLLING_INTERVAL", 0.03))
# Azure polling / Document Intelligence defaults.
AZURE_OPERATION_POLLING_TIMEOUT = int(
    os.environ.get("AZURE_OPERATION_POLLING_TIMEOUT", 120)
)
AZURE_DOCUMENT_INTELLIGENCE_API_VERSION = os.environ.get(
    "AZURE_DOCUMENT_INTELLIGENCE_API_VERSION", "2024-11-30"
)
AZURE_DOCUMENT_INTELLIGENCE_DEFAULT_DPI = int(
    os.environ.get("AZURE_DOCUMENT_INTELLIGENCE_DEFAULT_DPI", 96)
)

# Redis connection and circuit-breaker defaults.
REDIS_SOCKET_TIMEOUT = float(os.environ.get("REDIS_SOCKET_TIMEOUT", 0.1))
REDIS_CONNECTION_POOL_TIMEOUT = int(
    os.environ.get("REDIS_CONNECTION_POOL_TIMEOUT", 5)
)
REDIS_CIRCUIT_BREAKER_FAILURE_THRESHOLD = int(
    os.environ.get("REDIS_CIRCUIT_BREAKER_FAILURE_THRESHOLD", 5)
)
REDIS_CIRCUIT_BREAKER_RECOVERY_TIMEOUT = int(
    os.environ.get("REDIS_CIRCUIT_BREAKER_RECOVERY_TIMEOUT", 60)
)
# Enabled unless the env value is something other than "true" (case-insensitive).
REDIS_CIRCUIT_BREAKER_ENABLED = (
    "true" == os.environ.get("REDIS_CIRCUIT_BREAKER_ENABLED", "true").lower()
)
# Default Redis major version to assume when version cannot be determined
# Using 7 as it's the modern version that supports LPOP with count parameter
DEFAULT_REDIS_MAJOR_VERSION = int(os.environ.get("DEFAULT_REDIS_MAJOR_VERSION", 7))

# Timeout for adjacent services (e.g. jwt auth)
NON_LLM_CONNECTION_TIMEOUT = int(os.environ.get("NON_LLM_CONNECTION_TIMEOUT", 15))
MAX_EXCEPTION_MESSAGE_LENGTH = int(
    os.environ.get("MAX_EXCEPTION_MESSAGE_LENGTH", 2000)
)
MAX_STRING_LENGTH_PROMPT_IN_DB = int(
    os.environ.get("MAX_STRING_LENGTH_PROMPT_IN_DB", 2048)
)
BEDROCK_MAX_POLICY_SIZE = int(os.environ.get("BEDROCK_MAX_POLICY_SIZE", 75))
BEDROCK_MIN_THINKING_BUDGET_TOKENS = int(
    os.environ.get("BEDROCK_MIN_THINKING_BUDGET_TOKENS", 1024)
)
# Anthropic's Messages API rejects thinking.budget_tokens < 1024.
ANTHROPIC_MIN_THINKING_BUDGET_TOKENS = 1024
REPLICATE_POLLING_DELAY_SECONDS = float(
    os.environ.get("REPLICATE_POLLING_DELAY_SECONDS", 0.5)
)
DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS = int(
    os.environ.get("DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS", 4096)
)
DEFAULT_OCI_CHAT_MAX_TOKENS = 4096

# Together AI size markers; each default equals the number in the constant's name.
TOGETHER_AI_4_B = int(os.environ.get("TOGETHER_AI_4_B", 4))
TOGETHER_AI_8_B = int(os.environ.get("TOGETHER_AI_8_B", 8))
TOGETHER_AI_21_B = int(os.environ.get("TOGETHER_AI_21_B", 21))
TOGETHER_AI_41_B = int(os.environ.get("TOGETHER_AI_41_B", 41))
TOGETHER_AI_80_B = int(os.environ.get("TOGETHER_AI_80_B", 80))
TOGETHER_AI_110_B = int(os.environ.get("TOGETHER_AI_110_B", 110))
TOGETHER_AI_EMBEDDING_150_M = int(
    os.environ.get("TOGETHER_AI_EMBEDDING_150_M", 150)
)
TOGETHER_AI_EMBEDDING_350_M = int(
    os.environ.get("TOGETHER_AI_EMBEDDING_350_M", 350)
)

QDRANT_SCALAR_QUANTILE = float(os.environ.get("QDRANT_SCALAR_QUANTILE", 0.99))
QDRANT_VECTOR_SIZE = int(os.environ.get("QDRANT_VECTOR_SIZE", 1536))
CACHED_STREAMING_CHUNK_DELAY = float(
    os.environ.get("CACHED_STREAMING_CHUNK_DELAY", 0.02)
)
# chunk_size for audio speech streaming. Balance between latency and memory usage
AUDIO_SPEECH_CHUNK_SIZE = int(os.environ.get("AUDIO_SPEECH_CHUNK_SIZE", 8192))
DEFAULT_MAX_TOKENS_FOR_TRITON = int(
    os.environ.get("DEFAULT_MAX_TOKENS_FOR_TRITON", 2000)
)
#### Networking settings ####
# Sentinel used when `REQUEST_TIMEOUT` is unset: `litellm.request_timeout` keeps this
# value so longer-running surfaces (Router `timeout or litellm.request_timeout`,
# speech/TTS, responses, vector stores, etc.) get a long HTTP deadline. Chat
# `completion()` maps this sentinel down to 600s when the caller did not set a
# per-request/model timeout - see ``CompletionTimeout.resolve`` in
# completion_timeout.py. MCP uses dedicated timeouts (e.g. `MCP_CLIENT_TIMEOUT`),
# not `request_timeout`.
DEFAULT_REQUEST_TIMEOUT_SECONDS: float = 6000.0
# Pair used for default httpx clients when no custom timeout is passed: read/write
# deadline and connect handshake (see ``http_handler`` cached handler paths).
COMPLETION_HTTP_FALLBACK_SECONDS: float = 600.0
HTTP_HANDLER_CONNECT_TIMEOUT_SECONDS: float = 5.0
# The fallback is the sentinel rendered as an integer string ("6000").
request_timeout: float = float(
    os.environ.get("REQUEST_TIMEOUT", f"{int(DEFAULT_REQUEST_TIMEOUT_SECONDS)}")
)
# Default is 6000 seconds, i.e. 100 minutes. (An earlier note on this constant
# said "10 minutes", which does not match the value.)
DEFAULT_A2A_AGENT_TIMEOUT: float = float(
    os.environ.get("DEFAULT_A2A_AGENT_TIMEOUT", 6000)
)
# Patterns that indicate a localhost/internal URL in A2A agent cards that should be
# replaced with the original base_url. This is a common misconfiguration where
# developers deploy agents with development URLs in their agent cards.
LOCALHOST_URL_PATTERNS: List[str] = ["localhost", "127.0.0.1", "0.0.0.0"]
LOCALHOST_URL_PATTERNS.append("[::1]")  # IPv6 localhost
# Patterns in error messages that indicate a connection failure
CONNECTION_ERROR_PATTERNS: List[str] = "connect connection network refused".split()
STREAM_SSE_DONE_STRING: str = "[DONE]"
STREAM_SSE_DATA_PREFIX: str = "data: "
### SPEND TRACKING ###
# Price per second for a100 80GB
DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND = float(
    os.environ.get("DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND", 0.001400)
)
# Fireworks AI size markers; each default equals the number in the constant's name.
FIREWORKS_AI_56_B_MOE = int(os.environ.get("FIREWORKS_AI_56_B_MOE", 56))
FIREWORKS_AI_176_B_MOE = int(os.environ.get("FIREWORKS_AI_176_B_MOE", 176))
FIREWORKS_AI_4_B = int(os.environ.get("FIREWORKS_AI_4_B", 4))
FIREWORKS_AI_16_B = int(os.environ.get("FIREWORKS_AI_16_B", 16))
FIREWORKS_AI_80_B = int(os.environ.get("FIREWORKS_AI_80_B", 80))
#### Logging callback constants ####
REDACTED_BY_LITELM_STRING = "REDACTED_BY_LITELM"
MAX_LANGFUSE_INITIALIZED_CLIENTS = int(
    os.environ.get("MAX_LANGFUSE_INITIALIZED_CLIENTS", 50)
)
# Must be above 0
LOGGING_WORKER_CONCURRENCY = int(os.environ.get("LOGGING_WORKER_CONCURRENCY", 100))
LOGGING_WORKER_MAX_QUEUE_SIZE = int(
    os.environ.get("LOGGING_WORKER_MAX_QUEUE_SIZE", 50_000)
)
LOGGING_WORKER_MAX_TIME_PER_COROUTINE = float(
    os.environ.get("LOGGING_WORKER_MAX_TIME_PER_COROUTINE", 20.0)
)
# Percentage of queue to clear (default: 50%)
LOGGING_WORKER_CLEAR_PERCENTAGE = int(
    os.environ.get("LOGGING_WORKER_CLEAR_PERCENTAGE", 50)
)
MAX_ITERATIONS_TO_CLEAR_QUEUE = int(
    os.environ.get("MAX_ITERATIONS_TO_CLEAR_QUEUE", 200)
)
MAX_TIME_TO_CLEAR_QUEUE = float(os.environ.get("MAX_TIME_TO_CLEAR_QUEUE", 5.0))
# Cooldown time in seconds before allowing another aggressive clear (default: 0.5s)
LOGGING_WORKER_AGGRESSIVE_CLEAR_COOLDOWN_SECONDS = float(
    os.environ.get("LOGGING_WORKER_AGGRESSIVE_CLEAR_COOLDOWN_SECONDS", 0.5)
)
DD_TRACER_STREAMING_CHUNK_YIELD_RESOURCE = os.environ.get(
    "DD_TRACER_STREAMING_CHUNK_YIELD_RESOURCE", "streaming.chunk.yield"
)

# HTTP status code recorded when the client disconnected.
LITELLM_HTTP_STATUS_CLIENT_DISCONNECTED = 499

# 24 hours in seconds (24 * 60 * 60 = 86400)
EMAIL_BUDGET_ALERT_TTL = int(os.environ.get("EMAIL_BUDGET_ALERT_TTL", 24 * 60 * 60))
# 80% of max budget
EMAIL_BUDGET_ALERT_MAX_SPEND_ALERT_PERCENTAGE = float(
    os.environ.get("EMAIL_BUDGET_ALERT_MAX_SPEND_ALERT_PERCENTAGE", 0.8)
)
############### LLM Provider Constants ###############
### ANTHROPIC CONSTANTS ###
ANTHROPIC_TOKEN_COUNTING_BETA_VERSION = os.environ.get(
    "ANTHROPIC_TOKEN_COUNTING_BETA_VERSION", "token-counting-2024-11-01"
)
ANTHROPIC_SKILLS_API_BETA_VERSION = "skills-2025-10-02"
# Max-uses value for the web search tool, keyed by "low" / "medium" / "high".
ANTHROPIC_WEB_SEARCH_TOOL_MAX_USES = dict(low=1, medium=5, high=10)

# LiteLLM standard web search tool name
# Used for web search interception across providers
LITELLM_WEB_SEARCH_TOOL_NAME = "litellm_web_search"

DEFAULT_IMAGE_ENDPOINT_MODEL = "dall-e-2"
DEFAULT_VIDEO_ENDPOINT_MODEL = "sora-2"

DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS = int(
    os.environ.get("DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS", 8)
)

### DATAFORSEO CONSTANTS ###
# Default to France (2250) - lower number, commonly used location
DEFAULT_DATAFORSEO_LOCATION_CODE = int(
    os.environ.get("DEFAULT_DATAFORSEO_LOCATION_CODE", 2250)
)

# Provider identifiers for chat providers. Each identifier appears exactly
# once: "baseten" was previously listed a second time (after "cerebras"), which
# made anything iterating this list see that provider twice.
LITELLM_CHAT_PROVIDERS = [
    "openai",
    "openai_like",
    "bytez",
    "xai",
    "custom_openai",
    "text-completion-openai",
    "cohere",
    "cohere_chat",
    "clarifai",
    "anthropic",
    "anthropic_text",
    "replicate",
    "huggingface",
    "together_ai",
    "datarobot",
    "helicone",
    "openrouter",
    "cometapi",
    "vertex_ai",
    "vertex_ai_beta",
    "gemini",
    "ai21",
    "baseten",
    "azure",
    "azure_text",
    "azure_ai",
    "sagemaker",
    "sagemaker_chat",
    "sagemaker_nova",
    "bedrock",
    "vllm",
    "nlp_cloud",
    "petals",
    "oobabooga",
    "ollama",
    "ollama_chat",
    "deepinfra",
    "perplexity",
    "mistral",
    "groq",
    "gigachat",
    "nvidia_nim",
    "cerebras",
    "ai21_chat",
    "volcengine",
    "codestral",
    "text-completion-codestral",
    "text-completion-inception",
    "deepseek",
    "sambanova",
    "maritalk",
    "cloudflare",
    "fireworks_ai",
    "friendliai",
    "watsonx",
    "watsonx_text",
    "triton",
    "predibase",
    "databricks",
    "empower",
    "github",
    "custom",
    "litellm_proxy",
    "hosted_vllm",
    "llamafile",
    "lm_studio",
    "galadriel",
    "gradient_ai",
    "github_copilot",  # GitHub Copilot Chat API
    "chatgpt",  # ChatGPT subscription API
    "novita",
    "meta_llama",
    "featherless_ai",
    "nscale",
    "nebius",
    "dashscope",
    "modelscope",
    "moonshot",
    "publicai",
    "v0",
    "heroku",
    "oci",
    "morph",
    "lambda_ai",
    "inception",
    "vercel_ai_gateway",
    "wandb",
    "ovhcloud",
    "lemonade",
    "docker_model_runner",
    "amazon_nova",
]

# Embedding provider identifiers; going by the constant's name, these are the
# providers whose embedding input may be given as an array of tokens.
LITELLM_EMBEDDING_PROVIDERS_SUPPORTING_INPUT_ARRAY_OF_TOKENS = [
    "openai",
    "azure",
    "hosted_vllm",
    "nebius",
]


# Parameter names for OpenAI chat completion calls. Each name appears exactly
# once: "temperature" was previously listed twice in a row.
OPENAI_CHAT_COMPLETION_PARAMS = [
    "functions",
    "function_call",
    "temperature",
    "top_p",
    "n",
    "stream",
    "stream_options",
    "stop",
    "max_completion_tokens",
    "modalities",
    "prediction",
    "audio",
    "max_tokens",
    "presence_penalty",
    "frequency_penalty",
    "logit_bias",
    "user",
    "request_timeout",
    "api_base",
    "api_version",
    "api_key",
    "deployment_id",
    "organization",
    "base_url",
    "default_headers",
    "timeout",
    "response_format",
    "seed",
    "tools",
    "tool_choice",
    "max_retries",
    "parallel_tool_calls",
    "logprobs",
    "top_logprobs",
    "reasoning_effort",
    "extra_headers",
    "thinking",
    "web_search_options",
    "include_server_side_tool_invocations",
    "service_tier",
    "prompt_cache_key",
    "prompt_cache_retention",
    "safety_identifier",
    "verbosity",
    "store",
]

# Parameter names for OpenAI transcription calls.
OPENAI_TRANSCRIPTION_PARAMS = "language response_format timestamp_granularities".split()

# Parameter names for OpenAI embedding calls.
OPENAI_EMBEDDING_PARAMS = [
    "dimensions",
    "encoding_format",
    "user",
]

# Default value per embedding parameter: every OpenAI embedding param maps to
# None, followed by "model", "custom_llm_provider" (empty string) and "input".
DEFAULT_EMBEDDING_PARAM_VALUES = dict.fromkeys(OPENAI_EMBEDDING_PARAMS)
DEFAULT_EMBEDDING_PARAM_VALUES.update(
    model=None,
    custom_llm_provider="",
    input=None,
)

# Default value for each chat completion parameter. Every entry is None except
# "custom_llm_provider", which defaults to the empty string.
DEFAULT_CHAT_COMPLETION_PARAM_VALUES = {
    "functions": None,
    "function_call": None,
    "temperature": None,
    "top_p": None,
    "n": None,
    "stream": None,
    "stream_options": None,
    "stop": None,
    "max_tokens": None,
    "max_completion_tokens": None,
    "modalities": None,
    "prediction": None,
    "audio": None,
    "presence_penalty": None,
    "frequency_penalty": None,
    "logit_bias": None,
    "user": None,
    "model": None,
    "custom_llm_provider": "",
    "response_format": None,
    "seed": None,
    "tools": None,
    "tool_choice": None,
    "max_retries": None,
    "logprobs": None,
    "top_logprobs": None,
    "extra_headers": None,
    "api_version": None,
    "parallel_tool_calls": None,
    "drop_params": None,
    "allowed_openai_params": None,
    "additional_drop_params": None,
    "messages": None,
    "reasoning_effort": None,
    "verbosity": None,
    "thinking": None,
    "web_search_options": None,
    "include_server_side_tool_invocations": None,
    "service_tier": None,
    "safety_identifier": None,
    "prompt_cache_key": None,
    "prompt_cache_retention": None,
    "store": None,
    "metadata": None,
    "context_management": None,
}

# API endpoints of OpenAI-compatible services. Entries are not uniform: some
# are bare host/path fragments (e.g. "api.perplexity.ai") and others are full
# "https://" URLs, some with a trailing slash.
# NOTE(review): presumably matched against a configured api_base - confirm how
# the consumer compares entries before normalising their format.
openai_compatible_endpoints: List = [
    "api.perplexity.ai",
    "api.endpoints.anyscale.com/v1",
    "api.deepinfra.com/v1/openai",
    "api.mistral.ai/v1",
    "codestral.mistral.ai/v1/chat/completions",
    "codestral.mistral.ai/v1/fim/completions",
    "api.groq.com/openai/v1",
    "https://integrate.api.nvidia.com/v1",
    "api.deepseek.com/v1",
    "api.together.xyz/v1",
    "app.empower.dev/api/v1",
    "https://api.friendli.ai/serverless/v1",
    "api.sambanova.ai/v1",
    "api.x.ai/v1",
    "ollama.com",
    "api.galadriel.ai/v1",
    "api.llama.com/compat/v1/",
    "api.featherless.ai/v1",
    "inference.api.nscale.com/v1",
    "api.studio.nebius.ai/v1",
    "https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
    "https://api-inference.modelscope.cn/v1",
    "https://api.moonshot.ai/v1",
    "https://api.publicai.co/v1",
    "https://api.synthetic.new/openai/v1",
    "https://serverless.tensormesh.ai/v1",
    "https://api.stima.tech/v1",
    "https://nano-gpt.com/api/v1",
    "https://api.poe.com/v1",
    "https://llm.chutes.ai/v1/",
    "https://api.v0.dev/v1",
    "https://api.morphllm.com/v1",
    "https://api.lambda.ai/v1",
    "https://api.inceptionlabs.ai/v1",
    "https://api.hyperbolic.xyz/v1",
    "https://ai-gateway.helicone.ai/",
    "https://ai-gateway.vercel.sh/v1",
    "https://api.inference.wandb.ai/v1",
    "https://api.clarifai.com/v2/ext/openai/v1",
    "https://api.libertai.io/v1",
    "https://pinstripes.io/v1",
]


# Provider identifiers treated as OpenAI-compatible. Entries tagged
# "JSON-configured provider" are annotated that way by the original authors.
openai_compatible_providers: List = [
    "anyscale",
    "groq",
    "nvidia_nim",
    "cerebras",
    "baseten",
    "sambanova",
    "ai21_chat",
    "ai21",
    "volcengine",
    "codestral",
    "deepseek",
    "deepinfra",
    "perplexity",
    "xinference",
    "xai",
    "zai",
    "together_ai",
    "fireworks_ai",
    "empower",
    "friendliai",
    "azure_ai",
    "github",
    "litellm_proxy",
    "hosted_vllm",
    "llamafile",
    "lm_studio",
    "galadriel",
    "github_copilot",  # GitHub Copilot Chat API
    "chatgpt",  # ChatGPT subscription API
    "novita",
    "meta_llama",
    "publicai",  # PublicAI - JSON-configured provider
    "synthetic",  # Synthetic - JSON-configured provider
    "tensormesh",  # Tensormesh - JSON-configured provider
    "apertis",  # Apertis - JSON-configured provider
    "nano-gpt",  # Nano-GPT - JSON-configured provider
    "poe",  # Poe - JSON-configured provider
    "chutes",  # Chutes - JSON-configured provider
    "parasail",  # Parasail - JSON-configured provider
    "libertai",  # LibertAI - JSON-configured provider
    "featherless_ai",
    "nscale",
    "nebius",
    "dashscope",
    "modelscope",
    "moonshot",
    "v0",
    "helicone",
    "morph",
    "lambda_ai",
    "inception",
    "hyperbolic",
    "vercel_ai_gateway",
    "aiml",
    "wandb",
    "cometapi",
    "clarifai",
    "docker_model_runner",
    "ragflow",
    "pinstripes",  # Pinstripes - JSON-configured provider
]
# Providers that support the `/v1/completions` (text completion) route.
openai_text_completion_compatible_providers: List = [
    "together_ai",
    "fireworks_ai",
    "hosted_vllm",
    "meta_llama",
    "llamafile",
    "featherless_ai",
    "nebius",
    "dashscope",
    "modelscope",
    "moonshot",
    "publicai",
    "synthetic",
    "tensormesh",
    "apertis",
    "nano-gpt",
    "poe",
    "chutes",
    "v0",
    "lambda_ai",
    "hyperbolic",
    "wandb",
]
# Private helper list: providers that are similar to OpenAI but need custom
# auth / endpoint handling, so the OpenAI SDK cannot be used for them.
_openai_like_providers: List = [
    "predibase",
    "databricks",
    "lemonade",
    "watsonx",
]  # private helper. similar to openai but require some custom auth / endpoint handling, so can't use the openai sdk
# Well-supported Replicate LLMs, pinned as "<owner>/<model>:<version hash>".
replicate_models: set = {
    # Llama family
    "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
    "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
    "meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db",
    # Vicuna
    "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
    "joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe",
    # Flan T-5
    "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f",
    # Others
    "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5",
    "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
}

# Clarifai-hosted model ids, in the form "clarifai/<user>.<app>.<model>".
# Every entry must end with a comma: adjacent string literals are implicitly
# concatenated, which previously fused the Qwen3-30B-A3B-Thinking-2507 and
# gpt-5-nano ids into one bogus entry and dropped both real ids from the set.
clarifai_models: set = {
    "clarifai/openai.chat-completion.gpt-oss-20b",
    "clarifai/qwen.qwenLM.Qwen3-30B-A3B-Instruct-2507",
    "clarifai/qwen.qwen3.qwen3-next-80B-A3B-Thinking",
    "clarifai/openai.chat-completion.gpt-oss-120b",
    "clarifai/qwen.qwenLM.Qwen3-30B-A3B-Thinking-2507",
    "clarifai/openai.chat-completion.gpt-5-nano",
    "clarifai/openai.chat-completion.gpt-4o",
    "clarifai/gcp.generate.gemini-2_5-pro",
    "clarifai/anthropic.completion.claude-sonnet-4",
    "clarifai/xai.chat-completion.grok-2-vision-1212",
    "clarifai/openbmb.miniCPM.MiniCPM-o-2_6-language",
    "clarifai/microsoft.text-generation.Phi-4-reasoning-plus",
    "clarifai/openbmb.miniCPM.MiniCPM3-4B",
    "clarifai/openbmb.miniCPM.MiniCPM4-8B",
    "clarifai/xai.chat-completion.grok-2-1212",
    "clarifai/anthropic.completion.claude-opus-4",
    "clarifai/xai.chat-completion.grok-code-fast-1",
    "clarifai/qwen.qwenCoder.Qwen3-Coder-30B-A3B-Instruct",
    "clarifai/deepseek-ai.deepseek-chat.DeepSeek-R1-0528-Qwen3-8B",
    "clarifai/openai.chat-completion.gpt-5-mini",
    "clarifai/microsoft.text-generation.phi-4",
    "clarifai/openai.chat-completion.gpt-5",
    "clarifai/meta.Llama-3.Llama-3_2-3B-Instruct",
    "clarifai/xai.image-generation.grok-2-image-1212",
    "clarifai/xai.chat-completion.grok-3",
    "clarifai/openai.chat-completion.o3",
    "clarifai/qwen.qwen-VL.Qwen2_5-VL-7B-Instruct",
    "clarifai/qwen.qwenLM.Qwen3-14B",
    "clarifai/qwen.qwenLM.QwQ-32B-AWQ",
    "clarifai/anthropic.completion.claude-3_5-haiku",
    "clarifai/anthropic.completion.claude-3_7-sonnet",
}


# These have been tested on extensively. But by default all
# text2text-generation and text-generation models are supported by liteLLM.
# - https://docs.litellm.ai/docs/providers
huggingface_models: set = {
    "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-13b-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "meta-llama/Llama-2-70b-hf",
    "meta-llama/Llama-2-70b-chat-hf",
    "meta-llama/Llama-2-7b",
    "meta-llama/Llama-2-7b-chat",
    "meta-llama/Llama-2-13b",
    "meta-llama/Llama-2-13b-chat",
    "meta-llama/Llama-2-70b",
    "meta-llama/Llama-2-70b-chat",
}
# Model ids listed for the Empower provider.
empower_models = {
    "empower/empower-functions",
    "empower/empower-functions-small",
}

# Listed Together AI models. All Together AI models are supported: just pass
# in the model id, e.g. completion(model="together_computer/replit_code_3b",...)
together_ai_models: set = {
    # llama llms - chat
    "togethercomputer/llama-2-70b-chat",
    # llama llms - language / instruct
    "togethercomputer/llama-2-70b",
    "togethercomputer/LLaMA-2-7B-32K",
    "togethercomputer/Llama-2-7B-32K-Instruct",
    "togethercomputer/llama-2-7b",
    # falcon llms
    "togethercomputer/falcon-40b-instruct",
    "togethercomputer/falcon-7b-instruct",
    # alpaca
    "togethercomputer/alpaca-7b",
    # chat llms
    "HuggingFaceH4/starchat-alpha",
    # code llms
    "togethercomputer/CodeLlama-34b",
    "togethercomputer/CodeLlama-34b-Instruct",
    "togethercomputer/CodeLlama-34b-Python",
    "defog/sqlcoder",
    "NumbersStation/nsql-llama-2-7B",
    "WizardLM/WizardCoder-15B-V1.0",
    "WizardLM/WizardCoder-Python-34B-V1.0",
    # language llms
    "NousResearch/Nous-Hermes-Llama2-13b",
    "Austism/chronos-hermes-13b",
    "upstage/SOLAR-0-70b-16bit",
    "WizardLM/WizardLM-70B-V1.0",
}


# Baseten model ids. The original trailing comment labels them
# "FALCON 7B", "WizardLM", "Mosaic ML" — presumably in entry order.
baseten_models: set = {
    "qvv0xeq",
    "q841o8w",
    "31dxrj3",
}

# Model ids listed for the Featherless AI provider.
featherless_ai_models: set = {
    "featherless-ai/Qwerky-72B",
    "featherless-ai/Qwerky-QwQ-32B",
    "Qwen/Qwen2.5-72B-Instruct",
    "all-hands/openhands-lm-32b-v0.1",
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "deepseek-ai/DeepSeek-V3-0324",
    "mistralai/Mistral-Small-24B-Instruct-2501",
    "mistralai/Mistral-Nemo-Instruct-2407",
    "ProdeusUnity/Stellar-Odyssey-12b-v0.0",
}

# Model ids listed for the Nebius provider, grouped by model family.
nebius_models: set = {
    # deepseek models
    "deepseek-ai/DeepSeek-R1-0528",
    "deepseek-ai/DeepSeek-V3-0324",
    "deepseek-ai/DeepSeek-V3",
    "deepseek-ai/DeepSeek-R1",
    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
    # google models
    "google/gemma-2-2b-it",
    "google/gemma-2-9b-it-fast",
    # llama models
    "meta-llama/Llama-3.3-70B-Instruct",
    "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "meta-llama/Meta-Llama-3.1-405B-Instruct",
    "NousResearch/Hermes-3-Llama-405B",
    # microsoft models
    "microsoft/phi-4",
    # mistral models
    "mistralai/Mistral-Nemo-Instruct-2407",
    "mistralai/Devstral-Small-2505",
    # moonshot models
    "moonshotai/Kimi-K2-Instruct",
    # nvidia models
    "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1",
    "nvidia/Llama-3_3-Nemotron-Super-49B-v1",
    # openai models
    "openai/gpt-oss-120b",
    "openai/gpt-oss-20b",
    # qwen models
    "Qwen/Qwen3-Coder-480B-A35B-Instruct",
    "Qwen/Qwen3-235B-A22B-Instruct-2507",
    "Qwen/Qwen3-235B-A22B",
    "Qwen/Qwen3-30B-A3B",
    "Qwen/Qwen3-32B",
    "Qwen/Qwen3-14B",
    "Qwen/Qwen3-4B-fast",
    "Qwen/Qwen2.5-Coder-7B",
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "Qwen/Qwen2.5-72B-Instruct",
    "Qwen/QwQ-32B",
    "Qwen/Qwen3-30B-A3B-Thinking-2507",
    "Qwen/Qwen3-30B-A3B-Instruct-2507",
    # zai models
    "zai-org/GLM-4.5",
    "zai-org/GLM-4.5-Air",
    # other models
    "aaditya/Llama3-OpenBioLLM-70B",
    "ProdeusUnity/Stellar-Odyssey-12b-v0.0",
    "all-hands/openhands-lm-32b-v0.1",
}

# Model ids listed for the DashScope provider.
dashscope_models: set = {
    "qwen-turbo",
    "qwen-plus",
    "qwen-max",
    "qwen-turbo-latest",
    "qwen-plus-latest",
    "qwen-max-latest",
    "qwq-32b",
    "qwen3-235b-a22b",
    "qwen3-32b",
    "qwen3-30b-a3b",
}

# Embedding model ids listed for the Nebius provider.
nebius_embedding_models: set = {
    "BAAI/bge-en-icl",
    "BAAI/bge-multilingual-gemma2",
    "intfloat/e5-mistral-7b-instruct",
}

# Model ids listed for the Weights & Biases (wandb) provider, by vendor.
WANDB_MODELS: set = {
    # openai models
    "openai/gpt-oss-120b",
    "openai/gpt-oss-20b",
    # zai-org models
    "zai-org/GLM-4.5",
    # Qwen models
    "Qwen/Qwen3-235B-A22B-Instruct-2507",
    "Qwen/Qwen3-Coder-480B-A35B-Instruct",
    "Qwen/Qwen3-235B-A22B-Thinking-2507",
    # moonshotai
    "moonshotai/Kimi-K2-Instruct",
    "moonshotai/Kimi-K2.5",
    # MiniMaxAI
    "MiniMaxAI/MiniMax-M2.5",
    # meta models
    "meta-llama/Llama-3.1-8B-Instruct",
    "meta-llama/Llama-3.3-70B-Instruct",
    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
    # deepseek-ai
    "deepseek-ai/DeepSeek-V3.1",
    "deepseek-ai/DeepSeek-R1-0528",
    "deepseek-ai/DeepSeek-V3-0324",
    # microsoft
    "microsoft/Phi-4-mini-instruct",
}

# Model ids listed for the ModelScope provider, grouped by series.
modelscope_models: set = {
    # Qwen series models
    "Qwen/Qwen3-0.6B",
    "Qwen/Qwen3-1.7B",
    "Qwen/Qwen3-4B",
    "Qwen/Qwen3-8B",
    "Qwen/Qwen3-14B",
    "Qwen/Qwen3-30B-A3B",
    "Qwen/Qwen3-32B",
    "Qwen/Qwen3-235B-A22B",
    "Qwen/Qwen3-235B-A22B-Instruct-2507",
    "Qwen/Qwen3-235B-A22B-Thinking-2507",
    "Qwen/Qwen3-30B-A3B-Thinking-2507",
    "Qwen/Qwen3-Coder-30B-A3B-Instruct",
    "Qwen/Qwen3-Coder-480B-A35B-Instruct",
    "Qwen/Qwen3-Next-80B-A3B-Instruct",
    "Qwen/Qwen3-Next-80B-A3B-Thinking",
    "Qwen/Qwen3-VL-235B-A22B-Instruct",
    "Qwen/Qwen3-VL-8B-Instruct",
    "Qwen/Qwen3-VL-8B-Thinking",
    "Qwen/Qwen3.5-122B-A10B",
    "Qwen/Qwen3.5-27B",
    "Qwen/Qwen3.5-35B-A3B",
    "Qwen/Qwen3.5-397B-A17B",
    "Qwen/QwQ-32B",
    "Qwen/QwQ-32B-Preview",
    "Qwen/QVQ-72B-Preview",
    "Qwen/Qwen-Image-Edit",
    # DeepSeek series models
    "deepseek-ai/DeepSeek-R1-0528",
    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
    "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
    "deepseek-ai/DeepSeek-V3.2",
    "deepseek-ai/DeepSeek-V4-Flash",
}

# Closed set of provider names accepted for Bedrock "invoke" usage, expressed
# as a typing.Literal so type checkers can validate call sites.
BEDROCK_INVOKE_PROVIDERS_LITERAL = Literal[
    "cohere",
    "anthropic",
    "mistral",
    "amazon",
    "meta",
    "llama",
    "ai21",
    "nova",
    "deepseek_r1",
    "qwen3",
    "qwen2",
    "twelvelabs",
    "openai",
    "stability",
    "moonshot",
]

# Closed set of provider names accepted for Bedrock embeddings.
BEDROCK_EMBEDDING_PROVIDERS_LITERAL = Literal[
    "cohere",
    "amazon",
    "twelvelabs",
    "nova",
]

# Bedrock model ids listed as Converse models (per the constant's name).
# Each id appears exactly once: "meta.llama3-70b-instruct-v1:0" was previously
# listed twice, which only padded the list and slowed membership scans.
BEDROCK_CONVERSE_MODELS = [
    "qwen.qwen3-coder-480b-a35b-v1:0",
    "qwen.qwen3-coder-next",
    "qwen.qwen3-235b-a22b-2507-v1:0",
    "qwen.qwen3-coder-30b-a3b-v1:0",
    "qwen.qwen3-32b-v1:0",
    "deepseek.v3-v1:0",
    "deepseek.v3.2",
    "openai.gpt-oss-20b-1:0",
    "openai.gpt-oss-120b-1:0",
    "anthropic.claude-haiku-4-5-20251001-v1:0",
    "anthropic.claude-sonnet-4-5-20250929-v1:0",
    "anthropic.claude-fable-5",
    "anthropic.claude-opus-4-8",
    "anthropic.claude-opus-4-7",
    "anthropic.claude-opus-4-6-v1:0",
    "anthropic.claude-opus-4-6-v1",
    "anthropic.claude-sonnet-4-6",
    "anthropic.claude-opus-4-1-20250805-v1:0",
    "anthropic.claude-opus-4-20250514-v1:0",
    "anthropic.claude-sonnet-4-20250514-v1:0",
    "anthropic.claude-3-7-sonnet-20250219-v1:0",
    "anthropic.claude-3-5-haiku-20241022-v1:0",
    "anthropic.claude-3-5-sonnet-20241022-v2:0",
    "anthropic.claude-3-5-sonnet-20240620-v1:0",
    "anthropic.claude-3-opus-20240229-v1:0",
    "anthropic.claude-3-sonnet-20240229-v1:0",
    "anthropic.claude-3-haiku-20240307-v1:0",
    "anthropic.claude-v2",
    "anthropic.claude-v2:1",
    "anthropic.claude-v1",
    "anthropic.claude-instant-v1",
    "ai21.jamba-instruct-v1:0",
    "ai21.jamba-1-5-mini-v1:0",
    "ai21.jamba-1-5-large-v1:0",
    "meta.llama3-70b-instruct-v1:0",
    "meta.llama3-8b-instruct-v1:0",
    "meta.llama3-1-8b-instruct-v1:0",
    "meta.llama3-1-70b-instruct-v1:0",
    "meta.llama3-1-405b-instruct-v1:0",
    "mistral.mistral-large-2407-v1:0",
    "mistral.mistral-large-2402-v1:0",
    "mistral.mistral-small-2402-v1:0",
    "meta.llama3-2-1b-instruct-v1:0",
    "meta.llama3-2-3b-instruct-v1:0",
    "meta.llama3-2-11b-instruct-v1:0",
    "meta.llama3-2-90b-instruct-v1:0",
    "amazon.nova-lite-v1:0",
    "amazon.nova-2-lite-v1:0",
    "amazon.nova-2-pro-preview-20251202-v1:0",
    "amazon.nova-pro-v1:0",
    "writer.palmyra-x4-v1:0",
    "writer.palmyra-x5-v1:0",
    "minimax.minimax-m2.1",
    "moonshotai.kimi-k2.5",
]


# Embedding model ids listed per provider.
open_ai_embedding_models: set = {"text-embedding-ada-002"}
cohere_embedding_models: set = {
    "embed-v4.0",
    "embed-english-v3.0",
    "embed-english-light-v3.0",
    "embed-multilingual-v3.0",
    "embed-english-v2.0",
    "embed-english-light-v2.0",
    "embed-multilingual-v2.0",
}
bedrock_embedding_models: set = {
    "amazon.titan-embed-text-v1",
    "amazon.nova-2-multimodal-embeddings-v1:0",
    "cohere.embed-english-v3",
    "cohere.embed-multilingual-v3",
    "cohere.embed-v4:0",
    "twelvelabs.marengo-embed-2-7-v1:0",
}

# Tokenizer configurations bundled for specific model ids. Each entry maps a
# model id to {"tokenizer": {...}, "status": "success"}; the "tokenizer" dict
# carries a Jinja `chat_template` plus BOS/EOS token definitions.
# Each chat_template must stay on a single physical line: a plain
# double-quoted literal cannot span lines, and a raw line break inside one is
# a SyntaxError (the deepseek template was previously split after "{% if ").
known_tokenizer_config = {
    "mistralai/Mistral-7B-Instruct-v0.1": {
        "tokenizer": {
            "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
            "bos_token": "<s>",
            "eos_token": "</s>",
        },
        "status": "success",
    },
    "meta-llama/Meta-Llama-3-8B-Instruct": {
        "tokenizer": {
            "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
            "bos_token": "<|begin_of_text|>",
            "eos_token": "",
        },
        "status": "success",
    },
    "deepseek-r1/deepseek-r1-7b-instruct": {
        "tokenizer": {
            "add_bos_token": True,
            "add_eos_token": False,
            "bos_token": {
                "__type": "AddedToken",
                "content": "<｜begin▁of▁sentence｜>",
                "lstrip": False,
                "normalized": True,
                "rstrip": False,
                "single_word": False,
            },
            "clean_up_tokenization_spaces": False,
            "eos_token": {
                "__type": "AddedToken",
                "content": "<｜end▁of▁sentence｜>",
                "lstrip": False,
                "normalized": True,
                "rstrip": False,
                "single_word": False,
            },
            "legacy": True,
            "model_max_length": 16384,
            "pad_token": {
                "__type": "AddedToken",
                "content": "<｜end▁of▁sentence｜>",
                "lstrip": False,
                "normalized": True,
                "rstrip": False,
                "single_word": False,
            },
            "sp_model_kwargs": {},
            "unk_token": None,
            "tokenizer_class": "LlamaTokenizerFast",
            "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<｜tool▁call▁end｜>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<｜tool▁call▁end｜>'}}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<｜Assistant｜>' + content + '<｜end▁of▁sentence｜>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<｜tool▁outputs▁end｜>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<｜Assistant｜><think>\\n'}}{% endif %}",
        },
        "status": "success",
    },
}


# Finish-reason strings grouped under the OpenAI name.
OPENAI_FINISH_REASONS = [
    "stop",
    "length",
    "function_call",
    "tool_calls",
    "content_filter",
]
# TTL in seconds; overridable via the environment, defaults to 60 (1 minute).
HUMANLOOP_PROMPT_CACHE_TTL_SECONDS = int(
    os.environ.get("HUMANLOOP_PROMPT_CACHE_TTL_SECONDS", 60)
)
# Default tool name used when converting response format to tool call.
RESPONSE_FORMAT_TOOL_NAME = "json_tool_call"

########################### Logging Callback Constants ###########################
AZURE_STORAGE_MSFT_VERSION = "2019-07-07"
# Intervals below are in minutes and may be overridden via the environment.
PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES = int(
    os.environ.get("PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES", 5)
)
CLOUDZERO_EXPORT_INTERVAL_MINUTES = int(
    os.environ.get("CLOUDZERO_EXPORT_INTERVAL_MINUTES", 60)
)
MCP_TOOL_NAME_PREFIX = "mcp_tool"
MAXIMUM_TRACEBACK_LINES_TO_LOG = int(
    os.environ.get("MAXIMUM_TRACEBACK_LINES_TO_LOG", 100)
)

# Header used to control (disable) callbacks.
X_LITELLM_DISABLE_CALLBACKS = "x-litellm-disable-callbacks"
# Field names: the current metadata field, its older name, and the field
# marking a truncated payload.
LITELLM_METADATA_FIELD = "litellm_metadata"
OLD_LITELLM_METADATA_FIELD = "metadata"
LITELLM_TRUNCATED_PAYLOAD_FIELD = "litellm_truncated"
# Explanatory note about truncation; the three adjacent literals are
# concatenated into a single string.
LITELLM_TRUNCATION_DB_SAFEGUARD_NOTE = (
    "Truncation is a DB storage safeguard. "
    "Full, untruncated data is logged to logging callbacks (OTEL, Datadog, etc.). "
    "To increase the truncation limit, set `MAX_STRING_LENGTH_PROMPT_IN_DB` in your env."
)

########################### LiteLLM Proxy Specific Constants ###########################
########################################################################################

# Headers that are always inspected for a customer / end-user ID; no
# configuration is required. They work out-of-the-box for tools such as
# Claude Code that support custom headers.
STANDARD_CUSTOMER_ID_HEADERS = ["x-litellm-customer-id", "x-litellm-end-user-id"]
# If spendLogs has more than this many rows (default 1M), do not query the DB.
MAX_SPENDLOG_ROWS_TO_QUERY = int(
    os.environ.get("MAX_SPENDLOG_ROWS_TO_QUERY", 1_000_000)
)
# By default all litellm proxy keys have a soft budget of 50.0.
DEFAULT_SOFT_BUDGET = float(os.environ.get("DEFAULT_SOFT_BUDGET", 50.0))
# Makes it clear this is a rate limit error for a litellm virtual key.
RATE_LIMIT_ERROR_MESSAGE_FOR_VIRTUAL_KEY = "LiteLLM Virtual Key user_api_key_hash"

# Python garbage collection threshold configuration
# Format: "gen0,gen1,gen2" e.g., "1000,50,50"
# Kept as the raw string (or None when unset); it is not parsed here.
PYTHON_GC_THRESHOLD = os.getenv("PYTHON_GC_THRESHOLD")

# Pass-through route constants: route prefixes listed for the Bedrock agent
# runtime.
BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES = [
    "agents/",
    "knowledgebases/",
    "flows/",
    "retrieveAndGenerate/",
    "rerank/",
    "generateQuery/",
    "optimize-prompt/",
]


# Headers that are safe to forward from incoming requests to Vertex AI.
# Using an allowlist approach for security - only forward headers we explicitly trust.
ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS = {
    "anthropic-beta",  # Required for Anthropic features like extended context windows
    "content-type",  # Required for request body parsing
}

# Prefix for headers that should be forwarded to the provider with the prefix stripped,
# e.g., 'x-pass-anthropic-beta: value' becomes 'anthropic-beta: value'.
# Works for all LLM pass-through endpoints (Vertex AI, Anthropic, Bedrock, etc.)
PASS_THROUGH_HEADER_PREFIX = "x-pass-"

# Base route for MCP.
BASE_MCP_ROUTE = "/mcp"

# Batch status polling: interval in seconds (default 1 hour) and maximum
# number of attempts (default 24, i.e. 24 hours at the default interval).
BATCH_STATUS_POLL_INTERVAL_SECONDS = int(
    os.environ.get("BATCH_STATUS_POLL_INTERVAL_SECONDS", 3600)
)
BATCH_STATUS_POLL_MAX_ATTEMPTS = int(
    os.environ.get("BATCH_STATUS_POLL_MAX_ATTEMPTS", 24)
)

# Health check timeout in seconds (default 60).
HEALTH_CHECK_TIMEOUT_SECONDS = int(
    os.environ.get("HEALTH_CHECK_TIMEOUT_SECONDS", 60)
)
# Optional integer read from BACKGROUND_HEALTH_CHECK_MAX_TOKENS.
# Resolves to None when the variable is unset, blank, or not a valid integer.
_background_health_check_max_tokens_env = os.environ.get(
    "BACKGROUND_HEALTH_CHECK_MAX_TOKENS"
)
# Whitespace-stripped raw value; "" when the variable is unset.
_raw_background_health_check_max_tokens = (
    _background_health_check_max_tokens_env or ""
).strip()
BACKGROUND_HEALTH_CHECK_MAX_TOKENS: Optional[int] = None
if _raw_background_health_check_max_tokens:
    try:
        BACKGROUND_HEALTH_CHECK_MAX_TOKENS = int(
            _raw_background_health_check_max_tokens
        )
    except (ValueError, TypeError):
        # Unparseable values are treated the same as "not configured".
        BACKGROUND_HEALTH_CHECK_MAX_TOKENS = None


# Optional integer read from BACKGROUND_HEALTH_CHECK_MAX_TOKENS_REASONING.
# Resolves to None when the variable is unset, blank, or not a valid integer.
_background_health_check_max_tokens_reasoning_env = os.environ.get(
    "BACKGROUND_HEALTH_CHECK_MAX_TOKENS_REASONING"
)
# Whitespace-stripped raw value; "" when the variable is unset.
_raw_background_health_check_max_tokens_reasoning = (
    _background_health_check_max_tokens_reasoning_env or ""
).strip()
BACKGROUND_HEALTH_CHECK_MAX_TOKENS_REASONING: Optional[int] = None
if _raw_background_health_check_max_tokens_reasoning:
    try:
        BACKGROUND_HEALTH_CHECK_MAX_TOKENS_REASONING = int(
            _raw_background_health_check_max_tokens_reasoning
        )
    except (ValueError, TypeError):
        # Unparseable values are treated the same as "not configured".
        BACKGROUND_HEALTH_CHECK_MAX_TOKENS_REASONING = None

# Service account names.
# NOTE: the "LITTELM" prefix on the first two constants is a misspelling of
# "LITELLM"; the names are kept as-is because renaming them would break
# anything that imports them.
LITTELM_INTERNAL_HEALTH_SERVICE_ACCOUNT_NAME = "litellm-internal-health-check"
LITTELM_CLI_SERVICE_ACCOUNT_NAME = "litellm-cli"
LITELLM_INTERNAL_JOBS_SERVICE_ACCOUNT_NAME = "litellm_internal_jobs"
# Stable identifier substituted in place of the master key on UserAPIKeyAuth
# objects so the master key (or its hash) never propagates to spend logs,
# Prometheus metrics, audit trails, or any other downstream consumer.
LITELLM_PROXY_MASTER_KEY_ALIAS = "litellm_proxy_master_key"

# Marker placed in ``model_call_details`` on a synthetic ``Logging`` object that
# records a proxy-gate error (auth/rate-limit rejection) for a request that never
# reached an upstream provider. Tracing callbacks key off it to avoid fabricating
# an LLM-call span for a call that did not happen. See
# ``ProxyLogging._handle_logging_proxy_only_error``.
LITELLM_LOGGING_NO_UPSTREAM_LLM_CALL = "litellm_no_upstream_llm_call"

# Key Rotation Constants
# NOTE: the *_ENABLED flags below are kept as raw strings ("false" by
# default); they are not converted to booleans here.
LITELLM_KEY_ROTATION_ENABLED = os.environ.get("LITELLM_KEY_ROTATION_ENABLED", "false")
# 24 hours default
LITELLM_KEY_ROTATION_CHECK_INTERVAL_SECONDS = int(
    os.environ.get("LITELLM_KEY_ROTATION_CHECK_INTERVAL_SECONDS", 86400)
)
# Duration to keep old key valid after rotation (e.g. "24h", "2d");
# empty = immediate revoke (default)
LITELLM_KEY_ROTATION_GRACE_PERIOD: str = os.environ.get(
    "LITELLM_KEY_ROTATION_GRACE_PERIOD", ""
)
# 10 minutes default — caps the deadlock window if a pod crashes mid-rotation
LITELLM_KEY_ROTATION_LOCK_TTL_SECONDS = int(
    os.environ.get("LITELLM_KEY_ROTATION_LOCK_TTL_SECONDS", 600)
)
UI_SESSION_TOKEN_TEAM_ID = "litellm-dashboard"
LITELLM_EXPIRED_UI_SESSION_KEY_CLEANUP_ENABLED = os.environ.get(
    "LITELLM_EXPIRED_UI_SESSION_KEY_CLEANUP_ENABLED", "false"
)
# 24 hours default
LITELLM_EXPIRED_UI_SESSION_KEY_CLEANUP_INTERVAL_SECONDS = int(
    os.environ.get("LITELLM_EXPIRED_UI_SESSION_KEY_CLEANUP_INTERVAL_SECONDS", 86400)
)
LITELLM_EXPIRED_UI_SESSION_KEY_CLEANUP_BATCH_SIZE = int(
    os.environ.get("LITELLM_EXPIRED_UI_SESSION_KEY_CLEANUP_BATCH_SIZE", 1000)
)
LITELLM_PROXY_ADMIN_NAME = "default_user_id"

########################### CLI SSO AUTHENTICATION CONSTANTS ###########################
LITELLM_CLI_SOURCE_IDENTIFIER = "litellm-cli"
LITELLM_CLI_SESSION_TOKEN_PREFIX = "litellm-session-token"
CLI_SSO_SESSION_CACHE_KEY_PREFIX = "cli_sso_session"
CLI_SSO_SESSION_TTL_SECONDS = 600
CLI_JWT_TOKEN_NAME = "cli-jwt-token"
# Both CLI_JWT_EXPIRATION_HOURS and LITELLM_CLI_JWT_EXPIRATION_HOURS are
# honoured for backwards compatibility; the first non-empty one wins,
# otherwise the default of 24 applies.
CLI_JWT_EXPIRATION_HOURS = int(
    os.environ.get("CLI_JWT_EXPIRATION_HOURS")
    or os.environ.get("LITELLM_CLI_JWT_EXPIRATION_HOURS")
    or 24
)
# Comma-separated allowlisted OIDC claim map for CLI SSO polling, e.g.
# "employment_type->acme_employment_type,org_info.department->department"
CLI_SSO_CLAIM_MAP = (
    os.environ.get("CLI_SSO_CLAIM_MAP")
    or os.environ.get("LITELLM_CLI_SSO_CLAIM_MAP")
    or ""
)
CLI_SSO_CLAIM_MAX_SCALAR_LENGTH = 1024

########################### UI SESSION DURATION ###########################
# Duration for a UI login session (username/password, SSO, invitation links).
# Format: "30s", "30m", "24h", "7d".
# Does NOT apply to the EXPERIMENTAL_UI_LOGIN flow, which intentionally uses a
# fixed 10-minute expiry for security.
LITELLM_UI_SESSION_DURATION = os.environ.get("LITELLM_UI_SESSION_DURATION", "24h")

########################### DB CRON JOB NAMES ###########################
# Job name identifiers.
DB_SPEND_UPDATE_JOB_NAME = "db_spend_update_job"
DB_DAILY_TAG_SPEND_UPDATE_JOB_NAME = "db_daily_tag_spend_update_job"
PROMETHEUS_EMIT_BUDGET_METRICS_JOB_NAME = "prometheus_emit_budget_metrics"
CLOUDZERO_EXPORT_USAGE_DATA_JOB_NAME = "cloudzero_export_usage_data"
MAVVRIK_FOCUS_EXPORT_JOB_NAME = "mavvrik_focus_export_usage_data"
CLOUDZERO_MAX_FETCHED_DATA_RECORDS = int(
    os.environ.get("CLOUDZERO_MAX_FETCHED_DATA_RECORDS", 50000)
)
SPEND_LOG_CLEANUP_JOB_NAME = "spend_log_cleanup"
KEY_ROTATION_JOB_NAME = "litellm_key_rotation_job"
EXPIRED_UI_SESSION_KEY_CLEANUP_JOB_NAME = "litellm_expired_ui_session_key_cleanup_job"

# Spend-log settings, each overridable via the environment.
SPEND_LOG_RUN_LOOPS = int(os.environ.get("SPEND_LOG_RUN_LOOPS", 500))
SPEND_LOG_CLEANUP_BATCH_SIZE = int(
    os.environ.get("SPEND_LOG_CLEANUP_BATCH_SIZE", 1000)
)
SPEND_LOG_CLEANUP_MAX_CONSECUTIVE_BATCH_FAILURES = int(
    os.environ.get("SPEND_LOG_CLEANUP_MAX_CONSECUTIVE_BATCH_FAILURES", 3)
)
SPEND_LOG_CLEANUP_BATCH_FAILURE_BACKOFF_SECONDS = float(
    os.environ.get("SPEND_LOG_CLEANUP_BATCH_FAILURE_BACKOFF_SECONDS", 0.5)
)
SPEND_LOG_PARTITION_INTERVAL = os.environ.get("SPEND_LOG_PARTITION_INTERVAL", "day")
SPEND_LOG_PARTITION_PRECREATE_AHEAD = int(
    os.environ.get("SPEND_LOG_PARTITION_PRECREATE_AHEAD", 7)
)
SPEND_LOG_QUEUE_SIZE_THRESHOLD = int(
    os.environ.get("SPEND_LOG_QUEUE_SIZE_THRESHOLD", 100)
)
SPEND_LOG_QUEUE_POLL_INTERVAL = float(
    os.environ.get("SPEND_LOG_QUEUE_POLL_INTERVAL", 2.0)
)
SPEND_COUNTER_RESEED_LOCKS_MAX_SIZE = int(
    os.environ.get("SPEND_COUNTER_RESEED_LOCKS_MAX_SIZE", 10000)
)
# 1 minute
DEFAULT_CRON_JOB_LOCK_TTL_SECONDS = int(
    os.environ.get("DEFAULT_CRON_JOB_LOCK_TTL_SECONDS", 60)
)
PROXY_BUDGET_RESCHEDULER_MIN_TIME = int(
    os.environ.get("PROXY_BUDGET_RESCHEDULER_MIN_TIME", 597)
)
PROXY_BATCH_POLLING_INTERVAL = int(
    os.environ.get("PROXY_BATCH_POLLING_INTERVAL", 3600)
)
# The three values below are clamped to a minimum of 1.
MAX_OBJECTS_PER_POLL_CYCLE = max(
    1, int(os.environ.get("MAX_OBJECTS_PER_POLL_CYCLE", 50))
)
MANAGED_OBJECT_STALENESS_CUTOFF_DAYS = max(
    1, int(os.environ.get("MANAGED_OBJECT_STALENESS_CUTOFF_DAYS", 7))
)
STALE_OBJECT_CLEANUP_BATCH_SIZE = max(
    1, int(os.environ.get("STALE_OBJECT_CLEANUP_BATCH_SIZE", 1000))
)
# Set PROXY_BATCH_POLLING_ENABLED=false to disable the CheckBatchCost and
# CheckResponsesCost background polling jobs entirely (e.g. to avoid DB load on
# installations with large numbers of stale managed objects).
# Only the case-insensitive value "true" enables polling; anything else
# (including "1") disables it.
_batch_polling_env = os.environ.get("PROXY_BATCH_POLLING_ENABLED", "true").lower()
PROXY_BATCH_POLLING_ENABLED = _batch_polling_env == "true"
PROXY_BUDGET_RESCHEDULER_MAX_TIME = int(
    os.environ.get("PROXY_BUDGET_RESCHEDULER_MAX_TIME", 605)
)
# In seconds; the default is 10.
PROXY_BATCH_WRITE_AT = int(os.environ.get("PROXY_BATCH_WRITE_AT", 10))

# APScheduler Configuration - MEMORY LEAK FIX
# These settings prevent memory leaks in APScheduler's normalize() and
# _apply_jitter() functions. Boolean flags are true only for the
# case-insensitive values "true" or "1".

# Collapse many missed runs into one.
APSCHEDULER_COALESCE = os.environ.get("APSCHEDULER_COALESCE", "True").lower() in (
    "true",
    "1",
)
# Ignore runs older than 1 hour (was 120).
APSCHEDULER_MISFIRE_GRACE_TIME = int(
    os.environ.get("APSCHEDULER_MISFIRE_GRACE_TIME", 3600)
)
# Prevent concurrent job instances.
APSCHEDULER_MAX_INSTANCES = int(os.environ.get("APSCHEDULER_MAX_INSTANCES", 1))
# Always replace existing jobs.
APSCHEDULER_REPLACE_EXISTING = os.environ.get(
    "APSCHEDULER_REPLACE_EXISTING", "True"
).lower() in ("true", "1")

# The number of tag entries is higher than the number of user and team entries,
# which leads to a higher QPS. This multiplier makes tag-specific tasks run at
# a later time to smooth QPS.
DAILY_TAG_SPEND_BATCH_MULTIPLIER = 2.3

# 5 minutes
DEFAULT_HEALTH_CHECK_INTERVAL = int(
    os.environ.get("DEFAULT_HEALTH_CHECK_INTERVAL", 300)
)
# 5 minutes - TTL for cached health check results
DEFAULT_SHARED_HEALTH_CHECK_TTL = int(
    os.environ.get("DEFAULT_SHARED_HEALTH_CHECK_TTL", 300)
)
# 1 minute - TTL for health check lock
DEFAULT_SHARED_HEALTH_CHECK_LOCK_TTL = int(
    os.environ.get("DEFAULT_SHARED_HEALTH_CHECK_LOCK_TTL", 60)
)
# Health state is stale after interval * this.
DEFAULT_HEALTH_CHECK_STALENESS_MULTIPLIER = 2
PROMETHEUS_FALLBACK_STATS_SEND_TIME_HOURS = int(
    os.environ.get("PROMETHEUS_FALLBACK_STATS_SEND_TIME_HOURS", 9)
)
# Returned on the `/models` endpoint.
DEFAULT_MODEL_CREATED_AT_TIME = int(
    os.environ.get("DEFAULT_MODEL_CREATED_AT_TIME", 1677610602)
)
DEFAULT_SLACK_ALERTING_THRESHOLD = int(
    os.environ.get("DEFAULT_SLACK_ALERTING_THRESHOLD", 300)
)
MAX_TEAM_LIST_LIMIT = int(os.environ.get("MAX_TEAM_LIST_LIMIT", 20))
MAX_POLICY_ESTIMATE_IMPACT_ROWS = int(
    os.environ.get("MAX_POLICY_ESTIMATE_IMPACT_ROWS", 1000)
)
DEFAULT_PROMPT_INJECTION_SIMILARITY_THRESHOLD = float(
    os.environ.get("DEFAULT_PROMPT_INJECTION_SIMILARITY_THRESHOLD", 0.7)
)
LENGTH_OF_LITELLM_GENERATED_KEY = int(
    os.environ.get("LENGTH_OF_LITELLM_GENERATED_KEY", 16)
)
SECRET_MANAGER_REFRESH_INTERVAL = int(
    os.environ.get("SECRET_MANAGER_REFRESH_INTERVAL", 86400)
)
# Setting names listed as safe DB overrides.
# NOTE(review): presumably the litellm_settings keys that a DB-stored value may
# override - confirm against the code that consumes this list.
LITELLM_SETTINGS_SAFE_DB_OVERRIDES = [
    "default_internal_user_params",
    "default_team_params",
    "public_mcp_servers",
    "public_agent_groups",
    "public_model_groups",
    "public_model_groups_links",
    "cost_discount_config",
    "cost_margin_config",
]
# Token values given special treatment by auth.
# NOTE(review): exact handling is not visible here - confirm in the auth code.
SPECIAL_LITELLM_AUTH_TOKEN = ["ui-token"]
# The two TTLs below fall back to 60 and 600 when their environment variables are unset.
DEFAULT_MANAGEMENT_OBJECT_IN_MEMORY_CACHE_TTL = int(
    os.environ.get("DEFAULT_MANAGEMENT_OBJECT_IN_MEMORY_CACHE_TTL", "60")
)
DEFAULT_ACCESS_GROUP_CACHE_TTL = int(
    os.environ.get("DEFAULT_ACCESS_GROUP_CACHE_TTL", "600")
)
# Short TTL for negative MCP access-group existence lookups. It stops
# unauthenticated callers from forcing a DB query per request for unknown
# names, while bounding staleness so that a transient DB error (which surfaces
# as an empty list) cannot hide a real group for long.
DEFAULT_MCP_ACCESS_GROUP_NEGATIVE_CACHE_TTL = 10
# Maximum number of comma-separated MCP server / access-group tokens accepted
# in a single ``/{name1,name2,...}/mcp`` URL. This bounds the per-request
# DB / cache fan-out an authenticated caller can trigger by stuffing the path
# with tokens.
DEFAULT_MCP_NAMESPACE_CSV_MAX_TOKENS = 16

# Sentry Scrubbing Configuration
# Names of fields / variables considered sensitive, grouped by category.
# NOTE(review): how entries are matched (exact vs. substring, case sensitivity)
# is decided by the code that consumes these lists - confirm there.
SENTRY_DENYLIST = [
    # API Keys and Tokens
    "api_key",
    "token",
    "key",
    "secret",
    "password",
    "auth",
    "credential",
    "OPENAI_API_KEY",
    "ANTHROPIC_API_KEY",
    "ANTHROPIC_AUTH_TOKEN",
    "AZURE_API_KEY",
    "COHERE_API_KEY",
    "REPLICATE_API_KEY",
    "HUGGINGFACE_API_KEY",
    "TOGETHERAI_API_KEY",
    "CLOUDFLARE_API_KEY",
    "BASETEN_KEY",
    "OPENROUTER_KEY",
    "COMETAPI_KEY",
    "DATAROBOT_API_TOKEN",
    "FIREWORKS_API_KEY",
    "FIREWORKS_AI_API_KEY",
    "FIREWORKSAI_API_KEY",
    "OVHCLOUD_API_KEY",
    "CLARIFAI_API_KEY",
    # Database and Connection Strings
    "database_url",
    "redis_url",
    "connection_string",
    # Authentication and Security
    "master_key",
    "LITELLM_MASTER_KEY",
    "auth_token",
    "jwt_token",
    "private_key",
    "SLACK_WEBHOOK_URL",
    "webhook_url",
    "LANGFUSE_SECRET_KEY",
    # Email Configuration
    "SMTP_PASSWORD",
    "SMTP_USERNAME",
    "email_password",
    # Cloud Provider Credentials
    "aws_access_key",
    "aws_secret_key",
    "gcp_credentials",
    "azure_credentials",
    "HCP_VAULT_TOKEN",
    "CIRCLE_OIDC_TOKEN",
    # Proxy and Environment Settings
    "proxy_url",
    "proxy_key",
    "environment_variables",
]
# Names of fields that carry personally identifiable information (PII).
SENTRY_PII_DENYLIST = [
    "user_id",
    "email",
    "phone",
    "address",
    "ip_address",
    "SMTP_SENDER_EMAIL",
    "TEST_EMAIL_ADDRESS",
]

# CoroutineChecker cache configuration
# Maximum in-memory size of the cache (default 1000).
COROUTINE_CHECKER_MAX_SIZE_IN_MEMORY = int(
    os.environ.get("COROUTINE_CHECKER_MAX_SIZE_IN_MEMORY", "1000")
)

########################### RAG Text Splitter Constants ###########################
# Default chunk size and overlap for the text splitter.
DEFAULT_CHUNK_SIZE = int(os.environ.get("DEFAULT_CHUNK_SIZE", "1000"))
DEFAULT_CHUNK_OVERLAP = int(os.environ.get("DEFAULT_CHUNK_OVERLAP", "200"))

########################### S3 Vectors RAG Constants ###########################
S3_VECTORS_DEFAULT_DIMENSION = int(
    os.environ.get("S3_VECTORS_DEFAULT_DIMENSION", "1024")
)
S3_VECTORS_DEFAULT_DISTANCE_METRIC = str(
    os.environ.get("S3_VECTORS_DEFAULT_DISTANCE_METRIC", "cosine")
)
# Fixed list; not configurable through the environment.
S3_VECTORS_DEFAULT_NON_FILTERABLE_METADATA_KEYS = ["source_text"]

########################### Microsoft SSO Constants ###########################
# Attribute name used for each user field; every one can be overridden through
# the environment variable of the same name.
# NOTE(review): presumably keys of the Microsoft user object returned during
# SSO - confirm against the SSO handler.
MICROSOFT_USER_EMAIL_ATTRIBUTE = str(
    os.environ.get("MICROSOFT_USER_EMAIL_ATTRIBUTE", "userPrincipalName")
)
MICROSOFT_USER_DISPLAY_NAME_ATTRIBUTE = str(
    os.environ.get("MICROSOFT_USER_DISPLAY_NAME_ATTRIBUTE", "displayName")
)
MICROSOFT_USER_ID_ATTRIBUTE = str(
    os.environ.get("MICROSOFT_USER_ID_ATTRIBUTE", "id")
)
MICROSOFT_USER_FIRST_NAME_ATTRIBUTE = str(
    os.environ.get("MICROSOFT_USER_FIRST_NAME_ATTRIBUTE", "givenName")
)
MICROSOFT_USER_LAST_NAME_ATTRIBUTE = str(
    os.environ.get("MICROSOFT_USER_LAST_NAME_ATTRIBUTE", "surname")
)

# Largest payload, in bytes, that is fully serialized for DEBUG logging
# (default 102400, i.e. 100 KB). Anything bigger is truncated so that a
# multi-second json.dumps call does not block the response.
MAX_PAYLOAD_SIZE_FOR_DEBUG_LOG = int(
    os.environ.get("MAX_PAYLOAD_SIZE_FOR_DEBUG_LOG", "102400")
)

# Policy template enrichment
MAX_COMPETITOR_NAMES = int(os.environ.get("MAX_COMPETITOR_NAMES", "100"))
COMPETITOR_LLM_TEMPERATURE = float(
    os.environ.get("COMPETITOR_LLM_TEMPERATURE", "0.3")
)
# Fixed model name; not configurable through the environment.
DEFAULT_COMPETITOR_DISCOVERY_MODEL = "gpt-4o-mini"

# Advisor tool orchestration

# Providers with native advisor_20260301 support, for which no LiteLLM
# orchestration is needed. Add vertex_ai here once verified.
ADVISOR_NATIVE_PROVIDERS: frozenset = frozenset(("anthropic",))

# Hard cap on advisor iterations per request, to prevent runaway loops.
ADVISOR_MAX_USES: int = 5

# Description placed in the synthetic advisor tool definition that is sent to
# non-native providers.
ADVISOR_TOOL_DESCRIPTION: str = " ".join(
    (
        "Consult a highly intelligent advisor model when you need expert guidance,",
        "want to verify your reasoning, or face a complex decision.",
        "Describe your question or challenge clearly in the 'question' field.",
    )
)