# MoFin/venv/lib/python3.12/site-packages/litellm/llms/openai/cost_calculation.py

"""
Helper util for handling openai-specific cost calculation
- e.g.: prompt caching
"""

from typing import Any, Literal, Mapping, Optional, Tuple

from litellm._logging import verbose_logger
from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token
from litellm.types.utils import CallTypes, ModelInfo, Usage
from litellm.utils import get_model_info


def cost_router(call_type: CallTypes) -> Literal["cost_per_token", "cost_per_second"]:
    """
    Pick the pricing strategy for a call type.

    Transcription calls (sync or async) are billed per second; every other
    call type is billed per token.
    """
    is_transcription = (
        call_type == CallTypes.atranscription or call_type == CallTypes.transcription
    )
    return "cost_per_second" if is_transcription else "cost_per_token"


def cost_per_token(
    model: str,
    usage: Usage,
    service_tier: Optional[str] = None,
    data_residency: Optional[str] = None,
) -> Tuple[float, float]:
    """
    Return the prompt and completion cost of an OpenAI call.

    This is a thin wrapper: it pins ``custom_llm_provider`` to ``"openai"`` and
    forwards every other argument unchanged to ``generic_cost_per_token``,
    whose result is returned as-is.

    Input:
        - model: str, the model name without provider prefix
        - usage: LiteLLM Usage block describing the call
        - service_tier: optional service tier, forwarded unchanged
        - data_residency: optional OpenAI data-residency region (e.g. "eu", "us"),
          forwarded unchanged; any regional pricing adjustment is the
          responsibility of ``generic_cost_per_token``

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
    """
    # NOTE: an earlier inline implementation (cached-token and audio-token
    # pricing) used to live here; all pricing is now delegated to the shared
    # helper below.
    costs = generic_cost_per_token(
        model=model,
        usage=usage,
        custom_llm_provider="openai",
        service_tier=service_tier,
        data_residency=data_residency,
    )
    return costs


def cost_per_second(
    model: str, custom_llm_provider: Optional[str], duration: float = 0.0
) -> Tuple[float, float]:
    """
    Calculates a duration-based cost for a given model.

    The model's ``output_cost_per_second`` takes precedence: when it is set the
    whole charge is reported as completion cost. Otherwise
    ``input_cost_per_second`` is used and the charge is reported as prompt
    cost. If neither rate is set, both costs are 0.0.

    Input:
        - model: str, the model name without provider prefix
        - custom_llm_provider: str, the custom llm provider ("openai" is used
          when this is None or empty)
        - duration: float, the duration of the response in seconds

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
    """
    model_info = get_model_info(
        model=model, custom_llm_provider=custom_llm_provider or "openai"
    )

    # Output-side rate wins when present.
    output_rate = model_info.get("output_cost_per_second")
    if output_rate is not None:
        verbose_logger.debug(
            f"For model={model} - output_cost_per_second: {model_info.get('output_cost_per_second')}; duration: {duration}"
        )
        return 0.0, output_rate * duration

    # Fall back to the input-side rate.
    input_rate = model_info.get("input_cost_per_second")
    if input_rate is not None:
        verbose_logger.debug(
            f"For model={model} - input_cost_per_second: {model_info.get('input_cost_per_second')}; duration: {duration}"
        )
        return input_rate * duration, 0.0

    # No per-second pricing configured for this model.
    return 0.0, 0.0


def _video_resolution_to_cost_field_suffix(resolution: str) -> Optional[str]:
    """
    Map usage resolution to a safe suffix for ``output_cost_per_second_<suffix>`` keys.

    Note: Currently only ``output_cost_per_second_1080p`` is explicitly declared in
    ModelInfo (types/utils.py). Other resolution tiers (e.g., 720p, 4k) can be added
    to model_prices_and_context_window.json but are not exposed via get_model_info()
    until added to the ModelInfo TypedDict.
    """
    r = resolution.strip().lower()
    if not r:
        return None
    safe = "".join(c for c in r if c.isalnum() or c == "_")
    if not safe or len(safe) > 24:
        return None
    return safe


def _video_output_cost_per_second(
    model_info: Mapping[str, Any],
    video_resolution: Optional[str],
) -> Optional[float]:
    """
    Per-second video output rate from model_info.

    If ``video_resolution`` is set (e.g. ``1080p``, ``720p``, ``4k``), looks up
    ``output_cost_per_second_<resolution>`` first (e.g. ``output_cost_per_second_1080p``),
    then falls back to ``output_cost_per_second``.
    """
    r = (video_resolution or "").strip().lower()
    if r:
        suffix = _video_resolution_to_cost_field_suffix(r)
        if suffix is not None:
            tier_key = f"output_cost_per_second_{suffix}"
            tier_rate = model_info.get(tier_key)
            if tier_rate is not None:
                return float(tier_rate)
    out = model_info.get("output_cost_per_second")
    if out is not None:
        return float(out)
    return None


def video_generation_cost(
    model: str,
    duration_seconds: float,
    custom_llm_provider: Optional[str] = None,
    model_info: Optional[ModelInfo] = None,
    video_resolution: Optional[str] = None,
) -> float:
    """
    Calculates the cost of a generated video from its duration in seconds.

    Rate lookup order:
      1. ``output_cost_per_video_per_second`` from the model info.
      2. The rate resolved by ``_video_output_cost_per_second`` (resolution-specific
         ``output_cost_per_second_<resolution>``, then ``output_cost_per_second``).
    When no rate is found a warning is logged and 0.0 is returned.

    Input:
        - model: str, the model name without provider prefix
        - duration_seconds: float, the duration of the generated video in seconds
        - custom_llm_provider: str, the custom llm provider ("openai" is used
          for the lookup when this is None or empty)
        - model_info: Optional[dict], deployment-level model info containing
            custom video pricing. When provided, skips the global
            get_model_info() lookup so that deployment-specific pricing is used.
        - video_resolution: Optional resolution label from usage (e.g. ``720p``, ``1080p``).

    Returns:
        float - total_cost_in_usd
    """
    # Only fall back to the global pricing table when the caller gave no info.
    if model_info is None:
        model_info = get_model_info(
            model=model, custom_llm_provider=custom_llm_provider or "openai"
        )

    video_cost_per_second = model_info.get("output_cost_per_video_per_second")
    if video_cost_per_second is None:
        # No video-specific rate: try the (optionally resolution-tiered) output rate.
        output_cost_per_second = _video_output_cost_per_second(
            model_info, video_resolution
        )
        if output_cost_per_second is None:
            # Nothing to price with; report it and charge nothing.
            verbose_logger.warning(
                f"No cost information found for video model {model}. Please add pricing to model_prices_and_context_window.json"
            )
            return 0.0

        verbose_logger.debug(
            f"For model={model} - output_cost_per_second: {output_cost_per_second}; duration: {duration_seconds}"
        )
        return output_cost_per_second * duration_seconds

    # A video-specific per-second rate takes priority over everything else.
    verbose_logger.debug(
        f"For model={model} - output_cost_per_video_per_second: {video_cost_per_second}; duration: {duration_seconds}"
    )
    return video_cost_per_second * duration_seconds