Files
MoFin/venv/lib/python3.12/site-packages/litellm/llms/anthropic/cost_calculation.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

148 lines
4.8 KiB
Python

"""
Helper util for handling anthropic-specific cost calculation
- e.g.: prompt caching
"""
from typing import TYPE_CHECKING, Optional, Tuple
from litellm.litellm_core_utils.llm_cost_calc.utils import (
_get_token_base_cost,
_get_web_search_requests,
_parse_prompt_tokens_details,
calculate_cache_writing_cost,
generic_cost_per_token,
)
if TYPE_CHECKING:
from litellm.types.utils import ModelInfo, Usage
import litellm
def _compute_cache_only_cost(
model_info: "ModelInfo", usage: "Usage", service_tier: str | None = None
) -> float:
"""
Return only the cache-related portion of the prompt cost (cache read + cache write).
These costs must NOT be scaled by geo/speed multipliers because the old
explicit ``fast/`` model entries carried unchanged cache rates while
multiplying only the regular input/output token costs.
"""
if usage.prompt_tokens_details is None:
return 0.0
prompt_tokens_details = _parse_prompt_tokens_details(usage)
(
_,
_,
cache_creation_cost,
cache_creation_cost_above_1hr,
cache_read_cost,
) = _get_token_base_cost(
model_info=model_info, usage=usage, service_tier=service_tier
)
cache_cost = float(prompt_tokens_details["cache_hit_tokens"]) * cache_read_cost
if (
prompt_tokens_details["cache_creation_tokens"]
or prompt_tokens_details["cache_creation_token_details"] is not None
):
cache_cost += calculate_cache_writing_cost(
cache_creation_tokens=prompt_tokens_details["cache_creation_tokens"],
cache_creation_token_details=prompt_tokens_details[
"cache_creation_token_details"
],
cache_creation_cost_above_1hr=cache_creation_cost_above_1hr,
cache_creation_cost=cache_creation_cost,
)
return cache_cost
def cost_per_token(
    model: str, usage: "Usage", service_tier: str | None = None
) -> Tuple[float, float]:
    """
    Calculates the cost per token for a given model, prompt tokens, and completion tokens.

    The base cost comes from ``generic_cost_per_token``. When the usage block
    reports a non-default ``inference_geo`` and/or ``speed == "fast"``, the
    matching multipliers from the model's ``provider_specific_entry`` are
    applied to the completion cost and to the non-cache part of the prompt
    cost; the cache read/write portion is left unscaled.

    Input:
        - model: str, the model name without provider prefix
        - usage: LiteLLM Usage block, containing anthropic caching information
        - service_tier: the service tier the request was served at (e.g. "priority"),
            read from the Anthropic response usage and used to select tier-specific pricing

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd

    Note:
        Applying the multipliers is best-effort: any failure while resolving
        them leaves the base costs untouched and is logged at debug level
        rather than raised.
    """
    import logging

    prompt_cost, completion_cost = generic_cost_per_token(
        model=model,
        usage=usage,
        custom_llm_provider="anthropic",
        service_tier=service_tier,
    )

    # Apply provider_specific_entry multipliers for geo/speed routing
    try:
        geo = getattr(usage, "inference_geo", None)
        geo_key = geo.lower() if geo else None
        # These two geo values never select a multiplier.
        if geo_key in ("global", "not_available"):
            geo_key = None
        is_fast = getattr(usage, "speed", None) == "fast"

        # Only look up model info when a multiplier could actually apply;
        # otherwise the base costs are already final.
        if geo_key is not None or is_fast:
            model_info = litellm.get_model_info(
                model=model, custom_llm_provider="anthropic"
            )
            provider_specific_entry: dict = (
                model_info.get("provider_specific_entry") or {}
            )

            multiplier = 1.0
            if geo_key is not None:
                multiplier *= provider_specific_entry.get(geo_key, 1.0)
            if is_fast:
                multiplier *= provider_specific_entry.get("fast", 1.0)

            if multiplier != 1.0:
                # Take the cache portion out before scaling and add it back
                # afterwards so cache read/write costs are not multiplied.
                cache_cost = _compute_cache_only_cost(
                    model_info=model_info, usage=usage, service_tier=service_tier
                )
                prompt_cost = (prompt_cost - cache_cost) * multiplier + cache_cost
                completion_cost *= multiplier
    except Exception:
        # Deliberately best-effort: keep the base costs, but leave a trace so
        # a missing multiplier can be diagnosed instead of vanishing silently.
        logging.getLogger(__name__).debug(
            "Skipping Anthropic geo/speed cost multiplier for model=%s",
            model,
            exc_info=True,
        )

    return prompt_cost, completion_cost
def get_cost_for_anthropic_web_search(
    model_info: Optional["ModelInfo"] = None,
    usage: Optional["Usage"] = None,
) -> float:
    """
    Get the cost of using a web search tool for Anthropic.

    Args:
        model_info: model pricing information; its
            ``search_context_cost_per_query`` entry supplies the per-request
            price.
        usage: usage block; its ``server_tool_use`` attribute (if any) is
            passed to ``_get_web_search_requests`` to obtain the request count.

    Returns:
        The ``search_context_size_medium`` price multiplied by the number of
        web search requests, or ``0.0`` when model info, usage, the request
        count, or the price is missing (or the price is zero).
    """
    from litellm.types.utils import SearchContextCostPerQuery

    # Both the pricing source and the usage block are required.
    if model_info is None or usage is None:
        return 0.0

    ## Check if web search requests are in the usage object
    request_count = _get_web_search_requests(getattr(usage, "server_tool_use", None))
    if request_count is None:
        return 0.0

    ## Get the cost per web search request
    pricing: SearchContextCostPerQuery = (
        model_info.get("search_context_cost_per_query") or SearchContextCostPerQuery()
    )
    price_per_request = pricing.get("search_context_size_medium", 0.0)
    if price_per_request is None or price_per_request == 0.0:
        return 0.0

    ## Calculate the total cost
    return price_per_request * request_count