import os from dataclasses import dataclass, field from datetime import datetime from typing import ( TYPE_CHECKING, Any, Dict, FrozenSet, List, Optional, Set, Tuple, Union, cast, ) import litellm from litellm._logging import verbose_logger from litellm.integrations._types.open_inference import ( OpenInferenceSpanKindValues, SpanAttributes, ) from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.opentelemetry_utils.gen_ai_semconv import ( OTEL_SEMCONV_STABILITY_OPT_IN_ENV, OTELGenAISemconvMixin, OTELSemconvCategory, parse_semconv_opt_in, ) from litellm.litellm_core_utils.safe_json_dumps import safe_dumps from litellm.secret_managers.main import get_secret_bool, str_to_bool from litellm.types.services import ServiceLoggerPayload from litellm.types.utils import ( ChatCompletionMessageToolCall, CostBreakdown, Function, LLMResponseTypes, StandardCallbackDynamicParams, StandardLoggingPayload, ) # OpenTelemetry imports moved to individual functions to avoid import errors when not installed if TYPE_CHECKING: from opentelemetry.sdk.trace.export import SpanExporter as _SpanExporter from opentelemetry.trace import Context as _Context from opentelemetry.trace import Span as _Span from opentelemetry.trace import Tracer as _Tracer from litellm.proxy._types import ( ManagementEndpointLoggingPayload as _ManagementEndpointLoggingPayload, ) from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth Span = Union[_Span, Any] Tracer = Union[_Tracer, Any] Context = Union[_Context, Any] SpanExporter = Union[_SpanExporter, Any] UserAPIKeyAuth = Union[_UserAPIKeyAuth, Any] ManagementEndpointLoggingPayload = Union[_ManagementEndpointLoggingPayload, Any] else: Span = Any Tracer = Any SpanExporter = Any UserAPIKeyAuth = Any ManagementEndpointLoggingPayload = Any Context = Any LITELLM_TRACER_NAME = os.getenv("OTEL_TRACER_NAME", "litellm") LITELLM_METER_NAME = os.getenv("LITELLM_METER_NAME", "litellm") LITELLM_LOGGER_NAME = os.getenv("LITELLM_LOGGER_NAME", "litellm") LITELLM_PROXY_REQUEST_SPAN_NAME = "Received Proxy Server Request" # OTel-standard names. status is also kept under error.code for back compat. HTTP_RESPONSE_STATUS_CODE_ATTRIBUTE = "http.response.status_code" HTTP_ROUTE_ATTRIBUTE = "http.route" URL_PATH_ATTRIBUTE = "url.path" PREPROCESSING_DURATION_MS_ATTRIBUTE = "litellm.preprocessing.duration_ms" TEAM_METADATA_ATTRIBUTE = "litellm.team.metadata" MODEL_GROUP_ATTRIBUTE = "litellm.model_group" PROVIDER_MODEL_ATTRIBUTE = "litellm.provider.model" # Remove the hardcoded LITELLM_RESOURCE dictionary - we'll create it properly later RAW_REQUEST_SPAN_NAME = "raw_gen_ai_request" LITELLM_REQUEST_SPAN_NAME = "litellm_request" CAPTURE_MODE_NO_CONTENT = "NO_CONTENT" CAPTURE_MODE_SPAN_ONLY = "SPAN_ONLY" CAPTURE_MODE_EVENT_ONLY = "EVENT_ONLY" CAPTURE_MODE_SPAN_AND_EVENT = "SPAN_AND_EVENT" _VALID_CAPTURE_MODES = { CAPTURE_MODE_NO_CONTENT, CAPTURE_MODE_SPAN_ONLY, CAPTURE_MODE_EVENT_ONLY, CAPTURE_MODE_SPAN_AND_EVENT, } METRIC_METADATA_KEYS: Tuple[str, ...] = ( "user_api_key_hash", "user_api_key_alias", "user_api_key_team_id", "user_api_key_org_id", "user_api_key_user_id", "user_api_key_team_alias", "user_api_key_user_email", "spend_logs_metadata", "requester_ip_address", "requester_metadata", "user_api_key_end_user_id", "prompt_management_metadata", "applied_guardrails", "mcp_tool_call_metadata", "vector_store_request_metadata", ) TOKEN_TYPE_ATTRIBUTE: str = "gen_ai.token.type" VALID_METRIC_ATTRIBUTE_NAMES: FrozenSet[str] = frozenset( ( "gen_ai.operation.name", "gen_ai.system", "gen_ai.request.model", "gen_ai.framework", "hidden_params", ) + tuple(f"metadata.{key}" for key in METRIC_METADATA_KEYS) ) @dataclass(frozen=True) class OTELMetricAttributeFilter: include_list: Optional[List[str]] = None exclude_list: Optional[List[str]] = None def _build_metric_attribute_filter(value: Any) -> OTELMetricAttributeFilter: if isinstance(value, OTELMetricAttributeFilter): return value if not isinstance(value, dict): raise ValueError( "otel.attributes must be a mapping with optional 'include_list' / " f"'exclude_list', got {type(value).__name__}" ) return OTELMetricAttributeFilter( include_list=value.get("include_list"), exclude_list=value.get("exclude_list"), ) def _resolve_metric_attribute_filter( attributes: Optional[OTELMetricAttributeFilter], ) -> Tuple[Optional[FrozenSet[str]], Optional[FrozenSet[str]]]: if attributes is None: return None, None include = attributes.include_list or None exclude = attributes.exclude_list or None if include and exclude: raise ValueError( "otel.attributes: include_list and exclude_list are mutually exclusive" ) requested = include or exclude or [] if TOKEN_TYPE_ATTRIBUTE in requested: raise ValueError( f"otel.attributes: {TOKEN_TYPE_ATTRIBUTE} is a structural token-usage " "discriminator and cannot be filtered" ) unknown = sorted( name for name in requested if name not in VALID_METRIC_ATTRIBUTE_NAMES ) if unknown: raise ValueError( f"otel.attributes: unknown attribute name(s) {unknown}. " f"Valid names: {sorted(VALID_METRIC_ATTRIBUTE_NAMES)}" ) return ( frozenset(include) if include else None, frozenset(exclude) if exclude else None, ) def _normalize_team_metadata_keys(value: Any) -> List[str]: """Coerce a team-metadata allowlist from a list or comma-separated string. config.yaml passes a YAML list; an env var passes a comma-separated string. Both collapse to a list of stripped, non-empty keys. """ if value is None: return [] if isinstance(value, str): return [item.strip() for item in value.split(",") if item.strip()] return [str(item).strip() for item in value if str(item).strip()] @dataclass class OpenTelemetryConfig: exporter: Union[str, SpanExporter] = "console" endpoint: Optional[str] = None headers: Optional[str] = None enable_metrics: bool = False enable_events: bool = False service_name: Optional[str] = None deployment_environment: Optional[str] = None model_id: Optional[str] = None ignore_context_propagation: Optional[bool] = None # When True, create a private TracerProvider instead of reusing or setting the global one. skip_set_global: bool = False # Programmatic override for OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT. # One of NO_CONTENT, SPAN_ONLY, EVENT_ONLY, SPAN_AND_EVENT (or "true" as legacy alias). capture_message_content: Optional[str] = None semconv_stability_opt_in: Set[OTELSemconvCategory] = field(default_factory=set) # Sub-keys of the team's free-form metadata stamped onto the inference span # under ``litellm.team.metadata``. Empty by default so none of a team's # metadata leaves the process until explicitly allowlisted. baggage_team_metadata_keys: List[str] = field(default_factory=list) # Prometheus-style include/exclude control over which attributes are stamped # on emitted metrics, to cap metric cardinality. attributes: Optional[OTELMetricAttributeFilter] = None def __post_init__(self) -> None: # If endpoint is specified but exporter is still the default "console", # automatically infer "otlp_http" to send traces to the endpoint. # This fixes an issue where UI-configured OTEL settings would default # to console output instead of sending traces to the configured endpoint. if ( self.endpoint and isinstance(self.exporter, str) and self.exporter == "console" ): self.exporter = "otlp_http" if not self.service_name: self.service_name = os.getenv("OTEL_SERVICE_NAME", "litellm") if not self.deployment_environment: self.deployment_environment = os.getenv( "OTEL_ENVIRONMENT_NAME", "production" ) if not self.model_id: self.model_id = os.getenv("OTEL_MODEL_ID", self.service_name) if self.ignore_context_propagation is None: self.ignore_context_propagation = str_to_bool( os.getenv("OTEL_IGNORE_CONTEXT_PROPAGATION") ) # Resolve the env opt-in once here so self.semconv_stability_opt_in is the # single source of truth: the union of programmatic and env categories. self.semconv_stability_opt_in |= parse_semconv_opt_in( os.getenv(OTEL_SEMCONV_STABILITY_OPT_IN_ENV) ) self.baggage_team_metadata_keys = _normalize_team_metadata_keys( self.baggage_team_metadata_keys ) or _normalize_team_metadata_keys( os.getenv("LITELLM_OTEL_BAGGAGE_TEAM_METADATA_KEYS") ) @classmethod def from_env(cls): """ OTEL_HEADERS=x-honeycomb-team=B85YgLm9**** OTEL_EXPORTER="otlp_http" OTEL_ENDPOINT="https://api.honeycomb.io/v1/traces" OTEL_HEADERS gets sent as headers = {"x-honeycomb-team": "B85YgLm96******"} """ from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( InMemorySpanExporter, ) exporter = os.getenv( "OTEL_EXPORTER_OTLP_PROTOCOL", os.getenv("OTEL_EXPORTER", "console") ) endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", os.getenv("OTEL_ENDPOINT")) headers = os.getenv( "OTEL_EXPORTER_OTLP_HEADERS", os.getenv("OTEL_HEADERS") ) # example: OTEL_HEADERS=x-honeycomb-team=B85YgLm96***" enable_metrics: bool = ( os.getenv("LITELLM_OTEL_INTEGRATION_ENABLE_METRICS", "false").lower() == "true" ) enable_events: bool = ( os.getenv("LITELLM_OTEL_INTEGRATION_ENABLE_EVENTS", "false").lower() == "true" ) service_name = os.getenv("OTEL_SERVICE_NAME", "litellm") deployment_environment = os.getenv("OTEL_ENVIRONMENT_NAME", "production") model_id = os.getenv("OTEL_MODEL_ID", service_name) if exporter == "in_memory": return cls(exporter=InMemorySpanExporter()) return cls( exporter=exporter, endpoint=endpoint, headers=headers, # example: OTEL_HEADERS=x-honeycomb-team=B85YgLm96***" enable_metrics=enable_metrics, enable_events=enable_events, service_name=service_name, deployment_environment=deployment_environment, model_id=model_id, ) class OpenTelemetry(OTELGenAISemconvMixin, CustomLogger): def __init__( self, config: Optional[OpenTelemetryConfig] = None, callback_name: Optional[str] = None, # injection points for testing tracer_provider: Optional[Any] = None, logger_provider: Optional[Any] = None, meter_provider: Optional[Any] = None, **kwargs, ): team_metadata_keys_override = kwargs.pop("baggage_team_metadata_keys", None) metric_attributes_override = kwargs.pop("attributes", None) if config is None: config = OpenTelemetryConfig.from_env() if team_metadata_keys_override is not None: config.baggage_team_metadata_keys = _normalize_team_metadata_keys( team_metadata_keys_override ) if metric_attributes_override is not None: config.attributes = _build_metric_attribute_filter( metric_attributes_override ) self.config = config self.callback_name = callback_name # Resolved on first metric record, not here: the proxy populates # callback_settings.otel.attributes after this logger is constructed, so # reading it now would miss it. An explicit config is validated eagerly so # a bad config still fails at startup. self._metric_attr_include: Optional[FrozenSet[str]] = None self._metric_attr_exclude: Optional[FrozenSet[str]] = None self._metric_attr_filter_resolved = False if config.attributes is not None: self._ensure_metric_attribute_filter() self.OTEL_EXPORTER = self.config.exporter self.OTEL_ENDPOINT = self.config.endpoint self.OTEL_HEADERS = self.config.headers self._tracer_provider_cache: Dict[str, Any] = {} self._init_tracing(tracer_provider) _debug_otel = str(os.getenv("DEBUG_OTEL", "False")).lower() if _debug_otel == "true": # Set up logging import logging logging.basicConfig(level=logging.DEBUG) logging.getLogger(__name__) # Enable OpenTelemetry logging otel_exporter_logger = logging.getLogger("opentelemetry.sdk.trace.export") otel_exporter_logger.setLevel(logging.DEBUG) # init CustomLogger params super().__init__(**kwargs) self._init_metrics(meter_provider) self._init_logs(logger_provider) # Sample env-var / config / message_logging at init so subsequent # _capture_in_span / _capture_in_event calls are deterministic. self._capture_mode_cached = self._compute_capture_mode_from_init_state() self._init_otel_logger_on_litellm_proxy() @staticmethod def _get_litellm_resource(config: OpenTelemetryConfig): """Create an OpenTelemetry Resource using config-driven defaults.""" from opentelemetry.sdk.resources import OTELResourceDetector, Resource base_attributes: Dict[str, Optional[str]] = { "service.name": config.service_name, "deployment.environment": config.deployment_environment, "model_id": config.model_id or config.service_name, } base_resource = Resource.create(base_attributes) # type: ignore[arg-type] otel_resource_detector = OTELResourceDetector() env_resource = otel_resource_detector.detect() return base_resource.merge(env_resource) def _init_otel_logger_on_litellm_proxy(self): """ Initializes OpenTelemetry for litellm proxy server - Adds Otel as a service callback - Sets `proxy_server.open_telemetry_logger` to self """ try: from litellm.proxy import proxy_server except ImportError: verbose_logger.warning( "Proxy Server is not installed. Skipping OpenTelemetry initialization." ) return # Add self as a service callback if "otel" not in litellm.service_callback and all( not isinstance(cb, OpenTelemetry) for cb in litellm.service_callback ): litellm.service_callback.append(self) # avoid proxy logger ownership being overwritten by later # handlers. Multiple integrations (default OTEL, Langfuse OTEL, # Arize OTEL, etc.) may initialize in sequence; without this guard, # the last one silently replaces the first and breaks expected # routing for proxy_server.open_telemetry_logger consumers. # Behavior: first-registered wins. if getattr(proxy_server, "open_telemetry_logger", None) is None: setattr(proxy_server, "open_telemetry_logger", self) def _get_or_create_provider( self, provider, provider_name: str, get_existing_provider_fn, sdk_provider_class, create_new_provider_fn, set_provider_fn, skip_set_global: bool = False, ): """ Generic helper to get or create an OpenTelemetry provider (Tracer, Meter, or Logger). Args: provider: The provider instance passed to the init function (can be None) provider_name: Name for logging (e.g., "TracerProvider") get_existing_provider_fn: Function to get the existing global provider sdk_provider_class: The SDK provider class to check for (e.g., TracerProvider from SDK) create_new_provider_fn: Function to create a new provider instance set_provider_fn: Function to set the provider globally skip_set_global: If True, don't set the provider globally (for dynamic-only providers) Returns: The provider to use (either existing, new, or explicitly provided) """ if provider is not None: # Provider explicitly provided (e.g., for testing) # Do NOT call set_provider_fn - the caller is responsible for managing global state # If they want it to be global, they've already set it before passing it to us verbose_logger.debug( "OpenTelemetry: Using provided TracerProvider: %s", type(provider).__name__, ) return provider # Check if a provider is already set globally try: existing_provider = get_existing_provider_fn() if isinstance(existing_provider, sdk_provider_class): if skip_set_global: verbose_logger.debug( "OpenTelemetry: existing %s found but skip_set_global=True; creating private %s for isolation", provider_name, provider_name, ) provider = create_new_provider_fn() else: verbose_logger.debug( "OpenTelemetry: Using existing %s: %s", provider_name, type(existing_provider).__name__, ) provider = existing_provider else: # Default proxy provider or unknown type, create our own verbose_logger.debug("OpenTelemetry: Creating new %s", provider_name) provider = create_new_provider_fn() if not skip_set_global: set_provider_fn(provider) else: verbose_logger.info( "OpenTelemetry: Created %s but NOT setting it globally (will use dynamic providers per-request)", provider_name, ) except Exception as e: # Fallback: create a new provider if something goes wrong verbose_logger.debug( "OpenTelemetry: Exception checking existing %s, creating new one: %s", provider_name, str(e), ) provider = create_new_provider_fn() if not skip_set_global: set_provider_fn(provider) return provider def _skip_set_global(self) -> bool: # langfuse_otel relies on the Langfuse SDK's providers; don't overwrite them. return self.config.skip_set_global or ( hasattr(self, "callback_name") and self.callback_name == "langfuse_otel" ) def _compute_capture_mode_from_init_state(self) -> Optional[str]: """Sample explicit settings at init. Returns the resolved mode or None if nothing explicit is set (in which case the legacy ``self.message_logging`` flag is consulted dynamically per request). ``"true"``/``"1"`` map to ``EVENT_ONLY`` per the contrib convention. ``"false"``/``"0"`` map to ``NO_CONTENT``. Unknown values are ignored. """ explicit = self.config.capture_message_content or os.getenv( "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT" ) if not explicit: return None normalized = explicit.upper() if normalized in ("TRUE", "1"): return CAPTURE_MODE_EVENT_ONLY if normalized in ("FALSE", "0"): return CAPTURE_MODE_NO_CONTENT if normalized in _VALID_CAPTURE_MODES: return normalized return None def _resolve_capture_mode(self) -> str: """Return the active capture mode for this request. Precedence: 1. ``litellm.turn_off_message_logging=True`` forces ``NO_CONTENT`` (kill-switch checked dynamically). 2. Explicit setting sampled at init from ``OpenTelemetryConfig.capture_message_content`` or ``OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT``. 3. Legacy ``self.message_logging`` (checked dynamically). """ if litellm.turn_off_message_logging: return CAPTURE_MODE_NO_CONTENT if self._capture_mode_cached is not None: return self._capture_mode_cached return ( CAPTURE_MODE_SPAN_AND_EVENT if self.message_logging else CAPTURE_MODE_NO_CONTENT ) def _capture_in_span(self) -> bool: return self._resolve_capture_mode() in ( CAPTURE_MODE_SPAN_ONLY, CAPTURE_MODE_SPAN_AND_EVENT, ) def _capture_in_event(self) -> bool: return self._resolve_capture_mode() in ( CAPTURE_MODE_EVENT_ONLY, CAPTURE_MODE_SPAN_AND_EVENT, ) def _init_tracing(self, tracer_provider): from opentelemetry import trace from opentelemetry.sdk.trace import TracerProvider from opentelemetry.trace import SpanKind def create_tracer_provider(): provider = TracerProvider(resource=self._get_litellm_resource(self.config)) provider.add_span_processor(self._get_span_processor()) return provider tracer_provider = self._get_or_create_provider( provider=tracer_provider, provider_name="TracerProvider", get_existing_provider_fn=trace.get_tracer_provider, sdk_provider_class=TracerProvider, create_new_provider_fn=create_tracer_provider, set_provider_fn=trace.set_tracer_provider, skip_set_global=self._skip_set_global(), ) # Grab our tracer from the TracerProvider (not from global context) # This ensures we use the provided TracerProvider (e.g., for testing) self.tracer = tracer_provider.get_tracer(LITELLM_TRACER_NAME) self._tracer_provider = tracer_provider self.span_kind = SpanKind def _init_metrics(self, meter_provider): if not self.config.enable_metrics: self._meter_provider = None self._operation_duration_histogram = None self._token_usage_histogram = None self._cost_histogram = None self._time_to_first_token_histogram = None self._time_per_output_token_histogram = None self._response_duration_histogram = None return from opentelemetry import metrics from opentelemetry.sdk.metrics import MeterProvider def create_meter_provider(): metric_reader = self._get_metric_reader() return MeterProvider( metric_readers=[metric_reader], resource=self._get_litellm_resource(self.config), ) meter_provider = self._get_or_create_provider( provider=meter_provider, provider_name="MeterProvider", get_existing_provider_fn=metrics.get_meter_provider, sdk_provider_class=MeterProvider, create_new_provider_fn=create_meter_provider, set_provider_fn=metrics.set_meter_provider, skip_set_global=self._skip_set_global(), ) self._meter_provider = meter_provider meter = meter_provider.get_meter(__name__) self._operation_duration_histogram = meter.create_histogram( name="gen_ai.client.operation.duration", # Replace with semconv constant in otel 1.38 description="GenAI operation duration", unit="s", ) self._token_usage_histogram = meter.create_histogram( name="gen_ai.client.token.usage", # Replace with semconv constant in otel 1.38 description="GenAI token usage", unit="{token}", ) self._cost_histogram = meter.create_histogram( name="gen_ai.client.token.cost", description="GenAI request cost", unit="USD", ) self._time_to_first_token_histogram = meter.create_histogram( name="gen_ai.client.response.time_to_first_token", description="Time to first token for streaming requests", unit="s", ) self._time_per_output_token_histogram = meter.create_histogram( name="gen_ai.client.response.time_per_output_token", description="Average time per output token (generation time / completion tokens)", unit="s", ) self._response_duration_histogram = meter.create_histogram( name="gen_ai.client.response.duration", description="Total LLM API generation time (excludes LiteLLM overhead)", unit="s", ) def _init_logs(self, logger_provider): # nothing to do if events disabled if not self.config.enable_events: self._logger_provider = None return from opentelemetry._logs import get_logger_provider, set_logger_provider from opentelemetry.sdk._logs import LoggerProvider as OTLoggerProvider from opentelemetry.sdk._logs.export import BatchLogRecordProcessor def create_logger_provider(): provider = OTLoggerProvider( resource=self._get_litellm_resource(self.config) ) log_exporter = self._get_log_exporter() provider.add_log_record_processor( BatchLogRecordProcessor(log_exporter) # type: ignore[arg-type] ) return provider self._logger_provider = self._get_or_create_provider( provider=logger_provider, provider_name="LoggerProvider", get_existing_provider_fn=get_logger_provider, sdk_provider_class=OTLoggerProvider, create_new_provider_fn=create_logger_provider, set_provider_fn=set_logger_provider, skip_set_global=self._skip_set_global(), ) def log_success_event(self, kwargs, response_obj, start_time, end_time): self._handle_success(kwargs, response_obj, start_time, end_time) def log_failure_event(self, kwargs, response_obj, start_time, end_time): self._handle_failure(kwargs, response_obj, start_time, end_time) async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): self._handle_success(kwargs, response_obj, start_time, end_time) async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): self._handle_failure(kwargs, response_obj, start_time, end_time) async def async_service_success_hook( self, payload: ServiceLoggerPayload, parent_otel_span: Optional[Span] = None, start_time: Optional[Union[datetime, float]] = None, end_time: Optional[Union[datetime, float]] = None, event_metadata: Optional[dict] = None, ): from opentelemetry import trace from opentelemetry.trace import Status, StatusCode _start_time_ns = 0 _end_time_ns = 0 if isinstance(start_time, float): _start_time_ns = int(start_time * 1e9) else: _start_time_ns = self._to_ns(start_time) if isinstance(end_time, float): _end_time_ns = int(end_time * 1e9) else: _end_time_ns = self._to_ns(end_time) if parent_otel_span is not None: _span_name = payload.service service_logging_span = self.tracer.start_span( name=_span_name, context=trace.set_span_in_context(parent_otel_span), start_time=_start_time_ns, ) self.safe_set_attribute( span=service_logging_span, key="call_type", value=payload.call_type, ) self.safe_set_attribute( span=service_logging_span, key="service", value=payload.service.value, ) if event_metadata: for key, value in event_metadata.items(): if value is None: value = "None" if isinstance(value, dict): try: value = str(value) except Exception: value = "litellm logging error - could_not_json_serialize" self.safe_set_attribute( span=service_logging_span, key=key, value=value, ) service_logging_span.set_status(Status(StatusCode.OK)) service_logging_span.end(end_time=_end_time_ns) async def async_service_failure_hook( self, payload: ServiceLoggerPayload, error: Optional[str] = "", parent_otel_span: Optional[Span] = None, start_time: Optional[Union[datetime, float]] = None, end_time: Optional[Union[float, datetime]] = None, event_metadata: Optional[dict] = None, ): from opentelemetry import trace from opentelemetry.trace import Status, StatusCode _start_time_ns = 0 _end_time_ns = 0 if isinstance(start_time, float): _start_time_ns = int(int(start_time) * 1e9) else: _start_time_ns = self._to_ns(start_time) if isinstance(end_time, float): _end_time_ns = int(int(end_time) * 1e9) else: _end_time_ns = self._to_ns(end_time) if parent_otel_span is not None: _span_name = payload.service service_logging_span = self.tracer.start_span( name=_span_name, context=trace.set_span_in_context(parent_otel_span), start_time=_start_time_ns, ) self.safe_set_attribute( span=service_logging_span, key="call_type", value=payload.call_type, ) self.safe_set_attribute( span=service_logging_span, key="service", value=payload.service.value, ) if error: self.safe_set_attribute( span=service_logging_span, key="error", value=error, ) if event_metadata: for key, value in event_metadata.items(): if isinstance(value, dict): try: value = str(value) except Exception: value = "litllm logging error - could_not_json_serialize" self.safe_set_attribute( span=service_logging_span, key=key, value=value, ) service_logging_span.set_status(Status(StatusCode.ERROR)) service_logging_span.end(end_time=_end_time_ns) async def async_post_call_failure_hook( self, request_data: dict, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth, traceback_str: Optional[str] = None, ): from opentelemetry import trace from opentelemetry.trace import Status, StatusCode parent_otel_span = user_api_key_dict.parent_otel_span if parent_otel_span is not None: parent_otel_span.set_status(Status(StatusCode.ERROR)) # Stamp team attributes onto the SERVER (root) span too, so the # trace root is team-filterable on the failure path like the # child exception span below. self._set_team_attributes_on_span( span=parent_otel_span, team_id=user_api_key_dict.team_id, team_alias=user_api_key_dict.team_alias, ) # Stamp structured error attrs on the SERVER span itself; the # failure path otherwise only sets its status (_handle_failure # records on the litellm_request child span). Inline import: # litellm_logging <-> integrations is circular. from litellm.litellm_core_utils.litellm_logging import ( StandardLoggingPayloadSetup, ) error_information = StandardLoggingPayloadSetup.get_error_information( original_exception=original_exception, traceback_str=traceback_str, ) self._record_exception_on_span( span=parent_otel_span, kwargs={ "exception": original_exception, "standard_logging_object": {"error_information": error_information}, }, ) # _record_exception_on_span only stamps when error_code is set; # bare TypeError etc. has none, and the span is about to be ended. error_code = ( error_information.get("error_code") if error_information else None ) if not error_code: self.set_response_status_code_attribute(parent_otel_span, 500) # Pre-request latency (request_data carries the propagated # metadata on the failure path; omitted if it failed before handoff). self.set_preprocessing_duration_attribute(parent_otel_span, request_data) _span_name = "Failed Proxy Server Request" # Exception Logging Child Span exception_logging_span = self.tracer.start_span( name=_span_name, context=trace.set_span_in_context(parent_otel_span), ) self.safe_set_attribute( span=exception_logging_span, key="exception", value=str(original_exception), ) self._set_team_attributes_on_span( span=exception_logging_span, team_id=user_api_key_dict.team_id, team_alias=user_api_key_dict.team_alias, ) exception_logging_span.set_status(Status(StatusCode.ERROR)) exception_logging_span.end(end_time=self._to_ns(datetime.now())) # Emit guardrail spans for any guardrail invocations that # ran during this request. _handle_failure typically does this, # but for pre-call guardrail blocks the standard_logging_object # may not carry guardrail_information by the time _handle_failure # fires (the data lives only in request_data["metadata"]). Pull # directly from request_data so the span is recorded either way; # _emit_once dedupes if _handle_failure already emitted it. self._emit_guardrail_spans_from_request_data( request_data=request_data, parent_span=parent_otel_span, ) # End Parent OTEL Sspan parent_otel_span.end(end_time=self._to_ns(datetime.now())) def _emit_guardrail_spans_from_request_data( self, request_data: dict, parent_span: Optional[Any], ) -> None: """Emit ``guardrail`` spans from ``request_data["metadata"] ["standard_logging_guardrail_information"]``. Routed through ``_create_guardrail_span`` so the dedupe state in ``_otel_internal`` is honoured — if ``_handle_failure`` already emitted these spans for the same kwargs, this is a no-op. """ from opentelemetry import trace as _trace metadata = (request_data or {}).get("metadata") or {} guardrail_information = metadata.get("standard_logging_guardrail_information") if not guardrail_information: return # _create_guardrail_span reads guardrail_information from # kwargs["standard_logging_object"] and shares its dedupe state via # kwargs["litellm_params"]["metadata"]["_otel_internal"]. Pass the # SAME metadata dict the proxy populated so _handle_failure and # this hook see the same dedupe markers. kwargs: Dict[str, Any] = { "litellm_params": {"metadata": metadata}, "standard_logging_object": { "guardrail_information": guardrail_information, "metadata": metadata, }, } context = ( _trace.set_span_in_context(parent_span) if parent_span is not None else None ) self._create_guardrail_span(kwargs=kwargs, context=context) async def async_post_call_success_hook( self, data: dict, user_api_key_dict: UserAPIKeyAuth, response: LLMResponseTypes, ): from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging litellm_logging_obj = data.get("litellm_logging_obj") if litellm_logging_obj is not None and isinstance( litellm_logging_obj, LiteLLMLogging ): kwargs = litellm_logging_obj.model_call_details parent_span = user_api_key_dict.parent_otel_span ctx, _ = self._get_span_context(kwargs, default_span=parent_span) # Pre-request latency on the SERVER span (success path). self.set_preprocessing_duration_attribute(parent_span, kwargs) # 3. Guardrail span self._create_guardrail_span(kwargs=kwargs, context=ctx) return response ######################################################### # Team/Key Based Logging Control Flow ######################################################### def get_tracer_to_use_for_request(self, kwargs: dict) -> Tracer: """ Get the tracer to use for this request If dynamic headers are present, a temporary tracer is created with the dynamic headers. Otherwise, the default tracer is used. Returns: Tracer: The tracer to use for this request """ dynamic_headers = self._get_dynamic_otel_headers_from_kwargs(kwargs) if dynamic_headers is not None: # Create spans using a temporary tracer with dynamic headers tracer_to_use = self._get_tracer_with_dynamic_headers(dynamic_headers) verbose_logger.debug( "[OTEL DEBUG] Using DYNAMIC tracer with headers: %s", dynamic_headers ) else: # For langfuse_otel without dynamic headers, create a provider with env var credentials if hasattr(self, "callback_name") and self.callback_name == "langfuse_otel": # Use the headers from config (which were set from env vars during init) env_var_headers = ( self._get_headers_dictionary(self.OTEL_HEADERS) if self.OTEL_HEADERS else {} ) if env_var_headers: tracer_to_use = self._get_tracer_with_dynamic_headers( env_var_headers ) verbose_logger.debug( "[OTEL DEBUG] Using env var credentials for langfuse_otel (master key request)" ) else: # No env vars set, use global tracer (will be NoOp) tracer_to_use = self.tracer verbose_logger.debug( "[OTEL DEBUG] No credentials available for langfuse_otel" ) else: tracer_to_use = self.tracer verbose_logger.debug( "[OTEL DEBUG] Using GLOBAL tracer (no dynamic headers)" ) return tracer_to_use def _get_dynamic_otel_headers_from_kwargs(self, kwargs) -> Optional[dict]: """Extract dynamic headers from kwargs if available.""" standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = ( kwargs.get("standard_callback_dynamic_params") ) if not standard_callback_dynamic_params: return None dynamic_headers = self.construct_dynamic_otel_headers( standard_callback_dynamic_params=standard_callback_dynamic_params ) return dynamic_headers if dynamic_headers else None def _get_tracer_with_dynamic_headers(self, dynamic_headers: dict): """Create a temporary tracer with dynamic headers for this request only.""" from opentelemetry.sdk.trace import TracerProvider # Prevents thread exhaustion by reusing providers for the same credential sets (e.g. per-team keys) cache_key = str(sorted(dynamic_headers.items())) if cache_key in self._tracer_provider_cache: return self._tracer_provider_cache[cache_key].get_tracer( LITELLM_TRACER_NAME ) # Create a temporary tracer provider with dynamic headers temp_provider = TracerProvider(resource=self._get_litellm_resource(self.config)) temp_provider.add_span_processor( self._get_span_processor(dynamic_headers=dynamic_headers) ) # Store in cache for reuse self._tracer_provider_cache[cache_key] = temp_provider return temp_provider.get_tracer(LITELLM_TRACER_NAME) def construct_dynamic_otel_headers( self, standard_callback_dynamic_params: StandardCallbackDynamicParams ) -> Optional[dict]: """ Construct dynamic headers from standard callback dynamic params Note: You just need to override this method in Arize, Langfuse Otel if you want to allow team/key based logging. Returns: dict: A dictionary of dynamic headers """ return None ######################################################### # End of Team/Key Based Logging Control Flow ######################################################### def _emit_once(self, kwargs: dict, *scope: object) -> bool: """Return True the first time this handler is asked to emit a span for the given (handler, scope) on this kwargs; False on repeats. Used to suppress duplicate span emission for two distinct patterns: 1. **Handler-level dual-fire**: streaming code paths trigger both the sync and async callback for one request, so ``_handle_success`` / ``_handle_failure`` would otherwise produce two ``litellm_request`` spans. Scope: ``("success",)`` / ``("failure",)``. 2. **Payload-driven multi-entrypoint emission**: a span loop that reads entries from ``standard_logging_payload`` (currently only guardrails) is invoked from multiple lifecycle points (post-call hooks, success callback, failure callback). The list can be re-read with mutated entries between calls, so dedupe must be at entry granularity. Scope: the entry's stable identity. ``scope`` parts can be any hashable identity. The marker is stored in ``kwargs["litellm_params"]["metadata"]["_otel_internal"]`` so it is request-local (kwargs is shared across the sync/async callbacks and lifecycle hooks for one request). """ litellm_params = kwargs.get("litellm_params") if not isinstance(litellm_params, dict): litellm_params = {} kwargs["litellm_params"] = litellm_params _metadata = litellm_params.get("metadata") if not isinstance(_metadata, dict): _metadata = {} litellm_params["metadata"] = _metadata _otel_internal = _metadata.get("_otel_internal") if not isinstance(_otel_internal, dict): _otel_internal = {} _metadata["_otel_internal"] = _otel_internal spans_logged = _otel_internal.get("spans_logged") if not isinstance(spans_logged, dict): spans_logged = {} _otel_internal["spans_logged"] = spans_logged dedupe_key = (self.__class__.__name__, id(self), *scope) if spans_logged.get(dedupe_key) is True: return False spans_logged[dedupe_key] = True return True def _end_proxy_span_from_kwargs(self, kwargs: dict, end_time) -> None: """Close the proxy-level parent span if it is still recording. This helper retrieves the proxy span directly from kwargs metadata and closes it after all child spans have been recorded. Only called from the success path. The failure path deliberately leaves the proxy span open so ``async_post_call_failure_hook`` can append the ``"Failed Proxy Server Request"`` child span before closing it. Only spans named ``LITELLM_PROXY_REQUEST_SPAN_NAME`` are closed — externally provided spans must not be closed by LiteLLM. """ litellm_params = kwargs.get("litellm_params", {}) or {} _metadata = litellm_params.get("metadata", {}) or {} proxy_span = _metadata.get("litellm_parent_otel_span", None) # Fallback: check litellm_metadata (used by /v1/messages and other # LITELLM_METADATA_ROUTES). if proxy_span is None: _litellm_metadata = litellm_params.get("litellm_metadata", {}) or {} proxy_span = _litellm_metadata.get("litellm_parent_otel_span", None) if ( proxy_span is not None and getattr(proxy_span, "name", None) == LITELLM_PROXY_REQUEST_SPAN_NAME and hasattr(proxy_span, "is_recording") and proxy_span.is_recording() ): self._close_proxy_span_ok(proxy_span, end_time) def _close_proxy_span_ok(self, span: Span, end_time) -> None: """Stamp http.response.status_code=200 + status=OK, then end the span.""" from opentelemetry.trace import Status, StatusCode self.set_response_status_code_attribute(span, 200) span.set_status(Status(StatusCode.OK)) span.end(end_time=self._to_ns(end_time)) def _handle_success(self, kwargs, response_obj, start_time, end_time): """Create the litellm_request span then close the proxy span.""" verbose_logger.debug( "OpenTelemetry Logger: Logging kwargs: %s, OTEL config settings=%s", kwargs, self.config, ) # sync + async success handlers can both fire for one # request (notably in streaming code paths). Guard against duplicate # span writes — but still close the proxy span on the skip path so # the trace doesn't leak an open root span. if not self._emit_once(kwargs, "success"): verbose_logger.debug( "OpenTelemetry: skipping duplicate success span for handler=%s", self.__class__.__name__, ) self._end_proxy_span_from_kwargs(kwargs, end_time) return ctx, parent_span = self._get_span_context(kwargs) if self.config.ignore_context_propagation: parent_span = None # Ignore parent spans from other providers ctx = None # Decide whether to create a primary span # Always create if no parent span exists (backward compatibility) # OR if USE_OTEL_LITELLM_REQUEST_SPAN is explicitly enabled should_create_primary_span = parent_span is None or get_secret_bool( "USE_OTEL_LITELLM_REQUEST_SPAN" ) if should_create_primary_span: # Create a new litellm_request span span = self._start_primary_span( kwargs, response_obj, start_time, end_time, ctx ) # Raw-request sub-span (if enabled) - child of litellm_request span self._maybe_log_raw_request( kwargs, response_obj, start_time, end_time, span ) # Do NOT duplicate attributes onto the parent proxy-request span. # The child litellm_request span already carries all attributes; # copying them to the parent doubles storage and complicates # search (Issue #4). else: # Do not create primary span (keep hierarchy shallow when parent exists) from opentelemetry.trace import Status, StatusCode span = None # Only set attributes if the span is still recording (not closed) # Note: parent_span is guaranteed to be not None here if hasattr(parent_span, "set_status"): parent_span.set_status(Status(StatusCode.OK)) self.set_attributes(parent_span, kwargs, response_obj) # Raw-request as direct child of parent_span self._maybe_log_raw_request( kwargs, response_obj, start_time, end_time, parent_span ) # 3. Guardrail span — ensure guardrails are always parented to an # existing span so they never become orphaned root spans (Issue #5). guardrail_ctx = self._resolve_guardrail_context( span=span, parent_span=parent_span, fallback_ctx=ctx ) self._create_guardrail_span(kwargs=kwargs, context=guardrail_ctx) # 4. Metrics & cost recording self._record_metrics(kwargs, response_obj, start_time, end_time) # 5. Semantic logs. if self.config.enable_events: log_span = span if span is not None else parent_span if log_span is not None: self._emit_semantic_logs(kwargs, response_obj, log_span) # 6. Do NOT end parent span - it should be managed by its creator # External spans (from Langfuse, user code, HTTP headers, global context) must not be closed by LiteLLM # However, proxy-created spans should be closed here. if ( parent_span is not None and hasattr(parent_span, "name") and parent_span.name == LITELLM_PROXY_REQUEST_SPAN_NAME and hasattr(parent_span, "is_recording") and parent_span.is_recording() ): self._close_proxy_span_ok(parent_span, end_time) # Stamp team attributes onto the SERVER (root) span before it is # closed, so the trace root carries them like every child span. self._set_team_attributes_on_proxy_span_from_kwargs(kwargs) # close the proxy span explicitly from kwargs metadata # after all child spans (litellm_request, guardrail, raw_request) # have been fully recorded and exported. self._end_proxy_span_from_kwargs(kwargs, end_time) def _start_primary_span( self, kwargs, response_obj, start_time, end_time, context, ): from opentelemetry.trace import Status, StatusCode otel_tracer: Tracer = self.get_tracer_to_use_for_request(kwargs) span_kwargs: Dict[str, Any] = { "name": self._get_span_name(kwargs), "start_time": self._to_ns(start_time), "context": context, } if self._gen_ai_semconv_latest_experimental: span_kwargs["kind"] = self.span_kind.CLIENT span = otel_tracer.start_span(**span_kwargs) span.set_status(Status(StatusCode.OK)) self.set_attributes(span, kwargs, response_obj) span.end(end_time=self._to_ns(end_time)) return span def _maybe_log_raw_request( self, kwargs, response_obj, start_time, end_time, parent_span ): from opentelemetry import trace from opentelemetry.trace import Status, StatusCode # raw_gen_ai_request is non-standard in semconv mode. if self._gen_ai_semconv_latest_experimental: return if not self._capture_in_span(): return litellm_params = kwargs.get("litellm_params", {}) metadata = litellm_params.get("metadata") or {} generation_name = metadata.get("generation_name") raw_span_name = generation_name if generation_name else RAW_REQUEST_SPAN_NAME otel_tracer: Tracer = self.get_tracer_to_use_for_request(kwargs) raw_span = otel_tracer.start_span( name=raw_span_name, start_time=self._to_ns(start_time), context=trace.set_span_in_context(parent_span), ) raw_span.set_status(Status(StatusCode.OK)) self.set_raw_request_attributes(raw_span, kwargs, response_obj) self._set_team_attributes_from_kwargs(raw_span, kwargs) raw_span.end(end_time=self._to_ns(end_time)) def _set_team_attributes_on_span( self, span: Span, team_id: Optional[str], team_alias: Optional[str], ) -> None: """Stamp team_id / team_alias onto a span so every child span of a litellm_request trace carries them, not just the root span. Empty strings are treated as absent: a request made with the master key or a team-less virtual key carries ``user_api_key_team_id=""`` in ``standard_logging_object.metadata``; propagating that to every span only adds noise that makes traces look mis-instrumented. """ if team_id: self.safe_set_attribute( span=span, key="metadata.user_api_key_team_id", value=team_id, ) if team_alias: self.safe_set_attribute( span=span, key="metadata.user_api_key_team_alias", value=team_alias, ) def _set_team_attributes_from_kwargs(self, span: Span, kwargs: dict) -> None: """Pull team_id / team_alias from the standard logging metadata in kwargs and stamp them onto span.""" std_log = kwargs.get("standard_logging_object") md: dict = {} if isinstance(std_log, dict): md = std_log.get("metadata") or {} elif std_log is not None: md = getattr(std_log, "metadata", None) or {} self._set_team_attributes_on_span( span=span, team_id=md.get("user_api_key_team_id"), team_alias=md.get("user_api_key_team_alias"), ) def _set_team_attributes_on_proxy_span_from_kwargs(self, kwargs: dict) -> None: """Stamp team attributes onto the proxy SERVER (root) span so the trace root is filterable by team, not just its children. The root span is created in auth before the team is resolved and is otherwise only closed (never re-attributed) on the success path. Guarded to the LiteLLM-created proxy span (by name + recording) so externally provided parent spans are never mutated. """ litellm_params = kwargs.get("litellm_params") or {} metadata = litellm_params.get("metadata") or {} proxy_span = metadata.get("litellm_parent_otel_span") if ( proxy_span is not None and getattr(proxy_span, "name", None) == LITELLM_PROXY_REQUEST_SPAN_NAME and hasattr(proxy_span, "is_recording") and proxy_span.is_recording() ): self._set_team_attributes_from_kwargs(proxy_span, kwargs) def _set_inference_identity_attributes( self, span: Span, standard_logging_payload: StandardLoggingPayload, litellm_params: dict, ) -> None: """Stamp request-identity attributes onto an inference span so every LLM-call span is filterable by the route it came in on, the team's metadata, and both the user-facing (model_group alias) and the dispatched (provider) model names. Empty/absent values are skipped. """ metadata = standard_logging_payload.get("metadata") or {} http_route = metadata.get("user_api_key_request_route") if http_route: self.safe_set_attribute( span=span, key=HTTP_ROUTE_ATTRIBUTE, value=http_route ) # ``user_api_key_team_metadata`` is dropped from the standard logging # payload metadata, so read it from the raw request metadata in kwargs. # ``metadata`` and ``litellm_metadata`` are alternate names for the same # full metadata dict (the name varies by endpoint), so first-truthy wins. raw_metadata = ( litellm_params.get("metadata") or litellm_params.get("litellm_metadata") or {} ) team_metadata = self._team_metadata_json( raw_metadata.get("user_api_key_team_metadata"), self.config.baggage_team_metadata_keys, ) if team_metadata: self.safe_set_attribute( span=span, key=TEAM_METADATA_ATTRIBUTE, value=team_metadata ) model_group = standard_logging_payload.get("model_group") if model_group: self.safe_set_attribute( span=span, key=MODEL_GROUP_ATTRIBUTE, value=model_group ) hidden_params = standard_logging_payload.get("hidden_params") or {} provider_model = hidden_params.get( "litellm_model_name" ) or standard_logging_payload.get("model") if provider_model: self.safe_set_attribute( span=span, key=PROVIDER_MODEL_ATTRIBUTE, value=provider_model ) @staticmethod def _team_metadata_json(value: Any, allowed_keys: List[str]) -> Optional[str]: """JSON-serialize only the allowlisted sub-keys of a team's metadata. Returns ``None`` when nothing is allowlisted or no allowlisted key is present, so the empty case is dropped rather than stamping a useless ``"{}"`` (and so a team's metadata never leaves the process until an operator opts each sub-key in via ``baggage_team_metadata_keys``). """ if not isinstance(value, dict) or not value or not allowed_keys: return None filtered = {key: value[key] for key in allowed_keys if key in value} if not filtered: return None return safe_dumps(filtered) def _ensure_metric_attribute_filter(self) -> None: """Resolve the include/exclude filter once, falling back to the proxy's callback_settings.otel.attributes when no explicit config was passed.""" if self._metric_attr_filter_resolved: return attributes = self.config.attributes if attributes is None and self.callback_name in (None, "otel"): otel_settings = (litellm.callback_settings or {}).get("otel") or {} raw = ( otel_settings.get("attributes") if isinstance(otel_settings, dict) else None ) if raw is not None: attributes = _build_metric_attribute_filter(raw) ( self._metric_attr_include, self._metric_attr_exclude, ) = _resolve_metric_attribute_filter(attributes) self._metric_attr_filter_resolved = True def _filter_metric_attributes(self, attrs: Dict[str, Any]) -> Dict[str, Any]: if not self._metric_attr_filter_resolved: self._ensure_metric_attribute_filter() if self._metric_attr_include is not None: return {k: v for k, v in attrs.items() if k in self._metric_attr_include} if self._metric_attr_exclude is not None: return { k: v for k, v in attrs.items() if k not in self._metric_attr_exclude } return attrs def _record_metrics(self, kwargs, response_obj, start_time, end_time): duration_s = (end_time - start_time).total_seconds() params = kwargs.get("litellm_params") or {} provider = params.get("custom_llm_provider", "Unknown") common_attrs = { "gen_ai.operation.name": ( self._gen_ai_operation_name(kwargs) if self._gen_ai_semconv_latest_experimental else "chat" ), "gen_ai.system": provider, "gen_ai.request.model": kwargs.get("model"), "gen_ai.framework": "litellm", } std_log = kwargs.get("standard_logging_object") md = getattr(std_log, "metadata", None) or (std_log or {}).get("metadata", {}) for key in METRIC_METADATA_KEYS: value = md.get(key) if value is None: continue if isinstance(value, (dict, list)): common_attrs[f"metadata.{key}"] = safe_dumps(value) else: common_attrs[f"metadata.{key}"] = str(value) # get hidden params hidden_params = getattr(std_log, "hidden_params", None) or (std_log or {}).get( "hidden_params", {} ) if hidden_params: common_attrs["hidden_params"] = safe_dumps(hidden_params) common_attrs = self._filter_metric_attributes(common_attrs) if self._operation_duration_histogram: self._operation_duration_histogram.record( duration_s, attributes=common_attrs ) if ( response_obj and (usage := response_obj.get("usage")) and self._token_usage_histogram ): in_attrs = {**common_attrs, TOKEN_TYPE_ATTRIBUTE: "input"} out_attrs = {**common_attrs, TOKEN_TYPE_ATTRIBUTE: "output"} self._token_usage_histogram.record( usage.get("prompt_tokens", 0), attributes=in_attrs ) self._token_usage_histogram.record( usage.get("completion_tokens", 0), attributes=out_attrs ) cost = kwargs.get("response_cost") if self._cost_histogram and cost: self._cost_histogram.record(cost, attributes=common_attrs) # Record latency metrics (TTFT, TPOT, and Total Generation Time) self._record_time_to_first_token_metric(kwargs, common_attrs) self._record_time_per_output_token_metric( kwargs, response_obj, end_time, duration_s, common_attrs ) self._record_response_duration_metric(kwargs, end_time, common_attrs) @staticmethod def _to_timestamp( val: Optional[Union[datetime, float, str]], ) -> Optional[float]: """Convert datetime/float/string to timestamp.""" if val is None: return None if isinstance(val, datetime): return val.timestamp() if isinstance(val, (int, float)): return float(val) # isinstance(val, str) - parse datetime string (with or without microseconds) try: return datetime.strptime(val, "%Y-%m-%d %H:%M:%S.%f").timestamp() except ValueError: try: return datetime.strptime(val, "%Y-%m-%d %H:%M:%S").timestamp() except ValueError: return None def _record_time_to_first_token_metric(self, kwargs: dict, common_attrs: dict): """Record Time to First Token (TTFT) metric for streaming requests.""" optional_params = kwargs.get("optional_params", {}) is_streaming = optional_params.get("stream", False) if not (self._time_to_first_token_histogram and is_streaming): return # Use api_call_start_time for precision (matches Prometheus implementation) # This excludes LiteLLM overhead and measures pure LLM API latency api_call_start_time = kwargs.get("api_call_start_time", None) completion_start_time = kwargs.get("completion_start_time", None) if api_call_start_time is not None and completion_start_time is not None: # Convert to timestamps if needed (handles datetime, float, and string) api_call_start_ts = self._to_timestamp(api_call_start_time) completion_start_ts = self._to_timestamp(completion_start_time) if api_call_start_ts is None or completion_start_ts is None: return # Skip recording if conversion failed time_to_first_token_seconds = completion_start_ts - api_call_start_ts self._time_to_first_token_histogram.record( time_to_first_token_seconds, attributes=common_attrs ) def _record_time_per_output_token_metric( self, kwargs: dict, response_obj: Optional[Any], end_time: datetime, duration_s: float, common_attrs: dict, ): """Record Time Per Output Token (TPOT) metric. Calculated as: generation_time / completion_tokens - For streaming: uses end_time - completion_start_time (time to generate all tokens after first) - For non-streaming: uses end_time - api_call_start_time (total generation time) """ if not self._time_per_output_token_histogram: return # Get completion tokens from response_obj completion_tokens = None if response_obj and (usage := response_obj.get("usage")): completion_tokens = usage.get("completion_tokens") if completion_tokens is None or completion_tokens <= 0: return # Calculate generation time completion_start_time = kwargs.get("completion_start_time", None) api_call_start_time = kwargs.get("api_call_start_time", None) # Convert end_time to timestamp (handles datetime, float, and string) end_time_ts = self._to_timestamp(end_time) if end_time_ts is None: # Fallback to duration_s if conversion failed generation_time_seconds = duration_s if generation_time_seconds > 0: time_per_output_token_seconds = ( generation_time_seconds / completion_tokens ) self._time_per_output_token_histogram.record( time_per_output_token_seconds, attributes=common_attrs ) return if completion_start_time is not None: # Streaming: use completion_start_time (when first token arrived) # This measures time to generate all tokens after the first one completion_start_ts = self._to_timestamp(completion_start_time) if completion_start_ts is None: # Fallback to duration_s if conversion failed generation_time_seconds = duration_s else: generation_time_seconds = end_time_ts - completion_start_ts elif api_call_start_time is not None: # Non-streaming: use api_call_start_time (total generation time) api_call_start_ts = self._to_timestamp(api_call_start_time) if api_call_start_ts is None: # Fallback to duration_s if conversion failed generation_time_seconds = duration_s else: generation_time_seconds = end_time_ts - api_call_start_ts else: # Fallback: use duration_s (already calculated as (end_time - start_time).total_seconds()) generation_time_seconds = duration_s if generation_time_seconds > 0: time_per_output_token_seconds = generation_time_seconds / completion_tokens self._time_per_output_token_histogram.record( time_per_output_token_seconds, attributes=common_attrs ) def _record_response_duration_metric( self, kwargs: dict, end_time: Union[datetime, float], common_attrs: dict, ): """Record Total Generation Time (response duration) metric. Measures pure LLM API generation time: end_time - api_call_start_time This excludes LiteLLM overhead and measures only the LLM provider's response time. Works for both streaming and non-streaming requests. Mirrors Prometheus's litellm_llm_api_latency_metric. Uses kwargs.get("end_time") with fallback to parameter for consistency with Prometheus. """ if not self._response_duration_histogram: return api_call_start_time = kwargs.get("api_call_start_time", None) if api_call_start_time is None: return # Use end_time from kwargs if available (matches Prometheus), otherwise use parameter # For streaming: end_time is when the stream completes (final chunk received) # For non-streaming: end_time is when the response is received _end_time = kwargs.get("end_time") or end_time if _end_time is None: _end_time = datetime.now() # Convert to timestamps if needed (handles datetime, float, and string) api_call_start_ts = self._to_timestamp(api_call_start_time) end_time_ts = self._to_timestamp(_end_time) if api_call_start_ts is None or end_time_ts is None: return # Skip recording if conversion failed response_duration_seconds = end_time_ts - api_call_start_ts if response_duration_seconds > 0: self._response_duration_histogram.record( response_duration_seconds, attributes=common_attrs ) @staticmethod def _otel_log_types(): """Resolve ``(LogRecord, SeverityNumber)`` across OTEL SDK versions. ``LogRecord`` moved out of ``opentelemetry.sdk._logs`` in OTEL >= 1.39.0 (open-telemetry/opentelemetry-python#4676). Imports stay function-local because the SDK is an optional dependency. """ from opentelemetry._logs import SeverityNumber try: from opentelemetry.sdk._logs import LogRecord # OTEL < 1.39.0 except ImportError: from opentelemetry.sdk._logs._internal import ( # OTEL >= 1.39.0 LogRecord, ) return LogRecord, SeverityNumber def _emit_semantic_logs(self, kwargs, response_obj, span: Span): if not self.config.enable_events: return # NOTE: Semantic logs (gen_ai.content.prompt/completion events) have compatibility issues # with OTEL SDK >= 1.39.0 due to breaking changes in PR #4676: # - LogRecord moved from opentelemetry.sdk._logs to opentelemetry.sdk._logs._internal # - LogRecord constructor no longer accepts 'resource' parameter (now inherited from LoggerProvider) # - LogData class was removed entirely # These logs work correctly in OTEL SDK < 1.39.0 but may fail in >= 1.39.0. # See: https://github.com/open-telemetry/opentelemetry-python/pull/4676 # TODO: Refactor to use the proper OTEL Logs API instead of directly creating SDK LogRecords SdkLogRecord, SeverityNumber = self._otel_log_types() # Resolve through the handler's own LoggerProvider (which may be a # private one when skip_set_global=True) rather than the module-level # get_logger() which always goes through the global provider. otel_logger = self._logger_provider.get_logger(LITELLM_LOGGER_NAME) parent_ctx = span.get_span_context() provider = (kwargs.get("litellm_params") or {}).get( "custom_llm_provider", "Unknown" ) if self._gen_ai_semconv_latest_experimental: self._emit_inference_details_event( kwargs=kwargs, response_obj=response_obj, provider=provider, otel_logger=otel_logger, parent_ctx=parent_ctx, ) return # per-message events for msg in kwargs.get("messages", []): role = msg.get("role", "user") attrs = { "event_name": "gen_ai.content.prompt", "gen_ai.system": provider, } if role == "tool" and msg.get("id"): attrs["id"] = msg["id"] capture_event_content = self._capture_in_event() if capture_event_content and msg.get("content"): attrs["gen_ai.prompt"] = msg["content"] body = msg.copy() if not capture_event_content: body.pop("content", None) log_record = SdkLogRecord( timestamp=self._to_ns(datetime.now()), trace_id=parent_ctx.trace_id, span_id=parent_ctx.span_id, trace_flags=parent_ctx.trace_flags, severity_number=SeverityNumber.INFO, severity_text="INFO", body=body, attributes=attrs, ) otel_logger.emit(log_record) # per-choice events for idx, choice in enumerate(response_obj.get("choices", [])): attrs = { "event_name": "gen_ai.content.completion", "gen_ai.system": provider, "index": idx, "finish_reason": choice.get("finish_reason"), } body_msg = choice.get("message", {}) capture_event_content = self._capture_in_event() if capture_event_content and body_msg.get("content"): attrs["message.content"] = body_msg["content"] body = { "index": idx, "finish_reason": choice.get("finish_reason"), "message": {"role": body_msg.get("role", "assistant")}, } if capture_event_content and body_msg.get("content"): body["message"]["content"] = body_msg["content"] log_record = SdkLogRecord( timestamp=self._to_ns(datetime.now()), trace_id=parent_ctx.trace_id, span_id=parent_ctx.span_id, trace_flags=parent_ctx.trace_flags, severity_number=SeverityNumber.INFO, severity_text="INFO", body=body, attributes=attrs, ) otel_logger.emit(log_record) @staticmethod def _resolve_guardrail_context( span: Optional[Any], parent_span: Optional[Any], fallback_ctx: Optional[Any], ) -> Optional[Any]: """ Return a valid OTEL context for guardrail child spans so they are never orphaned (Issue #5). Priority: 1. The litellm_request span that was just created 2. The parent proxy-request span 3. The original fallback context (may be None — last resort) """ from opentelemetry import trace as _trace if span is not None: return _trace.set_span_in_context(span) if parent_span is not None: return _trace.set_span_in_context(parent_span) return fallback_ctx def _create_guardrail_span( self, kwargs: Optional[dict], context: Optional[Context] ): """ Creates a span for Guardrail, if any guardrail information is present in standard_logging_object """ # Create span for guardrail information kwargs = kwargs or {} standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get( "standard_logging_object" ) if standard_logging_payload is None: return guardrail_information_data = standard_logging_payload.get( "guardrail_information" ) if not guardrail_information_data: return guardrail_information_list = [ information for information in guardrail_information_data if isinstance(information, dict) ] if not guardrail_information_list: return otel_tracer: Tracer = self.get_tracer_to_use_for_request(kwargs) for guardrail_information in guardrail_information_list: start_time_float = guardrail_information.get("start_time") end_time_float = guardrail_information.get("end_time") # ``_create_guardrail_span`` is called from three lifecycle # points (``async_post_call_success_hook``, ``_handle_success``, # ``_handle_failure``) and re-reads the (mutating) entry list # each time. Dedupe at entry granularity so a single real # guardrail invocation produces exactly one span per handler. if not self._emit_once( kwargs, "guardrail", guardrail_information.get("guardrail_name"), start_time_float, guardrail_information.get("guardrail_mode"), ): continue start_time_datetime = datetime.now() if start_time_float is not None: start_time_datetime = datetime.fromtimestamp(start_time_float) end_time_datetime = datetime.now() if end_time_float is not None: end_time_datetime = datetime.fromtimestamp(end_time_float) guardrail_span = otel_tracer.start_span( name="guardrail", start_time=self._to_ns(start_time_datetime), context=context, ) self.safe_set_attribute( span=guardrail_span, key=SpanAttributes.OPENINFERENCE_SPAN_KIND, value=OpenInferenceSpanKindValues.GUARDRAIL.value, ) self.safe_set_attribute( span=guardrail_span, key="guardrail_name", value=guardrail_information.get("guardrail_name"), ) self.safe_set_attribute( span=guardrail_span, key="guardrail_mode", value=guardrail_information.get("guardrail_mode"), ) masked_entity_count = guardrail_information.get("masked_entity_count") if masked_entity_count is not None: guardrail_span.set_attribute( "masked_entity_count", safe_dumps(masked_entity_count) ) guardrail_response = guardrail_information.get("guardrail_response") if guardrail_response is not None: guardrail_span.set_attribute( "guardrail_response", safe_dumps(guardrail_response) ) # Surface guardrail_status (success / guardrail_intervened / # guardrail_failed_to_respond / not_run) as a top-level span # attribute so trace backends can filter on it without parsing # guardrail_response. self.safe_set_attribute( span=guardrail_span, key="guardrail_status", value=guardrail_information.get("guardrail_status"), ) # Provider's raw top-level action (e.g. Bedrock's # ``GUARDRAIL_INTERVENED`` / ``NONE``). Populated by the provider # hook onto StandardLoggingGuardrailInformation so this integration # stays provider-agnostic — we only read a normalised string. guardrail_action = guardrail_information.get("guardrail_action") if guardrail_action: guardrail_span.set_attribute("guardrail_action", guardrail_action) # The provider hook (e.g. Bedrock) extracts violation_categories # from the raw response BEFORE redaction and stamps them onto # StandardLoggingGuardrailInformation. Surfacing them here as a # queryable attribute lets dashboards group by violation category # without parsing the redacted guardrail_response blob. violation_categories = guardrail_information.get("violation_categories") if violation_categories: # OTel sequence attributes must be homogeneous primitives; # serialise to JSON once so set_attribute never coerces. guardrail_span.set_attribute( "guardrail_violation_categories", safe_dumps(violation_categories) ) self._set_team_attributes_from_kwargs(guardrail_span, kwargs) guardrail_span.end(end_time=self._to_ns(end_time_datetime)) def _handle_failure(self, kwargs, response_obj, start_time, end_time): from opentelemetry.trace import Status, StatusCode verbose_logger.debug( "OpenTelemetry Logger: Failure HandlerLogging kwargs: %s, OTEL config settings=%s", kwargs, self.config, ) # sync + async failure handlers can both fire for one # request (notably in streaming code paths), producing two # semantically identical ERROR spans. Unlike the success path, the # proxy span is intentionally left open here so that # ``async_post_call_failure_hook`` can append the # "Failed Proxy Server Request" child span before closing it — # there is no proxy-span side-effect to preserve on the skip path. if not self._emit_once(kwargs, "failure"): verbose_logger.debug( "OpenTelemetry: skipping duplicate failure span for handler=%s", self.__class__.__name__, ) return _parent_context, parent_otel_span = self._get_span_context(kwargs) if self.config.ignore_context_propagation: parent_otel_span = None # Ignore parent spans from other providers _parent_context = None # Decide whether to create a primary span # Always create if no parent span exists (backward compatibility) # OR if USE_OTEL_LITELLM_REQUEST_SPAN is explicitly enabled should_create_primary_span = parent_otel_span is None or get_secret_bool( "USE_OTEL_LITELLM_REQUEST_SPAN" ) span = None if should_create_primary_span: # Span 1: Request sent to litellm SDK otel_tracer: Tracer = self.get_tracer_to_use_for_request(kwargs) span_kwargs: Dict[str, Any] = { "name": self._get_span_name(kwargs), "start_time": self._to_ns(start_time), "context": _parent_context, } if self._gen_ai_semconv_latest_experimental: span_kwargs["kind"] = self.span_kind.CLIENT span = otel_tracer.start_span(**span_kwargs) span.set_status(Status(StatusCode.ERROR)) self.set_attributes(span, kwargs, response_obj) # Record exception information using OTEL standard method self._record_exception_on_span(span=span, kwargs=kwargs) span.end(end_time=self._to_ns(end_time)) else: # When parent span exists and USE_OTEL_LITELLM_REQUEST_SPAN=false, # record error on parent span (keeps hierarchy shallow) # Only set attributes if the span is still recording (not closed) # Note: parent_otel_span is guaranteed to be not None here if parent_otel_span.is_recording(): parent_otel_span.set_status(Status(StatusCode.ERROR)) self.set_attributes(parent_otel_span, kwargs, response_obj) self._record_exception_on_span(span=parent_otel_span, kwargs=kwargs) # Create span for guardrail information — ensure proper parenting (Issue #5) guardrail_ctx = self._resolve_guardrail_context( span=span, parent_span=parent_otel_span, fallback_ctx=_parent_context ) self._create_guardrail_span(kwargs=kwargs, context=guardrail_ctx) # Do NOT end parent span - it should be managed by its creator # External spans (from Langfuse, user code, HTTP headers, global context) must not be closed by LiteLLM # However, proxy-created spans should be closed here if ( parent_otel_span is not None and hasattr(parent_otel_span, "name") and parent_otel_span.name == LITELLM_PROXY_REQUEST_SPAN_NAME ): parent_otel_span.end(end_time=self._to_ns(end_time)) def _record_exception_on_span(self, span: Span, kwargs: dict): """ Record exception information on the span using OTEL standard methods. This extracts error information from StandardLoggingPayload and: 1. Uses span.record_exception() for the actual exception object (OTEL standard) 2. Sets structured error attributes from StandardLoggingPayloadErrorInformation """ try: from litellm.integrations._types.open_inference import ( ErrorAttributes, ) # Get the exception object if available exception = kwargs.get("exception") # Record the exception using OTEL's standard method if exception is not None: span.record_exception(exception) # Get StandardLoggingPayload for structured error information standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get( "standard_logging_object" ) if standard_logging_payload is None: return # Extract error_information from StandardLoggingPayload error_information = standard_logging_payload.get("error_information") if error_information is None: # Fallback to error_str if error_information is not available error_str = standard_logging_payload.get("error_str") if error_str: self.safe_set_attribute( span=span, key=ErrorAttributes.ERROR_MESSAGE, value=error_str, ) return # Set structured error attributes from StandardLoggingPayloadErrorInformation if error_information.get("error_code"): self.safe_set_attribute( span=span, key=ErrorAttributes.ERROR_CODE, value=error_information["error_code"], ) # Also expose under the OTel-standard name as an int # (error_code is a str, may be non-numeric). _error_code_val = error_information["error_code"] if _error_code_val is not None: try: self.safe_set_attribute( span=span, key=HTTP_RESPONSE_STATUS_CODE_ATTRIBUTE, value=int(_error_code_val), ) except (ValueError, TypeError): pass if error_information.get("error_class"): self.safe_set_attribute( span=span, key=ErrorAttributes.ERROR_TYPE, value=error_information["error_class"], ) if error_information.get("error_message"): self.safe_set_attribute( span=span, key=ErrorAttributes.ERROR_MESSAGE, value=error_information["error_message"], ) if error_information.get("llm_provider"): self.safe_set_attribute( span=span, key=ErrorAttributes.ERROR_LLM_PROVIDER, value=error_information["llm_provider"], ) if error_information.get("traceback"): self.safe_set_attribute( span=span, key=ErrorAttributes.ERROR_STACK_TRACE, value=error_information["traceback"], ) except Exception as e: verbose_logger.exception( "OpenTelemetry: Error recording exception on span: %s", str(e) ) def set_tools_attributes(self, span: Span, tools): import json from litellm.proxy._types import SpanAttributes if not tools: return try: for i, tool in enumerate(tools): function = tool.get("function") if not function: continue prefix = f"{SpanAttributes.LLM_REQUEST_FUNCTIONS.value}.{i}" self.safe_set_attribute( span=span, key=f"{prefix}.name", value=function.get("name"), ) self.safe_set_attribute( span=span, key=f"{prefix}.description", value=function.get("description"), ) self.safe_set_attribute( span=span, key=f"{prefix}.parameters", value=json.dumps(function.get("parameters")), ) except Exception as e: verbose_logger.error( "OpenTelemetry: Error setting tools attributes: %s", str(e) ) pass def cast_as_primitive_value_type(self, value) -> Union[str, bool, int, float]: """ Casts the value to a primitive OTEL type if it is not already a primitive type. OTEL supports - str, bool, int, float If it's not a primitive type, then it's converted to a string """ if value is None: return "" if isinstance(value, (str, bool, int, float)): return value try: return str(value) except Exception: return "" @staticmethod def _tool_calls_kv_pair( tool_calls: List[ChatCompletionMessageToolCall], ) -> Dict[str, Any]: from litellm.proxy._types import SpanAttributes kv_pairs: Dict[str, Any] = {} for idx, tool_call in enumerate(tool_calls): _function = tool_call.get("function") if not _function: continue keys = Function.__annotations__.keys() for key in keys: _value = _function.get(key) if _value: kv_pairs[ f"{SpanAttributes.LLM_COMPLETIONS.value}.{idx}.function_call.{key}" ] = _value return kv_pairs def set_attributes(self, span: Span, kwargs, response_obj: Optional[Any]): try: if self.callback_name == "langtrace": from litellm.integrations.langtrace import LangtraceAttributes LangtraceAttributes().set_langtrace_attributes( span, kwargs, response_obj ) return elif self.callback_name == "langfuse_otel": from litellm.integrations.langfuse.langfuse_otel import ( LangfuseOtelLogger, ) LangfuseOtelLogger.set_langfuse_otel_attributes( span, kwargs, response_obj ) return elif self.callback_name == "weave_otel": from litellm.integrations.weave.weave_otel import ( set_weave_otel_attributes, ) set_weave_otel_attributes(span, kwargs, response_obj) return from litellm.proxy._types import SpanAttributes optional_params = kwargs.get("optional_params", {}) litellm_params = kwargs.get("litellm_params", {}) or {} standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get( "standard_logging_object" ) if standard_logging_payload is None: raise ValueError("standard_logging_object not found in kwargs") # https://github.com/open-telemetry/semantic-conventions/blob/main/model/registry/gen-ai.yaml # Following Conventions here: https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/llm-spans.md ############################################# ############ LLM CALL METADATA ############## ############################################# metadata = standard_logging_payload["metadata"] for key, value in metadata.items(): self.safe_set_attribute( span=span, key="metadata.{}".format(key), value=value ) # get hidden params hidden_params = getattr( standard_logging_payload, "hidden_params", None ) or (standard_logging_payload or {}).get("hidden_params", {}) if hidden_params: self.safe_set_attribute( span=span, key="hidden_params", value=safe_dumps(hidden_params), ) self._set_inference_identity_attributes( span=span, standard_logging_payload=standard_logging_payload, litellm_params=litellm_params, ) # Cost breakdown tracking cost_breakdown: Optional[CostBreakdown] = standard_logging_payload.get( "cost_breakdown" ) if cost_breakdown: for key, value in cost_breakdown.items(): if value is not None: self.safe_set_attribute( span=span, key=f"gen_ai.cost.{key}", value=value, ) ############################################# ########## LLM Request Attributes ########### ############################################# # The name of the LLM a request is being made to if kwargs.get("model"): self.safe_set_attribute( span=span, key=SpanAttributes.LLM_REQUEST_MODEL.value, value=kwargs.get("model"), ) # The LLM request type self.safe_set_attribute( span=span, key=SpanAttributes.LLM_REQUEST_TYPE.value, value=standard_logging_payload["call_type"], ) # The Generative AI Provider: Azure, OpenAI, etc. provider_name = litellm_params.get("custom_llm_provider", "Unknown") # Latest-experimental semconv replaced gen_ai.system with # gen_ai.provider.name; emit only the conformant key in that mode. if self._gen_ai_semconv_latest_experimental: self.safe_set_attribute( span=span, key="gen_ai.provider.name", value=provider_name, ) else: self.safe_set_attribute( span=span, key=SpanAttributes.LLM_SYSTEM.value, value=provider_name, ) # The maximum number of tokens the LLM generates for a request. if optional_params.get("max_tokens"): self.safe_set_attribute( span=span, key=SpanAttributes.LLM_REQUEST_MAX_TOKENS.value, value=optional_params.get("max_tokens"), ) # The temperature setting for the LLM request. if optional_params.get("temperature"): self.safe_set_attribute( span=span, key=SpanAttributes.LLM_REQUEST_TEMPERATURE.value, value=optional_params.get("temperature"), ) # The top_p sampling setting for the LLM request. if optional_params.get("top_p"): self.safe_set_attribute( span=span, key=SpanAttributes.LLM_REQUEST_TOP_P.value, value=optional_params.get("top_p"), ) if self._gen_ai_semconv_latest_experimental: # Semconv emits gen_ai.request.stream (only when streaming) via # _set_semconv_request_attributes; skip the legacy llm.is_streaming. self._set_semconv_request_attributes(span, optional_params) self._set_semconv_cache_token_attributes(span, standard_logging_payload) else: self.safe_set_attribute( span=span, key=SpanAttributes.LLM_IS_STREAMING.value, value=str(optional_params.get("stream", False)), ) if optional_params.get("user"): self.safe_set_attribute( span=span, key=SpanAttributes.LLM_USER.value, value=optional_params.get("user"), ) # The unique identifier for the LLM call. # Completions have a provider response ID (e.g. "chatcmpl-xxx"), # but Embeddings and Image-gen responses do not. Fall back to # the litellm call ID so every call type can be correlated # across LiteLLM UI, Phoenix traces, and provider logs (Issue #8). response_id = ( response_obj.get("id") if response_obj else None ) or standard_logging_payload.get("id") if response_id: self.safe_set_attribute( span=span, key="gen_ai.response.id", value=response_id, ) litellm_call_id = standard_logging_payload.get("litellm_call_id") if litellm_call_id: self.safe_set_attribute( span=span, key="litellm.call_id", value=litellm_call_id, ) # The model used to generate the response. if response_obj and response_obj.get("model"): self.safe_set_attribute( span=span, key=SpanAttributes.LLM_RESPONSE_MODEL.value, value=response_obj.get("model"), ) usage = response_obj and response_obj.get("usage") if usage: self.safe_set_attribute( span=span, key=SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS.value, value=usage.get("total_tokens"), ) # The number of tokens used in the LLM response (completion). self.safe_set_attribute( span=span, key=SpanAttributes.GEN_AI_USAGE_OUTPUT_TOKENS.value, value=usage.get("completion_tokens"), ) # The number of tokens used in the LLM prompt. self.safe_set_attribute( span=span, key=SpanAttributes.GEN_AI_USAGE_INPUT_TOKENS.value, value=usage.get("prompt_tokens"), ) ######################################################################## ########## LLM Request Medssages / tools / content Attributes ########### ######################################################################### if not self._capture_in_span(): return if optional_params.get("tools"): tools = optional_params["tools"] self.set_tools_attributes(span, tools) if kwargs.get("messages"): transformed_messages = ( self._transform_messages_to_otel_semantic_conventions( kwargs.get("messages") ) ) self.safe_set_attribute( span=span, key=SpanAttributes.GEN_AI_INPUT_MESSAGES.value, value=safe_dumps(transformed_messages), ) # Coalesce the different kwarg names that carry the system # prompt depending on the call path: # - "system_instructions" — Vertex AI Gemini chat-completion # - "instructions" — OpenAI Responses API # - "system" — Anthropic Messages API # Use `is not None` rather than truthiness to avoid falsy # values (e.g. []) falling through to the wrong kwarg. system_instructions = ( kwargs.get("system_instructions") if kwargs.get("system_instructions") is not None else ( kwargs.get("instructions") if kwargs.get("instructions") is not None else kwargs.get("system") ) ) if system_instructions: if isinstance(system_instructions, str): # Plain text system prompt — no transformation needed self.safe_set_attribute( span=span, key=SpanAttributes.GEN_AI_SYSTEM_INSTRUCTIONS.value, value=system_instructions, ) else: transformed_system_instructions = ( self._transform_messages_to_otel_semantic_conventions( system_instructions ) ) self.safe_set_attribute( span=span, key=SpanAttributes.GEN_AI_SYSTEM_INSTRUCTIONS.value, value=safe_dumps(transformed_system_instructions), ) if self._gen_ai_semconv_latest_experimental: operation_name = self._gen_ai_operation_name(kwargs) else: operation_name = ( "chat" if standard_logging_payload.get("call_type") == "completion" else standard_logging_payload.get("call_type") or "chat" ) self.safe_set_attribute( span=span, key=SpanAttributes.GEN_AI_OPERATION_NAME.value, value=operation_name, ) if standard_logging_payload.get("request_id"): self.safe_set_attribute( span=span, key=SpanAttributes.GEN_AI_REQUEST_ID.value, value=standard_logging_payload.get("request_id"), ) ############################################# ########## LLM Response Attributes ########## ############################################# if response_obj is not None: if response_obj.get("choices"): transformed_choices = ( self._transform_choices_to_otel_semantic_conventions( response_obj.get("choices") ) ) self.safe_set_attribute( span=span, key=SpanAttributes.GEN_AI_OUTPUT_MESSAGES.value, value=safe_dumps(transformed_choices), ) finish_reasons = [] for idx, choice in enumerate(response_obj.get("choices")): if choice.get("finish_reason"): finish_reasons.append(choice.get("finish_reason")) if finish_reasons: self.safe_set_attribute( span=span, key=SpanAttributes.GEN_AI_RESPONSE_FINISH_REASONS.value, value=safe_dumps(finish_reasons), ) for idx, choice in enumerate(response_obj.get("choices")): if choice.get("finish_reason"): message = choice.get("message") tool_calls = message.get("tool_calls") if tool_calls: kv_pairs = OpenTelemetry._tool_calls_kv_pair(tool_calls) # type: ignore for key, value in kv_pairs.items(): self.safe_set_attribute( span=span, key=key, value=value, ) elif response_obj.get("output"): # Responses API: ResponsesAPIResponse has an "output" # list instead of "choices". Each item with # type="message" contains a "content" list of # OutputText objects (type="output_text"). output_items = response_obj.get("output") output_messages = self._transform_responses_api_output_to_otel( output_items ) if output_messages: self.safe_set_attribute( span=span, key=SpanAttributes.GEN_AI_OUTPUT_MESSAGES.value, value=safe_dumps(output_messages), ) # Emit per-tool-call span attributes (parity with # the choices branch that calls _tool_calls_kv_pair). # Convert Responses API function_call items to the # ChatCompletionMessageToolCall format expected by # _tool_calls_kv_pair. tool_calls = [] for out_item in output_items: item_d = self._to_dict(out_item) if item_d and item_d.get("type") == "function_call": tool_calls.append( { "function": { "name": item_d.get("name", ""), "arguments": item_d.get("arguments", ""), } } ) if tool_calls: kv_pairs = OpenTelemetry._tool_calls_kv_pair(tool_calls) # type: ignore for key, value in kv_pairs.items(): self.safe_set_attribute( span=span, key=key, value=value, ) # Extract finish reason from ResponsesAPIResponse.status status = response_obj.get("status") if status: self.safe_set_attribute( span=span, key=SpanAttributes.GEN_AI_RESPONSE_FINISH_REASONS.value, value=safe_dumps([status]), ) except Exception as e: self.handle_callback_failure( callback_name=self.callback_name or "opentelemetry" ) verbose_logger.exception( "OpenTelemetry logging error in set_attributes %s", str(e) ) def _cast_as_primitive_value_type(self, value) -> Union[str, bool, int, float]: """ Casts the value to a primitive OTEL type if it is not already a primitive type. OTEL supports - str, bool, int, float If it's not a primitive type, then it's converted to a string """ if value is None: return "" if isinstance(value, (str, bool, int, float)): return value try: return str(value) except Exception: return "" def safe_set_attribute(self, span: Span, key: str, value: Any): """ Safely sets an attribute on the span, ensuring the value is a primitive type. """ primitive_value = self._cast_as_primitive_value_type(value) span.set_attribute(key, primitive_value) def _transform_messages_to_otel_semantic_conventions( self, messages: Union[List[dict], str] ) -> List[dict]: """ Transforms LiteLLM/OpenAI style messages into OTEL GenAI 1.38 compliant format. OTEL expects a 'parts' array instead of a single 'content' string. """ if isinstance(messages, str): # Handle system_instructions passed as a string return [ { "role": "system", "parts": [{"type": "text", "content": messages}], } ] transformed = [] for msg in messages: role = msg.get("role", "user") content = msg.get("content", "") parts = [] if isinstance(content, str): parts.append({"type": "text", "content": content}) elif isinstance(content, list): # Handle multi-modal content if necessary for part in content: if isinstance(part, dict): parts.append(part) else: parts.append({"type": "text", "content": str(part)}) transformed_msg = {"role": role, "parts": parts} if "id" in msg: transformed_msg["id"] = msg["id"] if "tool_calls" in msg: transformed_msg["tool_calls"] = msg["tool_calls"] if "tool_call_id" in msg: transformed_msg["tool_call_id"] = msg["tool_call_id"] transformed.append(transformed_msg) return transformed def _transform_choices_to_otel_semantic_conventions( self, choices: List[dict] ) -> List[dict]: """ Transforms choices into OTEL GenAI 1.38 compliant format for output.messages. """ transformed = [] for choice in choices: message = choice.get("message") or {} finish_reason = choice.get("finish_reason") transformed_msg = self._transform_messages_to_otel_semantic_conventions( [message] )[0] if finish_reason: transformed_msg["finish_reason"] = finish_reason transformed.append(transformed_msg) return transformed @staticmethod def _to_dict(obj) -> Optional[dict]: """Normalize an object to a plain dict. Handles three forms that appear in practice: 1. Plain ``dict`` — returned as-is. 2. LiteLLM's ``BaseLiteLLMOpenAIResponseObject`` — exposes a ``.get()`` method that delegates to ``__dict__``. 3. Raw Pydantic v2 models from the ``openai`` SDK (e.g. ``ResponseOutputMessage``, ``ResponseOutputText``) — these do **not** have ``.get()`` but do have ``.model_dump()``. Returns ``None`` for anything else so callers can skip it. """ if isinstance(obj, dict): return obj if hasattr(obj, "get"): # BaseLiteLLMOpenAIResponseObject duck-type return obj # type: ignore[return-value] if hasattr(obj, "model_dump"): # Raw Pydantic v2 model (e.g. openai SDK types) return obj.model_dump() # type: ignore[union-attr] return None def _transform_responses_api_output_to_otel(self, output: List) -> List[dict]: """ Transform Responses API output items into OTEL GenAI 1.38 format. The Responses API returns output as a list of items, each with a ``type`` field. Message items (``type="message"``) contain a ``content`` list of ``OutputText`` objects with ``type="output_text"`` and ``text`` fields. Items may be plain dicts, LiteLLM wrapper objects (with ``.get()``), or raw Pydantic v2 models from the ``openai`` SDK (with ``.model_dump()``). We normalize each item to a dict via ``_to_dict`` before processing. This method converts them to the same ``{"role": ..., "parts": [...]}`` format used by ``_transform_choices_to_otel_semantic_conventions``. """ transformed = [] for raw_item in output: item = self._to_dict(raw_item) if item is None: continue if item.get("type") == "message": role = item.get("role", "assistant") parts = [] for raw_content in item.get("content", []): content = self._to_dict(raw_content) if content is None: continue if content.get("type") == "output_text": text = content.get("text", "") if text: parts.append({"type": "text", "content": text}) if parts: transformed.append({"role": role, "parts": parts}) elif item.get("type") == "function_call": # Surface tool calls from Responses API output part: dict = { "type": "tool_call", "name": item.get("name", ""), "arguments": item.get("arguments", ""), } if item.get("call_id"): part["id"] = item["call_id"] transformed.append({"role": "assistant", "parts": [part]}) return transformed def set_raw_request_attributes(self, span: Span, kwargs, response_obj): try: # Only set provider-specific raw payload attributes on this span. # The parent litellm_request span already carries the standard # gen_ai.* / metadata.* attributes — duplicating them here doubles # storage and adds noise (Issue #3). litellm_params = kwargs.get("litellm_params", {}) or {} custom_llm_provider = litellm_params.get("custom_llm_provider", "Unknown") _raw_response = kwargs.get("original_response") _additional_args = kwargs.get("additional_args", {}) or {} complete_input_dict = _additional_args.get("complete_input_dict") ############################################# ########## LLM Request Attributes ########### ############################################# # OTEL Attributes for the RAW Request to https://docs.anthropic.com/en/api/messages if complete_input_dict and isinstance(complete_input_dict, dict): for param, val in complete_input_dict.items(): self.safe_set_attribute( span=span, key=f"llm.{custom_llm_provider}.{param}", value=val, ) ############################################# ########## LLM Response Attributes ########## ############################################# if _raw_response and isinstance(_raw_response, str): # cast sr -> dict import json try: _raw_response = json.loads(_raw_response) for param, val in _raw_response.items(): self.safe_set_attribute( span=span, key=f"llm.{custom_llm_provider}.{param}", value=val, ) except json.JSONDecodeError: verbose_logger.debug( "litellm.integrations.opentelemetry.py::set_raw_request_attributes() - raw_response not json string - {}".format( _raw_response ) ) self.safe_set_attribute( span=span, key=f"llm.{custom_llm_provider}.stringified_raw_response", value=_raw_response, ) except Exception as e: verbose_logger.exception( "OpenTelemetry logging error in set_raw_request_attributes %s", str(e), ) def _to_ns(self, dt): if dt is None: return int(datetime.now().timestamp() * 1e9) if isinstance(dt, (int, float)): return int(dt * 1e9) return int(dt.timestamp() * 1e9) def _get_span_name(self, kwargs): litellm_params = kwargs.get("litellm_params", {}) metadata = litellm_params.get("metadata") or {} generation_name = metadata.get("generation_name") if generation_name: return generation_name if self._gen_ai_semconv_latest_experimental: model = kwargs.get("model") or "unknown" return f"{self._gen_ai_operation_name(kwargs)} {model}" return LITELLM_REQUEST_SPAN_NAME def get_traceparent_from_header(self, headers): if headers is None: return None _traceparent = headers.get("traceparent", None) if _traceparent is None: return None from opentelemetry.trace.propagation.tracecontext import ( TraceContextTextMapPropagator, ) propagator = TraceContextTextMapPropagator() carrier = {"traceparent": _traceparent} _parent_context = propagator.extract(carrier=carrier) return _parent_context def _get_span_context(self, kwargs, default_span: Optional[Span] = None): from opentelemetry import context, trace from opentelemetry.trace.propagation.tracecontext import ( TraceContextTextMapPropagator, ) litellm_params = kwargs.get("litellm_params", {}) or {} proxy_server_request = litellm_params.get("proxy_server_request", {}) or {} headers = proxy_server_request.get("headers", {}) or {} traceparent = headers.get("traceparent", None) _metadata = litellm_params.get("metadata", {}) or {} parent_otel_span = _metadata.get("litellm_parent_otel_span", None) # Fallback: check litellm_metadata (used by /v1/messages and other # LITELLM_METADATA_ROUTES that store proxy-internal metadata # separately from the provider's native "metadata" field). if parent_otel_span is None: _litellm_metadata = litellm_params.get("litellm_metadata", {}) or {} parent_otel_span = _litellm_metadata.get("litellm_parent_otel_span", None) # Priority 1: Explicit parent span from metadata if parent_otel_span is not None: verbose_logger.debug( "OpenTelemetry: Using explicit parent span from metadata" ) return trace.set_span_in_context(parent_otel_span), None # Priority 2: HTTP traceparent header if traceparent is not None: verbose_logger.debug( "OpenTelemetry: Using traceparent header for context propagation" ) carrier = {"traceparent": traceparent} return ( TraceContextTextMapPropagator().extract(carrier=carrier), None, ) # Priority 3: Active span from global context (auto-detection) try: current_span = trace.get_current_span() if current_span is not None: span_context = current_span.get_span_context() if span_context.is_valid: verbose_logger.debug( "OpenTelemetry: Using active span from global context: %s (trace_id=%s, span_id=%s, is_recording=%s)", current_span, format(span_context.trace_id, "032x"), format(span_context.span_id, "016x"), current_span.is_recording(), ) return context.get_current(), current_span except Exception as e: verbose_logger.debug( "OpenTelemetry: Error getting current span: %s", str(e) ) # Priority 4: No parent context verbose_logger.debug( "OpenTelemetry: No parent context found, creating root span" ) return None, None def _get_span_processor(self, dynamic_headers: Optional[dict] = None): from opentelemetry.sdk.trace.export import ( BatchSpanProcessor, ConsoleSpanExporter, SimpleSpanProcessor, SpanExporter, ) verbose_logger.debug( "OpenTelemetry Logger, initializing span processor \nself.OTEL_EXPORTER: %s\nself.OTEL_ENDPOINT: %s\nself.OTEL_HEADERS: %s", self.OTEL_EXPORTER, self.OTEL_ENDPOINT, self.OTEL_HEADERS, ) _split_otel_headers = OpenTelemetry._get_headers_dictionary( headers=dynamic_headers or self.OTEL_HEADERS ) if dynamic_headers: verbose_logger.debug( "[OTEL DEBUG] Creating span processor with DYNAMIC headers: %s", { k: v[:20] + "..." if len(str(v)) > 20 else v for k, v in _split_otel_headers.items() }, ) else: verbose_logger.debug( "[OTEL DEBUG] Creating span processor with GLOBAL headers" ) if hasattr( self.OTEL_EXPORTER, "export" ): # Check if it has the export method that SpanExporter requires verbose_logger.debug( "OpenTelemetry: intiializing SpanExporter. Value of OTEL_EXPORTER: %s", self.OTEL_EXPORTER, ) return SimpleSpanProcessor(cast(SpanExporter, self.OTEL_EXPORTER)) if self.OTEL_EXPORTER == "console": verbose_logger.debug( "OpenTelemetry: intiializing console exporter. Value of OTEL_EXPORTER: %s", self.OTEL_EXPORTER, ) return BatchSpanProcessor(ConsoleSpanExporter()) elif ( self.OTEL_EXPORTER == "otlp_http" or self.OTEL_EXPORTER == "http/protobuf" or self.OTEL_EXPORTER == "http/json" ): try: from opentelemetry.exporter.otlp.proto.http.trace_exporter import ( OTLPSpanExporter as OTLPSpanExporterHTTP, ) except ImportError as exc: raise ImportError( "OpenTelemetry OTLP HTTP exporter is not available. Install " "`opentelemetry-exporter-otlp` to enable OTLP HTTP." ) from exc verbose_logger.debug( "OpenTelemetry: intiializing http exporter. Value of OTEL_EXPORTER: %s", self.OTEL_EXPORTER, ) normalized_endpoint = self._normalize_otel_endpoint( self.OTEL_ENDPOINT, "traces" ) return BatchSpanProcessor( OTLPSpanExporterHTTP( endpoint=normalized_endpoint, headers=_split_otel_headers ), ) elif self.OTEL_EXPORTER == "otlp_grpc" or self.OTEL_EXPORTER == "grpc": try: from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import ( OTLPSpanExporter as OTLPSpanExporterGRPC, ) except ImportError as exc: raise ImportError( "OpenTelemetry OTLP gRPC exporter is not available. Install " "`opentelemetry-exporter-otlp` and `grpcio` (or `litellm[grpc]`)." ) from exc verbose_logger.debug( "OpenTelemetry: intiializing grpc exporter. Value of OTEL_EXPORTER: %s", self.OTEL_EXPORTER, ) normalized_endpoint = self._normalize_otel_endpoint( self.OTEL_ENDPOINT, "traces" ) return BatchSpanProcessor( OTLPSpanExporterGRPC( endpoint=normalized_endpoint, headers=_split_otel_headers ), ) else: verbose_logger.debug( "OpenTelemetry: intiializing console exporter. Value of OTEL_EXPORTER: %s", self.OTEL_EXPORTER, ) return BatchSpanProcessor(ConsoleSpanExporter()) def _get_log_exporter(self): """ Get the appropriate log exporter based on the configuration. """ verbose_logger.debug( "OpenTelemetry Logger, initializing log exporter \nself.OTEL_EXPORTER: %s\nself.OTEL_ENDPOINT: %s\nself.OTEL_HEADERS: %s", self.OTEL_EXPORTER, self.OTEL_ENDPOINT, self.OTEL_HEADERS, ) _split_otel_headers = OpenTelemetry._get_headers_dictionary(self.OTEL_HEADERS) # Normalize endpoint for logs - ensure it points to /v1/logs instead of /v1/traces normalized_endpoint = self._normalize_otel_endpoint(self.OTEL_ENDPOINT, "logs") verbose_logger.debug( "OpenTelemetry: Log endpoint normalized from %s to %s", self.OTEL_ENDPOINT, normalized_endpoint, ) if hasattr(self.OTEL_EXPORTER, "export"): # Custom exporter provided verbose_logger.debug( "OpenTelemetry: Using custom log exporter. Value of OTEL_EXPORTER: %s", self.OTEL_EXPORTER, ) return self.OTEL_EXPORTER otel_logs_exporter = os.getenv("OTEL_LOGS_EXPORTER") if self.OTEL_EXPORTER == "console" or otel_logs_exporter == "console": from opentelemetry.sdk._logs.export import ConsoleLogExporter verbose_logger.debug( "OpenTelemetry: Using console log exporter. Value of OTEL_EXPORTER: %s", self.OTEL_EXPORTER, ) return ConsoleLogExporter() elif ( self.OTEL_EXPORTER == "otlp_http" or self.OTEL_EXPORTER == "http/protobuf" or self.OTEL_EXPORTER == "http/json" ): from opentelemetry.exporter.otlp.proto.http._log_exporter import ( OTLPLogExporter, ) verbose_logger.debug( "OpenTelemetry: Using HTTP log exporter. Value of OTEL_EXPORTER: %s, endpoint: %s", self.OTEL_EXPORTER, normalized_endpoint, ) return OTLPLogExporter( endpoint=normalized_endpoint, headers=_split_otel_headers ) elif self.OTEL_EXPORTER == "otlp_grpc" or self.OTEL_EXPORTER == "grpc": try: from opentelemetry.exporter.otlp.proto.grpc._log_exporter import ( OTLPLogExporter, ) except ImportError as exc: raise ImportError( "OpenTelemetry OTLP gRPC log exporter is not available. Install " "`opentelemetry-exporter-otlp` and `grpcio` (or `litellm[grpc]`)." ) from exc verbose_logger.debug( "OpenTelemetry: Using gRPC log exporter. Value of OTEL_EXPORTER: %s, endpoint: %s", self.OTEL_EXPORTER, normalized_endpoint, ) return OTLPLogExporter( endpoint=normalized_endpoint, headers=_split_otel_headers ) else: verbose_logger.warning( "OpenTelemetry: Unknown log exporter '%s', defaulting to console. Supported: console, otlp_http, otlp_grpc", self.OTEL_EXPORTER, ) from opentelemetry.sdk._logs.export import ConsoleLogExporter return ConsoleLogExporter() def _get_metric_reader(self): """ Get the appropriate metric reader based on the configuration. """ from opentelemetry.sdk.metrics import Histogram from opentelemetry.sdk.metrics.export import ( AggregationTemporality, ConsoleMetricExporter, PeriodicExportingMetricReader, ) verbose_logger.debug( "OpenTelemetry Logger, initializing metric reader\nself.OTEL_EXPORTER: %s\nself.OTEL_ENDPOINT: %s\nself.OTEL_HEADERS: %s", self.OTEL_EXPORTER, self.OTEL_ENDPOINT, self.OTEL_HEADERS, ) _split_otel_headers = OpenTelemetry._get_headers_dictionary(self.OTEL_HEADERS) normalized_endpoint = self._normalize_otel_endpoint( self.OTEL_ENDPOINT, "metrics" ) if self.OTEL_EXPORTER == "console": exporter = ConsoleMetricExporter() return PeriodicExportingMetricReader(exporter, export_interval_millis=5000) elif ( self.OTEL_EXPORTER == "otlp_http" or self.OTEL_EXPORTER == "http/protobuf" or self.OTEL_EXPORTER == "http/json" ): from opentelemetry.exporter.otlp.proto.http.metric_exporter import ( OTLPMetricExporter, ) exporter = OTLPMetricExporter( endpoint=normalized_endpoint, headers=_split_otel_headers, preferred_temporality={Histogram: AggregationTemporality.DELTA}, ) return PeriodicExportingMetricReader(exporter, export_interval_millis=5000) elif self.OTEL_EXPORTER == "otlp_grpc" or self.OTEL_EXPORTER == "grpc": try: from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import ( OTLPMetricExporter, ) except ImportError as exc: raise ImportError( "OpenTelemetry OTLP gRPC metric exporter is not available. Install " "`opentelemetry-exporter-otlp` and `grpcio` (or `litellm[grpc]`)." ) from exc exporter = OTLPMetricExporter( endpoint=normalized_endpoint, headers=_split_otel_headers, preferred_temporality={Histogram: AggregationTemporality.DELTA}, ) return PeriodicExportingMetricReader(exporter, export_interval_millis=5000) else: verbose_logger.warning( "OpenTelemetry: Unknown metric exporter '%s', defaulting to console. Supported: console, otlp_http, otlp_grpc", self.OTEL_EXPORTER, ) exporter = ConsoleMetricExporter() return PeriodicExportingMetricReader(exporter, export_interval_millis=5000) def _normalize_otel_endpoint( self, endpoint: Optional[str], signal_type: str ) -> Optional[str]: """ Normalize the endpoint URL for a specific OpenTelemetry signal type. The OTLP exporters expect endpoints to use signal-specific paths: - traces: /v1/traces - metrics: /v1/metrics - logs: /v1/logs This method ensures the endpoint has the correct path for the given signal type. Args: endpoint: The endpoint URL to normalize signal_type: The telemetry signal type ('traces', 'metrics', or 'logs') Returns: Normalized endpoint URL with the correct signal path Examples: _normalize_otel_endpoint("http://collector:4318/v1/traces", "logs") -> "http://collector:4318/v1/logs" _normalize_otel_endpoint("http://collector:4318", "traces") -> "http://collector:4318/v1/traces" _normalize_otel_endpoint("http://collector:4318/v1/logs", "metrics") -> "http://collector:4318/v1/metrics" """ if not endpoint: return endpoint # Validate signal_type valid_signals = {"traces", "metrics", "logs"} if signal_type not in valid_signals: verbose_logger.warning( "Invalid signal_type '%s' provided to _normalize_otel_endpoint. " "Valid values: %s. Returning endpoint unchanged.", signal_type, valid_signals, ) return endpoint # Remove trailing slash endpoint = endpoint.rstrip("/") # Splunk Observability Cloud OTLP/HTTP uses /v2/trace/otlp (not /v1/traces). Do not rewrite. if signal_type == "traces" and "/v2/trace/otlp" in endpoint: return endpoint # Check if endpoint already ends with the correct signal path target_path = f"/v1/{signal_type}" if endpoint.endswith(target_path): return endpoint # Replace existing signal path with the target signal path other_signals = valid_signals - {signal_type} for other_signal in other_signals: other_path = f"/v1/{other_signal}" if endpoint.endswith(other_path): endpoint = endpoint.rsplit("/", 1)[0] + f"/{signal_type}" return endpoint # No existing signal path found, append the target path if not endpoint.endswith("/v1"): endpoint = endpoint + target_path else: endpoint = endpoint + f"/{signal_type}" return endpoint @staticmethod def _get_headers_dictionary( headers: Optional[Union[str, dict]], ) -> Dict[str, str]: """ Convert a string or dictionary of headers into a dictionary of headers. """ _split_otel_headers: Dict[str, str] = {} if headers: if isinstance(headers, str): # when passed HEADERS="x-honeycomb-team=B85YgLm96******" # Split only on first '=' occurrence parts = headers.split(",") for part in parts: key, value = part.split("=", 1) _split_otel_headers[key] = value elif isinstance(headers, dict): _split_otel_headers = headers return _split_otel_headers async def async_management_endpoint_success_hook( self, logging_payload: ManagementEndpointLoggingPayload, parent_otel_span: Optional[Span] = None, ): from opentelemetry import trace from opentelemetry.trace import Status, StatusCode _start_time_ns = 0 _end_time_ns = 0 start_time = logging_payload.start_time end_time = logging_payload.end_time if isinstance(start_time, float): _start_time_ns = int(start_time * 1e9) else: _start_time_ns = self._to_ns(start_time) if isinstance(end_time, float): _end_time_ns = int(end_time * 1e9) else: _end_time_ns = self._to_ns(end_time) if parent_otel_span is not None: _span_name = logging_payload.route management_endpoint_span = self.tracer.start_span( name=_span_name, context=trace.set_span_in_context(parent_otel_span), start_time=_start_time_ns, ) _request_data = logging_payload.request_data if _request_data is not None: for key, value in _request_data.items(): self.safe_set_attribute( span=management_endpoint_span, key=f"request.{key}", value=value, ) _response = logging_payload.response if _response is not None: for key, value in _response.items(): self.safe_set_attribute( span=management_endpoint_span, key=f"response.{key}", value=value, ) management_endpoint_span.set_status(Status(StatusCode.OK)) management_endpoint_span.end(end_time=_end_time_ns) # The management wrapper has no other hook that closes the SERVER span. self.set_response_status_code_attribute(parent_otel_span, 200) parent_otel_span.set_status(Status(StatusCode.OK)) parent_otel_span.end(end_time=_end_time_ns) async def async_management_endpoint_failure_hook( self, logging_payload: ManagementEndpointLoggingPayload, parent_otel_span: Optional[Span] = None, ): from opentelemetry import trace from opentelemetry.trace import Status, StatusCode _start_time_ns = 0 _end_time_ns = 0 start_time = logging_payload.start_time end_time = logging_payload.end_time if isinstance(start_time, float): _start_time_ns = int(int(start_time) * 1e9) else: _start_time_ns = self._to_ns(start_time) if isinstance(end_time, float): _end_time_ns = int(int(end_time) * 1e9) else: _end_time_ns = self._to_ns(end_time) if parent_otel_span is not None: _span_name = logging_payload.route management_endpoint_span = self.tracer.start_span( name=_span_name, context=trace.set_span_in_context(parent_otel_span), start_time=_start_time_ns, ) _request_data = logging_payload.request_data if _request_data is not None: for key, value in _request_data.items(): self.safe_set_attribute( span=management_endpoint_span, key=f"request.{key}", value=value, ) _exception = logging_payload.exception self.safe_set_attribute( span=management_endpoint_span, key="exception", value=str(_exception), ) management_endpoint_span.set_status(Status(StatusCode.ERROR)) management_endpoint_span.end(end_time=_end_time_ns) # The management wrapper has no other hook that closes the SERVER span. from litellm.litellm_core_utils.litellm_logging import ( StandardLoggingPayloadSetup, ) error_information = StandardLoggingPayloadSetup.get_error_information( original_exception=_exception, ) parent_otel_span.set_status(Status(StatusCode.ERROR)) self._record_exception_on_span( span=parent_otel_span, kwargs={ "exception": _exception, "standard_logging_object": {"error_information": error_information}, }, ) parent_otel_span.end(end_time=_end_time_ns) def create_litellm_proxy_request_started_span( self, start_time: datetime, headers: dict, ) -> Optional[Span]: """ Create a span for the received proxy server request. """ return self.tracer.start_span( name=LITELLM_PROXY_REQUEST_SPAN_NAME, start_time=self._to_ns(start_time), context=self.get_traceparent_from_header(headers=headers), kind=self.span_kind.SERVER, ) def set_proxy_request_route_attributes( self, span: Optional[Span], *, url_path: Optional[str] = None, http_route: Optional[str] = None, ) -> None: """ Set OTel-standard ``http.route`` / ``url.path`` on the proxy SERVER span. Called from the auth path, the only point where both the SERVER span and the request are in hand. No-op if span/value missing. """ if span is None: return if url_path: self.safe_set_attribute(span=span, key=URL_PATH_ATTRIBUTE, value=url_path) if http_route: self.safe_set_attribute( span=span, key=HTTP_ROUTE_ATTRIBUTE, value=http_route ) def set_response_status_code_attribute( self, span: Optional[Span], status_code: Optional[int] ) -> None: """ Set OTel-standard ``http.response.status_code`` (int) on the proxy SERVER span. The failure path sets this from the error code in ``_record_exception_on_span``; this is the success-path counterpart so the attribute is present on every SERVER span regardless of outcome (required by the HTTP semconv, and needed for error-ratio / status-breakdown dashboards). No-op if span/value missing. """ if span is None or status_code is None: return self.safe_set_attribute( span=span, key=HTTP_RESPONSE_STATUS_CODE_ATTRIBUTE, value=int(status_code), ) def record_error_attributes_on_span( self, span: Optional[Span], exception: Optional[Exception], status_code: int, ) -> None: """Stamp structured ``error.*`` attributes on the SERVER span from the exception returned to the client, with ``error.code`` pinned to the real response status. Idempotent (overwrites); emits no exception event.""" if span is None or exception is None: return from litellm.litellm_core_utils.litellm_logging import ( StandardLoggingPayloadSetup, ) error_information = StandardLoggingPayloadSetup.get_error_information( original_exception=exception ) error_information["error_code"] = str(status_code) self._record_exception_on_span( span=span, kwargs={ "standard_logging_object": {"error_information": error_information} }, ) def set_preprocessing_duration_attribute( self, span: Optional[Span], container: Any ) -> None: """ Set ``litellm.preprocessing.duration_ms`` (proxy-receive -> first provider handoff) on the proxy SERVER span. ``litellm_received_at`` rides request metadata; ``first_api_call_start_time`` is the set-once first-handoff instant (retries/backoff excluded). Works uniformly for the success (model_call_details) and failure (request_data) containers. No-op if span/either anchor is missing. """ if span is None or not isinstance(container, dict): return received_at = None # first_api_call_start_time is top-level (never in user metadata). first_handoff = container.get("first_api_call_start_time") _lp = container.get("litellm_params") for _md in ( (_lp or {}).get("metadata") if isinstance(_lp, dict) else None, container.get("metadata"), container.get("litellm_metadata"), ): if isinstance(_md, dict): received_at = received_at or _md.get("litellm_received_at") if received_at is None or first_handoff is None: return try: start_ts = self._to_timestamp(received_at) end_ts = self._to_timestamp(first_handoff) except Exception: return if start_ts is None or end_ts is None: return duration_ms = (end_ts - start_ts) * 1000.0 # Clock skew → omit rather than emit a negative latency. if duration_ms < 0: return self.safe_set_attribute( span=span, key=PREPROCESSING_DURATION_MS_ATTRIBUTE, value=duration_ms, )