import os
from dataclasses import dataclass, field
from datetime import datetime
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    FrozenSet,
    List,
    Optional,
    Set,
    Tuple,
    Union,
    cast,
)

import litellm
from litellm._logging import verbose_logger
from litellm.integrations._types.open_inference import (
    OpenInferenceSpanKindValues,
    SpanAttributes,
)
from litellm.integrations.custom_logger import CustomLogger
from litellm.integrations.opentelemetry_utils.gen_ai_semconv import (
    OTEL_SEMCONV_STABILITY_OPT_IN_ENV,
    OTELGenAISemconvMixin,
    OTELSemconvCategory,
    parse_semconv_opt_in,
)
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
from litellm.secret_managers.main import get_secret_bool, str_to_bool
from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import (
    ChatCompletionMessageToolCall,
    CostBreakdown,
    Function,
    LLMResponseTypes,
    StandardCallbackDynamicParams,
    StandardLoggingPayload,
)

# OpenTelemetry imports moved to individual functions to avoid import errors when not installed

# The OpenTelemetry and proxy types below are imported for static type checking
# only. At runtime every alias is plain ``Any``, so importing this module does
# not import any of them.
if TYPE_CHECKING:
    from opentelemetry.sdk.trace.export import SpanExporter as _SpanExporter
    from opentelemetry.trace import Context as _Context
    from opentelemetry.trace import Span as _Span
    from opentelemetry.trace import Tracer as _Tracer

    from litellm.proxy._types import (
        ManagementEndpointLoggingPayload as _ManagementEndpointLoggingPayload,
    )
    from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth

    # ``Union[..., Any]`` keeps the real type visible to tooling while still
    # accepting arbitrary objects.
    Span = Union[_Span, Any]
    Tracer = Union[_Tracer, Any]
    Context = Union[_Context, Any]
    SpanExporter = Union[_SpanExporter, Any]
    UserAPIKeyAuth = Union[_UserAPIKeyAuth, Any]
    ManagementEndpointLoggingPayload = Union[_ManagementEndpointLoggingPayload, Any]
else:
    # Runtime fallbacks: same alias names, no optional imports required.
    Span = Any
    Tracer = Any
    SpanExporter = Any
    UserAPIKeyAuth = Any
    ManagementEndpointLoggingPayload = Any
    Context = Any

# Tracer / meter / logger names. Each defaults to "litellm" and can be
# overridden through the environment variable read here.
LITELLM_TRACER_NAME = os.getenv("OTEL_TRACER_NAME", "litellm")
LITELLM_METER_NAME = os.getenv("LITELLM_METER_NAME", "litellm")
LITELLM_LOGGER_NAME = os.getenv("LITELLM_LOGGER_NAME", "litellm")
LITELLM_PROXY_REQUEST_SPAN_NAME = "Received Proxy Server Request"
# OTel-standard attribute names. The status code is also kept under error.code
# for backward compatibility.
HTTP_RESPONSE_STATUS_CODE_ATTRIBUTE = "http.response.status_code"
HTTP_ROUTE_ATTRIBUTE = "http.route"
URL_PATH_ATTRIBUTE = "url.path"
# LiteLLM-specific attribute names.
PREPROCESSING_DURATION_MS_ATTRIBUTE = "litellm.preprocessing.duration_ms"
TEAM_METADATA_ATTRIBUTE = "litellm.team.metadata"
MODEL_GROUP_ATTRIBUTE = "litellm.model_group"
PROVIDER_MODEL_ATTRIBUTE = "litellm.provider.model"
# There is intentionally no hardcoded module-level resource dictionary: the
# OTel Resource is built from the active config in
# ``OpenTelemetry._get_litellm_resource``.
RAW_REQUEST_SPAN_NAME = "raw_gen_ai_request"
LITELLM_REQUEST_SPAN_NAME = "litellm_request"

# Message-content capture modes accepted by
# ``OpenTelemetryConfig.capture_message_content`` and the
# ``OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT`` env var.
CAPTURE_MODE_NO_CONTENT = "NO_CONTENT"
CAPTURE_MODE_SPAN_ONLY = "SPAN_ONLY"
CAPTURE_MODE_EVENT_ONLY = "EVENT_ONLY"
CAPTURE_MODE_SPAN_AND_EVENT = "SPAN_AND_EVENT"
_VALID_CAPTURE_MODES = {
    CAPTURE_MODE_NO_CONTENT,
    CAPTURE_MODE_SPAN_ONLY,
    CAPTURE_MODE_EVENT_ONLY,
    CAPTURE_MODE_SPAN_AND_EVENT,
}

# Metadata keys that can be named as ``metadata.<key>`` in a metric attribute
# filter (they are expanded into VALID_METRIC_ATTRIBUTE_NAMES below).
METRIC_METADATA_KEYS: Tuple[str, ...] = (
    "user_api_key_hash",
    "user_api_key_alias",
    "user_api_key_team_id",
    "user_api_key_org_id",
    "user_api_key_user_id",
    "user_api_key_team_alias",
    "user_api_key_user_email",
    "spend_logs_metadata",
    "requester_ip_address",
    "requester_metadata",
    "user_api_key_end_user_id",
    "prompt_management_metadata",
    "applied_guardrails",
    "mcp_tool_call_metadata",
    "vector_store_request_metadata",
)

# Deliberately absent from VALID_METRIC_ATTRIBUTE_NAMES:
# ``_resolve_metric_attribute_filter`` rejects it in include/exclude lists.
TOKEN_TYPE_ATTRIBUTE: str = "gen_ai.token.type"

# Closed set of names accepted in ``otel.attributes`` include_list /
# exclude_list; anything else is reported as unknown.
VALID_METRIC_ATTRIBUTE_NAMES: FrozenSet[str] = frozenset(
    (
        "gen_ai.operation.name",
        "gen_ai.system",
        "gen_ai.request.model",
        "gen_ai.framework",
        "hidden_params",
    )
    + tuple(f"metadata.{key}" for key in METRIC_METADATA_KEYS)
)


@dataclass(frozen=True)
class OTELMetricAttributeFilter:
    """Immutable include/exclude selection of metric attribute names.

    Both lists are optional and default to ``None``. Nothing is validated
    here; ``_resolve_metric_attribute_filter`` checks mutual exclusivity and
    the attribute names.
    """

    # Attribute names listed for inclusion (``None`` when not configured).
    include_list: Optional[List[str]] = None
    # Attribute names listed for exclusion (``None`` when not configured).
    exclude_list: Optional[List[str]] = None


def _build_metric_attribute_filter(value: Any) -> OTELMetricAttributeFilter:
    """Coerce ``value`` into an ``OTELMetricAttributeFilter``.

    An existing filter instance is returned unchanged. A ``dict`` is read for
    its optional ``include_list`` / ``exclude_list`` keys (missing keys become
    ``None``).

    Raises:
        ValueError: if ``value`` is neither a filter instance nor a ``dict``.
    """
    if isinstance(value, OTELMetricAttributeFilter):
        return value

    if isinstance(value, dict):
        include_names = value.get("include_list")
        exclude_names = value.get("exclude_list")
        return OTELMetricAttributeFilter(
            include_list=include_names,
            exclude_list=exclude_names,
        )

    raise ValueError(
        "otel.attributes must be a mapping with optional 'include_list' / "
        f"'exclude_list', got {type(value).__name__}"
    )


def _resolve_metric_attribute_filter(
    attributes: Optional[OTELMetricAttributeFilter],
) -> Tuple[Optional[FrozenSet[str]], Optional[FrozenSet[str]]]:
    """Validate a metric attribute filter and return ``(include, exclude)``.

    Each element of the result is a ``frozenset`` of names, or ``None`` when
    that list is unset or empty. ``None`` input yields ``(None, None)``.

    Raises:
        ValueError: if both lists are populated, if ``TOKEN_TYPE_ATTRIBUTE``
            is requested, or if any requested name is not in
            ``VALID_METRIC_ATTRIBUTE_NAMES``.
    """
    if attributes is None:
        return None, None

    # An empty list is treated exactly like "not configured".
    include_names = attributes.include_list if attributes.include_list else None
    exclude_names = attributes.exclude_list if attributes.exclude_list else None

    if include_names and exclude_names:
        raise ValueError(
            "otel.attributes: include_list and exclude_list are mutually exclusive"
        )

    # At most one of the two lists is populated from here on.
    if include_names:
        requested = include_names
    elif exclude_names:
        requested = exclude_names
    else:
        requested = []

    if TOKEN_TYPE_ATTRIBUTE in requested:
        raise ValueError(
            f"otel.attributes: {TOKEN_TYPE_ATTRIBUTE} is a structural token-usage "
            "discriminator and cannot be filtered"
        )

    unknown = [
        name for name in requested if name not in VALID_METRIC_ATTRIBUTE_NAMES
    ]
    unknown.sort()
    if unknown:
        raise ValueError(
            f"otel.attributes: unknown attribute name(s) {unknown}. "
            f"Valid names: {sorted(VALID_METRIC_ATTRIBUTE_NAMES)}"
        )

    include_set = frozenset(include_names) if include_names else None
    exclude_set = frozenset(exclude_names) if exclude_names else None
    return include_set, exclude_set


def _normalize_team_metadata_keys(value: Any) -> List[str]:
    """Coerce a team-metadata allowlist from a list or comma-separated string.

    config.yaml passes a YAML list; an env var passes a comma-separated string.
    Both collapse to a list of stripped, non-empty keys.
    """
    if value is None:
        return []
    if isinstance(value, str):
        return [item.strip() for item in value.split(",") if item.strip()]
    return [str(item).strip() for item in value if str(item).strip()]


@dataclass
class OpenTelemetryConfig:
    """Settings for the LiteLLM OpenTelemetry integration.

    Fields left unset are filled in by ``__post_init__`` from environment
    variables (``OTEL_SERVICE_NAME``, ``OTEL_ENVIRONMENT_NAME``,
    ``OTEL_MODEL_ID``, ``OTEL_IGNORE_CONTEXT_PROPAGATION``, the semconv opt-in
    variable and ``LITELLM_OTEL_BAGGAGE_TEAM_METADATA_KEYS``). Use
    ``from_env()`` to build a config entirely from the environment.
    """

    exporter: Union[str, SpanExporter] = "console"
    endpoint: Optional[str] = None
    headers: Optional[str] = None
    enable_metrics: bool = False
    enable_events: bool = False
    service_name: Optional[str] = None
    deployment_environment: Optional[str] = None
    model_id: Optional[str] = None
    ignore_context_propagation: Optional[bool] = None
    # When True, create a private TracerProvider instead of reusing or setting the global one.
    skip_set_global: bool = False
    # Programmatic override for OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT.
    # One of NO_CONTENT, SPAN_ONLY, EVENT_ONLY, SPAN_AND_EVENT (or "true" as legacy alias).
    capture_message_content: Optional[str] = None
    semconv_stability_opt_in: Set[OTELSemconvCategory] = field(default_factory=set)
    # Sub-keys of the team's free-form metadata stamped onto the inference span
    # under ``litellm.team.metadata``. Empty by default so none of a team's
    # metadata leaves the process until explicitly allowlisted.
    baggage_team_metadata_keys: List[str] = field(default_factory=list)
    # Prometheus-style include/exclude control over which attributes are stamped
    # on emitted metrics, to cap metric cardinality.
    attributes: Optional[OTELMetricAttributeFilter] = None

    def __post_init__(self) -> None:
        """Fill unset fields from the environment and normalize list/set fields."""
        # If endpoint is specified but exporter is still the default "console",
        # automatically infer "otlp_http" to send traces to the endpoint.
        # This fixes an issue where UI-configured OTEL settings would default
        # to console output instead of sending traces to the configured endpoint.
        if (
            self.endpoint
            and isinstance(self.exporter, str)
            and self.exporter == "console"
        ):
            self.exporter = "otlp_http"

        if not self.service_name:
            self.service_name = os.getenv("OTEL_SERVICE_NAME", "litellm")
        if not self.deployment_environment:
            self.deployment_environment = os.getenv(
                "OTEL_ENVIRONMENT_NAME", "production"
            )
        if not self.model_id:
            self.model_id = os.getenv("OTEL_MODEL_ID", self.service_name)
        if self.ignore_context_propagation is None:
            self.ignore_context_propagation = str_to_bool(
                os.getenv("OTEL_IGNORE_CONTEXT_PROPAGATION")
            )
        # Resolve the env opt-in once here so self.semconv_stability_opt_in is the
        # single source of truth: the union of programmatic and env categories.
        # Work on a copy: an in-place ``|=`` would also modify the set object the
        # caller passed in (and any other config sharing it).
        opt_in = set(self.semconv_stability_opt_in)
        opt_in |= parse_semconv_opt_in(os.getenv(OTEL_SEMCONV_STABILITY_OPT_IN_ENV))
        self.semconv_stability_opt_in = opt_in
        # An explicitly configured allowlist wins; the env var is only consulted
        # when the configured value normalizes to an empty list.
        self.baggage_team_metadata_keys = _normalize_team_metadata_keys(
            self.baggage_team_metadata_keys
        ) or _normalize_team_metadata_keys(
            os.getenv("LITELLM_OTEL_BAGGAGE_TEAM_METADATA_KEYS")
        )

    @classmethod
    def from_env(cls) -> "OpenTelemetryConfig":
        """
        Build a config from environment variables.

        OTEL_HEADERS=x-honeycomb-team=B85YgLm9****
        OTEL_EXPORTER="otlp_http"
        OTEL_ENDPOINT="https://api.honeycomb.io/v1/traces"

        OTEL_HEADERS gets sent as headers = {"x-honeycomb-team": "B85YgLm96******"}

        The ``OTEL_EXPORTER_OTLP_*`` variables take precedence over their
        ``OTEL_*`` counterparts. When the exporter resolves to ``"in_memory"``
        the returned config only carries an ``InMemorySpanExporter`` instance;
        the other env-derived arguments are not passed through.
        """
        exporter = os.getenv(
            "OTEL_EXPORTER_OTLP_PROTOCOL", os.getenv("OTEL_EXPORTER", "console")
        )
        endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", os.getenv("OTEL_ENDPOINT"))
        headers = os.getenv(
            "OTEL_EXPORTER_OTLP_HEADERS", os.getenv("OTEL_HEADERS")
        )  # example: OTEL_HEADERS=x-honeycomb-team=B85YgLm96***"
        enable_metrics: bool = (
            os.getenv("LITELLM_OTEL_INTEGRATION_ENABLE_METRICS", "false").lower()
            == "true"
        )
        enable_events: bool = (
            os.getenv("LITELLM_OTEL_INTEGRATION_ENABLE_EVENTS", "false").lower()
            == "true"
        )
        service_name = os.getenv("OTEL_SERVICE_NAME", "litellm")
        deployment_environment = os.getenv("OTEL_ENVIRONMENT_NAME", "production")
        model_id = os.getenv("OTEL_MODEL_ID", service_name)

        if exporter == "in_memory":
            # Imported only on the branch that needs it, so the SDK's in-memory
            # exporter module is not loaded for the other exporters.
            from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
                InMemorySpanExporter,
            )

            return cls(exporter=InMemorySpanExporter())
        return cls(
            exporter=exporter,
            endpoint=endpoint,
            headers=headers,  # example: OTEL_HEADERS=x-honeycomb-team=B85YgLm96***"
            enable_metrics=enable_metrics,
            enable_events=enable_events,
            service_name=service_name,
            deployment_environment=deployment_environment,
            model_id=model_id,
        )


class OpenTelemetry(OTELGenAISemconvMixin, CustomLogger):
    def __init__(
        self,
        config: Optional[OpenTelemetryConfig] = None,
        callback_name: Optional[str] = None,
        # injection points for testing
        tracer_provider: Optional[Any] = None,
        logger_provider: Optional[Any] = None,
        meter_provider: Optional[Any] = None,
        **kwargs,
    ):
        """Set up tracing first, then metrics and logs (each only if enabled).

        Args:
            config: Integration settings; built with
                ``OpenTelemetryConfig.from_env()`` when ``None``.
            callback_name: Stored on the instance; the value
                ``"langfuse_otel"`` makes ``_skip_set_global`` return True.
            tracer_provider: Pre-built provider handed to ``_init_tracing``.
            logger_provider: Pre-built provider handed to ``_init_logs``.
            meter_provider: Pre-built provider handed to ``_init_metrics``.
            **kwargs: ``baggage_team_metadata_keys`` and ``attributes`` are
                popped and override the matching config fields; everything
                else is forwarded to ``super().__init__``.
        """
        # Pop the overrides first so they are not forwarded to super().__init__.
        team_metadata_keys_override = kwargs.pop("baggage_team_metadata_keys", None)
        metric_attributes_override = kwargs.pop("attributes", None)
        if config is None:
            config = OpenTelemetryConfig.from_env()
        # NOTE: the overrides below are written onto the ``config`` object
        # itself, so a config passed in by the caller is modified in place.
        if team_metadata_keys_override is not None:
            config.baggage_team_metadata_keys = _normalize_team_metadata_keys(
                team_metadata_keys_override
            )
        if metric_attributes_override is not None:
            config.attributes = _build_metric_attribute_filter(
                metric_attributes_override
            )

        self.config = config
        self.callback_name = callback_name
        # Resolved on first metric record, not here: the proxy populates
        # callback_settings.otel.attributes after this logger is constructed, so
        # reading it now would miss it. An explicit config is validated eagerly so
        # a bad config still fails at startup.
        self._metric_attr_include: Optional[FrozenSet[str]] = None
        self._metric_attr_exclude: Optional[FrozenSet[str]] = None
        self._metric_attr_filter_resolved = False
        if config.attributes is not None:
            # Defined outside this part of the file; presumably fills the three
            # fields above — confirm against its definition.
            self._ensure_metric_attribute_filter()
        self.OTEL_EXPORTER = self.config.exporter
        self.OTEL_ENDPOINT = self.config.endpoint
        self.OTEL_HEADERS = self.config.headers
        self._tracer_provider_cache: Dict[str, Any] = {}
        # Tracing is initialized before super().__init__; it needs
        # self.config and self.callback_name, both set above.
        self._init_tracing(tracer_provider)

        _debug_otel = str(os.getenv("DEBUG_OTEL", "False")).lower()

        if _debug_otel == "true":
            # Set up logging
            import logging

            logging.basicConfig(level=logging.DEBUG)
            # NOTE(review): the returned logger is discarded here.
            logging.getLogger(__name__)

            # Enable OpenTelemetry logging
            otel_exporter_logger = logging.getLogger("opentelemetry.sdk.trace.export")
            otel_exporter_logger.setLevel(logging.DEBUG)

        # init CustomLogger params
        super().__init__(**kwargs)
        self._init_metrics(meter_provider)
        self._init_logs(logger_provider)
        # Sample env-var / config / message_logging at init so subsequent
        # _capture_in_span / _capture_in_event calls are deterministic.
        self._capture_mode_cached = self._compute_capture_mode_from_init_state()
        self._init_otel_logger_on_litellm_proxy()

    @staticmethod
    def _get_litellm_resource(config: OpenTelemetryConfig):
        """Build the OpenTelemetry ``Resource`` for this config.

        The resource carries ``service.name``, ``deployment.environment`` and
        ``model_id`` (falling back to the service name) from ``config``, merged
        with whatever ``OTELResourceDetector`` detects.
        """
        from opentelemetry.sdk.resources import OTELResourceDetector, Resource

        config_attributes: Dict[str, Optional[str]] = {}
        config_attributes["service.name"] = config.service_name
        config_attributes["deployment.environment"] = config.deployment_environment
        config_attributes["model_id"] = config.model_id or config.service_name

        config_resource = Resource.create(config_attributes)  # type: ignore[arg-type]
        detected_resource = OTELResourceDetector().detect()
        return config_resource.merge(detected_resource)

    def _init_otel_logger_on_litellm_proxy(self):
        """
        Hook this logger into the litellm proxy server, when it is importable.

        - Appends self to ``litellm.service_callback`` unless ``"otel"`` or
          another ``OpenTelemetry`` instance is already listed
        - Sets ``proxy_server.open_telemetry_logger`` to self if it is unset
        """
        try:
            from litellm.proxy import proxy_server
        except ImportError:
            verbose_logger.warning(
                "Proxy Server is not installed. Skipping OpenTelemetry initialization."
            )
            return

        # Add self as a service callback (at most one OTEL entry in the list)
        service_callbacks = litellm.service_callback
        already_registered = "otel" in service_callbacks or any(
            isinstance(existing, OpenTelemetry) for existing in service_callbacks
        )
        if not already_registered:
            service_callbacks.append(self)

        # Several OTEL-based integrations (default OTEL, Langfuse OTEL,
        # Arize OTEL, etc.) may initialize one after another. Only claim
        # proxy_server.open_telemetry_logger when nobody has yet, so a later
        # handler cannot silently replace the first one and break routing
        # for its consumers.
        # Behavior: first-registered wins.
        current_owner = getattr(proxy_server, "open_telemetry_logger", None)
        if current_owner is None:
            proxy_server.open_telemetry_logger = self

    def _get_or_create_provider(
        self,
        provider,
        provider_name: str,
        get_existing_provider_fn,
        sdk_provider_class,
        create_new_provider_fn,
        set_provider_fn,
        skip_set_global: bool = False,
    ):
        """
        Generic helper to get or create an OpenTelemetry provider (Tracer, Meter, or Logger).

        Resolution order:
          1. An explicitly passed ``provider`` is returned as-is (never set globally).
          2. An existing global SDK provider is reused, unless ``skip_set_global``
             is True, in which case a private one is created.
          3. Otherwise a new provider is created and, unless ``skip_set_global``
             is True, installed globally.
        Any exception during steps 2-3 falls back to creating a new provider.

        Args:
            provider: The provider instance passed to the init function (can be None)
            provider_name: Name for logging (e.g., "TracerProvider")
            get_existing_provider_fn: Function to get the existing global provider
            sdk_provider_class: The SDK provider class to check for (e.g., TracerProvider from SDK)
            create_new_provider_fn: Function to create a new provider instance
            set_provider_fn: Function to set the provider globally
            skip_set_global: If True, don't set the provider globally (for dynamic-only providers)

        Returns:
            The provider to use (either existing, new, or explicitly provided)
        """
        if provider is not None:
            # Provider explicitly provided (e.g., for testing)
            # Do NOT call set_provider_fn - the caller is responsible for managing global state
            # If they want it to be global, they've already set it before passing it to us
            # Log with provider_name: this helper also serves Meter/Logger providers.
            verbose_logger.debug(
                "OpenTelemetry: Using provided %s: %s",
                provider_name,
                type(provider).__name__,
            )
            return provider

        # Check if a provider is already set globally
        try:
            existing_provider = get_existing_provider_fn()

            if isinstance(existing_provider, sdk_provider_class):
                if skip_set_global:
                    verbose_logger.debug(
                        "OpenTelemetry: existing %s found but skip_set_global=True; creating private %s for isolation",
                        provider_name,
                        provider_name,
                    )
                    provider = create_new_provider_fn()
                else:
                    verbose_logger.debug(
                        "OpenTelemetry: Using existing %s: %s",
                        provider_name,
                        type(existing_provider).__name__,
                    )
                    provider = existing_provider
            else:
                # Default proxy provider or unknown type, create our own
                verbose_logger.debug("OpenTelemetry: Creating new %s", provider_name)
                provider = create_new_provider_fn()
                if not skip_set_global:
                    set_provider_fn(provider)
                else:
                    verbose_logger.info(
                        "OpenTelemetry: Created %s but NOT setting it globally (will use dynamic providers per-request)",
                        provider_name,
                    )
        except Exception as e:
            # Fallback: create a new provider if something goes wrong
            verbose_logger.debug(
                "OpenTelemetry: Exception checking existing %s, creating new one: %s",
                provider_name,
                str(e),
            )
            provider = create_new_provider_fn()
            if not skip_set_global:
                set_provider_fn(provider)

        return provider

    def _skip_set_global(self) -> bool:
        # langfuse_otel relies on the Langfuse SDK's providers; don't overwrite them.
        return self.config.skip_set_global or (
            hasattr(self, "callback_name") and self.callback_name == "langfuse_otel"
        )

    def _compute_capture_mode_from_init_state(self) -> Optional[str]:
        """Sample explicit settings at init. Returns the resolved mode or
        None if nothing explicit is set (in which case the legacy
        ``self.message_logging`` flag is consulted dynamically per request).

        ``self.config.capture_message_content`` takes precedence over the
        ``OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT`` env var; the env
        var is only read when the config value is falsy.

        ``"true"``/``"1"`` map to ``EVENT_ONLY`` per the contrib convention.
        ``"false"``/``"0"`` map to ``NO_CONTENT``.
        Matching is case-insensitive and ignores surrounding whitespace.
        Unknown values are ignored.
        """
        explicit = self.config.capture_message_content or os.getenv(
            "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT"
        )
        if not explicit:
            return None
        # str() first: a config loaded from YAML can deliver a non-string here
        # (e.g. the boolean ``true``), and calling .upper() on it would raise
        # instead of being mapped or ignored.
        normalized = str(explicit).strip().upper()
        if normalized in ("TRUE", "1"):
            return CAPTURE_MODE_EVENT_ONLY
        if normalized in ("FALSE", "0"):
            return CAPTURE_MODE_NO_CONTENT
        if normalized in _VALID_CAPTURE_MODES:
            return normalized
        return None

    def _resolve_capture_mode(self) -> str:
        """Return the capture mode that applies right now.

        Checked in this order:
          1. ``litellm.turn_off_message_logging`` set: always ``NO_CONTENT``
             (kill-switch, read on every call).
          2. The mode sampled at init from
             ``OpenTelemetryConfig.capture_message_content`` or
             ``OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT``, if any.
          3. Legacy ``self.message_logging`` (read on every call): truthy
             means ``SPAN_AND_EVENT``, otherwise ``NO_CONTENT``.
        """
        if litellm.turn_off_message_logging:
            return CAPTURE_MODE_NO_CONTENT

        sampled_mode = self._capture_mode_cached
        if sampled_mode is not None:
            return sampled_mode

        if self.message_logging:
            return CAPTURE_MODE_SPAN_AND_EVENT
        return CAPTURE_MODE_NO_CONTENT

    def _capture_in_span(self) -> bool:
        """Return True when the active capture mode includes span content."""
        mode = self._resolve_capture_mode()
        return mode == CAPTURE_MODE_SPAN_ONLY or mode == CAPTURE_MODE_SPAN_AND_EVENT

    def _capture_in_event(self) -> bool:
        """Return True when the active capture mode includes event content."""
        mode = self._resolve_capture_mode()
        return mode == CAPTURE_MODE_EVENT_ONLY or mode == CAPTURE_MODE_SPAN_AND_EVENT

    def _init_tracing(self, tracer_provider):
        """Resolve the TracerProvider and store the tracer on the instance.

        Sets ``self.tracer``, ``self._tracer_provider`` and ``self.span_kind``.
        ``tracer_provider`` may be ``None``, in which case
        ``_get_or_create_provider`` reuses or creates one.
        """
        from opentelemetry import trace
        from opentelemetry.sdk.trace import TracerProvider
        from opentelemetry.trace import SpanKind

        def _build_tracer_provider():
            # Fresh SDK provider with our resource and span processor attached.
            resource = self._get_litellm_resource(self.config)
            new_provider = TracerProvider(resource=resource)
            new_provider.add_span_processor(self._get_span_processor())
            return new_provider

        resolved_provider = self._get_or_create_provider(
            provider=tracer_provider,
            provider_name="TracerProvider",
            get_existing_provider_fn=trace.get_tracer_provider,
            sdk_provider_class=TracerProvider,
            create_new_provider_fn=_build_tracer_provider,
            set_provider_fn=trace.set_tracer_provider,
            skip_set_global=self._skip_set_global(),
        )

        # Take the tracer from the resolved provider rather than from the
        # global context, so an injected provider (e.g. in tests) is honored.
        self.tracer = resolved_provider.get_tracer(LITELLM_TRACER_NAME)
        self._tracer_provider = resolved_provider
        self.span_kind = SpanKind

    def _init_metrics(self, meter_provider):
        """Create the metric histograms, or null them out when metrics are off.

        With ``config.enable_metrics`` false, ``self._meter_provider`` and all
        six histogram attributes are set to ``None``. Otherwise the
        MeterProvider is resolved through ``_get_or_create_provider`` and the
        histograms are created from a meter named after this module.
        """
        if not self.config.enable_metrics:
            # Metrics disabled: every handle stays None.
            self._meter_provider = None
            self._operation_duration_histogram = None
            self._token_usage_histogram = None
            self._cost_histogram = None
            self._time_to_first_token_histogram = None
            self._time_per_output_token_histogram = None
            self._response_duration_histogram = None
            return

        from opentelemetry import metrics
        from opentelemetry.sdk.metrics import MeterProvider

        def _build_meter_provider():
            # Fresh SDK provider with a single reader and our resource.
            reader = self._get_metric_reader()
            resource = self._get_litellm_resource(self.config)
            return MeterProvider(metric_readers=[reader], resource=resource)

        resolved_provider = self._get_or_create_provider(
            provider=meter_provider,
            provider_name="MeterProvider",
            get_existing_provider_fn=metrics.get_meter_provider,
            sdk_provider_class=MeterProvider,
            create_new_provider_fn=_build_meter_provider,
            set_provider_fn=metrics.set_meter_provider,
            skip_set_global=self._skip_set_global(),
        )
        self._meter_provider = resolved_provider

        new_histogram = resolved_provider.get_meter(__name__).create_histogram

        self._operation_duration_histogram = new_histogram(
            name="gen_ai.client.operation.duration",  # Replace with semconv constant in otel 1.38
            description="GenAI operation duration",
            unit="s",
        )
        self._token_usage_histogram = new_histogram(
            name="gen_ai.client.token.usage",  # Replace with semconv constant in otel 1.38
            description="GenAI token usage",
            unit="{token}",
        )
        self._cost_histogram = new_histogram(
            name="gen_ai.client.token.cost",
            description="GenAI request cost",
            unit="USD",
        )
        self._time_to_first_token_histogram = new_histogram(
            name="gen_ai.client.response.time_to_first_token",
            description="Time to first token for streaming requests",
            unit="s",
        )
        self._time_per_output_token_histogram = new_histogram(
            name="gen_ai.client.response.time_per_output_token",
            description="Average time per output token (generation time / completion tokens)",
            unit="s",
        )
        self._response_duration_histogram = new_histogram(
            name="gen_ai.client.response.duration",
            description="Total LLM API generation time (excludes LiteLLM overhead)",
            unit="s",
        )

    def _init_logs(self, logger_provider):
        """Resolve the LoggerProvider, or set it to ``None`` when events are off.

        The result is stored on ``self._logger_provider``.
        """
        if not self.config.enable_events:
            # nothing to do if events disabled
            self._logger_provider = None
            return

        from opentelemetry._logs import get_logger_provider, set_logger_provider
        from opentelemetry.sdk._logs import LoggerProvider as OTLoggerProvider
        from opentelemetry.sdk._logs.export import BatchLogRecordProcessor

        def _build_logger_provider():
            # Fresh SDK provider with our resource and a batching processor
            # wrapped around the configured log exporter.
            resource = self._get_litellm_resource(self.config)
            new_provider = OTLoggerProvider(resource=resource)
            exporter = self._get_log_exporter()
            processor = BatchLogRecordProcessor(exporter)  # type: ignore[arg-type]
            new_provider.add_log_record_processor(processor)
            return new_provider

        resolved_provider = self._get_or_create_provider(
            provider=logger_provider,
            provider_name="LoggerProvider",
            get_existing_provider_fn=get_logger_provider,
            sdk_provider_class=OTLoggerProvider,
            create_new_provider_fn=_build_logger_provider,
            set_provider_fn=set_logger_provider,
            skip_set_global=self._skip_set_global(),
        )
        self._logger_provider = resolved_provider

    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        """Sync success callback; passes all arguments to ``_handle_success``."""
        self._handle_success(kwargs, response_obj, start_time, end_time)

    def log_failure_event(self, kwargs, response_obj, start_time, end_time):
        """Sync failure callback; passes all arguments to ``_handle_failure``."""
        self._handle_failure(kwargs, response_obj, start_time, end_time)

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        """Async success callback; calls ``_handle_success`` directly (nothing is awaited)."""
        self._handle_success(kwargs, response_obj, start_time, end_time)

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        """Async failure callback; calls ``_handle_failure`` directly (nothing is awaited)."""
        self._handle_failure(kwargs, response_obj, start_time, end_time)

    async def async_service_success_hook(
        self,
        payload: ServiceLoggerPayload,
        parent_otel_span: Optional[Span] = None,
        start_time: Optional[Union[datetime, float]] = None,
        end_time: Optional[Union[datetime, float]] = None,
        event_metadata: Optional[dict] = None,
    ):
        """Record a successful service call as a child span of ``parent_otel_span``.

        The span is named after ``payload.service``, carries ``call_type`` and
        ``service`` attributes plus one attribute per ``event_metadata`` entry,
        and is ended with status OK. Without a parent span no span is created.
        """
        from opentelemetry import trace
        from opentelemetry.trace import Status, StatusCode

        # Floats are scaled by 1e9 (treated as seconds -> nanoseconds);
        # anything else is converted by _to_ns.
        span_start_ns = (
            int(start_time * 1e9)
            if isinstance(start_time, float)
            else self._to_ns(start_time)
        )
        span_end_ns = (
            int(end_time * 1e9)
            if isinstance(end_time, float)
            else self._to_ns(end_time)
        )

        if parent_otel_span is None:
            return

        span = self.tracer.start_span(
            name=payload.service,
            context=trace.set_span_in_context(parent_otel_span),
            start_time=span_start_ns,
        )
        self.safe_set_attribute(span=span, key="call_type", value=payload.call_type)
        self.safe_set_attribute(span=span, key="service", value=payload.service.value)

        for meta_key, meta_value in (event_metadata or {}).items():
            if meta_value is None:
                meta_value = "None"
            if isinstance(meta_value, dict):
                # Dicts are flattened to their string form before being set.
                try:
                    meta_value = str(meta_value)
                except Exception:
                    meta_value = "litellm logging error - could_not_json_serialize"
            self.safe_set_attribute(span=span, key=meta_key, value=meta_value)

        span.set_status(Status(StatusCode.OK))
        span.end(end_time=span_end_ns)

    async def async_service_failure_hook(
        self,
        payload: ServiceLoggerPayload,
        error: Optional[str] = "",
        parent_otel_span: Optional[Span] = None,
        start_time: Optional[Union[datetime, float]] = None,
        end_time: Optional[Union[float, datetime]] = None,
        event_metadata: Optional[dict] = None,
    ):
        """Record a failed service call as a child span of ``parent_otel_span``.

        The span is named after ``payload.service``, carries ``call_type`` and
        ``service`` attributes, an ``error`` attribute when ``error`` is
        non-empty, plus one attribute per ``event_metadata`` entry, and is
        ended with status ERROR. Without a parent span no span is created.

        Float timestamps keep their sub-second part when converted to
        nanoseconds, matching ``async_service_success_hook``.
        """
        from opentelemetry import trace
        from opentelemetry.trace import Status, StatusCode

        # Scale the float itself. Truncating it to an int first would round
        # both timestamps down to whole seconds and distort the span duration.
        if isinstance(start_time, float):
            _start_time_ns = int(start_time * 1e9)
        else:
            _start_time_ns = self._to_ns(start_time)

        if isinstance(end_time, float):
            _end_time_ns = int(end_time * 1e9)
        else:
            _end_time_ns = self._to_ns(end_time)

        if parent_otel_span is not None:
            _span_name = payload.service
            service_logging_span = self.tracer.start_span(
                name=_span_name,
                context=trace.set_span_in_context(parent_otel_span),
                start_time=_start_time_ns,
            )
            self.safe_set_attribute(
                span=service_logging_span,
                key="call_type",
                value=payload.call_type,
            )
            self.safe_set_attribute(
                span=service_logging_span,
                key="service",
                value=payload.service.value,
            )
            if error:
                self.safe_set_attribute(
                    span=service_logging_span,
                    key="error",
                    value=error,
                )
            if event_metadata:
                for key, value in event_metadata.items():
                    if isinstance(value, dict):
                        # Dicts are flattened to their string form before being set.
                        try:
                            value = str(value)
                        except Exception:
                            value = "litellm logging error - could_not_json_serialize"
                    self.safe_set_attribute(
                        span=service_logging_span,
                        key=key,
                        value=value,
                    )

            service_logging_span.set_status(Status(StatusCode.ERROR))
            service_logging_span.end(end_time=_end_time_ns)

    async def async_post_call_failure_hook(
        self,
        request_data: dict,
        original_exception: Exception,
        user_api_key_dict: UserAPIKeyAuth,
        traceback_str: Optional[str] = None,
    ):
        """Mark the proxy SERVER span as failed, attach failure details, end it.

        Does nothing when ``user_api_key_dict.parent_otel_span`` is ``None``.
        Otherwise, in order:

        1. Set the SERVER span status to ERROR and stamp team attributes.
        2. Record the exception on the SERVER span, falling back to a 500
           status-code attribute when no ``error_code`` was derived.
        3. Stamp the pre-request latency attribute from ``request_data``.
        4. Emit and end a ``"Failed Proxy Server Request"`` child span.
        5. Emit guardrail spans found in ``request_data``.
        6. End the SERVER span.

        Args:
            request_data: The request payload for the failed call.
            original_exception: The exception that failed the request.
            user_api_key_dict: Auth object carrying ``parent_otel_span``,
                ``team_id`` and ``team_alias``.
            traceback_str: Optional traceback text forwarded to the error
                information builder.
        """
        from opentelemetry import trace
        from opentelemetry.trace import Status, StatusCode

        server_span = user_api_key_dict.parent_otel_span
        if server_span is None:
            # No proxy span to annotate or close.
            return

        server_span.set_status(Status(StatusCode.ERROR))

        # The trace root should be team-filterable on the failure path, just
        # like the child exception span created further down.
        self._set_team_attributes_on_span(
            span=server_span,
            team_id=user_api_key_dict.team_id,
            team_alias=user_api_key_dict.team_alias,
        )

        # Structured error attributes go on the SERVER span itself; without
        # this the failure path would only set its status. The import is
        # inline because litellm_logging <-> integrations is circular.
        from litellm.litellm_core_utils.litellm_logging import (
            StandardLoggingPayloadSetup,
        )

        error_info = StandardLoggingPayloadSetup.get_error_information(
            original_exception=original_exception,
            traceback_str=traceback_str,
        )
        exception_kwargs = {
            "exception": original_exception,
            "standard_logging_object": {"error_information": error_info},
        }
        self._record_exception_on_span(span=server_span, kwargs=exception_kwargs)

        # _record_exception_on_span only stamps a status code when error_code
        # is set; a bare TypeError etc. has none, and the span is about to end.
        has_error_code = bool(error_info) and bool(error_info.get("error_code"))
        if not has_error_code:
            self.set_response_status_code_attribute(server_span, 500)

        # Pre-request latency (request_data carries the propagated metadata
        # on the failure path; omitted if it failed before handoff).
        self.set_preprocessing_duration_attribute(server_span, request_data)

        # Child span that carries the stringified exception.
        failure_span = self.tracer.start_span(
            name="Failed Proxy Server Request",
            context=trace.set_span_in_context(server_span),
        )
        self.safe_set_attribute(
            span=failure_span,
            key="exception",
            value=str(original_exception),
        )
        self._set_team_attributes_on_span(
            span=failure_span,
            team_id=user_api_key_dict.team_id,
            team_alias=user_api_key_dict.team_alias,
        )
        failure_span.set_status(Status(StatusCode.ERROR))
        failure_span.end(end_time=self._to_ns(datetime.now()))

        # Guardrail invocations that ran during this request are normally
        # emitted by _handle_failure, but for pre-call guardrail blocks the
        # data may live only in request_data["metadata"]. Pull it from
        # request_data so the span is recorded either way; _emit_once
        # dedupes if _handle_failure already emitted it.
        self._emit_guardrail_spans_from_request_data(
            request_data=request_data,
            parent_span=server_span,
        )

        # End the parent OTEL span last, after all children are recorded.
        server_span.end(end_time=self._to_ns(datetime.now()))

    def _emit_guardrail_spans_from_request_data(
        self,
        request_data: dict,
        parent_span: Optional[Any],
    ) -> None:
        """Emit guardrail spans for entries stored on the request metadata.

        Reads ``request_data["metadata"]["standard_logging_guardrail_information"]``
        and, when it is non-empty, hands it to ``_create_guardrail_span``
        wrapped in a kwargs-shaped dict. Returns without emitting anything
        when ``request_data``, its metadata, or the guardrail list is
        missing or empty.

        Args:
            request_data: Proxy request payload (may be falsy).
            parent_span: Span to parent the guardrail spans under; when
                ``None`` no explicit context is passed.
        """
        from opentelemetry import trace as _trace

        proxy_metadata = (request_data or {}).get("metadata") or {}
        guardrail_entries = proxy_metadata.get(
            "standard_logging_guardrail_information"
        )
        if not guardrail_entries:
            return

        if parent_span is None:
            span_context = None
        else:
            span_context = _trace.set_span_in_context(parent_span)

        # _create_guardrail_span reads the entries from
        # kwargs["standard_logging_object"] and keeps its dedupe state under
        # kwargs["litellm_params"]["metadata"]["_otel_internal"]. The SAME
        # metadata dict the proxy populated is passed (not a copy) so this
        # hook and _handle_failure share the same dedupe markers.
        synthetic_kwargs: Dict[str, Any] = {
            "litellm_params": {"metadata": proxy_metadata},
            "standard_logging_object": {
                "guardrail_information": guardrail_entries,
                "metadata": proxy_metadata,
            },
        }
        self._create_guardrail_span(kwargs=synthetic_kwargs, context=span_context)

    async def async_post_call_success_hook(
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        response: LLMResponseTypes,
    ):
        """Stamp pre-request latency and emit guardrail spans after a success.

        Only acts when ``data["litellm_logging_obj"]`` is a LiteLLM
        ``Logging`` instance; its ``model_call_details`` are used as the
        kwargs for span lookup and guardrail emission.

        Returns:
            ``response``, unchanged, in every case.
        """
        from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging

        logging_obj = data.get("litellm_logging_obj")
        if not isinstance(logging_obj, LiteLLMLogging):
            # Covers both a missing object and an unexpected type.
            return response

        call_details = logging_obj.model_call_details
        server_span = user_api_key_dict.parent_otel_span

        span_ctx, _ = self._get_span_context(call_details, default_span=server_span)

        # Pre-request latency on the SERVER span (success path).
        self.set_preprocessing_duration_attribute(server_span, call_details)

        # Guardrail span, parented via the resolved context.
        self._create_guardrail_span(kwargs=call_details, context=span_ctx)

        return response

    #########################################################
    # Team/Key Based Logging Control Flow
    #########################################################
    def get_tracer_to_use_for_request(self, kwargs: dict) -> Tracer:
        """
        Get the tracer to use for this request.

        Resolution order:

        1. Dynamic (team/key based) headers found in ``kwargs`` -> a tracer
           built for those headers.
        2. ``callback_name == "langfuse_otel"`` with ``OTEL_HEADERS``
           configured -> a tracer built for those configured headers.
        3. Otherwise -> the default ``self.tracer``.

        Header *values* are never written to the log: they normally carry
        exporter credentials, so only the header names are logged.

        Args:
            kwargs: The request kwargs passed to the logging callbacks.

        Returns:
            Tracer: The tracer to use for this request
        """
        dynamic_headers = self._get_dynamic_otel_headers_from_kwargs(kwargs)

        if dynamic_headers is not None:
            # Create spans using a tracer bound to the dynamic headers.
            tracer_to_use = self._get_tracer_with_dynamic_headers(dynamic_headers)
            verbose_logger.debug(
                "[OTEL DEBUG] Using DYNAMIC tracer with header keys: %s",
                list(dynamic_headers),
            )
            return tracer_to_use

        if getattr(self, "callback_name", None) != "langfuse_otel":
            verbose_logger.debug(
                "[OTEL DEBUG] Using GLOBAL tracer (no dynamic headers)"
            )
            return self.tracer

        # langfuse_otel without dynamic headers: fall back to the headers
        # from config (which were set from env vars during init).
        env_var_headers = (
            self._get_headers_dictionary(self.OTEL_HEADERS)
            if self.OTEL_HEADERS
            else {}
        )
        if env_var_headers:
            tracer_to_use = self._get_tracer_with_dynamic_headers(env_var_headers)
            verbose_logger.debug(
                "[OTEL DEBUG] Using env var credentials for langfuse_otel (master key request)"
            )
            return tracer_to_use

        # No env vars set, use global tracer (will be NoOp)
        verbose_logger.debug(
            "[OTEL DEBUG] No credentials available for langfuse_otel"
        )
        return self.tracer

    def _get_dynamic_otel_headers_from_kwargs(self, kwargs) -> Optional[dict]:
        """Extract dynamic headers from kwargs if available."""
        standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = (
            kwargs.get("standard_callback_dynamic_params")
        )

        if not standard_callback_dynamic_params:
            return None

        dynamic_headers = self.construct_dynamic_otel_headers(
            standard_callback_dynamic_params=standard_callback_dynamic_params
        )

        return dynamic_headers if dynamic_headers else None

    def _get_tracer_with_dynamic_headers(self, dynamic_headers: dict):
        """Return a tracer whose provider exports with ``dynamic_headers``.

        Providers are kept in ``self._tracer_provider_cache`` keyed by the
        sorted header items, so a given credential set (e.g. a per-team
        key) reuses one provider instead of building a new one per request.
        """
        from opentelemetry.sdk.trace import TracerProvider

        provider_cache = self._tracer_provider_cache
        # Sorting makes the key independent of header insertion order.
        cache_key = str(sorted(dynamic_headers.items()))

        if cache_key in provider_cache:
            provider = provider_cache[cache_key]
        else:
            # First time this header set is seen: build a dedicated provider
            # and remember it for later requests.
            provider = TracerProvider(
                resource=self._get_litellm_resource(self.config)
            )
            provider.add_span_processor(
                self._get_span_processor(dynamic_headers=dynamic_headers)
            )
            provider_cache[cache_key] = provider

        return provider.get_tracer(LITELLM_TRACER_NAME)

    def construct_dynamic_otel_headers(
        self, standard_callback_dynamic_params: StandardCallbackDynamicParams
    ) -> Optional[dict]:
        """
        Construct dynamic headers from standard callback dynamic params

        Note: You just need to override this method in Arize, Langfuse Otel if you want to allow team/key based logging.

        Returns:
            dict: A dictionary of dynamic headers
        """
        return None

    #########################################################
    # End of Team/Key Based Logging Control Flow
    #########################################################

    def _emit_once(self, kwargs: dict, *scope: object) -> bool:
        """Return True the first time this handler is asked to emit a span
        for the given (handler, scope) on this kwargs; False on repeats.

        Used to suppress duplicate span emission for two distinct patterns:

        1. **Handler-level dual-fire**: streaming code paths trigger both
           the sync and async callback for one request, so ``_handle_success``
           / ``_handle_failure`` would otherwise produce two
           ``litellm_request`` spans. Scope: ``("success",)`` / ``("failure",)``.
        2. **Payload-driven multi-entrypoint emission**: a span loop that
           reads entries from ``standard_logging_payload`` (currently only
           guardrails) is invoked from multiple lifecycle points
           (post-call hooks, success callback, failure callback). The list
           can be re-read with mutated entries between calls, so dedupe
           must be at entry granularity. Scope: the entry's stable identity.

        ``scope`` parts can be any hashable identity. The marker is stored
        in ``kwargs["litellm_params"]["metadata"]["_otel_internal"]`` so it
        is request-local (kwargs is shared across the sync/async callbacks
        and lifecycle hooks for one request).
        """
        litellm_params = kwargs.get("litellm_params")
        if not isinstance(litellm_params, dict):
            litellm_params = {}
            kwargs["litellm_params"] = litellm_params

        _metadata = litellm_params.get("metadata")
        if not isinstance(_metadata, dict):
            _metadata = {}
            litellm_params["metadata"] = _metadata

        _otel_internal = _metadata.get("_otel_internal")
        if not isinstance(_otel_internal, dict):
            _otel_internal = {}
            _metadata["_otel_internal"] = _otel_internal

        spans_logged = _otel_internal.get("spans_logged")
        if not isinstance(spans_logged, dict):
            spans_logged = {}
            _otel_internal["spans_logged"] = spans_logged

        dedupe_key = (self.__class__.__name__, id(self), *scope)
        if spans_logged.get(dedupe_key) is True:
            return False

        spans_logged[dedupe_key] = True
        return True

    def _end_proxy_span_from_kwargs(self, kwargs: dict, end_time) -> None:
        """Close the proxy-level parent span if it is still recording.

        This helper retrieves the proxy span directly from kwargs metadata
        and closes it after all child spans have been recorded.

        Only called from the success path. The failure path deliberately
        leaves the proxy span open so ``async_post_call_failure_hook`` can
        append the ``"Failed Proxy Server Request"`` child span before
        closing it.

        Only spans named ``LITELLM_PROXY_REQUEST_SPAN_NAME`` are closed —
        externally provided spans must not be closed by LiteLLM.
        """
        litellm_params = kwargs.get("litellm_params", {}) or {}
        _metadata = litellm_params.get("metadata", {}) or {}
        proxy_span = _metadata.get("litellm_parent_otel_span", None)

        # Fallback: check litellm_metadata (used by /v1/messages and other
        # LITELLM_METADATA_ROUTES).
        if proxy_span is None:
            _litellm_metadata = litellm_params.get("litellm_metadata", {}) or {}
            proxy_span = _litellm_metadata.get("litellm_parent_otel_span", None)

        if (
            proxy_span is not None
            and getattr(proxy_span, "name", None) == LITELLM_PROXY_REQUEST_SPAN_NAME
            and hasattr(proxy_span, "is_recording")
            and proxy_span.is_recording()
        ):
            self._close_proxy_span_ok(proxy_span, end_time)

    def _close_proxy_span_ok(self, span: Span, end_time) -> None:
        """Finish ``span`` as a success.

        Sets the response status-code attribute to 200, sets the span
        status to OK, and ends the span at ``end_time`` (converted with
        ``self._to_ns``).
        """
        from opentelemetry.trace import Status, StatusCode

        self.set_response_status_code_attribute(span, 200)

        ok_status = Status(StatusCode.OK)
        span.set_status(ok_status)

        end_time_ns = self._to_ns(end_time)
        span.end(end_time=end_time_ns)

    def _handle_success(self, kwargs, response_obj, start_time, end_time):
        """Record spans, metrics and logs for a successful request.

        Steps, in order:

        1. If this handler already emitted the success span for these
           kwargs, only close the proxy span and return.
        2. Create the ``litellm_request`` primary span (plus raw-request
           sub-span), or annotate the existing parent span instead.
        3. Emit guardrail spans.
        4. Record metrics and cost.
        5. Emit semantic logs when ``config.enable_events`` is set.
        6. Stamp team attributes onto the proxy (root) span.
        7. Close the proxy span.

        Step 6 must run before step 7: once the proxy span has ended it is
        no longer recording, and the team-attribute helper skips spans that
        are not recording.

        Args:
            kwargs: Request kwargs shared by the logging callbacks.
            response_obj: The response returned for the request.
            start_time: Request start time.
            end_time: Request end time.
        """
        verbose_logger.debug(
            "OpenTelemetry Logger: Logging kwargs: %s, OTEL config settings=%s",
            kwargs,
            self.config,
        )

        # sync + async success handlers can both fire for one
        # request (notably in streaming code paths). Guard against duplicate
        # span writes — but still close the proxy span on the skip path so
        # the trace doesn't leak an open root span.
        if not self._emit_once(kwargs, "success"):
            verbose_logger.debug(
                "OpenTelemetry: skipping duplicate success span for handler=%s",
                self.__class__.__name__,
            )
            self._end_proxy_span_from_kwargs(kwargs, end_time)
            return

        ctx, parent_span = self._get_span_context(kwargs)

        if self.config.ignore_context_propagation:
            parent_span = None  # Ignore parent spans from other providers
            ctx = None

        # Decide whether to create a primary span
        # Always create if no parent span exists (backward compatibility)
        # OR if USE_OTEL_LITELLM_REQUEST_SPAN is explicitly enabled
        should_create_primary_span = parent_span is None or get_secret_bool(
            "USE_OTEL_LITELLM_REQUEST_SPAN"
        )

        if should_create_primary_span:
            # Create a new litellm_request span
            span = self._start_primary_span(
                kwargs, response_obj, start_time, end_time, ctx
            )
            # Raw-request sub-span (if enabled) - child of litellm_request span
            self._maybe_log_raw_request(
                kwargs, response_obj, start_time, end_time, span
            )
            # Do NOT duplicate attributes onto the parent proxy-request span.
            # The child litellm_request span already carries all attributes;
            # copying them to the parent doubles storage and complicates
            # search (Issue #4).
        else:
            # Do not create primary span (keep hierarchy shallow when parent exists)
            from opentelemetry.trace import Status, StatusCode

            span = None
            # Only touch the parent when it exposes the span API.
            # Note: parent_span is guaranteed to be not None here
            if hasattr(parent_span, "set_status"):
                parent_span.set_status(Status(StatusCode.OK))
                self.set_attributes(parent_span, kwargs, response_obj)
            # Raw-request as direct child of parent_span
            self._maybe_log_raw_request(
                kwargs, response_obj, start_time, end_time, parent_span
            )

        # 3. Guardrail span — ensure guardrails are always parented to an
        #    existing span so they never become orphaned root spans (Issue #5).
        guardrail_ctx = self._resolve_guardrail_context(
            span=span, parent_span=parent_span, fallback_ctx=ctx
        )
        self._create_guardrail_span(kwargs=kwargs, context=guardrail_ctx)

        # 4. Metrics & cost recording
        self._record_metrics(kwargs, response_obj, start_time, end_time)

        # 5. Semantic logs.
        if self.config.enable_events:
            log_span = span if span is not None else parent_span
            if log_span is not None:
                self._emit_semantic_logs(kwargs, response_obj, log_span)

        # 6. Stamp team attributes onto the SERVER (root) span while it is
        # still recording, so the trace root carries them like every child
        # span. This has to precede BOTH close paths below: parent_span is
        # commonly the very same proxy span, and a span that has ended is
        # skipped by the stamping helper.
        self._set_team_attributes_on_proxy_span_from_kwargs(kwargs)

        # 7. Do NOT end parent span - it should be managed by its creator
        # External spans (from Langfuse, user code, HTTP headers, global context) must not be closed by LiteLLM
        # However, proxy-created spans should be closed here.
        if (
            parent_span is not None
            and hasattr(parent_span, "name")
            and parent_span.name == LITELLM_PROXY_REQUEST_SPAN_NAME
            and hasattr(parent_span, "is_recording")
            and parent_span.is_recording()
        ):
            self._close_proxy_span_ok(parent_span, end_time)

        # close the proxy span explicitly from kwargs metadata
        # after all child spans (litellm_request, guardrail, raw_request)
        # have been fully recorded and exported.
        self._end_proxy_span_from_kwargs(kwargs, end_time)

    def _start_primary_span(
        self,
        kwargs,
        response_obj,
        start_time,
        end_time,
        context,
    ):
        """Create, populate and end the primary request span.

        The span is started on the tracer chosen for this request with the
        name from ``_get_span_name``; its kind is set to CLIENT only when
        ``_gen_ai_semconv_latest_experimental`` is truthy. Its status is set
        to OK, request/response attributes are applied, and it is ended at
        ``end_time`` before being returned.

        Returns:
            The span, already ended.
        """
        from opentelemetry.trace import Status, StatusCode

        tracer: Tracer = self.get_tracer_to_use_for_request(kwargs)

        start_args: Dict[str, Any] = dict(
            name=self._get_span_name(kwargs),
            start_time=self._to_ns(start_time),
            context=context,
        )
        if self._gen_ai_semconv_latest_experimental:
            start_args["kind"] = self.span_kind.CLIENT

        primary_span = tracer.start_span(**start_args)
        primary_span.set_status(Status(StatusCode.OK))
        self.set_attributes(primary_span, kwargs, response_obj)
        primary_span.end(end_time=self._to_ns(end_time))
        return primary_span

    def _maybe_log_raw_request(
        self, kwargs, response_obj, start_time, end_time, parent_span
    ):
        """Emit the raw-request child span under ``parent_span`` when enabled.

        Nothing is emitted when ``_gen_ai_semconv_latest_experimental`` is
        truthy (the raw request span is non-standard in semconv mode) or
        when ``_capture_in_span()`` returns a falsy value.

        The span is named after ``metadata["generation_name"]`` when that is
        set, otherwise ``RAW_REQUEST_SPAN_NAME``. It receives OK status, the
        raw request attributes and the team attributes, and is ended at
        ``end_time``.

        Args:
            kwargs: Request kwargs shared by the logging callbacks.
            response_obj: The response returned for the request.
            start_time: Span start time.
            end_time: Span end time.
            parent_span: Span the raw-request span is parented to.
        """
        from opentelemetry import trace
        from opentelemetry.trace import Status, StatusCode

        # raw_gen_ai_request is non-standard in semconv mode.
        if self._gen_ai_semconv_latest_experimental:
            return

        if not self._capture_in_span():
            return

        # ``or {}`` rather than a .get default: the key can be present with
        # a None value, which a default would not replace.
        litellm_params = kwargs.get("litellm_params") or {}
        metadata = litellm_params.get("metadata") or {}
        generation_name = metadata.get("generation_name")

        raw_span_name = generation_name if generation_name else RAW_REQUEST_SPAN_NAME

        otel_tracer: Tracer = self.get_tracer_to_use_for_request(kwargs)
        raw_span = otel_tracer.start_span(
            name=raw_span_name,
            start_time=self._to_ns(start_time),
            context=trace.set_span_in_context(parent_span),
        )
        raw_span.set_status(Status(StatusCode.OK))
        self.set_raw_request_attributes(raw_span, kwargs, response_obj)
        self._set_team_attributes_from_kwargs(raw_span, kwargs)
        raw_span.end(end_time=self._to_ns(end_time))

    def _set_team_attributes_on_span(
        self,
        span: Span,
        team_id: Optional[str],
        team_alias: Optional[str],
    ) -> None:
        """Stamp team_id / team_alias onto a span so every child span of a
        litellm_request trace carries them, not just the root span.

        Empty strings are treated as absent: a request made with the master
        key or a team-less virtual key carries ``user_api_key_team_id=""``
        in ``standard_logging_object.metadata``; propagating that to every
        span only adds noise that makes traces look mis-instrumented.
        """
        if team_id:
            self.safe_set_attribute(
                span=span,
                key="metadata.user_api_key_team_id",
                value=team_id,
            )
        if team_alias:
            self.safe_set_attribute(
                span=span,
                key="metadata.user_api_key_team_alias",
                value=team_alias,
            )

    def _set_team_attributes_from_kwargs(self, span: Span, kwargs: dict) -> None:
        """Pull team_id / team_alias from the standard logging metadata in kwargs and stamp them onto span."""
        std_log = kwargs.get("standard_logging_object")
        md: dict = {}
        if isinstance(std_log, dict):
            md = std_log.get("metadata") or {}
        elif std_log is not None:
            md = getattr(std_log, "metadata", None) or {}
        self._set_team_attributes_on_span(
            span=span,
            team_id=md.get("user_api_key_team_id"),
            team_alias=md.get("user_api_key_team_alias"),
        )

    def _set_team_attributes_on_proxy_span_from_kwargs(self, kwargs: dict) -> None:
        """Stamp team attributes onto the proxy SERVER (root) span so the
        trace root is filterable by team, not just its children. The root
        span is created in auth before the team is resolved and is
        otherwise only closed (never re-attributed) on the success path.

        Guarded to the LiteLLM-created proxy span (by name + recording) so
        externally provided parent spans are never mutated.
        """
        litellm_params = kwargs.get("litellm_params") or {}
        metadata = litellm_params.get("metadata") or {}
        proxy_span = metadata.get("litellm_parent_otel_span")
        if (
            proxy_span is not None
            and getattr(proxy_span, "name", None) == LITELLM_PROXY_REQUEST_SPAN_NAME
            and hasattr(proxy_span, "is_recording")
            and proxy_span.is_recording()
        ):
            self._set_team_attributes_from_kwargs(proxy_span, kwargs)

    def _set_inference_identity_attributes(
        self,
        span: Span,
        standard_logging_payload: StandardLoggingPayload,
        litellm_params: dict,
    ) -> None:
        """Stamp request-identity attributes onto an inference span so every
        LLM-call span is filterable by the route it came in on, the team's
        metadata, and both the user-facing (model_group alias) and the
        dispatched (provider) model names. Empty/absent values are skipped.
        """
        metadata = standard_logging_payload.get("metadata") or {}

        http_route = metadata.get("user_api_key_request_route")
        if http_route:
            self.safe_set_attribute(
                span=span, key=HTTP_ROUTE_ATTRIBUTE, value=http_route
            )

        # ``user_api_key_team_metadata`` is dropped from the standard logging
        # payload metadata, so read it from the raw request metadata in kwargs.
        # ``metadata`` and ``litellm_metadata`` are alternate names for the same
        # full metadata dict (the name varies by endpoint), so first-truthy wins.
        raw_metadata = (
            litellm_params.get("metadata")
            or litellm_params.get("litellm_metadata")
            or {}
        )
        team_metadata = self._team_metadata_json(
            raw_metadata.get("user_api_key_team_metadata"),
            self.config.baggage_team_metadata_keys,
        )
        if team_metadata:
            self.safe_set_attribute(
                span=span, key=TEAM_METADATA_ATTRIBUTE, value=team_metadata
            )

        model_group = standard_logging_payload.get("model_group")
        if model_group:
            self.safe_set_attribute(
                span=span, key=MODEL_GROUP_ATTRIBUTE, value=model_group
            )

        hidden_params = standard_logging_payload.get("hidden_params") or {}
        provider_model = hidden_params.get(
            "litellm_model_name"
        ) or standard_logging_payload.get("model")
        if provider_model:
            self.safe_set_attribute(
                span=span, key=PROVIDER_MODEL_ATTRIBUTE, value=provider_model
            )

    @staticmethod
    def _team_metadata_json(value: Any, allowed_keys: List[str]) -> Optional[str]:
        """JSON-serialize only the allowlisted sub-keys of a team's metadata.

        Returns ``None`` when nothing is allowlisted or no allowlisted key is
        present, so the empty case is dropped rather than stamping a useless
        ``"{}"`` (and so a team's metadata never leaves the process until an
        operator opts each sub-key in via ``baggage_team_metadata_keys``).
        """
        if not isinstance(value, dict) or not value or not allowed_keys:
            return None
        filtered = {key: value[key] for key in allowed_keys if key in value}
        if not filtered:
            return None
        return safe_dumps(filtered)

    def _ensure_metric_attribute_filter(self) -> None:
        """Resolve the include/exclude filter once, falling back to the proxy's
        callback_settings.otel.attributes when no explicit config was passed."""
        if self._metric_attr_filter_resolved:
            return
        attributes = self.config.attributes
        if attributes is None and self.callback_name in (None, "otel"):
            otel_settings = (litellm.callback_settings or {}).get("otel") or {}
            raw = (
                otel_settings.get("attributes")
                if isinstance(otel_settings, dict)
                else None
            )
            if raw is not None:
                attributes = _build_metric_attribute_filter(raw)
        (
            self._metric_attr_include,
            self._metric_attr_exclude,
        ) = _resolve_metric_attribute_filter(attributes)
        self._metric_attr_filter_resolved = True

    def _filter_metric_attributes(self, attrs: Dict[str, Any]) -> Dict[str, Any]:
        if not self._metric_attr_filter_resolved:
            self._ensure_metric_attribute_filter()
        if self._metric_attr_include is not None:
            return {k: v for k, v in attrs.items() if k in self._metric_attr_include}
        if self._metric_attr_exclude is not None:
            return {
                k: v for k, v in attrs.items() if k not in self._metric_attr_exclude
            }
        return attrs

    def _record_metrics(self, kwargs, response_obj, start_time, end_time):
        """Record duration, token-usage, cost and latency metrics for a request.

        A common attribute set is built from the request (operation name,
        provider, model, framework), selected metadata keys and the hidden
        params, passed through the metric attribute filter, and attached
        to every measurement.

        ``kwargs["standard_logging_object"]`` may be missing, a dict, or an
        attribute-style object, and its ``metadata`` / ``hidden_params`` may
        be ``None``. Each of those cases is treated as "no data" instead of
        raising.

        Args:
            kwargs: Request kwargs shared by the logging callbacks.
            response_obj: The response; its ``usage`` is read via ``.get``.
            start_time: Request start (datetime).
            end_time: Request end (datetime).
        """
        duration_s = (end_time - start_time).total_seconds()
        params = kwargs.get("litellm_params") or {}
        provider = params.get("custom_llm_provider", "Unknown")

        common_attrs = {
            "gen_ai.operation.name": (
                self._gen_ai_operation_name(kwargs)
                if self._gen_ai_semconv_latest_experimental
                else "chat"
            ),
            "gen_ai.system": provider,
            "gen_ai.request.model": kwargs.get("model"),
            "gen_ai.framework": "litellm",
        }

        std_log = kwargs.get("standard_logging_object")

        def _payload_field(name: str):
            # Read ``name`` attribute-style first, then mapping-style; a
            # missing or None value becomes an empty dict.
            value = getattr(std_log, name, None)
            if not value and hasattr(std_log, "get"):
                value = std_log.get(name)
            return value or {}

        md = _payload_field("metadata")
        for key in METRIC_METADATA_KEYS:
            value = md.get(key)
            if value is None:
                continue
            if isinstance(value, (dict, list)):
                common_attrs[f"metadata.{key}"] = safe_dumps(value)
            else:
                common_attrs[f"metadata.{key}"] = str(value)

        # get hidden params
        hidden_params = _payload_field("hidden_params")
        if hidden_params:
            common_attrs["hidden_params"] = safe_dumps(hidden_params)

        common_attrs = self._filter_metric_attributes(common_attrs)

        if self._operation_duration_histogram:
            self._operation_duration_histogram.record(
                duration_s, attributes=common_attrs
            )
            if (
                response_obj
                and (usage := response_obj.get("usage"))
                and self._token_usage_histogram
            ):
                in_attrs = {**common_attrs, TOKEN_TYPE_ATTRIBUTE: "input"}
                out_attrs = {**common_attrs, TOKEN_TYPE_ATTRIBUTE: "output"}
                self._token_usage_histogram.record(
                    usage.get("prompt_tokens", 0), attributes=in_attrs
                )
                self._token_usage_histogram.record(
                    usage.get("completion_tokens", 0), attributes=out_attrs
                )

        cost = kwargs.get("response_cost")
        if self._cost_histogram and cost:
            self._cost_histogram.record(cost, attributes=common_attrs)

        # Record latency metrics (TTFT, TPOT, and Total Generation Time)
        self._record_time_to_first_token_metric(kwargs, common_attrs)
        self._record_time_per_output_token_metric(
            kwargs, response_obj, end_time, duration_s, common_attrs
        )
        self._record_response_duration_metric(kwargs, end_time, common_attrs)

    @staticmethod
    def _to_timestamp(
        val: Optional[Union[datetime, float, str]],
    ) -> Optional[float]:
        """Convert datetime/float/string to timestamp."""
        if val is None:
            return None
        if isinstance(val, datetime):
            return val.timestamp()
        if isinstance(val, (int, float)):
            return float(val)
        # isinstance(val, str) - parse datetime string (with or without microseconds)
        try:
            return datetime.strptime(val, "%Y-%m-%d %H:%M:%S.%f").timestamp()
        except ValueError:
            try:
                return datetime.strptime(val, "%Y-%m-%d %H:%M:%S").timestamp()
            except ValueError:
                return None

    def _record_time_to_first_token_metric(self, kwargs: dict, common_attrs: dict):
        """Record Time to First Token (TTFT) metric for streaming requests."""
        optional_params = kwargs.get("optional_params", {})
        is_streaming = optional_params.get("stream", False)

        if not (self._time_to_first_token_histogram and is_streaming):
            return

        # Use api_call_start_time for precision (matches Prometheus implementation)
        # This excludes LiteLLM overhead and measures pure LLM API latency
        api_call_start_time = kwargs.get("api_call_start_time", None)
        completion_start_time = kwargs.get("completion_start_time", None)

        if api_call_start_time is not None and completion_start_time is not None:
            # Convert to timestamps if needed (handles datetime, float, and string)
            api_call_start_ts = self._to_timestamp(api_call_start_time)
            completion_start_ts = self._to_timestamp(completion_start_time)

            if api_call_start_ts is None or completion_start_ts is None:
                return  # Skip recording if conversion failed

            time_to_first_token_seconds = completion_start_ts - api_call_start_ts
            self._time_to_first_token_histogram.record(
                time_to_first_token_seconds, attributes=common_attrs
            )

    def _record_time_per_output_token_metric(
        self,
        kwargs: dict,
        response_obj: Optional[Any],
        end_time: datetime,
        duration_s: float,
        common_attrs: dict,
    ):
        """Record Time Per Output Token (TPOT) metric.

        Calculated as: generation_time / completion_tokens
        - For streaming: uses end_time - completion_start_time (time to generate all tokens after first)
        - For non-streaming: uses end_time - api_call_start_time (total generation time)
        """
        if not self._time_per_output_token_histogram:
            return

        # Get completion tokens from response_obj
        completion_tokens = None
        if response_obj and (usage := response_obj.get("usage")):
            completion_tokens = usage.get("completion_tokens")

        if completion_tokens is None or completion_tokens <= 0:
            return

        # Calculate generation time
        completion_start_time = kwargs.get("completion_start_time", None)
        api_call_start_time = kwargs.get("api_call_start_time", None)

        # Convert end_time to timestamp (handles datetime, float, and string)
        end_time_ts = self._to_timestamp(end_time)
        if end_time_ts is None:
            # Fallback to duration_s if conversion failed
            generation_time_seconds = duration_s
            if generation_time_seconds > 0:
                time_per_output_token_seconds = (
                    generation_time_seconds / completion_tokens
                )
                self._time_per_output_token_histogram.record(
                    time_per_output_token_seconds, attributes=common_attrs
                )
            return

        if completion_start_time is not None:
            # Streaming: use completion_start_time (when first token arrived)
            # This measures time to generate all tokens after the first one
            completion_start_ts = self._to_timestamp(completion_start_time)
            if completion_start_ts is None:
                # Fallback to duration_s if conversion failed
                generation_time_seconds = duration_s
            else:
                generation_time_seconds = end_time_ts - completion_start_ts
        elif api_call_start_time is not None:
            # Non-streaming: use api_call_start_time (total generation time)
            api_call_start_ts = self._to_timestamp(api_call_start_time)
            if api_call_start_ts is None:
                # Fallback to duration_s if conversion failed
                generation_time_seconds = duration_s
            else:
                generation_time_seconds = end_time_ts - api_call_start_ts
        else:
            # Fallback: use duration_s (already calculated as (end_time - start_time).total_seconds())
            generation_time_seconds = duration_s

        if generation_time_seconds > 0:
            time_per_output_token_seconds = generation_time_seconds / completion_tokens
            self._time_per_output_token_histogram.record(
                time_per_output_token_seconds, attributes=common_attrs
            )

    def _record_response_duration_metric(
        self,
        kwargs: dict,
        end_time: Union[datetime, float],
        common_attrs: dict,
    ):
        """Record Total Generation Time (response duration) metric.

        Measures pure LLM API generation time: end_time - api_call_start_time
        This excludes LiteLLM overhead and measures only the LLM provider's response time.
        Works for both streaming and non-streaming requests.

        Mirrors Prometheus's litellm_llm_api_latency_metric.
        Uses kwargs.get("end_time") with fallback to parameter for consistency with Prometheus.
        """
        if not self._response_duration_histogram:
            return

        api_call_start_time = kwargs.get("api_call_start_time", None)
        if api_call_start_time is None:
            return

        # Use end_time from kwargs if available (matches Prometheus), otherwise use parameter
        # For streaming: end_time is when the stream completes (final chunk received)
        # For non-streaming: end_time is when the response is received
        _end_time = kwargs.get("end_time") or end_time
        if _end_time is None:
            _end_time = datetime.now()

        # Convert to timestamps if needed (handles datetime, float, and string)
        api_call_start_ts = self._to_timestamp(api_call_start_time)
        end_time_ts = self._to_timestamp(_end_time)

        if api_call_start_ts is None or end_time_ts is None:
            return  # Skip recording if conversion failed

        response_duration_seconds = end_time_ts - api_call_start_ts

        if response_duration_seconds > 0:
            self._response_duration_histogram.record(
                response_duration_seconds, attributes=common_attrs
            )

    @staticmethod
    def _otel_log_types():
        """Return the ``(LogRecord, SeverityNumber)`` pair for the installed OTEL SDK.

        ``LogRecord`` is looked up in ``opentelemetry.sdk._logs`` first and,
        if that import fails, in ``opentelemetry.sdk._logs._internal`` (its
        location from OTEL 1.39.0 on, see
        open-telemetry/opentelemetry-python#4676). All imports are kept
        function-local because the SDK is an optional dependency.
        """
        from opentelemetry._logs import SeverityNumber as severity_number_cls

        try:
            # OTEL < 1.39.0
            from opentelemetry.sdk._logs import LogRecord as log_record_cls
        except ImportError:
            # OTEL >= 1.39.0
            from opentelemetry.sdk._logs._internal import (
                LogRecord as log_record_cls,
            )

        return (log_record_cls, severity_number_cls)

    def _emit_semantic_logs(self, kwargs, response_obj, span: Span):
        """Emit gen_ai prompt/completion log records correlated with ``span``.

        Does nothing unless ``self.config.enable_events`` is set. When the
        latest-experimental gen_ai semconv mode is active, delegates to
        ``_emit_inference_details_event`` and returns. Otherwise emits one
        ``gen_ai.content.prompt`` record per entry of ``kwargs["messages"]``
        and one ``gen_ai.content.completion`` record per entry of
        ``response_obj["choices"]``. Message/completion content is only
        included when ``_capture_in_event()`` is truthy.

        Args:
            kwargs: Logging kwargs; reads ``litellm_params`` and ``messages``.
            response_obj: Response exposing ``.get("choices")``; may be
                ``None``, in which case no completion records are emitted.
            span: Span whose trace/span ids and flags are stamped on every
                emitted record.
        """
        if not self.config.enable_events:
            return

        # NOTE: Semantic logs (gen_ai.content.prompt/completion events) have compatibility issues
        # with OTEL SDK >= 1.39.0 due to breaking changes in PR #4676:
        # - LogRecord moved from opentelemetry.sdk._logs to opentelemetry.sdk._logs._internal
        # - LogRecord constructor no longer accepts 'resource' parameter (now inherited from LoggerProvider)
        # - LogData class was removed entirely
        # These logs work correctly in OTEL SDK < 1.39.0 but may fail in >= 1.39.0.
        # See: https://github.com/open-telemetry/opentelemetry-python/pull/4676
        # TODO: Refactor to use the proper OTEL Logs API instead of directly creating SDK LogRecords

        SdkLogRecord, SeverityNumber = self._otel_log_types()

        # Resolve through the handler's own LoggerProvider (which may be a
        # private one when skip_set_global=True) rather than the module-level
        # get_logger() which always goes through the global provider.
        otel_logger = self._logger_provider.get_logger(LITELLM_LOGGER_NAME)

        parent_ctx = span.get_span_context()
        provider = (kwargs.get("litellm_params") or {}).get(
            "custom_llm_provider", "Unknown"
        )

        if self._gen_ai_semconv_latest_experimental:
            self._emit_inference_details_event(
                kwargs=kwargs,
                response_obj=response_obj,
                provider=provider,
                otel_logger=otel_logger,
                parent_ctx=parent_ctx,
            )
            return

        # ``_capture_in_event()`` takes no per-item input, so resolve it once
        # instead of once per message and once per choice.
        capture_event_content = self._capture_in_event()

        def emit_record(body, attrs):
            # Every record shares the span correlation ids and INFO severity.
            otel_logger.emit(
                SdkLogRecord(
                    timestamp=self._to_ns(datetime.now()),
                    trace_id=parent_ctx.trace_id,
                    span_id=parent_ctx.span_id,
                    trace_flags=parent_ctx.trace_flags,
                    severity_number=SeverityNumber.INFO,
                    severity_text="INFO",
                    body=body,
                    attributes=attrs,
                )
            )

        # per-message events (``messages`` may be absent or explicitly None)
        for msg in kwargs.get("messages") or []:
            role = msg.get("role", "user")
            attrs = {
                "event_name": "gen_ai.content.prompt",
                "gen_ai.system": provider,
            }
            if role == "tool" and msg.get("id"):
                attrs["id"] = msg["id"]
            if capture_event_content and msg.get("content"):
                attrs["gen_ai.prompt"] = msg["content"]

            # Copy so the caller's message is not mutated when content is dropped.
            body = msg.copy()
            if not capture_event_content:
                body.pop("content", None)

            emit_record(body, attrs)

        # per-choice events (a missing response yields no completion events
        # instead of raising on ``None.get``)
        choices = response_obj.get("choices") if response_obj is not None else None
        for idx, choice in enumerate(choices or []):
            finish_reason = choice.get("finish_reason")
            attrs = {
                "event_name": "gen_ai.content.completion",
                "gen_ai.system": provider,
                "index": idx,
                "finish_reason": finish_reason,
            }
            body_msg = choice.get("message") or {}
            content = body_msg.get("content") if capture_event_content else None
            if content:
                attrs["message.content"] = content
            body = {
                "index": idx,
                "finish_reason": finish_reason,
                "message": {"role": body_msg.get("role", "assistant")},
            }
            if content:
                body["message"]["content"] = content

            emit_record(body, attrs)

    @staticmethod
    def _resolve_guardrail_context(
        span: Optional[Any],
        parent_span: Optional[Any],
        fallback_ctx: Optional[Any],
    ) -> Optional[Any]:
        """
        Pick the OTEL context that guardrail child spans should attach to,
        so they are never orphaned (Issue #5).

        The first available candidate wins:
          1. ``span`` - the litellm_request span that was just created
          2. ``parent_span`` - the parent proxy-request span
          3. ``fallback_ctx`` - returned unchanged (may be None, last resort)
        """
        from opentelemetry import trace as otel_trace

        anchor_span = span if span is not None else parent_span
        if anchor_span is None:
            return fallback_ctx
        return otel_trace.set_span_in_context(anchor_span)

    def _create_guardrail_span(
        self, kwargs: Optional[dict], context: Optional[Context]
    ):
        """
        Emit one "guardrail" span per guardrail entry found in
        ``kwargs["standard_logging_object"]["guardrail_information"]``.

        Returns without doing anything when the payload, the guardrail list,
        or any dict-typed entries are missing. Each span is parented on
        ``context`` and uses the entry's ``start_time``/``end_time`` (falling
        back to the current time when absent).
        """
        kwargs = kwargs or {}
        payload: Optional[StandardLoggingPayload] = kwargs.get(
            "standard_logging_object"
        )
        if payload is None:
            return

        raw_entries = payload.get("guardrail_information")
        if not raw_entries:
            return

        # Only dict entries carry the fields read below.
        entries = [entry for entry in raw_entries if isinstance(entry, dict)]
        if not entries:
            return

        tracer: Tracer = self.get_tracer_to_use_for_request(kwargs)
        for entry in entries:
            started_at = entry.get("start_time")
            ended_at = entry.get("end_time")

            # ``_create_guardrail_span`` is called from three lifecycle
            # points (``async_post_call_success_hook``, ``_handle_success``,
            # ``_handle_failure``) and re-reads the (mutating) entry list
            # each time. Dedupe at entry granularity so a single real
            # guardrail invocation produces exactly one span per handler.
            already_emitted = not self._emit_once(
                kwargs,
                "guardrail",
                entry.get("guardrail_name"),
                started_at,
                entry.get("guardrail_mode"),
            )
            if already_emitted:
                continue

            span_start = (
                datetime.fromtimestamp(started_at)
                if started_at is not None
                else datetime.now()
            )
            span_end = (
                datetime.fromtimestamp(ended_at)
                if ended_at is not None
                else datetime.now()
            )

            guardrail_span = tracer.start_span(
                name="guardrail",
                start_time=self._to_ns(span_start),
                context=context,
            )

            self.safe_set_attribute(
                span=guardrail_span,
                key=SpanAttributes.OPENINFERENCE_SPAN_KIND,
                value=OpenInferenceSpanKindValues.GUARDRAIL.value,
            )

            # Identity fields are copied through under the same key.
            for field_name in ("guardrail_name", "guardrail_mode"):
                self.safe_set_attribute(
                    span=guardrail_span,
                    key=field_name,
                    value=entry.get(field_name),
                )

            # Structured fields are JSON-serialised, and only when present.
            for field_name in ("masked_entity_count", "guardrail_response"):
                field_value = entry.get(field_name)
                if field_value is not None:
                    guardrail_span.set_attribute(field_name, safe_dumps(field_value))

            # Surface guardrail_status (success / guardrail_intervened /
            # guardrail_failed_to_respond / not_run) as a top-level span
            # attribute so trace backends can filter on it without parsing
            # guardrail_response.
            self.safe_set_attribute(
                span=guardrail_span,
                key="guardrail_status",
                value=entry.get("guardrail_status"),
            )

            # Provider's raw top-level action (e.g. Bedrock's
            # ``GUARDRAIL_INTERVENED`` / ``NONE``). Populated by the provider
            # hook onto StandardLoggingGuardrailInformation so this integration
            # stays provider-agnostic - we only read a normalised string.
            action = entry.get("guardrail_action")
            if action:
                guardrail_span.set_attribute("guardrail_action", action)

            # The provider hook (e.g. Bedrock) extracts violation_categories
            # from the raw response BEFORE redaction and stamps them onto
            # StandardLoggingGuardrailInformation. Surfacing them here as a
            # queryable attribute lets dashboards group by violation category
            # without parsing the redacted guardrail_response blob.
            categories = entry.get("violation_categories")
            if categories:
                # OTel sequence attributes must be homogeneous primitives;
                # serialise to JSON once so set_attribute never coerces.
                guardrail_span.set_attribute(
                    "guardrail_violation_categories", safe_dumps(categories)
                )

            self._set_team_attributes_from_kwargs(guardrail_span, kwargs)

            guardrail_span.end(end_time=self._to_ns(span_end))

    def _handle_failure(self, kwargs, response_obj, start_time, end_time):
        """Record a failed request on an OTEL span.

        Steps, in order:
          1. Skip entirely if ``_emit_once(kwargs, "failure")`` reports that a
             failure span was already emitted for this request.
          2. Resolve the parent context/span from ``kwargs``; both are
             discarded when ``config.ignore_context_propagation`` is set.
          3. If there is no parent span, or ``USE_OTEL_LITELLM_REQUEST_SPAN``
             is enabled, create a new span with ERROR status, set its
             attributes, record the exception and end it at ``end_time``.
             Otherwise mark the parent span as ERROR and record the exception
             on it, but only while it is still recording.
          4. Emit guardrail spans parented on the new span, else the parent
             span, else the original parent context.
          5. End the parent span only when its name is
             ``LITELLM_PROXY_REQUEST_SPAN_NAME``.

        Args:
            kwargs: Logging kwargs for the request.
            response_obj: Response object forwarded to ``set_attributes``.
            start_time: Request start, converted with ``_to_ns`` for the new
                span's start time.
            end_time: Request end, converted with ``_to_ns`` for span end
                times.
        """
        from opentelemetry.trace import Status, StatusCode

        verbose_logger.debug(
            "OpenTelemetry Logger: Failure HandlerLogging kwargs: %s, OTEL config settings=%s",
            kwargs,
            self.config,
        )

        # sync + async failure handlers can both fire for one
        # request (notably in streaming code paths), producing two
        # semantically identical ERROR spans. Unlike the success path, the
        # proxy span is intentionally left open here so that
        # ``async_post_call_failure_hook`` can append the
        # "Failed Proxy Server Request" child span before closing it —
        # there is no proxy-span side-effect to preserve on the skip path.
        if not self._emit_once(kwargs, "failure"):
            verbose_logger.debug(
                "OpenTelemetry: skipping duplicate failure span for handler=%s",
                self.__class__.__name__,
            )
            return

        _parent_context, parent_otel_span = self._get_span_context(kwargs)

        if self.config.ignore_context_propagation:
            parent_otel_span = None  # Ignore parent spans from other providers
            _parent_context = None

        # Decide whether to create a primary span
        # Always create if no parent span exists (backward compatibility)
        # OR if USE_OTEL_LITELLM_REQUEST_SPAN is explicitly enabled
        should_create_primary_span = parent_otel_span is None or get_secret_bool(
            "USE_OTEL_LITELLM_REQUEST_SPAN"
        )

        span = None
        if should_create_primary_span:
            # Span 1: Request sent to litellm SDK
            otel_tracer: Tracer = self.get_tracer_to_use_for_request(kwargs)
            span_kwargs: Dict[str, Any] = {
                "name": self._get_span_name(kwargs),
                "start_time": self._to_ns(start_time),
                "context": _parent_context,
            }
            # The span kind is only set explicitly in latest-experimental
            # semconv mode; otherwise start_span's default is used.
            if self._gen_ai_semconv_latest_experimental:
                span_kwargs["kind"] = self.span_kind.CLIENT
            span = otel_tracer.start_span(**span_kwargs)
            span.set_status(Status(StatusCode.ERROR))
            self.set_attributes(span, kwargs, response_obj)

            # Record exception information using OTEL standard method
            self._record_exception_on_span(span=span, kwargs=kwargs)

            span.end(end_time=self._to_ns(end_time))
        else:
            # When parent span exists and USE_OTEL_LITELLM_REQUEST_SPAN=false,
            # record error on parent span (keeps hierarchy shallow)
            # Only set attributes if the span is still recording (not closed)
            # Note: parent_otel_span is guaranteed to be not None here
            if parent_otel_span.is_recording():
                parent_otel_span.set_status(Status(StatusCode.ERROR))
                self.set_attributes(parent_otel_span, kwargs, response_obj)
                self._record_exception_on_span(span=parent_otel_span, kwargs=kwargs)

        # Create span for guardrail information — ensure proper parenting (Issue #5)
        guardrail_ctx = self._resolve_guardrail_context(
            span=span, parent_span=parent_otel_span, fallback_ctx=_parent_context
        )
        self._create_guardrail_span(kwargs=kwargs, context=guardrail_ctx)

        # Do NOT end parent span - it should be managed by its creator
        # External spans (from Langfuse, user code, HTTP headers, global context) must not be closed by LiteLLM
        # However, proxy-created spans should be closed here
        if (
            parent_otel_span is not None
            and hasattr(parent_otel_span, "name")
            and parent_otel_span.name == LITELLM_PROXY_REQUEST_SPAN_NAME
        ):
            parent_otel_span.end(end_time=self._to_ns(end_time))

    def _record_exception_on_span(self, span: Span, kwargs: dict):
        """
        Attach error details from ``kwargs`` to ``span``.

        1. ``kwargs["exception"]``, when present, is passed to
           ``span.record_exception()`` (OTEL standard).
        2. Structured fields from the StandardLoggingPayload's
           ``error_information`` are copied onto span attributes; when that
           block is absent, ``error_str`` is used as the error message.

        Any exception raised while doing so is logged and swallowed.
        """
        try:
            from litellm.integrations._types.open_inference import (
                ErrorAttributes,
            )

            raised = kwargs.get("exception")
            if raised is not None:
                span.record_exception(raised)

            payload: Optional[StandardLoggingPayload] = kwargs.get(
                "standard_logging_object"
            )
            if payload is None:
                return

            error_info = payload.get("error_information")
            if error_info is None:
                # No structured block: fall back to the plain error string.
                fallback_message = payload.get("error_str")
                if fallback_message:
                    self.safe_set_attribute(
                        span=span,
                        key=ErrorAttributes.ERROR_MESSAGE,
                        value=fallback_message,
                    )
                return

            error_code = error_info.get("error_code")
            if error_code:
                self.safe_set_attribute(
                    span=span,
                    key=ErrorAttributes.ERROR_CODE,
                    value=error_code,
                )
                # Also expose under the OTel-standard name as an int
                # (error_code is a str, may be non-numeric).
                try:
                    self.safe_set_attribute(
                        span=span,
                        key=HTTP_RESPONSE_STATUS_CODE_ATTRIBUTE,
                        value=int(error_code),
                    )
                except (ValueError, TypeError):
                    pass

            # Remaining fields map one-to-one onto span attributes and are
            # only written when truthy.
            field_to_attribute = (
                ("error_class", ErrorAttributes.ERROR_TYPE),
                ("error_message", ErrorAttributes.ERROR_MESSAGE),
                ("llm_provider", ErrorAttributes.ERROR_LLM_PROVIDER),
                ("traceback", ErrorAttributes.ERROR_STACK_TRACE),
            )
            for field_name, attribute_key in field_to_attribute:
                field_value = error_info.get(field_name)
                if field_value:
                    self.safe_set_attribute(
                        span=span,
                        key=attribute_key,
                        value=field_value,
                    )

        except Exception as e:
            verbose_logger.exception(
                "OpenTelemetry: Error recording exception on span: %s", str(e)
            )

    def set_tools_attributes(self, span: Span, tools):
        """Write each tool's function name, description and parameters to ``span``.

        Attributes are keyed ``<LLM_REQUEST_FUNCTIONS>.<index>.<field>`` where
        ``index`` is the tool's position in ``tools``; ``parameters`` is
        JSON-encoded. Tools without a ``function`` entry are skipped. Any
        error is logged and stops processing of the remaining tools.
        """
        import json

        # Note: this local import is the proxy ``SpanAttributes`` enum, not
        # the open_inference one imported at module level.
        from litellm.proxy._types import SpanAttributes

        if not tools:
            return

        try:
            for index, tool in enumerate(tools):
                function_spec = tool.get("function")
                if not function_spec:
                    continue

                base_key = f"{SpanAttributes.LLM_REQUEST_FUNCTIONS.value}.{index}"
                for field_name in ("name", "description"):
                    self.safe_set_attribute(
                        span=span,
                        key=f"{base_key}.{field_name}",
                        value=function_spec.get(field_name),
                    )
                self.safe_set_attribute(
                    span=span,
                    key=f"{base_key}.parameters",
                    value=json.dumps(function_spec.get("parameters")),
                )
        except Exception as e:
            verbose_logger.error(
                "OpenTelemetry: Error setting tools attributes: %s", str(e)
            )

    def cast_as_primitive_value_type(self, value) -> Union[str, bool, int, float]:
        """
        Coerce ``value`` into a primitive type OTEL accepts.

        OTEL supports - str, bool, int, float. Those are returned unchanged;
        ``None`` becomes ``""``; anything else is passed through ``str()``,
        and ``""`` is returned if that conversion raises.
        """
        if value is None:
            return ""
        if not isinstance(value, (str, bool, int, float)):
            try:
                value = str(value)
            except Exception:
                value = ""
        return value

    @staticmethod
    def _tool_calls_kv_pair(
        tool_calls: List[ChatCompletionMessageToolCall],
    ) -> Dict[str, Any]:
        """Flatten tool calls into span-attribute key/value pairs.

        For every tool call that has a ``function`` entry, each field named
        in ``Function.__annotations__`` with a truthy value is stored under
        ``<LLM_COMPLETIONS>.<index>.function_call.<field>``, where ``index``
        is the tool call's position in ``tool_calls``.
        """
        from litellm.proxy._types import SpanAttributes

        # The annotated field names do not depend on the tool call.
        field_names = tuple(Function.__annotations__.keys())

        attributes: Dict[str, Any] = {}
        for position, call in enumerate(tool_calls):
            function_payload = call.get("function")
            if not function_payload:
                continue

            for field_name in field_names:
                field_value = function_payload.get(field_name)
                if not field_value:
                    continue
                attribute_key = f"{SpanAttributes.LLM_COMPLETIONS.value}.{position}.function_call.{field_name}"
                attributes[attribute_key] = field_value

        return attributes

    def set_attributes(self, span: Span, kwargs, response_obj: Optional[Any]):
        try:
            if self.callback_name == "langtrace":
                from litellm.integrations.langtrace import LangtraceAttributes

                LangtraceAttributes().set_langtrace_attributes(
                    span, kwargs, response_obj
                )
                return
            elif self.callback_name == "langfuse_otel":
                from litellm.integrations.langfuse.langfuse_otel import (
                    LangfuseOtelLogger,
                )

                LangfuseOtelLogger.set_langfuse_otel_attributes(
                    span, kwargs, response_obj
                )
                return
            elif self.callback_name == "weave_otel":
                from litellm.integrations.weave.weave_otel import (
                    set_weave_otel_attributes,
                )

                set_weave_otel_attributes(span, kwargs, response_obj)
                return
            from litellm.proxy._types import SpanAttributes

            optional_params = kwargs.get("optional_params", {})
            litellm_params = kwargs.get("litellm_params", {}) or {}
            standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get(
                "standard_logging_object"
            )
            if standard_logging_payload is None:
                raise ValueError("standard_logging_object not found in kwargs")

            # https://github.com/open-telemetry/semantic-conventions/blob/main/model/registry/gen-ai.yaml
            # Following Conventions here: https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/llm-spans.md
            #############################################
            ############ LLM CALL METADATA ##############
            #############################################
            metadata = standard_logging_payload["metadata"]
            for key, value in metadata.items():
                self.safe_set_attribute(
                    span=span, key="metadata.{}".format(key), value=value
                )

            # get hidden params
            hidden_params = getattr(
                standard_logging_payload, "hidden_params", None
            ) or (standard_logging_payload or {}).get("hidden_params", {})
            if hidden_params:
                self.safe_set_attribute(
                    span=span,
                    key="hidden_params",
                    value=safe_dumps(hidden_params),
                )

            self._set_inference_identity_attributes(
                span=span,
                standard_logging_payload=standard_logging_payload,
                litellm_params=litellm_params,
            )
            # Cost breakdown tracking
            cost_breakdown: Optional[CostBreakdown] = standard_logging_payload.get(
                "cost_breakdown"
            )
            if cost_breakdown:
                for key, value in cost_breakdown.items():
                    if value is not None:
                        self.safe_set_attribute(
                            span=span,
                            key=f"gen_ai.cost.{key}",
                            value=value,
                        )
            #############################################
            ########## LLM Request Attributes ###########
            #############################################

            # The name of the LLM a request is being made to
            if kwargs.get("model"):
                self.safe_set_attribute(
                    span=span,
                    key=SpanAttributes.LLM_REQUEST_MODEL.value,
                    value=kwargs.get("model"),
                )

            # The LLM request type
            self.safe_set_attribute(
                span=span,
                key=SpanAttributes.LLM_REQUEST_TYPE.value,
                value=standard_logging_payload["call_type"],
            )

            # The Generative AI Provider: Azure, OpenAI, etc.
            provider_name = litellm_params.get("custom_llm_provider", "Unknown")
            # Latest-experimental semconv replaced gen_ai.system with
            # gen_ai.provider.name; emit only the conformant key in that mode.
            if self._gen_ai_semconv_latest_experimental:
                self.safe_set_attribute(
                    span=span,
                    key="gen_ai.provider.name",
                    value=provider_name,
                )
            else:
                self.safe_set_attribute(
                    span=span,
                    key=SpanAttributes.LLM_SYSTEM.value,
                    value=provider_name,
                )

            # The maximum number of tokens the LLM generates for a request.
            if optional_params.get("max_tokens"):
                self.safe_set_attribute(
                    span=span,
                    key=SpanAttributes.LLM_REQUEST_MAX_TOKENS.value,
                    value=optional_params.get("max_tokens"),
                )

            # The temperature setting for the LLM request.
            if optional_params.get("temperature"):
                self.safe_set_attribute(
                    span=span,
                    key=SpanAttributes.LLM_REQUEST_TEMPERATURE.value,
                    value=optional_params.get("temperature"),
                )

            # The top_p sampling setting for the LLM request.
            if optional_params.get("top_p"):
                self.safe_set_attribute(
                    span=span,
                    key=SpanAttributes.LLM_REQUEST_TOP_P.value,
                    value=optional_params.get("top_p"),
                )

            if self._gen_ai_semconv_latest_experimental:
                # Semconv emits gen_ai.request.stream (only when streaming) via
                # _set_semconv_request_attributes; skip the legacy llm.is_streaming.
                self._set_semconv_request_attributes(span, optional_params)
                self._set_semconv_cache_token_attributes(span, standard_logging_payload)
            else:
                self.safe_set_attribute(
                    span=span,
                    key=SpanAttributes.LLM_IS_STREAMING.value,
                    value=str(optional_params.get("stream", False)),
                )

            if optional_params.get("user"):
                self.safe_set_attribute(
                    span=span,
                    key=SpanAttributes.LLM_USER.value,
                    value=optional_params.get("user"),
                )

            # The unique identifier for the LLM call.
            # Completions have a provider response ID (e.g. "chatcmpl-xxx"),
            # but Embeddings and Image-gen responses do not.  Fall back to
            # the litellm call ID so every call type can be correlated
            # across LiteLLM UI, Phoenix traces, and provider logs (Issue #8).
            response_id = (
                response_obj.get("id") if response_obj else None
            ) or standard_logging_payload.get("id")
            if response_id:
                self.safe_set_attribute(
                    span=span,
                    key="gen_ai.response.id",
                    value=response_id,
                )

            litellm_call_id = standard_logging_payload.get("litellm_call_id")
            if litellm_call_id:
                self.safe_set_attribute(
                    span=span,
                    key="litellm.call_id",
                    value=litellm_call_id,
                )

            # The model used to generate the response.
            if response_obj and response_obj.get("model"):
                self.safe_set_attribute(
                    span=span,
                    key=SpanAttributes.LLM_RESPONSE_MODEL.value,
                    value=response_obj.get("model"),
                )

            usage = response_obj and response_obj.get("usage")
            if usage:
                self.safe_set_attribute(
                    span=span,
                    key=SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS.value,
                    value=usage.get("total_tokens"),
                )

                # The number of tokens used in the LLM response (completion).
                self.safe_set_attribute(
                    span=span,
                    key=SpanAttributes.GEN_AI_USAGE_OUTPUT_TOKENS.value,
                    value=usage.get("completion_tokens"),
                )

                # The number of tokens used in the LLM prompt.
                self.safe_set_attribute(
                    span=span,
                    key=SpanAttributes.GEN_AI_USAGE_INPUT_TOKENS.value,
                    value=usage.get("prompt_tokens"),
                )

            #########################################################################
            ########## LLM Request Messages / tools / content Attributes ###########
            #########################################################################

            if not self._capture_in_span():
                return

            if optional_params.get("tools"):
                tools = optional_params["tools"]
                self.set_tools_attributes(span, tools)

            if kwargs.get("messages"):
                transformed_messages = (
                    self._transform_messages_to_otel_semantic_conventions(
                        kwargs.get("messages")
                    )
                )
                self.safe_set_attribute(
                    span=span,
                    key=SpanAttributes.GEN_AI_INPUT_MESSAGES.value,
                    value=safe_dumps(transformed_messages),
                )

            # Coalesce the different kwarg names that carry the system
            # prompt depending on the call path:
            #   - "system_instructions" — Vertex AI Gemini chat-completion
            #   - "instructions"        — OpenAI Responses API
            #   - "system"              — Anthropic Messages API
            # Use `is not None` rather than truthiness to avoid falsy
            # values (e.g. []) falling through to the wrong kwarg.
            system_instructions = (
                kwargs.get("system_instructions")
                if kwargs.get("system_instructions") is not None
                else (
                    kwargs.get("instructions")
                    if kwargs.get("instructions") is not None
                    else kwargs.get("system")
                )
            )
            if system_instructions:
                if isinstance(system_instructions, str):
                    # Plain text system prompt — no transformation needed
                    self.safe_set_attribute(
                        span=span,
                        key=SpanAttributes.GEN_AI_SYSTEM_INSTRUCTIONS.value,
                        value=system_instructions,
                    )
                else:
                    transformed_system_instructions = (
                        self._transform_messages_to_otel_semantic_conventions(
                            system_instructions
                        )
                    )
                    self.safe_set_attribute(
                        span=span,
                        key=SpanAttributes.GEN_AI_SYSTEM_INSTRUCTIONS.value,
                        value=safe_dumps(transformed_system_instructions),
                    )

            if self._gen_ai_semconv_latest_experimental:
                operation_name = self._gen_ai_operation_name(kwargs)
            else:
                operation_name = (
                    "chat"
                    if standard_logging_payload.get("call_type") == "completion"
                    else standard_logging_payload.get("call_type") or "chat"
                )
            self.safe_set_attribute(
                span=span,
                key=SpanAttributes.GEN_AI_OPERATION_NAME.value,
                value=operation_name,
            )

            if standard_logging_payload.get("request_id"):
                self.safe_set_attribute(
                    span=span,
                    key=SpanAttributes.GEN_AI_REQUEST_ID.value,
                    value=standard_logging_payload.get("request_id"),
                )
            #############################################
            ########## LLM Response Attributes ##########
            #############################################
            if response_obj is not None:
                if response_obj.get("choices"):
                    transformed_choices = (
                        self._transform_choices_to_otel_semantic_conventions(
                            response_obj.get("choices")
                        )
                    )
                    self.safe_set_attribute(
                        span=span,
                        key=SpanAttributes.GEN_AI_OUTPUT_MESSAGES.value,
                        value=safe_dumps(transformed_choices),
                    )

                    finish_reasons = []
                    for idx, choice in enumerate(response_obj.get("choices")):
                        if choice.get("finish_reason"):
                            finish_reasons.append(choice.get("finish_reason"))

                    if finish_reasons:
                        self.safe_set_attribute(
                            span=span,
                            key=SpanAttributes.GEN_AI_RESPONSE_FINISH_REASONS.value,
                            value=safe_dumps(finish_reasons),
                        )

                    for idx, choice in enumerate(response_obj.get("choices")):
                        if choice.get("finish_reason"):
                            message = choice.get("message")
                            tool_calls = message.get("tool_calls")
                            if tool_calls:
                                kv_pairs = OpenTelemetry._tool_calls_kv_pair(tool_calls)  # type: ignore
                                for key, value in kv_pairs.items():
                                    self.safe_set_attribute(
                                        span=span,
                                        key=key,
                                        value=value,
                                    )

                elif response_obj.get("output"):
                    # Responses API: ResponsesAPIResponse has an "output"
                    # list instead of "choices".  Each item with
                    # type="message" contains a "content" list of
                    # OutputText objects (type="output_text").
                    output_items = response_obj.get("output")
                    output_messages = self._transform_responses_api_output_to_otel(
                        output_items
                    )
                    if output_messages:
                        self.safe_set_attribute(
                            span=span,
                            key=SpanAttributes.GEN_AI_OUTPUT_MESSAGES.value,
                            value=safe_dumps(output_messages),
                        )

                    # Emit per-tool-call span attributes (parity with
                    # the choices branch that calls _tool_calls_kv_pair).
                    # Convert Responses API function_call items to the
                    # ChatCompletionMessageToolCall format expected by
                    # _tool_calls_kv_pair.
                    tool_calls = []
                    for out_item in output_items:
                        item_d = self._to_dict(out_item)
                        if item_d and item_d.get("type") == "function_call":
                            tool_calls.append(
                                {
                                    "function": {
                                        "name": item_d.get("name", ""),
                                        "arguments": item_d.get("arguments", ""),
                                    }
                                }
                            )
                    if tool_calls:
                        kv_pairs = OpenTelemetry._tool_calls_kv_pair(tool_calls)  # type: ignore
                        for key, value in kv_pairs.items():
                            self.safe_set_attribute(
                                span=span,
                                key=key,
                                value=value,
                            )

                    # Extract finish reason from ResponsesAPIResponse.status
                    status = response_obj.get("status")
                    if status:
                        self.safe_set_attribute(
                            span=span,
                            key=SpanAttributes.GEN_AI_RESPONSE_FINISH_REASONS.value,
                            value=safe_dumps([status]),
                        )

        except Exception as e:
            self.handle_callback_failure(
                callback_name=self.callback_name or "opentelemetry"
            )
            verbose_logger.exception(
                "OpenTelemetry logging error in set_attributes %s", str(e)
            )

    def _cast_as_primitive_value_type(self, value) -> Union[str, bool, int, float]:
        """
        Casts the value to a primitive OTEL type if it is not already a primitive type.

        OTEL supports - str, bool, int, float

        If it's not a primitive type, then it's converted to a string
        """
        if value is None:
            return ""
        if isinstance(value, (str, bool, int, float)):
            return value
        try:
            return str(value)
        except Exception:
            return ""

    def safe_set_attribute(self, span: Span, key: str, value: Any):
        """
        Safely sets an attribute on the span, ensuring the value is a primitive type.
        """
        primitive_value = self._cast_as_primitive_value_type(value)
        span.set_attribute(key, primitive_value)

    def _transform_messages_to_otel_semantic_conventions(
        self, messages: Union[List[dict], str]
    ) -> List[dict]:
        """
        Transforms LiteLLM/OpenAI style messages into OTEL GenAI 1.38 compliant format.
        OTEL expects a 'parts' array instead of a single 'content' string.
        """
        if isinstance(messages, str):
            # Handle system_instructions passed as a string
            return [
                {
                    "role": "system",
                    "parts": [{"type": "text", "content": messages}],
                }
            ]

        transformed = []
        for msg in messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")
            parts = []

            if isinstance(content, str):
                parts.append({"type": "text", "content": content})
            elif isinstance(content, list):
                # Handle multi-modal content if necessary
                for part in content:
                    if isinstance(part, dict):
                        parts.append(part)
                    else:
                        parts.append({"type": "text", "content": str(part)})

            transformed_msg = {"role": role, "parts": parts}
            if "id" in msg:
                transformed_msg["id"] = msg["id"]
            if "tool_calls" in msg:
                transformed_msg["tool_calls"] = msg["tool_calls"]
            if "tool_call_id" in msg:
                transformed_msg["tool_call_id"] = msg["tool_call_id"]
            transformed.append(transformed_msg)

        return transformed

    def _transform_choices_to_otel_semantic_conventions(
        self, choices: List[dict]
    ) -> List[dict]:
        """
        Transforms choices into OTEL GenAI 1.38 compliant format for output.messages.
        """
        transformed = []
        for choice in choices:
            message = choice.get("message") or {}
            finish_reason = choice.get("finish_reason")

            transformed_msg = self._transform_messages_to_otel_semantic_conventions(
                [message]
            )[0]
            if finish_reason:
                transformed_msg["finish_reason"] = finish_reason

            transformed.append(transformed_msg)
        return transformed

    @staticmethod
    def _to_dict(obj) -> Optional[dict]:
        """Normalize an object to a plain dict.

        Handles three forms that appear in practice:

        1. Plain ``dict`` — returned as-is.
        2. LiteLLM's ``BaseLiteLLMOpenAIResponseObject`` — exposes a
           ``.get()`` method that delegates to ``__dict__``.
        3. Raw Pydantic v2 models from the ``openai`` SDK (e.g.
           ``ResponseOutputMessage``, ``ResponseOutputText``) — these do
           **not** have ``.get()`` but do have ``.model_dump()``.

        Returns ``None`` for anything else so callers can skip it.
        """
        if isinstance(obj, dict):
            return obj
        if hasattr(obj, "get"):
            # BaseLiteLLMOpenAIResponseObject duck-type
            return obj  # type: ignore[return-value]
        if hasattr(obj, "model_dump"):
            # Raw Pydantic v2 model (e.g. openai SDK types)
            return obj.model_dump()  # type: ignore[union-attr]
        return None

    def _transform_responses_api_output_to_otel(self, output: List) -> List[dict]:
        """
        Transform Responses API output items into OTEL GenAI 1.38 format.

        The Responses API returns output as a list of items, each with a
        ``type`` field.  Message items (``type="message"``) contain a
        ``content`` list of ``OutputText`` objects with ``type="output_text"``
        and ``text`` fields.

        Items may be plain dicts, LiteLLM wrapper objects (with ``.get()``),
        or raw Pydantic v2 models from the ``openai`` SDK (with
        ``.model_dump()``).  We normalize each item to a dict via
        ``_to_dict`` before processing.

        This method converts them to the same ``{"role": ..., "parts": [...]}``
        format used by ``_transform_choices_to_otel_semantic_conventions``.
        """
        transformed = []
        for raw_item in output:
            item = self._to_dict(raw_item)
            if item is None:
                continue
            if item.get("type") == "message":
                role = item.get("role", "assistant")
                parts = []
                for raw_content in item.get("content", []):
                    content = self._to_dict(raw_content)
                    if content is None:
                        continue
                    if content.get("type") == "output_text":
                        text = content.get("text", "")
                        if text:
                            parts.append({"type": "text", "content": text})
                if parts:
                    transformed.append({"role": role, "parts": parts})
            elif item.get("type") == "function_call":
                # Surface tool calls from Responses API output
                part: dict = {
                    "type": "tool_call",
                    "name": item.get("name", ""),
                    "arguments": item.get("arguments", ""),
                }
                if item.get("call_id"):
                    part["id"] = item["call_id"]
                transformed.append({"role": "assistant", "parts": [part]})
        return transformed

    def set_raw_request_attributes(self, span: Span, kwargs, response_obj):
        try:
            # Only set provider-specific raw payload attributes on this span.
            # The parent litellm_request span already carries the standard
            # gen_ai.* / metadata.* attributes — duplicating them here doubles
            # storage and adds noise (Issue #3).
            litellm_params = kwargs.get("litellm_params", {}) or {}
            custom_llm_provider = litellm_params.get("custom_llm_provider", "Unknown")

            _raw_response = kwargs.get("original_response")
            _additional_args = kwargs.get("additional_args", {}) or {}
            complete_input_dict = _additional_args.get("complete_input_dict")
            #############################################
            ########## LLM Request Attributes ###########
            #############################################

            # OTEL Attributes for the RAW Request to https://docs.anthropic.com/en/api/messages
            if complete_input_dict and isinstance(complete_input_dict, dict):
                for param, val in complete_input_dict.items():
                    self.safe_set_attribute(
                        span=span,
                        key=f"llm.{custom_llm_provider}.{param}",
                        value=val,
                    )

            #############################################
            ########## LLM Response Attributes ##########
            #############################################
            if _raw_response and isinstance(_raw_response, str):
                # cast sr -> dict
                import json

                try:
                    _raw_response = json.loads(_raw_response)
                    for param, val in _raw_response.items():
                        self.safe_set_attribute(
                            span=span,
                            key=f"llm.{custom_llm_provider}.{param}",
                            value=val,
                        )
                except json.JSONDecodeError:
                    verbose_logger.debug(
                        "litellm.integrations.opentelemetry.py::set_raw_request_attributes() - raw_response not json string - {}".format(
                            _raw_response
                        )
                    )

                    self.safe_set_attribute(
                        span=span,
                        key=f"llm.{custom_llm_provider}.stringified_raw_response",
                        value=_raw_response,
                    )
        except Exception as e:
            verbose_logger.exception(
                "OpenTelemetry logging error in set_raw_request_attributes %s",
                str(e),
            )

    def _to_ns(self, dt):
        if dt is None:
            return int(datetime.now().timestamp() * 1e9)
        if isinstance(dt, (int, float)):
            return int(dt * 1e9)
        return int(dt.timestamp() * 1e9)

    def _get_span_name(self, kwargs):
        litellm_params = kwargs.get("litellm_params", {})
        metadata = litellm_params.get("metadata") or {}
        generation_name = metadata.get("generation_name")

        if generation_name:
            return generation_name

        if self._gen_ai_semconv_latest_experimental:
            model = kwargs.get("model") or "unknown"
            return f"{self._gen_ai_operation_name(kwargs)} {model}"

        return LITELLM_REQUEST_SPAN_NAME

    def get_traceparent_from_header(self, headers):
        """
        Extract a parent context from the ``traceparent`` entry of ``headers``.

        Returns ``None`` when ``headers`` is ``None`` or has no ``traceparent``
        value. Otherwise returns the context produced by
        ``TraceContextTextMapPropagator().extract`` for that value.
        """
        traceparent_value = (
            None if headers is None else headers.get("traceparent", None)
        )
        if traceparent_value is None:
            return None

        # Imported lazily so the module loads without OpenTelemetry installed.
        from opentelemetry.trace.propagation.tracecontext import (
            TraceContextTextMapPropagator,
        )

        return TraceContextTextMapPropagator().extract(
            carrier={"traceparent": traceparent_value}
        )

    def _get_span_context(self, kwargs, default_span: Optional[Span] = None):
        """
        Resolve the parent context to use when starting a span.

        Returns a ``(context, span)`` tuple from the first source available:

        1. An explicit ``litellm_parent_otel_span`` found in
           ``litellm_params["metadata"]``, or failing that in
           ``litellm_params["litellm_metadata"]`` -> ``(context with that span
           set, None)``.
        2. A ``traceparent`` header on ``proxy_server_request`` ->
           ``(extracted context, None)``.
        3. The currently active span, if its span context is valid ->
           ``(current context, current span)``.
        4. Nothing found -> ``(None, None)``.

        ``default_span`` is accepted but not read by this method.
        """
        from opentelemetry import context, trace
        from opentelemetry.trace.propagation.tracecontext import (
            TraceContextTextMapPropagator,
        )

        litellm_params = kwargs.get("litellm_params", {}) or {}
        proxy_request = litellm_params.get("proxy_server_request", {}) or {}
        request_headers = proxy_request.get("headers", {}) or {}
        incoming_traceparent = request_headers.get("traceparent", None)

        # "metadata" is checked first; "litellm_metadata" is the fallback used
        # by /v1/messages and other LITELLM_METADATA_ROUTES, which keep
        # proxy-internal metadata apart from the provider's native "metadata".
        explicit_parent = None
        for metadata_key in ("metadata", "litellm_metadata"):
            metadata_block = litellm_params.get(metadata_key, {}) or {}
            explicit_parent = metadata_block.get("litellm_parent_otel_span", None)
            if explicit_parent is not None:
                break

        # Priority 1: explicit parent span supplied through metadata
        if explicit_parent is not None:
            verbose_logger.debug(
                "OpenTelemetry: Using explicit parent span from metadata"
            )
            return trace.set_span_in_context(explicit_parent), None

        # Priority 2: W3C traceparent header from the incoming HTTP request
        if incoming_traceparent is not None:
            verbose_logger.debug(
                "OpenTelemetry: Using traceparent header for context propagation"
            )
            extracted = TraceContextTextMapPropagator().extract(
                carrier={"traceparent": incoming_traceparent}
            )
            return extracted, None

        # Priority 3: whatever span is active in the global context
        try:
            active_span = trace.get_current_span()
            if active_span is not None:
                active_ctx = active_span.get_span_context()
                if active_ctx.is_valid:
                    verbose_logger.debug(
                        "OpenTelemetry: Using active span from global context: %s (trace_id=%s, span_id=%s, is_recording=%s)",
                        active_span,
                        format(active_ctx.trace_id, "032x"),
                        format(active_ctx.span_id, "016x"),
                        active_span.is_recording(),
                    )
                    return context.get_current(), active_span
        except Exception as e:
            verbose_logger.debug(
                "OpenTelemetry: Error getting current span: %s", str(e)
            )

        # Priority 4: no parent at all, the caller will start a root span
        verbose_logger.debug(
            "OpenTelemetry: No parent context found, creating root span"
        )
        return None, None

    def _get_span_processor(self, dynamic_headers: Optional[dict] = None):
        """
        Build the span processor that matches ``self.OTEL_EXPORTER``.

        Exporter headers come from ``dynamic_headers`` when truthy, otherwise
        from ``self.OTEL_HEADERS``, parsed by
        ``OpenTelemetry._get_headers_dictionary``.

        Selection:

        - An ``OTEL_EXPORTER`` object exposing ``export`` is wrapped in a
          ``SimpleSpanProcessor``.
        - ``"otlp_http"``, ``"http/protobuf"`` or ``"http/json"`` -> OTLP HTTP
          exporter in a ``BatchSpanProcessor``.
        - ``"otlp_grpc"`` or ``"grpc"`` -> OTLP gRPC exporter in a
          ``BatchSpanProcessor``.
        - ``"console"`` and any other value -> ``ConsoleSpanExporter`` in a
          ``BatchSpanProcessor``.

        Raises:
            ImportError: if the selected OTLP exporter package cannot be
                imported.
        """
        from opentelemetry.sdk.trace.export import (
            BatchSpanProcessor,
            ConsoleSpanExporter,
            SimpleSpanProcessor,
            SpanExporter,
        )

        verbose_logger.debug(
            "OpenTelemetry Logger, initializing span processor \nself.OTEL_EXPORTER: %s\nself.OTEL_ENDPOINT: %s\nself.OTEL_HEADERS: %s",
            self.OTEL_EXPORTER,
            self.OTEL_ENDPOINT,
            self.OTEL_HEADERS,
        )
        exporter_headers = OpenTelemetry._get_headers_dictionary(
            headers=dynamic_headers or self.OTEL_HEADERS
        )

        if not dynamic_headers:
            verbose_logger.debug(
                "[OTEL DEBUG] Creating span processor with GLOBAL headers"
            )
        else:
            # Values longer than 20 characters are truncated before logging.
            truncated_headers = {}
            for header_name, header_value in exporter_headers.items():
                if len(str(header_value)) > 20:
                    truncated_headers[header_name] = header_value[:20] + "..."
                else:
                    truncated_headers[header_name] = header_value
            verbose_logger.debug(
                "[OTEL DEBUG] Creating span processor with DYNAMIC headers: %s",
                truncated_headers,
            )

        configured_exporter = self.OTEL_EXPORTER

        # Anything with an ``export`` attribute is treated as a ready-made
        # SpanExporter instance.
        if hasattr(configured_exporter, "export"):
            verbose_logger.debug(
                "OpenTelemetry: intiializing SpanExporter. Value of OTEL_EXPORTER: %s",
                configured_exporter,
            )
            return SimpleSpanProcessor(cast(SpanExporter, configured_exporter))

        if configured_exporter in ("otlp_http", "http/protobuf", "http/json"):
            try:
                from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
                    OTLPSpanExporter as OTLPSpanExporterHTTP,
                )
            except ImportError as exc:
                raise ImportError(
                    "OpenTelemetry OTLP HTTP exporter is not available. Install "
                    "`opentelemetry-exporter-otlp` to enable OTLP HTTP."
                ) from exc

            verbose_logger.debug(
                "OpenTelemetry: intiializing http exporter. Value of OTEL_EXPORTER: %s",
                configured_exporter,
            )
            traces_endpoint = self._normalize_otel_endpoint(
                self.OTEL_ENDPOINT, "traces"
            )
            http_exporter = OTLPSpanExporterHTTP(
                endpoint=traces_endpoint, headers=exporter_headers
            )
            return BatchSpanProcessor(http_exporter)

        if configured_exporter in ("otlp_grpc", "grpc"):
            try:
                from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
                    OTLPSpanExporter as OTLPSpanExporterGRPC,
                )
            except ImportError as exc:
                raise ImportError(
                    "OpenTelemetry OTLP gRPC exporter is not available. Install "
                    "`opentelemetry-exporter-otlp` and `grpcio` (or `litellm[grpc]`)."
                ) from exc

            verbose_logger.debug(
                "OpenTelemetry: intiializing grpc exporter. Value of OTEL_EXPORTER: %s",
                configured_exporter,
            )
            traces_endpoint = self._normalize_otel_endpoint(
                self.OTEL_ENDPOINT, "traces"
            )
            grpc_exporter = OTLPSpanExporterGRPC(
                endpoint=traces_endpoint, headers=exporter_headers
            )
            return BatchSpanProcessor(grpc_exporter)

        # "console" and every unrecognised value end up on the console exporter.
        verbose_logger.debug(
            "OpenTelemetry: intiializing console exporter. Value of OTEL_EXPORTER: %s",
            configured_exporter,
        )
        return BatchSpanProcessor(ConsoleSpanExporter())

    def _get_log_exporter(self):
        """
        Get the appropriate log exporter based on the configuration.

        Headers are parsed from ``self.OTEL_HEADERS`` and the endpoint is
        normalized for the ``"logs"`` signal before an exporter is chosen.

        Selection:

        - An ``OTEL_EXPORTER`` object exposing ``export`` is returned as-is
          (custom exporter).
        - ``"console"``, either as ``OTEL_EXPORTER`` or in the
          ``OTEL_LOGS_EXPORTER`` environment variable -> ``ConsoleLogExporter``.
        - ``"otlp_http"``, ``"http/protobuf"`` or ``"http/json"`` -> OTLP HTTP
          ``OTLPLogExporter``.
        - ``"otlp_grpc"`` or ``"grpc"`` -> OTLP gRPC ``OTLPLogExporter``.
        - Anything else logs a warning and falls back to
          ``ConsoleLogExporter``.

        Raises:
            ImportError: if the selected OTLP HTTP or gRPC log exporter
                package cannot be imported.
        """
        verbose_logger.debug(
            "OpenTelemetry Logger, initializing log exporter \nself.OTEL_EXPORTER: %s\nself.OTEL_ENDPOINT: %s\nself.OTEL_HEADERS: %s",
            self.OTEL_EXPORTER,
            self.OTEL_ENDPOINT,
            self.OTEL_HEADERS,
        )

        _split_otel_headers = OpenTelemetry._get_headers_dictionary(self.OTEL_HEADERS)

        # Normalize endpoint for logs - ensure it points to /v1/logs instead of /v1/traces
        normalized_endpoint = self._normalize_otel_endpoint(self.OTEL_ENDPOINT, "logs")

        verbose_logger.debug(
            "OpenTelemetry: Log endpoint normalized from %s to %s",
            self.OTEL_ENDPOINT,
            normalized_endpoint,
        )

        if hasattr(self.OTEL_EXPORTER, "export"):
            # Custom exporter provided
            verbose_logger.debug(
                "OpenTelemetry: Using custom log exporter. Value of OTEL_EXPORTER: %s",
                self.OTEL_EXPORTER,
            )
            return self.OTEL_EXPORTER

        otel_logs_exporter = os.getenv("OTEL_LOGS_EXPORTER")
        if self.OTEL_EXPORTER == "console" or otel_logs_exporter == "console":
            from opentelemetry.sdk._logs.export import ConsoleLogExporter

            verbose_logger.debug(
                "OpenTelemetry: Using console log exporter. Value of OTEL_EXPORTER: %s",
                self.OTEL_EXPORTER,
            )
            return ConsoleLogExporter()
        elif (
            self.OTEL_EXPORTER == "otlp_http"
            or self.OTEL_EXPORTER == "http/protobuf"
            or self.OTEL_EXPORTER == "http/json"
        ):
            # Same descriptive ImportError as the gRPC branch below and the
            # span-processor path, so a missing optional dependency is reported
            # the same way for every OTLP transport.
            try:
                from opentelemetry.exporter.otlp.proto.http._log_exporter import (
                    OTLPLogExporter,
                )
            except ImportError as exc:
                raise ImportError(
                    "OpenTelemetry OTLP HTTP log exporter is not available. Install "
                    "`opentelemetry-exporter-otlp` to enable OTLP HTTP."
                ) from exc

            verbose_logger.debug(
                "OpenTelemetry: Using HTTP log exporter. Value of OTEL_EXPORTER: %s, endpoint: %s",
                self.OTEL_EXPORTER,
                normalized_endpoint,
            )
            return OTLPLogExporter(
                endpoint=normalized_endpoint, headers=_split_otel_headers
            )
        elif self.OTEL_EXPORTER == "otlp_grpc" or self.OTEL_EXPORTER == "grpc":
            try:
                from opentelemetry.exporter.otlp.proto.grpc._log_exporter import (
                    OTLPLogExporter,
                )
            except ImportError as exc:
                raise ImportError(
                    "OpenTelemetry OTLP gRPC log exporter is not available. Install "
                    "`opentelemetry-exporter-otlp` and `grpcio` (or `litellm[grpc]`)."
                ) from exc

            verbose_logger.debug(
                "OpenTelemetry: Using gRPC log exporter. Value of OTEL_EXPORTER: %s, endpoint: %s",
                self.OTEL_EXPORTER,
                normalized_endpoint,
            )
            return OTLPLogExporter(
                endpoint=normalized_endpoint, headers=_split_otel_headers
            )
        else:
            verbose_logger.warning(
                "OpenTelemetry: Unknown log exporter '%s', defaulting to console. Supported: console, otlp_http, otlp_grpc",
                self.OTEL_EXPORTER,
            )
            from opentelemetry.sdk._logs.export import ConsoleLogExporter

            return ConsoleLogExporter()

    def _get_metric_reader(self):
        """
        Build the periodic metric reader for the configured exporter.

        The exporter is chosen from ``self.OTEL_EXPORTER``:

        - ``"console"``: ``ConsoleMetricExporter``
        - ``"otlp_http"`` / ``"http/protobuf"`` / ``"http/json"``: OTLP HTTP
          metric exporter
        - ``"otlp_grpc"`` / ``"grpc"``: OTLP gRPC metric exporter
        - anything else: a warning is logged and the console exporter is used

        Both OTLP exporters receive the endpoint normalized for the
        ``metrics`` signal, the parsed ``self.OTEL_HEADERS`` and a DELTA
        temporality preference for histograms.

        Returns:
            A ``PeriodicExportingMetricReader`` wrapping the selected exporter
            with a 5000 ms export interval.

        Raises:
            ImportError: If the gRPC exporter is requested but its package
                cannot be imported.
        """
        from opentelemetry.sdk.metrics import Histogram
        from opentelemetry.sdk.metrics.export import (
            AggregationTemporality,
            ConsoleMetricExporter,
            PeriodicExportingMetricReader,
        )

        verbose_logger.debug(
            "OpenTelemetry Logger, initializing metric reader\nself.OTEL_EXPORTER: %s\nself.OTEL_ENDPOINT: %s\nself.OTEL_HEADERS: %s",
            self.OTEL_EXPORTER,
            self.OTEL_ENDPOINT,
            self.OTEL_HEADERS,
        )

        otlp_headers = OpenTelemetry._get_headers_dictionary(self.OTEL_HEADERS)
        metrics_endpoint = self._normalize_otel_endpoint(
            self.OTEL_ENDPOINT, "metrics"
        )
        exporter_kind = self.OTEL_EXPORTER

        if exporter_kind == "console":
            metric_exporter = ConsoleMetricExporter()
        elif exporter_kind in ("otlp_http", "http/protobuf", "http/json"):
            from opentelemetry.exporter.otlp.proto.http.metric_exporter import (
                OTLPMetricExporter as HttpMetricExporter,
            )

            metric_exporter = HttpMetricExporter(
                endpoint=metrics_endpoint,
                headers=otlp_headers,
                preferred_temporality={Histogram: AggregationTemporality.DELTA},
            )
        elif exporter_kind in ("otlp_grpc", "grpc"):
            try:
                from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import (
                    OTLPMetricExporter as GrpcMetricExporter,
                )
            except ImportError as exc:
                raise ImportError(
                    "OpenTelemetry OTLP gRPC metric exporter is not available. Install "
                    "`opentelemetry-exporter-otlp` and `grpcio` (or `litellm[grpc]`)."
                ) from exc

            metric_exporter = GrpcMetricExporter(
                endpoint=metrics_endpoint,
                headers=otlp_headers,
                preferred_temporality={Histogram: AggregationTemporality.DELTA},
            )
        else:
            verbose_logger.warning(
                "OpenTelemetry: Unknown metric exporter '%s', defaulting to console. Supported: console, otlp_http, otlp_grpc",
                exporter_kind,
            )
            metric_exporter = ConsoleMetricExporter()

        # Every exporter is wrapped the same way, so build the reader once.
        return PeriodicExportingMetricReader(
            metric_exporter, export_interval_millis=5000
        )

    def _normalize_otel_endpoint(
        self, endpoint: Optional[str], signal_type: str
    ) -> Optional[str]:
        """
        Normalize the endpoint URL for a specific OpenTelemetry signal type.

        The OTLP exporters expect endpoints to use signal-specific paths:
        - traces: /v1/traces
        - metrics: /v1/metrics
        - logs: /v1/logs

        This method ensures the endpoint has the correct path for the given signal type.

        Args:
            endpoint: The endpoint URL to normalize
            signal_type: The telemetry signal type ('traces', 'metrics', or 'logs')

        Returns:
            Normalized endpoint URL with the correct signal path

        Examples:
            _normalize_otel_endpoint("http://collector:4318/v1/traces", "logs")
            -> "http://collector:4318/v1/logs"

            _normalize_otel_endpoint("http://collector:4318", "traces")
            -> "http://collector:4318/v1/traces"

            _normalize_otel_endpoint("http://collector:4318/v1/logs", "metrics")
            -> "http://collector:4318/v1/metrics"
        """
        if not endpoint:
            return endpoint

        # Validate signal_type
        valid_signals = {"traces", "metrics", "logs"}
        if signal_type not in valid_signals:
            verbose_logger.warning(
                "Invalid signal_type '%s' provided to _normalize_otel_endpoint. "
                "Valid values: %s. Returning endpoint unchanged.",
                signal_type,
                valid_signals,
            )
            return endpoint

        # Remove trailing slash
        endpoint = endpoint.rstrip("/")

        # Splunk Observability Cloud OTLP/HTTP uses /v2/trace/otlp (not /v1/traces). Do not rewrite.
        if signal_type == "traces" and "/v2/trace/otlp" in endpoint:
            return endpoint

        # Check if endpoint already ends with the correct signal path
        target_path = f"/v1/{signal_type}"
        if endpoint.endswith(target_path):
            return endpoint

        # Replace existing signal path with the target signal path
        other_signals = valid_signals - {signal_type}
        for other_signal in other_signals:
            other_path = f"/v1/{other_signal}"
            if endpoint.endswith(other_path):
                endpoint = endpoint.rsplit("/", 1)[0] + f"/{signal_type}"
                return endpoint

        # No existing signal path found, append the target path
        if not endpoint.endswith("/v1"):
            endpoint = endpoint + target_path
        else:
            endpoint = endpoint + f"/{signal_type}"

        return endpoint

    @staticmethod
    def _get_headers_dictionary(
        headers: Optional[Union[str, dict]],
    ) -> Dict[str, str]:
        """
        Convert a string or dictionary of headers into a dictionary of headers.
        """
        _split_otel_headers: Dict[str, str] = {}
        if headers:
            if isinstance(headers, str):
                # when passed HEADERS="x-honeycomb-team=B85YgLm96******"
                # Split only on first '=' occurrence
                parts = headers.split(",")
                for part in parts:
                    key, value = part.split("=", 1)
                    _split_otel_headers[key] = value
            elif isinstance(headers, dict):
                _split_otel_headers = headers
        return _split_otel_headers

    async def async_management_endpoint_success_hook(
        self,
        logging_payload: ManagementEndpointLoggingPayload,
        parent_otel_span: Optional[Span] = None,
    ):
        """
        Record a successful management-endpoint call as a child span and
        close the parent span.

        When ``parent_otel_span`` is provided, a child span named after
        ``logging_payload.route`` is started under it. Every item of
        ``logging_payload.request_data`` and ``logging_payload.response`` is
        set as a ``request.<key>`` / ``response.<key>`` attribute, and the
        child is ended with status OK. The parent span then receives response
        status code 200 and status OK, and is ended at the same end time.

        Nothing is emitted when ``parent_otel_span`` is ``None``.

        Args:
            logging_payload: Payload exposing ``start_time``, ``end_time``,
                ``route``, ``request_data`` and ``response``.
            parent_otel_span: Span to attach the child span to and to close.
        """
        from opentelemetry import trace
        from opentelemetry.trace import Status, StatusCode

        started_at = logging_payload.start_time
        ended_at = logging_payload.end_time

        # Float timestamps are treated as epoch seconds; anything else goes
        # through the shared converter.
        start_ns = (
            int(started_at * 1e9)
            if isinstance(started_at, float)
            else self._to_ns(started_at)
        )
        end_ns = (
            int(ended_at * 1e9)
            if isinstance(ended_at, float)
            else self._to_ns(ended_at)
        )

        if parent_otel_span is None:
            return

        route_name = logging_payload.route
        endpoint_span = self.tracer.start_span(
            name=route_name,
            context=trace.set_span_in_context(parent_otel_span),
            start_time=start_ns,
        )

        request_fields = logging_payload.request_data
        if request_fields is not None:
            for field_name, field_value in request_fields.items():
                self.safe_set_attribute(
                    span=endpoint_span,
                    key=f"request.{field_name}",
                    value=field_value,
                )

        response_fields = logging_payload.response
        if response_fields is not None:
            for field_name, field_value in response_fields.items():
                self.safe_set_attribute(
                    span=endpoint_span,
                    key=f"response.{field_name}",
                    value=field_value,
                )

        endpoint_span.set_status(Status(StatusCode.OK))
        endpoint_span.end(end_time=end_ns)

        # The management wrapper has no other hook that closes the SERVER span.
        self.set_response_status_code_attribute(parent_otel_span, 200)
        parent_otel_span.set_status(Status(StatusCode.OK))
        parent_otel_span.end(end_time=end_ns)

    async def async_management_endpoint_failure_hook(
        self,
        logging_payload: ManagementEndpointLoggingPayload,
        parent_otel_span: Optional[Span] = None,
    ):
        """
        Record a failed management-endpoint call as a child span and close
        the parent span with an error status.

        When ``parent_otel_span`` is provided, a child span named after
        ``logging_payload.route`` is started under it. Every item of
        ``logging_payload.request_data`` is set as a ``request.<key>``
        attribute, the stringified exception is stored under ``exception``,
        and the child is ended with status ERROR. The parent span is then
        marked ERROR, handed to ``_record_exception_on_span`` together with
        the error information derived from the exception, and ended at the
        same end time.

        Nothing is emitted when ``parent_otel_span`` is ``None``.

        Args:
            logging_payload: Payload exposing ``start_time``, ``end_time``,
                ``route``, ``request_data`` and ``exception``.
            parent_otel_span: Span to attach the child span to and to close.
        """
        from opentelemetry import trace
        from opentelemetry.trace import Status, StatusCode

        start_time = logging_payload.start_time
        end_time = logging_payload.end_time

        # Float timestamps are epoch seconds. Scale to nanoseconds BEFORE
        # truncating to int so sub-second precision is preserved (matching
        # the success hook); truncating first would snap both timestamps to
        # whole seconds.
        if isinstance(start_time, float):
            _start_time_ns = int(start_time * 1e9)
        else:
            _start_time_ns = self._to_ns(start_time)

        if isinstance(end_time, float):
            _end_time_ns = int(end_time * 1e9)
        else:
            _end_time_ns = self._to_ns(end_time)

        if parent_otel_span is not None:
            _span_name = logging_payload.route
            management_endpoint_span = self.tracer.start_span(
                name=_span_name,
                context=trace.set_span_in_context(parent_otel_span),
                start_time=_start_time_ns,
            )

            _request_data = logging_payload.request_data
            if _request_data is not None:
                for key, value in _request_data.items():
                    self.safe_set_attribute(
                        span=management_endpoint_span,
                        key=f"request.{key}",
                        value=value,
                    )

            _exception = logging_payload.exception
            self.safe_set_attribute(
                span=management_endpoint_span,
                key="exception",
                value=str(_exception),
            )
            management_endpoint_span.set_status(Status(StatusCode.ERROR))
            management_endpoint_span.end(end_time=_end_time_ns)

            # The management wrapper has no other hook that closes the SERVER span.
            from litellm.litellm_core_utils.litellm_logging import (
                StandardLoggingPayloadSetup,
            )

            error_information = StandardLoggingPayloadSetup.get_error_information(
                original_exception=_exception,
            )
            parent_otel_span.set_status(Status(StatusCode.ERROR))
            self._record_exception_on_span(
                span=parent_otel_span,
                kwargs={
                    "exception": _exception,
                    "standard_logging_object": {"error_information": error_information},
                },
            )
            parent_otel_span.end(end_time=_end_time_ns)

    def create_litellm_proxy_request_started_span(
        self,
        start_time: datetime,
        headers: dict,
    ) -> Optional[Span]:
        """
        Start the SERVER-kind span for a request received by the proxy.

        Args:
            start_time: When the request was received; converted with
                ``self._to_ns`` for the span's start time.
            headers: Incoming request headers, passed to
                ``get_traceparent_from_header`` to obtain the span context.

        Returns:
            The span returned by ``self.tracer.start_span``.
        """
        start_span = self.tracer.start_span
        span_options = {
            "name": LITELLM_PROXY_REQUEST_SPAN_NAME,
            "start_time": self._to_ns(start_time),
            "context": self.get_traceparent_from_header(headers=headers),
            "kind": self.span_kind.SERVER,
        }
        return start_span(**span_options)

    def set_proxy_request_route_attributes(
        self,
        span: Optional[Span],
        *,
        url_path: Optional[str] = None,
        http_route: Optional[str] = None,
    ) -> None:
        """
        Set OTel-standard ``http.route`` / ``url.path`` on the proxy SERVER
        span. Called from the auth path, the only point where both the
        SERVER span and the request are in hand. No-op if span/value missing.
        """
        if span is None:
            return
        if url_path:
            self.safe_set_attribute(span=span, key=URL_PATH_ATTRIBUTE, value=url_path)
        if http_route:
            self.safe_set_attribute(
                span=span, key=HTTP_ROUTE_ATTRIBUTE, value=http_route
            )

    def set_response_status_code_attribute(
        self, span: Optional[Span], status_code: Optional[int]
    ) -> None:
        """
        Set OTel-standard ``http.response.status_code`` (int) on the proxy
        SERVER span. The failure path sets this from the error code in
        ``_record_exception_on_span``; this is the success-path counterpart
        so the attribute is present on every SERVER span regardless of
        outcome (required by the HTTP semconv, and needed for error-ratio /
        status-breakdown dashboards). No-op if span/value missing.
        """
        if span is None or status_code is None:
            return
        self.safe_set_attribute(
            span=span,
            key=HTTP_RESPONSE_STATUS_CODE_ATTRIBUTE,
            value=int(status_code),
        )

    def record_error_attributes_on_span(
        self,
        span: Optional[Span],
        exception: Optional[Exception],
        status_code: int,
    ) -> None:
        """Stamp structured ``error.*`` attributes on the SERVER span from the
        exception returned to the client, with ``error.code`` pinned to the real
        response status. Idempotent (overwrites); emits no exception event."""
        if span is None or exception is None:
            return
        from litellm.litellm_core_utils.litellm_logging import (
            StandardLoggingPayloadSetup,
        )

        error_information = StandardLoggingPayloadSetup.get_error_information(
            original_exception=exception
        )
        error_information["error_code"] = str(status_code)
        self._record_exception_on_span(
            span=span,
            kwargs={
                "standard_logging_object": {"error_information": error_information}
            },
        )

    def set_preprocessing_duration_attribute(
        self, span: Optional[Span], container: Any
    ) -> None:
        """
        Set ``litellm.preprocessing.duration_ms`` (proxy-receive -> first
        provider handoff) on the proxy SERVER span. ``litellm_received_at``
        rides request metadata; ``first_api_call_start_time`` is the
        set-once first-handoff instant (retries/backoff excluded). Works
        uniformly for the success (model_call_details) and failure
        (request_data) containers. No-op if span/either anchor is missing.
        """
        if span is None or not isinstance(container, dict):
            return
        received_at = None
        # first_api_call_start_time is top-level (never in user metadata).
        first_handoff = container.get("first_api_call_start_time")
        _lp = container.get("litellm_params")
        for _md in (
            (_lp or {}).get("metadata") if isinstance(_lp, dict) else None,
            container.get("metadata"),
            container.get("litellm_metadata"),
        ):
            if isinstance(_md, dict):
                received_at = received_at or _md.get("litellm_received_at")
        if received_at is None or first_handoff is None:
            return
        try:
            start_ts = self._to_timestamp(received_at)
            end_ts = self._to_timestamp(first_handoff)
        except Exception:
            return
        if start_ts is None or end_ts is None:
            return
        duration_ms = (end_ts - start_ts) * 1000.0
        # Clock skew → omit rather than emit a negative latency.
        if duration_ms < 0:
            return
        self.safe_set_attribute(
            span=span,
            key=PREPROCESSING_DURATION_MS_ATTRIBUTE,
            value=duration_ms,
        )