MoFin/venv/lib/python3.12/site-packages/litellm/integrations/otel/logger.py

"""``CustomLogger`` adapter on the OpenTelemetry span engine."""

from collections import OrderedDict
from contextlib import contextmanager
from datetime import datetime
from typing import TYPE_CHECKING, Any, Callable, Iterator, Mapping, Sequence, cast

from opentelemetry.context import attach, get_current
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.trace import Span, Tracer, get_current_span, use_span

import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.integrations.otel.model.baggage import promoted_baggage
from litellm.integrations.otel.model.config import OpenTelemetryV2Config
from litellm.integrations.otel.plumbing.context import (
    is_recordable_span,
    request_root_span,
    resolve_parent_context,
    resolve_request_span_context,
    set_request_baggage,
    set_request_root_span,
)
from litellm.integrations.otel.emitter import SpanEmitter
from litellm.integrations.otel.mappers import resolve_mappers
from litellm.integrations.otel.model.metadata import (
    LLMCallEvent,
    RequestIdentity,
    model_from_request_data,
)
from litellm.integrations.otel.model.payloads import (
    GuardrailSpanData,
    LLMCallSpanData,
    MCPToolCallSpanData,
    ServiceSpanData,
    SpanError,
    is_mcp_tool_call,
)
from litellm.integrations.otel.plumbing.metrics import (
    GenAIMetricRecorder,
    create_genai_metrics,
)
from litellm.integrations.otel.plumbing.providers import (
    build_tracer_provider,
    get_meter,
    get_tracer,
    resolve_meter_provider,
)
from litellm.integrations.otel.plumbing.routing import TenantTracerCache
from litellm.integrations.otel.model.spans import SpanRole, span_role_for_service
from litellm.integrations.otel.model.utils import to_ns

if TYPE_CHECKING:
    from litellm.types.utils import (
        StandardLoggingGuardrailInformation,
        StandardLoggingPayload,
    )

LITELLM_TRACER_NAME = "litellm"

# Any callback whose class belongs to one of these modules is "the OTel
# callback" for proxy-global-registration purposes.
_OTEL_MODULES = (
    "litellm.integrations.otel",
    "litellm.integrations.opentelemetry",
)


# Cap on the open-call carrier map. A span opened at ``pre_call`` that never
# reaches a success/failure callback (e.g. a stream that only fires stream
# events) would otherwise linger; bounding the map evicts the oldest so memory
# stays flat on a long-running proxy while covering every concurrent in-flight
# call.
_OPEN_CALLS_MAX = 10_000


class _LLMCallSpan:
    """The state carried from the ``pre_call`` boundary to span close.

    ``span`` is the live span when it could be opened at the boundary (the server
    span was ambient), or ``None`` when creation was deferred because no ambient
    parent was visible — in which case the async callback creates it against its
    own (worker-copied) ambient context using ``start_time_ns``. The presence of
    a carrier for a call at all is the proof that ``pre_call`` ran, i.e. that an
    upstream call was actually attempted.
    """

    __slots__ = ("span", "start_time_ns")

    def __init__(self, span: "Span | None", start_time_ns: int | None) -> None:
        self.span = span
        self.start_time_ns = start_time_ns


class OpenTelemetryV2(CustomLogger):
    """The ``CustomLogger`` for OpenTelemetry."""

    def __init__(
        self,
        config: OpenTelemetryV2Config | None = None,
        callback_name: str | None = None,
        tracer_provider: TracerProvider | None = None,
        logger_provider: Any | None = None,  # reserved for OTel logs
        meter_provider: Any | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self.config: OpenTelemetryV2Config = config or OpenTelemetryV2Config(**kwargs)
        self.callback_name = callback_name
        self._tracer_provider: TracerProvider = (
            tracer_provider
            if tracer_provider is not None
            else build_tracer_provider(self.config)
        )
        self.tracer: Tracer = get_tracer(self._tracer_provider, LITELLM_TRACER_NAME)
        self._metrics_recorder = self._init_metrics(meter_provider)
        self._metric_filter_error_logged = False
        self._emitter = SpanEmitter(
            self.tracer, self.config, mappers=resolve_mappers(self.config.mapper_names)
        )
        self._tenant_tracers = TenantTracerCache(
            self.config, callback_name, LITELLM_TRACER_NAME
        )
        self._open_llm_calls: "OrderedDict[str, _LLMCallSpan]" = OrderedDict()
        self._init_otel_logger_on_litellm_proxy()

    def _init_metrics(self, meter_provider: Any | None) -> "GenAIMetricRecorder | None":
        """Create the six GenAI histograms when metrics are enabled, else ``None``.

        ``meter_provider`` is an explicit override (tests inject one); otherwise the
        provider is resolved from the OTel global so the operator's configured
        readers/exporters receive the metrics, building and registering one only
        when no global provider is set.
        """
        if not self.config.enable_metrics:
            return None
        provider = resolve_meter_provider(self.config, meter_provider)
        meter = get_meter(provider, LITELLM_TRACER_NAME)
        return GenAIMetricRecorder(create_genai_metrics(meter), self.callback_name)

    # ====================================================================== #
    #  Proxy global registration
    # ====================================================================== #

    def _register_in_callback_list(self, callbacks: list) -> None:
        already_otel = any(
            cb.__class__.__module__.startswith(_OTEL_MODULES)
            for cb in callbacks
            if hasattr(cb, "__class__")
        )
        if not already_otel:
            callbacks.append(self)

    def _init_otel_logger_on_litellm_proxy(self) -> None:
        try:
            from litellm.proxy import proxy_server
        except Exception:
            return
        try:
            self._register_in_callback_list(litellm.service_callback)
            self._register_in_callback_list(litellm.input_callback)
            self._register_in_callback_list(litellm._async_success_callback)
            self._register_in_callback_list(litellm._async_failure_callback)
        except Exception:
            pass
        if getattr(proxy_server, "open_telemetry_logger", None) is None:
            setattr(proxy_server, "open_telemetry_logger", self)

    # ====================================================================== #
    #  LLM-call callbacks — the span is opened at the ``pre_call`` boundary and
    #  closed here. See ``log_pre_api_call``.
    # ====================================================================== #

    def log_pre_api_call(self, model, messages, kwargs):
        """Open the LLM-call span at the call boundary.

        Runs synchronously inside the request task, before the upstream call —
        the one place where the live server span is genuinely the ambient OTel
        context — so the span parents to it natively, with no span threaded
        through a metadata dict. The open span is stashed on the per-request
        ``LiteLLMLoggingObj`` (a typed object) and closed in the async callback.

        When no recordable parent is visible (``pre_call`` was driven from a thread
        pool for a sync-only provider, where contextvars — and so the anchor —
        don't follow), creation is deferred: only the start time is recorded, and
        the async callback — whose worker context was copied from the request task
        and so still carries the anchor — creates the span then.

        Synthetic proxy-gate error logs (auth/rate-limit rejections) also fire this
        hook but never made an upstream call; they are tagged and skipped so no
        phantom LLM-call span is produced.
        """
        call = LLMCallEvent.from_dict(kwargs)
        if call.is_no_upstream_call:
            return
        call_id = call.call_id
        if call_id is None:
            return
        # Idempotent: a retried call may re-enter ``pre_call`` with the same
        # call id; keep the first span so its start time is the true one.
        if call_id in self._open_llm_calls:
            return
        start_time_ns = to_ns(datetime.now())
        span: Span | None = None
        # Parent to the request's anchored root span (stable across the request),
        # falling back to ambient on the SDK path. Open the span live only when
        # that resolves to a recordable parent; otherwise defer to the close
        # callback (the thread-pool case, where the anchor isn't visible here).
        parent_context = resolve_request_span_context()
        if is_recordable_span(get_current_span(parent_context)):
            span = self._emitter.start_span(
                SpanRole.LLM_CALL,
                call.provisional_span_name,
                parent_context=parent_context,
                start_time_ns=start_time_ns,
                tracer=self._tenant_tracers.tracer_for(
                    self.tracer, call.dynamic_params
                ),
            )
        self._open_llm_calls[call_id] = _LLMCallSpan(
            span=span, start_time_ns=start_time_ns
        )
        # Evict the oldest open call if the map is over budget. A call that opens
        # but never closes (a stream that only fires stream events) would linger
        # otherwise; the evicted span is simply dropped (never exported).
        if len(self._open_llm_calls) > _OPEN_CALLS_MAX:
            self._open_llm_calls.popitem(last=False)

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        if self._emit_mcp_tool_call(kwargs, start_time, end_time):
            return
        self._close_llm_call(kwargs, start_time, end_time)
        self._record_metrics(kwargs, response_obj, start_time, end_time)

    def _record_metrics(self, kwargs, response_obj, start_time, end_time) -> None:
        """Record the GenAI metrics for a successful LLM call. Best-effort: a
        recording failure (e.g. a malformed payload) must never break the span
        close or the request itself."""
        if self._metrics_recorder is None:
            return
        try:
            self._metrics_recorder.record(kwargs, response_obj, start_time, end_time)
        except ValueError as exc:
            if not self._metric_filter_error_logged:
                verbose_logger.error(
                    "OpenTelemetryV2: invalid otel.attributes metric filter, metrics disabled: %s",
                    exc,
                )
                self._metric_filter_error_logged = True
        except Exception as exc:
            verbose_logger.debug("OpenTelemetryV2: metric recording failed: %s", exc)

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        if self._emit_mcp_tool_call(kwargs, start_time, end_time):
            return
        self._close_llm_call(kwargs, start_time, end_time)

    def _emit_mcp_tool_call(
        self,
        kwargs: Mapping[str, Any],
        start_time: datetime | float | None,
        end_time: datetime | float | None,
    ) -> bool:
        """Emit an MCP tool-call span when the closed request was a tool call.

        MCP tool calls reach the success/failure callbacks like any other request
        (with ``call_type`` ``call_mcp_tool``), but they are not LLM calls and have
        no ``pre_call`` carrier — so they get their own CLIENT span here, parented
        to the request's server span. Returns whether it handled the event, so the
        caller skips the LLM-call path. The whole span is emitted at once (there is
        no boundary to open it at), deduped on the call id by the emitter.
        """
        raw_payload = kwargs.get("standard_logging_object")
        if not raw_payload or not is_mcp_tool_call(
            cast(Mapping[str, object], raw_payload)
        ):
            return False
        payload = cast("StandardLoggingPayload", raw_payload)
        data = MCPToolCallSpanData.from_standard_logging_payload(
            payload, capture_content=self.config.capture_span_content
        )
        # A stray LLM carrier from a ``pre_call`` that mis-fired for this id would
        # otherwise linger until evicted; drop it so it's neither leaked nor closed
        # as a phantom LLM span.
        if data.identity.call_id:
            self._open_llm_calls.pop(data.identity.call_id, None)
        self._emitter.emit(
            SpanRole.MCP_TOOL_CALL,
            data,
            parent_context=resolve_request_span_context(),
            start_time_ns=to_ns(start_time),
            end_time_ns=to_ns(end_time),
        )
        return True

    def _close_llm_call(
        self,
        kwargs: Mapping[str, Any],
        start_time: datetime | float | None,
        end_time: datetime | float | None,
    ) -> Span | None:
        """Finish the LLM-call span opened at ``pre_call`` (or create it deferred).

        No carrier for this call id means ``pre_call`` never ran — the request was
        rejected at the gate or blocked by a pre-call guardrail before any upstream
        call — so there is nothing to record and no phantom span.
        """
        call = LLMCallEvent.from_dict(kwargs)
        call_id = call.call_id
        # ``pop`` is the dedup: this method runs from both the success and failure
        # paths, and whichever fires first removes the carrier and closes the span.
        carrier = self._open_llm_calls.pop(call_id, None) if call_id else None
        if carrier is None:
            return None
        payload = call.payload
        if payload is None:
            if carrier.span is not None:
                # Opened at the boundary but the payload never materialized — end
                # it (named provisionally) so it isn't leaked as an open span.
                carrier.span.end(end_time=to_ns(end_time))
            return None
        data = LLMCallSpanData.from_standard_logging_payload(
            payload, capture_content=self.config.capture_span_content
        )
        end_time_ns = to_ns(end_time)
        if carrier.span is not None:
            # Born at the boundary: stamp attributes from the typed payload, set
            # status, and end it. Its parent (the server span) was captured at
            # creation from real ambient context.
            self._emitter.finish_span(
                SpanRole.LLM_CALL, carrier.span, data, end_time_ns=end_time_ns
            )
            return carrier.span
        # Deferred: ``pre_call`` saw no recordable parent, so create the span now.
        # The worker copied the request task's context, which carries the anchored
        # root span — parent to it (ambient fallback on the SDK path). Seed identity
        # Baggage so the span — and the SDK path, which has none — is labeled
        # consistently.
        parent_ctx = resolve_request_span_context()
        bag = promoted_baggage(
            data.identity,
            data.request_model,
            promoted_keys=tuple(self.config.baggage_promoted_keys),
            metadata_keys=tuple(self.config.baggage_metadata_keys),
            team_metadata_keys=tuple(self.config.baggage_team_metadata_keys),
        )
        if bag:
            parent_ctx = set_request_baggage(bag, context=parent_ctx)
        return self._emitter.emit(
            SpanRole.LLM_CALL,
            data,
            parent_context=parent_ctx,
            start_time_ns=carrier.start_time_ns,
            end_time_ns=end_time_ns,
            tracer=self._tenant_tracers.tracer_for(self.tracer, call.dynamic_params),
        )

    # ====================================================================== #
    #  Service hooks
    # ====================================================================== #

    async def async_service_success_hook(
        self,
        payload: Any,
        parent_otel_span: Span | None = None,
        start_time: datetime | float | None = None,
        end_time: datetime | float | None = None,
        event_metadata: dict | None = None,
    ) -> None:
        self._emit_service(
            payload,
            parent_otel_span=parent_otel_span,
            start_time=start_time,
            end_time=end_time,
            event_metadata=event_metadata,
            error_override=None,
        )

    async def async_service_failure_hook(
        self,
        payload: Any,
        error: str | None = "",
        parent_otel_span: Span | None = None,
        start_time: datetime | float | None = None,
        end_time: datetime | float | None = None,
        event_metadata: dict | None = None,
    ) -> None:
        self._emit_service(
            payload,
            parent_otel_span=parent_otel_span,
            start_time=start_time,
            end_time=end_time,
            event_metadata=event_metadata,
            error_override=error or "error",
        )

    def _emit_service(
        self,
        payload: Any,
        *,
        parent_otel_span: Span | None,
        start_time: datetime | float | None,
        end_time: datetime | float | None,
        event_metadata: dict | None,
        error_override: str | None,
    ) -> Span | None:
        data = ServiceSpanData.from_payload(payload, event_metadata=event_metadata)
        # Decide whether this service call is a span at all, and of what kind.
        # ``None`` means metrics-only (framework instrumentation that duplicates a
        # gen-AI span — ``self``/``router``/``proxy_pre_call`` — or ``auth``, which
        # gets a live phase span instead). Those still feed Prometheus/Datadog via
        # their own hooks; they just never enter the trace.
        role = span_role_for_service(data.service_name)
        if role is None:
            return None
        # A metrics-only ping with neither timing nor a parent (in-memory queue
        # gauges) is not a traceable operation; a span for it would be a
        # zero-duration root with no context, so skip it. Real background work
        # (budget/reset jobs, spend flush) passes start/end times and still emits
        # as a root; anything with a parent emits regardless.
        if (
            error_override is None
            and start_time is None
            and end_time is None
            and parent_otel_span is None
        ):
            return None
        if error_override is not None and data.error is None:
            data = ServiceSpanData(
                service_name=data.service_name,
                call_type=data.call_type,
                error=SpanError(message=error_override),
                event_metadata=data.event_metadata,
            )
        # Parent like every other span: ambient context first (so identity Baggage
        # rides along and the call nests under whatever request phase is active —
        # e.g. a DB lookup under the live ``auth`` span), falling back to the
        # server span the proxy threaded as ``parent_otel_span``. A background
        # service call has neither, so it starts its own root trace.
        parent_context = resolve_parent_context(threaded=parent_otel_span)
        return self._emitter.emit(
            role,
            data,
            parent_context=parent_context,
            start_time_ns=to_ns(start_time),
            end_time_ns=to_ns(end_time),
        )

    # ====================================================================== #
    #  async_post_call_* hooks — emit guardrail spans. The server span's status
    #  / errors are the FastAPI instrumentor's job, so we don't touch it here.
    # ====================================================================== #

    def seed_request_identity(self, user_api_key_dict: Any, model: Any = None) -> None:
        """Attach request-identity Baggage to the current context + server span.

        Seeding identity into Baggage makes **every** span emitted afterwards for
        this request — LLM call, guardrail, DB call — inherit it via
        ``LiteLLMBaggageSpanProcessor``. Called once at the auth boundary (as soon
        as the key resolves) so post-auth spans are labeled consistently; the
        Baggage rides the request task's contextvar from there on. Auth-internal
        DB lookups that run before the key is known stay unlabeled — identity
        isn't determined yet, which is correct.
        """
        try:
            identity = RequestIdentity.from_user_api_key_auth(user_api_key_dict)
            bag = promoted_baggage(
                identity,
                model,
                promoted_keys=tuple(self.config.baggage_promoted_keys),
                metadata_keys=tuple(self.config.baggage_metadata_keys),
                team_metadata_keys=tuple(self.config.baggage_team_metadata_keys),
            )
            if bag:
                # Attach (no detach): the contextvar is scoped to this request's
                # asyncio task and is reclaimed when the task ends.
                attach(set_request_baggage(bag, context=get_current()))
                # The server span was started by the instrumentor before this ran,
                # so the Baggage processor (which only fires at span start) won't
                # backfill it — stamp identity on it directly. Prefer the anchored
                # root span over the ambient one so identity still lands on the
                # server span when seeding from inside the live ``auth`` phase span
                # (the auth-failure path), where ``get_current_span`` is the phase
                # span, not the request's root.
                server_span = request_root_span() or get_current_span()
                if is_recordable_span(server_span):
                    # Re-capture the anchor here too: this runs post-auth with the
                    # server span active and covers entrypoints that bypass
                    # ``create_litellm_proxy_request_started_span`` (e.g. the SDK
                    # path's ``async_pre_call_hook``). Idempotent.
                    set_request_root_span(server_span)
                    for key, value in bag.items():
                        server_span.set_attribute(key, value)
        except Exception:
            pass

    @contextmanager
    def start_phase_span(self, name: str) -> "Iterator[Span]":
        span = self._emitter.start_span(SpanRole.SERVICE, name)
        with use_span(span, end_on_exit=True):
            yield span

    async def async_pre_call_hook(
        self,
        user_api_key_dict: Any,
        cache: Any,
        data: dict,
        call_type: Any,
    ) -> dict:
        self.seed_request_identity(
            user_api_key_dict,
            model=model_from_request_data(data),
        )
        return data

    def emit_guardrail_span(self, entry: "StandardLoggingGuardrailInformation") -> None:
        # Emitted by the guardrail-recording code the moment a guardrail finishes,
        # not from a post-call hook — that hook does not fire on every path (a
        # pass-through request that passes its guardrails never reaches it), which
        # left passing guardrails without a span.
        #
        # A guardrail is a sibling of the LLM call under the request's root span,
        # so parent it to the explicit anchor — never the active span, which during
        # a pre_call guardrail can be the live ``auth`` phase span. Emit with the
        # guardrail's actual execution window so a pre_call guardrail is placed
        # before the LLM call rather than at emission time. One entry in, one span
        # out — the module-level entry point routes each entry to this single
        # registered logger so a guardrail is never emitted more than once.
        data = GuardrailSpanData.from_logging_entry(entry)
        self._emitter.emit(
            SpanRole.GUARDRAIL,
            data,
            parent_context=resolve_request_span_context(),
            start_time_ns=to_ns(data.start_time),
            end_time_ns=to_ns(data.end_time),
        )

    def create_litellm_proxy_request_started_span(
        self, start_time: datetime, headers: Mapping[str, str] | None
    ) -> Span | None:
        span = get_current_span()
        if not is_recordable_span(span):
            return None
        set_request_root_span(span)
        return span


def select_global_otel_v2_logger(
    in_memory_loggers: Sequence[object],
    registered: "OpenTelemetryV2 | None" = None,
) -> "OpenTelemetryV2":
    """The single ``OpenTelemetryV2`` whose provider should become the OTel global.

    The callback factory designates one logger as canonical the moment it builds
    the first one (``_init_otel_logger_on_litellm_proxy`` sets
    ``proxy_server.open_telemetry_logger``), and every other v2 entry point —
    guardrail, identity seeding, phase spans — already routes through that same
    ``registered`` owner. Reuse it here too so the global provider has one source
    of truth instead of a second, independently-derived guess; this is the logger
    a preset (arize, langfuse, …) folds the ``OTEL_*`` base exporter and its own
    exporter into, so the FastAPI server span and the gen-ai spans share one
    provider and one trace.

    Fall back to ``in_memory_loggers`` for the SDK path, where no proxy global is
    set (selecting from there, not ``service_callback``, which a preset logger does
    not always reach), and build a generic logger from ``OTEL_*`` only when none was
    configured at all. Each fallback still avoids the second generic logger that
    orphaned the gen-ai spans onto a different backend than the server span.
    """
    if registered is not None:
        return registered
    existing = next(
        (cb for cb in in_memory_loggers if isinstance(cb, OpenTelemetryV2)), None
    )
    return existing if existing is not None else OpenTelemetryV2()


def publish_global_otel_v2_provider(
    in_memory_loggers: Sequence[object],
    set_global_provider: Callable[[TracerProvider], None],
    registered: "OpenTelemetryV2 | None" = None,
) -> "OpenTelemetryV2":
    """Select the single v2 logger and publish its provider as the OTel global.

    The proxy calls this once at startup, after callbacks are initialized, so the
    preset logger already exists; it passes ``registered`` (the canonical owner the
    factory designated as ``proxy_server.open_telemetry_logger``) so the global
    provider reuses the same logger the rest of the v2 code emits through (see
    :func:`select_global_otel_v2_logger`). Both ``registered`` and
    ``set_global_provider`` (the proxy passes
    ``opentelemetry.trace.set_tracer_provider``) are injected so the publish step is
    unit-testable without reading or mutating real global OTel state. Returns the
    logger whose provider was published.
    """
    logger = select_global_otel_v2_logger(in_memory_loggers, registered=registered)
    set_global_provider(logger._tracer_provider)
    return logger


def _registered_v2_logger() -> "OpenTelemetryV2 | None":
    try:
        from litellm.proxy import proxy_server
    except Exception:
        return None
    logger = getattr(proxy_server, "open_telemetry_logger", None)
    return logger if isinstance(logger, OpenTelemetryV2) else None


def emit_guardrail_span(entry: "StandardLoggingGuardrailInformation") -> None:
    """Emit a guardrail span on the registered v2 OTel logger.

    Called by the guardrail-recording code the moment a guardrail finishes, so a
    span is produced regardless of whether a post-call hook later runs (it does
    not on the pass-through allow path). Routes through the single canonical
    logger — the same one every other v2 entry point uses — so a guardrail
    recorded once yields exactly one span; fanning out across every reachable
    ``OpenTelemetryV2`` instance double-emits the same entry. Best-effort: span
    emission must never break guardrail evaluation.
    """
    logger = _registered_v2_logger()
    if logger is None:
        return
    try:
        logger.emit_guardrail_span(entry)
    except Exception:
        pass


def seed_request_identity(user_api_key_dict: Any, model: Any = None) -> None:
    logger = _registered_v2_logger()
    if logger is not None:
        logger.seed_request_identity(user_api_key_dict, model=model)


@contextmanager
def phase_span(name: str) -> "Iterator[Span | None]":
    logger = _registered_v2_logger()
    if logger is None:
        yield None
        return
    with logger.start_phase_span(name) as span:
        yield span