MoFin/venv/lib/python3.12/site-packages/litellm/integrations/otel/emitter.py

"""The span engine: dedup, start, run the mapper chain, set status, end."""

from collections import OrderedDict
from typing import Callable, Sequence

from opentelemetry.context import Context
from opentelemetry.trace import Span, Tracer
from opentelemetry.trace.status import Status, StatusCode

from litellm.integrations.otel.model.config import OpenTelemetryV2Config
from litellm.integrations.otel.mappers import resolve_mappers
from litellm.integrations.otel.mappers.base import AttributeMapper, SpanData
from litellm.integrations.otel.model.payloads import (
    GuardrailSpanData,
    LLMCallSpanData,
    MCPToolCallSpanData,
    ServiceSpanData,
)
from litellm.integrations.otel.plumbing.providers import to_otel_span_kind
from litellm.integrations.otel.model.semconv import Error, ExceptionEvent
from litellm.integrations.otel.model.spans import (
    SPAN_REGISTRY,
    SpanRole,
    guardrail_span_name,
    llm_call_span_name,
    mcp_tool_call_span_name,
    service_span_name,
)

# Roles emit() knows how to name and emit. PROXY_REQUEST and the management
# routes are SERVER spans owned by the mounted FastAPI instrumentor, so they
# have no builder here.
_NAME_BUILDERS: dict[SpanRole, Callable[..., str]] = {
    SpanRole.LLM_CALL: llm_call_span_name,
    SpanRole.MCP_TOOL_CALL: mcp_tool_call_span_name,
    SpanRole.GUARDRAIL: guardrail_span_name,
    # DB_CALL and SERVICE are both built from ServiceSpanData; they differ only in
    # span kind (CLIENT vs INTERNAL) and attribute vocabulary, not in naming.
    SpanRole.DB_CALL: service_span_name,
    SpanRole.SERVICE: service_span_name,
}

# Cap on the dedup cache. It only needs to coalesce the sync+async firing window
# of a single in-flight request, so a bounded LRU keeps memory flat on a
# long-running proxy while still covering every concurrently-open call.
_DEDUP_CACHE_MAX = 10_000


class SpanEmitter:
    def __init__(
        self,
        tracer: Tracer,
        config: OpenTelemetryV2Config,
        mappers: Sequence[AttributeMapper] | None = None,
    ) -> None:
        self._tracer = tracer
        self._config = config
        # The mapper chain is the sole source of span attributes. When not
        # passed in, resolve it from the config so there's one source of truth.
        self._mappers: list[AttributeMapper] = (
            list(mappers)
            if mappers is not None
            else resolve_mappers(config.mapper_names)
        )
        # Bounded LRU (ordered by insertion / most-recent touch). Storing keys
        # only — the value is unused — so it behaves like a capped set.
        self._emitted: "OrderedDict[tuple[str, SpanRole], None]" = OrderedDict()

    # -- low-level helpers --------------------------------------------------- #

    def start_span(
        self,
        role: SpanRole,
        name: str,
        parent_context: Context | None = None,
        start_time_ns: int | None = None,
        *,
        tracer: Tracer | None = None,
    ) -> Span:
        """Start a span for ``role`` without dedup or attribute mapping.

        For callers that own and manage their own span lifecycle. ``tracer``
        overrides the bound tracer for this span only, used for per-request
        multi-tenant credential routing.
        """
        return (tracer or self._tracer).start_span(
            name,
            context=parent_context,
            kind=to_otel_span_kind(SPAN_REGISTRY[role].kind),
            start_time=start_time_ns,
        )

    def _seen(self, dedup_key: str | None, role: SpanRole) -> bool:
        """Return True once a ``(dedup_key, role)`` pair has been emitted.

        Guards against emitting the same span twice when a streaming call
        fires both a sync and an async logging callback.
        """
        if not dedup_key:
            return False
        marker = (dedup_key, role)
        if marker in self._emitted:
            self._emitted.move_to_end(marker)
            return True
        self._emitted[marker] = None
        if len(self._emitted) > _DEDUP_CACHE_MAX:
            self._emitted.popitem(last=False)  # evict least-recently-used
        return False

    # -- the engine ---------------------------------------------------------- #

    def emit(
        self,
        role: SpanRole,
        data: SpanData,
        parent_context: Context | None = None,
        *,
        start_time_ns: int | None = None,
        end_time_ns: int | None = None,
        tracer: Tracer | None = None,
    ) -> Span | None:
        """Emit one complete span: dedup, start, map attributes, status, end.

        Return the span, or ``None`` if it was deduplicated away. ``tracer``
        overrides the bound tracer for this span, used for per-request routing.
        """
        # LLM-call and MCP tool-call spans carry a dedup key (their request's
        # call id), so a sync+async double-firing coalesces. ``isinstance`` narrows
        # the type for mypy and keeps the engine free of duck-typed attribute reads.
        dedup_key = (
            data.identity.call_id
            if isinstance(data, (LLMCallSpanData, MCPToolCallSpanData))
            else None
        )
        if self._seen(dedup_key, role):
            return None
        span = self.start_span(
            role,
            _NAME_BUILDERS[role](data),
            parent_context=parent_context,
            start_time_ns=start_time_ns,
            tracer=tracer,
        )
        self.finish_span(role, span, data, end_time_ns=end_time_ns)
        return span

    def finish_span(
        self,
        role: SpanRole,
        span: Span,
        data: SpanData,
        *,
        end_time_ns: int | None = None,
    ) -> None:
        """Stamp attributes + status on an already-started ``span`` and end it.

        The counterpart to :meth:`start_span` for callers that own a span's
        lifecycle — the LLM-call span is opened at the request's ``pre_call``
        boundary (so it parents to the live server span via real ambient context,
        never a span threaded through a metadata dict) and closed here once the
        typed payload is available. The span name is (re)built from the now-known
        data, since the boundary opener only has a provisional name.
        """
        span.update_name(_NAME_BUILDERS[role](data))
        for mapper in self._mappers:
            for key, value in mapper.map(data).items():
                span.set_attribute(key, value)
        error = (
            data.error
            if isinstance(
                data,
                (
                    LLMCallSpanData,
                    MCPToolCallSpanData,
                    ServiceSpanData,
                    GuardrailSpanData,
                ),
            )
            else None
        )
        if error and (error.error_type or error.message):
            error_type = error.error_type or "error"
            message = error.message or error.error_type or "error"
            span.set_attribute(Error.TYPE, error_type)
            span.set_status(Status(StatusCode.ERROR, message))
            # Carry the full message on the standard ``exception`` event so backends
            # map it as full text under ``exception.message``. Setting it as a bare
            # string attribute instead lets backends like Elasticsearch dynamic-map
            # it to a ``keyword`` capped at 1024 chars, truncating the message.
            span.add_event(
                ExceptionEvent.NAME,
                {ExceptionEvent.TYPE: error_type, ExceptionEvent.MESSAGE: message},
            )
        # On success leave the status UNSET (the semconv default) rather than
        # forcing OK — that matches the FastAPI server span and avoids implying a
        # span-level health signal litellm doesn't actually evaluate. Only a
        # genuine error sets a status.
        span.end(end_time=end_time_ns)