fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
644 lines
28 KiB
Python
644 lines
28 KiB
Python
"""``CustomLogger`` adapter on the OpenTelemetry span engine."""
|
|
|
|
from collections import OrderedDict
|
|
from contextlib import contextmanager
|
|
from datetime import datetime
|
|
from typing import TYPE_CHECKING, Any, Callable, Iterator, Mapping, Sequence, cast
|
|
|
|
from opentelemetry.context import attach, get_current
|
|
from opentelemetry.sdk.trace import TracerProvider
|
|
from opentelemetry.trace import Span, Tracer, get_current_span, use_span
|
|
|
|
import litellm
|
|
from litellm._logging import verbose_logger
|
|
from litellm.integrations.custom_logger import CustomLogger
|
|
from litellm.integrations.otel.model.baggage import promoted_baggage
|
|
from litellm.integrations.otel.model.config import OpenTelemetryV2Config
|
|
from litellm.integrations.otel.plumbing.context import (
|
|
is_recordable_span,
|
|
request_root_span,
|
|
resolve_parent_context,
|
|
resolve_request_span_context,
|
|
set_request_baggage,
|
|
set_request_root_span,
|
|
)
|
|
from litellm.integrations.otel.emitter import SpanEmitter
|
|
from litellm.integrations.otel.mappers import resolve_mappers
|
|
from litellm.integrations.otel.model.metadata import (
|
|
LLMCallEvent,
|
|
RequestIdentity,
|
|
model_from_request_data,
|
|
)
|
|
from litellm.integrations.otel.model.payloads import (
|
|
GuardrailSpanData,
|
|
LLMCallSpanData,
|
|
MCPToolCallSpanData,
|
|
ServiceSpanData,
|
|
SpanError,
|
|
is_mcp_tool_call,
|
|
)
|
|
from litellm.integrations.otel.plumbing.metrics import (
|
|
GenAIMetricRecorder,
|
|
create_genai_metrics,
|
|
)
|
|
from litellm.integrations.otel.plumbing.providers import (
|
|
build_tracer_provider,
|
|
get_meter,
|
|
get_tracer,
|
|
resolve_meter_provider,
|
|
)
|
|
from litellm.integrations.otel.plumbing.routing import TenantTracerCache
|
|
from litellm.integrations.otel.model.spans import SpanRole, span_role_for_service
|
|
from litellm.integrations.otel.model.utils import to_ns
|
|
|
|
if TYPE_CHECKING:
|
|
from litellm.types.utils import (
|
|
StandardLoggingGuardrailInformation,
|
|
StandardLoggingPayload,
|
|
)
|
|
|
|
LITELLM_TRACER_NAME = "litellm"
|
|
|
|
# Any callback whose class belongs to one of these modules is "the OTel
|
|
# callback" for proxy-global-registration purposes.
|
|
_OTEL_MODULES = (
|
|
"litellm.integrations.otel",
|
|
"litellm.integrations.opentelemetry",
|
|
)
|
|
|
|
|
|
# Cap on the open-call carrier map. A span opened at ``pre_call`` that never
|
|
# reaches a success/failure callback (e.g. a stream that only fires stream
|
|
# events) would otherwise linger; bounding the map evicts the oldest so memory
|
|
# stays flat on a long-running proxy while covering every concurrent in-flight
|
|
# call.
|
|
_OPEN_CALLS_MAX = 10_000
|
|
|
|
|
|
class _LLMCallSpan:
|
|
"""The state carried from the ``pre_call`` boundary to span close.
|
|
|
|
``span`` is the live span when it could be opened at the boundary (the server
|
|
span was ambient), or ``None`` when creation was deferred because no ambient
|
|
parent was visible — in which case the async callback creates it against its
|
|
own (worker-copied) ambient context using ``start_time_ns``. The presence of
|
|
a carrier for a call at all is the proof that ``pre_call`` ran, i.e. that an
|
|
upstream call was actually attempted.
|
|
"""
|
|
|
|
__slots__ = ("span", "start_time_ns")
|
|
|
|
def __init__(self, span: "Span | None", start_time_ns: int | None) -> None:
|
|
self.span = span
|
|
self.start_time_ns = start_time_ns
|
|
|
|
|
|
class OpenTelemetryV2(CustomLogger):
|
|
"""The ``CustomLogger`` for OpenTelemetry."""
|
|
|
|
def __init__(
|
|
self,
|
|
config: OpenTelemetryV2Config | None = None,
|
|
callback_name: str | None = None,
|
|
tracer_provider: TracerProvider | None = None,
|
|
logger_provider: Any | None = None, # reserved for OTel logs
|
|
meter_provider: Any | None = None,
|
|
**kwargs: Any,
|
|
) -> None:
|
|
super().__init__(**kwargs)
|
|
self.config: OpenTelemetryV2Config = config or OpenTelemetryV2Config(**kwargs)
|
|
self.callback_name = callback_name
|
|
self._tracer_provider: TracerProvider = (
|
|
tracer_provider
|
|
if tracer_provider is not None
|
|
else build_tracer_provider(self.config)
|
|
)
|
|
self.tracer: Tracer = get_tracer(self._tracer_provider, LITELLM_TRACER_NAME)
|
|
self._metrics_recorder = self._init_metrics(meter_provider)
|
|
self._metric_filter_error_logged = False
|
|
self._emitter = SpanEmitter(
|
|
self.tracer, self.config, mappers=resolve_mappers(self.config.mapper_names)
|
|
)
|
|
self._tenant_tracers = TenantTracerCache(
|
|
self.config, callback_name, LITELLM_TRACER_NAME
|
|
)
|
|
self._open_llm_calls: "OrderedDict[str, _LLMCallSpan]" = OrderedDict()
|
|
self._init_otel_logger_on_litellm_proxy()
|
|
|
|
def _init_metrics(self, meter_provider: Any | None) -> "GenAIMetricRecorder | None":
|
|
"""Create the six GenAI histograms when metrics are enabled, else ``None``.
|
|
|
|
``meter_provider`` is an explicit override (tests inject one); otherwise the
|
|
provider is resolved from the OTel global so the operator's configured
|
|
readers/exporters receive the metrics, building and registering one only
|
|
when no global provider is set.
|
|
"""
|
|
if not self.config.enable_metrics:
|
|
return None
|
|
provider = resolve_meter_provider(self.config, meter_provider)
|
|
meter = get_meter(provider, LITELLM_TRACER_NAME)
|
|
return GenAIMetricRecorder(create_genai_metrics(meter), self.callback_name)
|
|
|
|
# ====================================================================== #
|
|
# Proxy global registration
|
|
# ====================================================================== #
|
|
|
|
def _register_in_callback_list(self, callbacks: list) -> None:
|
|
already_otel = any(
|
|
cb.__class__.__module__.startswith(_OTEL_MODULES)
|
|
for cb in callbacks
|
|
if hasattr(cb, "__class__")
|
|
)
|
|
if not already_otel:
|
|
callbacks.append(self)
|
|
|
|
def _init_otel_logger_on_litellm_proxy(self) -> None:
|
|
try:
|
|
from litellm.proxy import proxy_server
|
|
except Exception:
|
|
return
|
|
try:
|
|
self._register_in_callback_list(litellm.service_callback)
|
|
self._register_in_callback_list(litellm.input_callback)
|
|
self._register_in_callback_list(litellm._async_success_callback)
|
|
self._register_in_callback_list(litellm._async_failure_callback)
|
|
except Exception:
|
|
pass
|
|
if getattr(proxy_server, "open_telemetry_logger", None) is None:
|
|
setattr(proxy_server, "open_telemetry_logger", self)
|
|
|
|
# ====================================================================== #
|
|
# LLM-call callbacks — the span is opened at the ``pre_call`` boundary and
|
|
# closed here. See ``log_pre_api_call``.
|
|
# ====================================================================== #
|
|
|
|
def log_pre_api_call(self, model, messages, kwargs):
|
|
"""Open the LLM-call span at the call boundary.
|
|
|
|
Runs synchronously inside the request task, before the upstream call —
|
|
the one place where the live server span is genuinely the ambient OTel
|
|
context — so the span parents to it natively, with no span threaded
|
|
through a metadata dict. The open span is stashed on the per-request
|
|
``LiteLLMLoggingObj`` (a typed object) and closed in the async callback.
|
|
|
|
When no recordable parent is visible (``pre_call`` was driven from a thread
|
|
pool for a sync-only provider, where contextvars — and so the anchor —
|
|
don't follow), creation is deferred: only the start time is recorded, and
|
|
the async callback — whose worker context was copied from the request task
|
|
and so still carries the anchor — creates the span then.
|
|
|
|
Synthetic proxy-gate error logs (auth/rate-limit rejections) also fire this
|
|
hook but never made an upstream call; they are tagged and skipped so no
|
|
phantom LLM-call span is produced.
|
|
"""
|
|
call = LLMCallEvent.from_dict(kwargs)
|
|
if call.is_no_upstream_call:
|
|
return
|
|
call_id = call.call_id
|
|
if call_id is None:
|
|
return
|
|
# Idempotent: a retried call may re-enter ``pre_call`` with the same
|
|
# call id; keep the first span so its start time is the true one.
|
|
if call_id in self._open_llm_calls:
|
|
return
|
|
start_time_ns = to_ns(datetime.now())
|
|
span: Span | None = None
|
|
# Parent to the request's anchored root span (stable across the request),
|
|
# falling back to ambient on the SDK path. Open the span live only when
|
|
# that resolves to a recordable parent; otherwise defer to the close
|
|
# callback (the thread-pool case, where the anchor isn't visible here).
|
|
parent_context = resolve_request_span_context()
|
|
if is_recordable_span(get_current_span(parent_context)):
|
|
span = self._emitter.start_span(
|
|
SpanRole.LLM_CALL,
|
|
call.provisional_span_name,
|
|
parent_context=parent_context,
|
|
start_time_ns=start_time_ns,
|
|
tracer=self._tenant_tracers.tracer_for(
|
|
self.tracer, call.dynamic_params
|
|
),
|
|
)
|
|
self._open_llm_calls[call_id] = _LLMCallSpan(
|
|
span=span, start_time_ns=start_time_ns
|
|
)
|
|
# Evict the oldest open call if the map is over budget. A call that opens
|
|
# but never closes (a stream that only fires stream events) would linger
|
|
# otherwise; the evicted span is simply dropped (never exported).
|
|
if len(self._open_llm_calls) > _OPEN_CALLS_MAX:
|
|
self._open_llm_calls.popitem(last=False)
|
|
|
|
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
|
|
if self._emit_mcp_tool_call(kwargs, start_time, end_time):
|
|
return
|
|
self._close_llm_call(kwargs, start_time, end_time)
|
|
self._record_metrics(kwargs, response_obj, start_time, end_time)
|
|
|
|
def _record_metrics(self, kwargs, response_obj, start_time, end_time) -> None:
|
|
"""Record the GenAI metrics for a successful LLM call. Best-effort: a
|
|
recording failure (e.g. a malformed payload) must never break the span
|
|
close or the request itself."""
|
|
if self._metrics_recorder is None:
|
|
return
|
|
try:
|
|
self._metrics_recorder.record(kwargs, response_obj, start_time, end_time)
|
|
except ValueError as exc:
|
|
if not self._metric_filter_error_logged:
|
|
verbose_logger.error(
|
|
"OpenTelemetryV2: invalid otel.attributes metric filter, metrics disabled: %s",
|
|
exc,
|
|
)
|
|
self._metric_filter_error_logged = True
|
|
except Exception as exc:
|
|
verbose_logger.debug("OpenTelemetryV2: metric recording failed: %s", exc)
|
|
|
|
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
|
|
if self._emit_mcp_tool_call(kwargs, start_time, end_time):
|
|
return
|
|
self._close_llm_call(kwargs, start_time, end_time)
|
|
|
|
def _emit_mcp_tool_call(
|
|
self,
|
|
kwargs: Mapping[str, Any],
|
|
start_time: datetime | float | None,
|
|
end_time: datetime | float | None,
|
|
) -> bool:
|
|
"""Emit an MCP tool-call span when the closed request was a tool call.
|
|
|
|
MCP tool calls reach the success/failure callbacks like any other request
|
|
(with ``call_type`` ``call_mcp_tool``), but they are not LLM calls and have
|
|
no ``pre_call`` carrier — so they get their own CLIENT span here, parented
|
|
to the request's server span. Returns whether it handled the event, so the
|
|
caller skips the LLM-call path. The whole span is emitted at once (there is
|
|
no boundary to open it at), deduped on the call id by the emitter.
|
|
"""
|
|
raw_payload = kwargs.get("standard_logging_object")
|
|
if not raw_payload or not is_mcp_tool_call(
|
|
cast(Mapping[str, object], raw_payload)
|
|
):
|
|
return False
|
|
payload = cast("StandardLoggingPayload", raw_payload)
|
|
data = MCPToolCallSpanData.from_standard_logging_payload(
|
|
payload, capture_content=self.config.capture_span_content
|
|
)
|
|
# A stray LLM carrier from a ``pre_call`` that mis-fired for this id would
|
|
# otherwise linger until evicted; drop it so it's neither leaked nor closed
|
|
# as a phantom LLM span.
|
|
if data.identity.call_id:
|
|
self._open_llm_calls.pop(data.identity.call_id, None)
|
|
self._emitter.emit(
|
|
SpanRole.MCP_TOOL_CALL,
|
|
data,
|
|
parent_context=resolve_request_span_context(),
|
|
start_time_ns=to_ns(start_time),
|
|
end_time_ns=to_ns(end_time),
|
|
)
|
|
return True
|
|
|
|
def _close_llm_call(
|
|
self,
|
|
kwargs: Mapping[str, Any],
|
|
start_time: datetime | float | None,
|
|
end_time: datetime | float | None,
|
|
) -> Span | None:
|
|
"""Finish the LLM-call span opened at ``pre_call`` (or create it deferred).
|
|
|
|
No carrier for this call id means ``pre_call`` never ran — the request was
|
|
rejected at the gate or blocked by a pre-call guardrail before any upstream
|
|
call — so there is nothing to record and no phantom span.
|
|
"""
|
|
call = LLMCallEvent.from_dict(kwargs)
|
|
call_id = call.call_id
|
|
# ``pop`` is the dedup: this method runs from both the success and failure
|
|
# paths, and whichever fires first removes the carrier and closes the span.
|
|
carrier = self._open_llm_calls.pop(call_id, None) if call_id else None
|
|
if carrier is None:
|
|
return None
|
|
payload = call.payload
|
|
if payload is None:
|
|
if carrier.span is not None:
|
|
# Opened at the boundary but the payload never materialized — end
|
|
# it (named provisionally) so it isn't leaked as an open span.
|
|
carrier.span.end(end_time=to_ns(end_time))
|
|
return None
|
|
data = LLMCallSpanData.from_standard_logging_payload(
|
|
payload, capture_content=self.config.capture_span_content
|
|
)
|
|
end_time_ns = to_ns(end_time)
|
|
if carrier.span is not None:
|
|
# Born at the boundary: stamp attributes from the typed payload, set
|
|
# status, and end it. Its parent (the server span) was captured at
|
|
# creation from real ambient context.
|
|
self._emitter.finish_span(
|
|
SpanRole.LLM_CALL, carrier.span, data, end_time_ns=end_time_ns
|
|
)
|
|
return carrier.span
|
|
# Deferred: ``pre_call`` saw no recordable parent, so create the span now.
|
|
# The worker copied the request task's context, which carries the anchored
|
|
# root span — parent to it (ambient fallback on the SDK path). Seed identity
|
|
# Baggage so the span — and the SDK path, which has none — is labeled
|
|
# consistently.
|
|
parent_ctx = resolve_request_span_context()
|
|
bag = promoted_baggage(
|
|
data.identity,
|
|
data.request_model,
|
|
promoted_keys=tuple(self.config.baggage_promoted_keys),
|
|
metadata_keys=tuple(self.config.baggage_metadata_keys),
|
|
team_metadata_keys=tuple(self.config.baggage_team_metadata_keys),
|
|
)
|
|
if bag:
|
|
parent_ctx = set_request_baggage(bag, context=parent_ctx)
|
|
return self._emitter.emit(
|
|
SpanRole.LLM_CALL,
|
|
data,
|
|
parent_context=parent_ctx,
|
|
start_time_ns=carrier.start_time_ns,
|
|
end_time_ns=end_time_ns,
|
|
tracer=self._tenant_tracers.tracer_for(self.tracer, call.dynamic_params),
|
|
)
|
|
|
|
# ====================================================================== #
|
|
# Service hooks
|
|
# ====================================================================== #
|
|
|
|
async def async_service_success_hook(
|
|
self,
|
|
payload: Any,
|
|
parent_otel_span: Span | None = None,
|
|
start_time: datetime | float | None = None,
|
|
end_time: datetime | float | None = None,
|
|
event_metadata: dict | None = None,
|
|
) -> None:
|
|
self._emit_service(
|
|
payload,
|
|
parent_otel_span=parent_otel_span,
|
|
start_time=start_time,
|
|
end_time=end_time,
|
|
event_metadata=event_metadata,
|
|
error_override=None,
|
|
)
|
|
|
|
async def async_service_failure_hook(
|
|
self,
|
|
payload: Any,
|
|
error: str | None = "",
|
|
parent_otel_span: Span | None = None,
|
|
start_time: datetime | float | None = None,
|
|
end_time: datetime | float | None = None,
|
|
event_metadata: dict | None = None,
|
|
) -> None:
|
|
self._emit_service(
|
|
payload,
|
|
parent_otel_span=parent_otel_span,
|
|
start_time=start_time,
|
|
end_time=end_time,
|
|
event_metadata=event_metadata,
|
|
error_override=error or "error",
|
|
)
|
|
|
|
def _emit_service(
|
|
self,
|
|
payload: Any,
|
|
*,
|
|
parent_otel_span: Span | None,
|
|
start_time: datetime | float | None,
|
|
end_time: datetime | float | None,
|
|
event_metadata: dict | None,
|
|
error_override: str | None,
|
|
) -> Span | None:
|
|
data = ServiceSpanData.from_payload(payload, event_metadata=event_metadata)
|
|
# Decide whether this service call is a span at all, and of what kind.
|
|
# ``None`` means metrics-only (framework instrumentation that duplicates a
|
|
# gen-AI span — ``self``/``router``/``proxy_pre_call`` — or ``auth``, which
|
|
# gets a live phase span instead). Those still feed Prometheus/Datadog via
|
|
# their own hooks; they just never enter the trace.
|
|
role = span_role_for_service(data.service_name)
|
|
if role is None:
|
|
return None
|
|
# A metrics-only ping with neither timing nor a parent (in-memory queue
|
|
# gauges) is not a traceable operation; a span for it would be a
|
|
# zero-duration root with no context, so skip it. Real background work
|
|
# (budget/reset jobs, spend flush) passes start/end times and still emits
|
|
# as a root; anything with a parent emits regardless.
|
|
if (
|
|
error_override is None
|
|
and start_time is None
|
|
and end_time is None
|
|
and parent_otel_span is None
|
|
):
|
|
return None
|
|
if error_override is not None and data.error is None:
|
|
data = ServiceSpanData(
|
|
service_name=data.service_name,
|
|
call_type=data.call_type,
|
|
error=SpanError(message=error_override),
|
|
event_metadata=data.event_metadata,
|
|
)
|
|
# Parent like every other span: ambient context first (so identity Baggage
|
|
# rides along and the call nests under whatever request phase is active —
|
|
# e.g. a DB lookup under the live ``auth`` span), falling back to the
|
|
# server span the proxy threaded as ``parent_otel_span``. A background
|
|
# service call has neither, so it starts its own root trace.
|
|
parent_context = resolve_parent_context(threaded=parent_otel_span)
|
|
return self._emitter.emit(
|
|
role,
|
|
data,
|
|
parent_context=parent_context,
|
|
start_time_ns=to_ns(start_time),
|
|
end_time_ns=to_ns(end_time),
|
|
)
|
|
|
|
# ====================================================================== #
|
|
# async_post_call_* hooks — emit guardrail spans. The server span's status
|
|
# / errors are the FastAPI instrumentor's job, so we don't touch it here.
|
|
# ====================================================================== #
|
|
|
|
def seed_request_identity(self, user_api_key_dict: Any, model: Any = None) -> None:
|
|
"""Attach request-identity Baggage to the current context + server span.
|
|
|
|
Seeding identity into Baggage makes **every** span emitted afterwards for
|
|
this request — LLM call, guardrail, DB call — inherit it via
|
|
``LiteLLMBaggageSpanProcessor``. Called once at the auth boundary (as soon
|
|
as the key resolves) so post-auth spans are labeled consistently; the
|
|
Baggage rides the request task's contextvar from there on. Auth-internal
|
|
DB lookups that run before the key is known stay unlabeled — identity
|
|
isn't determined yet, which is correct.
|
|
"""
|
|
try:
|
|
identity = RequestIdentity.from_user_api_key_auth(user_api_key_dict)
|
|
bag = promoted_baggage(
|
|
identity,
|
|
model,
|
|
promoted_keys=tuple(self.config.baggage_promoted_keys),
|
|
metadata_keys=tuple(self.config.baggage_metadata_keys),
|
|
team_metadata_keys=tuple(self.config.baggage_team_metadata_keys),
|
|
)
|
|
if bag:
|
|
# Attach (no detach): the contextvar is scoped to this request's
|
|
# asyncio task and is reclaimed when the task ends.
|
|
attach(set_request_baggage(bag, context=get_current()))
|
|
# The server span was started by the instrumentor before this ran,
|
|
# so the Baggage processor (which only fires at span start) won't
|
|
# backfill it — stamp identity on it directly. Prefer the anchored
|
|
# root span over the ambient one so identity still lands on the
|
|
# server span when seeding from inside the live ``auth`` phase span
|
|
# (the auth-failure path), where ``get_current_span`` is the phase
|
|
# span, not the request's root.
|
|
server_span = request_root_span() or get_current_span()
|
|
if is_recordable_span(server_span):
|
|
# Re-capture the anchor here too: this runs post-auth with the
|
|
# server span active and covers entrypoints that bypass
|
|
# ``create_litellm_proxy_request_started_span`` (e.g. the SDK
|
|
# path's ``async_pre_call_hook``). Idempotent.
|
|
set_request_root_span(server_span)
|
|
for key, value in bag.items():
|
|
server_span.set_attribute(key, value)
|
|
except Exception:
|
|
pass
|
|
|
|
@contextmanager
|
|
def start_phase_span(self, name: str) -> "Iterator[Span]":
|
|
span = self._emitter.start_span(SpanRole.SERVICE, name)
|
|
with use_span(span, end_on_exit=True):
|
|
yield span
|
|
|
|
async def async_pre_call_hook(
|
|
self,
|
|
user_api_key_dict: Any,
|
|
cache: Any,
|
|
data: dict,
|
|
call_type: Any,
|
|
) -> dict:
|
|
self.seed_request_identity(
|
|
user_api_key_dict,
|
|
model=model_from_request_data(data),
|
|
)
|
|
return data
|
|
|
|
def emit_guardrail_span(self, entry: "StandardLoggingGuardrailInformation") -> None:
|
|
# Emitted by the guardrail-recording code the moment a guardrail finishes,
|
|
# not from a post-call hook — that hook does not fire on every path (a
|
|
# pass-through request that passes its guardrails never reaches it), which
|
|
# left passing guardrails without a span.
|
|
#
|
|
# A guardrail is a sibling of the LLM call under the request's root span,
|
|
# so parent it to the explicit anchor — never the active span, which during
|
|
# a pre_call guardrail can be the live ``auth`` phase span. Emit with the
|
|
# guardrail's actual execution window so a pre_call guardrail is placed
|
|
# before the LLM call rather than at emission time. One entry in, one span
|
|
# out — the module-level entry point routes each entry to this single
|
|
# registered logger so a guardrail is never emitted more than once.
|
|
data = GuardrailSpanData.from_logging_entry(entry)
|
|
self._emitter.emit(
|
|
SpanRole.GUARDRAIL,
|
|
data,
|
|
parent_context=resolve_request_span_context(),
|
|
start_time_ns=to_ns(data.start_time),
|
|
end_time_ns=to_ns(data.end_time),
|
|
)
|
|
|
|
def create_litellm_proxy_request_started_span(
|
|
self, start_time: datetime, headers: Mapping[str, str] | None
|
|
) -> Span | None:
|
|
span = get_current_span()
|
|
if not is_recordable_span(span):
|
|
return None
|
|
set_request_root_span(span)
|
|
return span
|
|
|
|
|
|
def select_global_otel_v2_logger(
|
|
in_memory_loggers: Sequence[object],
|
|
registered: "OpenTelemetryV2 | None" = None,
|
|
) -> "OpenTelemetryV2":
|
|
"""The single ``OpenTelemetryV2`` whose provider should become the OTel global.
|
|
|
|
The callback factory designates one logger as canonical the moment it builds
|
|
the first one (``_init_otel_logger_on_litellm_proxy`` sets
|
|
``proxy_server.open_telemetry_logger``), and every other v2 entry point —
|
|
guardrail, identity seeding, phase spans — already routes through that same
|
|
``registered`` owner. Reuse it here too so the global provider has one source
|
|
of truth instead of a second, independently-derived guess; this is the logger
|
|
a preset (arize, langfuse, …) folds the ``OTEL_*`` base exporter and its own
|
|
exporter into, so the FastAPI server span and the gen-ai spans share one
|
|
provider and one trace.
|
|
|
|
Fall back to ``in_memory_loggers`` for the SDK path, where no proxy global is
|
|
set (selecting from there, not ``service_callback``, which a preset logger does
|
|
not always reach), and build a generic logger from ``OTEL_*`` only when none was
|
|
configured at all. Each fallback still avoids the second generic logger that
|
|
orphaned the gen-ai spans onto a different backend than the server span.
|
|
"""
|
|
if registered is not None:
|
|
return registered
|
|
existing = next(
|
|
(cb for cb in in_memory_loggers if isinstance(cb, OpenTelemetryV2)), None
|
|
)
|
|
return existing if existing is not None else OpenTelemetryV2()
|
|
|
|
|
|
def publish_global_otel_v2_provider(
|
|
in_memory_loggers: Sequence[object],
|
|
set_global_provider: Callable[[TracerProvider], None],
|
|
registered: "OpenTelemetryV2 | None" = None,
|
|
) -> "OpenTelemetryV2":
|
|
"""Select the single v2 logger and publish its provider as the OTel global.
|
|
|
|
The proxy calls this once at startup, after callbacks are initialized, so the
|
|
preset logger already exists; it passes ``registered`` (the canonical owner the
|
|
factory designated as ``proxy_server.open_telemetry_logger``) so the global
|
|
provider reuses the same logger the rest of the v2 code emits through (see
|
|
:func:`select_global_otel_v2_logger`). Both ``registered`` and
|
|
``set_global_provider`` (the proxy passes
|
|
``opentelemetry.trace.set_tracer_provider``) are injected so the publish step is
|
|
unit-testable without reading or mutating real global OTel state. Returns the
|
|
logger whose provider was published.
|
|
"""
|
|
logger = select_global_otel_v2_logger(in_memory_loggers, registered=registered)
|
|
set_global_provider(logger._tracer_provider)
|
|
return logger
|
|
|
|
|
|
def _registered_v2_logger() -> "OpenTelemetryV2 | None":
|
|
try:
|
|
from litellm.proxy import proxy_server
|
|
except Exception:
|
|
return None
|
|
logger = getattr(proxy_server, "open_telemetry_logger", None)
|
|
return logger if isinstance(logger, OpenTelemetryV2) else None
|
|
|
|
|
|
def emit_guardrail_span(entry: "StandardLoggingGuardrailInformation") -> None:
|
|
"""Emit a guardrail span on the registered v2 OTel logger.
|
|
|
|
Called by the guardrail-recording code the moment a guardrail finishes, so a
|
|
span is produced regardless of whether a post-call hook later runs (it does
|
|
not on the pass-through allow path). Routes through the single canonical
|
|
logger — the same one every other v2 entry point uses — so a guardrail
|
|
recorded once yields exactly one span; fanning out across every reachable
|
|
``OpenTelemetryV2`` instance double-emits the same entry. Best-effort: span
|
|
emission must never break guardrail evaluation.
|
|
"""
|
|
logger = _registered_v2_logger()
|
|
if logger is None:
|
|
return
|
|
try:
|
|
logger.emit_guardrail_span(entry)
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def seed_request_identity(user_api_key_dict: Any, model: Any = None) -> None:
|
|
logger = _registered_v2_logger()
|
|
if logger is not None:
|
|
logger.seed_request_identity(user_api_key_dict, model=model)
|
|
|
|
|
|
@contextmanager
|
|
def phase_span(name: str) -> "Iterator[Span | None]":
|
|
logger = _registered_v2_logger()
|
|
if logger is None:
|
|
yield None
|
|
return
|
|
with logger.start_phase_span(name) as span:
|
|
yield span
|