Files
MoFin/venv/lib/python3.12/site-packages/litellm/integrations/otel/logger.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

644 lines
28 KiB
Python

"""``CustomLogger`` adapter on the OpenTelemetry span engine."""
from collections import OrderedDict
from contextlib import contextmanager
from datetime import datetime
from typing import TYPE_CHECKING, Any, Callable, Iterator, Mapping, Sequence, cast
from opentelemetry.context import attach, get_current
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.trace import Span, Tracer, get_current_span, use_span
import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.integrations.otel.model.baggage import promoted_baggage
from litellm.integrations.otel.model.config import OpenTelemetryV2Config
from litellm.integrations.otel.plumbing.context import (
is_recordable_span,
request_root_span,
resolve_parent_context,
resolve_request_span_context,
set_request_baggage,
set_request_root_span,
)
from litellm.integrations.otel.emitter import SpanEmitter
from litellm.integrations.otel.mappers import resolve_mappers
from litellm.integrations.otel.model.metadata import (
LLMCallEvent,
RequestIdentity,
model_from_request_data,
)
from litellm.integrations.otel.model.payloads import (
GuardrailSpanData,
LLMCallSpanData,
MCPToolCallSpanData,
ServiceSpanData,
SpanError,
is_mcp_tool_call,
)
from litellm.integrations.otel.plumbing.metrics import (
GenAIMetricRecorder,
create_genai_metrics,
)
from litellm.integrations.otel.plumbing.providers import (
build_tracer_provider,
get_meter,
get_tracer,
resolve_meter_provider,
)
from litellm.integrations.otel.plumbing.routing import TenantTracerCache
from litellm.integrations.otel.model.spans import SpanRole, span_role_for_service
from litellm.integrations.otel.model.utils import to_ns
if TYPE_CHECKING:
from litellm.types.utils import (
StandardLoggingGuardrailInformation,
StandardLoggingPayload,
)
LITELLM_TRACER_NAME = "litellm"
# Any callback whose class belongs to one of these modules is "the OTel
# callback" for proxy-global-registration purposes.
_OTEL_MODULES = (
"litellm.integrations.otel",
"litellm.integrations.opentelemetry",
)
# Cap on the open-call carrier map. A span opened at ``pre_call`` that never
# reaches a success/failure callback (e.g. a stream that only fires stream
# events) would otherwise linger; bounding the map evicts the oldest so memory
# stays flat on a long-running proxy while covering every concurrent in-flight
# call.
_OPEN_CALLS_MAX = 10_000
class _LLMCallSpan:
"""The state carried from the ``pre_call`` boundary to span close.
``span`` is the live span when it could be opened at the boundary (the server
span was ambient), or ``None`` when creation was deferred because no ambient
parent was visible — in which case the async callback creates it against its
own (worker-copied) ambient context using ``start_time_ns``. The presence of
a carrier for a call at all is the proof that ``pre_call`` ran, i.e. that an
upstream call was actually attempted.
"""
__slots__ = ("span", "start_time_ns")
def __init__(self, span: "Span | None", start_time_ns: int | None) -> None:
self.span = span
self.start_time_ns = start_time_ns
class OpenTelemetryV2(CustomLogger):
"""The ``CustomLogger`` for OpenTelemetry."""
def __init__(
self,
config: OpenTelemetryV2Config | None = None,
callback_name: str | None = None,
tracer_provider: TracerProvider | None = None,
logger_provider: Any | None = None, # reserved for OTel logs
meter_provider: Any | None = None,
**kwargs: Any,
) -> None:
super().__init__(**kwargs)
self.config: OpenTelemetryV2Config = config or OpenTelemetryV2Config(**kwargs)
self.callback_name = callback_name
self._tracer_provider: TracerProvider = (
tracer_provider
if tracer_provider is not None
else build_tracer_provider(self.config)
)
self.tracer: Tracer = get_tracer(self._tracer_provider, LITELLM_TRACER_NAME)
self._metrics_recorder = self._init_metrics(meter_provider)
self._metric_filter_error_logged = False
self._emitter = SpanEmitter(
self.tracer, self.config, mappers=resolve_mappers(self.config.mapper_names)
)
self._tenant_tracers = TenantTracerCache(
self.config, callback_name, LITELLM_TRACER_NAME
)
self._open_llm_calls: "OrderedDict[str, _LLMCallSpan]" = OrderedDict()
self._init_otel_logger_on_litellm_proxy()
def _init_metrics(self, meter_provider: Any | None) -> "GenAIMetricRecorder | None":
"""Create the six GenAI histograms when metrics are enabled, else ``None``.
``meter_provider`` is an explicit override (tests inject one); otherwise the
provider is resolved from the OTel global so the operator's configured
readers/exporters receive the metrics, building and registering one only
when no global provider is set.
"""
if not self.config.enable_metrics:
return None
provider = resolve_meter_provider(self.config, meter_provider)
meter = get_meter(provider, LITELLM_TRACER_NAME)
return GenAIMetricRecorder(create_genai_metrics(meter), self.callback_name)
# ====================================================================== #
# Proxy global registration
# ====================================================================== #
def _register_in_callback_list(self, callbacks: list) -> None:
already_otel = any(
cb.__class__.__module__.startswith(_OTEL_MODULES)
for cb in callbacks
if hasattr(cb, "__class__")
)
if not already_otel:
callbacks.append(self)
def _init_otel_logger_on_litellm_proxy(self) -> None:
try:
from litellm.proxy import proxy_server
except Exception:
return
try:
self._register_in_callback_list(litellm.service_callback)
self._register_in_callback_list(litellm.input_callback)
self._register_in_callback_list(litellm._async_success_callback)
self._register_in_callback_list(litellm._async_failure_callback)
except Exception:
pass
if getattr(proxy_server, "open_telemetry_logger", None) is None:
setattr(proxy_server, "open_telemetry_logger", self)
# ====================================================================== #
# LLM-call callbacks — the span is opened at the ``pre_call`` boundary and
# closed here. See ``log_pre_api_call``.
# ====================================================================== #
def log_pre_api_call(self, model, messages, kwargs):
"""Open the LLM-call span at the call boundary.
Runs synchronously inside the request task, before the upstream call —
the one place where the live server span is genuinely the ambient OTel
context — so the span parents to it natively, with no span threaded
through a metadata dict. The open span is stashed on the per-request
``LiteLLMLoggingObj`` (a typed object) and closed in the async callback.
When no recordable parent is visible (``pre_call`` was driven from a thread
pool for a sync-only provider, where contextvars — and so the anchor —
don't follow), creation is deferred: only the start time is recorded, and
the async callback — whose worker context was copied from the request task
and so still carries the anchor — creates the span then.
Synthetic proxy-gate error logs (auth/rate-limit rejections) also fire this
hook but never made an upstream call; they are tagged and skipped so no
phantom LLM-call span is produced.
"""
call = LLMCallEvent.from_dict(kwargs)
if call.is_no_upstream_call:
return
call_id = call.call_id
if call_id is None:
return
# Idempotent: a retried call may re-enter ``pre_call`` with the same
# call id; keep the first span so its start time is the true one.
if call_id in self._open_llm_calls:
return
start_time_ns = to_ns(datetime.now())
span: Span | None = None
# Parent to the request's anchored root span (stable across the request),
# falling back to ambient on the SDK path. Open the span live only when
# that resolves to a recordable parent; otherwise defer to the close
# callback (the thread-pool case, where the anchor isn't visible here).
parent_context = resolve_request_span_context()
if is_recordable_span(get_current_span(parent_context)):
span = self._emitter.start_span(
SpanRole.LLM_CALL,
call.provisional_span_name,
parent_context=parent_context,
start_time_ns=start_time_ns,
tracer=self._tenant_tracers.tracer_for(
self.tracer, call.dynamic_params
),
)
self._open_llm_calls[call_id] = _LLMCallSpan(
span=span, start_time_ns=start_time_ns
)
# Evict the oldest open call if the map is over budget. A call that opens
# but never closes (a stream that only fires stream events) would linger
# otherwise; the evicted span is simply dropped (never exported).
if len(self._open_llm_calls) > _OPEN_CALLS_MAX:
self._open_llm_calls.popitem(last=False)
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
if self._emit_mcp_tool_call(kwargs, start_time, end_time):
return
self._close_llm_call(kwargs, start_time, end_time)
self._record_metrics(kwargs, response_obj, start_time, end_time)
def _record_metrics(self, kwargs, response_obj, start_time, end_time) -> None:
"""Record the GenAI metrics for a successful LLM call. Best-effort: a
recording failure (e.g. a malformed payload) must never break the span
close or the request itself."""
if self._metrics_recorder is None:
return
try:
self._metrics_recorder.record(kwargs, response_obj, start_time, end_time)
except ValueError as exc:
if not self._metric_filter_error_logged:
verbose_logger.error(
"OpenTelemetryV2: invalid otel.attributes metric filter, metrics disabled: %s",
exc,
)
self._metric_filter_error_logged = True
except Exception as exc:
verbose_logger.debug("OpenTelemetryV2: metric recording failed: %s", exc)
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
if self._emit_mcp_tool_call(kwargs, start_time, end_time):
return
self._close_llm_call(kwargs, start_time, end_time)
def _emit_mcp_tool_call(
self,
kwargs: Mapping[str, Any],
start_time: datetime | float | None,
end_time: datetime | float | None,
) -> bool:
"""Emit an MCP tool-call span when the closed request was a tool call.
MCP tool calls reach the success/failure callbacks like any other request
(with ``call_type`` ``call_mcp_tool``), but they are not LLM calls and have
no ``pre_call`` carrier — so they get their own CLIENT span here, parented
to the request's server span. Returns whether it handled the event, so the
caller skips the LLM-call path. The whole span is emitted at once (there is
no boundary to open it at), deduped on the call id by the emitter.
"""
raw_payload = kwargs.get("standard_logging_object")
if not raw_payload or not is_mcp_tool_call(
cast(Mapping[str, object], raw_payload)
):
return False
payload = cast("StandardLoggingPayload", raw_payload)
data = MCPToolCallSpanData.from_standard_logging_payload(
payload, capture_content=self.config.capture_span_content
)
# A stray LLM carrier from a ``pre_call`` that mis-fired for this id would
# otherwise linger until evicted; drop it so it's neither leaked nor closed
# as a phantom LLM span.
if data.identity.call_id:
self._open_llm_calls.pop(data.identity.call_id, None)
self._emitter.emit(
SpanRole.MCP_TOOL_CALL,
data,
parent_context=resolve_request_span_context(),
start_time_ns=to_ns(start_time),
end_time_ns=to_ns(end_time),
)
return True
def _close_llm_call(
self,
kwargs: Mapping[str, Any],
start_time: datetime | float | None,
end_time: datetime | float | None,
) -> Span | None:
"""Finish the LLM-call span opened at ``pre_call`` (or create it deferred).
No carrier for this call id means ``pre_call`` never ran — the request was
rejected at the gate or blocked by a pre-call guardrail before any upstream
call — so there is nothing to record and no phantom span.
"""
call = LLMCallEvent.from_dict(kwargs)
call_id = call.call_id
# ``pop`` is the dedup: this method runs from both the success and failure
# paths, and whichever fires first removes the carrier and closes the span.
carrier = self._open_llm_calls.pop(call_id, None) if call_id else None
if carrier is None:
return None
payload = call.payload
if payload is None:
if carrier.span is not None:
# Opened at the boundary but the payload never materialized — end
# it (named provisionally) so it isn't leaked as an open span.
carrier.span.end(end_time=to_ns(end_time))
return None
data = LLMCallSpanData.from_standard_logging_payload(
payload, capture_content=self.config.capture_span_content
)
end_time_ns = to_ns(end_time)
if carrier.span is not None:
# Born at the boundary: stamp attributes from the typed payload, set
# status, and end it. Its parent (the server span) was captured at
# creation from real ambient context.
self._emitter.finish_span(
SpanRole.LLM_CALL, carrier.span, data, end_time_ns=end_time_ns
)
return carrier.span
# Deferred: ``pre_call`` saw no recordable parent, so create the span now.
# The worker copied the request task's context, which carries the anchored
# root span — parent to it (ambient fallback on the SDK path). Seed identity
# Baggage so the span — and the SDK path, which has none — is labeled
# consistently.
parent_ctx = resolve_request_span_context()
bag = promoted_baggage(
data.identity,
data.request_model,
promoted_keys=tuple(self.config.baggage_promoted_keys),
metadata_keys=tuple(self.config.baggage_metadata_keys),
team_metadata_keys=tuple(self.config.baggage_team_metadata_keys),
)
if bag:
parent_ctx = set_request_baggage(bag, context=parent_ctx)
return self._emitter.emit(
SpanRole.LLM_CALL,
data,
parent_context=parent_ctx,
start_time_ns=carrier.start_time_ns,
end_time_ns=end_time_ns,
tracer=self._tenant_tracers.tracer_for(self.tracer, call.dynamic_params),
)
# ====================================================================== #
# Service hooks
# ====================================================================== #
async def async_service_success_hook(
self,
payload: Any,
parent_otel_span: Span | None = None,
start_time: datetime | float | None = None,
end_time: datetime | float | None = None,
event_metadata: dict | None = None,
) -> None:
self._emit_service(
payload,
parent_otel_span=parent_otel_span,
start_time=start_time,
end_time=end_time,
event_metadata=event_metadata,
error_override=None,
)
async def async_service_failure_hook(
self,
payload: Any,
error: str | None = "",
parent_otel_span: Span | None = None,
start_time: datetime | float | None = None,
end_time: datetime | float | None = None,
event_metadata: dict | None = None,
) -> None:
self._emit_service(
payload,
parent_otel_span=parent_otel_span,
start_time=start_time,
end_time=end_time,
event_metadata=event_metadata,
error_override=error or "error",
)
def _emit_service(
self,
payload: Any,
*,
parent_otel_span: Span | None,
start_time: datetime | float | None,
end_time: datetime | float | None,
event_metadata: dict | None,
error_override: str | None,
) -> Span | None:
data = ServiceSpanData.from_payload(payload, event_metadata=event_metadata)
# Decide whether this service call is a span at all, and of what kind.
# ``None`` means metrics-only (framework instrumentation that duplicates a
# gen-AI span — ``self``/``router``/``proxy_pre_call`` — or ``auth``, which
# gets a live phase span instead). Those still feed Prometheus/Datadog via
# their own hooks; they just never enter the trace.
role = span_role_for_service(data.service_name)
if role is None:
return None
# A metrics-only ping with neither timing nor a parent (in-memory queue
# gauges) is not a traceable operation; a span for it would be a
# zero-duration root with no context, so skip it. Real background work
# (budget/reset jobs, spend flush) passes start/end times and still emits
# as a root; anything with a parent emits regardless.
if (
error_override is None
and start_time is None
and end_time is None
and parent_otel_span is None
):
return None
if error_override is not None and data.error is None:
data = ServiceSpanData(
service_name=data.service_name,
call_type=data.call_type,
error=SpanError(message=error_override),
event_metadata=data.event_metadata,
)
# Parent like every other span: ambient context first (so identity Baggage
# rides along and the call nests under whatever request phase is active —
# e.g. a DB lookup under the live ``auth`` span), falling back to the
# server span the proxy threaded as ``parent_otel_span``. A background
# service call has neither, so it starts its own root trace.
parent_context = resolve_parent_context(threaded=parent_otel_span)
return self._emitter.emit(
role,
data,
parent_context=parent_context,
start_time_ns=to_ns(start_time),
end_time_ns=to_ns(end_time),
)
# ====================================================================== #
# async_post_call_* hooks — emit guardrail spans. The server span's status
# / errors are the FastAPI instrumentor's job, so we don't touch it here.
# ====================================================================== #
def seed_request_identity(self, user_api_key_dict: Any, model: Any = None) -> None:
"""Attach request-identity Baggage to the current context + server span.
Seeding identity into Baggage makes **every** span emitted afterwards for
this request — LLM call, guardrail, DB call — inherit it via
``LiteLLMBaggageSpanProcessor``. Called once at the auth boundary (as soon
as the key resolves) so post-auth spans are labeled consistently; the
Baggage rides the request task's contextvar from there on. Auth-internal
DB lookups that run before the key is known stay unlabeled — identity
isn't determined yet, which is correct.
"""
try:
identity = RequestIdentity.from_user_api_key_auth(user_api_key_dict)
bag = promoted_baggage(
identity,
model,
promoted_keys=tuple(self.config.baggage_promoted_keys),
metadata_keys=tuple(self.config.baggage_metadata_keys),
team_metadata_keys=tuple(self.config.baggage_team_metadata_keys),
)
if bag:
# Attach (no detach): the contextvar is scoped to this request's
# asyncio task and is reclaimed when the task ends.
attach(set_request_baggage(bag, context=get_current()))
# The server span was started by the instrumentor before this ran,
# so the Baggage processor (which only fires at span start) won't
# backfill it — stamp identity on it directly. Prefer the anchored
# root span over the ambient one so identity still lands on the
# server span when seeding from inside the live ``auth`` phase span
# (the auth-failure path), where ``get_current_span`` is the phase
# span, not the request's root.
server_span = request_root_span() or get_current_span()
if is_recordable_span(server_span):
# Re-capture the anchor here too: this runs post-auth with the
# server span active and covers entrypoints that bypass
# ``create_litellm_proxy_request_started_span`` (e.g. the SDK
# path's ``async_pre_call_hook``). Idempotent.
set_request_root_span(server_span)
for key, value in bag.items():
server_span.set_attribute(key, value)
except Exception:
pass
@contextmanager
def start_phase_span(self, name: str) -> "Iterator[Span]":
span = self._emitter.start_span(SpanRole.SERVICE, name)
with use_span(span, end_on_exit=True):
yield span
async def async_pre_call_hook(
self,
user_api_key_dict: Any,
cache: Any,
data: dict,
call_type: Any,
) -> dict:
self.seed_request_identity(
user_api_key_dict,
model=model_from_request_data(data),
)
return data
def emit_guardrail_span(self, entry: "StandardLoggingGuardrailInformation") -> None:
# Emitted by the guardrail-recording code the moment a guardrail finishes,
# not from a post-call hook — that hook does not fire on every path (a
# pass-through request that passes its guardrails never reaches it), which
# left passing guardrails without a span.
#
# A guardrail is a sibling of the LLM call under the request's root span,
# so parent it to the explicit anchor — never the active span, which during
# a pre_call guardrail can be the live ``auth`` phase span. Emit with the
# guardrail's actual execution window so a pre_call guardrail is placed
# before the LLM call rather than at emission time. One entry in, one span
# out — the module-level entry point routes each entry to this single
# registered logger so a guardrail is never emitted more than once.
data = GuardrailSpanData.from_logging_entry(entry)
self._emitter.emit(
SpanRole.GUARDRAIL,
data,
parent_context=resolve_request_span_context(),
start_time_ns=to_ns(data.start_time),
end_time_ns=to_ns(data.end_time),
)
def create_litellm_proxy_request_started_span(
self, start_time: datetime, headers: Mapping[str, str] | None
) -> Span | None:
span = get_current_span()
if not is_recordable_span(span):
return None
set_request_root_span(span)
return span
def select_global_otel_v2_logger(
in_memory_loggers: Sequence[object],
registered: "OpenTelemetryV2 | None" = None,
) -> "OpenTelemetryV2":
"""The single ``OpenTelemetryV2`` whose provider should become the OTel global.
The callback factory designates one logger as canonical the moment it builds
the first one (``_init_otel_logger_on_litellm_proxy`` sets
``proxy_server.open_telemetry_logger``), and every other v2 entry point —
guardrail, identity seeding, phase spans — already routes through that same
``registered`` owner. Reuse it here too so the global provider has one source
of truth instead of a second, independently-derived guess; this is the logger
a preset (arize, langfuse, …) folds the ``OTEL_*`` base exporter and its own
exporter into, so the FastAPI server span and the gen-ai spans share one
provider and one trace.
Fall back to ``in_memory_loggers`` for the SDK path, where no proxy global is
set (selecting from there, not ``service_callback``, which a preset logger does
not always reach), and build a generic logger from ``OTEL_*`` only when none was
configured at all. Each fallback still avoids the second generic logger that
orphaned the gen-ai spans onto a different backend than the server span.
"""
if registered is not None:
return registered
existing = next(
(cb for cb in in_memory_loggers if isinstance(cb, OpenTelemetryV2)), None
)
return existing if existing is not None else OpenTelemetryV2()
def publish_global_otel_v2_provider(
in_memory_loggers: Sequence[object],
set_global_provider: Callable[[TracerProvider], None],
registered: "OpenTelemetryV2 | None" = None,
) -> "OpenTelemetryV2":
"""Select the single v2 logger and publish its provider as the OTel global.
The proxy calls this once at startup, after callbacks are initialized, so the
preset logger already exists; it passes ``registered`` (the canonical owner the
factory designated as ``proxy_server.open_telemetry_logger``) so the global
provider reuses the same logger the rest of the v2 code emits through (see
:func:`select_global_otel_v2_logger`). Both ``registered`` and
``set_global_provider`` (the proxy passes
``opentelemetry.trace.set_tracer_provider``) are injected so the publish step is
unit-testable without reading or mutating real global OTel state. Returns the
logger whose provider was published.
"""
logger = select_global_otel_v2_logger(in_memory_loggers, registered=registered)
set_global_provider(logger._tracer_provider)
return logger
def _registered_v2_logger() -> "OpenTelemetryV2 | None":
try:
from litellm.proxy import proxy_server
except Exception:
return None
logger = getattr(proxy_server, "open_telemetry_logger", None)
return logger if isinstance(logger, OpenTelemetryV2) else None
def emit_guardrail_span(entry: "StandardLoggingGuardrailInformation") -> None:
"""Emit a guardrail span on the registered v2 OTel logger.
Called by the guardrail-recording code the moment a guardrail finishes, so a
span is produced regardless of whether a post-call hook later runs (it does
not on the pass-through allow path). Routes through the single canonical
logger — the same one every other v2 entry point uses — so a guardrail
recorded once yields exactly one span; fanning out across every reachable
``OpenTelemetryV2`` instance double-emits the same entry. Best-effort: span
emission must never break guardrail evaluation.
"""
logger = _registered_v2_logger()
if logger is None:
return
try:
logger.emit_guardrail_span(entry)
except Exception:
pass
def seed_request_identity(user_api_key_dict: Any, model: Any = None) -> None:
logger = _registered_v2_logger()
if logger is not None:
logger.seed_request_identity(user_api_key_dict, model=model)
@contextmanager
def phase_span(name: str) -> "Iterator[Span | None]":
logger = _registered_v2_logger()
if logger is None:
yield None
return
with logger.start_phase_span(name) as span:
yield span