Files
MoFin/venv/lib/python3.12/site-packages/litellm/integrations/otel/plumbing/context.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

128 lines
5.4 KiB
Python

"""Trace-context + Baggage helpers."""
from contextvars import ContextVar
from typing import Mapping
from opentelemetry import baggage
from opentelemetry.context import Context, get_current
from opentelemetry.trace import Span, get_current_span, set_span_in_context
from opentelemetry.trace.propagation.tracecontext import (
TraceContextTextMapPropagator,
)
# Module-wide propagator instance; ``extract_traceparent`` uses it to turn
# incoming headers into a parent context.
_PROPAGATOR = TraceContextTextMapPropagator()
# The request's root span — the FastAPI-owned SERVER span — captured ONCE when the
# proxy first resolves it, so request-level spans (the LLM call, guardrails) can
# parent to it EXPLICITLY instead of to whatever span happens to be active at the
# instant they are emitted. Ambient-only parenting (``get_current_span()``) is
# wrong at two boundaries:
#   * inside the ``auth`` phase span the active span is the auth span, so an LLM /
#     guardrail span emitted there would nest under auth instead of being its
#     sibling; and
#   * in a detached success task (pass-through logs success from a fire-and-forget
#     ``asyncio.create_task``) the server span may not be active at all, orphaning
#     the span into a brand-new trace.
# A ``ContextVar`` (not a request attribute) so it rides the request task's context
# and is inherited by ``asyncio.create_task`` children — i.e. the async logging
# callbacks that close the span. It is never reset: the contextvar dies with the
# request task, so there is nothing to leak.
# Context-local slot holding the anchored request root span; reads as ``None``
# until ``set_request_root_span`` stores a span in the current context.
_request_root_span: "ContextVar[Span | None]" = ContextVar(
    "litellm_otel_request_root_span", default=None
)
def set_request_root_span(span: Span) -> None:
    """Record ``span`` as the request's root span for explicit child parenting.

    A span that fails ``is_recordable_span`` is ignored, so an invalid capture
    never overwrites a previously anchored span. Calling this again with the
    same span simply stores it again.

    Args:
        span: The candidate root (server) span.
    """
    if not is_recordable_span(span):
        # Leave whatever is already anchored untouched.
        return
    _request_root_span.set(span)
def request_root_span() -> "Span | None":
    """Return the anchored request root span, if a usable one is stored.

    Returns:
        The span held in the context variable when it passes
        ``is_recordable_span``; otherwise ``None`` (nothing anchored, or the
        stored value is not a valid span).
    """
    anchored = _request_root_span.get()
    if is_recordable_span(anchored):
        return anchored
    return None
def set_request_baggage(
    values: Mapping[str, str], context: Context | None = None
) -> Context:
    """Return a context with ``values`` written into Baggage.

    Args:
        values: Baggage entries to write, keyed by baggage name.
        context: Context to start from. When ``None`` the current context is
            used.

    Returns:
        The context produced after every entry has been written. With empty
        ``values`` this is the starting context itself.
    """
    # Resolve the starting context once. ``is None`` (not truthiness) is
    # deliberate so that an explicitly supplied context is always honoured,
    # even if it happens to be empty.
    ctx = get_current() if context is None else context
    for key, value in values.items():
        # Each call returns a context; feed it into the next call so the
        # entries accumulate.
        ctx = baggage.set_baggage(key, value, context=ctx)
    return ctx
def get_baggage_attributes(context: Context | None = None) -> dict[str, str]:
    """Return every Baggage entry on ``context`` with its value stringified.

    Args:
        context: Context to read Baggage from; passed straight through to
            ``baggage.get_all``.

    Returns:
        A new dict mapping each baggage key to ``str(value)``.
    """
    entries = baggage.get_all(context)
    return {name: str(entry) for name, entry in entries.items()}
def context_from_span(span: Span, context: Context | None = None) -> Context:
    """Build a context in which ``span`` is the active span.

    Used for explicit parenting: a child started with the returned context is
    parented to ``span``.

    Args:
        span: The span to install in the context.
        context: Base context, forwarded to ``set_span_in_context``.

    Returns:
        The context returned by ``set_span_in_context``.
    """
    parented = set_span_in_context(span, context=context)
    return parented
def resolve_parent_context(threaded: Span | None = None) -> Context:
    """Pick the context a child span should be parented under.

    The ambient (current) context wins whenever it already carries a
    recordable span. ``threaded`` is only used as a fallback: when it is
    recordable and the ambient context has no recordable span, the returned
    context has ``threaded`` installed as the active span. If neither is
    recordable, the ambient context is returned unchanged.

    Request-level spans (the LLM call, guardrails) resolve their parent through
    ``resolve_request_span_context`` instead.

    Args:
        threaded: An explicitly supplied parent span, or ``None``.

    Returns:
        The context to start the child span in.
    """
    ambient = get_current()
    if not is_recordable_span(threaded):
        # No usable explicit parent: ambient is all there is.
        return ambient
    if is_recordable_span(get_current_span(ambient)):
        # Ambient already has a valid parent; it takes precedence.
        return ambient
    return context_from_span(threaded, context=ambient)  # type: ignore[arg-type]
def resolve_request_span_context() -> Context:
    """Return the parent context for a request-level span (LLM call, guardrail).

    When a request root span has been anchored, the returned context has that
    root as its active span, regardless of which span is currently active.
    Without an anchor, the current context is returned as-is.

    This differs from :func:`resolve_parent_context`, which prefers the
    ambient span whenever it is recordable.

    Returns:
        A context parented to the anchored root span, or the current context
        when no anchor is available.
    """
    anchor = request_root_span()
    return get_current() if anchor is None else context_from_span(anchor)
def is_recordable_span(obj: object) -> bool:
    """Report whether ``obj`` is a span with a valid span context.

    Args:
        obj: Any object; non-``Span`` values are rejected.

    Returns:
        ``False`` when ``obj`` is not a ``Span``, when reading its span
        context raises, or when that context is ``None``; otherwise the
        context's ``is_valid`` flag.
    """
    if isinstance(obj, Span):
        try:
            span_context = obj.get_span_context()
        except Exception:
            # Best-effort probe: a span that cannot report its context is
            # treated as unusable rather than propagating the error.
            return False
        if span_context is not None:
            return span_context.is_valid
    return False
def extract_traceparent(headers: Mapping[str, str]) -> Context | None:
    """Extract a remote parent context from incoming HTTP headers, if present.

    Header names are compared case-insensitively. Names are normalised once
    (``str(name).lower()``) and that same normalised carrier is used both for
    the presence check and for extraction, so the two can never disagree.

    Args:
        headers: Incoming request headers.

    Returns:
        ``None`` when no ``traceparent`` header is present; otherwise the
        context produced by the module's propagator from the lower-cased
        headers.
    """
    carrier = {str(name).lower(): value for name, value in headers.items()}
    if "traceparent" not in carrier:
        return None
    return _PROPAGATOR.extract(carrier)