fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
128 lines
5.4 KiB
Python
128 lines
5.4 KiB
Python
"""Trace-context + Baggage helpers."""
|
|
|
|
from contextvars import ContextVar
|
|
from typing import Mapping
|
|
|
|
from opentelemetry import baggage
|
|
from opentelemetry.context import Context, get_current
|
|
from opentelemetry.trace import Span, get_current_span, set_span_in_context
|
|
from opentelemetry.trace.propagation.tracecontext import (
|
|
TraceContextTextMapPropagator,
|
|
)
|
|
|
|
# Module-wide trace-context propagator, built once at import time and reused by
# ``extract_traceparent`` to turn incoming headers into a parent context.
_PROPAGATOR = TraceContextTextMapPropagator()
|
|
|
|
# The request's root span — the FastAPI-owned SERVER span — captured ONCE when the
# proxy first resolves it, so request-level spans (the LLM call, guardrails) can
# parent to it EXPLICITLY instead of to whatever span happens to be active at the
# instant they are emitted. Ambient-only parenting (``get_current_span()``) is
# wrong at two boundaries:
#   * inside the ``auth`` phase span the active span is the auth span, so an LLM /
#     guardrail span emitted there would nest under auth instead of being its
#     sibling; and
#   * in a detached success task (pass-through logs success from a fire-and-forget
#     ``asyncio.create_task``) the server span may not be active at all, orphaning
#     the span into a brand-new trace.
# A ``ContextVar`` (not a request attribute) so it rides the request task's context
# and is inherited by ``asyncio.create_task`` children — i.e. the async logging
# callbacks that close the span. It is never reset: the contextvar dies with the
# request task, so there is nothing to leak.
# Defaults to ``None``, which ``request_root_span`` reports as "no anchored span".
_request_root_span: "ContextVar[Span | None]" = ContextVar(
    "litellm_otel_request_root_span", default=None
)
|
|
|
|
|
|
def set_request_root_span(span: Span) -> None:
    """Record ``span`` as the request's root (server) span.

    The stored span is what request-level child spans later parent to
    explicitly. A span that is not recordable is ignored, so a bad capture
    never overwrites a previously anchored good span. Calling this again with
    the same span simply stores it again.
    """
    if not is_recordable_span(span):
        # Keep whatever anchor is already in place.
        return
    _request_root_span.set(span)
|
|
|
|
|
|
def request_root_span() -> "Span | None":
    """Return the anchored request root span, or ``None`` when there is none.

    ``None`` is returned both when nothing has been anchored in the current
    context and when the stored object is no longer a recordable span.
    """
    anchored = _request_root_span.get()
    if is_recordable_span(anchored):
        return anchored
    return None
|
|
|
|
|
|
def set_request_baggage(
    values: Mapping[str, str], context: Context | None = None
) -> Context:
    """Build a context that carries every entry of ``values`` as Baggage.

    Entries are layered one at a time on top of ``context``. When nothing was
    written and no context was supplied, the result falls back to
    ``context or get_current()``.
    """
    layered = context
    for name, entry in values.items():
        layered = baggage.set_baggage(name, entry, context=layered)
    if layered is not None:
        return layered
    return context or get_current()
|
|
|
|
|
|
def get_baggage_attributes(context: Context | None = None) -> dict[str, str]:
    """Return every Baggage entry found on ``context`` with its value stringified."""
    attributes: dict[str, str] = {}
    for name, entry in baggage.get_all(context).items():
        attributes[name] = str(entry)
    return attributes
|
|
|
|
|
|
def context_from_span(span: Span, context: Context | None = None) -> Context:
    """Return ``context`` with ``span`` installed as its active span.

    Used to parent a new span under ``span`` explicitly rather than under
    whatever span is ambient.
    """
    parented = set_span_in_context(span, context=context)
    return parented
|
|
|
|
|
|
def resolve_parent_context(threaded: Span | None = None) -> Context:
    """Pick the context a child span should be parented under, ambient-first.

    Resolution order:

    1. If the current OTel context already holds a recordable span, that
       ambient context is returned as-is.
    2. Otherwise, if ``threaded`` is a recordable span, the ambient context is
       returned with ``threaded`` installed as the active span.
    3. Otherwise the ambient context is returned unchanged, so a span started
       from it begins a new root trace.

    ``threaded`` is the explicitly passed parent (e.g. the ``parent_otel_span``
    handed to a service hook). Request-level spans are resolved separately by
    :func:`resolve_request_span_context`.
    """
    ambient = get_current()
    if not is_recordable_span(threaded):
        # No usable explicit parent: ambient is the only option.
        return ambient
    if is_recordable_span(get_current_span(ambient)):
        # A live ambient span always wins over the threaded one.
        return ambient
    return context_from_span(threaded, context=ambient)  # type: ignore[arg-type]
|
|
|
|
|
|
def resolve_request_span_context() -> Context:
    """Return the parent context for a request-level span (LLM call, guardrail).

    Such spans are meant to be direct children of the request's root server
    span — siblings of the ``auth`` phase span and of one another — rather
    than children of whichever span is active at that moment. The explicitly
    anchored root span is therefore used whenever one exists; only when there
    is no anchor (the SDK / no-proxy path) is the ambient context returned.

    This differs from :func:`resolve_parent_context`, which prefers the active
    span: when an anchor exists, the active span is never returned here.
    """
    anchor = request_root_span()
    return get_current() if anchor is None else context_from_span(anchor)
|
|
|
|
|
|
def is_recordable_span(obj: object) -> bool:
    """Tell whether ``obj`` is a span whose span context is valid.

    Anything that is not a ``Span`` yields ``False``, as does a span whose
    ``get_span_context()`` raises. Otherwise the result reflects whether the
    returned span context exists and reports itself as valid.
    """
    if isinstance(obj, Span):
        try:
            span_context = obj.get_span_context()
        except Exception:
            # Best-effort probe: a span that cannot report its context is
            # treated as unusable rather than propagating the error.
            return False
        return span_context is not None and span_context.is_valid
    return False
|
|
|
|
|
|
def extract_traceparent(headers: Mapping[str, str]) -> "Context | None":
    """Extract a remote parent context from incoming HTTP headers, if present.

    Header names are matched case-insensitively. Every key is coerced to
    ``str`` and lower-cased exactly once, and that single normalised carrier is
    used both for the presence check and for extraction, so the two can never
    disagree about which keys exist.

    Args:
        headers: Incoming request headers.

    Returns:
        The context produced by the trace-context propagator when a
        ``traceparent`` header is present, otherwise ``None``.
    """
    # Normalise first: checking the raw keys with ``key.lower()`` would raise
    # on a non-``str`` key before the ``str()`` coercion could take effect.
    carrier = {str(key).lower(): value for key, value in headers.items()}
    if "traceparent" not in carrier:
        return None
    return _PROPAGATOR.extract(carrier)
|