fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
199 lines
7.7 KiB
Python
199 lines
7.7 KiB
Python
"""The span engine: dedup, start, run the mapper chain, set status, end."""
|
|
|
|
from collections import OrderedDict
|
|
from typing import Callable, Sequence
|
|
|
|
from opentelemetry.context import Context
|
|
from opentelemetry.trace import Span, Tracer
|
|
from opentelemetry.trace.status import Status, StatusCode
|
|
|
|
from litellm.integrations.otel.model.config import OpenTelemetryV2Config
|
|
from litellm.integrations.otel.mappers import resolve_mappers
|
|
from litellm.integrations.otel.mappers.base import AttributeMapper, SpanData
|
|
from litellm.integrations.otel.model.payloads import (
|
|
GuardrailSpanData,
|
|
LLMCallSpanData,
|
|
MCPToolCallSpanData,
|
|
ServiceSpanData,
|
|
)
|
|
from litellm.integrations.otel.plumbing.providers import to_otel_span_kind
|
|
from litellm.integrations.otel.model.semconv import Error, ExceptionEvent
|
|
from litellm.integrations.otel.model.spans import (
|
|
SPAN_REGISTRY,
|
|
SpanRole,
|
|
guardrail_span_name,
|
|
llm_call_span_name,
|
|
mcp_tool_call_span_name,
|
|
service_span_name,
|
|
)
|
|
|
|
# Span-name builder for each role that emit() / finish_span() can handle.
# PROXY_REQUEST and the management routes are absent on purpose: they are
# SERVER spans owned by the mounted FastAPI instrumentor, so nothing here
# ever needs to name them.
_NAME_BUILDERS: dict[SpanRole, Callable[..., str]] = {
    SpanRole.LLM_CALL: llm_call_span_name,
    SpanRole.MCP_TOOL_CALL: mcp_tool_call_span_name,
    SpanRole.GUARDRAIL: guardrail_span_name,
    # DB_CALL and SERVICE are both fed by ServiceSpanData. They differ in span
    # kind (CLIENT vs INTERNAL) and attribute vocabulary, but not in how the
    # span is named, so they share a single builder.
    **dict.fromkeys((SpanRole.DB_CALL, SpanRole.SERVICE), service_span_name),
}
|
|
|
|
# Cap on the dedup cache. It only needs to coalesce the sync+async firing window
|
|
# of a single in-flight request, so a bounded LRU keeps memory flat on a
|
|
# long-running proxy while still covering every concurrently-open call.
|
|
_DEDUP_CACHE_MAX = 10_000
|
|
|
|
|
|
class SpanEmitter:
    """Span engine: dedup, start, run the mapper chain, set status, end.

    Two usage styles are supported:

    * :meth:`emit` produces one complete span in a single call.
    * :meth:`start_span` / :meth:`finish_span` let a caller open a span early
      and close it later, once the typed payload is available.

    Span attributes come exclusively from the configured mapper chain.
    """

    def __init__(
        self,
        tracer: Tracer,
        config: OpenTelemetryV2Config,
        mappers: Sequence[AttributeMapper] | None = None,
    ) -> None:
        """Bind the emitter to a default ``tracer`` and a ``config``.

        ``mappers`` is copied into a list when given; when it is ``None`` the
        chain is resolved from ``config.mapper_names``.
        """
        self._tracer = tracer
        self._config = config
        # The mapper chain is the sole source of span attributes. When not
        # passed in, resolve it from the config so there's one source of truth.
        self._mappers: list[AttributeMapper] = (
            list(mappers)
            if mappers is not None
            else resolve_mappers(config.mapper_names)
        )
        # Bounded LRU (ordered by insertion / most-recent touch). Storing keys
        # only — the value is unused — so it behaves like a capped set.
        self._emitted: "OrderedDict[tuple[str, SpanRole], None]" = OrderedDict()

    # -- low-level helpers --------------------------------------------------- #

    def start_span(
        self,
        role: SpanRole,
        name: str,
        parent_context: Context | None = None,
        start_time_ns: int | None = None,
        *,
        tracer: Tracer | None = None,
    ) -> Span:
        """Start a span for ``role`` without dedup or attribute mapping.

        For callers that own and manage their own span lifecycle. The span kind
        is looked up from ``SPAN_REGISTRY[role]``. ``tracer`` overrides the
        bound tracer for this span only, used for per-request multi-tenant
        credential routing.
        """
        return (tracer or self._tracer).start_span(
            name,
            context=parent_context,
            kind=to_otel_span_kind(SPAN_REGISTRY[role].kind),
            start_time=start_time_ns,
        )

    def _seen(self, dedup_key: str | None, role: SpanRole) -> bool:
        """Return True once a ``(dedup_key, role)`` pair has been emitted.

        Guards against emitting the same span twice when a streaming call
        fires both a sync and an async logging callback. The first call for a
        pair records it and returns ``False``; later calls return ``True``.
        An empty or ``None`` key is never recorded and always returns ``False``.
        """
        if not dedup_key:
            return False
        marker = (dedup_key, role)
        if marker in self._emitted:
            # Refresh recency so an active call is not the next one evicted.
            self._emitted.move_to_end(marker)
            return True
        self._emitted[marker] = None
        if len(self._emitted) > _DEDUP_CACHE_MAX:
            self._emitted.popitem(last=False)  # evict least-recently-used
        return False

    # -- the engine ---------------------------------------------------------- #

    def emit(
        self,
        role: SpanRole,
        data: SpanData,
        parent_context: Context | None = None,
        *,
        start_time_ns: int | None = None,
        end_time_ns: int | None = None,
        tracer: Tracer | None = None,
    ) -> Span | None:
        """Emit one complete span: dedup, start, map attributes, status, end.

        Return the span, or ``None`` if it was deduplicated away. ``tracer``
        overrides the bound tracer for this span, used for per-request routing.
        ``role`` must have an entry in ``_NAME_BUILDERS``; any other role
        raises ``KeyError`` from the name lookup.
        """
        # LLM-call and MCP tool-call spans carry a dedup key (their request's
        # call id), so a sync+async double-firing coalesces. ``isinstance`` narrows
        # the type for mypy and keeps the engine free of duck-typed attribute reads.
        dedup_key = (
            data.identity.call_id
            if isinstance(data, (LLMCallSpanData, MCPToolCallSpanData))
            else None
        )
        if self._seen(dedup_key, role):
            return None
        span = self.start_span(
            role,
            _NAME_BUILDERS[role](data),
            parent_context=parent_context,
            start_time_ns=start_time_ns,
            tracer=tracer,
        )
        self.finish_span(role, span, data, end_time_ns=end_time_ns)
        return span

    def finish_span(
        self,
        role: SpanRole,
        span: Span,
        data: SpanData,
        *,
        end_time_ns: int | None = None,
    ) -> None:
        """Stamp attributes + status on an already-started ``span`` and end it.

        The counterpart to :meth:`start_span` for callers that own a span's
        lifecycle — the LLM-call span is opened at the request's ``pre_call``
        boundary (so it parents to the live server span via real ambient context,
        never a span threaded through a metadata dict) and closed here once the
        typed payload is available. The span name is (re)built from the now-known
        data, since the boundary opener only has a provisional name.

        The span is ended even when naming or attribute mapping raises; the
        exception still propagates to the caller afterwards.
        """
        try:
            span.update_name(_NAME_BUILDERS[role](data))
            for mapper in self._mappers:
                for key, value in mapper.map(data).items():
                    span.set_attribute(key, value)
            error = (
                data.error
                if isinstance(
                    data,
                    (
                        LLMCallSpanData,
                        MCPToolCallSpanData,
                        ServiceSpanData,
                        GuardrailSpanData,
                    ),
                )
                else None
            )
            if error and (error.error_type or error.message):
                error_type = error.error_type or "error"
                message = error.message or error.error_type or "error"
                span.set_attribute(Error.TYPE, error_type)
                span.set_status(Status(StatusCode.ERROR, message))
                # Carry the full message on the standard ``exception`` event so backends
                # map it as full text under ``exception.message``. Setting it as a bare
                # string attribute instead lets backends like Elasticsearch dynamic-map
                # it to a ``keyword`` capped at 1024 chars, truncating the message.
                span.add_event(
                    ExceptionEvent.NAME,
                    {ExceptionEvent.TYPE: error_type, ExceptionEvent.MESSAGE: message},
                )
            # On success leave the status UNSET (the semconv default) rather than
            # forcing OK — that matches the FastAPI server span and avoids implying a
            # span-level health signal litellm doesn't actually evaluate. Only a
            # genuine error sets a status.
        finally:
            # Always close the span: a failing name builder or mapper must not
            # leave a started span open, since an un-ended span is never exported.
            span.end(end_time=end_time_ns)
|