Files
MoFin/venv/lib/python3.12/site-packages/litellm/integrations/arize/arize_phoenix.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

500 lines
19 KiB
Python

import os
import threading
from collections import OrderedDict
from typing import TYPE_CHECKING, Any, Optional, Tuple, Union
from litellm._logging import verbose_logger
from litellm.integrations.arize import _utils
from litellm.integrations.arize._utils import ArizeOTELAttributes
from litellm.types.integrations.arize_phoenix import ArizePhoenixConfig
if TYPE_CHECKING:
    # Static-analysis-only imports: these give type checkers real OTEL / litellm
    # types without importing the opentelemetry packages at runtime.
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import SpanProcessor
    from opentelemetry.trace import Span as _Span
    from opentelemetry.trace import SpanKind
    from opentelemetry.trace import Tracer
    from litellm.integrations.opentelemetry import OpenTelemetry as _OpenTelemetry
    from litellm.integrations.opentelemetry import (
        OpenTelemetryConfig as _OpenTelemetryConfig,
    )
    from litellm.types.integrations.arize import Protocol as _Protocol

    # Public aliases used in annotations throughout this module.
    Protocol = _Protocol
    OpenTelemetryConfig = _OpenTelemetryConfig
    Span = Union[_Span, Any]
    OpenTelemetry = _OpenTelemetry
    # Declared (not assigned) so the checker knows the name exists; the runtime
    # value is bound in the ``else`` branch below.
    LITELLM_TRACER_NAME: str
else:
    # Runtime placeholders: annotations only need the names to exist.
    Protocol = Any
    OpenTelemetryConfig = Any
    Span = Any
    Tracer = Any
    TracerProvider = Any
    SpanKind = Any
    SpanProcessor = Any
    try:
        from litellm.integrations.opentelemetry import (
            LITELLM_TRACER_NAME,
            OpenTelemetry,
        )
    except ImportError:
        # Fallback tracer name when the OpenTelemetry integration is unavailable.
        LITELLM_TRACER_NAME = "litellm"
        # NOTE(review): ``None`` is then used as the base class of
        # ``ArizePhoenixLogger`` below, and Python raises TypeError for a
        # ``None`` base, so this fallback does not make the module importable.
        # Confirm whether a different placeholder (or a clearer error) is wanted.
        OpenTelemetry = None  # type: ignore
# Not referenced elsewhere in this module as visible here; presumably consumed
# by other modules — verify before removing.
ARIZE_HOSTED_PHOENIX_ENDPOINT = "https://otlp.arize.com/v1/traces"
# Upper bound on the number of per-project TracerProviders kept in the LRU
# cache maintained by ``ArizePhoenixLogger._get_tracer_for``.
_MAX_PROJECT_PROVIDERS = 64
class ArizePhoenixLogger(OpenTelemetry):  # type: ignore
    """
    Arize Phoenix logger that sends traces to a Phoenix endpoint.

    Creates its own dedicated TracerProvider so it can coexist with the
    generic ``otel`` callback (or any other OTEL-based integration) without
    fighting over the global ``opentelemetry.trace`` TracerProvider singleton.

    Unless a TracerProvider is injected, one provider is created per resolved
    project name and kept in a bounded LRU cache (``_MAX_PROJECT_PROVIDERS``
    entries); all of them share a single span processor.
    """

    def _init_tracing(self, tracer_provider):
        """
        Override to create per-project TracerProviders (LRU-cached) for Arize Phoenix.

        The base ``OpenTelemetry._init_tracing`` falls back to the global
        TracerProvider when one already exists. That causes whichever
        integration initialises second to silently reuse the first one's
        exporter, so spans only reach one destination.

        Args:
            tracer_provider: Optional externally supplied provider. When given,
                it is used as-is for every request and no per-project cache is
                created.
        """
        from opentelemetry.trace import SpanKind

        if tracer_provider is not None:
            # Injected mode: a single tracer, no cache, no shared processor.
            self._use_injected_tracer_provider = True
            self._shared_span_processor = None
            self.tracer = tracer_provider.get_tracer(LITELLM_TRACER_NAME)
            self.span_kind = SpanKind
            return

        self._use_injected_tracer_provider = False
        # Insertion/access order doubles as LRU order (oldest entry first).
        self._project_providers: OrderedDict[str, TracerProvider] = OrderedDict()
        self._project_providers_lock = threading.Lock()
        # One processor is attached to every per-project provider.
        self._shared_span_processor = self._get_span_processor()
        self.span_kind = SpanKind

        # Warm the cache with the project used when a request carries no
        # project metadata (env var or "default").
        default_project = self._resolve_project_name({})
        self.tracer = self._get_tracer_for(default_project)

        verbose_logger.debug(
            "ArizePhoenixLogger: Initialized per-project TracerProvider cache "
            "(default_project=%s, endpoint=%s, exporter=%s)",
            default_project,
            self.config.endpoint,
            self.config.exporter,
        )

    def flush_tracer_providers(self) -> None:
        """
        Flush all cached per-project providers and the shared span processor.

        Call on graceful proxy shutdown. Do not call on LRU eviction — in-flight
        spans may still reference evicted providers.

        Flush failures are logged at debug level and never raised. This is a
        no-op when an injected TracerProvider is in use.
        """
        if getattr(self, "_use_injected_tracer_provider", False):
            return

        # ``getattr`` defaults keep this safe even if ``_init_tracing`` never ran.
        shared_processor = getattr(self, "_shared_span_processor", None)
        if shared_processor is not None:
            try:
                shared_processor.force_flush()
            except Exception as e:
                verbose_logger.debug(
                    "ArizePhoenixLogger: shared span processor force_flush failed: %s",
                    e,
                )

        # Snapshot under the lock, flush outside it so request threads are not
        # blocked while flushing.
        with getattr(self, "_project_providers_lock", threading.Lock()):
            providers = list(getattr(self, "_project_providers", {}).values())
        for provider in providers:
            try:
                provider.force_flush()
            except Exception as e:
                verbose_logger.debug(
                    "ArizePhoenixLogger: TracerProvider force_flush failed: %s", e
                )

    def _get_litellm_resource_for_project(self, project_name: str):
        """
        Build an OTEL Resource with project routing attrs that win over env detector.

        Phoenix uses ``openinference.project.name``; Arize AX uses ``model_id`` and
        ``service.name``. Project attrs are merged last so OTEL_RESOURCE_ATTRIBUTES
        from init does not pin every provider to one project.

        Args:
            project_name: Project the resulting resource should route spans to.

        Returns:
            The environment-detected resource merged with the project attributes.
        """
        from opentelemetry.sdk.resources import OTELResourceDetector, Resource

        project_attributes: dict[str, str] = {
            "openinference.project.name": project_name,
            "model_id": project_name,
            "service.name": project_name,
        }
        # ``deployment_environment`` is optional on the config object.
        deployment_environment = getattr(self.config, "deployment_environment", None)
        if deployment_environment is not None:
            project_attributes["deployment.environment"] = deployment_environment

        env_resource = OTELResourceDetector().detect()
        project_resource = Resource.create(project_attributes)  # type: ignore[arg-type]
        return env_resource.merge(project_resource)

    def _build_tracer_provider_for_project(self, project_name: str) -> TracerProvider:
        """Create a TracerProvider for *project_name* (caller holds no cache lock)."""
        from opentelemetry.sdk.trace import TracerProvider

        provider = TracerProvider(
            resource=self._get_litellm_resource_for_project(project_name)
        )
        # Every provider exports through the same processor.
        provider.add_span_processor(self._shared_span_processor)
        return provider

    def _get_tracer_for(self, project_name: str) -> Tracer:
        """
        Return a tracer for *project_name*, creating/caching a provider on miss.

        On a hit the entry is marked most-recently-used. On a miss the provider
        is built without holding the lock, then inserted under the lock after
        re-checking that no other thread inserted one first; the
        least-recently-used entry is dropped when the cache is full.
        """
        if getattr(self, "_use_injected_tracer_provider", False):
            return self.tracer

        with self._project_providers_lock:
            if project_name in self._project_providers:
                self._project_providers.move_to_end(project_name)
                return self._project_providers[project_name].get_tracer(
                    LITELLM_TRACER_NAME
                )

        # OTELResourceDetector().detect() is synchronous; build outside the lock so
        # concurrent requests for other projects are not blocked on cache misses.
        new_provider = self._build_tracer_provider_for_project(project_name)

        with self._project_providers_lock:
            # Another thread may have cached this project while we were building;
            # prefer the cached provider and let ours be discarded.
            if project_name in self._project_providers:
                self._project_providers.move_to_end(project_name)
                return self._project_providers[project_name].get_tracer(
                    LITELLM_TRACER_NAME
                )
            if len(self._project_providers) >= _MAX_PROJECT_PROVIDERS:
                # Evict the least-recently-used entry. It is intentionally not
                # flushed here (see ``flush_tracer_providers``).
                self._project_providers.popitem(last=False)
            self._project_providers[project_name] = new_provider
            return new_provider.get_tracer(LITELLM_TRACER_NAME)

    def _resolve_tracer_for_kwargs(self, kwargs: dict) -> Tuple[str, Tracer]:
        """Resolve project name once and return ``(project_name, tracer)``."""
        project_name = self._resolve_project_name(kwargs)
        return project_name, self._get_tracer_for(project_name)

    def get_tracer_to_use_for_request(self, kwargs: dict) -> Tracer:
        """Route guardrail/raw-request spans to the same per-project tracer as the request."""
        if getattr(self, "_use_injected_tracer_provider", False):
            return self.tracer
        return self._resolve_tracer_for_kwargs(kwargs)[1]

    def _init_otel_logger_on_litellm_proxy(self):
        """
        Override: Arize Phoenix should NOT overwrite the proxy's
        ``open_telemetry_logger``. That attribute is reserved for the
        primary ``otel`` callback which handles proxy-level parent spans.
        """
        pass

    def set_attributes(self, span: Span, kwargs, response_obj: Optional[Any]):
        """Set Arize/Phoenix attributes on *span* via ``set_arize_phoenix_attributes``."""
        ArizePhoenixLogger.set_arize_phoenix_attributes(span, kwargs, response_obj)
        return

    @staticmethod
    def set_arize_phoenix_attributes(span: Span, kwargs, response_obj):
        """Delegate to ``_utils.set_attributes`` using ``ArizeOTELAttributes``."""
        _utils.set_attributes(span, kwargs, response_obj, ArizeOTELAttributes)
        return

    @staticmethod
    def _normalize_project_name(name: Optional[str]) -> Optional[str]:
        """Return *name* stringified and stripped, or ``None`` if missing/blank."""
        if name is None:
            return None
        normalized = str(name).strip()
        return normalized if normalized else None

    @staticmethod
    def _iter_metadata_dicts_from_kwargs(kwargs: dict):
        """Yield request metadata dicts; standard_logging_object before litellm_params."""
        for key in ("standard_logging_object", "litellm_params"):
            found_key = kwargs.get(key)
            # Skip containers that are missing or not plain dicts.
            if not isinstance(found_key, dict):
                continue
            metadata = found_key.get("metadata")
            if isinstance(metadata, dict):
                yield metadata

    @staticmethod
    def _is_proxy_request(kwargs: dict) -> bool:
        """True when the call is routed through the LiteLLM proxy.

        Proxy mode is determined solely by the server-set ``proxy_server_request``
        field in ``litellm_params``. Checking request metadata for
        ``user_api_key_auth_metadata`` is intentionally avoided: that field is
        user-supplied and would let an authenticated caller fake proxy-mode
        detection to route their telemetry into arbitrary Arize/Phoenix projects.
        """
        litellm_params = kwargs.get("litellm_params")
        return isinstance(litellm_params, dict) and bool(
            litellm_params.get("proxy_server_request")
        )

    @staticmethod
    def _project_from_metadata_dict(
        metadata: dict, metadata_key: str, *, proxy_mode: bool
    ) -> Optional[str]:
        """
        Read a Phoenix project field from proxy/SDK metadata.

        On the proxy, only ``user_api_key_auth_metadata`` (team/key config) may
        select the project. SDK callers may still set project fields directly on
        ``metadata``.

        Args:
            metadata: A request metadata dict.
            metadata_key: Name of the project field to read.
            proxy_mode: Whether the request came through the proxy.

        Returns:
            The normalized project name, or ``None`` when no usable value exists.
        """
        auth_metadata = metadata.get("user_api_key_auth_metadata")
        if isinstance(auth_metadata, dict):
            project = ArizePhoenixLogger._normalize_project_name(
                auth_metadata.get(metadata_key)
            )
            if project:
                return project
        # Top-level metadata is only honoured outside proxy mode.
        if not proxy_mode:
            return ArizePhoenixLogger._normalize_project_name(
                metadata.get(metadata_key)
            )
        return None

    @staticmethod
    def _metadata_project_from_kwargs(kwargs: dict, metadata_key: str) -> Optional[str]:
        """Return the first non-empty *metadata_key* project found in request metadata."""
        proxy_mode = ArizePhoenixLogger._is_proxy_request(kwargs)
        for metadata in ArizePhoenixLogger._iter_metadata_dicts_from_kwargs(kwargs):
            project = ArizePhoenixLogger._project_from_metadata_dict(
                metadata, metadata_key, proxy_mode=proxy_mode
            )
            if project:
                return project
        return None

    @staticmethod
    def _resolve_project_name(kwargs: dict) -> str:
        """
        Resolve the target Phoenix/Arize project for this request.

        Proxy priority: ``user_api_key_auth_metadata.phoenix_project_name_override``,
        ``user_api_key_auth_metadata.phoenix_project_name``, env, then ``default``.
        SDK priority: request metadata fields, then env, then ``default``.

        The env lookup reads ``PHOENIX_PROJECT_NAME`` first and falls back to
        ``ARIZE_PROJECT_NAME``.
        """
        override = ArizePhoenixLogger._metadata_project_from_kwargs(
            kwargs, "phoenix_project_name_override"
        )
        if override:
            return override

        phoenix_name = ArizePhoenixLogger._metadata_project_from_kwargs(
            kwargs, "phoenix_project_name"
        )
        if phoenix_name:
            return phoenix_name

        env_name = ArizePhoenixLogger._normalize_project_name(
            os.environ.get("PHOENIX_PROJECT_NAME")
            or os.environ.get("ARIZE_PROJECT_NAME")
        )
        if env_name:
            return env_name
        return "default"

    def _get_phoenix_context(self, kwargs, tracer: Optional[Tracer] = None):
        """
        Build a trace context for Phoenix's dedicated TracerProvider.

        The base ``_get_span_context`` returns parent spans from the global
        TracerProvider (the ``otel`` callback). Those spans live on a
        *different* TracerProvider, so they won't appear in Phoenix — using
        them as parents just creates broken links.

        Instead we:
        1. Honour an incoming ``traceparent`` HTTP header (distributed tracing).
        2. In proxy mode, create our *own* parent span on Phoenix's tracer
           so the hierarchy is visible end-to-end inside Phoenix.
        3. In SDK (non-proxy) mode, return ``(traceparent_ctx, None)``, where the
           context is ``None`` (root span) unless a ``traceparent`` header was found.

        Args:
            kwargs: The request's logging kwargs.
            tracer: Tracer to start the parent span on; resolved from *kwargs*
                when omitted.

        Returns:
            ``(context, parent_span)``. ``parent_span`` is only set in proxy
            mode, and the caller is responsible for ending it.
        """
        from opentelemetry import trace

        if tracer is None:
            tracer = self._resolve_tracer_for_kwargs(kwargs)[1]

        # ``or {}`` guards against keys that are present but set to None.
        litellm_params = kwargs.get("litellm_params", {}) or {}
        proxy_server_request = litellm_params.get("proxy_server_request", {}) or {}
        headers = proxy_server_request.get("headers", {}) or {}

        traceparent_ctx = (
            self.get_traceparent_from_header(headers=headers)
            if headers.get("traceparent")
            else None
        )

        is_proxy_mode = bool(proxy_server_request)
        if is_proxy_mode:
            start_time_val = kwargs.get("start_time", kwargs.get("api_call_start_time"))
            parent_span = tracer.start_span(
                name="litellm_proxy_request",
                start_time=(
                    self._to_ns(start_time_val) if start_time_val is not None else None
                ),
                context=traceparent_ctx,
                kind=self.span_kind.SERVER,
            )
            ctx = trace.set_span_in_context(parent_span)
            return ctx, parent_span

        return traceparent_ctx, None

    def _handle_success(self, kwargs, response_obj, start_time, end_time):
        """Record a successful call as a Phoenix trace."""
        self._handle_phoenix_trace(
            kwargs, response_obj, start_time, end_time, success=True
        )

    def _handle_failure(self, kwargs, response_obj, start_time, end_time):
        """Record a failed call as a Phoenix trace."""
        self._handle_phoenix_trace(
            kwargs, response_obj, start_time, end_time, success=False
        )

    def _handle_phoenix_trace(
        self,
        kwargs,
        response_obj,
        start_time,
        end_time,
        *,
        success: bool,
    ):
        """
        Emit the span(s) for one request on the project's tracer.

        Creates the request span (child of the proxy parent span when there is
        one), sets status and attributes, ends it, emits the guardrail span,
        then finalises the parent span. On success, metrics and (if enabled)
        semantic log events are recorded as well.

        Args:
            kwargs: The request's logging kwargs.
            response_obj: The response (or failure payload) for the request.
            start_time: Request start time, converted with ``self._to_ns``.
            end_time: Request end time, converted with ``self._to_ns``.
            success: Whether the request succeeded; selects OK vs ERROR status.
        """
        from opentelemetry.trace import Status, StatusCode

        verbose_logger.debug(
            "ArizePhoenixLogger: %s - kwargs: %s, OTEL config settings=%s",
            "success" if success else "failure",
            kwargs,
            self.config,
        )

        # Resolve the tracer once and reuse it for both parent and child span.
        _project_name, tracer = self._resolve_tracer_for_kwargs(kwargs)
        ctx, parent_span = self._get_phoenix_context(kwargs, tracer=tracer)
        status = Status(StatusCode.OK if success else StatusCode.ERROR)

        span = tracer.start_span(
            name=self._get_span_name(kwargs),
            start_time=self._to_ns(start_time),
            context=ctx,
        )
        span.set_status(status)
        self.set_attributes(span, kwargs, response_obj)
        if not success:
            self._record_exception_on_span(span=span, kwargs=kwargs)
        if success:
            self._maybe_log_raw_request(
                kwargs, response_obj, start_time, end_time, span
            )
        span.end(end_time=self._to_ns(end_time))

        self._create_guardrail_span(kwargs=kwargs, context=ctx)

        # The proxy parent span mirrors the request span's status/attributes
        # and is closed last.
        if parent_span is not None:
            parent_span.set_status(status)
            self.set_attributes(parent_span, kwargs, response_obj)
            if not success:
                self._record_exception_on_span(span=parent_span, kwargs=kwargs)
            parent_span.end(end_time=self._to_ns(end_time))

        if success:
            self._record_metrics(kwargs, response_obj, start_time, end_time)
            if self.config.enable_events:
                self._emit_semantic_logs(kwargs, response_obj, span)

    @staticmethod
    def get_arize_phoenix_config() -> ArizePhoenixConfig:
        """
        Retrieves the Arize Phoenix configuration based on environment variables.

        Environment variables read:
            PHOENIX_API_KEY: Optional bearer token. Blank values are treated
                as unset.
            PHOENIX_COLLECTOR_HTTP_ENDPOINT: Preferred collector endpoint.
            PHOENIX_COLLECTOR_ENDPOINT: Used when the HTTP endpoint is unset.
            PHOENIX_PROJECT_NAME: Project name; defaults to ``"default"``.

        Returns:
            ArizePhoenixConfig: The resolved auth headers, protocol, endpoint
            and project name. With no endpoint configured, the local default
            ``http://localhost:6006/v1/traces`` over ``otlp_http`` is used.

        Raises:
            ValueError: If the endpoint targets Phoenix Cloud
                (``app.phoenix.arize.com``) and no API key is set.
        """
        # Treat an empty/whitespace-only key as missing so we neither send a
        # meaningless "Bearer " header nor bypass the Phoenix Cloud check below.
        api_key = (os.environ.get("PHOENIX_API_KEY") or "").strip() or None

        # The HTTP-specific variable wins; otherwise fall back to the generic one.
        collector_endpoint = os.environ.get(
            "PHOENIX_COLLECTOR_HTTP_ENDPOINT"
        ) or os.environ.get("PHOENIX_COLLECTOR_ENDPOINT")

        endpoint = None
        protocol: Protocol = "otlp_http"

        if collector_endpoint:
            # gRPC is selected by an explicit grpc:// scheme, or by port 4317
            # when the URL does not already point at the HTTP traces path.
            if collector_endpoint.startswith("grpc://") or (
                ":4317" in collector_endpoint and "/v1/traces" not in collector_endpoint
            ):
                endpoint = collector_endpoint
                protocol = "otlp_grpc"
            else:
                if "app.phoenix.arize.com" in collector_endpoint:
                    # Phoenix Cloud URLs are used verbatim.
                    endpoint = collector_endpoint
                    protocol = "otlp_http"
                elif "/v1/traces" not in collector_endpoint:
                    # Complete a base URL to the OTLP/HTTP traces path.
                    if collector_endpoint.endswith("/v1"):
                        endpoint = collector_endpoint + "/traces"
                    elif collector_endpoint.endswith("/"):
                        endpoint = f"{collector_endpoint}v1/traces"
                    else:
                        endpoint = f"{collector_endpoint}/v1/traces"
                else:
                    endpoint = collector_endpoint
                protocol = "otlp_http"
        else:
            endpoint = "http://localhost:6006/v1/traces"
            protocol = "otlp_http"
            verbose_logger.debug(
                "No PHOENIX_COLLECTOR_ENDPOINT found, using default local Phoenix endpoint: %s",
                endpoint,
            )

        otlp_auth_headers = None
        if api_key is not None:
            otlp_auth_headers = f"Authorization=Bearer {api_key}"
        elif "app.phoenix.arize.com" in endpoint:
            raise ValueError(
                "PHOENIX_API_KEY must be set when using Phoenix Cloud (app.phoenix.arize.com)."
            )

        project_name = os.environ.get("PHOENIX_PROJECT_NAME") or "default"

        return ArizePhoenixConfig(
            otlp_auth_headers=otlp_auth_headers,
            protocol=protocol,
            endpoint=endpoint,
            project_name=project_name,
        )

    async def async_health_check(self):
        """
        Report whether Arize Phoenix credentials are configured.

        Returns:
            dict: ``{"status": "healthy", "message": ...}`` when auth headers
            are configured, otherwise ``{"status": "unhealthy",
            "error_message": ...}``. An invalid configuration is reported as
            unhealthy rather than raised.
        """
        try:
            config = self.get_arize_phoenix_config()
        except ValueError as e:
            # A health probe should describe the misconfiguration, not crash.
            return {
                "status": "unhealthy",
                "error_message": str(e),
            }

        if not config.otlp_auth_headers:
            return {
                "status": "unhealthy",
                "error_message": "PHOENIX_API_KEY environment variable not set",
            }

        return {
            "status": "healthy",
            "message": "Arize-Phoenix credentials are configured properly",
        }