fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
1135 lines
42 KiB
Python
1135 lines
42 KiB
Python
import json
|
|
from typing import TYPE_CHECKING, Any, Dict, Optional, Type
|
|
|
|
from typing_extensions import override
|
|
|
|
from litellm._logging import verbose_logger
|
|
from litellm.integrations.opentelemetry_utils.base_otel_llm_obs_attributes import (
|
|
BaseLLMObsOTELAttributes,
|
|
safe_set_attribute,
|
|
)
|
|
from litellm.litellm_core_utils.redact_messages import (
|
|
should_redact_message_logging,
|
|
)
|
|
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
|
|
from litellm.types.utils import StandardLoggingPayload
|
|
|
|
if TYPE_CHECKING:
|
|
from opentelemetry.trace import Span
|
|
from litellm.integrations._types.open_inference import (
|
|
AudioAttributes,
|
|
EmbeddingAttributes,
|
|
ImageAttributes,
|
|
MessageAttributes,
|
|
MessageContentAttributes,
|
|
OpenInferenceSpanKindValues,
|
|
SpanAttributes,
|
|
ToolCallAttributes,
|
|
)
|
|
|
|
|
|
class ArizeOTELAttributes(BaseLLMObsOTELAttributes):
    """Attribute writer that records request messages for Arize/Phoenix."""

    @staticmethod
    @override
    def set_messages(span: "Span", kwargs: Dict[str, Any]):
        """Record the chat messages found in ``kwargs["messages"]`` on ``span``.

        The content of the final message is written to ``INPUT_VALUE``; every
        message then gets a role and a content attribute under
        ``LLM_INPUT_MESSAGES.N``, followed by a guarded call that emits the
        additive tool-call / multimodal attributes. Nothing is written when
        the message list is missing or empty.
        """
        chat_messages = kwargs.get("messages")
        if not chat_messages:
            return

        # for /chat/completions
        # https://docs.arize.com/arize/large-language-models/tracing/semantic-conventions
        final_message = chat_messages[-1]
        safe_set_attribute(
            span, SpanAttributes.INPUT_VALUE, final_message.get("content", "")
        )

        # LLM_INPUT_MESSAGES shows up under `input_messages` tab on the span page.
        for position, entry in enumerate(chat_messages):
            base_key = f"{SpanAttributes.LLM_INPUT_MESSAGES}.{position}"

            # Role first, then content, one attribute each per message.
            safe_set_attribute(
                span, f"{base_key}.{MessageAttributes.MESSAGE_ROLE}", entry.get("role")
            )
            safe_set_attribute(
                span,
                f"{base_key}.{MessageAttributes.MESSAGE_CONTENT}",
                entry.get("content", ""),
            )

            # Additive: structured tool_calls / multimodal content are written
            # under NEW attribute keys (MESSAGE_TOOL_CALLS / MESSAGE_NAME /
            # MESSAGE_TOOL_CALL_ID / MESSAGE_CONTENTS.*) and never replace the
            # MESSAGE_CONTENT write above.
            _safe_emit(
                f"input message extras (idx={position})",
                _emit_input_message_extras,
                span,
                base_key,
                entry,
            )
|
|
|
|
# Note: `BaseLLMObsOTELAttributes.set_response_output_messages` is not
|
|
# overridden here. The live code path uses `_set_choice_outputs` (called
|
|
# via `_set_response_attributes` from `set_attributes`) which handles
|
|
# tool_calls, multimodal output, embeddings, audio, images, and structured
|
|
# outputs in a single place.
|
|
|
|
|
|
def _set_response_attributes(span: "Span", response_obj):
    """Write all response-derived attribute groups onto ``span``.

    Objects without a ``get`` attribute are ignored. Otherwise the choice,
    image, audio, embedding and structured-output emitters run in that order,
    followed by the token-usage emitter.
    """
    if not hasattr(response_obj, "get"):
        return

    # Each emitter takes (span, response, <its own attribute class>, SpanAttributes).
    output_emitters = (
        (_set_choice_outputs, MessageAttributes),
        (_set_image_outputs, ImageAttributes),
        (_set_audio_outputs, AudioAttributes),
        (_set_embedding_outputs, EmbeddingAttributes),
        (_set_structured_outputs, MessageAttributes),
    )
    for emit, detail_attrs in output_emitters:
        emit(span, response_obj, detail_attrs, SpanAttributes)

    _set_usage_outputs(span, response_obj, SpanAttributes)
|
|
|
|
|
|
def _set_choice_outputs(span: "Span", response_obj, msg_attrs, span_attrs):
    """Emit output attributes for every entry of ``response_obj["choices"]``.

    For each choice this writes ``OUTPUT_VALUE`` plus the role and content of
    the message under ``LLM_OUTPUT_MESSAGES.N``, then makes a guarded call
    that emits the message's tool calls.
    """
    for position, choice in enumerate(response_obj.get("choices", [])):
        reply = choice.get("message", {})
        text = reply.get("content", "")

        # Tool-only assistant responses have empty content; serialize the
        # tool_calls into OUTPUT_VALUE so Arize's "Output" pane isn't blank.
        if text:
            shown_output = text
        else:
            requested_calls = _get_tool_calls(reply)
            shown_output = (
                _summarize_tool_calls_for_output(requested_calls)
                if requested_calls
                else text
            )
        safe_set_attribute(span, span_attrs.OUTPUT_VALUE, shown_output)

        base_key = f"{span_attrs.LLM_OUTPUT_MESSAGES}.{position}"
        safe_set_attribute(
            span, f"{base_key}.{msg_attrs.MESSAGE_ROLE}", reply.get("role")
        )
        safe_set_attribute(span, f"{base_key}.{msg_attrs.MESSAGE_CONTENT}", text)

        # Additive: assistant tool_calls go under new MESSAGE_TOOL_CALLS keys
        # only; the MESSAGE_CONTENT / MESSAGE_ROLE writes above are untouched.
        _safe_emit(
            f"output tool_calls (idx={position})",
            _emit_message_tool_calls,
            span,
            base_key,
            reply,
        )
|
|
|
|
|
|
def _set_image_outputs(span: "Span", response_obj, image_attrs, span_attrs):
    """Emit one ``IMAGE_URL.N`` attribute per image in ``response_obj["data"]``.

    An entry's ``url`` is used when present; otherwise a truthy ``b64_json``
    is wrapped in a ``data:image/png;base64,`` URL. Entries that yield no URL
    are skipped. The entry at index 0 additionally populates ``OUTPUT_VALUE``.
    """
    for position, entry in enumerate(response_obj.get("data", [])):
        location = entry.get("url")
        if location is None:
            encoded = entry.get("b64_json")
            if encoded:
                location = f"data:image/png;base64,{encoded}"

        if location:
            if position == 0:
                safe_set_attribute(span, span_attrs.OUTPUT_VALUE, location)
            safe_set_attribute(span, f"{image_attrs.IMAGE_URL}.{position}", location)
|
|
|
|
|
|
def _set_audio_outputs(span: "Span", response_obj, audio_attrs, span_attrs):
    """Emit URL, MIME type and transcript attributes for ``response_obj["audio"]``.

    An entry's ``url`` is used when present; otherwise a truthy ``b64_json``
    is wrapped in a ``data:audio/wav;base64,`` URL. The entry at index 0 also
    populates ``OUTPUT_VALUE`` when it has a URL. MIME type and transcript are
    written independently of whether a URL was found.
    """
    for position, clip in enumerate(response_obj.get("audio", [])):
        location = clip.get("url")
        if location is None:
            encoded = clip.get("b64_json")
            if encoded:
                location = f"data:audio/wav;base64,{encoded}"

        if location:
            if position == 0:
                safe_set_attribute(span, span_attrs.OUTPUT_VALUE, location)
            safe_set_attribute(span, f"{audio_attrs.AUDIO_URL}.{position}", location)

        mime_type = clip.get("mime_type")
        if mime_type:
            safe_set_attribute(
                span, f"{audio_attrs.AUDIO_MIME_TYPE}.{position}", mime_type
            )

        transcript = clip.get("transcript")
        if transcript:
            safe_set_attribute(
                span, f"{audio_attrs.AUDIO_TRANSCRIPT}.{position}", transcript
            )
|
|
|
|
|
|
def _set_embedding_outputs(span: "Span", response_obj, embedding_attrs, span_attrs):
    """Emit embedding vectors and source texts from ``response_obj["data"]``.

    Vectors are stringified with ``str`` before being written under
    ``EMBEDDING_VECTOR.N``; the vector at index 0 also populates
    ``OUTPUT_VALUE``. A truthy ``text`` field is written under
    ``EMBEDDING_TEXT.N``.
    """
    for position, entry in enumerate(response_obj.get("data", [])):
        vector = entry.get("embedding")
        if vector:
            rendered = str(vector)
            if position == 0:
                safe_set_attribute(span, span_attrs.OUTPUT_VALUE, rendered)
            safe_set_attribute(
                span, f"{embedding_attrs.EMBEDDING_VECTOR}.{position}", rendered
            )

        source_text = entry.get("text")
        if source_text:
            safe_set_attribute(
                span,
                f"{embedding_attrs.EMBEDDING_TEXT}.{position}",
                str(source_text),
            )
|
|
|
|
|
|
def _set_structured_outputs(span: "Span", response_obj, msg_attrs, span_attrs):
    """Emit attributes for typed items found in ``response_obj["output"]``.

    Items are read via attribute access and skipped when they have no
    ``type``. ``reasoning`` items contribute their summary texts; ``message``
    items contribute the text of their first content part (empty string when
    there is none) and their role, which defaults to ``"assistant"``.
    """
    for position, entry in enumerate(response_obj.get("output", [])):
        base_key = f"{span_attrs.LLM_OUTPUT_MESSAGES}.{position}"
        if not hasattr(entry, "type"):
            continue

        kind = entry.type
        if kind == "reasoning" and hasattr(entry, "summary"):
            for piece in entry.summary:
                if hasattr(piece, "text"):
                    # Every summary writes the same key for this item.
                    safe_set_attribute(
                        span,
                        f"{base_key}.{msg_attrs.MESSAGE_REASONING_SUMMARY}",
                        piece.text,
                    )
            continue

        if kind != "message" or not hasattr(entry, "content"):
            continue

        parts = entry.content
        if parts and len(parts) > 0:
            text = getattr(parts[0], "text", "")
        else:
            text = ""
        role = getattr(entry, "role", "assistant")

        safe_set_attribute(span, span_attrs.OUTPUT_VALUE, text)
        safe_set_attribute(span, f"{base_key}.{msg_attrs.MESSAGE_CONTENT}", text)
        safe_set_attribute(span, f"{base_key}.{msg_attrs.MESSAGE_ROLE}", role)
|
|
|
|
|
|
def _safe_get(obj, key, default=None):
|
|
"""Read ``key`` from a dict-like or Pydantic-model-like object.
|
|
|
|
The arize/langfuse_otel logger receives ``usage`` objects from many sources:
|
|
plain dicts, litellm ``Usage`` (which exposes ``.get``), and raw OpenAI
|
|
Pydantic models (e.g. ``openai.types.completion_usage.CompletionUsage`` and
|
|
nested ``CompletionTokensDetails`` / ``OutputTokensDetails``) which do NOT
|
|
expose ``.get``. Calling ``.get`` on the latter raised ``AttributeError`` —
|
|
see https://github.com/BerriAI/litellm/issues/13672.
|
|
"""
|
|
if obj is None:
|
|
return default
|
|
getter = getattr(obj, "get", None)
|
|
if callable(getter):
|
|
try:
|
|
return getter(key, default)
|
|
except TypeError:
|
|
# Some objects expose `.get` with a different signature
|
|
pass
|
|
return getattr(obj, key, default)
|
|
|
|
|
|
def _set_usage_outputs(span: "Span", response_obj, span_attrs):
    """Emit token-count attributes from ``response_obj["usage"]``.

    The total is always written once a usage object exists; every other
    count is written only when its value is truthy. Values are read with
    ``_safe_get`` so dicts and attribute-style objects are both accepted.
    """
    usage = response_obj and response_obj.get("usage")
    if not usage:
        return

    def _emit_when_truthy(attr_name, value):
        # The key is resolved on ``span_attrs`` only when there is something
        # to write, so absent fields never touch the attribute class.
        if value:
            safe_set_attribute(span, getattr(span_attrs, attr_name), value)

    safe_set_attribute(
        span, span_attrs.LLM_TOKEN_COUNT_TOTAL, _safe_get(usage, "total_tokens")
    )

    _emit_when_truthy(
        "LLM_TOKEN_COUNT_COMPLETION",
        _safe_get(usage, "completion_tokens") or _safe_get(usage, "output_tokens"),
    )
    _emit_when_truthy(
        "LLM_TOKEN_COUNT_PROMPT",
        _safe_get(usage, "prompt_tokens") or _safe_get(usage, "input_tokens"),
    )

    # Reasoning tokens live in `completion_tokens_details` for Chat Completions
    # API (Usage) and in `output_tokens_details` for Responses API
    # (ResponseAPIUsage). Both nested objects may be plain Pydantic models
    # without `.get`.
    completion_details = _safe_get(usage, "completion_tokens_details") or _safe_get(
        usage, "output_tokens_details"
    )
    _emit_when_truthy(
        "LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING",
        _safe_get(completion_details, "reasoning_tokens"),
    )

    # Additive: cache token breakdown so prompt-caching savings render in
    # Arize. Sources covered:
    #   - OpenAI Chat Completions: `prompt_tokens_details.cached_tokens`
    #   - Anthropic / Bedrock-Anthropic: `cache_read_input_tokens`,
    #     `cache_creation_input_tokens`
    # All emits are conditional, so when none of these fields exist no extra
    # attributes are set.
    prompt_details = _safe_get(usage, "prompt_tokens_details") or _safe_get(
        usage, "input_tokens_details"
    )
    _emit_when_truthy(
        "LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ",
        _safe_get(prompt_details, "cached_tokens")
        or _safe_get(usage, "cache_read_input_tokens"),
    )

    # Anthropic / Bedrock-Anthropic only — OpenAI's `prompt_tokens_details`
    # does not expose a cache-write count, so we read straight off `usage`.
    _emit_when_truthy(
        "LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE",
        _safe_get(usage, "cache_creation_input_tokens"),
    )

    _emit_when_truthy(
        "LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO",
        _safe_get(prompt_details, "audio_tokens"),
    )
|
|
|
|
|
|
def _infer_open_inference_span_kind(call_type: Optional[str]) -> str:
    """Map a LiteLLM call type to an OpenInference span-kind value.

    Matching is case-insensitive and mostly substring-based. Rules are tried
    in a fixed order and the first hit wins; a missing or unmatched call type
    yields ``UNKNOWN``.
    """
    kinds = OpenInferenceSpanKindValues
    if not call_type:
        return kinds.UNKNOWN.value

    name = str(call_type).lower()

    def mentions(*needles: str) -> bool:
        """Return True when any needle occurs inside the lowered call type."""
        return any(needle in name for needle in needles)

    if mentions("embed"):
        return kinds.EMBEDDING.value
    if mentions("rerank"):
        return kinds.RERANKER.value
    if mentions("search"):
        return kinds.RETRIEVER.value
    if mentions("moderation", "guardrail"):
        return kinds.GUARDRAIL.value
    # Exact names, or anything ending in "tool".
    if name in ("call_mcp_tool", "mcp") or name.endswith("tool"):
        return kinds.TOOL.value
    if mentions("asend_message", "a2a", "assistant"):
        return kinds.AGENT.value
    if mentions(
        "completion",
        "chat",
        "image",
        "audio",
        "speech",
        "transcription",
        "generate_content",
        "response",
        "videos",
        "realtime",
        "pass_through",
        # `passthrough` (no underscore) is what real call_types use:
        # `allm_passthrough_route`, `llm_passthrough_route`. Without
        # this they fell through to UNKNOWN, blanking span.kind.
        "passthrough",
        "anthropic_messages",
        "ocr",
    ):
        return kinds.LLM.value
    if mentions("file", "batch", "container", "fine_tuning_job"):
        return kinds.CHAIN.value

    return kinds.UNKNOWN.value
|
|
|
|
|
|
def _set_tool_attributes(
    span: "Span", optional_tools: Optional[list], metadata_tools: Optional[list]
):
    """Set tool attributes on ``span`` from optional_params or tool-call metadata.

    Args:
        span: Span receiving the attributes.
        optional_tools: Tool definitions taken from the request's optional
            params. Only dict entries carrying a non-empty dict under
            ``"function"`` are used; their name, description and parameters
            are written under ``LLM_TOOLS.N``.
        metadata_tools: Tool descriptions taken from request metadata. Only
            used when it is a list; each dict entry's name and description
            are written under ``LLM_INVOCATION_PARAMETERS.tools.N``.
    """
    if optional_tools:
        for idx, tool in enumerate(optional_tools):
            if not isinstance(tool, dict):
                continue
            function = tool.get("function")
            if not isinstance(function, dict) or not function:
                continue

            tool_name = function.get("name")
            if tool_name:
                safe_set_attribute(
                    span, f"{SpanAttributes.LLM_TOOLS}.{idx}.name", tool_name
                )

            tool_description = function.get("description")
            if tool_description:
                safe_set_attribute(
                    span,
                    f"{SpanAttributes.LLM_TOOLS}.{idx}.description",
                    tool_description,
                )

            params = function.get("parameters")
            if params is not None:
                # A schema json cannot encode must not raise out of this
                # helper: the caller runs it inside one try-block together
                # with the message/response emitters, so an exception here
                # would skip all of them. Fall back to the plain repr instead.
                try:
                    serialized_params = json.dumps(params)
                except (TypeError, ValueError):
                    serialized_params = str(params)
                safe_set_attribute(
                    span,
                    f"{SpanAttributes.LLM_TOOLS}.{idx}.parameters",
                    serialized_params,
                )

    if metadata_tools and isinstance(metadata_tools, list):
        for idx, tool in enumerate(metadata_tools):
            if not isinstance(tool, dict):
                continue

            tool_name = tool.get("name")
            if tool_name:
                safe_set_attribute(
                    span,
                    f"{SpanAttributes.LLM_INVOCATION_PARAMETERS}.tools.{idx}.name",
                    tool_name,
                )

            tool_description = tool.get("description")
            if tool_description:
                safe_set_attribute(
                    span,
                    f"{SpanAttributes.LLM_INVOCATION_PARAMETERS}.tools.{idx}.description",
                    tool_description,
                )
|
|
|
|
|
|
def set_attributes(
    span: "Span", kwargs, response_obj, attributes: Type[BaseLLMObsOTELAttributes]
):
    """
    Populates span with OpenInference-compliant LLM attributes for Arize and Phoenix tracing.

    The span kind is written first through a guarded call. Metadata, request,
    tool, message, model-parameter and response attributes are then written
    inside one try-block whose failures are logged and recorded on the span.
    Finally the additive session/user, cost and passthrough emitters run,
    each guarded on its own.
    """
    # Coerce non-dict response objects (e.g. httpx.Response from passthrough
    # routes) into a dict so downstream `.get()` calls don't crash. Objects
    # that already expose `.get` come back unchanged.
    response_view = _coerce_response_obj_for_attrs(response_obj)

    # span.kind is set defensively before anything else so that the span still
    # has a kind if a later step throws. This is the single source of truth
    # for span.kind — nothing below re-writes it.
    _safe_emit("early span kind", _set_early_span_kind, span, kwargs)

    try:
        sanitized_params = _sanitize_optional_params(kwargs.get("optional_params"))
        provider_params = kwargs.get("litellm_params", {}) or {}
        payload: Optional[StandardLoggingPayload] = kwargs.get(
            "standard_logging_object"
        )
        if payload is None:
            raise ValueError("standard_logging_object not found in kwargs")

        request_metadata = payload.get("metadata") if payload else None
        _set_metadata_attributes(span, request_metadata, SpanAttributes)

        tools_from_metadata = _extract_metadata_tools(request_metadata)
        tools_from_params = _extract_optional_tools(sanitized_params)

        _set_request_attributes(
            span=span,
            kwargs=kwargs,
            standard_logging_payload=payload,
            optional_params=sanitized_params,
            litellm_params=provider_params,
            response_obj=response_view,
            span_attrs=SpanAttributes,
        )

        # span.kind is NOT re-written here based on tool presence: a chat
        # completion that passes `tools=[...]` (or returns `tool_calls`) is
        # still an LLM call per the OpenInference spec — TOOL is reserved for
        # actual tool execution spans, not LLM calls that request tools.
        _set_tool_attributes(span, tools_from_params, tools_from_metadata)
        attributes.set_messages(span, kwargs)

        invocation_params = payload.get("model_parameters") if payload else None
        _set_model_params(span, invocation_params, SpanAttributes)

        _set_response_attributes(span=span, response_obj=response_view)

    except Exception as e:
        verbose_logger.error(
            f"[Arize/Phoenix] Failed to set OpenInference span attributes: {e}"
        )
        if hasattr(span, "record_exception"):
            span.record_exception(e)

    # Additive emitters. Each one is guarded separately so a failure cannot
    # blank what the try-block above already wrote. They write new keys and
    # do not overwrite existing attributes.
    slp = kwargs.get("standard_logging_object")
    _safe_emit("session/user attrs", _set_session_and_user_attrs, span, kwargs, slp)
    _safe_emit("response cost", _set_response_cost_attr, span, slp)
    _safe_emit(
        "passthrough normalization",
        _maybe_normalize_passthrough,
        span,
        kwargs,
        response_obj,
        response_view,
        slp,
    )
|
|
|
|
|
|
def _sanitize_optional_params(optional_params: Optional[dict]) -> dict:
|
|
if not isinstance(optional_params, dict):
|
|
return {}
|
|
optional_params.pop("secret_fields", None)
|
|
return optional_params
|
|
|
|
|
|
def _set_metadata_attributes(span: "Span", metadata: Optional[Any], span_attrs) -> None:
    """Write ``metadata`` to ``span_attrs.METADATA`` as a ``safe_dumps`` string.

    Nothing is written when ``metadata`` is ``None``.
    """
    if metadata is None:
        return
    safe_set_attribute(span, span_attrs.METADATA, safe_dumps(metadata))
|
|
|
|
|
|
def _extract_metadata_tools(metadata: Optional[Any]) -> Optional[list]:
|
|
if not isinstance(metadata, dict):
|
|
return None
|
|
llm_obj = metadata.get("llm")
|
|
if isinstance(llm_obj, dict):
|
|
return llm_obj.get("tools")
|
|
return None
|
|
|
|
|
|
def _extract_optional_tools(optional_params: dict) -> Optional[list]:
|
|
return optional_params.get("tools") if isinstance(optional_params, dict) else None
|
|
|
|
|
|
def _set_request_attributes(
    span: "Span",
    kwargs,
    standard_logging_payload: StandardLoggingPayload,
    optional_params: dict,
    litellm_params: dict,
    response_obj,
    span_attrs,
):
    """Write request-level attributes onto ``span``.

    Args:
        span: Span receiving the attributes.
        kwargs: Logging kwargs; only ``model`` is read here.
        standard_logging_payload: Source of ``call_type``.
        optional_params: Source of ``max_tokens``, ``temperature``, ``top_p``,
            ``stream`` and ``user``.
        litellm_params: Source of ``custom_llm_provider`` (``"Unknown"`` when
            absent).
        response_obj: Response whose ``id`` and ``model`` are recorded when
            truthy. May be ``None``, a mapping, or an object without ``.get``.
        span_attrs: Attribute-name holder providing ``LLM_MODEL_NAME`` and
            ``LLM_PROVIDER``.
    """
    if kwargs.get("model"):
        safe_set_attribute(span, span_attrs.LLM_MODEL_NAME, kwargs.get("model"))

    safe_set_attribute(
        span, "llm.request.type", standard_logging_payload.get("call_type")
    )
    safe_set_attribute(
        span,
        span_attrs.LLM_PROVIDER,
        litellm_params.get("custom_llm_provider", "Unknown"),
    )

    # Test against None rather than truthiness: 0 is a legitimate setting
    # (notably temperature=0) and a truthiness check would silently drop it.
    for param_name in ("max_tokens", "temperature", "top_p"):
        param_value = optional_params.get(param_name)
        if param_value is not None:
            safe_set_attribute(span, f"llm.request.{param_name}", param_value)

    safe_set_attribute(
        span, "llm.is_streaming", str(optional_params.get("stream", False))
    )

    if optional_params.get("user"):
        safe_set_attribute(span, "llm.user", optional_params.get("user"))

    # The response may be an object without `.get` (response coercion hands
    # the original object back when it cannot produce a dict), so read it
    # through `_safe_get` instead of calling `.get` directly.
    response_id = _safe_get(response_obj, "id")
    if response_id:
        safe_set_attribute(span, "llm.response.id", response_id)
    response_model = _safe_get(response_obj, "model")
    if response_model:
        safe_set_attribute(span, "llm.response.model", response_model)
|
|
|
|
|
|
def _set_model_params(span: "Span", model_params: Optional[dict], span_attrs) -> None:
    """Record the invocation parameters and, when present, the user id.

    The whole ``model_params`` mapping is serialised with ``safe_dumps`` into
    ``LLM_INVOCATION_PARAMETERS``; a truthy ``user`` entry is also written to
    ``USER_ID``. Nothing happens for an empty or missing mapping.
    """
    if not model_params:
        return

    safe_set_attribute(
        span, span_attrs.LLM_INVOCATION_PARAMETERS, safe_dumps(model_params)
    )

    user_id = model_params.get("user")
    if user_id:
        safe_set_attribute(span, span_attrs.USER_ID, user_id)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Additive rendering helpers (introduced to enhance Arize/Phoenix rendering
|
|
# without changing any previously-emitted attribute keys or values).
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _safe_emit(label: str, fn, *args, **kwargs) -> None:
|
|
"""Run an additive attribute emitter, swallowing any error so it cannot
|
|
blank attributes set elsewhere on the span. Failures are logged at debug.
|
|
"""
|
|
try:
|
|
fn(*args, **kwargs)
|
|
except Exception as e:
|
|
verbose_logger.debug("[Arize] %s skipped: %s", label, e)
|
|
|
|
|
|
def _set_early_span_kind(span: "Span", kwargs: dict) -> None:
    """Write ``OPENINFERENCE_SPAN_KIND`` from the logged call type.

    The call type is read from ``kwargs["standard_logging_object"]`` when
    that value is a dict; otherwise ``None`` is passed to the kind inference.
    """
    payload = kwargs.get("standard_logging_object")
    if isinstance(payload, dict):
        call_type = payload.get("call_type")
    else:
        call_type = None

    safe_set_attribute(
        span,
        SpanAttributes.OPENINFERENCE_SPAN_KIND,
        _infer_open_inference_span_kind(call_type=call_type),
    )
|
|
|
|
|
|
def _coerce_response_obj_for_attrs(response_obj):
|
|
"""Return a `.get`-compatible view of `response_obj` when possible.
|
|
|
|
- dicts and Pydantic models that already expose `.get` are returned
|
|
unchanged (preserves all current behavior, including the Responses API
|
|
flow which relies on Pydantic attribute access).
|
|
- `httpx.Response` and other text-only responses (passthrough routes)
|
|
are JSON-decoded so the standard extraction paths can read fields like
|
|
`id`, `model`, and `usage`. On failure the original object is returned
|
|
so behavior is no worse than today.
|
|
"""
|
|
if response_obj is None or hasattr(response_obj, "get"):
|
|
return response_obj
|
|
text = getattr(response_obj, "text", None)
|
|
if isinstance(text, str) and text:
|
|
try:
|
|
parsed = json.loads(text)
|
|
if isinstance(parsed, dict):
|
|
return parsed
|
|
except Exception:
|
|
pass
|
|
return response_obj
|
|
|
|
|
|
def _coerce_text(value) -> Optional[str]:
|
|
"""Best-effort text extraction from a message-content value.
|
|
|
|
Returns None when no textual portion can be derived. Handles:
|
|
- plain strings
|
|
- lists of OpenAI-style content parts (`{"type": "text", "text": ...}`)
|
|
- lists of Anthropic-style content parts (`{"type": "text", "text": ...}`
|
|
or `{"type": "input_text", "text": ...}`)
|
|
"""
|
|
if value is None:
|
|
return None
|
|
if isinstance(value, str):
|
|
return value
|
|
if isinstance(value, list):
|
|
parts = []
|
|
for part in value:
|
|
if isinstance(part, str):
|
|
parts.append(part)
|
|
elif isinstance(part, dict):
|
|
text = part.get("text") or part.get("input_text")
|
|
if isinstance(text, str):
|
|
parts.append(text)
|
|
if parts:
|
|
return "\n".join(parts)
|
|
return None
|
|
|
|
|
|
def _to_plain_dict(value):
|
|
"""Best-effort: coerce a value (Pydantic model / dict / None) to a dict.
|
|
|
|
Returns the original value when no safe conversion exists. Used to bridge
|
|
OpenAI Pydantic message/tool_call objects into the dict-based helpers.
|
|
"""
|
|
if value is None or isinstance(value, dict):
|
|
return value
|
|
model_dump = getattr(value, "model_dump", None)
|
|
if callable(model_dump):
|
|
try:
|
|
return model_dump()
|
|
except Exception:
|
|
pass
|
|
return value
|
|
|
|
|
|
def _get_tool_calls(message) -> Optional[list]:
    """Return the message's ``tool_calls`` if it is a non-empty list.

    The value is read with ``_safe_get``, so dicts and attribute-style
    message objects are both supported. Anything else yields ``None``.
    """
    candidate = _safe_get(message, "tool_calls")
    if isinstance(candidate, list) and candidate:
        return candidate
    return None
|
|
|
|
|
|
def _normalize_tool_call(raw_tc) -> Optional[Dict[str, Any]]:
    """Bring one tool_call (dict or Pydantic object) into a fixed shape:

        {"id": ..., "type": ..., "function": {"name": ..., "arguments": ...}}

    ``type`` falls back to ``"function"`` when the key is absent. Non-string
    arguments are serialised with ``json.dumps``, or with ``str`` if that
    fails. Returns ``None`` when ``raw_tc`` cannot be turned into a dict.
    """
    call = _to_plain_dict(raw_tc)
    if not isinstance(call, dict):
        return None

    fn_spec = _to_plain_dict(call.get("function"))
    if isinstance(fn_spec, dict):
        fn_name = fn_spec.get("name")
        fn_args = fn_spec.get("arguments")
    else:
        fn_name = None
        fn_args = None

    if not (fn_args is None or isinstance(fn_args, str)):
        try:
            fn_args = json.dumps(fn_args)
        except Exception:
            fn_args = str(fn_args)

    return {
        "id": call.get("id"),
        "type": call.get("type", "function"),
        "function": {"name": fn_name, "arguments": fn_args},
    }
|
|
|
|
|
|
def _summarize_tool_calls_for_output(tool_calls) -> str:
    """Serialise ``tool_calls`` as ``{"tool_calls": [...]}`` JSON for OUTPUT_VALUE.

    Each entry is normalised first; entries that cannot be normalised are
    dropped. If anything raises, ``str(tool_calls)`` is returned instead so
    the output value is never left blank.
    """
    try:
        normalized = []
        for raw_call in tool_calls:
            shaped = _normalize_tool_call(raw_call)
            if shaped:
                normalized.append(shaped)
        return json.dumps({"tool_calls": normalized})
    except Exception:
        return str(tool_calls)
|
|
|
|
|
|
def _emit_message_tool_calls(span: "Span", prefix: str, message) -> None:
    """Write ``MESSAGE_TOOL_CALLS.N.*`` attributes for a message's tool calls.

    ``message`` and its tool_call entries may be dicts or Pydantic objects.
    Nothing is written when the message has no non-empty ``tool_calls`` list.
    Entries that cannot be normalised are skipped but keep their index. The
    id and function name are written when truthy, the arguments whenever they
    are not ``None``.
    """
    requested = _get_tool_calls(message)
    if not requested:
        return

    for position, raw_call in enumerate(requested):
        call = _normalize_tool_call(raw_call)
        if call is None:
            continue

        call_key = f"{prefix}.{MessageAttributes.MESSAGE_TOOL_CALLS}.{position}"

        call_id = call["id"]
        if call_id:
            safe_set_attribute(
                span, f"{call_key}.{ToolCallAttributes.TOOL_CALL_ID}", call_id
            )

        fn_spec = call["function"]
        fn_name = fn_spec["name"]
        if fn_name:
            safe_set_attribute(
                span,
                f"{call_key}.{ToolCallAttributes.TOOL_CALL_FUNCTION_NAME}",
                fn_name,
            )

        fn_args = fn_spec["arguments"]
        if fn_args is not None:
            safe_set_attribute(
                span,
                f"{call_key}.{ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON}",
                fn_args,
            )
|
|
|
|
|
|
def _emit_input_message_extras(span: "Span", prefix: str, message: dict) -> None:
    """Write the additive attributes of one input message.

    Non-dict messages are ignored. For dict messages this emits:

    - ``MESSAGE_NAME`` and ``MESSAGE_TOOL_CALL_ID`` when those fields are
      truthy (commonly set on tool-result messages).
    - ``MESSAGE_TOOL_CALLS.*`` when the message carries tool calls.
    - ``MESSAGE_CONTENTS.N.*`` for list-shaped content: text parts get a type
      and a text attribute, image parts get a type and a URL attribute.

    The plain ``MESSAGE_CONTENT`` attribute is not written here.
    """
    if not isinstance(message, dict):
        return

    sender_name = message.get("name")
    if sender_name:
        safe_set_attribute(
            span, f"{prefix}.{MessageAttributes.MESSAGE_NAME}", sender_name
        )

    answered_call_id = message.get("tool_call_id")
    if answered_call_id:
        safe_set_attribute(
            span,
            f"{prefix}.{MessageAttributes.MESSAGE_TOOL_CALL_ID}",
            answered_call_id,
        )

    _emit_message_tool_calls(span, prefix, message)

    parts = message.get("content")
    if not isinstance(parts, list):
        return

    contents_key = f"{prefix}.{MessageAttributes.MESSAGE_CONTENTS}"
    for position, part in enumerate(parts):
        if not isinstance(part, dict):
            continue

        part_key = f"{contents_key}.{position}"
        kind = part.get("type")

        if kind in ("text", "input_text"):
            text = part.get("text")
            if isinstance(text, str):
                safe_set_attribute(
                    span,
                    f"{part_key}.{MessageContentAttributes.MESSAGE_CONTENT_TYPE}",
                    "text",
                )
                safe_set_attribute(
                    span,
                    f"{part_key}.{MessageContentAttributes.MESSAGE_CONTENT_TEXT}",
                    text,
                )
            continue

        if kind not in ("image_url", "image", "input_image"):
            continue

        # First choice: `image_url`, given either as {"url": ...} or as a string.
        image_ref = part.get("image_url")
        if isinstance(image_ref, dict):
            location = image_ref.get("url")
        elif isinstance(image_ref, str):
            location = image_ref
        else:
            location = None

        if not location:
            # Anthropic-style source.{type=base64,media_type,data}
            source = part.get("source")
            if isinstance(source, dict) and source.get("data"):
                media_type = source.get("media_type", "image/jpeg")
                location = f"data:{media_type};base64,{source['data']}"
            elif isinstance(part.get("url"), str):
                location = part["url"]

        if location:
            safe_set_attribute(
                span,
                f"{part_key}.{MessageContentAttributes.MESSAGE_CONTENT_TYPE}",
                "image",
            )
            safe_set_attribute(
                span,
                f"{part_key}.message_content.image.image.url",
                location,
            )
|
|
|
|
|
|
def _set_session_and_user_attrs(
    span: "Span", kwargs: dict, standard_logging_payload
) -> None:
    """Write session, trace, user, team and key-alias attributes when present.

    Does nothing unless both the payload and its ``metadata`` are dicts.

    - ``SESSION_ID`` comes only from ``metadata.user_api_key_end_user_id``.
      There is deliberately no fallback to ``trace_id``: that would create a
      separate "session" for every request and distort session grouping.
      The trace id is written under ``litellm.trace_id`` instead.
    - ``USER_ID`` is written from ``metadata.user_api_key_user_id`` only when
      neither ``optional_params.user`` nor ``model_parameters.user`` is set,
      so a value supplied through those paths is not overwritten.
    - Team id, team alias and key alias go to ``litellm.*`` keys.
    """
    if not isinstance(standard_logging_payload, dict):
        return
    metadata = standard_logging_payload.get("metadata") or {}
    if not isinstance(metadata, dict):
        return

    end_user = metadata.get("user_api_key_end_user_id")
    if end_user:
        safe_set_attribute(span, SpanAttributes.SESSION_ID, str(end_user))

    trace_id = standard_logging_payload.get("trace_id")
    if trace_id:
        safe_set_attribute(span, "litellm.trace_id", str(trace_id))

    optional_params = kwargs.get("optional_params") or {}
    model_params = standard_logging_payload.get("model_parameters") or {}
    user_known = (
        isinstance(optional_params, dict) and optional_params.get("user")
    ) or (isinstance(model_params, dict) and model_params.get("user"))
    if not user_known:
        key_owner = metadata.get("user_api_key_user_id")
        if key_owner:
            safe_set_attribute(span, SpanAttributes.USER_ID, str(key_owner))

    # Remaining API-key metadata maps one-to-one onto litellm.* attributes.
    for metadata_key, attr_key in (
        ("user_api_key_team_id", "litellm.team_id"),
        ("user_api_key_team_alias", "litellm.team_alias"),
        ("user_api_key_alias", "litellm.key_alias"),
    ):
        value = metadata.get(metadata_key)
        if value:
            safe_set_attribute(span, attr_key, str(value))
|
|
|
|
|
|
def _set_response_cost_attr(span: "Span", standard_logging_payload) -> None:
|
|
"""Emit cost attributes from the StandardLoggingPayload when present.
|
|
|
|
Uses the OpenInference `llm.cost.total` key so Arize / Phoenix can
|
|
surface the cost in their "Total Cost" column. LiteLLM only tracks a
|
|
single total in `StandardLoggingPayload.response_cost`, so we cannot
|
|
split it into prompt/completion. We also keep the legacy
|
|
`llm.response.cost` key for back-compat with any consumer querying it.
|
|
"""
|
|
if not isinstance(standard_logging_payload, dict):
|
|
return
|
|
cost = standard_logging_payload.get("response_cost")
|
|
if cost is None:
|
|
return
|
|
try:
|
|
cost_value = float(cost)
|
|
except (TypeError, ValueError):
|
|
return
|
|
safe_set_attribute(span, "llm.cost.total", cost_value)
|
|
safe_set_attribute(span, "llm.response.cost", cost_value)
|
|
|
|
|
|
def _is_passthrough_call_type(call_type: Optional[str]) -> bool:
|
|
if not call_type:
|
|
return False
|
|
lowered = str(call_type).lower()
|
|
return "passthrough" in lowered or "pass_through" in lowered
|
|
|
|
|
|
def _maybe_normalize_passthrough(
    span: "Span",
    kwargs: dict,
    raw_response_obj,
    coerced_response_obj,
    standard_logging_payload,
) -> None:
    """Attach request/response text to ``span`` for passthrough routes.

    Passthrough calls (e.g. Bedrock InvokeModel) would otherwise render on the
    parent span as little more than `usage` numbers. This helper fills in the
    input and output attributes from:

    - `kwargs["additional_args"]["complete_input_dict"]["messages"]` (input)
    - the coerced/raw response, falling back to `kwargs["original_response"]`
      (output)

    It is a no-op unless the payload's `call_type` is a passthrough variant,
    so regular chat/completion calls never get past the first check. Every
    step is best-effort: an unrecognized shape simply results in nothing being
    emitted.

    TEMPORARY BRIDGE: passthrough handlers do not populate the
    StandardLoggingPayload `messages` field today (they call
    `transform_response(messages=[])`), which is why the input has to come
    from `additional_args.complete_input_dict`. The proper fix belongs
    upstream in
    `base_passthrough_logging_handler._create_response_logging_payload()`;
    once that fills SLP `messages`/`response`, all callbacks get passthrough
    I/O with central redaction and the `complete_input_dict` fallback here can
    be removed. See follow-up issue.
    """
    call_type = None
    if isinstance(standard_logging_payload, dict):
        call_type = standard_logging_payload.get("call_type")
    if not _is_passthrough_call_type(call_type):
        return

    # Honor LiteLLM's central message-redaction contract. The regular
    # chat/completion path is scrubbed by `perform_redaction` before callbacks
    # run, but `complete_input_dict` is outside that layer. Without this gate
    # an operator who turned redaction on would still see raw passthrough
    # prompts in Arize, so bail out before touching input OR output.
    if should_redact_message_logging(kwargs):
        return

    # --- input: provider request body captured in additional_args ---------
    extra_args = kwargs.get("additional_args") or {}
    if isinstance(extra_args, dict):
        request_body = extra_args.get("complete_input_dict")
        if isinstance(request_body, dict):
            _set_passthrough_input_attributes(span, request_body.get("messages"))

    # --- output: dict view of the provider response -----------------------
    response_view = _parse_passthrough_response(
        raw_response_obj, coerced_response_obj, kwargs
    )
    if isinstance(response_view, dict):
        _set_passthrough_output_attributes(span, response_view)
|
|
|
|
|
|
def _set_passthrough_input_attributes(span: "Span", messages) -> None:
|
|
"""Render passthrough request messages into INPUT_VALUE + LLM_INPUT_MESSAGES."""
|
|
if not (isinstance(messages, list) and messages):
|
|
return
|
|
# Set INPUT_VALUE from the last user message text if discoverable.
|
|
last_text = None
|
|
for msg in reversed(messages):
|
|
if isinstance(msg, dict):
|
|
last_text = _coerce_text(msg.get("content"))
|
|
if last_text:
|
|
break
|
|
if last_text:
|
|
safe_set_attribute(span, SpanAttributes.INPUT_VALUE, last_text)
|
|
# Mirror messages into LLM_INPUT_MESSAGES so the input pane renders.
|
|
for idx, msg in enumerate(messages):
|
|
if not isinstance(msg, dict):
|
|
continue
|
|
prefix = f"{SpanAttributes.LLM_INPUT_MESSAGES}.{idx}"
|
|
role = msg.get("role")
|
|
if role:
|
|
safe_set_attribute(
|
|
span,
|
|
f"{prefix}.{MessageAttributes.MESSAGE_ROLE}",
|
|
role,
|
|
)
|
|
text = _coerce_text(msg.get("content"))
|
|
if text is not None:
|
|
safe_set_attribute(
|
|
span,
|
|
f"{prefix}.{MessageAttributes.MESSAGE_CONTENT}",
|
|
text,
|
|
)
|
|
|
|
|
|
def _set_passthrough_output_attributes(span: "Span", parsed_response: dict) -> None:
|
|
"""Render passthrough response into OUTPUT_VALUE + LLM_OUTPUT_MESSAGES."""
|
|
# Anthropic / Bedrock-Anthropic: `content` is a list of typed parts.
|
|
content_list = parsed_response.get("content")
|
|
if isinstance(content_list, list) and content_list:
|
|
texts = []
|
|
for part in content_list:
|
|
if isinstance(part, dict) and isinstance(part.get("text"), str):
|
|
texts.append(part["text"])
|
|
joined = "\n\n".join(t for t in texts if t)
|
|
if joined:
|
|
safe_set_attribute(span, SpanAttributes.OUTPUT_VALUE, joined)
|
|
prefix = f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.0"
|
|
safe_set_attribute(
|
|
span,
|
|
f"{prefix}.{MessageAttributes.MESSAGE_ROLE}",
|
|
parsed_response.get("role", "assistant"),
|
|
)
|
|
safe_set_attribute(
|
|
span,
|
|
f"{prefix}.{MessageAttributes.MESSAGE_CONTENT}",
|
|
joined,
|
|
)
|
|
|
|
# OpenAI-style passthrough: `choices[0].message.content`
|
|
choices = parsed_response.get("choices")
|
|
if isinstance(choices, list) and choices:
|
|
first = choices[0]
|
|
if isinstance(first, dict):
|
|
msg = first.get("message")
|
|
if isinstance(msg, dict):
|
|
text = _coerce_text(msg.get("content"))
|
|
if text:
|
|
safe_set_attribute(span, SpanAttributes.OUTPUT_VALUE, text)
|
|
prefix = f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.0"
|
|
safe_set_attribute(
|
|
span,
|
|
f"{prefix}.{MessageAttributes.MESSAGE_ROLE}",
|
|
msg.get("role", "assistant"),
|
|
)
|
|
safe_set_attribute(
|
|
span,
|
|
f"{prefix}.{MessageAttributes.MESSAGE_CONTENT}",
|
|
text,
|
|
)
|
|
|
|
|
|
def _parse_passthrough_response(raw_response_obj, coerced_response_obj, kwargs):
|
|
"""Return a dict view of the provider response for passthrough routes."""
|
|
# Prefer the coerced view (already JSON-parsed for httpx.Response).
|
|
candidates = []
|
|
if isinstance(coerced_response_obj, dict):
|
|
candidates.append(coerced_response_obj)
|
|
if (
|
|
isinstance(raw_response_obj, dict)
|
|
and raw_response_obj is not coerced_response_obj
|
|
):
|
|
candidates.append(raw_response_obj)
|
|
|
|
for candidate in candidates:
|
|
# StandardPassThroughResponseObject wrapper: {"response": "..."}.
|
|
if (
|
|
"response" in candidate
|
|
and "content" not in candidate
|
|
and "choices" not in candidate
|
|
):
|
|
inner = candidate.get("response")
|
|
if isinstance(inner, str):
|
|
try:
|
|
parsed = json.loads(inner)
|
|
if isinstance(parsed, dict):
|
|
return parsed
|
|
except Exception:
|
|
continue
|
|
if isinstance(inner, dict):
|
|
return inner
|
|
else:
|
|
return candidate
|
|
|
|
# Fallback: kwargs["original_response"] from the OTel base path.
|
|
original = kwargs.get("original_response") if isinstance(kwargs, dict) else None
|
|
if isinstance(original, dict):
|
|
return original
|
|
if isinstance(original, str):
|
|
try:
|
|
parsed = json.loads(original)
|
|
if isinstance(parsed, dict):
|
|
return parsed
|
|
except Exception:
|
|
return None
|
|
return None
|