fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
857 lines
32 KiB
Python
857 lines
32 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
import uuid
|
|
from datetime import datetime, timezone
|
|
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
|
|
|
import httpx
|
|
from pydantic import BaseModel, Field
|
|
|
|
import litellm
|
|
from litellm._logging import verbose_logger
|
|
from litellm.integrations.custom_logger import CustomLogger
|
|
from litellm.litellm_core_utils.prompt_templates.common_utils import (
|
|
convert_content_list_to_str,
|
|
get_content_from_model_response,
|
|
)
|
|
from litellm.types.llms.openai import (
|
|
AllMessageValues,
|
|
HttpxBinaryResponseContent,
|
|
ResponsesAPIResponse,
|
|
)
|
|
from litellm.llms.custom_httpx.http_handler import (
|
|
get_async_httpx_client,
|
|
httpxSpecialProvider,
|
|
)
|
|
from litellm.types.integrations.base_health_check import IntegrationHealthCheckStatus
|
|
|
|
# Base URL used by GalileoObserve when GALILEO_API_KEY is set but
# GALILEO_BASE_URL is not.
GALILEO_CLOUD_API_BASE_URL = "https://api.galileo.ai"
# Cap the in-memory buffer so persistent flush failures (e.g. Galileo
# unavailable, invalid credentials) cannot leak memory unboundedly.
# Once the buffer grows past this many records the oldest ones are dropped.
GALILEO_MAX_IN_MEMORY_RECORDS = 1000
|
|
|
|
|
|
class LLMResponse(BaseModel):
    # Flat record describing one logged LLM call. Instances are dumped to
    # plain dicts (model_dump) and buffered until they are converted into
    # Galileo trace/span payloads.

    # Wall-clock duration of the call in milliseconds.
    latency_ms: int
    # HTTP-style status recorded for the call.
    status_code: int
    # Plain-text summary of the request and of the response.
    input_text: str
    output_text: str
    # Used as the trace/span "name" when the record is converted.
    node_type: str
    model: str
    # Token usage; the total is recomputed downstream when it is 0 but the
    # input/output counts are not.
    num_input_tokens: int
    num_output_tokens: int
    num_total_tokens: int
    cost: Optional[float] = Field(
        default=None,
        description="Total cost of the LLM call in USD as computed by LiteLLM.",
    )
    output_logprobs: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Optional. When available, logprobs are used to compute Uncertainty.",
    )
    # NOTE(review): the writer in this file emits "%Y-%m-%dT%H:%M:%SZ" (UTC with
    # a trailing "Z"), and values without a zone suffix get "Z" appended before
    # ingest - confirm whether the description below should mention the suffix.
    created_at: str = Field(
        ..., description='timestamp constructed in "%Y-%m-%dT%H:%M:%S" format'
    )
    # Optional labels; copied onto the span only when non-empty.
    tags: Optional[List[str]] = None
    user_metadata: Optional[Dict[str, Any]] = None
|
|
|
|
|
|
class GalileoObserve(CustomLogger):
|
|
def __init__(self) -> None:
    """Load Galileo settings from the environment and create the HTTP client.

    An API key selects the v2 (API-key) flow and, when no base URL is
    configured, falls back to the Galileo cloud URL. Without an API key the
    username/password login flow is used and the base URL stays as
    configured (possibly ``None``).
    """
    # Buffer of record dicts waiting to be flushed; flushed once it holds
    # ``batch_size`` records.
    self.in_memory_records: List[dict] = []
    self.batch_size = 1

    # Credentials and routing, all read from the environment.
    self.api_key = os.getenv("GALILEO_API_KEY")
    self.project_id = os.getenv("GALILEO_PROJECT_ID")
    self.log_stream_id = os.getenv("GALILEO_LOG_STREAM_ID")
    self.username = os.getenv("GALILEO_USERNAME")
    self.password = os.getenv("GALILEO_PASSWORD")

    configured_url = self._normalize_base_url(os.getenv("GALILEO_BASE_URL"))
    if configured_url:
        self.base_url = configured_url
    elif self.api_key:
        self.base_url = GALILEO_CLOUD_API_BASE_URL
    else:
        self.base_url = configured_url

    self.use_v2_api = bool(self.api_key)

    # Request headers are built lazily on first use (see _ensure_headers).
    self.headers: Optional[Dict[str, str]] = None
    self.async_httpx_handler = get_async_httpx_client(
        llm_provider=httpxSpecialProvider.LoggingCallback
    )
|
|
|
|
@staticmethod
|
|
def _normalize_base_url(base_url: Optional[str]) -> Optional[str]:
|
|
if base_url:
|
|
return base_url.rstrip("/")
|
|
return None
|
|
|
|
def _is_configured(self) -> bool:
|
|
if not self.project_id or not self.base_url:
|
|
return False
|
|
if self.use_v2_api:
|
|
return bool(self.api_key)
|
|
return bool(self.username and self.password)
|
|
|
|
async def async_health_check(self) -> IntegrationHealthCheckStatus:
    """Check configuration, authentication and reachability of Galileo.

    Returns:
        A "healthy" status when ``GET {base_url}/current_user`` answers with
        a status code below 400; otherwise an "unhealthy" status whose
        ``error_message`` names the first problem found. Any exception is
        converted into an "unhealthy" status rather than raised.
    """

    def _unhealthy(reason: str) -> IntegrationHealthCheckStatus:
        return IntegrationHealthCheckStatus(
            status="unhealthy",
            error_message=reason,
        )

    try:
        # Configuration checks, in order of reporting priority.
        if not self.project_id:
            return _unhealthy("GALILEO_PROJECT_ID environment variable not set")
        if not self.base_url:
            return _unhealthy("GALILEO_BASE_URL environment variable not set")

        login_credentials_missing = not self.username or not self.password
        if not self.use_v2_api and login_credentials_missing:
            return _unhealthy(
                "GALILEO_API_KEY or GALILEO_USERNAME and GALILEO_PASSWORD "
                "environment variables must be set"
            )

        # Authentication (builds/caches the request headers).
        if not await self._ensure_headers():
            return _unhealthy("Galileo authentication failed")

        # Reachability probe.
        probe = await self.async_httpx_handler.get(
            url=f"{self.base_url}/current_user",
            headers=self.headers,
        )
        if probe.status_code >= 400:
            return _unhealthy(f"Galileo API returned HTTP {probe.status_code}")

        return IntegrationHealthCheckStatus(status="healthy", error_message=None)
    except Exception as exc:
        return _unhealthy(f"Galileo health check failed: {str(exc)}")
|
|
|
|
async def async_set_galileo_headers(self) -> None:
|
|
galileo_login_response = await self.async_httpx_handler.post(
|
|
url=f"{self.base_url}/login",
|
|
headers={
|
|
"accept": "application/json",
|
|
"Content-Type": "application/x-www-form-urlencoded",
|
|
},
|
|
data={
|
|
"username": self.username,
|
|
"password": self.password,
|
|
},
|
|
)
|
|
galileo_login_response.raise_for_status()
|
|
access_token = galileo_login_response.json()["access_token"]
|
|
self.headers = {
|
|
"accept": "application/json",
|
|
"Content-Type": "application/json",
|
|
"Authorization": f"Bearer {access_token}",
|
|
}
|
|
|
|
async def _ensure_headers(self) -> bool:
|
|
if self.headers is not None:
|
|
return True
|
|
|
|
if self.use_v2_api:
|
|
if not self.api_key:
|
|
return False
|
|
self.headers = {
|
|
"accept": "application/json",
|
|
"Content-Type": "application/json",
|
|
"Galileo-API-Key": self.api_key,
|
|
}
|
|
return True
|
|
|
|
if not (self.username and self.password and self.base_url):
|
|
return False
|
|
|
|
try:
|
|
await self.async_set_galileo_headers()
|
|
return True
|
|
except Exception as e:
|
|
verbose_logger.debug("Galileo Logger: failed to authenticate: %s", e)
|
|
return False
|
|
|
|
@staticmethod
|
|
def _galileo_input_messages(
|
|
messages: Optional[Any], input_text: str
|
|
) -> List[Dict[str, str]]:
|
|
if isinstance(messages, dict):
|
|
messages = messages.get("messages")
|
|
if not messages:
|
|
return [{"role": "user", "content": input_text}]
|
|
if not isinstance(messages, list):
|
|
return [{"role": "user", "content": input_text}]
|
|
|
|
galileo_messages: List[Dict[str, str]] = []
|
|
for message in messages:
|
|
if not isinstance(message, dict):
|
|
continue
|
|
role = message.get("role")
|
|
if not role:
|
|
continue
|
|
galileo_messages.append(
|
|
{
|
|
"role": str(role),
|
|
"content": convert_content_list_to_str(
|
|
message=cast(AllMessageValues, message)
|
|
),
|
|
}
|
|
)
|
|
|
|
if galileo_messages:
|
|
return galileo_messages
|
|
return [{"role": "user", "content": input_text}]
|
|
|
|
@staticmethod
|
|
def _local_timezone():
|
|
return datetime.now().astimezone().tzinfo or timezone.utc
|
|
|
|
@staticmethod
|
|
def _format_created_at(dt: Union[datetime, Any]) -> str:
|
|
"""Serialize timestamps as UTC ISO-8601 for Galileo."""
|
|
if not isinstance(dt, datetime):
|
|
return str(dt)
|
|
|
|
if dt.tzinfo is None:
|
|
# LiteLLM often passes naive datetimes in local time; convert to UTC
|
|
# instead of appending Z to local time (which shifts Traces tab sorting).
|
|
dt = dt.replace(tzinfo=GalileoObserve._local_timezone())
|
|
|
|
return dt.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
|
|
@staticmethod
|
|
def _normalize_created_at(created_at: str) -> str:
|
|
if created_at and not re.search(r"(Z|[+-]\d{2}:?\d{2})$", created_at):
|
|
return f"{created_at}Z"
|
|
return created_at
|
|
|
|
@staticmethod
|
|
def _token_metrics_from_record(record: Dict[str, Any]) -> Dict[str, Any]:
|
|
num_input_tokens = int(record.get("num_input_tokens") or 0)
|
|
num_output_tokens = int(record.get("num_output_tokens") or 0)
|
|
num_total_tokens = int(record.get("num_total_tokens") or 0)
|
|
if num_total_tokens == 0 and (num_input_tokens or num_output_tokens):
|
|
num_total_tokens = num_input_tokens + num_output_tokens
|
|
metrics: Dict[str, Any] = {
|
|
"num_input_tokens": num_input_tokens,
|
|
"num_output_tokens": num_output_tokens,
|
|
"num_total_tokens": num_total_tokens,
|
|
}
|
|
cost = record.get("cost")
|
|
if cost is not None:
|
|
metrics["cost"] = float(cost)
|
|
return metrics
|
|
|
|
@staticmethod
|
|
def _record_to_v2_span(
|
|
record: Dict[str, Any],
|
|
*,
|
|
trace_id: str,
|
|
span_id: str,
|
|
) -> Dict[str, Any]:
|
|
created_at = GalileoObserve._normalize_created_at(record.get("created_at", ""))
|
|
|
|
span: Dict[str, Any] = {
|
|
"type": "llm",
|
|
"id": span_id,
|
|
"trace_id": trace_id,
|
|
"parent_id": trace_id,
|
|
"name": record.get("node_type", "litellm"),
|
|
"created_at": created_at,
|
|
"input": GalileoObserve._galileo_input_messages(
|
|
record.get("messages"), record.get("input_text", "")
|
|
),
|
|
"output": {
|
|
"role": "assistant",
|
|
"content": record.get("output_text", ""),
|
|
},
|
|
"status_code": record.get("status_code", 200),
|
|
"model": record.get("model"),
|
|
"metrics": {
|
|
"duration_ns": int(record.get("latency_ms", 0)) * 1_000_000,
|
|
**GalileoObserve._token_metrics_from_record(record),
|
|
},
|
|
}
|
|
if record.get("tags"):
|
|
span["tags"] = record["tags"]
|
|
return span
|
|
|
|
@staticmethod
|
|
def _record_to_v2_trace(record: Dict[str, Any]) -> Dict[str, Any]:
|
|
trace_id = str(uuid.uuid4())
|
|
span_id = str(uuid.uuid4())
|
|
created_at = GalileoObserve._normalize_created_at(record.get("created_at", ""))
|
|
|
|
return {
|
|
"type": "trace",
|
|
"id": trace_id,
|
|
"name": record.get("node_type", "litellm"),
|
|
"created_at": created_at,
|
|
"input": record.get("input_text", ""),
|
|
"output": record.get("output_text", ""),
|
|
"status_code": record.get("status_code", 200),
|
|
"metrics": {
|
|
"duration_ns": int(record.get("latency_ms", 0)) * 1_000_000,
|
|
**GalileoObserve._token_metrics_from_record(record),
|
|
},
|
|
"spans": [
|
|
GalileoObserve._record_to_v2_span(
|
|
record, trace_id=trace_id, span_id=span_id
|
|
)
|
|
],
|
|
}
|
|
|
|
def _build_traces_payload(self, records: List[dict]) -> Dict[str, Any]:
|
|
payload: Dict[str, Any] = {
|
|
"traces": [self._record_to_v2_trace(record) for record in records],
|
|
"logging_method": "api_direct",
|
|
"reliable": False,
|
|
"is_complete": True,
|
|
}
|
|
if self.log_stream_id:
|
|
payload["log_stream_id"] = self.log_stream_id
|
|
return payload
|
|
|
|
def _get_ingest_request(self) -> Optional[Tuple[str, Dict[str, Any]]]:
|
|
if not self.base_url or not self.project_id:
|
|
return None
|
|
|
|
# Snapshot the records to be sent into a new list so concurrent appends
|
|
# during the network round-trip (across the await points in
|
|
# flush_in_memory_records) aren't silently dropped when we later clear
|
|
# the in-memory buffer.
|
|
records = list(self.in_memory_records)
|
|
payload = self._build_traces_payload(records)
|
|
|
|
if self.use_v2_api:
|
|
return (
|
|
f"{self.base_url}/ingest/traces/{self.project_id}",
|
|
payload,
|
|
)
|
|
|
|
# Username/password auth logs in for a JWT and uses the standard v2 traces API.
|
|
return (
|
|
f"{self.base_url}/v2/projects/{self.project_id}/traces",
|
|
payload,
|
|
)
|
|
|
|
@staticmethod
|
|
def _redact_headers(headers: Optional[Dict[str, str]]) -> Dict[str, str]:
|
|
if not headers:
|
|
return {}
|
|
redacted: Dict[str, str] = {}
|
|
for key, value in headers.items():
|
|
if key.lower() in {"authorization", "galileo-api-key"} and value:
|
|
redacted[key] = (
|
|
f"{value[:8]}...{value[-4:]}" if len(value) > 12 else "***"
|
|
)
|
|
else:
|
|
redacted[key] = value
|
|
return redacted
|
|
|
|
def _log_flush_config(self) -> None:
    """Debug-log the settings in effect for a flush, without exposing secrets.

    Credentials are reported only as set / not set.
    """
    template = (
        "Galileo Logger flush config: use_v2_api=%s base_url=%s project_id=%s "
        "log_stream_id=%s api_key_set=%s username_set=%s record_count=%s"
    )
    buffered_count = len(self.in_memory_records)
    verbose_logger.debug(
        template,
        self.use_v2_api,
        self.base_url,
        self.project_id,
        self.log_stream_id,
        bool(self.api_key),
        bool(self.username),
        buffered_count,
    )
|
|
|
|
@staticmethod
|
|
def _log_v2_payload_validation(payload: Dict[str, Any]) -> None:
|
|
missing_fields: List[str] = []
|
|
traces = payload.get("traces", [])
|
|
if not traces:
|
|
missing_fields.append("traces")
|
|
|
|
for trace_index, trace in enumerate(traces):
|
|
if not isinstance(trace, dict):
|
|
continue
|
|
for field in ("id", "type", "spans"):
|
|
if field not in trace:
|
|
missing_fields.append(f"traces[{trace_index}].{field}")
|
|
|
|
trace_id = trace.get("id")
|
|
for span_index, span in enumerate(trace.get("spans", [])):
|
|
if not isinstance(span, dict):
|
|
continue
|
|
for field in ("id", "trace_id", "parent_id"):
|
|
if field not in span:
|
|
missing_fields.append(
|
|
f"traces[{trace_index}].spans[{span_index}].{field}"
|
|
)
|
|
if trace_id and span.get("trace_id") != trace_id:
|
|
missing_fields.append(
|
|
f"traces[{trace_index}].spans[{span_index}].trace_id mismatch"
|
|
)
|
|
|
|
if missing_fields:
|
|
verbose_logger.debug(
|
|
"Galileo Logger: ingest /traces payload validation issues: %s",
|
|
missing_fields,
|
|
)
|
|
|
|
def _log_flush_payload(self, url: str, payload: Dict[str, Any]) -> None:
    """Debug-log the flush target and trace count; validate v2 ingest payloads."""
    traces = payload.get("traces", [])
    trace_count = len(traces) if isinstance(traces, list) else 0
    verbose_logger.debug(
        "Galileo Logger flush URL: %s trace_count=%s",
        url,
        trace_count,
    )
    # Structural validation is only logged for the API-key ingest endpoint.
    targets_ingest_endpoint = "/ingest/traces/" in url
    if self.use_v2_api and targets_ingest_endpoint:
        self._log_v2_payload_validation(payload)
|
|
|
|
@staticmethod
def _log_http_status_error(error: httpx.HTTPStatusError, url: str) -> None:
    """Debug-log status, raw body and (when parseable) JSON body of a failed request."""
    failed = error.response
    log_debug = verbose_logger.debug

    log_debug(
        "Galileo Logger HTTP error: status=%s url=%s",
        failed.status_code,
        url,
    )
    log_debug(
        "Galileo Logger HTTP error response body: %s",
        failed.text,
    )
    # The JSON view is best-effort: a non-JSON body is simply not logged again.
    try:
        parsed_body = failed.json()
        log_debug(
            "Galileo Logger HTTP error response json: %s",
            parsed_body,
        )
    except Exception:
        pass
|
|
|
|
@staticmethod
|
|
def _build_prompt(kwargs: Dict[str, Any]) -> Dict[str, Any]:
|
|
optional_params = kwargs.get("optional_params", {}) or {}
|
|
prompt: Dict[str, Any] = {"messages": kwargs.get("messages")}
|
|
if optional_params.get("functions") is not None:
|
|
prompt["functions"] = optional_params["functions"]
|
|
if optional_params.get("tools") is not None:
|
|
prompt["tools"] = optional_params["tools"]
|
|
return prompt
|
|
|
|
@staticmethod
|
|
def _serialize_galileo_output(value: Any) -> str:
|
|
if value is None:
|
|
return ""
|
|
if isinstance(value, str):
|
|
return value
|
|
|
|
def _json_default(obj: Any) -> Any:
|
|
if hasattr(obj, "model_dump"):
|
|
return obj.model_dump()
|
|
return str(obj)
|
|
|
|
return json.dumps(value, default=_json_default)
|
|
|
|
@staticmethod
|
|
def _prompt_to_input_text(prompt: Dict[str, Any]) -> str:
|
|
messages = prompt.get("messages")
|
|
if messages is not None:
|
|
text = GalileoObserve._input_text_from_messages(messages)
|
|
if text:
|
|
return text
|
|
return json.dumps(prompt, default=str)
|
|
|
|
@staticmethod
|
|
def _get_chat_content_for_galileo(response_obj: litellm.ModelResponse) -> Any:
|
|
if response_obj.choices and len(response_obj.choices) > 0:
|
|
message = response_obj["choices"][0]["message"]
|
|
if hasattr(message, "json"):
|
|
message_json = message.json()
|
|
if isinstance(message_json, str):
|
|
return json.loads(message_json)
|
|
return message_json
|
|
return message
|
|
return None
|
|
|
|
@staticmethod
|
|
def _get_text_completion_content_for_galileo(
|
|
response_obj: litellm.TextCompletionResponse,
|
|
) -> Optional[str]:
|
|
if response_obj.choices and len(response_obj.choices) > 0:
|
|
return response_obj.choices[0].text
|
|
return None
|
|
|
|
@staticmethod
|
|
def _get_responses_api_content_for_galileo(
|
|
response_obj: ResponsesAPIResponse,
|
|
) -> Any:
|
|
if hasattr(response_obj, "output") and response_obj.output:
|
|
return response_obj.output
|
|
return None
|
|
|
|
@staticmethod
|
|
def _langfuse_style_rerank_prompt(kwargs: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Match Langfuse rerank input: prompt = {"messages": kwargs.get("messages")}."""
|
|
return {"messages": kwargs.get("messages")}
|
|
|
|
def _get_galileo_input_output_content(
|
|
self,
|
|
kwargs: Dict[str, Any],
|
|
response_obj: Any,
|
|
level: str = "DEFAULT",
|
|
status_message: Optional[str] = None,
|
|
) -> Tuple[str, str, Any]:
|
|
"""
|
|
Mirror Langfuse _get_langfuse_input_output_content for Galileo ingest.
|
|
|
|
Returns (input_text, output_text, messages_for_span).
|
|
"""
|
|
call_type = kwargs.get("call_type")
|
|
prompt = self._build_prompt(kwargs)
|
|
|
|
if (
|
|
level == "ERROR"
|
|
and status_message is not None
|
|
and isinstance(status_message, str)
|
|
):
|
|
return self._prompt_to_input_text(prompt), status_message, prompt
|
|
|
|
if response_obj is not None and (
|
|
call_type in ("embedding", "aembedding")
|
|
or isinstance(response_obj, litellm.EmbeddingResponse)
|
|
):
|
|
# Match Langfuse OTEL: log embeddings without serializing vectors.
|
|
return self._prompt_to_input_text(prompt), "embedding-output", prompt
|
|
|
|
if response_obj is not None and isinstance(response_obj, litellm.ModelResponse):
|
|
output = self._get_chat_content_for_galileo(response_obj)
|
|
return (
|
|
self._prompt_to_input_text(prompt),
|
|
self._serialize_galileo_output(output),
|
|
kwargs.get("messages") or [],
|
|
)
|
|
|
|
if response_obj is not None and isinstance(
|
|
response_obj, HttpxBinaryResponseContent
|
|
):
|
|
return self._prompt_to_input_text(prompt), "speech-output", prompt
|
|
|
|
if response_obj is not None and isinstance(
|
|
response_obj, litellm.TextCompletionResponse
|
|
):
|
|
output = self._get_text_completion_content_for_galileo(response_obj)
|
|
return (
|
|
self._prompt_to_input_text(prompt),
|
|
self._serialize_galileo_output(output),
|
|
kwargs.get("messages") or [],
|
|
)
|
|
|
|
if response_obj is not None and isinstance(response_obj, litellm.ImageResponse):
|
|
output = response_obj.get("data", None)
|
|
return (
|
|
self._prompt_to_input_text(prompt),
|
|
self._serialize_galileo_output(output),
|
|
prompt,
|
|
)
|
|
|
|
if response_obj is not None and isinstance(
|
|
response_obj, litellm.TranscriptionResponse
|
|
):
|
|
output = response_obj.get("text", None)
|
|
return (
|
|
self._prompt_to_input_text(prompt),
|
|
self._serialize_galileo_output(output),
|
|
prompt,
|
|
)
|
|
|
|
if response_obj is not None and isinstance(
|
|
response_obj, litellm.RerankResponse
|
|
):
|
|
output = response_obj.results
|
|
rerank_prompt = self._langfuse_style_rerank_prompt(kwargs)
|
|
return (
|
|
json.dumps(rerank_prompt, default=str),
|
|
self._serialize_galileo_output(output),
|
|
rerank_prompt,
|
|
)
|
|
|
|
if response_obj is not None and isinstance(response_obj, ResponsesAPIResponse):
|
|
output = self._get_responses_api_content_for_galileo(response_obj)
|
|
return (
|
|
self._prompt_to_input_text(prompt),
|
|
self._serialize_galileo_output(output),
|
|
kwargs.get("messages") or [],
|
|
)
|
|
|
|
if (
|
|
call_type == "_arealtime"
|
|
and response_obj is not None
|
|
and isinstance(response_obj, list)
|
|
):
|
|
input_val = kwargs.get("input")
|
|
return (
|
|
self._serialize_galileo_output(input_val),
|
|
self._serialize_galileo_output(response_obj),
|
|
input_val,
|
|
)
|
|
|
|
if (
|
|
call_type == "pass_through_endpoint"
|
|
and response_obj is not None
|
|
and isinstance(response_obj, dict)
|
|
):
|
|
output = response_obj.get("response", "")
|
|
return (
|
|
self._prompt_to_input_text(prompt),
|
|
self._serialize_galileo_output(output),
|
|
prompt,
|
|
)
|
|
|
|
if response_obj is not None and isinstance(response_obj, dict):
|
|
output = get_content_from_model_response(response_obj)
|
|
return (
|
|
self._prompt_to_input_text(prompt),
|
|
self._serialize_galileo_output(output),
|
|
kwargs.get("messages") or [],
|
|
)
|
|
|
|
return self._prompt_to_input_text(prompt), "", kwargs.get("messages") or []
|
|
|
|
def get_output_str_from_response(
|
|
self, response_obj: Any, kwargs: Dict[str, Any]
|
|
) -> str:
|
|
_, output_text, _ = self._get_galileo_input_output_content(
|
|
kwargs=kwargs, response_obj=response_obj
|
|
)
|
|
return output_text
|
|
|
|
@staticmethod
|
|
def _input_text_from_messages(messages: Any) -> str:
|
|
"""Return a plain-string summary of the input suitable for the trace-level input field."""
|
|
if isinstance(messages, str):
|
|
return messages
|
|
if not isinstance(messages, list):
|
|
return ""
|
|
# Use the last user/human message so the trace table shows the actual prompt
|
|
for msg in reversed(messages):
|
|
if not isinstance(msg, dict):
|
|
continue
|
|
if str(msg.get("role", "")).lower() in ("user", "human"):
|
|
content = msg.get("content") or ""
|
|
if isinstance(content, list):
|
|
content = " ".join(
|
|
b.get("text", "") if isinstance(b, dict) else str(b)
|
|
for b in content
|
|
)
|
|
if content:
|
|
return str(content)
|
|
# Fallback: first non-empty content of any role
|
|
for msg in messages:
|
|
if isinstance(msg, dict):
|
|
content = msg.get("content") or ""
|
|
if isinstance(content, list):
|
|
content = " ".join(
|
|
b.get("text", "") if isinstance(b, dict) else str(b)
|
|
for b in content
|
|
)
|
|
if content:
|
|
return str(content)
|
|
return ""
|
|
|
|
async def async_log_success_event(
    self, kwargs: Any, response_obj: Any, start_time: Any, end_time: Any
):
    """Success callback: record the call for Galileo without ever raising.

    Delegates to ``_async_log_success_event_impl``; any exception from it is
    logged with its traceback and swallowed.
    """
    verbose_logger.debug("On Async Success")
    event = dict(
        kwargs=kwargs,
        response_obj=response_obj,
        start_time=start_time,
        end_time=end_time,
    )
    try:
        await self._async_log_success_event_impl(**event)
    except Exception:
        verbose_logger.exception(
            "Galileo Logger: unexpected error in async_log_success_event"
        )
|
|
|
|
async def _async_log_success_event_impl(
    self, kwargs: Any, response_obj: Any, start_time: Any, end_time: Any
):
    """Turn one successful call into a buffered record and flush when due.

    Skips (with a debug log) when the logger is not configured or when
    ``kwargs`` has no ``standard_logging_object``. Timing comes from the
    standard logging object's ``startTime``/``endTime`` epoch values, or
    from the ``start_time``/``end_time`` datetimes when those are missing;
    if neither source is usable the call is not recorded.
    """
    if not self._is_configured():
        verbose_logger.debug(
            "Galileo Logger: skipping — GALILEO_PROJECT_ID=%s GALILEO_API_KEY=%s GALILEO_BASE_URL=%s",
            bool(self.project_id),
            bool(self.api_key),
            bool(self.base_url),
        )
        return

    slo: Optional[Dict[str, Any]] = kwargs.get("standard_logging_object")
    if slo is None:
        verbose_logger.debug(
            "Galileo Logger: no standard_logging_object in kwargs, skipping"
        )
        return

    node_type: str = str(
        slo.get("call_type") or kwargs.get("call_type") or "litellm"
    )

    input_text, output_text, span_messages = self._get_galileo_input_output_content(
        kwargs=kwargs, response_obj=response_obj
    )

    # --- timing -----------------------------------------------------------
    epoch_start = slo.get("startTime")
    epoch_end = slo.get("endTime")
    if epoch_start is not None and epoch_end is not None:
        began = datetime.fromtimestamp(float(epoch_start), tz=timezone.utc)
        finished = datetime.fromtimestamp(float(epoch_end), tz=timezone.utc)
    else:
        verbose_logger.debug(
            "Galileo Logger: standard_logging_object missing startTime/endTime, "
            "falling back to start_time/end_time params"
        )
        if not (isinstance(start_time, datetime) and isinstance(end_time, datetime)):
            return

        def _as_utc(moment: datetime) -> datetime:
            # Naive datetimes are taken to be local time.
            if moment.tzinfo is None:
                moment = moment.replace(tzinfo=GalileoObserve._local_timezone())
            return moment.astimezone(timezone.utc)

        began = _as_utc(start_time)
        finished = _as_utc(end_time)

    elapsed_ms = int((finished - began).total_seconds() * 1000)
    if elapsed_ms < 0:
        elapsed_ms = 0

    # --- token usage ------------------------------------------------------
    prompt_tokens = int(slo.get("prompt_tokens") or 0)
    completion_tokens = int(slo.get("completion_tokens") or 0)
    # A zero total falls back to the sum of the parts.
    total_tokens = int(slo.get("total_tokens") or 0) or (
        prompt_tokens + completion_tokens
    )

    # --- record -----------------------------------------------------------
    record = LLMResponse(
        latency_ms=elapsed_ms,
        status_code=200,
        input_text=input_text,
        output_text=output_text,
        node_type=node_type,
        model=str(slo.get("model") or kwargs.get("model") or "-"),
        num_input_tokens=prompt_tokens,
        num_output_tokens=completion_tokens,
        num_total_tokens=total_tokens,
        cost=slo.get("response_cost"),
        created_at=GalileoObserve._format_created_at(began),
    ).model_dump()

    if isinstance(span_messages, dict):
        span_messages = span_messages.get("messages")
    if isinstance(span_messages, list) and span_messages:
        record["messages"] = span_messages

    self.in_memory_records.append(record)
    verbose_logger.debug(
        "Galileo Logger: queued record, in_memory=%d", len(self.in_memory_records)
    )

    # Bound the buffer so persistent flush failures cannot grow it
    # without limit. Drop the oldest records once we exceed the cap.
    overflow = len(self.in_memory_records) - GALILEO_MAX_IN_MEMORY_RECORDS
    if overflow > 0:
        self.in_memory_records = self.in_memory_records[
            -GALILEO_MAX_IN_MEMORY_RECORDS:
        ]
        verbose_logger.warning(
            "Galileo Logger: in-memory buffer exceeded %s records; "
            "dropped %s oldest record(s). Check Galileo connectivity/credentials.",
            GALILEO_MAX_IN_MEMORY_RECORDS,
            overflow,
        )

    if len(self.in_memory_records) >= self.batch_size:
        await self.flush_in_memory_records()
|
|
|
|
async def flush_in_memory_records(self):
    """Send the buffered records to Galileo and drop the ones that were delivered.

    Does nothing when the buffer is empty, when the ingest URL cannot be
    built (missing base URL or project id), or when request headers cannot
    be obtained. On a successful response, exactly the records that were
    part of the request are removed from the buffer; on any failure the
    buffer is left untouched so a later flush can retry. Failures are logged
    at debug level and never raised.

    For the username/password flow, a 401/403 answer discards the cached
    bearer-token headers so the next flush logs in again. This applies both
    when the HTTP client returns the error response and when it raises
    ``httpx.HTTPStatusError`` for it.
    """
    if not self.in_memory_records:
        return

    # Remember exactly which record objects go into this request. There is
    # no await between this snapshot and the one _get_ingest_request() takes
    # for the payload, so both cover the same records. Removing by identity
    # (instead of deleting the first N entries) stays correct even if the
    # front of the buffer changes during the network round-trip, e.g. when
    # another flush completes first or the size cap drops old records.
    records_in_flight = list(self.in_memory_records)

    ingest_request = self._get_ingest_request()
    if ingest_request is None:
        verbose_logger.debug(
            "Galileo Logger: missing GALILEO_BASE_URL or GALILEO_PROJECT_ID — skipping flush"
        )
        return

    if not await self._ensure_headers():
        verbose_logger.debug(
            "Galileo Logger: could not set request headers — skipping flush"
        )
        return

    url, payload = ingest_request
    self._log_flush_config()
    self._log_flush_payload(url=url, payload=payload)
    verbose_logger.debug(
        "Galileo Logger flush headers: %s",
        self._redact_headers(self.headers),
    )
    verbose_logger.debug("flushing in memory records to %s", url)

    # Legacy enterprise auth caches a bearer token obtained from /login. If
    # a request is rejected for auth reasons, the cached headers are dropped
    # so the next flush re-authenticates instead of failing forever on a
    # stale token. The v2 API key path uses a long-lived static key, so its
    # headers are left in place.
    auth_failure_codes = (401, 403)

    try:
        response = await self.async_httpx_handler.post(
            url=url,
            headers=self.headers,
            json=payload,
        )
    except httpx.HTTPStatusError as e:
        self._log_http_status_error(error=e, url=url)
        verbose_logger.debug(
            "Galileo Logger: failed to flush in memory records: %s", e
        )
        # The client reported the error status by raising, so the stale
        # token check has to happen here as well.
        if not self.use_v2_api and e.response.status_code in auth_failure_codes:
            self.headers = None
        return
    except Exception as e:
        verbose_logger.debug(
            "Galileo Logger: failed to flush in memory records: %s", e
        )
        return

    if response.is_success:
        verbose_logger.debug(
            "Galileo Logger: successfully flushed in memory records"
        )
        verbose_logger.debug(
            "Galileo Logger flush response: status=%s body=%s",
            response.status_code,
            response.text,
        )
        delivered_ids = {id(record) for record in records_in_flight}
        self.in_memory_records[:] = [
            record
            for record in self.in_memory_records
            if id(record) not in delivered_ids
        ]
    else:
        verbose_logger.debug("Galileo Logger: failed to flush in memory records")
        verbose_logger.debug(
            "Galileo Logger error=%s, status code=%s",
            response.text,
            response.status_code,
        )
        if not self.use_v2_api and response.status_code in auth_failure_codes:
            self.headers = None
|
|
|
|
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
|
|
verbose_logger.debug("On Async Failure")
|