fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
927 lines
36 KiB
Python
927 lines
36 KiB
Python
"""
|
|
New Relic AI Monitoring Integration for LiteLLM
|
|
|
|
This module provides integration with New Relic's AI Monitoring feature to track
|
|
LLM requests, responses, and usage metrics.
|
|
|
|
Environment Variables (consumed by the New Relic agent at process bootstrap -
|
|
set via container env, or before invoking `newrelic-admin run-program`):
|
|
NEW_RELIC_LICENSE_KEY: Your New Relic license key (required)
|
|
NEW_RELIC_APP_NAME: Your application name (required)
|
|
|
|
UI- and runtime-toggleable:
|
|
NEW_RELIC_AI_MONITORING_RECORD_CONTENT_ENABLED: Whether to record message
|
|
content (optional, default: true)
|
|
|
|
Configuration:
|
|
Message logging can be controlled via (both must agree to record):
|
|
1. turn_off_message_logging parameter - pass via callback initialization or config YAML
|
|
2. NEW_RELIC_AI_MONITORING_RECORD_CONTENT_ENABLED env var
|
|
|
|
Default behavior: Messages ARE recorded unless explicitly disabled by either method
|
|
Either method can disable recording - both must enable for recording to occur
|
|
|
|
Usage - Python SDK:
|
|
import litellm
|
|
litellm.callbacks = ["newrelic"]
|
|
|
|
# Or with explicit configuration:
|
|
from litellm.integrations.newrelic import NewRelicLogger
|
|
litellm.callbacks = [NewRelicLogger(turn_off_message_logging=True)]
|
|
|
|
Usage - Proxy Server (config.yaml):
|
|
litellm_settings:
|
|
callbacks: ["newrelic"]
|
|
newrelic_params:
|
|
turn_off_message_logging: true # Disable message content recording
|
|
|
|
# Or disable via environment variable:
|
|
# export NEW_RELIC_AI_MONITORING_RECORD_CONTENT_ENABLED=false
|
|
|
|
# Ensure New Relic agent is initialized (use newrelic-admin or initialize manually)
|
|
# newrelic-admin run-program python your_app.py
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import threading
|
|
import time
|
|
import uuid
|
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
|
|
import litellm
|
|
from litellm._logging import verbose_logger
|
|
from litellm.integrations.custom_logger import CustomLogger
|
|
from litellm.litellm_core_utils.redact_messages import should_redact_message_logging
|
|
from litellm.types.integrations.newrelic import NewRelicInitParams
|
|
from litellm.types.integrations.base_health_check import IntegrationHealthCheckStatus
|
|
from litellm.types.utils import ModelResponse, Message, StandardLoggingPayload
|
|
|
|
try:
|
|
import newrelic.agent as _newrelic_agent
|
|
except ImportError:
|
|
_newrelic_agent = None # type: ignore
|
|
|
|
|
|
class NewRelicLogger(CustomLogger):
|
|
"""
|
|
New Relic logger for LiteLLM to send AI monitoring events.
|
|
|
|
This logger creates two types of New Relic custom events:
|
|
1. LlmChatCompletionSummary - One per completion request
|
|
2. LlmChatCompletionMessage - One per message (request and response)
|
|
"""
|
|
|
|
# Class-level state for supportability metric emission, shared across all instances.
|
|
# Protected by _metric_lock to ensure thread-safe access.
|
|
_last_metric_emission_time: float = 0.0
|
|
_metric_lock = threading.Lock()
|
|
|
|
def __init__(self, **kwargs):
    """Configure the logger and decide whether the integration is active.

    Keyword arguments are combined with ``litellm.newrelic_params`` (explicit
    keyword arguments win) and forwarded to ``CustomLogger.__init__``.

    ``self.enabled`` ends up True only when NEW_RELIC_LICENSE_KEY and
    NEW_RELIC_APP_NAME are both set, the ``newrelic`` package was importable,
    and ``register_application`` completed without raising.
    """
    # Global params only fill the gaps: model_dump() yields every field
    # (defaults included), so letting it overwrite would clobber explicit
    # constructor arguments such as turn_off_message_logging=True.
    global_params = self._get_newrelic_params()
    init_kwargs = dict(kwargs)
    init_kwargs.update(
        (name, value)
        for name, value in global_params.items()
        if name not in kwargs
    )

    # CustomLogger.__init__ picks turn_off_message_logging out of the kwargs.
    super().__init__(**init_kwargs)

    self.license_key = os.getenv("NEW_RELIC_LICENSE_KEY")
    self.app_name = os.getenv("NEW_RELIC_APP_NAME")

    # Stay disabled unless every precondition below is satisfied.
    self.enabled = False

    if not (self.license_key and self.app_name):
        verbose_logger.warning(
            "New Relic integration requires NEW_RELIC_LICENSE_KEY and "
            "NEW_RELIC_APP_NAME environment variables. Integration will be disabled."
        )
        return

    if _newrelic_agent is None:
        verbose_logger.error(
            "New Relic Python agent not installed. Review the New Relic integration documentation at https://docs.litellm.ai/docs/observability/newrelic."
        )
        return

    try:
        # timeout=0 requests a non-blocking startup so the agent connects in
        # the background regardless of newrelic.ini / NEW_RELIC_STARTUP_TIMEOUT.
        _newrelic_agent.register_application(timeout=0)

        self.enabled = True
        verbose_logger.info(
            f"New Relic AI Monitoring initialized for app: {self.app_name}, "
            f"content recording: {self.record_content}"
        )
    except Exception as exc:
        verbose_logger.error(
            f"Failed to initialize New Relic agent: {exc}. "
            "Integration will be disabled."
        )
        self.enabled = False
|
|
|
|
def _get_newrelic_params(self) -> Dict:
    """
    Return ``litellm.newrelic_params`` as a plain dict of init parameters.

    A ``NewRelicInitParams`` instance is dumped directly; a dict is first
    validated through ``NewRelicInitParams`` so only its fields survive.
    Anything else (including None) yields an empty dict.
    """
    configured = litellm.newrelic_params

    if isinstance(configured, NewRelicInitParams):
        return configured.model_dump()

    if isinstance(configured, dict):
        # Round-trip through the model so only NewRelicInitParams fields pass.
        return NewRelicInitParams(**configured).model_dump()

    return {}
|
|
|
|
@property
def record_content(self) -> bool:
    """Report whether message content may be sent to New Relic.

    Content is recorded only when ``turn_off_message_logging`` is falsy AND
    the NEW_RELIC_AI_MONITORING_RECORD_CONTENT_ENABLED environment variable
    (default: true) parses as true. The environment is read on every access,
    so changes take effect without re-creating the logger.
    """
    if self.turn_off_message_logging:
        return False
    return self._parse_bool_env(
        "NEW_RELIC_AI_MONITORING_RECORD_CONTENT_ENABLED", True
    )
|
|
|
|
def _parse_bool_env(self, var_name: str, default: bool = False) -> bool:
|
|
"""Parse a boolean environment variable.
|
|
|
|
Accepts true/false, 1/0, yes/no, on/off (case-insensitive,
|
|
whitespace-tolerant) — matching the convention used in
|
|
``litellm/__init__.py`` and the standard library's
|
|
``configparser.BOOLEAN_STATES``. Unrecognised values log a
|
|
warning and fall back to ``default`` rather than silently
|
|
flipping user intent.
|
|
"""
|
|
raw = os.getenv(var_name)
|
|
if not raw:
|
|
return default
|
|
value = raw.strip().lower()
|
|
if value in ("1", "true", "yes", "on"):
|
|
return True
|
|
if value in ("0", "false", "no", "off"):
|
|
return False
|
|
verbose_logger.warning(
|
|
f"{var_name}={raw!r} is not a recognised boolean "
|
|
f"(accepts true/false, 1/0, yes/no, on/off). "
|
|
f"Falling back to default ({default})."
|
|
)
|
|
return default
|
|
|
|
def _get_litellm_version(self) -> str:
|
|
"""
|
|
Get litellm version for supportability metrics.
|
|
|
|
Returns:
|
|
Version string (e.g., "1.80.0") or "unknown" if unable to determine
|
|
"""
|
|
try:
|
|
from importlib.metadata import version
|
|
|
|
return version("litellm")
|
|
except Exception as e:
|
|
verbose_logger.warning(f"Unable to determine litellm version: {e}")
|
|
return "unknown"
|
|
|
|
def _emit_supportability_metric(self):
    """
    Record the ``Supportability/Python/ML/LiteLLM/{version}`` custom metric.

    The class-level ``_last_metric_emission_time`` is refreshed as soon as the
    application handle has been fetched, whether or not the metric can be
    recorded, so callers back off for the full interval even while the agent
    is not ready. Callers checking the interval are expected to hold
    ``_metric_lock``. Any exception is logged and swallowed.
    """
    try:
        metric_name = (
            f"Supportability/Python/ML/LiteLLM/{self._get_litellm_version()}"
        )
        app = _newrelic_agent.application()

        # Stamp first: the back-off must apply even when the app is unusable,
        # otherwise every request would re-enter the locked path.
        NewRelicLogger._last_metric_emission_time = time.time()

        if not (app and app.enabled):
            verbose_logger.info(
                "New Relic application is not enabled; skipping metric recording."
            )
            return

        # A value of 1 per emission.
        app.record_custom_metric(metric_name, 1)
        verbose_logger.info(
            f"Emitted New Relic supportability metric: {metric_name}"
        )
    except Exception as exc:
        verbose_logger.warning(f"Failed to emit supportability metric: {exc}")
|
|
|
|
def _check_and_emit_periodic_metric(self):
    """
    Emit the supportability metric when at least 27 hours have elapsed.

    The elapsed time is checked once without the lock, so the common case
    stays lock-free, and once more while holding ``_metric_lock`` so that
    concurrent callers do not both emit.
    """
    emission_interval_seconds = 97200  # 27 hours

    def interval_elapsed() -> bool:
        elapsed = time.time() - NewRelicLogger._last_metric_emission_time
        return elapsed >= emission_interval_seconds

    # Cheap unlocked probe first.
    if not interval_elapsed():
        return

    with NewRelicLogger._metric_lock:
        # Re-check: another thread may have emitted while we waited.
        if interval_elapsed():
            self._emit_supportability_metric()
|
|
|
|
def _get_trace_context(
|
|
self,
|
|
kwargs: Dict,
|
|
standard_logging_object: Optional[StandardLoggingPayload] = None,
|
|
) -> str:
|
|
"""
|
|
Get the New Relic trace ID for AI monitoring events.
|
|
|
|
This integration runs in LiteLLM's async logging worker, outside the
|
|
New Relic agent's current transaction. Because we can't call
|
|
`newrelic.agent.current_trace_id()` to let the agent populate the
|
|
trace_id on AIM custom events, we manually simulate what the agent
|
|
would do. An AIM event without a trace_id is malformed per the NR
|
|
schema, so this method always returns a valid string.
|
|
|
|
Resolution order:
|
|
1. W3C traceparent header (litellm_params.metadata.headers.traceparent) -
|
|
what the agent would link to if we were in-transaction.
|
|
2. StandardLoggingPayload.trace_id - LiteLLM's internal trace for
|
|
retry/fallback grouping.
|
|
3. Generated UUID - synthetic grouping key when upstream context is
|
|
absent or parsing it fails.
|
|
|
|
Span IDs are intentionally not emitted: any span ID recoverable from
|
|
the inbound traceparent is the caller's parent span, not ours.
|
|
|
|
Returns:
|
|
trace_id: always a non-empty string.
|
|
"""
|
|
trace_id: Optional[str] = None
|
|
try:
|
|
litellm_params = kwargs.get("litellm_params") or {}
|
|
metadata = litellm_params.get("metadata") or {}
|
|
headers = metadata.get("headers") or {}
|
|
# Normalize header key lookup to be case-insensitive per W3C spec
|
|
traceparent = next(
|
|
(v for k, v in headers.items() if k.lower() == "traceparent"), None
|
|
)
|
|
|
|
if traceparent:
|
|
# Extract trace_id from traceparent header if available
|
|
# traceparent format: "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-00"
|
|
parts = traceparent.split("-")
|
|
if len(parts) == 4:
|
|
trace_id = parts[1]
|
|
|
|
if not trace_id and standard_logging_object:
|
|
slo_trace_id = standard_logging_object.get("trace_id")
|
|
if slo_trace_id:
|
|
trace_id = slo_trace_id
|
|
|
|
except Exception as e:
|
|
verbose_logger.warning(
|
|
f"Unable to parse New Relic trace context from upstream sources: {e}"
|
|
)
|
|
|
|
if not trace_id:
|
|
trace_id = uuid.uuid4().hex
|
|
verbose_logger.debug(
|
|
f"New Relic trace_id not available from distributed tracing headers or "
|
|
f"StandardLoggingPayload. Generated trace_id={trace_id} for AI monitoring "
|
|
f"event grouping."
|
|
)
|
|
|
|
return trace_id
|
|
|
|
def _extract_completion_id(self, kwargs: Dict, response_obj: ModelResponse) -> str:
|
|
"""
|
|
Extract completion ID from kwargs or response_obj, or generate one.
|
|
"""
|
|
completion_id = None
|
|
|
|
if response_obj:
|
|
completion_id = response_obj.get("id")
|
|
|
|
if not completion_id:
|
|
completion_id = kwargs.get("litellm_call_id")
|
|
|
|
# If still not found, generate UUID and log warning per spec
|
|
if not completion_id:
|
|
completion_id = str(uuid.uuid4())
|
|
|
|
return completion_id
|
|
|
|
def _get_vendor(
|
|
self,
|
|
kwargs: Dict,
|
|
standard_logging_object: Optional[StandardLoggingPayload] = None,
|
|
) -> str:
|
|
"""Extract vendor/provider, preferring StandardLoggingPayload."""
|
|
if standard_logging_object:
|
|
vendor = standard_logging_object.get("custom_llm_provider")
|
|
if vendor:
|
|
return vendor
|
|
litellm_params = kwargs.get("litellm_params", {}) or {}
|
|
return litellm_params.get("custom_llm_provider") or "litellm"
|
|
|
|
def _get_model_names(
|
|
self,
|
|
kwargs: Dict,
|
|
response_obj: ModelResponse,
|
|
standard_logging_object: Optional[StandardLoggingPayload] = None,
|
|
) -> Tuple[str, str]:
|
|
"""
|
|
Extract request and response model names, preferring StandardLoggingPayload
|
|
for the request model.
|
|
|
|
Returns:
|
|
Tuple of (request_model, response_model)
|
|
"""
|
|
request_model = None
|
|
if standard_logging_object:
|
|
slo_model = standard_logging_object.get("model")
|
|
if slo_model:
|
|
request_model = str(slo_model)
|
|
if not request_model:
|
|
request_model = str(kwargs.get("model") or "unknown")
|
|
response_model: str = str(response_obj.get("model") or request_model)
|
|
return request_model, response_model
|
|
|
|
def _extract_usage(
|
|
self,
|
|
response_obj: ModelResponse,
|
|
standard_logging_object: Optional[StandardLoggingPayload] = None,
|
|
) -> Dict[str, int]:
|
|
"""Extract usage statistics, preferring StandardLoggingPayload."""
|
|
if standard_logging_object:
|
|
prompt = standard_logging_object.get("prompt_tokens")
|
|
completion = standard_logging_object.get("completion_tokens")
|
|
total = standard_logging_object.get("total_tokens")
|
|
if any(x is not None for x in [prompt, completion, total]):
|
|
return {
|
|
"prompt_tokens": prompt or 0,
|
|
"completion_tokens": completion or 0,
|
|
"total_tokens": total or 0,
|
|
}
|
|
|
|
usage = response_obj.get("usage", None)
|
|
if not usage:
|
|
return {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
|
|
|
|
return {
|
|
"prompt_tokens": usage.get("prompt_tokens") or 0,
|
|
"completion_tokens": usage.get("completion_tokens") or 0,
|
|
"total_tokens": usage.get("total_tokens") or 0,
|
|
}
|
|
|
|
def _get_finish_reason(self, response_obj: ModelResponse) -> str:
|
|
"""
|
|
Extract finish reason from first choice in the response.
|
|
|
|
Returns "unknown" if choices are not present or finish_reason is not found.
|
|
"""
|
|
choices = response_obj.get("choices") or []
|
|
if choices and len(choices) > 0:
|
|
return choices[0].get("finish_reason") or "unknown"
|
|
return "unknown"
|
|
|
|
def _to_epoch_ms(self, t: Any) -> float:
|
|
"""Convert a datetime or float timestamp to epoch milliseconds."""
|
|
if hasattr(t, "timestamp"):
|
|
return t.timestamp() * 1000.0
|
|
return float(t) * 1000.0
|
|
|
|
def _get_duration(
|
|
self,
|
|
kwargs: Dict,
|
|
start_time: Any,
|
|
end_time: Any,
|
|
standard_logging_object: Optional[StandardLoggingPayload] = None,
|
|
) -> Optional[float]:
|
|
"""
|
|
Extract duration in milliseconds.
|
|
|
|
Resolution order:
|
|
1. StandardLoggingPayload.response_time (already computed by LiteLLM)
|
|
2. llm_api_duration_ms from kwargs
|
|
3. Calculated from start_time and end_time
|
|
"""
|
|
if standard_logging_object:
|
|
response_time = standard_logging_object.get("response_time")
|
|
if response_time is not None:
|
|
return (
|
|
float(response_time) * 1000.0
|
|
) # SLO stores seconds; convert to ms
|
|
|
|
duration_ms = kwargs.get("llm_api_duration_ms")
|
|
if duration_ms is not None:
|
|
return float(duration_ms)
|
|
|
|
if start_time is not None and end_time is not None:
|
|
return self._to_epoch_ms(end_time) - self._to_epoch_ms(start_time)
|
|
|
|
return None
|
|
|
|
def _get_request_params(
|
|
self,
|
|
kwargs: Dict,
|
|
standard_logging_object: Optional[StandardLoggingPayload] = None,
|
|
) -> Dict[str, Any]:
|
|
"""
|
|
Extract request parameters like temperature and max_tokens, preferring
|
|
StandardLoggingPayload.model_parameters.
|
|
|
|
Returns dict with available parameters, omitting those not present.
|
|
"""
|
|
if standard_logging_object:
|
|
source_params = standard_logging_object.get("model_parameters") or {}
|
|
else:
|
|
source_params = kwargs.get("optional_params") or {}
|
|
|
|
params = {}
|
|
|
|
temperature = source_params.get("temperature")
|
|
if temperature is not None:
|
|
params["temperature"] = temperature
|
|
|
|
max_tokens = source_params.get("max_tokens")
|
|
if max_tokens is not None:
|
|
params["max_tokens"] = max_tokens
|
|
|
|
return params
|
|
|
|
def _extract_message_content(self, message: Union[Message, Dict]) -> str:
|
|
"""
|
|
Extract content from a message, handling various formats.
|
|
|
|
Handles tool calls, multimodal content (as JSON), and standard text content.
|
|
Returns empty string if content is None or missing.
|
|
"""
|
|
content = message.get("content")
|
|
|
|
# Handle tool calls
|
|
if message.get("tool_calls"):
|
|
try:
|
|
return json.dumps(message["tool_calls"])
|
|
except Exception:
|
|
return str(message["tool_calls"])
|
|
|
|
# Handle None or missing content
|
|
if content is None:
|
|
return ""
|
|
|
|
# Handle list content (multimodal)
|
|
if isinstance(content, list):
|
|
try:
|
|
return json.dumps(content)
|
|
except Exception:
|
|
return str(content)
|
|
|
|
# Handle non-string content
|
|
if not isinstance(content, str):
|
|
return str(content)
|
|
|
|
return content
|
|
|
|
def _extract_all_messages(
|
|
self,
|
|
kwargs: Dict,
|
|
response_obj: ModelResponse,
|
|
response_model: str,
|
|
vendor: str,
|
|
standard_logging_object: Optional[StandardLoggingPayload] = None,
|
|
) -> List[Dict[str, Any]]:
|
|
"""
|
|
Extract all messages (request + response) with sequence numbers and timestamps.
|
|
|
|
Processes request messages from StandardLoggingPayload.messages (preferred) or
|
|
kwargs["messages"] (fallback), and response messages from response_obj["choices"].
|
|
Assigns sequential numbers starting at 0.
|
|
Adds timestamps from StandardLoggingPayload (preferred) or kwargs if available
|
|
(converted to epoch milliseconds).
|
|
"""
|
|
messages = []
|
|
sequence = 0
|
|
|
|
# Extract timestamps, preferring StandardLoggingPayload
|
|
start_time = None
|
|
if standard_logging_object:
|
|
start_time = standard_logging_object.get("startTime")
|
|
if not start_time:
|
|
start_time = kwargs.get("start_time")
|
|
|
|
end_time = None
|
|
if standard_logging_object:
|
|
end_time = standard_logging_object.get("endTime")
|
|
if not end_time:
|
|
end_time = kwargs.get("end_time")
|
|
|
|
# Content is recorded only when the NR-specific switches allow it AND
|
|
# LiteLLM's wider redaction decision (turn_off_message_logging, dynamic
|
|
# params, headers) does not require redaction. Async streaming hands the
|
|
# callback an unredacted async_complete_streaming_response, so without
|
|
# this gate generated content would still reach NR even when the user
|
|
# has globally disabled message logging.
|
|
record_content = self.record_content and not should_redact_message_logging(
|
|
kwargs
|
|
)
|
|
|
|
# Extract request messages, preferring StandardLoggingPayload.
|
|
# SLO messages can be a string (serialized/redacted), so only use it when it's a list.
|
|
slo_messages = (
|
|
standard_logging_object.get("messages") if standard_logging_object else None
|
|
)
|
|
if isinstance(slo_messages, list):
|
|
request_messages = slo_messages
|
|
else:
|
|
request_messages = kwargs.get("messages") or []
|
|
for msg in request_messages:
|
|
message_data = {
|
|
"role": msg.get("role") or "user",
|
|
"sequence": sequence,
|
|
"response.model": response_model,
|
|
"vendor": vendor,
|
|
}
|
|
|
|
# Add timestamp for request message if available (convert to milliseconds)
|
|
if start_time is not None:
|
|
message_data["timestamp"] = int(self._to_epoch_ms(start_time))
|
|
|
|
if record_content:
|
|
message_data["content"] = self._extract_message_content(msg)
|
|
|
|
messages.append(message_data)
|
|
sequence += 1
|
|
|
|
# Extract response messages from choices
|
|
choices = response_obj.get("choices") or []
|
|
if choices and len(choices) > 0:
|
|
for choice in choices:
|
|
# Prefer "message" (non-streaming); fall back to "delta" (streaming-assembled)
|
|
message = choice.get("message", None) or choice.get("delta", None)
|
|
if message:
|
|
message_data = {
|
|
"role": message.get("role") or "assistant",
|
|
"sequence": sequence,
|
|
"response.model": response_model,
|
|
"vendor": vendor,
|
|
"is_response": True,
|
|
}
|
|
|
|
# Add timestamp for response message if available (convert to milliseconds)
|
|
if end_time is not None:
|
|
message_data["timestamp"] = int(self._to_epoch_ms(end_time))
|
|
|
|
if record_content:
|
|
message_data["content"] = self._extract_message_content(message)
|
|
|
|
messages.append(message_data)
|
|
sequence += 1
|
|
|
|
return messages
|
|
|
|
def _record_summary_event(
    self,
    request_id: str,
    trace_id: Optional[str],
    request_model: str,
    response_model: str,
    vendor: str,
    finish_reason: str,
    num_messages: int,
    usage: Dict[str, int],
    duration: Optional[float] = None,
    request_params: Optional[Dict[str, Any]] = None,
):
    """Send one ``LlmChatCompletionSummary`` custom event to New Relic.

    ``trace_id``, ``duration``, ``request.temperature`` and
    ``request.max_tokens`` are included only when supplied. When the New
    Relic application is unavailable or disabled a warning is logged and
    nothing is recorded. Any exception is logged and reported through
    ``handle_callback_failure("newrelic")``.
    """
    try:
        event_data = {
            "id": request_id,
            "request_id": request_id,
            "request.model": request_model,
            "response.model": response_model,
            "response.choices.finish_reason": finish_reason,
            "response.number_of_messages": num_messages,
            "vendor": vendor,
            "ingest_source": "litellm",
            "response.usage.prompt_tokens": usage["prompt_tokens"],
            "response.usage.completion_tokens": usage["completion_tokens"],
            "response.usage.total_tokens": usage["total_tokens"],
        }

        # Optional attributes.
        if trace_id:
            event_data["trace_id"] = trace_id
        if duration is not None:
            event_data["duration"] = duration

        if request_params:
            attribute_names = (
                ("temperature", "request.temperature"),
                ("max_tokens", "request.max_tokens"),
            )
            for param_name, attribute in attribute_names:
                if param_name in request_params:
                    event_data[attribute] = request_params[param_name]

        app = _newrelic_agent.application()
        if not (app and app.enabled):
            verbose_logger.warning(
                "New Relic application is not enabled; skipping summary event recording."
            )
            return

        app.record_custom_event("LlmChatCompletionSummary", event_data)

    except Exception as exc:
        verbose_logger.warning(f"Failed to record New Relic summary event: {exc}")
        self.handle_callback_failure("newrelic")
|
|
|
|
def _record_message_events(
    self,
    request_id: str,
    llm_response_id: str,
    trace_id: Optional[str],
    messages: List[Dict[str, Any]],
):
    """Send one ``LlmChatCompletionMessage`` custom event per message.

    Args:
        request_id: ID of the summary event; stored as both ``request_id``
            and ``completion_id`` on every message event.
        llm_response_id: Response ID used as the prefix of each message
            event ``id`` (``"{llm_response_id}-{sequence}"``).
        trace_id: Trace ID to attach; omitted from the event when falsy.
        messages: Message records carrying ``sequence``, ``role``,
            ``response.model``, ``vendor`` and optionally ``content``,
            ``is_response`` and ``timestamp``.

    When the New Relic application is unavailable or disabled a warning is
    logged and nothing is recorded. Any exception is logged and reported
    through ``handle_callback_failure("newrelic")``.
    """
    try:
        app = _newrelic_agent.application()

        if not app or not app.enabled:
            verbose_logger.warning(
                "New Relic application is not enabled; skipping message event recording."
            )
            return

        for record in messages:
            position = record["sequence"]
            event_data = {
                "id": f"{llm_response_id}-{position}",
                "request_id": request_id,
                "completion_id": request_id,
                "role": record["role"],
                "sequence": position,
                "response.model": record["response.model"],
                "vendor": record["vendor"],
                "ingest_source": "litellm",
                # Per-message token counts are not available from LiteLLM.
                "token_count": 0,
            }

            if trace_id:
                event_data["trace_id"] = trace_id

            # Content is present only if the extractor decided to record it.
            if "content" in record:
                event_data["content"] = record["content"]

            # is_response is emitted only for response messages.
            if record.get("is_response"):
                event_data["is_response"] = True

            # Forward the request/response time (ms) rather than letting the
            # event default to the time the async logger happened to run.
            if "timestamp" in record:
                event_data["timestamp"] = record["timestamp"]

            app.record_custom_event("LlmChatCompletionMessage", event_data)

    except Exception as exc:
        verbose_logger.warning(f"Failed to record New Relic message events: {exc}")
        self.handle_callback_failure("newrelic")
|
|
|
|
def _record_error_metric(self):
|
|
"""Record error metric to New Relic."""
|
|
try:
|
|
if not self.enabled:
|
|
return
|
|
|
|
self._check_and_emit_periodic_metric()
|
|
|
|
app = _newrelic_agent.application()
|
|
if app and app.enabled:
|
|
app.record_custom_metric("LLM/LiteLLM/Error", 1)
|
|
except Exception as e:
|
|
verbose_logger.warning(f"Failed to record New Relic error metric: {e}")
|
|
self.handle_callback_failure("newrelic")
|
|
|
|
def _process_success(
|
|
self,
|
|
kwargs: Dict,
|
|
response_obj: ModelResponse,
|
|
start_time: Optional[float] = None,
|
|
end_time: Optional[float] = None,
|
|
):
|
|
"""
|
|
Core logic for processing successful LLM calls.
|
|
Used by both sync and async success event handlers.
|
|
"""
|
|
# Early exit if not enabled
|
|
if not self.enabled:
|
|
return
|
|
|
|
# Check and emit periodic supportability metric if 27 hours have passed
|
|
self._check_and_emit_periodic_metric()
|
|
|
|
# Use StandardLoggingPayload where available for normalized, pre-computed values
|
|
standard_logging_object: Optional[StandardLoggingPayload] = kwargs.get(
|
|
"standard_logging_object"
|
|
)
|
|
|
|
# Get trace context
|
|
trace_id = self._get_trace_context(kwargs, standard_logging_object)
|
|
|
|
# Generate unique request ID for this request (used as Summary event id)
|
|
request_id = str(uuid.uuid4())
|
|
|
|
# Extract data from response
|
|
llm_response_id = self._extract_completion_id(kwargs, response_obj)
|
|
vendor = self._get_vendor(kwargs, standard_logging_object)
|
|
request_model, response_model = self._get_model_names(
|
|
kwargs, response_obj, standard_logging_object
|
|
)
|
|
usage = self._extract_usage(response_obj, standard_logging_object)
|
|
finish_reason = self._get_finish_reason(response_obj)
|
|
|
|
# Extract additional summary event fields
|
|
duration = self._get_duration(
|
|
kwargs, start_time, end_time, standard_logging_object
|
|
)
|
|
request_params = self._get_request_params(kwargs, standard_logging_object)
|
|
|
|
# Extract all messages
|
|
messages = self._extract_all_messages(
|
|
kwargs, response_obj, response_model, vendor, standard_logging_object
|
|
)
|
|
|
|
# Record summary event
|
|
self._record_summary_event(
|
|
request_id=request_id,
|
|
trace_id=trace_id,
|
|
request_model=request_model,
|
|
response_model=response_model,
|
|
vendor=vendor,
|
|
finish_reason=finish_reason,
|
|
num_messages=len(messages),
|
|
usage=usage,
|
|
duration=duration,
|
|
request_params=request_params,
|
|
)
|
|
|
|
# Record message events
|
|
self._record_message_events(
|
|
request_id=request_id,
|
|
llm_response_id=llm_response_id,
|
|
trace_id=trace_id,
|
|
messages=messages,
|
|
)
|
|
|
|
async def async_health_check(self) -> IntegrationHealthCheckStatus:
    """
    Check whether the New Relic integration is healthy.

    Reports "unhealthy" when the integration is disabled, when the agent has
    no enabled application, or when any call raises. Otherwise it records a
    ``LiteLLMConnectionTest`` custom event and reports "healthy". The event
    can be looked up with NRQL:
    ``SELECT * FROM LiteLLMConnectionTest SINCE 1 hour ago``.

    The ``LiteLLMConnectionTest`` event type is deliberately outside the
    ``Llm*`` family, so test events stay out of AI Monitoring queries.

    Returns:
        IntegrationHealthCheckStatus with ``status`` "healthy" or
        "unhealthy" and an ``error_message`` describing the failure.
    """
    if not self.enabled:
        return IntegrationHealthCheckStatus(
            status="unhealthy",
            error_message="New Relic integration is disabled. Check that "
            "NEW_RELIC_LICENSE_KEY and NEW_RELIC_APP_NAME are set and the "
            "newrelic package is installed.",
        )

    try:
        app = _newrelic_agent.application()
        if not (app and app.enabled):
            # The agent is installed at this point (self.enabled is only set
            # after a successful import and registration call); what is
            # missing is an active application.
            return IntegrationHealthCheckStatus(
                status="unhealthy",
                error_message=(
                    "New Relic application is not active. The agent may still "
                    "be connecting or may have failed to register; verify "
                    "NEW_RELIC_LICENSE_KEY and NEW_RELIC_APP_NAME and check "
                    "the New Relic agent logs."
                ),
            )

        app.record_custom_event(
            "LiteLLMConnectionTest",
            {
                "is_test_event": True,
                "app_name": self.app_name,
                "source": "litellm-proxy",
                # Epoch milliseconds, the same unit the message events in this
                # class send for "timestamp". A value in seconds would date
                # the event to 1970 and hide it from a recent NRQL window.
                # NOTE(review): confirm against the installed agent version.
                "timestamp": int(time.time() * 1000),
            },
        )
        return IntegrationHealthCheckStatus(status="healthy", error_message=None)
    except Exception as e:
        return IntegrationHealthCheckStatus(
            status="unhealthy",
            error_message=str(e),
        )
|
|
|
|
# CustomLogger interface implementation
|
|
|
|
def log_pre_api_call(self, model, messages, kwargs):
    """No-op: nothing is sent to New Relic before the API call."""
    return None
|
|
|
|
def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
    """No-op: nothing is sent to New Relic from the post-call hook."""
    return None
|
|
|
|
def log_success_event(self, kwargs, response_obj, start_time, end_time):
    """
    Synchronous success hook.

    Delegates to ``_process_success``. Any exception it raises is logged as
    a warning and reported through ``handle_callback_failure("newrelic")``
    instead of propagating to the caller.
    """
    call_args = (kwargs, response_obj, start_time, end_time)
    try:
        self._process_success(*call_args)
    except Exception as exc:
        verbose_logger.warning(f"Error in New Relic log_success_event: {exc}")
        self.handle_callback_failure("newrelic")
|
|
|
|
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
    """
    Asynchronous success hook.

    Calls ``_process_success`` directly on the event loop, without
    offloading to a thread. Any exception it raises is logged as a warning
    and reported through ``handle_callback_failure("newrelic")``.
    """
    call_args = (kwargs, response_obj, start_time, end_time)
    try:
        self._process_success(*call_args)
    except Exception as exc:
        verbose_logger.warning(f"Error in New Relic async_log_success_event: {exc}")
        self.handle_callback_failure("newrelic")
|
|
|
|
def log_failure_event(self, kwargs, response_obj, start_time, end_time):
    """
    Synchronous failure hook.

    Only the error metric is recorded (via ``_record_error_metric``); no AI
    monitoring events are sent for failed calls. Any exception is logged as
    a warning and reported through ``handle_callback_failure("newrelic")``.
    """
    try:
        self._record_error_metric()
    except Exception as exc:
        verbose_logger.warning(f"Error in New Relic log_failure_event: {exc}")
        self.handle_callback_failure("newrelic")
    return None
|
|
|
|
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
    """
    Asynchronous failure hook.

    Only the error metric is recorded (via ``_record_error_metric``); no AI
    monitoring events are sent for failed calls. Any exception is logged as
    a warning and reported through ``handle_callback_failure("newrelic")``.
    """
    try:
        self._record_error_metric()
    except Exception as exc:
        verbose_logger.warning(f"Error in New Relic async_log_failure_event: {exc}")
        self.handle_callback_failure("newrelic")
    return None
|