MoFin/venv/lib/python3.12/site-packages/litellm/integrations/newrelic/newrelic.py

"""
New Relic AI Monitoring Integration for LiteLLM

This module provides integration with New Relic's AI Monitoring feature to track
LLM requests, responses, and usage metrics.

Environment Variables (consumed by the New Relic agent at process bootstrap -
set via container env, or before invoking `newrelic-admin run-program`):
    NEW_RELIC_LICENSE_KEY: Your New Relic license key (required)
    NEW_RELIC_APP_NAME: Your application name (required)

UI- and runtime-toggleable:
    NEW_RELIC_AI_MONITORING_RECORD_CONTENT_ENABLED: Whether to record message
        content (optional, default: true)

Configuration:
    Message logging can be controlled via (both must agree to record):
    1. turn_off_message_logging parameter - pass via callback initialization or config YAML
    2. NEW_RELIC_AI_MONITORING_RECORD_CONTENT_ENABLED env var

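    Default behavior: messages ARE recorded unless explicitly disabled.
    Either setting alone is enough to disable recording; both must allow it for
    content to be recorded. LiteLLM's own redaction decision is honored as well:
    when LiteLLM decides a request's messages must be redacted, message content
    is omitted from the New Relic events.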

Usage - Python SDK:
    import litellm
    litellm.callbacks = ["newrelic"]

    # Or with explicit configuration:
    from litellm.integrations.newrelic import NewRelicLogger
    litellm.callbacks = [NewRelicLogger(turn_off_message_logging=True)]

Usage - Proxy Server (config.yaml):
    litellm_settings:
      callbacks: ["newrelic"]
      newrelic_params:
        turn_off_message_logging: true  # Disable message content recording

    # Or disable via environment variable:
    # export NEW_RELIC_AI_MONITORING_RECORD_CONTENT_ENABLED=false

    # Ensure New Relic agent is initialized (use newrelic-admin or initialize manually)
    # newrelic-admin run-program python your_app.py
"""

import json
import os
import threading
import time
import uuid
from typing import Any, Dict, List, Optional, Tuple, Union

import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.redact_messages import should_redact_message_logging
from litellm.types.integrations.newrelic import NewRelicInitParams
from litellm.types.integrations.base_health_check import IntegrationHealthCheckStatus
from litellm.types.utils import ModelResponse, Message, StandardLoggingPayload

try:
    import newrelic.agent as _newrelic_agent
except ImportError:
    _newrelic_agent = None  # type: ignore


class NewRelicLogger(CustomLogger):
    """
    New Relic logger for LiteLLM to send AI monitoring events.

    This logger creates two types of New Relic custom events:
    1. LlmChatCompletionSummary - One per completion request
    2. LlmChatCompletionMessage - One per message (request and response)
    """

    # Class-level state for supportability metric emission, shared across all instances.
    # Protected by _metric_lock to ensure thread-safe access.
    _last_metric_emission_time: float = 0.0
    _metric_lock = threading.Lock()

    def __init__(self, **kwargs):
        """Initialise the logger and try to register the New Relic application.

        Keyword arguments are forwarded to ``CustomLogger.__init__``. Values
        coming from ``litellm.newrelic_params`` only fill in keys the caller
        did not pass explicitly.

        ``self.enabled`` ends up True only when NEW_RELIC_LICENSE_KEY and
        NEW_RELIC_APP_NAME are both set, the ``newrelic`` package could be
        imported, and ``register_application`` did not raise.
        """
        # Global params act as defaults only: model_dump() returns every field
        # (defaults included), so a plain update() would clobber explicit
        # constructor arguments such as turn_off_message_logging=True.
        for param_name, param_value in self._get_newrelic_params().items():
            if param_name not in kwargs:
                kwargs[param_name] = param_value

        # The base initialiser sets self.turn_off_message_logging from kwargs.
        super().__init__(**kwargs)

        self.license_key = os.getenv("NEW_RELIC_LICENSE_KEY")
        self.app_name = os.getenv("NEW_RELIC_APP_NAME")

        # Guard 1: both environment variables are mandatory.
        if not (self.license_key and self.app_name):
            verbose_logger.warning(
                "New Relic integration requires NEW_RELIC_LICENSE_KEY and "
                "NEW_RELIC_APP_NAME environment variables. Integration will be disabled."
            )
            self.enabled = False
            return

        # Guard 2: the optional newrelic import at module load must have worked.
        if _newrelic_agent is None:
            verbose_logger.error(
                "New Relic Python agent not installed. Review the New Relic integration documentation at https://docs.litellm.ai/docs/observability/newrelic."
            )
            self.enabled = False
            return

        try:
            # timeout=0 forces non-blocking startup: the agent connects in a
            # background thread regardless of newrelic.ini / NEW_RELIC_STARTUP_TIMEOUT.
            _newrelic_agent.register_application(timeout=0)
            self.enabled = True
            verbose_logger.info(
                f"New Relic AI Monitoring initialized for app: {self.app_name}, "
                f"content recording: {self.record_content}"
            )
        except Exception as exc:
            verbose_logger.error(
                f"Failed to initialize New Relic agent: {exc}. "
                "Integration will be disabled."
            )
            self.enabled = False

    def _get_newrelic_params(self) -> Dict:
        """
        Return the logger init params configured on ``litellm.newrelic_params``.

        These are params specific to initializing the NewRelicLogger,
        e.g. turn_off_message_logging.

        Returns:
            The ``model_dump()`` of the configured params. A plain dict is first
            passed through ``NewRelicInitParams``; ``None`` or any other type
            yields an empty dict.
        """
        configured = litellm.newrelic_params
        if configured is None:
            return {}

        if isinstance(configured, NewRelicInitParams):
            return configured.model_dump()

        if isinstance(configured, Dict):
            # Only allow params that are part of NewRelicInitParams.
            return NewRelicInitParams(**configured).model_dump()

        return {}

    @property
    def record_content(self) -> bool:
        """Whether to record message content in New Relic.

        Both turn_off_message_logging param AND NEW_RELIC_AI_MONITORING_RECORD_CONTENT_ENABLED
        env var must agree to record content. If either disables recording, content will not
        be recorded. Read at call time so UI config changes take effect without a restart.
        Default: True (record content) unless explicitly disabled by either method.
        """
        return (not self.turn_off_message_logging) and self._parse_bool_env(
            "NEW_RELIC_AI_MONITORING_RECORD_CONTENT_ENABLED", True
        )

    def _parse_bool_env(self, var_name: str, default: bool = False) -> bool:
        """Parse a boolean environment variable.

        Accepts true/false, 1/0, yes/no, on/off (case-insensitive,
        whitespace-tolerant) — matching the convention used in
        ``litellm/__init__.py`` and the standard library's
        ``configparser.BOOLEAN_STATES``. Unrecognised values log a
        warning and fall back to ``default`` rather than silently
        flipping user intent.
        """
        raw = os.getenv(var_name)
        if not raw:
            return default
        value = raw.strip().lower()
        if value in ("1", "true", "yes", "on"):
            return True
        if value in ("0", "false", "no", "off"):
            return False
        verbose_logger.warning(
            f"{var_name}={raw!r} is not a recognised boolean "
            f"(accepts true/false, 1/0, yes/no, on/off). "
            f"Falling back to default ({default})."
        )
        return default

    def _get_litellm_version(self) -> str:
        """
        Look up the installed litellm version for supportability metrics.

        Returns:
            The version string reported by ``importlib.metadata``
            (e.g., "1.80.0"), or "unknown" when the lookup raises.
        """
        try:
            import importlib.metadata as _metadata

            installed_version = _metadata.version("litellm")
        except Exception as exc:
            verbose_logger.warning(f"Unable to determine litellm version: {exc}")
            return "unknown"
        return installed_version

    def _emit_supportability_metric(self):
        """
        Emit New Relic supportability metric for LiteLLM usage.

        Per spec, this metric should be emitted at least once every 27 hours
        to indicate the library is in use. Format:
        Supportability/Python/ML/LiteLLM/{version}

        This method updates _last_metric_emission_time and should
        be called within a lock when checking periodic emission.

        The timestamp is written before any call that can raise, so a failing
        agent lookup or recording call cannot cause a retry (and a lock
        acquisition) on every subsequent request. Failures are logged as
        warnings and never propagated.
        """
        # Stamp the attempt first: the 27-hour back-off must apply whether the
        # app is ready, not ready, or the agent call below throws.
        NewRelicLogger._last_metric_emission_time = time.time()

        try:
            litellm_version = self._get_litellm_version()
            metric_name = f"Supportability/Python/ML/LiteLLM/{litellm_version}"

            app = _newrelic_agent.application()

            if app and app.enabled:
                # Record metric with value of 1 (will be aggregated by New Relic)
                app.record_custom_metric(metric_name, 1)
                verbose_logger.info(
                    f"Emitted New Relic supportability metric: {metric_name}"
                )
            else:
                verbose_logger.info(
                    "New Relic application is not enabled; skipping metric recording."
                )

        except Exception as e:
            verbose_logger.warning(f"Failed to emit supportability metric: {e}")

    def _check_and_emit_periodic_metric(self):
        """
        Check if 27 hours have passed since last metric emission and re-emit if needed.

        Uses a mutex to ensure only one thread emits the metric even if multiple
        requests are being processed concurrently.
        """
        # Quick check without lock to avoid unnecessary locking
        current_time = time.time()
        time_since_last_emission = (
            current_time - NewRelicLogger._last_metric_emission_time
        )

        if time_since_last_emission >= 97200:  # 27 hours = 97200 seconds
            # Acquire lock to ensure only one thread emits
            with NewRelicLogger._metric_lock:
                # Double-check inside lock in case another thread just emitted
                current_time = time.time()
                time_since_last_emission = (
                    current_time - NewRelicLogger._last_metric_emission_time
                )

                if time_since_last_emission >= 97200:
                    self._emit_supportability_metric()

    def _get_trace_context(
        self,
        kwargs: Dict,
        standard_logging_object: Optional[StandardLoggingPayload] = None,
    ) -> str:
        """
        Get the New Relic trace ID for AI monitoring events.

        This integration runs in LiteLLM's async logging worker, outside the
        New Relic agent's current transaction. Because we can't call
        `newrelic.agent.current_trace_id()` to let the agent populate the
        trace_id on AIM custom events, we manually simulate what the agent
        would do. An AIM event without a trace_id is malformed per the NR
        schema, so this method always returns a valid string.

        Resolution order:
        1. W3C traceparent header (litellm_params.metadata.headers.traceparent) -
           what the agent would link to if we were in-transaction.
        2. StandardLoggingPayload.trace_id - LiteLLM's internal trace for
           retry/fallback grouping.
        3. Generated UUID - synthetic grouping key when upstream context is
           absent or parsing it fails.

        Span IDs are intentionally not emitted: any span ID recoverable from
        the inbound traceparent is the caller's parent span, not ours.

        Returns:
            trace_id: always a non-empty string.
        """
        trace_id: Optional[str] = None
        try:
            litellm_params = kwargs.get("litellm_params") or {}
            metadata = litellm_params.get("metadata") or {}
            headers = metadata.get("headers") or {}
            # Normalize header key lookup to be case-insensitive per W3C spec
            traceparent = next(
                (v for k, v in headers.items() if k.lower() == "traceparent"), None
            )

            if traceparent:
                # Extract trace_id from traceparent header if available
                # traceparent format: "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-00"
                parts = traceparent.split("-")
                if len(parts) == 4:
                    trace_id = parts[1]

            if not trace_id and standard_logging_object:
                slo_trace_id = standard_logging_object.get("trace_id")
                if slo_trace_id:
                    trace_id = slo_trace_id

        except Exception as e:
            verbose_logger.warning(
                f"Unable to parse New Relic trace context from upstream sources: {e}"
            )

        if not trace_id:
            trace_id = uuid.uuid4().hex
            verbose_logger.debug(
                f"New Relic trace_id not available from distributed tracing headers or "
                f"StandardLoggingPayload. Generated trace_id={trace_id} for AI monitoring "
                f"event grouping."
            )

        return trace_id

    def _extract_completion_id(self, kwargs: Dict, response_obj: ModelResponse) -> str:
        """
        Extract completion ID from kwargs or response_obj, or generate one.
        """
        completion_id = None

        if response_obj:
            completion_id = response_obj.get("id")

        if not completion_id:
            completion_id = kwargs.get("litellm_call_id")

        # If still not found, generate UUID and log warning per spec
        if not completion_id:
            completion_id = str(uuid.uuid4())

        return completion_id

    def _get_vendor(
        self,
        kwargs: Dict,
        standard_logging_object: Optional[StandardLoggingPayload] = None,
    ) -> str:
        """Extract vendor/provider, preferring StandardLoggingPayload."""
        if standard_logging_object:
            vendor = standard_logging_object.get("custom_llm_provider")
            if vendor:
                return vendor
        litellm_params = kwargs.get("litellm_params", {}) or {}
        return litellm_params.get("custom_llm_provider") or "litellm"

    def _get_model_names(
        self,
        kwargs: Dict,
        response_obj: ModelResponse,
        standard_logging_object: Optional[StandardLoggingPayload] = None,
    ) -> Tuple[str, str]:
        """
        Extract request and response model names, preferring StandardLoggingPayload
        for the request model.

        Returns:
            Tuple of (request_model, response_model)
        """
        request_model = None
        if standard_logging_object:
            slo_model = standard_logging_object.get("model")
            if slo_model:
                request_model = str(slo_model)
        if not request_model:
            request_model = str(kwargs.get("model") or "unknown")
        response_model: str = str(response_obj.get("model") or request_model)
        return request_model, response_model

    def _extract_usage(
        self,
        response_obj: ModelResponse,
        standard_logging_object: Optional[StandardLoggingPayload] = None,
    ) -> Dict[str, int]:
        """Extract usage statistics, preferring StandardLoggingPayload."""
        if standard_logging_object:
            prompt = standard_logging_object.get("prompt_tokens")
            completion = standard_logging_object.get("completion_tokens")
            total = standard_logging_object.get("total_tokens")
            if any(x is not None for x in [prompt, completion, total]):
                return {
                    "prompt_tokens": prompt or 0,
                    "completion_tokens": completion or 0,
                    "total_tokens": total or 0,
                }

        usage = response_obj.get("usage", None)
        if not usage:
            return {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}

        return {
            "prompt_tokens": usage.get("prompt_tokens") or 0,
            "completion_tokens": usage.get("completion_tokens") or 0,
            "total_tokens": usage.get("total_tokens") or 0,
        }

    def _get_finish_reason(self, response_obj: ModelResponse) -> str:
        """
        Extract finish reason from first choice in the response.

        Returns "unknown" if choices are not present or finish_reason is not found.
        """
        choices = response_obj.get("choices") or []
        if choices and len(choices) > 0:
            return choices[0].get("finish_reason") or "unknown"
        return "unknown"

    def _to_epoch_ms(self, t: Any) -> float:
        """Convert a datetime or float timestamp to epoch milliseconds."""
        if hasattr(t, "timestamp"):
            return t.timestamp() * 1000.0
        return float(t) * 1000.0

    def _get_duration(
        self,
        kwargs: Dict,
        start_time: Any,
        end_time: Any,
        standard_logging_object: Optional[StandardLoggingPayload] = None,
    ) -> Optional[float]:
        """
        Extract duration in milliseconds.

        Resolution order:
        1. StandardLoggingPayload.response_time (already computed by LiteLLM)
        2. llm_api_duration_ms from kwargs
        3. Calculated from start_time and end_time
        """
        if standard_logging_object:
            response_time = standard_logging_object.get("response_time")
            if response_time is not None:
                return (
                    float(response_time) * 1000.0
                )  # SLO stores seconds; convert to ms

        duration_ms = kwargs.get("llm_api_duration_ms")
        if duration_ms is not None:
            return float(duration_ms)

        if start_time is not None and end_time is not None:
            return self._to_epoch_ms(end_time) - self._to_epoch_ms(start_time)

        return None

    def _get_request_params(
        self,
        kwargs: Dict,
        standard_logging_object: Optional[StandardLoggingPayload] = None,
    ) -> Dict[str, Any]:
        """
        Extract request parameters like temperature and max_tokens, preferring
        StandardLoggingPayload.model_parameters.

        Returns dict with available parameters, omitting those not present.
        """
        if standard_logging_object:
            source_params = standard_logging_object.get("model_parameters") or {}
        else:
            source_params = kwargs.get("optional_params") or {}

        params = {}

        temperature = source_params.get("temperature")
        if temperature is not None:
            params["temperature"] = temperature

        max_tokens = source_params.get("max_tokens")
        if max_tokens is not None:
            params["max_tokens"] = max_tokens

        return params

    def _extract_message_content(self, message: Union[Message, Dict]) -> str:
        """
        Extract content from a message, handling various formats.

        Handles tool calls, multimodal content (as JSON), and standard text content.
        Returns empty string if content is None or missing.
        """
        content = message.get("content")

        # Handle tool calls
        if message.get("tool_calls"):
            try:
                return json.dumps(message["tool_calls"])
            except Exception:
                return str(message["tool_calls"])

        # Handle None or missing content
        if content is None:
            return ""

        # Handle list content (multimodal)
        if isinstance(content, list):
            try:
                return json.dumps(content)
            except Exception:
                return str(content)

        # Handle non-string content
        if not isinstance(content, str):
            return str(content)

        return content

    def _extract_all_messages(
        self,
        kwargs: Dict,
        response_obj: ModelResponse,
        response_model: str,
        vendor: str,
        standard_logging_object: Optional[StandardLoggingPayload] = None,
    ) -> List[Dict[str, Any]]:
        """
        Extract all messages (request + response) with sequence numbers and timestamps.

        Processes request messages from StandardLoggingPayload.messages (preferred) or
        kwargs["messages"] (fallback), and response messages from response_obj["choices"].
        Assigns sequential numbers starting at 0.
        Adds timestamps from StandardLoggingPayload (preferred) or kwargs if available
        (converted to epoch milliseconds).
        """
        messages = []
        sequence = 0

        # Extract timestamps, preferring StandardLoggingPayload
        start_time = None
        if standard_logging_object:
            start_time = standard_logging_object.get("startTime")
        if not start_time:
            start_time = kwargs.get("start_time")

        end_time = None
        if standard_logging_object:
            end_time = standard_logging_object.get("endTime")
        if not end_time:
            end_time = kwargs.get("end_time")

        # Content is recorded only when the NR-specific switches allow it AND
        # LiteLLM's wider redaction decision (turn_off_message_logging, dynamic
        # params, headers) does not require redaction. Async streaming hands the
        # callback an unredacted async_complete_streaming_response, so without
        # this gate generated content would still reach NR even when the user
        # has globally disabled message logging.
        record_content = self.record_content and not should_redact_message_logging(
            kwargs
        )

        # Extract request messages, preferring StandardLoggingPayload.
        # SLO messages can be a string (serialized/redacted), so only use it when it's a list.
        slo_messages = (
            standard_logging_object.get("messages") if standard_logging_object else None
        )
        if isinstance(slo_messages, list):
            request_messages = slo_messages
        else:
            request_messages = kwargs.get("messages") or []
        for msg in request_messages:
            message_data = {
                "role": msg.get("role") or "user",
                "sequence": sequence,
                "response.model": response_model,
                "vendor": vendor,
            }

            # Add timestamp for request message if available (convert to milliseconds)
            if start_time is not None:
                message_data["timestamp"] = int(self._to_epoch_ms(start_time))

            if record_content:
                message_data["content"] = self._extract_message_content(msg)

            messages.append(message_data)
            sequence += 1

        # Extract response messages from choices
        choices = response_obj.get("choices") or []
        if choices and len(choices) > 0:
            for choice in choices:
                # Prefer "message" (non-streaming); fall back to "delta" (streaming-assembled)
                message = choice.get("message", None) or choice.get("delta", None)
                if message:
                    message_data = {
                        "role": message.get("role") or "assistant",
                        "sequence": sequence,
                        "response.model": response_model,
                        "vendor": vendor,
                        "is_response": True,
                    }

                    # Add timestamp for response message if available (convert to milliseconds)
                    if end_time is not None:
                        message_data["timestamp"] = int(self._to_epoch_ms(end_time))

                    if record_content:
                        message_data["content"] = self._extract_message_content(message)

                    messages.append(message_data)
                    sequence += 1

        return messages

    def _record_summary_event(
        self,
        request_id: str,
        trace_id: Optional[str],
        request_model: str,
        response_model: str,
        vendor: str,
        finish_reason: str,
        num_messages: int,
        usage: Dict[str, int],
        duration: Optional[float] = None,
        request_params: Optional[Dict[str, Any]] = None,
    ):
        """Record one LlmChatCompletionSummary custom event.

        ``trace_id``, ``duration`` and the ``temperature`` / ``max_tokens``
        request parameters are attached only when provided. If the New Relic
        application is not enabled, a warning is logged and nothing is
        recorded. Any exception is logged and reported through
        ``handle_callback_failure`` instead of being raised.
        """
        try:
            event_data = {
                "id": request_id,
                "request_id": request_id,
                "request.model": request_model,
                "response.model": response_model,
                "response.choices.finish_reason": finish_reason,
                "response.number_of_messages": num_messages,
                "vendor": vendor,
                "ingest_source": "litellm",
                "response.usage.prompt_tokens": usage["prompt_tokens"],
                "response.usage.completion_tokens": usage["completion_tokens"],
                "response.usage.total_tokens": usage["total_tokens"],
            }

            # Optional attributes are added only when a value is available.
            if trace_id:
                event_data["trace_id"] = trace_id
            if duration is not None:
                event_data["duration"] = duration

            given_params = request_params if request_params else {}
            if "temperature" in given_params:
                event_data["request.temperature"] = given_params["temperature"]
            if "max_tokens" in given_params:
                event_data["request.max_tokens"] = given_params["max_tokens"]

            app = _newrelic_agent.application()
            if not (app and app.enabled):
                verbose_logger.warning(
                    "New Relic application is not enabled; skipping summary event recording."
                )
                return

            app.record_custom_event("LlmChatCompletionSummary", event_data)

        except Exception as exc:
            verbose_logger.warning(f"Failed to record New Relic summary event: {exc}")
            self.handle_callback_failure("newrelic")

    def _record_message_events(
        self,
        request_id: str,
        llm_response_id: str,
        trace_id: Optional[str],
        messages: List[Dict[str, Any]],
    ):
        """Record one LlmChatCompletionMessage custom event per message.

        If the New Relic application is not enabled, a warning is logged and
        nothing is recorded. Any exception is logged and reported through
        ``handle_callback_failure`` instead of being raised.

        Args:
            request_id: Agent-generated UUID that links to Summary event's id
            llm_response_id: LLM's response ID (e.g., "chatcmpl-...") for message id format
            trace_id: Trace ID for distributed tracing (None if not available)
            messages: List of message dicts to record
        """
        try:
            app = _newrelic_agent.application()

            if app and app.enabled:
                for record in messages:
                    position = record["sequence"]
                    event_data = {
                        "id": f"{llm_response_id}-{position}",
                        "request_id": request_id,
                        "completion_id": request_id,
                        "role": record["role"],
                        "sequence": position,
                        "response.model": record["response.model"],
                        "vendor": record["vendor"],
                        "ingest_source": "litellm",
                        "token_count": 0,  # Per-message token counts are not available from LiteLLM
                    }

                    if trace_id:
                        event_data["trace_id"] = trace_id

                    # Content is present only when recording was allowed upstream.
                    if "content" in record:
                        event_data["content"] = record["content"]

                    # Per spec, is_response is emitted only for response messages.
                    if record.get("is_response"):
                        event_data["is_response"] = True

                    # Forward actual request/response timestamp (ms) so NR uses the
                    # real LLM call window rather than the async-logger fire time.
                    # Requires newrelic>=11.2.0 which reads params["timestamp"] as
                    # the intrinsic event timestamp.
                    if "timestamp" in record:
                        event_data["timestamp"] = record["timestamp"]

                    app.record_custom_event("LlmChatCompletionMessage", event_data)
            else:
                verbose_logger.warning(
                    "New Relic application is not enabled; skipping message event recording."
                )

        except Exception as exc:
            verbose_logger.warning(f"Failed to record New Relic message events: {exc}")
            self.handle_callback_failure("newrelic")

    def _record_error_metric(self):
        """Record error metric to New Relic."""
        try:
            if not self.enabled:
                return

            self._check_and_emit_periodic_metric()

            app = _newrelic_agent.application()
            if app and app.enabled:
                app.record_custom_metric("LLM/LiteLLM/Error", 1)
        except Exception as e:
            verbose_logger.warning(f"Failed to record New Relic error metric: {e}")
            self.handle_callback_failure("newrelic")

    def _process_success(
        self,
        kwargs: Dict,
        response_obj: ModelResponse,
        start_time: Optional[float] = None,
        end_time: Optional[float] = None,
    ):
        """
        Core logic for processing successful LLM calls.
        Used by both sync and async success event handlers.
        """
        # Early exit if not enabled
        if not self.enabled:
            return

        # Check and emit periodic supportability metric if 27 hours have passed
        self._check_and_emit_periodic_metric()

        # Use StandardLoggingPayload where available for normalized, pre-computed values
        standard_logging_object: Optional[StandardLoggingPayload] = kwargs.get(
            "standard_logging_object"
        )

        # Get trace context
        trace_id = self._get_trace_context(kwargs, standard_logging_object)

        # Generate unique request ID for this request (used as Summary event id)
        request_id = str(uuid.uuid4())

        # Extract data from response
        llm_response_id = self._extract_completion_id(kwargs, response_obj)
        vendor = self._get_vendor(kwargs, standard_logging_object)
        request_model, response_model = self._get_model_names(
            kwargs, response_obj, standard_logging_object
        )
        usage = self._extract_usage(response_obj, standard_logging_object)
        finish_reason = self._get_finish_reason(response_obj)

        # Extract additional summary event fields
        duration = self._get_duration(
            kwargs, start_time, end_time, standard_logging_object
        )
        request_params = self._get_request_params(kwargs, standard_logging_object)

        # Extract all messages
        messages = self._extract_all_messages(
            kwargs, response_obj, response_model, vendor, standard_logging_object
        )

        # Record summary event
        self._record_summary_event(
            request_id=request_id,
            trace_id=trace_id,
            request_model=request_model,
            response_model=response_model,
            vendor=vendor,
            finish_reason=finish_reason,
            num_messages=len(messages),
            usage=usage,
            duration=duration,
            request_params=request_params,
        )

        # Record message events
        self._record_message_events(
            request_id=request_id,
            llm_response_id=llm_response_id,
            trace_id=trace_id,
            messages=messages,
        )

    async def async_health_check(self) -> IntegrationHealthCheckStatus:
        """
        Report whether the New Relic integration is usable.

        The check proceeds in three steps:

        1. If `self.enabled` is falsy, return "unhealthy" immediately.
        2. Ask the agent for its application object; if none is returned or
           its `enabled` attribute is falsy, return "unhealthy".
        3. Otherwise record a small `LiteLLMConnectionTest` custom event so
           the user can confirm the end-to-end pipeline in the New Relic UI
           via NRQL:
           `SELECT * FROM LiteLLMConnectionTest SINCE 1 hour ago`
           and return "healthy".

        The `LiteLLMConnectionTest` event type is intentionally outside the
        `Llm*` family that AI Monitoring queries, so test events do not
        appear in AI Monitoring dashboards.

        Returns:
            IntegrationHealthCheckStatus with `status` set to "healthy" or
            "unhealthy". `error_message` is None when healthy and a
            non-empty description otherwise.

        Any exception raised while talking to the agent is caught and
        converted into an "unhealthy" status; nothing is re-raised.
        """
        if not self.enabled:
            return IntegrationHealthCheckStatus(
                status="unhealthy",
                error_message="New Relic integration is disabled. Check that "
                "NEW_RELIC_LICENSE_KEY and NEW_RELIC_APP_NAME are set and the "
                "newrelic package is installed.",
            )

        try:
            app = _newrelic_agent.application()
            if not (app and app.enabled):
                # Reaching this point means the agent module was usable, so
                # the problem is the application object, not a missing
                # package. Say so, instead of claiming "not installed".
                return IntegrationHealthCheckStatus(
                    status="unhealthy",
                    error_message=(
                        "New Relic agent application is unavailable or "
                        "disabled. Ensure the agent is initialized (for "
                        "example via `newrelic-admin run-program`) and review "
                        "the New Relic integration documentation at "
                        "https://docs.litellm.ai/docs/observability/newrelic."
                    ),
                )

            app.record_custom_event(
                "LiteLLMConnectionTest",
                {
                    "is_test_event": True,
                    "app_name": self.app_name,
                    "source": "litellm-proxy",
                    "timestamp": time.time(),
                },
            )
            return IntegrationHealthCheckStatus(status="healthy", error_message=None)
        except Exception as e:
            # str(e) is empty for exceptions raised without arguments; fall
            # back to repr(e) so an unhealthy status always explains itself.
            return IntegrationHealthCheckStatus(
                status="unhealthy",
                error_message=str(e) or repr(e),
            )

    # CustomLogger interface implementation

    def log_pre_api_call(self, model, messages, kwargs):
        """No-op hook: nothing is done before the LLM API call.

        All arguments are accepted and ignored; the method always returns None.
        """
        return None

    def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
        """No-op hook: nothing is done right after the LLM API call.

        All arguments are accepted and ignored; the method always returns None.
        """
        return None

    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        """
        Synchronous success callback.

        Forwards all four arguments unchanged to `self._process_success`.
        If that call raises, the error is logged as a warning and reported
        through `self.handle_callback_failure("newrelic")` rather than being
        propagated to the caller.
        """
        call_args = (kwargs, response_obj, start_time, end_time)
        try:
            self._process_success(*call_args)
        except Exception as e:
            # Best-effort logging: a failure here must not surface to the caller.
            warning_text = f"Error in New Relic log_success_event: {e}"
            verbose_logger.warning(warning_text)
            self.handle_callback_failure("newrelic")

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        """
        Asynchronous success callback.

        Calls the synchronous `self._process_success` directly from the
        coroutine (no thread hand-off), passing all four arguments through
        unchanged. If that call raises, the error is logged as a warning and
        reported through `self.handle_callback_failure("newrelic")` rather
        than being propagated to the caller.
        """
        call_args = (kwargs, response_obj, start_time, end_time)
        try:
            self._process_success(*call_args)
        except Exception as e:
            # Best-effort logging: a failure here must not surface to the caller.
            warning_text = f"Error in New Relic async_log_success_event: {e}"
            verbose_logger.warning(warning_text)
            self.handle_callback_failure("newrelic")

    def log_failure_event(self, kwargs, response_obj, start_time, end_time):
        """
        Synchronous failure callback.

        Only calls `self._record_error_metric()`; none of the arguments are
        read and no other event is recorded here. If recording the metric
        raises, the error is logged as a warning and reported through
        `self.handle_callback_failure("newrelic")` rather than being
        propagated to the caller.
        """
        try:
            self._record_error_metric()
        except Exception as e:
            # Best-effort logging: a failure here must not surface to the caller.
            warning_text = f"Error in New Relic log_failure_event: {e}"
            verbose_logger.warning(warning_text)
            self.handle_callback_failure("newrelic")

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        """
        Asynchronous failure callback.

        Only calls `self._record_error_metric()`; none of the arguments are
        read and no other event is recorded here. If recording the metric
        raises, the error is logged as a warning and reported through
        `self.handle_callback_failure("newrelic")` rather than being
        propagated to the caller.
        """
        try:
            self._record_error_metric()
        except Exception as e:
            # Best-effort logging: a failure here must not surface to the caller.
            warning_text = f"Error in New Relic async_log_failure_event: {e}"
            verbose_logger.warning(warning_text)
            self.handle_callback_failure("newrelic")