MoFin/venv/lib/python3.12/site-packages/litellm/integrations/galileo.py

from __future__ import annotations

import json
import os
import re
import uuid
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple, Union, cast

import httpx
from pydantic import BaseModel, Field

import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.prompt_templates.common_utils import (
    convert_content_list_to_str,
    get_content_from_model_response,
)
from litellm.types.llms.openai import (
    AllMessageValues,
    HttpxBinaryResponseContent,
    ResponsesAPIResponse,
)
from litellm.llms.custom_httpx.http_handler import (
    get_async_httpx_client,
    httpxSpecialProvider,
)
from litellm.types.integrations.base_health_check import IntegrationHealthCheckStatus

# Default API host, used when GALILEO_API_KEY is set but GALILEO_BASE_URL is not.
GALILEO_CLOUD_API_BASE_URL = "https://api.galileo.ai"
# Cap the in-memory buffer so persistent flush failures (e.g. Galileo
# unavailable, invalid credentials) cannot leak memory unboundedly.
# When the cap is exceeded, the oldest buffered records are dropped first.
GALILEO_MAX_IN_MEMORY_RECORDS = 1000


class LLMResponse(BaseModel):
    # Flat record describing one logged LLM call. GalileoObserve builds one of
    # these per successful call, dumps it to a dict and buffers it until flush.

    # Wall-clock duration of the call in milliseconds.
    latency_ms: int
    # HTTP-style status recorded for the call (the success path stores 200).
    status_code: int
    # Plain-text summary of the request input.
    input_text: str
    # Serialized response content.
    output_text: str
    # Call type label (e.g. the LiteLLM call_type); used as the trace/span name.
    node_type: str
    # Model identifier.
    model: str
    # Token usage counters.
    num_input_tokens: int
    num_output_tokens: int
    num_total_tokens: int
    cost: Optional[float] = Field(
        default=None,
        description="Total cost of the LLM call in USD as computed by LiteLLM.",
    )
    output_logprobs: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Optional. When available, logprobs are used to compute Uncertainty.",
    )
    # NOTE(review): GalileoObserve._format_created_at emits
    # "%Y-%m-%dT%H:%M:%SZ" (UTC, trailing "Z"); the description below omits
    # the "Z" suffix.
    created_at: str = Field(
        ..., description='timestamp constructed in "%Y-%m-%dT%H:%M:%S" format'
    )
    # Optional free-form tags; copied onto the span when non-empty.
    tags: Optional[List[str]] = None
    # Optional caller-supplied metadata.
    user_metadata: Optional[Dict[str, Any]] = None


class GalileoObserve(CustomLogger):
    def __init__(self) -> None:
        """Load Galileo settings from the environment and create the HTTP client.

        Reads the ``GALILEO_*`` environment variables. When an API key is
        present the logger runs in API-key mode and falls back to the Galileo
        cloud host if no base URL was supplied; otherwise it expects
        username/password credentials. Request headers are built lazily on
        the first flush or health check.
        """
        # Pending records; flushed once the buffer holds ``batch_size`` items.
        self.in_memory_records: List[dict] = []
        self.batch_size = 1

        # Credentials and routing information.
        self.api_key = os.getenv("GALILEO_API_KEY")
        self.project_id = os.getenv("GALILEO_PROJECT_ID")
        self.log_stream_id = os.getenv("GALILEO_LOG_STREAM_ID")
        self.username = os.getenv("GALILEO_USERNAME")
        self.password = os.getenv("GALILEO_PASSWORD")

        # API-key mode is selected purely by the presence of a key.
        self.use_v2_api = bool(self.api_key)

        resolved_base_url = self._normalize_base_url(os.getenv("GALILEO_BASE_URL"))
        if self.use_v2_api and not resolved_base_url:
            resolved_base_url = GALILEO_CLOUD_API_BASE_URL
        self.base_url = resolved_base_url

        # Populated on demand by ``_ensure_headers``.
        self.headers: Optional[Dict[str, str]] = None
        self.async_httpx_handler = get_async_httpx_client(
            llm_provider=httpxSpecialProvider.LoggingCallback
        )

    @staticmethod
    def _normalize_base_url(base_url: Optional[str]) -> Optional[str]:
        if base_url:
            return base_url.rstrip("/")
        return None

    def _is_configured(self) -> bool:
        if not self.project_id or not self.base_url:
            return False
        if self.use_v2_api:
            return bool(self.api_key)
        return bool(self.username and self.password)

    async def async_health_check(self) -> IntegrationHealthCheckStatus:
        """Verify configuration and connectivity to Galileo.

        Checks required settings first, then makes sure request headers can
        be built (authenticating if needed), and finally issues a GET to
        ``/current_user``. Any exception is reported as an unhealthy status
        rather than propagated.
        """

        def _unhealthy(reason: str) -> IntegrationHealthCheckStatus:
            return IntegrationHealthCheckStatus(
                status="unhealthy", error_message=reason
            )

        try:
            if not self.project_id:
                return _unhealthy("GALILEO_PROJECT_ID environment variable not set")

            if not self.base_url:
                return _unhealthy("GALILEO_BASE_URL environment variable not set")

            # Without an API key, both username and password are mandatory.
            if not self.use_v2_api and not (self.username and self.password):
                return _unhealthy(
                    "GALILEO_API_KEY or GALILEO_USERNAME and GALILEO_PASSWORD "
                    "environment variables must be set"
                )

            headers_ready = await self._ensure_headers()
            if not headers_ready:
                return _unhealthy("Galileo authentication failed")

            probe = await self.async_httpx_handler.get(
                url=f"{self.base_url}/current_user",
                headers=self.headers,
            )
            if probe.status_code >= 400:
                return _unhealthy(f"Galileo API returned HTTP {probe.status_code}")

            return IntegrationHealthCheckStatus(status="healthy", error_message=None)
        except Exception as e:
            return IntegrationHealthCheckStatus(
                status="unhealthy",
                error_message=f"Galileo health check failed: {str(e)}",
            )

    async def async_set_galileo_headers(self) -> None:
        galileo_login_response = await self.async_httpx_handler.post(
            url=f"{self.base_url}/login",
            headers={
                "accept": "application/json",
                "Content-Type": "application/x-www-form-urlencoded",
            },
            data={
                "username": self.username,
                "password": self.password,
            },
        )
        galileo_login_response.raise_for_status()
        access_token = galileo_login_response.json()["access_token"]
        self.headers = {
            "accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": f"Bearer {access_token}",
        }

    async def _ensure_headers(self) -> bool:
        if self.headers is not None:
            return True

        if self.use_v2_api:
            if not self.api_key:
                return False
            self.headers = {
                "accept": "application/json",
                "Content-Type": "application/json",
                "Galileo-API-Key": self.api_key,
            }
            return True

        if not (self.username and self.password and self.base_url):
            return False

        try:
            await self.async_set_galileo_headers()
            return True
        except Exception as e:
            verbose_logger.debug("Galileo Logger: failed to authenticate: %s", e)
            return False

    @staticmethod
    def _galileo_input_messages(
        messages: Optional[Any], input_text: str
    ) -> List[Dict[str, str]]:
        if isinstance(messages, dict):
            messages = messages.get("messages")
        if not messages:
            return [{"role": "user", "content": input_text}]
        if not isinstance(messages, list):
            return [{"role": "user", "content": input_text}]

        galileo_messages: List[Dict[str, str]] = []
        for message in messages:
            if not isinstance(message, dict):
                continue
            role = message.get("role")
            if not role:
                continue
            galileo_messages.append(
                {
                    "role": str(role),
                    "content": convert_content_list_to_str(
                        message=cast(AllMessageValues, message)
                    ),
                }
            )

        if galileo_messages:
            return galileo_messages
        return [{"role": "user", "content": input_text}]

    @staticmethod
    def _local_timezone():
        return datetime.now().astimezone().tzinfo or timezone.utc

    @staticmethod
    def _format_created_at(dt: Union[datetime, Any]) -> str:
        """Serialize timestamps as UTC ISO-8601 for Galileo."""
        if not isinstance(dt, datetime):
            return str(dt)

        if dt.tzinfo is None:
            # LiteLLM often passes naive datetimes in local time; convert to UTC
            # instead of appending Z to local time (which shifts Traces tab sorting).
            dt = dt.replace(tzinfo=GalileoObserve._local_timezone())

        return dt.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    @staticmethod
    def _normalize_created_at(created_at: str) -> str:
        if created_at and not re.search(r"(Z|[+-]\d{2}:?\d{2})$", created_at):
            return f"{created_at}Z"
        return created_at

    @staticmethod
    def _token_metrics_from_record(record: Dict[str, Any]) -> Dict[str, Any]:
        num_input_tokens = int(record.get("num_input_tokens") or 0)
        num_output_tokens = int(record.get("num_output_tokens") or 0)
        num_total_tokens = int(record.get("num_total_tokens") or 0)
        if num_total_tokens == 0 and (num_input_tokens or num_output_tokens):
            num_total_tokens = num_input_tokens + num_output_tokens
        metrics: Dict[str, Any] = {
            "num_input_tokens": num_input_tokens,
            "num_output_tokens": num_output_tokens,
            "num_total_tokens": num_total_tokens,
        }
        cost = record.get("cost")
        if cost is not None:
            metrics["cost"] = float(cost)
        return metrics

    @staticmethod
    def _record_to_v2_span(
        record: Dict[str, Any],
        *,
        trace_id: str,
        span_id: str,
    ) -> Dict[str, Any]:
        created_at = GalileoObserve._normalize_created_at(record.get("created_at", ""))

        span: Dict[str, Any] = {
            "type": "llm",
            "id": span_id,
            "trace_id": trace_id,
            "parent_id": trace_id,
            "name": record.get("node_type", "litellm"),
            "created_at": created_at,
            "input": GalileoObserve._galileo_input_messages(
                record.get("messages"), record.get("input_text", "")
            ),
            "output": {
                "role": "assistant",
                "content": record.get("output_text", ""),
            },
            "status_code": record.get("status_code", 200),
            "model": record.get("model"),
            "metrics": {
                "duration_ns": int(record.get("latency_ms", 0)) * 1_000_000,
                **GalileoObserve._token_metrics_from_record(record),
            },
        }
        if record.get("tags"):
            span["tags"] = record["tags"]
        return span

    @staticmethod
    def _record_to_v2_trace(record: Dict[str, Any]) -> Dict[str, Any]:
        trace_id = str(uuid.uuid4())
        span_id = str(uuid.uuid4())
        created_at = GalileoObserve._normalize_created_at(record.get("created_at", ""))

        return {
            "type": "trace",
            "id": trace_id,
            "name": record.get("node_type", "litellm"),
            "created_at": created_at,
            "input": record.get("input_text", ""),
            "output": record.get("output_text", ""),
            "status_code": record.get("status_code", 200),
            "metrics": {
                "duration_ns": int(record.get("latency_ms", 0)) * 1_000_000,
                **GalileoObserve._token_metrics_from_record(record),
            },
            "spans": [
                GalileoObserve._record_to_v2_span(
                    record, trace_id=trace_id, span_id=span_id
                )
            ],
        }

    def _build_traces_payload(self, records: List[dict]) -> Dict[str, Any]:
        payload: Dict[str, Any] = {
            "traces": [self._record_to_v2_trace(record) for record in records],
            "logging_method": "api_direct",
            "reliable": False,
            "is_complete": True,
        }
        if self.log_stream_id:
            payload["log_stream_id"] = self.log_stream_id
        return payload

    def _get_ingest_request(self) -> Optional[Tuple[str, Dict[str, Any]]]:
        if not self.base_url or not self.project_id:
            return None

        # Snapshot the records to be sent into a new list so concurrent appends
        # during the network round-trip (across the await points in
        # flush_in_memory_records) aren't silently dropped when we later clear
        # the in-memory buffer.
        records = list(self.in_memory_records)
        payload = self._build_traces_payload(records)

        if self.use_v2_api:
            return (
                f"{self.base_url}/ingest/traces/{self.project_id}",
                payload,
            )

        # Username/password auth logs in for a JWT and uses the standard v2 traces API.
        return (
            f"{self.base_url}/v2/projects/{self.project_id}/traces",
            payload,
        )

    @staticmethod
    def _redact_headers(headers: Optional[Dict[str, str]]) -> Dict[str, str]:
        if not headers:
            return {}
        redacted: Dict[str, str] = {}
        for key, value in headers.items():
            if key.lower() in {"authorization", "galileo-api-key"} and value:
                redacted[key] = (
                    f"{value[:8]}...{value[-4:]}" if len(value) > 12 else "***"
                )
            else:
                redacted[key] = value
        return redacted

    def _log_flush_config(self) -> None:
        """Emit a debug line describing the configuration used for the flush.

        Secrets are reported only as booleans (set / not set).
        """
        has_api_key = bool(self.api_key)
        has_username = bool(self.username)
        pending_count = len(self.in_memory_records)
        verbose_logger.debug(
            "Galileo Logger flush config: use_v2_api=%s base_url=%s project_id=%s "
            "log_stream_id=%s api_key_set=%s username_set=%s record_count=%s",
            self.use_v2_api,
            self.base_url,
            self.project_id,
            self.log_stream_id,
            has_api_key,
            has_username,
            pending_count,
        )

    @staticmethod
    def _log_v2_payload_validation(payload: Dict[str, Any]) -> None:
        missing_fields: List[str] = []
        traces = payload.get("traces", [])
        if not traces:
            missing_fields.append("traces")

        for trace_index, trace in enumerate(traces):
            if not isinstance(trace, dict):
                continue
            for field in ("id", "type", "spans"):
                if field not in trace:
                    missing_fields.append(f"traces[{trace_index}].{field}")

            trace_id = trace.get("id")
            for span_index, span in enumerate(trace.get("spans", [])):
                if not isinstance(span, dict):
                    continue
                for field in ("id", "trace_id", "parent_id"):
                    if field not in span:
                        missing_fields.append(
                            f"traces[{trace_index}].spans[{span_index}].{field}"
                        )
                if trace_id and span.get("trace_id") != trace_id:
                    missing_fields.append(
                        f"traces[{trace_index}].spans[{span_index}].trace_id mismatch"
                    )

        if missing_fields:
            verbose_logger.debug(
                "Galileo Logger: ingest /traces payload validation issues: %s",
                missing_fields,
            )

    def _log_flush_payload(self, url: str, payload: Dict[str, Any]) -> None:
        """Debug-log the flush target and trace count; validate ingest payloads.

        Structural validation runs only in API-key mode when the URL targets
        the ``/ingest/traces/`` endpoint.
        """
        traces = payload.get("traces", [])
        trace_count = len(traces) if isinstance(traces, list) else 0
        verbose_logger.debug(
            "Galileo Logger flush URL: %s trace_count=%s", url, trace_count
        )
        if not self.use_v2_api:
            return
        if "/ingest/traces/" in url:
            self._log_v2_payload_validation(payload)

    @staticmethod
    def _log_http_status_error(error: httpx.HTTPStatusError, url: str) -> None:
        """Debug-log the status, raw body and (if parseable) JSON body of a failed request."""
        failed_response = error.response
        verbose_logger.debug(
            "Galileo Logger HTTP error: status=%s url=%s",
            failed_response.status_code,
            url,
        )
        verbose_logger.debug(
            "Galileo Logger HTTP error response body: %s", failed_response.text
        )
        try:
            verbose_logger.debug(
                "Galileo Logger HTTP error response json: %s", failed_response.json()
            )
        except Exception:
            # Best effort only: the body may not be JSON, and the raw text
            # has already been logged above.
            return

    @staticmethod
    def _build_prompt(kwargs: Dict[str, Any]) -> Dict[str, Any]:
        optional_params = kwargs.get("optional_params", {}) or {}
        prompt: Dict[str, Any] = {"messages": kwargs.get("messages")}
        if optional_params.get("functions") is not None:
            prompt["functions"] = optional_params["functions"]
        if optional_params.get("tools") is not None:
            prompt["tools"] = optional_params["tools"]
        return prompt

    @staticmethod
    def _serialize_galileo_output(value: Any) -> str:
        if value is None:
            return ""
        if isinstance(value, str):
            return value

        def _json_default(obj: Any) -> Any:
            if hasattr(obj, "model_dump"):
                return obj.model_dump()
            return str(obj)

        return json.dumps(value, default=_json_default)

    @staticmethod
    def _prompt_to_input_text(prompt: Dict[str, Any]) -> str:
        messages = prompt.get("messages")
        if messages is not None:
            text = GalileoObserve._input_text_from_messages(messages)
            if text:
                return text
        return json.dumps(prompt, default=str)

    @staticmethod
    def _get_chat_content_for_galileo(response_obj: litellm.ModelResponse) -> Any:
        if response_obj.choices and len(response_obj.choices) > 0:
            message = response_obj["choices"][0]["message"]
            if hasattr(message, "json"):
                message_json = message.json()
                if isinstance(message_json, str):
                    return json.loads(message_json)
                return message_json
            return message
        return None

    @staticmethod
    def _get_text_completion_content_for_galileo(
        response_obj: litellm.TextCompletionResponse,
    ) -> Optional[str]:
        if response_obj.choices and len(response_obj.choices) > 0:
            return response_obj.choices[0].text
        return None

    @staticmethod
    def _get_responses_api_content_for_galileo(
        response_obj: ResponsesAPIResponse,
    ) -> Any:
        if hasattr(response_obj, "output") and response_obj.output:
            return response_obj.output
        return None

    @staticmethod
    def _langfuse_style_rerank_prompt(kwargs: Dict[str, Any]) -> Dict[str, Any]:
        """Match Langfuse rerank input: prompt = {"messages": kwargs.get("messages")}."""
        return {"messages": kwargs.get("messages")}

    def _get_galileo_input_output_content(
        self,
        kwargs: Dict[str, Any],
        response_obj: Any,
        level: str = "DEFAULT",
        status_message: Optional[str] = None,
    ) -> Tuple[str, str, Any]:
        """
        Mirror Langfuse _get_langfuse_input_output_content for Galileo ingest.

        Returns (input_text, output_text, messages_for_span).
        """
        call_type = kwargs.get("call_type")
        prompt = self._build_prompt(kwargs)

        if (
            level == "ERROR"
            and status_message is not None
            and isinstance(status_message, str)
        ):
            return self._prompt_to_input_text(prompt), status_message, prompt

        if response_obj is not None and (
            call_type in ("embedding", "aembedding")
            or isinstance(response_obj, litellm.EmbeddingResponse)
        ):
            # Match Langfuse OTEL: log embeddings without serializing vectors.
            return self._prompt_to_input_text(prompt), "embedding-output", prompt

        if response_obj is not None and isinstance(response_obj, litellm.ModelResponse):
            output = self._get_chat_content_for_galileo(response_obj)
            return (
                self._prompt_to_input_text(prompt),
                self._serialize_galileo_output(output),
                kwargs.get("messages") or [],
            )

        if response_obj is not None and isinstance(
            response_obj, HttpxBinaryResponseContent
        ):
            return self._prompt_to_input_text(prompt), "speech-output", prompt

        if response_obj is not None and isinstance(
            response_obj, litellm.TextCompletionResponse
        ):
            output = self._get_text_completion_content_for_galileo(response_obj)
            return (
                self._prompt_to_input_text(prompt),
                self._serialize_galileo_output(output),
                kwargs.get("messages") or [],
            )

        if response_obj is not None and isinstance(response_obj, litellm.ImageResponse):
            output = response_obj.get("data", None)
            return (
                self._prompt_to_input_text(prompt),
                self._serialize_galileo_output(output),
                prompt,
            )

        if response_obj is not None and isinstance(
            response_obj, litellm.TranscriptionResponse
        ):
            output = response_obj.get("text", None)
            return (
                self._prompt_to_input_text(prompt),
                self._serialize_galileo_output(output),
                prompt,
            )

        if response_obj is not None and isinstance(
            response_obj, litellm.RerankResponse
        ):
            output = response_obj.results
            rerank_prompt = self._langfuse_style_rerank_prompt(kwargs)
            return (
                json.dumps(rerank_prompt, default=str),
                self._serialize_galileo_output(output),
                rerank_prompt,
            )

        if response_obj is not None and isinstance(response_obj, ResponsesAPIResponse):
            output = self._get_responses_api_content_for_galileo(response_obj)
            return (
                self._prompt_to_input_text(prompt),
                self._serialize_galileo_output(output),
                kwargs.get("messages") or [],
            )

        if (
            call_type == "_arealtime"
            and response_obj is not None
            and isinstance(response_obj, list)
        ):
            input_val = kwargs.get("input")
            return (
                self._serialize_galileo_output(input_val),
                self._serialize_galileo_output(response_obj),
                input_val,
            )

        if (
            call_type == "pass_through_endpoint"
            and response_obj is not None
            and isinstance(response_obj, dict)
        ):
            output = response_obj.get("response", "")
            return (
                self._prompt_to_input_text(prompt),
                self._serialize_galileo_output(output),
                prompt,
            )

        if response_obj is not None and isinstance(response_obj, dict):
            output = get_content_from_model_response(response_obj)
            return (
                self._prompt_to_input_text(prompt),
                self._serialize_galileo_output(output),
                kwargs.get("messages") or [],
            )

        return self._prompt_to_input_text(prompt), "", kwargs.get("messages") or []

    def get_output_str_from_response(
        self, response_obj: Any, kwargs: Dict[str, Any]
    ) -> str:
        _, output_text, _ = self._get_galileo_input_output_content(
            kwargs=kwargs, response_obj=response_obj
        )
        return output_text

    @staticmethod
    def _input_text_from_messages(messages: Any) -> str:
        """Return a plain-string summary of the input suitable for the trace-level input field."""
        if isinstance(messages, str):
            return messages
        if not isinstance(messages, list):
            return ""
        # Use the last user/human message so the trace table shows the actual prompt
        for msg in reversed(messages):
            if not isinstance(msg, dict):
                continue
            if str(msg.get("role", "")).lower() in ("user", "human"):
                content = msg.get("content") or ""
                if isinstance(content, list):
                    content = " ".join(
                        b.get("text", "") if isinstance(b, dict) else str(b)
                        for b in content
                    )
                if content:
                    return str(content)
        # Fallback: first non-empty content of any role
        for msg in messages:
            if isinstance(msg, dict):
                content = msg.get("content") or ""
                if isinstance(content, list):
                    content = " ".join(
                        b.get("text", "") if isinstance(b, dict) else str(b)
                        for b in content
                    )
                if content:
                    return str(content)
        return ""

    async def async_log_success_event(
        self, kwargs: Any, response_obj: Any, start_time: Any, end_time: Any
    ):
        """Success callback entry point: delegate to the implementation, never raise.

        Any exception from the implementation is logged with its traceback
        and swallowed.
        """
        verbose_logger.debug("On Async Success")
        try:
            await self._async_log_success_event_impl(
                kwargs, response_obj, start_time, end_time
            )
        except Exception:
            verbose_logger.exception(
                "Galileo Logger: unexpected error in async_log_success_event"
            )

    async def _async_log_success_event_impl(
        self, kwargs: Any, response_obj: Any, start_time: Any, end_time: Any
    ):
        """Build a record for a successful call, buffer it, and flush if due.

        Steps:
          1. Skip when the logger is not configured or ``kwargs`` carries no
             ``standard_logging_object``.
          2. Derive input/output text and span messages from the response.
          3. Compute timing from the logging object's ``startTime``/``endTime``
             (treated as epoch seconds), falling back to the ``start_time`` /
             ``end_time`` arguments when those are missing.
          4. Append the record to the in-memory buffer, trim the buffer to
             ``GALILEO_MAX_IN_MEMORY_RECORDS``, and flush once the buffer
             holds at least ``batch_size`` records.

        Returns ``None`` in all cases; the public wrapper handles exceptions.
        """
        if not self._is_configured():
            verbose_logger.debug(
                "Galileo Logger: skipping — GALILEO_PROJECT_ID=%s GALILEO_API_KEY=%s GALILEO_BASE_URL=%s",
                bool(self.project_id),
                bool(self.api_key),
                bool(self.base_url),
            )
            return

        slo: Optional[Dict[str, Any]] = kwargs.get("standard_logging_object")
        if slo is None:
            verbose_logger.debug(
                "Galileo Logger: no standard_logging_object in kwargs, skipping"
            )
            return

        # Prefer the call type from the logging object, then kwargs, then a
        # generic "litellm" label.
        _call_type: str = str(
            slo.get("call_type") or kwargs.get("call_type") or "litellm"
        )

        input_text, output_text, messages = self._get_galileo_input_output_content(
            kwargs=kwargs, response_obj=response_obj
        )

        raw_start = slo.get("startTime")
        raw_end = slo.get("endTime")
        if raw_start is None or raw_end is None:
            verbose_logger.debug(
                "Galileo Logger: standard_logging_object missing startTime/endTime, "
                "falling back to start_time/end_time params"
            )
            # Without usable datetimes there is no way to time the call, so
            # the record is dropped.
            if not isinstance(start_time, datetime) or not isinstance(
                end_time, datetime
            ):
                return
            start_ts = start_time
            end_ts = end_time
            # Naive datetimes are interpreted as local time before converting to UTC.
            if start_ts.tzinfo is None:
                start_ts = start_ts.replace(tzinfo=GalileoObserve._local_timezone())
            if end_ts.tzinfo is None:
                end_ts = end_ts.replace(tzinfo=GalileoObserve._local_timezone())
            start_ts = start_ts.astimezone(timezone.utc)
            end_ts = end_ts.astimezone(timezone.utc)
        else:
            start_ts = datetime.fromtimestamp(float(raw_start), tz=timezone.utc)
            end_ts = datetime.fromtimestamp(float(raw_end), tz=timezone.utc)
        # Clamp to zero so an end before the start never yields negative latency.
        _latency_ms = max(0, int((end_ts - start_ts).total_seconds() * 1000))
        num_input_tokens = int(slo.get("prompt_tokens") or 0)
        num_output_tokens = int(slo.get("completion_tokens") or 0)
        num_total_tokens = int(slo.get("total_tokens") or 0)
        # Derive the total when only the parts were reported.
        if num_total_tokens == 0 and (num_input_tokens or num_output_tokens):
            num_total_tokens = num_input_tokens + num_output_tokens

        request_record = LLMResponse(
            latency_ms=_latency_ms,
            status_code=200,
            input_text=input_text,
            output_text=output_text,
            node_type=_call_type,
            model=str(slo.get("model") or kwargs.get("model") or "-"),
            num_input_tokens=num_input_tokens,
            num_output_tokens=num_output_tokens,
            num_total_tokens=num_total_tokens,
            cost=slo.get("response_cost"),
            created_at=GalileoObserve._format_created_at(start_ts),
        )

        request_dict = request_record.model_dump()
        # "messages" is not a model field; it is attached to the dumped dict
        # so the span builder can emit structured input.
        if isinstance(messages, dict):
            messages = messages.get("messages")
        if isinstance(messages, list) and messages:
            request_dict["messages"] = messages
        self.in_memory_records.append(request_dict)
        verbose_logger.debug(
            "Galileo Logger: queued record, in_memory=%d", len(self.in_memory_records)
        )

        # Bound the buffer so persistent flush failures cannot grow it
        # without limit. Drop the oldest records once we exceed the cap.
        # Note: the slice below rebinds the attribute to a new list object.
        if len(self.in_memory_records) > GALILEO_MAX_IN_MEMORY_RECORDS:
            dropped = len(self.in_memory_records) - GALILEO_MAX_IN_MEMORY_RECORDS
            self.in_memory_records = self.in_memory_records[
                -GALILEO_MAX_IN_MEMORY_RECORDS:
            ]
            verbose_logger.warning(
                "Galileo Logger: in-memory buffer exceeded %s records; "
                "dropped %s oldest record(s). Check Galileo connectivity/credentials.",
                GALILEO_MAX_IN_MEMORY_RECORDS,
                dropped,
            )

        if len(self.in_memory_records) >= self.batch_size:
            await self.flush_in_memory_records()

    async def flush_in_memory_records(self):
        """Send all buffered records to Galileo in one request.

        On success, exactly the records that were part of the request are
        removed from the buffer; records appended while the request was in
        flight stay queued. On any failure the buffer is left untouched so
        the next flush retries. Failures are logged at debug level and never
        raised.

        When username/password auth is in use and the request is rejected
        with 401/403, the cached bearer-token headers are discarded so the
        next flush logs in again. This applies both when the HTTP client
        raises ``httpx.HTTPStatusError`` and when it returns the error
        response.
        """
        if not self.in_memory_records:
            return

        # Capture the number of records that will be sent BEFORE any await so
        # that concurrent appends made by other asyncio tasks during the
        # network round-trip aren't silently dropped on the success-clear.
        records_in_payload = len(self.in_memory_records)

        ingest_request = self._get_ingest_request()
        if ingest_request is None:
            verbose_logger.debug(
                "Galileo Logger: missing GALILEO_BASE_URL or GALILEO_PROJECT_ID — skipping flush"
            )
            return

        if not await self._ensure_headers():
            verbose_logger.debug(
                "Galileo Logger: could not set request headers — skipping flush"
            )
            return

        url, payload = ingest_request
        self._log_flush_config()
        self._log_flush_payload(url=url, payload=payload)
        verbose_logger.debug(
            "Galileo Logger flush headers: %s",
            self._redact_headers(self.headers),
        )
        verbose_logger.debug("flushing in memory records to %s", url)

        try:
            response = await self.async_httpx_handler.post(
                url=url,
                headers=self.headers,
                json=payload,
            )
        except httpx.HTTPStatusError as e:
            self._log_http_status_error(error=e, url=url)
            verbose_logger.debug(
                "Galileo Logger: failed to flush in memory records: %s", e
            )
            # The client signalled the HTTP error by raising; an auth
            # rejection must still invalidate a stale cached token.
            self._reset_headers_on_auth_failure(e.response.status_code)
            return
        except Exception as e:
            verbose_logger.debug(
                "Galileo Logger: failed to flush in memory records: %s", e
            )
            return

        if response.is_success:
            verbose_logger.debug(
                "Galileo Logger: successfully flushed in memory records"
            )
            verbose_logger.debug(
                "Galileo Logger flush response: status=%s body=%s",
                response.status_code,
                response.text,
            )
            del self.in_memory_records[:records_in_payload]
        else:
            verbose_logger.debug("Galileo Logger: failed to flush in memory records")
            verbose_logger.debug(
                "Galileo Logger error=%s, status code=%s",
                response.text,
                response.status_code,
            )
            self._reset_headers_on_auth_failure(response.status_code)

    def _reset_headers_on_auth_failure(self, status_code: int) -> None:
        """Drop cached legacy bearer-token headers after a 401/403 response.

        Legacy enterprise auth caches a bearer token obtained from /login.
        If a request was rejected for auth reasons, clearing the headers
        makes the next flush re-authenticate instead of failing forever on a
        stale token. The API-key path uses a long-lived static key, so its
        headers are left in place.
        """
        if not self.use_v2_api and status_code in (401, 403):
            self.headers = None

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        # Failure callback. In the code visible here it only emits a debug
        # log line; no record is queued for Galileo on failures.
        verbose_logger.debug("On Async Failure")