Files
MoFin/venv/lib/python3.12/site-packages/litellm/integrations/focus/transformer.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

136 lines
5.4 KiB
Python

"""Focus export data transformer."""
from __future__ import annotations
import json
from datetime import timedelta
import polars as pl
from .schema import FOCUS_NORMALIZED_SCHEMA
_TAG_KEYS = (
"team_id",
"team_alias",
"organization_id",
"organization_alias",
"user_id",
"user_email",
"api_key_alias",
"model",
"model_group",
"custom_llm_provider",
)
def _build_tags_expr(available_keys: list[str]) -> pl.Expr:
    """Return a Polars expression yielding one JSON ``Tags`` string per row.

    The columns named in ``available_keys`` are packed into a single struct
    column, and every struct value is handed to a Python callback through
    ``map_elements``. This avoids converting the whole DataFrame into a list
    of Python dicts up front; the JSON serialisation itself still happens in
    Python, one row at a time.

    Args:
        available_keys: Names of the columns to include in the tags object.

    Returns:
        A ``pl.String`` expression aliased ``"Tags"``. For each row, fields
        whose value is ``None`` are omitted, remaining values are converted
        with ``str``, and a row with no remaining fields yields ``"{}"``.
    """

    def _encode_row(fields: dict) -> str:
        # Keep only populated fields; every kept value is stringified.
        present = {}
        for name, value in fields.items():
            if value is None:
                continue
            present[name] = str(value)
        if not present:
            return "{}"
        return json.dumps(present)

    packed = pl.struct(available_keys)
    as_json = packed.map_elements(_encode_row, return_dtype=pl.String)
    return as_json.alias("Tags")
class FocusTransformer:
    """Transforms LiteLLM DB rows into Focus-compatible schema."""

    # Schema used to build the empty result frame.
    schema = FOCUS_NORMALIZED_SCHEMA

    def transform(self, frame: pl.DataFrame) -> pl.DataFrame:
        """Return a normalized frame expected by downstream serializers.

        An empty input produces an empty frame built from ``self.schema``.
        Otherwise the method:

        1. adds a ``Tags`` column: JSON built from whichever ``_TAG_KEYS``
           columns exist on ``frame``, or the literal ``"{}"`` if none do;
        2. parses ``date`` (cast to string, format ``%Y-%m-%d``, non-strict)
           into a microsecond datetime and derives a one-day charge period;
        3. selects the output columns in a fixed order.

        Args:
            frame: Input rows. The selection reads the columns ``date``,
                ``spend``, ``api_key``, ``api_key_alias``, ``model``,
                ``api_requests``, ``custom_llm_provider``, ``model_group``,
                ``team_id`` and ``team_alias``.

        Returns:
            A new ``pl.DataFrame`` holding only the Focus-named columns.
        """
        if frame.is_empty():
            return pl.DataFrame(schema=self.schema)

        # Tags: JSON of the metadata columns that are actually present.
        tag_columns = [key for key in _TAG_KEYS if key in frame.columns]
        if tag_columns:
            tags_expr = _build_tags_expr(tag_columns)
        else:
            tags_expr = pl.lit("{}").alias("Tags")
        frame = frame.with_columns(tags_expr)

        # Charge period: [usage date, usage date + 1 day).
        parsed_date = (
            pl.col("date")
            .cast(pl.Utf8)
            .str.strptime(pl.Datetime(time_unit="us"), format="%Y-%m-%d", strict=False)
        )
        frame = frame.with_columns(parsed_date.alias("usage_date"))
        usage_date = pl.col("usage_date")
        frame = frame.with_columns(
            usage_date.alias("ChargePeriodStart"),
            (usage_date + timedelta(days=1)).alias("ChargePeriodEnd"),
        )

        decimal_type = pl.Decimal(18, 6)

        def _text(column: str) -> pl.Expr:
            # Source column cast to string.
            return pl.col(column).cast(pl.String)

        def _timestamp(column: str) -> pl.Expr:
            # Datetime column rendered as text with a literal trailing "Z".
            return pl.col(column).dt.strftime("%Y-%m-%dT%H:%M:%SZ")

        # Expressions shared by several output columns.
        cost = pl.col("spend").fill_null(0.0).cast(decimal_type)
        request_count = (
            pl.col("api_requests")
            .cast(pl.Int64)
            .cast(pl.Float64)
            .fill_null(0.0)
            .cast(decimal_type)
        )
        period_start = _timestamp("ChargePeriodStart")
        period_end = _timestamp("ChargePeriodEnd")
        provider = _text("custom_llm_provider")
        model = _text("model")
        null_text = pl.lit(None, dtype=pl.Utf8)
        null_decimal = pl.lit(None, dtype=decimal_type)

        # (output name, expression) pairs; list order is the output order.
        output_columns = [
            ("BilledCost", cost),
            ("BillingAccountId", _text("api_key")),
            ("BillingAccountName", _text("api_key_alias")),
            ("BillingAccountType", pl.lit("API Key")),
            ("BillingCurrency", pl.lit("USD")),
            ("BillingPeriodEnd", period_end),
            ("BillingPeriodStart", period_start),
            ("ChargeCategory", pl.lit("Usage")),
            ("ChargeClass", null_text),
            ("ChargeDescription", model),
            ("ChargeFrequency", pl.lit("Usage-Based")),
            ("ChargePeriodEnd", period_end),
            ("ChargePeriodStart", period_start),
            ("ConsumedQuantity", request_count),
            ("ConsumedUnit", pl.lit("Requests")),
            ("ContractedCost", cost),
            # NOTE(review): null string here, while the other *UnitPrice
            # columns are null decimals -- confirm against the schema.
            ("ContractedUnitPrice", null_text),
            ("EffectiveCost", cost),
            ("InvoiceIssuerName", provider),
            ("InvoiceId", null_text),
            ("ListCost", cost),
            ("ListUnitPrice", null_decimal),
            ("AvailabilityZone", null_text),
            ("PricingCurrency", pl.lit("USD")),
            ("PricingCategory", null_text),
            ("PricingQuantity", request_count),
            ("PricingCurrencyContractedUnitPrice", null_decimal),
            ("PricingCurrencyEffectiveCost", cost),
            ("PricingCurrencyListUnitPrice", null_decimal),
            ("PricingUnit", pl.lit("Requests")),
            ("ProviderName", provider),
            ("PublisherName", provider),
            ("RegionId", null_text),
            ("RegionName", null_text),
            ("ResourceId", model),
            ("ResourceName", model),
            ("ResourceType", model),
            ("ServiceCategory", pl.lit("AI and Machine Learning")),
            ("ServiceSubcategory", pl.lit("Generative AI")),
            ("ServiceName", _text("model_group")),
            ("SubAccountId", _text("team_id")),
            ("SubAccountName", _text("team_alias")),
            ("SubAccountType", null_text),
            ("Tags", _text("Tags")),
        ]
        return frame.select([expr.alias(name) for name, expr in output_columns])