fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
136 lines
5.4 KiB
Python
136 lines
5.4 KiB
Python
"""Focus export data transformer."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from datetime import timedelta
|
|
|
|
import polars as pl
|
|
|
|
from .schema import FOCUS_NORMALIZED_SCHEMA
|
|
|
|
_TAG_KEYS = (
|
|
"team_id",
|
|
"team_alias",
|
|
"organization_id",
|
|
"organization_alias",
|
|
"user_id",
|
|
"user_email",
|
|
"api_key_alias",
|
|
"model",
|
|
"model_group",
|
|
"custom_llm_provider",
|
|
)
|
|
|
|
|
|
def _build_tags_expr(available_keys: list[str]) -> pl.Expr:
    """Return an expression yielding one JSON-encoded ``Tags`` string per row.

    The columns named in ``available_keys`` are packed into a struct, and each
    struct value is handed to a Python callback through ``map_elements``. The
    callback drops null fields, converts the remaining values with ``str`` and
    serialises the result with ``json.dumps``. A row whose fields are all null
    therefore becomes ``"{}"``.

    Args:
        available_keys: Names of the columns to fold into the tags object.

    Returns:
        A string-typed expression aliased to ``"Tags"``.
    """

    def _encode_row(fields: dict) -> str:
        non_null = {}
        for key, value in fields.items():
            if value is not None:
                non_null[key] = str(value)
        # json.dumps({}) already produces "{}", so an empty mapping needs no
        # special case.
        return json.dumps(non_null)

    packed = pl.struct(available_keys)
    encoded = packed.map_elements(_encode_row, return_dtype=pl.String)
    return encoded.alias("Tags")
|
|
|
|
|
|
class FocusTransformer:
    """Reshape LiteLLM usage rows into the normalized Focus export layout."""

    # Schema used to build the empty result when there is nothing to transform.
    schema = FOCUS_NORMALIZED_SCHEMA

    def transform(self, frame: pl.DataFrame) -> pl.DataFrame:
        """Return the normalized frame expected by downstream serializers.

        An empty input short-circuits to an empty frame built from
        ``self.schema``. Otherwise a ``Tags`` JSON column is derived from
        whichever ``_TAG_KEYS`` columns are present, the ``date`` column is
        parsed (``%Y-%m-%d``, ``strict=False``) into a one-day charge period,
        and the Focus columns are selected in a fixed order.

        The final select reads the columns ``date``, ``spend``, ``api_key``,
        ``api_key_alias``, ``model``, ``model_group``, ``custom_llm_provider``,
        ``api_requests``, ``team_id`` and ``team_alias`` from ``frame``.
        """
        if frame.is_empty():
            return pl.DataFrame(schema=self.schema)

        # Tags: JSON built from the metadata columns that exist, else "{}".
        tag_columns = [name for name in _TAG_KEYS if name in frame.columns]
        if tag_columns:
            tags_expr = _build_tags_expr(tag_columns)
        else:
            tags_expr = pl.lit("{}").alias("Tags")
        frame = frame.with_columns(tags_expr)

        # Charge period: [usage date, usage date + 1 day).
        parsed_date = (
            pl.col("date")
            .cast(pl.Utf8)
            .str.strptime(pl.Datetime(time_unit="us"), format="%Y-%m-%d", strict=False)
        )
        frame = frame.with_columns(parsed_date.alias("usage_date"))
        frame = frame.with_columns(
            pl.col("usage_date").alias("ChargePeriodStart"),
            (pl.col("usage_date") + timedelta(days=1)).alias("ChargePeriodEnd"),
        )

        timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
        money = pl.Decimal(18, 6)

        def text(column: str) -> pl.Expr:
            return pl.col(column).cast(pl.String)

        # Shared building blocks; each is re-aliased per output column below.
        period_start = pl.col("ChargePeriodStart").dt.strftime(timestamp_format)
        period_end = pl.col("ChargePeriodEnd").dt.strftime(timestamp_format)
        spend = pl.col("spend").fill_null(0.0).cast(money)
        request_count = (
            pl.col("api_requests")
            .cast(pl.Int64)
            .cast(pl.Float64)
            .fill_null(0.0)
            .cast(money)
        )
        null_text = pl.lit(None, dtype=pl.Utf8)
        null_money = pl.lit(None, dtype=pl.Decimal(18, 6))

        # Insertion order of this mapping is the column order of the result.
        focus_columns = {
            "BilledCost": spend,
            "BillingAccountId": text("api_key"),
            "BillingAccountName": text("api_key_alias"),
            "BillingAccountType": pl.lit("API Key"),
            "BillingCurrency": pl.lit("USD"),
            "BillingPeriodEnd": period_end,
            "BillingPeriodStart": period_start,
            "ChargeCategory": pl.lit("Usage"),
            "ChargeClass": null_text,
            "ChargeDescription": text("model"),
            "ChargeFrequency": pl.lit("Usage-Based"),
            "ChargePeriodEnd": period_end,
            "ChargePeriodStart": period_start,
            "ConsumedQuantity": request_count,
            "ConsumedUnit": pl.lit("Requests"),
            "ContractedCost": spend,
            "ContractedUnitPrice": null_text,
            "EffectiveCost": spend,
            "InvoiceIssuerName": text("custom_llm_provider"),
            "InvoiceId": null_text,
            "ListCost": spend,
            "ListUnitPrice": null_money,
            "AvailabilityZone": null_text,
            "PricingCurrency": pl.lit("USD"),
            "PricingCategory": null_text,
            "PricingQuantity": request_count,
            "PricingCurrencyContractedUnitPrice": null_money,
            "PricingCurrencyEffectiveCost": spend,
            "PricingCurrencyListUnitPrice": null_money,
            "PricingUnit": pl.lit("Requests"),
            "ProviderName": text("custom_llm_provider"),
            "PublisherName": text("custom_llm_provider"),
            "RegionId": null_text,
            "RegionName": null_text,
            "ResourceId": text("model"),
            "ResourceName": text("model"),
            "ResourceType": text("model"),
            "ServiceCategory": pl.lit("AI and Machine Learning"),
            "ServiceSubcategory": pl.lit("Generative AI"),
            "ServiceName": text("model_group"),
            "SubAccountId": text("team_id"),
            "SubAccountName": text("team_alias"),
            "SubAccountType": null_text,
            "Tags": text("Tags"),
        }
        return frame.select(
            [expr.alias(name) for name, expr in focus_columns.items()]
        )
|