# lesson-highlights/src/core/llm.py

# -*- coding: utf-8 -*-
"""
LLM调用封装

统一管理火山方舟API调用，包含重试和错误处理
"""

import os
import time
import logging
from .constants import (
    DEFAULT_API_HOST, LLM_MODEL, LLM_TIMEOUT,
    LLM_MAX_RETRIES, LLM_TITLE_TIMEOUT, LLM_VALIDATE_TIMEOUT,
    get_api_key
)

logger = logging.getLogger(__name__)

import requests


class LLMClient:
    """Small HTTP client for a ``/chat/completions`` endpoint.

    Each call posts a JSON body with ``model``, ``messages`` and
    ``max_tokens`` and reads the reply from ``choices[0].message.content``.

    Instance attributes (set in ``__init__``):
        api_key: bearer token placed in the ``Authorization`` header; when
            falsy, every ``chat`` call is skipped and returns ``None``.
        api_host: base URL to which ``/chat/completions`` is appended.
    """

    # Seconds to wait after a failed attempt before trying again.
    _RETRY_DELAY = 1

    def __init__(self, api_key=None, api_host=None):
        """Store the credentials and endpoint used by ``chat``.

        Args:
            api_key: API key; when falsy, the value of ``get_api_key()`` is
                used instead.
            api_host: base URL; when falsy, ``DEFAULT_API_HOST`` is used.
        """
        # Explicit arguments take priority over the configured fallbacks.
        self.api_key = api_key or get_api_key()
        self.api_host = api_host or DEFAULT_API_HOST
        if not self.api_key:
            logger.warning("No API key configured - LLM calls will be skipped")

    def chat(self, prompt, max_tokens=500, timeout=LLM_TIMEOUT):
        """Send one user message and return the reply text.

        Up to ``LLM_MAX_RETRIES`` attempts are made, with a short pause
        between attempts. An HTTP 401 response aborts immediately without
        further attempts. This method does not raise for request or parsing
        failures; they are logged and count as a failed attempt.

        Args:
            prompt: text sent as the single ``user`` message.
            max_tokens: value of the ``max_tokens`` request field.
            timeout: passed through as the ``timeout`` of ``requests.post``.

        Returns:
            The reply text with surrounding whitespace stripped, or ``None``
            when no API key is configured, the server answers 401, or every
            attempt fails or yields an empty reply.
        """
        if not self.api_key:
            logger.info("LLM: No API key, skipping")
            return None

        # Tolerate a configured host with a trailing slash so the request
        # path never becomes ".../ /chat/completions" with a doubled slash.
        base_url = str(self.api_host).rstrip("/")
        url = f"{base_url}/chat/completions"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": LLM_MODEL,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": max_tokens,
        }

        logger.info("[LLM] request chars=%s, max_tokens=%s", len(prompt), max_tokens)

        for attempt in range(LLM_MAX_RETRIES):
            try:
                response = requests.post(url, headers=headers, json=payload, timeout=timeout)
                # A rejected key will not start working on retry: stop now.
                if response.status_code == 401:
                    logger.error("LLM: 401 Unauthorized - API key invalid, stopping immediately")
                    return None
                response.raise_for_status()
                result = response.json()

                choices = result.get("choices") or []
                if not choices:
                    logger.warning("LLM: No choices in response (attempt %s)", attempt + 1)
                else:
                    # "message" / "content" may be present but JSON null;
                    # dict.get's default only covers *missing* keys, so
                    # coalesce explicitly before calling .strip().
                    message = choices[0].get("message") or {}
                    content = (message.get("content") or "").strip()
                    if content:
                        logger.info("[LLM] response chars=%s", len(content))
                        return content
                    logger.warning("LLM: Empty content (attempt %s)", attempt + 1)

            except requests.exceptions.Timeout:
                logger.warning("LLM: Timeout (attempt %s/%s)", attempt + 1, LLM_MAX_RETRIES)
            except Exception as e:
                # Deliberate catch-all: callers rely on None instead of an
                # exception for HTTP errors, bad JSON or unexpected shapes.
                logger.error("LLM: Error - %s", e)

            # Pause before the next attempt, but not after the final one.
            if attempt < LLM_MAX_RETRIES - 1:
                time.sleep(self._RETRY_DELAY)

        return None

# Module-wide LLMClient instance; stays None until it is first requested.
_llm_client = None


def get_llm_client():
    """Return the shared LLMClient, constructing it on the first call."""
    global _llm_client
    if _llm_client is not None:
        return _llm_client
    # First request: build the client with its default configuration.
    _llm_client = LLMClient()
    return _llm_client