refactor: extract config.py, add burn_only, fix title_segments and font size

- Extract all path/API config to config.py (single source of truth)
- Add run.py / burn_only.py / run.bat / burn.bat entry points
- burn_only: skip transcription and subtitle generation; quickly re-burn existing SRTs
- Fix title_segments: use the transcript keyword time as the split point
- Fix subtitle: each overlapping title is shown for at most title_duration (not the full clip)
- Fix burn_only font size: change default from 90 to 60
- Delete old run_lesson1.bat/py and temporary debug scripts
- Update README, ARCHITECTURE, CHANGELOG; add USAGE.md
2026-05-03 23:22:10 +08:00
parent cf5004cf6a
commit aad1548348
39 changed files with 826 additions and 556 deletions
@@ -56,6 +56,8 @@ class LLMClient:
            "max_tokens": max_tokens
        }

+        logger.info(f"[LLM] request chars={len(prompt)}, max_tokens={max_tokens}")
+
        for attempt in range(LLM_MAX_RETRIES):
            try:
                response = requests.post(url, headers=headers, json=payload, timeout=timeout)
@@ -73,6 +75,7 @@ class LLMClient:

                content = choices[0].get("message", {}).get("content", "").strip()
                if content:
+                    logger.info(f"[LLM] response chars={len(content)}")
                    return content

                logger.warning(f"LLM: Empty content (attempt {attempt+1})")
@@ -88,106 +91,6 @@ class LLMClient:

        return None

-    def correct_title(self, transcript_text, original_title, all_titles=None):
-        """
-        使用LLM纠正标题
-
-        Args:
-            transcript_text: 字幕文本
-            original_title: 原始标题
-            all_titles: 所有标题列表
-
-        Returns:
-            纠正后的标题
-        """
-        titles_str = ", ".join(all_titles[:20]) if all_titles else "无"
-
-        prompt = f"""你是一个钢琴教学视频的标题验证专家。
-
-PPT提取的标题：{original_title}
-
-视频字幕内容：{transcript_text[:500] if transcript_text else "无"}
-
-本节课所有标题：{titles_str}
-
-【重要规则】
- 只有当你有90%以上把握认为原标题错误时，才输出纠正后的标题
- 如果原标题基本正确，即使不完美，也必须输出原标题
- 绝对不能输出与原标题完全不同概念的词
- 如果不确定，输出原标题
-
-请直接输出标题，不要添加任何解释。"""
-
-        result = self.chat(prompt, max_tokens=50, timeout=LLM_TITLE_TIMEOUT)
-        return result if result else original_title
-
-    def validate_content(self, transcript_text, title):
-        """
-        使用LLM验证内容是否与标题相关
-
-        Args:
-            transcript_text: 字幕文本
-            title: 标题
-
-        Returns:
-            (is_valid: bool, reason: str)
-        """
-        prompt = f"""判断视频字幕内容是否与标题相关。
-
-标题：{title}
-
-字幕内容：{transcript_text[:300] if transcript_text else "无"}
-
-判断标准：
- 内容讨论的主题与标题概念相关 = 相关
- 内容与标题无关（如广告、闲聊、无关话题）= 无关
- 无法判断 = 不确定
-
-请直接输出：相关/无关/不确定"""
-
-        result = self.chat(prompt, max_tokens=20, timeout=LLM_VALIDATE_TIMEOUT)
-        if not result:
-            return True, "error"
-
-        if "无关" in result:
-            return False, result
-        elif "不确定" in result:
-            return True, "uncertain"
-        return True, result
-
-    def full_text_correction(self, text, clip_title, knowledge_terms=None):
-        """
-        使用LLM进行全文字幕纠错
-
-        Args:
-            text: 原始字幕
-            clip_title: 片段标题
-            knowledge_terms: 知识点列表
-
-        Returns:
-            纠错后的字幕
-        """
-        knowledge_str = ", ".join(knowledge_terms[:20]) if knowledge_terms else "无"
-
-        prompt = f"""你是一个钢琴教学视频的字幕纠错专家。
-
-原始字幕：{text}
-
-本节课片段标题：{clip_title}
-本节课知识点：{knowledge_str}
-
-请进行字幕纠错：
-1. 修复语音识别错误（如"羞耻"→"休止"，"副点"→"附点"，"负点"→"附点"）
-2. 修复同音字错误
-3. 保留原文的专业术语和表达方式
-4. 不要改变原文的语气和意思
-
-请直接输出纠错后的字幕，不要添加任何解释。"""
-
-        result = self.chat(prompt, max_tokens=500, timeout=LLM_TIMEOUT)
-        return result if result else text
-
-
 # 全局LLM客户端实例
 _llm_client = None