refactor: extract config.py, add burn_only, fix title_segments and font size
- Extract all path/API config to config.py (single source of truth) - Add run.py / burn_only.py / run.bat / burn.bat entry points - burn_only: skip transcription/subtitle gen, fast reburn existing SRTs - Fix title_segments: use transcript keyword time for split point - Fix subtitle: each overlapping title shows max title_duration (not full clip) - Fix burn_only font size: default from 90 to 60 - Delete old run_lesson1.bat/py, temp debug scripts - Update README, ARCHITECTURE, CHANGELOG, add USAGE.md
This commit is contained in:
+3
-100
@@ -56,6 +56,8 @@ class LLMClient:
|
||||
"max_tokens": max_tokens
|
||||
}
|
||||
|
||||
logger.info(f"[LLM] request chars={len(prompt)}, max_tokens={max_tokens}")
|
||||
|
||||
for attempt in range(LLM_MAX_RETRIES):
|
||||
try:
|
||||
response = requests.post(url, headers=headers, json=payload, timeout=timeout)
|
||||
@@ -73,6 +75,7 @@ class LLMClient:
|
||||
|
||||
content = choices[0].get("message", {}).get("content", "").strip()
|
||||
if content:
|
||||
logger.info(f"[LLM] response chars={len(content)}")
|
||||
return content
|
||||
|
||||
logger.warning(f"LLM: Empty content (attempt {attempt+1})")
|
||||
@@ -88,106 +91,6 @@ class LLMClient:
|
||||
|
||||
return None
|
||||
|
||||
def correct_title(self, transcript_text, original_title, all_titles=None):
|
||||
"""
|
||||
使用LLM纠正标题
|
||||
|
||||
Args:
|
||||
transcript_text: 字幕文本
|
||||
original_title: 原始标题
|
||||
all_titles: 所有标题列表
|
||||
|
||||
Returns:
|
||||
纠正后的标题
|
||||
"""
|
||||
titles_str = ", ".join(all_titles[:20]) if all_titles else "无"
|
||||
|
||||
prompt = f"""你是一个钢琴教学视频的标题验证专家。
|
||||
|
||||
PPT提取的标题:{original_title}
|
||||
|
||||
视频字幕内容:{transcript_text[:500] if transcript_text else "无"}
|
||||
|
||||
本节课所有标题:{titles_str}
|
||||
|
||||
【重要规则】
|
||||
- 只有当你有90%以上把握认为原标题错误时,才输出纠正后的标题
|
||||
- 如果原标题基本正确,即使不完美,也必须输出原标题
|
||||
- 绝对不能输出与原标题完全不同概念的词
|
||||
- 如果不确定,输出原标题
|
||||
|
||||
请直接输出标题,不要添加任何解释。"""
|
||||
|
||||
result = self.chat(prompt, max_tokens=50, timeout=LLM_TITLE_TIMEOUT)
|
||||
return result if result else original_title
|
||||
|
||||
def validate_content(self, transcript_text, title):
|
||||
"""
|
||||
使用LLM验证内容是否与标题相关
|
||||
|
||||
Args:
|
||||
transcript_text: 字幕文本
|
||||
title: 标题
|
||||
|
||||
Returns:
|
||||
(is_valid: bool, reason: str)
|
||||
"""
|
||||
prompt = f"""判断视频字幕内容是否与标题相关。
|
||||
|
||||
标题:{title}
|
||||
|
||||
字幕内容:{transcript_text[:300] if transcript_text else "无"}
|
||||
|
||||
判断标准:
|
||||
- 内容讨论的主题与标题概念相关 = 相关
|
||||
- 内容与标题无关(如广告、闲聊、无关话题)= 无关
|
||||
- 无法判断 = 不确定
|
||||
|
||||
请直接输出:相关/无关/不确定"""
|
||||
|
||||
result = self.chat(prompt, max_tokens=20, timeout=LLM_VALIDATE_TIMEOUT)
|
||||
if not result:
|
||||
return True, "error"
|
||||
|
||||
if "无关" in result:
|
||||
return False, result
|
||||
elif "不确定" in result:
|
||||
return True, "uncertain"
|
||||
return True, result
|
||||
|
||||
def full_text_correction(self, text, clip_title, knowledge_terms=None):
|
||||
"""
|
||||
使用LLM进行全文字幕纠错
|
||||
|
||||
Args:
|
||||
text: 原始字幕
|
||||
clip_title: 片段标题
|
||||
knowledge_terms: 知识点列表
|
||||
|
||||
Returns:
|
||||
纠错后的字幕
|
||||
"""
|
||||
knowledge_str = ", ".join(knowledge_terms[:20]) if knowledge_terms else "无"
|
||||
|
||||
prompt = f"""你是一个钢琴教学视频的字幕纠错专家。
|
||||
|
||||
原始字幕:{text}
|
||||
|
||||
本节课片段标题:{clip_title}
|
||||
本节课知识点:{knowledge_str}
|
||||
|
||||
请进行字幕纠错:
|
||||
1. 修复语音识别错误(如"羞耻"→"休止","副点"→"附点","负点"→"附点")
|
||||
2. 修复同音字错误
|
||||
3. 保留原文的专业术语和表达方式
|
||||
4. 不要改变原文的语气和意思
|
||||
|
||||
请直接输出纠错后的字幕,不要添加任何解释。"""
|
||||
|
||||
result = self.chat(prompt, max_tokens=500, timeout=LLM_TIMEOUT)
|
||||
return result if result else text
|
||||
|
||||
|
||||
# 全局LLM客户端实例
|
||||
_llm_client = None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user