refactor: extract config.py, add burn_only, fix title_segments and font size

- Extract all path/API config to config.py (single source of truth)
- Add run.py / burn_only.py / run.bat / burn.bat entry points
- burn_only: skip transcription/subtitle gen, fast reburn existing SRTs
- Fix title_segments: use transcript keyword time for split point
- Fix subtitle: each overlapping title is shown for at most title_duration (not for the full clip)
- Fix burn_only font size: change the default title font size from 90 to 60
- Delete old run_lesson1.bat/py, temp debug scripts
- Update README, ARCHITECTURE, CHANGELOG, add USAGE.md
This commit is contained in:
hmo
2026-05-03 23:22:10 +08:00
parent cf5004cf6a
commit aad1548348
39 changed files with 826 additions and 556 deletions
+127 -70
View File
@@ -12,7 +12,7 @@ import logging
from typing import Callable, Optional, List, Dict, Any
from .video import extract_clip, merge_clips, burn_dual_subtitles
from .subtitle import SubtitlePipeline
from .subtitle import SubtitlePipeline, correct_subtitles_llm
from .llm import LLMClient
from .corrections import apply_all_corrections, load_term_corrections_from_config
from .utils import ensure_dir
@@ -223,16 +223,41 @@ class Pipeline:
self.progress_callback('transcribing', int((i/total)*90), f"转录片段 {i}/{total}")
try:
segments, _ = model.transcribe(clip_path, language='zh', beam_size=5)
segments, _ = model.transcribe(clip_path, language='zh', beam_size=5, word_timestamps=True)
# 保存转录结果
# 保存转录结果(按句末标点进一步切分)
segments_data = []
for seg in segments:
segments_data.append({
'start': seg.start,
'end': seg.end,
'text': seg.text.strip()
})
words = seg.words if hasattr(seg, 'words') else []
if words:
# 用 word-level 时间戳在句末标点处切分
# 注意:标点可能附着在词后(如"吗?"、"奏,"),需 strip 后判断
_END_MARKS = '。!??'
sub_start = words[0].start
sub_text_parts = []
for word in words:
sub_text_parts.append(word.word)
# 剥离标点后判断是否为句末标记
stripped = word.word.rstrip(',、,')
if any(stripped.endswith(m) for m in _END_MARKS):
sub_end = word.end
sub_text = ''.join(sub_text_parts).strip()
if sub_text:
segments_data.append({'start': sub_start, 'end': sub_end, 'text': sub_text})
sub_start = word.end
sub_text_parts = []
# 剩余未到句末的文本
if sub_text_parts:
remaining = ''.join(sub_text_parts).strip()
if remaining:
segments_data.append({'start': sub_start, 'end': words[-1].end, 'text': remaining})
else:
# fallback:无 word timestamps,直接用原 segment
segments_data.append({
'start': seg.start,
'end': seg.end,
'text': seg.text.strip()
})
with open(json_path, 'w', encoding='utf-8') as f:
json.dump({'segments': segments_data}, f, ensure_ascii=False, indent=2)
@@ -249,59 +274,58 @@ class Pipeline:
self.step_callback('transcribing')
return json_paths
def step_correct_titles(self, json_paths: List[str]) -> List[Dict[str, Any]]:
def _recalculate_title_segments_from_transcript(
self,
clips: List[Dict],
json_paths: List[str]
) -> None:
"""
Step 3: LLM标题纠正
用 transcript 数据重新计算重叠片段的 title_segments 切分点。
Args:
json_paths: JSON文件路径列表
Returns:
corrected_clips: 纠正后的片段配置列表
重叠片段的 switch_offset 应该按 transcript 中第二个标题关键词
首次出现的时间来算,而不是按 clip 边界。
"""
self.step_callback('title_correcting')
self.progress_callback('title_correcting', 0, "开始标题纠正...")
for i, clip in enumerate(clips):
ts = clip.get('title_segments')
if not ts or len(ts) < 2:
continue
corrected_clips = []
total = len(self.clips)
# 取第二个标题段 [title, offset]
second_title, old_offset = ts[1]
json_path = json_paths[i] if i < len(json_paths) else None
if not json_path or not os.path.exists(json_path):
continue
for i, (clip, json_path) in enumerate(zip(self.clips, json_paths), 1):
original_title = clip.get('title', f'Clip {i}')
# 读取转录文本
transcript_text = ''
if json_path and os.path.exists(json_path):
try:
with open(json_path, 'r', encoding='utf-8') as f:
data = json.load(f)
transcript_text = ' '.join(seg.get('text', '') for seg in data.get('segments', []))
except Exception:
continue
# LLM纠正标题
corrected_title = original_title
if transcript_text and self.config.get('api_key'):
try:
corrected_title = self.llm_client.correct_title(
transcript_text,
original_title,
[c.get('title', '') for c in self.clips]
) or original_title
except Exception as e:
logger.warning(f"LLM title correction failed for clip {i}: {e}")
# 在 transcript 中搜索 second_title 的首次出现时间
first_time = None
for seg in data.get('segments', []):
for word_info in seg.get('words', []):
w = word_info.get('word', '')
# 关键词匹配(标题可能含多字符,取子串)
if second_title and second_title in w:
first_time = word_info['start']
break
if first_time is not None:
break
corrected_clip = {
'index': i - 1,
'title': corrected_title,
'original_title': original_title,
'start': clip['start'],
'end': clip['end'],
}
corrected_clips.append(corrected_clip)
percent = int((i / total) * 100)
self.progress_callback('title_correcting', percent, f"纠正标题 {i}/{total}")
self.progress_callback('title_correcting', 100, "标题纠正完成")
self.step_callback('title_correcting')
return corrected_clips
if first_time is not None:
new_offset = first_time
clip['title_segments'][1][1] = new_offset
logger.info(
f" clip{i+1} title_segments: "
f"'{second_title}'{old_offset:.2f}s → {new_offset:.2f}s"
)
else:
logger.warning(
f" clip{i+1} title_segments: "
f"未在 transcript 中找到 '{second_title}',保留原 offset {old_offset:.2f}s"
)
def step_generate_subtitles(self, corrected_clips: List[Dict], json_paths: List[str]) -> tuple:
"""
@@ -327,6 +351,7 @@ class Pipeline:
'start': clip['start'],
'end': clip['end'],
'title': clip.get('title', clip.get('original_title', '')),
'title_segments': clip.get('title_segments'), # 可能为None
}
clip_configs.append(clip_config)
@@ -357,6 +382,39 @@ class Pipeline:
self.step_callback('generating_subtitles')
return title_path, content_path
def step_correct_subtitles(self, title_path: str, content_path: str) -> str:
    """
    Step 4.5: LLM-based correction of the content subtitles.

    Passes the title subtitles, the content subtitles and the PPT source
    text (read from ``self.config['ppt_text']``) to
    ``correct_subtitles_llm``. The step is best-effort: when the PPT text
    is empty, when the LLM call raises, or when it returns no path, the
    original ``content_path`` is returned so the pipeline can still merge
    and burn the uncorrected subtitles.

    Args:
        title_path: Path of the title subtitle file.
        content_path: Path of the content subtitle file.

    Returns:
        Path of the corrected content subtitle file, or ``content_path``
        unchanged when correction was skipped or failed.
    """
    ppt_text = self.config.get('ppt_text', '')
    if not ppt_text:
        logger.warning("PPT原文为空,跳过字幕纠正步骤")
        return content_path

    self.step_callback('correcting_subtitles')
    self.progress_callback('correcting_subtitles', 0, "开始纠正字幕...")

    try:
        corrected_path = correct_subtitles_llm(
            title_path=title_path,
            content_path=content_path,
            ppt_text=ppt_text,
            llm_client=self.llm_client,
        )
    except Exception as e:
        # Correction is an optional refinement; a failed LLM call must not
        # discard the extraction/transcription work already done.
        logger.warning(
            f"LLM subtitle correction failed, using uncorrected subtitles: {e}"
        )
        corrected_path = None

    # Always emit the closing callbacks so progress reporting stays balanced.
    self.progress_callback('correcting_subtitles', 100, "字幕纠正完成")
    self.step_callback('correcting_subtitles')

    # Guard against a None/empty result being handed to the burn step.
    return corrected_path or content_path
def step_merge(self, clip_paths: List[str]) -> str:
"""
Step 5: 合并视频
@@ -411,7 +469,7 @@ class Pipeline:
title_path,
content_path,
final_path,
title_fontsize=video_params.get('title_fontsize', 90),
title_fontsize=video_params.get('title_fontsize', 60),
title_color=video_params.get('title_color', 'FFFF00'),
subtitle_fontsize=video_params.get('subtitle_fontsize', 24),
subtitle_color=video_params.get('subtitle_color', 'FFFFFF')
@@ -447,17 +505,14 @@ class Pipeline:
# Step 2: 转录
json_paths = self.step_transcribe(clip_paths)
# Step 3: 标题纠正
corrected_clips = self.step_correct_titles(json_paths)
# Step 2.5: 用 transcript 重新计算重叠片段的 title_segments 切分点
self._recalculate_title_segments_from_transcript(self.clips, json_paths)
# Step 4: 生成字幕
title_path, content_path = self.step_generate_subtitles(corrected_clips, json_paths)
# Step 5: 合并
# Step 3-6: 生成字幕、纠正、合并、烧录
title_path, content_path = self.step_generate_subtitles(self.clips, json_paths)
corrected_content_path = self.step_correct_subtitles(title_path, content_path)
merged_path = self.step_merge(clip_paths)
# Step 6: 烧录
final_path = self.step_burn(merged_path, title_path, content_path)
final_path = self.step_burn(merged_path, title_path, corrected_content_path)
logger.info(f"Pipeline completed: {final_path}")
return final_path
@@ -474,23 +529,25 @@ class Pipeline:
"""
logger.info(f"Pipeline starting with user confirmation: {len(self.clips)} clips")
# Step 1-3: 同上
# Step 1-2: 提取+转录
clip_paths = self.step_extract()
if not clip_paths:
raise RuntimeError("No clips extracted")
json_paths = self.step_transcribe(clip_paths)
corrected_clips = self.step_correct_titles(json_paths)
# Step 2.5: 用 transcript 重新计算重叠片段的 title_segments 切分点
self._recalculate_title_segments_from_transcript(self.clips, json_paths)
# 应用用户确认的标题
for i, confirmed in enumerate(confirmed_titles):
if i < len(corrected_clips):
corrected_clips[i]['title'] = confirmed.get('title', corrected_clips[i]['title'])
if i < len(self.clips):
self.clips[i]['title'] = confirmed.get('title', self.clips[i].get('title', ''))
# Step 4-6: 同上
title_path, content_path = self.step_generate_subtitles(corrected_clips, json_paths)
# Step 3-6: 生成字幕、纠正、合并、烧录
title_path, content_path = self.step_generate_subtitles(self.clips, json_paths)
corrected_content_path = self.step_correct_subtitles(title_path, content_path)
merged_path = self.step_merge(clip_paths)
final_path = self.step_burn(merged_path, title_path, content_path)
final_path = self.step_burn(merged_path, title_path, corrected_content_path)
logger.info(f"Pipeline completed: {final_path}")
return final_path