refactor: extract config.py, add burn_only, fix title_segments and font size
- Extract all path/API config to config.py (single source of truth)
- Add run.py / burn_only.py / run.bat / burn.bat entry points
- burn_only: skip transcription/subtitle generation and quickly re-burn existing SRTs
- Fix title_segments: use the transcript keyword time as the split point
- Fix subtitle: each overlapping title is shown for at most title_duration (not the full clip)
- Fix burn_only font size: change the default from 90 to 60
- Delete old run_lesson1.bat/py and temporary debug scripts
- Update README, ARCHITECTURE, CHANGELOG; add USAGE.md
This commit is contained in:
+127
-70
@@ -12,7 +12,7 @@ import logging
|
||||
from typing import Callable, Optional, List, Dict, Any
|
||||
|
||||
from .video import extract_clip, merge_clips, burn_dual_subtitles
|
||||
from .subtitle import SubtitlePipeline
|
||||
from .subtitle import SubtitlePipeline, correct_subtitles_llm
|
||||
from .llm import LLMClient
|
||||
from .corrections import apply_all_corrections, load_term_corrections_from_config
|
||||
from .utils import ensure_dir
|
||||
@@ -223,16 +223,41 @@ class Pipeline:
|
||||
self.progress_callback('transcribing', int((i/total)*90), f"转录片段 {i}/{total}")
|
||||
|
||||
try:
|
||||
segments, _ = model.transcribe(clip_path, language='zh', beam_size=5)
|
||||
segments, _ = model.transcribe(clip_path, language='zh', beam_size=5, word_timestamps=True)
|
||||
|
||||
# 保存转录结果
|
||||
# 保存转录结果(按句末标点进一步切分)
|
||||
segments_data = []
|
||||
for seg in segments:
|
||||
segments_data.append({
|
||||
'start': seg.start,
|
||||
'end': seg.end,
|
||||
'text': seg.text.strip()
|
||||
})
|
||||
words = seg.words if hasattr(seg, 'words') else []
|
||||
if words:
|
||||
# 用 word-level 时间戳在句末标点处切分
|
||||
# 注意:标点可能附着在词后(如"吗?"、"奏,"),需 strip 后判断
|
||||
_END_MARKS = '。!??'
|
||||
sub_start = words[0].start
|
||||
sub_text_parts = []
|
||||
for word in words:
|
||||
sub_text_parts.append(word.word)
|
||||
# 剥离标点后判断是否为句末标记
|
||||
stripped = word.word.rstrip(',、,')
|
||||
if any(stripped.endswith(m) for m in _END_MARKS):
|
||||
sub_end = word.end
|
||||
sub_text = ''.join(sub_text_parts).strip()
|
||||
if sub_text:
|
||||
segments_data.append({'start': sub_start, 'end': sub_end, 'text': sub_text})
|
||||
sub_start = word.end
|
||||
sub_text_parts = []
|
||||
# 剩余未到句末的文本
|
||||
if sub_text_parts:
|
||||
remaining = ''.join(sub_text_parts).strip()
|
||||
if remaining:
|
||||
segments_data.append({'start': sub_start, 'end': words[-1].end, 'text': remaining})
|
||||
else:
|
||||
# fallback:无 word timestamps,直接用原 segment
|
||||
segments_data.append({
|
||||
'start': seg.start,
|
||||
'end': seg.end,
|
||||
'text': seg.text.strip()
|
||||
})
|
||||
|
||||
with open(json_path, 'w', encoding='utf-8') as f:
|
||||
json.dump({'segments': segments_data}, f, ensure_ascii=False, indent=2)
|
||||
@@ -249,59 +274,58 @@ class Pipeline:
|
||||
self.step_callback('transcribing')
|
||||
return json_paths
|
||||
|
||||
def step_correct_titles(self, json_paths: List[str]) -> List[Dict[str, Any]]:
|
||||
def _recalculate_title_segments_from_transcript(
|
||||
self,
|
||||
clips: List[Dict],
|
||||
json_paths: List[str]
|
||||
) -> None:
|
||||
"""
|
||||
Step 3: LLM标题纠正
|
||||
用 transcript 数据重新计算重叠片段的 title_segments 切分点。
|
||||
|
||||
Args:
|
||||
json_paths: JSON文件路径列表
|
||||
|
||||
Returns:
|
||||
corrected_clips: 纠正后的片段配置列表
|
||||
重叠片段的 switch_offset 应该按 transcript 中第二个标题关键词
|
||||
首次出现的时间来算,而不是按 clip 边界。
|
||||
"""
|
||||
self.step_callback('title_correcting')
|
||||
self.progress_callback('title_correcting', 0, "开始标题纠正...")
|
||||
for i, clip in enumerate(clips):
|
||||
ts = clip.get('title_segments')
|
||||
if not ts or len(ts) < 2:
|
||||
continue
|
||||
|
||||
corrected_clips = []
|
||||
total = len(self.clips)
|
||||
# 取第二个标题段 [title, offset]
|
||||
second_title, old_offset = ts[1]
|
||||
json_path = json_paths[i] if i < len(json_paths) else None
|
||||
if not json_path or not os.path.exists(json_path):
|
||||
continue
|
||||
|
||||
for i, (clip, json_path) in enumerate(zip(self.clips, json_paths), 1):
|
||||
original_title = clip.get('title', f'Clip {i}')
|
||||
|
||||
# 读取转录文本
|
||||
transcript_text = ''
|
||||
if json_path and os.path.exists(json_path):
|
||||
try:
|
||||
with open(json_path, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
transcript_text = ' '.join(seg.get('text', '') for seg in data.get('segments', []))
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
# LLM纠正标题
|
||||
corrected_title = original_title
|
||||
if transcript_text and self.config.get('api_key'):
|
||||
try:
|
||||
corrected_title = self.llm_client.correct_title(
|
||||
transcript_text,
|
||||
original_title,
|
||||
[c.get('title', '') for c in self.clips]
|
||||
) or original_title
|
||||
except Exception as e:
|
||||
logger.warning(f"LLM title correction failed for clip {i}: {e}")
|
||||
# 在 transcript 中搜索 second_title 的首次出现时间
|
||||
first_time = None
|
||||
for seg in data.get('segments', []):
|
||||
for word_info in seg.get('words', []):
|
||||
w = word_info.get('word', '')
|
||||
# 关键词匹配(标题可能含多字符,取子串)
|
||||
if second_title and second_title in w:
|
||||
first_time = word_info['start']
|
||||
break
|
||||
if first_time is not None:
|
||||
break
|
||||
|
||||
corrected_clip = {
|
||||
'index': i - 1,
|
||||
'title': corrected_title,
|
||||
'original_title': original_title,
|
||||
'start': clip['start'],
|
||||
'end': clip['end'],
|
||||
}
|
||||
corrected_clips.append(corrected_clip)
|
||||
|
||||
percent = int((i / total) * 100)
|
||||
self.progress_callback('title_correcting', percent, f"纠正标题 {i}/{total}")
|
||||
|
||||
self.progress_callback('title_correcting', 100, "标题纠正完成")
|
||||
self.step_callback('title_correcting')
|
||||
return corrected_clips
|
||||
if first_time is not None:
|
||||
new_offset = first_time
|
||||
clip['title_segments'][1][1] = new_offset
|
||||
logger.info(
|
||||
f" clip{i+1} title_segments: "
|
||||
f"'{second_title}' 从 {old_offset:.2f}s → {new_offset:.2f}s"
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f" clip{i+1} title_segments: "
|
||||
f"未在 transcript 中找到 '{second_title}',保留原 offset {old_offset:.2f}s"
|
||||
)
|
||||
|
||||
def step_generate_subtitles(self, corrected_clips: List[Dict], json_paths: List[str]) -> tuple:
|
||||
"""
|
||||
@@ -327,6 +351,7 @@ class Pipeline:
|
||||
'start': clip['start'],
|
||||
'end': clip['end'],
|
||||
'title': clip.get('title', clip.get('original_title', '')),
|
||||
'title_segments': clip.get('title_segments'), # 可能为None
|
||||
}
|
||||
clip_configs.append(clip_config)
|
||||
|
||||
@@ -357,6 +382,39 @@ class Pipeline:
|
||||
self.step_callback('generating_subtitles')
|
||||
return title_path, content_path
|
||||
|
||||
def step_correct_subtitles(self, title_path: str, content_path: str) -> str:
    """
    Step 4.5: correct the content subtitles with the LLM.

    The title subtitle file, the content subtitle file and the PPT source
    text taken from ``self.config['ppt_text']`` are handed to
    ``correct_subtitles_llm`` together with ``self.llm_client``. Per the
    original description, title.srt serves as the timeline anchor and the
    PPT text as the terminology reference for fixing wrong characters,
    missing characters and term errors in content.srt.

    Args:
        title_path: Path of the title subtitle file.
        content_path: Path of the content subtitle file.

    Returns:
        The path returned by ``correct_subtitles_llm``; when no PPT text is
        configured the step is skipped and ``content_path`` is returned
        unchanged.
    """
    reference_text = self.config.get('ppt_text', '')

    if reference_text:
        stage = 'correcting_subtitles'

        # Announce the step and its starting progress before the LLM call.
        self.step_callback(stage)
        self.progress_callback(stage, 0, "开始纠正字幕...")

        result_path = correct_subtitles_llm(
            title_path=title_path,
            content_path=content_path,
            ppt_text=reference_text,
            llm_client=self.llm_client,
        )

        # Report completion; the step callback is invoked a second time,
        # exactly as in the original implementation.
        self.progress_callback(stage, 100, "字幕纠正完成")
        self.step_callback(stage)
        return result_path

    # No PPT reference text: skip the step without firing any callbacks.
    logger.warning("PPT原文为空,跳过字幕纠正步骤")
    return content_path
|
||||
|
||||
def step_merge(self, clip_paths: List[str]) -> str:
|
||||
"""
|
||||
Step 5: 合并视频
|
||||
@@ -411,7 +469,7 @@ class Pipeline:
|
||||
title_path,
|
||||
content_path,
|
||||
final_path,
|
||||
title_fontsize=video_params.get('title_fontsize', 90),
|
||||
title_fontsize=video_params.get('title_fontsize', 60),
|
||||
title_color=video_params.get('title_color', 'FFFF00'),
|
||||
subtitle_fontsize=video_params.get('subtitle_fontsize', 24),
|
||||
subtitle_color=video_params.get('subtitle_color', 'FFFFFF')
|
||||
@@ -447,17 +505,14 @@ class Pipeline:
|
||||
# Step 2: 转录
|
||||
json_paths = self.step_transcribe(clip_paths)
|
||||
|
||||
# Step 3: 标题纠正
|
||||
corrected_clips = self.step_correct_titles(json_paths)
|
||||
# Step 2.5: 用 transcript 重新计算重叠片段的 title_segments 切分点
|
||||
self._recalculate_title_segments_from_transcript(self.clips, json_paths)
|
||||
|
||||
# Step 4: 生成字幕
|
||||
title_path, content_path = self.step_generate_subtitles(corrected_clips, json_paths)
|
||||
|
||||
# Step 5: 合并
|
||||
# Step 3-6: 生成字幕、纠正、合并、烧录
|
||||
title_path, content_path = self.step_generate_subtitles(self.clips, json_paths)
|
||||
corrected_content_path = self.step_correct_subtitles(title_path, content_path)
|
||||
merged_path = self.step_merge(clip_paths)
|
||||
|
||||
# Step 6: 烧录
|
||||
final_path = self.step_burn(merged_path, title_path, content_path)
|
||||
final_path = self.step_burn(merged_path, title_path, corrected_content_path)
|
||||
|
||||
logger.info(f"Pipeline completed: {final_path}")
|
||||
return final_path
|
||||
@@ -474,23 +529,25 @@ class Pipeline:
|
||||
"""
|
||||
logger.info(f"Pipeline starting with user confirmation: {len(self.clips)} clips")
|
||||
|
||||
# Step 1-3: 同上
|
||||
# Step 1-2: 提取+转录
|
||||
clip_paths = self.step_extract()
|
||||
if not clip_paths:
|
||||
raise RuntimeError("No clips extracted")
|
||||
|
||||
json_paths = self.step_transcribe(clip_paths)
|
||||
corrected_clips = self.step_correct_titles(json_paths)
|
||||
|
||||
# Step 2.5: 用 transcript 重新计算重叠片段的 title_segments 切分点
|
||||
self._recalculate_title_segments_from_transcript(self.clips, json_paths)
|
||||
|
||||
# 应用用户确认的标题
|
||||
for i, confirmed in enumerate(confirmed_titles):
|
||||
if i < len(corrected_clips):
|
||||
corrected_clips[i]['title'] = confirmed.get('title', corrected_clips[i]['title'])
|
||||
if i < len(self.clips):
|
||||
self.clips[i]['title'] = confirmed.get('title', self.clips[i].get('title', ''))
|
||||
|
||||
# Step 4-6: 同上
|
||||
title_path, content_path = self.step_generate_subtitles(corrected_clips, json_paths)
|
||||
# Step 3-6: 生成字幕、纠正、合并、烧录
|
||||
title_path, content_path = self.step_generate_subtitles(self.clips, json_paths)
|
||||
corrected_content_path = self.step_correct_subtitles(title_path, content_path)
|
||||
merged_path = self.step_merge(clip_paths)
|
||||
final_path = self.step_burn(merged_path, title_path, content_path)
|
||||
final_path = self.step_burn(merged_path, title_path, corrected_content_path)
|
||||
|
||||
logger.info(f"Pipeline completed: {final_path}")
|
||||
return final_path
|
||||
|
||||
Reference in New Issue
Block a user