refactor: extract config.py, add burn_only, fix title_segments and font size

- Extract all path/API config to config.py (single source of truth)
- Add run.py / burn_only.py / run.bat / burn.bat entry points
- burn_only: skip transcription/subtitle generation, fast re-burn of existing SRTs
- Fix title_segments: use transcript keyword time for split point
- Fix subtitle: each overlapping title shows for at most title_duration (not the full clip)
- Fix burn_only font size: lower default from 90 to 60
- Delete old run_lesson1.bat/py, temp debug scripts
- Update README, ARCHITECTURE, CHANGELOG, add USAGE.md
2026-05-03 23:22:10 +08:00
parent cf5004cf6a
commit aad1548348
39 changed files with 826 additions and 556 deletions
@@ -12,7 +12,7 @@ import logging
 from typing import Callable, Optional, List, Dict, Any

 from .video import extract_clip, merge_clips, burn_dual_subtitles
-from .subtitle import SubtitlePipeline
+from .subtitle import SubtitlePipeline, correct_subtitles_llm
 from .llm import LLMClient
 from .corrections import apply_all_corrections, load_term_corrections_from_config
 from .utils import ensure_dir
@@ -223,16 +223,41 @@ class Pipeline:
            self.progress_callback('transcribing', int((i/total)*90), f"转录片段 {i}/{total}")

            try:
-                segments, _ = model.transcribe(clip_path, language='zh', beam_size=5)
+                segments, _ = model.transcribe(clip_path, language='zh', beam_size=5, word_timestamps=True)

-                # 保存转录结果
+                # 保存转录结果（按句末标点进一步切分）
                segments_data = []
                for seg in segments:
-                    segments_data.append({
-                        'start': seg.start,
-                        'end': seg.end,
-                        'text': seg.text.strip()
-                    })
+                    words = seg.words if hasattr(seg, 'words') else []
+                    if words:
+                        # 用 word-level 时间戳在句末标点处切分
+                        # 注意：标点可能附着在词后（如"吗?"、"奏,"），需 strip 后判断
+                        _END_MARKS = '。！？?'
+                        sub_start = words[0].start
+                        sub_text_parts = []
+                        for word in words:
+                            sub_text_parts.append(word.word)
+                            # 剥离标点后判断是否为句末标记
+                            stripped = word.word.rstrip('，、,')
+                            if any(stripped.endswith(m) for m in _END_MARKS):
+                                sub_end = word.end
+                                sub_text = ''.join(sub_text_parts).strip()
+                                if sub_text:
+                                    segments_data.append({'start': sub_start, 'end': sub_end, 'text': sub_text})
+                                sub_start = word.end
+                                sub_text_parts = []
+                        # 剩余未到句末的文本
+                        if sub_text_parts:
+                            remaining = ''.join(sub_text_parts).strip()
+                            if remaining:
+                                segments_data.append({'start': sub_start, 'end': words[-1].end, 'text': remaining})
+                    else:
+                        # fallback：无 word timestamps，直接用原 segment
+                        segments_data.append({
+                            'start': seg.start,
+                            'end': seg.end,
+                            'text': seg.text.strip()
+                        })

                with open(json_path, 'w', encoding='utf-8') as f:
                    json.dump({'segments': segments_data}, f, ensure_ascii=False, indent=2)
@@ -249,59 +274,58 @@ class Pipeline:
        self.step_callback('transcribing')
        return json_paths

-    def step_correct_titles(self, json_paths: List[str]) -> List[Dict[str, Any]]:
+    def _recalculate_title_segments_from_transcript(
+        self,
+        clips: List[Dict],
+        json_paths: List[str]
+    ) -> None:
        """
-        Step 3: LLM标题纠正
+        用 transcript 数据重新计算重叠片段的 title_segments 切分点。

-        Args:
-            json_paths: JSON文件路径列表
-
-        Returns:
-            corrected_clips: 纠正后的片段配置列表
+        重叠片段的 switch_offset 应该按 transcript 中第二个标题关键词
+        首次出现的时间来算，而不是按 clip 边界。
        """
-        self.step_callback('title_correcting')
-        self.progress_callback('title_correcting', 0, "开始标题纠正...")
+        for i, clip in enumerate(clips):
+            ts = clip.get('title_segments')
+            if not ts or len(ts) < 2:
+                continue

-        corrected_clips = []
-        total = len(self.clips)
+            # 取第二个标题段 [title, offset]
+            second_title, old_offset = ts[1]
+            json_path = json_paths[i] if i < len(json_paths) else None
+            if not json_path or not os.path.exists(json_path):
+                continue

-        for i, (clip, json_path) in enumerate(zip(self.clips, json_paths), 1):
-            original_title = clip.get('title', f'Clip {i}')
-
-            # 读取转录文本
-            transcript_text = ''
-            if json_path and os.path.exists(json_path):
+            try:
                with open(json_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
-                transcript_text = ' '.join(seg.get('text', '') for seg in data.get('segments', []))
+            except Exception:
+                continue

-            # LLM纠正标题
-            corrected_title = original_title
-            if transcript_text and self.config.get('api_key'):
-                try:
-                    corrected_title = self.llm_client.correct_title(
-                        transcript_text,
-                        original_title,
-                        [c.get('title', '') for c in self.clips]
-                    ) or original_title
-                except Exception as e:
-                    logger.warning(f"LLM title correction failed for clip {i}: {e}")
+            # 在 transcript 中搜索 second_title 的首次出现时间
+            first_time = None
+            for seg in data.get('segments', []):
+                for word_info in seg.get('words', []):
+                    w = word_info.get('word', '')
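+                    # Substring match of the title inside one word. NOTE(review): step_transcribe in this diff saves only start/end/text per segment (no 'words'), so this may never match - confirm.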
+                    if second_title and second_title in w:
+                        first_time = word_info['start']
+                        break
+                if first_time is not None:
+                    break

-            corrected_clip = {
-                'index': i - 1,
-                'title': corrected_title,
-                'original_title': original_title,
-                'start': clip['start'],
-                'end': clip['end'],
-            }
-            corrected_clips.append(corrected_clip)
-
-            percent = int((i / total) * 100)
-            self.progress_callback('title_correcting', percent, f"纠正标题 {i}/{total}")
-
-        self.progress_callback('title_correcting', 100, "标题纠正完成")
-        self.step_callback('title_correcting')
-        return corrected_clips
+            if first_time is not None:
+                new_offset = first_time
+                clip['title_segments'][1][1] = new_offset
+                logger.info(
+                    f"  clip{i+1} title_segments: "
+                    f"'{second_title}' 从 {old_offset:.2f}s → {new_offset:.2f}s"
+                )
+            else:
+                logger.warning(
+                    f"  clip{i+1} title_segments: "
+                    f"未在 transcript 中找到 '{second_title}'，保留原 offset {old_offset:.2f}s"
+                )

    def step_generate_subtitles(self, corrected_clips: List[Dict], json_paths: List[str]) -> tuple:
        """
@@ -327,6 +351,7 @@ class Pipeline:
                'start': clip['start'],
                'end': clip['end'],
                'title': clip.get('title', clip.get('original_title', '')),
+                'title_segments': clip.get('title_segments'),  # 可能为None
            }
            clip_configs.append(clip_config)

@@ -357,6 +382,39 @@ class Pipeline:
        self.step_callback('generating_subtitles')
        return title_path, content_path

+    def step_correct_subtitles(self, title_path: str, content_path: str) -> str:
+        """
+        Step 4.5: LLM纠正字幕内容
+
+        参考title.srt（时间轴锚点）和PPT原文（术语参考），
+        修正content.srt中的错字、漏字、术语错误。
+
+        Args:
+            title_path: 标题字幕路径
+            content_path: 内容字幕路径
+
+        Returns:
+            修正后的content_path
+        """
+        ppt_text = self.config.get('ppt_text', '')
+        if not ppt_text:
+            logger.warning("PPT原文为空，跳过字幕纠正步骤")
+            return content_path
+
+        self.step_callback('correcting_subtitles')
+        self.progress_callback('correcting_subtitles', 0, "开始纠正字幕...")
+
+        corrected_path = correct_subtitles_llm(
+            title_path=title_path,
+            content_path=content_path,
+            ppt_text=ppt_text,
+            llm_client=self.llm_client,
+        )
+
+        self.progress_callback('correcting_subtitles', 100, "字幕纠正完成")
+        self.step_callback('correcting_subtitles')
+        return corrected_path
+
    def step_merge(self, clip_paths: List[str]) -> str:
        """
        Step 5: 合并视频
@@ -411,7 +469,7 @@ class Pipeline:
            title_path,
            content_path,
            final_path,
-            title_fontsize=video_params.get('title_fontsize', 90),
+            title_fontsize=video_params.get('title_fontsize', 60),
            title_color=video_params.get('title_color', 'FFFF00'),
            subtitle_fontsize=video_params.get('subtitle_fontsize', 24),
            subtitle_color=video_params.get('subtitle_color', 'FFFFFF')
@@ -447,17 +505,14 @@ class Pipeline:
        # Step 2: 转录
        json_paths = self.step_transcribe(clip_paths)

-        # Step 3: 标题纠正
-        corrected_clips = self.step_correct_titles(json_paths)
+        # Step 2.5: 用 transcript 重新计算重叠片段的 title_segments 切分点
+        self._recalculate_title_segments_from_transcript(self.clips, json_paths)

-        # Step 4: 生成字幕
-        title_path, content_path = self.step_generate_subtitles(corrected_clips, json_paths)
-
-        # Step 5: 合并
+        # Step 3-6: 生成字幕、纠正、合并、烧录
+        title_path, content_path = self.step_generate_subtitles(self.clips, json_paths)
+        corrected_content_path = self.step_correct_subtitles(title_path, content_path)
        merged_path = self.step_merge(clip_paths)
-
-        # Step 6: 烧录
-        final_path = self.step_burn(merged_path, title_path, content_path)
+        final_path = self.step_burn(merged_path, title_path, corrected_content_path)

        logger.info(f"Pipeline completed: {final_path}")
        return final_path
@@ -474,23 +529,25 @@ class Pipeline:
        """
        logger.info(f"Pipeline starting with user confirmation: {len(self.clips)} clips")

-        # Step 1-3: 同上
+        # Step 1-2: 提取+转录
        clip_paths = self.step_extract()
        if not clip_paths:
            raise RuntimeError("No clips extracted")
-
        json_paths = self.step_transcribe(clip_paths)
-        corrected_clips = self.step_correct_titles(json_paths)
+
+        # Step 2.5: 用 transcript 重新计算重叠片段的 title_segments 切分点
+        self._recalculate_title_segments_from_transcript(self.clips, json_paths)

        # 应用用户确认的标题
        for i, confirmed in enumerate(confirmed_titles):
-            if i < len(corrected_clips):
-                corrected_clips[i]['title'] = confirmed.get('title', corrected_clips[i]['title'])
+            if i < len(self.clips):
+                self.clips[i]['title'] = confirmed.get('title', self.clips[i].get('title', ''))

-        # Step 4-6: 同上
-        title_path, content_path = self.step_generate_subtitles(corrected_clips, json_paths)
+        # Step 3-6: 生成字幕、纠正、合并、烧录
+        title_path, content_path = self.step_generate_subtitles(self.clips, json_paths)
+        corrected_content_path = self.step_correct_subtitles(title_path, content_path)
        merged_path = self.step_merge(clip_paths)
-        final_path = self.step_burn(merged_path, title_path, content_path)
+        final_path = self.step_burn(merged_path, title_path, corrected_content_path)

        logger.info(f"Pipeline completed: {final_path}")
        return final_path