refactor: extract config.py, add burn_only, fix title_segments and font size

- Extract all path/API config to config.py (single source of truth)
- Add run.py / burn_only.py / run.bat / burn.bat entry points
- burn_only: skip transcription/subtitle gen, fast reburn existing SRTs
- Fix title_segments: use transcript keyword time for split point
- Fix subtitle: each overlapping title shows max title_duration (not full clip)
- Fix burn_only font size: default from 90 to 60
- Delete old run_lesson1.bat/py, temp debug scripts
- Update README, ARCHITECTURE, CHANGELOG, add USAGE.md
2026-05-03 23:22:10 +08:00
parent cf5004cf6a
commit aad1548348
39 changed files with 826 additions and 556 deletions
@@ -26,40 +26,50 @@ cp config.ini.example config.ini
 pip install -r requirements.txt
 ```
-### 3. 运行
+### 3. 配置
 编辑 `config.py` 中的视频路径、PPT路径、API Key 等。所有配置集中在一个文件。
 ### 4. 运行
 **完整流程（首次运行）：**
 ```bash
 .\run.bat
 ```
 **快速烧录（仅修改字幕后重烧）：**
 ```bash
 .\burn.bat
 ```
 **GUI（推荐）：**
 ```bash
 .\start.bat
 ```
 **CLI：**
 ```bash
 .\run_lesson1.bat
 ```
 或通用方式：
 ```bash
 python src/cli.py --video video.mp4 --ppt presentation.pptx --output ./output
 ```
 ## 项目结构
 ```
 lesson-highlights/
 ├── config.py              # 统一配置（修改这里）
 ├── run.py                 # 完整流水线
 ├── burn_only.py           # 快速烧录（跳过转录/字幕生成）
 ├── run.bat                # 运行完整流程
 ├── burn.bat               # 快速重烧字幕
 ├── src/
-│   ├── main.py          # GUI 入口
+│   ├── main.py            # GUI 入口
-│   ├── gui.py           # GUI（参数输入，调用底层）
+│   ├── gui.py             # GUI（参数输入，调用底层）
-│   ├── cli.py           # CLI 入口
+│   ├── cli.py             # CLI 入口
-│   └── core/            # 共享底层
+│   └── core/              # 共享底层
-│       ├── ppt_parser.py    # PPT 解析 + clips 生成
+│       ├── ppt_parser.py  # PPT 解析 + clips 生成
-│       ├── pipeline.py      # 视频处理流水线
+│       ├── pipeline.py    # 视频处理流水线
-│       ├── subtitle.py       # 字幕生成
+│       ├── subtitle.py    # 字幕生成
 │       └── ...
-├── config.ini           # API 配置（不提交 git）
+├── config.ini             # API 配置（不提交 git）
-├── config.ini.example  # 配置模板
+├── config.ini.example    # 配置模板
-├── start.bat           # 启动 GUI
+└── docs/
-└── run_lesson1.bat    # CLI 示例
+    ├── USAGE.md           # 使用指南
    └── ...
 ```
 ## 工作流程
@@ -87,10 +97,15 @@ api_key = your_api_key_here
 ```
 output/
-├── generated_config.yaml   # 生成的 clips 配置
+├── generated_config.yaml     # clips 配置（可手动修改后重新运行）
-├── clips/                  # 提取的片段视频
+├── intermediates/            # 中间文件
-├── subtitles/              # 字幕文件
+│   ├── clip*.json           # Whisper 转录结果
-└── final.mp4              # 最终输出
+│   └── clip*.mp4           # 提取的视频片段
 ├── subs/                    # 字幕文件
 │   ├── v1_title.srt        # 标题轨（可手动修改）
 │   └── v1_content.srt      # 正文字幕
 ├── concat_merged.mp4       # 合并视频
 └── final.mp4               # 最终输出
 ```
 ## 系统要求
@@ -0,0 +1,3 @@
@echo off
 rem Fast re-burn launcher: runs burn_only.py and forwards every command-line argument to it.
 rem NOTE(review): the interpreter path and the script path are absolute and machine-specific;
 rem they have to be edited by hand when the environment or checkout location changes.
 "D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\burn_only.py" %*
 rem Wait for a key press so the console output stays visible.
 pause
@@ -0,0 +1,73 @@
 # -*- coding: utf-8 -*-
"""
Fast re-burn script: skips every transcription / subtitle-generation step.

Reuses the existing clip videos, title SRT and content SRT found under the
output directory: the clips are merged only when no merged video exists yet,
then both subtitle tracks are burned onto the merged video.

Usage:
    python burn_only.py
    python burn_only.py "D:\\path\\to\\output_dir"
"""
import glob
import os
import re
import sys


def natural_clip_key(path):
    """Return a sort key that orders clip files by their numeric index.

    A plain lexicographic sort puts ``clip10.mp4`` before ``clip2.mp4``, which
    would merge the clips in the wrong order as soon as there are ten or more.
    The file name is split into digit and non-digit runs and the digit runs are
    compared as integers.

    Args:
        path: Path of a clip file; only the base name takes part in the key.

    Returns:
        A list alternating lower-cased text chunks and integers.
    """
    name = os.path.basename(path)
    # re.split with a capturing group always yields text at even positions and
    # digits at odd positions, so two keys never compare an int with a str.
    return [int(part) if part.isdigit() else part.lower()
            for part in re.split(r"(\d+)", name)]


def main(argv=None):
    """Merge the existing clips if needed and burn the existing subtitles.

    Args:
        argv: Argument vector; defaults to ``sys.argv``. ``argv[1]``, when
            present, overrides the output directory taken from ``config.py``.

    Returns:
        Process exit code: 0 on success, 1 when a required input is missing.
    """
    argv = sys.argv if argv is None else argv

    # Make the unified configuration importable regardless of the current directory.
    project_root = os.path.dirname(os.path.abspath(__file__))
    sys.path.insert(0, project_root)
    import config

    output_dir = argv[1] if len(argv) > 1 else config.OUTPUT

    title_srt = os.path.join(output_dir, "subs", "v1_title.srt")
    content_srt = os.path.join(output_dir, "subs", "v1_content.srt")
    clips_dir = os.path.join(output_dir, "intermediates")
    merged_video = os.path.join(output_dir, "concat_merged.mp4")

    print("[Fast Burn Mode]")
    print(f"Output: {output_dir}")
    print()

    # Both subtitle tracks must already exist; this script never generates them.
    if not os.path.exists(title_srt):
        print(f"ERROR: title.srt not found\n{title_srt}")
        return 1
    if not os.path.exists(content_srt):
        print(f"ERROR: content.srt not found\n{content_srt}")
        return 1

    # The pipeline package lives under src/.
    sys.path.insert(0, os.path.join(project_root, "src"))
    from core import Pipeline

    # Minimal configuration: only the output directory matters here. With an
    # empty video_params the burn step uses its own default font sizes/colours.
    pipeline = Pipeline({
        'output_dir': output_dir,
        'clips': [],
        'video_src': None,
        'video_params': {},
        'term_corrections': {},
        'api_key': '',
        'api_host': '',
    })

    # Merge the clips only when no merged video is available yet.
    if os.path.exists(merged_video):
        print(f"Found existing merged video: {merged_video}")
        merged_path = merged_video
    else:
        clip_files = sorted(
            glob.glob(os.path.join(clips_dir, "clip*.mp4")),
            key=natural_clip_key,
        )
        if not clip_files:
            print(f"ERROR: No clip videos found\n{clips_dir}\\clip*.mp4")
            return 1
        print(f"Merging {len(clip_files)} clips...")
        merged_path = pipeline.step_merge(clip_files)
        print(f"Merged: {merged_path}")

    # Burn both subtitle tracks onto the merged video.
    print("Burning subtitles...")
    final_path = pipeline.step_burn(merged_path, title_srt, content_srt)
    print(f"\nDone: {final_path}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,24 @@
 # -*- coding: utf-8 -*-
"""
Unified configuration: edit this file only, not run.py / burn_only.py / *.bat.

All paths and API settings are managed in one place.
"""
import os

# ========== Paths ==========
VIDEO = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\直播回放-03月18日.mp4"
PPT = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\钢琴演奏入门第一课.pptx"
OUTPUT = r"D:\F\NewI\opencode\daily-workspace\projects\piano-lesson-highlights\cases\lesson1\output_cli_full"
LOG_FILE = r"D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt"

# ========== Run parameters ==========
MAX_TOTAL_DURATION = 600  # upper bound on the total highlight duration, in seconds

# ========== API ==========
# SECURITY NOTE(review): a real API key is committed in this tracked file.
# Prefer supplying it through the LESSON_HIGHLIGHTS_API_KEY environment
# variable; the literal below is kept only as a fallback so existing setups
# keep working, and should be rotated and removed from version control.
API_KEY = os.environ.get("LESSON_HIGHLIGHTS_API_KEY", "b0359bed-09f2-49e2-a53c-32ba057412e3")
API_HOST = "https://ark.cn-beijing.volces.com/api/coding/v3"

# ========== Environment (rarely changed) ==========
PYTHON = r"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe"
CLI_DIR = os.path.dirname(os.path.abspath(__file__))  # directory containing this file
@@ -10,25 +10,30 @@
 ```
 lesson-highlights/
 ├── config.py              # 统一配置（所有路径/API只改这里）
 ├── run.py                 # 完整流水线入口
 ├── burn_only.py           # 快速烧录入口（跳过转录/字幕生成）
 ├── run.bat                # 运行完整流程
 ├── burn.bat               # 快速重烧字幕
 ├── src/
-│   ├── main.py              # GUI 入口
+│   ├── main.py            # GUI 入口
-│   ├── gui.py               # GUI（参数输入 → 调用底层）
+│   ├── gui.py             # GUI（参数输入 → 调用底层）
-│   ├── cli.py               # CLI 入口
+│   ├── cli.py             # CLI 入口
-│   └── core/                # 共享底层
+│   └── core/              # 共享底层
 │       ├── __init__.py
-│       ├── ppt_parser.py    # PPT 解析 + LLM clips 提取
+│       ├── ppt_parser.py  # PPT 解析 + LLM clips 提取
-│       ├── pipeline.py      # 视频处理流水线
+│       ├── pipeline.py    # 视频处理流水线
-│       ├── subtitle.py      # 字幕生成
+│       ├── subtitle.py    # 字幕生成
-│       ├── video.py         # 视频处理（提取/合并/烧录）
+│       ├── video.py       # 视频处理（提取/合并/烧录）
-│       ├── llm.py           # LLM 调用
+│       ├── llm.py         # LLM 调用
-│       ├── corrections.py    # 术语纠正
+│       ├── corrections.py  # 术语纠正
-│       ├── constants.py     # 常量配置
+│       ├── constants.py   # 常量配置
-│       └── errors.py        # 错误处理
+│       └── errors.py      # 错误处理
-├── config.ini               # API 配置（不提交 git）
+├── config.ini             # API 配置（不提交 git）
-├── config.ini.example      # 配置模板
+├── config.ini.example    # 配置模板
-├── start.bat               # GUI 启动器
+├── start.bat             # GUI 启动器
-├── run.bat                 # 通用 CLI 启动器
+└── docs/
-└── run_lesson1.bat        # 预设课程示例
+    └── USAGE.md          # 使用指南
 ```
 ## 3. 核心模块
@@ -5,31 +5,32 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-## [版本号] - 日期
+## [Unreleased]
 ### Added
- 新功能
+- `docs/USAGE.md` - 使用指南（run.bat / burn.bat / 修改知识点流程）
 - `config.py` - 统一配置文件，所有路径和 API 配置集中管理
 - `run.py` / `burn_only.py` - 独立入口脚本
 - `--resume-from-burn` CLI 参数 - 快速烧录模式，跳过所有转录/字幕生成步骤
 ### Changed
- 功能变更
+- `run.bat` / `burn.bat` 替代原有的 `run_lesson1.bat`（不再需要改多处配置）
 - `ppt_parser.py`: 重叠片段的 `title_segments` 用 transcript 关键词首次出现时间计算切分点
 - `pipeline.py`: 新增 `_recalculate_title_segments_from_transcript()`，在转录完成后用实际 transcript 数据修正标题切换时间
 - `subtitle.py`: 多标题片段中每个标题最多显示 `title_duration` 秒（原逻辑会一直显示到片段结束）
 - `pipeline.py`: `step_burn` 的 `title_fontsize` 默认值从 90 改为 60
 ### Fixed
- 问题修复
+- `ppt_parser.py`: 不重叠的 clip 残留 `title_segments` 导致标题显示时长错误
-
+- `subtitle.py`: 重叠片段第二个标题显示时长超过 `title_duration`
-### Deprecated
+- `pipeline.py`: 快速烧录模式因 `video_params` 为空导致字号使用默认值 90 而非 60
 - 弃用功能
 ### Removed
- 移除的功能
+- `run_lesson1.bat` / `run_lesson1.py` - 旧入口，已由 `config.py` + `run.bat` / `burn.bat` 替代
 ### Security
 - 安全相关
 ---
-## 示例
+## [1.0.0] - 2026-05-02
 ### [1.0.0] - 2026-05-02
 ### Added
 - 初始版本发布
@@ -0,0 +1,117 @@
 # 使用指南
 ## 快速开始
 ### 1. 配置
 编辑项目根目录的 `config.py`：
 ```python
 VIDEO = r"D:\...\直播回放.mp4"
 PPT = r"D:\...\课程.pptx"
 OUTPUT = r"D:\...\output"
 MAX_TOTAL_DURATION = 600  # 精华片段总时长上限（秒）
 API_KEY = "your-api-key"
 API_HOST = "https://ark.cn-beijing.volces.com/api/coding/v3"
 ```
 所有路径和 API 配置只改这一个文件。
 ### 2. 完整流程（首次运行）
 ```bash
 run.bat
 ```
 或直接：
 ```bash
 python run.py
 ```
 完整流程：PPT解析 → 片段提取 → Whisper转录 → 字幕生成 → LLM字幕纠正 → 合并 → 烧录
 ### 3. 修改字幕后快速重烧
 改完 `v1_title.srt` 或 `v1_content.srt` 后，直接：
 ```bash
 burn.bat
 ```
 跳过所有转录/字幕生成步骤，直接用已有片段和字幕文件合并烧录。**只改字幕文本时用这个**。
 ## 修改知识点（替换PPT中的某个知识点）
 LLM 从 PPT 提取了 clip 后，如果你想把其中一个换成 PPT 里另一个知识点（比如把"音高"换成"旋律"）：
 ### 步骤
 1. **改 `generated_config.yaml`**：把对应 clip 的 title 改成新知识点名称
 ```yaml
 clips:
  - title: 旋律    # ← 改成PPT里有的知识点
    start: 200
    end: 260
 ```
 2. **删该 clip 的中间文件**（让它重新生成）：
 ```
 intermediates/clip5.json    ← 删掉
 intermediates/clip5.mp4    ← 删掉
 ```
 3. **重新运行**：
 ```bash
 run.bat
 ```
 系统会跳过其他已有 JSON 的 clip，只重新生成被删除了 JSON 的那一个 clip。
 ### 原理
 - `run.bat` 检测到 `clip*.json` 已存在，就跳过 Whisper 转录
 - 删掉某个 clip 的 JSON 后，系统认为它需要重新生成
 - 重新生成时用新的 title 去 transcript 里匹配，重新找时间范围
 ### 注意
 - `start`/`end` 如果填错了，生成的视频片段时间会不对
 - 如果不确定新知识点的时间范围，可以先随便填一个，跑完看效果再调整
 ## 文件结构
 ```
 output/
 ├── generated_config.yaml   # clips 配置（可手动修改）
 ├── intermediates/         # 中间文件（可删除特定clip的.json/.mp4重生成）
 │   ├── clip1.json        # Whisper 转录结果
 │   ├── clip1.mp4         # 提取的视频片段
 │   └── ...
 ├── subs/                 # 字幕文件
 │   ├── v1_title.srt      # 标题轨（可手动修改文本+时间轴）
 │   └── v1_content.srt    # 正文字幕
 ├── concat_merged.mp4     # 合并后的视频
 └── final.mp4             # 最终输出
 ```
 ## 命令对比
 | 命令 | 用途 | 耗时 |
 |------|------|------|
 | `run.bat` | 完整流程（PPT→视频） | 几十分钟 |
 | `burn.bat` | 只改字幕后快速重烧 | 几分钟 |
 ## 常见问题
 **Q: `burn.bat` 改了字号没变化？**
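 A: `burn.bat` 直接烧已有的 SRT 文件，不走 `subtitle.py` 的生成逻辑；并且 `burn_only.py` 传给 Pipeline 的 `video_params` 为空，烧录时字号、颜色一律使用 `step_burn` 的默认值（标题字号 60）。字号等渲染参数是在烧录这一步指定的，需要自定义这些参数时请走 `run.bat` 的完整流程。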
 **Q: 想改某个知识点的出现时间？**
 A: 直接改 `v1_title.srt` 里的时间轴，或者改 `generated_config.yaml` 然后删对应 clip 的 JSON 重新生成。
 **Q: 想删掉某个 clip？**
 A: 从 `generated_config.yaml` 里删掉那一条，然后删对应 `intermediates/clip*.json` 和 `clip*.mp4`，最后 `run.bat`。
@@ -0,0 +1,3 @@
@echo off
 rem Full pipeline launcher: runs run.py with a fixed interpreter; no arguments are forwarded.
 rem NOTE(review): the interpreter path and the script path are absolute and machine-specific;
 rem they have to be edited by hand when the environment or checkout location changes.
 "D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\run.py"
 rem Wait for a key press so the console output stays visible.
 pause
@@ -0,0 +1,36 @@
 # -*- coding: utf-8 -*-
"""
Full pipeline launcher: from PPT parsing to the final video.

All settings are read from config.py; this script only assembles the
src/cli.py command line, runs it, and passes its exit status on.
"""
import os
import subprocess
import sys


def build_command(cfg):
    """Return the argument list that launches src/cli.py.

    Args:
        cfg: Object exposing PYTHON, CLI_DIR, VIDEO, PPT, OUTPUT, API_KEY,
            API_HOST and MAX_TOTAL_DURATION (normally the ``config`` module).

    Returns:
        A list of strings suitable for ``subprocess.run`` without a shell.
    """
    return [
        cfg.PYTHON,
        os.path.join(cfg.CLI_DIR, "src", "cli.py"),
        "--video", cfg.VIDEO,
        "--ppt", cfg.PPT,
        "--output", cfg.OUTPUT,
        "--api-key", cfg.API_KEY,
        "--api-host", cfg.API_HOST,
        "--max-total-duration", str(cfg.MAX_TOTAL_DURATION),
        "--verbose",
    ]


def build_env(cfg, base_env=None):
    """Return a copy of the environment with the interpreter directory first on PATH.

    Args:
        cfg: Object exposing PYTHON (path of the interpreter to run).
        base_env: Mapping to start from; defaults to ``os.environ``.

    Returns:
        A new dict; the mapping passed in is left untouched.
    """
    env = dict(os.environ if base_env is None else base_env)
    # os.pathsep is ";" on Windows, so this matches the previous literal there
    # while staying correct on other platforms.
    env["PATH"] = os.path.dirname(cfg.PYTHON) + os.pathsep + env.get("PATH", "")
    return env


def main():
    """Run the CLI pipeline as a child process and return its exit code."""
    # Make the unified configuration importable regardless of the current directory.
    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
    import config

    print("Running pipeline...")
    print(f"  Video: {config.VIDEO}")
    print(f"  PPT:   {config.PPT}")
    print(f"  Output: {config.OUTPUT}")
    print()

    completed = subprocess.run(
        build_command(config),
        cwd=config.CLI_DIR,
        env=build_env(config),
        check=False,
    )
    # Surface a failing pipeline to the caller instead of always exiting 0.
    return completed.returncode


if __name__ == "__main__":
    sys.exit(main())
@@ -1,13 +0,0 @@
@echo off
 chcp 65001 >nul
 echo Cleaning pycache...
 rmdir /s /q "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\src\__pycache__" 2>nul
 rmdir /s /q "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\src\core\__pycache__" 2>nul
 echo Cache cleaned.
 echo.
 echo Running CLI...
 del "D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt" 2>nul
 "D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\run_lesson1.py"
 echo.
 echo Exit: %errorlevel%
 pause
@@ -1,42 +0,0 @@
 import sys
 import os
 import subprocess
 VIDEO = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\直播回放-03月18日.mp4"
 PPT = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\钢琴演奏入门第一课.pptx"
 OUTPUT = r"D:\F\NewI\opencode\daily-workspace\projects\piano-lesson-highlights\cases\lesson1\output_cli_full"
 PYTHON = r"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe"
 CLI_DIR = r"D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\src"
 API_KEY = "b0359bed-09f2-49e2-a53c-32ba057412e3"
 API_HOST = "https://ark.cn-beijing.volces.com/api/coding/v3"
 LOG_FILE = r"D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt"
 env = os.environ.copy()
 env["PATH"] = r"D:\ProgramData\anaconda3\envs\py312_cuda;" + env.get("PATH", "")
 cmd = [
    PYTHON,
    os.path.join(CLI_DIR, "cli.py"),
    "--video", VIDEO,
    "--ppt", PPT,
    "--output", OUTPUT,
    "--api-key", API_KEY,
    "--api-host", API_HOST,
    "--verbose"
 ]
 print("Starting CLI...")
 print(f"Video: {VIDEO}")
 print(f"PPT: {PPT}")
 print(f"Log: {LOG_FILE}")
 proc = subprocess.Popen(cmd, cwd=CLI_DIR, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, encoding='utf-8', errors='replace')
 with open(LOG_FILE, 'w', encoding='utf-8') as log:
    for line in proc.stdout:
        log.write(line)
        log.flush()
        print(line, end='')
 proc.wait()
 print(f"\nExit code: {proc.returncode}")
@@ -60,8 +60,12 @@ def parse_args():
                        help='LLM API地址')
    parser.add_argument('--whisper-model', type=str, default='large',
                        help='Whisper模型 (默认: large)')
    parser.add_argument('--max-total-duration', type=int, default=300,
                        help='精华片段总时长上限（秒），默认300')
    parser.add_argument('--verbose', '-V', action='store_true',
                        help='详细输出')
    parser.add_argument('--resume-from-burn', action='store_true',
                        help='快速模式：跳过所有步骤，直接用已有片段和字幕文件合并烧录（用于手动修改SRT后快速重生成）')
    return parser.parse_args()
@@ -77,7 +81,7 @@ def load_config_from_args(args) -> dict:
        'whisper_model': args.whisper_model,
        'video_params': {
            'fade_duration': 1,
-            'title_fontsize': 90,
+            'title_fontsize': 60,
            'title_color': 'FFFF00',
            'subtitle_fontsize': 24,
            'subtitle_color': 'FFFFFF',
@@ -137,8 +141,15 @@ def generate_config_from_ppt(args) -> dict:
        progress_callback=progress_callback,
        api_key=args.api_key,
        api_host=args.api_host,
        max_total_duration=args.max_total_duration,
    )
    # 补充API配置（parse_ppt_to_config不返回这些）
    if args.api_key:
        config['api_key'] = args.api_key
    if args.api_host:
        config['api_host'] = args.api_host
    # 保存生成的配置
    config_path = os.path.join(args.output, 'generated_config.yaml')
    import yaml
@@ -207,6 +218,42 @@ def main():
        pipeline = Pipeline(config)
        # 快速模式：跳过所有步骤，直接用已有片段和字幕合并烧录
        if args.resume_from_burn:
            import glob
            import shutil
            output_dir = config.get('output_dir')
            clips_dir = os.path.join(output_dir, 'clips')
            merged_dir = os.path.join(output_dir, 'merged')
            merged_path = os.path.join(merged_dir, 'merged.mp4')
            title_path = os.path.join(output_dir, 'title.srt')
            content_path = os.path.join(output_dir, 'content.srt')
            # 检查必要文件
            if not os.path.exists(title_path):
                logger.error(f"找不到 title.srt: {title_path}")
                return 1
            if not os.path.exists(content_path):
                logger.error(f"找不到 content.srt: {content_path}")
                return 1
            # 已有合并视频则直接烧录；否则先合并
            if os.path.exists(merged_path):
                logger.info(f"找到已有合并视频: {merged_path}")
            else:
                logger.info("开始合并片段...")
                clip_files = sorted(glob.glob(os.path.join(clips_dir, 'clip*.mp4')))
                if not clip_files:
                    logger.error(f"找不到片段视频: {clips_dir}/clip*.mp4")
                    return 1
                merged_path = pipeline.step_merge(clip_files)
                logger.info(f"合并完成: {merged_path}")
            logger.info("开始烧录...")
            final_path = pipeline.step_burn(merged_path, title_path, content_path)
            logger.info(f"完成! 最终视频: {final_path}")
            return 0
        logger.info("开始处理...")
        final_path = pipeline.run()
@@ -79,7 +79,7 @@ DEFAULT_OUTPUT_DIR = os.path.join(PROJECT_ROOT, "output")
 DEFAULT_VIDEO_PARAMS = {
    "fade_duration": 1,
    "title_duration": 3,
-    "title_fontsize": 90,
+    "title_fontsize": 60,
    "title_color": "FFFF00",
    "subtitle_fontsize": 24,
    "subtitle_color": "FFFFFF",
@@ -56,6 +56,8 @@ class LLMClient:
            "max_tokens": max_tokens
        }
        logger.info(f"[LLM] request chars={len(prompt)}, max_tokens={max_tokens}")
        for attempt in range(LLM_MAX_RETRIES):
            try:
                response = requests.post(url, headers=headers, json=payload, timeout=timeout)
@@ -73,6 +75,7 @@ class LLMClient:
                content = choices[0].get("message", {}).get("content", "").strip()
                if content:
                    logger.info(f"[LLM] response chars={len(content)}")
                    return content
                logger.warning(f"LLM: Empty content (attempt {attempt+1})")
@@ -88,106 +91,6 @@ class LLMClient:
        return None
    def correct_title(self, transcript_text, original_title, all_titles=None):
        """
        使用LLM纠正标题
        Args:
            transcript_text: 字幕文本
            original_title: 原始标题
            all_titles: 所有标题列表
        Returns:
            纠正后的标题
        """
        titles_str = ", ".join(all_titles[:20]) if all_titles else "无"
        prompt = f"""你是一个钢琴教学视频的标题验证专家。
 PPT提取的标题：{original_title}
 视频字幕内容：{transcript_text[:500] if transcript_text else "无"}
 本节课所有标题：{titles_str}
 【重要规则】
 - 只有当你有90%以上把握认为原标题错误时，才输出纠正后的标题
 - 如果原标题基本正确，即使不完美，也必须输出原标题
 - 绝对不能输出与原标题完全不同概念的词
 - 如果不确定，输出原标题
 请直接输出标题，不要添加任何解释。"""
        result = self.chat(prompt, max_tokens=50, timeout=LLM_TITLE_TIMEOUT)
        return result if result else original_title
    def validate_content(self, transcript_text, title):
        """
        使用LLM验证内容是否与标题相关
        Args:
            transcript_text: 字幕文本
            title: 标题
        Returns:
            (is_valid: bool, reason: str)
        """
        prompt = f"""判断视频字幕内容是否与标题相关。
 标题：{title}
 字幕内容：{transcript_text[:300] if transcript_text else "无"}
 判断标准：
 - 内容讨论的主题与标题概念相关 = 相关
 - 内容与标题无关（如广告、闲聊、无关话题）= 无关
 - 无法判断 = 不确定
 请直接输出：相关/无关/不确定"""
        result = self.chat(prompt, max_tokens=20, timeout=LLM_VALIDATE_TIMEOUT)
        if not result:
            return True, "error"
        if "无关" in result:
            return False, result
        elif "不确定" in result:
            return True, "uncertain"
        return True, result
    def full_text_correction(self, text, clip_title, knowledge_terms=None):
        """
        使用LLM进行全文字幕纠错
        Args:
            text: 原始字幕
            clip_title: 片段标题
            knowledge_terms: 知识点列表
        Returns:
            纠错后的字幕
        """
        knowledge_str = ", ".join(knowledge_terms[:20]) if knowledge_terms else "无"
        prompt = f"""你是一个钢琴教学视频的字幕纠错专家。
 原始字幕：{text}
 本节课片段标题：{clip_title}
 本节课知识点：{knowledge_str}
 请进行字幕纠错：
 1. 修复语音识别错误（如"羞耻"→"休止"，"副点"→"附点"，"负点"→"附点"）
 2. 修复同音字错误
 3. 保留原文的专业术语和表达方式
 4. 不要改变原文的语气和意思
 请直接输出纠错后的字幕，不要添加任何解释。"""
        result = self.chat(prompt, max_tokens=500, timeout=LLM_TIMEOUT)
        return result if result else text
 # 全局LLM客户端实例
 _llm_client = None
@@ -12,7 +12,7 @@ import logging
 from typing import Callable, Optional, List, Dict, Any
 from .video import extract_clip, merge_clips, burn_dual_subtitles
-from .subtitle import SubtitlePipeline
+from .subtitle import SubtitlePipeline, correct_subtitles_llm
 from .llm import LLMClient
 from .corrections import apply_all_corrections, load_term_corrections_from_config
 from .utils import ensure_dir
@@ -223,16 +223,41 @@ class Pipeline:
            self.progress_callback('transcribing', int((i/total)*90), f"转录片段 {i}/{total}")
            try:
-                segments, _ = model.transcribe(clip_path, language='zh', beam_size=5)
+                segments, _ = model.transcribe(clip_path, language='zh', beam_size=5, word_timestamps=True)
-                # 保存转录结果
+                # 保存转录结果（按句末标点进一步切分）
                segments_data = []
                for seg in segments:
-                    segments_data.append({
+                    words = seg.words if hasattr(seg, 'words') else []
-                        'start': seg.start,
+                    if words:
-                        'end': seg.end,
+                        # 用 word-level 时间戳在句末标点处切分
-                        'text': seg.text.strip()
+                        # 注意：标点可能附着在词后（如"吗?"、"奏,"），需 strip 后判断
-                    })
+                        _END_MARKS = '。！？?'
                        sub_start = words[0].start
                        sub_text_parts = []
                        for word in words:
                            sub_text_parts.append(word.word)
                            # 剥离标点后判断是否为句末标记
                            stripped = word.word.rstrip('，、,')
                            if any(stripped.endswith(m) for m in _END_MARKS):
                                sub_end = word.end
                                sub_text = ''.join(sub_text_parts).strip()
                                if sub_text:
                                    segments_data.append({'start': sub_start, 'end': sub_end, 'text': sub_text})
                                sub_start = word.end
                                sub_text_parts = []
                        # 剩余未到句末的文本
                        if sub_text_parts:
                            remaining = ''.join(sub_text_parts).strip()
                            if remaining:
                                segments_data.append({'start': sub_start, 'end': words[-1].end, 'text': remaining})
                    else:
                        # fallback：无 word timestamps，直接用原 segment
                        segments_data.append({
                            'start': seg.start,
                            'end': seg.end,
                            'text': seg.text.strip()
                        })
                with open(json_path, 'w', encoding='utf-8') as f:
                    json.dump({'segments': segments_data}, f, ensure_ascii=False, indent=2)
@@ -249,59 +274,58 @@ class Pipeline:
        self.step_callback('transcribing')
        return json_paths
-    def step_correct_titles(self, json_paths: List[str]) -> List[Dict[str, Any]]:
+    def _recalculate_title_segments_from_transcript(
        self,
        clips: List[Dict],
        json_paths: List[str]
    ) -> None:
        """
-        Step 3: LLM标题纠正
+        用 transcript 数据重新计算重叠片段的 title_segments 切分点。
-        Args:
+        重叠片段的 switch_offset 应该按 transcript 中第二个标题关键词
-            json_paths: JSON文件路径列表
+        首次出现的时间来算，而不是按 clip 边界。
        Returns:
            corrected_clips: 纠正后的片段配置列表
        """
-        self.step_callback('title_correcting')
+        for i, clip in enumerate(clips):
-        self.progress_callback('title_correcting', 0, "开始标题纠正...")
+            ts = clip.get('title_segments')
            if not ts or len(ts) < 2:
                continue
-        corrected_clips = []
+            # 取第二个标题段 [title, offset]
-        total = len(self.clips)
+            second_title, old_offset = ts[1]
            json_path = json_paths[i] if i < len(json_paths) else None
            if not json_path or not os.path.exists(json_path):
                continue
-        for i, (clip, json_path) in enumerate(zip(self.clips, json_paths), 1):
+            try:
            original_title = clip.get('title', f'Clip {i}')
            # 读取转录文本
            transcript_text = ''
            if json_path and os.path.exists(json_path):
                with open(json_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
-                transcript_text = ' '.join(seg.get('text', '') for seg in data.get('segments', []))
+            except Exception:
                continue
-            # LLM纠正标题
+            # 在 transcript 中搜索 second_title 的首次出现时间
-            corrected_title = original_title
+            first_time = None
-            if transcript_text and self.config.get('api_key'):
+            for seg in data.get('segments', []):
-                try:
+                for word_info in seg.get('words', []):
-                    corrected_title = self.llm_client.correct_title(
+                    w = word_info.get('word', '')
-                        transcript_text,
+                    # 关键词匹配（标题可能含多字符，取子串）
-                        original_title,
+                    if second_title and second_title in w:
-                        [c.get('title', '') for c in self.clips]
+                        first_time = word_info['start']
-                    ) or original_title
+                        break
-                except Exception as e:
+                if first_time is not None:
-                    logger.warning(f"LLM title correction failed for clip {i}: {e}")
+                    break
-            corrected_clip = {
+            if first_time is not None:
-                'index': i - 1,
+                new_offset = first_time
-                'title': corrected_title,
+                clip['title_segments'][1][1] = new_offset
-                'original_title': original_title,
+                logger.info(
-                'start': clip['start'],
+                    f"  clip{i+1} title_segments: "
-                'end': clip['end'],
+                    f"'{second_title}' 从 {old_offset:.2f}s → {new_offset:.2f}s"
-            }
+                )
-            corrected_clips.append(corrected_clip)
+            else:
-
+                logger.warning(
-            percent = int((i / total) * 100)
+                    f"  clip{i+1} title_segments: "
-            self.progress_callback('title_correcting', percent, f"纠正标题 {i}/{total}")
+                    f"未在 transcript 中找到 '{second_title}'，保留原 offset {old_offset:.2f}s"
-
+                )
        self.progress_callback('title_correcting', 100, "标题纠正完成")
        self.step_callback('title_correcting')
        return corrected_clips
    def step_generate_subtitles(self, corrected_clips: List[Dict], json_paths: List[str]) -> tuple:
        """
@@ -327,6 +351,7 @@ class Pipeline:
                'start': clip['start'],
                'end': clip['end'],
                'title': clip.get('title', clip.get('original_title', '')),
                'title_segments': clip.get('title_segments'),  # 可能为None
            }
            clip_configs.append(clip_config)
@@ -357,6 +382,39 @@ class Pipeline:
        self.step_callback('generating_subtitles')
        return title_path, content_path
    def step_correct_subtitles(self, title_path: str, content_path: str) -> str:
        """
        Step 4.5: LLM-based correction of the content subtitles.

        Hands the title SRT, the content SRT and the PPT text stored in the
        config under 'ppt_text' to correct_subtitles_llm. When no PPT text is
        available the step is skipped and the input path is returned as is.

        Args:
            title_path: Path of the title subtitle file.
            content_path: Path of the content subtitle file.

        Returns:
            The path returned by correct_subtitles_llm, or content_path
            unchanged when the step is skipped.
        """
        stage = 'correcting_subtitles'
        reference_text = self.config.get('ppt_text', '')

        if reference_text:
            self.step_callback(stage)
            self.progress_callback(stage, 0, "开始纠正字幕...")

            result_path = correct_subtitles_llm(
                title_path=title_path,
                content_path=content_path,
                ppt_text=reference_text,
                llm_client=self.llm_client,
            )

            self.progress_callback(stage, 100, "字幕纠正完成")
            self.step_callback(stage)
            return result_path

        # No PPT text to use as reference: leave the subtitles as they are.
        logger.warning("PPT原文为空，跳过字幕纠正步骤")
        return content_path
    def step_merge(self, clip_paths: List[str]) -> str:
        """
        Step 5: 合并视频
@@ -411,7 +469,7 @@ class Pipeline:
            title_path,
            content_path,
            final_path,
-            title_fontsize=video_params.get('title_fontsize', 90),
+            title_fontsize=video_params.get('title_fontsize', 60),
            title_color=video_params.get('title_color', 'FFFF00'),
            subtitle_fontsize=video_params.get('subtitle_fontsize', 24),
            subtitle_color=video_params.get('subtitle_color', 'FFFFFF')
@@ -447,17 +505,14 @@ class Pipeline:
        # Step 2: 转录
        json_paths = self.step_transcribe(clip_paths)
-        # Step 3: 标题纠正
+        # Step 2.5: 用 transcript 重新计算重叠片段的 title_segments 切分点
-        corrected_clips = self.step_correct_titles(json_paths)
+        self._recalculate_title_segments_from_transcript(self.clips, json_paths)
-        # Step 4: 生成字幕
+        # Step 3-6: 生成字幕、纠正、合并、烧录
-        title_path, content_path = self.step_generate_subtitles(corrected_clips, json_paths)
+        title_path, content_path = self.step_generate_subtitles(self.clips, json_paths)
-
+        corrected_content_path = self.step_correct_subtitles(title_path, content_path)
        # Step 5: 合并
        merged_path = self.step_merge(clip_paths)
-
+        final_path = self.step_burn(merged_path, title_path, corrected_content_path)
        # Step 6: 烧录
        final_path = self.step_burn(merged_path, title_path, content_path)
        logger.info(f"Pipeline completed: {final_path}")
        return final_path
@@ -474,23 +529,25 @@ class Pipeline:
        """
        logger.info(f"Pipeline starting with user confirmation: {len(self.clips)} clips")
-        # Step 1-3: 同上
+        # Step 1-2: 提取+转录
        clip_paths = self.step_extract()
        if not clip_paths:
            raise RuntimeError("No clips extracted")
        json_paths = self.step_transcribe(clip_paths)
-        corrected_clips = self.step_correct_titles(json_paths)
+
        # Step 2.5: 用 transcript 重新计算重叠片段的 title_segments 切分点
        self._recalculate_title_segments_from_transcript(self.clips, json_paths)
        # 应用用户确认的标题
        for i, confirmed in enumerate(confirmed_titles):
-            if i < len(corrected_clips):
+            if i < len(self.clips):
-                corrected_clips[i]['title'] = confirmed.get('title', corrected_clips[i]['title'])
+                self.clips[i]['title'] = confirmed.get('title', self.clips[i].get('title', ''))
-        # Step 4-6: 同上
+        # Step 3-6: 生成字幕、纠正、合并、烧录
-        title_path, content_path = self.step_generate_subtitles(corrected_clips, json_paths)
+        title_path, content_path = self.step_generate_subtitles(self.clips, json_paths)
        corrected_content_path = self.step_correct_subtitles(title_path, content_path)
        merged_path = self.step_merge(clip_paths)
-        final_path = self.step_burn(merged_path, title_path, content_path)
+        final_path = self.step_burn(merged_path, title_path, corrected_content_path)
        logger.info(f"Pipeline completed: {final_path}")
        return final_path
@@ -17,6 +17,8 @@ import zipfile
 import logging
 from typing import List, Dict, Any, Optional, Callable, Tuple
 from .llm import LLMClient
 logger = logging.getLogger(__name__)
@@ -36,6 +38,7 @@ class PPTParser:
        api_key: Optional[str] = None,
        api_host: Optional[str] = None,
        max_clip_duration: int = 30,
        max_total_duration: int = 300,
    ):
        """
        初始化PPT解析器
@@ -48,6 +51,7 @@ class PPTParser:
            api_key: LLM API密钥
            api_host: LLM API地址
            max_clip_duration: 每个精华片段的最大时长（秒），默认30秒
            max_total_duration: 所有精华片段的总时长上限（秒），默认300秒（5分钟）
        """
        self.video_path = video_path
        self.ppt_path = ppt_path
@@ -56,6 +60,7 @@ class PPTParser:
        self.api_key = api_key
        self.api_host = api_host
        self.max_clip_duration = max_clip_duration
        self.max_total_duration = max_total_duration
        self.inter_dir = os.path.join(output_dir, 'intermediates')
        os.makedirs(self.inter_dir, exist_ok=True)
@@ -284,50 +289,19 @@ class PPTParser:
    def _call_llm(self, prompt: str, max_tokens: int = 4096, timeout: int = 300, retries: int = 3) -> Optional[str]:
        """
-        带重试的 LLM 调用。
+        使用实例的 api_key/api_host 创建 LLMClient 并调用 chat。
        Args:
            prompt: 发送给 LLM 的提示词
            max_tokens: 最大 token 数
            timeout: 单次请求超时（秒）
-            retries: 最大重试次数
+            retries: 最大重试次数（chat() 内部也有重试，这里传 retries 但 chat() 忽略它）
        Returns:
            LLM 返回的 content，失败返回 None
        """
-        import requests
+        client = LLMClient(api_key=self.api_key, api_host=self.api_host)
-        url = f"{self.api_host}/chat/completions"
+        return client.chat(prompt=prompt, max_tokens=max_tokens, timeout=timeout)
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        payload = {
            "model": "doubao-seed-2.0-lite",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": max_tokens,
            "temperature": 0.1
        }
        last_err = None
        for attempt in range(retries):
            try:
                response = requests.post(url, headers=headers, json=payload, timeout=timeout)
                response.raise_for_status()
                result = response.json()
                content = result.get("choices", [{}])[0].get("message", {}).get("content", "")
                if content:
                    return content
                logger.warning(f"LLM返回空内容（第{attempt+1}次尝试）")
                last_err = "空内容"
            except requests.exceptions.Timeout:
                logger.warning(f"LLM请求超时（第{attempt+1}次尝试，timeout={timeout}s）")
                last_err = "超时"
            except requests.exceptions.RequestException as e:
                logger.warning(f"LLM请求失败（第{attempt+1}次尝试）: {e}")
                last_err = str(e)
        logger.error(f"LLM调用失败（已重试{retries}次）: {last_err}")
        return None
    def llm_extract_knowledge_points_from_ppt(self) -> Tuple[Optional[List[Dict[str, Any]]], Optional[str]]:
        """
@@ -415,7 +389,7 @@ class PPTParser:
 - 一种方法：如"放松练习"、"分手练习"、"慢速练习"、"唱谱法"
 - 一个专题：如"乐理基础"、"手型要求"、"课后作业"
-【文本清理规则】（以不影响原文意思表达为前提）：
+【文本清理规则】（用于 cleaned_text，不影响知识点提取）：
 - 合并连续的空行（超过1个空行的压缩为1个）
 - 去除行首行尾多余空格
 - 保留页面之间的自然分段（每页独立段落）
@@ -423,12 +397,16 @@ class PPTParser:
 - 无标点的长句子：如果一行文字超过50字且无标点，才合并到下一行
 - 保留专有名词、术语的原始写法
-【重要规则】：
+【知识点提取规则】：
 1. 扫描全部页面：不要只找"知识点汇总页"，每页都要看
 2. 原文保留：知识点原文是什么就写什么，不要解释、概括、翻译或扩展
 3. 拆分合并：被拆分的片段（如"的三"+"种方法"、"谱号、"+"大谱表、"等）要合并为完整知识词
 4. 标题过滤：忽略"本课主要知识点"、"课程回顾"、"本节课重要知识点"等纯导航/目录类标题
-5. 分类项处理：格式如"XX：子项1、子项2、子项3"时，冒号后的每个子项各自独立成知识点；但如果冒号后是完整句子或定义（如"XX：这是指……"），则整句描述的对象本身才是知识点
+5. 列表/定义项拆分：
   - 格式为"XX：子项1，子项2，子项3"时，冒号后的每个子项各自独立成知识点
   - 格式为多行列表（如"重复：xxx\n级进：xxx\n跳进：xxx"），每行各自独立成知识点
   - 如果冒号后是完整句子或定义（如"XX：这是指……"），则整句描述的对象本身才是知识点
   - **知识点标题不得包含括号、冒号、引号等任何标点符号**，只保留核心词（如"重复（旋律进行方式）"应输出为"重复"，"音高、和弦"应输出为"音高"和"和弦"）
 6. 列表项过滤：只保留有独立含义的知识点，忽略序号、标点符号、无意义的装饰词
 7. 内容页优先：如果一个知识点在教学内容页展开讲解了，比仅出现在列表中更重要
 8. 最小粒度：宁可多输出几个独立的知识词，也不要合并成一个大而笼统的标题
@@ -668,13 +646,24 @@ class PPTParser:
        for clip in sorted_clips[1:]:
            prev = merged[-1]
            if clip['start'] < prev['end']:
-                # 重叠：prev延伸到clip的end，保留clip的标题（标题在clip原start处切换）
+                # 重叠：prev延伸到clip的end，检测标题切换
                if clip['title'] != prev['title']:
                    # 标题切换点 = clip['start'] 相对于 prev 起点的时间
                    switch_offset = clip['start'] - prev['start']
                    # 建立 title_segments
                    prev['title_segments'] = [
                        [prev['title'], 0],
                        [clip['title'], switch_offset],
                    ]
                    prev['title'] = prev['title']  # 保留第一个标题作主标题
                prev['end'] = clip['end']
                logger.info(f"  合并重叠: '{prev['title']}' 延伸至 {prev['end']}s，"
                            f"标题在 {clip['start']}s 切换为 '{clip['title']}'")
            else:
-                # 不重叠：直接添加
+                # 不重叠：直接添加，清除 title_segments（由系统默认处理）
-                merged.append(dict(clip))
+                c = dict(clip)
                c.pop('title_segments', None)
                merged.append(c)
        return merged
@@ -855,7 +844,11 @@ class PPTParser:
        # PPT参考（完整文本 + 知识点列表）
        if ppt_full_text or ppt_knowledge:
-            knowledge_lines = "\n".join([f"  - {kp['title']}" for kp in (ppt_knowledge or [])])
+            knowledge_list = ppt_knowledge or []
            # 带序号的列表，LLM 用序号引用，不许自由发挥
            knowledge_lines = "\n".join(
                [f"  [{i}] {kp['title']}" for i, kp in enumerate(knowledge_list)]
            )
            knowledge_section = f"""
 【PPT参考文本（权威背景）】
 以下是与本节课配套的PPT完整内容，请以此为权威参考：
@@ -887,14 +880,13 @@ class PPTParser:
 【重要规则】
 1. 逐条处理：必须为列表中的**每一个知识点**搜索视频转录文本，找到讲解最集中的片段
-2. **title 必须完全等于知识点列表中的原名**，不许改写、不许概括、不许扩展
+2. **输出序号而非名称**：kp_idx 必须是列表中的序号（如 0、3、7），不许自己发挥名称
-   - ✅ 正确：knowledge_point 是"弹琴的手型"，title 就用"弹琴的手型"
+   - ✅ 正确："kp_idx": 3 对应列表中第 4 项
-   - ❌ 错误：title 用"手型支撑与放松的核心要求"（自己发挥）
+   - ❌ 错误："kp_idx": "重复（旋律进行方式）"（这是自由发挥，不是序号）
-3. **knowledge_point 字段也必须用知识点列表中的原名**
+3. 时间必须精确：使用转录文本中的实际时间戳
-4. 时间必须精确：使用转录文本中的实际时间戳
+4. 时长控制：每个片段约5-15秒，重要内容可以稍长（最长不超过20秒）
-5. 时长控制：每个片段约5-15秒，重要内容可以稍长（最长不超过20秒）
+5. 总时长不超过{self.max_total_duration}秒：如果知识点太多导致总时长超标，优先保留最重要的知识点，其余在not_found中说明
-6. 总时长不超过180秒：如果知识点太多导致总时长超标，优先保留最重要的知识点，其余在not_found中说明
+6. 只输出JSON，不要添加任何解释
 7. 只输出JSON，不要添加任何解释
 【视频转录文本（带时间戳）】
 {transcript_text}
@@ -902,10 +894,10 @@ class PPTParser:
 请以以下JSON格式输出（不要输出其他内容）：
 {{
  "clips": [
-    {{"title": "知识点原名（不许改写）", "start": 开始秒数, "end": 结束秒数, "knowledge_point": "知识点原名"}},
+    {{"kp_idx": 序号, "start": 开始秒数, "end": 结束秒数}},
-    {{"title": "知识点原名", "start": 开始秒数, "end": 结束秒数, "knowledge_point": "知识点原名"}}
+    {{"kp_idx": 序号, "start": 开始秒数, "end": 结束秒数}}
  ],
-  "not_found": ["知识点原名（必须与列表中的名称完全一致）"]
+  "not_found": [序号, 序号]
 }}"""
        try:
@@ -929,31 +921,41 @@ class PPTParser:
                return None
            clips = parsed.get("clips", [])
-            not_found = parsed.get("not_found", [])
+            not_found_idxs = parsed.get("not_found", [])
-            if not clips and not not_found:
+            if not clips and not not_found_idxs:
                return None
-            # 验证和清理
+            # 通过序号映射回原始名称（序号 → 原始知识点名称）
            knowledge_list = ppt_knowledge or []
            title_map = {i: kp['title'] for i, kp in enumerate(knowledge_list)}
            # 验证和清理：序号 → 原始名称
            validated = []
            for clip in clips:
-                title = clip.get("title", "")
+                kp_idx = int(clip.get("kp_idx", -1))
                if kp_idx not in title_map:
                    logger.warning(f"  跳过无效序号 kp_idx={kp_idx}（超出范围 0-{len(title_map)-1}）")
                    continue
                title = title_map[kp_idx]
                start = max(0, float(clip.get("start", 0)))
                raw_end = float(clip.get("end", 0))
                end = min(raw_end, start + self.max_clip_duration)
                kp = clip.get("knowledge_point", "")
                validated.append({
                    "title": title,
                    "start": int(start),
                    "end": int(end),
-                    "knowledge_point": kp,
+                    "knowledge_point": title,
                })
-            logger.info(f"LLM提取成功: {len(validated)} 个片段，{len(not_found)} 个未找到")
+            # not_found 中的序号也映射回名称
            not_found_names = [title_map[i] for i in not_found_idxs if i in title_map]
            logger.info(f"LLM提取成功: {len(validated)} 个片段，{len(not_found_names)} 个未找到")
            for c in validated:
                logger.info(f"  [{c['knowledge_point']}] {c['title']}: {c['start']}s - {c['end']}s")
-            if not_found:
+            if not_found_names:
-                logger.info(f"  未找到知识点: {not_found}")
+                logger.info(f"  未找到知识点: {not_found_names}")
            return validated
@@ -1007,6 +1009,9 @@ class PPTParser:
                        }, f, ensure_ascii=False)
                    logger.info(f"已保存PPT知识点到checkpoint")
            # 保存PPT原文供后续步骤使用
            self.ppt_text = ppt_cleaned_text or ""
            # Step 3: LLM校正文本（以PPT全文为参考）- 带checkpoint复用
            self._report('parse', 30, "LLM校正文本...")
            corrected_checkpoint = os.path.join(self.inter_dir, "corrected_transcript.json")
@@ -1052,6 +1057,7 @@ class PPTParser:
            "clips": clips,
            "output_dir": self.output_dir,
            "term_corrections": self.term_corrections,
            "ppt_text": getattr(self, 'ppt_text', ''),
            "video_params": {
                "fade_duration": 1,
                "title_fontsize": 48,
@@ -228,15 +228,32 @@ class SubtitlePipeline:
            offset = offsets[i]
            clip_duration = offsets[i+1] - offsets[i] if i+1 < len(offsets) else 3
-            # 添加标题（使用title样式）- 标题显示3秒后正文才显示，避免重叠
+            # 添加标题（使用title样式）
-            title_duration = min(3, clip_duration)
+            if clip.get('title_segments'):
-            title_track.add(offset, offset + title_duration, clip['title'], style='title')
+                # 多标题片段：遍历 title_segments [(title, start_offset), ...]
                # 每个标题最多显示 title_duration 秒
                segs = clip['title_segments']
                for j, (title, seg_start) in enumerate(segs):
                    next_start = segs[j+1][1] if j+1 < len(segs) else clip_duration
                    seg_end = min(seg_start + title_duration, next_start)
                    title_track.add(
                        offset + seg_start,
                        offset + seg_end,
                        title,
                        style='title'
                    )
                # 正文字幕从最后一个标题段结束后开始
                content_start = offset + segs[-1][1]
            else:
                # 单标题：标题显示3秒后正文才显示，避免重叠
                title_duration = min(3, clip_duration)
                title_track.add(offset, offset + title_duration, clip['title'], style='title')
                content_start = offset + title_duration
            # 添加正文字幕 - 从标题结束后开始，避免重叠
            with open(json_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            content_start = offset + title_duration  # 正文从标题结束后开始
            for seg in data.get('segments', []):
                text = seg.get('text', '').strip()
                if not text:
@@ -253,12 +270,37 @@ class SubtitlePipeline:
                # 只添加在clip时间范围内的字幕
                clip_end = clip['end'] - clip['start'] + offset
                if seg_start < clip_end and seg_end <= clip_end:
-                    content_track.add(
+                    # pipeline.py 已按标点拆分，此处只处理意外超长segment（无标点且>8秒）
-                        seg_start,
+                    duration = seg_end - seg_start
-                        seg_end,
+                    if duration > 8.0:
-                        text,
+                        # 按标点拆分
-                        style='content'
+                        import re
-                    )
+                        parts = re.split(r'(?<=[。！？?！])', text)
                        if len(parts) > 1:
                            total_len = sum(len(p) for p in parts)
                            if total_len > 0:
                                cum_len = 0
                                s_start = seg_start
                                for part in parts:
                                    part = part.strip()
                                    if not part:
                                        continue
                                    cum_len += len(part)
                                    s_end = seg_start + duration * cum_len / total_len
                                    content_track.add(s_start, s_end, part, style='content')
                                    s_start = s_end
                                continue
                        # 无标点则平均拆分
                        num_splits = max(2, int(duration / 8.0) + 1)
                        chunk_len = len(text) // num_splits
                        for i in range(num_splits):
                            t_start = seg_start + duration * i / num_splits
                            t_end = seg_start + duration * (i + 1) / num_splits
                            chunk_text = text[i * chunk_len:(i + 1) * chunk_len].strip()
                            if chunk_text:
                                content_track.add(t_start, t_end, chunk_text, style='content')
                    else:
                        content_track.add(seg_start, seg_end, text, style='content')
        # 保存两个轨道 - 标题使用SRT格式
        version = self._get_next_version()
@@ -320,4 +362,192 @@ def load_clip_subtitles(inter_dir, clip_nums):
        if os.path.exists(json_path):
            with open(json_path, 'r', encoding='utf-8') as f:
                clips[num] = json.load(f)
-    return clips
+    return clips
 def parse_srt(content: str) -> list:
    """
    Parse SRT text into a list of subtitle segments.

    The parser tolerates a leading UTF-8 BOM, CRLF / CR line endings and
    whitespace-only separator lines, so files written by Windows tools are
    split into the same cues as LF-only files.

    Args:
        content: Full text of an SRT file.
    Returns:
        [(index, start, end, text), ...] where ``index`` is an int, ``start``
        and ``end`` are timestamp strings using ``.`` as the decimal
        separator, and ``text`` is the cue text (multi-line cues are joined
        with ``\\n``). Blocks that are too short or malformed are skipped.
    """
    # A BOM glued to the first cue number would make int() fail and silently
    # drop the first subtitle; mixed line endings would hide the blank lines.
    normalized = content.lstrip('\ufeff').replace('\r\n', '\n').replace('\r', '\n')

    # Group consecutive non-blank lines into cue blocks.
    blocks = []
    current = []
    for line in normalized.split('\n'):
        if line.strip():
            current.append(line)
        elif current:
            blocks.append(current)
            current = []
    if current:
        blocks.append(current)

    segments = []
    for lines in blocks:
        # A valid cue needs at least: index, timing line, one text line.
        if len(lines) < 3:
            continue
        try:
            idx = int(lines[0])
            times = lines[1].split('-->')
            start = times[0].strip().replace(',', '.')
            end = times[1].strip().replace(',', '.')
        except (ValueError, IndexError):
            # Non-numeric index or timing line without an arrow: skip the block.
            continue
        text = '\n'.join(lines[2:]).rstrip()
        segments.append((idx, start, end, text))
    return segments
 def format_srt(segments: list) -> str:
    """
    Render a list of subtitle segments as SRT text.

    Args:
        segments: [(index, start, end, text), ...]; timestamps may use ``.``
            as the decimal separator and are written with ``,``.
    Returns:
        The SRT document: cues separated by one blank line, ending with a
        single newline.
    """
    cues = [
        f"{idx}\n{start.replace('.', ',')} --> {end.replace('.', ',')}\n{text}"
        for idx, start, end, text in segments
    ]
    return '\n\n'.join(cues) + '\n'
 def correct_subtitles_llm(
    title_path: str,
    content_path: str,
    ppt_text: str,
    llm_client,
    output_path: str = None,
 ) -> str:
    """
    Correct the content subtitles with an LLM while keeping the timing intact.

    Only ``idx|text`` lines are sent to the model; the timestamps never leave
    this function and are re-attached by cue index afterwards. The title
    subtitles and the PPT text are included in the prompt as reference
    material for terminology.

    Args:
        title_path: Path of the title subtitle SRT file (reference only).
        content_path: Path of the content subtitle SRT file to correct.
        ppt_text: PPT source text used as terminology reference; ``None`` is
            treated as an empty string.
        llm_client: Object exposing ``chat(prompt=..., max_tokens=...)`` that
            returns the model reply as text (falsy on failure).
        output_path: Where to write the corrected SRT; defaults to
            overwriting ``content_path``.
    Returns:
        Path of the corrected subtitle file. ``content_path`` is returned
        unchanged (and nothing is written) when there are no parsable cues,
        the LLM reply is empty, or the reply contains no usable JSON.
    """
    import json
    import re

    with open(title_path, 'r', encoding='utf-8') as f:
        title_srt = f.read()
    with open(content_path, 'r', encoding='utf-8') as f:
        content_srt = f.read()

    # Parse the SRT; the full timestamps are kept locally.
    content_segments = parse_srt(content_srt)
    if not content_segments:
        # Nothing to correct: avoid a pointless LLM call and, more importantly,
        # avoid overwriting the subtitle file with an empty document.
        logger.warning("字幕文件无可解析内容，保留原字幕")
        return content_path

    # Plain "idx|text" lines: the model only ever sees cue numbers and text.
    transcript_text = '\n'.join(
        f"{idx}|{text}" for idx, _start, _end, text in content_segments
    )
    ppt_reference = ppt_text or ''

    prompt = f"""你是一个钢琴教学视频的字幕纠错专家。
 ## 参考信息
 标题字幕（title.srt）- 权威知识点参考：
 {title_srt[:2000]}
 PPT原文（ppt）- 术语权威参考：
 {ppt_reference[:3000]}
 ## 任务
 修正以下转录文本中的错字、漏字、术语错误（如"骚"改为"sol"，"拿两个音速"改为"拿两个因素"等）。
 每行格式：序号|原始文字
 ## 待纠正文本（{len(content_segments)}条）：
 {transcript_text}
 ## 输出要求
 - 以JSON格式输出，只输出JSON，不要有任何其他解释
 - 用原始序号匹配，不要改变结构
 {{
  "corrected": [
    {{"idx": 序号, "text": "修正后的文字"}},
    {{"idx": 序号, "text": "修正后的文字"}}
  ]
 }}"""

    response = llm_client.chat(
        prompt=prompt,
        max_tokens=8192,
    )
    if not response:
        logger.warning("LLM返回为空，保留原字幕")
        return content_path

    try:
        # Take everything from the first "{" to the last "}". This already
        # skips markdown code fences, including a closing fence that shares a
        # line with the end of the JSON, so no separate fence stripping is
        # needed.
        json_match = re.search(r'\{.*\}', response, re.DOTALL)
        if not json_match:
            raise ValueError("No JSON found in response")
        result = json.loads(json_match.group())
        corrected_list = result.get('corrected', [])
        if not isinstance(corrected_list, list):
            raise ValueError("'corrected' is not a list")
    except (ValueError, TypeError, AttributeError) as e:
        logger.warning(f"字幕纠正JSON解析失败，保留原字幕: {e}")
        return content_path

    # Build idx -> corrected text. Items are validated one by one so a single
    # malformed entry does not discard every other correction.
    corrected_map = {}
    for item in corrected_list:
        try:
            # Models frequently return the index as a string ("3"); coerce it
            # so it matches the int cue numbers produced by parse_srt.
            item_idx = int(item['idx'])
            item_text = item['text']
        except (KeyError, TypeError, ValueError):
            continue
        # Never let an empty or non-text "correction" blank out a subtitle.
        if isinstance(item_text, str) and item_text.strip():
            corrected_map[item_idx] = item_text.strip()

    # Re-attach the original timestamps and collect the diff for logging.
    changed = []
    corrected_segments = []
    for idx, start, end, orig_text in content_segments:
        new_text = corrected_map.get(idx, orig_text)
        corrected_segments.append((idx, start, end, new_text))
        if new_text != orig_text:
            changed.append((idx, orig_text, new_text))
    corrected_srt = format_srt(corrected_segments)

    if output_path is None:
        output_path = content_path
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(corrected_srt)

    # Diff log
    if changed:
        logger.info(f"字幕纠正，共 {len(changed)} 处修改：")
        for idx, old, new in changed:
            old_s = old[:50] + ('...' if len(old) > 50 else '')
            new_s = new[:50] + ('...' if len(new) > 50 else '')
            logger.info(f"  [{idx:3d}] \"{old_s}\" → \"{new_s}\"")
    else:
        logger.info("字幕纠正，无修改")
    logger.info(f"字幕已修正: {output_path}")
    return output_path
@@ -146,7 +146,7 @@ def burn_subtitles(video_path, srt_path, output_path):
    return success
-def burn_dual_subtitles(video_path, title_srt_path, content_srt_path, output_path, title_fontsize=90, title_color="FFFF00", subtitle_fontsize=24, subtitle_color="FFFFFF"):
+def burn_dual_subtitles(video_path, title_srt_path, content_srt_path, output_path, title_fontsize=60, title_color="FFFF00", subtitle_fontsize=24, subtitle_color="FFFFFF"):
    """
    烧录两层字幕到视频（标题在屏幕正中，正文在下方）
@@ -163,7 +163,7 @@ def burn_dual_subtitles(video_path, title_srt_path, content_srt_path, output_pat
    Returns:
        True if success
    """
-    # Windows路径转义
+    # Windows路径转义：D:/ 需要双反斜杠转义
    title_escaped = title_srt_path.replace('\\', '/').replace('D:/', 'D\\:/')
    content_escaped = content_srt_path.replace('\\', '/').replace('D:/', 'D\\:/')
@@ -180,19 +180,12 @@ def burn_dual_subtitles(video_path, title_srt_path, content_srt_path, output_pat
    title_bgr = html_to_bgr(title_color)
    subtitle_bgr = html_to_bgr(subtitle_color)
-    # 标题样式：使用SRT+force_style，Alignment=5水平居中，垂直位置由MarginV控制
+    # 标题样式：使用SRT+force_style，Alignment=2水平居中，MarginV=150使其位于屏幕上偏下区域（36%高度）
    # 正文字样式：底部居中，24字号，白色，带描边
    content_style = f"FontName=微软雅黑,FontSize={subtitle_fontsize},PrimaryColour={subtitle_bgr},Alignment=2,MarginV=20,Outline=1,Shadow=1"
-    # 使用两个独立字幕滤镜分别渲染，然后叠加
+    title_style = f"FontName=微软雅黑,FontSize={title_fontsize},PrimaryColour={title_bgr},Alignment=2,MarginV=150,Outline=3,Shadow=2"
    # 标题使用Alignment=5,MarginV=0（正中）
    title_style = f"FontName=微软雅黑,FontSize={title_fontsize},PrimaryColour={title_bgr},Alignment=5,MarginV=0,Outline=3,Shadow=2"
    # 使用两个字幕滤镜叠加，然后映射视频+原始音频
    # 标题使用Alignment=5,MarginV=0（正中）
    title_style = f"FontName=微软雅黑,FontSize={title_fontsize},PrimaryColour={title_bgr},Alignment=5,MarginV=0,Outline=3,Shadow=2"
    # 使用两个字幕滤镜叠加
    filter_str = f"[0:v]subtitles='{title_escaped}':force_style='{title_style}',subtitles='{content_escaped}':force_style='{content_style}'[out]"
    # 保留原始音频 - 映射视频输出和原始音频
@@ -1,9 +0,0 @@
 # Debug helper: print the size of the CLI run log and its first 300 bytes,
 # once as hex and once decoded as UTF-8 with replacement characters.
 LOG_PATH = r'D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt'

 # Binary mode keeps the raw bytes; the context manager closes the handle even
 # if read() raises.
 with open(LOG_PATH, 'rb') as f:
    data = f.read()

 head = data[:300]
 print('Total bytes:', len(data))
 print('First 300 hex:', head.hex())
 print()
 print('UTF-8 decode of first 300:')
 print(head.decode('utf-8', 'replace'))
@@ -1,3 +0,0 @@
@echo off
 chcp 65001 >nul
 "D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" -c "import pptx; print('pptx available')"
@@ -1,3 +0,0 @@
@echo off
 chcp 65001 >nul
 "D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\check_pptx2.py"
@@ -1,10 +0,0 @@
 import sys
 # Probe whether the `pptx` package can be imported, then write the outcome
 # to a text file and echo it on stdout.
 out = r"D:\F\NewI\opencode\daily-workspace\temp\check_pptx_out.txt"
 try:
    import pptx
 except ImportError as e:
    result = "pptx NOT available: " + str(e)
 else:
    # Import succeeded: report the installed version.
    result = "pptx available: " + pptx.__version__
 with open(out, "w", encoding="utf-8") as report:
    report.write(result)
 print(result)
@@ -1,3 +0,0 @@
@echo off
 chcp 65001 >nul
 "D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\check_transcript.py"
@@ -1,17 +0,0 @@
 import os
 import json
 # Report whether the full transcript checkpoint exists and, if so, its size,
 # entry count and first/last entries.
 inter_dir = r"D:\F\NewI\opencode\daily-workspace\projects\piano-lesson-highlights\cases\lesson1\output_cli_full\intermediates"
 transcript_file = os.path.join(inter_dir, "full_transcript.json")
 if not os.path.exists(transcript_file):
    print("Transcript file NOT found")
 else:
    size = os.path.getsize(transcript_file)
    with open(transcript_file, "r", encoding="utf-8") as fh:
        data = json.load(fh)
    print(f"Transcript exists: {size} bytes")
    print(f"Segments: {len(data)}")
    # Indexing below presumes the JSON root is a list of segments.
    if data:
        first_segment, last_segment = data[0], data[-1]
        print(f"First segment: {first_segment}")
        print(f"Last segment: {last_segment}")
@@ -1,4 +0,0 @@
@echo off
 chcp 65001 >nul
 "D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\debug_ppt.py"
 pause
@@ -1,30 +0,0 @@
 import zipfile
 import re
 # Debug helper: open a .pptx as a zip archive and print what the slide
 # discovery logic would see (slide XML entries, slide ids, relationships).
 ppt = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\钢琴演奏入门第一课.pptx"
 with zipfile.ZipFile(ppt, "r") as z:
    names = z.namelist()
    # Archive members that look like slide XML parts.
    slide_files = [f for f in names if f.startswith("ppt/slides/slide") and f.endswith(".xml")]
    print(f"Total files in zip: {len(names)}")
    print(f"Slide files found: {len(slide_files)}")
    print(f"First 5 slide files: {slide_files[:5]}")
    # Test presentation.xml: extract the r:id attribute of every <p:sldId>.
    try:
        pres_xml = z.read("ppt/presentation.xml").decode("utf-8", errors="replace")
        sld_ids = re.findall(r'<p:sldId\b[^>]*r:id="([^"]+)"', pres_xml)
        print(f"\nsldIdList rIds: {sld_ids[:5]}")
    except Exception as e:
        # Best-effort diagnostics: report the problem and keep going.
        print(f"\npresentation.xml error: {e}")
    # Test rels: map each relationship Id to its Target.
    try:
        rels_xml = z.read("ppt/_rels/presentation.xml.rels").decode("utf-8", errors="replace")
        rid_to_target = dict(re.findall(r'Id="([^"]+)"[^>]*Target="([^"]+)"', rels_xml))
        print(f"Rels entries: {len(rid_to_target)}")
        # Show a sample (first three entries)
        for k, v in list(rid_to_target.items())[:3]:
            print(f"  {k} -> {v}")
    except Exception as e:
        print(f"\nrels error: {e}")
@@ -1,3 +0,0 @@
@echo off
 chcp 65001 >nul
 "D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\debug_ppt2.py"
@@ -1,34 +0,0 @@
 import zipfile, re, sys
 # Debug helper: same .pptx inspection as a console dump, but the findings are
 # collected in `results` and written to a text file.
 ppt = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\钢琴演奏入门第一课.pptx"
 out = r"D:\F\NewI\opencode\daily-workspace\temp\debug_ppt_out.txt"
 results = []
 with zipfile.ZipFile(ppt, "r") as z:
    names = z.namelist()
    # Archive members that look like slide XML parts.
    slide_files = [f for f in names if f.startswith("ppt/slides/slide") and f.endswith(".xml")]
    results.append(f"Total files in zip: {len(names)}")
    results.append(f"Slide files found: {len(slide_files)}")
    results.append(f"First 5: {slide_files[:5]}")
    # Slide ids: the r:id attribute of every <p:sldId> in presentation.xml.
    try:
        pres_xml = z.read("ppt/presentation.xml").decode("utf-8", errors="replace")
        sld_ids = re.findall(r'<p:sldId\b[^>]*r:id="([^"]+)"', pres_xml)
        results.append(f"sldIds: {sld_ids[:5]}")
    except Exception as e:
        # Best-effort diagnostics: record the problem and keep going.
        results.append(f"pres error: {e}")
    # Relationships: map each Id to its Target and record the first three.
    try:
        rels_xml = z.read("ppt/_rels/presentation.xml.rels").decode("utf-8", errors="replace")
        rid_to_target = dict(re.findall(r'Id="([^"]+)"[^>]*Target="([^"]+)"', rels_xml))
        results.append(f"rels count: {len(rid_to_target)}")
        for k, v in list(rid_to_target.items())[:3]:
            results.append(f"  {k} -> {v}")
    except Exception as e:
        results.append(f"rels error: {e}")
 # Persist the report, one finding per line.
 with open(out, "w", encoding="utf-8") as f:
    f.write("\n".join(results))
 print("Done, see", out)
@@ -1,3 +0,0 @@
@echo off
 chcp 65001 >nul
 "D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\debug_slide1.py" > "D:\F\NewI\opencode\daily-workspace\temp\debug_slide1_out.txt" 2>&1
@@ -1,23 +0,0 @@
 import zipfile, re, os
 # Debug helper: dump the text runs of slide 1 and a preview of its raw XML
 # into two files under `out_dir`.
 ppt = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\钢琴演奏入门第一课.pptx"
 out_dir = r"D:\F\NewI\opencode\daily-workspace\temp"
 slide1_out = os.path.join(out_dir, "slide1_texts.txt")
 xml_out = os.path.join(out_dir, "slide1_xml_preview.txt")
 with zipfile.ZipFile(ppt, "r") as z:
    slide1_file = "ppt/slides/slide1.xml"
    content = z.read(slide1_file).decode("utf-8", errors="replace")
    # Contents of every <a:t> element, in document order.
    all_texts = re.findall(r"<a:t[^>]*>([^<]*)</a:t>", content)
    # Drop fragments that are empty or whitespace-only.
    meaningful = [t for t in all_texts if t.strip()]
    with open(slide1_out, "w", encoding="utf-8") as f:
        f.write(f"Total fragments: {len(all_texts)}\n")
        f.write(f"Meaningful fragments: {len(meaningful)}\n\n")
        for i, t in enumerate(meaningful):
            f.write(f"[{i}] {t}\n")
    # Only the first 8000 characters of the XML are kept as a preview.
    with open(xml_out, "w", encoding="utf-8") as f:
        f.write(content[:8000])
 print("Done")
@@ -1,3 +0,0 @@
@echo off
 chcp 65001 >nul
 "D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\do_install.py"
@@ -1,12 +0,0 @@
 import subprocess
 import sys
 # Install python-pptx with the given interpreter's pip and print the captured
 # output streams and exit status.
 venv_python = r"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe"
 pip_command = [venv_python, "-m", "pip", "install", "python-pptx"]
 completed = subprocess.run(pip_command, capture_output=True, text=True)
 for label, value in (
    ("STDOUT:", completed.stdout),
    ("STDERR:", completed.stderr),
    ("Return code:", completed.returncode),
 ):
    print(label, value)
@@ -1,6 +0,0 @@
@echo off
 chcp 65001 >nul
 echo Installing python-pptx...
 "D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" -m pip install python-pptx -q
 echo Done
 pause
@@ -1,4 +0,0 @@
@echo off
 chcp 65001 >nul
 "D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" -m pip install python-pptx
 echo Exit: %errorlevel%
@@ -1,4 +0,0 @@
@echo off
 chcp 65001 >nul
 "D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" -m pip install python-pptx > "D:\F\NewI\opencode\daily-workspace\temp\pip_out.txt" 2>&1
 echo Exit: %errorlevel%
@@ -1,12 +0,0 @@
 # Force-stop EVERY process named "python". Get-Process filters by name only,
 # so this is not limited to processes started by the CLI.
 Get-Process python -ErrorAction SilentlyContinue | Stop-Process -Force
 # Wait 3 seconds before checking the result.
 Start-Sleep 3
 # Verify killed: list any python processes that are still present.
 $remaining = Get-Process python -ErrorAction SilentlyContinue
 if ($remaining) {
    Write-Host "Still running:"
    $remaining | ForEach-Object { Write-Host "  PID:" $_.Id }
 } else {
    Write-Host "All python processes killed"
 }
@@ -1,5 +0,0 @@
 # Debug helper: print the first 35 lines of the CLI run log with trailing
 # whitespace removed. The context manager closes the file even if reading
 # fails (for example on a decoding error).
 with open(r'D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()
 for line in lines[:35]:
    print(line.rstrip())