refactor: extract config.py, add burn_only, fix title_segments and font size
- Extract all path/API config to config.py (single source of truth) - Add run.py / burn_only.py / run.bat / burn.bat entry points - burn_only: skip transcription/subtitle gen, fast reburn existing SRTs - Fix title_segments: use transcript keyword time for split point - Fix subtitle: each overlapping title shows max title_duration (not full clip) - Fix burn_only font size: default from 90 to 60 - Delete old run_lesson1.bat/py, temp debug scripts - Update README, ARCHITECTURE, CHANGELOG, add USAGE.md
This commit is contained in:
@@ -26,40 +26,50 @@ cp config.ini.example config.ini
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### 3. 运行
|
||||
### 3. 配置
|
||||
|
||||
编辑 `config.py` 中的视频路径、PPT路径、API Key 等。所有配置集中在一个文件。
|
||||
|
||||
### 4. 运行
|
||||
|
||||
**完整流程(首次运行):**
|
||||
```bash
|
||||
.\run.bat
|
||||
```
|
||||
|
||||
**快速烧录(仅修改字幕后重烧):**
|
||||
```bash
|
||||
.\burn.bat
|
||||
```
|
||||
|
||||
**GUI(推荐):**
|
||||
```bash
|
||||
.\start.bat
|
||||
```
|
||||
|
||||
**CLI:**
|
||||
```bash
|
||||
.\run_lesson1.bat
|
||||
```
|
||||
|
||||
或通用方式:
|
||||
```bash
|
||||
python src/cli.py --video video.mp4 --ppt presentation.pptx --output ./output
|
||||
```
|
||||
|
||||
## 项目结构
|
||||
|
||||
```
|
||||
lesson-highlights/
|
||||
├── config.py # 统一配置(修改这里)
|
||||
├── run.py # 完整流水线
|
||||
├── burn_only.py # 快速烧录(跳过转录/字幕生成)
|
||||
├── run.bat # 运行完整流程
|
||||
├── burn.bat # 快速重烧字幕
|
||||
├── src/
|
||||
│ ├── main.py # GUI 入口
|
||||
│ ├── gui.py # GUI(参数输入,调用底层)
|
||||
│ ├── cli.py # CLI 入口
|
||||
│ └── core/ # 共享底层
|
||||
│ ├── ppt_parser.py # PPT 解析 + clips 生成
|
||||
│ ├── pipeline.py # 视频处理流水线
|
||||
│ ├── subtitle.py # 字幕生成
|
||||
│ ├── main.py # GUI 入口
|
||||
│ ├── gui.py # GUI(参数输入,调用底层)
|
||||
│ ├── cli.py # CLI 入口
|
||||
│ └── core/ # 共享底层
|
||||
│ ├── ppt_parser.py # PPT 解析 + clips 生成
|
||||
│ ├── pipeline.py # 视频处理流水线
|
||||
│ ├── subtitle.py # 字幕生成
|
||||
│ └── ...
|
||||
├── config.ini # API 配置(不提交 git)
|
||||
├── config.ini.example # 配置模板
|
||||
├── start.bat # 启动 GUI
|
||||
└── run_lesson1.bat # CLI 示例
|
||||
├── config.ini # API 配置(不提交 git)
|
||||
├── config.ini.example # 配置模板
|
||||
└── docs/
|
||||
├── USAGE.md # 使用指南
|
||||
└── ...
|
||||
```
|
||||
|
||||
## 工作流程
|
||||
@@ -87,10 +97,15 @@ api_key = your_api_key_here
|
||||
|
||||
```
|
||||
output/
|
||||
├── generated_config.yaml # 生成的 clips 配置
|
||||
├── clips/ # 提取的片段视频
|
||||
├── subtitles/ # 字幕文件
|
||||
└── final.mp4 # 最终输出
|
||||
├── generated_config.yaml # clips 配置(可手动修改后重新运行)
|
||||
├── intermediates/ # 中间文件
|
||||
│ ├── clip*.json # Whisper 转录结果
|
||||
│ └── clip*.mp4 # 提取的视频片段
|
||||
├── subs/ # 字幕文件
|
||||
│ ├── v1_title.srt # 标题轨(可手动修改)
|
||||
│ └── v1_content.srt # 正文字幕
|
||||
├── concat_merged.mp4 # 合并视频
|
||||
└── final.mp4 # 最终输出
|
||||
```
|
||||
|
||||
## 系统要求
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
@echo off
rem Fast re-burn launcher: runs burn_only.py and forwards any arguments
rem (e.g. an alternate output directory).
rem The script is resolved relative to this .bat file (%~dp0) so the launcher
rem keeps working when the repository is moved or cloned elsewhere.
rem NOTE: the interpreter path is machine-specific; keep it in sync with
rem PYTHON in config.py.
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "%~dp0burn_only.py" %*
rem Remember Python's exit code so it survives the pause below.
set "RC=%ERRORLEVEL%"
pause
exit /b %RC%
|
||||
@@ -0,0 +1,73 @@
|
||||
# -*- coding: utf-8 -*-
"""
Fast burn script - skips every transcription / subtitle-generation step.

Merges the already-extracted clips (only when no merged video exists yet) and
burns the existing title/content SRT files onto the merged video.

Usage:
    python burn_only.py
    python burn_only.py "D:\\path\\to\\output_dir"
"""
import glob
import os
import re
import sys


def clip_sort_key(path):
    """Return a sort key that orders clip files by their numeric index.

    A plain lexicographic sort puts ``clip10.mp4`` before ``clip2.mp4``, which
    would merge the clips in the wrong order as soon as there are ten or more.
    Files whose name does not follow ``clip<N>...`` sort after the numbered
    ones, by name.
    """
    name = os.path.basename(path)
    match = re.match(r"clip(\d+)", name)
    if match:
        return (0, int(match.group(1)), name)
    return (1, 0, name)


def main(argv=None):
    """Run the fast-burn flow and return a process exit code.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``. The first argument, if present, overrides
            ``config.OUTPUT``.

    Returns:
        0 on success, 1 when a required input file is missing.
    """
    args = sys.argv[1:] if argv is None else list(argv)

    # Import the unified configuration from the project root.
    project_root = os.path.dirname(os.path.abspath(__file__))
    sys.path.insert(0, project_root)
    import config

    output = args[0] if args else config.OUTPUT

    title_srt = os.path.join(output, "subs", "v1_title.srt")
    content_srt = os.path.join(output, "subs", "v1_content.srt")
    clips_dir = os.path.join(output, "intermediates")
    merged_candidate = os.path.join(output, "concat_merged.mp4")

    print("[Fast Burn Mode]")
    print(f"Output: {output}")
    print()

    # Both subtitle tracks must already exist; this script never generates them.
    if not os.path.exists(title_srt):
        print(f"ERROR: title.srt not found\n{title_srt}")
        return 1
    if not os.path.exists(content_srt):
        print(f"ERROR: content.srt not found\n{content_srt}")
        return 1

    # Pipeline lives in the src/ directory.
    sys.path.insert(0, os.path.join(project_root, "src"))
    from core import Pipeline

    # Minimal config: only output_dir matters here. video_params is left empty,
    # so step_burn falls back to its own defaults (e.g. font sizes).
    pipeline = Pipeline({
        'output_dir': output,
        'clips': [],
        'video_src': None,
        'video_params': {},
        'term_corrections': {},
        'api_key': '',
        'api_host': '',
    })

    # Merge only when needed; an existing merged video is reused as-is.
    if os.path.exists(merged_candidate):
        print(f"Found existing merged video: {merged_candidate}")
        merged_path = merged_candidate
    else:
        clip_files = sorted(
            glob.glob(os.path.join(clips_dir, "clip*.mp4")),
            key=clip_sort_key,
        )
        if not clip_files:
            print(f"ERROR: No clip videos found\n{clips_dir}\\clip*.mp4")
            return 1
        print(f"Merging {len(clip_files)} clips...")
        merged_path = pipeline.step_merge(clip_files)
        print(f"Merged: {merged_path}")

    # Burn both subtitle tracks onto the merged video.
    print("Burning subtitles...")
    final_path = pipeline.step_burn(merged_path, title_srt, content_srt)
    print(f"\nDone: {final_path}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -0,0 +1,24 @@
|
||||
# -*- coding: utf-8 -*-
"""
Unified configuration - edit this file only; do not edit run.py / burn_only.py.

All paths and API settings are managed here.
"""
import os

# ========== Paths ==========
VIDEO = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\直播回放-03月18日.mp4"
PPT = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\钢琴演奏入门第一课.pptx"
OUTPUT = r"D:\F\NewI\opencode\daily-workspace\projects\piano-lesson-highlights\cases\lesson1\output_cli_full"
LOG_FILE = r"D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt"

# ========== Run parameters ==========
MAX_TOTAL_DURATION = 600  # upper bound on the total highlight duration (seconds)

# ========== API ==========
# SECURITY: never commit a real key to version control. The key is read from
# the ARK_API_KEY environment variable; it is empty when the variable is unset,
# in which case LLM-backed steps cannot authenticate.
# NOTE(review): a literal key was previously committed in this file and remains
# in the git history - it should be revoked and rotated.
API_KEY = os.environ.get("ARK_API_KEY", "")
API_HOST = "https://ark.cn-beijing.volces.com/api/coding/v3"

# ========== Environment (rarely changed) ==========
PYTHON = r"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe"
CLI_DIR = os.path.dirname(os.path.abspath(__file__))  # directory containing this file
|
||||
+22
-17
@@ -10,25 +10,30 @@
|
||||
|
||||
```
|
||||
lesson-highlights/
|
||||
├── config.py # 统一配置(所有路径/API只改这里)
|
||||
├── run.py # 完整流水线入口
|
||||
├── burn_only.py # 快速烧录入口(跳过转录/字幕生成)
|
||||
├── run.bat # 运行完整流程
|
||||
├── burn.bat # 快速重烧字幕
|
||||
├── src/
|
||||
│ ├── main.py # GUI 入口
|
||||
│ ├── gui.py # GUI(参数输入 → 调用底层)
|
||||
│ ├── cli.py # CLI 入口
|
||||
│ └── core/ # 共享底层
|
||||
│ ├── main.py # GUI 入口
|
||||
│ ├── gui.py # GUI(参数输入 → 调用底层)
|
||||
│ ├── cli.py # CLI 入口
|
||||
│ └── core/ # 共享底层
|
||||
│ ├── __init__.py
|
||||
│ ├── ppt_parser.py # PPT 解析 + LLM clips 提取
|
||||
│ ├── pipeline.py # 视频处理流水线
|
||||
│ ├── subtitle.py # 字幕生成
|
||||
│ ├── video.py # 视频处理(提取/合并/烧录)
|
||||
│ ├── llm.py # LLM 调用
|
||||
│ ├── corrections.py # 术语纠正
|
||||
│ ├── constants.py # 常量配置
|
||||
│ └── errors.py # 错误处理
|
||||
├── config.ini # API 配置(不提交 git)
|
||||
├── config.ini.example # 配置模板
|
||||
├── start.bat # GUI 启动器
|
||||
├── run.bat # 通用 CLI 启动器
|
||||
└── run_lesson1.bat # 预设课程示例
|
||||
│ ├── ppt_parser.py # PPT 解析 + LLM clips 提取
|
||||
│ ├── pipeline.py # 视频处理流水线
|
||||
│ ├── subtitle.py # 字幕生成
|
||||
│ ├── video.py # 视频处理(提取/合并/烧录)
|
||||
│ ├── llm.py # LLM 调用
|
||||
│ ├── corrections.py # 术语纠正
|
||||
│ ├── constants.py # 常量配置
|
||||
│ └── errors.py # 错误处理
|
||||
├── config.ini # API 配置(不提交 git)
|
||||
├── config.ini.example # 配置模板
|
||||
├── start.bat # GUI 启动器
|
||||
└── docs/
|
||||
└── USAGE.md # 使用指南
|
||||
```
|
||||
|
||||
## 3. 核心模块
|
||||
|
||||
+15
-14
@@ -5,31 +5,32 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [版本号] - 日期
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
- 新功能
|
||||
- `docs/USAGE.md` - 使用指南(run.bat / burn.bat / 修改知识点流程)
|
||||
- `config.py` - 统一配置文件,所有路径和 API 配置集中管理
|
||||
- `run.py` / `burn_only.py` - 独立入口脚本
|
||||
- `--resume-from-burn` CLI 参数 - 快速烧录模式,跳过所有转录/字幕生成步骤
|
||||
|
||||
### Changed
|
||||
- 功能变更
|
||||
- `run.bat` / `burn.bat` 替代原有的 `run_lesson1.bat`(不再需要改多处配置)
|
||||
- `ppt_parser.py`: 重叠片段的 `title_segments` 用 transcript 关键词首次出现时间计算切分点
|
||||
- `pipeline.py`: 新增 `_recalculate_title_segments_from_transcript()`,在转录完成后用实际 transcript 数据修正标题切换时间
|
||||
- `subtitle.py`: 多标题片段中每个标题最多显示 `title_duration` 秒(原逻辑会一直显示到片段结束)
|
||||
- `pipeline.py`: `step_burn` 的 `title_fontsize` 默认值从 90 改为 60
|
||||
|
||||
### Fixed
|
||||
- 问题修复
|
||||
|
||||
### Deprecated
|
||||
- 弃用功能
|
||||
- `ppt_parser.py`: 不重叠的 clip 残留 `title_segments` 导致标题显示时长错误
|
||||
- `subtitle.py`: 重叠片段第二个标题显示时长超过 `title_duration`
|
||||
- `pipeline.py`: 快速烧录模式因 `video_params` 为空导致字号使用默认值 90 而非 60
|
||||
|
||||
### Removed
|
||||
- 移除的功能
|
||||
|
||||
### Security
|
||||
- 安全相关
|
||||
- `run_lesson1.bat` / `run_lesson1.py` - 旧入口,已由 `config.py` + `run.bat` / `burn.bat` 替代
|
||||
|
||||
---
|
||||
|
||||
## 示例
|
||||
|
||||
### [1.0.0] - 2026-05-02
|
||||
## [1.0.0] - 2026-05-02
|
||||
|
||||
### Added
|
||||
- 初始版本发布
|
||||
|
||||
+117
@@ -0,0 +1,117 @@
|
||||
# 使用指南
|
||||
|
||||
## 快速开始
|
||||
|
||||
### 1. 配置
|
||||
|
||||
编辑项目根目录的 `config.py`:
|
||||
|
||||
```python
|
||||
VIDEO = r"D:\...\直播回放.mp4"
|
||||
PPT = r"D:\...\课程.pptx"
|
||||
OUTPUT = r"D:\...\output"
|
||||
MAX_TOTAL_DURATION = 600 # 精华片段总时长上限(秒)
|
||||
API_KEY = "your-api-key"
|
||||
API_HOST = "https://ark.cn-beijing.volces.com/api/coding/v3"
|
||||
```
|
||||
|
||||
所有路径和 API 配置只改这一个文件。
|
||||
|
||||
### 2. 完整流程(首次运行)
|
||||
|
||||
```bash
|
||||
run.bat
|
||||
```
|
||||
|
||||
或直接:
|
||||
|
||||
```bash
|
||||
python run.py
|
||||
```
|
||||
|
||||
完整流程:PPT解析 → Whisper转录 → LLM校正 → 字幕生成 → 合并 → 烧录
|
||||
|
||||
### 3. 修改字幕后快速重烧
|
||||
|
||||
改完 `v1_title.srt` 或 `v1_content.srt` 后,直接:
|
||||
|
||||
```bash
|
||||
burn.bat
|
||||
```
|
||||
|
||||
跳过所有转录/字幕生成步骤,直接用已有片段和字幕文件合并烧录。**只改字幕文本时用这个**。
|
||||
|
||||
## 修改知识点(替换PPT中的某个知识点)
|
||||
|
||||
LLM 从 PPT 提取了 clip 后,如果你想把其中一个换成 PPT 里另一个知识点(比如把"音高"换成"旋律"):
|
||||
|
||||
### 步骤
|
||||
|
||||
1. **改 `generated_config.yaml`**:把对应 clip 的 title 改成新知识点名称
|
||||
|
||||
```yaml
|
||||
clips:
|
||||
- title: 旋律 # ← 改成PPT里有的知识点
|
||||
start: 200
|
||||
end: 260
|
||||
```
|
||||
|
||||
2. **删该 clip 的中间文件**(让它重新生成):
|
||||
|
||||
```
|
||||
intermediates/clip5.json ← 删掉
|
||||
intermediates/clip5.mp4 ← 删掉
|
||||
```
|
||||
|
||||
3. **重新运行**:
|
||||
|
||||
```bash
|
||||
run.bat
|
||||
```
|
||||
|
||||
系统会跳过其他已有 JSON 的 clip,只重新生成被删除了 JSON 的那一个 clip。
|
||||
|
||||
### 原理
|
||||
|
||||
- `run.bat` 检测到 `clip*.json` 已存在,就跳过 Whisper 转录
|
||||
- 删掉某个 clip 的 JSON 后,系统认为它需要重新生成
|
||||
- 重新生成时用新的 title 去 transcript 里匹配,重新找时间范围
|
||||
|
||||
### 注意
|
||||
|
||||
- `start`/`end` 如果填错了,生成的视频片段时间会不对
|
||||
- 如果不确定新知识点的时间范围,可以先随便填一个,跑完看效果再调整
|
||||
|
||||
## 文件结构
|
||||
|
||||
```
|
||||
output/
|
||||
├── generated_config.yaml # clips 配置(可手动修改)
|
||||
├── intermediates/ # 中间文件(可删除特定clip的.json/.mp4重生成)
|
||||
│ ├── clip1.json # Whisper 转录结果
|
||||
│ ├── clip1.mp4 # 提取的视频片段
|
||||
│ └── ...
|
||||
├── subs/ # 字幕文件
|
||||
│ ├── v1_title.srt # 标题轨(可手动修改文本+时间轴)
|
||||
│ └── v1_content.srt # 正文字幕
|
||||
├── concat_merged.mp4 # 合并后的视频
|
||||
└── final.mp4 # 最终输出
|
||||
```
|
||||
|
||||
## 命令对比
|
||||
|
||||
| 命令 | 用途 | 耗时 |
|
||||
|------|------|------|
|
||||
| `run.bat` | 完整流程(PPT→视频) | 几十分钟 |
|
||||
| `burn.bat` | 只改字幕后快速重烧 | 几分钟 |
|
||||
|
||||
## 常见问题
|
||||
|
||||
**Q: `burn.bat` 改了字号没变化?**
|
||||
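A: `burn.bat` 只是把已有的 SRT 文件重新烧录到视频上,不会重新运行 `subtitle.py` 的字幕生成逻辑。字号是在烧录阶段由 `step_burn` 从 `video_params` 读取的;`burn_only.py` 传入的 `video_params` 为空,因此始终使用默认值(标题字号 60)。影响字幕生成的参数(如 `title_duration`)修改后,仍需运行 `run.bat` 重新生成字幕。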
|
||||
|
||||
**Q: 想改某个知识点的出现时间?**
|
||||
A: 直接改 `v1_title.srt` 里的时间轴,或者改 `generated_config.yaml` 然后删对应 clip 的 JSON 重新生成。
|
||||
|
||||
**Q: 想删掉某个 clip?**
|
||||
A: 从 `generated_config.yaml` 里删掉那一条,然后只删除该 clip 对应的中间文件(例如 `intermediates/clip5.json` 和 `intermediates/clip5.mp4`,不要删除全部 `clip*` 文件),最后运行 `run.bat`。
|
||||
@@ -0,0 +1,3 @@
|
||||
@echo off
rem Full-pipeline launcher: runs run.py, which reads all settings from config.py.
rem The script is resolved relative to this .bat file (%~dp0) so the launcher
rem keeps working when the repository is moved or cloned elsewhere.
rem NOTE: the interpreter path is machine-specific; keep it in sync with
rem PYTHON in config.py.
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "%~dp0run.py"
rem Remember Python's exit code so it survives the pause below.
set "RC=%ERRORLEVEL%"
pause
exit /b %RC%
|
||||
@@ -0,0 +1,36 @@
|
||||
# -*- coding: utf-8 -*-
"""
Full pipeline - from PPT parsing to the final video.

All settings are managed in config.py.
"""
import os
import subprocess
import sys


def build_command(cfg):
    """Build the argument list that launches src/cli.py.

    Args:
        cfg: Object exposing PYTHON, CLI_DIR, VIDEO, PPT, OUTPUT, API_KEY,
            API_HOST and MAX_TOTAL_DURATION (normally the ``config`` module).

    Returns:
        The command as a list of strings, suitable for ``subprocess.run``.
    """
    # NOTE(review): the API key is passed on the command line and is therefore
    # visible in the process list; cli.py would need another input channel to
    # avoid that.
    return [
        cfg.PYTHON,
        os.path.join(cfg.CLI_DIR, "src", "cli.py"),
        "--video", cfg.VIDEO,
        "--ppt", cfg.PPT,
        "--output", cfg.OUTPUT,
        "--api-key", cfg.API_KEY,
        "--api-host", cfg.API_HOST,
        "--max-total-duration", str(cfg.MAX_TOTAL_DURATION),
        "--verbose",
    ]


def main():
    """Run the CLI pipeline as a child process and return its exit code."""
    # Import the unified configuration from the project root.
    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
    import config

    # Put the interpreter's directory first on PATH for the child process.
    env = os.environ.copy()
    env["PATH"] = os.path.dirname(config.PYTHON) + os.pathsep + env.get("PATH", "")

    cmd = build_command(config)

    print("Running pipeline...")
    print(f" Video: {config.VIDEO}")
    print(f" PPT: {config.PPT}")
    print(f" Output: {config.OUTPUT}")
    print()

    # Propagate the child's exit code so callers can detect a failed run.
    completed = subprocess.run(cmd, cwd=config.CLI_DIR, env=env)
    return completed.returncode


if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -1,13 +0,0 @@
|
||||
@echo off
|
||||
chcp 65001 >nul
|
||||
echo Cleaning pycache...
|
||||
rmdir /s /q "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\src\__pycache__" 2>nul
|
||||
rmdir /s /q "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\src\core\__pycache__" 2>nul
|
||||
echo Cache cleaned.
|
||||
echo.
|
||||
echo Running CLI...
|
||||
del "D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt" 2>nul
|
||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\run_lesson1.py"
|
||||
echo.
|
||||
echo Exit: %errorlevel%
|
||||
pause
|
||||
@@ -1,42 +0,0 @@
|
||||
import sys
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
VIDEO = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\直播回放-03月18日.mp4"
|
||||
PPT = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\钢琴演奏入门第一课.pptx"
|
||||
OUTPUT = r"D:\F\NewI\opencode\daily-workspace\projects\piano-lesson-highlights\cases\lesson1\output_cli_full"
|
||||
PYTHON = r"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe"
|
||||
CLI_DIR = r"D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\src"
|
||||
API_KEY = "b0359bed-09f2-49e2-a53c-32ba057412e3"
|
||||
API_HOST = "https://ark.cn-beijing.volces.com/api/coding/v3"
|
||||
LOG_FILE = r"D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt"
|
||||
|
||||
env = os.environ.copy()
|
||||
env["PATH"] = r"D:\ProgramData\anaconda3\envs\py312_cuda;" + env.get("PATH", "")
|
||||
|
||||
cmd = [
|
||||
PYTHON,
|
||||
os.path.join(CLI_DIR, "cli.py"),
|
||||
"--video", VIDEO,
|
||||
"--ppt", PPT,
|
||||
"--output", OUTPUT,
|
||||
"--api-key", API_KEY,
|
||||
"--api-host", API_HOST,
|
||||
"--verbose"
|
||||
]
|
||||
|
||||
print("Starting CLI...")
|
||||
print(f"Video: {VIDEO}")
|
||||
print(f"PPT: {PPT}")
|
||||
print(f"Log: {LOG_FILE}")
|
||||
|
||||
proc = subprocess.Popen(cmd, cwd=CLI_DIR, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, encoding='utf-8', errors='replace')
|
||||
|
||||
with open(LOG_FILE, 'w', encoding='utf-8') as log:
|
||||
for line in proc.stdout:
|
||||
log.write(line)
|
||||
log.flush()
|
||||
print(line, end='')
|
||||
|
||||
proc.wait()
|
||||
print(f"\nExit code: {proc.returncode}")
|
||||
+48
-1
@@ -60,8 +60,12 @@ def parse_args():
|
||||
help='LLM API地址')
|
||||
parser.add_argument('--whisper-model', type=str, default='large',
|
||||
help='Whisper模型 (默认: large)')
|
||||
parser.add_argument('--max-total-duration', type=int, default=300,
|
||||
help='精华片段总时长上限(秒),默认300')
|
||||
parser.add_argument('--verbose', '-V', action='store_true',
|
||||
help='详细输出')
|
||||
parser.add_argument('--resume-from-burn', action='store_true',
|
||||
help='快速模式:跳过所有步骤,直接用已有片段和字幕文件合并烧录(用于手动修改SRT后快速重生成)')
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
@@ -77,7 +81,7 @@ def load_config_from_args(args) -> dict:
|
||||
'whisper_model': args.whisper_model,
|
||||
'video_params': {
|
||||
'fade_duration': 1,
|
||||
'title_fontsize': 90,
|
||||
'title_fontsize': 60,
|
||||
'title_color': 'FFFF00',
|
||||
'subtitle_fontsize': 24,
|
||||
'subtitle_color': 'FFFFFF',
|
||||
@@ -137,8 +141,15 @@ def generate_config_from_ppt(args) -> dict:
|
||||
progress_callback=progress_callback,
|
||||
api_key=args.api_key,
|
||||
api_host=args.api_host,
|
||||
max_total_duration=args.max_total_duration,
|
||||
)
|
||||
|
||||
# 补充API配置(parse_ppt_to_config不返回这些)
|
||||
if args.api_key:
|
||||
config['api_key'] = args.api_key
|
||||
if args.api_host:
|
||||
config['api_host'] = args.api_host
|
||||
|
||||
# 保存生成的配置
|
||||
config_path = os.path.join(args.output, 'generated_config.yaml')
|
||||
import yaml
|
||||
@@ -207,6 +218,42 @@ def main():
|
||||
|
||||
pipeline = Pipeline(config)
|
||||
|
||||
# 快速模式:跳过所有步骤,直接用已有片段和字幕合并烧录
|
||||
if args.resume_from_burn:
|
||||
import glob
|
||||
import shutil
|
||||
output_dir = config.get('output_dir')
|
||||
clips_dir = os.path.join(output_dir, 'clips')
|
||||
merged_dir = os.path.join(output_dir, 'merged')
|
||||
merged_path = os.path.join(merged_dir, 'merged.mp4')
|
||||
title_path = os.path.join(output_dir, 'title.srt')
|
||||
content_path = os.path.join(output_dir, 'content.srt')
|
||||
|
||||
# 检查必要文件
|
||||
if not os.path.exists(title_path):
|
||||
logger.error(f"找不到 title.srt: {title_path}")
|
||||
return 1
|
||||
if not os.path.exists(content_path):
|
||||
logger.error(f"找不到 content.srt: {content_path}")
|
||||
return 1
|
||||
|
||||
# 已有合并视频则直接烧录;否则先合并
|
||||
if os.path.exists(merged_path):
|
||||
logger.info(f"找到已有合并视频: {merged_path}")
|
||||
else:
|
||||
logger.info("开始合并片段...")
|
||||
clip_files = sorted(glob.glob(os.path.join(clips_dir, 'clip*.mp4')))
|
||||
if not clip_files:
|
||||
logger.error(f"找不到片段视频: {clips_dir}/clip*.mp4")
|
||||
return 1
|
||||
merged_path = pipeline.step_merge(clip_files)
|
||||
logger.info(f"合并完成: {merged_path}")
|
||||
|
||||
logger.info("开始烧录...")
|
||||
final_path = pipeline.step_burn(merged_path, title_path, content_path)
|
||||
logger.info(f"完成! 最终视频: {final_path}")
|
||||
return 0
|
||||
|
||||
logger.info("开始处理...")
|
||||
final_path = pipeline.run()
|
||||
|
||||
|
||||
@@ -79,7 +79,7 @@ DEFAULT_OUTPUT_DIR = os.path.join(PROJECT_ROOT, "output")
|
||||
DEFAULT_VIDEO_PARAMS = {
|
||||
"fade_duration": 1,
|
||||
"title_duration": 3,
|
||||
"title_fontsize": 90,
|
||||
"title_fontsize": 60,
|
||||
"title_color": "FFFF00",
|
||||
"subtitle_fontsize": 24,
|
||||
"subtitle_color": "FFFFFF",
|
||||
|
||||
+3
-100
@@ -56,6 +56,8 @@ class LLMClient:
|
||||
"max_tokens": max_tokens
|
||||
}
|
||||
|
||||
logger.info(f"[LLM] request chars={len(prompt)}, max_tokens={max_tokens}")
|
||||
|
||||
for attempt in range(LLM_MAX_RETRIES):
|
||||
try:
|
||||
response = requests.post(url, headers=headers, json=payload, timeout=timeout)
|
||||
@@ -73,6 +75,7 @@ class LLMClient:
|
||||
|
||||
content = choices[0].get("message", {}).get("content", "").strip()
|
||||
if content:
|
||||
logger.info(f"[LLM] response chars={len(content)}")
|
||||
return content
|
||||
|
||||
logger.warning(f"LLM: Empty content (attempt {attempt+1})")
|
||||
@@ -88,106 +91,6 @@ class LLMClient:
|
||||
|
||||
return None
|
||||
|
||||
def correct_title(self, transcript_text, original_title, all_titles=None):
|
||||
"""
|
||||
使用LLM纠正标题
|
||||
|
||||
Args:
|
||||
transcript_text: 字幕文本
|
||||
original_title: 原始标题
|
||||
all_titles: 所有标题列表
|
||||
|
||||
Returns:
|
||||
纠正后的标题
|
||||
"""
|
||||
titles_str = ", ".join(all_titles[:20]) if all_titles else "无"
|
||||
|
||||
prompt = f"""你是一个钢琴教学视频的标题验证专家。
|
||||
|
||||
PPT提取的标题:{original_title}
|
||||
|
||||
视频字幕内容:{transcript_text[:500] if transcript_text else "无"}
|
||||
|
||||
本节课所有标题:{titles_str}
|
||||
|
||||
【重要规则】
|
||||
- 只有当你有90%以上把握认为原标题错误时,才输出纠正后的标题
|
||||
- 如果原标题基本正确,即使不完美,也必须输出原标题
|
||||
- 绝对不能输出与原标题完全不同概念的词
|
||||
- 如果不确定,输出原标题
|
||||
|
||||
请直接输出标题,不要添加任何解释。"""
|
||||
|
||||
result = self.chat(prompt, max_tokens=50, timeout=LLM_TITLE_TIMEOUT)
|
||||
return result if result else original_title
|
||||
|
||||
def validate_content(self, transcript_text, title):
|
||||
"""
|
||||
使用LLM验证内容是否与标题相关
|
||||
|
||||
Args:
|
||||
transcript_text: 字幕文本
|
||||
title: 标题
|
||||
|
||||
Returns:
|
||||
(is_valid: bool, reason: str)
|
||||
"""
|
||||
prompt = f"""判断视频字幕内容是否与标题相关。
|
||||
|
||||
标题:{title}
|
||||
|
||||
字幕内容:{transcript_text[:300] if transcript_text else "无"}
|
||||
|
||||
判断标准:
|
||||
- 内容讨论的主题与标题概念相关 = 相关
|
||||
- 内容与标题无关(如广告、闲聊、无关话题)= 无关
|
||||
- 无法判断 = 不确定
|
||||
|
||||
请直接输出:相关/无关/不确定"""
|
||||
|
||||
result = self.chat(prompt, max_tokens=20, timeout=LLM_VALIDATE_TIMEOUT)
|
||||
if not result:
|
||||
return True, "error"
|
||||
|
||||
if "无关" in result:
|
||||
return False, result
|
||||
elif "不确定" in result:
|
||||
return True, "uncertain"
|
||||
return True, result
|
||||
|
||||
def full_text_correction(self, text, clip_title, knowledge_terms=None):
|
||||
"""
|
||||
使用LLM进行全文字幕纠错
|
||||
|
||||
Args:
|
||||
text: 原始字幕
|
||||
clip_title: 片段标题
|
||||
knowledge_terms: 知识点列表
|
||||
|
||||
Returns:
|
||||
纠错后的字幕
|
||||
"""
|
||||
knowledge_str = ", ".join(knowledge_terms[:20]) if knowledge_terms else "无"
|
||||
|
||||
prompt = f"""你是一个钢琴教学视频的字幕纠错专家。
|
||||
|
||||
原始字幕:{text}
|
||||
|
||||
本节课片段标题:{clip_title}
|
||||
本节课知识点:{knowledge_str}
|
||||
|
||||
请进行字幕纠错:
|
||||
1. 修复语音识别错误(如"羞耻"→"休止","副点"→"附点","负点"→"附点")
|
||||
2. 修复同音字错误
|
||||
3. 保留原文的专业术语和表达方式
|
||||
4. 不要改变原文的语气和意思
|
||||
|
||||
请直接输出纠错后的字幕,不要添加任何解释。"""
|
||||
|
||||
result = self.chat(prompt, max_tokens=500, timeout=LLM_TIMEOUT)
|
||||
return result if result else text
|
||||
|
||||
|
||||
# 全局LLM客户端实例
|
||||
_llm_client = None
|
||||
|
||||
|
||||
+127
-70
@@ -12,7 +12,7 @@ import logging
|
||||
from typing import Callable, Optional, List, Dict, Any
|
||||
|
||||
from .video import extract_clip, merge_clips, burn_dual_subtitles
|
||||
from .subtitle import SubtitlePipeline
|
||||
from .subtitle import SubtitlePipeline, correct_subtitles_llm
|
||||
from .llm import LLMClient
|
||||
from .corrections import apply_all_corrections, load_term_corrections_from_config
|
||||
from .utils import ensure_dir
|
||||
@@ -223,16 +223,41 @@ class Pipeline:
|
||||
self.progress_callback('transcribing', int((i/total)*90), f"转录片段 {i}/{total}")
|
||||
|
||||
try:
|
||||
segments, _ = model.transcribe(clip_path, language='zh', beam_size=5)
|
||||
segments, _ = model.transcribe(clip_path, language='zh', beam_size=5, word_timestamps=True)
|
||||
|
||||
# 保存转录结果
|
||||
# 保存转录结果(按句末标点进一步切分)
|
||||
segments_data = []
|
||||
for seg in segments:
|
||||
segments_data.append({
|
||||
'start': seg.start,
|
||||
'end': seg.end,
|
||||
'text': seg.text.strip()
|
||||
})
|
||||
words = seg.words if hasattr(seg, 'words') else []
|
||||
if words:
|
||||
# 用 word-level 时间戳在句末标点处切分
|
||||
# 注意:标点可能附着在词后(如"吗?"、"奏,"),需 strip 后判断
|
||||
_END_MARKS = '。!??'
|
||||
sub_start = words[0].start
|
||||
sub_text_parts = []
|
||||
for word in words:
|
||||
sub_text_parts.append(word.word)
|
||||
# 剥离标点后判断是否为句末标记
|
||||
stripped = word.word.rstrip(',、,')
|
||||
if any(stripped.endswith(m) for m in _END_MARKS):
|
||||
sub_end = word.end
|
||||
sub_text = ''.join(sub_text_parts).strip()
|
||||
if sub_text:
|
||||
segments_data.append({'start': sub_start, 'end': sub_end, 'text': sub_text})
|
||||
sub_start = word.end
|
||||
sub_text_parts = []
|
||||
# 剩余未到句末的文本
|
||||
if sub_text_parts:
|
||||
remaining = ''.join(sub_text_parts).strip()
|
||||
if remaining:
|
||||
segments_data.append({'start': sub_start, 'end': words[-1].end, 'text': remaining})
|
||||
else:
|
||||
# fallback:无 word timestamps,直接用原 segment
|
||||
segments_data.append({
|
||||
'start': seg.start,
|
||||
'end': seg.end,
|
||||
'text': seg.text.strip()
|
||||
})
|
||||
|
||||
with open(json_path, 'w', encoding='utf-8') as f:
|
||||
json.dump({'segments': segments_data}, f, ensure_ascii=False, indent=2)
|
||||
@@ -249,59 +274,58 @@ class Pipeline:
|
||||
self.step_callback('transcribing')
|
||||
return json_paths
|
||||
|
||||
def step_correct_titles(self, json_paths: List[str]) -> List[Dict[str, Any]]:
|
||||
def _recalculate_title_segments_from_transcript(
    self,
    clips: List[Dict],
    json_paths: List[str]
) -> None:
    """
    Recompute the title switch point of overlapping clips from the transcript.

    For every clip that carries at least two ``title_segments`` entries, the
    offset of the second entry is replaced by the time at which the second
    title first appears in that clip's transcript JSON. Clips are updated in
    place; nothing is returned.

    Args:
        clips: Clip configs; ``clips[i]['title_segments']`` is expected to be a
            list of mutable ``[title, offset]`` pairs.
        json_paths: Transcript JSON paths, index-aligned with ``clips``.
    """
    for i, clip in enumerate(clips):
        ts = clip.get('title_segments')
        if not ts or len(ts) < 2:
            continue

        # Second title segment: [title, offset]
        second_title, old_offset = ts[1]
        json_path = json_paths[i] if i < len(json_paths) else None
        if not json_path or not os.path.exists(json_path):
            continue

        try:
            with open(json_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # Unreadable / malformed transcript: keep the existing offset, but
            # say so instead of failing silently.
            logger.warning(f"Failed to read transcript {json_path}: {e}")
            continue

        # Find the first occurrence of second_title in the transcript.
        first_time = None
        if second_title:
            for seg in data.get('segments', []):
                # Prefer word-level timestamps when the JSON carries them.
                hit = next(
                    (
                        word_info.get('start')
                        for word_info in seg.get('words', [])
                        if second_title in word_info.get('word', '')
                    ),
                    None,
                )
                # The transcription step stores only start/end/text per
                # segment (no 'words'), so fall back to the start of the first
                # segment whose text contains the title.
                if hit is None and second_title in seg.get('text', ''):
                    hit = seg.get('start')
                if hit is not None:
                    first_time = hit
                    break

        if first_time is not None:
            new_offset = first_time
            clip['title_segments'][1][1] = new_offset
            logger.info(
                f" clip{i+1} title_segments: "
                f"'{second_title}' 从 {old_offset:.2f}s → {new_offset:.2f}s"
            )
        else:
            logger.warning(
                f" clip{i+1} title_segments: "
                f"未在 transcript 中找到 '{second_title}',保留原 offset {old_offset:.2f}s"
            )
|
||||
|
||||
def step_generate_subtitles(self, corrected_clips: List[Dict], json_paths: List[str]) -> tuple:
|
||||
"""
|
||||
@@ -327,6 +351,7 @@ class Pipeline:
|
||||
'start': clip['start'],
|
||||
'end': clip['end'],
|
||||
'title': clip.get('title', clip.get('original_title', '')),
|
||||
'title_segments': clip.get('title_segments'), # 可能为None
|
||||
}
|
||||
clip_configs.append(clip_config)
|
||||
|
||||
@@ -357,6 +382,39 @@ class Pipeline:
|
||||
self.step_callback('generating_subtitles')
|
||||
return title_path, content_path
|
||||
|
||||
def step_correct_subtitles(self, title_path: str, content_path: str) -> str:
    """
    Step 4.5: correct the content subtitles with the LLM.

    Delegates to ``correct_subtitles_llm``, handing it the title SRT, the
    content SRT and the PPT text taken from ``self.config['ppt_text']``.
    When no PPT text is configured the step is skipped.

    Args:
        title_path: Path to the title subtitle file.
        content_path: Path to the content subtitle file.

    Returns:
        The path returned by ``correct_subtitles_llm``, or ``content_path``
        unchanged when the step is skipped.
    """
    stage = 'correcting_subtitles'
    reference_text = self.config.get('ppt_text', '')

    if reference_text:
        self.step_callback(stage)
        self.progress_callback(stage, 0, "开始纠正字幕...")

        result_path = correct_subtitles_llm(
            title_path=title_path,
            content_path=content_path,
            ppt_text=reference_text,
            llm_client=self.llm_client,
        )

        self.progress_callback(stage, 100, "字幕纠正完成")
        self.step_callback(stage)
        return result_path

    # No PPT reference text available: leave the subtitles untouched.
    logger.warning("PPT原文为空,跳过字幕纠正步骤")
    return content_path
|
||||
|
||||
def step_merge(self, clip_paths: List[str]) -> str:
|
||||
"""
|
||||
Step 5: 合并视频
|
||||
@@ -411,7 +469,7 @@ class Pipeline:
|
||||
title_path,
|
||||
content_path,
|
||||
final_path,
|
||||
title_fontsize=video_params.get('title_fontsize', 90),
|
||||
title_fontsize=video_params.get('title_fontsize', 60),
|
||||
title_color=video_params.get('title_color', 'FFFF00'),
|
||||
subtitle_fontsize=video_params.get('subtitle_fontsize', 24),
|
||||
subtitle_color=video_params.get('subtitle_color', 'FFFFFF')
|
||||
@@ -447,17 +505,14 @@ class Pipeline:
|
||||
# Step 2: 转录
|
||||
json_paths = self.step_transcribe(clip_paths)
|
||||
|
||||
# Step 3: 标题纠正
|
||||
corrected_clips = self.step_correct_titles(json_paths)
|
||||
# Step 2.5: 用 transcript 重新计算重叠片段的 title_segments 切分点
|
||||
self._recalculate_title_segments_from_transcript(self.clips, json_paths)
|
||||
|
||||
# Step 4: 生成字幕
|
||||
title_path, content_path = self.step_generate_subtitles(corrected_clips, json_paths)
|
||||
|
||||
# Step 5: 合并
|
||||
# Step 3-6: 生成字幕、纠正、合并、烧录
|
||||
title_path, content_path = self.step_generate_subtitles(self.clips, json_paths)
|
||||
corrected_content_path = self.step_correct_subtitles(title_path, content_path)
|
||||
merged_path = self.step_merge(clip_paths)
|
||||
|
||||
# Step 6: 烧录
|
||||
final_path = self.step_burn(merged_path, title_path, content_path)
|
||||
final_path = self.step_burn(merged_path, title_path, corrected_content_path)
|
||||
|
||||
logger.info(f"Pipeline completed: {final_path}")
|
||||
return final_path
|
||||
@@ -474,23 +529,25 @@ class Pipeline:
|
||||
"""
|
||||
logger.info(f"Pipeline starting with user confirmation: {len(self.clips)} clips")
|
||||
|
||||
# Step 1-3: 同上
|
||||
# Step 1-2: 提取+转录
|
||||
clip_paths = self.step_extract()
|
||||
if not clip_paths:
|
||||
raise RuntimeError("No clips extracted")
|
||||
|
||||
json_paths = self.step_transcribe(clip_paths)
|
||||
corrected_clips = self.step_correct_titles(json_paths)
|
||||
|
||||
# Step 2.5: 用 transcript 重新计算重叠片段的 title_segments 切分点
|
||||
self._recalculate_title_segments_from_transcript(self.clips, json_paths)
|
||||
|
||||
# 应用用户确认的标题
|
||||
for i, confirmed in enumerate(confirmed_titles):
|
||||
if i < len(corrected_clips):
|
||||
corrected_clips[i]['title'] = confirmed.get('title', corrected_clips[i]['title'])
|
||||
if i < len(self.clips):
|
||||
self.clips[i]['title'] = confirmed.get('title', self.clips[i].get('title', ''))
|
||||
|
||||
# Step 4-6: 同上
|
||||
title_path, content_path = self.step_generate_subtitles(corrected_clips, json_paths)
|
||||
# Step 3-6: 生成字幕、纠正、合并、烧录
|
||||
title_path, content_path = self.step_generate_subtitles(self.clips, json_paths)
|
||||
corrected_content_path = self.step_correct_subtitles(title_path, content_path)
|
||||
merged_path = self.step_merge(clip_paths)
|
||||
final_path = self.step_burn(merged_path, title_path, content_path)
|
||||
final_path = self.step_burn(merged_path, title_path, corrected_content_path)
|
||||
|
||||
logger.info(f"Pipeline completed: {final_path}")
|
||||
return final_path
|
||||
|
||||
+68
-62
@@ -17,6 +17,8 @@ import zipfile
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional, Callable, Tuple
|
||||
|
||||
from .llm import LLMClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -36,6 +38,7 @@ class PPTParser:
|
||||
api_key: Optional[str] = None,
|
||||
api_host: Optional[str] = None,
|
||||
max_clip_duration: int = 30,
|
||||
max_total_duration: int = 300,
|
||||
):
|
||||
"""
|
||||
初始化PPT解析器
|
||||
@@ -48,6 +51,7 @@ class PPTParser:
|
||||
api_key: LLM API密钥
|
||||
api_host: LLM API地址
|
||||
max_clip_duration: 每个精华片段的最大时长(秒),默认30秒
|
||||
max_total_duration: 所有精华片段的总时长上限(秒),默认300秒(5分钟)
|
||||
"""
|
||||
self.video_path = video_path
|
||||
self.ppt_path = ppt_path
|
||||
@@ -56,6 +60,7 @@ class PPTParser:
|
||||
self.api_key = api_key
|
||||
self.api_host = api_host
|
||||
self.max_clip_duration = max_clip_duration
|
||||
self.max_total_duration = max_total_duration
|
||||
|
||||
self.inter_dir = os.path.join(output_dir, 'intermediates')
|
||||
os.makedirs(self.inter_dir, exist_ok=True)
|
||||
@@ -284,50 +289,19 @@ class PPTParser:
|
||||
|
||||
def _call_llm(self, prompt: str, max_tokens: int = 4096, timeout: int = 300, retries: int = 3) -> Optional[str]:
|
||||
"""
|
||||
带重试的 LLM 调用。
|
||||
使用实例的 api_key/api_host 创建 LLMClient 并调用 chat。
|
||||
|
||||
Args:
|
||||
prompt: 发送给 LLM 的提示词
|
||||
max_tokens: 最大 token 数
|
||||
timeout: 单次请求超时(秒)
|
||||
retries: 最大重试次数
|
||||
retries: 最大重试次数(chat() 内部也有重试,这里传 retries 但 chat() 忽略它)
|
||||
|
||||
Returns:
|
||||
LLM 返回的 content,失败返回 None
|
||||
"""
|
||||
import requests
|
||||
url = f"{self.api_host}/chat/completions"
|
||||
headers = {
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
payload = {
|
||||
"model": "doubao-seed-2.0-lite",
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
"max_tokens": max_tokens,
|
||||
"temperature": 0.1
|
||||
}
|
||||
|
||||
last_err = None
|
||||
for attempt in range(retries):
|
||||
try:
|
||||
response = requests.post(url, headers=headers, json=payload, timeout=timeout)
|
||||
response.raise_for_status()
|
||||
result = response.json()
|
||||
content = result.get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||
if content:
|
||||
return content
|
||||
logger.warning(f"LLM返回空内容(第{attempt+1}次尝试)")
|
||||
last_err = "空内容"
|
||||
except requests.exceptions.Timeout:
|
||||
logger.warning(f"LLM请求超时(第{attempt+1}次尝试,timeout={timeout}s)")
|
||||
last_err = "超时"
|
||||
except requests.exceptions.RequestException as e:
|
||||
logger.warning(f"LLM请求失败(第{attempt+1}次尝试): {e}")
|
||||
last_err = str(e)
|
||||
|
||||
logger.error(f"LLM调用失败(已重试{retries}次): {last_err}")
|
||||
return None
|
||||
client = LLMClient(api_key=self.api_key, api_host=self.api_host)
|
||||
return client.chat(prompt=prompt, max_tokens=max_tokens, timeout=timeout)
|
||||
|
||||
def llm_extract_knowledge_points_from_ppt(self) -> Tuple[Optional[List[Dict[str, Any]]], Optional[str]]:
|
||||
"""
|
||||
@@ -415,7 +389,7 @@ class PPTParser:
|
||||
- 一种方法:如"放松练习"、"分手练习"、"慢速练习"、"唱谱法"
|
||||
- 一个专题:如"乐理基础"、"手型要求"、"课后作业"
|
||||
|
||||
【文本清理规则】(以不影响原文意思表达为前提):
|
||||
【文本清理规则】(用于 cleaned_text,不影响知识点提取):
|
||||
- 合并连续的空行(超过1个空行的压缩为1个)
|
||||
- 去除行首行尾多余空格
|
||||
- 保留页面之间的自然分段(每页独立段落)
|
||||
@@ -423,12 +397,16 @@ class PPTParser:
|
||||
- 无标点的长句子:如果一行文字超过50字且无标点,才合并到下一行
|
||||
- 保留专有名词、术语的原始写法
|
||||
|
||||
【重要规则】:
|
||||
【知识点提取规则】:
|
||||
1. 扫描全部页面:不要只找"知识点汇总页",每页都要看
|
||||
2. 原文保留:知识点原文是什么就写什么,不要解释、概括、翻译或扩展
|
||||
3. 拆分合并:被拆分的片段(如"的三"+"种方法"、"谱号、"+"大谱表、"等)要合并为完整知识词
|
||||
4. 标题过滤:忽略"本课主要知识点"、"课程回顾"、"本节课重要知识点"等纯导航/目录类标题
|
||||
5. 分类项处理:格式如"XX:子项1、子项2、子项3"时,冒号后的每个子项各自独立成知识点;但如果冒号后是完整句子或定义(如"XX:这是指……"),则整句描述的对象本身才是知识点
|
||||
5. 列表/定义项拆分:
|
||||
- 格式为"XX:子项1,子项2,子项3"时,冒号后的每个子项各自独立成知识点
|
||||
- 格式为多行列表(如"重复:xxx\n级进:xxx\n跳进:xxx"),每行各自独立成知识点
|
||||
- 如果冒号后是完整句子或定义(如"XX:这是指……"),则整句描述的对象本身才是知识点
|
||||
- **知识点标题不得包含括号、冒号、引号等任何标点符号**,只保留核心词(如"重复(旋律进行方式)"应输出为"重复","音高、和弦"应输出为"音高"和"和弦")
|
||||
6. 列表项过滤:只保留有独立含义的知识点,忽略序号、标点符号、无意义的装饰词
|
||||
7. 内容页优先:如果一个知识点在教学内容页展开讲解了,比仅出现在列表中更重要
|
||||
8. 最小粒度:宁可多输出几个独立的知识词,也不要合并成一个大而笼统的标题
|
||||
@@ -668,13 +646,24 @@ class PPTParser:
|
||||
for clip in sorted_clips[1:]:
|
||||
prev = merged[-1]
|
||||
if clip['start'] < prev['end']:
|
||||
# 重叠:prev延伸到clip的end,保留clip的标题(标题在clip原start处切换)
|
||||
# 重叠:prev延伸到clip的end,检测标题切换
|
||||
if clip['title'] != prev['title']:
|
||||
# 标题切换点 = clip['start'] 相对于 prev 起点的时间
|
||||
switch_offset = clip['start'] - prev['start']
|
||||
# 建立 title_segments
|
||||
prev['title_segments'] = [
|
||||
[prev['title'], 0],
|
||||
[clip['title'], switch_offset],
|
||||
]
|
||||
prev['title'] = prev['title'] # 保留第一个标题作主标题
|
||||
prev['end'] = clip['end']
|
||||
logger.info(f" 合并重叠: '{prev['title']}' 延伸至 {prev['end']}s,"
|
||||
f"标题在 {clip['start']}s 切换为 '{clip['title']}'")
|
||||
else:
|
||||
# 不重叠:直接添加
|
||||
merged.append(dict(clip))
|
||||
# 不重叠:直接添加,清除 title_segments(由系统默认处理)
|
||||
c = dict(clip)
|
||||
c.pop('title_segments', None)
|
||||
merged.append(c)
|
||||
|
||||
return merged
|
||||
|
||||
@@ -855,7 +844,11 @@ class PPTParser:
|
||||
|
||||
# PPT参考(完整文本 + 知识点列表)
|
||||
if ppt_full_text or ppt_knowledge:
|
||||
knowledge_lines = "\n".join([f" - {kp['title']}" for kp in (ppt_knowledge or [])])
|
||||
knowledge_list = ppt_knowledge or []
|
||||
# 带序号的列表,LLM 用序号引用,不许自由发挥
|
||||
knowledge_lines = "\n".join(
|
||||
[f" [{i}] {kp['title']}" for i, kp in enumerate(knowledge_list)]
|
||||
)
|
||||
knowledge_section = f"""
|
||||
【PPT参考文本(权威背景)】
|
||||
以下是与本节课配套的PPT完整内容,请以此为权威参考:
|
||||
@@ -887,14 +880,13 @@ class PPTParser:
|
||||
|
||||
【重要规则】
|
||||
1. 逐条处理:必须为列表中的**每一个知识点**搜索视频转录文本,找到讲解最集中的片段
|
||||
2. **title 必须完全等于知识点列表中的原名**,不许改写、不许概括、不许扩展
|
||||
- ✅ 正确:knowledge_point 是"弹琴的手型",title 就用"弹琴的手型"
|
||||
- ❌ 错误:title 用"手型支撑与放松的核心要求"(自己发挥)
|
||||
3. **knowledge_point 字段也必须用知识点列表中的原名**
|
||||
4. 时间必须精确:使用转录文本中的实际时间戳
|
||||
5. 时长控制:每个片段约5-15秒,重要内容可以稍长(最长不超过20秒)
|
||||
6. 总时长不超过180秒:如果知识点太多导致总时长超标,优先保留最重要的知识点,其余在not_found中说明
|
||||
7. 只输出JSON,不要添加任何解释
|
||||
2. **输出序号而非名称**:kp_idx 必须是列表中的序号(如 0、3、7),不许自己发挥名称
|
||||
- ✅ 正确:"kp_idx": 3 对应列表中第 4 项
|
||||
- ❌ 错误:"kp_idx": "重复(旋律进行方式)"(这是自由发挥,不是序号)
|
||||
3. 时间必须精确:使用转录文本中的实际时间戳
|
||||
4. 时长控制:每个片段约5-15秒,重要内容可以稍长(最长不超过20秒)
|
||||
5. 总时长不超过{self.max_total_duration}秒:如果知识点太多导致总时长超标,优先保留最重要的知识点,其余在not_found中说明
|
||||
6. 只输出JSON,不要添加任何解释
|
||||
|
||||
【视频转录文本(带时间戳)】
|
||||
{transcript_text}
|
||||
@@ -902,10 +894,10 @@ class PPTParser:
|
||||
请以以下JSON格式输出(不要输出其他内容):
|
||||
{{
|
||||
"clips": [
|
||||
{{"title": "知识点原名(不许改写)", "start": 开始秒数, "end": 结束秒数, "knowledge_point": "知识点原名"}},
|
||||
{{"title": "知识点原名", "start": 开始秒数, "end": 结束秒数, "knowledge_point": "知识点原名"}}
|
||||
{{"kp_idx": 序号, "start": 开始秒数, "end": 结束秒数}},
|
||||
{{"kp_idx": 序号, "start": 开始秒数, "end": 结束秒数}}
|
||||
],
|
||||
"not_found": ["知识点原名(必须与列表中的名称完全一致)"]
|
||||
"not_found": [序号, 序号]
|
||||
}}"""
|
||||
|
||||
try:
|
||||
@@ -929,31 +921,41 @@ class PPTParser:
|
||||
return None
|
||||
|
||||
clips = parsed.get("clips", [])
|
||||
not_found = parsed.get("not_found", [])
|
||||
not_found_idxs = parsed.get("not_found", [])
|
||||
|
||||
if not clips and not not_found:
|
||||
if not clips and not not_found_idxs:
|
||||
return None
|
||||
|
||||
# 验证和清理
|
||||
# 通过序号映射回原始名称(序号 → 原始知识点名称)
|
||||
knowledge_list = ppt_knowledge or []
|
||||
title_map = {i: kp['title'] for i, kp in enumerate(knowledge_list)}
|
||||
|
||||
# 验证和清理:序号 → 原始名称
|
||||
validated = []
|
||||
for clip in clips:
|
||||
title = clip.get("title", "")
|
||||
kp_idx = int(clip.get("kp_idx", -1))
|
||||
if kp_idx not in title_map:
|
||||
logger.warning(f" 跳过无效序号 kp_idx={kp_idx}(超出范围 0-{len(title_map)-1})")
|
||||
continue
|
||||
title = title_map[kp_idx]
|
||||
start = max(0, float(clip.get("start", 0)))
|
||||
raw_end = float(clip.get("end", 0))
|
||||
end = min(raw_end, start + self.max_clip_duration)
|
||||
kp = clip.get("knowledge_point", "")
|
||||
validated.append({
|
||||
"title": title,
|
||||
"start": int(start),
|
||||
"end": int(end),
|
||||
"knowledge_point": kp,
|
||||
"knowledge_point": title,
|
||||
})
|
||||
|
||||
logger.info(f"LLM提取成功: {len(validated)} 个片段,{len(not_found)} 个未找到")
|
||||
# not_found 中的序号也映射回名称
|
||||
not_found_names = [title_map[i] for i in not_found_idxs if i in title_map]
|
||||
|
||||
logger.info(f"LLM提取成功: {len(validated)} 个片段,{len(not_found_names)} 个未找到")
|
||||
for c in validated:
|
||||
logger.info(f" [{c['knowledge_point']}] {c['title']}: {c['start']}s - {c['end']}s")
|
||||
if not_found:
|
||||
logger.info(f" 未找到知识点: {not_found}")
|
||||
if not_found_names:
|
||||
logger.info(f" 未找到知识点: {not_found_names}")
|
||||
|
||||
return validated
|
||||
|
||||
@@ -1007,6 +1009,9 @@ class PPTParser:
|
||||
}, f, ensure_ascii=False)
|
||||
logger.info(f"已保存PPT知识点到checkpoint")
|
||||
|
||||
# 保存PPT原文供后续步骤使用
|
||||
self.ppt_text = ppt_cleaned_text or ""
|
||||
|
||||
# Step 3: LLM校正文本(以PPT全文为参考)- 带checkpoint复用
|
||||
self._report('parse', 30, "LLM校正文本...")
|
||||
corrected_checkpoint = os.path.join(self.inter_dir, "corrected_transcript.json")
|
||||
@@ -1052,6 +1057,7 @@ class PPTParser:
|
||||
"clips": clips,
|
||||
"output_dir": self.output_dir,
|
||||
"term_corrections": self.term_corrections,
|
||||
"ppt_text": getattr(self, 'ppt_text', ''),
|
||||
"video_params": {
|
||||
"fade_duration": 1,
|
||||
"title_fontsize": 48,
|
||||
|
||||
+241
-11
@@ -228,15 +228,32 @@ class SubtitlePipeline:
|
||||
offset = offsets[i]
|
||||
clip_duration = offsets[i+1] - offsets[i] if i+1 < len(offsets) else 3
|
||||
|
||||
# 添加标题(使用title样式)- 标题显示3秒后正文才显示,避免重叠
|
||||
title_duration = min(3, clip_duration)
|
||||
title_track.add(offset, offset + title_duration, clip['title'], style='title')
|
||||
# 添加标题(使用title样式)
|
||||
if clip.get('title_segments'):
|
||||
# 多标题片段:遍历 title_segments [(title, start_offset), ...]
|
||||
# 每个标题最多显示 title_duration 秒
|
||||
segs = clip['title_segments']
|
||||
for j, (title, seg_start) in enumerate(segs):
|
||||
next_start = segs[j+1][1] if j+1 < len(segs) else clip_duration
|
||||
seg_end = min(seg_start + title_duration, next_start)
|
||||
title_track.add(
|
||||
offset + seg_start,
|
||||
offset + seg_end,
|
||||
title,
|
||||
style='title'
|
||||
)
|
||||
# 正文字幕从最后一个标题段结束后开始
|
||||
content_start = offset + segs[-1][1]
|
||||
else:
|
||||
# 单标题:标题显示3秒后正文才显示,避免重叠
|
||||
title_duration = min(3, clip_duration)
|
||||
title_track.add(offset, offset + title_duration, clip['title'], style='title')
|
||||
content_start = offset + title_duration
|
||||
|
||||
# 添加正文字幕 - 从标题结束后开始,避免重叠
|
||||
with open(json_path, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
|
||||
content_start = offset + title_duration # 正文从标题结束后开始
|
||||
for seg in data.get('segments', []):
|
||||
text = seg.get('text', '').strip()
|
||||
if not text:
|
||||
@@ -253,12 +270,37 @@ class SubtitlePipeline:
|
||||
# 只添加在clip时间范围内的字幕
|
||||
clip_end = clip['end'] - clip['start'] + offset
|
||||
if seg_start < clip_end and seg_end <= clip_end:
|
||||
content_track.add(
|
||||
seg_start,
|
||||
seg_end,
|
||||
text,
|
||||
style='content'
|
||||
)
|
||||
# pipeline.py 已按标点拆分,此处只处理意外超长segment(无标点且>8秒)
|
||||
duration = seg_end - seg_start
|
||||
if duration > 8.0:
|
||||
# 按标点拆分
|
||||
import re
|
||||
parts = re.split(r'(?<=[。!??!])', text)
|
||||
if len(parts) > 1:
|
||||
total_len = sum(len(p) for p in parts)
|
||||
if total_len > 0:
|
||||
cum_len = 0
|
||||
s_start = seg_start
|
||||
for part in parts:
|
||||
part = part.strip()
|
||||
if not part:
|
||||
continue
|
||||
cum_len += len(part)
|
||||
s_end = seg_start + duration * cum_len / total_len
|
||||
content_track.add(s_start, s_end, part, style='content')
|
||||
s_start = s_end
|
||||
continue
|
||||
# 无标点则平均拆分
|
||||
num_splits = max(2, int(duration / 8.0) + 1)
|
||||
chunk_len = len(text) // num_splits
|
||||
for i in range(num_splits):
|
||||
t_start = seg_start + duration * i / num_splits
|
||||
t_end = seg_start + duration * (i + 1) / num_splits
|
||||
chunk_text = text[i * chunk_len:(i + 1) * chunk_len].strip()
|
||||
if chunk_text:
|
||||
content_track.add(t_start, t_end, chunk_text, style='content')
|
||||
else:
|
||||
content_track.add(seg_start, seg_end, text, style='content')
|
||||
|
||||
# 保存两个轨道 - 标题使用SRT格式
|
||||
version = self._get_next_version()
|
||||
@@ -320,4 +362,192 @@ def load_clip_subtitles(inter_dir, clip_nums):
|
||||
if os.path.exists(json_path):
|
||||
with open(json_path, 'r', encoding='utf-8') as f:
|
||||
clips[num] = json.load(f)
|
||||
return clips
|
||||
return clips
|
||||
|
||||
|
||||
def parse_srt(content: str) -> list:
    """Parse SRT text into a list of subtitle cues.

    Args:
        content: Full text of an SRT file. A leading UTF-8 BOM and
            Windows/old-Mac line endings are tolerated.

    Returns:
        A list of ``(index, start, end, text)`` tuples in file order.
        ``index`` is an int; ``start`` and ``end`` are the timestamp strings
        with the SRT decimal comma replaced by a dot; ``text`` keeps the
        cue's internal line breaks. Blocks that do not have an integer index
        line, a ``start --> end`` line and at least one text line are skipped.
    """
    # str.strip() does not treat U+FEFF as whitespace, so a BOM would make
    # int() fail on the first index line; CR/CRLF endings would prevent the
    # blank-line split below from finding any cue boundary.
    normalized = content.lstrip('\ufeff').replace('\r\n', '\n').replace('\r', '\n')

    segments = []
    for block in normalized.strip().split('\n\n'):
        lines = block.strip().split('\n')
        if len(lines) < 3:
            continue
        try:
            idx = int(lines[0])
            times = lines[1].split(' --> ')
            start = times[0].strip().replace(',', '.')
            end = times[1].strip().replace(',', '.')
        except (ValueError, IndexError):
            # Malformed index or timing line: skip this block, keep the rest.
            continue
        segments.append((idx, start, end, '\n'.join(lines[2:])))
    return segments
|
||||
|
||||
|
||||
def format_srt(segments: list) -> str:
    """Format a list of subtitle cues as SRT text.

    Args:
        segments: Iterable of ``(index, start, end, text)`` tuples, where
            ``start`` and ``end`` are timestamp strings using a dot as the
            decimal separator (the shape produced by ``parse_srt``).

    Returns:
        The SRT document: cues separated by one blank line, timestamps
        written with the SRT decimal comma, and a single trailing newline.
        An empty input yields just ``"\\n"``.
    """
    blocks = []
    for idx, start, end, text in segments:
        # SRT uses a comma before the milliseconds.
        start_s = start.replace('.', ',')
        end_s = end.replace('.', ',')
        blocks.append(f"{idx}\n{start_s} --> {end_s}\n{text}")
    return '\n\n'.join(blocks) + '\n'
|
||||
|
||||
|
||||
def correct_subtitles_llm(
    title_path: str,
    content_path: str,
    ppt_text: str,
    llm_client,
    output_path: str = None,
) -> str:
    """Correct subtitle text with an LLM while keeping the original timing.

    Only ``idx|text`` lines are sent to the model; timestamps never leave
    this function and are re-attached to the corrected text afterwards.
    The title SRT and the PPT text are included in the prompt as reference
    material for terminology.

    Args:
        title_path: Path to the title subtitle SRT (reference only).
        content_path: Path to the content subtitle SRT to be corrected.
        ppt_text: PPT text used as a terminology reference; ``None`` is
            treated as an empty string.
        llm_client: Object exposing ``chat(prompt=..., max_tokens=...)`` that
            returns the model reply as a string (or a falsy value on failure).
        output_path: Where to write the corrected SRT. Defaults to
            overwriting ``content_path``.

    Returns:
        The path of the corrected SRT. When nothing could be corrected (no
        parsable cues, empty reply, unparsable reply) the original
        ``content_path`` is returned and no file is written.
    """
    import json
    import re

    # Read the source subtitles.
    with open(title_path, 'r', encoding='utf-8') as f:
        title_srt = f.read()
    with open(content_path, 'r', encoding='utf-8') as f:
        content_srt = f.read()

    # Parse the SRT, keeping the full timestamps for the rebuild step.
    content_segments = parse_srt(content_srt)
    if not content_segments:
        # Nothing parsable: do not spend an LLM call, and above all do not
        # overwrite the subtitle file with an empty document.
        logger.warning("LLM返回为空,保留原字幕")
        return content_path

    # Plain "idx|text" lines are all the model needs to see.
    transcript_text = '\n'.join(
        f"{idx}|{text}" for idx, _start, _end, text in content_segments
    )
    ppt_excerpt = (ppt_text or "")[:3000]

    # Build the prompt.
    prompt = f"""你是一个钢琴教学视频的字幕纠错专家。

## 参考信息
标题字幕(title.srt)- 权威知识点参考:
{title_srt[:2000]}

PPT原文(ppt)- 术语权威参考:
{ppt_excerpt}

## 任务
修正以下转录文本中的错字、漏字、术语错误(如"骚"改为"sol","拿两个音速"改为"拿两个因素"等)。
每行格式:序号|原始文字

## 待纠正文本({len(content_segments)}条):
{transcript_text}

## 输出要求
- 以JSON格式输出,只输出JSON,不要有任何其他解释
- 用原始序号匹配,不要改变结构
{{
"corrected": [
{{"idx": 序号, "text": "修正后的文字"}},
{{"idx": 序号, "text": "修正后的文字"}}
]
}}"""

    # Call the LLM.
    response = llm_client.chat(
        prompt=prompt,
        max_tokens=8192,
    )
    if not response:
        logger.warning("LLM返回为空,保留原字幕")
        return content_path

    # Parse the JSON reply.
    try:
        # Take everything from the first "{" to the last "}". Markdown code
        # fences (with or without a language tag, closed or not) contain no
        # braces, so they need no separate stripping.
        json_match = re.search(r'\{.*\}', response, re.DOTALL)
        if not json_match:
            raise ValueError("No JSON found in response")
        result = json.loads(json_match.group())

        # Map cue index -> corrected text. Models sometimes return the index
        # as a string, so normalise it to int to match parse_srt's indexes.
        corrected_map = {}
        for item in result.get('corrected', []):
            text = item['text']
            # An empty or non-string replacement would blank out a cue.
            if isinstance(text, str) and text.strip():
                corrected_map[int(item['idx'])] = text
    except (ValueError, TypeError, KeyError, AttributeError) as e:
        logger.warning(f"字幕纠正JSON解析失败,保留原字幕: {e}")
        return content_path

    # Re-attach the original timing and record what changed.
    changed = []
    corrected_segments = []
    for idx, start, end, orig_text in content_segments:
        new_text = corrected_map.get(idx, orig_text)
        corrected_segments.append((idx, start, end, new_text))
        if new_text != orig_text:
            changed.append((idx, orig_text, new_text))

    corrected_srt = format_srt(corrected_segments)

    # Save.
    if output_path is None:
        output_path = content_path
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(corrected_srt)

    # Diff log.
    if changed:
        logger.info(f"字幕纠正,共 {len(changed)} 处修改:")
        for idx, old, new in changed:
            old_s = old[:50] + ('...' if len(old) > 50 else '')
            new_s = new[:50] + ('...' if len(new) > 50 else '')
            logger.info(f" [{idx:3d}] \"{old_s}\" → \"{new_s}\"")
    else:
        logger.info("字幕纠正,无修改")

    logger.info(f"字幕已修正: {output_path}")
    return output_path
|
||||
+4
-11
@@ -146,7 +146,7 @@ def burn_subtitles(video_path, srt_path, output_path):
|
||||
return success
|
||||
|
||||
|
||||
def burn_dual_subtitles(video_path, title_srt_path, content_srt_path, output_path, title_fontsize=90, title_color="FFFF00", subtitle_fontsize=24, subtitle_color="FFFFFF"):
|
||||
def burn_dual_subtitles(video_path, title_srt_path, content_srt_path, output_path, title_fontsize=60, title_color="FFFF00", subtitle_fontsize=24, subtitle_color="FFFFFF"):
|
||||
"""
|
||||
烧录两层字幕到视频(标题在屏幕正中,正文在下方)
|
||||
|
||||
@@ -163,7 +163,7 @@ def burn_dual_subtitles(video_path, title_srt_path, content_srt_path, output_pat
|
||||
Returns:
|
||||
True if success
|
||||
"""
|
||||
# Windows路径转义
|
||||
# Windows路径转义:D:/ 需要双反斜杠转义
|
||||
title_escaped = title_srt_path.replace('\\', '/').replace('D:/', 'D\\:/')
|
||||
content_escaped = content_srt_path.replace('\\', '/').replace('D:/', 'D\\:/')
|
||||
|
||||
@@ -180,19 +180,12 @@ def burn_dual_subtitles(video_path, title_srt_path, content_srt_path, output_pat
|
||||
title_bgr = html_to_bgr(title_color)
|
||||
subtitle_bgr = html_to_bgr(subtitle_color)
|
||||
|
||||
# 标题样式:使用SRT+force_style,Alignment=5水平居中,垂直位置由MarginV控制
|
||||
# 标题样式:使用SRT+force_style,Alignment=2水平居中,MarginV=150使其位于屏幕上偏下区域(36%高度)
|
||||
# 正文字样式:底部居中,24字号,白色,带描边
|
||||
content_style = f"FontName=微软雅黑,FontSize={subtitle_fontsize},PrimaryColour={subtitle_bgr},Alignment=2,MarginV=20,Outline=1,Shadow=1"
|
||||
|
||||
# 使用两个独立字幕滤镜分别渲染,然后叠加
|
||||
# 标题使用Alignment=5,MarginV=0(正中)
|
||||
title_style = f"FontName=微软雅黑,FontSize={title_fontsize},PrimaryColour={title_bgr},Alignment=5,MarginV=0,Outline=3,Shadow=2"
|
||||
title_style = f"FontName=微软雅黑,FontSize={title_fontsize},PrimaryColour={title_bgr},Alignment=2,MarginV=150,Outline=3,Shadow=2"
|
||||
|
||||
# 使用两个字幕滤镜叠加,然后映射视频+原始音频
|
||||
# 标题使用Alignment=5,MarginV=0(正中)
|
||||
title_style = f"FontName=微软雅黑,FontSize={title_fontsize},PrimaryColour={title_bgr},Alignment=5,MarginV=0,Outline=3,Shadow=2"
|
||||
|
||||
# 使用两个字幕滤镜叠加
|
||||
filter_str = f"[0:v]subtitles='{title_escaped}':force_style='{title_style}',subtitles='{content_escaped}':force_style='{content_style}'[out]"
|
||||
|
||||
# 保留原始音频 - 映射视频输出和原始音频
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
f = open(r'D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt', 'rb')
|
||||
data = f.read()
|
||||
f.close()
|
||||
|
||||
print('Total bytes:', len(data))
|
||||
print('First 300 hex:', data[:300].hex())
|
||||
print()
|
||||
print('UTF-8 decode of first 300:')
|
||||
print(data[:300].decode('utf-8', 'replace'))
|
||||
@@ -1,3 +0,0 @@
|
||||
@echo off
|
||||
chcp 65001 >nul
|
||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" -c "import pptx; print('pptx available')"
|
||||
@@ -1,3 +0,0 @@
|
||||
@echo off
|
||||
chcp 65001 >nul
|
||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\check_pptx2.py"
|
||||
@@ -1,10 +0,0 @@
|
||||
import sys
|
||||
out = r"D:\F\NewI\opencode\daily-workspace\temp\check_pptx_out.txt"
|
||||
try:
|
||||
import pptx
|
||||
result = "pptx available: " + pptx.__version__
|
||||
except ImportError as e:
|
||||
result = "pptx NOT available: " + str(e)
|
||||
with open(out, "w", encoding="utf-8") as f:
|
||||
f.write(result)
|
||||
print(result)
|
||||
@@ -1,3 +0,0 @@
|
||||
@echo off
|
||||
chcp 65001 >nul
|
||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\check_transcript.py"
|
||||
@@ -1,17 +0,0 @@
|
||||
import os
|
||||
import json
|
||||
|
||||
inter_dir = r"D:\F\NewI\opencode\daily-workspace\projects\piano-lesson-highlights\cases\lesson1\output_cli_full\intermediates"
|
||||
transcript_file = os.path.join(inter_dir, "full_transcript.json")
|
||||
|
||||
if os.path.exists(transcript_file):
|
||||
size = os.path.getsize(transcript_file)
|
||||
with open(transcript_file, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
print(f"Transcript exists: {size} bytes")
|
||||
print(f"Segments: {len(data)}")
|
||||
if data:
|
||||
print(f"First segment: {data[0]}")
|
||||
print(f"Last segment: {data[-1]}")
|
||||
else:
|
||||
print("Transcript file NOT found")
|
||||
@@ -1,4 +0,0 @@
|
||||
@echo off
|
||||
chcp 65001 >nul
|
||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\debug_ppt.py"
|
||||
pause
|
||||
@@ -1,30 +0,0 @@
|
||||
import zipfile
|
||||
import re
|
||||
|
||||
ppt = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\钢琴演奏入门第一课.pptx"
|
||||
|
||||
with zipfile.ZipFile(ppt, "r") as z:
|
||||
names = z.namelist()
|
||||
slide_files = [f for f in names if f.startswith("ppt/slides/slide") and f.endswith(".xml")]
|
||||
print(f"Total files in zip: {len(names)}")
|
||||
print(f"Slide files found: {len(slide_files)}")
|
||||
print(f"First 5 slide files: {slide_files[:5]}")
|
||||
|
||||
# Test presentation.xml
|
||||
try:
|
||||
pres_xml = z.read("ppt/presentation.xml").decode("utf-8", errors="replace")
|
||||
sld_ids = re.findall(r'<p:sldId\b[^>]*r:id="([^"]+)"', pres_xml)
|
||||
print(f"\nsldIdList rIds: {sld_ids[:5]}")
|
||||
except Exception as e:
|
||||
print(f"\npresentation.xml error: {e}")
|
||||
|
||||
# Test rels
|
||||
try:
|
||||
rels_xml = z.read("ppt/_rels/presentation.xml.rels").decode("utf-8", errors="replace")
|
||||
rid_to_target = dict(re.findall(r'Id="([^"]+)"[^>]*Target="([^"]+)"', rels_xml))
|
||||
print(f"Rels entries: {len(rid_to_target)}")
|
||||
# Show a sample
|
||||
for k, v in list(rid_to_target.items())[:3]:
|
||||
print(f" {k} -> {v}")
|
||||
except Exception as e:
|
||||
print(f"\nrels error: {e}")
|
||||
@@ -1,3 +0,0 @@
|
||||
@echo off
|
||||
chcp 65001 >nul
|
||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\debug_ppt2.py"
|
||||
@@ -1,34 +0,0 @@
|
||||
import zipfile, re, sys
|
||||
|
||||
ppt = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\钢琴演奏入门第一课.pptx"
|
||||
out = r"D:\F\NewI\opencode\daily-workspace\temp\debug_ppt_out.txt"
|
||||
|
||||
results = []
|
||||
|
||||
with zipfile.ZipFile(ppt, "r") as z:
|
||||
names = z.namelist()
|
||||
slide_files = [f for f in names if f.startswith("ppt/slides/slide") and f.endswith(".xml")]
|
||||
results.append(f"Total files in zip: {len(names)}")
|
||||
results.append(f"Slide files found: {len(slide_files)}")
|
||||
results.append(f"First 5: {slide_files[:5]}")
|
||||
|
||||
try:
|
||||
pres_xml = z.read("ppt/presentation.xml").decode("utf-8", errors="replace")
|
||||
sld_ids = re.findall(r'<p:sldId\b[^>]*r:id="([^"]+)"', pres_xml)
|
||||
results.append(f"sldIds: {sld_ids[:5]}")
|
||||
except Exception as e:
|
||||
results.append(f"pres error: {e}")
|
||||
|
||||
try:
|
||||
rels_xml = z.read("ppt/_rels/presentation.xml.rels").decode("utf-8", errors="replace")
|
||||
rid_to_target = dict(re.findall(r'Id="([^"]+)"[^>]*Target="([^"]+)"', rels_xml))
|
||||
results.append(f"rels count: {len(rid_to_target)}")
|
||||
for k, v in list(rid_to_target.items())[:3]:
|
||||
results.append(f" {k} -> {v}")
|
||||
except Exception as e:
|
||||
results.append(f"rels error: {e}")
|
||||
|
||||
with open(out, "w", encoding="utf-8") as f:
|
||||
f.write("\n".join(results))
|
||||
|
||||
print("Done, see", out)
|
||||
@@ -1,3 +0,0 @@
|
||||
@echo off
|
||||
chcp 65001 >nul
|
||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\debug_slide1.py" > "D:\F\NewI\opencode\daily-workspace\temp\debug_slide1_out.txt" 2>&1
|
||||
@@ -1,23 +0,0 @@
|
||||
import zipfile, re, os
|
||||
|
||||
ppt = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\钢琴演奏入门第一课.pptx"
|
||||
out_dir = r"D:\F\NewI\opencode\daily-workspace\temp"
|
||||
slide1_out = os.path.join(out_dir, "slide1_texts.txt")
|
||||
xml_out = os.path.join(out_dir, "slide1_xml_preview.txt")
|
||||
|
||||
with zipfile.ZipFile(ppt, "r") as z:
|
||||
slide1_file = "ppt/slides/slide1.xml"
|
||||
content = z.read(slide1_file).decode("utf-8", errors="replace")
|
||||
all_texts = re.findall(r"<a:t[^>]*>([^<]*)</a:t>", content)
|
||||
|
||||
meaningful = [t for t in all_texts if t.strip()]
|
||||
with open(slide1_out, "w", encoding="utf-8") as f:
|
||||
f.write(f"Total fragments: {len(all_texts)}\n")
|
||||
f.write(f"Meaningful fragments: {len(meaningful)}\n\n")
|
||||
for i, t in enumerate(meaningful):
|
||||
f.write(f"[{i}] {t}\n")
|
||||
|
||||
with open(xml_out, "w", encoding="utf-8") as f:
|
||||
f.write(content[:8000])
|
||||
|
||||
print("Done")
|
||||
@@ -1,3 +0,0 @@
|
||||
@echo off
|
||||
chcp 65001 >nul
|
||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\do_install.py"
|
||||
@@ -1,12 +0,0 @@
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
venv_python = r"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe"
|
||||
result = subprocess.run(
|
||||
[venv_python, "-m", "pip", "install", "python-pptx"],
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
print("STDOUT:", result.stdout)
|
||||
print("STDERR:", result.stderr)
|
||||
print("Return code:", result.returncode)
|
||||
@@ -1,6 +0,0 @@
|
||||
@echo off
|
||||
chcp 65001 >nul
|
||||
echo Installing python-pptx...
|
||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" -m pip install python-pptx -q
|
||||
echo Done
|
||||
pause
|
||||
@@ -1,4 +0,0 @@
|
||||
@echo off
|
||||
chcp 65001 >nul
|
||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" -m pip install python-pptx
|
||||
echo Exit: %errorlevel%
|
||||
@@ -1,4 +0,0 @@
|
||||
@echo off
|
||||
chcp 65001 >nul
|
||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" -m pip install python-pptx > "D:\F\NewI\opencode\daily-workspace\temp\pip_out.txt" 2>&1
|
||||
echo Exit: %errorlevel%
|
||||
@@ -1,12 +0,0 @@
|
||||
# Kill all python processes related to our CLI
|
||||
Get-Process python -ErrorAction SilentlyContinue | Stop-Process -Force
|
||||
Start-Sleep 3
|
||||
|
||||
# Verify killed
|
||||
$remaining = Get-Process python -ErrorAction SilentlyContinue
|
||||
if ($remaining) {
|
||||
Write-Host "Still running:"
|
||||
$remaining | ForEach-Object { Write-Host " PID:" $_.Id }
|
||||
} else {
|
||||
Write-Host "All python processes killed"
|
||||
}
|
||||
Binary file not shown.
@@ -1,5 +0,0 @@
|
||||
f = open(r'D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt', 'r', encoding='utf-8')
|
||||
lines = f.readlines()
|
||||
f.close()
|
||||
for l in lines[:35]:
|
||||
print(l.rstrip())
|
||||
Binary file not shown.
Reference in New Issue
Block a user