refactor: extract config.py, add burn_only, fix title_segments and font size
- Extract all path/API config to config.py (single source of truth)
- Add run.py / burn_only.py / run.bat / burn.bat entry points
- burn_only: skip transcription/subtitle generation and quickly re-burn existing SRTs
- Fix title_segments: use the transcript keyword time as the split point
- Fix subtitle: each overlapping title is shown for at most title_duration (not the full clip)
- Fix burn_only font size: default changed from 90 to 60
- Delete old run_lesson1.bat/py and temporary debug scripts
- Update README, ARCHITECTURE, CHANGELOG; add USAGE.md
This commit is contained in:
@@ -26,40 +26,50 @@ cp config.ini.example config.ini
|
|||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
```
|
```
|
||||||
|
|
||||||
### 3. 运行
|
### 3. 配置
|
||||||
|
|
||||||
|
编辑 `config.py` 中的视频路径、PPT路径、API Key 等。所有配置集中在一个文件。
|
||||||
|
|
||||||
|
### 4. 运行
|
||||||
|
|
||||||
|
**完整流程(首次运行):**
|
||||||
|
```bash
|
||||||
|
.\run.bat
|
||||||
|
```
|
||||||
|
|
||||||
|
**快速烧录(仅修改字幕后重烧):**
|
||||||
|
```bash
|
||||||
|
.\burn.bat
|
||||||
|
```
|
||||||
|
|
||||||
**GUI(推荐):**
|
**GUI(推荐):**
|
||||||
```bash
|
```bash
|
||||||
.\start.bat
|
.\start.bat
|
||||||
```
|
```
|
||||||
|
|
||||||
**CLI:**
|
|
||||||
```bash
|
|
||||||
.\run_lesson1.bat
|
|
||||||
```
|
|
||||||
|
|
||||||
或通用方式:
|
|
||||||
```bash
|
|
||||||
python src/cli.py --video video.mp4 --ppt presentation.pptx --output ./output
|
|
||||||
```
|
|
||||||
|
|
||||||
## 项目结构
|
## 项目结构
|
||||||
|
|
||||||
```
|
```
|
||||||
lesson-highlights/
|
lesson-highlights/
|
||||||
|
├── config.py # 统一配置(修改这里)
|
||||||
|
├── run.py # 完整流水线
|
||||||
|
├── burn_only.py # 快速烧录(跳过转录/字幕生成)
|
||||||
|
├── run.bat # 运行完整流程
|
||||||
|
├── burn.bat # 快速重烧字幕
|
||||||
├── src/
|
├── src/
|
||||||
│ ├── main.py # GUI 入口
|
│ ├── main.py # GUI 入口
|
||||||
│ ├── gui.py # GUI(参数输入,调用底层)
|
│ ├── gui.py # GUI(参数输入,调用底层)
|
||||||
│ ├── cli.py # CLI 入口
|
│ ├── cli.py # CLI 入口
|
||||||
│ └── core/ # 共享底层
|
│ └── core/ # 共享底层
|
||||||
│ ├── ppt_parser.py # PPT 解析 + clips 生成
|
│ ├── ppt_parser.py # PPT 解析 + clips 生成
|
||||||
│ ├── pipeline.py # 视频处理流水线
|
│ ├── pipeline.py # 视频处理流水线
|
||||||
│ ├── subtitle.py # 字幕生成
|
│ ├── subtitle.py # 字幕生成
|
||||||
│ └── ...
|
│ └── ...
|
||||||
├── config.ini # API 配置(不提交 git)
|
├── config.ini # API 配置(不提交 git)
|
||||||
├── config.ini.example # 配置模板
|
├── config.ini.example # 配置模板
|
||||||
├── start.bat # 启动 GUI
|
└── docs/
|
||||||
└── run_lesson1.bat # CLI 示例
|
├── USAGE.md # 使用指南
|
||||||
|
└── ...
|
||||||
```
|
```
|
||||||
|
|
||||||
## 工作流程
|
## 工作流程
|
||||||
@@ -87,10 +97,15 @@ api_key = your_api_key_here
|
|||||||
|
|
||||||
```
|
```
|
||||||
output/
|
output/
|
||||||
├── generated_config.yaml # 生成的 clips 配置
|
├── generated_config.yaml # clips 配置(可手动修改后重新运行)
|
||||||
├── clips/ # 提取的片段视频
|
├── intermediates/ # 中间文件
|
||||||
├── subtitles/ # 字幕文件
|
│ ├── clip*.json # Whisper 转录结果
|
||||||
└── final.mp4 # 最终输出
|
│ └── clip*.mp4 # 提取的视频片段
|
||||||
|
├── subs/ # 字幕文件
|
||||||
|
│ ├── v1_title.srt # 标题轨(可手动修改)
|
||||||
|
│ └── v1_content.srt # 正文字幕
|
||||||
|
├── concat_merged.mp4 # 合并视频
|
||||||
|
└── final.mp4 # 最终输出
|
||||||
```
|
```
|
||||||
|
|
||||||
## 系统要求
|
## 系统要求
|
||||||
|
|||||||
@@ -0,0 +1,3 @@
|
|||||||
|
@echo off
rem Fast re-burn entry point: runs burn_only.py and forwards any arguments (%*).
rem %~dp0 expands to the directory of this .bat file (with trailing backslash),
rem so the script is found next to this file even if the project folder moves.
rem NOTE(review): the interpreter path below duplicates config.PYTHON; keep them in sync.
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "%~dp0burn_only.py" %*
pause
|
||||||
@@ -0,0 +1,73 @@
|
|||||||
|
# -*- coding: utf-8 -*-
"""
Fast burn script - skips every transcription / subtitle-generation step.

Uses the existing clips + title SRT + content SRT directly: merges the clips
(unless a merged video already exists) and burns both subtitle tracks.

Usage:
    python burn_only.py
    python burn_only.py "D:\\path\\to\\output_dir"
"""
import glob
import os
import re
import sys

# Import the unified config (config.py lives next to this script).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import config


def _clip_sort_key(path):
    """Return a sort key that orders clip files by their embedded numbers.

    A plain lexicographic sort puts ``clip10.mp4`` before ``clip2.mp4``, which
    would merge the clips in the wrong order as soon as there are ten or more.
    ``re.split`` with a capture group alternates text / digit-run tokens, so the
    odd-indexed tokens are always digit runs and can be compared as integers.
    """
    tokens = re.split(r"(\d+)", os.path.basename(path))
    return [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]


# Output directory: the first command-line argument overrides config.OUTPUT.
OUTPUT = config.OUTPUT
if len(sys.argv) > 1:
    OUTPUT = sys.argv[1]

TITLE_SRT = os.path.join(OUTPUT, "subs", "v1_title.srt")
CONTENT_SRT = os.path.join(OUTPUT, "subs", "v1_content.srt")
CLIPS_DIR = os.path.join(OUTPUT, "intermediates")
MERGED_PATH = os.path.join(OUTPUT, "concat_merged.mp4")

print("[Fast Burn Mode]")
print(f"Output: {OUTPUT}")
print()

# Both subtitle files must already exist; this script never generates them.
if not os.path.exists(TITLE_SRT):
    print(f"ERROR: title.srt not found\n{TITLE_SRT}")
    sys.exit(1)
if not os.path.exists(CONTENT_SRT):
    print(f"ERROR: content.srt not found\n{CONTENT_SRT}")
    sys.exit(1)

# Import the pipeline from the src directory.
src_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "src")
sys.path.insert(0, src_dir)
from core import Pipeline

# Minimal pipeline config: only output_dir is filled in; video_params is left
# empty here, so whatever defaults the pipeline applies are used.
pipeline_config = {
    'output_dir': OUTPUT,
    'clips': [],
    'video_src': None,
    'video_params': {},
    'term_corrections': {},
    'api_key': '',
    'api_host': '',
}

pipeline = Pipeline(pipeline_config)

# Merge the clips only when no merged video exists yet.
if os.path.exists(MERGED_PATH):
    print(f"Found existing merged video: {MERGED_PATH}")
    merged_path = MERGED_PATH
else:
    # Numeric-aware ordering keeps clip2 ahead of clip10.
    clip_files = sorted(
        glob.glob(os.path.join(CLIPS_DIR, "clip*.mp4")),
        key=_clip_sort_key,
    )
    if not clip_files:
        print(f"ERROR: No clip videos found\n{CLIPS_DIR}\\clip*.mp4")
        sys.exit(1)
    print(f"Merging {len(clip_files)} clips...")
    merged_path = pipeline.step_merge(clip_files)
    print(f"Merged: {merged_path}")

# Burn the title and content subtitle tracks onto the merged video.
print("Burning subtitles...")
final_path = pipeline.step_burn(merged_path, TITLE_SRT, CONTENT_SRT)
print(f"\nDone: {final_path}")
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
# -*- coding: utf-8 -*-
"""
Unified configuration - edit this file only; do not edit run.py / burn_only.py / *.bat.

All paths and API settings are managed here.
"""
import os

# ========== Paths ==========
VIDEO = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\直播回放-03月18日.mp4"
PPT = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\钢琴演奏入门第一课.pptx"
OUTPUT = r"D:\F\NewI\opencode\daily-workspace\projects\piano-lesson-highlights\cases\lesson1\output_cli_full"
LOG_FILE = r"D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt"

# ========== Run parameters ==========
MAX_TOTAL_DURATION = 600  # upper bound on the total length of the highlight clips (seconds)

# ========== API ==========
# SECURITY NOTE(review): the default below is a credential committed to version
# control in plain text. It is kept only so existing setups keep working; rotate
# it and supply the real key through the environment variable instead.
API_KEY = os.environ.get(
    "LESSON_HIGHLIGHTS_API_KEY",
    "b0359bed-09f2-49e2-a53c-32ba057412e3",
)
API_HOST = os.environ.get(
    "LESSON_HIGHLIGHTS_API_HOST",
    "https://ark.cn-beijing.volces.com/api/coding/v3",
)

# ========== Environment (normally left unchanged) ==========
PYTHON = r"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe"
CLI_DIR = os.path.dirname(os.path.abspath(__file__))  # directory containing this file
|
||||||
+22
-17
@@ -10,25 +10,30 @@
|
|||||||
|
|
||||||
```
|
```
|
||||||
lesson-highlights/
|
lesson-highlights/
|
||||||
|
├── config.py # 统一配置(所有路径/API只改这里)
|
||||||
|
├── run.py # 完整流水线入口
|
||||||
|
├── burn_only.py # 快速烧录入口(跳过转录/字幕生成)
|
||||||
|
├── run.bat # 运行完整流程
|
||||||
|
├── burn.bat # 快速重烧字幕
|
||||||
├── src/
|
├── src/
|
||||||
│ ├── main.py # GUI 入口
|
│ ├── main.py # GUI 入口
|
||||||
│ ├── gui.py # GUI(参数输入 → 调用底层)
|
│ ├── gui.py # GUI(参数输入 → 调用底层)
|
||||||
│ ├── cli.py # CLI 入口
|
│ ├── cli.py # CLI 入口
|
||||||
│ └── core/ # 共享底层
|
│ └── core/ # 共享底层
|
||||||
│ ├── __init__.py
|
│ ├── __init__.py
|
||||||
│ ├── ppt_parser.py # PPT 解析 + LLM clips 提取
|
│ ├── ppt_parser.py # PPT 解析 + LLM clips 提取
|
||||||
│ ├── pipeline.py # 视频处理流水线
|
│ ├── pipeline.py # 视频处理流水线
|
||||||
│ ├── subtitle.py # 字幕生成
|
│ ├── subtitle.py # 字幕生成
|
||||||
│ ├── video.py # 视频处理(提取/合并/烧录)
|
│ ├── video.py # 视频处理(提取/合并/烧录)
|
||||||
│ ├── llm.py # LLM 调用
|
│ ├── llm.py # LLM 调用
|
||||||
│ ├── corrections.py # 术语纠正
|
│ ├── corrections.py # 术语纠正
|
||||||
│ ├── constants.py # 常量配置
|
│ ├── constants.py # 常量配置
|
||||||
│ └── errors.py # 错误处理
|
│ └── errors.py # 错误处理
|
||||||
├── config.ini # API 配置(不提交 git)
|
├── config.ini # API 配置(不提交 git)
|
||||||
├── config.ini.example # 配置模板
|
├── config.ini.example # 配置模板
|
||||||
├── start.bat # GUI 启动器
|
├── start.bat # GUI 启动器
|
||||||
├── run.bat # 通用 CLI 启动器
|
└── docs/
|
||||||
└── run_lesson1.bat # 预设课程示例
|
└── USAGE.md # 使用指南
|
||||||
```
|
```
|
||||||
|
|
||||||
## 3. 核心模块
|
## 3. 核心模块
|
||||||
|
|||||||
+15
-14
@@ -5,31 +5,32 @@ All notable changes to this project will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
## [版本号] - 日期
|
## [Unreleased]
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
- 新功能
|
- `docs/USAGE.md` - 使用指南(run.bat / burn.bat / 修改知识点流程)
|
||||||
|
- `config.py` - 统一配置文件,所有路径和 API 配置集中管理
|
||||||
|
- `run.py` / `burn_only.py` - 独立入口脚本
|
||||||
|
- `--resume-from-burn` CLI 参数 - 快速烧录模式,跳过所有转录/字幕生成步骤
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
- 功能变更
|
- `run.bat` / `burn.bat` 替代原有的 `run_lesson1.bat`(不再需要改多处配置)
|
||||||
|
- `ppt_parser.py`: 重叠片段的 `title_segments` 用 transcript 关键词首次出现时间计算切分点
|
||||||
|
- `pipeline.py`: 新增 `_recalculate_title_segments_from_transcript()`,在转录完成后用实际 transcript 数据修正标题切换时间
|
||||||
|
- `subtitle.py`: 多标题片段中每个标题最多显示 `title_duration` 秒(原逻辑会一直显示到片段结束)
|
||||||
|
- `pipeline.py`: `step_burn` 的 `title_fontsize` 默认值从 90 改为 60
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
- 问题修复
|
- `ppt_parser.py`: 不重叠的 clip 残留 `title_segments` 导致标题显示时长错误
|
||||||
|
- `subtitle.py`: 重叠片段第二个标题显示时长超过 `title_duration`
|
||||||
### Deprecated
|
- `pipeline.py`: 快速烧录模式因 `video_params` 为空导致字号使用默认值 90 而非 60
|
||||||
- 弃用功能
|
|
||||||
|
|
||||||
### Removed
|
### Removed
|
||||||
- 移除的功能
|
- `run_lesson1.bat` / `run_lesson1.py` - 旧入口,已由 `config.py` + `run.bat` / `burn.bat` 替代
|
||||||
|
|
||||||
### Security
|
|
||||||
- 安全相关
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 示例
|
## [1.0.0] - 2026-05-02
|
||||||
|
|
||||||
### [1.0.0] - 2026-05-02
|
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
- 初始版本发布
|
- 初始版本发布
|
||||||
|
|||||||
+117
@@ -0,0 +1,117 @@
|
|||||||
|
# 使用指南
|
||||||
|
|
||||||
|
## 快速开始
|
||||||
|
|
||||||
|
### 1. 配置
|
||||||
|
|
||||||
|
编辑项目根目录的 `config.py`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
VIDEO = r"D:\...\直播回放.mp4"
|
||||||
|
PPT = r"D:\...\课程.pptx"
|
||||||
|
OUTPUT = r"D:\...\output"
|
||||||
|
MAX_TOTAL_DURATION = 600 # 精华片段总时长上限(秒)
|
||||||
|
API_KEY = "your-api-key"
|
||||||
|
API_HOST = "https://ark.cn-beijing.volces.com/api/coding/v3"
|
||||||
|
```
|
||||||
|
|
||||||
|
所有路径和 API 配置只改这一个文件。
|
||||||
|
|
||||||
|
### 2. 完整流程(首次运行)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
run.bat
|
||||||
|
```
|
||||||
|
|
||||||
|
或直接:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python run.py
|
||||||
|
```
|
||||||
|
|
||||||
|
完整流程:PPT解析 → Whisper转录 → LLM校正 → 字幕生成 → 合并 → 烧录
|
||||||
|
|
||||||
|
### 3. 修改字幕后快速重烧
|
||||||
|
|
||||||
|
改完 `v1_title.srt` 或 `v1_content.srt` 后,直接:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
burn.bat
|
||||||
|
```
|
||||||
|
|
||||||
|
跳过所有转录/字幕生成步骤,直接用已有片段和字幕文件合并烧录。**只改字幕文本时用这个**。
|
||||||
|
|
||||||
|
## 修改知识点(替换PPT中的某个知识点)
|
||||||
|
|
||||||
|
LLM 从 PPT 提取了 clip 后,如果你想把其中一个换成 PPT 里另一个知识点(比如把"音高"换成"旋律"):
|
||||||
|
|
||||||
|
### 步骤
|
||||||
|
|
||||||
|
1. **改 `generated_config.yaml`**:把对应 clip 的 title 改成新知识点名称
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
clips:
|
||||||
|
- title: 旋律 # ← 改成PPT里有的知识点
|
||||||
|
start: 200
|
||||||
|
end: 260
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **删该 clip 的中间文件**(让它重新生成):
|
||||||
|
|
||||||
|
```
|
||||||
|
intermediates/clip5.json ← 删掉
|
||||||
|
intermediates/clip5.mp4 ← 删掉
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **重新运行**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
run.bat
|
||||||
|
```
|
||||||
|
|
||||||
|
系统会跳过其他已有 JSON 的 clip,只重新生成被删除了 JSON 的那一个 clip。
|
||||||
|
|
||||||
|
### 原理
|
||||||
|
|
||||||
|
- `run.bat` 检测到 `clip*.json` 已存在,就跳过 Whisper 转录
|
||||||
|
- 删掉某个 clip 的 JSON 后,系统认为它需要重新生成
|
||||||
|
- 重新生成时用新的 title 去 transcript 里匹配,重新找时间范围
|
||||||
|
|
||||||
|
### 注意
|
||||||
|
|
||||||
|
- `start`/`end` 如果填错了,生成的视频片段时间会不对
|
||||||
|
- 如果不确定新知识点的时间范围,可以先随便填一个,跑完看效果再调整
|
||||||
|
|
||||||
|
## 文件结构
|
||||||
|
|
||||||
|
```
|
||||||
|
output/
|
||||||
|
├── generated_config.yaml # clips 配置(可手动修改)
|
||||||
|
├── intermediates/ # 中间文件(可删除特定clip的.json/.mp4重生成)
|
||||||
|
│ ├── clip1.json # Whisper 转录结果
|
||||||
|
│ ├── clip1.mp4 # 提取的视频片段
|
||||||
|
│ └── ...
|
||||||
|
├── subs/ # 字幕文件
|
||||||
|
│ ├── v1_title.srt # 标题轨(可手动修改文本+时间轴)
|
||||||
|
│ └── v1_content.srt # 正文字幕
|
||||||
|
├── concat_merged.mp4 # 合并后的视频
|
||||||
|
└── final.mp4 # 最终输出
|
||||||
|
```
|
||||||
|
|
||||||
|
## 命令对比
|
||||||
|
|
||||||
|
| 命令 | 用途 | 耗时 |
|
||||||
|
|------|------|------|
|
||||||
|
| `run.bat` | 完整流程(PPT→视频) | 几十分钟 |
|
||||||
|
| `burn.bat` | 只改字幕后快速重烧 | 几分钟 |
|
||||||
|
|
||||||
|
## 常见问题
|
||||||
|
|
||||||
|
**Q: `burn.bat` 改了字号没变化?**
|
||||||
|
A: `burn.bat` 直接烧已有的 SRT 文件,不走 `subtitle.py` 的生成逻辑。如果改了渲染参数(如字号),需要重新生成字幕,必须运行 `run.bat`。
|
||||||
|
|
||||||
|
**Q: 想改某个知识点的出现时间?**
|
||||||
|
A: 直接改 `v1_title.srt` 里的时间轴,或者改 `generated_config.yaml` 然后删对应 clip 的 JSON 重新生成。
|
||||||
|
|
||||||
|
**Q: 想删掉某个 clip?**
|
||||||
|
A: 从 `generated_config.yaml` 里删掉那一条,然后删掉该 clip 对应的 `intermediates/clip*.json` 和 `intermediates/clip*.mp4`(`*` 为该 clip 的编号),最后运行 `run.bat`。
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
@echo off
rem Full pipeline entry point: runs run.py, which reads every setting from config.py.
rem %~dp0 expands to the directory of this .bat file (with trailing backslash),
rem so the script is found next to this file even if the project folder moves.
rem NOTE(review): the interpreter path below duplicates config.PYTHON; keep them in sync.
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "%~dp0run.py"
pause
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
# -*- coding: utf-8 -*-
"""
Full pipeline - from PPT parsing to the final video output.

All settings are managed in config.py. This script launches src/cli.py as a
child process and exits with the child's exit code.
"""
import sys
import os
import subprocess

# Import the unified config (config.py lives next to this script).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import config

# Put the configured interpreter's directory at the front of PATH for the child.
# os.pathsep is ";" on Windows, so this matches the previous hard-coded separator.
env = os.environ.copy()
env["PATH"] = os.path.dirname(config.PYTHON) + os.pathsep + env.get("PATH", "")

# NOTE(review): the API key is passed as a command-line argument, which makes it
# visible in the process list; consider passing it through the environment.
cmd = [
    config.PYTHON,
    os.path.join(config.CLI_DIR, "src", "cli.py"),
    "--video", config.VIDEO,
    "--ppt", config.PPT,
    "--output", config.OUTPUT,
    "--api-key", config.API_KEY,
    "--api-host", config.API_HOST,
    "--max-total-duration", str(config.MAX_TOTAL_DURATION),
    "--verbose",
]

print("Running pipeline...")
print(f" Video: {config.VIDEO}")
print(f" PPT: {config.PPT}")
print(f" Output: {config.OUTPUT}")
print()

# check=False: a non-zero exit is reported and propagated below, not raised.
completed = subprocess.run(cmd, cwd=config.CLI_DIR, env=env, check=False)
if completed.returncode != 0:
    print(f"\nPipeline failed with exit code {completed.returncode}")

# Propagate the child's status so callers can tell success from failure.
sys.exit(completed.returncode)
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
@echo off
|
|
||||||
chcp 65001 >nul
|
|
||||||
echo Cleaning pycache...
|
|
||||||
rmdir /s /q "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\src\__pycache__" 2>nul
|
|
||||||
rmdir /s /q "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\src\core\__pycache__" 2>nul
|
|
||||||
echo Cache cleaned.
|
|
||||||
echo.
|
|
||||||
echo Running CLI...
|
|
||||||
del "D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt" 2>nul
|
|
||||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\run_lesson1.py"
|
|
||||||
echo.
|
|
||||||
echo Exit: %errorlevel%
|
|
||||||
pause
|
|
||||||
@@ -1,42 +0,0 @@
|
|||||||
import sys
|
|
||||||
import os
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
VIDEO = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\直播回放-03月18日.mp4"
|
|
||||||
PPT = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\钢琴演奏入门第一课.pptx"
|
|
||||||
OUTPUT = r"D:\F\NewI\opencode\daily-workspace\projects\piano-lesson-highlights\cases\lesson1\output_cli_full"
|
|
||||||
PYTHON = r"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe"
|
|
||||||
CLI_DIR = r"D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\src"
|
|
||||||
API_KEY = "b0359bed-09f2-49e2-a53c-32ba057412e3"
|
|
||||||
API_HOST = "https://ark.cn-beijing.volces.com/api/coding/v3"
|
|
||||||
LOG_FILE = r"D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt"
|
|
||||||
|
|
||||||
env = os.environ.copy()
|
|
||||||
env["PATH"] = r"D:\ProgramData\anaconda3\envs\py312_cuda;" + env.get("PATH", "")
|
|
||||||
|
|
||||||
cmd = [
|
|
||||||
PYTHON,
|
|
||||||
os.path.join(CLI_DIR, "cli.py"),
|
|
||||||
"--video", VIDEO,
|
|
||||||
"--ppt", PPT,
|
|
||||||
"--output", OUTPUT,
|
|
||||||
"--api-key", API_KEY,
|
|
||||||
"--api-host", API_HOST,
|
|
||||||
"--verbose"
|
|
||||||
]
|
|
||||||
|
|
||||||
print("Starting CLI...")
|
|
||||||
print(f"Video: {VIDEO}")
|
|
||||||
print(f"PPT: {PPT}")
|
|
||||||
print(f"Log: {LOG_FILE}")
|
|
||||||
|
|
||||||
proc = subprocess.Popen(cmd, cwd=CLI_DIR, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, encoding='utf-8', errors='replace')
|
|
||||||
|
|
||||||
with open(LOG_FILE, 'w', encoding='utf-8') as log:
|
|
||||||
for line in proc.stdout:
|
|
||||||
log.write(line)
|
|
||||||
log.flush()
|
|
||||||
print(line, end='')
|
|
||||||
|
|
||||||
proc.wait()
|
|
||||||
print(f"\nExit code: {proc.returncode}")
|
|
||||||
+48
-1
@@ -60,8 +60,12 @@ def parse_args():
|
|||||||
help='LLM API地址')
|
help='LLM API地址')
|
||||||
parser.add_argument('--whisper-model', type=str, default='large',
|
parser.add_argument('--whisper-model', type=str, default='large',
|
||||||
help='Whisper模型 (默认: large)')
|
help='Whisper模型 (默认: large)')
|
||||||
|
parser.add_argument('--max-total-duration', type=int, default=300,
|
||||||
|
help='精华片段总时长上限(秒),默认300')
|
||||||
parser.add_argument('--verbose', '-V', action='store_true',
|
parser.add_argument('--verbose', '-V', action='store_true',
|
||||||
help='详细输出')
|
help='详细输出')
|
||||||
|
parser.add_argument('--resume-from-burn', action='store_true',
|
||||||
|
help='快速模式:跳过所有步骤,直接用已有片段和字幕文件合并烧录(用于手动修改SRT后快速重生成)')
|
||||||
|
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
@@ -77,7 +81,7 @@ def load_config_from_args(args) -> dict:
|
|||||||
'whisper_model': args.whisper_model,
|
'whisper_model': args.whisper_model,
|
||||||
'video_params': {
|
'video_params': {
|
||||||
'fade_duration': 1,
|
'fade_duration': 1,
|
||||||
'title_fontsize': 90,
|
'title_fontsize': 60,
|
||||||
'title_color': 'FFFF00',
|
'title_color': 'FFFF00',
|
||||||
'subtitle_fontsize': 24,
|
'subtitle_fontsize': 24,
|
||||||
'subtitle_color': 'FFFFFF',
|
'subtitle_color': 'FFFFFF',
|
||||||
@@ -137,8 +141,15 @@ def generate_config_from_ppt(args) -> dict:
|
|||||||
progress_callback=progress_callback,
|
progress_callback=progress_callback,
|
||||||
api_key=args.api_key,
|
api_key=args.api_key,
|
||||||
api_host=args.api_host,
|
api_host=args.api_host,
|
||||||
|
max_total_duration=args.max_total_duration,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# 补充API配置(parse_ppt_to_config不返回这些)
|
||||||
|
if args.api_key:
|
||||||
|
config['api_key'] = args.api_key
|
||||||
|
if args.api_host:
|
||||||
|
config['api_host'] = args.api_host
|
||||||
|
|
||||||
# 保存生成的配置
|
# 保存生成的配置
|
||||||
config_path = os.path.join(args.output, 'generated_config.yaml')
|
config_path = os.path.join(args.output, 'generated_config.yaml')
|
||||||
import yaml
|
import yaml
|
||||||
@@ -207,6 +218,42 @@ def main():
|
|||||||
|
|
||||||
pipeline = Pipeline(config)
|
pipeline = Pipeline(config)
|
||||||
|
|
||||||
|
# 快速模式:跳过所有步骤,直接用已有片段和字幕合并烧录
|
||||||
|
if args.resume_from_burn:
|
||||||
|
import glob
|
||||||
|
import shutil
|
||||||
|
output_dir = config.get('output_dir')
|
||||||
|
clips_dir = os.path.join(output_dir, 'clips')
|
||||||
|
merged_dir = os.path.join(output_dir, 'merged')
|
||||||
|
merged_path = os.path.join(merged_dir, 'merged.mp4')
|
||||||
|
title_path = os.path.join(output_dir, 'title.srt')
|
||||||
|
content_path = os.path.join(output_dir, 'content.srt')
|
||||||
|
|
||||||
|
# 检查必要文件
|
||||||
|
if not os.path.exists(title_path):
|
||||||
|
logger.error(f"找不到 title.srt: {title_path}")
|
||||||
|
return 1
|
||||||
|
if not os.path.exists(content_path):
|
||||||
|
logger.error(f"找不到 content.srt: {content_path}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# 已有合并视频则直接烧录;否则先合并
|
||||||
|
if os.path.exists(merged_path):
|
||||||
|
logger.info(f"找到已有合并视频: {merged_path}")
|
||||||
|
else:
|
||||||
|
logger.info("开始合并片段...")
|
||||||
|
clip_files = sorted(glob.glob(os.path.join(clips_dir, 'clip*.mp4')))
|
||||||
|
if not clip_files:
|
||||||
|
logger.error(f"找不到片段视频: {clips_dir}/clip*.mp4")
|
||||||
|
return 1
|
||||||
|
merged_path = pipeline.step_merge(clip_files)
|
||||||
|
logger.info(f"合并完成: {merged_path}")
|
||||||
|
|
||||||
|
logger.info("开始烧录...")
|
||||||
|
final_path = pipeline.step_burn(merged_path, title_path, content_path)
|
||||||
|
logger.info(f"完成! 最终视频: {final_path}")
|
||||||
|
return 0
|
||||||
|
|
||||||
logger.info("开始处理...")
|
logger.info("开始处理...")
|
||||||
final_path = pipeline.run()
|
final_path = pipeline.run()
|
||||||
|
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ DEFAULT_OUTPUT_DIR = os.path.join(PROJECT_ROOT, "output")
|
|||||||
DEFAULT_VIDEO_PARAMS = {
|
DEFAULT_VIDEO_PARAMS = {
|
||||||
"fade_duration": 1,
|
"fade_duration": 1,
|
||||||
"title_duration": 3,
|
"title_duration": 3,
|
||||||
"title_fontsize": 90,
|
"title_fontsize": 60,
|
||||||
"title_color": "FFFF00",
|
"title_color": "FFFF00",
|
||||||
"subtitle_fontsize": 24,
|
"subtitle_fontsize": 24,
|
||||||
"subtitle_color": "FFFFFF",
|
"subtitle_color": "FFFFFF",
|
||||||
|
|||||||
+3
-100
@@ -56,6 +56,8 @@ class LLMClient:
|
|||||||
"max_tokens": max_tokens
|
"max_tokens": max_tokens
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logger.info(f"[LLM] request chars={len(prompt)}, max_tokens={max_tokens}")
|
||||||
|
|
||||||
for attempt in range(LLM_MAX_RETRIES):
|
for attempt in range(LLM_MAX_RETRIES):
|
||||||
try:
|
try:
|
||||||
response = requests.post(url, headers=headers, json=payload, timeout=timeout)
|
response = requests.post(url, headers=headers, json=payload, timeout=timeout)
|
||||||
@@ -73,6 +75,7 @@ class LLMClient:
|
|||||||
|
|
||||||
content = choices[0].get("message", {}).get("content", "").strip()
|
content = choices[0].get("message", {}).get("content", "").strip()
|
||||||
if content:
|
if content:
|
||||||
|
logger.info(f"[LLM] response chars={len(content)}")
|
||||||
return content
|
return content
|
||||||
|
|
||||||
logger.warning(f"LLM: Empty content (attempt {attempt+1})")
|
logger.warning(f"LLM: Empty content (attempt {attempt+1})")
|
||||||
@@ -88,106 +91,6 @@ class LLMClient:
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def correct_title(self, transcript_text, original_title, all_titles=None):
|
|
||||||
"""
|
|
||||||
使用LLM纠正标题
|
|
||||||
|
|
||||||
Args:
|
|
||||||
transcript_text: 字幕文本
|
|
||||||
original_title: 原始标题
|
|
||||||
all_titles: 所有标题列表
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
纠正后的标题
|
|
||||||
"""
|
|
||||||
titles_str = ", ".join(all_titles[:20]) if all_titles else "无"
|
|
||||||
|
|
||||||
prompt = f"""你是一个钢琴教学视频的标题验证专家。
|
|
||||||
|
|
||||||
PPT提取的标题:{original_title}
|
|
||||||
|
|
||||||
视频字幕内容:{transcript_text[:500] if transcript_text else "无"}
|
|
||||||
|
|
||||||
本节课所有标题:{titles_str}
|
|
||||||
|
|
||||||
【重要规则】
|
|
||||||
- 只有当你有90%以上把握认为原标题错误时,才输出纠正后的标题
|
|
||||||
- 如果原标题基本正确,即使不完美,也必须输出原标题
|
|
||||||
- 绝对不能输出与原标题完全不同概念的词
|
|
||||||
- 如果不确定,输出原标题
|
|
||||||
|
|
||||||
请直接输出标题,不要添加任何解释。"""
|
|
||||||
|
|
||||||
result = self.chat(prompt, max_tokens=50, timeout=LLM_TITLE_TIMEOUT)
|
|
||||||
return result if result else original_title
|
|
||||||
|
|
||||||
def validate_content(self, transcript_text, title):
|
|
||||||
"""
|
|
||||||
使用LLM验证内容是否与标题相关
|
|
||||||
|
|
||||||
Args:
|
|
||||||
transcript_text: 字幕文本
|
|
||||||
title: 标题
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(is_valid: bool, reason: str)
|
|
||||||
"""
|
|
||||||
prompt = f"""判断视频字幕内容是否与标题相关。
|
|
||||||
|
|
||||||
标题:{title}
|
|
||||||
|
|
||||||
字幕内容:{transcript_text[:300] if transcript_text else "无"}
|
|
||||||
|
|
||||||
判断标准:
|
|
||||||
- 内容讨论的主题与标题概念相关 = 相关
|
|
||||||
- 内容与标题无关(如广告、闲聊、无关话题)= 无关
|
|
||||||
- 无法判断 = 不确定
|
|
||||||
|
|
||||||
请直接输出:相关/无关/不确定"""
|
|
||||||
|
|
||||||
result = self.chat(prompt, max_tokens=20, timeout=LLM_VALIDATE_TIMEOUT)
|
|
||||||
if not result:
|
|
||||||
return True, "error"
|
|
||||||
|
|
||||||
if "无关" in result:
|
|
||||||
return False, result
|
|
||||||
elif "不确定" in result:
|
|
||||||
return True, "uncertain"
|
|
||||||
return True, result
|
|
||||||
|
|
||||||
def full_text_correction(self, text, clip_title, knowledge_terms=None):
|
|
||||||
"""
|
|
||||||
使用LLM进行全文字幕纠错
|
|
||||||
|
|
||||||
Args:
|
|
||||||
text: 原始字幕
|
|
||||||
clip_title: 片段标题
|
|
||||||
knowledge_terms: 知识点列表
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
纠错后的字幕
|
|
||||||
"""
|
|
||||||
knowledge_str = ", ".join(knowledge_terms[:20]) if knowledge_terms else "无"
|
|
||||||
|
|
||||||
prompt = f"""你是一个钢琴教学视频的字幕纠错专家。
|
|
||||||
|
|
||||||
原始字幕:{text}
|
|
||||||
|
|
||||||
本节课片段标题:{clip_title}
|
|
||||||
本节课知识点:{knowledge_str}
|
|
||||||
|
|
||||||
请进行字幕纠错:
|
|
||||||
1. 修复语音识别错误(如"羞耻"→"休止","副点"→"附点","负点"→"附点")
|
|
||||||
2. 修复同音字错误
|
|
||||||
3. 保留原文的专业术语和表达方式
|
|
||||||
4. 不要改变原文的语气和意思
|
|
||||||
|
|
||||||
请直接输出纠错后的字幕,不要添加任何解释。"""
|
|
||||||
|
|
||||||
result = self.chat(prompt, max_tokens=500, timeout=LLM_TIMEOUT)
|
|
||||||
return result if result else text
|
|
||||||
|
|
||||||
|
|
||||||
# 全局LLM客户端实例
|
# 全局LLM客户端实例
|
||||||
_llm_client = None
|
_llm_client = None
|
||||||
|
|
||||||
|
|||||||
+127
-70
@@ -12,7 +12,7 @@ import logging
|
|||||||
from typing import Callable, Optional, List, Dict, Any
|
from typing import Callable, Optional, List, Dict, Any
|
||||||
|
|
||||||
from .video import extract_clip, merge_clips, burn_dual_subtitles
|
from .video import extract_clip, merge_clips, burn_dual_subtitles
|
||||||
from .subtitle import SubtitlePipeline
|
from .subtitle import SubtitlePipeline, correct_subtitles_llm
|
||||||
from .llm import LLMClient
|
from .llm import LLMClient
|
||||||
from .corrections import apply_all_corrections, load_term_corrections_from_config
|
from .corrections import apply_all_corrections, load_term_corrections_from_config
|
||||||
from .utils import ensure_dir
|
from .utils import ensure_dir
|
||||||
@@ -223,16 +223,41 @@ class Pipeline:
|
|||||||
self.progress_callback('transcribing', int((i/total)*90), f"转录片段 {i}/{total}")
|
self.progress_callback('transcribing', int((i/total)*90), f"转录片段 {i}/{total}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
segments, _ = model.transcribe(clip_path, language='zh', beam_size=5)
|
segments, _ = model.transcribe(clip_path, language='zh', beam_size=5, word_timestamps=True)
|
||||||
|
|
||||||
# 保存转录结果
|
# 保存转录结果(按句末标点进一步切分)
|
||||||
segments_data = []
|
segments_data = []
|
||||||
for seg in segments:
|
for seg in segments:
|
||||||
segments_data.append({
|
words = seg.words if hasattr(seg, 'words') else []
|
||||||
'start': seg.start,
|
if words:
|
||||||
'end': seg.end,
|
# 用 word-level 时间戳在句末标点处切分
|
||||||
'text': seg.text.strip()
|
# 注意:标点可能附着在词后(如"吗?"、"奏,"),需 strip 后判断
|
||||||
})
|
_END_MARKS = '。!??'
|
||||||
|
sub_start = words[0].start
|
||||||
|
sub_text_parts = []
|
||||||
|
for word in words:
|
||||||
|
sub_text_parts.append(word.word)
|
||||||
|
# 剥离标点后判断是否为句末标记
|
||||||
|
stripped = word.word.rstrip(',、,')
|
||||||
|
if any(stripped.endswith(m) for m in _END_MARKS):
|
||||||
|
sub_end = word.end
|
||||||
|
sub_text = ''.join(sub_text_parts).strip()
|
||||||
|
if sub_text:
|
||||||
|
segments_data.append({'start': sub_start, 'end': sub_end, 'text': sub_text})
|
||||||
|
sub_start = word.end
|
||||||
|
sub_text_parts = []
|
||||||
|
# 剩余未到句末的文本
|
||||||
|
if sub_text_parts:
|
||||||
|
remaining = ''.join(sub_text_parts).strip()
|
||||||
|
if remaining:
|
||||||
|
segments_data.append({'start': sub_start, 'end': words[-1].end, 'text': remaining})
|
||||||
|
else:
|
||||||
|
# fallback:无 word timestamps,直接用原 segment
|
||||||
|
segments_data.append({
|
||||||
|
'start': seg.start,
|
||||||
|
'end': seg.end,
|
||||||
|
'text': seg.text.strip()
|
||||||
|
})
|
||||||
|
|
||||||
with open(json_path, 'w', encoding='utf-8') as f:
|
with open(json_path, 'w', encoding='utf-8') as f:
|
||||||
json.dump({'segments': segments_data}, f, ensure_ascii=False, indent=2)
|
json.dump({'segments': segments_data}, f, ensure_ascii=False, indent=2)
|
||||||
@@ -249,59 +274,58 @@ class Pipeline:
|
|||||||
self.step_callback('transcribing')
|
self.step_callback('transcribing')
|
||||||
return json_paths
|
return json_paths
|
||||||
|
|
||||||
def step_correct_titles(self, json_paths: List[str]) -> List[Dict[str, Any]]:
|
def _recalculate_title_segments_from_transcript(
    self,
    clips: List[Dict],
    json_paths: List[str]
) -> None:
    """
    Recompute the title switch point of overlapping clips from the transcript.

    For every clip that carries at least two ``title_segments`` entries, the
    offset of the second entry is replaced by the time at which the second
    title first occurs in that clip's transcript JSON. Clips are modified in
    place; nothing is returned.

    Args:
        clips: clip dicts; ``title_segments`` is read as a list of
            ``[title, offset]`` pairs and its second pair is updated in place.
        json_paths: transcript JSON paths, matched to ``clips`` by index.
    """
    for i, clip in enumerate(clips):
        ts = clip.get('title_segments')
        if not ts or len(ts) < 2:
            continue

        # Second title segment: [title, offset]
        second_title, old_offset = ts[1]
        json_path = json_paths[i] if i < len(json_paths) else None
        if not json_path or not os.path.exists(json_path):
            continue

        try:
            with open(json_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers malformed JSON and decoding errors; keep the
            # old offset for this clip but leave a trace instead of failing silently.
            logger.warning(
                f" clip{i+1} title_segments: "
                f"cannot read transcript {json_path}: {e}"
            )
            continue

        first_time = self._first_keyword_time(data.get('segments', []), second_title)

        if first_time is not None:
            new_offset = first_time
            clip['title_segments'][1][1] = new_offset
            logger.info(
                f" clip{i+1} title_segments: "
                f"'{second_title}' 从 {old_offset:.2f}s → {new_offset:.2f}s"
            )
        else:
            logger.warning(
                f" clip{i+1} title_segments: "
                f"未在 transcript 中找到 '{second_title}',保留原 offset {old_offset:.2f}s"
            )

@staticmethod
def _first_keyword_time(segments: List[Dict], keyword: str) -> Optional[float]:
    """
    Return the time of the first occurrence of ``keyword`` in ``segments``.

    Segments that carry a ``words`` list are searched at word level: the word
    texts are concatenated so that a keyword spanning several consecutive
    words is still found, and the start time of the word in which the match
    begins is returned. Segments without word data fall back to a substring
    test on ``text``, returning the segment's ``start`` (coarser, but it is
    the only timing such a segment has). Returns ``None`` when the keyword is
    empty or never occurs.
    """
    if not keyword:
        return None

    for seg in segments:
        words = seg.get('words') or []
        if words:
            joined = ''
            word_starts = []  # (character offset in joined, word start time)
            for word_info in words:
                word_starts.append((len(joined), word_info.get('start')))
                joined += word_info.get('word', '')
            pos = joined.find(keyword)
            if pos >= 0:
                hit_time = None
                for char_offset, start_time in word_starts:
                    if char_offset > pos:
                        break
                    hit_time = start_time
                if hit_time is not None:
                    return hit_time

        # Segment-level fallback for transcripts stored without word timings.
        if keyword in seg.get('text', '') and seg.get('start') is not None:
            return seg['start']

    return None
|
||||||
self.progress_callback('title_correcting', 100, "标题纠正完成")
|
|
||||||
self.step_callback('title_correcting')
|
|
||||||
return corrected_clips
|
|
||||||
|
|
||||||
def step_generate_subtitles(self, corrected_clips: List[Dict], json_paths: List[str]) -> tuple:
|
def step_generate_subtitles(self, corrected_clips: List[Dict], json_paths: List[str]) -> tuple:
|
||||||
"""
|
"""
|
||||||
@@ -327,6 +351,7 @@ class Pipeline:
|
|||||||
'start': clip['start'],
|
'start': clip['start'],
|
||||||
'end': clip['end'],
|
'end': clip['end'],
|
||||||
'title': clip.get('title', clip.get('original_title', '')),
|
'title': clip.get('title', clip.get('original_title', '')),
|
||||||
|
'title_segments': clip.get('title_segments'), # 可能为None
|
||||||
}
|
}
|
||||||
clip_configs.append(clip_config)
|
clip_configs.append(clip_config)
|
||||||
|
|
||||||
@@ -357,6 +382,39 @@ class Pipeline:
|
|||||||
self.step_callback('generating_subtitles')
|
self.step_callback('generating_subtitles')
|
||||||
return title_path, content_path
|
return title_path, content_path
|
||||||
|
|
||||||
|
def step_correct_subtitles(self, title_path: str, content_path: str) -> str:
    """
    Step 4.5: correct the content subtitles with the LLM.

    The title subtitles and the PPT text stored under ``self.config['ppt_text']``
    are handed to ``correct_subtitles_llm`` as reference material for fixing
    typos, dropped characters and wrong terminology in the content subtitles.

    The step is best-effort: when there is no PPT text it is skipped, and when
    the correction call raises, the failure is logged and the uncorrected
    subtitles are used so the rest of the pipeline can still run.

    Args:
        title_path: path of the title subtitle file
        content_path: path of the content subtitle file

    Returns:
        Path of the corrected content subtitles, or ``content_path`` unchanged
        when the step was skipped or failed.
    """
    ppt_text = self.config.get('ppt_text', '')
    if not ppt_text:
        logger.warning("PPT原文为空,跳过字幕纠正步骤")
        return content_path

    self.step_callback('correcting_subtitles')
    self.progress_callback('correcting_subtitles', 0, "开始纠正字幕...")

    try:
        corrected_path = correct_subtitles_llm(
            title_path=title_path,
            content_path=content_path,
            ppt_text=ppt_text,
            llm_client=self.llm_client,
        )
    except Exception as e:
        # Subtitle correction is optional polish; a network or I/O error here
        # must not discard the already extracted and transcribed clips.
        logger.warning(f"LLM subtitle correction failed, keeping original subtitles: {e}")
        corrected_path = content_path

    self.progress_callback('correcting_subtitles', 100, "字幕纠正完成")
    self.step_callback('correcting_subtitles')
    return corrected_path
|
||||||
|
|
||||||
def step_merge(self, clip_paths: List[str]) -> str:
|
def step_merge(self, clip_paths: List[str]) -> str:
|
||||||
"""
|
"""
|
||||||
Step 5: 合并视频
|
Step 5: 合并视频
|
||||||
@@ -411,7 +469,7 @@ class Pipeline:
|
|||||||
title_path,
|
title_path,
|
||||||
content_path,
|
content_path,
|
||||||
final_path,
|
final_path,
|
||||||
title_fontsize=video_params.get('title_fontsize', 90),
|
title_fontsize=video_params.get('title_fontsize', 60),
|
||||||
title_color=video_params.get('title_color', 'FFFF00'),
|
title_color=video_params.get('title_color', 'FFFF00'),
|
||||||
subtitle_fontsize=video_params.get('subtitle_fontsize', 24),
|
subtitle_fontsize=video_params.get('subtitle_fontsize', 24),
|
||||||
subtitle_color=video_params.get('subtitle_color', 'FFFFFF')
|
subtitle_color=video_params.get('subtitle_color', 'FFFFFF')
|
||||||
@@ -447,17 +505,14 @@ class Pipeline:
|
|||||||
# Step 2: 转录
|
# Step 2: 转录
|
||||||
json_paths = self.step_transcribe(clip_paths)
|
json_paths = self.step_transcribe(clip_paths)
|
||||||
|
|
||||||
# Step 3: 标题纠正
|
# Step 2.5: 用 transcript 重新计算重叠片段的 title_segments 切分点
|
||||||
corrected_clips = self.step_correct_titles(json_paths)
|
self._recalculate_title_segments_from_transcript(self.clips, json_paths)
|
||||||
|
|
||||||
# Step 4: 生成字幕
|
# Step 3-6: 生成字幕、纠正、合并、烧录
|
||||||
title_path, content_path = self.step_generate_subtitles(corrected_clips, json_paths)
|
title_path, content_path = self.step_generate_subtitles(self.clips, json_paths)
|
||||||
|
corrected_content_path = self.step_correct_subtitles(title_path, content_path)
|
||||||
# Step 5: 合并
|
|
||||||
merged_path = self.step_merge(clip_paths)
|
merged_path = self.step_merge(clip_paths)
|
||||||
|
final_path = self.step_burn(merged_path, title_path, corrected_content_path)
|
||||||
# Step 6: 烧录
|
|
||||||
final_path = self.step_burn(merged_path, title_path, content_path)
|
|
||||||
|
|
||||||
logger.info(f"Pipeline completed: {final_path}")
|
logger.info(f"Pipeline completed: {final_path}")
|
||||||
return final_path
|
return final_path
|
||||||
@@ -474,23 +529,25 @@ class Pipeline:
|
|||||||
"""
|
"""
|
||||||
logger.info(f"Pipeline starting with user confirmation: {len(self.clips)} clips")
|
logger.info(f"Pipeline starting with user confirmation: {len(self.clips)} clips")
|
||||||
|
|
||||||
# Step 1-3: 同上
|
# Step 1-2: 提取+转录
|
||||||
clip_paths = self.step_extract()
|
clip_paths = self.step_extract()
|
||||||
if not clip_paths:
|
if not clip_paths:
|
||||||
raise RuntimeError("No clips extracted")
|
raise RuntimeError("No clips extracted")
|
||||||
|
|
||||||
json_paths = self.step_transcribe(clip_paths)
|
json_paths = self.step_transcribe(clip_paths)
|
||||||
corrected_clips = self.step_correct_titles(json_paths)
|
|
||||||
|
# Step 2.5: 用 transcript 重新计算重叠片段的 title_segments 切分点
|
||||||
|
self._recalculate_title_segments_from_transcript(self.clips, json_paths)
|
||||||
|
|
||||||
# 应用用户确认的标题
|
# 应用用户确认的标题
|
||||||
for i, confirmed in enumerate(confirmed_titles):
|
for i, confirmed in enumerate(confirmed_titles):
|
||||||
if i < len(corrected_clips):
|
if i < len(self.clips):
|
||||||
corrected_clips[i]['title'] = confirmed.get('title', corrected_clips[i]['title'])
|
self.clips[i]['title'] = confirmed.get('title', self.clips[i].get('title', ''))
|
||||||
|
|
||||||
# Step 4-6: 同上
|
# Step 3-6: 生成字幕、纠正、合并、烧录
|
||||||
title_path, content_path = self.step_generate_subtitles(corrected_clips, json_paths)
|
title_path, content_path = self.step_generate_subtitles(self.clips, json_paths)
|
||||||
|
corrected_content_path = self.step_correct_subtitles(title_path, content_path)
|
||||||
merged_path = self.step_merge(clip_paths)
|
merged_path = self.step_merge(clip_paths)
|
||||||
final_path = self.step_burn(merged_path, title_path, content_path)
|
final_path = self.step_burn(merged_path, title_path, corrected_content_path)
|
||||||
|
|
||||||
logger.info(f"Pipeline completed: {final_path}")
|
logger.info(f"Pipeline completed: {final_path}")
|
||||||
return final_path
|
return final_path
|
||||||
|
|||||||
+68
-62
@@ -17,6 +17,8 @@ import zipfile
|
|||||||
import logging
|
import logging
|
||||||
from typing import List, Dict, Any, Optional, Callable, Tuple
|
from typing import List, Dict, Any, Optional, Callable, Tuple
|
||||||
|
|
||||||
|
from .llm import LLMClient
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@@ -36,6 +38,7 @@ class PPTParser:
|
|||||||
api_key: Optional[str] = None,
|
api_key: Optional[str] = None,
|
||||||
api_host: Optional[str] = None,
|
api_host: Optional[str] = None,
|
||||||
max_clip_duration: int = 30,
|
max_clip_duration: int = 30,
|
||||||
|
max_total_duration: int = 300,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
初始化PPT解析器
|
初始化PPT解析器
|
||||||
@@ -48,6 +51,7 @@ class PPTParser:
|
|||||||
api_key: LLM API密钥
|
api_key: LLM API密钥
|
||||||
api_host: LLM API地址
|
api_host: LLM API地址
|
||||||
max_clip_duration: 每个精华片段的最大时长(秒),默认30秒
|
max_clip_duration: 每个精华片段的最大时长(秒),默认30秒
|
||||||
|
max_total_duration: 所有精华片段的总时长上限(秒),默认300秒(5分钟)
|
||||||
"""
|
"""
|
||||||
self.video_path = video_path
|
self.video_path = video_path
|
||||||
self.ppt_path = ppt_path
|
self.ppt_path = ppt_path
|
||||||
@@ -56,6 +60,7 @@ class PPTParser:
|
|||||||
self.api_key = api_key
|
self.api_key = api_key
|
||||||
self.api_host = api_host
|
self.api_host = api_host
|
||||||
self.max_clip_duration = max_clip_duration
|
self.max_clip_duration = max_clip_duration
|
||||||
|
self.max_total_duration = max_total_duration
|
||||||
|
|
||||||
self.inter_dir = os.path.join(output_dir, 'intermediates')
|
self.inter_dir = os.path.join(output_dir, 'intermediates')
|
||||||
os.makedirs(self.inter_dir, exist_ok=True)
|
os.makedirs(self.inter_dir, exist_ok=True)
|
||||||
@@ -284,50 +289,19 @@ class PPTParser:
|
|||||||
|
|
||||||
def _call_llm(self, prompt: str, max_tokens: int = 4096, timeout: int = 300, retries: int = 3) -> Optional[str]:
|
def _call_llm(self, prompt: str, max_tokens: int = 4096, timeout: int = 300, retries: int = 3) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
带重试的 LLM 调用。
|
使用实例的 api_key/api_host 创建 LLMClient 并调用 chat。
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
prompt: 发送给 LLM 的提示词
|
prompt: 发送给 LLM 的提示词
|
||||||
max_tokens: 最大 token 数
|
max_tokens: 最大 token 数
|
||||||
timeout: 单次请求超时(秒)
|
timeout: 单次请求超时(秒)
|
||||||
retries: 最大重试次数
|
retries: 最大重试次数(chat() 内部也有重试,这里传 retries 但 chat() 忽略它)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
LLM 返回的 content,失败返回 None
|
LLM 返回的 content,失败返回 None
|
||||||
"""
|
"""
|
||||||
import requests
|
client = LLMClient(api_key=self.api_key, api_host=self.api_host)
|
||||||
url = f"{self.api_host}/chat/completions"
|
return client.chat(prompt=prompt, max_tokens=max_tokens, timeout=timeout)
|
||||||
headers = {
|
|
||||||
"Authorization": f"Bearer {self.api_key}",
|
|
||||||
"Content-Type": "application/json"
|
|
||||||
}
|
|
||||||
payload = {
|
|
||||||
"model": "doubao-seed-2.0-lite",
|
|
||||||
"messages": [{"role": "user", "content": prompt}],
|
|
||||||
"max_tokens": max_tokens,
|
|
||||||
"temperature": 0.1
|
|
||||||
}
|
|
||||||
|
|
||||||
last_err = None
|
|
||||||
for attempt in range(retries):
|
|
||||||
try:
|
|
||||||
response = requests.post(url, headers=headers, json=payload, timeout=timeout)
|
|
||||||
response.raise_for_status()
|
|
||||||
result = response.json()
|
|
||||||
content = result.get("choices", [{}])[0].get("message", {}).get("content", "")
|
|
||||||
if content:
|
|
||||||
return content
|
|
||||||
logger.warning(f"LLM返回空内容(第{attempt+1}次尝试)")
|
|
||||||
last_err = "空内容"
|
|
||||||
except requests.exceptions.Timeout:
|
|
||||||
logger.warning(f"LLM请求超时(第{attempt+1}次尝试,timeout={timeout}s)")
|
|
||||||
last_err = "超时"
|
|
||||||
except requests.exceptions.RequestException as e:
|
|
||||||
logger.warning(f"LLM请求失败(第{attempt+1}次尝试): {e}")
|
|
||||||
last_err = str(e)
|
|
||||||
|
|
||||||
logger.error(f"LLM调用失败(已重试{retries}次): {last_err}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def llm_extract_knowledge_points_from_ppt(self) -> Tuple[Optional[List[Dict[str, Any]]], Optional[str]]:
|
def llm_extract_knowledge_points_from_ppt(self) -> Tuple[Optional[List[Dict[str, Any]]], Optional[str]]:
|
||||||
"""
|
"""
|
||||||
@@ -415,7 +389,7 @@ class PPTParser:
|
|||||||
- 一种方法:如"放松练习"、"分手练习"、"慢速练习"、"唱谱法"
|
- 一种方法:如"放松练习"、"分手练习"、"慢速练习"、"唱谱法"
|
||||||
- 一个专题:如"乐理基础"、"手型要求"、"课后作业"
|
- 一个专题:如"乐理基础"、"手型要求"、"课后作业"
|
||||||
|
|
||||||
【文本清理规则】(以不影响原文意思表达为前提):
|
【文本清理规则】(用于 cleaned_text,不影响知识点提取):
|
||||||
- 合并连续的空行(超过1个空行的压缩为1个)
|
- 合并连续的空行(超过1个空行的压缩为1个)
|
||||||
- 去除行首行尾多余空格
|
- 去除行首行尾多余空格
|
||||||
- 保留页面之间的自然分段(每页独立段落)
|
- 保留页面之间的自然分段(每页独立段落)
|
||||||
@@ -423,12 +397,16 @@ class PPTParser:
|
|||||||
- 无标点的长句子:如果一行文字超过50字且无标点,才合并到下一行
|
- 无标点的长句子:如果一行文字超过50字且无标点,才合并到下一行
|
||||||
- 保留专有名词、术语的原始写法
|
- 保留专有名词、术语的原始写法
|
||||||
|
|
||||||
【重要规则】:
|
【知识点提取规则】:
|
||||||
1. 扫描全部页面:不要只找"知识点汇总页",每页都要看
|
1. 扫描全部页面:不要只找"知识点汇总页",每页都要看
|
||||||
2. 原文保留:知识点原文是什么就写什么,不要解释、概括、翻译或扩展
|
2. 原文保留:知识点原文是什么就写什么,不要解释、概括、翻译或扩展
|
||||||
3. 拆分合并:被拆分的片段(如"的三"+"种方法"、"谱号、"+"大谱表、"等)要合并为完整知识词
|
3. 拆分合并:被拆分的片段(如"的三"+"种方法"、"谱号、"+"大谱表、"等)要合并为完整知识词
|
||||||
4. 标题过滤:忽略"本课主要知识点"、"课程回顾"、"本节课重要知识点"等纯导航/目录类标题
|
4. 标题过滤:忽略"本课主要知识点"、"课程回顾"、"本节课重要知识点"等纯导航/目录类标题
|
||||||
5. 分类项处理:格式如"XX:子项1、子项2、子项3"时,冒号后的每个子项各自独立成知识点;但如果冒号后是完整句子或定义(如"XX:这是指……"),则整句描述的对象本身才是知识点
|
5. 列表/定义项拆分:
|
||||||
|
- 格式为"XX:子项1,子项2,子项3"时,冒号后的每个子项各自独立成知识点
|
||||||
|
- 格式为多行列表(如"重复:xxx\n级进:xxx\n跳进:xxx"),每行各自独立成知识点
|
||||||
|
- 如果冒号后是完整句子或定义(如"XX:这是指……"),则整句描述的对象本身才是知识点
|
||||||
|
- **知识点标题不得包含括号、冒号、引号等任何标点符号**,只保留核心词(如"重复(旋律进行方式)"应输出为"重复","音高、和弦"应输出为"音高"和"和弦")
|
||||||
6. 列表项过滤:只保留有独立含义的知识点,忽略序号、标点符号、无意义的装饰词
|
6. 列表项过滤:只保留有独立含义的知识点,忽略序号、标点符号、无意义的装饰词
|
||||||
7. 内容页优先:如果一个知识点在教学内容页展开讲解了,比仅出现在列表中更重要
|
7. 内容页优先:如果一个知识点在教学内容页展开讲解了,比仅出现在列表中更重要
|
||||||
8. 最小粒度:宁可多输出几个独立的知识词,也不要合并成一个大而笼统的标题
|
8. 最小粒度:宁可多输出几个独立的知识词,也不要合并成一个大而笼统的标题
|
||||||
@@ -668,13 +646,24 @@ class PPTParser:
|
|||||||
for clip in sorted_clips[1:]:
|
for clip in sorted_clips[1:]:
|
||||||
prev = merged[-1]
|
prev = merged[-1]
|
||||||
if clip['start'] < prev['end']:
|
if clip['start'] < prev['end']:
|
||||||
# 重叠:prev延伸到clip的end,保留clip的标题(标题在clip原start处切换)
|
# 重叠:prev延伸到clip的end,检测标题切换
|
||||||
|
if clip['title'] != prev['title']:
|
||||||
|
# 标题切换点 = clip['start'] 相对于 prev 起点的时间
|
||||||
|
switch_offset = clip['start'] - prev['start']
|
||||||
|
# 建立 title_segments
|
||||||
|
prev['title_segments'] = [
|
||||||
|
[prev['title'], 0],
|
||||||
|
[clip['title'], switch_offset],
|
||||||
|
]
|
||||||
|
prev['title'] = prev['title'] # 保留第一个标题作主标题
|
||||||
prev['end'] = clip['end']
|
prev['end'] = clip['end']
|
||||||
logger.info(f" 合并重叠: '{prev['title']}' 延伸至 {prev['end']}s,"
|
logger.info(f" 合并重叠: '{prev['title']}' 延伸至 {prev['end']}s,"
|
||||||
f"标题在 {clip['start']}s 切换为 '{clip['title']}'")
|
f"标题在 {clip['start']}s 切换为 '{clip['title']}'")
|
||||||
else:
|
else:
|
||||||
# 不重叠:直接添加
|
# 不重叠:直接添加,清除 title_segments(由系统默认处理)
|
||||||
merged.append(dict(clip))
|
c = dict(clip)
|
||||||
|
c.pop('title_segments', None)
|
||||||
|
merged.append(c)
|
||||||
|
|
||||||
return merged
|
return merged
|
||||||
|
|
||||||
@@ -855,7 +844,11 @@ class PPTParser:
|
|||||||
|
|
||||||
# PPT参考(完整文本 + 知识点列表)
|
# PPT参考(完整文本 + 知识点列表)
|
||||||
if ppt_full_text or ppt_knowledge:
|
if ppt_full_text or ppt_knowledge:
|
||||||
knowledge_lines = "\n".join([f" - {kp['title']}" for kp in (ppt_knowledge or [])])
|
knowledge_list = ppt_knowledge or []
|
||||||
|
# 带序号的列表,LLM 用序号引用,不许自由发挥
|
||||||
|
knowledge_lines = "\n".join(
|
||||||
|
[f" [{i}] {kp['title']}" for i, kp in enumerate(knowledge_list)]
|
||||||
|
)
|
||||||
knowledge_section = f"""
|
knowledge_section = f"""
|
||||||
【PPT参考文本(权威背景)】
|
【PPT参考文本(权威背景)】
|
||||||
以下是与本节课配套的PPT完整内容,请以此为权威参考:
|
以下是与本节课配套的PPT完整内容,请以此为权威参考:
|
||||||
@@ -887,14 +880,13 @@ class PPTParser:
|
|||||||
|
|
||||||
【重要规则】
|
【重要规则】
|
||||||
1. 逐条处理:必须为列表中的**每一个知识点**搜索视频转录文本,找到讲解最集中的片段
|
1. 逐条处理:必须为列表中的**每一个知识点**搜索视频转录文本,找到讲解最集中的片段
|
||||||
2. **title 必须完全等于知识点列表中的原名**,不许改写、不许概括、不许扩展
|
2. **输出序号而非名称**:kp_idx 必须是列表中的序号(如 0、3、7),不许自己发挥名称
|
||||||
- ✅ 正确:knowledge_point 是"弹琴的手型",title 就用"弹琴的手型"
|
- ✅ 正确:"kp_idx": 3 对应列表中第 4 项
|
||||||
- ❌ 错误:title 用"手型支撑与放松的核心要求"(自己发挥)
|
- ❌ 错误:"kp_idx": "重复(旋律进行方式)"(这是自由发挥,不是序号)
|
||||||
3. **knowledge_point 字段也必须用知识点列表中的原名**
|
3. 时间必须精确:使用转录文本中的实际时间戳
|
||||||
4. 时间必须精确:使用转录文本中的实际时间戳
|
4. 时长控制:每个片段约5-15秒,重要内容可以稍长(最长不超过20秒)
|
||||||
5. 时长控制:每个片段约5-15秒,重要内容可以稍长(最长不超过20秒)
|
5. 总时长不超过{self.max_total_duration}秒:如果知识点太多导致总时长超标,优先保留最重要的知识点,其余在not_found中说明
|
||||||
6. 总时长不超过180秒:如果知识点太多导致总时长超标,优先保留最重要的知识点,其余在not_found中说明
|
6. 只输出JSON,不要添加任何解释
|
||||||
7. 只输出JSON,不要添加任何解释
|
|
||||||
|
|
||||||
【视频转录文本(带时间戳)】
|
【视频转录文本(带时间戳)】
|
||||||
{transcript_text}
|
{transcript_text}
|
||||||
@@ -902,10 +894,10 @@ class PPTParser:
|
|||||||
请以以下JSON格式输出(不要输出其他内容):
|
请以以下JSON格式输出(不要输出其他内容):
|
||||||
{{
|
{{
|
||||||
"clips": [
|
"clips": [
|
||||||
{{"title": "知识点原名(不许改写)", "start": 开始秒数, "end": 结束秒数, "knowledge_point": "知识点原名"}},
|
{{"kp_idx": 序号, "start": 开始秒数, "end": 结束秒数}},
|
||||||
{{"title": "知识点原名", "start": 开始秒数, "end": 结束秒数, "knowledge_point": "知识点原名"}}
|
{{"kp_idx": 序号, "start": 开始秒数, "end": 结束秒数}}
|
||||||
],
|
],
|
||||||
"not_found": ["知识点原名(必须与列表中的名称完全一致)"]
|
"not_found": [序号, 序号]
|
||||||
}}"""
|
}}"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -929,31 +921,41 @@ class PPTParser:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
clips = parsed.get("clips", [])
|
clips = parsed.get("clips", [])
|
||||||
not_found = parsed.get("not_found", [])
|
not_found_idxs = parsed.get("not_found", [])
|
||||||
|
|
||||||
if not clips and not not_found:
|
if not clips and not not_found_idxs:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# 验证和清理
|
# 通过序号映射回原始名称(序号 → 原始知识点名称)
|
||||||
|
knowledge_list = ppt_knowledge or []
|
||||||
|
title_map = {i: kp['title'] for i, kp in enumerate(knowledge_list)}
|
||||||
|
|
||||||
|
# 验证和清理:序号 → 原始名称
|
||||||
validated = []
|
validated = []
|
||||||
for clip in clips:
|
for clip in clips:
|
||||||
title = clip.get("title", "")
|
kp_idx = int(clip.get("kp_idx", -1))
|
||||||
|
if kp_idx not in title_map:
|
||||||
|
logger.warning(f" 跳过无效序号 kp_idx={kp_idx}(超出范围 0-{len(title_map)-1})")
|
||||||
|
continue
|
||||||
|
title = title_map[kp_idx]
|
||||||
start = max(0, float(clip.get("start", 0)))
|
start = max(0, float(clip.get("start", 0)))
|
||||||
raw_end = float(clip.get("end", 0))
|
raw_end = float(clip.get("end", 0))
|
||||||
end = min(raw_end, start + self.max_clip_duration)
|
end = min(raw_end, start + self.max_clip_duration)
|
||||||
kp = clip.get("knowledge_point", "")
|
|
||||||
validated.append({
|
validated.append({
|
||||||
"title": title,
|
"title": title,
|
||||||
"start": int(start),
|
"start": int(start),
|
||||||
"end": int(end),
|
"end": int(end),
|
||||||
"knowledge_point": kp,
|
"knowledge_point": title,
|
||||||
})
|
})
|
||||||
|
|
||||||
logger.info(f"LLM提取成功: {len(validated)} 个片段,{len(not_found)} 个未找到")
|
# not_found 中的序号也映射回名称
|
||||||
|
not_found_names = [title_map[i] for i in not_found_idxs if i in title_map]
|
||||||
|
|
||||||
|
logger.info(f"LLM提取成功: {len(validated)} 个片段,{len(not_found_names)} 个未找到")
|
||||||
for c in validated:
|
for c in validated:
|
||||||
logger.info(f" [{c['knowledge_point']}] {c['title']}: {c['start']}s - {c['end']}s")
|
logger.info(f" [{c['knowledge_point']}] {c['title']}: {c['start']}s - {c['end']}s")
|
||||||
if not_found:
|
if not_found_names:
|
||||||
logger.info(f" 未找到知识点: {not_found}")
|
logger.info(f" 未找到知识点: {not_found_names}")
|
||||||
|
|
||||||
return validated
|
return validated
|
||||||
|
|
||||||
@@ -1007,6 +1009,9 @@ class PPTParser:
|
|||||||
}, f, ensure_ascii=False)
|
}, f, ensure_ascii=False)
|
||||||
logger.info(f"已保存PPT知识点到checkpoint")
|
logger.info(f"已保存PPT知识点到checkpoint")
|
||||||
|
|
||||||
|
# 保存PPT原文供后续步骤使用
|
||||||
|
self.ppt_text = ppt_cleaned_text or ""
|
||||||
|
|
||||||
# Step 3: LLM校正文本(以PPT全文为参考)- 带checkpoint复用
|
# Step 3: LLM校正文本(以PPT全文为参考)- 带checkpoint复用
|
||||||
self._report('parse', 30, "LLM校正文本...")
|
self._report('parse', 30, "LLM校正文本...")
|
||||||
corrected_checkpoint = os.path.join(self.inter_dir, "corrected_transcript.json")
|
corrected_checkpoint = os.path.join(self.inter_dir, "corrected_transcript.json")
|
||||||
@@ -1052,6 +1057,7 @@ class PPTParser:
|
|||||||
"clips": clips,
|
"clips": clips,
|
||||||
"output_dir": self.output_dir,
|
"output_dir": self.output_dir,
|
||||||
"term_corrections": self.term_corrections,
|
"term_corrections": self.term_corrections,
|
||||||
|
"ppt_text": getattr(self, 'ppt_text', ''),
|
||||||
"video_params": {
|
"video_params": {
|
||||||
"fade_duration": 1,
|
"fade_duration": 1,
|
||||||
"title_fontsize": 48,
|
"title_fontsize": 48,
|
||||||
|
|||||||
+241
-11
@@ -228,15 +228,32 @@ class SubtitlePipeline:
|
|||||||
offset = offsets[i]
|
offset = offsets[i]
|
||||||
clip_duration = offsets[i+1] - offsets[i] if i+1 < len(offsets) else 3
|
clip_duration = offsets[i+1] - offsets[i] if i+1 < len(offsets) else 3
|
||||||
|
|
||||||
# 添加标题(使用title样式)- 标题显示3秒后正文才显示,避免重叠
|
# 添加标题(使用title样式)
|
||||||
title_duration = min(3, clip_duration)
|
if clip.get('title_segments'):
|
||||||
title_track.add(offset, offset + title_duration, clip['title'], style='title')
|
# 多标题片段:遍历 title_segments [(title, start_offset), ...]
|
||||||
|
# 每个标题最多显示 title_duration 秒
|
||||||
|
segs = clip['title_segments']
|
||||||
|
for j, (title, seg_start) in enumerate(segs):
|
||||||
|
next_start = segs[j+1][1] if j+1 < len(segs) else clip_duration
|
||||||
|
seg_end = min(seg_start + title_duration, next_start)
|
||||||
|
title_track.add(
|
||||||
|
offset + seg_start,
|
||||||
|
offset + seg_end,
|
||||||
|
title,
|
||||||
|
style='title'
|
||||||
|
)
|
||||||
|
# 正文字幕从最后一个标题段结束后开始
|
||||||
|
content_start = offset + segs[-1][1]
|
||||||
|
else:
|
||||||
|
# 单标题:标题显示3秒后正文才显示,避免重叠
|
||||||
|
title_duration = min(3, clip_duration)
|
||||||
|
title_track.add(offset, offset + title_duration, clip['title'], style='title')
|
||||||
|
content_start = offset + title_duration
|
||||||
|
|
||||||
# 添加正文字幕 - 从标题结束后开始,避免重叠
|
# 添加正文字幕 - 从标题结束后开始,避免重叠
|
||||||
with open(json_path, 'r', encoding='utf-8') as f:
|
with open(json_path, 'r', encoding='utf-8') as f:
|
||||||
data = json.load(f)
|
data = json.load(f)
|
||||||
|
|
||||||
content_start = offset + title_duration # 正文从标题结束后开始
|
|
||||||
for seg in data.get('segments', []):
|
for seg in data.get('segments', []):
|
||||||
text = seg.get('text', '').strip()
|
text = seg.get('text', '').strip()
|
||||||
if not text:
|
if not text:
|
||||||
@@ -253,12 +270,37 @@ class SubtitlePipeline:
|
|||||||
# 只添加在clip时间范围内的字幕
|
# 只添加在clip时间范围内的字幕
|
||||||
clip_end = clip['end'] - clip['start'] + offset
|
clip_end = clip['end'] - clip['start'] + offset
|
||||||
if seg_start < clip_end and seg_end <= clip_end:
|
if seg_start < clip_end and seg_end <= clip_end:
|
||||||
content_track.add(
|
# pipeline.py 已按标点拆分,此处只处理意外超长segment(无标点且>8秒)
|
||||||
seg_start,
|
duration = seg_end - seg_start
|
||||||
seg_end,
|
if duration > 8.0:
|
||||||
text,
|
# 按标点拆分
|
||||||
style='content'
|
import re
|
||||||
)
|
parts = re.split(r'(?<=[。!??!])', text)
|
||||||
|
if len(parts) > 1:
|
||||||
|
total_len = sum(len(p) for p in parts)
|
||||||
|
if total_len > 0:
|
||||||
|
cum_len = 0
|
||||||
|
s_start = seg_start
|
||||||
|
for part in parts:
|
||||||
|
part = part.strip()
|
||||||
|
if not part:
|
||||||
|
continue
|
||||||
|
cum_len += len(part)
|
||||||
|
s_end = seg_start + duration * cum_len / total_len
|
||||||
|
content_track.add(s_start, s_end, part, style='content')
|
||||||
|
s_start = s_end
|
||||||
|
continue
|
||||||
|
# 无标点则平均拆分
|
||||||
|
num_splits = max(2, int(duration / 8.0) + 1)
|
||||||
|
chunk_len = len(text) // num_splits
|
||||||
|
for i in range(num_splits):
|
||||||
|
t_start = seg_start + duration * i / num_splits
|
||||||
|
t_end = seg_start + duration * (i + 1) / num_splits
|
||||||
|
chunk_text = text[i * chunk_len:(i + 1) * chunk_len].strip()
|
||||||
|
if chunk_text:
|
||||||
|
content_track.add(t_start, t_end, chunk_text, style='content')
|
||||||
|
else:
|
||||||
|
content_track.add(seg_start, seg_end, text, style='content')
|
||||||
|
|
||||||
# 保存两个轨道 - 标题使用SRT格式
|
# 保存两个轨道 - 标题使用SRT格式
|
||||||
version = self._get_next_version()
|
version = self._get_next_version()
|
||||||
@@ -320,4 +362,192 @@ def load_clip_subtitles(inter_dir, clip_nums):
|
|||||||
if os.path.exists(json_path):
|
if os.path.exists(json_path):
|
||||||
with open(json_path, 'r', encoding='utf-8') as f:
|
with open(json_path, 'r', encoding='utf-8') as f:
|
||||||
clips[num] = json.load(f)
|
clips[num] = json.load(f)
|
||||||
return clips
|
return clips
|
||||||
|
|
||||||
|
|
||||||
|
def parse_srt(content: str) -> list:
    """
    Parse SRT text into a list of subtitle cues.

    A leading UTF-8 BOM, CRLF / CR line endings and separator lines that
    contain only spaces or tabs are tolerated. Malformed blocks (fewer than
    three lines, a non-integer index, or a timing line without `` --> ``)
    are skipped.

    Args:
        content: full text of an SRT file

    Returns:
        [(index, start, end, text), ...] where ``start`` / ``end`` are the
        timestamp strings with ``,`` replaced by ``.`` and ``text`` keeps its
        internal line breaks.
    """
    import re

    # Normalise first: a BOM would make int() fail on the first index, and
    # "\r\n\r\n" contains no "\n\n", so CRLF input would parse as one block.
    normalized = content.lstrip('\ufeff').replace('\r\n', '\n').replace('\r', '\n')

    segments = []
    for block in re.split(r'\n[ \t]*\n', normalized.strip()):
        lines = block.strip().split('\n')
        if len(lines) < 3:
            continue
        try:
            idx = int(lines[0])
            times = lines[1].split(' --> ')
            start = times[0].strip().replace(',', '.')
            end = times[1].strip().replace(',', '.')
        except (ValueError, IndexError):
            continue
        segments.append((idx, start, end, '\n'.join(lines[2:])))
    return segments
|
||||||
|
|
||||||
|
|
||||||
|
def format_srt(segments: list) -> str:
    """
    Format a list of subtitle cues as SRT text.

    Args:
        segments: [(index, start, end, text), ...] with timestamps that use
            ``.`` as the decimal separator (the shape produced by parse_srt)

    Returns:
        SRT text: cues separated by one blank line and terminated by a single
        newline. Timestamps are written with ``,`` as the decimal separator.
        An empty list yields a lone newline.
    """
    blocks = [
        f"{idx}\n{start.replace('.', ',')} --> {end.replace('.', ',')}\n{text}"
        for idx, start, end, text in segments
    ]
    return '\n\n'.join(blocks) + '\n'
|
||||||
|
|
||||||
|
|
||||||
|
def correct_subtitles_llm(
    title_path: str,
    content_path: str,
    ppt_text: str,
    llm_client,
    output_path: str = None,
) -> str:
    """
    Correct the text of a content SRT file with an LLM, keeping its timeline.

    Only ``idx|text`` lines are sent to the model; the timestamps stay in this
    function and are re-attached by cue index, so the model cannot move cues.
    The title SRT (first 2000 characters) and the PPT text (first 3000
    characters) are included in the prompt as reference material.

    The function is best-effort. When the content file has no parsable cues,
    the LLM returns nothing, or its answer cannot be parsed as the requested
    JSON, nothing is written and ``content_path`` is returned. Individual
    malformed or empty corrections are ignored and the original cue text is
    kept for them.

    Args:
        title_path: path of the title SRT file
        content_path: path of the content SRT file to correct
        ppt_text: PPT text used as terminology reference
        llm_client: object with a ``chat(prompt=..., max_tokens=...)`` method
            that returns the response text (or a falsy value on failure)
        output_path: where to write the corrected SRT; defaults to
            overwriting ``content_path``

    Returns:
        Path of the subtitle file to use afterwards.
    """
    import json
    import re

    # utf-8-sig drops a BOM left by Windows editors; with plain utf-8 the BOM
    # would stick to the first cue index and that cue would fail to parse.
    with open(title_path, 'r', encoding='utf-8-sig') as f:
        title_srt = f.read()
    with open(content_path, 'r', encoding='utf-8-sig') as f:
        content_srt = f.read()

    # Parse the SRT; timestamps are kept verbatim for the rebuild below.
    content_segments = parse_srt(content_srt)
    if not content_segments:
        # Nothing to correct; rewriting would replace the file with a lone newline.
        logger.warning("字幕纠正:未解析到任何字幕,保留原字幕")
        return content_path

    # Plain "idx|text" lines for the model.
    transcript_text = '\n'.join(
        f"{idx}|{text}" for idx, _start, _end, text in content_segments
    )

    prompt = f"""你是一个钢琴教学视频的字幕纠错专家。

## 参考信息
标题字幕(title.srt)- 权威知识点参考:
{title_srt[:2000]}

PPT原文(ppt)- 术语权威参考:
{ppt_text[:3000]}

## 任务
修正以下转录文本中的错字、漏字、术语错误(如"骚"改为"sol","拿两个音速"改为"拿两个因素"等)。
每行格式:序号|原始文字

## 待纠正文本({len(content_segments)}条):
{transcript_text}

## 输出要求
- 以JSON格式输出,只输出JSON,不要有任何其他解释
- 用原始序号匹配,不要改变结构
{{
"corrected": [
{{"idx": 序号, "text": "修正后的文字"}},
{{"idx": 序号, "text": "修正后的文字"}}
]
}}"""

    response = llm_client.chat(
        prompt=prompt,
        max_tokens=8192,
    )
    if not response:
        logger.warning("LLM返回为空,保留原字幕")
        return content_path

    # Take everything from the first "{" to the last "}". This ignores any
    # markdown fence or chatter around the JSON without line-based stripping.
    try:
        json_match = re.search(r'\{.*\}', response, re.DOTALL)
        if not json_match:
            raise ValueError("No JSON found in response")
        result = json.loads(json_match.group())
        corrected_list = result.get('corrected') or []

        # idx -> corrected text
        corrected_map = {}
        for item in corrected_list:
            try:
                # Models sometimes return the index as a string; cue indices are ints.
                item_idx = int(item['idx'])
                item_text = item['text']
            except (KeyError, TypeError, ValueError):
                continue
            if not isinstance(item_text, str) or not item_text.strip():
                # An empty cue would read as a block separator in the SRT.
                continue
            # Blank lines inside a cue would split it into two broken blocks.
            corrected_map[item_idx] = re.sub(r'\n\s*\n', '\n', item_text.strip())
    except (ValueError, TypeError, AttributeError) as e:
        logger.warning(f"字幕纠正JSON解析失败,保留原字幕: {e}")
        return content_path

    # Rebuild the SRT with the original timestamps and collect the diff.
    changed = []
    blocks = []
    for idx, start, end, orig_text in content_segments:
        new_text = corrected_map.get(idx, orig_text)
        start_s = start.replace('.', ',')
        end_s = end.replace('.', ',')
        blocks.append(f"{idx}\n{start_s} --> {end_s}\n{new_text}")
        if new_text != orig_text:
            changed.append((idx, orig_text, new_text))

    corrected_srt = '\n\n'.join(blocks) + '\n'

    if output_path is None:
        output_path = content_path
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(corrected_srt)

    # Diff log, truncated to 50 characters per side.
    if changed:
        logger.info(f"字幕纠正,共 {len(changed)} 处修改:")
        for idx, old, new in changed:
            old_s = old[:50] + ('...' if len(old) > 50 else '')
            new_s = new[:50] + ('...' if len(new) > 50 else '')
            logger.info(f" [{idx:3d}] \"{old_s}\" → \"{new_s}\"")
    else:
        logger.info("字幕纠正,无修改")

    logger.info(f"字幕已修正: {output_path}")
    return output_path
|
||||||
+4
-11
@@ -146,7 +146,7 @@ def burn_subtitles(video_path, srt_path, output_path):
|
|||||||
return success
|
return success
|
||||||
|
|
||||||
|
|
||||||
def burn_dual_subtitles(video_path, title_srt_path, content_srt_path, output_path, title_fontsize=90, title_color="FFFF00", subtitle_fontsize=24, subtitle_color="FFFFFF"):
|
def burn_dual_subtitles(video_path, title_srt_path, content_srt_path, output_path, title_fontsize=60, title_color="FFFF00", subtitle_fontsize=24, subtitle_color="FFFFFF"):
|
||||||
"""
|
"""
|
||||||
烧录两层字幕到视频(标题在屏幕正中,正文在下方)
|
烧录两层字幕到视频(标题在屏幕正中,正文在下方)
|
||||||
|
|
||||||
@@ -163,7 +163,7 @@ def burn_dual_subtitles(video_path, title_srt_path, content_srt_path, output_pat
|
|||||||
Returns:
|
Returns:
|
||||||
True if success
|
True if success
|
||||||
"""
|
"""
|
||||||
# Windows路径转义
|
# Windows路径转义:D:/ 需要双反斜杠转义
|
||||||
title_escaped = title_srt_path.replace('\\', '/').replace('D:/', 'D\\:/')
|
title_escaped = title_srt_path.replace('\\', '/').replace('D:/', 'D\\:/')
|
||||||
content_escaped = content_srt_path.replace('\\', '/').replace('D:/', 'D\\:/')
|
content_escaped = content_srt_path.replace('\\', '/').replace('D:/', 'D\\:/')
|
||||||
|
|
||||||
@@ -180,19 +180,12 @@ def burn_dual_subtitles(video_path, title_srt_path, content_srt_path, output_pat
|
|||||||
title_bgr = html_to_bgr(title_color)
|
title_bgr = html_to_bgr(title_color)
|
||||||
subtitle_bgr = html_to_bgr(subtitle_color)
|
subtitle_bgr = html_to_bgr(subtitle_color)
|
||||||
|
|
||||||
# 标题样式:使用SRT+force_style,Alignment=5水平居中,垂直位置由MarginV控制
|
# 标题样式:使用SRT+force_style,Alignment=2水平居中,MarginV=150使其位于屏幕上偏下区域(36%高度)
|
||||||
# 正文字样式:底部居中,24字号,白色,带描边
|
# 正文字样式:底部居中,24字号,白色,带描边
|
||||||
content_style = f"FontName=微软雅黑,FontSize={subtitle_fontsize},PrimaryColour={subtitle_bgr},Alignment=2,MarginV=20,Outline=1,Shadow=1"
|
content_style = f"FontName=微软雅黑,FontSize={subtitle_fontsize},PrimaryColour={subtitle_bgr},Alignment=2,MarginV=20,Outline=1,Shadow=1"
|
||||||
|
|
||||||
# 使用两个独立字幕滤镜分别渲染,然后叠加
|
title_style = f"FontName=微软雅黑,FontSize={title_fontsize},PrimaryColour={title_bgr},Alignment=2,MarginV=150,Outline=3,Shadow=2"
|
||||||
# 标题使用Alignment=5,MarginV=0(正中)
|
|
||||||
title_style = f"FontName=微软雅黑,FontSize={title_fontsize},PrimaryColour={title_bgr},Alignment=5,MarginV=0,Outline=3,Shadow=2"
|
|
||||||
|
|
||||||
# 使用两个字幕滤镜叠加,然后映射视频+原始音频
|
|
||||||
# 标题使用Alignment=5,MarginV=0(正中)
|
|
||||||
title_style = f"FontName=微软雅黑,FontSize={title_fontsize},PrimaryColour={title_bgr},Alignment=5,MarginV=0,Outline=3,Shadow=2"
|
|
||||||
|
|
||||||
# 使用两个字幕滤镜叠加
|
|
||||||
filter_str = f"[0:v]subtitles='{title_escaped}':force_style='{title_style}',subtitles='{content_escaped}':force_style='{content_style}'[out]"
|
filter_str = f"[0:v]subtitles='{title_escaped}':force_style='{title_style}',subtitles='{content_escaped}':force_style='{content_style}'[out]"
|
||||||
|
|
||||||
# 保留原始音频 - 映射视频输出和原始音频
|
# 保留原始音频 - 映射视频输出和原始音频
|
||||||
|
|||||||
@@ -1,9 +0,0 @@
|
|||||||
# Debug helper: dump the size and the first 300 bytes of the CLI run log,
# both as hex and as a lossy UTF-8 decode, to diagnose encoding problems.
# The context manager closes the file even if the read fails.
with open(r'D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt', 'rb') as f:
    data = f.read()

head = data[:300]
print('Total bytes:', len(data))
print('First 300 hex:', head.hex())
print()
print('UTF-8 decode of first 300:')
# 'replace' keeps the dump printable when the log is not valid UTF-8.
print(head.decode('utf-8', 'replace'))
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
@echo off
|
|
||||||
chcp 65001 >nul
|
|
||||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" -c "import pptx; print('pptx available')"
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
@echo off
|
|
||||||
chcp 65001 >nul
|
|
||||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\check_pptx2.py"
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
import sys
|
|
||||||
out = r"D:\F\NewI\opencode\daily-workspace\temp\check_pptx_out.txt"
|
|
||||||
try:
|
|
||||||
import pptx
|
|
||||||
result = "pptx available: " + pptx.__version__
|
|
||||||
except ImportError as e:
|
|
||||||
result = "pptx NOT available: " + str(e)
|
|
||||||
with open(out, "w", encoding="utf-8") as f:
|
|
||||||
f.write(result)
|
|
||||||
print(result)
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
@echo off
|
|
||||||
chcp 65001 >nul
|
|
||||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\check_transcript.py"
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
import os
|
|
||||||
import json
|
|
||||||
|
|
||||||
inter_dir = r"D:\F\NewI\opencode\daily-workspace\projects\piano-lesson-highlights\cases\lesson1\output_cli_full\intermediates"
|
|
||||||
transcript_file = os.path.join(inter_dir, "full_transcript.json")
|
|
||||||
|
|
||||||
if os.path.exists(transcript_file):
|
|
||||||
size = os.path.getsize(transcript_file)
|
|
||||||
with open(transcript_file, "r", encoding="utf-8") as f:
|
|
||||||
data = json.load(f)
|
|
||||||
print(f"Transcript exists: {size} bytes")
|
|
||||||
print(f"Segments: {len(data)}")
|
|
||||||
if data:
|
|
||||||
print(f"First segment: {data[0]}")
|
|
||||||
print(f"Last segment: {data[-1]}")
|
|
||||||
else:
|
|
||||||
print("Transcript file NOT found")
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
@echo off
|
|
||||||
chcp 65001 >nul
|
|
||||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\debug_ppt.py"
|
|
||||||
pause
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
import zipfile
|
|
||||||
import re
|
|
||||||
|
|
||||||
ppt = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\钢琴演奏入门第一课.pptx"
|
|
||||||
|
|
||||||
with zipfile.ZipFile(ppt, "r") as z:
|
|
||||||
names = z.namelist()
|
|
||||||
slide_files = [f for f in names if f.startswith("ppt/slides/slide") and f.endswith(".xml")]
|
|
||||||
print(f"Total files in zip: {len(names)}")
|
|
||||||
print(f"Slide files found: {len(slide_files)}")
|
|
||||||
print(f"First 5 slide files: {slide_files[:5]}")
|
|
||||||
|
|
||||||
# Test presentation.xml
|
|
||||||
try:
|
|
||||||
pres_xml = z.read("ppt/presentation.xml").decode("utf-8", errors="replace")
|
|
||||||
sld_ids = re.findall(r'<p:sldId\b[^>]*r:id="([^"]+)"', pres_xml)
|
|
||||||
print(f"\nsldIdList rIds: {sld_ids[:5]}")
|
|
||||||
except Exception as e:
|
|
||||||
print(f"\npresentation.xml error: {e}")
|
|
||||||
|
|
||||||
# Test rels
|
|
||||||
try:
|
|
||||||
rels_xml = z.read("ppt/_rels/presentation.xml.rels").decode("utf-8", errors="replace")
|
|
||||||
rid_to_target = dict(re.findall(r'Id="([^"]+)"[^>]*Target="([^"]+)"', rels_xml))
|
|
||||||
print(f"Rels entries: {len(rid_to_target)}")
|
|
||||||
# Show a sample
|
|
||||||
for k, v in list(rid_to_target.items())[:3]:
|
|
||||||
print(f" {k} -> {v}")
|
|
||||||
except Exception as e:
|
|
||||||
print(f"\nrels error: {e}")
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
@echo off
|
|
||||||
chcp 65001 >nul
|
|
||||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\debug_ppt2.py"
|
|
||||||
@@ -1,34 +0,0 @@
|
|||||||
import zipfile, re, sys
|
|
||||||
|
|
||||||
ppt = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\钢琴演奏入门第一课.pptx"
|
|
||||||
out = r"D:\F\NewI\opencode\daily-workspace\temp\debug_ppt_out.txt"
|
|
||||||
|
|
||||||
results = []
|
|
||||||
|
|
||||||
with zipfile.ZipFile(ppt, "r") as z:
|
|
||||||
names = z.namelist()
|
|
||||||
slide_files = [f for f in names if f.startswith("ppt/slides/slide") and f.endswith(".xml")]
|
|
||||||
results.append(f"Total files in zip: {len(names)}")
|
|
||||||
results.append(f"Slide files found: {len(slide_files)}")
|
|
||||||
results.append(f"First 5: {slide_files[:5]}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
pres_xml = z.read("ppt/presentation.xml").decode("utf-8", errors="replace")
|
|
||||||
sld_ids = re.findall(r'<p:sldId\b[^>]*r:id="([^"]+)"', pres_xml)
|
|
||||||
results.append(f"sldIds: {sld_ids[:5]}")
|
|
||||||
except Exception as e:
|
|
||||||
results.append(f"pres error: {e}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
rels_xml = z.read("ppt/_rels/presentation.xml.rels").decode("utf-8", errors="replace")
|
|
||||||
rid_to_target = dict(re.findall(r'Id="([^"]+)"[^>]*Target="([^"]+)"', rels_xml))
|
|
||||||
results.append(f"rels count: {len(rid_to_target)}")
|
|
||||||
for k, v in list(rid_to_target.items())[:3]:
|
|
||||||
results.append(f" {k} -> {v}")
|
|
||||||
except Exception as e:
|
|
||||||
results.append(f"rels error: {e}")
|
|
||||||
|
|
||||||
with open(out, "w", encoding="utf-8") as f:
|
|
||||||
f.write("\n".join(results))
|
|
||||||
|
|
||||||
print("Done, see", out)
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
@echo off
|
|
||||||
chcp 65001 >nul
|
|
||||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\debug_slide1.py" > "D:\F\NewI\opencode\daily-workspace\temp\debug_slide1_out.txt" 2>&1
|
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
import zipfile, re, os
|
|
||||||
|
|
||||||
ppt = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\钢琴演奏入门第一课.pptx"
|
|
||||||
out_dir = r"D:\F\NewI\opencode\daily-workspace\temp"
|
|
||||||
slide1_out = os.path.join(out_dir, "slide1_texts.txt")
|
|
||||||
xml_out = os.path.join(out_dir, "slide1_xml_preview.txt")
|
|
||||||
|
|
||||||
with zipfile.ZipFile(ppt, "r") as z:
|
|
||||||
slide1_file = "ppt/slides/slide1.xml"
|
|
||||||
content = z.read(slide1_file).decode("utf-8", errors="replace")
|
|
||||||
all_texts = re.findall(r"<a:t[^>]*>([^<]*)</a:t>", content)
|
|
||||||
|
|
||||||
meaningful = [t for t in all_texts if t.strip()]
|
|
||||||
with open(slide1_out, "w", encoding="utf-8") as f:
|
|
||||||
f.write(f"Total fragments: {len(all_texts)}\n")
|
|
||||||
f.write(f"Meaningful fragments: {len(meaningful)}\n\n")
|
|
||||||
for i, t in enumerate(meaningful):
|
|
||||||
f.write(f"[{i}] {t}\n")
|
|
||||||
|
|
||||||
with open(xml_out, "w", encoding="utf-8") as f:
|
|
||||||
f.write(content[:8000])
|
|
||||||
|
|
||||||
print("Done")
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
@echo off
|
|
||||||
chcp 65001 >nul
|
|
||||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\do_install.py"
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
|
|
||||||
venv_python = r"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe"
|
|
||||||
result = subprocess.run(
|
|
||||||
[venv_python, "-m", "pip", "install", "python-pptx"],
|
|
||||||
capture_output=True,
|
|
||||||
text=True
|
|
||||||
)
|
|
||||||
print("STDOUT:", result.stdout)
|
|
||||||
print("STDERR:", result.stderr)
|
|
||||||
print("Return code:", result.returncode)
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
@echo off
|
|
||||||
chcp 65001 >nul
|
|
||||||
echo Installing python-pptx...
|
|
||||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" -m pip install python-pptx -q
|
|
||||||
echo Done
|
|
||||||
pause
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
@echo off
|
|
||||||
chcp 65001 >nul
|
|
||||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" -m pip install python-pptx
|
|
||||||
echo Exit: %errorlevel%
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
@echo off
|
|
||||||
chcp 65001 >nul
|
|
||||||
"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" -m pip install python-pptx > "D:\F\NewI\opencode\daily-workspace\temp\pip_out.txt" 2>&1
|
|
||||||
echo Exit: %errorlevel%
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
# Kill all python processes related to our CLI
|
|
||||||
Get-Process python -ErrorAction SilentlyContinue | Stop-Process -Force
|
|
||||||
Start-Sleep 3
|
|
||||||
|
|
||||||
# Verify killed
|
|
||||||
$remaining = Get-Process python -ErrorAction SilentlyContinue
|
|
||||||
if ($remaining) {
|
|
||||||
Write-Host "Still running:"
|
|
||||||
$remaining | ForEach-Object { Write-Host " PID:" $_.Id }
|
|
||||||
} else {
|
|
||||||
Write-Host "All python processes killed"
|
|
||||||
}
|
|
||||||
Binary file not shown.
@@ -1,5 +0,0 @@
|
|||||||
f = open(r'D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt', 'r', encoding='utf-8')
|
|
||||||
lines = f.readlines()
|
|
||||||
f.close()
|
|
||||||
for l in lines[:35]:
|
|
||||||
print(l.rstrip())
|
|
||||||
Binary file not shown.
Reference in New Issue
Block a user