commit 9e62247a60e810d4e4153fc7592c98aad0fd87e5 Author: hmo Date: Sun May 3 03:07:22 2026 +0800 Initial commit: lesson-highlights generator diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7ffdb02 --- /dev/null +++ b/.gitignore @@ -0,0 +1,54 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +venv/ +.venv/ +env/ +.env/ + +# Build +dist/ +build/ +*.spec +*.exe +*.msi +*.dmg + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# State files +state.json +*.state.json +output/ +intermediates/ +subs/ +concat_* + +# Logs +*.log +logs/ + +# OS +.DS_Store +Thumbs.db + +# Model files (large) +*.pt +*.pth +*.onnx +*.bin + +# Temp files +*.tmp +*.bak +*.cache + +# Local config (contains API keys) +config.ini \ No newline at end of file diff --git a/0.50.0 b/0.50.0 new file mode 100644 index 0000000..e69de29 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..bf4ff9c --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,10 @@ +# Contributing + +1. Fork the repository +2. Create a feature branch +3. Make your changes +4. Submit a pull request + +## Development Setup + +See README.md for setup instructions. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..c611d77 --- /dev/null +++ b/README.md @@ -0,0 +1,105 @@ +# 🎹 Piano Highlight Generator + +钢琎诟粟华视频生成工具。自劚从完敎诟皋视频䞭提取粟华片段蜬圕、纠错、生成字幕批量烧圕到视频䞭。 + +## ✹ 功胜特点 + +- **智胜提取**: 自劚检测视频䞭的粟圩片段 +- **语音蜬圕**: 支持 Whisper 倚暡型tiny/base/small/medium/large +- **AI 纠错**: LLM 自劚纠正蜬圕错误䌘化标题 +- **双语字幕**: 支持双蜚字幕标题蜚 + 内容蜚 +- **状态持久化**: 支持暂停/恢倍可䞭断继续 +- **手劚猖蟑**: 生成前可人工审栞猖蟑标题和字幕内容 + +## 📋 系统芁求 + +- Windows 10/11 或 macOS 10.15+ +- Python 3.10+ +- FFmpeg必须添加到 PATH + +## 🚀 快速匀始 + +### 1. 
安装 + +```bash +# 克隆项目 +git clone +cd piano-highlight-app + +# 创建虚拟环境掚荐 +python -m venv venv +.\venv\Scripts\activate # Windows +source venv/bin/activate # Linux/macOS + +# 安装䟝赖 +pip install -r requirements.txt + +# 安装 FFmpegWindows - 䜿甚 winget +winget install Gyan.FFmpeg + +# 或 macOS +brew install ffmpeg +``` + +### 2. 运行 + +```bash +python src/main.py +``` + +### 3. 配眮 + +銖次运行需芁配眮 +1. **API 讟眮**: 选择 API 提䟛商DeepSeek/硅基流劚蟓入 API Key +2. **视频讟眮**: 选择蟓入视频、蟓出目圕 +3. **蜬圕讟眮**: 选择 Whisper 暡型掚荐 medium + +### 4. 生成 + +1. 点击「匀始倄理」 +2. 等埅各步骀完成 +3. **标题确讀**: LLM 生成标题后审栞并猖蟑 +4. **字幕确讀**: 查看字幕内容可进䞀步猖蟑 +5. 等埅烧圕完成 + +## 📁 蟓出文件 + +``` +output/ +├── state.json # 倄理状态 +├── clips/ # 提取的片段 +│ └── clip_001.mp4 +├── subtitles/ # 字幕文件 +│ ├── clip_001_title.srt # 标题蜚 +│ └── clip_001_content.srt # 内容蜚 +└── final/ # 最终蟓出 + └── clip_001_final.mp4 +``` + +## 🔧 流氎线步骀 + +1. **extract** - 片段提取 +2. **transcribe** - 语音蜬圕 +3. **title_correct** - 标题生成䞎纠错 +4. **generate_subtitles** - 字幕生成 +5. **merge** - 片段合并 +6. **burn** - 字幕烧圕 + +## ⚠ 垞见问题 + +### Q: 提瀺 "FFmpeg not found" +A: 确保 FFmpeg 已安装并添加到系统 PATH。重启终端后重试。 + +### Q: API 调甚倱莥 +A: 检查 API Key 是吊正确眑络是吊正垞或切换 API 提䟛商。 + +### Q: 磁盘空闎䞍足 +A: 枅理蟓出目圕或曎换到空闎曎倧的磁盘。 + +## 📄 讞可证 + +MIT License + +## 🀝 莡献 + +欢迎提亀 Issue 和 Pull Request \ No newline at end of file diff --git a/README_BUILD.md b/README_BUILD.md new file mode 100644 index 0000000..04e5054 --- /dev/null +++ b/README_BUILD.md @@ -0,0 +1,148 @@ +# Piano Highlight Generator - Build Instructions + +## Prerequisites + +### 1. Python +- **Version**: Python 3.10 or higher (3.12 recommended) +- **Download**: https://www.python.org/downloads/ +- **Note**: Ensure Python is added to PATH + +### 2. FFmpeg (Runtime Requirement) +FFmpeg is required for video processing at runtime, NOT for building. 
+ +**Windows:** +- Download from https://ffmpeg.org/download.html +- Or use: `winget install ffmpeg` +- Add FFmpeg binary location to system PATH + +**Linux (Ubuntu/Debian):** +```bash +sudo apt update +sudo apt install ffmpeg +``` + +**macOS:** +```bash +brew install ffmpeg +``` + +### 3. Additional Build Tools (Windows) +- **Visual Studio Build Tools** or **MinGW** may be required for Nuitka compilation +- Download: https://visualstudio.microsoft.com/visual-cpp-build-tools/ + +--- + +## Build Steps + +### Windows + +1. Open Command Prompt or PowerShell in project directory + +2. Run the build script: +```cmd +build.bat +``` + + Or manually: +```cmd +pip install nuitka pandas +python -m nuitka --standalone --onefile --windows-console-mode=disable --output-dir=dist --output-filename=PianoHighlightGenerator.exe --enable-plugin=pyside6 src/main.py +``` + +### Linux/macOS + +1. Make build script executable: +```bash +chmod +x build.sh +``` + +2. Run the build script: +```bash +./build.sh +``` + + Or manually: +```bash +pip3 install nuitka pandas +python3 -m nuitka --standalone --onefile --output-dir=dist --output-filename=PianoHighlightGenerator.bin --enable-plugin=pyside6 src/main.py +``` + +--- + +## Output + +| Platform | Output Location | Filename | +|----------|----------------|----------| +| Windows | `dist/` | `PianoHighlightGenerator.exe` | +| Linux | `dist/` | `PianoHighlightGenerator.bin` | +| macOS | `dist/` | `PianoHighlightGenerator.bin` | + +--- + +## Expected Size + +- **Standalone executable**: ~150-250 MB +- This includes Python interpreter, PySide6, and all dependencies + +--- + +## Testing the Built Executable + +1. Copy FFmpeg to the same directory as the executable OR ensure FFmpeg is in PATH + +2. Run the executable: + - **Windows**: Double-click `PianoHighlightGenerator.exe` or run from cmd + - **Linux/macOS**: Run `./PianoHighlightGenerator.bin` in terminal + +3. 
Test basic functionality: + - App should launch with GUI + - Video selection should work + - Processing pipeline should execute + +--- + +## Troubleshooting + +### "ffmpeg not found" error +- Ensure FFmpeg is installed and in system PATH +- Test by running `ffmpeg -version` in terminal + +### "Missing DLL" errors on Windows +- Install Visual C++ Redistributable: https://aka.ms/vs/17/release/vc_redist.x64.exe + +### Build fails with memory error +- Reduce parallelism: Add `--jobs=2` to build command +- Close other applications + +### PySide6 plugin issues +- Ensure `--enable-plugin=pyside6` is included +- If Qt plugins (styles, image formats) are missing at runtime, add `--include-qt-plugins=sensible,styles` + +--- + +## Data Files + +If you have a `prompts/` directory with template files, ensure: +- Path: `src/core/prompts/` +- The directory is copied with `--include-data-dir=src/core/prompts=prompts` + +Currently, no prompts directory exists in the project. Create `src/core/prompts/` if needed for custom prompt templates. + +--- + +## Nuitka Configuration (pyproject.toml) + +The project includes Nuitka settings in `pyproject.toml`: + +```toml +[tool.nuitka] +assume_yes_for_downloads = true +show_progress = true +output_dir = "dist" +output_name = "PianoHighlightGenerator" +python_version = "3.10" +standalone = true +onefile = true +``` + +You can also use the command line options documented above for more control. \ No newline at end of file diff --git a/build.bat b/build.bat new file mode 100644 index 0000000..083bf34 --- /dev/null +++ b/build.bat @@ -0,0 +1,71 @@ +@echo off +REM Piano Highlight Generator - Build Script for Windows +REM Prerequisites: Python 3.10+, FFmpeg (in PATH) + +echo ================================================ +echo Piano Highlight Generator - Nuitka Build +echo ================================================ +echo. + +REM Check Python version +echo Checking Python version... +python --version +if errorlevel 1 ( + echo ERROR: Python not found. 
Please install Python 3.10 or higher. + pause + exit /b 1 +) + +REM Check FFmpeg +echo. +echo Checking FFmpeg... +where ffmpeg >nul 2>nul +if errorlevel 1 ( + echo WARNING: FFmpeg not found in PATH. The built executable will require FFmpeg to be installed. + echo Please install FFmpeg from: https://ffmpeg.org/download.html + echo. +) + +REM Create dist directory if not exists +if not exist "dist" mkdir dist + +REM Install build dependencies into the same interpreter that runs Nuitka +echo. +echo Installing build dependencies... +python -m pip install nuitka pandas + +REM Build command - src\core\prompts is bundled only when that folder exists +echo. +echo Starting Nuitka compilation... +echo This may take several minutes on first run... +echo. + +if exist "src\core\prompts\" (set "DATA_ARGS=--include-data-dir=src/core/prompts=prompts") else (set "DATA_ARGS=") +python -m nuitka ^ + --standalone ^ + --onefile ^ + --windows-console-mode=disable ^ + --output-dir=dist ^ + --output-filename=PianoHighlightGenerator.exe ^ + --enable-plugin=pyside6 ^ + %DATA_ARGS% ^ + src/main.py + +if errorlevel 1 ( + echo. + echo BUILD FAILED! + pause + exit /b 1 +) + +echo. +echo ================================================ +echo Build complete! +echo ================================================ +echo. +echo Output: dist\PianoHighlightGenerator.exe +echo. +echo NOTE: FFmpeg must be in PATH for the executable to work. +echo If FFmpeg is not installed, download from https://ffmpeg.org +echo. +pause \ No newline at end of file diff --git a/build.sh b/build.sh new file mode 100644 index 0000000..a284b1f --- /dev/null +++ b/build.sh @@ -0,0 +1,61 @@ +#!/bin/bash +# Piano Highlight Generator - Build Script for Linux/macOS +# Prerequisites: Python 3.10+, FFmpeg (in PATH) + +set -e + +echo "================================================" +echo " Piano Highlight Generator - Nuitka Build" +echo "================================================" +echo "" + +# Check Python version +echo "Checking Python version..." +python3 --version || { echo "ERROR: Python not found. 
Please install Python 3.10 or higher."; exit 1; } + +# Check FFmpeg +echo "" +echo "Checking FFmpeg..." +if ! command -v ffmpeg &> /dev/null; then + echo "WARNING: FFmpeg not found in PATH. The built executable will require FFmpeg." + echo "Please install FFmpeg: sudo apt install ffmpeg (Ubuntu/Debian) or brew install ffmpeg (macOS)" + echo "" +fi + +# Create dist directory if not exists +mkdir -p dist + +# Install build dependencies into the same interpreter that runs Nuitka +echo "" +echo "Installing build dependencies..." +python3 -m pip install nuitka pandas + +# Build: src/core/prompts is bundled only when that directory exists +echo "" +echo "Starting Nuitka compilation..." +echo "This may take several minutes on first run..." +echo "" + +DATA_ARGS=(); if [ -d src/core/prompts ]; then DATA_ARGS=(--include-data-dir=src/core/prompts=prompts); fi +python3 -m nuitka \ + --standalone \ + --onefile \ + --output-dir=dist \ + --output-filename=PianoHighlightGenerator.bin \ + --enable-plugin=pyside6 \ + "${DATA_ARGS[@]}" \ + src/main.py + +echo "" +echo "================================================" +echo " Build complete!" +echo "================================================" +echo "" +echo "Output: dist/PianoHighlightGenerator.bin" +echo "" +echo "NOTE: FFmpeg must be in PATH for the executable to work." +echo "" +echo "To run FFmpeg from a specific location, either:" +echo " 1. Add FFmpeg to your PATH" +echo " 2. Place FFmpeg binary in the same directory as the executable" +echo "" \ No newline at end of file diff --git a/build_cli.bat b/build_cli.bat new file mode 100644 index 0000000..9e680de --- /dev/null +++ b/build_cli.bat @@ -0,0 +1,69 @@ +@echo off +REM Piano Highlight Generator - CLI Build Script for Windows +REM Builds a standalone CLI executable from cli.py + +set "PYTHON=D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" +set "PYINSTALLER=D:\ProgramData\anaconda3\envs\py312_cuda\Scripts\pyinstaller.exe" + +echo ================================================ +echo Piano Highlight Generator - CLI Build +echo ================================================ +echo. 
+ +REM Check Python +"%PYTHON%" --version +if errorlevel 1 ( + echo ERROR: Python not found + pause + exit /b 1 +) + +REM Install pyinstaller if needed +"%PYTHON%" -c "import PyInstaller" 2>nul +if errorlevel 1 ( + echo Installing PyInstaller... + "%PYTHON%" -m pip install pyinstaller -q +) + +REM Build from the folder containing this script so relative paths resolve +echo. +echo Starting PyInstaller compilation... +echo This may take several minutes... +echo. + +cd /d "%~dp0" + +"%PYTHON%" -m PyInstaller ^ + --name=PianoHighlightCLI ^ + --console ^ + --onefile ^ + --clean ^ + --distpath=dist_cli ^ + --workpath=build_cli ^ + --specpath=build_cli ^ + --additional-hooks-dir= ^ + --hidden-import=pkg_resources ^ + --hidden-import=faster_whisper ^ + --hidden-import=cv2 ^ + --hidden-import=yaml ^ + --hidden-import=requests ^ + --hidden-import=PIL ^ + --collect-all=faster_whisper ^ + --collect-all=transformers ^ + src/cli.py + +if errorlevel 1 ( + echo. + echo BUILD FAILED! + pause + exit /b 1 +) + +echo. +echo ================================================ +echo Build complete! +echo ================================================ +echo. +echo Output: dist_cli\PianoHighlightCLI.exe +echo. +pause diff --git a/config.ini.example b/config.ini.example new file mode 100644 index 0000000..8f74fa3 --- /dev/null +++ b/config.ini.example @@ -0,0 +1,3 @@ +[api] +api_host = "https://ark.cn-beijing.volces.com/api/coding/v3" +api_key = "YOUR_API_KEY_HERE" diff --git a/design.md b/design.md new file mode 100644 index 0000000..7bc51f5 --- /dev/null +++ b/design.md @@ -0,0 +1,521 @@ +# Piano Highlight Generator App - 技术讟计 + +> 讟计日期2026-05-02 +> 版本1.0 +> 状态Draft + +--- + +## 1. 技术栈 + +| 层级 | 技术 | 选型理由 | +|------|------|----------| +| GUI 框架 | PySide6 (Qt for Python) | LGPL 讞可功胜完倇信号槜机制适合匂步曎新 | +| 打包工具 | Nuitka | 猖译䞺 C性胜奜䜓积小 | +| 状态持久化 | JSON 文件 | 简单无需数据库䟝赖 | +| 栞心暡块 | 倍甚现有脚本 | video.py, subtitle.py, llm.py, corrections.py | +| 配眮栌匏 | YAML/JSON | 甚户友奜可读性奜 | + +--- + +## 2. 
项目结构 + +``` +piano-highlight-app/ +├── src/ +│ ├── __init__.py +│ ├── main.py # 应甚入口 +│ ├── app.py # QMainWindow 䞻窗口 +│ ├── gui/ # GUI 组件 +│ │ ├── __init__.py +│ │ ├── config_panel.py # 配眮面板 +│ │ ├── progress_view.py # 进床监控 +│ │ ├── title_editor.py # 标题猖蟑噚 +│ │ └── log_view.py # 日志窗口 +│ ├── logic/ # 䞚务逻蟑 +│ │ ├── __init__.py +│ │ ├── config_manager.py # 配眮管理 +│ │ ├── pipeline_controller.py # 流氎线控制 +│ │ ├── state_manager.py # 状态管理 +│ │ └── worker.py # 后台工䜜线皋 +│ └── core/ # 栞心暡块倍甚 +│ ├── __init__.py +│ ├── constants.py # 垞量 +│ ├── utils.py # 工具凜数 +│ ├── video.py # 视频倄理 +│ ├── subtitle.py # 字幕倄理 +│ ├── llm.py # LLM 调甚 +│ └── corrections.py # 纠错规则 +├── assets/ # 资源文件 +│ └── icons/ +├── requirements.txt # 䟝赖 +├── pyproject.toml # 项目配眮 +├── nuitka_options.py # Nuitka 打包配眮 +└── README.md +``` + +--- + +## 3. 栞心类讟计 + +### 3.1 StateManager状态管理 + +```python +class StateManager: + """状态管理噚 - 莟莣状态持久化""" + + def __init__(self, state_file: str): + self.state_file = state_file + self.state = self._load() + + def _load(self) -> dict: + """从文件加蜜状态""" + + def save(self): + """保存状态到文件""" + + def get_current_step(self) -> int: + """获取圓前步骀""" + + def set_step_status(self, step: str, status: str): + """讟眮步骀状态 (pending/in_progress/completed/failed)""" + + def update_clip_status(self, clip_index: int, **kwargs): + """曎新 clip 状态""" + + def get_clip_titles(self) -> list: + """获取所有 clip 的标题含甚户修改""" +``` + +### 3.2 PipelineController流氎线控制 + +```python +class PipelineController: + """流氎线控制噚 - 管理倄理流皋""" + + # 步骀定义 + STEPS = [ + 'ready', + 'extracting', + 'transcribing', + 'title_correcting', + 'generating_subtitles', + 'merging', + 'burning', + 'completed' + ] + + def __init__(self, config: dict, state_manager: StateManager): + self.config = config + self.state = state_manager + self.is_paused = False + self.is_stopped = False + + def run(self, worker: Worker): + """运行流氎线""" + + def pause(self): + """暂停流氎线""" + + def resume(self): + """恢倍流氎线""" + + def stop(self): + """停止流氎线""" + + def 
step_extracting(self): + """Step 1: 提取片段""" + + def step_transcribing(self): + """Step 2: 蜬圕""" + + def step_title_correcting(self) -> list: + """Step 3: 标题纠正 - 返回需芁甚户确讀的标题""" + # 返回标题列衚甚户可以圚歀介入修改 + + def step_generating_subtitles(self): + """Step 4: 生成字幕""" + + def step_merging(self): + """Step 5: 合并视频""" + + def step_burning(self): + """Step 6: 烧圕字幕""" +``` + +### 3.3 Worker后台工䜜线皋 + +```python +class Worker(QThread): + """后台工䜜线皋 - 圚独立线皋䞭执行流氎线""" + + progress_signal = Signal(str, int, str) # step, percent, message + clip_completed_signal = Signal(int) # clip_index + step_completed_signal = Signal(str) # step_name + titles_ready_signal = Signal(list) # 标题列衚等埅甚户确讀 + finished_signal = Signal(bool, str) # success, message + log_signal = Signal(str) # 日志消息 + + def __init__(self, controller: PipelineController): + super().__init__() + self.controller = controller + + def run(self): + """执行流氎线可暂停""" + + def request_pause(self): + """请求暂停由 UI 调甚""" +``` + +### 3.4 ConfigPanel配眮面板 + +```python +class ConfigPanel(QWidget): + """配眮面板""" + + config_changed_signal = Signal(dict) + + def __init__(self): + super().__init__() + self._init_ui() + + def _init_ui(self): + """初始化 UI""" + # API 配眮组 + # - API Host (QLineEdit) + # - API Key (QLineEdit, 密码暡匏) + # - 暡型选择 (QComboBox) + + # 视频配眮组 + # - 视频文件选择 (QLineEdit + QPushButton) + # - 蟓出目圕选择 (QLineEdit + QPushButton) + + # Whisper 配眮组 + # - 暡型选择 (QComboBox: base/small/medium/large) + # - 暡型路埄 (QLineEdit) + + def load_config(self, config: dict): + """加蜜配眮到 UI""" + + def get_config(self) -> dict: + """从 UI 获取配眮""" + + def validate(self) -> tuple: + """验证配眮有效性""" + # 返回 (is_valid, error_message) +``` + +### 3.5 ProgressView进床视囟 + +```python +class ProgressView(QWidget): + """进床监控视囟""" + + def __init__(self): + super().__init__() + self._init_ui() + + def _init_ui(self): + """初始化 UI""" + # 圓前步骀标筟 (QLabel) + # 敎䜓进床条 (QProgressBar) + # Clip 进床 (QLabel: "Clip 3/14") + # 日志文本框 (QTextEdit, 只读) + # 控制按钮 
(匀始/暂停/停止/ç»§ç»­) + + def update_progress(self, step: str, percent: int, message: str): + """曎新进床星瀺""" + + def append_log(self, message: str): + """远加日志""" + + def set_clip_progress(self, current: int, total: int): + """讟眮 Clip 进床""" + + def enable_controls(self, can_start: bool, can_pause: bool, can_stop: bool, can_resume: bool): + """讟眮控制按钮状态""" +``` + +### 3.6 ContentEditor内容猖蟑噚- 人工介入点 + +```python +class ContentEditor(QWidget): + """内容猖蟑噚 - 甚于人工介入修改标题和字幕内容""" + + content_confirmed_signal = Signal(dict) # 甚户确讀的内容 {clip_index: {title, subtitles}} + + def __init__(self): + super().__init__() + self._init_ui() + + def _init_ui(self): + """初始化 UI""" + # 标筟页标题猖蟑 / 字幕猖蟑 + # 标题猖蟑 + # - Clip # | 原始标题 | LLM建议 | 甚户修改 | 操䜜 + # - 猖蟑按钮 (QPushButton) + # 字幕猖蟑 + # - 按Clip分页每䞪Clip星瀺其字幕内容 + # - 每䞪字幕段可猖蟑原始文本 → 纠正后文本 → 甚户修改 + # 确讀按钮 (QPushButton) + + def set_content(self, clips_data: dict): + """讟眮内容䟛甚户猖蟑 + clips_data: { + clip_index: { + 'title': {...}, + 'subtitles': [...] + } + } + """ + + def get_user_content(self) -> dict: + """获取甚户修改后的内容""" + + def edit_clip_title(self, clip_index: int): + """猖蟑单䞪Clip的标题 - 匹出对话框""" + + def edit_clip_subtitle(self, clip_index: int, subtitle_index: int): + """猖蟑单䞪字幕段 - 匹出对话框""" +``` + +### 3.7 SubtitleSegmentEditor字幕段猖蟑噚 + +```python +class SubtitleSegmentEditor(QWidget): + """单䞪字幕片段的猖蟑噚""" + + def __init__(self, segment_data: dict, parent=None): + super().__init__(parent) + # 星瀺时闎范囎、原始文本、规则纠正后、LLM纠正后、甚户可猖蟑 + + def get_corrected_text(self) -> str: + """获取甚户修改后的文本""" +``` + +--- + +## 4. 
流氎线状态机 + +``` + ┌─────────┐ + ┌─────────►│ Ready │◄────────┐ + │ └────┬────┘ │ + │ │ start() │ reset() + │ â–Œ │ + │ ┌─────────┐ │ + │ ┌─────│Extracting│─────┐ │ + │ │ └────┬────┘ │ │ + │ │ pause │ completed │ │ + │ │ â–Œ │ │ + │ │ ┌───────────┐ │ │ + │ └──►│Transcribing│◄────┘ │ + │ └─────┬─────┘ │ + │ pause │ │ completed │ + │ â–Œ │ + │ ┌─────────────────┐ │ + │ │Title Correcting │◄──┐ │ 人工介入点 + │ └────────┬────────┘ │ │ 甚户可暂停 + │ │ completed │ │ 修改标题 + │ â–Œ │ │ + │ ┌──────────────────┐ │ │ + │ │Generating Subtitles│───┘ │ + │ └─────────┬────────┘ │ + │ pause │ │ completed │ + │ â–Œ │ + │ ┌──────────────┐ │ + │ │ Merging │◄──┘ │ + │ └──────┬───────┘ │ + │ pause│ │ completed │ + │ â–Œ │ + │ ┌───────────┐ │ + │ │ Burning │◄─────────────┘ + │ └─────┬─────┘ + │ pause│ │ completed + │ â–Œ + │ ┌───────────┐ + └────│ Completed │ + └───────────┘ +``` + +--- + +## 5. 信号流讟计 + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ UI Layer │ +│ │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ ConfigPanel │ │ ProgressView │ │ TitleEditor │ │ +│ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │ +│ │ │ │ │ +│ │ config_changed │ titles_ready │ titles_confirmed │ +└─────────┌──────────────────┌──────────────────┌──────────────────┘ + │ │ │ + â–Œ â–Œ â–Œ +┌─────────────────────────────────────────────────────────────────┐ +│ Controller Layer │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ PipelineController │ │ +│ │ - manage_workflow() │ │ +│ │ - handle_pause() / handle_resume() │ │ +│ │ - collect_user_titles() │ │ +│ └──────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ + │ + â–Œ +┌─────────────────────────────────────────────────────────────────┐ +│ Worker Thread │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ Worker (QThread) │ │ +│ │ - runs pipeline steps │ │ +│ │ - emits progress/titles signals │ │ +│ │ - 
respects pause/stop flags │ │ +│ └──────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### 信号定义 + +| 信号 | 方向 | 参数 | 诎明 | +|------|------|------|------| +| `config_changed` | UI → Controller | dict | 配眮变曎 | +| `start_pipeline` | Controller → Worker | dict, StateManager | 启劚流氎线 | +| `progress_signal` | Worker → UI | step, percent, message | 进床曎新 | +| `titles_ready_signal` | Worker → UI | list | 标题列衚准倇奜等埅确讀 | +| `titles_confirmed_signal` | UI → Controller | list | 甚户确讀的标题 | +| `pause_pipeline` | UI → Worker | - | 请求暂停 | +| `resume_pipeline` | UI → Worker | - | 请求恢倍 | +| `stop_pipeline` | UI → Worker | - | 请求停止 | + +--- + +## 6. 状态文件栌匏 + +```json +{ + "version": 1, + "app_version": "1.0.0", + "project_name": "犏田倜校-03月18日", + "created_at": "2026-05-02T10:00:00", + "updated_at": "2026-05-02T10:30:00", + + "config": { + "video_src": "D:/path/to/video.mp4", + "output_dir": "D:/path/to/output", + "api_key": "xxx", + "api_host": "https://ark.cn-beijing.volces.com/api/coding/v3", + "whisper_model": "large", + "whisper_model_path": "D:/AI/LM-Models/faster-whisper/large-v3", + "video_params": { + "fade_duration": 1, + "title_fontsize": 90, + "title_color": "FFFF00", + "subtitle_fontsize": 24, + "subtitle_color": "FFFFFF" + } + }, + + "pipeline": { + "current_step": 3, + "steps": { + "extracting": {"status": "completed", "started_at": "...", "completed_at": "..."}, + "transcribing": {"status": "completed", "started_at": "...", "completed_at": "..."}, + "title_correcting": {"status": "in_progress", "started_at": "...", "completed_at": null}, + "generating_subtitles": {"status": "pending", "started_at": null, "completed_at": null}, + "merging": {"status": "pending", "started_at": null, "completed_at": null}, + "burning": {"status": "pending", "started_at": null, "completed_at": null} + } + }, + + "clips": [ + { + "index": 1, + "title_original": "匹奏", + "title_llm": "匹奏", + "title_user": null, + 
"title_final": "匹奏", + "start": 412, + "end": 442, + "status": "completed", + "clip_path": "intermediates/clip1_fade.mp4", + "json_path": "intermediates/clip1.json", + "transcription_completed_at": "..." + } + ], + + "outputs": { + "subtitle_title_path": "subs/v1_title.srt", + "subtitle_content_path": "subs/v1_content.srt", + "merged_video_path": "concat_merged.mp4", + "final_video_path": "v1_final.mp4" + } +} +``` + +--- + +## 7. 错误倄理策略 + +| 错误类型 | 倄理方匏 | +|----------|----------| +| 配眮无效 | 阻止匀始提瀺甚户修正 | +| API 调甚倱莥 | 重试 3 次仍倱莥则暂停流氎线等埅甚户倄理 | +| 视频文件䞍存圚 | 暂停提瀺甚户选择其他文件 | +| 磁盘空闎䞍足 | 暂停提瀺甚户枅理空闎 | +| 倄理匂垞厩溃 | 状态已持久化重启后可恢倍 | +| 甚户取消 | 保存圓前进床枅理䞎时文件可选 | + +--- + +## 8. 打包配眮 (Nuitka) + +```python +# nuitka_options.py +import nuitka + +nuitka.compile( + script="src/main.py", + mode="standalone", + output_dir="dist", + windows_icon="assets/icon.ico", + include_qt_plugins=["qt_plugins/styles", "qt_plugins/imageformats"], + data_files=[ + ("assets/icons", "assets/icons"), + ], + remove_output_dir=True, + onefile=True, # 打包成单䞪 exe + company_name="Piano Tools", + product_name="Piano Highlight Generator", + product_version="1.0.0", +) +``` + +--- + +## 9. 䟝赖枅单 + +``` +PySide6>=6.6.0 +pyyaml>=6.0 +requests>=2.31.0 +pypinyin>=0.50.0 +faster-whisper>=1.0.0 # 可选劂需本地蜬圕 +``` + +--- + +## 10. 匀发䌘先级 + +| 䌘先级 | 暡块 | 工期䌰计 | +|--------|------|----------| +| P0 | 项目骚架 + ConfigPanel + StateManager | 2h | +| P0 | ProgressView + Worker 集成 | 2h | +| P0 | PipelineController 栞心逻蟑 | 2h | +| P1 | TitleEditor 标题猖蟑噚 | 1.5h | +| P2 | 完善错误倄理和蟹界情况 | 1h | +| P2 | 打包配眮 + 测试 | 1.5h | +| P3 | README 和甚户文档 | 0.5h | + +**总工期䌰计纊 10 小时** diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 0000000..d5e28e3 --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,90 @@ +# 架构讟计 + +## 1. 
技术栈 + +| 层级 | 技术 | 选型理由 | +|------|------|----------| +| GUI 框架 | PySide6 (Qt for Python) | LGPL 讞可功胜完倇信号槜机制适合匂步曎新 | +| 打包工具 | Nuitka | 猖译䞺 C性胜奜䜓积小 | +| 状态持久化 | JSON 文件 | 简单无需数据库䟝赖 | +| 栞心暡块 | 倍甚现有脚本 | video.py, subtitle.py, llm.py, corrections.py | +| 配眮栌匏 | YAML/JSON | 甚户友奜可读性奜 | + +## 2. 项目结构 + +``` +piano-highlight-app/ +├── src/ +│ ├── __init__.py +│ ├── main.py # 应甚入口 +│ ├── app.py # QMainWindow 䞻窗口 +│ ├── gui/ # GUI 组件 +│ │ ├── __init__.py +│ │ ├── config_panel.py # 配眮面板 +│ │ ├── progress_view.py # 进床监控 +│ │ ├── title_editor.py # 标题猖蟑噚 +│ │ └── log_view.py # 日志窗口 +│ ├── logic/ # 䞚务逻蟑 +│ │ ├── __init__.py +│ │ ├── config_manager.py # 配眮管理 +│ │ ├── pipeline_controller.py # 流氎线控制 +│ │ ├── state_manager.py # 状态管理 +│ │ └── worker.py # 后台工䜜线皋 +│ └── core/ # 栞心暡块倍甚 +│ ├── __init__.py +│ ├── constants.py # 垞量 +│ ├── utils.py # 工具凜数 +│ ├── video.py # 视频倄理 +│ ├── subtitle.py # 字幕倄理 +│ ├── llm.py # LLM 调甚 +│ └── corrections.py # 纠错规则 +├── assets/ # 资源文件 +│ └── icons/ +├── requirements.txt # 䟝赖 +├── pyproject.toml # 项目配眮 +├── nuitka_options.py # Nuitka 打包配眮 +└── README.md +``` + +## 3. 栞心类讟计 + +### StateManager状态管理 + +莟莣状态持久化支持暂停/恢倍。 + +### PipelineController流氎线控制 + +管理倄理流皋的 6 䞪步骀 +1. extract - 片段提取 +2. transcribe - 语音蜬圕 +3. title_correct - 标题生成䞎纠错 +4. generate_subtitles - 字幕生成 +5. merge - 片段合并 +6. burn - 字幕烧圕 + +### Worker后台工䜜线皋 + +圚独立线皋䞭执行流氎线通过信号䞎 UI 通信。 + +## 4. 流氎线状态机 + +``` +Ready → Extracting → Transcribing → Title Correcting → Generating Subtitles → Merging → Burning → Completed + ↑ ↓ + └───────────── 甚户可暂停并猖蟑标题 ─────────────┘ +``` + +## 5. 信号流 + +| 信号 | 方向 | 诎明 | +|------|------|------| +| config_changed | UI → Controller | 配眮变曎 | +| progress_signal | Worker → UI | 进床曎新 | +| titles_ready_signal | Worker → UI | 标题列衚准倇奜 | +| titles_confirmed_signal | UI → Controller | 甚户确讀的标题 | + +## 6. 
状态文件栌匏 + +JSON 栌匏包含配眮、流氎线状态、clips 列衚等。 + +诊见 design.md。 \ No newline at end of file diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md new file mode 100644 index 0000000..7b60dfc --- /dev/null +++ b/docs/CHANGELOG.md @@ -0,0 +1,42 @@ +# 曎新日志 + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [版本号] - 日期 + +### Added +- 新功胜 + +### Changed +- 功胜变曎 + +### Fixed +- 问题修倍 + +### Deprecated +- 匃甚功胜 + +### Removed +- 移陀的功胜 + +### Security +- 安党盞关 + +--- + +## 瀺䟋 + +### [1.0.0] - 2026-05-02 + +### Added +- 初始版本发垃 +- 片段提取功胜 +- Whisper 语音蜬圕 +- LLM 标题纠错 +- 双蜚字幕生成 +- 字幕烧圕功胜 +- 状态持久化支持 +- 暂停/恢倍功胜 \ No newline at end of file diff --git a/proposal.md b/proposal.md new file mode 100644 index 0000000..69d76eb --- /dev/null +++ b/proposal.md @@ -0,0 +1,288 @@ +# Piano Highlight Generator App - 需求提案 + +> 提案日期2026-05-02 +> 提案人AI Dev Team +> 状态Draft + +--- + +## 1. 问题描述 + +### 1.1 现状 + +圓前钢琎诟粟华视频生成噚是 Python CLI 脚本集合 +- `generate_highlights.py` - 呜什行皋序䞀次性执行完敎流皋 +- 无囟圢界面需芁手劚猖蟑 YAML 配眮文件 +- 无状态保持皋序䞭断只胜从倎匀始 +- 无人工介入点无法圚倄理过皋䞭修改标题 + +### 1.2 甚户需求 + +甚户垌望将其重构䞺**可分发的桌面应甚**具倇 + +| 需求 | 诎明 | +|------|------| +| **配眮界面** | 囟圢化配眮 API 倧暡型参数无需猖蟑 YAML | +| **党过皋监控** | 实时星瀺每䞪倄理步骀的状态 | +| **状态保持** | 可圚任意步骀暂停然后继续埀䞋进行 | +| **人工介入** | 暂停时可手劚修改标题然后再最后烧制 | + +--- + +## 2. 甚户故事 + +### 2.1 䜜䞺甚户我想芁... + +**Story 1: 配眮管理** +- 打匀应甚后胜看到枅晰的配眮界面 +- 可以选择/切换䞍同的 LLM API圓前是火山方舟可扩展 +- 可以选择 Whisper 暡型甚于音频蜬圕 +- 配眮可以保存和加蜜 + +**Story 2: 党过皋监控** +- 启劚倄理后胜看到每䞪步骀的实时状态 +- 看到圓前正圚倄理哪䞪片段Clip 3/14 +- 看到预䌰剩䜙时闎 +- 胜看到每䞪步骀的日志蟓出 + +**Story 3: 暂停䞎恢倍** +- 点击"暂停"按钮倄理立即停止 +- 关闭应甚后䞋次打匀胜从暂停点继续 +- 恢倍后从最近的检查点继续䞍䞢倱进床 + +**Story 4: 人工介入** +- 圚标题纠正步骀可以预览每䞪片段的标题 +- 可以手劚修改标题 +- 可以跳过某些片段 +- 所有修改䌚被保存 + +**Story 5: 打包分发** +- 生成䞀䞪 .exe 文件双击即可运行 +- 无需安装 Python 环境 +- 可以圚其他电脑䞊䜿甚 + +--- + +## 3. 
验收标准 + +### 3.1 配眮界面 +- [ ] 应甚启劚后星瀺配眮面板 +- [ ] 可以蟓入/猖蟑 API Key 和 API Host +- [ ] 可以选择 Whisper 暡型base/small/medium/large +- [ ] 可以选择蟓入视频文件文件选择对话框 +- [ ] 可以选择蟓出目圕 +- [ ] 配眮可以保存到文件JSON/YAML +- [ ] 配眮可以从文件加蜜 + +### 3.2 党过皋监控 +- [ ] 星瀺圓前步骀准倇 → 提取片段 → 蜬圕 → 标题纠正 → 合并 → 烧圕 +- [ ] 每䞀步星瀺进床条癟分比 +- [ ] 星瀺圓前片段序号劂 "Clip 3/14" +- [ ] 实时星瀺倄理日志 +- [ ] 倄理完成后星瀺最终视频路埄 + +### 3.3 暂停䞎恢倍 +- [ ] 有"暂停"按钮点击后倄理停止 +- [ ] 暂停时状态持久化到文件 +- [ ] 重新打匀应甚后自劚检测到未完成的任务 +- [ ] 可以选择"ç»§ç»­"从暂停点恢倍 +- [ ] 也可以选择"重新匀始" + +### 3.4 人工介入 +- [ ] 圚标题纠正步骀星瀺所有片段的标题列衚 +- [ ] 每䞪标题可以点击猖蟑 +- [ ] 猖蟑后自劚保存 +- [ ] 可以预览修改后的字幕效果 +- [ ] 确讀后继续后续步骀 + +### 3.5 打包分发 +- [ ] 生成 Windows 可执行文件.exe +- [ ] 双击运行无须安装 Python +- [ ] 界面矎观䞎现代桌面应甚䞀臎 + +--- + +## 4. 技术选型 + +### 4.1 GUI 框架 + +**选择PySide6 (Qt for Python)** + +| 因玠 | 结论 | +|------|------| +| 讞可证 | LGPL - 可闭源商甚无需莭买 | +| 功胜 | 成熟完倇适合倍杂桌面应甚 | +| 状态管理 | 䌠统保留暡匏倩然支持暂停/恢倍 | +| 倚线皋 | 信号槜机制完善支持倚线皋安党曎新 UI | +| 瀟区 | 文档䞰富瀟区掻跃 | +| 打包 | Nuitka 打包䜓积小~10MB启劚快 | + +### 4.2 打包方案 + +**匀发阶段**PyInstaller调试方䟿 +**正匏发垃**Nuitka䜓积小性胜奜 + +### 4.3 状态持久化 + +- 䜿甚 JSON 文件存傚倄理进床 +- 每䞪项目独立的状态文件 +- 包含圓前步骀、已完成片段、甚户修改的标题等 + +--- + +## 5. 
架构讟计预览 + +``` +┌─────────────────────────────────────────────────────────────┐ +│ GUI Layer (PySide6) │ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────┐ │ +│ │ ConfigPanel │ │ ProgressView│ │ TitleEditor │ │ +│ └─────────────┘ └─────────────┘ └─────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ + │ + â–Œ +┌─────────────────────────────────────────────────────────────┐ +│ Business Logic Layer │ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────┐ │ +│ │ ConfigManager│ │PipelineCtrl │ │ StateManager │ │ +│ └─────────────┘ └─────────────┘ └─────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ + │ + â–Œ +┌─────────────────────────────────────────────────────────────┐ +│ Core Modules (Legacy) │ +│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ +│ │ video.py│ │subtitle │ │ llm.py │ │correction│ │ +│ │ │ │ .py │ │ │ │s.py │ │ +│ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │ +└─────────────────────────────────────────────────────────────┘ +``` + +### 暡块职莣 + +| 暡块 | 职莣 | +|------|------| +| ConfigPanel | 配眮界面API、视频路埄、暡型选择 | +| ProgressView | 进床监控步骀、癟分比、日志 | +| TitleEditor | 标题猖蟑噚预览、猖蟑甚户修改 | +| ConfigManager | 配眮加蜜/保存 | +| PipelineController | 流氎线控制启劚/暂停/恢倍/停止 | +| StateManager | 状态持久化保存/恢倍进床 | + +--- + +## 6. 
流氎线步骀 + +䞎应甚UI对应的倄理步骀 + +``` +Step 0: 准倇 (Ready) + └─ 检查配眮、初始化环境 + +Step 1: 提取片段 (Extracting) + └─ 从视频提取 clips -> intermediates/clip{N}_fade.mp4 + +Step 2: 蜬圕 (Transcribing) + └─ Whisper 蜬圕 -> intermediates/clip{N}.json + +Step 3: 标题纠正 (Title Correcting) + └─ LLM 分析 + 甚户修改 -> intermediates/corrected_titles.json + [人工介入点] 甚户可圚歀步骀暂停并修改标题 + +Step 4: 生成字幕 (Generating Subtitles) + └─ 生成双蜚字幕 -> subs/v{N}_title.srt, v{N}_content.srt + +Step 5: 合并视频 (Merging) + └─ FFmpeg concat -> concat_merged.mp4 + +Step 6: 烧圕字幕 (Burning) + └─ FFmpeg burn -> v{N}_final.mp4 + +Step 7: 完成 (Completed) + └─ 星瀺结果枅理䞎时文件可选 +``` + +### 暂停点 + +甚户可以圚以䞋步骀暂停 +- Step 1 完成后片段已提取 +- Step 2 完成后蜬圕已完成 +- Step 3 完成后标题已确讀- **掚荐暂停点** +- Step 4 完成后字幕已生成 +- Step 5 完成后视频已合并 + +--- + +## 7. 状态数据结构 + +```json +{ + "version": 1, + "project_name": "犏田倜校-03月18日", + "config": { + "video_src": "D:/path/to/video.mp4", + "output_dir": "D:/path/to/output", + "api_key": "xxx", + "api_host": "https://...", + "whisper_model": "large" + }, + "current_step": 3, + "clips": [ + { + "index": 1, + "title_original": "匹奏", + "title_corrected": "匹奏", + "title_user_modified": null, + "start": 412, + "end": 442, + "status": "completed", + "clip_path": "intermediates/clip1_fade.mp4", + "json_path": "intermediates/clip1.json" + } + ], + "step_status": { + "extracting": "completed", + "transcribing": "completed", + "title_correcting": "in_progress", + "generating_subtitles": "pending", + "merging": "pending", + "burning": "pending" + }, + "created_at": "2026-05-02T10:00:00", + "updated_at": "2026-05-02T10:30:00" +} +``` + +--- + +## 8. 蟹界情况 + +| 情况 | 倄理方匏 | +|------|----------| +| API Key 无效 | 星瀺错误提瀺䞍匀始倄理 | +| 视频文件䞍存圚 | 配眮检查时发现提瀺甚户 | +| 倄理到䞀半厩溃 | 状态已持久化重启后可恢倍 | +| 甚户取消倄理 | 保存圓前进床可选择重新匀始或继续 | +| Whisper 暡型未䞋蜜 | 星瀺䞋蜜铟接或䜿甚默讀暡型 | +| 蟓出目圕磁盘空闎䞍足 | 倄理前检查提瀺甚户 | + +--- + +## 9. 
非功胜性需求 + +| 需求 | 标准 | +|------|------| +| 启劚时闎 | < 3 秒䜿甚 Nuitka 打包后 | +| UI 响应性 | 所有操䜜圚 100ms 内响应 | +| 内存占甚 | < 500MB䞍含视频倄理 | +| 打包后䜓积 | < 50MB | +| 兌容性 | Windows 10/11 64-bit | + +--- + +## 10. 后续步骀 + +1. **Phase 2**: 技术讟计 - 诊细架构讟计、数据库选型、接口定义 +2. **Phase 3**: 任务猖排 - 拆分并行任务、创建 git worktree +3. **Phase 4**: 并行匀发 - 各暡块实现 +4. **Phase 5**: 莚量亀付 - 审查、测试、打包 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..5de28e1 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,71 @@ +[project] +name = "piano-highlight-app" +version = "1.0.0" +description = "Piano Highlight Generator App - 自劚生成钢琎诟粟华宣䌠视频" +readme = "README.md" +requires-python = ">=3.12" +license = {text = "MIT"} +authors = [ + {name = "Piano Tools Team"} +] +dependencies = [ + "PySide6>=6.6.0", + "pyyaml>=6.0", + "requests>=2.31.0", + "pypinyin>=0.50.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=7.0.0", + "pytest-qt>=4.0.0", +] + +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = ["test_*.py"] + +[tool.nuitka] +# Basic options +assume_yes_for_downloads = true +show_progress = true +verbose = false + +# Output configuration +output_dir = "dist" +output_name = "PianoHighlightGenerator" + +# Python version +python_version = "3.10" + +# Compilation mode +standalone = true +onefile = true + +# Plugin enablement +enable_plugin.pyside6 = true + +# Windows specific +[tool.nuitka.windows] +console = false # GUI app, no console window + +# Data files to include (relative to project root) +# Uncomment when prompts directory exists: +# include_data_files = [ +# { from = "src/core/prompts", to = "prompts", relative_to = "." 
} +# ] + +# Advanced options +[tool.nuitka.advanced] +# Treat sqlite as required (for state storage) +no_sqlite = false + +# Include optional dependencies for better compatibility +include_optional = true diff --git a/requirements-build.txt b/requirements-build.txt new file mode 100644 index 0000000..b23ed59 --- /dev/null +++ b/requirements-build.txt @@ -0,0 +1,5 @@ +# Build dependencies for Nuitka compilation +# Install with: pip install -r requirements-build.txt + +nuitka>=1.7.0 +pandas>=1.5.0 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f28f882 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +# Core dependencies +PySide6>=6.6.0 +pyyaml>=6.0 +requests>=2.31.0 +pypinyin>=0.50.0 + +# Optional: Transcription (required for full functionality) +# faster-whisper>=0.10.0 diff --git a/run_lesson1.bat b/run_lesson1.bat new file mode 100644 index 0000000..ee3ecc9 --- /dev/null +++ b/run_lesson1.bat @@ -0,0 +1,13 @@ +@echo off +chcp 65001 >nul +echo Cleaning pycache... +rmdir /s /q "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\src\__pycache__" 2>nul +rmdir /s /q "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\src\core\__pycache__" 2>nul +echo Cache cleaned. +echo. +echo Running CLI... +del "D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt" 2>nul +"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\run_lesson1.py" +echo. 
# Machine-specific launcher that runs the CLI against the "lesson 1" sample
# and mirrors its console output into LOG_FILE.
#
# SECURITY: an API key used to be hard-coded here and was committed to the
# repository. That key must be treated as leaked and rotated. The key is now
# read from the environment and never stored in source control.

VIDEO = r"D:\F\yc\诟皋䞊架\犏田商圈倜校\诟皋视频\盎播回攟-03月18日.mp4"
PPT = r"D:\F\yc\诟皋䞊架\犏田商圈倜校\诟皋视频\钢琎挔奏入闚第䞀诟.pptx"
OUTPUT = r"D:\F\NewI\opencode\daily-workspace\projects\piano-lesson-highlights\cases\lesson1\output_cli_full"
PYTHON = r"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe"
CLI_DIR = r"D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\src"
API_HOST = "https://ark.cn-beijing.volces.com/api/coding/v3"
LOG_FILE = r"D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt"

# Same variable names (and order) that src/core/constants.py consults.
API_KEY_ENVS = ("VOLCENGINE_API_KEY", "MINIMAX_API_KEY")


def _resolve_api_key():
    """Return the first non-empty API key found in API_KEY_ENVS, else None."""
    for env_name in API_KEY_ENVS:
        value = os.environ.get(env_name)
        if value:
            return value
    return None


# Kept as a module-level name for backward compatibility; may be None.
API_KEY = _resolve_api_key()


def main():
    """
    Run src/cli.py as a child process and tee its output to LOG_FILE.

    Returns:
        The child's exit code, or 2 when no API key is configured. The value
        is passed to sys.exit so that run_lesson1.bat's "%errorlevel%" line
        reports the real result instead of always printing 0.
    """
    if not API_KEY:
        print(
            "No API key configured: set one of " + ", ".join(API_KEY_ENVS),
            file=sys.stderr,
        )
        return 2

    env = os.environ.copy()
    # Put the conda environment first so its DLLs/executables win.
    env["PATH"] = r"D:\ProgramData\anaconda3\envs\py312_cuda" + os.pathsep + env.get("PATH", "")

    cmd = [
        PYTHON,
        os.path.join(CLI_DIR, "cli.py"),
        "--video", VIDEO,
        "--ppt", PPT,
        "--output", OUTPUT,
        "--api-key", API_KEY,
        "--api-host", API_HOST,
        "--verbose"
    ]

    print("Starting CLI...")
    print(f"Video: {VIDEO}")
    print(f"PPT: {PPT}")
    print(f"Log: {LOG_FILE}")

    # The log directory is not guaranteed to exist on a fresh checkout.
    log_dir = os.path.dirname(LOG_FILE)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    proc = subprocess.Popen(
        cmd,
        cwd=CLI_DIR,
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # interleave stderr so the log is complete
        encoding='utf-8',
        errors='replace',
    )

    with open(LOG_FILE, 'w', encoding='utf-8') as log:
        for line in proc.stdout:
            log.write(line)
            log.flush()  # keep the log usable while the run is in progress
            print(line, end='')

    proc.wait()
    print(f"\nExit code: {proc.returncode}")
    return proc.returncode


if __name__ == "__main__":
    sys.exit(main())
# Make "from core import ..." resolvable when this file is run directly
# (python src/cli.py) by putting its own directory first on sys.path.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Switch the Windows console streams to UTF-8 before logging is configured.
# reconfigure() changes the existing stream in place, so it is idempotent and
# does not fail when a stream is None (GUI builds without a console).
if sys.platform == 'win32':
    for _stream in (sys.stdout, sys.stderr):
        _reconfigure = getattr(_stream, 'reconfigure', None)
        if _reconfigure is not None:
            _reconfigure(encoding='utf-8', errors='replace')


# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# argparse defaults. They are needed again in load_config_from_args to tell
# "the user did not pass the flag" apart from an explicit override.
_DEFAULT_OUTPUT_DIR = './output'
_DEFAULT_WHISPER_MODEL = 'large'


def parse_args(argv=None):
    """
    Parse command-line arguments.

    Args:
        argv: Optional argument list (without the program name). When None,
            argparse reads sys.argv[1:], which is the original behaviour.

    Returns:
        argparse.Namespace with video, clips, ppt, output, config, api_key,
        api_host, whisper_model and verbose attributes.
    """
    parser = argparse.ArgumentParser(
        description='Piano Highlight Generator - CLI',
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    # Primary input modes
    parser.add_argument('--video', '-v', type=str,
                        help='视频文件路埄')
    parser.add_argument('--clips', '-c', type=str,
                        help='clips配眮文件(YAML栌匏)')
    parser.add_argument('--ppt', '-p', type=str,
                        help='PPT/PDF文件路埄 (甚于自劚生成clips)')
    parser.add_argument('--output', '-o', type=str, default=_DEFAULT_OUTPUT_DIR,
                        help='蟓出目圕 (默讀: ./output)')

    # Full-configuration mode
    parser.add_argument('--config', '-f', type=str,
                        help='完敎配眮文件路埄')

    # Optional parameters
    parser.add_argument('--api-key', type=str,
                        help='LLM API密钥')
    parser.add_argument('--api-host', type=str,
                        help='LLM API地址')
    parser.add_argument('--whisper-model', type=str, default=_DEFAULT_WHISPER_MODEL,
                        help='Whisper暡型 (默讀: large)')
    parser.add_argument('--verbose', '-V', action='store_true',
                        help='诊细蟓出')

    return parser.parse_args(argv)


def _read_yaml_mapping(path) -> dict:
    """
    Load a YAML file whose top level must be a mapping.

    Returns an empty dict for an empty file (yaml.safe_load yields None
    there) and raises ValueError for any other non-mapping document.
    """
    import yaml  # imported lazily, as in the original code paths
    with open(path, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)
    if data is None:
        return {}
    if not isinstance(data, dict):
        raise ValueError(f"配眮文件栌匏错误(顶层必须是映射): {path}")
    return data


def load_config_from_args(args) -> dict:
    """
    Build the pipeline configuration from parsed command-line arguments.

    Sources are applied in this order, later ones winning:
      1. built-in defaults,
      2. the --clips YAML (clips, term_corrections, video_params),
      3. the --config YAML (any key; video_params is merged, not replaced),
      4. explicit command-line values.

    Args:
        args: Namespace produced by parse_args().

    Returns:
        Configuration dict with at least output_dir, video_src, clips,
        api_key, api_host, whisper_model and video_params.

    Raises:
        ValueError: if a YAML file's top level is not a mapping.
    """
    config = {
        'output_dir': args.output,
        'video_src': args.video,
        'clips': [],
        'api_key': args.api_key,
        'api_host': args.api_host,
        'whisper_model': args.whisper_model,
        'video_params': {
            'fade_duration': 1,
            'title_fontsize': 90,
            'title_color': 'FFFF00',
            'subtitle_fontsize': 24,
            'subtitle_color': 'FFFFFF',
        }
    }

    # Clips file
    if args.clips:
        if os.path.exists(args.clips):
            clips_config = _read_yaml_mapping(args.clips)
            if 'clips' in clips_config:
                config['clips'] = clips_config['clips'] or []
            if 'term_corrections' in clips_config:
                config['term_corrections'] = clips_config['term_corrections']
            if 'video_params' in clips_config:
                config['video_params'].update(clips_config['video_params'] or {})
        else:
            # Previously ignored silently; the later "no clips" error then
            # gave no hint that the path was simply wrong.
            logger.warning(f"clips配眮文件䞍存圚已応略: {args.clips}")

    # Full configuration file
    if args.config:
        if os.path.exists(args.config):
            file_config = _read_yaml_mapping(args.config)
            # Merge video_params so a partial override keeps other defaults.
            file_video_params = file_config.pop('video_params', None)
            config.update(file_config)
            if isinstance(file_video_params, dict):
                config['video_params'].update(file_video_params)
        else:
            logger.warning(f"配眮文件䞍存圚已応略: {args.config}")

    # Priority: command line > configuration file
    if args.api_key:
        config['api_key'] = args.api_key
    if args.api_host:
        config['api_host'] = args.api_host
    if args.video:
        config['video_src'] = args.video
    # --output and --whisper-model always carry an argparse default, so a bare
    # truthiness test would clobber the file's value every time. Only let the
    # CLI win when the value differs from the default or the file gave none.
    # Limitation: explicitly passing the default value cannot be detected.
    if args.output and (args.output != _DEFAULT_OUTPUT_DIR or not config.get('output_dir')):
        config['output_dir'] = args.output
    if args.whisper_model and (
        args.whisper_model != _DEFAULT_WHISPER_MODEL or not config.get('whisper_model')
    ):
        config['whisper_model'] = args.whisper_model

    return config


def generate_config_from_ppt(args) -> dict:
    """
    Derive the clips configuration from a PPT/PDF and persist it.

    Delegates to core.parse_ppt_to_config and writes the result to
    <output>/generated_config.yaml.

    Args:
        args: Namespace from parse_args(); video, ppt, output, api_key and
            api_host are read.

    Returns:
        The configuration returned by parse_ppt_to_config.
    """
    from core import parse_ppt_to_config

    def progress_callback(step, percent, message):
        logger.info(f"[{step}] {percent}%: {message}")

    logger.info("=" * 50)
    logger.info("从PPT自劚生成clips配眮...")
    logger.info(f"视频: {args.video}")
    logger.info(f"PPT: {args.ppt}")
    logger.info(f"蟓出: {args.output}")
    logger.info("=" * 50)

    config = parse_ppt_to_config(
        video_path=args.video,
        ppt_path=args.ppt,
        output_dir=args.output,
        progress_callback=progress_callback,
        api_key=args.api_key,
        api_host=args.api_host,
    )

    # Persist the generated configuration. The directory is created here
    # because nothing visible in this file guarantees it exists yet.
    # NOTE(review): if the returned config contains api_key it is written to
    # disk in plain text - confirm against parse_ppt_to_config.
    os.makedirs(args.output, exist_ok=True)
    config_path = os.path.join(args.output, 'generated_config.yaml')
    import yaml
    with open(config_path, 'w', encoding='utf-8') as f:
        yaml.dump(config, f, allow_unicode=True, default_flow_style=False)
    logger.info(f"配眮已保存: {config_path}")

    return config


def main():
    """
    CLI entry point.

    Returns:
        Process exit code: 0 on success (or when only usage was printed),
        1 on a configuration or processing error, 130 on Ctrl+C.
    """
    # Ensure the FFmpeg directory is on PATH. init_environment() is not
    # called here on purpose (see the encoding note in core.constants).
    import shutil
    ffmpeg_path = shutil.which("ffmpeg")
    if ffmpeg_path:
        ffmpeg_bin = os.path.dirname(ffmpeg_path)
        if ffmpeg_bin not in os.environ.get('PATH', ''):
            os.environ['PATH'] = ffmpeg_bin + os.pathsep + os.environ.get('PATH', '')

    args = parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # With no arguments at all, print a short usage summary.
    if len(sys.argv) == 1:
        print("Piano Highlight Generator - CLI")
        print("=" * 50)
        print("甚法:")
        print("  python cli.py --video video.mp4 --clips clips.yaml --output ./output")
        print("  python cli.py --config config.yaml")
        print("  python cli.py --video video.mp4 --ppt presentation.pptx --output ./output")
        print()
        print("完敎垮助: python cli.py --help")
        return 0

    try:
        # --ppt together with --video: generate the clips automatically.
        if args.ppt and args.video:
            config = generate_config_from_ppt(args)
        else:
            config = load_config_from_args(args)

        # Validate required settings
        if not config.get('video_src'):
            logger.error("错误: 必须指定视频文件 (--video)")
            return 1

        if not config.get('clips'):
            logger.error("错误: 必须指定clips配眮 (--clips 或 --config)")
            logger.error("或䜿甚 --ppt 自劚从PPT生成clips")
            return 1

        logger.info("=" * 50)
        logger.info("Piano Highlight Generator - CLI")
        logger.info("=" * 50)
        logger.info(f"视频: {config.get('video_src')}")
        logger.info(f"片段数: {len(config.get('clips', []))}")
        logger.info(f"蟓出: {config.get('output_dir')}")
        logger.info("=" * 50)

        # Create and run the pipeline
        from core import Pipeline

        pipeline = Pipeline(config)

        logger.info("匀始倄理...")
        final_path = pipeline.run()

        logger.info("=" * 50)
        logger.info(f"完成! 最终视频: {final_path}")
        logger.info("=" * 50)

        return 0

    except KeyboardInterrupt:
        logger.info("甚户䞭断")
        return 130
    except Exception as e:
        logger.exception(f"倄理倱莥: {e}")
        return 1


if __name__ == "__main__":
    sys.exit(main())
# ============== Project root ==============
def _get_project_root():
    """Return the project root: this file lives in src/core/, so go 3 levels up."""
    return os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

PROJECT_ROOT = _get_project_root()

# Executables carry ".exe" only on Windows. Hard-coding it made FFMPEG_CMD
# point at a non-existent "<dir>/ffmpeg.exe" on macOS/Linux.
_EXE_SUFFIX = ".exe" if sys.platform == "win32" else ""


def _exe(name):
    """Return the platform-specific file name of an executable."""
    return name + _EXE_SUFFIX


# ============== FFmpeg location (detected at import time) ==============
def _find_ffmpeg():
    """
    Locate the directory that contains the ffmpeg executable.

    Search order:
      1. <project>/ffmpeg/bin (portable distribution),
      2. <project>/ffmpeg/<any sub-folder>/bin (unpacked release archive),
      3. the system PATH.

    Returns:
        Directory path, or "" when FFmpeg cannot be found (callers then fall
        back to the bare command name and fail later with a clear error).
    """
    ffmpeg_name = _exe("ffmpeg")

    # 1. Direct: ffmpeg/bin/ffmpeg[.exe]
    local_ffmpeg = os.path.join(PROJECT_ROOT, "ffmpeg", "bin")
    if os.path.exists(os.path.join(local_ffmpeg, ffmpeg_name)):
        return local_ffmpeg

    # Nested: ffmpeg/<release-folder>/bin/ffmpeg[.exe]
    ffmpeg_base = os.path.join(PROJECT_ROOT, "ffmpeg")
    if os.path.isdir(ffmpeg_base):
        # sorted() makes the choice deterministic when several builds exist.
        for subdir in sorted(os.listdir(ffmpeg_base)):
            nested_path = os.path.join(ffmpeg_base, subdir, "bin")
            if os.path.exists(os.path.join(nested_path, ffmpeg_name)):
                return nested_path

    # 2. System PATH
    system_ffmpeg = shutil.which("ffmpeg")
    if system_ffmpeg:
        return os.path.dirname(system_ffmpeg)

    # 3. Not found
    return ""

FFMPEG_BIN = _find_ffmpeg()
FFMPEG_CMD = os.path.join(FFMPEG_BIN, _exe("ffmpeg")) if FFMPEG_BIN else "ffmpeg"
FFPROBE_CMD = os.path.join(FFMPEG_BIN, _exe("ffprobe")) if FFMPEG_BIN else "ffprobe"

# ============== Whisper model location (detected at import time) ==============
def _find_whisper_model():
    """
    Locate a Whisper model directory.

    Search order: first sub-folder of <project>/whisper_models, then the
    WHISPER_MODEL_PATH environment variable, then a machine-specific default.

    Returns:
        Path using forward slashes, or "" when nothing is found.
    """
    # 1. Local whisper_models folder: take the first model directory.
    local_models = os.path.join(PROJECT_ROOT, "whisper_models")
    if os.path.exists(local_models):
        for name in sorted(os.listdir(local_models)):
            model_path = os.path.join(local_models, name)
            if os.path.isdir(model_path):
                return model_path.replace('\\', '/')

    # 2. Environment variable
    env_path = os.environ.get("WHISPER_MODEL_PATH")
    if env_path and os.path.exists(env_path):
        return env_path.replace('\\', '/')

    # 3. Default path (specific to the original author's machine)
    default_path = r"D:/AI/LM-Models/faster-whisper/large-v3"
    if os.path.exists(default_path):
        return default_path.replace('\\', '/')

    return ""

WHISPER_MODEL_PATH = _find_whisper_model()

# ============== Default output directory ==============
DEFAULT_OUTPUT_DIR = os.path.join(PROJECT_ROOT, "output")

# ============== Default video parameters ==============
DEFAULT_VIDEO_PARAMS = {
    "fade_duration": 1,
    "title_duration": 3,
    "title_fontsize": 90,
    "title_color": "FFFF00",
    "subtitle_fontsize": 24,
    "subtitle_color": "FFFFFF",
    "use_fast_whisper": True,
    "whisper_model": "large",
}

# ============== API configuration ==============
DEFAULT_API_HOST = "https://ark.cn-beijing.volces.com/api/coding/v3"
API_KEY_ENVS = ["VOLCENGINE_API_KEY", "MINIMAX_API_KEY"]

# ============== LLM configuration ==============
LLM_MODEL = "doubao-seed-2.0-lite"
LLM_TIMEOUT = 60
LLM_MAX_RETRIES = 3
LLM_TITLE_TIMEOUT = 60
LLM_VALIDATE_TIMEOUT = 30

# ============== Subtitle style ==============
SUBTITLE_STYLE = "FontName=埮蜯雅黑,FontSize=24,PrimaryColour=&H00FFFFFF"

# ============== Windows console encoding ==============
def setup_windows_encoding():
    """
    Switch stdout/stderr to UTF-8 on Windows; no-op elsewhere.

    Uses TextIOWrapper.reconfigure() instead of wrapping sys.stdout.buffer in
    a new TextIOWrapper. Re-wrapping is not idempotent: on a second call the
    previous wrapper is discarded and closes the shared underlying buffer.
    It also raised AttributeError when a stream is None or has no .buffer
    (GUI builds without a console).
    """
    if sys.platform == 'win32':
        for stream in (sys.stdout, sys.stderr):
            reconfigure = getattr(stream, 'reconfigure', None)
            if reconfigure is not None:
                reconfigure(encoding='utf-8', errors='replace')

# ============== FFmpeg PATH setup ==============
def setup_ffmpeg_path():
    """Prepend FFMPEG_BIN to PATH unless it is already one of the entries."""
    if not FFMPEG_BIN:
        return
    current = os.environ.get('PATH', '')
    # Compare whole entries; a substring test wrongly matches e.g.
    # "C:\ffmpeg\bin" inside "C:\ffmpeg\bin-old".
    if FFMPEG_BIN not in current.split(os.pathsep):
        os.environ['PATH'] = FFMPEG_BIN + os.pathsep + current

# ============== Environment initialisation ==============
def init_environment():
    """Apply the console-encoding and FFmpeg PATH setup."""
    setup_windows_encoding()
    setup_ffmpeg_path()

def get_api_key():
    """Return the first non-empty API key among API_KEY_ENVS, or None."""
    for env_name in API_KEY_ENVS:
        key = os.environ.get(env_name)
        if key:
            return key
    return None
# ============== Direct replacement dictionary ==============
# Format: "wrong term": "correct term"
DIRECT_FIXES = {
    "副点": "附点",
    "拍苻": "拍笊",
    "挔音": "延音",
    "调苻": "调号",
    "谱苻": "谱号",
    "莟点": "附点",
    "阅历": "乐理",
    "音苻": "音笊",
    "銖䜍": "手䜍",
    "黑剑": "黑键",
    # Added from lesson-1 experience
    "非联奏": "非连奏",
    "任谱": "讀谱",
    "实谱": "识谱",
    "任音": "讀音",
    "䌠人": "唱人",
    "修纞笊": "䌑止笊",
    "修纞": "䌑止",
    "修纞敎小节": "䌑止敎小节",
}

# ============== Note-name fixes ==============
# NOTE(review): the single-character keys are replaced wherever they occur,
# so ordinary words containing them (e.g. "发现", "䞜西") are rewritten too.
# Behaviour is left unchanged here - confirm this is intended.
SONG_NAME_FIXES = {
    "Doramifasalasi": "Do Re Mi Fa So La Si",
    "刀": "do",
    "锐": "re",
    "咪": "mi",
    "发": "fa",
    "å—Š": "so",
    "啊": "la",
    "西": "si",
}

# ============== Suspicious words ==============
ANOMALY_WORDS = [
    "矞耻", "䌑息",  # possibly a mis-recognised "䌑止"
    "实莚", "时倌",  # related to note duration ("时倌")
]

# ============== Music terminology ==============
MUSIC_TERMS = [
    "音笊", "䌑止笊", "拍子", "节拍", "节奏",
    "党分", "二分", "四分", "八分", "十六分", "䞉十二分",
    "附点", "调号", "谱号", "音名", "唱名",
    "手型", "手䜍", "支撑", "攟束",
    "匹奏", "非连奏", "跳奏", "连奏",
]

# ============== Anomaly patterns ==============
# (regex, replacement) pairs; applied in order.
ANOMALY_PATTERNS = [
    (r'矞耻', '䌑止'),
    (r'实莚音笊', '时倌音笊'),
    (r'实莚', '时倌'),
]


def apply_term_corrections(text, corrections=None):
    """
    Apply the term replacement dictionary.

    Args:
        text: Original text; None or "" is returned unchanged.
        corrections: Optional extra {wrong: correct} entries that extend or
            override DIRECT_FIXES.

    Returns:
        The corrected text.
    """
    if not text:
        return text

    all_fixes = dict(DIRECT_FIXES)
    if corrections:
        all_fixes.update(corrections)

    # Longest keys first so a short key cannot split a longer match.
    sorted_fixes = sorted(all_fixes.items(), key=lambda x: len(x[0]), reverse=True)

    for wrong, correct in sorted_fixes:
        if wrong in text:
            text = text.replace(wrong, correct)

    return text


def apply_song_name_fixes(text):
    """Replace mis-transcribed solfege names; None/"" is returned unchanged."""
    if not text:
        return text
    for wrong, correct in SONG_NAME_FIXES.items():
        if wrong in text:
            text = text.replace(wrong, correct)
    return text


def apply_anomaly_fixes(text):
    """Apply ANOMALY_PATTERNS in order; None/"" is returned unchanged."""
    if not text:
        return text
    for pattern, replacement in ANOMALY_PATTERNS:
        text = re.sub(pattern, replacement, text)
    return text


def apply_all_corrections(text, extra_corrections=None):
    """
    Run every correction stage: terms, note names, anomaly patterns.

    Args:
        text: Original text. None or "" is returned unchanged (previously
            None raised TypeError in the second stage).
        extra_corrections: Extra term dictionary, typically from the config.

    Returns:
        The corrected text.
    """
    if not text:
        return text
    text = apply_term_corrections(text, extra_corrections)
    text = apply_song_name_fixes(text)
    text = apply_anomaly_fixes(text)
    return text


def detect_anomalies(text, knowledge_terms=None):
    """
    List suspicious findings in a piece of text.

    Args:
        text: Text to inspect.
        knowledge_terms: Optional list of expected knowledge terms.

    Returns:
        A list with every ANOMALY_WORDS entry found in the text, plus the
        marker "NO_KNOWLEDGE_TERM" when knowledge_terms is given, none of
        them occurs (case-insensitively) and the text is longer than 10
        characters.
    """
    anomalies = [word for word in ANOMALY_WORDS if word in text]

    if knowledge_terms:
        text_lower = text.lower()
        has_knowledge = any(term.lower() in text_lower for term in knowledge_terms)
        if not has_knowledge and len(text) > 10:
            anomalies.append("NO_KNOWLEDGE_TERM")

    return anomalies


def get_pinyin(text):
    """
    Return the space-separated TONE3 pinyin of text.

    Falls back to returning text unchanged if the conversion fails. Only
    Exception is caught, so KeyboardInterrupt and SystemExit propagate
    (the original bare "except:" swallowed them).
    """
    try:
        return ' '.join([p[0] for p in pinyin(text, style=Style.TONE3)])
    except Exception:
        return text


def pinyin_similarity(word1, word2):
    """
    Score how alike two words sound.

    The score is the fraction of positions at which the two pinyin strings
    have the same character, relative to the longer string.

    Args:
        word1: First word.
        word2: Second word.

    Returns:
        Similarity in the range 0-1 (1.0 when the pinyin is identical).
    """
    p1 = get_pinyin(word1)
    p2 = get_pinyin(word2)

    if p1 == p2:
        return 1.0

    common = sum(1 for c1, c2 in zip(p1, p2) if c1 == c2)
    max_len = max(len(p1), len(p2))

    return common / max_len if max_len > 0 else 0


def ai_context_correct(text, clip_title="", all_clips=None):
    """
    Apply DIRECT_FIXES entries whose two sides sound alike.

    Args:
        text: Text to correct.
        clip_title: Clip title (currently unused).
        all_clips: Information about all clips (currently unused).

    Returns:
        The corrected text.
    """
    for wrong, correct in DIRECT_FIXES.items():
        if wrong in text:
            # Replace only when the pair is phonetically close.
            similarity = pinyin_similarity(wrong, correct)
            if similarity > 0.5:
                text = text.replace(wrong, correct)

    return text


def load_term_corrections_from_config(config):
    """
    Build the term dictionary from the configuration.

    Args:
        config: Configuration dict; its optional 'term_corrections' mapping
            overrides the built-in defaults. A missing key or an explicit
            null (an empty YAML value) is treated as "no extra entries".

    Returns:
        {wrong: correct} dictionary that always contains the base rules.
    """
    term_corrections = config.get('term_corrections') or {}

    # Base rules that must always be present.
    defaults = {
        "副点": "附点",
        "实莚": "时倌",
    }

    defaults.update(term_corrections)
    return defaults
logger = logging.getLogger(__name__)


# =============================================================================
# Exception Hierarchy
# =============================================================================

class PianoAppError(Exception):
    """
    Base exception for all application errors.

    Attributes are documented inline: `message` is the text returned by
    str(error); `recoverable` tells the pipeline whether it may continue
    (or retry) after the error has been handled.
    """

    def __init__(self, message: str, recoverable: bool = True):
        self.message = message
        self.recoverable = recoverable  # If True, pipeline can continue after handling
        super().__init__(self.message)

    def __str__(self):
        return self.message


# -----------------------------------------------------------------------------
# API Errors
# -----------------------------------------------------------------------------

class APIError(PianoAppError):
    """API related errors (recoverable unless stated otherwise)."""

    def __init__(self, message: str, recoverable: bool = True):
        super().__init__(message, recoverable=recoverable)


class APIKeyError(APIError):
    """Invalid or expired API key - non-recoverable."""

    def __init__(self, message: str = "API密钥无效或已过期"):
        super().__init__(message, recoverable=False)


class APIRateLimitError(APIError):
    """API rate limit exceeded - recoverable after waiting `retry_after` seconds."""

    def __init__(self, message: str = "API调甚频率超限", retry_after: float = 60.0):
        super().__init__(message, recoverable=True)
        self.retry_after = retry_after


class APITimeoutError(APIError):
    """API request timeout - recoverable."""

    def __init__(self, message: str = "API请求超时"):
        super().__init__(message, recoverable=True)


# -----------------------------------------------------------------------------
# File Errors
# -----------------------------------------------------------------------------

class FileError(PianoAppError):
    """File related errors."""

    def __init__(self, message: str, recoverable: bool = True):
        super().__init__(message, recoverable=recoverable)


# NOTE: this class shadows the built-in FileNotFoundError inside this module
# and in every module that star-imports it. It is NOT raised by open()/os
# calls, and "except FileNotFoundError" in such modules no longer catches the
# built-in one. The name is kept because it is part of the public interface.
class FileNotFoundError(FileError):
    """File not found - non-recoverable."""

    def __init__(self, message: str = "文件䞍存圚"):
        super().__init__(message, recoverable=False)


class FilePermissionError(FileError):
    """Permission denied - non-recoverable."""

    def __init__(self, message: str = "文件权限䞍足"):
        super().__init__(message, recoverable=False)


class DiskSpaceError(FileError):
    """Insufficient disk space - recoverable after cleanup."""

    def __init__(self, message: str = "磁盘空闎䞍足"):
        super().__init__(message, recoverable=True)


# -----------------------------------------------------------------------------
# Video Processing Errors
# -----------------------------------------------------------------------------

class VideoError(PianoAppError):
    """Video processing errors."""

    def __init__(self, message: str, recoverable: bool = True):
        super().__init__(message, recoverable=recoverable)


class VideoNotFoundError(VideoError):
    """Video file not found - non-recoverable."""

    def __init__(self, message: str = "视频文件䞍存圚"):
        super().__init__(message, recoverable=False)


class VideoCodecError(VideoError):
    """Video codec error - non-recoverable (the flag below is False)."""

    def __init__(self, message: str = "视频猖码错误"):
        super().__init__(message, recoverable=False)


class FFmpegNotFoundError(VideoError):
    """FFmpeg not found - non-recoverable."""

    def __init__(self, message: str = "未扟到FFmpeg请确保已正确安装并配眮PATH环境变量"):
        super().__init__(message, recoverable=False)


class SubtitleBurnError(VideoError):
    """Subtitle burn failed - recoverable (may work with different settings)."""

    def __init__(self, message: str = "字幕烧圕倱莥"):
        super().__init__(message, recoverable=True)


# =============================================================================
# User-Facing Error Messages
# =============================================================================

# Templates shown to the user. Entries containing "{}" need one format
# argument (a detail string, or the number of seconds for api_rate_limit).
USER_MESSAGES = {
    "api_key_invalid": "API密钥无效或已过期请圚讟眮䞭曎新API密钥。",
    "api_rate_limit": "API调甚频率超限请圚{}秒后重试。",
    "api_timeout": "API请求超时请检查眑络连接后重试。",
    "api_error": "API调甚倱莥{}",
    "ffmpeg_not_found": "未扟到FFmpeg请确保已正确安装并配眮PATH环境变量。",
    "ffmpeg_error": "FFmpeg执行错误{}",
    "video_not_found": "视频文件䞍存圚{}",
    "video_corrupt": "视频文件损坏或栌匏䞍支持。",
    "video_codec": "视频猖码错误䞍支持圓前栌匏。",
    "file_not_found": "文件䞍存圚{}",
    "file_permission": "文件权限䞍足无法访问{}",
    "disk_space": "磁盘空闎䞍足请枅理后重试。",
    "subtitle_burn": "字幕烧圕倱莥{}",
    "transcription_failed": "语音蜬圕倱莥{}",
    "title_correction_failed": "标题纠正倱莥{}",
    "merge_failed": "视频合并倱莥{}",
    "clip_extract_failed": "片段提取倱莥{}",
    "unknown_error": "发生未知错误{}",
}


def get_user_message(error_key: str, *args) -> str:
    """
    Get a user-friendly error message.

    Args:
        error_key: Key in USER_MESSAGES.
        *args: Format arguments for the template. When none are given the
            template is returned as-is (placeholders included).

    Returns:
        The formatted message; for an unknown key, the "unknown_error"
        template filled with the key itself.
    """
    if error_key in USER_MESSAGES:
        msg = USER_MESSAGES[error_key]
        if args:
            return msg.format(*args)
        return msg
    return USER_MESSAGES["unknown_error"].format(error_key)


# =============================================================================
# Error Handler Class
# =============================================================================

class ErrorHandler:
    """
    Centralized error handling with retry logic.

    Provides consistent error handling across the pipeline with:
    - Retry logic for recoverable errors
    - User-friendly error messages
    - Recovery action callbacks
    """

    def __init__(self, max_retries: int = 3, base_delay: float = 1.0):
        """
        Initialize ErrorHandler.

        Args:
            max_retries: Maximum number of retries after the first attempt.
            base_delay: Delay in seconds before the first retry; doubled
                after every retry.
        """
        self.max_retries = max_retries
        self.base_delay = base_delay
        self._retry_counts = {}  # failed-attempt counter per context key

    def handle(
        self,
        error: Exception,
        context: str = "",
        show_dialog_callback: Optional[Callable[[str, str, bool], str]] = None
    ) -> bool:
        """
        Log an error and decide whether processing may continue.

        Args:
            error: The exception that occurred.
            context: Description of where the error occurred.
            show_dialog_callback: Optional callback asking the user what to
                do. Signature: callback(title, message, recoverable) ->
                'retry' | 'skip' | anything else (treated as cancel).

        Returns:
            With a callback: True for 'retry' or 'skip', False otherwise.
            Without one: the error's `recoverable` flag, or True for
            exceptions that are not PianoAppError.
        """
        error_type = type(error).__name__
        error_msg = str(error)
        full_context = f"{context}: {error_msg}" if context else error_msg
        logger.error(f"[{error_type}] {full_context}")

        if isinstance(error, PianoAppError):
            recoverable = error.recoverable
        else:
            # Unknown exception types are optimistically treated as recoverable.
            recoverable = True

        user_msg = self._get_error_key(error)
        if context:
            user_msg = f"{context}: {user_msg}"

        # Let the user decide when a dialog is available.
        if show_dialog_callback:
            action = show_dialog_callback(error_type, user_msg, recoverable)
            return action in ('retry', 'skip')

        return recoverable

    def _get_error_key(self, error: Exception) -> str:
        """
        Map an exception to its user-facing message.

        Despite the name, the return value is the formatted message, not the
        USER_MESSAGES key. Templates containing a "{}" placeholder are filled
        with the retry delay (rate limit) or with str(error); previously the
        raw "{}" leaked into the text shown to the user.
        """
        key_mapping = {
            APIKeyError: "api_key_invalid",
            APIRateLimitError: "api_rate_limit",
            APITimeoutError: "api_timeout",
            FFmpegNotFoundError: "ffmpeg_not_found",
            FileNotFoundError: "file_not_found",
            FilePermissionError: "file_permission",
            DiskSpaceError: "disk_space",
            VideoNotFoundError: "video_not_found",
            VideoCodecError: "video_codec",
            SubtitleBurnError: "subtitle_burn",
        }

        detail = str(error)
        # Exact type match on purpose: subclasses fall through to the
        # generic branches below, as before.
        key = key_mapping.get(type(error))
        if key is not None:
            if "{}" not in USER_MESSAGES[key]:
                return get_user_message(key)
            if isinstance(error, APIRateLimitError):
                wait = error.retry_after
                # ":g" renders 60.0 as "60"; fall back to str() for odd values.
                shown = f"{wait:g}" if isinstance(wait, (int, float)) else str(wait)
                return get_user_message(key, shown)
            return get_user_message(key, detail)
        elif isinstance(error, APIError):
            return get_user_message("api_error", detail)
        elif isinstance(error, FileError):
            return get_user_message("file_not_found", detail)
        elif isinstance(error, VideoError):
            return get_user_message("video_corrupt")
        else:
            return get_user_message("unknown_error", detail)

    def with_retry(
        self,
        func: Callable[..., Any],
        *args,
        context: str = "",
        show_dialog_callback: Optional[Callable[[str, str, bool], str]] = None,
        **kwargs
    ) -> Any:
        """
        Execute a function, retrying recoverable failures with back-off.

        Args:
            func: Function to execute.
            *args: Positional arguments for func.
            context: Description used in log messages and as the retry key
                (str(func) is used as the key when empty).
            show_dialog_callback: Accepted for interface compatibility;
                not used by this method.
            **kwargs: Keyword arguments for func.

        Returns:
            The return value of func.

        Raises:
            The original exception, immediately for a non-recoverable
            PianoAppError, otherwise after max_retries retries have failed.
        """
        retry_key = context or str(func)
        self._retry_counts[retry_key] = 0

        last_error = None
        delay = self.base_delay

        while self._retry_counts[retry_key] <= self.max_retries:
            try:
                result = func(*args, **kwargs)
                # Success - forget the counter for this key.
                self._retry_counts.pop(retry_key, None)
                return result

            except Exception as e:
                last_error = e
                self._retry_counts[retry_key] += 1

                if isinstance(e, PianoAppError) and not e.recoverable:
                    logger.error(f"Non-recoverable error in {context}: {e}")
                    raise

                if self._retry_counts[retry_key] <= self.max_retries:
                    # A rate-limit hint from the server overrides a shorter delay.
                    if isinstance(e, APIRateLimitError) and e.retry_after:
                        delay = max(delay, e.retry_after)

                    retry_msg = f"Retry {self._retry_counts[retry_key]}/{self.max_retries} after {delay}s"
                    logger.warning(f"{context}: {e}. {retry_msg}")

                    time.sleep(delay)
                    delay *= 2  # Exponential backoff
                else:
                    logger.error(f"Max retries exceeded for {context}")
                    break

        # All retries exhausted
        if last_error:
            raise last_error

    def reset_retry_count(self, context: str = ""):
        """Forget the failed-attempt counter stored for a context key."""
        if context in self._retry_counts:
            del self._retry_counts[context]


# =============================================================================
# Global Error Handler Instance
# =============================================================================

_default_error_handler: Optional[ErrorHandler] = None


def get_error_handler() -> ErrorHandler:
    """Get or lazily create the global error handler instance."""
    global _default_error_handler
    if _default_error_handler is None:
        _default_error_handler = ErrorHandler()
    return _default_error_handler


def handle_error(
    error: Exception,
    context: str = "",
    show_dialog_callback: Optional[Callable[[str, str, bool], str]] = None
) -> bool:
    """
    Handle an error using the global error handler.

    Args:
        error: The exception that occurred.
        context: Description of where the error occurred.
        show_dialog_callback: Optional callback to show a dialog to the user.

    Returns:
        True if processing may continue, False otherwise
        (see ErrorHandler.handle).
    """
    return get_error_handler().handle(error, context, show_dialog_callback)
+ + Args: + error: The exception that occurred + context: Description of where the error occurred + show_dialog_callback: Optional callback to show dialog to user + + Returns: + True if error was handled and recovery is possible + """ + return get_error_handler().handle(error, context, show_dialog_callback) diff --git a/src/core/llm.py b/src/core/llm.py new file mode 100644 index 0000000..e4da199 --- /dev/null +++ b/src/core/llm.py @@ -0,0 +1,200 @@ +# -*- coding: utf-8 -*- +""" +LLM调甚封装 + +统䞀管理火山方舟API调甚包含重试和错误倄理 +""" + +import os +import time +import logging +from .constants import ( + DEFAULT_API_HOST, LLM_MODEL, LLM_TIMEOUT, + LLM_MAX_RETRIES, LLM_TITLE_TIMEOUT, LLM_VALIDATE_TIMEOUT, + get_api_key +) + +logger = logging.getLogger(__name__) + +import requests + + +class LLMClient: + """LLM客户端封装""" + + def __init__(self, api_key=None, api_host=None): + # 䌘先䜿甚䌠入的参数其次䜿甚环境变量 + self.api_key = api_key or get_api_key() + self.api_host = api_host or DEFAULT_API_HOST + if not self.api_key: + logger.warning("No API key configured - LLM calls will be skipped") + + def chat(self, prompt, max_tokens=500, timeout=LLM_TIMEOUT): + """ + 发送聊倩请求到LLM + + Args: + prompt: 提瀺词 + max_tokens: 最倧token数 + timeout: è¶…æ—¶æ—¶é—Ž + + Returns: + LLM回倍文本倱莥返回None + """ + if not self.api_key: + logger.info("LLM: No API key, skipping") + return None + + url = f"{self.api_host}/chat/completions" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json" + } + payload = { + "model": LLM_MODEL, + "messages": [{"role": "user", "content": prompt}], + "max_tokens": max_tokens + } + + for attempt in range(LLM_MAX_RETRIES): + try: + response = requests.post(url, headers=headers, json=payload, timeout=timeout) + # 401错误立即停止䞍重试 + if response.status_code == 401: + logger.error(f"LLM: 401 Unauthorized - API key invalid, stopping immediately") + return None + response.raise_for_status() + result = response.json() + + choices = result.get("choices", []) + if not 
def validate_content(self, transcript_text, title):
    """
    Ask the LLM whether a clip's transcript is on-topic for its title.

    Only the first 300 characters of the transcript are sent. The model is
    asked to answer with one of 盞关 / 无关 / 䞍确定.

    Args:
        transcript_text: Transcript of the clip (may be empty or None).
        title: Title the transcript is checked against.

    Returns:
        (is_valid, reason):
        - (True, "error") when the LLM call produced no answer, so a failed
          call never discards a clip;
        - (False, <raw answer>) when the answer says the content is unrelated;
        - (True, "uncertain") when the answer is "䞍确定";
        - (True, <raw answer>) otherwise.
    """
    prompt = f"""刀断视频字幕内容是吊䞎标题盞关。

标题{title}

字幕内容{transcript_text[:300] if transcript_text else "无"}

刀断标准
- 内容讚论的䞻题䞎标题抂念盞关 = 盞关
- 内容䞎标题无关劂广告、闲聊、无关话题= 无关
- 无法刀断 = 䞍确定

请盎接蟓出盞关/无关/䞍确定"""

    result = self.chat(prompt, max_tokens=20, timeout=LLM_VALIDATE_TIMEOUT)
    if not result:
        return True, "error"

    # "䞍盞关" is a common paraphrase of "无关" but does not contain it, so it
    # has to be matched explicitly or an off-topic clip would pass as valid.
    if "无关" in result or "䞍盞关" in result:
        return False, result
    elif "䞍确定" in result:
        return True, "uncertain"
    return True, result
知识点列衚 + + Returns: + 纠错后的字幕 + """ + knowledge_str = ", ".join(knowledge_terms[:20]) if knowledge_terms else "无" + + prompt = f"""䜠是䞀䞪钢琎教孊视频的字幕纠错䞓家。 + +原始字幕{text} + +本节诟片段标题{clip_title} +本节诟知识点{knowledge_str} + +请进行字幕纠错 +1. 修倍语音识别错误劂"矞耻"→"䌑止""副点"→"附点""莟点"→"附点" +2. 修倍同音字错误 +3. 保留原文的䞓䞚术语和衚蟟方匏 +4. 䞍芁改变原文的语气和意思 + +请盎接蟓出纠错后的字幕䞍芁添加任䜕解释。""" + + result = self.chat(prompt, max_tokens=500, timeout=LLM_TIMEOUT) + return result if result else text + + +# å…šå±€LLM客户端实䟋 +_llm_client = None + + +def get_llm_client(): + """获取LLM客户端单䟋""" + global _llm_client + if _llm_client is None: + _llm_client = LLMClient() + return _llm_client \ No newline at end of file diff --git a/src/core/pipeline.py b/src/core/pipeline.py new file mode 100644 index 0000000..8274689 --- /dev/null +++ b/src/core/pipeline.py @@ -0,0 +1,518 @@ +# -*- coding: utf-8 -*- +""" +Pipeline - 栞心䞚务逻蟑 + +统䞀管理从视频提取到最终蟓出的完敎流皋 +UI和CLI共甚同䞀套逻蟑 +""" + +import os +import json +import logging +from typing import Callable, Optional, List, Dict, Any + +from .video import extract_clip, merge_clips, burn_dual_subtitles +from .subtitle import SubtitlePipeline +from .llm import LLMClient +from .corrections import apply_all_corrections, load_term_corrections_from_config +from .utils import ensure_dir + +logger = logging.getLogger(__name__) + + +class Pipeline: + """ + 粟华视频生成流氎线 + + 䜿甚方法: + # CLI暡匏 + pipeline = Pipeline(config) + pipeline.run() + + # UI暡匏 (垊回调) + pipeline = Pipeline(config, progress_callback=my_callback) + pipeline.run() + """ + + def __init__( + self, + config: dict, + progress_callback: Optional[Callable[[str, int, str], None]] = None, + step_callback: Optional[Callable[[str], None]] = None, + ): + """ + 初始化流氎线 + + Args: + config: 配眮字兞包含: + - video_src: 视频路埄 + - clips: [{title, start, end}, ...] 
+ - output_dir: 蟓出目圕 + - api_key: LLM API密钥 + - api_host: LLM API地址 + - whisper_model_path: Whisper暡型路埄 + - term_corrections: 术语纠正字兞 + - video_params: 视频参数 + progress_callback: 进床回调 (step, percent, message) + step_callback: 步骀匀始/完成回调 (step_name) + """ + self.config = config + self.progress_callback = progress_callback if progress_callback else (lambda s, p, m: logger.info(f"[{s}] {p}%: {m}")) + self.step_callback = step_callback if step_callback else (lambda s: None) + + # 路埄 + self.output_dir = config.get('output_dir', './output') + self.inter_dir = ensure_dir(os.path.join(self.output_dir, 'intermediates')) + self.subs_dir = ensure_dir(os.path.join(self.output_dir, 'subs')) + + # 配眮 + self.clips = config.get('clips', []) + self.video_src = config.get('video_src') + self.video_params = config.get('video_params', {}) + self.fade_duration = self.video_params.get('fade_duration', 1) + + # LLM客户端 (延迟初始化) + self._llm_client = None + + # 字幕倄理 + self._subtitle_pipeline = None + + # 术语纠正 + self.term_corrections = load_term_corrections_from_config(config) + + @property + def llm_client(self) -> LLMClient: + if self._llm_client is None: + self._llm_client = LLMClient( + api_key=self.config.get('api_key'), + api_host=self.config.get('api_host') + ) + return self._llm_client + + @property + def subtitle_pipeline(self) -> SubtitlePipeline: + if self._subtitle_pipeline is None: + self._subtitle_pipeline = SubtitlePipeline(self.config, self.output_dir) + return self._subtitle_pipeline + + # ==================== 步骀方法 ==================== + + def step_extract(self) -> List[str]: + """ + Step 1: 提取视频片段 + + Returns: + clip_paths: 提取的片段路埄列衚 + """ + self.step_callback('extracting') + self.progress_callback('extracting', 0, "匀始提取片段...") + + if not self.clips: + raise ValueError("No clips configured") + if not self.video_src or not os.path.exists(self.video_src): + raise ValueError(f"Video file not found: {self.video_src}") + + clip_paths = [] + total = len(self.clips) + + for i, clip in 
def step_transcribe(self, clip_paths: List[str]) -> List[str]:
    """
    Step 2: transcribe every extracted clip with faster-whisper.

    For the i-th entry of ``clip_paths`` (1-based) a ``clip{i}.json`` file of
    the form ``{"segments": [{"start", "end", "text"}, ...]}`` is written to
    ``self.inter_dir``. An existing JSON file is reused instead of being
    transcribed again. Before that, all cached ``clip*.json`` files are
    deleted when the (start, end, title) list of ``self.clips`` no longer
    matches the hash stored in ``.config_hash``.

    Args:
        clip_paths: Paths of the clips to transcribe.

    Returns:
        The JSON path for every clip, in input order. A path is returned even
        when transcribing that clip failed, so callers must check that the
        file exists. Returns ``[]`` when faster-whisper is not installed.
    """
    self.step_callback('transcribing')
    self.progress_callback('transcribing', 0, "匀始蜬圕...")

    # Imported lazily so this module stays importable without faster-whisper.
    try:
        from faster_whisper import WhisperModel
    except ImportError:
        logger.warning("faster-whisper not available, skipping transcription")
        self.progress_callback('transcribing', 100, "faster-whisper未安装跳过蜬圕")
        self.step_callback('transcribing')
        return []

    model_path = self.config.get('whisper_model_path')
    model_name = self.config.get('whisper_model', 'large')
    model_ref = model_path or model_name

    # Load the model. float16 is preferred, but the backend rejects it on
    # devices without efficient fp16 support, so fall back to the backend's
    # default compute type instead of aborting the whole pipeline.
    self.progress_callback('transcribing', 5, "加蜜Whisper暡型...")
    try:
        model = WhisperModel(model_ref, compute_type="float16")
    except (ValueError, RuntimeError) as e:
        logger.warning(f"float16 compute type unavailable ({e}), falling back to default")
        model = WhisperModel(model_ref, compute_type="default")

    # Detect a changed clip configuration via a hash of (start, end, title);
    # cached transcripts of a previous configuration would not match the
    # newly extracted clips, so they are all removed.
    import hashlib
    config_str = str([(c['start'], c['end'], c.get('title', '')) for c in self.clips])
    config_hash = hashlib.md5(config_str.encode()).hexdigest()
    hash_file = os.path.join(self.inter_dir, '.config_hash')
    old_hash = None
    if os.path.exists(hash_file):
        with open(hash_file, 'r') as fh:
            old_hash = fh.read().strip()
    if old_hash != config_hash:
        for name in os.listdir(self.inter_dir):
            if name.startswith('clip') and name.endswith('.json'):
                os.remove(os.path.join(self.inter_dir, name))
                logger.info(f"枅理旧JSON: {name} (配眮已改变)")
        with open(hash_file, 'w') as fh:
            fh.write(config_hash)
        logger.info("配眮已曎新枅陀所有旧JSON重新蜬圕")

    json_paths = []
    total = len(clip_paths)

    for i, clip_path in enumerate(clip_paths, 1):
        json_path = os.path.join(self.inter_dir, f"clip{i}.json")
        json_paths.append(json_path)

        # Reuse a cached transcript.
        if os.path.exists(json_path):
            logger.info(f"Clip {i}: JSON exists, skipping")
            self.progress_callback('transcribing', int((i/total)*100), f"跳过片段 {i} (已存圚)")
            continue

        self.progress_callback('transcribing', int((i/total)*90), f"蜬圕片段 {i}/{total}")

        try:
            segments, _ = model.transcribe(clip_path, language='zh', beam_size=5)

            # Consume the segments before opening the file, so a failure
            # while transcribing does not leave an empty JSON behind that
            # would later be mistaken for a cached result.
            segments_data = []
            for seg in segments:
                segments_data.append({
                    'start': seg.start,
                    'end': seg.end,
                    'text': seg.text.strip()
                })

            with open(json_path, 'w', encoding='utf-8') as fh:
                json.dump({'segments': segments_data}, fh, ensure_ascii=False, indent=2)

            logger.info(f"Transcribed clip {i}: {json_path}")

        except Exception as e:
            # Best effort: one failing clip must not abort the other clips.
            logger.error(f"Failed to transcribe clip {i}: {e}")

    # The model is deliberately not deleted here: the original author notes
    # that tearing down the CUDA context on Windows tends to trigger an
    # Access Violation (0xC0000005), so it is left to process exit.

    self.progress_callback('transcribing', 100, "蜬圕完成")
    self.step_callback('transcribing')
    return json_paths
JSON文件路埄列衚 + + Returns: + corrected_clips: 纠正后的片段配眮列衚 + """ + self.step_callback('title_correcting') + self.progress_callback('title_correcting', 0, "匀始标题纠正...") + + corrected_clips = [] + total = len(self.clips) + + for i, (clip, json_path) in enumerate(zip(self.clips, json_paths), 1): + original_title = clip.get('title', f'Clip {i}') + + # 读取蜬圕文本 + transcript_text = '' + if json_path and os.path.exists(json_path): + with open(json_path, 'r', encoding='utf-8') as f: + data = json.load(f) + transcript_text = ' '.join(seg.get('text', '') for seg in data.get('segments', [])) + + # LLM纠正标题 + corrected_title = original_title + if transcript_text and self.config.get('api_key'): + try: + corrected_title = self.llm_client.correct_title( + transcript_text, + original_title, + [c.get('title', '') for c in self.clips] + ) or original_title + except Exception as e: + logger.warning(f"LLM title correction failed for clip {i}: {e}") + + corrected_clip = { + 'index': i - 1, + 'title': corrected_title, + 'original_title': original_title, + 'start': clip['start'], + 'end': clip['end'], + } + corrected_clips.append(corrected_clip) + + percent = int((i / total) * 100) + self.progress_callback('title_correcting', percent, f"纠正标题 {i}/{total}") + + self.progress_callback('title_correcting', 100, "标题纠正完成") + self.step_callback('title_correcting') + return corrected_clips + + def step_generate_subtitles(self, corrected_clips: List[Dict], json_paths: List[str]) -> tuple: + """ + Step 4: 生成字幕 + + Args: + corrected_clips: 纠正后的片段配眮 + json_paths: JSON文件路埄列衚 + + Returns: + (title_path, content_path): 字幕文件路埄 + """ + self.step_callback('generating_subtitles') + self.progress_callback('generating_subtitles', 0, "匀始生成字幕...") + + # 准倇clip配眮 + clip_configs = [] + valid_json_paths = [] + + for i, (clip, json_path) in enumerate(zip(corrected_clips, json_paths), 1): + clip_config = { + 'index': i - 1, + 'start': clip['start'], + 'end': clip['end'], + 'title': clip.get('title', 
clip.get('original_title', '')), + } + clip_configs.append(clip_config) + + if json_path and os.path.exists(json_path): + valid_json_paths.append(json_path) + else: + valid_json_path = os.path.join(self.inter_dir, f"clip{i}.json") + if os.path.exists(valid_json_path): + valid_json_paths.append(valid_json_path) + + if not valid_json_paths: + raise ValueError("No valid JSON files for subtitle generation") + + # 纠错凜数 + def correct(text): + return apply_all_corrections(text, self.term_corrections) + + self.progress_callback('generating_subtitles', 50, "生成字幕蜚道...") + + # 生成字幕 + _, _, title_path, content_path = self.subtitle_pipeline.generate_from_clips( + clip_configs, + valid_json_paths, + apply_corrections=correct + ) + + self.progress_callback('generating_subtitles', 100, "字幕生成完成") + self.step_callback('generating_subtitles') + return title_path, content_path + + def step_merge(self, clip_paths: List[str]) -> str: + """ + Step 5: 合并视频 + + Args: + clip_paths: 片段路埄列衚 + + Returns: + merged_path: 合并后的视频路埄 + """ + self.step_callback('merging') + self.progress_callback('merging', 0, "匀始合并视频...") + + if not clip_paths: + raise ValueError("No clips to merge") + + merged_path = os.path.join(self.output_dir, "concat_merged.mp4") + + success = merge_clips(clip_paths, merged_path, self.inter_dir) + + if not success: + raise RuntimeError("Failed to merge clips") + + self.progress_callback('merging', 100, f"合并完成: {merged_path}") + self.step_callback('merging') + return merged_path + + def step_burn(self, merged_path: str, title_path: str, content_path: str) -> str: + """ + Step 6: 烧圕字幕 + + Args: + merged_path: 合并后的视频路埄 + title_path: 标题字幕路埄 + content_path: 正文字幕路埄 + + Returns: + final_path: 最终视频路埄 + """ + self.step_callback('burning') + self.progress_callback('burning', 0, "匀始烧圕字幕...") + + if not os.path.exists(merged_path): + raise ValueError(f"Merged video not found: {merged_path}") + + final_path = os.path.join(self.output_dir, "final.mp4") + + video_params = 
self.config.get('video_params', {}) + + success = burn_dual_subtitles( + merged_path, + title_path, + content_path, + final_path, + title_fontsize=video_params.get('title_fontsize', 90), + title_color=video_params.get('title_color', 'FFFF00'), + subtitle_fontsize=video_params.get('subtitle_fontsize', 24), + subtitle_color=video_params.get('subtitle_color', 'FFFFFF') + ) + + if not success: + raise RuntimeError("Failed to burn subtitles") + + self.progress_callback('burning', 100, f"完成: {final_path}") + self.step_callback('burning') + return final_path + + # ==================== 䞻流皋 ==================== + + def run(self) -> str: + """ + 运行完敎流氎线 + + Returns: + final_path: 最终视频路埄 + + Raises: + ValueError: 配眮错误 + RuntimeError: 倄理倱莥 + """ + logger.info(f"Pipeline starting: {len(self.clips)} clips, output: {self.output_dir}") + + # Step 1: 提取 + clip_paths = self.step_extract() + if not clip_paths: + raise RuntimeError("No clips extracted") + + # Step 2: 蜬圕 + json_paths = self.step_transcribe(clip_paths) + + # Step 3: 标题纠正 + corrected_clips = self.step_correct_titles(json_paths) + + # Step 4: 生成字幕 + title_path, content_path = self.step_generate_subtitles(corrected_clips, json_paths) + + # Step 5: 合并 + merged_path = self.step_merge(clip_paths) + + # Step 6: 烧圕 + final_path = self.step_burn(merged_path, title_path, content_path) + + logger.info(f"Pipeline completed: {final_path}") + return final_path + + def run_with_user_confirm(self, confirmed_titles: List[Dict[str, Any]]) -> str: + """ + 运行流氎线圚标题纠正后等埅甚户确讀 + + Args: + confirmed_titles: 甚户确讀后的标题列衚 + + Returns: + final_path: 最终视频路埄 + """ + logger.info(f"Pipeline starting with user confirmation: {len(self.clips)} clips") + + # Step 1-3: 同䞊 + clip_paths = self.step_extract() + if not clip_paths: + raise RuntimeError("No clips extracted") + + json_paths = self.step_transcribe(clip_paths) + corrected_clips = self.step_correct_titles(json_paths) + + # 应甚甚户确讀的标题 + for i, confirmed in enumerate(confirmed_titles): + if i < 
def create_pipeline_from_yaml(config_path: str, **kwargs) -> "Pipeline":
    """
    Build a Pipeline from a YAML configuration file.

    Args:
        config_path: Path of the YAML file; its top level must be a mapping.
            An empty file is treated as an empty configuration.
        **kwargs: Extra settings. Every keyword overrides/extends the loaded
            configuration. ``progress_callback`` and ``step_callback`` are
            additionally passed to the ``Pipeline`` constructor.

    Returns:
        The configured Pipeline instance.

    Raises:
        ValueError: If the YAML document is not a mapping.
    """
    import yaml

    with open(config_path, 'r', encoding='utf-8') as f:
        # safe_load returns None for an empty document.
        config = yaml.safe_load(f) or {}

    if not isinstance(config, dict):
        raise ValueError(f"Pipeline config must be a mapping: {config_path}")

    # Merge the extra settings into the configuration (as before).
    config.update(kwargs)

    # Only the callbacks are constructor parameters of Pipeline; forwarding
    # arbitrary overrides such as output_dir=... would raise TypeError.
    ctor_kwargs = {
        key: kwargs[key]
        for key in ('progress_callback', 'step_callback')
        if key in kwargs
    }

    return Pipeline(config, **ctor_kwargs)
生成clips配眮 +""" + +import os +import re +import json +import yaml +import zipfile +import logging +from typing import List, Dict, Any, Optional, Callable, Tuple + +logger = logging.getLogger(__name__) + + +class PPTParser: + """ + PPT解析噚 + + 从PPT提取知识点蜬圕视频匹配时闎戳生成clips配眮 + """ + + def __init__( + self, + video_path: str, + ppt_path: str, + output_dir: str, + progress_callback: Optional[Callable[[str, int, str], None]] = None, + api_key: Optional[str] = None, + api_host: Optional[str] = None, + max_clip_duration: int = 30, + ): + """ + 初始化PPT解析噚 + + Args: + video_path: 视频文件路埄 + ppt_path: PPT/PPTX文件路埄 + output_dir: 蟓出目圕 + progress_callback: 进床回调 (step, percent, message) + api_key: LLM API密钥 + api_host: LLM API地址 + max_clip_duration: 每䞪粟华片段的最倧时长秒默讀30秒 + """ + self.video_path = video_path + self.ppt_path = ppt_path + self.output_dir = output_dir + self.progress_callback = progress_callback if progress_callback else (lambda s, p, m: logger.info(f"[{s}] {p}%: {m}")) + self.api_key = api_key + self.api_host = api_host + self.max_clip_duration = max_clip_duration + + self.inter_dir = os.path.join(output_dir, 'intermediates') + os.makedirs(self.inter_dir, exist_ok=True) + + # 术语纠正映射 + self.term_corrections = { + "副点": "附点", "莟点": "附点", "付点": "附点", + "黑剑": "黑键", "实莚": "时倌", "挔音": "延音", + "阅历": "乐理", "音苻": "音笊", "调苻": "调号", + "拍苻": "拍笊", "谱苻": "谱号", "銖䜍": "手䜍", + "守䜍": "手䜍", "只发": "指法", "织法": "指法", + } + + def _report(self, step: str, percent: int, message: str): + """报告进床""" + self.progress_callback(step, percent, message) + + # ==================== PPT解析 ==================== + + def _extract_all_text_from_slide_xml(self, slide_xml: str) -> List[str]: + """盎接从slide XML䞭提取所有标筟文本䞍遗挏衚栌等内容。""" + import re + texts = re.findall(r"]*)?>([^<]*)", slide_xml) + cleaned = [self._clean_shape_text(t) for t in texts] + return [t for t in cleaned if t] + + def _normalize_text(self, text: str) -> str: + """枅理文本䞭的倚䜙空行和空癜。""" + import re + # 将2䞪以䞊连续换行压猩䞺1䞪 + text = re.sub(r'\n{3,}', 
'\n\n', text) + # 移陀每行銖尟倚䜙空栌 + lines = [line.strip() for line in text.split('\n')] + # 过滀连续空行 + cleaned = [] + prev_empty = False + for line in lines: + if line: + cleaned.append(line) + prev_empty = False + elif not prev_empty: + cleaned.append('') + prev_empty = True + # 移陀末尟空行 + while cleaned and cleaned[-1] == '': + cleaned.pop() + return '\n'.join(cleaned) + + def _clean_shape_text(self, text: str) -> str: + """ + 枅理从PowerPoint shape.text䞭提取的文本䞭的乱空栌。 + + PowerPoint XML䞭换行笊可胜被解析䞺倚䞪空栌/制衚笊 + 行内也可胜残留倚䜙空栌劂 "䞍 少的"。 + """ + import re + # 先把所有连续的空癜字笊空栌、制衚笊替换䞺单䞪空栌 + text = re.sub(r'[ \t]+', ' ', text) + # 枅理行銖行尟 + lines = text.split('\n') + cleaned = [line.strip() for line in lines] + # 移陀完党空的行䜆保留段萜分隔 + result = [] + prev_empty = False + for line in cleaned: + if not line: + if not prev_empty: + result.append('') + prev_empty = True + else: + result.append(line) + prev_empty = False + # 移陀末尟空行 + while result and result[-1] == '': + result.pop() + return '\n'.join(result) + + def extract_ppt_text(self) -> List[Dict[str, Any]]: + """ + 从PPTX提取文本python-pptx倄理结构 + XML后倇确保完敎 + + Returns: + [{slide: int, texts: [str], full_text: str}, ...] 
def find_main_knowledge_slide(self, ppt_texts: List[Dict]) -> Optional[Dict]:
    """
    Locate the slide that lists the lesson's main knowledge points.

    Args:
        ppt_texts: Slide dicts; each one is read through its "full_text" key.

    Returns:
        The first slide whose full text contains one of the heading keywords,
        or None when no slide matches.
    """
    heading_markers = (
        "本诟䞻芁知识点", "本节诟重芁知识点", "本诟知识点",
        "䞻芁知识点", "本诟内容",
    )

    def mentions_heading(candidate: Dict) -> bool:
        body = candidate["full_text"]
        return any(marker in body for marker in heading_markers)

    return next((candidate for candidate in ppt_texts if mentions_heading(candidate)), None)
# 后猀暡匏被PPT XML拆分 + SUFFIX_PATTERNS = [ + ("的", "方法"), + ("的侉", "种方法"), ("的四", "种方法"), ("的五", "种方法"), + ("附点", "音笊"), + ] + + raw_texts = slide.get("texts", []) + merged = [] + i = 0 + + while i < len(raw_texts): + text = raw_texts[i].strip() + if not text: + i += 1 + continue + + # 跳过标题词 + if text in ["本诟䞻芁知识点", "本节诟重芁知识点", "本诟知识点", "䞻芁知识点", "本诟内容", "诟皋回顟"]: + i += 1 + continue + + # 合并被拆分的术语 + if merged and len(text) <= 3: + prev = merged[-1] + should_merge = False + merge_suffix = "" + + if prev.endswith("的") and text in ["方法", "种方法"]: + should_merge = True + merge_suffix = text + elif prev.endswith(("的侉", "的四", "的五")) and text == "种方法": + should_merge = True + merge_suffix = text + elif prev.endswith("附点") and text == "音笊": + should_merge = True + merge_suffix = text + + if should_merge: + merged[-1] = prev + merge_suffix + i += 1 + continue + + merged.append(text) + i += 1 + + # 后倄理过滀和验证 + for part in merged: + part = part.strip() + if not part or len(part) < 2: + continue + part = re.sub(r"[《》]", "", part) + part = re.sub(r"^\d+[.、]\s*", "", part) + if part and part not in seen and len(part) >= 2: + seen.add(part) + knowledge_points.append(part) + + return knowledge_points + + def _call_llm(self, prompt: str, max_tokens: int = 4096, timeout: int = 300, retries: int = 3) -> Optional[str]: + """ + 垊重试的 LLM 调甚。 + + Args: + prompt: 发送给 LLM 的提瀺词 + max_tokens: 最倧 token 数 + timeout: 单次请求超时秒 + retries: 最倧重试次数 + + Returns: + LLM 返回的 content倱莥返回 None + """ + import requests + url = f"{self.api_host}/chat/completions" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json" + } + payload = { + "model": "doubao-seed-2.0-lite", + "messages": [{"role": "user", "content": prompt}], + "max_tokens": max_tokens, + "temperature": 0.1 + } + + last_err = None + for attempt in range(retries): + try: + response = requests.post(url, headers=headers, json=payload, timeout=timeout) + response.raise_for_status() + result = response.json() + 
content = result.get("choices", [{}])[0].get("message", {}).get("content", "") + if content: + return content + logger.warning(f"LLM返回空内容第{attempt+1}次尝试") + last_err = "空内容" + except requests.exceptions.Timeout: + logger.warning(f"LLM请求超时第{attempt+1}次尝试timeout={timeout}s") + last_err = "超时" + except requests.exceptions.RequestException as e: + logger.warning(f"LLM请求倱莥第{attempt+1}次尝试: {e}") + last_err = str(e) + + logger.error(f"LLM调甚倱莥已重试{retries}次: {last_err}") + return None + + def llm_extract_knowledge_points_from_ppt(self) -> Tuple[Optional[List[Dict[str, Any]]], Optional[str]]: + """ + 从PPT提取知识点自劚倄理PPT文本提取同时返回枅理后的PPT党文。 + + 劂果没有配眮PPT路埄返回(None, None)。 + + Returns: + (知识点列衚 [{title: str, slide: int}], 枅理后的PPTå…šæ–‡)无PPT或倱莥返回(None, None) + """ + if not self.ppt_path: + return None, None + + try: + all_slides = self.extract_ppt_text() + logger.info(f"[parsing_ppt] 共提取 {len(all_slides)} 页幻灯片") + for slide in all_slides: + logger.info(f"[parsing_ppt] 第{slide['slide']}页 ({len(slide['full_text'])}字):") + logger.info(slide['full_text']) + return self.llm_extract_knowledge_points(all_slides) + except Exception as e: + logger.warning(f"从PPT提取知识点倱莥: {e}") + return None, None + + def get_ppt_full_text(self) -> Optional[str]: + """ + 获取PPT完敎文本甚于发给LLM䜜䞺校正参考。 + + 䌘先䜿甚LLM枅理后的文本倱莥则甚原始文本。 + + Returns: + PPT党文无PPT返回None + """ + if not self.ppt_path: + return None + # 䌘先甚LLM枅理后的文本 + _, cleaned = self.llm_extract_knowledge_points_from_ppt() + if cleaned: + return cleaned + # 回退到原始文本 + try: + all_slides = self.extract_ppt_text() + texts = [] + for slide in all_slides: + texts.append(f"--- 第{slide['slide']}页 ---\n{slide['full_text']}") + return "\n\n".join(texts) + except Exception as e: + logger.warning(f"提取PPT文本倱莥: {e}") + return None + + def llm_extract_knowledge_points(self, all_slides: List[Dict[str, Any]]) -> Tuple[Optional[List[Dict[str, Any]]], Optional[str]]: + """ + 䜿甚LLM从党PPT文本䞭提取知识点并同时返回枅理后的PPT党文。 + + Args: + all_slides: 所有幻灯片的文本 [{slide: int, texts: 
[str], full_text: str}, ...] + + Returns: + (知识点列衚 [{title: str, slide: int}], 枅理后的PPTå…šæ–‡)倱莥返回(None, None) + """ + if not self.api_key: + logger.info("未配眮API_KEY无法䜿甚LLM提取知识点") + return None, None + + import requests + + # 栌匏化所有幻灯片文本 + slides_text = [] + for slide in all_slides: + slide_content = f"--- 第{slide['slide']}页 ---\n" + "\n".join([f" {t}" for t in slide["texts"]]) + slides_text.append(slide_content) + + full_ppt_text = "\n\n".join(slides_text) + + prompt = f"""䜠是䞀䞪钢琎教孊PPT的知识点提取䞓家。 + +我䌚给䜠攟䞀䞪PPT的党郚文本内容按页组织。䜠的任务是 +1. 扫描PPT的**每䞀页**扟出所有可以独立制䜜成粟华视频的知识点 +2. 同时对PPT文本进行栌匏化枅理去陀倚䜙空栌/空行保留页面之闎的自然分段 + +【知识点定义】 +䞀䞪知识点是指钢琎教孊䞭䞀䞪倌埗单独讲解的最小单元。它可以是 +- 䞀䞪技巧劂"萜滚"、"高抬指"、"非连奏"、"跳音"、"和匊蜬䜍" +- 䞀䞪抂念劂"音高"、"时倌"、"节拍"、"调号"、"五指䜍眮" +- 䞀种方法劂"攟束练习"、"分手练习"、"慢速练习"、"唱谱法" +- 䞀䞪䞓题劂"乐理基础"、"手型芁求"、"诟后䜜䞚" + +【文本枅理规则】以䞍圱响原文意思衚蟟䞺前提 +- 合并连续的空行超过1䞪空行的压猩䞺1䞪 +- 去陀行銖行尟倚䜙空栌 +- 保留页面之闎的自然分段每页独立段萜 +- 保留有标点的完敎句子内郚的换行有标点诎明是特意的 +- 无标点的长句子劂果䞀行文字超过50字䞔无标点才合并到䞋䞀行 +- 保留䞓有名词、术语的原始写法 + +【重芁规则】 +1. 扫描党郚页面䞍芁只扟"知识点汇总页"每页郜芁看 +2. 原文保留知识点原文是什么就写什么䞍芁解释、抂括、翻译或扩展 +3. 拆分合并被拆分的片段劂"的侉"+"种方法"、"谱号、"+"倧谱衚、"等芁合并䞺完敎知识词 +4. 标题过滀応略"本诟䞻芁知识点"、"诟皋回顟"、"本节诟重芁知识点"等纯富航/目圕类标题 +5. 分类项倄理栌匏劂"XX子项1、子项2、子项3"时冒号后的每䞪子项各自独立成知识点䜆劂果冒号后是完敎句子或定义劂"XX这是指  "则敎句描述的对象本身才是知识点 +6. 列衚项过滀只保留有独立含义的知识点応略序号、标点笊号、无意义的装饰词 +7. 内容页䌘先劂果䞀䞪知识点圚教孊内容页展匀讲解了比仅出现圚列衚䞭曎重芁 +8. 最小粒床宁可倚蟓出几䞪独立的知识词也䞍芁合并成䞀䞪倧而笌统的标题 +9. 
去重劂果同䞀䞪知识点圚倚䞪页面出现只保留䞀条slide填该知识点**讲解最诊细/最完敎**的那䞀页 + +请以以䞋JSON栌匏蟓出䞍芁蟓出其他内容 +{{ + "knowledge_points": [ + {{"title": "知识点1", "slide": 来源页码}}, + {{"title": "知识点2", "slide": 来源页码}}, + {{"title": "知识点3", "slide": 来源页码}} + ], + "cleaned_text": "枅理后的PPT完敎文本保留页面分段" +}} + +芁求 +- 知识点数量尜量倚目标是8-20䞪䞍芁少于5䞪 +- 每页有倚䞪知识点的芁分别列出 +- cleaned_text 芁包含所有页的内容保持页面结构 + +【PPT党郚文本】 +{full_ppt_text}""" + + try: + self._report('parsing_ppt', 35, "调甚LLM提取知识点...") + content = self._call_llm(prompt, max_tokens=4096, timeout=300, retries=3) + if not content: + return None, None + + # 提取JSON + import re as re_module + json_match = re_module.search(r'\{[\s\S]*\}', content) + if not json_match: + logger.warning(f"LLM返回非JSON内容: {content[:200]}") + return None, None + + import json as json_module + try: + parsed = json_module.loads(json_match.group()) + except json_module.JSONDecodeError as e: + logger.warning(f"JSON解析倱莥: {e}, 内容: {json_match.group()[:200]}") + return None, None + + knowledge_points_raw = parsed.get("knowledge_points", []) + cleaned_text = parsed.get("cleaned_text", None) + + if not knowledge_points_raw: + return None, None + + # 构建知识点列衚 [{title, slide}] + knowledge_points = [ + {"title": p.get("title", ""), "slide": p.get("slide")} + for p in knowledge_points_raw + if p.get("title") + ] + logger.info(f"LLM提取知识点成功: {knowledge_points}") + logger.info(f"PPT文本枅理{'成功' if cleaned_text else '倱莥䜿甚原始文本'}") + return knowledge_points, cleaned_text + + except Exception as e: + logger.warning(f"LLM提取知识点倱莥: {e}") + return None, None + + def parse(self) -> List[Dict[str, Any]]: + """ + 解析PPT提取知识点列衚 + + Returns: + 知识点列衚 [{title: str, slide: int}, ...] + """ + # 1. 提取PPT所有文本 + self._report('parsing_ppt', 10, "提取PPT文本...") + all_slides = self.extract_ppt_text() + logger.info(f"[parsing_ppt] 共提取 {len(all_slides)} 页幻灯片") + + # 打印每页完敎内容䞍做截断 + for slide in all_slides: + logger.info(f"[parsing_ppt] 第{slide['slide']}页 ({len(slide['full_text'])}字):") + logger.info(slide['full_text']) + + # 2. 
LLM从党PPT文本䞭提取知识点同时返回枅理后的文本 + self._report('parsing_ppt', 30, "调甚LLM分析PPT结构...") + knowledge_points, _ = self.llm_extract_knowledge_points(all_slides) + + if not knowledge_points: + raise RuntimeError("LLM提取知识点倱莥请检查眑络和API配眮") + + self._report('parsing_ppt', 100, f"提取到 {len(knowledge_points)} 䞪知识点") + return knowledge_points + + # ==================== 视频蜬圕 ==================== + + def transcribe_video(self) -> List[Dict[str, Any]]: + """ + 蜬圕视频 + + Returns: + 蜬圕片段列衚 [{start, end, text}, ...] + """ + self._report('transcribing', 0, "匀始蜬圕视频...") + + transcript_path = os.path.join(self.inter_dir, "full_transcript.json") + + # 劂果已有蜬圕文件倍甚 + if os.path.exists(transcript_path): + self._report('transcribing', 100, "倍甚已有蜬圕文件") + with open(transcript_path, 'r', encoding='utf-8') as f: + return json.load(f) + + # 富入faster-whisper + try: + from faster_whisper import WhisperModel + except ImportError: + logger.error("faster-whisper未安装") + self._report('transcribing', 100, "faster-whisper未安装") + return [] + + from core.constants import FFMPEG_CMD + + # 获取视频时长 + import cv2 + cap = cv2.VideoCapture(self.video_path) + fps = cap.get(cv2.CAP_PROP_FPS) + frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT) + duration = frame_count / fps if fps > 0 else 0 + cap.release() + + self._report('transcribing', 5, f"视频时长: {duration:.0f}秒") + if duration == 0: + return [] + + # 加蜜暡型 + from core.constants import WHISPER_MODEL_PATH + model_path = WHISPER_MODEL_PATH + + try: + model = WhisperModel(model_path, device="cuda", compute_type="float16") + self._report('transcribing', 10, "䜿甚GPU蜬圕") + except Exception as e: + logger.warning(f"GPU加蜜倱莥: {e}䜿甚CPU") + try: + model = WhisperModel("large", device="cpu") + except: + model = WhisperModel("base", device="cpu") + + # 分段蜬圕 + chunk_size = 300 # 5分钟䞀段 + all_segments = [] + offset = 0 + chunk_idx = 0 + + try: + while offset < duration: + end = min(offset + chunk_size, duration) + chunk_path = os.path.join(self.inter_dir, f"chunk_{chunk_idx}.mp4") + + # 
提取片段 + import subprocess + subprocess.run( + f'"{FFMPEG_CMD}" -y -ss {offset} -t {end - offset} -i "{self.video_path}" ' + f'-c:v copy -c:a copy "{chunk_path}" -hide_banner -loglevel error', + shell=True, capture_output=True + ) + + self._report('transcribing', 10 + int((offset / duration) * 80), + f"蜬圕 {offset//60}min-{end//60}min...") + + segments, _ = model.transcribe(chunk_path, language="zh", beam_size=5) + for seg in segments: + all_segments.append({ + "start": offset + seg.start, + "end": offset + seg.end, + "text": seg.text, + }) + + # 枅理䞎时文件 + if os.path.exists(chunk_path): + os.remove(chunk_path) + + offset += chunk_size + chunk_idx += 1 + + # 保存蜬圕结果 + with open(transcript_path, 'w', encoding='utf-8') as f: + json.dump(all_segments, f, ensure_ascii=False, indent=2) + + self._report('transcribing', 100, f"蜬圕完成: {len(all_segments)} 䞪片段") + + finally: + # 䞍做任䜕枅理 —— CUDA/GPU 资源让进皋自然释攟。 + # 任䜕手劚 del / gc.collect() / torch.cuda.empty_cache() + # 圚 Windows + CUDA 䞋郜可胜觊发 Access Violation (0xC0000005)。 + pass + + return all_segments + + # ==================== 知识点匹配 ==================== + + def _correct_text(self, text: str) -> str: + """应甚术语纠正""" + for wrong, correct in self.term_corrections.items(): + text = text.replace(wrong, correct) + return text + + def _get_keywords(self, title: str) -> List[str]: + """从标题提取搜玢关键词""" + keywords = [title] + # 去掉"的"等连接词 + shorter = re.sub(r"[的䞎和及]", "", title) + if shorter != title and len(shorter) >= 2: + keywords.append(shorter) + # 栞心词 + core_words = re.findall(r"[\u4e00-\u9fff]{2,4}", title) + keywords.extend(core_words) + return keywords + + def _merge_overlapping_clips(self, clips: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """ + 合并时闎重叠的片段。 + + 规则劂果clip[i]和clip[i+1]圚时闎䞊重叠则将clip[i]的end延䌞到clip[i+1]的end + 并䞢匃clip[i+1]。圚播攟到clip[i+1]原start倄时标题切换䞺clip[i+1]的标题。 + + Args: + clips: 已排序的片段列衚 [{title, start, end}, ...] 
+ + Returns: + 合并后的片段列衚 + """ + if not clips: + return [] + + # 按start排序 + sorted_clips = sorted(clips, key=lambda c: c['start']) + merged = [dict(sorted_clips[0])] # 深拷莝第䞀䞪 + + for clip in sorted_clips[1:]: + prev = merged[-1] + if clip['start'] < prev['end']: + # 重叠prev延䌞到clip的end保留clip的标题标题圚clip原start倄切换 + prev['end'] = clip['end'] + logger.info(f" 合并重叠: '{prev['title']}' 延䌞至 {prev['end']}s" + f"标题圚 {clip['start']}s 切换䞺 '{clip['title']}'") + else: + # 䞍重叠盎接添加 + merged.append(dict(clip)) + + return merged + + def _llm_correct_transcript( + self, + segments: List[Dict], + ppt_full_text: Optional[str] = None + ) -> List[Dict]: + """ + 䜿甚LLM对完敎蜬圕文本进行校对修正Whisper识别错误。 + + 栌匏idx|text 逐行发给LLMLLM返回 idx|corrected_text 逐行 + 甹idx匹配回原始segments的时闎戳。分批发倄理避免超时。 + + Args: + segments: 原始蜬圕片段 [{start, end, text}, ...] + ppt_full_text: 可选的PPT完敎文本䜜䞺校正参考 + + Returns: + 校对后的蜬圕片段保留原结构仅修正text字段 + """ + if not self.api_key: + logger.info("无API_KEY跳过蜬圕文本校对") + return segments + + # PPT参考文本劂果有 + if ppt_full_text: + ppt_section = f""" +【PPT参考文本】 +以䞋是䞎本节诟配套的PPT内容摘圕请以歀䞺权嚁参考来校正视频蜬圕文本䞭的䞓䞚术语 +--- +{ppt_full_text[:5000]} +--- + +校准时对于视频蜬圕䞭䞎PPT内容高床盞关的䞓䞚术语劂音笊、节奏、指法等应以PPT的衚述䞺准。 +""" + else: + ppt_section = "" + + BATCH_SIZE = 500 + corrected_texts: List[Optional[str]] = [None] * len(segments) + + try: + for batch_start in range(0, len(segments), BATCH_SIZE): + batch_end = min(batch_start + BATCH_SIZE, len(segments)) + batch = segments[batch_start:batch_end] + + # 栌匏化䞺 idx|text + lines = [f"{i}|{seg['text']}" for i, seg in enumerate(batch)] + transcript_text = "\n".join(lines) + + prompt = f"""䜠是䞀䞪钢琎教孊视频的字幕校对䞓家。 + +{ppt_section} + +原始蜬圕文本每行栌匏序号|文字 +{transcript_text} + +请对䞊述蜬圕文本进行校对 +1. 修正Whisper语音识别错误劂同音字错误"矞耻"→"䌑止""副点"→"附点""法奜"→"法官"→"番奜"等 +2. 修正垞见钢琎术语的识别错误"付点"→"附点""莟点"→"附点""只发"→"指法""守䜍"→"手䜍""调苻"→"调号""谱苻"→"谱号""音苻"→"音笊""拍苻"→"拍笊"等 +3. 保留原文的语气、停顿和䞓䞚衚蟟 +4. 劂果某句话完党听䞍懂保留原文字䞍芁区行修改 +5. 
只需修正文字内容䞍芁改变序号和结构 + +请以以䞋JSON栌匏蟓出只蟓出JSON䞍芁添加任䜕解释 +{{ + "corrected": [ + {{"idx": 0, "text": "修正后的文字"}}, + {{"idx": 1, "text": "修正后的文字"}} + ] +}}""" + + self._report('matching', 2 + int(batch_start / len(segments) * 50), + f"校对 {batch_start}-{batch_end}/{len(segments)}...") + content = self._call_llm(prompt, max_tokens=16384, timeout=300, retries=3) + if not content: + # 重试倱莥则甚原始文本 + for i in range(batch_start, batch_end): + corrected_texts[i] = segments[i]["text"] + logger.warning(f"批次{batch_start}-{batch_end} LLM校对倱莥䜿甚原始文本") + continue + + import re as re_module + json_match = re_module.search(r'\{[\s\S]*\}', content) + if not json_match: + logger.warning(f"批次{batch_start}-{batch_end} LLM返回非JSON䜿甚原始文本") + for i in range(batch_start, batch_end): + corrected_texts[i] = segments[i]["text"] + continue + + import json as json_module + raw_matched = json_match.group() + # 去掉 markdown 代码块标记 + json_str = re_module.sub(r'^```json\s*', '', raw_matched) + json_str = re_module.sub(r'\s*```$', '', json_str) + try: + parsed = json_module.loads(json_str) + except json_module.JSONDecodeError as e: + logger.warning(f"批次{batch_start}-{batch_end} JSON解析倱莥: {e}") + logger.warning(f" 实际返回内容: {content[:500]}") + for i in range(batch_start, batch_end): + corrected_texts[i] = segments[i]["text"] + continue + + corrected_list = parsed.get("corrected", []) + batch_changes = [] + for item in corrected_list: + idx = item.get("idx") + if idx is not None and 0 <= idx < len(batch): + new_text = item.get("text", "") + corrected_texts[batch_start + idx] = new_text + orig_text = batch[idx]["text"] + if new_text != orig_text: + batch_changes.append((orig_text, new_text)) + + if batch_changes: + logger.info(f"批次{batch_start}-{batch_end} 完成: {len(corrected_list)}条, 修正 {len(batch_changes)} 倄") + for orig, new in batch_changes: + logger.info(f" \"{orig}\" → \"{new}\"") + else: + logger.info(f"批次{batch_start}-{batch_end} 完成: {len(corrected_list)}条, 无修正") + + # 组装结果 + corrected_segments = [] + for 
i, seg in enumerate(segments): + corrected_text = corrected_texts[i] if corrected_texts[i] is not None else seg["text"] + corrected_segments.append({ + "start": seg["start"], + "end": seg["end"], + "text": corrected_text + }) + + fixed = sum( + 1 for orig, corr in zip(segments, corrected_segments) + if orig["text"] != corr["text"] + ) + logger.info(f"LLM蜬圕文本校对完成: 修正 {fixed} 倄") + return corrected_segments + + except Exception as e: + logger.warning(f"LLM蜬圕文本校对倱莥: {e}䜿甚原始蜬圕文本") + return segments + + def _llm_extract_clips( + self, + corrected_segments: List[Dict], + ppt_full_text: Optional[str] = None, + ppt_knowledge: Optional[List[Dict]] = None + ) -> Optional[List[Dict[str, Any]]]: + """ + 䜿甚LLM从校正后的蜬圕文本䞭提取粟华片段。 + + Args: + corrected_segments: 校正后的蜬圕片段 [{start, end, text}, ...] + ppt_full_text: 可选的PPT完敎文本䜜䞺背景参考 + ppt_knowledge: 可选的PPT知识点列衚 [{title, slide}, ...] + + Returns: + clips配眮 [{title, start, end}]LLM倱莥返回None + """ + if not self.api_key: + logger.info("未配眮API_KEY无法䜿甚LLM提取片段") + return None + + import requests + + # 栌匏化蜬圕文本垊时闎戳 + segment_lines = [] + for seg in corrected_segments: + segment_lines.append(f"[{seg['start']:.1f}-{seg['end']:.1f}] {seg['text']}") + transcript_text = "\n".join(segment_lines) + + # 控制长床 + max_chars = 80000 + if len(transcript_text) > max_chars: + transcript_text = transcript_text[:max_chars] + "\n...蜬圕文本截断" + + # PPT参考完敎文本 + 知识点列衚 + if ppt_full_text or ppt_knowledge: + knowledge_lines = "\n".join([f" - {kp['title']}" for kp in (ppt_knowledge or [])]) + knowledge_section = f""" +【PPT参考文本权嚁背景】 +以䞋是䞎本节诟配套的PPT完敎内容请以歀䞺权嚁参考 +--- +{ppt_full_text[:5000] if ppt_full_text else "(无PPT文本)"} +--- + +【PPT知识点列衚必须逐条倄理】 +{knowledge_lines or "(无知识点列衚)"} + +泚意 +- 䜠必须䞺**列衚䞭的每䞀䞪知识点**扟到视频䞭的讲解片段 +- 劂果某䞪知识点圚视频䞭扟䞍到对应讲解圚not_found䞭列出 +- 劂果有PPT参考文本䌘先扟PPT䞭讲过的知识点对应的视频片段 +""" + else: + knowledge_section = """ +【泚意】本节诟没有提䟛PPT。请盎接从视频蜬圕文本䞭识别 +- 钢琎教孊䞭倌埗单独讲解的抂念、技巧或方法 +- 每䞪知识点扟视频䞭讲解最集䞭的片段 +- 尜量芆盖本节诟的完敎内容 +""" + + prompt = f"""䜠是䞀䞪钢琎教孊视频的粟华片段定䜍䞓家。 
+ +给定䞀䞪视频的完敎蜬圕文本垊时闎戳䜠需芁䞺PPT知识点列衚䞭的**每䞀䞪知识点**扟到视频䞭对应的讲解片段。 + +{knowledge_section} + +【重芁规则】 +1. 逐条倄理必须䞺列衚䞭的**每䞀䞪知识点**搜玢视频蜬圕文本扟到讲解最集䞭的片段 +2. **title 必须完党等于知识点列衚䞭的原名**䞍讞改写、䞍讞抂括、䞍讞扩展 + - ✅ 正确knowledge_point 是"匹琎的手型"title 就甚"匹琎的手型" + - ❌ 错误title 甹"手型支撑䞎攟束的栞心芁求"自己发挥 +3. **knowledge_point 字段也必须甚知识点列衚䞭的原名** +4. 时闎必须粟确䜿甚蜬圕文本䞭的实际时闎戳 +5. 时长控制每䞪片段纊5-15秒重芁内容可以皍长最长䞍超过20秒 +6. 总时长䞍超过180秒劂果知识点倪倚富臎总时长超标䌘先保留最重芁的知识点其䜙圚not_found䞭诎明 +7. 只蟓出JSON䞍芁添加任䜕解释 + +【视频蜬圕文本垊时闎戳】 +{transcript_text} + +请以以䞋JSON栌匏蟓出䞍芁蟓出其他内容 +{{ + "clips": [ + {{"title": "知识点原名䞍讞改写", "start": 匀始秒数, "end": 结束秒数, "knowledge_point": "知识点原名"}}, + {{"title": "知识点原名", "start": 匀始秒数, "end": 结束秒数, "knowledge_point": "知识点原名"}} + ], + "not_found": ["知识点原名必须䞎列衚䞭的名称完党䞀臎"] +}}""" + + try: + self._report('parse', 55, "调甚LLM提取片段...") + content = self._call_llm(prompt, max_tokens=4096, timeout=300, retries=3) + if not content: + return None + + # 提取JSON + import re as re_module + json_match = re_module.search(r'\{[\s\S]*\}', content) + if not json_match: + logger.warning(f"LLM提取返回非JSON: {content[:200]}") + return None + + import json as json_module + try: + parsed = json_module.loads(json_match.group()) + except json_module.JSONDecodeError as e: + logger.warning(f"JSON解析倱莥: {e}") + return None + + clips = parsed.get("clips", []) + not_found = parsed.get("not_found", []) + + if not clips and not not_found: + return None + + # 验证和枅理 + validated = [] + for clip in clips: + title = clip.get("title", "") + start = max(0, float(clip.get("start", 0))) + raw_end = float(clip.get("end", 0)) + end = min(raw_end, start + self.max_clip_duration) + kp = clip.get("knowledge_point", "") + validated.append({ + "title": title, + "start": int(start), + "end": int(end), + "knowledge_point": kp, + }) + + logger.info(f"LLM提取成功: {len(validated)} 䞪片段{len(not_found)} 䞪未扟到") + for c in validated: + logger.info(f" [{c['knowledge_point']}] {c['title']}: {c['start']}s - {c['end']}s") + if not_found: + logger.info(f" 未扟到知识点: 
{not_found}") + + return validated + + except Exception as e: + logger.warning(f"LLM提取片段倱莥: {e}") + return None + + # ==================== 䞻流皋 ==================== + + def run(self) -> dict: + """ + 运行完敎流皋。 + + 基础流无PPT: + 蜬圕视频 → LLM校正文本 → LLM提取片段 + + 增区流有PPT: + PPT提取知识点 → 蜬圕视频 → LLM校正文本(参考PPTå…šæ–‡) → LLM提取片段(参考PPTå…šæ–‡+知识点) + + Returns: + configå­—å…ž {video_src, clips, output_dir, ...} + """ + self._report('parse', 0, "匀始倄理...") + + try: + # Step 1: 蜬圕视频 + self._report('parse', 10, "蜬圕视频...") + segments = self.transcribe_video() + if not segments: + logger.error("蜬圕倱莥") + return self._create_config([]) + + # Step 2: PPT党文和知识点仅增区流有- 垊checkpoint倍甚 + self._report('parse', 15, "解析PPT...") + ppt_checkpoint = os.path.join(self.inter_dir, "ppt_knowledge_and_cleaned.json") + if os.path.exists(ppt_checkpoint): + with open(ppt_checkpoint, 'r', encoding='utf-8') as f: + saved = json.load(f) + ppt_knowledge = saved.get("knowledge_points", []) + ppt_cleaned_text = saved.get("cleaned_text", None) + titles = [kp.get("title","") for kp in ppt_knowledge] + logger.info(f"倍甚已保存的PPT知识点: {len(ppt_knowledge)}䞪") + logger.info(f" 知识点: {titles}") + else: + ppt_knowledge, ppt_cleaned_text = self.llm_extract_knowledge_points_from_ppt() + if ppt_knowledge: + with open(ppt_checkpoint, 'w', encoding='utf-8') as f: + json.dump({ + "knowledge_points": ppt_knowledge, + "cleaned_text": ppt_cleaned_text or "" + }, f, ensure_ascii=False) + logger.info(f"已保存PPT知识点到checkpoint") + + # Step 3: LLM校正文本以PPT党文䞺参考- 垊checkpoint倍甚 + self._report('parse', 30, "LLM校正文本...") + corrected_checkpoint = os.path.join(self.inter_dir, "corrected_transcript.json") + if os.path.exists(corrected_checkpoint): + with open(corrected_checkpoint, 'r', encoding='utf-8') as f: + corrected_segments = json.load(f) + logger.info(f"倍甚已校正的蜬圕文本: {len(corrected_segments)}条") + else: + corrected_segments = self._llm_correct_transcript(segments, ppt_full_text=ppt_cleaned_text) + if not corrected_segments: + 
logger.warning("LLM校正倱莥䜿甚原始蜬圕文本") + corrected_segments = segments + with open(corrected_checkpoint, 'w', encoding='utf-8') as f: + json.dump(corrected_segments, f, ensure_ascii=False) + logger.info(f"已保存校正后蜬圕文本到checkpoint") + + # Step 4: LLM提取片段以PPTå…šæ–‡+知识点䞺背景 + self._report('parse', 50, "LLM提取片段...") + clips = self._llm_extract_clips( + corrected_segments, + ppt_full_text=ppt_cleaned_text, + ppt_knowledge=ppt_knowledge + ) + if not clips: + logger.error("LLM提取片段倱莥") + return self._create_config([]) + + # Step 4: 合并重叠片段 + self._report('parse', 80, "合并重叠片段...") + clips = self._merge_overlapping_clips(clips) + + self._report('parse', 100, f"完成: {len(clips)} 䞪片段") + return self._create_config(clips) + + except Exception as e: + logger.error(f"倄理流皋匂垞: {e}") + raise + + def _create_config(self, clips: List[Dict]) -> dict: + """创建配眮字兞""" + return { + "video_src": self.video_path, + "clips": clips, + "output_dir": self.output_dir, + "term_corrections": self.term_corrections, + "video_params": { + "fade_duration": 1, + "title_fontsize": 48, + "title_color": "FFFF00", + "subtitle_fontsize": 24, + "subtitle_color": "FFFFFF", + } + } + + +def parse_ppt_to_config( + video_path: str, + ppt_path: str, + output_dir: str, + **kwargs +) -> dict: + """ + 䟿捷凜数从PPT生成完敎配眮 + + Args: + video_path: 视频路埄 + ppt_path: PPT路埄 + output_dir: 蟓出目圕 + **kwargs: 额倖参数 (api_key, api_host, progress_callback) + + Returns: + 配眮字兞 + """ + parser = PPTParser(video_path, ppt_path, output_dir, **kwargs) + return parser.run() \ No newline at end of file diff --git a/src/core/subtitle.py b/src/core/subtitle.py new file mode 100644 index 0000000..ec44f98 --- /dev/null +++ b/src/core/subtitle.py @@ -0,0 +1,323 @@ +# -*- coding: utf-8 -*- +""" +字幕倄理暡块 + +包含字幕生成、SRT栌匏蜬换、纠错等功胜 +""" + +import os +import json +import logging +from .utils import to_srt_time, to_ass_time, ensure_dir + +logger = logging.getLogger(__name__) + + +class SubtitleSegment: + """字幕片段""" + + def __init__(self, start, end, text, 
class SubtitleTrack:
    """An ordered collection of subtitle segments that can be rendered as SRT or ASS text."""

    def __init__(self, style=None):
        self.segments = []
        # Default style tag applied to segments added without one ('title' or 'content').
        self.style = style

    def add(self, start, end, text, style=None):
        """Append a segment; ``style`` falls back to the track's default style."""
        seg = SubtitleSegment(start, end, text)
        seg.style = style or self.style
        self.segments.append(seg)

    def to_srt(self, with_index=True):
        """
        Render the track as SRT text.

        Args:
            with_index: When true, each cue is preceded by its 1-based index.

        Returns:
            The SRT document as a single string.
        """
        lines = []
        for i, seg in enumerate(self.segments, 1):
            if with_index:
                lines.append(str(i))
            lines.append(seg.to_srt_line())
            lines.append(seg.text)
            lines.append('')

        return '\n'.join(lines)

    def to_ass(self, style_name="Default", font_size=24, primary_color="FFFFFF", alignment=2):
        """
        Render the track as an ASS (v4.00+) script.

        Args:
            style_name: Name of the single style defined and used by every cue.
            font_size: Font size written into the style line.
            primary_color: Text colour as ``RRGGBB`` hex (an optional leading
                ``#`` is ignored) or an already-formatted ``&H...`` ASS colour.
            alignment: ASS alignment code (2 = bottom centre, 5 = middle centre).

        Returns:
            The ASS document as a single string.
        """
        ass_lines = [
            "[Script Info]",
            "Title: Generated by piano-lesson-highlight-generator",
            "ScriptType: v4.00+",
            "PlayResX: 1920",
            "PlayResY: 1080",
            "WrapStyle: 0",
            "",
            "[V4+ Styles]",
            "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding",
        ]

        def html_to_ass_bgr(color):
            # ASS stores colours as blue-green-red, the reverse of HTML order.
            if color.startswith('&H'):
                return color
            color = color.lstrip('#')
            r = int(color[0:2], 16)
            g = int(color[2:4], 16)
            b = int(color[4:6], 16)
            return f"&H{b:02X}{g:02X}{r:02X}"

        primary_bgr = html_to_ass_bgr(primary_color)
        outline_bgr = "&H000000"      # black outline
        shadow_bgr = "&H80000000"     # BackColour: half-transparent black

        style_line = (
            f"Style: {style_name},埮蜯雅黑,{font_size},{primary_bgr},"
            f"{primary_bgr},{outline_bgr},{shadow_bgr},0,0,0,0,100,100,0,0,1,2,2,"
            f"{alignment},10,10,30,1"
        )
        ass_lines.append(style_line)
        ass_lines.append("")
        ass_lines.append("[Events]")
        ass_lines.append("Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text")

        for seg in self.segments:
            # Computed outside the f-string: a backslash inside a replacement
            # field is a SyntaxError before Python 3.12.
            text = seg.text.replace('\n', '\\N')
            # One value per column of the Events format line: Start and End
            # are separate fields, Name and Effect are empty.
            ass_lines.append(
                f"Dialogue: 0,{to_ass_time(seg.start)},{to_ass_time(seg.end)},"
                f"{style_name},,0,0,0,,{text}"
            )

        return '\n'.join(ass_lines)

    def save(self, path):
        """Write the track to ``path`` as UTF-8 SRT."""
        with open(path, 'w', encoding='utf-8') as f:
            f.write(self.to_srt())
        logger.info(f"Saved subtitles: {path}")

    def save_ass(self, path, style_name="Default", font_size=24, primary_color="FFFFFF", alignment=2):
        """Write the track to ``path`` as UTF-8 ASS; arguments are passed to :meth:`to_ass`."""
        with open(path, 'w', encoding='utf-8') as f:
            f.write(self.to_ass(style_name, font_size, primary_color, alignment))
        logger.info(f"Saved ASS subtitles: {path}")

    @classmethod
    def from_json(cls, json_path, title=None):
        """
        Build a track from a JSON file containing a ``segments`` list.

        Args:
            json_path: Path to a JSON document; each entry of its ``segments``
                list may carry ``start``, ``end`` and ``text``.
            title: Optional title; when given it is added first as a cue
                spanning 0-3 seconds.

        Returns:
            A new ``SubtitleTrack``.
        """
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        track = cls()

        if title:
            track.add(0, 3, title)

        for seg in data.get('segments', []):
            track.add(
                seg.get('start', 0),
                seg.get('end', 0),
                seg.get('text', '')
            )

        return track
JSON数据 + """ + json_path = os.path.join(inter_dir, f"clip{clip_num}.json") + with open(json_path, 'r', encoding='utf-8') as f: + return json.load(f) + + def generate_from_clips(self, clip_configs, json_paths, apply_corrections=None): + """ + 从clips生成字幕分犻标题和正文蜚道 + + Args: + clip_configs: clip配眮列衚 + json_paths: JSON文件路埄列衚 + apply_corrections: 纠错凜数 + + Returns: + (title_track, content_track, title_path, content_path) + """ + title_track = SubtitleTrack(style='title') + content_track = SubtitleTrack(style='content') + current_time = 0 + + # 计算每䞪clip的偏移 + # 必须甚 clip_configs 里的实际时长而䞍是 Whisper 检测的语音结束时闎 + # 因䞺 Whisper 只检测有语音的郚分无语音的闎隙䌚被応略富臎偏移环积偏差 + offsets = [] + for i, json_path in enumerate(json_paths): + offsets.append(current_time) + clip = clip_configs[i] + clip_duration = clip['end'] - clip['start'] + current_time += clip_duration + + # 重新遍历生成字幕 + current_time = 0 + for i, (clip, json_path) in enumerate(zip(clip_configs, json_paths)): + offset = offsets[i] + clip_duration = offsets[i+1] - offsets[i] if i+1 < len(offsets) else 3 + + # 添加标题䜿甚title样匏- 标题星瀺3秒后正文才星瀺避免重叠 + title_duration = min(3, clip_duration) + title_track.add(offset, offset + title_duration, clip['title'], style='title') + + # 添加正文字幕 - 从标题结束后匀始避免重叠 + with open(json_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + content_start = offset + title_duration # 正文从标题结束后匀始 + for seg in data.get('segments', []): + text = seg.get('text', '').strip() + if not text: + continue + + # 应甚纠错 + if apply_corrections: + text = apply_corrections(text) + + # 计算盞对偏移正文时闎从标题结束后匀始 + seg_start = offset + seg['start'] + seg_end = offset + seg['end'] + + # 只添加圚clip时闎范囎内的字幕 + clip_end = clip['end'] - clip['start'] + offset + if seg_start < clip_end and seg_end <= clip_end: + content_track.add( + seg_start, + seg_end, + text, + style='content' + ) + + # 保存䞀䞪蜚道 - 标题䜿甚SRT栌匏 + version = self._get_next_version() + title_path = os.path.join(self.subs_dir, f"v{version}_title.srt") + content_path = 
os.path.join(self.subs_dir, f"v{version}_content.srt") + + title_track.save(title_path) + content_track.save(content_path) + + return title_track, content_track, title_path, content_path + + def _get_next_version(self): + """获取䞋䞀䞪版本号""" + existing = [f for f in os.listdir(self.subs_dir) if f.startswith('v') and f.endswith('_terms.srt')] + + if not existing: + return 1 + + # 提取版本号 + versions = [] + for f in existing: + try: + v = int(f.split('_')[0][1:]) + versions.append(v) + except: + pass + + return max(versions) + 1 if versions else 1 + + def generate_v1(self, clip_configs, json_paths, apply_corrections=None): + """ + 生成V1版本字幕原版+纠错 + + Args: + clip_configs: clip配眮 + json_paths: JSON路埄 + apply_corrections: 纠错凜数 + + Returns: + 字幕路埄 + """ + return self.generate_from_clips(clip_configs, json_paths, apply_corrections)[1] + + +def load_clip_subtitles(inter_dir, clip_nums): + """ + 批量加蜜倚䞪clip的字幕 + + Args: + inter_dir: 䞭闎目圕 + clip_nums: clip猖号列衚 + + Returns: + {clip_num: json_data} + """ + clips = {} + for num in clip_nums: + json_path = os.path.join(inter_dir, f"clip{num}.json") + if os.path.exists(json_path): + with open(json_path, 'r', encoding='utf-8') as f: + clips[num] = json.load(f) + return clips \ No newline at end of file diff --git a/src/core/utils.py b/src/core/utils.py new file mode 100644 index 0000000..4830bee --- /dev/null +++ b/src/core/utils.py @@ -0,0 +1,112 @@ +# -*- coding: utf-8 -*- +""" +通甚工具凜数 + +提䟛跚暡块䜿甚的通甚功胜 +""" + +import os +import re +import subprocess +import logging + +logger = logging.getLogger(__name__) + + +def ensure_dir(path): + """确保目圕存圚""" + os.makedirs(path, exist_ok=True) + return path + + +def get_clip_num(path): + """从路埄䞭提取clip猖号""" + match = re.search(r'clip(\d+)', path) + return int(match.group(1)) if match else 0 + + +def run_cmd(cmd, capture=True, timeout=300): + """ + 执行shell呜什 + + Args: + cmd: 呜什字笊䞲 + capture: 是吊捕获蟓出 + timeout: è¶…æ—¶æ—¶é—Ž(秒) + + Returns: + True if success, False otherwise + """ + try: + if capture: + 
def to_srt_time(t):
    """
    Convert a time in seconds to an SRT timestamp (``HH:MM:SS,mmm``).

    The value is rounded to the nearest millisecond first and then split
    into fields, so float representation error cannot drop a millisecond
    (``1.001`` used to render as ``00:00:01,000``) and a value such as
    ``59.9996`` carries into the next second. Negative input is clamped to 0,
    since SRT has no negative timestamps.

    Args:
        t: Time in seconds (int or float).

    Returns:
        The formatted timestamp string.
    """
    total_ms = max(0, round(t * 1000))
    total_seconds, millis = divmod(total_ms, 1000)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"
def extract_clip(video_src, start, end, output_path, fade_duration=1):
    """
    Cut ``[start, end)`` out of a video and re-encode it (libx264 / AAC).

    Args:
        video_src: Source video path.
        start: Start time in seconds.
        end: End time in seconds; must be greater than ``start``.
        output_path: Output file path (overwritten if present).
        fade_duration: Length in seconds of the fade-in and fade-out; 0
            disables fading. It is capped at half the clip length so the
            fade-out never starts at a negative time.

    Returns:
        True if ffmpeg succeeded. False if it failed or the requested range
        is empty, in which case ffmpeg is not run.
    """
    duration = end - start
    if duration <= 0:
        logger.error(f"Failed to extract clip: {output_path}")
        return False

    # -ss before -i seeks the input; -t limits the output length.
    # NOTE(review): paths are interpolated into a shell string, so a path
    # containing a double quote will break the command.
    cmd = f'"{FFMPEG_CMD}" -y -ss {start} -i "{video_src}" -t {duration} '

    if fade_duration > 0:
        # For clips shorter than two fades, shrink the fade instead of
        # passing a negative start time to the fade-out filter.
        fade = min(fade_duration, duration / 2)
        fade_out_start = duration - fade
        cmd += f'-vf "fade=t=in:st=0:d={fade},fade=t=out:st={fade_out_start}:d={fade}" '

    cmd += f'-c:v libx264 -crf 20 -c:a aac "{output_path}"'

    success = run_cmd(cmd)
    if success:
        logger.info(f"Extracted clip: {output_path}")
    else:
        logger.error(f"Failed to extract clip: {output_path}")

    return success
def _escape_filter_path(path):
    """Return ``path`` with forward slashes and every ':' escaped for use inside an ffmpeg filter argument."""
    # ':' separates filter options, so each one in the path (the Windows
    # drive colon included) must be written as '\:'. The original only
    # handled the literal prefix 'D:/'.
    return path.replace('\\', '/').replace(':', '\\:')


def burn_subtitles(video_path, srt_path, output_path):
    """
    Burn a subtitle file into a video using ffmpeg's ``subtitles`` filter.

    Args:
        video_path: Input video path.
        srt_path: Subtitle file path; Windows paths on any drive are accepted.
        output_path: Output video path (overwritten if present).

    Returns:
        True if ffmpeg succeeded, False otherwise.
    """
    # NOTE(review): a single quote in srt_path would still terminate the
    # quoted filter value; such paths are not handled here.
    srt_escaped = _escape_filter_path(srt_path)

    filter_str = f"subtitles='{srt_escaped}':force_style='{SUBTITLE_STYLE}'"

    # Audio is copied as-is; only the video stream goes through the filter.
    cmd = f'"{FFMPEG_CMD}" -y -i "{video_path}" -vf "{filter_str}" -c:a copy "{output_path}"'

    success = run_cmd(cmd)

    if success:
        logger.info(f"Burned subtitles -> {output_path}")
    else:
        logger.error(f"Failed to burn subtitles")

    return success
def extract_key_frames(video_path, timestamps, output_dir, prefix="frame"):
    """
    Grab one JPEG frame from a video at each given timestamp.

    Args:
        video_path: Video file to read.
        timestamps: Iterable of seek positions passed to ffmpeg's ``-ss``.
        output_dir: Directory for the frames (created via ``ensure_dir``).
        prefix: File name prefix; frames are named ``<prefix>_<index:03d>.jpg``
            where ``index`` is the position in ``timestamps``.

    Returns:
        Paths of the frames that were actually written, in timestamp order.
        Timestamps whose extraction failed are logged and left out.
    """
    ensure_dir(output_dir)
    written = []

    for index, moment in enumerate(timestamps):
        frame_name = f"{prefix}_{index:03d}.jpg"
        output_path = os.path.join(output_dir, frame_name)

        cmd = f'"{FFMPEG_CMD}" -y -ss {moment} -i "{video_path}" -vframes 1 -q:v 2 "{output_path}"'

        # A frame counts only if ffmpeg reported success AND the file exists.
        if not run_cmd(cmd) or not os.path.exists(output_path):
            logger.warning(f"Failed to extract frame at {moment}s")
            continue

        written.append(output_path)

    return written
ensure_dir(self.output_dir) + ensure_dir(self.inter_dir) + + def extract_clips(self, clips): + """ + 提取所有视频片段 + + Args: + clips: 片段配眮列衚 + + Returns: + 片段路埄列衚 + """ + clip_paths = [] + fade_dur = self.video_params.get('fade_duration', 1) + + for i, clip in enumerate(clips, 1): + clip_path = os.path.join(self.inter_dir, f"clip{i}.mp4") + fade_path = os.path.join(self.inter_dir, f"clip{i}_fade.mp4") + + # 提取片段 + success = extract_clip( + self.video_src, + clip['start'], + clip['end'], + clip_path, + fade_duration=0 # 先䞍添加淡出 + ) + + if success and fade_dur > 0: + # 添加淡入淡出 + from .utils import to_srt_time + duration = clip['end'] - clip['start'] + fade_in_end = fade_dur + fade_out_start = max(0, duration - fade_dur) + + cmd = f'"{FFMPEG_CMD}" -y -i "{clip_path}" ' + cmd += f'-vf "fade=t=in:st=0:d={fade_dur},fade=t=out:st={fade_out_start}:d={fade_dur}" ' + cmd += f'-c:v libx264 -crf 20 -c:a aac -y "{fade_path}"' + + if run_cmd(cmd): + clip_paths.append(fade_path) + else: + # 淡出倱莥䜿甚原始片段 + clip_paths.append(clip_path) + else: + clip_paths.append(clip_path) + + return clip_paths + + def merge(self, clip_paths, output_name="concat_merged.mp4"): + """合并片段""" + output_path = os.path.join(self.output_dir, output_name) + return merge_clips(clip_paths, output_path, self.inter_dir) + + def burn(self, video_path, srt_path, output_name="final.mp4"): + """烧圕字幕""" + output_path = os.path.join(self.output_dir, output_name) + return burn_subtitles(video_path, srt_path, output_path) \ No newline at end of file diff --git a/src/gui.py b/src/gui.py new file mode 100644 index 0000000..c3778e6 --- /dev/null +++ b/src/gui.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python3 +""" +Piano Highlight Generator - GUI +简单的 GUI 包装调甚䞎 CLI 完党盞同的底层凜数 +API 配眮圚 config.ini äž­ +""" + +import sys +import os +import threading +import logging +import configparser +from pathlib import Path + +# 讟眮环境 +import shutil +ffmpeg_path = shutil.which("ffmpeg") +if ffmpeg_path: + ffmpeg_bin = os.path.dirname(ffmpeg_path) + if 
class GUI(QMainWindow):
    """
    Main window: collects the video, PPT and output paths, runs the
    processing in a background thread, and shows its log and progress.
    """

    def __init__(self):
        super().__init__()

        # API settings come from config.ini; the app cannot run without it.
        self.config = load_config()
        if not self.config:
            QMessageBox.critical(None, "错误", "扟䞍到 config.ini 配眮文件")
            sys.exit(1)

        self.signaller = Signaller()
        self.worker_thread = None
        self._setup_ui()

        # The worker thread only talks to the UI through these signals.
        self.signaller.log_signal.connect(self._append_log)
        # progress_signal was emitted by the worker but never connected,
        # so the progress bar never moved.
        self.signaller.progress_signal.connect(self._on_progress)
        self.signaller.finished_signal.connect(self._on_finished)

    def _setup_ui(self):
        """Build the widgets: file pickers, progress bar, log view, buttons."""
        self.setWindowTitle("Piano Highlight Generator - GUI")
        self.setGeometry(100, 100, 700, 500)

        central = QWidget()
        self.setCentralWidget(central)
        layout = QVBoxLayout(central)

        # === File selection ===
        file_group = QGroupBox("文件配眮")
        file_layout = QFormLayout()

        self.video_edit = QLineEdit()
        self.video_btn = QPushButton("选择视频")
        self.video_btn.clicked.connect(lambda: self._select_file(self.video_edit, "视频文件 (*.mp4 *.avi *.mov)"))
        video_row = QHBoxLayout()
        video_row.addWidget(self.video_edit)
        video_row.addWidget(self.video_btn)
        file_layout.addRow("视频:", video_row)

        self.ppt_edit = QLineEdit()
        self.ppt_btn = QPushButton("选择PPT")
        self.ppt_btn.clicked.connect(lambda: self._select_file(self.ppt_edit, "PPT文件 (*.pptx)"))
        ppt_row = QHBoxLayout()
        ppt_row.addWidget(self.ppt_edit)
        ppt_row.addWidget(self.ppt_btn)
        file_layout.addRow("PPT:", ppt_row)

        self.output_edit = QLineEdit(os.path.expanduser("~/piano_output"))
        self.output_btn = QPushButton("选择蟓出目圕")
        self.output_btn.clicked.connect(lambda: self._select_dir(self.output_edit))
        output_row = QHBoxLayout()
        output_row.addWidget(self.output_edit)
        output_row.addWidget(self.output_btn)
        file_layout.addRow("蟓出目圕:", output_row)

        file_group.setLayout(file_layout)
        layout.addWidget(file_group)

        # === Progress bar ===
        self.progress_bar = QProgressBar()
        self.progress_bar.setValue(0)
        layout.addWidget(self.progress_bar)

        # === Log view ===
        log_label = QLabel("日志:")
        layout.addWidget(log_label)
        self.log_area = QTextEdit()
        self.log_area.setReadOnly(True)
        self.log_area.setMaximumHeight(250)
        layout.addWidget(self.log_area)

        # === Buttons ===
        btn_layout = QHBoxLayout()
        self.start_btn = QPushButton("匀始倄理")
        self.start_btn.clicked.connect(self._on_start)
        self.clear_btn = QPushButton("枅空日志")
        self.clear_btn.clicked.connect(lambda: self.log_area.clear())
        btn_layout.addWidget(self.start_btn)
        btn_layout.addWidget(self.clear_btn)
        btn_layout.addStretch()
        layout.addLayout(btn_layout)

    def _select_file(self, edit, filter_str):
        """Open a file dialog and put the chosen path into ``edit``."""
        path, _ = QFileDialog.getOpenFileName(self, "选择文件", "", filter_str)
        if path:
            edit.setText(path)

    def _select_dir(self, edit):
        """Open a directory dialog and put the chosen path into ``edit``."""
        path = QFileDialog.getExistingDirectory(self, "选择目圕")
        if path:
            edit.setText(path)

    def _append_log(self, text):
        """Append a line to the log view and keep it scrolled to the bottom."""
        self.log_area.append(text)
        self.log_area.verticalScrollBar().setValue(
            self.log_area.verticalScrollBar().maximum()
        )

    def _on_progress(self, step, percent, message):
        """Slot for ``progress_signal``: show ``percent`` (clamped to 0-100) on the progress bar."""
        self.progress_bar.setValue(max(0, min(100, int(percent))))

    def _on_finished(self, success, message):
        """Slot for ``finished_signal``: re-enable the start button and report the outcome."""
        self.start_btn.setEnabled(True)
        if success:
            QMessageBox.information(self, "完成", message)
        else:
            QMessageBox.critical(self, "错误", message)

    def _on_start(self):
        """Validate the inputs and start the worker thread."""
        video_path = self.video_edit.text().strip()
        ppt_path = self.ppt_edit.text().strip()
        output_dir = self.output_edit.text().strip()

        # API settings are read from the [api] section of config.ini.
        api_key = self.config.get('api', 'api_key', fallback='').strip()
        api_host = self.config.get('api', 'api_host', fallback='').strip()

        if not video_path:
            QMessageBox.warning(self, "譊告", "请选择视频文件")
            return
        if not ppt_path:
            QMessageBox.warning(self, "譊告", "请选择PPT文件")
            return
        if not output_dir:
            QMessageBox.warning(self, "譊告", "请选择蟓出目圕")
            return

        self.start_btn.setEnabled(False)
        self.log_area.clear()
        self.progress_bar.setValue(0)

        # Run the long job off the UI thread so the window stays responsive.
        self.worker_thread = threading.Thread(
            target=self._worker,
            args=(video_path, ppt_path, output_dir, api_key, api_host)
        )
        self.worker_thread.start()

    def _worker(self, video_path, ppt_path, output_dir, api_key, api_host):
        """
        Background job: build the clip config from the PPT, save it as YAML,
        then run the pipeline. All UI feedback goes through ``self.signaller``.
        """
        try:
            os.makedirs(output_dir, exist_ok=True)

            # Step 1: generate the config from the PPT
            self.signaller.log_signal.emit("=" * 50)
            self.signaller.log_signal.emit("从PPT生成clips配眮...")
            self.signaller.log_signal.emit(f"视频: {video_path}")
            self.signaller.log_signal.emit(f"PPT: {ppt_path}")
            self.signaller.log_signal.emit(f"蟓出: {output_dir}")
            self.signaller.log_signal.emit("=" * 50)

            def progress_callback(step, percent, message):
                self.signaller.progress_signal.emit(step, percent, message)
                self.signaller.log_signal.emit(f"[{step}] {percent}%: {message}")

            config = parse_ppt_to_config(
                video_path=video_path,
                ppt_path=ppt_path,
                output_dir=output_dir,
                api_key=api_key,
                api_host=api_host,
                progress_callback=progress_callback,
            )

            if not config.get('clips'):
                raise ValueError("LLM未胜提取到任䜕片段")

            # Save the generated config next to the output
            config_path = os.path.join(output_dir, 'generated_config.yaml')
            import yaml
            with open(config_path, 'w', encoding='utf-8') as f:
                yaml.dump(config, f, allow_unicode=True, default_flow_style=False)
            self.signaller.log_signal.emit(f"配眮已保存: {config_path}")

            # Step 2: run the pipeline
            self.signaller.log_signal.emit("=" * 50)
            self.signaller.log_signal.emit("匀始倄理视频...")
            self.signaller.log_signal.emit("=" * 50)

            pipeline = Pipeline(config)
            final_path = pipeline.run()

            self.signaller.log_signal.emit("=" * 50)
            self.signaller.log_signal.emit(f"完成! 最终视频: {final_path}")
            self.signaller.log_signal.emit("=" * 50)

            self.signaller.finished_signal.emit(True, f"倄理完成!\n最终视频: {final_path}")

        except Exception as e:
            # Report any failure to the UI instead of letting the thread die silently.
            self.signaller.log_signal.emit(f"错误: {e}")
            import traceback
            self.signaller.log_signal.emit(traceback.format_exc())
            self.signaller.finished_signal.emit(False, str(e))
Create venv using working Python +:: -------------------------------------------- +echo [1/4] Setting up Python environment... + +if exist "%VENV_DIR%\Scripts\python.exe" ( + echo venv exists, skipping creation +) else ( + echo Creating venv using py312_cuda Python... + "%PYTHON_SRC%" -m venv "%VENV_DIR%" + if errorlevel 1 ( + echo [ERROR] Failed to create venv + pause + exit /b 1 + ) + echo venv created +) + +:: -------------------------------------------- +:: 2. Install dependencies +:: -------------------------------------------- +echo. +echo [2/4] Installing dependencies... + +set "VENV_PIP=%VENV_DIR%\Scripts\pip.exe" + +:: Check if PySide6 is installed (the venv interpreter lives under Scripts\ on Windows) +"%VENV_DIR%\Scripts\python.exe" -c "import PySide6" 2>nul +if errorlevel 1 ( + echo Installing PySide6 and dependencies... + "%VENV_PIP%" install PySide6 pyyaml requests pypinyin + if errorlevel 1 ( + echo [ERROR] Failed to install dependencies + pause + exit /b 1 + ) +) else ( + echo Dependencies already installed +) + +:: -------------------------------------------- +:: 3. Setup FFmpeg +:: -------------------------------------------- +echo. +echo [3/4] Setting up FFmpeg... + +set "FFMPEG_SOURCE=D:\ProgramData\anaconda3\envs\py312_cuda\Scripts" +set "FFMPEG_LOCAL=%PROJECT_DIR%ffmpeg" + +if exist "%FFMPEG_LOCAL%\ffmpeg-8.1-full_build\bin\ffmpeg.exe" ( + echo Local FFmpeg found +) else ( + echo Copying FFmpeg from conda env... + if not exist "%FFMPEG_LOCAL%\ffmpeg-8.1-full_build\bin" mkdir "%FFMPEG_LOCAL%\ffmpeg-8.1-full_build\bin" + for %%f in (ffmpeg.exe ffprobe.exe ffplay.exe) do ( + if exist "%FFMPEG_SOURCE%\%%f" copy /Y "%FFMPEG_SOURCE%\%%f" "%FFMPEG_LOCAL%\ffmpeg-8.1-full_build\bin\%%f" >nul + ) +) +set "FFMPEG_BIN=%FFMPEG_LOCAL%\ffmpeg-8.1-full_build\bin" +echo FFmpeg ready + +:: -------------------------------------------- +:: 4. Launch Application +:: -------------------------------------------- +echo. +echo [4/4] Launching... + +set "PATH=%FFMPEG_BIN%;%PATH%" + +if exist "%STATE_FILE%" ( + echo. 
+ echo Found incomplete work. Resume? + set /p RESTORE=" [Y/N]: " + if /i "!RESTORE!"=="N" ( + del /f "%STATE_FILE%" >nul 2>&1 + ) +) + +echo. +echo ============================================ +echo Starting... +echo ============================================ + +cd /d "%SRC_DIR%" +"%VENV_DIR%\Scripts\python.exe" main.py + +if errorlevel 1 ( + echo. + echo [ERROR] Exit code: !errorlevel! +) else ( + echo. + echo Done +) +pause diff --git a/tasks.md b/tasks.md new file mode 100644 index 0000000..a6b9113 --- /dev/null +++ b/tasks.md @@ -0,0 +1,477 @@ +# Piano Highlight Generator App - 任务拆解 + +> 创建日期2026-05-02 +> 基于design.md + +--- + +## 任务总览 + +| # | 任务 | 䟝赖 | 䌘先级 | 工期 | +|---|------|------|--------|------| +| 1 | 项目骚架搭建 | - | P0 | 1h | +| 2 | ConfigPanel 配眮面板 | 1 | P0 | 1h | +| 3 | StateManager 状态管理 | 1 | P0 | 0.5h | +| 4 | ProgressView 进床视囟 | 1 | P0 | 1h | +| 5 | Worker 后台线皋 | 1 | P0 | 1h | +| 6 | PipelineController 流氎线控制 | 3, 5 | P0 | 1.5h | +| 7 | TitleEditor 标题猖蟑噚 | 6 | P1 | 1.5h | +| 8 | 䞻窗口集成 | 2, 4, 7 | P0 | 1h | +| 9 | 栞心暡块适配 | 1 | P0 | 0.5h | +| 10 | 错误倄理完善 | 8 | P2 | 1h | +| 11 | 打包配眮 + 测试 | 10 | P2 | 1.5h | +| 12 | 文档和 README | 11 | P3 | 0.5h | + +--- + +## 任务 1: 项目骚架搭建 + +**文件** +- `src/__init__.py` +- `src/main.py` - 应甚入口 +- `src/app.py` - QMainWindow 䞻窗口 +- `src/gui/__init__.py` +- `src/logic/__init__.py` +- `src/core/__init__.py` +- `requirements.txt` +- `pyproject.toml` + +**内容** +- 创建目圕结构 +- 创建空的 `__init__.py` +- `main.py` 䜿甚 QApplication 启劚 +- `app.py` 创建空的䞻窗口框架 +- `requirements.txt` 包含所有䟝赖 +- 配眮 pyproject.toml + +**验收标准** +- 运行 `python src/main.py` 胜启劚空窗口 +- 窗口标题䞺 "Piano Highlight Generator" + +--- + +## 任务 2: ConfigPanel 配眮面板 + +**文件**`src/gui/config_panel.py` + +**UI 组件** +``` +┌─────────────────────────────────────────────┐ +│ API 配眮 │ +│ API Host: [________________________] │ +│ API Key: [________________________] │ +│ 暡型: [火山方舟 Ark v] │ +├────────────────────────────────────────────── +│ 视频配眮 │ +│ 视频文件: [________________] [浏览...] 
│ +│ 蟓出目圕: [________________] [浏览...] │ +├────────────────────────────────────────────── +│ Whisper 配眮 │ +│ 暡型: [large v] │ +│ 暡型路埄: [________________] [浏览...] │ +├────────────────────────────────────────────── +│ [匀始倄理] [保存配眮] │ +└─────────────────────────────────────────────┘ +``` + +**信号** +- `config_changed_signal(dict)` - 配眮变曎时发出 + +**方法** +- `load_config(config: dict)` - 加蜜配眮到 UI +- `get_config() -> dict` - 从 UI 获取配眮 +- `validate() -> (bool, str)` - 验证配眮有效性 + +**验收标准** +- 所有蟓入框胜正垞蟓入 +- 文件选择对话框胜选择文件和目圕 +- 配眮变曎时发出信号 + +--- + +## 任务 3: StateManager 状态管理 + +**文件**`src/logic/state_manager.py` + +**ç±»**`StateManager` + +**方法** +```python +def __init__(self, state_file: str): + """初始化加蜜或创建状态文件""" + +def save(self): + """保存状态到 JSON 文件""" + +def get_current_step(self) -> int: + """获取圓前步骀玢匕 (0-6)""" + +def get_step_name(self) -> str: + """获取圓前步骀名称""" + +def set_step_status(self, step_name: str, status: str): + """讟眮步骀状态 (pending/in_progress/completed/failed)""" + +def update_clip(self, clip_index: int, **kwargs): + """曎新 clip 信息""" + +def get_clips(self) -> list: + """获取所有 clips""" + +def get_user_modified_titles(self) -> dict: + """获取甚户修改过的标题 {clip_index: title}""" + +def reset(self): + """重眮状态匀始新项目""" +``` + +**状态文件**`{output_dir}/state.json` + +**验收标准** +- 创建新状态时生成默讀结构 +- 加蜜已有状态时恢倍完敎信息 +- 保存后 JSON 文件栌匏正确 + +--- + +## 任务 4: ProgressView 进床视囟 + +**文件**`src/gui/progress_view.py` + +**UI 组件** +``` +┌─────────────────────────────────────────────┐ +│ 圓前步骀: 提取视频片段 [Clip 3/14] │ +│ ████████████░░░░░░░░░░░░ 50% │ +├────────────────────────────────────────────── +│ [准倇] → [提取] → [蜬圕] → [纠正] → [字幕] │ +│ → [合并] → [烧圕] │ +├────────────────────────────────────────────── +│ 日志: │ +│ ┌───────────────────────────────────────┐ │ +│ │ 10:30:01 匀始提取片段 3/14 │ │ +│ │ 10:30:02 片段 3 提取完成 │ │ +│ │ 10:30:03 匀始提取片段 4/14 │ │ +│ └───────────────────────────────────────┘ │ +├────────────────────────────────────────────── +│ [暂停] [停止] [ç»§ç»­] │ 
+└─────────────────────────────────────────────┘ +``` + +**信号** +- `start_signal` - 匀始倄理 +- `pause_signal` - 暂停 +- `resume_signal` - ç»§ç»­ +- `stop_signal` - 停止 + +**方法** +- `update_step(step_name: str, percent: int)` - 曎新步骀进床 +- `update_clip_progress(current: int, total: int)` - 曎新 Clip 进床 +- `append_log(message: str)` - 远加日志 +- `show_titles_for_review(titles: list)` - 星瀺标题埅审栞觊发 TitleEditor + +**验收标准** +- 进床条胜实时曎新 +- 日志胜自劚滚劚到最新 +- 按钮状态胜根据流氎线状态变化 + +--- + +## 任务 5: Worker 后台线皋 + +**文件**`src/logic/worker.py` + +**ç±»**`Worker(QThread)` + +**信号** +```python +progress_signal = Signal(str, int, str) # step_name, percent, message +clip_completed_signal = Signal(int) # clip_index +step_started_signal = Signal(str) # step_name +step_completed_signal = Signal(str) # step_name +titles_ready_signal = Signal(list) # [{clip_index, original, llm_suggested}] +error_signal = Signal(str) # error_message +finished_signal = Signal(bool, str) # success, message +log_signal = Signal(str) # log message +``` + +**方法** +- `__init__(config, state_manager, controller)` +- `run()` - 执行流氎线 +- `request_pause()` - 请求暂停 +- `request_stop()` - 请求停止 + +**暂停实现** +```python +def run(self): + for step in self.steps: + if self.is_stopped: + break + if self.is_paused: + self.wait_for_resume() # 等埅甚户resume信号 + # 执行步骀... 
+``` + +**验收标准** +- UI 圚倄理过皋䞭䞍卡顿 +- 暂停信号胜圚 1 秒内响应 +- 所有信号胜正确䌠递到 UI + +--- + +## 任务 6: PipelineController 流氎线控制 + +**文件**`src/logic/pipeline_controller.py` + +**ç±»**`PipelineController` + +**步骀定义** +```python +STEPS = [ + 'ready', + 'extracting', # 提取片段 + 'transcribing', # 蜬圕 + 'title_correcting', # 标题纠正人工介入点 + 'generating_subtitles', # 生成字幕 + 'merging', # 合并 + 'burning', # 烧圕 + 'completed' +] +``` + +**方法** +```python +def __init__(self, config: dict, state: StateManager): + self.config = config + self.state = state + self.is_paused = False + self.is_stopped = False + +def run(self, worker: Worker): + """运行流氎线""" + +def pause(self): + """暂停""" + +def resume(self): + """恢倍""" + +def stop(self): + """停止""" + +def step_extracting(self, worker: Worker): + """提取片段""" + +def step_transcribing(self, worker: Worker): + """蜬圕调甚 Whisper""" + +def step_title_correcting(self, worker: Worker) -> list: + """标题纠正 - 调甚 LLM返回需芁甚户确讀的标题""" + +def step_generating_subtitles(self, worker: Worker): + """生成字幕""" + +def step_merging(self, worker: Worker): + """合并视频""" + +def step_burning(self, worker: Worker): + """烧圕字幕""" +``` + +**验收标准** +- 每䞪步骀胜正确执行 +- 暂停/恢倍胜正确工䜜 +- 状态胜正确保存 + +--- + +## 任务 7: TitleEditor 标题猖蟑噚 + +**文件**`src/gui/title_editor.py` + +**UI 组件** +``` +┌─────────────────────────────────────────────────────────────┐ +│ 标题审栞 - 请确讀以䞋标题是吊正确 │ +├────────────────────────────────────────────────────────────── +│ # │ 原始标题 │ LLM建议 │ 修改后 │ 操䜜 │ +├───┌──────────────┌─────────────┌───────────────┌─────────── +│ 1 │ 匹奏 │ 匹奏 │ [匹奏 ] │ [猖蟑] │ +│ 2 │ 非连奏匹奏法 │ 非连奏匹奏法 │ [非连奏匹奏法] │ [猖蟑] │ +│ 3 │ 时倌 │ 䌑止笊 ✗ │ [䌑止笊 ] │ [猖蟑] │ +│ 4 │ 䌑止笊 │ 䌑止笊 │ [䌑止笊 ] │ [猖蟑] │ +│ 5 │ 节奏 │ 节奏 │ [节奏 ] │ [猖蟑] │ +├────────────────────────────────────────────────────────────── +│ [党郚确讀] [取消] │ +└─────────────────────────────────────────────────────────────┘ +``` + +**猖蟑匹窗** +``` +┌─────────────────────────────────┐ +│ 猖蟑标题 - Clip #3 │ +│ │ +│ 原始标题: 时倌 │ +│ LLM建议: 䌑止笊 │ +│ │ +│ 修改后: [_______________] │ 
+│ │ +│ [确定] [取消] │ +└─────────────────────────────────┘ +``` + +**信号** +- `titles_confirmed_signal(list)` - 甚户确讀的标题 + +**验收标准** +- 胜星瀺所有标题 +- 胜猖蟑单䞪标题 +- 胜批量确讀 +- 确讀后返回列衚 + +--- + +## 任务 8: 䞻窗口集成 + +**文件**`src/app.py` + +**垃局** +``` +┌─────────────────────────────────────────────────────────────┐ +│ Piano Highlight Generator [_][□][X] │ +├────────────────────────────────────────────────────────────── +│ ┌─────────────────┐ ┌─────────────────────────────────┐ │ +│ │ │ │ │ │ +│ │ 配眮面板 │ │ 进床视囟 │ │ +│ │ ConfigPanel │ │ ProgressView │ │ +│ │ │ │ │ │ +│ │ │ │ │ │ +│ │ │ │ │ │ +│ │ │ ├────────────────────────────────── │ +│ │ │ │ 标题猖蟑噚 (折叠) │ │ +│ │ │ │ TitleEditor │ │ +│ └─────────────────┘ └─────────────────────────────────┘ │ +├────────────────────────────────────────────────────────────── +│ 状态: 就绪 v1.0 │ +└─────────────────────────────────────────────────────────────┘ +``` + +**集成逻蟑** +1. ConfigPanel 发出 `config_changed_signal` → Controller 接收 +2. 甚户点击"匀始" → Controller 启劚 Worker +3. Worker 发出 `titles_ready_signal` → TitleEditor 星瀺 +4. 甚户确讀标题 → TitleEditor 发出 `titles_confirmed_signal` → Worker ç»§ç»­ +5. Worker 发出 `progress_signal` → ProgressView 曎新 + +**验收标准** +- 胜启劚倄理 +- 各组件胜正确通信 +- 标题猖蟑噚圚正确时机星瀺 + +--- + +## 任务 9: 栞心暡块适配 + +**文件**`src/core/` 倍甚现有暡块 + +**适配工䜜** +1. 倍制 `scripts/` 䞋的栞心暡块到 `src/core/` +2. 修改 import 路埄 +3. 确保 `constants.py` 䞭的路埄配眮可从倖郚䌠入 +4. 适配视频倄理凜数返回成功/倱莥状态 + +**验收标准** +- 栞心暡块胜圚 GUI 䞭正垞调甚 +- 错误胜被捕获并䌠递到 UI + +--- + +## 任务 10: 错误倄理完善 + +**倄理场景** + +| 场景 | 倄理方匏 | +|------|----------| +| API Key 无效 | 401 错误提瀺甚户检查配眮 | +| 视频文件䞍存圚 | 暂停匹窗提瀺 | +| 磁盘空闎䞍足 | 暂停匹窗提瀺 | +| Whisper 暡型未扟到 | 提瀺䞋蜜或选择其他暡型 | +| 倄理匂垞 | 保存状态星瀺错误日志 | + +**验收标准** +- 错误䞍富臎皋序厩溃 +- 错误信息枅晰甚户友奜 +- 状态正确保存 + +--- + +## 任务 11: 打包配眮 + 测试 + +**文件** +- `nuitka_options.py` +- `build.bat` (Windows 打包脚本) +- `build.sh` (Linux/Mac 打包脚本) + +**打包步骀** +1. 安装䟝赖`pip install -r requirements.txt` +2. 匀发测试`python src/main.py` +3. 打包`python -m nuitka nuitka_options.py` +4. 
测试 exe`dist/PianoHighlightGenerator.exe` + +**验收标准** +- 打包后䜓积 < 50MB +- 双击胜正垞运行 +- 所有功胜圚打包后正垞工䜜 + +--- + +## 任务 12: 文档和 README + +**文件**`README.md` + +**内容** +- 应甚介绍和截囟 +- 系统芁求 +- 安装指南匀发安装、打包安装 +- 䜿甚诎明 +- 垞见问题 +- 讞可证 + +**验收标准** +- 甚户胜根据 README 运行应甚 +- 垞见问题有解决方案 + +--- + +## 并行化分析 + +**可并行任务** + +| 任务组 | 可并行任务 | 原因 | +|--------|-----------|------| +| UI 组件组 | 2, 4, 7 | 独立匀发独立 UI 组件 | +| 栞心逻蟑组 | 3, 5, 6 | 有䟝赖关系需顺序匀发 | +| 集成测试组 | 8, 10 | 䟝赖前面所有任务 | + +**掚荐匀发顺序** +1. **第䞀波可并行**1, 2, 4, 7, 9 +2. **第二波䟝赖第䞀波**3, 5, 6 +3. **第䞉波集成**8, 10 +4. **最后**11, 12 + +--- + +## Git 分支规划 + +**建议分支** +- `main` - 䞻分支皳定代码 +- `feat/ui` - UI 组件匀发 (任务 2, 4, 7) +- `feat/core` - 栞心逻蟑匀发 (任务 3, 5, 6) +- `feat/integration` - 集成和打包 (任务 8, 10, 11, 12) + +**合并顺序** +``` +feat/ui ─────────┐ +feat/core ────────┌──► main +feat/integration ─┘ +``` diff --git a/temp/check_log.py b/temp/check_log.py new file mode 100644 index 0000000..d24d9c6 --- /dev/null +++ b/temp/check_log.py @@ -0,0 +1,9 @@ +f = open(r'D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt', 'rb') +data = f.read() +f.close() + +print('Total bytes:', len(data)) +print('First 300 hex:', data[:300].hex()) +print() +print('UTF-8 decode of first 300:') +print(data[:300].decode('utf-8', 'replace')) diff --git a/temp/check_pptx.bat b/temp/check_pptx.bat new file mode 100644 index 0000000..de9ed79 --- /dev/null +++ b/temp/check_pptx.bat @@ -0,0 +1,3 @@ +@echo off +chcp 65001 >nul +"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" -c "import pptx; print('pptx available')" diff --git a/temp/check_pptx2.bat b/temp/check_pptx2.bat new file mode 100644 index 0000000..37f3fe5 --- /dev/null +++ b/temp/check_pptx2.bat @@ -0,0 +1,3 @@ +@echo off +chcp 65001 >nul +"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\check_pptx2.py" diff --git a/temp/check_pptx2.py b/temp/check_pptx2.py new file mode 100644 index 0000000..553a075 --- /dev/null +++ 
b/temp/check_pptx2.py @@ -0,0 +1,10 @@ +import sys +out = r"D:\F\NewI\opencode\daily-workspace\temp\check_pptx_out.txt" +try: + import pptx + result = "pptx available: " + pptx.__version__ +except ImportError as e: + result = "pptx NOT available: " + str(e) +with open(out, "w", encoding="utf-8") as f: + f.write(result) +print(result) diff --git a/temp/check_transcript.bat b/temp/check_transcript.bat new file mode 100644 index 0000000..b5ede15 --- /dev/null +++ b/temp/check_transcript.bat @@ -0,0 +1,3 @@ +@echo off +chcp 65001 >nul +"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\check_transcript.py" diff --git a/temp/check_transcript.py b/temp/check_transcript.py new file mode 100644 index 0000000..389503e --- /dev/null +++ b/temp/check_transcript.py @@ -0,0 +1,17 @@ +import os +import json + +inter_dir = r"D:\F\NewI\opencode\daily-workspace\projects\piano-lesson-highlights\cases\lesson1\output_cli_full\intermediates" +transcript_file = os.path.join(inter_dir, "full_transcript.json") + +if os.path.exists(transcript_file): + size = os.path.getsize(transcript_file) + with open(transcript_file, "r", encoding="utf-8") as f: + data = json.load(f) + print(f"Transcript exists: {size} bytes") + print(f"Segments: {len(data)}") + if data: + print(f"First segment: {data[0]}") + print(f"Last segment: {data[-1]}") +else: + print("Transcript file NOT found") diff --git a/temp/debug_ppt.bat b/temp/debug_ppt.bat new file mode 100644 index 0000000..d4f845b --- /dev/null +++ b/temp/debug_ppt.bat @@ -0,0 +1,4 @@ +@echo off +chcp 65001 >nul +"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\debug_ppt.py" +pause diff --git a/temp/debug_ppt.py b/temp/debug_ppt.py new file mode 100644 index 0000000..62efb16 --- /dev/null +++ b/temp/debug_ppt.py @@ -0,0 +1,30 @@ +import zipfile +import re + +ppt = r"D:\F\yc\诟皋䞊架\犏田商圈倜校\诟皋视频\钢琎挔奏入闚第䞀诟.pptx" + 
+with zipfile.ZipFile(ppt, "r") as z: + names = z.namelist() + slide_files = [f for f in names if f.startswith("ppt/slides/slide") and f.endswith(".xml")] + print(f"Total files in zip: {len(names)}") + print(f"Slide files found: {len(slide_files)}") + print(f"First 5 slide files: {slide_files[:5]}") + + # Test presentation.xml + try: + pres_xml = z.read("ppt/presentation.xml").decode("utf-8", errors="replace") + sld_ids = re.findall(r']*r:id="([^"]+)"', pres_xml) + print(f"\nsldIdList rIds: {sld_ids[:5]}") + except Exception as e: + print(f"\npresentation.xml error: {e}") + + # Test rels + try: + rels_xml = z.read("ppt/_rels/presentation.xml.rels").decode("utf-8", errors="replace") + rid_to_target = dict(re.findall(r'Id="([^"]+)"[^>]*Target="([^"]+)"', rels_xml)) + print(f"Rels entries: {len(rid_to_target)}") + # Show a sample + for k, v in list(rid_to_target.items())[:3]: + print(f" {k} -> {v}") + except Exception as e: + print(f"\nrels error: {e}") diff --git a/temp/debug_ppt2.bat b/temp/debug_ppt2.bat new file mode 100644 index 0000000..7ba22eb --- /dev/null +++ b/temp/debug_ppt2.bat @@ -0,0 +1,3 @@ +@echo off +chcp 65001 >nul +"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\debug_ppt2.py" diff --git a/temp/debug_ppt2.py b/temp/debug_ppt2.py new file mode 100644 index 0000000..5f62fbe --- /dev/null +++ b/temp/debug_ppt2.py @@ -0,0 +1,34 @@ +import zipfile, re, sys + +ppt = r"D:\F\yc\诟皋䞊架\犏田商圈倜校\诟皋视频\钢琎挔奏入闚第䞀诟.pptx" +out = r"D:\F\NewI\opencode\daily-workspace\temp\debug_ppt_out.txt" + +results = [] + +with zipfile.ZipFile(ppt, "r") as z: + names = z.namelist() + slide_files = [f for f in names if f.startswith("ppt/slides/slide") and f.endswith(".xml")] + results.append(f"Total files in zip: {len(names)}") + results.append(f"Slide files found: {len(slide_files)}") + results.append(f"First 5: {slide_files[:5]}") + + try: + pres_xml = z.read("ppt/presentation.xml").decode("utf-8", 
errors="replace") + sld_ids = re.findall(r']*r:id="([^"]+)"', pres_xml) + results.append(f"sldIds: {sld_ids[:5]}") + except Exception as e: + results.append(f"pres error: {e}") + + try: + rels_xml = z.read("ppt/_rels/presentation.xml.rels").decode("utf-8", errors="replace") + rid_to_target = dict(re.findall(r'Id="([^"]+)"[^>]*Target="([^"]+)"', rels_xml)) + results.append(f"rels count: {len(rid_to_target)}") + for k, v in list(rid_to_target.items())[:3]: + results.append(f" {k} -> {v}") + except Exception as e: + results.append(f"rels error: {e}") + +with open(out, "w", encoding="utf-8") as f: + f.write("\n".join(results)) + +print("Done, see", out) diff --git a/temp/debug_slide1.bat b/temp/debug_slide1.bat new file mode 100644 index 0000000..16bafff --- /dev/null +++ b/temp/debug_slide1.bat @@ -0,0 +1,3 @@ +@echo off +chcp 65001 >nul +"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\debug_slide1.py" > "D:\F\NewI\opencode\daily-workspace\temp\debug_slide1_out.txt" 2>&1 diff --git a/temp/debug_slide1.py b/temp/debug_slide1.py new file mode 100644 index 0000000..f900cfa --- /dev/null +++ b/temp/debug_slide1.py @@ -0,0 +1,23 @@ +import zipfile, re, os + +ppt = r"D:\F\yc\诟皋䞊架\犏田商圈倜校\诟皋视频\钢琎挔奏入闚第䞀诟.pptx" +out_dir = r"D:\F\NewI\opencode\daily-workspace\temp" +slide1_out = os.path.join(out_dir, "slide1_texts.txt") +xml_out = os.path.join(out_dir, "slide1_xml_preview.txt") + +with zipfile.ZipFile(ppt, "r") as z: + slide1_file = "ppt/slides/slide1.xml" + content = z.read(slide1_file).decode("utf-8", errors="replace") + all_texts = re.findall(r"]*>([^<]*)", content) + + meaningful = [t for t in all_texts if t.strip()] + with open(slide1_out, "w", encoding="utf-8") as f: + f.write(f"Total fragments: {len(all_texts)}\n") + f.write(f"Meaningful fragments: {len(meaningful)}\n\n") + for i, t in enumerate(meaningful): + f.write(f"[{i}] {t}\n") + + with open(xml_out, "w", encoding="utf-8") as f: + 
f.write(content[:8000]) + +print("Done") diff --git a/temp/do_install.bat b/temp/do_install.bat new file mode 100644 index 0000000..00c3402 --- /dev/null +++ b/temp/do_install.bat @@ -0,0 +1,3 @@ +@echo off +chcp 65001 >nul +"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" "D:\F\NewI\opencode\daily-workspace\projects\piano-highlight-app\temp\do_install.py" diff --git a/temp/do_install.py b/temp/do_install.py new file mode 100644 index 0000000..2479ff1 --- /dev/null +++ b/temp/do_install.py @@ -0,0 +1,12 @@ +import subprocess +import sys + +venv_python = r"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" +result = subprocess.run( + [venv_python, "-m", "pip", "install", "python-pptx"], + capture_output=True, + text=True +) +print("STDOUT:", result.stdout) +print("STDERR:", result.stderr) +print("Return code:", result.returncode) diff --git a/temp/install_pptx.bat b/temp/install_pptx.bat new file mode 100644 index 0000000..d2bfc4a --- /dev/null +++ b/temp/install_pptx.bat @@ -0,0 +1,6 @@ +@echo off +chcp 65001 >nul +echo Installing python-pptx... 
+"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" -m pip install python-pptx -q +echo Done +pause diff --git a/temp/install_pptx2.bat b/temp/install_pptx2.bat new file mode 100644 index 0000000..13a9979 --- /dev/null +++ b/temp/install_pptx2.bat @@ -0,0 +1,4 @@ +@echo off +chcp 65001 >nul +"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" -m pip install python-pptx +echo Exit: %errorlevel% diff --git a/temp/install_pptx3.bat b/temp/install_pptx3.bat new file mode 100644 index 0000000..fba0e10 --- /dev/null +++ b/temp/install_pptx3.bat @@ -0,0 +1,4 @@ +@echo off +chcp 65001 >nul +"D:\ProgramData\anaconda3\envs\py312_cuda\python.exe" -m pip install python-pptx > "D:\F\NewI\opencode\daily-workspace\temp\pip_out.txt" 2>&1 +echo Exit: %errorlevel% diff --git a/temp/kill_python.ps1 b/temp/kill_python.ps1 new file mode 100644 index 0000000..5649c21 --- /dev/null +++ b/temp/kill_python.ps1 @@ -0,0 +1,12 @@ +# Kill all python processes related to our CLI +Get-Process python -ErrorAction SilentlyContinue | Stop-Process -Force +Start-Sleep 3 + +# Verify killed +$remaining = Get-Process python -ErrorAction SilentlyContinue +if ($remaining) { + Write-Host "Still running:" + $remaining | ForEach-Object { Write-Host " PID:" $_.Id } +} else { + Write-Host "All python processes killed" +} diff --git a/temp/ppt.lnk b/temp/ppt.lnk new file mode 100644 index 0000000..a5353ac Binary files /dev/null and b/temp/ppt.lnk differ diff --git a/temp/read_log.py b/temp/read_log.py new file mode 100644 index 0000000..f8e2f99 --- /dev/null +++ b/temp/read_log.py @@ -0,0 +1,5 @@ +f = open(r'D:\F\NewI\opencode\daily-workspace\temp\cli_run_log.txt', 'r', encoding='utf-8') +lines = f.readlines() +f.close() +for l in lines[:35]: + print(l.rstrip()) diff --git a/temp/video.lnk b/temp/video.lnk new file mode 100644 index 0000000..a5353ac Binary files /dev/null and b/temp/video.lnk differ