"""Transcribe the first ``.mp4`` file in a directory with OpenAI Whisper.

The plain transcript is written next to the video as ``<name>.txt``.  When
the Whisper result carries a ``"segments"`` entry, the complete result dict
is additionally dumped to ``<name>.json``.
"""
import json
import os
import sys
from pathlib import Path

# Directory scanned for videos when main() is called without arguments.
DEFAULT_VIDEO_DIR = Path(r"D:\F\NewI\opencode\daily-workspace\temp")

# Number of transcript characters echoed to the console as a preview.
PREVIEW_CHARS = 500


def _find_first_video(video_dir):
    """Return the alphabetically first ``*.mp4`` in *video_dir*, or ``None``.

    ``Path.glob`` yields entries in an unspecified order, so the matches are
    sorted to make the choice reproducible between runs.
    """
    candidates = sorted(Path(video_dir).glob("*.mp4"))
    return candidates[0] if candidates else None


def _preview(text, limit=PREVIEW_CHARS):
    """Return *text* cut to *limit* characters, with "..." only if truncated."""
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


def main(video_dir=DEFAULT_VIDEO_DIR, model_name="base", language="zh"):
    """Transcribe the first mp4 found in *video_dir* and save the results.

    Args:
        video_dir: Directory to search for ``*.mp4`` files (not recursive).
        model_name: Whisper model to load; "base" is used by default because
            it is comparatively fast.
        language: Language code handed to ``model.transcribe``.

    Returns:
        Process exit status: ``0`` on success, ``1`` if no mp4 file exists.
    """
    video_file = _find_first_video(video_dir)
    if video_file is None:
        print("未找到mp4文件")
        return 1

    print(f"找到视频文件: {video_file.name}")
    print(f"文件大小: {video_file.stat().st_size / 1024 / 1024:.2f} MB")

    # Imported here rather than at module level: the package is heavy to
    # load, and the "no video found" path above should not have to pay for it.
    import whisper

    # Transcribe with Whisper.
    print("\n加载Whisper模型...")
    model = whisper.load_model(model_name)

    print("开始转录...")
    result = model.transcribe(str(video_file), language=language)
    text = result["text"]

    # Save the plain transcript next to the video.
    output_file = video_file.with_suffix(".txt")
    output_file.write_text(text, encoding="utf-8")

    print(f"\n✅ 转录完成!保存到: {output_file.name}")
    print(f"转录文本长度: {len(text)} 字符")

    # Show a short preview of the transcript.
    print(f"\n=== 转录预览(前{PREVIEW_CHARS}字符)===")
    print(_preview(text))

    # When segment information is present, keep the full result as JSON too.
    if "segments" in result:
        json_file = video_file.with_suffix(".json")
        with open(json_file, "w", encoding="utf-8") as f:
            json.dump(result, f, ensure_ascii=False, indent=2)
        print(f"详细分段信息保存到: {json_file.name}")

    return 0


if __name__ == "__main__":
    # The status messages contain an emoji and arbitrary transcript text.
    # When stdout uses a legacy code page (e.g. output piped on Windows) such
    # characters may be unencodable; substitute them instead of crashing
    # part-way through the run.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(errors="replace")
    sys.exit(main())