# 04db423416
# - 70 skills with code and documentation - Add .gitignore (ignore __pycache__, output/, temp/, venv/) - Clean up test intermediates and caches
# 44 lines, 1.2 KiB, Python
import json
import os
import sys
from pathlib import Path

import whisper


# Directory scanned for videos when none is given on the command line.
DEFAULT_VIDEO_DIR = Path(r"D:\F\NewI\opencode\daily-workspace\temp")

# Number of transcript characters echoed to the console as a preview.
PREVIEW_CHARS = 500


def find_videos(video_dir):
    """Return the ``*.mp4`` files directly inside *video_dir*, sorted by path.

    ``Path.glob`` yields entries in no guaranteed order, so the result is
    sorted to make "the first video" a deterministic choice.
    """
    return sorted(Path(video_dir).glob("*.mp4"))


def build_preview(text, limit=PREVIEW_CHARS):
    """Return at most *limit* leading characters of *text*.

    ``"..."`` is appended only when *text* was actually cut, so a short
    transcript is shown as-is instead of looking truncated.
    """
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


def transcribe(video_file, model_name="base", language="zh"):
    """Load a Whisper model and transcribe *video_file*.

    Returns whatever ``model.transcribe`` returns; the rest of this script
    reads its ``"text"`` entry and, when present, its ``"segments"`` entry.
    """
    print("\n加载Whisper模型...")
    # The "base" model is the default because it is comparatively fast.
    model = whisper.load_model(model_name)

    print("开始转录...")
    return model.transcribe(str(video_file), language=language)


def save_text(video_file, text):
    """Write *text* as UTF-8 to ``<video_file stem>.txt`` and return that path."""
    output_file = Path(video_file).with_suffix(".txt")
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(text)
    return output_file


def save_segments(video_file, result):
    """Dump the full *result* as JSON to ``<video_file stem>.json``.

    Returns the path written. ``ensure_ascii=False`` keeps non-ASCII
    transcript text readable in the file rather than ``\\u``-escaped.
    """
    json_file = Path(video_file).with_suffix(".json")
    with open(json_file, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
    return json_file


def main(argv=None):
    """Transcribe the first mp4 in a directory and save the results beside it.

    *argv* is the argument list without the program name and defaults to
    ``sys.argv[1:]``. An optional first argument overrides
    ``DEFAULT_VIDEO_DIR``. Returns the process exit status: ``1`` when no
    mp4 file is found, ``0`` otherwise.
    """
    args = sys.argv[1:] if argv is None else argv
    video_dir = Path(args[0]) if args else DEFAULT_VIDEO_DIR

    video_files = find_videos(video_dir)
    if not video_files:
        print("未找到mp4文件")
        return 1

    video_file = video_files[0]
    print(f"找到视频文件: {video_file.name}")
    print(f"文件大小: {video_file.stat().st_size / 1024 / 1024:.2f} MB")

    # Transcribe with Whisper.
    result = transcribe(video_file)
    text = result["text"]

    # Save the plain-text transcript.
    output_file = save_text(video_file, text)
    print(f"\n✅ 转录完成!保存到: {output_file.name}")
    print(f"转录文本长度: {len(text)} 字符")

    # Show a short preview of the transcript.
    print("\n=== 转录预览(前500字符)===")
    print(build_preview(text))

    # When segment information is present, save the full result as JSON too.
    if "segments" in result:
        json_file = save_segments(video_file, result)
        print(f"详细分段信息保存到: {json_file.name}")

    return 0


if __name__ == "__main__":
    # sys.exit is used instead of the bare exit() helper, which is injected
    # by the site module and is not guaranteed to exist.
    sys.exit(main())