Files
skills/video-analysis/scripts/simple_transcribe.py
T
hmo 04db423416 Initial commit: skills library
- 70 skills with code and documentation
- Add .gitignore (ignore __pycache__, output/, temp/, venv/)
- Clean up test intermediates and caches
2026-04-26 19:27:40 +08:00

44 lines
1.2 KiB
Python

import whisper
import os
from pathlib import Path
# Directory scanned for input videos. This machine-specific location is the
# script's default; pass another directory to main() to override it.
DEFAULT_VIDEO_DIR = Path(r"D:\F\NewI\opencode\daily-workspace\temp")


def find_first_video(video_dir: Path, pattern: str = "*.mp4"):
    """Return the first regular file in *video_dir* matching *pattern*.

    The smallest path (by ``Path`` ordering) is chosen so that the result
    does not depend on the order in which the filesystem lists entries.

    Args:
        video_dir: Directory to search (non-recursive).
        pattern: Glob pattern for candidate files.

    Returns:
        The selected ``Path``, or ``None`` when nothing matches.
    """
    matches = (p for p in video_dir.glob(pattern) if p.is_file())
    return min(matches, default=None)


def preview_text(text: str, limit: int = 500) -> str:
    """Return at most *limit* leading characters of *text*.

    An ellipsis is appended only when the text was actually cut, so a short
    transcript is not shown as if it had been truncated.
    """
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


def main(
    video_dir: Path = DEFAULT_VIDEO_DIR,
    model_name: str = "base",
    language: str = "zh",
) -> int:
    """Transcribe the first mp4 in *video_dir* with Whisper and save the result.

    Writes the transcript text next to the video as ``<name>.txt`` and, when
    the result contains a ``"segments"`` entry, dumps the whole result dict
    to ``<name>.json``. Progress is reported on stdout.

    Args:
        video_dir: Directory to look for ``*.mp4`` files in.
        model_name: Name passed to ``whisper.load_model``. The default
            "base" was chosen by the original author as a faster model.
        language: Language code passed to ``model.transcribe``.

    Returns:
        Process exit code: 0 on success, 1 when no mp4 file was found.
    """
    video_file = find_first_video(Path(video_dir))
    if video_file is None:
        print("未找到mp4文件")
        return 1

    print(f"找到视频文件: {video_file.name}")
    size_mb = video_file.stat().st_size / 1024 / 1024
    print(f"文件大小: {size_mb:.2f} MB")

    # Transcribe with Whisper.
    print("\n加载Whisper模型...")
    model = whisper.load_model(model_name)
    print("开始转录...")
    result = model.transcribe(str(video_file), language=language)
    text = result["text"]

    # Save the plain-text transcript alongside the video.
    output_file = video_file.with_suffix(".txt")
    output_file.write_text(text, encoding="utf-8")
    print(f"\n✅ 转录完成!保存到: {output_file.name}")
    print(f"转录文本长度: {len(text)} 字符")

    # Show a preview of the first 500 characters.
    print("\n=== 转录预览(前500字符)===")
    print(preview_text(text))

    # If segment information is present, save the full result as JSON too.
    if "segments" in result:
        import json  # local import: json is only needed on this branch

        json_file = video_file.with_suffix(".json")
        with open(json_file, "w", encoding="utf-8") as f:
            json.dump(result, f, ensure_ascii=False, indent=2)
        print(f"详细分段信息保存到: {json_file.name}")

    return 0


if __name__ == "__main__":
    # SystemExit instead of the site-provided exit() so the script also
    # works when the site module is not loaded.
    raise SystemExit(main())