# skills/video-analysis/scripts/simple_transcribe.py

import json
import os
from pathlib import Path

import whisper

# Directory that is scanned for the video to transcribe.
video_dir = Path(r"D:\F\NewI\opencode\daily-workspace\temp")
# glob() yields entries in arbitrary order; sorting makes the chosen file
# deterministic when several .mp4 files are present.
video_files = sorted(video_dir.glob("*.mp4"))

if not video_files:
    print("未找到mp4文件")
    # SystemExit is used instead of exit(): the latter is a helper injected by
    # the `site` module and is not guaranteed to exist in every runtime.
    raise SystemExit(1)

# Only the first matching file is processed.
video_file = video_files[0]
print(f"找到视频文件: {video_file.name}")
print(f"文件大小: {video_file.stat().st_size / 1024 / 1024:.2f} MB")

# Transcribe with Whisper.
print("\n加载Whisper模型...")
model = whisper.load_model("base")  # "base" model: chosen because it is relatively fast

print("开始转录...")
# The language is pinned to "zh" rather than left to Whisper to decide.
result = model.transcribe(str(video_file), language="zh")
text = result["text"]

# Save the plain transcript next to the video (same name, .txt suffix).
output_file = video_file.with_suffix(".txt")
with open(output_file, "w", encoding="utf-8") as f:
    f.write(text)

print(f"\n✅ 转录完成！保存到: {output_file.name}")
print(f"转录文本长度: {len(text)} 字符")

# Show a preview of at most the first 500 characters. The ellipsis is added
# only when the transcript was actually cut, so short transcripts are not
# displayed as if they had been truncated.
preview_limit = 500
preview = text[:preview_limit] + "..." if len(text) > preview_limit else text
print("\n=== 转录预览（前500字符）===")
print(preview)

# If segment-level information is present, dump the whole result as JSON too.
if "segments" in result:
    json_file = video_file.with_suffix(".json")
    with open(json_file, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps the Chinese text human-readable in the file.
        json.dump(result, f, ensure_ascii=False, indent=2)
    print(f"详细分段信息保存到: {json_file.name}")