Files
skills/video-analysis/scripts/quick_transcribe.py
T
hmo 04db423416 Initial commit: skills library
- 70 skills with code and documentation
- Add .gitignore (ignore __pycache__, output/, temp/, venv/)
- Clean up test intermediates and caches
2026-04-26 19:27:40 +08:00

156 lines
3.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
快速转录方案:如果本地Whisper太慢,尝试其他方法
"""
import os
import subprocess
from pathlib import Path
import json
def extract_audio_from_video(video_path):
    """Extract the audio track of a video into a 16 kHz mono WAV file.

    The WAV file is written next to the video, using the video's file name
    with its suffix replaced by ``.wav``. ffmpeg is invoked with ``-y``, so
    an existing file at that location is overwritten.

    Args:
        video_path: Location of the source video, as a ``Path`` or a string.

    Returns:
        The ``Path`` of the WAV file, or ``None`` when ffmpeg cannot be
        started or exits with a non-zero status.
    """
    video_path = Path(video_path)
    audio_path = video_path.with_suffix(".wav")
    print(f"提取音频: {video_path.name} -> {audio_path.name}")
    cmd = [
        "ffmpeg",
        "-y",
        "-i",
        str(video_path),
        "-vn",
        "-acodec",
        "pcm_s16le",
        "-ar",
        "16000",
        "-ac",
        "1",
        str(audio_path),
    ]
    try:
        # Decode leniently: the locale codec may not be able to decode
        # ffmpeg's diagnostics (for example non-ASCII file names).
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
        )
    except OSError as exc:
        # Raised when the ffmpeg executable is missing or cannot be run;
        # report it the same way as any other extraction failure.
        print(f"音频提取失败: {exc}")
        return None
    if result.returncode != 0:
        print(f"音频提取失败: {result.stderr}")
        return None
    print(f"✅ 音频提取完成: {audio_path.stat().st_size / 1024 / 1024:.2f} MB")
    return audio_path
def check_whisper_availability():
    """Report whether the ``whisper`` package can be imported.

    A status line is printed in either case.

    Returns:
        ``True`` when ``import whisper`` succeeds, ``False`` when it raises
        ``ImportError``.
    """
    try:
        import whisper  # noqa: F401 -- imported only to probe availability
    except ImportError:
        print("❌ Whisper未安装")
        return False
    print("✅ Whisper已安装")
    return True
def transcribe_with_whisper(audio_path, model_name="tiny", language="zh"):
    """Transcribe an audio file with a locally installed Whisper model.

    Args:
        audio_path: Location of the audio file; converted with ``str()``
            before being handed to Whisper.
        model_name: Name passed to ``whisper.load_model``. Defaults to
            ``"tiny"``, the smallest model, to keep transcription fast.
        language: Language hint passed to ``model.transcribe``. Defaults to
            ``"zh"``.

    Returns:
        Whatever ``model.transcribe`` returns (callers in this file read its
        ``"text"`` entry), or ``None`` if importing Whisper, loading the
        model, or transcribing raises.
    """
    try:
        import whisper

        print("加载Whisper模型...")
        model = whisper.load_model(model_name)
        print("开始转录...")
        return model.transcribe(str(audio_path), language=language)
    except Exception as e:
        # Deliberate best-effort boundary: any failure is reported and
        # turned into ``None`` so the caller can decide how to proceed.
        print(f"Whisper转录失败: {e}")
        return None
def save_transcription(result, video_path):
    """Write a transcription next to its video and print a short preview.

    Two files are produced, both named after the video with the suffix
    replaced: ``.txt`` holds ``result["text"]`` and ``.json`` holds the whole
    ``result`` serialized as indented, non-ASCII-escaped JSON.

    Args:
        result: Transcription result; must provide a ``"text"`` entry and be
            JSON-serializable. A falsy value means there is nothing to save.
        video_path: ``Path`` of the video the transcription belongs to.

    Returns:
        ``False`` when ``result`` is falsy, otherwise ``True`` once both
        files are written.
    """
    if not result:
        return False

    # Plain-text transcript.
    text_file = video_path.with_suffix(".txt")
    with text_file.open("w", encoding="utf-8") as handle:
        transcript = result["text"]
        handle.write(transcript)
    print(f"✅ 转录文本保存到: {text_file.name}")
    print(f"文本长度: {len(transcript)} 字符")

    # Complete result as JSON.
    json_file = video_path.with_suffix(".json")
    with json_file.open("w", encoding="utf-8") as handle:
        json.dump(result, handle, ensure_ascii=False, indent=2)
    print(f"✅ 完整结果保存到: {json_file.name}")

    # Preview: first 500 characters, with an ellipsis when truncated.
    print("\n=== 转录预览(前500字符)===")
    head = transcript[:500]
    if len(transcript) > 500:
        head += "..."
    print(head)
    return True
def main(video_dir=None):
    """Transcribe the first ``*.mp4`` in a directory with local Whisper.

    Steps: pick the first video (by sorted path), show an existing ``.txt``
    transcript if one is already there, otherwise extract the audio with
    ffmpeg, transcribe it, and save the result next to the video. Progress
    and errors are reported with ``print``.

    Args:
        video_dir: Directory to search for ``*.mp4`` files, as a ``Path`` or
            string. Defaults to the workspace temp directory this script was
            written for.

    Returns:
        ``None`` in every case.
    """
    print("=== 视频转录程序 ===")
    if video_dir is None:
        video_dir = r"D:\F\NewI\opencode\daily-workspace\temp"
    video_dir = Path(video_dir)

    # Sort so the chosen video is deterministic; glob order is arbitrary.
    video_files = sorted(video_dir.glob("*.mp4"))
    if not video_files:
        print("❌ 未找到mp4文件")
        return
    video_path = video_files[0]
    print(f"处理视频: {video_path.name}")
    print(f"文件大小: {video_path.stat().st_size / 1024 / 1024:.2f} MB")

    # Reuse an existing transcript instead of transcribing again.
    txt_path = video_path.with_suffix(".txt")
    if txt_path.exists():
        print(f"✅ 已有转录文件: {txt_path.name}")
        with open(txt_path, "r", encoding="utf-8") as f:
            text = f.read()
        print(f"文本长度: {len(text)} 字符")
        print("\n=== 现有转录预览 ===")
        print(text[:500] + "..." if len(text) > 500 else text)
        return

    # Check Whisper before extracting, so no temporary WAV is produced when
    # transcription cannot run anyway.
    if not check_whisper_availability():
        print("请安装Whisper: pip install openai-whisper")
        return

    audio_path = extract_audio_from_video(video_path)
    if not audio_path:
        print("❌ 无法提取音频")
        return

    try:
        result = transcribe_with_whisper(audio_path)
        if not result:
            print("❌ 转录失败")
            return
        if save_transcription(result, video_path):
            print("\n✅ 转录完成!")
        else:
            print("❌ 保存转录结果失败")
    finally:
        # The WAV is only an intermediate; remove it on every exit path so
        # failed runs do not leave it behind.
        if audio_path.exists():
            audio_path.unlink()
            print("临时音频文件已删除")
# Run the transcription workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()