Initial commit: skills library
- 70 skills with code and documentation - Add .gitignore (ignore __pycache__, output/, temp/, venv/) - Clean up test intermediates and caches
This commit is contained in:
@@ -0,0 +1,155 @@
|
||||
"""
|
||||
快速转录方案:如果本地Whisper太慢,尝试其他方法
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
import json
|
||||
|
||||
|
||||
def extract_audio_from_video(video_path):
    """Extract the audio track of a video into a WAV file using ffmpeg.

    The WAV file is written next to the video, with the same name and a
    ``.wav`` suffix. An existing file at that location is overwritten
    (ffmpeg is invoked with ``-y``).

    Args:
        video_path: Path (``pathlib.Path`` or string) of the input video.

    Returns:
        The ``Path`` of the extracted WAV file, or ``None`` when ffmpeg could
        not be started or exited with a non-zero status.
    """
    # Accept plain strings as well as Path objects.
    video_path = Path(video_path)
    audio_path = video_path.with_suffix(".wav")

    print(f"提取音频: {video_path.name} → {audio_path.name}")

    cmd = [
        "ffmpeg",
        "-y",  # overwrite the output file without prompting
        "-i",
        str(video_path),
        "-vn",  # drop the video stream
        "-acodec",
        "pcm_s16le",  # 16-bit little-endian PCM
        "-ar",
        "16000",  # 16 kHz sample rate
        "-ac",
        "1",  # mono
        str(audio_path),
    ]

    try:
        # Decode ffmpeg's output leniently: the locale codec (e.g. on Windows)
        # may not be able to decode it, and a decoding error must not abort
        # an otherwise successful extraction.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
        )
    except OSError as exc:
        # Raised when the ffmpeg executable is missing or cannot be started.
        print(f"音频提取失败: {exc}")
        return None

    if result.returncode != 0:
        print(f"音频提取失败: {result.stderr}")
        return None

    print(f"✅ 音频提取完成: {audio_path.stat().st_size / 1024 / 1024:.2f} MB")
    return audio_path
|
||||
|
||||
|
||||
def check_whisper_availability():
    """Check whether the Whisper speech-recognition package can be used.

    Returns:
        ``True`` when a ``whisper`` module can be imported and exposes a
        callable ``load_model``; ``False`` otherwise.
    """
    try:
        import whisper
    except ImportError:
        print("❌ Whisper未安装")
        return False

    # An unrelated package is also published under the name "whisper"; it
    # imports fine but has no load_model(), so a bare import is not enough.
    if not callable(getattr(whisper, "load_model", None)):
        print("❌ Whisper未安装")
        return False

    print("✅ Whisper已安装")
    return True
|
||||
|
||||
|
||||
def transcribe_with_whisper(audio_path):
    """Transcribe an audio file with Whisper.

    Args:
        audio_path: Location of the audio file; it is passed to Whisper as a
            string.

    Returns:
        The object returned by ``model.transcribe``, or ``None`` if importing
        Whisper, loading the model, or transcribing raised an exception.
    """
    transcript = None
    try:
        import whisper

        print("加载Whisper模型...")
        # Use the smallest model to speed things up.
        tiny_model = whisper.load_model("tiny")

        print("开始转录...")
        transcript = tiny_model.transcribe(str(audio_path), language="zh")
    except Exception as exc:
        print(f"Whisper转录失败: {exc}")
        return None
    return transcript
|
||||
|
||||
|
||||
def save_transcription(result, video_path):
    """Save a transcription result next to the video as ``.txt`` and ``.json``.

    Args:
        result: Transcription result; must provide a ``"text"`` entry and be
            JSON-serializable. A falsy value is treated as "nothing to save".
        video_path: Path (``pathlib.Path`` or string) of the source video. The
            output files reuse its name with ``.txt`` / ``.json`` suffixes.

    Returns:
        ``True`` when both files were written; ``False`` when ``result`` is
        empty or malformed, or when a file could not be written.
    """
    if not result:
        return False

    video_path = Path(video_path)
    txt_path = video_path.with_suffix(".txt")
    json_path = video_path.with_suffix(".json")

    try:
        text = result["text"]
        # Serialize before touching the disk so that a non-serializable
        # result cannot leave partial output behind.
        payload = json.dumps(result, ensure_ascii=False, indent=2)
        # Write the .txt last: main() treats an existing .txt as "already
        # transcribed", so it must only appear once everything succeeded.
        json_path.write_text(payload, encoding="utf-8")
        txt_path.write_text(text, encoding="utf-8")
    except (KeyError, TypeError, ValueError, OSError) as exc:
        print(f"保存转录结果失败: {exc}")
        return False

    print(f"✅ 转录文本保存到: {txt_path.name}")
    print(f"文本长度: {len(text)} 字符")
    print(f"✅ 完整结果保存到: {json_path.name}")

    # Preview the first 500 characters, marking truncation with an ellipsis.
    print("\n=== 转录预览(前500字符)===")
    preview = text[:500]
    print(preview + "..." if len(text) > 500 else preview)

    return True
|
||||
|
||||
|
||||
def main(video_dir=None):
    """Transcribe the first ``.mp4`` file found in a directory.

    Workflow: pick the first ``.mp4`` (by sorted name); if a ``.txt``
    transcript already exists next to it, preview it and stop. Otherwise
    verify Whisper is available, extract the audio, transcribe it, save the
    result, and remove the temporary audio file.

    Args:
        video_dir: Directory to search for ``.mp4`` files (``pathlib.Path``
            or string). Defaults to the original hard-coded workspace
            directory when omitted.

    Returns:
        None. Progress and errors are reported on standard output.
    """
    print("=== 视频转录程序 ===")

    # Locate the video file.
    if video_dir is None:
        video_dir = r"D:\F\NewI\opencode\daily-workspace\temp"
    video_dir = Path(video_dir)
    # glob() order is unspecified; sort so the chosen file is deterministic.
    video_files = sorted(video_dir.glob("*.mp4"))

    if not video_files:
        print("❌ 未找到mp4文件")
        return

    video_path = video_files[0]
    print(f"处理视频: {video_path.name}")
    print(f"文件大小: {video_path.stat().st_size / 1024 / 1024:.2f} MB")

    # Reuse an existing transcript instead of transcribing again.
    txt_path = video_path.with_suffix(".txt")
    if txt_path.exists():
        print(f"✅ 已有转录文件: {txt_path.name}")
        with open(txt_path, "r", encoding="utf-8") as f:
            text = f.read()
        print(f"文本长度: {len(text)} 字符")
        print("\n=== 现有转录预览 ===")
        print(text[:500] + "..." if len(text) > 500 else text)
        return

    # Check Whisper before extracting audio so that no ffmpeg work is done
    # (and no temporary file is created) when transcription cannot run.
    if not check_whisper_availability():
        print("请安装Whisper: pip install openai-whisper")
        return

    # Extract the audio track.
    audio_path = extract_audio_from_video(video_path)
    if not audio_path:
        print("❌ 无法提取音频")
        return

    try:
        # Transcribe.
        result = transcribe_with_whisper(audio_path)
        if not result:
            print("❌ 转录失败")
            return

        # Save the result.
        if save_transcription(result, video_path):
            print("\n✅ 转录完成!")
        else:
            print("❌ 保存转录结果失败")
    finally:
        # Always remove the temporary audio file, including on failure paths.
        if audio_path.exists():
            audio_path.unlink()
            print("临时音频文件已删除")
|
||||
|
||||
|
||||
# Script entry point: run the workflow only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user