Initial commit: skills library

- 70 skills with code and documentation - Add .gitignore (ignore __pycache__, output/, temp/, venv/) - Clean up test intermediates and caches
2026-04-26 19:27:40 +08:00
commit 04db423416
861 changed files with 210414 additions and 0 deletions
@@ -0,0 +1,155 @@
+"""
+快速转录方案：如果本地Whisper太慢，尝试其他方法
+"""
+
+import os
+import subprocess
+from pathlib import Path
+import json
+
+
+def extract_audio_from_video(video_path):
+    """Extract the audio track of a video into a sibling WAV file via ffmpeg.
+
+    The output is written next to the video with its suffix replaced by
+    ``.wav``, encoded as 16 kHz mono 16-bit PCM. An existing file with that
+    name is overwritten (ffmpeg ``-y``).
+
+    Args:
+        video_path: Location of the source video (``pathlib.Path`` or ``str``).
+
+    Returns:
+        The ``Path`` of the WAV file on success, or ``None`` when ffmpeg
+        could not be started or exited with a non-zero status.
+    """
+    video_path = Path(video_path)
+    audio_path = video_path.with_suffix(".wav")
+
+    print(f"提取音频: {video_path.name} → {audio_path.name}")
+
+    cmd = [
+        "ffmpeg",
+        "-y",
+        "-i",
+        str(video_path),
+        "-vn",
+        "-acodec",
+        "pcm_s16le",
+        "-ar",
+        "16000",
+        "-ac",
+        "1",
+        str(audio_path),
+    ]
+
+    try:
+        # Decode the captured log as UTF-8 with replacement instead of the
+        # locale codec: the log echoes file names, and an undecodable byte
+        # would otherwise abort the run with UnicodeDecodeError.
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            encoding="utf-8",
+            errors="replace",
+        )
+    except OSError as e:
+        # Raised when the ffmpeg executable is missing or cannot be executed.
+        print(f"音频提取失败: {e}")
+        return None
+
+    if result.returncode != 0:
+        print(f"音频提取失败: {result.stderr}")
+        return None
+
+    print(f"✅ 音频提取完成: {audio_path.stat().st_size / 1024 / 1024:.2f} MB")
+    return audio_path
+
+
+def check_whisper_availability():
+    """Report whether the ``whisper`` module can be imported.
+
+    Prints a status line and returns ``True`` when the import succeeds,
+    ``False`` when it raises ``ImportError``.
+    """
+    installed = True
+    try:
+        import whisper  # noqa: F401 -- imported only to probe availability
+    except ImportError:
+        installed = False
+
+    print("✅ Whisper已安装" if installed else "❌ Whisper未安装")
+    return installed
+
+
+def transcribe_with_whisper(audio_path, model_name="tiny", language="zh"):
+    """Transcribe an audio file with a locally loaded Whisper model.
+
+    Args:
+        audio_path: Location of the audio file; converted with ``str()``
+            before being handed to Whisper.
+        model_name: Name passed to ``whisper.load_model``. Defaults to
+            ``"tiny"``, the smallest model, to favour speed.
+        language: Language code passed to ``model.transcribe``. Defaults to
+            ``"zh"``.
+
+    Returns:
+        Whatever ``model.transcribe`` returns, or ``None`` if importing
+        Whisper, loading the model, or transcribing raised an exception.
+    """
+    try:
+        import whisper
+
+        print("加载Whisper模型...")
+        model = whisper.load_model(model_name)
+
+        print("开始转录...")
+        return model.transcribe(str(audio_path), language=language)
+    except Exception as e:
+        # Deliberately broad: any failure here is reported and turned into
+        # ``None`` so the caller can print its own message and stop cleanly.
+        print(f"Whisper转录失败: {e}")
+        return None
+
+
+def save_transcription(result, video_path):
+    """Write a transcription result next to the video as ``.txt`` and ``.json``.
+
+    The plain text (``result["text"]``) goes to the ``.txt`` file and the
+    whole ``result`` mapping to the ``.json`` file, both UTF-8 encoded. A
+    preview of at most 500 characters is printed afterwards.
+
+    Args:
+        result: Transcription mapping containing at least a ``"text"`` key.
+        video_path: Location of the source video (``pathlib.Path`` or
+            ``str``); only its name and directory are used.
+
+    Returns:
+        ``True`` when both files were written. ``False`` when ``result`` is
+        empty, cannot be serialised to JSON, or a file could not be written.
+
+    Raises:
+        KeyError: If ``result`` has no ``"text"`` entry.
+    """
+    if not result:
+        return False
+
+    video_path = Path(video_path)
+
+    # Read the text and build the JSON before touching the disk, so that a
+    # malformed result cannot leave an empty or partial transcript behind.
+    text = result["text"]
+    try:
+        payload = json.dumps(result, ensure_ascii=False, indent=2)
+    except (TypeError, ValueError) as e:
+        print(f"转录结果无法序列化为JSON: {e}")
+        return False
+
+    txt_path = video_path.with_suffix(".txt")
+    json_path = video_path.with_suffix(".json")
+
+    try:
+        # Plain transcript text.
+        with open(txt_path, "w", encoding="utf-8") as f:
+            f.write(text)
+
+        print(f"✅ 转录文本保存到: {txt_path.name}")
+        print(f"文本长度: {len(text)} 字符")
+
+        # Full result as JSON.
+        with open(json_path, "w", encoding="utf-8") as f:
+            f.write(payload)
+    except OSError as e:
+        # Report the failure through the return value so the caller's
+        # "save failed" branch is actually reachable.
+        print(f"写入转录文件失败: {e}")
+        return False
+
+    print(f"✅ 完整结果保存到: {json_path.name}")
+
+    # Preview of the first 500 characters.
+    print("\n=== 转录预览（前500字符）===")
+    preview = text[:500]
+    print(preview + "..." if len(text) > 500 else preview)
+
+    return True
+
+
+def main(video_dir=None):
+    """Transcribe the first ``.mp4`` file found in a directory.
+
+    Steps: pick a video, stop early if a ``.txt`` transcript already exists
+    beside it, check that Whisper is importable, extract the audio to a
+    temporary WAV, transcribe it, save the result, and delete the WAV.
+
+    Args:
+        video_dir: Directory to search for ``*.mp4`` files (``Path`` or
+            ``str``). Defaults to the original hard-coded workspace folder.
+
+    Returns:
+        ``None``. Progress and errors are reported on standard output.
+    """
+    print("=== 视频转录程序 ===")
+
+    # Locate the video file.
+    if video_dir is None:
+        video_dir = r"D:\F\NewI\opencode\daily-workspace\temp"
+    video_dir = Path(video_dir)
+    # Sort so the selected file is deterministic; glob order is unspecified.
+    video_files = sorted(video_dir.glob("*.mp4"))
+
+    if not video_files:
+        print("❌ 未找到mp4文件")
+        return
+
+    video_path = video_files[0]
+    print(f"处理视频: {video_path.name}")
+    print(f"文件大小: {video_path.stat().st_size / 1024 / 1024:.2f} MB")
+
+    # Reuse an existing transcript instead of transcribing again.
+    txt_path = video_path.with_suffix(".txt")
+    if txt_path.exists():
+        print(f"✅ 已有转录文件: {txt_path.name}")
+        with open(txt_path, "r", encoding="utf-8") as f:
+            text = f.read()
+        print(f"文本长度: {len(text)} 字符")
+        print("\n=== 现有转录预览 ===")
+        print(text[:500] + "..." if len(text) > 500 else text)
+        return
+
+    # Check Whisper before extracting audio, so a missing install does not
+    # cost an ffmpeg run or leave a temporary WAV behind.
+    if not check_whisper_availability():
+        print("请安装Whisper: pip install openai-whisper")
+        return
+
+    # Extract the audio track.
+    audio_path = extract_audio_from_video(video_path)
+    if not audio_path:
+        print("❌ 无法提取音频")
+        return
+
+    try:
+        # Transcribe.
+        result = transcribe_with_whisper(audio_path)
+        if not result:
+            print("❌ 转录失败")
+            return
+
+        # Save the result.
+        if save_transcription(result, video_path):
+            print("\n✅ 转录完成！")
+        else:
+            print("❌ 保存转录结果失败")
+    finally:
+        # Remove the temporary WAV on every exit path, not only on success.
+        if audio_path.exists():
+            audio_path.unlink()
+            print("临时音频文件已删除")
+
+# Run the transcription workflow only when this file is executed as a script.
+if __name__ == "__main__":
+    main()