import os import subprocess import json import re from pathlib import Path # 视频文件路径 video_path = Path( r'D:\F\NewI\opencode\daily-workspace\temp\一个很"脏"的方法,让你喜欢的女人强行爱上你!.mp4' ) print(f"处理视频: {video_path.name}") print(f"文件大小: {video_path.stat().st_size / 1024 / 1024:.2f} MB") # 1. 提取音频 audio_path = video_path.with_suffix(".wav") print(f"\n1. 提取音频到: {audio_path.name}") ffmpeg_cmd = [ "ffmpeg", "-y", "-i", str(video_path), "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", str(audio_path), ] print(f"运行命令: {' '.join(ffmpeg_cmd[:4])}...") result = subprocess.run(ffmpeg_cmd, capture_output=True, text=True) if result.returncode != 0: print(f"提取音频失败: {result.stderr}") exit(1) print("✅ 音频提取完成") # 2. 检查是否安装了funasr print("\n2. 检查FunASR安装...") try: import funasr print("✅ FunASR已安装") except ImportError: print("❌ FunASR未安装,正在安装...") subprocess.run(["pip", "install", "funasr", "modelscope"], capture_output=True) print("✅ FunASR安装完成") # 3. 转录音频 print("\n3. 开始转录...") try: from funasr import AutoModel # 加载模型 print("加载Paraformer模型...") model = AutoModel( model="paraformer-zh", vad_model="fsmn-vad", punc_model="ct-punc", disable_update=True, ) # 转录 print("转录中...") result = model.generate( input=str(audio_path), batch_size_s=300, timestamp_granularity="sentence" ) # 保存结果 output_path = video_path.with_suffix(".json") with open(output_path, "w", encoding="utf-8") as f: json.dump(result, f, ensure_ascii=False, indent=2) print(f"✅ 转录完成,保存到: {output_path.name}") # 显示摘要 print("\n转录摘要:") if isinstance(result, list) and len(result) > 0: full_text = "" for item in result: if "text" in item: full_text += item["text"] print(f"总字符数: {len(full_text)}") print(f"句子数: {len(result)}") print("\n前3句:") for i, item in enumerate(result[:3]): if "text" in item: print(f" {i + 1}. {item['text'][:100]}...") except Exception as e: print(f"❌ 转录失败: {e}") import traceback traceback.print_exc() # 4. 清理临时文件 print("\n4. 清理临时文件...") if audio_path.exists(): audio_path.unlink() print("✅ 临时音频文件已删除") print("\n✅ 处理完成!")