04db423416
- 70 skills with code and documentation - Add .gitignore (ignore __pycache__, output/, temp/, venv/) - Clean up test intermediates and caches
109 lines
2.6 KiB
Python
109 lines
2.6 KiB
Python
import os
|
|
import subprocess
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
|
|
# Path of the video to transcribe.
# An optional first command-line argument overrides the built-in default, so
# the script can be pointed at another file without editing the source.
import sys

default_video = r'D:\F\NewI\opencode\daily-workspace\temp\一个很"脏"的方法,让你喜欢的女人强行爱上你!.mp4'
video_path = Path(sys.argv[1] if len(sys.argv) > 1 else default_video)

if not video_path.is_file():
    # Fail with a readable message instead of the raw FileNotFoundError that
    # the stat() call below would otherwise raise.
    print(f"❌ 视频文件不存在: {video_path}")
    sys.exit(1)

print(f"处理视频: {video_path.name}")
# st_size is in bytes; two divisions by 1024 convert it to MiB for display.
print(f"文件大小: {video_path.stat().st_size / 1024 / 1024:.2f} MB")
|
|
|
|
# 1. Extract the audio track
# The WAV file is written next to the video (same stem, ".wav" suffix).
audio_path = video_path.with_suffix(".wav")
print(f"\n1. 提取音频到: {audio_path.name}")

ffmpeg_cmd = [
    "ffmpeg",
    "-y",  # overwrite an existing output file without prompting
    "-i",
    str(video_path),
    "-vn",  # drop the video stream
    "-acodec",
    "pcm_s16le",  # 16-bit little-endian PCM
    "-ar",
    "16000",  # 16 kHz sample rate
    "-ac",
    "1",  # mono
    str(audio_path),
]

print(f"运行命令: {' '.join(ffmpeg_cmd[:4])}...")
try:
    # Decode ffmpeg's output as UTF-8 with replacement instead of the locale
    # codec: the log echoes the (non-ASCII) file name, and a strict decode
    # with a mismatching locale encoding would raise UnicodeDecodeError.
    ffmpeg_result = subprocess.run(
        ffmpeg_cmd,
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
    )
except OSError as exc:
    # Raised when the ffmpeg executable cannot be found or started.
    print(f"提取音频失败: 无法运行 ffmpeg ({exc})")
    raise SystemExit(1) from None

if ffmpeg_result.returncode != 0:
    print(f"提取音频失败: {ffmpeg_result.stderr}")
    # SystemExit works even when the `site`-provided exit() helper is absent.
    raise SystemExit(1)

print("✅ 音频提取完成")
|
|
|
|
# 2. Make sure FunASR is importable, installing it on demand
print("\n2. 检查FunASR安装...")
try:
    import funasr  # noqa: F401  (imported only to probe availability)
except ImportError:
    import importlib
    import sys

    print("❌ FunASR未安装,正在安装...")
    # Run pip through the current interpreter so the packages are installed
    # into the same environment that performs the import afterwards; a bare
    # "pip" on PATH may belong to a different Python.
    pip_result = subprocess.run(
        [sys.executable, "-m", "pip", "install", "funasr", "modelscope"],
        capture_output=True,
        text=True,
        errors="replace",
    )
    if pip_result.returncode == 0:
        # Let the import system notice the freshly installed packages.
        importlib.invalidate_caches()
        print("✅ FunASR安装完成")
    else:
        # Report the failure honestly but keep going: the later import of
        # funasr will fail inside the script's own error handling, and the
        # temporary audio file still gets cleaned up.
        print(f"❌ FunASR安装失败: {pip_result.stderr}")
else:
    print("✅ FunASR已安装")
|
|
|
|
# 3. Transcribe the audio, then 4. always remove the temporary WAV file
print("\n3. 开始转录...")
transcription_ok = False
try:
    from funasr import AutoModel

    # Load the model
    print("加载Paraformer模型...")
    model = AutoModel(
        model="paraformer-zh",
        vad_model="fsmn-vad",
        punc_model="ct-punc",
        disable_update=True,
    )

    # Transcribe
    print("转录中...")
    # NOTE(review): confirm that generate() honours `timestamp_granularity`;
    # it is passed through unchanged from the original script.
    result = model.generate(
        input=str(audio_path), batch_size_s=300, timestamp_granularity="sentence"
    )

    # Save the raw result next to the video (same stem, ".json" suffix)
    output_path = video_path.with_suffix(".json")
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)

    print(f"✅ 转录完成,保存到: {output_path.name}")

    # Print a short summary
    print("\n转录摘要:")
    if isinstance(result, list) and result:
        # Items without a "text" key are skipped.
        full_text = "".join(item["text"] for item in result if "text" in item)

        print(f"总字符数: {len(full_text)}")
        # NOTE(review): this is the number of top-level result items; confirm
        # that each item really corresponds to one sentence.
        print(f"句子数: {len(result)}")
        print("\n前3句:")
        for i, item in enumerate(result[:3], start=1):
            if "text" in item:
                print(f" {i}. {item['text'][:100]}...")

    transcription_ok = True
except Exception as e:
    # Top-level boundary of the script: report the error with its traceback.
    print(f"❌ 转录失败: {e}")
    import traceback

    traceback.print_exc()
finally:
    # 4. Clean up. Running this in `finally` removes the temporary audio file
    # even when the run is interrupted (e.g. Ctrl+C during transcription).
    print("\n4. 清理临时文件...")
    if audio_path.exists():
        audio_path.unlink()
        print("✅ 临时音频文件已删除")

if not transcription_ok:
    # Signal the failure to the caller instead of reporting success.
    raise SystemExit(1)

print("\n✅ 处理完成!")
|