# Files
# hmo 04db423416 Initial commit: skills library
# - 70 skills with code and documentation
# - Add .gitignore (ignore __pycache__, output/, temp/, venv/)
# - Clean up test intermediates and caches
# 2026-04-26 19:27:40 +08:00
#
# 109 lines
# 2.6 KiB
# Python

import importlib
import json
import os
import re
import subprocess
import sys
from pathlib import Path
# Video to process: the first command-line argument when one is given,
# otherwise the original hard-coded file (so running without arguments
# behaves as before).
if len(sys.argv) > 1:
    video_path = Path(sys.argv[1])
else:
    video_path = Path(
        r'D:\F\NewI\opencode\daily-workspace\temp\一个很""的方法,让你喜欢的女人强行爱上你!.mp4'
    )
# Fail with a readable message instead of the FileNotFoundError traceback
# that stat() would raise for a missing file.
if not video_path.is_file():
    print(f"❌ 视频文件不存在: {video_path}")
    sys.exit(1)
print(f"处理视频: {video_path.name}")
size_mb = video_path.stat().st_size / 1024 / 1024
print(f"文件大小: {size_mb:.2f} MB")
# 1. Extract the audio track into a WAV file next to the video.
audio_path = video_path.with_suffix(".wav")
print(f"\n1. 提取音频到: {audio_path.name}")
# -y overwrites an existing output, -vn drops the video stream; the audio is
# written as 16-bit PCM (pcm_s16le), resampled to 16 kHz, downmixed to mono.
ffmpeg_cmd = [
    "ffmpeg",
    "-y",
    "-i",
    str(video_path),
    "-vn",
    "-acodec",
    "pcm_s16le",
    "-ar",
    "16000",
    "-ac",
    "1",
    str(audio_path),
]
# Only the first four arguments are echoed to keep the log line short.
print(f"运行命令: {' '.join(ffmpeg_cmd[:4])}...")
try:
    # Decode explicitly as UTF-8 with replacement: text=True would use the
    # locale's preferred encoding, and a byte it cannot decode in ffmpeg's
    # log would abort the script with UnicodeDecodeError.
    result = subprocess.run(
        ffmpeg_cmd,
        capture_output=True,
        encoding="utf-8",
        errors="replace",
    )
except FileNotFoundError:
    # Raised when the ffmpeg executable cannot be found.
    print("提取音频失败: 未找到 ffmpeg,请确认已安装并加入 PATH")
    sys.exit(1)
if result.returncode != 0:
    print(f"提取音频失败: {result.stderr}")
    # sys.exit instead of the site-provided exit(), which is not guaranteed
    # to exist (e.g. under `python -S`).
    sys.exit(1)
print("✅ 音频提取完成")
# 2. Make sure FunASR is importable, installing it on demand.
print("\n2. 检查FunASR安装...")
try:
    import funasr  # noqa: F401 -- imported only to test availability
except ImportError:
    print("❌ FunASR未安装,正在安装...")
    # Run pip through the current interpreter so the packages land in the
    # environment this script runs in; a bare `pip` on PATH may belong to
    # another Python installation.
    install = subprocess.run(
        [sys.executable, "-m", "pip", "install", "funasr", "modelscope"],
        capture_output=True,
        encoding="utf-8",
        errors="replace",
    )
    if install.returncode != 0:
        # Report the failure instead of claiming success unconditionally.
        print(f"❌ FunASR安装失败: {install.stderr}")
        sys.exit(1)
    # Let the import system notice the freshly installed packages.
    importlib.invalidate_caches()
    print("✅ FunASR安装完成")
else:
    print("✅ FunASR已安装")
# 3. Transcribe the extracted audio and save the raw result as JSON next to
#    the video; step 4 (cleanup) runs in `finally` so the temporary WAV is
#    removed whether transcription succeeds, fails, or is interrupted.
print("\n3. 开始转录...")
transcription_ok = False
try:
    from funasr import AutoModel

    # Load the model
    print("加载Paraformer模型...")
    model = AutoModel(
        model="paraformer-zh",
        vad_model="fsmn-vad",
        punc_model="ct-punc",
        disable_update=True,
    )

    # Transcribe
    print("转录中...")
    result = model.generate(
        input=str(audio_path), batch_size_s=300, timestamp_granularity="sentence"
    )

    # Save the result
    output_path = video_path.with_suffix(".json")
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
    print(f"✅ 转录完成,保存到: {output_path.name}")

    # Show a short summary
    print("\n转录摘要:")
    if isinstance(result, list) and result:
        full_text = "".join(item["text"] for item in result if "text" in item)
        print(f"总字符数: {len(full_text)}")
        # NOTE(review): this counts the top-level entries returned by
        # generate(); confirm that each entry really is one sentence.
        print(f"句子数: {len(result)}")
        print("\n前3句:")
        for number, item in enumerate(result[:3], start=1):
            if "text" in item:
                print(f" {number}. {item['text'][:100]}...")
    transcription_ok = True
except Exception as e:
    # Top-level boundary of the script: report the error with its traceback,
    # then fall through to cleanup; the failure is turned into a non-zero
    # exit status below.
    print(f"❌ 转录失败: {e}")
    import traceback

    traceback.print_exc()
finally:
    # 4. Remove the temporary audio file
    print("\n4. 清理临时文件...")
    if audio_path.exists():
        audio_path.unlink()
        print("✅ 临时音频文件已删除")

if not transcription_ok:
    # Do not report success (or exit 0) when transcription failed.
    sys.exit(1)
print("\n✅ 处理完成!")