Initial commit: skills library
- 70 skills with code and documentation - Add .gitignore (ignore __pycache__, output/, temp/, venv/) - Clean up test intermediates and caches
This commit is contained in:
@@ -0,0 +1,108 @@
|
||||
import os
|
||||
import subprocess
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
# Location of the video to process (hard-coded absolute Windows path).
video_path = Path(
    r'D:\F\NewI\opencode\daily-workspace\temp\一个很"脏"的方法,让你喜欢的女人强行爱上你!.mp4'
)

# Report which file is being handled, then its size converted from bytes to MB.
print(f"处理视频: {video_path.name}")
video_stat = video_path.stat()
print(f"文件大小: {video_stat.st_size / 1024 / 1024:.2f} MB")
|
||||
|
||||
# 1. Extract the audio track
# The WAV file is written next to the video: same name, ".wav" suffix.
audio_path = video_path.with_suffix(".wav")
print(f"\n1. 提取音频到: {audio_path.name}")

ffmpeg_cmd = [
    "ffmpeg",
    "-y",  # overwrite the output file without prompting
    "-i",
    str(video_path),
    "-vn",  # drop the video stream
    "-acodec",
    "pcm_s16le",  # 16-bit little-endian PCM
    "-ar",
    "16000",  # 16 kHz sample rate
    "-ac",
    "1",  # mono
    str(audio_path),
]

print(f"运行命令: {' '.join(ffmpeg_cmd[:4])}...")
try:
    # Decode ffmpeg's output explicitly as UTF-8 with replacement: the default
    # locale code page can fail on the non-ASCII file name ffmpeg echoes back,
    # which would raise UnicodeDecodeError instead of reporting the real error.
    result = subprocess.run(
        ffmpeg_cmd,
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
    )
except FileNotFoundError:
    # The ffmpeg executable itself could not be launched.
    print("提取音频失败: 未找到 ffmpeg,请确认已安装并加入 PATH")
    raise SystemExit(1)

if result.returncode != 0:
    print(f"提取音频失败: {result.stderr}")
    # SystemExit instead of the `exit` helper, which only exists when the
    # `site` module is loaded.
    raise SystemExit(1)

print("✅ 音频提取完成")
|
||||
|
||||
# 2. Make sure FunASR is importable, installing it on demand
print("\n2. 检查FunASR安装...")
try:
    import funasr  # noqa: F401  (imported only to probe availability)

    print("✅ FunASR已安装")
except ImportError:
    import importlib
    import sys

    print("❌ FunASR未安装,正在安装...")
    # Run pip through the current interpreter so the packages land in the
    # environment this script is actually running in.
    install = subprocess.run(
        [sys.executable, "-m", "pip", "install", "funasr", "modelscope"],
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
    )
    if install.returncode == 0:
        # Let the import system notice the freshly installed packages.
        importlib.invalidate_caches()
        print("✅ FunASR安装完成")
    else:
        # Report the failure but keep going: the transcription step will
        # report the import error and the cleanup step still runs.
        print(f"❌ FunASR安装失败: {install.stderr}")
|
||||
|
||||
# 3. Transcribe the audio
print("\n3. 开始转录...")
try:
    from funasr import AutoModel

    # Build the recognizer together with its VAD and punctuation models.
    print("加载Paraformer模型...")
    model = AutoModel(
        model="paraformer-zh",
        vad_model="fsmn-vad",
        punc_model="ct-punc",
        disable_update=True,
    )

    # Run recognition on the extracted WAV file.
    print("转录中...")
    result = model.generate(
        input=str(audio_path),
        batch_size_s=300,
        timestamp_granularity="sentence",
    )

    # Persist the raw result as JSON next to the video (same name, ".json").
    output_path = video_path.with_suffix(".json")
    with output_path.open("w", encoding="utf-8") as fh:
        json.dump(result, fh, ensure_ascii=False, indent=2)

    print(f"✅ 转录完成,保存到: {output_path.name}")

    # Print a short summary of what was recognized.
    print("\n转录摘要:")
    if isinstance(result, list) and result:
        # Concatenate the "text" field of every entry that has one.
        full_text = "".join(entry["text"] for entry in result if "text" in entry)

        print(f"总字符数: {len(full_text)}")
        print(f"句子数: {len(result)}")
        print("\n前3句:")
        # Preview at most the first three entries, 100 characters each.
        for number, entry in enumerate(result[:3], start=1):
            if "text" in entry:
                print(f" {number}. {entry['text'][:100]}...")

except Exception as e:
    # Best-effort step: report the failure with a traceback and fall through
    # so the cleanup below still runs.
    print(f"❌ 转录失败: {e}")
    import traceback

    traceback.print_exc()
|
||||
|
||||
# 4. Remove the temporary audio file
print("\n4. 清理临时文件...")
# Only delete (and report the deletion) when the WAV file is actually there.
if audio_path.exists():
    audio_path.unlink()
    print("✅ 临时音频文件已删除")

print("\n✅ 处理完成!")
|
||||
Reference in New Issue
Block a user