Initial commit: skills library
- 70 skills with code and documentation - Add .gitignore (ignore __pycache__, output/, temp/, venv/) - Clean up test intermediates and caches
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Example script - delete if not needed."""
|
||||
|
||||
# Placeholder output of the example script.
print("Hello from skill!")
|
||||
@@ -0,0 +1,199 @@
|
||||
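"""
Video transcription and description generation script.

1. Transcribe the video with faster-whisper.
2. Read the transcribed subtitles and automatically generate descriptions
   for 4 platforms.
"""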
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
# Environment paths
|
||||
WHISPER_MODEL_PATH = "large-v3" # use the online model (a model name, not a local path)
|
||||
|
||||
|
||||
def extract_audio(video_path):
    """Extract the audio track of a video into a 16 kHz mono WAV file.

    The WAV file is written next to the video, under the same name with a
    ``.wav`` suffix. An existing file at that path is overwritten because
    ffmpeg is invoked with ``-y``.

    Args:
        video_path: Path of the source video (``str`` or ``pathlib.Path``).

    Returns:
        ``pathlib.Path`` of the extracted WAV file.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found
            (raised by ``subprocess.run``).
    """
    video_path = Path(video_path)
    audio_path = video_path.with_suffix(".wav")

    cmd = [
        "ffmpeg",
        "-y",  # overwrite the output file without prompting
        "-i",
        str(video_path),
        "-vn",  # drop the video stream
        "-acodec",
        "pcm_s16le",  # 16-bit PCM
        "-ar",
        "16000",  # 16 kHz sample rate
        "-ac",
        "1",  # mono
        str(audio_path),
    ]
    result = subprocess.run(cmd, capture_output=True)
    if result.returncode != 0:
        # Fail here with ffmpeg's own message instead of handing back a path
        # to a file that was never written.
        stderr = result.stderr.decode("utf-8", errors="replace").strip()
        raise RuntimeError(
            f"ffmpeg failed (exit code {result.returncode}) for {video_path}: {stderr}"
        )
    return audio_path
|
||||
|
||||
|
||||
def format_timestamp(seconds):
    """Convert a time offset in seconds to an SRT timestamp.

    Args:
        seconds: Offset in seconds (int or float). Negative values are
            clamped to zero.

    Returns:
        The timestamp formatted as ``HH:MM:SS,mmm``.
    """
    # Round once to whole milliseconds and do the rest in integer arithmetic.
    # Truncating the float fraction directly loses a millisecond for inputs
    # such as 1.001, and rounding first lets carries propagate (59.9996 s
    # becomes 00:01:00,000).
    total_millis = max(0, int(round(seconds * 1000)))
    total_secs, millis = divmod(total_millis, 1000)
    total_minutes, secs = divmod(total_secs, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
|
||||
|
||||
|
||||
def read_srt(srt_path):
    """Read an SRT file and return the text of each cue.

    Parsing is driven by the timing lines (those containing ``-->``) rather
    than by blank-line-separated blocks, so it also copes with files whose
    cues are not separated by a blank line.

    Args:
        srt_path: Path of the SRT file (``str`` or ``pathlib.Path``), read as
            UTF-8 (a leading BOM is tolerated).

    Returns:
        A list with one string per cue that has text. The lines of a
        multi-line cue are joined with a single space.
    """
    content = Path(srt_path).read_text(encoding="utf-8-sig")
    lines = [raw.strip() for raw in content.splitlines()]

    cues = []
    text_lines = None  # text collected for the current cue; None outside a cue
    for pos, line in enumerate(lines):
        next_is_timing = pos + 1 < len(lines) and "-->" in lines[pos + 1]
        starts_cue = "-->" in line
        # A cue ends at a blank line, or at the numeric index of the next cue
        # when the blank separator is missing.
        ends_cue = not line or (line.isdigit() and next_is_timing)

        if starts_cue or ends_cue:
            if text_lines:
                cues.append(" ".join(text_lines))
            text_lines = [] if starts_cue else None
        elif text_lines is not None:
            text_lines.append(line)

    if text_lines:
        cues.append(" ".join(text_lines))
    return cues
|
||||
|
||||
|
||||
def generate_desc_from_subtitle(subtitle_lines, video_title):
    """Build the description text for four platforms from subtitle lines.

    The wording of the templates is hard-coded for piano-teaching videos.

    Args:
        subtitle_lines: List of subtitle text lines; only the first ten are
            used.
        video_title: Title of the video, inserted into every section.

    Returns:
        One string containing a title line followed by four sections
        (视频号, 抖音, 小红书, B站), each ending with its hashtag line.
    """
    # Core content: at most the first ten subtitle lines.
    main_content = subtitle_lines[:10]

    # ===== WeChat Channels (short title <= 16 chars + ~20-char description) =====
    # Short title: the full title when it fits, otherwise 14 chars plus "..".
    if len(video_title) <= 16:
        short_title = video_title
    else:
        short_title = video_title[:14] + ".."

    video_desc = f"{video_title},记住这几点!"
    if len(video_desc) > 25:
        # Too long with the suffix: fall back to the bare title.
        video_desc = video_title

    # ===== Douyin (80-120 chars) =====
    douyin = (
        f"{video_title},今天一次讲清楚!\n\n"
        + "".join(f"{i}️⃣ {line}\n" for i, line in enumerate(main_content[:5], 1))
        + "\n记住这几点,弹琴更轻松~"
    )
    if len(douyin) > 150:
        # Shorter variant limited to three points.
        douyin = (
            f"{video_title},3个关键点教会你!\n\n"
            + "".join(f"{i}️⃣ {line}\n" for i, line in enumerate(main_content[:3], 1))
            + "\n快学起来!"
        )

    # ===== Xiaohongshu (100-600 chars) =====
    xhs_title = f"【{video_title}】初学者必看"
    xhs = (
        xhs_title
        + "\n\n"
        + "".join(f"▫️{line}\n" for line in main_content)
        + "\n新手学琴一定要记住这几点!"
    )

    # ===== Bilibili (~100 chars) =====
    bz = (
        f"【钢琴教学】{video_title}\n\n"
        + "【核心内容】\n"
        + "".join(f"{i}. {line}\n" for i, line in enumerate(main_content[:3], 1))
        + "\n适合零基础初学者学习参考~\n\n觉得有帮助请一键三连!"
    )

    # Hashtags
    tags = "#钢琴教学 #成人学琴 #零基础学钢琴"
    tags_xhs = "#钢琴教学 #成人学琴 #零基础学钢琴 #钢琴入门 #学琴日记"

    # Assemble the output
    output = f"""【标题】{video_title}

=== 视频号 ===
【短标题】{short_title}

{video_desc}
{tags}

=== 抖音 ===
{douyin}
{tags}

=== 小红书 ===
{xhs}
{tags_xhs}

=== B站 ===
{bz}
{tags}
"""
    return output
|
||||
|
||||
|
||||
def transcribe_and_generate(video_dir):
    """Transcribe the first ``*.mp4`` in a folder and generate descriptions.

    Steps:
      1. Extract the audio with ``extract_audio`` and transcribe it with
         faster-whisper (language fixed to ``zh``).
      2. Write ``<title>_transcribed.srt`` next to the video.
      3. Read the SRT back and write ``<title>_简介.txt`` with the text
         produced by ``generate_desc_from_subtitle``.

    The temporary WAV file is removed even if transcription fails.

    Args:
        video_dir: Folder containing the video (``str`` or ``pathlib.Path``).

    Returns:
        None. When the folder holds no ``*.mp4`` file a message is printed
        and the function returns without doing anything else.
    """
    video_dir = Path(video_dir)

    # Find the video file; sort so the choice is deterministic when the
    # folder contains several.
    video_files = sorted(video_dir.glob("*.mp4"))
    if not video_files:
        print(f"未找到视频文件: {video_dir}")
        return

    video_path = video_files[0]
    video_title = video_path.stem
    print(f"处理视频: {video_title}")

    # 1. Transcribe
    print("\n1. 转录视频...")
    audio_path = extract_audio(video_path)
    try:
        # Imported here rather than at module level, as in the original;
        # presumably so the heavy dependencies load only when needed.
        import torch
        from faster_whisper import WhisperModel

        print(" 加载模型...")
        # Prefer the GPU, but fall back to CPU with int8 weights so the
        # script still runs on machines without CUDA.
        use_cuda = torch.cuda.is_available()
        model = WhisperModel(
            WHISPER_MODEL_PATH,
            device="cuda" if use_cuda else "cpu",
            compute_type="float16" if use_cuda else "int8",
        )

        print(" 转录中...")
        segments, _info = model.transcribe(str(audio_path), language="zh", beam_size=5)
        # Materialise the segments while the model is still alive.
        all_segments = list(segments)

        # Release the model and any cached GPU memory.
        del model
        torch.cuda.empty_cache()
    finally:
        # Always remove the temporary WAV, including on failure.
        if audio_path.exists():
            audio_path.unlink()

    # Save the SRT. Empty segments are skipped before numbering so the cue
    # indices stay consecutive.
    srt_blocks = []
    for segment in all_segments:
        text = segment.text.strip()
        if not text:
            continue
        start = format_timestamp(segment.start)
        end = format_timestamp(segment.end)
        srt_blocks.append(f"{len(srt_blocks) + 1}\n{start} --> {end}\n{text}\n")

    srt_path = video_path.with_name(video_title + "_transcribed.srt")
    # Each block already ends with a newline; joining with "\n" inserts the
    # blank line that SRT requires between cues.
    srt_path.write_text("\n".join(srt_blocks), encoding="utf-8")
    print(f" SRT保存到: {srt_path.name}")

    # 2. Generate the descriptions
    print("\n2. 生成简介...")
    subtitle_lines = read_srt(srt_path)
    desc_content = generate_desc_from_subtitle(subtitle_lines, video_title)

    desc_path = video_path.with_name(video_title + "_简介.txt")
    desc_path.write_text(desc_content, encoding="utf-8")
    print(f" 简介保存到: {desc_path.name}")

    print("\n完成!")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Expects the video folder path as the first argument; prints the usage
    line and exits with status 1 when it is missing.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("用法: python transcribe_and_merge.py <视频文件夹路径>")
        sys.exit(1)

    transcribe_and_generate(cli_args[0])


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user