Files
skills/video-desc-generator/scripts/transcribe_and_merge.py
T
hmo 04db423416 Initial commit: skills library
- 70 skills with code and documentation
- Add .gitignore (ignore __pycache__, output/, temp/, venv/)
- Clean up test intermediates and caches
2026-04-26 19:27:40 +08:00

200 lines
5.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
视频转录与简介生成脚本
1. 使用 faster-whisper 转录视频
2. 读取转录字幕,自动生成4平台简介
"""
import os
import sys
import subprocess
from pathlib import Path
# Environment paths
WHISPER_MODEL_PATH = "large-v3"  # use the online model (a model name, not a local path)
def extract_audio(video_path):
    """Extract the audio track of a video into a WAV file next to it.

    Runs ``ffmpeg`` to write a mono, 16 kHz, 16-bit PCM file whose path is
    the video path with its suffix replaced by ``.wav``. An existing file at
    that path is overwritten (``-y``).

    Args:
        video_path: ``pathlib.Path`` of the input video.

    Returns:
        ``pathlib.Path`` of the WAV file.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    audio_path = video_path.with_suffix(".wav")
    cmd = [
        "ffmpeg",
        "-y",
        "-i",
        str(video_path),
        "-vn",
        "-acodec",
        "pcm_s16le",
        "-ar",
        "16000",
        "-ac",
        "1",
        str(audio_path),
    ]
    result = subprocess.run(cmd, capture_output=True)
    if result.returncode != 0:
        # Surface the failure here instead of handing the caller a path to a
        # missing or truncated file; the end of stderr usually names the cause.
        stderr_tail = result.stderr.decode("utf-8", errors="replace").strip()[-500:]
        raise RuntimeError(
            f"ffmpeg failed (exit code {result.returncode}) for {video_path}: "
            f"{stderr_tail}"
        )
    return audio_path
def format_timestamp(seconds):
    """Convert a duration in seconds to an SRT timestamp (``HH:MM:SS,mmm``).

    The value is rounded to the nearest millisecond once, up front, and then
    split with integer arithmetic. Truncating the fractional part instead
    loses a millisecond to floating-point error (e.g. ``1.001`` would render
    as ``00:00:01,000``).

    Args:
        seconds: Non-negative number of seconds (int or float).

    Returns:
        The formatted timestamp string.
    """
    total_millis = int(round(seconds * 1000))
    total_secs, millis = divmod(total_millis, 1000)
    total_minutes, secs = divmod(total_secs, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
def read_srt(srt_path):
    """Read an SRT file and return the subtitle text, one string per cue.

    Cues are located by their timing line (the line containing ``-->``)
    rather than by blank-line separators, so files whose cues are not
    separated by blank lines are still parsed cue by cue. When a cue's text
    spans several lines, the lines are joined with a single space. Cues
    without any text are omitted.

    Args:
        srt_path: ``pathlib.Path`` of a UTF-8 encoded SRT file.

    Returns:
        List of cue texts in file order.
    """
    raw_lines = srt_path.read_text(encoding="utf-8").splitlines()
    cues = []
    current = None  # text lines of the cue being collected; None before the first cue

    for pos, raw in enumerate(raw_lines):
        line = raw.strip()
        if "-->" in line:
            # A timing line closes the previous cue and opens a new one.
            if current:
                cues.append(" ".join(current))
            current = []
            continue
        if current is None or not line:
            # Anything before the first timing line (e.g. the first cue
            # number) and blank separator lines carry no subtitle text.
            continue
        following = raw_lines[pos + 1] if pos + 1 < len(raw_lines) else ""
        if line.isdigit() and "-->" in following:
            # A bare number directly above a timing line is the next cue's
            # index, not subtitle text.
            continue
        current.append(line)

    if current:
        cues.append(" ".join(current))
    return cues
def generate_desc_from_subtitle(subtitle_lines, video_title):
    """Build description text for four platforms from subtitle lines.

    Produces one text blob with a section each for WeChat Channels, Douyin,
    Xiaohongshu and Bilibili. Only the first ten subtitle lines are used.
    The hashtags and the closing phrases are hard-coded and piano-teaching
    specific.

    Args:
        subtitle_lines: List of subtitle strings (may be empty).
        video_title: Title of the video; used as headline in every section.

    Returns:
        The assembled multi-section description as a single string ending
        with a newline.
    """
    # Only the opening of the transcript feeds the descriptions.
    main_content = subtitle_lines[:10]

    # ===== WeChat Channels: short title (at most 16 chars) + one-line blurb =====
    if len(video_title) <= 16:
        short_title = video_title
    else:
        # 14 characters plus ".." keeps the short title at exactly 16.
        short_title = video_title[:14] + ".."

    video_desc = f"{video_title},记住这几点!"
    if len(video_desc) > 25:
        # Blurb would be too long with the suffix; fall back to the bare title.
        video_desc = video_title

    # ===== Douyin =====
    # The source comment targets 80-120 characters; the code only switches to
    # the shorter three-point variant once the text exceeds 150 characters.
    douyin_points = "".join(
        f"{i}️⃣ {line}\n" for i, line in enumerate(main_content[:5], 1)
    )
    douyin = (
        f"{video_title},今天一次讲清楚!\n\n"
        f"{douyin_points}"
        "\n记住这几点,弹琴更轻松~"
    )
    if len(douyin) > 150:
        douyin_points = "".join(
            f"{i}️⃣ {line}\n" for i, line in enumerate(main_content[:3], 1)
        )
        douyin = (
            f"{video_title}3个关键点教会你!\n\n"
            f"{douyin_points}"
            "\n快学起来!"
        )

    # ===== Xiaohongshu =====
    # The title is wrapped in a matching pair of brackets (the opening
    # bracket used to be missing).
    xhs_points = "".join(f"▫️{line}\n" for line in main_content)
    xhs = (
        f"【{video_title}】初学者必看\n\n"
        f"{xhs_points}"
        "\n新手学琴一定要记住这几点!"
    )

    # ===== Bilibili =====
    bz_points = "".join(
        f"{i}. {line}\n" for i, line in enumerate(main_content[:3], 1)
    )
    bz = (
        f"【钢琴教学】{video_title}\n\n"
        "【核心内容】\n"
        f"{bz_points}"
        "\n适合零基础初学者学习参考~\n\n觉得有帮助请一键三连!"
    )

    # Hashtags
    tags = "#钢琴教学 #成人学琴 #零基础学钢琴"
    tags_xhs = "#钢琴教学 #成人学琴 #零基础学钢琴 #钢琴入门 #学琴日记"

    # Assemble the final output, one section per platform.
    sections = [
        f"【标题】{video_title}",
        "=== 视频号 ===",
        f"【短标题】{short_title}",
        video_desc,
        tags,
        "=== 抖音 ===",
        douyin,
        tags,
        "=== 小红书 ===",
        xhs,
        tags_xhs,
        "=== B站 ===",
        bz,
        tags,
    ]
    return "\n".join(sections) + "\n"
def transcribe_and_generate(video_dir):
    """Transcribe the first ``.mp4`` in a folder and write its descriptions.

    Steps:
      1. Extract the audio with ``extract_audio`` and transcribe it with
         faster-whisper (Chinese, beam size 5, CUDA / float16).
      2. Write the transcript as ``<title>_transcribed.srt`` next to the video.
      3. Read the SRT back, build the platform descriptions and write them to
         ``<title>_简介.txt`` next to the video.

    Prints progress messages. If the folder contains no ``.mp4`` file, a
    message is printed and the function returns without doing anything.

    Args:
        video_dir: Path (str or ``pathlib.Path``) of the folder to search.

    Returns:
        None.
    """
    video_dir = Path(video_dir)

    # Find the video file. glob() order depends on the filesystem, so sort to
    # make the choice of "first" video repeatable.
    video_files = sorted(video_dir.glob("*.mp4"))
    if not video_files:
        print(f"未找到视频文件: {video_dir}")
        return

    video_path = video_files[0]
    video_title = video_path.stem
    print(f"处理视频: {video_title}")

    # 1. Transcribe
    print("\n1. 转录视频...")
    audio_path = extract_audio(video_path)
    try:
        # Heavy third-party imports are deferred until they are needed.
        import torch
        from faster_whisper import WhisperModel

        print(" 加载模型...")
        model = WhisperModel(WHISPER_MODEL_PATH, device="cuda", compute_type="float16")

        print(" 转录中...")
        segments, _info = model.transcribe(str(audio_path), language="zh", beam_size=5)
        all_segments = list(segments)

        # Save the SRT. Empty segments are dropped *before* numbering so cue
        # indices stay consecutive, and cues are separated by a blank line as
        # the SRT format requires.
        spoken = [seg for seg in all_segments if seg.text.strip()]
        srt_blocks = [
            f"{index}\n"
            f"{format_timestamp(seg.start)} --> {format_timestamp(seg.end)}\n"
            f"{seg.text.strip()}\n"
            for index, seg in enumerate(spoken, 1)
        ]
        srt_path = video_path.with_name(video_title + "_transcribed.srt")
        srt_path.write_text("\n".join(srt_blocks), encoding="utf-8")
        print(f" SRT保存到: {srt_path.name}")

        # Release the model before the next stage.
        del model
        torch.cuda.empty_cache()
    finally:
        # The WAV is only an intermediate; remove it even when loading the
        # model or transcribing fails.
        if audio_path.exists():
            audio_path.unlink()

    # 2. Generate the descriptions
    print("\n2. 生成简介...")
    subtitle_lines = read_srt(srt_path)
    desc_content = generate_desc_from_subtitle(subtitle_lines, video_title)
    desc_path = video_path.with_name(video_title + "_简介.txt")
    desc_path.write_text(desc_content, encoding="utf-8")
    print(f" 简介保存到: {desc_path.name}")
    print("\n完成!")
def main():
    """Command-line entry point.

    Expects the video folder as the first command-line argument. Without it,
    prints the usage line and exits with status 1; otherwise hands the folder
    to ``transcribe_and_generate``.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("用法: python transcribe_and_merge.py <视频文件夹路径>")
        sys.exit(1)

    transcribe_and_generate(cli_args[0])


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()