Initial commit: skills library

- 70 skills with code and documentation - Add .gitignore (ignore __pycache__, output/, temp/, venv/) - Clean up test intermediates and caches
2026-04-26 19:27:40 +08:00
commit 04db423416
861 changed files with 210414 additions and 0 deletions
@@ -0,0 +1,4 @@
+#!/usr/bin/env python3
+"""Placeholder example script; delete this file if it is not needed."""
+
+# Emit a fixed greeting when the script is run.
+print("Hello from skill!")
@@ -0,0 +1,199 @@
+"""
+视频转录与简介生成脚本
+1. 使用 faster-whisper 转录视频
+2. 读取转录字幕，自动生成4平台简介
+"""
+
+import os
+import sys
+import subprocess
+from pathlib import Path
+
+# Environment settings.
+# This value is passed as the first argument to WhisperModel further down.
+# It holds a model name rather than a filesystem path.
+WHISPER_MODEL_PATH = "large-v3"  # use the online model
+
+
+def extract_audio(video_path):
+    """Extract the audio track of a video into a WAV file next to it.
+
+    ffmpeg is asked to drop the video stream (``-vn``) and write 16-bit PCM
+    (``pcm_s16le``) at 16 kHz (``-ar 16000``) in mono (``-ac 1``),
+    overwriting any existing output file (``-y``).
+
+    Args:
+        video_path: ``pathlib.Path`` of the input video.
+
+    Returns:
+        ``video_path`` with its suffix replaced by ``.wav``.
+
+    Raises:
+        RuntimeError: If ffmpeg exits with a non-zero status.
+    """
+    audio_path = video_path.with_suffix(".wav")
+
+    cmd = [
+        "ffmpeg",
+        "-y",
+        "-i",
+        str(video_path),
+        "-vn",
+        "-acodec",
+        "pcm_s16le",
+        "-ar",
+        "16000",
+        "-ac",
+        "1",
+        str(audio_path),
+    ]
+    result = subprocess.run(cmd, capture_output=True)
+    if result.returncode != 0:
+        # Fail here instead of handing back a path to a missing or partial
+        # file; the tail of ffmpeg's stderr usually names the cause.
+        stderr_tail = result.stderr.decode("utf-8", errors="replace").strip()[-500:]
+        raise RuntimeError(
+            f"ffmpeg failed with exit code {result.returncode} "
+            f"for {video_path}: {stderr_tail}"
+        )
+    return audio_path
+
+
+def format_timestamp(seconds):
+    """Convert a duration in seconds to an SRT timestamp (``HH:MM:SS,mmm``).
+
+    The value is rounded to whole milliseconds first and then split into
+    fields with integer arithmetic, so float representation error cannot
+    drop a millisecond (for example 1.001 renders as ``00:00:01,001``).
+
+    Args:
+        seconds: Non-negative number of seconds; negative values are
+            clamped to zero.
+
+    Returns:
+        The formatted timestamp string.
+    """
+    total_ms = max(0, int(round(seconds * 1000)))
+    hours, remainder = divmod(total_ms, 3_600_000)
+    minutes, remainder = divmod(remainder, 60_000)
+    secs, millis = divmod(remainder, 1000)
+    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
+
+
+def read_srt(srt_path):
+    """Read an SRT file and return the text of each cue.
+
+    Cues are located by their timing line (``start --> end``) rather than
+    by blank-line separators, so a file whose cues follow each other
+    without an empty line is still split correctly. All text lines of a
+    cue are kept and joined with a single space.
+
+    Args:
+        srt_path: ``pathlib.Path`` of the SRT file (UTF-8, BOM tolerated).
+
+    Returns:
+        A list with one string per cue that has text, in file order.
+    """
+    import re
+
+    timing_line = re.compile(r"\d+:\d{2}:\d{2}[,.]\d+\s*-->")
+    rows = [row.strip() for row in srt_path.read_text(encoding="utf-8-sig").splitlines()]
+
+    cues = []
+    current = None  # text lines of the cue being read, or None outside a cue
+    for pos, row in enumerate(rows):
+        if timing_line.match(row):
+            current = []
+            cues.append(current)
+            continue
+        followed_by_timing = pos + 1 < len(rows) and timing_line.match(rows[pos + 1])
+        if not row or (row.isdigit() and followed_by_timing):
+            # A blank line or the next cue's index number ends the cue.
+            current = None
+            continue
+        if current is not None:
+            current.append(row)
+    return [" ".join(texts) for texts in cues if texts]
+
+
+def generate_desc_from_subtitle(subtitle_lines, video_title):
+    """Build the description text for four platforms from subtitle lines.
+
+    Args:
+        subtitle_lines: Sequence of subtitle text lines; at most the first
+            ten are used.
+        video_title: Title of the video, used in every section.
+
+    Returns:
+        One string holding the title followed by the WeChat Channels,
+        Douyin, Xiaohongshu and Bilibili sections, each ending with its
+        hashtag line.
+    """
+    highlights = subtitle_lines[:10]
+
+    def numbered(rows):
+        # One keycap-numbered bullet per row, each terminated by a newline.
+        return "".join(f"{n}️⃣ {row}\n" for n, row in enumerate(rows, 1))
+
+    # WeChat Channels: short title capped at 16 characters plus a brief line.
+    short_title = video_title if len(video_title) <= 16 else video_title[:14] + ".."
+    video_desc = f"{video_title}，记住这几点！"
+    if len(video_desc) > 25:
+        video_desc = video_title
+
+    # Douyin: five bullets, falling back to three when the text exceeds 150 characters.
+    douyin = (
+        f"{video_title}，今天一次讲清楚！\n\n"
+        + numbered(highlights[:5])
+        + "\n记住这几点，弹琴更轻松～"
+    )
+    if len(douyin) > 150:
+        douyin = (
+            f"{video_title}，3个关键点教会你！\n\n"
+            + numbered(highlights[:3])
+            + "\n快学起来！"
+        )
+
+    # Xiaohongshu: headline followed by every selected line as a bullet.
+    xhs = (
+        f"【{video_title}】初学者必看"
+        + "\n\n"
+        + "".join(f"▫️{row}\n" for row in highlights)
+        + "\n新手学琴一定要记住这几点！"
+    )
+
+    # Bilibili: the first three lines as a numbered list.
+    bz = (
+        f"【钢琴教学】{video_title}\n\n"
+        + "【核心内容】\n"
+        + "".join(f"{n}. {row}\n" for n, row in enumerate(highlights[:3], 1))
+        + "\n适合零基础初学者学习参考～\n\n觉得有帮助请一键三连！"
+    )
+
+    # Hashtags
+    tags = "#钢琴教学 #成人学琴 #零基础学钢琴"
+    tags_xhs = "#钢琴教学 #成人学琴 #零基础学钢琴 #钢琴入门 #学琴日记"
+
+    # Assemble the output; an empty entry produces a blank separator line.
+    sections = [
+        f"【标题】{video_title}",
+        "",
+        "=== 视频号 ===",
+        f"【短标题】{short_title}",
+        "",
+        video_desc,
+        tags,
+        "",
+        "=== 抖音 ===",
+        douyin,
+        tags,
+        "",
+        "=== 小红书 ===",
+        xhs,
+        tags_xhs,
+        "",
+        "=== B站 ===",
+        bz,
+        tags,
+    ]
+    return "\n".join(sections) + "\n"
+
+
+def _segments_to_srt(segments):
+    """Render transcription segments as SRT text, skipping empty ones."""
+    cues = []
+    for segment in segments:
+        text = segment.text.strip()
+        if not text:
+            continue
+        start = format_timestamp(segment.start)
+        end = format_timestamp(segment.end)
+        # Number cues after filtering so the indices stay consecutive.
+        cues.append(f"{len(cues) + 1}\n{start} --> {end}\n{text}\n")
+    # Each cue already ends with a newline; joining with another one puts
+    # the blank separator line between cues that the SRT format expects.
+    return "\n".join(cues)
+
+
+def transcribe_and_generate(video_dir):
+    """Transcribe the first MP4 in a folder and write its description file.
+
+    Steps:
+      1. Extract the audio with ``extract_audio`` and transcribe it with
+         faster-whisper (Chinese, beam size 5, CUDA / float16).
+      2. Save the transcript as ``<title>_transcribed.srt`` next to the video.
+      3. Read the transcript back and write the four-platform description
+         to ``<title>_简介.txt``.
+
+    Only one video is processed: the first ``*.mp4`` in sorted name order.
+    The temporary WAV file is removed even if transcription fails.
+
+    Args:
+        video_dir: Folder containing the video (str or ``pathlib.Path``).
+
+    Returns:
+        None. When the folder has no ``*.mp4`` file a message is printed
+        and nothing else happens.
+    """
+    video_dir = Path(video_dir)
+
+    # Locate the video; sorting makes the choice deterministic.
+    video_files = sorted(video_dir.glob("*.mp4"))
+    if not video_files:
+        print(f"未找到视频文件: {video_dir}")
+        return
+
+    video_path = video_files[0]
+    video_title = video_path.stem
+    print(f"处理视频: {video_title}")
+
+    # 1. Transcribe
+    print("\n1. 转录视频...")
+    audio_path = extract_audio(video_path)
+    try:
+        # Imported lazily so the heavy dependencies load only when needed.
+        import torch
+        from faster_whisper import WhisperModel
+
+        print("  加载模型...")
+        model = WhisperModel(WHISPER_MODEL_PATH, device="cuda", compute_type="float16")
+
+        print("  转录中...")
+        segments, _ = model.transcribe(str(audio_path), language="zh", beam_size=5)
+        # Materialize the result while the model and audio file still exist.
+        all_segments = list(segments)
+
+        # Release the model and cached GPU memory.
+        del model
+        torch.cuda.empty_cache()
+    finally:
+        # Always remove the temporary audio, including on failure.
+        if audio_path.exists():
+            audio_path.unlink()
+
+    # Save the SRT
+    srt_path = video_path.with_name(video_title + "_transcribed.srt")
+    srt_path.write_text(_segments_to_srt(all_segments), encoding="utf-8")
+    print(f"  SRT保存到: {srt_path.name}")
+
+    # 2. Generate the descriptions
+    print("\n2. 生成简介...")
+    subtitle_lines = read_srt(srt_path)
+    desc_content = generate_desc_from_subtitle(subtitle_lines, video_title)
+
+    desc_path = video_path.with_name(video_title + "_简介.txt")
+    desc_path.write_text(desc_content, encoding="utf-8")
+    print(f"  简介保存到: {desc_path.name}")
+
+    print("\n完成！")
+
+
+def main():
+    """Command-line entry point: process the folder given as the first argument.
+
+    Prints a usage line and exits with status 1 when no argument is given.
+    """
+    cli_args = sys.argv[1:]
+    if not cli_args:
+        print("用法: python transcribe_and_merge.py <视频文件夹路径>")
+        sys.exit(1)
+
+    transcribe_and_generate(cli_args[0])
+
+
+if __name__ == "__main__":
+    main()