Files
hmo 04db423416 Initial commit: skills library
- 70 skills with code and documentation
- Add .gitignore (ignore __pycache__, output/, temp/, venv/)
- Clean up test intermediates and caches
2026-04-26 19:27:40 +08:00

264 lines
7.7 KiB
Python

#!/usr/bin/env python3
"""
Audio Generator Skill
文本转音频生成工具
功能:
- 支持Markdown和纯文本两种格式
- 自动分割长文本为章节
- 使用edge-tts生成高质量中文语音
- 支持批量生成和增量更新
依赖:
- pip install edge-tts
使用:
python scripts/generate_audio.py <input_file> [options]
示例:
# 生成纯文本音频
python scripts/generate_audio.py text.txt --format plain --output-dir ./audio
# 生成Markdown音频(按标题分割)
python scripts/generate_audio.py doc.md --format markdown --output-dir ./audio
# 只生成特定章节
python scripts/generate_audio.py text.txt --chapters "第一章,第二章"
"""
import sys
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8")
import asyncio
import argparse
import edge_tts
import os
from pathlib import Path
# Default configuration
# DEFAULT_VOICE, DEFAULT_FORMAT and DEFAULT_OUTPUT_DIR are the argparse
# defaults for --voice, --format and --output-dir respectively.
DEFAULT_VOICE = "zh-CN-XiaoxiaoNeural" # Xiaoxiao voice; suited to reading long texts aloud
DEFAULT_FORMAT = "plain"
DEFAULT_OUTPUT_DIR = "./audio_output"
# Sections whose stripped text is shorter than this are skipped during generation.
MIN_SECTION_LENGTH = 200 # Minimum section length, in characters
def clean_filename(title: str, max_length: int = 30) -> str:
    """Turn a section title into text that is safe to embed in a file name.

    Every occurrence of one of the characters ``/ \\ : ? " < > | * #`` is
    replaced with ``-``. The result is then cut to ``max_length`` characters
    and finally stripped of leading and trailing whitespace (in that order,
    so the returned string may be shorter than ``max_length``).

    Args:
        title: Raw section title.
        max_length: Maximum number of characters kept before stripping.

    Returns:
        The sanitized, truncated and stripped title.
    """
    forbidden = '/\\:?"<>|*#'
    # One translation pass maps each forbidden character to a dash.
    dash_table = str.maketrans(forbidden, "-" * len(forbidden))
    shortened = title.translate(dash_table)[:max_length]
    return shortened.strip()
def _markdown_heading_title(line: str) -> str:
    """Return the title text of a Markdown heading line.

    Only the leading run of ``#`` and an optional closing run of ``#`` that is
    separated from the text by whitespace are removed, so a ``#`` that is part
    of the title itself (for example ``C#``) is preserved.
    """
    text = line.lstrip("#").strip()
    without_closing = text.rstrip("#")
    # "Title ##" -> "Title"; "C#" stays "C#" because no whitespace precedes the '#'.
    if without_closing != text and (
        not without_closing or without_closing[-1].isspace()
    ):
        text = without_closing.rstrip()
    return text


def split_by_markdown(content: str) -> list:
    """Split Markdown text into sections at heading lines.

    A line starts a new section when it begins with ``##`` and is longer than
    two characters, so level-2 and deeper headings all count; a single ``#``
    heading does not. The heading line itself is kept as the first line of its
    section's text. Text that precedes the first heading is returned under the
    title "未命名章节".

    Args:
        content: Full Markdown document.

    Returns:
        A list of ``(title, text)`` tuples in document order.
    """
    sections = []
    current_lines = []
    current_title = "未命名章节"
    for line in content.split("\n"):
        if line.startswith("##") and len(line) > 2:
            # Close the section collected so far before opening the new one.
            if current_lines:
                sections.append((current_title, "\n".join(current_lines)))
            current_title = _markdown_heading_title(line)
            current_lines = [line]
        else:
            current_lines.append(line)
    if current_lines:
        sections.append((current_title, "\n".join(current_lines)))
    return sections
def split_by_separator(
    content: str,
    separator: str = "===",
    title_open: str = "【",
    title_close: str = "】",
) -> list:
    """Split text at separator lines and at chapter-title marker lines.

    Two kinds of lines delimit sections and are never part of a section's text:

    * A separator line: starts with ``separator`` and is longer than 10
      characters. It closes the section collected so far.
    * A title line: after stripping surrounding whitespace it starts with
      ``title_open`` and ends with ``title_close``. The text between the
      markers becomes the title of the following section. The section
      collected so far is closed only when it holds more than 100 characters;
      a shorter one is carried over and merged into the next section.

    Text before the first title line is returned under the title "开场".

    NOTE(review): the reviewed source compared against empty marker strings,
    which made every line a "title" and the function always return ``[]``.
    The defaults "【" / "】" are an assumption about the intended markers --
    confirm against real input files and pass different markers if needed.

    Args:
        content: Full text to split.
        separator: Prefix identifying a separator line; an empty string
            disables separator detection.
        title_open: Marker that opens a title line.
        title_close: Marker that closes a title line. Passing empty strings
            for both markers disables title detection.

    Returns:
        A list of ``(title, text)`` tuples in document order.
    """
    sections = []
    current_lines = []
    current_title = "开场"
    titles_enabled = bool(title_open or title_close)
    marker_len = len(title_open) + len(title_close)

    for line in content.split("\n"):
        # The emptiness guard matters: "".startswith("") is always True.
        if separator and line.startswith(separator) and len(line) > 10:
            if current_lines:
                sections.append((current_title, "\n".join(current_lines)))
                current_lines = []
            continue

        candidate = line.strip()
        is_title = (
            titles_enabled
            and len(candidate) >= marker_len
            and candidate.startswith(title_open)
            and candidate.endswith(title_close)
        )
        if not is_title:
            current_lines.append(line)
            continue

        # Very short sections are not emitted on their own; they stay in the
        # buffer and end up in the section that follows the new title.
        if current_lines and len("\n".join(current_lines)) > 100:
            sections.append((current_title, "\n".join(current_lines)))
            current_lines = []
        current_title = candidate[
            len(title_open):len(candidate) - len(title_close)
        ].strip()

    if current_lines:
        sections.append((current_title, "\n".join(current_lines)))
    return sections
def split_plain_text(content: str, chunk_size: int = 2000) -> list:
    """Group the lines of plain text into chunks of roughly ``chunk_size`` characters.

    Lines are never broken: whole lines are accumulated until the sum of their
    lengths (newline characters not counted) reaches ``chunk_size``, at which
    point the chunk is closed. Chunks are titled "1部分", "2部分", ... in order.
    Whatever lines remain at the end form a final chunk.

    Args:
        content: Text to split.
        chunk_size: Character count at which the current chunk is closed.

    Returns:
        A list of ``(title, text)`` tuples in document order.
    """
    chunks = []
    pending = []
    pending_chars = 0
    for row in content.split("\n"):
        pending.append(row)
        pending_chars += len(row)
        if pending_chars < chunk_size:
            continue
        # The next chunk number is always one more than the chunks emitted so far.
        chunks.append((f"{len(chunks) + 1}部分", "\n".join(pending)))
        pending, pending_chars = [], 0
    if pending:
        chunks.append((f"{len(chunks) + 1}部分", "\n".join(pending)))
    return chunks
async def generate_audio_file(text: str, output_path: str, voice: str) -> bool:
    """Synthesize ``text`` with edge-tts and save the audio to ``output_path``.

    The audio is first written to ``<output_path>.part`` and moved onto
    ``output_path`` only after the save completed. A failed run therefore
    never leaves a truncated file at ``output_path`` and never overwrites a
    file produced by an earlier successful run.

    Args:
        text: Text to be spoken.
        output_path: Destination path of the audio file.
        voice: edge-tts voice name.

    Returns:
        True when the file was written; False when anything failed. Failures
        are reported on stdout (message truncated to 80 characters) instead
        of being raised, so a batch can continue with the next section.
    """
    partial_path = f"{output_path}.part"
    try:
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(partial_path)
        os.replace(partial_path, output_path)
        return True
    except Exception as e:  # deliberate best-effort boundary for batch runs
        print(f" 生成失败: {str(e)[:80]}")
        # Drop whatever was written before the failure.
        try:
            os.remove(partial_path)
        except OSError:
            pass  # nothing was written, or it is already gone
        return False
async def generate_audio_files(
    sections: list, output_dir: str, voice: str, filter_chapters=None
) -> tuple:
    """Generate one MP3 per eligible section, sequentially, into ``output_dir``.

    ``output_dir`` is created if missing. A section is skipped when its
    stripped text is shorter than ``MIN_SECTION_LENGTH``, or when
    ``filter_chapters`` is non-empty and does not contain the section title.
    Files are named ``NN_<cleaned title>.mp3`` where ``NN`` is the section's
    1-based position in ``sections`` (skipped sections still consume a number).
    Progress is printed to stdout.

    Args:
        sections: List of ``(title, text)`` tuples.
        output_dir: Directory receiving the audio files.
        voice: edge-tts voice name passed through to ``generate_audio_file``.
        filter_chapters: Optional collection of titles to restrict generation to.

    Returns:
        ``(generated, total_chars)``: the number of files written and the
        summed stripped character count of the sections they came from.
    """
    os.makedirs(output_dir, exist_ok=True)
    section_total = len(sections)
    files_written = 0
    chars_spoken = 0

    for position, (title, content) in enumerate(sections, start=1):
        char_count = len(content.strip())
        too_short = char_count < MIN_SECTION_LENGTH
        if too_short or (filter_chapters and title not in filter_chapters):
            continue

        file_name = f"{position:02d}_{clean_filename(title)}.mp3"
        target_path = os.path.join(output_dir, file_name)
        print(f"[{position}/{section_total}] {title[:40]}...")
        print(f" 字数: {char_count}")

        if await generate_audio_file(content, target_path, voice):
            files_written += 1
            chars_spoken += char_count
            size_mb = os.path.getsize(target_path) / 1024 / 1024
            print(f" ✓ 完成 [{size_mb:.1f}MB]")
        else:
            print(" ✗ 失败")
        print()

    return files_written, chars_spoken
def main():
    """Command-line entry point: parse arguments, split the input, generate audio.

    Reads the input file as UTF-8 (a leading byte-order mark is dropped),
    splits it according to ``--format`` (``markdown``, ``separator`` or the
    default ``plain``), optionally restricts generation to the titles given in
    ``--chapters``, runs the generation, and prints a summary including an
    estimated duration of 250 characters per minute.

    Exits with status 1 when the input path is not an existing regular file
    or when the file cannot be read or decoded.
    """
    parser = argparse.ArgumentParser(
        description="文本转音频生成工具",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
%(prog)s input.txt # 默认生成纯文本音频
%(prog)s doc.md --format markdown # Markdown格式
%(prog)s text.txt --voice zh-CN-YunxiNeural # 使用男声
%(prog)s text.txt --output-dir ./my_audio # 指定输出目录
""",
    )
    parser.add_argument("input_file", help="输入文本文件路径")
    parser.add_argument(
        "--format",
        choices=["plain", "markdown", "separator"],
        default=DEFAULT_FORMAT,
        help="文本格式 (默认: plain)",
    )
    parser.add_argument(
        "--output-dir",
        default=DEFAULT_OUTPUT_DIR,
        help=f"输出目录 (默认: {DEFAULT_OUTPUT_DIR})",
    )
    parser.add_argument(
        "--voice", default=DEFAULT_VOICE, help=f"语音模型 (默认: {DEFAULT_VOICE})"
    )
    parser.add_argument("--chapters", help="只生成指定章节,用逗号分隔")
    args = parser.parse_args()

    # Check the input file. isfile() rather than exists(): a directory path
    # would pass exists() and then make open() fail with a traceback.
    if not os.path.isfile(args.input_file):
        print(f"错误: 找不到文件 {args.input_file}")
        sys.exit(1)

    # Read the file. "utf-8-sig" reads plain UTF-8 as well, but drops a
    # leading BOM that would otherwise hide a heading on the first line.
    print(f"正在读取: {args.input_file}")
    try:
        with open(args.input_file, "r", encoding="utf-8-sig") as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as exc:
        print(f"错误: 无法读取文件 {args.input_file}: {exc}")
        sys.exit(1)
    print(f"文件大小: {len(content)} 字符")
    print()

    # Split into sections.
    if args.format == "markdown":
        sections = split_by_markdown(content)
    elif args.format == "separator":
        sections = split_by_separator(content)
    else:
        sections = split_plain_text(content)
    print(f"共识别 {len(sections)} 个章节/段落")
    print(f"使用语音: {args.voice}")
    print(f"输出目录: {args.output_dir}")
    print()

    # Parse the chapter filter; blank entries (e.g. from "a,,b") are ignored.
    filter_chapters = []
    if args.chapters:
        filter_chapters = [
            name.strip() for name in args.chapters.split(",") if name.strip()
        ]
        print(f"只生成章节: {', '.join(filter_chapters)}")
        print()

    # Generate the audio.
    print("开始生成音频文件...")
    print("=" * 60)
    generated, total_chars = asyncio.run(
        generate_audio_files(sections, args.output_dir, args.voice, filter_chapters)
    )
    print("=" * 60)
    print("音频生成完成!")
    print(f"共生成 {generated} 个音频文件")
    print(f"总字数: {total_chars}")
    print(f"预计总时长: {total_chars // 250} 分钟")
    print(f"保存位置: {args.output_dir}")
    print("=" * 60)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()