#!/usr/bin/env python3
"""
Audio Generator Skill: text-to-speech generation tool.

Features:
    - Accepts Markdown, separator-delimited, or plain text input
    - Splits long text into sections automatically
    - Synthesizes speech with edge-tts (the default voice is Chinese)
    - Generates one MP3 file per section in a single batch run

Dependency:
    pip install edge-tts

Usage:
    python scripts/generate_audio.py INPUT_FILE [options]

Examples:
    # Plain text, split into fixed-size chunks
    python scripts/generate_audio.py text.txt --format plain --output-dir ./audio

    # Markdown, split on headings
    python scripts/generate_audio.py doc.md --format markdown --output-dir ./audio

    # Only generate selected sections
    python scripts/generate_audio.py text.txt --chapters "第一章,第二章"
"""

import argparse
import asyncio
import contextlib
import io
import os
import sys
from pathlib import Path
from typing import List, Optional, Tuple

try:
    import edge_tts
except ImportError:
    # Keep --help and the text splitters usable without the dependency;
    # main() and generate_audio_file() report the missing package.
    edge_tts = None


def _force_utf8_stdio() -> None:
    """Make stdout/stderr emit UTF-8 so the Chinese messages print anywhere."""
    for name in ("stdout", "stderr"):
        stream = getattr(sys, name)
        if hasattr(stream, "reconfigure"):
            # Switches the encoding in place and keeps the stream's existing
            # buffering mode, so progress lines still appear as they happen.
            stream.reconfigure(encoding="utf-8")
        elif hasattr(stream, "buffer"):
            wrapped = io.TextIOWrapper(
                stream.buffer, encoding="utf-8", line_buffering=True
            )
            setattr(sys, name, wrapped)


_force_utf8_stdio()

# Default configuration
DEFAULT_VOICE = "zh-CN-XiaoxiaoNeural"  # Xiaoxiao; suited to long-form reading
DEFAULT_FORMAT = "plain"
DEFAULT_OUTPUT_DIR = "./audio_output"
MIN_SECTION_LENGTH = 200  # minimum section length (characters)

# A (title, text) pair produced by the split_* functions.
Section = Tuple[str, str]

# Every character listed here is replaced by "-" in generated file names.
_FILENAME_TRANSLATION = str.maketrans({ch: "-" for ch in '/\\:?"<>|*#'})


def clean_filename(title: str, max_length: int = 30) -> str:
    """Return *title* turned into a file-name-safe string.

    Path separators and the other characters in the replacement table become
    "-"; the result is cut to *max_length* characters and then stripped of
    surrounding whitespace.
    """
    return title.translate(_FILENAME_TRANSLATION)[:max_length].strip()


def _heading_title(line: str) -> str:
    """Return the title of a level-2+ Markdown heading line, or "" if none."""
    if not line.startswith("##"):
        return ""
    text = line.lstrip("#").strip()
    # Drop an optional closing run of "#" ("## Title ##") while keeping a "#"
    # that belongs to the title itself ("## C#").
    without_closing = text.rstrip("#")
    if without_closing != text and (
        not without_closing or without_closing.endswith(" ")
    ):
        text = without_closing.strip()
    return text


def split_by_markdown(content: str) -> List[Section]:
    """Split Markdown text into sections at headings of level 2 or deeper.

    Returns a list of (title, text) pairs. The heading line is kept as the
    first line of its section's text. Text before the first heading is
    returned under a placeholder title. A "##" marker without any title text
    is treated as ordinary content.
    """
    sections: List[Section] = []
    title = "未命名章节"
    body: List[str] = []
    for line in content.split("\n"):
        heading = _heading_title(line)
        if not heading:
            body.append(line)
            continue
        if body:
            sections.append((title, "\n".join(body)))
        title = heading
        body = [line]
    if body:
        sections.append((title, "\n".join(body)))
    return sections


def split_by_separator(content: str, separator: str = "===") -> List[Section]:
    """Split text on separator lines and on bracketed title lines.

    A line that starts with *separator* and is longer than 10 characters ends
    the current section. A line of the form 【title】 sets the title for the
    text that follows; the text gathered so far is emitted first only when it
    is longer than 100 characters, otherwise it is carried over into the new
    title's section. Separator and title lines are not part of any section's
    text.

    Returns a list of (title, text) pairs.
    """
    sections: List[Section] = []
    title = "开场"
    body: List[str] = []
    for line in content.split("\n"):
        if line.startswith(separator) and len(line) > 10:
            if body:
                sections.append((title, "\n".join(body)))
            body = []
        elif line.startswith("【") and line.endswith("】"):
            if body and len("\n".join(body)) > 100:
                sections.append((title, "\n".join(body)))
                body = []
            title = line.replace("【", "").replace("】", "").strip()
        else:
            body.append(line)
    if body:
        sections.append((title, "\n".join(body)))
    return sections


def split_plain_text(content: str, chunk_size: int = 2000) -> List[Section]:
    """Split plain text into numbered chunks of roughly *chunk_size* characters.

    Lines are never broken: a chunk is closed after the line that brings its
    size (newlines not counted) to *chunk_size* or more. A final remainder
    shorter than MIN_SECTION_LENGTH is appended to the previous chunk, because
    generate_audio_files() skips sections that short and the end of the text
    would otherwise never be synthesized.

    Returns a list of (title, text) pairs.
    """
    sections: List[Section] = []
    pending: List[str] = []
    pending_size = 0
    for line in content.split("\n"):
        pending.append(line)
        pending_size += len(line)
        if pending_size >= chunk_size:
            sections.append((f"第{len(sections) + 1}部分", "\n".join(pending)))
            pending = []
            pending_size = 0
    if pending:
        tail = "\n".join(pending)
        if sections and len(tail.strip()) < MIN_SECTION_LENGTH:
            last_title, last_text = sections[-1]
            sections[-1] = (last_title, f"{last_text}\n{tail}")
        else:
            sections.append((f"第{len(sections) + 1}部分", tail))
    return sections


async def generate_audio_file(text: str, output_path: str, voice: str) -> bool:
    """Synthesize *text* with *voice* into *output_path*.

    The audio is written to "<output_path>.part" and moved into place only on
    success, so a failed or interrupted run never leaves a truncated file at
    *output_path* and never destroys a file produced by an earlier run.

    Returns True on success. On failure a short message is printed and False
    is returned; exceptions are not propagated.
    """
    if edge_tts is None:
        print(" 生成失败: 未安装 edge-tts (pip install edge-tts)")
        return False

    partial_path = f"{output_path}.part"
    try:
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(partial_path)
        os.replace(partial_path, output_path)
        return True
    except Exception as e:
        # Best-effort by design: one failed section must not stop the batch.
        print(f" 生成失败: {str(e)[:80]}")
        return False
    finally:
        # Only present when synthesis did not complete.
        with contextlib.suppress(OSError):
            os.remove(partial_path)


async def generate_audio_files(
    sections: list, output_dir: str, voice: str, filter_chapters=None
) -> tuple:
    """Generate one MP3 per section into *output_dir*.

    Args:
        sections: (title, text) pairs as returned by the split_* functions.
        output_dir: Target directory; created if it does not exist.
        voice: Voice name passed through to generate_audio_file().
        filter_chapters: Optional collection of titles; when non-empty, only
            sections whose title is in it are generated.

    Returns:
        (generated, total_chars): the number of files written and the summed
        stripped length of their texts.

    Sections whose stripped text is shorter than MIN_SECTION_LENGTH are
    skipped. File names are numbered by the section's position in *sections*,
    so the numbers stay stable when sections are skipped or filtered out.
    """
    os.makedirs(output_dir, exist_ok=True)

    total = len(sections)
    generated = 0
    total_chars = 0

    for index, (title, content) in enumerate(sections, start=1):
        # Honour the chapter filter, if one was given
        if filter_chapters and title not in filter_chapters:
            continue

        content_len = len(content.strip())

        # Skip sections that are too short, but say so instead of dropping
        # them silently.
        if content_len < MIN_SECTION_LENGTH:
            print(f"[{index}/{total}] {title[:40]}... 跳过 (内容过短: {content_len} 字)")
            continue

        output_file = os.path.join(
            output_dir, f"{index:02d}_{clean_filename(title)}.mp3"
        )

        print(f"[{index}/{total}] {title[:40]}...")
        print(f" 字数: {content_len}")

        if await generate_audio_file(content, output_file, voice):
            generated += 1
            total_chars += content_len
            file_size = os.path.getsize(output_file) / 1024 / 1024
            print(f" ✓ 完成 [{file_size:.1f}MB]")
        else:
            print(" ✗ 失败")
        print()

    return generated, total_chars


def main():
    """Command-line entry point: parse arguments, split the input, synthesize."""
    parser = argparse.ArgumentParser(
        description="文本转音频生成工具",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
  %(prog)s input.txt # 默认生成纯文本音频
  %(prog)s doc.md --format markdown # Markdown格式
  %(prog)s text.txt --voice zh-CN-YunxiNeural # 使用男声
  %(prog)s text.txt --output-dir ./my_audio # 指定输出目录
""",
    )
    parser.add_argument("input_file", help="输入文本文件路径")
    parser.add_argument(
        "--format",
        choices=["plain", "markdown", "separator"],
        default=DEFAULT_FORMAT,
        help="文本格式 (默认: plain)",
    )
    parser.add_argument(
        "--output-dir",
        default=DEFAULT_OUTPUT_DIR,
        help=f"输出目录 (默认: {DEFAULT_OUTPUT_DIR})",
    )
    parser.add_argument(
        "--voice", default=DEFAULT_VOICE, help=f"语音模型 (默认: {DEFAULT_VOICE})"
    )
    parser.add_argument("--chapters", help="只生成指定章节,用逗号分隔")
    args = parser.parse_args()

    # Checked after parse_args() so that --help works without the dependency.
    if edge_tts is None:
        print("错误: 未安装 edge-tts,请先运行: pip install edge-tts")
        sys.exit(1)

    # Validate the input file (a directory is rejected too)
    input_path = Path(args.input_file)
    if not input_path.is_file():
        print(f"错误: 找不到文件 {args.input_file}")
        sys.exit(1)

    # Read the file; "utf-8-sig" drops a leading BOM, which would otherwise
    # stop a heading or title on the first line from being recognised.
    print(f"正在读取: {args.input_file}")
    content = input_path.read_text(encoding="utf-8-sig")
    print(f"文件大小: {len(content)} 字符")
    print()

    # Split into sections
    if args.format == "markdown":
        sections = split_by_markdown(content)
    elif args.format == "separator":
        sections = split_by_separator(content)
    else:
        sections = split_plain_text(content)

    print(f"共识别 {len(sections)} 个章节/段落")
    print(f"使用语音: {args.voice}")
    print(f"输出目录: {args.output_dir}")
    print()

    # Parse the chapter filter, ignoring empty entries such as in "a,,b"
    filter_chapters: Optional[List[str]] = []
    if args.chapters:
        filter_chapters = [
            name.strip() for name in args.chapters.split(",") if name.strip()
        ]
        print(f"只生成章节: {', '.join(filter_chapters)}")
        print()

    # Generate the audio
    print("开始生成音频文件...")
    print("=" * 60)

    generated, total_chars = asyncio.run(
        generate_audio_files(sections, args.output_dir, args.voice, filter_chapters)
    )

    print("=" * 60)
    print("音频生成完成!")
    print(f"共生成 {generated} 个音频文件")
    print(f"总字数: {total_chars}")
    print(f"预计总时长: {total_chars // 250} 分钟")
    print(f"保存位置: {args.output_dir}")
    print("=" * 60)


if __name__ == "__main__":
    main()