Initial commit: skills library
- 70 skills with code and documentation - Add .gitignore (ignore __pycache__, output/, temp/, venv/) - Clean up test intermediates and caches
This commit is contained in:
@@ -0,0 +1,163 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
TTS 语音生成器 - 使用 edge-tts
|
||||
支持时间戳输出,用于字幕同步和镜头切换
|
||||
"""
|
||||
|
||||
import sys
|
||||
import io
|
||||
|
||||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
|
||||
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8")
|
||||
|
||||
import asyncio
|
||||
import argparse
|
||||
import os
|
||||
import json
|
||||
import yaml
|
||||
import edge_tts
|
||||
|
||||
|
||||
async def generate_tts(
|
||||
text: str,
|
||||
voice: str,
|
||||
output_path: str,
|
||||
rate: str = "+0%",
|
||||
pitch: str = "+0Hz",
|
||||
with_timestamps: bool = False,
|
||||
):
|
||||
"""生成单条语音,可选输出时间戳"""
|
||||
communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
|
||||
|
||||
if with_timestamps:
|
||||
timestamps = []
|
||||
audio_chunks = []
|
||||
|
||||
async for chunk in communicate.stream():
|
||||
chunk_type = chunk.get("type", "")
|
||||
if chunk_type == "audio":
|
||||
audio_chunks.append(chunk.get("data", b""))
|
||||
elif chunk_type == "WordBoundary":
|
||||
timestamps.append(
|
||||
{
|
||||
"text": chunk.get("text", ""),
|
||||
"start": chunk.get("offset", 0) / 10000000,
|
||||
"end": (chunk.get("offset", 0) + chunk.get("duration", 0))
|
||||
/ 10000000,
|
||||
}
|
||||
)
|
||||
elif chunk_type == "SentenceBoundary":
|
||||
timestamps.append(
|
||||
{
|
||||
"text": chunk.get("text", ""),
|
||||
"start": chunk.get("offset", 0) / 10000000,
|
||||
"end": (chunk.get("offset", 0) + chunk.get("duration", 0))
|
||||
/ 10000000,
|
||||
"type": "sentence",
|
||||
}
|
||||
)
|
||||
|
||||
with open(output_path, "wb") as f:
|
||||
for data in audio_chunks:
|
||||
f.write(data)
|
||||
|
||||
ts_path = output_path.rsplit(".", 1)[0] + ".json"
|
||||
with open(ts_path, "w", encoding="utf-8") as f:
|
||||
json.dump(timestamps, f, ensure_ascii=False, indent=2)
|
||||
|
||||
print(f" ✓ 生成: {output_path} + 时间戳")
|
||||
return timestamps
|
||||
else:
|
||||
await communicate.save(output_path)
|
||||
print(f" ✓ 生成: {output_path}")
|
||||
return None
|
||||
|
||||
|
||||
async def generate_batch(config_path: str, output_dir: str):
|
||||
"""批量生成语音"""
|
||||
with open(config_path, "r", encoding="utf-8") as f:
|
||||
config = yaml.safe_load(f)
|
||||
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
voice_config = config.get("voice", {})
|
||||
voice_name = voice_config.get("name", "zh-CN-YunxiNeural")
|
||||
rate = voice_config.get("rate", "+0%")
|
||||
pitch = voice_config.get("pitch", "+0Hz")
|
||||
|
||||
scenes = config.get("scenes", [])
|
||||
tasks = []
|
||||
|
||||
for i, scene in enumerate(scenes):
|
||||
text = scene.get("text", "")
|
||||
if not text:
|
||||
continue
|
||||
output_path = os.path.join(output_dir, f"{i:03d}.mp3")
|
||||
tasks.append(generate_tts(text, voice_name, output_path, rate, pitch))
|
||||
|
||||
print(f"开始生成 {len(tasks)} 条语音...")
|
||||
await asyncio.gather(*tasks)
|
||||
print(f"✓ 完成!语音文件保存在: {output_dir}")
|
||||
|
||||
|
||||
async def list_voices():
|
||||
"""列出所有可用音色"""
|
||||
voices = await edge_tts.list_voices()
|
||||
zh_voices = [v for v in voices if v["Locale"].startswith("zh")]
|
||||
|
||||
print("\n中文可用音色:")
|
||||
print("-" * 60)
|
||||
for v in zh_voices:
|
||||
gender = "♂" if v["Gender"] == "Male" else "♀"
|
||||
print(f"{gender} {v['ShortName']:<30} {v['Locale']}")
|
||||
print("-" * 60)
|
||||
print(f"共 {len(zh_voices)} 个中文音色")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Edge-TTS 语音生成器")
|
||||
parser.add_argument("--text", type=str, help="要转换的文本")
|
||||
parser.add_argument(
|
||||
"--voice", type=str, default="zh-CN-YunxiNeural", help="音色名称"
|
||||
)
|
||||
parser.add_argument("--rate", type=str, default="+0%", help="语速调整")
|
||||
parser.add_argument("--pitch", type=str, default="+0Hz", help="音调调整")
|
||||
parser.add_argument("--output", type=str, help="输出文件路径")
|
||||
parser.add_argument("--timestamps", action="store_true", help="输出时间戳JSON文件")
|
||||
parser.add_argument("--config", type=str, help="配置文件路径(批量生成)")
|
||||
parser.add_argument(
|
||||
"--output-dir", type=str, default="temp/audio", help="批量输出目录"
|
||||
)
|
||||
parser.add_argument("--list-voices", action="store_true", help="列出可用音色")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# 支持 file: 格式读取文件内容
|
||||
text_input = args.text
|
||||
if text_input and text_input.startswith("file:"):
|
||||
file_path = text_input[5:] # 移除 "file:" 前缀
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
text_input = f.read()
|
||||
print(f"已从文件读取文本: {file_path}")
|
||||
|
||||
if args.list_voices:
|
||||
asyncio.run(list_voices())
|
||||
elif args.config:
|
||||
asyncio.run(generate_batch(args.config, args.output_dir))
|
||||
elif text_input and args.output:
|
||||
asyncio.run(
|
||||
generate_tts(
|
||||
text_input,
|
||||
args.voice,
|
||||
args.output,
|
||||
args.rate,
|
||||
args.pitch,
|
||||
args.timestamps,
|
||||
)
|
||||
)
|
||||
else:
|
||||
parser.print_help()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user