Files

434 lines
14 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
文生图脚本 (Text-to-Image)
使用 Lyra Flash API 根据中文文本描述生成图片
支持参考图风格生成
Author: 翟星人
"""
import httpx
import base64
import json
import os
from typing import Dict, Any, Optional, Union
from pathlib import Path
# Aspect ratios accepted by the -r/--ratio command-line option.
# Each entry is also a key of RATIO_TO_SIZE below.
VALID_ASPECT_RATIOS = [
    "1:1",
    "2:3",
    "3:2",
    "3:4",
    "4:3",
    "4:5",
    "5:4",
    "9:16",
    "16:9",
    "21:9",
]
# Pixel sizes ("WIDTHxHEIGHT") the CLI treats as known-good for -s/--size.
# A size outside this list only triggers a warning in main(); it is still sent.
VALID_SIZES = [
    "1024x1024",
    "1536x1024",
    "1792x1024",
    "1344x768",
    "1248x832",
    "1184x864",
    "1152x896",
    "1536x672",
    "1024x1536",
    "1024x1792",
    "768x1344",
    "832x1248",
    "864x1184",
    "896x1152",
]
# Aspect ratio -> concrete "WIDTHxHEIGHT" size placed in the request payload.
# Every value here also appears in VALID_SIZES.
# NOTE(review): several entries are not numerically exact for their key
# (e.g. "3:4" -> 1024x1536 is 2:3, "4:5" -> 864x1184 is ~0.73); presumably
# these are the nearest sizes the backend accepts - confirm before changing.
RATIO_TO_SIZE = {
    "1:1": "1024x1024",
    "2:3": "832x1248",
    "3:2": "1248x832",
    "3:4": "1024x1536",
    "4:3": "1536x1024",
    "4:5": "864x1184",
    "5:4": "1184x864",
    "9:16": "1024x1792",
    "16:9": "1792x1024",
    "21:9": "1536x672",
}
class TextToImageGenerator:
    """Text-to-image client that posts prompts to an HTTP image API.

    Three request paths are supported:

    * plain generation via ``{base_url}/images/generations``;
    * style-reference generation via ``{base_url}/images/edits``;
    * a DashScope-style endpoint, selected when ``base_url`` contains
      ``"dashscope"``.

    The generation methods report HTTP/network failures as a result dict
    (``{"success": False, "error": ..., "detail": ...}``) instead of raising.
    """

    # File suffix -> MIME type used when building a data URI for a reference image.
    _MIME_TYPES = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }

    def __init__(self, config: Optional[Dict[str, str]] = None):
        """Initialise the generator.

        Args:
            config: Mapping with ``api_key``, ``base_url`` and ``model``
                (the upper-case ``IMAGE_API_KEY`` / ``IMAGE_API_BASE_URL`` /
                ``IMAGE_MODEL`` keys are accepted as fallbacks). When omitted,
                the values are loaded by ``_load_config``.

        Raises:
            ValueError: If the API key or the base URL is missing.
        """
        if config is None:
            config = self._load_config()
        self.api_key = config.get("api_key") or config.get("IMAGE_API_KEY")
        base_url = config.get("base_url") or config.get("IMAGE_API_BASE_URL")
        # Drop trailing slashes so endpoint joins never produce "//images/...".
        self.base_url = base_url.rstrip("/") if base_url else base_url
        self.model = config.get("model") or config.get("IMAGE_MODEL") or "lyra-flash-9"
        if not self.api_key or not self.base_url:
            raise ValueError("缺少必要的 API 配置api_key 和 base_url")

    def _load_config(self) -> Dict[str, Optional[str]]:
        """Load configuration from ``../config/settings.json`` and the environment.

        Non-empty environment variables (``IMAGE_API_KEY``,
        ``IMAGE_API_BASE_URL``, ``IMAGE_MODEL``) take precedence over the
        ``image_api`` section of the settings file. An empty variable is
        treated as unset so it cannot mask a valid file value.

        Returns:
            Dict with ``api_key``, ``base_url`` and ``model``; a value is
            ``None`` when neither source provides it.
        """
        file_values: Dict[str, Any] = {}
        config_path = Path(__file__).parent.parent / "config" / "settings.json"
        if config_path.exists():
            with open(config_path, "r", encoding="utf-8") as f:
                settings = json.load(f)
            # `or {}` also covers an explicit `"image_api": null` entry.
            file_values = settings.get("image_api") or {}
        return {
            "api_key": os.getenv("IMAGE_API_KEY") or file_values.get("key"),
            "base_url": os.getenv("IMAGE_API_BASE_URL") or file_values.get("base_url"),
            "model": os.getenv("IMAGE_MODEL") or file_values.get("model"),
        }

    @staticmethod
    def image_to_base64(image_path: str, with_prefix: bool = True) -> str:
        """Encode an image file as base64.

        Args:
            image_path: Path of the image file to read.
            with_prefix: When true, return a ``data:<mime>;base64,...`` URI;
                otherwise return the bare base64 text. The MIME type is
                chosen from the file suffix and defaults to ``image/png``.

        Raises:
            FileNotFoundError: If ``image_path`` does not exist.
        """
        path = Path(image_path)
        if not path.exists():
            raise FileNotFoundError(f"图片文件不存在: {image_path}")
        encoded = base64.b64encode(path.read_bytes()).decode("utf-8")
        if not with_prefix:
            return encoded
        mime_type = TextToImageGenerator._MIME_TYPES.get(path.suffix.lower(), "image/png")
        return f"data:{mime_type};base64,{encoded}"

    @staticmethod
    def _resolve_size(
        size: Optional[str],
        aspect_ratio: Optional[str],
        default: str,
        unknown_ratio_default: Optional[str] = None,
    ) -> str:
        """Pick the request size: aspect-ratio mapping first, then size, then default."""
        if aspect_ratio:
            return RATIO_TO_SIZE.get(aspect_ratio, unknown_ratio_default or default)
        return size or default

    def _auth_headers(self) -> Dict[str, str]:
        """Return the JSON + bearer-token headers shared by every API request."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }

    def _persist_first_image(self, result: Dict[str, Any], output_path: Optional[str]) -> Optional[str]:
        """Write the first image of ``result`` to ``output_path``; return the path, or None if nothing was written."""
        if not output_path:
            return None
        items = result.get("data")
        if not items:
            return None
        first = items[0]
        b64_data = first.get("b64_json")
        if b64_data:
            self._save_image(b64_data, output_path)
            return output_path
        # Assumes responses requested with response_format="url" carry the
        # image location in a `url` field - TODO confirm against the API.
        url = first.get("url")
        if url:
            self._download_image(url, output_path)
            return output_path
        return None

    def _post_images(self, endpoint: str, payload: Dict[str, Any], output_path: Optional[str]) -> Dict[str, Any]:
        """POST ``payload`` to ``endpoint`` and build the success/failure result dict."""
        try:
            with httpx.Client(timeout=180.0) as client:
                response = client.post(
                    f"{self.base_url}{endpoint}", headers=self._auth_headers(), json=payload
                )
                response.raise_for_status()
                result = response.json()
            saved_path = self._persist_first_image(result, output_path)
            if saved_path:
                result["saved_path"] = saved_path
            # Report a path only when a file was actually written.
            return {"success": True, "data": result, "saved_path": saved_path}
        except httpx.HTTPStatusError as e:
            return {
                "success": False,
                "error": f"HTTP 错误: {e.response.status_code}",
                "detail": str(e),
            }
        except Exception as e:
            return {"success": False, "error": "生成失败", "detail": str(e)}

    def generate(
        self,
        prompt: str,
        size: Optional[str] = None,
        aspect_ratio: Optional[str] = None,
        image_size: Optional[str] = None,
        output_path: Optional[str] = None,
        response_format: str = "b64_json",
        ref_image: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Generate an image from a text prompt.

        Args:
            prompt: Image description.
            size: Pixel size such as ``1792x1024``; used only when
                ``aspect_ratio`` is not given.
            aspect_ratio: Ratio such as ``16:9``; mapped through
                ``RATIO_TO_SIZE`` (unknown ratios fall back to ``1024x1024``).
            image_size: Resolution hint (1K/2K/4K). Accepted for interface
                compatibility; it is currently not sent to the API.
            output_path: When given, the generated image is written there.
            response_format: Response format requested from the API.
            ref_image: Path of a reference image; when given, the request is
                delegated to ``_generate_with_reference``.

        Returns:
            ``{"success": True, "data": <response JSON>, "saved_path": ...}``
            on success, where ``saved_path`` is ``None`` unless a file was
            actually written; otherwise
            ``{"success": False, "error": ..., "detail": ...}``.

        Raises:
            FileNotFoundError: If ``ref_image`` is given but does not exist.
        """
        if ref_image:
            return self._generate_with_reference(
                prompt=prompt,
                ref_image=ref_image,
                aspect_ratio=aspect_ratio,
                size=size,
                output_path=output_path,
                response_format=response_format,
            )
        if self.base_url and "dashscope" in self.base_url:
            return self._generate_aliyun(
                prompt, size, aspect_ratio, output_path, response_format
            )
        payload: Dict[str, Any] = {
            "model": self.model,
            "prompt": prompt,
            "response_format": response_format,
            # With neither a ratio nor a size, default to the 16:9 size.
            "size": self._resolve_size(
                size, aspect_ratio, default="1792x1024", unknown_ratio_default="1024x1024"
            ),
        }
        return self._post_images("/images/generations", payload, output_path)

    def _generate_aliyun(
        self, prompt, size, aspect_ratio, output_path, response_format
    ):
        """Generate an image through the DashScope-style multimodal endpoint.

        Sizes use ``*`` instead of ``x`` as the separator on this endpoint.
        ``response_format`` is accepted for signature parity but not used.

        Returns:
            The same result-dict shape as ``generate``.
        """
        try:
            size = self._resolve_size(size, aspect_ratio, default="1024x1024").replace("x", "*")
            payload = {
                "model": self.model,
                "input": {"messages": [{"role": "user", "content": [{"text": prompt}]}]},
                # NOTE(review): "base64" is requested, yet the returned `image`
                # value is downloaded as a URL below - confirm against the API.
                "parameters": {"size": size, "response_format": "base64"},
            }
            with httpx.Client(timeout=300.0) as client:
                response = client.post(
                    f"{self.base_url}/api/v1/services/aigc/multimodal-generation/generation",
                    headers=self._auth_headers(),
                    json=payload,
                )
                response.raise_for_status()
                result = response.json()
            saved_path = None
            if output_path:
                # `or [{}]` keeps the lookup safe when a list is present but empty.
                choices = result.get("output", {}).get("choices") or [{}]
                content = choices[0].get("message", {}).get("content") or [{}]
                image_url = content[0].get("image")
                if image_url:
                    self._download_image(image_url, output_path)
                    result["saved_path"] = output_path
                    saved_path = output_path
            return {"success": True, "data": result, "saved_path": saved_path}
        except httpx.HTTPStatusError as e:
            return {
                "success": False,
                "error": f"HTTP错误: {e.response.status_code}",
                "detail": str(e),
            }
        except Exception as e:
            return {"success": False, "error": "生成失败", "detail": str(e)}

    def _download_image(self, url, output_path):
        """Download ``url`` and write the body to ``output_path``, creating parent directories."""
        with httpx.Client(timeout=60.0) as client:
            response = client.get(url)
            response.raise_for_status()
            target = Path(output_path)
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_bytes(response.content)

    def _generate_with_reference(
        self,
        prompt: str,
        ref_image: str,
        aspect_ratio: Optional[str] = None,
        size: Optional[str] = None,
        output_path: Optional[str] = None,
        response_format: str = "b64_json",
    ) -> Dict[str, Any]:
        """Generate a new image that follows the style of a reference image.

        Args:
            prompt: Description of the new image content.
            ref_image: Path of the reference image.
            aspect_ratio: Ratio mapped through ``RATIO_TO_SIZE``; takes
                precedence over ``size``, matching ``generate``.
            size: Pixel size, used when ``aspect_ratio`` is not given.
            output_path: When given, the generated image is written there.
            response_format: Response format requested from the API.

        Returns:
            The same result-dict shape as ``generate``.

        Raises:
            FileNotFoundError: If ``ref_image`` does not exist.
        """
        image_b64 = self.image_to_base64(ref_image)
        enhanced_prompt = f"参考这张图片的背景风格、配色方案和视觉设计,保持完全一致的风格,生成新内容:{prompt}"
        payload = {
            "model": self.model,
            "prompt": enhanced_prompt,
            "image": image_b64,
            "size": self._resolve_size(size, aspect_ratio, default="1024x1792"),
            "response_format": response_format,
        }
        return self._post_images("/images/edits", payload, output_path)

    def _save_image(self, b64_data: str, output_path: str) -> None:
        """Decode base64 image data and write it to ``output_path``, creating parent directories."""
        image_data = base64.b64decode(b64_data)
        target = Path(output_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(image_data)
def main():
    """Command-line entry point: parse arguments, generate one image, print the outcome.

    Returns:
        Process exit status: 0 when generation succeeded, 1 on invalid
        arguments, missing configuration, or a failed generation.
    """
    import argparse
    import time

    parser = argparse.ArgumentParser(
        description="文生图工具",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=f"""
尺寸参数说明:
-r/--ratio 推荐使用,支持: {", ".join(VALID_ASPECT_RATIOS)}
-s/--size 传统尺寸,支持: {", ".join(VALID_SIZES[:4])}...
--resolution 分辨率(1K/2K/4K),仅 gemini-3.0-pro-image-preview 支持
--ref 参考图片路径,后续图片将参考首图风格生成
示例:
python text_to_image.py "描述" -r 3:4 # 竖版 3:4
python text_to_image.py "描述" -r 9:16 -o out.png # 竖屏 9:16
python text_to_image.py "描述" -s 1024x1792 # 传统尺寸
# 长图场景:首图定调,后续参考首图风格
python text_to_image.py "首屏内容" -r 3:4 -o 01.png
python text_to_image.py "第二屏内容" -r 3:4 --ref 01.png -o 02.png
""",
    )
    parser.add_argument("prompt", help="中文图像描述提示词")
    parser.add_argument("-o", "--output", help="输出文件路径(默认保存到当前目录)")
    parser.add_argument(
        "-r",
        "--ratio",
        help=f"宽高比,推荐使用。可选: {', '.join(VALID_ASPECT_RATIOS)}",
    )
    parser.add_argument("-s", "--size", help="图片尺寸 (如 1792x1024)")
    parser.add_argument("--resolution", help="分辨率 (1K/2K/4K),仅部分模型支持")
    parser.add_argument("--ref", help="参考图片路径,用于风格参考(长图场景)")
    args = parser.parse_args()

    # An unsupported ratio is a hard error.
    if args.ratio and args.ratio not in VALID_ASPECT_RATIOS:
        print(f"错误: 不支持的宽高比 '{args.ratio}'")
        print(f"支持的宽高比: {', '.join(VALID_ASPECT_RATIOS)}")
        return 1
    # An unlisted size only warns; the request is still sent with it.
    if args.size and args.size not in VALID_SIZES:
        print(f"警告: 尺寸 '{args.size}' 可能不被支持")
        print("推荐使用 -r/--ratio 参数指定宽高比")
    if args.ref and not os.path.exists(args.ref):
        print(f"错误: 参考图片不存在: {args.ref}")
        return 1

    # Without -o, write a timestamped PNG into the current directory.
    output_path = args.output or f"generated_{time.strftime('%Y%m%d_%H%M%S')}.png"

    try:
        generator = TextToImageGenerator()
    except ValueError as exc:
        # Missing API configuration (and malformed settings JSON, whose decode
        # error is a ValueError subclass) is reported without a traceback.
        print(f"错误: {exc}")
        return 1

    result = generator.generate(
        prompt=args.prompt,
        size=args.size,
        aspect_ratio=args.ratio,
        image_size=args.resolution,
        output_path=output_path,
        ref_image=args.ref,
    )
    if not result["success"]:
        print(f"生成失败: {result['error']}")
        print(f"详情: {result.get('detail', 'N/A')}")
        return 1

    print("生成成功!")
    if result.get("saved_path"):
        print(f"图片已保存到: {result['saved_path']}")
    return 0


if __name__ == "__main__":
    # Propagate main()'s status so shell callers can detect failures.
    raise SystemExit(main())