#!/usr/bin/env python3 """ 文生图脚本 (Text-to-Image) 使用 Lyra Flash API 根据中文文本描述生成图片 支持参考图风格生成 Author: 翟星人 """ import httpx import base64 import json import os from typing import Dict, Any, Optional, Union from pathlib import Path VALID_ASPECT_RATIOS = [ "1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9", ] VALID_SIZES = [ "1024x1024", "1536x1024", "1792x1024", "1344x768", "1248x832", "1184x864", "1152x896", "1536x672", "1024x1536", "1024x1792", "768x1344", "832x1248", "864x1184", "896x1152", ] RATIO_TO_SIZE = { "1:1": "1024x1024", "2:3": "832x1248", "3:2": "1248x832", "3:4": "1024x1536", "4:3": "1536x1024", "4:5": "864x1184", "5:4": "1184x864", "9:16": "1024x1792", "16:9": "1792x1024", "21:9": "1536x672", } class TextToImageGenerator: """文生图生成器""" def __init__(self, config: Optional[Dict[str, str]] = None): """ 初始化生成器 Args: config: 配置字典,包含 api_key, base_url, model 如果不传则从环境变量或配置文件读取 """ if config is None: config = self._load_config() self.api_key = config.get("api_key") or config.get("IMAGE_API_KEY") self.base_url = config.get("base_url") or config.get("IMAGE_API_BASE_URL") self.model = config.get("model") or config.get("IMAGE_MODEL") or "lyra-flash-9" if not self.api_key or not self.base_url: raise ValueError("缺少必要的 API 配置:api_key 和 base_url") def _load_config(self) -> Dict[str, str]: """从配置文件或环境变量加载配置""" config = {} config_path = Path(__file__).parent.parent / "config" / "settings.json" if config_path.exists(): with open(config_path, "r", encoding="utf-8") as f: settings = json.load(f) api_config = settings.get("image_api", {}) config["api_key"] = api_config.get("key") config["base_url"] = api_config.get("base_url") config["model"] = api_config.get("model") config["api_key"] = os.getenv("IMAGE_API_KEY", config.get("api_key")) config["base_url"] = os.getenv("IMAGE_API_BASE_URL", config.get("base_url")) config["model"] = os.getenv("IMAGE_MODEL", config.get("model")) return config @staticmethod def image_to_base64(image_path: str, with_prefix: bool = True) -> str: """将图片文件转换为 base64 编码""" path = Path(image_path) if not path.exists(): raise FileNotFoundError(f"图片文件不存在: {image_path}") suffix = path.suffix.lower() mime_types = { ".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".png": "image/png", ".gif": "image/gif", ".webp": "image/webp", } mime_type = mime_types.get(suffix, "image/png") with open(image_path, "rb") as f: b64_str = base64.b64encode(f.read()).decode("utf-8") if with_prefix: return f"data:{mime_type};base64,{b64_str}" return b64_str def generate( self, prompt: str, size: Optional[str] = None, aspect_ratio: Optional[str] = None, image_size: Optional[str] = None, output_path: Optional[str] = None, response_format: str = "b64_json", ref_image: Optional[str] = None, ) -> Dict[str, Any]: """ 生成图片 Args: prompt: 中文图像描述提示词 size: 图片尺寸 (如 1792x1024),与 aspect_ratio 二选一 aspect_ratio: 宽高比 (如 16:9, 3:4),推荐使用 image_size: 分辨率 (1K/2K/4K),仅 gemini-3.0-pro-image-preview 支持 output_path: 输出文件路径,如果提供则保存图片 response_format: 响应格式,默认 b64_json ref_image: 参考图片路径,用于风格参考 Returns: 包含生成结果的字典 """ if ref_image: return self._generate_with_reference( prompt=prompt, ref_image=ref_image, aspect_ratio=aspect_ratio, size=size, output_path=output_path, response_format=response_format, ) payload: Dict[str, Any] = { "model": self.model, "prompt": prompt, "response_format": response_format, } # 确定尺寸:优先用 aspect_ratio 映射,其次用 size if aspect_ratio: payload["size"] = RATIO_TO_SIZE.get(aspect_ratio, "1024x1024") elif size: payload["size"] = size else: payload["size"] = "1792x1024" # 默认 16:9 headers = { "Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}", } try: if self.base_url and "dashscope" in self.base_url: return self._generate_aliyun( prompt, size, aspect_ratio, output_path, response_format ) with httpx.Client(timeout=180.0) as client: response = client.post( f"{self.base_url}/images/generations", headers=headers, json=payload ) response.raise_for_status() result = response.json() if output_path and result.get("data"): b64_data = result["data"][0].get("b64_json") if b64_data: self._save_image(b64_data, output_path) result["saved_path"] = output_path return { "success": True, "data": result, "saved_path": output_path if output_path else None, } except httpx.HTTPStatusError as e: return { "success": False, "error": f"HTTP 错误: {e.response.status_code}", "detail": str(e), } except Exception as e: return {"success": False, "error": "生成失败", "detail": str(e)} def _generate_aliyun( self, prompt, size, aspect_ratio, output_path, response_format ): if aspect_ratio: size = RATIO_TO_SIZE.get(aspect_ratio, "1024*1024").replace("x", "*") elif not size: size = "1024*1024" else: size = size.replace("x", "*") payload = { "model": self.model, "input": {"messages": [{"role": "user", "content": [{"text": prompt}]}]}, "parameters": {"size": size, "response_format": "base64"}, } headers = { "Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}", } try: with httpx.Client(timeout=300.0) as client: response = client.post( f"{self.base_url}/api/v1/services/aigc/multimodal-generation/generation", headers=headers, json=payload, ) response.raise_for_status() result = response.json() if output_path: image_url = ( result.get("output", {}) .get("choices", [{}])[0] .get("message", {}) .get("content", [{}])[0] .get("image") ) if image_url: self._download_image(image_url, output_path) result["saved_path"] = output_path return {"success": True, "data": result, "saved_path": output_path} except httpx.HTTPStatusError as e: return { "success": False, "error": "HTTP错误: %s" % e.response.status_code, "detail": str(e), } except Exception as e: return {"success": False, "error": "生成失败", "detail": str(e)} def _download_image(self, url, output_path): with httpx.Client(timeout=60.0) as client: response = client.get(url) response.raise_for_status() Path(output_path).parent.mkdir(parents=True, exist_ok=True) with open(output_path, "wb") as f: f.write(response.content) def _generate_with_reference( self, prompt: str, ref_image: str, aspect_ratio: Optional[str] = None, size: Optional[str] = None, output_path: Optional[str] = None, response_format: str = "b64_json", ) -> Dict[str, Any]: """ 参考图片风格生成新图 Args: prompt: 新图内容描述 ref_image: 参考图片路径 aspect_ratio: 宽高比 size: 尺寸 output_path: 输出路径 response_format: 响应格式 """ image_b64 = self.image_to_base64(ref_image) enhanced_prompt = f"参考这张图片的背景风格、配色方案和视觉设计,保持完全一致的风格,生成新内容:{prompt}" # 确定尺寸:优先用 aspect_ratio 映射,其次用 size if size is None: size = ( RATIO_TO_SIZE.get(aspect_ratio, "1024x1792") if aspect_ratio else "1024x1792" ) payload = { "model": self.model, "prompt": enhanced_prompt, "image": image_b64, "size": size, "response_format": response_format, } headers = { "Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}", } try: with httpx.Client(timeout=180.0) as client: response = client.post( f"{self.base_url}/images/edits", headers=headers, json=payload ) response.raise_for_status() result = response.json() if output_path and result.get("data"): b64_data = result["data"][0].get("b64_json") if b64_data: self._save_image(b64_data, output_path) result["saved_path"] = output_path return { "success": True, "data": result, "saved_path": output_path if output_path else None, } except httpx.HTTPStatusError as e: return { "success": False, "error": f"HTTP 错误: {e.response.status_code}", "detail": str(e), } except Exception as e: return {"success": False, "error": "生成失败", "detail": str(e)} def _save_image(self, b64_data: str, output_path: str) -> None: """保存 base64 图片到文件""" image_data = base64.b64decode(b64_data) Path(output_path).parent.mkdir(parents=True, exist_ok=True) with open(output_path, "wb") as f: f.write(image_data) def main(): """命令行入口""" import argparse import time parser = argparse.ArgumentParser( description="文生图工具", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=f""" 尺寸参数说明: -r/--ratio 推荐使用,支持: {", ".join(VALID_ASPECT_RATIOS)} -s/--size 传统尺寸,支持: {", ".join(VALID_SIZES[:4])}... --resolution 分辨率(1K/2K/4K),仅 gemini-3.0-pro-image-preview 支持 --ref 参考图片路径,后续图片将参考首图风格生成 示例: python text_to_image.py "描述" -r 3:4 # 竖版 3:4 python text_to_image.py "描述" -r 9:16 -o out.png # 竖屏 9:16 python text_to_image.py "描述" -s 1024x1792 # 传统尺寸 # 长图场景:首图定调,后续参考首图风格 python text_to_image.py "首屏内容" -r 3:4 -o 01.png python text_to_image.py "第二屏内容" -r 3:4 --ref 01.png -o 02.png """, ) parser.add_argument("prompt", help="中文图像描述提示词") parser.add_argument("-o", "--output", help="输出文件路径(默认保存到当前目录)") parser.add_argument( "-r", "--ratio", help=f"宽高比,推荐使用。可选: {', '.join(VALID_ASPECT_RATIOS)}", ) parser.add_argument("-s", "--size", help="图片尺寸 (如 1792x1024)") parser.add_argument("--resolution", help="分辨率 (1K/2K/4K),仅部分模型支持") parser.add_argument("--ref", help="参考图片路径,用于风格参考(长图场景)") args = parser.parse_args() if args.ratio and args.ratio not in VALID_ASPECT_RATIOS: print(f"错误: 不支持的宽高比 '{args.ratio}'") print(f"支持的宽高比: {', '.join(VALID_ASPECT_RATIOS)}") return if args.size and args.size not in VALID_SIZES: print(f"警告: 尺寸 '{args.size}' 可能不被支持") print(f"推荐使用 -r/--ratio 参数指定宽高比") if args.ref and not os.path.exists(args.ref): print(f"错误: 参考图片不存在: {args.ref}") return output_path = args.output if not output_path: timestamp = time.strftime("%Y%m%d_%H%M%S") output_path = f"generated_{timestamp}.png" generator = TextToImageGenerator() result = generator.generate( prompt=args.prompt, size=args.size, aspect_ratio=args.ratio, image_size=args.resolution, output_path=output_path, ref_image=args.ref, ) if result["success"]: print(f"生成成功!") if result.get("saved_path"): print(f"图片已保存到: {result['saved_path']}") else: print(f"生成失败: {result['error']}") print(f"详情: {result.get('detail', 'N/A')}") if __name__ == "__main__": main()