新增16个AI技能:包含图像生成、视频剪辑、数据分析、智能查询等功能模块
This commit is contained in:
273
.opencode/skills/image-service/scripts/image_to_image.py
Normal file
273
.opencode/skills/image-service/scripts/image_to_image.py
Normal file
@@ -0,0 +1,273 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
图生图脚本 (Image-to-Image)
|
||||
使用 Lyra Flash API 基于参考图片和中文指令进行图片编辑
|
||||
|
||||
Author: 翟星人
|
||||
"""
|
||||
|
||||
import httpx
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
from typing import Dict, Any, Optional, Union
|
||||
from pathlib import Path
|
||||
|
||||
# Aspect ratios accepted by the -r/--ratio command-line option.
VALID_ASPECT_RATIOS = [
    "1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"
]

# Pixel sizes ("WIDTHxHEIGHT") regarded as known-good for -s/--size.  The CLI
# only prints a warning, and still sends the request, for a size not listed.
VALID_SIZES = [
    "1024x1024",
    "1536x1024", "1792x1024", "1344x768", "1248x832", "1184x864", "1152x896", "1536x672",
    "1024x1536", "1024x1792", "768x1344", "832x1248", "864x1184", "896x1152"
]

# Size string sent to the API for each supported aspect ratio.  Every value
# here is also a member of VALID_SIZES.
# NOTE(review): several entries only approximate their key (e.g. "3:4" maps
# to 1024x1536, which is numerically 2:3, and "9:16" maps to 1024x1792, which
# is 4:7) -- presumably the closest size the backend accepts; confirm.
RATIO_TO_SIZE = {
    "1:1": "1024x1024",
    "2:3": "832x1248",
    "3:2": "1248x832",
    "3:4": "1024x1536",
    "4:3": "1536x1024",
    "4:5": "864x1184",
    "5:4": "1184x864",
    "9:16": "1024x1792",
    "16:9": "1792x1024",
    "21:9": "1536x672"
}
|
||||
|
||||
|
||||
class ImageToImageEditor:
    """Edit an image from a reference picture plus a text instruction.

    Requests are POSTed as JSON to ``{base_url}/images/edits`` with a bearer
    token; the reference image travels inline as a data URL.
    """

    # File suffix -> MIME type used when building a data URL; any other
    # suffix is labelled image/png.
    _MIME_TYPES = {
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.webp': 'image/webp'
    }

    # Size used when neither a ratio nor a size is given, or the ratio is
    # not a key of RATIO_TO_SIZE.
    _DEFAULT_SIZE = "1024x1536"

    # Characters that can occur in standard or URL-safe base64 text
    # (whitespace included because encoders may wrap lines).
    _BASE64_CHARS = frozenset(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=-_ \t\r\n"
    )

    def __init__(self, config: Optional[Dict[str, str]] = None):
        """Initialise the editor.

        Args:
            config: Mapping with ``api_key``, ``base_url`` and ``model``; the
                upper-case ``IMAGE_API_KEY`` / ``IMAGE_API_BASE_URL`` /
                ``IMAGE_MODEL`` keys are accepted as fallbacks.  When omitted
                the values are read by ``_load_config``.

        Raises:
            ValueError: If the API key or the base URL is missing.
        """
        if config is None:
            config = self._load_config()

        self.api_key = config.get('api_key') or config.get('IMAGE_API_KEY')
        base_url = config.get('base_url') or config.get('IMAGE_API_BASE_URL')
        # Strip trailing slashes so "{base_url}/images/edits" never contains "//".
        self.base_url = base_url.rstrip('/') if base_url else base_url
        self.model = config.get('model') or config.get('IMAGE_MODEL') or 'lyra-flash-9'

        if not self.api_key or not self.base_url:
            raise ValueError("缺少必要的 API 配置:api_key 和 base_url")

    def _load_config(self) -> Dict[str, str]:
        """Read settings from ``../config/settings.json`` and the environment.

        The ``image_api`` section of the JSON file (if the file exists)
        provides the base values; non-empty ``IMAGE_API_KEY``,
        ``IMAGE_API_BASE_URL`` and ``IMAGE_MODEL`` environment variables
        override them.

        Returns:
            Dict with the keys ``api_key``, ``base_url`` and ``model``; a
            value is ``None`` when no source provides it.
        """
        config = {}

        config_path = Path(__file__).parent.parent / 'config' / 'settings.json'
        if config_path.exists():
            with open(config_path, 'r', encoding='utf-8') as f:
                settings = json.load(f)
            api_config = settings.get('image_api', {})
            config['api_key'] = api_config.get('key')
            config['base_url'] = api_config.get('base_url')
            config['model'] = api_config.get('model')

        # "or" (rather than a getenv default) so that a variable exported as
        # an empty string does not hide the value from the settings file.
        config['api_key'] = os.getenv('IMAGE_API_KEY') or config.get('api_key')
        config['base_url'] = os.getenv('IMAGE_API_BASE_URL') or config.get('base_url')
        config['model'] = os.getenv('IMAGE_MODEL') or config.get('model')

        return config

    @staticmethod
    def image_to_base64(image_path: str, with_prefix: bool = True) -> str:
        """Encode an image file as base64 text.

        Args:
            image_path: Path of the image file.
            with_prefix: When true, return a ``data:<mime>;base64,...`` URL;
                otherwise return the bare base64 text.

        Returns:
            The encoded file content.

        Raises:
            FileNotFoundError: If ``image_path`` does not exist.
        """
        path = Path(image_path)
        if not path.exists():
            raise FileNotFoundError(f"图片文件不存在: {image_path}")

        mime_type = ImageToImageEditor._MIME_TYPES.get(path.suffix.lower(), 'image/png')

        with open(image_path, 'rb') as f:
            b64_str = base64.b64encode(f.read()).decode('utf-8')

        if with_prefix:
            return f"data:{mime_type};base64,{b64_str}"
        return b64_str

    def _resolve_image(self, image: Union[str, bytes]) -> str:
        """Convert any supported image input into a data URL.

        Args:
            image: Path of an existing file, a ``data:`` URL, bare base64
                text, or raw image bytes.

        Returns:
            A data URL.  Bare base64 text and raw bytes are labelled
            ``image/png``.

        Raises:
            FileNotFoundError: If ``image`` is a string that is not an
                existing file, not a data URL, and cannot be base64 (it is
                empty or contains characters outside the base64 alphabet) --
                typically a mistyped path.
        """
        if not isinstance(image, str):
            return f"data:image/png;base64,{base64.b64encode(image).decode('utf-8')}"
        if os.path.isfile(image):
            return self.image_to_base64(image)
        if image.startswith('data:'):
            return image
        # Anything left is assumed to be bare base64; reject strings that
        # cannot be, instead of uploading a file name as image data.
        if not image or not set(image) <= self._BASE64_CHARS:
            raise FileNotFoundError(f"图片文件不存在: {image}")
        return f"data:image/png;base64,{image}"

    def _store_result(self, result: Dict[str, Any], output_path: Optional[str]) -> Optional[str]:
        """Write the first returned image to ``output_path`` if possible.

        Args:
            result: Decoded JSON response of the API.
            output_path: Destination file, or ``None`` to skip saving.

        Returns:
            ``output_path`` when a file was written (``result["saved_path"]``
            is set as well), otherwise ``None``.
        """
        if not output_path or not result.get("data"):
            return None
        b64_data = result["data"][0].get("b64_json")
        if not b64_data:
            return None
        self._save_image(b64_data, output_path)
        result["saved_path"] = output_path
        return output_path

    def edit(
        self,
        image: Union[str, bytes],
        prompt: str,
        aspect_ratio: Optional[str] = None,
        size: Optional[str] = None,
        output_path: Optional[str] = None,
        response_format: str = "b64_json"
    ) -> Dict[str, Any]:
        """Send an edit request and optionally save the returned image.

        Args:
            image: File path, data URL, bare base64 text, or raw bytes.
            prompt: Editing instruction.
            aspect_ratio: Ratio such as ``3:4``; looked up in
                ``RATIO_TO_SIZE`` and takes precedence over ``size``.
            size: Explicit size such as ``1024x1792``.
            output_path: Where to save the first returned image, if any.
            response_format: Value forwarded as ``response_format``.

        Returns:
            On success ``{"success": True, "data": <response JSON>,
            "saved_path": <path or None>}`` where ``saved_path`` is set only
            if a file was actually written.  On failure ``{"success": False,
            "error": <summary>, "detail": <message>}``.
        """
        try:
            image_b64 = self._resolve_image(image)
        except FileNotFoundError as e:
            return {
                "success": False,
                "error": "编辑失败",
                "detail": str(e)
            }

        payload: Dict[str, Any] = {
            "model": self.model,
            "prompt": prompt,
            "image": image_b64,
            "response_format": response_format
        }

        # Size selection: a ratio wins over an explicit size; unknown ratios
        # and missing values fall back to the default size.
        if aspect_ratio:
            payload["size"] = RATIO_TO_SIZE.get(aspect_ratio, self._DEFAULT_SIZE)
        else:
            payload["size"] = size or self._DEFAULT_SIZE

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}"
        }

        try:
            with httpx.Client(timeout=180.0) as client:
                response = client.post(
                    f"{self.base_url}/images/edits",
                    headers=headers,
                    json=payload
                )
                response.raise_for_status()
                result = response.json()

            return {
                "success": True,
                "data": result,
                # Only report a path when the file really exists on disk.
                "saved_path": self._store_result(result, output_path)
            }

        except httpx.HTTPStatusError as e:
            return {
                "success": False,
                "error": f"HTTP 错误: {e.response.status_code}",
                "detail": str(e)
            }
        except Exception as e:
            # Boundary of the public API: callers expect a result dict, not
            # an exception, for transport / decoding / file-system failures.
            return {
                "success": False,
                "error": "编辑失败",
                "detail": str(e)
            }

    def _save_image(self, b64_data: str, output_path: str) -> None:
        """Decode base64 image data and write it to ``output_path``.

        Parent directories are created as needed.  A leading
        ``data:...;base64,`` prefix is tolerated and removed before decoding.
        """
        if b64_data.startswith('data:'):
            b64_data = b64_data.split(',', 1)[-1]
        image_data = base64.b64decode(b64_data)
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'wb') as f:
            f.write(image_data)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: edit one image and save the result.

    The process exits with status 1 when the requested aspect ratio is not
    supported, the API configuration is incomplete, or the edit request
    fails, so that calling scripts can detect the failure.
    """
    import argparse
    import sys
    import time

    parser = argparse.ArgumentParser(
        description='图生图编辑工具',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=f'''
尺寸参数说明:
-r/--ratio 宽高比(推荐),支持: {", ".join(VALID_ASPECT_RATIOS)}
-s/--size 传统尺寸,支持: {", ".join(VALID_SIZES[:4])}...

示例:
python image_to_image.py input.png "编辑描述" -r 3:4
python image_to_image.py input.png "编辑描述" -s 1024x1536
'''
    )
    parser.add_argument('image', help='输入图片路径')
    parser.add_argument('prompt', help='中文编辑指令')
    parser.add_argument('-o', '--output', help='输出文件路径(默认保存到当前目录)')
    parser.add_argument('-r', '--ratio', help=f'宽高比(推荐)。可选: {", ".join(VALID_ASPECT_RATIOS)}')
    parser.add_argument('-s', '--size', help='传统尺寸,如 1024x1536')

    args = parser.parse_args()

    if args.ratio and args.ratio not in VALID_ASPECT_RATIOS:
        print(f"错误: 不支持的宽高比 '{args.ratio}'")
        print(f"支持的宽高比: {', '.join(VALID_ASPECT_RATIOS)}")
        sys.exit(1)

    # An unlisted size is only a warning: the request is still attempted.
    if args.size and args.size not in VALID_SIZES:
        print(f"警告: 尺寸 '{args.size}' 可能不被支持")

    # Default output name: timestamped PNG in the current directory.
    output_path = args.output or f"edited_{time.strftime('%Y%m%d_%H%M%S')}.png"

    try:
        editor = ImageToImageEditor()
    except ValueError as e:
        # Missing API key / base URL: report it instead of a traceback.
        print(f"编辑失败: {e}")
        sys.exit(1)

    result = editor.edit(
        image=args.image,
        prompt=args.prompt,
        aspect_ratio=args.ratio,
        size=args.size,
        output_path=output_path
    )

    if not result["success"]:
        print(f"编辑失败: {result['error']}")
        print(f"详情: {result.get('detail', 'N/A')}")
        sys.exit(1)

    print("编辑成功!")
    if result.get("saved_path"):
        print(f"图片已保存到: {result['saved_path']}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
287
.opencode/skills/image-service/scripts/image_to_text.py
Normal file
287
.opencode/skills/image-service/scripts/image_to_text.py
Normal file
@@ -0,0 +1,287 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
图生文脚本 (Image-to-Text) - 视觉识别
|
||||
使用 Qwen2.5-VL 模型分析图片内容并生成文字描述
|
||||
|
||||
Author: 翟星人
|
||||
"""
|
||||
|
||||
import httpx
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
from typing import Dict, Any, Optional, Union, List
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class ImageToTextAnalyzer:
    """Describe or analyse an image with a vision chat model.

    Requests are POSTed as JSON to ``{base_url}/chat/completions`` with a
    bearer token; the image is sent as an ``image_url`` content part.
    """

    # Built-in analysis modes: mode name -> prompt sent with the image.
    ANALYSIS_MODES = {
        "describe": "请详细描述这张图片的内容,包括:人物、场景、物品、颜色、布局等所有细节。",
        "ocr": "请仔细识别这张图片中的所有文字内容,按照文字在图片中的位置顺序输出。如果是中文,请保持原文输出。",
        "chart": "请分析这张图表的内容,包括:图表类型、数据趋势、关键数据点、标题标签、以及数据的结论或洞察。",
        "fashion": "请分析这张图片中人物的穿搭,包括:服装款式、颜色搭配、配饰、整体风格等。",
        "product": "请分析这张产品图片,包括:产品类型、外观特征、功能特点、品牌信息等。",
        "scene": "请描述这张图片的场景,包括:地点、环境、氛围、时间(白天/夜晚)等。"
    }

    # Model used when no source configures one.
    _DEFAULT_MODEL = 'qwen2.5-vl-72b-instruct'

    # File suffix -> MIME type used when building a data URL; any other
    # suffix is labelled image/png.
    _MIME_TYPES = {
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.webp': 'image/webp'
    }

    # Characters that can occur in standard or URL-safe base64 text
    # (whitespace included because encoders may wrap lines).
    _BASE64_CHARS = frozenset(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=-_ \t\r\n"
    )

    def __init__(self, config: Optional[Dict[str, str]] = None):
        """Initialise the analyser.

        Args:
            config: Mapping with ``api_key``, ``base_url`` and ``model``; the
                upper-case ``VISION_*`` keys and, for key and URL, the
                ``IMAGE_*`` keys are accepted as fallbacks.  When omitted
                the values are read by ``_load_config``.

        Raises:
            ValueError: If the API key or the base URL is missing.
        """
        if config is None:
            config = self._load_config()

        self.api_key = config.get('api_key') or config.get('VISION_API_KEY') or config.get('IMAGE_API_KEY')
        base_url = config.get('base_url') or config.get('VISION_API_BASE_URL') or config.get('IMAGE_API_BASE_URL')
        # Strip trailing slashes so "{base_url}/chat/completions" never contains "//".
        self.base_url = base_url.rstrip('/') if base_url else base_url
        self.model = config.get('model') or config.get('VISION_MODEL') or self._DEFAULT_MODEL

        if not self.api_key or not self.base_url:
            raise ValueError("缺少必要的 API 配置:api_key 和 base_url")

    def _load_config(self) -> Dict[str, str]:
        """Read settings from ``../config/settings.json`` and the environment.

        From the JSON file (if it exists) the ``vision_api`` section is used
        when present and non-empty; otherwise key and base URL come from the
        ``image_api`` section.  Non-empty environment variables override the
        file: ``VISION_API_KEY`` / ``VISION_API_BASE_URL`` first, then
        ``IMAGE_API_KEY`` / ``IMAGE_API_BASE_URL``; ``VISION_MODEL`` selects
        the model.

        Returns:
            Dict with the keys ``api_key``, ``base_url`` and ``model``.
        """
        config = {}

        config_path = Path(__file__).parent.parent / 'config' / 'settings.json'
        if config_path.exists():
            with open(config_path, 'r', encoding='utf-8') as f:
                settings = json.load(f)
            vision_config = settings.get('vision_api', {})
            if vision_config:
                config['api_key'] = vision_config.get('key')
                config['base_url'] = vision_config.get('base_url')
                config['model'] = vision_config.get('model')
            else:
                # No dedicated vision section: reuse the image API endpoint.
                api_config = settings.get('image_api', {})
                config['api_key'] = api_config.get('key')
                config['base_url'] = api_config.get('base_url')

        # "or" chains (rather than getenv defaults) so that a variable
        # exported as an empty string does not hide lower-priority values.
        config['api_key'] = (
            os.getenv('VISION_API_KEY') or os.getenv('IMAGE_API_KEY') or config.get('api_key')
        )
        config['base_url'] = (
            os.getenv('VISION_API_BASE_URL') or os.getenv('IMAGE_API_BASE_URL') or config.get('base_url')
        )
        config['model'] = os.getenv('VISION_MODEL') or config.get('model') or self._DEFAULT_MODEL

        return config

    @staticmethod
    def image_to_base64(image_path: str) -> str:
        """Encode an image file as a ``data:<mime>;base64,...`` URL.

        Args:
            image_path: Path of the image file.

        Returns:
            The data URL.

        Raises:
            FileNotFoundError: If ``image_path`` does not exist.
        """
        path = Path(image_path)
        if not path.exists():
            raise FileNotFoundError(f"图片文件不存在: {image_path}")

        mime_type = ImageToTextAnalyzer._MIME_TYPES.get(path.suffix.lower(), 'image/png')

        with open(image_path, 'rb') as f:
            b64_str = base64.b64encode(f.read()).decode('utf-8')

        return f"data:{mime_type};base64,{b64_str}"

    def _resolve_image(self, image: Union[str, bytes]) -> str:
        """Convert any supported image input into a URL for the request.

        Args:
            image: Path of an existing file, a ``data:`` URL, an
                ``http://`` / ``https://`` URL, bare base64 text, or raw
                image bytes.

        Returns:
            A data URL, or the remote URL unchanged.  Bare base64 text and
            raw bytes are labelled ``image/png``.

        Raises:
            FileNotFoundError: If ``image`` is a string that matches none of
                the accepted forms (it is empty or contains characters
                outside the base64 alphabet) -- typically a mistyped path.
        """
        if not isinstance(image, str):
            return f"data:image/png;base64,{base64.b64encode(image).decode('utf-8')}"
        if os.path.isfile(image):
            return self.image_to_base64(image)
        if image.startswith(('data:', 'http://', 'https://')):
            return image
        # Anything left is assumed to be bare base64; reject strings that
        # cannot be, instead of sending a file name as image data.
        if not image or not set(image) <= self._BASE64_CHARS:
            raise FileNotFoundError(f"图片文件不存在: {image}")
        return f"data:image/png;base64,{image}"

    def _build_payload(self, prompt: str, image_url: str, max_tokens: int, temperature: float) -> Dict[str, Any]:
        """Build the chat-completions request body: one user message holding
        the text prompt followed by the image."""
        return {
            "model": self.model,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": image_url}}
                    ]
                }
            ],
            "max_tokens": max_tokens,
            "temperature": temperature
        }

    @staticmethod
    def _extract_content(result: Dict[str, Any]) -> str:
        """Return the first choice's message content, or ``""`` when the
        response has no choices, no message, or null content."""
        choices = result.get("choices") or [{}]
        message = choices[0].get("message") or {}
        return message.get("content") or ""

    def analyze(
        self,
        image: Union[str, bytes],
        prompt: Optional[str] = None,
        mode: str = "describe",
        max_tokens: int = 2000,
        temperature: float = 0.7
    ) -> Dict[str, Any]:
        """Analyse an image and return the model's text answer.

        Args:
            image: File path, URL, data URL, bare base64 text, or raw bytes.
            prompt: Custom prompt; when given, ``mode`` does not influence
                the request.
            mode: Key of ``ANALYSIS_MODES``; an unknown key falls back to
                the ``describe`` prompt.
            max_tokens: Forwarded as ``max_tokens``.
            temperature: Forwarded as ``temperature``.

        Returns:
            On success ``{"success": True, "content": <text>, "mode": mode,
            "usage": <usage dict>}``.  On failure ``{"success": False,
            "error": <summary>, "detail": <message>}``.
        """
        if prompt is None:
            prompt = self.ANALYSIS_MODES.get(mode, self.ANALYSIS_MODES["describe"])

        try:
            image_url = self._resolve_image(image)
        except FileNotFoundError as e:
            return {
                "success": False,
                "error": "分析失败",
                "detail": str(e)
            }

        payload = self._build_payload(prompt, image_url, max_tokens, temperature)
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}"
        }

        try:
            with httpx.Client(timeout=120.0) as client:
                response = client.post(
                    f"{self.base_url}/chat/completions",
                    headers=headers,
                    json=payload
                )
                response.raise_for_status()
                result = response.json()

            return {
                "success": True,
                "content": self._extract_content(result),
                "mode": mode,
                # "or {}" also covers an explicit null from the server.
                "usage": result.get("usage") or {}
            }

        except httpx.HTTPStatusError as e:
            return {
                "success": False,
                "error": f"HTTP 错误: {e.response.status_code}",
                "detail": str(e)
            }
        except Exception as e:
            # Boundary of the public API: callers expect a result dict, not
            # an exception, for transport / decoding failures.
            return {
                "success": False,
                "error": "分析失败",
                "detail": str(e)
            }

    def describe(self, image: Union[str, bytes]) -> Dict[str, Any]:
        """General description (``describe`` mode)."""
        return self.analyze(image, mode="describe")

    def ocr(self, image: Union[str, bytes]) -> Dict[str, Any]:
        """Text recognition (``ocr`` mode)."""
        return self.analyze(image, mode="ocr")

    def analyze_chart(self, image: Union[str, bytes]) -> Dict[str, Any]:
        """Chart analysis (``chart`` mode)."""
        return self.analyze(image, mode="chart")

    def analyze_fashion(self, image: Union[str, bytes]) -> Dict[str, Any]:
        """Outfit analysis (``fashion`` mode)."""
        return self.analyze(image, mode="fashion")

    def analyze_product(self, image: Union[str, bytes]) -> Dict[str, Any]:
        """Product analysis (``product`` mode)."""
        return self.analyze(image, mode="product")

    def analyze_scene(self, image: Union[str, bytes]) -> Dict[str, Any]:
        """Scene analysis (``scene`` mode)."""
        return self.analyze(image, mode="scene")

    def batch_analyze(
        self,
        images: List[str],
        mode: str = "describe"
    ) -> List[Dict[str, Any]]:
        """Analyse several images one after another.

        Args:
            images: Image inputs, processed in order.
            mode: Analysis mode applied to every image.

        Returns:
            One ``analyze`` result per input, each extended with an
            ``"image"`` key holding the corresponding input.
        """
        results = []
        for image in images:
            result = self.analyze(image, mode=mode)
            result["image"] = image
            results.append(result)
        return results
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: analyse one image and print the answer.

    The process exits with status 1 when the API configuration is incomplete
    or the analysis fails, so that calling scripts can detect the failure.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(description='图生文分析工具(视觉识别)')
    parser.add_argument('image', help='输入图片路径')
    # Offer exactly the modes the analyser knows instead of a second
    # hand-maintained list.
    parser.add_argument('-m', '--mode', default='describe',
                        choices=list(ImageToTextAnalyzer.ANALYSIS_MODES),
                        help='分析模式')
    parser.add_argument('-p', '--prompt', help='自定义分析提示词')
    parser.add_argument('--max-tokens', type=int, default=2000, help='最大输出 token 数')

    args = parser.parse_args()

    try:
        analyzer = ImageToTextAnalyzer()
    except ValueError as e:
        # Missing API key / base URL: report it instead of a traceback.
        print(f"分析失败: {e}")
        sys.exit(1)

    result = analyzer.analyze(
        image=args.image,
        prompt=args.prompt,
        mode=args.mode,
        max_tokens=args.max_tokens
    )

    if not result["success"]:
        print(f"分析失败: {result['error']}")
        print(f"详情: {result.get('detail', 'N/A')}")
        sys.exit(1)

    # Tolerate a missing or null usage block in the result.
    usage = result.get("usage") or {}
    print(f"\n=== 分析结果 ({result['mode']}) ===\n")
    print(result["content"])
    print("\n=== Token 使用 ===")
    print(f"输入: {usage.get('prompt_tokens', 'N/A')}")
    print(f"输出: {usage.get('completion_tokens', 'N/A')}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
251
.opencode/skills/image-service/scripts/merge_long_image.py
Normal file
251
.opencode/skills/image-service/scripts/merge_long_image.py
Normal file
@@ -0,0 +1,251 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
长图拼接脚本 (Merge Long Image)
|
||||
将多张图片按顺序垂直拼接成一张微信长图
|
||||
|
||||
Author: 翟星人
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import glob as glob_module
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Dict, Any
|
||||
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
|
||||
|
||||
class LongImageMerger:
    """Stack several images vertically into one long image of fixed width."""

    def __init__(self, target_width: int = 1080):
        """Initialise the merger.

        Args:
            target_width: Width in pixels every image is scaled to
                (default 1080).
        """
        self.target_width = target_width

    def _blend_images(self, img_top: 'Image.Image', img_bottom: 'Image.Image', blend_height: int) -> 'Image.Image':
        """Fade the bottom edge of ``img_top`` into the top of ``img_bottom``.

        The blend region is limited to a quarter of either image's height.
        Inside it the weight of the upper image falls linearly from 1 (first
        row) to 0 (last row).  Both images are expected to have the same
        width and mode.

        Args:
            img_top: Image placed above the seam.
            img_bottom: Image placed below the seam.
            blend_height: Requested height of the blend region in pixels.

        Returns:
            A copy of ``img_bottom`` whose top rows are blended with the
            bottom rows of ``img_top``; an unmodified copy when the usable
            blend height is zero or negative.
        """
        blend_height = min(blend_height, img_top.height // 4, img_bottom.height // 4)

        result = img_bottom.copy()
        if blend_height <= 0:
            # Images under 4 px tall (or a non-positive request) leave
            # nothing to blend; cropping a zero-height region would fail.
            return result

        top_region = img_top.crop((0, img_top.height - blend_height, img_top.width, img_top.height))
        bottom_region = img_bottom.crop((0, 0, img_bottom.width, blend_height))

        top_array = np.array(top_region, dtype=np.float32)
        bottom_array = np.array(bottom_region, dtype=np.float32)

        # One weight per row, shaped to broadcast over the columns and, for
        # multi-channel images, over the channels as well.
        alpha = np.linspace(1, 0, blend_height).reshape((-1,) + (1,) * (top_array.ndim - 1))

        blended_array = top_array * alpha + bottom_array * (1 - alpha)
        blended_array = np.clip(blended_array, 0, 255).astype(np.uint8)

        result.paste(Image.fromarray(blended_array), (0, 0))
        return result

    def merge(
        self,
        image_paths: List[str],
        output_path: str,
        gap: int = 0,
        background_color: str = "white",
        blend: int = 0
    ) -> Dict[str, Any]:
        """Merge the given images, in order, into one long image.

        Paths that do not exist are skipped with a printed warning.  Each
        image is converted to RGB and scaled to ``target_width`` keeping its
        aspect ratio.

        Args:
            image_paths: Image files, top to bottom.
            output_path: Destination file; parent directories are created.
            gap: Pixels of background between consecutive images.
            background_color: Fill colour of the canvas.
            blend: Height in pixels of the seam blend region; 0 disables
                blending.

        Returns:
            On success ``{"success": True, "saved_path", "width", "height",
            "image_count"}``; on failure ``{"success": False, "error"}``.
        """
        if not image_paths:
            return {"success": False, "error": "没有提供图片路径"}

        valid_paths = []
        for p in image_paths:
            if os.path.exists(p):
                valid_paths.append(p)
            else:
                print(f"警告: 文件不存在,跳过 - {p}")

        if not valid_paths:
            return {"success": False, "error": "没有有效的图片文件"}

        opened = []
        resized_imgs = []
        try:
            for p in valid_paths:
                opened.append(Image.open(p))

            for img in opened:
                # Normalise every mode (RGBA, P, L, CMYK, ...) so that
                # pasting and seam blending always work on 3-channel data.
                if img.mode != 'RGB':
                    img = img.convert('RGB')
                ratio = self.target_width / img.width
                # Never let an extremely wide image collapse to 0 rows.
                new_height = max(1, int(img.height * ratio))
                resized_imgs.append(
                    img.resize((self.target_width, new_height), Image.Resampling.LANCZOS)
                )

            if blend > 0 and len(resized_imgs) > 1:
                for i in range(1, len(resized_imgs)):
                    blended = self._blend_images(resized_imgs[i - 1], resized_imgs[i], blend)
                    resized_imgs[i].close()
                    resized_imgs[i] = blended

            total_height = sum(img.height for img in resized_imgs) + gap * (len(resized_imgs) - 1)

            long_image = Image.new('RGB', (self.target_width, total_height), background_color)

            y_offset = 0
            for img in resized_imgs:
                long_image.paste(img, (0, y_offset))
                y_offset += img.height + gap

            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
            long_image.save(output_path, quality=95)

            return {
                "success": True,
                "saved_path": output_path,
                "width": self.target_width,
                "height": total_height,
                "image_count": len(resized_imgs)
            }

        except Exception as e:
            # Boundary of the public API: callers expect a result dict for
            # decoding / resizing / file-system failures.
            return {"success": False, "error": str(e)}
        finally:
            # Release file handles and pixel buffers on every path,
            # including failures half-way through.
            for img in opened + resized_imgs:
                img.close()

    def merge_from_pattern(
        self,
        pattern: str,
        output_path: str,
        sort_by: str = "name",
        gap: int = 0,
        background_color: str = "white",
        blend: int = 0
    ) -> Dict[str, Any]:
        """Merge all images matching a glob pattern.

        Args:
            pattern: Glob pattern such as ``"*.png"``.
            output_path: Destination file.
            sort_by: ``"name"`` sorts by path, ``"time"`` by modification
                time; any other value keeps the order returned by ``glob``.
            gap: Pixels of background between consecutive images.
            background_color: Fill colour of the canvas.
            blend: Height in pixels of the seam blend region.

        Returns:
            The result of ``merge``, or ``{"success": False, "error"}`` when
            nothing matches the pattern.
        """
        image_paths = glob_module.glob(pattern)

        if not image_paths:
            return {"success": False, "error": f"没有找到匹配 '{pattern}' 的图片"}

        if sort_by == "name":
            image_paths.sort()
        elif sort_by == "time":
            image_paths.sort(key=os.path.getmtime)

        print(f"找到 {len(image_paths)} 张图片:")
        for i, p in enumerate(image_paths, 1):
            print(f" {i}. {os.path.basename(p)}")

        return self.merge(image_paths, output_path, gap, background_color, blend)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: merge images into one long image.

    Images come either from the positional path list or from the ``-p``
    glob pattern (the pattern takes precedence when both are given).  The
    process exits with status 1 when merging fails, so that calling scripts
    can detect the failure.
    """
    import sys

    parser = argparse.ArgumentParser(
        description='长图拼接工具 - 将多张图片垂直拼接成微信长图',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例用法:
# 拼接指定图片
python merge_long_image.py img1.png img2.png img3.png -o output.png

# 使用通配符匹配
python merge_long_image.py -p "generated_*.png" -o long_image.png

# 指定宽度和间隔
python merge_long_image.py -p "*.png" -o out.png -w 750 -g 20

# 按修改时间排序
python merge_long_image.py -p "*.png" -o out.png --sort time

# 启用接缝融合过渡(推荐40px)
python merge_long_image.py img1.png img2.png -o out.png --blend 40
"""
    )

    parser.add_argument('images', nargs='*', help='要拼接的图片路径列表')
    parser.add_argument('-p', '--pattern', help='glob 模式匹配图片,如 "*.png"')
    parser.add_argument('-o', '--output', required=True, help='输出文件路径')
    parser.add_argument('-w', '--width', type=int, default=1080, help='目标宽度,默认1080')
    parser.add_argument('-g', '--gap', type=int, default=0, help='图片间隔像素,默认0')
    parser.add_argument('--sort', choices=['name', 'time', 'none'], default='name',
                        help='排序方式:name(文件名)/time(修改时间)/none')
    parser.add_argument('--bg', default='white', help='背景颜色,默认 white')
    parser.add_argument('--blend', type=int, default=0,
                        help='接缝融合过渡高度(像素),推荐30-50,默认0不融合')

    args = parser.parse_args()

    if not args.images and not args.pattern:
        parser.error("请提供图片路径列表或使用 -p 指定匹配模式")

    merger = LongImageMerger(target_width=args.width)

    if args.pattern:
        result = merger.merge_from_pattern(
            pattern=args.pattern,
            output_path=args.output,
            sort_by=args.sort,
            gap=args.gap,
            background_color=args.bg,
            blend=args.blend
        )
    else:
        result = merger.merge(
            image_paths=args.images,
            output_path=args.output,
            gap=args.gap,
            background_color=args.bg,
            blend=args.blend
        )

    if not result["success"]:
        print(f"\n拼接失败: {result['error']}")
        sys.exit(1)

    print("\n拼接成功!")
    print(f"输出文件: {result['saved_path']}")
    print(f"尺寸: {result['width']} x {result['height']}")
    print(f"共 {result['image_count']} 张图片")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
140
.opencode/skills/image-service/scripts/research_image.py
Normal file
140
.opencode/skills/image-service/scripts/research_image.py
Normal file
@@ -0,0 +1,140 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
调研报告专用信息图生成脚本
|
||||
预设手绘风格可视化模板,保持系列配图风格统一
|
||||
|
||||
Author: 翟星人
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Preset style templates (hand-drawn infographic look).  Each entry has:
#   name    - display name of the diagram type
#   prefix  - text placed before the title/content in the generated prompt
#   suffix  - text placed after the title/content in the generated prompt
#   trigger - description of when the style applies (shown by --list)
STYLE_TEMPLATES = {
    "arch": {
        "name": "架构图",
        "prefix": "手绘风格技术架构信息图,简洁扁平设计,",
        "suffix": "手绘线条感,柔和的科技蓝配色(#4A90D9),浅灰白色背景,模块化分层布局,圆角矩形框,手写体中文标签,简约图标,整体清新专业。",
        "trigger": "核心架构、系统结构、技术栈、模块组成"
    },
    "flow": {
        "name": "流程图",
        "prefix": "手绘风格流程信息图,简洁扁平设计,",
        "suffix": "手绘线条和箭头,科技蓝(#4A90D9)主色调,浅绿色(#81C784)表示成功节点,浅橙色(#FFB74D)表示判断节点,浅灰白色背景,从上到下或从左到右布局,手写体中文标签,步骤清晰。",
        "trigger": "流程、步骤、工作流、执行顺序"
    },
    "compare": {
        "name": "对比图",
        "prefix": "手绘风格对比信息图,左右分栏设计,",
        "suffix": "手绘线条感,左侧用柔和蓝色(#4A90D9),右侧用柔和橙色(#FF8A65),中间VS分隔,浅灰白色背景,手写体中文标签,对比项目清晰列出,简约图标点缀。",
        "trigger": "对比、vs、区别、差异"
    },
    "concept": {
        "name": "概念图",
        "prefix": "手绘风格概念信息图,中心发散设计,",
        "suffix": "手绘线条感,中心主题用科技蓝(#4A90D9),周围要素用柔和的蓝紫渐变色系,浅灰白色背景,连接线条有手绘感,手写体中文标签,布局均衡美观。",
        "trigger": "核心概念、要素组成、多个方面"
    }
}

# Base paths: BASE_DIR is the parent of the directory holding this script;
# the text-to-image script is expected in BASE_DIR/scripts.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
TEXT_TO_IMAGE_SCRIPT = os.path.join(BASE_DIR, "scripts", "text_to_image.py")
|
||||
|
||||
|
||||
def generate_image(style: str, title: str, content: str, output: str):
    """Generate one infographic with a preset style.

    The prompt is assembled from the style's prefix, the title, the content
    description and the style's suffix, then handed to ``text_to_image.py``
    running in a child Python process.  The process exits with status 1 for
    an unknown style or when the child returns a non-zero status.

    Args:
        style: Key of ``STYLE_TEMPLATES`` (arch/flow/compare/concept).
        title: Diagram title.
        content: Description of the diagram content.
        output: Output file path passed on as ``--output``.
    """
    try:
        preset = STYLE_TEMPLATES[style]
    except KeyError:
        print(f"错误: 未知风格 '{style}'")
        print(f"可用风格: {', '.join(STYLE_TEMPLATES)}")
        sys.exit(1)

    # Full prompt: prefix + title + content + suffix.
    prompt = f"{preset['prefix']}标题:{title},{content},{preset['suffix']}"

    print(f"生成 {preset['name']}: {title}")
    print("风格: 手绘体可视化")
    print(f"输出: {output}")

    # Delegate to text_to_image.py using the current interpreter; the
    # child's output goes straight to this process's stdout/stderr.
    completed = subprocess.run(
        [sys.executable, TEXT_TO_IMAGE_SCRIPT, prompt, "--output", output]
    )

    if completed.returncode:
        print("生成失败")
        sys.exit(1)
|
||||
|
||||
|
||||
def list_styles():
    """Print every preset style with its display name and trigger hint."""
    print("可用风格模板(手绘体可视化):\n")
    for key in STYLE_TEMPLATES:
        preset = STYLE_TEMPLATES[key]
        # Two text lines per style; print() itself supplies the final line
        # break, which yields the blank separator line.
        print(f" {key:10} - {preset['name']}\n 触发场景: {preset['trigger']}\n")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the research-report infographic tool.

    ``--list`` prints the available styles and returns.  Otherwise all of
    ``-t``, ``-n``, ``-c`` and ``-o`` are required; when one is missing the
    help text and an error line are printed and the process exits with
    status 1.
    """
    usage_examples = """
示例:
# 生成架构图
python research_image.py -t arch -n "Ralph Loop 核心架构" -c "展示 Prompt、Agent、Stop Hook、Files 四个模块的循环关系" -o images/arch.png

# 生成流程图
python research_image.py -t flow -n "Stop Hook 工作流程" -c "Agent尝试退出、Hook触发、检查条件、允许或阻止退出" -o images/flow.png

# 生成对比图
python research_image.py -t compare -n "ReAct vs Ralph Loop" -c "左侧ReAct自我评估停止,右侧Ralph外部Hook控制" -o images/compare.png

# 生成概念图
python research_image.py -t concept -n "状态持久化" -c "中心是Agent,周围是progress.txt、prd.json、Git历史、代码文件四个要素" -o images/concept.png

# 查看所有风格
python research_image.py --list
"""
    parser = argparse.ArgumentParser(
        description="调研报告专用信息图生成(手绘风格)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples
    )

    parser.add_argument(
        "-t", "--type",
        choices=list(STYLE_TEMPLATES),
        help="图解类型: arch(架构图), flow(流程图), compare(对比图), concept(概念图)"
    )
    parser.add_argument("-n", "--name", help="图表标题")
    parser.add_argument("-c", "--content", help="图表内容描述")
    parser.add_argument("-o", "--output", help="输出文件路径")
    parser.add_argument("--list", action="store_true", help="列出所有可用风格")

    args = parser.parse_args()

    if args.list:
        list_styles()
        return

    # None of the four options is marked required in argparse (so that
    # --list works on its own); enforce them here instead.
    if not (args.type and args.name and args.content and args.output):
        parser.print_help()
        print("\n错误: 必须提供 -t, -n, -c, -o 参数")
        sys.exit(1)

    generate_image(args.type, args.name, args.content, args.output)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
350
.opencode/skills/image-service/scripts/text_to_image.py
Normal file
350
.opencode/skills/image-service/scripts/text_to_image.py
Normal file
@@ -0,0 +1,350 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
文生图脚本 (Text-to-Image)
|
||||
使用 Lyra Flash API 根据中文文本描述生成图片
|
||||
支持参考图风格生成
|
||||
|
||||
Author: 翟星人
|
||||
"""
|
||||
|
||||
import httpx
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
from typing import Dict, Any, Optional, Union
|
||||
from pathlib import Path
|
||||
|
||||
# Supported aspect ratios, written as "W:H".
VALID_ASPECT_RATIOS = [
    "1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"
]

# Known pixel sizes, written as "WIDTHxHEIGHT".
VALID_SIZES = [
    "1024x1024",
    "1536x1024", "1792x1024", "1344x768", "1248x832", "1184x864", "1152x896", "1536x672",
    "1024x1536", "1024x1792", "768x1344", "832x1248", "864x1184", "896x1152"
]

# Size string used for each supported aspect ratio.  Every value here is
# also a member of VALID_SIZES.
# NOTE(review): several entries only approximate their key (e.g. "3:4" maps
# to 1024x1536, which is numerically 2:3, and "9:16" maps to 1024x1792, which
# is 4:7) -- presumably the closest size the backend accepts; confirm.
RATIO_TO_SIZE = {
    "1:1": "1024x1024",
    "2:3": "832x1248",
    "3:2": "1248x832",
    "3:4": "1024x1536",
    "4:3": "1536x1024",
    "4:5": "864x1184",
    "5:4": "1184x864",
    "9:16": "1024x1792",
    "16:9": "1792x1024",
    "21:9": "1536x672"
}
|
||||
|
||||
|
||||
class TextToImageGenerator:
    """Text-to-image generator.

    Sends a prompt to ``{base_url}/images/generations`` (or, when a reference
    image is supplied, to ``{base_url}/images/edits``) using bearer-token
    authentication, and optionally writes the first returned base64 image to
    disk.
    """

    def __init__(self, config: Optional[Dict[str, str]] = None):
        """Initialise the generator.

        Args:
            config: Mapping with ``api_key``, ``base_url`` and ``model``. The
                upper-case ``IMAGE_API_KEY`` / ``IMAGE_API_BASE_URL`` /
                ``IMAGE_MODEL`` keys are accepted as fallbacks. When omitted,
                the settings are loaded from the config file and environment
                variables via ``_load_config``.

        Raises:
            ValueError: If no API key or no base URL could be resolved.
        """
        if config is None:
            config = self._load_config()

        self.api_key = config.get('api_key') or config.get('IMAGE_API_KEY')
        self.base_url = config.get('base_url') or config.get('IMAGE_API_BASE_URL')
        self.model = config.get('model') or config.get('IMAGE_MODEL') or 'lyra-flash-9'

        if not self.api_key or not self.base_url:
            raise ValueError("缺少必要的 API 配置:api_key 和 base_url")

    def _load_config(self) -> Dict[str, str]:
        """Load API settings from ``config/settings.json`` and the environment.

        The JSON file is looked up one directory above this script, under
        ``config/settings.json``; its ``image_api`` object supplies ``key``,
        ``base_url`` and ``model``. Environment variables ``IMAGE_API_KEY``,
        ``IMAGE_API_BASE_URL`` and ``IMAGE_MODEL`` take precedence over the
        file when they are set to a non-empty value.

        Returns:
            Dict with ``api_key``, ``base_url`` and ``model`` entries; an
            entry is ``None`` when neither source provides it.
        """
        config: Dict[str, Any] = {}

        config_path = Path(__file__).parent.parent / 'config' / 'settings.json'
        if config_path.exists():
            with open(config_path, 'r', encoding='utf-8') as f:
                settings = json.load(f)
            api_config = settings.get('image_api', {})
            config['api_key'] = api_config.get('key')
            config['base_url'] = api_config.get('base_url')
            config['model'] = api_config.get('model')

        # An exported-but-empty variable must not mask a valid file setting,
        # hence `or` instead of passing the file value as getenv's default.
        for key, env_name in (
            ('api_key', 'IMAGE_API_KEY'),
            ('base_url', 'IMAGE_API_BASE_URL'),
            ('model', 'IMAGE_MODEL'),
        ):
            config[key] = os.getenv(env_name) or config.get(key)

        return config

    @staticmethod
    def image_to_base64(image_path: str, with_prefix: bool = True) -> str:
        """Encode an image file as base64 text.

        Args:
            image_path: Path of the image file to read.
            with_prefix: When true, return a ``data:<mime>;base64,...`` URI;
                otherwise return only the base64 payload.

        Returns:
            The encoded image. The MIME type is chosen from the file suffix
            and falls back to ``image/png`` for unrecognised suffixes.

        Raises:
            FileNotFoundError: If ``image_path`` does not exist.
        """
        path = Path(image_path)
        if not path.exists():
            raise FileNotFoundError(f"图片文件不存在: {image_path}")

        mime_types = {
            '.jpg': 'image/jpeg',
            '.jpeg': 'image/jpeg',
            '.png': 'image/png',
            '.gif': 'image/gif',
            '.webp': 'image/webp'
        }
        mime_type = mime_types.get(path.suffix.lower(), 'image/png')

        with open(image_path, 'rb') as f:
            b64_str = base64.b64encode(f.read()).decode('utf-8')

        if with_prefix:
            return f"data:{mime_type};base64,{b64_str}"
        return b64_str

    @staticmethod
    def _resolve_size(
        aspect_ratio: Optional[str],
        size: Optional[str],
        unknown_ratio_size: str,
        default_size: str
    ) -> str:
        """Pick the request size: aspect ratio first, then explicit size.

        Args:
            aspect_ratio: Ratio key looked up in ``RATIO_TO_SIZE``.
            size: Explicit ``WIDTHxHEIGHT`` string, used when no ratio is given.
            unknown_ratio_size: Size used when the ratio is not in the table.
            default_size: Size used when neither ratio nor size is given.
        """
        if aspect_ratio:
            return RATIO_TO_SIZE.get(aspect_ratio, unknown_ratio_size)
        if size:
            return size
        return default_size

    def generate(
        self,
        prompt: str,
        size: Optional[str] = None,
        aspect_ratio: Optional[str] = None,
        image_size: Optional[str] = None,
        output_path: Optional[str] = None,
        response_format: str = "b64_json",
        ref_image: Optional[str] = None
    ) -> Dict[str, Any]:
        """Generate an image from a text prompt.

        Args:
            prompt: Image description.
            size: Explicit size such as ``1792x1024``; used only when
                ``aspect_ratio`` is not given.
            aspect_ratio: Ratio such as ``16:9`` or ``3:4``; mapped to a size
                through ``RATIO_TO_SIZE`` (unknown ratios use ``1024x1024``).
            image_size: Resolution hint (1K/2K/4K). Accepted for interface
                compatibility; this implementation does not forward it to the
                API.
            output_path: If given, the first returned base64 image is written
                to this path.
            response_format: Response format requested from the API.
            ref_image: Path of a style-reference image; when given the request
                is delegated to ``_generate_with_reference``.

        Returns:
            On success ``{"success": True, "data": <API JSON>, "saved_path":
            <path or None>}`` where ``saved_path`` is set only if a file was
            actually written. On failure ``{"success": False, "error": ...,
            "detail": ...}``.
        """
        if ref_image:
            return self._generate_with_reference(
                prompt=prompt,
                ref_image=ref_image,
                aspect_ratio=aspect_ratio,
                size=size,
                output_path=output_path,
                response_format=response_format
            )

        payload: Dict[str, Any] = {
            "model": self.model,
            "prompt": prompt,
            "response_format": response_format
        }
        # With no ratio and no size, default to landscape 16:9.
        payload["size"] = self._resolve_size(
            aspect_ratio, size,
            unknown_ratio_size="1024x1024",
            default_size="1792x1024"
        )

        return self._request_image("images/generations", payload, output_path)

    def _generate_with_reference(
        self,
        prompt: str,
        ref_image: str,
        aspect_ratio: Optional[str] = None,
        size: Optional[str] = None,
        output_path: Optional[str] = None,
        response_format: str = "b64_json"
    ) -> Dict[str, Any]:
        """Generate a new image that follows the style of a reference image.

        The reference image is sent as a base64 data URI together with a
        prompt that instructs the model to keep its style.

        Args:
            prompt: Description of the new content.
            ref_image: Path of the reference image.
            aspect_ratio: Ratio mapped through ``RATIO_TO_SIZE``; takes
                precedence over ``size``, matching ``generate``.
            size: Explicit size, used when no ratio is given.
            output_path: If given, the first returned base64 image is written
                to this path.
            response_format: Response format requested from the API.

        Returns:
            Same result dictionary shape as ``generate``.

        Raises:
            FileNotFoundError: If ``ref_image`` does not exist.
        """
        image_b64 = self.image_to_base64(ref_image)

        enhanced_prompt = f"参考这张图片的背景风格、配色方案和视觉设计,保持完全一致的风格,生成新内容:{prompt}"

        # Portrait 1024x1792 is both the unknown-ratio and the no-input size.
        resolved_size = self._resolve_size(
            aspect_ratio, size,
            unknown_ratio_size="1024x1792",
            default_size="1024x1792"
        )

        payload = {
            "model": self.model,
            "prompt": enhanced_prompt,
            "image": image_b64,
            "size": resolved_size,
            "response_format": response_format
        }

        return self._request_image("images/edits", payload, output_path)

    def _request_image(
        self,
        endpoint: str,
        payload: Dict[str, Any],
        output_path: Optional[str]
    ) -> Dict[str, Any]:
        """POST ``payload`` to ``endpoint`` and build the result dictionary.

        Any exception (HTTP status error, transport error, decode or file
        error) is converted into a ``{"success": False, ...}`` dictionary
        rather than propagated.
        """
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}"
        }
        # Tolerate a configured base URL that ends with a slash.
        url = f"{self.base_url.rstrip('/')}/{endpoint}"

        try:
            with httpx.Client(timeout=180.0) as client:
                response = client.post(url, headers=headers, json=payload)
                response.raise_for_status()
                result = response.json()

            saved_path = self._persist_first_image(result, output_path)
            return {
                "success": True,
                "data": result,
                "saved_path": saved_path
            }

        except httpx.HTTPStatusError as e:
            return {
                "success": False,
                "error": f"HTTP 错误: {e.response.status_code}",
                "detail": str(e)
            }
        except Exception as e:
            return {
                "success": False,
                "error": "生成失败",
                "detail": str(e)
            }

    def _persist_first_image(
        self,
        result: Dict[str, Any],
        output_path: Optional[str]
    ) -> Optional[str]:
        """Write the first ``b64_json`` image of ``result`` to ``output_path``.

        Returns:
            ``output_path`` if a file was written (``result["saved_path"]`` is
            set as well), otherwise ``None`` — e.g. when no output path was
            requested or the response carries no base64 image.
        """
        if not output_path or not result.get("data"):
            return None

        b64_data = result["data"][0].get("b64_json")
        if not b64_data:
            return None

        self._save_image(b64_data, output_path)
        result["saved_path"] = output_path
        return output_path

    def _save_image(self, b64_data: str, output_path: str) -> None:
        """Decode ``b64_data`` and write it to ``output_path``.

        Missing parent directories are created first.
        """
        image_data = base64.b64decode(b64_data)
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'wb') as f:
            f.write(image_data)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for text-to-image generation.

    Parses the prompt and size options, validates them, runs the generator
    and prints the outcome.

    Returns:
        ``0`` when the image was generated, ``1`` when validation,
        configuration or generation failed. The ``__main__`` guard turns this
        into the process exit status.
    """
    import argparse
    import time

    parser = argparse.ArgumentParser(
        description='文生图工具',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=f'''
尺寸参数说明:
-r/--ratio 推荐使用,支持: {", ".join(VALID_ASPECT_RATIOS)}
-s/--size 传统尺寸,支持: {", ".join(VALID_SIZES[:4])}...
--resolution 分辨率(1K/2K/4K),仅 gemini-3.0-pro-image-preview 支持
--ref 参考图片路径,后续图片将参考首图风格生成

示例:
python text_to_image.py "描述" -r 3:4 # 竖版 3:4
python text_to_image.py "描述" -r 9:16 -o out.png # 竖屏 9:16
python text_to_image.py "描述" -s 1024x1792 # 传统尺寸

# 长图场景:首图定调,后续参考首图风格
python text_to_image.py "首屏内容" -r 3:4 -o 01.png
python text_to_image.py "第二屏内容" -r 3:4 --ref 01.png -o 02.png
'''
    )
    parser.add_argument('prompt', help='中文图像描述提示词')
    parser.add_argument('-o', '--output', help='输出文件路径(默认保存到当前目录)')
    parser.add_argument('-r', '--ratio', help=f'宽高比,推荐使用。可选: {", ".join(VALID_ASPECT_RATIOS)}')
    parser.add_argument('-s', '--size', help='图片尺寸 (如 1792x1024)')
    parser.add_argument('--resolution', help='分辨率 (1K/2K/4K),仅部分模型支持')
    parser.add_argument('--ref', help='参考图片路径,用于风格参考(长图场景)')

    args = parser.parse_args()

    # An unsupported ratio is a hard error.
    if args.ratio and args.ratio not in VALID_ASPECT_RATIOS:
        print(f"错误: 不支持的宽高比 '{args.ratio}'")
        print(f"支持的宽高比: {', '.join(VALID_ASPECT_RATIOS)}")
        return 1

    # An unlisted size is only a warning; the request is still attempted.
    if args.size and args.size not in VALID_SIZES:
        print(f"警告: 尺寸 '{args.size}' 可能不被支持")
        print("推荐使用 -r/--ratio 参数指定宽高比")

    if args.ref and not os.path.exists(args.ref):
        print(f"错误: 参考图片不存在: {args.ref}")
        return 1

    output_path = args.output
    if not output_path:
        # Default to a timestamped file in the current directory.
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        output_path = f"generated_{timestamp}.png"

    try:
        generator = TextToImageGenerator()
    except ValueError as exc:
        # Missing api_key/base_url: report it instead of dumping a traceback.
        print(f"错误: {exc}")
        return 1

    result = generator.generate(
        prompt=args.prompt,
        size=args.size,
        aspect_ratio=args.ratio,
        image_size=args.resolution,
        output_path=output_path,
        ref_image=args.ref
    )

    if not result["success"]:
        print(f"生成失败: {result['error']}")
        print(f"详情: {result.get('detail', 'N/A')}")
        return 1

    print("生成成功!")
    if result.get("saved_path"):
        print(f"图片已保存到: {result['saved_path']}")
    return 0


if __name__ == "__main__":
    # Propagate main()'s status so callers and scripts can detect failures.
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user