From 64307951f256a7a2c936ab3207dfcbd3e8c3df47 Mon Sep 17 00:00:00 2001 From: hmo Date: Sat, 14 Feb 2026 22:39:25 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20image-service=20=E6=96=B0=E5=A2=9E?= =?UTF-8?q?=E9=98=BF=E9=87=8C=E4=BA=91=E7=99=BE=E7=82=BC=E6=96=87=E7=94=9F?= =?UTF-8?q?=E5=9B=BE=E6=94=AF=E6=8C=81=EF=BC=8C=E6=B7=BB=E5=8A=A0=E9=85=8D?= =?UTF-8?q?=E7=BD=AE=E6=95=8F=E6=84=9F=E4=BF=A1=E6=81=AF=E4=BF=9D=E6=8A=A4?= =?UTF-8?q?=E8=A7=84=E5=88=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 13 +- .../image-service/scripts/text_to_image.py | 285 +++++++++++------- AGENTS.md | 28 +- 3 files changed, 222 insertions(+), 104 deletions(-) diff --git a/.gitignore b/.gitignore index 7a4a25e..0471717 100644 --- a/.gitignore +++ b/.gitignore @@ -10,8 +10,19 @@ npm-debug.log* .env .env.* +# 忽略技能配置目录(所有 skills 的 config) +.opencode/skills/*/config/ + +# 但保留 .example 模板文件 +!.opencode/skills/*/config/settings.json.example + # 允许 Git 追踪 .opencode 下的技能定义 !.opencode/skills/ # 忽略临时文件目录 -temp/ \ No newline at end of file +temp/ + +# 忽略 Python 缓存 +__pycache__/ +*.pyc +*.pyo \ No newline at end of file diff --git a/.opencode/skills/image-service/scripts/text_to_image.py b/.opencode/skills/image-service/scripts/text_to_image.py index b6ff833..2512e24 100644 --- a/.opencode/skills/image-service/scripts/text_to_image.py +++ b/.opencode/skills/image-service/scripts/text_to_image.py @@ -15,13 +15,33 @@ from typing import Dict, Any, Optional, Union from pathlib import Path VALID_ASPECT_RATIOS = [ - "1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9" + "1:1", + "2:3", + "3:2", + "3:4", + "4:3", + "4:5", + "5:4", + "9:16", + "16:9", + "21:9", ] VALID_SIZES = [ "1024x1024", - "1536x1024", "1792x1024", "1344x768", "1248x832", "1184x864", "1152x896", "1536x672", - "1024x1536", "1024x1792", "768x1344", "832x1248", "864x1184", "896x1152" + "1536x1024", + "1792x1024", + "1344x768", + "1248x832", + "1184x864", + "1152x896", + "1536x672", + "1024x1536", + "1024x1792", + "768x1344", + "832x1248", + "864x1184", + "896x1152", ] RATIO_TO_SIZE = { @@ -34,74 +54,74 @@ RATIO_TO_SIZE = { "5:4": "1184x864", "9:16": "1024x1792", "16:9": "1792x1024", - "21:9": "1536x672" + "21:9": "1536x672", } class TextToImageGenerator: """文生图生成器""" - + def __init__(self, config: Optional[Dict[str, str]] = None): """ 初始化生成器 - + Args: config: 配置字典,包含 api_key, base_url, model 如果不传则从环境变量或配置文件读取 """ if config is None: config = self._load_config() - - self.api_key = config.get('api_key') or config.get('IMAGE_API_KEY') - self.base_url = config.get('base_url') or config.get('IMAGE_API_BASE_URL') - self.model = config.get('model') or config.get('IMAGE_MODEL') or 'lyra-flash-9' - + + self.api_key = config.get("api_key") or config.get("IMAGE_API_KEY") + self.base_url = config.get("base_url") or config.get("IMAGE_API_BASE_URL") + self.model = config.get("model") or config.get("IMAGE_MODEL") or "lyra-flash-9" + if not self.api_key or not self.base_url: raise ValueError("缺少必要的 API 配置:api_key 和 base_url") - + def _load_config(self) -> Dict[str, str]: """从配置文件或环境变量加载配置""" config = {} - - config_path = Path(__file__).parent.parent / 'config' / 'settings.json' + + config_path = Path(__file__).parent.parent / "config" / "settings.json" if config_path.exists(): - with open(config_path, 'r', encoding='utf-8') as f: + with open(config_path, "r", encoding="utf-8") as f: settings = json.load(f) - api_config = settings.get('image_api', {}) - config['api_key'] = api_config.get('key') - config['base_url'] = api_config.get('base_url') - config['model'] = api_config.get('model') - - config['api_key'] = os.getenv('IMAGE_API_KEY', config.get('api_key')) - config['base_url'] = os.getenv('IMAGE_API_BASE_URL', config.get('base_url')) - config['model'] = os.getenv('IMAGE_MODEL', config.get('model')) - + api_config = settings.get("image_api", {}) + config["api_key"] = api_config.get("key") + config["base_url"] = api_config.get("base_url") + config["model"] = api_config.get("model") + + config["api_key"] = os.getenv("IMAGE_API_KEY", config.get("api_key")) + config["base_url"] = os.getenv("IMAGE_API_BASE_URL", config.get("base_url")) + config["model"] = os.getenv("IMAGE_MODEL", config.get("model")) + return config - + @staticmethod def image_to_base64(image_path: str, with_prefix: bool = True) -> str: """将图片文件转换为 base64 编码""" path = Path(image_path) if not path.exists(): raise FileNotFoundError(f"图片文件不存在: {image_path}") - + suffix = path.suffix.lower() mime_types = { - '.jpg': 'image/jpeg', - '.jpeg': 'image/jpeg', - '.png': 'image/png', - '.gif': 'image/gif', - '.webp': 'image/webp' + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".png": "image/png", + ".gif": "image/gif", + ".webp": "image/webp", } - mime_type = mime_types.get(suffix, 'image/png') - - with open(image_path, 'rb') as f: - b64_str = base64.b64encode(f.read()).decode('utf-8') - + mime_type = mime_types.get(suffix, "image/png") + + with open(image_path, "rb") as f: + b64_str = base64.b64encode(f.read()).decode("utf-8") + if with_prefix: return f"data:{mime_type};base64,{b64_str}" return b64_str - + def generate( self, prompt: str, @@ -110,11 +130,11 @@ class TextToImageGenerator: image_size: Optional[str] = None, output_path: Optional[str] = None, response_format: str = "b64_json", - ref_image: Optional[str] = None + ref_image: Optional[str] = None, ) -> Dict[str, Any]: """ 生成图片 - + Args: prompt: 中文图像描述提示词 size: 图片尺寸 (如 1792x1024),与 aspect_ratio 二选一 @@ -123,7 +143,7 @@ class TextToImageGenerator: output_path: 输出文件路径,如果提供则保存图片 response_format: 响应格式,默认 b64_json ref_image: 参考图片路径,用于风格参考 - + Returns: 包含生成结果的字典 """ @@ -134,15 +154,15 @@ class TextToImageGenerator: aspect_ratio=aspect_ratio, size=size, output_path=output_path, - response_format=response_format + response_format=response_format, ) - + payload: Dict[str, Any] = { "model": self.model, "prompt": prompt, - "response_format": response_format + "response_format": response_format, } - + # 确定尺寸:优先用 aspect_ratio 映射,其次用 size if aspect_ratio: payload["size"] = RATIO_TO_SIZE.get(aspect_ratio, "1024x1024") @@ -150,47 +170,108 @@ class TextToImageGenerator: payload["size"] = size else: payload["size"] = "1792x1024" # 默认 16:9 - + headers = { "Content-Type": "application/json", - "Authorization": f"Bearer {self.api_key}" + "Authorization": f"Bearer {self.api_key}", } - + try: + if self.base_url and "dashscope" in self.base_url: + return self._generate_aliyun( + prompt, size, aspect_ratio, output_path, response_format + ) + with httpx.Client(timeout=180.0) as client: response = client.post( - f"{self.base_url}/images/generations", - headers=headers, - json=payload + f"{self.base_url}/images/generations", headers=headers, json=payload ) response.raise_for_status() result = response.json() - + if output_path and result.get("data"): b64_data = result["data"][0].get("b64_json") if b64_data: self._save_image(b64_data, output_path) result["saved_path"] = output_path - + return { "success": True, "data": result, - "saved_path": output_path if output_path else None + "saved_path": output_path if output_path else None, } - + except httpx.HTTPStatusError as e: return { "success": False, "error": f"HTTP 错误: {e.response.status_code}", - "detail": str(e) + "detail": str(e), } except Exception as e: + return {"success": False, "error": "生成失败", "detail": str(e)} + + def _generate_aliyun( + self, prompt, size, aspect_ratio, output_path, response_format + ): + if aspect_ratio: + size = RATIO_TO_SIZE.get(aspect_ratio, "1024*1024").replace("x", "*") + elif not size: + size = "1024*1024" + else: + size = size.replace("x", "*") + + payload = { + "model": self.model, + "input": {"messages": [{"role": "user", "content": [{"text": prompt}]}]}, + "parameters": {"size": size, "response_format": "base64"}, + } + + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {self.api_key}", + } + + try: + with httpx.Client(timeout=300.0) as client: + response = client.post( + f"{self.base_url}/api/v1/services/aigc/multimodal-generation/generation", + headers=headers, + json=payload, + ) + response.raise_for_status() + result = response.json() + + if output_path: + image_url = ( + result.get("output", {}) + .get("choices", [{}])[0] + .get("message", {}) + .get("content", [{}])[0] + .get("image") + ) + if image_url: + self._download_image(image_url, output_path) + result["saved_path"] = output_path + + return {"success": True, "data": result, "saved_path": output_path} + + except httpx.HTTPStatusError as e: return { "success": False, - "error": "生成失败", - "detail": str(e) + "error": "HTTP错误: %s" % e.response.status_code, + "detail": str(e), } - + except Exception as e: + return {"success": False, "error": "生成失败", "detail": str(e)} + + def _download_image(self, url, output_path): + with httpx.Client(timeout=60.0) as client: + response = client.get(url) + response.raise_for_status() + Path(output_path).parent.mkdir(parents=True, exist_ok=True) + with open(output_path, "wb") as f: + f.write(response.content) + def _generate_with_reference( self, prompt: str, @@ -198,11 +279,11 @@ class TextToImageGenerator: aspect_ratio: Optional[str] = None, size: Optional[str] = None, output_path: Optional[str] = None, - response_format: str = "b64_json" + response_format: str = "b64_json", ) -> Dict[str, Any]: """ 参考图片风格生成新图 - + Args: prompt: 新图内容描述 ref_image: 参考图片路径 @@ -212,66 +293,64 @@ class TextToImageGenerator: response_format: 响应格式 """ image_b64 = self.image_to_base64(ref_image) - + enhanced_prompt = f"参考这张图片的背景风格、配色方案和视觉设计,保持完全一致的风格,生成新内容:{prompt}" - + # 确定尺寸:优先用 aspect_ratio 映射,其次用 size if size is None: - size = RATIO_TO_SIZE.get(aspect_ratio, "1024x1792") if aspect_ratio else "1024x1792" - + size = ( + RATIO_TO_SIZE.get(aspect_ratio, "1024x1792") + if aspect_ratio + else "1024x1792" + ) + payload = { "model": self.model, "prompt": enhanced_prompt, "image": image_b64, "size": size, - "response_format": response_format + "response_format": response_format, } - + headers = { "Content-Type": "application/json", - "Authorization": f"Bearer {self.api_key}" + "Authorization": f"Bearer {self.api_key}", } - + try: with httpx.Client(timeout=180.0) as client: response = client.post( - f"{self.base_url}/images/edits", - headers=headers, - json=payload + f"{self.base_url}/images/edits", headers=headers, json=payload ) response.raise_for_status() result = response.json() - + if output_path and result.get("data"): b64_data = result["data"][0].get("b64_json") if b64_data: self._save_image(b64_data, output_path) result["saved_path"] = output_path - + return { "success": True, "data": result, - "saved_path": output_path if output_path else None + "saved_path": output_path if output_path else None, } - + except httpx.HTTPStatusError as e: return { "success": False, "error": f"HTTP 错误: {e.response.status_code}", - "detail": str(e) + "detail": str(e), } except Exception as e: - return { - "success": False, - "error": "生成失败", - "detail": str(e) - } - + return {"success": False, "error": "生成失败", "detail": str(e)} + def _save_image(self, b64_data: str, output_path: str) -> None: """保存 base64 图片到文件""" image_data = base64.b64decode(b64_data) Path(output_path).parent.mkdir(parents=True, exist_ok=True) - with open(output_path, 'wb') as f: + with open(output_path, "wb") as f: f.write(image_data) @@ -279,11 +358,11 @@ def main(): """命令行入口""" import argparse import time - + parser = argparse.ArgumentParser( - description='文生图工具', + description="文生图工具", formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=f''' + epilog=f""" 尺寸参数说明: -r/--ratio 推荐使用,支持: {", ".join(VALID_ASPECT_RATIOS)} -s/--size 传统尺寸,支持: {", ".join(VALID_SIZES[:4])}... @@ -298,35 +377,39 @@ def main(): # 长图场景:首图定调,后续参考首图风格 python text_to_image.py "首屏内容" -r 3:4 -o 01.png python text_to_image.py "第二屏内容" -r 3:4 --ref 01.png -o 02.png -''' +""", ) - parser.add_argument('prompt', help='中文图像描述提示词') - parser.add_argument('-o', '--output', help='输出文件路径(默认保存到当前目录)') - parser.add_argument('-r', '--ratio', help=f'宽高比,推荐使用。可选: {", ".join(VALID_ASPECT_RATIOS)}') - parser.add_argument('-s', '--size', help='图片尺寸 (如 1792x1024)') - parser.add_argument('--resolution', help='分辨率 (1K/2K/4K),仅部分模型支持') - parser.add_argument('--ref', help='参考图片路径,用于风格参考(长图场景)') - + parser.add_argument("prompt", help="中文图像描述提示词") + parser.add_argument("-o", "--output", help="输出文件路径(默认保存到当前目录)") + parser.add_argument( + "-r", + "--ratio", + help=f"宽高比,推荐使用。可选: {', '.join(VALID_ASPECT_RATIOS)}", + ) + parser.add_argument("-s", "--size", help="图片尺寸 (如 1792x1024)") + parser.add_argument("--resolution", help="分辨率 (1K/2K/4K),仅部分模型支持") + parser.add_argument("--ref", help="参考图片路径,用于风格参考(长图场景)") + args = parser.parse_args() - + if args.ratio and args.ratio not in VALID_ASPECT_RATIOS: print(f"错误: 不支持的宽高比 '{args.ratio}'") print(f"支持的宽高比: {', '.join(VALID_ASPECT_RATIOS)}") return - + if args.size and args.size not in VALID_SIZES: print(f"警告: 尺寸 '{args.size}' 可能不被支持") print(f"推荐使用 -r/--ratio 参数指定宽高比") - + if args.ref and not os.path.exists(args.ref): print(f"错误: 参考图片不存在: {args.ref}") return - + output_path = args.output if not output_path: timestamp = time.strftime("%Y%m%d_%H%M%S") output_path = f"generated_{timestamp}.png" - + generator = TextToImageGenerator() result = generator.generate( prompt=args.prompt, @@ -334,9 +417,9 @@ def main(): aspect_ratio=args.ratio, image_size=args.resolution, output_path=output_path, - ref_image=args.ref + ref_image=args.ref, ) - + if result["success"]: print(f"生成成功!") if result.get("saved_path"): diff --git a/AGENTS.md b/AGENTS.md index 09784e3..dd5e544 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -133,10 +133,28 @@ ## 四、技能清单 -### 当前已安装技能(0个) +### 当前已安装技能(16个) 技能存放在 `.opencode/skills/` 目录下: -(技能列表) + +| 技能名 | 描述 | 触发词 | +|--------|------|--------| +| `csv-data-summarizer` | CSV数据分析技能。使用Python和pandas分析CSV文件,生成统计摘要和快速可视化图表 | CSV文件、表格数据 | +| `deep-research` | 深度调研技能。搜索、整理、汇总指定主题的技术内容 | 调研、深度调研、帮我研究 | +| `image-service` | 多模态图像处理技能,支持文生图、图生图、图生文、长图拼接 | 图片、图像、生成图、信息图、OCR | +| `log-analyzer` | 全维度日志分析技能。自动识别日志类型,进行根因定位、告警分析、异常洞察 | 分析日志、日志排查、根因定位 | +| `mcp-builder` | MCP服务器开发指南,用于创建高质量的MCP服务器 | 构建MCP服务器、MCP开发 | +| `searchnews` | AI新闻搜索技能,搜索、整理、汇总指定日期的AI行业新闻 | 搜索新闻、查询AI新闻、整理新闻 | +| `skill-creator` | 技能创建指南,用于创建新的技能或更新现有技能 | 创建技能、更新技能 | +| `smart-query` | 智能数据库查询技能。通过SSH隧道连接线上数据库,支持自然语言转SQL | 查询数据库、问数据、看表结构 | +| `story-to-scenes` | 长文本拆镜批量生图引擎。智能拆分场景,批量生成风格统一的配图 | 拆镜生图、故事配图、批量场景图 | +| `uni-agent` | 统一智能体协议适配层。一套API调用所有Agent协议(ANP/MCP/A2A等) | 调用Agent、跨协议通信、连接工具 | +| `video-creator` | 视频创作技能。图片+音频合成视频,支持转场、片尾、BGM | 生成视频、图文转视频、做视频号 | +| `videocut-clip` | 执行视频剪辑。根据删除任务执行FFmpeg剪辑 | 执行剪辑、开始剪、确认剪辑 | +| `videocut-clip-oral` | 口播视频转录和口误识别。生成审查稿和删除任务清单 | 剪口播、处理视频、识别口误 | +| `videocut-install` | 环境准备。安装依赖、下载模型、验证环境 | 安装、环境准备、初始化 | +| `videocut-self-update` | 自更新skills。记录用户反馈,更新方法论和规则 | 更新规则、记录反馈、改进skill | +| `videocut-subtitle` | 字幕生成与烧录。转录→词典纠错→审核→烧录 | 加字幕、生成字幕、字幕 | ### 技能使用说明 @@ -203,6 +221,12 @@ - 每次会话开始时主动读取 - 执行相关任务前查阅对应记忆 +### 短期待办事项 +每次会话开始时自动读取 `.memory/pending.md`,包含: +- 近期待办任务(有时间限制的) +- 任务完成后自动移除 +- 无需用户提醒 + --- ## 八、铁律(违反即解雇)