新增16个AI技能：包含图像生成、视频剪辑、数据分析、智能查询等功能模块

2026-02-13 20:18:38 +08:00
parent 456cd45de4
commit 8200a17176
154 changed files with 14585 additions and 1 deletions
--- a/.opencode/skills/image-service/scripts/image_to_image.py
+++ b/.opencode/skills/image-service/scripts/image_to_image.py
@@ -0,0 +1,273 @@
+#!/usr/bin/env python3
+"""
+图生图脚本 (Image-to-Image)
+使用 Lyra Flash API 基于参考图片和中文指令进行图片编辑
+
+Author: 翟星人
+"""
+
+import httpx
+import base64
+import json
+import os
+from typing import Dict, Any, Optional, Union
+from pathlib import Path
+
+# Aspect ratios accepted by the -r/--ratio command-line option; main() rejects
+# any value that is not listed here.
+VALID_ASPECT_RATIOS = [
+    "1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"
+]
+
+# Pixel sizes ("WIDTHxHEIGHT") recognised for -s/--size. main() only prints a
+# warning for a size outside this list; the value is still passed to the API.
+VALID_SIZES = [
+    "1024x1024",
+    "1536x1024", "1792x1024", "1344x768", "1248x832", "1184x864", "1152x896", "1536x672",
+    "1024x1536", "1024x1792", "768x1344", "832x1248", "864x1184", "896x1152"
+]
+
+# Size sent to the API for each aspect ratio (looked up by ImageToImageEditor.edit).
+# NOTE(review): several entries are not numerically exact for their key, e.g.
+# "3:4" -> 1024x1536 is 2:3 and "4:3" -> 1536x1024 is 3:2. Confirm against the
+# sizes the API actually supports before changing any of them.
+RATIO_TO_SIZE = {
+    "1:1": "1024x1024",
+    "2:3": "832x1248",
+    "3:2": "1248x832",
+    "3:4": "1024x1536",
+    "4:3": "1536x1024",
+    "4:5": "864x1184",
+    "5:4": "1184x864",
+    "9:16": "1024x1792",
+    "16:9": "1792x1024",
+    "21:9": "1536x672"
+}
+
+
+class ImageToImageEditor:
+    """Image-to-image editor.
+
+    Sends a reference image plus a text instruction to the ``/images/edits``
+    endpoint of the configured API and optionally saves the returned image.
+    """
+
+    def __init__(self, config: Optional[Dict[str, str]] = None):
+        """Initialise the editor.
+
+        Args:
+            config: Mapping with ``api_key``, ``base_url`` and ``model`` (the
+                upper-case names ``IMAGE_API_KEY``, ``IMAGE_API_BASE_URL`` and
+                ``IMAGE_MODEL`` are accepted as fallbacks). When omitted, the
+                values are read from the settings file and the environment.
+
+        Raises:
+            ValueError: If no API key or base URL can be determined.
+        """
+        if config is None:
+            config = self._load_config()
+
+        self.api_key = config.get('api_key') or config.get('IMAGE_API_KEY')
+        self.base_url = config.get('base_url') or config.get('IMAGE_API_BASE_URL')
+        self.model = config.get('model') or config.get('IMAGE_MODEL') or 'lyra-flash-9'
+
+        if not self.api_key or not self.base_url:
+            raise ValueError("缺少必要的 API 配置：api_key 和 base_url")
+
+        # Endpoint paths are appended with a leading "/", so drop any trailing
+        # slash to avoid requesting ".../v1//images/edits".
+        self.base_url = self.base_url.rstrip('/')
+
+    def _load_config(self) -> Dict[str, Optional[str]]:
+        """Load the configuration from ``config/settings.json`` and the environment.
+
+        The settings file is looked up next to the parent directory of this
+        script. Non-empty environment variables take precedence over the file.
+
+        Returns:
+            Dict with the keys ``api_key``, ``base_url`` and ``model``; a value
+            is ``None`` when neither source provides it.
+        """
+        config: Dict[str, Optional[str]] = {}
+
+        config_path = Path(__file__).parent.parent / 'config' / 'settings.json'
+        if config_path.exists():
+            with open(config_path, 'r', encoding='utf-8') as f:
+                settings = json.load(f)
+            # "image_api": null in the file must not crash the lookups below.
+            api_config = settings.get('image_api') or {}
+            config['api_key'] = api_config.get('key')
+            config['base_url'] = api_config.get('base_url')
+            config['model'] = api_config.get('model')
+
+        # Environment variables win, but an empty variable must not erase a
+        # value that came from the settings file.
+        for key, env_name in (
+            ('api_key', 'IMAGE_API_KEY'),
+            ('base_url', 'IMAGE_API_BASE_URL'),
+            ('model', 'IMAGE_MODEL'),
+        ):
+            config[key] = os.getenv(env_name) or config.get(key)
+
+        return config
+
+    @staticmethod
+    def image_to_base64(image_path: str, with_prefix: bool = True) -> str:
+        """Encode an image file as base64.
+
+        Args:
+            image_path: Path of the image file.
+            with_prefix: Whether to prepend a ``data:<mime>;base64,`` prefix.
+
+        Returns:
+            The base64 text, with the data-URL prefix when requested.
+
+        Raises:
+            FileNotFoundError: If ``image_path`` is not an existing file.
+        """
+        path = Path(image_path)
+        if not path.is_file():
+            raise FileNotFoundError(f"图片文件不存在: {image_path}")
+
+        # The MIME type is derived from the extension; unknown ones fall back to PNG.
+        mime_types = {
+            '.jpg': 'image/jpeg',
+            '.jpeg': 'image/jpeg',
+            '.png': 'image/png',
+            '.gif': 'image/gif',
+            '.webp': 'image/webp'
+        }
+        mime_type = mime_types.get(path.suffix.lower(), 'image/png')
+
+        b64_str = base64.b64encode(path.read_bytes()).decode('utf-8')
+
+        if with_prefix:
+            return f"data:{mime_type};base64,{b64_str}"
+        return b64_str
+
+    @staticmethod
+    def _looks_like_base64(text: str) -> bool:
+        """Return True when ``text`` (ignoring whitespace) is valid standard base64."""
+        compact = "".join(text.split())
+        if not compact:
+            return False
+        try:
+            base64.b64decode(compact, validate=True)
+        except ValueError:  # binascii.Error is a ValueError subclass
+            return False
+        return True
+
+    def edit(
+        self,
+        image: Union[str, bytes],
+        prompt: str,
+        aspect_ratio: Optional[str] = None,
+        size: Optional[str] = None,
+        output_path: Optional[str] = None,
+        response_format: str = "b64_json"
+    ) -> Dict[str, Any]:
+        """Edit an image according to a text instruction.
+
+        Args:
+            image: Path of an image file, a ``data:`` URL, a bare base64
+                string, or the raw image bytes.
+            prompt: The editing instruction.
+            aspect_ratio: Aspect ratio such as ``3:4``; mapped to a size via
+                ``RATIO_TO_SIZE`` and preferred over ``size``.
+            size: Explicit size such as ``1024x1792``.
+            output_path: Where to save the first returned image, if any.
+            response_format: Value forwarded as ``response_format``.
+
+        Returns:
+            On success ``{"success": True, "data": <API JSON>, "saved_path": ...}``
+            where ``saved_path`` is set only if a file was actually written.
+            On failure ``{"success": False, "error": ..., "detail": ...}``.
+        """
+        # Normalise the image input to a data URL.
+        if isinstance(image, str):
+            if os.path.isfile(image):
+                image_b64 = self.image_to_base64(image)
+            elif image.startswith('data:'):
+                image_b64 = image
+            elif self._looks_like_base64(image):
+                image_b64 = f"data:image/png;base64,{image}"
+            else:
+                # Typically a mistyped or missing file path; sending it to the
+                # API as "base64" would only produce a confusing remote error.
+                return {
+                    "success": False,
+                    "error": "编辑失败",
+                    "detail": f"输入既不是存在的图片文件，也不是有效的 base64 数据: {image[:80]}"
+                }
+        else:
+            image_b64 = f"data:image/png;base64,{base64.b64encode(image).decode('utf-8')}"
+
+        payload: Dict[str, Any] = {
+            "model": self.model,
+            "prompt": prompt,
+            "image": image_b64,
+            "response_format": response_format
+        }
+
+        # Size selection: aspect_ratio mapping first, then explicit size,
+        # otherwise the default 1024x1536.
+        if aspect_ratio:
+            payload["size"] = RATIO_TO_SIZE.get(aspect_ratio, "1024x1536")
+        elif size:
+            payload["size"] = size
+        else:
+            payload["size"] = "1024x1536"
+
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.api_key}"
+        }
+
+        try:
+            with httpx.Client(timeout=180.0) as client:
+                response = client.post(
+                    f"{self.base_url}/images/edits",
+                    headers=headers,
+                    json=payload
+                )
+                response.raise_for_status()
+                result = response.json()
+
+            # Report a saved path only when a file was really written; the
+            # response may carry no b64_json (for example a URL-style reply).
+            saved_path = None
+            data_items = result.get("data") or []
+            if output_path and data_items:
+                b64_data = data_items[0].get("b64_json")
+                if b64_data:
+                    self._save_image(b64_data, output_path)
+                    saved_path = output_path
+                    result["saved_path"] = output_path
+
+            return {
+                "success": True,
+                "data": result,
+                "saved_path": saved_path
+            }
+
+        except httpx.HTTPStatusError as e:
+            # The start of the response body usually explains the rejection.
+            body = e.response.text[:500]
+            return {
+                "success": False,
+                "error": f"HTTP 错误: {e.response.status_code}",
+                "detail": f"{e} | {body}" if body else str(e)
+            }
+        except Exception as e:
+            return {
+                "success": False,
+                "error": "编辑失败",
+                "detail": str(e)
+            }
+
+    def _save_image(self, b64_data: str, output_path: str) -> None:
+        """Decode base64 image data and write it to ``output_path``.
+
+        Missing parent directories are created.
+        """
+        image_data = base64.b64decode(b64_data)
+        target = Path(output_path)
+        target.parent.mkdir(parents=True, exist_ok=True)
+        target.write_bytes(image_data)
+
+
+def main():
+    """Command-line entry point for image editing.
+
+    Parses the arguments, runs a single edit and prints the outcome. Exits
+    with status 1 when the aspect ratio is invalid, the API configuration is
+    missing, or the edit fails, so that calling scripts can detect failure
+    from the exit code.
+    """
+    import argparse
+    import sys
+    import time
+
+    parser = argparse.ArgumentParser(
+        description='图生图编辑工具',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=f'''
+尺寸参数说明:
+  -r/--ratio    宽高比（推荐），支持: {", ".join(VALID_ASPECT_RATIOS)}
+  -s/--size     传统尺寸，支持: {", ".join(VALID_SIZES[:4])}...
+
+示例:
+  python image_to_image.py input.png "编辑描述" -r 3:4
+  python image_to_image.py input.png "编辑描述" -s 1024x1536
+'''
+    )
+    parser.add_argument('image', help='输入图片路径')
+    parser.add_argument('prompt', help='中文编辑指令')
+    parser.add_argument('-o', '--output', help='输出文件路径（默认保存到当前目录）')
+    parser.add_argument('-r', '--ratio', help=f'宽高比（推荐）。可选: {", ".join(VALID_ASPECT_RATIOS)}')
+    parser.add_argument('-s', '--size', help='传统尺寸，如 1024x1536')
+
+    args = parser.parse_args()
+
+    if args.ratio and args.ratio not in VALID_ASPECT_RATIOS:
+        print(f"错误: 不支持的宽高比 '{args.ratio}'")
+        print(f"支持的宽高比: {', '.join(VALID_ASPECT_RATIOS)}")
+        sys.exit(1)
+
+    if args.size and args.size not in VALID_SIZES:
+        # Only a warning: the size is still forwarded to the editor below.
+        print(f"警告: 尺寸 '{args.size}' 可能不被支持")
+
+    # Default output name: timestamped PNG in the current directory.
+    output_path = args.output
+    if not output_path:
+        timestamp = time.strftime("%Y%m%d_%H%M%S")
+        output_path = f"edited_{timestamp}.png"
+
+    try:
+        editor = ImageToImageEditor()
+    except ValueError as exc:
+        # Missing or unreadable API configuration: report it without a traceback.
+        print(f"错误: {exc}")
+        sys.exit(1)
+
+    result = editor.edit(
+        image=args.image,
+        prompt=args.prompt,
+        aspect_ratio=args.ratio,
+        size=args.size,
+        output_path=output_path
+    )
+
+    if not result["success"]:
+        print(f"编辑失败: {result['error']}")
+        print(f"详情: {result.get('detail', 'N/A')}")
+        sys.exit(1)
+
+    print("编辑成功！")
+    if result.get("saved_path"):
+        print(f"图片已保存到: {result['saved_path']}")
+
+
+if __name__ == "__main__":
+    main()
--- a/.opencode/skills/image-service/scripts/image_to_text.py
+++ b/.opencode/skills/image-service/scripts/image_to_text.py
@@ -0,0 +1,287 @@
+#!/usr/bin/env python3
+"""
+图生文脚本 (Image-to-Text) - 视觉识别
+使用 Qwen2.5-VL 模型分析图片内容并生成文字描述
+
+Author: 翟星人
+"""
+
+import httpx
+import base64
+import json
+import os
+from typing import Dict, Any, Optional, Union, List
+from pathlib import Path
+
+
+class ImageToTextAnalyzer:
+    """Image-to-text analyzer (visual recognition).
+
+    Sends an image together with a prompt to the ``/chat/completions``
+    endpoint of the configured API and returns the generated text.
+    """
+
+    # Predefined analysis modes: mode name -> prompt sent to the model.
+    ANALYSIS_MODES = {
+        "describe": "请详细描述这张图片的内容，包括：人物、场景、物品、颜色、布局等所有细节。",
+        "ocr": "请仔细识别这张图片中的所有文字内容，按照文字在图片中的位置顺序输出。如果是中文，请保持原文输出。",
+        "chart": "请分析这张图表的内容，包括：图表类型、数据趋势、关键数据点、标题标签、以及数据的结论或洞察。",
+        "fashion": "请分析这张图片中人物的穿搭，包括：服装款式、颜色搭配、配饰、整体风格等。",
+        "product": "请分析这张产品图片，包括：产品类型、外观特征、功能特点、品牌信息等。",
+        "scene": "请描述这张图片的场景，包括：地点、环境、氛围、时间（白天/夜晚）等。"
+    }
+
+    def __init__(self, config: Optional[Dict[str, str]] = None):
+        """Initialise the analyzer.
+
+        Args:
+            config: Mapping with ``api_key``, ``base_url`` and ``model`` (the
+                upper-case ``VISION_*`` / ``IMAGE_*`` names are accepted as
+                fallbacks). When omitted, the values are read from the
+                settings file and the environment.
+
+        Raises:
+            ValueError: If no API key or base URL can be determined.
+        """
+        if config is None:
+            config = self._load_config()
+
+        self.api_key = config.get('api_key') or config.get('VISION_API_KEY') or config.get('IMAGE_API_KEY')
+        self.base_url = config.get('base_url') or config.get('VISION_API_BASE_URL') or config.get('IMAGE_API_BASE_URL')
+        self.model = config.get('model') or config.get('VISION_MODEL') or 'qwen2.5-vl-72b-instruct'
+
+        if not self.api_key or not self.base_url:
+            raise ValueError("缺少必要的 API 配置：api_key 和 base_url")
+
+        # Endpoint paths are appended with a leading "/", so drop any trailing
+        # slash to avoid requesting ".../v1//chat/completions".
+        self.base_url = self.base_url.rstrip('/')
+
+    def _load_config(self) -> Dict[str, Optional[str]]:
+        """Load the configuration from ``config/settings.json`` and the environment.
+
+        The ``vision_api`` section of the file is preferred; when it is absent
+        or empty the key and base URL of ``image_api`` are used instead.
+        Non-empty environment variables take precedence over the file, with
+        ``VISION_*`` checked before ``IMAGE_*``.
+
+        Returns:
+            Dict with the keys ``api_key``, ``base_url`` and ``model``.
+        """
+        config: Dict[str, Optional[str]] = {}
+
+        config_path = Path(__file__).parent.parent / 'config' / 'settings.json'
+        if config_path.exists():
+            with open(config_path, 'r', encoding='utf-8') as f:
+                settings = json.load(f)
+            vision_config = settings.get('vision_api') or {}
+            if vision_config:
+                config['api_key'] = vision_config.get('key')
+                config['base_url'] = vision_config.get('base_url')
+                config['model'] = vision_config.get('model')
+            else:
+                # Fall back to the image_api section (it supplies no model).
+                api_config = settings.get('image_api') or {}
+                config['api_key'] = api_config.get('key')
+                config['base_url'] = api_config.get('base_url')
+
+        # Environment variables win, but an empty variable must not erase a
+        # value that came from the settings file.
+        config['api_key'] = (
+            os.getenv('VISION_API_KEY') or os.getenv('IMAGE_API_KEY') or config.get('api_key')
+        )
+        config['base_url'] = (
+            os.getenv('VISION_API_BASE_URL') or os.getenv('IMAGE_API_BASE_URL') or config.get('base_url')
+        )
+        config['model'] = (
+            os.getenv('VISION_MODEL') or config.get('model') or 'qwen2.5-vl-72b-instruct'
+        )
+
+        return config
+
+    @staticmethod
+    def image_to_base64(image_path: str) -> str:
+        """Encode an image file as a base64 data URL.
+
+        Args:
+            image_path: Path of the image file.
+
+        Returns:
+            ``data:<mime>;base64,<payload>`` for the file content.
+
+        Raises:
+            FileNotFoundError: If ``image_path`` is not an existing file.
+        """
+        path = Path(image_path)
+        if not path.is_file():
+            raise FileNotFoundError(f"图片文件不存在: {image_path}")
+
+        # The MIME type is derived from the extension; unknown ones fall back to PNG.
+        mime_types = {
+            '.jpg': 'image/jpeg',
+            '.jpeg': 'image/jpeg',
+            '.png': 'image/png',
+            '.gif': 'image/gif',
+            '.webp': 'image/webp'
+        }
+        mime_type = mime_types.get(path.suffix.lower(), 'image/png')
+
+        b64_str = base64.b64encode(path.read_bytes()).decode('utf-8')
+        return f"data:{mime_type};base64,{b64_str}"
+
+    @staticmethod
+    def _looks_like_base64(text: str) -> bool:
+        """Return True when ``text`` (ignoring whitespace) is valid standard base64."""
+        compact = "".join(text.split())
+        if not compact:
+            return False
+        try:
+            base64.b64decode(compact, validate=True)
+        except ValueError:  # binascii.Error is a ValueError subclass
+            return False
+        return True
+
+    def analyze(
+        self,
+        image: Union[str, bytes],
+        prompt: Optional[str] = None,
+        mode: str = "describe",
+        max_tokens: int = 2000,
+        temperature: float = 0.7
+    ) -> Dict[str, Any]:
+        """Analyze an image and return the generated text.
+
+        Args:
+            image: Path of an image file, an http(s) URL, a ``data:`` URL, a
+                bare base64 string, or the raw image bytes.
+            prompt: Custom prompt; when given, ``mode`` is not used to pick one.
+            mode: One of the ``ANALYSIS_MODES`` keys; unknown names fall back
+                to ``describe``.
+            max_tokens: Value forwarded as ``max_tokens``.
+            temperature: Value forwarded as ``temperature``.
+
+        Returns:
+            On success ``{"success": True, "content": str, "mode": mode,
+            "usage": dict}``; on failure ``{"success": False, "error": ...,
+            "detail": ...}``.
+        """
+        if prompt is None:
+            prompt = self.ANALYSIS_MODES.get(mode, self.ANALYSIS_MODES["describe"])
+
+        # Normalise the image input to something usable as image_url.
+        if isinstance(image, str):
+            if os.path.isfile(image):
+                image_url = self.image_to_base64(image)
+            elif image.startswith(('data:', 'http://', 'https://')):
+                image_url = image
+            elif self._looks_like_base64(image):
+                image_url = f"data:image/png;base64,{image}"
+            else:
+                # Typically a mistyped or missing file path; sending it to the
+                # API as "base64" would only produce a confusing remote error.
+                return {
+                    "success": False,
+                    "error": "分析失败",
+                    "detail": f"输入既不是存在的图片文件或 URL，也不是有效的 base64 数据: {image[:80]}"
+                }
+        else:
+            image_url = f"data:image/png;base64,{base64.b64encode(image).decode('utf-8')}"
+
+        payload = {
+            "model": self.model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": prompt
+                        },
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": image_url
+                            }
+                        }
+                    ]
+                }
+            ],
+            "max_tokens": max_tokens,
+            "temperature": temperature
+        }
+
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.api_key}"
+        }
+
+        try:
+            with httpx.Client(timeout=120.0) as client:
+                response = client.post(
+                    f"{self.base_url}/chat/completions",
+                    headers=headers,
+                    json=payload
+                )
+                response.raise_for_status()
+                result = response.json()
+
+            # A reply without any choice carries no answer; report that
+            # explicitly instead of surfacing an IndexError.
+            choices = result.get("choices") or []
+            if not choices:
+                return {
+                    "success": False,
+                    "error": "分析失败",
+                    "detail": "响应中没有 choices 字段或为空"
+                }
+
+            message = choices[0].get("message") or {}
+            return {
+                "success": True,
+                # "content" and "usage" may be null in the JSON; normalise them
+                # so callers always get a str and a dict.
+                "content": message.get("content") or "",
+                "mode": mode,
+                "usage": result.get("usage") or {}
+            }
+
+        except httpx.HTTPStatusError as e:
+            # The start of the response body usually explains the rejection.
+            body = e.response.text[:500]
+            return {
+                "success": False,
+                "error": f"HTTP 错误: {e.response.status_code}",
+                "detail": f"{e} | {body}" if body else str(e)
+            }
+        except Exception as e:
+            return {
+                "success": False,
+                "error": "分析失败",
+                "detail": str(e)
+            }
+
+    def describe(self, image: Union[str, bytes]) -> Dict[str, Any]:
+        """General image description."""
+        return self.analyze(image, mode="describe")
+
+    def ocr(self, image: Union[str, bytes]) -> Dict[str, Any]:
+        """Text recognition (OCR)."""
+        return self.analyze(image, mode="ocr")
+
+    def analyze_chart(self, image: Union[str, bytes]) -> Dict[str, Any]:
+        """Chart analysis."""
+        return self.analyze(image, mode="chart")
+
+    def analyze_fashion(self, image: Union[str, bytes]) -> Dict[str, Any]:
+        """Outfit analysis."""
+        return self.analyze(image, mode="fashion")
+
+    def analyze_product(self, image: Union[str, bytes]) -> Dict[str, Any]:
+        """Product analysis."""
+        return self.analyze(image, mode="product")
+
+    def analyze_scene(self, image: Union[str, bytes]) -> Dict[str, Any]:
+        """Scene analysis."""
+        return self.analyze(image, mode="scene")
+
+    def batch_analyze(
+        self,
+        images: List[str],
+        mode: str = "describe"
+    ) -> List[Dict[str, Any]]:
+        """Analyze several images one after another.
+
+        Args:
+            images: Image inputs, each handled as in :meth:`analyze`.
+            mode: Analysis mode applied to every image.
+
+        Returns:
+            One result dict per input, in order, each with an extra
+            ``"image"`` key holding the corresponding input.
+        """
+        results = []
+        for image in images:
+            result = self.analyze(image, mode=mode)
+            result["image"] = image
+            results.append(result)
+        return results
+
+
+def main():
+    """Command-line entry point for image analysis.
+
+    Parses the arguments, analyzes one image and prints the text together
+    with the token usage. Exits with status 1 when the API configuration is
+    missing or the analysis fails, so that calling scripts can detect failure
+    from the exit code.
+    """
+    import argparse
+    import sys
+
+    parser = argparse.ArgumentParser(description='图生文分析工具（视觉识别）')
+    parser.add_argument('image', help='输入图片路径')
+    # Take the choices from ANALYSIS_MODES so the CLI cannot drift from the class.
+    parser.add_argument('-m', '--mode', default='describe',
+                       choices=list(ImageToTextAnalyzer.ANALYSIS_MODES),
+                       help='分析模式')
+    parser.add_argument('-p', '--prompt', help='自定义分析提示词')
+    parser.add_argument('--max-tokens', type=int, default=2000, help='最大输出 token 数')
+
+    args = parser.parse_args()
+
+    try:
+        analyzer = ImageToTextAnalyzer()
+    except ValueError as exc:
+        # Missing or unreadable API configuration: report it without a traceback.
+        print(f"错误: {exc}")
+        sys.exit(1)
+
+    result = analyzer.analyze(
+        image=args.image,
+        prompt=args.prompt,
+        mode=args.mode,
+        max_tokens=args.max_tokens
+    )
+
+    if not result["success"]:
+        print(f"分析失败: {result['error']}")
+        print(f"详情: {result.get('detail', 'N/A')}")
+        sys.exit(1)
+
+    # "usage" may be missing or null; treat both as "no data".
+    usage = result.get("usage") or {}
+    print(f"\n=== 分析结果 ({result['mode']}) ===\n")
+    print(result["content"])
+    print("\n=== Token 使用 ===")
+    print(f"输入: {usage.get('prompt_tokens', 'N/A')}")
+    print(f"输出: {usage.get('completion_tokens', 'N/A')}")
+
+
+if __name__ == "__main__":
+    main()
--- a/.opencode/skills/image-service/scripts/merge_long_image.py
+++ b/.opencode/skills/image-service/scripts/merge_long_image.py
@@ -0,0 +1,251 @@
+#!/usr/bin/env python3
+"""
+长图拼接脚本 (Merge Long Image)
+将多张图片按顺序垂直拼接成一张微信长图
+
+Author: 翟星人
+"""
+
+import argparse
+import os
+import glob as glob_module
+from pathlib import Path
+from typing import List, Optional, Dict, Any
+
+from PIL import Image
+import numpy as np
+
+
+class LongImageMerger:
+    """Vertically stitch several images into one long image."""
+
+    def __init__(self, target_width: int = 1080):
+        """Initialise the merger.
+
+        Args:
+            target_width: Width in pixels that every image is scaled to
+                (default 1080).
+        """
+        self.target_width = target_width
+
+    def _blend_images(self, img_top: Image.Image, img_bottom: Image.Image, blend_height: int) -> Image.Image:
+        """Fade the bottom strip of ``img_top`` into the top strip of ``img_bottom``.
+
+        Args:
+            img_top: The upper image.
+            img_bottom: The lower image.
+            blend_height: Requested height of the transition in pixels; it is
+                capped at a quarter of either image's height.
+
+        Returns:
+            A copy of ``img_bottom`` whose top rows are a linear mix of both
+            images (weight of ``img_top`` goes from 1 at the first row to 0).
+            When the capped height is 0 the copy is returned unchanged.
+        """
+        blend_height = min(blend_height, img_top.height // 4, img_bottom.height // 4)
+        if blend_height <= 0:
+            # Images too short to blend: an empty strip cannot be turned into
+            # an image, so skip the transition.
+            return img_bottom.copy()
+
+        # Both strips must share one pixel layout for the arithmetic below.
+        if img_top.mode != img_bottom.mode:
+            img_top = img_top.convert(img_bottom.mode)
+
+        top_region = img_top.crop((0, img_top.height - blend_height, img_top.width, img_top.height))
+        bottom_region = img_bottom.crop((0, 0, img_bottom.width, blend_height))
+
+        top_array = np.array(top_region, dtype=np.float32)
+        bottom_array = np.array(bottom_region, dtype=np.float32)
+
+        # One weight per row, broadcast over the remaining axes so that both
+        # single-channel (2-D) and multi-channel (3-D) arrays work.
+        alpha = np.linspace(1, 0, blend_height).reshape((-1,) + (1,) * (top_array.ndim - 1))
+
+        blended_array = top_array * alpha + bottom_array * (1 - alpha)
+        blended_array = np.clip(blended_array, 0, 255).astype(np.uint8)
+
+        blended_region = Image.fromarray(blended_array)
+
+        result = img_bottom.copy()
+        result.paste(blended_region, (0, 0))
+
+        return result
+
+    def merge(
+        self,
+        image_paths: List[str],
+        output_path: str,
+        gap: int = 0,
+        background_color: str = "white",
+        blend: int = 0
+    ) -> Dict[str, Any]:
+        """Stitch the given images, top to bottom, into one file.
+
+        Args:
+            image_paths: Image paths in stitching order; entries that are not
+                existing files are skipped with a printed warning.
+            output_path: Path of the file to write.
+            gap: Pixels of background between consecutive images (default 0).
+            background_color: Canvas colour (default white).
+            blend: Height in pixels of the seam transition; 0 disables it.
+
+        Returns:
+            On success ``{"success": True, "saved_path", "width", "height",
+            "image_count"}``; on failure ``{"success": False, "error": ...}``.
+        """
+        if not image_paths:
+            return {"success": False, "error": "没有提供图片路径"}
+
+        valid_paths = []
+        for p in image_paths:
+            if os.path.isfile(p):
+                valid_paths.append(p)
+            else:
+                print(f"警告: 文件不存在，跳过 - {p}")
+
+        if not valid_paths:
+            return {"success": False, "error": "没有有效的图片文件"}
+
+        opened: List[Image.Image] = []
+        resized_imgs: List[Image.Image] = []
+        try:
+            for p in valid_paths:
+                opened.append(Image.open(p))
+
+            for img in opened:
+                # Normalise every mode (L, LA, CMYK, P, RGBA, ...) to RGB so
+                # that seam blending always works on identically shaped arrays.
+                if img.mode != 'RGB':
+                    img = img.convert('RGB')
+                ratio = self.target_width / img.width
+                # Extremely wide images would otherwise round down to height 0.
+                new_height = max(1, int(img.height * ratio))
+                resized_imgs.append(
+                    img.resize((self.target_width, new_height), Image.Resampling.LANCZOS)
+                )
+
+            if blend > 0:
+                for i in range(1, len(resized_imgs)):
+                    blended = self._blend_images(resized_imgs[i - 1], resized_imgs[i], blend)
+                    resized_imgs[i].close()
+                    resized_imgs[i] = blended
+
+            total_height = sum(img.height for img in resized_imgs) + gap * (len(resized_imgs) - 1)
+
+            long_image = Image.new('RGB', (self.target_width, total_height), background_color)
+
+            y_offset = 0
+            for img in resized_imgs:
+                long_image.paste(img, (0, y_offset))
+                y_offset += img.height + gap
+
+            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+            long_image.save(output_path, quality=95)
+
+            return {
+                "success": True,
+                "saved_path": output_path,
+                "width": self.target_width,
+                "height": total_height,
+                "image_count": len(resized_imgs)
+            }
+
+        except Exception as e:
+            return {"success": False, "error": str(e)}
+        finally:
+            # Release file handles and pixel buffers on success and on failure.
+            for img in opened + resized_imgs:
+                img.close()
+
+    def merge_from_pattern(
+        self,
+        pattern: str,
+        output_path: str,
+        sort_by: str = "name",
+        gap: int = 0,
+        background_color: str = "white",
+        blend: int = 0
+    ) -> Dict[str, Any]:
+        """Stitch all images matching a glob pattern.
+
+        Args:
+            pattern: Glob pattern such as ``"*.png"`` or ``"generated_*.png"``.
+            output_path: Path of the file to write.
+            sort_by: ``"name"`` (path order) or ``"time"`` (modification
+                time); any other value keeps the order returned by glob.
+            gap: Pixels of background between consecutive images.
+            background_color: Canvas colour.
+            blend: Height in pixels of the seam transition.
+
+        Returns:
+            The result dict of :meth:`merge`, or a failure dict when nothing
+            matches the pattern.
+        """
+        image_paths = glob_module.glob(pattern)
+
+        if not image_paths:
+            return {"success": False, "error": f"没有找到匹配 '{pattern}' 的图片"}
+
+        if sort_by == "name":
+            image_paths.sort()
+        elif sort_by == "time":
+            image_paths.sort(key=os.path.getmtime)
+
+        print(f"找到 {len(image_paths)} 张图片:")
+        for i, p in enumerate(image_paths, 1):
+            print(f"  {i}. {os.path.basename(p)}")
+
+        return self.merge(image_paths, output_path, gap, background_color, blend)
+
+
+def main():
+    """Command-line entry point for long-image stitching.
+
+    Stitches either the images matched by ``-p`` or the positional image
+    paths and prints the outcome. Exits with status 1 when stitching fails,
+    so that calling scripts can detect failure from the exit code.
+    """
+    import sys
+
+    parser = argparse.ArgumentParser(
+        description='长图拼接工具 - 将多张图片垂直拼接成微信长图',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+示例用法:
+  # 拼接指定图片
+  python merge_long_image.py img1.png img2.png img3.png -o output.png
+  
+  # 使用通配符匹配
+  python merge_long_image.py -p "generated_*.png" -o long_image.png
+  
+  # 指定宽度和间隔
+  python merge_long_image.py -p "*.png" -o out.png -w 750 -g 20
+  
+  # 按修改时间排序
+  python merge_long_image.py -p "*.png" -o out.png --sort time
+  
+  # 启用接缝融合过渡（推荐40px）
+  python merge_long_image.py img1.png img2.png -o out.png --blend 40
+        """
+    )
+
+    parser.add_argument('images', nargs='*', help='要拼接的图片路径列表')
+    parser.add_argument('-p', '--pattern', help='glob 模式匹配图片，如 "*.png"')
+    parser.add_argument('-o', '--output', required=True, help='输出文件路径')
+    parser.add_argument('-w', '--width', type=int, default=1080, help='目标宽度，默认1080')
+    parser.add_argument('-g', '--gap', type=int, default=0, help='图片间隔像素，默认0')
+    parser.add_argument('--sort', choices=['name', 'time', 'none'], default='name',
+                        help='排序方式：name(文件名)/time(修改时间)/none')
+    parser.add_argument('--bg', default='white', help='背景颜色，默认 white')
+    parser.add_argument('--blend', type=int, default=0,
+                        help='接缝融合过渡高度（像素），推荐30-50，默认0不融合')
+
+    args = parser.parse_args()
+
+    if not args.images and not args.pattern:
+        parser.error("请提供图片路径列表或使用 -p 指定匹配模式")
+
+    merger = LongImageMerger(target_width=args.width)
+
+    if args.pattern:
+        if args.images:
+            # The pattern takes precedence; say so instead of dropping the
+            # positional paths silently.
+            print(f"警告: 已指定 -p，忽略位置参数中的 {len(args.images)} 张图片")
+        result = merger.merge_from_pattern(
+            pattern=args.pattern,
+            output_path=args.output,
+            sort_by=args.sort,
+            gap=args.gap,
+            background_color=args.bg,
+            blend=args.blend
+        )
+    else:
+        result = merger.merge(
+            image_paths=args.images,
+            output_path=args.output,
+            gap=args.gap,
+            background_color=args.bg,
+            blend=args.blend
+        )
+
+    if not result["success"]:
+        print(f"\n拼接失败: {result['error']}")
+        sys.exit(1)
+
+    print("\n拼接成功！")
+    print(f"输出文件: {result['saved_path']}")
+    print(f"尺寸: {result['width']} x {result['height']}")
+    print(f"共 {result['image_count']} 张图片")
+
+
+if __name__ == "__main__":
+    main()
--- a/.opencode/skills/image-service/scripts/research_image.py
+++ b/.opencode/skills/image-service/scripts/research_image.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3
+"""
+调研报告专用信息图生成脚本
+预设手绘风格可视化模板，保持系列配图风格统一
+
+Author: 翟星人
+"""
+
+import argparse
+import subprocess
+import sys
+import os
+
+# Preset style templates (hand-drawn visualization style), keyed by style id.
+# Per template:
+#   name    - label printed by generate_image() and list_styles()
+#   prefix  - text placed before the title/content in the generated prompt
+#   suffix  - text placed after the title/content in the generated prompt
+#   trigger - usage hint printed by list_styles()
+STYLE_TEMPLATES = {
+    "arch": {
+        "name": "架构图",
+        "prefix": "手绘风格技术架构信息图，简洁扁平设计，",
+        "suffix": "手绘线条感，柔和的科技蓝配色(#4A90D9)，浅灰白色背景，模块化分层布局，圆角矩形框，手写体中文标签，简约图标，整体清新专业。",
+        "trigger": "核心架构、系统结构、技术栈、模块组成"
+    },
+    "flow": {
+        "name": "流程图",
+        "prefix": "手绘风格流程信息图，简洁扁平设计，",
+        "suffix": "手绘线条和箭头，科技蓝(#4A90D9)主色调，浅绿色(#81C784)表示成功节点，浅橙色(#FFB74D)表示判断节点，浅灰白色背景，从上到下或从左到右布局，手写体中文标签，步骤清晰。",
+        "trigger": "流程、步骤、工作流、执行顺序"
+    },
+    "compare": {
+        "name": "对比图",
+        "prefix": "手绘风格对比信息图，左右分栏设计，",
+        "suffix": "手绘线条感，左侧用柔和蓝色(#4A90D9)，右侧用柔和橙色(#FF8A65)，中间VS分隔，浅灰白色背景，手写体中文标签，对比项目清晰列出，简约图标点缀。",
+        "trigger": "对比、vs、区别、差异"
+    },
+    "concept": {
+        "name": "概念图",
+        "prefix": "手绘风格概念信息图，中心发散设计，",
+        "suffix": "手绘线条感，中心主题用科技蓝(#4A90D9)，周围要素用柔和的蓝紫渐变色系，浅灰白色背景，连接线条有手绘感，手写体中文标签，布局均衡美观。",
+        "trigger": "核心概念、要素组成、多个方面"
+    }
+}
+
+# Base paths: BASE_DIR is the parent of the directory containing this script;
+# TEXT_TO_IMAGE_SCRIPT is the generator script that generate_image() runs.
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+TEXT_TO_IMAGE_SCRIPT = os.path.join(BASE_DIR, "scripts", "text_to_image.py")
+
+
+def generate_image(style: str, title: str, content: str, output: str):
+    """Generate an infographic with one of the preset styles.
+
+    Builds the prompt from the style's prefix, the title, the content and the
+    style's suffix, then runs the text-to-image script as a child process.
+    Exits the process with status 1 if the style is unknown or the child
+    returns a non-zero exit code.
+
+    Args:
+        style: Style id (arch/flow/compare/concept).
+        title: Chart title.
+        content: Description of the chart content.
+        output: Output path handed to the child via ``--output``.
+    """
+    template = STYLE_TEMPLATES.get(style)
+    if template is None:
+        print(f"错误: 未知风格 '{style}'")
+        print(f"可用风格: {', '.join(STYLE_TEMPLATES.keys())}")
+        sys.exit(1)
+
+    # Full prompt: prefix + title + content + suffix.
+    prompt = "{}标题：{}，{}，{}".format(
+        template['prefix'], title, content, template['suffix']
+    )
+
+    print("生成 {}: {}".format(template['name'], title))
+    print("风格: 手绘体可视化")
+    print("输出: {}".format(output))
+
+    # Run text_to_image.py with the current interpreter; its output goes
+    # straight to this process's stdout/stderr.
+    completed = subprocess.run(
+        [sys.executable, TEXT_TO_IMAGE_SCRIPT, prompt, "--output", output],
+        capture_output=False,
+    )
+
+    if completed.returncode != 0:
+        print("生成失败")
+        sys.exit(1)
+
+
+def list_styles():
+    """Print every available style id with its label and usage hint."""
+    print("可用风格模板（手绘体可视化）:\n")
+    for style_key in STYLE_TEMPLATES:
+        spec = STYLE_TEMPLATES[style_key]
+        print("  {:10} - {}".format(style_key, spec['name']))
+        print("             触发场景: {}".format(spec['trigger']))
+        print()
+
+
+def main():
+    """Command-line entry point: list the styles or generate one infographic.
+
+    With ``--list`` the styles are printed and nothing else happens.
+    Otherwise ``-t``, ``-n``, ``-c`` and ``-o`` must all be given; if any is
+    missing the help text and an error are printed and the process exits
+    with status 1.
+    """
+    usage_examples = """
+示例:
+  # 生成架构图
+  python research_image.py -t arch -n "Ralph Loop 核心架构" -c "展示 Prompt、Agent、Stop Hook、Files 四个模块的循环关系" -o images/arch.png
+
+  # 生成流程图
+  python research_image.py -t flow -n "Stop Hook 工作流程" -c "Agent尝试退出、Hook触发、检查条件、允许或阻止退出" -o images/flow.png
+
+  # 生成对比图
+  python research_image.py -t compare -n "ReAct vs Ralph Loop" -c "左侧ReAct自我评估停止，右侧Ralph外部Hook控制" -o images/compare.png
+
+  # 生成概念图
+  python research_image.py -t concept -n "状态持久化" -c "中心是Agent，周围是progress.txt、prd.json、Git历史、代码文件四个要素" -o images/concept.png
+
+  # 查看所有风格
+  python research_image.py --list
+        """
+    cli = argparse.ArgumentParser(
+        description="调研报告专用信息图生成（手绘风格）",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=usage_examples,
+    )
+
+    cli.add_argument(
+        "-t", "--type",
+        choices=list(STYLE_TEMPLATES),
+        help="图解类型: arch(架构图), flow(流程图), compare(对比图), concept(概念图)",
+    )
+    cli.add_argument("-n", "--name", help="图表标题")
+    cli.add_argument("-c", "--content", help="图表内容描述")
+    cli.add_argument("-o", "--output", help="输出文件路径")
+    cli.add_argument("--list", action="store_true", help="列出所有可用风格")
+
+    opts = cli.parse_args()
+
+    if opts.list:
+        list_styles()
+        return
+
+    # The four options are not marked required in argparse because --list
+    # must work without them; enforce them here instead.
+    required_values = (opts.type, opts.name, opts.content, opts.output)
+    if any(not value for value in required_values):
+        cli.print_help()
+        print("\n错误: 必须提供 -t, -n, -c, -o 参数")
+        sys.exit(1)
+
+    generate_image(opts.type, opts.name, opts.content, opts.output)
+
+
+if __name__ == "__main__":
+    main()
--- a/.opencode/skills/image-service/scripts/text_to_image.py
+++ b/.opencode/skills/image-service/scripts/text_to_image.py
@@ -0,0 +1,350 @@
+#!/usr/bin/env python3
+"""
+文生图脚本 (Text-to-Image)
+使用 Lyra Flash API 根据中文文本描述生成图片
+支持参考图风格生成
+
+Author: 翟星人
+"""
+
+import httpx
+import base64
+import json
+import os
+from typing import Dict, Any, Optional, Union
+from pathlib import Path
+
+# Aspect ratios accepted by the command line's -r/--ratio option; main()
+# rejects any ratio that is not listed here.
+VALID_ASPECT_RATIOS = [
+    "1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"
+]
+
+# Pixel sizes ("WIDTHxHEIGHT") considered known-good for -s/--size.  main()
+# only prints a warning for a size outside this list; it is still sent.
+VALID_SIZES = [
+    # square
+    "1024x1024",
+    # landscape (width > height)
+    "1536x1024", "1792x1024", "1344x768", "1248x832", "1184x864", "1152x896", "1536x672",
+    # portrait (height > width)
+    "1024x1536", "1024x1792", "768x1344", "832x1248", "864x1184", "896x1152"
+]
+
+# Size string put into the request payload for each aspect ratio.
+# NOTE(review): several values are not the exact ratio of their key, e.g.
+# "3:4" -> 1024x1536 is 2:3, "4:3" -> 1536x1024 is 3:2, "9:16" -> 1024x1792
+# is 4:7, "4:5" -> 864x1184 is roughly 3:4, "21:9" -> 1536x672 is 16:7.
+# Presumably these are the closest sizes the backend accepts — confirm
+# against the API's supported-size list before changing anything.
+RATIO_TO_SIZE = {
+    "1:1": "1024x1024",
+    "2:3": "832x1248",
+    "3:2": "1248x832",
+    "3:4": "1024x1536",
+    "4:3": "1536x1024",
+    "4:5": "864x1184",
+    "5:4": "1184x864",
+    "9:16": "1024x1792",
+    "16:9": "1792x1024",
+    "21:9": "1536x672"
+}
+
+
+class TextToImageGenerator:
+    """文生图生成器"""
+    
+    def __init__(self, config: Optional[Dict[str, str]] = None):
+        """
+        初始化生成器
+        
+        Args:
+            config: 配置字典，包含 api_key, base_url, model
+                   如果不传则从环境变量或配置文件读取
+        """
+        if config is None:
+            config = self._load_config()
+        
+        self.api_key = config.get('api_key') or config.get('IMAGE_API_KEY')
+        self.base_url = config.get('base_url') or config.get('IMAGE_API_BASE_URL')
+        self.model = config.get('model') or config.get('IMAGE_MODEL') or 'lyra-flash-9'
+        
+        if not self.api_key or not self.base_url:
+            raise ValueError("缺少必要的 API 配置：api_key 和 base_url")
+    
+    def _load_config(self) -> Dict[str, str]:
+        """从配置文件或环境变量加载配置"""
+        config = {}
+        
+        config_path = Path(__file__).parent.parent / 'config' / 'settings.json'
+        if config_path.exists():
+            with open(config_path, 'r', encoding='utf-8') as f:
+                settings = json.load(f)
+                api_config = settings.get('image_api', {})
+                config['api_key'] = api_config.get('key')
+                config['base_url'] = api_config.get('base_url')
+                config['model'] = api_config.get('model')
+        
+        config['api_key'] = os.getenv('IMAGE_API_KEY', config.get('api_key'))
+        config['base_url'] = os.getenv('IMAGE_API_BASE_URL', config.get('base_url'))
+        config['model'] = os.getenv('IMAGE_MODEL', config.get('model'))
+        
+        return config
+    
+    @staticmethod
+    def image_to_base64(image_path: str, with_prefix: bool = True) -> str:
+        """将图片文件转换为 base64 编码"""
+        path = Path(image_path)
+        if not path.exists():
+            raise FileNotFoundError(f"图片文件不存在: {image_path}")
+        
+        suffix = path.suffix.lower()
+        mime_types = {
+            '.jpg': 'image/jpeg',
+            '.jpeg': 'image/jpeg',
+            '.png': 'image/png',
+            '.gif': 'image/gif',
+            '.webp': 'image/webp'
+        }
+        mime_type = mime_types.get(suffix, 'image/png')
+        
+        with open(image_path, 'rb') as f:
+            b64_str = base64.b64encode(f.read()).decode('utf-8')
+        
+        if with_prefix:
+            return f"data:{mime_type};base64,{b64_str}"
+        return b64_str
+    
+    def generate(
+        self,
+        prompt: str,
+        size: Optional[str] = None,
+        aspect_ratio: Optional[str] = None,
+        image_size: Optional[str] = None,
+        output_path: Optional[str] = None,
+        response_format: str = "b64_json",
+        ref_image: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """
+        生成图片
+        
+        Args:
+            prompt: 中文图像描述提示词
+            size: 图片尺寸 (如 1792x1024)，与 aspect_ratio 二选一
+            aspect_ratio: 宽高比 (如 16:9, 3:4)，推荐使用
+            image_size: 分辨率 (1K/2K/4K)，仅 gemini-3.0-pro-image-preview 支持
+            output_path: 输出文件路径，如果提供则保存图片
+            response_format: 响应格式，默认 b64_json
+            ref_image: 参考图片路径，用于风格参考
+            
+        Returns:
+            包含生成结果的字典
+        """
+        if ref_image:
+            return self._generate_with_reference(
+                prompt=prompt,
+                ref_image=ref_image,
+                aspect_ratio=aspect_ratio,
+                size=size,
+                output_path=output_path,
+                response_format=response_format
+            )
+        
+        payload: Dict[str, Any] = {
+            "model": self.model,
+            "prompt": prompt,
+            "response_format": response_format
+        }
+        
+        # 确定尺寸：优先用 aspect_ratio 映射，其次用 size
+        if aspect_ratio:
+            payload["size"] = RATIO_TO_SIZE.get(aspect_ratio, "1024x1024")
+        elif size:
+            payload["size"] = size
+        else:
+            payload["size"] = "1792x1024"  # 默认 16:9
+        
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.api_key}"
+        }
+        
+        try:
+            with httpx.Client(timeout=180.0) as client:
+                response = client.post(
+                    f"{self.base_url}/images/generations",
+                    headers=headers,
+                    json=payload
+                )
+                response.raise_for_status()
+                result = response.json()
+                
+                if output_path and result.get("data"):
+                    b64_data = result["data"][0].get("b64_json")
+                    if b64_data:
+                        self._save_image(b64_data, output_path)
+                        result["saved_path"] = output_path
+                
+                return {
+                    "success": True,
+                    "data": result,
+                    "saved_path": output_path if output_path else None
+                }
+                
+        except httpx.HTTPStatusError as e:
+            return {
+                "success": False,
+                "error": f"HTTP 错误: {e.response.status_code}",
+                "detail": str(e)
+            }
+        except Exception as e:
+            return {
+                "success": False,
+                "error": "生成失败",
+                "detail": str(e)
+            }
+    
+    def _generate_with_reference(
+        self,
+        prompt: str,
+        ref_image: str,
+        aspect_ratio: Optional[str] = None,
+        size: Optional[str] = None,
+        output_path: Optional[str] = None,
+        response_format: str = "b64_json"
+    ) -> Dict[str, Any]:
+        """
+        参考图片风格生成新图
+        
+        Args:
+            prompt: 新图内容描述
+            ref_image: 参考图片路径
+            aspect_ratio: 宽高比
+            size: 尺寸
+            output_path: 输出路径
+            response_format: 响应格式
+        """
+        image_b64 = self.image_to_base64(ref_image)
+        
+        enhanced_prompt = f"参考这张图片的背景风格、配色方案和视觉设计，保持完全一致的风格，生成新内容：{prompt}"
+        
+        # 确定尺寸：优先用 aspect_ratio 映射，其次用 size
+        if size is None:
+            size = RATIO_TO_SIZE.get(aspect_ratio, "1024x1792") if aspect_ratio else "1024x1792"
+        
+        payload = {
+            "model": self.model,
+            "prompt": enhanced_prompt,
+            "image": image_b64,
+            "size": size,
+            "response_format": response_format
+        }
+        
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.api_key}"
+        }
+        
+        try:
+            with httpx.Client(timeout=180.0) as client:
+                response = client.post(
+                    f"{self.base_url}/images/edits",
+                    headers=headers,
+                    json=payload
+                )
+                response.raise_for_status()
+                result = response.json()
+                
+                if output_path and result.get("data"):
+                    b64_data = result["data"][0].get("b64_json")
+                    if b64_data:
+                        self._save_image(b64_data, output_path)
+                        result["saved_path"] = output_path
+                
+                return {
+                    "success": True,
+                    "data": result,
+                    "saved_path": output_path if output_path else None
+                }
+                
+        except httpx.HTTPStatusError as e:
+            return {
+                "success": False,
+                "error": f"HTTP 错误: {e.response.status_code}",
+                "detail": str(e)
+            }
+        except Exception as e:
+            return {
+                "success": False,
+                "error": "生成失败",
+                "detail": str(e)
+            }
+    
+    def _save_image(self, b64_data: str, output_path: str) -> None:
+        """保存 base64 图片到文件"""
+        image_data = base64.b64decode(b64_data)
+        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+        with open(output_path, 'wb') as f:
+            f.write(image_data)
+
+
+def main():
+    """命令行入口"""
+    import argparse
+    import time
+    
+    parser = argparse.ArgumentParser(
+        description='文生图工具',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=f'''
+尺寸参数说明:
+  -r/--ratio  推荐使用，支持: {", ".join(VALID_ASPECT_RATIOS)}
+  -s/--size   传统尺寸，支持: {", ".join(VALID_SIZES[:4])}...
+  --resolution 分辨率(1K/2K/4K)，仅 gemini-3.0-pro-image-preview 支持
+  --ref       参考图片路径，后续图片将参考首图风格生成
+
+示例:
+  python text_to_image.py "描述" -r 3:4              # 竖版 3:4
+  python text_to_image.py "描述" -r 9:16 -o out.png  # 竖屏 9:16
+  python text_to_image.py "描述" -s 1024x1792        # 传统尺寸
+  
+  # 长图场景：首图定调，后续参考首图风格
+  python text_to_image.py "首屏内容" -r 3:4 -o 01.png
+  python text_to_image.py "第二屏内容" -r 3:4 --ref 01.png -o 02.png
+'''
+    )
+    parser.add_argument('prompt', help='中文图像描述提示词')
+    parser.add_argument('-o', '--output', help='输出文件路径（默认保存到当前目录）')
+    parser.add_argument('-r', '--ratio', help=f'宽高比，推荐使用。可选: {", ".join(VALID_ASPECT_RATIOS)}')
+    parser.add_argument('-s', '--size', help='图片尺寸 (如 1792x1024)')
+    parser.add_argument('--resolution', help='分辨率 (1K/2K/4K)，仅部分模型支持')
+    parser.add_argument('--ref', help='参考图片路径，用于风格参考（长图场景）')
+    
+    args = parser.parse_args()
+    
+    if args.ratio and args.ratio not in VALID_ASPECT_RATIOS:
+        print(f"错误: 不支持的宽高比 '{args.ratio}'")
+        print(f"支持的宽高比: {', '.join(VALID_ASPECT_RATIOS)}")
+        return
+    
+    if args.size and args.size not in VALID_SIZES:
+        print(f"警告: 尺寸 '{args.size}' 可能不被支持")
+        print(f"推荐使用 -r/--ratio 参数指定宽高比")
+    
+    if args.ref and not os.path.exists(args.ref):
+        print(f"错误: 参考图片不存在: {args.ref}")
+        return
+    
+    output_path = args.output
+    if not output_path:
+        timestamp = time.strftime("%Y%m%d_%H%M%S")
+        output_path = f"generated_{timestamp}.png"
+    
+    generator = TextToImageGenerator()
+    result = generator.generate(
+        prompt=args.prompt,
+        size=args.size,
+        aspect_ratio=args.ratio,
+        image_size=args.resolution,
+        output_path=output_path,
+        ref_image=args.ref
+    )
+    
+    if result["success"]:
+        print(f"生成成功！")
+        if result.get("saved_path"):
+            print(f"图片已保存到: {result['saved_path']}")
+    else:
+        print(f"生成失败: {result['error']}")
+        print(f"详情: {result.get('detail', 'N/A')}")
+
+
+if __name__ == "__main__":
+    main()