Initial commit: skills library

- 70 skills with code and documentation - Add .gitignore (ignore __pycache__, output/, temp/, venv/) - Clean up test intermediates and caches
2026-04-26 19:27:40 +08:00
commit 04db423416
861 changed files with 210414 additions and 0 deletions
@@ -0,0 +1,216 @@
+#!/usr/bin/env python3
+"""
+Standalone Vision Analyzer - Simplified version for agent-vision-awareness skill
+
+This is a self-contained version of the vision analyzer that doesn't depend on
+the image-service skill structure, making it easier to integrate directly.
+"""
+
+import base64
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Dict, Any, Optional
+import httpx
+
+
+class StandaloneVisionAnalyzer:
+    """Analyze images by calling a chat-completions style vision API directly.
+
+    The analyzer base64-encodes a local image, embeds it as a data URL in a
+    single user message together with a text prompt, and POSTs the payload to
+    ``{base_url}/chat/completions`` using Bearer authentication.
+
+    Attributes:
+        api_key: Credential sent in the ``Authorization`` header.
+        base_url: API root; ``/chat/completions`` is appended per request.
+        model: Model identifier placed in the request payload.
+    """
+
+    # Predefined analysis modes: mode name -> prompt text sent to the model.
+    # "custom" is a placeholder; its prompt is supplied by the caller.
+    ANALYSIS_MODES = {
+        "describe": "请详细描述这张图片的内容，包括：人物、场景、物品、颜色、布局等所有细节。",
+        "ocr": "请仔细识别这张图片中的所有文字内容，按照文字在图片中的位置顺序输出。如果是中文，请保持原文输出。",
+        "chart": "请分析这张图表的内容，包括：图表类型、数据趋势、关键数据点、标题标签、以及数据的结论或洞察。",
+        "fashion": "请分析这张图片中人物的穿搭，包括：服装款式、颜色搭配、配饰、整体风格等。",
+        "product": "请分析这张产品图片，包括：产品类型、外观特征、功能特点、品牌信息等。",
+        "scene": "请描述这张图片的场景，包括：地点、环境、氛围、时间（白天/夜晚）等。",
+        "custom": "用户自定义问题",
+    }
+
+    # Values used when the configuration leaves base_url / model unset.
+    DEFAULT_BASE_URL = "https://ark.cn-beijing.volces.com/api/coding/v3"
+    DEFAULT_MODEL = "doubao-seed-code"
+    # Media type declared in the data URL when none can be guessed.
+    DEFAULT_MIME_TYPE = "image/png"
+
+    def __init__(self, config: Optional[Dict[str, str]] = None):
+        """
+        Initialize the analyzer.
+
+        Args:
+            config: Configuration dictionary with ``api_key`` (or
+                ``VOLCENGINE_API_KEY``), ``base_url`` and ``model``. When
+                omitted, values are read from environment variables.
+
+        Raises:
+            ValueError: If no API key is configured.
+        """
+        if config is None:
+            config = self._load_config()
+
+        # No built-in fallback key: credentials must come from the caller or
+        # the environment, never from a literal kept in source control.
+        self.api_key = config.get("api_key") or config.get("VOLCENGINE_API_KEY")
+        self.base_url = config.get("base_url") or self.DEFAULT_BASE_URL
+        self.model = config.get("model") or self.DEFAULT_MODEL
+
+        if not self.api_key or not self.base_url:
+            raise ValueError(
+                "Missing required API configuration: api_key and base_url "
+                "(pass them in config or set VOLCENGINE_API_KEY / DASHSCOPE_API_KEY)"
+            )
+
+    def _load_config(self) -> Dict[str, Optional[str]]:
+        """Load configuration from environment variables.
+
+        Returns:
+            A dict with ``api_key``, ``base_url`` and ``model``; each value is
+            ``None`` when the corresponding variable is not set.
+        """
+        return {
+            # VOLCENGINE_API_KEY takes precedence over DASHSCOPE_API_KEY.
+            "api_key": os.environ.get("VOLCENGINE_API_KEY")
+            or os.environ.get("DASHSCOPE_API_KEY"),
+            "base_url": os.environ.get("VISION_API_BASE_URL"),
+            "model": os.environ.get("VISION_MODEL"),
+        }
+
+    def encode_image(self, image_path: Path) -> str:
+        """Return the file's raw bytes encoded as a base64 ASCII string."""
+        with open(image_path, "rb") as image_file:
+            return base64.b64encode(image_file.read()).decode("utf-8")
+
+    @classmethod
+    def _guess_mime_type(cls, image_path: Path) -> str:
+        """Guess the image media type from the file name, defaulting to PNG."""
+        import mimetypes
+
+        mime_type, _ = mimetypes.guess_type(str(image_path))
+        if mime_type and mime_type.startswith("image/"):
+            return mime_type
+        return cls.DEFAULT_MIME_TYPE
+
+    def analyze(self, image_path: Path, question: str) -> str:
+        """
+        Analyze image content.
+
+        Args:
+            image_path: Path to the image file (a ``str`` is also accepted)
+            question: Question/prompt for analysis
+
+        Returns:
+            Analysis result text
+
+        Raises:
+            FileNotFoundError: If the image file does not exist.
+            ValueError: If the API answers 404 (wrong base_url) or 401 (bad key).
+            RuntimeError: For any other HTTP error or failure during the request
+                or while reading the response.
+        """
+        # Accept plain strings as well as Path objects.
+        image_path = Path(image_path)
+        if not image_path.exists():
+            raise FileNotFoundError(f"Image not found: {image_path}")
+
+        base64_image = self.encode_image(image_path)
+        # Declare the real media type instead of always claiming PNG.
+        mime_type = self._guess_mime_type(image_path)
+
+        headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+        }
+
+        payload = {
+            "model": self.model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": question},
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:{mime_type};base64,{base64_image}"
+                            },
+                        },
+                    ],
+                }
+            ],
+            "max_tokens": 2000,
+        }
+
+        # Tolerate a base_url configured with a trailing slash.
+        url = f"{self.base_url.rstrip('/')}/chat/completions"
+
+        try:
+            with httpx.Client(timeout=30.0) as client:
+                response = client.post(url, headers=headers, json=payload)
+                response.raise_for_status()
+                result = response.json()
+                return result["choices"][0]["message"]["content"]
+        except httpx.HTTPStatusError as e:
+            status_code = e.response.status_code
+            if status_code == 404:
+                raise ValueError(
+                    f"API endpoint not found, check base_url: {self.base_url}"
+                ) from e
+            if status_code == 401:
+                raise ValueError("Invalid or expired API key") from e
+            raise RuntimeError(f"API request failed: {e}") from e
+        except Exception as e:
+            # Transport errors, invalid JSON and unexpected response shapes are
+            # all reported as RuntimeError; the cause is kept for debugging.
+            raise RuntimeError(f"Analysis failed: {e}") from e
+
+    def analyze_with_mode(
+        self,
+        image_path: Path,
+        mode: str = "describe",
+        custom_question: Optional[str] = None,
+    ) -> str:
+        """
+        Analyze image with predefined mode.
+
+        Args:
+            image_path: Path to the image file
+            mode: Analysis mode (describe, ocr, chart, fashion, product, scene, custom)
+            custom_question: Custom question for custom mode
+
+        Returns:
+            Analysis result text
+
+        Raises:
+            ValueError: If ``mode`` is unknown, or ``mode`` is ``"custom"`` and
+                no ``custom_question`` is given.
+        """
+        if mode not in self.ANALYSIS_MODES:
+            raise ValueError(
+                f"Unsupported mode: {mode}, available: {list(self.ANALYSIS_MODES.keys())}"
+            )
+
+        if mode == "custom":
+            if not custom_question:
+                raise ValueError("Custom mode requires custom_question parameter")
+            question = custom_question
+        else:
+            question = self.ANALYSIS_MODES[mode]
+
+        return self.analyze(image_path, question)
+
+
+def main():
+    """Run the analyzer from the command line.
+
+    Parses the image path, analysis mode, optional custom question and
+    optional output file, then prints the analysis or writes it to the file.
+    Exits with status 1 when the image is missing, when custom mode has no
+    question, or when the analysis raises an exception.
+    """
+    import argparse
+
+    cli = argparse.ArgumentParser(description="Standalone Vision Analyzer")
+    cli.add_argument("image", help="Image path")
+    mode_names = ["describe", "ocr", "chart", "fashion", "product", "scene", "custom"]
+    cli.add_argument(
+        "--mode", "-m", choices=mode_names, default="describe", help="Analysis mode"
+    )
+    cli.add_argument("--question", "-q", help="Custom question for custom mode")
+    cli.add_argument("--output", "-o", help="Output file")
+    options = cli.parse_args()
+
+    # Fail early, before any analyzer is constructed, if the image is absent.
+    source = Path(options.image)
+    if not source.exists():
+        print(f"Error: Image not found: {source}", file=sys.stderr)
+        sys.exit(1)
+
+    try:
+        analyzer = StandaloneVisionAnalyzer()
+
+        wants_custom = options.mode == "custom"
+        if wants_custom and not options.question:
+            print("Error: Custom mode requires --question parameter", file=sys.stderr)
+            sys.exit(1)
+
+        if wants_custom:
+            text = analyzer.analyze_with_mode(source, "custom", options.question)
+        else:
+            text = analyzer.analyze_with_mode(source, options.mode)
+
+        if not options.output:
+            print("Analysis Result:")
+            print("-" * 50)
+            print(text)
+        else:
+            with open(options.output, "w", encoding="utf-8") as sink:
+                sink.write(text)
+            print(f"Result saved to: {options.output}")
+
+    except Exception as err:
+        # sys.exit above raises SystemExit, which is not an Exception subclass,
+        # so it passes through this handler untouched.
+        print(f"Error: {err}", file=sys.stderr)
+        sys.exit(1)
+
+
+# Run the command-line interface only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()