Initial commit: skills library

- 70 skills with code and documentation - Add .gitignore (ignore __pycache__, output/, temp/, venv/) - Clean up test intermediates and caches
2026-04-26 19:27:40 +08:00
commit 04db423416
861 changed files with 210414 additions and 0 deletions
@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+"""
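+Standardized vision analysis script - OpenAI-compatible mode
+(this header originally named DashScope, but the defaults below target Volcengine Ark).
+Uses the configured API settings and avoids a Google Vision API dependency.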
+"""
+
+import base64
+import json
+import httpx
+import os
+from pathlib import Path
+
+# Resolve the API settings. Values in the skill's config/settings.json take
+# precedence; environment variables (then built-in defaults) fill in anything
+# the file leaves out.
+CONFIG_PATH = Path(__file__).parent / "config" / "settings.json"
+
+_vision_settings = {}
+if CONFIG_PATH.exists():
+    with open(CONFIG_PATH, "r", encoding="utf-8") as f:
+        config = json.load(f)
+    # `or {}` tolerates a missing or null "vision_api" section.
+    _vision_settings = config.get("vision_api") or {}
+
+# A settings file without a key must not hide VOLCENGINE_API_KEY: the error
+# message below tells the user that either source is acceptable.
+API_KEY = _vision_settings.get("key") or os.getenv("VOLCENGINE_API_KEY")
+BASE_URL = _vision_settings.get("base_url") or os.getenv(
+    "VOLCENGINE_BASE_URL", "https://ark.cn-beijing.volces.com/api/coding/v3"
+)
+DEFAULT_MODEL = _vision_settings.get("model") or "doubao-seed-code"
+
+# Fail fast at import time: nothing in this module can work without a key.
+if not API_KEY:
+    raise ValueError(
+        "No API Key found. Please configure in config/settings.json or set VOLCENGINE_API_KEY"
+    )
+
+
+def encode_image(image_path):
+    """Read the file at image_path and return its bytes as base64 text."""
+    with open(image_path, "rb") as fh:
+        raw_bytes = fh.read()
+    encoded = base64.b64encode(raw_bytes)
+    return encoded.decode("utf-8")
+
+
+def analyze_image(image_path, prompt="请识别这张图片中的所有文字内容", model=None):
+    """Send an image plus a text prompt to the chat-completions endpoint.
+
+    Args:
+        image_path: Path of the image file to upload.
+        prompt: Instruction sent alongside the image.
+        model: Model name; DEFAULT_MODEL is used when this is falsy.
+
+    Returns:
+        The text content of the first choice in the response, or "" when the
+        response carries no choices or no content.
+
+    Raises:
+        FileNotFoundError: If image_path does not exist.
+        RuntimeError: If the HTTP request fails or the response cannot be
+            parsed; the underlying exception is chained as the cause.
+    """
+    import mimetypes
+
+    if not os.path.exists(image_path):
+        raise FileNotFoundError(f"Image file not found: {image_path}")
+
+    image_base64 = encode_image(image_path)
+    model = model or DEFAULT_MODEL
+
+    # Label the data URL with the type guessed from the file extension so that
+    # PNG/WebP/GIF inputs are not declared as JPEG; unknown or non-image
+    # extensions fall back to image/jpeg.
+    guessed_type, _ = mimetypes.guess_type(str(image_path))
+    if guessed_type and guessed_type.startswith("image/"):
+        mime_type = guessed_type
+    else:
+        mime_type = "image/jpeg"
+
+    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
+
+    payload = {
+        "model": model,
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:{mime_type};base64,{image_base64}"},
+                    },
+                    {"type": "text", "text": prompt},
+                ],
+            }
+        ],
+        "max_tokens": 2000,
+    }
+
+    try:
+        # rstrip avoids a "//chat/completions" URL when BASE_URL ends with "/".
+        response = httpx.post(
+            f"{BASE_URL.rstrip('/')}/chat/completions",
+            headers=headers,
+            json=payload,
+            timeout=120.0,
+        )
+        response.raise_for_status()
+        result = response.json()
+        # `or` fallbacks cover both a missing key and an empty/null value, so an
+        # empty "choices" list yields "" instead of an IndexError.
+        choices = result.get("choices") or [{}]
+        message = choices[0].get("message") or {}
+        return message.get("content") or ""
+    except Exception as e:
+        # Keep the original exception as __cause__ for debugging.
+        raise RuntimeError(f"Vision analysis failed: {e}") from e
+
+
+def main():
+    """Command-line entry point: analyze one image and print the reply.
+
+    Expects the image path as the first argument and an optional prompt as the
+    second. Exits with status 1 on missing arguments or on any analysis error.
+    """
+    import sys
+
+    cli_args = sys.argv[1:]
+    if not cli_args:
+        print("Usage: python vision-analyze.py <image_path> [prompt]")
+        sys.exit(1)
+
+    image_path = cli_args[0]
+    if len(cli_args) > 1:
+        prompt = cli_args[1]
+    else:
+        prompt = "请识别这张图片中的所有文字内容"
+
+    try:
+        analysis = analyze_image(image_path, prompt)
+        print(analysis)
+    except Exception as err:
+        # Report the failure on stderr and signal it through the exit status.
+        print(f"Error: {err}", file=sys.stderr)
+        sys.exit(1)
+
+
+# Run the command-line interface only when executed as a script, not on import.
+if __name__ == "__main__":
+    main()