Initial commit: skills library
- 70 skills with code and documentation
- Add .gitignore (ignore __pycache__, output/, temp/, venv/)
- Clean up test intermediates and caches
This commit is contained in:
@@ -0,0 +1,216 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Standalone Vision Analyzer - Simplified version for agent-vision-awareness skill
|
||||
|
||||
This is a self-contained version of the vision analyzer that doesn't depend on
|
||||
the image-service skill structure, making it easier to integrate directly.
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional
|
||||
import httpx
|
||||
|
||||
|
||||
class StandaloneVisionAnalyzer:
    """Standalone vision analyzer using direct API calls.

    Posts a base64-encoded local image together with a text prompt to
    ``{base_url}/chat/completions`` and returns the message content of the
    first choice in the JSON response.
    """

    # Predefined analysis modes: mode name -> prompt text sent with the image.
    # The prompt strings are sent to the API verbatim.
    ANALYSIS_MODES = {
        "describe": "请详细描述这张图片的内容,包括:人物、场景、物品、颜色、布局等所有细节。",
        "ocr": "请仔细识别这张图片中的所有文字内容,按照文字在图片中的位置顺序输出。如果是中文,请保持原文输出。",
        "chart": "请分析这张图表的内容,包括:图表类型、数据趋势、关键数据点、标题标签、以及数据的结论或洞察。",
        "fashion": "请分析这张图片中人物的穿搭,包括:服装款式、颜色搭配、配饰、整体风格等。",
        "product": "请分析这张产品图片,包括:产品类型、外观特征、功能特点、品牌信息等。",
        "scene": "请描述这张图片的场景,包括:地点、环境、氛围、时间(白天/夜晚)等。",
        # Placeholder only: in "custom" mode the caller's question is used instead.
        "custom": "用户自定义问题",
    }

    # Fallbacks used when neither the config dict nor the environment
    # supplies a value.
    DEFAULT_BASE_URL = "https://ark.cn-beijing.volces.com/api/coding/v3"
    DEFAULT_MODEL = "doubao-seed-code"
    # MIME type declared in the data URL when the file extension is unknown
    # or does not map to an image type.
    DEFAULT_MIME_TYPE = "image/png"

    def __init__(self, config: Optional[Dict[str, Optional[str]]] = None):
        """
        Initialize the analyzer.

        Args:
            config: Configuration dictionary with api_key, base_url, model.
                When None, values are read from environment variables via
                ``_load_config``. The key may also be supplied under
                ``VOLCENGINE_API_KEY``.

        Raises:
            ValueError: If no API key (or base URL) could be resolved.
        """
        if config is None:
            config = self._load_config()

        # Credentials must come from the caller or the environment; no key is
        # embedded in source, so a missing key is reported instead of silently
        # falling back to a shared secret.
        self.api_key = config.get("api_key") or config.get("VOLCENGINE_API_KEY")
        self.base_url = config.get("base_url") or self.DEFAULT_BASE_URL
        self.model = config.get("model") or self.DEFAULT_MODEL

        if not self.api_key or not self.base_url:
            raise ValueError("Missing required API configuration: api_key and base_url")

    def _load_config(self) -> Dict[str, Optional[str]]:
        """Load configuration from environment variables.

        Reads ``VOLCENGINE_API_KEY`` (falling back to ``DASHSCOPE_API_KEY``),
        ``VISION_API_BASE_URL`` and ``VISION_MODEL``. Unset variables yield
        ``None`` values.
        """
        return {
            "api_key": os.environ.get("VOLCENGINE_API_KEY")
            or os.environ.get("DASHSCOPE_API_KEY"),
            "base_url": os.environ.get("VISION_API_BASE_URL"),
            "model": os.environ.get("VISION_MODEL"),
        }

    def encode_image(self, image_path: Path) -> str:
        """Return the file's bytes encoded as a base64 ASCII string."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    @staticmethod
    def _guess_mime_type(image_path: Path) -> str:
        """Guess the image MIME type from the file extension.

        Falls back to ``image/png`` when the extension is unknown or maps to
        a non-image type.
        """
        import mimetypes

        guessed, _ = mimetypes.guess_type(str(image_path))
        if guessed and guessed.startswith("image/"):
            return guessed
        return StandaloneVisionAnalyzer.DEFAULT_MIME_TYPE

    def analyze(self, image_path: Path, question: str) -> str:
        """
        Analyze image content.

        Args:
            image_path: Path to the image file (a ``str`` is also accepted)
            question: Question/prompt for analysis

        Returns:
            Analysis result text

        Raises:
            FileNotFoundError: If the image file does not exist.
            ValueError: If the API answers 404 (bad base_url) or 401 (bad key).
            RuntimeError: For any other HTTP error status or failure while
                sending the request or reading the response.
        """
        image_path = Path(image_path)
        if not image_path.exists():
            raise FileNotFoundError(f"Image not found: {image_path}")

        base64_image = self.encode_image(image_path)
        # Declare the real image type rather than always claiming PNG.
        mime_type = self._guess_mime_type(image_path)

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": question},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{base64_image}"
                            },
                        },
                    ],
                }
            ],
            "max_tokens": 2000,
        }

        # Tolerate a configured base_url that ends with a slash.
        endpoint = f"{self.base_url.rstrip('/')}/chat/completions"

        try:
            with httpx.Client(timeout=30.0) as client:
                response = client.post(endpoint, headers=headers, json=payload)
                response.raise_for_status()
                result = response.json()
            return result["choices"][0]["message"]["content"]
        except httpx.HTTPStatusError as e:
            status = e.response.status_code
            if status == 404:
                raise ValueError(
                    f"API endpoint not found, check base_url: {self.base_url}"
                ) from e
            if status == 401:
                raise ValueError("Invalid or expired API key") from e
            raise RuntimeError(f"API request failed: {e}") from e
        except Exception as e:
            # Covers transport errors and unexpected response shapes.
            raise RuntimeError(f"Analysis failed: {e}") from e

    def analyze_with_mode(
        self,
        image_path: Path,
        mode: str = "describe",
        custom_question: Optional[str] = None,
    ) -> str:
        """
        Analyze image with predefined mode.

        Args:
            image_path: Path to the image file
            mode: Analysis mode (describe, ocr, chart, fashion, product, scene, custom)
            custom_question: Custom question for custom mode

        Returns:
            Analysis result text

        Raises:
            ValueError: If ``mode`` is not a key of ``ANALYSIS_MODES``, or if
                ``mode`` is "custom" and ``custom_question`` is empty.
        """
        if mode not in self.ANALYSIS_MODES:
            raise ValueError(
                f"Unsupported mode: {mode}, available: {list(self.ANALYSIS_MODES.keys())}"
            )

        if mode == "custom":
            if not custom_question:
                raise ValueError("Custom mode requires custom_question parameter")
            question = custom_question
        else:
            question = self.ANALYSIS_MODES[mode]

        return self.analyze(image_path, question)
|
||||
|
||||
|
||||
def main():
    """Command line entry point: parse arguments, analyze one image, emit the result.

    Exits with status 1 when the image is missing, when custom mode is
    selected without --question, or when analysis raises an exception.
    The result is written to --output if given, otherwise printed to stdout.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Standalone Vision Analyzer")
    cli.add_argument("image", help="Image path")
    cli.add_argument(
        "--mode",
        "-m",
        choices=["describe", "ocr", "chart", "fashion", "product", "scene", "custom"],
        default="describe",
        help="Analysis mode",
    )
    cli.add_argument("--question", "-q", help="Custom question for custom mode")
    cli.add_argument("--output", "-o", help="Output file")
    args = cli.parse_args()

    image_path = Path(args.image)
    if not image_path.exists():
        print(f"Error: Image not found: {image_path}", file=sys.stderr)
        sys.exit(1)

    try:
        analyzer = StandaloneVisionAnalyzer()

        is_custom = args.mode == "custom"
        if is_custom and not args.question:
            print("Error: Custom mode requires --question parameter", file=sys.stderr)
            sys.exit(1)

        # Only custom mode forwards the user's question; other modes use
        # the analyzer's predefined prompt.
        if is_custom:
            result = analyzer.analyze_with_mode(image_path, "custom", args.question)
        else:
            result = analyzer.analyze_with_mode(image_path, args.mode)

        if not args.output:
            print("Analysis Result:")
            print("-" * 50)
            print(result)
        else:
            with open(args.output, "w", encoding="utf-8") as out_file:
                out_file.write(result)
            print(f"Result saved to: {args.output}")

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user