04db423416
- 70 skills with code and documentation - Add .gitignore (ignore __pycache__, output/, temp/, venv/) - Clean up test intermediates and caches
102 lines
2.9 KiB
Python
102 lines
2.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
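Standardized vision analysis script - OpenAI-compatible chat-completions mode
(the defaults in this file target the Volcengine Ark endpoint, not DashScope).
Uses the API settings configured below and avoids a dependency on the Google Vision API.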
|
|
"""
|
|
|
|
import base64
|
|
import json
|
|
import httpx
|
|
import os
|
|
from pathlib import Path
|
|
|
|
# Resolve the API settings once at import time.
# Precedence for the key and base URL: the "vision_api" section of the skill's
# config/settings.json, then the VOLCENGINE_* environment variables, then the
# built-in default. The model name comes from the config file or the default.
_DEFAULT_BASE_URL = "https://ark.cn-beijing.volces.com/api/coding/v3"
_DEFAULT_MODEL_NAME = "doubao-seed-code"

CONFIG_PATH = Path(__file__).parent / "config" / "settings.json"

_vision_settings = {}
if CONFIG_PATH.exists():
    with open(CONFIG_PATH, "r", encoding="utf-8") as f:
        config = json.load(f)
    # A missing or null "vision_api" section is treated as empty so that the
    # environment variables and defaults below still apply.
    _vision_settings = config.get("vision_api") or {}

# `or` (rather than a .get() default) also skips null / empty-string values,
# which would otherwise produce an unusable key or URL.
API_KEY = _vision_settings.get("key") or os.getenv("VOLCENGINE_API_KEY")
BASE_URL = (
    _vision_settings.get("base_url")
    or os.getenv("VOLCENGINE_BASE_URL")
    or _DEFAULT_BASE_URL
)
DEFAULT_MODEL = _vision_settings.get("model") or _DEFAULT_MODEL_NAME

# Fail fast: nothing in this module can work without credentials.
if not API_KEY:
    raise ValueError(
        "No API Key found. Please configure in config/settings.json or set VOLCENGINE_API_KEY"
    )
|
|
|
|
|
|
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text."""
    with open(image_path, mode="rb") as fh:
        raw_bytes = fh.read()
    encoded = base64.b64encode(raw_bytes)
    return str(encoded, "utf-8")
|
|
|
|
|
|
def analyze_image(
    image_path,
    prompt="请识别这张图片中的所有文字内容",
    model=None,
    *,
    max_tokens=2000,
    timeout=120.0,
):
    """Send an image plus a text prompt to the chat-completions endpoint.

    The image is embedded in the request as a base64 ``data:`` URL and posted
    to ``{BASE_URL}/chat/completions`` with ``API_KEY`` as a bearer token.

    Args:
        image_path: Path of the image file to analyze.
        prompt: Text instruction sent alongside the image.
        model: Model name; ``DEFAULT_MODEL`` is used when this is falsy.
        max_tokens: Value of the request's ``max_tokens`` field.
        timeout: Timeout passed to ``httpx.post``.

    Returns:
        The ``content`` of the first choice's message, or ``""`` when the
        response has no choices, no message, or no content.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
        RuntimeError: If the request fails, the server answers with an error
            status, or the response body cannot be processed. The underlying
            exception is attached as ``__cause__``.
    """
    # Local import, matching main()'s style, so this block brings its own
    # dependency into scope.
    import mimetypes

    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    # Label the data URL with the type guessed from the file extension instead
    # of always claiming JPEG; fall back to JPEG when the guess is missing or
    # is not an image type.
    mime_type, _ = mimetypes.guess_type(str(image_path))
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    image_base64 = encode_image(image_path)
    model = model or DEFAULT_MODEL

    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}

    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{image_base64}"},
                    },
                    {"type": "text", "text": prompt},
                ],
            }
        ],
        "max_tokens": max_tokens,
    }

    # Tolerate a configured base URL that ends with a slash.
    endpoint = f"{BASE_URL}".rstrip("/") + "/chat/completions"

    try:
        response = httpx.post(endpoint, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        result = response.json()
        # `or` guards against keys that are present but empty/null, e.g.
        # "choices": [] would otherwise raise IndexError on [0].
        choices = result.get("choices") or [{}]
        message = choices[0].get("message") or {}
        return message.get("content") or ""
    except Exception as e:
        # Callers rely on RuntimeError; chain the cause to keep the traceback.
        raise RuntimeError(f"Vision analysis failed: {e}") from e
|
|
|
|
|
|
def main():
    """Command-line entry point: ``<image_path> [prompt]``.

    Prints the analysis result to stdout. Exits with status 1 when no image
    path is given or when the analysis raises; errors go to stderr.
    """
    import sys

    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python vision-analyze.py <image_path> [prompt]")
        sys.exit(1)

    image_path = cli_args[0]
    # Only the first two arguments are used; anything after the prompt is ignored.
    if len(cli_args) >= 2:
        prompt = cli_args[1]
    else:
        prompt = "请识别这张图片中的所有文字内容"

    try:
        print(analyze_image(image_path, prompt))
    except Exception as err:
        print(f"Error: {err}", file=sys.stderr)
        sys.exit(1)
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|