#!/usr/bin/env python3
"""
Fixed look_at tool implementation that uses the Volcengine Ark API
in place of the system's default Google Vision API dependency.
"""

import base64
import json
import mimetypes
import os
import sys
from pathlib import Path

# NOTE(review): a credential is committed here as the fallback API key. It is
# kept so existing behaviour does not change, but it should be rotated and the
# VOLCENGINE_API_KEY environment variable made mandatory instead.
_DEFAULT_API_KEY = "b0359bed-09f2-49e2-a53c-32ba057412e3"
_BASE_URL = "https://ark.cn-beijing.volces.com/api/coding/v3"
# Per the original author's note: the only vision model the Coding Plan supports.
_MODEL = "doubao-seed-code"
_MAX_TOKENS = 2000
_TIMEOUT_SECONDS = 120.0
# MIME type previously hard-coded for every upload; kept as the fallback.
_FALLBACK_MIME = "image/jpeg"


def _image_mime_type(file_path):
    """Return the image MIME type guessed from *file_path*'s extension.

    Falls back to ``image/jpeg`` when the extension is unknown or does not
    map to an ``image/*`` type.
    """
    guessed, _encoding = mimetypes.guess_type(os.fspath(file_path))
    if guessed and guessed.startswith("image/"):
        return guessed
    return _FALLBACK_MIME


def _extract_content(result):
    """Return the first choice's message content from a chat-completions reply.

    Returns ``""`` when ``choices`` is missing, null or empty, or when the
    first choice carries no message/content, instead of failing on ``[][0]``.
    """
    choices = result.get("choices") or [{}]
    message = choices[0].get("message") or {}
    return message.get("content", "")


def look_at(file_path: "str | os.PathLike[str]", goal: str) -> str:
    """Send an image plus a text prompt to the Volcengine Ark chat API.

    The file is read, base64-encoded and embedded as a data URL next to
    *goal* in a single user message posted to ``/chat/completions``.

    Args:
        file_path: Path of the image file to upload.
        goal: Text prompt sent alongside the image.

    Returns:
        The ``content`` of the first returned choice, or ``""`` when the
        response carries none.

    Raises:
        OSError: If *file_path* cannot be opened or read.
        RuntimeError: If the HTTP request fails, returns an error status, or
            the response cannot be decoded; the original exception is chained
            as ``__cause__``.
    """
    api_key = os.getenv("VOLCENGINE_API_KEY", _DEFAULT_API_KEY)

    # Read the file before importing httpx so a bad path fails fast.
    with open(file_path, "rb") as f:
        image_base64 = base64.b64encode(f.read()).decode("utf-8")
    mime_type = _image_mime_type(file_path)

    # Imported lazily so the module can be imported without httpx installed.
    import httpx

    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {
        "model": _MODEL,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{image_base64}"},
                    },
                    {"type": "text", "text": goal},
                ],
            }
        ],
        "max_tokens": _MAX_TOKENS,
    }

    try:
        response = httpx.post(
            f"{_BASE_URL}/chat/completions",
            headers=headers,
            json=payload,
            timeout=_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        return _extract_content(response.json())
    except Exception as e:
        # Callers see a single RuntimeError; keep the root cause attached.
        raise RuntimeError(f"Vision analysis failed: {e}") from e


# Make it callable as a script
if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python look_at_fixed.py <file_path> <goal>", file=sys.stderr)
        sys.exit(1)

    result = look_at(sys.argv[1], sys.argv[2])
    print(result)