# skills/agent-vision-awareness/scripts/vision_direct.py

# -*- coding: utf-8 -*-
import sys
sys.stdout.reconfigure(errors='replace')
sys.stderr.reconfigure(errors='replace')
import os
os.environ['PYTHONIOENCODING'] = 'utf-8'
import base64
import time
import tempfile
from pathlib import Path
from openai import OpenAI

# Single scratch directory for this script's output files; created at import
# time if it does not exist yet.
TEMP_DIR = r'D:\F\NewI\opencode\daily-workspace\temp'
os.makedirs(TEMP_DIR, exist_ok=True)

# Load the Volcengine Ark API key and endpoint from the OpenCode config file.
CONFIG_PATH = r'C:\Users\hmo\.config\opencode\config.json'
import json
config = json.loads(Path(CONFIG_PATH).read_text(encoding='utf-8'))
_volcengine_options = config['provider']['volcengine']['options']
API_KEY = _volcengine_options['apiKey']
BASE_URL = _volcengine_options['baseURL']

# Model name sent with every request, and the OpenAI-SDK client bound to the
# configured endpoint and key.
MODEL = 'doubao-seed-2.0-pro'
client = OpenAI(base_url=BASE_URL, api_key=API_KEY)

# File extensions whose text is not itself the registered image MIME subtype.
_MIME_SUBTYPE_ALIASES = {
    'jpg': 'jpeg',
    'jpe': 'jpeg',
    'jfif': 'jpeg',
    'tif': 'tiff',
    'svg': 'svg+xml',
}


def _to_data_url(image_path):
    """
    Encode a local image file as a base64 ``data:`` URL.

    :param image_path: ``pathlib.Path`` of the file to read
    :return: string of the form ``data:image/<subtype>;base64,<payload>``
    """
    # The subtype comes from the file extension. It is lower-cased and aliased
    # so that e.g. "photo.JPG" is announced as image/jpeg, not image/JPG.
    extension = image_path.suffix.lstrip('.').lower()
    subtype = _MIME_SUBTYPE_ALIASES.get(extension, extension)
    payload = base64.b64encode(image_path.read_bytes()).decode('utf-8')
    return f"data:image/{subtype};base64,{payload}"


def analyze_image(image_path_or_url, prompt="详细描述这张图片的内容"):
    """
    Analyze an image with the configured vision model.

    Accepts either an http/https URL, which is passed to the API unchanged,
    or a local file path, which is read and inlined as a base64 data URL.

    :param image_path_or_url: image URL or local path (``str`` or ``os.PathLike``)
    :param prompt: text instruction sent together with the image
    :return: the model's reply text on success; otherwise a human-readable
             error string (this function does not raise for ``Exception``
             subclasses, it reports them in the returned string)
    """
    try:
        # Normalise path-like objects (e.g. pathlib.Path) to a plain string so
        # the URL check below works for them too.
        source = os.fspath(image_path_or_url)

        if source.lower().startswith(('http://', 'https://')):
            # Remote image: hand the URL to the API as-is.
            image_url = source
        else:
            # Local image: it must exist, then it is embedded as a data URL.
            image_path = Path(source)
            if not image_path.exists():
                return f"错误：图片不存在 {image_path}"
            image_url = _to_data_url(image_path)

        # Single-turn chat request carrying the prompt and the image.
        response = client.chat.completions.create(
            model=MODEL,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": image_url}}
                    ]
                }
            ],
            max_tokens=1000
        )
        return response.choices[0].message.content

    except Exception as e:
        # Deliberate best-effort boundary: callers get a message, not a traceback.
        return f"图片识别失败：{type(e).__name__}: {str(e)}"

if __name__ == "__main__":
    # Command-line entry point: first argument is the image path or URL, the
    # optional second argument overrides the default prompt.
    if len(sys.argv) <= 1:
        print("用法：python vision_direct.py <图片路径/URL> [提示词]")
        sys.exit(1)

    image_path, *extra_args = sys.argv[1:]
    prompt = extra_args[0] if extra_args else "详细描述这张图片的内容"

    result = analyze_image(image_path, prompt)
    print(result)

    # Also keep a copy of the result in the temp directory, in a file named
    # after the current Unix time in whole seconds.
    stamp = int(time.time())
    output_file = os.path.join(TEMP_DIR, f"vision_result_{stamp}.txt")
    with open(output_file, mode='w', encoding='utf-8') as out:
        out.write(result)