#!/usr/bin/env python3 """ 修复版 vision_analyzer.py - 使用火山方舟(VolcEngine)视觉模型 """ import base64 import json import os import sys from pathlib import Path from typing import Dict, Any, Optional import httpx class VisionAnalyzer: """视觉分析器 - 使用火山方舟doubao-vision模型""" # 预定义的分析模式 ANALYSIS_MODES = { "describe": "请详细描述这张图片的内容,包括:人物、场景、物品、颜色、布局等所有细节。", "ocr": "请仔细识别这张图片中的所有文字内容,按照文字在图片中的位置顺序输出。如果是中文,请保持原文输出。", "chart": "请分析这张图表的内容,包括:图表类型、数据趋势、关键数据点、标题标签、以及数据的结论或洞察。", "fashion": "请分析这张图片中人物的穿搭,包括:服装款式、颜色搭配,配饰、整体风格等。", "product": "请分析这张产品图片,包括:产品类型、外观特征、功能特点,品牌信息等。", "scene": "请描述这张图片的场景,包括:地点、环境、氛围、时间(白天/夜晚)等。", "custom": "用户自定义问题", } def __init__(self, config: Optional[Dict[str, str]] = None): """ 初始化分析器 - 使用火山方舟API """ if config is None: config = self._load_config() # 使用火山方舟的配置 self.api_key = ( config.get("api_key") or config.get("VOLCENGINE_API_KEY") or config.get("DASHSCOPE_API_KEY") # 后备 or "b0359bed-09f2-49e2-a53c-32ba057412e3" # 硬编码后备 ) self.base_url = ( config.get("base_url") or "https://ark.cn-beijing.volces.com/api/coding/v3" # 火山方舟 Coding Plan API ) self.model = ( config.get("model") or "doubao-seed-code" # 火山方舟Coding Plan唯一支持的视觉模型 ) if not self.api_key: raise ValueError("缺少必要的 API 配置:api_key") def _load_config(self) -> Dict[str, str]: """从配置文件或环境变量加载配置""" config = {} # 1. 从环境变量读取 config["api_key"] = os.environ.get("VOLCENGINE_API_KEY") or os.environ.get( "DASHSCOPE_API_KEY" ) config["base_url"] = os.environ.get("VISION_API_BASE_URL") config["model"] = os.environ.get("VISION_MODEL") # 2. 如果环境变量没有,尝试从配置文件读取 if not config["api_key"]: try: from .load_config import load_config cfg = load_config() config["api_key"] = cfg.get("IMAGE_API_KEY") config["base_url"] = cfg.get("IMAGE_API_BASE_URL") config["model"] = cfg.get("VISION_MODEL") except ImportError: pass return config def encode_image(self, image_path: Path) -> str: """将图片编码为base64""" with open(image_path, "rb") as image_file: return base64.b64encode(image_file.read()).decode("utf-8") def analyze( self, image_path: str, mode: str = "describe", custom_query: str = None ) -> str: """分析图片""" image_path = Path(image_path) if not image_path.exists(): raise FileNotFoundError(f"Image file not found: {image_path}") # 获取提示词 if mode == "custom" and custom_query: prompt = custom_query else: prompt = self.ANALYSIS_MODES.get(mode, self.ANALYSIS_MODES["describe"]) # 编码图片 image_base64 = self.encode_image(image_path) # 构建请求 - 火山方舟使用标准OpenAI兼容格式 headers = { "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json", } # 火山方舟的消息格式 payload = { "model": self.model, "messages": [ { "role": "user", "content": [ { "type": "image_url", "image_url": { "url": f"data:image/png;base64,{image_base64}" }, }, {"type": "text", "text": prompt}, ], } ], "max_tokens": 2000, } # 发送请求 try: response = httpx.post( f"{self.base_url}/chat/completions", headers=headers, json=payload, timeout=120.0, ) response.raise_for_status() result = response.json() return result["choices"][0]["message"]["content"] except Exception as e: print(f"API request failed: {e}") raise def main(): """主函数""" if len(sys.argv) < 2: print( "Usage: python vision_analyzer_fixed.py [-m mode] [-q query]" ) sys.exit(1) image_path = sys.argv[1] mode = "describe" custom_query = None # 解析命令行参数 i = 2 while i < len(sys.argv): if sys.argv[i] == "-m": mode = sys.argv[i + 1] i += 2 elif sys.argv[i] == "-q": custom_query = sys.argv[i + 1] mode = "custom" i += 2 else: i += 1 try: analyzer = VisionAnalyzer() result = analyzer.analyze(image_path, mode, custom_query) print(result) except Exception as e: print(f"Error: {e}") sys.exit(1) if __name__ == "__main__": main()