Files
skills/image-service/scripts/unified_vision_analyzer.py
T
hmo 04db423416 Initial commit: skills library
- 70 skills with code and documentation
- Add .gitignore (ignore __pycache__, output/, temp/, venv/)
- Clean up test intermediates and caches
2026-04-26 19:27:40 +08:00

160 lines
4.7 KiB
Python

#!/usr/bin/env python3
"""
统一视觉分析器 - 专为Coding Plan优化
支持股票截图、文档图片等多种场景的OCR和内容分析
"""
import base64
import json
import mimetypes
import os
import sys
from pathlib import Path
from typing import Optional

import httpx
class UnifiedVisionAnalyzer:
    """Send a local image plus a text prompt to a chat-completions endpoint.

    Configuration is read from environment variables when an instance is
    created:

    * ``VOLCENGINE_API_KEY``  - bearer token; required before any request.
    * ``VOLCENGINE_BASE_URL`` - API root; falls back to ``DEFAULT_BASE_URL``.
    * ``VISION_MODEL``        - model name; falls back to ``DEFAULT_MODEL``.

    Both public ``analyze_*`` methods return the text found at
    ``choices[0].message.content`` of the JSON response.
    """

    DEFAULT_BASE_URL = "https://ark.cn-beijing.volces.com/api/coding/v3"
    DEFAULT_MODEL = "doubao-seed-code"
    # MIME type declared in the data URL when the file extension does not
    # map to an image type.
    FALLBACK_MIME_TYPE = "image/png"
    REQUEST_TIMEOUT = 30.0
    MAX_TOKENS = 2000

    # Prompts are sent to the model verbatim; they are runtime data.
    STOCK_PROMPT = "请仔细识别这张股票自选股截图中的所有股票代码和股票名称。股票代码通常是数字,股票名称是中文。请严格按照图片中的格式输出,不要添加额外信息。"
    GENERAL_PROMPT = "请详细描述这张图片的内容,包括文字、图表、布局等所有细节。"

    def __init__(self):
        """Load the API key, base URL and model name from the environment."""
        # No credential is embedded here: a key committed to a repository is
        # readable by everyone with access to it. A missing key is reported
        # when the first request is attempted (see ``_analyze``).
        self.api_key = os.environ.get("VOLCENGINE_API_KEY") or ""
        self.base_url = (
            os.environ.get("VOLCENGINE_BASE_URL") or self.DEFAULT_BASE_URL
        )
        self.model = os.environ.get("VISION_MODEL") or self.DEFAULT_MODEL

    def encode_image(self, image_path: str) -> str:
        """Return the bytes of ``image_path`` encoded as a base64 string.

        Raises:
            OSError: if the file cannot be opened or read.
        """
        with open(image_path, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")

    def _guess_mime_type(self, image_path: str) -> str:
        """Return the image MIME type implied by the file extension."""
        guessed, _ = mimetypes.guess_type(str(image_path))
        if guessed and guessed.startswith("image/"):
            return guessed
        return self.FALLBACK_MIME_TYPE

    def _build_payload(self, prompt: str, image_path: str) -> dict:
        """Build the JSON request body: one user message with text and image."""
        mime_type = self._guess_mime_type(image_path)
        data_url = f"data:{mime_type};base64,{self.encode_image(image_path)}"
        return {
            "model": self.model,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": data_url}},
                    ],
                }
            ],
            "max_tokens": self.MAX_TOKENS,
        }

    def _analyze(self, image_path: str, prompt: str) -> str:
        """POST ``prompt`` and the image, and return the reply text.

        Raises:
            RuntimeError: if no API key is configured.
            OSError: if the image file cannot be read.
            Exception: any error raised by the HTTP call or while reading the
                response is written to stderr and re-raised unchanged.
        """
        # Fail before touching the file or the network when the request
        # could not be authenticated anyway.
        if not self.api_key:
            raise RuntimeError("未设置环境变量 VOLCENGINE_API_KEY")

        payload = self._build_payload(prompt, image_path)
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        # NOTE(review): the default base URL already ends in a version
        # segment ("/v3"), so the final path contains both "/v3" and "/v1".
        # Confirm against the provider's API reference.
        url = f"{self.base_url.rstrip('/')}/v1/chat/completions"
        try:
            response = httpx.post(
                url,
                headers=headers,
                json=payload,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            return response.json()["choices"][0]["message"]["content"]
        except Exception as e:
            # Diagnostics go to stderr so stdout carries only results.
            print(f"API调用失败: {e}", file=sys.stderr)
            raise

    def analyze_stock_screenshot(self, image_path: str) -> str:
        """Ask the model to list the stock codes and names shown in the image.

        Args:
            image_path: path of the screenshot file.

        Returns:
            The reply text from the model.
        """
        return self._analyze(image_path, self.STOCK_PROMPT)

    def analyze_general_image(
        self, image_path: str, custom_prompt: Optional[str] = None
    ) -> str:
        """Ask the model about an image.

        Args:
            image_path: path of the image file.
            custom_prompt: prompt to send with the image; when ``None`` or
                empty, ``GENERAL_PROMPT`` is used.

        Returns:
            The reply text from the model.
        """
        return self._analyze(image_path, custom_prompt or self.GENERAL_PROMPT)
def main():
    """Command-line entry point.

    Usage: ``python unified_vision_analyzer.py <image_path> [mode] [custom_prompt]``

    ``mode`` is ``stock`` or ``general`` (the default). Any other value is
    handled as ``general`` after a warning. ``custom_prompt`` is only passed
    on in general mode.

    The analysis result is printed to stdout. Usage text, warnings and error
    messages go to stderr so they never mix with the result. The process
    exits with status 1 when the image path is missing or the analysis fails.
    """
    args = sys.argv[1:]
    if not args:
        print(
            "用法: python unified_vision_analyzer.py <image_path> [mode] [custom_prompt]",
            file=sys.stderr,
        )
        print("模式: stock (股票截图), general (通用分析)", file=sys.stderr)
        sys.exit(1)

    image_path = args[0]
    mode = args[1] if len(args) > 1 else "general"
    custom_prompt = args[2] if len(args) > 2 else None

    # Unknown modes keep falling back to general analysis, but no longer
    # silently: a mistyped "stock" would otherwise go unnoticed.
    if mode not in ("stock", "general"):
        print(f"未知模式 '{mode}', 按 general 处理", file=sys.stderr)

    try:
        # Constructed inside the try so that a failure while setting up the
        # analyzer is reported the same way as a failed analysis.
        analyzer = UnifiedVisionAnalyzer()
        if mode == "stock":
            result = analyzer.analyze_stock_screenshot(image_path)
        else:
            result = analyzer.analyze_general_image(image_path, custom_prompt)
        print(result)
    except Exception as e:
        print(f"分析失败: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()