04db423416
- 70 skills with code and documentation - Add .gitignore (ignore __pycache__, output/, temp/, venv/) - Clean up test intermediates and caches
160 lines
4.7 KiB
Python
160 lines
4.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
统一视觉分析器 - 专为Coding Plan优化
|
|
支持股票截图、文档图片等多种场景的OCR和内容分析
|
|
"""
|
|
|
|
import base64
|
|
import json
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
import httpx
|
|
|
|
|
|
class UnifiedVisionAnalyzer:
    """Analyze local image files through a chat-completions HTTP API.

    Settings are read from the environment when an instance is created:

    * ``VOLCENGINE_API_KEY``  - bearer token sent with every request. There is
      deliberately no built-in default: credentials must not live in source.
    * ``VOLCENGINE_BASE_URL`` - API base URL (defaults to ``DEFAULT_BASE_URL``).
    * ``VISION_MODEL``        - model name (defaults to ``DEFAULT_MODEL``).
    """

    DEFAULT_BASE_URL = "https://ark.cn-beijing.volces.com/api/coding/v3"
    DEFAULT_MODEL = "doubao-seed-code"

    # Prompt used by analyze_stock_screenshot (asks for stock codes and names only).
    STOCK_PROMPT = "请仔细识别这张股票自选股截图中的所有股票代码和股票名称。股票代码通常是数字,股票名称是中文。请严格按照图片中的格式输出,不要添加额外信息。"
    # Prompt used by analyze_general_image when the caller supplies none.
    GENERAL_PROMPT = "请详细描述这张图片的内容,包括文字、图表、布局等所有细节。"

    REQUEST_TIMEOUT = 30.0  # seconds, passed to httpx.post
    MAX_TOKENS = 2000  # "max_tokens" value sent with every request

    def __init__(self):
        """Load the API key, base URL and model name from the environment."""
        # `or` rather than a .get() default, so an empty variable also falls back.
        self.api_key = os.environ.get("VOLCENGINE_API_KEY") or None
        self.base_url = os.environ.get("VOLCENGINE_BASE_URL") or self.DEFAULT_BASE_URL
        self.model = os.environ.get("VISION_MODEL") or self.DEFAULT_MODEL

    def encode_image(self, image_path: str) -> str:
        """Return the content of the file at ``image_path`` as base64 text.

        Raises:
            OSError: if the file cannot be opened or read.
        """
        with open(image_path, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")

    def _image_data_url(self, image_path: str) -> str:
        """Build a ``data:`` URL that embeds the image file as base64."""
        import mimetypes  # function-scope import keeps this block self-contained

        guessed, _ = mimetypes.guess_type(str(image_path))
        # Fall back to PNG (the previously hard-coded type) for unknown or
        # non-image extensions.
        mime = guessed if guessed and guessed.startswith("image/") else "image/png"
        return f"data:{mime};base64,{self.encode_image(image_path)}"

    def _analyze(self, image_path: str, prompt: str) -> str:
        """Send ``prompt`` plus the image to the API and return the reply text.

        Raises:
            RuntimeError: if no API key is configured.
            OSError: if the image file cannot be read.
            Exception: any error raised while sending the request or reading
                the response is printed and re-raised unchanged.
        """
        if not self.api_key:
            raise RuntimeError(
                "VOLCENGINE_API_KEY is not set; export it before running the analyzer."
            )

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": self._image_data_url(image_path)},
                        },
                    ],
                }
            ],
            "max_tokens": self.MAX_TOKENS,
        }

        try:
            # NOTE(review): the default base URL already ends in "/v3" and this
            # appends "/v1/chat/completions" - confirm the resulting path
            # against the provider's API reference.
            response = httpx.post(
                f"{self.base_url}/v1/chat/completions",
                headers=headers,
                json=payload,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()

            result = response.json()
            return result["choices"][0]["message"]["content"]
        except Exception as e:
            print(f"API调用失败: {e}")
            raise

    def analyze_stock_screenshot(self, image_path: str) -> str:
        """Extract stock codes and names from a watch-list screenshot.

        Args:
            image_path: path of the screenshot file.

        Returns:
            The text content of the first choice in the API response.

        Raises:
            Same exceptions as ``analyze_general_image``.
        """
        return self._analyze(image_path, self.STOCK_PROMPT)

    def analyze_general_image(self, image_path: str, custom_prompt: str = None) -> str:
        """Describe an image, optionally steered by a caller-supplied prompt.

        Args:
            image_path: path of the image file.
            custom_prompt: instruction sent with the image; when ``None`` or
                empty, a generic "describe everything" prompt is used.

        Returns:
            The text content of the first choice in the API response.

        Raises:
            RuntimeError: if no API key is configured.
            OSError: if the image file cannot be read.
            Exception: request/response errors are printed and re-raised.
        """
        return self._analyze(image_path, custom_prompt or self.GENERAL_PROMPT)
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Usage: ``python unified_vision_analyzer.py <image_path> [mode] [custom_prompt]``

    ``mode`` defaults to ``"general"``; only the exact value ``"stock"``
    selects the stock-screenshot analysis, and ``custom_prompt`` is used by
    the general analysis only. The result is printed to stdout. The process
    exits with status 1 when no image path is given or when the analysis
    raises.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print(
            "用法: python unified_vision_analyzer.py <image_path> [mode] [custom_prompt]"
        )
        print("模式: stock (股票截图), general (通用分析)")
        sys.exit(1)

    image_path = cli_args[0]
    mode = cli_args[1] if len(cli_args) >= 2 else "general"
    custom_prompt = cli_args[2] if len(cli_args) >= 3 else None

    analyzer = UnifiedVisionAnalyzer()

    try:
        if mode == "stock":
            output = analyzer.analyze_stock_screenshot(image_path)
        else:
            output = analyzer.analyze_general_image(image_path, custom_prompt)

        # Printing stays inside the try so an output error is reported too.
        print(output)
    except Exception as err:
        print(f"分析失败: {err}")
        sys.exit(1)
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|