#!/usr/bin/env python3
"""
ocr_client.py — screenshot OCR through the GLM-OCR-8bit model served by the
Xiaoguo (小果) gateway at node122:18003.

Usage:
    python3 ocr_client.py <image path>
    python3 ocr_client.py <image path> "custom prompt"

``ocr_image`` returns a dict ``{"success": bool, "text": str, "raw": ...}``.
On success ``raw`` is the decoded JSON response; on failure ``raw`` is None
and ``text`` carries the error message.
"""

import sys
import json
import base64
import urllib.request

OCR_URL = "http://node122:18003/v1/chat/completions"
OCR_MODEL = "GLM-OCR-8bit"

# Single source of truth for the default prompt; used both as the function
# default and by the command-line entry point.
DEFAULT_PROMPT = "请详细识别这张图片中的所有文字,包括数字、股票名称、金额等。用中文回复。"

# Seconds to wait for the gateway before giving up.
OCR_TIMEOUT_SECONDS = 60


def _failure(message):
    """Build the failure result; ``raw`` is always present so callers can index it."""
    return {"success": False, "text": message, "raw": None}


def ocr_image(image_path, prompt=DEFAULT_PROMPT):
    """Send an image file to the OCR model and return the recognised text.

    Args:
        image_path: Path of the image file to read and upload.
        prompt: Instruction text sent to the model alongside the image.

    Returns:
        A dict with keys ``success`` (bool), ``text`` (the model's reply on
        success, otherwise an error message) and ``raw`` (the decoded JSON
        response on success, otherwise None).

    This function does not raise for I/O, network, or response-parsing
    problems; they are reported through the returned dict.
    """
    try:
        with open(image_path, 'rb') as f:
            img_b64 = base64.b64encode(f.read()).decode()
    except FileNotFoundError:
        return _failure(f"文件不存在: {image_path}")
    except Exception as e:
        return _failure(f"读取文件失败: {e}")

    payload = json.dumps({
        "model": OCR_MODEL,
        "messages": [{
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                # NOTE(review): the data URL is always labelled image/png,
                # whatever the real file format is — confirm the gateway
                # ignores the declared MIME type before sending other formats.
                {"type": "image_url",
                 "image_url": {"url": f"data:image/png;base64,{img_b64}"}},
            ],
        }],
        "max_tokens": 1500,
        "temperature": 0.1,
    }).encode()

    req = urllib.request.Request(
        OCR_URL,
        data=payload,
        headers={"Content-Type": "application/json"},
    )

    try:
        # The context manager closes the HTTP response even if reading or
        # decoding fails.
        with urllib.request.urlopen(req, timeout=OCR_TIMEOUT_SECONDS) as resp:
            data = json.loads(resp.read().decode())
        text = data.get("choices", [{}])[0].get("message", {}).get("content", "")
    except OSError as e:
        # URLError/HTTPError are OSError subclasses; catching OSError also
        # covers socket timeouts and resets raised while reading the body,
        # which are transport failures rather than parse failures.
        return _failure(f"请求失败: {e}")
    except Exception as e:
        # Malformed JSON or an unexpected response structure.
        return _failure(f"解析失败: {e}")

    return {"success": True, "text": text, "raw": data}


def main():
    """Command-line entry point: OCR one image and print the recognised text."""
    args = sys.argv[1:]
    if not args:
        print("用法: python3 ocr_client.py <图片路径> [自定义提示]")
        sys.exit(1)

    img_path = args[0]
    prompt = args[1] if len(args) > 1 else DEFAULT_PROMPT

    result = ocr_image(img_path, prompt)
    if result["success"]:
        print(result["text"])
    else:
        print(f"OCR失败: {result['text']}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()