""" api_proxy.py — 多 upstream API 错误码吞掉代理 监听本地端口,转发请求到 upstream API。 - 2xx: 透传,完全透明 - 429/5xx: 自动重试最多 3 次(指数退避 1s/2s/4s) - 重试耗尽: 返回假 200,错误信息嵌入响应文本 → opencode 永远看不到 HTTP 错误码,retry-cache 永不触发 支持多 upstream 路由: volcengine / opencode-go-new — 按模型名自动选择 """ import os, sys, json, time, logging from http.server import HTTPServer, BaseHTTPRequestHandler from urllib.request import Request, urlopen, HTTPError from urllib.error import URLError # ── 项目根目录 ────────────────────────────────────────────── PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, os.path.join(PROJECT_ROOT, "scripts")) from proc_guard import guard # PID 锁 # ── 配置 ──────────────────────────────────────────────────── LISTEN_HOST = "0.0.0.0" LISTEN_PORT = 8787 MAX_RETRIES = 3 RETRY_DELAYS = [1, 2, 4] # seconds # upstream 路由表: 名称 → base URL + API key UPSTREAMS = { "volcengine": { "base_url": "https://ark.cn-beijing.volces.com/api/coding/v3", "api_key": "b0359bed-09f2-49e2-a53c-32ba057412e3", }, "opencode-go-new": { "base_url": "https://opencode.ai/zen/go/v1", "api_key": "sk-5miR8xAMhlaXWJz3kXoYPub4ZSUISr8Fy3BXN7teThGkWonQAjZmeJdMu17htGTB", }, "opencode-go-old": { "base_url": "https://opencode.ai/zen/go/v1", "api_key": "sk-MBLGxsGQU1Ngr1M7DKMt1TiCKvOEdKiwClwiUTcOPJKRZ4wbrgKZ25l3dHmvozhj", }, } # 模型路由: 模型名 → 走哪个 upstream # 支持 volcengine 原生模型名(如 deepseek-v4-flash)也支持 proxy 安全名(如 deepseek-v4-flash-go-safe) MODEL_ROUTES = { "deepseek-v4-flash-safe": "volcengine", "deepseek-v4-pro-safe": "volcengine", "deepseek-v4-flash-go-safe": "opencode-go-new", "deepseek-v4-pro-go-safe": "opencode-go-new", # volcengine 原生模型名 → 走 opencode-go(劫持 volcengine baseURL 后使用) "deepseek-v4-flash": "opencode-go-new", "deepseek-v4-pro": "opencode-go-new", } # 模型名映射: 安全名 → upstream 真正用的名称 MODEL_MAP = { "deepseek-v4-flash-safe": "deepseek-v4-flash", "deepseek-v4-pro-safe": "deepseek-v4-pro", "deepseek-v4-flash-go-safe": "deepseek-v4-flash", "deepseek-v4-pro-go-safe": "deepseek-v4-pro", } # 默认 upstream(当模型名不在路由表中时) DEFAULT_UPSTREAM = "volcengine" LOG_DIR = os.path.join(PROJECT_ROOT, "logs") os.makedirs(LOG_DIR, exist_ok=True) logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", handlers=[ logging.FileHandler(os.path.join(LOG_DIR, "api_proxy.log"), encoding="utf-8"), logging.StreamHandler(), ], ) log = logging.getLogger("api_proxy") def make_fake_response(model: str, error_text: str, stream: bool) -> tuple: """构造假 200 响应,把错误信息嵌入文案。""" content = f"[api_proxy] upstream API 请求失败({MAX_RETRIES} 次重试均未成功)。原始错误: {error_text}" if stream: fake_chunks = [ f'data: {{"choices":[{{"delta":{{"role":"assistant","content":""}},"index":0}}]}}\n\n', f'data: {{"choices":[{{"delta":{{"content":{json.dumps(content)}}},"index":0}}]}}\n\n', f"data: [DONE]\n\n", ] body = "".join(fake_chunks).encode("utf-8") return body, "text/event-stream" else: resp = { "id": "api_proxy_error_fallback", "object": "chat.completion", "created": int(time.time()), "model": model, "choices": [ { "index": 0, "message": {"role": "assistant", "content": content}, "finish_reason": "stop", } ], "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}, } return json.dumps(resp, ensure_ascii=False).encode("utf-8"), "application/json" class ProxyHandler(BaseHTTPRequestHandler): """HTTP 请求代理处理器。""" # 禁止 BaseHTTPRequestHandler 写日志到 stderr(我们自己记) def log_message(self, format, *args): pass def do_GET(self): self._handle_request("GET") def do_POST(self): self._handle_request("POST") def do_DELETE(self): self._handle_request("DELETE") def _handle_request(self, method): start = time.time() req_id = f"{method}{self.path}"[:80] log.info("→ %s %s", method, self.path) # 读取请求体 content_length = int(self.headers.get("Content-Length", 0)) body = self.rfile.read(content_length) if content_length > 0 else b"" # 解析模型名、路由和流式标记 is_stream = False model_in = "" upstream_name = DEFAULT_UPSTREAM if body: try: req_json = json.loads(body) is_stream = req_json.get("stream", False) model_in = req_json.get("model", "") # 根据模型名选 upstream upstream_name = MODEL_ROUTES.get(model_in, DEFAULT_UPSTREAM) # 模型名映射: 安全名 → 真实 upstream 名 if model_in in MODEL_MAP: real_model = MODEL_MAP[model_in] req_json["model"] = real_model body = json.dumps(req_json, ensure_ascii=False).encode("utf-8") log.info(" model remap: %s → %s (upstream: %s)", model_in, real_model, upstream_name) except (json.JSONDecodeError, UnicodeDecodeError): pass # 构建 upstream URL(从路由表选 base URL) upstream_cfg = UPSTREAMS.get(upstream_name, UPSTREAMS[DEFAULT_UPSTREAM]) upstream_url = upstream_cfg["base_url"] + self.path # 准备转发的头部(过滤 hop-by-hop 头) excluded_headers = { "host", "connection", "keep-alive", "proxy-authenticate", "proxy-authorization", "te", "trailers", "transfer-encoding", "upgrade", "content-length", "content-encoding", } upstream_headers = { k: v for k, v in self.headers.items() if k.lower() not in excluded_headers } # 用 upstream 自己的 API key 覆盖客户端传过来的 if "api_key" in upstream_cfg: upstream_headers["Authorization"] = f"Bearer {upstream_cfg['api_key']}" last_error = "" last_status = 0 for attempt in range(1, MAX_RETRIES + 1): try: upstream_headers.pop("Content-Length", None) req = Request(upstream_url, data=body, headers=upstream_headers, method=method) with urlopen(req, timeout=120) as resp: # 成功 — 透传 status = resp.status if status < 400: resp_body = resp.read() self._send_response(status, resp.headers, resp_body) elapsed = time.time() - start log.info("✓ %s %s → %d (%.2fs)", method, self.path, status, elapsed) return # 4xx/5xx — 记录准备重试 last_status = status last_error = f"HTTP {status}: {resp.read().decode('utf-8', errors='replace')[:500]}" log.warning("⚠ attempt %d/%d: %s", attempt, MAX_RETRIES, last_error) except HTTPError as e: last_status = e.code last_error = f"HTTP {e.code}: {e.read().decode('utf-8', errors='replace')[:500]}" log.warning("⚠ attempt %d/%d: %s", attempt, MAX_RETRIES, last_error) except URLError as e: last_status = 0 last_error = f"URLError: {e.reason}" log.warning("⚠ attempt %d/%d: %s", attempt, MAX_RETRIES, last_error) except Exception as e: last_status = 0 last_error = f"Exception: {e}" log.warning("⚠ attempt %d/%d: %s", attempt, MAX_RETRIES, last_error) # 最后一次尝试失败了,不 sleep if attempt < MAX_RETRIES: delay = RETRY_DELAYS[min(attempt - 1, len(RETRY_DELAYS) - 1)] log.info(" sleep %ds before retry %d/%d", delay, attempt + 1, MAX_RETRIES) time.sleep(delay) # ── 所有重试耗尽,返回假 200 ── model_name = "unknown" if body: try: req_json = json.loads(body) model_name = req_json.get("model", "unknown") except Exception: pass log.warning("✗ %s %s — 重试耗尽, model=%s, error=%s", method, self.path, model_name, last_error) fake_body, content_type = make_fake_response(model_name, last_error, is_stream) fake_headers = { "Content-Type": content_type, "Access-Control-Allow-Origin": "*", "Cache-Control": "no-cache", } self._send_response(200, fake_headers, fake_body) elapsed = time.time() - start log.info("✗ %s %s → fake 200 (%.2fs)", method, self.path, elapsed) def _send_response(self, status: int, headers, body: bytes): """发送响应给客户端。""" self.send_response(status) # 透传安全/有用的响应头 allowed = {"content-type", "content-encoding", "cache-control", "x-request-id", "x-ratelimit-remaining", "x-ratelimit-reset", "access-control-allow-origin"} if isinstance(headers, dict): for k, v in headers.items(): if k.lower() in allowed: self.send_header(k, v) else: # http.client.HTTPMessage 对象 for k, v in headers.items(): if k.lower() in allowed: self.send_header(k, v) self.send_header("Content-Length", str(len(body))) self.end_headers() self.wfile.write(body) self.wfile.flush() def main(): # PID 锁 g = guard("api_proxy") if not g.ok: log.error("api_proxy 已有实例在运行 (PID %s),退出", g.pid) sys.exit(1) server = HTTPServer((LISTEN_HOST, LISTEN_PORT), ProxyHandler) log.info("api_proxy 启动 → http://%s:%d", LISTEN_HOST, LISTEN_PORT) for name, cfg in UPSTREAMS.items(): log.info(" upstream [%s]: %s", name, cfg["base_url"]) log.info("retry: %d 次, 退避 %s", MAX_RETRIES, RETRY_DELAYS) log.info("重试耗尽后返回 fake 200(opencode retry-cache 永不触发)") try: server.serve_forever() except KeyboardInterrupt: log.info("收到中断信号,关闭服务器...") server.shutdown() if __name__ == "__main__": main()