Initial: multi-agent XMPP communication system with dashboard

- Platform-based architecture (Windows/Linux/Mac)
- Agent instance registry (agents.yaml)
- Management dashboard with cross-platform monitoring
- xmpp_bot with HTTP bridge + health endpoints
- wechat_agent with WeChat-Hermes bridging
- Platform services: ProcessGuardian, HealthProbe, APIRouter, ChannelBridge
- Deployment: systemd (Linux) + PowerShell (Windows)
- Monitoring: SSH+ejabberdctl for cross-platform presence
2026-06-12 21:49:05 +08:00
commit 1b2b935832
76 changed files with 15943 additions and 0 deletions
@@ -0,0 +1,285 @@
+"""
+api_proxy.py — 多 upstream API 错误码吞掉代理
+
+监听本地端口，转发请求到 upstream API。
+- 2xx: 透传，完全透明
+- 429/5xx: 自动重试最多 3 次（指数退避 1s/2s/4s）
+- 重试耗尽: 返回假 200，错误信息嵌入响应文本
+  → opencode 永远看不到 HTTP 错误码，retry-cache 永不触发
+
+支持多 upstream 路由:
+  volcengine / opencode-go-new — 按模型名自动选择
+"""
+
+import os, sys, json, time, logging
+from http.server import HTTPServer, BaseHTTPRequestHandler
+from urllib.request import Request, urlopen, HTTPError
+from urllib.error import URLError
+
+# ── Project root ────────────────────────────────────────────
+# Two directory levels above this file.
+PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+# Put <PROJECT_ROOT>/scripts first on sys.path before importing proc_guard
+# (presumably that is where proc_guard lives — confirm).
+sys.path.insert(0, os.path.join(PROJECT_ROOT, "scripts"))
+from proc_guard import guard  # PID lock
+
+# ── Configuration ───────────────────────────────────────────
+# NOTE(review): 0.0.0.0 binds every network interface. The proxy attaches its
+# own upstream API key to each forwarded request, so confirm that exposure
+# beyond localhost is intended.
+LISTEN_HOST = "0.0.0.0"
+LISTEN_PORT = 8787
+# Total number of upstream attempts made per incoming request.
+MAX_RETRIES = 3
+# Delay before the next attempt, indexed by (attempt - 1); the last entry is
+# reused if there are more attempts than entries.
+RETRY_DELAYS = [1, 2, 4]  # seconds
+
+# upstream 路由表: 名称 → base URL + API key
+UPSTREAMS = {
+    "volcengine": {
+        "base_url": "https://ark.cn-beijing.volces.com/api/coding/v3",
+        "api_key": "b0359bed-09f2-49e2-a53c-32ba057412e3",
+    },
+    "opencode-go-new": {
+        "base_url": "https://opencode.ai/zen/go/v1",
+        "api_key": "sk-5miR8xAMhlaXWJz3kXoYPub4ZSUISr8Fy3BXN7teThGkWonQAjZmeJdMu17htGTB",
+    },
+    "opencode-go-old": {
+        "base_url": "https://opencode.ai/zen/go/v1",
+        "api_key": "sk-MBLGxsGQU1Ngr1M7DKMt1TiCKvOEdKiwClwiUTcOPJKRZ4wbrgKZ25l3dHmvozhj",
+    },
+}
+
+# Model routing: model name → which upstream to use.
+# Accepts both volcengine-native model names (e.g. deepseek-v4-flash) and the proxy "safe" names (e.g. deepseek-v4-flash-go-safe).
+MODEL_ROUTES = {
+    "deepseek-v4-flash-safe": "volcengine",
+    "deepseek-v4-pro-safe": "volcengine",
+    "deepseek-v4-flash-go-safe": "opencode-go-new",
+    "deepseek-v4-pro-go-safe": "opencode-go-new",
+    # volcengine-native model names → routed to opencode-go (for use after the volcengine baseURL has been hijacked)
+    "deepseek-v4-flash": "opencode-go-new",
+    "deepseek-v4-pro": "opencode-go-new",
+}
+
+# Model name mapping: safe name → the name the upstream actually uses.
+# Names absent from this table are forwarded unchanged.
+MODEL_MAP = {
+    "deepseek-v4-flash-safe": "deepseek-v4-flash",
+    "deepseek-v4-pro-safe": "deepseek-v4-pro",
+    "deepseek-v4-flash-go-safe": "deepseek-v4-flash",
+    "deepseek-v4-pro-go-safe": "deepseek-v4-pro",
+}
+
+# Default upstream (used when the model name is not in the routing table)
+DEFAULT_UPSTREAM = "volcengine"
+
+# Logs go to <PROJECT_ROOT>/logs/api_proxy.log (UTF-8) and to a StreamHandler;
+# the directory is created at import time if it does not exist.
+LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
+os.makedirs(LOG_DIR, exist_ok=True)
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    handlers=[
+        logging.FileHandler(os.path.join(LOG_DIR, "api_proxy.log"), encoding="utf-8"),
+        logging.StreamHandler(),
+    ],
+)
+# Module-wide logger used by the handler and main().
+log = logging.getLogger("api_proxy")
+
+
+def make_fake_response(model: str, error_text: str, stream: bool) -> tuple:
+    """构造假 200 响应，把错误信息嵌入文案。"""
+    content = f"[api_proxy] upstream API 请求失败（{MAX_RETRIES} 次重试均未成功）。原始错误: {error_text}"
+
+    if stream:
+        fake_chunks = [
+            f'data: {{"choices":[{{"delta":{{"role":"assistant","content":""}},"index":0}}]}}\n\n',
+            f'data: {{"choices":[{{"delta":{{"content":{json.dumps(content)}}},"index":0}}]}}\n\n',
+            f"data: [DONE]\n\n",
+        ]
+        body = "".join(fake_chunks).encode("utf-8")
+        return body, "text/event-stream"
+    else:
+        resp = {
+            "id": "api_proxy_error_fallback",
+            "object": "chat.completion",
+            "created": int(time.time()),
+            "model": model,
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {"role": "assistant", "content": content},
+                    "finish_reason": "stop",
+                }
+            ],
+            "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
+        }
+        return json.dumps(resp, ensure_ascii=False).encode("utf-8"), "application/json"
+
+
+class ProxyHandler(BaseHTTPRequestHandler):
+    """HTTP 请求代理处理器。"""
+
+    # 禁止 BaseHTTPRequestHandler 写日志到 stderr（我们自己记）
+    def log_message(self, format, *args):
+        pass
+
+    def do_GET(self):
+        self._handle_request("GET")
+
+    def do_POST(self):
+        self._handle_request("POST")
+
+    def do_DELETE(self):
+        self._handle_request("DELETE")
+
+    def _handle_request(self, method):
+        start = time.time()
+        req_id = f"{method}{self.path}"[:80]
+        log.info("→ %s %s", method, self.path)
+
+        # 读取请求体
+        content_length = int(self.headers.get("Content-Length", 0))
+        body = self.rfile.read(content_length) if content_length > 0 else b""
+
+        # 解析模型名、路由和流式标记
+        is_stream = False
+        model_in = ""
+        upstream_name = DEFAULT_UPSTREAM
+        if body:
+            try:
+                req_json = json.loads(body)
+                is_stream = req_json.get("stream", False)
+                model_in = req_json.get("model", "")
+                # 根据模型名选 upstream
+                upstream_name = MODEL_ROUTES.get(model_in, DEFAULT_UPSTREAM)
+                # 模型名映射: 安全名 → 真实 upstream 名
+                if model_in in MODEL_MAP:
+                    real_model = MODEL_MAP[model_in]
+                    req_json["model"] = real_model
+                    body = json.dumps(req_json, ensure_ascii=False).encode("utf-8")
+                    log.info("  model remap: %s → %s (upstream: %s)", model_in, real_model, upstream_name)
+            except (json.JSONDecodeError, UnicodeDecodeError):
+                pass
+
+        # 构建 upstream URL（从路由表选 base URL）
+        upstream_cfg = UPSTREAMS.get(upstream_name, UPSTREAMS[DEFAULT_UPSTREAM])
+        upstream_url = upstream_cfg["base_url"] + self.path
+
+        # 准备转发的头部（过滤 hop-by-hop 头）
+        excluded_headers = {
+            "host", "connection", "keep-alive", "proxy-authenticate",
+            "proxy-authorization", "te", "trailers", "transfer-encoding",
+            "upgrade", "content-length", "content-encoding",
+        }
+        upstream_headers = {
+            k: v for k, v in self.headers.items()
+            if k.lower() not in excluded_headers
+        }
+
+        # 用 upstream 自己的 API key 覆盖客户端传过来的
+        if "api_key" in upstream_cfg:
+            upstream_headers["Authorization"] = f"Bearer {upstream_cfg['api_key']}"
+
+        last_error = ""
+        last_status = 0
+
+        for attempt in range(1, MAX_RETRIES + 1):
+            try:
+                upstream_headers.pop("Content-Length", None)
+                req = Request(upstream_url, data=body, headers=upstream_headers, method=method)
+
+                with urlopen(req, timeout=120) as resp:
+                    # 成功 — 透传
+                    status = resp.status
+                    if status < 400:
+                        resp_body = resp.read()
+                        self._send_response(status, resp.headers, resp_body)
+                        elapsed = time.time() - start
+                        log.info("✓ %s %s → %d (%.2fs)", method, self.path, status, elapsed)
+                        return
+
+                    # 4xx/5xx — 记录准备重试
+                    last_status = status
+                    last_error = f"HTTP {status}: {resp.read().decode('utf-8', errors='replace')[:500]}"
+                    log.warning("⚠ attempt %d/%d: %s", attempt, MAX_RETRIES, last_error)
+
+            except HTTPError as e:
+                last_status = e.code
+                last_error = f"HTTP {e.code}: {e.read().decode('utf-8', errors='replace')[:500]}"
+                log.warning("⚠ attempt %d/%d: %s", attempt, MAX_RETRIES, last_error)
+
+            except URLError as e:
+                last_status = 0
+                last_error = f"URLError: {e.reason}"
+                log.warning("⚠ attempt %d/%d: %s", attempt, MAX_RETRIES, last_error)
+
+            except Exception as e:
+                last_status = 0
+                last_error = f"Exception: {e}"
+                log.warning("⚠ attempt %d/%d: %s", attempt, MAX_RETRIES, last_error)
+
+            # 最后一次尝试失败了，不 sleep
+            if attempt < MAX_RETRIES:
+                delay = RETRY_DELAYS[min(attempt - 1, len(RETRY_DELAYS) - 1)]
+                log.info("  sleep %ds before retry %d/%d", delay, attempt + 1, MAX_RETRIES)
+                time.sleep(delay)
+
+        # ── 所有重试耗尽，返回假 200 ──
+        model_name = "unknown"
+        if body:
+            try:
+                req_json = json.loads(body)
+                model_name = req_json.get("model", "unknown")
+            except Exception:
+                pass
+
+        log.warning("✗ %s %s — 重试耗尽, model=%s, error=%s", method, self.path, model_name, last_error)
+        fake_body, content_type = make_fake_response(model_name, last_error, is_stream)
+        fake_headers = {
+            "Content-Type": content_type,
+            "Access-Control-Allow-Origin": "*",
+            "Cache-Control": "no-cache",
+        }
+        self._send_response(200, fake_headers, fake_body)
+        elapsed = time.time() - start
+        log.info("✗ %s %s → fake 200 (%.2fs)", method, self.path, elapsed)
+
+    def _send_response(self, status: int, headers, body: bytes):
+        """发送响应给客户端。"""
+        self.send_response(status)
+        # 透传安全/有用的响应头
+        allowed = {"content-type", "content-encoding", "cache-control",
+                    "x-request-id", "x-ratelimit-remaining", "x-ratelimit-reset",
+                    "access-control-allow-origin"}
+        if isinstance(headers, dict):
+            for k, v in headers.items():
+                if k.lower() in allowed:
+                    self.send_header(k, v)
+        else:
+            # http.client.HTTPMessage 对象
+            for k, v in headers.items():
+                if k.lower() in allowed:
+                    self.send_header(k, v)
+
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+        self.wfile.flush()
+
+
+def main():
+    # PID 锁
+    g = guard("api_proxy")
+    if not g.ok:
+        log.error("api_proxy 已有实例在运行 (PID %s)，退出", g.pid)
+        sys.exit(1)
+
+    server = HTTPServer((LISTEN_HOST, LISTEN_PORT), ProxyHandler)
+    log.info("api_proxy 启动 → http://%s:%d", LISTEN_HOST, LISTEN_PORT)
+    for name, cfg in UPSTREAMS.items():
+        log.info("  upstream [%s]: %s", name, cfg["base_url"])
+    log.info("retry: %d 次, 退避 %s", MAX_RETRIES, RETRY_DELAYS)
+    log.info("重试耗尽后返回 fake 200（opencode retry-cache 永不触发）")
+
+    try:
+        server.serve_forever()
+    except KeyboardInterrupt:
+        log.info("收到中断信号，关闭服务器...")
+        server.shutdown()
+
+
+if __name__ == "__main__":
+    main()