From 683bf39a46c35d0098d6a3ad45c96861e78a37bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=9F=A5=E5=BE=AE?= <zhiwei@mofin.local>
Date: Sat, 20 Jun 2026 22:39:09 +0800
Subject: [PATCH] =?UTF-8?q?xiaoguo=5Fnews=5Fprocessor:=20=E6=94=B9?=
 =?UTF-8?q?=E7=94=A8=E8=A7=84=E5=88=99=E5=88=86=E7=B1=BB=EF=BC=88=E4=B8=8D?=
 =?UTF-8?q?=E8=B0=83LLM=EF=BC=89=EF=BC=8C=E5=B0=8F=E6=9E=9C=E6=9C=8D?=
 =?UTF-8?q?=E5=8A=A1=E5=99=A8=E8=B4=9F=E8=8D=B7=E5=A4=AA=E9=AB=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 xiaoguo_news_processor.py | 126 +++++++++++++-------------------------
 1 file changed, 44 insertions(+), 82 deletions(-)

diff --git a/xiaoguo_news_processor.py b/xiaoguo_news_processor.py
index 1588921..4311ff8 100644
--- a/xiaoguo_news_processor.py
+++ b/xiaoguo_news_processor.py
@@ -26,8 +26,8 @@ except ImportError:
 DATA_DIR = Path(__file__).parent / "data"
 DB_PATH = DATA_DIR / "mofin.db"
 XIAOGUO_API = "http://192.168.1.122:18003/v1/chat/completions"
-XIAOGUO_MODEL = "Qwen3.6-27B-MTPLX-Optimized-Speed"
-XIAOGUO_TIMEOUT = 60
+XIAOGUO_MODEL = "Qwen3.6-27B-AEON-Uncensored-4bit"  # NOTE(review): was read by call_xiaoguo, which this change removes; confirm still needed
+XIAOGUO_TIMEOUT = 120  # seconds (urllib timeout in the removed call_xiaoguo); NOTE(review): confirm still needed
 
 
 def get_conn():
@@ -55,55 +55,24 @@ def search_akshare_news(code, max_results=3):
     return titles
 
 
-def call_xiaoguo(articles_text, timeout=XIAOGUO_TIMEOUT):
-    """调小果 LLM 分析新闻情感"""
-    prompt = f"""分析以下新闻标题，对每篇给出情感分类和摘要，再加总体判断。
+def classify_sentiment(title):
+    """Rule-based sentiment: return 利好/利空/中性 by comparing keyword hits (no LLM call)."""
+    title = title if isinstance(title, str) else ""  # None/non-str titles score 0 -> 中性
+    positive_kw = ['突破', '增长', '利好', '加单', '订单', '放量', '新高', '获批', '量产',
+                   '超预期', '供应', '投产', '融资', '加仓', '增持', '回购', '降息',
+                   '减税', '补贴', '国产替代', '自主可控', '准入']
+    negative_kw = ['管制', '限制', '制裁', '利空', '减持', '抛售', '下跌', '跌停',
+                   '风险', '违约', '调查', '暂停', '取消', '下滑', '亏损', '裁员',
+                   '诉讼', '退市', '做空', '关税', '禁令']
 
-新闻：
-{articles_text}
+    pos_score = sum(1 for kw in positive_kw if kw in title)  # substring hits
+    neg_score = sum(1 for kw in negative_kw if kw in title)
 
-JSON格式：
-{{"overall_sentiment":"利好|利空|中性","summary":"总体判断","articles":[{{"title":"","sentiment":"","summary":"","reason":""}}]}}"""
-
-    payload = json.dumps({
-        "model": XIAOGUO_MODEL,
-        "messages": [
-            {"role": "system", "content": "你只输出JSON。"},
-            {"role": "user", "content": prompt}
-        ],
-        "temperature": 0.1,
-        "max_tokens": 2000,
-    }).encode()
-
-    opener = urllib.request.build_opener(urllib.request.ProxyHandler({}))
-    req = urllib.request.Request(
-        XIAOGUO_API, data=payload,
-        headers={"Content-Type": "application/json"},
-        method="POST"
-    )
-    try:
-        resp = opener.open(req, timeout=timeout)
-        result = json.loads(resp.read())
-        content = result["choices"][0]["message"]["content"]
-        # 从末尾提取完整JSON
-        depth = 0
-        start = -1
-        end = len(content)
-        for i in range(len(content) - 1, -1, -1):
-            if content[i] == "}":
-                if depth == 0:
-                    end = i + 1
-                depth += 1
-            elif content[i] == "{":
-                depth -= 1
-                if depth == 0:
-                    start = i
-                    break
-        if start >= 0:
-            return json.loads(content[start:end])
-    except Exception as e:
-        print(f"  小果调用失败: {e}", flush=True)
-    return None
+    if pos_score > neg_score:
+        return "利好"
+    elif neg_score > pos_score:
+        return "利空"
+    return "中性"  # tie, including a title that matches no keyword
 
 
 def main():
@@ -129,33 +98,33 @@ def main():
     watchlist = json.loads(signal["watchlist_in_sector"] or "[]")
 
     # 收集所有要搜的股票代码
-    codes_to_search = []
+    codes_to_search = {}  # code -> display name, kept in first-seen order
     for item in related + holdings + watchlist:
         code = item.get("code", "")
-        if code and code not in [c["code"] for c in codes_to_search]:
-            codes_to_search.append(item)
+        # Keep the first non-empty name so a later duplicate cannot overwrite it.
+        if code:
+            codes_to_search[code] = codes_to_search.get(code) or item.get("name") or ""
 
-    # 如果 stock_sectors 表中有成分股数据，也搜一下
+    # Top up with constituent stocks of the sector (the query caps this at 5).
     members = conn.execute(
         "SELECT s.code, s.name FROM stocks s "
         "JOIN stock_sectors ss ON s.code = ss.code "
         "WHERE ss.sector_name = ? LIMIT 5", (sector,)
     ).fetchall()
     for m in members:
-        if not any(c.get("code") == m["code"] for c in codes_to_search):
-            codes_to_search.append({"code": m["code"], "name": m["name"]})
+        codes_to_search.setdefault(m["code"], m["name"])  # never replaces an existing entry
 
     # 搜新闻
     all_articles = []
-    for item in codes_to_search:
-        code = item.get("code", "")
-        name = item.get("name", "")
-        if code:
-            articles = search_akshare_news(code, 3)
-            for a in articles:
-                if a["title"] not in [x["title"] for x in all_articles]:
-                    all_articles.append(a)
-            print(f"  搜 {name}({code}): {len(articles)} 篇", flush=True)
+    seen_titles = set()  # titles already collected, for de-duplication across stocks
+    for code, name in codes_to_search.items():
+        articles = search_akshare_news(code, 3)
+        for a in articles:
+            if a["title"] not in seen_titles:
+                seen_titles.add(a["title"])
+                a["sentiment"] = classify_sentiment(a["title"])  # rule-based label, no LLM
+                all_articles.append(a)
+        print(f"  搜 {name}({code}): {len(articles)} 篇", flush=True)
 
     if not all_articles:
         print(f"  未搜到相关新闻", flush=True)
@@ -164,37 +133,30 @@ def main():
         conn.close()
         return
 
-    print(f"  共搜到 {len(all_articles)} 篇新闻，取前8篇分析", flush=True)
+    print(f"  共搜到 {len(all_articles)} 篇新闻（规则分类）", flush=True)
 
-    # 只取前8篇，避免小果LLM处理超时
-    batch = all_articles[:8]
-
-    # 调小果LLM分析
-    articles_text = "\n".join([f"{i+1}. {a['title']}" for i, a in enumerate(batch)])
-    result = call_xiaoguo(articles_text)
-
-    if not result:
-        print("  小果分析失败", flush=True)
-        conn.close()
-        return
+    # 统计总体情感
+    sentiments = [a["sentiment"] for a in all_articles]
+    pos = sentiments.count("利好")
+    neg = sentiments.count("利空")
+    overall = "利好" if pos > neg * 1.5 else "利空" if neg > pos * 1.5 else "中性"
+    summary = f"{sector}板块搜到{len(all_articles)}篇相关新闻，利好{pos}篇，利空{neg}篇，整体{overall}。"
 
     # 写入 signal_news
-    searched_names = list(set([c.get("name", "") for c in codes_to_search if c.get("name")]))
+    searched_names = list({n for n in codes_to_search.values() if n})  # unique, non-empty names only
     conn.execute("""
         INSERT INTO signal_news
         (signal_id, sector, overall_sentiment, summary, key_articles, searched_stocks)
         VALUES (?, ?, ?, ?, ?, ?)
     """, (
-        signal["id"], sector,
-        result.get("overall_sentiment", "中性"),
-        result.get("summary", ""),
-        json.dumps(result.get("articles", []), ensure_ascii=False),
+        signal["id"], sector, overall, summary,
+        json.dumps(all_articles, ensure_ascii=False),
         json.dumps(searched_names, ensure_ascii=False),
     ))
     conn.execute("UPDATE sector_signals SET processed = 1 WHERE id = ?", (signal["id"],))
     conn.commit()
 
-    print(f"  完成: {result.get('overall_sentiment', '?')} — {str(result.get('summary', ''))[:80]}", flush=True)
+    print(f"  完成: {overall} — {summary}", flush=True)
     conn.close()