# Keyword tables for the rule-based sentiment classifier. They are defined once
# at module level so they are not rebuilt on every call.
_POSITIVE_KEYWORDS = (
    '突破', '增长', '利好', '加单', '订单', '放量', '新高', '获批', '量产',
    '超预期', '供应', '投产', '融资', '加仓', '增持', '回购', '降息',
    '减税', '补贴', '国产替代', '自主可控', '准入',
)
_NEGATIVE_KEYWORDS = (
    '管制', '限制', '制裁', '利空', '减持', '抛售', '下跌', '跌停',
    '风险', '违约', '调查', '暂停', '取消', '下滑', '亏损', '裁员',
    '诉讼', '退市', '做空', '关税', '禁令',
)


def classify_sentiment(title):
    """Classify a news headline by keyword matching, without calling the LLM.

    Each positive or negative keyword that occurs in the title as a substring
    counts once, however many times it repeats. The side with more distinct
    keyword hits wins; a tie (including no hits at all) is neutral.

    Args:
        title: Headline text. Anything that is not a non-empty ``str``
            (``None``, a NaN cell, ``""``) is treated as having no keywords.

    Returns:
        ``"利好"`` (positive), ``"利空"`` (negative) or ``"中性"`` (neutral).
    """
    # Guard against missing titles so one bad row cannot abort the whole run.
    if not isinstance(title, str) or not title:
        return "中性"

    # All keywords are Chinese and therefore uncased, so no case folding is
    # needed before the substring checks.
    pos_score = sum(kw in title for kw in _POSITIVE_KEYWORDS)
    neg_score = sum(kw in title for kw in _NEGATIVE_KEYWORDS)

    if pos_score > neg_score:
        return "利好"
    if neg_score > pos_score:
        return "利空"
    return "中性"
LIMIT 5", (sector,) ).fetchall() for m in members: - if not any(c.get("code") == m["code"] for c in codes_to_search): - codes_to_search.append({"code": m["code"], "name": m["name"]}) + if m["code"] not in codes_to_search: + codes_to_search[m["code"]] = m["name"] # 搜新闻 all_articles = [] - for item in codes_to_search: - code = item.get("code", "") - name = item.get("name", "") - if code: - articles = search_akshare_news(code, 3) - for a in articles: - if a["title"] not in [x["title"] for x in all_articles]: - all_articles.append(a) - print(f" 搜 {name}({code}): {len(articles)} 篇", flush=True) + for code, name in codes_to_search.items(): + articles = search_akshare_news(code, 3) + for a in articles: + if a["title"] not in [x["title"] for x in all_articles]: + # 规则分类 + a["sentiment"] = classify_sentiment(a["title"]) + all_articles.append(a) + print(f" 搜 {name}({code}): {len(articles)} 篇", flush=True) if not all_articles: print(f" 未搜到相关新闻", flush=True) @@ -164,37 +133,30 @@ def main(): conn.close() return - print(f" 共搜到 {len(all_articles)} 篇新闻,取前8篇分析", flush=True) + print(f" 共搜到 {len(all_articles)} 篇新闻(规则分类)", flush=True) - # 只取前8篇,避免小果LLM处理超时 - batch = all_articles[:8] - - # 调小果LLM分析 - articles_text = "\n".join([f"{i+1}. {a['title']}" for i, a in enumerate(batch)]) - result = call_xiaoguo(articles_text) - - if not result: - print(" 小果分析失败", flush=True) - conn.close() - return + # 统计总体情感 + sentiments = [a["sentiment"] for a in all_articles] + pos = sentiments.count("利好") + neg = sentiments.count("利空") + overall = "利好" if pos > neg * 1.5 else "利空" if neg > pos * 1.5 else "中性" + summary = f"{sector}板块搜到{len(all_articles)}篇相关新闻,利好{pos}篇,利空{neg}篇,整体{overall}。" # 写入 signal_news - searched_names = list(set([c.get("name", "") for c in codes_to_search if c.get("name")])) + searched_names = list(set(codes_to_search.values())) conn.execute(""" INSERT INTO signal_news (signal_id, sector, overall_sentiment, summary, key_articles, searched_stocks) VALUES (?, ?, ?, ?, ?, ?) 
""", ( - signal["id"], sector, - result.get("overall_sentiment", "中性"), - result.get("summary", ""), - json.dumps(result.get("articles", []), ensure_ascii=False), + signal["id"], sector, overall, summary, + json.dumps(all_articles, ensure_ascii=False), json.dumps(searched_names, ensure_ascii=False), )) conn.execute("UPDATE sector_signals SET processed = 1 WHERE id = ?", (signal["id"],)) conn.commit() - print(f" 完成: {result.get('overall_sentiment', '?')} — {str(result.get('summary', ''))[:80]}", flush=True) + print(f" 完成: {overall} — {summary}", flush=True) conn.close()