xiaoguo_news_processor: 改用规则分类(不调LLM),小果服务器负荷太高
This commit is contained in:
+44
-82
@@ -26,8 +26,8 @@ except ImportError:
|
|||||||
DATA_DIR = Path(__file__).parent / "data"
|
DATA_DIR = Path(__file__).parent / "data"
|
||||||
DB_PATH = DATA_DIR / "mofin.db"
|
DB_PATH = DATA_DIR / "mofin.db"
|
||||||
XIAOGUO_API = "http://192.168.1.122:18003/v1/chat/completions"
|
XIAOGUO_API = "http://192.168.1.122:18003/v1/chat/completions"
|
||||||
XIAOGUO_MODEL = "Qwen3.6-27B-MTPLX-Optimized-Speed"
|
XIAOGUO_MODEL = "Qwen3.6-27B-AEON-Uncensored-4bit"
|
||||||
XIAOGUO_TIMEOUT = 60
|
XIAOGUO_TIMEOUT = 120
|
||||||
|
|
||||||
|
|
||||||
def get_conn():
|
def get_conn():
|
||||||
@@ -55,55 +55,24 @@ def search_akshare_news(code, max_results=3):
|
|||||||
return titles
|
return titles
|
||||||
|
|
||||||
|
|
||||||
def call_xiaoguo(articles_text, timeout=XIAOGUO_TIMEOUT):
    """Ask the Xiaoguo LLM endpoint to analyse the sentiment of news titles.

    Args:
        articles_text: Newline-separated, numbered news titles that are
            embedded verbatim into the prompt.
        timeout: Timeout in seconds passed to the HTTP request.

    Returns:
        The last complete top-level JSON object found in the model's reply,
        parsed into Python objects, or ``None`` when the request fails, the
        reply cannot be parsed, or no JSON object is found.
    """
    prompt = f"""分析以下新闻标题,对每篇给出情感分类和摘要,再加总体判断。

新闻:
{articles_text}

JSON格式:
{{"overall_sentiment":"利好|利空|中性","summary":"总体判断","articles":[{{"title":"","sentiment":"","summary":"","reason":""}}]}}"""

    payload = json.dumps({
        "model": XIAOGUO_MODEL,
        "messages": [
            {"role": "system", "content": "你只输出JSON。"},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.1,
        "max_tokens": 2000,
    }).encode()

    # An empty ProxyHandler mapping disables any environment-configured proxy.
    opener = urllib.request.build_opener(urllib.request.ProxyHandler({}))
    req = urllib.request.Request(
        XIAOGUO_API, data=payload,
        headers={"Content-Type": "application/json"},
        method="POST"
    )
    try:
        # Use the response as a context manager so the connection is always
        # closed, even when reading or decoding fails.
        with opener.open(req, timeout=timeout) as resp:
            result = json.loads(resp.read())
        content = result["choices"][0]["message"]["content"]
        # Scan backwards to extract the last complete JSON object, so any
        # text the model emits before it is ignored.
        depth = 0
        start = -1
        end = len(content)
        for i in range(len(content) - 1, -1, -1):
            if content[i] == "}":
                if depth == 0:
                    end = i + 1
                depth += 1
            elif content[i] == "{":
                depth -= 1
                if depth == 0:
                    start = i
                    break
        if start >= 0:
            return json.loads(content[start:end])
    except Exception as e:
        # Deliberate best-effort: report the failure and fall through to None.
        print(f" 小果调用失败: {e}", flush=True)
    return None


# Keyword tables for the rule-based classifier; built once at import time
# instead of on every call.
_POSITIVE_KEYWORDS = (
    '突破', '增长', '利好', '加单', '订单', '放量', '新高', '获批', '量产',
    '超预期', '供应', '投产', '融资', '加仓', '增持', '回购', '降息',
    '减税', '补贴', '国产替代', '自主可控', '准入',
)
_NEGATIVE_KEYWORDS = (
    '管制', '限制', '制裁', '利空', '减持', '抛售', '下跌', '跌停',
    '风险', '违约', '调查', '暂停', '取消', '下滑', '亏损', '裁员',
    '诉讼', '退市', '做空', '关税', '禁令',
)


def classify_sentiment(title):
    """Classify a news title by keyword matching, without calling the LLM.

    Each positive or negative keyword that occurs in the title counts once;
    the side with the higher count wins.

    Args:
        title: The news title. ``None`` or an empty string is treated as
            containing no keywords.

    Returns:
        ``"利好"`` when more positive than negative keywords match,
        ``"利空"`` when more negative than positive keywords match,
        otherwise ``"中性"`` (including ties and no matches).
    """
    if not title:
        return "中性"

    pos_score = sum(1 for kw in _POSITIVE_KEYWORDS if kw in title)
    neg_score = sum(1 for kw in _NEGATIVE_KEYWORDS if kw in title)

    if pos_score > neg_score:
        return "利好"
    if neg_score > pos_score:
        return "利空"
    return "中性"
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@@ -129,33 +98,33 @@ def main():
|
|||||||
watchlist = json.loads(signal["watchlist_in_sector"] or "[]")
|
watchlist = json.loads(signal["watchlist_in_sector"] or "[]")
|
||||||
|
|
||||||
# 收集所有要搜的股票代码
|
# 收集所有要搜的股票代码
|
||||||
codes_to_search = []
|
codes_to_search = {}
|
||||||
for item in related + holdings + watchlist:
|
for item in related + holdings + watchlist:
|
||||||
code = item.get("code", "")
|
code = item.get("code", "")
|
||||||
if code and code not in [c["code"] for c in codes_to_search]:
|
name = item.get("name", "")
|
||||||
codes_to_search.append(item)
|
if code:
|
||||||
|
codes_to_search[code] = name
|
||||||
|
|
||||||
# 如果 stock_sectors 表中有成分股数据,也搜一下
|
# 补充板块成分股
|
||||||
members = conn.execute(
|
members = conn.execute(
|
||||||
"SELECT s.code, s.name FROM stocks s "
|
"SELECT s.code, s.name FROM stocks s "
|
||||||
"JOIN stock_sectors ss ON s.code = ss.code "
|
"JOIN stock_sectors ss ON s.code = ss.code "
|
||||||
"WHERE ss.sector_name = ? LIMIT 5", (sector,)
|
"WHERE ss.sector_name = ? LIMIT 5", (sector,)
|
||||||
).fetchall()
|
).fetchall()
|
||||||
for m in members:
|
for m in members:
|
||||||
if not any(c.get("code") == m["code"] for c in codes_to_search):
|
if m["code"] not in codes_to_search:
|
||||||
codes_to_search.append({"code": m["code"], "name": m["name"]})
|
codes_to_search[m["code"]] = m["name"]
|
||||||
|
|
||||||
# 搜新闻
|
# 搜新闻
|
||||||
all_articles = []
|
all_articles = []
|
||||||
for item in codes_to_search:
|
for code, name in codes_to_search.items():
|
||||||
code = item.get("code", "")
|
articles = search_akshare_news(code, 3)
|
||||||
name = item.get("name", "")
|
for a in articles:
|
||||||
if code:
|
if a["title"] not in [x["title"] for x in all_articles]:
|
||||||
articles = search_akshare_news(code, 3)
|
# 规则分类
|
||||||
for a in articles:
|
a["sentiment"] = classify_sentiment(a["title"])
|
||||||
if a["title"] not in [x["title"] for x in all_articles]:
|
all_articles.append(a)
|
||||||
all_articles.append(a)
|
print(f" 搜 {name}({code}): {len(articles)} 篇", flush=True)
|
||||||
print(f" 搜 {name}({code}): {len(articles)} 篇", flush=True)
|
|
||||||
|
|
||||||
if not all_articles:
|
if not all_articles:
|
||||||
print(f" 未搜到相关新闻", flush=True)
|
print(f" 未搜到相关新闻", flush=True)
|
||||||
@@ -164,37 +133,30 @@ def main():
|
|||||||
conn.close()
|
conn.close()
|
||||||
return
|
return
|
||||||
|
|
||||||
print(f" 共搜到 {len(all_articles)} 篇新闻,取前8篇分析", flush=True)
|
print(f" 共搜到 {len(all_articles)} 篇新闻(规则分类)", flush=True)
|
||||||
|
|
||||||
# 只取前8篇,避免小果LLM处理超时
|
# 统计总体情感
|
||||||
batch = all_articles[:8]
|
sentiments = [a["sentiment"] for a in all_articles]
|
||||||
|
pos = sentiments.count("利好")
|
||||||
# 调小果LLM分析
|
neg = sentiments.count("利空")
|
||||||
articles_text = "\n".join([f"{i+1}. {a['title']}" for i, a in enumerate(batch)])
|
overall = "利好" if pos > neg * 1.5 else "利空" if neg > pos * 1.5 else "中性"
|
||||||
result = call_xiaoguo(articles_text)
|
summary = f"{sector}板块搜到{len(all_articles)}篇相关新闻,利好{pos}篇,利空{neg}篇,整体{overall}。"
|
||||||
|
|
||||||
if not result:
|
|
||||||
print(" 小果分析失败", flush=True)
|
|
||||||
conn.close()
|
|
||||||
return
|
|
||||||
|
|
||||||
# 写入 signal_news
|
# 写入 signal_news
|
||||||
searched_names = list(set([c.get("name", "") for c in codes_to_search if c.get("name")]))
|
searched_names = list(set(codes_to_search.values()))
|
||||||
conn.execute("""
|
conn.execute("""
|
||||||
INSERT INTO signal_news
|
INSERT INTO signal_news
|
||||||
(signal_id, sector, overall_sentiment, summary, key_articles, searched_stocks)
|
(signal_id, sector, overall_sentiment, summary, key_articles, searched_stocks)
|
||||||
VALUES (?, ?, ?, ?, ?, ?)
|
VALUES (?, ?, ?, ?, ?, ?)
|
||||||
""", (
|
""", (
|
||||||
signal["id"], sector,
|
signal["id"], sector, overall, summary,
|
||||||
result.get("overall_sentiment", "中性"),
|
json.dumps(all_articles, ensure_ascii=False),
|
||||||
result.get("summary", ""),
|
|
||||||
json.dumps(result.get("articles", []), ensure_ascii=False),
|
|
||||||
json.dumps(searched_names, ensure_ascii=False),
|
json.dumps(searched_names, ensure_ascii=False),
|
||||||
))
|
))
|
||||||
conn.execute("UPDATE sector_signals SET processed = 1 WHERE id = ?", (signal["id"],))
|
conn.execute("UPDATE sector_signals SET processed = 1 WHERE id = ?", (signal["id"],))
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
print(f" 完成: {result.get('overall_sentiment', '?')} — {str(result.get('summary', ''))[:80]}", flush=True)
|
print(f" 完成: {overall} — {summary}", flush=True)
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user