xiaoguo_news_processor: 带全文分析5篇12秒,摘要不限字数
This commit is contained in:
+31
-12
@@ -43,20 +43,25 @@ def get_conn():
|
|||||||
|
|
||||||
|
|
||||||
def search_akshare_news(code, max_results=3):
|
def search_akshare_news(code, max_results=3):
|
||||||
"""用 akshare 搜个股新闻"""
|
"""用 akshare 搜个股新闻(含全文)"""
|
||||||
titles = []
|
articles = []
|
||||||
if not HAS_AKSHARE:
|
if not HAS_AKSHARE:
|
||||||
return titles
|
return articles
|
||||||
try:
|
try:
|
||||||
clean_proxy()
|
clean_proxy()
|
||||||
df = ak.stock_news_em(symbol=code)
|
df = ak.stock_news_em(symbol=code)
|
||||||
for _, r in df.head(max_results).iterrows():
|
for _, r in df.head(max_results).iterrows():
|
||||||
title = r.get('新闻标题', '')
|
title = r.get('新闻标题', '')
|
||||||
|
content = r.get('新闻内容', '')
|
||||||
if title and len(title) > 5:
|
if title and len(title) > 5:
|
||||||
titles.append({"title": title, "url": r.get('新闻链接', '')})
|
articles.append({
|
||||||
|
"title": title,
|
||||||
|
"content": content,
|
||||||
|
"url": r.get('新闻链接', '')
|
||||||
|
})
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
return titles
|
return articles
|
||||||
|
|
||||||
|
|
||||||
def extract_json(text):
|
def extract_json(text):
|
||||||
@@ -90,11 +95,17 @@ def call_xiaoguo(articles):
|
|||||||
"""调小果LLM:给摘要+情感"""
|
"""调小果LLM:给摘要+情感"""
|
||||||
lines = []
|
lines = []
|
||||||
for a in articles:
|
for a in articles:
|
||||||
# 清理标题:去掉股票代码(6位数字+前后空格)
|
|
||||||
title = re.sub(r'\b\d{6}\b', '', a['title']).strip()
|
title = re.sub(r'\b\d{6}\b', '', a['title']).strip()
|
||||||
title = re.sub(r'\s+', ' ', title)
|
title = re.sub(r'\s+', ' ', title)
|
||||||
|
content = (a.get('content') or '')[:200]
|
||||||
|
# 给正文加标点分隔(akshare正文无标点,模型推理会卡)
|
||||||
|
if content and not any(c in content for c in '。,!?;'):
|
||||||
|
content = '。'.join([content[i:i+20] for i in range(0, len(content), 20)])
|
||||||
|
if content:
|
||||||
|
lines.append(f"{len(lines)+1}. {title}\n {content}")
|
||||||
|
else:
|
||||||
lines.append(f"{len(lines)+1}. {title}")
|
lines.append(f"{len(lines)+1}. {title}")
|
||||||
prompt = "\n".join(lines) + "\n\n逐篇给一句话摘要和情感(positive/negative/neutral)。JSON数组。"
|
prompt = "\n".join(lines) + "\n\n逐篇分析:给摘要(概括核心内容)和情感(positive/negative/neutral)。JSON数组。"
|
||||||
|
|
||||||
payload = json.dumps({
|
payload = json.dumps({
|
||||||
"model": XIAOGUO_MODEL,
|
"model": XIAOGUO_MODEL,
|
||||||
@@ -174,7 +185,16 @@ def main():
|
|||||||
conn.close()
|
conn.close()
|
||||||
return
|
return
|
||||||
|
|
||||||
batch = all_articles[:MAX_ARTICLES]
|
# 只取前5篇,跳过含有表格数据的脏内容
|
||||||
|
filtered = []
|
||||||
|
for a in all_articles:
|
||||||
|
c = a.get('content', '') or ''
|
||||||
|
if any(kw in c for kw in ['主力资金', '资金净流入', '代码', '简称']):
|
||||||
|
continue
|
||||||
|
filtered.append(a)
|
||||||
|
if len(filtered) >= MAX_ARTICLES:
|
||||||
|
break
|
||||||
|
batch = filtered[:MAX_ARTICLES]
|
||||||
print(f" 共{len(all_articles)}篇,送小果分析{len(batch)}篇", flush=True)
|
print(f" 共{len(all_articles)}篇,送小果分析{len(batch)}篇", flush=True)
|
||||||
|
|
||||||
results = call_xiaoguo(batch)
|
results = call_xiaoguo(batch)
|
||||||
@@ -183,13 +203,12 @@ def main():
|
|||||||
conn.close()
|
conn.close()
|
||||||
return
|
return
|
||||||
|
|
||||||
# 合并结果(用索引位置匹配,不依赖标题文本)
|
# 合并结果(用索引位置匹配)
|
||||||
for i, r in enumerate(results):
|
for i, r in enumerate(results):
|
||||||
if i < len(batch):
|
if i < len(batch):
|
||||||
batch[i]["sentiment"] = translate_sentiment(r.get("sentiment", ""))
|
batch[i]["sentiment"] = translate_sentiment(r.get("sentiment", r.get("情感", "")))
|
||||||
batch[i]["summary"] = r.get("summary", "")
|
batch[i]["summary"] = r.get("summary", r.get("摘要", ""))
|
||||||
else:
|
else:
|
||||||
# 超过批次的额外结果
|
|
||||||
break
|
break
|
||||||
|
|
||||||
# 汇总情感
|
# 汇总情感
|
||||||
|
|||||||
Reference in New Issue
Block a user