#!/usr/bin/env python3 """ macro_context_collector.py — 宏观新闻采集器+实时红绿灯(no_agent) 采集来源: 1. akshare.stock_info_global_em() — 东方财富全球宏观新闻(200条实时) 核心功能: A. 采集新闻→macro_raw_news(去重入库) B. 实时红绿灯检测——用关键词规则快速识别HIGH/MEDIUM风险 - 不等LLM,采集完立刻判断 - 检测到风险→写入signal_news+macro_risk_state.json - 盘中的监控cron(每15-25分)读到后立刻调整策略 红绿灯规则: HIGH(单条就触发): 全球巨头+核心产业负面/美联储意外/指数暴跌/地缘冲突 MEDIUM(累计2条触发): 常规宏观事件/板块级/资金面 """ import sys, json, hashlib, os, re from datetime import datetime from pathlib import Path DATA_DIR = Path("/home/hmo/MoFin/data") DB_PATH = DATA_DIR / "mofin.db" STATE_PATH = DATA_DIR / "macro_risk_state.json" # ── 红绿灯 关键词规则 ── # HIGH: 任何一条匹配 → 立即 HIGH 预警 HIGH_PATTERNS = [ # 全球巨头+核心产业 r"苹果[^。]*(?:涨价|降价|推迟|取消|禁|制裁|调查|召回|大跌|暴跌)", r"openai[^。]*(?:推迟|取消|风险|调查|起诉|倒闭|ipo)", r"(?:英伟达|nvidia)[^。]*(?:跌|调查|制裁|推迟|禁令)", r"台积电[^。]*(?:跌|推迟|取消|地震|火灾|禁)", r"特斯拉.*(?:暴跌|召回|调查|破产|禁)", # 美联储/央行意外 r"美联储.*(?:意外|紧急|缩表|风暴|警告|超预期|加息\s*50|降息\s*50|紧急\s*(?:会议|声明))", r"美联储.*(?:利率|决议).*(?:超预期|意外|紧急)", r"fed.*(?:emergency|unexpected|surprise|hike|cut)", # 指数暴跌(需 ≥2% 跌幅或使用更强范围词) r"指数[^。]*?(?:暴跌|熔断|闪崩|重挫)", r"指数[^。]*?(?:跌幅[^。]{0,20}(?:扩大至|达|至|超|为|逾)[^。]*?(?= 2: level = "medium" matched = hits_medium[:5] titles = [f"· {h['title'][:50]}" for h in hits_medium[:5]] summary = f"【中风险】{len(hits_medium)}条预警信号:\n" + "\n".join(titles) return level, matched, summary def write_risk_signal(conn, level, matched, summary): """写入 signal_news + macro_risk_state.json""" now = datetime.now().strftime("%Y-%m-%d %H:%M:%S") sentiment_map = {"high": "宏观-WATCH_HIGH", "medium": "宏观-WATCH_MEDIUM"} sentiment = sentiment_map.get(level, "") # 写入 signal_news articles_json = json.dumps([{"title": a["title"][:80], "summary": a["summary"][:120]} for a in matched], ensure_ascii=False) conn.execute( "INSERT INTO signal_news (signal_id, sector, overall_sentiment, summary, key_articles, searched_stocks, source) VALUES (0, ?, ?, ?, ?, '', ?)", ("宏观", sentiment, summary[:500], articles_json, "macro_watch") ) conn.commit() # 写入状态文件(供监控cron实时读取) state = { "level": level, "signals": [{"sentiment": sentiment, "summary": summary[:300], "key_articles": articles_json, "created_at": now}], "signal_count": len(matched), "created_at": now, "expired": False, "source": "collector_realtime", } STATE_PATH.write_text(json.dumps(state, ensure_ascii=False, indent=2)) def main(): conn = sqlite3.connect(str(DB_PATH)) ensure_tables(conn) # 去重基础 existing = set() for row in conn.execute("SELECT title FROM macro_raw_news ORDER BY id DESC LIMIT 200"): existing.add(title_hash(row[0])) items = fetch_news() if not items: conn.close() return # 去重写入 new_items = [] now_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S') for item in items: if not item["title"].strip(): continue h = title_hash(item["title"]) if h in existing: continue existing.add(h) try: conn.execute( "INSERT INTO macro_raw_news (title, summary, url, source_ts, fetched_at) VALUES (?, ?, ?, ?, ?)", (item["title"][:300], item["summary"][:500], item["url"][:500], item["source_ts"][:20], now_str) ) new_items.append(item) except Exception: pass conn.commit() # 红绿灯检测(只针对新采集的) level, matched, summary = "none", [], "" if new_items: level, matched, summary = quick_risk_check(new_items) if level in ("high", "medium"): write_risk_signal(conn, level, matched, summary) conn.close() # no_agent 输出 if new_items: print(f"[MACRO] {len(new_items)}条新宏观新闻") # HIGH风险:输出预警(会推送到XMPP) if level == "high": print(f"⚠️ HIGH风险预警 ({len(matched)}条): {summary[:200]}") elif level == "medium": print(f" MEDIUM风险: {len(matched)}条匹配") if __name__ == "__main__": import sqlite3 main()