Files
MoFin/scripts/hardcode_scanner.py
T
知微 b4af8c9927 元自成长层:meta_growth 每周扫描修复模式→自动扩展扫描规则
- scripts/meta_growth.py (NEW): 每周日22:00分析git log中的修复模式,
  识别新问题类型,向 hardcode_scanner 注入新规则
- scripts/hardcode_scanner.py (MODIFIED): 预置扩展点注释,
  meta_growth 可直接在其后追加新规则元组
- docs/SELF_GROWTH_SYSTEM.md (UPDATED): 新增第七章"元自成长层"
- cron: 元自成长-每周 周日22:00 no_agent

设计理念:自成长机制本身必须也是自成长的。
hardcode_scanner 能扫什么不是写死的——meta_growth
会从你的修复习惯中学习新的扫描类别。
2026-06-24 00:10:45 +08:00

111 lines
3.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
hardcode_scanner.py — 自成长扫描器
检测脚本中可能已过时的硬编码数值,写审计 JSON 供 system_audit 调用。
扫描规则:
1. 财务类硬编码(cash/金额/仓位)— 应来自 data/*.json
2. 汇率类硬编码(0.86xx, 0.87xx, 0.93等)— 应来自 hk_rate 模块
3. 数字 fallback(return X, fallback=X)— 应来自实时数据源
4. 每手股数硬编码(500, 1000 等)— 应来自 Tencent API field[60]
输出:/home/hmo/web-dashboard/data/hardcode_audit.json
"""
import re, ast, json, os, sys
# Root directories that main() walks recursively; each *.py file found
# underneath is handed to scan_file(). Roots that do not exist are skipped.
SCAN_DIRS = [
    "/home/hmo/.hermes/profiles/position-analyst/scripts",
    "/home/hmo/MoFin",
    "/home/hmo/web-dashboard",
]
# Plain substrings (not regexes). A source line containing any of them is
# treated as a known-valid fallback and is never reported by scan_file().
SAFE_FALLBACK_PATTERNS = [
    # hk_rate module's own fallback rate
    "rate = 0.87",
    # Timeouts / retry windows, where network data is genuinely optional
    "retry_for_secs=5",
    "timeout=5",
    "timeout=10",
    "timeout=30",
    "timeout=60",
    # Local service port / address
    "port 5805",
    "127.0.0.1:5805",
]
# Heuristic rules as (regex pattern, reason) tuples. scan_file() applies them
# in list order to each stripped source line with re.search() and records at
# most one finding per line, so earlier rules take precedence.
SUSPICIOUS_NUMBERS = [
# (pattern, reason)
# `return` followed by an integer of 4+ digits -> "possible hard-coded cash/amount"
(r'return\s+\d{4,}\b', '可能的硬编码现金/金额'),
# `=` followed by an integer of 5+ digits -> "possible hard-coded large number"
(r'=\s*\d{5,}\b', '可能的硬编码大额数字'),
# 0.85-0.89 followed by 1-3 more digits -> "possible hard-coded exchange rate"
(r'0\.8[5-9]\d{1,3}', '可能的硬编码汇率值'),
# 0.90-0.95 followed by 1-3 more digits -> "possible hard-coded exchange rate"
(r'0\.9[0-5]\d{1,3}', '可能的硬编码汇率值'),
# "1手" (one lot), then ':' or '=', then 3+ digits -> "possible hard-coded lot size"
(r'1手\s*[:=]\s*\d{3,}', '可能的每手股数硬编码'),
# NOTE(review): the marker comment below is intentionally left verbatim. It
# reads "extension point — meta_growth appends new rules here"; presumably
# meta_growth locates it by its exact text, so do not reword or move it
# without confirming against meta_growth.py.
# 扩展点 — meta_growth 在此追加新规则
]
def scan_file(filepath, *, rules=None, safe_patterns=None):
    """Scan one source file for lines that look like hard-coded numeric values.

    Each non-empty line that is not a ``#`` comment and does not contain a
    ``\"\"\"`` delimiter is stripped and tested against the rules in order.
    At most one finding (the first matching rule) is recorded per line.
    Lines containing any safe-fallback substring are never reported.

    Args:
        filepath: Path of the file to scan. It is read as UTF-8 with
            undecodable bytes ignored.
        rules: Optional iterable of ``(regex_pattern, reason)`` tuples.
            Defaults to the module-level ``SUSPICIOUS_NUMBERS``.
        safe_patterns: Optional iterable of plain substrings that mark a
            line as a known-valid fallback. Defaults to the module-level
            ``SAFE_FALLBACK_PATTERNS``.

    Returns:
        A list of dicts with keys ``file``, ``line`` (1-based), ``code``
        (the stripped line, truncated to 120 characters), ``reason`` and
        ``suggestion``. An unreadable file yields an empty list.
    """
    if rules is None:
        rules = SUSPICIOUS_NUMBERS
    if safe_patterns is None:
        safe_patterns = SAFE_FALLBACK_PATTERNS

    try:
        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()
    except (OSError, ValueError):
        # Best-effort scan: a missing/unreadable file (or an invalid path)
        # contributes no findings instead of aborting the whole run.
        return []

    findings = []
    for lineno, line in enumerate(content.split('\n'), 1):
        stripped = line.strip()
        # Skip blank lines, '#' comments, and any line holding a triple-quote
        # delimiter. Only the delimiter lines are skipped: the interior lines
        # of a multi-line docstring are still scanned.
        if not stripped or stripped.startswith('#') or '"""' in stripped:
            continue
        # The safe-fallback check does not depend on which rule matched, so
        # it is evaluated once per line rather than once per rule.
        if any(safe in stripped for safe in safe_patterns):
            continue
        for pat, reason in rules:
            if re.search(pat, stripped):
                findings.append({
                    "file": filepath,
                    "line": lineno,
                    "code": stripped[:120],
                    "reason": reason,
                    "suggestion": "考虑从 data/*.json 或 API 实时读取,不使用硬编码值"
                })
                break  # one finding per line
    return findings
def main(audit_path=None, scan_dirs=None):
    """Scan every ``*.py`` file under the scan roots and write the audit JSON.

    A human-readable summary is printed to stdout (for cron), then a JSON
    report with ``timestamp``, ``findings`` and ``count`` is written.

    Args:
        audit_path: Destination of the JSON report. When omitted, the
            module-level ``AUDIT_PATH`` is used if it exists; otherwise the
            standard dashboard location is used. The module only binds
            ``AUDIT_PATH`` under its ``__main__`` guard, so this fallback
            keeps ``main()`` callable after a plain import.
        scan_dirs: Iterable of root directories to walk. Defaults to the
            module-level ``SCAN_DIRS``. Non-existent roots are skipped.
    """
    from datetime import datetime  # local import keeps this block self-contained

    if audit_path is None:
        audit_path = globals().get(
            'AUDIT_PATH', "/home/hmo/web-dashboard/data/hardcode_audit.json"
        )
    if scan_dirs is None:
        scan_dirs = SCAN_DIRS

    all_findings = []
    for directory in scan_dirs:
        if not os.path.isdir(directory):
            continue
        for root, _, files in os.walk(directory):
            for name in files:
                if name.endswith('.py'):
                    all_findings.extend(scan_file(os.path.join(root, name)))

    # Only output to stdout for cron
    if all_findings:
        print(f"[HARDCODE_SCAN] 发现 {len(all_findings)} 处可能硬编码:")
        for finding in all_findings:
            rel = finding['file'].replace('/home/hmo/', '')
            print(f"{rel}:L{finding['line']} {finding['reason']}")
            print(f" {finding['code']}")
            print(f"{finding['suggestion']}")
    else:
        print("[HARDCODE_SCAN] 未发现可疑硬编码")

    # Write audit log
    audit_dir = os.path.dirname(audit_path)
    if audit_dir:
        # os.makedirs('') raises, so only create a directory when the path has one.
        os.makedirs(audit_dir, exist_ok=True)
    report = {
        "timestamp": datetime.now().isoformat(),
        "findings": all_findings,
        "count": len(all_findings),
    }
    # Explicit UTF-8: ensure_ascii=False emits raw non-ASCII text, which would
    # fail under a non-UTF-8 default locale. The context manager guarantees
    # the file is flushed and closed.
    with open(audit_path, 'w', encoding='utf-8') as out:
        json.dump(report, out, ensure_ascii=False, indent=2)
if __name__ == '__main__':
    # Respect an AUDIT_PATH that is already present in the module namespace;
    # otherwise fall back to the standard dashboard location.
    if 'AUDIT_PATH' not in dir():
        AUDIT_PATH = "/home/hmo/web-dashboard/data/hardcode_audit.json"
    main()