Files
MoFin/scripts/hardcode_scanner.py

114 lines
3.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
hardcode_scanner.py — 自成长扫描器
检测脚本中可能已过时的硬编码数值,写审计 JSON 供 system_audit 调用。
扫描规则:
1. 财务类硬编码(cash/金额/仓位)— 应来自 data/*.json
2. 汇率类硬编码(0.86xx, 0.87xx, 0.93等)— 应来自 hk_rate 模块
3. 数字 fallback(return X, fallback=X)— 应来自实时数据源
4. 每手股数硬编码(500, 1000 等)— 应来自 Tencent API field[60]
输出:/home/hmo/web-dashboard/data/hardcode_audit.json
"""
import re, ast, json, os, sys
# Root directories whose trees are walked for *.py files to scan.
# Entries that are not existing directories are skipped by main().
SCAN_DIRS = [
    "/home/hmo/.hermes/profiles/position-analyst/scripts",
    "/home/hmo/MoFin",
    "/home/hmo/web-dashboard",
]
# Whitelist of plain substrings (not regexes). A line containing any of these
# is never reported by scan_file(), even if a suspicious-number rule matches.
# They cover known-valid fallbacks where network data is genuinely optional.
SAFE_FALLBACK_PATTERNS = [
    # hk_rate module's own fallback
    "rate = 0.87",

    # timeouts
    "retry_for_secs=5",
    "timeout=5",
    "timeout=10",
    "timeout=30",
    "timeout=60",

    # port / loopback address literals
    "port 5805",
    "127.0.0.1:5805",
]
# Heuristic rules, each a (regex, reason) pair. scan_file() tries them in list
# order against every non-comment line and records only the FIRST rule that
# matches, so earlier entries take precedence over later ones.
SUSPICIOUS_NUMBERS = [
# (pattern, reason)
# `return` followed by an integer of 4+ digits -> reason: possible hard-coded cash/amount
(r'return\s+\d{4,}\b', '可能的硬编码现金/金额'),
# `=` followed by an integer of 5+ digits -> reason: possible hard-coded large number
(r'=\s*\d{5,}\b', '可能的硬编码大额数字'),
# decimals 0.85x .. 0.89xxx -> reason: possible hard-coded exchange rate
(r'0\.8[5-9]\d{1,3}', '可能的硬编码汇率值'),
# decimals 0.90x .. 0.95xxx -> reason: possible hard-coded exchange rate
(r'0\.9[0-5]\d{1,3}', '可能的硬编码汇率值'),
# "1手" (one lot) followed by `:` or `=` and 3+ digits -> reason: possible hard-coded lot size
(r'1手\s*[:=]\s*\d{3,}', '可能的每手股数硬编码'),
# `>`, `<` or `=` followed by a 0.x decimal -> reason: possible hard-coded percentage threshold
(r'[><=]\s*0\.[0-9]+', '可能的百分比阈值硬编码'),
# "仓位" (position) followed by `:` or `=` and 3+ digits -> reason: possible hard-coded position amount
(r'仓位\s*[:=]\s*\d{3,}', '可能的仓位金额硬编码'),
# quoted string starting with /home/ -> reason: possible hard-coded file path
# (should use an environment variable or config)
(r"['\"](?!http|~|\./|\.\./)/home/[^'\"]+['\"]", '可能的文件路径硬编码(应使用环境变量或配置)'),
# Extension point: meta_growth appends new rules here. The original marker
# line below is kept verbatim in case that tool locates it textually (TODO confirm).
# 扩展点 — meta_growth 在此追加新规则
]
def scan_file(filepath, *, rules=None, safe_patterns=None):
    """Scan one source file for lines that look like hard-coded values.

    Each non-blank line that is not a ``#`` comment and does not contain a
    triple double-quote is tested against the rules in order. At most one
    finding is recorded per line, using the first rule that matches. A line
    containing any whitelisted substring is never reported.

    Args:
        filepath: Path of the file to scan. It is read as UTF-8 with
            undecodable bytes ignored.
        rules: Iterable of ``(regex, reason)`` pairs. Defaults to the
            module-level ``SUSPICIOUS_NUMBERS``.
        safe_patterns: Iterable of plain substrings that whitelist a line.
            Defaults to the module-level ``SAFE_FALLBACK_PATTERNS``.

    Returns:
        A list of dicts with keys ``file``, ``line`` (1-based), ``code``
        (the stripped line, truncated to 120 characters), ``reason`` and
        ``suggestion``. An unreadable file yields an empty list.
    """
    if rules is None:
        rules = SUSPICIOUS_NUMBERS
    if safe_patterns is None:
        safe_patterns = SAFE_FALLBACK_PATTERNS

    try:
        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()
    except (OSError, ValueError):
        # Best effort: a missing/unreadable file (OSError) or an invalid path
        # such as one with an embedded NUL (ValueError) contributes nothing.
        return []

    findings = []
    # Split on '\n' only (not splitlines()) so reported line numbers match
    # what editors show even if the text contains form feeds or U+2028.
    for lineno, line in enumerate(content.split('\n'), 1):
        stripped = line.strip()
        # Skip blank lines, comments, and lines carrying a docstring delimiter.
        if not stripped or stripped.startswith('#') or '"""' in stripped:
            continue

        # First matching rule wins: one finding per line.
        reason = next(
            (why for pat, why in rules if re.search(pat, stripped)),
            None,
        )
        if reason is None:
            continue

        # The whitelist does not depend on which rule matched, so it is
        # checked once per line instead of once per matching rule.
        if any(safe in stripped for safe in safe_patterns):
            continue

        findings.append({
            "file": filepath,
            "line": lineno,
            "code": stripped[:120],
            "reason": reason,
            "suggestion": "考虑从 data/*.json 或 API 实时读取,不使用硬编码值"
        })
    return findings
def main():
    """Scan every ``*.py`` file under ``SCAN_DIRS`` and publish the findings.

    Prints a human-readable summary to stdout (intended for cron output) and
    writes a JSON audit file containing ``timestamp``, ``findings`` and
    ``count``. The audit path is the module-level ``AUDIT_PATH`` when it is
    defined, otherwise the default dashboard location, so the function also
    works when the module is imported rather than run as a script.
    """
    # Local import keeps this block self-contained; the file's import line
    # does not bring in datetime.
    from datetime import datetime

    # AUDIT_PATH is only assigned under the __main__ guard, so fall back to
    # the default instead of raising NameError when called from an importer.
    audit_path = globals().get(
        "AUDIT_PATH", "/home/hmo/web-dashboard/data/hardcode_audit.json"
    )

    all_findings = []
    for directory in SCAN_DIRS:
        if not os.path.isdir(directory):
            continue
        for root, _, files in os.walk(directory):
            for name in files:
                if name.endswith('.py'):
                    all_findings.extend(scan_file(os.path.join(root, name)))

    # Only output to stdout for cron
    if all_findings:
        print(f"[HARDCODE_SCAN] 发现 {len(all_findings)} 处可能硬编码:")
        for item in all_findings:
            rel = item['file'].replace('/home/hmo/', '')
            print(f"{rel}:L{item['line']} {item['reason']}")
            print(f" {item['code']}")
            print(f"{item['suggestion']}")
    else:
        print("[HARDCODE_SCAN] 未发现可疑硬编码")

    # Write audit log
    parent = os.path.dirname(audit_path)
    if parent:
        # os.makedirs('') raises, which would happen for a bare filename.
        os.makedirs(parent, exist_ok=True)
    report = {
        "timestamp": datetime.now().isoformat(),
        "findings": all_findings,
        "count": len(all_findings),
    }
    # Explicit UTF-8: ensure_ascii=False emits non-ASCII text, which would
    # fail under a non-UTF-8 locale default. The `with` closes the handle.
    with open(audit_path, 'w', encoding='utf-8') as fh:
        json.dump(report, fh, ensure_ascii=False, indent=2)
# Destination of the JSON audit report. Defined at module level (not only
# under the __main__ guard) so that importing this module and calling main()
# does not hit a NameError. A value already present in the module namespace
# (e.g. pre-seeded by a caller that exec()s this file) is kept.
AUDIT_PATH = globals().get(
    "AUDIT_PATH", "/home/hmo/web-dashboard/data/hardcode_audit.json"
)

if __name__ == '__main__':
    main()