# MoFin/scripts/hardcode_scanner.py

#!/usr/bin/env python3
"""
hardcode_scanner.py — 自成长扫描器

检测脚本中可能已过时的硬编码数值，写审计 JSON 供 system_audit 调用。

扫描规则：
1. 财务类硬编码（cash/金额/仓位）— 应来自 data/*.json
2. 汇率类硬编码（0.86xx, 0.87xx, 0.93等）— 应来自 hk_rate 模块
3. 数字 fallback（return X, fallback=X）— 应来自实时数据源
4. 每手股数硬编码（500, 1000 等）— 应来自 Tencent API field[60]

输出：/home/hmo/web-dashboard/data/hardcode_audit.json
"""
import re, ast, json, os, sys

# Root directories that main() walks recursively looking for files whose
# name ends in ".py". Entries that are not existing directories are skipped.
SCAN_DIRS = [
    "/home/hmo/.hermes/profiles/position-analyst/scripts",
    "/home/hmo/MoFin",
    "/home/hmo/web-dashboard",
]

# Whitelist of plain substrings (tested with ``in``, not as regexes).
# scan_file() never reports a line that contains any of these, regardless of
# which detection rule the line matched.
SAFE_FALLBACK_PATTERNS = [
    # Known valid fallbacks where network data is genuinely optional
    "rate = 0.87",       # hk_rate module's own fallback
    "retry_for_secs=5",  # timeouts
    "timeout=5",
    "timeout=10",
    "timeout=30",
    "timeout=60",
    "port 5805",
    "127.0.0.1:5805",
]

# Detection rules as (regex, reason) pairs. scan_file() applies them in list
# order with re.search() to each stripped line and records only the first
# rule that matches, so earlier entries take precedence over later ones.
# The reason strings are emitted verbatim in the printed report and in the
# audit JSON.
SUSPICIOUS_NUMBERS = [
    # (pattern, reason)
    (r'return\s+\d{4,}\b', '可能的硬编码现金/金额'),
    (r'=\s*\d{5,}\b', '可能的硬编码大额数字'),
    (r'0\.8[5-9]\d{1,3}', '可能的硬编码汇率值'),
    (r'0\.9[0-5]\d{1,3}', '可能的硬编码汇率值'),
    (r'1手\s*[:=]\s*\d{3,}', '可能的每手股数硬编码'),
        (r'[><=]\s*0\.[0-9]+', '可能的百分比阈值硬编码'),
        (r'仓位\s*[:=]\s*\d{3,}', '可能的仓位金额硬编码'),
        (r"['\"](?!http|~|\./|\.\./)/home/[^'\"]+['\"]", '可能的文件路径硬编码（应使用环境变量或配置）'),
    # Extension point: meta_growth appends new rules here.
    # NOTE(review): the original marker line below is kept verbatim in case
    # meta_growth locates the insertion point by its text — confirm.
    # 扩展点 — meta_growth 在此追加新规则
]

def scan_file(filepath, patterns=None, safe_patterns=None):
    """Scan one source file line by line for suspicious hard-coded values.

    Every line is stripped; blank lines, lines starting with ``#`` and lines
    containing a triple double-quote are skipped. The remaining lines are
    tested against the rules in order, and the first rule that matches
    decides the reported reason, so a line yields at most one finding. A
    line containing any whitelist substring is never reported.

    Note that only lines which themselves contain a triple double-quote are
    skipped; text in the interior of a multi-line docstring is still scanned.

    Args:
        filepath: Path of the file to scan. It is read as UTF-8 and
            undecodable bytes are dropped.
        patterns: Optional iterable of ``(regex, reason)`` pairs. Defaults to
            the module-level ``SUSPICIOUS_NUMBERS``.
        safe_patterns: Optional iterable of whitelist substrings. Defaults to
            the module-level ``SAFE_FALLBACK_PATTERNS``.

    Returns:
        A list of dicts with the keys ``file``, ``line`` (1-based), ``code``
        (the stripped line truncated to 120 characters), ``reason`` and
        ``suggestion``. The list is empty when the file cannot be read.
    """
    try:
        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()
    except (OSError, ValueError):
        # Best effort: an unreadable path (missing, a directory, permission
        # denied, embedded NUL) contributes no findings instead of aborting
        # the whole scan. Programming errors are no longer swallowed.
        return []

    if patterns is None:
        patterns = SUSPICIOUS_NUMBERS
    if safe_patterns is None:
        safe_patterns = SAFE_FALLBACK_PATTERNS

    # Compile once per file rather than looking each pattern up per line.
    rules = [(re.compile(pat), reason) for pat, reason in patterns]
    whitelist = tuple(safe_patterns)

    findings = []
    for lineno, raw_line in enumerate(content.split('\n'), 1):
        stripped = raw_line.strip()
        # Skip comments and empty lines
        if not stripped or stripped.startswith('#') or '"""' in stripped:
            continue

        # Rule order matters: only the first matching rule is reported.
        reason = next(
            (why for regex, why in rules if regex.search(stripped)),
            None,
        )
        if reason is None:
            continue

        # The whitelist is a property of the line, not of the rule, so one
        # check after the first match is enough to suppress the line.
        if any(safe in stripped for safe in whitelist):
            continue

        findings.append({
            "file": filepath,
            "line": lineno,
            "code": stripped[:120],
            "reason": reason,
            "suggestion": "考虑从 data/*.json 或 API 实时读取，不使用硬编码值"
        })
    return findings


def main(scan_dirs=None, audit_path=None):
    """Scan all ``.py`` files under the scan directories and record findings.

    A human-readable report is printed to stdout and the same findings are
    written as JSON (keys ``timestamp``, ``findings``, ``count``) to the
    audit file, whose parent directory is created when missing.

    Args:
        scan_dirs: Optional iterable of directories to walk recursively.
            Defaults to the module-level ``SCAN_DIRS``. Entries that are not
            existing directories are skipped.
        audit_path: Optional path of the JSON audit file. Defaults to the
            module global ``AUDIT_PATH`` when one is defined, otherwise to
            ``/home/hmo/web-dashboard/data/hardcode_audit.json``.
    """
    from datetime import datetime

    if scan_dirs is None:
        scan_dirs = SCAN_DIRS
    if audit_path is None:
        # AUDIT_PATH may not exist as a module global when this function is
        # called from an importing module, so look it up defensively instead
        # of raising NameError.
        audit_path = globals().get(
            "AUDIT_PATH",
            "/home/hmo/web-dashboard/data/hardcode_audit.json",
        )

    all_findings = []
    for directory in scan_dirs:
        if not os.path.isdir(directory):
            continue
        for root, _, files in os.walk(directory):
            for name in files:
                if name.endswith('.py'):
                    all_findings.extend(scan_file(os.path.join(root, name)))

    # Only output to stdout for cron
    if all_findings:
        print(f"[HARDCODE_SCAN] 发现 {len(all_findings)} 处可能硬编码：")
        for finding in all_findings:
            rel = finding['file'].replace('/home/hmo/', '')
            print(f"  ⚠ {rel}:L{finding['line']} {finding['reason']}")
            print(f"    {finding['code']}")
            print(f"    → {finding['suggestion']}")
    else:
        print("[HARDCODE_SCAN] 未发现可疑硬编码")

    # Write audit log
    parent = os.path.dirname(audit_path)
    if parent:
        # os.makedirs('') raises, so only create a directory when the path
        # actually has a directory component.
        os.makedirs(parent, exist_ok=True)
    report = {
        "timestamp": datetime.now().isoformat(),
        "findings": all_findings,
        "count": len(all_findings),
    }
    # Explicit UTF-8: ensure_ascii=False writes non-ASCII text as-is, which
    # must not depend on the locale's default encoding. The context manager
    # guarantees the file is flushed and closed.
    with open(audit_path, 'w', encoding='utf-8') as fh:
        json.dump(report, fh, ensure_ascii=False, indent=2)


if __name__ == '__main__':
    # Respect an AUDIT_PATH that is already present in the module namespace;
    # otherwise fall back to the dashboard's default audit location.
    if 'AUDIT_PATH' not in globals():
        AUDIT_PATH = "/home/hmo/web-dashboard/data/hardcode_audit.json"
    main()