# skills/gen_requirements.py

#!/usr/bin/env python3
"""
自动生成技能 requirements.txt 脚本

分析技能的Python代码，提取第三方依赖并生成requirements.txt

用法:
    python .opencode/skills/gen_requirements.py                    # 分析所有无requirements.txt的技能
    python .opencode/skills/gen_requirements.py image-service    # 只分析指定技能
    python .opencode/skills/gen_requirements.py --dry-run         # 只显示不写入
"""

import ast
import os
import re
import sys
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Optional, Set

# Standard-library module names; imports matching these are never written to
# requirements.txt.
#
# The literal below is the baseline that works on every interpreter. It is
# extended further down with ``sys.stdlib_module_names`` when the running
# Python provides it, so stdlib modules missing from this hand-maintained
# list are still recognised.
BUILTIN_MODULES = {
    # Original entries, de-duplicated and sorted.
    "abc",
    "argparse",
    "array",
    "ast",
    "asyncio",
    "atexit",
    "base64",
    "cmath",
    "cmd",
    "collections",
    "collections.abc",
    "concurrent",
    "configparser",
    "contextlib",
    "contextvars",
    "copy",
    "csv",
    "ctypes",
    "dataclasses",
    "datetime",
    "decimal",
    "dis",
    "doctest",
    "email",
    "enum",
    "errno",
    "fileinput",
    "fnmatch",
    "fractions",
    "ftplib",
    "functools",
    "gc",
    "getopt",
    "getpass",
    "glob",
    "hashlib",
    "html",
    "http",
    "inspect",
    "io",
    "itertools",
    "json",
    "keyword",
    "linecache",
    "locale",
    "logging",
    "math",
    "multiprocessing",
    "numbers",
    "operator",
    "optparse",
    "os",
    "parser",
    "pathlib",
    "pipes",
    "platform",
    "pprint",
    "random",
    "re",
    "reprlib",
    "select",
    "shlex",
    "shutil",
    "signal",
    "smtplib",
    "socket",
    "sqlite3",
    "ssl",
    "stat",
    "statistics",
    "string",
    "struct",
    "subprocess",
    "symbol",
    "sys",
    "tempfile",
    "textwrap",
    "threading",
    "time",
    "traceback",
    "types",
    "typing",
    "unittest",
    "urllib",
    "urllib.parse",
    "uuid",
    "warnings",
    "weakref",
    "webbrowser",
    "xml",
    "zipfile",
    # Frequently used stdlib modules that were missing from the original list.
    "binascii",
    "bisect",
    "bz2",
    "calendar",
    "codecs",
    "difflib",
    "gzip",
    "heapq",
    "hmac",
    "importlib",
    "lzma",
    "mimetypes",
    "pickle",
    "pkgutil",
    "queue",
    "secrets",
    "shelve",
    "tarfile",
    "timeit",
    "tkinter",
    "unicodedata",
    "zlib",
    "zoneinfo",
    # NOTE(review): the next three names are not standard-library modules.
    # They are kept only so that anything the original list excluded stays
    # excluded; confirm whether they can be dropped.
    "contextmanager",
    "lexer",
    "node",
}

# ``sys.stdlib_module_names`` only exists on Python 3.10+, hence the getattr.
# Lower-cased variants are added as well because lookups against this set may
# lower-case the imported name first (e.g. ``cProfile`` -> ``cprofile``).
_STDLIB_NAMES = getattr(sys, "stdlib_module_names", ())
BUILTIN_MODULES.update(_STDLIB_NAMES)
BUILTIN_MODULES.update(name.lower() for name in _STDLIB_NAMES)

# Mapping from import name to the PyPI distribution that provides it.
#
# A value of ``None`` marks a name that needs no requirement line (it ships
# with Python). Names absent from this table are resolved by the caller's
# fallback rule. Each key appears exactly once; the original table repeated
# several keys, which Python silently collapses to the last occurrence.
MODULE_TO_PACKAGE = {
    # --- Imaging / computer vision ---
    "PIL": "Pillow",
    "pillow": "Pillow",
    "cv2": "opencv-python",
    "skimage": "scikit-image",
    "albumentations": "albumentations",
    # --- Scientific / data ---
    "numpy": "numpy",
    "pandas": "pandas",
    "scipy": "scipy",
    "sklearn": "scikit-learn",
    "matplotlib": "matplotlib",
    "seaborn": "seaborn",
    "plotly": "plotly",
    "altair": "altair",
    "bokeh": "bokeh",
    "streamlit": "streamlit",
    "dash": "dash",
    # --- Machine learning / audio ---
    "torch": "torch",
    "torchvision": "torchvision",
    "tensorflow": "tensorflow",
    "transformers": "transformers",
    "diffusers": "diffusers",
    "accelerate": "accelerate",
    "whisper": "openai-whisper",
    "edge_tts": "edge-tts",
    "soundfile": "soundfile",
    "librosa": "librosa",
    "manim": "manim",
    # --- HTTP clients ---
    "requests": "requests",
    "httpx": "httpx",
    "aiohttp": "aiohttp",
    # --- Web frameworks / templating ---
    "flask": "flask",
    "django": "django",
    "fastapi": "fastapi",
    "starlette": "starlette",
    "bottle": "bottle",
    "cherrypy": "cherrypy",
    "tornado": "tornado",
    "webapp2": "webapp2",
    "falcon": "falcon",
    "sanic": "sanic",
    "pydantic": "pydantic",
    "jinja2": "jinja2",
    "mako": "mako",
    "markdown": "markdown",
    # --- Databases / queues ---
    "sqlalchemy": "sqlalchemy",
    "psycopg2": "psycopg2-binary",
    "pymysql": "pymysql",
    # Fixed: the original key was " MySQLdb" (leading space) and could never
    # match. The MySQLdb module is distributed on PyPI as mysqlclient.
    "MySQLdb": "mysqlclient",
    "mysql.connector": "mysql-connector-python",
    # Top-level name seen for ``from mysql.connector import ...``.
    "mysql": "mysql-connector-python",
    "redis": "redis",
    "pymongo": "pymongo",
    "celery": "celery",
    "rq": "rq",
    # --- Documents / file formats ---
    "yaml": "pyyaml",
    "docx": "python-docx",
    "pptx": "python-pptx",
    "openpyxl": "openpyxl",
    "xlsxwriter": "xlsxwriter",
    "pdfplumber": "pdfplumber",
    "pypdf": "pypdf",
    "pypdf2": "pypdf2",
    # The real import name is case-sensitive ``PyPDF2``.
    "PyPDF2": "PyPDF2",
    "reportlab": "reportlab",
    # --- CLI / utilities ---
    "dotenv": "python-dotenv",
    "tqdm": "tqdm",
    "click": "click",
    "typer": "typer",
    "rich": "rich",
    "colorama": "colorama",
    # --- Import names that differ from their distribution name ---
    "bs4": "beautifulsoup4",
    "dateutil": "python-dateutil",
    "jwt": "PyJWT",
    "serial": "pyserial",
    "fitz": "PyMuPDF",
    "Crypto": "pycryptodome",
    "zmq": "pyzmq",
    "git": "GitPython",
    # --- Standard library (no requirement line) ---
    "sqlite3": None,  # builtin
    "email": None,  # builtin
    "html": None,  # builtin
    "http": None,  # builtin
    "urllib": None,  # builtin
    "xml": None,  # builtin
    "tkinter": None,  # builtin
    # --- Desktop GUI / system bindings ---
    "dbus": "dbus-python",
    "gi": "pygobject",
    "gi.repository": "pygobject",
    "gtk": "pygtk",
    "wx": "wxpython",
    "PyQt5": "PyQt5",
    "PyQt6": "PyQt6",
    "PySide2": "PySide2",
    "PySide6": "PySide6",
    "kivy": "kivy",
    # --- Developer tooling / packaging ---
    "black": "black",
    "ruff": "ruff",
    "flake8": "flake8",
    "pylint": "pylint",
    "mypy": "mypy",
    "pytest": "pytest",
    "coverage": "coverage",
    "tox": "tox",
    "poetry": "poetry",
    "pip": "pip",
    "setuptools": "setuptools",
    "wheel": "wheel",
    "twine": "twine",
    "build": "build",
    # --- Jupyter ecosystem ---
    "jupytext": "jupytext",
    "nbformat": "nbformat",
    "nbconvert": "nbconvert",
    "jupyter": "jupyter",
    "ipykernel": "ipykernel",
    "ipywidgets": "ipywidgets",
    "widgetsnbextension": "widgetsnbextension",
    "ipyleaflet": "ipyleaflet",
    "bqplot": "bqplot",
    "ipyvolume": "ipyvolume",
    "pythreejs": "pythreejs",
}


# Line-based patterns used only when a file cannot be parsed as Python.
_FROM_IMPORT_RE = re.compile(r"^from\s+([a-zA-Z_][a-zA-Z0-9_.]*)\s+import\b")
_PLAIN_IMPORT_RE = re.compile(r"^import\s+(.+)$")
_MODULE_NAME_RE = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.]*$")


def _imports_from_tree(tree: ast.AST) -> Set[str]:
    """Collect top-level module names from every import node in *tree*."""
    found: Set[str] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            # ``import a.b, c as d`` -> {"a", "c"}
            found.update(alias.name.split(".")[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            # level > 0 is a relative import (``from . import x``): local code.
            if node.level == 0 and node.module:
                found.add(node.module.split(".")[0])
    return found


def _imports_from_lines(content: str) -> Set[str]:
    """Best-effort regex scan for sources that ``ast`` cannot parse."""
    found: Set[str] = set()
    for raw_line in content.splitlines():
        line = raw_line.strip()

        from_match = _FROM_IMPORT_RE.match(line)
        if from_match:
            found.add(from_match.group(1).split(".")[0])
            continue

        import_match = _PLAIN_IMPORT_RE.match(line)
        if not import_match:
            continue
        # Drop a trailing comment, then handle ``import a, b as c``.
        targets = import_match.group(1).split("#", 1)[0]
        for target in targets.split(","):
            words = target.split()
            if words and _MODULE_NAME_RE.match(words[0]):
                found.add(words[0].split(".")[0])
    return found


def extract_imports(skill_dir: Path) -> Set[str]:
    """Return the top-level module names imported by ``*.py`` files in a skill.

    Every ``*.py`` file below *skill_dir* is scanned recursively. Files are
    parsed with ``ast`` so that text inside strings or docstrings is never
    mistaken for an import; files that fail to parse fall back to a
    line-based regex scan. Relative imports are ignored. The result contains
    standard-library and third-party names alike; no filtering is done here.

    Args:
        skill_dir: Root directory of the skill to scan.

    Returns:
        The set of top-level module names (``os.path`` is reported as ``os``).
        Unreadable files are reported on stderr and skipped.
    """
    imports: Set[str] = set()

    for py_file in skill_dir.rglob("*.py"):
        try:
            # utf-8-sig strips a leading BOM, which would otherwise make the
            # parser reject the file.
            content = py_file.read_text(encoding="utf-8-sig", errors="ignore")
        except OSError as e:
            print(f"  Warning: Could not read {py_file}: {e}", file=sys.stderr)
            continue

        try:
            tree = ast.parse(content, filename=str(py_file))
        except (SyntaxError, ValueError, RecursionError):
            # Not valid source for this interpreter; still try to salvage
            # import lines.
            imports |= _imports_from_lines(content)
        else:
            imports |= _imports_from_tree(tree)

    return imports


def filter_third_party(imports: Set[str]) -> Set[str]:
    """Return the names in *imports* that are not stdlib, private or relative.

    A name is dropped when it, its top-level component, or the lower-cased
    form of either is listed in ``BUILTIN_MODULES``; this makes dotted
    submodules such as ``os.path`` count as standard library. Names starting
    with ``_`` (private modules) or ``.`` (relative imports) are dropped too,
    as are empty strings.

    Args:
        imports: Module names as collected from source files.

    Returns:
        The remaining names, unchanged (dotted names are not shortened).
    """
    third_party = set()

    for imp in imports:
        if not imp or imp.startswith(("_", ".")):
            continue

        top_level = imp.split(".")[0]
        # Check exact and lower-cased spellings of both the full dotted name
        # and its top-level package.
        candidates = {imp, imp.lower(), top_level, top_level.lower()}
        if not BUILTIN_MODULES.isdisjoint(candidates):
            continue

        third_party.add(imp)

    return third_party


def resolve_package_name(module: str) -> Optional[str]:
    """Translate an import name into the PyPI package that provides it.

    ``MODULE_TO_PACKAGE`` is consulted first with the full dotted name and
    then with successively shorter prefixes, so ``mysql.connector`` hits its
    explicit entry while ``matplotlib.pyplot`` resolves through
    ``matplotlib``. When nothing matches, the top-level name is used with
    underscores replaced by hyphens.

    Args:
        module: Import name, optionally dotted.

    Returns:
        The package name, or ``None`` when the table marks the module as
        needing no requirement (e.g. a builtin).
    """
    parts = module.split(".")

    # Longest prefix first so the most specific mapping wins.
    for end in range(len(parts), 0, -1):
        candidate = ".".join(parts[:end])
        if candidate in MODULE_TO_PACKAGE:
            return MODULE_TO_PACKAGE[candidate]

    # Fallback heuristic: distribution names usually use "-" where the
    # import name uses "_".
    return parts[0].replace("_", "-")


def _local_module_names(skill_dir: Path) -> Set[str]:
    """Return names that resolve to the skill's own files or sub-packages."""
    names: Set[str] = set()
    for py_file in skill_dir.rglob("*.py"):
        names.add(py_file.stem)
        # Any directory between the skill root and the file can be imported
        # as a package name from inside the skill.
        names.update(py_file.relative_to(skill_dir).parts[:-1])
    return names


def generate_requirements(skill_dir: Path, dry_run: bool = False) -> bool:
    """Create ``requirements.txt`` for one skill from its Python imports.

    An existing ``requirements.txt`` is never overwritten. Imports that
    refer to the skill's own modules or sub-packages are not treated as
    dependencies. Each detected package is written as ``<name>>=0.0.1``.

    Args:
        skill_dir: Directory of the skill.
        dry_run: When true, print what would be written instead of writing.

    Returns:
        ``True`` when the skill was handled (including the "nothing to do"
        cases), ``False`` when the file could not be written.
    """
    skill_name = skill_dir.name
    req_file = skill_dir / "requirements.txt"

    # Never clobber a hand-maintained file.
    if req_file.exists():
        print(f"[=] {skill_name}: already has requirements.txt, skipping")
        return True

    # Collect imports, drop stdlib names, then drop the skill's own modules.
    local_modules = _local_module_names(skill_dir)
    third_party = {
        module
        for module in filter_third_party(extract_imports(skill_dir))
        if module.split(".")[0] not in local_modules
    }

    if not third_party:
        print(f"[-] {skill_name}: no third-party dependencies found")
        return True

    # Map import names to package names; falsy results mean "no requirement".
    packages = {pkg for pkg in map(resolve_package_name, third_party) if pkg}

    if not packages:
        print(f"[-] {skill_name}: no third-party packages to write")
        return True

    content_lines = [f"# {skill_name} - dependencies"]
    content_lines.extend(f"{pkg}>=0.0.1" for pkg in sorted(packages))
    content = "\n".join(content_lines) + "\n"

    if dry_run:
        print(f"[dry-run] {skill_name}: would create:")
        for line in content_lines[1:]:
            print(f"  {line}")
        return True

    try:
        req_file.write_text(content, encoding="utf-8")
    except OSError as e:
        # Report and carry on so one unwritable skill does not abort the run.
        print(
            f"[!] {skill_name}: could not write requirements.txt: {e}",
            file=sys.stderr,
        )
        return False

    print(f"[+] {skill_name}: created requirements.txt with {len(packages)} packages")
    return True


def main():
    """Command-line entry point.

    Parses the arguments, picks the first existing directory among
    ``<path>/.opencode/skills``, ``<path>/skills`` and ``<path>``, and runs
    ``generate_requirements`` for either the skills named on the command
    line or every sub-directory that has no ``requirements.txt`` yet.

    Exits with status 1 when no skills directory exists, when a named skill
    is not a directory, or when any skill could not be processed.
    """
    import argparse

    parser = argparse.ArgumentParser(description="自动生成技能requirements.txt")
    parser.add_argument("skill", nargs="*", help="技能名称（可选，多个）")
    parser.add_argument("--dry-run", action="store_true", help="只显示不写入")
    parser.add_argument("--path", default=".", help="工作目录")
    args = parser.parse_args()

    base_path = Path(args.path).resolve()

    # Candidate locations of the skills directory, in priority order.
    possible_paths = [
        base_path / ".opencode" / "skills",
        base_path / "skills",
        base_path,
    ]

    skills_dir = next((path for path in possible_paths if path.is_dir()), None)

    if skills_dir is None:
        print(
            f"Error: skills directory not found in any of: {possible_paths}",
            file=sys.stderr,
        )
        sys.exit(1)

    print(f"Using skills directory: {skills_dir}")

    if args.skill:
        # Explicitly requested skills: each must be an existing directory.
        skill_dirs = []
        for s in args.skill:
            skill_dir = skills_dir / s
            if not skill_dir.is_dir():
                print(f"Error: skill not found: {s}", file=sys.stderr)
                sys.exit(1)
            skill_dirs.append(skill_dir)
        print(f"Processing {len(skill_dirs)} requested skills")
    else:
        # Auto-discovery: every sub-directory without a requirements.txt.
        # Hidden directories and __pycache__ are skipped because they are not
        # skills (relevant when the skills directory falls back to --path).
        skill_dirs = [
            d
            for d in sorted(skills_dir.iterdir())
            if d.is_dir()
            and not d.name.startswith(".")
            and d.name != "__pycache__"
            and not (d / "requirements.txt").exists()
        ]
        print(f"Found {len(skill_dirs)} skills without requirements.txt")

    print()

    success = 0
    for skill_dir in skill_dirs:
        if generate_requirements(skill_dir, args.dry_run):
            success += 1

    print()
    print(f"Processed {success}/{len(skill_dirs)} skills")

    if args.dry_run:
        print("\n(dry-run mode - no files were written)")

    # Surface failures to shells and CI through the exit status.
    if success < len(skill_dirs):
        sys.exit(1)


if __name__ == "__main__":
    main()