Add ai-code-project-template repo files.

2025-08-23 06:09:26 -07:00
parent 9321e54bce
commit e725ecf942
76 changed files with 17830 additions and 8 deletions

15
tools/README.md Normal file

@@ -0,0 +1,15 @@
# Tools Directory
This directory contains utilities for the recipe-tool project.
## Core Utilities
### AI Context Generation
- `build_ai_context_files.py` - Main orchestrator for collecting project files into AI context documents
- `collect_files.py` - Core utility for pattern-based file collection with glob support
- `build_git_collector_files.py` - Downloads external documentation using git-collector
### Other Tools
- `clean_wsl_files.py` - Remove WSL metadata files (Zone.Identifier and endpoint DLP) that clutter the repository
- `create_worktree.py` - Create a git worktree for parallel development, copying the `.data/` directory
- `remove_worktree.py` - Remove a git worktree and optionally delete its branch
- `list_by_filesize.py` - List files sorted by size for analysis
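
## Usage Example

`build_ai_context_files.py` drives `collect_files` through its Python API rather than the command line. A minimal sketch of that usage, assuming it is run from the repository root (the extra `"*.log"` exclude is illustrative):

```python
import sys

sys.path.append("tools")
import collect_files

# Gather files under tools/, honoring the default excludes plus one extra pattern
files = collect_files.collect_files(
    patterns=["tools"],
    exclude_patterns=collect_files.DEFAULT_EXCLUDE + ["*.log"],
    include_patterns=[],
)
print(f"Collected {len(files)} file(s)")
```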

162
tools/build_ai_context_files.py

@@ -0,0 +1,162 @@
#!/usr/bin/env python3
"""
Build AI Context Files Script
This script imports the collect_files module and calls its functions directly
to generate Markdown files containing code and recipe files for AI context.
This script should be placed at:
[repo_root]/tools/build_ai_context_files.py
And will be run from the repository root.
"""
import argparse
import os
import sys
import datetime
import re
import platform
OUTPUT_DIR = "ai_context/generated"
# We're running from repo root, so that's our current directory
repo_root = os.getcwd()
# Add the tools directory to the Python path
tools_dir = os.path.join(repo_root, "tools")
sys.path.append(tools_dir)
# Import the collect_files module
try:
import collect_files # type: ignore
except ImportError:
print(f"Error: Could not import collect_files module from {tools_dir}")
print("Make sure this script is run from the repository root.")
sys.exit(1)
def parse_args():
parser = argparse.ArgumentParser(
description="Build AI Context Files script that collects project files into markdown."
)
parser.add_argument(
"--force",
action="store_true",
help="Always overwrite files, even if content unchanged",
)
return parser.parse_args()
def ensure_directory_exists(file_path) -> None:
"""Create directory for file if it doesn't exist."""
directory = os.path.dirname(file_path)
if directory and not os.path.exists(directory):
os.makedirs(directory)
print(f"Created directory: {directory}")
def strip_date_line(text: str) -> str:
"""Remove any '**Date:** …' line so we can compare content ignoring timestamps."""
# Remove the entire line that begins with **Date:**
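# Illustrative (assumed) example: strip_date_line("**Date:** 6/9/2025\n**Files:** 3")
# returns "**Files:** 3", so two outputs differing only in timestamp compare equal.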
return re.sub(r"^\*\*Date:\*\*.*\n?", "", text, flags=re.MULTILINE)
def build_context_files(force=False) -> None:
# Define the tasks to run
tasks = []
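# A task is a dict with the four keys consumed by the loop below; the template
# ships with an empty list. A hypothetical entry could look like:
# tasks = [
#     {
#         "patterns": ["tools"],
#         "output": os.path.join(OUTPUT_DIR, "TOOLS.md"),
#         "exclude": collect_files.DEFAULT_EXCLUDE,
#         "include": [],
#     },
# ]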
# Execute each task
for task in tasks:
patterns = task["patterns"]
output = task["output"]
exclude = task["exclude"]
include = task["include"]
# Ensure the output directory exists
ensure_directory_exists(output)
print(f"Collecting files for patterns: {patterns}")
print(f"Excluding patterns: {exclude}")
print(f"Including patterns: {include}")
# Collect the files
files = collect_files.collect_files(patterns, exclude, include)
print(f"Found {len(files)} files.")
# Build header
now = datetime.datetime.now()
# Use appropriate format specifiers based on the platform
if platform.system() == "Windows":
date_str = now.strftime("%#m/%#d/%Y, %#I:%M:%S %p") # Windows non-padding format
else:
date_str = now.strftime("%-m/%-d/%Y, %-I:%M:%S %p") # Unix non-padding format
header_lines = [
f"# {' | '.join(patterns)}",
"",
"[collect-files]",
"",
f"**Search:** {patterns}",
f"**Exclude:** {exclude}",
f"**Include:** {include}",
f"**Date:** {date_str}",
f"**Files:** {len(files)}\n\n",
]
header = "\n".join(header_lines)
# Build content body
content_body = ""
for file in files:
rel_path = os.path.relpath(file)
content_body += f"=== File: {rel_path} ===\n"
try:
with open(file, "r", encoding="utf-8") as f:
content_body += f.read()
except Exception as e:
content_body += f"[ERROR reading file: {e}]\n"
content_body += "\n\n"
new_content = header + content_body
# If file exists and we're not forcing, compare (ignoring only the date)
if os.path.exists(output) and not force:
try:
with open(output, "r", encoding="utf-8") as f:
existing_content = f.read()
# Strip out date lines from both
existing_sanitized = strip_date_line(existing_content).strip()
new_sanitized = strip_date_line(new_content).strip()
if existing_sanitized == new_sanitized:
print(f"No substantive changes in {output}, skipping write.")
continue
except Exception as e:
print(f"Warning: unable to compare existing file {output}: {e}")
# Write the file (new or forced update)
with open(output, "w", encoding="utf-8") as outfile:
outfile.write(new_content)
print(f"Written to {output}")
def main():
args = parse_args()
# Verify we're in the repository root by checking for key directories/files
required_paths = [os.path.join(repo_root, "tools", "collect_files.py")]
missing_paths = [path for path in required_paths if not os.path.exists(path)]
if missing_paths:
print("Warning: This script should be run from the repository root.")
print("The following expected paths were not found:")
for path in missing_paths:
print(f" - {path}")
response = input("Continue anyway? (y/n): ")
if response.lower() != "y":
sys.exit(1)
build_context_files(force=args.force)
if __name__ == "__main__":
main()

118
tools/build_git_collector_files.py

@@ -0,0 +1,118 @@
#!/usr/bin/env python3
"""
Runs git-collector → falls back to npx automatically (with --yes) →
shows guidance only if everything fails.
"""
from shutil import which
import subprocess
import sys
import os
from textwrap import dedent
OUTPUT_DIR = "ai_context/git_collector"
# Debug function - can be removed or commented out when fixed
def print_debug_info():
print("===== DEBUG INFO =====")
print(f"PATH: {os.environ.get('PATH', '')}")
npx_location = which("npx")
print(f"NPX location: {npx_location}")
print("======================")
def guidance() -> str:
return dedent(
"""\
❌ git-collector could not be run.
Fixes:
• Global install …… npm i -g git-collector
• Or rely on npx (no install).
Then re-run: make ai-context-files
"""
)
def run(cmd: list[str], capture: bool = True) -> subprocess.CompletedProcess:
"""Run a command, optionally capturing its output."""
print("", " ".join(cmd))
return subprocess.run(
cmd,
text=True,
capture_output=capture,
)
def main() -> None:
root = sys.argv[1] if len(sys.argv) > 1 else OUTPUT_DIR
# Uncomment to see debug info when needed
# print_debug_info()
# Preferred runners in order
runners: list[list[str]] = []
git_collector_path = which("git-collector")
if git_collector_path:
runners.append([git_collector_path])
pnpm_path = which("pnpm")
if pnpm_path:
try:
# Check if git-collector is available via pnpm by running a simple list command
# Redirect output to avoid cluttering the console
result = subprocess.run(
[pnpm_path, "list", "git-collector"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
)
# If git-collector is in the output, it's installed via pnpm
if "git-collector" in result.stdout and "ERR" not in result.stdout:
runners.append([pnpm_path, "exec", "git-collector"])
except Exception:
# If any error occurs during check, move to next option
pass
# For npx, we need to try multiple approaches
# First, check if npx is in the PATH
npx_path = which("npx")
if npx_path:
# Use the full path to npx if we can find it
runners.append([npx_path, "--yes", "git-collector"])
else:
# Fallback to just the command name as a last resort
runners.append(["npx", "--yes", "git-collector"])
if not runners:
sys.exit(guidance())
last_result = None
for r in runners:
# Stream output for npx and git-collector so progress is visible; capture for pnpm
is_npx = "npx" in r[0].lower()
is_git_collector = "git-collector" in r[0].lower()
capture = not (is_npx or is_git_collector)
print(f"Executing command: {' '.join(r + [root, '--update'])}")
try:
last_result = run(r + [root, "--update"], capture=capture)
except Exception as e:
print(f"Error executing command: {e}")
continue
if last_result.returncode == 0:
return # success 🎉
if len(r) > 1 and r[1] == "exec":
print("pnpm exec failed, falling back to npx …")
# All attempts failed → print stderr (if any) and guidance
if last_result and last_result.stderr:
print(last_result.stderr.strip(), file=sys.stderr)
sys.exit(guidance())
if __name__ == "__main__":
main()

107
tools/clean_wsl_files.py Normal file

@@ -0,0 +1,107 @@
#!/usr/bin/env python3
"""
Clean up WSL-related files that accidentally get created in the repository.
This tool removes Windows Subsystem for Linux (WSL) metadata files that can
clutter the repository, including Zone.Identifier and endpoint DLP files.
"""
import argparse
import subprocess
import sys
from pathlib import Path
def find_wsl_files(root_dir: Path) -> list[Path]:
"""
Find all WSL-related files in the directory tree.
Args:
root_dir: Root directory to search from
Returns:
List of paths to WSL-related files
"""
patterns = ["*:Zone.Identifier", "*:sec.endpointdlp"]
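# Illustrative (assumed) match: a file named "notes.txt:Zone.Identifier" anywhere
# under root_dir matches the first pattern.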
wsl_files = []
for pattern in patterns:
wsl_files.extend(root_dir.rglob(pattern))
return wsl_files
def clean_wsl_files(root_dir: Path, dry_run: bool = False) -> int:
"""
Remove WSL-related files from the directory tree.
Args:
root_dir: Root directory to clean
dry_run: If True, only show what would be deleted without actually deleting
Returns:
Number of files cleaned
"""
wsl_files = find_wsl_files(root_dir)
if not wsl_files:
print("No WSL-related files found.")
return 0
print(f"Found {len(wsl_files)} WSL-related file(s):")
for file_path in wsl_files:
rel_path = file_path.relative_to(root_dir)
if dry_run:
print(f" [DRY RUN] Would remove: {rel_path}")
else:
try:
file_path.unlink()
print(f" Removed: {rel_path}")
except Exception as e:
print(f" ERROR removing {rel_path}: {e}", file=sys.stderr)
if dry_run:
print(f"\nDry run complete. Would have removed {len(wsl_files)} file(s).")
else:
print(f"\nCleaned {len(wsl_files)} WSL-related file(s).")
return len(wsl_files)
def main():
"""Main entry point for the script."""
parser = argparse.ArgumentParser(description="Clean up WSL-related files from the repository")
parser.add_argument("--dry-run", action="store_true", help="Show what would be deleted without actually deleting")
parser.add_argument("--path", type=Path, default=Path.cwd(), help="Path to clean (defaults to current directory)")
args = parser.parse_args()
if not args.path.exists():
print(f"Error: Path {args.path} does not exist", file=sys.stderr)
sys.exit(1)
if not args.path.is_dir():
print(f"Error: Path {args.path} is not a directory", file=sys.stderr)
sys.exit(1)
# Find git root if we're in a git repository
try:
git_root = subprocess.check_output(["git", "rev-parse", "--show-toplevel"], cwd=args.path, text=True).strip()
root_dir = Path(git_root)
print(f"Cleaning WSL files from git repository: {root_dir}")
except (subprocess.CalledProcessError, FileNotFoundError):  # not a git repo, or git not installed
root_dir = args.path
print(f"Cleaning WSL files from directory: {root_dir}")
print()
clean_wsl_files(root_dir, dry_run=args.dry_run)
# Always exit with status 0 (success) - finding no files is not an error
sys.exit(0)
if __name__ == "__main__":
main()

329
tools/collect_files.py Normal file

@@ -0,0 +1,329 @@
#!/usr/bin/env python3
"""
Collect Files Utility
Recursively scans the specified file/directory patterns and outputs a single Markdown
document containing each file's relative path and its content.
This tool helps aggregate source code files for analysis or documentation purposes.
Usage examples:
# Collect all Python files in the current directory:
python collect_files.py *.py > my_python_files.md
# Collect all files in the 'output' directory:
python collect_files.py output > my_output_dir_files.md
# Collect specific files, excluding 'utils' and 'logs', but including Markdown files from 'utils':
python collect_files.py *.py --exclude "utils,logs,__pycache__,*.pyc" --include "utils/*.md" > my_output.md
"""
import argparse
import datetime
import fnmatch
import glob
import os
import pathlib
import platform
from typing import List, Optional, Set, Tuple
# Default exclude patterns: common directories and binary files to ignore.
DEFAULT_EXCLUDE = [".venv", "node_modules", "*.lock", ".git", "__pycache__", "*.pyc", "*.ruff_cache", "logs", "output"]
def parse_patterns(pattern_str: str) -> List[str]:
"""Splits a comma-separated string into a list of stripped patterns."""
return [p.strip() for p in pattern_str.split(",") if p.strip()]
def resolve_pattern(pattern: str) -> str:
"""
Resolves a pattern that might contain relative path navigation.
Returns the absolute path of the pattern.
"""
# Convert the pattern to a Path object
pattern_path = pathlib.Path(pattern)
# Check if the pattern is absolute or contains relative navigation
if os.path.isabs(pattern) or ".." in pattern:
# Resolve to absolute path
return str(pattern_path.resolve())
# For simple patterns without navigation, return as is
return pattern
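# Illustrative (assumed) behavior of resolve_pattern: "../docs/*.md" resolves to an
# absolute path such as "/home/user/docs/*.md", while "*.py" is returned unchanged.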
def match_pattern(path: str, pattern: str, component_matching=False) -> bool:
"""
Centralized pattern matching logic.
Args:
path: File path to match against
pattern: Pattern to match
component_matching: If True, matches individual path components
(used primarily for exclude patterns)
Returns:
True if path matches the pattern
"""
# For simple exclude-style component matching
if component_matching:
parts = os.path.normpath(path).split(os.sep)
for part in parts:
if fnmatch.fnmatch(part, pattern):
return True
return False
# Convert paths to absolute for consistent comparison
abs_path = os.path.abspath(path)
# Handle relative path navigation in the pattern
if ".." in pattern or "/" in pattern or "\\" in pattern:
# If pattern contains path navigation, resolve it to an absolute path
resolved_pattern = resolve_pattern(pattern)
# Check if this is a directory pattern with a wildcard
if "*" in resolved_pattern:
# Get the directory part of the pattern
pattern_dir = os.path.dirname(resolved_pattern)
# Get the filename pattern
pattern_file = os.path.basename(resolved_pattern)
# Check if the file is in or under the pattern directory
file_dir = os.path.dirname(abs_path)
if file_dir == pattern_dir or file_dir.startswith(pattern_dir + os.sep):
# Match the filename against the pattern
return fnmatch.fnmatch(os.path.basename(abs_path), pattern_file)
return False # Not under the pattern directory
else:
# Direct file match
return abs_path == resolved_pattern or fnmatch.fnmatch(abs_path, resolved_pattern)
else:
# Regular pattern without navigation, use relative path matching
return fnmatch.fnmatch(path, pattern)
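# Illustrative (assumed) behavior of match_pattern above:
#   match_pattern("src/app.py", "*.py") -> True (fnmatch against the whole path)
#   match_pattern("logs/app.log", "logs", component_matching=True) -> True (matches a path component)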
def should_exclude(path: str, exclude_patterns: List[str]) -> bool:
"""
Returns True if any component of the path matches an exclude pattern.
"""
for pattern in exclude_patterns:
if match_pattern(path, pattern, component_matching=True):
return True
return False
def should_include(path: str, include_patterns: List[str]) -> bool:
"""
Returns True if the path matches any of the include patterns.
Handles relative path navigation in include patterns.
"""
for pattern in include_patterns:
if match_pattern(path, pattern):
return True
return False
def collect_files(patterns: List[str], exclude_patterns: List[str], include_patterns: List[str]) -> List[str]:
"""
Collects file paths matching the given patterns, applying exclusion first.
Files that match an include pattern are added back in.
Returns a sorted list of absolute file paths.
"""
collected = set()
# Process included files with simple filenames or relative paths
for pattern in include_patterns:
# Check for files in the current directory first
direct_matches = glob.glob(pattern, recursive=True)
for match in direct_matches:
if os.path.isfile(match):
collected.add(os.path.abspath(match))
# Then check for relative paths
if ".." in pattern or os.path.isabs(pattern):
resolved_pattern = resolve_pattern(pattern)
# Direct file inclusion
if "*" not in resolved_pattern and os.path.isfile(resolved_pattern):
collected.add(resolved_pattern)
else:
# Pattern with wildcards
directory = os.path.dirname(resolved_pattern)
if os.path.exists(directory):
filename_pattern = os.path.basename(resolved_pattern)
for root, _, files in os.walk(directory):
for file in files:
if fnmatch.fnmatch(file, filename_pattern):
full_path = os.path.join(root, file)
collected.add(os.path.abspath(full_path))
# Process the main patterns
for pattern in patterns:
matches = glob.glob(pattern, recursive=True)
for path in matches:
if os.path.isfile(path):
process_file(path, collected, exclude_patterns, include_patterns)
elif os.path.isdir(path):
process_directory(path, collected, exclude_patterns, include_patterns)
return sorted(collected)
def process_file(file_path: str, collected: Set[str], exclude_patterns: List[str], include_patterns: List[str]) -> None:
"""Process a single file"""
abs_path = os.path.abspath(file_path)
rel_path = os.path.relpath(file_path)
# Skip if excluded and not specifically included
if should_exclude(rel_path, exclude_patterns) and not should_include(rel_path, include_patterns):
return
collected.add(abs_path)
def process_directory(
dir_path: str, collected: Set[str], exclude_patterns: List[str], include_patterns: List[str]
) -> None:
"""Process a directory recursively"""
for root, dirs, files in os.walk(dir_path):
# Filter directories based on exclude patterns, but respect include patterns
dirs[:] = [
d
for d in dirs
if not should_exclude(os.path.join(root, d), exclude_patterns)
or should_include(os.path.join(root, d), include_patterns)
]
# Process each file in the directory
for file in files:
full_path = os.path.join(root, file)
process_file(full_path, collected, exclude_patterns, include_patterns)
def read_file(file_path: str) -> Tuple[str, Optional[str]]:
"""
Read a file and return its content.
Returns:
Tuple of (content, error_message)
"""
# Check if file is likely binary
try:
with open(file_path, "rb") as f:
chunk = f.read(1024)
if b"\0" in chunk: # Simple binary check
return "[Binary file not displayed]", None
# If not binary, read as text
with open(file_path, "r", encoding="utf-8") as f:
return f.read(), None
except UnicodeDecodeError:
# Handle encoding issues
return "[File contains non-UTF-8 characters]", None
except Exception as e:
return "", f"[ERROR reading file: {e}]"
def format_output(
file_paths: List[str],
format_type: str,
exclude_patterns: List[str],
include_patterns: List[str],
patterns: List[str],
) -> str:
"""
Format the collected files according to the output format.
Args:
file_paths: List of absolute file paths to format
format_type: Output format type ("markdown" or "plain")
exclude_patterns: List of exclusion patterns (for info)
include_patterns: List of inclusion patterns (for info)
patterns: Original input patterns (for info)
Returns:
Formatted output string
"""
output_lines = []
# Add metadata header
now = datetime.datetime.now()
# Use non-padding format specifiers appropriate to the platform (%-m is not supported on Windows)
if platform.system() == "Windows":
date_str = now.strftime("%#m/%#d/%Y, %#I:%M:%S %p")
else:
date_str = now.strftime("%-m/%-d/%Y, %-I:%M:%S %p")
output_lines.append(f"# {' | '.join(patterns)}")
output_lines.append("")
output_lines.append("[collect-files]")
output_lines.append("")
output_lines.append(f"**Search:** {patterns}")
output_lines.append(f"**Exclude:** {exclude_patterns}")
output_lines.append(f"**Include:** {include_patterns}")
output_lines.append(f"**Date:** {date_str}")
output_lines.append(f"**Files:** {len(file_paths)}\n\n")
# Process each file
for file_path in file_paths:
rel_path = os.path.relpath(file_path)
# Add file header based on format
if format_type == "markdown":
output_lines.append(f"### File: {rel_path}")
output_lines.append("```")
else:
output_lines.append(f"=== File: {rel_path} ===")
# Read and add file content
content, error = read_file(file_path)
if error:
output_lines.append(error)
else:
output_lines.append(content)
# Add file footer based on format
if format_type == "markdown":
output_lines.append("```")
# Add separator between files
output_lines.append("\n")
return "\n".join(output_lines)
def main() -> None:
"""Main function"""
parser = argparse.ArgumentParser(
description="Recursively collect files matching the given patterns and output a document with file names and content."
)
parser.add_argument("patterns", nargs="+", help="File and/or directory patterns to collect (e.g. *.py or output)")
parser.add_argument(
"--exclude",
type=str,
default="",
help="Comma-separated patterns to exclude (will be combined with default excludes: "
+ ",".join(DEFAULT_EXCLUDE)
+ ")",
)
parser.add_argument(
"--include", type=str, default="", help="Comma-separated patterns to include (overrides excludes if matched)"
)
parser.add_argument(
"--format", type=str, choices=["markdown", "plain"], default="plain", help="Output format (default: plain)"
)
args = parser.parse_args()
# Parse pattern arguments and combine with default excludes
user_exclude_patterns = parse_patterns(args.exclude)
exclude_patterns = DEFAULT_EXCLUDE + user_exclude_patterns
include_patterns = parse_patterns(args.include) if args.include else []
# Collect files
patterns = args.patterns
files = collect_files(patterns, exclude_patterns, include_patterns)
# Format and print output
output = format_output(files, args.format, exclude_patterns, include_patterns, patterns)
print(output)
if __name__ == "__main__":
main()

90
tools/create_worktree.py Executable file

@@ -0,0 +1,90 @@
#!/usr/bin/env python3
"""
Create a git worktree for parallel development with efficient data copying.
Usage:
python tools/create_worktree.py <branch-name>
This will:
1. Create a worktree in ../repo-name-branch-name/
2. Copy .data/ directory contents efficiently using rsync
3. Output a cd command to navigate to the new worktree
"""
import subprocess
import sys
from pathlib import Path
def main():
# Get branch name from arguments
if len(sys.argv) != 2:
print("Usage: python tools/create_worktree.py <branch-name>")
sys.exit(1)
branch_name = sys.argv[1]
# Get current repo path and name
current_path = Path.cwd()
repo_name = current_path.name
# Build worktree path
worktree_name = f"{repo_name}-{branch_name}"
worktree_path = current_path.parent / worktree_name
# Create the worktree
print(f"Creating worktree at {worktree_path}...")
try:
# Check if branch exists locally
result = subprocess.run(["git", "rev-parse", "--verify", branch_name], capture_output=True, text=True)
if result.returncode == 0:
# Branch exists, use it
subprocess.run(["git", "worktree", "add", str(worktree_path), branch_name], check=True)
else:
# Branch doesn't exist, create it
subprocess.run(["git", "worktree", "add", "-b", branch_name, str(worktree_path)], check=True)
print(f"Created new branch: {branch_name}")
except subprocess.CalledProcessError as e:
print(f"Failed to create worktree: {e}")
sys.exit(1)
# Copy .data directory if it exists
data_dir = current_path / ".data"
if data_dir.exists() and data_dir.is_dir():
print("\nCopying .data directory (this may take a moment)...")
target_data_dir = worktree_path / ".data"
try:
# Use rsync for efficient copying with progress
subprocess.run(
[
"rsync",
"-av", # archive mode with verbose
"--progress", # show progress
f"{data_dir}/", # trailing slash to copy contents
f"{target_data_dir}/",
],
check=True,
)
print("Data copy complete!")
except subprocess.CalledProcessError as e:
print(f"Warning: Failed to copy .data directory: {e}")
print("You may need to copy it manually or use cp instead of rsync")
except FileNotFoundError:
# rsync not available, fallback to cp
print("rsync not found, using cp instead...")
try:
subprocess.run(["cp", "-r", str(data_dir), str(worktree_path)], check=True)
print("Data copy complete!")
except subprocess.CalledProcessError as e:
print(f"Warning: Failed to copy .data directory: {e}")
# Output the cd command
print("\n✓ Worktree created successfully!")
print("\nTo navigate to your new worktree, run:")
print(f"cd {worktree_path}")
if __name__ == "__main__":
main()

80
tools/list_by_filesize.py Normal file

@@ -0,0 +1,80 @@
#!/usr/bin/env python3
"""List files in a directory tree sorted by size (largest first)."""
import os
import sys
def get_file_sizes(directory):
"""
Recursively get all files in the directory tree and their sizes.
Returns a list of tuples (file_path, size_in_bytes).
"""
file_sizes = []
# Walk through the directory tree
for dirpath, dirnames, filenames in os.walk(directory):
for filename in filenames:
# Get the full path of the file
file_path = os.path.join(dirpath, filename)
# Get the file size if it's a file (not a symbolic link)
if os.path.isfile(file_path) and not os.path.islink(file_path):
try:
size = os.path.getsize(file_path)
file_sizes.append((file_path, size))
except (OSError, FileNotFoundError):
# Skip files that can't be accessed
pass
return file_sizes
def format_size(size_bytes):
"""Format file size in a human-readable format"""
# Define size units
units = ["B", "KB", "MB", "GB", "TB", "PB"]
# Convert to appropriate unit
unit_index = 0
while size_bytes >= 1024 and unit_index < len(units) - 1:
size_bytes /= 1024
unit_index += 1
# Format with 2 decimal places if not bytes
if unit_index == 0:
return f"{size_bytes} {units[unit_index]}"
else:
return f"{size_bytes:.2f} {units[unit_index]}"
def main():
# Use the provided directory or default to current directory
if len(sys.argv) > 1:
directory = sys.argv[1]
else:
directory = "."
# Ensure the directory exists
if not os.path.isdir(directory):
print(f"Error: '{directory}' is not a valid directory")
sys.exit(1)
# Get all files and their sizes
file_sizes = get_file_sizes(directory)
# Sort by size in descending order
file_sizes.sort(key=lambda x: x[1], reverse=True)
# Print the results
print(f"Files in '{directory}' (sorted by size, largest first):")
print("-" * 80)
print(f"{'Size':<10} {'Path':<70}")
print("-" * 80)
for file_path, size in file_sizes:
# Convert the size to a human-readable format
size_str = format_size(size)
print(f"{size_str:<10} {file_path}")
if __name__ == "__main__":
main()

64
tools/makefiles/python.mk Normal file

@@ -0,0 +1,64 @@
mkfile_dir = $(patsubst %/,%,$(dir $(realpath $(lastword $(MAKEFILE_LIST)))))
include $(mkfile_dir)/shell.mk
.DEFAULT_GOAL ?= install
ifdef UV_PROJECT_DIR
uv_project_args = --directory $(UV_PROJECT_DIR)
venv_dir = $(UV_PROJECT_DIR)/.venv
else
venv_dir = .venv
endif
UV_SYNC_INSTALL_ARGS ?= --all-extras --frozen
UV_SYNC_LOCK_ARGS ?=
UV_RUN_ARGS ?= --all-extras --frozen
PYTEST_ARGS ?= --color=yes
## Rules
.PHONY: install
install:
uv sync $(uv_project_args) $(UV_SYNC_INSTALL_ARGS)
.PHONY: lock-upgrade
lock-upgrade:
uv lock --upgrade $(uv_project_args)
.PHONY: lock
lock:
uv sync $(uv_project_args) $(UV_SYNC_LOCK_ARGS)
.PHONY: clean
clean:
$(rm_dir) $(venv_dir) $(ignore_failure)
.PHONY: check
check:
@echo "Running code quality checks..."
@echo ""
@echo "→ Formatting code..."
@uvx ruff format --no-cache .
@echo ""
@echo "→ Linting code..."
@uvx ruff check --no-cache --fix .
@echo ""
@echo "→ Type checking code..."
@uv run $(uv_project_args) $(UV_RUN_ARGS) pyright $(PYRIGHT_ARGS) || (echo ""; echo "❌ Type check failed!"; exit 1)
@echo ""
@echo "✅ All checks passed!"
ifneq ($(findstring pytest,$(if $(shell $(call command_exists,uv) $(stderr_redirect_null)),$(shell uv tree --depth 1 $(stderr_redirect_null)),)),)
PYTEST_EXISTS=true
endif
ifneq ($(findstring pyright,$(if $(shell $(call command_exists,uv) $(stderr_redirect_null)),$(shell uv tree --depth 1 $(stderr_redirect_null)),)),)
PYRIGHT_EXISTS=true
endif
ifeq ($(PYTEST_EXISTS),true)
.PHONY: test pytest
test: pytest
pytest:
uv run $(uv_project_args) $(UV_RUN_ARGS) pytest $(PYTEST_ARGS)
endif

112
tools/makefiles/recursive.mk

@@ -0,0 +1,112 @@
# Runs make in all recursive subdirectories with a Makefile, passing the make target to each.
# Directories are built in top-down order.
# ex: make (runs DEFAULT_GOAL)
# ex: make clean (runs clean)
# ex: make install (runs install)
mkfile_dir = $(patsubst %/,%,$(dir $(realpath $(lastword $(MAKEFILE_LIST)))))
# if IS_RECURSIVE_MAKE is set, then this is being invoked by another recursive.mk.
# in that case, we don't want any targets
ifndef IS_RECURSIVE_MAKE
.DEFAULT_GOAL := help
# make with VERBOSE=1 to print all outputs of recursive makes
VERBOSE ?= 0
RECURSIVE_TARGETS = clean test check lock install
# You can pass in a list of files or directories to retain when running `clean/git-clean`
# ex: make clean GIT_CLEAN_RETAIN=".env .data"
# As always with make, you can also set this as an environment variable
GIT_CLEAN_RETAIN ?= .env
GIT_CLEAN_EXTRA_ARGS = $(foreach v,$(GIT_CLEAN_RETAIN),--exclude !$(v))
ifeq ($(VERBOSE),0)
GIT_CLEAN_EXTRA_ARGS += --quiet
endif
.PHONY: git-clean
git-clean:
git clean -dffX . $(GIT_CLEAN_EXTRA_ARGS)
FILTER_OUT = $(foreach v,$(2),$(if $(findstring $(1),$(v)),,$(v)))
MAKE_FILES = $(shell find . -mindepth 2 -name Makefile)
ALL_MAKE_DIRS = $(sort $(filter-out ./,$(dir $(MAKE_FILES))))
ifeq ($(suffix $(SHELL)),.exe)
MAKE_FILES = $(shell dir Makefile /b /s)
ALL_MAKE_DIRS = $(sort $(filter-out $(subst /,\,$(abspath ./)),$(patsubst %\,%,$(dir $(MAKE_FILES)))))
endif
MAKE_DIRS := $(call FILTER_OUT,site-packages,$(call FILTER_OUT,node_modules,$(ALL_MAKE_DIRS)))
.PHONY: .clean-error-log .print-error-log
MAKE_CMD_MESSAGE = $(if $(MAKECMDGOALS), $(MAKECMDGOALS),)
.clean-error-log:
@$(rm_file) $(call fix_path,$(mkfile_dir)/make*.log) $(ignore_output) $(ignore_failure)
.print-error-log:
ifeq ($(suffix $(SHELL)),.exe)
@if exist $(call fix_path,$(mkfile_dir)/make_error_dirs.log) ( \
echo Directories failed to make$(MAKE_CMD_MESSAGE): && \
type $(call fix_path,$(mkfile_dir)/make_error_dirs.log) && \
($(rm_file) $(call fix_path,$(mkfile_dir)/make*.log) $(ignore_output) $(ignore_failure)) && \
exit 1 \
)
else
@if [ -e $(call fix_path,$(mkfile_dir)/make_error_dirs.log) ]; then \
echo "\n\033[31;1mDirectories failed to make$(MAKE_CMD_MESSAGE):\033[0m\n"; \
cat $(call fix_path,$(mkfile_dir)/make_error_dirs.log); \
echo ""; \
$(rm_file) $(call fix_path,$(mkfile_dir)/make*.log) $(ignore_output) $(ignore_failure); \
exit 1; \
fi
endif
.PHONY: $(RECURSIVE_TARGETS) $(MAKE_DIRS)
clean: git-clean
$(RECURSIVE_TARGETS): .clean-error-log $(MAKE_DIRS) .print-error-log
$(MAKE_DIRS):
ifdef FAIL_ON_ERROR
$(MAKE) -C $@ $(MAKECMDGOALS) IS_RECURSIVE_MAKE=1
else
@$(rm_file) $(call fix_path,$@/make*.log) $(ignore_output) $(ignore_failure)
@echo make -C $@ $(MAKECMDGOALS)
ifeq ($(suffix $(SHELL)),.exe)
@$(MAKE) -C $@ $(MAKECMDGOALS) IS_RECURSIVE_MAKE=1 1>$(call fix_path,$@/make.log) $(stderr_redirect_stdout) || \
( \
(findstr /c:"*** No" $(call fix_path,$@/make.log) ${ignore_output}) || ( \
echo $@ >> $(call fix_path,$(mkfile_dir)/make_error_dirs.log) && \
$(call touch,$@/make_error.log) \
) \
)
@if exist $(call fix_path,$@/make_error.log) echo make -C $@$(MAKE_CMD_MESSAGE) failed:
@if exist $(call fix_path,$@/make_error.log) $(call touch,$@/make_print.log)
@if "$(VERBOSE)" neq "0" $(call touch,$@/make_print.log)
@if exist $(call fix_path,$@/make_print.log) type $(call fix_path,$@/make.log)
else
@$(MAKE) -C $@ $(MAKECMDGOALS) IS_RECURSIVE_MAKE=1 1>$(call fix_path,$@/make.log) $(stderr_redirect_stdout) || \
( \
grep -qF "*** No" $(call fix_path,$@/make.log) || ( \
echo "\t$@" >> $(call fix_path,$(mkfile_dir)/make_error_dirs.log) ; \
$(call touch,$@/make_error.log) ; \
) \
)
@if [ -e $(call fix_path,$@/make_error.log) ]; then \
echo "\n\033[31;1mmake -C $@$(MAKE_CMD_MESSAGE) failed:\033[0m\n" ; \
fi
@if [ "$(VERBOSE)" != "0" -o -e $(call fix_path,$@/make_error.log) ]; then \
cat $(call fix_path,$@/make.log); \
fi
endif
@$(rm_file) $(call fix_path,$@/make*.log) $(ignore_output) $(ignore_failure)
endif # ifdef FAIL_ON_ERROR
endif # ifndef IS_RECURSIVE_MAKE
include $(mkfile_dir)/shell.mk

27
tools/makefiles/shell.mk Normal file

@@ -0,0 +1,27 @@
# posix shell
rm_dir = rm -rf
rm_file = rm -f
fix_path = $(1)
touch = touch $(1)
true_expression = true
stdout_redirect_null = 1>/dev/null
stderr_redirect_null = 2>/dev/null
stderr_redirect_stdout = 2>&1
command_exists = command -v $(1)
# windows shell
ifeq ($(suffix $(SHELL)),.exe)
rm_dir = rd /s /q
rm_file = del /f /q
fix_path = $(subst /,\,$(abspath $(1)))
# https://ss64.com/nt/touch.html
touch = type nul >> $(call fix_path,$(1)) && copy /y /b $(call fix_path,$(1))+,, $(call fix_path,$(1)) $(ignore_output)
true_expression = VER>NUL
stdout_redirect_null = 1>NUL
stderr_redirect_null = 2>NUL
stderr_redirect_stdout = 2>&1
command_exists = where $(1)
endif
ignore_output = $(stdout_redirect_null) $(stderr_redirect_stdout)
ignore_failure = || $(true_expression)

92
tools/remove_worktree.py Normal file

@@ -0,0 +1,92 @@
#!/usr/bin/env python3
"""
Remove a git worktree and optionally delete the associated branch.
Usage:
python tools/remove_worktree.py feature-branch
python tools/remove_worktree.py feature-branch --force
"""
import argparse
import subprocess
import sys
from pathlib import Path
def run_git_command(cmd: list[str]) -> tuple[int, str, str]:
"""Run a git command and return exit code, stdout, stderr."""
result = subprocess.run(cmd, capture_output=True, text=True)
return result.returncode, result.stdout.strip(), result.stderr.strip()
def main():
parser = argparse.ArgumentParser(description="Remove a git worktree and optionally delete its branch")
parser.add_argument("branch", help="Name of the branch/worktree to remove")
parser.add_argument("--force", action="store_true", help="Force removal even with uncommitted changes")
args = parser.parse_args()
# Get the base repository name
current_dir = Path.cwd()
repo_name = current_dir.name
# Construct worktree path (same pattern as create_worktree.py)
worktree_path = current_dir.parent / f"{repo_name}-{args.branch}"
print(f"Looking for worktree at: {worktree_path}")
# Check if worktree exists
returncode, stdout, _ = run_git_command(["git", "worktree", "list"])
if returncode != 0:
print("Error: Failed to list worktrees")
sys.exit(1)
worktree_exists = str(worktree_path) in stdout
if not worktree_exists:
print(f"Error: Worktree for branch '{args.branch}' not found at {worktree_path}")
sys.exit(1)
# Remove the worktree
remove_cmd = ["git", "worktree", "remove", str(worktree_path)]
if args.force:
remove_cmd.append("--force")
print(f"Removing worktree at {worktree_path}...")
returncode, stdout, stderr = run_git_command(remove_cmd)
if returncode != 0:
if "contains modified or untracked files" in stderr:
print("Error: Worktree contains uncommitted changes. Use --force to override.")
else:
print(f"Error removing worktree: {stderr}")
sys.exit(1)
print(f"Successfully removed worktree at {worktree_path}")
# Try to delete the branch
print(f"Attempting to delete branch '{args.branch}'...")
# Check current branch
returncode, current_branch, _ = run_git_command(["git", "branch", "--show-current"])
if returncode == 0 and current_branch == args.branch:
print(f"Cannot delete branch '{args.branch}' - it is currently checked out")
return
# Try to delete the branch
returncode, stdout, stderr = run_git_command(["git", "branch", "-d", args.branch])
if returncode == 0:
print(f"Successfully deleted branch '{args.branch}'")
elif "not fully merged" in stderr:
# Try force delete if regular delete fails due to unmerged changes
print("Branch has unmerged changes, force deleting...")
returncode, stdout, stderr = run_git_command(["git", "branch", "-D", args.branch])
if returncode == 0:
print(f"Successfully force-deleted branch '{args.branch}'")
else:
print(f"Warning: Could not delete branch: {stderr}")
else:
print(f"Warning: Could not delete branch: {stderr}")
if __name__ == "__main__":
main()