Claude-Code-Workflow/.claude/python_script/tools/module_analyzer.py
catlog22 fc6e851230 refactor: Update workflow plan system and template organization
- Remove the --analyze|--deep parameters from plan.md; use the default analysis
- Rename the .analysis directory to .process for better organization
- Create ANALYSIS_RESULTS.md template focused on verified results
- Add .process folder to workflow-architecture.md file structure
- Template emphasizes verification of files, methods, and commands
- Prevent execution errors from non-existent references

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-18 16:26:50 +08:00

369 lines · 14 KiB · Python

#!/usr/bin/env python3
"""
Unified Module Analyzer

Combines functionality from detect_changed_modules.py and get_modules_by_depth.py
into a single, comprehensive module analysis tool.
"""
import sys
import subprocess
import time
import json
from pathlib import Path
from typing import List, Dict, Optional, Set
from dataclasses import dataclass, asdict

# Add the parent directory to sys.path so the core.* modules resolve
sys.path.insert(0, str(Path(__file__).parent.parent))

from core.config import get_config
from core.gitignore_parser import GitignoreParser


@dataclass
class ModuleInfo:
    """Information about a module/directory."""
    depth: int
    path: str
    files: int
    types: List[str]
    has_claude: bool
    status: str = "normal"  # changed, normal, new, deleted
    last_modified: Optional[float] = None

    def to_dict(self) -> Dict:
        return asdict(self)
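
# Illustrative serialized record (example values only, not from this repo):
# json.dumps(module.to_dict()) for a hypothetical "src/utils" module yields
#   {"depth": 2, "path": "src/utils", "files": 5, "types": [".py"],
#    "has_claude": false, "status": "changed", "last_modified": 1726640000.0}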


class ModuleAnalyzer:
    """Unified module analysis tool with change detection and depth analysis."""

    def __init__(self, root_path: str = ".", config_path: Optional[str] = None):
        self.root_path = Path(root_path).resolve()
        self.config = get_config(config_path)

        # Source file extensions considered during analysis
        self.source_extensions = {
            '.md', '.js', '.ts', '.jsx', '.tsx', '.py', '.go', '.rs',
            '.java', '.cpp', '.c', '.h', '.sh', '.ps1', '.json', '.yaml', '.yml',
            '.php', '.rb', '.swift', '.kt', '.scala', '.dart'
        }

        # Initialize gitignore parser for exclusions
        self.gitignore_parser = GitignoreParser(str(self.root_path))
        self.exclude_patterns = self._build_exclusion_patterns()

    def _build_exclusion_patterns(self) -> Set[str]:
        """Build exclusion patterns from config and gitignore."""
        exclusions = {
            '.git', '.history', '.vscode', '__pycache__', '.pytest_cache',
            'node_modules', 'dist', 'build', '.egg-info', '.env',
            '.cache', '.tmp', '.temp', '.DS_Store', 'Thumbs.db'
        }

        # Add directory names extracted from config patterns
        config_patterns = self.config.get('exclude_patterns', [])
        for pattern in config_patterns:
            # Extract directory names from patterns such as "*/build/*"
            if '/' in pattern:
                parts = pattern.replace('*/', '').replace('/*', '').split('/')
                exclusions.update(part for part in parts if part and not part.startswith('*'))

        return exclusions

    def _should_exclude_directory(self, dir_path: Path) -> bool:
        """Check whether a directory should be excluded from analysis."""
        dir_name = dir_path.name

        # Check against exclusion patterns
        if dir_name in self.exclude_patterns:
            return True

        # Exclude hidden directories, with a small allowlist
        if dir_name.startswith('.') and dir_name not in {'.github', '.vscode'}:
            return True

        return False

    def get_git_changed_files(self, since: str = "HEAD") -> Set[str]:
        """Get files changed in git: committed since `since`, staged, and untracked."""
        changed_files = set()
        try:
            # Confirm we are inside a git repository
            subprocess.run(['git', 'rev-parse', '--git-dir'],
                           check=True, capture_output=True, cwd=self.root_path)

            # Gather changes from three sources
            commands = [
                ['git', 'diff', '--name-only', since],                  # Changes since reference
                ['git', 'diff', '--name-only', '--staged'],             # Staged changes
                ['git', 'ls-files', '--others', '--exclude-standard'],  # Untracked files
            ]
            for cmd in commands:
                try:
                    result = subprocess.run(cmd, capture_output=True, text=True,
                                            cwd=self.root_path, check=True)
                    if result.stdout.strip():
                        files = result.stdout.strip().split('\n')
                        changed_files.update(f for f in files if f)
                except subprocess.CalledProcessError:
                    continue
        except (subprocess.CalledProcessError, FileNotFoundError):
            # Not a git repository, or git is not installed
            # (a missing binary raises FileNotFoundError, not CalledProcessError)
            pass

        return changed_files

    def get_recently_modified_files(self, hours: int = 24) -> Set[str]:
        """Get files modified within the specified number of hours."""
        cutoff_time = time.time() - (hours * 3600)
        recent_files = set()

        try:
            for file_path in self.root_path.rglob('*'):
                if file_path.is_file():
                    try:
                        if file_path.stat().st_mtime > cutoff_time:
                            rel_path = file_path.relative_to(self.root_path)
                            recent_files.add(str(rel_path))
                    except (OSError, ValueError):
                        continue
        except Exception:
            pass

        return recent_files

    def analyze_directory(self, dir_path: Path) -> Optional[ModuleInfo]:
        """Analyze a single directory and return module information."""
        if self._should_exclude_directory(dir_path):
            return None

        try:
            # Count files and collect source file types
            file_types = set()
            file_count = 0
            has_claude = False
            last_modified = 0.0

            for item in dir_path.iterdir():
                if item.is_file():
                    file_count += 1

                    # Track file types
                    if item.suffix.lower() in self.source_extensions:
                        file_types.add(item.suffix.lower())

                    # Check for CLAUDE.md
                    if item.name.upper() == 'CLAUDE.MD':
                        has_claude = True

                    # Track the latest modification time
                    try:
                        mtime = item.stat().st_mtime
                        last_modified = max(last_modified, mtime)
                    except OSError:
                        continue

            # Calculate depth relative to the analysis root
            try:
                relative_path = dir_path.relative_to(self.root_path)
                depth = len(relative_path.parts)
            except ValueError:
                depth = 0

            return ModuleInfo(
                depth=depth,
                path=str(relative_path) if depth > 0 else ".",
                files=file_count,
                types=sorted(file_types),
                has_claude=has_claude,
                last_modified=last_modified if last_modified > 0 else None
            )
        except (PermissionError, OSError):
            return None

    def detect_changed_modules(self, since: str = "HEAD") -> List[ModuleInfo]:
        """Detect modules affected by changes."""
        changed_files = self.get_git_changed_files(since)

        # If git reports no changes, fall back to recently modified files
        if not changed_files:
            changed_files = self.get_recently_modified_files(24)

        # Collect each changed file's directory and its ancestors (excluding root)
        affected_dirs = set()
        for file_path in changed_files:
            full_path = self.root_path / file_path
            if full_path.exists():
                current_dir = full_path.parent
                while current_dir != self.root_path and current_dir.parent != current_dir:
                    affected_dirs.add(current_dir)
                    current_dir = current_dir.parent

        # Analyze the affected directories
        modules = []
        for dir_path in affected_dirs:
            module_info = self.analyze_directory(dir_path)
            if module_info:
                module_info.status = "changed"
                modules.append(module_info)

        return sorted(modules, key=lambda m: (m.depth, m.path))
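
    # Example (hypothetical path): a change to "src/app/utils/helpers.py" marks
    # "src/app/utils", "src/app", and "src" as changed; the repository root
    # itself is never added as a module.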

    def analyze_by_depth(self, max_depth: Optional[int] = None) -> List[ModuleInfo]:
        """Analyze all modules, organized by depth (deepest first)."""
        modules = []

        def scan_directory(dir_path: Path, current_depth: int = 0):
            """Recursively scan directories up to max_depth."""
            # "is not None" so that an explicit --max-depth 0 is honored
            if max_depth is not None and current_depth > max_depth:
                return

            module_info = self.analyze_directory(dir_path)
            if module_info and module_info.files > 0:
                modules.append(module_info)

            # Recurse into subdirectories
            try:
                for item in dir_path.iterdir():
                    if item.is_dir() and not self._should_exclude_directory(item):
                        scan_directory(item, current_depth + 1)
            except (PermissionError, OSError):
                pass

        scan_directory(self.root_path)

        # Sort by depth (deepest first), then by path
        return sorted(modules, key=lambda m: (-m.depth, m.path))

    def get_dependencies(self, module_path: str) -> List[str]:
        """Get module dependencies (basic implementation: locate manifest files)."""
        dependencies = []
        module_dir = self.root_path / module_path

        if not module_dir.exists() or not module_dir.is_dir():
            return dependencies

        # Look for common dependency manifest files
        dependency_files = [
            'package.json',      # Node.js
            'requirements.txt',  # Python
            'Cargo.toml',        # Rust
            'go.mod',            # Go
            'pom.xml',           # Java (Maven)
            'build.gradle',      # Java (Gradle)
        ]

        for dep_file in dependency_files:
            dep_path = module_dir / dep_file
            if dep_path.exists():
                dependencies.append(str(dep_path.relative_to(self.root_path)))

        return dependencies

    def find_modules_with_pattern(self, pattern: str) -> List[ModuleInfo]:
        """Find modules whose path or file types match a pattern (case-insensitive)."""
        modules = self.analyze_by_depth()
        matching_modules = []

        for module in modules:
            # Match against the module path
            if pattern.lower() in module.path.lower():
                matching_modules.append(module)
                continue

            # Match against file extensions
            if any(pattern.lower() in ext.lower() for ext in module.types):
                matching_modules.append(module)

        return matching_modules

    def export_analysis(self, modules: List[ModuleInfo], format: str = "json") -> str:
        """Export module analysis in the specified format."""
        if format == "json":
            return json.dumps([module.to_dict() for module in modules], indent=2)
        elif format == "list":
            lines = []
            for module in modules:
                status = f"[{module.status}]" if module.status != "normal" else ""
                claude_marker = "[CLAUDE]" if module.has_claude else ""
                lines.append(f"{module.path} (depth:{module.depth}, files:{module.files}) {status} {claude_marker}")
            return "\n".join(lines)
        elif format == "grouped":
            grouped = {}
            for module in modules:
                grouped.setdefault(module.depth, []).append(module)

            lines = []
            for depth in sorted(grouped.keys()):
                lines.append(f"\n=== Depth {depth} ===")
                for module in grouped[depth]:
                    status = f"[{module.status}]" if module.status != "normal" else ""
                    claude_marker = "[CLAUDE]" if module.has_claude else ""
                    lines.append(f" {module.path} (files:{module.files}) {status} {claude_marker}")
            return "\n".join(lines)
        elif format == "paths":
            return "\n".join(module.path for module in modules)
        else:
            raise ValueError(f"Unsupported format: {format}")
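
# Illustrative "list"-format output for two hypothetical modules (example
# values, not taken from this repository):
#   src/utils (depth:2, files:5) [changed] [CLAUDE]
#   docs (depth:1, files:12)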


def main():
    """Main CLI entry point."""
    import argparse

    parser = argparse.ArgumentParser(description="Module Analysis Tool")
    parser.add_argument("command", choices=["changed", "depth", "dependencies", "find"],
                        help="Analysis command to run")
    parser.add_argument("--format", choices=["json", "list", "grouped", "paths"],
                        default="list", help="Output format")
    parser.add_argument("--since", default="HEAD~1",
                        help="Git reference for change detection (default: HEAD~1)")
    parser.add_argument("--max-depth", type=int,
                        help="Maximum directory depth to analyze")
    parser.add_argument("--pattern", help="Pattern to search for (find command)")
    parser.add_argument("--module", help="Module path for dependency analysis")
    parser.add_argument("--config", help="Configuration file path")
    args = parser.parse_args()

    analyzer = ModuleAnalyzer(config_path=args.config)

    if args.command == "changed":
        modules = analyzer.detect_changed_modules(args.since)
        print(analyzer.export_analysis(modules, args.format))
    elif args.command == "depth":
        modules = analyzer.analyze_by_depth(args.max_depth)
        print(analyzer.export_analysis(modules, args.format))
    elif args.command == "dependencies":
        if not args.module:
            print("Error: --module required for dependencies command", file=sys.stderr)
            sys.exit(1)
        deps = analyzer.get_dependencies(args.module)
        if args.format == "json":
            print(json.dumps(deps, indent=2))
        else:
            print("\n".join(deps))
    elif args.command == "find":
        if not args.pattern:
            print("Error: --pattern required for find command", file=sys.stderr)
            sys.exit(1)
        modules = analyzer.find_modules_with_pattern(args.pattern)
        print(analyzer.export_analysis(modules, args.format))


if __name__ == "__main__":
    main()
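
# ---------------------------------------------------------------------------
# Usage sketch (added for illustration; the commands and flags match the CLI
# defined above, but the paths and output are hypothetical):
#
#   python module_analyzer.py changed --since HEAD~1 --format grouped
#   python module_analyzer.py depth --max-depth 3 --format json
#   python module_analyzer.py dependencies --module src/app
#   python module_analyzer.py find --pattern .py --format paths
#
# Programmatic use, assuming the python_script directory is on sys.path:
#
#   from tools.module_analyzer import ModuleAnalyzer
#   analyzer = ModuleAnalyzer(root_path=".")
#   for mod in analyzer.detect_changed_modules(since="HEAD~1"):
#       print(mod.path, mod.status)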