diff --git a/.claude/python_script/__init__.py b/.claude/python_script/__init__.py deleted file mode 100644 index e02aa58c..00000000 --- a/.claude/python_script/__init__.py +++ /dev/null @@ -1,35 +0,0 @@ -""" -Refactored Python Script Analyzer -Modular, reusable architecture for intelligent file analysis and workflow automation. -""" - -__version__ = "2.0.0" -__author__ = "Claude Development Team" -__email__ = "dev@example.com" - -from .analyzer import Analyzer -from .indexer import ProjectIndexer -from .cli import AnalysisCLI -from .core import ( - Config, FileIndexer, FileInfo, IndexStats, - ContextAnalyzer, AnalysisResult, - PathMatcher, MatchResult, PathMatchingResult, - EmbeddingManager, GitignoreParser -) -from .tools import ModuleAnalyzer, ModuleInfo, TechStackLoader -from .utils import Colors, CacheManager, IOHelpers - -__all__ = [ - 'Analyzer', 'ProjectIndexer', 'AnalysisCLI', - # Core modules - 'Config', - 'FileIndexer', 'FileInfo', 'IndexStats', - 'ContextAnalyzer', 'AnalysisResult', - 'PathMatcher', 'MatchResult', 'PathMatchingResult', - 'EmbeddingManager', 'GitignoreParser', - # Tools - 'ModuleAnalyzer', 'ModuleInfo', - 'TechStackLoader', - # Utils - 'Colors', 'CacheManager', 'IOHelpers' -] \ No newline at end of file diff --git a/.claude/python_script/__pycache__/context_analyzer.cpython-313.pyc b/.claude/python_script/__pycache__/context_analyzer.cpython-313.pyc deleted file mode 100644 index fbc49d2d..00000000 Binary files a/.claude/python_script/__pycache__/context_analyzer.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/__pycache__/embedding_manager.cpython-313.pyc b/.claude/python_script/__pycache__/embedding_manager.cpython-313.pyc deleted file mode 100644 index 4835d0ca..00000000 Binary files a/.claude/python_script/__pycache__/embedding_manager.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/__pycache__/file_indexer.cpython-313.pyc b/.claude/python_script/__pycache__/file_indexer.cpython-313.pyc deleted file mode 100644 index 7c6852bd..00000000 Binary files a/.claude/python_script/__pycache__/file_indexer.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/__pycache__/path_matcher.cpython-313.pyc b/.claude/python_script/__pycache__/path_matcher.cpython-313.pyc deleted file mode 100644 index 40858048..00000000 Binary files a/.claude/python_script/__pycache__/path_matcher.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/cli.py b/.claude/python_script/cli.py deleted file mode 100644 index eef6e2fb..00000000 --- a/.claude/python_script/cli.py +++ /dev/null @@ -1,207 +0,0 @@ -#!/usr/bin/env python3 -""" -CLI Interface for Path-Aware Analysis -Provides command-line interface for intelligent file analysis and pattern matching. 
-""" - -import sys -import argparse -import logging -import json -import time -from pathlib import Path -from typing import Dict, List, Optional, Any - -# Add current directory to path for imports -sys.path.insert(0, str(Path(__file__).parent)) - -from core.config import get_config -from core.file_indexer import FileIndexer -from core.context_analyzer import ContextAnalyzer -from core.path_matcher import PathMatcher -from utils.colors import Colors - - -class AnalysisCLI: - """Command-line interface for file analysis and pattern matching.""" - - def __init__(self, config_path: Optional[str] = None, root_path: str = "."): - self.root_path = Path(root_path).resolve() - self.config = get_config(config_path) - - # Setup logging - logging.basicConfig( - level=getattr(logging, self.config.get('logging.level', 'INFO')), - format=self.config.get('logging.format', '%(asctime)s - %(name)s - %(levelname)s - %(message)s') - ) - self.logger = logging.getLogger(__name__) - - # Initialize core components - self.indexer = FileIndexer(self.config, str(self.root_path)) - self.context_analyzer = ContextAnalyzer(self.config) - self.path_matcher = PathMatcher(self.config) - - def analyze(self, prompt: str, patterns: Optional[List[str]] = None) -> Dict[str, Any]: - """Analyze and return relevant file paths for a given prompt.""" - print(Colors.yellow("Analyzing project and prompt...")) - start_time = time.time() - - # Load index (build if not exists) - index = self.indexer.load_index() - if not index: - print(Colors.warning("No file index found. Run 'python indexer.py --build' first or use --auto-build")) - return {} - - stats = self.indexer.get_stats() - print(Colors.cyan(f"Project stats: ~{stats.total_tokens:,} tokens across {stats.total_files} files")) - print(Colors.cyan(f"Categories: {', '.join(f'{k}: {v}' for k, v in stats.categories.items())}")) - - # Determine project size - project_size = self._classify_project_size(stats.total_tokens) - print(Colors.cyan(f"Project size: {project_size}")) - - # Analyze prompt context - print(Colors.yellow("Analyzing prompt context...")) - context_result = self.context_analyzer.analyze(prompt) - - print(Colors.cyan(f"Identified: {len(context_result.domains)} domains, {len(context_result.languages)} languages")) - if context_result.domains: - print(Colors.cyan(f"Top domains: {', '.join(context_result.domains[:3])}")) - - # Match files to context - print(Colors.yellow("Matching files to context...")) - matching_result = self.path_matcher.match_files( - index, - context_result, - explicit_patterns=patterns - ) - - elapsed = time.time() - start_time - - print(Colors.green(f"Analysis complete: {len(matching_result.matched_files)} files, ~{matching_result.total_tokens:,} tokens")) - print(Colors.cyan(f"Confidence: {matching_result.confidence_score:.2f}")) - print(Colors.cyan(f"Execution time: {elapsed:.2f}s")) - - return { - 'files': [match.file_info.relative_path for match in matching_result.matched_files], - 'total_tokens': matching_result.total_tokens, - 'confidence': matching_result.confidence_score, - 'context': { - 'domains': context_result.domains, - 'languages': context_result.languages, - 'keywords': context_result.keywords - }, - 'stats': { - 'project_size': project_size, - 'total_files': stats.total_files, - 'analysis_time': elapsed - } - } - - def generate_command(self, prompt: str, tool: str, files: List[str]) -> str: - """Generate a command for external tools (gemini/codex).""" - file_patterns = " ".join(f"@{{{file}}}" for file in files) - - if tool == "gemini": 
- if len(files) > 50: - return f'gemini --all-files -p "{prompt}"' - else: - return f'gemini -p "{file_patterns} {prompt}"' - elif tool == "codex": - # Estimate tokens for workspace selection - total_tokens = sum(len(file) * 50 for file in files) # Rough estimate - workspace_flag = "-s workspace-write" if total_tokens > 100000 else "-s danger-full-access" - return f'codex {workspace_flag} --full-auto exec "{file_patterns} {prompt}"' - else: - raise ValueError(f"Unsupported tool: {tool}") - - def _classify_project_size(self, tokens: int) -> str: - """Classify project size based on token count.""" - small_limit = self.config.get('token_limits.small_project', 500000) - medium_limit = self.config.get('token_limits.medium_project', 2000000) - - if tokens < small_limit: - return "small" - elif tokens < medium_limit: - return "medium" - else: - return "large" - - def auto_build_index(self): - """Auto-build index if it doesn't exist.""" - from indexer import ProjectIndexer - indexer = ProjectIndexer(root_path=str(self.root_path)) - indexer.build_index() - - -def main(): - """CLI entry point for analysis.""" - parser = argparse.ArgumentParser( - description="Path-Aware Analysis CLI - Intelligent file pattern detection", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - python cli.py "analyze authentication flow" - python cli.py "fix database connection" --patterns "src/**/*.py" - python cli.py "review API endpoints" --tool gemini - """ - ) - - parser.add_argument('prompt', help='Analysis prompt or task description') - parser.add_argument('--patterns', nargs='*', help='Explicit file patterns to include') - parser.add_argument('--tool', choices=['gemini', 'codex'], help='Generate command for specific tool') - parser.add_argument('--output', choices=['patterns', 'json'], default='patterns', help='Output format') - parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output') - parser.add_argument('--auto-build', action='store_true', help='Auto-build index if missing') - parser.add_argument('--config', help='Configuration file path') - parser.add_argument('--root', default='.', help='Root directory to analyze') - - args = parser.parse_args() - - # Create CLI interface - cli = AnalysisCLI(args.config, args.root) - - try: - # Auto-build index if requested and missing - if args.auto_build: - index = cli.indexer.load_index() - if not index: - print(Colors.yellow("Auto-building missing index...")) - cli.auto_build_index() - - # Perform analysis - result = cli.analyze(args.prompt, patterns=args.patterns) - - if not result: - sys.exit(1) - - # Generate output - if args.tool: - command = cli.generate_command(args.prompt, args.tool, result['files']) - print(command) - elif args.output == 'json': - print(json.dumps(result, indent=2, default=str)) - else: # patterns output (default) - for file_path in result['files']: - print(f"@{{{file_path}}}") - - # Show verbose details - if args.verbose: - print(f"\n# Analysis Details:") - print(f"# Matched files: {len(result['files'])}") - print(f"# Total tokens: {result['total_tokens']:,}") - print(f"# Confidence: {result['confidence']:.2f}") - - except KeyboardInterrupt: - print(Colors.warning("\nAnalysis interrupted by user")) - sys.exit(1) - except Exception as e: - print(Colors.error(f"Analysis failed: {e}")) - if args.verbose: - import traceback - traceback.print_exc() - sys.exit(1) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/.claude/python_script/config.yaml 
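The deleted cli.py assembled the final tool invocation from the matched file list: Gemini falls back to --all-files above 50 files, and Codex picks its sandbox flag from a rough token estimate. A condensed sketch of that command-assembly logic (the helper name is illustrative; thresholds and flags are the ones shown in the diff above):

```python
def build_tool_command(prompt: str, tool: str, files: list) -> str:
    """Illustrative restatement of AnalysisCLI.generate_command from the deleted cli.py."""
    patterns = " ".join(f"@{{{f}}}" for f in files)  # renders as @{path/to/file} ...
    if tool == "gemini":
        # Too many files to inline: let gemini read the whole tree instead.
        return f'gemini --all-files -p "{prompt}"' if len(files) > 50 else f'gemini -p "{patterns} {prompt}"'
    if tool == "codex":
        est_tokens = sum(len(f) * 50 for f in files)  # same rough per-file heuristic as the original
        sandbox = "-s workspace-write" if est_tokens > 100000 else "-s danger-full-access"
        return f'codex {sandbox} --full-auto exec "{patterns} {prompt}"'
    raise ValueError(f"Unsupported tool: {tool}")

# build_tool_command("review API endpoints", "gemini", ["src/api/routes.py"])
# -> gemini -p "@{src/api/routes.py} review API endpoints"
```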
b/.claude/python_script/config.yaml deleted file mode 100644 index 226b024f..00000000 --- a/.claude/python_script/config.yaml +++ /dev/null @@ -1,159 +0,0 @@ -# Configuration for UltraThink Path-Aware Analyzer -# Based on gemini-wrapper patterns with intelligent enhancements - -# Token limits for project size classification -token_limits: - small_project: 500000 # <500K tokens - include most files - medium_project: 2000000 # 500K-2M tokens - smart selection - large_project: 10000000 # >2M tokens - precise targeting - max_files: 1000 # Maximum files to process - -# File patterns to exclude (performance and relevance) -exclude_patterns: - - "*/node_modules/*" - - "*/.git/*" - - "*/build/*" - - "*/dist/*" - - "*/.next/*" - - "*/.nuxt/*" - - "*/target/*" - - "*/vendor/*" - - "*/__pycache__/*" - - "*.pyc" - - "*.pyo" - - "*.log" - - "*.tmp" - - "*.temp" - - "*.history" - -# File extensions grouped by category -file_extensions: - code: - - ".py" - - ".js" - - ".ts" - - ".tsx" - - ".jsx" - - ".java" - - ".cpp" - - ".c" - - ".h" - - ".rs" - - ".go" - - ".php" - - ".rb" - - ".sh" - - ".bash" - docs: - - ".md" - - ".txt" - - ".rst" - - ".adoc" - config: - - ".json" - - ".yaml" - - ".yml" - - ".toml" - - ".ini" - - ".env" - web: - - ".html" - - ".css" - - ".scss" - - ".sass" - - ".xml" - -# Embedding/RAG configuration -embedding: - enabled: true # Set to true to enable RAG features - model: "all-MiniLM-L6-v2" # Stable general-purpose embedding model - cache_dir: "cache" - similarity_threshold: 0.6 # Higher threshold for better code similarity - max_context_length: 512 # Standard context length - batch_size: 32 # Standard batch size - trust_remote_code: false # Not required for standard models - -# Context analysis settings -context_analysis: - # Keywords that indicate specific domains/modules - domain_keywords: - auth: ["auth", "login", "user", "password", "jwt", "token", "session"] - database: ["db", "database", "sql", "query", "model", "schema", "migration"] - api: ["api", "endpoint", "route", "controller", "service", "handler"] - frontend: ["ui", "component", "view", "template", "style", "css"] - backend: ["server", "service", "logic", "business", "core"] - test: ["test", "spec", "unit", "integration", "mock"] - config: ["config", "setting", "environment", "env"] - util: ["util", "helper", "common", "shared", "lib"] - - # Programming language indicators - language_indicators: - python: [".py", "python", "pip", "requirements.txt", "setup.py"] - javascript: [".js", ".ts", "npm", "package.json", "node"] - java: [".java", "maven", "gradle", "pom.xml"] - go: [".go", "go.mod", "go.sum"] - rust: [".rs", "cargo", "Cargo.toml"] - -# Path matching and ranking -path_matching: - # Scoring weights for relevance calculation - weights: - keyword_match: 0.4 # Direct keyword match in filename/path - extension_match: 0.2 # File extension relevance - directory_context: 0.2 # Directory name relevance - file_size_penalty: 0.1 # Penalty for very large files - recency_bonus: 0.1 # Bonus for recently modified files - - # Maximum files to return per category - max_files_per_category: 20 - - # Minimum relevance score to include file - min_relevance_score: 0.1 - -# Output formatting -output: - # How to format path patterns - pattern_format: "@{{{path}}}" # Results in @{path/to/file} - - # Include project documentation by default - always_include: - - "CLAUDE.md" - - "**/CLAUDE.md" - - "README.md" - - "docs/**/*.md" - - # Maximum total files in output - max_total_files: 50 - -# Analysis modes -modes: - auto: - description: 
"Fully automatic path detection" - enabled: true - guided: - description: "Suggest paths for user confirmation" - enabled: true - pattern: - description: "Use explicit patterns from user" - enabled: true - hybrid: - description: "Combine auto-detection with user patterns" - enabled: true - -# Performance settings -performance: - # Cache settings - cache_enabled: true - cache_ttl: 3600 # Cache TTL in seconds (1 hour) - - # File size limits - max_file_size: 10485760 # 10MB max file size to analyze - - # Parallel processing - max_workers: 4 # Number of parallel workers for file processing - -# Logging configuration -logging: - level: "INFO" # DEBUG, INFO, WARNING, ERROR - file: ".claude/scripts/ultrathink/ultrathink.log" - format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" \ No newline at end of file diff --git a/.claude/python_script/core/__init__.py b/.claude/python_script/core/__init__.py deleted file mode 100644 index ac4eac25..00000000 --- a/.claude/python_script/core/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -Core modules for the Python script analyzer. -Provides unified interfaces for file indexing, context analysis, and path matching. -""" - -from .config import Config -from .file_indexer import FileIndexer, FileInfo, IndexStats -from .context_analyzer import ContextAnalyzer, AnalysisResult -from .path_matcher import PathMatcher, MatchResult, PathMatchingResult -from .embedding_manager import EmbeddingManager -from .gitignore_parser import GitignoreParser - -__all__ = [ - 'Config', - 'FileIndexer', - 'FileInfo', - 'IndexStats', - 'ContextAnalyzer', - 'AnalysisResult', - 'PathMatcher', - 'MatchResult', - 'PathMatchingResult', - 'EmbeddingManager', - 'GitignoreParser' -] \ No newline at end of file diff --git a/.claude/python_script/core/__pycache__/__init__.cpython-313.pyc b/.claude/python_script/core/__pycache__/__init__.cpython-313.pyc deleted file mode 100644 index 29c8bdd5..00000000 Binary files a/.claude/python_script/core/__pycache__/__init__.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/core/__pycache__/config.cpython-313.pyc b/.claude/python_script/core/__pycache__/config.cpython-313.pyc deleted file mode 100644 index 1f0e167b..00000000 Binary files a/.claude/python_script/core/__pycache__/config.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/core/__pycache__/context_analyzer.cpython-313.pyc b/.claude/python_script/core/__pycache__/context_analyzer.cpython-313.pyc deleted file mode 100644 index 77ac2027..00000000 Binary files a/.claude/python_script/core/__pycache__/context_analyzer.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/core/__pycache__/embedding_manager.cpython-313.pyc b/.claude/python_script/core/__pycache__/embedding_manager.cpython-313.pyc deleted file mode 100644 index f1fe9860..00000000 Binary files a/.claude/python_script/core/__pycache__/embedding_manager.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/core/__pycache__/file_indexer.cpython-313.pyc b/.claude/python_script/core/__pycache__/file_indexer.cpython-313.pyc deleted file mode 100644 index ed8fe806..00000000 Binary files a/.claude/python_script/core/__pycache__/file_indexer.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/core/__pycache__/gitignore_parser.cpython-313.pyc b/.claude/python_script/core/__pycache__/gitignore_parser.cpython-313.pyc deleted file mode 100644 index 5417ba71..00000000 Binary files 
a/.claude/python_script/core/__pycache__/gitignore_parser.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/core/__pycache__/path_matcher.cpython-313.pyc b/.claude/python_script/core/__pycache__/path_matcher.cpython-313.pyc deleted file mode 100644 index 1ed1969c..00000000 Binary files a/.claude/python_script/core/__pycache__/path_matcher.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/core/config.py b/.claude/python_script/core/config.py deleted file mode 100644 index 83f06528..00000000 --- a/.claude/python_script/core/config.py +++ /dev/null @@ -1,327 +0,0 @@ -#!/usr/bin/env python3 -""" -Configuration Management Module -Provides unified configuration management with gitignore integration. -""" - -import os -import yaml -import logging -from pathlib import Path -from typing import Dict, Any, Optional, List -from .gitignore_parser import get_all_gitignore_patterns - - -class Config: - """Singleton configuration manager with hierarchical loading.""" - - _instance = None - _initialized = False - - def __new__(cls, config_path: Optional[str] = None): - if cls._instance is None: - cls._instance = super(Config, cls).__new__(cls) - return cls._instance - - def __init__(self, config_path: Optional[str] = None): - if self._initialized: - return - - self.config_path = config_path - self.config = {} - self.logger = logging.getLogger(__name__) - - self._load_config() - self._add_gitignore_patterns() - self._apply_env_overrides() - self._validate_config() - - self._initialized = True - - def _load_config(self): - """Load configuration from file with fallback hierarchy.""" - config_paths = self._get_config_paths() - - for config_file in config_paths: - if config_file.exists(): - try: - with open(config_file, 'r', encoding='utf-8') as f: - loaded_config = yaml.safe_load(f) - if loaded_config: - self.config = self._merge_configs(self.config, loaded_config) - self.logger.info(f"Loaded config from {config_file}") - except Exception as e: - self.logger.warning(f"Failed to load config from {config_file}: {e}") - - # Apply default config if no config loaded - if not self.config: - self.config = self._get_default_config() - self.logger.info("Using default configuration") - - def _get_config_paths(self) -> List[Path]: - """Get ordered list of config file paths to check.""" - paths = [] - - # 1. Explicitly provided config path - if self.config_path: - paths.append(Path(self.config_path)) - - # 2. Current directory config.yaml - paths.append(Path('config.yaml')) - - # 3. Script directory config.yaml - script_dir = Path(__file__).parent.parent - paths.append(script_dir / 'config.yaml') - - # 4. 
Default config in script directory - paths.append(script_dir / 'default_config.yaml') - - return paths - - def _get_default_config(self) -> Dict[str, Any]: - """Get default configuration.""" - return { - 'token_limits': { - 'small_project': 500000, - 'medium_project': 2000000, - 'large_project': 10000000, - 'max_files': 1000 - }, - 'exclude_patterns': [ - "*/node_modules/*", - "*/.git/*", - "*/build/*", - "*/dist/*", - "*/.next/*", - "*/.nuxt/*", - "*/target/*", - "*/vendor/*", - "*/__pycache__/*", - "*.pyc", - "*.pyo", - "*.log", - "*.tmp", - "*.temp", - "*.history" - ], - 'file_extensions': { - 'code': ['.py', '.js', '.ts', '.tsx', '.jsx', '.java', '.cpp', '.c', '.h', '.rs', '.go', '.php', '.rb', '.sh', '.bash'], - 'docs': ['.md', '.txt', '.rst', '.adoc'], - 'config': ['.json', '.yaml', '.yml', '.toml', '.ini', '.env'], - 'web': ['.html', '.css', '.scss', '.sass', '.xml'] - }, - 'embedding': { - 'enabled': True, - 'model': 'all-MiniLM-L6-v2', - 'cache_dir': 'cache', - 'similarity_threshold': 0.3, - 'max_context_length': 512, - 'batch_size': 32 - }, - 'context_analysis': { - 'domain_keywords': { - 'auth': ['auth', 'login', 'user', 'password', 'jwt', 'token', 'session'], - 'database': ['db', 'database', 'sql', 'query', 'model', 'schema', 'migration'], - 'api': ['api', 'endpoint', 'route', 'controller', 'service', 'handler'], - 'frontend': ['ui', 'component', 'view', 'template', 'style', 'css'], - 'backend': ['server', 'service', 'logic', 'business', 'core'], - 'test': ['test', 'spec', 'unit', 'integration', 'mock'], - 'config': ['config', 'setting', 'environment', 'env'], - 'util': ['util', 'helper', 'common', 'shared', 'lib'] - }, - 'language_indicators': { - 'python': ['.py', 'python', 'pip', 'requirements.txt', 'setup.py'], - 'javascript': ['.js', '.ts', 'npm', 'package.json', 'node'], - 'java': ['.java', 'maven', 'gradle', 'pom.xml'], - 'go': ['.go', 'go.mod', 'go.sum'], - 'rust': ['.rs', 'cargo', 'Cargo.toml'] - } - }, - 'path_matching': { - 'weights': { - 'keyword_match': 0.4, - 'extension_match': 0.2, - 'directory_context': 0.2, - 'file_size_penalty': 0.1, - 'recency_bonus': 0.1 - }, - 'max_files_per_category': 20, - 'min_relevance_score': 0.1 - }, - 'output': { - 'pattern_format': '@{{{path}}}', - 'always_include': [ - 'CLAUDE.md', - '**/CLAUDE.md', - 'README.md', - 'docs/**/*.md' - ], - 'max_total_files': 50 - }, - 'performance': { - 'cache_enabled': True, - 'cache_ttl': 3600, - 'max_file_size': 10485760, - 'max_workers': 4 - }, - 'logging': { - 'level': 'INFO', - 'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s' - } - } - - def _merge_configs(self, base: Dict, override: Dict) -> Dict: - """Recursively merge configuration dictionaries.""" - result = base.copy() - - for key, value in override.items(): - if key in result and isinstance(result[key], dict) and isinstance(value, dict): - result[key] = self._merge_configs(result[key], value) - else: - result[key] = value - - return result - - def _add_gitignore_patterns(self): - """Add patterns from .gitignore files to exclude_patterns.""" - try: - # Find root directory (current working directory or script parent) - root_dir = Path.cwd() - - gitignore_patterns = get_all_gitignore_patterns(str(root_dir)) - - if gitignore_patterns: - # Ensure exclude_patterns exists - if 'exclude_patterns' not in self.config: - self.config['exclude_patterns'] = [] - - # Add gitignore patterns, avoiding duplicates - existing_patterns = set(self.config['exclude_patterns']) - new_patterns = [p for p in gitignore_patterns if p not in 
existing_patterns] - - self.config['exclude_patterns'].extend(new_patterns) - - self.logger.info(f"Added {len(new_patterns)} patterns from .gitignore files") - - except Exception as e: - self.logger.warning(f"Failed to load .gitignore patterns: {e}") - - def _apply_env_overrides(self): - """Apply environment variable overrides.""" - env_mappings = { - 'ANALYZER_CACHE_DIR': ('embedding', 'cache_dir'), - 'ANALYZER_LOG_LEVEL': ('logging', 'level'), - 'ANALYZER_MAX_FILES': ('token_limits', 'max_files'), - 'ANALYZER_EMBEDDING_MODEL': ('embedding', 'model') - } - - for env_var, config_path in env_mappings.items(): - env_value = os.getenv(env_var) - if env_value: - self._set_nested_value(config_path, env_value) - self.logger.info(f"Applied environment override: {env_var} = {env_value}") - - def _set_nested_value(self, path: tuple, value: str): - """Set a nested configuration value.""" - current = self.config - for key in path[:-1]: - if key not in current: - current[key] = {} - current = current[key] - - # Try to convert value to appropriate type - if isinstance(current.get(path[-1]), int): - try: - value = int(value) - except ValueError: - pass - elif isinstance(current.get(path[-1]), bool): - value = value.lower() in ('true', '1', 'yes', 'on') - - current[path[-1]] = value - - def _validate_config(self): - """Validate configuration values.""" - required_sections = ['exclude_patterns', 'file_extensions', 'token_limits'] - - for section in required_sections: - if section not in self.config: - self.logger.warning(f"Missing required config section: {section}") - - # Validate token limits - if 'token_limits' in self.config: - limits = self.config['token_limits'] - if limits.get('small_project', 0) >= limits.get('medium_project', 0): - self.logger.warning("Token limit configuration may be incorrect") - - def get(self, path: str, default: Any = None) -> Any: - """Get configuration value using dot notation.""" - keys = path.split('.') - current = self.config - - try: - for key in keys: - current = current[key] - return current - except (KeyError, TypeError): - return default - - def set(self, path: str, value: Any): - """Set configuration value using dot notation.""" - keys = path.split('.') - current = self.config - - for key in keys[:-1]: - if key not in current: - current[key] = {} - current = current[key] - - current[keys[-1]] = value - - def get_exclude_patterns(self) -> List[str]: - """Get all exclude patterns including gitignore patterns.""" - return self.config.get('exclude_patterns', []) - - def get_file_extensions(self) -> Dict[str, List[str]]: - """Get file extension mappings.""" - return self.config.get('file_extensions', {}) - - def is_embedding_enabled(self) -> bool: - """Check if embedding functionality is enabled.""" - return self.config.get('embedding', {}).get('enabled', False) - - def get_cache_dir(self) -> str: - """Get cache directory path.""" - return self.config.get('embedding', {}).get('cache_dir', 'cache') - - def to_dict(self) -> Dict[str, Any]: - """Return configuration as dictionary.""" - return self.config.copy() - - def reload(self, config_path: Optional[str] = None): - """Reload configuration from file.""" - self._initialized = False - if config_path: - self.config_path = config_path - self.__init__(self.config_path) - - -# Global configuration instance -_global_config = None - - -def get_config(config_path: Optional[str] = None) -> Config: - """Get global configuration instance.""" - global _global_config - if _global_config is None: - _global_config = 
Config(config_path) - return _global_config - - -if __name__ == "__main__": - # Test configuration loading - config = Config() - print("Configuration loaded successfully!") - print(f"Cache dir: {config.get_cache_dir()}") - print(f"Exclude patterns: {len(config.get_exclude_patterns())}") - print(f"Embedding enabled: {config.is_embedding_enabled()}") \ No newline at end of file diff --git a/.claude/python_script/core/context_analyzer.py b/.claude/python_script/core/context_analyzer.py deleted file mode 100644 index bf3ca0d3..00000000 --- a/.claude/python_script/core/context_analyzer.py +++ /dev/null @@ -1,359 +0,0 @@ -#!/usr/bin/env python3 -""" -Context Analyzer Module for UltraThink Path-Aware Analyzer -Analyzes user prompts to extract relevant context and keywords. -""" - -import re -import logging -from typing import Dict, List, Set, Tuple, Optional -from dataclasses import dataclass -from collections import Counter -import string - -@dataclass -class AnalysisResult: - """Results of context analysis.""" - keywords: List[str] - domains: List[str] - languages: List[str] - file_patterns: List[str] - confidence_scores: Dict[str, float] - extracted_entities: Dict[str, List[str]] - -class ContextAnalyzer: - """Analyzes user prompts to understand context and intent.""" - - def __init__(self, config: Dict): - self.config = config - self.logger = logging.getLogger(__name__) - - # Load domain and language mappings from config - self.domain_keywords = config.get('context_analysis', {}).get('domain_keywords', {}) - self.language_indicators = config.get('context_analysis', {}).get('language_indicators', {}) - - # Common programming terms and patterns - self.technical_terms = self._build_technical_terms() - self.file_pattern_indicators = self._build_pattern_indicators() - - # Stop words to filter out - self.stop_words = { - 'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', - 'by', 'from', 'up', 'about', 'into', 'through', 'during', 'before', 'after', - 'above', 'below', 'between', 'among', 'as', 'is', 'are', 'was', 'were', 'be', - 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', - 'could', 'should', 'may', 'might', 'must', 'can', 'this', 'that', 'these', - 'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her', - 'us', 'them', 'my', 'your', 'his', 'its', 'our', 'their' - } - - def _build_technical_terms(self) -> Dict[str, List[str]]: - """Build comprehensive list of technical terms grouped by category.""" - return { - 'authentication': [ - 'auth', 'authentication', 'login', 'logout', 'signin', 'signout', - 'user', 'password', 'token', 'jwt', 'oauth', 'session', 'cookie', - 'credential', 'authorize', 'permission', 'role', 'access' - ], - 'database': [ - 'database', 'db', 'sql', 'query', 'table', 'schema', 'migration', - 'model', 'orm', 'entity', 'relation', 'index', 'transaction', - 'crud', 'select', 'insert', 'update', 'delete', 'join' - ], - 'api': [ - 'api', 'rest', 'graphql', 'endpoint', 'route', 'controller', - 'handler', 'middleware', 'service', 'request', 'response', - 'http', 'get', 'post', 'put', 'delete', 'patch' - ], - 'frontend': [ - 'ui', 'component', 'view', 'template', 'page', 'layout', - 'style', 'css', 'html', 'javascript', 'react', 'vue', - 'angular', 'dom', 'event', 'state', 'props' - ], - 'backend': [ - 'server', 'service', 'business', 'logic', 'core', 'engine', - 'worker', 'job', 'queue', 'cache', 'redis', 'memcache' - ], - 'testing': [ - 'test', 'testing', 'spec', 'unit', 'integration', 'e2e', - 'mock', 
'stub', 'fixture', 'assert', 'expect', 'should' - ], - 'configuration': [ - 'config', 'configuration', 'setting', 'environment', 'env', - 'variable', 'constant', 'parameter', 'option' - ], - 'utility': [ - 'util', 'utility', 'helper', 'common', 'shared', 'lib', - 'library', 'tool', 'function', 'method' - ] - } - - def _build_pattern_indicators(self) -> Dict[str, List[str]]: - """Build indicators that suggest specific file patterns.""" - return { - 'source_code': ['implement', 'code', 'function', 'class', 'method'], - 'tests': ['test', 'testing', 'spec', 'unittest', 'pytest'], - 'documentation': ['doc', 'readme', 'guide', 'documentation', 'manual'], - 'configuration': ['config', 'setting', 'env', 'environment'], - 'build': ['build', 'compile', 'package', 'deploy', 'release'], - 'scripts': ['script', 'automation', 'tool', 'utility'] - } - - def extract_keywords(self, text: str) -> List[str]: - """Extract meaningful keywords from text.""" - # Clean and normalize text - text = text.lower() - text = re.sub(r'[^\w\s-]', ' ', text) # Remove punctuation except hyphens - words = text.split() - - # Filter stop words and short words - keywords = [] - for word in words: - word = word.strip('-') # Remove leading/trailing hyphens - if (len(word) >= 2 and - word not in self.stop_words and - not word.isdigit()): - keywords.append(word) - - # Count frequency and return top keywords - word_counts = Counter(keywords) - return [word for word, count in word_counts.most_common(20)] - - def identify_domains(self, keywords: List[str]) -> List[Tuple[str, float]]: - """Identify relevant domains based on keywords.""" - domain_scores = {} - - for domain, domain_keywords in self.domain_keywords.items(): - score = 0.0 - matched_keywords = [] - - for keyword in keywords: - for domain_keyword in domain_keywords: - if keyword in domain_keyword or domain_keyword in keyword: - score += 1.0 - matched_keywords.append(keyword) - break - - if score > 0: - # Normalize score by number of domain keywords - normalized_score = score / len(domain_keywords) - domain_scores[domain] = normalized_score - - # Also check technical terms - for category, terms in self.technical_terms.items(): - score = 0.0 - for keyword in keywords: - for term in terms: - if keyword in term or term in keyword: - score += 1.0 - break - - if score > 0: - normalized_score = score / len(terms) - if category not in domain_scores: - domain_scores[category] = normalized_score - else: - domain_scores[category] = max(domain_scores[category], normalized_score) - - # Sort by score and return top domains - sorted_domains = sorted(domain_scores.items(), key=lambda x: x[1], reverse=True) - return sorted_domains[:5] - - def identify_languages(self, keywords: List[str]) -> List[Tuple[str, float]]: - """Identify programming languages based on keywords.""" - language_scores = {} - - for language, indicators in self.language_indicators.items(): - score = 0.0 - for keyword in keywords: - for indicator in indicators: - if keyword in indicator or indicator in keyword: - score += 1.0 - break - - if score > 0: - normalized_score = score / len(indicators) - language_scores[language] = normalized_score - - sorted_languages = sorted(language_scores.items(), key=lambda x: x[1], reverse=True) - return sorted_languages[:3] - - def extract_file_patterns(self, text: str) -> List[str]: - """Extract explicit file patterns from text.""" - patterns = [] - - # Look for @{pattern} syntax - at_patterns = re.findall(r'@\{([^}]+)\}', text) - patterns.extend(at_patterns) - - # Look for file 
extensions - extensions = re.findall(r'\*\.(\w+)', text) - for ext in extensions: - patterns.append(f"*.{ext}") - - # Look for directory patterns - dir_patterns = re.findall(r'(\w+)/\*\*?', text) - for dir_pattern in dir_patterns: - patterns.append(f"{dir_pattern}/**/*") - - # Look for specific file names - file_patterns = re.findall(r'\b(\w+\.\w+)\b', text) - for file_pattern in file_patterns: - if '.' in file_pattern: - patterns.append(file_pattern) - - return list(set(patterns)) # Remove duplicates - - def suggest_patterns_from_domains(self, domains: List[str]) -> List[str]: - """Suggest file patterns based on identified domains.""" - patterns = [] - - domain_to_patterns = { - 'auth': ['**/auth/**/*', '**/login/**/*', '**/user/**/*'], - 'authentication': ['**/auth/**/*', '**/login/**/*', '**/user/**/*'], - 'database': ['**/db/**/*', '**/model/**/*', '**/migration/**/*', '**/*model*'], - 'api': ['**/api/**/*', '**/route/**/*', '**/controller/**/*', '**/handler/**/*'], - 'frontend': ['**/ui/**/*', '**/component/**/*', '**/view/**/*', '**/template/**/*'], - 'backend': ['**/service/**/*', '**/core/**/*', '**/server/**/*'], - 'test': ['**/test/**/*', '**/spec/**/*', '**/*test*', '**/*spec*'], - 'testing': ['**/test/**/*', '**/spec/**/*', '**/*test*', '**/*spec*'], - 'config': ['**/config/**/*', '**/*.config.*', '**/env/**/*'], - 'configuration': ['**/config/**/*', '**/*.config.*', '**/env/**/*'], - 'util': ['**/util/**/*', '**/helper/**/*', '**/common/**/*'], - 'utility': ['**/util/**/*', '**/helper/**/*', '**/common/**/*'] - } - - for domain in domains: - if domain in domain_to_patterns: - patterns.extend(domain_to_patterns[domain]) - - return list(set(patterns)) # Remove duplicates - - def extract_entities(self, text: str) -> Dict[str, List[str]]: - """Extract named entities from text.""" - entities = { - 'files': [], - 'functions': [], - 'classes': [], - 'variables': [], - 'technologies': [] - } - - # File patterns - file_patterns = re.findall(r'\b(\w+\.\w+)\b', text) - entities['files'] = list(set(file_patterns)) - - # Function patterns (camelCase or snake_case followed by parentheses) - function_patterns = re.findall(r'\b([a-z][a-zA-Z0-9_]*)\s*\(', text) - entities['functions'] = list(set(function_patterns)) - - # Class patterns (PascalCase) - class_patterns = re.findall(r'\b([A-Z][a-zA-Z0-9]*)\b', text) - entities['classes'] = list(set(class_patterns)) - - # Technology mentions - tech_keywords = [ - 'react', 'vue', 'angular', 'node', 'express', 'django', 'flask', - 'spring', 'rails', 'laravel', 'docker', 'kubernetes', 'aws', - 'azure', 'gcp', 'postgresql', 'mysql', 'mongodb', 'redis' - ] - text_lower = text.lower() - for tech in tech_keywords: - if tech in text_lower: - entities['technologies'].append(tech) - - return entities - - def analyze(self, prompt: str) -> AnalysisResult: - """Perform comprehensive analysis of the user prompt.""" - self.logger.debug(f"Analyzing prompt: {prompt[:100]}...") - - # Extract keywords - keywords = self.extract_keywords(prompt) - - # Identify domains and languages - domains_with_scores = self.identify_domains(keywords) - languages_with_scores = self.identify_languages(keywords) - - # Extract patterns and entities - explicit_patterns = self.extract_file_patterns(prompt) - entities = self.extract_entities(prompt) - - # Get top domains and languages - domains = [domain for domain, score in domains_with_scores] - languages = [lang for lang, score in languages_with_scores] - - # Suggest additional patterns based on domains - suggested_patterns = 
self.suggest_patterns_from_domains(domains) - - # Combine explicit and suggested patterns - all_patterns = list(set(explicit_patterns + suggested_patterns)) - - # Build confidence scores - confidence_scores = { - 'keywords': len(keywords) / 20, # Normalize to 0-1 - 'domain_match': max([score for _, score in domains_with_scores[:1]], default=0), - 'language_match': max([score for _, score in languages_with_scores[:1]], default=0), - 'pattern_extraction': len(explicit_patterns) / 5, # Normalize to 0-1 - } - - result = AnalysisResult( - keywords=keywords, - domains=domains, - languages=languages, - file_patterns=all_patterns, - confidence_scores=confidence_scores, - extracted_entities=entities - ) - - self.logger.info(f"Analysis complete: {len(domains)} domains, {len(languages)} languages, {len(all_patterns)} patterns") - return result - -def main(): - """Command-line interface for context analyzer.""" - import yaml - import argparse - import json - - parser = argparse.ArgumentParser(description="Context Analyzer for UltraThink") - parser.add_argument("prompt", help="Prompt to analyze") - parser.add_argument("--config", default="config.yaml", help="Configuration file path") - parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output") - - args = parser.parse_args() - - # Setup logging - level = logging.DEBUG if args.verbose else logging.INFO - logging.basicConfig(level=level, format='%(levelname)s: %(message)s') - - # Load configuration - from pathlib import Path - config_path = Path(__file__).parent / args.config - with open(config_path, 'r', encoding='utf-8') as f: - config = yaml.safe_load(f) - - # Create analyzer - analyzer = ContextAnalyzer(config) - - # Analyze prompt - result = analyzer.analyze(args.prompt) - - # Output results - print(f"Keywords: {', '.join(result.keywords[:10])}") - print(f"Domains: {', '.join(result.domains[:5])}") - print(f"Languages: {', '.join(result.languages[:3])}") - print(f"Patterns: {', '.join(result.file_patterns[:10])}") - - if args.verbose: - print("\nDetailed Results:") - print(json.dumps({ - 'keywords': result.keywords, - 'domains': result.domains, - 'languages': result.languages, - 'file_patterns': result.file_patterns, - 'confidence_scores': result.confidence_scores, - 'extracted_entities': result.extracted_entities - }, indent=2)) - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/.claude/python_script/core/embedding_manager.py b/.claude/python_script/core/embedding_manager.py deleted file mode 100644 index 67ed1652..00000000 --- a/.claude/python_script/core/embedding_manager.py +++ /dev/null @@ -1,458 +0,0 @@ -#!/usr/bin/env python3 -""" -Embedding Manager Module for UltraThink Path-Aware Analyzer -Manages embeddings for semantic similarity search (RAG functionality). 
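The deleted context_analyzer.py turns a free-form prompt into ranked domains by counting keyword overlaps against the keyword lists supplied in config.yaml. A compact sketch of that extraction-and-scoring loop (the stop-word set and keyword map are abbreviated stand-ins for the configured ones):

```python
import re
from collections import Counter

STOP_WORDS = {"the", "a", "an", "and", "or", "to", "for", "of", "in", "is"}  # abbreviated
DOMAIN_KEYWORDS = {  # abbreviated stand-in for context_analysis.domain_keywords
    "auth": ["auth", "login", "user", "password", "jwt", "token", "session"],
    "api": ["api", "endpoint", "route", "controller", "service", "handler"],
}

def extract_keywords(text: str, top_n: int = 20) -> list:
    words = re.sub(r"[^\w\s-]", " ", text.lower()).split()
    words = [w.strip("-") for w in words if len(w) >= 2 and w not in STOP_WORDS and not w.isdigit()]
    return [w for w, _ in Counter(words).most_common(top_n)]

def score_domains(keywords: list) -> list:
    scores = {}
    for domain, domain_kw in DOMAIN_KEYWORDS.items():
        hits = sum(any(k in d or d in k for d in domain_kw) for k in keywords)
        if hits:
            scores[domain] = hits / len(domain_kw)  # normalise by keyword-list size
    return sorted(scores.items(), key=lambda kv: kv[1], reverse=True)

print(score_domains(extract_keywords("fix the jwt login endpoint")))
# -> [('auth', 0.2857...), ('api', 0.1666...)]
```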
-""" - -import os -import json -import hashlib -import logging -import pickle -from pathlib import Path -from typing import Dict, List, Tuple, Optional, Any -from dataclasses import dataclass -import time - -# Optional imports for embedding functionality -try: - import numpy as np - NUMPY_AVAILABLE = True -except ImportError: - NUMPY_AVAILABLE = False - -try: - from sentence_transformers import SentenceTransformer - SENTENCE_TRANSFORMERS_AVAILABLE = True -except ImportError: - SENTENCE_TRANSFORMERS_AVAILABLE = False - -from .file_indexer import FileInfo - -@dataclass -class EmbeddingInfo: - """Information about a file's embedding.""" - file_path: str - content_hash: str - embedding_hash: str - created_time: float - vector_size: int - -@dataclass -class SimilarityResult: - """Result of similarity search.""" - file_info: FileInfo - similarity_score: float - matching_content: str - -class EmbeddingManager: - """Manages embeddings for semantic file matching.""" - - def __init__(self, config: Dict): - self.config = config - self.logger = logging.getLogger(__name__) - - # Check if embeddings are enabled - self.enabled = config.get('embedding', {}).get('enabled', False) - if not self.enabled: - self.logger.info("Embeddings disabled in configuration") - return - - # Check dependencies - if not NUMPY_AVAILABLE: - self.logger.warning("NumPy not available, disabling embeddings") - self.enabled = False - return - - if not SENTENCE_TRANSFORMERS_AVAILABLE: - self.logger.warning("sentence-transformers not available, disabling embeddings") - self.enabled = False - return - - # Load configuration - self.model_name = config.get('embedding', {}).get('model', 'all-MiniLM-L6-v2') - self.cache_dir = Path(config.get('embedding', {}).get('cache_dir', '.claude/cache/embeddings')) - self.similarity_threshold = config.get('embedding', {}).get('similarity_threshold', 0.6) - self.max_context_length = config.get('embedding', {}).get('max_context_length', 512) - self.batch_size = config.get('embedding', {}).get('batch_size', 32) - self.trust_remote_code = config.get('embedding', {}).get('trust_remote_code', False) - - # Setup cache directories - self.cache_dir.mkdir(parents=True, exist_ok=True) - self.embeddings_file = self.cache_dir / "embeddings.pkl" - self.index_file = self.cache_dir / "embedding_index.json" - - # Initialize model lazily - self._model = None - self._embeddings_cache = None - self._embedding_index = None - - @property - def model(self): - """Lazy load the embedding model.""" - if not self.enabled: - return None - - if self._model is None: - try: - self.logger.info(f"Loading embedding model: {self.model_name}") - # Initialize with trust_remote_code for CodeSage V2 - if self.trust_remote_code: - self._model = SentenceTransformer(self.model_name, trust_remote_code=True) - else: - self._model = SentenceTransformer(self.model_name) - self.logger.info(f"Model loaded successfully") - except Exception as e: - self.logger.error(f"Failed to load embedding model: {e}") - self.enabled = False - return None - - return self._model - - def embeddings_exist(self) -> bool: - """Check if embeddings cache exists.""" - return self.embeddings_file.exists() and self.index_file.exists() - - def _load_embedding_cache(self) -> Dict[str, np.ndarray]: - """Load embeddings from cache.""" - if self._embeddings_cache is not None: - return self._embeddings_cache - - if not self.embeddings_file.exists(): - self._embeddings_cache = {} - return self._embeddings_cache - - try: - with open(self.embeddings_file, 'rb') as f: - 
self._embeddings_cache = pickle.load(f) - self.logger.debug(f"Loaded {len(self._embeddings_cache)} embeddings from cache") - except Exception as e: - self.logger.warning(f"Failed to load embeddings cache: {e}") - self._embeddings_cache = {} - - return self._embeddings_cache - - def _save_embedding_cache(self): - """Save embeddings to cache.""" - if self._embeddings_cache is None: - return - - try: - with open(self.embeddings_file, 'wb') as f: - pickle.dump(self._embeddings_cache, f) - self.logger.debug(f"Saved {len(self._embeddings_cache)} embeddings to cache") - except Exception as e: - self.logger.error(f"Failed to save embeddings cache: {e}") - - def _load_embedding_index(self) -> Dict[str, EmbeddingInfo]: - """Load embedding index.""" - if self._embedding_index is not None: - return self._embedding_index - - if not self.index_file.exists(): - self._embedding_index = {} - return self._embedding_index - - try: - with open(self.index_file, 'r', encoding='utf-8') as f: - data = json.load(f) - self._embedding_index = {} - for path, info_dict in data.items(): - self._embedding_index[path] = EmbeddingInfo(**info_dict) - self.logger.debug(f"Loaded embedding index with {len(self._embedding_index)} entries") - except Exception as e: - self.logger.warning(f"Failed to load embedding index: {e}") - self._embedding_index = {} - - return self._embedding_index - - def _save_embedding_index(self): - """Save embedding index.""" - if self._embedding_index is None: - return - - try: - data = {} - for path, info in self._embedding_index.items(): - data[path] = { - 'file_path': info.file_path, - 'content_hash': info.content_hash, - 'embedding_hash': info.embedding_hash, - 'created_time': info.created_time, - 'vector_size': info.vector_size - } - - with open(self.index_file, 'w', encoding='utf-8') as f: - json.dump(data, f, indent=2) - self.logger.debug(f"Saved embedding index with {len(self._embedding_index)} entries") - except Exception as e: - self.logger.error(f"Failed to save embedding index: {e}") - - def _extract_text_content(self, file_info: FileInfo) -> Optional[str]: - """Extract text content from a file for embedding.""" - try: - file_path = Path(file_info.path) - - # Skip binary files and very large files - if file_info.size > self.config.get('performance', {}).get('max_file_size', 10485760): - return None - - # Only process text-based files - text_extensions = {'.py', '.js', '.ts', '.tsx', '.jsx', '.java', '.cpp', '.c', '.h', - '.rs', '.go', '.php', '.rb', '.sh', '.bash', '.md', '.txt', '.json', - '.yaml', '.yml', '.xml', '.html', '.css', '.scss', '.sass'} - - if file_info.extension.lower() not in text_extensions: - return None - - with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: - content = f.read() - - # Truncate content if too long (CodeSage V2 supports longer contexts) - if len(content) > self.max_context_length * 4: # Approximate token limit - content = content[:self.max_context_length * 4] - - return content - - except Exception as e: - self.logger.debug(f"Could not extract content from {file_info.path}: {e}") - return None - - def _create_embedding(self, text: str) -> Optional[np.ndarray]: - """Create embedding for text content.""" - if not self.enabled or self.model is None: - return None - - try: - # Truncate text if needed - if len(text) > self.max_context_length * 4: - text = text[:self.max_context_length * 4] - - embedding = self.model.encode([text])[0] - return embedding - - except Exception as e: - self.logger.warning(f"Failed to create embedding: {e}") - return 
None - - def _get_content_hash(self, content: str) -> str: - """Get hash of content for caching.""" - return hashlib.md5(content.encode('utf-8')).hexdigest() - - def _get_embedding_hash(self, embedding: np.ndarray) -> str: - """Get hash of embedding for verification.""" - return hashlib.md5(embedding.tobytes()).hexdigest() - - def update_embeddings(self, file_index: Dict[str, FileInfo], force_rebuild: bool = False) -> int: - """Update embeddings for files in the index.""" - if not self.enabled: - self.logger.info("Embeddings disabled, skipping update") - return 0 - - self.logger.info("Updating embeddings...") - - # Load caches - embeddings_cache = self._load_embedding_cache() - embedding_index = self._load_embedding_index() - - new_embeddings = 0 - batch_texts = [] - batch_paths = [] - - for file_path, file_info in file_index.items(): - # Check if embedding exists and is current - if not force_rebuild and file_path in embedding_index: - cached_info = embedding_index[file_path] - if cached_info.content_hash == file_info.content_hash: - continue # Embedding is current - - # Extract content - content = self._extract_text_content(file_info) - if content is None: - continue - - # Prepare for batch processing - batch_texts.append(content) - batch_paths.append(file_path) - - # Process batch when full - if len(batch_texts) >= self.batch_size: - self._process_batch(batch_texts, batch_paths, file_index, embeddings_cache, embedding_index) - new_embeddings += len(batch_texts) - batch_texts = [] - batch_paths = [] - - # Process remaining batch - if batch_texts: - self._process_batch(batch_texts, batch_paths, file_index, embeddings_cache, embedding_index) - new_embeddings += len(batch_texts) - - # Save caches - self._save_embedding_cache() - self._save_embedding_index() - - self.logger.info(f"Updated {new_embeddings} embeddings") - return new_embeddings - - def _process_batch(self, texts: List[str], paths: List[str], file_index: Dict[str, FileInfo], - embeddings_cache: Dict[str, np.ndarray], embedding_index: Dict[str, EmbeddingInfo]): - """Process a batch of texts for embedding.""" - try: - # Create embeddings for batch - embeddings = self.model.encode(texts) - - for i, (text, path) in enumerate(zip(texts, paths)): - embedding = embeddings[i] - file_info = file_index[path] - - # Store embedding - content_hash = self._get_content_hash(text) - embedding_hash = self._get_embedding_hash(embedding) - - embeddings_cache[path] = embedding - embedding_index[path] = EmbeddingInfo( - file_path=path, - content_hash=content_hash, - embedding_hash=embedding_hash, - created_time=time.time(), - vector_size=len(embedding) - ) - - except Exception as e: - self.logger.error(f"Failed to process embedding batch: {e}") - - def find_similar_files(self, query: str, file_index: Dict[str, FileInfo], - top_k: int = 20) -> List[SimilarityResult]: - """Find files similar to the query using embeddings.""" - if not self.enabled: - return [] - - # Create query embedding - query_embedding = self._create_embedding(query) - if query_embedding is None: - return [] - - # Load embeddings - embeddings_cache = self._load_embedding_cache() - if not embeddings_cache: - self.logger.warning("No embeddings available for similarity search") - return [] - - # Calculate similarities - similarities = [] - for file_path, file_embedding in embeddings_cache.items(): - if file_path not in file_index: - continue - - try: - # Calculate cosine similarity - similarity = np.dot(query_embedding, file_embedding) / ( - np.linalg.norm(query_embedding) * 
np.linalg.norm(file_embedding) - ) - - if similarity >= self.similarity_threshold: - similarities.append((file_path, similarity)) - - except Exception as e: - self.logger.debug(f"Failed to calculate similarity for {file_path}: {e}") - continue - - # Sort by similarity - similarities.sort(key=lambda x: x[1], reverse=True) - - # Create results - results = [] - for file_path, similarity in similarities[:top_k]: - file_info = file_index[file_path] - - # Extract a snippet of matching content - content = self._extract_text_content(file_info) - snippet = content[:200] + "..." if content and len(content) > 200 else content or "" - - result = SimilarityResult( - file_info=file_info, - similarity_score=similarity, - matching_content=snippet - ) - results.append(result) - - self.logger.info(f"Found {len(results)} similar files for query") - return results - - def get_stats(self) -> Dict[str, Any]: - """Get statistics about the embedding cache.""" - if not self.enabled: - return {'enabled': False} - - embedding_index = self._load_embedding_index() - embeddings_cache = self._load_embedding_cache() - - return { - 'enabled': True, - 'model_name': self.model_name, - 'total_embeddings': len(embedding_index), - 'cache_size_mb': os.path.getsize(self.embeddings_file) / 1024 / 1024 if self.embeddings_file.exists() else 0, - 'similarity_threshold': self.similarity_threshold, - 'vector_size': list(embedding_index.values())[0].vector_size if embedding_index else 0 - } - -def main(): - """Command-line interface for embedding manager.""" - import yaml - import argparse - from .file_indexer import FileIndexer - - parser = argparse.ArgumentParser(description="Embedding Manager for UltraThink") - parser.add_argument("--config", default="config.yaml", help="Configuration file path") - parser.add_argument("--update", action="store_true", help="Update embeddings") - parser.add_argument("--rebuild", action="store_true", help="Force rebuild all embeddings") - parser.add_argument("--query", help="Search for similar files") - parser.add_argument("--stats", action="store_true", help="Show embedding statistics") - parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output") - - args = parser.parse_args() - - # Setup logging - level = logging.DEBUG if args.verbose else logging.INFO - logging.basicConfig(level=level, format='%(levelname)s: %(message)s') - - # Load configuration - config_path = Path(__file__).parent / args.config - with open(config_path, 'r', encoding='utf-8') as f: - config = yaml.safe_load(f) - - # Create components - indexer = FileIndexer(config) - embedding_manager = EmbeddingManager(config) - - if not embedding_manager.enabled: - print("Embeddings are disabled. 
Enable in config.yaml or install required dependencies.") - return - - # Load file index - file_index = indexer.load_index() - if not file_index: - print("Building file index...") - file_index = indexer.build_index() - - if args.stats: - stats = embedding_manager.get_stats() - print("Embedding Statistics:") - for key, value in stats.items(): - print(f" {key}: {value}") - return - - if args.update or args.rebuild: - count = embedding_manager.update_embeddings(file_index, force_rebuild=args.rebuild) - print(f"Updated {count} embeddings") - - if args.query: - results = embedding_manager.find_similar_files(args.query, file_index) - print(f"Found {len(results)} similar files:") - for result in results: - print(f" {result.file_info.relative_path} (similarity: {result.similarity_score:.3f})") - if args.verbose and result.matching_content: - print(f" Content: {result.matching_content[:100]}...") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/.claude/python_script/core/file_indexer.py b/.claude/python_script/core/file_indexer.py deleted file mode 100644 index 83dcd290..00000000 --- a/.claude/python_script/core/file_indexer.py +++ /dev/null @@ -1,383 +0,0 @@ -#!/usr/bin/env python3 -""" -File Indexer Module for UltraThink Path-Aware Analyzer -Builds and maintains an index of repository files with metadata. -Enhanced with gitignore support and unified configuration. -""" - -import os -import hashlib -import json -import time -import logging -from pathlib import Path -from typing import Dict, List, Optional, Set, Tuple, Union -from dataclasses import dataclass, asdict -from datetime import datetime -import fnmatch - -from .gitignore_parser import GitignoreParser - -@dataclass -class FileInfo: - """Information about a single file in the repository.""" - path: str - relative_path: str - size: int - modified_time: float - extension: str - category: str # code, docs, config, web - estimated_tokens: int - content_hash: str - - def to_dict(self) -> Dict: - return asdict(self) - - @classmethod - def from_dict(cls, data: Dict) -> 'FileInfo': - return cls(**data) - -@dataclass -class IndexStats: - """Statistics about the file index.""" - total_files: int - total_tokens: int - total_size: int - categories: Dict[str, int] - last_updated: float - - def to_dict(self) -> Dict: - return asdict(self) - -class FileIndexer: - """Builds and maintains an efficient index of repository files.""" - - def __init__(self, config: Union['Config', Dict], root_path: str = "."): - # Support both Config object and Dict for backward compatibility - if hasattr(config, 'to_dict'): - self.config_obj = config - self.config = config.to_dict() - else: - self.config_obj = None - self.config = config - - self.root_path = Path(root_path).resolve() - self.cache_dir = Path(self.config.get('embedding', {}).get('cache_dir', '.claude/cache')) - self.cache_dir.mkdir(parents=True, exist_ok=True) - self.index_file = self.cache_dir / "file_index.json" - - # Setup logging - self.logger = logging.getLogger(__name__) - - # File extension mappings - self.extension_categories = self._build_extension_map() - - # Exclude patterns from config - self.exclude_patterns = list(self.config.get('exclude_patterns', [])) - - # Initialize gitignore parser and add patterns - self.gitignore_parser = GitignoreParser(str(self.root_path)) - self._load_gitignore_patterns() - - # Performance settings - self.max_file_size = self.config.get('performance', {}).get('max_file_size', 10485760) - - def _build_extension_map(self) -> Dict[str, str]: 
- """Build mapping from file extensions to categories.""" - ext_map = {} - for category, extensions in self.config.get('file_extensions', {}).items(): - for ext in extensions: - ext_map[ext.lower()] = category - return ext_map - - def _load_gitignore_patterns(self): - """Load patterns from .gitignore files and add to exclude_patterns.""" - try: - gitignore_patterns = self.gitignore_parser.parse_all_gitignores() - - if gitignore_patterns: - # Avoid duplicates - existing_patterns = set(self.exclude_patterns) - new_patterns = [p for p in gitignore_patterns if p not in existing_patterns] - - self.exclude_patterns.extend(new_patterns) - self.logger.info(f"Added {len(new_patterns)} patterns from .gitignore files") - - except Exception as e: - self.logger.warning(f"Failed to load .gitignore patterns: {e}") - - def _should_exclude_file(self, file_path: Path) -> bool: - """Check if file should be excluded based on patterns and gitignore rules.""" - relative_path = str(file_path.relative_to(self.root_path)) - - # Check against exclude patterns from config - for pattern in self.exclude_patterns: - # Convert pattern to work with fnmatch - if fnmatch.fnmatch(relative_path, pattern) or fnmatch.fnmatch(str(file_path), pattern): - return True - - # Check if any parent directory matches - parts = relative_path.split(os.sep) - for i in range(len(parts)): - partial_path = "/".join(parts[:i+1]) - if fnmatch.fnmatch(partial_path, pattern): - return True - - # Also check gitignore rules using dedicated parser - # Note: gitignore patterns are already included in self.exclude_patterns - # but we can add additional gitignore-specific checking here if needed - try: - # The gitignore patterns are already loaded into exclude_patterns, - # but we can do additional gitignore-specific checks if needed - pass - except Exception as e: - self.logger.debug(f"Error in gitignore checking for {file_path}: {e}") - - return False - - def _estimate_tokens(self, file_path: Path) -> int: - """Estimate token count for a file (chars/4 approximation).""" - try: - if file_path.stat().st_size > self.max_file_size: - return file_path.stat().st_size // 8 # Penalty for large files - - with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: - content = f.read() - return len(content) // 4 # Rough approximation - except (UnicodeDecodeError, OSError): - # Binary files or unreadable files - return file_path.stat().st_size // 8 - - def _get_file_hash(self, file_path: Path) -> str: - """Get a hash of file metadata for change detection.""" - stat = file_path.stat() - return hashlib.md5(f"{file_path}:{stat.st_size}:{stat.st_mtime}".encode()).hexdigest() - - def _categorize_file(self, file_path: Path) -> str: - """Categorize file based on extension.""" - extension = file_path.suffix.lower() - return self.extension_categories.get(extension, 'other') - - def _scan_file(self, file_path: Path) -> Optional[FileInfo]: - """Scan a single file and create FileInfo.""" - try: - if not file_path.is_file() or self._should_exclude_file(file_path): - return None - - stat = file_path.stat() - relative_path = str(file_path.relative_to(self.root_path)) - - file_info = FileInfo( - path=str(file_path), - relative_path=relative_path, - size=stat.st_size, - modified_time=stat.st_mtime, - extension=file_path.suffix.lower(), - category=self._categorize_file(file_path), - estimated_tokens=self._estimate_tokens(file_path), - content_hash=self._get_file_hash(file_path) - ) - - return file_info - - except (OSError, PermissionError) as e: - 
self.logger.warning(f"Could not scan file {file_path}: {e}") - return None - - def build_index(self, force_rebuild: bool = False) -> Dict[str, FileInfo]: - """Build or update the file index.""" - self.logger.info(f"Building file index for {self.root_path}") - - # Load existing index if available - existing_index = {} - if not force_rebuild and self.index_file.exists(): - existing_index = self.load_index() - - new_index = {} - changed_files = 0 - - # Walk through all files - for file_path in self.root_path.rglob('*'): - if not file_path.is_file(): - continue - - file_info = self._scan_file(file_path) - if file_info is None: - continue - - # Check if file has changed - relative_path = file_info.relative_path - if relative_path in existing_index: - old_info = existing_index[relative_path] - if old_info.content_hash == file_info.content_hash: - # File unchanged, keep old info - new_index[relative_path] = old_info - continue - - # File is new or changed - new_index[relative_path] = file_info - changed_files += 1 - - self.logger.info(f"Indexed {len(new_index)} files ({changed_files} new/changed)") - - # Save index - self.save_index(new_index) - - return new_index - - def load_index(self) -> Dict[str, FileInfo]: - """Load file index from cache.""" - if not self.index_file.exists(): - return {} - - try: - with open(self.index_file, 'r', encoding='utf-8') as f: - data = json.load(f) - index = {} - for path, info_dict in data.get('files', {}).items(): - index[path] = FileInfo.from_dict(info_dict) - return index - except (json.JSONDecodeError, KeyError) as e: - self.logger.warning(f"Could not load index: {e}") - return {} - - def save_index(self, index: Dict[str, FileInfo]) -> None: - """Save file index to cache.""" - try: - # Calculate stats - stats = self._calculate_stats(index) - - data = { - 'stats': stats.to_dict(), - 'files': {path: info.to_dict() for path, info in index.items()} - } - - with open(self.index_file, 'w', encoding='utf-8') as f: - json.dump(data, f, indent=2) - - except OSError as e: - self.logger.error(f"Could not save index: {e}") - - def _calculate_stats(self, index: Dict[str, FileInfo]) -> IndexStats: - """Calculate statistics for the index.""" - total_files = len(index) - total_tokens = sum(info.estimated_tokens for info in index.values()) - total_size = sum(info.size for info in index.values()) - - categories = {} - for info in index.values(): - categories[info.category] = categories.get(info.category, 0) + 1 - - return IndexStats( - total_files=total_files, - total_tokens=total_tokens, - total_size=total_size, - categories=categories, - last_updated=time.time() - ) - - def get_stats(self) -> Optional[IndexStats]: - """Get statistics about the current index.""" - if not self.index_file.exists(): - return None - - try: - with open(self.index_file, 'r', encoding='utf-8') as f: - data = json.load(f) - return IndexStats(**data.get('stats', {})) - except (json.JSONDecodeError, KeyError): - return None - - def find_files_by_pattern(self, pattern: str, index: Optional[Dict[str, FileInfo]] = None) -> List[FileInfo]: - """Find files matching a glob pattern.""" - if index is None: - index = self.load_index() - - matching_files = [] - for path, info in index.items(): - if fnmatch.fnmatch(path, pattern) or fnmatch.fnmatch(info.path, pattern): - matching_files.append(info) - - return matching_files - - def find_files_by_category(self, category: str, index: Optional[Dict[str, FileInfo]] = None) -> List[FileInfo]: - """Find files by category (code, docs, config, etc.).""" - if index is 
None: - index = self.load_index() - - return [info for info in index.values() if info.category == category] - - def find_files_by_keywords(self, keywords: List[str], index: Optional[Dict[str, FileInfo]] = None) -> List[FileInfo]: - """Find files whose paths contain any of the specified keywords.""" - if index is None: - index = self.load_index() - - matching_files = [] - keywords_lower = [kw.lower() for kw in keywords] - - for info in index.values(): - path_lower = info.relative_path.lower() - if any(keyword in path_lower for keyword in keywords_lower): - matching_files.append(info) - - return matching_files - - def get_recent_files(self, limit: int = 20, index: Optional[Dict[str, FileInfo]] = None) -> List[FileInfo]: - """Get most recently modified files.""" - if index is None: - index = self.load_index() - - files = list(index.values()) - files.sort(key=lambda f: f.modified_time, reverse=True) - return files[:limit] - -def main(): - """Command-line interface for file indexer.""" - import yaml - import argparse - - parser = argparse.ArgumentParser(description="File Indexer for UltraThink") - parser.add_argument("--config", default="config.yaml", help="Configuration file path") - parser.add_argument("--rebuild", action="store_true", help="Force rebuild index") - parser.add_argument("--stats", action="store_true", help="Show index statistics") - parser.add_argument("--pattern", help="Find files matching pattern") - - args = parser.parse_args() - - # Load configuration - config_path = Path(__file__).parent / args.config - with open(config_path, 'r', encoding='utf-8') as f: - config = yaml.safe_load(f) - - # Setup logging - logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') - - # Create indexer - indexer = FileIndexer(config) - - if args.stats: - stats = indexer.get_stats() - if stats: - print(f"Total files: {stats.total_files}") - print(f"Total tokens: {stats.total_tokens:,}") - print(f"Total size: {stats.total_size:,} bytes") - print(f"Categories: {stats.categories}") - print(f"Last updated: {datetime.fromtimestamp(stats.last_updated)}") - else: - print("No index found. Run without --stats to build index.") - return - - # Build index - index = indexer.build_index(force_rebuild=args.rebuild) - - if args.pattern: - files = indexer.find_files_by_pattern(args.pattern, index) - print(f"Found {len(files)} files matching pattern '{args.pattern}':") - for file_info in files[:20]: # Limit output - print(f" {file_info.relative_path}") - else: - stats = indexer._calculate_stats(index) - print(f"Index built: {stats.total_files} files, ~{stats.total_tokens:,} tokens") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/.claude/python_script/core/gitignore_parser.py b/.claude/python_script/core/gitignore_parser.py deleted file mode 100644 index 549e0014..00000000 --- a/.claude/python_script/core/gitignore_parser.py +++ /dev/null @@ -1,182 +0,0 @@ -#!/usr/bin/env python3 -""" -GitIgnore Parser Module -Parses .gitignore files and converts rules to fnmatch patterns for file exclusion. 
-""" - -import os -import fnmatch -from pathlib import Path -from typing import List, Set, Optional - - -class GitignoreParser: - """Parser for .gitignore files that converts rules to fnmatch patterns.""" - - def __init__(self, root_path: str = "."): - self.root_path = Path(root_path).resolve() - self.patterns: List[str] = [] - self.negation_patterns: List[str] = [] - - def parse_file(self, gitignore_path: str) -> List[str]: - """Parse a .gitignore file and return exclude patterns.""" - gitignore_file = Path(gitignore_path) - if not gitignore_file.exists(): - return [] - - patterns = [] - try: - with open(gitignore_file, 'r', encoding='utf-8') as f: - for line_num, line in enumerate(f, 1): - pattern = self._parse_line(line.strip()) - if pattern: - patterns.append(pattern) - except (UnicodeDecodeError, IOError): - # Fallback to system encoding if UTF-8 fails - try: - with open(gitignore_file, 'r') as f: - for line_num, line in enumerate(f, 1): - pattern = self._parse_line(line.strip()) - if pattern: - patterns.append(pattern) - except IOError: - # If file can't be read, return empty list - return [] - - return patterns - - def _parse_line(self, line: str) -> Optional[str]: - """Parse a single line from .gitignore file.""" - # Skip empty lines and comments - if not line or line.startswith('#'): - return None - - # Handle negation patterns (starting with !) - if line.startswith('!'): - # For now, we'll skip negation patterns as they require - # more complex logic to implement correctly - return None - - # Convert gitignore pattern to fnmatch pattern - return self._convert_to_fnmatch(line) - - def _convert_to_fnmatch(self, pattern: str) -> str: - """Convert gitignore pattern to fnmatch pattern.""" - # Remove trailing slash (directory indicator) - if pattern.endswith('/'): - pattern = pattern[:-1] - - # Handle absolute paths (starting with /) - if pattern.startswith('/'): - pattern = pattern[1:] - # Make it match from root - return pattern - - # Handle patterns that should match anywhere in the tree - # If pattern doesn't contain '/', it matches files/dirs at any level - if '/' not in pattern: - return f"*/{pattern}" - - # Pattern contains '/', so it's relative to the gitignore location - return pattern - - def parse_all_gitignores(self, root_path: Optional[str] = None) -> List[str]: - """Parse all .gitignore files in the repository hierarchy.""" - if root_path: - self.root_path = Path(root_path).resolve() - - all_patterns = [] - - # Find all .gitignore files in the repository - gitignore_files = self._find_gitignore_files() - - for gitignore_file in gitignore_files: - patterns = self.parse_file(gitignore_file) - all_patterns.extend(patterns) - - return all_patterns - - def _find_gitignore_files(self) -> List[Path]: - """Find all .gitignore files in the repository.""" - gitignore_files = [] - - # Start with root .gitignore - root_gitignore = self.root_path / '.gitignore' - if root_gitignore.exists(): - gitignore_files.append(root_gitignore) - - # Find .gitignore files in subdirectories - try: - for gitignore_file in self.root_path.rglob('.gitignore'): - if gitignore_file != root_gitignore: - gitignore_files.append(gitignore_file) - except (PermissionError, OSError): - # Skip directories we can't access - pass - - return gitignore_files - - def should_exclude(self, file_path: str, gitignore_patterns: List[str]) -> bool: - """Check if a file should be excluded based on gitignore patterns.""" - # Convert to relative path from root - try: - rel_path = 
str(Path(file_path).relative_to(self.root_path)) - except ValueError: - # File is not under root path - return False - - # Normalize path separators for consistent matching - rel_path = rel_path.replace(os.sep, '/') - - for pattern in gitignore_patterns: - if self._matches_pattern(rel_path, pattern): - return True - - return False - - def _matches_pattern(self, file_path: str, pattern: str) -> bool: - """Check if a file path matches a gitignore pattern.""" - # Normalize pattern separators - pattern = pattern.replace(os.sep, '/') - - # Handle different pattern types - if pattern.startswith('*/'): - # Pattern like */pattern - matches at any level - sub_pattern = pattern[2:] - return fnmatch.fnmatch(file_path, f"*/{sub_pattern}") or fnmatch.fnmatch(file_path, sub_pattern) - elif '/' in pattern: - # Pattern contains slash - match exact path - return fnmatch.fnmatch(file_path, pattern) - else: - # Simple pattern - match filename or directory at any level - parts = file_path.split('/') - return any(fnmatch.fnmatch(part, pattern) for part in parts) - - -def parse_gitignore(gitignore_path: str) -> List[str]: - """Convenience function to parse a single .gitignore file.""" - parser = GitignoreParser() - return parser.parse_file(gitignore_path) - - -def get_all_gitignore_patterns(root_path: str = ".") -> List[str]: - """Convenience function to get all gitignore patterns in a repository.""" - parser = GitignoreParser(root_path) - return parser.parse_all_gitignores() - - -if __name__ == "__main__": - import sys - - if len(sys.argv) > 1: - gitignore_path = sys.argv[1] - patterns = parse_gitignore(gitignore_path) - print(f"Parsed {len(patterns)} patterns from {gitignore_path}:") - for pattern in patterns: - print(f" {pattern}") - else: - # Parse all .gitignore files in current directory - patterns = get_all_gitignore_patterns() - print(f"Found {len(patterns)} gitignore patterns:") - for pattern in patterns: - print(f" {pattern}") \ No newline at end of file diff --git a/.claude/python_script/core/path_matcher.py b/.claude/python_script/core/path_matcher.py deleted file mode 100644 index c410ef77..00000000 --- a/.claude/python_script/core/path_matcher.py +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/env python3 -""" -Path Matcher Module for UltraThink Path-Aware Analyzer -Matches files to analysis context and ranks them by relevance. 
-""" - -import re -import logging -import fnmatch -from typing import Dict, List, Tuple, Optional, Set -from dataclasses import dataclass -from pathlib import Path -import math - -from .file_indexer import FileInfo -from .context_analyzer import AnalysisResult - -@dataclass -class MatchResult: - """Result of path matching with relevance score.""" - file_info: FileInfo - relevance_score: float - match_reasons: List[str] - category_bonus: float - -@dataclass -class PathMatchingResult: - """Complete result of path matching operation.""" - matched_files: List[MatchResult] - total_tokens: int - categories: Dict[str, int] - patterns_used: List[str] - confidence_score: float - -class PathMatcher: - """Matches files to analysis context using various algorithms.""" - - def __init__(self, config: Dict): - self.config = config - self.logger = logging.getLogger(__name__) - - # Load scoring weights - self.weights = config.get('path_matching', {}).get('weights', { - 'keyword_match': 0.4, - 'extension_match': 0.2, - 'directory_context': 0.2, - 'file_size_penalty': 0.1, - 'recency_bonus': 0.1 - }) - - # Load limits - self.max_files_per_category = config.get('path_matching', {}).get('max_files_per_category', 20) - self.min_relevance_score = config.get('path_matching', {}).get('min_relevance_score', 0.1) - self.max_total_files = config.get('output', {}).get('max_total_files', 50) - - # Load always include patterns - self.always_include = config.get('output', {}).get('always_include', []) - - # Category priorities - self.category_priorities = { - 'code': 1.0, - 'config': 0.8, - 'docs': 0.6, - 'web': 0.4, - 'other': 0.2 - } - - def _calculate_keyword_score(self, file_info: FileInfo, keywords: List[str]) -> Tuple[float, List[str]]: - """Calculate score based on keyword matches in file path.""" - if not keywords: - return 0.0, [] - - path_lower = file_info.relative_path.lower() - filename_lower = Path(file_info.relative_path).name.lower() - - matches = [] - score = 0.0 - - for keyword in keywords: - keyword_lower = keyword.lower() - - # Exact filename match (highest weight) - if keyword_lower in filename_lower: - score += 2.0 - matches.append(f"filename:{keyword}") - continue - - # Directory name match - if keyword_lower in path_lower: - score += 1.0 - matches.append(f"path:{keyword}") - continue - - # Partial match in path components - path_parts = path_lower.split('/') - for part in path_parts: - if keyword_lower in part: - score += 0.5 - matches.append(f"partial:{keyword}") - break - - # Normalize by number of keywords - normalized_score = score / len(keywords) if keywords else 0.0 - return min(normalized_score, 1.0), matches - - def _calculate_extension_score(self, file_info: FileInfo, languages: List[str]) -> float: - """Calculate score based on file extension relevance.""" - if not languages: - return 0.5 # Neutral score - - extension = file_info.extension.lower() - - # Language-specific extension mapping - lang_extensions = { - 'python': ['.py', '.pyx', '.pyi'], - 'javascript': ['.js', '.jsx', '.mjs'], - 'typescript': ['.ts', '.tsx'], - 'java': ['.java'], - 'go': ['.go'], - 'rust': ['.rs'], - 'cpp': ['.cpp', '.cc', '.cxx', '.c', '.h', '.hpp'], - 'csharp': ['.cs'], - 'php': ['.php'], - 'ruby': ['.rb'], - 'shell': ['.sh', '.bash', '.zsh'] - } - - score = 0.0 - for language in languages: - if language in lang_extensions: - if extension in lang_extensions[language]: - score = 1.0 - break - - # Fallback to category-based scoring - if score == 0.0: - category_scores = { - 'code': 1.0, - 'config': 0.8, - 
'docs': 0.6, - 'web': 0.4, - 'other': 0.2 - } - score = category_scores.get(file_info.category, 0.2) - - return score - - def _calculate_directory_score(self, file_info: FileInfo, domains: List[str]) -> Tuple[float, List[str]]: - """Calculate score based on directory context.""" - if not domains: - return 0.0, [] - - path_parts = file_info.relative_path.lower().split('/') - matches = [] - score = 0.0 - - # Domain-specific directory patterns - domain_patterns = { - 'auth': ['auth', 'authentication', 'login', 'user', 'account'], - 'authentication': ['auth', 'authentication', 'login', 'user', 'account'], - 'database': ['db', 'database', 'model', 'entity', 'migration', 'schema'], - 'api': ['api', 'rest', 'graphql', 'route', 'controller', 'handler'], - 'frontend': ['ui', 'component', 'view', 'template', 'client', 'web'], - 'backend': ['service', 'server', 'core', 'business', 'logic'], - 'test': ['test', 'spec', 'tests', '__tests__', 'testing'], - 'testing': ['test', 'spec', 'tests', '__tests__', 'testing'], - 'config': ['config', 'configuration', 'env', 'settings'], - 'configuration': ['config', 'configuration', 'env', 'settings'], - 'util': ['util', 'utils', 'helper', 'common', 'shared', 'lib'], - 'utility': ['util', 'utils', 'helper', 'common', 'shared', 'lib'] - } - - for domain in domains: - if domain in domain_patterns: - patterns = domain_patterns[domain] - for pattern in patterns: - for part in path_parts: - if pattern in part: - score += 1.0 - matches.append(f"dir:{domain}->{pattern}") - break - - # Normalize by number of domains - normalized_score = score / len(domains) if domains else 0.0 - return min(normalized_score, 1.0), matches - - def _calculate_size_penalty(self, file_info: FileInfo) -> float: - """Calculate penalty for very large files.""" - max_size = self.config.get('performance', {}).get('max_file_size', 10485760) # 10MB - - if file_info.size > max_size: - # Heavy penalty for oversized files - return -0.5 - elif file_info.size > max_size * 0.5: - # Light penalty for large files - return -0.2 - else: - return 0.0 - - def _calculate_recency_bonus(self, file_info: FileInfo) -> float: - """Calculate bonus for recently modified files.""" - import time - - current_time = time.time() - file_age = current_time - file_info.modified_time - - # Files modified in last day get bonus - if file_age < 86400: # 1 day - return 0.3 - elif file_age < 604800: # 1 week - return 0.1 - else: - return 0.0 - - def calculate_relevance_score(self, file_info: FileInfo, analysis: AnalysisResult) -> MatchResult: - """Calculate overall relevance score for a file.""" - # Calculate individual scores - keyword_score, keyword_matches = self._calculate_keyword_score(file_info, analysis.keywords) - extension_score = self._calculate_extension_score(file_info, analysis.languages) - directory_score, dir_matches = self._calculate_directory_score(file_info, analysis.domains) - size_penalty = self._calculate_size_penalty(file_info) - recency_bonus = self._calculate_recency_bonus(file_info) - - # Apply weights - weighted_score = ( - keyword_score * self.weights.get('keyword_match', 0.4) + - extension_score * self.weights.get('extension_match', 0.2) + - directory_score * self.weights.get('directory_context', 0.2) + - size_penalty * self.weights.get('file_size_penalty', 0.1) + - recency_bonus * self.weights.get('recency_bonus', 0.1) - ) - - # Category bonus - category_bonus = self.category_priorities.get(file_info.category, 0.2) - - # Final score with category bonus - final_score = weighted_score + (category_bonus * 
0.1) - - # Collect match reasons - match_reasons = keyword_matches + dir_matches - if extension_score > 0.5: - match_reasons.append(f"extension:{file_info.extension}") - if recency_bonus > 0: - match_reasons.append("recent") - - return MatchResult( - file_info=file_info, - relevance_score=max(0.0, final_score), - match_reasons=match_reasons, - category_bonus=category_bonus - ) - - def match_by_patterns(self, file_index: Dict[str, FileInfo], patterns: List[str]) -> List[FileInfo]: - """Match files using explicit glob patterns.""" - matched_files = [] - - for pattern in patterns: - for path, file_info in file_index.items(): - # Try matching both relative path and full path - if (fnmatch.fnmatch(path, pattern) or - fnmatch.fnmatch(file_info.path, pattern) or - fnmatch.fnmatch(Path(path).name, pattern)): - matched_files.append(file_info) - - # Remove duplicates based on path - seen_paths = set() - unique_files = [] - for file_info in matched_files: - if file_info.relative_path not in seen_paths: - seen_paths.add(file_info.relative_path) - unique_files.append(file_info) - return unique_files - - def match_always_include(self, file_index: Dict[str, FileInfo]) -> List[FileInfo]: - """Match files that should always be included.""" - return self.match_by_patterns(file_index, self.always_include) - - def rank_files(self, files: List[FileInfo], analysis: AnalysisResult) -> List[MatchResult]: - """Rank files by relevance score.""" - match_results = [] - - for file_info in files: - match_result = self.calculate_relevance_score(file_info, analysis) - if match_result.relevance_score >= self.min_relevance_score: - match_results.append(match_result) - - # Sort by relevance score (descending) - match_results.sort(key=lambda x: x.relevance_score, reverse=True) - - return match_results - - def select_best_files(self, ranked_files: List[MatchResult], token_limit: Optional[int] = None) -> List[MatchResult]: - """Select the best files within token limits and category constraints.""" - if not ranked_files: - return [] - - selected_files = [] - total_tokens = 0 - category_counts = {} - - for match_result in ranked_files: - file_info = match_result.file_info - category = file_info.category - - # Check category limit - if category_counts.get(category, 0) >= self.max_files_per_category: - continue - - # Check token limit - if token_limit and total_tokens + file_info.estimated_tokens > token_limit: - continue - - # Check total file limit - if len(selected_files) >= self.max_total_files: - break - - # Add file - selected_files.append(match_result) - total_tokens += file_info.estimated_tokens - category_counts[category] = category_counts.get(category, 0) + 1 - - return selected_files - - def match_files(self, file_index: Dict[str, FileInfo], analysis: AnalysisResult, - token_limit: Optional[int] = None, explicit_patterns: Optional[List[str]] = None) -> PathMatchingResult: - """Main file matching function.""" - self.logger.info(f"Matching files for analysis with {len(analysis.keywords)} keywords and {len(analysis.domains)} domains") - - # Start with always-include files - always_include_files = self.match_always_include(file_index) - self.logger.debug(f"Always include: {len(always_include_files)} files") - - # Add explicit pattern matches - pattern_files = [] - patterns_used = [] - if explicit_patterns: - pattern_files = self.match_by_patterns(file_index, explicit_patterns) - patterns_used.extend(explicit_patterns) - self.logger.debug(f"Explicit patterns: {len(pattern_files)} files") - - # Add suggested pattern matches - 
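# Suggested patterns are glob patterns proposed by ContextAnalyzer.analyze() for the prompt
# (AnalysisResult.file_patterns); they are resolved with the same fnmatch-based
# match_by_patterns() helper used for the explicit_patterns argument above.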
if analysis.file_patterns: - suggested_files = self.match_by_patterns(file_index, analysis.file_patterns) - pattern_files.extend(suggested_files) - patterns_used.extend(analysis.file_patterns) - self.logger.debug(f"Suggested patterns: {len(suggested_files)} files") - - # Combine all candidate files and remove duplicates - all_files = always_include_files + pattern_files + list(file_index.values()) - seen_paths = set() - all_candidates = [] - for file_info in all_files: - if file_info.relative_path not in seen_paths: - seen_paths.add(file_info.relative_path) - all_candidates.append(file_info) - self.logger.debug(f"Total candidates: {len(all_candidates)} files") - - # Rank all candidates - ranked_files = self.rank_files(all_candidates, analysis) - self.logger.debug(f"Files above threshold: {len(ranked_files)}") - - # Select best files within limits - selected_files = self.select_best_files(ranked_files, token_limit) - self.logger.info(f"Selected {len(selected_files)} files") - - # Calculate statistics - total_tokens = sum(match.file_info.estimated_tokens for match in selected_files) - categories = {} - for match in selected_files: - category = match.file_info.category - categories[category] = categories.get(category, 0) + 1 - - # Calculate confidence score - confidence_score = self._calculate_confidence(selected_files, analysis) - - return PathMatchingResult( - matched_files=selected_files, - total_tokens=total_tokens, - categories=categories, - patterns_used=patterns_used, - confidence_score=confidence_score - ) - - def _calculate_confidence(self, selected_files: List[MatchResult], analysis: AnalysisResult) -> float: - """Calculate confidence score for the matching result.""" - if not selected_files: - return 0.0 - - # Average relevance score - avg_relevance = sum(match.relevance_score for match in selected_files) / len(selected_files) - - # Keyword coverage (how many keywords are represented) - keyword_coverage = 0.0 - if analysis.keywords: - covered_keywords = set() - for match in selected_files: - for reason in match.match_reasons: - if reason.startswith('filename:') or reason.startswith('path:'): - keyword = reason.split(':', 1)[1] - covered_keywords.add(keyword) - keyword_coverage = len(covered_keywords) / len(analysis.keywords) - - # Domain coverage - domain_coverage = 0.0 - if analysis.domains: - covered_domains = set() - for match in selected_files: - for reason in match.match_reasons: - if reason.startswith('dir:'): - domain = reason.split('->', 1)[0].split(':', 1)[1] - covered_domains.add(domain) - domain_coverage = len(covered_domains) / len(analysis.domains) - - # Weighted confidence score - confidence = ( - avg_relevance * 0.5 + - keyword_coverage * 0.3 + - domain_coverage * 0.2 - ) - - return min(confidence, 1.0) - - def format_patterns(self, selected_files: List[MatchResult]) -> List[str]: - """Format selected files as @{pattern} strings.""" - pattern_format = self.config.get('output', {}).get('pattern_format', '@{{{path}}}') - - patterns = [] - for match in selected_files: - pattern = pattern_format.format(path=match.file_info.relative_path) - patterns.append(pattern) - - return patterns - -def main(): - """Command-line interface for path matcher.""" - import yaml - import argparse - import json - from .file_indexer import FileIndexer - from .context_analyzer import ContextAnalyzer - - parser = argparse.ArgumentParser(description="Path Matcher for UltraThink") - parser.add_argument("prompt", help="Prompt to analyze and match") - parser.add_argument("--config", 
default="config.yaml", help="Configuration file path") - parser.add_argument("--token-limit", type=int, help="Token limit for selection") - parser.add_argument("--patterns", nargs="*", help="Explicit patterns to include") - parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output") - - args = parser.parse_args() - - # Setup logging - level = logging.DEBUG if args.verbose else logging.INFO - logging.basicConfig(level=level, format='%(levelname)s: %(message)s') - - # Load configuration - config_path = Path(__file__).parent / args.config - with open(config_path, 'r', encoding='utf-8') as f: - config = yaml.safe_load(f) - - # Create components - indexer = FileIndexer(config) - analyzer = ContextAnalyzer(config) - matcher = PathMatcher(config) - - # Build file index - file_index = indexer.load_index() - if not file_index: - print("Building file index...") - file_index = indexer.build_index() - - # Analyze prompt - analysis = analyzer.analyze(args.prompt) - - # Match files - result = matcher.match_files( - file_index=file_index, - analysis=analysis, - token_limit=args.token_limit, - explicit_patterns=args.patterns - ) - - # Output results - print(f"Matched {len(result.matched_files)} files (~{result.total_tokens:,} tokens)") - print(f"Categories: {result.categories}") - print(f"Confidence: {result.confidence_score:.2f}") - print() - - patterns = matcher.format_patterns(result.matched_files) - print("Patterns:") - for pattern in patterns[:20]: # Limit output - print(f" {pattern}") - - if args.verbose: - print("\nDetailed matches:") - for match in result.matched_files[:10]: - print(f" {match.file_info.relative_path} (score: {match.relevance_score:.3f})") - print(f" Reasons: {', '.join(match.match_reasons)}") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/.claude/python_script/indexer.py b/.claude/python_script/indexer.py deleted file mode 100644 index 978951a8..00000000 --- a/.claude/python_script/indexer.py +++ /dev/null @@ -1,204 +0,0 @@ -#!/usr/bin/env python3 -""" -File Structure Indexer -Builds and maintains file indices for intelligent analysis. -""" - -import sys -import argparse -import logging -import json -import time -from pathlib import Path -from typing import Dict, List, Optional, Any - -# Add current directory to path for imports -sys.path.insert(0, str(Path(__file__).parent)) - -from core.config import get_config -from core.file_indexer import FileIndexer, IndexStats -from core.embedding_manager import EmbeddingManager -from utils.colors import Colors - - -class ProjectIndexer: - """Manages file indexing and project statistics.""" - - def __init__(self, config_path: Optional[str] = None, root_path: str = "."): - self.root_path = Path(root_path).resolve() - self.config = get_config(config_path) - - # Setup logging - logging.basicConfig( - level=getattr(logging, self.config.get('logging.level', 'INFO')), - format=self.config.get('logging.format', '%(asctime)s - %(name)s - %(levelname)s - %(message)s') - ) - self.logger = logging.getLogger(__name__) - - # Initialize core components - self.indexer = FileIndexer(self.config, str(self.root_path)) - - # Initialize embedding manager if enabled - self.embedding_manager = None - if self.config.is_embedding_enabled(): - try: - self.embedding_manager = EmbeddingManager(self.config) - except ImportError: - self.logger.warning("Embedding dependencies not available. 
Install sentence-transformers for enhanced functionality.") - - def build_index(self) -> IndexStats: - """Build or update the file index.""" - print(Colors.yellow("Building file index...")) - start_time = time.time() - - self.indexer.build_index() - stats = self.indexer.get_stats() - - elapsed = time.time() - start_time - if stats: - print(Colors.green(f"Index built: {stats.total_files} files, ~{stats.total_tokens:,} tokens ({elapsed:.2f}s)")) - else: - print(Colors.green(f"Index built successfully ({elapsed:.2f}s)")) - - return stats - - def update_embeddings(self) -> bool: - """Update embeddings for semantic similarity.""" - if not self.embedding_manager: - print(Colors.error("Embedding functionality not available")) - return False - - print(Colors.yellow("Updating embeddings...")) - start_time = time.time() - - # Load file index - index = self.indexer.load_index() - if not index: - print(Colors.warning("No file index found. Building index first...")) - self.build_index() - index = self.indexer.load_index() - - try: - count = self.embedding_manager.update_embeddings(index) - elapsed = time.time() - start_time - print(Colors.green(f"Updated {count} embeddings ({elapsed:.2f}s)")) - return True - except Exception as e: - print(Colors.error(f"Failed to update embeddings: {e}")) - return False - - def get_project_stats(self) -> Dict[str, Any]: - """Get comprehensive project statistics.""" - stats = self.indexer.get_stats() - embedding_stats = {} - - if self.embedding_manager: - embedding_stats = { - 'embeddings_exist': self.embedding_manager.embeddings_exist(), - 'embedding_count': len(self.embedding_manager._load_embedding_cache()) if self.embedding_manager.embeddings_exist() else 0 - } - - project_size = self._classify_project_size(stats.total_tokens if stats else 0) - - return { - 'files': stats.total_files if stats else 0, - 'tokens': stats.total_tokens if stats else 0, - 'size_bytes': stats.total_size if stats else 0, - 'categories': stats.categories if stats else {}, - 'project_size': project_size, - 'last_updated': stats.last_updated if stats else 0, - 'embeddings': embedding_stats, - 'config': { - 'cache_dir': self.config.get_cache_dir(), - 'embedding_enabled': self.config.is_embedding_enabled(), - 'exclude_patterns_count': len(self.config.get_exclude_patterns()) - } - } - - def _classify_project_size(self, tokens: int) -> str: - """Classify project size based on token count.""" - small_limit = self.config.get('token_limits.small_project', 500000) - medium_limit = self.config.get('token_limits.medium_project', 2000000) - - if tokens < small_limit: - return "small" - elif tokens < medium_limit: - return "medium" - else: - return "large" - - def cleanup_cache(self): - """Clean up old cache files.""" - cache_dir = Path(self.config.get_cache_dir()) - if cache_dir.exists(): - print(Colors.yellow("Cleaning up cache...")) - for file in cache_dir.glob("*"): - if file.is_file(): - file.unlink() - print(f"Removed: {file}") - print(Colors.green("Cache cleaned")) - - -def main(): - """CLI entry point for indexer.""" - parser = argparse.ArgumentParser( - description="Project File Indexer - Build and manage file indices", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - python indexer.py --build # Build file index - python indexer.py --stats # Show project statistics - python indexer.py --embeddings # Update embeddings - python indexer.py --cleanup # Clean cache - """ - ) - - parser.add_argument('--build', action='store_true', help='Build file index') - 
parser.add_argument('--stats', action='store_true', help='Show project statistics') - parser.add_argument('--embeddings', action='store_true', help='Update embeddings') - parser.add_argument('--cleanup', action='store_true', help='Clean up cache files') - parser.add_argument('--output', choices=['json', 'text'], default='text', help='Output format') - parser.add_argument('--config', help='Configuration file path') - parser.add_argument('--root', default='.', help='Root directory to analyze') - - args = parser.parse_args() - - # Require at least one action - if not any([args.build, args.stats, args.embeddings, args.cleanup]): - parser.error("At least one action is required: --build, --stats, --embeddings, or --cleanup") - - # Create indexer - indexer = ProjectIndexer(args.config, args.root) - - try: - if args.cleanup: - indexer.cleanup_cache() - - if args.build: - indexer.build_index() - - if args.embeddings: - indexer.update_embeddings() - - if args.stats: - stats = indexer.get_project_stats() - if args.output == 'json': - print(json.dumps(stats, indent=2, default=str)) - else: - print(f"Total files: {stats['files']}") - print(f"Total tokens: {stats['tokens']:,}") - print(f"Project size: {stats['project_size']}") - print(f"Categories: {stats['categories']}") - if 'embeddings' in stats: - print(f"Embeddings: {stats['embeddings']['embedding_count']}") - - except KeyboardInterrupt: - print(Colors.warning("\nOperation interrupted by user")) - sys.exit(1) - except Exception as e: - print(Colors.error(f"Operation failed: {e}")) - sys.exit(1) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/.claude/python_script/install.sh b/.claude/python_script/install.sh deleted file mode 100644 index d855fa13..00000000 --- a/.claude/python_script/install.sh +++ /dev/null @@ -1,189 +0,0 @@ -#!/bin/bash -# Installation script for UltraThink Path-Aware Analyzer - -set -e - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -# Functions -print_status() { - echo -e "${BLUE}[INFO]${NC} $1" -} - -print_success() { - echo -e "${GREEN}[SUCCESS]${NC} $1" -} - -print_warning() { - echo -e "${YELLOW}[WARNING]${NC} $1" -} - -print_error() { - echo -e "${RED}[ERROR]${NC} $1" -} - -# Check Python version -check_python() { - if command -v python3 &> /dev/null; then - PYTHON_VERSION=$(python3 -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')") - PYTHON_CMD="python3" - elif command -v python &> /dev/null; then - PYTHON_VERSION=$(python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')") - PYTHON_CMD="python" - else - print_error "Python not found. Please install Python 3.8 or later." - exit 1 - fi - - # Check version - if [[ $(echo "$PYTHON_VERSION >= 3.8" | bc -l) -eq 1 ]]; then - print_success "Python $PYTHON_VERSION found" - else - print_error "Python 3.8 or later required. Found Python $PYTHON_VERSION" - exit 1 - fi -} - -# Install dependencies -install_dependencies() { - print_status "Installing core dependencies..." - - # Install core requirements - $PYTHON_CMD -m pip install --user -r requirements.txt - - if [ $? -eq 0 ]; then - print_success "Core dependencies installed" - else - print_error "Failed to install core dependencies" - exit 1 - fi -} - -# Install optional dependencies -install_optional() { - read -p "Install RAG/embedding features? 
(requires ~200MB download) [y/N]: " install_rag - if [[ $install_rag =~ ^[Yy]$ ]]; then - print_status "Installing RAG dependencies..." - $PYTHON_CMD -m pip install --user sentence-transformers numpy - if [ $? -eq 0 ]; then - print_success "RAG dependencies installed" - else - print_warning "Failed to install RAG dependencies (optional)" - fi - fi - - read -p "Install development tools? [y/N]: " install_dev - if [[ $install_dev =~ ^[Yy]$ ]]; then - print_status "Installing development dependencies..." - $PYTHON_CMD -m pip install --user pytest pytest-cov black flake8 - if [ $? -eq 0 ]; then - print_success "Development dependencies installed" - else - print_warning "Failed to install development dependencies (optional)" - fi - fi -} - -# Create wrapper script -create_wrapper() { - SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)" - WRAPPER_PATH="$HOME/.local/bin/ultrathink" - - # Create .local/bin if it doesn't exist - mkdir -p "$HOME/.local/bin" - - # Create wrapper script - cat > "$WRAPPER_PATH" << EOF -#!/bin/bash -# UltraThink Path-Aware Analyzer Wrapper -# Auto-generated by install.sh - -SCRIPT_DIR="$SCRIPT_DIR" -export PYTHONPATH="\$SCRIPT_DIR:\$PYTHONPATH" - -exec $PYTHON_CMD "\$SCRIPT_DIR/path_aware_analyzer.py" "\$@" -EOF - - chmod +x "$WRAPPER_PATH" - - if [ -f "$WRAPPER_PATH" ]; then - print_success "Wrapper script created at $WRAPPER_PATH" - else - print_error "Failed to create wrapper script" - exit 1 - fi -} - -# Update configuration -setup_config() { - print_status "Setting up configuration..." - - # Create cache directory - mkdir -p .claude/cache/embeddings - - # Check if config needs updating - if [ ! -f config.yaml ]; then - print_error "Configuration file config.yaml not found" - exit 1 - fi - - print_success "Configuration ready" -} - -# Test installation -test_installation() { - print_status "Testing installation..." - - # Test basic functionality - if $PYTHON_CMD path_aware_analyzer.py --stats &> /dev/null; then - print_success "Installation test passed" - else - print_warning "Installation test failed - but this might be normal for first run" - fi -} - -# Add to PATH instructions -show_path_instructions() { - if [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then - print_warning "Add $HOME/.local/bin to your PATH to use 'ultrathink' command globally" - echo "" - echo "Add this line to your ~/.bashrc or ~/.zshrc:" - echo "export PATH=\"\$HOME/.local/bin:\$PATH\"" - echo "" - echo "Or run: echo 'export PATH=\"\$HOME/.local/bin:\$PATH\"' >> ~/.bashrc" - echo "Then: source ~/.bashrc" - fi -} - -# Main installation -main() { - print_status "Installing UltraThink Path-Aware Analyzer..." - echo "" - - check_python - install_dependencies - install_optional - create_wrapper - setup_config - test_installation - - echo "" - print_success "Installation complete!" 
- echo "" - - print_status "Usage examples:" - echo " ./path_aware_analyzer.py \"analyze authentication flow\"" - echo " ultrathink \"implement user login feature\"" - echo " ultrathink --tool gemini \"review API endpoints\"" - echo "" - - show_path_instructions -} - -# Run main function -main "$@" \ No newline at end of file diff --git a/.claude/python_script/requirements.txt b/.claude/python_script/requirements.txt deleted file mode 100644 index 425e8b14..00000000 --- a/.claude/python_script/requirements.txt +++ /dev/null @@ -1,19 +0,0 @@ -# Core dependencies for embedding tests -numpy>=1.21.0 -scikit-learn>=1.0.0 - -# Sentence Transformers for advanced embeddings (CodeSage V2 compatible) -sentence-transformers>=3.0.0 -transformers>=4.40.0 - -# PyTorch for model execution (required for CodeSage V2) -torch>=2.0.0 - -# Development and testing -pytest>=6.0.0 - -# Data handling -pandas>=1.3.0 - -# Additional dependencies for CodeSage V2 -accelerate>=0.26.0 diff --git a/.claude/python_script/setup.py b/.claude/python_script/setup.py deleted file mode 100644 index ab8d01f0..00000000 --- a/.claude/python_script/setup.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python3 -""" -Setup script for UltraThink Path-Aware Analyzer -""" - -from setuptools import setup, find_packages -from pathlib import Path - -# Read README -readme_path = Path(__file__).parent / "README.md" -long_description = readme_path.read_text(encoding='utf-8') if readme_path.exists() else "" - -# Read requirements -requirements_path = Path(__file__).parent / "requirements.txt" -requirements = [] -if requirements_path.exists(): - with open(requirements_path, 'r', encoding='utf-8') as f: - for line in f: - line = line.strip() - if line and not line.startswith('#'): - requirements.append(line) - -setup( - name="ultrathink-path-analyzer", - version="1.0.0", - description="Lightweight path-aware program for intelligent file pattern detection and analysis", - long_description=long_description, - long_description_content_type="text/markdown", - author="UltraThink Development Team", - author_email="dev@ultrathink.ai", - url="https://github.com/ultrathink/path-analyzer", - - packages=find_packages(), - py_modules=[ - 'analyzer', # Main entry point - ], - - install_requires=requirements, - - extras_require={ - 'rag': [ - 'sentence-transformers>=2.2.0', - 'numpy>=1.21.0' - ], - 'nlp': [ - 'nltk>=3.8', - 'spacy>=3.4.0' - ], - 'performance': [ - 'numba>=0.56.0' - ], - 'dev': [ - 'pytest>=7.0.0', - 'pytest-cov>=4.0.0', - 'black>=22.0.0', - 'flake8>=5.0.0' - ] - }, - - entry_points={ - 'console_scripts': [ - 'path-analyzer=cli:main', - 'path-indexer=indexer:main', - 'analyzer=analyzer:main', # Legacy compatibility - 'module-analyzer=tools.module_analyzer:main', - 'tech-stack=tools.tech_stack:main', - ], - }, - - classifiers=[ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "Topic :: Software Development :: Tools", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Operating System :: OS Independent", - ], - - python_requires=">=3.8", - - keywords="ai, analysis, path-detection, code-analysis, file-matching, rag, nlp", - - project_urls={ - "Bug Reports": "https://github.com/ultrathink/path-analyzer/issues", - "Source": "https://github.com/ultrathink/path-analyzer", - "Documentation": 
"https://github.com/ultrathink/path-analyzer/docs", - }, -) \ No newline at end of file diff --git a/.claude/python_script/tools/__init__.py b/.claude/python_script/tools/__init__.py deleted file mode 100644 index 205b99d4..00000000 --- a/.claude/python_script/tools/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -""" -Independent tool scripts for specialized analysis tasks. -Provides module analysis, tech stack detection, and workflow management tools. -""" - -from .module_analyzer import ModuleAnalyzer, ModuleInfo -from .tech_stack import TechStackLoader - -__all__ = [ - 'ModuleAnalyzer', - 'ModuleInfo', - 'TechStackLoader' -] \ No newline at end of file diff --git a/.claude/python_script/tools/__pycache__/__init__.cpython-313.pyc b/.claude/python_script/tools/__pycache__/__init__.cpython-313.pyc deleted file mode 100644 index 42acf8b8..00000000 Binary files a/.claude/python_script/tools/__pycache__/__init__.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/tools/__pycache__/module_analyzer.cpython-313.pyc b/.claude/python_script/tools/__pycache__/module_analyzer.cpython-313.pyc deleted file mode 100644 index 4705dee7..00000000 Binary files a/.claude/python_script/tools/__pycache__/module_analyzer.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/tools/__pycache__/tech_stack.cpython-313.pyc b/.claude/python_script/tools/__pycache__/tech_stack.cpython-313.pyc deleted file mode 100644 index f9eda9a9..00000000 Binary files a/.claude/python_script/tools/__pycache__/tech_stack.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/tools/__pycache__/workflow_updater.cpython-313.pyc b/.claude/python_script/tools/__pycache__/workflow_updater.cpython-313.pyc deleted file mode 100644 index 105ac6df..00000000 Binary files a/.claude/python_script/tools/__pycache__/workflow_updater.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/tools/module_analyzer.py b/.claude/python_script/tools/module_analyzer.py deleted file mode 100644 index 79e1027f..00000000 --- a/.claude/python_script/tools/module_analyzer.py +++ /dev/null @@ -1,369 +0,0 @@ -#!/usr/bin/env python3 -""" -Unified Module Analyzer -Combines functionality from detect_changed_modules.py and get_modules_by_depth.py -into a single, comprehensive module analysis tool. 
-""" - -import os -import sys -import subprocess -import time -import json -from pathlib import Path -from typing import List, Dict, Optional, Set, Tuple -from dataclasses import dataclass, asdict - -# Add parent directory for imports -sys.path.insert(0, str(Path(__file__).parent.parent)) -from core.config import get_config -from core.gitignore_parser import GitignoreParser - -@dataclass -class ModuleInfo: - """Information about a module/directory.""" - depth: int - path: str - files: int - types: List[str] - has_claude: bool - status: str = "normal" # changed, normal, new, deleted - last_modified: Optional[float] = None - - def to_dict(self) -> Dict: - return asdict(self) - -class ModuleAnalyzer: - """Unified module analysis tool with change detection and depth analysis.""" - - def __init__(self, root_path: str = ".", config_path: Optional[str] = None): - self.root_path = Path(root_path).resolve() - self.config = get_config(config_path) - - # Source file extensions for analysis - self.source_extensions = { - '.md', '.js', '.ts', '.jsx', '.tsx', '.py', '.go', '.rs', - '.java', '.cpp', '.c', '.h', '.sh', '.ps1', '.json', '.yaml', '.yml', - '.php', '.rb', '.swift', '.kt', '.scala', '.dart' - } - - # Initialize gitignore parser for exclusions - self.gitignore_parser = GitignoreParser(str(self.root_path)) - self.exclude_patterns = self._build_exclusion_patterns() - - def _build_exclusion_patterns(self) -> Set[str]: - """Build exclusion patterns from config and gitignore.""" - exclusions = { - '.git', '.history', '.vscode', '__pycache__', '.pytest_cache', - 'node_modules', 'dist', 'build', '.egg-info', '.env', - '.cache', '.tmp', '.temp', '.DS_Store', 'Thumbs.db' - } - - # Add patterns from config - config_patterns = self.config.get('exclude_patterns', []) - for pattern in config_patterns: - # Extract directory names from patterns - if '/' in pattern: - parts = pattern.replace('*/', '').replace('/*', '').split('/') - exclusions.update(part for part in parts if part and not part.startswith('*')) - - return exclusions - - def _should_exclude_directory(self, dir_path: Path) -> bool: - """Check if directory should be excluded from analysis.""" - dir_name = dir_path.name - - # Check against exclusion patterns - if dir_name in self.exclude_patterns: - return True - - # Check if directory starts with . 
(hidden directories) - if dir_name.startswith('.') and dir_name not in {'.github', '.vscode'}: - return True - - return False - - def get_git_changed_files(self, since: str = "HEAD") -> Set[str]: - """Get files changed in git.""" - changed_files = set() - - try: - # Check if we're in a git repository - subprocess.run(['git', 'rev-parse', '--git-dir'], - check=True, capture_output=True, cwd=self.root_path) - - # Get changes since specified reference - commands = [ - ['git', 'diff', '--name-only', since], # Changes since reference - ['git', 'diff', '--name-only', '--staged'], # Staged changes - ['git', 'ls-files', '--others', '--exclude-standard'] # Untracked files - ] - - for cmd in commands: - try: - result = subprocess.run(cmd, capture_output=True, text=True, - cwd=self.root_path, check=True) - if result.stdout.strip(): - files = result.stdout.strip().split('\n') - changed_files.update(f for f in files if f) - except subprocess.CalledProcessError: - continue - - except subprocess.CalledProcessError: - # Not a git repository or git not available - pass - - return changed_files - - def get_recently_modified_files(self, hours: int = 24) -> Set[str]: - """Get files modified within the specified hours.""" - cutoff_time = time.time() - (hours * 3600) - recent_files = set() - - try: - for file_path in self.root_path.rglob('*'): - if file_path.is_file(): - try: - if file_path.stat().st_mtime > cutoff_time: - rel_path = file_path.relative_to(self.root_path) - recent_files.add(str(rel_path)) - except (OSError, ValueError): - continue - except Exception: - pass - - return recent_files - - def analyze_directory(self, dir_path: Path) -> Optional[ModuleInfo]: - """Analyze a single directory and return module information.""" - if self._should_exclude_directory(dir_path): - return None - - try: - # Count files by type - file_types = set() - file_count = 0 - has_claude = False - last_modified = 0 - - for item in dir_path.iterdir(): - if item.is_file(): - file_count += 1 - - # Track file types - if item.suffix.lower() in self.source_extensions: - file_types.add(item.suffix.lower()) - - # Check for CLAUDE.md - if item.name.upper() == 'CLAUDE.MD': - has_claude = True - - # Track latest modification - try: - mtime = item.stat().st_mtime - last_modified = max(last_modified, mtime) - except OSError: - continue - - # Calculate depth relative to root - try: - relative_path = dir_path.relative_to(self.root_path) - depth = len(relative_path.parts) - except ValueError: - depth = 0 - - return ModuleInfo( - depth=depth, - path=str(relative_path) if depth > 0 else ".", - files=file_count, - types=sorted(list(file_types)), - has_claude=has_claude, - last_modified=last_modified if last_modified > 0 else None - ) - - except (PermissionError, OSError): - return None - - def detect_changed_modules(self, since: str = "HEAD") -> List[ModuleInfo]: - """Detect modules affected by changes.""" - changed_files = self.get_git_changed_files(since) - - # If no git changes, fall back to recently modified files - if not changed_files: - changed_files = self.get_recently_modified_files(24) - - # Get affected directories - affected_dirs = set() - for file_path in changed_files: - full_path = self.root_path / file_path - if full_path.exists(): - # Add the file's directory and parent directories - current_dir = full_path.parent - while current_dir != self.root_path and current_dir.parent != current_dir: - affected_dirs.add(current_dir) - current_dir = current_dir.parent - - # Analyze affected directories - modules = [] - for dir_path in 
affected_dirs: - module_info = self.analyze_directory(dir_path) - if module_info: - module_info.status = "changed" - modules.append(module_info) - - return sorted(modules, key=lambda m: (m.depth, m.path)) - - def analyze_by_depth(self, max_depth: Optional[int] = None) -> List[ModuleInfo]: - """Analyze all modules organized by depth (deepest first).""" - modules = [] - - def scan_directory(dir_path: Path, current_depth: int = 0): - """Recursively scan directories.""" - if max_depth and current_depth > max_depth: - return - - module_info = self.analyze_directory(dir_path) - if module_info and module_info.files > 0: - modules.append(module_info) - - # Recurse into subdirectories - try: - for item in dir_path.iterdir(): - if item.is_dir() and not self._should_exclude_directory(item): - scan_directory(item, current_depth + 1) - except (PermissionError, OSError): - pass - - scan_directory(self.root_path) - - # Sort by depth (deepest first), then by path - return sorted(modules, key=lambda m: (-m.depth, m.path)) - - def get_dependencies(self, module_path: str) -> List[str]: - """Get module dependencies (basic implementation).""" - dependencies = [] - module_dir = self.root_path / module_path - - if not module_dir.exists() or not module_dir.is_dir(): - return dependencies - - # Look for common dependency files - dependency_files = [ - 'package.json', # Node.js - 'requirements.txt', # Python - 'Cargo.toml', # Rust - 'go.mod', # Go - 'pom.xml', # Java Maven - 'build.gradle', # Java Gradle - ] - - for dep_file in dependency_files: - dep_path = module_dir / dep_file - if dep_path.exists(): - dependencies.append(str(dep_path.relative_to(self.root_path))) - - return dependencies - - def find_modules_with_pattern(self, pattern: str) -> List[ModuleInfo]: - """Find modules matching a specific pattern in their path or files.""" - modules = self.analyze_by_depth() - matching_modules = [] - - for module in modules: - # Check if pattern matches path - if pattern.lower() in module.path.lower(): - matching_modules.append(module) - continue - - # Check if pattern matches file types - if any(pattern.lower() in ext.lower() for ext in module.types): - matching_modules.append(module) - - return matching_modules - - def export_analysis(self, modules: List[ModuleInfo], format: str = "json") -> str: - """Export module analysis in specified format.""" - if format == "json": - return json.dumps([module.to_dict() for module in modules], indent=2) - - elif format == "list": - lines = [] - for module in modules: - status = f"[{module.status}]" if module.status != "normal" else "" - claude_marker = "[CLAUDE]" if module.has_claude else "" - lines.append(f"{module.path} (depth:{module.depth}, files:{module.files}) {status} {claude_marker}") - return "\n".join(lines) - - elif format == "grouped": - grouped = {} - for module in modules: - depth = module.depth - if depth not in grouped: - grouped[depth] = [] - grouped[depth].append(module) - - lines = [] - for depth in sorted(grouped.keys()): - lines.append(f"\n=== Depth {depth} ===") - for module in grouped[depth]: - status = f"[{module.status}]" if module.status != "normal" else "" - claude_marker = "[CLAUDE]" if module.has_claude else "" - lines.append(f" {module.path} (files:{module.files}) {status} {claude_marker}") - return "\n".join(lines) - - elif format == "paths": - return "\n".join(module.path for module in modules) - - else: - raise ValueError(f"Unsupported format: {format}") - - -def main(): - """Main CLI entry point.""" - import argparse - - parser = 
argparse.ArgumentParser(description="Module Analysis Tool") - parser.add_argument("command", choices=["changed", "depth", "dependencies", "find"], - help="Analysis command to run") - parser.add_argument("--format", choices=["json", "list", "grouped", "paths"], - default="list", help="Output format") - parser.add_argument("--since", default="HEAD~1", - help="Git reference for change detection (default: HEAD~1)") - parser.add_argument("--max-depth", type=int, - help="Maximum directory depth to analyze") - parser.add_argument("--pattern", help="Pattern to search for (for find command)") - parser.add_argument("--module", help="Module path for dependency analysis") - parser.add_argument("--config", help="Configuration file path") - - args = parser.parse_args() - - analyzer = ModuleAnalyzer(config_path=args.config) - - if args.command == "changed": - modules = analyzer.detect_changed_modules(args.since) - print(analyzer.export_analysis(modules, args.format)) - - elif args.command == "depth": - modules = analyzer.analyze_by_depth(args.max_depth) - print(analyzer.export_analysis(modules, args.format)) - - elif args.command == "dependencies": - if not args.module: - print("Error: --module required for dependencies command", file=sys.stderr) - sys.exit(1) - deps = analyzer.get_dependencies(args.module) - if args.format == "json": - print(json.dumps(deps, indent=2)) - else: - print("\n".join(deps)) - - elif args.command == "find": - if not args.pattern: - print("Error: --pattern required for find command", file=sys.stderr) - sys.exit(1) - modules = analyzer.find_modules_with_pattern(args.pattern) - print(analyzer.export_analysis(modules, args.format)) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/.claude/python_script/tools/tech_stack.py b/.claude/python_script/tools/tech_stack.py deleted file mode 100644 index 2d434f5c..00000000 --- a/.claude/python_script/tools/tech_stack.py +++ /dev/null @@ -1,202 +0,0 @@ -#!/usr/bin/env python3 -""" -Python equivalent of tech-stack-loader.sh -DMSFlow Tech Stack Guidelines Loader -Returns tech stack specific coding guidelines and best practices for Claude processing - -Usage: python tech_stack_loader.py [command] [tech_stack] -""" - -import sys -import argparse -import re -from pathlib import Path -from typing import Dict, List, Optional, Tuple - -class TechStackLoader: - """Load tech stack specific development guidelines.""" - - def __init__(self, script_dir: Optional[str] = None): - if script_dir: - self.script_dir = Path(script_dir) - else: - self.script_dir = Path(__file__).parent - - # Look for template directory in multiple locations - possible_template_dirs = [ - self.script_dir / "../tech-stack-templates", - self.script_dir / "../workflows/cli-templates/tech-stacks", - self.script_dir / "tech-stack-templates", - self.script_dir / "templates", - ] - - self.template_dir = None - for template_dir in possible_template_dirs: - if template_dir.exists(): - self.template_dir = template_dir.resolve() - break - - if not self.template_dir: - # Create a default template directory - self.template_dir = self.script_dir / "tech-stack-templates" - self.template_dir.mkdir(exist_ok=True) - - def parse_yaml_frontmatter(self, content: str) -> Tuple[Dict[str, str], str]: - """Parse YAML frontmatter from markdown content.""" - frontmatter = {} - content_start = 0 - - lines = content.split('\n') - if lines and lines[0].strip() == '---': - # Find the closing --- - for i, line in enumerate(lines[1:], 1): - if line.strip() == '---': - 
content_start = i + 1 - break - elif ':' in line: - key, value = line.split(':', 1) - frontmatter[key.strip()] = value.strip() - - # Return frontmatter and content without YAML - remaining_content = '\n'.join(lines[content_start:]) - return frontmatter, remaining_content - - def list_available_guidelines(self) -> str: - """List all available development guidelines.""" - output = ["Available Development Guidelines:", "=" * 33] - - if not self.template_dir.exists(): - output.append("No template directory found.") - return '\n'.join(output) - - for file_path in self.template_dir.glob("*.md"): - try: - with open(file_path, 'r', encoding='utf-8') as f: - content = f.read() - - frontmatter, _ = self.parse_yaml_frontmatter(content) - name = frontmatter.get('name', file_path.stem) - description = frontmatter.get('description', 'No description available') - - output.append(f"{name:<20} - {description}") - - except Exception as e: - output.append(f"{file_path.stem:<20} - Error reading file: {e}") - - return '\n'.join(output) - - def load_guidelines(self, tech_stack: str) -> str: - """Load specific development guidelines.""" - template_path = self.template_dir / f"{tech_stack}.md" - - if not template_path.exists(): - # Try with different naming conventions - alternatives = [ - f"{tech_stack}-dev.md", - f"{tech_stack}_dev.md", - f"{tech_stack.replace('-', '_')}.md", - f"{tech_stack.replace('_', '-')}.md" - ] - - for alt in alternatives: - alt_path = self.template_dir / alt - if alt_path.exists(): - template_path = alt_path - break - else: - raise FileNotFoundError( - f"Error: Development guidelines '{tech_stack}' not found\n" - f"Use --list to see available guidelines" - ) - - try: - with open(template_path, 'r', encoding='utf-8') as f: - content = f.read() - - # Parse and return content without YAML frontmatter - _, content_without_yaml = self.parse_yaml_frontmatter(content) - return content_without_yaml.strip() - - except Exception as e: - raise RuntimeError(f"Error reading guidelines file: {e}") - - def get_version(self) -> str: - """Get version information.""" - return "DMSFlow tech-stack-loader v2.0 (Python)\nSemantic-based development guidelines system" - - def get_help(self) -> str: - """Get help message.""" - return """Usage: - tech_stack_loader.py --list List all available guidelines with descriptions - tech_stack_loader.py --load Load specific development guidelines - tech_stack_loader.py Load specific guidelines (legacy format) - tech_stack_loader.py --help Show this help message - tech_stack_loader.py --version Show version information - -Examples: - tech_stack_loader.py --list - tech_stack_loader.py --load javascript-dev - tech_stack_loader.py python-dev""" - -def main(): - """Command-line interface.""" - parser = argparse.ArgumentParser( - description="DMSFlow Tech Stack Guidelines Loader", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog="""Examples: - python tech_stack_loader.py --list - python tech_stack_loader.py --load javascript-dev - python tech_stack_loader.py python-dev""" - ) - - parser.add_argument("command", nargs="?", help="Command or tech stack name") - parser.add_argument("tech_stack", nargs="?", help="Tech stack name (when using --load)") - parser.add_argument("--list", action="store_true", help="List all available guidelines") - parser.add_argument("--load", metavar="TECH_STACK", help="Load specific development guidelines") - parser.add_argument("--version", "-v", action="store_true", help="Show version information") - 
parser.add_argument("--template-dir", help="Override template directory path") - - args = parser.parse_args() - - try: - loader = TechStackLoader(args.template_dir) - - # Handle version check - if args.version or args.command == "--version": - print(loader.get_version()) - return - - # Handle list command - if args.list or args.command == "--list": - print(loader.list_available_guidelines()) - return - - # Handle load command - if args.load: - result = loader.load_guidelines(args.load) - print(result) - return - - if args.command == "--load" and args.tech_stack: - result = loader.load_guidelines(args.tech_stack) - print(result) - return - - # Handle legacy usage (direct tech stack name) - if args.command and args.command not in ["--help", "--list", "--load"]: - result = loader.load_guidelines(args.command) - print(result) - return - - # Show help - print(loader.get_help()) - - except (FileNotFoundError, RuntimeError) as e: - print(str(e), file=sys.stderr) - sys.exit(1) - except Exception as e: - print(f"Unexpected error: {e}", file=sys.stderr) - sys.exit(1) - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/.claude/python_script/tools/workflow_updater.py b/.claude/python_script/tools/workflow_updater.py deleted file mode 100644 index 14822286..00000000 --- a/.claude/python_script/tools/workflow_updater.py +++ /dev/null @@ -1,241 +0,0 @@ -#!/usr/bin/env python3 -""" -Python equivalent of update_module_claude.sh -Update CLAUDE.md for a specific module with automatic layer detection - -Usage: python update_module_claude.py [update_type] - module_path: Path to the module directory - update_type: full|related (default: full) - Script automatically detects layer depth and selects appropriate template -""" - -import os -import sys -import subprocess -import time -import argparse -from pathlib import Path -from typing import Optional, Tuple, Dict -from dataclasses import dataclass - -@dataclass -class LayerInfo: - """Information about a documentation layer.""" - name: str - template_path: str - analysis_strategy: str - -class ModuleClaudeUpdater: - """Update CLAUDE.md documentation for modules with layer detection.""" - - def __init__(self, home_dir: Optional[str] = None): - self.home_dir = Path(home_dir) if home_dir else Path.home() - self.template_base = self.home_dir / ".claude/workflows/cli-templates/prompts/dms" - - def detect_layer(self, module_path: str) -> LayerInfo: - """Determine documentation layer based on path patterns.""" - clean_path = module_path.replace('./', '') if module_path.startswith('./') else module_path - - if module_path == ".": - # Root directory - return LayerInfo( - name="Layer 1 (Root)", - template_path=str(self.template_base / "claude-layer1-root.txt"), - analysis_strategy="--all-files" - ) - elif '/' not in clean_path: - # Top-level directories (e.g., .claude, src, tests) - return LayerInfo( - name="Layer 2 (Domain)", - template_path=str(self.template_base / "claude-layer2-domain.txt"), - analysis_strategy="@{*/CLAUDE.md}" - ) - elif clean_path.count('/') == 1: - # Second-level directories (e.g., .claude/scripts, src/components) - return LayerInfo( - name="Layer 3 (Module)", - template_path=str(self.template_base / "claude-layer3-module.txt"), - analysis_strategy="@{*/CLAUDE.md}" - ) - else: - # Deeper directories (e.g., .claude/workflows/cli-templates/prompts) - return LayerInfo( - name="Layer 4 (Sub-Module)", - template_path=str(self.template_base / "claude-layer4-submodule.txt"), - analysis_strategy="--all-files" - ) - - def 
load_template(self, template_path: str) -> str: - """Load template content from file.""" - try: - with open(template_path, 'r', encoding='utf-8') as f: - return f.read() - except FileNotFoundError: - print(f" [WARN] Template not found: {template_path}, using fallback") - return "Update CLAUDE.md documentation for this module following hierarchy standards." - except Exception as e: - print(f" [WARN] Error reading template: {e}, using fallback") - return "Update CLAUDE.md documentation for this module following hierarchy standards." - - def build_prompt(self, layer_info: LayerInfo, module_path: str, update_type: str) -> str: - """Build the prompt for gemini.""" - template_content = self.load_template(layer_info.template_path) - module_name = os.path.basename(module_path) - - if update_type == "full": - update_context = """ - Update Mode: Complete refresh - - Perform comprehensive analysis of all content - - Document patterns, architecture, and purpose - - Consider existing documentation hierarchy - - Follow template guidelines strictly""" - else: - update_context = """ - Update Mode: Context-aware update - - Focus on recent changes and affected areas - - Maintain consistency with existing documentation - - Update only relevant sections - - Follow template guidelines for updated content""" - - base_prompt = f""" - [CRITICAL] RULES - MUST FOLLOW: - 1. ONLY modify CLAUDE.md files at any hierarchy level - 2. NEVER modify source code files - 3. Focus exclusively on updating documentation - 4. Follow the template guidelines exactly - - {template_content} - - {update_context} - - Module Information: - - Name: {module_name} - - Path: {module_path} - - Layer: {layer_info.name} - - Analysis Strategy: {layer_info.analysis_strategy}""" - - return base_prompt - - def execute_gemini_command(self, prompt: str, analysis_strategy: str, module_path: str) -> bool: - """Execute gemini command with the appropriate strategy.""" - original_dir = os.getcwd() - - try: - os.chdir(module_path) - - if analysis_strategy == "--all-files": - cmd = ["gemini", "--all-files", "--yolo", "-p", prompt] - else: - cmd = ["gemini", "--yolo", "-p", f"{analysis_strategy} {prompt}"] - - result = subprocess.run(cmd, capture_output=True, text=True) - - if result.returncode == 0: - return True - else: - print(f" [ERROR] Gemini command failed: {result.stderr}") - return False - - except subprocess.CalledProcessError as e: - print(f" [ERROR] Error executing gemini: {e}") - return False - except FileNotFoundError: - print(f" [ERROR] Gemini command not found. 
Make sure gemini is installed and in PATH.") - return False - finally: - os.chdir(original_dir) - - def update_module_claude(self, module_path: str, update_type: str = "full") -> bool: - """Main function to update CLAUDE.md for a module.""" - # Validate parameters - if not module_path: - print("[ERROR] Module path is required") - print("Usage: update_module_claude.py [update_type]") - return False - - path_obj = Path(module_path) - if not path_obj.exists() or not path_obj.is_dir(): - print(f"[ERROR] Directory '{module_path}' does not exist") - return False - - # Check if directory has files - files = list(path_obj.glob('*')) - file_count = len([f for f in files if f.is_file()]) - if file_count == 0: - print(f"[SKIP] Skipping '{module_path}' - no files found") - return True - - # Detect layer and get configuration - layer_info = self.detect_layer(module_path) - - print(f"[UPDATE] Updating: {module_path}") - print(f" Layer: {layer_info.name} | Type: {update_type} | Files: {file_count}") - print(f" Template: {os.path.basename(layer_info.template_path)} | Strategy: {layer_info.analysis_strategy}") - - # Build prompt - prompt = self.build_prompt(layer_info, module_path, update_type) - - # Execute update - start_time = time.time() - print(" [PROGRESS] Starting update...") - - success = self.execute_gemini_command(prompt, layer_info.analysis_strategy, module_path) - - if success: - duration = int(time.time() - start_time) - print(f" [OK] Completed in {duration}s") - return True - else: - print(f" [ERROR] Update failed for {module_path}") - return False - -def main(): - """Command-line interface.""" - parser = argparse.ArgumentParser( - description="Update CLAUDE.md for a specific module with automatic layer detection", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog="""Examples: - python update_module_claude.py . - python update_module_claude.py src/components full - python update_module_claude.py .claude/scripts related""" - ) - - parser.add_argument("module_path", help="Path to the module directory") - parser.add_argument("update_type", nargs="?", choices=["full", "related"], - default="full", help="Update type (default: full)") - parser.add_argument("--home", help="Override home directory path") - parser.add_argument("--dry-run", action="store_true", - help="Show what would be done without executing") - - args = parser.parse_args() - - try: - updater = ModuleClaudeUpdater(args.home) - - if args.dry_run: - layer_info = updater.detect_layer(args.module_path) - prompt = updater.build_prompt(layer_info, args.module_path, args.update_type) - - print("[DRY-RUN] Dry run mode - showing configuration:") - print(f"Module Path: {args.module_path}") - print(f"Update Type: {args.update_type}") - print(f"Layer: {layer_info.name}") - print(f"Template: {layer_info.template_path}") - print(f"Strategy: {layer_info.analysis_strategy}") - print("\nPrompt preview:") - print("-" * 50) - print(prompt[:500] + "..." 
if len(prompt) > 500 else prompt) - return - - success = updater.update_module_claude(args.module_path, args.update_type) - sys.exit(0 if success else 1) - - except KeyboardInterrupt: - print("\n[ERROR] Operation cancelled by user") - sys.exit(1) - except Exception as e: - print(f"[ERROR] Unexpected error: {e}") - sys.exit(1) - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/.claude/python_script/utils/__init__.py b/.claude/python_script/utils/__init__.py deleted file mode 100644 index a7a6fdae..00000000 --- a/.claude/python_script/utils/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -Shared utility functions and helpers. -Provides common functionality for colors, caching, and I/O operations. -""" - -from .colors import Colors -from .cache import CacheManager -from .io_helpers import IOHelpers, ensure_directory, safe_read_file - -__all__ = [ - 'Colors', - 'CacheManager', - 'IOHelpers', - 'ensure_directory', - 'safe_read_file' -] \ No newline at end of file diff --git a/.claude/python_script/utils/__pycache__/__init__.cpython-313.pyc b/.claude/python_script/utils/__pycache__/__init__.cpython-313.pyc deleted file mode 100644 index 8e1a0943..00000000 Binary files a/.claude/python_script/utils/__pycache__/__init__.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/utils/__pycache__/cache.cpython-313.pyc b/.claude/python_script/utils/__pycache__/cache.cpython-313.pyc deleted file mode 100644 index 30d28904..00000000 Binary files a/.claude/python_script/utils/__pycache__/cache.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/utils/__pycache__/colors.cpython-313.pyc b/.claude/python_script/utils/__pycache__/colors.cpython-313.pyc deleted file mode 100644 index 32bd94fe..00000000 Binary files a/.claude/python_script/utils/__pycache__/colors.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/utils/__pycache__/io_helpers.cpython-313.pyc b/.claude/python_script/utils/__pycache__/io_helpers.cpython-313.pyc deleted file mode 100644 index 49a204e4..00000000 Binary files a/.claude/python_script/utils/__pycache__/io_helpers.cpython-313.pyc and /dev/null differ diff --git a/.claude/python_script/utils/cache.py b/.claude/python_script/utils/cache.py deleted file mode 100644 index 01b9f19a..00000000 --- a/.claude/python_script/utils/cache.py +++ /dev/null @@ -1,350 +0,0 @@ -#!/usr/bin/env python3 -""" -Cache Management Utility -Provides unified caching functionality for the analyzer system. 
-""" - -import os -import json -import time -import hashlib -import pickle -import logging -from pathlib import Path -from typing import Any, Optional, Dict, Union -from dataclasses import dataclass, asdict - - -@dataclass -class CacheEntry: - """Cache entry with metadata.""" - value: Any - timestamp: float - ttl: Optional[float] = None - key_hash: Optional[str] = None - - def is_expired(self) -> bool: - """Check if cache entry is expired.""" - if self.ttl is None: - return False - return time.time() - self.timestamp > self.ttl - - def to_dict(self) -> Dict: - """Convert to dictionary for JSON serialization.""" - return { - 'value': self.value, - 'timestamp': self.timestamp, - 'ttl': self.ttl, - 'key_hash': self.key_hash - } - - @classmethod - def from_dict(cls, data: Dict) -> 'CacheEntry': - """Create from dictionary.""" - return cls(**data) - - -class CacheManager: - """Unified cache manager with multiple storage backends.""" - - def __init__(self, cache_dir: str = "cache", default_ttl: int = 3600): - self.cache_dir = Path(cache_dir) - self.cache_dir.mkdir(parents=True, exist_ok=True) - self.default_ttl = default_ttl - self.logger = logging.getLogger(__name__) - - # In-memory cache for fast access - self._memory_cache: Dict[str, CacheEntry] = {} - - # Cache subdirectories - self.json_cache_dir = self.cache_dir / "json" - self.pickle_cache_dir = self.cache_dir / "pickle" - self.temp_cache_dir = self.cache_dir / "temp" - - for cache_subdir in [self.json_cache_dir, self.pickle_cache_dir, self.temp_cache_dir]: - cache_subdir.mkdir(exist_ok=True) - - def _generate_key_hash(self, key: str) -> str: - """Generate a hash for the cache key.""" - return hashlib.md5(key.encode('utf-8')).hexdigest() - - def _get_cache_path(self, key: str, cache_type: str = "json") -> Path: - """Get cache file path for a key.""" - key_hash = self._generate_key_hash(key) - - if cache_type == "json": - return self.json_cache_dir / f"{key_hash}.json" - elif cache_type == "pickle": - return self.pickle_cache_dir / f"{key_hash}.pkl" - elif cache_type == "temp": - return self.temp_cache_dir / f"{key_hash}.tmp" - else: - raise ValueError(f"Unsupported cache type: {cache_type}") - - def set(self, key: str, value: Any, ttl: Optional[int] = None, - storage: str = "memory") -> bool: - """Set a cache value.""" - if ttl is None: - ttl = self.default_ttl - - entry = CacheEntry( - value=value, - timestamp=time.time(), - ttl=ttl, - key_hash=self._generate_key_hash(key) - ) - - try: - if storage == "memory": - self._memory_cache[key] = entry - return True - - elif storage == "json": - cache_path = self._get_cache_path(key, "json") - with open(cache_path, 'w', encoding='utf-8') as f: - json.dump(entry.to_dict(), f, indent=2, default=str) - return True - - elif storage == "pickle": - cache_path = self._get_cache_path(key, "pickle") - with open(cache_path, 'wb') as f: - pickle.dump(entry, f) - return True - - else: - self.logger.warning(f"Unsupported storage type: {storage}") - return False - - except Exception as e: - self.logger.error(f"Failed to set cache for key '{key}': {e}") - return False - - def get(self, key: str, storage: str = "memory", - default: Any = None) -> Any: - """Get a cache value.""" - try: - entry = None - - if storage == "memory": - entry = self._memory_cache.get(key) - - elif storage == "json": - cache_path = self._get_cache_path(key, "json") - if cache_path.exists(): - with open(cache_path, 'r', encoding='utf-8') as f: - data = json.load(f) - entry = CacheEntry.from_dict(data) - - elif storage == "pickle": - 
cache_path = self._get_cache_path(key, "pickle") - if cache_path.exists(): - with open(cache_path, 'rb') as f: - entry = pickle.load(f) - - else: - self.logger.warning(f"Unsupported storage type: {storage}") - return default - - if entry is None: - return default - - # Check if entry is expired - if entry.is_expired(): - self.delete(key, storage) - return default - - return entry.value - - except Exception as e: - self.logger.error(f"Failed to get cache for key '{key}': {e}") - return default - - def delete(self, key: str, storage: str = "memory") -> bool: - """Delete a cache entry.""" - try: - if storage == "memory": - if key in self._memory_cache: - del self._memory_cache[key] - return True - - elif storage in ["json", "pickle", "temp"]: - cache_path = self._get_cache_path(key, storage) - if cache_path.exists(): - cache_path.unlink() - return True - - else: - self.logger.warning(f"Unsupported storage type: {storage}") - return False - - except Exception as e: - self.logger.error(f"Failed to delete cache for key '{key}': {e}") - return False - - def exists(self, key: str, storage: str = "memory") -> bool: - """Check if a cache entry exists and is not expired.""" - return self.get(key, storage) is not None - - def clear(self, storage: Optional[str] = None) -> bool: - """Clear cache entries.""" - try: - if storage is None or storage == "memory": - self._memory_cache.clear() - - if storage is None or storage == "json": - for cache_file in self.json_cache_dir.glob("*.json"): - cache_file.unlink() - - if storage is None or storage == "pickle": - for cache_file in self.pickle_cache_dir.glob("*.pkl"): - cache_file.unlink() - - if storage is None or storage == "temp": - for cache_file in self.temp_cache_dir.glob("*.tmp"): - cache_file.unlink() - - return True - - except Exception as e: - self.logger.error(f"Failed to clear cache: {e}") - return False - - def cleanup_expired(self) -> int: - """Clean up expired cache entries.""" - cleaned_count = 0 - - try: - # Clean memory cache - expired_keys = [] - for key, entry in self._memory_cache.items(): - if entry.is_expired(): - expired_keys.append(key) - - for key in expired_keys: - del self._memory_cache[key] - cleaned_count += 1 - - # Clean file caches - for cache_type in ["json", "pickle"]: - cache_dir = self.json_cache_dir if cache_type == "json" else self.pickle_cache_dir - extension = f".{cache_type}" if cache_type == "json" else ".pkl" - - for cache_file in cache_dir.glob(f"*{extension}"): - try: - if cache_type == "json": - with open(cache_file, 'r', encoding='utf-8') as f: - data = json.load(f) - entry = CacheEntry.from_dict(data) - else: - with open(cache_file, 'rb') as f: - entry = pickle.load(f) - - if entry.is_expired(): - cache_file.unlink() - cleaned_count += 1 - - except Exception: - # If we can't read the cache file, delete it - cache_file.unlink() - cleaned_count += 1 - - self.logger.info(f"Cleaned up {cleaned_count} expired cache entries") - return cleaned_count - - except Exception as e: - self.logger.error(f"Failed to cleanup expired cache entries: {e}") - return 0 - - def get_stats(self) -> Dict[str, Any]: - """Get cache statistics.""" - stats = { - 'memory_entries': len(self._memory_cache), - 'json_files': len(list(self.json_cache_dir.glob("*.json"))), - 'pickle_files': len(list(self.pickle_cache_dir.glob("*.pkl"))), - 'temp_files': len(list(self.temp_cache_dir.glob("*.tmp"))), - 'cache_dir_size': 0 - } - - # Calculate total cache directory size - try: - for cache_file in self.cache_dir.rglob("*"): - if cache_file.is_file(): - 
stats['cache_dir_size'] += cache_file.stat().st_size - except Exception: - pass - - return stats - - def set_file_cache(self, key: str, file_path: Union[str, Path], - ttl: Optional[int] = None) -> bool: - """Cache a file by copying it to the cache directory.""" - try: - source_path = Path(file_path) - if not source_path.exists(): - return False - - cache_path = self.temp_cache_dir / f"{self._generate_key_hash(key)}.cached" - - # Copy file to cache - import shutil - shutil.copy2(source_path, cache_path) - - # Store metadata - metadata = { - 'original_path': str(source_path), - 'cached_path': str(cache_path), - 'size': source_path.stat().st_size, - 'timestamp': time.time(), - 'ttl': ttl or self.default_ttl - } - - return self.set(f"{key}_metadata", metadata, ttl, "json") - - except Exception as e: - self.logger.error(f"Failed to cache file '{file_path}': {e}") - return False - - def get_file_cache(self, key: str) -> Optional[Path]: - """Get cached file path.""" - metadata = self.get(f"{key}_metadata", "json") - if metadata is None: - return None - - cached_path = Path(metadata['cached_path']) - if not cached_path.exists(): - # Cache file missing, clean up metadata - self.delete(f"{key}_metadata", "json") - return None - - return cached_path - - -# Global cache manager instance -_global_cache = None - - -def get_cache_manager(cache_dir: str = "cache", default_ttl: int = 3600) -> CacheManager: - """Get global cache manager instance.""" - global _global_cache - if _global_cache is None: - _global_cache = CacheManager(cache_dir, default_ttl) - return _global_cache - - -if __name__ == "__main__": - # Test cache functionality - cache = CacheManager("test_cache") - - # Test memory cache - cache.set("test_key", {"data": "test_value"}, ttl=60) - print(f"Memory cache: {cache.get('test_key')}") - - # Test JSON cache - cache.set("json_key", {"complex": {"data": [1, 2, 3]}}, ttl=60, storage="json") - print(f"JSON cache: {cache.get('json_key', storage='json')}") - - # Test stats - print(f"Cache stats: {cache.get_stats()}") - - # Clean up - cache.clear() \ No newline at end of file diff --git a/.claude/python_script/utils/colors.py b/.claude/python_script/utils/colors.py deleted file mode 100644 index 0c0f0d67..00000000 --- a/.claude/python_script/utils/colors.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 -""" -Terminal Colors Utility -Provides ANSI color codes for terminal output formatting. 
-""" - -import os -import sys -from typing import Optional - - -class Colors: - """ANSI color codes for terminal output.""" - - # Basic colors - RED = '\033[0;31m' - GREEN = '\033[0;32m' - YELLOW = '\033[1;33m' - BLUE = '\033[0;34m' - PURPLE = '\033[0;35m' - CYAN = '\033[0;36m' - WHITE = '\033[0;37m' - BLACK = '\033[0;30m' - - # Bright colors - BRIGHT_RED = '\033[1;31m' - BRIGHT_GREEN = '\033[1;32m' - BRIGHT_YELLOW = '\033[1;33m' - BRIGHT_BLUE = '\033[1;34m' - BRIGHT_PURPLE = '\033[1;35m' - BRIGHT_CYAN = '\033[1;36m' - BRIGHT_WHITE = '\033[1;37m' - - # Background colors - BG_RED = '\033[41m' - BG_GREEN = '\033[42m' - BG_YELLOW = '\033[43m' - BG_BLUE = '\033[44m' - BG_PURPLE = '\033[45m' - BG_CYAN = '\033[46m' - BG_WHITE = '\033[47m' - - # Text formatting - BOLD = '\033[1m' - DIM = '\033[2m' - UNDERLINE = '\033[4m' - BLINK = '\033[5m' - REVERSE = '\033[7m' - STRIKETHROUGH = '\033[9m' - - # Reset - NC = '\033[0m' # No Color / Reset - RESET = '\033[0m' - - @classmethod - def is_tty(cls) -> bool: - """Check if output is a TTY (supports colors).""" - return hasattr(sys.stdout, 'isatty') and sys.stdout.isatty() - - @classmethod - def supports_color(cls) -> bool: - """Check if the terminal supports color output.""" - # Check environment variables - if os.getenv('NO_COLOR'): - return False - - if os.getenv('FORCE_COLOR'): - return True - - # Check if output is a TTY - if not cls.is_tty(): - return False - - # Check TERM environment variable - term = os.getenv('TERM', '').lower() - if 'color' in term or term in ('xterm', 'xterm-256color', 'screen', 'tmux'): - return True - - # Windows Terminal detection - if os.name == 'nt': - # Windows 10 version 1511 and later support ANSI colors - try: - import subprocess - result = subprocess.run(['ver'], capture_output=True, text=True, shell=True) - if result.returncode == 0: - version_info = result.stdout - # Extract Windows version (simplified check) - if 'Windows' in version_info: - return True - except Exception: - pass - - return False - - @classmethod - def colorize(cls, text: str, color: str, bold: bool = False) -> str: - """Apply color to text if colors are supported.""" - if not cls.supports_color(): - return text - - prefix = color - if bold: - prefix = cls.BOLD + prefix - - return f"{prefix}{text}{cls.RESET}" - - @classmethod - def red(cls, text: str, bold: bool = False) -> str: - """Color text red.""" - return cls.colorize(text, cls.RED, bold) - - @classmethod - def green(cls, text: str, bold: bool = False) -> str: - """Color text green.""" - return cls.colorize(text, cls.GREEN, bold) - - @classmethod - def yellow(cls, text: str, bold: bool = False) -> str: - """Color text yellow.""" - return cls.colorize(text, cls.YELLOW, bold) - - @classmethod - def blue(cls, text: str, bold: bool = False) -> str: - """Color text blue.""" - return cls.colorize(text, cls.BLUE, bold) - - @classmethod - def purple(cls, text: str, bold: bool = False) -> str: - """Color text purple.""" - return cls.colorize(text, cls.PURPLE, bold) - - @classmethod - def cyan(cls, text: str, bold: bool = False) -> str: - """Color text cyan.""" - return cls.colorize(text, cls.CYAN, bold) - - @classmethod - def bold(cls, text: str) -> str: - """Make text bold.""" - return cls.colorize(text, '', True) - - @classmethod - def dim(cls, text: str) -> str: - """Make text dim.""" - return cls.colorize(text, cls.DIM) - - @classmethod - def underline(cls, text: str) -> str: - """Underline text.""" - return cls.colorize(text, cls.UNDERLINE) - - @classmethod - def success(cls, text: str) -> str: - 
"""Format success message (green).""" - return cls.green(f"[SUCCESS] {text}", bold=True) - - @classmethod - def error(cls, text: str) -> str: - """Format error message (red).""" - return cls.red(f"[ERROR] {text}", bold=True) - - @classmethod - def warning(cls, text: str) -> str: - """Format warning message (yellow).""" - return cls.yellow(f"[WARNING] {text}", bold=True) - - @classmethod - def info(cls, text: str) -> str: - """Format info message (blue).""" - return cls.blue(f"[INFO] {text}") - - @classmethod - def highlight(cls, text: str) -> str: - """Highlight text (cyan background).""" - if not cls.supports_color(): - return f"[{text}]" - return f"{cls.BG_CYAN}{cls.BLACK}{text}{cls.RESET}" - - @classmethod - def strip_colors(cls, text: str) -> str: - """Remove ANSI color codes from text.""" - import re - ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') - return ansi_escape.sub('', text) - - -# Convenience functions for common usage -def colorize(text: str, color: str) -> str: - """Convenience function to colorize text.""" - return Colors.colorize(text, color) - - -def red(text: str) -> str: - """Red text.""" - return Colors.red(text) - - -def green(text: str) -> str: - """Green text.""" - return Colors.green(text) - - -def yellow(text: str) -> str: - """Yellow text.""" - return Colors.yellow(text) - - -def blue(text: str) -> str: - """Blue text.""" - return Colors.blue(text) - - -def success(text: str) -> str: - """Success message.""" - return Colors.success(text) - - -def error(text: str) -> str: - """Error message.""" - return Colors.error(text) - - -def warning(text: str) -> str: - """Warning message.""" - return Colors.warning(text) - - -def info(text: str) -> str: - """Info message.""" - return Colors.info(text) - - -if __name__ == "__main__": - # Test color output - print(Colors.red("Red text")) - print(Colors.green("Green text")) - print(Colors.yellow("Yellow text")) - print(Colors.blue("Blue text")) - print(Colors.purple("Purple text")) - print(Colors.cyan("Cyan text")) - print(Colors.bold("Bold text")) - print(Colors.success("Success message")) - print(Colors.error("Error message")) - print(Colors.warning("Warning message")) - print(Colors.info("Info message")) - print(Colors.highlight("Highlighted text")) - print(f"Color support: {Colors.supports_color()}") - print(f"TTY: {Colors.is_tty()}") \ No newline at end of file diff --git a/.claude/python_script/utils/io_helpers.py b/.claude/python_script/utils/io_helpers.py deleted file mode 100644 index 8a86a887..00000000 --- a/.claude/python_script/utils/io_helpers.py +++ /dev/null @@ -1,378 +0,0 @@ -#!/usr/bin/env python3 -""" -I/O Helper Functions -Provides common file and directory operations with error handling. 
-""" - -import os -import json -import yaml -import logging -from pathlib import Path -from typing import Any, Optional, Union, List, Dict -import shutil -import tempfile - - -class IOHelpers: - """Collection of I/O helper methods.""" - - @staticmethod - def ensure_directory(path: Union[str, Path], mode: int = 0o755) -> bool: - """Ensure directory exists, create if necessary.""" - try: - dir_path = Path(path) - dir_path.mkdir(parents=True, exist_ok=True, mode=mode) - return True - except (PermissionError, OSError) as e: - logging.error(f"Failed to create directory '{path}': {e}") - return False - - @staticmethod - def safe_read_file(file_path: Union[str, Path], encoding: str = 'utf-8', - fallback_encoding: str = 'latin-1') -> Optional[str]: - """Safely read file content with encoding fallback.""" - path = Path(file_path) - if not path.exists(): - return None - - encodings = [encoding, fallback_encoding] if encoding != fallback_encoding else [encoding] - - for enc in encodings: - try: - with open(path, 'r', encoding=enc) as f: - return f.read() - except UnicodeDecodeError: - continue - except (IOError, OSError) as e: - logging.error(f"Failed to read file '{file_path}': {e}") - return None - - logging.warning(f"Failed to decode file '{file_path}' with any encoding") - return None - - @staticmethod - def safe_write_file(file_path: Union[str, Path], content: str, - encoding: str = 'utf-8', backup: bool = False) -> bool: - """Safely write content to file with optional backup.""" - path = Path(file_path) - - try: - # Create backup if requested and file exists - if backup and path.exists(): - backup_path = path.with_suffix(path.suffix + '.bak') - shutil.copy2(path, backup_path) - - # Ensure parent directory exists - if not IOHelpers.ensure_directory(path.parent): - return False - - # Write to temporary file first, then move to final location - with tempfile.NamedTemporaryFile(mode='w', encoding=encoding, - dir=path.parent, delete=False) as tmp_file: - tmp_file.write(content) - tmp_path = Path(tmp_file.name) - - # Atomic move - shutil.move(str(tmp_path), str(path)) - return True - - except (IOError, OSError) as e: - logging.error(f"Failed to write file '{file_path}': {e}") - return False - - @staticmethod - def read_json(file_path: Union[str, Path], default: Any = None) -> Any: - """Read JSON file with error handling.""" - content = IOHelpers.safe_read_file(file_path) - if content is None: - return default - - try: - return json.loads(content) - except json.JSONDecodeError as e: - logging.error(f"Failed to parse JSON from '{file_path}': {e}") - return default - - @staticmethod - def write_json(file_path: Union[str, Path], data: Any, - indent: int = 2, backup: bool = False) -> bool: - """Write data to JSON file.""" - try: - content = json.dumps(data, indent=indent, ensure_ascii=False, default=str) - return IOHelpers.safe_write_file(file_path, content, backup=backup) - except (TypeError, ValueError) as e: - logging.error(f"Failed to serialize data to JSON for '{file_path}': {e}") - return False - - @staticmethod - def read_yaml(file_path: Union[str, Path], default: Any = None) -> Any: - """Read YAML file with error handling.""" - content = IOHelpers.safe_read_file(file_path) - if content is None: - return default - - try: - return yaml.safe_load(content) - except yaml.YAMLError as e: - logging.error(f"Failed to parse YAML from '{file_path}': {e}") - return default - - @staticmethod - def write_yaml(file_path: Union[str, Path], data: Any, backup: bool = False) -> bool: - """Write data to YAML 
file.""" - try: - content = yaml.dump(data, default_flow_style=False, allow_unicode=True) - return IOHelpers.safe_write_file(file_path, content, backup=backup) - except yaml.YAMLError as e: - logging.error(f"Failed to serialize data to YAML for '{file_path}': {e}") - return False - - @staticmethod - def find_files(directory: Union[str, Path], pattern: str = "*", - recursive: bool = True, max_depth: Optional[int] = None) -> List[Path]: - """Find files matching pattern in directory.""" - dir_path = Path(directory) - if not dir_path.exists() or not dir_path.is_dir(): - return [] - - files = [] - try: - if recursive: - if max_depth is not None: - # Implement depth-limited search - def search_with_depth(path: Path, current_depth: int = 0): - if current_depth > max_depth: - return - - for item in path.iterdir(): - if item.is_file() and item.match(pattern): - files.append(item) - elif item.is_dir() and current_depth < max_depth: - search_with_depth(item, current_depth + 1) - - search_with_depth(dir_path) - else: - files = list(dir_path.rglob(pattern)) - else: - files = list(dir_path.glob(pattern)) - - return sorted(files) - - except (PermissionError, OSError) as e: - logging.error(f"Failed to search files in '{directory}': {e}") - return [] - - @staticmethod - def get_file_stats(file_path: Union[str, Path]) -> Optional[Dict[str, Any]]: - """Get file statistics.""" - path = Path(file_path) - if not path.exists(): - return None - - try: - stat = path.stat() - return { - 'size': stat.st_size, - 'modified_time': stat.st_mtime, - 'created_time': stat.st_ctime, - 'is_file': path.is_file(), - 'is_dir': path.is_dir(), - 'permissions': oct(stat.st_mode)[-3:], - 'extension': path.suffix.lower(), - 'name': path.name, - 'parent': str(path.parent) - } - except (OSError, PermissionError) as e: - logging.error(f"Failed to get stats for '{file_path}': {e}") - return None - - @staticmethod - def copy_with_backup(source: Union[str, Path], dest: Union[str, Path]) -> bool: - """Copy file with automatic backup if destination exists.""" - source_path = Path(source) - dest_path = Path(dest) - - if not source_path.exists(): - logging.error(f"Source file '{source}' does not exist") - return False - - try: - # Create backup if destination exists - if dest_path.exists(): - backup_path = dest_path.with_suffix(dest_path.suffix + '.bak') - shutil.copy2(dest_path, backup_path) - logging.info(f"Created backup: {backup_path}") - - # Ensure destination directory exists - if not IOHelpers.ensure_directory(dest_path.parent): - return False - - # Copy file - shutil.copy2(source_path, dest_path) - return True - - except (IOError, OSError) as e: - logging.error(f"Failed to copy '{source}' to '{dest}': {e}") - return False - - @staticmethod - def move_with_backup(source: Union[str, Path], dest: Union[str, Path]) -> bool: - """Move file with automatic backup if destination exists.""" - source_path = Path(source) - dest_path = Path(dest) - - if not source_path.exists(): - logging.error(f"Source file '{source}' does not exist") - return False - - try: - # Create backup if destination exists - if dest_path.exists(): - backup_path = dest_path.with_suffix(dest_path.suffix + '.bak') - shutil.move(str(dest_path), str(backup_path)) - logging.info(f"Created backup: {backup_path}") - - # Ensure destination directory exists - if not IOHelpers.ensure_directory(dest_path.parent): - return False - - # Move file - shutil.move(str(source_path), str(dest_path)) - return True - - except (IOError, OSError) as e: - logging.error(f"Failed to move '{source}' 
to '{dest}': {e}") - return False - - @staticmethod - def clean_temp_files(directory: Union[str, Path], extensions: List[str] = None, - max_age_hours: int = 24) -> int: - """Clean temporary files older than specified age.""" - if extensions is None: - extensions = ['.tmp', '.temp', '.bak', '.swp', '.~'] - - dir_path = Path(directory) - if not dir_path.exists(): - return 0 - - import time - cutoff_time = time.time() - (max_age_hours * 3600) - cleaned_count = 0 - - try: - for file_path in dir_path.rglob('*'): - if file_path.is_file(): - # Check extension - if file_path.suffix.lower() in extensions: - # Check age - if file_path.stat().st_mtime < cutoff_time: - try: - file_path.unlink() - cleaned_count += 1 - except OSError: - continue - - logging.info(f"Cleaned {cleaned_count} temporary files from '{directory}'") - return cleaned_count - - except (PermissionError, OSError) as e: - logging.error(f"Failed to clean temp files in '{directory}': {e}") - return 0 - - @staticmethod - def get_directory_size(directory: Union[str, Path]) -> int: - """Get total size of directory in bytes.""" - dir_path = Path(directory) - if not dir_path.exists() or not dir_path.is_dir(): - return 0 - - total_size = 0 - try: - for file_path in dir_path.rglob('*'): - if file_path.is_file(): - total_size += file_path.stat().st_size - except (PermissionError, OSError): - pass - - return total_size - - @staticmethod - def make_executable(file_path: Union[str, Path]) -> bool: - """Make file executable (Unix/Linux/Mac).""" - if os.name == 'nt': # Windows - return True # Windows doesn't use Unix permissions - - try: - path = Path(file_path) - current_mode = path.stat().st_mode - path.chmod(current_mode | 0o111) # Add execute permission - return True - except (OSError, PermissionError) as e: - logging.error(f"Failed to make '{file_path}' executable: {e}") - return False - - -# Convenience functions -def ensure_directory(path: Union[str, Path]) -> bool: - """Ensure directory exists.""" - return IOHelpers.ensure_directory(path) - - -def safe_read_file(file_path: Union[str, Path]) -> Optional[str]: - """Safely read file content.""" - return IOHelpers.safe_read_file(file_path) - - -def safe_write_file(file_path: Union[str, Path], content: str) -> bool: - """Safely write content to file.""" - return IOHelpers.safe_write_file(file_path, content) - - -def read_json(file_path: Union[str, Path], default: Any = None) -> Any: - """Read JSON file.""" - return IOHelpers.read_json(file_path, default) - - -def write_json(file_path: Union[str, Path], data: Any) -> bool: - """Write data to JSON file.""" - return IOHelpers.write_json(file_path, data) - - -def read_yaml(file_path: Union[str, Path], default: Any = None) -> Any: - """Read YAML file.""" - return IOHelpers.read_yaml(file_path, default) - - -def write_yaml(file_path: Union[str, Path], data: Any) -> bool: - """Write data to YAML file.""" - return IOHelpers.write_yaml(file_path, data) - - -if __name__ == "__main__": - # Test I/O operations - test_dir = Path("test_io") - - # Test directory creation - print(f"Create directory: {ensure_directory(test_dir)}") - - # Test file operations - test_file = test_dir / "test.txt" - content = "Hello, World!\nThis is a test file." 
- - print(f"Write file: {safe_write_file(test_file, content)}") - print(f"Read file: {safe_read_file(test_file)}") - - # Test JSON operations - json_file = test_dir / "test.json" - json_data = {"name": "test", "numbers": [1, 2, 3], "nested": {"key": "value"}} - - print(f"Write JSON: {write_json(json_file, json_data)}") - print(f"Read JSON: {read_json(json_file)}") - - # Test file stats - stats = IOHelpers.get_file_stats(test_file) - print(f"File stats: {stats}") - - # Cleanup - shutil.rmtree(test_dir, ignore_errors=True) \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 7a73a41b..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,2 +0,0 @@ -{ -} \ No newline at end of file