refactor: Update workflow plan system and template organization

- Remove the --analyze|--deep parameters from plan.md; use the default analysis instead
- Rename the .analysis directory to .process for better organization
- Create an ANALYSIS_RESULTS.md template focused on verified results
- Add the .process folder to the file structure in workflow-architecture.md
- Emphasize verification of referenced files, methods, and commands in the template
- Prevent execution errors caused by non-existent references

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Author: catlog22
Date:   2025-09-18 16:26:50 +08:00
Parent: 9167e4e39e
Commit: fc6e851230

49 changed files with 5865 additions and 174 deletions


@@ -0,0 +1,25 @@
"""
Core modules for the Python script analyzer.
Provides unified interfaces for file indexing, context analysis, and path matching.
"""
from .config import Config
from .file_indexer import FileIndexer, FileInfo, IndexStats
from .context_analyzer import ContextAnalyzer, AnalysisResult
from .path_matcher import PathMatcher, MatchResult, PathMatchingResult
from .embedding_manager import EmbeddingManager
from .gitignore_parser import GitignoreParser
__all__ = [
'Config',
'FileIndexer',
'FileInfo',
'IndexStats',
'ContextAnalyzer',
'AnalysisResult',
'PathMatcher',
'MatchResult',
'PathMatchingResult',
'EmbeddingManager',
'GitignoreParser'
]
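
A quick end-to-end sketch of how the new core modules fit together, assuming the package added in this commit is importable as `core` (the diff does not show the directory name) and that a config.yaml or the built-in defaults are available:

```python
# Illustrative usage only; `core` is an assumed package name.
from core import Config, FileIndexer, ContextAnalyzer, PathMatcher

config = Config()                       # singleton; falls back to built-in defaults
indexer = FileIndexer(config)           # accepts a Config object or a plain dict
index = indexer.build_index()           # {relative_path: FileInfo}

analyzer = ContextAnalyzer(config.to_dict())
analysis = analyzer.analyze("add a login endpoint to the REST API")

matcher = PathMatcher(config.to_dict())
for match in matcher.rank_files(list(index.values()), analysis)[:10]:
    print(f"{match.relevance_score:.2f}  {match.file_info.relative_path}")
```

FileIndexer accepting either a Config object or a plain dict (via to_dict) is what keeps the older dict-based call sites working.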


@@ -0,0 +1,327 @@
#!/usr/bin/env python3
"""
Configuration Management Module
Provides unified configuration management with gitignore integration.
"""
import os
import yaml
import logging
from pathlib import Path
from typing import Dict, Any, Optional, List
from .gitignore_parser import get_all_gitignore_patterns
class Config:
"""Singleton configuration manager with hierarchical loading."""
_instance = None
_initialized = False
def __new__(cls, config_path: Optional[str] = None):
if cls._instance is None:
cls._instance = super(Config, cls).__new__(cls)
return cls._instance
def __init__(self, config_path: Optional[str] = None):
if self._initialized:
return
self.config_path = config_path
self.config = {}
self.logger = logging.getLogger(__name__)
self._load_config()
self._add_gitignore_patterns()
self._apply_env_overrides()
self._validate_config()
self._initialized = True
def _load_config(self):
"""Load configuration from file with fallback hierarchy."""
config_paths = self._get_config_paths()
for config_file in config_paths:
if config_file.exists():
try:
with open(config_file, 'r', encoding='utf-8') as f:
loaded_config = yaml.safe_load(f)
if loaded_config:
self.config = self._merge_configs(self.config, loaded_config)
self.logger.info(f"Loaded config from {config_file}")
except Exception as e:
self.logger.warning(f"Failed to load config from {config_file}: {e}")
# Apply default config if no config loaded
if not self.config:
self.config = self._get_default_config()
self.logger.info("Using default configuration")
def _get_config_paths(self) -> List[Path]:
"""Get ordered list of config file paths to check."""
paths = []
# 1. Explicitly provided config path
if self.config_path:
paths.append(Path(self.config_path))
# 2. Current directory config.yaml
paths.append(Path('config.yaml'))
# 3. Script directory config.yaml
script_dir = Path(__file__).parent.parent
paths.append(script_dir / 'config.yaml')
# 4. Default config in script directory
paths.append(script_dir / 'default_config.yaml')
return paths
def _get_default_config(self) -> Dict[str, Any]:
"""Get default configuration."""
return {
'token_limits': {
'small_project': 500000,
'medium_project': 2000000,
'large_project': 10000000,
'max_files': 1000
},
'exclude_patterns': [
"*/node_modules/*",
"*/.git/*",
"*/build/*",
"*/dist/*",
"*/.next/*",
"*/.nuxt/*",
"*/target/*",
"*/vendor/*",
"*/__pycache__/*",
"*.pyc",
"*.pyo",
"*.log",
"*.tmp",
"*.temp",
"*.history"
],
'file_extensions': {
'code': ['.py', '.js', '.ts', '.tsx', '.jsx', '.java', '.cpp', '.c', '.h', '.rs', '.go', '.php', '.rb', '.sh', '.bash'],
'docs': ['.md', '.txt', '.rst', '.adoc'],
'config': ['.json', '.yaml', '.yml', '.toml', '.ini', '.env'],
'web': ['.html', '.css', '.scss', '.sass', '.xml']
},
'embedding': {
'enabled': True,
'model': 'all-MiniLM-L6-v2',
'cache_dir': 'cache',
'similarity_threshold': 0.3,
'max_context_length': 512,
'batch_size': 32
},
'context_analysis': {
'domain_keywords': {
'auth': ['auth', 'login', 'user', 'password', 'jwt', 'token', 'session'],
'database': ['db', 'database', 'sql', 'query', 'model', 'schema', 'migration'],
'api': ['api', 'endpoint', 'route', 'controller', 'service', 'handler'],
'frontend': ['ui', 'component', 'view', 'template', 'style', 'css'],
'backend': ['server', 'service', 'logic', 'business', 'core'],
'test': ['test', 'spec', 'unit', 'integration', 'mock'],
'config': ['config', 'setting', 'environment', 'env'],
'util': ['util', 'helper', 'common', 'shared', 'lib']
},
'language_indicators': {
'python': ['.py', 'python', 'pip', 'requirements.txt', 'setup.py'],
'javascript': ['.js', '.ts', 'npm', 'package.json', 'node'],
'java': ['.java', 'maven', 'gradle', 'pom.xml'],
'go': ['.go', 'go.mod', 'go.sum'],
'rust': ['.rs', 'cargo', 'Cargo.toml']
}
},
'path_matching': {
'weights': {
'keyword_match': 0.4,
'extension_match': 0.2,
'directory_context': 0.2,
'file_size_penalty': 0.1,
'recency_bonus': 0.1
},
'max_files_per_category': 20,
'min_relevance_score': 0.1
},
'output': {
'pattern_format': '@{{{path}}}',
'always_include': [
'CLAUDE.md',
'**/CLAUDE.md',
'README.md',
'docs/**/*.md'
],
'max_total_files': 50
},
'performance': {
'cache_enabled': True,
'cache_ttl': 3600,
'max_file_size': 10485760,
'max_workers': 4
},
'logging': {
'level': 'INFO',
'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
}
}
def _merge_configs(self, base: Dict, override: Dict) -> Dict:
"""Recursively merge configuration dictionaries."""
result = base.copy()
for key, value in override.items():
if key in result and isinstance(result[key], dict) and isinstance(value, dict):
result[key] = self._merge_configs(result[key], value)
else:
result[key] = value
return result
def _add_gitignore_patterns(self):
"""Add patterns from .gitignore files to exclude_patterns."""
try:
# Find root directory (current working directory or script parent)
root_dir = Path.cwd()
gitignore_patterns = get_all_gitignore_patterns(str(root_dir))
if gitignore_patterns:
# Ensure exclude_patterns exists
if 'exclude_patterns' not in self.config:
self.config['exclude_patterns'] = []
# Add gitignore patterns, avoiding duplicates
existing_patterns = set(self.config['exclude_patterns'])
new_patterns = [p for p in gitignore_patterns if p not in existing_patterns]
self.config['exclude_patterns'].extend(new_patterns)
self.logger.info(f"Added {len(new_patterns)} patterns from .gitignore files")
except Exception as e:
self.logger.warning(f"Failed to load .gitignore patterns: {e}")
def _apply_env_overrides(self):
"""Apply environment variable overrides."""
env_mappings = {
'ANALYZER_CACHE_DIR': ('embedding', 'cache_dir'),
'ANALYZER_LOG_LEVEL': ('logging', 'level'),
'ANALYZER_MAX_FILES': ('token_limits', 'max_files'),
'ANALYZER_EMBEDDING_MODEL': ('embedding', 'model')
}
for env_var, config_path in env_mappings.items():
env_value = os.getenv(env_var)
if env_value:
self._set_nested_value(config_path, env_value)
self.logger.info(f"Applied environment override: {env_var} = {env_value}")
def _set_nested_value(self, path: tuple, value: str):
"""Set a nested configuration value."""
current = self.config
for key in path[:-1]:
if key not in current:
current[key] = {}
current = current[key]
# Try to convert value to appropriate type
if isinstance(current.get(path[-1]), int):
try:
value = int(value)
except ValueError:
pass
elif isinstance(current.get(path[-1]), bool):
value = value.lower() in ('true', '1', 'yes', 'on')
current[path[-1]] = value
def _validate_config(self):
"""Validate configuration values."""
required_sections = ['exclude_patterns', 'file_extensions', 'token_limits']
for section in required_sections:
if section not in self.config:
self.logger.warning(f"Missing required config section: {section}")
# Validate token limits
if 'token_limits' in self.config:
limits = self.config['token_limits']
if limits.get('small_project', 0) >= limits.get('medium_project', 0):
self.logger.warning("Token limit configuration may be incorrect")
def get(self, path: str, default: Any = None) -> Any:
"""Get configuration value using dot notation."""
keys = path.split('.')
current = self.config
try:
for key in keys:
current = current[key]
return current
except (KeyError, TypeError):
return default
def set(self, path: str, value: Any):
"""Set configuration value using dot notation."""
keys = path.split('.')
current = self.config
for key in keys[:-1]:
if key not in current:
current[key] = {}
current = current[key]
current[keys[-1]] = value
def get_exclude_patterns(self) -> List[str]:
"""Get all exclude patterns including gitignore patterns."""
return self.config.get('exclude_patterns', [])
def get_file_extensions(self) -> Dict[str, List[str]]:
"""Get file extension mappings."""
return self.config.get('file_extensions', {})
def is_embedding_enabled(self) -> bool:
"""Check if embedding functionality is enabled."""
return self.config.get('embedding', {}).get('enabled', False)
def get_cache_dir(self) -> str:
"""Get cache directory path."""
return self.config.get('embedding', {}).get('cache_dir', 'cache')
def to_dict(self) -> Dict[str, Any]:
"""Return configuration as dictionary."""
return self.config.copy()
def reload(self, config_path: Optional[str] = None):
"""Reload configuration from file."""
self._initialized = False
if config_path:
self.config_path = config_path
self.__init__(self.config_path)
# Global configuration instance
_global_config = None
def get_config(config_path: Optional[str] = None) -> Config:
"""Get global configuration instance."""
global _global_config
if _global_config is None:
_global_config = Config(config_path)
return _global_config
if __name__ == "__main__":
# Test configuration loading
config = Config()
print("Configuration loaded successfully!")
print(f"Cache dir: {config.get_cache_dir()}")
print(f"Exclude patterns: {len(config.get_exclude_patterns())}")
print(f"Embedding enabled: {config.is_embedding_enabled()}")


@@ -0,0 +1,359 @@
#!/usr/bin/env python3
"""
Context Analyzer Module for UltraThink Path-Aware Analyzer
Analyzes user prompts to extract relevant context and keywords.
"""
import re
import logging
from typing import Dict, List, Set, Tuple, Optional
from dataclasses import dataclass
from collections import Counter
import string
@dataclass
class AnalysisResult:
"""Results of context analysis."""
keywords: List[str]
domains: List[str]
languages: List[str]
file_patterns: List[str]
confidence_scores: Dict[str, float]
extracted_entities: Dict[str, List[str]]
class ContextAnalyzer:
"""Analyzes user prompts to understand context and intent."""
def __init__(self, config: Dict):
self.config = config
self.logger = logging.getLogger(__name__)
# Load domain and language mappings from config
self.domain_keywords = config.get('context_analysis', {}).get('domain_keywords', {})
self.language_indicators = config.get('context_analysis', {}).get('language_indicators', {})
# Common programming terms and patterns
self.technical_terms = self._build_technical_terms()
self.file_pattern_indicators = self._build_pattern_indicators()
# Stop words to filter out
self.stop_words = {
'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
'by', 'from', 'up', 'about', 'into', 'through', 'during', 'before', 'after',
'above', 'below', 'between', 'among', 'as', 'is', 'are', 'was', 'were', 'be',
'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would',
'could', 'should', 'may', 'might', 'must', 'can', 'this', 'that', 'these',
'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her',
'us', 'them', 'my', 'your', 'his', 'its', 'our', 'their'
}
def _build_technical_terms(self) -> Dict[str, List[str]]:
"""Build comprehensive list of technical terms grouped by category."""
return {
'authentication': [
'auth', 'authentication', 'login', 'logout', 'signin', 'signout',
'user', 'password', 'token', 'jwt', 'oauth', 'session', 'cookie',
'credential', 'authorize', 'permission', 'role', 'access'
],
'database': [
'database', 'db', 'sql', 'query', 'table', 'schema', 'migration',
'model', 'orm', 'entity', 'relation', 'index', 'transaction',
'crud', 'select', 'insert', 'update', 'delete', 'join'
],
'api': [
'api', 'rest', 'graphql', 'endpoint', 'route', 'controller',
'handler', 'middleware', 'service', 'request', 'response',
'http', 'get', 'post', 'put', 'delete', 'patch'
],
'frontend': [
'ui', 'component', 'view', 'template', 'page', 'layout',
'style', 'css', 'html', 'javascript', 'react', 'vue',
'angular', 'dom', 'event', 'state', 'props'
],
'backend': [
'server', 'service', 'business', 'logic', 'core', 'engine',
'worker', 'job', 'queue', 'cache', 'redis', 'memcache'
],
'testing': [
'test', 'testing', 'spec', 'unit', 'integration', 'e2e',
'mock', 'stub', 'fixture', 'assert', 'expect', 'should'
],
'configuration': [
'config', 'configuration', 'setting', 'environment', 'env',
'variable', 'constant', 'parameter', 'option'
],
'utility': [
'util', 'utility', 'helper', 'common', 'shared', 'lib',
'library', 'tool', 'function', 'method'
]
}
def _build_pattern_indicators(self) -> Dict[str, List[str]]:
"""Build indicators that suggest specific file patterns."""
return {
'source_code': ['implement', 'code', 'function', 'class', 'method'],
'tests': ['test', 'testing', 'spec', 'unittest', 'pytest'],
'documentation': ['doc', 'readme', 'guide', 'documentation', 'manual'],
'configuration': ['config', 'setting', 'env', 'environment'],
'build': ['build', 'compile', 'package', 'deploy', 'release'],
'scripts': ['script', 'automation', 'tool', 'utility']
}
def extract_keywords(self, text: str) -> List[str]:
"""Extract meaningful keywords from text."""
# Clean and normalize text
text = text.lower()
text = re.sub(r'[^\w\s-]', ' ', text) # Remove punctuation except hyphens
words = text.split()
# Filter stop words and short words
keywords = []
for word in words:
word = word.strip('-') # Remove leading/trailing hyphens
if (len(word) >= 2 and
word not in self.stop_words and
not word.isdigit()):
keywords.append(word)
# Count frequency and return top keywords
word_counts = Counter(keywords)
return [word for word, count in word_counts.most_common(20)]
def identify_domains(self, keywords: List[str]) -> List[Tuple[str, float]]:
"""Identify relevant domains based on keywords."""
domain_scores = {}
for domain, domain_keywords in self.domain_keywords.items():
score = 0.0
matched_keywords = []
for keyword in keywords:
for domain_keyword in domain_keywords:
if keyword in domain_keyword or domain_keyword in keyword:
score += 1.0
matched_keywords.append(keyword)
break
if score > 0:
# Normalize score by number of domain keywords
normalized_score = score / len(domain_keywords)
domain_scores[domain] = normalized_score
# Also check technical terms
for category, terms in self.technical_terms.items():
score = 0.0
for keyword in keywords:
for term in terms:
if keyword in term or term in keyword:
score += 1.0
break
if score > 0:
normalized_score = score / len(terms)
if category not in domain_scores:
domain_scores[category] = normalized_score
else:
domain_scores[category] = max(domain_scores[category], normalized_score)
# Sort by score and return top domains
sorted_domains = sorted(domain_scores.items(), key=lambda x: x[1], reverse=True)
return sorted_domains[:5]
def identify_languages(self, keywords: List[str]) -> List[Tuple[str, float]]:
"""Identify programming languages based on keywords."""
language_scores = {}
for language, indicators in self.language_indicators.items():
score = 0.0
for keyword in keywords:
for indicator in indicators:
if keyword in indicator or indicator in keyword:
score += 1.0
break
if score > 0:
normalized_score = score / len(indicators)
language_scores[language] = normalized_score
sorted_languages = sorted(language_scores.items(), key=lambda x: x[1], reverse=True)
return sorted_languages[:3]
def extract_file_patterns(self, text: str) -> List[str]:
"""Extract explicit file patterns from text."""
patterns = []
# Look for @{pattern} syntax
at_patterns = re.findall(r'@\{([^}]+)\}', text)
patterns.extend(at_patterns)
# Look for file extensions
extensions = re.findall(r'\*\.(\w+)', text)
for ext in extensions:
patterns.append(f"*.{ext}")
# Look for directory patterns
dir_patterns = re.findall(r'(\w+)/\*\*?', text)
for dir_pattern in dir_patterns:
patterns.append(f"{dir_pattern}/**/*")
# Look for specific file names
file_patterns = re.findall(r'\b(\w+\.\w+)\b', text)
for file_pattern in file_patterns:
if '.' in file_pattern:
patterns.append(file_pattern)
return list(set(patterns)) # Remove duplicates
def suggest_patterns_from_domains(self, domains: List[str]) -> List[str]:
"""Suggest file patterns based on identified domains."""
patterns = []
domain_to_patterns = {
'auth': ['**/auth/**/*', '**/login/**/*', '**/user/**/*'],
'authentication': ['**/auth/**/*', '**/login/**/*', '**/user/**/*'],
'database': ['**/db/**/*', '**/model/**/*', '**/migration/**/*', '**/*model*'],
'api': ['**/api/**/*', '**/route/**/*', '**/controller/**/*', '**/handler/**/*'],
'frontend': ['**/ui/**/*', '**/component/**/*', '**/view/**/*', '**/template/**/*'],
'backend': ['**/service/**/*', '**/core/**/*', '**/server/**/*'],
'test': ['**/test/**/*', '**/spec/**/*', '**/*test*', '**/*spec*'],
'testing': ['**/test/**/*', '**/spec/**/*', '**/*test*', '**/*spec*'],
'config': ['**/config/**/*', '**/*.config.*', '**/env/**/*'],
'configuration': ['**/config/**/*', '**/*.config.*', '**/env/**/*'],
'util': ['**/util/**/*', '**/helper/**/*', '**/common/**/*'],
'utility': ['**/util/**/*', '**/helper/**/*', '**/common/**/*']
}
for domain in domains:
if domain in domain_to_patterns:
patterns.extend(domain_to_patterns[domain])
return list(set(patterns)) # Remove duplicates
def extract_entities(self, text: str) -> Dict[str, List[str]]:
"""Extract named entities from text."""
entities = {
'files': [],
'functions': [],
'classes': [],
'variables': [],
'technologies': []
}
# File patterns
file_patterns = re.findall(r'\b(\w+\.\w+)\b', text)
entities['files'] = list(set(file_patterns))
# Function patterns (camelCase or snake_case followed by parentheses)
function_patterns = re.findall(r'\b([a-z][a-zA-Z0-9_]*)\s*\(', text)
entities['functions'] = list(set(function_patterns))
# Class patterns (PascalCase)
class_patterns = re.findall(r'\b([A-Z][a-zA-Z0-9]*)\b', text)
entities['classes'] = list(set(class_patterns))
# Technology mentions
tech_keywords = [
'react', 'vue', 'angular', 'node', 'express', 'django', 'flask',
'spring', 'rails', 'laravel', 'docker', 'kubernetes', 'aws',
'azure', 'gcp', 'postgresql', 'mysql', 'mongodb', 'redis'
]
text_lower = text.lower()
for tech in tech_keywords:
if tech in text_lower:
entities['technologies'].append(tech)
return entities
def analyze(self, prompt: str) -> AnalysisResult:
"""Perform comprehensive analysis of the user prompt."""
self.logger.debug(f"Analyzing prompt: {prompt[:100]}...")
# Extract keywords
keywords = self.extract_keywords(prompt)
# Identify domains and languages
domains_with_scores = self.identify_domains(keywords)
languages_with_scores = self.identify_languages(keywords)
# Extract patterns and entities
explicit_patterns = self.extract_file_patterns(prompt)
entities = self.extract_entities(prompt)
# Get top domains and languages
domains = [domain for domain, score in domains_with_scores]
languages = [lang for lang, score in languages_with_scores]
# Suggest additional patterns based on domains
suggested_patterns = self.suggest_patterns_from_domains(domains)
# Combine explicit and suggested patterns
all_patterns = list(set(explicit_patterns + suggested_patterns))
# Build confidence scores
confidence_scores = {
'keywords': len(keywords) / 20, # Normalize to 0-1
'domain_match': max([score for _, score in domains_with_scores[:1]], default=0),
'language_match': max([score for _, score in languages_with_scores[:1]], default=0),
            'pattern_extraction': min(len(explicit_patterns) / 5, 1.0),  # Normalize to 0-1
}
result = AnalysisResult(
keywords=keywords,
domains=domains,
languages=languages,
file_patterns=all_patterns,
confidence_scores=confidence_scores,
extracted_entities=entities
)
self.logger.info(f"Analysis complete: {len(domains)} domains, {len(languages)} languages, {len(all_patterns)} patterns")
return result
def main():
"""Command-line interface for context analyzer."""
import yaml
import argparse
import json
parser = argparse.ArgumentParser(description="Context Analyzer for UltraThink")
parser.add_argument("prompt", help="Prompt to analyze")
parser.add_argument("--config", default="config.yaml", help="Configuration file path")
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
args = parser.parse_args()
# Setup logging
level = logging.DEBUG if args.verbose else logging.INFO
logging.basicConfig(level=level, format='%(levelname)s: %(message)s')
# Load configuration
from pathlib import Path
config_path = Path(__file__).parent / args.config
with open(config_path, 'r', encoding='utf-8') as f:
config = yaml.safe_load(f)
# Create analyzer
analyzer = ContextAnalyzer(config)
# Analyze prompt
result = analyzer.analyze(args.prompt)
# Output results
print(f"Keywords: {', '.join(result.keywords[:10])}")
print(f"Domains: {', '.join(result.domains[:5])}")
print(f"Languages: {', '.join(result.languages[:3])}")
print(f"Patterns: {', '.join(result.file_patterns[:10])}")
if args.verbose:
print("\nDetailed Results:")
print(json.dumps({
'keywords': result.keywords,
'domains': result.domains,
'languages': result.languages,
'file_patterns': result.file_patterns,
'confidence_scores': result.confidence_scores,
'extracted_entities': result.extracted_entities
}, indent=2))
if __name__ == "__main__":
main()
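
A minimal sketch of driving ContextAnalyzer with an inline config instead of config.yaml (the import path, prompt, and expected output are illustrative assumptions):

```python
from core.context_analyzer import ContextAnalyzer

config = {
    'context_analysis': {
        'domain_keywords': {'api': ['api', 'endpoint', 'route']},
        'language_indicators': {'python': ['.py', 'python', 'pip', 'requirements.txt']},
    }
}
analyzer = ContextAnalyzer(config)
result = analyzer.analyze("fix the pagination bug in the Python API endpoint handlers")
print(result.domains)        # e.g. ['api', ...]
print(result.languages)      # e.g. ['python']
print(result.file_patterns)  # explicit @{...} patterns plus domain-based suggestions
```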


@@ -0,0 +1,453 @@
#!/usr/bin/env python3
"""
Embedding Manager Module for UltraThink Path-Aware Analyzer
Manages embeddings for semantic similarity search (RAG functionality).
"""
from __future__ import annotations  # keep np.ndarray annotations unevaluated so the module imports without NumPy
import os
import json
import hashlib
import logging
import pickle
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Any
from dataclasses import dataclass
import time
# Optional imports for embedding functionality
try:
import numpy as np
NUMPY_AVAILABLE = True
except ImportError:
NUMPY_AVAILABLE = False
try:
from sentence_transformers import SentenceTransformer
SENTENCE_TRANSFORMERS_AVAILABLE = True
except ImportError:
SENTENCE_TRANSFORMERS_AVAILABLE = False
from .file_indexer import FileInfo
@dataclass
class EmbeddingInfo:
"""Information about a file's embedding."""
file_path: str
content_hash: str
embedding_hash: str
created_time: float
vector_size: int
@dataclass
class SimilarityResult:
"""Result of similarity search."""
file_info: FileInfo
similarity_score: float
matching_content: str
class EmbeddingManager:
"""Manages embeddings for semantic file matching."""
def __init__(self, config: Dict):
self.config = config
self.logger = logging.getLogger(__name__)
# Check if embeddings are enabled
self.enabled = config.get('embedding', {}).get('enabled', False)
if not self.enabled:
self.logger.info("Embeddings disabled in configuration")
return
# Check dependencies
if not NUMPY_AVAILABLE:
self.logger.warning("NumPy not available, disabling embeddings")
self.enabled = False
return
if not SENTENCE_TRANSFORMERS_AVAILABLE:
self.logger.warning("sentence-transformers not available, disabling embeddings")
self.enabled = False
return
# Load configuration
self.model_name = config.get('embedding', {}).get('model', 'all-MiniLM-L6-v2')
self.cache_dir = Path(config.get('embedding', {}).get('cache_dir', '.claude/cache/embeddings'))
self.similarity_threshold = config.get('embedding', {}).get('similarity_threshold', 0.6)
self.max_context_length = config.get('embedding', {}).get('max_context_length', 512)
self.batch_size = config.get('embedding', {}).get('batch_size', 32)
# Setup cache directories
self.cache_dir.mkdir(parents=True, exist_ok=True)
self.embeddings_file = self.cache_dir / "embeddings.pkl"
self.index_file = self.cache_dir / "embedding_index.json"
# Initialize model lazily
self._model = None
self._embeddings_cache = None
self._embedding_index = None
@property
def model(self):
"""Lazy load the embedding model."""
if not self.enabled:
return None
if self._model is None:
try:
self.logger.info(f"Loading embedding model: {self.model_name}")
self._model = SentenceTransformer(self.model_name)
self.logger.info(f"Model loaded successfully")
except Exception as e:
self.logger.error(f"Failed to load embedding model: {e}")
self.enabled = False
return None
return self._model
def embeddings_exist(self) -> bool:
"""Check if embeddings cache exists."""
return self.embeddings_file.exists() and self.index_file.exists()
def _load_embedding_cache(self) -> Dict[str, np.ndarray]:
"""Load embeddings from cache."""
if self._embeddings_cache is not None:
return self._embeddings_cache
if not self.embeddings_file.exists():
self._embeddings_cache = {}
return self._embeddings_cache
try:
with open(self.embeddings_file, 'rb') as f:
self._embeddings_cache = pickle.load(f)
self.logger.debug(f"Loaded {len(self._embeddings_cache)} embeddings from cache")
except Exception as e:
self.logger.warning(f"Failed to load embeddings cache: {e}")
self._embeddings_cache = {}
return self._embeddings_cache
def _save_embedding_cache(self):
"""Save embeddings to cache."""
if self._embeddings_cache is None:
return
try:
with open(self.embeddings_file, 'wb') as f:
pickle.dump(self._embeddings_cache, f)
self.logger.debug(f"Saved {len(self._embeddings_cache)} embeddings to cache")
except Exception as e:
self.logger.error(f"Failed to save embeddings cache: {e}")
def _load_embedding_index(self) -> Dict[str, EmbeddingInfo]:
"""Load embedding index."""
if self._embedding_index is not None:
return self._embedding_index
if not self.index_file.exists():
self._embedding_index = {}
return self._embedding_index
try:
with open(self.index_file, 'r', encoding='utf-8') as f:
data = json.load(f)
self._embedding_index = {}
for path, info_dict in data.items():
self._embedding_index[path] = EmbeddingInfo(**info_dict)
self.logger.debug(f"Loaded embedding index with {len(self._embedding_index)} entries")
except Exception as e:
self.logger.warning(f"Failed to load embedding index: {e}")
self._embedding_index = {}
return self._embedding_index
def _save_embedding_index(self):
"""Save embedding index."""
if self._embedding_index is None:
return
try:
data = {}
for path, info in self._embedding_index.items():
data[path] = {
'file_path': info.file_path,
'content_hash': info.content_hash,
'embedding_hash': info.embedding_hash,
'created_time': info.created_time,
'vector_size': info.vector_size
}
with open(self.index_file, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2)
self.logger.debug(f"Saved embedding index with {len(self._embedding_index)} entries")
except Exception as e:
self.logger.error(f"Failed to save embedding index: {e}")
def _extract_text_content(self, file_info: FileInfo) -> Optional[str]:
"""Extract text content from a file for embedding."""
try:
file_path = Path(file_info.path)
# Skip binary files and very large files
if file_info.size > self.config.get('performance', {}).get('max_file_size', 10485760):
return None
# Only process text-based files
text_extensions = {'.py', '.js', '.ts', '.tsx', '.jsx', '.java', '.cpp', '.c', '.h',
'.rs', '.go', '.php', '.rb', '.sh', '.bash', '.md', '.txt', '.json',
'.yaml', '.yml', '.xml', '.html', '.css', '.scss', '.sass'}
if file_info.extension.lower() not in text_extensions:
return None
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
# Truncate content if too long
if len(content) > self.max_context_length * 4: # Approximate token limit
content = content[:self.max_context_length * 4]
return content
except Exception as e:
self.logger.debug(f"Could not extract content from {file_info.path}: {e}")
return None
def _create_embedding(self, text: str) -> Optional[np.ndarray]:
"""Create embedding for text content."""
if not self.enabled or self.model is None:
return None
try:
# Truncate text if needed
if len(text) > self.max_context_length * 4:
text = text[:self.max_context_length * 4]
embedding = self.model.encode([text])[0]
return embedding
except Exception as e:
self.logger.warning(f"Failed to create embedding: {e}")
return None
def _get_content_hash(self, content: str) -> str:
"""Get hash of content for caching."""
return hashlib.md5(content.encode('utf-8')).hexdigest()
def _get_embedding_hash(self, embedding: np.ndarray) -> str:
"""Get hash of embedding for verification."""
return hashlib.md5(embedding.tobytes()).hexdigest()
def update_embeddings(self, file_index: Dict[str, FileInfo], force_rebuild: bool = False) -> int:
"""Update embeddings for files in the index."""
if not self.enabled:
self.logger.info("Embeddings disabled, skipping update")
return 0
self.logger.info("Updating embeddings...")
# Load caches
embeddings_cache = self._load_embedding_cache()
embedding_index = self._load_embedding_index()
new_embeddings = 0
batch_texts = []
batch_paths = []
for file_path, file_info in file_index.items():
# Check if embedding exists and is current
if not force_rebuild and file_path in embedding_index:
cached_info = embedding_index[file_path]
if cached_info.content_hash == file_info.content_hash:
continue # Embedding is current
# Extract content
content = self._extract_text_content(file_info)
if content is None:
continue
# Prepare for batch processing
batch_texts.append(content)
batch_paths.append(file_path)
# Process batch when full
if len(batch_texts) >= self.batch_size:
self._process_batch(batch_texts, batch_paths, file_index, embeddings_cache, embedding_index)
new_embeddings += len(batch_texts)
batch_texts = []
batch_paths = []
# Process remaining batch
if batch_texts:
self._process_batch(batch_texts, batch_paths, file_index, embeddings_cache, embedding_index)
new_embeddings += len(batch_texts)
# Save caches
self._save_embedding_cache()
self._save_embedding_index()
self.logger.info(f"Updated {new_embeddings} embeddings")
return new_embeddings
def _process_batch(self, texts: List[str], paths: List[str], file_index: Dict[str, FileInfo],
embeddings_cache: Dict[str, np.ndarray], embedding_index: Dict[str, EmbeddingInfo]):
"""Process a batch of texts for embedding."""
try:
# Create embeddings for batch
embeddings = self.model.encode(texts)
for i, (text, path) in enumerate(zip(texts, paths)):
embedding = embeddings[i]
file_info = file_index[path]
# Store embedding
content_hash = self._get_content_hash(text)
embedding_hash = self._get_embedding_hash(embedding)
embeddings_cache[path] = embedding
embedding_index[path] = EmbeddingInfo(
file_path=path,
content_hash=content_hash,
embedding_hash=embedding_hash,
created_time=time.time(),
vector_size=len(embedding)
)
except Exception as e:
self.logger.error(f"Failed to process embedding batch: {e}")
def find_similar_files(self, query: str, file_index: Dict[str, FileInfo],
top_k: int = 20) -> List[SimilarityResult]:
"""Find files similar to the query using embeddings."""
if not self.enabled:
return []
# Create query embedding
query_embedding = self._create_embedding(query)
if query_embedding is None:
return []
# Load embeddings
embeddings_cache = self._load_embedding_cache()
if not embeddings_cache:
self.logger.warning("No embeddings available for similarity search")
return []
# Calculate similarities
similarities = []
for file_path, file_embedding in embeddings_cache.items():
if file_path not in file_index:
continue
try:
# Calculate cosine similarity
similarity = np.dot(query_embedding, file_embedding) / (
np.linalg.norm(query_embedding) * np.linalg.norm(file_embedding)
)
if similarity >= self.similarity_threshold:
similarities.append((file_path, similarity))
except Exception as e:
self.logger.debug(f"Failed to calculate similarity for {file_path}: {e}")
continue
# Sort by similarity
similarities.sort(key=lambda x: x[1], reverse=True)
# Create results
results = []
for file_path, similarity in similarities[:top_k]:
file_info = file_index[file_path]
# Extract a snippet of matching content
content = self._extract_text_content(file_info)
snippet = content[:200] + "..." if content and len(content) > 200 else content or ""
result = SimilarityResult(
file_info=file_info,
similarity_score=similarity,
matching_content=snippet
)
results.append(result)
self.logger.info(f"Found {len(results)} similar files for query")
return results
def get_stats(self) -> Dict[str, Any]:
"""Get statistics about the embedding cache."""
if not self.enabled:
return {'enabled': False}
embedding_index = self._load_embedding_index()
embeddings_cache = self._load_embedding_cache()
return {
'enabled': True,
'model_name': self.model_name,
'total_embeddings': len(embedding_index),
'cache_size_mb': os.path.getsize(self.embeddings_file) / 1024 / 1024 if self.embeddings_file.exists() else 0,
'similarity_threshold': self.similarity_threshold,
'vector_size': list(embedding_index.values())[0].vector_size if embedding_index else 0
}
def main():
"""Command-line interface for embedding manager."""
import yaml
import argparse
from .file_indexer import FileIndexer
parser = argparse.ArgumentParser(description="Embedding Manager for UltraThink")
parser.add_argument("--config", default="config.yaml", help="Configuration file path")
parser.add_argument("--update", action="store_true", help="Update embeddings")
parser.add_argument("--rebuild", action="store_true", help="Force rebuild all embeddings")
parser.add_argument("--query", help="Search for similar files")
parser.add_argument("--stats", action="store_true", help="Show embedding statistics")
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
args = parser.parse_args()
# Setup logging
level = logging.DEBUG if args.verbose else logging.INFO
logging.basicConfig(level=level, format='%(levelname)s: %(message)s')
# Load configuration
config_path = Path(__file__).parent / args.config
with open(config_path, 'r', encoding='utf-8') as f:
config = yaml.safe_load(f)
# Create components
indexer = FileIndexer(config)
embedding_manager = EmbeddingManager(config)
if not embedding_manager.enabled:
print("Embeddings are disabled. Enable in config.yaml or install required dependencies.")
return
# Load file index
file_index = indexer.load_index()
if not file_index:
print("Building file index...")
file_index = indexer.build_index()
if args.stats:
stats = embedding_manager.get_stats()
print("Embedding Statistics:")
for key, value in stats.items():
print(f" {key}: {value}")
return
if args.update or args.rebuild:
count = embedding_manager.update_embeddings(file_index, force_rebuild=args.rebuild)
print(f"Updated {count} embeddings")
if args.query:
results = embedding_manager.find_similar_files(args.query, file_index)
print(f"Found {len(results)} similar files:")
for result in results:
print(f" {result.file_info.relative_path} (similarity: {result.similarity_score:.3f})")
if args.verbose and result.matching_content:
print(f" Content: {result.matching_content[:100]}...")
if __name__ == "__main__":
main()
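
A sketch of the semantic-search path (import paths assumed to be `core.*`). It needs numpy and sentence-transformers installed and embeddings enabled in the config; otherwise the manager disables itself and the fallback branch runs:

```python
from core.config import get_config
from core.file_indexer import FileIndexer
from core.embedding_manager import EmbeddingManager

config = get_config().to_dict()
index = FileIndexer(config).build_index()

manager = EmbeddingManager(config)
if manager.enabled:
    manager.update_embeddings(index)      # create or refresh cached vectors
    hits = manager.find_similar_files("user authentication and JWT handling", index, top_k=5)
    for hit in hits:
        print(f"{hit.similarity_score:.3f}  {hit.file_info.relative_path}")
else:
    print("embeddings disabled:", manager.get_stats())
```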


@@ -0,0 +1,383 @@
#!/usr/bin/env python3
"""
File Indexer Module for UltraThink Path-Aware Analyzer
Builds and maintains an index of repository files with metadata.
Enhanced with gitignore support and unified configuration.
"""
import os
import hashlib
import json
import time
import logging
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple, Union
from dataclasses import dataclass, asdict
from datetime import datetime
import fnmatch
from .gitignore_parser import GitignoreParser
@dataclass
class FileInfo:
"""Information about a single file in the repository."""
path: str
relative_path: str
size: int
modified_time: float
extension: str
category: str # code, docs, config, web
estimated_tokens: int
content_hash: str
def to_dict(self) -> Dict:
return asdict(self)
@classmethod
def from_dict(cls, data: Dict) -> 'FileInfo':
return cls(**data)
@dataclass
class IndexStats:
"""Statistics about the file index."""
total_files: int
total_tokens: int
total_size: int
categories: Dict[str, int]
last_updated: float
def to_dict(self) -> Dict:
return asdict(self)
class FileIndexer:
"""Builds and maintains an efficient index of repository files."""
def __init__(self, config: Union['Config', Dict], root_path: str = "."):
# Support both Config object and Dict for backward compatibility
if hasattr(config, 'to_dict'):
self.config_obj = config
self.config = config.to_dict()
else:
self.config_obj = None
self.config = config
self.root_path = Path(root_path).resolve()
self.cache_dir = Path(self.config.get('embedding', {}).get('cache_dir', '.claude/cache'))
self.cache_dir.mkdir(parents=True, exist_ok=True)
self.index_file = self.cache_dir / "file_index.json"
# Setup logging
self.logger = logging.getLogger(__name__)
# File extension mappings
self.extension_categories = self._build_extension_map()
# Exclude patterns from config
self.exclude_patterns = list(self.config.get('exclude_patterns', []))
# Initialize gitignore parser and add patterns
self.gitignore_parser = GitignoreParser(str(self.root_path))
self._load_gitignore_patterns()
# Performance settings
self.max_file_size = self.config.get('performance', {}).get('max_file_size', 10485760)
def _build_extension_map(self) -> Dict[str, str]:
"""Build mapping from file extensions to categories."""
ext_map = {}
for category, extensions in self.config.get('file_extensions', {}).items():
for ext in extensions:
ext_map[ext.lower()] = category
return ext_map
def _load_gitignore_patterns(self):
"""Load patterns from .gitignore files and add to exclude_patterns."""
try:
gitignore_patterns = self.gitignore_parser.parse_all_gitignores()
if gitignore_patterns:
# Avoid duplicates
existing_patterns = set(self.exclude_patterns)
new_patterns = [p for p in gitignore_patterns if p not in existing_patterns]
self.exclude_patterns.extend(new_patterns)
self.logger.info(f"Added {len(new_patterns)} patterns from .gitignore files")
except Exception as e:
self.logger.warning(f"Failed to load .gitignore patterns: {e}")
def _should_exclude_file(self, file_path: Path) -> bool:
"""Check if file should be excluded based on patterns and gitignore rules."""
relative_path = str(file_path.relative_to(self.root_path))
# Check against exclude patterns from config
for pattern in self.exclude_patterns:
# Convert pattern to work with fnmatch
if fnmatch.fnmatch(relative_path, pattern) or fnmatch.fnmatch(str(file_path), pattern):
return True
# Check if any parent directory matches
parts = relative_path.split(os.sep)
for i in range(len(parts)):
partial_path = "/".join(parts[:i+1])
if fnmatch.fnmatch(partial_path, pattern):
return True
        # Gitignore rules are already merged into self.exclude_patterns by
        # _load_gitignore_patterns, so no separate gitignore check is needed here.
        return False
def _estimate_tokens(self, file_path: Path) -> int:
"""Estimate token count for a file (chars/4 approximation)."""
try:
if file_path.stat().st_size > self.max_file_size:
return file_path.stat().st_size // 8 # Penalty for large files
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
return len(content) // 4 # Rough approximation
except (UnicodeDecodeError, OSError):
# Binary files or unreadable files
return file_path.stat().st_size // 8
def _get_file_hash(self, file_path: Path) -> str:
"""Get a hash of file metadata for change detection."""
stat = file_path.stat()
return hashlib.md5(f"{file_path}:{stat.st_size}:{stat.st_mtime}".encode()).hexdigest()
def _categorize_file(self, file_path: Path) -> str:
"""Categorize file based on extension."""
extension = file_path.suffix.lower()
return self.extension_categories.get(extension, 'other')
def _scan_file(self, file_path: Path) -> Optional[FileInfo]:
"""Scan a single file and create FileInfo."""
try:
if not file_path.is_file() or self._should_exclude_file(file_path):
return None
stat = file_path.stat()
relative_path = str(file_path.relative_to(self.root_path))
file_info = FileInfo(
path=str(file_path),
relative_path=relative_path,
size=stat.st_size,
modified_time=stat.st_mtime,
extension=file_path.suffix.lower(),
category=self._categorize_file(file_path),
estimated_tokens=self._estimate_tokens(file_path),
content_hash=self._get_file_hash(file_path)
)
return file_info
except (OSError, PermissionError) as e:
self.logger.warning(f"Could not scan file {file_path}: {e}")
return None
def build_index(self, force_rebuild: bool = False) -> Dict[str, FileInfo]:
"""Build or update the file index."""
self.logger.info(f"Building file index for {self.root_path}")
# Load existing index if available
existing_index = {}
if not force_rebuild and self.index_file.exists():
existing_index = self.load_index()
new_index = {}
changed_files = 0
# Walk through all files
for file_path in self.root_path.rglob('*'):
if not file_path.is_file():
continue
file_info = self._scan_file(file_path)
if file_info is None:
continue
# Check if file has changed
relative_path = file_info.relative_path
if relative_path in existing_index:
old_info = existing_index[relative_path]
if old_info.content_hash == file_info.content_hash:
# File unchanged, keep old info
new_index[relative_path] = old_info
continue
# File is new or changed
new_index[relative_path] = file_info
changed_files += 1
self.logger.info(f"Indexed {len(new_index)} files ({changed_files} new/changed)")
# Save index
self.save_index(new_index)
return new_index
def load_index(self) -> Dict[str, FileInfo]:
"""Load file index from cache."""
if not self.index_file.exists():
return {}
try:
with open(self.index_file, 'r', encoding='utf-8') as f:
data = json.load(f)
index = {}
for path, info_dict in data.get('files', {}).items():
index[path] = FileInfo.from_dict(info_dict)
return index
except (json.JSONDecodeError, KeyError) as e:
self.logger.warning(f"Could not load index: {e}")
return {}
def save_index(self, index: Dict[str, FileInfo]) -> None:
"""Save file index to cache."""
try:
# Calculate stats
stats = self._calculate_stats(index)
data = {
'stats': stats.to_dict(),
'files': {path: info.to_dict() for path, info in index.items()}
}
with open(self.index_file, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2)
except OSError as e:
self.logger.error(f"Could not save index: {e}")
def _calculate_stats(self, index: Dict[str, FileInfo]) -> IndexStats:
"""Calculate statistics for the index."""
total_files = len(index)
total_tokens = sum(info.estimated_tokens for info in index.values())
total_size = sum(info.size for info in index.values())
categories = {}
for info in index.values():
categories[info.category] = categories.get(info.category, 0) + 1
return IndexStats(
total_files=total_files,
total_tokens=total_tokens,
total_size=total_size,
categories=categories,
last_updated=time.time()
)
def get_stats(self) -> Optional[IndexStats]:
"""Get statistics about the current index."""
if not self.index_file.exists():
return None
try:
with open(self.index_file, 'r', encoding='utf-8') as f:
data = json.load(f)
return IndexStats(**data.get('stats', {}))
except (json.JSONDecodeError, KeyError):
return None
def find_files_by_pattern(self, pattern: str, index: Optional[Dict[str, FileInfo]] = None) -> List[FileInfo]:
"""Find files matching a glob pattern."""
if index is None:
index = self.load_index()
matching_files = []
for path, info in index.items():
if fnmatch.fnmatch(path, pattern) or fnmatch.fnmatch(info.path, pattern):
matching_files.append(info)
return matching_files
def find_files_by_category(self, category: str, index: Optional[Dict[str, FileInfo]] = None) -> List[FileInfo]:
"""Find files by category (code, docs, config, etc.)."""
if index is None:
index = self.load_index()
return [info for info in index.values() if info.category == category]
def find_files_by_keywords(self, keywords: List[str], index: Optional[Dict[str, FileInfo]] = None) -> List[FileInfo]:
"""Find files whose paths contain any of the specified keywords."""
if index is None:
index = self.load_index()
matching_files = []
keywords_lower = [kw.lower() for kw in keywords]
for info in index.values():
path_lower = info.relative_path.lower()
if any(keyword in path_lower for keyword in keywords_lower):
matching_files.append(info)
return matching_files
def get_recent_files(self, limit: int = 20, index: Optional[Dict[str, FileInfo]] = None) -> List[FileInfo]:
"""Get most recently modified files."""
if index is None:
index = self.load_index()
files = list(index.values())
files.sort(key=lambda f: f.modified_time, reverse=True)
return files[:limit]
def main():
"""Command-line interface for file indexer."""
import yaml
import argparse
parser = argparse.ArgumentParser(description="File Indexer for UltraThink")
parser.add_argument("--config", default="config.yaml", help="Configuration file path")
parser.add_argument("--rebuild", action="store_true", help="Force rebuild index")
parser.add_argument("--stats", action="store_true", help="Show index statistics")
parser.add_argument("--pattern", help="Find files matching pattern")
args = parser.parse_args()
# Load configuration
config_path = Path(__file__).parent / args.config
with open(config_path, 'r', encoding='utf-8') as f:
config = yaml.safe_load(f)
# Setup logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
# Create indexer
indexer = FileIndexer(config)
if args.stats:
stats = indexer.get_stats()
if stats:
print(f"Total files: {stats.total_files}")
print(f"Total tokens: {stats.total_tokens:,}")
print(f"Total size: {stats.total_size:,} bytes")
print(f"Categories: {stats.categories}")
print(f"Last updated: {datetime.fromtimestamp(stats.last_updated)}")
else:
print("No index found. Run without --stats to build index.")
return
# Build index
index = indexer.build_index(force_rebuild=args.rebuild)
if args.pattern:
files = indexer.find_files_by_pattern(args.pattern, index)
print(f"Found {len(files)} files matching pattern '{args.pattern}':")
for file_info in files[:20]: # Limit output
print(f" {file_info.relative_path}")
else:
stats = indexer._calculate_stats(index)
print(f"Index built: {stats.total_files} files, ~{stats.total_tokens:,} tokens")
if __name__ == "__main__":
main()
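
A short sketch of the index lookup helpers (import paths assumed to be `core.*`; the keyword and category values are illustrative):

```python
from core.config import get_config
from core.file_indexer import FileIndexer

indexer = FileIndexer(get_config())
index = indexer.build_index()

auth_files = indexer.find_files_by_keywords(['auth', 'login'], index)
config_files = indexer.find_files_by_category('config', index)
recent = indexer.get_recent_files(limit=5, index=index)

print(f"{len(auth_files)} auth-related files, {len(config_files)} config files")
for info in recent:
    print(f"  {info.relative_path} (~{info.estimated_tokens} tokens)")
```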


@@ -0,0 +1,182 @@
#!/usr/bin/env python3
"""
GitIgnore Parser Module
Parses .gitignore files and converts rules to fnmatch patterns for file exclusion.
"""
import os
import fnmatch
from pathlib import Path
from typing import List, Set, Optional
class GitignoreParser:
"""Parser for .gitignore files that converts rules to fnmatch patterns."""
def __init__(self, root_path: str = "."):
self.root_path = Path(root_path).resolve()
self.patterns: List[str] = []
self.negation_patterns: List[str] = []
def parse_file(self, gitignore_path: str) -> List[str]:
"""Parse a .gitignore file and return exclude patterns."""
gitignore_file = Path(gitignore_path)
if not gitignore_file.exists():
return []
patterns = []
try:
with open(gitignore_file, 'r', encoding='utf-8') as f:
for line_num, line in enumerate(f, 1):
pattern = self._parse_line(line.strip())
if pattern:
patterns.append(pattern)
except (UnicodeDecodeError, IOError):
# Fallback to system encoding if UTF-8 fails
try:
with open(gitignore_file, 'r') as f:
for line_num, line in enumerate(f, 1):
pattern = self._parse_line(line.strip())
if pattern:
patterns.append(pattern)
except IOError:
# If file can't be read, return empty list
return []
return patterns
def _parse_line(self, line: str) -> Optional[str]:
"""Parse a single line from .gitignore file."""
# Skip empty lines and comments
if not line or line.startswith('#'):
return None
# Handle negation patterns (starting with !)
if line.startswith('!'):
# For now, we'll skip negation patterns as they require
# more complex logic to implement correctly
return None
# Convert gitignore pattern to fnmatch pattern
return self._convert_to_fnmatch(line)
def _convert_to_fnmatch(self, pattern: str) -> str:
"""Convert gitignore pattern to fnmatch pattern."""
# Remove trailing slash (directory indicator)
if pattern.endswith('/'):
pattern = pattern[:-1]
# Handle absolute paths (starting with /)
if pattern.startswith('/'):
pattern = pattern[1:]
# Make it match from root
return pattern
# Handle patterns that should match anywhere in the tree
# If pattern doesn't contain '/', it matches files/dirs at any level
if '/' not in pattern:
return f"*/{pattern}"
# Pattern contains '/', so it's relative to the gitignore location
return pattern
def parse_all_gitignores(self, root_path: Optional[str] = None) -> List[str]:
"""Parse all .gitignore files in the repository hierarchy."""
if root_path:
self.root_path = Path(root_path).resolve()
all_patterns = []
# Find all .gitignore files in the repository
gitignore_files = self._find_gitignore_files()
for gitignore_file in gitignore_files:
patterns = self.parse_file(gitignore_file)
all_patterns.extend(patterns)
return all_patterns
def _find_gitignore_files(self) -> List[Path]:
"""Find all .gitignore files in the repository."""
gitignore_files = []
# Start with root .gitignore
root_gitignore = self.root_path / '.gitignore'
if root_gitignore.exists():
gitignore_files.append(root_gitignore)
# Find .gitignore files in subdirectories
try:
for gitignore_file in self.root_path.rglob('.gitignore'):
if gitignore_file != root_gitignore:
gitignore_files.append(gitignore_file)
except (PermissionError, OSError):
# Skip directories we can't access
pass
return gitignore_files
def should_exclude(self, file_path: str, gitignore_patterns: List[str]) -> bool:
"""Check if a file should be excluded based on gitignore patterns."""
# Convert to relative path from root
try:
rel_path = str(Path(file_path).relative_to(self.root_path))
except ValueError:
# File is not under root path
return False
# Normalize path separators for consistent matching
rel_path = rel_path.replace(os.sep, '/')
for pattern in gitignore_patterns:
if self._matches_pattern(rel_path, pattern):
return True
return False
def _matches_pattern(self, file_path: str, pattern: str) -> bool:
"""Check if a file path matches a gitignore pattern."""
# Normalize pattern separators
pattern = pattern.replace(os.sep, '/')
# Handle different pattern types
if pattern.startswith('*/'):
# Pattern like */pattern - matches at any level
sub_pattern = pattern[2:]
return fnmatch.fnmatch(file_path, f"*/{sub_pattern}") or fnmatch.fnmatch(file_path, sub_pattern)
elif '/' in pattern:
# Pattern contains slash - match exact path
return fnmatch.fnmatch(file_path, pattern)
else:
# Simple pattern - match filename or directory at any level
parts = file_path.split('/')
return any(fnmatch.fnmatch(part, pattern) for part in parts)
def parse_gitignore(gitignore_path: str) -> List[str]:
"""Convenience function to parse a single .gitignore file."""
parser = GitignoreParser()
return parser.parse_file(gitignore_path)
def get_all_gitignore_patterns(root_path: str = ".") -> List[str]:
"""Convenience function to get all gitignore patterns in a repository."""
parser = GitignoreParser(root_path)
return parser.parse_all_gitignores()
if __name__ == "__main__":
import sys
if len(sys.argv) > 1:
gitignore_path = sys.argv[1]
patterns = parse_gitignore(gitignore_path)
print(f"Parsed {len(patterns)} patterns from {gitignore_path}:")
for pattern in patterns:
print(f" {pattern}")
else:
# Parse all .gitignore files in current directory
patterns = get_all_gitignore_patterns()
print(f"Found {len(patterns)} gitignore patterns:")
for pattern in patterns:
print(f" {pattern}")


@@ -0,0 +1,500 @@
#!/usr/bin/env python3
"""
Path Matcher Module for UltraThink Path-Aware Analyzer
Matches files to analysis context and ranks them by relevance.
"""
import re
import logging
import fnmatch
from typing import Dict, List, Tuple, Optional, Set
from dataclasses import dataclass
from pathlib import Path
import math
from .file_indexer import FileInfo
from .context_analyzer import AnalysisResult
@dataclass
class MatchResult:
"""Result of path matching with relevance score."""
file_info: FileInfo
relevance_score: float
match_reasons: List[str]
category_bonus: float
@dataclass
class PathMatchingResult:
"""Complete result of path matching operation."""
matched_files: List[MatchResult]
total_tokens: int
categories: Dict[str, int]
patterns_used: List[str]
confidence_score: float
class PathMatcher:
"""Matches files to analysis context using various algorithms."""
def __init__(self, config: Dict):
self.config = config
self.logger = logging.getLogger(__name__)
# Load scoring weights
self.weights = config.get('path_matching', {}).get('weights', {
'keyword_match': 0.4,
'extension_match': 0.2,
'directory_context': 0.2,
'file_size_penalty': 0.1,
'recency_bonus': 0.1
})
# Load limits
self.max_files_per_category = config.get('path_matching', {}).get('max_files_per_category', 20)
self.min_relevance_score = config.get('path_matching', {}).get('min_relevance_score', 0.1)
self.max_total_files = config.get('output', {}).get('max_total_files', 50)
# Load always include patterns
self.always_include = config.get('output', {}).get('always_include', [])
# Category priorities
self.category_priorities = {
'code': 1.0,
'config': 0.8,
'docs': 0.6,
'web': 0.4,
'other': 0.2
}
def _calculate_keyword_score(self, file_info: FileInfo, keywords: List[str]) -> Tuple[float, List[str]]:
"""Calculate score based on keyword matches in file path."""
if not keywords:
return 0.0, []
path_lower = file_info.relative_path.lower()
filename_lower = Path(file_info.relative_path).name.lower()
matches = []
score = 0.0
for keyword in keywords:
keyword_lower = keyword.lower()
# Exact filename match (highest weight)
if keyword_lower in filename_lower:
score += 2.0
matches.append(f"filename:{keyword}")
continue
# Directory name match
if keyword_lower in path_lower:
score += 1.0
matches.append(f"path:{keyword}")
continue
# Partial match in path components
path_parts = path_lower.split('/')
for part in path_parts:
if keyword_lower in part:
score += 0.5
matches.append(f"partial:{keyword}")
break
# Normalize by number of keywords
normalized_score = score / len(keywords) if keywords else 0.0
return min(normalized_score, 1.0), matches
def _calculate_extension_score(self, file_info: FileInfo, languages: List[str]) -> float:
"""Calculate score based on file extension relevance."""
if not languages:
return 0.5 # Neutral score
extension = file_info.extension.lower()
# Language-specific extension mapping
lang_extensions = {
'python': ['.py', '.pyx', '.pyi'],
'javascript': ['.js', '.jsx', '.mjs'],
'typescript': ['.ts', '.tsx'],
'java': ['.java'],
'go': ['.go'],
'rust': ['.rs'],
'cpp': ['.cpp', '.cc', '.cxx', '.c', '.h', '.hpp'],
'csharp': ['.cs'],
'php': ['.php'],
'ruby': ['.rb'],
'shell': ['.sh', '.bash', '.zsh']
}
score = 0.0
for language in languages:
if language in lang_extensions:
if extension in lang_extensions[language]:
score = 1.0
break
# Fallback to category-based scoring
if score == 0.0:
category_scores = {
'code': 1.0,
'config': 0.8,
'docs': 0.6,
'web': 0.4,
'other': 0.2
}
score = category_scores.get(file_info.category, 0.2)
return score
def _calculate_directory_score(self, file_info: FileInfo, domains: List[str]) -> Tuple[float, List[str]]:
"""Calculate score based on directory context."""
if not domains:
return 0.0, []
path_parts = file_info.relative_path.lower().split('/')
matches = []
score = 0.0
# Domain-specific directory patterns
domain_patterns = {
'auth': ['auth', 'authentication', 'login', 'user', 'account'],
'authentication': ['auth', 'authentication', 'login', 'user', 'account'],
'database': ['db', 'database', 'model', 'entity', 'migration', 'schema'],
'api': ['api', 'rest', 'graphql', 'route', 'controller', 'handler'],
'frontend': ['ui', 'component', 'view', 'template', 'client', 'web'],
'backend': ['service', 'server', 'core', 'business', 'logic'],
'test': ['test', 'spec', 'tests', '__tests__', 'testing'],
'testing': ['test', 'spec', 'tests', '__tests__', 'testing'],
'config': ['config', 'configuration', 'env', 'settings'],
'configuration': ['config', 'configuration', 'env', 'settings'],
'util': ['util', 'utils', 'helper', 'common', 'shared', 'lib'],
'utility': ['util', 'utils', 'helper', 'common', 'shared', 'lib']
}
for domain in domains:
if domain in domain_patterns:
patterns = domain_patterns[domain]
for pattern in patterns:
for part in path_parts:
if pattern in part:
score += 1.0
matches.append(f"dir:{domain}->{pattern}")
break
# Normalize by number of domains
normalized_score = score / len(domains) if domains else 0.0
return min(normalized_score, 1.0), matches
def _calculate_size_penalty(self, file_info: FileInfo) -> float:
"""Calculate penalty for very large files."""
max_size = self.config.get('performance', {}).get('max_file_size', 10485760) # 10MB
if file_info.size > max_size:
# Heavy penalty for oversized files
return -0.5
elif file_info.size > max_size * 0.5:
# Light penalty for large files
return -0.2
else:
return 0.0
def _calculate_recency_bonus(self, file_info: FileInfo) -> float:
"""Calculate bonus for recently modified files."""
import time
current_time = time.time()
file_age = current_time - file_info.modified_time
# Files modified in last day get bonus
if file_age < 86400: # 1 day
return 0.3
elif file_age < 604800: # 1 week
return 0.1
else:
return 0.0
def calculate_relevance_score(self, file_info: FileInfo, analysis: AnalysisResult) -> MatchResult:
"""Calculate overall relevance score for a file."""
# Calculate individual scores
keyword_score, keyword_matches = self._calculate_keyword_score(file_info, analysis.keywords)
extension_score = self._calculate_extension_score(file_info, analysis.languages)
directory_score, dir_matches = self._calculate_directory_score(file_info, analysis.domains)
size_penalty = self._calculate_size_penalty(file_info)
recency_bonus = self._calculate_recency_bonus(file_info)
# Apply weights
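        # Missing weight keys fall back to the .get() defaults below, which sum to 1.0.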
weighted_score = (
keyword_score * self.weights.get('keyword_match', 0.4) +
extension_score * self.weights.get('extension_match', 0.2) +
directory_score * self.weights.get('directory_context', 0.2) +
size_penalty * self.weights.get('file_size_penalty', 0.1) +
recency_bonus * self.weights.get('recency_bonus', 0.1)
)
# Category bonus
category_bonus = self.category_priorities.get(file_info.category, 0.2)
# Final score with category bonus
final_score = weighted_score + (category_bonus * 0.1)
# Collect match reasons
match_reasons = keyword_matches + dir_matches
if extension_score > 0.5:
match_reasons.append(f"extension:{file_info.extension}")
if recency_bonus > 0:
match_reasons.append("recent")
return MatchResult(
file_info=file_info,
relevance_score=max(0.0, final_score),
match_reasons=match_reasons,
category_bonus=category_bonus
)
def match_by_patterns(self, file_index: Dict[str, FileInfo], patterns: List[str]) -> List[FileInfo]:
"""Match files using explicit glob patterns."""
matched_files = []
for pattern in patterns:
for path, file_info in file_index.items():
# Try matching both relative path and full path
if (fnmatch.fnmatch(path, pattern) or
fnmatch.fnmatch(file_info.path, pattern) or
fnmatch.fnmatch(Path(path).name, pattern)):
matched_files.append(file_info)
# Remove duplicates based on path
seen_paths = set()
unique_files = []
for file_info in matched_files:
if file_info.relative_path not in seen_paths:
seen_paths.add(file_info.relative_path)
unique_files.append(file_info)
return unique_files
def match_always_include(self, file_index: Dict[str, FileInfo]) -> List[FileInfo]:
"""Match files that should always be included."""
return self.match_by_patterns(file_index, self.always_include)
def rank_files(self, files: List[FileInfo], analysis: AnalysisResult) -> List[MatchResult]:
"""Rank files by relevance score."""
match_results = []
for file_info in files:
match_result = self.calculate_relevance_score(file_info, analysis)
if match_result.relevance_score >= self.min_relevance_score:
match_results.append(match_result)
# Sort by relevance score (descending)
match_results.sort(key=lambda x: x.relevance_score, reverse=True)
return match_results
def select_best_files(self, ranked_files: List[MatchResult], token_limit: Optional[int] = None) -> List[MatchResult]:
"""Select the best files within token limits and category constraints."""
if not ranked_files:
return []
selected_files = []
total_tokens = 0
category_counts = {}
for match_result in ranked_files:
file_info = match_result.file_info
category = file_info.category
# Check category limit
if category_counts.get(category, 0) >= self.max_files_per_category:
continue
# Check token limit
if token_limit and total_tokens + file_info.estimated_tokens > token_limit:
continue
# Check total file limit
if len(selected_files) >= self.max_total_files:
break
# Add file
selected_files.append(match_result)
total_tokens += file_info.estimated_tokens
category_counts[category] = category_counts.get(category, 0) + 1
return selected_files
def match_files(self, file_index: Dict[str, FileInfo], analysis: AnalysisResult,
token_limit: Optional[int] = None, explicit_patterns: Optional[List[str]] = None) -> PathMatchingResult:
"""Main file matching function."""
self.logger.info(f"Matching files for analysis with {len(analysis.keywords)} keywords and {len(analysis.domains)} domains")
# Start with always-include files
always_include_files = self.match_always_include(file_index)
self.logger.debug(f"Always include: {len(always_include_files)} files")
# Add explicit pattern matches
pattern_files = []
patterns_used = []
if explicit_patterns:
pattern_files = self.match_by_patterns(file_index, explicit_patterns)
patterns_used.extend(explicit_patterns)
self.logger.debug(f"Explicit patterns: {len(pattern_files)} files")
# Add suggested pattern matches
if analysis.file_patterns:
suggested_files = self.match_by_patterns(file_index, analysis.file_patterns)
pattern_files.extend(suggested_files)
patterns_used.extend(analysis.file_patterns)
self.logger.debug(f"Suggested patterns: {len(suggested_files)} files")
# Combine all candidate files and remove duplicates
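        # The full index is included as a candidate pool so that ranking can surface
        # relevant files no pattern matched; always-include and pattern hits are
        # ranked alongside the rest rather than being force-selected.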
all_files = always_include_files + pattern_files + list(file_index.values())
seen_paths = set()
all_candidates = []
for file_info in all_files:
if file_info.relative_path not in seen_paths:
seen_paths.add(file_info.relative_path)
all_candidates.append(file_info)
self.logger.debug(f"Total candidates: {len(all_candidates)} files")
# Rank all candidates
ranked_files = self.rank_files(all_candidates, analysis)
self.logger.debug(f"Files above threshold: {len(ranked_files)}")
# Select best files within limits
selected_files = self.select_best_files(ranked_files, token_limit)
self.logger.info(f"Selected {len(selected_files)} files")
# Calculate statistics
total_tokens = sum(match.file_info.estimated_tokens for match in selected_files)
categories = {}
for match in selected_files:
category = match.file_info.category
categories[category] = categories.get(category, 0) + 1
# Calculate confidence score
confidence_score = self._calculate_confidence(selected_files, analysis)
return PathMatchingResult(
matched_files=selected_files,
total_tokens=total_tokens,
categories=categories,
patterns_used=patterns_used,
confidence_score=confidence_score
)
def _calculate_confidence(self, selected_files: List[MatchResult], analysis: AnalysisResult) -> float:
"""Calculate confidence score for the matching result."""
if not selected_files:
return 0.0
# Average relevance score
avg_relevance = sum(match.relevance_score for match in selected_files) / len(selected_files)
# Keyword coverage (how many keywords are represented)
keyword_coverage = 0.0
if analysis.keywords:
covered_keywords = set()
for match in selected_files:
for reason in match.match_reasons:
if reason.startswith('filename:') or reason.startswith('path:'):
keyword = reason.split(':', 1)[1]
covered_keywords.add(keyword)
keyword_coverage = len(covered_keywords) / len(analysis.keywords)
# Domain coverage
domain_coverage = 0.0
if analysis.domains:
covered_domains = set()
for match in selected_files:
for reason in match.match_reasons:
if reason.startswith('dir:'):
domain = reason.split('->', 1)[0].split(':', 1)[1]
covered_domains.add(domain)
domain_coverage = len(covered_domains) / len(analysis.domains)
# Weighted confidence score
confidence = (
avg_relevance * 0.5 +
keyword_coverage * 0.3 +
domain_coverage * 0.2
)
return min(confidence, 1.0)
def format_patterns(self, selected_files: List[MatchResult]) -> List[str]:
"""Format selected files as @{pattern} strings."""
pattern_format = self.config.get('output', {}).get('pattern_format', '@{{{path}}}')
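        # With the default '@{{{path}}}' template this renders as '@{relative/path/to/file}'.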
patterns = []
for match in selected_files:
pattern = pattern_format.format(path=match.file_info.relative_path)
patterns.append(pattern)
return patterns
def main():
"""Command-line interface for path matcher."""
    import argparse
    import yaml
from .file_indexer import FileIndexer
from .context_analyzer import ContextAnalyzer
parser = argparse.ArgumentParser(description="Path Matcher for UltraThink")
parser.add_argument("prompt", help="Prompt to analyze and match")
parser.add_argument("--config", default="config.yaml", help="Configuration file path")
parser.add_argument("--token-limit", type=int, help="Token limit for selection")
parser.add_argument("--patterns", nargs="*", help="Explicit patterns to include")
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
args = parser.parse_args()
# Setup logging
level = logging.DEBUG if args.verbose else logging.INFO
logging.basicConfig(level=level, format='%(levelname)s: %(message)s')
# Load configuration
    # The config path is resolved relative to this module's directory.
    config_path = Path(__file__).parent / args.config
    if not config_path.exists():
        parser.error(f"Configuration file not found: {config_path}")
    with open(config_path, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)
# Create components
indexer = FileIndexer(config)
analyzer = ContextAnalyzer(config)
matcher = PathMatcher(config)
# Build file index
file_index = indexer.load_index()
if not file_index:
print("Building file index...")
file_index = indexer.build_index()
# Analyze prompt
analysis = analyzer.analyze(args.prompt)
# Match files
result = matcher.match_files(
file_index=file_index,
analysis=analysis,
token_limit=args.token_limit,
explicit_patterns=args.patterns
)
# Output results
print(f"Matched {len(result.matched_files)} files (~{result.total_tokens:,} tokens)")
print(f"Categories: {result.categories}")
print(f"Confidence: {result.confidence_score:.2f}")
print()
patterns = matcher.format_patterns(result.matched_files)
print("Patterns:")
for pattern in patterns[:20]: # Limit output
print(f" {pattern}")
if args.verbose:
print("\nDetailed matches:")
for match in result.matched_files[:10]:
print(f" {match.file_info.relative_path} (score: {match.relevance_score:.3f})")
print(f" Reasons: {', '.join(match.match_reasons)}")
if __name__ == "__main__":
main()
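
# Example invocation (assumes this module lives in an importable package named
# `core` and that a config.yaml sits in the same directory as this file):
#   python -m core.path_matcher "add token refresh to the auth flow" --token-limit 200000 -v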