mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-12 02:37:45 +08:00
chore: Remove Python cache files
Clean up __pycache__ directory to keep repository clean 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,35 +0,0 @@
|
|||||||
"""
|
|
||||||
Refactored Python Script Analyzer
|
|
||||||
Modular, reusable architecture for intelligent file analysis and workflow automation.
|
|
||||||
"""
|
|
||||||
|
|
||||||
__version__ = "2.0.0"
|
|
||||||
__author__ = "Claude Development Team"
|
|
||||||
__email__ = "dev@example.com"
|
|
||||||
|
|
||||||
from .analyzer import Analyzer
|
|
||||||
from .indexer import ProjectIndexer
|
|
||||||
from .cli import AnalysisCLI
|
|
||||||
from .core import (
|
|
||||||
Config, FileIndexer, FileInfo, IndexStats,
|
|
||||||
ContextAnalyzer, AnalysisResult,
|
|
||||||
PathMatcher, MatchResult, PathMatchingResult,
|
|
||||||
EmbeddingManager, GitignoreParser
|
|
||||||
)
|
|
||||||
from .tools import ModuleAnalyzer, ModuleInfo, TechStackLoader
|
|
||||||
from .utils import Colors, CacheManager, IOHelpers
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
'Analyzer', 'ProjectIndexer', 'AnalysisCLI',
|
|
||||||
# Core modules
|
|
||||||
'Config',
|
|
||||||
'FileIndexer', 'FileInfo', 'IndexStats',
|
|
||||||
'ContextAnalyzer', 'AnalysisResult',
|
|
||||||
'PathMatcher', 'MatchResult', 'PathMatchingResult',
|
|
||||||
'EmbeddingManager', 'GitignoreParser',
|
|
||||||
# Tools
|
|
||||||
'ModuleAnalyzer', 'ModuleInfo',
|
|
||||||
'TechStackLoader',
|
|
||||||
# Utils
|
|
||||||
'Colors', 'CacheManager', 'IOHelpers'
|
|
||||||
]
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,207 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
CLI Interface for Path-Aware Analysis
|
|
||||||
Provides command-line interface for intelligent file analysis and pattern matching.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import argparse
|
|
||||||
import logging
|
|
||||||
import json
|
|
||||||
import time
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Dict, List, Optional, Any
|
|
||||||
|
|
||||||
# Add current directory to path for imports
|
|
||||||
sys.path.insert(0, str(Path(__file__).parent))
|
|
||||||
|
|
||||||
from core.config import get_config
|
|
||||||
from core.file_indexer import FileIndexer
|
|
||||||
from core.context_analyzer import ContextAnalyzer
|
|
||||||
from core.path_matcher import PathMatcher
|
|
||||||
from utils.colors import Colors
|
|
||||||
|
|
||||||
|
|
||||||
class AnalysisCLI:
|
|
||||||
"""Command-line interface for file analysis and pattern matching."""
|
|
||||||
|
|
||||||
def __init__(self, config_path: Optional[str] = None, root_path: str = "."):
|
|
||||||
self.root_path = Path(root_path).resolve()
|
|
||||||
self.config = get_config(config_path)
|
|
||||||
|
|
||||||
# Setup logging
|
|
||||||
logging.basicConfig(
|
|
||||||
level=getattr(logging, self.config.get('logging.level', 'INFO')),
|
|
||||||
format=self.config.get('logging.format', '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
|
||||||
)
|
|
||||||
self.logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Initialize core components
|
|
||||||
self.indexer = FileIndexer(self.config, str(self.root_path))
|
|
||||||
self.context_analyzer = ContextAnalyzer(self.config)
|
|
||||||
self.path_matcher = PathMatcher(self.config)
|
|
||||||
|
|
||||||
def analyze(self, prompt: str, patterns: Optional[List[str]] = None) -> Dict[str, Any]:
|
|
||||||
"""Analyze and return relevant file paths for a given prompt."""
|
|
||||||
print(Colors.yellow("Analyzing project and prompt..."))
|
|
||||||
start_time = time.time()
|
|
||||||
|
|
||||||
# Load index (build if not exists)
|
|
||||||
index = self.indexer.load_index()
|
|
||||||
if not index:
|
|
||||||
print(Colors.warning("No file index found. Run 'python indexer.py --build' first or use --auto-build"))
|
|
||||||
return {}
|
|
||||||
|
|
||||||
stats = self.indexer.get_stats()
|
|
||||||
print(Colors.cyan(f"Project stats: ~{stats.total_tokens:,} tokens across {stats.total_files} files"))
|
|
||||||
print(Colors.cyan(f"Categories: {', '.join(f'{k}: {v}' for k, v in stats.categories.items())}"))
|
|
||||||
|
|
||||||
# Determine project size
|
|
||||||
project_size = self._classify_project_size(stats.total_tokens)
|
|
||||||
print(Colors.cyan(f"Project size: {project_size}"))
|
|
||||||
|
|
||||||
# Analyze prompt context
|
|
||||||
print(Colors.yellow("Analyzing prompt context..."))
|
|
||||||
context_result = self.context_analyzer.analyze(prompt)
|
|
||||||
|
|
||||||
print(Colors.cyan(f"Identified: {len(context_result.domains)} domains, {len(context_result.languages)} languages"))
|
|
||||||
if context_result.domains:
|
|
||||||
print(Colors.cyan(f"Top domains: {', '.join(context_result.domains[:3])}"))
|
|
||||||
|
|
||||||
# Match files to context
|
|
||||||
print(Colors.yellow("Matching files to context..."))
|
|
||||||
matching_result = self.path_matcher.match_files(
|
|
||||||
index,
|
|
||||||
context_result,
|
|
||||||
explicit_patterns=patterns
|
|
||||||
)
|
|
||||||
|
|
||||||
elapsed = time.time() - start_time
|
|
||||||
|
|
||||||
print(Colors.green(f"Analysis complete: {len(matching_result.matched_files)} files, ~{matching_result.total_tokens:,} tokens"))
|
|
||||||
print(Colors.cyan(f"Confidence: {matching_result.confidence_score:.2f}"))
|
|
||||||
print(Colors.cyan(f"Execution time: {elapsed:.2f}s"))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'files': [match.file_info.relative_path for match in matching_result.matched_files],
|
|
||||||
'total_tokens': matching_result.total_tokens,
|
|
||||||
'confidence': matching_result.confidence_score,
|
|
||||||
'context': {
|
|
||||||
'domains': context_result.domains,
|
|
||||||
'languages': context_result.languages,
|
|
||||||
'keywords': context_result.keywords
|
|
||||||
},
|
|
||||||
'stats': {
|
|
||||||
'project_size': project_size,
|
|
||||||
'total_files': stats.total_files,
|
|
||||||
'analysis_time': elapsed
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def generate_command(self, prompt: str, tool: str, files: List[str]) -> str:
|
|
||||||
"""Generate a command for external tools (gemini/codex)."""
|
|
||||||
file_patterns = " ".join(f"@{{{file}}}" for file in files)
|
|
||||||
|
|
||||||
if tool == "gemini":
|
|
||||||
if len(files) > 50:
|
|
||||||
return f'gemini --all-files -p "{prompt}"'
|
|
||||||
else:
|
|
||||||
return f'gemini -p "{file_patterns} {prompt}"'
|
|
||||||
elif tool == "codex":
|
|
||||||
# Estimate tokens for workspace selection
|
|
||||||
total_tokens = sum(len(file) * 50 for file in files) # Rough estimate
|
|
||||||
workspace_flag = "-s workspace-write" if total_tokens > 100000 else "-s danger-full-access"
|
|
||||||
return f'codex {workspace_flag} --full-auto exec "{file_patterns} {prompt}"'
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unsupported tool: {tool}")
|
|
||||||
|
|
||||||
def _classify_project_size(self, tokens: int) -> str:
|
|
||||||
"""Classify project size based on token count."""
|
|
||||||
small_limit = self.config.get('token_limits.small_project', 500000)
|
|
||||||
medium_limit = self.config.get('token_limits.medium_project', 2000000)
|
|
||||||
|
|
||||||
if tokens < small_limit:
|
|
||||||
return "small"
|
|
||||||
elif tokens < medium_limit:
|
|
||||||
return "medium"
|
|
||||||
else:
|
|
||||||
return "large"
|
|
||||||
|
|
||||||
def auto_build_index(self):
|
|
||||||
"""Auto-build index if it doesn't exist."""
|
|
||||||
from indexer import ProjectIndexer
|
|
||||||
indexer = ProjectIndexer(root_path=str(self.root_path))
|
|
||||||
indexer.build_index()
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""CLI entry point for analysis."""
|
|
||||||
parser = argparse.ArgumentParser(
|
|
||||||
description="Path-Aware Analysis CLI - Intelligent file pattern detection",
|
|
||||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
||||||
epilog="""
|
|
||||||
Examples:
|
|
||||||
python cli.py "analyze authentication flow"
|
|
||||||
python cli.py "fix database connection" --patterns "src/**/*.py"
|
|
||||||
python cli.py "review API endpoints" --tool gemini
|
|
||||||
"""
|
|
||||||
)
|
|
||||||
|
|
||||||
parser.add_argument('prompt', help='Analysis prompt or task description')
|
|
||||||
parser.add_argument('--patterns', nargs='*', help='Explicit file patterns to include')
|
|
||||||
parser.add_argument('--tool', choices=['gemini', 'codex'], help='Generate command for specific tool')
|
|
||||||
parser.add_argument('--output', choices=['patterns', 'json'], default='patterns', help='Output format')
|
|
||||||
parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
|
|
||||||
parser.add_argument('--auto-build', action='store_true', help='Auto-build index if missing')
|
|
||||||
parser.add_argument('--config', help='Configuration file path')
|
|
||||||
parser.add_argument('--root', default='.', help='Root directory to analyze')
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
# Create CLI interface
|
|
||||||
cli = AnalysisCLI(args.config, args.root)
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Auto-build index if requested and missing
|
|
||||||
if args.auto_build:
|
|
||||||
index = cli.indexer.load_index()
|
|
||||||
if not index:
|
|
||||||
print(Colors.yellow("Auto-building missing index..."))
|
|
||||||
cli.auto_build_index()
|
|
||||||
|
|
||||||
# Perform analysis
|
|
||||||
result = cli.analyze(args.prompt, patterns=args.patterns)
|
|
||||||
|
|
||||||
if not result:
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# Generate output
|
|
||||||
if args.tool:
|
|
||||||
command = cli.generate_command(args.prompt, args.tool, result['files'])
|
|
||||||
print(command)
|
|
||||||
elif args.output == 'json':
|
|
||||||
print(json.dumps(result, indent=2, default=str))
|
|
||||||
else: # patterns output (default)
|
|
||||||
for file_path in result['files']:
|
|
||||||
print(f"@{{{file_path}}}")
|
|
||||||
|
|
||||||
# Show verbose details
|
|
||||||
if args.verbose:
|
|
||||||
print(f"\n# Analysis Details:")
|
|
||||||
print(f"# Matched files: {len(result['files'])}")
|
|
||||||
print(f"# Total tokens: {result['total_tokens']:,}")
|
|
||||||
print(f"# Confidence: {result['confidence']:.2f}")
|
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
print(Colors.warning("\nAnalysis interrupted by user"))
|
|
||||||
sys.exit(1)
|
|
||||||
except Exception as e:
|
|
||||||
print(Colors.error(f"Analysis failed: {e}"))
|
|
||||||
if args.verbose:
|
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,159 +0,0 @@
|
|||||||
# Configuration for UltraThink Path-Aware Analyzer
|
|
||||||
# Based on gemini-wrapper patterns with intelligent enhancements
|
|
||||||
|
|
||||||
# Token limits for project size classification
|
|
||||||
token_limits:
|
|
||||||
small_project: 500000 # <500K tokens - include most files
|
|
||||||
medium_project: 2000000 # 500K-2M tokens - smart selection
|
|
||||||
large_project: 10000000 # >2M tokens - precise targeting
|
|
||||||
max_files: 1000 # Maximum files to process
|
|
||||||
|
|
||||||
# File patterns to exclude (performance and relevance)
|
|
||||||
exclude_patterns:
|
|
||||||
- "*/node_modules/*"
|
|
||||||
- "*/.git/*"
|
|
||||||
- "*/build/*"
|
|
||||||
- "*/dist/*"
|
|
||||||
- "*/.next/*"
|
|
||||||
- "*/.nuxt/*"
|
|
||||||
- "*/target/*"
|
|
||||||
- "*/vendor/*"
|
|
||||||
- "*/__pycache__/*"
|
|
||||||
- "*.pyc"
|
|
||||||
- "*.pyo"
|
|
||||||
- "*.log"
|
|
||||||
- "*.tmp"
|
|
||||||
- "*.temp"
|
|
||||||
- "*.history"
|
|
||||||
|
|
||||||
# File extensions grouped by category
|
|
||||||
file_extensions:
|
|
||||||
code:
|
|
||||||
- ".py"
|
|
||||||
- ".js"
|
|
||||||
- ".ts"
|
|
||||||
- ".tsx"
|
|
||||||
- ".jsx"
|
|
||||||
- ".java"
|
|
||||||
- ".cpp"
|
|
||||||
- ".c"
|
|
||||||
- ".h"
|
|
||||||
- ".rs"
|
|
||||||
- ".go"
|
|
||||||
- ".php"
|
|
||||||
- ".rb"
|
|
||||||
- ".sh"
|
|
||||||
- ".bash"
|
|
||||||
docs:
|
|
||||||
- ".md"
|
|
||||||
- ".txt"
|
|
||||||
- ".rst"
|
|
||||||
- ".adoc"
|
|
||||||
config:
|
|
||||||
- ".json"
|
|
||||||
- ".yaml"
|
|
||||||
- ".yml"
|
|
||||||
- ".toml"
|
|
||||||
- ".ini"
|
|
||||||
- ".env"
|
|
||||||
web:
|
|
||||||
- ".html"
|
|
||||||
- ".css"
|
|
||||||
- ".scss"
|
|
||||||
- ".sass"
|
|
||||||
- ".xml"
|
|
||||||
|
|
||||||
# Embedding/RAG configuration
|
|
||||||
embedding:
|
|
||||||
enabled: true # Set to true to enable RAG features
|
|
||||||
model: "all-MiniLM-L6-v2" # Stable general-purpose embedding model
|
|
||||||
cache_dir: "cache"
|
|
||||||
similarity_threshold: 0.6 # Higher threshold for better code similarity
|
|
||||||
max_context_length: 512 # Standard context length
|
|
||||||
batch_size: 32 # Standard batch size
|
|
||||||
trust_remote_code: false # Not required for standard models
|
|
||||||
|
|
||||||
# Context analysis settings
|
|
||||||
context_analysis:
|
|
||||||
# Keywords that indicate specific domains/modules
|
|
||||||
domain_keywords:
|
|
||||||
auth: ["auth", "login", "user", "password", "jwt", "token", "session"]
|
|
||||||
database: ["db", "database", "sql", "query", "model", "schema", "migration"]
|
|
||||||
api: ["api", "endpoint", "route", "controller", "service", "handler"]
|
|
||||||
frontend: ["ui", "component", "view", "template", "style", "css"]
|
|
||||||
backend: ["server", "service", "logic", "business", "core"]
|
|
||||||
test: ["test", "spec", "unit", "integration", "mock"]
|
|
||||||
config: ["config", "setting", "environment", "env"]
|
|
||||||
util: ["util", "helper", "common", "shared", "lib"]
|
|
||||||
|
|
||||||
# Programming language indicators
|
|
||||||
language_indicators:
|
|
||||||
python: [".py", "python", "pip", "requirements.txt", "setup.py"]
|
|
||||||
javascript: [".js", ".ts", "npm", "package.json", "node"]
|
|
||||||
java: [".java", "maven", "gradle", "pom.xml"]
|
|
||||||
go: [".go", "go.mod", "go.sum"]
|
|
||||||
rust: [".rs", "cargo", "Cargo.toml"]
|
|
||||||
|
|
||||||
# Path matching and ranking
|
|
||||||
path_matching:
|
|
||||||
# Scoring weights for relevance calculation
|
|
||||||
weights:
|
|
||||||
keyword_match: 0.4 # Direct keyword match in filename/path
|
|
||||||
extension_match: 0.2 # File extension relevance
|
|
||||||
directory_context: 0.2 # Directory name relevance
|
|
||||||
file_size_penalty: 0.1 # Penalty for very large files
|
|
||||||
recency_bonus: 0.1 # Bonus for recently modified files
|
|
||||||
|
|
||||||
# Maximum files to return per category
|
|
||||||
max_files_per_category: 20
|
|
||||||
|
|
||||||
# Minimum relevance score to include file
|
|
||||||
min_relevance_score: 0.1
|
|
||||||
|
|
||||||
# Output formatting
|
|
||||||
output:
|
|
||||||
# How to format path patterns
|
|
||||||
pattern_format: "@{{{path}}}" # Results in @{path/to/file}
|
|
||||||
|
|
||||||
# Include project documentation by default
|
|
||||||
always_include:
|
|
||||||
- "CLAUDE.md"
|
|
||||||
- "**/CLAUDE.md"
|
|
||||||
- "README.md"
|
|
||||||
- "docs/**/*.md"
|
|
||||||
|
|
||||||
# Maximum total files in output
|
|
||||||
max_total_files: 50
|
|
||||||
|
|
||||||
# Analysis modes
|
|
||||||
modes:
|
|
||||||
auto:
|
|
||||||
description: "Fully automatic path detection"
|
|
||||||
enabled: true
|
|
||||||
guided:
|
|
||||||
description: "Suggest paths for user confirmation"
|
|
||||||
enabled: true
|
|
||||||
pattern:
|
|
||||||
description: "Use explicit patterns from user"
|
|
||||||
enabled: true
|
|
||||||
hybrid:
|
|
||||||
description: "Combine auto-detection with user patterns"
|
|
||||||
enabled: true
|
|
||||||
|
|
||||||
# Performance settings
|
|
||||||
performance:
|
|
||||||
# Cache settings
|
|
||||||
cache_enabled: true
|
|
||||||
cache_ttl: 3600 # Cache TTL in seconds (1 hour)
|
|
||||||
|
|
||||||
# File size limits
|
|
||||||
max_file_size: 10485760 # 10MB max file size to analyze
|
|
||||||
|
|
||||||
# Parallel processing
|
|
||||||
max_workers: 4 # Number of parallel workers for file processing
|
|
||||||
|
|
||||||
# Logging configuration
|
|
||||||
logging:
|
|
||||||
level: "INFO" # DEBUG, INFO, WARNING, ERROR
|
|
||||||
file: ".claude/scripts/ultrathink/ultrathink.log"
|
|
||||||
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
"""
|
|
||||||
Core modules for the Python script analyzer.
|
|
||||||
Provides unified interfaces for file indexing, context analysis, and path matching.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from .config import Config
|
|
||||||
from .file_indexer import FileIndexer, FileInfo, IndexStats
|
|
||||||
from .context_analyzer import ContextAnalyzer, AnalysisResult
|
|
||||||
from .path_matcher import PathMatcher, MatchResult, PathMatchingResult
|
|
||||||
from .embedding_manager import EmbeddingManager
|
|
||||||
from .gitignore_parser import GitignoreParser
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
'Config',
|
|
||||||
'FileIndexer',
|
|
||||||
'FileInfo',
|
|
||||||
'IndexStats',
|
|
||||||
'ContextAnalyzer',
|
|
||||||
'AnalysisResult',
|
|
||||||
'PathMatcher',
|
|
||||||
'MatchResult',
|
|
||||||
'PathMatchingResult',
|
|
||||||
'EmbeddingManager',
|
|
||||||
'GitignoreParser'
|
|
||||||
]
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,327 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Configuration Management Module
|
|
||||||
Provides unified configuration management with gitignore integration.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import yaml
|
|
||||||
import logging
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Dict, Any, Optional, List
|
|
||||||
from .gitignore_parser import get_all_gitignore_patterns
|
|
||||||
|
|
||||||
|
|
||||||
class Config:
|
|
||||||
"""Singleton configuration manager with hierarchical loading."""
|
|
||||||
|
|
||||||
_instance = None
|
|
||||||
_initialized = False
|
|
||||||
|
|
||||||
def __new__(cls, config_path: Optional[str] = None):
|
|
||||||
if cls._instance is None:
|
|
||||||
cls._instance = super(Config, cls).__new__(cls)
|
|
||||||
return cls._instance
|
|
||||||
|
|
||||||
def __init__(self, config_path: Optional[str] = None):
|
|
||||||
if self._initialized:
|
|
||||||
return
|
|
||||||
|
|
||||||
self.config_path = config_path
|
|
||||||
self.config = {}
|
|
||||||
self.logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
self._load_config()
|
|
||||||
self._add_gitignore_patterns()
|
|
||||||
self._apply_env_overrides()
|
|
||||||
self._validate_config()
|
|
||||||
|
|
||||||
self._initialized = True
|
|
||||||
|
|
||||||
def _load_config(self):
|
|
||||||
"""Load configuration from file with fallback hierarchy."""
|
|
||||||
config_paths = self._get_config_paths()
|
|
||||||
|
|
||||||
for config_file in config_paths:
|
|
||||||
if config_file.exists():
|
|
||||||
try:
|
|
||||||
with open(config_file, 'r', encoding='utf-8') as f:
|
|
||||||
loaded_config = yaml.safe_load(f)
|
|
||||||
if loaded_config:
|
|
||||||
self.config = self._merge_configs(self.config, loaded_config)
|
|
||||||
self.logger.info(f"Loaded config from {config_file}")
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.warning(f"Failed to load config from {config_file}: {e}")
|
|
||||||
|
|
||||||
# Apply default config if no config loaded
|
|
||||||
if not self.config:
|
|
||||||
self.config = self._get_default_config()
|
|
||||||
self.logger.info("Using default configuration")
|
|
||||||
|
|
||||||
def _get_config_paths(self) -> List[Path]:
|
|
||||||
"""Get ordered list of config file paths to check."""
|
|
||||||
paths = []
|
|
||||||
|
|
||||||
# 1. Explicitly provided config path
|
|
||||||
if self.config_path:
|
|
||||||
paths.append(Path(self.config_path))
|
|
||||||
|
|
||||||
# 2. Current directory config.yaml
|
|
||||||
paths.append(Path('config.yaml'))
|
|
||||||
|
|
||||||
# 3. Script directory config.yaml
|
|
||||||
script_dir = Path(__file__).parent.parent
|
|
||||||
paths.append(script_dir / 'config.yaml')
|
|
||||||
|
|
||||||
# 4. Default config in script directory
|
|
||||||
paths.append(script_dir / 'default_config.yaml')
|
|
||||||
|
|
||||||
return paths
|
|
||||||
|
|
||||||
def _get_default_config(self) -> Dict[str, Any]:
|
|
||||||
"""Get default configuration."""
|
|
||||||
return {
|
|
||||||
'token_limits': {
|
|
||||||
'small_project': 500000,
|
|
||||||
'medium_project': 2000000,
|
|
||||||
'large_project': 10000000,
|
|
||||||
'max_files': 1000
|
|
||||||
},
|
|
||||||
'exclude_patterns': [
|
|
||||||
"*/node_modules/*",
|
|
||||||
"*/.git/*",
|
|
||||||
"*/build/*",
|
|
||||||
"*/dist/*",
|
|
||||||
"*/.next/*",
|
|
||||||
"*/.nuxt/*",
|
|
||||||
"*/target/*",
|
|
||||||
"*/vendor/*",
|
|
||||||
"*/__pycache__/*",
|
|
||||||
"*.pyc",
|
|
||||||
"*.pyo",
|
|
||||||
"*.log",
|
|
||||||
"*.tmp",
|
|
||||||
"*.temp",
|
|
||||||
"*.history"
|
|
||||||
],
|
|
||||||
'file_extensions': {
|
|
||||||
'code': ['.py', '.js', '.ts', '.tsx', '.jsx', '.java', '.cpp', '.c', '.h', '.rs', '.go', '.php', '.rb', '.sh', '.bash'],
|
|
||||||
'docs': ['.md', '.txt', '.rst', '.adoc'],
|
|
||||||
'config': ['.json', '.yaml', '.yml', '.toml', '.ini', '.env'],
|
|
||||||
'web': ['.html', '.css', '.scss', '.sass', '.xml']
|
|
||||||
},
|
|
||||||
'embedding': {
|
|
||||||
'enabled': True,
|
|
||||||
'model': 'all-MiniLM-L6-v2',
|
|
||||||
'cache_dir': 'cache',
|
|
||||||
'similarity_threshold': 0.3,
|
|
||||||
'max_context_length': 512,
|
|
||||||
'batch_size': 32
|
|
||||||
},
|
|
||||||
'context_analysis': {
|
|
||||||
'domain_keywords': {
|
|
||||||
'auth': ['auth', 'login', 'user', 'password', 'jwt', 'token', 'session'],
|
|
||||||
'database': ['db', 'database', 'sql', 'query', 'model', 'schema', 'migration'],
|
|
||||||
'api': ['api', 'endpoint', 'route', 'controller', 'service', 'handler'],
|
|
||||||
'frontend': ['ui', 'component', 'view', 'template', 'style', 'css'],
|
|
||||||
'backend': ['server', 'service', 'logic', 'business', 'core'],
|
|
||||||
'test': ['test', 'spec', 'unit', 'integration', 'mock'],
|
|
||||||
'config': ['config', 'setting', 'environment', 'env'],
|
|
||||||
'util': ['util', 'helper', 'common', 'shared', 'lib']
|
|
||||||
},
|
|
||||||
'language_indicators': {
|
|
||||||
'python': ['.py', 'python', 'pip', 'requirements.txt', 'setup.py'],
|
|
||||||
'javascript': ['.js', '.ts', 'npm', 'package.json', 'node'],
|
|
||||||
'java': ['.java', 'maven', 'gradle', 'pom.xml'],
|
|
||||||
'go': ['.go', 'go.mod', 'go.sum'],
|
|
||||||
'rust': ['.rs', 'cargo', 'Cargo.toml']
|
|
||||||
}
|
|
||||||
},
|
|
||||||
'path_matching': {
|
|
||||||
'weights': {
|
|
||||||
'keyword_match': 0.4,
|
|
||||||
'extension_match': 0.2,
|
|
||||||
'directory_context': 0.2,
|
|
||||||
'file_size_penalty': 0.1,
|
|
||||||
'recency_bonus': 0.1
|
|
||||||
},
|
|
||||||
'max_files_per_category': 20,
|
|
||||||
'min_relevance_score': 0.1
|
|
||||||
},
|
|
||||||
'output': {
|
|
||||||
'pattern_format': '@{{{path}}}',
|
|
||||||
'always_include': [
|
|
||||||
'CLAUDE.md',
|
|
||||||
'**/CLAUDE.md',
|
|
||||||
'README.md',
|
|
||||||
'docs/**/*.md'
|
|
||||||
],
|
|
||||||
'max_total_files': 50
|
|
||||||
},
|
|
||||||
'performance': {
|
|
||||||
'cache_enabled': True,
|
|
||||||
'cache_ttl': 3600,
|
|
||||||
'max_file_size': 10485760,
|
|
||||||
'max_workers': 4
|
|
||||||
},
|
|
||||||
'logging': {
|
|
||||||
'level': 'INFO',
|
|
||||||
'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def _merge_configs(self, base: Dict, override: Dict) -> Dict:
|
|
||||||
"""Recursively merge configuration dictionaries."""
|
|
||||||
result = base.copy()
|
|
||||||
|
|
||||||
for key, value in override.items():
|
|
||||||
if key in result and isinstance(result[key], dict) and isinstance(value, dict):
|
|
||||||
result[key] = self._merge_configs(result[key], value)
|
|
||||||
else:
|
|
||||||
result[key] = value
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
def _add_gitignore_patterns(self):
|
|
||||||
"""Add patterns from .gitignore files to exclude_patterns."""
|
|
||||||
try:
|
|
||||||
# Find root directory (current working directory or script parent)
|
|
||||||
root_dir = Path.cwd()
|
|
||||||
|
|
||||||
gitignore_patterns = get_all_gitignore_patterns(str(root_dir))
|
|
||||||
|
|
||||||
if gitignore_patterns:
|
|
||||||
# Ensure exclude_patterns exists
|
|
||||||
if 'exclude_patterns' not in self.config:
|
|
||||||
self.config['exclude_patterns'] = []
|
|
||||||
|
|
||||||
# Add gitignore patterns, avoiding duplicates
|
|
||||||
existing_patterns = set(self.config['exclude_patterns'])
|
|
||||||
new_patterns = [p for p in gitignore_patterns if p not in existing_patterns]
|
|
||||||
|
|
||||||
self.config['exclude_patterns'].extend(new_patterns)
|
|
||||||
|
|
||||||
self.logger.info(f"Added {len(new_patterns)} patterns from .gitignore files")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.warning(f"Failed to load .gitignore patterns: {e}")
|
|
||||||
|
|
||||||
def _apply_env_overrides(self):
|
|
||||||
"""Apply environment variable overrides."""
|
|
||||||
env_mappings = {
|
|
||||||
'ANALYZER_CACHE_DIR': ('embedding', 'cache_dir'),
|
|
||||||
'ANALYZER_LOG_LEVEL': ('logging', 'level'),
|
|
||||||
'ANALYZER_MAX_FILES': ('token_limits', 'max_files'),
|
|
||||||
'ANALYZER_EMBEDDING_MODEL': ('embedding', 'model')
|
|
||||||
}
|
|
||||||
|
|
||||||
for env_var, config_path in env_mappings.items():
|
|
||||||
env_value = os.getenv(env_var)
|
|
||||||
if env_value:
|
|
||||||
self._set_nested_value(config_path, env_value)
|
|
||||||
self.logger.info(f"Applied environment override: {env_var} = {env_value}")
|
|
||||||
|
|
||||||
def _set_nested_value(self, path: tuple, value: str):
|
|
||||||
"""Set a nested configuration value."""
|
|
||||||
current = self.config
|
|
||||||
for key in path[:-1]:
|
|
||||||
if key not in current:
|
|
||||||
current[key] = {}
|
|
||||||
current = current[key]
|
|
||||||
|
|
||||||
# Try to convert value to appropriate type
|
|
||||||
if isinstance(current.get(path[-1]), int):
|
|
||||||
try:
|
|
||||||
value = int(value)
|
|
||||||
except ValueError:
|
|
||||||
pass
|
|
||||||
elif isinstance(current.get(path[-1]), bool):
|
|
||||||
value = value.lower() in ('true', '1', 'yes', 'on')
|
|
||||||
|
|
||||||
current[path[-1]] = value
|
|
||||||
|
|
||||||
def _validate_config(self):
|
|
||||||
"""Validate configuration values."""
|
|
||||||
required_sections = ['exclude_patterns', 'file_extensions', 'token_limits']
|
|
||||||
|
|
||||||
for section in required_sections:
|
|
||||||
if section not in self.config:
|
|
||||||
self.logger.warning(f"Missing required config section: {section}")
|
|
||||||
|
|
||||||
# Validate token limits
|
|
||||||
if 'token_limits' in self.config:
|
|
||||||
limits = self.config['token_limits']
|
|
||||||
if limits.get('small_project', 0) >= limits.get('medium_project', 0):
|
|
||||||
self.logger.warning("Token limit configuration may be incorrect")
|
|
||||||
|
|
||||||
def get(self, path: str, default: Any = None) -> Any:
|
|
||||||
"""Get configuration value using dot notation."""
|
|
||||||
keys = path.split('.')
|
|
||||||
current = self.config
|
|
||||||
|
|
||||||
try:
|
|
||||||
for key in keys:
|
|
||||||
current = current[key]
|
|
||||||
return current
|
|
||||||
except (KeyError, TypeError):
|
|
||||||
return default
|
|
||||||
|
|
||||||
def set(self, path: str, value: Any):
|
|
||||||
"""Set configuration value using dot notation."""
|
|
||||||
keys = path.split('.')
|
|
||||||
current = self.config
|
|
||||||
|
|
||||||
for key in keys[:-1]:
|
|
||||||
if key not in current:
|
|
||||||
current[key] = {}
|
|
||||||
current = current[key]
|
|
||||||
|
|
||||||
current[keys[-1]] = value
|
|
||||||
|
|
||||||
def get_exclude_patterns(self) -> List[str]:
|
|
||||||
"""Get all exclude patterns including gitignore patterns."""
|
|
||||||
return self.config.get('exclude_patterns', [])
|
|
||||||
|
|
||||||
def get_file_extensions(self) -> Dict[str, List[str]]:
|
|
||||||
"""Get file extension mappings."""
|
|
||||||
return self.config.get('file_extensions', {})
|
|
||||||
|
|
||||||
def is_embedding_enabled(self) -> bool:
|
|
||||||
"""Check if embedding functionality is enabled."""
|
|
||||||
return self.config.get('embedding', {}).get('enabled', False)
|
|
||||||
|
|
||||||
def get_cache_dir(self) -> str:
|
|
||||||
"""Get cache directory path."""
|
|
||||||
return self.config.get('embedding', {}).get('cache_dir', 'cache')
|
|
||||||
|
|
||||||
def to_dict(self) -> Dict[str, Any]:
|
|
||||||
"""Return configuration as dictionary."""
|
|
||||||
return self.config.copy()
|
|
||||||
|
|
||||||
def reload(self, config_path: Optional[str] = None):
|
|
||||||
"""Reload configuration from file."""
|
|
||||||
self._initialized = False
|
|
||||||
if config_path:
|
|
||||||
self.config_path = config_path
|
|
||||||
self.__init__(self.config_path)
|
|
||||||
|
|
||||||
|
|
||||||
# Global configuration instance
|
|
||||||
_global_config = None
|
|
||||||
|
|
||||||
|
|
||||||
def get_config(config_path: Optional[str] = None) -> Config:
|
|
||||||
"""Get global configuration instance."""
|
|
||||||
global _global_config
|
|
||||||
if _global_config is None:
|
|
||||||
_global_config = Config(config_path)
|
|
||||||
return _global_config
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
# Test configuration loading
|
|
||||||
config = Config()
|
|
||||||
print("Configuration loaded successfully!")
|
|
||||||
print(f"Cache dir: {config.get_cache_dir()}")
|
|
||||||
print(f"Exclude patterns: {len(config.get_exclude_patterns())}")
|
|
||||||
print(f"Embedding enabled: {config.is_embedding_enabled()}")
|
|
||||||
@@ -1,359 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Context Analyzer Module for UltraThink Path-Aware Analyzer
|
|
||||||
Analyzes user prompts to extract relevant context and keywords.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import re
|
|
||||||
import logging
|
|
||||||
from typing import Dict, List, Set, Tuple, Optional
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from collections import Counter
|
|
||||||
import string
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class AnalysisResult:
|
|
||||||
"""Results of context analysis."""
|
|
||||||
keywords: List[str]
|
|
||||||
domains: List[str]
|
|
||||||
languages: List[str]
|
|
||||||
file_patterns: List[str]
|
|
||||||
confidence_scores: Dict[str, float]
|
|
||||||
extracted_entities: Dict[str, List[str]]
|
|
||||||
|
|
||||||
class ContextAnalyzer:
|
|
||||||
"""Analyzes user prompts to understand context and intent."""
|
|
||||||
|
|
||||||
def __init__(self, config: Dict):
|
|
||||||
self.config = config
|
|
||||||
self.logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Load domain and language mappings from config
|
|
||||||
self.domain_keywords = config.get('context_analysis', {}).get('domain_keywords', {})
|
|
||||||
self.language_indicators = config.get('context_analysis', {}).get('language_indicators', {})
|
|
||||||
|
|
||||||
# Common programming terms and patterns
|
|
||||||
self.technical_terms = self._build_technical_terms()
|
|
||||||
self.file_pattern_indicators = self._build_pattern_indicators()
|
|
||||||
|
|
||||||
# Stop words to filter out
|
|
||||||
self.stop_words = {
|
|
||||||
'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
|
|
||||||
'by', 'from', 'up', 'about', 'into', 'through', 'during', 'before', 'after',
|
|
||||||
'above', 'below', 'between', 'among', 'as', 'is', 'are', 'was', 'were', 'be',
|
|
||||||
'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would',
|
|
||||||
'could', 'should', 'may', 'might', 'must', 'can', 'this', 'that', 'these',
|
|
||||||
'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her',
|
|
||||||
'us', 'them', 'my', 'your', 'his', 'its', 'our', 'their'
|
|
||||||
}
|
|
||||||
|
|
||||||
def _build_technical_terms(self) -> Dict[str, List[str]]:
|
|
||||||
"""Build comprehensive list of technical terms grouped by category."""
|
|
||||||
return {
|
|
||||||
'authentication': [
|
|
||||||
'auth', 'authentication', 'login', 'logout', 'signin', 'signout',
|
|
||||||
'user', 'password', 'token', 'jwt', 'oauth', 'session', 'cookie',
|
|
||||||
'credential', 'authorize', 'permission', 'role', 'access'
|
|
||||||
],
|
|
||||||
'database': [
|
|
||||||
'database', 'db', 'sql', 'query', 'table', 'schema', 'migration',
|
|
||||||
'model', 'orm', 'entity', 'relation', 'index', 'transaction',
|
|
||||||
'crud', 'select', 'insert', 'update', 'delete', 'join'
|
|
||||||
],
|
|
||||||
'api': [
|
|
||||||
'api', 'rest', 'graphql', 'endpoint', 'route', 'controller',
|
|
||||||
'handler', 'middleware', 'service', 'request', 'response',
|
|
||||||
'http', 'get', 'post', 'put', 'delete', 'patch'
|
|
||||||
],
|
|
||||||
'frontend': [
|
|
||||||
'ui', 'component', 'view', 'template', 'page', 'layout',
|
|
||||||
'style', 'css', 'html', 'javascript', 'react', 'vue',
|
|
||||||
'angular', 'dom', 'event', 'state', 'props'
|
|
||||||
],
|
|
||||||
'backend': [
|
|
||||||
'server', 'service', 'business', 'logic', 'core', 'engine',
|
|
||||||
'worker', 'job', 'queue', 'cache', 'redis', 'memcache'
|
|
||||||
],
|
|
||||||
'testing': [
|
|
||||||
'test', 'testing', 'spec', 'unit', 'integration', 'e2e',
|
|
||||||
'mock', 'stub', 'fixture', 'assert', 'expect', 'should'
|
|
||||||
],
|
|
||||||
'configuration': [
|
|
||||||
'config', 'configuration', 'setting', 'environment', 'env',
|
|
||||||
'variable', 'constant', 'parameter', 'option'
|
|
||||||
],
|
|
||||||
'utility': [
|
|
||||||
'util', 'utility', 'helper', 'common', 'shared', 'lib',
|
|
||||||
'library', 'tool', 'function', 'method'
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
def _build_pattern_indicators(self) -> Dict[str, List[str]]:
|
|
||||||
"""Build indicators that suggest specific file patterns."""
|
|
||||||
return {
|
|
||||||
'source_code': ['implement', 'code', 'function', 'class', 'method'],
|
|
||||||
'tests': ['test', 'testing', 'spec', 'unittest', 'pytest'],
|
|
||||||
'documentation': ['doc', 'readme', 'guide', 'documentation', 'manual'],
|
|
||||||
'configuration': ['config', 'setting', 'env', 'environment'],
|
|
||||||
'build': ['build', 'compile', 'package', 'deploy', 'release'],
|
|
||||||
'scripts': ['script', 'automation', 'tool', 'utility']
|
|
||||||
}
|
|
||||||
|
|
||||||
def extract_keywords(self, text: str) -> List[str]:
|
|
||||||
"""Extract meaningful keywords from text."""
|
|
||||||
# Clean and normalize text
|
|
||||||
text = text.lower()
|
|
||||||
text = re.sub(r'[^\w\s-]', ' ', text) # Remove punctuation except hyphens
|
|
||||||
words = text.split()
|
|
||||||
|
|
||||||
# Filter stop words and short words
|
|
||||||
keywords = []
|
|
||||||
for word in words:
|
|
||||||
word = word.strip('-') # Remove leading/trailing hyphens
|
|
||||||
if (len(word) >= 2 and
|
|
||||||
word not in self.stop_words and
|
|
||||||
not word.isdigit()):
|
|
||||||
keywords.append(word)
|
|
||||||
|
|
||||||
# Count frequency and return top keywords
|
|
||||||
word_counts = Counter(keywords)
|
|
||||||
return [word for word, count in word_counts.most_common(20)]
|
|
||||||
|
|
||||||
def identify_domains(self, keywords: List[str]) -> List[Tuple[str, float]]:
|
|
||||||
"""Identify relevant domains based on keywords."""
|
|
||||||
domain_scores = {}
|
|
||||||
|
|
||||||
for domain, domain_keywords in self.domain_keywords.items():
|
|
||||||
score = 0.0
|
|
||||||
matched_keywords = []
|
|
||||||
|
|
||||||
for keyword in keywords:
|
|
||||||
for domain_keyword in domain_keywords:
|
|
||||||
if keyword in domain_keyword or domain_keyword in keyword:
|
|
||||||
score += 1.0
|
|
||||||
matched_keywords.append(keyword)
|
|
||||||
break
|
|
||||||
|
|
||||||
if score > 0:
|
|
||||||
# Normalize score by number of domain keywords
|
|
||||||
normalized_score = score / len(domain_keywords)
|
|
||||||
domain_scores[domain] = normalized_score
|
|
||||||
|
|
||||||
# Also check technical terms
|
|
||||||
for category, terms in self.technical_terms.items():
|
|
||||||
score = 0.0
|
|
||||||
for keyword in keywords:
|
|
||||||
for term in terms:
|
|
||||||
if keyword in term or term in keyword:
|
|
||||||
score += 1.0
|
|
||||||
break
|
|
||||||
|
|
||||||
if score > 0:
|
|
||||||
normalized_score = score / len(terms)
|
|
||||||
if category not in domain_scores:
|
|
||||||
domain_scores[category] = normalized_score
|
|
||||||
else:
|
|
||||||
domain_scores[category] = max(domain_scores[category], normalized_score)
|
|
||||||
|
|
||||||
# Sort by score and return top domains
|
|
||||||
sorted_domains = sorted(domain_scores.items(), key=lambda x: x[1], reverse=True)
|
|
||||||
return sorted_domains[:5]
|
|
||||||
|
|
||||||
def identify_languages(self, keywords: List[str]) -> List[Tuple[str, float]]:
|
|
||||||
"""Identify programming languages based on keywords."""
|
|
||||||
language_scores = {}
|
|
||||||
|
|
||||||
for language, indicators in self.language_indicators.items():
|
|
||||||
score = 0.0
|
|
||||||
for keyword in keywords:
|
|
||||||
for indicator in indicators:
|
|
||||||
if keyword in indicator or indicator in keyword:
|
|
||||||
score += 1.0
|
|
||||||
break
|
|
||||||
|
|
||||||
if score > 0:
|
|
||||||
normalized_score = score / len(indicators)
|
|
||||||
language_scores[language] = normalized_score
|
|
||||||
|
|
||||||
sorted_languages = sorted(language_scores.items(), key=lambda x: x[1], reverse=True)
|
|
||||||
return sorted_languages[:3]
|
|
||||||
|
|
||||||
def extract_file_patterns(self, text: str) -> List[str]:
|
|
||||||
"""Extract explicit file patterns from text."""
|
|
||||||
patterns = []
|
|
||||||
|
|
||||||
# Look for @{pattern} syntax
|
|
||||||
at_patterns = re.findall(r'@\{([^}]+)\}', text)
|
|
||||||
patterns.extend(at_patterns)
|
|
||||||
|
|
||||||
# Look for file extensions
|
|
||||||
extensions = re.findall(r'\*\.(\w+)', text)
|
|
||||||
for ext in extensions:
|
|
||||||
patterns.append(f"*.{ext}")
|
|
||||||
|
|
||||||
# Look for directory patterns
|
|
||||||
dir_patterns = re.findall(r'(\w+)/\*\*?', text)
|
|
||||||
for dir_pattern in dir_patterns:
|
|
||||||
patterns.append(f"{dir_pattern}/**/*")
|
|
||||||
|
|
||||||
# Look for specific file names
|
|
||||||
file_patterns = re.findall(r'\b(\w+\.\w+)\b', text)
|
|
||||||
for file_pattern in file_patterns:
|
|
||||||
if '.' in file_pattern:
|
|
||||||
patterns.append(file_pattern)
|
|
||||||
|
|
||||||
return list(set(patterns)) # Remove duplicates
|
|
||||||
|
|
||||||
def suggest_patterns_from_domains(self, domains: List[str]) -> List[str]:
|
|
||||||
"""Suggest file patterns based on identified domains."""
|
|
||||||
patterns = []
|
|
||||||
|
|
||||||
domain_to_patterns = {
|
|
||||||
'auth': ['**/auth/**/*', '**/login/**/*', '**/user/**/*'],
|
|
||||||
'authentication': ['**/auth/**/*', '**/login/**/*', '**/user/**/*'],
|
|
||||||
'database': ['**/db/**/*', '**/model/**/*', '**/migration/**/*', '**/*model*'],
|
|
||||||
'api': ['**/api/**/*', '**/route/**/*', '**/controller/**/*', '**/handler/**/*'],
|
|
||||||
'frontend': ['**/ui/**/*', '**/component/**/*', '**/view/**/*', '**/template/**/*'],
|
|
||||||
'backend': ['**/service/**/*', '**/core/**/*', '**/server/**/*'],
|
|
||||||
'test': ['**/test/**/*', '**/spec/**/*', '**/*test*', '**/*spec*'],
|
|
||||||
'testing': ['**/test/**/*', '**/spec/**/*', '**/*test*', '**/*spec*'],
|
|
||||||
'config': ['**/config/**/*', '**/*.config.*', '**/env/**/*'],
|
|
||||||
'configuration': ['**/config/**/*', '**/*.config.*', '**/env/**/*'],
|
|
||||||
'util': ['**/util/**/*', '**/helper/**/*', '**/common/**/*'],
|
|
||||||
'utility': ['**/util/**/*', '**/helper/**/*', '**/common/**/*']
|
|
||||||
}
|
|
||||||
|
|
||||||
for domain in domains:
|
|
||||||
if domain in domain_to_patterns:
|
|
||||||
patterns.extend(domain_to_patterns[domain])
|
|
||||||
|
|
||||||
return list(set(patterns)) # Remove duplicates
|
|
||||||
|
|
||||||
def extract_entities(self, text: str) -> Dict[str, List[str]]:
|
|
||||||
"""Extract named entities from text."""
|
|
||||||
entities = {
|
|
||||||
'files': [],
|
|
||||||
'functions': [],
|
|
||||||
'classes': [],
|
|
||||||
'variables': [],
|
|
||||||
'technologies': []
|
|
||||||
}
|
|
||||||
|
|
||||||
# File patterns
|
|
||||||
file_patterns = re.findall(r'\b(\w+\.\w+)\b', text)
|
|
||||||
entities['files'] = list(set(file_patterns))
|
|
||||||
|
|
||||||
# Function patterns (camelCase or snake_case followed by parentheses)
|
|
||||||
function_patterns = re.findall(r'\b([a-z][a-zA-Z0-9_]*)\s*\(', text)
|
|
||||||
entities['functions'] = list(set(function_patterns))
|
|
||||||
|
|
||||||
# Class patterns (PascalCase)
|
|
||||||
class_patterns = re.findall(r'\b([A-Z][a-zA-Z0-9]*)\b', text)
|
|
||||||
entities['classes'] = list(set(class_patterns))
|
|
||||||
|
|
||||||
# Technology mentions
|
|
||||||
tech_keywords = [
|
|
||||||
'react', 'vue', 'angular', 'node', 'express', 'django', 'flask',
|
|
||||||
'spring', 'rails', 'laravel', 'docker', 'kubernetes', 'aws',
|
|
||||||
'azure', 'gcp', 'postgresql', 'mysql', 'mongodb', 'redis'
|
|
||||||
]
|
|
||||||
text_lower = text.lower()
|
|
||||||
for tech in tech_keywords:
|
|
||||||
if tech in text_lower:
|
|
||||||
entities['technologies'].append(tech)
|
|
||||||
|
|
||||||
return entities
|
|
||||||
|
|
||||||
def analyze(self, prompt: str) -> AnalysisResult:
|
|
||||||
"""Perform comprehensive analysis of the user prompt."""
|
|
||||||
self.logger.debug(f"Analyzing prompt: {prompt[:100]}...")
|
|
||||||
|
|
||||||
# Extract keywords
|
|
||||||
keywords = self.extract_keywords(prompt)
|
|
||||||
|
|
||||||
# Identify domains and languages
|
|
||||||
domains_with_scores = self.identify_domains(keywords)
|
|
||||||
languages_with_scores = self.identify_languages(keywords)
|
|
||||||
|
|
||||||
# Extract patterns and entities
|
|
||||||
explicit_patterns = self.extract_file_patterns(prompt)
|
|
||||||
entities = self.extract_entities(prompt)
|
|
||||||
|
|
||||||
# Get top domains and languages
|
|
||||||
domains = [domain for domain, score in domains_with_scores]
|
|
||||||
languages = [lang for lang, score in languages_with_scores]
|
|
||||||
|
|
||||||
# Suggest additional patterns based on domains
|
|
||||||
suggested_patterns = self.suggest_patterns_from_domains(domains)
|
|
||||||
|
|
||||||
# Combine explicit and suggested patterns
|
|
||||||
all_patterns = list(set(explicit_patterns + suggested_patterns))
|
|
||||||
|
|
||||||
# Build confidence scores
|
|
||||||
confidence_scores = {
|
|
||||||
'keywords': len(keywords) / 20, # Normalize to 0-1
|
|
||||||
'domain_match': max([score for _, score in domains_with_scores[:1]], default=0),
|
|
||||||
'language_match': max([score for _, score in languages_with_scores[:1]], default=0),
|
|
||||||
'pattern_extraction': len(explicit_patterns) / 5, # Normalize to 0-1
|
|
||||||
}
|
|
||||||
|
|
||||||
result = AnalysisResult(
|
|
||||||
keywords=keywords,
|
|
||||||
domains=domains,
|
|
||||||
languages=languages,
|
|
||||||
file_patterns=all_patterns,
|
|
||||||
confidence_scores=confidence_scores,
|
|
||||||
extracted_entities=entities
|
|
||||||
)
|
|
||||||
|
|
||||||
self.logger.info(f"Analysis complete: {len(domains)} domains, {len(languages)} languages, {len(all_patterns)} patterns")
|
|
||||||
return result
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Command-line interface for context analyzer."""
|
|
||||||
import yaml
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description="Context Analyzer for UltraThink")
|
|
||||||
parser.add_argument("prompt", help="Prompt to analyze")
|
|
||||||
parser.add_argument("--config", default="config.yaml", help="Configuration file path")
|
|
||||||
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
# Setup logging
|
|
||||||
level = logging.DEBUG if args.verbose else logging.INFO
|
|
||||||
logging.basicConfig(level=level, format='%(levelname)s: %(message)s')
|
|
||||||
|
|
||||||
# Load configuration
|
|
||||||
from pathlib import Path
|
|
||||||
config_path = Path(__file__).parent / args.config
|
|
||||||
with open(config_path, 'r', encoding='utf-8') as f:
|
|
||||||
config = yaml.safe_load(f)
|
|
||||||
|
|
||||||
# Create analyzer
|
|
||||||
analyzer = ContextAnalyzer(config)
|
|
||||||
|
|
||||||
# Analyze prompt
|
|
||||||
result = analyzer.analyze(args.prompt)
|
|
||||||
|
|
||||||
# Output results
|
|
||||||
print(f"Keywords: {', '.join(result.keywords[:10])}")
|
|
||||||
print(f"Domains: {', '.join(result.domains[:5])}")
|
|
||||||
print(f"Languages: {', '.join(result.languages[:3])}")
|
|
||||||
print(f"Patterns: {', '.join(result.file_patterns[:10])}")
|
|
||||||
|
|
||||||
if args.verbose:
|
|
||||||
print("\nDetailed Results:")
|
|
||||||
print(json.dumps({
|
|
||||||
'keywords': result.keywords,
|
|
||||||
'domains': result.domains,
|
|
||||||
'languages': result.languages,
|
|
||||||
'file_patterns': result.file_patterns,
|
|
||||||
'confidence_scores': result.confidence_scores,
|
|
||||||
'extracted_entities': result.extracted_entities
|
|
||||||
}, indent=2))
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,458 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Embedding Manager Module for UltraThink Path-Aware Analyzer
|
|
||||||
Manages embeddings for semantic similarity search (RAG functionality).
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import json
|
|
||||||
import hashlib
|
|
||||||
import logging
|
|
||||||
import pickle
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Dict, List, Tuple, Optional, Any
|
|
||||||
from dataclasses import dataclass
|
|
||||||
import time
|
|
||||||
|
|
||||||
# Optional imports for embedding functionality
|
|
||||||
try:
|
|
||||||
import numpy as np
|
|
||||||
NUMPY_AVAILABLE = True
|
|
||||||
except ImportError:
|
|
||||||
NUMPY_AVAILABLE = False
|
|
||||||
|
|
||||||
try:
|
|
||||||
from sentence_transformers import SentenceTransformer
|
|
||||||
SENTENCE_TRANSFORMERS_AVAILABLE = True
|
|
||||||
except ImportError:
|
|
||||||
SENTENCE_TRANSFORMERS_AVAILABLE = False
|
|
||||||
|
|
||||||
from .file_indexer import FileInfo
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class EmbeddingInfo:
|
|
||||||
"""Information about a file's embedding."""
|
|
||||||
file_path: str
|
|
||||||
content_hash: str
|
|
||||||
embedding_hash: str
|
|
||||||
created_time: float
|
|
||||||
vector_size: int
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class SimilarityResult:
|
|
||||||
"""Result of similarity search."""
|
|
||||||
file_info: FileInfo
|
|
||||||
similarity_score: float
|
|
||||||
matching_content: str
|
|
||||||
|
|
||||||
class EmbeddingManager:
|
|
||||||
"""Manages embeddings for semantic file matching."""
|
|
||||||
|
|
||||||
def __init__(self, config: Dict):
|
|
||||||
self.config = config
|
|
||||||
self.logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Check if embeddings are enabled
|
|
||||||
self.enabled = config.get('embedding', {}).get('enabled', False)
|
|
||||||
if not self.enabled:
|
|
||||||
self.logger.info("Embeddings disabled in configuration")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Check dependencies
|
|
||||||
if not NUMPY_AVAILABLE:
|
|
||||||
self.logger.warning("NumPy not available, disabling embeddings")
|
|
||||||
self.enabled = False
|
|
||||||
return
|
|
||||||
|
|
||||||
if not SENTENCE_TRANSFORMERS_AVAILABLE:
|
|
||||||
self.logger.warning("sentence-transformers not available, disabling embeddings")
|
|
||||||
self.enabled = False
|
|
||||||
return
|
|
||||||
|
|
||||||
# Load configuration
|
|
||||||
self.model_name = config.get('embedding', {}).get('model', 'all-MiniLM-L6-v2')
|
|
||||||
self.cache_dir = Path(config.get('embedding', {}).get('cache_dir', '.claude/cache/embeddings'))
|
|
||||||
self.similarity_threshold = config.get('embedding', {}).get('similarity_threshold', 0.6)
|
|
||||||
self.max_context_length = config.get('embedding', {}).get('max_context_length', 512)
|
|
||||||
self.batch_size = config.get('embedding', {}).get('batch_size', 32)
|
|
||||||
self.trust_remote_code = config.get('embedding', {}).get('trust_remote_code', False)
|
|
||||||
|
|
||||||
# Setup cache directories
|
|
||||||
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
self.embeddings_file = self.cache_dir / "embeddings.pkl"
|
|
||||||
self.index_file = self.cache_dir / "embedding_index.json"
|
|
||||||
|
|
||||||
# Initialize model lazily
|
|
||||||
self._model = None
|
|
||||||
self._embeddings_cache = None
|
|
||||||
self._embedding_index = None
|
|
||||||
|
|
||||||
@property
|
|
||||||
def model(self):
|
|
||||||
"""Lazy load the embedding model."""
|
|
||||||
if not self.enabled:
|
|
||||||
return None
|
|
||||||
|
|
||||||
if self._model is None:
|
|
||||||
try:
|
|
||||||
self.logger.info(f"Loading embedding model: {self.model_name}")
|
|
||||||
# Initialize with trust_remote_code for CodeSage V2
|
|
||||||
if self.trust_remote_code:
|
|
||||||
self._model = SentenceTransformer(self.model_name, trust_remote_code=True)
|
|
||||||
else:
|
|
||||||
self._model = SentenceTransformer(self.model_name)
|
|
||||||
self.logger.info(f"Model loaded successfully")
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to load embedding model: {e}")
|
|
||||||
self.enabled = False
|
|
||||||
return None
|
|
||||||
|
|
||||||
return self._model
|
|
||||||
|
|
||||||
def embeddings_exist(self) -> bool:
|
|
||||||
"""Check if embeddings cache exists."""
|
|
||||||
return self.embeddings_file.exists() and self.index_file.exists()
|
|
||||||
|
|
||||||
def _load_embedding_cache(self) -> Dict[str, np.ndarray]:
|
|
||||||
"""Load embeddings from cache."""
|
|
||||||
if self._embeddings_cache is not None:
|
|
||||||
return self._embeddings_cache
|
|
||||||
|
|
||||||
if not self.embeddings_file.exists():
|
|
||||||
self._embeddings_cache = {}
|
|
||||||
return self._embeddings_cache
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(self.embeddings_file, 'rb') as f:
|
|
||||||
self._embeddings_cache = pickle.load(f)
|
|
||||||
self.logger.debug(f"Loaded {len(self._embeddings_cache)} embeddings from cache")
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.warning(f"Failed to load embeddings cache: {e}")
|
|
||||||
self._embeddings_cache = {}
|
|
||||||
|
|
||||||
return self._embeddings_cache
|
|
||||||
|
|
||||||
def _save_embedding_cache(self):
|
|
||||||
"""Save embeddings to cache."""
|
|
||||||
if self._embeddings_cache is None:
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(self.embeddings_file, 'wb') as f:
|
|
||||||
pickle.dump(self._embeddings_cache, f)
|
|
||||||
self.logger.debug(f"Saved {len(self._embeddings_cache)} embeddings to cache")
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to save embeddings cache: {e}")
|
|
||||||
|
|
||||||
def _load_embedding_index(self) -> Dict[str, EmbeddingInfo]:
|
|
||||||
"""Load embedding index."""
|
|
||||||
if self._embedding_index is not None:
|
|
||||||
return self._embedding_index
|
|
||||||
|
|
||||||
if not self.index_file.exists():
|
|
||||||
self._embedding_index = {}
|
|
||||||
return self._embedding_index
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(self.index_file, 'r', encoding='utf-8') as f:
|
|
||||||
data = json.load(f)
|
|
||||||
self._embedding_index = {}
|
|
||||||
for path, info_dict in data.items():
|
|
||||||
self._embedding_index[path] = EmbeddingInfo(**info_dict)
|
|
||||||
self.logger.debug(f"Loaded embedding index with {len(self._embedding_index)} entries")
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.warning(f"Failed to load embedding index: {e}")
|
|
||||||
self._embedding_index = {}
|
|
||||||
|
|
||||||
return self._embedding_index
|
|
||||||
|
|
||||||
def _save_embedding_index(self):
|
|
||||||
"""Save embedding index."""
|
|
||||||
if self._embedding_index is None:
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
data = {}
|
|
||||||
for path, info in self._embedding_index.items():
|
|
||||||
data[path] = {
|
|
||||||
'file_path': info.file_path,
|
|
||||||
'content_hash': info.content_hash,
|
|
||||||
'embedding_hash': info.embedding_hash,
|
|
||||||
'created_time': info.created_time,
|
|
||||||
'vector_size': info.vector_size
|
|
||||||
}
|
|
||||||
|
|
||||||
with open(self.index_file, 'w', encoding='utf-8') as f:
|
|
||||||
json.dump(data, f, indent=2)
|
|
||||||
self.logger.debug(f"Saved embedding index with {len(self._embedding_index)} entries")
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to save embedding index: {e}")
|
|
||||||
|
|
||||||
def _extract_text_content(self, file_info: FileInfo) -> Optional[str]:
|
|
||||||
"""Extract text content from a file for embedding."""
|
|
||||||
try:
|
|
||||||
file_path = Path(file_info.path)
|
|
||||||
|
|
||||||
# Skip binary files and very large files
|
|
||||||
if file_info.size > self.config.get('performance', {}).get('max_file_size', 10485760):
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Only process text-based files
|
|
||||||
text_extensions = {'.py', '.js', '.ts', '.tsx', '.jsx', '.java', '.cpp', '.c', '.h',
|
|
||||||
'.rs', '.go', '.php', '.rb', '.sh', '.bash', '.md', '.txt', '.json',
|
|
||||||
'.yaml', '.yml', '.xml', '.html', '.css', '.scss', '.sass'}
|
|
||||||
|
|
||||||
if file_info.extension.lower() not in text_extensions:
|
|
||||||
return None
|
|
||||||
|
|
||||||
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
|
||||||
content = f.read()
|
|
||||||
|
|
||||||
# Truncate content if too long (CodeSage V2 supports longer contexts)
|
|
||||||
if len(content) > self.max_context_length * 4: # Approximate token limit
|
|
||||||
content = content[:self.max_context_length * 4]
|
|
||||||
|
|
||||||
return content
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.debug(f"Could not extract content from {file_info.path}: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _create_embedding(self, text: str) -> Optional[np.ndarray]:
|
|
||||||
"""Create embedding for text content."""
|
|
||||||
if not self.enabled or self.model is None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Truncate text if needed
|
|
||||||
if len(text) > self.max_context_length * 4:
|
|
||||||
text = text[:self.max_context_length * 4]
|
|
||||||
|
|
||||||
embedding = self.model.encode([text])[0]
|
|
||||||
return embedding
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.warning(f"Failed to create embedding: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _get_content_hash(self, content: str) -> str:
|
|
||||||
"""Get hash of content for caching."""
|
|
||||||
return hashlib.md5(content.encode('utf-8')).hexdigest()
|
|
||||||
|
|
||||||
def _get_embedding_hash(self, embedding: np.ndarray) -> str:
|
|
||||||
"""Get hash of embedding for verification."""
|
|
||||||
return hashlib.md5(embedding.tobytes()).hexdigest()
|
|
||||||
|
|
||||||
def update_embeddings(self, file_index: Dict[str, FileInfo], force_rebuild: bool = False) -> int:
|
|
||||||
"""Update embeddings for files in the index."""
|
|
||||||
if not self.enabled:
|
|
||||||
self.logger.info("Embeddings disabled, skipping update")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
self.logger.info("Updating embeddings...")
|
|
||||||
|
|
||||||
# Load caches
|
|
||||||
embeddings_cache = self._load_embedding_cache()
|
|
||||||
embedding_index = self._load_embedding_index()
|
|
||||||
|
|
||||||
new_embeddings = 0
|
|
||||||
batch_texts = []
|
|
||||||
batch_paths = []
|
|
||||||
|
|
||||||
for file_path, file_info in file_index.items():
|
|
||||||
# Check if embedding exists and is current
|
|
||||||
if not force_rebuild and file_path in embedding_index:
|
|
||||||
cached_info = embedding_index[file_path]
|
|
||||||
if cached_info.content_hash == file_info.content_hash:
|
|
||||||
continue # Embedding is current
|
|
||||||
|
|
||||||
# Extract content
|
|
||||||
content = self._extract_text_content(file_info)
|
|
||||||
if content is None:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Prepare for batch processing
|
|
||||||
batch_texts.append(content)
|
|
||||||
batch_paths.append(file_path)
|
|
||||||
|
|
||||||
# Process batch when full
|
|
||||||
if len(batch_texts) >= self.batch_size:
|
|
||||||
self._process_batch(batch_texts, batch_paths, file_index, embeddings_cache, embedding_index)
|
|
||||||
new_embeddings += len(batch_texts)
|
|
||||||
batch_texts = []
|
|
||||||
batch_paths = []
|
|
||||||
|
|
||||||
# Process remaining batch
|
|
||||||
if batch_texts:
|
|
||||||
self._process_batch(batch_texts, batch_paths, file_index, embeddings_cache, embedding_index)
|
|
||||||
new_embeddings += len(batch_texts)
|
|
||||||
|
|
||||||
# Save caches
|
|
||||||
self._save_embedding_cache()
|
|
||||||
self._save_embedding_index()
|
|
||||||
|
|
||||||
self.logger.info(f"Updated {new_embeddings} embeddings")
|
|
||||||
return new_embeddings
|
|
||||||
|
|
||||||
def _process_batch(self, texts: List[str], paths: List[str], file_index: Dict[str, FileInfo],
|
|
||||||
embeddings_cache: Dict[str, np.ndarray], embedding_index: Dict[str, EmbeddingInfo]):
|
|
||||||
"""Process a batch of texts for embedding."""
|
|
||||||
try:
|
|
||||||
# Create embeddings for batch
|
|
||||||
embeddings = self.model.encode(texts)
|
|
||||||
|
|
||||||
for i, (text, path) in enumerate(zip(texts, paths)):
|
|
||||||
embedding = embeddings[i]
|
|
||||||
file_info = file_index[path]
|
|
||||||
|
|
||||||
# Store embedding
|
|
||||||
content_hash = self._get_content_hash(text)
|
|
||||||
embedding_hash = self._get_embedding_hash(embedding)
|
|
||||||
|
|
||||||
embeddings_cache[path] = embedding
|
|
||||||
embedding_index[path] = EmbeddingInfo(
|
|
||||||
file_path=path,
|
|
||||||
content_hash=content_hash,
|
|
||||||
embedding_hash=embedding_hash,
|
|
||||||
created_time=time.time(),
|
|
||||||
vector_size=len(embedding)
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to process embedding batch: {e}")
|
|
||||||
|
|
||||||
def find_similar_files(self, query: str, file_index: Dict[str, FileInfo],
|
|
||||||
top_k: int = 20) -> List[SimilarityResult]:
|
|
||||||
"""Find files similar to the query using embeddings."""
|
|
||||||
if not self.enabled:
|
|
||||||
return []
|
|
||||||
|
|
||||||
# Create query embedding
|
|
||||||
query_embedding = self._create_embedding(query)
|
|
||||||
if query_embedding is None:
|
|
||||||
return []
|
|
||||||
|
|
||||||
# Load embeddings
|
|
||||||
embeddings_cache = self._load_embedding_cache()
|
|
||||||
if not embeddings_cache:
|
|
||||||
self.logger.warning("No embeddings available for similarity search")
|
|
||||||
return []
|
|
||||||
|
|
||||||
# Calculate similarities
|
|
||||||
similarities = []
|
|
||||||
for file_path, file_embedding in embeddings_cache.items():
|
|
||||||
if file_path not in file_index:
|
|
||||||
continue
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Calculate cosine similarity
|
|
||||||
similarity = np.dot(query_embedding, file_embedding) / (
|
|
||||||
np.linalg.norm(query_embedding) * np.linalg.norm(file_embedding)
|
|
||||||
)
|
|
||||||
|
|
||||||
if similarity >= self.similarity_threshold:
|
|
||||||
similarities.append((file_path, similarity))
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.debug(f"Failed to calculate similarity for {file_path}: {e}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Sort by similarity
|
|
||||||
similarities.sort(key=lambda x: x[1], reverse=True)
|
|
||||||
|
|
||||||
# Create results
|
|
||||||
results = []
|
|
||||||
for file_path, similarity in similarities[:top_k]:
|
|
||||||
file_info = file_index[file_path]
|
|
||||||
|
|
||||||
# Extract a snippet of matching content
|
|
||||||
content = self._extract_text_content(file_info)
|
|
||||||
snippet = content[:200] + "..." if content and len(content) > 200 else content or ""
|
|
||||||
|
|
||||||
result = SimilarityResult(
|
|
||||||
file_info=file_info,
|
|
||||||
similarity_score=similarity,
|
|
||||||
matching_content=snippet
|
|
||||||
)
|
|
||||||
results.append(result)
|
|
||||||
|
|
||||||
self.logger.info(f"Found {len(results)} similar files for query")
|
|
||||||
return results
|
|
||||||
|
|
||||||
def get_stats(self) -> Dict[str, Any]:
|
|
||||||
"""Get statistics about the embedding cache."""
|
|
||||||
if not self.enabled:
|
|
||||||
return {'enabled': False}
|
|
||||||
|
|
||||||
embedding_index = self._load_embedding_index()
|
|
||||||
embeddings_cache = self._load_embedding_cache()
|
|
||||||
|
|
||||||
return {
|
|
||||||
'enabled': True,
|
|
||||||
'model_name': self.model_name,
|
|
||||||
'total_embeddings': len(embedding_index),
|
|
||||||
'cache_size_mb': os.path.getsize(self.embeddings_file) / 1024 / 1024 if self.embeddings_file.exists() else 0,
|
|
||||||
'similarity_threshold': self.similarity_threshold,
|
|
||||||
'vector_size': list(embedding_index.values())[0].vector_size if embedding_index else 0
|
|
||||||
}
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Command-line interface for embedding manager."""
|
|
||||||
import yaml
|
|
||||||
import argparse
|
|
||||||
from .file_indexer import FileIndexer
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description="Embedding Manager for UltraThink")
|
|
||||||
parser.add_argument("--config", default="config.yaml", help="Configuration file path")
|
|
||||||
parser.add_argument("--update", action="store_true", help="Update embeddings")
|
|
||||||
parser.add_argument("--rebuild", action="store_true", help="Force rebuild all embeddings")
|
|
||||||
parser.add_argument("--query", help="Search for similar files")
|
|
||||||
parser.add_argument("--stats", action="store_true", help="Show embedding statistics")
|
|
||||||
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
# Setup logging
|
|
||||||
level = logging.DEBUG if args.verbose else logging.INFO
|
|
||||||
logging.basicConfig(level=level, format='%(levelname)s: %(message)s')
|
|
||||||
|
|
||||||
# Load configuration
|
|
||||||
config_path = Path(__file__).parent / args.config
|
|
||||||
with open(config_path, 'r', encoding='utf-8') as f:
|
|
||||||
config = yaml.safe_load(f)
|
|
||||||
|
|
||||||
# Create components
|
|
||||||
indexer = FileIndexer(config)
|
|
||||||
embedding_manager = EmbeddingManager(config)
|
|
||||||
|
|
||||||
if not embedding_manager.enabled:
|
|
||||||
print("Embeddings are disabled. Enable in config.yaml or install required dependencies.")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Load file index
|
|
||||||
file_index = indexer.load_index()
|
|
||||||
if not file_index:
|
|
||||||
print("Building file index...")
|
|
||||||
file_index = indexer.build_index()
|
|
||||||
|
|
||||||
if args.stats:
|
|
||||||
stats = embedding_manager.get_stats()
|
|
||||||
print("Embedding Statistics:")
|
|
||||||
for key, value in stats.items():
|
|
||||||
print(f" {key}: {value}")
|
|
||||||
return
|
|
||||||
|
|
||||||
if args.update or args.rebuild:
|
|
||||||
count = embedding_manager.update_embeddings(file_index, force_rebuild=args.rebuild)
|
|
||||||
print(f"Updated {count} embeddings")
|
|
||||||
|
|
||||||
if args.query:
|
|
||||||
results = embedding_manager.find_similar_files(args.query, file_index)
|
|
||||||
print(f"Found {len(results)} similar files:")
|
|
||||||
for result in results:
|
|
||||||
print(f" {result.file_info.relative_path} (similarity: {result.similarity_score:.3f})")
|
|
||||||
if args.verbose and result.matching_content:
|
|
||||||
print(f" Content: {result.matching_content[:100]}...")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,383 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
File Indexer Module for UltraThink Path-Aware Analyzer
|
|
||||||
Builds and maintains an index of repository files with metadata.
|
|
||||||
Enhanced with gitignore support and unified configuration.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import hashlib
|
|
||||||
import json
|
|
||||||
import time
|
|
||||||
import logging
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Dict, List, Optional, Set, Tuple, Union
|
|
||||||
from dataclasses import dataclass, asdict
|
|
||||||
from datetime import datetime
|
|
||||||
import fnmatch
|
|
||||||
|
|
||||||
from .gitignore_parser import GitignoreParser
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class FileInfo:
|
|
||||||
"""Information about a single file in the repository."""
|
|
||||||
path: str
|
|
||||||
relative_path: str
|
|
||||||
size: int
|
|
||||||
modified_time: float
|
|
||||||
extension: str
|
|
||||||
category: str # code, docs, config, web
|
|
||||||
estimated_tokens: int
|
|
||||||
content_hash: str
|
|
||||||
|
|
||||||
def to_dict(self) -> Dict:
|
|
||||||
return asdict(self)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def from_dict(cls, data: Dict) -> 'FileInfo':
|
|
||||||
return cls(**data)
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class IndexStats:
|
|
||||||
"""Statistics about the file index."""
|
|
||||||
total_files: int
|
|
||||||
total_tokens: int
|
|
||||||
total_size: int
|
|
||||||
categories: Dict[str, int]
|
|
||||||
last_updated: float
|
|
||||||
|
|
||||||
def to_dict(self) -> Dict:
|
|
||||||
return asdict(self)
|
|
||||||
|
|
||||||
class FileIndexer:
|
|
||||||
"""Builds and maintains an efficient index of repository files."""
|
|
||||||
|
|
||||||
def __init__(self, config: Union['Config', Dict], root_path: str = "."):
|
|
||||||
# Support both Config object and Dict for backward compatibility
|
|
||||||
if hasattr(config, 'to_dict'):
|
|
||||||
self.config_obj = config
|
|
||||||
self.config = config.to_dict()
|
|
||||||
else:
|
|
||||||
self.config_obj = None
|
|
||||||
self.config = config
|
|
||||||
|
|
||||||
self.root_path = Path(root_path).resolve()
|
|
||||||
self.cache_dir = Path(self.config.get('embedding', {}).get('cache_dir', '.claude/cache'))
|
|
||||||
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
self.index_file = self.cache_dir / "file_index.json"
|
|
||||||
|
|
||||||
# Setup logging
|
|
||||||
self.logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# File extension mappings
|
|
||||||
self.extension_categories = self._build_extension_map()
|
|
||||||
|
|
||||||
# Exclude patterns from config
|
|
||||||
self.exclude_patterns = list(self.config.get('exclude_patterns', []))
|
|
||||||
|
|
||||||
# Initialize gitignore parser and add patterns
|
|
||||||
self.gitignore_parser = GitignoreParser(str(self.root_path))
|
|
||||||
self._load_gitignore_patterns()
|
|
||||||
|
|
||||||
# Performance settings
|
|
||||||
self.max_file_size = self.config.get('performance', {}).get('max_file_size', 10485760)
|
|
||||||
|
|
||||||
def _build_extension_map(self) -> Dict[str, str]:
|
|
||||||
"""Build mapping from file extensions to categories."""
|
|
||||||
ext_map = {}
|
|
||||||
for category, extensions in self.config.get('file_extensions', {}).items():
|
|
||||||
for ext in extensions:
|
|
||||||
ext_map[ext.lower()] = category
|
|
||||||
return ext_map
|
|
||||||
|
|
||||||
def _load_gitignore_patterns(self):
|
|
||||||
"""Load patterns from .gitignore files and add to exclude_patterns."""
|
|
||||||
try:
|
|
||||||
gitignore_patterns = self.gitignore_parser.parse_all_gitignores()
|
|
||||||
|
|
||||||
if gitignore_patterns:
|
|
||||||
# Avoid duplicates
|
|
||||||
existing_patterns = set(self.exclude_patterns)
|
|
||||||
new_patterns = [p for p in gitignore_patterns if p not in existing_patterns]
|
|
||||||
|
|
||||||
self.exclude_patterns.extend(new_patterns)
|
|
||||||
self.logger.info(f"Added {len(new_patterns)} patterns from .gitignore files")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.warning(f"Failed to load .gitignore patterns: {e}")
|
|
||||||
|
|
||||||
def _should_exclude_file(self, file_path: Path) -> bool:
|
|
||||||
"""Check if file should be excluded based on patterns and gitignore rules."""
|
|
||||||
relative_path = str(file_path.relative_to(self.root_path))
|
|
||||||
|
|
||||||
# Check against exclude patterns from config
|
|
||||||
for pattern in self.exclude_patterns:
|
|
||||||
# Convert pattern to work with fnmatch
|
|
||||||
if fnmatch.fnmatch(relative_path, pattern) or fnmatch.fnmatch(str(file_path), pattern):
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Check if any parent directory matches
|
|
||||||
parts = relative_path.split(os.sep)
|
|
||||||
for i in range(len(parts)):
|
|
||||||
partial_path = "/".join(parts[:i+1])
|
|
||||||
if fnmatch.fnmatch(partial_path, pattern):
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Also check gitignore rules using dedicated parser
|
|
||||||
# Note: gitignore patterns are already included in self.exclude_patterns
|
|
||||||
# but we can add additional gitignore-specific checking here if needed
|
|
||||||
try:
|
|
||||||
# The gitignore patterns are already loaded into exclude_patterns,
|
|
||||||
# but we can do additional gitignore-specific checks if needed
|
|
||||||
pass
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.debug(f"Error in gitignore checking for {file_path}: {e}")
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
def _estimate_tokens(self, file_path: Path) -> int:
|
|
||||||
"""Estimate token count for a file (chars/4 approximation)."""
|
|
||||||
try:
|
|
||||||
if file_path.stat().st_size > self.max_file_size:
|
|
||||||
return file_path.stat().st_size // 8 # Penalty for large files
|
|
||||||
|
|
||||||
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
|
||||||
content = f.read()
|
|
||||||
return len(content) // 4 # Rough approximation
|
|
||||||
except (UnicodeDecodeError, OSError):
|
|
||||||
# Binary files or unreadable files
|
|
||||||
return file_path.stat().st_size // 8
|
|
||||||
|
|
||||||
def _get_file_hash(self, file_path: Path) -> str:
|
|
||||||
"""Get a hash of file metadata for change detection."""
|
|
||||||
stat = file_path.stat()
|
|
||||||
return hashlib.md5(f"{file_path}:{stat.st_size}:{stat.st_mtime}".encode()).hexdigest()
|
|
||||||
|
|
||||||
def _categorize_file(self, file_path: Path) -> str:
|
|
||||||
"""Categorize file based on extension."""
|
|
||||||
extension = file_path.suffix.lower()
|
|
||||||
return self.extension_categories.get(extension, 'other')
|
|
||||||
|
|
||||||
def _scan_file(self, file_path: Path) -> Optional[FileInfo]:
|
|
||||||
"""Scan a single file and create FileInfo."""
|
|
||||||
try:
|
|
||||||
if not file_path.is_file() or self._should_exclude_file(file_path):
|
|
||||||
return None
|
|
||||||
|
|
||||||
stat = file_path.stat()
|
|
||||||
relative_path = str(file_path.relative_to(self.root_path))
|
|
||||||
|
|
||||||
file_info = FileInfo(
|
|
||||||
path=str(file_path),
|
|
||||||
relative_path=relative_path,
|
|
||||||
size=stat.st_size,
|
|
||||||
modified_time=stat.st_mtime,
|
|
||||||
extension=file_path.suffix.lower(),
|
|
||||||
category=self._categorize_file(file_path),
|
|
||||||
estimated_tokens=self._estimate_tokens(file_path),
|
|
||||||
content_hash=self._get_file_hash(file_path)
|
|
||||||
)
|
|
||||||
|
|
||||||
return file_info
|
|
||||||
|
|
||||||
except (OSError, PermissionError) as e:
|
|
||||||
self.logger.warning(f"Could not scan file {file_path}: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def build_index(self, force_rebuild: bool = False) -> Dict[str, FileInfo]:
|
|
||||||
"""Build or update the file index."""
|
|
||||||
self.logger.info(f"Building file index for {self.root_path}")
|
|
||||||
|
|
||||||
# Load existing index if available
|
|
||||||
existing_index = {}
|
|
||||||
if not force_rebuild and self.index_file.exists():
|
|
||||||
existing_index = self.load_index()
|
|
||||||
|
|
||||||
new_index = {}
|
|
||||||
changed_files = 0
|
|
||||||
|
|
||||||
# Walk through all files
|
|
||||||
for file_path in self.root_path.rglob('*'):
|
|
||||||
if not file_path.is_file():
|
|
||||||
continue
|
|
||||||
|
|
||||||
file_info = self._scan_file(file_path)
|
|
||||||
if file_info is None:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check if file has changed
|
|
||||||
relative_path = file_info.relative_path
|
|
||||||
if relative_path in existing_index:
|
|
||||||
old_info = existing_index[relative_path]
|
|
||||||
if old_info.content_hash == file_info.content_hash:
|
|
||||||
# File unchanged, keep old info
|
|
||||||
new_index[relative_path] = old_info
|
|
||||||
continue
|
|
||||||
|
|
||||||
# File is new or changed
|
|
||||||
new_index[relative_path] = file_info
|
|
||||||
changed_files += 1
|
|
||||||
|
|
||||||
self.logger.info(f"Indexed {len(new_index)} files ({changed_files} new/changed)")
|
|
||||||
|
|
||||||
# Save index
|
|
||||||
self.save_index(new_index)
|
|
||||||
|
|
||||||
return new_index
|
|
||||||
|
|
||||||
def load_index(self) -> Dict[str, FileInfo]:
|
|
||||||
"""Load file index from cache."""
|
|
||||||
if not self.index_file.exists():
|
|
||||||
return {}
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(self.index_file, 'r', encoding='utf-8') as f:
|
|
||||||
data = json.load(f)
|
|
||||||
index = {}
|
|
||||||
for path, info_dict in data.get('files', {}).items():
|
|
||||||
index[path] = FileInfo.from_dict(info_dict)
|
|
||||||
return index
|
|
||||||
except (json.JSONDecodeError, KeyError) as e:
|
|
||||||
self.logger.warning(f"Could not load index: {e}")
|
|
||||||
return {}
|
|
||||||
|
|
||||||
def save_index(self, index: Dict[str, FileInfo]) -> None:
|
|
||||||
"""Save file index to cache."""
|
|
||||||
try:
|
|
||||||
# Calculate stats
|
|
||||||
stats = self._calculate_stats(index)
|
|
||||||
|
|
||||||
data = {
|
|
||||||
'stats': stats.to_dict(),
|
|
||||||
'files': {path: info.to_dict() for path, info in index.items()}
|
|
||||||
}
|
|
||||||
|
|
||||||
with open(self.index_file, 'w', encoding='utf-8') as f:
|
|
||||||
json.dump(data, f, indent=2)
|
|
||||||
|
|
||||||
except OSError as e:
|
|
||||||
self.logger.error(f"Could not save index: {e}")
|
|
||||||
|
|
||||||
def _calculate_stats(self, index: Dict[str, FileInfo]) -> IndexStats:
|
|
||||||
"""Calculate statistics for the index."""
|
|
||||||
total_files = len(index)
|
|
||||||
total_tokens = sum(info.estimated_tokens for info in index.values())
|
|
||||||
total_size = sum(info.size for info in index.values())
|
|
||||||
|
|
||||||
categories = {}
|
|
||||||
for info in index.values():
|
|
||||||
categories[info.category] = categories.get(info.category, 0) + 1
|
|
||||||
|
|
||||||
return IndexStats(
|
|
||||||
total_files=total_files,
|
|
||||||
total_tokens=total_tokens,
|
|
||||||
total_size=total_size,
|
|
||||||
categories=categories,
|
|
||||||
last_updated=time.time()
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_stats(self) -> Optional[IndexStats]:
|
|
||||||
"""Get statistics about the current index."""
|
|
||||||
if not self.index_file.exists():
|
|
||||||
return None
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(self.index_file, 'r', encoding='utf-8') as f:
|
|
||||||
data = json.load(f)
|
|
||||||
return IndexStats(**data.get('stats', {}))
|
|
||||||
except (json.JSONDecodeError, KeyError):
|
|
||||||
return None
|
|
||||||
|
|
||||||
def find_files_by_pattern(self, pattern: str, index: Optional[Dict[str, FileInfo]] = None) -> List[FileInfo]:
|
|
||||||
"""Find files matching a glob pattern."""
|
|
||||||
if index is None:
|
|
||||||
index = self.load_index()
|
|
||||||
|
|
||||||
matching_files = []
|
|
||||||
for path, info in index.items():
|
|
||||||
if fnmatch.fnmatch(path, pattern) or fnmatch.fnmatch(info.path, pattern):
|
|
||||||
matching_files.append(info)
|
|
||||||
|
|
||||||
return matching_files
|
|
||||||
|
|
||||||
def find_files_by_category(self, category: str, index: Optional[Dict[str, FileInfo]] = None) -> List[FileInfo]:
|
|
||||||
"""Find files by category (code, docs, config, etc.)."""
|
|
||||||
if index is None:
|
|
||||||
index = self.load_index()
|
|
||||||
|
|
||||||
return [info for info in index.values() if info.category == category]
|
|
||||||
|
|
||||||
def find_files_by_keywords(self, keywords: List[str], index: Optional[Dict[str, FileInfo]] = None) -> List[FileInfo]:
|
|
||||||
"""Find files whose paths contain any of the specified keywords."""
|
|
||||||
if index is None:
|
|
||||||
index = self.load_index()
|
|
||||||
|
|
||||||
matching_files = []
|
|
||||||
keywords_lower = [kw.lower() for kw in keywords]
|
|
||||||
|
|
||||||
for info in index.values():
|
|
||||||
path_lower = info.relative_path.lower()
|
|
||||||
if any(keyword in path_lower for keyword in keywords_lower):
|
|
||||||
matching_files.append(info)
|
|
||||||
|
|
||||||
return matching_files
|
|
||||||
|
|
||||||
def get_recent_files(self, limit: int = 20, index: Optional[Dict[str, FileInfo]] = None) -> List[FileInfo]:
|
|
||||||
"""Get most recently modified files."""
|
|
||||||
if index is None:
|
|
||||||
index = self.load_index()
|
|
||||||
|
|
||||||
files = list(index.values())
|
|
||||||
files.sort(key=lambda f: f.modified_time, reverse=True)
|
|
||||||
return files[:limit]
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Command-line interface for file indexer."""
|
|
||||||
import yaml
|
|
||||||
import argparse
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description="File Indexer for UltraThink")
|
|
||||||
parser.add_argument("--config", default="config.yaml", help="Configuration file path")
|
|
||||||
parser.add_argument("--rebuild", action="store_true", help="Force rebuild index")
|
|
||||||
parser.add_argument("--stats", action="store_true", help="Show index statistics")
|
|
||||||
parser.add_argument("--pattern", help="Find files matching pattern")
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
# Load configuration
|
|
||||||
config_path = Path(__file__).parent / args.config
|
|
||||||
with open(config_path, 'r', encoding='utf-8') as f:
|
|
||||||
config = yaml.safe_load(f)
|
|
||||||
|
|
||||||
# Setup logging
|
|
||||||
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
|
|
||||||
|
|
||||||
# Create indexer
|
|
||||||
indexer = FileIndexer(config)
|
|
||||||
|
|
||||||
if args.stats:
|
|
||||||
stats = indexer.get_stats()
|
|
||||||
if stats:
|
|
||||||
print(f"Total files: {stats.total_files}")
|
|
||||||
print(f"Total tokens: {stats.total_tokens:,}")
|
|
||||||
print(f"Total size: {stats.total_size:,} bytes")
|
|
||||||
print(f"Categories: {stats.categories}")
|
|
||||||
print(f"Last updated: {datetime.fromtimestamp(stats.last_updated)}")
|
|
||||||
else:
|
|
||||||
print("No index found. Run without --stats to build index.")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Build index
|
|
||||||
index = indexer.build_index(force_rebuild=args.rebuild)
|
|
||||||
|
|
||||||
if args.pattern:
|
|
||||||
files = indexer.find_files_by_pattern(args.pattern, index)
|
|
||||||
print(f"Found {len(files)} files matching pattern '{args.pattern}':")
|
|
||||||
for file_info in files[:20]: # Limit output
|
|
||||||
print(f" {file_info.relative_path}")
|
|
||||||
else:
|
|
||||||
stats = indexer._calculate_stats(index)
|
|
||||||
print(f"Index built: {stats.total_files} files, ~{stats.total_tokens:,} tokens")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,182 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
GitIgnore Parser Module
|
|
||||||
Parses .gitignore files and converts rules to fnmatch patterns for file exclusion.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import fnmatch
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import List, Set, Optional
|
|
||||||
|
|
||||||
|
|
||||||
class GitignoreParser:
|
|
||||||
"""Parser for .gitignore files that converts rules to fnmatch patterns."""
|
|
||||||
|
|
||||||
def __init__(self, root_path: str = "."):
|
|
||||||
self.root_path = Path(root_path).resolve()
|
|
||||||
self.patterns: List[str] = []
|
|
||||||
self.negation_patterns: List[str] = []
|
|
||||||
|
|
||||||
def parse_file(self, gitignore_path: str) -> List[str]:
|
|
||||||
"""Parse a .gitignore file and return exclude patterns."""
|
|
||||||
gitignore_file = Path(gitignore_path)
|
|
||||||
if not gitignore_file.exists():
|
|
||||||
return []
|
|
||||||
|
|
||||||
patterns = []
|
|
||||||
try:
|
|
||||||
with open(gitignore_file, 'r', encoding='utf-8') as f:
|
|
||||||
for line_num, line in enumerate(f, 1):
|
|
||||||
pattern = self._parse_line(line.strip())
|
|
||||||
if pattern:
|
|
||||||
patterns.append(pattern)
|
|
||||||
except (UnicodeDecodeError, IOError):
|
|
||||||
# Fallback to system encoding if UTF-8 fails
|
|
||||||
try:
|
|
||||||
with open(gitignore_file, 'r') as f:
|
|
||||||
for line_num, line in enumerate(f, 1):
|
|
||||||
pattern = self._parse_line(line.strip())
|
|
||||||
if pattern:
|
|
||||||
patterns.append(pattern)
|
|
||||||
except IOError:
|
|
||||||
# If file can't be read, return empty list
|
|
||||||
return []
|
|
||||||
|
|
||||||
return patterns
|
|
||||||
|
|
||||||
def _parse_line(self, line: str) -> Optional[str]:
|
|
||||||
"""Parse a single line from .gitignore file."""
|
|
||||||
# Skip empty lines and comments
|
|
||||||
if not line or line.startswith('#'):
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Handle negation patterns (starting with !)
|
|
||||||
if line.startswith('!'):
|
|
||||||
# For now, we'll skip negation patterns as they require
|
|
||||||
# more complex logic to implement correctly
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Convert gitignore pattern to fnmatch pattern
|
|
||||||
return self._convert_to_fnmatch(line)
|
|
||||||
|
|
||||||
def _convert_to_fnmatch(self, pattern: str) -> str:
|
|
||||||
"""Convert gitignore pattern to fnmatch pattern."""
|
|
||||||
# Remove trailing slash (directory indicator)
|
|
||||||
if pattern.endswith('/'):
|
|
||||||
pattern = pattern[:-1]
|
|
||||||
|
|
||||||
# Handle absolute paths (starting with /)
|
|
||||||
if pattern.startswith('/'):
|
|
||||||
pattern = pattern[1:]
|
|
||||||
# Make it match from root
|
|
||||||
return pattern
|
|
||||||
|
|
||||||
# Handle patterns that should match anywhere in the tree
|
|
||||||
# If pattern doesn't contain '/', it matches files/dirs at any level
|
|
||||||
if '/' not in pattern:
|
|
||||||
return f"*/{pattern}"
|
|
||||||
|
|
||||||
# Pattern contains '/', so it's relative to the gitignore location
|
|
||||||
return pattern
|
|
||||||
|
|
||||||
def parse_all_gitignores(self, root_path: Optional[str] = None) -> List[str]:
|
|
||||||
"""Parse all .gitignore files in the repository hierarchy."""
|
|
||||||
if root_path:
|
|
||||||
self.root_path = Path(root_path).resolve()
|
|
||||||
|
|
||||||
all_patterns = []
|
|
||||||
|
|
||||||
# Find all .gitignore files in the repository
|
|
||||||
gitignore_files = self._find_gitignore_files()
|
|
||||||
|
|
||||||
for gitignore_file in gitignore_files:
|
|
||||||
patterns = self.parse_file(gitignore_file)
|
|
||||||
all_patterns.extend(patterns)
|
|
||||||
|
|
||||||
return all_patterns
|
|
||||||
|
|
||||||
def _find_gitignore_files(self) -> List[Path]:
|
|
||||||
"""Find all .gitignore files in the repository."""
|
|
||||||
gitignore_files = []
|
|
||||||
|
|
||||||
# Start with root .gitignore
|
|
||||||
root_gitignore = self.root_path / '.gitignore'
|
|
||||||
if root_gitignore.exists():
|
|
||||||
gitignore_files.append(root_gitignore)
|
|
||||||
|
|
||||||
# Find .gitignore files in subdirectories
|
|
||||||
try:
|
|
||||||
for gitignore_file in self.root_path.rglob('.gitignore'):
|
|
||||||
if gitignore_file != root_gitignore:
|
|
||||||
gitignore_files.append(gitignore_file)
|
|
||||||
except (PermissionError, OSError):
|
|
||||||
# Skip directories we can't access
|
|
||||||
pass
|
|
||||||
|
|
||||||
return gitignore_files
|
|
||||||
|
|
||||||
def should_exclude(self, file_path: str, gitignore_patterns: List[str]) -> bool:
|
|
||||||
"""Check if a file should be excluded based on gitignore patterns."""
|
|
||||||
# Convert to relative path from root
|
|
||||||
try:
|
|
||||||
rel_path = str(Path(file_path).relative_to(self.root_path))
|
|
||||||
except ValueError:
|
|
||||||
# File is not under root path
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Normalize path separators for consistent matching
|
|
||||||
rel_path = rel_path.replace(os.sep, '/')
|
|
||||||
|
|
||||||
for pattern in gitignore_patterns:
|
|
||||||
if self._matches_pattern(rel_path, pattern):
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
def _matches_pattern(self, file_path: str, pattern: str) -> bool:
|
|
||||||
"""Check if a file path matches a gitignore pattern."""
|
|
||||||
# Normalize pattern separators
|
|
||||||
pattern = pattern.replace(os.sep, '/')
|
|
||||||
|
|
||||||
# Handle different pattern types
|
|
||||||
if pattern.startswith('*/'):
|
|
||||||
# Pattern like */pattern - matches at any level
|
|
||||||
sub_pattern = pattern[2:]
|
|
||||||
return fnmatch.fnmatch(file_path, f"*/{sub_pattern}") or fnmatch.fnmatch(file_path, sub_pattern)
|
|
||||||
elif '/' in pattern:
|
|
||||||
# Pattern contains slash - match exact path
|
|
||||||
return fnmatch.fnmatch(file_path, pattern)
|
|
||||||
else:
|
|
||||||
# Simple pattern - match filename or directory at any level
|
|
||||||
parts = file_path.split('/')
|
|
||||||
return any(fnmatch.fnmatch(part, pattern) for part in parts)
|
|
||||||
|
|
||||||
|
|
||||||
def parse_gitignore(gitignore_path: str) -> List[str]:
|
|
||||||
"""Convenience function to parse a single .gitignore file."""
|
|
||||||
parser = GitignoreParser()
|
|
||||||
return parser.parse_file(gitignore_path)
|
|
||||||
|
|
||||||
|
|
||||||
def get_all_gitignore_patterns(root_path: str = ".") -> List[str]:
|
|
||||||
"""Convenience function to get all gitignore patterns in a repository."""
|
|
||||||
parser = GitignoreParser(root_path)
|
|
||||||
return parser.parse_all_gitignores()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
import sys
|
|
||||||
|
|
||||||
if len(sys.argv) > 1:
|
|
||||||
gitignore_path = sys.argv[1]
|
|
||||||
patterns = parse_gitignore(gitignore_path)
|
|
||||||
print(f"Parsed {len(patterns)} patterns from {gitignore_path}:")
|
|
||||||
for pattern in patterns:
|
|
||||||
print(f" {pattern}")
|
|
||||||
else:
|
|
||||||
# Parse all .gitignore files in current directory
|
|
||||||
patterns = get_all_gitignore_patterns()
|
|
||||||
print(f"Found {len(patterns)} gitignore patterns:")
|
|
||||||
for pattern in patterns:
|
|
||||||
print(f" {pattern}")
|
|
||||||
@@ -1,500 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Path Matcher Module for UltraThink Path-Aware Analyzer
|
|
||||||
Matches files to analysis context and ranks them by relevance.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import re
|
|
||||||
import logging
|
|
||||||
import fnmatch
|
|
||||||
from typing import Dict, List, Tuple, Optional, Set
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from pathlib import Path
|
|
||||||
import math
|
|
||||||
|
|
||||||
from .file_indexer import FileInfo
|
|
||||||
from .context_analyzer import AnalysisResult
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class MatchResult:
|
|
||||||
"""Result of path matching with relevance score."""
|
|
||||||
file_info: FileInfo
|
|
||||||
relevance_score: float
|
|
||||||
match_reasons: List[str]
|
|
||||||
category_bonus: float
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class PathMatchingResult:
|
|
||||||
"""Complete result of path matching operation."""
|
|
||||||
matched_files: List[MatchResult]
|
|
||||||
total_tokens: int
|
|
||||||
categories: Dict[str, int]
|
|
||||||
patterns_used: List[str]
|
|
||||||
confidence_score: float
|
|
||||||
|
|
||||||
class PathMatcher:
|
|
||||||
"""Matches files to analysis context using various algorithms."""
|
|
||||||
|
|
||||||
def __init__(self, config: Dict):
|
|
||||||
self.config = config
|
|
||||||
self.logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Load scoring weights
|
|
||||||
self.weights = config.get('path_matching', {}).get('weights', {
|
|
||||||
'keyword_match': 0.4,
|
|
||||||
'extension_match': 0.2,
|
|
||||||
'directory_context': 0.2,
|
|
||||||
'file_size_penalty': 0.1,
|
|
||||||
'recency_bonus': 0.1
|
|
||||||
})
|
|
||||||
|
|
||||||
# Load limits
|
|
||||||
self.max_files_per_category = config.get('path_matching', {}).get('max_files_per_category', 20)
|
|
||||||
self.min_relevance_score = config.get('path_matching', {}).get('min_relevance_score', 0.1)
|
|
||||||
self.max_total_files = config.get('output', {}).get('max_total_files', 50)
|
|
||||||
|
|
||||||
# Load always include patterns
|
|
||||||
self.always_include = config.get('output', {}).get('always_include', [])
|
|
||||||
|
|
||||||
# Category priorities
|
|
||||||
self.category_priorities = {
|
|
||||||
'code': 1.0,
|
|
||||||
'config': 0.8,
|
|
||||||
'docs': 0.6,
|
|
||||||
'web': 0.4,
|
|
||||||
'other': 0.2
|
|
||||||
}
|
|
||||||
|
|
||||||
def _calculate_keyword_score(self, file_info: FileInfo, keywords: List[str]) -> Tuple[float, List[str]]:
|
|
||||||
"""Calculate score based on keyword matches in file path."""
|
|
||||||
if not keywords:
|
|
||||||
return 0.0, []
|
|
||||||
|
|
||||||
path_lower = file_info.relative_path.lower()
|
|
||||||
filename_lower = Path(file_info.relative_path).name.lower()
|
|
||||||
|
|
||||||
matches = []
|
|
||||||
score = 0.0
|
|
||||||
|
|
||||||
for keyword in keywords:
|
|
||||||
keyword_lower = keyword.lower()
|
|
||||||
|
|
||||||
# Exact filename match (highest weight)
|
|
||||||
if keyword_lower in filename_lower:
|
|
||||||
score += 2.0
|
|
||||||
matches.append(f"filename:{keyword}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Directory name match
|
|
||||||
if keyword_lower in path_lower:
|
|
||||||
score += 1.0
|
|
||||||
matches.append(f"path:{keyword}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Partial match in path components
|
|
||||||
path_parts = path_lower.split('/')
|
|
||||||
for part in path_parts:
|
|
||||||
if keyword_lower in part:
|
|
||||||
score += 0.5
|
|
||||||
matches.append(f"partial:{keyword}")
|
|
||||||
break
|
|
||||||
|
|
||||||
# Normalize by number of keywords
|
|
||||||
normalized_score = score / len(keywords) if keywords else 0.0
|
|
||||||
return min(normalized_score, 1.0), matches
|
|
||||||
|
|
||||||
def _calculate_extension_score(self, file_info: FileInfo, languages: List[str]) -> float:
|
|
||||||
"""Calculate score based on file extension relevance."""
|
|
||||||
if not languages:
|
|
||||||
return 0.5 # Neutral score
|
|
||||||
|
|
||||||
extension = file_info.extension.lower()
|
|
||||||
|
|
||||||
# Language-specific extension mapping
|
|
||||||
lang_extensions = {
|
|
||||||
'python': ['.py', '.pyx', '.pyi'],
|
|
||||||
'javascript': ['.js', '.jsx', '.mjs'],
|
|
||||||
'typescript': ['.ts', '.tsx'],
|
|
||||||
'java': ['.java'],
|
|
||||||
'go': ['.go'],
|
|
||||||
'rust': ['.rs'],
|
|
||||||
'cpp': ['.cpp', '.cc', '.cxx', '.c', '.h', '.hpp'],
|
|
||||||
'csharp': ['.cs'],
|
|
||||||
'php': ['.php'],
|
|
||||||
'ruby': ['.rb'],
|
|
||||||
'shell': ['.sh', '.bash', '.zsh']
|
|
||||||
}
|
|
||||||
|
|
||||||
score = 0.0
|
|
||||||
for language in languages:
|
|
||||||
if language in lang_extensions:
|
|
||||||
if extension in lang_extensions[language]:
|
|
||||||
score = 1.0
|
|
||||||
break
|
|
||||||
|
|
||||||
# Fallback to category-based scoring
|
|
||||||
if score == 0.0:
|
|
||||||
category_scores = {
|
|
||||||
'code': 1.0,
|
|
||||||
'config': 0.8,
|
|
||||||
'docs': 0.6,
|
|
||||||
'web': 0.4,
|
|
||||||
'other': 0.2
|
|
||||||
}
|
|
||||||
score = category_scores.get(file_info.category, 0.2)
|
|
||||||
|
|
||||||
return score
|
|
||||||
|
|
||||||
def _calculate_directory_score(self, file_info: FileInfo, domains: List[str]) -> Tuple[float, List[str]]:
|
|
||||||
"""Calculate score based on directory context."""
|
|
||||||
if not domains:
|
|
||||||
return 0.0, []
|
|
||||||
|
|
||||||
path_parts = file_info.relative_path.lower().split('/')
|
|
||||||
matches = []
|
|
||||||
score = 0.0
|
|
||||||
|
|
||||||
# Domain-specific directory patterns
|
|
||||||
domain_patterns = {
|
|
||||||
'auth': ['auth', 'authentication', 'login', 'user', 'account'],
|
|
||||||
'authentication': ['auth', 'authentication', 'login', 'user', 'account'],
|
|
||||||
'database': ['db', 'database', 'model', 'entity', 'migration', 'schema'],
|
|
||||||
'api': ['api', 'rest', 'graphql', 'route', 'controller', 'handler'],
|
|
||||||
'frontend': ['ui', 'component', 'view', 'template', 'client', 'web'],
|
|
||||||
'backend': ['service', 'server', 'core', 'business', 'logic'],
|
|
||||||
'test': ['test', 'spec', 'tests', '__tests__', 'testing'],
|
|
||||||
'testing': ['test', 'spec', 'tests', '__tests__', 'testing'],
|
|
||||||
'config': ['config', 'configuration', 'env', 'settings'],
|
|
||||||
'configuration': ['config', 'configuration', 'env', 'settings'],
|
|
||||||
'util': ['util', 'utils', 'helper', 'common', 'shared', 'lib'],
|
|
||||||
'utility': ['util', 'utils', 'helper', 'common', 'shared', 'lib']
|
|
||||||
}
|
|
||||||
|
|
||||||
for domain in domains:
|
|
||||||
if domain in domain_patterns:
|
|
||||||
patterns = domain_patterns[domain]
|
|
||||||
for pattern in patterns:
|
|
||||||
for part in path_parts:
|
|
||||||
if pattern in part:
|
|
||||||
score += 1.0
|
|
||||||
matches.append(f"dir:{domain}->{pattern}")
|
|
||||||
break
|
|
||||||
|
|
||||||
# Normalize by number of domains
|
|
||||||
normalized_score = score / len(domains) if domains else 0.0
|
|
||||||
return min(normalized_score, 1.0), matches
|
|
||||||
|
|
||||||
def _calculate_size_penalty(self, file_info: FileInfo) -> float:
|
|
||||||
"""Calculate penalty for very large files."""
|
|
||||||
max_size = self.config.get('performance', {}).get('max_file_size', 10485760) # 10MB
|
|
||||||
|
|
||||||
if file_info.size > max_size:
|
|
||||||
# Heavy penalty for oversized files
|
|
||||||
return -0.5
|
|
||||||
elif file_info.size > max_size * 0.5:
|
|
||||||
# Light penalty for large files
|
|
||||||
return -0.2
|
|
||||||
else:
|
|
||||||
return 0.0
|
|
||||||
|
|
||||||
def _calculate_recency_bonus(self, file_info: FileInfo) -> float:
|
|
||||||
"""Calculate bonus for recently modified files."""
|
|
||||||
import time
|
|
||||||
|
|
||||||
current_time = time.time()
|
|
||||||
file_age = current_time - file_info.modified_time
|
|
||||||
|
|
||||||
# Files modified in last day get bonus
|
|
||||||
if file_age < 86400: # 1 day
|
|
||||||
return 0.3
|
|
||||||
elif file_age < 604800: # 1 week
|
|
||||||
return 0.1
|
|
||||||
else:
|
|
||||||
return 0.0
|
|
||||||
|
|
||||||
def calculate_relevance_score(self, file_info: FileInfo, analysis: AnalysisResult) -> MatchResult:
|
|
||||||
"""Calculate overall relevance score for a file."""
|
|
||||||
# Calculate individual scores
|
|
||||||
keyword_score, keyword_matches = self._calculate_keyword_score(file_info, analysis.keywords)
|
|
||||||
extension_score = self._calculate_extension_score(file_info, analysis.languages)
|
|
||||||
directory_score, dir_matches = self._calculate_directory_score(file_info, analysis.domains)
|
|
||||||
size_penalty = self._calculate_size_penalty(file_info)
|
|
||||||
recency_bonus = self._calculate_recency_bonus(file_info)
|
|
||||||
|
|
||||||
# Apply weights
|
|
||||||
weighted_score = (
|
|
||||||
keyword_score * self.weights.get('keyword_match', 0.4) +
|
|
||||||
extension_score * self.weights.get('extension_match', 0.2) +
|
|
||||||
directory_score * self.weights.get('directory_context', 0.2) +
|
|
||||||
size_penalty * self.weights.get('file_size_penalty', 0.1) +
|
|
||||||
recency_bonus * self.weights.get('recency_bonus', 0.1)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Category bonus
|
|
||||||
category_bonus = self.category_priorities.get(file_info.category, 0.2)
|
|
||||||
|
|
||||||
# Final score with category bonus
|
|
||||||
final_score = weighted_score + (category_bonus * 0.1)
|
|
||||||
|
|
||||||
# Collect match reasons
|
|
||||||
match_reasons = keyword_matches + dir_matches
|
|
||||||
if extension_score > 0.5:
|
|
||||||
match_reasons.append(f"extension:{file_info.extension}")
|
|
||||||
if recency_bonus > 0:
|
|
||||||
match_reasons.append("recent")
|
|
||||||
|
|
||||||
return MatchResult(
|
|
||||||
file_info=file_info,
|
|
||||||
relevance_score=max(0.0, final_score),
|
|
||||||
match_reasons=match_reasons,
|
|
||||||
category_bonus=category_bonus
|
|
||||||
)
|
|
||||||
|
|
||||||
def match_by_patterns(self, file_index: Dict[str, FileInfo], patterns: List[str]) -> List[FileInfo]:
|
|
||||||
"""Match files using explicit glob patterns."""
|
|
||||||
matched_files = []
|
|
||||||
|
|
||||||
for pattern in patterns:
|
|
||||||
for path, file_info in file_index.items():
|
|
||||||
# Try matching both relative path and full path
|
|
||||||
if (fnmatch.fnmatch(path, pattern) or
|
|
||||||
fnmatch.fnmatch(file_info.path, pattern) or
|
|
||||||
fnmatch.fnmatch(Path(path).name, pattern)):
|
|
||||||
matched_files.append(file_info)
|
|
||||||
|
|
||||||
# Remove duplicates based on path
|
|
||||||
seen_paths = set()
|
|
||||||
unique_files = []
|
|
||||||
for file_info in matched_files:
|
|
||||||
if file_info.relative_path not in seen_paths:
|
|
||||||
seen_paths.add(file_info.relative_path)
|
|
||||||
unique_files.append(file_info)
|
|
||||||
return unique_files
|
|
||||||
|
|
||||||
def match_always_include(self, file_index: Dict[str, FileInfo]) -> List[FileInfo]:
|
|
||||||
"""Match files that should always be included."""
|
|
||||||
return self.match_by_patterns(file_index, self.always_include)
|
|
||||||
|
|
||||||
def rank_files(self, files: List[FileInfo], analysis: AnalysisResult) -> List[MatchResult]:
|
|
||||||
"""Rank files by relevance score."""
|
|
||||||
match_results = []
|
|
||||||
|
|
||||||
for file_info in files:
|
|
||||||
match_result = self.calculate_relevance_score(file_info, analysis)
|
|
||||||
if match_result.relevance_score >= self.min_relevance_score:
|
|
||||||
match_results.append(match_result)
|
|
||||||
|
|
||||||
# Sort by relevance score (descending)
|
|
||||||
match_results.sort(key=lambda x: x.relevance_score, reverse=True)
|
|
||||||
|
|
||||||
return match_results
|
|
||||||
|
|
||||||
def select_best_files(self, ranked_files: List[MatchResult], token_limit: Optional[int] = None) -> List[MatchResult]:
|
|
||||||
"""Select the best files within token limits and category constraints."""
|
|
||||||
if not ranked_files:
|
|
||||||
return []
|
|
||||||
|
|
||||||
selected_files = []
|
|
||||||
total_tokens = 0
|
|
||||||
category_counts = {}
|
|
||||||
|
|
||||||
for match_result in ranked_files:
|
|
||||||
file_info = match_result.file_info
|
|
||||||
category = file_info.category
|
|
||||||
|
|
||||||
# Check category limit
|
|
||||||
if category_counts.get(category, 0) >= self.max_files_per_category:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check token limit
|
|
||||||
if token_limit and total_tokens + file_info.estimated_tokens > token_limit:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check total file limit
|
|
||||||
if len(selected_files) >= self.max_total_files:
|
|
||||||
break
|
|
||||||
|
|
||||||
# Add file
|
|
||||||
selected_files.append(match_result)
|
|
||||||
total_tokens += file_info.estimated_tokens
|
|
||||||
category_counts[category] = category_counts.get(category, 0) + 1
|
|
||||||
|
|
||||||
return selected_files
|
|
||||||
|
|
||||||
def match_files(self, file_index: Dict[str, FileInfo], analysis: AnalysisResult,
|
|
||||||
token_limit: Optional[int] = None, explicit_patterns: Optional[List[str]] = None) -> PathMatchingResult:
|
|
||||||
"""Main file matching function."""
|
|
||||||
self.logger.info(f"Matching files for analysis with {len(analysis.keywords)} keywords and {len(analysis.domains)} domains")
|
|
||||||
|
|
||||||
# Start with always-include files
|
|
||||||
always_include_files = self.match_always_include(file_index)
|
|
||||||
self.logger.debug(f"Always include: {len(always_include_files)} files")
|
|
||||||
|
|
||||||
# Add explicit pattern matches
|
|
||||||
pattern_files = []
|
|
||||||
patterns_used = []
|
|
||||||
if explicit_patterns:
|
|
||||||
pattern_files = self.match_by_patterns(file_index, explicit_patterns)
|
|
||||||
patterns_used.extend(explicit_patterns)
|
|
||||||
self.logger.debug(f"Explicit patterns: {len(pattern_files)} files")
|
|
||||||
|
|
||||||
# Add suggested pattern matches
|
|
||||||
if analysis.file_patterns:
|
|
||||||
suggested_files = self.match_by_patterns(file_index, analysis.file_patterns)
|
|
||||||
pattern_files.extend(suggested_files)
|
|
||||||
patterns_used.extend(analysis.file_patterns)
|
|
||||||
self.logger.debug(f"Suggested patterns: {len(suggested_files)} files")
|
|
||||||
|
|
||||||
# Combine all candidate files and remove duplicates
|
|
||||||
all_files = always_include_files + pattern_files + list(file_index.values())
|
|
||||||
seen_paths = set()
|
|
||||||
all_candidates = []
|
|
||||||
for file_info in all_files:
|
|
||||||
if file_info.relative_path not in seen_paths:
|
|
||||||
seen_paths.add(file_info.relative_path)
|
|
||||||
all_candidates.append(file_info)
|
|
||||||
self.logger.debug(f"Total candidates: {len(all_candidates)} files")
|
|
||||||
|
|
||||||
# Rank all candidates
|
|
||||||
ranked_files = self.rank_files(all_candidates, analysis)
|
|
||||||
self.logger.debug(f"Files above threshold: {len(ranked_files)}")
|
|
||||||
|
|
||||||
# Select best files within limits
|
|
||||||
selected_files = self.select_best_files(ranked_files, token_limit)
|
|
||||||
self.logger.info(f"Selected {len(selected_files)} files")
|
|
||||||
|
|
||||||
# Calculate statistics
|
|
||||||
total_tokens = sum(match.file_info.estimated_tokens for match in selected_files)
|
|
||||||
categories = {}
|
|
||||||
for match in selected_files:
|
|
||||||
category = match.file_info.category
|
|
||||||
categories[category] = categories.get(category, 0) + 1
|
|
||||||
|
|
||||||
# Calculate confidence score
|
|
||||||
confidence_score = self._calculate_confidence(selected_files, analysis)
|
|
||||||
|
|
||||||
return PathMatchingResult(
|
|
||||||
matched_files=selected_files,
|
|
||||||
total_tokens=total_tokens,
|
|
||||||
categories=categories,
|
|
||||||
patterns_used=patterns_used,
|
|
||||||
confidence_score=confidence_score
|
|
||||||
)
|
|
||||||
|
|
||||||
def _calculate_confidence(self, selected_files: List[MatchResult], analysis: AnalysisResult) -> float:
|
|
||||||
"""Calculate confidence score for the matching result."""
|
|
||||||
if not selected_files:
|
|
||||||
return 0.0
|
|
||||||
|
|
||||||
# Average relevance score
|
|
||||||
avg_relevance = sum(match.relevance_score for match in selected_files) / len(selected_files)
|
|
||||||
|
|
||||||
# Keyword coverage (how many keywords are represented)
|
|
||||||
keyword_coverage = 0.0
|
|
||||||
if analysis.keywords:
|
|
||||||
covered_keywords = set()
|
|
||||||
for match in selected_files:
|
|
||||||
for reason in match.match_reasons:
|
|
||||||
if reason.startswith('filename:') or reason.startswith('path:'):
|
|
||||||
keyword = reason.split(':', 1)[1]
|
|
||||||
covered_keywords.add(keyword)
|
|
||||||
keyword_coverage = len(covered_keywords) / len(analysis.keywords)
|
|
||||||
|
|
||||||
# Domain coverage
|
|
||||||
domain_coverage = 0.0
|
|
||||||
if analysis.domains:
|
|
||||||
covered_domains = set()
|
|
||||||
for match in selected_files:
|
|
||||||
for reason in match.match_reasons:
|
|
||||||
if reason.startswith('dir:'):
|
|
||||||
domain = reason.split('->', 1)[0].split(':', 1)[1]
|
|
||||||
covered_domains.add(domain)
|
|
||||||
domain_coverage = len(covered_domains) / len(analysis.domains)
|
|
||||||
|
|
||||||
# Weighted confidence score
|
|
||||||
confidence = (
|
|
||||||
avg_relevance * 0.5 +
|
|
||||||
keyword_coverage * 0.3 +
|
|
||||||
domain_coverage * 0.2
|
|
||||||
)
|
|
||||||
|
|
||||||
return min(confidence, 1.0)
|
|
||||||
|
|
||||||
def format_patterns(self, selected_files: List[MatchResult]) -> List[str]:
|
|
||||||
"""Format selected files as @{pattern} strings."""
|
|
||||||
pattern_format = self.config.get('output', {}).get('pattern_format', '@{{{path}}}')
|
|
||||||
|
|
||||||
patterns = []
|
|
||||||
for match in selected_files:
|
|
||||||
pattern = pattern_format.format(path=match.file_info.relative_path)
|
|
||||||
patterns.append(pattern)
|
|
||||||
|
|
||||||
return patterns
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Command-line interface for path matcher."""
|
|
||||||
import yaml
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
from .file_indexer import FileIndexer
|
|
||||||
from .context_analyzer import ContextAnalyzer
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description="Path Matcher for UltraThink")
|
|
||||||
parser.add_argument("prompt", help="Prompt to analyze and match")
|
|
||||||
parser.add_argument("--config", default="config.yaml", help="Configuration file path")
|
|
||||||
parser.add_argument("--token-limit", type=int, help="Token limit for selection")
|
|
||||||
parser.add_argument("--patterns", nargs="*", help="Explicit patterns to include")
|
|
||||||
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
# Setup logging
|
|
||||||
level = logging.DEBUG if args.verbose else logging.INFO
|
|
||||||
logging.basicConfig(level=level, format='%(levelname)s: %(message)s')
|
|
||||||
|
|
||||||
# Load configuration
|
|
||||||
config_path = Path(__file__).parent / args.config
|
|
||||||
with open(config_path, 'r', encoding='utf-8') as f:
|
|
||||||
config = yaml.safe_load(f)
|
|
||||||
|
|
||||||
# Create components
|
|
||||||
indexer = FileIndexer(config)
|
|
||||||
analyzer = ContextAnalyzer(config)
|
|
||||||
matcher = PathMatcher(config)
|
|
||||||
|
|
||||||
# Build file index
|
|
||||||
file_index = indexer.load_index()
|
|
||||||
if not file_index:
|
|
||||||
print("Building file index...")
|
|
||||||
file_index = indexer.build_index()
|
|
||||||
|
|
||||||
# Analyze prompt
|
|
||||||
analysis = analyzer.analyze(args.prompt)
|
|
||||||
|
|
||||||
# Match files
|
|
||||||
result = matcher.match_files(
|
|
||||||
file_index=file_index,
|
|
||||||
analysis=analysis,
|
|
||||||
token_limit=args.token_limit,
|
|
||||||
explicit_patterns=args.patterns
|
|
||||||
)
|
|
||||||
|
|
||||||
# Output results
|
|
||||||
print(f"Matched {len(result.matched_files)} files (~{result.total_tokens:,} tokens)")
|
|
||||||
print(f"Categories: {result.categories}")
|
|
||||||
print(f"Confidence: {result.confidence_score:.2f}")
|
|
||||||
print()
|
|
||||||
|
|
||||||
patterns = matcher.format_patterns(result.matched_files)
|
|
||||||
print("Patterns:")
|
|
||||||
for pattern in patterns[:20]: # Limit output
|
|
||||||
print(f" {pattern}")
|
|
||||||
|
|
||||||
if args.verbose:
|
|
||||||
print("\nDetailed matches:")
|
|
||||||
for match in result.matched_files[:10]:
|
|
||||||
print(f" {match.file_info.relative_path} (score: {match.relevance_score:.3f})")
|
|
||||||
print(f" Reasons: {', '.join(match.match_reasons)}")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,204 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
File Structure Indexer
|
|
||||||
Builds and maintains file indices for intelligent analysis.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import argparse
|
|
||||||
import logging
|
|
||||||
import json
|
|
||||||
import time
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Dict, List, Optional, Any
|
|
||||||
|
|
||||||
# Add current directory to path for imports
|
|
||||||
sys.path.insert(0, str(Path(__file__).parent))
|
|
||||||
|
|
||||||
from core.config import get_config
|
|
||||||
from core.file_indexer import FileIndexer, IndexStats
|
|
||||||
from core.embedding_manager import EmbeddingManager
|
|
||||||
from utils.colors import Colors
|
|
||||||
|
|
||||||
|
|
||||||
class ProjectIndexer:
|
|
||||||
"""Manages file indexing and project statistics."""
|
|
||||||
|
|
||||||
def __init__(self, config_path: Optional[str] = None, root_path: str = "."):
|
|
||||||
self.root_path = Path(root_path).resolve()
|
|
||||||
self.config = get_config(config_path)
|
|
||||||
|
|
||||||
# Setup logging
|
|
||||||
logging.basicConfig(
|
|
||||||
level=getattr(logging, self.config.get('logging.level', 'INFO')),
|
|
||||||
format=self.config.get('logging.format', '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
|
||||||
)
|
|
||||||
self.logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Initialize core components
|
|
||||||
self.indexer = FileIndexer(self.config, str(self.root_path))
|
|
||||||
|
|
||||||
# Initialize embedding manager if enabled
|
|
||||||
self.embedding_manager = None
|
|
||||||
if self.config.is_embedding_enabled():
|
|
||||||
try:
|
|
||||||
self.embedding_manager = EmbeddingManager(self.config)
|
|
||||||
except ImportError:
|
|
||||||
self.logger.warning("Embedding dependencies not available. Install sentence-transformers for enhanced functionality.")
|
|
||||||
|
|
||||||
def build_index(self) -> IndexStats:
|
|
||||||
"""Build or update the file index."""
|
|
||||||
print(Colors.yellow("Building file index..."))
|
|
||||||
start_time = time.time()
|
|
||||||
|
|
||||||
self.indexer.build_index()
|
|
||||||
stats = self.indexer.get_stats()
|
|
||||||
|
|
||||||
elapsed = time.time() - start_time
|
|
||||||
if stats:
|
|
||||||
print(Colors.green(f"Index built: {stats.total_files} files, ~{stats.total_tokens:,} tokens ({elapsed:.2f}s)"))
|
|
||||||
else:
|
|
||||||
print(Colors.green(f"Index built successfully ({elapsed:.2f}s)"))
|
|
||||||
|
|
||||||
return stats
|
|
||||||
|
|
||||||
def update_embeddings(self) -> bool:
|
|
||||||
"""Update embeddings for semantic similarity."""
|
|
||||||
if not self.embedding_manager:
|
|
||||||
print(Colors.error("Embedding functionality not available"))
|
|
||||||
return False
|
|
||||||
|
|
||||||
print(Colors.yellow("Updating embeddings..."))
|
|
||||||
start_time = time.time()
|
|
||||||
|
|
||||||
# Load file index
|
|
||||||
index = self.indexer.load_index()
|
|
||||||
if not index:
|
|
||||||
print(Colors.warning("No file index found. Building index first..."))
|
|
||||||
self.build_index()
|
|
||||||
index = self.indexer.load_index()
|
|
||||||
|
|
||||||
try:
|
|
||||||
count = self.embedding_manager.update_embeddings(index)
|
|
||||||
elapsed = time.time() - start_time
|
|
||||||
print(Colors.green(f"Updated {count} embeddings ({elapsed:.2f}s)"))
|
|
||||||
return True
|
|
||||||
except Exception as e:
|
|
||||||
print(Colors.error(f"Failed to update embeddings: {e}"))
|
|
||||||
return False
|
|
||||||
|
|
||||||
def get_project_stats(self) -> Dict[str, Any]:
|
|
||||||
"""Get comprehensive project statistics."""
|
|
||||||
stats = self.indexer.get_stats()
|
|
||||||
embedding_stats = {}
|
|
||||||
|
|
||||||
if self.embedding_manager:
|
|
||||||
embedding_stats = {
|
|
||||||
'embeddings_exist': self.embedding_manager.embeddings_exist(),
|
|
||||||
'embedding_count': len(self.embedding_manager._load_embedding_cache()) if self.embedding_manager.embeddings_exist() else 0
|
|
||||||
}
|
|
||||||
|
|
||||||
project_size = self._classify_project_size(stats.total_tokens if stats else 0)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'files': stats.total_files if stats else 0,
|
|
||||||
'tokens': stats.total_tokens if stats else 0,
|
|
||||||
'size_bytes': stats.total_size if stats else 0,
|
|
||||||
'categories': stats.categories if stats else {},
|
|
||||||
'project_size': project_size,
|
|
||||||
'last_updated': stats.last_updated if stats else 0,
|
|
||||||
'embeddings': embedding_stats,
|
|
||||||
'config': {
|
|
||||||
'cache_dir': self.config.get_cache_dir(),
|
|
||||||
'embedding_enabled': self.config.is_embedding_enabled(),
|
|
||||||
'exclude_patterns_count': len(self.config.get_exclude_patterns())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def _classify_project_size(self, tokens: int) -> str:
|
|
||||||
"""Classify project size based on token count."""
|
|
||||||
small_limit = self.config.get('token_limits.small_project', 500000)
|
|
||||||
medium_limit = self.config.get('token_limits.medium_project', 2000000)
|
|
||||||
|
|
||||||
if tokens < small_limit:
|
|
||||||
return "small"
|
|
||||||
elif tokens < medium_limit:
|
|
||||||
return "medium"
|
|
||||||
else:
|
|
||||||
return "large"
|
|
||||||
|
|
||||||
def cleanup_cache(self):
|
|
||||||
"""Clean up old cache files."""
|
|
||||||
cache_dir = Path(self.config.get_cache_dir())
|
|
||||||
if cache_dir.exists():
|
|
||||||
print(Colors.yellow("Cleaning up cache..."))
|
|
||||||
for file in cache_dir.glob("*"):
|
|
||||||
if file.is_file():
|
|
||||||
file.unlink()
|
|
||||||
print(f"Removed: {file}")
|
|
||||||
print(Colors.green("Cache cleaned"))
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""CLI entry point for indexer."""
|
|
||||||
parser = argparse.ArgumentParser(
|
|
||||||
description="Project File Indexer - Build and manage file indices",
|
|
||||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
||||||
epilog="""
|
|
||||||
Examples:
|
|
||||||
python indexer.py --build # Build file index
|
|
||||||
python indexer.py --stats # Show project statistics
|
|
||||||
python indexer.py --embeddings # Update embeddings
|
|
||||||
python indexer.py --cleanup # Clean cache
|
|
||||||
"""
|
|
||||||
)
|
|
||||||
|
|
||||||
parser.add_argument('--build', action='store_true', help='Build file index')
|
|
||||||
parser.add_argument('--stats', action='store_true', help='Show project statistics')
|
|
||||||
parser.add_argument('--embeddings', action='store_true', help='Update embeddings')
|
|
||||||
parser.add_argument('--cleanup', action='store_true', help='Clean up cache files')
|
|
||||||
parser.add_argument('--output', choices=['json', 'text'], default='text', help='Output format')
|
|
||||||
parser.add_argument('--config', help='Configuration file path')
|
|
||||||
parser.add_argument('--root', default='.', help='Root directory to analyze')
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
# Require at least one action
|
|
||||||
if not any([args.build, args.stats, args.embeddings, args.cleanup]):
|
|
||||||
parser.error("At least one action is required: --build, --stats, --embeddings, or --cleanup")
|
|
||||||
|
|
||||||
# Create indexer
|
|
||||||
indexer = ProjectIndexer(args.config, args.root)
|
|
||||||
|
|
||||||
try:
|
|
||||||
if args.cleanup:
|
|
||||||
indexer.cleanup_cache()
|
|
||||||
|
|
||||||
if args.build:
|
|
||||||
indexer.build_index()
|
|
||||||
|
|
||||||
if args.embeddings:
|
|
||||||
indexer.update_embeddings()
|
|
||||||
|
|
||||||
if args.stats:
|
|
||||||
stats = indexer.get_project_stats()
|
|
||||||
if args.output == 'json':
|
|
||||||
print(json.dumps(stats, indent=2, default=str))
|
|
||||||
else:
|
|
||||||
print(f"Total files: {stats['files']}")
|
|
||||||
print(f"Total tokens: {stats['tokens']:,}")
|
|
||||||
print(f"Project size: {stats['project_size']}")
|
|
||||||
print(f"Categories: {stats['categories']}")
|
|
||||||
if 'embeddings' in stats:
|
|
||||||
print(f"Embeddings: {stats['embeddings']['embedding_count']}")
|
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
print(Colors.warning("\nOperation interrupted by user"))
|
|
||||||
sys.exit(1)
|
|
||||||
except Exception as e:
|
|
||||||
print(Colors.error(f"Operation failed: {e}"))
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,189 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
# Installation script for UltraThink Path-Aware Analyzer
|
|
||||||
|
|
||||||
set -e
|
|
||||||
|
|
||||||
# Colors for output
|
|
||||||
RED='\033[0;31m'
|
|
||||||
GREEN='\033[0;32m'
|
|
||||||
YELLOW='\033[1;33m'
|
|
||||||
BLUE='\033[0;34m'
|
|
||||||
NC='\033[0m' # No Color
|
|
||||||
|
|
||||||
# Functions
|
|
||||||
print_status() {
|
|
||||||
echo -e "${BLUE}[INFO]${NC} $1"
|
|
||||||
}
|
|
||||||
|
|
||||||
print_success() {
|
|
||||||
echo -e "${GREEN}[SUCCESS]${NC} $1"
|
|
||||||
}
|
|
||||||
|
|
||||||
print_warning() {
|
|
||||||
echo -e "${YELLOW}[WARNING]${NC} $1"
|
|
||||||
}
|
|
||||||
|
|
||||||
print_error() {
|
|
||||||
echo -e "${RED}[ERROR]${NC} $1"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Check Python version
|
|
||||||
check_python() {
|
|
||||||
if command -v python3 &> /dev/null; then
|
|
||||||
PYTHON_VERSION=$(python3 -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')")
|
|
||||||
PYTHON_CMD="python3"
|
|
||||||
elif command -v python &> /dev/null; then
|
|
||||||
PYTHON_VERSION=$(python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')")
|
|
||||||
PYTHON_CMD="python"
|
|
||||||
else
|
|
||||||
print_error "Python not found. Please install Python 3.8 or later."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Check version
|
|
||||||
if [[ $(echo "$PYTHON_VERSION >= 3.8" | bc -l) -eq 1 ]]; then
|
|
||||||
print_success "Python $PYTHON_VERSION found"
|
|
||||||
else
|
|
||||||
print_error "Python 3.8 or later required. Found Python $PYTHON_VERSION"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# Install dependencies
|
|
||||||
install_dependencies() {
|
|
||||||
print_status "Installing core dependencies..."
|
|
||||||
|
|
||||||
# Install core requirements
|
|
||||||
$PYTHON_CMD -m pip install --user -r requirements.txt
|
|
||||||
|
|
||||||
if [ $? -eq 0 ]; then
|
|
||||||
print_success "Core dependencies installed"
|
|
||||||
else
|
|
||||||
print_error "Failed to install core dependencies"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# Install optional dependencies
|
|
||||||
install_optional() {
|
|
||||||
read -p "Install RAG/embedding features? (requires ~200MB download) [y/N]: " install_rag
|
|
||||||
if [[ $install_rag =~ ^[Yy]$ ]]; then
|
|
||||||
print_status "Installing RAG dependencies..."
|
|
||||||
$PYTHON_CMD -m pip install --user sentence-transformers numpy
|
|
||||||
if [ $? -eq 0 ]; then
|
|
||||||
print_success "RAG dependencies installed"
|
|
||||||
else
|
|
||||||
print_warning "Failed to install RAG dependencies (optional)"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
read -p "Install development tools? [y/N]: " install_dev
|
|
||||||
if [[ $install_dev =~ ^[Yy]$ ]]; then
|
|
||||||
print_status "Installing development dependencies..."
|
|
||||||
$PYTHON_CMD -m pip install --user pytest pytest-cov black flake8
|
|
||||||
if [ $? -eq 0 ]; then
|
|
||||||
print_success "Development dependencies installed"
|
|
||||||
else
|
|
||||||
print_warning "Failed to install development dependencies (optional)"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# Create wrapper script
|
|
||||||
create_wrapper() {
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"
|
|
||||||
WRAPPER_PATH="$HOME/.local/bin/ultrathink"
|
|
||||||
|
|
||||||
# Create .local/bin if it doesn't exist
|
|
||||||
mkdir -p "$HOME/.local/bin"
|
|
||||||
|
|
||||||
# Create wrapper script
|
|
||||||
cat > "$WRAPPER_PATH" << EOF
|
|
||||||
#!/bin/bash
|
|
||||||
# UltraThink Path-Aware Analyzer Wrapper
|
|
||||||
# Auto-generated by install.sh
|
|
||||||
|
|
||||||
SCRIPT_DIR="$SCRIPT_DIR"
|
|
||||||
export PYTHONPATH="\$SCRIPT_DIR:\$PYTHONPATH"
|
|
||||||
|
|
||||||
exec $PYTHON_CMD "\$SCRIPT_DIR/path_aware_analyzer.py" "\$@"
|
|
||||||
EOF
|
|
||||||
|
|
||||||
chmod +x "$WRAPPER_PATH"
|
|
||||||
|
|
||||||
if [ -f "$WRAPPER_PATH" ]; then
|
|
||||||
print_success "Wrapper script created at $WRAPPER_PATH"
|
|
||||||
else
|
|
||||||
print_error "Failed to create wrapper script"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# Update configuration
|
|
||||||
setup_config() {
|
|
||||||
print_status "Setting up configuration..."
|
|
||||||
|
|
||||||
# Create cache directory
|
|
||||||
mkdir -p .claude/cache/embeddings
|
|
||||||
|
|
||||||
# Check if config needs updating
|
|
||||||
if [ ! -f config.yaml ]; then
|
|
||||||
print_error "Configuration file config.yaml not found"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
print_success "Configuration ready"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Test installation
|
|
||||||
test_installation() {
|
|
||||||
print_status "Testing installation..."
|
|
||||||
|
|
||||||
# Test basic functionality
|
|
||||||
if $PYTHON_CMD path_aware_analyzer.py --stats &> /dev/null; then
|
|
||||||
print_success "Installation test passed"
|
|
||||||
else
|
|
||||||
print_warning "Installation test failed - but this might be normal for first run"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# Add to PATH instructions
|
|
||||||
show_path_instructions() {
|
|
||||||
if [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then
|
|
||||||
print_warning "Add $HOME/.local/bin to your PATH to use 'ultrathink' command globally"
|
|
||||||
echo ""
|
|
||||||
echo "Add this line to your ~/.bashrc or ~/.zshrc:"
|
|
||||||
echo "export PATH=\"\$HOME/.local/bin:\$PATH\""
|
|
||||||
echo ""
|
|
||||||
echo "Or run: echo 'export PATH=\"\$HOME/.local/bin:\$PATH\"' >> ~/.bashrc"
|
|
||||||
echo "Then: source ~/.bashrc"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# Main installation
|
|
||||||
main() {
|
|
||||||
print_status "Installing UltraThink Path-Aware Analyzer..."
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
check_python
|
|
||||||
install_dependencies
|
|
||||||
install_optional
|
|
||||||
create_wrapper
|
|
||||||
setup_config
|
|
||||||
test_installation
|
|
||||||
|
|
||||||
echo ""
|
|
||||||
print_success "Installation complete!"
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
print_status "Usage examples:"
|
|
||||||
echo " ./path_aware_analyzer.py \"analyze authentication flow\""
|
|
||||||
echo " ultrathink \"implement user login feature\""
|
|
||||||
echo " ultrathink --tool gemini \"review API endpoints\""
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
show_path_instructions
|
|
||||||
}
|
|
||||||
|
|
||||||
# Run main function
|
|
||||||
main "$@"
|
|
||||||
@@ -1,19 +0,0 @@
|
|||||||
# Core dependencies for embedding tests
|
|
||||||
numpy>=1.21.0
|
|
||||||
scikit-learn>=1.0.0
|
|
||||||
|
|
||||||
# Sentence Transformers for advanced embeddings (CodeSage V2 compatible)
|
|
||||||
sentence-transformers>=3.0.0
|
|
||||||
transformers>=4.40.0
|
|
||||||
|
|
||||||
# PyTorch for model execution (required for CodeSage V2)
|
|
||||||
torch>=2.0.0
|
|
||||||
|
|
||||||
# Development and testing
|
|
||||||
pytest>=6.0.0
|
|
||||||
|
|
||||||
# Data handling
|
|
||||||
pandas>=1.3.0
|
|
||||||
|
|
||||||
# Additional dependencies for CodeSage V2
|
|
||||||
accelerate>=0.26.0
|
|
||||||
@@ -1,92 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Setup script for UltraThink Path-Aware Analyzer
|
|
||||||
"""
|
|
||||||
|
|
||||||
from setuptools import setup, find_packages
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
# Read README
|
|
||||||
readme_path = Path(__file__).parent / "README.md"
|
|
||||||
long_description = readme_path.read_text(encoding='utf-8') if readme_path.exists() else ""
|
|
||||||
|
|
||||||
# Read requirements
|
|
||||||
requirements_path = Path(__file__).parent / "requirements.txt"
|
|
||||||
requirements = []
|
|
||||||
if requirements_path.exists():
|
|
||||||
with open(requirements_path, 'r', encoding='utf-8') as f:
|
|
||||||
for line in f:
|
|
||||||
line = line.strip()
|
|
||||||
if line and not line.startswith('#'):
|
|
||||||
requirements.append(line)
|
|
||||||
|
|
||||||
setup(
|
|
||||||
name="ultrathink-path-analyzer",
|
|
||||||
version="1.0.0",
|
|
||||||
description="Lightweight path-aware program for intelligent file pattern detection and analysis",
|
|
||||||
long_description=long_description,
|
|
||||||
long_description_content_type="text/markdown",
|
|
||||||
author="UltraThink Development Team",
|
|
||||||
author_email="dev@ultrathink.ai",
|
|
||||||
url="https://github.com/ultrathink/path-analyzer",
|
|
||||||
|
|
||||||
packages=find_packages(),
|
|
||||||
py_modules=[
|
|
||||||
'analyzer', # Main entry point
|
|
||||||
],
|
|
||||||
|
|
||||||
install_requires=requirements,
|
|
||||||
|
|
||||||
extras_require={
|
|
||||||
'rag': [
|
|
||||||
'sentence-transformers>=2.2.0',
|
|
||||||
'numpy>=1.21.0'
|
|
||||||
],
|
|
||||||
'nlp': [
|
|
||||||
'nltk>=3.8',
|
|
||||||
'spacy>=3.4.0'
|
|
||||||
],
|
|
||||||
'performance': [
|
|
||||||
'numba>=0.56.0'
|
|
||||||
],
|
|
||||||
'dev': [
|
|
||||||
'pytest>=7.0.0',
|
|
||||||
'pytest-cov>=4.0.0',
|
|
||||||
'black>=22.0.0',
|
|
||||||
'flake8>=5.0.0'
|
|
||||||
]
|
|
||||||
},
|
|
||||||
|
|
||||||
entry_points={
|
|
||||||
'console_scripts': [
|
|
||||||
'path-analyzer=cli:main',
|
|
||||||
'path-indexer=indexer:main',
|
|
||||||
'analyzer=analyzer:main', # Legacy compatibility
|
|
||||||
'module-analyzer=tools.module_analyzer:main',
|
|
||||||
'tech-stack=tools.tech_stack:main',
|
|
||||||
],
|
|
||||||
},
|
|
||||||
|
|
||||||
classifiers=[
|
|
||||||
"Development Status :: 4 - Beta",
|
|
||||||
"Intended Audience :: Developers",
|
|
||||||
"Topic :: Software Development :: Tools",
|
|
||||||
"License :: OSI Approved :: MIT License",
|
|
||||||
"Programming Language :: Python :: 3",
|
|
||||||
"Programming Language :: Python :: 3.8",
|
|
||||||
"Programming Language :: Python :: 3.9",
|
|
||||||
"Programming Language :: Python :: 3.10",
|
|
||||||
"Programming Language :: Python :: 3.11",
|
|
||||||
"Operating System :: OS Independent",
|
|
||||||
],
|
|
||||||
|
|
||||||
python_requires=">=3.8",
|
|
||||||
|
|
||||||
keywords="ai, analysis, path-detection, code-analysis, file-matching, rag, nlp",
|
|
||||||
|
|
||||||
project_urls={
|
|
||||||
"Bug Reports": "https://github.com/ultrathink/path-analyzer/issues",
|
|
||||||
"Source": "https://github.com/ultrathink/path-analyzer",
|
|
||||||
"Documentation": "https://github.com/ultrathink/path-analyzer/docs",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
"""
|
|
||||||
Independent tool scripts for specialized analysis tasks.
|
|
||||||
Provides module analysis, tech stack detection, and workflow management tools.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from .module_analyzer import ModuleAnalyzer, ModuleInfo
|
|
||||||
from .tech_stack import TechStackLoader
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
'ModuleAnalyzer',
|
|
||||||
'ModuleInfo',
|
|
||||||
'TechStackLoader'
|
|
||||||
]
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,369 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Unified Module Analyzer
|
|
||||||
Combines functionality from detect_changed_modules.py and get_modules_by_depth.py
|
|
||||||
into a single, comprehensive module analysis tool.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import subprocess
|
|
||||||
import time
|
|
||||||
import json
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import List, Dict, Optional, Set, Tuple
|
|
||||||
from dataclasses import dataclass, asdict
|
|
||||||
|
|
||||||
# Add parent directory for imports
|
|
||||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
||||||
from core.config import get_config
|
|
||||||
from core.gitignore_parser import GitignoreParser
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class ModuleInfo:
|
|
||||||
"""Information about a module/directory."""
|
|
||||||
depth: int
|
|
||||||
path: str
|
|
||||||
files: int
|
|
||||||
types: List[str]
|
|
||||||
has_claude: bool
|
|
||||||
status: str = "normal" # changed, normal, new, deleted
|
|
||||||
last_modified: Optional[float] = None
|
|
||||||
|
|
||||||
def to_dict(self) -> Dict:
|
|
||||||
return asdict(self)
|
|
||||||
|
|
||||||
class ModuleAnalyzer:
|
|
||||||
"""Unified module analysis tool with change detection and depth analysis."""
|
|
||||||
|
|
||||||
def __init__(self, root_path: str = ".", config_path: Optional[str] = None):
|
|
||||||
self.root_path = Path(root_path).resolve()
|
|
||||||
self.config = get_config(config_path)
|
|
||||||
|
|
||||||
# Source file extensions for analysis
|
|
||||||
self.source_extensions = {
|
|
||||||
'.md', '.js', '.ts', '.jsx', '.tsx', '.py', '.go', '.rs',
|
|
||||||
'.java', '.cpp', '.c', '.h', '.sh', '.ps1', '.json', '.yaml', '.yml',
|
|
||||||
'.php', '.rb', '.swift', '.kt', '.scala', '.dart'
|
|
||||||
}
|
|
||||||
|
|
||||||
# Initialize gitignore parser for exclusions
|
|
||||||
self.gitignore_parser = GitignoreParser(str(self.root_path))
|
|
||||||
self.exclude_patterns = self._build_exclusion_patterns()
|
|
||||||
|
|
||||||
def _build_exclusion_patterns(self) -> Set[str]:
|
|
||||||
"""Build exclusion patterns from config and gitignore."""
|
|
||||||
exclusions = {
|
|
||||||
'.git', '.history', '.vscode', '__pycache__', '.pytest_cache',
|
|
||||||
'node_modules', 'dist', 'build', '.egg-info', '.env',
|
|
||||||
'.cache', '.tmp', '.temp', '.DS_Store', 'Thumbs.db'
|
|
||||||
}
|
|
||||||
|
|
||||||
# Add patterns from config
|
|
||||||
config_patterns = self.config.get('exclude_patterns', [])
|
|
||||||
for pattern in config_patterns:
|
|
||||||
# Extract directory names from patterns
|
|
||||||
if '/' in pattern:
|
|
||||||
parts = pattern.replace('*/', '').replace('/*', '').split('/')
|
|
||||||
exclusions.update(part for part in parts if part and not part.startswith('*'))
|
|
||||||
|
|
||||||
return exclusions
|
|
||||||
|
|
||||||
def _should_exclude_directory(self, dir_path: Path) -> bool:
|
|
||||||
"""Check if directory should be excluded from analysis."""
|
|
||||||
dir_name = dir_path.name
|
|
||||||
|
|
||||||
# Check against exclusion patterns
|
|
||||||
if dir_name in self.exclude_patterns:
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Check if directory starts with . (hidden directories)
|
|
||||||
if dir_name.startswith('.') and dir_name not in {'.github', '.vscode'}:
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
def get_git_changed_files(self, since: str = "HEAD") -> Set[str]:
|
|
||||||
"""Get files changed in git."""
|
|
||||||
changed_files = set()
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Check if we're in a git repository
|
|
||||||
subprocess.run(['git', 'rev-parse', '--git-dir'],
|
|
||||||
check=True, capture_output=True, cwd=self.root_path)
|
|
||||||
|
|
||||||
# Get changes since specified reference
|
|
||||||
commands = [
|
|
||||||
['git', 'diff', '--name-only', since], # Changes since reference
|
|
||||||
['git', 'diff', '--name-only', '--staged'], # Staged changes
|
|
||||||
['git', 'ls-files', '--others', '--exclude-standard'] # Untracked files
|
|
||||||
]
|
|
||||||
|
|
||||||
for cmd in commands:
|
|
||||||
try:
|
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True,
|
|
||||||
cwd=self.root_path, check=True)
|
|
||||||
if result.stdout.strip():
|
|
||||||
files = result.stdout.strip().split('\n')
|
|
||||||
changed_files.update(f for f in files if f)
|
|
||||||
except subprocess.CalledProcessError:
|
|
||||||
continue
|
|
||||||
|
|
||||||
except subprocess.CalledProcessError:
|
|
||||||
# Not a git repository or git not available
|
|
||||||
pass
|
|
||||||
|
|
||||||
return changed_files
|
|
||||||
|
|
||||||
def get_recently_modified_files(self, hours: int = 24) -> Set[str]:
|
|
||||||
"""Get files modified within the specified hours."""
|
|
||||||
cutoff_time = time.time() - (hours * 3600)
|
|
||||||
recent_files = set()
|
|
||||||
|
|
||||||
try:
|
|
||||||
for file_path in self.root_path.rglob('*'):
|
|
||||||
if file_path.is_file():
|
|
||||||
try:
|
|
||||||
if file_path.stat().st_mtime > cutoff_time:
|
|
||||||
rel_path = file_path.relative_to(self.root_path)
|
|
||||||
recent_files.add(str(rel_path))
|
|
||||||
except (OSError, ValueError):
|
|
||||||
continue
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return recent_files
|
|
||||||
|
|
||||||
def analyze_directory(self, dir_path: Path) -> Optional[ModuleInfo]:
|
|
||||||
"""Analyze a single directory and return module information."""
|
|
||||||
if self._should_exclude_directory(dir_path):
|
|
||||||
return None
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Count files by type
|
|
||||||
file_types = set()
|
|
||||||
file_count = 0
|
|
||||||
has_claude = False
|
|
||||||
last_modified = 0
|
|
||||||
|
|
||||||
for item in dir_path.iterdir():
|
|
||||||
if item.is_file():
|
|
||||||
file_count += 1
|
|
||||||
|
|
||||||
# Track file types
|
|
||||||
if item.suffix.lower() in self.source_extensions:
|
|
||||||
file_types.add(item.suffix.lower())
|
|
||||||
|
|
||||||
# Check for CLAUDE.md
|
|
||||||
if item.name.upper() == 'CLAUDE.MD':
|
|
||||||
has_claude = True
|
|
||||||
|
|
||||||
# Track latest modification
|
|
||||||
try:
|
|
||||||
mtime = item.stat().st_mtime
|
|
||||||
last_modified = max(last_modified, mtime)
|
|
||||||
except OSError:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Calculate depth relative to root
|
|
||||||
try:
|
|
||||||
relative_path = dir_path.relative_to(self.root_path)
|
|
||||||
depth = len(relative_path.parts)
|
|
||||||
except ValueError:
|
|
||||||
depth = 0
|
|
||||||
|
|
||||||
return ModuleInfo(
|
|
||||||
depth=depth,
|
|
||||||
path=str(relative_path) if depth > 0 else ".",
|
|
||||||
files=file_count,
|
|
||||||
types=sorted(list(file_types)),
|
|
||||||
has_claude=has_claude,
|
|
||||||
last_modified=last_modified if last_modified > 0 else None
|
|
||||||
)
|
|
||||||
|
|
||||||
except (PermissionError, OSError):
|
|
||||||
return None
|
|
||||||
|
|
||||||
def detect_changed_modules(self, since: str = "HEAD") -> List[ModuleInfo]:
|
|
||||||
"""Detect modules affected by changes."""
|
|
||||||
changed_files = self.get_git_changed_files(since)
|
|
||||||
|
|
||||||
# If no git changes, fall back to recently modified files
|
|
||||||
if not changed_files:
|
|
||||||
changed_files = self.get_recently_modified_files(24)
|
|
||||||
|
|
||||||
# Get affected directories
|
|
||||||
affected_dirs = set()
|
|
||||||
for file_path in changed_files:
|
|
||||||
full_path = self.root_path / file_path
|
|
||||||
if full_path.exists():
|
|
||||||
# Add the file's directory and parent directories
|
|
||||||
current_dir = full_path.parent
|
|
||||||
while current_dir != self.root_path and current_dir.parent != current_dir:
|
|
||||||
affected_dirs.add(current_dir)
|
|
||||||
current_dir = current_dir.parent
|
|
||||||
|
|
||||||
# Analyze affected directories
|
|
||||||
modules = []
|
|
||||||
for dir_path in affected_dirs:
|
|
||||||
module_info = self.analyze_directory(dir_path)
|
|
||||||
if module_info:
|
|
||||||
module_info.status = "changed"
|
|
||||||
modules.append(module_info)
|
|
||||||
|
|
||||||
return sorted(modules, key=lambda m: (m.depth, m.path))
|
|
||||||
|
|
||||||
def analyze_by_depth(self, max_depth: Optional[int] = None) -> List[ModuleInfo]:
|
|
||||||
"""Analyze all modules organized by depth (deepest first)."""
|
|
||||||
modules = []
|
|
||||||
|
|
||||||
def scan_directory(dir_path: Path, current_depth: int = 0):
|
|
||||||
"""Recursively scan directories."""
|
|
||||||
if max_depth and current_depth > max_depth:
|
|
||||||
return
|
|
||||||
|
|
||||||
module_info = self.analyze_directory(dir_path)
|
|
||||||
if module_info and module_info.files > 0:
|
|
||||||
modules.append(module_info)
|
|
||||||
|
|
||||||
# Recurse into subdirectories
|
|
||||||
try:
|
|
||||||
for item in dir_path.iterdir():
|
|
||||||
if item.is_dir() and not self._should_exclude_directory(item):
|
|
||||||
scan_directory(item, current_depth + 1)
|
|
||||||
except (PermissionError, OSError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
scan_directory(self.root_path)
|
|
||||||
|
|
||||||
# Sort by depth (deepest first), then by path
|
|
||||||
return sorted(modules, key=lambda m: (-m.depth, m.path))
|
|
||||||
|
|
||||||
def get_dependencies(self, module_path: str) -> List[str]:
|
|
||||||
"""Get module dependencies (basic implementation)."""
|
|
||||||
dependencies = []
|
|
||||||
module_dir = self.root_path / module_path
|
|
||||||
|
|
||||||
if not module_dir.exists() or not module_dir.is_dir():
|
|
||||||
return dependencies
|
|
||||||
|
|
||||||
# Look for common dependency files
|
|
||||||
dependency_files = [
|
|
||||||
'package.json', # Node.js
|
|
||||||
'requirements.txt', # Python
|
|
||||||
'Cargo.toml', # Rust
|
|
||||||
'go.mod', # Go
|
|
||||||
'pom.xml', # Java Maven
|
|
||||||
'build.gradle', # Java Gradle
|
|
||||||
]
|
|
||||||
|
|
||||||
for dep_file in dependency_files:
|
|
||||||
dep_path = module_dir / dep_file
|
|
||||||
if dep_path.exists():
|
|
||||||
dependencies.append(str(dep_path.relative_to(self.root_path)))
|
|
||||||
|
|
||||||
return dependencies
|
|
||||||
|
|
||||||
def find_modules_with_pattern(self, pattern: str) -> List[ModuleInfo]:
|
|
||||||
"""Find modules matching a specific pattern in their path or files."""
|
|
||||||
modules = self.analyze_by_depth()
|
|
||||||
matching_modules = []
|
|
||||||
|
|
||||||
for module in modules:
|
|
||||||
# Check if pattern matches path
|
|
||||||
if pattern.lower() in module.path.lower():
|
|
||||||
matching_modules.append(module)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check if pattern matches file types
|
|
||||||
if any(pattern.lower() in ext.lower() for ext in module.types):
|
|
||||||
matching_modules.append(module)
|
|
||||||
|
|
||||||
return matching_modules
|
|
||||||
|
|
||||||
def export_analysis(self, modules: List[ModuleInfo], format: str = "json") -> str:
|
|
||||||
"""Export module analysis in specified format."""
|
|
||||||
if format == "json":
|
|
||||||
return json.dumps([module.to_dict() for module in modules], indent=2)
|
|
||||||
|
|
||||||
elif format == "list":
|
|
||||||
lines = []
|
|
||||||
for module in modules:
|
|
||||||
status = f"[{module.status}]" if module.status != "normal" else ""
|
|
||||||
claude_marker = "[CLAUDE]" if module.has_claude else ""
|
|
||||||
lines.append(f"{module.path} (depth:{module.depth}, files:{module.files}) {status} {claude_marker}")
|
|
||||||
return "\n".join(lines)
|
|
||||||
|
|
||||||
elif format == "grouped":
|
|
||||||
grouped = {}
|
|
||||||
for module in modules:
|
|
||||||
depth = module.depth
|
|
||||||
if depth not in grouped:
|
|
||||||
grouped[depth] = []
|
|
||||||
grouped[depth].append(module)
|
|
||||||
|
|
||||||
lines = []
|
|
||||||
for depth in sorted(grouped.keys()):
|
|
||||||
lines.append(f"\n=== Depth {depth} ===")
|
|
||||||
for module in grouped[depth]:
|
|
||||||
status = f"[{module.status}]" if module.status != "normal" else ""
|
|
||||||
claude_marker = "[CLAUDE]" if module.has_claude else ""
|
|
||||||
lines.append(f" {module.path} (files:{module.files}) {status} {claude_marker}")
|
|
||||||
return "\n".join(lines)
|
|
||||||
|
|
||||||
elif format == "paths":
|
|
||||||
return "\n".join(module.path for module in modules)
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unsupported format: {format}")
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main CLI entry point."""
|
|
||||||
import argparse
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description="Module Analysis Tool")
|
|
||||||
parser.add_argument("command", choices=["changed", "depth", "dependencies", "find"],
|
|
||||||
help="Analysis command to run")
|
|
||||||
parser.add_argument("--format", choices=["json", "list", "grouped", "paths"],
|
|
||||||
default="list", help="Output format")
|
|
||||||
parser.add_argument("--since", default="HEAD~1",
|
|
||||||
help="Git reference for change detection (default: HEAD~1)")
|
|
||||||
parser.add_argument("--max-depth", type=int,
|
|
||||||
help="Maximum directory depth to analyze")
|
|
||||||
parser.add_argument("--pattern", help="Pattern to search for (for find command)")
|
|
||||||
parser.add_argument("--module", help="Module path for dependency analysis")
|
|
||||||
parser.add_argument("--config", help="Configuration file path")
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
analyzer = ModuleAnalyzer(config_path=args.config)
|
|
||||||
|
|
||||||
if args.command == "changed":
|
|
||||||
modules = analyzer.detect_changed_modules(args.since)
|
|
||||||
print(analyzer.export_analysis(modules, args.format))
|
|
||||||
|
|
||||||
elif args.command == "depth":
|
|
||||||
modules = analyzer.analyze_by_depth(args.max_depth)
|
|
||||||
print(analyzer.export_analysis(modules, args.format))
|
|
||||||
|
|
||||||
elif args.command == "dependencies":
|
|
||||||
if not args.module:
|
|
||||||
print("Error: --module required for dependencies command", file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
deps = analyzer.get_dependencies(args.module)
|
|
||||||
if args.format == "json":
|
|
||||||
print(json.dumps(deps, indent=2))
|
|
||||||
else:
|
|
||||||
print("\n".join(deps))
|
|
||||||
|
|
||||||
elif args.command == "find":
|
|
||||||
if not args.pattern:
|
|
||||||
print("Error: --pattern required for find command", file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
modules = analyzer.find_modules_with_pattern(args.pattern)
|
|
||||||
print(analyzer.export_analysis(modules, args.format))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,202 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Python equivalent of tech-stack-loader.sh
|
|
||||||
DMSFlow Tech Stack Guidelines Loader
|
|
||||||
Returns tech stack specific coding guidelines and best practices for Claude processing
|
|
||||||
|
|
||||||
Usage: python tech_stack_loader.py [command] [tech_stack]
|
|
||||||
"""
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import argparse
|
|
||||||
import re
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Dict, List, Optional, Tuple
|
|
||||||
|
|
||||||
class TechStackLoader:
|
|
||||||
"""Load tech stack specific development guidelines."""
|
|
||||||
|
|
||||||
def __init__(self, script_dir: Optional[str] = None):
|
|
||||||
if script_dir:
|
|
||||||
self.script_dir = Path(script_dir)
|
|
||||||
else:
|
|
||||||
self.script_dir = Path(__file__).parent
|
|
||||||
|
|
||||||
# Look for template directory in multiple locations
|
|
||||||
possible_template_dirs = [
|
|
||||||
self.script_dir / "../tech-stack-templates",
|
|
||||||
self.script_dir / "../workflows/cli-templates/tech-stacks",
|
|
||||||
self.script_dir / "tech-stack-templates",
|
|
||||||
self.script_dir / "templates",
|
|
||||||
]
|
|
||||||
|
|
||||||
self.template_dir = None
|
|
||||||
for template_dir in possible_template_dirs:
|
|
||||||
if template_dir.exists():
|
|
||||||
self.template_dir = template_dir.resolve()
|
|
||||||
break
|
|
||||||
|
|
||||||
if not self.template_dir:
|
|
||||||
# Create a default template directory
|
|
||||||
self.template_dir = self.script_dir / "tech-stack-templates"
|
|
||||||
self.template_dir.mkdir(exist_ok=True)
|
|
||||||
|
|
||||||
def parse_yaml_frontmatter(self, content: str) -> Tuple[Dict[str, str], str]:
|
|
||||||
"""Parse YAML frontmatter from markdown content."""
|
|
||||||
frontmatter = {}
|
|
||||||
content_start = 0
|
|
||||||
|
|
||||||
lines = content.split('\n')
|
|
||||||
if lines and lines[0].strip() == '---':
|
|
||||||
# Find the closing ---
|
|
||||||
for i, line in enumerate(lines[1:], 1):
|
|
||||||
if line.strip() == '---':
|
|
||||||
content_start = i + 1
|
|
||||||
break
|
|
||||||
elif ':' in line:
|
|
||||||
key, value = line.split(':', 1)
|
|
||||||
frontmatter[key.strip()] = value.strip()
|
|
||||||
|
|
||||||
# Return frontmatter and content without YAML
|
|
||||||
remaining_content = '\n'.join(lines[content_start:])
|
|
||||||
return frontmatter, remaining_content
|
|
||||||
|
|
||||||
def list_available_guidelines(self) -> str:
|
|
||||||
"""List all available development guidelines."""
|
|
||||||
output = ["Available Development Guidelines:", "=" * 33]
|
|
||||||
|
|
||||||
if not self.template_dir.exists():
|
|
||||||
output.append("No template directory found.")
|
|
||||||
return '\n'.join(output)
|
|
||||||
|
|
||||||
for file_path in self.template_dir.glob("*.md"):
|
|
||||||
try:
|
|
||||||
with open(file_path, 'r', encoding='utf-8') as f:
|
|
||||||
content = f.read()
|
|
||||||
|
|
||||||
frontmatter, _ = self.parse_yaml_frontmatter(content)
|
|
||||||
name = frontmatter.get('name', file_path.stem)
|
|
||||||
description = frontmatter.get('description', 'No description available')
|
|
||||||
|
|
||||||
output.append(f"{name:<20} - {description}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
output.append(f"{file_path.stem:<20} - Error reading file: {e}")
|
|
||||||
|
|
||||||
return '\n'.join(output)
|
|
||||||
|
|
||||||
def load_guidelines(self, tech_stack: str) -> str:
|
|
||||||
"""Load specific development guidelines."""
|
|
||||||
template_path = self.template_dir / f"{tech_stack}.md"
|
|
||||||
|
|
||||||
if not template_path.exists():
|
|
||||||
# Try with different naming conventions
|
|
||||||
alternatives = [
|
|
||||||
f"{tech_stack}-dev.md",
|
|
||||||
f"{tech_stack}_dev.md",
|
|
||||||
f"{tech_stack.replace('-', '_')}.md",
|
|
||||||
f"{tech_stack.replace('_', '-')}.md"
|
|
||||||
]
|
|
||||||
|
|
||||||
for alt in alternatives:
|
|
||||||
alt_path = self.template_dir / alt
|
|
||||||
if alt_path.exists():
|
|
||||||
template_path = alt_path
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
raise FileNotFoundError(
|
|
||||||
f"Error: Development guidelines '{tech_stack}' not found\n"
|
|
||||||
f"Use --list to see available guidelines"
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(template_path, 'r', encoding='utf-8') as f:
|
|
||||||
content = f.read()
|
|
||||||
|
|
||||||
# Parse and return content without YAML frontmatter
|
|
||||||
_, content_without_yaml = self.parse_yaml_frontmatter(content)
|
|
||||||
return content_without_yaml.strip()
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
raise RuntimeError(f"Error reading guidelines file: {e}")
|
|
||||||
|
|
||||||
def get_version(self) -> str:
|
|
||||||
"""Get version information."""
|
|
||||||
return "DMSFlow tech-stack-loader v2.0 (Python)\nSemantic-based development guidelines system"
|
|
||||||
|
|
||||||
def get_help(self) -> str:
|
|
||||||
"""Get help message."""
|
|
||||||
return """Usage:
|
|
||||||
tech_stack_loader.py --list List all available guidelines with descriptions
|
|
||||||
tech_stack_loader.py --load <name> Load specific development guidelines
|
|
||||||
tech_stack_loader.py <name> Load specific guidelines (legacy format)
|
|
||||||
tech_stack_loader.py --help Show this help message
|
|
||||||
tech_stack_loader.py --version Show version information
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
tech_stack_loader.py --list
|
|
||||||
tech_stack_loader.py --load javascript-dev
|
|
||||||
tech_stack_loader.py python-dev"""
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Command-line interface."""
|
|
||||||
parser = argparse.ArgumentParser(
|
|
||||||
description="DMSFlow Tech Stack Guidelines Loader",
|
|
||||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
||||||
epilog="""Examples:
|
|
||||||
python tech_stack_loader.py --list
|
|
||||||
python tech_stack_loader.py --load javascript-dev
|
|
||||||
python tech_stack_loader.py python-dev"""
|
|
||||||
)
|
|
||||||
|
|
||||||
parser.add_argument("command", nargs="?", help="Command or tech stack name")
|
|
||||||
parser.add_argument("tech_stack", nargs="?", help="Tech stack name (when using --load)")
|
|
||||||
parser.add_argument("--list", action="store_true", help="List all available guidelines")
|
|
||||||
parser.add_argument("--load", metavar="TECH_STACK", help="Load specific development guidelines")
|
|
||||||
parser.add_argument("--version", "-v", action="store_true", help="Show version information")
|
|
||||||
parser.add_argument("--template-dir", help="Override template directory path")
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
try:
|
|
||||||
loader = TechStackLoader(args.template_dir)
|
|
||||||
|
|
||||||
# Handle version check
|
|
||||||
if args.version or args.command == "--version":
|
|
||||||
print(loader.get_version())
|
|
||||||
return
|
|
||||||
|
|
||||||
# Handle list command
|
|
||||||
if args.list or args.command == "--list":
|
|
||||||
print(loader.list_available_guidelines())
|
|
||||||
return
|
|
||||||
|
|
||||||
# Handle load command
|
|
||||||
if args.load:
|
|
||||||
result = loader.load_guidelines(args.load)
|
|
||||||
print(result)
|
|
||||||
return
|
|
||||||
|
|
||||||
if args.command == "--load" and args.tech_stack:
|
|
||||||
result = loader.load_guidelines(args.tech_stack)
|
|
||||||
print(result)
|
|
||||||
return
|
|
||||||
|
|
||||||
# Handle legacy usage (direct tech stack name)
|
|
||||||
if args.command and args.command not in ["--help", "--list", "--load"]:
|
|
||||||
result = loader.load_guidelines(args.command)
|
|
||||||
print(result)
|
|
||||||
return
|
|
||||||
|
|
||||||
# Show help
|
|
||||||
print(loader.get_help())
|
|
||||||
|
|
||||||
except (FileNotFoundError, RuntimeError) as e:
|
|
||||||
print(str(e), file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Unexpected error: {e}", file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,241 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Python equivalent of update_module_claude.sh
|
|
||||||
Update CLAUDE.md for a specific module with automatic layer detection
|
|
||||||
|
|
||||||
Usage: python update_module_claude.py <module_path> [update_type]
|
|
||||||
module_path: Path to the module directory
|
|
||||||
update_type: full|related (default: full)
|
|
||||||
Script automatically detects layer depth and selects appropriate template
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import subprocess
|
|
||||||
import time
|
|
||||||
import argparse
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional, Tuple, Dict
|
|
||||||
from dataclasses import dataclass
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class LayerInfo:
|
|
||||||
"""Information about a documentation layer."""
|
|
||||||
name: str
|
|
||||||
template_path: str
|
|
||||||
analysis_strategy: str
|
|
||||||
|
|
||||||
class ModuleClaudeUpdater:
|
|
||||||
"""Update CLAUDE.md documentation for modules with layer detection."""
|
|
||||||
|
|
||||||
def __init__(self, home_dir: Optional[str] = None):
|
|
||||||
self.home_dir = Path(home_dir) if home_dir else Path.home()
|
|
||||||
self.template_base = self.home_dir / ".claude/workflows/cli-templates/prompts/dms"
|
|
||||||
|
|
||||||
def detect_layer(self, module_path: str) -> LayerInfo:
|
|
||||||
"""Determine documentation layer based on path patterns."""
|
|
||||||
clean_path = module_path.replace('./', '') if module_path.startswith('./') else module_path
|
|
||||||
|
|
||||||
if module_path == ".":
|
|
||||||
# Root directory
|
|
||||||
return LayerInfo(
|
|
||||||
name="Layer 1 (Root)",
|
|
||||||
template_path=str(self.template_base / "claude-layer1-root.txt"),
|
|
||||||
analysis_strategy="--all-files"
|
|
||||||
)
|
|
||||||
elif '/' not in clean_path:
|
|
||||||
# Top-level directories (e.g., .claude, src, tests)
|
|
||||||
return LayerInfo(
|
|
||||||
name="Layer 2 (Domain)",
|
|
||||||
template_path=str(self.template_base / "claude-layer2-domain.txt"),
|
|
||||||
analysis_strategy="@{*/CLAUDE.md}"
|
|
||||||
)
|
|
||||||
elif clean_path.count('/') == 1:
|
|
||||||
# Second-level directories (e.g., .claude/scripts, src/components)
|
|
||||||
return LayerInfo(
|
|
||||||
name="Layer 3 (Module)",
|
|
||||||
template_path=str(self.template_base / "claude-layer3-module.txt"),
|
|
||||||
analysis_strategy="@{*/CLAUDE.md}"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# Deeper directories (e.g., .claude/workflows/cli-templates/prompts)
|
|
||||||
return LayerInfo(
|
|
||||||
name="Layer 4 (Sub-Module)",
|
|
||||||
template_path=str(self.template_base / "claude-layer4-submodule.txt"),
|
|
||||||
analysis_strategy="--all-files"
|
|
||||||
)
|
|
||||||
|
|
||||||
def load_template(self, template_path: str) -> str:
|
|
||||||
"""Load template content from file."""
|
|
||||||
try:
|
|
||||||
with open(template_path, 'r', encoding='utf-8') as f:
|
|
||||||
return f.read()
|
|
||||||
except FileNotFoundError:
|
|
||||||
print(f" [WARN] Template not found: {template_path}, using fallback")
|
|
||||||
return "Update CLAUDE.md documentation for this module following hierarchy standards."
|
|
||||||
except Exception as e:
|
|
||||||
print(f" [WARN] Error reading template: {e}, using fallback")
|
|
||||||
return "Update CLAUDE.md documentation for this module following hierarchy standards."
|
|
||||||
|
|
||||||
def build_prompt(self, layer_info: LayerInfo, module_path: str, update_type: str) -> str:
|
|
||||||
"""Build the prompt for gemini."""
|
|
||||||
template_content = self.load_template(layer_info.template_path)
|
|
||||||
module_name = os.path.basename(module_path)
|
|
||||||
|
|
||||||
if update_type == "full":
|
|
||||||
update_context = """
|
|
||||||
Update Mode: Complete refresh
|
|
||||||
- Perform comprehensive analysis of all content
|
|
||||||
- Document patterns, architecture, and purpose
|
|
||||||
- Consider existing documentation hierarchy
|
|
||||||
- Follow template guidelines strictly"""
|
|
||||||
else:
|
|
||||||
update_context = """
|
|
||||||
Update Mode: Context-aware update
|
|
||||||
- Focus on recent changes and affected areas
|
|
||||||
- Maintain consistency with existing documentation
|
|
||||||
- Update only relevant sections
|
|
||||||
- Follow template guidelines for updated content"""
|
|
||||||
|
|
||||||
base_prompt = f"""
|
|
||||||
[CRITICAL] RULES - MUST FOLLOW:
|
|
||||||
1. ONLY modify CLAUDE.md files at any hierarchy level
|
|
||||||
2. NEVER modify source code files
|
|
||||||
3. Focus exclusively on updating documentation
|
|
||||||
4. Follow the template guidelines exactly
|
|
||||||
|
|
||||||
{template_content}
|
|
||||||
|
|
||||||
{update_context}
|
|
||||||
|
|
||||||
Module Information:
|
|
||||||
- Name: {module_name}
|
|
||||||
- Path: {module_path}
|
|
||||||
- Layer: {layer_info.name}
|
|
||||||
- Analysis Strategy: {layer_info.analysis_strategy}"""
|
|
||||||
|
|
||||||
return base_prompt
|
|
||||||
|
|
||||||
def execute_gemini_command(self, prompt: str, analysis_strategy: str, module_path: str) -> bool:
|
|
||||||
"""Execute gemini command with the appropriate strategy."""
|
|
||||||
original_dir = os.getcwd()
|
|
||||||
|
|
||||||
try:
|
|
||||||
os.chdir(module_path)
|
|
||||||
|
|
||||||
if analysis_strategy == "--all-files":
|
|
||||||
cmd = ["gemini", "--all-files", "--yolo", "-p", prompt]
|
|
||||||
else:
|
|
||||||
cmd = ["gemini", "--yolo", "-p", f"{analysis_strategy} {prompt}"]
|
|
||||||
|
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
|
||||||
|
|
||||||
if result.returncode == 0:
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
print(f" [ERROR] Gemini command failed: {result.stderr}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
except subprocess.CalledProcessError as e:
|
|
||||||
print(f" [ERROR] Error executing gemini: {e}")
|
|
||||||
return False
|
|
||||||
except FileNotFoundError:
|
|
||||||
print(f" [ERROR] Gemini command not found. Make sure gemini is installed and in PATH.")
|
|
||||||
return False
|
|
||||||
finally:
|
|
||||||
os.chdir(original_dir)
|
|
||||||
|
|
||||||
def update_module_claude(self, module_path: str, update_type: str = "full") -> bool:
|
|
||||||
"""Main function to update CLAUDE.md for a module."""
|
|
||||||
# Validate parameters
|
|
||||||
if not module_path:
|
|
||||||
print("[ERROR] Module path is required")
|
|
||||||
print("Usage: update_module_claude.py <module_path> [update_type]")
|
|
||||||
return False
|
|
||||||
|
|
||||||
path_obj = Path(module_path)
|
|
||||||
if not path_obj.exists() or not path_obj.is_dir():
|
|
||||||
print(f"[ERROR] Directory '{module_path}' does not exist")
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Check if directory has files
|
|
||||||
files = list(path_obj.glob('*'))
|
|
||||||
file_count = len([f for f in files if f.is_file()])
|
|
||||||
if file_count == 0:
|
|
||||||
print(f"[SKIP] Skipping '{module_path}' - no files found")
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Detect layer and get configuration
|
|
||||||
layer_info = self.detect_layer(module_path)
|
|
||||||
|
|
||||||
print(f"[UPDATE] Updating: {module_path}")
|
|
||||||
print(f" Layer: {layer_info.name} | Type: {update_type} | Files: {file_count}")
|
|
||||||
print(f" Template: {os.path.basename(layer_info.template_path)} | Strategy: {layer_info.analysis_strategy}")
|
|
||||||
|
|
||||||
# Build prompt
|
|
||||||
prompt = self.build_prompt(layer_info, module_path, update_type)
|
|
||||||
|
|
||||||
# Execute update
|
|
||||||
start_time = time.time()
|
|
||||||
print(" [PROGRESS] Starting update...")
|
|
||||||
|
|
||||||
success = self.execute_gemini_command(prompt, layer_info.analysis_strategy, module_path)
|
|
||||||
|
|
||||||
if success:
|
|
||||||
duration = int(time.time() - start_time)
|
|
||||||
print(f" [OK] Completed in {duration}s")
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
print(f" [ERROR] Update failed for {module_path}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Command-line interface."""
|
|
||||||
parser = argparse.ArgumentParser(
|
|
||||||
description="Update CLAUDE.md for a specific module with automatic layer detection",
|
|
||||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
||||||
epilog="""Examples:
|
|
||||||
python update_module_claude.py .
|
|
||||||
python update_module_claude.py src/components full
|
|
||||||
python update_module_claude.py .claude/scripts related"""
|
|
||||||
)
|
|
||||||
|
|
||||||
parser.add_argument("module_path", help="Path to the module directory")
|
|
||||||
parser.add_argument("update_type", nargs="?", choices=["full", "related"],
|
|
||||||
default="full", help="Update type (default: full)")
|
|
||||||
parser.add_argument("--home", help="Override home directory path")
|
|
||||||
parser.add_argument("--dry-run", action="store_true",
|
|
||||||
help="Show what would be done without executing")
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
try:
|
|
||||||
updater = ModuleClaudeUpdater(args.home)
|
|
||||||
|
|
||||||
if args.dry_run:
|
|
||||||
layer_info = updater.detect_layer(args.module_path)
|
|
||||||
prompt = updater.build_prompt(layer_info, args.module_path, args.update_type)
|
|
||||||
|
|
||||||
print("[DRY-RUN] Dry run mode - showing configuration:")
|
|
||||||
print(f"Module Path: {args.module_path}")
|
|
||||||
print(f"Update Type: {args.update_type}")
|
|
||||||
print(f"Layer: {layer_info.name}")
|
|
||||||
print(f"Template: {layer_info.template_path}")
|
|
||||||
print(f"Strategy: {layer_info.analysis_strategy}")
|
|
||||||
print("\nPrompt preview:")
|
|
||||||
print("-" * 50)
|
|
||||||
print(prompt[:500] + "..." if len(prompt) > 500 else prompt)
|
|
||||||
return
|
|
||||||
|
|
||||||
success = updater.update_module_claude(args.module_path, args.update_type)
|
|
||||||
sys.exit(0 if success else 1)
|
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
print("\n[ERROR] Operation cancelled by user")
|
|
||||||
sys.exit(1)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[ERROR] Unexpected error: {e}")
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
"""
|
|
||||||
Shared utility functions and helpers.
|
|
||||||
Provides common functionality for colors, caching, and I/O operations.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from .colors import Colors
|
|
||||||
from .cache import CacheManager
|
|
||||||
from .io_helpers import IOHelpers, ensure_directory, safe_read_file
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
'Colors',
|
|
||||||
'CacheManager',
|
|
||||||
'IOHelpers',
|
|
||||||
'ensure_directory',
|
|
||||||
'safe_read_file'
|
|
||||||
]
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,350 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Cache Management Utility
|
|
||||||
Provides unified caching functionality for the analyzer system.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import json
|
|
||||||
import time
|
|
||||||
import hashlib
|
|
||||||
import pickle
|
|
||||||
import logging
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Optional, Dict, Union
|
|
||||||
from dataclasses import dataclass, asdict
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class CacheEntry:
|
|
||||||
"""Cache entry with metadata."""
|
|
||||||
value: Any
|
|
||||||
timestamp: float
|
|
||||||
ttl: Optional[float] = None
|
|
||||||
key_hash: Optional[str] = None
|
|
||||||
|
|
||||||
def is_expired(self) -> bool:
|
|
||||||
"""Check if cache entry is expired."""
|
|
||||||
if self.ttl is None:
|
|
||||||
return False
|
|
||||||
return time.time() - self.timestamp > self.ttl
|
|
||||||
|
|
||||||
def to_dict(self) -> Dict:
|
|
||||||
"""Convert to dictionary for JSON serialization."""
|
|
||||||
return {
|
|
||||||
'value': self.value,
|
|
||||||
'timestamp': self.timestamp,
|
|
||||||
'ttl': self.ttl,
|
|
||||||
'key_hash': self.key_hash
|
|
||||||
}
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def from_dict(cls, data: Dict) -> 'CacheEntry':
|
|
||||||
"""Create from dictionary."""
|
|
||||||
return cls(**data)
|
|
||||||
|
|
||||||
|
|
||||||
class CacheManager:
|
|
||||||
"""Unified cache manager with multiple storage backends."""
|
|
||||||
|
|
||||||
def __init__(self, cache_dir: str = "cache", default_ttl: int = 3600):
|
|
||||||
self.cache_dir = Path(cache_dir)
|
|
||||||
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
self.default_ttl = default_ttl
|
|
||||||
self.logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# In-memory cache for fast access
|
|
||||||
self._memory_cache: Dict[str, CacheEntry] = {}
|
|
||||||
|
|
||||||
# Cache subdirectories
|
|
||||||
self.json_cache_dir = self.cache_dir / "json"
|
|
||||||
self.pickle_cache_dir = self.cache_dir / "pickle"
|
|
||||||
self.temp_cache_dir = self.cache_dir / "temp"
|
|
||||||
|
|
||||||
for cache_subdir in [self.json_cache_dir, self.pickle_cache_dir, self.temp_cache_dir]:
|
|
||||||
cache_subdir.mkdir(exist_ok=True)
|
|
||||||
|
|
||||||
def _generate_key_hash(self, key: str) -> str:
|
|
||||||
"""Generate a hash for the cache key."""
|
|
||||||
return hashlib.md5(key.encode('utf-8')).hexdigest()
|
|
||||||
|
|
||||||
def _get_cache_path(self, key: str, cache_type: str = "json") -> Path:
|
|
||||||
"""Get cache file path for a key."""
|
|
||||||
key_hash = self._generate_key_hash(key)
|
|
||||||
|
|
||||||
if cache_type == "json":
|
|
||||||
return self.json_cache_dir / f"{key_hash}.json"
|
|
||||||
elif cache_type == "pickle":
|
|
||||||
return self.pickle_cache_dir / f"{key_hash}.pkl"
|
|
||||||
elif cache_type == "temp":
|
|
||||||
return self.temp_cache_dir / f"{key_hash}.tmp"
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unsupported cache type: {cache_type}")
|
|
||||||
|
|
||||||
def set(self, key: str, value: Any, ttl: Optional[int] = None,
|
|
||||||
storage: str = "memory") -> bool:
|
|
||||||
"""Set a cache value."""
|
|
||||||
if ttl is None:
|
|
||||||
ttl = self.default_ttl
|
|
||||||
|
|
||||||
entry = CacheEntry(
|
|
||||||
value=value,
|
|
||||||
timestamp=time.time(),
|
|
||||||
ttl=ttl,
|
|
||||||
key_hash=self._generate_key_hash(key)
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
if storage == "memory":
|
|
||||||
self._memory_cache[key] = entry
|
|
||||||
return True
|
|
||||||
|
|
||||||
elif storage == "json":
|
|
||||||
cache_path = self._get_cache_path(key, "json")
|
|
||||||
with open(cache_path, 'w', encoding='utf-8') as f:
|
|
||||||
json.dump(entry.to_dict(), f, indent=2, default=str)
|
|
||||||
return True
|
|
||||||
|
|
||||||
elif storage == "pickle":
|
|
||||||
cache_path = self._get_cache_path(key, "pickle")
|
|
||||||
with open(cache_path, 'wb') as f:
|
|
||||||
pickle.dump(entry, f)
|
|
||||||
return True
|
|
||||||
|
|
||||||
else:
|
|
||||||
self.logger.warning(f"Unsupported storage type: {storage}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to set cache for key '{key}': {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
def get(self, key: str, storage: str = "memory",
|
|
||||||
default: Any = None) -> Any:
|
|
||||||
"""Get a cache value."""
|
|
||||||
try:
|
|
||||||
entry = None
|
|
||||||
|
|
||||||
if storage == "memory":
|
|
||||||
entry = self._memory_cache.get(key)
|
|
||||||
|
|
||||||
elif storage == "json":
|
|
||||||
cache_path = self._get_cache_path(key, "json")
|
|
||||||
if cache_path.exists():
|
|
||||||
with open(cache_path, 'r', encoding='utf-8') as f:
|
|
||||||
data = json.load(f)
|
|
||||||
entry = CacheEntry.from_dict(data)
|
|
||||||
|
|
||||||
elif storage == "pickle":
|
|
||||||
cache_path = self._get_cache_path(key, "pickle")
|
|
||||||
if cache_path.exists():
|
|
||||||
with open(cache_path, 'rb') as f:
|
|
||||||
entry = pickle.load(f)
|
|
||||||
|
|
||||||
else:
|
|
||||||
self.logger.warning(f"Unsupported storage type: {storage}")
|
|
||||||
return default
|
|
||||||
|
|
||||||
if entry is None:
|
|
||||||
return default
|
|
||||||
|
|
||||||
# Check if entry is expired
|
|
||||||
if entry.is_expired():
|
|
||||||
self.delete(key, storage)
|
|
||||||
return default
|
|
||||||
|
|
||||||
return entry.value
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to get cache for key '{key}': {e}")
|
|
||||||
return default
|
|
||||||
|
|
||||||
def delete(self, key: str, storage: str = "memory") -> bool:
|
|
||||||
"""Delete a cache entry."""
|
|
||||||
try:
|
|
||||||
if storage == "memory":
|
|
||||||
if key in self._memory_cache:
|
|
||||||
del self._memory_cache[key]
|
|
||||||
return True
|
|
||||||
|
|
||||||
elif storage in ["json", "pickle", "temp"]:
|
|
||||||
cache_path = self._get_cache_path(key, storage)
|
|
||||||
if cache_path.exists():
|
|
||||||
cache_path.unlink()
|
|
||||||
return True
|
|
||||||
|
|
||||||
else:
|
|
||||||
self.logger.warning(f"Unsupported storage type: {storage}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to delete cache for key '{key}': {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
def exists(self, key: str, storage: str = "memory") -> bool:
|
|
||||||
"""Check if a cache entry exists and is not expired."""
|
|
||||||
return self.get(key, storage) is not None
|
|
||||||
|
|
||||||
def clear(self, storage: Optional[str] = None) -> bool:
|
|
||||||
"""Clear cache entries."""
|
|
||||||
try:
|
|
||||||
if storage is None or storage == "memory":
|
|
||||||
self._memory_cache.clear()
|
|
||||||
|
|
||||||
if storage is None or storage == "json":
|
|
||||||
for cache_file in self.json_cache_dir.glob("*.json"):
|
|
||||||
cache_file.unlink()
|
|
||||||
|
|
||||||
if storage is None or storage == "pickle":
|
|
||||||
for cache_file in self.pickle_cache_dir.glob("*.pkl"):
|
|
||||||
cache_file.unlink()
|
|
||||||
|
|
||||||
if storage is None or storage == "temp":
|
|
||||||
for cache_file in self.temp_cache_dir.glob("*.tmp"):
|
|
||||||
cache_file.unlink()
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to clear cache: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
def cleanup_expired(self) -> int:
|
|
||||||
"""Clean up expired cache entries."""
|
|
||||||
cleaned_count = 0
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Clean memory cache
|
|
||||||
expired_keys = []
|
|
||||||
for key, entry in self._memory_cache.items():
|
|
||||||
if entry.is_expired():
|
|
||||||
expired_keys.append(key)
|
|
||||||
|
|
||||||
for key in expired_keys:
|
|
||||||
del self._memory_cache[key]
|
|
||||||
cleaned_count += 1
|
|
||||||
|
|
||||||
# Clean file caches
|
|
||||||
for cache_type in ["json", "pickle"]:
|
|
||||||
cache_dir = self.json_cache_dir if cache_type == "json" else self.pickle_cache_dir
|
|
||||||
extension = f".{cache_type}" if cache_type == "json" else ".pkl"
|
|
||||||
|
|
||||||
for cache_file in cache_dir.glob(f"*{extension}"):
|
|
||||||
try:
|
|
||||||
if cache_type == "json":
|
|
||||||
with open(cache_file, 'r', encoding='utf-8') as f:
|
|
||||||
data = json.load(f)
|
|
||||||
entry = CacheEntry.from_dict(data)
|
|
||||||
else:
|
|
||||||
with open(cache_file, 'rb') as f:
|
|
||||||
entry = pickle.load(f)
|
|
||||||
|
|
||||||
if entry.is_expired():
|
|
||||||
cache_file.unlink()
|
|
||||||
cleaned_count += 1
|
|
||||||
|
|
||||||
except Exception:
|
|
||||||
# If we can't read the cache file, delete it
|
|
||||||
cache_file.unlink()
|
|
||||||
cleaned_count += 1
|
|
||||||
|
|
||||||
self.logger.info(f"Cleaned up {cleaned_count} expired cache entries")
|
|
||||||
return cleaned_count
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to cleanup expired cache entries: {e}")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
def get_stats(self) -> Dict[str, Any]:
|
|
||||||
"""Get cache statistics."""
|
|
||||||
stats = {
|
|
||||||
'memory_entries': len(self._memory_cache),
|
|
||||||
'json_files': len(list(self.json_cache_dir.glob("*.json"))),
|
|
||||||
'pickle_files': len(list(self.pickle_cache_dir.glob("*.pkl"))),
|
|
||||||
'temp_files': len(list(self.temp_cache_dir.glob("*.tmp"))),
|
|
||||||
'cache_dir_size': 0
|
|
||||||
}
|
|
||||||
|
|
||||||
# Calculate total cache directory size
|
|
||||||
try:
|
|
||||||
for cache_file in self.cache_dir.rglob("*"):
|
|
||||||
if cache_file.is_file():
|
|
||||||
stats['cache_dir_size'] += cache_file.stat().st_size
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return stats
|
|
||||||
|
|
||||||
def set_file_cache(self, key: str, file_path: Union[str, Path],
|
|
||||||
ttl: Optional[int] = None) -> bool:
|
|
||||||
"""Cache a file by copying it to the cache directory."""
|
|
||||||
try:
|
|
||||||
source_path = Path(file_path)
|
|
||||||
if not source_path.exists():
|
|
||||||
return False
|
|
||||||
|
|
||||||
cache_path = self.temp_cache_dir / f"{self._generate_key_hash(key)}.cached"
|
|
||||||
|
|
||||||
# Copy file to cache
|
|
||||||
import shutil
|
|
||||||
shutil.copy2(source_path, cache_path)
|
|
||||||
|
|
||||||
# Store metadata
|
|
||||||
metadata = {
|
|
||||||
'original_path': str(source_path),
|
|
||||||
'cached_path': str(cache_path),
|
|
||||||
'size': source_path.stat().st_size,
|
|
||||||
'timestamp': time.time(),
|
|
||||||
'ttl': ttl or self.default_ttl
|
|
||||||
}
|
|
||||||
|
|
||||||
return self.set(f"{key}_metadata", metadata, ttl, "json")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to cache file '{file_path}': {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
def get_file_cache(self, key: str) -> Optional[Path]:
|
|
||||||
"""Get cached file path."""
|
|
||||||
metadata = self.get(f"{key}_metadata", "json")
|
|
||||||
if metadata is None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
cached_path = Path(metadata['cached_path'])
|
|
||||||
if not cached_path.exists():
|
|
||||||
# Cache file missing, clean up metadata
|
|
||||||
self.delete(f"{key}_metadata", "json")
|
|
||||||
return None
|
|
||||||
|
|
||||||
return cached_path
|
|
||||||
|
|
||||||
|
|
||||||
# Global cache manager instance
|
|
||||||
_global_cache = None
|
|
||||||
|
|
||||||
|
|
||||||
def get_cache_manager(cache_dir: str = "cache", default_ttl: int = 3600) -> CacheManager:
|
|
||||||
"""Get global cache manager instance."""
|
|
||||||
global _global_cache
|
|
||||||
if _global_cache is None:
|
|
||||||
_global_cache = CacheManager(cache_dir, default_ttl)
|
|
||||||
return _global_cache
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
# Test cache functionality
|
|
||||||
cache = CacheManager("test_cache")
|
|
||||||
|
|
||||||
# Test memory cache
|
|
||||||
cache.set("test_key", {"data": "test_value"}, ttl=60)
|
|
||||||
print(f"Memory cache: {cache.get('test_key')}")
|
|
||||||
|
|
||||||
# Test JSON cache
|
|
||||||
cache.set("json_key", {"complex": {"data": [1, 2, 3]}}, ttl=60, storage="json")
|
|
||||||
print(f"JSON cache: {cache.get('json_key', storage='json')}")
|
|
||||||
|
|
||||||
# Test stats
|
|
||||||
print(f"Cache stats: {cache.get_stats()}")
|
|
||||||
|
|
||||||
# Clean up
|
|
||||||
cache.clear()
|
|
||||||
@@ -1,248 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Terminal Colors Utility
|
|
||||||
Provides ANSI color codes for terminal output formatting.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
|
|
||||||
class Colors:
|
|
||||||
"""ANSI color codes for terminal output."""
|
|
||||||
|
|
||||||
# Basic colors
|
|
||||||
RED = '\033[0;31m'
|
|
||||||
GREEN = '\033[0;32m'
|
|
||||||
YELLOW = '\033[1;33m'
|
|
||||||
BLUE = '\033[0;34m'
|
|
||||||
PURPLE = '\033[0;35m'
|
|
||||||
CYAN = '\033[0;36m'
|
|
||||||
WHITE = '\033[0;37m'
|
|
||||||
BLACK = '\033[0;30m'
|
|
||||||
|
|
||||||
# Bright colors
|
|
||||||
BRIGHT_RED = '\033[1;31m'
|
|
||||||
BRIGHT_GREEN = '\033[1;32m'
|
|
||||||
BRIGHT_YELLOW = '\033[1;33m'
|
|
||||||
BRIGHT_BLUE = '\033[1;34m'
|
|
||||||
BRIGHT_PURPLE = '\033[1;35m'
|
|
||||||
BRIGHT_CYAN = '\033[1;36m'
|
|
||||||
BRIGHT_WHITE = '\033[1;37m'
|
|
||||||
|
|
||||||
# Background colors
|
|
||||||
BG_RED = '\033[41m'
|
|
||||||
BG_GREEN = '\033[42m'
|
|
||||||
BG_YELLOW = '\033[43m'
|
|
||||||
BG_BLUE = '\033[44m'
|
|
||||||
BG_PURPLE = '\033[45m'
|
|
||||||
BG_CYAN = '\033[46m'
|
|
||||||
BG_WHITE = '\033[47m'
|
|
||||||
|
|
||||||
# Text formatting
|
|
||||||
BOLD = '\033[1m'
|
|
||||||
DIM = '\033[2m'
|
|
||||||
UNDERLINE = '\033[4m'
|
|
||||||
BLINK = '\033[5m'
|
|
||||||
REVERSE = '\033[7m'
|
|
||||||
STRIKETHROUGH = '\033[9m'
|
|
||||||
|
|
||||||
# Reset
|
|
||||||
NC = '\033[0m' # No Color / Reset
|
|
||||||
RESET = '\033[0m'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def is_tty(cls) -> bool:
|
|
||||||
"""Check if output is a TTY (supports colors)."""
|
|
||||||
return hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def supports_color(cls) -> bool:
|
|
||||||
"""Check if the terminal supports color output."""
|
|
||||||
# Check environment variables
|
|
||||||
if os.getenv('NO_COLOR'):
|
|
||||||
return False
|
|
||||||
|
|
||||||
if os.getenv('FORCE_COLOR'):
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Check if output is a TTY
|
|
||||||
if not cls.is_tty():
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Check TERM environment variable
|
|
||||||
term = os.getenv('TERM', '').lower()
|
|
||||||
if 'color' in term or term in ('xterm', 'xterm-256color', 'screen', 'tmux'):
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Windows Terminal detection
|
|
||||||
if os.name == 'nt':
|
|
||||||
# Windows 10 version 1511 and later support ANSI colors
|
|
||||||
try:
|
|
||||||
import subprocess
|
|
||||||
result = subprocess.run(['ver'], capture_output=True, text=True, shell=True)
|
|
||||||
if result.returncode == 0:
|
|
||||||
version_info = result.stdout
|
|
||||||
# Extract Windows version (simplified check)
|
|
||||||
if 'Windows' in version_info:
|
|
||||||
return True
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def colorize(cls, text: str, color: str, bold: bool = False) -> str:
|
|
||||||
"""Apply color to text if colors are supported."""
|
|
||||||
if not cls.supports_color():
|
|
||||||
return text
|
|
||||||
|
|
||||||
prefix = color
|
|
||||||
if bold:
|
|
||||||
prefix = cls.BOLD + prefix
|
|
||||||
|
|
||||||
return f"{prefix}{text}{cls.RESET}"
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def red(cls, text: str, bold: bool = False) -> str:
|
|
||||||
"""Color text red."""
|
|
||||||
return cls.colorize(text, cls.RED, bold)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def green(cls, text: str, bold: bool = False) -> str:
|
|
||||||
"""Color text green."""
|
|
||||||
return cls.colorize(text, cls.GREEN, bold)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def yellow(cls, text: str, bold: bool = False) -> str:
|
|
||||||
"""Color text yellow."""
|
|
||||||
return cls.colorize(text, cls.YELLOW, bold)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def blue(cls, text: str, bold: bool = False) -> str:
|
|
||||||
"""Color text blue."""
|
|
||||||
return cls.colorize(text, cls.BLUE, bold)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def purple(cls, text: str, bold: bool = False) -> str:
|
|
||||||
"""Color text purple."""
|
|
||||||
return cls.colorize(text, cls.PURPLE, bold)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def cyan(cls, text: str, bold: bool = False) -> str:
|
|
||||||
"""Color text cyan."""
|
|
||||||
return cls.colorize(text, cls.CYAN, bold)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def bold(cls, text: str) -> str:
|
|
||||||
"""Make text bold."""
|
|
||||||
return cls.colorize(text, '', True)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def dim(cls, text: str) -> str:
|
|
||||||
"""Make text dim."""
|
|
||||||
return cls.colorize(text, cls.DIM)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def underline(cls, text: str) -> str:
|
|
||||||
"""Underline text."""
|
|
||||||
return cls.colorize(text, cls.UNDERLINE)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def success(cls, text: str) -> str:
|
|
||||||
"""Format success message (green)."""
|
|
||||||
return cls.green(f"[SUCCESS] {text}", bold=True)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def error(cls, text: str) -> str:
|
|
||||||
"""Format error message (red)."""
|
|
||||||
return cls.red(f"[ERROR] {text}", bold=True)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def warning(cls, text: str) -> str:
|
|
||||||
"""Format warning message (yellow)."""
|
|
||||||
return cls.yellow(f"[WARNING] {text}", bold=True)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def info(cls, text: str) -> str:
|
|
||||||
"""Format info message (blue)."""
|
|
||||||
return cls.blue(f"[INFO] {text}")
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def highlight(cls, text: str) -> str:
|
|
||||||
"""Highlight text (cyan background)."""
|
|
||||||
if not cls.supports_color():
|
|
||||||
return f"[{text}]"
|
|
||||||
return f"{cls.BG_CYAN}{cls.BLACK}{text}{cls.RESET}"
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def strip_colors(cls, text: str) -> str:
|
|
||||||
"""Remove ANSI color codes from text."""
|
|
||||||
import re
|
|
||||||
ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
|
|
||||||
return ansi_escape.sub('', text)
|
|
||||||
|
|
||||||
|
|
||||||
# Convenience functions for common usage
|
|
||||||
def colorize(text: str, color: str) -> str:
|
|
||||||
"""Convenience function to colorize text."""
|
|
||||||
return Colors.colorize(text, color)
|
|
||||||
|
|
||||||
|
|
||||||
def red(text: str) -> str:
|
|
||||||
"""Red text."""
|
|
||||||
return Colors.red(text)
|
|
||||||
|
|
||||||
|
|
||||||
def green(text: str) -> str:
|
|
||||||
"""Green text."""
|
|
||||||
return Colors.green(text)
|
|
||||||
|
|
||||||
|
|
||||||
def yellow(text: str) -> str:
|
|
||||||
"""Yellow text."""
|
|
||||||
return Colors.yellow(text)
|
|
||||||
|
|
||||||
|
|
||||||
def blue(text: str) -> str:
|
|
||||||
"""Blue text."""
|
|
||||||
return Colors.blue(text)
|
|
||||||
|
|
||||||
|
|
||||||
def success(text: str) -> str:
|
|
||||||
"""Success message."""
|
|
||||||
return Colors.success(text)
|
|
||||||
|
|
||||||
|
|
||||||
def error(text: str) -> str:
|
|
||||||
"""Error message."""
|
|
||||||
return Colors.error(text)
|
|
||||||
|
|
||||||
|
|
||||||
def warning(text: str) -> str:
|
|
||||||
"""Warning message."""
|
|
||||||
return Colors.warning(text)
|
|
||||||
|
|
||||||
|
|
||||||
def info(text: str) -> str:
|
|
||||||
"""Info message."""
|
|
||||||
return Colors.info(text)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
# Test color output
|
|
||||||
print(Colors.red("Red text"))
|
|
||||||
print(Colors.green("Green text"))
|
|
||||||
print(Colors.yellow("Yellow text"))
|
|
||||||
print(Colors.blue("Blue text"))
|
|
||||||
print(Colors.purple("Purple text"))
|
|
||||||
print(Colors.cyan("Cyan text"))
|
|
||||||
print(Colors.bold("Bold text"))
|
|
||||||
print(Colors.success("Success message"))
|
|
||||||
print(Colors.error("Error message"))
|
|
||||||
print(Colors.warning("Warning message"))
|
|
||||||
print(Colors.info("Info message"))
|
|
||||||
print(Colors.highlight("Highlighted text"))
|
|
||||||
print(f"Color support: {Colors.supports_color()}")
|
|
||||||
print(f"TTY: {Colors.is_tty()}")
|
|
||||||
@@ -1,378 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
I/O Helper Functions
|
|
||||||
Provides common file and directory operations with error handling.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import json
|
|
||||||
import yaml
|
|
||||||
import logging
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Optional, Union, List, Dict
|
|
||||||
import shutil
|
|
||||||
import tempfile
|
|
||||||
|
|
||||||
|
|
||||||
class IOHelpers:
|
|
||||||
"""Collection of I/O helper methods."""
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def ensure_directory(path: Union[str, Path], mode: int = 0o755) -> bool:
|
|
||||||
"""Ensure directory exists, create if necessary."""
|
|
||||||
try:
|
|
||||||
dir_path = Path(path)
|
|
||||||
dir_path.mkdir(parents=True, exist_ok=True, mode=mode)
|
|
||||||
return True
|
|
||||||
except (PermissionError, OSError) as e:
|
|
||||||
logging.error(f"Failed to create directory '{path}': {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def safe_read_file(file_path: Union[str, Path], encoding: str = 'utf-8',
|
|
||||||
fallback_encoding: str = 'latin-1') -> Optional[str]:
|
|
||||||
"""Safely read file content with encoding fallback."""
|
|
||||||
path = Path(file_path)
|
|
||||||
if not path.exists():
|
|
||||||
return None
|
|
||||||
|
|
||||||
encodings = [encoding, fallback_encoding] if encoding != fallback_encoding else [encoding]
|
|
||||||
|
|
||||||
for enc in encodings:
|
|
||||||
try:
|
|
||||||
with open(path, 'r', encoding=enc) as f:
|
|
||||||
return f.read()
|
|
||||||
except UnicodeDecodeError:
|
|
||||||
continue
|
|
||||||
except (IOError, OSError) as e:
|
|
||||||
logging.error(f"Failed to read file '{file_path}': {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
logging.warning(f"Failed to decode file '{file_path}' with any encoding")
|
|
||||||
return None
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def safe_write_file(file_path: Union[str, Path], content: str,
|
|
||||||
encoding: str = 'utf-8', backup: bool = False) -> bool:
|
|
||||||
"""Safely write content to file with optional backup."""
|
|
||||||
path = Path(file_path)
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Create backup if requested and file exists
|
|
||||||
if backup and path.exists():
|
|
||||||
backup_path = path.with_suffix(path.suffix + '.bak')
|
|
||||||
shutil.copy2(path, backup_path)
|
|
||||||
|
|
||||||
# Ensure parent directory exists
|
|
||||||
if not IOHelpers.ensure_directory(path.parent):
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Write to temporary file first, then move to final location
|
|
||||||
with tempfile.NamedTemporaryFile(mode='w', encoding=encoding,
|
|
||||||
dir=path.parent, delete=False) as tmp_file:
|
|
||||||
tmp_file.write(content)
|
|
||||||
tmp_path = Path(tmp_file.name)
|
|
||||||
|
|
||||||
# Atomic move
|
|
||||||
shutil.move(str(tmp_path), str(path))
|
|
||||||
return True
|
|
||||||
|
|
||||||
except (IOError, OSError) as e:
|
|
||||||
logging.error(f"Failed to write file '{file_path}': {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def read_json(file_path: Union[str, Path], default: Any = None) -> Any:
|
|
||||||
"""Read JSON file with error handling."""
|
|
||||||
content = IOHelpers.safe_read_file(file_path)
|
|
||||||
if content is None:
|
|
||||||
return default
|
|
||||||
|
|
||||||
try:
|
|
||||||
return json.loads(content)
|
|
||||||
except json.JSONDecodeError as e:
|
|
||||||
logging.error(f"Failed to parse JSON from '{file_path}': {e}")
|
|
||||||
return default
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def write_json(file_path: Union[str, Path], data: Any,
|
|
||||||
indent: int = 2, backup: bool = False) -> bool:
|
|
||||||
"""Write data to JSON file."""
|
|
||||||
try:
|
|
||||||
content = json.dumps(data, indent=indent, ensure_ascii=False, default=str)
|
|
||||||
return IOHelpers.safe_write_file(file_path, content, backup=backup)
|
|
||||||
except (TypeError, ValueError) as e:
|
|
||||||
logging.error(f"Failed to serialize data to JSON for '{file_path}': {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def read_yaml(file_path: Union[str, Path], default: Any = None) -> Any:
|
|
||||||
"""Read YAML file with error handling."""
|
|
||||||
content = IOHelpers.safe_read_file(file_path)
|
|
||||||
if content is None:
|
|
||||||
return default
|
|
||||||
|
|
||||||
try:
|
|
||||||
return yaml.safe_load(content)
|
|
||||||
except yaml.YAMLError as e:
|
|
||||||
logging.error(f"Failed to parse YAML from '{file_path}': {e}")
|
|
||||||
return default
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def write_yaml(file_path: Union[str, Path], data: Any, backup: bool = False) -> bool:
|
|
||||||
"""Write data to YAML file."""
|
|
||||||
try:
|
|
||||||
content = yaml.dump(data, default_flow_style=False, allow_unicode=True)
|
|
||||||
return IOHelpers.safe_write_file(file_path, content, backup=backup)
|
|
||||||
except yaml.YAMLError as e:
|
|
||||||
logging.error(f"Failed to serialize data to YAML for '{file_path}': {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def find_files(directory: Union[str, Path], pattern: str = "*",
|
|
||||||
recursive: bool = True, max_depth: Optional[int] = None) -> List[Path]:
|
|
||||||
"""Find files matching pattern in directory."""
|
|
||||||
dir_path = Path(directory)
|
|
||||||
if not dir_path.exists() or not dir_path.is_dir():
|
|
||||||
return []
|
|
||||||
|
|
||||||
files = []
|
|
||||||
try:
|
|
||||||
if recursive:
|
|
||||||
if max_depth is not None:
|
|
||||||
# Implement depth-limited search
|
|
||||||
def search_with_depth(path: Path, current_depth: int = 0):
|
|
||||||
if current_depth > max_depth:
|
|
||||||
return
|
|
||||||
|
|
||||||
for item in path.iterdir():
|
|
||||||
if item.is_file() and item.match(pattern):
|
|
||||||
files.append(item)
|
|
||||||
elif item.is_dir() and current_depth < max_depth:
|
|
||||||
search_with_depth(item, current_depth + 1)
|
|
||||||
|
|
||||||
search_with_depth(dir_path)
|
|
||||||
else:
|
|
||||||
files = list(dir_path.rglob(pattern))
|
|
||||||
else:
|
|
||||||
files = list(dir_path.glob(pattern))
|
|
||||||
|
|
||||||
return sorted(files)
|
|
||||||
|
|
||||||
except (PermissionError, OSError) as e:
|
|
||||||
logging.error(f"Failed to search files in '{directory}': {e}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def get_file_stats(file_path: Union[str, Path]) -> Optional[Dict[str, Any]]:
|
|
||||||
"""Get file statistics."""
|
|
||||||
path = Path(file_path)
|
|
||||||
if not path.exists():
|
|
||||||
return None
|
|
||||||
|
|
||||||
try:
|
|
||||||
stat = path.stat()
|
|
||||||
return {
|
|
||||||
'size': stat.st_size,
|
|
||||||
'modified_time': stat.st_mtime,
|
|
||||||
'created_time': stat.st_ctime,
|
|
||||||
'is_file': path.is_file(),
|
|
||||||
'is_dir': path.is_dir(),
|
|
||||||
'permissions': oct(stat.st_mode)[-3:],
|
|
||||||
'extension': path.suffix.lower(),
|
|
||||||
'name': path.name,
|
|
||||||
'parent': str(path.parent)
|
|
||||||
}
|
|
||||||
except (OSError, PermissionError) as e:
|
|
||||||
logging.error(f"Failed to get stats for '{file_path}': {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def copy_with_backup(source: Union[str, Path], dest: Union[str, Path]) -> bool:
|
|
||||||
"""Copy file with automatic backup if destination exists."""
|
|
||||||
source_path = Path(source)
|
|
||||||
dest_path = Path(dest)
|
|
||||||
|
|
||||||
if not source_path.exists():
|
|
||||||
logging.error(f"Source file '{source}' does not exist")
|
|
||||||
return False
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Create backup if destination exists
|
|
||||||
if dest_path.exists():
|
|
||||||
backup_path = dest_path.with_suffix(dest_path.suffix + '.bak')
|
|
||||||
shutil.copy2(dest_path, backup_path)
|
|
||||||
logging.info(f"Created backup: {backup_path}")
|
|
||||||
|
|
||||||
# Ensure destination directory exists
|
|
||||||
if not IOHelpers.ensure_directory(dest_path.parent):
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Copy file
|
|
||||||
shutil.copy2(source_path, dest_path)
|
|
||||||
return True
|
|
||||||
|
|
||||||
except (IOError, OSError) as e:
|
|
||||||
logging.error(f"Failed to copy '{source}' to '{dest}': {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def move_with_backup(source: Union[str, Path], dest: Union[str, Path]) -> bool:
|
|
||||||
"""Move file with automatic backup if destination exists."""
|
|
||||||
source_path = Path(source)
|
|
||||||
dest_path = Path(dest)
|
|
||||||
|
|
||||||
if not source_path.exists():
|
|
||||||
logging.error(f"Source file '{source}' does not exist")
|
|
||||||
return False
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Create backup if destination exists
|
|
||||||
if dest_path.exists():
|
|
||||||
backup_path = dest_path.with_suffix(dest_path.suffix + '.bak')
|
|
||||||
shutil.move(str(dest_path), str(backup_path))
|
|
||||||
logging.info(f"Created backup: {backup_path}")
|
|
||||||
|
|
||||||
# Ensure destination directory exists
|
|
||||||
if not IOHelpers.ensure_directory(dest_path.parent):
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Move file
|
|
||||||
shutil.move(str(source_path), str(dest_path))
|
|
||||||
return True
|
|
||||||
|
|
||||||
except (IOError, OSError) as e:
|
|
||||||
logging.error(f"Failed to move '{source}' to '{dest}': {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def clean_temp_files(directory: Union[str, Path], extensions: List[str] = None,
|
|
||||||
max_age_hours: int = 24) -> int:
|
|
||||||
"""Clean temporary files older than specified age."""
|
|
||||||
if extensions is None:
|
|
||||||
extensions = ['.tmp', '.temp', '.bak', '.swp', '.~']
|
|
||||||
|
|
||||||
dir_path = Path(directory)
|
|
||||||
if not dir_path.exists():
|
|
||||||
return 0
|
|
||||||
|
|
||||||
import time
|
|
||||||
cutoff_time = time.time() - (max_age_hours * 3600)
|
|
||||||
cleaned_count = 0
|
|
||||||
|
|
||||||
try:
|
|
||||||
for file_path in dir_path.rglob('*'):
|
|
||||||
if file_path.is_file():
|
|
||||||
# Check extension
|
|
||||||
if file_path.suffix.lower() in extensions:
|
|
||||||
# Check age
|
|
||||||
if file_path.stat().st_mtime < cutoff_time:
|
|
||||||
try:
|
|
||||||
file_path.unlink()
|
|
||||||
cleaned_count += 1
|
|
||||||
except OSError:
|
|
||||||
continue
|
|
||||||
|
|
||||||
logging.info(f"Cleaned {cleaned_count} temporary files from '{directory}'")
|
|
||||||
return cleaned_count
|
|
||||||
|
|
||||||
except (PermissionError, OSError) as e:
|
|
||||||
logging.error(f"Failed to clean temp files in '{directory}': {e}")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def get_directory_size(directory: Union[str, Path]) -> int:
|
|
||||||
"""Get total size of directory in bytes."""
|
|
||||||
dir_path = Path(directory)
|
|
||||||
if not dir_path.exists() or not dir_path.is_dir():
|
|
||||||
return 0
|
|
||||||
|
|
||||||
total_size = 0
|
|
||||||
try:
|
|
||||||
for file_path in dir_path.rglob('*'):
|
|
||||||
if file_path.is_file():
|
|
||||||
total_size += file_path.stat().st_size
|
|
||||||
except (PermissionError, OSError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
return total_size
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def make_executable(file_path: Union[str, Path]) -> bool:
|
|
||||||
"""Make file executable (Unix/Linux/Mac)."""
|
|
||||||
if os.name == 'nt': # Windows
|
|
||||||
return True # Windows doesn't use Unix permissions
|
|
||||||
|
|
||||||
try:
|
|
||||||
path = Path(file_path)
|
|
||||||
current_mode = path.stat().st_mode
|
|
||||||
path.chmod(current_mode | 0o111) # Add execute permission
|
|
||||||
return True
|
|
||||||
except (OSError, PermissionError) as e:
|
|
||||||
logging.error(f"Failed to make '{file_path}' executable: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
# Convenience functions
|
|
||||||
def ensure_directory(path: Union[str, Path]) -> bool:
|
|
||||||
"""Ensure directory exists."""
|
|
||||||
return IOHelpers.ensure_directory(path)
|
|
||||||
|
|
||||||
|
|
||||||
def safe_read_file(file_path: Union[str, Path]) -> Optional[str]:
|
|
||||||
"""Safely read file content."""
|
|
||||||
return IOHelpers.safe_read_file(file_path)
|
|
||||||
|
|
||||||
|
|
||||||
def safe_write_file(file_path: Union[str, Path], content: str) -> bool:
|
|
||||||
"""Safely write content to file."""
|
|
||||||
return IOHelpers.safe_write_file(file_path, content)
|
|
||||||
|
|
||||||
|
|
||||||
def read_json(file_path: Union[str, Path], default: Any = None) -> Any:
|
|
||||||
"""Read JSON file."""
|
|
||||||
return IOHelpers.read_json(file_path, default)
|
|
||||||
|
|
||||||
|
|
||||||
def write_json(file_path: Union[str, Path], data: Any) -> bool:
|
|
||||||
"""Write data to JSON file."""
|
|
||||||
return IOHelpers.write_json(file_path, data)
|
|
||||||
|
|
||||||
|
|
||||||
def read_yaml(file_path: Union[str, Path], default: Any = None) -> Any:
|
|
||||||
"""Read YAML file."""
|
|
||||||
return IOHelpers.read_yaml(file_path, default)
|
|
||||||
|
|
||||||
|
|
||||||
def write_yaml(file_path: Union[str, Path], data: Any) -> bool:
|
|
||||||
"""Write data to YAML file."""
|
|
||||||
return IOHelpers.write_yaml(file_path, data)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
# Test I/O operations
|
|
||||||
test_dir = Path("test_io")
|
|
||||||
|
|
||||||
# Test directory creation
|
|
||||||
print(f"Create directory: {ensure_directory(test_dir)}")
|
|
||||||
|
|
||||||
# Test file operations
|
|
||||||
test_file = test_dir / "test.txt"
|
|
||||||
content = "Hello, World!\nThis is a test file."
|
|
||||||
|
|
||||||
print(f"Write file: {safe_write_file(test_file, content)}")
|
|
||||||
print(f"Read file: {safe_read_file(test_file)}")
|
|
||||||
|
|
||||||
# Test JSON operations
|
|
||||||
json_file = test_dir / "test.json"
|
|
||||||
json_data = {"name": "test", "numbers": [1, 2, 3], "nested": {"key": "value"}}
|
|
||||||
|
|
||||||
print(f"Write JSON: {write_json(json_file, json_data)}")
|
|
||||||
print(f"Read JSON: {read_json(json_file)}")
|
|
||||||
|
|
||||||
# Test file stats
|
|
||||||
stats = IOHelpers.get_file_stats(test_file)
|
|
||||||
print(f"File stats: {stats}")
|
|
||||||
|
|
||||||
# Cleanup
|
|
||||||
shutil.rmtree(test_dir, ignore_errors=True)
|
|
||||||
2
.vscode/settings.json
vendored
2
.vscode/settings.json
vendored
@@ -1,2 +0,0 @@
|
|||||||
{
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user