mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-06 01:54:11 +08:00
- Remove --analyze|--deep parameters from plan.md, use default analysis - Change .analysis to .process directory structure for better organization - Create ANALYSIS_RESULTS.md template focused on verified results - Add .process folder to workflow-architecture.md file structure - Template emphasizes verification of files, methods, and commands - Prevent execution errors from non-existent references 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
204 lines
7.4 KiB
Python
204 lines
7.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
File Structure Indexer
|
|
Builds and maintains file indices for intelligent analysis.
|
|
"""
|
|
|
|
import sys
|
|
import argparse
|
|
import logging
|
|
import json
|
|
import time
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, Any
|
|
|
|
# Add current directory to path for imports
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
|
|
from core.config import get_config
|
|
from core.file_indexer import FileIndexer, IndexStats
|
|
from core.embedding_manager import EmbeddingManager
|
|
from utils.colors import Colors
|
|
|
|
|
|
class ProjectIndexer:
    """Manages file indexing and project statistics.

    Thin facade over ``FileIndexer`` and, when the configuration enables
    embeddings, ``EmbeddingManager``. Progress is reported with ``print``
    through the ``Colors`` helpers.
    """

    def __init__(self, config_path: Optional[str] = None, root_path: str = "."):
        """Load configuration, set up logging and create the core components.

        Args:
            config_path: Passed unchanged to ``get_config``.
            root_path: Directory to index; resolved to an absolute path.
        """
        self.root_path = Path(root_path).resolve()
        self.config = get_config(config_path)

        # Setup logging. The configured level may be a name in any case or a
        # numeric level; anything unrecognised falls back to INFO instead of
        # raising AttributeError from a bare getattr().
        configured_level = self.config.get('logging.level', 'INFO')
        if isinstance(configured_level, int):
            level = configured_level
        else:
            level = getattr(logging, str(configured_level).upper(), None)
            if not isinstance(level, int):
                level = logging.INFO
        logging.basicConfig(
            level=level,
            format=self.config.get('logging.format', '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        )
        self.logger = logging.getLogger(__name__)

        # Initialize core components
        self.indexer = FileIndexer(self.config, str(self.root_path))

        # Initialize embedding manager if enabled; it stays None when the
        # feature is disabled or its dependencies cannot be imported.
        self.embedding_manager = None
        if self.config.is_embedding_enabled():
            try:
                self.embedding_manager = EmbeddingManager(self.config)
            except ImportError:
                self.logger.warning("Embedding dependencies not available. Install sentence-transformers for enhanced functionality.")

    def build_index(self) -> Optional["IndexStats"]:
        """Build or update the file index and print a timing summary.

        Returns:
            Whatever ``self.indexer.get_stats()`` reports after the build;
            this may be falsy, in which case a generic success line is printed.
        """
        print(Colors.yellow("Building file index..."))
        start_time = time.time()

        self.indexer.build_index()
        stats = self.indexer.get_stats()

        elapsed = time.time() - start_time
        if stats:
            print(Colors.green(f"Index built: {stats.total_files} files, ~{stats.total_tokens:,} tokens ({elapsed:.2f}s)"))
        else:
            print(Colors.green(f"Index built successfully ({elapsed:.2f}s)"))

        return stats

    def update_embeddings(self) -> bool:
        """Update embeddings for semantic similarity.

        Builds the file index first when none can be loaded.

        Returns:
            ``True`` when the embedding manager finished without raising,
            ``False`` when no embedding manager is available or the update
            raised (the error is printed, not propagated).
        """
        if not self.embedding_manager:
            print(Colors.error("Embedding functionality not available"))
            return False

        print(Colors.yellow("Updating embeddings..."))
        start_time = time.time()

        # Load file index
        index = self.indexer.load_index()
        if not index:
            print(Colors.warning("No file index found. Building index first..."))
            self.build_index()
            index = self.indexer.load_index()

        try:
            count = self.embedding_manager.update_embeddings(index)
        except Exception as e:
            # Best-effort operation: report the failure and signal it via the
            # return value rather than aborting the whole CLI run.
            print(Colors.error(f"Failed to update embeddings: {e}"))
            return False

        elapsed = time.time() - start_time
        print(Colors.green(f"Updated {count} embeddings ({elapsed:.2f}s)"))
        return True

    def get_project_stats(self) -> Dict[str, Any]:
        """Get comprehensive project statistics.

        Returns:
            A dict with the keys ``files``, ``tokens``, ``size_bytes``,
            ``categories``, ``project_size``, ``last_updated``,
            ``embeddings`` and ``config``. Index-derived values default to
            ``0`` / ``{}`` when the indexer reports no stats. ``embeddings``
            always carries ``embeddings_exist`` and ``embedding_count`` so
            consumers can read them even when embeddings are disabled.
        """
        stats = self.indexer.get_stats()

        # Keep the schema stable: same keys whether or not a manager exists.
        embedding_stats = {'embeddings_exist': False, 'embedding_count': 0}
        manager = self.embedding_manager
        if manager:
            has_embeddings = manager.embeddings_exist()
            embedding_stats = {
                'embeddings_exist': has_embeddings,
                'embedding_count': len(manager._load_embedding_cache()) if has_embeddings else 0
            }

        if stats:
            files = stats.total_files
            tokens = stats.total_tokens
            size_bytes = stats.total_size
            categories = stats.categories
            last_updated = stats.last_updated
        else:
            files = tokens = size_bytes = last_updated = 0
            categories = {}

        return {
            'files': files,
            'tokens': tokens,
            'size_bytes': size_bytes,
            'categories': categories,
            'project_size': self._classify_project_size(tokens),
            'last_updated': last_updated,
            'embeddings': embedding_stats,
            'config': {
                'cache_dir': self.config.get_cache_dir(),
                'embedding_enabled': self.config.is_embedding_enabled(),
                'exclude_patterns_count': len(self.config.get_exclude_patterns())
            }
        }

    def _classify_project_size(self, tokens: int) -> str:
        """Classify project size based on token count.

        Thresholds come from the ``token_limits.small_project`` and
        ``token_limits.medium_project`` config keys (defaults 500000 and
        2000000). Both bounds are exclusive upper limits.

        Returns:
            ``"small"``, ``"medium"`` or ``"large"``.
        """
        small_limit = self.config.get('token_limits.small_project', 500000)
        medium_limit = self.config.get('token_limits.medium_project', 2000000)

        if tokens < small_limit:
            return "small"
        if tokens < medium_limit:
            return "medium"
        return "large"

    def cleanup_cache(self):
        """Clean up old cache files.

        Deletes every regular file directly inside the configured cache
        directory (sub-directories are left alone). Does nothing when the
        directory does not exist.
        """
        cache_dir = Path(self.config.get_cache_dir())
        if not cache_dir.exists():
            return

        print(Colors.yellow("Cleaning up cache..."))
        for entry in cache_dir.glob("*"):
            if entry.is_file():
                entry.unlink()
                print(f"Removed: {entry}")
        print(Colors.green("Cache cleaned"))
|
|
|
|
|
|
def main(argv: Optional[List[str]] = None):
    """CLI entry point for indexer.

    Parses the command line, then runs the requested actions in a fixed
    order: cleanup, build, embeddings, stats.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, so calling ``main()`` behaves as before.

    Exits with status 1 on Ctrl-C or on any exception raised while running
    an action; argparse itself exits with status 2 when no action is given.
    """
    parser = argparse.ArgumentParser(
        description="Project File Indexer - Build and manage file indices",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python indexer.py --build          # Build file index
  python indexer.py --stats          # Show project statistics
  python indexer.py --embeddings     # Update embeddings
  python indexer.py --cleanup        # Clean cache
        """
    )

    parser.add_argument('--build', action='store_true', help='Build file index')
    parser.add_argument('--stats', action='store_true', help='Show project statistics')
    parser.add_argument('--embeddings', action='store_true', help='Update embeddings')
    parser.add_argument('--cleanup', action='store_true', help='Clean up cache files')
    parser.add_argument('--output', choices=['json', 'text'], default='text', help='Output format')
    parser.add_argument('--config', help='Configuration file path')
    parser.add_argument('--root', default='.', help='Root directory to analyze')

    args = parser.parse_args(argv)

    # Require at least one action
    if not any([args.build, args.stats, args.embeddings, args.cleanup]):
        parser.error("At least one action is required: --build, --stats, --embeddings, or --cleanup")

    # Create indexer
    indexer = ProjectIndexer(args.config, args.root)

    try:
        if args.cleanup:
            indexer.cleanup_cache()

        if args.build:
            indexer.build_index()

        if args.embeddings:
            indexer.update_embeddings()

        if args.stats:
            stats = indexer.get_project_stats()
            if args.output == 'json':
                print(json.dumps(stats, indent=2, default=str))
            else:
                print(f"Total files: {stats['files']}")
                print(f"Total tokens: {stats['tokens']:,}")
                print(f"Project size: {stats['project_size']}")
                print(f"Categories: {stats['categories']}")
                # The 'embeddings' entry can be an empty dict when the
                # feature is disabled, so read the count defensively instead
                # of indexing a key that may not exist.
                embeddings = stats.get('embeddings') or {}
                if embeddings:
                    print(f"Embeddings: {embeddings.get('embedding_count', 0)}")

    except KeyboardInterrupt:
        print(Colors.warning("\nOperation interrupted by user"))
        sys.exit(1)
    except Exception as e:
        print(Colors.error(f"Operation failed: {e}"))
        sys.exit(1)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()