Files
Claude-Code-Workflow/.claude/python_script/indexer.py
catlog22 fc6e851230 refactor: Update workflow plan system and template organization
- Remove --analyze|--deep parameters from plan.md, use default analysis
- Change .analysis to .process directory structure for better organization
- Create ANALYSIS_RESULTS.md template focused on verified results
- Add .process folder to workflow-architecture.md file structure
- Template emphasizes verification of files, methods, and commands
- Prevent execution errors from non-existent references

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-18 16:26:50 +08:00

204 lines
7.4 KiB
Python

#!/usr/bin/env python3
"""
File Structure Indexer
Builds and maintains file indices for intelligent analysis.
"""
import sys
import argparse
import logging
import json
import time
from pathlib import Path
from typing import Dict, List, Optional, Any

# Add current directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))

from core.config import get_config
from core.file_indexer import FileIndexer, IndexStats
from core.embedding_manager import EmbeddingManager
from utils.colors import Colors


class ProjectIndexer:
    """Manages file indexing and project statistics."""

    def __init__(self, config_path: Optional[str] = None, root_path: str = "."):
        self.root_path = Path(root_path).resolve()
        self.config = get_config(config_path)

        # Setup logging
        logging.basicConfig(
            level=getattr(logging, self.config.get('logging.level', 'INFO')),
            format=self.config.get('logging.format', '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        )
        self.logger = logging.getLogger(__name__)

        # Initialize core components
        self.indexer = FileIndexer(self.config, str(self.root_path))

        # Initialize embedding manager if enabled
        self.embedding_manager = None
        if self.config.is_embedding_enabled():
            try:
                self.embedding_manager = EmbeddingManager(self.config)
            except ImportError:
                self.logger.warning("Embedding dependencies not available. Install sentence-transformers for enhanced functionality.")

    def build_index(self) -> Optional[IndexStats]:
        """Build or update the file index."""
        print(Colors.yellow("Building file index..."))
        start_time = time.time()

        self.indexer.build_index()
        stats = self.indexer.get_stats()
        elapsed = time.time() - start_time

        if stats:
            print(Colors.green(f"Index built: {stats.total_files} files, ~{stats.total_tokens:,} tokens ({elapsed:.2f}s)"))
        else:
            print(Colors.green(f"Index built successfully ({elapsed:.2f}s)"))
        return stats

    def update_embeddings(self) -> bool:
        """Update embeddings for semantic similarity."""
        if not self.embedding_manager:
            print(Colors.error("Embedding functionality not available"))
            return False

        print(Colors.yellow("Updating embeddings..."))
        start_time = time.time()

        # Load file index, building it first if it does not exist yet
        index = self.indexer.load_index()
        if not index:
            print(Colors.warning("No file index found. Building index first..."))
            self.build_index()
            index = self.indexer.load_index()

        try:
            count = self.embedding_manager.update_embeddings(index)
            elapsed = time.time() - start_time
            print(Colors.green(f"Updated {count} embeddings ({elapsed:.2f}s)"))
            return True
        except Exception as e:
            print(Colors.error(f"Failed to update embeddings: {e}"))
            return False

    def get_project_stats(self) -> Dict[str, Any]:
        """Get comprehensive project statistics."""
        stats = self.indexer.get_stats()

        embedding_stats = {}
        if self.embedding_manager:
            embedding_stats = {
                'embeddings_exist': self.embedding_manager.embeddings_exist(),
                'embedding_count': len(self.embedding_manager._load_embedding_cache()) if self.embedding_manager.embeddings_exist() else 0
            }

        project_size = self._classify_project_size(stats.total_tokens if stats else 0)

        return {
            'files': stats.total_files if stats else 0,
            'tokens': stats.total_tokens if stats else 0,
            'size_bytes': stats.total_size if stats else 0,
            'categories': stats.categories if stats else {},
            'project_size': project_size,
            'last_updated': stats.last_updated if stats else 0,
            'embeddings': embedding_stats,
            'config': {
                'cache_dir': self.config.get_cache_dir(),
                'embedding_enabled': self.config.is_embedding_enabled(),
                'exclude_patterns_count': len(self.config.get_exclude_patterns())
            }
        }

    def _classify_project_size(self, tokens: int) -> str:
        """Classify project size based on token count."""
        small_limit = self.config.get('token_limits.small_project', 500000)
        medium_limit = self.config.get('token_limits.medium_project', 2000000)

        if tokens < small_limit:
            return "small"
        elif tokens < medium_limit:
            return "medium"
        else:
            return "large"

    def cleanup_cache(self):
        """Clean up old cache files."""
        cache_dir = Path(self.config.get_cache_dir())
        if cache_dir.exists():
            print(Colors.yellow("Cleaning up cache..."))
            for file in cache_dir.glob("*"):
                if file.is_file():
                    file.unlink()
                    print(f"Removed: {file}")
            print(Colors.green("Cache cleaned"))


def main():
    """CLI entry point for indexer."""
    parser = argparse.ArgumentParser(
        description="Project File Indexer - Build and manage file indices",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python indexer.py --build        # Build file index
  python indexer.py --stats        # Show project statistics
  python indexer.py --embeddings   # Update embeddings
  python indexer.py --cleanup      # Clean cache
        """
    )
    parser.add_argument('--build', action='store_true', help='Build file index')
    parser.add_argument('--stats', action='store_true', help='Show project statistics')
    parser.add_argument('--embeddings', action='store_true', help='Update embeddings')
    parser.add_argument('--cleanup', action='store_true', help='Clean up cache files')
    parser.add_argument('--output', choices=['json', 'text'], default='text', help='Output format')
    parser.add_argument('--config', help='Configuration file path')
    parser.add_argument('--root', default='.', help='Root directory to analyze')

    args = parser.parse_args()

    # Require at least one action
    if not any([args.build, args.stats, args.embeddings, args.cleanup]):
        parser.error("At least one action is required: --build, --stats, --embeddings, or --cleanup")

    # Create indexer
    indexer = ProjectIndexer(args.config, args.root)

    try:
        if args.cleanup:
            indexer.cleanup_cache()
        if args.build:
            indexer.build_index()
        if args.embeddings:
            indexer.update_embeddings()
        if args.stats:
            stats = indexer.get_project_stats()
            if args.output == 'json':
                print(json.dumps(stats, indent=2, default=str))
            else:
                print(f"Total files: {stats['files']}")
                print(f"Total tokens: {stats['tokens']:,}")
                print(f"Project size: {stats['project_size']}")
                print(f"Categories: {stats['categories']}")
                # Only report an embedding count when embedding stats were actually collected;
                # the key itself is always present (possibly as an empty dict)
                if stats['embeddings']:
                    print(f"Embeddings: {stats['embeddings']['embedding_count']}")
    except KeyboardInterrupt:
        print(Colors.warning("\nOperation interrupted by user"))
        sys.exit(1)
    except Exception as e:
        print(Colors.error(f"Operation failed: {e}"))
        sys.exit(1)


if __name__ == "__main__":
    main()
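
Beyond the CLI shown in the epilog, ProjectIndexer can also be driven programmatically. A minimal sketch, assuming it is run from inside .claude/python_script/ so the core/ and utils/ imports above resolve; the config path and root directory here are placeholders:

# Hypothetical programmatic usage of the indexer (not part of indexer.py itself)
from indexer import ProjectIndexer

indexer = ProjectIndexer(config_path=None, root_path=".")
indexer.build_index()                  # build or refresh the file index
stats = indexer.get_project_stats()    # file/token counts, size class, config summary
print(stats["project_size"], stats["tokens"])

This mirrors what main() does for --build followed by --stats, minus the argument parsing and colored output.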