Mirror of https://github.com/catlog22/Claude-Code-Workflow.git (synced 2026-02-06 01:54:11 +08:00)
refactor: Update workflow plan system and template organization
- Remove --analyze|--deep parameters from plan.md, use default analysis
- Change .analysis to .process directory structure for better organization
- Create ANALYSIS_RESULTS.md template focused on verified results
- Add .process folder to workflow-architecture.md file structure
- Template emphasizes verification of files, methods, and commands
- Prevent execution errors from non-existent references

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
35
.claude/python_script/__init__.py
Normal file
@@ -0,0 +1,35 @@
"""
Refactored Python Script Analyzer
Modular, reusable architecture for intelligent file analysis and workflow automation.
"""

__version__ = "2.0.0"
__author__ = "Claude Development Team"
__email__ = "dev@example.com"

from .analyzer import Analyzer
from .indexer import ProjectIndexer
from .cli import AnalysisCLI
from .core import (
    Config, FileIndexer, FileInfo, IndexStats,
    ContextAnalyzer, AnalysisResult,
    PathMatcher, MatchResult, PathMatchingResult,
    EmbeddingManager, GitignoreParser
)
from .tools import ModuleAnalyzer, ModuleInfo, TechStackLoader
from .utils import Colors, CacheManager, IOHelpers

__all__ = [
    'Analyzer', 'ProjectIndexer', 'AnalysisCLI',
    # Core modules
    'Config',
    'FileIndexer', 'FileInfo', 'IndexStats',
    'ContextAnalyzer', 'AnalysisResult',
    'PathMatcher', 'MatchResult', 'PathMatchingResult',
    'EmbeddingManager', 'GitignoreParser',
    # Tools
    'ModuleAnalyzer', 'ModuleInfo',
    'TechStackLoader',
    # Utils
    'Colors', 'CacheManager', 'IOHelpers'
]
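The package's public surface is defined entirely by the exports above. A minimal usage sketch, assuming the `.claude/python_script` directory is importable as `python_script` (the import name is not fixed by this commit):

```python
# Hedged usage sketch of the exported API; the package/import name is an assumption.
from python_script import Analyzer

analyzer = Analyzer(root_path=".")                 # picks up config.yaml or built-in defaults
analyzer.build_index()                             # index the project before analysing
result = analyzer.analyze("review API endpoints")
print(result['files'])                             # relative paths selected for the prompt
print(f"confidence: {result['confidence']:.2f}")
```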
Binary file not shown.
BIN
.claude/python_script/__pycache__/file_indexer.cpython-313.pyc
Normal file
Binary file not shown.
BIN
.claude/python_script/__pycache__/path_matcher.cpython-313.pyc
Normal file
Binary file not shown.
305
.claude/python_script/analyzer.py
Normal file
@@ -0,0 +1,305 @@
#!/usr/bin/env python3
"""
Unified Path-Aware Analyzer
Main entry point for the refactored analyzer system.
Provides a clean, simple API for intelligent file analysis.
"""

import os
import sys
import argparse
import logging
import json
import time
from pathlib import Path
from typing import Dict, List, Optional, Any

# Add current directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))

from core.config import get_config
from core.file_indexer import FileIndexer, IndexStats
from core.context_analyzer import ContextAnalyzer, AnalysisResult
from core.path_matcher import PathMatcher, PathMatchingResult
from core.embedding_manager import EmbeddingManager
from utils.colors import Colors


class Analyzer:
    """Main analyzer class with simplified API."""

    def __init__(self, config_path: Optional[str] = None, root_path: str = "."):
        self.root_path = Path(root_path).resolve()
        self.config = get_config(config_path)

        # Setup logging
        logging.basicConfig(
            level=getattr(logging, self.config.get('logging.level', 'INFO')),
            format=self.config.get('logging.format', '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        )
        self.logger = logging.getLogger(__name__)

        # Initialize core components
        self.indexer = FileIndexer(self.config, str(self.root_path))
        self.context_analyzer = ContextAnalyzer(self.config)
        self.path_matcher = PathMatcher(self.config)

        # Initialize embedding manager if enabled
        self.embedding_manager = None
        if self.config.is_embedding_enabled():
            try:
                self.embedding_manager = EmbeddingManager(self.config)
            except ImportError:
                self.logger.warning("Embedding dependencies not available. Install sentence-transformers for enhanced functionality.")

    def build_index(self) -> IndexStats:
        """Build or update the file index."""
        print(Colors.yellow("Building file index..."))
        start_time = time.time()

        self.indexer.build_index()
        stats = self.indexer.get_stats()

        elapsed = time.time() - start_time
        if stats:
            print(Colors.green(f"Index built: {stats.total_files} files, ~{stats.total_tokens:,} tokens ({elapsed:.2f}s)"))
        else:
            print(Colors.green(f"Index built successfully ({elapsed:.2f}s)"))

        return stats

    def analyze(self, prompt: str, mode: str = "auto", patterns: Optional[List[str]] = None,
                token_limit: Optional[int] = None, use_embeddings: Optional[bool] = None) -> Dict[str, Any]:
        """Analyze and return relevant file paths for a given prompt."""

        print(Colors.yellow("Analyzing project and prompt..."))
        start_time = time.time()

        # Load or build index
        index = self.indexer.load_index()
        if not index:
            self.build_index()
            index = self.indexer.load_index()

        stats = self.indexer.get_stats()
        print(Colors.cyan(f"Project stats: ~{stats.total_tokens:,} tokens across {stats.total_files} files"))
        print(Colors.cyan(f"Categories: {', '.join(f'{k}: {v}' for k, v in stats.categories.items())}"))

        # Determine project size
        project_size = self._classify_project_size(stats.total_tokens)
        print(Colors.cyan(f"Project size: {project_size}"))

        # Analyze prompt context
        print(Colors.yellow("Analyzing prompt context..."))
        context_result = self.context_analyzer.analyze(prompt)

        print(Colors.cyan(f"Identified: {len(context_result.domains)} domains, {len(context_result.languages)} languages"))
        if context_result.domains:
            print(Colors.cyan(f"Top domains: {', '.join(context_result.domains[:3])}"))

        # Determine if we should use embeddings
        should_use_embeddings = use_embeddings
        if should_use_embeddings is None:
            should_use_embeddings = (
                self.embedding_manager is not None and
                self.config.is_embedding_enabled() and
                len(context_result.keywords) < 5  # Use embeddings for vague queries
            )

        similar_files = []
        if should_use_embeddings and self.embedding_manager:
            print(Colors.yellow("Using semantic similarity search..."))
            # Update embeddings if needed
            if not self.embedding_manager.embeddings_exist():
                print(Colors.yellow("Building embeddings (first run)..."))
                self.embedding_manager.update_embeddings(index)

            similar_files = self.embedding_manager.find_similar_files(prompt, index)
            print(Colors.cyan(f"Found {len(similar_files)} semantically similar files"))

        # Match files to context
        print(Colors.yellow("Matching files to context..."))
        matching_result = self.path_matcher.match_files(
            index,
            context_result,
            token_limit=token_limit,
            explicit_patterns=patterns
        )

        elapsed = time.time() - start_time

        print(Colors.green(f"Analysis complete: {len(matching_result.matched_files)} files, ~{matching_result.total_tokens:,} tokens"))
        print(Colors.cyan(f"Confidence: {matching_result.confidence_score:.2f}"))
        print(Colors.cyan(f"Execution time: {elapsed:.2f}s"))

        return {
            'files': [match.file_info.relative_path for match in matching_result.matched_files],
            'total_tokens': matching_result.total_tokens,
            'confidence': matching_result.confidence_score,
            'context': {
                'domains': context_result.domains,
                'languages': context_result.languages,
                'keywords': context_result.keywords
            },
            'stats': {
                'project_size': project_size,
                'total_files': stats.total_files,
                'analysis_time': elapsed,
                'embeddings_used': should_use_embeddings
            }
        }

    def generate_command(self, prompt: str, tool: str = "gemini", **kwargs) -> str:
        """Generate a command for external tools (gemini/codex)."""
        analysis_result = self.analyze(prompt, **kwargs)

        # Format file patterns
        file_patterns = " ".join(f"@{{{file}}}" for file in analysis_result['files'])

        if tool == "gemini":
            if len(analysis_result['files']) > 50:  # Too many files for individual patterns
                return f'gemini --all-files -p "{prompt}"'
            else:
                return f'gemini -p "{file_patterns} {prompt}"'

        elif tool == "codex":
            workspace_flag = "-s workspace-write" if analysis_result['total_tokens'] > 100000 else "-s danger-full-access"
            return f'codex {workspace_flag} --full-auto exec "{file_patterns} {prompt}"'

        else:
            raise ValueError(f"Unsupported tool: {tool}")

    def _classify_project_size(self, tokens: int) -> str:
        """Classify project size based on token count."""
        small_limit = self.config.get('token_limits.small_project', 500000)
        medium_limit = self.config.get('token_limits.medium_project', 2000000)

        if tokens < small_limit:
            return "small"
        elif tokens < medium_limit:
            return "medium"
        else:
            return "large"

    def get_project_stats(self) -> Dict[str, Any]:
        """Get comprehensive project statistics."""
        stats = self.indexer.get_stats()
        embedding_stats = {}

        if self.embedding_manager:
            embedding_stats = {
                'embeddings_exist': self.embedding_manager.embeddings_exist(),
                'embedding_count': len(self.embedding_manager.load_embeddings()) if self.embedding_manager.embeddings_exist() else 0
            }

        return {
            'files': stats.total_files,
            'tokens': stats.total_tokens,
            'size_bytes': stats.total_size,
            'categories': stats.categories,
            'project_size': self._classify_project_size(stats.total_tokens),
            'last_updated': stats.last_updated,
            'embeddings': embedding_stats,
            'config': {
                'cache_dir': self.config.get_cache_dir(),
                'embedding_enabled': self.config.is_embedding_enabled(),
                'exclude_patterns_count': len(self.config.get_exclude_patterns())
            }
        }


def main():
    """CLI entry point."""
    parser = argparse.ArgumentParser(
        description="Path-Aware Analyzer - Intelligent file pattern detection",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python analyzer.py "analyze authentication flow"
  python analyzer.py "fix database connection" --patterns "src/**/*.py"
  python analyzer.py "review API endpoints" --tool gemini
  python analyzer.py --stats
        """
    )

    parser.add_argument('prompt', nargs='?', help='Analysis prompt or task description')
    parser.add_argument('--patterns', nargs='*', help='Explicit file patterns to include')
    parser.add_argument('--tool', choices=['gemini', 'codex'], help='Generate command for specific tool')
    parser.add_argument('--output', choices=['patterns', 'json'], default='patterns', help='Output format')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    parser.add_argument('--stats', action='store_true', help='Show project statistics and exit')
    parser.add_argument('--build-index', action='store_true', help='Build file index and exit')

    args = parser.parse_args()

    # Create analyzer with default values
    analyzer = Analyzer(config_path=None, root_path=".")

    # Handle special commands
    if args.build_index:
        analyzer.build_index()
        return

    if args.stats:
        stats = analyzer.get_project_stats()
        if args.output == 'json':
            print(json.dumps(stats, indent=2, default=str))
        else:
            print(f"Total files: {stats['files']}")
            print(f"Total tokens: {stats['tokens']:,}")
            print(f"Categories: {stats['categories']}")
            if 'embeddings' in stats:
                print(f"Embeddings: {stats['embeddings']['embedding_count']}")
        return

    # Require prompt for analysis
    if not args.prompt:
        parser.error("Analysis prompt is required unless using --build-index or --stats")

    # Perform analysis
    try:
        result = analyzer.analyze(
            args.prompt,
            patterns=args.patterns,
            use_embeddings=False  # Disable embeddings by default for simplicity
        )

        # Generate output
        if args.tool:
            # Generate command using already computed result
            file_patterns = " ".join(f"@{{{file}}}" for file in result['files'])
            if args.tool == "gemini":
                if len(result['files']) > 50:
                    command = f'gemini --all-files -p "{args.prompt}"'
                else:
                    command = f'gemini -p "{file_patterns} {args.prompt}"'
            elif args.tool == "codex":
                workspace_flag = "-s workspace-write" if result['total_tokens'] > 100000 else "-s danger-full-access"
                command = f'codex {workspace_flag} --full-auto exec "{file_patterns} {args.prompt}"'
            print(command)
        elif args.output == 'json':
            print(json.dumps(result, indent=2, default=str))
        else:  # patterns output (default)
            for file_path in result['files']:
                print(f"@{{{file_path}}}")

        # Show verbose details
        if args.verbose:
            print(f"\n# Analysis Details:")
            print(f"# Matched files: {len(result['files'])}")
            print(f"# Total tokens: {result['total_tokens']:,}")
            print(f"# Confidence: {result['confidence']:.2f}")

    except KeyboardInterrupt:
        print(Colors.warning("\nAnalysis interrupted by user"))
        sys.exit(1)
    except Exception as e:
        print(Colors.error(f"Analysis failed: {e}"))
        if args.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
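For reference, `Analyzer.analyze()` returns a plain dict, so callers can consume it without touching any internal dataclasses. A hedged sketch of the shape, mirroring the return statement above; the concrete values are invented for illustration:

```python
# Illustration only: the keys match analyze()'s return statement, the values do not come from a real run.
example_result = {
    'files': ['core/path_matcher.py', 'core/context_analyzer.py'],
    'total_tokens': 8428,
    'confidence': 0.72,
    'context': {'domains': ['api'], 'languages': ['python'], 'keywords': ['review', 'endpoint']},
    'stats': {'project_size': 'small', 'total_files': 26, 'analysis_time': 1.4, 'embeddings_used': False},
}
```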
156
.claude/python_script/cache/embedding_index.json
vendored
Normal file
@@ -0,0 +1,156 @@
{
  "analyzer.py": {
    "file_path": "analyzer.py",
    "content_hash": "9a7665c34d5ac84634342f8b1425bb13",
    "embedding_hash": "fb5b5a58ec8e070620747c7313b0b2b6",
    "created_time": 1758175163.6748724,
    "vector_size": 384
  },
  "config.yaml": {
    "file_path": "config.yaml",
    "content_hash": "fc0526eea28cf37d15425035d2dd17d9",
    "embedding_hash": "4866d8bd2b14c16c448c34c0251d199e",
    "created_time": 1758175163.6748896,
    "vector_size": 384
  },
  "install.sh": {
    "file_path": "install.sh",
    "content_hash": "6649df913eadef34fa2f253aed541dfd",
    "embedding_hash": "54af072da7c1139108c79b64bd1ee291",
    "created_time": 1758175163.6748998,
    "vector_size": 384
  },
  "requirements.txt": {
    "file_path": "requirements.txt",
    "content_hash": "e981a0aa103bdec4a99b75831967766d",
    "embedding_hash": "37bc877ea041ad606234262423cf578a",
    "created_time": 1758175163.6749053,
    "vector_size": 384
  },
  "setup.py": {
    "file_path": "setup.py",
    "content_hash": "7b93af473bfe37284c6cf493458bc421",
    "embedding_hash": "bdda9a6e8d3bd34465436b119a17e263",
    "created_time": 1758175163.6749127,
    "vector_size": 384
  },
  "__init__.py": {
    "file_path": "__init__.py",
    "content_hash": "c981c4ffc664bbd3c253d0dc82f48ac6",
    "embedding_hash": "3ab1a0c5d0d4bd832108b7a6ade0ad9c",
    "created_time": 1758175163.6749194,
    "vector_size": 384
  },
  "cache\\file_index.json": {
    "file_path": "cache\\file_index.json",
    "content_hash": "6534fef14d12e39aff1dc0dcf5b91d1d",
    "embedding_hash": "d76efa530f0d21e52f9d5b3a9ccc358c",
    "created_time": 1758175163.6749268,
    "vector_size": 384
  },
  "core\\config.py": {
    "file_path": "core\\config.py",
    "content_hash": "ee72a95cea7397db8dd25b10a4436eaa",
    "embedding_hash": "65d1fca1cf59bcd36409c3b11f50aab1",
    "created_time": 1758175163.6749349,
    "vector_size": 384
  },
  "core\\context_analyzer.py": {
    "file_path": "core\\context_analyzer.py",
    "content_hash": "2e9ac2050e463c9d3f94bad23e65d4e5",
    "embedding_hash": "dfb51c8eaafd96ac544b3d9c8dcd3f51",
    "created_time": 1758175163.674943,
    "vector_size": 384
  },
  "core\\embedding_manager.py": {
    "file_path": "core\\embedding_manager.py",
    "content_hash": "cafa24b0431c6463266dde8b37fc3ab7",
    "embedding_hash": "531c3206f0caf9789873719cdd644e99",
    "created_time": 1758175163.6749508,
    "vector_size": 384
  },
  "core\\file_indexer.py": {
    "file_path": "core\\file_indexer.py",
    "content_hash": "0626c89c060d6022261ca094aed47093",
    "embedding_hash": "93d5fc6e84334d3bd9be0f07f9823b20",
    "created_time": 1758175163.6749592,
    "vector_size": 384
  },
  "core\\gitignore_parser.py": {
    "file_path": "core\\gitignore_parser.py",
    "content_hash": "5f1d87fb03bc3b19833406be0fa5125f",
    "embedding_hash": "784be673b6b428cce60ab5390bfc7f08",
    "created_time": 1758175163.6749675,
    "vector_size": 384
  },
  "core\\path_matcher.py": {
    "file_path": "core\\path_matcher.py",
    "content_hash": "89132273951a091610c1579ccc44f3a7",
    "embedding_hash": "e01ca0180c2834a514ad6d8e62315ce0",
    "created_time": 1758175163.6749754,
    "vector_size": 384
  },
  "core\\__init__.py": {
    "file_path": "core\\__init__.py",
    "content_hash": "3a323be141f1ce6b9d9047aa444029b0",
    "embedding_hash": "3fc5a5427067e59b054428083a5899ca",
    "created_time": 1758175163.6749818,
    "vector_size": 384
  },
  "tools\\module_analyzer.py": {
    "file_path": "tools\\module_analyzer.py",
    "content_hash": "926289c2fd8d681ed20c445d2ac34fa1",
    "embedding_hash": "3378fcde062914859b765d8dfce1207f",
    "created_time": 1758175163.67499,
    "vector_size": 384
  },
  "tools\\tech_stack.py": {
    "file_path": "tools\\tech_stack.py",
    "content_hash": "eef6eabcbc8ba0ece0dfacb9314f3585",
    "embedding_hash": "bc3aa5334ef17328490bc5a8162d776a",
    "created_time": 1758175163.674997,
    "vector_size": 384
  },
  "tools\\workflow_updater.py": {
    "file_path": "tools\\workflow_updater.py",
    "content_hash": "40d7d884e0db24eb45aa27739fef8210",
    "embedding_hash": "00488f4acdb7fe1b5126da4da3bb9869",
    "created_time": 1758175163.6750047,
    "vector_size": 384
  },
  "tools\\__init__.py": {
    "file_path": "tools\\__init__.py",
    "content_hash": "41bf583571f4355e4af90842d0674b1f",
    "embedding_hash": "fccd7745f9e1e242df3bace7cee9759c",
    "created_time": 1758175163.6750097,
    "vector_size": 384
  },
  "utils\\cache.py": {
    "file_path": "utils\\cache.py",
    "content_hash": "dc7c08bcd9af9ae465020997e4b9127e",
    "embedding_hash": "68394bc0f57a0f66b83a57249b39957d",
    "created_time": 1758175163.6750169,
    "vector_size": 384
  },
  "utils\\colors.py": {
    "file_path": "utils\\colors.py",
    "content_hash": "8ce555a2dcf4057ee7adfb3286d47da2",
    "embedding_hash": "1b18e22acb095e83ed291b6c5dc7a2ce",
    "created_time": 1758175163.6750243,
    "vector_size": 384
  },
  "utils\\io_helpers.py": {
    "file_path": "utils\\io_helpers.py",
    "content_hash": "fb276a0e46b28f80d5684368a8b15e57",
    "embedding_hash": "f6ff8333b1afc5b98d4644f334c18cda",
    "created_time": 1758175163.6750326,
    "vector_size": 384
  },
  "utils\\__init__.py": {
    "file_path": "utils\\__init__.py",
    "content_hash": "f305ede9cbdec2f2e0189a4b89558b7e",
    "embedding_hash": "7d3f10fe4210d40eafd3c065b8e0c8b7",
    "created_time": 1758175163.6750393,
    "vector_size": 384
  }
}
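The index pairs each file with a content hash so embeddings are only rebuilt when the underlying file changes. A hedged sketch of how such a cache is typically consulted; it assumes MD5-style hashing (the 32-hex-character hashes above suggest it, but embedding_manager.py's actual implementation is not shown in this commit):

```python
# Sketch only: how a content-hash index like the one above can drive cache invalidation.
import hashlib
import json
from pathlib import Path

index = json.loads(Path("cache/embedding_index.json").read_text())

def needs_reembedding(rel_path: str) -> bool:
    entry = index.get(rel_path)
    if entry is None:
        return True  # file has never been embedded
    current = hashlib.md5(Path(rel_path).read_bytes()).hexdigest()
    return current != entry["content_hash"]  # re-embed only when the content changed
```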
BIN
.claude/python_script/cache/embeddings.pkl
vendored
Normal file
Binary file not shown.
276
.claude/python_script/cache/file_index.json
vendored
Normal file
@@ -0,0 +1,276 @@
{
  "stats": {
    "total_files": 26,
    "total_tokens": 56126,
    "total_size": 246519,
    "categories": {
      "code": 21,
      "config": 3,
      "docs": 1,
      "other": 1
    },
    "last_updated": 1758177270.9103189
  },
  "files": {
    "analyzer.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\analyzer.py",
      "relative_path": "analyzer.py",
      "size": 12595,
      "modified_time": 1758175179.730658,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 3072,
      "content_hash": "3fb090745b5080e0731e7ef3fc94029d"
    },
    "cli.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\cli.py",
      "relative_path": "cli.py",
      "size": 8329,
      "modified_time": 1758177193.3710027,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 2030,
      "content_hash": "b9f0b5d6a154cf51c8665b2344c9faf8"
    },
    "config.yaml": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\config.yaml",
      "relative_path": "config.yaml",
      "size": 4317,
      "modified_time": 1758163450.6223683,
      "extension": ".yaml",
      "category": "config",
      "estimated_tokens": 1040,
      "content_hash": "b431b73dfa86ff83145468bbf4422a79"
    },
    "indexer.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\indexer.py",
      "relative_path": "indexer.py",
      "size": 7776,
      "modified_time": 1758177151.2160237,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 1893,
      "content_hash": "f88b5e5bffce26f3170974df2906aac3"
    },
    "install.sh": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\install.sh",
      "relative_path": "install.sh",
      "size": 5236,
      "modified_time": 1758161898.317552,
      "extension": ".sh",
      "category": "code",
      "estimated_tokens": 1262,
      "content_hash": "cc3a9121a0b8281457270f30ad76f5f6"
    },
    "requirements.txt": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\requirements.txt",
      "relative_path": "requirements.txt",
      "size": 495,
      "modified_time": 1758164967.7707567,
      "extension": ".txt",
      "category": "docs",
      "estimated_tokens": 118,
      "content_hash": "aea2ba14dfa7b37b1dde5518de87d956"
    },
    "setup.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\setup.py",
      "relative_path": "setup.py",
      "size": 2860,
      "modified_time": 1758177212.9095325,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 692,
      "content_hash": "609abf8b9c84a09f6a59d5815eb90bc5"
    },
    "__init__.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\__init__.py",
      "relative_path": "__init__.py",
      "size": 1065,
      "modified_time": 1758177224.8017242,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 257,
      "content_hash": "47368b235086fc0c75ba34a824c58506"
    },
    "cache\\embeddings.pkl": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\cache\\embeddings.pkl",
      "relative_path": "cache\\embeddings.pkl",
      "size": 35109,
      "modified_time": 1758175163.6754165,
      "extension": ".pkl",
      "category": "other",
      "estimated_tokens": 4713,
      "content_hash": "b8ed5c068acd5ed52ba10839701a5a24"
    },
    "cache\\embedding_index.json": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\cache\\embedding_index.json",
      "relative_path": "cache\\embedding_index.json",
      "size": 5589,
      "modified_time": 1758175163.6764157,
      "extension": ".json",
      "category": "config",
      "estimated_tokens": 1358,
      "content_hash": "5c2ba41b1b69ce19d2fc3b5854f6ee53"
    },
    "cache\\file_index.json": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\cache\\file_index.json",
      "relative_path": "cache\\file_index.json",
      "size": 12164,
      "modified_time": 1758165699.0883024,
      "extension": ".json",
      "category": "config",
      "estimated_tokens": 2957,
      "content_hash": "73563db28a2808aa28544c0275b97f94"
    },
    "core\\config.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\core\\config.py",
      "relative_path": "core\\config.py",
      "size": 12266,
      "modified_time": 1758164531.5934324,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 2985,
      "content_hash": "d85aedc01a528b486d41acbd823181d7"
    },
    "core\\context_analyzer.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\core\\context_analyzer.py",
      "relative_path": "core\\context_analyzer.py",
      "size": 15002,
      "modified_time": 1758164846.7665854,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 3661,
      "content_hash": "677903b5aaf3db13575ca1ca99ec7c16"
    },
    "core\\embedding_manager.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\core\\embedding_manager.py",
      "relative_path": "core\\embedding_manager.py",
      "size": 17271,
      "modified_time": 1758166063.1635072,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 4204,
      "content_hash": "d8f52cb93140a46fe3d22d465ec01b22"
    },
    "core\\file_indexer.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\core\\file_indexer.py",
      "relative_path": "core\\file_indexer.py",
      "size": 14484,
      "modified_time": 1758164612.5888917,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 3525,
      "content_hash": "1518d309108f3300417b65f6234241d1"
    },
    "core\\gitignore_parser.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\core\\gitignore_parser.py",
      "relative_path": "core\\gitignore_parser.py",
      "size": 6757,
      "modified_time": 1758164472.643646,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 1644,
      "content_hash": "9cd97725576727080aaafd329d9ce2c4"
    },
    "core\\path_matcher.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\core\\path_matcher.py",
      "relative_path": "core\\path_matcher.py",
      "size": 19568,
      "modified_time": 1758166045.8395746,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 4767,
      "content_hash": "f1dc44dc3ed67f100770aea40197623f"
    },
    "core\\__init__.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\core\\__init__.py",
      "relative_path": "core\\__init__.py",
      "size": 712,
      "modified_time": 1758164419.4437866,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 172,
      "content_hash": "b25991cb8d977021362f45e121e89de7"
    },
    "tools\\module_analyzer.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\tools\\module_analyzer.py",
      "relative_path": "tools\\module_analyzer.py",
      "size": 14273,
      "modified_time": 1758164687.488236,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 3476,
      "content_hash": "b958ec7ed264242f2bb30b1cca66b144"
    },
    "tools\\tech_stack.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\tools\\tech_stack.py",
      "relative_path": "tools\\tech_stack.py",
      "size": 7576,
      "modified_time": 1758164695.643722,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 1843,
      "content_hash": "f391a45d8254f0c4f4f789027dd69afc"
    },
    "tools\\workflow_updater.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\tools\\workflow_updater.py",
      "relative_path": "tools\\workflow_updater.py",
      "size": 9577,
      "modified_time": 1758164703.2230499,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 2334,
      "content_hash": "526edf0cfbe3c2041135eace9f89ef13"
    },
    "tools\\__init__.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\tools\\__init__.py",
      "relative_path": "tools\\__init__.py",
      "size": 329,
      "modified_time": 1758165927.9923615,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 79,
      "content_hash": "139aa450d7511347cc6799c471eac745"
    },
    "utils\\cache.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\utils\\cache.py",
      "relative_path": "utils\\cache.py",
      "size": 12067,
      "modified_time": 1758164781.2914226,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 2929,
      "content_hash": "39e49b731d601fafac74e96ed074e654"
    },
    "utils\\colors.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\utils\\colors.py",
      "relative_path": "utils\\colors.py",
      "size": 6959,
      "modified_time": 1758165650.9865932,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 1678,
      "content_hash": "8bb57134555d8fb07d2e351d4e100f0f"
    },
    "utils\\io_helpers.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\utils\\io_helpers.py",
      "relative_path": "utils\\io_helpers.py",
      "size": 13773,
      "modified_time": 1758164823.513003,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 3349,
      "content_hash": "aa54747c49319cc2c90c0544c668009a"
    },
    "utils\\__init__.py": {
      "path": "D:\\Claude_dms3\\.claude\\python_script\\utils\\__init__.py",
      "relative_path": "utils\\__init__.py",
      "size": 370,
      "modified_time": 1758164433.7142198,
      "extension": ".py",
      "category": "code",
      "estimated_tokens": 88,
      "content_hash": "62ec4a34f1643a23c79207061bdb8d49"
    }
  }
}
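Because the index is plain JSON with a fixed per-file schema, downstream tooling can consume it directly. A minimal sketch using only the field names visible in the entries above:

```python
# Summarise a file_index.json like the one above: total size and tokens per category.
import json
from collections import Counter
from pathlib import Path

index = json.loads(Path("cache/file_index.json").read_text())

tokens_by_category = Counter()
for info in index["files"].values():
    tokens_by_category[info["category"]] += info["estimated_tokens"]

print(index["stats"]["total_files"], "files,", index["stats"]["total_tokens"], "estimated tokens")
print(dict(tokens_by_category))  # e.g. {'code': ..., 'config': ..., 'docs': ..., 'other': ...}
```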
207
.claude/python_script/cli.py
Normal file
@@ -0,0 +1,207 @@
#!/usr/bin/env python3
"""
CLI Interface for Path-Aware Analysis
Provides command-line interface for intelligent file analysis and pattern matching.
"""

import sys
import argparse
import logging
import json
import time
from pathlib import Path
from typing import Dict, List, Optional, Any

# Add current directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))

from core.config import get_config
from core.file_indexer import FileIndexer
from core.context_analyzer import ContextAnalyzer
from core.path_matcher import PathMatcher
from utils.colors import Colors


class AnalysisCLI:
    """Command-line interface for file analysis and pattern matching."""

    def __init__(self, config_path: Optional[str] = None, root_path: str = "."):
        self.root_path = Path(root_path).resolve()
        self.config = get_config(config_path)

        # Setup logging
        logging.basicConfig(
            level=getattr(logging, self.config.get('logging.level', 'INFO')),
            format=self.config.get('logging.format', '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        )
        self.logger = logging.getLogger(__name__)

        # Initialize core components
        self.indexer = FileIndexer(self.config, str(self.root_path))
        self.context_analyzer = ContextAnalyzer(self.config)
        self.path_matcher = PathMatcher(self.config)

    def analyze(self, prompt: str, patterns: Optional[List[str]] = None) -> Dict[str, Any]:
        """Analyze and return relevant file paths for a given prompt."""
        print(Colors.yellow("Analyzing project and prompt..."))
        start_time = time.time()

        # Load index (build if not exists)
        index = self.indexer.load_index()
        if not index:
            print(Colors.warning("No file index found. Run 'python indexer.py --build' first or use --auto-build"))
            return {}

        stats = self.indexer.get_stats()
        print(Colors.cyan(f"Project stats: ~{stats.total_tokens:,} tokens across {stats.total_files} files"))
        print(Colors.cyan(f"Categories: {', '.join(f'{k}: {v}' for k, v in stats.categories.items())}"))

        # Determine project size
        project_size = self._classify_project_size(stats.total_tokens)
        print(Colors.cyan(f"Project size: {project_size}"))

        # Analyze prompt context
        print(Colors.yellow("Analyzing prompt context..."))
        context_result = self.context_analyzer.analyze(prompt)

        print(Colors.cyan(f"Identified: {len(context_result.domains)} domains, {len(context_result.languages)} languages"))
        if context_result.domains:
            print(Colors.cyan(f"Top domains: {', '.join(context_result.domains[:3])}"))

        # Match files to context
        print(Colors.yellow("Matching files to context..."))
        matching_result = self.path_matcher.match_files(
            index,
            context_result,
            explicit_patterns=patterns
        )

        elapsed = time.time() - start_time

        print(Colors.green(f"Analysis complete: {len(matching_result.matched_files)} files, ~{matching_result.total_tokens:,} tokens"))
        print(Colors.cyan(f"Confidence: {matching_result.confidence_score:.2f}"))
        print(Colors.cyan(f"Execution time: {elapsed:.2f}s"))

        return {
            'files': [match.file_info.relative_path for match in matching_result.matched_files],
            'total_tokens': matching_result.total_tokens,
            'confidence': matching_result.confidence_score,
            'context': {
                'domains': context_result.domains,
                'languages': context_result.languages,
                'keywords': context_result.keywords
            },
            'stats': {
                'project_size': project_size,
                'total_files': stats.total_files,
                'analysis_time': elapsed
            }
        }

    def generate_command(self, prompt: str, tool: str, files: List[str]) -> str:
        """Generate a command for external tools (gemini/codex)."""
        file_patterns = " ".join(f"@{{{file}}}" for file in files)

        if tool == "gemini":
            if len(files) > 50:
                return f'gemini --all-files -p "{prompt}"'
            else:
                return f'gemini -p "{file_patterns} {prompt}"'
        elif tool == "codex":
            # Estimate tokens for workspace selection
            total_tokens = sum(len(file) * 50 for file in files)  # Rough estimate
            workspace_flag = "-s workspace-write" if total_tokens > 100000 else "-s danger-full-access"
            return f'codex {workspace_flag} --full-auto exec "{file_patterns} {prompt}"'
        else:
            raise ValueError(f"Unsupported tool: {tool}")

    def _classify_project_size(self, tokens: int) -> str:
        """Classify project size based on token count."""
        small_limit = self.config.get('token_limits.small_project', 500000)
        medium_limit = self.config.get('token_limits.medium_project', 2000000)

        if tokens < small_limit:
            return "small"
        elif tokens < medium_limit:
            return "medium"
        else:
            return "large"

    def auto_build_index(self):
        """Auto-build index if it doesn't exist."""
        from indexer import ProjectIndexer
        indexer = ProjectIndexer(root_path=str(self.root_path))
        indexer.build_index()


def main():
    """CLI entry point for analysis."""
    parser = argparse.ArgumentParser(
        description="Path-Aware Analysis CLI - Intelligent file pattern detection",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python cli.py "analyze authentication flow"
  python cli.py "fix database connection" --patterns "src/**/*.py"
  python cli.py "review API endpoints" --tool gemini
        """
    )

    parser.add_argument('prompt', help='Analysis prompt or task description')
    parser.add_argument('--patterns', nargs='*', help='Explicit file patterns to include')
    parser.add_argument('--tool', choices=['gemini', 'codex'], help='Generate command for specific tool')
    parser.add_argument('--output', choices=['patterns', 'json'], default='patterns', help='Output format')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    parser.add_argument('--auto-build', action='store_true', help='Auto-build index if missing')
    parser.add_argument('--config', help='Configuration file path')
    parser.add_argument('--root', default='.', help='Root directory to analyze')

    args = parser.parse_args()

    # Create CLI interface
    cli = AnalysisCLI(args.config, args.root)

    try:
        # Auto-build index if requested and missing
        if args.auto_build:
            index = cli.indexer.load_index()
            if not index:
                print(Colors.yellow("Auto-building missing index..."))
                cli.auto_build_index()

        # Perform analysis
        result = cli.analyze(args.prompt, patterns=args.patterns)

        if not result:
            sys.exit(1)

        # Generate output
        if args.tool:
            command = cli.generate_command(args.prompt, args.tool, result['files'])
            print(command)
        elif args.output == 'json':
            print(json.dumps(result, indent=2, default=str))
        else:  # patterns output (default)
            for file_path in result['files']:
                print(f"@{{{file_path}}}")

        # Show verbose details
        if args.verbose:
            print(f"\n# Analysis Details:")
            print(f"# Matched files: {len(result['files'])}")
            print(f"# Total tokens: {result['total_tokens']:,}")
            print(f"# Confidence: {result['confidence']:.2f}")

    except KeyboardInterrupt:
        print(Colors.warning("\nAnalysis interrupted by user"))
        sys.exit(1)
    except Exception as e:
        print(Colors.error(f"Analysis failed: {e}"))
        if args.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
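`AnalysisCLI.generate_command` can also be driven programmatically when the caller already knows which files it wants to pass along. A hedged sketch (the file list here is invented for illustration):

```python
# Hypothetical programmatic use of AnalysisCLI; the file list is illustrative only.
cli = AnalysisCLI(config_path=None, root_path=".")
files = ["core/path_matcher.py", "core/context_analyzer.py"]
print(cli.generate_command("review API endpoints", tool="gemini", files=files))
# -> gemini -p "@{core/path_matcher.py} @{core/context_analyzer.py} review API endpoints"
```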
158
.claude/python_script/config.yaml
Normal file
@@ -0,0 +1,158 @@
# Configuration for UltraThink Path-Aware Analyzer
# Based on gemini-wrapper patterns with intelligent enhancements

# Token limits for project size classification
token_limits:
  small_project: 500000      # <500K tokens - include most files
  medium_project: 2000000    # 500K-2M tokens - smart selection
  large_project: 10000000    # >2M tokens - precise targeting
  max_files: 1000            # Maximum files to process

# File patterns to exclude (performance and relevance)
exclude_patterns:
  - "*/node_modules/*"
  - "*/.git/*"
  - "*/build/*"
  - "*/dist/*"
  - "*/.next/*"
  - "*/.nuxt/*"
  - "*/target/*"
  - "*/vendor/*"
  - "*/__pycache__/*"
  - "*.pyc"
  - "*.pyo"
  - "*.log"
  - "*.tmp"
  - "*.temp"
  - "*.history"

# File extensions grouped by category
file_extensions:
  code:
    - ".py"
    - ".js"
    - ".ts"
    - ".tsx"
    - ".jsx"
    - ".java"
    - ".cpp"
    - ".c"
    - ".h"
    - ".rs"
    - ".go"
    - ".php"
    - ".rb"
    - ".sh"
    - ".bash"
  docs:
    - ".md"
    - ".txt"
    - ".rst"
    - ".adoc"
  config:
    - ".json"
    - ".yaml"
    - ".yml"
    - ".toml"
    - ".ini"
    - ".env"
  web:
    - ".html"
    - ".css"
    - ".scss"
    - ".sass"
    - ".xml"

# Embedding/RAG configuration
embedding:
  enabled: true                # Set to true to enable RAG features
  model: "all-MiniLM-L6-v2"    # Lightweight sentence transformer
  cache_dir: "cache"
  similarity_threshold: 0.3
  max_context_length: 512
  batch_size: 32

# Context analysis settings
context_analysis:
  # Keywords that indicate specific domains/modules
  domain_keywords:
    auth: ["auth", "login", "user", "password", "jwt", "token", "session"]
    database: ["db", "database", "sql", "query", "model", "schema", "migration"]
    api: ["api", "endpoint", "route", "controller", "service", "handler"]
    frontend: ["ui", "component", "view", "template", "style", "css"]
    backend: ["server", "service", "logic", "business", "core"]
    test: ["test", "spec", "unit", "integration", "mock"]
    config: ["config", "setting", "environment", "env"]
    util: ["util", "helper", "common", "shared", "lib"]

  # Programming language indicators
  language_indicators:
    python: [".py", "python", "pip", "requirements.txt", "setup.py"]
    javascript: [".js", ".ts", "npm", "package.json", "node"]
    java: [".java", "maven", "gradle", "pom.xml"]
    go: [".go", "go.mod", "go.sum"]
    rust: [".rs", "cargo", "Cargo.toml"]

# Path matching and ranking
path_matching:
  # Scoring weights for relevance calculation
  weights:
    keyword_match: 0.4         # Direct keyword match in filename/path
    extension_match: 0.2       # File extension relevance
    directory_context: 0.2     # Directory name relevance
    file_size_penalty: 0.1     # Penalty for very large files
    recency_bonus: 0.1         # Bonus for recently modified files

  # Maximum files to return per category
  max_files_per_category: 20

  # Minimum relevance score to include file
  min_relevance_score: 0.1

# Output formatting
output:
  # How to format path patterns
  pattern_format: "@{{{path}}}"    # Results in @{path/to/file}

  # Include project documentation by default
  always_include:
    - "CLAUDE.md"
    - "**/CLAUDE.md"
    - "README.md"
    - "docs/**/*.md"

  # Maximum total files in output
  max_total_files: 50

# Analysis modes
modes:
  auto:
    description: "Fully automatic path detection"
    enabled: true
  guided:
    description: "Suggest paths for user confirmation"
    enabled: true
  pattern:
    description: "Use explicit patterns from user"
    enabled: true
  hybrid:
    description: "Combine auto-detection with user patterns"
    enabled: true

# Performance settings
performance:
  # Cache settings
  cache_enabled: true
  cache_ttl: 3600              # Cache TTL in seconds (1 hour)

  # File size limits
  max_file_size: 10485760      # 10MB max file size to analyze

  # Parallel processing
  max_workers: 4               # Number of parallel workers for file processing

# Logging configuration
logging:
  level: "INFO"                # DEBUG, INFO, WARNING, ERROR
  file: ".claude/scripts/ultrathink/ultrathink.log"
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
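The analyzer reads these values through the dot-notation `Config.get` accessor shown later in core/config.py, so the YAML keys map one-to-one onto lookup paths. A minimal sketch:

```python
# Reading config.yaml values via the Config accessor defined in core/config.py.
from core.config import get_config

config = get_config("config.yaml")
config.get('token_limits.small_project', 500000)    # -> 500000
config.get('embedding.model')                        # -> "all-MiniLM-L6-v2"
config.get('path_matching.weights.keyword_match')    # -> 0.4
config.is_embedding_enabled()                        # -> True
```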
25
.claude/python_script/core/__init__.py
Normal file
@@ -0,0 +1,25 @@
"""
Core modules for the Python script analyzer.
Provides unified interfaces for file indexing, context analysis, and path matching.
"""

from .config import Config
from .file_indexer import FileIndexer, FileInfo, IndexStats
from .context_analyzer import ContextAnalyzer, AnalysisResult
from .path_matcher import PathMatcher, MatchResult, PathMatchingResult
from .embedding_manager import EmbeddingManager
from .gitignore_parser import GitignoreParser

__all__ = [
    'Config',
    'FileIndexer',
    'FileInfo',
    'IndexStats',
    'ContextAnalyzer',
    'AnalysisResult',
    'PathMatcher',
    'MatchResult',
    'PathMatchingResult',
    'EmbeddingManager',
    'GitignoreParser'
]
BIN
.claude/python_script/core/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
.claude/python_script/core/__pycache__/config.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
327
.claude/python_script/core/config.py
Normal file
@@ -0,0 +1,327 @@
#!/usr/bin/env python3
"""
Configuration Management Module
Provides unified configuration management with gitignore integration.
"""

import os
import yaml
import logging
from pathlib import Path
from typing import Dict, Any, Optional, List
from .gitignore_parser import get_all_gitignore_patterns


class Config:
    """Singleton configuration manager with hierarchical loading."""

    _instance = None
    _initialized = False

    def __new__(cls, config_path: Optional[str] = None):
        if cls._instance is None:
            cls._instance = super(Config, cls).__new__(cls)
        return cls._instance

    def __init__(self, config_path: Optional[str] = None):
        if self._initialized:
            return

        self.config_path = config_path
        self.config = {}
        self.logger = logging.getLogger(__name__)

        self._load_config()
        self._add_gitignore_patterns()
        self._apply_env_overrides()
        self._validate_config()

        self._initialized = True

    def _load_config(self):
        """Load configuration from file with fallback hierarchy."""
        config_paths = self._get_config_paths()

        for config_file in config_paths:
            if config_file.exists():
                try:
                    with open(config_file, 'r', encoding='utf-8') as f:
                        loaded_config = yaml.safe_load(f)
                        if loaded_config:
                            self.config = self._merge_configs(self.config, loaded_config)
                            self.logger.info(f"Loaded config from {config_file}")
                except Exception as e:
                    self.logger.warning(f"Failed to load config from {config_file}: {e}")

        # Apply default config if no config loaded
        if not self.config:
            self.config = self._get_default_config()
            self.logger.info("Using default configuration")

    def _get_config_paths(self) -> List[Path]:
        """Get ordered list of config file paths to check."""
        paths = []

        # 1. Explicitly provided config path
        if self.config_path:
            paths.append(Path(self.config_path))

        # 2. Current directory config.yaml
        paths.append(Path('config.yaml'))

        # 3. Script directory config.yaml
        script_dir = Path(__file__).parent.parent
        paths.append(script_dir / 'config.yaml')

        # 4. Default config in script directory
        paths.append(script_dir / 'default_config.yaml')

        return paths

    def _get_default_config(self) -> Dict[str, Any]:
        """Get default configuration."""
        return {
            'token_limits': {
                'small_project': 500000,
                'medium_project': 2000000,
                'large_project': 10000000,
                'max_files': 1000
            },
            'exclude_patterns': [
                "*/node_modules/*",
                "*/.git/*",
                "*/build/*",
                "*/dist/*",
                "*/.next/*",
                "*/.nuxt/*",
                "*/target/*",
                "*/vendor/*",
                "*/__pycache__/*",
                "*.pyc",
                "*.pyo",
                "*.log",
                "*.tmp",
                "*.temp",
                "*.history"
            ],
            'file_extensions': {
                'code': ['.py', '.js', '.ts', '.tsx', '.jsx', '.java', '.cpp', '.c', '.h', '.rs', '.go', '.php', '.rb', '.sh', '.bash'],
                'docs': ['.md', '.txt', '.rst', '.adoc'],
                'config': ['.json', '.yaml', '.yml', '.toml', '.ini', '.env'],
                'web': ['.html', '.css', '.scss', '.sass', '.xml']
            },
            'embedding': {
                'enabled': True,
                'model': 'all-MiniLM-L6-v2',
                'cache_dir': 'cache',
                'similarity_threshold': 0.3,
                'max_context_length': 512,
                'batch_size': 32
            },
            'context_analysis': {
                'domain_keywords': {
                    'auth': ['auth', 'login', 'user', 'password', 'jwt', 'token', 'session'],
                    'database': ['db', 'database', 'sql', 'query', 'model', 'schema', 'migration'],
                    'api': ['api', 'endpoint', 'route', 'controller', 'service', 'handler'],
                    'frontend': ['ui', 'component', 'view', 'template', 'style', 'css'],
                    'backend': ['server', 'service', 'logic', 'business', 'core'],
                    'test': ['test', 'spec', 'unit', 'integration', 'mock'],
                    'config': ['config', 'setting', 'environment', 'env'],
                    'util': ['util', 'helper', 'common', 'shared', 'lib']
                },
                'language_indicators': {
                    'python': ['.py', 'python', 'pip', 'requirements.txt', 'setup.py'],
                    'javascript': ['.js', '.ts', 'npm', 'package.json', 'node'],
                    'java': ['.java', 'maven', 'gradle', 'pom.xml'],
                    'go': ['.go', 'go.mod', 'go.sum'],
                    'rust': ['.rs', 'cargo', 'Cargo.toml']
                }
            },
            'path_matching': {
                'weights': {
                    'keyword_match': 0.4,
                    'extension_match': 0.2,
                    'directory_context': 0.2,
                    'file_size_penalty': 0.1,
                    'recency_bonus': 0.1
                },
                'max_files_per_category': 20,
                'min_relevance_score': 0.1
            },
            'output': {
                'pattern_format': '@{{{path}}}',
                'always_include': [
                    'CLAUDE.md',
                    '**/CLAUDE.md',
                    'README.md',
                    'docs/**/*.md'
                ],
                'max_total_files': 50
            },
            'performance': {
                'cache_enabled': True,
                'cache_ttl': 3600,
                'max_file_size': 10485760,
                'max_workers': 4
            },
            'logging': {
                'level': 'INFO',
                'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            }
        }

    def _merge_configs(self, base: Dict, override: Dict) -> Dict:
        """Recursively merge configuration dictionaries."""
        result = base.copy()

        for key, value in override.items():
            if key in result and isinstance(result[key], dict) and isinstance(value, dict):
                result[key] = self._merge_configs(result[key], value)
            else:
                result[key] = value

        return result

    def _add_gitignore_patterns(self):
        """Add patterns from .gitignore files to exclude_patterns."""
        try:
            # Find root directory (current working directory or script parent)
            root_dir = Path.cwd()

            gitignore_patterns = get_all_gitignore_patterns(str(root_dir))

            if gitignore_patterns:
                # Ensure exclude_patterns exists
                if 'exclude_patterns' not in self.config:
                    self.config['exclude_patterns'] = []

                # Add gitignore patterns, avoiding duplicates
                existing_patterns = set(self.config['exclude_patterns'])
                new_patterns = [p for p in gitignore_patterns if p not in existing_patterns]

                self.config['exclude_patterns'].extend(new_patterns)

                self.logger.info(f"Added {len(new_patterns)} patterns from .gitignore files")

        except Exception as e:
            self.logger.warning(f"Failed to load .gitignore patterns: {e}")

    def _apply_env_overrides(self):
        """Apply environment variable overrides."""
        env_mappings = {
            'ANALYZER_CACHE_DIR': ('embedding', 'cache_dir'),
            'ANALYZER_LOG_LEVEL': ('logging', 'level'),
            'ANALYZER_MAX_FILES': ('token_limits', 'max_files'),
            'ANALYZER_EMBEDDING_MODEL': ('embedding', 'model')
        }

        for env_var, config_path in env_mappings.items():
            env_value = os.getenv(env_var)
            if env_value:
                self._set_nested_value(config_path, env_value)
                self.logger.info(f"Applied environment override: {env_var} = {env_value}")

    def _set_nested_value(self, path: tuple, value: str):
        """Set a nested configuration value."""
        current = self.config
        for key in path[:-1]:
            if key not in current:
                current[key] = {}
            current = current[key]

        # Try to convert value to appropriate type
        if isinstance(current.get(path[-1]), int):
            try:
                value = int(value)
            except ValueError:
                pass
        elif isinstance(current.get(path[-1]), bool):
            value = value.lower() in ('true', '1', 'yes', 'on')

        current[path[-1]] = value

    def _validate_config(self):
        """Validate configuration values."""
        required_sections = ['exclude_patterns', 'file_extensions', 'token_limits']

        for section in required_sections:
            if section not in self.config:
                self.logger.warning(f"Missing required config section: {section}")

        # Validate token limits
        if 'token_limits' in self.config:
            limits = self.config['token_limits']
            if limits.get('small_project', 0) >= limits.get('medium_project', 0):
                self.logger.warning("Token limit configuration may be incorrect")

    def get(self, path: str, default: Any = None) -> Any:
        """Get configuration value using dot notation."""
        keys = path.split('.')
        current = self.config

        try:
            for key in keys:
                current = current[key]
            return current
        except (KeyError, TypeError):
            return default

    def set(self, path: str, value: Any):
        """Set configuration value using dot notation."""
        keys = path.split('.')
        current = self.config

        for key in keys[:-1]:
            if key not in current:
                current[key] = {}
            current = current[key]

        current[keys[-1]] = value

    def get_exclude_patterns(self) -> List[str]:
        """Get all exclude patterns including gitignore patterns."""
        return self.config.get('exclude_patterns', [])

    def get_file_extensions(self) -> Dict[str, List[str]]:
        """Get file extension mappings."""
        return self.config.get('file_extensions', {})

    def is_embedding_enabled(self) -> bool:
        """Check if embedding functionality is enabled."""
        return self.config.get('embedding', {}).get('enabled', False)

    def get_cache_dir(self) -> str:
        """Get cache directory path."""
        return self.config.get('embedding', {}).get('cache_dir', 'cache')

    def to_dict(self) -> Dict[str, Any]:
        """Return configuration as dictionary."""
        return self.config.copy()

    def reload(self, config_path: Optional[str] = None):
        """Reload configuration from file."""
        self._initialized = False
        if config_path:
            self.config_path = config_path
        self.__init__(self.config_path)


# Global configuration instance
_global_config = None


def get_config(config_path: Optional[str] = None) -> Config:
    """Get global configuration instance."""
    global _global_config
    if _global_config is None:
        _global_config = Config(config_path)
    return _global_config


if __name__ == "__main__":
    # Test configuration loading
    config = Config()
    print("Configuration loaded successfully!")
    print(f"Cache dir: {config.get_cache_dir()}")
    print(f"Exclude patterns: {len(config.get_exclude_patterns())}")
    print(f"Embedding enabled: {config.is_embedding_enabled()}")
359
.claude/python_script/core/context_analyzer.py
Normal file
359
.claude/python_script/core/context_analyzer.py
Normal file
@@ -0,0 +1,359 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Context Analyzer Module for UltraThink Path-Aware Analyzer
|
||||
Analyzes user prompts to extract relevant context and keywords.
|
||||
"""
|
||||
|
||||
import re
|
||||
import logging
|
||||
from typing import Dict, List, Set, Tuple, Optional
|
||||
from dataclasses import dataclass
|
||||
from collections import Counter
|
||||
import string
|
||||
|
||||
@dataclass
|
||||
class AnalysisResult:
|
||||
"""Results of context analysis."""
|
||||
keywords: List[str]
|
||||
domains: List[str]
|
||||
languages: List[str]
|
||||
file_patterns: List[str]
|
||||
confidence_scores: Dict[str, float]
|
||||
extracted_entities: Dict[str, List[str]]
|
||||
|
||||
class ContextAnalyzer:
|
||||
"""Analyzes user prompts to understand context and intent."""
|
||||
|
||||
def __init__(self, config: Dict):
|
||||
self.config = config
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
# Load domain and language mappings from config
|
||||
self.domain_keywords = config.get('context_analysis', {}).get('domain_keywords', {})
|
||||
self.language_indicators = config.get('context_analysis', {}).get('language_indicators', {})
|
||||
|
||||
# Common programming terms and patterns
|
||||
self.technical_terms = self._build_technical_terms()
|
||||
self.file_pattern_indicators = self._build_pattern_indicators()
|
||||
|
||||
# Stop words to filter out
|
||||
self.stop_words = {
|
||||
'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
|
||||
'by', 'from', 'up', 'about', 'into', 'through', 'during', 'before', 'after',
|
||||
'above', 'below', 'between', 'among', 'as', 'is', 'are', 'was', 'were', 'be',
|
||||
'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would',
|
||||
'could', 'should', 'may', 'might', 'must', 'can', 'this', 'that', 'these',
|
||||
'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her',
|
||||
'us', 'them', 'my', 'your', 'his', 'its', 'our', 'their'
|
||||
}
|
||||
|
||||
def _build_technical_terms(self) -> Dict[str, List[str]]:
|
||||
"""Build comprehensive list of technical terms grouped by category."""
|
||||
return {
|
||||
'authentication': [
|
||||
'auth', 'authentication', 'login', 'logout', 'signin', 'signout',
|
||||
'user', 'password', 'token', 'jwt', 'oauth', 'session', 'cookie',
|
||||
'credential', 'authorize', 'permission', 'role', 'access'
|
||||
],
|
||||
'database': [
|
||||
'database', 'db', 'sql', 'query', 'table', 'schema', 'migration',
|
||||
'model', 'orm', 'entity', 'relation', 'index', 'transaction',
|
||||
'crud', 'select', 'insert', 'update', 'delete', 'join'
|
||||
],
|
||||
'api': [
|
||||
'api', 'rest', 'graphql', 'endpoint', 'route', 'controller',
|
||||
'handler', 'middleware', 'service', 'request', 'response',
|
||||
'http', 'get', 'post', 'put', 'delete', 'patch'
|
||||
],
|
||||
'frontend': [
|
||||
'ui', 'component', 'view', 'template', 'page', 'layout',
|
||||
'style', 'css', 'html', 'javascript', 'react', 'vue',
|
||||
'angular', 'dom', 'event', 'state', 'props'
|
||||
],
|
||||
'backend': [
|
||||
'server', 'service', 'business', 'logic', 'core', 'engine',
|
||||
'worker', 'job', 'queue', 'cache', 'redis', 'memcache'
|
||||
],
|
||||
'testing': [
|
||||
'test', 'testing', 'spec', 'unit', 'integration', 'e2e',
|
||||
'mock', 'stub', 'fixture', 'assert', 'expect', 'should'
|
||||
],
|
||||
'configuration': [
|
||||
'config', 'configuration', 'setting', 'environment', 'env',
|
||||
'variable', 'constant', 'parameter', 'option'
|
||||
],
|
||||
'utility': [
|
||||
'util', 'utility', 'helper', 'common', 'shared', 'lib',
|
||||
'library', 'tool', 'function', 'method'
|
||||
]
|
||||
}
|
||||
|
||||
def _build_pattern_indicators(self) -> Dict[str, List[str]]:
|
||||
"""Build indicators that suggest specific file patterns."""
|
||||
return {
|
||||
'source_code': ['implement', 'code', 'function', 'class', 'method'],
|
||||
'tests': ['test', 'testing', 'spec', 'unittest', 'pytest'],
|
||||
'documentation': ['doc', 'readme', 'guide', 'documentation', 'manual'],
|
||||
'configuration': ['config', 'setting', 'env', 'environment'],
|
||||
'build': ['build', 'compile', 'package', 'deploy', 'release'],
|
||||
'scripts': ['script', 'automation', 'tool', 'utility']
|
||||
}
|
||||
|
||||
def extract_keywords(self, text: str) -> List[str]:
|
||||
"""Extract meaningful keywords from text."""
|
||||
# Clean and normalize text
|
||||
text = text.lower()
|
||||
text = re.sub(r'[^\w\s-]', ' ', text) # Remove punctuation except hyphens
|
||||
words = text.split()
|
||||
|
||||
# Filter stop words and short words
|
||||
keywords = []
|
||||
for word in words:
|
||||
word = word.strip('-') # Remove leading/trailing hyphens
|
||||
if (len(word) >= 2 and
|
||||
word not in self.stop_words and
|
||||
not word.isdigit()):
|
||||
keywords.append(word)
|
||||
|
||||
# Count frequency and return top keywords
|
||||
word_counts = Counter(keywords)
|
||||
return [word for word, count in word_counts.most_common(20)]
|
||||
|
||||
def identify_domains(self, keywords: List[str]) -> List[Tuple[str, float]]:
|
||||
"""Identify relevant domains based on keywords."""
|
||||
domain_scores = {}
|
||||
|
||||
for domain, domain_keywords in self.domain_keywords.items():
|
||||
score = 0.0
|
||||
matched_keywords = []
|
||||
|
||||
for keyword in keywords:
|
||||
for domain_keyword in domain_keywords:
|
||||
if keyword in domain_keyword or domain_keyword in keyword:
|
||||
score += 1.0
|
||||
matched_keywords.append(keyword)
|
||||
break
|
||||
|
||||
if score > 0:
|
||||
# Normalize score by number of domain keywords
|
||||
normalized_score = score / len(domain_keywords)
|
||||
domain_scores[domain] = normalized_score
|
||||
|
||||
# Also check technical terms
|
||||
for category, terms in self.technical_terms.items():
|
||||
score = 0.0
|
||||
for keyword in keywords:
|
||||
for term in terms:
|
||||
if keyword in term or term in keyword:
|
||||
score += 1.0
|
||||
break
|
||||
|
||||
if score > 0:
|
||||
normalized_score = score / len(terms)
|
||||
if category not in domain_scores:
|
||||
domain_scores[category] = normalized_score
|
||||
else:
|
||||
domain_scores[category] = max(domain_scores[category], normalized_score)
|
||||
|
||||
# Sort by score and return top domains
|
||||
sorted_domains = sorted(domain_scores.items(), key=lambda x: x[1], reverse=True)
|
||||
return sorted_domains[:5]
|
||||
|
||||
def identify_languages(self, keywords: List[str]) -> List[Tuple[str, float]]:
|
||||
"""Identify programming languages based on keywords."""
|
||||
language_scores = {}
|
||||
|
||||
for language, indicators in self.language_indicators.items():
|
||||
score = 0.0
|
||||
for keyword in keywords:
|
||||
for indicator in indicators:
|
||||
if keyword in indicator or indicator in keyword:
|
||||
score += 1.0
|
||||
break
|
||||
|
||||
if score > 0:
|
||||
normalized_score = score / len(indicators)
|
||||
language_scores[language] = normalized_score
|
||||
|
||||
sorted_languages = sorted(language_scores.items(), key=lambda x: x[1], reverse=True)
|
||||
return sorted_languages[:3]
|
||||
|
||||
def extract_file_patterns(self, text: str) -> List[str]:
|
||||
"""Extract explicit file patterns from text."""
|
||||
patterns = []
|
||||
|
||||
# Look for @{pattern} syntax
|
||||
at_patterns = re.findall(r'@\{([^}]+)\}', text)
|
||||
patterns.extend(at_patterns)
|
||||
|
||||
# Look for file extensions
|
||||
extensions = re.findall(r'\*\.(\w+)', text)
|
||||
for ext in extensions:
|
||||
patterns.append(f"*.{ext}")
|
||||
|
||||
# Look for directory patterns
|
||||
dir_patterns = re.findall(r'(\w+)/\*\*?', text)
|
||||
for dir_pattern in dir_patterns:
|
||||
patterns.append(f"{dir_pattern}/**/*")
|
||||
|
||||
# Look for specific file names
|
||||
file_patterns = re.findall(r'\b(\w+\.\w+)\b', text)
|
||||
for file_pattern in file_patterns:
|
||||
if '.' in file_pattern:
|
||||
patterns.append(file_pattern)
|
||||
|
||||
return list(set(patterns)) # Remove duplicates
|
||||
|
||||
def suggest_patterns_from_domains(self, domains: List[str]) -> List[str]:
|
||||
"""Suggest file patterns based on identified domains."""
|
||||
patterns = []
|
||||
|
||||
domain_to_patterns = {
|
||||
'auth': ['**/auth/**/*', '**/login/**/*', '**/user/**/*'],
|
||||
'authentication': ['**/auth/**/*', '**/login/**/*', '**/user/**/*'],
|
||||
'database': ['**/db/**/*', '**/model/**/*', '**/migration/**/*', '**/*model*'],
|
||||
'api': ['**/api/**/*', '**/route/**/*', '**/controller/**/*', '**/handler/**/*'],
|
||||
'frontend': ['**/ui/**/*', '**/component/**/*', '**/view/**/*', '**/template/**/*'],
|
||||
'backend': ['**/service/**/*', '**/core/**/*', '**/server/**/*'],
|
||||
'test': ['**/test/**/*', '**/spec/**/*', '**/*test*', '**/*spec*'],
|
||||
'testing': ['**/test/**/*', '**/spec/**/*', '**/*test*', '**/*spec*'],
|
||||
'config': ['**/config/**/*', '**/*.config.*', '**/env/**/*'],
|
||||
'configuration': ['**/config/**/*', '**/*.config.*', '**/env/**/*'],
|
||||
'util': ['**/util/**/*', '**/helper/**/*', '**/common/**/*'],
|
||||
'utility': ['**/util/**/*', '**/helper/**/*', '**/common/**/*']
|
||||
}
|
||||
|
||||
for domain in domains:
|
||||
if domain in domain_to_patterns:
|
||||
patterns.extend(domain_to_patterns[domain])
|
||||
|
||||
return list(set(patterns)) # Remove duplicates
|
||||
|
||||
def extract_entities(self, text: str) -> Dict[str, List[str]]:
|
||||
"""Extract named entities from text."""
|
||||
entities = {
|
||||
'files': [],
|
||||
'functions': [],
|
||||
'classes': [],
|
||||
'variables': [],
|
||||
'technologies': []
|
||||
}
|
||||
|
||||
# File patterns
|
||||
file_patterns = re.findall(r'\b(\w+\.\w+)\b', text)
|
||||
entities['files'] = list(set(file_patterns))
|
||||
|
||||
# Function patterns (camelCase or snake_case followed by parentheses)
|
||||
function_patterns = re.findall(r'\b([a-z][a-zA-Z0-9_]*)\s*\(', text)
|
||||
entities['functions'] = list(set(function_patterns))
|
||||
|
||||
# Class patterns (PascalCase)
|
||||
class_patterns = re.findall(r'\b([A-Z][a-zA-Z0-9]*)\b', text)
|
||||
entities['classes'] = list(set(class_patterns))
|
||||
|
||||
# Technology mentions
|
||||
tech_keywords = [
|
||||
'react', 'vue', 'angular', 'node', 'express', 'django', 'flask',
|
||||
'spring', 'rails', 'laravel', 'docker', 'kubernetes', 'aws',
|
||||
'azure', 'gcp', 'postgresql', 'mysql', 'mongodb', 'redis'
|
||||
]
|
||||
text_lower = text.lower()
|
||||
for tech in tech_keywords:
|
||||
if tech in text_lower:
|
||||
entities['technologies'].append(tech)
|
||||
|
||||
return entities
|
||||
|
||||
def analyze(self, prompt: str) -> AnalysisResult:
|
||||
"""Perform comprehensive analysis of the user prompt."""
|
||||
self.logger.debug(f"Analyzing prompt: {prompt[:100]}...")
|
||||
|
||||
# Extract keywords
|
||||
keywords = self.extract_keywords(prompt)
|
||||
|
||||
# Identify domains and languages
|
||||
domains_with_scores = self.identify_domains(keywords)
|
||||
languages_with_scores = self.identify_languages(keywords)
|
||||
|
||||
# Extract patterns and entities
|
||||
explicit_patterns = self.extract_file_patterns(prompt)
|
||||
entities = self.extract_entities(prompt)
|
||||
|
||||
# Get top domains and languages
|
||||
domains = [domain for domain, score in domains_with_scores]
|
||||
languages = [lang for lang, score in languages_with_scores]
|
||||
|
||||
# Suggest additional patterns based on domains
|
||||
suggested_patterns = self.suggest_patterns_from_domains(domains)
|
||||
|
||||
# Combine explicit and suggested patterns
|
||||
all_patterns = list(set(explicit_patterns + suggested_patterns))
|
||||
|
||||
# Build confidence scores
|
||||
confidence_scores = {
|
||||
'keywords': len(keywords) / 20, # Normalize to 0-1
|
||||
'domain_match': max([score for _, score in domains_with_scores[:1]], default=0),
|
||||
'language_match': max([score for _, score in languages_with_scores[:1]], default=0),
|
||||
'pattern_extraction': len(explicit_patterns) / 5, # Normalize to 0-1
|
||||
}
|
||||
|
||||
result = AnalysisResult(
|
||||
keywords=keywords,
|
||||
domains=domains,
|
||||
languages=languages,
|
||||
file_patterns=all_patterns,
|
||||
confidence_scores=confidence_scores,
|
||||
extracted_entities=entities
|
||||
)
|
||||
|
||||
self.logger.info(f"Analysis complete: {len(domains)} domains, {len(languages)} languages, {len(all_patterns)} patterns")
|
||||
return result
|
||||
|
||||
def main():
|
||||
"""Command-line interface for context analyzer."""
|
||||
import yaml
|
||||
import argparse
|
||||
import json
|
||||
|
||||
parser = argparse.ArgumentParser(description="Context Analyzer for UltraThink")
|
||||
parser.add_argument("prompt", help="Prompt to analyze")
|
||||
parser.add_argument("--config", default="config.yaml", help="Configuration file path")
|
||||
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Setup logging
|
||||
level = logging.DEBUG if args.verbose else logging.INFO
|
||||
logging.basicConfig(level=level, format='%(levelname)s: %(message)s')
|
||||
|
||||
# Load configuration
|
||||
from pathlib import Path
|
||||
config_path = Path(__file__).parent / args.config
|
||||
with open(config_path, 'r', encoding='utf-8') as f:
|
||||
config = yaml.safe_load(f)
|
||||
|
||||
# Create analyzer
|
||||
analyzer = ContextAnalyzer(config)
|
||||
|
||||
# Analyze prompt
|
||||
result = analyzer.analyze(args.prompt)
|
||||
|
||||
# Output results
|
||||
print(f"Keywords: {', '.join(result.keywords[:10])}")
|
||||
print(f"Domains: {', '.join(result.domains[:5])}")
|
||||
print(f"Languages: {', '.join(result.languages[:3])}")
|
||||
print(f"Patterns: {', '.join(result.file_patterns[:10])}")
|
||||
|
||||
if args.verbose:
|
||||
print("\nDetailed Results:")
|
||||
print(json.dumps({
|
||||
'keywords': result.keywords,
|
||||
'domains': result.domains,
|
||||
'languages': result.languages,
|
||||
'file_patterns': result.file_patterns,
|
||||
'confidence_scores': result.confidence_scores,
|
||||
'extracted_entities': result.extracted_entities
|
||||
}, indent=2))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
453
.claude/python_script/core/embedding_manager.py
Normal file
453
.claude/python_script/core/embedding_manager.py
Normal file
@@ -0,0 +1,453 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Embedding Manager Module for UltraThink Path-Aware Analyzer
|
||||
Manages embeddings for semantic similarity search (RAG functionality).
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import hashlib
|
||||
import logging
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple, Optional, Any
|
||||
from dataclasses import dataclass
|
||||
import time
|
||||
|
||||
# Optional imports for embedding functionality
|
||||
try:
|
||||
import numpy as np
|
||||
NUMPY_AVAILABLE = True
|
||||
except ImportError:
|
||||
NUMPY_AVAILABLE = False
|
||||
|
||||
try:
|
||||
from sentence_transformers import SentenceTransformer
|
||||
SENTENCE_TRANSFORMERS_AVAILABLE = True
|
||||
except ImportError:
|
||||
SENTENCE_TRANSFORMERS_AVAILABLE = False
|
||||
|
||||
from .file_indexer import FileInfo
|
||||
|
||||
@dataclass
|
||||
class EmbeddingInfo:
|
||||
"""Information about a file's embedding."""
|
||||
file_path: str
|
||||
content_hash: str
|
||||
embedding_hash: str
|
||||
created_time: float
|
||||
vector_size: int
|
||||
|
||||
@dataclass
|
||||
class SimilarityResult:
|
||||
"""Result of similarity search."""
|
||||
file_info: FileInfo
|
||||
similarity_score: float
|
||||
matching_content: str
|
||||
|
||||
class EmbeddingManager:
|
||||
"""Manages embeddings for semantic file matching."""
|
||||
|
||||
def __init__(self, config: Dict):
|
||||
self.config = config
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
# Check if embeddings are enabled
|
||||
self.enabled = config.get('embedding', {}).get('enabled', False)
|
||||
if not self.enabled:
|
||||
self.logger.info("Embeddings disabled in configuration")
|
||||
return
|
||||
|
||||
# Check dependencies
|
||||
if not NUMPY_AVAILABLE:
|
||||
self.logger.warning("NumPy not available, disabling embeddings")
|
||||
self.enabled = False
|
||||
return
|
||||
|
||||
if not SENTENCE_TRANSFORMERS_AVAILABLE:
|
||||
self.logger.warning("sentence-transformers not available, disabling embeddings")
|
||||
self.enabled = False
|
||||
return
|
||||
|
||||
# Load configuration
|
||||
self.model_name = config.get('embedding', {}).get('model', 'all-MiniLM-L6-v2')
|
||||
self.cache_dir = Path(config.get('embedding', {}).get('cache_dir', '.claude/cache/embeddings'))
|
||||
self.similarity_threshold = config.get('embedding', {}).get('similarity_threshold', 0.6)
|
||||
self.max_context_length = config.get('embedding', {}).get('max_context_length', 512)
|
||||
self.batch_size = config.get('embedding', {}).get('batch_size', 32)
|
||||
|
||||
# Setup cache directories
|
||||
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
||||
self.embeddings_file = self.cache_dir / "embeddings.pkl"
|
||||
self.index_file = self.cache_dir / "embedding_index.json"
|
||||
|
||||
# Initialize model lazily
|
||||
self._model = None
|
||||
self._embeddings_cache = None
|
||||
self._embedding_index = None
|
||||
|
||||
@property
|
||||
def model(self):
|
||||
"""Lazy load the embedding model."""
|
||||
if not self.enabled:
|
||||
return None
|
||||
|
||||
if self._model is None:
|
||||
try:
|
||||
self.logger.info(f"Loading embedding model: {self.model_name}")
|
||||
self._model = SentenceTransformer(self.model_name)
|
||||
self.logger.info(f"Model loaded successfully")
|
||||
except Exception as e:
|
||||
self.logger.error(f"Failed to load embedding model: {e}")
|
||||
self.enabled = False
|
||||
return None
|
||||
|
||||
return self._model
|
||||
|
||||
def embeddings_exist(self) -> bool:
|
||||
"""Check if embeddings cache exists."""
|
||||
return self.embeddings_file.exists() and self.index_file.exists()
|
||||
|
||||
def _load_embedding_cache(self) -> Dict[str, np.ndarray]:
|
||||
"""Load embeddings from cache."""
|
||||
if self._embeddings_cache is not None:
|
||||
return self._embeddings_cache
|
||||
|
||||
if not self.embeddings_file.exists():
|
||||
self._embeddings_cache = {}
|
||||
return self._embeddings_cache
|
||||
|
||||
try:
|
||||
with open(self.embeddings_file, 'rb') as f:
|
||||
self._embeddings_cache = pickle.load(f)
|
||||
self.logger.debug(f"Loaded {len(self._embeddings_cache)} embeddings from cache")
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Failed to load embeddings cache: {e}")
|
||||
self._embeddings_cache = {}
|
||||
|
||||
return self._embeddings_cache
|
||||
|
||||
def _save_embedding_cache(self):
|
||||
"""Save embeddings to cache."""
|
||||
if self._embeddings_cache is None:
|
||||
return
|
||||
|
||||
try:
|
||||
with open(self.embeddings_file, 'wb') as f:
|
||||
pickle.dump(self._embeddings_cache, f)
|
||||
self.logger.debug(f"Saved {len(self._embeddings_cache)} embeddings to cache")
|
||||
except Exception as e:
|
||||
self.logger.error(f"Failed to save embeddings cache: {e}")
|
||||
|
||||
def _load_embedding_index(self) -> Dict[str, EmbeddingInfo]:
|
||||
"""Load embedding index."""
|
||||
if self._embedding_index is not None:
|
||||
return self._embedding_index
|
||||
|
||||
if not self.index_file.exists():
|
||||
self._embedding_index = {}
|
||||
return self._embedding_index
|
||||
|
||||
try:
|
||||
with open(self.index_file, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
self._embedding_index = {}
|
||||
for path, info_dict in data.items():
|
||||
self._embedding_index[path] = EmbeddingInfo(**info_dict)
|
||||
self.logger.debug(f"Loaded embedding index with {len(self._embedding_index)} entries")
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Failed to load embedding index: {e}")
|
||||
self._embedding_index = {}
|
||||
|
||||
return self._embedding_index
|
||||
|
||||
def _save_embedding_index(self):
|
||||
"""Save embedding index."""
|
||||
if self._embedding_index is None:
|
||||
return
|
||||
|
||||
try:
|
||||
data = {}
|
||||
for path, info in self._embedding_index.items():
|
||||
data[path] = {
|
||||
'file_path': info.file_path,
|
||||
'content_hash': info.content_hash,
|
||||
'embedding_hash': info.embedding_hash,
|
||||
'created_time': info.created_time,
|
||||
'vector_size': info.vector_size
|
||||
}
|
||||
|
||||
with open(self.index_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(data, f, indent=2)
|
||||
self.logger.debug(f"Saved embedding index with {len(self._embedding_index)} entries")
|
||||
except Exception as e:
|
||||
self.logger.error(f"Failed to save embedding index: {e}")
|
||||
|
||||
def _extract_text_content(self, file_info: FileInfo) -> Optional[str]:
|
||||
"""Extract text content from a file for embedding."""
|
||||
try:
|
||||
file_path = Path(file_info.path)
|
||||
|
||||
# Skip binary files and very large files
|
||||
if file_info.size > self.config.get('performance', {}).get('max_file_size', 10485760):
|
||||
return None
|
||||
|
||||
# Only process text-based files
|
||||
text_extensions = {'.py', '.js', '.ts', '.tsx', '.jsx', '.java', '.cpp', '.c', '.h',
|
||||
'.rs', '.go', '.php', '.rb', '.sh', '.bash', '.md', '.txt', '.json',
|
||||
'.yaml', '.yml', '.xml', '.html', '.css', '.scss', '.sass'}
|
||||
|
||||
if file_info.extension.lower() not in text_extensions:
|
||||
return None
|
||||
|
||||
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
content = f.read()
|
||||
|
||||
# Truncate content if too long
|
||||
if len(content) > self.max_context_length * 4: # Approximate token limit
|
||||
content = content[:self.max_context_length * 4]
|
||||
|
||||
return content
|
||||
|
||||
except Exception as e:
|
||||
self.logger.debug(f"Could not extract content from {file_info.path}: {e}")
|
||||
return None
|
||||
|
||||
def _create_embedding(self, text: str) -> Optional[np.ndarray]:
|
||||
"""Create embedding for text content."""
|
||||
if not self.enabled or self.model is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
# Truncate text if needed
|
||||
if len(text) > self.max_context_length * 4:
|
||||
text = text[:self.max_context_length * 4]
|
||||
|
||||
embedding = self.model.encode([text])[0]
|
||||
return embedding
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Failed to create embedding: {e}")
|
||||
return None
|
||||
|
||||
def _get_content_hash(self, content: str) -> str:
|
||||
"""Get hash of content for caching."""
|
||||
return hashlib.md5(content.encode('utf-8')).hexdigest()
|
||||
|
||||
def _get_embedding_hash(self, embedding: np.ndarray) -> str:
|
||||
"""Get hash of embedding for verification."""
|
||||
return hashlib.md5(embedding.tobytes()).hexdigest()
|
||||
|
||||
def update_embeddings(self, file_index: Dict[str, FileInfo], force_rebuild: bool = False) -> int:
|
||||
"""Update embeddings for files in the index."""
|
||||
if not self.enabled:
|
||||
self.logger.info("Embeddings disabled, skipping update")
|
||||
return 0
|
||||
|
||||
self.logger.info("Updating embeddings...")
|
||||
|
||||
# Load caches
|
||||
embeddings_cache = self._load_embedding_cache()
|
||||
embedding_index = self._load_embedding_index()
|
||||
|
||||
new_embeddings = 0
|
||||
batch_texts = []
|
||||
batch_paths = []
|
||||
|
||||
for file_path, file_info in file_index.items():
|
||||
# Check if embedding exists and is current
|
||||
if not force_rebuild and file_path in embedding_index:
|
||||
cached_info = embedding_index[file_path]
|
||||
if cached_info.content_hash == file_info.content_hash:
|
||||
continue # Embedding is current
|
||||
|
||||
# Extract content
|
||||
content = self._extract_text_content(file_info)
|
||||
if content is None:
|
||||
continue
|
||||
|
||||
# Prepare for batch processing
|
||||
batch_texts.append(content)
|
||||
batch_paths.append(file_path)
|
||||
|
||||
# Process batch when full
|
||||
if len(batch_texts) >= self.batch_size:
|
||||
self._process_batch(batch_texts, batch_paths, file_index, embeddings_cache, embedding_index)
|
||||
new_embeddings += len(batch_texts)
|
||||
batch_texts = []
|
||||
batch_paths = []
|
||||
|
||||
# Process remaining batch
|
||||
if batch_texts:
|
||||
self._process_batch(batch_texts, batch_paths, file_index, embeddings_cache, embedding_index)
|
||||
new_embeddings += len(batch_texts)
|
||||
|
||||
# Save caches
|
||||
self._save_embedding_cache()
|
||||
self._save_embedding_index()
|
||||
|
||||
self.logger.info(f"Updated {new_embeddings} embeddings")
|
||||
return new_embeddings
|
||||
|
||||
def _process_batch(self, texts: List[str], paths: List[str], file_index: Dict[str, FileInfo],
|
||||
embeddings_cache: Dict[str, np.ndarray], embedding_index: Dict[str, EmbeddingInfo]):
|
||||
"""Process a batch of texts for embedding."""
|
||||
try:
|
||||
# Create embeddings for batch
|
||||
embeddings = self.model.encode(texts)
|
||||
|
||||
for i, (text, path) in enumerate(zip(texts, paths)):
|
||||
embedding = embeddings[i]
|
||||
file_info = file_index[path]
|
||||
|
||||
# Store embedding
|
||||
content_hash = self._get_content_hash(text)
|
||||
embedding_hash = self._get_embedding_hash(embedding)
|
||||
|
||||
embeddings_cache[path] = embedding
|
||||
embedding_index[path] = EmbeddingInfo(
|
||||
file_path=path,
|
||||
content_hash=content_hash,
|
||||
embedding_hash=embedding_hash,
|
||||
created_time=time.time(),
|
||||
vector_size=len(embedding)
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Failed to process embedding batch: {e}")
|
||||
|
||||
def find_similar_files(self, query: str, file_index: Dict[str, FileInfo],
|
||||
top_k: int = 20) -> List[SimilarityResult]:
|
||||
"""Find files similar to the query using embeddings."""
|
||||
if not self.enabled:
|
||||
return []
|
||||
|
||||
# Create query embedding
|
||||
query_embedding = self._create_embedding(query)
|
||||
if query_embedding is None:
|
||||
return []
|
||||
|
||||
# Load embeddings
|
||||
embeddings_cache = self._load_embedding_cache()
|
||||
if not embeddings_cache:
|
||||
self.logger.warning("No embeddings available for similarity search")
|
||||
return []
|
||||
|
||||
# Calculate similarities
|
||||
similarities = []
|
||||
for file_path, file_embedding in embeddings_cache.items():
|
||||
if file_path not in file_index:
|
||||
continue
|
||||
|
||||
try:
|
||||
# Calculate cosine similarity
|
||||
similarity = np.dot(query_embedding, file_embedding) / (
|
||||
np.linalg.norm(query_embedding) * np.linalg.norm(file_embedding)
|
||||
)
|
||||
|
||||
if similarity >= self.similarity_threshold:
|
||||
similarities.append((file_path, similarity))
|
||||
|
||||
except Exception as e:
|
||||
self.logger.debug(f"Failed to calculate similarity for {file_path}: {e}")
|
||||
continue
|
||||
|
||||
# Sort by similarity
|
||||
similarities.sort(key=lambda x: x[1], reverse=True)
|
||||
|
||||
# Create results
|
||||
results = []
|
||||
for file_path, similarity in similarities[:top_k]:
|
||||
file_info = file_index[file_path]
|
||||
|
||||
# Extract a snippet of matching content
|
||||
content = self._extract_text_content(file_info)
|
||||
snippet = content[:200] + "..." if content and len(content) > 200 else content or ""
|
||||
|
||||
result = SimilarityResult(
|
||||
file_info=file_info,
|
||||
similarity_score=similarity,
|
||||
matching_content=snippet
|
||||
)
|
||||
results.append(result)
|
||||
|
||||
self.logger.info(f"Found {len(results)} similar files for query")
|
||||
return results
|
||||
|
||||
def get_stats(self) -> Dict[str, Any]:
|
||||
"""Get statistics about the embedding cache."""
|
||||
if not self.enabled:
|
||||
return {'enabled': False}
|
||||
|
||||
embedding_index = self._load_embedding_index()
|
||||
embeddings_cache = self._load_embedding_cache()
|
||||
|
||||
return {
|
||||
'enabled': True,
|
||||
'model_name': self.model_name,
|
||||
'total_embeddings': len(embedding_index),
|
||||
'cache_size_mb': os.path.getsize(self.embeddings_file) / 1024 / 1024 if self.embeddings_file.exists() else 0,
|
||||
'similarity_threshold': self.similarity_threshold,
|
||||
'vector_size': list(embedding_index.values())[0].vector_size if embedding_index else 0
|
||||
}
|
||||
|
||||
def main():
|
||||
"""Command-line interface for embedding manager."""
|
||||
import yaml
|
||||
import argparse
|
||||
from .file_indexer import FileIndexer
|
||||
|
||||
parser = argparse.ArgumentParser(description="Embedding Manager for UltraThink")
|
||||
parser.add_argument("--config", default="config.yaml", help="Configuration file path")
|
||||
parser.add_argument("--update", action="store_true", help="Update embeddings")
|
||||
parser.add_argument("--rebuild", action="store_true", help="Force rebuild all embeddings")
|
||||
parser.add_argument("--query", help="Search for similar files")
|
||||
parser.add_argument("--stats", action="store_true", help="Show embedding statistics")
|
||||
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Setup logging
|
||||
level = logging.DEBUG if args.verbose else logging.INFO
|
||||
logging.basicConfig(level=level, format='%(levelname)s: %(message)s')
|
||||
|
||||
# Load configuration
|
||||
config_path = Path(__file__).parent / args.config
|
||||
with open(config_path, 'r', encoding='utf-8') as f:
|
||||
config = yaml.safe_load(f)
|
||||
|
||||
# Create components
|
||||
indexer = FileIndexer(config)
|
||||
embedding_manager = EmbeddingManager(config)
|
||||
|
||||
if not embedding_manager.enabled:
|
||||
print("Embeddings are disabled. Enable in config.yaml or install required dependencies.")
|
||||
return
|
||||
|
||||
# Load file index
|
||||
file_index = indexer.load_index()
|
||||
if not file_index:
|
||||
print("Building file index...")
|
||||
file_index = indexer.build_index()
|
||||
|
||||
if args.stats:
|
||||
stats = embedding_manager.get_stats()
|
||||
print("Embedding Statistics:")
|
||||
for key, value in stats.items():
|
||||
print(f" {key}: {value}")
|
||||
return
|
||||
|
||||
if args.update or args.rebuild:
|
||||
count = embedding_manager.update_embeddings(file_index, force_rebuild=args.rebuild)
|
||||
print(f"Updated {count} embeddings")
|
||||
|
||||
if args.query:
|
||||
results = embedding_manager.find_similar_files(args.query, file_index)
|
||||
print(f"Found {len(results)} similar files:")
|
||||
for result in results:
|
||||
print(f" {result.file_info.relative_path} (similarity: {result.similarity_score:.3f})")
|
||||
if args.verbose and result.matching_content:
|
||||
print(f" Content: {result.matching_content[:100]}...")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
383
.claude/python_script/core/file_indexer.py
Normal file
383
.claude/python_script/core/file_indexer.py
Normal file
@@ -0,0 +1,383 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
File Indexer Module for UltraThink Path-Aware Analyzer
|
||||
Builds and maintains an index of repository files with metadata.
|
||||
Enhanced with gitignore support and unified configuration.
|
||||
"""
|
||||
|
||||
import os
|
||||
import hashlib
|
||||
import json
|
||||
import time
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Set, Tuple, Union
|
||||
from dataclasses import dataclass, asdict
|
||||
from datetime import datetime
|
||||
import fnmatch
|
||||
|
||||
from .gitignore_parser import GitignoreParser
|
||||
|
||||
@dataclass
|
||||
class FileInfo:
|
||||
"""Information about a single file in the repository."""
|
||||
path: str
|
||||
relative_path: str
|
||||
size: int
|
||||
modified_time: float
|
||||
extension: str
|
||||
category: str # code, docs, config, web
|
||||
estimated_tokens: int
|
||||
content_hash: str
|
||||
|
||||
def to_dict(self) -> Dict:
|
||||
return asdict(self)
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict) -> 'FileInfo':
|
||||
return cls(**data)
|
||||
|
||||
@dataclass
|
||||
class IndexStats:
|
||||
"""Statistics about the file index."""
|
||||
total_files: int
|
||||
total_tokens: int
|
||||
total_size: int
|
||||
categories: Dict[str, int]
|
||||
last_updated: float
|
||||
|
||||
def to_dict(self) -> Dict:
|
||||
return asdict(self)
|
||||
|
||||
class FileIndexer:
|
||||
"""Builds and maintains an efficient index of repository files."""
|
||||
|
||||
def __init__(self, config: Union['Config', Dict], root_path: str = "."):
|
||||
# Support both Config object and Dict for backward compatibility
|
||||
if hasattr(config, 'to_dict'):
|
||||
self.config_obj = config
|
||||
self.config = config.to_dict()
|
||||
else:
|
||||
self.config_obj = None
|
||||
self.config = config
|
||||
|
||||
self.root_path = Path(root_path).resolve()
|
||||
self.cache_dir = Path(self.config.get('embedding', {}).get('cache_dir', '.claude/cache'))
|
||||
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
||||
self.index_file = self.cache_dir / "file_index.json"
|
||||
|
||||
# Setup logging
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
# File extension mappings
|
||||
self.extension_categories = self._build_extension_map()
|
||||
|
||||
# Exclude patterns from config
|
||||
self.exclude_patterns = list(self.config.get('exclude_patterns', []))
|
||||
|
||||
# Initialize gitignore parser and add patterns
|
||||
self.gitignore_parser = GitignoreParser(str(self.root_path))
|
||||
self._load_gitignore_patterns()
|
||||
|
||||
# Performance settings
|
||||
self.max_file_size = self.config.get('performance', {}).get('max_file_size', 10485760)
|
||||
|
||||
def _build_extension_map(self) -> Dict[str, str]:
|
||||
"""Build mapping from file extensions to categories."""
|
||||
ext_map = {}
|
||||
for category, extensions in self.config.get('file_extensions', {}).items():
|
||||
for ext in extensions:
|
||||
ext_map[ext.lower()] = category
|
||||
return ext_map
|
||||
|
||||
def _load_gitignore_patterns(self):
|
||||
"""Load patterns from .gitignore files and add to exclude_patterns."""
|
||||
try:
|
||||
gitignore_patterns = self.gitignore_parser.parse_all_gitignores()
|
||||
|
||||
if gitignore_patterns:
|
||||
# Avoid duplicates
|
||||
existing_patterns = set(self.exclude_patterns)
|
||||
new_patterns = [p for p in gitignore_patterns if p not in existing_patterns]
|
||||
|
||||
self.exclude_patterns.extend(new_patterns)
|
||||
self.logger.info(f"Added {len(new_patterns)} patterns from .gitignore files")
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Failed to load .gitignore patterns: {e}")
|
||||
|
||||
def _should_exclude_file(self, file_path: Path) -> bool:
|
||||
"""Check if file should be excluded based on patterns and gitignore rules."""
|
||||
relative_path = str(file_path.relative_to(self.root_path))
|
||||
|
||||
# Check against exclude patterns from config
|
||||
for pattern in self.exclude_patterns:
|
||||
# Convert pattern to work with fnmatch
|
||||
if fnmatch.fnmatch(relative_path, pattern) or fnmatch.fnmatch(str(file_path), pattern):
|
||||
return True
|
||||
|
||||
# Check if any parent directory matches
|
||||
parts = relative_path.split(os.sep)
|
||||
for i in range(len(parts)):
|
||||
partial_path = "/".join(parts[:i+1])
|
||||
if fnmatch.fnmatch(partial_path, pattern):
|
||||
return True
|
||||
|
||||
# Also check gitignore rules using dedicated parser
|
||||
# Note: gitignore patterns are already included in self.exclude_patterns
|
||||
# but we can add additional gitignore-specific checking here if needed
|
||||
try:
|
||||
# The gitignore patterns are already loaded into exclude_patterns,
|
||||
# but we can do additional gitignore-specific checks if needed
|
||||
pass
|
||||
except Exception as e:
|
||||
self.logger.debug(f"Error in gitignore checking for {file_path}: {e}")
|
||||
|
||||
return False
|
||||
|
||||
def _estimate_tokens(self, file_path: Path) -> int:
|
||||
"""Estimate token count for a file (chars/4 approximation)."""
|
||||
try:
|
||||
if file_path.stat().st_size > self.max_file_size:
|
||||
return file_path.stat().st_size // 8 # Penalty for large files
|
||||
|
||||
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
content = f.read()
|
||||
return len(content) // 4 # Rough approximation
|
||||
except (UnicodeDecodeError, OSError):
|
||||
# Binary files or unreadable files
|
||||
return file_path.stat().st_size // 8
|
||||
|
||||
def _get_file_hash(self, file_path: Path) -> str:
|
||||
"""Get a hash of file metadata for change detection."""
|
||||
stat = file_path.stat()
|
||||
return hashlib.md5(f"{file_path}:{stat.st_size}:{stat.st_mtime}".encode()).hexdigest()
|
||||
|
||||
def _categorize_file(self, file_path: Path) -> str:
|
||||
"""Categorize file based on extension."""
|
||||
extension = file_path.suffix.lower()
|
||||
return self.extension_categories.get(extension, 'other')
|
||||
|
||||
def _scan_file(self, file_path: Path) -> Optional[FileInfo]:
|
||||
"""Scan a single file and create FileInfo."""
|
||||
try:
|
||||
if not file_path.is_file() or self._should_exclude_file(file_path):
|
||||
return None
|
||||
|
||||
stat = file_path.stat()
|
||||
relative_path = str(file_path.relative_to(self.root_path))
|
||||
|
||||
file_info = FileInfo(
|
||||
path=str(file_path),
|
||||
relative_path=relative_path,
|
||||
size=stat.st_size,
|
||||
modified_time=stat.st_mtime,
|
||||
extension=file_path.suffix.lower(),
|
||||
category=self._categorize_file(file_path),
|
||||
estimated_tokens=self._estimate_tokens(file_path),
|
||||
content_hash=self._get_file_hash(file_path)
|
||||
)
|
||||
|
||||
return file_info
|
||||
|
||||
except (OSError, PermissionError) as e:
|
||||
self.logger.warning(f"Could not scan file {file_path}: {e}")
|
||||
return None
|
||||
|
||||
def build_index(self, force_rebuild: bool = False) -> Dict[str, FileInfo]:
|
||||
"""Build or update the file index."""
|
||||
self.logger.info(f"Building file index for {self.root_path}")
|
||||
|
||||
# Load existing index if available
|
||||
existing_index = {}
|
||||
if not force_rebuild and self.index_file.exists():
|
||||
existing_index = self.load_index()
|
||||
|
||||
new_index = {}
|
||||
changed_files = 0
|
||||
|
||||
# Walk through all files
|
||||
for file_path in self.root_path.rglob('*'):
|
||||
if not file_path.is_file():
|
||||
continue
|
||||
|
||||
file_info = self._scan_file(file_path)
|
||||
if file_info is None:
|
||||
continue
|
||||
|
||||
# Check if file has changed
|
||||
relative_path = file_info.relative_path
|
||||
if relative_path in existing_index:
|
||||
old_info = existing_index[relative_path]
|
||||
if old_info.content_hash == file_info.content_hash:
|
||||
# File unchanged, keep old info
|
||||
new_index[relative_path] = old_info
|
||||
continue
|
||||
|
||||
# File is new or changed
|
||||
new_index[relative_path] = file_info
|
||||
changed_files += 1
|
||||
|
||||
self.logger.info(f"Indexed {len(new_index)} files ({changed_files} new/changed)")
|
||||
|
||||
# Save index
|
||||
self.save_index(new_index)
|
||||
|
||||
return new_index
|
||||
|
||||
def load_index(self) -> Dict[str, FileInfo]:
|
||||
"""Load file index from cache."""
|
||||
if not self.index_file.exists():
|
||||
return {}
|
||||
|
||||
try:
|
||||
with open(self.index_file, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
index = {}
|
||||
for path, info_dict in data.get('files', {}).items():
|
||||
index[path] = FileInfo.from_dict(info_dict)
|
||||
return index
|
||||
except (json.JSONDecodeError, KeyError) as e:
|
||||
self.logger.warning(f"Could not load index: {e}")
|
||||
return {}
|
||||
|
||||
def save_index(self, index: Dict[str, FileInfo]) -> None:
|
||||
"""Save file index to cache."""
|
||||
try:
|
||||
# Calculate stats
|
||||
stats = self._calculate_stats(index)
|
||||
|
||||
data = {
|
||||
'stats': stats.to_dict(),
|
||||
'files': {path: info.to_dict() for path, info in index.items()}
|
||||
}
|
||||
|
||||
with open(self.index_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(data, f, indent=2)
|
||||
|
||||
except OSError as e:
|
||||
self.logger.error(f"Could not save index: {e}")
|
||||
|
||||
def _calculate_stats(self, index: Dict[str, FileInfo]) -> IndexStats:
|
||||
"""Calculate statistics for the index."""
|
||||
total_files = len(index)
|
||||
total_tokens = sum(info.estimated_tokens for info in index.values())
|
||||
total_size = sum(info.size for info in index.values())
|
||||
|
||||
categories = {}
|
||||
for info in index.values():
|
||||
categories[info.category] = categories.get(info.category, 0) + 1
|
||||
|
||||
return IndexStats(
|
||||
total_files=total_files,
|
||||
total_tokens=total_tokens,
|
||||
total_size=total_size,
|
||||
categories=categories,
|
||||
last_updated=time.time()
|
||||
)
|
||||
|
||||
def get_stats(self) -> Optional[IndexStats]:
|
||||
"""Get statistics about the current index."""
|
||||
if not self.index_file.exists():
|
||||
return None
|
||||
|
||||
try:
|
||||
with open(self.index_file, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
return IndexStats(**data.get('stats', {}))
|
||||
except (json.JSONDecodeError, KeyError):
|
||||
return None
|
||||
|
||||
def find_files_by_pattern(self, pattern: str, index: Optional[Dict[str, FileInfo]] = None) -> List[FileInfo]:
|
||||
"""Find files matching a glob pattern."""
|
||||
if index is None:
|
||||
index = self.load_index()
|
||||
|
||||
matching_files = []
|
||||
for path, info in index.items():
|
||||
if fnmatch.fnmatch(path, pattern) or fnmatch.fnmatch(info.path, pattern):
|
||||
matching_files.append(info)
|
||||
|
||||
return matching_files
|
||||
|
||||
def find_files_by_category(self, category: str, index: Optional[Dict[str, FileInfo]] = None) -> List[FileInfo]:
|
||||
"""Find files by category (code, docs, config, etc.)."""
|
||||
if index is None:
|
||||
index = self.load_index()
|
||||
|
||||
return [info for info in index.values() if info.category == category]
|
||||
|
||||
def find_files_by_keywords(self, keywords: List[str], index: Optional[Dict[str, FileInfo]] = None) -> List[FileInfo]:
|
||||
"""Find files whose paths contain any of the specified keywords."""
|
||||
if index is None:
|
||||
index = self.load_index()
|
||||
|
||||
matching_files = []
|
||||
keywords_lower = [kw.lower() for kw in keywords]
|
||||
|
||||
for info in index.values():
|
||||
path_lower = info.relative_path.lower()
|
||||
if any(keyword in path_lower for keyword in keywords_lower):
|
||||
matching_files.append(info)
|
||||
|
||||
return matching_files
|
||||
|
||||
def get_recent_files(self, limit: int = 20, index: Optional[Dict[str, FileInfo]] = None) -> List[FileInfo]:
|
||||
"""Get most recently modified files."""
|
||||
if index is None:
|
||||
index = self.load_index()
|
||||
|
||||
files = list(index.values())
|
||||
files.sort(key=lambda f: f.modified_time, reverse=True)
|
||||
return files[:limit]
|
||||
|
||||
def main():
|
||||
"""Command-line interface for file indexer."""
|
||||
import yaml
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(description="File Indexer for UltraThink")
|
||||
parser.add_argument("--config", default="config.yaml", help="Configuration file path")
|
||||
parser.add_argument("--rebuild", action="store_true", help="Force rebuild index")
|
||||
parser.add_argument("--stats", action="store_true", help="Show index statistics")
|
||||
parser.add_argument("--pattern", help="Find files matching pattern")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Load configuration
|
||||
config_path = Path(__file__).parent / args.config
|
||||
with open(config_path, 'r', encoding='utf-8') as f:
|
||||
config = yaml.safe_load(f)
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
|
||||
|
||||
# Create indexer
|
||||
indexer = FileIndexer(config)
|
||||
|
||||
if args.stats:
|
||||
stats = indexer.get_stats()
|
||||
if stats:
|
||||
print(f"Total files: {stats.total_files}")
|
||||
print(f"Total tokens: {stats.total_tokens:,}")
|
||||
print(f"Total size: {stats.total_size:,} bytes")
|
||||
print(f"Categories: {stats.categories}")
|
||||
print(f"Last updated: {datetime.fromtimestamp(stats.last_updated)}")
|
||||
else:
|
||||
print("No index found. Run without --stats to build index.")
|
||||
return
|
||||
|
||||
# Build index
|
||||
index = indexer.build_index(force_rebuild=args.rebuild)
|
||||
|
||||
if args.pattern:
|
||||
files = indexer.find_files_by_pattern(args.pattern, index)
|
||||
print(f"Found {len(files)} files matching pattern '{args.pattern}':")
|
||||
for file_info in files[:20]: # Limit output
|
||||
print(f" {file_info.relative_path}")
|
||||
else:
|
||||
stats = indexer._calculate_stats(index)
|
||||
print(f"Index built: {stats.total_files} files, ~{stats.total_tokens:,} tokens")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
182
.claude/python_script/core/gitignore_parser.py
Normal file
182
.claude/python_script/core/gitignore_parser.py
Normal file
@@ -0,0 +1,182 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
GitIgnore Parser Module
|
||||
Parses .gitignore files and converts rules to fnmatch patterns for file exclusion.
|
||||
"""
|
||||
|
||||
import os
|
||||
import fnmatch
|
||||
from pathlib import Path
|
||||
from typing import List, Set, Optional
|
||||
|
||||
|
||||
class GitignoreParser:
|
||||
"""Parser for .gitignore files that converts rules to fnmatch patterns."""
|
||||
|
||||
def __init__(self, root_path: str = "."):
|
||||
self.root_path = Path(root_path).resolve()
|
||||
self.patterns: List[str] = []
|
||||
self.negation_patterns: List[str] = []
|
||||
|
||||
def parse_file(self, gitignore_path: str) -> List[str]:
|
||||
"""Parse a .gitignore file and return exclude patterns."""
|
||||
gitignore_file = Path(gitignore_path)
|
||||
if not gitignore_file.exists():
|
||||
return []
|
||||
|
||||
patterns = []
|
||||
try:
|
||||
with open(gitignore_file, 'r', encoding='utf-8') as f:
|
||||
for line_num, line in enumerate(f, 1):
|
||||
pattern = self._parse_line(line.strip())
|
||||
if pattern:
|
||||
patterns.append(pattern)
|
||||
except (UnicodeDecodeError, IOError):
|
||||
# Fallback to system encoding if UTF-8 fails
|
||||
try:
|
||||
with open(gitignore_file, 'r') as f:
|
||||
for line_num, line in enumerate(f, 1):
|
||||
pattern = self._parse_line(line.strip())
|
||||
if pattern:
|
||||
patterns.append(pattern)
|
||||
except IOError:
|
||||
# If file can't be read, return empty list
|
||||
return []
|
||||
|
||||
return patterns
|
||||
|
||||
def _parse_line(self, line: str) -> Optional[str]:
|
||||
"""Parse a single line from .gitignore file."""
|
||||
# Skip empty lines and comments
|
||||
if not line or line.startswith('#'):
|
||||
return None
|
||||
|
||||
# Handle negation patterns (starting with !)
|
||||
if line.startswith('!'):
|
||||
# For now, we'll skip negation patterns as they require
|
||||
# more complex logic to implement correctly
|
||||
return None
|
||||
|
||||
# Convert gitignore pattern to fnmatch pattern
|
||||
return self._convert_to_fnmatch(line)
|
||||
|
||||
def _convert_to_fnmatch(self, pattern: str) -> str:
|
||||
"""Convert gitignore pattern to fnmatch pattern."""
|
||||
# Remove trailing slash (directory indicator)
|
||||
if pattern.endswith('/'):
|
||||
pattern = pattern[:-1]
|
||||
|
||||
# Handle absolute paths (starting with /)
|
||||
if pattern.startswith('/'):
|
||||
pattern = pattern[1:]
|
||||
# Make it match from root
|
||||
return pattern
|
||||
|
||||
# Handle patterns that should match anywhere in the tree
|
||||
# If pattern doesn't contain '/', it matches files/dirs at any level
|
||||
if '/' not in pattern:
|
||||
return f"*/{pattern}"
|
||||
|
||||
# Pattern contains '/', so it's relative to the gitignore location
|
||||
return pattern
|
||||
|
||||
def parse_all_gitignores(self, root_path: Optional[str] = None) -> List[str]:
|
||||
"""Parse all .gitignore files in the repository hierarchy."""
|
||||
if root_path:
|
||||
self.root_path = Path(root_path).resolve()
|
||||
|
||||
all_patterns = []
|
||||
|
||||
# Find all .gitignore files in the repository
|
||||
gitignore_files = self._find_gitignore_files()
|
||||
|
||||
for gitignore_file in gitignore_files:
|
||||
patterns = self.parse_file(gitignore_file)
|
||||
all_patterns.extend(patterns)
|
||||
|
||||
return all_patterns
|
||||
|
||||
def _find_gitignore_files(self) -> List[Path]:
|
||||
"""Find all .gitignore files in the repository."""
|
||||
gitignore_files = []
|
||||
|
||||
# Start with root .gitignore
|
||||
root_gitignore = self.root_path / '.gitignore'
|
||||
if root_gitignore.exists():
|
||||
gitignore_files.append(root_gitignore)
|
||||
|
||||
# Find .gitignore files in subdirectories
|
||||
try:
|
||||
for gitignore_file in self.root_path.rglob('.gitignore'):
|
||||
if gitignore_file != root_gitignore:
|
||||
gitignore_files.append(gitignore_file)
|
||||
except (PermissionError, OSError):
|
||||
# Skip directories we can't access
|
||||
pass
|
||||
|
||||
return gitignore_files
|
||||
|
||||
def should_exclude(self, file_path: str, gitignore_patterns: List[str]) -> bool:
|
||||
"""Check if a file should be excluded based on gitignore patterns."""
|
||||
# Convert to relative path from root
|
||||
try:
|
||||
rel_path = str(Path(file_path).relative_to(self.root_path))
|
||||
except ValueError:
|
||||
# File is not under root path
|
||||
return False
|
||||
|
||||
# Normalize path separators for consistent matching
|
||||
rel_path = rel_path.replace(os.sep, '/')
|
||||
|
||||
for pattern in gitignore_patterns:
|
||||
if self._matches_pattern(rel_path, pattern):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def _matches_pattern(self, file_path: str, pattern: str) -> bool:
|
||||
"""Check if a file path matches a gitignore pattern."""
|
||||
# Normalize pattern separators
|
||||
pattern = pattern.replace(os.sep, '/')
|
||||
|
||||
# Handle different pattern types
|
||||
if pattern.startswith('*/'):
|
||||
# Pattern like */pattern - matches at any level
|
||||
sub_pattern = pattern[2:]
|
||||
return fnmatch.fnmatch(file_path, f"*/{sub_pattern}") or fnmatch.fnmatch(file_path, sub_pattern)
|
||||
elif '/' in pattern:
|
||||
# Pattern contains slash - match exact path
|
||||
return fnmatch.fnmatch(file_path, pattern)
|
||||
else:
|
||||
# Simple pattern - match filename or directory at any level
|
||||
parts = file_path.split('/')
|
||||
return any(fnmatch.fnmatch(part, pattern) for part in parts)
|
||||
|
||||
|
||||
def parse_gitignore(gitignore_path: str) -> List[str]:
|
||||
"""Convenience function to parse a single .gitignore file."""
|
||||
parser = GitignoreParser()
|
||||
return parser.parse_file(gitignore_path)
|
||||
|
||||
|
||||
def get_all_gitignore_patterns(root_path: str = ".") -> List[str]:
|
||||
"""Convenience function to get all gitignore patterns in a repository."""
|
||||
parser = GitignoreParser(root_path)
|
||||
return parser.parse_all_gitignores()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
|
||||
if len(sys.argv) > 1:
|
||||
gitignore_path = sys.argv[1]
|
||||
patterns = parse_gitignore(gitignore_path)
|
||||
print(f"Parsed {len(patterns)} patterns from {gitignore_path}:")
|
||||
for pattern in patterns:
|
||||
print(f" {pattern}")
|
||||
else:
|
||||
# Parse all .gitignore files in current directory
|
||||
patterns = get_all_gitignore_patterns()
|
||||
print(f"Found {len(patterns)} gitignore patterns:")
|
||||
for pattern in patterns:
|
||||
print(f" {pattern}")
|
||||
500
.claude/python_script/core/path_matcher.py
Normal file
500
.claude/python_script/core/path_matcher.py
Normal file
@@ -0,0 +1,500 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Path Matcher Module for UltraThink Path-Aware Analyzer
|
||||
Matches files to analysis context and ranks them by relevance.
|
||||
"""
|
||||
|
||||
import re
|
||||
import logging
|
||||
import fnmatch
|
||||
from typing import Dict, List, Tuple, Optional, Set
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
import math
|
||||
|
||||
from .file_indexer import FileInfo
|
||||
from .context_analyzer import AnalysisResult
|
||||
|
||||
@dataclass
|
||||
class MatchResult:
|
||||
"""Result of path matching with relevance score."""
|
||||
file_info: FileInfo
|
||||
relevance_score: float
|
||||
match_reasons: List[str]
|
||||
category_bonus: float
|
||||
|
||||
@dataclass
|
||||
class PathMatchingResult:
|
||||
"""Complete result of path matching operation."""
|
||||
matched_files: List[MatchResult]
|
||||
total_tokens: int
|
||||
categories: Dict[str, int]
|
||||
patterns_used: List[str]
|
||||
confidence_score: float
|
||||
|
||||
class PathMatcher:
|
||||
"""Matches files to analysis context using various algorithms."""
|
||||
|
||||
def __init__(self, config: Dict):
|
||||
self.config = config
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
# Load scoring weights
|
||||
self.weights = config.get('path_matching', {}).get('weights', {
|
||||
'keyword_match': 0.4,
|
||||
'extension_match': 0.2,
|
||||
'directory_context': 0.2,
|
||||
'file_size_penalty': 0.1,
|
||||
'recency_bonus': 0.1
|
||||
})
|
||||
|
||||
# Load limits
|
||||
self.max_files_per_category = config.get('path_matching', {}).get('max_files_per_category', 20)
|
||||
self.min_relevance_score = config.get('path_matching', {}).get('min_relevance_score', 0.1)
|
||||
self.max_total_files = config.get('output', {}).get('max_total_files', 50)
|
||||
|
||||
# Load always include patterns
|
||||
self.always_include = config.get('output', {}).get('always_include', [])
|
||||
|
||||
# Category priorities
|
||||
self.category_priorities = {
|
||||
'code': 1.0,
|
||||
'config': 0.8,
|
||||
'docs': 0.6,
|
||||
'web': 0.4,
|
||||
'other': 0.2
|
||||
}
|
||||
|
||||
def _calculate_keyword_score(self, file_info: FileInfo, keywords: List[str]) -> Tuple[float, List[str]]:
|
||||
"""Calculate score based on keyword matches in file path."""
|
||||
if not keywords:
|
||||
return 0.0, []
|
||||
|
||||
path_lower = file_info.relative_path.lower()
|
||||
filename_lower = Path(file_info.relative_path).name.lower()
|
||||
|
||||
matches = []
|
||||
score = 0.0
|
||||
|
||||
for keyword in keywords:
|
||||
keyword_lower = keyword.lower()
|
||||
|
||||
# Exact filename match (highest weight)
|
||||
if keyword_lower in filename_lower:
|
||||
score += 2.0
|
||||
matches.append(f"filename:{keyword}")
|
||||
continue
|
||||
|
||||
# Directory name match
|
||||
if keyword_lower in path_lower:
|
||||
score += 1.0
|
||||
matches.append(f"path:{keyword}")
|
||||
continue
|
||||
|
||||
# Partial match in path components
|
||||
path_parts = path_lower.split('/')
|
||||
for part in path_parts:
|
||||
if keyword_lower in part:
|
||||
score += 0.5
|
||||
matches.append(f"partial:{keyword}")
|
||||
break
|
||||
|
||||
# Normalize by number of keywords
|
||||
normalized_score = score / len(keywords) if keywords else 0.0
|
||||
return min(normalized_score, 1.0), matches
|
||||
|
||||
def _calculate_extension_score(self, file_info: FileInfo, languages: List[str]) -> float:
|
||||
"""Calculate score based on file extension relevance."""
|
||||
if not languages:
|
||||
return 0.5 # Neutral score
|
||||
|
||||
extension = file_info.extension.lower()
|
||||
|
||||
# Language-specific extension mapping
|
||||
lang_extensions = {
|
||||
'python': ['.py', '.pyx', '.pyi'],
|
||||
'javascript': ['.js', '.jsx', '.mjs'],
|
||||
'typescript': ['.ts', '.tsx'],
|
||||
'java': ['.java'],
|
||||
'go': ['.go'],
|
||||
'rust': ['.rs'],
|
||||
'cpp': ['.cpp', '.cc', '.cxx', '.c', '.h', '.hpp'],
|
||||
'csharp': ['.cs'],
|
||||
'php': ['.php'],
|
||||
'ruby': ['.rb'],
|
||||
'shell': ['.sh', '.bash', '.zsh']
|
||||
}
|
||||
|
||||
score = 0.0
|
||||
for language in languages:
|
||||
if language in lang_extensions:
|
||||
if extension in lang_extensions[language]:
|
||||
score = 1.0
|
||||
break
|
||||
|
||||
# Fallback to category-based scoring
|
||||
if score == 0.0:
|
||||
category_scores = {
|
||||
'code': 1.0,
|
||||
'config': 0.8,
|
||||
'docs': 0.6,
|
||||
'web': 0.4,
|
||||
'other': 0.2
|
||||
}
|
||||
score = category_scores.get(file_info.category, 0.2)
|
||||
|
||||
return score
|
||||
|
||||
def _calculate_directory_score(self, file_info: FileInfo, domains: List[str]) -> Tuple[float, List[str]]:
|
||||
"""Calculate score based on directory context."""
|
||||
if not domains:
|
||||
return 0.0, []
|
||||
|
||||
path_parts = file_info.relative_path.lower().split('/')
|
||||
matches = []
|
||||
score = 0.0
|
||||
|
||||
# Domain-specific directory patterns
|
||||
domain_patterns = {
|
||||
'auth': ['auth', 'authentication', 'login', 'user', 'account'],
|
||||
'authentication': ['auth', 'authentication', 'login', 'user', 'account'],
|
||||
'database': ['db', 'database', 'model', 'entity', 'migration', 'schema'],
|
||||
'api': ['api', 'rest', 'graphql', 'route', 'controller', 'handler'],
|
||||
'frontend': ['ui', 'component', 'view', 'template', 'client', 'web'],
|
||||
'backend': ['service', 'server', 'core', 'business', 'logic'],
|
||||
'test': ['test', 'spec', 'tests', '__tests__', 'testing'],
|
||||
'testing': ['test', 'spec', 'tests', '__tests__', 'testing'],
|
||||
'config': ['config', 'configuration', 'env', 'settings'],
|
||||
'configuration': ['config', 'configuration', 'env', 'settings'],
|
||||
'util': ['util', 'utils', 'helper', 'common', 'shared', 'lib'],
|
||||
'utility': ['util', 'utils', 'helper', 'common', 'shared', 'lib']
|
||||
}
|
||||
|
||||
for domain in domains:
|
||||
if domain in domain_patterns:
|
||||
patterns = domain_patterns[domain]
|
||||
for pattern in patterns:
|
||||
for part in path_parts:
|
||||
if pattern in part:
|
||||
score += 1.0
|
||||
matches.append(f"dir:{domain}->{pattern}")
|
||||
break
|
||||
|
||||
# Normalize by number of domains
|
||||
normalized_score = score / len(domains) if domains else 0.0
|
||||
return min(normalized_score, 1.0), matches
|
||||
|
||||
def _calculate_size_penalty(self, file_info: FileInfo) -> float:
|
||||
"""Calculate penalty for very large files."""
|
||||
max_size = self.config.get('performance', {}).get('max_file_size', 10485760) # 10MB
|
||||
|
||||
if file_info.size > max_size:
|
||||
# Heavy penalty for oversized files
|
||||
return -0.5
|
||||
elif file_info.size > max_size * 0.5:
|
||||
# Light penalty for large files
|
||||
return -0.2
|
||||
else:
|
||||
return 0.0
|
||||
|
||||
def _calculate_recency_bonus(self, file_info: FileInfo) -> float:
|
||||
"""Calculate bonus for recently modified files."""
|
||||
import time
|
||||
|
||||
current_time = time.time()
|
||||
file_age = current_time - file_info.modified_time
|
||||
|
||||
# Files modified in last day get bonus
|
||||
if file_age < 86400: # 1 day
|
||||
return 0.3
|
||||
elif file_age < 604800: # 1 week
|
||||
return 0.1
|
||||
else:
|
||||
return 0.0
|
||||
|
||||
def calculate_relevance_score(self, file_info: FileInfo, analysis: AnalysisResult) -> MatchResult:
|
||||
"""Calculate overall relevance score for a file."""
|
||||
# Calculate individual scores
|
||||
keyword_score, keyword_matches = self._calculate_keyword_score(file_info, analysis.keywords)
|
||||
extension_score = self._calculate_extension_score(file_info, analysis.languages)
|
||||
directory_score, dir_matches = self._calculate_directory_score(file_info, analysis.domains)
|
||||
size_penalty = self._calculate_size_penalty(file_info)
|
||||
recency_bonus = self._calculate_recency_bonus(file_info)
|
||||
|
||||
# Apply weights
|
||||
weighted_score = (
|
||||
keyword_score * self.weights.get('keyword_match', 0.4) +
|
||||
extension_score * self.weights.get('extension_match', 0.2) +
|
||||
directory_score * self.weights.get('directory_context', 0.2) +
|
||||
size_penalty * self.weights.get('file_size_penalty', 0.1) +
|
||||
recency_bonus * self.weights.get('recency_bonus', 0.1)
|
||||
)
|
||||
|
||||
# Category bonus
|
||||
category_bonus = self.category_priorities.get(file_info.category, 0.2)
|
||||
|
||||
# Final score with category bonus
|
||||
final_score = weighted_score + (category_bonus * 0.1)
|
||||
|
||||
# Collect match reasons
|
||||
match_reasons = keyword_matches + dir_matches
|
||||
if extension_score > 0.5:
|
||||
match_reasons.append(f"extension:{file_info.extension}")
|
||||
if recency_bonus > 0:
|
||||
match_reasons.append("recent")
|
||||
|
||||
return MatchResult(
|
||||
file_info=file_info,
|
||||
relevance_score=max(0.0, final_score),
|
||||
match_reasons=match_reasons,
|
||||
category_bonus=category_bonus
|
||||
)
|
||||
|
||||
def match_by_patterns(self, file_index: Dict[str, FileInfo], patterns: List[str]) -> List[FileInfo]:
|
||||
"""Match files using explicit glob patterns."""
|
||||
matched_files = []
|
||||
|
||||
for pattern in patterns:
|
||||
for path, file_info in file_index.items():
|
||||
# Try matching both relative path and full path
|
||||
if (fnmatch.fnmatch(path, pattern) or
|
||||
fnmatch.fnmatch(file_info.path, pattern) or
|
||||
fnmatch.fnmatch(Path(path).name, pattern)):
|
||||
matched_files.append(file_info)
|
||||
|
||||
# Remove duplicates based on path
|
||||
seen_paths = set()
|
||||
unique_files = []
|
||||
for file_info in matched_files:
|
||||
if file_info.relative_path not in seen_paths:
|
||||
seen_paths.add(file_info.relative_path)
|
||||
unique_files.append(file_info)
|
||||
return unique_files
|
||||
|
||||
def match_always_include(self, file_index: Dict[str, FileInfo]) -> List[FileInfo]:
|
||||
"""Match files that should always be included."""
|
||||
return self.match_by_patterns(file_index, self.always_include)
|
||||
|
||||
def rank_files(self, files: List[FileInfo], analysis: AnalysisResult) -> List[MatchResult]:
|
||||
"""Rank files by relevance score."""
|
||||
match_results = []
|
||||
|
||||
for file_info in files:
|
||||
match_result = self.calculate_relevance_score(file_info, analysis)
|
||||
if match_result.relevance_score >= self.min_relevance_score:
|
||||
match_results.append(match_result)
|
||||
|
||||
# Sort by relevance score (descending)
|
||||
match_results.sort(key=lambda x: x.relevance_score, reverse=True)
|
||||
|
||||
return match_results
|
||||
|
||||
def select_best_files(self, ranked_files: List[MatchResult], token_limit: Optional[int] = None) -> List[MatchResult]:
|
||||
"""Select the best files within token limits and category constraints."""
|
||||
if not ranked_files:
|
||||
return []
|
||||
|
||||
selected_files = []
|
||||
total_tokens = 0
|
||||
category_counts = {}
|
||||
|
||||
for match_result in ranked_files:
|
||||
file_info = match_result.file_info
|
||||
category = file_info.category
|
||||
|
||||
# Check category limit
|
||||
if category_counts.get(category, 0) >= self.max_files_per_category:
|
||||
continue
|
||||
|
||||
# Check token limit
|
||||
if token_limit and total_tokens + file_info.estimated_tokens > token_limit:
|
||||
continue
|
||||
|
||||
# Check total file limit
|
||||
if len(selected_files) >= self.max_total_files:
|
||||
break
|
||||
|
||||
# Add file
|
||||
selected_files.append(match_result)
|
||||
total_tokens += file_info.estimated_tokens
|
||||
category_counts[category] = category_counts.get(category, 0) + 1
|
||||
|
||||
return selected_files
|
||||
|
||||
def match_files(self, file_index: Dict[str, FileInfo], analysis: AnalysisResult,
|
||||
token_limit: Optional[int] = None, explicit_patterns: Optional[List[str]] = None) -> PathMatchingResult:
|
||||
"""Main file matching function."""
|
||||
self.logger.info(f"Matching files for analysis with {len(analysis.keywords)} keywords and {len(analysis.domains)} domains")
|
||||
|
||||
# Start with always-include files
|
||||
always_include_files = self.match_always_include(file_index)
|
||||
self.logger.debug(f"Always include: {len(always_include_files)} files")
|
||||
|
||||
# Add explicit pattern matches
|
||||
pattern_files = []
|
||||
patterns_used = []
|
||||
if explicit_patterns:
|
||||
pattern_files = self.match_by_patterns(file_index, explicit_patterns)
|
||||
patterns_used.extend(explicit_patterns)
|
||||
self.logger.debug(f"Explicit patterns: {len(pattern_files)} files")
|
||||
|
||||
# Add suggested pattern matches
|
||||
if analysis.file_patterns:
|
||||
suggested_files = self.match_by_patterns(file_index, analysis.file_patterns)
|
||||
pattern_files.extend(suggested_files)
|
||||
patterns_used.extend(analysis.file_patterns)
|
||||
self.logger.debug(f"Suggested patterns: {len(suggested_files)} files")
|
||||
|
||||
# Combine all candidate files and remove duplicates
|
||||
all_files = always_include_files + pattern_files + list(file_index.values())
|
||||
seen_paths = set()
|
||||
all_candidates = []
|
||||
for file_info in all_files:
|
||||
if file_info.relative_path not in seen_paths:
|
||||
seen_paths.add(file_info.relative_path)
|
||||
all_candidates.append(file_info)
|
||||
self.logger.debug(f"Total candidates: {len(all_candidates)} files")
|
||||
|
||||
# Rank all candidates
|
||||
ranked_files = self.rank_files(all_candidates, analysis)
|
||||
self.logger.debug(f"Files above threshold: {len(ranked_files)}")
|
||||
|
||||
# Select best files within limits
|
||||
selected_files = self.select_best_files(ranked_files, token_limit)
|
||||
self.logger.info(f"Selected {len(selected_files)} files")
|
||||
|
||||
# Calculate statistics
|
||||
total_tokens = sum(match.file_info.estimated_tokens for match in selected_files)
|
||||
categories = {}
|
||||
for match in selected_files:
|
||||
category = match.file_info.category
|
||||
categories[category] = categories.get(category, 0) + 1
|
||||
|
||||
# Calculate confidence score
|
||||
confidence_score = self._calculate_confidence(selected_files, analysis)
|
||||
|
||||
return PathMatchingResult(
|
||||
matched_files=selected_files,
|
||||
total_tokens=total_tokens,
|
||||
categories=categories,
|
||||
patterns_used=patterns_used,
|
||||
confidence_score=confidence_score
|
||||
)
|
||||
|
||||
def _calculate_confidence(self, selected_files: List[MatchResult], analysis: AnalysisResult) -> float:
|
||||
"""Calculate confidence score for the matching result."""
|
||||
if not selected_files:
|
||||
return 0.0
|
||||
|
||||
# Average relevance score
|
||||
avg_relevance = sum(match.relevance_score for match in selected_files) / len(selected_files)
|
||||
|
||||
# Keyword coverage (how many keywords are represented)
|
||||
keyword_coverage = 0.0
|
||||
if analysis.keywords:
|
||||
covered_keywords = set()
|
||||
for match in selected_files:
|
||||
for reason in match.match_reasons:
|
||||
if reason.startswith('filename:') or reason.startswith('path:'):
|
||||
keyword = reason.split(':', 1)[1]
|
||||
covered_keywords.add(keyword)
|
||||
keyword_coverage = len(covered_keywords) / len(analysis.keywords)
|
||||
|
||||
# Domain coverage
|
||||
domain_coverage = 0.0
|
||||
if analysis.domains:
|
||||
covered_domains = set()
|
||||
for match in selected_files:
|
||||
for reason in match.match_reasons:
|
||||
if reason.startswith('dir:'):
|
||||
domain = reason.split('->', 1)[0].split(':', 1)[1]
|
||||
covered_domains.add(domain)
|
||||
domain_coverage = len(covered_domains) / len(analysis.domains)
|
||||
|
||||
# Weighted confidence score
|
||||
confidence = (
|
||||
avg_relevance * 0.5 +
|
||||
keyword_coverage * 0.3 +
|
||||
domain_coverage * 0.2
|
||||
)
|
||||
|
||||
return min(confidence, 1.0)
|
||||
|
||||
def format_patterns(self, selected_files: List[MatchResult]) -> List[str]:
|
||||
"""Format selected files as @{pattern} strings."""
|
||||
pattern_format = self.config.get('output', {}).get('pattern_format', '@{{{path}}}')
|
||||
|
||||
patterns = []
|
||||
for match in selected_files:
|
||||
pattern = pattern_format.format(path=match.file_info.relative_path)
|
||||
patterns.append(pattern)
|
||||
|
||||
return patterns
|
||||
|
||||
def main():
|
||||
"""Command-line interface for path matcher."""
|
||||
    import yaml
    import argparse
    import json
    # Support both package execution (python -m core.path_matcher) and direct
    # script execution, where the relative imports would raise ImportError.
    try:
        from .file_indexer import FileIndexer
        from .context_analyzer import ContextAnalyzer
    except ImportError:
        from file_indexer import FileIndexer
        from context_analyzer import ContextAnalyzer
|
||||
|
||||
parser = argparse.ArgumentParser(description="Path Matcher for UltraThink")
|
||||
parser.add_argument("prompt", help="Prompt to analyze and match")
|
||||
parser.add_argument("--config", default="config.yaml", help="Configuration file path")
|
||||
parser.add_argument("--token-limit", type=int, help="Token limit for selection")
|
||||
parser.add_argument("--patterns", nargs="*", help="Explicit patterns to include")
|
||||
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Setup logging
|
||||
level = logging.DEBUG if args.verbose else logging.INFO
|
||||
logging.basicConfig(level=level, format='%(levelname)s: %(message)s')
|
||||
|
||||
# Load configuration
|
||||
config_path = Path(__file__).parent / args.config
|
||||
with open(config_path, 'r', encoding='utf-8') as f:
|
||||
config = yaml.safe_load(f)
|
||||
|
||||
# Create components
|
||||
indexer = FileIndexer(config)
|
||||
analyzer = ContextAnalyzer(config)
|
||||
matcher = PathMatcher(config)
|
||||
|
||||
# Build file index
|
||||
file_index = indexer.load_index()
|
||||
if not file_index:
|
||||
print("Building file index...")
|
||||
file_index = indexer.build_index()
|
||||
|
||||
# Analyze prompt
|
||||
analysis = analyzer.analyze(args.prompt)
|
||||
|
||||
# Match files
|
||||
result = matcher.match_files(
|
||||
file_index=file_index,
|
||||
analysis=analysis,
|
||||
token_limit=args.token_limit,
|
||||
explicit_patterns=args.patterns
|
||||
)
|
||||
|
||||
# Output results
|
||||
print(f"Matched {len(result.matched_files)} files (~{result.total_tokens:,} tokens)")
|
||||
print(f"Categories: {result.categories}")
|
||||
print(f"Confidence: {result.confidence_score:.2f}")
|
||||
print()
|
||||
|
||||
patterns = matcher.format_patterns(result.matched_files)
|
||||
print("Patterns:")
|
||||
for pattern in patterns[:20]: # Limit output
|
||||
print(f" {pattern}")
|
||||
|
||||
if args.verbose:
|
||||
print("\nDetailed matches:")
|
||||
for match in result.matched_files[:10]:
|
||||
print(f" {match.file_info.relative_path} (score: {match.relevance_score:.3f})")
|
||||
print(f" Reasons: {', '.join(match.match_reasons)}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
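For reference, a minimal sketch of how the weighted relevance formula in calculate_relevance_score() combines its components under the default weights; the component values below are illustrative only, not taken from a real run:

# Illustrative arithmetic only; assumes the default weights configured in __init__.
keyword_score, extension_score, directory_score = 1.0, 1.0, 0.5
size_penalty, recency_bonus, category_bonus = 0.0, 0.3, 1.0  # 'code' category

weighted = (keyword_score * 0.4 + extension_score * 0.2 +
            directory_score * 0.2 + size_penalty * 0.1 + recency_bonus * 0.1)
final_score = weighted + category_bonus * 0.1  # 0.73 + 0.10 = 0.83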
204 .claude/python_script/indexer.py Normal file
@@ -0,0 +1,204 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
File Structure Indexer
|
||||
Builds and maintains file indices for intelligent analysis.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import argparse
|
||||
import logging
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
# Add current directory to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
from core.config import get_config
|
||||
from core.file_indexer import FileIndexer, IndexStats
|
||||
from core.embedding_manager import EmbeddingManager
|
||||
from utils.colors import Colors
|
||||
|
||||
|
||||
class ProjectIndexer:
|
||||
"""Manages file indexing and project statistics."""
|
||||
|
||||
def __init__(self, config_path: Optional[str] = None, root_path: str = "."):
|
||||
self.root_path = Path(root_path).resolve()
|
||||
self.config = get_config(config_path)
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(
|
||||
level=getattr(logging, self.config.get('logging.level', 'INFO')),
|
||||
format=self.config.get('logging.format', '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
)
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
# Initialize core components
|
||||
self.indexer = FileIndexer(self.config, str(self.root_path))
|
||||
|
||||
# Initialize embedding manager if enabled
|
||||
self.embedding_manager = None
|
||||
if self.config.is_embedding_enabled():
|
||||
try:
|
||||
self.embedding_manager = EmbeddingManager(self.config)
|
||||
except ImportError:
|
||||
self.logger.warning("Embedding dependencies not available. Install sentence-transformers for enhanced functionality.")
|
||||
|
||||
def build_index(self) -> IndexStats:
|
||||
"""Build or update the file index."""
|
||||
print(Colors.yellow("Building file index..."))
|
||||
start_time = time.time()
|
||||
|
||||
self.indexer.build_index()
|
||||
stats = self.indexer.get_stats()
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
if stats:
|
||||
print(Colors.green(f"Index built: {stats.total_files} files, ~{stats.total_tokens:,} tokens ({elapsed:.2f}s)"))
|
||||
else:
|
||||
print(Colors.green(f"Index built successfully ({elapsed:.2f}s)"))
|
||||
|
||||
return stats
|
||||
|
||||
def update_embeddings(self) -> bool:
|
||||
"""Update embeddings for semantic similarity."""
|
||||
if not self.embedding_manager:
|
||||
print(Colors.error("Embedding functionality not available"))
|
||||
return False
|
||||
|
||||
print(Colors.yellow("Updating embeddings..."))
|
||||
start_time = time.time()
|
||||
|
||||
# Load file index
|
||||
index = self.indexer.load_index()
|
||||
if not index:
|
||||
print(Colors.warning("No file index found. Building index first..."))
|
||||
self.build_index()
|
||||
index = self.indexer.load_index()
|
||||
|
||||
try:
|
||||
count = self.embedding_manager.update_embeddings(index)
|
||||
elapsed = time.time() - start_time
|
||||
print(Colors.green(f"Updated {count} embeddings ({elapsed:.2f}s)"))
|
||||
return True
|
||||
except Exception as e:
|
||||
print(Colors.error(f"Failed to update embeddings: {e}"))
|
||||
return False
|
||||
|
||||
def get_project_stats(self) -> Dict[str, Any]:
|
||||
"""Get comprehensive project statistics."""
|
||||
stats = self.indexer.get_stats()
|
||||
embedding_stats = {}
|
||||
|
||||
if self.embedding_manager:
|
||||
embedding_stats = {
|
||||
'embeddings_exist': self.embedding_manager.embeddings_exist(),
|
||||
'embedding_count': len(self.embedding_manager._load_embedding_cache()) if self.embedding_manager.embeddings_exist() else 0
|
||||
}
|
||||
|
||||
project_size = self._classify_project_size(stats.total_tokens if stats else 0)
|
||||
|
||||
return {
|
||||
'files': stats.total_files if stats else 0,
|
||||
'tokens': stats.total_tokens if stats else 0,
|
||||
'size_bytes': stats.total_size if stats else 0,
|
||||
'categories': stats.categories if stats else {},
|
||||
'project_size': project_size,
|
||||
'last_updated': stats.last_updated if stats else 0,
|
||||
'embeddings': embedding_stats,
|
||||
'config': {
|
||||
'cache_dir': self.config.get_cache_dir(),
|
||||
'embedding_enabled': self.config.is_embedding_enabled(),
|
||||
'exclude_patterns_count': len(self.config.get_exclude_patterns())
|
||||
}
|
||||
}
|
||||
|
||||
def _classify_project_size(self, tokens: int) -> str:
|
||||
"""Classify project size based on token count."""
|
||||
small_limit = self.config.get('token_limits.small_project', 500000)
|
||||
medium_limit = self.config.get('token_limits.medium_project', 2000000)
|
||||
|
||||
if tokens < small_limit:
|
||||
return "small"
|
||||
elif tokens < medium_limit:
|
||||
return "medium"
|
||||
else:
|
||||
return "large"
|
||||
|
||||
def cleanup_cache(self):
|
||||
"""Clean up old cache files."""
|
||||
cache_dir = Path(self.config.get_cache_dir())
|
||||
if cache_dir.exists():
|
||||
print(Colors.yellow("Cleaning up cache..."))
|
||||
for file in cache_dir.glob("*"):
|
||||
if file.is_file():
|
||||
file.unlink()
|
||||
print(f"Removed: {file}")
|
||||
print(Colors.green("Cache cleaned"))
|
||||
|
||||
|
||||
def main():
|
||||
"""CLI entry point for indexer."""
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Project File Indexer - Build and manage file indices",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
python indexer.py --build # Build file index
|
||||
python indexer.py --stats # Show project statistics
|
||||
python indexer.py --embeddings # Update embeddings
|
||||
python indexer.py --cleanup # Clean cache
|
||||
"""
|
||||
)
|
||||
|
||||
parser.add_argument('--build', action='store_true', help='Build file index')
|
||||
parser.add_argument('--stats', action='store_true', help='Show project statistics')
|
||||
parser.add_argument('--embeddings', action='store_true', help='Update embeddings')
|
||||
parser.add_argument('--cleanup', action='store_true', help='Clean up cache files')
|
||||
parser.add_argument('--output', choices=['json', 'text'], default='text', help='Output format')
|
||||
parser.add_argument('--config', help='Configuration file path')
|
||||
parser.add_argument('--root', default='.', help='Root directory to analyze')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Require at least one action
|
||||
if not any([args.build, args.stats, args.embeddings, args.cleanup]):
|
||||
parser.error("At least one action is required: --build, --stats, --embeddings, or --cleanup")
|
||||
|
||||
# Create indexer
|
||||
indexer = ProjectIndexer(args.config, args.root)
|
||||
|
||||
try:
|
||||
if args.cleanup:
|
||||
indexer.cleanup_cache()
|
||||
|
||||
if args.build:
|
||||
indexer.build_index()
|
||||
|
||||
if args.embeddings:
|
||||
indexer.update_embeddings()
|
||||
|
||||
if args.stats:
|
||||
stats = indexer.get_project_stats()
|
||||
if args.output == 'json':
|
||||
print(json.dumps(stats, indent=2, default=str))
|
||||
else:
|
||||
print(f"Total files: {stats['files']}")
|
||||
print(f"Total tokens: {stats['tokens']:,}")
|
||||
print(f"Project size: {stats['project_size']}")
|
||||
print(f"Categories: {stats['categories']}")
|
||||
if 'embeddings' in stats:
|
||||
print(f"Embeddings: {stats['embeddings']['embedding_count']}")
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print(Colors.warning("\nOperation interrupted by user"))
|
||||
sys.exit(1)
|
||||
except Exception as e:
|
||||
print(Colors.error(f"Operation failed: {e}"))
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
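A minimal usage sketch of the ProjectIndexer API defined above (assumes it is run from the .claude/python_script directory so the module imports resolve):

from indexer import ProjectIndexer

indexer = ProjectIndexer(config_path=None, root_path=".")
stats = indexer.build_index()          # builds or refreshes the file index
print(indexer.get_project_stats()["project_size"])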
189 .claude/python_script/install.sh Normal file
@@ -0,0 +1,189 @@
|
||||
#!/bin/bash
|
||||
# Installation script for UltraThink Path-Aware Analyzer
|
||||
|
||||
set -e
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Functions
|
||||
print_status() {
|
||||
echo -e "${BLUE}[INFO]${NC} $1"
|
||||
}
|
||||
|
||||
print_success() {
|
||||
echo -e "${GREEN}[SUCCESS]${NC} $1"
|
||||
}
|
||||
|
||||
print_warning() {
|
||||
echo -e "${YELLOW}[WARNING]${NC} $1"
|
||||
}
|
||||
|
||||
print_error() {
|
||||
echo -e "${RED}[ERROR]${NC} $1"
|
||||
}
|
||||
|
||||
# Check Python version
|
||||
check_python() {
|
||||
if command -v python3 &> /dev/null; then
|
||||
PYTHON_VERSION=$(python3 -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')")
|
||||
PYTHON_CMD="python3"
|
||||
elif command -v python &> /dev/null; then
|
||||
PYTHON_VERSION=$(python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')")
|
||||
PYTHON_CMD="python"
|
||||
else
|
||||
print_error "Python not found. Please install Python 3.8 or later."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
    # Check version (use Python itself; a float comparison via bc mis-handles
    # versions such as 3.10 and requires bc to be installed)
    if "$PYTHON_CMD" -c 'import sys; sys.exit(0 if sys.version_info >= (3, 8) else 1)'; then
        print_success "Python $PYTHON_VERSION found"
    else
        print_error "Python 3.8 or later required. Found Python $PYTHON_VERSION"
        exit 1
    fi
|
||||
}
|
||||
|
||||
# Install dependencies
|
||||
install_dependencies() {
|
||||
print_status "Installing core dependencies..."
|
||||
|
||||
# Install core requirements
|
||||
$PYTHON_CMD -m pip install --user -r requirements.txt
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
print_success "Core dependencies installed"
|
||||
else
|
||||
print_error "Failed to install core dependencies"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Install optional dependencies
|
||||
install_optional() {
|
||||
read -p "Install RAG/embedding features? (requires ~200MB download) [y/N]: " install_rag
|
||||
if [[ $install_rag =~ ^[Yy]$ ]]; then
|
||||
print_status "Installing RAG dependencies..."
|
||||
$PYTHON_CMD -m pip install --user sentence-transformers numpy
|
||||
if [ $? -eq 0 ]; then
|
||||
print_success "RAG dependencies installed"
|
||||
else
|
||||
print_warning "Failed to install RAG dependencies (optional)"
|
||||
fi
|
||||
fi
|
||||
|
||||
read -p "Install development tools? [y/N]: " install_dev
|
||||
if [[ $install_dev =~ ^[Yy]$ ]]; then
|
||||
print_status "Installing development dependencies..."
|
||||
$PYTHON_CMD -m pip install --user pytest pytest-cov black flake8
|
||||
if [ $? -eq 0 ]; then
|
||||
print_success "Development dependencies installed"
|
||||
else
|
||||
print_warning "Failed to install development dependencies (optional)"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
# Create wrapper script
|
||||
create_wrapper() {
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"
|
||||
WRAPPER_PATH="$HOME/.local/bin/ultrathink"
|
||||
|
||||
# Create .local/bin if it doesn't exist
|
||||
mkdir -p "$HOME/.local/bin"
|
||||
|
||||
# Create wrapper script
|
||||
cat > "$WRAPPER_PATH" << EOF
|
||||
#!/bin/bash
|
||||
# UltraThink Path-Aware Analyzer Wrapper
|
||||
# Auto-generated by install.sh
|
||||
|
||||
SCRIPT_DIR="$SCRIPT_DIR"
|
||||
export PYTHONPATH="\$SCRIPT_DIR:\$PYTHONPATH"
|
||||
|
||||
exec $PYTHON_CMD "\$SCRIPT_DIR/path_aware_analyzer.py" "\$@"
|
||||
EOF
|
||||
|
||||
chmod +x "$WRAPPER_PATH"
|
||||
|
||||
if [ -f "$WRAPPER_PATH" ]; then
|
||||
print_success "Wrapper script created at $WRAPPER_PATH"
|
||||
else
|
||||
print_error "Failed to create wrapper script"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Update configuration
|
||||
setup_config() {
|
||||
print_status "Setting up configuration..."
|
||||
|
||||
# Create cache directory
|
||||
mkdir -p .claude/cache/embeddings
|
||||
|
||||
# Check if config needs updating
|
||||
if [ ! -f config.yaml ]; then
|
||||
print_error "Configuration file config.yaml not found"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
print_success "Configuration ready"
|
||||
}
|
||||
|
||||
# Test installation
|
||||
test_installation() {
|
||||
print_status "Testing installation..."
|
||||
|
||||
# Test basic functionality
|
||||
if $PYTHON_CMD path_aware_analyzer.py --stats &> /dev/null; then
|
||||
print_success "Installation test passed"
|
||||
else
|
||||
print_warning "Installation test failed - but this might be normal for first run"
|
||||
fi
|
||||
}
|
||||
|
||||
# Add to PATH instructions
|
||||
show_path_instructions() {
|
||||
if [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then
|
||||
print_warning "Add $HOME/.local/bin to your PATH to use 'ultrathink' command globally"
|
||||
echo ""
|
||||
echo "Add this line to your ~/.bashrc or ~/.zshrc:"
|
||||
echo "export PATH=\"\$HOME/.local/bin:\$PATH\""
|
||||
echo ""
|
||||
echo "Or run: echo 'export PATH=\"\$HOME/.local/bin:\$PATH\"' >> ~/.bashrc"
|
||||
echo "Then: source ~/.bashrc"
|
||||
fi
|
||||
}
|
||||
|
||||
# Main installation
|
||||
main() {
|
||||
print_status "Installing UltraThink Path-Aware Analyzer..."
|
||||
echo ""
|
||||
|
||||
check_python
|
||||
install_dependencies
|
||||
install_optional
|
||||
create_wrapper
|
||||
setup_config
|
||||
test_installation
|
||||
|
||||
echo ""
|
||||
print_success "Installation complete!"
|
||||
echo ""
|
||||
|
||||
print_status "Usage examples:"
|
||||
echo " ./path_aware_analyzer.py \"analyze authentication flow\""
|
||||
echo " ultrathink \"implement user login feature\""
|
||||
echo " ultrathink --tool gemini \"review API endpoints\""
|
||||
echo ""
|
||||
|
||||
show_path_instructions
|
||||
}
|
||||
|
||||
# Run main function
|
||||
main "$@"
|
||||
15 .claude/python_script/requirements.txt Normal file
@@ -0,0 +1,15 @@
# Core dependencies
numpy>=1.21.0
scikit-learn>=1.0.0

# Sentence Transformers for advanced embeddings
sentence-transformers>=2.2.0

# Optional: for better performance and additional models
torch>=1.9.0

# Development and testing
pytest>=6.0.0

# Data handling
pandas>=1.3.0
92 .claude/python_script/setup.py Normal file
@@ -0,0 +1,92 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Setup script for UltraThink Path-Aware Analyzer
|
||||
"""
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
from pathlib import Path
|
||||
|
||||
# Read README
|
||||
readme_path = Path(__file__).parent / "README.md"
|
||||
long_description = readme_path.read_text(encoding='utf-8') if readme_path.exists() else ""
|
||||
|
||||
# Read requirements
|
||||
requirements_path = Path(__file__).parent / "requirements.txt"
|
||||
requirements = []
|
||||
if requirements_path.exists():
|
||||
with open(requirements_path, 'r', encoding='utf-8') as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line and not line.startswith('#'):
|
||||
requirements.append(line)
|
||||
|
||||
setup(
|
||||
name="ultrathink-path-analyzer",
|
||||
version="1.0.0",
|
||||
description="Lightweight path-aware program for intelligent file pattern detection and analysis",
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
author="UltraThink Development Team",
|
||||
author_email="dev@ultrathink.ai",
|
||||
url="https://github.com/ultrathink/path-analyzer",
|
||||
|
||||
packages=find_packages(),
|
||||
py_modules=[
|
||||
'analyzer', # Main entry point
|
||||
],
|
||||
|
||||
install_requires=requirements,
|
||||
|
||||
extras_require={
|
||||
'rag': [
|
||||
'sentence-transformers>=2.2.0',
|
||||
'numpy>=1.21.0'
|
||||
],
|
||||
'nlp': [
|
||||
'nltk>=3.8',
|
||||
'spacy>=3.4.0'
|
||||
],
|
||||
'performance': [
|
||||
'numba>=0.56.0'
|
||||
],
|
||||
'dev': [
|
||||
'pytest>=7.0.0',
|
||||
'pytest-cov>=4.0.0',
|
||||
'black>=22.0.0',
|
||||
'flake8>=5.0.0'
|
||||
]
|
||||
},
|
||||
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'path-analyzer=cli:main',
|
||||
'path-indexer=indexer:main',
|
||||
'analyzer=analyzer:main', # Legacy compatibility
|
||||
'module-analyzer=tools.module_analyzer:main',
|
||||
'tech-stack=tools.tech_stack:main',
|
||||
],
|
||||
},
|
||||
|
||||
classifiers=[
|
||||
"Development Status :: 4 - Beta",
|
||||
"Intended Audience :: Developers",
|
||||
"Topic :: Software Development :: Tools",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Operating System :: OS Independent",
|
||||
],
|
||||
|
||||
python_requires=">=3.8",
|
||||
|
||||
keywords="ai, analysis, path-detection, code-analysis, file-matching, rag, nlp",
|
||||
|
||||
project_urls={
|
||||
"Bug Reports": "https://github.com/ultrathink/path-analyzer/issues",
|
||||
"Source": "https://github.com/ultrathink/path-analyzer",
|
||||
"Documentation": "https://github.com/ultrathink/path-analyzer/docs",
|
||||
},
|
||||
)
|
||||
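With the extras_require groups above, a local editable install with optional features can be requested via, for example, `pip install -e ".[rag,dev]"` (assuming pip and a checkout of this directory).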
13 .claude/python_script/tools/__init__.py Normal file
@@ -0,0 +1,13 @@
"""
Independent tool scripts for specialized analysis tasks.
Provides module analysis, tech stack detection, and workflow management tools.
"""

from .module_analyzer import ModuleAnalyzer, ModuleInfo
from .tech_stack import TechStackLoader

__all__ = [
    'ModuleAnalyzer',
    'ModuleInfo',
    'TechStackLoader'
]
BIN .claude/python_script/tools/__pycache__/__init__.cpython-313.pyc Normal file
Binary file not shown.
369 .claude/python_script/tools/module_analyzer.py Normal file
@@ -0,0 +1,369 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Unified Module Analyzer
|
||||
Combines functionality from detect_changed_modules.py and get_modules_by_depth.py
|
||||
into a single, comprehensive module analysis tool.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import time
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional, Set, Tuple
|
||||
from dataclasses import dataclass, asdict
|
||||
|
||||
# Add parent directory for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
from core.config import get_config
|
||||
from core.gitignore_parser import GitignoreParser
|
||||
|
||||
@dataclass
|
||||
class ModuleInfo:
|
||||
"""Information about a module/directory."""
|
||||
depth: int
|
||||
path: str
|
||||
files: int
|
||||
types: List[str]
|
||||
has_claude: bool
|
||||
status: str = "normal" # changed, normal, new, deleted
|
||||
last_modified: Optional[float] = None
|
||||
|
||||
def to_dict(self) -> Dict:
|
||||
return asdict(self)
|
||||
|
||||
class ModuleAnalyzer:
|
||||
"""Unified module analysis tool with change detection and depth analysis."""
|
||||
|
||||
def __init__(self, root_path: str = ".", config_path: Optional[str] = None):
|
||||
self.root_path = Path(root_path).resolve()
|
||||
self.config = get_config(config_path)
|
||||
|
||||
# Source file extensions for analysis
|
||||
self.source_extensions = {
|
||||
'.md', '.js', '.ts', '.jsx', '.tsx', '.py', '.go', '.rs',
|
||||
'.java', '.cpp', '.c', '.h', '.sh', '.ps1', '.json', '.yaml', '.yml',
|
||||
'.php', '.rb', '.swift', '.kt', '.scala', '.dart'
|
||||
}
|
||||
|
||||
# Initialize gitignore parser for exclusions
|
||||
self.gitignore_parser = GitignoreParser(str(self.root_path))
|
||||
self.exclude_patterns = self._build_exclusion_patterns()
|
||||
|
||||
def _build_exclusion_patterns(self) -> Set[str]:
|
||||
"""Build exclusion patterns from config and gitignore."""
|
||||
exclusions = {
|
||||
'.git', '.history', '.vscode', '__pycache__', '.pytest_cache',
|
||||
'node_modules', 'dist', 'build', '.egg-info', '.env',
|
||||
'.cache', '.tmp', '.temp', '.DS_Store', 'Thumbs.db'
|
||||
}
|
||||
|
||||
# Add patterns from config
|
||||
config_patterns = self.config.get('exclude_patterns', [])
|
||||
for pattern in config_patterns:
|
||||
# Extract directory names from patterns
|
||||
if '/' in pattern:
|
||||
parts = pattern.replace('*/', '').replace('/*', '').split('/')
|
||||
exclusions.update(part for part in parts if part and not part.startswith('*'))
|
||||
|
||||
return exclusions
|
||||
|
||||
def _should_exclude_directory(self, dir_path: Path) -> bool:
|
||||
"""Check if directory should be excluded from analysis."""
|
||||
dir_name = dir_path.name
|
||||
|
||||
# Check against exclusion patterns
|
||||
if dir_name in self.exclude_patterns:
|
||||
return True
|
||||
|
||||
# Check if directory starts with . (hidden directories)
|
||||
if dir_name.startswith('.') and dir_name not in {'.github', '.vscode'}:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def get_git_changed_files(self, since: str = "HEAD") -> Set[str]:
|
||||
"""Get files changed in git."""
|
||||
changed_files = set()
|
||||
|
||||
try:
|
||||
# Check if we're in a git repository
|
||||
subprocess.run(['git', 'rev-parse', '--git-dir'],
|
||||
check=True, capture_output=True, cwd=self.root_path)
|
||||
|
||||
# Get changes since specified reference
|
||||
commands = [
|
||||
['git', 'diff', '--name-only', since], # Changes since reference
|
||||
['git', 'diff', '--name-only', '--staged'], # Staged changes
|
||||
['git', 'ls-files', '--others', '--exclude-standard'] # Untracked files
|
||||
]
|
||||
|
||||
for cmd in commands:
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True,
|
||||
cwd=self.root_path, check=True)
|
||||
if result.stdout.strip():
|
||||
files = result.stdout.strip().split('\n')
|
||||
changed_files.update(f for f in files if f)
|
||||
except subprocess.CalledProcessError:
|
||||
continue
|
||||
|
||||
except subprocess.CalledProcessError:
|
||||
# Not a git repository or git not available
|
||||
pass
|
||||
|
||||
return changed_files
|
||||
|
||||
def get_recently_modified_files(self, hours: int = 24) -> Set[str]:
|
||||
"""Get files modified within the specified hours."""
|
||||
cutoff_time = time.time() - (hours * 3600)
|
||||
recent_files = set()
|
||||
|
||||
try:
|
||||
for file_path in self.root_path.rglob('*'):
|
||||
if file_path.is_file():
|
||||
try:
|
||||
if file_path.stat().st_mtime > cutoff_time:
|
||||
rel_path = file_path.relative_to(self.root_path)
|
||||
recent_files.add(str(rel_path))
|
||||
except (OSError, ValueError):
|
||||
continue
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return recent_files
|
||||
|
||||
def analyze_directory(self, dir_path: Path) -> Optional[ModuleInfo]:
|
||||
"""Analyze a single directory and return module information."""
|
||||
if self._should_exclude_directory(dir_path):
|
||||
return None
|
||||
|
||||
try:
|
||||
# Count files by type
|
||||
file_types = set()
|
||||
file_count = 0
|
||||
has_claude = False
|
||||
last_modified = 0
|
||||
|
||||
for item in dir_path.iterdir():
|
||||
if item.is_file():
|
||||
file_count += 1
|
||||
|
||||
# Track file types
|
||||
if item.suffix.lower() in self.source_extensions:
|
||||
file_types.add(item.suffix.lower())
|
||||
|
||||
# Check for CLAUDE.md
|
||||
if item.name.upper() == 'CLAUDE.MD':
|
||||
has_claude = True
|
||||
|
||||
# Track latest modification
|
||||
try:
|
||||
mtime = item.stat().st_mtime
|
||||
last_modified = max(last_modified, mtime)
|
||||
except OSError:
|
||||
continue
|
||||
|
||||
# Calculate depth relative to root
|
||||
try:
|
||||
relative_path = dir_path.relative_to(self.root_path)
|
||||
depth = len(relative_path.parts)
|
||||
except ValueError:
|
||||
depth = 0
|
||||
|
||||
return ModuleInfo(
|
||||
depth=depth,
|
||||
path=str(relative_path) if depth > 0 else ".",
|
||||
files=file_count,
|
||||
types=sorted(list(file_types)),
|
||||
has_claude=has_claude,
|
||||
last_modified=last_modified if last_modified > 0 else None
|
||||
)
|
||||
|
||||
except (PermissionError, OSError):
|
||||
return None
|
||||
|
||||
def detect_changed_modules(self, since: str = "HEAD") -> List[ModuleInfo]:
|
||||
"""Detect modules affected by changes."""
|
||||
changed_files = self.get_git_changed_files(since)
|
||||
|
||||
# If no git changes, fall back to recently modified files
|
||||
if not changed_files:
|
||||
changed_files = self.get_recently_modified_files(24)
|
||||
|
||||
# Get affected directories
|
||||
affected_dirs = set()
|
||||
for file_path in changed_files:
|
||||
full_path = self.root_path / file_path
|
||||
if full_path.exists():
|
||||
# Add the file's directory and parent directories
|
||||
current_dir = full_path.parent
|
||||
while current_dir != self.root_path and current_dir.parent != current_dir:
|
||||
affected_dirs.add(current_dir)
|
||||
current_dir = current_dir.parent
|
||||
|
||||
# Analyze affected directories
|
||||
modules = []
|
||||
for dir_path in affected_dirs:
|
||||
module_info = self.analyze_directory(dir_path)
|
||||
if module_info:
|
||||
module_info.status = "changed"
|
||||
modules.append(module_info)
|
||||
|
||||
return sorted(modules, key=lambda m: (m.depth, m.path))
|
||||
|
||||
def analyze_by_depth(self, max_depth: Optional[int] = None) -> List[ModuleInfo]:
|
||||
"""Analyze all modules organized by depth (deepest first)."""
|
||||
modules = []
|
||||
|
||||
def scan_directory(dir_path: Path, current_depth: int = 0):
|
||||
"""Recursively scan directories."""
|
||||
if max_depth and current_depth > max_depth:
|
||||
return
|
||||
|
||||
module_info = self.analyze_directory(dir_path)
|
||||
if module_info and module_info.files > 0:
|
||||
modules.append(module_info)
|
||||
|
||||
# Recurse into subdirectories
|
||||
try:
|
||||
for item in dir_path.iterdir():
|
||||
if item.is_dir() and not self._should_exclude_directory(item):
|
||||
scan_directory(item, current_depth + 1)
|
||||
except (PermissionError, OSError):
|
||||
pass
|
||||
|
||||
scan_directory(self.root_path)
|
||||
|
||||
# Sort by depth (deepest first), then by path
|
||||
return sorted(modules, key=lambda m: (-m.depth, m.path))
|
||||
|
||||
def get_dependencies(self, module_path: str) -> List[str]:
|
||||
"""Get module dependencies (basic implementation)."""
|
||||
dependencies = []
|
||||
module_dir = self.root_path / module_path
|
||||
|
||||
if not module_dir.exists() or not module_dir.is_dir():
|
||||
return dependencies
|
||||
|
||||
# Look for common dependency files
|
||||
dependency_files = [
|
||||
'package.json', # Node.js
|
||||
'requirements.txt', # Python
|
||||
'Cargo.toml', # Rust
|
||||
'go.mod', # Go
|
||||
'pom.xml', # Java Maven
|
||||
'build.gradle', # Java Gradle
|
||||
]
|
||||
|
||||
for dep_file in dependency_files:
|
||||
dep_path = module_dir / dep_file
|
||||
if dep_path.exists():
|
||||
dependencies.append(str(dep_path.relative_to(self.root_path)))
|
||||
|
||||
return dependencies
|
||||
|
||||
def find_modules_with_pattern(self, pattern: str) -> List[ModuleInfo]:
|
||||
"""Find modules matching a specific pattern in their path or files."""
|
||||
modules = self.analyze_by_depth()
|
||||
matching_modules = []
|
||||
|
||||
for module in modules:
|
||||
# Check if pattern matches path
|
||||
if pattern.lower() in module.path.lower():
|
||||
matching_modules.append(module)
|
||||
continue
|
||||
|
||||
# Check if pattern matches file types
|
||||
if any(pattern.lower() in ext.lower() for ext in module.types):
|
||||
matching_modules.append(module)
|
||||
|
||||
return matching_modules
|
||||
|
||||
def export_analysis(self, modules: List[ModuleInfo], format: str = "json") -> str:
|
||||
"""Export module analysis in specified format."""
|
||||
if format == "json":
|
||||
return json.dumps([module.to_dict() for module in modules], indent=2)
|
||||
|
||||
elif format == "list":
|
||||
lines = []
|
||||
for module in modules:
|
||||
status = f"[{module.status}]" if module.status != "normal" else ""
|
||||
claude_marker = "[CLAUDE]" if module.has_claude else ""
|
||||
lines.append(f"{module.path} (depth:{module.depth}, files:{module.files}) {status} {claude_marker}")
|
||||
return "\n".join(lines)
|
||||
|
||||
elif format == "grouped":
|
||||
grouped = {}
|
||||
for module in modules:
|
||||
depth = module.depth
|
||||
if depth not in grouped:
|
||||
grouped[depth] = []
|
||||
grouped[depth].append(module)
|
||||
|
||||
lines = []
|
||||
for depth in sorted(grouped.keys()):
|
||||
lines.append(f"\n=== Depth {depth} ===")
|
||||
for module in grouped[depth]:
|
||||
status = f"[{module.status}]" if module.status != "normal" else ""
|
||||
claude_marker = "[CLAUDE]" if module.has_claude else ""
|
||||
lines.append(f" {module.path} (files:{module.files}) {status} {claude_marker}")
|
||||
return "\n".join(lines)
|
||||
|
||||
elif format == "paths":
|
||||
return "\n".join(module.path for module in modules)
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unsupported format: {format}")
|
||||
|
||||
|
||||
def main():
|
||||
"""Main CLI entry point."""
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(description="Module Analysis Tool")
|
||||
parser.add_argument("command", choices=["changed", "depth", "dependencies", "find"],
|
||||
help="Analysis command to run")
|
||||
parser.add_argument("--format", choices=["json", "list", "grouped", "paths"],
|
||||
default="list", help="Output format")
|
||||
parser.add_argument("--since", default="HEAD~1",
|
||||
help="Git reference for change detection (default: HEAD~1)")
|
||||
parser.add_argument("--max-depth", type=int,
|
||||
help="Maximum directory depth to analyze")
|
||||
parser.add_argument("--pattern", help="Pattern to search for (for find command)")
|
||||
parser.add_argument("--module", help="Module path for dependency analysis")
|
||||
parser.add_argument("--config", help="Configuration file path")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
analyzer = ModuleAnalyzer(config_path=args.config)
|
||||
|
||||
if args.command == "changed":
|
||||
modules = analyzer.detect_changed_modules(args.since)
|
||||
print(analyzer.export_analysis(modules, args.format))
|
||||
|
||||
elif args.command == "depth":
|
||||
modules = analyzer.analyze_by_depth(args.max_depth)
|
||||
print(analyzer.export_analysis(modules, args.format))
|
||||
|
||||
elif args.command == "dependencies":
|
||||
if not args.module:
|
||||
print("Error: --module required for dependencies command", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
deps = analyzer.get_dependencies(args.module)
|
||||
if args.format == "json":
|
||||
print(json.dumps(deps, indent=2))
|
||||
else:
|
||||
print("\n".join(deps))
|
||||
|
||||
elif args.command == "find":
|
||||
if not args.pattern:
|
||||
print("Error: --pattern required for find command", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
modules = analyzer.find_modules_with_pattern(args.pattern)
|
||||
print(analyzer.export_analysis(modules, args.format))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
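A minimal sketch of using ModuleAnalyzer programmatically (assumes it is imported from the repository root with .claude/python_script on the path):

from tools.module_analyzer import ModuleAnalyzer

analyzer = ModuleAnalyzer(root_path=".")
changed = analyzer.detect_changed_modules(since="HEAD~1")   # falls back to recent mtimes outside a git repo
print(analyzer.export_analysis(changed, format="grouped"))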
202 .claude/python_script/tools/tech_stack.py Normal file
@@ -0,0 +1,202 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Python equivalent of tech-stack-loader.sh
|
||||
DMSFlow Tech Stack Guidelines Loader
|
||||
Returns tech stack specific coding guidelines and best practices for Claude processing
|
||||
|
||||
Usage: python tech_stack_loader.py [command] [tech_stack]
|
||||
"""
|
||||
|
||||
import sys
|
||||
import argparse
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
class TechStackLoader:
|
||||
"""Load tech stack specific development guidelines."""
|
||||
|
||||
def __init__(self, script_dir: Optional[str] = None):
|
||||
if script_dir:
|
||||
self.script_dir = Path(script_dir)
|
||||
else:
|
||||
self.script_dir = Path(__file__).parent
|
||||
|
||||
# Look for template directory in multiple locations
|
||||
possible_template_dirs = [
|
||||
self.script_dir / "../tech-stack-templates",
|
||||
self.script_dir / "../workflows/cli-templates/tech-stacks",
|
||||
self.script_dir / "tech-stack-templates",
|
||||
self.script_dir / "templates",
|
||||
]
|
||||
|
||||
self.template_dir = None
|
||||
for template_dir in possible_template_dirs:
|
||||
if template_dir.exists():
|
||||
self.template_dir = template_dir.resolve()
|
||||
break
|
||||
|
||||
if not self.template_dir:
|
||||
# Create a default template directory
|
||||
self.template_dir = self.script_dir / "tech-stack-templates"
|
||||
self.template_dir.mkdir(exist_ok=True)
|
||||
|
||||
def parse_yaml_frontmatter(self, content: str) -> Tuple[Dict[str, str], str]:
|
||||
"""Parse YAML frontmatter from markdown content."""
|
||||
frontmatter = {}
|
||||
content_start = 0
|
||||
|
||||
lines = content.split('\n')
|
||||
if lines and lines[0].strip() == '---':
|
||||
# Find the closing ---
|
||||
for i, line in enumerate(lines[1:], 1):
|
||||
if line.strip() == '---':
|
||||
content_start = i + 1
|
||||
break
|
||||
elif ':' in line:
|
||||
key, value = line.split(':', 1)
|
||||
frontmatter[key.strip()] = value.strip()
|
||||
|
||||
# Return frontmatter and content without YAML
|
||||
remaining_content = '\n'.join(lines[content_start:])
|
||||
return frontmatter, remaining_content
|
||||
|
||||
def list_available_guidelines(self) -> str:
|
||||
"""List all available development guidelines."""
|
||||
output = ["Available Development Guidelines:", "=" * 33]
|
||||
|
||||
if not self.template_dir.exists():
|
||||
output.append("No template directory found.")
|
||||
return '\n'.join(output)
|
||||
|
||||
for file_path in self.template_dir.glob("*.md"):
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
|
||||
frontmatter, _ = self.parse_yaml_frontmatter(content)
|
||||
name = frontmatter.get('name', file_path.stem)
|
||||
description = frontmatter.get('description', 'No description available')
|
||||
|
||||
output.append(f"{name:<20} - {description}")
|
||||
|
||||
except Exception as e:
|
||||
output.append(f"{file_path.stem:<20} - Error reading file: {e}")
|
||||
|
||||
return '\n'.join(output)
|
||||
|
||||
def load_guidelines(self, tech_stack: str) -> str:
|
||||
"""Load specific development guidelines."""
|
||||
template_path = self.template_dir / f"{tech_stack}.md"
|
||||
|
||||
if not template_path.exists():
|
||||
# Try with different naming conventions
|
||||
alternatives = [
|
||||
f"{tech_stack}-dev.md",
|
||||
f"{tech_stack}_dev.md",
|
||||
f"{tech_stack.replace('-', '_')}.md",
|
||||
f"{tech_stack.replace('_', '-')}.md"
|
||||
]
|
||||
|
||||
for alt in alternatives:
|
||||
alt_path = self.template_dir / alt
|
||||
if alt_path.exists():
|
||||
template_path = alt_path
|
||||
break
|
||||
else:
|
||||
raise FileNotFoundError(
|
||||
f"Error: Development guidelines '{tech_stack}' not found\n"
|
||||
f"Use --list to see available guidelines"
|
||||
)
|
||||
|
||||
try:
|
||||
with open(template_path, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
|
||||
# Parse and return content without YAML frontmatter
|
||||
_, content_without_yaml = self.parse_yaml_frontmatter(content)
|
||||
return content_without_yaml.strip()
|
||||
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Error reading guidelines file: {e}")
|
||||
|
||||
def get_version(self) -> str:
|
||||
"""Get version information."""
|
||||
return "DMSFlow tech-stack-loader v2.0 (Python)\nSemantic-based development guidelines system"
|
||||
|
||||
def get_help(self) -> str:
|
||||
"""Get help message."""
|
||||
return """Usage:
|
||||
tech_stack_loader.py --list List all available guidelines with descriptions
|
||||
tech_stack_loader.py --load <name> Load specific development guidelines
|
||||
tech_stack_loader.py <name> Load specific guidelines (legacy format)
|
||||
tech_stack_loader.py --help Show this help message
|
||||
tech_stack_loader.py --version Show version information
|
||||
|
||||
Examples:
|
||||
tech_stack_loader.py --list
|
||||
tech_stack_loader.py --load javascript-dev
|
||||
tech_stack_loader.py python-dev"""
|
||||
|
||||
def main():
|
||||
"""Command-line interface."""
|
||||
parser = argparse.ArgumentParser(
|
||||
description="DMSFlow Tech Stack Guidelines Loader",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""Examples:
|
||||
python tech_stack_loader.py --list
|
||||
python tech_stack_loader.py --load javascript-dev
|
||||
python tech_stack_loader.py python-dev"""
|
||||
)
|
||||
|
||||
parser.add_argument("command", nargs="?", help="Command or tech stack name")
|
||||
parser.add_argument("tech_stack", nargs="?", help="Tech stack name (when using --load)")
|
||||
parser.add_argument("--list", action="store_true", help="List all available guidelines")
|
||||
parser.add_argument("--load", metavar="TECH_STACK", help="Load specific development guidelines")
|
||||
parser.add_argument("--version", "-v", action="store_true", help="Show version information")
|
||||
parser.add_argument("--template-dir", help="Override template directory path")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
loader = TechStackLoader(args.template_dir)
|
||||
|
||||
# Handle version check
|
||||
if args.version or args.command == "--version":
|
||||
print(loader.get_version())
|
||||
return
|
||||
|
||||
# Handle list command
|
||||
if args.list or args.command == "--list":
|
||||
print(loader.list_available_guidelines())
|
||||
return
|
||||
|
||||
# Handle load command
|
||||
if args.load:
|
||||
result = loader.load_guidelines(args.load)
|
||||
print(result)
|
||||
return
|
||||
|
||||
if args.command == "--load" and args.tech_stack:
|
||||
result = loader.load_guidelines(args.tech_stack)
|
||||
print(result)
|
||||
return
|
||||
|
||||
# Handle legacy usage (direct tech stack name)
|
||||
if args.command and args.command not in ["--help", "--list", "--load"]:
|
||||
result = loader.load_guidelines(args.command)
|
||||
print(result)
|
||||
return
|
||||
|
||||
# Show help
|
||||
print(loader.get_help())
|
||||
|
||||
except (FileNotFoundError, RuntimeError) as e:
|
||||
print(str(e), file=sys.stderr)
|
||||
sys.exit(1)
|
||||
except Exception as e:
|
||||
print(f"Unexpected error: {e}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
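A minimal sketch of the TechStackLoader API above (the guideline name is illustrative; use --list to see what actually exists in the template directory):

from tools.tech_stack import TechStackLoader

loader = TechStackLoader()
print(loader.list_available_guidelines())
guidelines = loader.load_guidelines("python-dev")  # raises FileNotFoundError if the template is missing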
241 .claude/python_script/tools/workflow_updater.py Normal file
@@ -0,0 +1,241 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Python equivalent of update_module_claude.sh
|
||||
Update CLAUDE.md for a specific module with automatic layer detection
|
||||
|
||||
Usage: python update_module_claude.py <module_path> [update_type]
|
||||
module_path: Path to the module directory
|
||||
update_type: full|related (default: full)
|
||||
Script automatically detects layer depth and selects appropriate template
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import time
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import Optional, Tuple, Dict
from dataclasses import dataclass


@dataclass
class LayerInfo:
    """Information about a documentation layer."""
    name: str
    template_path: str
    analysis_strategy: str


class ModuleClaudeUpdater:
    """Update CLAUDE.md documentation for modules with layer detection."""

    def __init__(self, home_dir: Optional[str] = None):
        self.home_dir = Path(home_dir) if home_dir else Path.home()
        self.template_base = self.home_dir / ".claude/workflows/cli-templates/prompts/dms"

    def detect_layer(self, module_path: str) -> LayerInfo:
        """Determine documentation layer based on path patterns."""
        clean_path = module_path.replace('./', '') if module_path.startswith('./') else module_path

        if module_path == ".":
            # Root directory
            return LayerInfo(
                name="Layer 1 (Root)",
                template_path=str(self.template_base / "claude-layer1-root.txt"),
                analysis_strategy="--all-files"
            )
        elif '/' not in clean_path:
            # Top-level directories (e.g., .claude, src, tests)
            return LayerInfo(
                name="Layer 2 (Domain)",
                template_path=str(self.template_base / "claude-layer2-domain.txt"),
                analysis_strategy="@{*/CLAUDE.md}"
            )
        elif clean_path.count('/') == 1:
            # Second-level directories (e.g., .claude/scripts, src/components)
            return LayerInfo(
                name="Layer 3 (Module)",
                template_path=str(self.template_base / "claude-layer3-module.txt"),
                analysis_strategy="@{*/CLAUDE.md}"
            )
        else:
            # Deeper directories (e.g., .claude/workflows/cli-templates/prompts)
            return LayerInfo(
                name="Layer 4 (Sub-Module)",
                template_path=str(self.template_base / "claude-layer4-submodule.txt"),
                analysis_strategy="--all-files"
            )

    def load_template(self, template_path: str) -> str:
        """Load template content from file."""
        try:
            with open(template_path, 'r', encoding='utf-8') as f:
                return f.read()
        except FileNotFoundError:
            print(f" [WARN] Template not found: {template_path}, using fallback")
            return "Update CLAUDE.md documentation for this module following hierarchy standards."
        except Exception as e:
            print(f" [WARN] Error reading template: {e}, using fallback")
            return "Update CLAUDE.md documentation for this module following hierarchy standards."

    def build_prompt(self, layer_info: LayerInfo, module_path: str, update_type: str) -> str:
        """Build the prompt for gemini."""
        template_content = self.load_template(layer_info.template_path)
        module_name = os.path.basename(module_path)

        if update_type == "full":
            update_context = """
Update Mode: Complete refresh
- Perform comprehensive analysis of all content
- Document patterns, architecture, and purpose
- Consider existing documentation hierarchy
- Follow template guidelines strictly"""
        else:
            update_context = """
Update Mode: Context-aware update
- Focus on recent changes and affected areas
- Maintain consistency with existing documentation
- Update only relevant sections
- Follow template guidelines for updated content"""

        base_prompt = f"""
[CRITICAL] RULES - MUST FOLLOW:
1. ONLY modify CLAUDE.md files at any hierarchy level
2. NEVER modify source code files
3. Focus exclusively on updating documentation
4. Follow the template guidelines exactly

{template_content}

{update_context}

Module Information:
- Name: {module_name}
- Path: {module_path}
- Layer: {layer_info.name}
- Analysis Strategy: {layer_info.analysis_strategy}"""

        return base_prompt

    def execute_gemini_command(self, prompt: str, analysis_strategy: str, module_path: str) -> bool:
        """Execute gemini command with the appropriate strategy."""
        original_dir = os.getcwd()

        try:
            os.chdir(module_path)

            if analysis_strategy == "--all-files":
                cmd = ["gemini", "--all-files", "--yolo", "-p", prompt]
            else:
                cmd = ["gemini", "--yolo", "-p", f"{analysis_strategy} {prompt}"]

            result = subprocess.run(cmd, capture_output=True, text=True)

            if result.returncode == 0:
                return True
            else:
                print(f" [ERROR] Gemini command failed: {result.stderr}")
                return False

        except subprocess.CalledProcessError as e:
            print(f" [ERROR] Error executing gemini: {e}")
            return False
        except FileNotFoundError:
            print(" [ERROR] Gemini command not found. Make sure gemini is installed and in PATH.")
            return False
        finally:
            os.chdir(original_dir)

    def update_module_claude(self, module_path: str, update_type: str = "full") -> bool:
        """Main function to update CLAUDE.md for a module."""
        # Validate parameters
        if not module_path:
            print("[ERROR] Module path is required")
            print("Usage: update_module_claude.py <module_path> [update_type]")
            return False

        path_obj = Path(module_path)
        if not path_obj.exists() or not path_obj.is_dir():
            print(f"[ERROR] Directory '{module_path}' does not exist")
            return False

        # Check if directory has files
        files = list(path_obj.glob('*'))
        file_count = len([f for f in files if f.is_file()])
        if file_count == 0:
            print(f"[SKIP] Skipping '{module_path}' - no files found")
            return True

        # Detect layer and get configuration
        layer_info = self.detect_layer(module_path)

        print(f"[UPDATE] Updating: {module_path}")
        print(f" Layer: {layer_info.name} | Type: {update_type} | Files: {file_count}")
        print(f" Template: {os.path.basename(layer_info.template_path)} | Strategy: {layer_info.analysis_strategy}")

        # Build prompt
        prompt = self.build_prompt(layer_info, module_path, update_type)

        # Execute update
        start_time = time.time()
        print(" [PROGRESS] Starting update...")

        success = self.execute_gemini_command(prompt, layer_info.analysis_strategy, module_path)

        if success:
            duration = int(time.time() - start_time)
            print(f" [OK] Completed in {duration}s")
            return True
        else:
            print(f" [ERROR] Update failed for {module_path}")
            return False


def main():
    """Command-line interface."""
    parser = argparse.ArgumentParser(
        description="Update CLAUDE.md for a specific module with automatic layer detection",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""Examples:
python update_module_claude.py .
python update_module_claude.py src/components full
python update_module_claude.py .claude/scripts related"""
    )

    parser.add_argument("module_path", help="Path to the module directory")
    parser.add_argument("update_type", nargs="?", choices=["full", "related"],
                        default="full", help="Update type (default: full)")
    parser.add_argument("--home", help="Override home directory path")
    parser.add_argument("--dry-run", action="store_true",
                        help="Show what would be done without executing")

    args = parser.parse_args()

    try:
        updater = ModuleClaudeUpdater(args.home)

        if args.dry_run:
            layer_info = updater.detect_layer(args.module_path)
            prompt = updater.build_prompt(layer_info, args.module_path, args.update_type)

            print("[DRY-RUN] Dry run mode - showing configuration:")
            print(f"Module Path: {args.module_path}")
            print(f"Update Type: {args.update_type}")
            print(f"Layer: {layer_info.name}")
            print(f"Template: {layer_info.template_path}")
            print(f"Strategy: {layer_info.analysis_strategy}")
            print("\nPrompt preview:")
            print("-" * 50)
            print(prompt[:500] + "..." if len(prompt) > 500 else prompt)
            return

        success = updater.update_module_claude(args.module_path, args.update_type)
        sys.exit(0 if success else 1)

    except KeyboardInterrupt:
        print("\n[ERROR] Operation cancelled by user")
        sys.exit(1)
    except Exception as e:
        print(f"[ERROR] Unexpected error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
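Editorial note: a minimal usage sketch of the module above, not part of the commit. It assumes the file is importable as update_module_claude and that the gemini CLI is only needed once update_module_claude() is actually called; detect_layer() and build_prompt() run offline, which is what the --dry-run path relies on.

# Sketch only: inspect layer detection and the generated prompt without running gemini.
from update_module_claude import ModuleClaudeUpdater

updater = ModuleClaudeUpdater()                      # templates resolved under ~/.claude by default
layer = updater.detect_layer(".claude/scripts")      # one '/' -> "Layer 3 (Module)"
print(layer.name, layer.analysis_strategy)
prompt = updater.build_prompt(layer, ".claude/scripts", "related")
print(prompt[:200])                                  # preview, same idea as --dry-run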
16
.claude/python_script/utils/__init__.py
Normal file
@@ -0,0 +1,16 @@
"""
Shared utility functions and helpers.
Provides common functionality for colors, caching, and I/O operations.
"""

from .colors import Colors
from .cache import CacheManager
from .io_helpers import IOHelpers, ensure_directory, safe_read_file

__all__ = [
    'Colors',
    'CacheManager',
    'IOHelpers',
    'ensure_directory',
    'safe_read_file'
]
BIN
.claude/python_script/utils/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
.claude/python_script/utils/__pycache__/cache.cpython-313.pyc
Normal file
Binary file not shown.
BIN
.claude/python_script/utils/__pycache__/colors.cpython-313.pyc
Normal file
Binary file not shown.
350
.claude/python_script/utils/cache.py
Normal file
@@ -0,0 +1,350 @@
#!/usr/bin/env python3
"""
Cache Management Utility
Provides unified caching functionality for the analyzer system.
"""

import os
import json
import time
import hashlib
import pickle
import logging
from pathlib import Path
from typing import Any, Optional, Dict, Union
from dataclasses import dataclass, asdict


@dataclass
class CacheEntry:
    """Cache entry with metadata."""
    value: Any
    timestamp: float
    ttl: Optional[float] = None
    key_hash: Optional[str] = None

    def is_expired(self) -> bool:
        """Check if cache entry is expired."""
        if self.ttl is None:
            return False
        return time.time() - self.timestamp > self.ttl

    def to_dict(self) -> Dict:
        """Convert to dictionary for JSON serialization."""
        return {
            'value': self.value,
            'timestamp': self.timestamp,
            'ttl': self.ttl,
            'key_hash': self.key_hash
        }

    @classmethod
    def from_dict(cls, data: Dict) -> 'CacheEntry':
        """Create from dictionary."""
        return cls(**data)


class CacheManager:
    """Unified cache manager with multiple storage backends."""

    def __init__(self, cache_dir: str = "cache", default_ttl: int = 3600):
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.default_ttl = default_ttl
        self.logger = logging.getLogger(__name__)

        # In-memory cache for fast access
        self._memory_cache: Dict[str, CacheEntry] = {}

        # Cache subdirectories
        self.json_cache_dir = self.cache_dir / "json"
        self.pickle_cache_dir = self.cache_dir / "pickle"
        self.temp_cache_dir = self.cache_dir / "temp"

        for cache_subdir in [self.json_cache_dir, self.pickle_cache_dir, self.temp_cache_dir]:
            cache_subdir.mkdir(exist_ok=True)

    def _generate_key_hash(self, key: str) -> str:
        """Generate a hash for the cache key."""
        return hashlib.md5(key.encode('utf-8')).hexdigest()

    def _get_cache_path(self, key: str, cache_type: str = "json") -> Path:
        """Get cache file path for a key."""
        key_hash = self._generate_key_hash(key)

        if cache_type == "json":
            return self.json_cache_dir / f"{key_hash}.json"
        elif cache_type == "pickle":
            return self.pickle_cache_dir / f"{key_hash}.pkl"
        elif cache_type == "temp":
            return self.temp_cache_dir / f"{key_hash}.tmp"
        else:
            raise ValueError(f"Unsupported cache type: {cache_type}")

    def set(self, key: str, value: Any, ttl: Optional[int] = None,
            storage: str = "memory") -> bool:
        """Set a cache value."""
        if ttl is None:
            ttl = self.default_ttl

        entry = CacheEntry(
            value=value,
            timestamp=time.time(),
            ttl=ttl,
            key_hash=self._generate_key_hash(key)
        )

        try:
            if storage == "memory":
                self._memory_cache[key] = entry
                return True

            elif storage == "json":
                cache_path = self._get_cache_path(key, "json")
                with open(cache_path, 'w', encoding='utf-8') as f:
                    json.dump(entry.to_dict(), f, indent=2, default=str)
                return True

            elif storage == "pickle":
                cache_path = self._get_cache_path(key, "pickle")
                with open(cache_path, 'wb') as f:
                    pickle.dump(entry, f)
                return True

            else:
                self.logger.warning(f"Unsupported storage type: {storage}")
                return False

        except Exception as e:
            self.logger.error(f"Failed to set cache for key '{key}': {e}")
            return False

    def get(self, key: str, storage: str = "memory",
            default: Any = None) -> Any:
        """Get a cache value."""
        try:
            entry = None

            if storage == "memory":
                entry = self._memory_cache.get(key)

            elif storage == "json":
                cache_path = self._get_cache_path(key, "json")
                if cache_path.exists():
                    with open(cache_path, 'r', encoding='utf-8') as f:
                        data = json.load(f)
                        entry = CacheEntry.from_dict(data)

            elif storage == "pickle":
                cache_path = self._get_cache_path(key, "pickle")
                if cache_path.exists():
                    with open(cache_path, 'rb') as f:
                        entry = pickle.load(f)

            else:
                self.logger.warning(f"Unsupported storage type: {storage}")
                return default

            if entry is None:
                return default

            # Check if entry is expired
            if entry.is_expired():
                self.delete(key, storage)
                return default

            return entry.value

        except Exception as e:
            self.logger.error(f"Failed to get cache for key '{key}': {e}")
            return default

    def delete(self, key: str, storage: str = "memory") -> bool:
        """Delete a cache entry."""
        try:
            if storage == "memory":
                if key in self._memory_cache:
                    del self._memory_cache[key]
                return True

            elif storage in ["json", "pickle", "temp"]:
                cache_path = self._get_cache_path(key, storage)
                if cache_path.exists():
                    cache_path.unlink()
                return True

            else:
                self.logger.warning(f"Unsupported storage type: {storage}")
                return False

        except Exception as e:
            self.logger.error(f"Failed to delete cache for key '{key}': {e}")
            return False

    def exists(self, key: str, storage: str = "memory") -> bool:
        """Check if a cache entry exists and is not expired."""
        return self.get(key, storage) is not None

    def clear(self, storage: Optional[str] = None) -> bool:
        """Clear cache entries."""
        try:
            if storage is None or storage == "memory":
                self._memory_cache.clear()

            if storage is None or storage == "json":
                for cache_file in self.json_cache_dir.glob("*.json"):
                    cache_file.unlink()

            if storage is None or storage == "pickle":
                for cache_file in self.pickle_cache_dir.glob("*.pkl"):
                    cache_file.unlink()

            if storage is None or storage == "temp":
                for cache_file in self.temp_cache_dir.glob("*.tmp"):
                    cache_file.unlink()

            return True

        except Exception as e:
            self.logger.error(f"Failed to clear cache: {e}")
            return False

    def cleanup_expired(self) -> int:
        """Clean up expired cache entries."""
        cleaned_count = 0

        try:
            # Clean memory cache
            expired_keys = []
            for key, entry in self._memory_cache.items():
                if entry.is_expired():
                    expired_keys.append(key)

            for key in expired_keys:
                del self._memory_cache[key]
                cleaned_count += 1

            # Clean file caches
            for cache_type in ["json", "pickle"]:
                cache_dir = self.json_cache_dir if cache_type == "json" else self.pickle_cache_dir
                extension = f".{cache_type}" if cache_type == "json" else ".pkl"

                for cache_file in cache_dir.glob(f"*{extension}"):
                    try:
                        if cache_type == "json":
                            with open(cache_file, 'r', encoding='utf-8') as f:
                                data = json.load(f)
                            entry = CacheEntry.from_dict(data)
                        else:
                            with open(cache_file, 'rb') as f:
                                entry = pickle.load(f)

                        if entry.is_expired():
                            cache_file.unlink()
                            cleaned_count += 1

                    except Exception:
                        # If we can't read the cache file, delete it
                        cache_file.unlink()
                        cleaned_count += 1

            self.logger.info(f"Cleaned up {cleaned_count} expired cache entries")
            return cleaned_count

        except Exception as e:
            self.logger.error(f"Failed to cleanup expired cache entries: {e}")
            return 0

    def get_stats(self) -> Dict[str, Any]:
        """Get cache statistics."""
        stats = {
            'memory_entries': len(self._memory_cache),
            'json_files': len(list(self.json_cache_dir.glob("*.json"))),
            'pickle_files': len(list(self.pickle_cache_dir.glob("*.pkl"))),
            'temp_files': len(list(self.temp_cache_dir.glob("*.tmp"))),
            'cache_dir_size': 0
        }

        # Calculate total cache directory size
        try:
            for cache_file in self.cache_dir.rglob("*"):
                if cache_file.is_file():
                    stats['cache_dir_size'] += cache_file.stat().st_size
        except Exception:
            pass

        return stats

    def set_file_cache(self, key: str, file_path: Union[str, Path],
                       ttl: Optional[int] = None) -> bool:
        """Cache a file by copying it to the cache directory."""
        try:
            source_path = Path(file_path)
            if not source_path.exists():
                return False

            cache_path = self.temp_cache_dir / f"{self._generate_key_hash(key)}.cached"

            # Copy file to cache
            import shutil
            shutil.copy2(source_path, cache_path)

            # Store metadata
            metadata = {
                'original_path': str(source_path),
                'cached_path': str(cache_path),
                'size': source_path.stat().st_size,
                'timestamp': time.time(),
                'ttl': ttl or self.default_ttl
            }

            return self.set(f"{key}_metadata", metadata, ttl, "json")

        except Exception as e:
            self.logger.error(f"Failed to cache file '{file_path}': {e}")
            return False

    def get_file_cache(self, key: str) -> Optional[Path]:
        """Get cached file path."""
        metadata = self.get(f"{key}_metadata", "json")
        if metadata is None:
            return None

        cached_path = Path(metadata['cached_path'])
        if not cached_path.exists():
            # Cache file missing, clean up metadata
            self.delete(f"{key}_metadata", "json")
            return None

        return cached_path


# Global cache manager instance
_global_cache = None


def get_cache_manager(cache_dir: str = "cache", default_ttl: int = 3600) -> CacheManager:
    """Get global cache manager instance."""
    global _global_cache
    if _global_cache is None:
        _global_cache = CacheManager(cache_dir, default_ttl)
    return _global_cache


if __name__ == "__main__":
    # Test cache functionality
    cache = CacheManager("test_cache")

    # Test memory cache
    cache.set("test_key", {"data": "test_value"}, ttl=60)
    print(f"Memory cache: {cache.get('test_key')}")

    # Test JSON cache
    cache.set("json_key", {"complex": {"data": [1, 2, 3]}}, ttl=60, storage="json")
    print(f"JSON cache: {cache.get('json_key', storage='json')}")

    # Test stats
    print(f"Cache stats: {cache.get_stats()}")

    # Clean up
    cache.clear()
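Editorial note: a small sketch, not part of the commit, covering the two code paths the __main__ demo above skips: the get_cache_manager() singleton and the file-cache helpers. It assumes the utils directory is on sys.path so the module imports as cache.

# Sketch only: shared singleton plus file caching.
from pathlib import Path
from cache import get_cache_manager

cache = get_cache_manager("demo_cache", default_ttl=120)   # same instance on every call
cache.set("index", {"files": 42}, storage="json")          # persisted under demo_cache/json/

Path("report.txt").write_text("hello")
cache.set_file_cache("report", "report.txt", ttl=300)      # copies the file into demo_cache/temp/
print(cache.get_file_cache("report"))                      # cached Path, or None if expired/missing
cache.clear()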
248
.claude/python_script/utils/colors.py
Normal file
@@ -0,0 +1,248 @@
#!/usr/bin/env python3
"""
Terminal Colors Utility
Provides ANSI color codes for terminal output formatting.
"""

import os
import sys
from typing import Optional


class Colors:
    """ANSI color codes for terminal output."""

    # Basic colors
    RED = '\033[0;31m'
    GREEN = '\033[0;32m'
    YELLOW = '\033[1;33m'
    BLUE = '\033[0;34m'
    PURPLE = '\033[0;35m'
    CYAN = '\033[0;36m'
    WHITE = '\033[0;37m'
    BLACK = '\033[0;30m'

    # Bright colors
    BRIGHT_RED = '\033[1;31m'
    BRIGHT_GREEN = '\033[1;32m'
    BRIGHT_YELLOW = '\033[1;33m'
    BRIGHT_BLUE = '\033[1;34m'
    BRIGHT_PURPLE = '\033[1;35m'
    BRIGHT_CYAN = '\033[1;36m'
    BRIGHT_WHITE = '\033[1;37m'

    # Background colors
    BG_RED = '\033[41m'
    BG_GREEN = '\033[42m'
    BG_YELLOW = '\033[43m'
    BG_BLUE = '\033[44m'
    BG_PURPLE = '\033[45m'
    BG_CYAN = '\033[46m'
    BG_WHITE = '\033[47m'

    # Text formatting
    BOLD = '\033[1m'
    DIM = '\033[2m'
    UNDERLINE = '\033[4m'
    BLINK = '\033[5m'
    REVERSE = '\033[7m'
    STRIKETHROUGH = '\033[9m'

    # Reset
    NC = '\033[0m'  # No Color / Reset
    RESET = '\033[0m'

    @classmethod
    def is_tty(cls) -> bool:
        """Check if output is a TTY (supports colors)."""
        return hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()

    @classmethod
    def supports_color(cls) -> bool:
        """Check if the terminal supports color output."""
        # Check environment variables
        if os.getenv('NO_COLOR'):
            return False

        if os.getenv('FORCE_COLOR'):
            return True

        # Check if output is a TTY
        if not cls.is_tty():
            return False

        # Check TERM environment variable
        term = os.getenv('TERM', '').lower()
        if 'color' in term or term in ('xterm', 'xterm-256color', 'screen', 'tmux'):
            return True

        # Windows Terminal detection
        if os.name == 'nt':
            # Windows 10 version 1511 and later support ANSI colors
            try:
                import subprocess
                result = subprocess.run(['ver'], capture_output=True, text=True, shell=True)
                if result.returncode == 0:
                    version_info = result.stdout
                    # Extract Windows version (simplified check)
                    if 'Windows' in version_info:
                        return True
            except Exception:
                pass

        return False

    @classmethod
    def colorize(cls, text: str, color: str, bold: bool = False) -> str:
        """Apply color to text if colors are supported."""
        if not cls.supports_color():
            return text

        prefix = color
        if bold:
            prefix = cls.BOLD + prefix

        return f"{prefix}{text}{cls.RESET}"

    @classmethod
    def red(cls, text: str, bold: bool = False) -> str:
        """Color text red."""
        return cls.colorize(text, cls.RED, bold)

    @classmethod
    def green(cls, text: str, bold: bool = False) -> str:
        """Color text green."""
        return cls.colorize(text, cls.GREEN, bold)

    @classmethod
    def yellow(cls, text: str, bold: bool = False) -> str:
        """Color text yellow."""
        return cls.colorize(text, cls.YELLOW, bold)

    @classmethod
    def blue(cls, text: str, bold: bool = False) -> str:
        """Color text blue."""
        return cls.colorize(text, cls.BLUE, bold)

    @classmethod
    def purple(cls, text: str, bold: bool = False) -> str:
        """Color text purple."""
        return cls.colorize(text, cls.PURPLE, bold)

    @classmethod
    def cyan(cls, text: str, bold: bool = False) -> str:
        """Color text cyan."""
        return cls.colorize(text, cls.CYAN, bold)

    @classmethod
    def bold(cls, text: str) -> str:
        """Make text bold."""
        return cls.colorize(text, '', True)

    @classmethod
    def dim(cls, text: str) -> str:
        """Make text dim."""
        return cls.colorize(text, cls.DIM)

    @classmethod
    def underline(cls, text: str) -> str:
        """Underline text."""
        return cls.colorize(text, cls.UNDERLINE)

    @classmethod
    def success(cls, text: str) -> str:
        """Format success message (green)."""
        return cls.green(f"[SUCCESS] {text}", bold=True)

    @classmethod
    def error(cls, text: str) -> str:
        """Format error message (red)."""
        return cls.red(f"[ERROR] {text}", bold=True)

    @classmethod
    def warning(cls, text: str) -> str:
        """Format warning message (yellow)."""
        return cls.yellow(f"[WARNING] {text}", bold=True)

    @classmethod
    def info(cls, text: str) -> str:
        """Format info message (blue)."""
        return cls.blue(f"[INFO] {text}")

    @classmethod
    def highlight(cls, text: str) -> str:
        """Highlight text (cyan background)."""
        if not cls.supports_color():
            return f"[{text}]"
        return f"{cls.BG_CYAN}{cls.BLACK}{text}{cls.RESET}"

    @classmethod
    def strip_colors(cls, text: str) -> str:
        """Remove ANSI color codes from text."""
        import re
        ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
        return ansi_escape.sub('', text)


# Convenience functions for common usage
def colorize(text: str, color: str) -> str:
    """Convenience function to colorize text."""
    return Colors.colorize(text, color)


def red(text: str) -> str:
    """Red text."""
    return Colors.red(text)


def green(text: str) -> str:
    """Green text."""
    return Colors.green(text)


def yellow(text: str) -> str:
    """Yellow text."""
    return Colors.yellow(text)


def blue(text: str) -> str:
    """Blue text."""
    return Colors.blue(text)


def success(text: str) -> str:
    """Success message."""
    return Colors.success(text)


def error(text: str) -> str:
    """Error message."""
    return Colors.error(text)


def warning(text: str) -> str:
    """Warning message."""
    return Colors.warning(text)


def info(text: str) -> str:
    """Info message."""
    return Colors.info(text)


if __name__ == "__main__":
    # Test color output
    print(Colors.red("Red text"))
    print(Colors.green("Green text"))
    print(Colors.yellow("Yellow text"))
    print(Colors.blue("Blue text"))
    print(Colors.purple("Purple text"))
    print(Colors.cyan("Cyan text"))
    print(Colors.bold("Bold text"))
    print(Colors.success("Success message"))
    print(Colors.error("Error message"))
    print(Colors.warning("Warning message"))
    print(Colors.info("Info message"))
    print(Colors.highlight("Highlighted text"))
    print(f"Color support: {Colors.supports_color()}")
    print(f"TTY: {Colors.is_tty()}")
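Editorial note: a short sketch, not part of the commit, showing the behavior the __main__ demo above does not touch: forcing color when output is piped and stripping ANSI codes for plain logs. It assumes the utils directory is on sys.path so the module imports as colors.

# Sketch only: FORCE_COLOR override plus strip_colors for log files.
import os
from colors import Colors

os.environ["FORCE_COLOR"] = "1"           # honored by Colors.supports_color() even when piped
banner = Colors.highlight("analyzer v2")
print(banner)                              # colored in the terminal
print(Colors.strip_colors(banner))         # plain text, safe for log files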
378
.claude/python_script/utils/io_helpers.py
Normal file
@@ -0,0 +1,378 @@
#!/usr/bin/env python3
"""
I/O Helper Functions
Provides common file and directory operations with error handling.
"""

import os
import json
import yaml
import logging
from pathlib import Path
from typing import Any, Optional, Union, List, Dict
import shutil
import tempfile


class IOHelpers:
    """Collection of I/O helper methods."""

    @staticmethod
    def ensure_directory(path: Union[str, Path], mode: int = 0o755) -> bool:
        """Ensure directory exists, create if necessary."""
        try:
            dir_path = Path(path)
            dir_path.mkdir(parents=True, exist_ok=True, mode=mode)
            return True
        except (PermissionError, OSError) as e:
            logging.error(f"Failed to create directory '{path}': {e}")
            return False

    @staticmethod
    def safe_read_file(file_path: Union[str, Path], encoding: str = 'utf-8',
                       fallback_encoding: str = 'latin-1') -> Optional[str]:
        """Safely read file content with encoding fallback."""
        path = Path(file_path)
        if not path.exists():
            return None

        encodings = [encoding, fallback_encoding] if encoding != fallback_encoding else [encoding]

        for enc in encodings:
            try:
                with open(path, 'r', encoding=enc) as f:
                    return f.read()
            except UnicodeDecodeError:
                continue
            except (IOError, OSError) as e:
                logging.error(f"Failed to read file '{file_path}': {e}")
                return None

        logging.warning(f"Failed to decode file '{file_path}' with any encoding")
        return None

    @staticmethod
    def safe_write_file(file_path: Union[str, Path], content: str,
                        encoding: str = 'utf-8', backup: bool = False) -> bool:
        """Safely write content to file with optional backup."""
        path = Path(file_path)

        try:
            # Create backup if requested and file exists
            if backup and path.exists():
                backup_path = path.with_suffix(path.suffix + '.bak')
                shutil.copy2(path, backup_path)

            # Ensure parent directory exists
            if not IOHelpers.ensure_directory(path.parent):
                return False

            # Write to temporary file first, then move to final location
            with tempfile.NamedTemporaryFile(mode='w', encoding=encoding,
                                             dir=path.parent, delete=False) as tmp_file:
                tmp_file.write(content)
                tmp_path = Path(tmp_file.name)

            # Atomic move
            shutil.move(str(tmp_path), str(path))
            return True

        except (IOError, OSError) as e:
            logging.error(f"Failed to write file '{file_path}': {e}")
            return False

    @staticmethod
    def read_json(file_path: Union[str, Path], default: Any = None) -> Any:
        """Read JSON file with error handling."""
        content = IOHelpers.safe_read_file(file_path)
        if content is None:
            return default

        try:
            return json.loads(content)
        except json.JSONDecodeError as e:
            logging.error(f"Failed to parse JSON from '{file_path}': {e}")
            return default

    @staticmethod
    def write_json(file_path: Union[str, Path], data: Any,
                   indent: int = 2, backup: bool = False) -> bool:
        """Write data to JSON file."""
        try:
            content = json.dumps(data, indent=indent, ensure_ascii=False, default=str)
            return IOHelpers.safe_write_file(file_path, content, backup=backup)
        except (TypeError, ValueError) as e:
            logging.error(f"Failed to serialize data to JSON for '{file_path}': {e}")
            return False

    @staticmethod
    def read_yaml(file_path: Union[str, Path], default: Any = None) -> Any:
        """Read YAML file with error handling."""
        content = IOHelpers.safe_read_file(file_path)
        if content is None:
            return default

        try:
            return yaml.safe_load(content)
        except yaml.YAMLError as e:
            logging.error(f"Failed to parse YAML from '{file_path}': {e}")
            return default

    @staticmethod
    def write_yaml(file_path: Union[str, Path], data: Any, backup: bool = False) -> bool:
        """Write data to YAML file."""
        try:
            content = yaml.dump(data, default_flow_style=False, allow_unicode=True)
            return IOHelpers.safe_write_file(file_path, content, backup=backup)
        except yaml.YAMLError as e:
            logging.error(f"Failed to serialize data to YAML for '{file_path}': {e}")
            return False

    @staticmethod
    def find_files(directory: Union[str, Path], pattern: str = "*",
                   recursive: bool = True, max_depth: Optional[int] = None) -> List[Path]:
        """Find files matching pattern in directory."""
        dir_path = Path(directory)
        if not dir_path.exists() or not dir_path.is_dir():
            return []

        files = []
        try:
            if recursive:
                if max_depth is not None:
                    # Implement depth-limited search
                    def search_with_depth(path: Path, current_depth: int = 0):
                        if current_depth > max_depth:
                            return

                        for item in path.iterdir():
                            if item.is_file() and item.match(pattern):
                                files.append(item)
                            elif item.is_dir() and current_depth < max_depth:
                                search_with_depth(item, current_depth + 1)

                    search_with_depth(dir_path)
                else:
                    files = list(dir_path.rglob(pattern))
            else:
                files = list(dir_path.glob(pattern))

            return sorted(files)

        except (PermissionError, OSError) as e:
            logging.error(f"Failed to search files in '{directory}': {e}")
            return []

    @staticmethod
    def get_file_stats(file_path: Union[str, Path]) -> Optional[Dict[str, Any]]:
        """Get file statistics."""
        path = Path(file_path)
        if not path.exists():
            return None

        try:
            stat = path.stat()
            return {
                'size': stat.st_size,
                'modified_time': stat.st_mtime,
                'created_time': stat.st_ctime,
                'is_file': path.is_file(),
                'is_dir': path.is_dir(),
                'permissions': oct(stat.st_mode)[-3:],
                'extension': path.suffix.lower(),
                'name': path.name,
                'parent': str(path.parent)
            }
        except (OSError, PermissionError) as e:
            logging.error(f"Failed to get stats for '{file_path}': {e}")
            return None

    @staticmethod
    def copy_with_backup(source: Union[str, Path], dest: Union[str, Path]) -> bool:
        """Copy file with automatic backup if destination exists."""
        source_path = Path(source)
        dest_path = Path(dest)

        if not source_path.exists():
            logging.error(f"Source file '{source}' does not exist")
            return False

        try:
            # Create backup if destination exists
            if dest_path.exists():
                backup_path = dest_path.with_suffix(dest_path.suffix + '.bak')
                shutil.copy2(dest_path, backup_path)
                logging.info(f"Created backup: {backup_path}")

            # Ensure destination directory exists
            if not IOHelpers.ensure_directory(dest_path.parent):
                return False

            # Copy file
            shutil.copy2(source_path, dest_path)
            return True

        except (IOError, OSError) as e:
            logging.error(f"Failed to copy '{source}' to '{dest}': {e}")
            return False

    @staticmethod
    def move_with_backup(source: Union[str, Path], dest: Union[str, Path]) -> bool:
        """Move file with automatic backup if destination exists."""
        source_path = Path(source)
        dest_path = Path(dest)

        if not source_path.exists():
            logging.error(f"Source file '{source}' does not exist")
            return False

        try:
            # Create backup if destination exists
            if dest_path.exists():
                backup_path = dest_path.with_suffix(dest_path.suffix + '.bak')
                shutil.move(str(dest_path), str(backup_path))
                logging.info(f"Created backup: {backup_path}")

            # Ensure destination directory exists
            if not IOHelpers.ensure_directory(dest_path.parent):
                return False

            # Move file
            shutil.move(str(source_path), str(dest_path))
            return True

        except (IOError, OSError) as e:
            logging.error(f"Failed to move '{source}' to '{dest}': {e}")
            return False

    @staticmethod
    def clean_temp_files(directory: Union[str, Path], extensions: List[str] = None,
                         max_age_hours: int = 24) -> int:
        """Clean temporary files older than specified age."""
        if extensions is None:
            extensions = ['.tmp', '.temp', '.bak', '.swp', '.~']

        dir_path = Path(directory)
        if not dir_path.exists():
            return 0

        import time
        cutoff_time = time.time() - (max_age_hours * 3600)
        cleaned_count = 0

        try:
            for file_path in dir_path.rglob('*'):
                if file_path.is_file():
                    # Check extension
                    if file_path.suffix.lower() in extensions:
                        # Check age
                        if file_path.stat().st_mtime < cutoff_time:
                            try:
                                file_path.unlink()
                                cleaned_count += 1
                            except OSError:
                                continue

            logging.info(f"Cleaned {cleaned_count} temporary files from '{directory}'")
            return cleaned_count

        except (PermissionError, OSError) as e:
            logging.error(f"Failed to clean temp files in '{directory}': {e}")
            return 0

    @staticmethod
    def get_directory_size(directory: Union[str, Path]) -> int:
        """Get total size of directory in bytes."""
        dir_path = Path(directory)
        if not dir_path.exists() or not dir_path.is_dir():
            return 0

        total_size = 0
        try:
            for file_path in dir_path.rglob('*'):
                if file_path.is_file():
                    total_size += file_path.stat().st_size
        except (PermissionError, OSError):
            pass

        return total_size

    @staticmethod
    def make_executable(file_path: Union[str, Path]) -> bool:
        """Make file executable (Unix/Linux/Mac)."""
        if os.name == 'nt':  # Windows
            return True  # Windows doesn't use Unix permissions

        try:
            path = Path(file_path)
            current_mode = path.stat().st_mode
            path.chmod(current_mode | 0o111)  # Add execute permission
            return True
        except (OSError, PermissionError) as e:
            logging.error(f"Failed to make '{file_path}' executable: {e}")
            return False


# Convenience functions
def ensure_directory(path: Union[str, Path]) -> bool:
    """Ensure directory exists."""
    return IOHelpers.ensure_directory(path)


def safe_read_file(file_path: Union[str, Path]) -> Optional[str]:
    """Safely read file content."""
    return IOHelpers.safe_read_file(file_path)


def safe_write_file(file_path: Union[str, Path], content: str) -> bool:
    """Safely write content to file."""
    return IOHelpers.safe_write_file(file_path, content)


def read_json(file_path: Union[str, Path], default: Any = None) -> Any:
    """Read JSON file."""
    return IOHelpers.read_json(file_path, default)


def write_json(file_path: Union[str, Path], data: Any) -> bool:
    """Write data to JSON file."""
    return IOHelpers.write_json(file_path, data)


def read_yaml(file_path: Union[str, Path], default: Any = None) -> Any:
    """Read YAML file."""
    return IOHelpers.read_yaml(file_path, default)


def write_yaml(file_path: Union[str, Path], data: Any) -> bool:
    """Write data to YAML file."""
    return IOHelpers.write_yaml(file_path, data)


if __name__ == "__main__":
    # Test I/O operations
    test_dir = Path("test_io")

    # Test directory creation
    print(f"Create directory: {ensure_directory(test_dir)}")

    # Test file operations
    test_file = test_dir / "test.txt"
    content = "Hello, World!\nThis is a test file."

    print(f"Write file: {safe_write_file(test_file, content)}")
    print(f"Read file: {safe_read_file(test_file)}")

    # Test JSON operations
    json_file = test_dir / "test.json"
    json_data = {"name": "test", "numbers": [1, 2, 3], "nested": {"key": "value"}}

    print(f"Write JSON: {write_json(json_file, json_data)}")
    print(f"Read JSON: {read_json(json_file)}")

    # Test file stats
    stats = IOHelpers.get_file_stats(test_file)
    print(f"File stats: {stats}")

    # Cleanup
    shutil.rmtree(test_dir, ignore_errors=True)
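Editorial note: a brief sketch, not part of the commit, exercising the backup and depth-limited search options that the __main__ demo above leaves out. It assumes the utils directory is on sys.path so the module imports as io_helpers.

# Sketch only: write with .bak backup, then search one directory level deep.
from io_helpers import IOHelpers

IOHelpers.safe_write_file("notes/todo.md", "- refactor analyzer\n")
IOHelpers.safe_write_file("notes/todo.md", "- ship v2\n", backup=True)   # keeps notes/todo.md.bak

shallow = IOHelpers.find_files("notes", pattern="*.md", max_depth=1)     # skips deeper subtrees
print(shallow, IOHelpers.get_directory_size("notes"))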