From c337204242c3401569d2613b0753f8759d042bec Mon Sep 17 00:00:00 2001 From: catlog22 Date: Tue, 23 Sep 2025 22:09:55 +0800 Subject: [PATCH] feat: Add pycli bash wrapper with hierarchical vector database support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Create unified bash wrapper (pycli) for Python CLI tools - Implement hierarchical vector database with smart parent discovery - Add comprehensive installation script with auto-configuration - Remove redundant analyzer.py and api_indexer.py files - Enhance Python scripts with environment variable support - Update documentation to focus on pycli unified interface Key Features: - Automatic parent directory vector DB discovery - No redundant vectorization in subdirectories - Central vector database storage in ~/.claude/vector_db - Configurable Python interpreter paths - One-command installation and setup 🤖 Generated with Claude Code Co-Authored-By: Claude --- .claude/python_script/analyzer.py | 305 ------------ .claude/python_script/api_indexer.py | 141 ------ .claude/python_script/cache/file_index.json | 276 ----------- .claude/python_script/config.yaml | 8 +- .claude/scripts/README.md | 152 ++++++ .claude/scripts/install_pycli.sh | 302 ++++++++++++ .claude/scripts/pycli | 225 +++++++++ .claude/scripts/pycli.conf | 159 ++++++ .claude/workflows/python-tools-strategy.md | 511 ++++++++++++++++++++ 9 files changed, 1353 insertions(+), 726 deletions(-) delete mode 100644 .claude/python_script/analyzer.py delete mode 100644 .claude/python_script/api_indexer.py delete mode 100644 .claude/python_script/cache/file_index.json create mode 100644 .claude/scripts/README.md create mode 100644 .claude/scripts/install_pycli.sh create mode 100644 .claude/scripts/pycli create mode 100644 .claude/scripts/pycli.conf create mode 100644 .claude/workflows/python-tools-strategy.md diff --git a/.claude/python_script/analyzer.py b/.claude/python_script/analyzer.py deleted file mode 100644 
index 46d7bc19..00000000 --- a/.claude/python_script/analyzer.py +++ /dev/null @@ -1,305 +0,0 @@ -#!/usr/bin/env python3 -""" -Unified Path-Aware Analyzer -Main entry point for the refactored analyzer system. -Provides a clean, simple API for intelligent file analysis. -""" - -import os -import sys -import argparse -import logging -import json -import time -from pathlib import Path -from typing import Dict, List, Optional, Any - -# Add current directory to path for imports -sys.path.insert(0, str(Path(__file__).parent)) - -from core.config import get_config -from core.file_indexer import FileIndexer, IndexStats -from core.context_analyzer import ContextAnalyzer, AnalysisResult -from core.path_matcher import PathMatcher, PathMatchingResult -from core.embedding_manager import EmbeddingManager -from utils.colors import Colors - - -class Analyzer: - """Main analyzer class with simplified API.""" - - def __init__(self, config_path: Optional[str] = None, root_path: str = "."): - self.root_path = Path(root_path).resolve() - self.config = get_config(config_path) - - # Setup logging - logging.basicConfig( - level=getattr(logging, self.config.get('logging.level', 'INFO')), - format=self.config.get('logging.format', '%(asctime)s - %(name)s - %(levelname)s - %(message)s') - ) - self.logger = logging.getLogger(__name__) - - # Initialize core components - self.indexer = FileIndexer(self.config, str(self.root_path)) - self.context_analyzer = ContextAnalyzer(self.config) - self.path_matcher = PathMatcher(self.config) - - # Initialize embedding manager if enabled - self.embedding_manager = None - if self.config.is_embedding_enabled(): - try: - self.embedding_manager = EmbeddingManager(self.config) - except ImportError: - self.logger.warning("Embedding dependencies not available. 
Install sentence-transformers for enhanced functionality.") - - def build_index(self) -> IndexStats: - """Build or update the file index.""" - print(Colors.yellow("Building file index...")) - start_time = time.time() - - self.indexer.build_index() - stats = self.indexer.get_stats() - - elapsed = time.time() - start_time - if stats: - print(Colors.green(f"Index built: {stats.total_files} files, ~{stats.total_tokens:,} tokens ({elapsed:.2f}s)")) - else: - print(Colors.green(f"Index built successfully ({elapsed:.2f}s)")) - - return stats - - def analyze(self, prompt: str, mode: str = "auto", patterns: Optional[List[str]] = None, - token_limit: Optional[int] = None, use_embeddings: Optional[bool] = None) -> Dict[str, Any]: - """Analyze and return relevant file paths for a given prompt.""" - - print(Colors.yellow("Analyzing project and prompt...")) - start_time = time.time() - - # Load or build index - index = self.indexer.load_index() - if not index: - self.build_index() - index = self.indexer.load_index() - - stats = self.indexer.get_stats() - print(Colors.cyan(f"Project stats: ~{stats.total_tokens:,} tokens across {stats.total_files} files")) - print(Colors.cyan(f"Categories: {', '.join(f'{k}: {v}' for k, v in stats.categories.items())}")) - - # Determine project size - project_size = self._classify_project_size(stats.total_tokens) - print(Colors.cyan(f"Project size: {project_size}")) - - # Analyze prompt context - print(Colors.yellow("Analyzing prompt context...")) - context_result = self.context_analyzer.analyze(prompt) - - print(Colors.cyan(f"Identified: {len(context_result.domains)} domains, {len(context_result.languages)} languages")) - if context_result.domains: - print(Colors.cyan(f"Top domains: {', '.join(context_result.domains[:3])}")) - - # Determine if we should use embeddings - should_use_embeddings = use_embeddings - if should_use_embeddings is None: - should_use_embeddings = ( - self.embedding_manager is not None and - self.config.is_embedding_enabled() 
and - len(context_result.keywords) < 5 # Use embeddings for vague queries - ) - - similar_files = [] - if should_use_embeddings and self.embedding_manager: - print(Colors.yellow("Using semantic similarity search...")) - # Update embeddings if needed - if not self.embedding_manager.embeddings_exist(): - print(Colors.yellow("Building embeddings (first run)...")) - self.embedding_manager.update_embeddings(index) - - similar_files = self.embedding_manager.find_similar_files(prompt, index) - print(Colors.cyan(f"Found {len(similar_files)} semantically similar files")) - - # Match files to context - print(Colors.yellow("Matching files to context...")) - matching_result = self.path_matcher.match_files( - index, - context_result, - token_limit=token_limit, - explicit_patterns=patterns - ) - - elapsed = time.time() - start_time - - print(Colors.green(f"Analysis complete: {len(matching_result.matched_files)} files, ~{matching_result.total_tokens:,} tokens")) - print(Colors.cyan(f"Confidence: {matching_result.confidence_score:.2f}")) - print(Colors.cyan(f"Execution time: {elapsed:.2f}s")) - - return { - 'files': [match.file_info.relative_path for match in matching_result.matched_files], - 'total_tokens': matching_result.total_tokens, - 'confidence': matching_result.confidence_score, - 'context': { - 'domains': context_result.domains, - 'languages': context_result.languages, - 'keywords': context_result.keywords - }, - 'stats': { - 'project_size': project_size, - 'total_files': stats.total_files, - 'analysis_time': elapsed, - 'embeddings_used': should_use_embeddings - } - } - - def generate_command(self, prompt: str, tool: str = "gemini", **kwargs) -> str: - """Generate a command for external tools (gemini/codex).""" - analysis_result = self.analyze(prompt, **kwargs) - - # Format file patterns - file_patterns = " ".join(f"@{{{file}}}" for file in analysis_result['files']) - - if tool == "gemini": - if len(analysis_result['files']) > 50: # Too many files for individual patterns 
- return f'gemini --all-files -p "{prompt}"' - else: - return f'gemini -p "{file_patterns} {prompt}"' - - elif tool == "codex": - workspace_flag = "-s workspace-write" if analysis_result['total_tokens'] > 100000 else "-s danger-full-access" - return f'codex {workspace_flag} --full-auto exec "{file_patterns} {prompt}"' - - else: - raise ValueError(f"Unsupported tool: {tool}") - - def _classify_project_size(self, tokens: int) -> str: - """Classify project size based on token count.""" - small_limit = self.config.get('token_limits.small_project', 500000) - medium_limit = self.config.get('token_limits.medium_project', 2000000) - - if tokens < small_limit: - return "small" - elif tokens < medium_limit: - return "medium" - else: - return "large" - - def get_project_stats(self) -> Dict[str, Any]: - """Get comprehensive project statistics.""" - stats = self.indexer.get_stats() - embedding_stats = {} - - if self.embedding_manager: - embedding_stats = { - 'embeddings_exist': self.embedding_manager.embeddings_exist(), - 'embedding_count': len(self.embedding_manager.load_embeddings()) if self.embedding_manager.embeddings_exist() else 0 - } - - return { - 'files': stats.total_files, - 'tokens': stats.total_tokens, - 'size_bytes': stats.total_size, - 'categories': stats.categories, - 'project_size': self._classify_project_size(stats.total_tokens), - 'last_updated': stats.last_updated, - 'embeddings': embedding_stats, - 'config': { - 'cache_dir': self.config.get_cache_dir(), - 'embedding_enabled': self.config.is_embedding_enabled(), - 'exclude_patterns_count': len(self.config.get_exclude_patterns()) - } - } - - -def main(): - """CLI entry point.""" - parser = argparse.ArgumentParser( - description="Path-Aware Analyzer - Intelligent file pattern detection", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - python analyzer.py "analyze authentication flow" - python analyzer.py "fix database connection" --patterns "src/**/*.py" - python analyzer.py 
"review API endpoints" --tool gemini - python analyzer.py --stats - """ - ) - - parser.add_argument('prompt', nargs='?', help='Analysis prompt or task description') - parser.add_argument('--patterns', nargs='*', help='Explicit file patterns to include') - parser.add_argument('--tool', choices=['gemini', 'codex'], help='Generate command for specific tool') - parser.add_argument('--output', choices=['patterns', 'json'], default='patterns', help='Output format') - parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output') - parser.add_argument('--stats', action='store_true', help='Show project statistics and exit') - parser.add_argument('--build-index', action='store_true', help='Build file index and exit') - - args = parser.parse_args() - - # Create analyzer with default values - analyzer = Analyzer(config_path=None, root_path=".") - - # Handle special commands - if args.build_index: - analyzer.build_index() - return - - if args.stats: - stats = analyzer.get_project_stats() - if args.output == 'json': - print(json.dumps(stats, indent=2, default=str)) - else: - print(f"Total files: {stats['files']}") - print(f"Total tokens: {stats['tokens']:,}") - print(f"Categories: {stats['categories']}") - if 'embeddings' in stats: - print(f"Embeddings: {stats['embeddings']['embedding_count']}") - return - - # Require prompt for analysis - if not args.prompt: - parser.error("Analysis prompt is required unless using --build-index or --stats") - - # Perform analysis - try: - result = analyzer.analyze( - args.prompt, - patterns=args.patterns, - use_embeddings=False # Disable embeddings by default for simplicity - ) - - # Generate output - if args.tool: - # Generate command using already computed result - file_patterns = " ".join(f"@{{{file}}}" for file in result['files']) - if args.tool == "gemini": - if len(result['files']) > 50: - command = f'gemini --all-files -p "{args.prompt}"' - else: - command = f'gemini -p "{file_patterns} {args.prompt}"' - elif 
args.tool == "codex": - workspace_flag = "-s workspace-write" if result['total_tokens'] > 100000 else "-s danger-full-access" - command = f'codex {workspace_flag} --full-auto exec "{file_patterns} {args.prompt}"' - print(command) - elif args.output == 'json': - print(json.dumps(result, indent=2, default=str)) - else: # patterns output (default) - for file_path in result['files']: - print(f"@{{{file_path}}}") - - # Show verbose details - if args.verbose: - print(f"\n# Analysis Details:") - print(f"# Matched files: {len(result['files'])}") - print(f"# Total tokens: {result['total_tokens']:,}") - print(f"# Confidence: {result['confidence']:.2f}") - - except KeyboardInterrupt: - print(Colors.warning("\nAnalysis interrupted by user")) - sys.exit(1) - except Exception as e: - print(Colors.error(f"Analysis failed: {e}")) - if args.verbose: - import traceback - traceback.print_exc() - sys.exit(1) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/.claude/python_script/api_indexer.py b/.claude/python_script/api_indexer.py deleted file mode 100644 index bcd7a9e2..00000000 --- a/.claude/python_script/api_indexer.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -""" -API Documentation Indexer -Parses Markdown documentation to create a searchable index of classes and methods. 
-""" - -import os -import re -import json -import logging -from pathlib import Path -from typing import Dict, Any - -from core.file_indexer import FileIndexer - -class ApiIndexer: - def __init__(self, config: Dict, root_path: str = "."): - self.config = config - self.root_path = Path(root_path).resolve() - self.file_indexer = FileIndexer(config, root_path) - self.api_index_file = self.file_indexer.cache_dir / "api_index.json" - self.logger = logging.getLogger(__name__) - - def build_index(self): - """Builds the API index from Markdown files.""" - self.logger.info("Building API index...") - file_index = self.file_indexer.load_index() - if not file_index: - self.logger.info("File index not found, building it first.") - self.file_indexer.build_index() - file_index = self.file_indexer.load_index() - - api_index = {} - for file_info in file_index.values(): - if file_info.extension == ".md": - self.logger.debug(f"Parsing {file_info.path}") - try: - with open(file_info.path, "r", encoding="utf-8") as f: - content = f.read() - self._parse_markdown(content, file_info.relative_path, api_index) - except Exception as e: - self.logger.error(f"Error parsing {file_info.path}: {e}") - - self._save_index(api_index) - self.logger.info(f"API index built with {len(api_index)} classes.") - - def _parse_markdown(self, content: str, file_path: str, api_index: Dict): - """Parses a single Markdown file for class and method info.""" - class_name_match = re.search(r"^#\s+([A-Za-z0-9_]+)", content) - if not class_name_match: - return - - class_name = class_name_match.group(1) - api_index[class_name] = { - "file_path": file_path, - "description": "", - "methods": {} - } - - # Simple description extraction - desc_match = re.search(r"\*\*Description:\*\*\s*(.+)", content) - if desc_match: - api_index[class_name]["description"] = desc_match.group(1).strip() - - # Method extraction - method_sections = re.split(r"###\s+", content)[1:] - for i, section in enumerate(method_sections): - 
method_signature_match = re.search(r"`(.+?)`", section) - if not method_signature_match: - continue - - signature = method_signature_match.group(1) - method_name_match = re.search(r"([A-Za-z0-9_]+)\(β€œ, signature) - if not method_name_match: - continue - - method_name = method_name_match.group(1) - - method_description = "" - method_desc_match = re.search(r"\*\*Description:\*\*\s*(.+)", section) - if method_desc_match: - method_description = method_desc_match.group(1).strip() - - # A simple way to get a line number approximation - line_number = content.count("\n", 0, content.find(f"### `{signature}`")) + 1 - - api_index[class_name]["methods"ΠŸΠΎΠΊΠ°Π·Π°Ρ‚ΡŒ большС] = { - "signature": signature, - "description": method_description, - "line_number": line_number - } - - def _save_index(self, api_index: Dict): - """Saves the API index to a file.""" - try: - with open(self.api_index_file, "w", encoding="utf-8") as f: - json.dump(api_index, f, indent=2) - except IOError as e: - self.logger.error(f"Could not save API index: {e}") - - def search(self, class_name: str, method_name: str = None) -> Any: - """Searches the API index for a class or method.""" - if not self.api_index_file.exists(): - self.build_index() - - with open(self.api_index_file, "r", encoding="utf-8") as f: - api_index = json.load(f) - - if class_name not in api_index: - return None - - if method_name: - return api_index[class_name]["methods"].get(method_name) - else: - return api_index[class_name] - -if __name__ == "__main__": - from core.config import get_config - import argparse - - logging.basicConfig(level=logging.INFO) - - parser = argparse.ArgumentParser(description="API Documentation Indexer.") - parser.add_argument("--build", action="store_true", help="Build the API index.") - parser.add_argument("--search_class", help="Search for a class.") - parser.add_argument("--search_method", help="Search for a method within a class (requires --search_class).") - - args = parser.parse_args() - - config = 
get_config() - api_indexer = ApiIndexer(config.to_dict()) - - if args.build: - api_indexer.build_index() - - if args.search_class: - result = api_indexer.search(args.search_class, args.search_method) - if result: - print(json.dumps(result, indent=2)) - else: - print("Not found.") diff --git a/.claude/python_script/cache/file_index.json b/.claude/python_script/cache/file_index.json deleted file mode 100644 index ab5c1ce2..00000000 --- a/.claude/python_script/cache/file_index.json +++ /dev/null @@ -1,276 +0,0 @@ -{ - "stats": { - "total_files": 26, - "total_tokens": 56126, - "total_size": 246519, - "categories": { - "code": 21, - "config": 3, - "docs": 1, - "other": 1 - }, - "last_updated": 1758177270.9103189 - }, - "files": { - "analyzer.py": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\analyzer.py", - "relative_path": "analyzer.py", - "size": 12595, - "modified_time": 1758175179.730658, - "extension": ".py", - "category": "code", - "estimated_tokens": 3072, - "content_hash": "3fb090745b5080e0731e7ef3fc94029d" - }, - "cli.py": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\cli.py", - "relative_path": "cli.py", - "size": 8329, - "modified_time": 1758177193.3710027, - "extension": ".py", - "category": "code", - "estimated_tokens": 2030, - "content_hash": "b9f0b5d6a154cf51c8665b2344c9faf8" - }, - "config.yaml": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\config.yaml", - "relative_path": "config.yaml", - "size": 4317, - "modified_time": 1758163450.6223683, - "extension": ".yaml", - "category": "config", - "estimated_tokens": 1040, - "content_hash": "b431b73dfa86ff83145468bbf4422a79" - }, - "indexer.py": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\indexer.py", - "relative_path": "indexer.py", - "size": 7776, - "modified_time": 1758177151.2160237, - "extension": ".py", - "category": "code", - "estimated_tokens": 1893, - "content_hash": "f88b5e5bffce26f3170974df2906aac3" - }, - "install.sh": { - "path": 
"D:\\Claude_dms3\\.claude\\python_script\\install.sh", - "relative_path": "install.sh", - "size": 5236, - "modified_time": 1758161898.317552, - "extension": ".sh", - "category": "code", - "estimated_tokens": 1262, - "content_hash": "cc3a9121a0b8281457270f30ad76f5f6" - }, - "requirements.txt": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\requirements.txt", - "relative_path": "requirements.txt", - "size": 495, - "modified_time": 1758164967.7707567, - "extension": ".txt", - "category": "docs", - "estimated_tokens": 118, - "content_hash": "aea2ba14dfa7b37b1dde5518de87d956" - }, - "setup.py": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\setup.py", - "relative_path": "setup.py", - "size": 2860, - "modified_time": 1758177212.9095325, - "extension": ".py", - "category": "code", - "estimated_tokens": 692, - "content_hash": "609abf8b9c84a09f6a59d5815eb90bc5" - }, - "__init__.py": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\__init__.py", - "relative_path": "__init__.py", - "size": 1065, - "modified_time": 1758177224.8017242, - "extension": ".py", - "category": "code", - "estimated_tokens": 257, - "content_hash": "47368b235086fc0c75ba34a824c58506" - }, - "cache\\embeddings.pkl": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\cache\\embeddings.pkl", - "relative_path": "cache\\embeddings.pkl", - "size": 35109, - "modified_time": 1758175163.6754165, - "extension": ".pkl", - "category": "other", - "estimated_tokens": 4713, - "content_hash": "b8ed5c068acd5ed52ba10839701a5a24" - }, - "cache\\embedding_index.json": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\cache\\embedding_index.json", - "relative_path": "cache\\embedding_index.json", - "size": 5589, - "modified_time": 1758175163.6764157, - "extension": ".json", - "category": "config", - "estimated_tokens": 1358, - "content_hash": "5c2ba41b1b69ce19d2fc3b5854f6ee53" - }, - "cache\\file_index.json": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\cache\\file_index.json", - 
"relative_path": "cache\\file_index.json", - "size": 12164, - "modified_time": 1758165699.0883024, - "extension": ".json", - "category": "config", - "estimated_tokens": 2957, - "content_hash": "73563db28a2808aa28544c0275b97f94" - }, - "core\\config.py": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\core\\config.py", - "relative_path": "core\\config.py", - "size": 12266, - "modified_time": 1758164531.5934324, - "extension": ".py", - "category": "code", - "estimated_tokens": 2985, - "content_hash": "d85aedc01a528b486d41acbd823181d7" - }, - "core\\context_analyzer.py": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\core\\context_analyzer.py", - "relative_path": "core\\context_analyzer.py", - "size": 15002, - "modified_time": 1758164846.7665854, - "extension": ".py", - "category": "code", - "estimated_tokens": 3661, - "content_hash": "677903b5aaf3db13575ca1ca99ec7c16" - }, - "core\\embedding_manager.py": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\core\\embedding_manager.py", - "relative_path": "core\\embedding_manager.py", - "size": 17271, - "modified_time": 1758166063.1635072, - "extension": ".py", - "category": "code", - "estimated_tokens": 4204, - "content_hash": "d8f52cb93140a46fe3d22d465ec01b22" - }, - "core\\file_indexer.py": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\core\\file_indexer.py", - "relative_path": "core\\file_indexer.py", - "size": 14484, - "modified_time": 1758164612.5888917, - "extension": ".py", - "category": "code", - "estimated_tokens": 3525, - "content_hash": "1518d309108f3300417b65f6234241d1" - }, - "core\\gitignore_parser.py": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\core\\gitignore_parser.py", - "relative_path": "core\\gitignore_parser.py", - "size": 6757, - "modified_time": 1758164472.643646, - "extension": ".py", - "category": "code", - "estimated_tokens": 1644, - "content_hash": "9cd97725576727080aaafd329d9ce2c4" - }, - "core\\path_matcher.py": { - "path": 
"D:\\Claude_dms3\\.claude\\python_script\\core\\path_matcher.py", - "relative_path": "core\\path_matcher.py", - "size": 19568, - "modified_time": 1758166045.8395746, - "extension": ".py", - "category": "code", - "estimated_tokens": 4767, - "content_hash": "f1dc44dc3ed67f100770aea40197623f" - }, - "core\\__init__.py": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\core\\__init__.py", - "relative_path": "core\\__init__.py", - "size": 712, - "modified_time": 1758164419.4437866, - "extension": ".py", - "category": "code", - "estimated_tokens": 172, - "content_hash": "b25991cb8d977021362f45e121e89de7" - }, - "tools\\module_analyzer.py": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\tools\\module_analyzer.py", - "relative_path": "tools\\module_analyzer.py", - "size": 14273, - "modified_time": 1758164687.488236, - "extension": ".py", - "category": "code", - "estimated_tokens": 3476, - "content_hash": "b958ec7ed264242f2bb30b1cca66b144" - }, - "tools\\tech_stack.py": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\tools\\tech_stack.py", - "relative_path": "tools\\tech_stack.py", - "size": 7576, - "modified_time": 1758164695.643722, - "extension": ".py", - "category": "code", - "estimated_tokens": 1843, - "content_hash": "f391a45d8254f0c4f4f789027dd69afc" - }, - "tools\\workflow_updater.py": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\tools\\workflow_updater.py", - "relative_path": "tools\\workflow_updater.py", - "size": 9577, - "modified_time": 1758164703.2230499, - "extension": ".py", - "category": "code", - "estimated_tokens": 2334, - "content_hash": "526edf0cfbe3c2041135eace9f89ef13" - }, - "tools\\__init__.py": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\tools\\__init__.py", - "relative_path": "tools\\__init__.py", - "size": 329, - "modified_time": 1758165927.9923615, - "extension": ".py", - "category": "code", - "estimated_tokens": 79, - "content_hash": "139aa450d7511347cc6799c471eac745" - }, - "utils\\cache.py": { - "path": 
"D:\\Claude_dms3\\.claude\\python_script\\utils\\cache.py", - "relative_path": "utils\\cache.py", - "size": 12067, - "modified_time": 1758164781.2914226, - "extension": ".py", - "category": "code", - "estimated_tokens": 2929, - "content_hash": "39e49b731d601fafac74e96ed074e654" - }, - "utils\\colors.py": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\utils\\colors.py", - "relative_path": "utils\\colors.py", - "size": 6959, - "modified_time": 1758165650.9865932, - "extension": ".py", - "category": "code", - "estimated_tokens": 1678, - "content_hash": "8bb57134555d8fb07d2e351d4e100f0f" - }, - "utils\\io_helpers.py": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\utils\\io_helpers.py", - "relative_path": "utils\\io_helpers.py", - "size": 13773, - "modified_time": 1758164823.513003, - "extension": ".py", - "category": "code", - "estimated_tokens": 3349, - "content_hash": "aa54747c49319cc2c90c0544c668009a" - }, - "utils\\__init__.py": { - "path": "D:\\Claude_dms3\\.claude\\python_script\\utils\\__init__.py", - "relative_path": "utils\\__init__.py", - "size": 370, - "modified_time": 1758164433.7142198, - "extension": ".py", - "category": "code", - "estimated_tokens": 88, - "content_hash": "62ec4a34f1643a23c79207061bdb8d49" - } - } -} \ No newline at end of file diff --git a/.claude/python_script/config.yaml b/.claude/python_script/config.yaml index af2a8a92..226b024f 100644 --- a/.claude/python_script/config.yaml +++ b/.claude/python_script/config.yaml @@ -66,12 +66,12 @@ file_extensions: # Embedding/RAG configuration embedding: enabled: true # Set to true to enable RAG features - model: "codesage/codesage-large-v2" # CodeSage V2 for code embeddings + model: "all-MiniLM-L6-v2" # Stable general-purpose embedding model cache_dir: "cache" similarity_threshold: 0.6 # Higher threshold for better code similarity - max_context_length: 2048 # Increased for CodeSage V2 capabilities - batch_size: 8 # Reduced for larger model - trust_remote_code: true # Required for 
CodeSage V2 + max_context_length: 512 # Standard context length + batch_size: 32 # Standard batch size + trust_remote_code: false # Not required for standard models # Context analysis settings context_analysis: diff --git a/.claude/scripts/README.md b/.claude/scripts/README.md new file mode 100644 index 00000000..aa9194af --- /dev/null +++ b/.claude/scripts/README.md @@ -0,0 +1,152 @@ +# pycli - Python CLI Wrapper with Hierarchical Vector Database + +This directory contains the bash wrapper and configuration for the enhanced Python-based analysis CLI with hierarchical vector database support. + +## 📁 Files + +- **`pycli`** - Main bash wrapper script +- **`pycli.conf`** - Configuration file +- **`install_pycli.sh`** - Installation script +- **`README.md`** - This documentation + +## 🚀 Quick Installation + +```bash +# Run the installation script +bash install_pycli.sh + +# Follow the prompts to configure your shell +# The script will automatically detect your Python installation + +# Verify installation +pycli --help +``` + +## 🎯 Key Features + +### Hierarchical Vector Database +- **Smart Parent Discovery**: Subdirectories automatically use parent's vector database +- **No Redundant Processing**: Avoids duplicate vectorization in project subdirectories +- **Central Storage**: All vector databases stored in `~/.claude/vector_db/` +- **Path-based Organization**: Organized by project directory structure + +### Unified Interface +- **Single Command**: `pycli` replaces complex Python script calls +- **Intelligent Context**: Automatic file discovery with semantic search +- **Tool Integration**: Seamless integration with Gemini and Codex +- **Configuration Management**: Environment-specific Python interpreter paths + +## 📋 Common Commands + +```bash +# Initialize new project +cd /path/to/your/project +pycli --init + +# Smart analysis +pycli --analyze --query "authentication patterns" --tool gemini + +# Direct analysis +pycli --analyze --tool codex -p "implement 
user login" +``` + +## 🔧 Configuration + +Edit `~/.claude/scripts/pycli.conf` after installation: + +```bash +# Python interpreter path +PYTHON_PATH="/usr/bin/python3" + +# Vector database root directory +VECTOR_DB_ROOT="$HOME/.claude/vector_db" + +# Python scripts directory +PYTHON_SCRIPT_DIR="$HOME/.claude/python_script" +``` + +## 🏗️ How Hierarchical DB Works + +``` +Project Structure: Vector Database: +/home/user/myproject/ ~/.claude/vector_db/ +├── src/ └── home_user_myproject/ +│ ├── auth/ ├── embeddings.pkl +│ └── api/ └── index.json +└── tests/ + +# All subdirectories use the single parent DB +``` + +## 📖 Documentation + +For complete usage information, see: +- **Strategy Guide**: `~/.claude/workflows/python-tools-strategy.md` +- **Installation Guide**: Run `bash install_pycli.sh` for guided setup + +## 🎪 Migration from Legacy Tools + +```bash +# Replace gemini-wrapper +# OLD: ~/.claude/scripts/gemini-wrapper -p "prompt" +# NEW: pycli --analyze --tool gemini -p "prompt" + +# Replace codex commands +# OLD: codex --full-auto exec "task" +# NEW: pycli --analyze --tool codex -p "task" + +# Enhanced with context discovery +pycli --analyze --query "relevant context" --tool both +``` + +## 🐛 Troubleshooting + +```bash +# Check system status +pycli --status + +# Rebuild everything +pycli --init + +# Test search functionality +pycli --test-search + +# View configuration +cat ~/.claude/scripts/pycli.conf +``` + +## 💡 Advanced Usage + +### Project Integration +```bash +# Add to package.json +{ + "scripts": { + "analyze": "pycli --analyze --query", + "ai-init": "pycli --init", + "ai-update": "pycli --update-embeddings" + } +} + +# Use in Makefiles +analyze: + pycli --analyze --query "$(QUERY)" --tool gemini +``` + +### CI/CD Integration +```yaml +# GitHub Actions example +- name: Update AI Context + run: pycli --update-embeddings + +- name: Analyze Changes + run: pycli 
--analyze --query "code review" --tool gemini
+```
+
+---
+
+For questions or issues, check the documentation or run `pycli --help`.
\ No newline at end of file
diff --git a/.claude/scripts/install_pycli.sh b/.claude/scripts/install_pycli.sh
new file mode 100644
index 00000000..6b8e182e
--- /dev/null
+++ b/.claude/scripts/install_pycli.sh
@@ -0,0 +1,302 @@
+#!/bin/bash
+
+#==============================================================================
+# pycli Installation Script
+#
+# This script installs the pycli bash wrapper and configuration files
+# to the ~/.claude directory structure.
+#==============================================================================
+
+set -euo pipefail
+
+# ANSI color escape sequences for output (expanded by `echo -e` in the print_* helpers)
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Print colored output: each print_* helper takes the message as $1 and prefixes a colored tag; errors go to stderr
+print_status() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+print_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+print_warning() {
+    echo -e "${YELLOW}[WARNING]${NC} $1"
+}
+
+print_error() {
+    echo -e "${RED}[ERROR]${NC} $1" >&2
+}
+
+#==============================================================================
+# Configuration
+#==============================================================================
+
+SOURCE_DIR="$(cd "$(dirname "$0")" && pwd)"  # absolute directory containing this installer
+INSTALL_BASE="$HOME/.claude"
+INSTALL_DIR="$INSTALL_BASE/scripts"
+PYTHON_SCRIPT_DIR="$INSTALL_BASE/python_script"
+VECTOR_DB_DIR="$INSTALL_BASE/vector_db"
+CONFIG_DIR="$INSTALL_BASE/config"
+LOGS_DIR="$INSTALL_BASE/logs"
+
+#==============================================================================
+# Pre-installation Checks
+#==============================================================================
+
+print_status "Starting pycli installation..."
+print_status "Source directory: $SOURCE_DIR"
+print_status "Install directory: $INSTALL_DIR"
+
+# Check if source files exist (pycli and pycli.conf must sit next to this installer)
+if [[ ! -f "$SOURCE_DIR/pycli" ]]; then
+    print_error "pycli script not found in $SOURCE_DIR"
+    exit 1
+fi
+
+if [[ ! -f "$SOURCE_DIR/pycli.conf" ]]; then
+    print_error "pycli.conf not found in $SOURCE_DIR"
+    exit 1
+fi
+
+# Check if Python script directory exists; it is not created here, so warn and ask before continuing
+if [[ ! -d "$PYTHON_SCRIPT_DIR" ]]; then
+    print_warning "Python script directory not found: $PYTHON_SCRIPT_DIR"
+    print_status "Please ensure the Python scripts are installed in ~/.claude/python_script/"
+    read -p "Continue installation anyway? (y/N): " -n 1 -r || REPLY=""  # EOF / non-interactive stdin counts as "no" instead of tripping set -e
+    echo
+    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
+        print_status "Installation cancelled."
+        exit 0
+    fi
+fi
+
+#==============================================================================
+# Create Directory Structure
+#==============================================================================
+
+print_status "Creating directory structure..."
+
+# Create all required directories (mkdir -p, so re-running the installer is harmless)
+directories=(
+    "$INSTALL_BASE"
+    "$INSTALL_DIR"
+    "$VECTOR_DB_DIR"
+    "$CONFIG_DIR"
+    "$LOGS_DIR"
+)
+
+for dir in "${directories[@]}"; do
+    if [[ ! -d "$dir" ]]; then
+        mkdir -p "$dir"
+        print_status "Created directory: $dir"
+    else
+        print_status "Directory exists: $dir"
+    fi
+done
+
+#==============================================================================
+# Install Files
+#==============================================================================
+
+print_status "Installing pycli files..."
+ +# Backup existing files if they exist +if [[ -f "$INSTALL_DIR/pycli" ]]; then + backup_file="$INSTALL_DIR/pycli.backup.$(date +%Y%m%d_%H%M%S)" + cp "$INSTALL_DIR/pycli" "$backup_file" + print_warning "Backed up existing pycli to: $backup_file" +fi + +if [[ -f "$INSTALL_DIR/pycli.conf" ]]; then + backup_file="$INSTALL_DIR/pycli.conf.backup.$(date +%Y%m%d_%H%M%S)" + cp "$INSTALL_DIR/pycli.conf" "$backup_file" + print_warning "Backed up existing pycli.conf to: $backup_file" +fi + +# Copy files +cp "$SOURCE_DIR/pycli" "$INSTALL_DIR/" +cp "$SOURCE_DIR/pycli.conf" "$INSTALL_DIR/" + +# Make executable +chmod +x "$INSTALL_DIR/pycli" + +print_success "Files installed successfully" + +#============================================================================== +# Configuration Updates +#============================================================================== + +print_status "Updating configuration..." + +# Detect Python path +PYTHON_CANDIDATES=( + "/usr/bin/python3" + "/usr/local/bin/python3" + "/opt/conda/bin/python" + "$(which python3 2>/dev/null || echo "")" + "$(which python 2>/dev/null || echo "")" +) + +DETECTED_PYTHON="" +for candidate in "${PYTHON_CANDIDATES[@]}"; do + if [[ -n "$candidate" ]] && [[ -x "$candidate" ]]; then + # Test if it's Python 3 + if "$candidate" -c "import sys; exit(0 if sys.version_info >= (3, 6) else 1)" 2>/dev/null; then + DETECTED_PYTHON="$candidate" + break + fi + fi +done + +if [[ -n "$DETECTED_PYTHON" ]]; then + print_success "Detected Python: $DETECTED_PYTHON" + + # Update configuration file + sed -i.bak "s|^PYTHON_PATH=.*|PYTHON_PATH=\"$DETECTED_PYTHON\"|" "$INSTALL_DIR/pycli.conf" + print_status "Updated PYTHON_PATH in configuration" +else + print_warning "Could not detect Python 3.6+. 
Please manually update PYTHON_PATH in:" + print_warning " $INSTALL_DIR/pycli.conf" +fi + +#============================================================================== +# Shell Integration Setup +#============================================================================== + +print_status "Setting up shell integration..." + +# Detect shell +SHELL_RC="" +if [[ -n "${BASH_VERSION:-}" ]] || [[ "$SHELL" == *"bash"* ]]; then + SHELL_RC="$HOME/.bashrc" +elif [[ -n "${ZSH_VERSION:-}" ]] || [[ "$SHELL" == *"zsh"* ]]; then + SHELL_RC="$HOME/.zshrc" +fi + +# Function to add alias/path to shell config +add_to_shell_config() { + local config_file="$1" + local content="$2" + + if [[ -f "$config_file" ]]; then + if ! grep -q "pycli" "$config_file"; then + echo "" >> "$config_file" + echo "# pycli - Python CLI Wrapper" >> "$config_file" + echo "$content" >> "$config_file" + print_success "Added pycli to $config_file" + return 0 + else + print_warning "pycli already configured in $config_file" + return 1 + fi + fi + return 1 +} + +# Try to add alias automatically +ALIAS_ADDED=false +PATH_ADDED=false + +if [[ -n "$SHELL_RC" ]]; then + # Try to add alias + if add_to_shell_config "$SHELL_RC" "alias pycli='$INSTALL_DIR/pycli'"; then + ALIAS_ADDED=true + fi + + # Also add to PATH + if add_to_shell_config "$SHELL_RC" "export PATH=\"\$PATH:$INSTALL_DIR\""; then + PATH_ADDED=true + fi +fi + +#============================================================================== +# Test Installation +#============================================================================== + +print_status "Testing installation..." 
+
+# Verify the installed wrapper has its executable bit; abort (exit 1) otherwise
+if [[ -x "$INSTALL_DIR/pycli" ]]; then
+    print_success "pycli script is executable"
+else
+    print_error "pycli script is not executable"
+    exit 1
+fi
+
+# Smoke test: run the installed wrapper with --help; a failure is only reported as a warning and does not abort
+if "$INSTALL_DIR/pycli" --help >/dev/null 2>&1; then
+    print_success "pycli configuration loads correctly"
+else
+    print_warning "pycli configuration test failed - check Python path"
+fi
+
+#==============================================================================
+# Installation Summary
+# NOTE(review): the emoji/bullets in the strings below look mis-encoded (UTF-8 read through a legacy code page) - confirm against the committed file
+#==============================================================================
+
+print_success "Installation completed successfully!"
+echo
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "πŸ“ Installation Summary:"
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo " β€’ Executable: $INSTALL_DIR/pycli"
+echo " β€’ Config: $INSTALL_DIR/pycli.conf"
+echo " β€’ Vector DB: $VECTOR_DB_DIR/"
+echo " β€’ Logs: $LOGS_DIR/"
+echo
+
+echo "πŸš€ Quick Start:"
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+
+if [[ "$ALIAS_ADDED" == true ]]; then  # the alias was appended to $SHELL_RC earlier in this script
+    echo " 1. Reload your shell configuration:"
+    echo " source $SHELL_RC"
+    echo
+    echo " 2. Initialize vector DB for a project:"
+    echo " cd /path/to/your/project"
+    echo " pycli --init"
+    echo
+    echo " 3. Start analyzing code:"
+    echo " pycli --analyze --query \"authentication patterns\" --tool gemini"
+else  # nothing was appended automatically: print the manual setup steps instead
+    echo " 1. Add pycli to your shell configuration:"
+    if [[ -n "$SHELL_RC" ]]; then
+        echo " echo \"alias pycli='$INSTALL_DIR/pycli'\" >> $SHELL_RC"
+        echo " source $SHELL_RC"
+    else
+        echo " alias pycli='$INSTALL_DIR/pycli'"
+    fi
+    echo
+    echo " 2. Or add to PATH:"
+    echo " export PATH=\"\$PATH:$INSTALL_DIR\""
+    echo
+    echo " 3. Initialize vector DB for a project:"
+    echo " cd /path/to/your/project"
+    echo " pycli --init"
+    echo
+    echo " 4. Start analyzing code:"
+    echo " pycli --analyze --query \"authentication patterns\" --tool gemini"
+fi
+
+echo
+echo "πŸ“š Documentation:"
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo " β€’ Help: pycli --help"
+echo " β€’ Strategy: ~/.claude/workflows/python-tools-strategy.md"
+echo
+echo "βš™οΈ Configuration:"
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo " β€’ Edit config: $INSTALL_DIR/pycli.conf"
+
+if [[ -z "$DETECTED_PYTHON" ]]; then  # empty when no Python 3.6+ interpreter was auto-detected
+    echo " β€’ ⚠️ Please update PYTHON_PATH in pycli.conf"
+fi
+
+echo
+print_success "Installation complete! πŸŽ‰"
\ No newline at end of file
diff --git a/.claude/scripts/pycli b/.claude/scripts/pycli
new file mode 100644
index 00000000..775292a6
--- /dev/null
+++ b/.claude/scripts/pycli
@@ -0,0 +1,225 @@
+#!/bin/bash
+
+#==============================================================================
+# pycli - Python CLI Wrapper with Hierarchical Vector Database Support
+#
+# This script provides a bash wrapper for the Python-based analysis CLI,
+# with intelligent hierarchical vector database management.
+#
+# Features:
+# - Hierarchical vector database support (subdirs use parent's DB)
+# - Configurable Python environment
+# - Central vector database storage
+# - Smart project root detection
+#==============================================================================
+
+set -euo pipefail
+
+# Load configuration from pycli.conf in the directory of the invoked path ($0); the file is sourced, so it runs as bash code
+CONFIG_FILE="$(dirname "$0")/pycli.conf"
+if [[ -f "$CONFIG_FILE" ]]; then
+    source "$CONFIG_FILE"
+else
+    echo "Error: Configuration file not found: $CONFIG_FILE"
+    echo "Please ensure pycli.conf exists in the same directory as this script."
+ exit 1 +fi + +# Validate required configuration +if [[ -z "${PYTHON_PATH:-}" ]]; then + echo "Error: PYTHON_PATH not set in configuration" + exit 1 +fi + +if [[ -z "${PYTHON_SCRIPT_DIR:-}" ]]; then + echo "Error: PYTHON_SCRIPT_DIR not set in configuration" + exit 1 +fi + +if [[ -z "${VECTOR_DB_ROOT:-}" ]]; then + echo "Error: VECTOR_DB_ROOT not set in configuration" + exit 1 +fi + +# Check if Python is available +if ! command -v "$PYTHON_PATH" &> /dev/null; then + echo "Error: Python not found at $PYTHON_PATH" + echo "Please update PYTHON_PATH in $CONFIG_FILE" + exit 1 +fi + +# Check if Python script directory exists +if [[ ! -d "$PYTHON_SCRIPT_DIR" ]]; then + echo "Error: Python script directory not found: $PYTHON_SCRIPT_DIR" + exit 1 +fi + +# Get current directory (will be used as project root for indexing) +CURRENT_DIR=$(pwd) + +#============================================================================== +# Helper Functions +#============================================================================== + +# Convert current path to vector DB path +# e.g., /home/user/project/subdir -> ~/.claude/vector_db/home_user_project_subdir +get_vector_db_path() { + local path="$1" + # Replace / with _ and remove leading / + local safe_path="${path//\//_}" + safe_path="${safe_path#_}" + # Handle Windows paths (C: -> C_) + safe_path="${safe_path//:/_}" + echo "$VECTOR_DB_ROOT/$safe_path" +} + +# Find nearest parent with existing vector DB +find_project_root() { + local dir="$CURRENT_DIR" + local max_depth=10 # Prevent infinite loops + local depth=0 + + while [[ "$dir" != "/" ]] && [[ "$depth" -lt "$max_depth" ]]; do + local db_path=$(get_vector_db_path "$dir") + + # Check if vector DB exists and has required files + if [[ -d "$db_path" ]] && ([[ -f "$db_path/embeddings.pkl" ]] || [[ -f "$db_path/index.json" ]]); then + echo "$dir" + return 0 + fi + + # Move to parent directory + local parent_dir=$(dirname "$dir") + if [[ "$parent_dir" == "$dir" ]]; then + break # Reached 
root + fi + dir="$parent_dir" + ((depth++)) + done + + # No parent vector DB found, use current directory + echo "$CURRENT_DIR" +} + +# Show help message +show_help() { + cat << EOF +pycli - Python CLI Wrapper with Hierarchical Vector Database Support + +USAGE: + pycli [OPTIONS] + +INITIALIZATION: + --init Initialize vector DB for current directory + --rebuild-index Rebuild file index from scratch + --update-embeddings Update vector embeddings for changed files + +ANALYSIS: + --analyze Run analysis with tool + --query TEXT Semantic search query for context discovery + -p, --prompt TEXT Direct prompt for analysis + --tool [gemini|codex|both] Which tool to use (default: $DEFAULT_TOOL) + --top-k INTEGER Number of similar files to find (default: $DEFAULT_TOP_K) + +STATUS: + --status Show system status + --test-search Test vector search functionality + +EXAMPLES: + # Initialize vector DB for current project + pycli --init + + # Smart analysis with context discovery + pycli --analyze --query "authentication patterns" --tool gemini + + # Direct analysis with known prompt + pycli --analyze --tool codex -p "implement user login" + + # Update embeddings after code changes + pycli --update-embeddings + + # Check system status + pycli --status + +For more information, see: ~/.claude/workflows/python-tools-strategy.md +EOF +} + +#============================================================================== +# Main Logic +#============================================================================== + +# Handle help +if [[ "${1:-}" == "--help" ]] || [[ "${1:-}" == "-h" ]] || [[ $# -eq 0 ]]; then + show_help + exit 0 +fi + +# Determine action based on arguments +case "${1:-}" in + --init|--rebuild-index) + # For initialization, always use current directory + PROJECT_ROOT="$CURRENT_DIR" + echo "Initializing vector database for: $PROJECT_ROOT" + ;; + *) + # For other operations, find nearest project root + PROJECT_ROOT=$(find_project_root) + if [[ "$PROJECT_ROOT" != 
"$CURRENT_DIR" ]]; then + echo "Using existing vector database from: $PROJECT_ROOT" + fi + ;; +esac + +VECTOR_DB_PATH=$(get_vector_db_path "$PROJECT_ROOT") + +# Create vector DB directory if needed +mkdir -p "$VECTOR_DB_PATH" + +# Determine which Python script to call +if [[ "${1:-}" == "--update-embeddings" ]] || [[ "${1:-}" == "--rebuild-index" ]] || [[ "${1:-}" == "--init" ]]; then + # Use indexer.py for indexing operations + PYTHON_SCRIPT="$PYTHON_SCRIPT_DIR/indexer.py" + + # Map --init to --rebuild-index --update-embeddings + if [[ "${1:-}" == "--init" ]]; then + set -- "--rebuild-index" "--update-embeddings" + fi + + if [[ ! -f "$PYTHON_SCRIPT" ]]; then + echo "Error: indexer.py not found at $PYTHON_SCRIPT" + exit 1 + fi +else + # Use cli.py for analysis operations + PYTHON_SCRIPT="$PYTHON_SCRIPT_DIR/cli.py" + + if [[ ! -f "$PYTHON_SCRIPT" ]]; then + echo "Error: cli.py not found at $PYTHON_SCRIPT" + exit 1 + fi +fi + +#============================================================================== +# Environment Setup and Execution +#============================================================================== + +# Set environment variables for Python scripts +export PYCLI_VECTOR_DB_PATH="$VECTOR_DB_PATH" +export PYCLI_PROJECT_ROOT="$PROJECT_ROOT" +export PYCLI_CONFIG_FILE="$CONFIG_FILE" + +# Add some debugging info in verbose mode +if [[ "${PYCLI_VERBOSE:-}" == "1" ]]; then + echo "Debug: PROJECT_ROOT=$PROJECT_ROOT" + echo "Debug: VECTOR_DB_PATH=$VECTOR_DB_PATH" + echo "Debug: PYTHON_SCRIPT=$PYTHON_SCRIPT" + echo "Debug: Arguments: $*" +fi + +# Execute Python script with all arguments +echo "Executing: $PYTHON_PATH $PYTHON_SCRIPT --root-path \"$PROJECT_ROOT\" $*" + +exec "$PYTHON_PATH" "$PYTHON_SCRIPT" \ + --root-path "$PROJECT_ROOT" \ + "$@" \ No newline at end of file diff --git a/.claude/scripts/pycli.conf b/.claude/scripts/pycli.conf new file mode 100644 index 00000000..7dbdcc65 --- /dev/null +++ b/.claude/scripts/pycli.conf @@ -0,0 +1,159 @@ 
+#============================================================================== +# pycli Configuration File +# +# This file contains configuration settings for the pycli bash wrapper. +# Modify these settings according to your environment. +#============================================================================== + +#------------------------------------------------------------------------------ +# Python Environment Configuration +#------------------------------------------------------------------------------ + +# Path to Python interpreter +# Examples: +# - System Python: /usr/bin/python3 +# - Conda: /opt/conda/bin/python +# - Virtual env: /home/user/.virtualenvs/myenv/bin/python +# - Windows: /c/Python39/python.exe +PYTHON_PATH="/usr/bin/python3" + +# Alternative Python paths for different environments +# Uncomment and modify as needed: +# PYTHON_PATH="/opt/conda/bin/python" # Conda +# PYTHON_PATH="$HOME/.pyenv/versions/3.11.0/bin/python" # pyenv +# PYTHON_PATH="/c/Python311/python.exe" # Windows + +#------------------------------------------------------------------------------ +# Directory Configuration +#------------------------------------------------------------------------------ + +# Python script location (should point to ~/.claude/python_script) +PYTHON_SCRIPT_DIR="$HOME/.claude/python_script" + +# Central vector database storage location +VECTOR_DB_ROOT="$HOME/.claude/vector_db" + +# Cache directory for temporary files +CACHE_DIR="$HOME/.claude/cache" + +#------------------------------------------------------------------------------ +# Default Tool Settings +#------------------------------------------------------------------------------ + +# Default tool to use when not specified +# Options: gemini, codex, both +DEFAULT_TOOL="gemini" + +# Default number of similar files to return in vector search +DEFAULT_TOP_K="10" + +# Default similarity threshold for vector search (0.0-1.0) +SIMILARITY_THRESHOLD="0.3" + +# Default timeout for tool execution 
(seconds) +TOOL_TIMEOUT="300" + +#------------------------------------------------------------------------------ +# Vector Database Configuration +#------------------------------------------------------------------------------ + +# Enable hierarchical vector database mode +# When true, subdirectories will use parent directory's vector database +HIERARCHICAL_MODE="true" + +# Maximum depth to search for parent vector databases +MAX_SEARCH_DEPTH="10" + +# Minimum files required to create a separate vector database +MIN_FILES_FOR_SEPARATE_DB="50" + +#------------------------------------------------------------------------------ +# Performance Settings +#------------------------------------------------------------------------------ + +# Enable verbose output for debugging +# Set to "1" to enable, "0" to disable +PYCLI_VERBOSE="0" + +# Enable caching of analysis results +ENABLE_CACHING="true" + +# Cache TTL in seconds (1 hour default) +CACHE_TTL="3600" + +#------------------------------------------------------------------------------ +# Integration Settings +#------------------------------------------------------------------------------ + +# Gemini wrapper compatibility mode +# Set to "true" to enable compatibility with existing gemini-wrapper scripts +GEMINI_COMPAT_MODE="true" + +# Codex integration settings +CODEX_COMPAT_MODE="true" + +# Auto-build index if not found +AUTO_BUILD_INDEX="true" + +# Auto-update embeddings when files change +AUTO_UPDATE_EMBEDDINGS="true" + +#------------------------------------------------------------------------------ +# Logging Configuration +#------------------------------------------------------------------------------ + +# Log level: DEBUG, INFO, WARNING, ERROR +LOG_LEVEL="INFO" + +# Log file location +LOG_FILE="$HOME/.claude/logs/pycli.log" + +# Enable log rotation +ENABLE_LOG_ROTATION="true" + +# Maximum log file size (MB) +MAX_LOG_SIZE="10" + +#------------------------------------------------------------------------------ +# 
Advanced Configuration +#------------------------------------------------------------------------------ + +# Custom configuration file for Python scripts +# Leave empty to use default config.yaml +PYTHON_CONFIG_FILE="" + +# Additional Python path directories +# Uncomment and modify if you need to add custom modules +# ADDITIONAL_PYTHON_PATH="/path/to/custom/modules" + +# Environment variables to pass to Python scripts +# Uncomment and modify as needed +# CUSTOM_ENV_VAR1="value1" +# CUSTOM_ENV_VAR2="value2" + +#------------------------------------------------------------------------------ +# Platform-Specific Settings +#------------------------------------------------------------------------------ + +# Windows-specific settings +if [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "cygwin" ]]; then + # Adjust paths for Windows + VECTOR_DB_ROOT="${VECTOR_DB_ROOT//\\//}" + PYTHON_SCRIPT_DIR="${PYTHON_SCRIPT_DIR//\\//}" +fi + +# macOS-specific settings +if [[ "$OSTYPE" == "darwin"* ]]; then + # macOS-specific optimizations + export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES +fi + +#------------------------------------------------------------------------------ +# User Customization +#------------------------------------------------------------------------------ + +# Load user-specific configuration if it exists +USER_CONFIG="$HOME/.claude/config/pycli.user.conf" +if [[ -f "$USER_CONFIG" ]]; then + source "$USER_CONFIG" +fi \ No newline at end of file diff --git a/.claude/workflows/python-tools-strategy.md b/.claude/workflows/python-tools-strategy.md new file mode 100644 index 00000000..94b7f35e --- /dev/null +++ b/.claude/workflows/python-tools-strategy.md @@ -0,0 +1,511 @@ +--- +name: python-tools-strategy +description: Command reference for Python-based tool invocation +type: command-reference +--- + +# Python Tools Command Reference + +## ⚑ Quick Commands + +**Smart Analysis**: `pycli --analyze --query "search term" --tool [gemini/codex]` +**Direct Tool Invocation**: `pycli 
--analyze --tool [gemini/codex] -p "prompt"` +**Vector Database Setup**: `pycli --init` +**Vector Database Update**: `pycli --update-embeddings` + +## ⏰ When to Use What + +### πŸ”„ Vector Database Timing +```bash +# FIRST TIME (run once per project) +pycli --init + +# DAILY (when files change) +pycli --update-embeddings + +# BEFORE ANALYSIS (check status) +pycli --status +``` + +### 🎯 Tool Selection Timing +- **Code Discovery** β†’ Use `pycli --analyze --query` to find relevant files +- **Direct Analysis** β†’ Use `pycli --analyze -p` when you know what to analyze +- **Development** β†’ Use `--tool codex` for implementation tasks +- **Understanding** β†’ Use `--tool gemini` for analysis and exploration + +## 🎯 Core Commands + +### Smart Analysis (Recommended) +```bash +# Find similar code patterns and analyze +pycli --analyze --query "authentication patterns" --tool gemini + +# Search with development context +pycli --analyze --query "error handling" --tool codex + +# Both discovery and analysis +pycli --analyze --query "database connections" --tool both +``` + +### Direct Tool Invocation +```bash +# Direct analysis with known context +pycli --analyze --tool gemini -p "analyze authentication patterns" + +# Direct development task +pycli --analyze --tool codex -p "implement user login" + +# Status and testing +pycli --status +pycli --test-search +``` + +### Vector Database Operations +```bash +# Initial setup (run once per project) +pycli --init + +# Daily updates (run when files change) +pycli --update-embeddings + +# Status check +pycli --status +``` + +## πŸ“Š Command Matrix + +| What You Want | Command | Use Case | +|---------------|---------|----------| +| **Smart analysis** | `pycli --analyze --query "pattern" --tool gemini` | Code discovery & analysis | +| **Direct analysis** | `pycli --analyze --tool gemini -p "prompt"` | Known target analysis | +| **Generate code** | `pycli --analyze --tool codex -p "task"` | Development | +| **Setup project** | `pycli 
--init` | First time setup |
+| **Update search index** | `pycli --update-embeddings` | Maintenance |
+| **Check status** | `pycli --status` | System health |
+
+## 🚀 Usage Examples
+
+### Replace Gemini Wrapper
+```bash
+# OLD: ~/.claude/scripts/gemini-wrapper -p "analyze auth patterns"
+# NEW: pycli --analyze --tool gemini -p "analyze auth patterns"
+```
+
+### Replace Codex Commands
+```bash
+# OLD: codex --full-auto exec "implement login"
+# NEW: pycli --analyze --tool codex -p "implement login"
+```
+
+### Smart Context Discovery
+```bash
+# Find relevant files first, then analyze
+pycli --analyze --query "user authentication" --tool gemini
+
+# Results include:
+# - Hierarchical vector database search
+# - Semantically similar files from project and parent directories
+# - Generated tool command with intelligent context
+# - Executed analysis with smart file selection
+```
+
+## 🔧 Command Options
+
+### pycli (Unified Interface)
+```bash
+pycli [command] [options]
+
+Commands:
+  --init                        Initialize vector database for current project
+  --analyze                     Run analysis with AI tools
+  --status                      Show system status and health
+  --test-search                 Test vector search functionality
+  --update-embeddings           Update vector embeddings for changed files
+
+Analysis Options:
+  --tool [gemini|codex|both]    Which AI tool to use (default: gemini)
+  -p, --prompt TEXT             Direct prompt for analysis
+  --query TEXT                  Semantic search query for context discovery
+  --top-k INTEGER               Number of similar files to find (default: 10)
+  --similarity-threshold FLOAT  Minimum similarity score (0.0-1.0)
+
+Output Options:
+  --quiet                       Suppress progress output
+  --verbose                     Show detailed analysis information
+  --output [patterns|json]      Output format (default: patterns)
+```
+
+### Installation & Setup
+```bash
+# Install pycli system (run from the repository root)
+bash .claude/scripts/install_pycli.sh
+
+# Add to shell (automatic during install)
+alias pycli='~/.claude/scripts/pycli'
+
+# Verify installation
+pycli --help
+```
+
+## 📋
Common Workflows
+
+### 🚀 First-Time Setup (Vector Database)
+```bash
+# 1. Install pycli system (run from the repository root)
+bash .claude/scripts/install_pycli.sh
+
+# 2. Initialize vector database for project
+cd /path/to/your/project
+pycli --init
+
+# 3. Verify setup works
+pycli --status
+
+# 4. Test search functionality
+pycli --test-search
+```
+
+### 🎯 Analysis Workflow (Recommended)
+```bash
+# 1. Update vectors (if files changed)
+pycli --update-embeddings
+
+# 2. Smart analysis with context discovery
+pycli --analyze --query "what you're looking for" --tool gemini
+
+# 3. Development with context
+pycli --analyze --query "related patterns" --tool codex
+```
+
+### ⏰ When to Run Commands
+
+#### 🔄 Vector Database Maintenance
+```bash
+# WHEN: First time using system
+pycli --init
+
+# WHEN: Files have been added/modified (daily/after coding)
+pycli --update-embeddings
+
+# WHEN: Before starting analysis (check if system ready)
+pycli --status
+```
+
+#### 🎯 Analysis Timing
+```bash
+# WHEN: You need to find relevant code patterns
+pycli --analyze --query "search term" --tool gemini
+
+# WHEN: You have a specific prompt and know the context
+pycli --analyze --tool gemini -p "specific prompt"
+
+# WHEN: You want to develop/implement something
+pycli --analyze --query "similar implementations" --tool codex
+```
+
+### Integration with Existing Tools
+```bash
+# In place of gemini-wrapper
+pycli --analyze --tool gemini -p "$YOUR_PROMPT"
+
+# In place of codex commands
+pycli --analyze --tool codex -p "$YOUR_TASK"
+
+# Enhanced with hierarchical context discovery
+pycli --analyze --query "relevant context" --tool both
+```
+
+## 🎯 Quick Reference
+
+### 🚀 Most Common Commands
+```bash
+# 1. Smart analysis (recommended first choice)
+pycli --analyze --query "what you're looking for" --tool gemini
+
+# 2. Direct tool call (when you know exactly what to analyze)
+pycli --analyze --tool codex -p "what you want to do"
+
+# 3.
Keep embeddings updated (run after file changes) +pycli --update-embeddings +``` + +### βš™οΈ Configuration (config.yaml) +```yaml +# Essential settings only +embeddings: + enabled: true + similarity_threshold: 0.3 + +tools: + default_tool: "gemini" + timeout: 300 +``` + +### πŸ› Troubleshooting +```bash +# Check if everything works +pycli --status + +# Rebuild if issues +pycli --init + +# Test search functionality +pycli --test-search +``` + +## πŸŽͺ Integration Decision Tree + +``` +Need to analyze code? +β”œβ”€ Do you know specific files to analyze? +β”‚ β”œβ”€ YES β†’ Use: pycli --analyze --tool [gemini/codex] -p "prompt" +β”‚ └─ NO β†’ Use: pycli --analyze --query "search term" --tool [gemini/codex] +└─ Is vector database updated? + β”œβ”€ UNSURE β†’ Run: pycli --status + β”œβ”€ NO β†’ Run: pycli --update-embeddings + └─ YES β†’ Proceed with analysis +``` + +## πŸ—οΈ Hierarchical Vector Database + +### Key Features +- **Automatic Parent Discovery**: Subdirectories automatically use parent's vector database +- **No Redundant Vectorization**: Avoids duplicate processing in project subdirectories +- **Central Storage**: All vector databases stored in `~/.claude/vector_db/` +- **Path-based Organization**: Vector DBs organized by project directory structure + +### How It Works +```bash +# Project structure +/home/user/myproject/ +β”œβ”€β”€ src/ +β”‚ └── auth/ # Uses parent's vector DB +└── tests/ # Uses parent's vector DB + +# Vector database structure +~/.claude/vector_db/ +└── home_user_myproject/ # Single DB for entire project + β”œβ”€β”€ embeddings.pkl + └── index.json +``` + +### Usage Examples +```bash +# Initialize at project root +cd /home/user/myproject +pycli --init + +# Work in subdirectory (automatically finds parent DB) +cd src/auth +pycli --analyze --query "authentication patterns" # Uses parent's DB + +# Work in another subdirectory +cd ../../tests +pycli --analyze --query "test patterns" # Uses same parent DB +``` + +## πŸ”§ Vector Database 
Setup & Maintenance
+
+### ⚡ One-Time System Setup
+```bash
+# 1. Install dependencies (first time only)
+cd .claude/python_script && pip install -r requirements.txt
+
+# 2. Initialize vector database (creates embeddings)
+python indexer.py --rebuild-index --update-embeddings
+
+# 3. Verify setup works
+python cli.py --status
+
+# 4. Test search functionality
+python cli.py --test-search
+```
+
+### 📋 What Happens During Setup
+1. **File Indexing**: Scans project files and creates index
+2. **Model Download**: Downloads AI model (first time only, ~500MB)
+3. **Embedding Generation**: Creates vector representations of code
+4. **Cache Creation**: Saves embeddings to `.claude/cache/embeddings/`
+
+### 🎯 Verification Checklist
+After setup, verify these work:
+- [ ] `python cli.py --status` shows "System ready"
+- [ ] `python cli.py --test-search` returns results
+- [ ] Files exist: `.claude/cache/embeddings/embeddings.pkl`
+- [ ] Search works: `python cli.py --analyze --query "test"`
+
+### 🐛 Common Issues & Fixes
+
+#### Nothing works / Setup failed
+```bash
+# Nuclear option - reset everything
+rm -rf .claude/cache/embeddings/*
+python indexer.py --rebuild-index --update-embeddings
+```
+
+#### Slow performance
+```yaml
+# In config.yaml - reduce batch size
+embeddings:
+  batch_size: 16
+```
+
+#### No search results found
+```yaml
+# In config.yaml - lower similarity threshold
+embeddings:
+  similarity_threshold: 0.1
+```
+
+#### Memory errors during setup
+```yaml
+# In config.yaml - use smaller batches
+embeddings:
+  batch_size: 8
+```
+
+#### Model download fails
+```bash
+# Manual model download
+python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')"
+```
+
+## 📋 Usage Rules & Best Practices
+
+### 🎯 Core Rules
+
+1. **Always check status first** - Run `python cli.py --status` before analysis
+2. **Update after file changes** - Run `indexer.py --update-embeddings` when files are modified
+3.
**Use vector search for discovery** - Use `cli.py --analyze --query` when exploring code
+4. **Use direct tools for known targets** - Use `cli.py --analyze -p` for specific analysis
+5. **Prefer context-aware tools** - Enhanced Python tools over legacy shell scripts
+
+### ⏰ Maintenance Schedule
+
+```bash
+# DAILY (or after coding sessions)
+python .claude/python_script/indexer.py --update-embeddings
+
+# WEEKLY (or when config changes)
+python .claude/python_script/cli.py --status # Check system health
+
+# MONTHLY (or after major project changes)
+python .claude/python_script/indexer.py --rebuild-index --update-embeddings
+```
+
+### 🎯 Tool Selection Rules
+
+#### Use `cli.py --analyze --query` when:
+- ✅ Exploring unfamiliar codebase
+- ✅ Looking for similar code patterns
+- ✅ Need context discovery for complex tasks
+- ✅ Want smart file selection for tool execution
+
+#### Use `cli.py --analyze -p` when:
+- ✅ You know exactly what files to analyze
+- ✅ Direct prompt execution without context search
+- ✅ Quick tool invocation with known targets
+
+#### Use `indexer.py` when:
+- ✅ First time setup
+- ✅ Files have been added/modified
+- ✅ System performance degraded
+- ✅ Configuration changed
+
+### 🔧 Configuration Guidelines
+
+#### Minimal config.yaml
+```yaml
+embeddings:
+  enabled: true
+  similarity_threshold: 0.3
+  model: "all-MiniLM-L6-v2"
+  batch_size: 32
+
+tools:
+  default_tool: "gemini"
+  timeout: 300
+```
+
+#### Performance tuning
+```yaml
+# Large codebase (>1000 files)
+embeddings:
+  batch_size: 64
+  similarity_threshold: 0.4
+
+# Memory constrained
+embeddings:
+  batch_size: 16
+  similarity_threshold: 0.2
+
+# High accuracy needed
+embeddings:
+  model: "all-mpnet-base-v2"
+  similarity_threshold: 0.5
+```
+
+### 🚀 Migration from Legacy Tools
+
+#### Replace gemini-wrapper
+```bash
+# OLD (shell-based)
+~/.claude/scripts/gemini-wrapper -p "analyze authentication"
+
+# NEW (Python-based with hierarchical vector context)
+pycli
--analyze --query "authentication" --tool gemini +``` + +#### Replace codex commands +```bash +# OLD (direct execution) +codex --full-auto exec "implement user login" + +# NEW (context-aware development with hierarchical DB) +pycli --analyze --query "login implementation patterns" --tool codex +``` + +#### Integration workflow +1. **Install pycli** - Run installation script once +2. **Initialize projects** - Run `pycli --init` in each project root +3. **Replace commands** - Update scripts to use `pycli` instead of direct Python calls +4. **Enjoy hierarchical benefits** - Automatic parent DB discovery in subdirectories + +## πŸŽ‰ Advanced Features + +### Bash Wrapper Benefits +- **Unified Interface**: Single `pycli` command for all operations +- **Smart Path Detection**: Automatically finds project roots and vector databases +- **Environment Management**: Configurable Python interpreter path +- **Hierarchical Support**: Intelligent parent directory discovery + +### Configuration Flexibility +```bash +# Edit pycli configuration +nano ~/.claude/scripts/pycli.conf + +# Key settings: +# PYTHON_PATH - Python interpreter location +# VECTOR_DB_ROOT - Central vector database storage +# HIERARCHICAL_MODE - Enable parent DB discovery +``` + +### Integration Examples +```bash +# Add to your project's package.json scripts +{ + "scripts": { + "analyze": "pycli --analyze --query", + "init-ai": "pycli --init", + "update-ai": "pycli --update-embeddings" + } +} + +# Use in Makefiles +analyze: + pycli --analyze --query "$(QUERY)" --tool gemini + +# Use in CI/CD pipelines +- name: Update AI Context + run: pycli --update-embeddings +``` \ No newline at end of file