Remove LLM enhancement features and related components as per user request. Delete the source files, CLI commands, front-end components, tests, scripts, and documentation associated with LLM functionality. Simplify dependencies and reduce complexity while retaining core vector search. Validation confirmed the removal is complete and core search still works.

catlog22
2025-12-16 21:38:27 +08:00
parent d21066c282
commit b702791c2c
21 changed files with 375 additions and 7193 deletions
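The retained pure-vector path can be pieced together from the API calls in the deleted comparison scripts below. A minimal sketch, assuming those signatures survive the removal unchanged (index initialization and error handling omitted; file names are placeholders):

from pathlib import Path

from codexlens.semantic.embedder import Embedder
from codexlens.semantic.vector_store import VectorStore
from codexlens.semantic.chunker import Chunker, ChunkConfig
from codexlens.search.hybrid_search import HybridSearchEngine

db_path = Path("_index.db")  # hypothetical existing index database
source = Path("app.py").read_text()  # hypothetical file to index

# Chunk and embed raw code directly - no LLM summarization step.
embedder = Embedder(profile="code")
vector_store = VectorStore(db_path)
chunker = Chunker(config=ChunkConfig(max_chunk_size=2000))

chunks = chunker.chunk_sliding_window(source, file_path="app.py", language="python")
for chunk in chunks:
    chunk.embedding = embedder.embed_single(chunk.content)
vector_store.add_chunks(chunks, "app.py")

# Query in pure-vector mode through the hybrid engine.
engine = HybridSearchEngine()
for result in engine.search(db_path, "hash passwords securely", limit=3,
                            enable_vector=True, pure_vector=True):
    print(result.path, result.score)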

View File: compare_search_methods.py

@@ -1,465 +0,0 @@
#!/usr/bin/env python3
"""Standalone script to compare pure vector vs LLM-enhanced semantic search.
Usage:
python compare_search_methods.py [--tool gemini|qwen] [--skip-llm]
This script:
1. Creates a test dataset with sample code
2. Tests pure vector search (code → fastembed → search)
3. Tests LLM-enhanced search (code → LLM summary → fastembed → search)
4. Compares results across natural language queries
"""
import argparse
import sqlite3
import sys
import tempfile
import time
from pathlib import Path
from typing import Dict, List, Tuple
# Check dependencies
try:
from codexlens.semantic import SEMANTIC_AVAILABLE
from codexlens.semantic.embedder import Embedder
from codexlens.semantic.vector_store import VectorStore
from codexlens.semantic.chunker import Chunker, ChunkConfig
from codexlens.semantic.llm_enhancer import (
LLMEnhancer,
LLMConfig,
FileData,
EnhancedSemanticIndexer,
)
from codexlens.storage.dir_index import DirIndexStore
from codexlens.search.hybrid_search import HybridSearchEngine
except ImportError as e:
print(f"Error: Missing dependencies - {e}")
print("Install with: pip install codexlens[semantic]")
sys.exit(1)
if not SEMANTIC_AVAILABLE:
print("Error: Semantic search dependencies not available")
print("Install with: pip install codexlens[semantic]")
sys.exit(1)
# Test dataset with realistic code samples
TEST_DATASET = {
"auth/password_hasher.py": '''"""Password hashing utilities using bcrypt."""
import bcrypt
def hash_password(password: str, salt_rounds: int = 12) -> str:
"""Hash a password using bcrypt with specified salt rounds."""
salt = bcrypt.gensalt(rounds=salt_rounds)
hashed = bcrypt.hashpw(password.encode('utf-8'), salt)
return hashed.decode('utf-8')
def verify_password(password: str, hashed: str) -> bool:
"""Verify a password against its hash."""
return bcrypt.checkpw(password.encode('utf-8'), hashed.encode('utf-8'))
''',
"auth/jwt_handler.py": '''"""JWT token generation and validation."""
import jwt
from datetime import datetime, timedelta
SECRET_KEY = "your-secret-key"
def create_token(user_id: int, expires_in: int = 3600) -> str:
"""Generate a JWT access token for user authentication."""
payload = {
'user_id': user_id,
'exp': datetime.utcnow() + timedelta(seconds=expires_in),
'iat': datetime.utcnow()
}
return jwt.encode(payload, SECRET_KEY, algorithm='HS256')
def decode_token(token: str) -> dict:
"""Validate and decode JWT token."""
try:
return jwt.decode(token, SECRET_KEY, algorithms=['HS256'])
except jwt.ExpiredSignatureError:
return None
''',
"api/user_endpoints.py": '''"""REST API endpoints for user management."""
from flask import Flask, request, jsonify
app = Flask(__name__)
@app.route('/api/users', methods=['POST'])
def create_user():
"""Create a new user account with email and password."""
data = request.get_json()
if not data.get('email') or not data.get('password'):
return jsonify({'error': 'Email and password required'}), 400
user_id = 12345 # Database insert
return jsonify({'user_id': user_id, 'success': True}), 201
@app.route('/api/users/<int:user_id>', methods=['GET'])
def get_user(user_id: int):
"""Retrieve user profile information by user ID."""
user = {
'id': user_id,
'email': 'user@example.com',
'name': 'John Doe'
}
return jsonify(user), 200
''',
"utils/validation.py": '''"""Input validation utilities."""
import re
def validate_email(email: str) -> bool:
"""Check if email address format is valid using regex."""
pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
return bool(re.match(pattern, email))
def sanitize_input(text: str, max_length: int = 255) -> str:
"""Clean user input by removing special characters."""
text = re.sub(r'[<>\"\'&]', '', text)
return text.strip()[:max_length]
def validate_password_strength(password: str) -> tuple:
"""Validate password meets security requirements."""
if len(password) < 8:
return False, "Password must be at least 8 characters"
if not re.search(r'[A-Z]', password):
return False, "Must contain uppercase letter"
return True, None
''',
"database/connection.py": '''"""Database connection pooling."""
import psycopg2
from psycopg2 import pool
from contextlib import contextmanager
class DatabasePool:
"""PostgreSQL connection pool manager."""
def __init__(self, min_conn: int = 1, max_conn: int = 10):
"""Initialize database connection pool."""
self.pool = psycopg2.pool.SimpleConnectionPool(
min_conn, max_conn,
user='dbuser', host='localhost', database='myapp'
)
@contextmanager
def get_connection(self):
"""Get a connection from pool as context manager."""
conn = self.pool.getconn()
try:
yield conn
conn.commit()
finally:
self.pool.putconn(conn)
''',
}
# Natural language test queries
TEST_QUERIES = [
("How do I securely hash passwords?", "auth/password_hasher.py"),
("Generate JWT token for authentication", "auth/jwt_handler.py"),
("Create new user account via API", "api/user_endpoints.py"),
("Validate email address format", "utils/validation.py"),
("Connect to PostgreSQL database", "database/connection.py"),
]
def create_test_database(db_path: Path) -> None:
"""Create and populate test database."""
store = DirIndexStore(db_path)
store.initialize()
with store._get_connection() as conn:
for path, content in TEST_DATASET.items():
name = path.split('/')[-1]
conn.execute(
"""INSERT INTO files (name, full_path, content, language, mtime)
VALUES (?, ?, ?, ?, ?)""",
(name, path, content, "python", 0.0)
)
conn.commit()
store.close()
def test_pure_vector_search(db_path: Path) -> Dict:
"""Test pure vector search (raw code embeddings)."""
print("\n" + "="*70)
print("PURE VECTOR SEARCH (Code → fastembed)")
print("="*70)
start_time = time.time()
# Generate pure vector embeddings
embedder = Embedder(profile="code")
vector_store = VectorStore(db_path)
chunker = Chunker(config=ChunkConfig(max_chunk_size=2000))
with sqlite3.connect(db_path) as conn:
conn.row_factory = sqlite3.Row
rows = conn.execute("SELECT full_path, content FROM files").fetchall()
chunk_count = 0
for row in rows:
chunks = chunker.chunk_sliding_window(
row["content"],
file_path=row["full_path"],
language="python"
)
for chunk in chunks:
chunk.embedding = embedder.embed_single(chunk.content)
chunk.metadata["strategy"] = "pure_vector"
if chunks:
vector_store.add_chunks(chunks, row["full_path"])
chunk_count += len(chunks)
setup_time = time.time() - start_time
print(f"Setup: {len(rows)} files, {chunk_count} chunks in {setup_time:.1f}s")
# Test queries
engine = HybridSearchEngine()
results = {}
print(f"\n{'Query':<45} {'Top Result':<30} {'Score':<8}")
print("-" * 70)
for query, expected_file in TEST_QUERIES:
search_results = engine.search(
db_path,
query,
limit=3,
enable_vector=True,
pure_vector=True,
)
top_file = search_results[0].path if search_results else "No results"
top_score = search_results[0].score if search_results else 0.0
found = expected_file in [r.path for r in search_results]
rank = None
if found:
for i, r in enumerate(search_results):
if r.path == expected_file:
rank = i + 1
break
status = "[OK]" if found and rank == 1 else ("[~]" if found else "[X]")
display_query = query[:42] + "..." if len(query) > 45 else query
display_file = top_file.split('/')[-1] if '/' in top_file else top_file
print(f"{status} {display_query:<43} {display_file:<30} {top_score:.3f}")
results[query] = {
"found": found,
"rank": rank,
"top_file": top_file,
"score": top_score,
}
return results
def test_llm_enhanced_search(db_path: Path, llm_tool: str = "gemini") -> Dict:
"""Test LLM-enhanced search (LLM summaries → fastembed)."""
print("\n" + "="*70)
print(f"LLM-ENHANCED SEARCH (Code → {llm_tool.upper()} → fastembed)")
print("="*70)
# Check CCW availability
llm_config = LLMConfig(enabled=True, tool=llm_tool, batch_size=2)
enhancer = LLMEnhancer(llm_config)
if not enhancer.check_available():
print("[X] CCW CLI not available - skipping LLM-enhanced test")
print(" Install CCW: npm install -g ccw")
return {}
start_time = time.time()
# Generate LLM-enhanced embeddings
embedder = Embedder(profile="code")
vector_store = VectorStore(db_path)
indexer = EnhancedSemanticIndexer(enhancer, embedder, vector_store)
# Prepare file data
file_data_list = [
FileData(path=path, content=content, language="python")
for path, content in TEST_DATASET.items()
]
# Index with LLM enhancement
print(f"Generating LLM summaries for {len(file_data_list)} files...")
indexed = indexer.index_files(file_data_list)
setup_time = time.time() - start_time
print(f"Setup: {indexed}/{len(file_data_list)} files indexed in {setup_time:.1f}s")
# Test queries
engine = HybridSearchEngine()
results = {}
print(f"\n{'Query':<45} {'Top Result':<30} {'Score':<8}")
print("-" * 70)
for query, expected_file in TEST_QUERIES:
search_results = engine.search(
db_path,
query,
limit=3,
enable_vector=True,
pure_vector=True,
)
top_file = search_results[0].path if search_results else "No results"
top_score = search_results[0].score if search_results else 0.0
found = expected_file in [r.path for r in search_results]
rank = None
if found:
for i, r in enumerate(search_results):
if r.path == expected_file:
rank = i + 1
break
status = "[OK]" if found and rank == 1 else ("[~]" if found else "[X]")
display_query = query[:42] + "..." if len(query) > 45 else query
display_file = top_file.split('/')[-1] if '/' in top_file else top_file
print(f"{status} {display_query:<43} {display_file:<30} {top_score:.3f}")
results[query] = {
"found": found,
"rank": rank,
"top_file": top_file,
"score": top_score,
}
return results
def compare_results(pure_results: Dict, llm_results: Dict) -> None:
"""Compare and analyze results from both approaches."""
print("\n" + "="*70)
print("COMPARISON SUMMARY")
print("="*70)
if not llm_results:
print("Cannot compare - LLM-enhanced test was skipped")
return
pure_score = 0
llm_score = 0
print(f"\n{'Query':<45} {'Pure':<10} {'LLM':<10}")
print("-" * 70)
for query, expected_file in TEST_QUERIES:
pure_res = pure_results.get(query, {})
llm_res = llm_results.get(query, {})
pure_status = f"[OK] Rank {pure_res.get('rank', '?')}" if pure_res.get('found') else "[X] Miss"
llm_status = f"[OK] Rank {llm_res.get('rank', '?')}" if llm_res.get('found') else "[X] Miss"
# Scoring: Rank 1 = 3 points, Rank 2 = 2 points, Rank 3 = 1 point
if pure_res.get('found') and pure_res.get('rank'):
pure_score += max(0, 4 - pure_res['rank'])
if llm_res.get('found') and llm_res.get('rank'):
llm_score += max(0, 4 - llm_res['rank'])
display_query = query[:42] + "..." if len(query) > 45 else query
print(f"{display_query:<45} {pure_status:<10} {llm_status:<10}")
print("-" * 70)
print(f"{'TOTAL SCORE':<45} {pure_score:<10} {llm_score:<10}")
print("="*70)
# Analysis
print("\nANALYSIS:")
if llm_score > pure_score:
improvement = ((llm_score - pure_score) / max(pure_score, 1)) * 100
print(f"[OK] LLM enhancement improves results by {improvement:.1f}%")
print(" Natural language summaries match queries better than raw code")
elif pure_score > llm_score:
degradation = ((pure_score - llm_score) / max(pure_score, 1)) * 100
print(f"[X] Pure vector performed {degradation:.1f}% better")
print(" LLM summaries may be too generic or missing key details")
else:
print("= Both approaches performed equally on this test set")
print("\nKEY FINDINGS:")
print("- Pure Vector: Direct code embeddings, fast but may miss semantic intent")
print("- LLM Enhanced: Natural language summaries, better for human-like queries")
print("- Best Use: Combine both - LLM for natural language, vector for code patterns")
def main():
parser = argparse.ArgumentParser(
description="Compare pure vector vs LLM-enhanced semantic search"
)
parser.add_argument(
"--tool",
choices=["gemini", "qwen"],
default="gemini",
help="LLM tool to use for enhancement (default: gemini)"
)
parser.add_argument(
"--skip-llm",
action="store_true",
help="Skip LLM-enhanced test (only run pure vector)"
)
args = parser.parse_args()
print("\n" + "="*70)
print("SEMANTIC SEARCH COMPARISON TEST")
print("Pure Vector vs LLM-Enhanced Vector Search")
print("="*70)
# Create test database
with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
db_path = Path(f.name)
try:
print(f"\nTest dataset: {len(TEST_DATASET)} Python files")
print(f"Test queries: {len(TEST_QUERIES)} natural language questions")
create_test_database(db_path)
# Test pure vector search
pure_results = test_pure_vector_search(db_path)
# Test LLM-enhanced search
if not args.skip_llm:
# Clear semantic_chunks table for LLM test
with sqlite3.connect(db_path) as conn:
conn.execute("DELETE FROM semantic_chunks")
conn.commit()
llm_results = test_llm_enhanced_search(db_path, args.tool)
else:
llm_results = {}
print("\n[X] LLM-enhanced test skipped (--skip-llm flag)")
# Compare results
compare_results(pure_results, llm_results)
finally:
# Cleanup - ensure all connections are closed
try:
import gc
gc.collect() # Force garbage collection to close any lingering connections
time.sleep(0.1) # Small delay for Windows to release file handle
if db_path.exists():
db_path.unlink()
except PermissionError:
print(f"\nWarning: Could not delete temporary database: {db_path}")
print("It will be cleaned up on next system restart.")
print("\n" + "="*70)
print("Test completed successfully!")
print("="*70)
if __name__ == "__main__":
main()

View File: inspect_llm_summaries.py

@@ -1,88 +0,0 @@
#!/usr/bin/env python3
"""Inspect LLM-generated summaries in semantic_chunks table."""
import sqlite3
import sys
from pathlib import Path
def inspect_summaries(db_path: Path):
"""Show LLM-generated summaries from database."""
if not db_path.exists():
print(f"Error: Database not found: {db_path}")
return
with sqlite3.connect(db_path) as conn:
conn.row_factory = sqlite3.Row
# Check if semantic_chunks table exists
cursor = conn.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_chunks'"
)
if not cursor.fetchone():
print("No semantic_chunks table found")
return
# Get all chunks with metadata
cursor = conn.execute("""
SELECT file_path, chunk_index, content,
json_extract(metadata, '$.llm_summary') as summary,
json_extract(metadata, '$.llm_keywords') as keywords,
json_extract(metadata, '$.llm_purpose') as purpose,
json_extract(metadata, '$.strategy') as strategy
FROM semantic_chunks
ORDER BY file_path, chunk_index
""")
chunks = cursor.fetchall()
if not chunks:
print("No chunks found in database")
return
print("="*80)
print("LLM-GENERATED SUMMARIES INSPECTION")
print("="*80)
current_file = None
for chunk in chunks:
file_path = chunk['file_path']
if file_path != current_file:
print(f"\n{'='*80}")
print(f"FILE: {file_path}")
print(f"{'='*80}")
current_file = file_path
print(f"\n[Chunk {chunk['chunk_index']}]")
print(f"Strategy: {chunk['strategy']}")
if chunk['summary']:
print(f"\nLLM Summary:")
print(f" {chunk['summary']}")
if chunk['keywords']:
print(f"\nKeywords:")
print(f" {chunk['keywords']}")
if chunk['purpose']:
print(f"\nPurpose:")
print(f" {chunk['purpose']}")
# Show first 200 chars of content
content = chunk['content']
if len(content) > 200:
content = content[:200] + "..."
print(f"\nOriginal Content (first 200 chars):")
print(f" {content}")
print("-" * 80)
if __name__ == "__main__":
if len(sys.argv) < 2:
print("Usage: python inspect_llm_summaries.py <path_to_index.db>")
print("\nExample:")
print(" python inspect_llm_summaries.py ~/.codexlens/indexes/myproject/_index.db")
sys.exit(1)
db_path = Path(sys.argv[1])
inspect_summaries(db_path)

View File

@@ -1,112 +0,0 @@
#!/usr/bin/env python3
"""Directly show LLM analysis of test code."""
import tempfile
from pathlib import Path

from codexlens.semantic.llm_enhancer import LLMEnhancer, LLMConfig, FileData
# Misleading code example
TEST_CODE = '''"""Email sending service."""
import psycopg2
from psycopg2 import pool
from contextlib import contextmanager
class EmailSender:
"""SMTP email sender with retry logic."""
def __init__(self, min_conn: int = 1, max_conn: int = 10):
"""Initialize email sender."""
self.pool = psycopg2.pool.SimpleConnectionPool(
min_conn, max_conn,
user='dbuser', host='localhost', database='myapp'
)
@contextmanager
def send_email(self):
"""Send email message."""
conn = self.pool.getconn()
try:
yield conn
conn.commit()
finally:
self.pool.putconn(conn)
'''
print("="*80)
print("LLM ANALYSIS OF MISLEADING CODE")
print("="*80)
print("\n[Original Code with Misleading Comments]")
print("-"*80)
print(TEST_CODE)
print("-"*80)
print("\n[Actual Functionality]")
print(" - Imports: psycopg2 (PostgreSQL library)")
print(" - Class: EmailSender (but name is misleading!)")
print(" - Actually: Creates PostgreSQL connection pool")
print(" - Methods: send_email (actually gets DB connection)")
print("\n[Misleading Documentation]")
print(" - Module docstring: 'Email sending service' (WRONG)")
print(" - Class docstring: 'SMTP email sender' (WRONG)")
print(" - Method docstring: 'Send email message' (WRONG)")
print("\n" + "="*80)
print("TESTING LLM UNDERSTANDING")
print("="*80)
# Test LLM analysis
config = LLMConfig(enabled=True, tool="gemini", batch_size=1)
enhancer = LLMEnhancer(config)
if not enhancer.check_available():
print("\n[X] CCW CLI not available")
print("Install: npm install -g ccw")
    raise SystemExit(1)
print("\n[Calling Gemini to analyze code...]")
file_data = FileData(path="db/pool.py", content=TEST_CODE, language="python")
with tempfile.TemporaryDirectory() as tmpdir:
result = enhancer.enhance_files([file_data], Path(tmpdir))
if "db/pool.py" in result:
metadata = result["db/pool.py"]
print("\n[LLM-Generated Summary]")
print("-"*80)
print(f"Summary: {metadata.summary}")
print(f"\nPurpose: {metadata.purpose}")
print(f"\nKeywords: {', '.join(metadata.keywords)}")
print("-"*80)
print("\n[Analysis]")
# Check if LLM identified the real functionality
summary_lower = metadata.summary.lower()
keywords_lower = [k.lower() for k in metadata.keywords]
correct_terms = ['database', 'postgresql', 'connection', 'pool', 'psycopg']
misleading_terms = ['email', 'smtp', 'send']
found_correct = sum(1 for term in correct_terms
if term in summary_lower or any(term in k for k in keywords_lower))
found_misleading = sum(1 for term in misleading_terms
if term in summary_lower or any(term in k for k in keywords_lower))
print(f"Correct terms found: {found_correct}/{len(correct_terms)}")
print(f"Misleading terms found: {found_misleading}/{len(misleading_terms)}")
if found_correct > found_misleading:
print("\n[OK] LLM correctly identified actual functionality!")
print(" LLM ignored misleading comments and analyzed code behavior")
elif found_misleading > found_correct:
print("\n[X] LLM was misled by incorrect comments")
print(" LLM trusted documentation over code analysis")
else:
print("\n[~] Mixed results - LLM found both correct and misleading terms")
else:
print("\n[X] LLM analysis failed - no results returned")
print("\n" + "="*80)

View File: test_misleading_comments.py

@@ -1,491 +0,0 @@
#!/usr/bin/env python3
"""Test pure vector vs LLM-enhanced search with misleading/missing comments.
This test demonstrates how LLM enhancement can overcome:
1. Missing comments/docstrings
2. Misleading or incorrect comments
3. Outdated documentation
Usage:
python test_misleading_comments.py --tool gemini
"""
import argparse
import sqlite3
import sys
import tempfile
import time
from pathlib import Path
from typing import Dict, List
# Check dependencies
try:
from codexlens.semantic import SEMANTIC_AVAILABLE
from codexlens.semantic.embedder import Embedder
from codexlens.semantic.vector_store import VectorStore
from codexlens.semantic.chunker import Chunker, ChunkConfig
from codexlens.semantic.llm_enhancer import (
LLMEnhancer,
LLMConfig,
FileData,
EnhancedSemanticIndexer,
)
from codexlens.storage.dir_index import DirIndexStore
from codexlens.search.hybrid_search import HybridSearchEngine
except ImportError as e:
print(f"Error: Missing dependencies - {e}")
print("Install with: pip install codexlens[semantic]")
sys.exit(1)
if not SEMANTIC_AVAILABLE:
print("Error: Semantic search dependencies not available")
sys.exit(1)
# Test dataset with MISLEADING or MISSING comments
MISLEADING_DATASET = {
"crypto/hasher.py": '''"""Simple string utilities."""
import bcrypt
def process_string(s: str, rounds: int = 12) -> str:
"""Convert string to uppercase."""
salt = bcrypt.gensalt(rounds=rounds)
hashed = bcrypt.hashpw(s.encode('utf-8'), salt)
return hashed.decode('utf-8')
def check_string(s: str, target: str) -> bool:
"""Check if two strings are equal."""
return bcrypt.checkpw(s.encode('utf-8'), target.encode('utf-8'))
''',
"auth/token.py": '''import jwt
from datetime import datetime, timedelta
SECRET_KEY = "key123"
def make_thing(uid: int, exp: int = 3600) -> str:
payload = {
'user_id': uid,
'exp': datetime.utcnow() + timedelta(seconds=exp),
'iat': datetime.utcnow()
}
return jwt.encode(payload, SECRET_KEY, algorithm='HS256')
def parse_thing(thing: str) -> dict:
try:
return jwt.decode(thing, SECRET_KEY, algorithms=['HS256'])
except jwt.ExpiredSignatureError:
return None
''',
"api/handlers.py": '''"""Database connection utilities."""
from flask import Flask, request, jsonify
app = Flask(__name__)
@app.route('/api/items', methods=['POST'])
def create_item():
"""Delete an existing item."""
data = request.get_json()
if not data.get('email') or not data.get('password'):
return jsonify({'error': 'Missing data'}), 400
item_id = 12345
return jsonify({'item_id': item_id, 'success': True}), 201
@app.route('/api/items/<int:item_id>', methods=['GET'])
def get_item(item_id: int):
"""Update item configuration."""
item = {
'id': item_id,
'email': 'user@example.com',
'name': 'John Doe'
}
return jsonify(item), 200
''',
"utils/checker.py": '''"""Math calculation functions."""
import re
def calc_sum(email: str) -> bool:
pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$'
return bool(re.match(pattern, email))
def format_text(text: str, max_len: int = 255) -> str:
text = re.sub(r'[<>"\\'&]', '', text)
return text.strip()[:max_len]
''',
"db/pool.py": '''"""Email sending service."""
import psycopg2
from psycopg2 import pool
from contextlib import contextmanager
class EmailSender:
"""SMTP email sender with retry logic."""
def __init__(self, min_conn: int = 1, max_conn: int = 10):
"""Initialize email sender."""
self.pool = psycopg2.pool.SimpleConnectionPool(
min_conn, max_conn,
user='dbuser', host='localhost', database='myapp'
)
@contextmanager
def send_email(self):
"""Send email message."""
conn = self.pool.getconn()
try:
yield conn
conn.commit()
finally:
self.pool.putconn(conn)
''',
}
# Test queries - natural language based on ACTUAL functionality (not misleading comments)
TEST_QUERIES = [
("How to hash passwords securely with bcrypt?", "crypto/hasher.py"),
("Generate JWT authentication token", "auth/token.py"),
("Create user account REST API endpoint", "api/handlers.py"),
("Validate email address format", "utils/checker.py"),
("PostgreSQL database connection pool", "db/pool.py"),
]
def create_test_database(db_path: Path) -> None:
"""Create and populate test database."""
store = DirIndexStore(db_path)
store.initialize()
with store._get_connection() as conn:
for path, content in MISLEADING_DATASET.items():
name = path.split('/')[-1]
conn.execute(
"""INSERT INTO files (name, full_path, content, language, mtime)
VALUES (?, ?, ?, ?, ?)""",
(name, path, content, "python", 0.0)
)
conn.commit()
store.close()
def test_pure_vector_search(db_path: Path) -> Dict:
"""Test pure vector search (relies on code + misleading comments)."""
print("\n" + "="*70)
print("PURE VECTOR SEARCH (Code + Misleading Comments -> fastembed)")
print("="*70)
start_time = time.time()
# Generate pure vector embeddings
embedder = Embedder(profile="code")
vector_store = VectorStore(db_path)
chunker = Chunker(config=ChunkConfig(max_chunk_size=2000))
with sqlite3.connect(db_path) as conn:
conn.row_factory = sqlite3.Row
rows = conn.execute("SELECT full_path, content FROM files").fetchall()
chunk_count = 0
for row in rows:
chunks = chunker.chunk_sliding_window(
row["content"],
file_path=row["full_path"],
language="python"
)
for chunk in chunks:
chunk.embedding = embedder.embed_single(chunk.content)
chunk.metadata["strategy"] = "pure_vector"
if chunks:
vector_store.add_chunks(chunks, row["full_path"])
chunk_count += len(chunks)
setup_time = time.time() - start_time
print(f"Setup: {len(rows)} files, {chunk_count} chunks in {setup_time:.1f}s")
print("Note: Embeddings include misleading comments")
# Test queries
engine = HybridSearchEngine()
results = {}
print(f"\n{'Query':<45} {'Top Result':<30} {'Score':<8}")
print("-" * 70)
for query, expected_file in TEST_QUERIES:
search_results = engine.search(
db_path,
query,
limit=3,
enable_vector=True,
pure_vector=True,
)
top_file = search_results[0].path if search_results else "No results"
top_score = search_results[0].score if search_results else 0.0
found = expected_file in [r.path for r in search_results]
rank = None
if found:
for i, r in enumerate(search_results):
if r.path == expected_file:
rank = i + 1
break
status = "[OK]" if found and rank == 1 else ("[~]" if found else "[X]")
display_query = query[:42] + "..." if len(query) > 45 else query
display_file = top_file.split('/')[-1] if '/' in top_file else top_file
print(f"{status} {display_query:<43} {display_file:<30} {top_score:.3f}")
results[query] = {
"found": found,
"rank": rank,
"top_file": top_file,
"score": top_score,
}
return results
def test_llm_enhanced_search(db_path: Path, llm_tool: str = "gemini") -> Dict:
"""Test LLM-enhanced search (LLM reads code and generates accurate summary)."""
print("\n" + "="*70)
print(f"LLM-ENHANCED SEARCH (Code -> {llm_tool.upper()} Analysis -> fastembed)")
print("="*70)
# Check CCW availability
llm_config = LLMConfig(enabled=True, tool=llm_tool, batch_size=2)
enhancer = LLMEnhancer(llm_config)
if not enhancer.check_available():
print("[X] CCW CLI not available - skipping LLM-enhanced test")
print(" Install CCW: npm install -g ccw")
return {}
start_time = time.time()
# Generate LLM-enhanced embeddings
embedder = Embedder(profile="code")
vector_store = VectorStore(db_path)
indexer = EnhancedSemanticIndexer(enhancer, embedder, vector_store)
# Prepare file data
file_data_list = [
FileData(path=path, content=content, language="python")
for path, content in MISLEADING_DATASET.items()
]
# Index with LLM enhancement
print(f"LLM analyzing code (ignoring misleading comments)...")
indexed = indexer.index_files(file_data_list)
setup_time = time.time() - start_time
print(f"Setup: {indexed}/{len(file_data_list)} files indexed in {setup_time:.1f}s")
print("Note: LLM generates summaries based on actual code logic")
# Test queries
engine = HybridSearchEngine()
results = {}
print(f"\n{'Query':<45} {'Top Result':<30} {'Score':<8}")
print("-" * 70)
for query, expected_file in TEST_QUERIES:
search_results = engine.search(
db_path,
query,
limit=3,
enable_vector=True,
pure_vector=True,
)
top_file = search_results[0].path if search_results else "No results"
top_score = search_results[0].score if search_results else 0.0
found = expected_file in [r.path for r in search_results]
rank = None
if found:
for i, r in enumerate(search_results):
if r.path == expected_file:
rank = i + 1
break
status = "[OK]" if found and rank == 1 else ("[~]" if found else "[X]")
display_query = query[:42] + "..." if len(query) > 45 else query
display_file = top_file.split('/')[-1] if '/' in top_file else top_file
print(f"{status} {display_query:<43} {display_file:<30} {top_score:.3f}")
results[query] = {
"found": found,
"rank": rank,
"top_file": top_file,
"score": top_score,
}
return results
def compare_results(pure_results: Dict, llm_results: Dict) -> None:
"""Compare and analyze results from both approaches."""
print("\n" + "="*70)
print("COMPARISON SUMMARY - MISLEADING COMMENTS TEST")
print("="*70)
if not llm_results:
print("Cannot compare - LLM-enhanced test was skipped")
return
pure_score = 0
llm_score = 0
print(f"\n{'Query':<45} {'Pure':<10} {'LLM':<10}")
print("-" * 70)
for query, expected_file in TEST_QUERIES:
pure_res = pure_results.get(query, {})
llm_res = llm_results.get(query, {})
pure_status = f"[OK] Rank {pure_res.get('rank', '?')}" if pure_res.get('found') else "[X] Miss"
llm_status = f"[OK] Rank {llm_res.get('rank', '?')}" if llm_res.get('found') else "[X] Miss"
# Scoring: Rank 1 = 3 points, Rank 2 = 2 points, Rank 3 = 1 point
if pure_res.get('found') and pure_res.get('rank'):
pure_score += max(0, 4 - pure_res['rank'])
if llm_res.get('found') and llm_res.get('rank'):
llm_score += max(0, 4 - llm_res['rank'])
display_query = query[:42] + "..." if len(query) > 45 else query
print(f"{display_query:<45} {pure_status:<10} {llm_status:<10}")
print("-" * 70)
print(f"{'TOTAL SCORE':<45} {pure_score:<10} {llm_score:<10}")
print("="*70)
# Analysis
print("\nANALYSIS:")
if llm_score > pure_score:
improvement = ((llm_score - pure_score) / max(pure_score, 1)) * 100
print(f"[OK] LLM enhancement improves results by {improvement:.1f}%")
print(" LLM understands actual code logic despite misleading comments")
print(" Pure vector search misled by incorrect documentation")
elif pure_score > llm_score:
degradation = ((pure_score - llm_score) / max(pure_score, 1)) * 100
print(f"[X] Pure vector performed {degradation:.1f}% better")
print(" Unexpected: Pure vector wasn't affected by misleading comments")
else:
print("= Both approaches performed equally")
print(" Test dataset may still be too simple to show differences")
print("\nKEY INSIGHTS:")
print("- Pure Vector: Embeds code + comments together, can be misled")
print("- LLM Enhanced: Analyzes actual code behavior, ignores bad comments")
print("- Best Use: LLM enhancement crucial for poorly documented codebases")
print("\nMISLEADING COMMENTS IN TEST:")
print("1. 'hasher.py' claims 'string utilities' but does bcrypt hashing")
print("2. 'token.py' has no docstrings, unclear function names")
print("3. 'handlers.py' says 'database utilities' but is REST API")
print("4. 'handlers.py' docstrings opposite (create says delete, etc)")
print("5. 'checker.py' claims 'math functions' but validates emails")
print("6. 'pool.py' claims 'email sender' but is database pool")
def main():
parser = argparse.ArgumentParser(
description="Test pure vector vs LLM-enhanced with misleading comments"
)
parser.add_argument(
"--tool",
choices=["gemini", "qwen"],
default="gemini",
help="LLM tool to use (default: gemini)"
)
parser.add_argument(
"--skip-llm",
action="store_true",
help="Skip LLM-enhanced test"
)
parser.add_argument(
"--keep-db",
type=str,
help="Save database to specified path for inspection (e.g., ./test_results.db)"
)
args = parser.parse_args()
print("\n" + "="*70)
print("MISLEADING COMMENTS TEST")
print("Pure Vector vs LLM-Enhanced with Incorrect Documentation")
print("="*70)
# Create test database
with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
db_path = Path(f.name)
try:
print(f"\nTest dataset: {len(MISLEADING_DATASET)} Python files")
print(f"Test queries: {len(TEST_QUERIES)} natural language questions")
print("\nChallenges:")
print("- Misleading module docstrings")
print("- Incorrect function docstrings")
print("- Missing documentation")
print("- Unclear function names")
create_test_database(db_path)
# Test pure vector search
pure_results = test_pure_vector_search(db_path)
# Test LLM-enhanced search
if not args.skip_llm:
# Clear semantic_chunks table for LLM test
with sqlite3.connect(db_path) as conn:
conn.execute("DELETE FROM semantic_chunks")
conn.commit()
llm_results = test_llm_enhanced_search(db_path, args.tool)
else:
llm_results = {}
print("\n[X] LLM-enhanced test skipped (--skip-llm flag)")
# Compare results
compare_results(pure_results, llm_results)
finally:
# Save or cleanup database
if args.keep_db:
import shutil
save_path = Path(args.keep_db)
try:
import gc
gc.collect()
time.sleep(0.2)
shutil.copy2(db_path, save_path)
print(f"\n[OK] Database saved to: {save_path}")
print(f"Inspect with: python scripts/inspect_llm_summaries.py {save_path}")
except Exception as e:
print(f"\n[X] Failed to save database: {e}")
finally:
try:
if db_path.exists():
db_path.unlink()
                except OSError:
                    pass  # best-effort cleanup; Windows may still hold the file handle
else:
# Cleanup
try:
import gc
gc.collect()
time.sleep(0.1)
if db_path.exists():
db_path.unlink()
except PermissionError:
print(f"\nWarning: Could not delete temporary database: {db_path}")
print("\n" + "="*70)
print("Test completed!")
print("="*70)
if __name__ == "__main__":
main()