mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-05 01:50:27 +08:00
Remove LLM enhancement features and related components, as requested. This deletes the source files, CLI commands, front-end components, tests, scripts, and documentation tied to the LLM functionality, simplifying dependencies and reducing complexity while retaining the core vector search capabilities. Validation confirmed the removal is complete and the remaining functionality still works.
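
For orientation, the retained pure-vector pipeline has the shape used throughout the removed test scripts below: chunk the source, embed each chunk with fastembed, store the vectors, then query. A minimal sketch, assuming the codexlens APIs (Embedder, Chunker, VectorStore, HybridSearchEngine) keep the signatures shown in those scripts:

# Minimal sketch of the retained vector-search flow; signatures are taken
# from the removed scripts below and may drift as the API evolves.
from pathlib import Path

from codexlens.semantic.chunker import Chunker, ChunkConfig
from codexlens.semantic.embedder import Embedder
from codexlens.semantic.vector_store import VectorStore
from codexlens.search.hybrid_search import HybridSearchEngine

def index_and_search(db_path: Path, path: str, content: str, query: str):
    embedder = Embedder(profile="code")
    chunker = Chunker(config=ChunkConfig(max_chunk_size=2000))
    store = VectorStore(db_path)
    # Chunk the raw code, embed each chunk, and persist the vectors.
    chunks = chunker.chunk_sliding_window(content, file_path=path, language="python")
    for chunk in chunks:
        chunk.embedding = embedder.embed_single(chunk.content)
    store.add_chunks(chunks, path)
    # Vector-only retrieval over the stored chunks.
    return HybridSearchEngine().search(db_path, query, limit=3,
                                       enable_vector=True, pure_vector=True)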
@@ -1,465 +0,0 @@
#!/usr/bin/env python3
"""Standalone script to compare pure vector vs LLM-enhanced semantic search.

Usage:
    python compare_search_methods.py [--tool gemini|qwen] [--skip-llm]

This script:
1. Creates a test dataset with sample code
2. Tests pure vector search (code → fastembed → search)
3. Tests LLM-enhanced search (code → LLM summary → fastembed → search)
4. Compares results across natural language queries
"""

import argparse
import sqlite3
import sys
import tempfile
import time
from pathlib import Path
from typing import Dict, List, Tuple

# Check dependencies
try:
    from codexlens.semantic import SEMANTIC_AVAILABLE
    from codexlens.semantic.embedder import Embedder
    from codexlens.semantic.vector_store import VectorStore
    from codexlens.semantic.chunker import Chunker, ChunkConfig
    from codexlens.semantic.llm_enhancer import (
        LLMEnhancer,
        LLMConfig,
        FileData,
        EnhancedSemanticIndexer,
    )
    from codexlens.storage.dir_index import DirIndexStore
    from codexlens.search.hybrid_search import HybridSearchEngine
except ImportError as e:
    print(f"Error: Missing dependencies - {e}")
    print("Install with: pip install codexlens[semantic]")
    sys.exit(1)

if not SEMANTIC_AVAILABLE:
    print("Error: Semantic search dependencies not available")
    print("Install with: pip install codexlens[semantic]")
    sys.exit(1)


# Test dataset with realistic code samples
TEST_DATASET = {
    "auth/password_hasher.py": '''"""Password hashing utilities using bcrypt."""
import bcrypt

def hash_password(password: str, salt_rounds: int = 12) -> str:
    """Hash a password using bcrypt with specified salt rounds."""
    salt = bcrypt.gensalt(rounds=salt_rounds)
    hashed = bcrypt.hashpw(password.encode('utf-8'), salt)
    return hashed.decode('utf-8')

def verify_password(password: str, hashed: str) -> bool:
    """Verify a password against its hash."""
    return bcrypt.checkpw(password.encode('utf-8'), hashed.encode('utf-8'))
''',

    "auth/jwt_handler.py": '''"""JWT token generation and validation."""
import jwt
from datetime import datetime, timedelta

SECRET_KEY = "your-secret-key"

def create_token(user_id: int, expires_in: int = 3600) -> str:
    """Generate a JWT access token for user authentication."""
    payload = {
        'user_id': user_id,
        'exp': datetime.utcnow() + timedelta(seconds=expires_in),
        'iat': datetime.utcnow()
    }
    return jwt.encode(payload, SECRET_KEY, algorithm='HS256')

def decode_token(token: str) -> dict:
    """Validate and decode JWT token."""
    try:
        return jwt.decode(token, SECRET_KEY, algorithms=['HS256'])
    except jwt.ExpiredSignatureError:
        return None
''',

    "api/user_endpoints.py": '''"""REST API endpoints for user management."""
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/api/users', methods=['POST'])
def create_user():
    """Create a new user account with email and password."""
    data = request.get_json()
    if not data.get('email') or not data.get('password'):
        return jsonify({'error': 'Email and password required'}), 400
    user_id = 12345  # Database insert
    return jsonify({'user_id': user_id, 'success': True}), 201

@app.route('/api/users/<int:user_id>', methods=['GET'])
def get_user(user_id: int):
    """Retrieve user profile information by user ID."""
    user = {
        'id': user_id,
        'email': 'user@example.com',
        'name': 'John Doe'
    }
    return jsonify(user), 200
''',

    "utils/validation.py": '''"""Input validation utilities."""
import re

def validate_email(email: str) -> bool:
    """Check if email address format is valid using regex."""
    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    return bool(re.match(pattern, email))

def sanitize_input(text: str, max_length: int = 255) -> str:
    """Clean user input by removing special characters."""
    text = re.sub(r'[<>\"\'&]', '', text)
    return text.strip()[:max_length]

def validate_password_strength(password: str) -> tuple:
    """Validate password meets security requirements."""
    if len(password) < 8:
        return False, "Password must be at least 8 characters"
    if not re.search(r'[A-Z]', password):
        return False, "Must contain uppercase letter"
    return True, None
''',

    "database/connection.py": '''"""Database connection pooling."""
import psycopg2
from psycopg2 import pool
from contextlib import contextmanager

class DatabasePool:
    """PostgreSQL connection pool manager."""

    def __init__(self, min_conn: int = 1, max_conn: int = 10):
        """Initialize database connection pool."""
        self.pool = psycopg2.pool.SimpleConnectionPool(
            min_conn, max_conn,
            user='dbuser', host='localhost', database='myapp'
        )

    @contextmanager
    def get_connection(self):
        """Get a connection from pool as context manager."""
        conn = self.pool.getconn()
        try:
            yield conn
            conn.commit()
        finally:
            self.pool.putconn(conn)
''',
}


# Natural language test queries
TEST_QUERIES = [
    ("How do I securely hash passwords?", "auth/password_hasher.py"),
    ("Generate JWT token for authentication", "auth/jwt_handler.py"),
    ("Create new user account via API", "api/user_endpoints.py"),
    ("Validate email address format", "utils/validation.py"),
    ("Connect to PostgreSQL database", "database/connection.py"),
]


def create_test_database(db_path: Path) -> None:
    """Create and populate test database."""
    store = DirIndexStore(db_path)
    store.initialize()

    with store._get_connection() as conn:
        for path, content in TEST_DATASET.items():
            name = path.split('/')[-1]
            conn.execute(
                """INSERT INTO files (name, full_path, content, language, mtime)
                   VALUES (?, ?, ?, ?, ?)""",
                (name, path, content, "python", 0.0)
            )
        conn.commit()

    store.close()


def test_pure_vector_search(db_path: Path) -> Dict:
    """Test pure vector search (raw code embeddings)."""
    print("\n" + "="*70)
    print("PURE VECTOR SEARCH (Code → fastembed)")
    print("="*70)

    start_time = time.time()

    # Generate pure vector embeddings
    embedder = Embedder(profile="code")
    vector_store = VectorStore(db_path)
    chunker = Chunker(config=ChunkConfig(max_chunk_size=2000))

    with sqlite3.connect(db_path) as conn:
        conn.row_factory = sqlite3.Row
        rows = conn.execute("SELECT full_path, content FROM files").fetchall()

    chunk_count = 0
    for row in rows:
        chunks = chunker.chunk_sliding_window(
            row["content"],
            file_path=row["full_path"],
            language="python"
        )
        for chunk in chunks:
            chunk.embedding = embedder.embed_single(chunk.content)
            chunk.metadata["strategy"] = "pure_vector"
        if chunks:
            vector_store.add_chunks(chunks, row["full_path"])
            chunk_count += len(chunks)

    setup_time = time.time() - start_time
    print(f"Setup: {len(rows)} files, {chunk_count} chunks in {setup_time:.1f}s")

    # Test queries
    engine = HybridSearchEngine()
    results = {}

    print(f"\n{'Query':<45} {'Top Result':<30} {'Score':<8}")
    print("-" * 70)

    for query, expected_file in TEST_QUERIES:
        search_results = engine.search(
            db_path,
            query,
            limit=3,
            enable_vector=True,
            pure_vector=True,
        )

        top_file = search_results[0].path if search_results else "No results"
        top_score = search_results[0].score if search_results else 0.0
        found = expected_file in [r.path for r in search_results]
        rank = None
        if found:
            for i, r in enumerate(search_results):
                if r.path == expected_file:
                    rank = i + 1
                    break

        status = "[OK]" if found and rank == 1 else ("[~]" if found else "[X]")
        display_query = query[:42] + "..." if len(query) > 45 else query
        display_file = top_file.split('/')[-1] if '/' in top_file else top_file

        print(f"{status} {display_query:<43} {display_file:<30} {top_score:.3f}")

        results[query] = {
            "found": found,
            "rank": rank,
            "top_file": top_file,
            "score": top_score,
        }

    return results


def test_llm_enhanced_search(db_path: Path, llm_tool: str = "gemini") -> Dict:
    """Test LLM-enhanced search (LLM summaries → fastembed)."""
    print("\n" + "="*70)
    print(f"LLM-ENHANCED SEARCH (Code → {llm_tool.upper()} → fastembed)")
    print("="*70)

    # Check CCW availability
    llm_config = LLMConfig(enabled=True, tool=llm_tool, batch_size=2)
    enhancer = LLMEnhancer(llm_config)

    if not enhancer.check_available():
        print("[X] CCW CLI not available - skipping LLM-enhanced test")
        print(" Install CCW: npm install -g ccw")
        return {}

    start_time = time.time()

    # Generate LLM-enhanced embeddings
    embedder = Embedder(profile="code")
    vector_store = VectorStore(db_path)
    indexer = EnhancedSemanticIndexer(enhancer, embedder, vector_store)

    # Prepare file data
    file_data_list = [
        FileData(path=path, content=content, language="python")
        for path, content in TEST_DATASET.items()
    ]

    # Index with LLM enhancement
    print(f"Generating LLM summaries for {len(file_data_list)} files...")
    indexed = indexer.index_files(file_data_list)
    setup_time = time.time() - start_time

    print(f"Setup: {indexed}/{len(file_data_list)} files indexed in {setup_time:.1f}s")

    # Test queries
    engine = HybridSearchEngine()
    results = {}

    print(f"\n{'Query':<45} {'Top Result':<30} {'Score':<8}")
    print("-" * 70)

    for query, expected_file in TEST_QUERIES:
        search_results = engine.search(
            db_path,
            query,
            limit=3,
            enable_vector=True,
            pure_vector=True,
        )

        top_file = search_results[0].path if search_results else "No results"
        top_score = search_results[0].score if search_results else 0.0
        found = expected_file in [r.path for r in search_results]
        rank = None
        if found:
            for i, r in enumerate(search_results):
                if r.path == expected_file:
                    rank = i + 1
                    break

        status = "[OK]" if found and rank == 1 else ("[~]" if found else "[X]")
        display_query = query[:42] + "..." if len(query) > 45 else query
        display_file = top_file.split('/')[-1] if '/' in top_file else top_file

        print(f"{status} {display_query:<43} {display_file:<30} {top_score:.3f}")

        results[query] = {
            "found": found,
            "rank": rank,
            "top_file": top_file,
            "score": top_score,
        }

    return results


def compare_results(pure_results: Dict, llm_results: Dict) -> None:
    """Compare and analyze results from both approaches."""
    print("\n" + "="*70)
    print("COMPARISON SUMMARY")
    print("="*70)

    if not llm_results:
        print("Cannot compare - LLM-enhanced test was skipped")
        return

    pure_score = 0
    llm_score = 0

    print(f"\n{'Query':<45} {'Pure':<10} {'LLM':<10}")
    print("-" * 70)

    for query, expected_file in TEST_QUERIES:
        pure_res = pure_results.get(query, {})
        llm_res = llm_results.get(query, {})

        pure_status = f"[OK] Rank {pure_res.get('rank', '?')}" if pure_res.get('found') else "[X] Miss"
        llm_status = f"[OK] Rank {llm_res.get('rank', '?')}" if llm_res.get('found') else "[X] Miss"

        # Scoring: Rank 1 = 3 points, Rank 2 = 2 points, Rank 3 = 1 point
        if pure_res.get('found') and pure_res.get('rank'):
            pure_score += max(0, 4 - pure_res['rank'])
        if llm_res.get('found') and llm_res.get('rank'):
            llm_score += max(0, 4 - llm_res['rank'])

        display_query = query[:42] + "..." if len(query) > 45 else query
        print(f"{display_query:<45} {pure_status:<10} {llm_status:<10}")

    print("-" * 70)
    print(f"{'TOTAL SCORE':<45} {pure_score:<10} {llm_score:<10}")
    print("="*70)

    # Analysis
    print("\nANALYSIS:")
    if llm_score > pure_score:
        improvement = ((llm_score - pure_score) / max(pure_score, 1)) * 100
        print(f"[OK] LLM enhancement improves results by {improvement:.1f}%")
        print(" Natural language summaries match queries better than raw code")
    elif pure_score > llm_score:
        degradation = ((pure_score - llm_score) / max(pure_score, 1)) * 100
        print(f"[X] Pure vector performed {degradation:.1f}% better")
        print(" LLM summaries may be too generic or missing key details")
    else:
        print("= Both approaches performed equally on this test set")

    print("\nKEY FINDINGS:")
    print("- Pure Vector: Direct code embeddings, fast but may miss semantic intent")
    print("- LLM Enhanced: Natural language summaries, better for human-like queries")
    print("- Best Use: Combine both - LLM for natural language, vector for code patterns")


def main():
    parser = argparse.ArgumentParser(
        description="Compare pure vector vs LLM-enhanced semantic search"
    )
    parser.add_argument(
        "--tool",
        choices=["gemini", "qwen"],
        default="gemini",
        help="LLM tool to use for enhancement (default: gemini)"
    )
    parser.add_argument(
        "--skip-llm",
        action="store_true",
        help="Skip LLM-enhanced test (only run pure vector)"
    )
    args = parser.parse_args()

    print("\n" + "="*70)
    print("SEMANTIC SEARCH COMPARISON TEST")
    print("Pure Vector vs LLM-Enhanced Vector Search")
    print("="*70)

    # Create test database
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
        db_path = Path(f.name)

    try:
        print(f"\nTest dataset: {len(TEST_DATASET)} Python files")
        print(f"Test queries: {len(TEST_QUERIES)} natural language questions")

        create_test_database(db_path)

        # Test pure vector search
        pure_results = test_pure_vector_search(db_path)

        # Test LLM-enhanced search
        if not args.skip_llm:
            # Clear semantic_chunks table for LLM test
            with sqlite3.connect(db_path) as conn:
                conn.execute("DELETE FROM semantic_chunks")
                conn.commit()

            llm_results = test_llm_enhanced_search(db_path, args.tool)
        else:
            llm_results = {}
            print("\n[X] LLM-enhanced test skipped (--skip-llm flag)")

        # Compare results
        compare_results(pure_results, llm_results)

    finally:
        # Cleanup - ensure all connections are closed
        try:
            import gc
            gc.collect()  # Force garbage collection to close any lingering connections
            time.sleep(0.1)  # Small delay for Windows to release file handle
            if db_path.exists():
                db_path.unlink()
        except PermissionError:
            print(f"\nWarning: Could not delete temporary database: {db_path}")
            print("It will be cleaned up on next system restart.")

    print("\n" + "="*70)
    print("Test completed successfully!")
    print("="*70)


if __name__ == "__main__":
    main()
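
The rank-based scoring that compare_results applies above (rank 1 = 3 points, rank 2 = 2, rank 3 = 1, miss = 0) reduces to max(0, 4 - rank); a standalone sketch of the mapping for clarity:

# Rank-to-points mapping used by compare_results: 1 -> 3, 2 -> 2, 3 -> 1, miss -> 0.
def rank_points(rank):
    return max(0, 4 - rank) if rank else 0

assert [rank_points(r) for r in (1, 2, 3, None)] == [3, 2, 1, 0]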
@@ -1,88 +0,0 @@
#!/usr/bin/env python3
"""Inspect LLM-generated summaries in semantic_chunks table."""

import sqlite3
import sys
from pathlib import Path

def inspect_summaries(db_path: Path):
    """Show LLM-generated summaries from database."""
    if not db_path.exists():
        print(f"Error: Database not found: {db_path}")
        return

    with sqlite3.connect(db_path) as conn:
        conn.row_factory = sqlite3.Row

        # Check if semantic_chunks table exists
        cursor = conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_chunks'"
        )
        if not cursor.fetchone():
            print("No semantic_chunks table found")
            return

        # Get all chunks with metadata
        cursor = conn.execute("""
            SELECT file_path, chunk_index, content,
                   json_extract(metadata, '$.llm_summary') as summary,
                   json_extract(metadata, '$.llm_keywords') as keywords,
                   json_extract(metadata, '$.llm_purpose') as purpose,
                   json_extract(metadata, '$.strategy') as strategy
            FROM semantic_chunks
            ORDER BY file_path, chunk_index
        """)

        chunks = cursor.fetchall()

        if not chunks:
            print("No chunks found in database")
            return

        print("="*80)
        print("LLM-GENERATED SUMMARIES INSPECTION")
        print("="*80)

        current_file = None
        for chunk in chunks:
            file_path = chunk['file_path']

            if file_path != current_file:
                print(f"\n{'='*80}")
                print(f"FILE: {file_path}")
                print(f"{'='*80}")
                current_file = file_path

            print(f"\n[Chunk {chunk['chunk_index']}]")
            print(f"Strategy: {chunk['strategy']}")

            if chunk['summary']:
                print(f"\nLLM Summary:")
                print(f" {chunk['summary']}")

            if chunk['keywords']:
                print(f"\nKeywords:")
                print(f" {chunk['keywords']}")

            if chunk['purpose']:
                print(f"\nPurpose:")
                print(f" {chunk['purpose']}")

            # Show first 200 chars of content
            content = chunk['content']
            if len(content) > 200:
                content = content[:200] + "..."
            print(f"\nOriginal Content (first 200 chars):")
            print(f" {content}")
            print("-" * 80)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python inspect_llm_summaries.py <path_to_index.db>")
        print("\nExample:")
        print(" python inspect_llm_summaries.py ~/.codexlens/indexes/myproject/_index.db")
        sys.exit(1)

    db_path = Path(sys.argv[1])
    inspect_summaries(db_path)
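
The json_extract() calls above assume each semantic_chunks row carries a JSON metadata column with the keys queried there. A sketch of that payload; only the key names come from the script, the values are hypothetical:

# Hypothetical metadata payload matching the json_extract() keys above.
example_metadata = {
    "llm_summary": "Hashes and verifies passwords using bcrypt.",
    "llm_keywords": ["bcrypt", "password", "hash"],
    "llm_purpose": "Password hashing utilities",
    "strategy": "llm_enhanced",
}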
@@ -1,112 +0,0 @@
#!/usr/bin/env python3
"""Directly show LLM analysis of test code."""

from codexlens.semantic.llm_enhancer import LLMEnhancer, LLMConfig, FileData

# Misleading code example
TEST_CODE = '''"""Email sending service."""
import psycopg2
from psycopg2 import pool
from contextlib import contextmanager

class EmailSender:
    """SMTP email sender with retry logic."""

    def __init__(self, min_conn: int = 1, max_conn: int = 10):
        """Initialize email sender."""
        self.pool = psycopg2.pool.SimpleConnectionPool(
            min_conn, max_conn,
            user='dbuser', host='localhost', database='myapp'
        )

    @contextmanager
    def send_email(self):
        """Send email message."""
        conn = self.pool.getconn()
        try:
            yield conn
            conn.commit()
        finally:
            self.pool.putconn(conn)
'''

print("="*80)
print("LLM ANALYSIS OF MISLEADING CODE")
print("="*80)

print("\n[Original Code with Misleading Comments]")
print("-"*80)
print(TEST_CODE)
print("-"*80)

print("\n[Actual Functionality]")
print(" - Imports: psycopg2 (PostgreSQL library)")
print(" - Class: EmailSender (but name is misleading!)")
print(" - Actually: Creates PostgreSQL connection pool")
print(" - Methods: send_email (actually gets DB connection)")

print("\n[Misleading Documentation]")
print(" - Module docstring: 'Email sending service' (WRONG)")
print(" - Class docstring: 'SMTP email sender' (WRONG)")
print(" - Method docstring: 'Send email message' (WRONG)")

print("\n" + "="*80)
print("TESTING LLM UNDERSTANDING")
print("="*80)

# Test LLM analysis
config = LLMConfig(enabled=True, tool="gemini", batch_size=1)
enhancer = LLMEnhancer(config)

if not enhancer.check_available():
    print("\n[X] CCW CLI not available")
    print("Install: npm install -g ccw")
    exit(1)

print("\n[Calling Gemini to analyze code...]")
file_data = FileData(path="db/pool.py", content=TEST_CODE, language="python")

import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmpdir:
    result = enhancer.enhance_files([file_data], Path(tmpdir))

    if "db/pool.py" in result:
        metadata = result["db/pool.py"]

        print("\n[LLM-Generated Summary]")
        print("-"*80)
        print(f"Summary: {metadata.summary}")
        print(f"\nPurpose: {metadata.purpose}")
        print(f"\nKeywords: {', '.join(metadata.keywords)}")
        print("-"*80)

        print("\n[Analysis]")
        # Check if LLM identified the real functionality
        summary_lower = metadata.summary.lower()
        keywords_lower = [k.lower() for k in metadata.keywords]

        correct_terms = ['database', 'postgresql', 'connection', 'pool', 'psycopg']
        misleading_terms = ['email', 'smtp', 'send']

        found_correct = sum(1 for term in correct_terms
                            if term in summary_lower or any(term in k for k in keywords_lower))
        found_misleading = sum(1 for term in misleading_terms
                               if term in summary_lower or any(term in k for k in keywords_lower))

        print(f"Correct terms found: {found_correct}/{len(correct_terms)}")
        print(f"Misleading terms found: {found_misleading}/{len(misleading_terms)}")

        if found_correct > found_misleading:
            print("\n[OK] LLM correctly identified actual functionality!")
            print(" LLM ignored misleading comments and analyzed code behavior")
        elif found_misleading > found_correct:
            print("\n[X] LLM was misled by incorrect comments")
            print(" LLM trusted documentation over code analysis")
        else:
            print("\n[~] Mixed results - LLM found both correct and misleading terms")
    else:
        print("\n[X] LLM analysis failed - no results returned")

print("\n" + "="*80)
@@ -1,491 +0,0 @@
#!/usr/bin/env python3
"""Test pure vector vs LLM-enhanced search with misleading/missing comments.

This test demonstrates how LLM enhancement can overcome:
1. Missing comments/docstrings
2. Misleading or incorrect comments
3. Outdated documentation

Usage:
    python test_misleading_comments.py --tool gemini
"""

import argparse
import sqlite3
import sys
import tempfile
import time
from pathlib import Path
from typing import Dict, List

# Check dependencies
try:
    from codexlens.semantic import SEMANTIC_AVAILABLE
    from codexlens.semantic.embedder import Embedder
    from codexlens.semantic.vector_store import VectorStore
    from codexlens.semantic.chunker import Chunker, ChunkConfig
    from codexlens.semantic.llm_enhancer import (
        LLMEnhancer,
        LLMConfig,
        FileData,
        EnhancedSemanticIndexer,
    )
    from codexlens.storage.dir_index import DirIndexStore
    from codexlens.search.hybrid_search import HybridSearchEngine
except ImportError as e:
    print(f"Error: Missing dependencies - {e}")
    print("Install with: pip install codexlens[semantic]")
    sys.exit(1)

if not SEMANTIC_AVAILABLE:
    print("Error: Semantic search dependencies not available")
    sys.exit(1)


# Test dataset with MISLEADING or MISSING comments
MISLEADING_DATASET = {
    "crypto/hasher.py": '''"""Simple string utilities."""
import bcrypt

def process_string(s: str, rounds: int = 12) -> str:
    """Convert string to uppercase."""
    salt = bcrypt.gensalt(rounds=rounds)
    hashed = bcrypt.hashpw(s.encode('utf-8'), salt)
    return hashed.decode('utf-8')

def check_string(s: str, target: str) -> bool:
    """Check if two strings are equal."""
    return bcrypt.checkpw(s.encode('utf-8'), target.encode('utf-8'))
''',

    "auth/token.py": '''import jwt
from datetime import datetime, timedelta

SECRET_KEY = "key123"

def make_thing(uid: int, exp: int = 3600) -> str:
    payload = {
        'user_id': uid,
        'exp': datetime.utcnow() + timedelta(seconds=exp),
        'iat': datetime.utcnow()
    }
    return jwt.encode(payload, SECRET_KEY, algorithm='HS256')

def parse_thing(thing: str) -> dict:
    try:
        return jwt.decode(thing, SECRET_KEY, algorithms=['HS256'])
    except jwt.ExpiredSignatureError:
        return None
''',

    "api/handlers.py": '''"""Database connection utilities."""
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/api/items', methods=['POST'])
def create_item():
    """Delete an existing item."""
    data = request.get_json()
    if not data.get('email') or not data.get('password'):
        return jsonify({'error': 'Missing data'}), 400
    item_id = 12345
    return jsonify({'item_id': item_id, 'success': True}), 201

@app.route('/api/items/<int:item_id>', methods=['GET'])
def get_item(item_id: int):
    """Update item configuration."""
    item = {
        'id': item_id,
        'email': 'user@example.com',
        'name': 'John Doe'
    }
    return jsonify(item), 200
''',

    "utils/checker.py": '''"""Math calculation functions."""
import re

def calc_sum(email: str) -> bool:
    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$'
    return bool(re.match(pattern, email))

def format_text(text: str, max_len: int = 255) -> str:
    text = re.sub(r'[<>"\\'&]', '', text)
    return text.strip()[:max_len]
''',

    "db/pool.py": '''"""Email sending service."""
import psycopg2
from psycopg2 import pool
from contextlib import contextmanager

class EmailSender:
    """SMTP email sender with retry logic."""

    def __init__(self, min_conn: int = 1, max_conn: int = 10):
        """Initialize email sender."""
        self.pool = psycopg2.pool.SimpleConnectionPool(
            min_conn, max_conn,
            user='dbuser', host='localhost', database='myapp'
        )

    @contextmanager
    def send_email(self):
        """Send email message."""
        conn = self.pool.getconn()
        try:
            yield conn
            conn.commit()
        finally:
            self.pool.putconn(conn)
''',
}


# Test queries - natural language based on ACTUAL functionality (not misleading comments)
TEST_QUERIES = [
    ("How to hash passwords securely with bcrypt?", "crypto/hasher.py"),
    ("Generate JWT authentication token", "auth/token.py"),
    ("Create user account REST API endpoint", "api/handlers.py"),
    ("Validate email address format", "utils/checker.py"),
    ("PostgreSQL database connection pool", "db/pool.py"),
]


def create_test_database(db_path: Path) -> None:
    """Create and populate test database."""
    store = DirIndexStore(db_path)
    store.initialize()

    with store._get_connection() as conn:
        for path, content in MISLEADING_DATASET.items():
            name = path.split('/')[-1]
            conn.execute(
                """INSERT INTO files (name, full_path, content, language, mtime)
                   VALUES (?, ?, ?, ?, ?)""",
                (name, path, content, "python", 0.0)
            )
        conn.commit()

    store.close()


def test_pure_vector_search(db_path: Path) -> Dict:
    """Test pure vector search (relies on code + misleading comments)."""
    print("\n" + "="*70)
    print("PURE VECTOR SEARCH (Code + Misleading Comments -> fastembed)")
    print("="*70)

    start_time = time.time()

    # Generate pure vector embeddings
    embedder = Embedder(profile="code")
    vector_store = VectorStore(db_path)
    chunker = Chunker(config=ChunkConfig(max_chunk_size=2000))

    with sqlite3.connect(db_path) as conn:
        conn.row_factory = sqlite3.Row
        rows = conn.execute("SELECT full_path, content FROM files").fetchall()

    chunk_count = 0
    for row in rows:
        chunks = chunker.chunk_sliding_window(
            row["content"],
            file_path=row["full_path"],
            language="python"
        )
        for chunk in chunks:
            chunk.embedding = embedder.embed_single(chunk.content)
            chunk.metadata["strategy"] = "pure_vector"
        if chunks:
            vector_store.add_chunks(chunks, row["full_path"])
            chunk_count += len(chunks)

    setup_time = time.time() - start_time
    print(f"Setup: {len(rows)} files, {chunk_count} chunks in {setup_time:.1f}s")
    print("Note: Embeddings include misleading comments")

    # Test queries
    engine = HybridSearchEngine()
    results = {}

    print(f"\n{'Query':<45} {'Top Result':<30} {'Score':<8}")
    print("-" * 70)

    for query, expected_file in TEST_QUERIES:
        search_results = engine.search(
            db_path,
            query,
            limit=3,
            enable_vector=True,
            pure_vector=True,
        )

        top_file = search_results[0].path if search_results else "No results"
        top_score = search_results[0].score if search_results else 0.0
        found = expected_file in [r.path for r in search_results]
        rank = None
        if found:
            for i, r in enumerate(search_results):
                if r.path == expected_file:
                    rank = i + 1
                    break

        status = "[OK]" if found and rank == 1 else ("[~]" if found else "[X]")
        display_query = query[:42] + "..." if len(query) > 45 else query
        display_file = top_file.split('/')[-1] if '/' in top_file else top_file

        print(f"{status} {display_query:<43} {display_file:<30} {top_score:.3f}")

        results[query] = {
            "found": found,
            "rank": rank,
            "top_file": top_file,
            "score": top_score,
        }

    return results


def test_llm_enhanced_search(db_path: Path, llm_tool: str = "gemini") -> Dict:
    """Test LLM-enhanced search (LLM reads code and generates accurate summary)."""
    print("\n" + "="*70)
    print(f"LLM-ENHANCED SEARCH (Code -> {llm_tool.upper()} Analysis -> fastembed)")
    print("="*70)

    # Check CCW availability
    llm_config = LLMConfig(enabled=True, tool=llm_tool, batch_size=2)
    enhancer = LLMEnhancer(llm_config)

    if not enhancer.check_available():
        print("[X] CCW CLI not available - skipping LLM-enhanced test")
        print(" Install CCW: npm install -g ccw")
        return {}

    start_time = time.time()

    # Generate LLM-enhanced embeddings
    embedder = Embedder(profile="code")
    vector_store = VectorStore(db_path)
    indexer = EnhancedSemanticIndexer(enhancer, embedder, vector_store)

    # Prepare file data
    file_data_list = [
        FileData(path=path, content=content, language="python")
        for path, content in MISLEADING_DATASET.items()
    ]

    # Index with LLM enhancement
    print("LLM analyzing code (ignoring misleading comments)...")
    indexed = indexer.index_files(file_data_list)
    setup_time = time.time() - start_time

    print(f"Setup: {indexed}/{len(file_data_list)} files indexed in {setup_time:.1f}s")
    print("Note: LLM generates summaries based on actual code logic")

    # Test queries
    engine = HybridSearchEngine()
    results = {}

    print(f"\n{'Query':<45} {'Top Result':<30} {'Score':<8}")
    print("-" * 70)

    for query, expected_file in TEST_QUERIES:
        search_results = engine.search(
            db_path,
            query,
            limit=3,
            enable_vector=True,
            pure_vector=True,
        )

        top_file = search_results[0].path if search_results else "No results"
        top_score = search_results[0].score if search_results else 0.0
        found = expected_file in [r.path for r in search_results]
        rank = None
        if found:
            for i, r in enumerate(search_results):
                if r.path == expected_file:
                    rank = i + 1
                    break

        status = "[OK]" if found and rank == 1 else ("[~]" if found else "[X]")
        display_query = query[:42] + "..." if len(query) > 45 else query
        display_file = top_file.split('/')[-1] if '/' in top_file else top_file

        print(f"{status} {display_query:<43} {display_file:<30} {top_score:.3f}")

        results[query] = {
            "found": found,
            "rank": rank,
            "top_file": top_file,
            "score": top_score,
        }

    return results


def compare_results(pure_results: Dict, llm_results: Dict) -> None:
    """Compare and analyze results from both approaches."""
    print("\n" + "="*70)
    print("COMPARISON SUMMARY - MISLEADING COMMENTS TEST")
    print("="*70)

    if not llm_results:
        print("Cannot compare - LLM-enhanced test was skipped")
        return

    pure_score = 0
    llm_score = 0

    print(f"\n{'Query':<45} {'Pure':<10} {'LLM':<10}")
    print("-" * 70)

    for query, expected_file in TEST_QUERIES:
        pure_res = pure_results.get(query, {})
        llm_res = llm_results.get(query, {})

        pure_status = f"[OK] Rank {pure_res.get('rank', '?')}" if pure_res.get('found') else "[X] Miss"
        llm_status = f"[OK] Rank {llm_res.get('rank', '?')}" if llm_res.get('found') else "[X] Miss"

        # Scoring: Rank 1 = 3 points, Rank 2 = 2 points, Rank 3 = 1 point
        if pure_res.get('found') and pure_res.get('rank'):
            pure_score += max(0, 4 - pure_res['rank'])
        if llm_res.get('found') and llm_res.get('rank'):
            llm_score += max(0, 4 - llm_res['rank'])

        display_query = query[:42] + "..." if len(query) > 45 else query
        print(f"{display_query:<45} {pure_status:<10} {llm_status:<10}")

    print("-" * 70)
    print(f"{'TOTAL SCORE':<45} {pure_score:<10} {llm_score:<10}")
    print("="*70)

    # Analysis
    print("\nANALYSIS:")
    if llm_score > pure_score:
        improvement = ((llm_score - pure_score) / max(pure_score, 1)) * 100
        print(f"[OK] LLM enhancement improves results by {improvement:.1f}%")
        print(" LLM understands actual code logic despite misleading comments")
        print(" Pure vector search misled by incorrect documentation")
    elif pure_score > llm_score:
        degradation = ((pure_score - llm_score) / max(pure_score, 1)) * 100
        print(f"[X] Pure vector performed {degradation:.1f}% better")
        print(" Unexpected: Pure vector wasn't affected by misleading comments")
    else:
        print("= Both approaches performed equally")
        print(" Test dataset may still be too simple to show differences")

    print("\nKEY INSIGHTS:")
    print("- Pure Vector: Embeds code + comments together, can be misled")
    print("- LLM Enhanced: Analyzes actual code behavior, ignores bad comments")
    print("- Best Use: LLM enhancement crucial for poorly documented codebases")

    print("\nMISLEADING COMMENTS IN TEST:")
    print("1. 'hasher.py' claims 'string utilities' but does bcrypt hashing")
    print("2. 'token.py' has no docstrings, unclear function names")
    print("3. 'handlers.py' says 'database utilities' but is REST API")
    print("4. 'handlers.py' docstrings opposite (create says delete, etc)")
    print("5. 'checker.py' claims 'math functions' but validates emails")
    print("6. 'pool.py' claims 'email sender' but is database pool")


def main():
    parser = argparse.ArgumentParser(
        description="Test pure vector vs LLM-enhanced with misleading comments"
    )
    parser.add_argument(
        "--tool",
        choices=["gemini", "qwen"],
        default="gemini",
        help="LLM tool to use (default: gemini)"
    )
    parser.add_argument(
        "--skip-llm",
        action="store_true",
        help="Skip LLM-enhanced test"
    )
    parser.add_argument(
        "--keep-db",
        type=str,
        help="Save database to specified path for inspection (e.g., ./test_results.db)"
    )
    args = parser.parse_args()

    print("\n" + "="*70)
    print("MISLEADING COMMENTS TEST")
    print("Pure Vector vs LLM-Enhanced with Incorrect Documentation")
    print("="*70)

    # Create test database
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
        db_path = Path(f.name)

    try:
        print(f"\nTest dataset: {len(MISLEADING_DATASET)} Python files")
        print(f"Test queries: {len(TEST_QUERIES)} natural language questions")
        print("\nChallenges:")
        print("- Misleading module docstrings")
        print("- Incorrect function docstrings")
        print("- Missing documentation")
        print("- Unclear function names")

        create_test_database(db_path)

        # Test pure vector search
        pure_results = test_pure_vector_search(db_path)

        # Test LLM-enhanced search
        if not args.skip_llm:
            # Clear semantic_chunks table for LLM test
            with sqlite3.connect(db_path) as conn:
                conn.execute("DELETE FROM semantic_chunks")
                conn.commit()

            llm_results = test_llm_enhanced_search(db_path, args.tool)
        else:
            llm_results = {}
            print("\n[X] LLM-enhanced test skipped (--skip-llm flag)")

        # Compare results
        compare_results(pure_results, llm_results)

    finally:
        # Save or cleanup database
        if args.keep_db:
            import shutil
            save_path = Path(args.keep_db)
            try:
                import gc
                gc.collect()
                time.sleep(0.2)
                shutil.copy2(db_path, save_path)
                print(f"\n[OK] Database saved to: {save_path}")
                print(f"Inspect with: python scripts/inspect_llm_summaries.py {save_path}")
            except Exception as e:
                print(f"\n[X] Failed to save database: {e}")
            finally:
                try:
                    if db_path.exists():
                        db_path.unlink()
                except:
                    pass
        else:
            # Cleanup
            try:
                import gc
                gc.collect()
                time.sleep(0.1)
                if db_path.exists():
                    db_path.unlink()
            except PermissionError:
                print(f"\nWarning: Could not delete temporary database: {db_path}")

    print("\n" + "="*70)
    print("Test completed!")
    print("="*70)


if __name__ == "__main__":
    main()
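
To inspect what the LLM actually stored after a run, the --keep-db flag pairs with the inspector script above, e.g.:

    python test_misleading_comments.py --tool gemini --keep-db ./test_results.db
    python scripts/inspect_llm_summaries.py ./test_results.db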