mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-05 01:50:27 +08:00
- Implement `inspect_llm_summaries.py` to display LLM-generated summaries from the semantic_chunks table in the database.
- Create `show_llm_analysis.py` to demonstrate LLM analysis of misleading code examples, highlighting discrepancies between comments and actual functionality.
- Develop `test_misleading_comments.py` to compare pure vector search with LLM-enhanced search, focusing on the impact of misleading or missing comments on search results.
- Introduce `test_llm_enhanced_search.py` to provide a test suite for evaluating the effectiveness of LLM-enhanced vector search against pure vector search.
- Ensure all new scripts are integrated with the existing codebase and follow the established coding standards.
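
A minimal sketch of the summary-inspection idea, assuming a SQLite `semantic_chunks` table with `file_path` and `summary` columns (the actual schema is not shown here):

import sqlite3

def print_llm_summaries(db_path: str) -> None:
    """Print LLM-generated chunk summaries (table and column names are assumed)."""
    with sqlite3.connect(db_path) as conn:
        rows = conn.execute(
            "SELECT file_path, summary FROM semantic_chunks"  # hypothetical schema
        ).fetchall()
    for file_path, summary in rows:
        print(f"{file_path}: {summary}")
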
546 lines
18 KiB
Python
"""Test suite for comparing pure vector search vs LLM-enhanced vector search.
|
|
|
|
This test demonstrates the difference between:
|
|
1. Pure vector search: Raw code → fastembed → vector search
|
|
2. LLM-enhanced search: Code → LLM summary → fastembed → vector search
|
|
|
|
LLM-enhanced search should provide better semantic matches for natural language queries.
|
|
"""
|
|
|
|
import pytest
|
|
import sqlite3
|
|
import tempfile
|
|
from pathlib import Path
|
|
from typing import Dict, List
|
|
|
|
from codexlens.search.hybrid_search import HybridSearchEngine
|
|
from codexlens.storage.dir_index import DirIndexStore
|
|
|
|
# Check semantic dependencies
|
|
try:
|
|
from codexlens.semantic import SEMANTIC_AVAILABLE
|
|
from codexlens.semantic.embedder import Embedder
|
|
from codexlens.semantic.vector_store import VectorStore
|
|
from codexlens.semantic.chunker import Chunker, ChunkConfig
|
|
from codexlens.semantic.llm_enhancer import (
|
|
LLMEnhancer,
|
|
LLMConfig,
|
|
FileData,
|
|
EnhancedSemanticIndexer,
|
|
SemanticChunk,
|
|
)
|
|
from codexlens.entities import SearchResult
|
|
except ImportError:
|
|
SEMANTIC_AVAILABLE = False
|
|
|
|
|
|
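

# Illustrative overview of the two indexing strategies exercised below
# (a sketch of this module's own flow, not an additional API):
#   pure vector:  Chunker.chunk_sliding_window(code) -> Embedder.embed_single(chunk) -> VectorStore.add_chunks
#   LLM-enhanced: LLMEnhancer summaries via EnhancedSemanticIndexer.index_files -> embeddings -> VectorStore
# Both indexes are then queried through the same HybridSearchEngine.search call.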


# Test code samples representing different functionality
TEST_CODE_SAMPLES = {
    "auth/password_hasher.py": '''"""Password hashing utilities using bcrypt."""
import bcrypt


def hash_password(password: str, salt_rounds: int = 12) -> str:
    """Hash a password using bcrypt with specified salt rounds.

    Args:
        password: Plain text password to hash
        salt_rounds: Number of salt rounds (default 12)

    Returns:
        Hashed password string
    """
    salt = bcrypt.gensalt(rounds=salt_rounds)
    hashed = bcrypt.hashpw(password.encode('utf-8'), salt)
    return hashed.decode('utf-8')


def verify_password(password: str, hashed: str) -> bool:
    """Verify a password against its hash.

    Args:
        password: Plain text password to verify
        hashed: Previously hashed password

    Returns:
        True if password matches hash
    """
    return bcrypt.checkpw(password.encode('utf-8'), hashed.encode('utf-8'))
''',

    "auth/jwt_handler.py": '''"""JWT token generation and validation."""
import jwt
from datetime import datetime, timedelta
from typing import Dict, Optional

SECRET_KEY = "your-secret-key-here"


def create_token(user_id: int, expires_in: int = 3600) -> str:
    """Generate a JWT access token for user authentication.

    Args:
        user_id: User ID to encode in token
        expires_in: Token expiration in seconds (default 1 hour)

    Returns:
        JWT token string
    """
    payload = {
        'user_id': user_id,
        'exp': datetime.utcnow() + timedelta(seconds=expires_in),
        'iat': datetime.utcnow()
    }
    return jwt.encode(payload, SECRET_KEY, algorithm='HS256')


def decode_token(token: str) -> Optional[Dict]:
    """Validate and decode JWT token to extract user information.

    Args:
        token: JWT token string to decode

    Returns:
        Decoded payload dict or None if invalid
    """
    try:
        payload = jwt.decode(token, SECRET_KEY, algorithms=['HS256'])
        return payload
    except jwt.ExpiredSignatureError:
        return None
    except jwt.InvalidTokenError:
        return None
''',

    "api/user_endpoints.py": '''"""REST API endpoints for user management."""
from flask import Flask, request, jsonify
from typing import Dict

app = Flask(__name__)


@app.route('/api/users', methods=['POST'])
def create_user():
    """Create a new user account with email and password.

    Request JSON:
        email: User email address
        password: User password
        name: User full name

    Returns:
        JSON with user_id and success status
    """
    data = request.get_json()
    # Validate input
    if not data.get('email') or not data.get('password'):
        return jsonify({'error': 'Email and password required'}), 400

    # Create user (simplified)
    user_id = 12345  # Would normally insert into database
    return jsonify({'user_id': user_id, 'success': True}), 201


@app.route('/api/users/<int:user_id>', methods=['GET'])
def get_user(user_id: int):
    """Retrieve user profile information by user ID.

    Args:
        user_id: Unique user identifier

    Returns:
        JSON with user profile data
    """
    # Simplified user retrieval
    user = {
        'id': user_id,
        'email': 'user@example.com',
        'name': 'John Doe',
        'created_at': '2024-01-01'
    }
    return jsonify(user), 200
''',

    "utils/validation.py": '''"""Input validation and sanitization utilities."""
import re
from typing import Optional


def validate_email(email: str) -> bool:
    """Check if email address format is valid using regex pattern.

    Args:
        email: Email address string to validate

    Returns:
        True if email format is valid
    """
    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    return bool(re.match(pattern, email))


def sanitize_input(text: str, max_length: int = 255) -> str:
    """Clean user input by removing special characters and limiting length.

    Args:
        text: Input text to sanitize
        max_length: Maximum allowed length

    Returns:
        Sanitized text string
    """
    # Remove special characters
    text = re.sub(r'[<>\"\'&]', '', text)
    # Trim whitespace
    text = text.strip()
    # Limit length
    return text[:max_length]


def validate_password_strength(password: str) -> tuple[bool, Optional[str]]:
    """Validate password meets security requirements.

    Requirements:
        - At least 8 characters
        - Contains uppercase and lowercase
        - Contains numbers
        - Contains special characters

    Args:
        password: Password string to validate

    Returns:
        Tuple of (is_valid, error_message)
    """
    if len(password) < 8:
        return False, "Password must be at least 8 characters"
    if not re.search(r'[A-Z]', password):
        return False, "Password must contain uppercase letter"
    if not re.search(r'[a-z]', password):
        return False, "Password must contain lowercase letter"
    if not re.search(r'[0-9]', password):
        return False, "Password must contain number"
    if not re.search(r'[!@#$%^&*(),.?":{}|<>]', password):
        return False, "Password must contain special character"
    return True, None
''',

    "database/connection.py": '''"""Database connection pooling and management."""
import psycopg2
from psycopg2 import pool
from typing import Optional
from contextlib import contextmanager


class DatabasePool:
    """PostgreSQL connection pool manager for handling multiple concurrent connections."""

    def __init__(self, min_conn: int = 1, max_conn: int = 10):
        """Initialize database connection pool.

        Args:
            min_conn: Minimum number of connections to maintain
            max_conn: Maximum number of connections allowed
        """
        self.pool = psycopg2.pool.SimpleConnectionPool(
            min_conn,
            max_conn,
            user='dbuser',
            password='dbpass',
            host='localhost',
            port='5432',
            database='myapp'
        )

    @contextmanager
    def get_connection(self):
        """Get a connection from pool as context manager.

        Yields:
            Database connection object
        """
        conn = self.pool.getconn()
        try:
            yield conn
            conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            self.pool.putconn(conn)

    def close_all(self):
        """Close all connections in pool."""
        self.pool.closeall()
'''
}
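
# NOTE: the samples above are embedded as plain strings and written into a
# temporary SQLite index by the test fixtures below; they are never imported
# or executed, so bcrypt, jwt, flask and psycopg2 do not need to be installed.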


# Natural language queries to test semantic understanding
TEST_QUERIES = [
    {
        "query": "How do I securely hash passwords?",
        "expected_file": "auth/password_hasher.py",
        "description": "Should find password hashing implementation",
    },
    {
        "query": "Generate JWT token for user authentication",
        "expected_file": "auth/jwt_handler.py",
        "description": "Should find JWT token creation logic",
    },
    {
        "query": "Create new user account via REST API",
        "expected_file": "api/user_endpoints.py",
        "description": "Should find user registration endpoint",
    },
    {
        "query": "Validate email address format",
        "expected_file": "utils/validation.py",
        "description": "Should find email validation function",
    },
    {
        "query": "Connect to PostgreSQL database",
        "expected_file": "database/connection.py",
        "description": "Should find database connection management",
    },
    {
        "query": "Check password complexity requirements",
        "expected_file": "utils/validation.py",
        "description": "Should find password strength validation",
    },
]
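
# Both TestPureVectorSearch and TestLLMEnhancedSearch are judged on the same
# criterion: the expected file must appear in the top 3 results, and its rank
# feeds the score in TestSearchComparison (rank 1 = 3 points, rank 2 = 2, rank 3 = 1).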


@pytest.mark.skipif(not SEMANTIC_AVAILABLE, reason="Semantic dependencies not available")
class TestPureVectorSearch:
    """Test pure vector search (code → fastembed → search)."""

    @staticmethod
    def _build_pure_vector_db():
        """Build a temporary database with pure vector embeddings (no LLM) and yield its path."""
        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
            db_path = Path(f.name)

        # Initialize database
        store = DirIndexStore(db_path)
        store.initialize()

        # Add test files
        with store._get_connection() as conn:
            for path, content in TEST_CODE_SAMPLES.items():
                name = path.split('/')[-1]
                conn.execute(
                    """INSERT INTO files (name, full_path, content, language, mtime)
                       VALUES (?, ?, ?, ?, ?)""",
                    (name, path, content, "python", 0.0)
                )
            conn.commit()

        # Generate embeddings using pure vector approach (raw code)
        embedder = Embedder(profile="code")
        vector_store = VectorStore(db_path)
        chunker = Chunker(config=ChunkConfig(max_chunk_size=2000))

        with sqlite3.connect(db_path) as conn:
            conn.row_factory = sqlite3.Row
            rows = conn.execute("SELECT full_path, content FROM files").fetchall()

        for row in rows:
            # Pure vector: directly chunk and embed raw code
            chunks = chunker.chunk_sliding_window(
                row["content"],
                file_path=row["full_path"],
                language="python"
            )
            for chunk in chunks:
                chunk.embedding = embedder.embed_single(chunk.content)
                chunk.metadata["strategy"] = "pure_vector"
            if chunks:
                vector_store.add_chunks(chunks, row["full_path"])

        yield db_path

        store.close()
        if db_path.exists():
            db_path.unlink()

    @pytest.fixture
    def pure_vector_db(self):
        """Create database with pure vector embeddings (no LLM)."""
        # Delegate to the plain generator above; pytest forbids calling
        # fixture functions directly, so TestSearchComparison reuses
        # _build_pure_vector_db() instead of this fixture.
        yield from self._build_pure_vector_db()

    def test_pure_vector_queries(self, pure_vector_db):
        """Test natural language queries with pure vector search."""
        engine = HybridSearchEngine()
        results = {}

        for test_case in TEST_QUERIES:
            query = test_case["query"]
            expected_file = test_case["expected_file"]

            search_results = engine.search(
                pure_vector_db,
                query,
                limit=5,
                enable_vector=True,
                pure_vector=True,
            )

            # Check if expected file is in top 3 results
            top_files = [r.path for r in search_results[:3]]
            found = expected_file in top_files
            rank = top_files.index(expected_file) + 1 if found else None

            results[query] = {
                "found": found,
                "rank": rank,
                "top_result": search_results[0].path if search_results else None,
                "top_score": search_results[0].score if search_results else 0.0,
            }

        return results


@pytest.mark.skipif(not SEMANTIC_AVAILABLE, reason="Semantic dependencies not available")
class TestLLMEnhancedSearch:
    """Test LLM-enhanced vector search (code → LLM → fastembed → search)."""

    @staticmethod
    def _build_llm_enhanced_db():
        """Build a temporary database with LLM-enhanced embeddings and yield its path."""
        # Skip if CCW not available
        llm_config = LLMConfig(enabled=True, tool="gemini")
        enhancer = LLMEnhancer(llm_config)
        if not enhancer.check_available():
            pytest.skip("CCW CLI not available for LLM enhancement")

        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
            db_path = Path(f.name)

        # Initialize database
        store = DirIndexStore(db_path)
        store.initialize()

        # Add test files
        with store._get_connection() as conn:
            for path, content in TEST_CODE_SAMPLES.items():
                name = path.split('/')[-1]
                conn.execute(
                    """INSERT INTO files (name, full_path, content, language, mtime)
                       VALUES (?, ?, ?, ?, ?)""",
                    (name, path, content, "python", 0.0)
                )
            conn.commit()

        # Generate embeddings using LLM-enhanced approach
        embedder = Embedder(profile="code")
        vector_store = VectorStore(db_path)

        # Create enhanced indexer
        indexer = EnhancedSemanticIndexer(enhancer, embedder, vector_store)

        # Prepare file data
        file_data_list = [
            FileData(path=path, content=content, language="python")
            for path, content in TEST_CODE_SAMPLES.items()
        ]

        # Index with LLM enhancement
        indexed = indexer.index_files(file_data_list)
        print(f"\nLLM-enhanced indexing: {indexed}/{len(file_data_list)} files")

        yield db_path

        store.close()
        if db_path.exists():
            db_path.unlink()

    @pytest.fixture
    def llm_enhanced_db(self):
        """Create database with LLM-enhanced embeddings."""
        # Delegate to the plain generator above so TestSearchComparison can
        # reuse it without calling a fixture function directly.
        yield from self._build_llm_enhanced_db()

    def test_llm_enhanced_queries(self, llm_enhanced_db):
        """Test natural language queries with LLM-enhanced search."""
        engine = HybridSearchEngine()
        results = {}

        for test_case in TEST_QUERIES:
            query = test_case["query"]
            expected_file = test_case["expected_file"]

            search_results = engine.search(
                llm_enhanced_db,
                query,
                limit=5,
                enable_vector=True,
                pure_vector=True,
            )

            # Check if expected file is in top 3 results
            top_files = [r.path for r in search_results[:3]]
            found = expected_file in top_files
            rank = top_files.index(expected_file) + 1 if found else None

            results[query] = {
                "found": found,
                "rank": rank,
                "top_result": search_results[0].path if search_results else None,
                "top_score": search_results[0].score if search_results else 0.0,
            }

        return results


@pytest.mark.skipif(not SEMANTIC_AVAILABLE, reason="Semantic dependencies not available")
class TestSearchComparison:
    """Compare pure vector vs LLM-enhanced search side-by-side."""

    def test_comparison(self):
        """Run comprehensive comparison of both approaches."""
        # This test runs both approaches and compares results
        print("\n" + "=" * 70)
        print("SEMANTIC SEARCH COMPARISON TEST")
        print("=" * 70)

        try:
            # Test pure vector search
            print("\n1. Testing Pure Vector Search (Code → fastembed)")
            print("-" * 70)
            # Use the plain builder generators; the @pytest.fixture-decorated
            # versions cannot be called directly.
            pure_test = TestPureVectorSearch()
            pure_db = next(pure_test._build_pure_vector_db())
            pure_results = pure_test.test_pure_vector_queries(pure_db)

            # Test LLM-enhanced search
            print("\n2. Testing LLM-Enhanced Search (Code → LLM → fastembed)")
            print("-" * 70)
            llm_test = TestLLMEnhancedSearch()
            llm_db = next(llm_test._build_llm_enhanced_db())
            llm_results = llm_test.test_llm_enhanced_queries(llm_db)

            # Compare results
            print("\n3. COMPARISON RESULTS")
            print("=" * 70)
            print(f"{'Query':<50} {'Pure Vec':<12} {'LLM Enhanced':<12}")
            print("-" * 70)

            pure_score = 0
            llm_score = 0

            for test_case in TEST_QUERIES:
                query = test_case["query"][:47] + "..." if len(test_case["query"]) > 50 else test_case["query"]

                pure_res = pure_results.get(test_case["query"], {})
                llm_res = llm_results.get(test_case["query"], {})

                pure_status = f"[OK] Rank {pure_res.get('rank', '?')}" if pure_res.get('found') else "[X] Not found"
                llm_status = f"[OK] Rank {llm_res.get('rank', '?')}" if llm_res.get('found') else "[X] Not found"

                print(f"{query:<50} {pure_status:<12} {llm_status:<12}")

                if pure_res.get('found'):
                    pure_score += (4 - pure_res['rank'])  # 3 points for rank 1, 2 for rank 2, etc.
                if llm_res.get('found'):
                    llm_score += (4 - llm_res['rank'])

            print("-" * 70)
            print(f"{'TOTAL SCORE':<50} {pure_score:<12} {llm_score:<12}")
            print("=" * 70)

            # Interpretation
            print("\nINTERPRETATION:")
            if llm_score > pure_score:
                improvement = ((llm_score - pure_score) / max(pure_score, 1)) * 100
                print(f"[OK] LLM enhancement improves results by {improvement:.1f}%")
                print("     LLM summaries match natural language queries better than raw code")
            elif pure_score > llm_score:
                print("[X] Pure vector search performed better (unexpected)")
                print("    This may indicate LLM summaries are too generic")
            else:
                print("= Both approaches performed equally")

        except Exception as e:
            pytest.fail(f"Comparison test failed: {e}")


if __name__ == "__main__":
    pytest.main([__file__, "-v", "-s"])