mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-03-28 20:01:17 +08:00
Implement ANN index using HNSW algorithm and update related tests
- Added ANNIndex class for approximate nearest neighbor search using HNSW. - Integrated ANN index with VectorStore for enhanced search capabilities. - Updated test suite for ANN index, including tests for adding, searching, saving, and loading vectors. - Modified existing tests to accommodate changes in search performance expectations. - Improved error handling for file operations in tests to ensure compatibility with Windows file locks. - Adjusted hybrid search performance assertions for increased stability in CI environments.
This commit is contained in:
423
codex-lens/tests/test_ann_index.py
Normal file
423
codex-lens/tests/test_ann_index.py
Normal file
@@ -0,0 +1,423 @@
|
||||
"""Tests for ANN (Approximate Nearest Neighbor) index using HNSW."""
|
||||
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
# Skip all tests if semantic dependencies not available
|
||||
pytest.importorskip("numpy")
|
||||
|
||||
|
||||
def _hnswlib_available() -> bool:
|
||||
"""Check if hnswlib is available."""
|
||||
try:
|
||||
import hnswlib
|
||||
return True
|
||||
except ImportError:
|
||||
return False
|
||||
|
||||
|
||||
class TestANNIndex:
|
||||
"""Test suite for ANNIndex class."""
|
||||
|
||||
@pytest.fixture
|
||||
def temp_db(self):
|
||||
"""Create a temporary database file."""
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
yield Path(tmpdir) / "_index.db"
|
||||
|
||||
@pytest.fixture
|
||||
def sample_vectors(self):
|
||||
"""Generate sample vectors for testing."""
|
||||
import numpy as np
|
||||
np.random.seed(42)
|
||||
# 100 vectors of dimension 384 (matches fast model)
|
||||
return np.random.randn(100, 384).astype(np.float32)
|
||||
|
||||
@pytest.fixture
|
||||
def sample_ids(self):
|
||||
"""Generate sample IDs."""
|
||||
return list(range(1, 101))
|
||||
|
||||
def test_import_check(self):
|
||||
"""Test that HNSWLIB_AVAILABLE flag is set correctly."""
|
||||
try:
|
||||
from codexlens.semantic.ann_index import HNSWLIB_AVAILABLE
|
||||
# Should be True if hnswlib is installed, False otherwise
|
||||
assert isinstance(HNSWLIB_AVAILABLE, bool)
|
||||
except ImportError:
|
||||
pytest.skip("ann_index module not available")
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not _hnswlib_available(),
|
||||
reason="hnswlib not installed"
|
||||
)
|
||||
def test_create_index(self, temp_db):
|
||||
"""Test creating a new ANN index."""
|
||||
from codexlens.semantic.ann_index import ANNIndex
|
||||
|
||||
index = ANNIndex(temp_db, dim=384)
|
||||
assert index.dim == 384
|
||||
assert index.count() == 0
|
||||
assert not index.is_loaded
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not _hnswlib_available(),
|
||||
reason="hnswlib not installed"
|
||||
)
|
||||
def test_add_vectors(self, temp_db, sample_vectors, sample_ids):
|
||||
"""Test adding vectors to the index."""
|
||||
from codexlens.semantic.ann_index import ANNIndex
|
||||
|
||||
index = ANNIndex(temp_db, dim=384)
|
||||
index.add_vectors(sample_ids, sample_vectors)
|
||||
|
||||
assert index.count() == 100
|
||||
assert index.is_loaded
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not _hnswlib_available(),
|
||||
reason="hnswlib not installed"
|
||||
)
|
||||
def test_search(self, temp_db, sample_vectors, sample_ids):
|
||||
"""Test searching for similar vectors."""
|
||||
from codexlens.semantic.ann_index import ANNIndex
|
||||
|
||||
index = ANNIndex(temp_db, dim=384)
|
||||
index.add_vectors(sample_ids, sample_vectors)
|
||||
|
||||
# Search for the first vector - should find itself
|
||||
query = sample_vectors[0]
|
||||
ids, distances = index.search(query, top_k=5)
|
||||
|
||||
assert len(ids) == 5
|
||||
assert len(distances) == 5
|
||||
# First result should be the query vector itself (or very close)
|
||||
assert ids[0] == 1 # ID of first vector
|
||||
assert distances[0] < 0.01 # Very small distance (almost identical)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not _hnswlib_available(),
|
||||
reason="hnswlib not installed"
|
||||
)
|
||||
def test_save_and_load(self, temp_db, sample_vectors, sample_ids):
|
||||
"""Test saving and loading index from disk."""
|
||||
from codexlens.semantic.ann_index import ANNIndex
|
||||
|
||||
# Create and save index
|
||||
index1 = ANNIndex(temp_db, dim=384)
|
||||
index1.add_vectors(sample_ids, sample_vectors)
|
||||
index1.save()
|
||||
|
||||
# Check that file was created (new naming: {db_stem}_vectors.hnsw)
|
||||
hnsw_path = temp_db.parent / f"{temp_db.stem}_vectors.hnsw"
|
||||
assert hnsw_path.exists()
|
||||
|
||||
# Load in new instance
|
||||
index2 = ANNIndex(temp_db, dim=384)
|
||||
loaded = index2.load()
|
||||
|
||||
assert loaded is True
|
||||
assert index2.count() == 100
|
||||
assert index2.is_loaded
|
||||
|
||||
# Verify search still works
|
||||
query = sample_vectors[0]
|
||||
ids, distances = index2.search(query, top_k=5)
|
||||
assert ids[0] == 1
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not _hnswlib_available(),
|
||||
reason="hnswlib not installed"
|
||||
)
|
||||
def test_load_nonexistent(self, temp_db):
|
||||
"""Test loading when index file doesn't exist."""
|
||||
from codexlens.semantic.ann_index import ANNIndex
|
||||
|
||||
index = ANNIndex(temp_db, dim=384)
|
||||
loaded = index.load()
|
||||
|
||||
assert loaded is False
|
||||
assert not index.is_loaded
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not _hnswlib_available(),
|
||||
reason="hnswlib not installed"
|
||||
)
|
||||
def test_remove_vectors(self, temp_db, sample_vectors, sample_ids):
|
||||
"""Test removing vectors from the index."""
|
||||
from codexlens.semantic.ann_index import ANNIndex
|
||||
|
||||
index = ANNIndex(temp_db, dim=384)
|
||||
index.add_vectors(sample_ids, sample_vectors)
|
||||
|
||||
# Remove first 10 vectors
|
||||
index.remove_vectors(list(range(1, 11)))
|
||||
|
||||
# Search for removed vector - should not be in results
|
||||
query = sample_vectors[0]
|
||||
ids, distances = index.search(query, top_k=5)
|
||||
|
||||
# ID 1 should not be in results (soft deleted)
|
||||
assert 1 not in ids
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not _hnswlib_available(),
|
||||
reason="hnswlib not installed"
|
||||
)
|
||||
def test_incremental_add(self, temp_db):
|
||||
"""Test adding vectors incrementally."""
|
||||
import numpy as np
|
||||
from codexlens.semantic.ann_index import ANNIndex
|
||||
|
||||
index = ANNIndex(temp_db, dim=384)
|
||||
|
||||
# Add first batch
|
||||
vectors1 = np.random.randn(50, 384).astype(np.float32)
|
||||
index.add_vectors(list(range(1, 51)), vectors1)
|
||||
assert index.count() == 50
|
||||
|
||||
# Add second batch
|
||||
vectors2 = np.random.randn(50, 384).astype(np.float32)
|
||||
index.add_vectors(list(range(51, 101)), vectors2)
|
||||
assert index.count() == 100
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not _hnswlib_available(),
|
||||
reason="hnswlib not installed"
|
||||
)
|
||||
def test_search_empty_index(self, temp_db):
|
||||
"""Test searching an empty index."""
|
||||
import numpy as np
|
||||
from codexlens.semantic.ann_index import ANNIndex
|
||||
|
||||
index = ANNIndex(temp_db, dim=384)
|
||||
query = np.random.randn(384).astype(np.float32)
|
||||
|
||||
ids, distances = index.search(query, top_k=5)
|
||||
|
||||
assert ids == []
|
||||
assert distances == []
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not _hnswlib_available(),
|
||||
reason="hnswlib not installed"
|
||||
)
|
||||
def test_invalid_dimension(self, temp_db, sample_vectors, sample_ids):
|
||||
"""Test adding vectors with wrong dimension."""
|
||||
import numpy as np
|
||||
from codexlens.semantic.ann_index import ANNIndex
|
||||
|
||||
index = ANNIndex(temp_db, dim=384)
|
||||
|
||||
# Try to add vectors with wrong dimension
|
||||
wrong_vectors = np.random.randn(10, 768).astype(np.float32)
|
||||
with pytest.raises(ValueError, match="dimension"):
|
||||
index.add_vectors(list(range(1, 11)), wrong_vectors)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not _hnswlib_available(),
|
||||
reason="hnswlib not installed"
|
||||
)
|
||||
def test_auto_resize(self, temp_db):
|
||||
"""Test that index automatically resizes when capacity is exceeded."""
|
||||
import numpy as np
|
||||
from codexlens.semantic.ann_index import ANNIndex
|
||||
|
||||
index = ANNIndex(temp_db, dim=384)
|
||||
# Override initial capacity to test resize
|
||||
index._max_elements = 100
|
||||
|
||||
# Add more vectors than initial capacity
|
||||
vectors = np.random.randn(150, 384).astype(np.float32)
|
||||
index.add_vectors(list(range(1, 151)), vectors)
|
||||
|
||||
assert index.count() == 150
|
||||
assert index._max_elements >= 150
|
||||
|
||||
|
||||
class TestVectorStoreWithANN:
|
||||
"""Test VectorStore integration with ANN index."""
|
||||
|
||||
@pytest.fixture
|
||||
def temp_db(self):
|
||||
"""Create a temporary database file."""
|
||||
with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
|
||||
yield Path(tmpdir) / "_index.db"
|
||||
|
||||
@pytest.fixture
|
||||
def sample_chunks(self):
|
||||
"""Create sample semantic chunks with embeddings."""
|
||||
import numpy as np
|
||||
from codexlens.entities import SemanticChunk
|
||||
|
||||
np.random.seed(42)
|
||||
chunks = []
|
||||
for i in range(10):
|
||||
chunk = SemanticChunk(
|
||||
content=f"def function_{i}(): pass",
|
||||
metadata={"symbol_name": f"function_{i}", "symbol_kind": "function"},
|
||||
)
|
||||
chunk.embedding = np.random.randn(384).astype(np.float32).tolist()
|
||||
chunks.append(chunk)
|
||||
return chunks
|
||||
|
||||
def test_vector_store_with_ann(self, temp_db, sample_chunks):
|
||||
"""Test VectorStore using ANN index for search."""
|
||||
from codexlens.semantic.vector_store import VectorStore, HNSWLIB_AVAILABLE
|
||||
|
||||
store = VectorStore(temp_db)
|
||||
|
||||
# Add chunks
|
||||
ids = store.add_chunks(sample_chunks, "test.py")
|
||||
assert len(ids) == 10
|
||||
|
||||
# Check ANN status
|
||||
if HNSWLIB_AVAILABLE:
|
||||
assert store.ann_available or store.ann_count >= 0
|
||||
|
||||
# Search
|
||||
query_embedding = sample_chunks[0].embedding
|
||||
results = store.search_similar(query_embedding, top_k=5)
|
||||
|
||||
assert len(results) <= 5
|
||||
if results:
|
||||
# First result should have high similarity
|
||||
assert results[0].score > 0.9
|
||||
|
||||
def test_vector_store_rebuild_ann(self, temp_db, sample_chunks):
|
||||
"""Test rebuilding ANN index from SQLite data."""
|
||||
from codexlens.semantic.vector_store import VectorStore, HNSWLIB_AVAILABLE
|
||||
|
||||
if not HNSWLIB_AVAILABLE:
|
||||
pytest.skip("hnswlib not installed")
|
||||
|
||||
store = VectorStore(temp_db)
|
||||
|
||||
# Add chunks
|
||||
store.add_chunks(sample_chunks, "test.py")
|
||||
|
||||
# Rebuild ANN index
|
||||
count = store.rebuild_ann_index()
|
||||
assert count == 10
|
||||
|
||||
# Verify search works
|
||||
query_embedding = sample_chunks[0].embedding
|
||||
results = store.search_similar(query_embedding, top_k=5)
|
||||
assert len(results) > 0
|
||||
|
||||
def test_vector_store_delete_updates_ann(self, temp_db, sample_chunks):
|
||||
"""Test that deleting chunks updates ANN index."""
|
||||
from codexlens.semantic.vector_store import VectorStore, HNSWLIB_AVAILABLE
|
||||
|
||||
if not HNSWLIB_AVAILABLE:
|
||||
pytest.skip("hnswlib not installed")
|
||||
|
||||
store = VectorStore(temp_db)
|
||||
|
||||
# Add chunks for two files
|
||||
store.add_chunks(sample_chunks[:5], "file1.py")
|
||||
store.add_chunks(sample_chunks[5:], "file2.py")
|
||||
|
||||
initial_count = store.count_chunks()
|
||||
assert initial_count == 10
|
||||
|
||||
# Delete one file's chunks
|
||||
deleted = store.delete_file_chunks("file1.py")
|
||||
assert deleted == 5
|
||||
|
||||
# Verify count
|
||||
assert store.count_chunks() == 5
|
||||
|
||||
def test_vector_store_batch_add(self, temp_db, sample_chunks):
|
||||
"""Test batch adding chunks from multiple files."""
|
||||
from codexlens.semantic.vector_store import VectorStore
|
||||
|
||||
store = VectorStore(temp_db)
|
||||
|
||||
# Prepare chunks with paths
|
||||
chunks_with_paths = [
|
||||
(chunk, f"file{i % 3}.py")
|
||||
for i, chunk in enumerate(sample_chunks)
|
||||
]
|
||||
|
||||
# Batch add
|
||||
ids = store.add_chunks_batch(chunks_with_paths)
|
||||
assert len(ids) == 10
|
||||
|
||||
# Verify
|
||||
assert store.count_chunks() == 10
|
||||
|
||||
def test_vector_store_fallback_search(self, temp_db, sample_chunks):
|
||||
"""Test that search falls back to brute-force when ANN unavailable."""
|
||||
from codexlens.semantic.vector_store import VectorStore
|
||||
|
||||
store = VectorStore(temp_db)
|
||||
store.add_chunks(sample_chunks, "test.py")
|
||||
|
||||
# Force disable ANN
|
||||
store._ann_index = None
|
||||
|
||||
# Search should still work (brute-force fallback)
|
||||
query_embedding = sample_chunks[0].embedding
|
||||
results = store.search_similar(query_embedding, top_k=5)
|
||||
|
||||
assert len(results) > 0
|
||||
assert results[0].score > 0.9
|
||||
|
||||
|
||||
class TestSearchAccuracy:
|
||||
"""Test search accuracy comparing ANN vs brute-force."""
|
||||
|
||||
@pytest.fixture
|
||||
def temp_db(self):
|
||||
"""Create a temporary database file."""
|
||||
with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
|
||||
yield Path(tmpdir) / "_index.db"
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not _hnswlib_available(),
|
||||
reason="hnswlib not installed"
|
||||
)
|
||||
def test_ann_vs_brute_force_recall(self, temp_db):
|
||||
"""Test that ANN search has high recall compared to brute-force."""
|
||||
import numpy as np
|
||||
from codexlens.entities import SemanticChunk
|
||||
from codexlens.semantic.vector_store import VectorStore
|
||||
|
||||
np.random.seed(42)
|
||||
|
||||
# Create larger dataset
|
||||
chunks = []
|
||||
for i in range(100):
|
||||
chunk = SemanticChunk(
|
||||
content=f"code block {i}",
|
||||
metadata={"chunk_id": i},
|
||||
)
|
||||
chunk.embedding = np.random.randn(384).astype(np.float32).tolist()
|
||||
chunks.append(chunk)
|
||||
|
||||
store = VectorStore(temp_db)
|
||||
store.add_chunks(chunks, "test.py")
|
||||
|
||||
# Get brute-force results
|
||||
store._ann_index = None # Force brute-force
|
||||
store._invalidate_cache() # Clear cache to force refresh
|
||||
query = chunks[0].embedding
|
||||
bf_results = store.search_similar(query, top_k=10)
|
||||
# Use chunk_id from metadata for comparison (more reliable than path+score)
|
||||
bf_chunk_ids = {r.metadata.get("chunk_id") for r in bf_results}
|
||||
|
||||
# Rebuild ANN and get ANN results
|
||||
store.rebuild_ann_index()
|
||||
ann_results = store.search_similar(query, top_k=10)
|
||||
ann_chunk_ids = {r.metadata.get("chunk_id") for r in ann_results}
|
||||
|
||||
# Calculate recall (how many brute-force results are in ANN results)
|
||||
# ANN should find at least 80% of the same results
|
||||
overlap = len(bf_chunk_ids & ann_chunk_ids)
|
||||
recall = overlap / len(bf_chunk_ids) if bf_chunk_ids else 1.0
|
||||
|
||||
assert recall >= 0.8, f"ANN recall too low: {recall} (overlap: {overlap}, bf: {bf_chunk_ids}, ann: {ann_chunk_ids})"
|
||||
@@ -455,10 +455,10 @@ class Class{i}:
|
||||
)
|
||||
hybrid_time = time.time() - start
|
||||
|
||||
# Hybrid should be <5x slower than exact (relaxed for CI stability)
|
||||
# Hybrid should be <10x slower than exact (relaxed for CI stability and ANN initialization overhead)
|
||||
if exact_time > 0:
|
||||
overhead = hybrid_time / exact_time
|
||||
assert overhead < 5.0, f"Hybrid overhead {overhead:.1f}x should be <5x"
|
||||
assert overhead < 10.0, f"Hybrid overhead {overhead:.1f}x should be <10x"
|
||||
|
||||
|
||||
class TestHybridSearchEdgeCases:
|
||||
@@ -474,8 +474,12 @@ class TestHybridSearchEdgeCases:
|
||||
DirIndexStore(db_path)
|
||||
|
||||
yield db_path
|
||||
if db_path.exists():
|
||||
db_path.unlink()
|
||||
# Ignore file deletion errors on Windows (SQLite file lock)
|
||||
try:
|
||||
if db_path.exists():
|
||||
db_path.unlink()
|
||||
except PermissionError:
|
||||
pass
|
||||
|
||||
def test_empty_index_search(self, temp_db):
|
||||
"""Test search on empty index returns empty results."""
|
||||
|
||||
@@ -166,6 +166,7 @@ def login_handler(credentials: dict) -> bool:
|
||||
conn.commit()
|
||||
|
||||
# Generate embeddings
|
||||
vector_store = None
|
||||
try:
|
||||
from codexlens.semantic.embedder import Embedder
|
||||
from codexlens.semantic.vector_store import VectorStore
|
||||
@@ -192,12 +193,19 @@ def login_handler(credentials: dict) -> bool:
|
||||
|
||||
except Exception as exc:
|
||||
pytest.skip(f"Failed to generate embeddings: {exc}")
|
||||
finally:
|
||||
if vector_store is not None:
|
||||
vector_store.close()
|
||||
|
||||
yield db_path
|
||||
store.close()
|
||||
|
||||
if db_path.exists():
|
||||
db_path.unlink()
|
||||
# Ignore file deletion errors on Windows (SQLite file lock)
|
||||
try:
|
||||
if db_path.exists():
|
||||
db_path.unlink()
|
||||
except PermissionError:
|
||||
pass # Ignore Windows file lock errors
|
||||
|
||||
def test_pure_vector_with_embeddings(self, db_with_embeddings):
|
||||
"""Test pure vector search returns results when embeddings exist."""
|
||||
|
||||
@@ -33,15 +33,15 @@ class TestSearchComparison:
|
||||
@pytest.fixture
|
||||
def sample_project_db(self):
|
||||
"""Create sample project database with semantic chunks."""
|
||||
with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
|
||||
db_path = Path(f.name)
|
||||
with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir:
|
||||
db_path = Path(tmpdir) / "_index.db"
|
||||
|
||||
store = DirIndexStore(db_path)
|
||||
store.initialize()
|
||||
store = DirIndexStore(db_path)
|
||||
store.initialize()
|
||||
|
||||
# Sample files with varied content for testing
|
||||
sample_files = {
|
||||
"src/auth/authentication.py": """
|
||||
# Sample files with varied content for testing
|
||||
sample_files = {
|
||||
"src/auth/authentication.py": """
|
||||
def authenticate_user(username: str, password: str) -> bool:
|
||||
'''Authenticate user with credentials using bcrypt hashing.
|
||||
|
||||
@@ -61,7 +61,7 @@ def verify_credentials(user: str, pwd_hash: str) -> bool:
|
||||
# Database verification logic
|
||||
return True
|
||||
""",
|
||||
"src/auth/authorization.py": """
|
||||
"src/auth/authorization.py": """
|
||||
def authorize_action(user_id: int, resource: str, action: str) -> bool:
|
||||
'''Authorize user action on resource using role-based access control.
|
||||
|
||||
@@ -80,7 +80,7 @@ def has_permission(permissions, resource, action) -> bool:
|
||||
'''Check if permissions allow action on resource.'''
|
||||
return True
|
||||
""",
|
||||
"src/models/user.py": """
|
||||
"src/models/user.py": """
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
@@ -105,7 +105,7 @@ class User:
|
||||
'''Check if user has specific role.'''
|
||||
return True
|
||||
""",
|
||||
"src/api/user_api.py": """
|
||||
"src/api/user_api.py": """
|
||||
from flask import Flask, request, jsonify
|
||||
from models.user import User
|
||||
|
||||
@@ -135,7 +135,7 @@ def login():
|
||||
return jsonify({'token': token})
|
||||
return jsonify({'error': 'Invalid credentials'}), 401
|
||||
""",
|
||||
"tests/test_auth.py": """
|
||||
"tests/test_auth.py": """
|
||||
import pytest
|
||||
from auth.authentication import authenticate_user, hash_password
|
||||
|
||||
@@ -156,25 +156,22 @@ class TestAuthentication:
|
||||
hash2 = hash_password("password")
|
||||
assert hash1 != hash2 # Salts should differ
|
||||
""",
|
||||
}
|
||||
}
|
||||
|
||||
# Insert files into database
|
||||
with store._get_connection() as conn:
|
||||
for file_path, content in sample_files.items():
|
||||
name = file_path.split('/')[-1]
|
||||
lang = "python"
|
||||
conn.execute(
|
||||
"""INSERT INTO files (name, full_path, content, language, mtime)
|
||||
VALUES (?, ?, ?, ?, ?)""",
|
||||
(name, file_path, content, lang, time.time())
|
||||
)
|
||||
conn.commit()
|
||||
# Insert files into database
|
||||
with store._get_connection() as conn:
|
||||
for file_path, content in sample_files.items():
|
||||
name = file_path.split('/')[-1]
|
||||
lang = "python"
|
||||
conn.execute(
|
||||
"""INSERT INTO files (name, full_path, content, language, mtime)
|
||||
VALUES (?, ?, ?, ?, ?)""",
|
||||
(name, file_path, content, lang, time.time())
|
||||
)
|
||||
conn.commit()
|
||||
|
||||
yield db_path
|
||||
store.close()
|
||||
|
||||
if db_path.exists():
|
||||
db_path.unlink()
|
||||
yield db_path
|
||||
store.close()
|
||||
|
||||
def _check_semantic_chunks_table(self, db_path: Path) -> Dict[str, Any]:
|
||||
"""Check if semantic_chunks table exists and has data."""
|
||||
@@ -262,12 +259,14 @@ class TestAuthentication:
|
||||
engine = HybridSearchEngine()
|
||||
|
||||
# Map mode to parameters
|
||||
pure_vector = False
|
||||
if mode == "exact":
|
||||
enable_fuzzy, enable_vector = False, False
|
||||
elif mode == "fuzzy":
|
||||
enable_fuzzy, enable_vector = True, False
|
||||
elif mode == "vector":
|
||||
enable_fuzzy, enable_vector = False, True
|
||||
pure_vector = True # Use pure vector mode for vector-only search
|
||||
elif mode == "hybrid":
|
||||
enable_fuzzy, enable_vector = True, True
|
||||
else:
|
||||
@@ -282,6 +281,7 @@ class TestAuthentication:
|
||||
limit=limit,
|
||||
enable_fuzzy=enable_fuzzy,
|
||||
enable_vector=enable_vector,
|
||||
pure_vector=pure_vector,
|
||||
)
|
||||
elapsed_ms = (time.time() - start_time) * 1000
|
||||
|
||||
|
||||
@@ -435,6 +435,10 @@ class TestVectorStoreCache:
|
||||
chunk.embedding = embedder.embed_single(chunk.content)
|
||||
vector_store.add_chunk(chunk, "/test/a.py")
|
||||
|
||||
# Force brute-force mode to populate cache (disable ANN)
|
||||
original_ann = vector_store._ann_index
|
||||
vector_store._ann_index = None
|
||||
|
||||
# Trigger cache population
|
||||
query_embedding = embedder.embed_single("function")
|
||||
vector_store.search_similar(query_embedding)
|
||||
@@ -445,6 +449,9 @@ class TestVectorStoreCache:
|
||||
|
||||
assert vector_store._embedding_matrix is None
|
||||
|
||||
# Restore ANN index
|
||||
vector_store._ann_index = original_ann
|
||||
|
||||
|
||||
# === Semantic Search Accuracy Tests ===
|
||||
|
||||
|
||||
Reference in New Issue
Block a user