diff --git a/codex-lens/benchmarks/binary_search_microbenchmark.py b/codex-lens/benchmarks/binary_search_microbenchmark.py new file mode 100644 index 00000000..d4bb8397 --- /dev/null +++ b/codex-lens/benchmarks/binary_search_microbenchmark.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python +"""Micro-benchmark for BinaryANNIndex search performance. + +Measures the actual speedup of vectorized Hamming distance computation. +""" + +from __future__ import annotations + +import gc +import statistics +import sys +import time +from pathlib import Path + +# Add src to path +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +import numpy as np + + +def old_search_implementation(query_arr: np.ndarray, vectors: dict, id_list: list, top_k: int): + """Original O(N) loop-based implementation for comparison.""" + packed_dim = len(query_arr) + distances = [] + + for vec_id in id_list: + vec = vectors[vec_id] + vec_arr = np.frombuffer(vec, dtype=np.uint8) + xor = np.bitwise_xor(query_arr, vec_arr) + dist = int(np.unpackbits(xor).sum()) + distances.append((vec_id, dist)) + + distances.sort(key=lambda x: x[1]) + top_results = distances[:top_k] + ids = [r[0] for r in top_results] + dists = [r[1] for r in top_results] + + return ids, dists + + +def new_search_implementation(query_arr: np.ndarray, vectors_matrix: np.ndarray, ids_array: np.ndarray, top_k: int): + """Optimized vectorized implementation.""" + # Broadcast XOR + xor_result = np.bitwise_xor(query_arr, vectors_matrix) + + # Vectorized popcount using lookup table + popcount_lut = np.array([bin(i).count('1') for i in range(256)], dtype=np.uint8) + bit_counts = popcount_lut[xor_result] + + # Sum across packed bytes + distances = bit_counts.sum(axis=1) + + # Get top-k using argpartition + n_vectors = len(distances) + k = min(top_k, n_vectors) + + if k == n_vectors: + sorted_indices = np.argsort(distances) + else: + partition_indices = np.argpartition(distances, k)[:k] + top_k_distances = distances[partition_indices] + sorted_order = np.argsort(top_k_distances) + sorted_indices = partition_indices[sorted_order] + + result_ids = ids_array[sorted_indices].tolist() + result_dists = distances[sorted_indices].tolist() + + return result_ids, result_dists + + +def run_benchmark(n_vectors: int, dim: int = 256, top_k: int = 100, n_iterations: int = 50): + """Run benchmark comparing old and new implementations.""" + packed_dim = dim // 8 # 32 bytes for 256-bit + + print(f"\n{'='*60}") + print(f"Binary Search Micro-Benchmark") + print(f"{'='*60}") + print(f"Vectors: {n_vectors}") + print(f"Dimension: {dim} bits ({packed_dim} bytes packed)") + print(f"Top-K: {top_k}") + print(f"Iterations: {n_iterations}") + print(f"{'='*60}\n") + + # Generate random binary vectors + print("Generating test data...") + vectors_dict = {} + id_list = [] + + for i in range(n_vectors): + vec_bytes = np.random.randint(0, 256, size=packed_dim, dtype=np.uint8).tobytes() + vectors_dict[i] = vec_bytes + id_list.append(i) + + # Build matrix for vectorized search + vectors_matrix = np.empty((n_vectors, packed_dim), dtype=np.uint8) + ids_array = np.array(id_list, dtype=np.int64) + + for i, vec_id in enumerate(id_list): + vec_bytes = vectors_dict[vec_id] + vectors_matrix[i] = np.frombuffer(vec_bytes, dtype=np.uint8) + + # Generate random query + query_bytes = np.random.randint(0, 256, size=packed_dim, dtype=np.uint8).tobytes() + query_arr = np.frombuffer(query_bytes, dtype=np.uint8) + + # Warmup + print("Running warmup...") + for _ in range(3): + old_search_implementation(query_arr, 
vectors_dict, id_list, top_k) + new_search_implementation(query_arr, vectors_matrix, ids_array, top_k) + + # Benchmark old implementation + print("Benchmarking old implementation...") + old_times = [] + for _ in range(n_iterations): + gc.collect() + start = time.perf_counter() + old_ids, old_dists = old_search_implementation(query_arr, vectors_dict, id_list, top_k) + elapsed = (time.perf_counter() - start) * 1000 + old_times.append(elapsed) + + # Benchmark new implementation + print("Benchmarking new implementation...") + new_times = [] + for _ in range(n_iterations): + gc.collect() + start = time.perf_counter() + new_ids, new_dists = new_search_implementation(query_arr, vectors_matrix, ids_array, top_k) + elapsed = (time.perf_counter() - start) * 1000 + new_times.append(elapsed) + + # Verify correctness + print("\nVerifying correctness...") + # Check that distances are correct (IDs may differ for ties) + if old_dists == new_dists: + print("Distances match! (IDs may differ for ties)") + else: + # Check if difference is just in tie-breaking + old_dist_set = set(old_dists) + new_dist_set = set(new_dists) + if old_dist_set == new_dist_set: + print("Distances equivalent (tie-breaking differs, which is acceptable)") + else: + print("WARNING: Distance distributions differ!") + print(f" Old dists (first 5): {old_dists[:5]}") + print(f" New dists (first 5): {new_dists[:5]}") + + # Calculate statistics + old_avg = statistics.mean(old_times) + old_std = statistics.stdev(old_times) if len(old_times) > 1 else 0 + new_avg = statistics.mean(new_times) + new_std = statistics.stdev(new_times) if len(new_times) > 1 else 0 + + speedup = old_avg / new_avg if new_avg > 0 else 0 + + # Print results + print(f"\n{'='*60}") + print("RESULTS") + print(f"{'='*60}") + print(f"{'Metric':<25} {'Old (loop)':>15} {'New (vectorized)':>18}") + print(f"{'-'*25} {'-'*15} {'-'*18}") + print(f"{'Avg Latency (ms)':<25} {old_avg:>15.3f} {new_avg:>18.3f}") + print(f"{'Std Dev (ms)':<25} {old_std:>15.3f} {new_std:>18.3f}") + print(f"{'Min Latency (ms)':<25} {min(old_times):>15.3f} {min(new_times):>18.3f}") + print(f"{'Max Latency (ms)':<25} {max(old_times):>15.3f} {max(new_times):>18.3f}") + print(f"{'P50 (ms)':<25} {sorted(old_times)[len(old_times)//2]:>15.3f} {sorted(new_times)[len(new_times)//2]:>18.3f}") + print(f"\n{'Speedup:':<25} {speedup:>15.2f}x") + print(f"{'='*60}\n") + + return { + "n_vectors": n_vectors, + "dim": dim, + "top_k": top_k, + "old_avg_ms": old_avg, + "new_avg_ms": new_avg, + "speedup": speedup, + } + + +def main(): + print("\n" + "="*70) + print(" BINARY SEARCH OPTIMIZATION MICRO-BENCHMARK") + print("="*70) + + # Test different vector counts + results = [] + + for n_vectors in [1000, 5000, 10000, 50000]: + result = run_benchmark( + n_vectors=n_vectors, + dim=256, + top_k=100, + n_iterations=20, + ) + results.append(result) + + # Summary + print("\n" + "="*70) + print(" SUMMARY") + print("="*70) + print(f"{'N Vectors':<12} {'Old (ms)':<12} {'New (ms)':<12} {'Speedup':>10}") + print("-"*50) + for r in results: + print(f"{r['n_vectors']:<12} {r['old_avg_ms']:<12.3f} {r['new_avg_ms']:<12.3f} {r['speedup']:>10.2f}x") + print("="*70) + + +if __name__ == "__main__": + main() diff --git a/codex-lens/benchmarks/results/cascade_benchmark.json b/codex-lens/benchmarks/results/cascade_benchmark.json index ad5a2dbe..e8178395 100644 --- a/codex-lens/benchmarks/results/cascade_benchmark.json +++ b/codex-lens/benchmarks/results/cascade_benchmark.json @@ -1,30 +1,30 @@ { - "timestamp": "2026-01-02 11:22:34", + "timestamp": 
"2026-01-02 11:48:33", "summaries": { "binary": { "strategy": "binary", "total_queries": 15, "successful_queries": 15, - "avg_latency_ms": 850.328753333209, - "min_latency_ms": 750.9617999967304, - "max_latency_ms": 1015.733200001705, - "p50_latency_ms": 847.9711999971187, - "p95_latency_ms": 976.768470002571, - "p99_latency_ms": 1007.9402540018782, - "avg_results": 0, + "avg_latency_ms": 1133.4008666667312, + "min_latency_ms": 959.5361000028788, + "max_latency_ms": 1330.8978999993997, + "p50_latency_ms": 1125.8439999946859, + "p95_latency_ms": 1330.0081999987015, + "p99_latency_ms": 1330.71995999926, + "avg_results": 10, "errors": [] }, "hybrid": { "strategy": "hybrid", "total_queries": 15, "successful_queries": 15, - "avg_latency_ms": 821.3745733330143, - "min_latency_ms": 720.5589000004693, - "max_latency_ms": 943.0299999949057, - "p50_latency_ms": 819.5875000019441, - "p95_latency_ms": 916.3381599981221, - "p99_latency_ms": 937.691631995549, - "avg_results": 0, + "avg_latency_ms": 1111.1401133336283, + "min_latency_ms": 857.0021999985329, + "max_latency_ms": 1278.8890000010724, + "p50_latency_ms": 1130.696000000171, + "p95_latency_ms": 1254.2417899981956, + "p99_latency_ms": 1273.959558000497, + "avg_results": 10, "errors": [] } }, @@ -33,121 +33,121 @@ { "strategy": "binary", "query": "def search", - "latency_ms": 862.7266999974381, - "num_results": 0, - "top_result": null, + "latency_ms": 1044.525999997859, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py:0", "error": null }, { "strategy": "binary", "query": "class Engine", - "latency_ms": 773.8472999990336, - "num_results": 0, - "top_result": null, + "latency_ms": 1052.5979999947594, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py:0", "error": null }, { "strategy": "binary", "query": "import numpy", - "latency_ms": 858.1023000006098, - "num_results": 0, - "top_result": null, + "latency_ms": 1217.217100005655, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\__main__.py:0", "error": null }, { "strategy": "binary", "query": "async def", - "latency_ms": 877.2815999982413, - "num_results": 0, - "top_result": null, + "latency_ms": 1276.9802000038908, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\api_reranker.py:0", "error": null }, { "strategy": "binary", "query": "raise ValueError", - "latency_ms": 824.3320999972639, - "num_results": 0, - "top_result": null, + "latency_ms": 1005.9053000004496, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py:0", "error": null }, { "strategy": "binary", "query": "how to parse json", - "latency_ms": 948.0362000031164, - "num_results": 0, - "top_result": null, + "latency_ms": 1330.8978999993997, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py:0", "error": null }, { "strategy": "binary", "query": "database connection", - "latency_ms": 789.3126000053599, - "num_results": 0, - "top_result": null, + "latency_ms": 1041.6685000018333, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py:0", "error": null }, { "strategy": "binary", "query": "error handling", - "latency_ms": 960.0693000029423, - "num_results": 0, - "top_result": null, + "latency_ms": 959.5361000028788, + "num_results": 10, + "top_result": 
"D:\\Claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_004_dual_fts.py:0", "error": null }, { "strategy": "binary", "query": "authentication logic", - "latency_ms": 757.247900000948, - "num_results": 0, - "top_result": null, + "latency_ms": 1060.9395999999833, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py:0", "error": null }, { "strategy": "binary", "query": "file read write", - "latency_ms": 750.9617999967304, - "num_results": 0, - "top_result": null, + "latency_ms": 971.8680000005406, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\watcher\\incremental_indexer.py:0", "error": null }, { "strategy": "binary", "query": "embedding vector", - "latency_ms": 871.1426000008942, - "num_results": 0, - "top_result": null, + "latency_ms": 1135.879900000873, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\embedder.py:0", "error": null }, { "strategy": "binary", "query": "cosine similarity", - "latency_ms": 817.1380999992834, - "num_results": 0, - "top_result": null, + "latency_ms": 1188.1732000038028, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py:0", "error": null }, { "strategy": "binary", "query": "binary quantization", - "latency_ms": 1015.733200001705, - "num_results": 0, - "top_result": null, + "latency_ms": 1259.3522999959532, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py:0", "error": null }, { "strategy": "binary", "query": "hamming distance", - "latency_ms": 847.9711999971187, - "num_results": 0, - "top_result": null, + "latency_ms": 1329.6268999984022, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py:0", "error": null }, { "strategy": "binary", "query": "reranking", - "latency_ms": 801.028399997449, - "num_results": 0, - "top_result": null, + "latency_ms": 1125.8439999946859, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py:0", "error": null } ], @@ -155,121 +155,121 @@ { "strategy": "hybrid", "query": "def search", - "latency_ms": 720.5589000004693, - "num_results": 0, - "top_result": null, + "latency_ms": 1117.0937999995658, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py:0", "error": null }, { "strategy": "hybrid", "query": "class Engine", - "latency_ms": 792.9914000051212, - "num_results": 0, - "top_result": null, + "latency_ms": 1039.3984000038472, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py:0", "error": null }, { "strategy": "hybrid", "query": "import numpy", - "latency_ms": 943.0299999949057, - "num_results": 0, - "top_result": null, + "latency_ms": 1144.7916999968584, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\__main__.py:0", "error": null }, { "strategy": "hybrid", "query": "async def", - "latency_ms": 819.5875000019441, - "num_results": 0, - "top_result": null, + "latency_ms": 857.0021999985329, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\api_reranker.py:0", "error": null }, { "strategy": "hybrid", "query": "raise ValueError", - "latency_ms": 835.5114000005415, - "num_results": 0, - "top_result": null, + "latency_ms": 957.5578000003588, + "num_results": 10, + "top_result": 
"D:\\Claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py:0", "error": null }, { "strategy": "hybrid", "query": "how to parse json", - "latency_ms": 867.8118999960134, - "num_results": 0, - "top_result": null, + "latency_ms": 1216.5708000029554, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py:0", "error": null }, { "strategy": "hybrid", "query": "database connection", - "latency_ms": 824.6361999990768, - "num_results": 0, - "top_result": null, + "latency_ms": 1154.8929000055068, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py:0", "error": null }, { "strategy": "hybrid", "query": "error handling", - "latency_ms": 742.638600000646, - "num_results": 0, - "top_result": null, + "latency_ms": 1130.696000000171, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_004_dual_fts.py:0", "error": null }, { "strategy": "hybrid", "query": "authentication logic", - "latency_ms": 840.4286999939359, - "num_results": 0, - "top_result": null, + "latency_ms": 1112.8943000003346, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py:0", "error": null }, { "strategy": "hybrid", "query": "file read write", - "latency_ms": 810.9049000049708, - "num_results": 0, - "top_result": null, + "latency_ms": 1172.5986000019475, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\watcher\\incremental_indexer.py:0", "error": null }, { "strategy": "hybrid", "query": "embedding vector", - "latency_ms": 876.5335000061896, - "num_results": 0, - "top_result": null, + "latency_ms": 1278.8890000010724, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\embedder.py:0", "error": null }, { "strategy": "hybrid", "query": "cosine similarity", - "latency_ms": 797.3090999948909, - "num_results": 0, - "top_result": null, + "latency_ms": 1024.2393000007723, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py:0", "error": null }, { "strategy": "hybrid", "query": "binary quantization", - "latency_ms": 767.9803999999422, - "num_results": 0, - "top_result": null, + "latency_ms": 1243.6786999969627, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py:0", "error": null }, { "strategy": "hybrid", "query": "hamming distance", - "latency_ms": 775.7972999970661, - "num_results": 0, - "top_result": null, + "latency_ms": 1081.3100999948801, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py:0", "error": null }, { "strategy": "hybrid", "query": "reranking", - "latency_ms": 904.8987999995006, - "num_results": 0, - "top_result": null, + "latency_ms": 1135.4881000006571, + "num_results": 10, + "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py:0", "error": null } ] diff --git a/codex-lens/src/codexlens/search/chain_search.py b/codex-lens/src/codexlens/search/chain_search.py index 50514f96..dfde6f11 100644 --- a/codex-lens/src/codexlens/search/chain_search.py +++ b/codex-lens/src/codexlens/search/chain_search.py @@ -608,31 +608,43 @@ class ChainSearchEngine: for index_path, chunk_ids in candidates_by_index.items(): try: - store = SQLiteStore(index_path) - dense_embeddings = store.get_dense_embeddings(chunk_ids) - chunks_data = 
store.get_chunks_by_ids(chunk_ids) + # Read directly from semantic_chunks table (where cascade-index stores data) + import sqlite3 + conn = sqlite3.connect(str(index_path)) + conn.row_factory = sqlite3.Row - # Create lookup for chunk content - chunk_content: Dict[int, Dict[str, Any]] = { - c["id"]: c for c in chunks_data - } + placeholders = ",".join("?" * len(chunk_ids)) + rows = conn.execute( + f"SELECT id, file_path, content, embedding_dense FROM semantic_chunks WHERE id IN ({placeholders})", + chunk_ids + ).fetchall() + conn.close() - for chunk_id in chunk_ids: - dense_bytes = dense_embeddings.get(chunk_id) - chunk_info = chunk_content.get(chunk_id) + # Batch processing: collect all valid embeddings first + valid_rows = [] + dense_vectors = [] + for row in rows: + dense_bytes = row["embedding_dense"] + if dense_bytes is not None: + valid_rows.append(row) + dense_vectors.append(np.frombuffer(dense_bytes, dtype=np.float32)) - if dense_bytes is None or chunk_info is None: - continue + if not dense_vectors: + continue - # Compute cosine similarity - dense_vec = np.frombuffer(dense_bytes, dtype=np.float32) - score = self._compute_cosine_similarity(query_dense, dense_vec) + # Stack into matrix for batch computation + doc_matrix = np.vstack(dense_vectors) - # Create search result - excerpt = chunk_info.get("content", "")[:500] + # Batch compute cosine similarities + scores = self._compute_cosine_similarity_batch(query_dense, doc_matrix) + + # Create search results + for i, row in enumerate(valid_rows): + score = float(scores[i]) + excerpt = (row["content"] or "")[:500] result = SearchResult( - path=chunk_info.get("file_path", ""), - score=float(score), + path=row["file_path"] or "", + score=score, excerpt=excerpt, ) scored_results.append((score, result)) @@ -783,6 +795,58 @@ class ChainSearchEngine: return float(dot_product / (norm_q * norm_d)) + def _compute_cosine_similarity_batch( + self, + query_vec: "np.ndarray", + doc_matrix: "np.ndarray", + ) -> "np.ndarray": + """Compute cosine similarity between query and multiple document vectors. + + Uses vectorized matrix operations for efficient batch computation. 
+ + Args: + query_vec: Query embedding vector of shape (dim,) + doc_matrix: Document embeddings matrix of shape (n_docs, dim) + + Returns: + Array of cosine similarity scores of shape (n_docs,) + """ + if not NUMPY_AVAILABLE: + return np.zeros(doc_matrix.shape[0]) + + # Ensure query is 1D + if query_vec.ndim > 1: + query_vec = query_vec.flatten() + + # Handle dimension mismatch by truncating to smaller dimension + min_dim = min(len(query_vec), doc_matrix.shape[1]) + q = query_vec[:min_dim] + docs = doc_matrix[:, :min_dim] + + # Compute query norm once + norm_q = np.linalg.norm(q) + if norm_q == 0: + return np.zeros(docs.shape[0]) + + # Normalize query + q_normalized = q / norm_q + + # Compute document norms (vectorized) + doc_norms = np.linalg.norm(docs, axis=1) + + # Avoid division by zero + nonzero_mask = doc_norms > 0 + scores = np.zeros(docs.shape[0], dtype=np.float32) + + if np.any(nonzero_mask): + # Normalize documents with non-zero norms + docs_normalized = docs[nonzero_mask] / doc_norms[nonzero_mask, np.newaxis] + + # Batch dot product: (n_docs, dim) @ (dim,) = (n_docs,) + scores[nonzero_mask] = docs_normalized @ q_normalized + + return scores + def _build_results_from_candidates( self, candidates: List[Tuple[int, int, Path]], diff --git a/codex-lens/src/codexlens/semantic/ann_index.py b/codex-lens/src/codexlens/semantic/ann_index.py index c3c6504c..7f042c5a 100644 --- a/codex-lens/src/codexlens/semantic/ann_index.py +++ b/codex-lens/src/codexlens/semantic/ann_index.py @@ -487,6 +487,11 @@ class BinaryANNIndex: self._vectors: dict[int, bytes] = {} self._id_list: list[int] = [] # Ordered list for efficient iteration + # Cached numpy array for vectorized search (invalidated on add/remove) + self._vectors_matrix: Optional[np.ndarray] = None + self._ids_array: Optional[np.ndarray] = None + self._cache_valid: bool = False + logger.info( f"Initialized BinaryANNIndex with dim={dim}, packed_dim={self.packed_dim}" ) @@ -524,6 +529,9 @@ class BinaryANNIndex: self._id_list.append(vec_id) self._vectors[vec_id] = vec + # Invalidate cache on modification + self._cache_valid = False + logger.debug( f"Added {len(ids)} binary vectors to index (total: {len(self._vectors)})" ) @@ -599,6 +607,8 @@ class BinaryANNIndex: # Rebuild ID list efficiently - O(N) once instead of O(N) per removal if removed_count > 0: self._id_list = [id_ for id_ in self._id_list if id_ not in ids_to_remove] + # Invalidate cache on modification + self._cache_valid = False logger.debug(f"Removed {removed_count}/{len(ids)} vectors from index") @@ -610,11 +620,42 @@ class BinaryANNIndex: f"Failed to remove vectors from Binary ANN index: {e}" ) + def _build_cache(self) -> None: + """Build numpy array cache from vectors dict for vectorized search. + + Pre-computes a contiguous numpy array from all vectors for efficient + batch distance computation. Called lazily on first search after modification. 
+ """ + if self._cache_valid: + return + + n_vectors = len(self._id_list) + if n_vectors == 0: + self._vectors_matrix = None + self._ids_array = None + self._cache_valid = True + return + + # Build contiguous numpy array of all packed vectors + # Shape: (n_vectors, packed_dim) with uint8 dtype + self._vectors_matrix = np.empty((n_vectors, self.packed_dim), dtype=np.uint8) + self._ids_array = np.array(self._id_list, dtype=np.int64) + + for i, vec_id in enumerate(self._id_list): + vec_bytes = self._vectors[vec_id] + self._vectors_matrix[i] = np.frombuffer(vec_bytes, dtype=np.uint8) + + self._cache_valid = True + logger.debug(f"Built vectorized cache for {n_vectors} binary vectors") + def search( self, query: bytes, top_k: int = 10 ) -> Tuple[List[int], List[int]]: """Search for nearest neighbors using Hamming distance. + Uses vectorized batch computation for O(N) search with SIMD acceleration. + Pre-computes and caches numpy arrays for efficient repeated queries. + Args: query: Packed binary query vector (size: packed_dim bytes) top_k: Number of nearest neighbors to return @@ -638,27 +679,48 @@ class BinaryANNIndex: if len(self._vectors) == 0: return [], [] - # Compute Hamming distances to all vectors + # Build cache if needed (lazy initialization) + self._build_cache() + + if self._vectors_matrix is None or self._ids_array is None: + return [], [] + + # Vectorized Hamming distance computation + # 1. Convert query to numpy array query_arr = np.frombuffer(query, dtype=np.uint8) - distances = [] - for vec_id in self._id_list: - vec = self._vectors[vec_id] - vec_arr = np.frombuffer(vec, dtype=np.uint8) - # XOR and popcount for Hamming distance - xor = np.bitwise_xor(query_arr, vec_arr) - dist = int(np.unpackbits(xor).sum()) - distances.append((vec_id, dist)) + # 2. Broadcast XOR: (1, packed_dim) XOR (n_vectors, packed_dim) + # Result shape: (n_vectors, packed_dim) + xor_result = np.bitwise_xor(query_arr, self._vectors_matrix) - # Sort by distance (ascending) - distances.sort(key=lambda x: x[1]) + # 3. Vectorized popcount using lookup table for efficiency + # np.unpackbits is slow for large arrays, use popcount LUT instead + popcount_lut = np.array([bin(i).count('1') for i in range(256)], dtype=np.uint8) + bit_counts = popcount_lut[xor_result] - # Return top-k - top_results = distances[:top_k] - ids = [r[0] for r in top_results] - dists = [r[1] for r in top_results] + # 4. Sum across packed bytes to get Hamming distance per vector + distances = bit_counts.sum(axis=1) - return ids, dists + # 5. Get top-k using argpartition (O(N) instead of O(N log N) for full sort) + n_vectors = len(distances) + k = min(top_k, n_vectors) + + if k == n_vectors: + # No partitioning needed, just sort all + sorted_indices = np.argsort(distances) + else: + # Use argpartition for O(N) partial sort + partition_indices = np.argpartition(distances, k)[:k] + # Sort only the top-k + top_k_distances = distances[partition_indices] + sorted_order = np.argsort(top_k_distances) + sorted_indices = partition_indices[sorted_order] + + # 6. 
Return results + result_ids = self._ids_array[sorted_indices].tolist() + result_dists = distances[sorted_indices].tolist() + + return result_ids, result_dists except Exception as e: raise StorageError(f"Failed to search Binary ANN index: {e}") @@ -797,6 +859,7 @@ class BinaryANNIndex: # Clear existing data self._vectors.clear() self._id_list.clear() + self._cache_valid = False # Read vectors for _ in range(num_vectors): @@ -853,6 +916,9 @@ class BinaryANNIndex: with self._lock: self._vectors.clear() self._id_list.clear() + self._vectors_matrix = None + self._ids_array = None + self._cache_valid = False logger.debug("Cleared binary index")
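
For reviewers who want to try the core optimization outside the codebase, below is a minimal standalone sketch of the pattern this patch adds to BinaryANNIndex.search: broadcast XOR, a 256-entry popcount lookup table (instead of np.unpackbits, which the patch notes is slow for large arrays), and np.argpartition for O(N) top-k selection. The function name hamming_top_k, the module-level _POPCOUNT_LUT, and the toy data are illustrative only and are not part of the codex-lens API.

    import numpy as np

    # 256-entry lookup table: popcount for every possible byte value
    _POPCOUNT_LUT = np.array([bin(i).count("1") for i in range(256)], dtype=np.uint8)


    def hamming_top_k(query: np.ndarray, matrix: np.ndarray, k: int):
        """Return (row indices, distances) of the k nearest rows of `matrix` to `query`.

        `query` has shape (packed_dim,) and `matrix` has shape (n, packed_dim),
        both uint8 arrays of bit-packed binary vectors.
        """
        xor = np.bitwise_xor(query, matrix)        # broadcast: (n, packed_dim)
        dists = _POPCOUNT_LUT[xor].sum(axis=1)     # Hamming distance per row
        k = min(k, len(dists))
        if k == len(dists):
            order = np.argsort(dists)              # full sort when k covers everything
        else:
            part = np.argpartition(dists, k)[:k]   # O(n) partial selection of k smallest
            order = part[np.argsort(dists[part])]  # sort only the selected k
        return order, dists[order]


    if __name__ == "__main__":
        rng = np.random.default_rng(0)
        vecs = rng.integers(0, 256, size=(10_000, 32), dtype=np.uint8)  # 256-bit vectors, packed
        q = rng.integers(0, 256, size=32, dtype=np.uint8)
        idx, d = hamming_top_k(q, vecs, 5)
        print(idx, d)

This mirrors the cached-matrix path in the patch (the real index builds the (n, packed_dim) matrix lazily from its vector dict and maps row indices back to vector IDs); the sketch skips that bookkeeping to show only the distance and top-k computation.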