refactor: rename package codexlens -> codexlens_search for independent distribution

Rename the v2 search engine package to `codexlens-search` (import as
`codexlens_search`) so it can be installed independently and consumed
by the original codex-lens as a dependency. This avoids package path
conflicts since both previously used `src/codexlens/`.

Changes:
- Rename src/codexlens/ -> src/codexlens_search/
- Update pyproject.toml: name=codexlens-search, version=0.2.0
- Update all imports across source, tests, and scripts
- Add public API exports in __init__.py (Config, SearchPipeline,
  IndexingPipeline, SearchResult, IndexStats)

37/37 tests pass. No functional changes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
catlog22
2026-03-16 23:23:22 +08:00
parent a0a50d338a
commit 6712965b7f
32 changed files with 83 additions and 63 deletions

View File

@@ -0,0 +1,20 @@
"""codexlens-search: Lightweight semantic code search engine.
Public API for consumers (e.g. codex-lens):
from codexlens_search import SearchPipeline, IndexingPipeline, Config
from codexlens_search.core import create_ann_index, create_binary_index
from codexlens_search.embed.local import FastEmbedEmbedder
from codexlens_search.rerank.api import APIReranker
"""
from codexlens_search.config import Config
from codexlens_search.indexing import IndexingPipeline, IndexStats
from codexlens_search.search.pipeline import SearchPipeline, SearchResult
__all__ = [
"Config",
"IndexingPipeline",
"IndexStats",
"SearchPipeline",
"SearchResult",
]

View File

@@ -0,0 +1,99 @@
from __future__ import annotations
import logging
from dataclasses import dataclass, field
log = logging.getLogger(__name__)
@dataclass
class Config:
# Embedding
embed_model: str = "jinaai/jina-embeddings-v2-base-code"
embed_dim: int = 768
embed_batch_size: int = 64
# GPU / execution providers
device: str = "auto" # 'auto', 'cuda', 'cpu'
embed_providers: list[str] | None = None # explicit ONNX providers override
# Backend selection: 'auto', 'faiss', 'hnswlib'
ann_backend: str = "auto"
binary_backend: str = "auto"
# Indexing pipeline
index_workers: int = 2 # number of parallel indexing workers
# HNSW index (ANNIndex)
hnsw_ef: int = 150
hnsw_M: int = 32
hnsw_ef_construction: int = 200
# Binary coarse search (BinaryStore)
binary_top_k: int = 200
# ANN fine search
ann_top_k: int = 50
# Reranker
reranker_model: str = "BAAI/bge-reranker-v2-m3"
reranker_top_k: int = 20
reranker_batch_size: int = 32
# API reranker (optional)
reranker_api_url: str = ""
reranker_api_key: str = ""
reranker_api_model: str = ""
reranker_api_max_tokens_per_batch: int = 2048
# FTS
fts_top_k: int = 50
# Fusion
fusion_k: int = 60 # RRF k parameter
fusion_weights: dict = field(default_factory=lambda: {
"exact": 0.25,
"fuzzy": 0.10,
"vector": 0.50,
"graph": 0.15,
})
def resolve_embed_providers(self) -> list[str]:
"""Return ONNX execution providers based on device config.
Priority: explicit embed_providers > device setting > auto-detect.
"""
if self.embed_providers is not None:
return list(self.embed_providers)
if self.device == "cuda":
return ["CUDAExecutionProvider", "CPUExecutionProvider"]
if self.device == "cpu":
return ["CPUExecutionProvider"]
# auto-detect
try:
import onnxruntime
available = onnxruntime.get_available_providers()
if "CUDAExecutionProvider" in available:
log.info("CUDA detected via onnxruntime, using GPU for embedding")
return ["CUDAExecutionProvider", "CPUExecutionProvider"]
except ImportError:
pass
return ["CPUExecutionProvider"]
@classmethod
def defaults(cls) -> "Config":
return cls()
@classmethod
def small(cls) -> "Config":
"""Smaller config for testing or small corpora."""
return cls(
hnsw_ef=50,
hnsw_M=16,
binary_top_k=50,
ann_top_k=20,
reranker_top_k=10,
)

View File

@@ -0,0 +1,13 @@
from .base import BaseANNIndex, BaseBinaryIndex
from .binary import BinaryStore
from .factory import create_ann_index, create_binary_index
from .index import ANNIndex
__all__ = [
"BaseANNIndex",
"BaseBinaryIndex",
"ANNIndex",
"BinaryStore",
"create_ann_index",
"create_binary_index",
]

View File

@@ -0,0 +1,83 @@
from __future__ import annotations
from abc import ABC, abstractmethod
import numpy as np
class BaseANNIndex(ABC):
    """Abstract base class for approximate nearest neighbor indexes.

    Implementations must support incremental addition of id/vector pairs,
    top-k search, and explicit save()/load() persistence.
    """

    @abstractmethod
    def add(self, ids: np.ndarray, vectors: np.ndarray) -> None:
        """Add float32 vectors with corresponding IDs.

        Args:
            ids: shape (N,) int64
            vectors: shape (N, dim) float32
        """

    @abstractmethod
    def fine_search(
        self, query_vec: np.ndarray, top_k: int | None = None
    ) -> tuple[np.ndarray, np.ndarray]:
        """Search for nearest neighbors.

        Args:
            query_vec: float32 vector of shape (dim,)
            top_k: number of results; None lets the implementation apply
                its configured default

        Returns:
            (ids, distances) as numpy arrays
        """

    @abstractmethod
    def save(self) -> None:
        """Persist index to disk."""

    @abstractmethod
    def load(self) -> None:
        """Load index from disk."""

    @abstractmethod
    def __len__(self) -> int:
        """Return the number of indexed items."""
class BaseBinaryIndex(ABC):
    """Abstract base class for binary vector indexes (Hamming distance).

    Implementations quantize float32 vectors into packed bits internally
    and rank candidates by Hamming distance for fast coarse retrieval.
    """

    @abstractmethod
    def add(self, ids: np.ndarray, vectors: np.ndarray) -> None:
        """Add float32 vectors (will be binary-quantized internally).

        Args:
            ids: shape (N,) int64
            vectors: shape (N, dim) float32
        """

    @abstractmethod
    def coarse_search(
        self, query_vec: np.ndarray, top_k: int | None = None
    ) -> tuple[np.ndarray, np.ndarray]:
        """Search by Hamming distance.

        Args:
            query_vec: float32 vector of shape (dim,)
            top_k: number of results; None lets the implementation apply
                its configured default

        Returns:
            (ids, distances) sorted ascending by distance
        """

    @abstractmethod
    def save(self) -> None:
        """Persist store to disk."""

    @abstractmethod
    def load(self) -> None:
        """Load store from disk."""

    @abstractmethod
    def __len__(self) -> int:
        """Return the number of stored items."""

View File

@@ -0,0 +1,173 @@
from __future__ import annotations
import logging
import math
from pathlib import Path
import numpy as np
from codexlens_search.config import Config
from codexlens_search.core.base import BaseBinaryIndex
logger = logging.getLogger(__name__)
class BinaryStore(BaseBinaryIndex):
    """Persistent binary vector store using numpy memmap.

    Float32 vectors are sign-quantized into packed uint8 rows on disk.
    Coarse search ranks by Hamming distance computed via XOR + bit count.
    """

    def __init__(self, path: str | Path, dim: int, config: Config) -> None:
        self._dir = Path(path)
        self._dim = dim
        self._config = config
        self._packed_bytes = math.ceil(dim / 8)
        self._bin_path = self._dir / "binary_store.bin"
        self._ids_path = self._dir / "binary_store_ids.npy"
        # Pre-allocated backing arrays; only the first _count rows are live.
        self._matrix: np.ndarray | None = None  # (capacity, packed_bytes) uint8
        self._ids: np.ndarray | None = None  # (capacity,) int64
        self._count: int = 0
        if self._bin_path.exists() and self._ids_path.exists():
            self.load()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _quantize(self, vectors: np.ndarray) -> np.ndarray:
        """Sign-quantize float32 rows (N, dim) into packed uint8 (N, packed_bytes)."""
        bits = (vectors > 0).astype(np.uint8)
        return np.packbits(bits, axis=1)

    def _quantize_single(self, vec: np.ndarray) -> np.ndarray:
        """Sign-quantize one float32 vector (dim,) into packed uint8 (packed_bytes,)."""
        return np.packbits((vec > 0).astype(np.uint8))

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def _ensure_capacity(self, needed: int) -> None:
        """Grow the backing arrays (doubling) until *needed* rows fit."""
        if self._matrix is not None and self._matrix.shape[0] >= needed:
            return
        if self._matrix is None:
            capacity = max(1024, needed)
        else:
            capacity = max(self._matrix.shape[0], 1024)
            while capacity < needed:
                capacity *= 2
        grown_matrix = np.zeros((capacity, self._packed_bytes), dtype=np.uint8)
        grown_ids = np.zeros(capacity, dtype=np.int64)
        if self._matrix is not None and self._count > 0:
            grown_matrix[: self._count] = self._matrix[: self._count]
            grown_ids[: self._count] = self._ids[: self._count]
        self._matrix = grown_matrix
        self._ids = grown_ids

    def add(self, ids: np.ndarray, vectors: np.ndarray) -> None:
        """Append float32 vectors and their ids.

        Persistence is explicit: add() never writes to disk; callers must
        invoke save() after batch indexing.

        Args:
            ids: shape (N,) int64
            vectors: shape (N, dim) float32
        """
        n = len(ids)
        if n == 0:
            return
        packed = self._quantize(vectors)  # (N, packed_bytes)
        self._ensure_capacity(self._count + n)
        start, end = self._count, self._count + n
        self._matrix[start:end] = packed
        self._ids[start:end] = ids.astype(np.int64)
        self._count = end

    def coarse_search(
        self, query_vec: np.ndarray, top_k: int | None = None
    ) -> tuple[np.ndarray, np.ndarray]:
        """Search by Hamming distance.

        Args:
            query_vec: float32 vector of shape (dim,)
            top_k: number of results; defaults to config.binary_top_k

        Returns:
            (ids, distances) sorted ascending by Hamming distance
        """
        if self._matrix is None or self._count == 0:
            return np.array([], dtype=np.int64), np.array([], dtype=np.int32)
        k = self._config.binary_top_k if top_k is None else top_k
        k = min(k, self._count)
        query_bits = self._quantize_single(query_vec)  # (packed_bytes,)
        # Only the occupied region participates in the scan.
        live_rows = self._matrix[: self._count]
        live_ids = self._ids[: self._count]
        # Hamming distance = popcount(a XOR b), via unpackbits + row sum.
        diff = np.bitwise_xor(live_rows, query_bits[np.newaxis, :])
        dists = np.unpackbits(diff, axis=1).sum(axis=1).astype(np.int32)
        if k >= self._count:
            order = np.argsort(dists)
        else:
            # Partial selection first, then sort only the k candidates.
            candidates = np.argpartition(dists, k)[:k]
            order = candidates[np.argsort(dists[candidates])]
        return live_ids[order], dists[order]

    def save(self) -> None:
        """Flush the occupied portion of the store to disk."""
        if self._matrix is None or self._count == 0:
            return
        self._dir.mkdir(parents=True, exist_ok=True)
        live_rows = self._matrix[: self._count]
        mm = np.memmap(
            str(self._bin_path),
            dtype=np.uint8,
            mode="w+",
            shape=live_rows.shape,
        )
        mm[:] = live_rows
        mm.flush()
        del mm
        np.save(str(self._ids_path), self._ids[: self._count])

    def load(self) -> None:
        """Reload binary store from disk into RAM."""
        ids = np.load(str(self._ids_path))
        n = len(ids)
        if n == 0:
            return
        mm = np.memmap(
            str(self._bin_path),
            dtype=np.uint8,
            mode="r",
            shape=(n, self._packed_bytes),
        )
        # Copy out of the memmap so the store stays mutable after loading.
        self._matrix = np.array(mm)
        del mm
        self._ids = ids.astype(np.int64)
        self._count = n

    def __len__(self) -> int:
        return self._count

View File

@@ -0,0 +1,116 @@
from __future__ import annotations
import logging
from pathlib import Path
from codexlens_search.config import Config
from codexlens_search.core.base import BaseANNIndex, BaseBinaryIndex
logger = logging.getLogger(__name__)
try:
import faiss as _faiss # noqa: F401
_FAISS_AVAILABLE = True
except ImportError:
_FAISS_AVAILABLE = False
try:
import hnswlib as _hnswlib # noqa: F401
_HNSWLIB_AVAILABLE = True
except ImportError:
_HNSWLIB_AVAILABLE = False
def _has_faiss_gpu() -> bool:
"""Check whether faiss-gpu is available (has GPU resources)."""
if not _FAISS_AVAILABLE:
return False
try:
import faiss
res = faiss.StandardGpuResources() # noqa: F841
return True
except (AttributeError, RuntimeError):
return False
def create_ann_index(path: str | Path, dim: int, config: Config) -> BaseANNIndex:
    """Create an ANN index according to config.ann_backend.

    For 'auto', the fallback chain is faiss-gpu -> faiss-cpu -> hnswlib.

    Args:
        path: directory for index persistence
        dim: vector dimensionality
        config: project configuration

    Returns:
        A BaseANNIndex implementation

    Raises:
        ImportError: if no suitable backend is available
    """

    def _faiss_backend() -> BaseANNIndex:
        from codexlens_search.core.faiss_index import FAISSANNIndex

        return FAISSANNIndex(path, dim, config)

    def _hnswlib_backend() -> BaseANNIndex:
        from codexlens_search.core.index import ANNIndex

        return ANNIndex(path, dim, config)

    backend = config.ann_backend
    if backend == "faiss":
        return _faiss_backend()
    if backend == "hnswlib":
        return _hnswlib_backend()
    # 'auto': prefer FAISS, fall back to hnswlib.
    if _FAISS_AVAILABLE:
        gpu_tag = " (GPU available)" if _has_faiss_gpu() else " (CPU)"
        logger.info("Auto-selected FAISS ANN backend%s", gpu_tag)
        return _faiss_backend()
    if _HNSWLIB_AVAILABLE:
        logger.info("Auto-selected hnswlib ANN backend")
        return _hnswlib_backend()
    raise ImportError(
        "No ANN backend available. Install faiss-cpu, faiss-gpu, or hnswlib."
    )
def create_binary_index(
    path: str | Path, dim: int, config: Config
) -> BaseBinaryIndex:
    """Create a binary index according to config.binary_backend.

    For 'auto', the fallback chain is faiss -> numpy BinaryStore.

    Args:
        path: directory for index persistence
        dim: vector dimensionality
        config: project configuration

    Returns:
        A BaseBinaryIndex implementation

    Raises:
        ImportError: if no suitable backend is available
    """

    def _faiss_backend() -> BaseBinaryIndex:
        from codexlens_search.core.faiss_index import FAISSBinaryIndex

        return FAISSBinaryIndex(path, dim, config)

    def _numpy_backend() -> BaseBinaryIndex:
        from codexlens_search.core.binary import BinaryStore

        return BinaryStore(path, dim, config)

    backend = config.binary_backend
    if backend == "faiss":
        return _faiss_backend()
    if backend == "hnswlib":
        return _numpy_backend()
    # 'auto': prefer FAISS when present.
    if _FAISS_AVAILABLE:
        logger.info("Auto-selected FAISS binary backend")
        return _faiss_backend()
    # numpy BinaryStore is always available (no extra deps).
    logger.info("Auto-selected numpy BinaryStore backend")
    return _numpy_backend()

View File

@@ -0,0 +1,275 @@
from __future__ import annotations
import logging
import math
import threading
from pathlib import Path
import numpy as np
from codexlens_search.config import Config
from codexlens_search.core.base import BaseANNIndex, BaseBinaryIndex
logger = logging.getLogger(__name__)
try:
import faiss
_FAISS_AVAILABLE = True
except ImportError:
faiss = None # type: ignore[assignment]
_FAISS_AVAILABLE = False
def _try_gpu_index(index: "faiss.Index") -> "faiss.Index":
    """Move *index* to GPU 0 when faiss-gpu is usable.

    Returns the GPU index on success, or the original CPU index untouched
    when GPU support is absent or the transfer fails.
    """
    try:
        resources = faiss.StandardGpuResources()
        on_gpu = faiss.index_cpu_to_gpu(resources, 0, index)
        logger.info("FAISS index transferred to GPU 0")
        return on_gpu
    except (AttributeError, RuntimeError) as exc:
        logger.debug("GPU transfer unavailable, staying on CPU: %s", exc)
        return index
def _to_cpu_for_save(index: "faiss.Index") -> "faiss.Index":
    """Convert a GPU index back to CPU for serialization.

    Falls back to returning the index unchanged when conversion is
    unavailable (CPU-only faiss build) or fails at runtime.
    """
    try:
        return faiss.index_gpu_to_cpu(index)
    except (AttributeError, RuntimeError):
        return index
class FAISSANNIndex(BaseANNIndex):
    """FAISS-based ANN index using IndexHNSWFlat with optional GPU.

    Uses Inner Product space with L2-normalized vectors for cosine similarity.
    Thread-safe via RLock. The index is created/loaded lazily on first use
    and transferred to GPU 0 when faiss-gpu resources are available.
    """

    def __init__(self, path: str | Path, dim: int, config: Config) -> None:
        if not _FAISS_AVAILABLE:
            raise ImportError(
                "faiss is required. Install with: pip install faiss-cpu "
                "or pip install faiss-gpu"
            )
        self._path = Path(path)
        self._index_path = self._path / "faiss_ann.index"
        self._dim = dim
        self._config = config
        self._lock = threading.RLock()
        # Populated lazily by load(); may be a GPU index after _try_gpu_index.
        self._index: faiss.Index | None = None

    def _ensure_loaded(self) -> None:
        """Load or initialize the index (caller holds lock)."""
        if self._index is not None:
            return
        self.load()

    def load(self) -> None:
        """Load index from disk or initialize a fresh one."""
        with self._lock:
            if self._index_path.exists():
                idx = faiss.read_index(str(self._index_path))
                logger.debug(
                    "Loaded FAISS ANN index from %s (%d items)",
                    self._index_path, idx.ntotal,
                )
            else:
                # HNSW with flat storage, M=32 by default
                m = self._config.hnsw_M
                idx = faiss.IndexHNSWFlat(self._dim, m, faiss.METRIC_INNER_PRODUCT)
                idx.hnsw.efConstruction = self._config.hnsw_ef_construction
                idx.hnsw.efSearch = self._config.hnsw_ef
                logger.debug(
                    "Initialized fresh FAISS HNSW index (dim=%d, M=%d)",
                    self._dim, m,
                )
            # Move to GPU when available; silently stays on CPU otherwise.
            self._index = _try_gpu_index(idx)

    def add(self, ids: np.ndarray, vectors: np.ndarray) -> None:
        """Add L2-normalized float32 vectors.

        Vectors are normalized before insertion so that Inner Product
        distance equals cosine similarity.

        Args:
            ids: shape (N,) int64 -- currently unused by FAISS flat index
                but kept for API compatibility. FAISS uses sequential IDs.
            vectors: shape (N, dim) float32
        """
        if len(ids) == 0:
            return
        vecs = np.ascontiguousarray(vectors, dtype=np.float32)
        # Normalize for cosine similarity via Inner Product
        faiss.normalize_L2(vecs)
        with self._lock:
            self._ensure_loaded()
            self._index.add(vecs)

    def fine_search(
        self, query_vec: np.ndarray, top_k: int | None = None
    ) -> tuple[np.ndarray, np.ndarray]:
        """Search for nearest neighbors.

        Args:
            query_vec: float32 vector of shape (dim,)
            top_k: number of results; defaults to config.ann_top_k

        Returns:
            (ids, distances) as numpy arrays. For IP space, higher = more
            similar, but distances are returned as-is for consumer handling.
        """
        k = top_k if top_k is not None else self._config.ann_top_k
        with self._lock:
            self._ensure_loaded()
            count = self._index.ntotal
            if count == 0:
                return np.array([], dtype=np.int64), np.array([], dtype=np.float32)
            k = min(k, count)
            # Set efSearch for HNSW accuracy
            try:
                self._index.hnsw.efSearch = max(self._config.hnsw_ef, k)
            except AttributeError:
                pass  # GPU index may not expose hnsw attribute directly
            q = np.ascontiguousarray(query_vec, dtype=np.float32).reshape(1, -1)
            faiss.normalize_L2(q)
            distances, labels = self._index.search(q, k)
            return labels[0].astype(np.int64), distances[0].astype(np.float32)

    def save(self) -> None:
        """Save index to disk."""
        with self._lock:
            if self._index is None:
                return
            self._path.mkdir(parents=True, exist_ok=True)
            # Serialize a CPU copy; a GPU index is converted back first.
            cpu_index = _to_cpu_for_save(self._index)
            faiss.write_index(cpu_index, str(self._index_path))

    def __len__(self) -> int:
        with self._lock:
            if self._index is None:
                return 0
            return self._index.ntotal
class FAISSBinaryIndex(BaseBinaryIndex):
    """FAISS-based binary index using IndexBinaryFlat for Hamming distance.

    Float32 vectors are sign-quantized (bit = value > 0) before insertion.
    All public methods are guarded by an RLock for thread safety.
    """

    def __init__(self, path: str | Path, dim: int, config: Config) -> None:
        if not _FAISS_AVAILABLE:
            raise ImportError(
                "faiss is required. Install with: pip install faiss-cpu "
                "or pip install faiss-gpu"
            )
        self._path = Path(path)
        self._index_path = self._path / "faiss_binary.index"
        self._dim = dim
        self._config = config
        self._packed_bytes = math.ceil(dim / 8)
        self._lock = threading.RLock()
        self._index: faiss.IndexBinary | None = None

    def _ensure_loaded(self) -> None:
        """Load or initialize the index on first use (caller holds lock)."""
        if self._index is None:
            self.load()

    def _quantize(self, vectors: np.ndarray) -> np.ndarray:
        """Sign-quantize float32 rows (N, dim) into packed uint8 (N, packed_bytes)."""
        return np.packbits((vectors > 0).astype(np.uint8), axis=1)

    def _quantize_single(self, vec: np.ndarray) -> np.ndarray:
        """Sign-quantize one float32 vector (dim,) into packed uint8 (1, packed_bytes)."""
        return np.packbits((vec > 0).astype(np.uint8)).reshape(1, -1)

    def load(self) -> None:
        """Load binary index from disk or initialize a fresh one."""
        with self._lock:
            if not self._index_path.exists():
                # IndexBinaryFlat takes dimension in bits
                fresh = faiss.IndexBinaryFlat(self._dim)
                logger.debug(
                    "Initialized fresh FAISS binary index (dim_bits=%d)", self._dim,
                )
                self._index = fresh
                return
            loaded = faiss.read_index_binary(str(self._index_path))
            logger.debug(
                "Loaded FAISS binary index from %s (%d items)",
                self._index_path, loaded.ntotal,
            )
            self._index = loaded

    def add(self, ids: np.ndarray, vectors: np.ndarray) -> None:
        """Add float32 vectors (binary-quantized internally).

        Args:
            ids: shape (N,) int64 -- kept for API compatibility
            vectors: shape (N, dim) float32
        """
        if len(ids) == 0:
            return
        rows = np.ascontiguousarray(self._quantize(vectors), dtype=np.uint8)
        with self._lock:
            self._ensure_loaded()
            self._index.add(rows)

    def coarse_search(
        self, query_vec: np.ndarray, top_k: int | None = None
    ) -> tuple[np.ndarray, np.ndarray]:
        """Search by Hamming distance.

        Args:
            query_vec: float32 vector of shape (dim,)
            top_k: number of results; defaults to config.binary_top_k

        Returns:
            (ids, distances) sorted ascending by Hamming distance
        """
        with self._lock:
            self._ensure_loaded()
            total = self._index.ntotal
            if total == 0:
                return np.array([], dtype=np.int64), np.array([], dtype=np.int32)
            k = min(self._config.binary_top_k if top_k is None else top_k, total)
            query = np.ascontiguousarray(self._quantize_single(query_vec), dtype=np.uint8)
            distances, labels = self._index.search(query, k)
        return labels[0].astype(np.int64), distances[0].astype(np.int32)

    def save(self) -> None:
        """Save binary index to disk."""
        with self._lock:
            if self._index is not None:
                self._path.mkdir(parents=True, exist_ok=True)
                faiss.write_index_binary(self._index, str(self._index_path))

    def __len__(self) -> int:
        with self._lock:
            return 0 if self._index is None else self._index.ntotal

View File

@@ -0,0 +1,136 @@
from __future__ import annotations
import logging
import threading
from pathlib import Path
import numpy as np
from codexlens_search.config import Config
from codexlens_search.core.base import BaseANNIndex
logger = logging.getLogger(__name__)
try:
import hnswlib
_HNSWLIB_AVAILABLE = True
except ImportError:
_HNSWLIB_AVAILABLE = False
class ANNIndex(BaseANNIndex):
    """HNSW-based approximate nearest neighbor index.

    The underlying hnswlib index is created or loaded lazily on first use;
    all public methods are guarded by an RLock.
    """

    def __init__(self, path: str | Path, dim: int, config: Config) -> None:
        if not _HNSWLIB_AVAILABLE:
            raise ImportError("hnswlib is required. Install with: pip install hnswlib")
        self._path = Path(path)
        self._hnsw_path = self._path / "ann_index.hnsw"
        self._dim = dim
        self._config = config
        self._lock = threading.RLock()
        self._index: hnswlib.Index | None = None

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _ensure_loaded(self) -> None:
        """Load or initialize the index on first use (caller holds lock)."""
        if self._index is None:
            self.load()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def load(self) -> None:
        """Load index from disk or initialize a fresh one."""
        with self._lock:
            index = hnswlib.Index(space="cosine", dim=self._dim)
            if self._hnsw_path.exists():
                index.load_index(str(self._hnsw_path), max_elements=0)
                index.set_ef(self._config.hnsw_ef)
                logger.debug("Loaded HNSW index from %s (%d items)", self._hnsw_path, index.get_current_count())
            else:
                index.init_index(
                    max_elements=1000,
                    ef_construction=self._config.hnsw_ef_construction,
                    M=self._config.hnsw_M,
                )
                index.set_ef(self._config.hnsw_ef)
                logger.debug("Initialized fresh HNSW index (dim=%d)", self._dim)
            self._index = index

    def add(self, ids: np.ndarray, vectors: np.ndarray) -> None:
        """Add float32 vectors.

        Persistence is explicit: add() never writes to disk; callers must
        invoke save() after batch indexing.

        Args:
            ids: shape (N,) int64
            vectors: shape (N, dim) float32
        """
        if len(ids) == 0:
            return
        data = np.ascontiguousarray(vectors, dtype=np.float32)
        with self._lock:
            self._ensure_loaded()
            # Grow capacity (roughly doubling) before insertion if required.
            occupied = self._index.get_current_count()
            capacity = self._index.get_max_elements()
            required = occupied + len(ids)
            if required > capacity:
                self._index.resize_index(max(capacity * 2, required + 100))
            self._index.add_items(data, ids.astype(np.int64))

    def fine_search(
        self, query_vec: np.ndarray, top_k: int | None = None
    ) -> tuple[np.ndarray, np.ndarray]:
        """Search for nearest neighbors.

        Args:
            query_vec: float32 vector of shape (dim,)
            top_k: number of results; defaults to config.ann_top_k

        Returns:
            (ids, distances) as numpy arrays
        """
        k = self._config.ann_top_k if top_k is None else top_k
        with self._lock:
            self._ensure_loaded()
            occupied = self._index.get_current_count()
            if occupied == 0:
                return np.array([], dtype=np.int64), np.array([], dtype=np.float32)
            k = min(k, occupied)
            self._index.set_ef(max(self._config.hnsw_ef, k))
            query = np.ascontiguousarray(query_vec, dtype=np.float32).reshape(1, -1)
            labels, distances = self._index.knn_query(query, k=k)
        return labels[0].astype(np.int64), distances[0].astype(np.float32)

    def save(self) -> None:
        """Save index to disk (caller may or may not hold lock)."""
        with self._lock:
            if self._index is None:
                return
            self._path.mkdir(parents=True, exist_ok=True)
            self._index.save_index(str(self._hnsw_path))

    def __len__(self) -> int:
        with self._lock:
            return 0 if self._index is None else self._index.get_current_count()

View File

@@ -0,0 +1,4 @@
from .base import BaseEmbedder
from .local import FastEmbedEmbedder, EMBED_PROFILES
__all__ = ["BaseEmbedder", "FastEmbedEmbedder", "EMBED_PROFILES"]

View File

@@ -0,0 +1,13 @@
from __future__ import annotations
from abc import ABC, abstractmethod
import numpy as np
class BaseEmbedder(ABC):
    """Abstract interface for text embedding backends."""

    @abstractmethod
    def embed_single(self, text: str) -> np.ndarray:
        """Embed a single text, returns float32 ndarray shape (dim,)."""

    @abstractmethod
    def embed_batch(self, texts: list[str]) -> list[np.ndarray]:
        """Embed a list of texts, returns list of float32 ndarrays."""

View File

@@ -0,0 +1,53 @@
from __future__ import annotations
import numpy as np
from ..config import Config
from .base import BaseEmbedder
# Named embedding model profiles: short alias -> fastembed model id.
# The trailing comment on each entry records the output dimensionality.
EMBED_PROFILES = {
    "small": "BAAI/bge-small-en-v1.5",  # 384d
    "base": "BAAI/bge-base-en-v1.5",  # 768d
    "large": "BAAI/bge-large-en-v1.5",  # 1024d
    "code": "jinaai/jina-embeddings-v2-base-code",  # 768d
}
class FastEmbedEmbedder(BaseEmbedder):
    """Embedder backed by fastembed.TextEmbedding with lazy model loading."""

    def __init__(self, config: Config) -> None:
        self._config = config
        self._model = None

    def _load(self) -> None:
        """Instantiate the fastembed model on first use (lazy)."""
        if self._model is not None:
            return
        from fastembed import TextEmbedding

        providers = self._config.resolve_embed_providers()
        try:
            self._model = TextEmbedding(
                model_name=self._config.embed_model,
                providers=providers,
            )
        except TypeError:
            # Older fastembed versions may not accept providers kwarg
            self._model = TextEmbedding(model_name=self._config.embed_model)

    def embed_single(self, text: str) -> np.ndarray:
        """Embed one text; returns a float32 ndarray of shape (dim,)."""
        self._load()
        vectors = list(self._model.embed([text]))
        return vectors[0].astype(np.float32)

    def embed_batch(self, texts: list[str]) -> list[np.ndarray]:
        """Embed many texts in config-sized batches, preserving input order."""
        self._load()
        size = self._config.embed_batch_size
        out: list[np.ndarray] = []
        for offset in range(0, len(texts), size):
            window = texts[offset : offset + size]
            out.extend(vec.astype(np.float32) for vec in self._model.embed(window))
        return out

View File

@@ -0,0 +1,5 @@
from __future__ import annotations
from .pipeline import IndexingPipeline, IndexStats
__all__ = ["IndexingPipeline", "IndexStats"]

View File

@@ -0,0 +1,277 @@
"""Three-stage parallel indexing pipeline: chunk -> embed -> index.
Uses threading.Thread with queue.Queue for producer-consumer handoff.
The GIL is acceptable because embedding (onnxruntime) releases it in C extensions.
"""
from __future__ import annotations
import logging
import queue
import threading
import time
from dataclasses import dataclass
from pathlib import Path
import numpy as np
from codexlens_search.config import Config
from codexlens_search.core.binary import BinaryStore
from codexlens_search.core.index import ANNIndex
from codexlens_search.embed.base import BaseEmbedder
from codexlens_search.search.fts import FTSEngine
logger = logging.getLogger(__name__)
# Sentinel value to signal worker shutdown
# (pushed onto each queue exactly once when a stage finishes producing).
_SENTINEL = None

# Defaults for chunking (can be overridden via index_files kwargs)
_DEFAULT_MAX_CHUNK_CHARS = 800  # max characters per chunk
_DEFAULT_CHUNK_OVERLAP = 100  # characters carried between adjacent chunks
@dataclass
class IndexStats:
    """Statistics returned after indexing completes."""

    files_processed: int = 0  # files that produced at least one chunk
    chunks_created: int = 0  # total chunks pushed through the pipeline
    duration_seconds: float = 0.0  # wall-clock time, rounded to 2 decimals
class IndexingPipeline:
"""Parallel 3-stage indexing pipeline with queue-based handoff.
Stage 1 (main thread): Read files, chunk text, push to embed_queue.
Stage 2 (embed worker): Pull text batches, call embed_batch(), push vectors to index_queue.
Stage 3 (index worker): Pull vectors+ids, call BinaryStore.add(), ANNIndex.add(), FTS.add_documents().
After all stages complete, save() is called on BinaryStore and ANNIndex exactly once.
"""
def __init__(
self,
embedder: BaseEmbedder,
binary_store: BinaryStore,
ann_index: ANNIndex,
fts: FTSEngine,
config: Config,
) -> None:
self._embedder = embedder
self._binary_store = binary_store
self._ann_index = ann_index
self._fts = fts
self._config = config
def index_files(
self,
files: list[Path],
*,
root: Path | None = None,
max_chunk_chars: int = _DEFAULT_MAX_CHUNK_CHARS,
chunk_overlap: int = _DEFAULT_CHUNK_OVERLAP,
max_file_size: int = 50_000,
) -> IndexStats:
"""Run the 3-stage pipeline on the given files.
Args:
files: List of file paths to index.
root: Optional root for computing relative paths. If None, uses
each file's absolute path as its identifier.
max_chunk_chars: Maximum characters per chunk.
chunk_overlap: Character overlap between consecutive chunks.
max_file_size: Skip files larger than this (bytes).
Returns:
IndexStats with counts and timing.
"""
if not files:
return IndexStats()
t0 = time.monotonic()
embed_queue: queue.Queue = queue.Queue(maxsize=4)
index_queue: queue.Queue = queue.Queue(maxsize=4)
# Track errors from workers
worker_errors: list[Exception] = []
error_lock = threading.Lock()
def _record_error(exc: Exception) -> None:
with error_lock:
worker_errors.append(exc)
# --- Start workers ---
embed_thread = threading.Thread(
target=self._embed_worker,
args=(embed_queue, index_queue, _record_error),
daemon=True,
name="indexing-embed",
)
index_thread = threading.Thread(
target=self._index_worker,
args=(index_queue, _record_error),
daemon=True,
name="indexing-index",
)
embed_thread.start()
index_thread.start()
# --- Stage 1: chunk files (main thread) ---
chunk_id = 0
files_processed = 0
chunks_created = 0
for fpath in files:
try:
if fpath.stat().st_size > max_file_size:
continue
text = fpath.read_text(encoding="utf-8", errors="replace")
except Exception as exc:
logger.debug("Skipping %s: %s", fpath, exc)
continue
rel_path = str(fpath.relative_to(root)) if root else str(fpath)
file_chunks = self._chunk_text(text, rel_path, max_chunk_chars, chunk_overlap)
if not file_chunks:
continue
files_processed += 1
# Assign sequential IDs and push batch to embed queue
batch_ids = []
batch_texts = []
batch_paths = []
for chunk_text, path in file_chunks:
batch_ids.append(chunk_id)
batch_texts.append(chunk_text)
batch_paths.append(path)
chunk_id += 1
chunks_created += len(batch_ids)
embed_queue.put((batch_ids, batch_texts, batch_paths))
# Signal embed worker: no more data
embed_queue.put(_SENTINEL)
# Wait for workers to finish
embed_thread.join()
index_thread.join()
# --- Final flush ---
self._binary_store.save()
self._ann_index.save()
duration = time.monotonic() - t0
stats = IndexStats(
files_processed=files_processed,
chunks_created=chunks_created,
duration_seconds=round(duration, 2),
)
logger.info(
"Indexing complete: %d files, %d chunks in %.1fs",
stats.files_processed,
stats.chunks_created,
stats.duration_seconds,
)
# Raise first worker error if any occurred
if worker_errors:
raise worker_errors[0]
return stats
# ------------------------------------------------------------------
# Workers
# ------------------------------------------------------------------
def _embed_worker(
self,
in_q: queue.Queue,
out_q: queue.Queue,
on_error: callable,
) -> None:
"""Stage 2: Pull chunk batches, embed, push (ids, vecs, docs) to index queue."""
try:
while True:
item = in_q.get()
if item is _SENTINEL:
break
batch_ids, batch_texts, batch_paths = item
try:
vecs = self._embedder.embed_batch(batch_texts)
vec_array = np.array(vecs, dtype=np.float32)
id_array = np.array(batch_ids, dtype=np.int64)
out_q.put((id_array, vec_array, batch_texts, batch_paths))
except Exception as exc:
logger.error("Embed worker error: %s", exc)
on_error(exc)
finally:
# Signal index worker: no more data
out_q.put(_SENTINEL)
def _index_worker(
    self,
    in_q: queue.Queue,
    on_error: callable,
) -> None:
    """Stage 3: drain embedded batches and persist them to all stores.

    Consumes (id_array, vec_array, texts, paths) tuples until the sentinel
    arrives, writing each batch to the binary store, the ANN index, and the
    FTS engine. Per-batch failures are reported via ``on_error`` and do not
    stop the loop.
    """
    while (item := in_q.get()) is not _SENTINEL:
        id_array, vec_array, texts, paths = item
        try:
            self._binary_store.add(id_array, vec_array)
            self._ann_index.add(id_array, vec_array)
            # FTS expects (doc_id, path, content) rows.
            rows = [
                (int(doc_id), doc_path, doc_text)
                for doc_id, doc_path, doc_text in zip(id_array, paths, texts)
            ]
            self._fts.add_documents(rows)
        except Exception as exc:
            logger.error("Index worker error: %s", exc)
            on_error(exc)
# ------------------------------------------------------------------
# Chunking
# ------------------------------------------------------------------
@staticmethod
def _chunk_text(
    text: str,
    path: str,
    max_chars: int,
    overlap: int,
) -> list[tuple[str, str]]:
    """Split file text into overlapping, line-aligned chunks.

    Args:
        text: Full file content.
        path: Relative path recorded alongside every chunk.
        max_chars: Soft chunk-size limit; a single line longer than this
            still ends up in one oversized chunk (lines are never split).
        overlap: Number of trailing characters carried into the next chunk
            for context. Values <= 0 disable overlap.

    Returns:
        List of (chunk_text, path) tuples; empty for whitespace-only input.
    """
    if not text.strip():
        return []
    chunks: list[tuple[str, str]] = []
    current: list[str] = []
    current_len = 0
    for line in text.splitlines(keepends=True):
        if current_len + len(line) > max_chars and current:
            chunk = "".join(current)
            chunks.append((chunk, path))
            # Seed the next chunk with the tail of this one for context.
            # Guard overlap <= 0: s[-0:] is the WHOLE string, which would
            # carry the entire previous chunk forward and make every
            # subsequent chunk grow without bound.
            tail = chunk[-overlap:] if overlap > 0 else ""
            current = [tail] if tail else []
            current_len = len(tail)
        current.append(line)
        current_len += len(line)
    if current:
        chunks.append(("".join(current), path))
    return chunks

View File

@@ -0,0 +1,5 @@
from .base import BaseReranker
from .local import FastEmbedReranker
from .api import APIReranker
__all__ = ["BaseReranker", "FastEmbedReranker", "APIReranker"]

View File

@@ -0,0 +1,103 @@
from __future__ import annotations
import logging
import time
import httpx
from codexlens_search.config import Config
from .base import BaseReranker
logger = logging.getLogger(__name__)
class APIReranker(BaseReranker):
    """Reranker backed by a remote HTTP API (SiliconFlow/Cohere/Jina format).

    Documents are packed into token-budgeted batches, each batch is scored
    via the remote ``/rerank`` endpoint with retry on transient failures,
    and scores are reassembled in the original document order.
    """

    def __init__(self, config: Config) -> None:
        self._config = config
        self._client = httpx.Client(
            headers={
                "Authorization": f"Bearer {config.reranker_api_key}",
                "Content-Type": "application/json",
            },
        )

    def close(self) -> None:
        """Release the underlying HTTP connection pool."""
        self._client.close()

    def score_pairs(self, query: str, documents: list[str]) -> list[float]:
        """Score (query, doc) pairs. Returns one float per document, in order.

        Documents the API does not score keep a default score of 0.0.

        Raises:
            RuntimeError: if the API keeps failing after all retries.
            httpx.HTTPStatusError: for non-retryable HTTP error responses.
        """
        if not documents:
            return []
        max_tokens = self._config.reranker_api_max_tokens_per_batch
        batches = self._split_batches(documents, max_tokens)
        scores = [0.0] * len(documents)
        for batch in batches:
            batch_scores = self._call_api_with_retry(query, batch)
            for orig_idx, score in batch_scores.items():
                scores[orig_idx] = score
        return scores

    def _split_batches(
        self, documents: list[str], max_tokens: int
    ) -> list[list[tuple[int, str]]]:
        """Greedily pack (index, text) pairs into batches under *max_tokens*.

        Token counts are approximated as len(text) // 4. A single oversized
        document still gets its own batch rather than being dropped.
        """
        batches: list[list[tuple[int, str]]] = []
        current_batch: list[tuple[int, str]] = []
        current_tokens = 0
        for idx, text in enumerate(documents):
            doc_tokens = len(text) // 4
            if current_tokens + doc_tokens > max_tokens and current_batch:
                batches.append(current_batch)
                current_batch = []
                current_tokens = 0
            current_batch.append((idx, text))
            current_tokens += doc_tokens
        if current_batch:
            batches.append(current_batch)
        return batches

    def _call_api_with_retry(
        self,
        query: str,
        docs: list[tuple[int, str]],
        max_retries: int = 3,
    ) -> dict[int, float]:
        """POST one batch to the rerank endpoint with exponential backoff.

        Retries on connection errors and on HTTP 429/503; other HTTP errors
        raise immediately via raise_for_status().

        Returns:
            Mapping of original document index -> relevance score.
        """
        url = self._config.reranker_api_url.rstrip("/") + "/rerank"
        payload = {
            "model": self._config.reranker_api_model,
            "query": query,
            "documents": [t for _, t in docs],
        }
        # Remember why the previous attempt failed so the final error is
        # informative even when the failure was a retryable HTTP status
        # (the original reported "Last error: None" in that case).
        last_error: str | None = None
        for attempt in range(max_retries):
            if attempt:
                # Backoff between attempts (0.5s, 1s, ...); no sleep after
                # the final attempt since there is nothing left to retry.
                time.sleep((2 ** (attempt - 1)) * 0.5)
            try:
                response = self._client.post(url, json=payload)
            except Exception as exc:
                last_error = repr(exc)
                continue
            if response.status_code in (429, 503):
                last_error = f"HTTP {response.status_code}"
                logger.warning(
                    "API reranker returned HTTP %s (attempt %d/%d), retrying...",
                    response.status_code,
                    attempt + 1,
                    max_retries,
                )
                continue
            response.raise_for_status()
            data = response.json()
            results = data.get("results", [])
            scores: dict[int, float] = {}
            for item in results:
                # 'index' is local to this batch; map it back to the
                # position in the caller's full document list.
                local_idx = int(item["index"])
                orig_idx = docs[local_idx][0]
                scores[orig_idx] = float(item["relevance_score"])
            return scores
        raise RuntimeError(
            f"API reranker failed after {max_retries} attempts. Last error: {last_error}"
        )

View File

@@ -0,0 +1,8 @@
from __future__ import annotations
from abc import ABC, abstractmethod
class BaseReranker(ABC):
    """Abstract interface for rerankers that score query/document relevance."""

    @abstractmethod
    def score_pairs(self, query: str, documents: list[str]) -> list[float]:
        """Score (query, doc) pairs. Returns list of floats same length as documents."""
        ...

View File

@@ -0,0 +1,25 @@
from __future__ import annotations
from codexlens_search.config import Config
from .base import BaseReranker
class FastEmbedReranker(BaseReranker):
    """Local reranker backed by fastembed TextCrossEncoder.

    The cross-encoder model is loaded lazily on first use so importing this
    module stays cheap and model initialization cost is only paid when
    reranking actually happens.
    """

    def __init__(self, config: Config) -> None:
        self._config = config
        # Lazily-initialized TextCrossEncoder instance (see _load).
        self._model = None

    def _load(self) -> None:
        # Import inside the method: fastembed is a heavy dependency and only
        # needed once reranking is requested.
        if self._model is None:
            from fastembed.rerank.cross_encoder import TextCrossEncoder
            self._model = TextCrossEncoder(model_name=self._config.reranker_model)

    def score_pairs(self, query: str, documents: list[str]) -> list[float]:
        # Returns one score per document, in input order; unscored docs stay 0.0.
        # NOTE(review): assumes TextCrossEncoder.rerank yields result objects
        # exposing .index and .score; some fastembed versions return plain
        # float scores in input order — confirm against the pinned version.
        self._load()
        results = list(self._model.rerank(query, documents))
        scores = [0.0] * len(documents)
        for r in results:
            scores[r.index] = float(r.score)
        return scores

View File

@@ -0,0 +1,8 @@
from .fts import FTSEngine
from .fusion import reciprocal_rank_fusion, detect_query_intent, QueryIntent, DEFAULT_WEIGHTS
from .pipeline import SearchPipeline, SearchResult
__all__ = [
"FTSEngine", "reciprocal_rank_fusion", "detect_query_intent",
"QueryIntent", "DEFAULT_WEIGHTS", "SearchPipeline", "SearchResult",
]

View File

@@ -0,0 +1,69 @@
from __future__ import annotations
import sqlite3
from pathlib import Path
class FTSEngine:
def __init__(self, db_path: str | Path) -> None:
self._conn = sqlite3.connect(str(db_path), check_same_thread=False)
self._conn.execute(
"CREATE VIRTUAL TABLE IF NOT EXISTS docs "
"USING fts5(content, tokenize='porter unicode61')"
)
self._conn.execute(
"CREATE TABLE IF NOT EXISTS docs_meta "
"(id INTEGER PRIMARY KEY, path TEXT)"
)
self._conn.commit()
def add_documents(self, docs: list[tuple[int, str, str]]) -> None:
"""Add documents in batch. docs: list of (id, path, content)."""
if not docs:
return
self._conn.executemany(
"INSERT OR REPLACE INTO docs_meta (id, path) VALUES (?, ?)",
[(doc_id, path) for doc_id, path, content in docs],
)
self._conn.executemany(
"INSERT OR REPLACE INTO docs (rowid, content) VALUES (?, ?)",
[(doc_id, content) for doc_id, path, content in docs],
)
self._conn.commit()
def exact_search(self, query: str, top_k: int = 50) -> list[tuple[int, float]]:
"""FTS5 MATCH query, return (id, bm25_score) sorted by score descending."""
try:
rows = self._conn.execute(
"SELECT rowid, bm25(docs) AS score FROM docs "
"WHERE docs MATCH ? ORDER BY score LIMIT ?",
(query, top_k),
).fetchall()
except sqlite3.OperationalError:
return []
# bm25 in SQLite FTS5 returns negative values (lower = better match)
# Negate so higher is better
return [(int(row[0]), -float(row[1])) for row in rows]
def fuzzy_search(self, query: str, top_k: int = 50) -> list[tuple[int, float]]:
"""Prefix search: each token + '*', return (id, score) sorted descending."""
tokens = query.strip().split()
if not tokens:
return []
prefix_query = " ".join(t + "*" for t in tokens)
try:
rows = self._conn.execute(
"SELECT rowid, bm25(docs) AS score FROM docs "
"WHERE docs MATCH ? ORDER BY score LIMIT ?",
(prefix_query, top_k),
).fetchall()
except sqlite3.OperationalError:
return []
return [(int(row[0]), -float(row[1])) for row in rows]
def get_content(self, doc_id: int) -> str:
"""Retrieve content for a doc_id."""
row = self._conn.execute(
"SELECT content FROM docs WHERE rowid = ?", (doc_id,)
).fetchone()
return row[0] if row else ""

View File

@@ -0,0 +1,106 @@
from __future__ import annotations
import re
from enum import Enum
DEFAULT_WEIGHTS: dict[str, float] = {
"exact": 0.25,
"fuzzy": 0.10,
"vector": 0.50,
"graph": 0.15,
}
_CODE_CAMEL_RE = re.compile(r"[a-z][A-Z]")
_CODE_SNAKE_RE = re.compile(r"\b[a-z_]+_[a-z_]+\b")
_CODE_SYMBOLS_RE = re.compile(r"[.\[\](){}]|->|::")
_CODE_KEYWORDS_RE = re.compile(r"\b(import|def|class|return|from|async|await|lambda|yield)\b")
_QUESTION_WORDS_RE = re.compile(r"\b(how|what|why|when|where|which|who|does|do|is|are|can|should)\b", re.IGNORECASE)
class QueryIntent(Enum):
CODE_SYMBOL = "code_symbol"
NATURAL_LANGUAGE = "natural"
MIXED = "mixed"
def detect_query_intent(query: str) -> QueryIntent:
"""Detect whether query is a code symbol, natural language, or mixed."""
words = query.strip().split()
word_count = len(words)
code_signals = 0
natural_signals = 0
if _CODE_CAMEL_RE.search(query):
code_signals += 2
if _CODE_SNAKE_RE.search(query):
code_signals += 2
if _CODE_SYMBOLS_RE.search(query):
code_signals += 2
if _CODE_KEYWORDS_RE.search(query):
code_signals += 2
if "`" in query:
code_signals += 1
if word_count < 4:
code_signals += 1
if _QUESTION_WORDS_RE.search(query):
natural_signals += 2
if word_count > 5:
natural_signals += 2
if code_signals == 0 and word_count >= 3:
natural_signals += 1
if code_signals >= 2 and natural_signals == 0:
return QueryIntent.CODE_SYMBOL
if natural_signals >= 2 and code_signals == 0:
return QueryIntent.NATURAL_LANGUAGE
if code_signals >= 2 and natural_signals == 0:
return QueryIntent.CODE_SYMBOL
if natural_signals > code_signals:
return QueryIntent.NATURAL_LANGUAGE
if code_signals > natural_signals:
return QueryIntent.CODE_SYMBOL
return QueryIntent.MIXED
def get_adaptive_weights(intent: QueryIntent, base: dict | None = None) -> dict[str, float]:
"""Return weights adapted to query intent."""
weights = dict(base or DEFAULT_WEIGHTS)
if intent == QueryIntent.CODE_SYMBOL:
weights["exact"] = 0.45
weights["vector"] = 0.35
elif intent == QueryIntent.NATURAL_LANGUAGE:
weights["vector"] = 0.65
weights["exact"] = 0.15
# MIXED: use weights as-is
return weights
def reciprocal_rank_fusion(
results: dict[str, list[tuple[int, float]]],
weights: dict[str, float] | None = None,
k: int = 60,
) -> list[tuple[int, float]]:
"""Fuse ranked result lists using Reciprocal Rank Fusion.
results: {source_name: [(doc_id, score), ...]} each list sorted desc by score.
weights: weight per source (defaults to equal weight across all sources).
k: RRF constant (default 60).
Returns sorted list of (doc_id, fused_score) descending.
"""
if not results:
return []
sources = list(results.keys())
if weights is None:
equal_w = 1.0 / len(sources)
weights = {s: equal_w for s in sources}
scores: dict[int, float] = {}
for source, ranked_list in results.items():
w = weights.get(source, 0.0)
for rank, (doc_id, _) in enumerate(ranked_list, start=1):
scores[doc_id] = scores.get(doc_id, 0.0) + w * (1.0 / (k + rank))
return sorted(scores.items(), key=lambda x: x[1], reverse=True)

View File

@@ -0,0 +1,163 @@
from __future__ import annotations
import logging
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
import numpy as np
from ..config import Config
from ..core import ANNIndex, BinaryStore
from ..embed import BaseEmbedder
from ..rerank import BaseReranker
from .fts import FTSEngine
from .fusion import (
DEFAULT_WEIGHTS,
detect_query_intent,
get_adaptive_weights,
reciprocal_rank_fusion,
)
_log = logging.getLogger(__name__)
@dataclass
class SearchResult:
    """One ranked hit returned by SearchPipeline.search()."""

    # Chunk/document id as stored in the indexes.
    id: int
    # Source file path recorded at indexing time ("" if unknown).
    path: str
    # Final relevance score (reranker score when reranking succeeds).
    score: float
    # Leading excerpt of the chunk content (up to 200 chars).
    snippet: str = ""
class SearchPipeline:
    """Hybrid search: vector + FTS retrieval, RRF fusion, then reranking.

    Retrieval stages degrade gracefully: a failure in vector search, FTS, or
    the reranker logs a warning and falls back to whatever signals remain.
    """

    # Number of top fused candidates passed to the reranker.
    _RERANK_CANDIDATES = 50

    def __init__(
        self,
        embedder: BaseEmbedder,
        binary_store: BinaryStore,
        ann_index: ANNIndex,
        reranker: BaseReranker,
        fts: FTSEngine,
        config: Config,
    ) -> None:
        self._embedder = embedder
        self._binary_store = binary_store
        self._ann_index = ann_index
        self._reranker = reranker
        self._fts = fts
        self._config = config

    # -- Helper: vector search (binary coarse + ANN fine) -----------------
    def _vector_search(
        self, query_vec: np.ndarray
    ) -> list[tuple[int, float]]:
        """Run binary coarse search then ANN fine search and intersect.

        Falls back to the raw ANN results when the intersection is empty, so
        a weak binary pre-filter never empties the vector signal entirely.
        """
        cfg = self._config
        # Binary coarse search -> candidate_ids set
        candidate_ids_list, _ = self._binary_store.coarse_search(
            query_vec, top_k=cfg.binary_top_k
        )
        candidate_ids = set(candidate_ids_list)
        # ANN fine search on full index, then intersect with binary candidates
        ann_ids, ann_scores = self._ann_index.fine_search(
            query_vec, top_k=cfg.ann_top_k
        )
        # Keep only results that appear in binary candidates (2-stage funnel)
        vector_results: list[tuple[int, float]] = [
            (int(doc_id), float(score))
            for doc_id, score in zip(ann_ids, ann_scores)
            if int(doc_id) in candidate_ids
        ]
        # Fall back to full ANN results if intersection is empty
        if not vector_results:
            vector_results = [
                (int(doc_id), float(score))
                for doc_id, score in zip(ann_ids, ann_scores)
            ]
        return vector_results

    # -- Helper: FTS search (exact + fuzzy) ------------------------------
    def _fts_search(
        self, query: str
    ) -> tuple[list[tuple[int, float]], list[tuple[int, float]]]:
        """Run exact and fuzzy full-text search; returns (exact, fuzzy) lists."""
        cfg = self._config
        exact_results = self._fts.exact_search(query, top_k=cfg.fts_top_k)
        fuzzy_results = self._fts.fuzzy_search(query, top_k=cfg.fts_top_k)
        return exact_results, fuzzy_results

    # -- Main search entry point -----------------------------------------
    def search(self, query: str, top_k: int | None = None) -> list[SearchResult]:
        """Run the full hybrid search for *query*.

        Args:
            query: Free-text or code-like search query.
            top_k: Maximum results to return; defaults to config.reranker_top_k.

        Returns:
            SearchResult list sorted by final score, best first; empty when
            no retrieval source produced any results.
        """
        cfg = self._config
        final_top_k = top_k if top_k is not None else cfg.reranker_top_k
        # 1. Detect intent -> adaptive weights
        intent = detect_query_intent(query)
        weights = get_adaptive_weights(intent, cfg.fusion_weights)
        # 2. Embed query
        query_vec = self._embedder.embed_single(query)
        # 3. Parallel vector + FTS search
        vector_results: list[tuple[int, float]] = []
        exact_results: list[tuple[int, float]] = []
        fuzzy_results: list[tuple[int, float]] = []
        with ThreadPoolExecutor(max_workers=2) as pool:
            vec_future = pool.submit(self._vector_search, query_vec)
            fts_future = pool.submit(self._fts_search, query)
            try:
                vector_results = vec_future.result()
            except Exception:
                _log.warning("Vector search failed, using empty results", exc_info=True)
            try:
                exact_results, fuzzy_results = fts_future.result()
            except Exception:
                _log.warning("FTS search failed, using empty results", exc_info=True)
        # 4. RRF fusion over whichever sources produced results
        fusion_input: dict[str, list[tuple[int, float]]] = {}
        if vector_results:
            fusion_input["vector"] = vector_results
        if exact_results:
            fusion_input["exact"] = exact_results
        if fuzzy_results:
            fusion_input["fuzzy"] = fuzzy_results
        if not fusion_input:
            return []
        fused = reciprocal_rank_fusion(fusion_input, weights=weights, k=cfg.fusion_k)
        # 5. Rerank top candidates. A reranker failure (e.g. a remote API
        #    outage) previously aborted the whole search; fall back to the
        #    fused order instead, consistent with how the retrieval stages
        #    above degrade gracefully.
        rerank_ids = [doc_id for doc_id, _ in fused[: self._RERANK_CANDIDATES]]
        contents = [self._fts.get_content(doc_id) for doc_id in rerank_ids]
        try:
            rerank_scores = self._reranker.score_pairs(query, contents)
            ranked = sorted(
                zip(rerank_ids, rerank_scores), key=lambda x: x[1], reverse=True
            )
        except Exception:
            _log.warning("Reranker failed, falling back to fusion order", exc_info=True)
            ranked = fused[: self._RERANK_CANDIDATES]
        # 6. Build SearchResult list
        results: list[SearchResult] = []
        for doc_id, score in ranked[:final_top_k]:
            # NOTE(review): reaches into FTSEngine internals for the path;
            # consider exposing a get_path() accessor on FTSEngine.
            path = self._fts._conn.execute(
                "SELECT path FROM docs_meta WHERE id = ?", (doc_id,)
            ).fetchone()
            results.append(
                SearchResult(
                    id=doc_id,
                    path=path[0] if path else "",
                    score=float(score),
                    snippet=self._fts.get_content(doc_id)[:200],
                )
            )
        return results