Files
Claude-Code-Workflow/codex-lens-v2/tests/unit/test_search.py
catlog22 6712965b7f refactor: rename package codexlens -> codexlens_search for independent distribution
Rename the v2 search engine package to `codexlens-search` (import as
`codexlens_search`) so it can be installed independently and consumed
by the original codex-lens as a dependency. This avoids package path
conflicts since both previously used `src/codexlens/`.

Changes:
- Rename src/codexlens/ -> src/codexlens_search/
- Update pyproject.toml: name=codexlens-search, version=0.2.0
- Update all imports across source, tests, and scripts
- Add public API exports in __init__.py (Config, SearchPipeline,
  IndexingPipeline, SearchResult, IndexStats)

37/37 tests pass. No functional changes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-16 23:23:22 +08:00

157 lines
5.2 KiB
Python

"""Unit tests for search layer: FTSEngine, fusion, and SearchPipeline."""
from __future__ import annotations
from unittest.mock import MagicMock
import pytest
from codexlens_search.search.fts import FTSEngine
from codexlens_search.search.fusion import (
DEFAULT_WEIGHTS,
QueryIntent,
detect_query_intent,
get_adaptive_weights,
reciprocal_rank_fusion,
)
from codexlens_search.search.pipeline import SearchPipeline, SearchResult
from codexlens_search.config import Config
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def make_fts(docs: list[tuple[int, str, str]] | None = None) -> FTSEngine:
    """Return an in-memory FTSEngine, pre-populated with *docs* when given.

    ``None`` or an empty list yields an engine to which nothing was added.
    """
    fts = FTSEngine(":memory:")
    if not docs:
        return fts
    fts.add_documents(docs)
    return fts
# ---------------------------------------------------------------------------
# FTSEngine tests
# ---------------------------------------------------------------------------
def test_fts_add_and_exact_search():
    """Exact search for 'authenticate' must return the document containing it."""
    engine = make_fts(
        [
            (1, "a.py", "def authenticate user password login"),
            (2, "b.py", "connect to database with credentials"),
            (3, "c.py", "render template html response"),
        ]
    )
    hits = engine.exact_search("authenticate", top_k=10)
    matched_ids = [hit[0] for hit in hits]
    assert 1 in matched_ids, "doc 1 should match 'authenticate'"
    # If doc 2 is returned at all, doc 1 must still be the top-ranked hit.
    if 2 in matched_ids:
        assert hits[0][0] == 1
def test_fts_fuzzy_search_prefix():
    """Fuzzy search on a word prefix must return the document with the full word."""
    corpus = [
        (10, "auth.py", "authentication token refresh"),
        (11, "db.py", "database connection pool"),
        (12, "ui.py", "render button click handler"),
    ]
    engine = make_fts(corpus)
    # 'auth' is a prefix of 'authentication', which appears only in doc 10.
    hits = engine.fuzzy_search("auth", top_k=10)
    matched_ids = [hit[0] for hit in hits]
    assert 10 in matched_ids, "prefix 'auth' should match doc 10 with 'authentication'"
# ---------------------------------------------------------------------------
# RRF fusion tests
# ---------------------------------------------------------------------------
def test_rrf_fusion_ordering():
    """A document ranked first by every source must come first after fusion."""
    rankings = {
        "a": [(1, 0.9), (2, 0.5), (3, 0.2)],
        "b": [(1, 0.8), (3, 0.6), (2, 0.1)],
    }
    fused = reciprocal_rank_fusion(rankings)
    top_doc_id = fused[0][0]
    assert top_doc_id == 1, "doc 1 agreed top by both sources must rank first"
def test_rrf_equal_weight_default():
    """Passing ``weights=None`` must not raise and must keep every document."""
    rankings = {
        "exact": [(5, 1.0), (6, 0.8)],
        "vector": [(6, 0.9), (5, 0.7)],
    }
    # The call itself is part of the check: it must complete without raising.
    fused = reciprocal_rank_fusion(rankings, weights=None)
    assert len(fused) == 2
    fused_ids = [entry[0] for entry in fused]
    assert 5 in fused_ids
    assert 6 in fused_ids
# ---------------------------------------------------------------------------
# detect_query_intent tests
# ---------------------------------------------------------------------------
def test_detect_intent_code_symbol():
    """A query shaped like a function definition is classified as CODE_SYMBOL."""
    intent = detect_query_intent("def authenticate()")
    assert intent == QueryIntent.CODE_SYMBOL
def test_detect_intent_natural():
    """A plain-English question is classified as NATURAL_LANGUAGE."""
    intent = detect_query_intent("how do I authenticate users")
    assert intent == QueryIntent.NATURAL_LANGUAGE
# ---------------------------------------------------------------------------
# SearchPipeline tests
# ---------------------------------------------------------------------------
def _make_pipeline(fts: FTSEngine, top_k: int = 5) -> SearchPipeline:
    """Assemble a SearchPipeline around *fts* with every heavy component mocked.

    The embedder, binary store, ANN index and reranker are ``MagicMock``
    objects with canned return values; *fts* is passed through unchanged.
    *top_k* is stored on the config as ``reranker_top_k``.
    """
    config = Config.small()
    config.reranker_top_k = top_k

    def _descending_scores(q, contents):
        # One score per content string, starting at 0.9 and dropping 0.1 each.
        return [0.9 - position * 0.1 for position in range(len(contents))]

    # A single embedding vector sized to the config's embedding dimension.
    embedder = MagicMock()
    embedder.embed.return_value = [[0.1] * config.embed_dim]

    binary_store = MagicMock()
    binary_store.coarse_search.return_value = ([1, 2, 3], None)

    ann_index = MagicMock()
    ann_index.fine_search.return_value = ([1, 2, 3], [0.9, 0.8, 0.7])

    reranker = MagicMock()
    reranker.score_pairs.side_effect = _descending_scores

    return SearchPipeline(
        embedder=embedder,
        binary_store=binary_store,
        ann_index=ann_index,
        reranker=reranker,
        fts=fts,
        config=config,
    )
def test_pipeline_search_returns_results():
    """Searching a populated index yields a non-empty set of SearchResult objects."""
    corpus = [
        (1, "a.py", "test content alpha"),
        (2, "b.py", "test content beta"),
        (3, "c.py", "test content gamma"),
    ]
    pipeline = _make_pipeline(make_fts(corpus))
    results = pipeline.search("test")
    assert len(results) > 0
    for result in results:
        assert isinstance(result, SearchResult)
def test_pipeline_top_k_limit():
    """The pipeline must return no more than the requested ``top_k`` results."""
    limit = 2
    corpus = [
        (1, "a.py", "hello world one"),
        (2, "b.py", "hello world two"),
        (3, "c.py", "hello world three"),
        (4, "d.py", "hello world four"),
        (5, "e.py", "hello world five"),
    ]
    # Five matching documents, so the limit is actually exercised.
    pipeline = _make_pipeline(make_fts(corpus), top_k=limit)
    results = pipeline.search("hello", top_k=limit)
    assert len(results) <= limit, "pipeline must respect top_k limit"