# Claude-Code-Workflow/codex-lens-v2/tests/unit/test_search.py

"""Unit tests for search layer: FTSEngine, fusion, and SearchPipeline."""
from __future__ import annotations

from unittest.mock import MagicMock

import pytest

from codexlens.search.fts import FTSEngine
from codexlens.search.fusion import (
    DEFAULT_WEIGHTS,
    QueryIntent,
    detect_query_intent,
    get_adaptive_weights,
    reciprocal_rank_fusion,
)
from codexlens.search.pipeline import SearchPipeline, SearchResult
from codexlens.config import Config


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def make_fts(docs: list[tuple[int, str, str]] | None = None) -> FTSEngine:
    """Return a fresh ``":memory:"`` FTSEngine, pre-loaded with *docs* if given.

    Args:
        docs: Optional rows handed to ``FTSEngine.add_documents``; the tests
            in this module pass tuples such as ``(1, "a.py", "some text")``.
            ``None`` or an empty list leaves the engine empty.

    Returns:
        The newly created engine.
    """
    fts = FTSEngine(":memory:")
    if not docs:
        # Nothing to index: hand back the empty engine untouched.
        return fts
    fts.add_documents(docs)
    return fts


# ---------------------------------------------------------------------------
# FTSEngine tests
# ---------------------------------------------------------------------------

def test_fts_add_and_exact_search():
    """Exact search for a term found only in doc 1 must rank doc 1 first."""
    docs = [
        (1, "a.py", "def authenticate user password login"),
        (2, "b.py", "connect to database with credentials"),
        (3, "c.py", "render template html response"),
    ]
    engine = make_fts(docs)
    results = engine.exact_search("authenticate", top_k=10)
    ids = [r[0] for r in results]
    assert 1 in ids, "doc 1 should match 'authenticate'"
    # Doc 1 is the only document containing the query term, so it must lead
    # the ranking regardless of which other documents (if any) are returned.
    # Checked unconditionally so the ranking is always verified.
    assert ids[0] == 1, "doc 1 must rank first for 'authenticate'"


def test_fts_fuzzy_search_prefix():
    """Fuzzy search for the prefix 'auth' must return the 'authentication' doc."""
    engine = make_fts(
        [
            (10, "auth.py", "authentication token refresh"),
            (11, "db.py", "database connection pool"),
            (12, "ui.py", "render button click handler"),
        ]
    )
    # Only doc 10 contains a word that starts with the queried prefix.
    hits = engine.fuzzy_search("auth", top_k=10)
    assert any(hit[0] == 10 for hit in hits), "prefix 'auth' should match doc 10 with 'authentication'"


# ---------------------------------------------------------------------------
# RRF fusion tests
# ---------------------------------------------------------------------------

def test_rrf_fusion_ordering():
    """A document listed first by every source must come first after fusion."""
    rankings = {
        "a": [(1, 0.9), (2, 0.5), (3, 0.2)],
        "b": [(1, 0.8), (3, 0.6), (2, 0.1)],
    }
    fused = reciprocal_rank_fusion(rankings)
    top_id = fused[0][0]
    assert top_id == 1, "doc 1 agreed top by both sources must rank first"


def test_rrf_equal_weight_default():
    """Passing ``weights=None`` must not raise and must keep both documents."""
    rankings = {
        "exact": [(5, 1.0), (6, 0.8)],
        "vector": [(6, 0.9), (5, 0.7)],
    }
    # Explicit None exercises the default-weights path of the fusion helper.
    fused = reciprocal_rank_fusion(rankings, weights=None)
    assert len(fused) == 2
    fused_ids = [entry[0] for entry in fused]
    for expected_id in (5, 6):
        assert expected_id in fused_ids


# ---------------------------------------------------------------------------
# detect_query_intent tests
# ---------------------------------------------------------------------------

def test_detect_intent_code_symbol():
    """A query written like a function definition is classified as CODE_SYMBOL."""
    intent = detect_query_intent("def authenticate()")
    assert intent == QueryIntent.CODE_SYMBOL


def test_detect_intent_natural():
    """A plain-English question is classified as NATURAL_LANGUAGE."""
    intent = detect_query_intent("how do I authenticate users")
    assert intent == QueryIntent.NATURAL_LANGUAGE


# ---------------------------------------------------------------------------
# SearchPipeline tests
# ---------------------------------------------------------------------------

def _make_pipeline(fts: FTSEngine, top_k: int = 5) -> SearchPipeline:
    """Assemble a SearchPipeline around *fts* with mocked heavy components.

    The embedder, binary store, ANN index and reranker are ``MagicMock``
    stand-ins with canned return values; *fts* is passed through unchanged.

    Args:
        fts: Full-text engine handed to the pipeline.
        top_k: Value written to ``reranker_top_k`` on the ``Config.small()``
            configuration.

    Returns:
        A SearchPipeline wired to the mocks and the given engine.
    """

    def _descending_scores(q, contents):
        # One score per content string: 0.9 for the first, 0.1 less for each
        # following one. The query argument is accepted but ignored.
        return [0.9 - i * 0.1 for i, _ in enumerate(contents)]

    config = Config.small()
    config.reranker_top_k = top_k

    mock_embedder = MagicMock()
    # A single embedding vector sized to the configured dimension.
    mock_embedder.embed.return_value = [[0.1] * config.embed_dim]

    mock_binary_store = MagicMock()
    mock_binary_store.coarse_search.return_value = ([1, 2, 3], None)

    mock_ann_index = MagicMock()
    mock_ann_index.fine_search.return_value = ([1, 2, 3], [0.9, 0.8, 0.7])

    mock_reranker = MagicMock()
    mock_reranker.score_pairs.side_effect = _descending_scores

    return SearchPipeline(
        embedder=mock_embedder,
        binary_store=mock_binary_store,
        ann_index=mock_ann_index,
        reranker=mock_reranker,
        fts=fts,
        config=config,
    )


def test_pipeline_search_returns_results():
    """Searching a populated index yields at least one SearchResult instance."""
    corpus = [
        (1, "a.py", "test content alpha"),
        (2, "b.py", "test content beta"),
        (3, "c.py", "test content gamma"),
    ]
    pipeline = _make_pipeline(make_fts(corpus))
    hits = pipeline.search("test")
    assert len(hits) > 0
    # Every returned item must be a SearchResult.
    for hit in hits:
        assert isinstance(hit, SearchResult)


def test_pipeline_top_k_limit():
    """The pipeline returns no more than the requested ``top_k`` results."""
    limit = 2
    # Five documents that all contain the query word, more than the limit.
    corpus = [
        (1, "a.py", "hello world one"),
        (2, "b.py", "hello world two"),
        (3, "c.py", "hello world three"),
        (4, "d.py", "hello world four"),
        (5, "e.py", "hello world five"),
    ]
    pipeline = _make_pipeline(make_fts(corpus), top_k=limit)
    hits = pipeline.search("hello", top_k=limit)
    assert len(hits) <= limit, "pipeline must respect top_k limit"