Implement search and reranking functionality with FTS and embedding support

- Added the BaseReranker abstract class, which defines the reranking interface.
- Implemented FastEmbedReranker, which uses fastembed's TextCrossEncoder to score document-query pairs.
- Introduced FTSEngine for full-text search using SQLite FTS5.
- Developed SearchPipeline to integrate embedding, binary search, ANN indexing, FTS, and reranking.
- Added fusion methods that combine results from different search strategies using Reciprocal Rank Fusion.
- Created unit and integration tests for the new search and reranking components.
- Established configuration management for search parameters and models.
2026-03-18 18:48:48 +08:00 · 2026-03-16 23:03:17 +08:00
parent 5a4b18d9b1
commit de4158597b
41 changed files with 2655 additions and 1848 deletions
--- a/codex-lens-v2/tests/unit/test_embed.py
+++ b/codex-lens-v2/tests/unit/test_embed.py
@@ -0,0 +1,80 @@
+from __future__ import annotations
+
+import sys
+import types
+import unittest
+from unittest.mock import MagicMock, patch
+
+import numpy as np
+
+
+def _make_fastembed_mock():
+    """Register a minimal ``fastembed`` stub and return the registered module.
+
+    The stub exposes ``TextEmbedding`` as a ``MagicMock`` so that
+    ``import fastembed`` succeeds without the real package.  An entry that
+    already exists in ``sys.modules`` is left untouched.
+
+    Returns:
+        The module stored under ``sys.modules["fastembed"]``: the new stub,
+        or the pre-existing entry when one was already registered.
+    """
+    stub = types.ModuleType("fastembed")
+    stub.TextEmbedding = MagicMock()
+    # Return what setdefault hands back rather than ``stub``: when an entry
+    # already exists, ``stub`` is never registered, so configuring it would
+    # have no effect on code that imports fastembed.
+    return sys.modules.setdefault("fastembed", stub)
+
+
+# Install the fastembed stub at import time, ahead of the codexlens imports
+# below (which is why those imports carry E402 suppressions).
+_make_fastembed_mock()
+
+from codexlens.config import Config  # noqa: E402
+from codexlens.embed.base import BaseEmbedder  # noqa: E402
+from codexlens.embed.local import EMBED_PROFILES, FastEmbedEmbedder  # noqa: E402
+
+
+class TestEmbedSingle(unittest.TestCase):
+    """Checks the array returned by ``FastEmbedEmbedder.embed_single``."""
+
+    def test_embed_single_returns_float32_ndarray(self):
+        """A float64 model output must come back as a (384,) float32 ndarray."""
+        backend = MagicMock()
+        backend.embed.return_value = iter([np.ones(384, dtype=np.float64)])
+
+        embedder = FastEmbedEmbedder(Config())
+        # Assigning the private ``_model`` slot skips the lazy load, so the
+        # real fastembed package is never needed.
+        embedder._model = backend
+
+        vector = embedder.embed_single("hello world")
+
+        self.assertIsInstance(vector, np.ndarray)
+        self.assertEqual(vector.dtype, np.float32)
+        self.assertEqual(vector.shape, (384,))
+
+
+class TestEmbedBatch(unittest.TestCase):
+    """Checks the container and element types returned by ``embed_batch``."""
+
+    def test_embed_batch_returns_list(self):
+        """Three float64 model outputs must yield a list of three float32 ndarrays."""
+        backend = MagicMock()
+        backend.embed.return_value = iter(
+            [np.ones(384, dtype=np.float64) * scale for scale in range(3)]
+        )
+
+        embedder = FastEmbedEmbedder(Config())
+        embedder._model = backend  # bypass lazy model loading
+
+        vectors = embedder.embed_batch(["a", "b", "c"])
+
+        self.assertIsInstance(vectors, list)
+        self.assertEqual(len(vectors), 3)
+        for vector in vectors:
+            self.assertIsInstance(vector, np.ndarray)
+            self.assertEqual(vector.dtype, np.float32)
+
+
+class TestEmbedProfiles(unittest.TestCase):
+    """Sanity checks on the ``EMBED_PROFILES`` mapping."""
+
+    def test_embed_profiles_all_have_valid_keys(self):
+        """The mapping must contain exactly the four expected profile names."""
+        self.assertEqual(set(EMBED_PROFILES), {"small", "base", "large", "code"})
+
+    def test_embed_profiles_model_ids_non_empty(self):
+        """Every profile must map to a non-empty string model id."""
+        for key in EMBED_PROFILES:
+            model_id = EMBED_PROFILES[key]
+            not_str_msg = f"{key} model id should be str"
+            empty_msg = f"{key} model id should be non-empty"
+            self.assertIsInstance(model_id, str, msg=not_str_msg)
+            self.assertTrue(len(model_id) > 0, msg=empty_msg)
+
+
+class TestBaseEmbedderAbstract(unittest.TestCase):
+    """Checks that ``BaseEmbedder`` cannot be instantiated directly."""
+
+    def test_base_embedder_is_abstract(self):
+        """Calling ``BaseEmbedder`` with no arguments must raise ``TypeError``."""
+        self.assertRaises(TypeError, BaseEmbedder)
+
+
+# Run the tests in this module when the file is executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()