refactor: rename package codexlens -> codexlens_search for independent distribution

Rename the v2 search engine package to `codexlens-search` (import as `codexlens_search`) so it can be installed independently and consumed by the original codex-lens as a dependency. This avoids package path conflicts, since both packages previously used `src/codexlens/`.

Changes:
- Rename src/codexlens/ -> src/codexlens_search/
- Update pyproject.toml: name=codexlens-search, version=0.2.0
- Update all imports across source, tests, and scripts
- Add public API exports in __init__.py (Config, SearchPipeline, IndexingPipeline, SearchResult, IndexStats)

37/37 tests pass. No functional changes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-19 18:58:47 +08:00 · 2026-03-16 23:23:22 +08:00
parent a0a50d338a
commit 6712965b7f
32 changed files with 83 additions and 63 deletions
--- a/codex-lens-v2/src/codexlens_search/config.py
+++ b/codex-lens-v2/src/codexlens_search/config.py
@@ -0,0 +1,99 @@
+from __future__ import annotations
+import logging
+from dataclasses import dataclass, field
+
+# Module-level logger named after this module; Config.resolve_embed_providers
+# uses it to report when CUDA is picked by auto-detection.
+log = logging.getLogger(__name__)
+
+
+@dataclass
+class Config:
+    """Tunable settings for the codexlens_search package.
+
+    A plain dataclass: every field has a default, so ``Config()`` is valid
+    and individual values can be overridden by keyword. Fields are grouped
+    by the component named in each section comment. How each value is
+    consumed is not visible in this file; see the code that reads it.
+    """
+
+    # Embedding
+    embed_model: str = "jinaai/jina-embeddings-v2-base-code"
+    # NOTE(review): presumably must match the output width of embed_model - confirm.
+    embed_dim: int = 768
+    embed_batch_size: int = 64
+
+    # GPU / execution providers
+    # Only 'cuda' and 'cpu' are matched literally by resolve_embed_providers();
+    # any other value, including 'auto', goes through auto-detection.
+    device: str = "auto"  # 'auto', 'cuda', 'cpu'
+    # When not None, resolve_embed_providers() returns a copy of this list
+    # and ignores `device`.
+    embed_providers: list[str] | None = None  # explicit ONNX providers override
+
+    # Backend selection: 'auto', 'faiss', 'hnswlib'
+    ann_backend: str = "auto"
+    binary_backend: str = "auto"
+
+    # Indexing pipeline
+    index_workers: int = 2  # number of parallel indexing workers
+
+    # HNSW index (ANNIndex)
+    hnsw_ef: int = 150
+    hnsw_M: int = 32
+    hnsw_ef_construction: int = 200
+
+    # Binary coarse search (BinaryStore)
+    binary_top_k: int = 200
+
+    # ANN fine search
+    ann_top_k: int = 50
+
+    # Reranker
+    reranker_model: str = "BAAI/bge-reranker-v2-m3"
+    reranker_top_k: int = 20
+    reranker_batch_size: int = 32
+
+    # API reranker (optional)
+    # All three string fields default to the empty string.
+    # NOTE(review): presumably an empty reranker_api_url means "no API
+    # reranker" - confirm against the code that reads these.
+    reranker_api_url: str = ""
+    reranker_api_key: str = ""
+    reranker_api_model: str = ""
+    reranker_api_max_tokens_per_batch: int = 2048
+
+    # FTS
+    fts_top_k: int = 50
+
+    # Fusion
+    fusion_k: int = 60  # RRF k parameter
+    # Weights keyed by 'exact', 'fuzzy', 'vector' and 'graph'; the defaults
+    # sum to 1.0. default_factory builds a fresh dict for every instance, so
+    # instances never share one mutable mapping.
+    fusion_weights: dict = field(default_factory=lambda: {
+        "exact": 0.25,
+        "fuzzy": 0.10,
+        "vector": 0.50,
+        "graph": 0.15,
+    })
+
+    def resolve_embed_providers(self) -> list[str]:
+        """Return ONNX execution providers based on device config.
+
+        Priority: explicit embed_providers > device setting > auto-detect.
+
+        A new list is returned on every call. ``device == "cuda"`` returns
+        the CUDA provider followed by the CPU provider without checking
+        that CUDA is actually available. Any ``device`` value other than
+        ``"cuda"`` or ``"cpu"`` (including the default ``"auto"``) is
+        auto-detected: CUDA is chosen only if ``onnxruntime`` can be
+        imported and lists ``CUDAExecutionProvider`` as available;
+        otherwise the result is CPU only.
+        """
+        if self.embed_providers is not None:
+            # Copy so the caller cannot mutate the list stored on the config.
+            return list(self.embed_providers)
+
+        if self.device == "cuda":
+            # NOTE(review): onnxruntime documents the provider list as ordered
+            # by precedence, so CPU here should act as the fallback - confirm
+            # against the code that creates the session.
+            return ["CUDAExecutionProvider", "CPUExecutionProvider"]
+
+        if self.device == "cpu":
+            return ["CPUExecutionProvider"]
+
+        # auto-detect
+        try:
+            # Imported here rather than at module top so this module can be
+            # imported when onnxruntime is not installed.
+            import onnxruntime
+            available = onnxruntime.get_available_providers()
+            if "CUDAExecutionProvider" in available:
+                log.info("CUDA detected via onnxruntime, using GPU for embedding")
+                return ["CUDAExecutionProvider", "CPUExecutionProvider"]
+        except ImportError:
+            # onnxruntime missing: fall through to the CPU-only default below.
+            pass
+
+        return ["CPUExecutionProvider"]
+
+    @classmethod
+    def defaults(cls) -> "Config":
+        """Return an instance with every field at its declared default."""
+        return cls()
+
+    @classmethod
+    def small(cls) -> "Config":
+        """Smaller config for testing or small corpora.
+
+        Lowers hnsw_ef, hnsw_M, binary_top_k, ann_top_k and reranker_top_k;
+        every other field, including hnsw_ef_construction, keeps its default.
+        """
+        return cls(
+            hnsw_ef=50,
+            hnsw_M=16,
+            binary_top_k=50,
+            ann_top_k=20,
+            reranker_top_k=10,
+        )