refactor: rename package codexlens -> codexlens_search for independent distribution

Rename the v2 search engine package to `codexlens-search` (import as
`codexlens_search`) so it can be installed independently and consumed
by the original codex-lens as a dependency. This avoids package path
conflicts since both previously used `src/codexlens/`.

Changes:
- Rename src/codexlens/ -> src/codexlens_search/
- Update pyproject.toml: name=codexlens-search, version=0.2.0
- Update all imports across source, tests, and scripts
- Add public API exports in __init__.py (Config, SearchPipeline,
  IndexingPipeline, SearchResult, IndexStats)

37/37 tests pass. No functional changes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
catlog22
2026-03-16 23:23:22 +08:00
parent a0a50d338a
commit 6712965b7f
32 changed files with 83 additions and 63 deletions

View File

@@ -0,0 +1,99 @@
from __future__ import annotations
import logging
from dataclasses import dataclass, field
log = logging.getLogger(__name__)
@dataclass
class Config:
    """Tunable settings for embedding, indexing, search, reranking and fusion.

    Every field has a default, so ``Config()`` is directly usable; see
    :meth:`defaults` and :meth:`small` for ready-made presets.
    """

    # Embedding
    embed_model: str = "jinaai/jina-embeddings-v2-base-code"
    embed_dim: int = 768
    embed_batch_size: int = 64

    # GPU / execution providers
    device: str = "auto"  # 'auto', 'cuda', 'cpu' (matched case-insensitively)
    embed_providers: list[str] | None = None  # explicit ONNX providers override

    # Backend selection: 'auto', 'faiss', 'hnswlib'
    ann_backend: str = "auto"
    binary_backend: str = "auto"

    # Indexing pipeline
    index_workers: int = 2  # number of parallel indexing workers

    # HNSW index (ANNIndex)
    hnsw_ef: int = 150
    hnsw_M: int = 32
    hnsw_ef_construction: int = 200

    # Binary coarse search (BinaryStore)
    binary_top_k: int = 200

    # ANN fine search
    ann_top_k: int = 50

    # Reranker
    reranker_model: str = "BAAI/bge-reranker-v2-m3"
    reranker_top_k: int = 20
    reranker_batch_size: int = 32

    # API reranker (optional)
    reranker_api_url: str = ""
    reranker_api_key: str = ""
    reranker_api_model: str = ""
    reranker_api_max_tokens_per_batch: int = 2048

    # FTS
    fts_top_k: int = 50

    # Fusion
    fusion_k: int = 60  # RRF k parameter
    # default_factory gives every instance its own dict (no shared mutable default).
    fusion_weights: dict[str, float] = field(default_factory=lambda: {
        "exact": 0.25,
        "fuzzy": 0.10,
        "vector": 0.50,
        "graph": 0.15,
    })

    def resolve_embed_providers(self) -> list[str]:
        """Return ONNX execution providers based on device config.

        Priority: explicit embed_providers > device setting > auto-detect.

        The ``device`` value is compared case-insensitively and with
        surrounding whitespace ignored. A value other than 'auto', 'cuda' or
        'cpu' is treated as 'auto' and a warning is logged. Auto-detection
        picks CUDA only when onnxruntime is importable and reports
        ``CUDAExecutionProvider`` as available; otherwise CPU is used.

        Returns:
            A new list of provider names, safe for the caller to mutate.
        """
        if self.embed_providers is not None:
            # Hand back a copy so callers cannot mutate the configured list.
            return list(self.embed_providers)

        device = self.device.strip().lower()
        if device == "cuda":
            return ["CUDAExecutionProvider", "CPUExecutionProvider"]
        if device == "cpu":
            return ["CPUExecutionProvider"]
        if device != "auto":
            log.warning(
                "Unrecognized device %r; falling back to auto-detection",
                self.device,
            )

        # auto-detect
        try:
            # Imported lazily so onnxruntime stays an optional dependency.
            import onnxruntime
            available = onnxruntime.get_available_providers()
            if "CUDAExecutionProvider" in available:
                log.info("CUDA detected via onnxruntime, using GPU for embedding")
                return ["CUDAExecutionProvider", "CPUExecutionProvider"]
        except ImportError:
            log.debug("onnxruntime is not importable; defaulting to CPU provider")
        return ["CPUExecutionProvider"]

    @classmethod
    def defaults(cls) -> "Config":
        """Return a config with every field at its default value."""
        return cls()

    @classmethod
    def small(cls) -> "Config":
        """Smaller config for testing or small corpora."""
        return cls(
            hnsw_ef=50,
            hnsw_M=16,
            binary_top_k=50,
            ann_top_k=20,
            reranker_top_k=10,
        )