Mirror of https://github.com/catlog22/Claude-Code-Workflow.git, synced 2026-03-19 18:58:47 +08:00
refactor: rename package codexlens -> codexlens_search for independent distribution
Rename the v2 search engine package to `codexlens-search` (import as `codexlens_search`) so it can be installed independently and consumed by the original codex-lens as a dependency. This avoids package path conflicts, since both previously used `src/codexlens/`.

Changes:
- Rename src/codexlens/ -> src/codexlens_search/
- Update pyproject.toml: name=codexlens-search, version=0.2.0
- Update all imports across source, tests, and scripts
- Add public API exports in __init__.py (Config, SearchPipeline, IndexingPipeline, SearchResult, IndexStats)

37/37 tests pass. No functional changes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
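For downstream consumers, the practical change is the import path. A minimal sketch of what that looks like, using only the names exported per the commit message (the pipeline constructors are not shown in this diff, so only Config is exercised here):

    # Consuming the renamed package; exported names per the commit message above.
    from codexlens_search import Config, IndexingPipeline, SearchPipeline

    config = Config.defaults()
    print(config.resolve_embed_providers())  # ONNX execution providers, see config.py below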
codex-lens-v2/src/codexlens_search/config.py (new file, 99 lines)
@@ -0,0 +1,99 @@
from __future__ import annotations
import logging
from dataclasses import dataclass, field

log = logging.getLogger(__name__)


@dataclass
class Config:
    # Embedding
    embed_model: str = "jinaai/jina-embeddings-v2-base-code"
    embed_dim: int = 768
    embed_batch_size: int = 64

    # GPU / execution providers
    device: str = "auto"  # 'auto', 'cuda', 'cpu'
    embed_providers: list[str] | None = None  # explicit ONNX providers override

    # Backend selection: 'auto', 'faiss', 'hnswlib'
    ann_backend: str = "auto"
    binary_backend: str = "auto"

    # Indexing pipeline
    index_workers: int = 2  # number of parallel indexing workers

    # HNSW index (ANNIndex)
    hnsw_ef: int = 150
    hnsw_M: int = 32
    hnsw_ef_construction: int = 200

    # Binary coarse search (BinaryStore)
    binary_top_k: int = 200

    # ANN fine search
    ann_top_k: int = 50

    # Reranker
    reranker_model: str = "BAAI/bge-reranker-v2-m3"
    reranker_top_k: int = 20
    reranker_batch_size: int = 32

    # API reranker (optional)
    reranker_api_url: str = ""
    reranker_api_key: str = ""
    reranker_api_model: str = ""
    reranker_api_max_tokens_per_batch: int = 2048

    # FTS
    fts_top_k: int = 50

    # Fusion
    fusion_k: int = 60  # RRF k parameter
    fusion_weights: dict = field(default_factory=lambda: {
        "exact": 0.25,
        "fuzzy": 0.10,
        "vector": 0.50,
        "graph": 0.15,
    })

    def resolve_embed_providers(self) -> list[str]:
        """Return ONNX execution providers based on device config.

        Priority: explicit embed_providers > device setting > auto-detect.
        """
        if self.embed_providers is not None:
            return list(self.embed_providers)

        if self.device == "cuda":
            return ["CUDAExecutionProvider", "CPUExecutionProvider"]

        if self.device == "cpu":
            return ["CPUExecutionProvider"]

        # auto-detect
        try:
            import onnxruntime
            available = onnxruntime.get_available_providers()
            if "CUDAExecutionProvider" in available:
                log.info("CUDA detected via onnxruntime, using GPU for embedding")
                return ["CUDAExecutionProvider", "CPUExecutionProvider"]
        except ImportError:
            pass

        return ["CPUExecutionProvider"]

    @classmethod
    def defaults(cls) -> "Config":
        return cls()

    @classmethod
    def small(cls) -> "Config":
        """Smaller config for testing or small corpora."""
        return cls(
            hnsw_ef=50,
            hnsw_M=16,
            binary_top_k=50,
            ann_top_k=20,
            reranker_top_k=10,
        )
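A short usage sketch of the config surface above, using only what the file itself defines:

    # Reduced preset for tests / small corpora.
    config = Config.small()
    assert config.hnsw_M == 16 and config.binary_top_k == 50

    # Provider resolution priority: explicit override > device setting > auto-detect.
    cpu_cfg = Config(device="cpu")
    assert cpu_cfg.resolve_embed_providers() == ["CPUExecutionProvider"]

    override = Config(embed_providers=["CUDAExecutionProvider"])
    assert override.resolve_embed_providers() == ["CUDAExecutionProvider"]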
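The top-k values describe a retrieval funnel (binary coarse search keeps 200 candidates, ANN refines to 50, the reranker keeps 20, and FTS contributes 50), while fusion_k and fusion_weights suggest weighted reciprocal-rank fusion across the four channels named in fusion_weights. The fusion code itself is not part of this file; the following is a sketch of standard weighted RRF under that assumption:

    # Sketch: weighted RRF (assumed combination rule, not shown in this diff).
    # score(doc) = sum over channels of  weight_c / (fusion_k + rank_c(doc))
    def rrf_fuse(ranked: dict[str, list[str]], cfg: Config) -> list[str]:
        scores: dict[str, float] = {}
        for channel, doc_ids in ranked.items():
            weight = cfg.fusion_weights.get(channel, 0.0)
            for rank, doc_id in enumerate(doc_ids, start=1):
                scores[doc_id] = scores.get(doc_id, 0.0) + weight / (cfg.fusion_k + rank)
        return sorted(scores, key=scores.get, reverse=True)

    # e.g. rrf_fuse({"exact": [...], "fuzzy": [...], "vector": [...], "graph": [...]}, Config.defaults())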