mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-03-19 18:58:47 +08:00
Rename the v2 search engine package to `codexlens-search` (import as `codexlens_search`) so it can be installed independently and consumed by the original codex-lens as a dependency. This avoids package path conflicts since both previously used `src/codexlens/`. Changes: - Rename src/codexlens/ -> src/codexlens_search/ - Update pyproject.toml: name=codexlens-search, version=0.2.0 - Update all imports across source, tests, and scripts - Add public API exports in __init__.py (Config, SearchPipeline, IndexingPipeline, SearchResult, IndexStats) 37/37 tests pass. No functional changes. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
100 lines
2.6 KiB
Python
100 lines
2.6 KiB
Python
from __future__ import annotations
|
|
import logging
|
|
from dataclasses import dataclass, field
|
|
|
|
# Module-level logger named after this module, so applications can target it
# via the standard hierarchical logging configuration.
log = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class Config:
    """Tunable settings for the indexing and search pipelines.

    Groups the embedding, ANN-index, reranker, FTS, and score-fusion knobs
    in one place.  All fields have working defaults; use ``Config.small()``
    for lightweight test setups or small corpora.
    """

    # Embedding
    embed_model: str = "jinaai/jina-embeddings-v2-base-code"
    embed_dim: int = 768
    embed_batch_size: int = 64

    # GPU / execution providers
    device: str = "auto"  # 'auto', 'cuda', 'cpu'
    # Explicit ONNX providers override; takes precedence over `device`.
    embed_providers: list[str] | None = None

    # Backend selection: 'auto', 'faiss', 'hnswlib'
    ann_backend: str = "auto"
    binary_backend: str = "auto"

    # Indexing pipeline
    index_workers: int = 2  # number of parallel indexing workers

    # HNSW index (ANNIndex)
    hnsw_ef: int = 150
    hnsw_M: int = 32
    hnsw_ef_construction: int = 200

    # Binary coarse search (BinaryStore)
    binary_top_k: int = 200

    # ANN fine search
    ann_top_k: int = 50

    # Reranker
    reranker_model: str = "BAAI/bge-reranker-v2-m3"
    reranker_top_k: int = 20
    reranker_batch_size: int = 32

    # API reranker (optional)
    reranker_api_url: str = ""
    reranker_api_key: str = ""
    reranker_api_model: str = ""
    reranker_api_max_tokens_per_batch: int = 2048

    # FTS
    fts_top_k: int = 50

    # Fusion
    fusion_k: int = 60  # RRF k parameter
    # Per-channel weights for score fusion.  Typed dict[str, float] for
    # consistency with the modern annotation style used above.
    fusion_weights: dict[str, float] = field(default_factory=lambda: {
        "exact": 0.25,
        "fuzzy": 0.10,
        "vector": 0.50,
        "graph": 0.15,
    })

    def resolve_embed_providers(self) -> list[str]:
        """Return ONNX execution providers based on device config.

        Priority: explicit embed_providers > device setting > auto-detect.

        Returns:
            A fresh list of provider names (safe for the caller to mutate).
        """
        if self.embed_providers is not None:
            # Copy so callers cannot mutate the config through the result.
            return list(self.embed_providers)

        if self.device == "cuda":
            return ["CUDAExecutionProvider", "CPUExecutionProvider"]

        if self.device == "cpu":
            return ["CPUExecutionProvider"]

        # auto-detect: prefer CUDA when onnxruntime reports it available;
        # fall back to CPU when onnxruntime is missing or CPU-only.
        try:
            import onnxruntime

            available = onnxruntime.get_available_providers()
            if "CUDAExecutionProvider" in available:
                log.info("CUDA detected via onnxruntime, using GPU for embedding")
                return ["CUDAExecutionProvider", "CPUExecutionProvider"]
        except ImportError:
            pass

        return ["CPUExecutionProvider"]

    @classmethod
    def defaults(cls) -> "Config":
        """Return a Config with all default values."""
        return cls()

    @classmethod
    def small(cls) -> "Config":
        """Smaller config for testing or small corpora."""
        return cls(
            hnsw_ef=50,
            hnsw_M=16,
            binary_top_k=50,
            ann_top_k=20,
            reranker_top_k=10,
        )
|