feat: add model download manager with HF mirror support and fix defaults

- Add lightweight model_manager.py: cache detection (with fastembed name
  remapping), HF mirror download via huggingface_hub, auto model.onnx
  fallback from quantized variants
- Config defaults: embed_model -> bge-small-en-v1.5 (384d), reranker ->
  Xenova/ms-marco-MiniLM-L-6-v2 (fastembed 0.7.4 compatible)
- Add model_cache_dir and hf_mirror config options
- embed/local.py and rerank/local.py use model_manager for cache-aware loading
- Fix FastEmbedReranker to handle both float list and RerankResult formats
- E2E test uses real FastEmbedReranker instead of mock KeywordReranker

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
catlog22
2026-03-17 10:29:52 +08:00
parent 6712965b7f
commit abdc66cee7
5 changed files with 186 additions and 28 deletions

View File

@@ -13,12 +13,26 @@ class FastEmbedReranker(BaseReranker):
def _load(self) -> None:
    """Lazily instantiate the fastembed cross-encoder reranker.

    No-op when the model is already loaded. Before construction, the
    model is ensured to exist in the local cache (downloading via the
    configured HF mirror if necessary) so that ``TextCrossEncoder``
    resolves it from the cache directory instead of hitting the network.
    """
    if self._model is None:
        from .. import model_manager

        # Download/locate the model first; get_cache_kwargs then points
        # fastembed at the same cache directory the manager populated.
        model_manager.ensure_model(self._config.reranker_model, self._config)
        from fastembed.rerank.cross_encoder import TextCrossEncoder

        cache_kwargs = model_manager.get_cache_kwargs(self._config)
        # The diff rendering also contained the stale pre-change
        # construction without cache kwargs; only this cache-aware
        # construction is kept (the stale one was immediately
        # overwritten, so the final state is unchanged).
        self._model = TextCrossEncoder(
            model_name=self._config.reranker_model,
            **cache_kwargs,
        )
# Score each document against the query with the cross-encoder,
# returning one float per input document in input order.
# NOTE(review): indentation was lost in this diff rendering; the logic
# below is read from token order, not layout.
def score_pairs(self, query: str, documents: list[str]) -> list[float]:
self._load()
results = list(self._model.rerank(query, documents))
# No results (e.g. empty document list): neutral 0.0 for every input.
if not results:
return [0.0] * len(documents)
# fastembed may return list[float] or list[RerankResult] depending on version
first = results[0]
if isinstance(first, (int, float)):
# Newer fastembed: plain scores already aligned with `documents`.
return [float(s) for s in results]
# Older format: objects with .index and .score
# Scatter scores back to the original document positions via .index;
# positions absent from `results` keep the 0.0 default.
scores = [0.0] * len(documents)
for r in results:
scores[r.index] = float(r.score)
# NOTE(review): the trailing `return scores` lies past the end of this
# chunk — confirm against the full file.