refactor: rename package codexlens -> codexlens_search for independent distribution

Rename the v2 search engine package to `codexlens-search` (import as `codexlens_search`) so it can be installed independently and consumed by the original codex-lens as a dependency. This avoids package path conflicts, since both packages previously used `src/codexlens/`.

Changes:
- Rename src/codexlens/ -> src/codexlens_search/
- Update pyproject.toml: name=codexlens-search, version=0.2.0
- Update all imports across source, tests, and scripts
- Add public API exports in __init__.py (Config, SearchPipeline, IndexingPipeline, SearchResult, IndexStats)

37/37 tests pass. No functional changes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-21 19:08:17 +08:00 · 2026-03-16 23:23:22 +08:00
parent a0a50d338a
commit 6712965b7f
32 changed files with 83 additions and 63 deletions
--- a/codex-lens-v2/src/codexlens_search/rerank/api.py
+++ b/codex-lens-v2/src/codexlens_search/rerank/api.py
@@ -0,0 +1,103 @@
+from __future__ import annotations
+
+import logging
+import time
+
+import httpx
+
+from codexlens_search.config import Config
+from .base import BaseReranker
+
+logger = logging.getLogger(__name__)
+
+
+class APIReranker(BaseReranker):
+    """Reranker backed by a remote HTTP API (SiliconFlow/Cohere/Jina format)."""
+
+    def __init__(self, config: Config) -> None:
+        self._config = config
+        self._client = httpx.Client(
+            headers={
+                "Authorization": f"Bearer {config.reranker_api_key}",
+                "Content-Type": "application/json",
+            },
+        )
+
+    def score_pairs(self, query: str, documents: list[str]) -> list[float]:
+        if not documents:
+            return []
+        max_tokens = self._config.reranker_api_max_tokens_per_batch
+        batches = self._split_batches(documents, max_tokens)
+        scores = [0.0] * len(documents)
+        for batch in batches:
+            batch_scores = self._call_api_with_retry(query, batch)
+            for orig_idx, score in batch_scores.items():
+                scores[orig_idx] = score
+        return scores
+
+    def _split_batches(
+        self, documents: list[str], max_tokens: int
+    ) -> list[list[tuple[int, str]]]:
+        batches: list[list[tuple[int, str]]] = []
+        current_batch: list[tuple[int, str]] = []
+        current_tokens = 0
+
+        for idx, text in enumerate(documents):
+            doc_tokens = len(text) // 4
+            if current_tokens + doc_tokens > max_tokens and current_batch:
+                batches.append(current_batch)
+                current_batch = []
+                current_tokens = 0
+            current_batch.append((idx, text))
+            current_tokens += doc_tokens
+
+        if current_batch:
+            batches.append(current_batch)
+
+        return batches
+
+    def _call_api_with_retry(
+        self,
+        query: str,
+        docs: list[tuple[int, str]],
+        max_retries: int = 3,
+    ) -> dict[int, float]:
+        url = self._config.reranker_api_url.rstrip("/") + "/rerank"
+        payload = {
+            "model": self._config.reranker_api_model,
+            "query": query,
+            "documents": [t for _, t in docs],
+        }
+
+        last_exc: Exception | None = None
+        for attempt in range(max_retries):
+            try:
+                response = self._client.post(url, json=payload)
+            except Exception as exc:
+                last_exc = exc
+                time.sleep((2 ** attempt) * 0.5)
+                continue
+
+            if response.status_code in (429, 503):
+                logger.warning(
+                    "API reranker returned HTTP %s (attempt %d/%d), retrying...",
+                    response.status_code,
+                    attempt + 1,
+                    max_retries,
+                )
+                time.sleep((2 ** attempt) * 0.5)
+                continue
+
+            response.raise_for_status()
+            data = response.json()
+            results = data.get("results", [])
+            scores: dict[int, float] = {}
+            for item in results:
+                local_idx = int(item["index"])
+                orig_idx = docs[local_idx][0]
+                scores[orig_idx] = float(item["relevance_score"])
+            return scores
+
+        raise RuntimeError(
+            f"API reranker failed after {max_retries} attempts. Last error: {last_exc}"
+        )