mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-03-21 19:08:17 +08:00
Rename the v2 search engine package to `codexlens-search` (import as `codexlens_search`) so it can be installed independently and consumed by the original codex-lens as a dependency. This avoids package path conflicts, since both packages previously used `src/codexlens/`.

Changes:
- Rename src/codexlens/ -> src/codexlens_search/
- Update pyproject.toml: name=codexlens-search, version=0.2.0
- Update all imports across source, tests, and scripts
- Add public API exports in __init__.py (Config, SearchPipeline, IndexingPipeline, SearchResult, IndexStats)

37/37 tests pass. No functional changes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
104 lines
3.3 KiB
Python
104 lines
3.3 KiB
Python
from __future__ import annotations
|
|
|
|
import logging
|
|
import time
|
|
|
|
import httpx
|
|
|
|
from codexlens_search.config import Config
|
|
from .base import BaseReranker
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class APIReranker(BaseReranker):
    """Reranker backed by a remote HTTP API (SiliconFlow/Cohere/Jina format).

    Documents are grouped into batches bounded by an approximate token
    budget, each batch is POSTed to ``<reranker_api_url>/rerank``, and the
    returned relevance scores are mapped back to the caller's document order.

    The instance owns an ``httpx.Client``; call :meth:`close` (or use the
    instance as a context manager) to release its connections when done.
    """

    def __init__(self, config: Config) -> None:
        """Store *config* and create the HTTP client used for all requests.

        Args:
            config: Supplies ``reranker_api_key``, ``reranker_api_url``,
                ``reranker_api_model`` and
                ``reranker_api_max_tokens_per_batch``.
        """
        self._config = config
        self._client = httpx.Client(
            headers={
                "Authorization": f"Bearer {config.reranker_api_key}",
                "Content-Type": "application/json",
            },
        )

    def close(self) -> None:
        """Close the underlying HTTP client and release its connections."""
        self._client.close()

    def __enter__(self) -> APIReranker:
        """Return ``self`` so the reranker can be used in a ``with`` block."""
        return self

    def __exit__(self, *exc_info: object) -> None:
        """Close the HTTP client when leaving the ``with`` block."""
        self.close()

    def score_pairs(self, query: str, documents: list[str]) -> list[float]:
        """Score every document against *query*.

        Args:
            query: The search query sent to the rerank endpoint.
            documents: Candidate texts to score.

        Returns:
            One score per document, in the same order as *documents*.
            Documents for which the API returns no result keep ``0.0``.

        Raises:
            RuntimeError: If a batch still fails after all retry attempts.
        """
        if not documents:
            return []
        max_tokens = self._config.reranker_api_max_tokens_per_batch
        scores = [0.0] * len(documents)
        for batch in self._split_batches(documents, max_tokens):
            batch_scores = self._call_api_with_retry(query, batch)
            for orig_idx, score in batch_scores.items():
                scores[orig_idx] = score
        return scores

    def _split_batches(
        self, documents: list[str], max_tokens: int
    ) -> list[list[tuple[int, str]]]:
        """Group documents into batches that fit an approximate token budget.

        Token counts are estimated as ``len(text) // 4``. A document that
        alone exceeds *max_tokens* is still emitted, as a batch of its own.

        Args:
            documents: Texts to batch.
            max_tokens: Estimated-token budget per batch.

        Returns:
            Batches of ``(original_index, text)`` pairs, preserving order.
        """
        batches: list[list[tuple[int, str]]] = []
        current_batch: list[tuple[int, str]] = []
        current_tokens = 0

        for idx, text in enumerate(documents):
            doc_tokens = len(text) // 4
            # Start a new batch only if the current one is non-empty, so an
            # oversized document never produces an empty batch.
            if current_tokens + doc_tokens > max_tokens and current_batch:
                batches.append(current_batch)
                current_batch = []
                current_tokens = 0
            current_batch.append((idx, text))
            current_tokens += doc_tokens

        if current_batch:
            batches.append(current_batch)

        return batches

    def _call_api_with_retry(
        self,
        query: str,
        docs: list[tuple[int, str]],
        max_retries: int = 3,
    ) -> dict[int, float]:
        """POST one batch to the rerank endpoint, retrying transient failures.

        Request exceptions and HTTP 429/503 responses are retried with
        exponential back-off (0.5s, 1s, 2s, ...). Any other error status is
        raised immediately via ``raise_for_status()``.

        Args:
            query: The search query.
            docs: ``(original_index, text)`` pairs forming the batch.
            max_retries: Total number of attempts before giving up.

        Returns:
            Mapping from original document index to relevance score.

        Raises:
            RuntimeError: If every attempt failed; chained to the last
                request exception when there was one.
        """
        url = self._config.reranker_api_url.rstrip("/") + "/rerank"
        payload = {
            "model": self._config.reranker_api_model,
            "query": query,
            "documents": [text for _, text in docs],
        }

        # Most recent failure: an exception for request errors, or a short
        # "HTTP <status>" string for throttling responses.
        last_error: Exception | str | None = None
        for attempt in range(max_retries):
            try:
                response = self._client.post(url, json=payload)
            except Exception as exc:
                # Deliberately broad: any request failure is retried and, if
                # it persists, surfaced to callers as RuntimeError.
                last_error = exc
                logger.warning(
                    "API reranker request failed (attempt %d/%d): %s",
                    attempt + 1,
                    max_retries,
                    exc,
                )
            else:
                if response.status_code not in (429, 503):
                    response.raise_for_status()
                    results = response.json().get("results", [])
                    # "index" is the position within this batch; translate it
                    # back to the document's index in the caller's list.
                    return {
                        docs[int(item["index"])][0]: float(item["relevance_score"])
                        for item in results
                    }
                last_error = f"HTTP {response.status_code}"
                logger.warning(
                    "API reranker returned HTTP %s (attempt %d/%d), retrying...",
                    response.status_code,
                    attempt + 1,
                    max_retries,
                )

            # Back off only when another attempt will actually follow.
            if attempt + 1 < max_retries:
                time.sleep((2 ** attempt) * 0.5)

        cause = last_error if isinstance(last_error, Exception) else None
        raise RuntimeError(
            f"API reranker failed after {max_retries} attempts. Last error: {last_error}"
        ) from cause
|