feat: remove API worker and batch size configurations; update CodexLens settings for v2

This commit is contained in:
catlog22
2026-03-17 20:44:26 +08:00
parent f37189dc64
commit ef2c5a58e1
11 changed files with 313 additions and 142 deletions

View File

@@ -386,6 +386,47 @@ def cmd_download_models(args: argparse.Namespace) -> None:
})
def cmd_list_models(args: argparse.Namespace) -> None:
    """List known embed/reranker models with cache status."""
    from codexlens_search import model_manager

    cfg = _create_config(args)
    _json_output(model_manager.list_known_models(cfg))
def cmd_download_model(args: argparse.Namespace) -> None:
    """Download a single model by name."""
    from codexlens_search import model_manager

    cfg = _create_config(args)
    name = args.model_name
    model_manager.ensure_model(name, cfg)
    # NOTE(review): reaches into model_manager's private helpers to confirm
    # the download landed in cache — consider exposing a public check.
    is_cached = model_manager._model_is_cached(
        name, model_manager._resolve_cache_dir(cfg)
    )
    outcome = "downloaded" if is_cached else "failed"
    _json_output({"status": outcome, "model": name})
def cmd_delete_model(args: argparse.Namespace) -> None:
    """Delete a model from cache."""
    from codexlens_search import model_manager

    cfg = _create_config(args)
    name = args.model_name
    removed = model_manager.delete_model(name, cfg)
    outcome = "deleted" if removed else "not_found"
    _json_output({"status": outcome, "model": name})
def cmd_status(args: argparse.Namespace) -> None:
"""Report index statistics."""
from codexlens_search.indexing.metadata import MetadataStore
@@ -490,6 +531,17 @@ def _build_parser() -> argparse.ArgumentParser:
p_dl = sub.add_parser("download-models", help="Download embed + reranker models")
p_dl.add_argument("--embed-model", help="Override embed model name")
# list-models
sub.add_parser("list-models", help="List known models with cache status")
# download-model (single model by name)
p_dl_single = sub.add_parser("download-model", help="Download a single model by name")
p_dl_single.add_argument("model_name", help="HuggingFace model name (e.g. BAAI/bge-small-en-v1.5)")
# delete-model
p_del = sub.add_parser("delete-model", help="Delete a model from cache")
p_del.add_argument("model_name", help="HuggingFace model name to delete")
# status
sub.add_parser("status", help="Report index statistics")
@@ -528,6 +580,9 @@ def main() -> None:
"sync": cmd_sync,
"watch": cmd_watch,
"download-models": cmd_download_models,
"list-models": cmd_list_models,
"download-model": cmd_download_model,
"delete-model": cmd_delete_model,
"status": cmd_status,
}

View File

@@ -137,6 +137,103 @@ def _ensure_model_onnx(model_dir: Path) -> None:
return
def list_known_models(config: Config) -> list[dict]:
    """Return info for known embed/reranker models with cache status.

    Checks config defaults plus common alternative models.

    Args:
        config: Active configuration; supplies the default embed/reranker
            model names and any cache-directory override.

    Returns:
        List of dicts with keys: name, type, installed, cache_path.
        Embedding models are listed before rerankers; a name that appears
        in both groups is reported once, under its first (embedding) type.
    """
    cache_dir = _resolve_cache_dir(config)
    base = cache_dir or _default_fastembed_cache()

    # Config defaults come first in each group so they always appear,
    # followed by common alternatives.  One catalog drives one loop —
    # the embed/reranker handling is otherwise identical.
    catalog: list[tuple[str, list[str]]] = [
        ("embedding", [
            config.embed_model,
            "BAAI/bge-small-en-v1.5",
            "BAAI/bge-base-en-v1.5",
            "BAAI/bge-large-en-v1.5",
            "sentence-transformers/all-MiniLM-L6-v2",
        ]),
        ("reranker", [
            config.reranker_model,
            "Xenova/ms-marco-MiniLM-L-6-v2",
            "BAAI/bge-reranker-base",
            "BAAI/bge-reranker-v2-m3",
        ]),
    ]

    seen: set[str] = set()
    results: list[dict] = []
    for model_type, names in catalog:
        for name in names:
            if name in seen:
                continue  # config default may duplicate a listed alternative
            seen.add(name)
            cache_path = _find_model_cache_path(name, base)
            results.append({
                "name": name,
                "type": model_type,
                "installed": cache_path is not None,
                "cache_path": cache_path,
            })
    return results
def delete_model(model_name: str, config: Config) -> bool:
    """Remove a model from the HF/fastembed cache.

    Returns True if deleted, False if not found.
    """
    import shutil

    base_dir = _resolve_cache_dir(config) or _default_fastembed_cache()
    target = _find_model_cache_path(model_name, base_dir)
    if target is None:
        log.warning("Model %s not found in cache", model_name)
        return False
    shutil.rmtree(target)
    log.info("Deleted model %s from %s", model_name, target)
    return True
def _find_model_cache_path(model_name: str, base: str) -> str | None:
"""Find the cache directory path for a model, or None if not cached."""
base_path = Path(base)
if not base_path.exists():
return None
# Exact match first
safe_name = model_name.replace("/", "--")
model_dir = base_path / f"models--{safe_name}"
if _dir_has_onnx(model_dir):
return str(model_dir)
# Partial match: fastembed remaps some model names
short_name = model_name.split("/")[-1].lower()
for d in base_path.iterdir():
if short_name in d.name.lower() and _dir_has_onnx(d):
return str(d)
return None
def get_cache_kwargs(config: Config) -> dict:
"""Return kwargs to pass to fastembed constructors for cache_dir."""
cache_dir = _resolve_cache_dir(config)