codex-lens: harden CLI embedding errors and flags

2026-03-05 16:13:08 +08:00 · 2026-03-05 14:27:36 +08:00
parent 6341ed43e1
commit 60218f6bf3
3 changed files with 177 additions and 24 deletions
--- a/codex-lens/src/codexlens/cli/commands.py
+++ b/codex-lens/src/codexlens/cli/commands.py
@@ -79,6 +79,46 @@ def _parse_languages(raw: Optional[List[str]]) -> Optional[List[str]]:
    return langs or None


+def _fail_mutually_exclusive(option_a: str, option_b: str, json_mode: bool) -> None:
+    msg = f"Options {option_a} and {option_b} are mutually exclusive."
+    if json_mode:
+        print_json(success=False, error=msg)
+    else:
+        console.print(f"[red]Error:[/red] {msg}")
+    raise typer.Exit(code=1)
+
+
+def _extract_embedding_error(embed_result: Dict[str, Any]) -> str:
+    """Best-effort error extraction for embedding generation results."""
+    raw_error = embed_result.get("error")
+    if isinstance(raw_error, str) and raw_error.strip():
+        return raw_error.strip()
+
+    result = embed_result.get("result")
+    if isinstance(result, dict):
+        details = result.get("details")
+        if isinstance(details, list):
+            collected: List[str] = []
+            for item in details:
+                if not isinstance(item, dict):
+                    continue
+                item_error = item.get("error")
+                if isinstance(item_error, str) and item_error.strip():
+                    collected.append(item_error.strip())
+
+            if collected:
+                # De-dupe while preserving order, then keep output short.
+                seen: set[str] = set()
+                unique: List[str] = []
+                for err in collected:
+                    if err not in seen:
+                        seen.add(err)
+                        unique.append(err)
+                return "; ".join(unique[:3])
+
+    return "Embedding generation failed (no error details provided)"
+
+
 def _get_index_root() -> Path:
    """Get the index root directory from config or default.

@@ -126,16 +166,26 @@ def index_init(
    no_embeddings: bool = typer.Option(False, "--no-embeddings", help="Skip automatic embedding generation (if semantic deps installed)."),
    backend: Optional[str] = typer.Option(None, "--backend", "-b", help="Embedding backend: fastembed (local) or litellm (remote API). Defaults to settings.json config."),
    model: Optional[str] = typer.Option(None, "--model", "-m", help="Embedding model: profile name for fastembed or model name for litellm. Defaults to settings.json config."),
-    use_astgrep: Optional[bool] = typer.Option(
-        None,
-        "--use-astgrep/--no-use-astgrep",
+    use_astgrep: bool = typer.Option(
+        False,
+        "--use-astgrep",
        help="Prefer ast-grep parsers when available (experimental). Overrides settings.json config.",
    ),
-    static_graph: Optional[bool] = typer.Option(
-        None,
-        "--static-graph/--no-static-graph",
+    no_use_astgrep: bool = typer.Option(
+        False,
+        "--no-use-astgrep",
+        help="Disable ast-grep parsers. Overrides settings.json config.",
+    ),
+    static_graph: bool = typer.Option(
+        False,
+        "--static-graph",
        help="Persist global relationships during indexing for static graph expansion. Overrides settings.json config.",
    ),
+    no_static_graph: bool = typer.Option(
+        False,
+        "--no-static-graph",
+        help="Disable persisting global relationships. Overrides settings.json config.",
+    ),
    static_graph_types: Optional[str] = typer.Option(
        None,
        "--static-graph-types",
@@ -171,10 +221,19 @@ def index_init(
    config.load_settings()  # Ensure settings are loaded

    # Apply CLI overrides for parsing/indexing behavior
-    if use_astgrep is not None:
-        config.use_astgrep = bool(use_astgrep)
-    if static_graph is not None:
-        config.static_graph_enabled = bool(static_graph)
+    if use_astgrep and no_use_astgrep:
+        _fail_mutually_exclusive("--use-astgrep", "--no-use-astgrep", json_mode)
+    if use_astgrep:
+        config.use_astgrep = True
+    elif no_use_astgrep:
+        config.use_astgrep = False
+
+    if static_graph and no_static_graph:
+        _fail_mutually_exclusive("--static-graph", "--no-static-graph", json_mode)
+    if static_graph:
+        config.static_graph_enabled = True
+    elif no_static_graph:
+        config.static_graph_enabled = False
    if static_graph_types is not None:
        allowed = {"imports", "inherits", "calls"}
        parsed = [
@@ -323,10 +382,11 @@ def index_init(
                            console.print(f"  Indexes processed: [bold]{embed_data['indexes_successful']}/{embed_data['indexes_processed']}[/bold]")
                    else:
                        if not json_mode:
-                            console.print(f"[yellow]Warning:[/yellow] Embedding generation failed: {embed_result.get('error', 'Unknown error')}")
+                            error_msg = _extract_embedding_error(embed_result)
+                            console.print(f"[yellow]Warning:[/yellow] Embedding generation failed: {error_msg}")
                        result["embeddings"] = {
                            "generated": False,
-                            "error": embed_result.get("error"),
+                            "error": _extract_embedding_error(embed_result),
                        }
                else:
                    if not json_mode and verbose:
@@ -848,12 +908,16 @@ def symbol(
@app.command()
 def inspect(
    file: Path = typer.Argument(..., exists=True, dir_okay=False, help="File to analyze."),
-    symbols: bool = typer.Option(True, "--symbols/--no-symbols", help="Show discovered symbols."),
+    symbols: bool = typer.Option(False, "--symbols", help="Show discovered symbols (default)."),
+    no_symbols: bool = typer.Option(False, "--no-symbols", help="Hide discovered symbols."),
    json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."),
 ) -> None:
    """Analyze a single file and display symbols."""
    _configure_logging(verbose, json_mode)
+    if symbols and no_symbols:
+        _fail_mutually_exclusive("--symbols", "--no-symbols", json_mode)
+    show_symbols = True if (symbols or not no_symbols) else False
    config = Config.load()
    factory = ParserFactory(config)

@@ -867,7 +931,7 @@ def inspect(
        if json_mode:
            print_json(success=True, result=payload)
        else:
-            if symbols:
+            if show_symbols:
                render_file_inspect(indexed.path, indexed.language, indexed.symbols)
            else:
                render_status({"file": indexed.path, "language": indexed.language})
@@ -2690,10 +2754,16 @@ def index_embeddings(
    json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose output."),
    centralized: bool = typer.Option(
-        True,
-        "--centralized/--distributed",
-        "-c/-d",
-        help="Use centralized vector storage (default) or distributed per-directory indexes.",
+        False,
+        "--centralized",
+        "-c",
+        help="Use centralized vector storage (default).",
+    ),
+    distributed: bool = typer.Option(
+        False,
+        "--distributed",
+        "-d",
+        help="Use distributed per-directory indexes.",
    ),
 ) -> None:
    """Generate semantic embeddings for code search.
@@ -2730,6 +2800,9 @@ def index_embeddings(
        codexlens index embeddings ~/projects/my-app --centralized  # Centralized vector storage
    """
    _configure_logging(verbose, json_mode)
+    if centralized and distributed:
+        _fail_mutually_exclusive("--centralized", "--distributed", json_mode)
+    use_centralized = not distributed

    from codexlens.cli.embedding_manager import (
        generate_embeddings,
@@ -2867,7 +2940,7 @@ def index_embeddings(
                        console.print("[yellow]Cancelled.[/yellow] Use --force to skip this prompt.")
                        raise typer.Exit(code=0)

-    if centralized:
+    if use_centralized:
        # Centralized mode: single HNSW index at project root
        if not index_root:
            index_root = index_path.parent if index_path else target_path
@@ -2895,7 +2968,7 @@ def index_embeddings(
        print_json(**result)
    else:
        if not result["success"]:
-            error_msg = result.get("error", "Unknown error")
+            error_msg = _extract_embedding_error(result)
            console.print(f"[red]Error:[/red] {error_msg}")

            # Provide helpful hints
@@ -4245,14 +4318,22 @@ def embeddings_generate_deprecated(
    json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose output."),
    centralized: bool = typer.Option(
-        True,
-        "--centralized/--distributed",
-        "-c/-d",
-        help="Use centralized vector storage (default) or distributed.",
+        False,
+        "--centralized",
+        "-c",
+        help="Use centralized vector storage (default).",
+    ),
+    distributed: bool = typer.Option(
+        False,
+        "--distributed",
+        "-d",
+        help="Use distributed per-directory indexes.",
    ),
 ) -> None:
    """[Deprecated] Use 'codexlens index embeddings' instead."""
    _deprecated_command_warning("embeddings-generate", "index embeddings")
+    if centralized and distributed:
+        _fail_mutually_exclusive("--centralized", "--distributed", json_mode)
    index_embeddings(
        path=path,
        backend=backend,
@@ -4263,6 +4344,7 @@ def embeddings_generate_deprecated(
        json_mode=json_mode,
        verbose=verbose,
        centralized=centralized,
+        distributed=distributed,
    )


--- a/codex-lens/src/codexlens/cli/embedding_manager.py
+++ b/codex-lens/src/codexlens/cli/embedding_manager.py
@@ -55,6 +55,11 @@ except ImportError:
    def is_embedding_backend_available(_backend: str):  # type: ignore[no-redef]
        return False, "codexlens.semantic not available"

+try:
+    from codexlens.semantic.vector_store import VectorStore
+except ImportError:  # pragma: no cover
+    VectorStore = None  # type: ignore[assignment]
+
 try:
    from codexlens.config import VECTORS_META_DB_NAME
 except ImportError:
@@ -720,6 +725,11 @@ def generate_embeddings(
    # effective_batch_size is calculated above (dynamic or EMBEDDING_BATCH_SIZE fallback)

    try:
+        if VectorStore is None:
+            return {
+                "success": False,
+                "error": "Semantic search not available (VectorStore import failed). Install with: pip install codexlens[semantic]",
+            }
        with VectorStore(index_path) as vector_store:
            # Check model compatibility with existing embeddings
            if not force:
--- a/codex-lens/tests/test_cli_help.py
+++ b/codex-lens/tests/test_cli_help.py
@@ -0,0 +1,61 @@
+"""Smoke tests for CodexLens CLI help output.
+
+These tests ensure that help text generation does not crash at import time
+or during Click/Typer option parsing.
+"""
+
+from __future__ import annotations
+
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+from typer.testing import CliRunner
+
+
+def _subprocess_env() -> dict[str, str]:
+    env = os.environ.copy()
+    codex_lens_root = Path(__file__).resolve().parents[1]
+    src_dir = codex_lens_root / "src"
+    existing = env.get("PYTHONPATH", "")
+    env["PYTHONPATH"] = str(src_dir) + (os.pathsep + existing if existing else "")
+    return env
+
+
+def test_python_module_help_does_not_crash() -> None:
+    proc = subprocess.run(
+        [sys.executable, "-m", "codexlens", "--help"],
+        capture_output=True,
+        text=True,
+        encoding="utf-8",
+        errors="replace",
+        env=_subprocess_env(),
+    )
+    assert proc.returncode == 0, proc.stderr
+    assert "Traceback" not in (proc.stderr or "")
+
+
+def test_typer_app_help_does_not_crash() -> None:
+    from codexlens.cli.commands import app
+
+    runner = CliRunner()
+    result = runner.invoke(app, ["--help"])
+    assert result.exit_code == 0, result.output
+
+
+def test_extract_embedding_error_uses_details() -> None:
+    from codexlens.cli.commands import _extract_embedding_error
+
+    embed_result = {
+        "success": False,
+        "result": {
+            "details": [
+                {"index_path": "/tmp/a/_index.db", "success": False, "error": "Backend timeout"},
+                {"index_path": "/tmp/b/_index.db", "success": False, "error": "Rate limit"},
+            ]
+        },
+    }
+    msg = _extract_embedding_error(embed_result)
+    assert "Unknown error" not in msg
+    assert "Backend timeout" in msg