feat: Add code analysis and LLM action templates with detailed configurations and examples

- Introduced a comprehensive code analysis action template for integrating code exploration and analysis capabilities.
- Added LLM action template for seamless integration of LLM calls with customizable prompts and tools.
- Implemented a benchmark search script to compare multiple search methods across various dimensions including speed, result quality, ranking stability, and coverage.
- Provided preset configurations for common analysis tasks and LLM actions, enhancing usability and flexibility.
2026-02-11 02:33:51 +08:00 · 2026-01-03 17:37:49 +08:00
parent 6a45035e3f
commit be498acf59
10 changed files with 3076 additions and 217 deletions
--- a/codex-lens/src/codexlens/cli/commands.py
+++ b/codex-lens/src/codexlens/cli/commands.py
@@ -432,75 +432,55 @@ def search(
    limit: int = typer.Option(20, "--limit", "-n", min=1, max=500, help="Max results."),
    depth: int = typer.Option(-1, "--depth", "-d", help="Search depth (-1 = unlimited, 0 = current only)."),
    files_only: bool = typer.Option(False, "--files-only", "-f", help="Return only file paths without content snippets."),
-    method: str = typer.Option("hybrid", "--method", "-m", help="Search method: fts, vector, splade, hybrid, cascade."),
+    method: str = typer.Option("dense_rerank", "--method", "-m", help="Search method: 'dense_rerank' (semantic, default), 'fts' (exact keyword)."),
    use_fuzzy: bool = typer.Option(False, "--use-fuzzy", help="Enable fuzzy matching in FTS method."),
+    # Hidden advanced options for backward compatibility
    weights: Optional[str] = typer.Option(
        None,
        "--weights", "-w",
-        help="RRF weights as key=value pairs (e.g., 'splade=0.4,vector=0.6' or 'fts=0.4,vector=0.6'). Default: auto-detect based on available backends."
+        hidden=True,
+        help="[Advanced] RRF weights as key=value pairs."
    ),
    cascade_strategy: Optional[str] = typer.Option(
        None,
        "--cascade-strategy",
-        help="Cascade search strategy: 'binary' (fast binary+dense) or 'hybrid' (FTS+cross-encoder). Only used with --method cascade."
+        hidden=True,
+        help="[Advanced] Cascade strategy for --method cascade."
    ),
    # Hidden deprecated parameter for backward compatibility
    mode: Optional[str] = typer.Option(None, "--mode", hidden=True, help="[DEPRECATED] Use --method instead."),
    json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."),
 ) -> None:
-    """Search indexed file contents using hybrid retrieval.
+    """Search indexed file contents.

    Uses chain search across directory indexes.
    Use --depth to limit search recursion (0 = current dir only).

    Search Methods:
-      - fts: Full-text search using FTS5 (unicode61 tokenizer). Use --use-fuzzy for typo tolerance.
-      - vector: Pure semantic vector search - for natural language queries.
-      - splade: SPLADE sparse neural search - semantic term expansion.
-      - hybrid: RRF fusion of sparse + dense search (default) - best recall.
-      - cascade: Two-stage retrieval (binary coarse + dense rerank) - fast + accurate.
+      - dense_rerank (default): Semantic search using Dense embedding coarse retrieval +
+        Cross-encoder reranking. Best for natural language queries and code understanding.
+      - fts: Full-text search using FTS5 (unicode61 tokenizer). Best for exact code
+        identifiers like function/class names. Use --use-fuzzy for typo tolerance.

    Method Selection Guide:
      - Code identifiers (function/class names): fts
-      - Natural language queries: vector or hybrid
+      - Natural language queries: dense_rerank (default)
      - Typo-tolerant search: fts --use-fuzzy
-      - Best overall quality: hybrid (default)
-      - Large codebase performance: cascade

-    Vector Search Requirements:
-      Vector, hybrid, and cascade methods require pre-generated embeddings.
+    Requirements:
+      The dense_rerank method requires pre-generated embeddings.
      Use 'codexlens embeddings-generate' to create embeddings first.

-    Hybrid Mode Weights:
-      Use --weights to adjust RRF fusion weights:
-      - SPLADE mode: 'splade=0.4,vector=0.6' (default)
-      - FTS mode: 'fts=0.4,vector=0.6' (default)
-
    Examples:
-      # Default hybrid search
-      codexlens search "authentication"
+      # Default semantic search (dense_rerank)
+      codexlens search "authentication logic"

      # Exact code identifier search
      codexlens search "authenticate_user" --method fts

      # Typo-tolerant fuzzy search
      codexlens search "authentcate" --method fts --use-fuzzy
-
-      # Pure semantic search
-      codexlens search "how to verify user credentials" --method vector
-
-      # SPLADE sparse neural search
-      codexlens search "user login flow" --method splade
-
-      # Fast cascade retrieval for large codebases (binary strategy)
-      codexlens search "authentication" --method cascade
-
-      # Cascade with cross-encoder reranking (hybrid strategy)
-      codexlens search "authentication" --method cascade --cascade-strategy hybrid
-
-      # Hybrid with custom weights
-      codexlens search "authentication" --method hybrid --weights splade=0.5,vector=0.5
    """
    _configure_logging(verbose, json_mode)
    search_path = path.expanduser().resolve()
@@ -538,29 +518,33 @@ def search(
    # Configure search (load settings from file)
    config = Config.load()

-    # Validate method
-    valid_methods = ["fts", "vector", "splade", "hybrid", "cascade"]
+    # Validate method - simplified interface exposes only dense_rerank and fts
+    # Other methods (vector, splade, hybrid, cascade) are hidden but still work for backward compatibility
+    valid_methods = ["fts", "dense_rerank", "vector", "splade", "hybrid", "cascade"]
    if actual_method not in valid_methods:
        if json_mode:
-            print_json(success=False, error=f"Invalid method: {actual_method}. Must be one of: {', '.join(valid_methods)}")
+            print_json(success=False, error=f"Invalid method: {actual_method}. Use 'dense_rerank' (semantic) or 'fts' (exact keyword).")
        else:
            console.print(f"[red]Invalid method:[/red] {actual_method}")
-            console.print(f"[dim]Valid methods: {', '.join(valid_methods)}[/dim]")
+            console.print("[dim]Use 'dense_rerank' (semantic, default) or 'fts' (exact keyword)[/dim]")
        raise typer.Exit(code=1)

-    # Validate cascade_strategy if provided
-    if cascade_strategy is not None:
-        valid_strategies = ["binary", "hybrid"]
-        if cascade_strategy not in valid_strategies:
+    # Map dense_rerank to cascade method internally
+    internal_cascade_strategy = cascade_strategy
+    if actual_method == "dense_rerank":
+        actual_method = "cascade"
+        internal_cascade_strategy = "dense_rerank"
+
+    # Validate cascade_strategy if provided (for advanced users)
+    if internal_cascade_strategy is not None:
+        valid_strategies = ["binary", "hybrid", "binary_rerank", "dense_rerank"]
+        if internal_cascade_strategy not in valid_strategies:
            if json_mode:
-                print_json(success=False, error=f"Invalid cascade strategy: {cascade_strategy}. Must be one of: {', '.join(valid_strategies)}")
+                print_json(success=False, error=f"Invalid cascade strategy: {internal_cascade_strategy}. Must be one of: {', '.join(valid_strategies)}")
            else:
-                console.print(f"[red]Invalid cascade strategy:[/red] {cascade_strategy}")
+                console.print(f"[red]Invalid cascade strategy:[/red] {internal_cascade_strategy}")
                console.print(f"[dim]Valid strategies: {', '.join(valid_strategies)}[/dim]")
            raise typer.Exit(code=1)
-        # Warn if using cascade_strategy with non-cascade method
-        if actual_method != "cascade" and not json_mode:
-            console.print(f"[yellow]Warning: --cascade-strategy is only effective with --method cascade[/yellow]")

    # Parse custom weights if provided
    hybrid_weights = None
@@ -693,7 +677,7 @@ def search(
        else:
            # Dispatch to cascade_search for cascade method
            if actual_method == "cascade":
-                result = engine.cascade_search(query, search_path, k=limit, options=options, strategy=cascade_strategy)
+                result = engine.cascade_search(query, search_path, k=limit, options=options, strategy=internal_cascade_strategy)
            else:
                result = engine.search(query, search_path, options)
            results_list = [