feat: Add indexing group to CodexLens environment variable schema

- Introduced a new `indexing` group in the environment variable schema with fields for AST grep usage, static graph enablement, and relationship types.
- Updated the CodexLens configuration to support new indexing features.
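A rough sketch of what the new group could look like; the class, field, and environment-variable names below are illustrative, not the actual schema:

```python
from dataclasses import dataclass, field
from typing import List


@dataclass
class IndexingEnvGroup:
    """Hypothetical shape of the `indexing` group (names are illustrative)."""

    use_ast_grep: bool = False          # e.g. CODEXLENS_INDEXING_USE_AST_GREP
    enable_static_graph: bool = False   # e.g. CODEXLENS_INDEXING_ENABLE_STATIC_GRAPH
    relationship_types: List[str] = field(
        default_factory=lambda: ["calls", "imports"]  # illustrative defaults
    )
```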

feat: Enhance DashboardToolbar with session and fullscreen controls

- Added props for session sidebar visibility and fullscreen mode to the DashboardToolbar component.
- Implemented handlers for toggling session sidebar and fullscreen mode.
- Updated the toolbar layout to include session sidebar toggle and fullscreen button.

refactor: Improve TerminalGrid and TerminalPane components

- Refactored GridGroupRenderer to handle pane size changes directly via the store.
- Simplified TerminalPane by removing unused file-browser logic and improving layout handling.
- Updated key generation for child panes so keys stay stable across re-renders.

feat: Extend CodexLens API for staged Stage-2 expansion modes

- Added support for a `staged_stage2_mode` parameter in the CodexLens API, allowing callers to select the Stage-2 expansion source per call.
- Updated the semantic search handlers to process the new stage-2 mode parameter.
- Implemented validation and normalization for the new stage-2 modes in the backend.
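A hedged usage sketch; the import path and the `query` parameter name are assumptions, while `staged_stage2_mode` and its valid values follow the diff further down:

```python
# Import path is an assumption; the diff below shows only the handler body.
from codexlens.handlers import semantic_search

# Per-call override: expand Stage-2 via the global static graph.
hits = semantic_search(
    query="graph expansion",
    staged_stage2_mode="static_global_graph",  # or "precomputed" / "realtime" ("live" aliases to "realtime")
    limit=20,
)
```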

test: Add benchmarks for staged Stage-2 modes comparison

- Created a benchmark script to compare performance and results of different staged Stage-2 modes.
- Included latency statistics, top-k overlap metrics (Jaccard and RBO), and diversity proxies across modes.
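For intuition about the overlap metrics, a tiny worked example using self-contained recreations of the script's `jaccard_topk` and `rbo` helpers (full definitions appear in the diff below):

```python
def jaccard_topk(a, b):
    sa, sb = set(a), set(b)
    if not sa and not sb:
        return 1.0
    if not sa or not sb:
        return 0.0
    return len(sa & sb) / len(sa | sb)


def rbo(a, b, p=0.9):
    # Truncated (non-extrapolated) rank-biased overlap.
    depth = max(len(a), len(b))
    seen_a, seen_b, score = set(), set(), 0.0
    for d in range(1, depth + 1):
        if d <= len(a):
            seen_a.add(a[d - 1])
        if d <= len(b):
            seen_b.add(b[d - 1])
        score += (len(seen_a & seen_b) / d) * (1.0 - p) * p ** (d - 1)
    return score


precomputed = ["a.py", "b.py", "c.py"]
realtime = ["a.py", "c.py", "d.py"]
print(jaccard_topk(precomputed, realtime))   # 0.5 (2 shared files / 4 total)
print(round(rbo(precomputed, realtime), 3))  # 0.199; truncated RBO scores short lists low
```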
catlog22 committed 2026-02-16 12:12:38 +08:00
parent 2202c2ccfd
commit de3dd044b9
13 changed files with 674 additions and 126 deletions

benchmarks/compare_staged_stage2_modes.py

@@ -0,0 +1,391 @@
#!/usr/bin/env python
"""Compare staged cascade Stage-2 modes (precomputed vs realtime vs static graph).
This benchmark compares the *same* staged cascade strategy with different Stage-2
expansion sources:
1) precomputed: per-dir `graph_neighbors` expansion (fast, index-local)
2) realtime: live LSP graph expansion (contextual, requires LSP availability)
3) static_global_graph: global_relationships expansion (project-wide, requires static graph indexing)
Because most repos do not have ground-truth labels, this script reports:
- latency statistics per mode
- top-k overlap metrics (Jaccard + RBO) between modes
- diversity proxies (unique files/dirs)
- staged pipeline stage stats (when present)
Usage:
python benchmarks/compare_staged_stage2_modes.py --source ./src
python benchmarks/compare_staged_stage2_modes.py --queries-file benchmarks/queries.txt
"""
from __future__ import annotations
import argparse
import gc
import json
import os
import re
import statistics
import sys
import time
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional
# Add src to path (match other benchmark scripts)
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from codexlens.config import Config
from codexlens.search.chain_search import ChainSearchEngine
from codexlens.storage.path_mapper import PathMapper
from codexlens.storage.registry import RegistryStore
DEFAULT_QUERIES = [
"class Config",
"def search",
"LspBridge",
"graph expansion",
"static graph relationships",
"clustering strategy",
"error handling",
]
VALID_STAGE2_MODES = ("precomputed", "realtime", "static_global_graph")
def _now_ms() -> float:
return time.perf_counter() * 1000.0
def _normalize_path_key(path: str) -> str:
"""Normalize file paths for overlap/dedup metrics (Windows-safe)."""
try:
p = Path(path)
if str(p) and (p.is_absolute() or re.match(r"^[A-Za-z]:", str(p))):
norm = str(p.resolve())
else:
norm = str(p)
except Exception:
norm = path
    # Use a single separator style so overlap/dedup comparisons match across modes.
    norm = norm.replace("/", "\\")
if os.name == "nt":
norm = norm.lower()
return norm
def _extract_stage_stats(errors: List[str]) -> Optional[Dict[str, Any]]:
"""Extract STAGE_STATS JSON blob from SearchStats.errors."""
for item in errors or []:
if not isinstance(item, str):
continue
if not item.startswith("STAGE_STATS:"):
continue
payload = item[len("STAGE_STATS:") :]
try:
return json.loads(payload)
except Exception:
return None
return None
def jaccard_topk(a: List[str], b: List[str]) -> float:
sa, sb = set(a), set(b)
if not sa and not sb:
return 1.0
if not sa or not sb:
return 0.0
return len(sa & sb) / len(sa | sb)
def rbo(a: List[str], b: List[str], p: float = 0.9) -> float:
"""Rank-biased overlap for two ranked lists."""
if p <= 0.0 or p >= 1.0:
raise ValueError("p must be in (0, 1)")
if not a and not b:
return 1.0
depth = max(len(a), len(b))
seen_a: set[str] = set()
seen_b: set[str] = set()
score = 0.0
for d in range(1, depth + 1):
if d <= len(a):
seen_a.add(a[d - 1])
if d <= len(b):
seen_b.add(b[d - 1])
overlap = len(seen_a & seen_b)
score += (overlap / d) * ((1.0 - p) * (p ** (d - 1)))
return score
def _unique_parent_dirs(paths: Iterable[str]) -> int:
dirs = set()
for p in paths:
try:
dirs.add(str(Path(p).parent))
except Exception:
continue
return len(dirs)
def _load_queries(path: Optional[Path], inline: Optional[List[str]]) -> List[str]:
if inline:
return [q.strip() for q in inline if isinstance(q, str) and q.strip()]
if path:
if not path.exists():
raise SystemExit(f"Queries file does not exist: {path}")
raw = path.read_text(encoding="utf-8", errors="ignore")
queries = [line.strip() for line in raw.splitlines() if line.strip() and not line.strip().startswith("#")]
return queries
return list(DEFAULT_QUERIES)
@dataclass
class RunDetail:
stage2_mode: str
query: str
latency_ms: float
num_results: int
topk_paths: List[str]
stage_stats: Optional[Dict[str, Any]] = None
error: Optional[str] = None
@dataclass
class PairwiseCompare:
query: str
mode_a: str
mode_b: str
jaccard_topk: float
rbo_topk: float
a_unique_files_topk: int
b_unique_files_topk: int
a_unique_dirs_topk: int
b_unique_dirs_topk: int
def _run_once(
engine: ChainSearchEngine,
config: Config,
query: str,
source_path: Path,
*,
stage2_mode: str,
k: int,
coarse_k: int,
) -> RunDetail:
if stage2_mode not in VALID_STAGE2_MODES:
raise ValueError(f"Invalid stage2_mode: {stage2_mode}")
# Mutate config for this run; ChainSearchEngine reads config fields per-call.
config.staged_stage2_mode = stage2_mode
gc.collect()
start_ms = _now_ms()
try:
result = engine.cascade_search(
query=query,
source_path=source_path,
k=k,
coarse_k=coarse_k,
strategy="staged",
)
latency_ms = _now_ms() - start_ms
paths_raw = [r.path for r in (result.results or []) if getattr(r, "path", None)]
paths = [_normalize_path_key(p) for p in paths_raw]
topk: List[str] = []
seen: set[str] = set()
for p in paths:
if p in seen:
continue
seen.add(p)
topk.append(p)
if len(topk) >= k:
break
stage_stats = None
try:
stage_stats = _extract_stage_stats(getattr(result.stats, "errors", []) or [])
except Exception:
stage_stats = None
return RunDetail(
stage2_mode=stage2_mode,
query=query,
latency_ms=latency_ms,
num_results=len(result.results or []),
topk_paths=topk,
stage_stats=stage_stats,
error=None,
)
except Exception as exc:
return RunDetail(
stage2_mode=stage2_mode,
query=query,
latency_ms=_now_ms() - start_ms,
num_results=0,
topk_paths=[],
stage_stats=None,
error=str(exc),
)
def main() -> None:
parser = argparse.ArgumentParser(description="Compare staged Stage-2 expansion modes.")
parser.add_argument("--source", type=Path, default=Path.cwd(), help="Project path to search")
parser.add_argument("--queries-file", type=Path, default=None, help="Optional newline-delimited queries file")
parser.add_argument("--queries", nargs="*", default=None, help="Inline queries (overrides queries-file)")
parser.add_argument("--k", type=int, default=20, help="Top-k to evaluate")
parser.add_argument("--coarse-k", type=int, default=100, help="Stage-1 coarse_k")
parser.add_argument(
"--stage2-modes",
nargs="*",
default=list(VALID_STAGE2_MODES),
help="Stage-2 modes to compare",
)
parser.add_argument("--warmup", type=int, default=0, help="Warmup iterations per mode")
parser.add_argument(
"--output",
type=Path,
default=Path(__file__).parent / "results" / "staged_stage2_modes.json",
help="Output JSON path",
)
args = parser.parse_args()
if not args.source.exists():
raise SystemExit(f"Source path does not exist: {args.source}")
stage2_modes = [str(m).strip().lower() for m in (args.stage2_modes or []) if str(m).strip()]
for m in stage2_modes:
if m not in VALID_STAGE2_MODES:
raise SystemExit(f"Invalid --stage2-modes entry: {m} (valid: {', '.join(VALID_STAGE2_MODES)})")
queries = _load_queries(args.queries_file, args.queries)
if not queries:
raise SystemExit("No queries to run")
# Match CLI behavior: load settings + apply global/workspace .env overrides.
config = Config.load()
config.cascade_strategy = "staged"
config.enable_staged_rerank = True
config.embedding_use_gpu = False # stability on some Windows setups
registry = RegistryStore()
registry.initialize()
mapper = PathMapper()
engine = ChainSearchEngine(registry=registry, mapper=mapper, config=config)
try:
# Warmup
if args.warmup > 0:
warm_query = queries[0]
for mode in stage2_modes:
for _ in range(args.warmup):
try:
_run_once(
engine,
config,
warm_query,
args.source,
stage2_mode=mode,
k=min(args.k, 5),
coarse_k=min(args.coarse_k, 50),
)
except Exception:
pass
per_query: Dict[str, Dict[str, RunDetail]] = {}
runs: List[RunDetail] = []
comparisons: List[PairwiseCompare] = []
for i, query in enumerate(queries, start=1):
print(f"[{i}/{len(queries)}] {query}")
per_query[query] = {}
for mode in stage2_modes:
detail = _run_once(
engine,
config,
query,
args.source,
stage2_mode=mode,
k=args.k,
coarse_k=args.coarse_k,
)
per_query[query][mode] = detail
runs.append(detail)
# Pairwise overlaps for this query
for a_idx in range(len(stage2_modes)):
for b_idx in range(a_idx + 1, len(stage2_modes)):
mode_a = stage2_modes[a_idx]
mode_b = stage2_modes[b_idx]
a = per_query[query][mode_a]
b = per_query[query][mode_b]
comparisons.append(
PairwiseCompare(
query=query,
mode_a=mode_a,
mode_b=mode_b,
jaccard_topk=jaccard_topk(a.topk_paths, b.topk_paths),
rbo_topk=rbo(a.topk_paths, b.topk_paths, p=0.9),
a_unique_files_topk=len(set(a.topk_paths)),
b_unique_files_topk=len(set(b.topk_paths)),
a_unique_dirs_topk=_unique_parent_dirs(a.topk_paths),
b_unique_dirs_topk=_unique_parent_dirs(b.topk_paths),
)
)
def _latencies(details: List[RunDetail]) -> List[float]:
return [d.latency_ms for d in details if not d.error]
mode_summaries: Dict[str, Dict[str, Any]] = {}
for mode in stage2_modes:
mode_runs = [r for r in runs if r.stage2_mode == mode]
lat = _latencies(mode_runs)
mode_summaries[mode] = {
"success": sum(1 for r in mode_runs if not r.error),
"avg_latency_ms": statistics.mean(lat) if lat else 0.0,
"p50_latency_ms": statistics.median(lat) if lat else 0.0,
"p95_latency_ms": statistics.quantiles(lat, n=20)[18] if len(lat) >= 2 else (lat[0] if lat else 0.0),
}
summary = {
"timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
"source": str(args.source),
"k": args.k,
"coarse_k": args.coarse_k,
"query_count": len(queries),
"stage2_modes": stage2_modes,
"modes": mode_summaries,
"avg_pairwise_jaccard_topk": statistics.mean([c.jaccard_topk for c in comparisons]) if comparisons else 0.0,
"avg_pairwise_rbo_topk": statistics.mean([c.rbo_topk for c in comparisons]) if comparisons else 0.0,
}
args.output.parent.mkdir(parents=True, exist_ok=True)
payload = {
"summary": summary,
"runs": [asdict(r) for r in runs],
"comparisons": [asdict(c) for c in comparisons],
}
args.output.write_text(json.dumps(payload, indent=2), encoding="utf-8")
print(f"\nSaved: {args.output}")
finally:
try:
engine.close()
except Exception as exc:
print(f"WARNING engine.close() failed: {exc!r}", file=sys.stderr)
try:
registry.close()
except Exception as exc:
print(f"WARNING registry.close() failed: {exc!r}", file=sys.stderr)
if __name__ == "__main__":
main()
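The emitted JSON can be consumed directly; a minimal sketch, assuming the default output path used by the script above:

```python
import json
from pathlib import Path

payload = json.loads(
    Path("benchmarks/results/staged_stage2_modes.json").read_text(encoding="utf-8")
)
for mode, stats in payload["summary"]["modes"].items():
    print(f"{mode}: avg={stats['avg_latency_ms']:.1f}ms "
          f"p95={stats['p95_latency_ms']:.1f}ms ok={stats['success']}")
```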

View File

@@ -24,6 +24,7 @@ def semantic_search(
structural_weight: float = 0.3,
keyword_weight: float = 0.2,
fusion_strategy: str = "rrf",
staged_stage2_mode: Optional[str] = None,
kind_filter: Optional[List[str]] = None,
limit: int = 20,
include_match_reason: bool = False,
@@ -50,6 +51,10 @@ def semantic_search(
- binary: Binary rerank cascade -> binary_cascade_search
- hybrid: Binary rerank cascade (backward compat) -> binary_rerank_cascade_search
- dense_rerank: Dense rerank cascade -> dense_rerank_cascade_search
staged_stage2_mode: Optional override for staged Stage-2 expansion mode
- precomputed: GraphExpander over per-dir graph_neighbors (default)
- realtime: Live LSP expansion (requires LSP availability)
- static_global_graph: GlobalGraphExpander over global_relationships
kind_filter: Symbol type filter (e.g., ["function", "class"])
limit: Max return count (default 20)
include_match_reason: Generate match reason (heuristic, not LLM)
@@ -97,6 +102,17 @@ def semantic_search(
# Load config
config = Config.load()
# Optional per-call override for staged cascade Stage-2 mode.
if staged_stage2_mode:
stage2 = str(staged_stage2_mode).strip().lower()
        # Accept "live" as an alias for "realtime".
        if stage2 == "live":
stage2 = "realtime"
valid_stage2 = {"precomputed", "realtime", "static_global_graph"}
if stage2 in valid_stage2:
config.staged_stage2_mode = stage2
else:
logger.debug("Ignoring invalid staged_stage2_mode: %r", staged_stage2_mode)
# Get or create registry and mapper
try:
registry = RegistryStore.default()

View File

@@ -32,6 +32,7 @@ class TestSemanticSearchFunctionSignature:
"structural_weight",
"keyword_weight",
"fusion_strategy",
"staged_stage2_mode",
"kind_filter",
"limit",
"include_match_reason",
@@ -49,6 +50,7 @@ class TestSemanticSearchFunctionSignature:
assert sig.parameters["structural_weight"].default == 0.3
assert sig.parameters["keyword_weight"].default == 0.2
assert sig.parameters["fusion_strategy"].default == "rrf"
assert sig.parameters["staged_stage2_mode"].default is None
assert sig.parameters["kind_filter"].default is None
assert sig.parameters["limit"].default == 20
assert sig.parameters["include_match_reason"].default is False