Files
Claude-Code-Workflow/codex-lens/examples/simple_search_comparison.py

124 lines
3.5 KiB
Python

"""Simple search method comparison using CLI commands.
Compares:
1. FTS (Full-Text Search)
2. Semantic (Dense + Rerank)
3. Hybrid (FTS + Semantic fusion) - planned for the future; not run by this script yet
Usage:
python examples/simple_search_comparison.py
"""
import subprocess
import time
import json
import re
import os
from pathlib import Path
def strip_ansi(text: str) -> str:
    """Return *text* with ANSI color (SGR) escape sequences removed.

    Only sequences of the form ``ESC [ <digits/semicolons> m`` are dropped;
    any other escape sequence is left in place.
    """
    return re.sub(r'\x1b\[[0-9;]*m', '', text)
def run_search(
    query: str,
    method: str,
    limit: int = 20,
    *,
    cwd: "str | Path" = "D:/Claude_dms3/codex-lens/src",
) -> tuple[list, float]:
    """Run a ``codexlens search`` CLI call and measure its wall-clock time.

    Args:
        query: Search text passed to the CLI.
        method: Value for the CLI ``--method`` flag (e.g. ``"fts"``).
        limit: Value for the CLI ``--limit`` flag.
        cwd: Directory the CLI process is started in. Defaults to the
            checkout location this example was originally written against.

    Returns:
        ``(results, elapsed_seconds)``. ``results`` is an empty list when the
        process cannot be started, exits non-zero, or prints output that is
        not the expected JSON object. ``elapsed_seconds`` covers the
        subprocess call only (not JSON parsing).
    """
    cmd = [
        "python", "-m", "codexlens", "search",
        query,
        "--method", method,
        "--limit", str(limit),
        "--json",
        "-p", ".",
    ]

    start = time.perf_counter()
    try:
        result = subprocess.run(
            cmd,
            cwd=str(Path(cwd)),
            capture_output=True,
            text=True,
            env={**os.environ, "NO_COLOR": "1"},  # Try to disable colors
        )
    except OSError as exc:
        # Raised when the working directory does not exist or the
        # interpreter cannot be found; report it like any other failed run
        # instead of aborting the whole comparison.
        elapsed = time.perf_counter() - start
        print(f"Error running {method} search:")
        print(str(exc)[:200])
        return [], elapsed
    elapsed = time.perf_counter() - start

    if result.returncode != 0:
        print(f"Error running {method} search:")
        print(result.stderr[:200])
        return [], elapsed

    try:
        # Colors may still be emitted despite NO_COLOR, so strip them
        # before parsing.
        data = json.loads(strip_ansi(result.stdout))
    except json.JSONDecodeError as e:
        print(f"Failed to parse JSON output for {method}: {e}")
        return [], elapsed

    if not isinstance(data, dict):
        # Valid JSON, but not an object: there is nowhere to look up results.
        print(
            f"Unexpected JSON structure for {method}: "
            f"expected an object, got {type(data).__name__}"
        )
        return [], elapsed

    # Results are normally nested in a "result" object; fall back to a
    # top-level "results" key otherwise.
    nested = data.get("result")
    if isinstance(nested, dict) and "results" in nested:
        return nested["results"], elapsed
    return data.get("results", []), elapsed
def _run_and_report(query: str, heading: str, method: str) -> tuple[list, float]:
    """Run one search method, print its timing and top three hits, and return them."""
    # The heading is printed before the search starts so the user sees
    # which method is currently running.
    print(heading)
    results, elapsed = run_search(query, method, 20)
    print(f" Time: {elapsed*1000:.2f}ms")
    print(f" Results: {len(results)}")
    if results:
        print(" Top 3:")
        for rank, hit in enumerate(results[:3], 1):
            # Drop the hard-coded checkout prefix so paths print shorter.
            path = hit.get("path", "").replace("D:\\Claude_dms3\\codex-lens\\src\\", "")
            score = hit.get("score", 0)
            print(f" {rank}. [{score:.4f}] {path}")
    print()
    return results, elapsed


def print_comparison(query: str):
    """Run the FTS and semantic searches for *query* and print a comparison.

    For each method the elapsed time, result count and top three hits are
    printed, followed by a summary with the relative speed of the two
    methods.

    Args:
        query: Search text passed to both search methods.
    """
    print(f"\n{'='*80}")
    print(f"Query: {query}")
    print(f"{'='*80}\n")

    # Method 1: FTS
    results_fts, time_fts = _run_and_report(
        query, "Method 1: FTS (Full-Text Search)", "fts"
    )

    # Method 2: Semantic (Dense + Rerank)
    results_semantic, time_semantic = _run_and_report(
        query, "Method 2: Semantic (Dense + Rerank)", "dense_rerank"
    )

    # Summary
    print("Summary:")
    print(f" FTS: {time_fts*1000:8.2f}ms {len(results_fts):3d} results")
    print(f" Semantic: {time_semantic*1000:8.2f}ms {len(results_semantic):3d} results")

    # Report the ratio as slower/faster and name the method that actually
    # won, rather than always claiming FTS was faster.
    if time_semantic >= time_fts:
        faster_name, faster_time, slower_time = "FTS", time_fts, time_semantic
    else:
        faster_name, faster_time, slower_time = "Semantic", time_semantic, time_fts

    if faster_time > 0:
        print(f" Speedup: {slower_time/faster_time:6.2f}x ({faster_name} faster)")
    else:
        # Avoid dividing by zero if the timer reported no elapsed time.
        print(" Speedup: n/a (elapsed time too small to compare)")
def main():
    """Run the search comparison for a fixed set of sample queries.

    Prints a banner, one comparison section per query, and a closing banner.
    """
    print("Search Method Comparison")
    print("=" * 80)

    sample_queries = (
        "vector search",
        "LSP call hierarchy",
        "search ranking",
        "index building",
    )
    for text in sample_queries:
        print_comparison(text)

    # Closing banner: rule, message, rule - one line each.
    print(f"\n{'='*80}", "Comparison complete", f"{'='*80}", sep="\n")
# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()