mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-10 02:24:35 +08:00
Add tests and implement functionality for staged cascade search and LSP expansion
- Introduced a new JSON file for verbose output of the Codex Lens search results. - Added unit tests for binary search functionality in `test_stage1_binary_search_uses_chunk_lines.py`. - Implemented regression tests for staged cascade Stage 2 expansion depth in `test_staged_cascade_lsp_depth.py`. - Created unit tests for staged cascade Stage 2 realtime LSP graph expansion in `test_staged_cascade_realtime_lsp.py`. - Enhanced the ChainSearchEngine to respect configuration settings for staged LSP depth and improve search accuracy.
This commit is contained in:
@@ -0,0 +1,65 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from codexlens.config import VECTORS_META_DB_NAME, Config
|
||||
from codexlens.search.chain_search import ChainSearchEngine, SearchStats
|
||||
from codexlens.storage.path_mapper import PathMapper
|
||||
from codexlens.storage.registry import RegistryStore
|
||||
|
||||
|
||||
def test_stage1_binary_search_prefers_chunk_start_line(tmp_path: Path) -> None:
    """Stage 1 results must carry the chunk's own ``start_line``/``end_line``.

    The binary searcher, the embedder and the vector metadata store are all
    replaced with stubs, so the only thing exercised is how
    ``_stage1_binary_search`` turns a stored chunk row into a search result.
    The stubbed row advertises lines 12-14 and the test asserts those exact
    values come back, together with the directory that holds the index DB.
    """
    store = RegistryStore(db_path=tmp_path / "registry.db")
    store.initialize()
    engine = ChainSearchEngine(
        store,
        PathMapper(index_root=tmp_path / "indexes"),
        config=Config(data_dir=tmp_path / "data"),
    )

    try:
        # Empty placeholder files: presumably the engine only checks that they
        # exist before taking the centralized-search path -- TODO confirm.
        index_root = tmp_path / "fake_index_root"
        index_root.mkdir(parents=True, exist_ok=True)
        index_db = index_root / "_index.db"
        for placeholder in (index_db, index_root / VECTORS_META_DB_NAME):
            placeholder.write_text("", encoding="utf-8")

        class _StubSearcher:
            """Always reports a single hit for chunk id 123."""

            def search(self, query_dense, top_k: int):
                del query_dense, top_k  # inputs are irrelevant to the stub
                # NOTE(review): second element looks like a distance/score -- confirm.
                return [(123, 10)]

        class _StubEmbedder:
            """Returns a fixed one-element embedding for any input."""

            def embed_to_numpy(self, texts):
                del texts
                return [[0.0]]

        chunk_row = {
            "chunk_id": 123,
            "file_path": str(tmp_path / "a.py"),
            "content": "def a():\n return 1\n",
            "start_line": 12,
            "end_line": 14,
            "metadata": {},
            "category": "code",
        }
        meta_store = MagicMock()
        meta_store.get_chunks_by_ids.return_value = [chunk_row]

        with patch.object(
            engine, "_get_centralized_binary_searcher", return_value=_StubSearcher()
        ), patch(
            "codexlens.search.chain_search.VectorMetadataStore", return_value=meta_store
        ), patch(
            "codexlens.semantic.embedder.Embedder", return_value=_StubEmbedder()
        ):
            coarse_results, returned_root = engine._stage1_binary_search(
                "a",
                [index_db],
                coarse_k=1,
                stats=SearchStats(),
            )

        assert returned_root == index_root
        assert len(coarse_results) == 1
        assert coarse_results[0].start_line == 12
        assert coarse_results[0].end_line == 14
    finally:
        engine.close()
|
||||
|
||||
168
codex-lens/tests/test_staged_cascade_lsp_depth.py
Normal file
168
codex-lens/tests/test_staged_cascade_lsp_depth.py
Normal file
@@ -0,0 +1,168 @@
|
||||
"""Regression tests for staged cascade Stage 2 expansion depth.
|
||||
|
||||
Staged cascade is documented as:
|
||||
coarse (binary) → LSP/graph expansion → clustering → optional rerank
|
||||
|
||||
This test ensures Stage 2 respects Config.staged_lsp_depth (not unrelated
|
||||
graph_expansion_depth settings).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from codexlens.config import Config
|
||||
from codexlens.entities import CodeRelationship, RelationshipType, SearchResult, Symbol
|
||||
from codexlens.search.chain_search import ChainSearchEngine
|
||||
from codexlens.storage.dir_index import DirIndexStore
|
||||
from codexlens.storage.index_tree import _compute_graph_neighbors
|
||||
from codexlens.storage.path_mapper import PathMapper
|
||||
from codexlens.storage.registry import RegistryStore
|
||||
|
||||
|
||||
@pytest.fixture()
def temp_paths() -> Path:
    """Yield a fresh temporary directory and remove it after the test.

    NOTE(review): the function is a generator, so the ``-> Path`` annotation
    describes the yielded value rather than the actual return type.
    """
    scratch = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
    yield Path(scratch.name)
    # Best-effort teardown: a failure to delete the directory must not fail
    # the test itself.
    try:
        scratch.cleanup()
    except OSError:  # PermissionError is a subclass of OSError
        pass
|
||||
|
||||
|
||||
def _create_index_with_neighbors(root: Path) -> tuple[PathMapper, Path, Path, str]:
    """Create a small on-disk index describing the call chain ``a -> b -> c``.

    Three one-function source files are written under ``root / "project"`` and
    registered in a ``DirIndexStore`` together with their symbols and CALL
    relationships; ``_compute_graph_neighbors`` is then run on the store
    before it is closed.

    Args:
        root: Scratch directory that will hold the project and the indexes.

    Returns:
        ``(mapper, project_root, path_of_a_py, source_text_of_a_py)``.
    """
    project_root = root / "project"
    project_root.mkdir(parents=True, exist_ok=True)

    mapper = PathMapper(index_root=root / "indexes")
    index_db_path = mapper.source_to_index_db(project_root)
    index_db_path.parent.mkdir(parents=True, exist_ok=True)

    # Use 3 files so staged_cascade_search's final "deduplicate by path" step
    # doesn't collapse all expanded symbols into a single file result.
    layout = (
        ("a", "a.py", "\n".join(["def a():", " b()", ""])),
        ("b", "b.py", "\n".join(["def b():", " c()", ""])),
        ("c", "c.py", "\n".join(["def c():", " return 1", ""])),
    )

    paths = {symbol: project_root / filename for symbol, filename, _ in layout}
    for symbol, _, text in layout:
        paths[symbol].write_text(text, encoding="utf-8")

    def _call_edge(caller: str, callee: str) -> CodeRelationship:
        """Describe ``caller`` invoking ``callee`` on line 2 of its file."""
        return CodeRelationship(
            source_symbol=caller,
            target_symbol=callee,
            relationship_type=RelationshipType.CALL,
            source_file=str(paths[caller]),
            target_file=str(paths[callee]),
            source_line=2,
        )

    # Outgoing edges per symbol; "c" is the leaf of the chain.
    outgoing = {
        "a": [_call_edge("a", "b")],
        "b": [_call_edge("b", "c")],
        "c": [],
    }

    store = DirIndexStore(index_db_path, config=Config(data_dir=root / "data"))
    store.initialize()
    for symbol, _, text in layout:
        source_file = paths[symbol]
        store.add_file(
            name=source_file.name,
            full_path=source_file,
            content=text,
            language="python",
            symbols=[
                Symbol(name=symbol, kind="function", range=(1, 2), file=str(source_file))
            ],
            relationships=outgoing[symbol],
        )
    _compute_graph_neighbors(store)
    store.close()

    first_symbol, _, first_text = layout[0]
    return mapper, project_root, paths[first_symbol], first_text
|
||||
|
||||
|
||||
def test_staged_cascade_stage2_uses_staged_lsp_depth(temp_paths: Path) -> None:
    """Stage 2 expansion must follow ``staged_lsp_depth``, not ``graph_expansion_depth``.

    The index contains the chain ``a -> b -> c``. With ``staged_lsp_depth=1``
    and a deliberately larger ``graph_expansion_depth=2``, a search seeded at
    ``a`` is expected to surface ``b`` (one hop) but not ``c`` (two hops).
    """
    mapper, project_root, seed_file, seed_source = _create_index_with_neighbors(temp_paths)
    index_db_path = mapper.source_to_index_db(project_root)

    registry = RegistryStore(db_path=temp_paths / "registry.db")
    registry.initialize()

    # Intentionally conflicting depths: staged_lsp_depth should win for staged cascade.
    engine = ChainSearchEngine(
        registry,
        mapper,
        config=Config(
            data_dir=temp_paths / "data",
            staged_lsp_depth=1,
            graph_expansion_depth=2,
            enable_staged_rerank=False,
            staged_clustering_strategy="noop",
        ),
    )
    try:
        seed_hit = SearchResult(
            path=str(seed_file.resolve()),
            score=1.0,
            excerpt="",
            content=seed_source,
            start_line=1,
            end_line=2,
            symbol_name="a",
            symbol_kind="function",
        )

        # Stage 1 is stubbed to bypass the binary vector infrastructure; its
        # output alone is sufficient to drive Stage 2.
        with patch("codexlens.search.chain_search.NUMPY_AVAILABLE", True), patch.object(
            engine, "_find_start_index", return_value=index_db_path
        ), patch.object(
            engine, "_collect_index_paths", return_value=[index_db_path]
        ), patch.object(
            engine, "_stage1_binary_search", return_value=([seed_hit], index_db_path.parent)
        ):
            outcome = engine.staged_cascade_search(
                query="test",
                source_path=project_root,
                k=3,
                coarse_k=10,
            )

        found = {hit.symbol_name for hit in outcome.results if hit.symbol_name}
        assert "b" in found
        # With staged_lsp_depth=1, Stage 2 should NOT include 2-hop neighbor "c".
        assert "c" not in found
    finally:
        engine.close()
|
||||
98
codex-lens/tests/test_staged_cascade_realtime_lsp.py
Normal file
98
codex-lens/tests/test_staged_cascade_realtime_lsp.py
Normal file
@@ -0,0 +1,98 @@
|
||||
"""Unit tests for staged cascade Stage 2 realtime LSP graph expansion.
|
||||
|
||||
These tests mock out the live LSP components (LspBridge + LspGraphBuilder)
|
||||
so they can run without external language servers installed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
from codexlens.config import Config
|
||||
from codexlens.entities import SearchResult
|
||||
from codexlens.hybrid_search.data_structures import CodeAssociationGraph, CodeSymbolNode, Range
|
||||
from codexlens.search.chain_search import ChainSearchEngine
|
||||
from codexlens.storage.path_mapper import PathMapper
|
||||
from codexlens.storage.registry import RegistryStore
|
||||
|
||||
|
||||
class _DummyBridge:
    """Inert async-context-manager stand-in used in place of ``LspBridge``.

    It accepts any constructor arguments, reports no document symbols, and
    does nothing on enter/exit, so tests can run without a language server.
    """

    def __init__(self, *args, **kwargs) -> None:
        """Accept and ignore whatever the caller passes."""

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        # Falls through and returns None, so exceptions are never suppressed.
        del exc_type, exc, tb

    async def get_document_symbols(self, file_path: str):
        """Return an empty symbol list regardless of ``file_path``."""
        del file_path
        return []
|
||||
|
||||
|
||||
def test_stage2_realtime_mode_expands_and_combines(tmp_path: Path) -> None:
    """Realtime Stage 2 should return the seed plus graph-discovered symbols.

    ``codexlens.lsp.LspBridge`` and ``codexlens.lsp.LspGraphBuilder`` are
    patched so that the builder hands back a prepared two-node graph (seed
    ``a`` and related ``b``). The test checks that the builder was created
    with ``resolve_symbols=False`` and that both symbol names appear in the
    expanded results.
    """
    registry = RegistryStore(db_path=tmp_path / "registry.db")
    registry.initialize()
    mapper = PathMapper(index_root=tmp_path / "indexes")

    engine = ChainSearchEngine(
        registry,
        mapper,
        config=Config(
            data_dir=tmp_path / "data",
            staged_stage2_mode="realtime",
            staged_lsp_depth=1,
            staged_realtime_lsp_timeout_s=1.0,
            staged_realtime_lsp_max_nodes=10,
            staged_realtime_lsp_warmup_s=0.0,
        ),
    )
    try:
        seed_hit = SearchResult(
            path=str(tmp_path / "a.py"),
            score=1.0,
            excerpt="def a(): pass",
            content="def a():\n pass\n",
            symbol_name="a",
            symbol_kind="function",
            start_line=1,
            end_line=2,
        )
        coarse = [seed_hit]

        # Prepared graph returned by the mocked builder: the seed node plus
        # one related node that lives in a different file.
        graph = CodeAssociationGraph()

        seed_id = f"{seed_hit.path}:a:1"
        graph.nodes[seed_id] = CodeSymbolNode(
            id=seed_id,
            name="a",
            kind="function",
            file_path=seed_hit.path,
            range=Range(start_line=1, start_character=1, end_line=2, end_character=1),
        )

        related_path = str(tmp_path / "b.py")
        related_id = f"{related_path}:b:1"
        graph.nodes[related_id] = CodeSymbolNode(
            id=related_id,
            name="b",
            kind="function",
            file_path=related_path,
            range=Range(start_line=1, start_character=1, end_line=1, end_character=1),
            raw_code="def b():\n return 1\n",
        )

        dummy_builder = MagicMock()
        dummy_builder.build_from_seeds = AsyncMock(return_value=graph)

        with patch("codexlens.lsp.LspBridge", _DummyBridge), patch(
            "codexlens.lsp.LspGraphBuilder", return_value=dummy_builder
        ) as mock_builder:
            # Avoid needing a real index_to_source mapping
            engine.mapper.index_to_source = MagicMock(return_value=tmp_path)
            expanded = engine._stage2_lsp_expand(
                coarse, index_root=tmp_path / "fake_index_root"
            )

        assert mock_builder.call_args is not None
        assert mock_builder.call_args.kwargs.get("resolve_symbols") is False

        names = {hit.symbol_name for hit in expanded if hit.symbol_name}
        assert "a" in names
        assert "b" in names
    finally:
        engine.close()
|
||||
@@ -760,6 +760,24 @@ class TestLocationParsing:
|
||||
assert loc.line == 1
|
||||
assert loc.character == 1
|
||||
|
||||
def test_location_from_file_uri_windows_percent_encoded_drive(self):
    """Parse Location from percent-encoded Windows drive URIs (pyright-style).

    The drive colon arrives encoded as ``%3A``; the parsed ``file_path`` is
    expected to contain a literal ``d:`` and the 0-based LSP position is
    expected to come back 1-based.
    """
    from codexlens.lsp.lsp_bridge import Location

    start_position = {"line": 18, "character": 3}
    end_position = {"line": 18, "character": 10}
    payload = {
        "uri": "file:///d%3A/Claude_dms3/codex-lens/src/codexlens/api/semantic.py",
        "range": {"start": start_position, "end": end_position},
    }

    loc = Location.from_lsp_response(payload)

    assert loc.file_path == "d:/Claude_dms3/codex-lens/src/codexlens/api/semantic.py"
    assert loc.line == 19  # 0-based -> 1-based
    assert loc.character == 4
|
||||
|
||||
def test_location_from_direct_fields(self):
|
||||
"""Parse Location from direct field format."""
|
||||
from codexlens.lsp.lsp_bridge import Location
|
||||
|
||||
Reference in New Issue
Block a user