feat(codexlens): add staged settings for advanced configuration and update related components

- Added new staged settings in config.py for coarse_k, lsp_depth, stage2_mode, and clustering strategy.
- Updated config-handlers.ts to handle new staged settings and map environment variables.
- Enhanced codexlens.json localization files for English and Chinese to include new staged settings.
- Modified astgrep_js_ts_processor.py to improve import handling for named imports.
- Updated JavaScript and TypeScript patterns to support new import formats.
- Added tests for staged settings loading and performance benchmarks for stage-2 expansion.
This commit is contained in:
catlog22
2026-02-18 13:05:35 +08:00
parent 265a77d6e7
commit d6e282b5a9
12 changed files with 618 additions and 78 deletions

View File

@@ -83,16 +83,27 @@ def test_js_imports_and_inherits_match(tmp_path: Path) -> None:
assert result_ts is not None
assert result_ast is not None
ts_rel = extract_relationship_tuples(
ts_imports = extract_relationship_tuples(
result_ts.relationships,
only_types={RelationshipType.IMPORTS, RelationshipType.INHERITS},
only_types={RelationshipType.IMPORTS},
)
ast_rel = extract_relationship_tuples(
ast_imports = extract_relationship_tuples(
result_ast.relationships,
only_types={RelationshipType.IMPORTS, RelationshipType.INHERITS},
only_types={RelationshipType.IMPORTS},
)
assert ast_imports == ts_imports
assert ast_rel == ts_rel
ts_inherits = extract_relationship_tuples(
result_ts.relationships,
only_types={RelationshipType.INHERITS},
)
ast_inherits = extract_relationship_tuples(
result_ast.relationships,
only_types={RelationshipType.INHERITS},
)
# Ast-grep may include inheritance edges that the tree-sitter extractor does not currently emit.
assert ts_inherits.issubset(ast_inherits)
assert ("Child", "Base", "inherits") in ast_inherits
def test_ts_imports_match_and_inherits_superset(tmp_path: Path) -> None:
@@ -137,4 +148,3 @@ def test_ts_imports_match_and_inherits_superset(tmp_path: Path) -> None:
assert ts_inherits.issubset(ast_inherits)
# But at minimum, class inheritance should be present.
assert ("Child", "Base", "inherits") in ast_inherits

View File

@@ -104,8 +104,52 @@ class TestConfigCascadeDefaults:
config = Config(data_dir=temp_config_dir)
assert config.staged_coarse_k == 200
assert config.staged_lsp_depth == 2
assert config.staged_stage2_mode == "precomputed"
assert config.staged_clustering_strategy == "auto"
assert config.staged_clustering_min_size == 3
assert config.enable_staged_rerank is True
assert config.cascade_coarse_k == 100
assert config.cascade_fine_k == 10
def test_staged_settings_load_from_settings_json(self, temp_config_dir):
    """Check that ``load_settings`` picks up every ``staged.*`` key from settings.json.

    A settings file containing a ``staged`` section is written to
    ``config.settings_path`` and then loaded. ``_apply_env_overrides`` is
    patched on the instance while loading (presumably so environment
    variables cannot influence the values under test).
    """
    config = Config(data_dir=temp_config_dir)

    staged_section = {
        "coarse_k": 250,
        "lsp_depth": 3,
        "stage2_mode": "static_global_graph",
        "realtime_lsp_timeout_s": 11.0,
        "realtime_lsp_depth": 2,
        "realtime_lsp_max_nodes": 42,
        "realtime_lsp_max_seeds": 2,
        "realtime_lsp_max_concurrent": 4,
        "realtime_lsp_warmup_s": 0.5,
        "realtime_lsp_resolve_symbols": True,
        "clustering_strategy": "path",
        "clustering_min_size": 7,
        "enable_rerank": False,
    }

    # Persist the payload where Config expects to find its settings file.
    target = config.settings_path
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, "w", encoding="utf-8") as handle:
        json.dump({"staged": staged_section}, handle)

    with patch.object(config, "_apply_env_overrides"):
        config.load_settings()

    # Config attribute name -> value expected after loading the file above.
    expected_attrs = {
        "staged_coarse_k": 250,
        "staged_lsp_depth": 3,
        "staged_stage2_mode": "static_global_graph",
        "staged_realtime_lsp_timeout_s": 11.0,
        "staged_realtime_lsp_depth": 2,
        "staged_realtime_lsp_max_nodes": 42,
        "staged_realtime_lsp_max_seeds": 2,
        "staged_realtime_lsp_max_concurrent": 4,
        "staged_realtime_lsp_warmup_s": 0.5,
        "staged_realtime_lsp_resolve_symbols": True,
        "staged_clustering_strategy": "path",
        "staged_clustering_min_size": 7,
        "enable_staged_rerank": False,
    }
    for attr_name, wanted in expected_attrs.items():
        actual = getattr(config, attr_name)
        if isinstance(wanted, bool):
            # Booleans are compared by identity so a truthy/falsy stand-in fails.
            assert actual is wanted, attr_name
        else:
            assert actual == wanted, attr_name

View File

@@ -559,6 +559,227 @@ class TestPerformanceBenchmarks:
f"(baseline={baseline_time:.3f}s, graph={graph_time:.3f}s)"
)
def test_stage2_expansion_precomputed_vs_static_global_graph_benchmark(self, tmp_path):
    """Benchmark Stage-2 expansion: precomputed graph_neighbors vs static global graph.

    This test is informational (prints timings) and asserts only correctness
    and that both expanders return some related results.

    A synthetic project of ``file_count`` files with ``per_file_symbols``
    functions each is indexed. Every file gets one intra-file CALL edge and
    one CALL edge to the first function of the next file (wrapping around),
    so both expanders have edges to follow.
    """
    from codexlens.entities import CodeRelationship, RelationshipType, SearchResult, Symbol
    from codexlens.search.graph_expander import GraphExpander
    from codexlens.search.global_graph_expander import GlobalGraphExpander
    from codexlens.storage.dir_index import DirIndexStore
    from codexlens.storage.global_index import GlobalSymbolIndex
    from codexlens.storage.index_tree import _compute_graph_neighbors
    from codexlens.storage.path_mapper import PathMapper

    # Source + index roots
    source_dir = tmp_path / "proj" / "src"
    source_dir.mkdir(parents=True, exist_ok=True)

    mapper = PathMapper(index_root=tmp_path / "indexes")
    index_db_path = mapper.source_to_index_db(source_dir)
    index_db_path.parent.mkdir(parents=True, exist_ok=True)

    file_count = 30
    per_file_symbols = 2
    # Single description of the workload so the printed headers cannot drift
    # from the constants above.
    workload = f"{file_count} files, {per_file_symbols} symbols/file"

    file_paths = []
    per_file_symbols_list = []
    per_file_relationships_list = []

    store = DirIndexStore(index_db_path)
    store.initialize()
    try:
        for i in range(file_count):
            file_path = source_dir / f"m{i}.py"
            file_paths.append(file_path)
            file_path.write_text("pass\n", encoding="utf-8")
            resolved_path = str(file_path.resolve())

            symbols = [
                Symbol(
                    name=f"func_{i}_{sym_idx}",
                    kind="function",
                    range=(sym_idx + 1, sym_idx + 1),
                    file=resolved_path,
                )
                for sym_idx in range(per_file_symbols)
            ]
            per_file_symbols_list.append(symbols)

            # Cross-file target wraps around so the last file points at the first.
            next_i = (i + 1) % file_count
            relationships: list[CodeRelationship] = [
                # Intra-file edge: func_i_0 -> func_i_1
                CodeRelationship(
                    source_symbol=f"func_{i}_0",
                    target_symbol=f"func_{i}_1",
                    relationship_type=RelationshipType.CALL,
                    source_file=resolved_path,
                    target_file=resolved_path,
                    source_line=1,
                ),
                # Cross-file edge: func_i_0 -> func_(i+1)_0 (name-unique across dir)
                CodeRelationship(
                    source_symbol=f"func_{i}_0",
                    target_symbol=f"func_{next_i}_0",
                    relationship_type=RelationshipType.CALL,
                    source_file=resolved_path,
                    target_file=str((source_dir / f"m{next_i}.py").resolve()),
                    source_line=1,
                ),
            ]
            per_file_relationships_list.append(relationships)

            store.add_file(
                name=file_path.name,
                full_path=file_path,
                content="pass\n",
                language="python",
                symbols=symbols,
                relationships=relationships,
            )

        # Precompute graph_neighbors for GraphExpander (precomputed Stage-2 build)
        start = time.perf_counter()
        _compute_graph_neighbors(store)
        graph_build_ms = (time.perf_counter() - start) * 1000.0
    finally:
        # Close the store even when indexing fails, mirroring how
        # global_index is released below.
        store.close()

    # Build global symbol index + relationships for GlobalGraphExpander
    global_db_path = index_db_path.parent / GlobalSymbolIndex.DEFAULT_DB_NAME
    global_index = GlobalSymbolIndex(global_db_path, project_id=1)
    global_index.initialize()
    try:
        index_path_str = str(index_db_path.resolve())

        start = time.perf_counter()
        for file_path, symbols in zip(file_paths, per_file_symbols_list):
            file_path_str = str(file_path.resolve())
            global_index.update_file_symbols(
                file_path_str,
                symbols,
                index_path=index_path_str,
            )
        global_symbols_ms = (time.perf_counter() - start) * 1000.0

        start = time.perf_counter()
        for file_path, relationships in zip(file_paths, per_file_relationships_list):
            file_path_str = str(file_path.resolve())
            global_index.update_file_relationships(file_path_str, relationships)
        global_relationships_ms = (time.perf_counter() - start) * 1000.0

        # Seed results: the first function of (up to) the first ten files.
        base_results = [
            SearchResult(
                path=str(file_paths[i].resolve()),
                score=1.0,
                excerpt=None,
                content=None,
                start_line=1,
                end_line=1,
                symbol_name=f"func_{i}_0",
                symbol_kind="function",
            )
            for i in range(min(10, file_count))
        ]

        pre_expander = GraphExpander(mapper)
        static_expander = GlobalGraphExpander(global_index)

        start = time.perf_counter()
        pre_related = pre_expander.expand(
            base_results,
            depth=2,
            max_expand=10,
            max_related=50,
        )
        pre_ms = (time.perf_counter() - start) * 1000.0

        start = time.perf_counter()
        static_related = static_expander.expand(
            base_results,
            top_n=10,
            max_related=50,
        )
        static_ms = (time.perf_counter() - start) * 1000.0

        assert pre_related, "Expected precomputed graph expansion to return related results"
        assert static_related, "Expected static global graph expansion to return related results"

        print(f"\nStage-2 build benchmark ({workload}):")
        print(f" graph_neighbors precompute: {graph_build_ms:.2f}ms")
        print(f" global_symbols write: {global_symbols_ms:.2f}ms")
        print(f" global_relationships write: {global_relationships_ms:.2f}ms")
        print(f"\nStage-2 expansion benchmark ({workload}):")
        print(f" precomputed (graph_neighbors): {pre_ms:.2f}ms, related={len(pre_related)}")
        print(f" static_global_graph: {static_ms:.2f}ms, related={len(static_related)}")
    finally:
        global_index.close()
def test_relationship_extraction_astgrep_vs_treesitter_benchmark(self, tmp_path):
    """Informational timing of relationship extraction: tree-sitter vs ast-grep.

    The same small Python sample is parsed three times by each backend; the
    fastest run and the largest relationship count are printed. The only hard
    requirement is that each backend extracts at least one relationship.

    Skips when optional parser dependencies are unavailable.
    """
    import textwrap

    from codexlens.config import Config
    from codexlens.parsers.astgrep_processor import is_astgrep_processor_available
    from codexlens.parsers.treesitter_parser import TreeSitterSymbolParser

    if not is_astgrep_processor_available():
        pytest.skip("ast-grep processor unavailable (optional dependency)")

    code = textwrap.dedent(
        """
        import os
        from typing import List
        class Base:
            pass
        class Child(Base):
            def method(self) -> List[str]:
                return [os.path.join("a", "b")]
        """
    ).lstrip()
    file_path = tmp_path / "sample.py"
    file_path.write_text(code, encoding="utf-8")

    def _parser_for(cfg_dir_name: str, use_astgrep: bool) -> TreeSitterSymbolParser:
        # Each backend gets its own Config rooted in a separate data directory.
        cfg = Config(data_dir=tmp_path / cfg_dir_name)
        cfg.use_astgrep = use_astgrep
        return TreeSitterSymbolParser("python", file_path, config=cfg)

    def _measure(parser: TreeSitterSymbolParser) -> tuple[float, int]:
        # Returns (fastest run in milliseconds, highest relationship count seen).
        best_seconds = None
        most_rels = 0
        for _ in range(3):
            began = time.perf_counter()
            parsed = parser.parse(code, file_path)
            elapsed = time.perf_counter() - began
            if best_seconds is None or elapsed < best_seconds:
                best_seconds = elapsed
            found = len(parsed.relationships) if parsed is not None else 0
            if found > most_rels:
                most_rels = found
        return best_seconds * 1000.0, most_rels

    ts_parser = _parser_for("cfg_ts", False)
    if not ts_parser.is_available():
        pytest.skip("tree-sitter python binding unavailable")

    ag_parser = _parser_for("cfg_ag", True)
    if getattr(ag_parser, "_astgrep_processor", None) is None:
        pytest.skip("ast-grep processor failed to initialize")

    ts_ms, ts_rels = _measure(ts_parser)
    ag_ms, ag_rels = _measure(ag_parser)

    assert ts_rels > 0, "Expected relationships extracted via tree-sitter"
    assert ag_rels > 0, "Expected relationships extracted via ast-grep"

    print("\nRelationship extraction benchmark (python, 1 file):")
    print(f" tree-sitter: {ts_ms:.2f}ms, rels={ts_rels}")
    print(f" ast-grep: {ag_ms:.2f}ms, rels={ag_rels}")
def test_cross_encoder_reranking_latency_under_200ms(self):
"""Cross-encoder rerank step completes under 200ms (excluding model load)."""
from codexlens.entities import SearchResult