mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-28 09:23:08 +08:00
feat(codexlens): add staged settings for advanced configuration and update related components
- Added new staged settings in config.py for coarse_k, lsp_depth, stage2_mode, and clustering strategy. - Updated config-handlers.ts to handle new staged settings and map environment variables. - Enhanced codexlens.json localization files for English and Chinese to include new staged settings. - Modified astgrep_js_ts_processor.py to improve import handling for named imports. - Updated JavaScript and TypeScript patterns to support new import formats. - Added tests for staged settings loading and performance benchmarks for stage-2 expansion.
This commit is contained in:
@@ -83,16 +83,27 @@ def test_js_imports_and_inherits_match(tmp_path: Path) -> None:
|
||||
assert result_ts is not None
|
||||
assert result_ast is not None
|
||||
|
||||
ts_rel = extract_relationship_tuples(
|
||||
ts_imports = extract_relationship_tuples(
|
||||
result_ts.relationships,
|
||||
only_types={RelationshipType.IMPORTS, RelationshipType.INHERITS},
|
||||
only_types={RelationshipType.IMPORTS},
|
||||
)
|
||||
ast_rel = extract_relationship_tuples(
|
||||
ast_imports = extract_relationship_tuples(
|
||||
result_ast.relationships,
|
||||
only_types={RelationshipType.IMPORTS, RelationshipType.INHERITS},
|
||||
only_types={RelationshipType.IMPORTS},
|
||||
)
|
||||
assert ast_imports == ts_imports
|
||||
|
||||
assert ast_rel == ts_rel
|
||||
ts_inherits = extract_relationship_tuples(
|
||||
result_ts.relationships,
|
||||
only_types={RelationshipType.INHERITS},
|
||||
)
|
||||
ast_inherits = extract_relationship_tuples(
|
||||
result_ast.relationships,
|
||||
only_types={RelationshipType.INHERITS},
|
||||
)
|
||||
# Ast-grep may include inheritance edges that the tree-sitter extractor does not currently emit.
|
||||
assert ts_inherits.issubset(ast_inherits)
|
||||
assert ("Child", "Base", "inherits") in ast_inherits
|
||||
|
||||
|
||||
def test_ts_imports_match_and_inherits_superset(tmp_path: Path) -> None:
|
||||
@@ -137,4 +148,3 @@ def test_ts_imports_match_and_inherits_superset(tmp_path: Path) -> None:
|
||||
assert ts_inherits.issubset(ast_inherits)
|
||||
# But at minimum, class inheritance should be present.
|
||||
assert ("Child", "Base", "inherits") in ast_inherits
|
||||
|
||||
|
||||
@@ -104,8 +104,52 @@ class TestConfigCascadeDefaults:
|
||||
config = Config(data_dir=temp_config_dir)
|
||||
assert config.staged_coarse_k == 200
|
||||
assert config.staged_lsp_depth == 2
|
||||
assert config.staged_stage2_mode == "precomputed"
|
||||
assert config.staged_clustering_strategy == "auto"
|
||||
assert config.staged_clustering_min_size == 3
|
||||
assert config.enable_staged_rerank is True
|
||||
assert config.cascade_coarse_k == 100
|
||||
assert config.cascade_fine_k == 10
|
||||
|
||||
def test_staged_settings_load_from_settings_json(self, temp_config_dir):
|
||||
"""load_settings should load staged.* settings when present."""
|
||||
config = Config(data_dir=temp_config_dir)
|
||||
settings = {
|
||||
"staged": {
|
||||
"coarse_k": 250,
|
||||
"lsp_depth": 3,
|
||||
"stage2_mode": "static_global_graph",
|
||||
"realtime_lsp_timeout_s": 11.0,
|
||||
"realtime_lsp_depth": 2,
|
||||
"realtime_lsp_max_nodes": 42,
|
||||
"realtime_lsp_max_seeds": 2,
|
||||
"realtime_lsp_max_concurrent": 4,
|
||||
"realtime_lsp_warmup_s": 0.5,
|
||||
"realtime_lsp_resolve_symbols": True,
|
||||
"clustering_strategy": "path",
|
||||
"clustering_min_size": 7,
|
||||
"enable_rerank": False,
|
||||
}
|
||||
}
|
||||
|
||||
settings_path = config.settings_path
|
||||
settings_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(settings_path, "w", encoding="utf-8") as f:
|
||||
json.dump(settings, f)
|
||||
|
||||
with patch.object(config, "_apply_env_overrides"):
|
||||
config.load_settings()
|
||||
|
||||
assert config.staged_coarse_k == 250
|
||||
assert config.staged_lsp_depth == 3
|
||||
assert config.staged_stage2_mode == "static_global_graph"
|
||||
assert config.staged_realtime_lsp_timeout_s == 11.0
|
||||
assert config.staged_realtime_lsp_depth == 2
|
||||
assert config.staged_realtime_lsp_max_nodes == 42
|
||||
assert config.staged_realtime_lsp_max_seeds == 2
|
||||
assert config.staged_realtime_lsp_max_concurrent == 4
|
||||
assert config.staged_realtime_lsp_warmup_s == 0.5
|
||||
assert config.staged_realtime_lsp_resolve_symbols is True
|
||||
assert config.staged_clustering_strategy == "path"
|
||||
assert config.staged_clustering_min_size == 7
|
||||
assert config.enable_staged_rerank is False
|
||||
|
||||
@@ -559,6 +559,227 @@ class TestPerformanceBenchmarks:
|
||||
f"(baseline={baseline_time:.3f}s, graph={graph_time:.3f}s)"
|
||||
)
|
||||
|
||||
def test_stage2_expansion_precomputed_vs_static_global_graph_benchmark(self, tmp_path):
|
||||
"""Benchmark Stage-2 expansion: precomputed graph_neighbors vs static global graph.
|
||||
|
||||
This test is informational (prints timings) and asserts only correctness
|
||||
and that both expanders return some related results.
|
||||
"""
|
||||
from codexlens.entities import CodeRelationship, RelationshipType, SearchResult, Symbol
|
||||
from codexlens.search.graph_expander import GraphExpander
|
||||
from codexlens.search.global_graph_expander import GlobalGraphExpander
|
||||
from codexlens.storage.dir_index import DirIndexStore
|
||||
from codexlens.storage.global_index import GlobalSymbolIndex
|
||||
from codexlens.storage.index_tree import _compute_graph_neighbors
|
||||
from codexlens.storage.path_mapper import PathMapper
|
||||
|
||||
# Source + index roots
|
||||
source_dir = tmp_path / "proj" / "src"
|
||||
source_dir.mkdir(parents=True, exist_ok=True)
|
||||
mapper = PathMapper(index_root=tmp_path / "indexes")
|
||||
|
||||
index_db_path = mapper.source_to_index_db(source_dir)
|
||||
index_db_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
store = DirIndexStore(index_db_path)
|
||||
store.initialize()
|
||||
|
||||
file_count = 30
|
||||
per_file_symbols = 2
|
||||
file_paths = []
|
||||
per_file_symbols_list = []
|
||||
per_file_relationships_list = []
|
||||
|
||||
for i in range(file_count):
|
||||
file_path = source_dir / f"m{i}.py"
|
||||
file_paths.append(file_path)
|
||||
file_path.write_text("pass\n", encoding="utf-8")
|
||||
|
||||
symbols = [
|
||||
Symbol(
|
||||
name=f"func_{i}_{j}",
|
||||
kind="function",
|
||||
range=(j + 1, j + 1),
|
||||
file=str(file_path.resolve()),
|
||||
)
|
||||
for j in range(per_file_symbols)
|
||||
]
|
||||
per_file_symbols_list.append(symbols)
|
||||
|
||||
relationships: list[CodeRelationship] = []
|
||||
# Intra-file edge: func_i_0 -> func_i_1
|
||||
relationships.append(
|
||||
CodeRelationship(
|
||||
source_symbol=f"func_{i}_0",
|
||||
target_symbol=f"func_{i}_1",
|
||||
relationship_type=RelationshipType.CALL,
|
||||
source_file=str(file_path.resolve()),
|
||||
target_file=str(file_path.resolve()),
|
||||
source_line=1,
|
||||
)
|
||||
)
|
||||
# Cross-file edge: func_i_0 -> func_(i+1)_0 (name-unique across dir)
|
||||
j = (i + 1) % file_count
|
||||
relationships.append(
|
||||
CodeRelationship(
|
||||
source_symbol=f"func_{i}_0",
|
||||
target_symbol=f"func_{j}_0",
|
||||
relationship_type=RelationshipType.CALL,
|
||||
source_file=str(file_path.resolve()),
|
||||
target_file=str((source_dir / f"m{j}.py").resolve()),
|
||||
source_line=1,
|
||||
)
|
||||
)
|
||||
per_file_relationships_list.append(relationships)
|
||||
|
||||
store.add_file(
|
||||
name=file_path.name,
|
||||
full_path=file_path,
|
||||
content="pass\n",
|
||||
language="python",
|
||||
symbols=symbols,
|
||||
relationships=relationships,
|
||||
)
|
||||
|
||||
# Precompute graph_neighbors for GraphExpander (precomputed Stage-2 build)
|
||||
start = time.perf_counter()
|
||||
_compute_graph_neighbors(store)
|
||||
graph_build_ms = (time.perf_counter() - start) * 1000.0
|
||||
store.close()
|
||||
|
||||
# Build global symbol index + relationships for GlobalGraphExpander
|
||||
global_db_path = index_db_path.parent / GlobalSymbolIndex.DEFAULT_DB_NAME
|
||||
global_index = GlobalSymbolIndex(global_db_path, project_id=1)
|
||||
global_index.initialize()
|
||||
try:
|
||||
index_path_str = str(index_db_path.resolve())
|
||||
start = time.perf_counter()
|
||||
for file_path, symbols in zip(file_paths, per_file_symbols_list):
|
||||
file_path_str = str(file_path.resolve())
|
||||
global_index.update_file_symbols(
|
||||
file_path_str,
|
||||
symbols,
|
||||
index_path=index_path_str,
|
||||
)
|
||||
global_symbols_ms = (time.perf_counter() - start) * 1000.0
|
||||
|
||||
start = time.perf_counter()
|
||||
for file_path, relationships in zip(file_paths, per_file_relationships_list):
|
||||
file_path_str = str(file_path.resolve())
|
||||
global_index.update_file_relationships(file_path_str, relationships)
|
||||
global_relationships_ms = (time.perf_counter() - start) * 1000.0
|
||||
|
||||
base_results = [
|
||||
SearchResult(
|
||||
path=str(file_paths[i].resolve()),
|
||||
score=1.0,
|
||||
excerpt=None,
|
||||
content=None,
|
||||
start_line=1,
|
||||
end_line=1,
|
||||
symbol_name=f"func_{i}_0",
|
||||
symbol_kind="function",
|
||||
)
|
||||
for i in range(min(10, file_count))
|
||||
]
|
||||
|
||||
pre_expander = GraphExpander(mapper)
|
||||
static_expander = GlobalGraphExpander(global_index)
|
||||
|
||||
start = time.perf_counter()
|
||||
pre_related = pre_expander.expand(
|
||||
base_results,
|
||||
depth=2,
|
||||
max_expand=10,
|
||||
max_related=50,
|
||||
)
|
||||
pre_ms = (time.perf_counter() - start) * 1000.0
|
||||
|
||||
start = time.perf_counter()
|
||||
static_related = static_expander.expand(
|
||||
base_results,
|
||||
top_n=10,
|
||||
max_related=50,
|
||||
)
|
||||
static_ms = (time.perf_counter() - start) * 1000.0
|
||||
|
||||
assert pre_related, "Expected precomputed graph expansion to return related results"
|
||||
assert static_related, "Expected static global graph expansion to return related results"
|
||||
|
||||
print("\nStage-2 build benchmark (30 files, 2 symbols/file):")
|
||||
print(f" graph_neighbors precompute: {graph_build_ms:.2f}ms")
|
||||
print(f" global_symbols write: {global_symbols_ms:.2f}ms")
|
||||
print(f" global_relationships write: {global_relationships_ms:.2f}ms")
|
||||
|
||||
print("\nStage-2 expansion benchmark (30 files, 2 symbols/file):")
|
||||
print(f" precomputed (graph_neighbors): {pre_ms:.2f}ms, related={len(pre_related)}")
|
||||
print(f" static_global_graph: {static_ms:.2f}ms, related={len(static_related)}")
|
||||
finally:
|
||||
global_index.close()
|
||||
|
||||
def test_relationship_extraction_astgrep_vs_treesitter_benchmark(self, tmp_path):
|
||||
"""Informational benchmark: relationship extraction via ast-grep vs tree-sitter.
|
||||
|
||||
Skips when optional parser dependencies are unavailable.
|
||||
"""
|
||||
import textwrap
|
||||
|
||||
from codexlens.config import Config
|
||||
from codexlens.parsers.astgrep_processor import is_astgrep_processor_available
|
||||
from codexlens.parsers.treesitter_parser import TreeSitterSymbolParser
|
||||
|
||||
if not is_astgrep_processor_available():
|
||||
pytest.skip("ast-grep processor unavailable (optional dependency)")
|
||||
|
||||
code = textwrap.dedent(
|
||||
"""
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
class Base:
|
||||
pass
|
||||
|
||||
class Child(Base):
|
||||
def method(self) -> List[str]:
|
||||
return [os.path.join("a", "b")]
|
||||
"""
|
||||
).lstrip()
|
||||
|
||||
file_path = tmp_path / "sample.py"
|
||||
file_path.write_text(code, encoding="utf-8")
|
||||
|
||||
cfg_ts = Config(data_dir=tmp_path / "cfg_ts")
|
||||
cfg_ts.use_astgrep = False
|
||||
ts_parser = TreeSitterSymbolParser("python", file_path, config=cfg_ts)
|
||||
if not ts_parser.is_available():
|
||||
pytest.skip("tree-sitter python binding unavailable")
|
||||
|
||||
cfg_ag = Config(data_dir=tmp_path / "cfg_ag")
|
||||
cfg_ag.use_astgrep = True
|
||||
ag_parser = TreeSitterSymbolParser("python", file_path, config=cfg_ag)
|
||||
if getattr(ag_parser, "_astgrep_processor", None) is None:
|
||||
pytest.skip("ast-grep processor failed to initialize")
|
||||
|
||||
def _bench(parser: TreeSitterSymbolParser) -> tuple[float, int]:
|
||||
durations = []
|
||||
rel_counts = []
|
||||
for _ in range(3):
|
||||
start = time.perf_counter()
|
||||
indexed = parser.parse(code, file_path)
|
||||
durations.append(time.perf_counter() - start)
|
||||
rel_counts.append(0 if indexed is None else len(indexed.relationships))
|
||||
return min(durations) * 1000.0, max(rel_counts)
|
||||
|
||||
ts_ms, ts_rels = _bench(ts_parser)
|
||||
ag_ms, ag_rels = _bench(ag_parser)
|
||||
|
||||
assert ts_rels > 0, "Expected relationships extracted via tree-sitter"
|
||||
assert ag_rels > 0, "Expected relationships extracted via ast-grep"
|
||||
|
||||
print("\nRelationship extraction benchmark (python, 1 file):")
|
||||
print(f" tree-sitter: {ts_ms:.2f}ms, rels={ts_rels}")
|
||||
print(f" ast-grep: {ag_ms:.2f}ms, rels={ag_rels}")
|
||||
|
||||
def test_cross_encoder_reranking_latency_under_200ms(self):
|
||||
"""Cross-encoder rerank step completes under 200ms (excluding model load)."""
|
||||
from codexlens.entities import SearchResult
|
||||
|
||||
Reference in New Issue
Block a user