feat(codexlens): add staged settings for advanced configuration and update related components

- Added new staged settings in config.py for coarse_k, lsp_depth, stage2_mode, and clustering strategy.
- Updated config-handlers.ts to handle new staged settings and map environment variables.
- Enhanced codexlens.json localization files for English and Chinese to include new staged settings.
- Modified astgrep_js_ts_processor.py to improve import handling for named imports.
- Updated JavaScript and TypeScript patterns to support new import formats.
- Added tests for staged settings loading and performance benchmarks for stage-2 expansion.
This commit is contained in:
catlog22
2026-02-18 13:05:35 +08:00
parent 265a77d6e7
commit d6e282b5a9
12 changed files with 618 additions and 78 deletions

View File

@@ -318,6 +318,21 @@ class Config:
"coarse_k": self.cascade_coarse_k,
"fine_k": self.cascade_fine_k,
},
# Staged-cascade settings exported for introspection; each entry mirrors
# a self.staged_* attribute (plus self.enable_staged_rerank) populated by
# the settings loader.
"staged": {
"coarse_k": self.staged_coarse_k,
"lsp_depth": self.staged_lsp_depth,
"stage2_mode": self.staged_stage2_mode,
"realtime_lsp_timeout_s": self.staged_realtime_lsp_timeout_s,
"realtime_lsp_depth": self.staged_realtime_lsp_depth,
"realtime_lsp_max_nodes": self.staged_realtime_lsp_max_nodes,
"realtime_lsp_max_seeds": self.staged_realtime_lsp_max_seeds,
"realtime_lsp_max_concurrent": self.staged_realtime_lsp_max_concurrent,
"realtime_lsp_warmup_s": self.staged_realtime_lsp_warmup_s,
"realtime_lsp_resolve_symbols": self.staged_realtime_lsp_resolve_symbols,
"clustering_strategy": self.staged_clustering_strategy,
"clustering_min_size": self.staged_clustering_min_size,
"enable_rerank": self.enable_staged_rerank,
},
"api": {
"max_workers": self.api_max_workers,
"batch_size": self.api_batch_size,
@@ -426,6 +441,174 @@ class Config:
if "fine_k" in cascade:
self.cascade_fine_k = cascade["fine_k"]
# Load staged cascade settings.
# Every key is optional; an invalid value is logged and skipped so the
# attribute keeps its existing default. The nine near-identical numeric
# try/except blocks and two bool parsers are collapsed into two local
# helpers to keep the validation logic in one place.
staged = settings.get("staged", {})
if isinstance(staged, dict):
    def _staged_number(key, attr, caster, expected):
        # Coerce staged[key] with caster (int/float) into self.<attr>.
        if key not in staged:
            return
        try:
            setattr(self, attr, caster(staged[key]))
        except (TypeError, ValueError):
            log.warning(
                "Invalid staged.%s in %s: %r (expected %s)",
                key,
                self.settings_path,
                staged[key],
                expected,
            )

    def _staged_bool(key, attr):
        # Accept a real bool, numeric truthiness, or the common string
        # spellings ("true"/"1"/"yes"/"on", case-insensitive).
        if key not in staged:
            return
        raw = staged[key]
        if isinstance(raw, bool):
            setattr(self, attr, raw)
        elif isinstance(raw, (int, float)):
            setattr(self, attr, bool(raw))
        elif isinstance(raw, str):
            setattr(self, attr, raw.strip().lower() in {"true", "1", "yes", "on"})
        else:
            log.warning(
                "Invalid staged.%s in %s: %r (expected bool)",
                key,
                self.settings_path,
                raw,
            )

    _staged_number("coarse_k", "staged_coarse_k", int, "int")
    _staged_number("lsp_depth", "staged_lsp_depth", int, "int")
    _staged_number(
        "realtime_lsp_timeout_s", "staged_realtime_lsp_timeout_s", float, "float"
    )
    _staged_number("realtime_lsp_depth", "staged_realtime_lsp_depth", int, "int")
    _staged_number(
        "realtime_lsp_max_nodes", "staged_realtime_lsp_max_nodes", int, "int"
    )
    _staged_number(
        "realtime_lsp_max_seeds", "staged_realtime_lsp_max_seeds", int, "int"
    )
    _staged_number(
        "realtime_lsp_max_concurrent",
        "staged_realtime_lsp_max_concurrent",
        int,
        "int",
    )
    _staged_number(
        "realtime_lsp_warmup_s", "staged_realtime_lsp_warmup_s", float, "float"
    )
    _staged_number(
        "clustering_min_size", "staged_clustering_min_size", int, "int"
    )
    _staged_bool("realtime_lsp_resolve_symbols", "staged_realtime_lsp_resolve_symbols")
    _staged_bool("enable_rerank", "enable_staged_rerank")

    if "stage2_mode" in staged:
        raw_mode = str(staged["stage2_mode"]).strip().lower()
        if raw_mode in {"precomputed", "realtime", "static_global_graph"}:
            self.staged_stage2_mode = raw_mode
        elif raw_mode in {"live"}:
            # "live" is accepted as an alias for realtime mode.
            self.staged_stage2_mode = "realtime"
        else:
            log.warning(
                "Invalid staged.stage2_mode in %s: %r "
                "(expected 'precomputed', 'realtime', or 'static_global_graph')",
                self.settings_path,
                staged["stage2_mode"],
            )

    if "clustering_strategy" in staged:
        raw_strategy = str(staged["clustering_strategy"]).strip().lower()
        allowed = {
            "auto",
            "hdbscan",
            "dbscan",
            "frequency",
            "noop",
            "score",
            "dir_rr",
            "path",
        }
        if raw_strategy in allowed:
            self.staged_clustering_strategy = raw_strategy
        elif raw_strategy in {"none", "off"}:
            # "none"/"off" disable clustering via the noop strategy.
            self.staged_clustering_strategy = "noop"
        else:
            log.warning(
                "Invalid staged.clustering_strategy in %s: %r",
                self.settings_path,
                staged["clustering_strategy"],
            )
# Load parsing settings
parsing = settings.get("parsing", {})
if isinstance(parsing, dict) and "use_astgrep" in parsing:

View File

@@ -50,7 +50,7 @@ ENV_VARS = {
"CASCADE_STRATEGY": "Cascade strategy: binary, binary_rerank (alias: hybrid), dense_rerank, staged",
"CASCADE_COARSE_K": "Cascade coarse_k candidate count (int)",
"CASCADE_FINE_K": "Cascade fine_k result count (int)",
"STAGED_STAGE2_MODE": "Staged Stage 2 mode: precomputed, realtime",
"STAGED_STAGE2_MODE": "Staged Stage 2 mode: precomputed, realtime, static_global_graph",
"STAGED_CLUSTERING_STRATEGY": "Staged clustering strategy: auto, score, path, dir_rr, noop, ...",
"STAGED_CLUSTERING_MIN_SIZE": "Staged clustering min cluster size (int)",
"ENABLE_STAGED_RERANK": "Enable staged reranking in Stage 4 (true/false)",

View File

@@ -12,13 +12,17 @@ from __future__ import annotations
import re
from pathlib import Path
from typing import Callable, Iterable, List, Optional, Sequence, Set, Tuple
from typing import Callable, List, Optional, Sequence, Set, Tuple
from codexlens.entities import CodeRelationship, IndexedFile, RelationshipType
from codexlens.parsers.astgrep_processor import BaseAstGrepProcessor
# A single JS/TS identifier (ASCII letters, digits, `_`, `$`; no leading digit).
_IDENT_RE = re.compile(r"^[A-Za-z_$][A-Za-z0-9_$]*$")
# Brace-style import statement: `import { ... } from`, optionally prefixed with
# `type` and/or a default binding (`import Default, { ... } from`). The `names`
# group captures the raw text between the braces for later splitting.
_BRACE_IMPORT_RE = re.compile(
    r"\bimport\s+(?:type\s+)?(?:[A-Za-z_$][A-Za-z0-9_$]*\s*,\s*)?\{\s*(?P<names>[^}]*)\}\s*from\b",
    re.MULTILINE,
)
def _strip_quotes(value: str) -> str:
@@ -32,11 +36,7 @@ def _module_from_literal(raw: str) -> str:
raw = (raw or "").strip()
if not raw:
return ""
unquoted = _strip_quotes(raw)
# Only accept string literal forms (tree-sitter extractor does the same).
if unquoted == raw:
return ""
return unquoted.strip()
return _strip_quotes(raw).strip()
def _extract_named_imports(raw: str) -> List[str]:
@@ -63,6 +63,16 @@ def _extract_named_imports(raw: str) -> List[str]:
return names
def _extract_brace_import_names(statement: str) -> str:
    """Return the raw text inside ``{...}`` of a brace-style import.

    Falls back to ``""`` for empty input or statements that do not match
    the brace-import pattern.
    """
    text = (statement or "").strip()
    if text:
        found = _BRACE_IMPORT_RE.search(text)
        if found is not None:
            return (found.group("names") or "").strip()
    return ""
def _dedupe_relationships(rels: Sequence[CodeRelationship]) -> List[CodeRelationship]:
seen: Set[Tuple[str, str, str]] = set()
out: List[CodeRelationship] = []
@@ -139,40 +149,45 @@ class _AstGrepJsTsProcessor(BaseAstGrepProcessor):
)
# Any `import ... from "mod"` form
for node in self.run_ast_grep(source_code, self._get_pattern("import_from")):
mod = _module_from_literal(self._get_match(node, "MODULE"))
if mod:
record(mod, self._get_line_number(node))
for pat_name in ("import_from_dq", "import_from_sq"):
for node in self.run_ast_grep(source_code, self._get_pattern(pat_name)):
mod = _module_from_literal(self._get_match(node, "MODULE"))
if mod:
record(mod, self._get_line_number(node))
# Side-effect import: import "mod"
for node in self.run_ast_grep(source_code, self._get_pattern("import_side_effect")):
mod = _module_from_literal(self._get_match(node, "MODULE"))
if mod:
record(mod, self._get_line_number(node))
for pat_name in ("import_side_effect_dq", "import_side_effect_sq"):
for node in self.run_ast_grep(source_code, self._get_pattern(pat_name)):
mod = _module_from_literal(self._get_match(node, "MODULE"))
if mod:
record(mod, self._get_line_number(node))
# Named imports (named-only): import { a, b as c } from "mod"
for node in self.run_ast_grep(source_code, self._get_pattern("import_named_only")):
mod = _module_from_literal(self._get_match(node, "MODULE"))
if not mod:
continue
raw_names = self._get_match(node, "NAMES")
for name in _extract_named_imports(raw_names):
record(f"{mod}.{name}", self._get_line_number(node))
for pat_name in ("import_named_only_dq", "import_named_only_sq"):
for node in self.run_ast_grep(source_code, self._get_pattern(pat_name)):
mod = _module_from_literal(self._get_match(node, "MODULE"))
if not mod:
continue
raw_names = _extract_brace_import_names(self._get_node_text(node))
for name in _extract_named_imports(raw_names):
record(f"{mod}.{name}", self._get_line_number(node))
# Named imports (default + named): import X, { a, b as c } from "mod"
for node in self.run_ast_grep(source_code, self._get_pattern("import_default_named")):
mod = _module_from_literal(self._get_match(node, "MODULE"))
if not mod:
continue
raw_names = self._get_match(node, "NAMES")
for name in _extract_named_imports(raw_names):
record(f"{mod}.{name}", self._get_line_number(node))
for pat_name in ("import_default_named_dq", "import_default_named_sq"):
for node in self.run_ast_grep(source_code, self._get_pattern(pat_name)):
mod = _module_from_literal(self._get_match(node, "MODULE"))
if not mod:
continue
raw_names = _extract_brace_import_names(self._get_node_text(node))
for name in _extract_named_imports(raw_names):
record(f"{mod}.{name}", self._get_line_number(node))
# CommonJS require("mod") (string literal only)
for node in self.run_ast_grep(source_code, self._get_pattern("require_call")):
mod = _module_from_literal(self._get_match(node, "MODULE"))
if mod:
record(mod, self._get_line_number(node))
for pat_name in ("require_call_dq", "require_call_sq"):
for node in self.run_ast_grep(source_code, self._get_pattern(pat_name)):
mod = _module_from_literal(self._get_match(node, "MODULE"))
if mod:
record(mod, self._get_line_number(node))
return rels
@@ -258,26 +273,29 @@ class AstGrepTypeScriptProcessor(_AstGrepJsTsProcessor):
)
# Type-only imports: import type ... from "mod"
for node in self.run_ast_grep(source_code, self._get_pattern("import_type_from")):
mod = _module_from_literal(self._get_match(node, "MODULE"))
if mod:
record(mod, self._get_line_number(node))
for pat_name in ("import_type_from_dq", "import_type_from_sq"):
for node in self.run_ast_grep(source_code, self._get_pattern(pat_name)):
mod = _module_from_literal(self._get_match(node, "MODULE"))
if mod:
record(mod, self._get_line_number(node))
for node in self.run_ast_grep(source_code, self._get_pattern("import_type_named_only")):
mod = _module_from_literal(self._get_match(node, "MODULE"))
if not mod:
continue
raw_names = self._get_match(node, "NAMES")
for name in _extract_named_imports(raw_names):
record(f"{mod}.{name}", self._get_line_number(node))
for pat_name in ("import_type_named_only_dq", "import_type_named_only_sq"):
for node in self.run_ast_grep(source_code, self._get_pattern(pat_name)):
mod = _module_from_literal(self._get_match(node, "MODULE"))
if not mod:
continue
raw_names = _extract_brace_import_names(self._get_node_text(node))
for name in _extract_named_imports(raw_names):
record(f"{mod}.{name}", self._get_line_number(node))
for node in self.run_ast_grep(source_code, self._get_pattern("import_type_default_named")):
mod = _module_from_literal(self._get_match(node, "MODULE"))
if not mod:
continue
raw_names = self._get_match(node, "NAMES")
for name in _extract_named_imports(raw_names):
record(f"{mod}.{name}", self._get_line_number(node))
for pat_name in ("import_type_default_named_dq", "import_type_default_named_sq"):
for node in self.run_ast_grep(source_code, self._get_pattern(pat_name)):
mod = _module_from_literal(self._get_match(node, "MODULE"))
if not mod:
continue
raw_names = _extract_brace_import_names(self._get_node_text(node))
for name in _extract_named_imports(raw_names):
record(f"{mod}.{name}", self._get_line_number(node))
return _dedupe_relationships(rels)
@@ -286,4 +304,3 @@ __all__ = [
"AstGrepJavaScriptProcessor",
"AstGrepTypeScriptProcessor",
]

View File

@@ -85,6 +85,30 @@ class BaseAstGrepProcessor(ABC):
return self._binding.find_all(pattern)
def _get_match(self, node: SgNode, metavar: str) -> str: # type: ignore[valid-type]
"""Extract matched metavariable value from node (best-effort)."""
if self._binding is None or node is None:
return ""
return self._binding._get_match(node, metavar)
def _get_line_number(self, node: SgNode) -> int: # type: ignore[valid-type]
"""Get 1-based starting line number of a node (best-effort)."""
if self._binding is None or node is None:
return 0
return self._binding._get_line_number(node)
def _get_line_range(self, node: SgNode) -> Tuple[int, int]: # type: ignore[valid-type]
"""Get (start_line, end_line) range of a node (best-effort)."""
if self._binding is None or node is None:
return (0, 0)
return self._binding._get_line_range(node)
def _get_node_text(self, node: SgNode) -> str: # type: ignore[valid-type]
"""Get the full text of a node (best-effort)."""
if self._binding is None or node is None:
return ""
return self._binding._get_node_text(node)
@abstractmethod
def process_matches(
self,

View File

@@ -20,17 +20,23 @@ JAVASCRIPT_PATTERNS: Dict[str, str] = {
# import React, { useEffect } from "react"
# import { useEffect } from "react"
# import * as fs from "fs"
"import_from": "import $$$IMPORTS from $MODULE",
"import_named_only": "import {$$$NAMES} from $MODULE",
"import_default_named": "import $DEFAULT, {$$$NAMES} from $MODULE",
"import_from_dq": "import $$$IMPORTS from \"$MODULE\"",
"import_from_sq": "import $$$IMPORTS from '$MODULE'",
"import_named_only_dq": "import {$$$NAMES} from \"$MODULE\"",
"import_named_only_sq": "import {$$$NAMES} from '$MODULE'",
"import_default_named_dq": "import $DEFAULT, {$$$NAMES} from \"$MODULE\"",
"import_default_named_sq": "import $DEFAULT, {$$$NAMES} from '$MODULE'",
# Side-effect import: import "./styles.css"
"import_side_effect": "import $MODULE",
"import_side_effect_dq": "import \"$MODULE\"",
"import_side_effect_sq": "import '$MODULE'",
# CommonJS require(): const fs = require("fs")
"require_call": "require($MODULE)",
"require_call_dq": "require(\"$MODULE\")",
"require_call_sq": "require('$MODULE')",
# Class inheritance: class Child extends Base {}
"class_extends": "class $NAME extends $BASE $$$BODY",
# Note: `{...}` form matches both JS and TS grammars more reliably.
"class_extends": "class $NAME extends $BASE {$$$BODY}",
}
@@ -45,11 +51,16 @@ METAVARS = {
RELATIONSHIP_PATTERNS: Dict[str, List[str]] = {
"imports": [
"import_from",
"import_named_only",
"import_default_named",
"import_side_effect",
"require_call",
"import_from_dq",
"import_from_sq",
"import_named_only_dq",
"import_named_only_sq",
"import_default_named_dq",
"import_default_named_sq",
"import_side_effect_dq",
"import_side_effect_sq",
"require_call_dq",
"require_call_sq",
],
"inheritance": ["class_extends"],
}
@@ -79,4 +90,3 @@ __all__ = [
"get_patterns_for_relationship",
"get_metavar",
]

View File

@@ -18,9 +18,12 @@ from codexlens.parsers.patterns.javascript import (
TYPESCRIPT_PATTERNS: Dict[str, str] = {
**JAVASCRIPT_PATTERNS,
# Type-only imports
"import_type_from": "import type $$$IMPORTS from $MODULE",
"import_type_named_only": "import type {$$$NAMES} from $MODULE",
"import_type_default_named": "import type $DEFAULT, {$$$NAMES} from $MODULE",
"import_type_from_dq": "import type $$$IMPORTS from \"$MODULE\"",
"import_type_from_sq": "import type $$$IMPORTS from '$MODULE'",
"import_type_named_only_dq": "import type {$$$NAMES} from \"$MODULE\"",
"import_type_named_only_sq": "import type {$$$NAMES} from '$MODULE'",
"import_type_default_named_dq": "import type $DEFAULT, {$$$NAMES} from \"$MODULE\"",
"import_type_default_named_sq": "import type $DEFAULT, {$$$NAMES} from '$MODULE'",
# Interface inheritance: interface Foo extends Bar {}
"interface_extends": "interface $NAME extends $BASE $$$BODY",
}
@@ -30,9 +33,12 @@ RELATIONSHIP_PATTERNS: Dict[str, List[str]] = {
**_JS_RELATIONSHIP_PATTERNS,
"imports": [
*_JS_RELATIONSHIP_PATTERNS.get("imports", []),
"import_type_from",
"import_type_named_only",
"import_type_default_named",
"import_type_from_dq",
"import_type_from_sq",
"import_type_named_only_dq",
"import_type_named_only_sq",
"import_type_default_named_dq",
"import_type_default_named_sq",
],
"inheritance": [
*_JS_RELATIONSHIP_PATTERNS.get("inheritance", []),
@@ -65,4 +71,3 @@ __all__ = [
"get_patterns_for_relationship",
"get_metavar",
]