mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-28 09:23:08 +08:00
feat: Add templates for epics, product brief, and requirements PRD
- Introduced a comprehensive template for generating epics and stories, including an index and individual epic files.
- Created a product brief template to outline product vision, problem statements, and target users.
- Developed a requirements PRD template to structure functional and non-functional requirements, including traceability and prioritization.
- Implemented ast-grep processors for JavaScript and TypeScript to extract relationships such as imports and inheritance.
- Added corresponding patterns for JavaScript and TypeScript to support relationship extraction.
- Established comparison tests to validate the accuracy of relationship extraction between tree-sitter and ast-grep methods.
This commit is contained in:
@@ -140,7 +140,7 @@ class Config:
|
||||
enable_cascade_search: bool = False # Enable cascade search (coarse + fine ranking)
|
||||
cascade_coarse_k: int = 100 # Number of coarse candidates from first stage
|
||||
cascade_fine_k: int = 10 # Number of final results after reranking
|
||||
cascade_strategy: str = "binary" # "binary", "binary_rerank", "dense_rerank", or "staged"
|
||||
cascade_strategy: str = "binary" # "binary", "binary_rerank" (alias: "hybrid"), "dense_rerank", or "staged"
|
||||
|
||||
# Staged cascade search configuration (4-stage pipeline)
|
||||
staged_coarse_k: int = 200 # Number of coarse candidates from Stage 1 binary search
|
||||
@@ -190,7 +190,7 @@ class Config:
|
||||
chars_per_token_estimate: int = 4 # Characters per token estimation ratio
|
||||
|
||||
# Parser configuration
|
||||
use_astgrep: bool = False # Use ast-grep for Python relationship extraction (tree-sitter is default)
|
||||
use_astgrep: bool = False # Use ast-grep for relationship extraction (Python/JS/TS); tree-sitter is default
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
try:
|
||||
@@ -408,14 +408,18 @@ class Config:
|
||||
# Load cascade settings
|
||||
cascade = settings.get("cascade", {})
|
||||
if "strategy" in cascade:
|
||||
strategy = cascade["strategy"]
|
||||
raw_strategy = cascade["strategy"]
|
||||
strategy = str(raw_strategy).strip().lower()
|
||||
if strategy in {"binary", "binary_rerank", "dense_rerank", "staged"}:
|
||||
self.cascade_strategy = strategy
|
||||
elif strategy == "hybrid":
|
||||
self.cascade_strategy = "binary_rerank"
|
||||
log.debug("Mapping cascade strategy 'hybrid' -> 'binary_rerank'")
|
||||
else:
|
||||
log.warning(
|
||||
"Invalid cascade strategy in %s: %r (expected 'binary', 'binary_rerank', 'dense_rerank', or 'staged')",
|
||||
self.settings_path,
|
||||
strategy,
|
||||
raw_strategy,
|
||||
)
|
||||
if "coarse_k" in cascade:
|
||||
self.cascade_coarse_k = cascade["coarse_k"]
|
||||
@@ -522,6 +526,9 @@ class Config:
|
||||
if strategy in {"binary", "binary_rerank", "dense_rerank", "staged"}:
|
||||
self.cascade_strategy = strategy
|
||||
log.debug("Overriding cascade_strategy from .env: %s", self.cascade_strategy)
|
||||
elif strategy == "hybrid":
|
||||
self.cascade_strategy = "binary_rerank"
|
||||
log.debug("Overriding cascade_strategy from .env: %s", self.cascade_strategy)
|
||||
else:
|
||||
log.warning("Invalid CASCADE_STRATEGY in .env: %r", cascade_strategy)
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ ENV_VARS = {
|
||||
"CODEXLENS_DEBUG": "Enable debug mode (true/false)",
|
||||
# Cascade / staged pipeline configuration
|
||||
"ENABLE_CASCADE_SEARCH": "Enable cascade search (true/false)",
|
||||
"CASCADE_STRATEGY": "Cascade strategy: binary, binary_rerank, dense_rerank, staged",
|
||||
"CASCADE_STRATEGY": "Cascade strategy: binary, binary_rerank (alias: hybrid), dense_rerank, staged",
|
||||
"CASCADE_COARSE_K": "Cascade coarse_k candidate count (int)",
|
||||
"CASCADE_FINE_K": "Cascade fine_k result count (int)",
|
||||
"STAGED_STAGE2_MODE": "Staged Stage 2 mode: precomputed, realtime",
|
||||
|
||||
289
codex-lens/src/codexlens/parsers/astgrep_js_ts_processor.py
Normal file
289
codex-lens/src/codexlens/parsers/astgrep_js_ts_processor.py
Normal file
@@ -0,0 +1,289 @@
|
||||
"""Ast-grep processors for JavaScript/TypeScript relationship extraction.
|
||||
|
||||
These processors are intentionally narrower than the tree-sitter relationship
|
||||
extractor: they focus on stable, high-signal edges for static graph usage:
|
||||
- IMPORTS: ES module imports + CommonJS require() (string literal only)
|
||||
- INHERITS: class/interface extends
|
||||
|
||||
They are used when Config.use_astgrep is True.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Callable, Iterable, List, Optional, Sequence, Set, Tuple
|
||||
|
||||
from codexlens.entities import CodeRelationship, IndexedFile, RelationshipType
|
||||
from codexlens.parsers.astgrep_processor import BaseAstGrepProcessor
|
||||
|
||||
|
||||
_IDENT_RE = re.compile(r"^[A-Za-z_$][A-Za-z0-9_$]*$")
|
||||
|
||||
|
||||
def _strip_quotes(value: str) -> str:
    """Return *value* without one pair of matching surrounding quotes.

    Surrounding whitespace is stripped first. If the remaining text is at
    least two characters long and starts and ends with the same quote
    character (single quote, double quote or backtick), that outer pair is
    removed. Otherwise the stripped text is returned unchanged. A falsy
    *value* (``None`` or ``""``) yields ``""``.
    """
    value = (value or "").strip()
    if len(value) >= 2 and value[0] == value[-1] and value[0] in {"'", '"', "`"}:
        return value[1:-1]
    return value
|
||||
|
||||
|
||||
def _module_from_literal(raw: str) -> str:
    """Return the module specifier held in a quoted string literal.

    Args:
        raw: Source text captured for the module position of an import or
            ``require`` call; may be ``None`` or empty.

    Returns:
        The text between the quotes with surrounding whitespace removed, or
        ``""`` when *raw* is empty or is not wrapped in a matching pair of
        quotes (for example an identifier or an expression).
    """
    raw = (raw or "").strip()
    if not raw:
        return ""
    unquoted = _strip_quotes(raw)
    # Only accept string literal forms (tree-sitter extractor does the same).
    # _strip_quotes returns its input unchanged when there is no matching
    # quote pair, so equality here means "not a string literal".
    if unquoted == raw:
        return ""
    return unquoted.strip()
|
||||
|
||||
|
||||
def _extract_named_imports(raw: str) -> List[str]:
    """Return the imported (source-side) names from a named-import list.

    *raw* is the text of the specifier list, with or without the surrounding
    braces, e.g. ``"{ a, b as c, type D }"``. For every comma-separated
    specifier the original exported name is returned; local aliases after
    ``as`` are discarded.

    Specifiers are split on arbitrary whitespace, so lists that are wrapped
    over several lines or use tabs are handled the same as single-space ones.
    An inline TypeScript ``type`` modifier is dropped when the specifier has
    two or four tokens (``type Foo`` / ``type Foo as Bar``). With one or three
    tokens the word ``type`` is itself the imported name (``type`` /
    ``type as alias``).

    Specifiers that do not have the shape ``name`` or ``name as alias``, or
    whose name does not match ``_IDENT_RE``, are skipped.

    Args:
        raw: Specifier-list text; ``None`` or empty yields ``[]``.

    Returns:
        Imported names in source order (duplicates are not removed).
    """
    raw = (raw or "").strip()
    if not raw:
        return []

    # Normalize any surrounding braces the match might include.
    if raw.startswith("{") and raw.endswith("}"):
        raw = raw[1:-1]

    names: List[str] = []
    # Split by commas at top-level; named imports do not nest in JS/TS syntax.
    for part in raw.split(","):
        tokens = part.split()
        # TS inline modifier: "type Foo" or "type Foo as Bar".
        if tokens and tokens[0] == "type" and len(tokens) in (2, 4):
            tokens = tokens[1:]
        is_plain = len(tokens) == 1
        is_aliased = len(tokens) == 3 and tokens[1] == "as"
        if (is_plain or is_aliased) and _IDENT_RE.match(tokens[0]):
            names.append(tokens[0])
    return names
|
||||
|
||||
|
||||
def _dedupe_relationships(rels: Sequence["CodeRelationship"]) -> List["CodeRelationship"]:
    """Drop repeated relationships while keeping first-seen order.

    Two relationships are considered the same when their ``source_symbol``,
    ``target_symbol`` and ``relationship_type.value`` are all equal. Other
    fields, such as the source line, are not part of the key. The first
    occurrence of each key is the one that is kept.

    Args:
        rels: Relationships in extraction order.

    Returns:
        A new list containing the first relationship for every distinct key.
    """
    seen: Set[Tuple[str, str, str]] = set()
    out: List["CodeRelationship"] = []
    for r in rels:
        key = (r.source_symbol, r.target_symbol, r.relationship_type.value)
        if key in seen:
            continue
        seen.add(key)
        out.append(r)
    return out
|
||||
|
||||
|
||||
class _AstGrepJsTsProcessor(BaseAstGrepProcessor):
    """Shared ast-grep relationship extractor for JavaScript and TypeScript.

    Only IMPORTS and INHERITS relationships are produced; ``parse`` returns an
    ``IndexedFile`` with empty ``symbols`` and ``chunks``. Pattern strings are
    looked up by name through the ``get_pattern`` callable supplied by the
    concrete subclass.
    """

    def __init__(
        self,
        language_id: str,
        *,
        path: Optional[Path] = None,
        get_pattern: Callable[[str], str],
    ) -> None:
        """Store the language-specific pattern lookup.

        Args:
            language_id: Language identifier forwarded to the base processor.
            path: Optional file path forwarded to the base processor.
            get_pattern: Maps a pattern name (e.g. ``"import_from"``) to its
                ast-grep pattern string.
        """
        super().__init__(language_id, path)
        self._get_pattern = get_pattern

    def parse(self, text: str, path: Path) -> Optional[IndexedFile]:
        """Extract relationships from *text* and wrap them in an IndexedFile.

        Returns:
            An ``IndexedFile`` carrying only relationships, or ``None`` when
            the processor is unavailable or extraction raises.
        """
        if not self.is_available():
            return None

        try:
            relationships = self._extract_relationships(text, path)
            return IndexedFile(
                path=str(path.resolve()),
                language=self.language_id,
                symbols=[],
                chunks=[],
                relationships=relationships,
            )
        except Exception:
            # Best-effort by design: any failure is reported as "no result".
            return None

    def process_matches(  # type: ignore[override]
        self,
        matches,  # SgNode list (runtime-only type)
        source_code: str,
        path: Path,
    ) -> List[CodeRelationship]:
        """Return an empty list; present only for interface parity."""
        # Not used by the current JS/TS processors; keep the interface for parity.
        _ = (matches, source_code, path)
        return []

    def _extract_relationships(self, source_code: str, path: Path) -> List[CodeRelationship]:
        """Collect import then inheritance relationships, de-duplicated."""
        source_file = str(path.resolve())
        rels: List[CodeRelationship] = []

        rels.extend(self._extract_imports(source_code, source_file=source_file))
        rels.extend(self._extract_inherits(source_code, source_file=source_file))

        return _dedupe_relationships(rels)

    def _extract_imports(self, source_code: str, *, source_file: str) -> List[CodeRelationship]:
        """Return IMPORTS relationships for ES-module and CommonJS imports.

        Whole-module edges target the module specifier; named imports add one
        ``"<module>.<name>"`` edge per imported name. Results are in pattern
        order and are not de-duplicated here.
        """
        rels: List[CodeRelationship] = []

        # Any `import ... from "mod"` form
        rels.extend(self._whole_module_imports(source_code, "import_from", source_file))
        # Side-effect import: import "mod"
        rels.extend(self._whole_module_imports(source_code, "import_side_effect", source_file))
        # Named imports (named-only): import { a, b as c } from "mod"
        rels.extend(self._named_member_imports(source_code, "import_named_only", source_file))
        # Named imports (default + named): import X, { a, b as c } from "mod"
        rels.extend(self._named_member_imports(source_code, "import_default_named", source_file))
        # CommonJS require("mod") (string literal only)
        rels.extend(self._whole_module_imports(source_code, "require_call", source_file))

        return rels

    def _extract_inherits(self, source_code: str, *, source_file: str) -> List[CodeRelationship]:
        """Return INHERITS relationships for ``class X extends Y``."""
        return self._extends_relationships(source_code, "class_extends", source_file)

    @staticmethod
    def _import_edge(target: str, line: int, source_file: str) -> CodeRelationship:
        """Build one IMPORTS relationship from ``<module>`` to *target*."""
        return CodeRelationship(
            source_symbol="<module>",
            target_symbol=target,
            relationship_type=RelationshipType.IMPORTS,
            source_file=source_file,
            target_file=None,
            source_line=line,
        )

    def _whole_module_imports(
        self, source_code: str, pattern_name: str, source_file: str
    ) -> List[CodeRelationship]:
        """Return one IMPORTS edge per match whose MODULE is a string literal."""
        rels: List[CodeRelationship] = []
        for node in self.run_ast_grep(source_code, self._get_pattern(pattern_name)):
            mod = _module_from_literal(self._get_match(node, "MODULE"))
            if mod:
                rels.append(self._import_edge(mod, self._get_line_number(node), source_file))
        return rels

    def _named_member_imports(
        self, source_code: str, pattern_name: str, source_file: str
    ) -> List[CodeRelationship]:
        """Return one ``"<module>.<name>"`` IMPORTS edge per named import."""
        rels: List[CodeRelationship] = []
        for node in self.run_ast_grep(source_code, self._get_pattern(pattern_name)):
            mod = _module_from_literal(self._get_match(node, "MODULE"))
            if not mod:
                continue
            raw_names = self._get_match(node, "NAMES")
            for name in _extract_named_imports(raw_names):
                rels.append(
                    self._import_edge(f"{mod}.{name}", self._get_line_number(node), source_file)
                )
        return rels

    def _extends_relationships(
        self, source_code: str, pattern_name: str, source_file: str
    ) -> List[CodeRelationship]:
        """Return INHERITS edges for matches exposing NAME and BASE captures."""
        rels: List[CodeRelationship] = []
        for node in self.run_ast_grep(source_code, self._get_pattern(pattern_name)):
            class_name = (self._get_match(node, "NAME") or "").strip()
            base_raw = (self._get_match(node, "BASE") or "").strip()
            if not class_name or not base_raw:
                continue
            # Keep only the text before the first "<" (drops generic arguments).
            base = base_raw.split("<", 1)[0].strip()
            if not base:
                continue
            rels.append(
                CodeRelationship(
                    source_symbol=class_name,
                    target_symbol=base,
                    relationship_type=RelationshipType.INHERITS,
                    source_file=source_file,
                    target_file=None,
                    source_line=self._get_line_number(node),
                )
            )
        return rels
|
||||
|
||||
|
||||
class AstGrepJavaScriptProcessor(_AstGrepJsTsProcessor):
    """ast-grep relationship processor bound to the JavaScript pattern set."""

    def __init__(self, path: Optional[Path] = None) -> None:
        """Create a processor for language id ``"javascript"``.

        Args:
            path: Optional file path forwarded to the base processor.
        """
        # Imported at construction time rather than at module import time.
        from codexlens.parsers.patterns.javascript import get_pattern as get_js_pattern

        super().__init__("javascript", path=path, get_pattern=get_js_pattern)
|
||||
|
||||
|
||||
class AstGrepTypeScriptProcessor(_AstGrepJsTsProcessor):
    """ast-grep relationship processor bound to the TypeScript pattern set.

    Extends the shared JS/TS extraction with ``interface ... extends ...``
    inheritance edges and ``import type`` import edges.
    """

    def __init__(self, path: Optional[Path] = None) -> None:
        """Create a processor for language id ``"typescript"``.

        Args:
            path: Optional file path forwarded to the base processor.
        """
        # Imported at construction time rather than at module import time.
        from codexlens.parsers.patterns.typescript import get_pattern as get_ts_pattern

        super().__init__("typescript", path=path, get_pattern=get_ts_pattern)

    def _extract_inherits(self, source_code: str, *, source_file: str) -> List[CodeRelationship]:
        """Return class-extends edges plus interface-extends edges, de-duplicated."""
        rels = super()._extract_inherits(source_code, source_file=source_file)

        # Interface extends: interface Foo extends Bar {}
        for node in self.run_ast_grep(source_code, self._get_pattern("interface_extends")):
            name = (self._get_match(node, "NAME") or "").strip()
            base_raw = (self._get_match(node, "BASE") or "").strip()
            if not name or not base_raw:
                continue
            # Keep only the text before the first "<" (drops generic arguments).
            base = base_raw.split("<", 1)[0].strip()
            if not base:
                continue
            rels.append(
                CodeRelationship(
                    source_symbol=name,
                    target_symbol=base,
                    relationship_type=RelationshipType.INHERITS,
                    source_file=source_file,
                    target_file=None,
                    source_line=self._get_line_number(node),
                )
            )

        return _dedupe_relationships(rels)

    def _extract_imports(self, source_code: str, *, source_file: str) -> List[CodeRelationship]:
        """Return the standard import edges plus ``import type`` edges, de-duplicated."""
        # Reuse JS logic for standard imports
        rels = super()._extract_imports(source_code, source_file=source_file)

        def record(module_name: str, line: int) -> None:
            # Append one IMPORTS edge from "<module>"; empty targets are ignored.
            if not module_name:
                return
            rels.append(
                CodeRelationship(
                    source_symbol="<module>",
                    target_symbol=module_name,
                    relationship_type=RelationshipType.IMPORTS,
                    source_file=source_file,
                    target_file=None,
                    source_line=line,
                )
            )

        # Type-only imports: import type ... from "mod"
        for node in self.run_ast_grep(source_code, self._get_pattern("import_type_from")):
            mod = _module_from_literal(self._get_match(node, "MODULE"))
            if mod:
                record(mod, self._get_line_number(node))

        # Type-only named imports, with and without a default binding.
        for pattern_name in ("import_type_named_only", "import_type_default_named"):
            for node in self.run_ast_grep(source_code, self._get_pattern(pattern_name)):
                mod = _module_from_literal(self._get_match(node, "MODULE"))
                if not mod:
                    continue
                raw_names = self._get_match(node, "NAMES")
                for name in _extract_named_imports(raw_names):
                    record(f"{mod}.{name}", self._get_line_number(node))

        return _dedupe_relationships(rels)
|
||||
|
||||
|
||||
# Public API of this module: the two concrete language processors.
__all__ = [
    "AstGrepJavaScriptProcessor",
    "AstGrepTypeScriptProcessor",
]
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
"""JavaScript ast-grep patterns for relationship extraction.
|
||||
|
||||
These patterns are used by CodexLens' optional ast-grep processors to extract:
|
||||
- IMPORTS: ES module imports + CommonJS require()
|
||||
- INHERITS: class extends relationships
|
||||
|
||||
Pattern Syntax (ast-grep-py 0.40+):
|
||||
$VAR - Single metavariable (matches one AST node)
|
||||
$$$VAR - Multiple metavariable (matches zero or more nodes)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict, List
|
||||
|
||||
|
||||
# Pattern name -> ast-grep pattern string.
JAVASCRIPT_PATTERNS: Dict[str, str] = {
    # ES module imports
    # import React from "react"
    # import React, { useEffect } from "react"
    # import { useEffect } from "react"
    # import * as fs from "fs"
    "import_from": "import $$$IMPORTS from $MODULE",
    "import_named_only": "import {$$$NAMES} from $MODULE",
    "import_default_named": "import $DEFAULT, {$$$NAMES} from $MODULE",
    # Side-effect import: import "./styles.css"
    "import_side_effect": "import $MODULE",

    # CommonJS require(): const fs = require("fs")
    "require_call": "require($MODULE)",

    # Class inheritance: class Child extends Base {}
    "class_extends": "class $NAME extends $BASE $$$BODY",
}

# Logical capture name -> metavariable name used in the patterns above.
METAVARS = {
    "module": "MODULE",
    "import_names": "NAMES",
    "import_default": "DEFAULT",
    "class_name": "NAME",
    "class_base": "BASE",
}

# Relationship kind -> names of the patterns that contribute to it.
RELATIONSHIP_PATTERNS: Dict[str, List[str]] = {
    "imports": [
        "import_from",
        "import_named_only",
        "import_default_named",
        "import_side_effect",
        "require_call",
    ],
    "inheritance": ["class_extends"],
}


def get_pattern(pattern_name: str) -> str:
    """Return the ast-grep pattern registered under *pattern_name*.

    Raises:
        KeyError: If *pattern_name* is not a key of ``JAVASCRIPT_PATTERNS``;
            the message lists the available names.
    """
    if pattern_name not in JAVASCRIPT_PATTERNS:
        raise KeyError(
            f"Unknown JS pattern: {pattern_name}. Available: {list(JAVASCRIPT_PATTERNS.keys())}"
        )
    return JAVASCRIPT_PATTERNS[pattern_name]


def get_patterns_for_relationship(rel_type: str) -> List[str]:
    """Return the pattern names registered for relationship kind *rel_type*.

    Unknown kinds yield an empty list. A fresh list is returned on every call
    so that callers cannot modify ``RELATIONSHIP_PATTERNS`` by accident.
    """
    return list(RELATIONSHIP_PATTERNS.get(rel_type, []))


def get_metavar(name: str) -> str:
    """Return the metavariable for *name*, defaulting to ``name.upper()``."""
    return METAVARS.get(name, name.upper())


__all__ = [
    "JAVASCRIPT_PATTERNS",
    "METAVARS",
    "RELATIONSHIP_PATTERNS",
    "get_pattern",
    "get_patterns_for_relationship",
    "get_metavar",
]
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
"""TypeScript ast-grep patterns for relationship extraction.
|
||||
|
||||
This module extends the JavaScript patterns with TypeScript-specific syntax
|
||||
such as `import type` and `interface ... extends ...`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict, List
|
||||
|
||||
from codexlens.parsers.patterns.javascript import (
|
||||
METAVARS,
|
||||
RELATIONSHIP_PATTERNS as _JS_RELATIONSHIP_PATTERNS,
|
||||
JAVASCRIPT_PATTERNS,
|
||||
)
|
||||
|
||||
|
||||
# Pattern name -> ast-grep pattern string (JavaScript patterns plus TS-only ones).
TYPESCRIPT_PATTERNS: Dict[str, str] = {
    **JAVASCRIPT_PATTERNS,
    # Type-only imports
    "import_type_from": "import type $$$IMPORTS from $MODULE",
    "import_type_named_only": "import type {$$$NAMES} from $MODULE",
    "import_type_default_named": "import type $DEFAULT, {$$$NAMES} from $MODULE",
    # Interface inheritance: interface Foo extends Bar {}
    "interface_extends": "interface $NAME extends $BASE $$$BODY",
}

# Relationship kind -> pattern names; the JavaScript lists are copied and
# extended here, so the JavaScript module's own lists are left untouched.
RELATIONSHIP_PATTERNS: Dict[str, List[str]] = {
    **_JS_RELATIONSHIP_PATTERNS,
    "imports": [
        *_JS_RELATIONSHIP_PATTERNS.get("imports", []),
        "import_type_from",
        "import_type_named_only",
        "import_type_default_named",
    ],
    "inheritance": [
        *_JS_RELATIONSHIP_PATTERNS.get("inheritance", []),
        "interface_extends",
    ],
}


def get_pattern(pattern_name: str) -> str:
    """Return the ast-grep pattern registered under *pattern_name*.

    Raises:
        KeyError: If *pattern_name* is not a key of ``TYPESCRIPT_PATTERNS``;
            the message lists the available names.
    """
    if pattern_name not in TYPESCRIPT_PATTERNS:
        raise KeyError(
            f"Unknown TS pattern: {pattern_name}. Available: {list(TYPESCRIPT_PATTERNS.keys())}"
        )
    return TYPESCRIPT_PATTERNS[pattern_name]


def get_patterns_for_relationship(rel_type: str) -> List[str]:
    """Return the pattern names registered for relationship kind *rel_type*.

    Unknown kinds yield an empty list. A fresh list is returned on every call
    so that callers cannot modify ``RELATIONSHIP_PATTERNS`` by accident.
    """
    return list(RELATIONSHIP_PATTERNS.get(rel_type, []))


def get_metavar(name: str) -> str:
    """Return the metavariable for *name*, defaulting to ``name.upper()``."""
    return METAVARS.get(name, name.upper())


__all__ = [
    "TYPESCRIPT_PATTERNS",
    "METAVARS",
    "RELATIONSHIP_PATTERNS",
    "get_pattern",
    "get_patterns_for_relationship",
    "get_metavar",
]
|
||||
|
||||
@@ -34,8 +34,9 @@ if TYPE_CHECKING:
|
||||
class TreeSitterSymbolParser:
|
||||
"""Parser using tree-sitter for AST-level symbol extraction.
|
||||
|
||||
Supports optional ast-grep integration for Python relationship extraction
|
||||
when config.use_astgrep is True and ast-grep-py is available.
|
||||
Supports optional ast-grep integration for relationship extraction
|
||||
(Python/JavaScript/TypeScript) when config.use_astgrep is True and
|
||||
ast-grep-py is available.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -105,24 +106,33 @@ class TreeSitterSymbolParser:
|
||||
"""Check if ast-grep should be used for relationship extraction.
|
||||
|
||||
Returns:
|
||||
True if config.use_astgrep is True and language is Python
|
||||
True if config.use_astgrep is True and language is supported
|
||||
"""
|
||||
if self._config is None:
|
||||
return False
|
||||
if not getattr(self._config, "use_astgrep", False):
|
||||
return False
|
||||
return self.language_id == "python"
|
||||
return self.language_id in {"python", "javascript", "typescript"}
|
||||
|
||||
def _initialize_astgrep_processor(self) -> None:
|
||||
"""Initialize ast-grep processor for Python relationship extraction."""
|
||||
"""Initialize ast-grep processor for relationship extraction."""
|
||||
try:
|
||||
from codexlens.parsers.astgrep_processor import (
|
||||
AstGrepPythonProcessor,
|
||||
is_astgrep_processor_available,
|
||||
)
|
||||
from codexlens.parsers.astgrep_js_ts_processor import (
|
||||
AstGrepJavaScriptProcessor,
|
||||
AstGrepTypeScriptProcessor,
|
||||
)
|
||||
|
||||
if is_astgrep_processor_available():
|
||||
self._astgrep_processor = AstGrepPythonProcessor(self.path)
|
||||
if self.language_id == "python":
|
||||
self._astgrep_processor = AstGrepPythonProcessor(self.path)
|
||||
elif self.language_id == "javascript":
|
||||
self._astgrep_processor = AstGrepJavaScriptProcessor(self.path)
|
||||
elif self.language_id == "typescript":
|
||||
self._astgrep_processor = AstGrepTypeScriptProcessor(self.path)
|
||||
except ImportError:
|
||||
self._astgrep_processor = None
|
||||
|
||||
@@ -222,9 +232,9 @@ class TreeSitterSymbolParser:
|
||||
path: Path,
|
||||
source_code: Optional[str] = None,
|
||||
) -> List[CodeRelationship]:
|
||||
"""Extract relationships, optionally using ast-grep for Python.
|
||||
"""Extract relationships, optionally using ast-grep.
|
||||
|
||||
When config.use_astgrep is True and ast-grep is available for Python,
|
||||
When config.use_astgrep is True and an ast-grep processor is available,
|
||||
uses ast-grep for relationship extraction. Otherwise, uses tree-sitter.
|
||||
|
||||
Args:
|
||||
@@ -236,32 +246,31 @@ class TreeSitterSymbolParser:
|
||||
Returns:
|
||||
List of extracted relationships
|
||||
"""
|
||||
# Try ast-grep first if configured and available for this language.
|
||||
if self._astgrep_processor is not None and source_code is not None:
|
||||
try:
|
||||
astgrep_rels = self._extract_relationships_astgrep(source_code, path)
|
||||
if astgrep_rels is not None:
|
||||
return astgrep_rels
|
||||
except Exception:
|
||||
# Fall back to tree-sitter on ast-grep failure
|
||||
pass
|
||||
|
||||
if self.language_id == "python":
|
||||
# Try ast-grep first if configured and available
|
||||
if self._astgrep_processor is not None and source_code is not None:
|
||||
try:
|
||||
astgrep_rels = self._extract_python_relationships_astgrep(
|
||||
source_code, path
|
||||
)
|
||||
if astgrep_rels is not None:
|
||||
return astgrep_rels
|
||||
except Exception:
|
||||
# Fall back to tree-sitter on ast-grep failure
|
||||
pass
|
||||
return self._extract_python_relationships(source_bytes, root, path)
|
||||
if self.language_id in {"javascript", "typescript"}:
|
||||
return self._extract_js_ts_relationships(source_bytes, root, path)
|
||||
return []
|
||||
|
||||
def _extract_python_relationships_astgrep(
|
||||
def _extract_relationships_astgrep(
|
||||
self,
|
||||
source_code: str,
|
||||
path: Path,
|
||||
) -> Optional[List[CodeRelationship]]:
|
||||
"""Extract Python relationships using ast-grep processor.
|
||||
"""Extract relationships using ast-grep processor.
|
||||
|
||||
Args:
|
||||
source_code: Python source code text
|
||||
source_code: Source code text
|
||||
path: File path
|
||||
|
||||
Returns:
|
||||
|
||||
@@ -694,13 +694,14 @@ class ChainSearchEngine:
|
||||
k: int = 10,
|
||||
coarse_k: int = 100,
|
||||
options: Optional[SearchOptions] = None,
|
||||
strategy: Optional[Literal["binary", "binary_rerank", "dense_rerank", "staged"]] = None,
|
||||
strategy: Optional[Literal["binary", "binary_rerank", "dense_rerank", "staged", "hybrid"]] = None,
|
||||
) -> ChainSearchResult:
|
||||
"""Unified cascade search entry point with strategy selection.
|
||||
|
||||
Provides a single interface for cascade search with configurable strategy:
|
||||
- "binary": Uses binary vector coarse ranking + dense fine ranking (fastest)
|
||||
- "binary_rerank": Uses binary vector coarse ranking + cross-encoder reranking (best balance)
|
||||
- "hybrid": Alias for "binary_rerank" (backward compat)
|
||||
- "dense_rerank": Uses dense vector coarse ranking + cross-encoder reranking
|
||||
- "staged": 4-stage pipeline: binary -> LSP expand -> clustering -> optional rerank
|
||||
|
||||
@@ -731,7 +732,7 @@ class ChainSearchEngine:
|
||||
"""
|
||||
# Strategy priority: parameter > config > default
|
||||
effective_strategy = strategy
|
||||
valid_strategies = ("binary", "binary_rerank", "dense_rerank", "staged")
|
||||
valid_strategies = ("binary", "binary_rerank", "dense_rerank", "staged", "hybrid")
|
||||
if effective_strategy is None:
|
||||
# Not passed via parameter, check config
|
||||
if self._config is not None:
|
||||
@@ -743,6 +744,10 @@ class ChainSearchEngine:
|
||||
if effective_strategy not in valid_strategies:
|
||||
effective_strategy = "binary"
|
||||
|
||||
# Normalize backward-compat alias
|
||||
if effective_strategy == "hybrid":
|
||||
effective_strategy = "binary_rerank"
|
||||
|
||||
if effective_strategy == "binary":
|
||||
return self.binary_cascade_search(query, source_path, k, coarse_k, options)
|
||||
elif effective_strategy == "binary_rerank":
|
||||
|
||||
140
codex-lens/tests/parsers/test_comparison_js_ts.py
Normal file
140
codex-lens/tests/parsers/test_comparison_js_ts.py
Normal file
@@ -0,0 +1,140 @@
|
||||
"""Comparison tests for tree-sitter vs ast-grep JS/TS relationship extraction.
|
||||
|
||||
These tests focus on stable, high-signal relationship types used by the
|
||||
static graph pipeline:
|
||||
- IMPORTS
|
||||
- INHERITS
|
||||
|
||||
If ast-grep-py is not installed, tests are skipped.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import List, Set, Tuple
|
||||
|
||||
import pytest
|
||||
|
||||
from codexlens.config import Config
|
||||
from codexlens.entities import CodeRelationship, RelationshipType
|
||||
from codexlens.parsers.treesitter_parser import TreeSitterSymbolParser
|
||||
|
||||
|
||||
# JavaScript fixture: default+named import with alias, named import,
# side-effect import, CommonJS require, and one class-extends pair.
SAMPLE_JS_CODE = """
import React, { useEffect as useEf } from "react";
import { foo } from "./foo";
import "./styles.css";
const fs = require("fs");

class Base {}
class Child extends Base {
  method() {
    console.log("hi");
  }
}
"""


# TypeScript fixture: type-only named import, aliased named import,
# interface-extends, and one class-extends pair.
SAMPLE_TS_CODE = """
import type { Foo } from "pkg";
import { bar as baz } from "./bar";

interface MyInterface extends Foo {}

class Base {}
class Child extends Base {}
"""
|
||||
|
||||
|
||||
def extract_relationship_tuples(
    relationships: List["CodeRelationship"],
    *,
    only_types: Set["RelationshipType"],
) -> Set[Tuple[str, str, str]]:
    """Reduce relationships to comparable ``(source, target, type)`` tuples.

    Args:
        relationships: Relationships to project.
        only_types: Relationship types to keep; all others are ignored.

    Returns:
        The set of ``(source_symbol, target_symbol, relationship_type.value)``
        tuples for the kept relationships. Because the result is a set,
        duplicates and ordering are not preserved.
    """
    return {
        (rel.source_symbol, rel.target_symbol, rel.relationship_type.value)
        for rel in relationships
        if rel.relationship_type in only_types
    }
|
||||
|
||||
|
||||
def _skip_if_astgrep_unavailable(parser: TreeSitterSymbolParser) -> None:
    """Skip the current test when *parser* has no usable ast-grep processor.

    The test is skipped if the parser's ``_astgrep_processor`` is ``None`` or
    reports ``is_available()`` as false.
    """
    if parser._astgrep_processor is None or not parser._astgrep_processor.is_available():  # type: ignore[attr-defined]
        pytest.skip("ast-grep-py not installed or language not supported")
|
||||
|
||||
|
||||
def test_js_imports_and_inherits_match(tmp_path: Path) -> None:
    """JavaScript: ast-grep and tree-sitter yield the same IMPORTS/INHERITS set."""
    js_file = tmp_path / "sample.js"
    js_file.write_text(SAMPLE_JS_CODE, encoding="utf-8")
    source = js_file.read_text(encoding="utf-8")

    # Baseline parser: ast-grep disabled.
    config_default = Config()
    config_default.use_astgrep = False
    ts_default = TreeSitterSymbolParser("javascript", js_file, config=config_default)

    # Parser under test: ast-grep enabled; skip when it is not usable.
    config_ast = Config()
    config_ast.use_astgrep = True
    ts_ast = TreeSitterSymbolParser("javascript", js_file, config=config_ast)
    _skip_if_astgrep_unavailable(ts_ast)

    result_ts = ts_default.parse(source, js_file)
    result_ast = ts_ast.parse(source, js_file)

    assert result_ts is not None
    assert result_ast is not None

    ts_rel = extract_relationship_tuples(
        result_ts.relationships,
        only_types={RelationshipType.IMPORTS, RelationshipType.INHERITS},
    )
    ast_rel = extract_relationship_tuples(
        result_ast.relationships,
        only_types={RelationshipType.IMPORTS, RelationshipType.INHERITS},
    )

    assert ast_rel == ts_rel
|
||||
|
||||
|
||||
def test_ts_imports_match_and_inherits_superset(tmp_path: Path) -> None:
    """TypeScript: IMPORTS sets are equal; ast-grep INHERITS is a superset."""
    ts_file = tmp_path / "sample.ts"
    ts_file.write_text(SAMPLE_TS_CODE, encoding="utf-8")
    source = ts_file.read_text(encoding="utf-8")

    # Baseline parser: ast-grep disabled.
    config_default = Config()
    config_default.use_astgrep = False
    ts_default = TreeSitterSymbolParser("typescript", ts_file, config=config_default)

    # Parser under test: ast-grep enabled; skip when it is not usable.
    config_ast = Config()
    config_ast.use_astgrep = True
    ts_ast = TreeSitterSymbolParser("typescript", ts_file, config=config_ast)
    _skip_if_astgrep_unavailable(ts_ast)

    result_ts = ts_default.parse(source, ts_file)
    result_ast = ts_ast.parse(source, ts_file)

    assert result_ts is not None
    assert result_ast is not None

    ts_imports = extract_relationship_tuples(
        result_ts.relationships,
        only_types={RelationshipType.IMPORTS},
    )
    ast_imports = extract_relationship_tuples(
        result_ast.relationships,
        only_types={RelationshipType.IMPORTS},
    )
    assert ast_imports == ts_imports

    ts_inherits = extract_relationship_tuples(
        result_ts.relationships,
        only_types={RelationshipType.INHERITS},
    )
    ast_inherits = extract_relationship_tuples(
        result_ast.relationships,
        only_types={RelationshipType.INHERITS},
    )
    # Ast-grep may include additional TypeScript inheritance edges (e.g., interface extends).
    assert ts_inherits.issubset(ast_inherits)
    # But at minimum, class inheritance should be present.
    assert ("Child", "Base", "inherits") in ast_inherits
|
||||
|
||||
@@ -84,6 +84,21 @@ class TestConfigCascadeDefaults:
|
||||
# Should keep the default "binary" strategy
|
||||
assert config.cascade_strategy == "binary"
|
||||
|
||||
def test_hybrid_cascade_strategy_alias_maps_to_binary_rerank(self, temp_config_dir):
|
||||
"""Hybrid is a backward-compat alias for binary_rerank."""
|
||||
config = Config(data_dir=temp_config_dir)
|
||||
settings = {"cascade": {"strategy": "hybrid"}}
|
||||
|
||||
settings_path = config.settings_path
|
||||
settings_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(settings_path, "w", encoding="utf-8") as f:
|
||||
json.dump(settings, f)
|
||||
|
||||
with patch.object(config, "_apply_env_overrides"):
|
||||
config.load_settings()
|
||||
|
||||
assert config.cascade_strategy == "binary_rerank"
|
||||
|
||||
def test_staged_config_defaults(self, temp_config_dir):
|
||||
"""Staged cascade settings should have correct defaults."""
|
||||
config = Config(data_dir=temp_config_dir)
|
||||
|
||||
@@ -115,3 +115,22 @@ def test_staged_env_overrides_invalid_ignored(temp_config_dir: Path) -> None:
|
||||
assert config.staged_stage2_mode == "precomputed"
|
||||
assert config.staged_clustering_strategy == "auto"
|
||||
assert config.staged_realtime_lsp_timeout_s == 30.0
|
||||
|
||||
|
||||
def test_cascade_strategy_hybrid_alias_env_override(temp_config_dir: Path) -> None:
|
||||
config = Config(data_dir=temp_config_dir)
|
||||
|
||||
env_path = temp_config_dir / ".env"
|
||||
env_path.write_text(
|
||||
"\n".join(
|
||||
[
|
||||
"CASCADE_STRATEGY=hybrid",
|
||||
"",
|
||||
]
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
config.load_settings()
|
||||
|
||||
assert config.cascade_strategy == "binary_rerank"
|
||||
|
||||
Reference in New Issue
Block a user