mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-28 09:23:08 +08:00
feat: Add templates for epics, product brief, and requirements PRD
- Introduced a comprehensive template for generating epics and stories, including an index and individual epic files.
- Created a product brief template to outline product vision, problem statements, and target users.
- Developed a requirements PRD template to structure functional and non-functional requirements, including traceability and prioritization.
- Implemented ast-grep processors for JavaScript and TypeScript to extract relationships such as imports and inheritance.
- Added corresponding patterns for JavaScript and TypeScript to support relationship extraction.
- Established comparison tests to validate the accuracy of relationship extraction between tree-sitter and ast-grep methods.
This commit is contained in:
@@ -140,7 +140,7 @@ class Config:
|
||||
enable_cascade_search: bool = False # Enable cascade search (coarse + fine ranking)
|
||||
cascade_coarse_k: int = 100 # Number of coarse candidates from first stage
|
||||
cascade_fine_k: int = 10 # Number of final results after reranking
|
||||
cascade_strategy: str = "binary" # "binary", "binary_rerank", "dense_rerank", or "staged"
|
||||
cascade_strategy: str = "binary" # "binary", "binary_rerank" (alias: "hybrid"), "dense_rerank", or "staged"
|
||||
|
||||
# Staged cascade search configuration (4-stage pipeline)
|
||||
staged_coarse_k: int = 200 # Number of coarse candidates from Stage 1 binary search
|
||||
@@ -190,7 +190,7 @@ class Config:
|
||||
chars_per_token_estimate: int = 4 # Characters per token estimation ratio
|
||||
|
||||
# Parser configuration
|
||||
use_astgrep: bool = False # Use ast-grep for Python relationship extraction (tree-sitter is default)
|
||||
use_astgrep: bool = False # Use ast-grep for relationship extraction (Python/JS/TS); tree-sitter is default
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
try:
|
||||
@@ -408,14 +408,18 @@ class Config:
|
||||
# Load cascade settings
|
||||
cascade = settings.get("cascade", {})
|
||||
if "strategy" in cascade:
|
||||
strategy = cascade["strategy"]
|
||||
raw_strategy = cascade["strategy"]
|
||||
strategy = str(raw_strategy).strip().lower()
|
||||
if strategy in {"binary", "binary_rerank", "dense_rerank", "staged"}:
|
||||
self.cascade_strategy = strategy
|
||||
elif strategy == "hybrid":
|
||||
self.cascade_strategy = "binary_rerank"
|
||||
log.debug("Mapping cascade strategy 'hybrid' -> 'binary_rerank'")
|
||||
else:
|
||||
log.warning(
|
||||
"Invalid cascade strategy in %s: %r (expected 'binary', 'binary_rerank', 'dense_rerank', or 'staged')",
|
||||
self.settings_path,
|
||||
strategy,
|
||||
raw_strategy,
|
||||
)
|
||||
if "coarse_k" in cascade:
|
||||
self.cascade_coarse_k = cascade["coarse_k"]
|
||||
@@ -522,6 +526,9 @@ class Config:
|
||||
if strategy in {"binary", "binary_rerank", "dense_rerank", "staged"}:
|
||||
self.cascade_strategy = strategy
|
||||
log.debug("Overriding cascade_strategy from .env: %s", self.cascade_strategy)
|
||||
elif strategy == "hybrid":
|
||||
self.cascade_strategy = "binary_rerank"
|
||||
log.debug("Overriding cascade_strategy from .env: %s", self.cascade_strategy)
|
||||
else:
|
||||
log.warning("Invalid CASCADE_STRATEGY in .env: %r", cascade_strategy)
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ ENV_VARS = {
|
||||
"CODEXLENS_DEBUG": "Enable debug mode (true/false)",
|
||||
# Cascade / staged pipeline configuration
|
||||
"ENABLE_CASCADE_SEARCH": "Enable cascade search (true/false)",
|
||||
"CASCADE_STRATEGY": "Cascade strategy: binary, binary_rerank, dense_rerank, staged",
|
||||
"CASCADE_STRATEGY": "Cascade strategy: binary, binary_rerank (alias: hybrid), dense_rerank, staged",
|
||||
"CASCADE_COARSE_K": "Cascade coarse_k candidate count (int)",
|
||||
"CASCADE_FINE_K": "Cascade fine_k result count (int)",
|
||||
"STAGED_STAGE2_MODE": "Staged Stage 2 mode: precomputed, realtime",
|
||||
|
||||
289
codex-lens/src/codexlens/parsers/astgrep_js_ts_processor.py
Normal file
289
codex-lens/src/codexlens/parsers/astgrep_js_ts_processor.py
Normal file
@@ -0,0 +1,289 @@
|
||||
"""Ast-grep processors for JavaScript/TypeScript relationship extraction.
|
||||
|
||||
These processors are intentionally narrower than the tree-sitter relationship
|
||||
extractor: they focus on stable, high-signal edges for static graph usage:
|
||||
- IMPORTS: ES module imports + CommonJS require() (string literal only)
|
||||
- INHERITS: class/interface extends
|
||||
|
||||
They are used when Config.use_astgrep is True.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Callable, Iterable, List, Optional, Sequence, Set, Tuple
|
||||
|
||||
from codexlens.entities import CodeRelationship, IndexedFile, RelationshipType
|
||||
from codexlens.parsers.astgrep_processor import BaseAstGrepProcessor
|
||||
|
||||
|
||||
# Matches a whole string that is a plain ASCII identifier: a letter, `_` or `$`
# followed by any number of letters, digits, `_` or `$`.
_IDENT_RE = re.compile(r"^[A-Za-z_$][A-Za-z0-9_$]*$")
|
||||
|
||||
|
||||
def _strip_quotes(value: str) -> str:
|
||||
value = (value or "").strip()
|
||||
if len(value) >= 2 and value[0] == value[-1] and value[0] in {"'", '"', "`"}:
|
||||
return value[1:-1]
|
||||
return value
|
||||
|
||||
|
||||
def _module_from_literal(raw: str) -> str:
    """Return the module specifier held in a quoted string literal.

    ``raw`` is the source text of the matched node. If, after trimming, it is
    wrapped in matching quotes, the text between the quotes is returned
    (trimmed). Anything else -- empty input or text that is not a quoted
    literal -- yields the empty string, so only string-literal specifiers are
    accepted.
    """
    text = (raw or "").strip()
    if text:
        inner = _strip_quotes(text)
        # _strip_quotes returns its input untouched when no quotes were
        # removed, which means the text was not a string literal.
        if inner != text:
            return inner.strip()
    return ""
|
||||
|
||||
|
||||
def _extract_named_imports(raw: str) -> List[str]:
    """Return the imported (original) names from a named-import clause.

    ``raw`` is the text of the specifier list, with or without the surrounding
    braces, e.g. ``"{ a, b as c, type D }"``. For each comma-separated
    specifier a leading ``type `` modifier is dropped and only the name before
    `` as `` is kept; specifiers that are not plain identifiers are skipped.
    """
    text = (raw or "").strip()
    if not text:
        return []

    # The matched text may or may not include the enclosing braces.
    if text.startswith("{") and text.endswith("}"):
        text = text[1:-1].strip()

    found: List[str] = []
    # Named import specifiers never nest, so a flat comma split is enough.
    for chunk in text.split(","):
        spec = chunk.strip()
        if not spec:
            continue
        # TypeScript inline type modifier: `type Foo`.
        if spec.startswith("type "):
            spec = spec[len("type "):].strip()
        # Aliased specifier `foo as bar`: keep the exported name `foo`.
        spec = spec.partition(" as ")[0].strip()
        if _IDENT_RE.match(spec):
            found.append(spec)
    return found
|
||||
|
||||
|
||||
def _dedupe_relationships(rels: Sequence[CodeRelationship]) -> List[CodeRelationship]:
|
||||
seen: Set[Tuple[str, str, str]] = set()
|
||||
out: List[CodeRelationship] = []
|
||||
for r in rels:
|
||||
key = (r.source_symbol, r.target_symbol, r.relationship_type.value)
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
out.append(r)
|
||||
return out
|
||||
|
||||
|
||||
class _AstGrepJsTsProcessor(BaseAstGrepProcessor):
    """Shared ast-grep relationship extractor for JavaScript and TypeScript.

    Produces IMPORTS edges (``import`` statements and ``require()`` calls whose
    argument is a string literal) and INHERITS edges (``class ... extends``).
    Pattern text is looked up by name through the ``get_pattern`` callable so
    each language can supply its own pattern table.

    NOTE(review): ``is_available``, ``run_ast_grep``, ``_get_match``,
    ``_get_line_number`` and ``language_id`` are not defined here; presumably
    they come from ``BaseAstGrepProcessor`` -- confirm against that class.
    """

    def __init__(
        self,
        language_id: str,
        *,
        path: Optional[Path] = None,
        get_pattern: Callable[[str], str],
    ) -> None:
        """Store the pattern lookup and initialise the base processor.

        Args:
            language_id: Language identifier forwarded to the base class.
            path: Optional file path forwarded to the base class.
            get_pattern: Maps a pattern name (e.g. ``"import_from"``) to the
                ast-grep pattern string for this language.
        """
        super().__init__(language_id, path)
        self._get_pattern = get_pattern

    def parse(self, text: str, path: Path) -> Optional[IndexedFile]:
        """Build an ``IndexedFile`` carrying only relationships.

        Symbols and chunks are always empty. Returns ``None`` when
        ``is_available()`` is false or when extraction raises any exception.
        """
        if not self.is_available():
            return None

        try:
            found = self._extract_relationships(text, path)
            indexed = IndexedFile(
                path=str(path.resolve()),
                language=self.language_id,
                symbols=[],
                chunks=[],
                relationships=found,
            )
        except Exception:
            # Best-effort: any failure is reported to the caller as "no result".
            return None
        return indexed

    def process_matches(  # type: ignore[override]
        self,
        matches,  # SgNode list (runtime-only type)
        source_code: str,
        path: Path,
    ) -> List[CodeRelationship]:
        """Interface placeholder; the JS/TS processors do not use it.

        Always returns an empty list and ignores every argument.
        """
        del matches, source_code, path
        return []

    def _extract_relationships(self, source_code: str, path: Path) -> List[CodeRelationship]:
        """Collect import edges, then inheritance edges, and de-duplicate them."""
        source_file = str(path.resolve())
        imports = self._extract_imports(source_code, source_file=source_file)
        inherits = self._extract_inherits(source_code, source_file=source_file)
        return _dedupe_relationships([*imports, *inherits])

    def _extract_imports(self, source_code: str, *, source_file: str) -> List[CodeRelationship]:
        """Return IMPORTS edges found in ``source_code``.

        Every edge has ``"<module>"`` as its source symbol. Module-level
        patterns yield the module specifier as target; named-import patterns
        yield ``"<module>.<name>"`` for each imported name. Patterns are run in
        a fixed order: ``import_from``, ``import_side_effect``,
        ``import_named_only``, ``import_default_named``, ``require_call``.
        """
        edges: List[CodeRelationship] = []

        def add_edge(target: str, line: int) -> None:
            # Empty targets are silently ignored.
            if target:
                edges.append(
                    CodeRelationship(
                        source_symbol="<module>",
                        target_symbol=target,
                        relationship_type=RelationshipType.IMPORTS,
                        source_file=source_file,
                        target_file=None,
                        source_line=line,
                    )
                )

        def scan_modules(pattern_key: str) -> None:
            # One edge per match whose MODULE capture is a string literal.
            for node in self.run_ast_grep(source_code, self._get_pattern(pattern_key)):
                module = _module_from_literal(self._get_match(node, "MODULE"))
                if module:
                    add_edge(module, self._get_line_number(node))

        def scan_named(pattern_key: str) -> None:
            # One edge per imported name, qualified by the module specifier.
            for node in self.run_ast_grep(source_code, self._get_pattern(pattern_key)):
                module = _module_from_literal(self._get_match(node, "MODULE"))
                if not module:
                    continue
                for name in _extract_named_imports(self._get_match(node, "NAMES")):
                    add_edge(f"{module}.{name}", self._get_line_number(node))

        # Any `import ... from "mod"` form
        scan_modules("import_from")
        # Side-effect import: import "mod"
        scan_modules("import_side_effect")
        # Named imports (named-only): import { a, b as c } from "mod"
        scan_named("import_named_only")
        # Named imports (default + named): import X, { a, b as c } from "mod"
        scan_named("import_default_named")
        # CommonJS require("mod") (string literal only)
        scan_modules("require_call")

        return edges

    def _extract_inherits(self, source_code: str, *, source_file: str) -> List[CodeRelationship]:
        """Return INHERITS edges for ``class NAME extends BASE`` matches.

        Text from the first ``<`` onward is cut from the base capture (e.g.
        generic arguments). Matches with an empty class name or empty base are
        skipped.
        """
        edges: List[CodeRelationship] = []

        for node in self.run_ast_grep(source_code, self._get_pattern("class_extends")):
            child = (self._get_match(node, "NAME") or "").strip()
            parent_text = (self._get_match(node, "BASE") or "").strip()
            if not (child and parent_text):
                continue
            parent = parent_text.partition("<")[0].strip()
            if parent:
                edges.append(
                    CodeRelationship(
                        source_symbol=child,
                        target_symbol=parent,
                        relationship_type=RelationshipType.INHERITS,
                        source_file=source_file,
                        target_file=None,
                        source_line=self._get_line_number(node),
                    )
                )

        return edges
|
||||
|
||||
|
||||
class AstGrepJavaScriptProcessor(_AstGrepJsTsProcessor):
    """ast-grep relationship processor bound to the JavaScript pattern table."""

    def __init__(self, path: Optional[Path] = None) -> None:
        """Create a processor with language id ``"javascript"``.

        Args:
            path: Optional file path forwarded to the shared JS/TS processor.
        """
        # The pattern module is imported at construction time, not at module import.
        from codexlens.parsers.patterns.javascript import get_pattern

        super().__init__("javascript", get_pattern=get_pattern, path=path)
|
||||
|
||||
|
||||
class AstGrepTypeScriptProcessor(_AstGrepJsTsProcessor):
    """ast-grep relationship processor bound to the TypeScript pattern table.

    Adds TypeScript-only edges on top of the shared JS/TS extraction:
    ``interface ... extends ...`` (INHERITS) and ``import type ...`` (IMPORTS).
    """

    def __init__(self, path: Optional[Path] = None) -> None:
        """Create a processor with language id ``"typescript"``.

        Args:
            path: Optional file path forwarded to the shared JS/TS processor.
        """
        # The pattern module is imported at construction time, not at module import.
        from codexlens.parsers.patterns.typescript import get_pattern

        super().__init__("typescript", get_pattern=get_pattern, path=path)

    def _extract_inherits(self, source_code: str, *, source_file: str) -> List[CodeRelationship]:
        """Return class INHERITS edges plus ``interface ... extends`` edges.

        Text from the first ``<`` onward is cut from the base capture. The
        combined list is de-duplicated before it is returned.
        """
        edges = super()._extract_inherits(source_code, source_file=source_file)

        # Interface extends: interface Foo extends Bar {}
        for node in self.run_ast_grep(source_code, self._get_pattern("interface_extends")):
            iface = (self._get_match(node, "NAME") or "").strip()
            parent_text = (self._get_match(node, "BASE") or "").strip()
            if not (iface and parent_text):
                continue
            parent = parent_text.partition("<")[0].strip()
            if parent:
                edges.append(
                    CodeRelationship(
                        source_symbol=iface,
                        target_symbol=parent,
                        relationship_type=RelationshipType.INHERITS,
                        source_file=source_file,
                        target_file=None,
                        source_line=self._get_line_number(node),
                    )
                )

        return _dedupe_relationships(edges)

    def _extract_imports(self, source_code: str, *, source_file: str) -> List[CodeRelationship]:
        """Return the shared JS/TS import edges plus ``import type`` edges.

        Type-only patterns are run after the shared ones, in the order
        ``import_type_from``, ``import_type_named_only``,
        ``import_type_default_named``. The combined list is de-duplicated
        before it is returned.
        """
        # Reuse JS logic for standard imports
        edges = super()._extract_imports(source_code, source_file=source_file)

        def add_edge(target: str, line: int) -> None:
            # Empty targets are silently ignored.
            if target:
                edges.append(
                    CodeRelationship(
                        source_symbol="<module>",
                        target_symbol=target,
                        relationship_type=RelationshipType.IMPORTS,
                        source_file=source_file,
                        target_file=None,
                        source_line=line,
                    )
                )

        # Type-only imports: import type ... from "mod"
        for node in self.run_ast_grep(source_code, self._get_pattern("import_type_from")):
            module = _module_from_literal(self._get_match(node, "MODULE"))
            if module:
                add_edge(module, self._get_line_number(node))

        # Type-only named imports, with and without a default binding.
        for pattern_key in ("import_type_named_only", "import_type_default_named"):
            for node in self.run_ast_grep(source_code, self._get_pattern(pattern_key)):
                module = _module_from_literal(self._get_match(node, "MODULE"))
                if not module:
                    continue
                for name in _extract_named_imports(self._get_match(node, "NAMES")):
                    add_edge(f"{module}.{name}", self._get_line_number(node))

        return _dedupe_relationships(edges)
|
||||
|
||||
|
||||
# Public names of this module: the two concrete language processors.
__all__ = [
    "AstGrepJavaScriptProcessor",
    "AstGrepTypeScriptProcessor",
]
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
"""JavaScript ast-grep patterns for relationship extraction.
|
||||
|
||||
These patterns are used by CodexLens' optional ast-grep processors to extract:
|
||||
- IMPORTS: ES module imports + CommonJS require()
|
||||
- INHERITS: class extends relationships
|
||||
|
||||
Pattern Syntax (ast-grep-py 0.40+):
|
||||
$VAR - Single metavariable (matches one AST node)
|
||||
$$$VAR - Multiple metavariable (matches zero or more nodes)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict, List
|
||||
|
||||
|
||||
# Pattern name -> ast-grep pattern string. `$NAME` captures a single node and
# `$$$NAME` captures zero or more nodes (see the module docstring).
JAVASCRIPT_PATTERNS: Dict[str, str] = {
    # ES module imports
    # import React from "react"
    # import React, { useEffect } from "react"
    # import { useEffect } from "react"
    # import * as fs from "fs"
    "import_from": "import $$$IMPORTS from $MODULE",
    "import_named_only": "import {$$$NAMES} from $MODULE",
    "import_default_named": "import $DEFAULT, {$$$NAMES} from $MODULE",
    # Side-effect import: import "./styles.css"
    "import_side_effect": "import $MODULE",

    # CommonJS require(): const fs = require("fs")
    "require_call": "require($MODULE)",

    # Class inheritance: class Child extends Base {}
    "class_extends": "class $NAME extends $BASE $$$BODY",
}
|
||||
|
||||
|
||||
# Logical role -> metavariable name (without the `$` prefix) as it appears in
# the pattern strings of JAVASCRIPT_PATTERNS.
METAVARS = {
    "module": "MODULE",
    "import_names": "NAMES",
    "import_default": "DEFAULT",
    "class_name": "NAME",
    "class_base": "BASE",
}
|
||||
|
||||
|
||||
# Relationship kind -> names of the patterns (keys of JAVASCRIPT_PATTERNS)
# that extract that kind of relationship.
RELATIONSHIP_PATTERNS: Dict[str, List[str]] = {
    "imports": [
        "import_from",
        "import_named_only",
        "import_default_named",
        "import_side_effect",
        "require_call",
    ],
    "inheritance": ["class_extends"],
}
|
||||
|
||||
|
||||
def get_pattern(pattern_name: str) -> str:
    """Return the JavaScript ast-grep pattern registered under ``pattern_name``.

    Raises:
        KeyError: If ``pattern_name`` is not a key of ``JAVASCRIPT_PATTERNS``;
            the message lists the available pattern names.
    """
    if pattern_name in JAVASCRIPT_PATTERNS:
        return JAVASCRIPT_PATTERNS[pattern_name]

    known = list(JAVASCRIPT_PATTERNS.keys())
    raise KeyError(f"Unknown JS pattern: {pattern_name}. Available: {known}")
|
||||
|
||||
|
||||
def get_patterns_for_relationship(rel_type: str) -> List[str]:
    """Return the pattern names registered for ``rel_type``.

    An unknown relationship type yields an empty list.
    """
    if rel_type in RELATIONSHIP_PATTERNS:
        return RELATIONSHIP_PATTERNS[rel_type]
    return []
|
||||
|
||||
|
||||
def get_metavar(name: str) -> str:
    """Return the metavariable name for a logical role.

    Roles missing from ``METAVARS`` fall back to ``name`` upper-cased.
    """
    fallback = name.upper()
    if name in METAVARS:
        return METAVARS[name]
    return fallback
|
||||
|
||||
|
||||
# Public names of this module: the pattern tables and their lookup helpers.
__all__ = [
    "JAVASCRIPT_PATTERNS",
    "METAVARS",
    "RELATIONSHIP_PATTERNS",
    "get_pattern",
    "get_patterns_for_relationship",
    "get_metavar",
]
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
"""TypeScript ast-grep patterns for relationship extraction.
|
||||
|
||||
This module extends the JavaScript patterns with TypeScript-specific syntax
|
||||
such as `import type` and `interface ... extends ...`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict, List
|
||||
|
||||
from codexlens.parsers.patterns.javascript import (
|
||||
METAVARS,
|
||||
RELATIONSHIP_PATTERNS as _JS_RELATIONSHIP_PATTERNS,
|
||||
JAVASCRIPT_PATTERNS,
|
||||
)
|
||||
|
||||
|
||||
# Pattern name -> ast-grep pattern string. Starts from every JavaScript pattern
# and adds the TypeScript-only entries below.
TYPESCRIPT_PATTERNS: Dict[str, str] = {
    **JAVASCRIPT_PATTERNS,
    # Type-only imports
    "import_type_from": "import type $$$IMPORTS from $MODULE",
    "import_type_named_only": "import type {$$$NAMES} from $MODULE",
    "import_type_default_named": "import type $DEFAULT, {$$$NAMES} from $MODULE",
    # Interface inheritance: interface Foo extends Bar {}
    "interface_extends": "interface $NAME extends $BASE $$$BODY",
}
|
||||
|
||||
|
||||
# Relationship kind -> pattern names (keys of TYPESCRIPT_PATTERNS). The
# "imports" and "inheritance" lists are rebuilt as new lists: the JavaScript
# names first, followed by the TypeScript-only names.
RELATIONSHIP_PATTERNS: Dict[str, List[str]] = {
    **_JS_RELATIONSHIP_PATTERNS,
    "imports": [
        *_JS_RELATIONSHIP_PATTERNS.get("imports", []),
        "import_type_from",
        "import_type_named_only",
        "import_type_default_named",
    ],
    "inheritance": [
        *_JS_RELATIONSHIP_PATTERNS.get("inheritance", []),
        "interface_extends",
    ],
}
|
||||
|
||||
|
||||
def get_pattern(pattern_name: str) -> str:
    """Return the TypeScript ast-grep pattern registered under ``pattern_name``.

    Raises:
        KeyError: If ``pattern_name`` is not a key of ``TYPESCRIPT_PATTERNS``;
            the message lists the available pattern names.
    """
    if pattern_name in TYPESCRIPT_PATTERNS:
        return TYPESCRIPT_PATTERNS[pattern_name]

    known = list(TYPESCRIPT_PATTERNS.keys())
    raise KeyError(f"Unknown TS pattern: {pattern_name}. Available: {known}")
|
||||
|
||||
|
||||
def get_patterns_for_relationship(rel_type: str) -> List[str]:
    """Return the pattern names registered for ``rel_type``.

    An unknown relationship type yields an empty list.
    """
    if rel_type in RELATIONSHIP_PATTERNS:
        return RELATIONSHIP_PATTERNS[rel_type]
    return []
|
||||
|
||||
|
||||
def get_metavar(name: str) -> str:
    """Return the metavariable name for a logical role.

    Roles missing from ``METAVARS`` fall back to ``name`` upper-cased.
    """
    fallback = name.upper()
    if name in METAVARS:
        return METAVARS[name]
    return fallback
|
||||
|
||||
|
||||
# Public names of this module: the pattern tables and their lookup helpers.
__all__ = [
    "TYPESCRIPT_PATTERNS",
    "METAVARS",
    "RELATIONSHIP_PATTERNS",
    "get_pattern",
    "get_patterns_for_relationship",
    "get_metavar",
]
|
||||
|
||||
@@ -34,8 +34,9 @@ if TYPE_CHECKING:
|
||||
class TreeSitterSymbolParser:
|
||||
"""Parser using tree-sitter for AST-level symbol extraction.
|
||||
|
||||
Supports optional ast-grep integration for Python relationship extraction
|
||||
when config.use_astgrep is True and ast-grep-py is available.
|
||||
Supports optional ast-grep integration for relationship extraction
|
||||
(Python/JavaScript/TypeScript) when config.use_astgrep is True and
|
||||
ast-grep-py is available.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -105,24 +106,33 @@ class TreeSitterSymbolParser:
|
||||
"""Check if ast-grep should be used for relationship extraction.
|
||||
|
||||
Returns:
|
||||
True if config.use_astgrep is True and language is Python
|
||||
True if config.use_astgrep is True and language is supported
|
||||
"""
|
||||
if self._config is None:
|
||||
return False
|
||||
if not getattr(self._config, "use_astgrep", False):
|
||||
return False
|
||||
return self.language_id == "python"
|
||||
return self.language_id in {"python", "javascript", "typescript"}
|
||||
|
||||
def _initialize_astgrep_processor(self) -> None:
|
||||
"""Initialize ast-grep processor for Python relationship extraction."""
|
||||
"""Initialize ast-grep processor for relationship extraction."""
|
||||
try:
|
||||
from codexlens.parsers.astgrep_processor import (
|
||||
AstGrepPythonProcessor,
|
||||
is_astgrep_processor_available,
|
||||
)
|
||||
from codexlens.parsers.astgrep_js_ts_processor import (
|
||||
AstGrepJavaScriptProcessor,
|
||||
AstGrepTypeScriptProcessor,
|
||||
)
|
||||
|
||||
if is_astgrep_processor_available():
|
||||
self._astgrep_processor = AstGrepPythonProcessor(self.path)
|
||||
if self.language_id == "python":
|
||||
self._astgrep_processor = AstGrepPythonProcessor(self.path)
|
||||
elif self.language_id == "javascript":
|
||||
self._astgrep_processor = AstGrepJavaScriptProcessor(self.path)
|
||||
elif self.language_id == "typescript":
|
||||
self._astgrep_processor = AstGrepTypeScriptProcessor(self.path)
|
||||
except ImportError:
|
||||
self._astgrep_processor = None
|
||||
|
||||
@@ -222,9 +232,9 @@ class TreeSitterSymbolParser:
|
||||
path: Path,
|
||||
source_code: Optional[str] = None,
|
||||
) -> List[CodeRelationship]:
|
||||
"""Extract relationships, optionally using ast-grep for Python.
|
||||
"""Extract relationships, optionally using ast-grep.
|
||||
|
||||
When config.use_astgrep is True and ast-grep is available for Python,
|
||||
When config.use_astgrep is True and an ast-grep processor is available,
|
||||
uses ast-grep for relationship extraction. Otherwise, uses tree-sitter.
|
||||
|
||||
Args:
|
||||
@@ -236,32 +246,31 @@ class TreeSitterSymbolParser:
|
||||
Returns:
|
||||
List of extracted relationships
|
||||
"""
|
||||
# Try ast-grep first if configured and available for this language.
|
||||
if self._astgrep_processor is not None and source_code is not None:
|
||||
try:
|
||||
astgrep_rels = self._extract_relationships_astgrep(source_code, path)
|
||||
if astgrep_rels is not None:
|
||||
return astgrep_rels
|
||||
except Exception:
|
||||
# Fall back to tree-sitter on ast-grep failure
|
||||
pass
|
||||
|
||||
if self.language_id == "python":
|
||||
# Try ast-grep first if configured and available
|
||||
if self._astgrep_processor is not None and source_code is not None:
|
||||
try:
|
||||
astgrep_rels = self._extract_python_relationships_astgrep(
|
||||
source_code, path
|
||||
)
|
||||
if astgrep_rels is not None:
|
||||
return astgrep_rels
|
||||
except Exception:
|
||||
# Fall back to tree-sitter on ast-grep failure
|
||||
pass
|
||||
return self._extract_python_relationships(source_bytes, root, path)
|
||||
if self.language_id in {"javascript", "typescript"}:
|
||||
return self._extract_js_ts_relationships(source_bytes, root, path)
|
||||
return []
|
||||
|
||||
def _extract_python_relationships_astgrep(
|
||||
def _extract_relationships_astgrep(
|
||||
self,
|
||||
source_code: str,
|
||||
path: Path,
|
||||
) -> Optional[List[CodeRelationship]]:
|
||||
"""Extract Python relationships using ast-grep processor.
|
||||
"""Extract relationships using ast-grep processor.
|
||||
|
||||
Args:
|
||||
source_code: Python source code text
|
||||
source_code: Source code text
|
||||
path: File path
|
||||
|
||||
Returns:
|
||||
|
||||
@@ -694,13 +694,14 @@ class ChainSearchEngine:
|
||||
k: int = 10,
|
||||
coarse_k: int = 100,
|
||||
options: Optional[SearchOptions] = None,
|
||||
strategy: Optional[Literal["binary", "binary_rerank", "dense_rerank", "staged"]] = None,
|
||||
strategy: Optional[Literal["binary", "binary_rerank", "dense_rerank", "staged", "hybrid"]] = None,
|
||||
) -> ChainSearchResult:
|
||||
"""Unified cascade search entry point with strategy selection.
|
||||
|
||||
Provides a single interface for cascade search with configurable strategy:
|
||||
- "binary": Uses binary vector coarse ranking + dense fine ranking (fastest)
|
||||
- "binary_rerank": Uses binary vector coarse ranking + cross-encoder reranking (best balance)
|
||||
- "hybrid": Alias for "binary_rerank" (backward compat)
|
||||
- "dense_rerank": Uses dense vector coarse ranking + cross-encoder reranking
|
||||
- "staged": 4-stage pipeline: binary -> LSP expand -> clustering -> optional rerank
|
||||
|
||||
@@ -731,7 +732,7 @@ class ChainSearchEngine:
|
||||
"""
|
||||
# Strategy priority: parameter > config > default
|
||||
effective_strategy = strategy
|
||||
valid_strategies = ("binary", "binary_rerank", "dense_rerank", "staged")
|
||||
valid_strategies = ("binary", "binary_rerank", "dense_rerank", "staged", "hybrid")
|
||||
if effective_strategy is None:
|
||||
# Not passed via parameter, check config
|
||||
if self._config is not None:
|
||||
@@ -743,6 +744,10 @@ class ChainSearchEngine:
|
||||
if effective_strategy not in valid_strategies:
|
||||
effective_strategy = "binary"
|
||||
|
||||
# Normalize backward-compat alias
|
||||
if effective_strategy == "hybrid":
|
||||
effective_strategy = "binary_rerank"
|
||||
|
||||
if effective_strategy == "binary":
|
||||
return self.binary_cascade_search(query, source_path, k, coarse_k, options)
|
||||
elif effective_strategy == "binary_rerank":
|
||||
|
||||
Reference in New Issue
Block a user