Mirror of https://github.com/catlog22/Claude-Code-Workflow.git, synced 2026-02-10 02:24:35 +08:00
feat: Add support for Tree-Sitter parsing and enhance SQLite storage performance
@@ -17,6 +17,9 @@ dependencies = [
     "rich>=13",
     "pydantic>=2.0",
     "tree-sitter>=0.20",
+    "tree-sitter-python>=0.25",
+    "tree-sitter-javascript>=0.25",
+    "tree-sitter-typescript>=0.23",
     "pathspec>=0.11",
 ]
 
@@ -24,6 +27,7 @@ dependencies = [
 semantic = [
     "numpy>=1.24",
     "sentence-transformers>=2.2",
+    "fastembed>=0.2",
 ]
 
 [project.urls]
@@ -31,4 +35,3 @@ Homepage = "https://github.com/openai/codex-lens"
 
 [tool.setuptools]
 package-dir = { "" = "src" }
-
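Note: the tree-sitter core binding was already a dependency; this change pins the per-language grammar wheels (and adds fastembed to the semantic extra). The grammars remain effectively optional in practice because the parser factory below only uses them when they import cleanly. A small availability probe, written here purely as an illustration and not part of the commit:

# Sketch: report which optional Tree-Sitter grammar bindings are importable.
# Package names are taken from the dependency list above.
import importlib

def available_grammars() -> dict:
    status = {}
    for name in ("tree_sitter_python", "tree_sitter_javascript", "tree_sitter_typescript"):
        try:
            importlib.import_module(name)
            status[name] = True
        except Exception:
            status[name] = False
    return status

print(available_grammars())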
@@ -62,25 +62,42 @@ def _iter_source_files(
     languages: Optional[List[str]] = None,
 ) -> Iterable[Path]:
     ignore_dirs = {".git", ".venv", "venv", "node_modules", "__pycache__", ".codexlens"}
-    ignore_patterns = _load_gitignore(base_path)
-    pathspec = None
-    if ignore_patterns:
-        try:
-            from pathspec import PathSpec
-            from pathspec.patterns.gitwildmatch import GitWildMatchPattern
-
-            pathspec = PathSpec.from_lines(GitWildMatchPattern, ignore_patterns)
-        except Exception:
-            pathspec = None
+    # Cache for PathSpec objects per directory
+    pathspec_cache: Dict[Path, Optional[Any]] = {}
+
+    def get_pathspec_for_dir(dir_path: Path) -> Optional[Any]:
+        """Get PathSpec for a directory, loading .gitignore if present."""
+        if dir_path in pathspec_cache:
+            return pathspec_cache[dir_path]
+
+        ignore_patterns = _load_gitignore(dir_path)
+        if not ignore_patterns:
+            pathspec_cache[dir_path] = None
+            return None
+
+        try:
+            from pathspec import PathSpec
+            from pathspec.patterns.gitwildmatch import GitWildMatchPattern
+
+            pathspec = PathSpec.from_lines(GitWildMatchPattern, ignore_patterns)
+            pathspec_cache[dir_path] = pathspec
+            return pathspec
+        except Exception:
+            pathspec_cache[dir_path] = None
+            return None
 
     for root, dirs, files in os.walk(base_path):
         dirs[:] = [d for d in dirs if d not in ignore_dirs and not d.startswith(".")]
         root_path = Path(root)
+
+        # Get pathspec for current directory
+        pathspec = get_pathspec_for_dir(root_path)
+
         for file in files:
             if file.startswith("."):
                 continue
             full_path = root_path / file
-            rel = full_path.relative_to(base_path)
+            rel = full_path.relative_to(root_path)
             if pathspec and pathspec.match_file(str(rel)):
                 continue
             language_id = config.language_for_path(full_path)
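Note: indexing now builds one PathSpec per directory (cached in pathspec_cache) instead of a single repo-root spec, and matches candidate files relative to the directory whose .gitignore is being applied (relative_to(root_path) instead of relative_to(base_path)). A standalone sketch of the pathspec matching this relies on; the patterns below are examples, not taken from the diff:

# Sketch: gitignore-style matching with the pathspec library, as used above.
from pathspec import PathSpec
from pathspec.patterns.gitwildmatch import GitWildMatchPattern

patterns = ["*.log", "build/"]            # e.g. lines read from a .gitignore
spec = PathSpec.from_lines(GitWildMatchPattern, patterns)

print(spec.match_file("debug.log"))       # True
print(spec.match_file("build/out.o"))     # True
print(spec.match_file("src/main.py"))     # False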
@@ -112,6 +129,25 @@ def _get_store_for_path(path: Path, use_global: bool = False) -> tuple[SQLiteSto
     return SQLiteStore(config.db_path), config.db_path
 
 
+def _is_safe_to_clean(target_dir: Path) -> bool:
+    """Verify directory is a CodexLens directory before deletion.
+
+    Checks for presence of .codexlens directory or index.db file.
+    """
+    if not target_dir.exists():
+        return True
+
+    # Check if it's the .codexlens directory itself
+    if target_dir.name == ".codexlens":
+        # Verify it contains index.db or cache directory
+        return (target_dir / "index.db").exists() or (target_dir / "cache").exists()
+
+    # Check if it contains .codexlens subdirectory
+    return (target_dir / ".codexlens").exists()
+
+
 @app.command()
 def init(
     path: Path = typer.Argument(Path("."), exists=True, file_okay=False, dir_okay=True, help="Project root to index."),
@@ -469,12 +505,16 @@ def clean(
     config = Config()
     import shutil
     if config.index_dir.exists():
+        if not _is_safe_to_clean(config.index_dir):
+            raise CodexLensError(f"Safety check failed: {config.index_dir} does not appear to be a CodexLens directory")
         shutil.rmtree(config.index_dir)
         result = {"cleaned": str(config.index_dir), "type": "global"}
     else:
         workspace = WorkspaceConfig.from_path(base_path)
         if workspace and workspace.codexlens_dir.exists():
             import shutil
+            if not _is_safe_to_clean(workspace.codexlens_dir):
+                raise CodexLensError(f"Safety check failed: {workspace.codexlens_dir} does not appear to be a CodexLens directory")
             shutil.rmtree(workspace.codexlens_dir)
             result = {"cleaned": str(workspace.codexlens_dir), "type": "workspace"}
         else:
@@ -1,8 +1,8 @@
 """Parser factory for CodexLens.
 
-The project currently ships lightweight regex-based parsers per language.
-They can be swapped for tree-sitter based parsers later without changing
-CLI or storage interfaces.
+Python and JavaScript/TypeScript parsing use Tree-Sitter grammars when
+available. Regex fallbacks are retained to preserve the existing parser
+interface and behavior in minimal environments.
 """
 
 from __future__ import annotations
@@ -10,7 +10,16 @@ from __future__ import annotations
 import re
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Dict, List, Optional, Protocol
+from typing import Dict, Iterable, List, Optional, Protocol
+
+try:
+    from tree_sitter import Language as TreeSitterLanguage
+    from tree_sitter import Node as TreeSitterNode
+    from tree_sitter import Parser as TreeSitterParser
+except Exception:  # pragma: no cover
+    TreeSitterLanguage = None  # type: ignore[assignment]
+    TreeSitterNode = None  # type: ignore[assignment]
+    TreeSitterParser = None  # type: ignore[assignment]
 
 from codexlens.config import Config
 from codexlens.entities import IndexedFile, Symbol
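Note: together with the new module docstring, the guarded import above sets up the pattern the rest of the file follows: use Tree-Sitter when the bindings and a grammar import, otherwise fall back to the regex scanners. A self-contained sketch of that shape, with illustrative helper names rather than the ones defined later in this diff:

# Sketch of the "Tree-Sitter when available, regex otherwise" dispatch shape.
import re

def parse_with_tree_sitter(text: str):
    try:
        import tree_sitter  # noqa: F401  (a grammar package is also needed)
    except Exception:
        return None   # unavailable: tell the caller to fall back
    return []         # placeholder for the real tree-walking implementation

def parse_with_regex(text: str):
    return re.findall(r"^\s*(?:async\s+)?def\s+(\w+)", text, flags=re.M)

def parse_symbol_names(text: str):
    names = parse_with_tree_sitter(text)
    return names if names is not None else parse_with_regex(text)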
@@ -25,11 +34,10 @@ class SimpleRegexParser:
     language_id: str
 
     def parse(self, text: str, path: Path) -> IndexedFile:
-        symbols: List[Symbol] = []
         if self.language_id == "python":
             symbols = _parse_python_symbols(text)
         elif self.language_id in {"javascript", "typescript"}:
-            symbols = _parse_js_ts_symbols(text)
+            symbols = _parse_js_ts_symbols(text, self.language_id, path)
         elif self.language_id == "java":
             symbols = _parse_java_symbols(text)
         elif self.language_id == "go":
@@ -57,24 +65,135 @@ class ParserFactory:
 
 
 _PY_CLASS_RE = re.compile(r"^\s*class\s+([A-Za-z_]\w*)\b")
-_PY_DEF_RE = re.compile(r"^\s*def\s+([A-Za-z_]\w*)\s*\(")
+_PY_DEF_RE = re.compile(r"^\s*(?:async\s+)?def\s+([A-Za-z_]\w*)\s*\(")
+
+_TREE_SITTER_LANGUAGE_CACHE: Dict[str, TreeSitterLanguage] = {}
 
 
-def _parse_python_symbols(text: str) -> List[Symbol]:
+def _get_tree_sitter_language(language_id: str, path: Path | None = None) -> TreeSitterLanguage | None:
+    if TreeSitterLanguage is None:
+        return None
+
+    cache_key = language_id
+    if language_id == "typescript" and path is not None and path.suffix.lower() == ".tsx":
+        cache_key = "tsx"
+
+    cached = _TREE_SITTER_LANGUAGE_CACHE.get(cache_key)
+    if cached is not None:
+        return cached
+
+    try:
+        if cache_key == "python":
+            import tree_sitter_python  # type: ignore[import-not-found]
+
+            language = TreeSitterLanguage(tree_sitter_python.language())
+        elif cache_key == "javascript":
+            import tree_sitter_javascript  # type: ignore[import-not-found]
+
+            language = TreeSitterLanguage(tree_sitter_javascript.language())
+        elif cache_key == "typescript":
+            import tree_sitter_typescript  # type: ignore[import-not-found]
+
+            language = TreeSitterLanguage(tree_sitter_typescript.language_typescript())
+        elif cache_key == "tsx":
+            import tree_sitter_typescript  # type: ignore[import-not-found]
+
+            language = TreeSitterLanguage(tree_sitter_typescript.language_tsx())
+        else:
+            return None
+    except Exception:
+        return None
+
+    _TREE_SITTER_LANGUAGE_CACHE[cache_key] = language
+    return language
+
+
+def _iter_tree_sitter_nodes(root: TreeSitterNode) -> Iterable[TreeSitterNode]:
+    stack: List[TreeSitterNode] = [root]
+    while stack:
+        node = stack.pop()
+        yield node
+        for child in reversed(node.children):
+            stack.append(child)
+
+
+def _node_text(source_bytes: bytes, node: TreeSitterNode) -> str:
+    return source_bytes[node.start_byte:node.end_byte].decode("utf8")
+
+
+def _node_range(node: TreeSitterNode) -> tuple[int, int]:
+    start_line = node.start_point[0] + 1
+    end_line = node.end_point[0] + 1
+    return (start_line, max(start_line, end_line))
+
+
+def _python_kind_for_function_node(node: TreeSitterNode) -> str:
+    parent = node.parent
+    while parent is not None:
+        if parent.type in {"function_definition", "async_function_definition"}:
+            return "function"
+        if parent.type == "class_definition":
+            return "method"
+        parent = parent.parent
+    return "function"
+
+
+def _parse_python_symbols_tree_sitter(text: str) -> List[Symbol] | None:
+    if TreeSitterParser is None:
+        return None
+
+    language = _get_tree_sitter_language("python")
+    if language is None:
+        return None
+
+    parser = TreeSitterParser()
+    if hasattr(parser, "set_language"):
+        parser.set_language(language)  # type: ignore[attr-defined]
+    else:
+        parser.language = language  # type: ignore[assignment]
+
+    source_bytes = text.encode("utf8")
+    tree = parser.parse(source_bytes)
+    root = tree.root_node
+
+    symbols: List[Symbol] = []
+    for node in _iter_tree_sitter_nodes(root):
+        if node.type == "class_definition":
+            name_node = node.child_by_field_name("name")
+            if name_node is None:
+                continue
+            symbols.append(Symbol(
+                name=_node_text(source_bytes, name_node),
+                kind="class",
+                range=_node_range(node),
+            ))
+        elif node.type in {"function_definition", "async_function_definition"}:
+            name_node = node.child_by_field_name("name")
+            if name_node is None:
+                continue
+            symbols.append(Symbol(
+                name=_node_text(source_bytes, name_node),
+                kind=_python_kind_for_function_node(node),
+                range=_node_range(node),
+            ))
+
+    return symbols
+
+
+def _parse_python_symbols_regex(text: str) -> List[Symbol]:
     symbols: List[Symbol] = []
     current_class_indent: Optional[int] = None
     for i, line in enumerate(text.splitlines(), start=1):
-        if _PY_CLASS_RE.match(line):
-            name = _PY_CLASS_RE.match(line).group(1)
+        class_match = _PY_CLASS_RE.match(line)
+        if class_match:
             current_class_indent = len(line) - len(line.lstrip(" "))
-            symbols.append(Symbol(name=name, kind="class", range=(i, i)))
+            symbols.append(Symbol(name=class_match.group(1), kind="class", range=(i, i)))
             continue
         def_match = _PY_DEF_RE.match(line)
         if def_match:
-            name = def_match.group(1)
             indent = len(line) - len(line.lstrip(" "))
             kind = "method" if current_class_indent is not None and indent > current_class_indent else "function"
-            symbols.append(Symbol(name=name, kind=kind, range=(i, i)))
+            symbols.append(Symbol(name=def_match.group(1), kind=kind, range=(i, i)))
             continue
         if current_class_indent is not None:
             indent = len(line) - len(line.lstrip(" "))
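Note: the Tree-Sitter path above reduces to: wrap the grammar in a Language, attach it to a Parser (newer bindings set .language, older ones use set_language, hence the hasattr check), parse UTF-8 bytes, then walk root_node collecting class and function definitions. A compressed standalone version of that flow, assuming the tree-sitter and tree-sitter-python packages are installed; this is an illustration, not the committed code:

# Sketch: the parse flow used by _parse_python_symbols_tree_sitter.
import tree_sitter_python
from tree_sitter import Language, Parser

language = Language(tree_sitter_python.language())
parser = Parser()
parser.language = language  # older bindings: parser.set_language(language)

source = b"class Greeter:\n    def hello(self):\n        return 'hi'\n"
tree = parser.parse(source)

for node in tree.root_node.children:
    print(node.type, node.start_point[0] + 1, node.end_point[0] + 1)
# -> class_definition 1 3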
@@ -83,23 +202,153 @@ def _parse_python_symbols(text: str) -> List[Symbol]:
     return symbols
 
 
-_JS_FUNC_RE = re.compile(r"^\s*(?:export\s+)?function\s+([A-Za-z_$][\w$]*)\s*\(")
+def _parse_python_symbols(text: str) -> List[Symbol]:
+    symbols = _parse_python_symbols_tree_sitter(text)
+    if symbols is not None:
+        return symbols
+    return _parse_python_symbols_regex(text)
+
+
+_JS_FUNC_RE = re.compile(r"^\s*(?:export\s+)?(?:async\s+)?function\s+([A-Za-z_$][\w$]*)\s*\(")
 _JS_CLASS_RE = re.compile(r"^\s*(?:export\s+)?class\s+([A-Za-z_$][\w$]*)\b")
+_JS_ARROW_RE = re.compile(
+    r"^\s*(?:export\s+)?(?:const|let|var)\s+([A-Za-z_$][\w$]*)\s*=\s*(?:async\s*)?\(?[^)]*\)?\s*=>"
+)
+_JS_METHOD_RE = re.compile(r"^\s+(?:async\s+)?([A-Za-z_$][\w$]*)\s*\([^)]*\)\s*\{")
 
 
-def _parse_js_ts_symbols(text: str) -> List[Symbol]:
+def _js_has_class_ancestor(node: TreeSitterNode) -> bool:
+    parent = node.parent
+    while parent is not None:
+        if parent.type in {"class_declaration", "class"}:
+            return True
+        parent = parent.parent
+    return False
+
+
+def _parse_js_ts_symbols_tree_sitter(
+    text: str,
+    language_id: str,
+    path: Path | None = None,
+) -> List[Symbol] | None:
+    if TreeSitterParser is None:
+        return None
+
+    language = _get_tree_sitter_language(language_id, path)
+    if language is None:
+        return None
+
+    parser = TreeSitterParser()
+    if hasattr(parser, "set_language"):
+        parser.set_language(language)  # type: ignore[attr-defined]
+    else:
+        parser.language = language  # type: ignore[assignment]
+
+    source_bytes = text.encode("utf8")
+    tree = parser.parse(source_bytes)
+    root = tree.root_node
+
     symbols: List[Symbol] = []
+    for node in _iter_tree_sitter_nodes(root):
+        if node.type in {"class_declaration", "class"}:
+            name_node = node.child_by_field_name("name")
+            if name_node is None:
+                continue
+            symbols.append(Symbol(
+                name=_node_text(source_bytes, name_node),
+                kind="class",
+                range=_node_range(node),
+            ))
+        elif node.type in {"function_declaration", "generator_function_declaration"}:
+            name_node = node.child_by_field_name("name")
+            if name_node is None:
+                continue
+            symbols.append(Symbol(
+                name=_node_text(source_bytes, name_node),
+                kind="function",
+                range=_node_range(node),
+            ))
+        elif node.type == "variable_declarator":
+            name_node = node.child_by_field_name("name")
+            value_node = node.child_by_field_name("value")
+            if (
+                name_node is None
+                or value_node is None
+                or name_node.type not in {"identifier", "property_identifier"}
+                or value_node.type != "arrow_function"
+            ):
+                continue
+            symbols.append(Symbol(
+                name=_node_text(source_bytes, name_node),
+                kind="function",
+                range=_node_range(node),
+            ))
+        elif node.type == "method_definition" and _js_has_class_ancestor(node):
+            name_node = node.child_by_field_name("name")
+            if name_node is None:
+                continue
+            name = _node_text(source_bytes, name_node)
+            if name == "constructor":
+                continue
+            symbols.append(Symbol(
+                name=name,
+                kind="method",
+                range=_node_range(node),
+            ))
+
+    return symbols
+
+
+def _parse_js_ts_symbols_regex(text: str) -> List[Symbol]:
+    symbols: List[Symbol] = []
+    in_class = False
+    class_brace_depth = 0
+    brace_depth = 0
+
     for i, line in enumerate(text.splitlines(), start=1):
+        brace_depth += line.count("{") - line.count("}")
+
+        class_match = _JS_CLASS_RE.match(line)
+        if class_match:
+            symbols.append(Symbol(name=class_match.group(1), kind="class", range=(i, i)))
+            in_class = True
+            class_brace_depth = brace_depth
+            continue
+
+        if in_class and brace_depth < class_brace_depth:
+            in_class = False
+
         func_match = _JS_FUNC_RE.match(line)
         if func_match:
             symbols.append(Symbol(name=func_match.group(1), kind="function", range=(i, i)))
             continue
-        class_match = _JS_CLASS_RE.match(line)
-        if class_match:
-            symbols.append(Symbol(name=class_match.group(1), kind="class", range=(i, i)))
+
+        arrow_match = _JS_ARROW_RE.match(line)
+        if arrow_match:
+            symbols.append(Symbol(name=arrow_match.group(1), kind="function", range=(i, i)))
+            continue
+
+        if in_class:
+            method_match = _JS_METHOD_RE.match(line)
+            if method_match:
+                name = method_match.group(1)
+                if name != "constructor":
+                    symbols.append(Symbol(name=name, kind="method", range=(i, i)))
+
     return symbols
 
 
+def _parse_js_ts_symbols(
+    text: str,
+    language_id: str = "javascript",
+    path: Path | None = None,
+) -> List[Symbol]:
+    symbols = _parse_js_ts_symbols_tree_sitter(text, language_id, path)
+    if symbols is not None:
+        return symbols
+    return _parse_js_ts_symbols_regex(text)
+
+
 _JAVA_CLASS_RE = re.compile(r"^\s*(?:public\s+)?class\s+([A-Za-z_]\w*)\b")
 _JAVA_METHOD_RE = re.compile(
     r"^\s*(?:public|private|protected|static|\s)+[\w<>\[\]]+\s+([A-Za-z_]\w*)\s*\(
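Note: the public entry point keeps its old name but gains language_id and path parameters so .tsx files can select the TSX grammar, and it falls back to the extended regex scanner (which now also catches async functions, arrow functions, and class methods) when no grammar is importable. A usage sketch based on the signature in this hunk, assuming the codexlens package is importable:

# Sketch: calling the JS/TS symbol parser with the new signature.
from pathlib import Path

from codexlens.parsers.factory import _parse_js_ts_symbols

code = "export const add = (a, b) => a + b\nclass Calc {\n  square(x) { return x * x }\n}"
for sym in _parse_js_ts_symbols(code, language_id="typescript", path=Path("calc.tsx")):
    print(sym.name, sym.kind, sym.range)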
@@ -151,4 +400,3 @@ def _parse_generic_symbols(text: str) -> List[Symbol]:
         if def_match:
             symbols.append(Symbol(name=def_match.group(1), kind="function", range=(i, i)))
     return symbols
-
@@ -118,6 +118,59 @@ class SQLiteStore:
             )
             conn.commit()
 
+    def add_files(self, files_data: List[tuple[IndexedFile, str]]) -> None:
+        """Add multiple files in a single transaction for better performance.
+
+        Args:
+            files_data: List of (indexed_file, content) tuples
+        """
+        with self._lock:
+            conn = self._get_connection()
+            try:
+                conn.execute("BEGIN")
+
+                for indexed_file, content in files_data:
+                    path = str(Path(indexed_file.path).resolve())
+                    language = indexed_file.language
+                    mtime = Path(path).stat().st_mtime if Path(path).exists() else None
+                    line_count = content.count(chr(10)) + 1
+
+                    conn.execute(
+                        """
+                        INSERT INTO files(path, language, content, mtime, line_count)
+                        VALUES(?, ?, ?, ?, ?)
+                        ON CONFLICT(path) DO UPDATE SET
+                            language=excluded.language,
+                            content=excluded.content,
+                            mtime=excluded.mtime,
+                            line_count=excluded.line_count
+                        """,
+                        (path, language, content, mtime, line_count),
+                    )
+
+                    row = conn.execute("SELECT id FROM files WHERE path=?", (path,)).fetchone()
+                    if not row:
+                        raise StorageError(f"Failed to read file id for {path}")
+                    file_id = int(row["id"])
+
+                    conn.execute("DELETE FROM symbols WHERE file_id=?", (file_id,))
+                    if indexed_file.symbols:
+                        conn.executemany(
+                            """
+                            INSERT INTO symbols(file_id, name, kind, start_line, end_line)
+                            VALUES(?, ?, ?, ?, ?)
+                            """,
+                            [
+                                (file_id, s.name, s.kind, s.range[0], s.range[1])
+                                for s in indexed_file.symbols
+                            ],
+                        )
+
+                conn.commit()
+            except Exception:
+                conn.rollback()
+                raise
+
     def remove_file(self, path: str | Path) -> bool:
         """Remove a file from the index."""
         with self._lock:
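Note: add_files is where the storage speed-up in the commit title comes from: the whole batch is written under a single explicit BEGIN/COMMIT instead of one commit per file. A standalone sqlite3 illustration of that pattern (not project code):

# Sketch: batching rows into one transaction instead of per-row commits.
import sqlite3

conn = sqlite3.connect(":memory:", isolation_level=None)  # explicit transaction control
conn.execute("CREATE TABLE files(path TEXT PRIMARY KEY, line_count INTEGER)")

rows = [(f"file_{i}.py", i) for i in range(1000)]

conn.execute("BEGIN")
try:
    conn.executemany(
        "INSERT INTO files(path, line_count) VALUES(?, ?) "
        "ON CONFLICT(path) DO UPDATE SET line_count=excluded.line_count",
        rows,
    )
    conn.execute("COMMIT")   # one journal flush for the whole batch
except Exception:
    conn.execute("ROLLBACK")
    raise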
@@ -178,7 +231,7 @@ class SQLiteStore:
         results: List[SearchResult] = []
         for row in rows:
             rank = float(row["rank"]) if row["rank"] is not None else 0.0
-            score = max(0.0, -rank)
+            score = abs(rank) if rank < 0 else 0.0
             results.append(
                 SearchResult(
                     path=row["path"],
codex-lens/tests/test_parsers.py (new file, 148 lines)
@@ -0,0 +1,148 @@
+"""Tests for CodexLens parsers."""
+
+from pathlib import Path
+
+import pytest
+
+from codexlens.parsers.factory import (
+    SimpleRegexParser,
+    _parse_js_ts_symbols,
+    _parse_python_symbols,
+)
+
+
+TREE_SITTER_JS_AVAILABLE = True
+try:
+    import tree_sitter_javascript  # type: ignore[import-not-found] # noqa: F401
+except Exception:
+    TREE_SITTER_JS_AVAILABLE = False
+
+
+class TestPythonParser:
+    """Tests for Python symbol parsing."""
+
+    def test_parse_function(self):
+        code = "def hello():\n    pass"
+        symbols = _parse_python_symbols(code)
+        assert len(symbols) == 1
+        assert symbols[0].name == "hello"
+        assert symbols[0].kind == "function"
+
+    def test_parse_async_function(self):
+        code = "async def fetch_data():\n    pass"
+        symbols = _parse_python_symbols(code)
+        assert len(symbols) == 1
+        assert symbols[0].name == "fetch_data"
+        assert symbols[0].kind == "function"
+
+    def test_parse_class(self):
+        code = "class MyClass:\n    pass"
+        symbols = _parse_python_symbols(code)
+        assert len(symbols) == 1
+        assert symbols[0].name == "MyClass"
+        assert symbols[0].kind == "class"
+
+    def test_parse_method(self):
+        code = "class MyClass:\n    def method(self):\n        pass"
+        symbols = _parse_python_symbols(code)
+        assert len(symbols) == 2
+        assert symbols[0].name == "MyClass"
+        assert symbols[0].kind == "class"
+        assert symbols[1].name == "method"
+        assert symbols[1].kind == "method"
+
+    def test_parse_async_method(self):
+        code = "class MyClass:\n    async def async_method(self):\n        pass"
+        symbols = _parse_python_symbols(code)
+        assert len(symbols) == 2
+        assert symbols[1].name == "async_method"
+        assert symbols[1].kind == "method"
+
+
+class TestJavaScriptParser:
+    """Tests for JavaScript/TypeScript symbol parsing."""
+
+    def test_parse_function(self):
+        code = "function hello() {}"
+        symbols = _parse_js_ts_symbols(code)
+        assert len(symbols) == 1
+        assert symbols[0].name == "hello"
+        assert symbols[0].kind == "function"
+
+    def test_parse_async_function(self):
+        code = "async function fetchData() {}"
+        symbols = _parse_js_ts_symbols(code)
+        assert len(symbols) == 1
+        assert symbols[0].name == "fetchData"
+        assert symbols[0].kind == "function"
+
+    def test_parse_arrow_function(self):
+        code = "const hello = () => {}"
+        symbols = _parse_js_ts_symbols(code)
+        assert len(symbols) == 1
+        assert symbols[0].name == "hello"
+        assert symbols[0].kind == "function"
+
+    def test_parse_async_arrow_function(self):
+        code = "const fetchData = async () => {}"
+        symbols = _parse_js_ts_symbols(code)
+        assert len(symbols) == 1
+        assert symbols[0].name == "fetchData"
+        assert symbols[0].kind == "function"
+
+    def test_parse_class(self):
+        code = "class MyClass {}"
+        symbols = _parse_js_ts_symbols(code)
+        assert len(symbols) == 1
+        assert symbols[0].name == "MyClass"
+        assert symbols[0].kind == "class"
+
+    def test_parse_export_function(self):
+        code = "export function hello() {}"
+        symbols = _parse_js_ts_symbols(code)
+        assert len(symbols) == 1
+        assert symbols[0].name == "hello"
+        assert symbols[0].kind == "function"
+
+    def test_parse_export_class(self):
+        code = "export class MyClass {}"
+        symbols = _parse_js_ts_symbols(code)
+        assert len(symbols) == 1
+        assert symbols[0].name == "MyClass"
+        assert symbols[0].kind == "class"
+
+    def test_parse_export_arrow_function(self):
+        code = "export const hello = () => {}"
+        symbols = _parse_js_ts_symbols(code)
+        assert len(symbols) == 1
+        assert symbols[0].name == "hello"
+        assert symbols[0].kind == "function"
+
+    @pytest.mark.skipif(not TREE_SITTER_JS_AVAILABLE, reason="tree-sitter-javascript not installed")
+    def test_parse_class_methods(self):
+        code = (
+            "class MyClass {\n"
+            "  method() {}\n"
+            "  async asyncMethod() {}\n"
+            "  static staticMethod() {}\n"
+            "  constructor() {}\n"
+            "}"
+        )
+        symbols = _parse_js_ts_symbols(code)
+        names_kinds = [(s.name, s.kind) for s in symbols]
+        assert ("MyClass", "class") in names_kinds
+        assert ("method", "method") in names_kinds
+        assert ("asyncMethod", "method") in names_kinds
+        assert ("staticMethod", "method") in names_kinds
+        assert all(name != "constructor" for name, _ in names_kinds)
+
+
+class TestParserInterface:
+    """High-level interface tests."""
+
+    def test_simple_parser_parse(self):
+        parser = SimpleRegexParser("python")
+        indexed = parser.parse("def hello():\n    pass", Path("test.py"))
+        assert indexed.language == "python"
+        assert len(indexed.symbols) == 1
+        assert indexed.symbols[0].name == "hello"
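Note: the tests exercise both parse paths, and the class-method test is skipped when tree-sitter-javascript is missing; they can be run with pytest codex-lens/tests/test_parsers.py. A quick manual check mirroring TestParserInterface, assuming the codexlens package is installed:

# Sketch: exercising the high-level parser interface covered above.
from pathlib import Path

from codexlens.parsers.factory import SimpleRegexParser

indexed = SimpleRegexParser("python").parse("async def fetch_data():\n    pass", Path("demo.py"))
print(indexed.language, [(s.name, s.kind) for s in indexed.symbols])
# -> python [('fetch_data', 'function')]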