mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-28 09:23:08 +08:00
Add benchmark results and tests for LSP graph builder and staged search
- Introduced a new benchmark results file for performance comparison on 2026-02-09. - Added a test for LspGraphBuilder to ensure it does not expand nodes at maximum depth. - Created a test for the staged search pipeline to validate fallback behavior when stage 1 returns empty results.
This commit is contained in:
340
ccw/src/utils/outline-parser.ts
Normal file
340
ccw/src/utils/outline-parser.ts
Normal file
@@ -0,0 +1,340 @@
|
||||
/**
|
||||
* Core AST outline parsing engine using web-tree-sitter.
|
||||
*
|
||||
* Parses source files into structured symbol outlines (functions, classes, methods, etc.)
|
||||
* with line offsets compatible with read_file(offset, limit).
|
||||
*/
|
||||
|
||||
import { createRequire } from 'node:module';
|
||||
import { dirname, join } from 'path';
|
||||
import Parser from 'web-tree-sitter';
|
||||
import type { LanguageConfig } from './outline-queries.js';
|
||||
|
||||
/** One named definition found in a parsed source file. */
export interface OutlineSymbol {
  /** Category of the definition. */
  kind: 'function' | 'class' | 'method' | 'interface' | 'type' | 'enum' | 'property';
  /** Identifier text of the symbol. */
  name: string;
  /** First line of the definition; 0-based, compatible with read_file offset. */
  line: number;
  /** Last line of the definition; 0-based. */
  endLine: number;
  /** Documentation text attached to the definition, or null when none was found. */
  doc: string | null;
  /** First line of the definition without its body; truncated to 200 chars. */
  signature: string;
  /** Name of the enclosing class-like container, or null for top-level symbols. */
  parent: string | null;
  /** Nested method/property count (class/interface). */
  children: number;
}
|
||||
|
||||
/** Outline produced for a single file. */
export interface OutlineResult {
  /** Path of the file the outline belongs to. */
  file: string;
  /** Grammar name that was used to parse the file. */
  language: string;
  /** Symbols found in the file. */
  symbols: OutlineSymbol[];
  /** Number of entries in `symbols`. */
  totalSymbols: number;
}
|
||||
|
||||
// Set once Parser.init() has completed, so the runtime is initialized a single time
let initialized = false;

// Loaded grammars keyed by grammar name (Language loading is heavy IO, so results are kept)
const languageCache: Map<string, Parser.Language> = new Map();

// require() bound to this module; used to resolve WASM paths from ESM
const _require = createRequire(import.meta.url);
|
||||
|
||||
/** Resolve the `out` directory of the installed tree-sitter-wasms package. */
function getWasmDir(): string {
  const manifestPath = _require.resolve('tree-sitter-wasms/package.json');
  const packageRoot = dirname(manifestPath);
  return join(packageRoot, 'out');
}
|
||||
|
||||
/** Run Parser.init() on first use; once the flag is set, later calls do nothing. */
async function ensureInit(): Promise<void> {
  if (!initialized) {
    await Parser.init();
    initialized = true;
  }
}
|
||||
|
||||
/**
 * Return the grammar for `grammarName`, reading its WASM file on the first
 * request and serving later requests from the cache.
 */
async function loadLanguage(grammarName: string): Promise<Parser.Language> {
  const hit = languageCache.get(grammarName);
  if (hit) {
    return hit;
  }

  const wasmFile = `tree-sitter-${grammarName}.wasm`;
  const loaded = await Parser.Language.load(join(getWasmDir(), wasmFile));
  languageCache.set(grammarName, loaded);
  return loaded;
}
|
||||
|
||||
/**
 * Parse a source file into an outline of symbols.
 *
 * @param filePath - Path echoed back in the result's `file` field; nothing is read from disk here.
 * @param content - Full source text to parse.
 * @param config - Language configuration supplying the grammar name and the symbol query.
 * @returns The symbols sorted by starting line, or an empty outline when the parser yields no tree.
 * @throws Error when `config.symbolQuery` cannot be compiled for the grammar.
 */
export async function parseOutline(
  filePath: string,
  content: string,
  config: LanguageConfig
): Promise<OutlineResult> {
  await ensureInit();

  const language = await loadLanguage(config.grammarName);
  const parser = new Parser();
  let tree: Parser.Tree | null = null;
  let query: Parser.Query | null = null;

  try {
    parser.setLanguage(language);

    const parsed = parser.parse(content);
    tree = parsed;
    if (!parsed) {
      return { file: filePath, language: config.grammarName, symbols: [], totalSymbols: 0 };
    }

    let compiled: Parser.Query;
    try {
      compiled = language.query(config.symbolQuery);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new Error(`Query compilation failed for ${config.grammarName}: ${reason}`);
    }
    query = compiled;

    const contentLines = content.split('\n');
    const symbols: OutlineSymbol[] = [];

    for (const match of compiled.matches(parsed.rootNode)) {
      const symbol = processMatch(match, contentLines, config.grammarName);
      if (symbol) symbols.push(symbol);
    }

    // Sort by line position
    symbols.sort((a, b) => a.line - b.line);

    return {
      file: filePath,
      language: config.grammarName,
      symbols,
      totalSymbols: symbols.length,
    };
  } finally {
    // Clean up native resources on every exit path, including when matching
    // or symbol extraction throws.
    if (query) query.delete();
    if (tree) tree.delete();
    parser.delete();
  }
}
|
||||
|
||||
/**
 * Turn one query match into an OutlineSymbol.
 *
 * The match must carry a `name` capture and a `definition.<kind>` capture;
 * null is returned when either is missing. When a capture name repeats, the
 * last occurrence is the one used.
 */
function processMatch(
  match: Parser.QueryMatch,
  contentLines: string[],
  language: string
): OutlineSymbol | null {
  const definitionPrefix = 'definition.';
  let identifierNode: Parser.SyntaxNode | null = null;
  let definitionNode: Parser.SyntaxNode | null = null;
  let kind = 'function';

  for (const { name: captureName, node } of match.captures) {
    if (captureName === 'name') {
      identifierNode = node;
      continue;
    }
    if (captureName.startsWith(definitionPrefix)) {
      definitionNode = node;
      kind = captureName.slice(definitionPrefix.length);
    }
  }

  if (!(definitionNode && identifierNode)) {
    return null;
  }

  return {
    kind: kind as OutlineSymbol['kind'],
    name: identifierNode.text,
    line: definitionNode.startPosition.row,
    endLine: definitionNode.endPosition.row,
    doc: extractDoc(definitionNode, contentLines, language),
    signature: extractSignature(definitionNode.text, language),
    parent: findParent(definitionNode),
    children: countChildren(definitionNode, kind),
  };
}
|
||||
|
||||
/**
 * Locate the brace that opens a definition body on a single line.
 *
 * Braces inside parentheses or square brackets (destructured parameters,
 * inline object types, Go `interface{}` arguments) are skipped.
 *
 * @returns Index of the first `{` outside `()` / `[]`, or -1 when there is none.
 */
function findBodyBraceIndex(line: string): number {
  let depth = 0;
  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (ch === '(' || ch === '[') {
      depth++;
    } else if (ch === ')' || ch === ']') {
      if (depth > 0) depth--;
    } else if (ch === '{' && depth === 0) {
      return i;
    }
  }
  return -1;
}

/**
 * Extract a concise signature from the node text.
 *
 * Takes the first line and removes the body start: a trailing `:` for Python,
 * otherwise everything from the body-opening `{` onward. The result is
 * truncated to 200 chars, with `...` appended when truncation happened.
 *
 * @param nodeText - Full source text of the definition node.
 * @param language - Grammar name; only `'python'` is treated specially.
 * @returns The signature text without the body.
 */
function extractSignature(nodeText: string, language: string): string {
  let sig = nodeText.split('\n')[0].trimEnd();

  if (language === 'python') {
    // Remove trailing colon (body start)
    if (sig.endsWith(':')) {
      sig = sig.slice(0, -1).trimEnd();
    }
  } else {
    // Cut at the body brace only; a brace nested in the parameter list is part
    // of the signature and must survive.
    const braceIdx = findBodyBraceIndex(sig);
    if (braceIdx > 0) {
      sig = sig.substring(0, braceIdx).trimEnd();
    }
  }

  if (sig.length > 200) {
    sig = sig.substring(0, 200) + '...';
  }

  return sig;
}
|
||||
|
||||
/**
 * Pick the documentation strategy for a definition node: Python reads the
 * docstring from the body, every other language reads the comment above the
 * definition.
 */
function extractDoc(
  defNode: Parser.SyntaxNode,
  contentLines: string[],
  language: string
): string | null {
  return language === 'python'
    ? extractPythonDocstring(defNode)
    : extractCommentDoc(defNode, contentLines);
}
|
||||
|
||||
/**
 * Extract comment doc by looking at lines before the definition.
 *
 * The line directly above the definition is inspected, after skipping at most
 * one blank line. Two shapes are recognised:
 *  - a block comment whose last line ends with `*​/`, read upward to its `/*` line;
 *  - a run of consecutive line comments starting with `//` or `#`.
 *
 * @param defNode - Definition node; only its start row is used.
 * @param contentLines - The file content split into lines.
 * @returns The cleaned comment text, or null when no doc comment precedes the definition.
 */
function extractCommentDoc(
  defNode: Parser.SyntaxNode,
  contentLines: string[]
): string | null {
  const defLine = defNode.startPosition.row;
  let endIdx = defLine - 1;
  if (endIdx < 0 || endIdx >= contentLines.length) return null;

  // Skip at most one blank line
  if (contentLines[endIdx].trim() === '') {
    endIdx--;
    if (endIdx < 0) return null;
  }

  const endText = contentLines[endIdx].trim();

  // Block comment ending with */
  if (endText.endsWith('*/')) {
    let startIdx = endIdx;
    while (!contentLines[startIdx].trim().startsWith('/*')) {
      startIdx--;
      // No opener before the top of the file, or we ran into the end of a
      // different comment: the `*/` we started from was not the tail of a doc
      // block (e.g. a trailing comment after code), so there is no doc.
      if (startIdx < 0 || contentLines[startIdx].includes('*/')) return null;
    }
    return cleanBlockComment(contentLines.slice(startIdx, endIdx + 1).join('\n'));
  }

  // Line comments (// or /// or #)
  // NOTE(review): any line starting with '#' is accepted here, which also
  // matches C preprocessor directives and Rust attributes — confirm whether
  // that is intended for non-Python languages.
  if (endText.startsWith('//') || endText.startsWith('#')) {
    let startIdx = endIdx;
    while (startIdx > 0) {
      const prevText = contentLines[startIdx - 1].trim();
      if (prevText.startsWith('//') || prevText.startsWith('#')) {
        startIdx--;
      } else {
        break;
      }
    }
    return cleanLineComments(contentLines.slice(startIdx, endIdx + 1).join('\n'));
  }

  return null;
}
|
||||
|
||||
/**
 * Extract Python docstring from function/class body.
 *
 * The docstring is the first named child of the `body` field when that child
 * is an expression statement wrapping a string. For a plain `string` node the
 * optional literal prefix (such as `r` or `u`) and the surrounding quotes —
 * triple or single — are removed. A `concatenated_string` is returned as
 * written, trimmed.
 *
 * @param defNode - Function or class definition node.
 * @returns The docstring text, or null when the body has no leading string or it is empty.
 */
function extractPythonDocstring(defNode: Parser.SyntaxNode): string | null {
  const body = defNode.childForFieldName('body');
  if (!body) return null;

  const firstChild = body.namedChildren[0];
  if (!firstChild || firstChild.type !== 'expression_statement') return null;

  const expr = firstChild.namedChildren[0];
  if (!expr || (expr.type !== 'string' && expr.type !== 'concatenated_string')) return null;

  let text = expr.text;
  if (expr.type === 'string') {
    // Triple quotes are listed first so `"""` is not mistaken for `"`.
    const opener = /^[rRuUbBfF]{0,2}("""|'''|"|')/.exec(text);
    if (opener) {
      const quote = opener[1];
      const hasCloser = text.length >= opener[0].length + quote.length && text.endsWith(quote);
      if (hasCloser) {
        text = text.slice(opener[0].length, text.length - quote.length);
      }
    }
  }

  text = text.trim();
  return text || null;
}
|
||||
|
||||
/**
 * Strip the delimiters and leading asterisks from a block comment and return
 * the remaining text, or null when nothing is left.
 */
function cleanBlockComment(text: string): string | null {
  const rawLines = text.split('\n');
  const lastIndex = rawLines.length - 1;

  const cleaned = rawLines.map((raw, index) => {
    let line = raw;
    if (index === 0) {
      // Opening delimiter: slash-star or slash-star-star, plus one optional space
      line = line.replace(/^\s*\/\*\*?\s?/, '');
    }
    if (index === lastIndex) {
      // Closing delimiter and any whitespace around it
      line = line.replace(/\s*\*\/\s*$/, '');
    }
    // Leading asterisk decoration
    return line.replace(/^\s*\*\s?/, '');
  });

  const joined = cleaned.join('\n').trim();
  return joined === '' ? null : joined;
}
|
||||
|
||||
/**
 * Remove the leading comment marker (`//`, `///` or `#`) and one following
 * space from every line, returning the trimmed text or null when it is empty.
 */
function cleanLineComments(text: string): string | null {
  const markerPattern = /^\s*(?:\/\/\/?\s?|#\s?)/;
  const stripped: string[] = [];
  for (const line of text.split('\n')) {
    stripped.push(line.replace(markerPattern, ''));
  }
  const joined = stripped.join('\n').trim();
  return joined.length > 0 ? joined : null;
}
|
||||
|
||||
// Node types treated as a symbol's container. Includes TypeScript abstract
// classes (which hold abstract method signatures) and Rust traits, both of
// which can enclose captured definitions.
const PARENT_NODE_TYPES = new Set<string>([
  'class_declaration',
  'abstract_class_declaration',
  'interface_declaration',
  'class_definition',
  'enum_declaration',
  'impl_item',
  'trait_item',
  'class_specifier',
  'struct_specifier',
]);

/**
 * Find the parent class/interface/impl/trait name for a definition node.
 *
 * Walks up the ancestors of `defNode` and returns the name of the first
 * container that has one; containers without a name are skipped and the walk
 * continues.
 *
 * @param defNode - Definition node whose ancestors are searched.
 * @returns The container's name, or null when no named container encloses the node.
 */
function findParent(defNode: Parser.SyntaxNode): string | null {
  for (let current = defNode.parent; current; current = current.parent) {
    if (!PARENT_NODE_TYPES.has(current.type)) continue;

    // Try 'name' field first, then 'type' field (for Rust impl_item)
    const nameNode = current.childForFieldName('name') || current.childForFieldName('type');
    if (nameNode) return nameNode.text;
  }

  return null;
}
|
||||
|
||||
/**
 * Count direct children (methods/properties) for class/interface nodes.
 *
 * The body is taken from the node's `body` field, or failing that from the
 * first named child with a known body node type. Comment nodes inside the
 * body are not counted as members.
 *
 * @param defNode - Definition node of the symbol.
 * @param kind - Symbol kind; anything other than `'class'` or `'interface'` yields 0.
 * @returns Number of non-comment named children in the body, or 0 when there is no body.
 */
function countChildren(defNode: Parser.SyntaxNode, kind: string): number {
  if (kind !== 'class' && kind !== 'interface') return 0;

  // Find the body node (class_body, interface_body, block, declaration_list, etc.)
  const bodyTypes = [
    'class_body',
    'interface_body',
    'declaration_list',
    'block',
    'enum_body',
    'field_declaration_list',
  ];
  const body =
    defNode.childForFieldName('body') ||
    defNode.namedChildren.find(child => bodyTypes.includes(child.type));

  if (!body) return 0;

  // Grammars name comment nodes `comment`, `line_comment`, `block_comment`, ...
  return body.namedChildren.filter(child => !child.type.endsWith('comment')).length;
}
|
||||
150
ccw/src/utils/outline-queries.ts
Normal file
150
ccw/src/utils/outline-queries.ts
Normal file
@@ -0,0 +1,150 @@
|
||||
/**
|
||||
* Language configurations and tree-sitter query definitions for outline parsing.
|
||||
*/
|
||||
|
||||
import { extname } from 'path';
|
||||
|
||||
/** Settings needed to outline one language. */
export interface LanguageConfig {
  /** Grammar identifier, e.g. `'typescript'` or `'c_sharp'`. */
  grammarName: string;
  /** File extensions (with leading dot) mapped to this language. */
  extensions: string[];
  /** Tree-sitter query source that captures `@name` and `@definition.*` nodes. */
  symbolQuery: string;
}
|
||||
|
||||
// Symbol query used for both the typescript and tsx configs. Keeping a single
// copy stops the two from drifting apart (the tsx list previously lacked
// abstract method signatures). Abstract classes are captured as classes.
const TYPESCRIPT_SYMBOL_QUERY = [
  '(function_declaration name: (identifier) @name) @definition.function',
  '(class_declaration name: (type_identifier) @name) @definition.class',
  '(abstract_class_declaration name: (type_identifier) @name) @definition.class',
  '(method_definition name: (property_identifier) @name) @definition.method',
  '(abstract_method_signature name: (property_identifier) @name) @definition.method',
  '(interface_declaration name: (type_identifier) @name) @definition.interface',
  '(type_alias_declaration name: (type_identifier) @name) @definition.type',
  '(enum_declaration name: (identifier) @name) @definition.enum',
  '(variable_declarator name: (identifier) @name value: (arrow_function)) @definition.function',
  '(variable_declarator name: (identifier) @name value: (function_expression)) @definition.function',
].join('\n');

/**
 * Per-language outline settings, keyed by language name.
 *
 * Every query pattern captures the symbol identifier as `@name` and the whole
 * definition as `@definition.<kind>`.
 */
export const LANGUAGE_CONFIGS: Record<string, LanguageConfig> = {
  typescript: {
    grammarName: 'typescript',
    extensions: ['.ts'],
    symbolQuery: TYPESCRIPT_SYMBOL_QUERY,
  },
  tsx: {
    grammarName: 'tsx',
    extensions: ['.tsx'],
    symbolQuery: TYPESCRIPT_SYMBOL_QUERY,
  },
  javascript: {
    grammarName: 'javascript',
    extensions: ['.js', '.jsx', '.mjs', '.cjs'],
    symbolQuery: [
      '(function_declaration name: (identifier) @name) @definition.function',
      '(class_declaration name: (identifier) @name) @definition.class',
      '(method_definition name: (property_identifier) @name) @definition.method',
      '(variable_declarator name: (identifier) @name value: (arrow_function)) @definition.function',
      '(variable_declarator name: (identifier) @name value: (function_expression)) @definition.function',
    ].join('\n'),
  },
  python: {
    grammarName: 'python',
    extensions: ['.py'],
    symbolQuery: [
      '(function_definition name: (identifier) @name) @definition.function',
      '(class_definition name: (identifier) @name) @definition.class',
    ].join('\n'),
  },
  go: {
    grammarName: 'go',
    extensions: ['.go'],
    symbolQuery: [
      '(function_declaration name: (identifier) @name) @definition.function',
      '(method_declaration name: (field_identifier) @name) @definition.method',
      '(type_spec name: (type_identifier) @name) @definition.type',
    ].join('\n'),
  },
  rust: {
    grammarName: 'rust',
    extensions: ['.rs'],
    symbolQuery: [
      '(function_item name: (identifier) @name) @definition.function',
      '(struct_item name: (type_identifier) @name) @definition.class',
      '(enum_item name: (type_identifier) @name) @definition.enum',
      '(trait_item name: (type_identifier) @name) @definition.interface',
      '(impl_item type: (type_identifier) @name) @definition.class',
    ].join('\n'),
  },
  java: {
    grammarName: 'java',
    extensions: ['.java'],
    symbolQuery: [
      '(class_declaration name: (identifier) @name) @definition.class',
      '(method_declaration name: (identifier) @name) @definition.method',
      '(interface_declaration name: (identifier) @name) @definition.interface',
      '(enum_declaration name: (identifier) @name) @definition.enum',
      '(constructor_declaration name: (identifier) @name) @definition.method',
    ].join('\n'),
  },
  csharp: {
    grammarName: 'c_sharp',
    extensions: ['.cs'],
    symbolQuery: [
      '(class_declaration name: (identifier) @name) @definition.class',
      '(method_declaration name: (identifier) @name) @definition.method',
      '(interface_declaration name: (identifier) @name) @definition.interface',
      '(enum_declaration name: (identifier) @name) @definition.enum',
      '(constructor_declaration name: (identifier) @name) @definition.method',
    ].join('\n'),
  },
  c: {
    grammarName: 'c',
    extensions: ['.c', '.h'],
    symbolQuery: [
      '(function_definition declarator: (function_declarator declarator: (identifier) @name)) @definition.function',
      '(struct_specifier name: (type_identifier) @name) @definition.class',
      '(enum_specifier name: (type_identifier) @name) @definition.enum',
    ].join('\n'),
  },
  cpp: {
    grammarName: 'cpp',
    extensions: ['.cpp', '.hpp', '.cc', '.cxx'],
    symbolQuery: [
      '(function_definition declarator: (function_declarator declarator: (identifier) @name)) @definition.function',
      '(function_definition declarator: (function_declarator declarator: (qualified_identifier name: (identifier) @name))) @definition.function',
      '(class_specifier name: (type_identifier) @name) @definition.class',
      '(struct_specifier name: (type_identifier) @name) @definition.class',
      '(enum_specifier name: (type_identifier) @name) @definition.enum',
    ].join('\n'),
  },
};
|
||||
|
||||
// Reverse index: file extension -> key into LANGUAGE_CONFIGS
const EXTENSION_MAP = new Map<string, string>();
Object.entries(LANGUAGE_CONFIGS).forEach(([languageName, { extensions }]) => {
  extensions.forEach(extension => {
    EXTENSION_MAP.set(extension, languageName);
  });
});
|
||||
|
||||
/**
 * Detect language config from file path extension or explicit hint.
 *
 * The hint is tried first (trimmed, case-insensitive) and must be one of the
 * keys of LANGUAGE_CONFIGS; an unrecognised hint falls back to the file
 * extension, which is also compared case-insensitively.
 *
 * @param filePath - Path whose extension selects the language when the hint does not.
 * @param hint - Optional language name, e.g. `'typescript'`.
 * @returns The matching config, or null if language is not supported.
 */
export function detectLanguage(filePath: string, hint?: string): LanguageConfig | null {
  if (hint) {
    const normalized = hint.trim().toLowerCase();
    // Own-property check: a plain lookup would also hit inherited members such
    // as `constructor` or `toString` and return a non-config value.
    if (Object.prototype.hasOwnProperty.call(LANGUAGE_CONFIGS, normalized)) {
      return LANGUAGE_CONFIGS[normalized];
    }
  }

  const ext = extname(filePath).toLowerCase();
  const lang = EXTENSION_MAP.get(ext);
  return lang ? LANGUAGE_CONFIGS[lang] : null;
}
|
||||
Reference in New Issue
Block a user