Mirror of https://github.com/catlog22/Claude-Code-Workflow.git (synced 2026-02-05)
feat: Enhance CodexLens search functionality with new parameters and result handling
- Added search limit, content length, and extra files input fields in the CodexLens manager UI.
- Updated API request parameters to include new fields: max_content_length and extra_files_count.
- Refactored smart-search.ts to support new parameters with default values.
- Implemented result splitting logic to return both full content and additional file paths.
- Updated CLI commands to remove worker limits and allow dynamic scaling based on endpoint count.
- Introduced EmbeddingPoolConfig for improved embedding management and auto-discovery of providers.
- Enhanced search engines to utilize new parameters for fuzzy and exact searches.
- Added support for embedding single texts in the LiteLLM embedder.
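In practice, a caller exercises the new parameters through the search endpoint's query string. A minimal client sketch, assuming the manager API is served locally (the host, port, and /api/codexlens/search path are assumptions, not taken from this commit):

```typescript
// Minimal client sketch for the new query parameters. The host, port, and
// /api/codexlens/search path are assumptions, not taken from this commit.
interface SearchResponse {
  success: boolean;
  results: Array<{ path?: string; file?: string; score?: number; content?: string }>;
  extra_files?: string[]; // path-only tail, present only when extra hits exist
  metadata?: { total: number; limit: number; max_content_length: number; extra_files_count: number };
}

async function search(query: string): Promise<SearchResponse> {
  const params = new URLSearchParams({
    query,
    limit: '5',                // results returned with (truncated) content
    max_content_length: '200', // per-result content cap; '...' appended beyond it
    extra_files_count: '10',   // additional results returned as file paths only
    mode: 'hybrid',
  });
  const res = await fetch(`http://localhost:3000/api/codexlens/search?${params}`);
  return (await res.json()) as SearchResponse;
}
```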
@@ -466,27 +466,21 @@ RULES: $(cat ~/.claude/workflows/cli-templates/protocols/write-protocol.md) $(ca
 ```
 ---
 
-## Configuration
+## ⚙️ Execution Configuration
 
-### Timeout Allocation (Bash)
+### Dynamic Timeout Allocation
 
-controlled by external bash `timeout` command:
+**Minimum timeout: 5 minutes (300000ms)** - Never set below this threshold.
 
-**Recommended Time Allocation**:
+**Timeout Ranges**:
+- **Simple** (analysis, search): 5-10min (300000-600000ms)
+- **Medium** (refactoring, documentation): 10-20min (600000-1200000ms)
+- **Complex** (implementation, migration): 20-60min (1200000-3600000ms)
+- **Heavy** (large codebase, multi-file): 60-120min (3600000-7200000ms)
 
-- **Simple** (5-10min): Analysis, search
-- `timeout 300` ~ `timeout 600`
+**Codex Multiplier**: 3x of allocated time (minimum 15min / 900000ms)
 
-- **Medium** (10-20min): Refactoring, documentation
-- `timeout 600` ~ `timeout 1200`
+**Auto-detection**: Analyze PURPOSE and TASK fields to determine timeout
 
-- **Complex** (20-60min): Implementation, migration
-- `timeout 1200` ~ `timeout 3600`
-
-- **Heavy** (60-120min): Large codebase, multi-file
-- `timeout 3600` ~ `timeout 7200`
-
-**Codex Multiplier**: 3x allocated time (minimum 15min / 900s)
-
 ### Permission Framework
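The timeout table above translates directly into a range lookup plus the Codex multiplier rule. A sketch of how it could be applied in code, where the categories and numbers come from the template but the allocateTimeout helper itself is hypothetical:

```typescript
// A sketch (not part of the commit) applying the timeout table above.
type Complexity = 'simple' | 'medium' | 'complex' | 'heavy';

const TIMEOUT_RANGES_MS: Record<Complexity, [number, number]> = {
  simple: [300_000, 600_000],      // 5-10min: analysis, search
  medium: [600_000, 1_200_000],    // 10-20min: refactoring, documentation
  complex: [1_200_000, 3_600_000], // 20-60min: implementation, migration
  heavy: [3_600_000, 7_200_000],   // 60-120min: large codebase, multi-file
};

function allocateTimeout(category: Complexity, codex = false): number {
  const [min] = TIMEOUT_RANGES_MS[category]; // lower bound of the range
  const base = Math.max(min, 300_000);       // never below the 5min floor
  // Codex runs get 3x the allocation, with a 15min (900000ms) floor
  return codex ? Math.max(base * 3, 900_000) : base;
}

allocateTimeout('simple', true); // 900000 (3 * 300000, already at the floor)
```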
@@ -586,6 +586,8 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise<boolean>
   const query = url.searchParams.get('query') || '';
   const limit = parseInt(url.searchParams.get('limit') || '20', 10);
   const mode = url.searchParams.get('mode') || 'exact'; // exact, fuzzy, hybrid, vector
+  const maxContentLength = parseInt(url.searchParams.get('max_content_length') || '200', 10);
+  const extraFilesCount = parseInt(url.searchParams.get('extra_files_count') || '10', 10);
   const projectPath = url.searchParams.get('path') || initialPath;
 
   if (!query) {
@@ -595,15 +597,46 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise<boolean>
   }
 
   try {
-    const args = ['search', query, '--path', projectPath, '--limit', limit.toString(), '--mode', mode, '--json'];
+    // Request more results to support split (full content + extra files)
+    const totalToFetch = limit + extraFilesCount;
+    const args = ['search', query, '--path', projectPath, '--limit', totalToFetch.toString(), '--mode', mode, '--json'];
 
     const result = await executeCodexLens(args, { cwd: projectPath });
 
     if (result.success) {
       try {
         const parsed = extractJSON(result.output);
+        const allResults = parsed.result?.results || [];
+
+        // Truncate content and split results
+        const truncateContent = (content: string | null | undefined): string => {
+          if (!content) return '';
+          if (content.length <= maxContentLength) return content;
+          return content.slice(0, maxContentLength) + '...';
+        };
+
+        // Split results: first N with full content, rest as file paths only
+        const resultsWithContent = allResults.slice(0, limit).map((r: any) => ({
+          ...r,
+          content: truncateContent(r.content || r.excerpt),
+          excerpt: truncateContent(r.excerpt || r.content),
+        }));
+
+        const extraResults = allResults.slice(limit, limit + extraFilesCount);
+        const extraFiles = [...new Set(extraResults.map((r: any) => r.path || r.file))];
+
         res.writeHead(200, { 'Content-Type': 'application/json' });
-        res.end(JSON.stringify({ success: true, ...parsed.result }));
+        res.end(JSON.stringify({
+          success: true,
+          results: resultsWithContent,
+          extra_files: extraFiles.length > 0 ? extraFiles : undefined,
+          metadata: {
+            total: allResults.length,
+            limit,
+            max_content_length: maxContentLength,
+            extra_files_count: extraFilesCount,
+          },
+        }));
       } catch {
         res.writeHead(200, { 'Content-Type': 'application/json' });
         res.end(JSON.stringify({ success: true, results: [], output: result.output }));
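For reference, a response from the updated handler might look roughly like this for limit=5 and extra_files_count=10 (file names, scores, and totals are invented for illustration):

```typescript
// Illustrative response shape only; values are not from a real run.
const exampleResponse = {
  success: true,
  results: [
    // up to `limit` entries; content/excerpt truncated to max_content_length
    { path: 'src/search/engine.ts', score: 0.92, content: 'export function search(', excerpt: 'export function search(' },
  ],
  extra_files: ['src/search/ranker.ts', 'src/utils/fts.ts'], // paths only, deduplicated
  metadata: { total: 15, limit: 5, max_content_length: 200, extra_files_count: 10 },
};
```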
@@ -36,10 +36,12 @@ const ParamsSchema = z.object({
   path: z.string().optional(),
   paths: z.array(z.string()).default([]),
   contextLines: z.number().default(0),
-  maxResults: z.number().default(20), // Increased default
+  maxResults: z.number().default(5), // Default 5 with full content
   includeHidden: z.boolean().default(false),
   languages: z.array(z.string()).optional(),
-  limit: z.number().default(20), // Increased default
+  limit: z.number().default(5), // Default 5 with full content
+  extraFilesCount: z.number().default(10), // Additional file-only results
+  maxContentLength: z.number().default(200), // Max content length for truncation (50-2000)
   offset: z.number().default(0), // NEW: Pagination offset (start_index)
   enrich: z.boolean().default(false),
   // Search modifiers for ripgrep mode
@@ -268,6 +270,7 @@ interface SearchMetadata {
 interface SearchResult {
   success: boolean;
   results?: ExactMatch[] | SemanticMatch[] | GraphMatch[] | FileMatch[] | unknown;
+  extra_files?: string[]; // Additional file paths without content
   output?: string;
   metadata?: SearchMetadata;
   error?: string;
@@ -301,6 +304,42 @@ function stripAnsi(str: string): string {
   return str.replace(/\x1b\[[0-9;]*m/g, '');
 }
 
+/** Default maximum content length to return (avoid excessive output) */
+const DEFAULT_MAX_CONTENT_LENGTH = 200;
+
+/**
+ * Truncate content to specified length with ellipsis
+ * @param content - The content to truncate
+ * @param maxLength - Maximum length (default: 200)
+ */
+function truncateContent(content: string | null | undefined, maxLength: number = DEFAULT_MAX_CONTENT_LENGTH): string {
+  if (!content) return '';
+  if (content.length <= maxLength) return content;
+  return content.slice(0, maxLength) + '...';
+}
+
+/**
+ * Split results into full content results and extra file-only results
+ * Generic function supporting both SemanticMatch and ExactMatch types
+ * @param allResults - All search results (must have 'file' property)
+ * @param fullContentLimit - Number of results with full content (default: 5)
+ * @param extraFilesCount - Number of additional file-only results (default: 10)
+ */
+function splitResultsWithExtraFiles<T extends { file: string }>(
+  allResults: T[],
+  fullContentLimit: number = 5,
+  extraFilesCount: number = 10
+): { results: T[]; extra_files: string[] } {
+  // First N results with full content
+  const results = allResults.slice(0, fullContentLimit);
+
+  // Next M results as file paths only (deduplicated)
+  const extraResults = allResults.slice(fullContentLimit, fullContentLimit + extraFilesCount);
+  const extra_files = [...new Set(extraResults.map(r => r.file))];
+
+  return { results, extra_files };
+}
+
 /**
  * Check if CodexLens index exists for current directory
  * @param path - Directory path to check
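A quick demo of the split helper introduced above (a self-contained copy of the diff's logic); note that deduplication applies only within the extra tail, so a file already present in results can reappear in extra_files:

```typescript
// Self-contained copy of the diff's split helper, plus a tiny demo.
function splitResultsWithExtraFiles<T extends { file: string }>(
  allResults: T[],
  fullContentLimit = 5,
  extraFilesCount = 10
): { results: T[]; extra_files: string[] } {
  const results = allResults.slice(0, fullContentLimit);
  const tail = allResults.slice(fullContentLimit, fullContentLimit + extraFilesCount);
  return { results, extra_files: [...new Set(tail.map(r => r.file))] };
}

const hits = [{ file: 'a.ts' }, { file: 'b.ts' }, { file: 'b.ts' }, { file: 'c.ts' }];
const { results, extra_files } = splitResultsWithExtraFiles(hits, 2);
// results     -> [{ file: 'a.ts' }, { file: 'b.ts' }]
// extra_files -> ['b.ts', 'c.ts']  (dedup via Set applies only to the tail)
```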
@@ -714,7 +753,7 @@ async function executeAutoMode(params: Params): Promise<SearchResult> {
  * Supports tokenized multi-word queries with OR matching and result ranking
  */
 async function executeRipgrepMode(params: Params): Promise<SearchResult> {
-  const { query, paths = [], contextLines = 0, maxResults = 10, includeHidden = false, path = '.', regex = true, caseSensitive = true, tokenize = true } = params;
+  const { query, paths = [], contextLines = 0, maxResults = 5, extraFilesCount = 10, maxContentLength = 200, includeHidden = false, path = '.', regex = true, caseSensitive = true, tokenize = true } = params;
 
   if (!query) {
     return {
@@ -726,6 +765,9 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
   // Check if ripgrep is available
   const hasRipgrep = checkToolAvailability('rg');
 
+  // Calculate total to fetch for split (full content + extra files)
+  const totalToFetch = maxResults + extraFilesCount;
+
   // If ripgrep not available, fall back to CodexLens exact mode
   if (!hasRipgrep) {
     const readyStatus = await ensureCodexLensReady();
@@ -737,7 +779,7 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
   }
 
   // Use CodexLens exact mode as fallback
-  const args = ['search', query, '--limit', maxResults.toString(), '--mode', 'exact', '--json'];
+  const args = ['search', query, '--limit', totalToFetch.toString(), '--mode', 'exact', '--json'];
   const result = await executeCodexLens(args, { cwd: path });
 
   if (!result.success) {
@@ -754,23 +796,27 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
   }
 
   // Parse results
-  let results: SemanticMatch[] = [];
+  let allResults: SemanticMatch[] = [];
   try {
     const parsed = JSON.parse(stripAnsi(result.output || '{}'));
     const data = parsed.result?.results || parsed.results || parsed;
-    results = (Array.isArray(data) ? data : []).map((item: any) => ({
+    allResults = (Array.isArray(data) ? data : []).map((item: any) => ({
       file: item.path || item.file,
       score: item.score || 0,
-      content: item.excerpt || item.content || '',
+      content: truncateContent(item.content || item.excerpt, maxContentLength),
       symbol: item.symbol || null,
     }));
   } catch {
     // Keep empty results
   }
 
+  // Split results: first N with full content, rest as file paths only
+  const { results, extra_files } = splitResultsWithExtraFiles(allResults, maxResults, extraFilesCount);
+
   return {
     success: true,
     results,
+    extra_files: extra_files.length > 0 ? extra_files : undefined,
     metadata: {
       mode: 'ripgrep',
       backend: 'codexlens-fallback',
@@ -781,12 +827,12 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
    };
  }
 
-  // Use ripgrep
+  // Use ripgrep - request more results to support split
   const { command, args, tokens } = buildRipgrepCommand({
     query,
     paths: paths.length > 0 ? paths : [path],
     contextLines,
-    maxResults,
+    maxResults: totalToFetch, // Fetch more to support split
     includeHidden,
     regex,
     caseSensitive,
@@ -812,14 +858,14 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
     });
 
     child.on('close', (code) => {
-      const results: ExactMatch[] = [];
+      const allResults: ExactMatch[] = [];
       const lines = stdout.split('\n').filter((line) => line.trim());
       // Limit total results to prevent memory overflow (--max-count only limits per-file)
-      const effectiveLimit = maxResults > 0 ? maxResults : 500;
+      const effectiveLimit = totalToFetch > 0 ? totalToFetch : 500;
 
       for (const line of lines) {
         // Stop collecting if we've reached the limit
-        if (results.length >= effectiveLimit) {
+        if (allResults.length >= effectiveLimit) {
           resultLimitReached = true;
           break;
         }
@@ -837,7 +883,7 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
             : 1,
           content: item.data.lines.text.trim(),
         };
-        results.push(match);
+        allResults.push(match);
       }
     } catch {
       continue;
@@ -850,9 +896,12 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
 
       // Apply token-based scoring and sorting for multi-word queries
       // Results matching more tokens are ranked higher (exact matches first)
-      const scoredResults = tokens.length > 1 ? scoreByTokenMatch(results, tokens) : results;
+      const scoredResults = tokens.length > 1 ? scoreByTokenMatch(allResults, tokens) : allResults;
 
       if (code === 0 || code === 1 || (isWindowsDeviceError && scoredResults.length > 0)) {
+        // Split results: first N with full content, rest as file paths only
+        const { results, extra_files } = splitResultsWithExtraFiles(scoredResults, maxResults, extraFilesCount);
+
         // Build warning message for various conditions
         const warnings: string[] = [];
         if (resultLimitReached) {
@@ -864,18 +913,19 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
 
         resolve({
           success: true,
-          results: scoredResults,
+          results,
+          extra_files: extra_files.length > 0 ? extra_files : undefined,
           metadata: {
             mode: 'ripgrep',
             backend: 'ripgrep',
-            count: scoredResults.length,
+            count: results.length,
             query,
             tokens: tokens.length > 1 ? tokens : undefined, // Include tokens in metadata for debugging
             tokenized: tokens.length > 1,
             ...(warnings.length > 0 && { warning: warnings.join('; ') }),
           },
         });
-      } else if (isWindowsDeviceError && results.length === 0) {
+      } else if (isWindowsDeviceError && allResults.length === 0) {
         // Windows device error but no results - might be the only issue
         resolve({
           success: true,
@@ -912,7 +962,7 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
  * Requires index
  */
 async function executeCodexLensExactMode(params: Params): Promise<SearchResult> {
-  const { query, path = '.', maxResults = 10, enrich = false } = params;
+  const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false } = params;
 
   if (!query) {
     return {
@@ -933,7 +983,9 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
   // Check index status
   const indexStatus = await checkIndexStatus(path);
 
-  const args = ['search', query, '--limit', maxResults.toString(), '--mode', 'exact', '--json'];
+  // Request more results to support split (full content + extra files)
+  const totalToFetch = maxResults + extraFilesCount;
+  const args = ['search', query, '--limit', totalToFetch.toString(), '--mode', 'exact', '--json'];
   if (enrich) {
     args.push('--enrich');
   }
@@ -954,14 +1006,14 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
   }
 
   // Parse results
-  let results: SemanticMatch[] = [];
+  let allResults: SemanticMatch[] = [];
   try {
     const parsed = JSON.parse(stripAnsi(result.output || '{}'));
     const data = parsed.result?.results || parsed.results || parsed;
-    results = (Array.isArray(data) ? data : []).map((item: any) => ({
+    allResults = (Array.isArray(data) ? data : []).map((item: any) => ({
       file: item.path || item.file,
       score: item.score || 0,
-      content: item.excerpt || item.content || '',
+      content: truncateContent(item.content || item.excerpt, maxContentLength),
       symbol: item.symbol || null,
     }));
   } catch {
@@ -969,8 +1021,8 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
   }
 
   // Fallback to fuzzy mode if exact returns no results
-  if (results.length === 0) {
-    const fuzzyArgs = ['search', query, '--limit', maxResults.toString(), '--mode', 'fuzzy', '--json'];
+  if (allResults.length === 0) {
+    const fuzzyArgs = ['search', query, '--limit', totalToFetch.toString(), '--mode', 'fuzzy', '--json'];
     if (enrich) {
       fuzzyArgs.push('--enrich');
     }
@@ -980,20 +1032,23 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
     try {
       const parsed = JSON.parse(stripAnsi(fuzzyResult.output || '{}'));
       const data = parsed.result?.results || parsed.results || parsed;
-      results = (Array.isArray(data) ? data : []).map((item: any) => ({
+      allResults = (Array.isArray(data) ? data : []).map((item: any) => ({
        file: item.path || item.file,
        score: item.score || 0,
-       content: item.excerpt || item.content || '',
+       content: truncateContent(item.content || item.excerpt, maxContentLength),
        symbol: item.symbol || null,
      }));
     } catch {
       // Keep empty results
     }
 
-    if (results.length > 0) {
+    if (allResults.length > 0) {
+      // Split results: first N with full content, rest as file paths only
+      const { results, extra_files } = splitResultsWithExtraFiles(allResults, maxResults, extraFilesCount);
       return {
         success: true,
         results,
+        extra_files: extra_files.length > 0 ? extra_files : undefined,
         metadata: {
           mode: 'exact',
           backend: 'codexlens',
@@ -1008,9 +1063,13 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
     }
   }
 
+  // Split results: first N with full content, rest as file paths only
+  const { results, extra_files } = splitResultsWithExtraFiles(allResults, maxResults, extraFilesCount);
+
   return {
     success: true,
     results,
+    extra_files: extra_files.length > 0 ? extra_files : undefined,
     metadata: {
       mode: 'exact',
       backend: 'codexlens',
@@ -1027,7 +1086,7 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
  * Requires index with embeddings
  */
 async function executeHybridMode(params: Params): Promise<SearchResult> {
-  const { query, path = '.', maxResults = 10, enrich = false } = params;
+  const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false } = params;
 
   if (!query) {
     return {
@@ -1048,7 +1107,9 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
   // Check index status
   const indexStatus = await checkIndexStatus(path);
 
-  const args = ['search', query, '--limit', maxResults.toString(), '--mode', 'hybrid', '--json'];
+  // Request more results to support split (full content + extra files)
+  const totalToFetch = maxResults + extraFilesCount;
+  const args = ['search', query, '--limit', totalToFetch.toString(), '--mode', 'hybrid', '--json'];
   if (enrich) {
     args.push('--enrich');
   }
@@ -1069,14 +1130,14 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
   }
 
   // Parse results
-  let results: SemanticMatch[] = [];
+  let allResults: SemanticMatch[] = [];
   let baselineInfo: { score: number; count: number } | null = null;
   let initialCount = 0;
 
   try {
     const parsed = JSON.parse(stripAnsi(result.output || '{}'));
     const data = parsed.result?.results || parsed.results || parsed;
-    results = (Array.isArray(data) ? data : []).map((item: any) => {
+    allResults = (Array.isArray(data) ? data : []).map((item: any) => {
       const rawScore = item.score || 0;
       // Hybrid mode returns distance scores (lower is better).
       // Convert to similarity scores (higher is better) for consistency.
@@ -1085,27 +1146,27 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
       return {
         file: item.path || item.file,
         score: similarityScore,
-        content: item.excerpt || item.content || '',
+        content: truncateContent(item.content || item.excerpt, maxContentLength),
         symbol: item.symbol || null,
       };
     });
 
-    initialCount = results.length;
+    initialCount = allResults.length;
 
     // Post-processing pipeline to improve semantic search quality
     // 0. Filter dominant baseline scores (hot spot detection)
-    const baselineResult = filterDominantBaselineScores(results);
-    results = baselineResult.filteredResults;
+    const baselineResult = filterDominantBaselineScores(allResults);
+    allResults = baselineResult.filteredResults;
     baselineInfo = baselineResult.baselineInfo;
 
     // 1. Filter noisy files (coverage, node_modules, etc.)
-    results = filterNoisyFiles(results);
+    allResults = filterNoisyFiles(allResults);
     // 2. Boost results containing query keywords
-    results = applyKeywordBoosting(results, query);
+    allResults = applyKeywordBoosting(allResults, query);
     // 3. Enforce score diversity (penalize identical scores)
-    results = enforceScoreDiversity(results);
+    allResults = enforceScoreDiversity(allResults);
     // 4. Re-sort by adjusted scores
-    results.sort((a, b) => b.score - a.score);
+    allResults.sort((a, b) => b.score - a.score);
   } catch {
     return {
       success: true,
@@ -1121,15 +1182,19 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
    };
  }
 
+  // Split results: first N with full content, rest as file paths only
+  const { results, extra_files } = splitResultsWithExtraFiles(allResults, maxResults, extraFilesCount);
+
   // Build metadata with baseline info if detected
   let note = 'Hybrid mode uses RRF fusion (exact + fuzzy + vector) for best results';
   if (baselineInfo) {
-    note += ` | Filtered ${initialCount - results.length} hot-spot results with baseline score ~${baselineInfo.score.toFixed(4)}`;
+    note += ` | Filtered ${initialCount - allResults.length} hot-spot results with baseline score ~${baselineInfo.score.toFixed(4)}`;
   }
 
   return {
     success: true,
     results,
+    extra_files: extra_files.length > 0 ? extra_files : undefined,
     metadata: {
       mode: 'hybrid',
       backend: 'codexlens',
@@ -1540,7 +1605,7 @@ export const schema: ToolSchema = {
     mode: {
       type: 'string',
       enum: SEARCH_MODES,
-      description: 'Search mode: auto (default), hybrid (best quality), exact (CodexLens FTS), ripgrep (fast, no index), priority (fallback: hybrid->exact->ripgrep)',
+      description: 'Search mode: auto, hybrid (best quality), exact (CodexLens FTS), ripgrep (fast, no index), priority (fallback chain)',
       default: 'auto',
     },
     output_mode: {
@@ -1576,6 +1641,16 @@ export const schema: ToolSchema = {
       description: 'Alias for maxResults (default: 20)',
       default: 20,
     },
+    extraFilesCount: {
+      type: 'number',
+      description: 'Number of additional file-only results (paths without content)',
+      default: 10,
+    },
+    maxContentLength: {
+      type: 'number',
+      description: 'Maximum content length for truncation (50-2000)',
+      default: 200,
+    },
     offset: {
       type: 'number',
       description: 'Pagination offset - skip first N results (default: 0)',
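An example parameter object against the updated schema (the query value is invented and the invocation mechanism is omitted):

```typescript
// Sketch of a smart-search call using the new schema fields.
const searchParams = {
  query: 'EmbeddingPoolConfig',
  mode: 'exact',
  limit: 5,              // full-content results (Zod default lowered from 20 to 5)
  extraFilesCount: 10,   // extra path-only hits surfaced as extra_files
  maxContentLength: 200, // truncation cap; intended range 50-2000
};
```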
@@ -494,9 +494,13 @@ class ChainSearchEngine:
         else:
             # Use fuzzy FTS if enable_fuzzy=True (mode="fuzzy"), otherwise exact FTS
             if enable_fuzzy:
-                fts_results = store.search_fts_fuzzy(query, limit=limit)
+                fts_results = store.search_fts_fuzzy(
+                    query, limit=limit, return_full_content=True
+                )
             else:
-                fts_results = store.search_fts(query, limit=limit)
+                fts_results = store.search_fts_exact(
+                    query, limit=limit, return_full_content=True
+                )
 
         # Optionally add semantic keyword results
         if include_semantic:
@@ -200,7 +200,9 @@ class HybridSearchEngine:
         """
         try:
             with DirIndexStore(index_path) as store:
-                return store.search_fts_exact(query, limit=limit)
+                return store.search_fts_exact(
+                    query, limit=limit, return_full_content=True
+                )
         except Exception as exc:
             self.logger.debug("Exact search error: %s", exc)
             return []
@@ -220,7 +222,9 @@ class HybridSearchEngine:
         """
         try:
             with DirIndexStore(index_path) as store:
-                return store.search_fts_fuzzy(query, limit=limit)
+                return store.search_fts_fuzzy(
+                    query, limit=limit, return_full_content=True
+                )
         except Exception as exc:
             self.logger.debug("Fuzzy search error: %s", exc)
             return []
@@ -127,3 +127,18 @@ class LiteLLMEmbedderWrapper(BaseEmbedder):
 
         # LiteLLM handles batching internally, ignore batch_size parameter
         return self._embedder.embed(texts)
+
+    def embed_single(self, text: str) -> list[float]:
+        """Generate embedding for a single text.
+
+        Args:
+            text: Text to embed.
+
+        Returns:
+            list[float]: Embedding vector as a list of floats.
+        """
+        # Sanitize text before embedding
+        sanitized = self._sanitize_text(text)
+        embedding = self._embedder.embed([sanitized])
+        return embedding[0].tolist()