diff --git a/ccw/frontend/src/components/issue/hub/IssueBoardPanel.tsx b/ccw/frontend/src/components/issue/hub/IssueBoardPanel.tsx index fbdc51ea..f932abcf 100644 --- a/ccw/frontend/src/components/issue/hub/IssueBoardPanel.tsx +++ b/ccw/frontend/src/components/issue/hub/IssueBoardPanel.tsx @@ -310,14 +310,14 @@ export function IssueBoardPanel() { preferredShell: 'bash', tool: autoStart.tool, resumeKey: issueId, - }); + }, projectPath); await executeInCliSession(created.session.sessionKey, { tool: autoStart.tool, prompt: buildIssueAutoPrompt({ ...issue, status: destStatus }), mode: autoStart.mode, resumeKey: issueId, resumeStrategy: autoStart.resumeStrategy, - }); + }, projectPath); } catch (e) { setOptimisticError(`Auto-start failed: ${e instanceof Error ? e.message : String(e)}`); } @@ -328,7 +328,7 @@ export function IssueBoardPanel() { } } }, - [issues, idsByStatus, updateIssue] + [autoStart, issues, idsByStatus, projectPath, updateIssue] ); if (error) { diff --git a/ccw/frontend/src/components/issue/hub/IssueTerminalTab.tsx b/ccw/frontend/src/components/issue/hub/IssueTerminalTab.tsx index 2c73149d..953e08ec 100644 --- a/ccw/frontend/src/components/issue/hub/IssueTerminalTab.tsx +++ b/ccw/frontend/src/components/issue/hub/IssueTerminalTab.tsx @@ -5,7 +5,7 @@ import { useEffect, useMemo, useRef, useState } from 'react'; import { useIntl } from 'react-intl'; -import { Plus, RefreshCw, XCircle } from 'lucide-react'; +import { Copy, Plus, RefreshCw, Share2, XCircle } from 'lucide-react'; import { Terminal as XTerm } from 'xterm'; import { FitAddon } from 'xterm-addon-fit'; import { Button } from '@/components/ui/Button'; @@ -16,6 +16,7 @@ import { useWorkflowStore, selectProjectPath } from '@/stores/workflowStore'; import { closeCliSession, createCliSession, + createCliSessionShareToken, executeInCliSession, fetchCliSessionBuffer, fetchCliSessions, @@ -53,6 +54,7 @@ export function IssueTerminalTab({ issueId }: { issueId: string }) { const [resumeStrategy, 
setResumeStrategy] = useState('nativeResume'); const [prompt, setPrompt] = useState(''); const [isExecuting, setIsExecuting] = useState(false); + const [shareUrl, setShareUrl] = useState(''); const terminalHostRef = useRef(null); const xtermRef = useRef(null); @@ -69,7 +71,7 @@ export function IssueTerminalTab({ issueId }: { issueId: string }) { pendingInputRef.current = ''; if (!pending) return; try { - await sendCliSessionText(sessionKey, { text: pending, appendNewline: false }); + await sendCliSessionText(sessionKey, { text: pending, appendNewline: false }, projectPath || undefined); } catch (e) { // Ignore transient failures (WS output still shows process state) } @@ -86,13 +88,13 @@ export function IssueTerminalTab({ issueId }: { issueId: string }) { useEffect(() => { setIsLoadingSessions(true); setError(null); - fetchCliSessions() + fetchCliSessions(projectPath || undefined) .then((r) => { setSessions(r.sessions as unknown as CliSession[]); }) .catch((e) => setError(e instanceof Error ? 
e.message : String(e))) .finally(() => setIsLoadingSessions(false)); - }, [setSessions]); + }, [projectPath, setSessions]); // Auto-select a session if none selected yet useEffect(() => { @@ -152,7 +154,7 @@ export function IssueTerminalTab({ issueId }: { issueId: string }) { if (!selectedSessionKey) return; clearOutput(selectedSessionKey); - fetchCliSessionBuffer(selectedSessionKey) + fetchCliSessionBuffer(selectedSessionKey, projectPath || undefined) .then(({ buffer }) => { setBuffer(selectedSessionKey, buffer || ''); }) @@ -162,7 +164,7 @@ export function IssueTerminalTab({ issueId }: { issueId: string }) { .finally(() => { fitAddon.fit(); }); - }, [selectedSessionKey, setBuffer, clearOutput]); + }, [selectedSessionKey, projectPath, setBuffer, clearOutput]); // Stream new output chunks into xterm useEffect(() => { @@ -192,7 +194,7 @@ export function IssueTerminalTab({ issueId }: { issueId: string }) { if (selectedSessionKey) { void (async () => { try { - await resizeCliSession(selectedSessionKey, { cols: term.cols, rows: term.rows }); + await resizeCliSession(selectedSessionKey, { cols: term.cols, rows: term.rows }, projectPath || undefined); } catch { // ignore } @@ -203,7 +205,7 @@ export function IssueTerminalTab({ issueId }: { issueId: string }) { const ro = new ResizeObserver(resize); ro.observe(host); return () => ro.disconnect(); - }, [selectedSessionKey]); + }, [selectedSessionKey, projectPath]); const handleCreateSession = async () => { setIsCreating(true); @@ -217,7 +219,7 @@ export function IssueTerminalTab({ issueId }: { issueId: string }) { tool, model: undefined, resumeKey, - }); + }, projectPath || undefined); upsertSession(created.session as unknown as CliSession); setSelectedSessionKey(created.session.sessionKey); } catch (e) { @@ -232,7 +234,7 @@ export function IssueTerminalTab({ issueId }: { issueId: string }) { setIsClosing(true); setError(null); try { - await closeCliSession(selectedSessionKey); + await closeCliSession(selectedSessionKey, 
projectPath || undefined); setSelectedSessionKey(''); } catch (e) { setError(e instanceof Error ? e.message : String(e)); @@ -254,7 +256,7 @@ export function IssueTerminalTab({ issueId }: { issueId: string }) { resumeKey: resumeKey.trim() || undefined, resumeStrategy, category: 'user', - }); + }, projectPath || undefined); setPrompt(''); } catch (e) { setError(e instanceof Error ? e.message : String(e)); @@ -267,7 +269,7 @@ export function IssueTerminalTab({ issueId }: { issueId: string }) { setIsLoadingSessions(true); setError(null); try { - const r = await fetchCliSessions(); + const r = await fetchCliSessions(projectPath || undefined); setSessions(r.sessions as unknown as CliSession[]); } catch (e) { setError(e instanceof Error ? e.message : String(e)); @@ -276,6 +278,31 @@ export function IssueTerminalTab({ issueId }: { issueId: string }) { } }; + const handleCreateShareLink = async () => { + if (!selectedSessionKey) return; + setError(null); + setShareUrl(''); + try { + const r = await createCliSessionShareToken(selectedSessionKey, { mode: 'read' }, projectPath || undefined); + const url = new URL(window.location.href); + const base = (import.meta.env.BASE_URL ?? '/').replace(/\/$/, ''); + url.pathname = `${base}/cli-sessions/share`; + url.search = `sessionKey=${encodeURIComponent(selectedSessionKey)}&shareToken=${encodeURIComponent(r.shareToken)}`; + setShareUrl(url.toString()); + } catch (e) { + setError(e instanceof Error ? e.message : String(e)); + } + }; + + const handleCopyShareLink = async () => { + if (!shareUrl) return; + try { + await navigator.clipboard.writeText(shareUrl); + } catch { + // ignore + } + }; + return (
@@ -317,8 +344,23 @@ export function IssueTerminalTab({ issueId }: { issueId: string }) { {formatMessage({ id: 'issues.terminal.session.close' })} + +
+ {shareUrl && ( +
+ + +
+ )} +
{formatMessage({ id: 'issues.terminal.exec.tool' })}
diff --git a/ccw/frontend/src/components/issue/queue/QueueExecuteInSession.tsx b/ccw/frontend/src/components/issue/queue/QueueExecuteInSession.tsx index d21c5d4e..5cae355f 100644 --- a/ccw/frontend/src/components/issue/queue/QueueExecuteInSession.tsx +++ b/ccw/frontend/src/components/issue/queue/QueueExecuteInSession.tsx @@ -103,7 +103,7 @@ export function QueueExecuteInSession({ item, className }: { item: QueueItem; cl setIsLoading(true); setError(null); try { - const r = await fetchCliSessions(); + const r = await fetchCliSessions(projectPath || undefined); setSessions(r.sessions as unknown as CliSession[]); } catch (e) { setError(e instanceof Error ? e.message : String(e)); @@ -115,7 +115,7 @@ export function QueueExecuteInSession({ item, className }: { item: QueueItem; cl useEffect(() => { void refreshSessions(); // eslint-disable-next-line react-hooks/exhaustive-deps - }, []); + }, [projectPath]); useEffect(() => { if (selectedSessionKey) return; @@ -130,7 +130,7 @@ export function QueueExecuteInSession({ item, className }: { item: QueueItem; cl workingDir: projectPath, preferredShell: 'bash', resumeKey: item.issue_id, - }); + }, projectPath); upsertSession(created.session as unknown as CliSession); setSelectedSessionKey(created.session.sessionKey); return created.session.sessionKey; @@ -144,7 +144,7 @@ export function QueueExecuteInSession({ item, className }: { item: QueueItem; cl workingDir: projectPath, preferredShell: 'bash', resumeKey: item.issue_id, - }); + }, projectPath); upsertSession(created.session as unknown as CliSession); setSelectedSessionKey(created.session.sessionKey); await refreshSessions(); @@ -168,7 +168,7 @@ export function QueueExecuteInSession({ item, className }: { item: QueueItem; cl category: 'user', resumeKey: item.issue_id, resumeStrategy, - }); + }, projectPath); setLastExecution({ executionId: result.executionId, command: result.command }); } catch (e) { setError(e instanceof Error ? 
e.message : String(e)); diff --git a/ccw/frontend/src/lib/api.ts b/ccw/frontend/src/lib/api.ts index d7a05f57..e391abac 100644 --- a/ccw/frontend/src/lib/api.ts +++ b/ccw/frontend/src/lib/api.ts @@ -5707,28 +5707,41 @@ export interface CreateCliSessionInput { resumeKey?: string; } -export async function fetchCliSessions(): Promise<{ sessions: CliSession[] }> { - return fetchApi<{ sessions: CliSession[] }>('/api/cli-sessions'); +function withPath(url: string, projectPath?: string): string { + if (!projectPath) return url; + const sep = url.includes('?') ? '&' : '?'; + return `${url}${sep}path=${encodeURIComponent(projectPath)}`; } -export async function createCliSession(input: CreateCliSessionInput): Promise<{ success: boolean; session: CliSession }> { - return fetchApi<{ success: boolean; session: CliSession }>('/api/cli-sessions', { +export async function fetchCliSessions(projectPath?: string): Promise<{ sessions: CliSession[] }> { + return fetchApi<{ sessions: CliSession[] }>(withPath('/api/cli-sessions', projectPath)); +} + +export async function createCliSession( + input: CreateCliSessionInput, + projectPath?: string +): Promise<{ success: boolean; session: CliSession }> { + return fetchApi<{ success: boolean; session: CliSession }>(withPath('/api/cli-sessions', projectPath), { method: 'POST', body: JSON.stringify(input), }); } -export async function fetchCliSessionBuffer(sessionKey: string): Promise<{ session: CliSession; buffer: string }> { +export async function fetchCliSessionBuffer( + sessionKey: string, + projectPath?: string +): Promise<{ session: CliSession; buffer: string }> { return fetchApi<{ session: CliSession; buffer: string }>( - `/api/cli-sessions/${encodeURIComponent(sessionKey)}/buffer` + withPath(`/api/cli-sessions/${encodeURIComponent(sessionKey)}/buffer`, projectPath) ); } export async function sendCliSessionText( sessionKey: string, - input: { text: string; appendNewline?: boolean } + input: { text: string; appendNewline?: boolean }, + 
projectPath?: string ): Promise<{ success: boolean }> { - return fetchApi<{ success: boolean }>(`/api/cli-sessions/${encodeURIComponent(sessionKey)}/send`, { + return fetchApi<{ success: boolean }>(withPath(`/api/cli-sessions/${encodeURIComponent(sessionKey)}/send`, projectPath), { method: 'POST', body: JSON.stringify(input), }); @@ -5747,27 +5760,40 @@ export interface ExecuteInCliSessionInput { export async function executeInCliSession( sessionKey: string, - input: ExecuteInCliSessionInput + input: ExecuteInCliSessionInput, + projectPath?: string ): Promise<{ success: boolean; executionId: string; command: string }> { return fetchApi<{ success: boolean; executionId: string; command: string }>( - `/api/cli-sessions/${encodeURIComponent(sessionKey)}/execute`, + withPath(`/api/cli-sessions/${encodeURIComponent(sessionKey)}/execute`, projectPath), { method: 'POST', body: JSON.stringify(input) } ); } export async function resizeCliSession( sessionKey: string, - input: { cols: number; rows: number } + input: { cols: number; rows: number }, + projectPath?: string ): Promise<{ success: boolean }> { - return fetchApi<{ success: boolean }>(`/api/cli-sessions/${encodeURIComponent(sessionKey)}/resize`, { + return fetchApi<{ success: boolean }>(withPath(`/api/cli-sessions/${encodeURIComponent(sessionKey)}/resize`, projectPath), { method: 'POST', body: JSON.stringify(input), }); } -export async function closeCliSession(sessionKey: string): Promise<{ success: boolean }> { - return fetchApi<{ success: boolean }>(`/api/cli-sessions/${encodeURIComponent(sessionKey)}/close`, { +export async function closeCliSession(sessionKey: string, projectPath?: string): Promise<{ success: boolean }> { + return fetchApi<{ success: boolean }>(withPath(`/api/cli-sessions/${encodeURIComponent(sessionKey)}/close`, projectPath), { method: 'POST', body: JSON.stringify({}), }); } + +export async function createCliSessionShareToken( + sessionKey: string, + input: { mode?: 'read' | 'write'; ttlMs?: 
number }, + projectPath?: string +): Promise<{ success: boolean; shareToken: string; expiresAt: string; mode: 'read' | 'write' }> { + return fetchApi<{ success: boolean; shareToken: string; expiresAt: string; mode: 'read' | 'write' }>( + withPath(`/api/cli-sessions/${encodeURIComponent(sessionKey)}/share`, projectPath), + { method: 'POST', body: JSON.stringify(input) } + ); +} diff --git a/ccw/frontend/src/locales/en/issues.json b/ccw/frontend/src/locales/en/issues.json index 162ab431..6cc81437 100644 --- a/ccw/frontend/src/locales/en/issues.json +++ b/ccw/frontend/src/locales/en/issues.json @@ -119,7 +119,8 @@ "none": "No sessions", "refresh": "Refresh", "new": "New Session", - "close": "Close" + "close": "Close", + "share": "Share (Read-only)" }, "exec": { "tool": "Tool", diff --git a/ccw/frontend/src/locales/zh/issues.json b/ccw/frontend/src/locales/zh/issues.json index cdb21c97..d64dfa48 100644 --- a/ccw/frontend/src/locales/zh/issues.json +++ b/ccw/frontend/src/locales/zh/issues.json @@ -119,7 +119,8 @@ "none": "暂无会话", "refresh": "刷新", "new": "新建会话", - "close": "关闭" + "close": "关闭", + "share": "分享(只读)" }, "exec": { "tool": "工具", diff --git a/ccw/src/core/routes/cli-sessions-routes.ts b/ccw/src/core/routes/cli-sessions-routes.ts index da2e174a..d6b08733 100644 --- a/ccw/src/core/routes/cli-sessions-routes.ts +++ b/ccw/src/core/routes/cli-sessions-routes.ts @@ -14,10 +14,52 @@ import type { RouteContext } from './types.js'; import { getCliSessionManager } from '../services/cli-session-manager.js'; +import path from 'path'; +import { getCliSessionPolicy } from '../services/cli-session-policy.js'; +import { RateLimiter } from '../services/rate-limiter.js'; +import { appendCliSessionAudit } from '../services/cli-session-audit.js'; +import { describeShareAuthFailure, getCliSessionShareManager } from '../services/cli-session-share.js'; + +function clientKey(req: RouteContext['req']): string { + const addr = req.socket?.remoteAddress ?? 
'unknown'; + const ua = Array.isArray(req.headers['user-agent']) ? req.headers['user-agent'][0] : req.headers['user-agent']; + return `${addr}|${ua ?? ''}`; +} + +function clientInfo(req: RouteContext['req']): { ip?: string; userAgent?: string } { + const ip = req.socket?.remoteAddress ?? undefined; + const userAgent = Array.isArray(req.headers['user-agent']) ? req.headers['user-agent'][0] : req.headers['user-agent']; + return { ip: ip || undefined, userAgent: userAgent || undefined }; +} + +function resolveProjectRoot(ctx: RouteContext): string { + const forced = (ctx.req as any).__cliSessionShareProjectRoot; + if (typeof forced === 'string' && forced.trim()) return path.resolve(forced); + const raw = ctx.url.searchParams.get('path'); + if (raw && raw.trim()) return path.resolve(raw); + return path.resolve(ctx.initialPath || process.cwd()); +} + +function validateWorkingDir(projectRoot: string, workingDir: string, allowOutside: boolean): string | null { + const resolved = path.resolve(workingDir); + if (allowOutside) return null; + + const rel = path.relative(projectRoot, resolved); + const isInside = rel === '' || (!rel.startsWith('..') && !path.isAbsolute(rel)); + return isInside ? 
null : `workingDir must be within project: ${projectRoot}`; +} + +const policy = getCliSessionPolicy(); +const createLimiter = new RateLimiter({ limit: policy.rateLimit.createPerMinute, windowMs: 60_000 }); +const executeLimiter = new RateLimiter({ limit: policy.rateLimit.executePerMinute, windowMs: 60_000 }); +const resizeLimiter = new RateLimiter({ limit: policy.rateLimit.resizePerMinute, windowMs: 60_000 }); +const sendBytesLimiter = new RateLimiter({ limit: policy.rateLimit.sendBytesPerMinute, windowMs: 60_000 }); +const shareManager = getCliSessionShareManager(); export async function handleCliSessionsRoutes(ctx: RouteContext): Promise { const { pathname, req, res, handlePostRequest, initialPath } = ctx; - const manager = getCliSessionManager(process.cwd()); + const projectRoot = resolveProjectRoot(ctx); + const manager = getCliSessionManager(projectRoot); // GET /api/cli-sessions if (pathname === '/api/cli-sessions' && req.method === 'GET') { @@ -29,6 +71,15 @@ export async function handleCliSessionsRoutes(ctx: RouteContext): Promise { + const rate = createLimiter.consume(clientKey(req), 1); + if (!rate.ok) { + return { error: 'Rate limited', status: 429 }; + } + + if (policy.maxSessions > 0 && manager.listSessions().length >= policy.maxSessions) { + return { error: `Too many sessions (max ${policy.maxSessions})`, status: 429 }; + } + const { workingDir, cols, @@ -39,16 +90,41 @@ export async function handleCliSessionsRoutes(ctx: RouteContext): Promise { + if (event.sessionKey !== sessionKey) return; + res.write(`event: output\ndata: ${JSON.stringify(event)}\n\n`); + }); + + req.on('close', () => { + unsubscribe(); + try { + res.end(); + } catch { + // ignore + } + }); + + return true; + } + // POST /api/cli-sessions/:sessionKey/send const sendMatch = pathname.match(/^\/api\/cli-sessions\/([^/]+)\/send$/); if (sendMatch && req.method === 'POST') { @@ -78,17 +218,69 @@ export async function handleCliSessionsRoutes(ctx: RouteContext): Promise { + const { mode, 
ttlMs } = (body || {}) as any; + const session = manager.getSession(sessionKey); + if (!session) return { error: 'Session not found', status: 404 }; + + const shareMode = mode === 'write' ? 'write' : 'read'; + const safeTtlMs = typeof ttlMs === 'number' ? Math.min(Math.max(60_000, ttlMs), 7 * 24 * 60 * 60_000) : undefined; + const token = shareManager.createToken({ + sessionKey, + projectRoot, + mode: shareMode, + ttlMs: safeTtlMs, + }); + + appendCliSessionAudit({ + type: 'session_share_created', + timestamp: new Date().toISOString(), + projectRoot, + sessionKey, + ...clientInfo(req), + details: { shareMode, expiresAt: token.expiresAt }, + }); + + return { success: true, shareToken: token.token, expiresAt: token.expiresAt, mode: token.mode }; + }); + return true; + } + // POST /api/cli-sessions/:sessionKey/execute const executeMatch = pathname.match(/^\/api\/cli-sessions\/([^/]+)\/execute$/); if (executeMatch && req.method === 'POST') { const sessionKey = decodeURIComponent(executeMatch[1]); handlePostRequest(req, res, async (body: unknown) => { + const rate = executeLimiter.consume(clientKey(req), 1); + if (!rate.ok) { + return { error: 'Rate limited', status: 429 }; + } + const { tool, prompt, @@ -106,9 +298,18 @@ export async function handleCliSessionsRoutes(ctx: RouteContext): Promise { + const rate = resizeLimiter.consume(clientKey(req), 1); + if (!rate.ok) { + return { error: 'Rate limited', status: 429 }; + } const { cols, rows } = (body || {}) as any; if (typeof cols !== 'number' || typeof rows !== 'number') { return { error: 'cols and rows are required', status: 400 }; } manager.resize(sessionKey, cols, rows); + appendCliSessionAudit({ + type: 'session_resize', + timestamp: new Date().toISOString(), + projectRoot, + sessionKey, + ...clientInfo(req), + details: { cols, rows }, + }); return { success: true }; }); return true; @@ -143,6 +368,13 @@ export async function handleCliSessionsRoutes(ctx: RouteContext): Promise(['/api/auth/token', '/api/csrf-token', 
'/api/hook', '/api/test/ask-question', '/api/a2ui/answer']); + const cliSessionShareManager = getCliSessionShareManager(); const server = http.createServer(async (req, res) => { const url = new URL(req.url ?? '/', `http://localhost:${serverPort}`); @@ -521,8 +523,24 @@ export async function startServer(options: ServerOptions = {}): Promise; +} + +function auditFilePath(projectRoot: string): string { + return path.join(projectRoot, '.workflow', 'audit', 'cli-sessions.jsonl'); +} + +export function appendCliSessionAudit(event: CliSessionAuditEvent): void { + try { + const filePath = auditFilePath(event.projectRoot); + const dir = path.dirname(filePath); + if (!existsSync(dir)) mkdirSync(dir, { recursive: true }); + appendFileSync(filePath, JSON.stringify(event) + '\n', { encoding: 'utf8' }); + } catch { + // Best-effort: never fail API requests due to audit write errors. + } +} diff --git a/ccw/src/core/services/cli-session-manager.ts b/ccw/src/core/services/cli-session-manager.ts index 699367f3..e5d4e18e 100644 --- a/ccw/src/core/services/cli-session-manager.ts +++ b/ccw/src/core/services/cli-session-manager.ts @@ -12,6 +12,7 @@ import { type CliSessionResumeStrategy } from './cli-session-command-builder.js'; import { getCliSessionPolicy } from './cli-session-policy.js'; +import { appendCliSessionAudit } from './cli-session-audit.js'; export interface CliSession { sessionKey: string; @@ -147,10 +148,29 @@ export class CliSessionManager { private projectRoot: string; private emitter = new EventEmitter(); private maxBufferBytes: number; + private idleTimeoutMs: number; + private reaperTimer: NodeJS.Timeout | null = null; constructor(projectRoot: string) { this.projectRoot = projectRoot; - this.maxBufferBytes = getCliSessionPolicy().maxBufferBytes; + const policy = getCliSessionPolicy(); + this.maxBufferBytes = policy.maxBufferBytes; + this.idleTimeoutMs = policy.idleTimeoutMs; + + if (this.idleTimeoutMs > 0) { + this.reaperTimer = setInterval(() => { + const reaped = 
this.closeIdleSessions(this.idleTimeoutMs); + for (const sessionKey of reaped) { + appendCliSessionAudit({ + type: 'session_idle_reaped', + timestamp: nowIso(), + projectRoot: this.projectRoot, + sessionKey, + }); + } + }, 60_000); + this.reaperTimer.unref?.(); + } } listSessions(): CliSession[] { @@ -354,14 +374,14 @@ export class CliSessionManager { return () => this.emitter.off('output', handler); } - closeIdleSessions(idleTimeoutMs: number): number { - if (idleTimeoutMs <= 0) return 0; + closeIdleSessions(idleTimeoutMs: number): string[] { + if (idleTimeoutMs <= 0) return []; const now = Date.now(); - let closed = 0; + const closed: string[] = []; for (const s of this.sessions.values()) { if (now - s.lastActivityAt >= idleTimeoutMs) { this.close(s.sessionKey); - closed += 1; + closed.push(s.sessionKey); } } return closed; diff --git a/ccw/src/core/services/cli-session-share.ts b/ccw/src/core/services/cli-session-share.ts new file mode 100644 index 00000000..c7aa5ae6 --- /dev/null +++ b/ccw/src/core/services/cli-session-share.ts @@ -0,0 +1,83 @@ +import { randomBytes } from 'crypto'; + +export type CliSessionShareMode = 'read' | 'write'; + +export interface CliSessionShareTokenRecord { + token: string; + sessionKey: string; + projectRoot: string; + mode: CliSessionShareMode; + expiresAt: string; +} + +interface InternalTokenRecord extends CliSessionShareTokenRecord { + expiresAtMs: number; +} + +function createTokenValue(): string { + // 32 bytes => 43 chars base64url (approx), safe for URLs. + return randomBytes(32).toString('base64url'); +} + +export class CliSessionShareManager { + private tokens = new Map(); + + createToken(input: { + sessionKey: string; + projectRoot: string; + mode: CliSessionShareMode; + ttlMs?: number; + }): CliSessionShareTokenRecord { + const ttlMs = typeof input.ttlMs === 'number' ? 
Math.max(1_000, input.ttlMs) : 24 * 60 * 60_000; + const expiresAtMs = Date.now() + ttlMs; + const record: InternalTokenRecord = { + token: createTokenValue(), + sessionKey: input.sessionKey, + projectRoot: input.projectRoot, + mode: input.mode, + expiresAt: new Date(expiresAtMs).toISOString(), + expiresAtMs, + }; + this.tokens.set(record.token, record); + return record; + } + + validateToken(token: string, sessionKey: string): CliSessionShareTokenRecord | null { + const record = this.tokens.get(token); + if (!record) return null; + if (record.sessionKey !== sessionKey) return null; + if (Date.now() >= record.expiresAtMs) { + this.tokens.delete(token); + return null; + } + const { expiresAtMs: _expiresAtMs, ...publicRecord } = record; + return publicRecord; + } + + revokeToken(token: string): boolean { + return this.tokens.delete(token); + } + + cleanupExpired(): number { + const now = Date.now(); + let removed = 0; + for (const [token, record] of this.tokens) { + if (now >= record.expiresAtMs) { + this.tokens.delete(token); + removed += 1; + } + } + return removed; + } +} + +let singleton: CliSessionShareManager | null = null; + +export function getCliSessionShareManager(): CliSessionShareManager { + if (!singleton) singleton = new CliSessionShareManager(); + return singleton; +} + +export function describeShareAuthFailure(): { error: string; status: number } { + return { error: 'Invalid or expired share token', status: 403 }; +} diff --git a/ccw/src/core/services/flow-executor.ts b/ccw/src/core/services/flow-executor.ts index db5e3d40..11ab76c6 100644 --- a/ccw/src/core/services/flow-executor.ts +++ b/ccw/src/core/services/flow-executor.ts @@ -255,7 +255,7 @@ export class NodeRunner { }; } - const manager = getCliSessionManager(process.cwd()); + const manager = getCliSessionManager(this.context.workingDir || process.cwd()); const routed = manager.execute(targetSessionKey, { tool, prompt: instruction, diff --git a/ccw/src/mcp-server/index.ts 
b/ccw/src/mcp-server/index.ts index 965ac8f8..88ce43a7 100644 --- a/ccw/src/mcp-server/index.ts +++ b/ccw/src/mcp-server/index.ts @@ -22,7 +22,7 @@ const ENV_PROJECT_ROOT = 'CCW_PROJECT_ROOT'; const ENV_ALLOWED_DIRS = 'CCW_ALLOWED_DIRS'; // Default enabled tools (core set - file operations, core memory, and smart search) -const DEFAULT_TOOLS: string[] = ['write_file', 'edit_file', 'read_file', 'read_many_files', 'core_memory', 'smart_search']; +const DEFAULT_TOOLS: string[] = ['write_file', 'edit_file', 'read_file', 'read_many_files', 'read_outline', 'core_memory', 'smart_search']; /** * Get list of enabled tools from environment or defaults diff --git a/ccw/src/tools/index.ts b/ccw/src/tools/index.ts index 02d56b0d..8c8495b7 100644 --- a/ccw/src/tools/index.ts +++ b/ccw/src/tools/index.ts @@ -24,6 +24,7 @@ import * as codexLensLspMod from './codex-lens-lsp.js'; import * as vscodeLspMod from './vscode-lsp.js'; import * as readFileMod from './read-file.js'; import * as readManyFilesMod from './read-many-files.js'; +import * as readOutlineMod from './read-outline.js'; import * as coreMemoryMod from './core-memory.js'; import * as contextCacheMod from './context-cache.js'; import * as skillContextLoaderMod from './skill-context-loader.js'; @@ -367,6 +368,7 @@ registerTool(toLegacyTool(codexLensLspMod)); registerTool(toLegacyTool(vscodeLspMod)); registerTool(toLegacyTool(readFileMod)); registerTool(toLegacyTool(readManyFilesMod)); +registerTool(toLegacyTool(readOutlineMod)); registerTool(toLegacyTool(coreMemoryMod)); registerTool(toLegacyTool(contextCacheMod)); registerTool(toLegacyTool(skillContextLoaderMod)); diff --git a/ccw/src/tools/read-outline.ts b/ccw/src/tools/read-outline.ts new file mode 100644 index 00000000..d7e20ae3 --- /dev/null +++ b/ccw/src/tools/read-outline.ts @@ -0,0 +1,104 @@ +/** + * Read Outline Tool - Parse code files into structured symbol outlines. + * + * Uses web-tree-sitter for AST-level parsing. 
Returns function/class/method + * signatures with line offsets directly usable by read_file(offset, limit). + * + * Supported: TypeScript, TSX, JavaScript, Python, Go, Rust, Java, C#, C, C++ + */ + +import { z } from 'zod'; +import type { ToolSchema, ToolResult } from '../types/tool.js'; +import { existsSync, statSync, readFileSync } from 'fs'; +import { relative } from 'path'; +import { validatePath, getProjectRoot } from '../utils/path-validator.js'; +import { BINARY_EXTENSIONS } from '../utils/file-reader.js'; +import { detectLanguage } from '../utils/outline-queries.js'; +import { parseOutline } from '../utils/outline-parser.js'; +import type { OutlineResult } from '../utils/outline-parser.js'; +import { extname } from 'path'; + +const ParamsSchema = z.object({ + path: z.string().describe('File path to parse for outline'), + language: z.string().optional().describe('Language hint (e.g. "typescript", "python"). Auto-detected from extension if omitted.'), +}); + +type Params = z.infer; + +export const schema: ToolSchema = { + name: 'read_outline', + description: `Parse a code file into a structured outline of symbols (functions, classes, methods, interfaces, types, enums). + +Returns symbol names, signatures, docstrings, and 0-based line offsets that work directly with read_file(offset, limit). + +Usage: + read_outline(path="src/server.ts") + read_outline(path="main.py", language="python") + +Workflow: discover symbols → use line/endLine with read_file to jump to implementations. + +Supported languages: TypeScript, TSX, JavaScript, Python, Go, Rust, Java, C#, C, C++`, + inputSchema: { + type: 'object', + properties: { + path: { type: 'string', description: 'File path to parse for outline' }, + language: { type: 'string', description: 'Language hint (e.g. "typescript", "python"). Auto-detected from extension if omitted.' 
}, + }, + required: ['path'], + }, +}; + +export async function handler(params: Record): Promise> { + const parsed = ParamsSchema.safeParse(params); + if (!parsed.success) { + return { success: false, error: `Invalid params: ${parsed.error.message}` }; + } + + const { path: filePath, language: langHint } = parsed.data; + const cwd = getProjectRoot(); + const resolvedPath = await validatePath(filePath); + + if (!existsSync(resolvedPath)) { + return { success: false, error: `File not found: ${filePath}` }; + } + + const stat = statSync(resolvedPath); + if (!stat.isFile()) { + return { success: false, error: `Not a file: ${filePath}` }; + } + + // Check for binary files + const ext = extname(resolvedPath).toLowerCase(); + if (BINARY_EXTENSIONS.has(ext)) { + return { success: false, error: `Binary file not supported: ${filePath}` }; + } + + // Detect language + const config = detectLanguage(resolvedPath, langHint); + if (!config) { + const supported = 'TypeScript, TSX, JavaScript, Python, Go, Rust, Java, C#, C, C++'; + return { + success: false, + error: `Unsupported language for "${ext}" extension. Supported: ${supported}`, + }; + } + + // Read file content + const content = readFileSync(resolvedPath, 'utf-8'); + + // Parse outline + try { + const result = await parseOutline( + relative(cwd, resolvedPath) || filePath, + content, + config + ); + + return { success: true, result }; + } catch (err) { + return { + success: false, + error: `Outline parsing failed: ${(err as Error).message}`, + }; + } +} diff --git a/ccw/src/utils/outline-parser.ts b/ccw/src/utils/outline-parser.ts new file mode 100644 index 00000000..bf6273fb --- /dev/null +++ b/ccw/src/utils/outline-parser.ts @@ -0,0 +1,340 @@ +/** + * Core AST outline parsing engine using web-tree-sitter. + * + * Parses source files into structured symbol outlines (functions, classes, methods, etc.) + * with line offsets compatible with read_file(offset, limit). 
+ */ + +import { createRequire } from 'node:module'; +import { dirname, join } from 'path'; +import Parser from 'web-tree-sitter'; +import type { LanguageConfig } from './outline-queries.js'; + +export interface OutlineSymbol { + kind: 'function' | 'class' | 'method' | 'interface' | 'type' | 'enum' | 'property'; + name: string; + line: number; // 0-based, compatible with read_file offset + endLine: number; // 0-based + doc: string | null; + signature: string; // truncated to 200 chars + parent: string | null; + children: number; // nested method/property count (class/interface) +} + +export interface OutlineResult { + file: string; + language: string; + symbols: OutlineSymbol[]; + totalSymbols: number; +} + +// Singleton init guard +let initialized = false; + +// Language WASM cache (Language loading is heavy IO, cache aggressively) +const languageCache = new Map(); + +// Resolve WASM paths via createRequire (works in ESM) +const _require = createRequire(import.meta.url); + +function getWasmDir(): string { + return join(dirname(_require.resolve('tree-sitter-wasms/package.json')), 'out'); +} + +async function ensureInit(): Promise { + if (initialized) return; + await Parser.init(); + initialized = true; +} + +async function loadLanguage(grammarName: string): Promise { + const cached = languageCache.get(grammarName); + if (cached) return cached; + + const wasmPath = join(getWasmDir(), `tree-sitter-${grammarName}.wasm`); + const lang = await Parser.Language.load(wasmPath); + languageCache.set(grammarName, lang); + return lang; +} + +/** + * Parse a source file into an outline of symbols. 
/**
 * Parse a source file into an outline of symbols.
 *
 * @param filePath - Path echoed back as `file` in the result; nothing is read from disk here.
 * @param content - Full source text to parse.
 * @param config - Language configuration supplying the grammar name and the symbol query.
 * @returns The symbols matched by `config.symbolQuery`, sorted by start line. An empty
 *   outline is returned when the parser produces no tree.
 * @throws Error when `config.symbolQuery` does not compile for the grammar.
 */
export async function parseOutline(
  filePath: string,
  content: string,
  config: LanguageConfig
): Promise<OutlineResult> {
  await ensureInit();
  const language = await loadLanguage(config.grammarName);

  const parser = new Parser();
  let tree: Parser.Tree | null = null;
  let query: Parser.Query | null = null;

  try {
    parser.setLanguage(language);

    tree = parser.parse(content);
    if (!tree) {
      return { file: filePath, language: config.grammarName, symbols: [], totalSymbols: 0 };
    }

    try {
      query = language.query(config.symbolQuery);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new Error(`Query compilation failed for ${config.grammarName}: ${reason}`);
    }

    const contentLines = content.split('\n');
    const symbols: OutlineSymbol[] = [];

    for (const match of query.matches(tree.rootNode)) {
      const symbol = processMatch(match, contentLines, config.grammarName);
      if (symbol) symbols.push(symbol);
    }

    // Sort by line position
    symbols.sort((a, b) => a.line - b.line);

    return {
      file: filePath,
      language: config.grammarName,
      symbols,
      totalSymbols: symbols.length,
    };
  } finally {
    // Parser, tree and query are released explicitly with delete(); doing it in
    // `finally` covers every exit path, including a throw while processing matches.
    query?.delete();
    tree?.delete();
    parser.delete();
  }
}
/**
 * Process a single query match into an OutlineSymbol.
 *
 * The match is expected to carry a `name` capture and a `definition.<kind>` capture;
 * the text after `definition.` becomes the symbol kind.
 *
 * @returns The symbol, or null when either capture is missing.
 */
function processMatch(
  match: Parser.QueryMatch,
  contentLines: string[],
  language: string
): OutlineSymbol | null {
  const definitionPrefix = 'definition.';
  let nameNode: Parser.SyntaxNode | null = null;
  let defNode: Parser.SyntaxNode | null = null;
  let kind = 'function';

  for (const capture of match.captures) {
    if (capture.name === 'name') {
      nameNode = capture.node;
    } else if (capture.name.startsWith(definitionPrefix)) {
      defNode = capture.node;
      kind = capture.name.slice(definitionPrefix.length);
    }
  }

  if (!defNode || !nameNode) return null;

  return {
    kind: kind as OutlineSymbol['kind'],
    name: nameNode.text,
    line: defNode.startPosition.row,
    endLine: defNode.endPosition.row,
    doc: extractDoc(defNode, contentLines, language),
    signature: extractSignature(defNode.text, language),
    parent: findParent(defNode),
    children: countChildren(defNode, kind),
  };
}

/**
 * Index of the first `{` on `line` that is not nested inside `(...)` or `[...]`,
 * or -1 when there is none. Braces inside a parameter list (destructuring patterns,
 * inline object types) therefore do not count as the start of a body.
 */
function findBodyBrace(line: string): number {
  let depth = 0;
  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (ch === '(' || ch === '[') {
      depth++;
    } else if (ch === ')' || ch === ']') {
      if (depth > 0) depth--;
    } else if (ch === '{' && depth === 0) {
      return i;
    }
  }
  return -1;
}

/**
 * Extract a concise signature from the node text.
 * Takes the first line, removes the body start, truncates to 200 chars
 * (an ellipsis is appended after the cut).
 *
 * @param nodeText - Source text of the definition node.
 * @param language - Grammar name; `python` strips a trailing `:` instead of a brace.
 */
function extractSignature(nodeText: string, language: string): string {
  let sig = (nodeText.split('\n')[0] ?? '').trimEnd();

  if (language === 'python') {
    // Remove trailing colon (body start)
    if (sig.endsWith(':')) {
      sig = sig.slice(0, -1).trimEnd();
    }
  } else {
    // Remove the body-opening brace and everything after it. A brace at index 0 is
    // kept, otherwise the signature would become empty.
    const braceIdx = findBodyBrace(sig);
    if (braceIdx > 0) {
      sig = sig.substring(0, braceIdx).trimEnd();
    }
  }

  if (sig.length > 200) {
    sig = sig.substring(0, 200) + '...';
  }

  return sig;
}
/**
 * Extract documentation comment for a definition node.
 *
 * Python definitions use the docstring inside the body; every other language uses
 * the comment that precedes the definition.
 */
function extractDoc(
  defNode: Parser.SyntaxNode,
  contentLines: string[],
  language: string
): string | null {
  if (language === 'python') {
    return extractPythonDocstring(defNode);
  }
  return extractCommentDoc(defNode, contentLines);
}

/**
 * Whether a trimmed line is a line comment: `//`, `///`, or `#` followed by
 * whitespace / end of line. Python never reaches the comment path (see extractDoc),
 * so a `#` glued to a word or bracket (`#include`, `#define`, `#region`, `#[derive]`)
 * is treated as a directive or attribute, not as documentation.
 */
function isLineComment(text: string): boolean {
  return text.startsWith('//') || /^#(?:\s|$)/.test(text);
}

/**
 * Extract comment doc by looking at lines before the definition.
 *
 * Rust-style attribute lines (`#[...]`) directly above the definition are stepped
 * over, then at most one blank line. The line reached must end a block comment or
 * be a line comment, otherwise there is no doc.
 *
 * @returns The cleaned comment text, or null when no doc comment precedes the definition.
 */
function extractCommentDoc(
  defNode: Parser.SyntaxNode,
  contentLines: string[]
): string | null {
  const trimmedAt = (idx: number): string => (contentLines[idx] ?? '').trim();

  let endIdx = defNode.startPosition.row - 1;

  // Attributes sit between a doc comment and its item; they are not part of the doc.
  while (endIdx >= 0 && trimmedAt(endIdx).startsWith('#[')) {
    endIdx--;
  }
  if (endIdx < 0) return null;

  // Skip at most one blank line
  if (trimmedAt(endIdx) === '') {
    endIdx--;
    if (endIdx < 0) return null;
  }

  const endText = trimmedAt(endIdx);

  // Block comment ending with */
  if (endText.endsWith('*/')) {
    let startIdx = endIdx;
    while (!trimmedAt(startIdx).startsWith('/*')) {
      const raw = contentLines[startIdx] ?? '';
      // On the closing line a '/*' that is not at the start means the comment trails
      // code (`x = 0; /* note */`). Above it, a '*/' means we walked past the comment
      // into an earlier one. Neither is a doc block for this definition.
      const notADocBlock = startIdx === endIdx ? raw.includes('/*') : raw.includes('*/');
      if (notADocBlock) return null;
      startIdx--;
      if (startIdx < 0) return null; // no opener found
    }
    return cleanBlockComment(contentLines.slice(startIdx, endIdx + 1).join('\n'));
  }

  // Line comments (// or /// or #)
  if (isLineComment(endText)) {
    let startIdx = endIdx;
    while (startIdx > 0 && isLineComment(trimmedAt(startIdx - 1))) {
      startIdx--;
    }
    return cleanLineComments(contentLines.slice(startIdx, endIdx + 1).join('\n'));
  }

  return null;
}
// A complete Python string literal: optional r/u prefix, a quote (triple or single),
// the content, and the same quote again.
const PY_STRING_LITERAL = /^[rRuU]?("""|'''|"|')([\s\S]*)\1$/;

/**
 * Extract Python docstring from function/class body.
 *
 * The docstring is the first statement of the body when that statement is a string
 * expression. For a plain string the prefix and quotes are removed; a concatenated
 * string is returned as written (trimmed).
 *
 * @returns The trimmed docstring text, or null when there is none or it is empty.
 */
function extractPythonDocstring(defNode: Parser.SyntaxNode): string | null {
  const body = defNode.childForFieldName('body');
  if (!body) return null;

  const firstStatement = body.namedChildren[0];
  if (!firstStatement || firstStatement.type !== 'expression_statement') return null;

  const expr = firstStatement.namedChildren[0];
  if (!expr || (expr.type !== 'string' && expr.type !== 'concatenated_string')) return null;

  let text = expr.text;
  if (expr.type === 'string') {
    const literal = PY_STRING_LITERAL.exec(text);
    if (literal) text = literal[2] ?? '';
  }

  text = text.trim();
  return text || null;
}

/**
 * Clean block comment text.
 *
 * Removes the opening and closing markers and one leading `*` per line. Lines may be
 * separated by LF or CRLF; the result always uses LF.
 *
 * @returns The comment content, or null when nothing but markers/whitespace remains.
 */
function cleanBlockComment(text: string): string | null {
  let lines = text.split(/\r?\n/);
  const last = lines.length - 1;
  // Remove /* and */
  lines[0] = (lines[0] ?? '').replace(/^\s*\/\*\*?\s?/, '');
  lines[last] = (lines[last] ?? '').replace(/\s*\*\/\s*$/, '');
  // Remove leading * from middle lines
  lines = lines.map(l => l.replace(/^\s*\*\s?/, ''));
  const result = lines.join('\n').trim();
  return result || null;
}

/**
 * Clean line comment (// or #) text.
 *
 * Strips one `//`, `///` or `#` marker (plus one following space) from each line.
 * Lines may be separated by LF or CRLF; the result always uses LF.
 *
 * @returns The comment content, or null when it is empty.
 */
function cleanLineComments(text: string): string | null {
  const lines = text.split(/\r?\n/).map(l => l.replace(/^\s*(?:\/\/\/?\s?|#\s?)/, ''));
  const result = lines.join('\n').trim();
  return result || null;
}
+ */ +function findParent(defNode: Parser.SyntaxNode): string | null { + let current = defNode.parent; + while (current) { + const type = current.type; + + // Common parent types across languages + if ( + type === 'class_declaration' || type === 'interface_declaration' || + type === 'class_definition' || type === 'enum_declaration' || + type === 'impl_item' || type === 'class_specifier' || type === 'struct_specifier' + ) { + // Try 'name' field first, then 'type' field (for Rust impl_item) + const nameNode = current.childForFieldName('name') || current.childForFieldName('type'); + if (nameNode) return nameNode.text; + } + + current = current.parent; + } + + return null; +} + +/** + * Count direct children (methods/properties) for class/interface nodes. + */ +function countChildren(defNode: Parser.SyntaxNode, kind: string): number { + if (kind !== 'class' && kind !== 'interface') return 0; + + // Find the body node (class_body, interface_body, block, declaration_list, etc.) + let body = defNode.childForFieldName('body'); + if (!body) { + for (const child of defNode.namedChildren) { + if ( + child.type === 'class_body' || child.type === 'interface_body' || + child.type === 'declaration_list' || child.type === 'block' || + child.type === 'enum_body' || child.type === 'field_declaration_list' + ) { + body = child; + break; + } + } + } + + if (!body) return 0; + return body.namedChildCount; +} diff --git a/ccw/src/utils/outline-queries.ts b/ccw/src/utils/outline-queries.ts new file mode 100644 index 00000000..7c84106a --- /dev/null +++ b/ccw/src/utils/outline-queries.ts @@ -0,0 +1,150 @@ +/** + * Language configurations and tree-sitter query definitions for outline parsing. 
/**
 * Language configurations and tree-sitter query definitions for outline parsing.
 */

import { extname } from 'path';

/** How to outline one language: which grammar to load and which nodes are symbols. */
export interface LanguageConfig {
  /** Grammar name, used to locate `tree-sitter-<grammarName>.wasm`. */
  grammarName: string;
  /** Lower-case file extensions (with leading dot) handled by this language. */
  extensions: string[];
  /** Tree-sitter query; each pattern captures `@name` and `@definition.<kind>`. */
  symbolQuery: string;
}

// Shared by the `typescript` and `tsx` entries so the two cannot drift apart
// (the TSX list previously lacked abstract method signatures).
const TYPESCRIPT_SYMBOL_QUERY = [
  '(function_declaration name: (identifier) @name) @definition.function',
  '(class_declaration name: (type_identifier) @name) @definition.class',
  '(method_definition name: (property_identifier) @name) @definition.method',
  '(abstract_method_signature name: (property_identifier) @name) @definition.method',
  '(interface_declaration name: (type_identifier) @name) @definition.interface',
  '(type_alias_declaration name: (type_identifier) @name) @definition.type',
  '(enum_declaration name: (identifier) @name) @definition.enum',
  '(variable_declarator name: (identifier) @name value: (arrow_function)) @definition.function',
  '(variable_declarator name: (identifier) @name value: (function_expression)) @definition.function',
].join('\n');

/** Supported languages keyed by the lower-case language name accepted as a hint. */
export const LANGUAGE_CONFIGS: Record<string, LanguageConfig> = {
  typescript: {
    grammarName: 'typescript',
    extensions: ['.ts', '.mts', '.cts'],
    symbolQuery: TYPESCRIPT_SYMBOL_QUERY,
  },
  tsx: {
    grammarName: 'tsx',
    extensions: ['.tsx'],
    symbolQuery: TYPESCRIPT_SYMBOL_QUERY,
  },
  javascript: {
    grammarName: 'javascript',
    extensions: ['.js', '.jsx', '.mjs', '.cjs'],
    symbolQuery: [
      '(function_declaration name: (identifier) @name) @definition.function',
      '(class_declaration name: (identifier) @name) @definition.class',
      '(method_definition name: (property_identifier) @name) @definition.method',
      '(variable_declarator name: (identifier) @name value: (arrow_function)) @definition.function',
      '(variable_declarator name: (identifier) @name value: (function_expression)) @definition.function',
    ].join('\n'),
  },
  python: {
    grammarName: 'python',
    extensions: ['.py'],
    symbolQuery: [
      '(function_definition name: (identifier) @name) @definition.function',
      '(class_definition name: (identifier) @name) @definition.class',
    ].join('\n'),
  },
  go: {
    grammarName: 'go',
    extensions: ['.go'],
    symbolQuery: [
      '(function_declaration name: (identifier) @name) @definition.function',
      '(method_declaration name: (field_identifier) @name) @definition.method',
      '(type_spec name: (type_identifier) @name) @definition.type',
    ].join('\n'),
  },
  rust: {
    grammarName: 'rust',
    extensions: ['.rs'],
    symbolQuery: [
      '(function_item name: (identifier) @name) @definition.function',
      '(struct_item name: (type_identifier) @name) @definition.class',
      '(enum_item name: (type_identifier) @name) @definition.enum',
      '(trait_item name: (type_identifier) @name) @definition.interface',
      '(impl_item type: (type_identifier) @name) @definition.class',
    ].join('\n'),
  },
  java: {
    grammarName: 'java',
    extensions: ['.java'],
    symbolQuery: [
      '(class_declaration name: (identifier) @name) @definition.class',
      '(method_declaration name: (identifier) @name) @definition.method',
      '(interface_declaration name: (identifier) @name) @definition.interface',
      '(enum_declaration name: (identifier) @name) @definition.enum',
      '(constructor_declaration name: (identifier) @name) @definition.method',
    ].join('\n'),
  },
  csharp: {
    grammarName: 'c_sharp',
    extensions: ['.cs'],
    symbolQuery: [
      '(class_declaration name: (identifier) @name) @definition.class',
      '(method_declaration name: (identifier) @name) @definition.method',
      '(interface_declaration name: (identifier) @name) @definition.interface',
      '(enum_declaration name: (identifier) @name) @definition.enum',
      '(constructor_declaration name: (identifier) @name) @definition.method',
    ].join('\n'),
  },
  c: {
    grammarName: 'c',
    extensions: ['.c', '.h'],
    symbolQuery: [
      '(function_definition declarator: (function_declarator declarator: (identifier) @name)) @definition.function',
      '(struct_specifier name: (type_identifier) @name) @definition.class',
      '(enum_specifier name: (type_identifier) @name) @definition.enum',
    ].join('\n'),
  },
  cpp: {
    grammarName: 'cpp',
    extensions: ['.cpp', '.hpp', '.cc', '.cxx'],
    symbolQuery: [
      '(function_definition declarator: (function_declarator declarator: (identifier) @name)) @definition.function',
      '(function_definition declarator: (function_declarator declarator: (qualified_identifier name: (identifier) @name))) @definition.function',
      '(class_specifier name: (type_identifier) @name) @definition.class',
      '(struct_specifier name: (type_identifier) @name) @definition.class',
      '(enum_specifier name: (type_identifier) @name) @definition.enum',
    ].join('\n'),
  },
};

// Build extension → language name lookup map
const EXTENSION_MAP = new Map<string, string>();
for (const [lang, config] of Object.entries(LANGUAGE_CONFIGS)) {
  for (const ext of config.extensions) {
    EXTENSION_MAP.set(ext, lang);
  }
}
+ */ +export function detectLanguage(filePath: string, hint?: string): LanguageConfig | null { + if (hint) { + const normalized = hint.toLowerCase(); + const config = LANGUAGE_CONFIGS[normalized]; + if (config) return config; + } + + const ext = extname(filePath).toLowerCase(); + const lang = EXTENSION_MAP.get(ext); + if (lang) return LANGUAGE_CONFIGS[lang]; + + return null; +} diff --git a/codex-lens/benchmarks/results/compare_2026-02-09_dir_rr_fast5.json b/codex-lens/benchmarks/results/compare_2026-02-09_dir_rr_fast5.json new file mode 100644 index 00000000..2d30d43e --- /dev/null +++ b/codex-lens/benchmarks/results/compare_2026-02-09_dir_rr_fast5.json @@ -0,0 +1,466 @@ +{ + "summary": { + "timestamp": "2026-02-09 20:48:55", + "source": "src", + "k": 10, + "coarse_k": 100, + "query_count": 7, + "avg_jaccard_topk": 0.11418494830148965, + "avg_rbo_topk": 0.08910725003591835, + "staged": { + "success": 7, + "avg_latency_ms": 16443.109000005894 + }, + "dense_rerank": { + "success": 7, + "avg_latency_ms": 2919.481471432107 + } + }, + "comparisons": [ + { + "query": "class Config", + "staged": { + "strategy": "staged", + "query": "class Config", + "latency_ms": 6056.956700026989, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\__init__.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\file_watcher.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 
113.12270164489746, + "stage1_fallback_search_ms": 262.55249977111816, + "stage2_expand_ms": 3022.8426456451416, + "stage3_cluster_ms": 1.155853271484375, + "stage4_rerank_ms": 2554.953098297119 + }, + "stage_counts": { + "stage1_candidates": 37, + "stage1_fallback_used": 1, + "stage2_expanded": 86, + "stage2_unique_paths": 53, + "stage2_duplicate_paths": 33, + "stage3_clustered": 20, + "stage3_strategy": "dir_rr", + "stage4_reranked": 20 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "class Config", + "latency_ms": 2788.0383999943733, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.05263157894736842, + "rbo_topk": 0.014635885139999999, + "staged_unique_files_topk": 10, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 8, + "dense_unique_dirs_topk": 4 + }, + { + "query": "def search", + "staged": { + "strategy": "staged", + "query": "def search", + "latency_ms": 12229.477500021458, + "num_results": 3, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 
108.82282257080078, + "stage2_expand_ms": 9422.304153442383, + "stage3_cluster_ms": 0.001430511474609375, + "stage4_rerank_ms": 2611.234664916992 + }, + "stage_counts": { + "stage1_candidates": 3, + "stage2_expanded": 4, + "stage2_unique_paths": 3, + "stage2_duplicate_paths": 1, + "stage3_clustered": 4, + "stage3_strategy": "dir_rr", + "stage4_reranked": 4 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "def search", + "latency_ms": 2823.377499997616, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.09090909090909091, + "rbo_topk": 0.23541639942571424, + "staged_unique_files_topk": 2, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 2, + "dense_unique_dirs_topk": 4 + }, + { + "query": "LspBridge", + "staged": { + "strategy": "staged", + "query": "LspBridge", + "latency_ms": 33805.434699982405, + "num_results": 5, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", + 
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 100.5556583404541, + "stage1_fallback_search_ms": 176.71489715576172, + "stage2_expand_ms": 31017.661809921265, + "stage3_cluster_ms": 0.001430511474609375, + "stage4_rerank_ms": 2403.3148288726807 + }, + "stage_counts": { + "stage1_candidates": 5, + "stage1_fallback_used": 1, + "stage2_expanded": 5, + "stage2_unique_paths": 5, + "stage2_duplicate_paths": 0, + "stage3_clustered": 5, + "stage3_strategy": "dir_rr", + "stage4_reranked": 5 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "LspBridge", + "latency_ms": 2906.127400010824, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.07142857142857142, + "rbo_topk": 0.045191399425714276, + "staged_unique_files_topk": 5, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 2, + "dense_unique_dirs_topk": 4 + }, + { + "query": "graph expansion", + "staged": { + "strategy": "staged", + "query": "graph expansion", + "latency_ms": 16790.213800013065, + "num_results": 10, + "topk_paths": [ + 
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\hybrid_search\\__init__.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 110.00967025756836, + "stage1_fallback_search_ms": 176.9556999206543, + "stage2_expand_ms": 13929.782629013062, + "stage3_cluster_ms": 0.45800209045410156, + "stage4_rerank_ms": 2486.6883754730225 + }, + "stage_counts": { + "stage1_candidates": 11, + "stage1_fallback_used": 1, + "stage2_expanded": 29, + "stage2_unique_paths": 14, + "stage2_duplicate_paths": 15, + "stage3_clustered": 20, + "stage3_strategy": "dir_rr", + "stage4_reranked": 20 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "graph expansion", + "latency_ms": 2866.819000005722, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + 
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.1875, + "rbo_topk": 0.06893318399142857, + "staged_unique_files_topk": 9, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 8, + "dense_unique_dirs_topk": 4 + }, + { + "query": "clustering strategy", + "staged": { + "strategy": "staged", + "query": "clustering strategy", + "latency_ms": 9090.759900003672, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 85.28780937194824, + "stage1_fallback_search_ms": 183.7012767791748, + "stage2_expand_ms": 5557.527780532837, + "stage3_cluster_ms": 0.001430511474609375, + "stage4_rerank_ms": 3164.6268367767334 + }, + "stage_counts": { + "stage1_candidates": 10, + "stage1_fallback_used": 1, + "stage2_expanded": 10, + "stage2_unique_paths": 10, + "stage2_duplicate_paths": 0, + "stage3_clustered": 10, + "stage3_strategy": "dir_rr", + "stage4_reranked": 10 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "clustering strategy", 
+ "latency_ms": 3062.4616000056267, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.1111111111111111, + "rbo_topk": 0.04670528456571428, + "staged_unique_files_topk": 10, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 3, + "dense_unique_dirs_topk": 4 + }, + { + "query": "error handling", + "staged": { + "strategy": "staged", + "query": "error handling", + "latency_ms": 19777.87659996748, + "num_results": 6, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 65.9482479095459, + "stage1_fallback_search_ms": 181.9770336151123, + "stage2_expand_ms": 16960.813760757446, + "stage3_cluster_ms": 0.00095367431640625, + "stage4_rerank_ms": 2472.1477031707764 + }, + "stage_counts": { + "stage1_candidates": 5, + "stage1_fallback_used": 1, + "stage2_expanded": 13, + "stage2_unique_paths": 6, + 
"stage2_duplicate_paths": 7, + "stage3_clustered": 13, + "stage3_strategy": "dir_rr", + "stage4_reranked": 13 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "error handling", + "latency_ms": 2854.169200003147, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.07142857142857142, + "rbo_topk": 0.045191399425714276, + "staged_unique_files_topk": 5, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 4, + "dense_unique_dirs_topk": 4 + }, + { + "query": "how to parse json", + "staged": { + "strategy": "staged", + "query": "how to parse json", + "latency_ms": 17351.04380002618, + "num_results": 7, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\treesitter_parser.py" + ], + "stage_stats": 
{ + "stage_times": { + "stage1_binary_ms": 119.1408634185791, + "stage1_fallback_search_ms": 246.2625503540039, + "stage2_expand_ms": 14137.234449386597, + "stage3_cluster_ms": 0.0011920928955078125, + "stage4_rerank_ms": 2750.417470932007 + }, + "stage_counts": { + "stage1_candidates": 4, + "stage1_fallback_used": 1, + "stage2_expanded": 11, + "stage2_unique_paths": 7, + "stage2_duplicate_paths": 4, + "stage3_clustered": 11, + "stage3_strategy": "dir_rr", + "stage4_reranked": 11 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "how to parse json", + "latency_ms": 3135.3772000074387, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.21428571428571427, + "rbo_topk": 0.16767719827714284, + "staged_unique_files_topk": 7, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 5, + "dense_unique_dirs_topk": 4 + } + ] +} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-09_dir_rr_fast6.json b/codex-lens/benchmarks/results/compare_2026-02-09_dir_rr_fast6.json new file mode 100644 index 00000000..bdc35197 --- /dev/null +++ b/codex-lens/benchmarks/results/compare_2026-02-09_dir_rr_fast6.json @@ -0,0 +1,467 @@ +{ + 
"summary": { + "timestamp": "2026-02-09 20:56:02", + "source": "src", + "k": 10, + "coarse_k": 100, + "query_count": 7, + "avg_jaccard_topk": 0.11350467619264612, + "avg_rbo_topk": 0.09062624799510204, + "staged": { + "success": 7, + "avg_latency_ms": 8679.35167142323 + }, + "dense_rerank": { + "success": 7, + "avg_latency_ms": 3097.294714289052 + } + }, + "comparisons": [ + { + "query": "class Config", + "staged": { + "strategy": "staged", + "query": "class Config", + "latency_ms": 6814.465099990368, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\__init__.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\file_watcher.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 85.55030822753906, + "stage1_fallback_search_ms": 197.95989990234375, + "stage2_expand_ms": 3032.4549674987793, + "stage3_cluster_ms": 1.1937618255615234, + "stage4_rerank_ms": 3402.9476642608643 + }, + "stage_counts": { + "stage1_candidates": 37, + "stage1_fallback_used": 1, + "stage2_expanded": 86, + "stage2_unique_paths": 53, + "stage2_duplicate_paths": 33, + "stage3_clustered": 20, + "stage3_strategy": "dir_rr", + "stage4_reranked": 20 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "class Config", + "latency_ms": 3175.0339000225067, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", 
+ "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.05263157894736842, + "rbo_topk": 0.014635885139999999, + "staged_unique_files_topk": 10, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 8, + "dense_unique_dirs_topk": 4 + }, + { + "query": "def search", + "staged": { + "strategy": "staged", + "query": "def search", + "latency_ms": 8990.238099992275, + "num_results": 3, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 90.6367301940918, + "stage2_expand_ms": 6272.260665893555, + "stage3_cluster_ms": 0.00095367431640625, + "stage4_rerank_ms": 2531.4290523529053 + }, + "stage_counts": { + "stage1_candidates": 3, + "stage2_expanded": 4, + "stage2_unique_paths": 3, + "stage2_duplicate_paths": 1, + "stage3_clustered": 4, + "stage3_strategy": "dir_rr", + "stage4_reranked": 4 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "def search", + "latency_ms": 3434.4095999896526, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", + 
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.09090909090909091, + "rbo_topk": 0.23541639942571424, + "staged_unique_files_topk": 2, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 2, + "dense_unique_dirs_topk": 4 + }, + { + "query": "LspBridge", + "staged": { + "strategy": "staged", + "query": "LspBridge", + "latency_ms": 9296.205000013113, + "num_results": 7, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\keepalive_bridge.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 86.64774894714355, + "stage1_fallback_search_ms": 163.8650894165039, + "stage2_expand_ms": 6144.1497802734375, + "stage3_cluster_ms": 0.4100799560546875, + "stage4_rerank_ms": 2807.274580001831 + }, + "stage_counts": { + "stage1_candidates": 5, + "stage1_fallback_used": 1, + "stage2_expanded": 31, + "stage2_unique_paths": 11, + "stage2_duplicate_paths": 20, + "stage3_clustered": 20, + "stage3_strategy": "dir_rr", + "stage4_reranked": 20 + } + }, + "error": null + }, + "dense_rerank": { + 
"strategy": "dense_rerank", + "query": "LspBridge", + "latency_ms": 3043.4417999982834, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.06666666666666667, + "rbo_topk": 0.045191399425714276, + "staged_unique_files_topk": 6, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 2, + "dense_unique_dirs_topk": 4 + }, + { + "query": "graph expansion", + "staged": { + "strategy": "staged", + "query": "graph expansion", + "latency_ms": 9086.15110000968, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" + ], + "stage_stats": { + 
"stage_times": { + "stage1_binary_ms": 72.22437858581543, + "stage1_fallback_search_ms": 166.3804054260254, + "stage2_expand_ms": 6179.303169250488, + "stage3_cluster_ms": 0.00095367431640625, + "stage4_rerank_ms": 2575.9027004241943 + }, + "stage_counts": { + "stage1_candidates": 11, + "stage1_fallback_used": 1, + "stage2_expanded": 16, + "stage2_unique_paths": 13, + "stage2_duplicate_paths": 3, + "stage3_clustered": 16, + "stage3_strategy": "dir_rr", + "stage4_reranked": 16 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "graph expansion", + "latency_ms": 2793.8257000148296, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.1875, + "rbo_topk": 0.06134116970571428, + "staged_unique_files_topk": 9, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 7, + "dense_unique_dirs_topk": 4 + }, + { + "query": "clustering strategy", + "staged": { + "strategy": "staged", + "query": "clustering strategy", + "latency_ms": 8401.927499979734, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", + 
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 72.67880439758301, + "stage1_fallback_search_ms": 166.71442985534668, + "stage2_expand_ms": 5561.89489364624, + "stage3_cluster_ms": 0.0007152557373046875, + "stage4_rerank_ms": 2517.7178382873535 + }, + "stage_counts": { + "stage1_candidates": 10, + "stage1_fallback_used": 1, + "stage2_expanded": 10, + "stage2_unique_paths": 10, + "stage2_duplicate_paths": 0, + "stage3_clustered": 10, + "stage3_strategy": "dir_rr", + "stage4_reranked": 10 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "clustering strategy", + "latency_ms": 3192.0045999884605, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + 
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.1111111111111111, + "rbo_topk": 0.04670528456571428, + "staged_unique_files_topk": 10, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 3, + "dense_unique_dirs_topk": 4 + }, + { + "query": "error handling", + "staged": { + "strategy": "staged", + "query": "error handling", + "latency_ms": 9032.269400000572, + "num_results": 6, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 78.59635353088379, + "stage1_fallback_search_ms": 180.96280097961426, + "stage2_expand_ms": 6175.840377807617, + "stage3_cluster_ms": 0.001430511474609375, + "stage4_rerank_ms": 2503.4260749816895 + }, + "stage_counts": { + "stage1_candidates": 5, + "stage1_fallback_used": 1, + "stage2_expanded": 13, + "stage2_unique_paths": 6, + "stage2_duplicate_paths": 7, + "stage3_clustered": 13, + "stage3_strategy": "dir_rr", + "stage4_reranked": 13 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "error handling", + "latency_ms": 3076.744800001383, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + 
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.07142857142857142, + "rbo_topk": 0.045191399425714276, + "staged_unique_files_topk": 5, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 4, + "dense_unique_dirs_topk": 4 + }, + { + "query": "how to parse json", + "staged": { + "strategy": "staged", + "query": "how to parse json", + "latency_ms": 9134.205499976873, + "num_results": 7, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\treesitter_parser.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 117.79379844665527, + "stage1_fallback_search_ms": 187.53886222839355, + "stage2_expand_ms": 6218.849658966064, + "stage3_cluster_ms": 0.00095367431640625, + "stage4_rerank_ms": 2515.6633853912354 + }, + "stage_counts": { + "stage1_candidates": 4, + "stage1_fallback_used": 1, + "stage2_expanded": 9, + "stage2_unique_paths": 7, + "stage2_duplicate_paths": 2, + "stage3_clustered": 9, + "stage3_strategy": "dir_rr", + "stage4_reranked": 9 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "how to parse json", + "latency_ms": 2965.6026000082493, + "num_results": 10, 
+ "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.21428571428571427, + "rbo_topk": 0.18590219827714285, + "staged_unique_files_topk": 7, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 5, + "dense_unique_dirs_topk": 4 + } + ] +} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-09_score_fast6.json b/codex-lens/benchmarks/results/compare_2026-02-09_score_fast6.json new file mode 100644 index 00000000..d76156dc --- /dev/null +++ b/codex-lens/benchmarks/results/compare_2026-02-09_score_fast6.json @@ -0,0 +1,465 @@ +{ + "summary": { + "timestamp": "2026-02-09 20:53:01", + "source": "src", + "k": 10, + "coarse_k": 100, + "query_count": 7, + "avg_jaccard_topk": 0.12384302205730777, + "avg_rbo_topk": 0.09816673566816325, + "staged": { + "success": 7, + "avg_latency_ms": 8696.564499999795 + }, + "dense_rerank": { + "success": 7, + "avg_latency_ms": 2936.2583857136115 + } + }, + "comparisons": [ + { + "query": "class Config", + "staged": { + "strategy": "staged", + "query": "class Config", + "latency_ms": 6108.304299980402, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + 
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 90.47985076904297, + "stage1_fallback_search_ms": 224.38788414001465, + "stage2_expand_ms": 3031.7258834838867, + "stage3_cluster_ms": 0.02956390380859375, + "stage4_rerank_ms": 2655.31849861145 + }, + "stage_counts": { + "stage1_candidates": 37, + "stage1_fallback_used": 1, + "stage2_expanded": 86, + "stage2_unique_paths": 53, + "stage2_duplicate_paths": 33, + "stage3_clustered": 20, + "stage3_strategy": "score", + "stage4_reranked": 20 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "class Config", + "latency_ms": 2873.6466999948025, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.125, + "rbo_topk": 
0.06741929885142856, + "staged_unique_files_topk": 8, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 5, + "dense_unique_dirs_topk": 4 + }, + { + "query": "def search", + "staged": { + "strategy": "staged", + "query": "def search", + "latency_ms": 9321.754200011492, + "num_results": 3, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 140.43283462524414, + "stage2_expand_ms": 6410.467863082886, + "stage3_cluster_ms": 0.00095367431640625, + "stage4_rerank_ms": 2675.7972240448 + }, + "stage_counts": { + "stage1_candidates": 3, + "stage2_expanded": 4, + "stage2_unique_paths": 3, + "stage2_duplicate_paths": 1, + "stage3_clustered": 4, + "stage3_strategy": "score", + "stage4_reranked": 4 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "def search", + "latency_ms": 3104.7773999869823, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.09090909090909091, + "rbo_topk": 0.23541639942571424, + "staged_unique_files_topk": 2, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 2, + 
"dense_unique_dirs_topk": 4 + }, + { + "query": "LspBridge", + "staged": { + "strategy": "staged", + "query": "LspBridge", + "latency_ms": 9527.073799997568, + "num_results": 6, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\keepalive_bridge.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 98.59919548034668, + "stage1_fallback_search_ms": 172.26457595825195, + "stage2_expand_ms": 6125.282049179077, + "stage3_cluster_ms": 0.017404556274414062, + "stage4_rerank_ms": 3023.9248275756836 + }, + "stage_counts": { + "stage1_candidates": 5, + "stage1_fallback_used": 1, + "stage2_expanded": 31, + "stage2_unique_paths": 11, + "stage2_duplicate_paths": 20, + "stage3_clustered": 20, + "stage3_strategy": "score", + "stage4_reranked": 20 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "LspBridge", + "latency_ms": 2901.0302999913692, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", + 
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.06666666666666667, + "rbo_topk": 0.045191399425714276, + "staged_unique_files_topk": 6, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 2, + "dense_unique_dirs_topk": 4 + }, + { + "query": "graph expansion", + "staged": { + "strategy": "staged", + "query": "graph expansion", + "latency_ms": 9120.886200010777, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 91.48454666137695, + "stage1_fallback_search_ms": 172.12390899658203, + "stage2_expand_ms": 6166.24903678894, + "stage3_cluster_ms": 0.00095367431640625, + "stage4_rerank_ms": 2601.947546005249 + }, + "stage_counts": { + "stage1_candidates": 11, + "stage1_fallback_used": 1, + "stage2_expanded": 16, + "stage2_unique_paths": 13, + "stage2_duplicate_paths": 3, + "stage3_clustered": 16, + "stage3_strategy": "score", + "stage4_reranked": 16 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "graph expansion", + "latency_ms": 2847.6964999735355, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + 
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.1875, + "rbo_topk": 0.06134116970571428, + "staged_unique_files_topk": 9, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 7, + "dense_unique_dirs_topk": 4 + }, + { + "query": "clustering strategy", + "staged": { + "strategy": "staged", + "query": "clustering strategy", + "latency_ms": 8424.535699993372, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 92.8945541381836, + 
"stage1_fallback_search_ms": 192.06547737121582, + "stage2_expand_ms": 5568.126440048218, + "stage3_cluster_ms": 0.0011920928955078125, + "stage4_rerank_ms": 2480.673313140869 + }, + "stage_counts": { + "stage1_candidates": 10, + "stage1_fallback_used": 1, + "stage2_expanded": 10, + "stage2_unique_paths": 10, + "stage2_duplicate_paths": 0, + "stage3_clustered": 10, + "stage3_strategy": "score", + "stage4_reranked": 10 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "clustering strategy", + "latency_ms": 2974.9999000132084, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.1111111111111111, + "rbo_topk": 0.04670528456571428, + "staged_unique_files_topk": 10, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 3, + "dense_unique_dirs_topk": 4 + }, + { + "query": "error handling", + "staged": { + "strategy": "staged", + "query": "error handling", + "latency_ms": 9253.624700009823, + "num_results": 6, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", + 
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 102.18691825866699, + "stage1_fallback_search_ms": 176.97691917419434, + "stage2_expand_ms": 6113.626480102539, + "stage3_cluster_ms": 0.00095367431640625, + "stage4_rerank_ms": 2774.4452953338623 + }, + "stage_counts": { + "stage1_candidates": 5, + "stage1_fallback_used": 1, + "stage2_expanded": 13, + "stage2_unique_paths": 6, + "stage2_duplicate_paths": 7, + "stage3_clustered": 13, + "stage3_strategy": "score", + "stage4_reranked": 13 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "error handling", + "latency_ms": 2860.619900047779, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" + ], + "stage_stats": null, + "error": null + }, + "jaccard_topk": 0.07142857142857142, + "rbo_topk": 0.045191399425714276, + "staged_unique_files_topk": 5, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 4, + "dense_unique_dirs_topk": 4 + }, + { + "query": "how to parse json", + "staged": { + "strategy": "staged", + "query": "how to parse json", + "latency_ms": 9119.772599995136, + "num_results": 7, + "topk_paths": [ + 
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\treesitter_parser.py" + ], + "stage_stats": { + "stage_times": { + "stage1_binary_ms": 90.18850326538086, + "stage1_fallback_search_ms": 157.95397758483887, + "stage2_expand_ms": 6293.469429016113, + "stage3_cluster_ms": 0.0011920928955078125, + "stage4_rerank_ms": 2486.8383407592773 + }, + "stage_counts": { + "stage1_candidates": 4, + "stage1_fallback_used": 1, + "stage2_expanded": 9, + "stage2_unique_paths": 7, + "stage2_duplicate_paths": 2, + "stage3_clustered": 9, + "stage3_strategy": "score", + "stage4_reranked": 9 + } + }, + "error": null + }, + "dense_rerank": { + "strategy": "dense_rerank", + "query": "how to parse json", + "latency_ms": 2991.0379999876022, + "num_results": 10, + "topk_paths": [ + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", + "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py" + ], + "stage_stats": null, + "error": null + }, + 
"jaccard_topk": 0.21428571428571427, + "rbo_topk": 0.18590219827714285, + "staged_unique_files_topk": 7, + "dense_unique_files_topk": 10, + "staged_unique_dirs_topk": 5, + "dense_unique_dirs_topk": 4 + } + ] +} \ No newline at end of file diff --git a/codex-lens/src/codexlens/lsp/lsp_graph_builder.py b/codex-lens/src/codexlens/lsp/lsp_graph_builder.py index a0ed381e..446fa2c7 100644 --- a/codex-lens/src/codexlens/lsp/lsp_graph_builder.py +++ b/codex-lens/src/codexlens/lsp/lsp_graph_builder.py @@ -134,7 +134,10 @@ class LspGraphBuilder: # Skip if already visited or at max depth if node.id in visited: return [] - if depth > self.max_depth: + # Depth is 0 for seeds. To limit expansion to N hops from seeds, + # we expand nodes with depth < max_depth. + if depth >= self.max_depth: + visited.add(node.id) return [] if len(graph.nodes) >= self.max_nodes: return [] diff --git a/codex-lens/tests/test_lsp_graph_builder_depth.py b/codex-lens/tests/test_lsp_graph_builder_depth.py new file mode 100644 index 00000000..ab70b770 --- /dev/null +++ b/codex-lens/tests/test_lsp_graph_builder_depth.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +import asyncio +from unittest.mock import AsyncMock + +import pytest + +from codexlens.hybrid_search.data_structures import CodeAssociationGraph, CodeSymbolNode, Range +from codexlens.lsp.lsp_graph_builder import LspGraphBuilder + + +@pytest.mark.asyncio +async def test_lsp_graph_builder_does_not_expand_at_max_depth() -> None: + """Depth semantics: max_depth is the number of hops from seeds.""" + builder = LspGraphBuilder(max_depth=1, max_nodes=10, max_concurrent=1, resolve_symbols=False) + + bridge = AsyncMock() + bridge.get_references.side_effect = RuntimeError("should not call references") + bridge.get_call_hierarchy.side_effect = RuntimeError("should not call call hierarchy") + + node = CodeSymbolNode( + id="x.py:foo:1", + name="foo", + kind="function", + file_path="x.py", + range=Range(start_line=1, start_character=1, end_line=1, 
end_character=1), + ) + graph = CodeAssociationGraph() + visited: set[str] = set() + sem = asyncio.Semaphore(1) + + # Seeds are depth=0. A node at depth==max_depth should not be expanded. + new_nodes = await builder._expand_node(node, 1, graph, bridge, visited, sem) # type: ignore[attr-defined] + assert new_nodes == [] + assert node.id in visited + diff --git a/codex-lens/tests/test_staged_stage1_fallback_seed.py b/codex-lens/tests/test_staged_stage1_fallback_seed.py new file mode 100644 index 00000000..ff9ea061 --- /dev/null +++ b/codex-lens/tests/test_staged_stage1_fallback_seed.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +import json +from pathlib import Path +from unittest.mock import MagicMock + +from codexlens.config import Config +from codexlens.entities import SearchResult +from codexlens.search.chain_search import ChainSearchEngine, ChainSearchResult, SearchOptions + + +def _extract_stage_stats(result: ChainSearchResult) -> dict: + for item in result.stats.errors or []: + if isinstance(item, str) and item.startswith("STAGE_STATS:"): + return json.loads(item[len("STAGE_STATS:") :]) + raise AssertionError("missing STAGE_STATS payload") + + +def test_staged_pipeline_seeds_from_fts_when_stage1_empty(monkeypatch) -> None: + cfg = Config.load() + cfg.enable_staged_rerank = False + cfg.staged_stage2_mode = "realtime" # ensure we pass through stage2 wrapper + cfg.staged_clustering_strategy = "score" + + engine = ChainSearchEngine(registry=MagicMock(), mapper=MagicMock(), config=cfg) + + # Avoid touching registry/mapper/index stores. + monkeypatch.setattr(engine, "_find_start_index", lambda *_a, **_k: Path("X:/fake/_index.db")) + monkeypatch.setattr(engine, "_collect_index_paths", lambda *_a, **_k: [Path("X:/fake/_index.db")]) + + # Force Stage 1 to return empty so the FTS seeding path is exercised. 
+ monkeypatch.setattr(engine, "_stage1_binary_search", lambda *_a, **_k: ([], Path("X:/fake"))) + + seed_results = [SearchResult(path="D:/p/a.py", score=1.0), SearchResult(path="D:/p/b.py", score=0.9)] + + # Provide a stable SearchStats instance for the fallback search call. + from codexlens.search.chain_search import SearchStats + + monkeypatch.setattr(engine, "search", lambda *_a, **_k: ChainSearchResult(query="q", results=seed_results, symbols=[], stats=SearchStats())) + + # Make later stages no-ops so we only validate plumbing. + monkeypatch.setattr(engine, "_stage2_lsp_expand", lambda results, *_a, **_k: results) + monkeypatch.setattr(engine, "_stage3_cluster_prune", lambda results, *_a, **_k: results) + + result = engine.staged_cascade_search("q", Path("."), k=2, coarse_k=5, options=SearchOptions()) + stage_stats = _extract_stage_stats(result) + + assert stage_stats["stage_counts"].get("stage1_fallback_used") == 1 + assert result.results and [r.path for r in result.results] == ["D:/p/a.py", "D:/p/b.py"] diff --git a/package-lock.json b/package-lock.json index e3ab9b91..2671d792 100644 --- a/package-lock.json +++ b/package-lock.json @@ -27,6 +27,8 @@ "node-pty": "^1.1.0-beta21", "open": "^9.1.0", "ora": "^7.0.0", + "tree-sitter-wasms": "^0.1.13", + "web-tree-sitter": "^0.24.0", "zod": "^4.1.13" }, "bin": { @@ -25844,6 +25846,15 @@ "tree-kill": "cli.js" } }, + "node_modules/tree-sitter-wasms": { + "version": "0.1.13", + "resolved": "https://registry.npmjs.org/tree-sitter-wasms/-/tree-sitter-wasms-0.1.13.tgz", + "integrity": "sha512-wT+cR6DwaIz80/vho3AvSF0N4txuNx/5bcRKoXouOfClpxh/qqrF4URNLQXbbt8MaAxeksZcZd1j8gcGjc+QxQ==", + "license": "Unlicense", + "dependencies": { + "tree-sitter-wasms": "^0.1.11" + } + }, "node_modules/trim-lines": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/trim-lines/-/trim-lines-3.0.1.tgz", @@ -28411,6 +28422,12 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/web-tree-sitter": { + "version": "0.24.0", 
+ "resolved": "https://registry.npmjs.org/web-tree-sitter/-/web-tree-sitter-0.24.0.tgz", + "integrity": "sha512-NZWxfjayD1NlO4klbaR7ohHTp67uBjYC/y7pr+aaiqdpAbRHvmSs30f6O562E6YrwaZwFVu38HIA7GDmuxgtUg==", + "license": "MIT" + }, "node_modules/web-vitals": { "version": "5.1.0", "resolved": "https://registry.npmjs.org/web-vitals/-/web-vitals-5.1.0.tgz", diff --git a/package.json b/package.json index 2ebd658a..b9eec834 100644 --- a/package.json +++ b/package.json @@ -60,6 +60,8 @@ "node-pty": "^1.1.0-beta21", "open": "^9.1.0", "ora": "^7.0.0", + "tree-sitter-wasms": "^0.1.13", + "web-tree-sitter": "^0.24.0", "zod": "^4.1.13" }, "files": [