From 35485bbbb115f91f3d80dde2f33f82a4771e0628 Mon Sep 17 00:00:00 2001 From: catlog22 Date: Mon, 15 Dec 2025 23:11:01 +0800 Subject: [PATCH] feat: Enhance navigation and cleanup for graph explorer view - Added a cleanup function to reset the state when navigating away from the graph explorer. - Updated navigation logic to call the cleanup function before switching views. - Improved internationalization by adding new translations for graph-related terms. - Adjusted icon sizes for better UI consistency in the graph explorer. - Implemented impact analysis button functionality in the graph explorer. - Refactored CLI tool configuration to use updated model names. - Enhanced CLI executor to handle prompts correctly for codex commands. - Introduced code relationship storage for better visualization in the index tree. - Added support for parsing Markdown and plain text files in the symbol parser. - Updated tests to reflect changes in language detection logic. --- .gitattributes | 1 - .gitignore | 4 +- .npmignore | 2 +- ccw/src/cli.ts | 2 +- ccw/src/commands/install.ts | 19 +- ccw/src/config/storage-paths.ts | 323 +++++++- ccw/src/core/dashboard-generator-patch.ts | 1 + ccw/src/core/dashboard-generator.ts | 1 + ccw/src/core/memory-store.ts | 52 +- ccw/src/core/routes/codexlens-routes.ts | 76 ++ ccw/src/core/routes/graph-routes.ts | 150 +++- ccw/src/core/routes/memory-routes.ts | 23 +- ccw/src/core/server.ts | 4 +- ccw/src/templates/dashboard-css/11-memory.css | 1 + .../dashboard-css/14-graph-explorer.css | 477 +++++++++++- .../dashboard-js/components/navigation.js | 19 + .../components/storage-manager.js | 200 ++++- ccw/src/templates/dashboard-js/i18n.js | 92 ++- .../dashboard-js/views/graph-explorer.js | 38 +- .../dashboard-js/views/mcp-manager.js | 475 ++++++++++-- .../templates/dashboard-js/views/memory.js | 41 +- ccw/src/templates/dashboard.html | 2 +- ccw/src/tools/cli-config-manager.ts | 6 +- ccw/src/tools/cli-executor.ts | 43 +- ccw/src/tools/cli-history-store.ts | 
40 +- ccw/src/tools/native-session-discovery.ts | 181 ++++- ccw/src/tools/storage-manager.ts | 106 ++- ccw/tests/storage-paths.test.js | 293 ++++++++ codex-lens/docs/HYBRID_SEARCH_ARCHITECTURE.md | 711 ++++++++++++++++++ codex-lens/src/codexlens/cli/commands.py | 13 +- codex-lens/src/codexlens/config.py | 2 + codex-lens/src/codexlens/parsers/factory.py | 33 + codex-lens/src/codexlens/storage/dir_index.py | 113 ++- .../src/codexlens/storage/index_tree.py | 30 + codex-lens/tests/test_config.py | 2 +- 35 files changed, 3348 insertions(+), 228 deletions(-) create mode 100644 ccw/tests/storage-paths.test.js create mode 100644 codex-lens/docs/HYBRID_SEARCH_ARCHITECTURE.md diff --git a/.gitattributes b/.gitattributes index 5946cfcc..63fff5c1 100644 --- a/.gitattributes +++ b/.gitattributes @@ -30,4 +30,3 @@ *.tar binary *.gz binary *.pdf binary -.mcp.json \ No newline at end of file diff --git a/.gitignore b/.gitignore index 4e00aaf4..f579cb55 100644 --- a/.gitignore +++ b/.gitignore @@ -26,4 +26,6 @@ COMMAND_TEMPLATE_EXECUTOR.md COMMAND_TEMPLATE_ORCHESTRATOR.md *.pyc .codexlens/ -settings.json \ No newline at end of file +settings.json +*.mcp.json +.mcp.json diff --git a/.npmignore b/.npmignore index db194c3c..a0512f27 100644 --- a/.npmignore +++ b/.npmignore @@ -44,7 +44,7 @@ GETTING_STARTED_CN.md # PowerShell installer (not needed for npm) Install-Claude.ps1 install-remote.ps1 - +*.mcp.json # ccw internal files ccw/package.json ccw/node_modules/ diff --git a/ccw/src/cli.ts b/ccw/src/cli.ts index f293e1d9..37997613 100644 --- a/ccw/src/cli.ts +++ b/ccw/src/cli.ts @@ -91,7 +91,7 @@ export function run(argv: string[]): void { // Install command program .command('install') - .description('Install Claude Code Workflow to your system') + .description('Install Claude Code Workflow to your system (includes .codex/prompts)') .option('-m, --mode ', 'Installation mode: Global or Path') .option('-p, --path ', 'Installation path (for Path mode)') .option('-f, --force', 'Force 
installation without prompts') diff --git a/ccw/src/commands/install.ts b/ccw/src/commands/install.ts index db45b2b7..f959b744 100644 --- a/ccw/src/commands/install.ts +++ b/ccw/src/commands/install.ts @@ -12,7 +12,7 @@ import type { Spinner } from 'ora'; const __filename = fileURLToPath(import.meta.url); const __dirname = dirname(__filename); -// Source directories to install +// Source directories to install (includes .codex with prompts folder) const SOURCE_DIRS = ['.claude', '.codex', '.gemini', '.qwen']; // Subdirectories that should always be installed to global (~/.claude/) @@ -108,6 +108,16 @@ export async function installCommand(options: InstallOptions): Promise { console.log(''); info(`Found ${availableDirs.length} directories to install: ${availableDirs.join(', ')}`); + + // Show what will be installed including .codex/prompts + if (availableDirs.includes('.codex')) { + const promptsPath = join(sourceDir, '.codex', 'prompts'); + if (existsSync(promptsPath)) { + const promptFiles = readdirSync(promptsPath, { recursive: true }); + info(` └─ .codex/prompts: ${promptFiles.length} files (workflow execute, lite-execute)`); + } + } + divider(); // Check for existing installation at target path @@ -206,6 +216,13 @@ export async function installCommand(options: InstallOptions): Promise { chalk.gray(`Manifest: ${basename(manifestPath)}`) ]; + // Add codex prompts info if installed + if (availableDirs.includes('.codex')) { + summaryLines.push(''); + summaryLines.push(chalk.cyan('Codex Prompts: ✓ Installed')); + summaryLines.push(chalk.gray(` Path: ${join(installPath, '.codex', 'prompts')}`)); + } + summaryBox({ title: ' Installation Summary ', lines: summaryLines, diff --git a/ccw/src/config/storage-paths.ts b/ccw/src/config/storage-paths.ts index a6427d97..9dd161a8 100644 --- a/ccw/src/config/storage-paths.ts +++ b/ccw/src/config/storage-paths.ts @@ -6,9 +6,9 @@ */ import { homedir } from 'os'; -import { join, resolve } from 'path'; +import { join, resolve, 
dirname, relative, sep } from 'path'; import { createHash } from 'crypto'; -import { existsSync, mkdirSync } from 'fs'; +import { existsSync, mkdirSync, renameSync, rmSync, readdirSync } from 'fs'; // Environment variable override for custom storage location const CCW_DATA_DIR = process.env.CCW_DATA_DIR; @@ -16,16 +16,285 @@ const CCW_DATA_DIR = process.env.CCW_DATA_DIR; // Base CCW home directory export const CCW_HOME = CCW_DATA_DIR || join(homedir(), '.ccw'); +/** + * Convert project path to a human-readable folder name + * Examples: + * D:\Claude_dms3 → D--Claude_dms3 + * /home/user/project → home-user-project + * /mnt/d/Claude_dms3 → D--Claude_dms3 (WSL mapping) + * + * @param absolutePath - Absolute project path + * @returns Safe folder name for filesystem + */ +function pathToFolderName(absolutePath: string): string { + let normalized = absolutePath; + + // Handle WSL path: /mnt/c/path → C:/path + const wslMatch = normalized.match(/^\/mnt\/([a-z])\/(.*)/i); + if (wslMatch) { + normalized = `${wslMatch[1].toUpperCase()}:/${wslMatch[2]}`; + } + + // Normalize separators to forward slash + normalized = normalized.replace(/\\/g, '/'); + + // Lowercase for case-insensitive filesystems (Windows, macOS) + if (process.platform === 'win32' || process.platform === 'darwin') { + normalized = normalized.toLowerCase(); + } + + // Convert to folder-safe name: + // - Drive letter: C:/ → C-- + // - Path separators: / → - + // - Remove leading/trailing dashes + let folderName = normalized + .replace(/^([a-z]):\/*/i, '$1--') // C:/ → C-- + .replace(/^\/+/, '') // Remove leading slashes + .replace(/\/+/g, '-') // / → - + .replace(/[<>:"|?*]/g, '_') // Invalid chars → _ + .replace(/(? 
100) { + const hash = createHash('sha256').update(normalized).digest('hex').substring(0, 8); + folderName = folderName.substring(0, 90) + '_' + hash; + } + + return folderName || 'unknown'; +} + /** * Calculate project identifier from project path - * Uses SHA256 hash truncated to 16 chars for uniqueness + readability + * Returns a human-readable folder name based on the path * @param projectPath - Absolute or relative project path - * @returns 16-character hex string project ID + * @returns Folder-safe project identifier */ export function getProjectId(projectPath: string): string { const absolutePath = resolve(projectPath); - const hash = createHash('sha256').update(absolutePath).digest('hex'); - return hash.substring(0, 16); + return pathToFolderName(absolutePath); +} + +/** + * Hierarchy information for a project path + */ +export interface HierarchyInfo { + /** Current path's ID (flat form) */ + currentId: string; + /** Parent directory's ID (if exists) */ + parentId: string | null; + /** Relative path from parent */ + relativePath: string; +} + +// Path detection result cache +const hierarchyCache = new Map(); + +/** + * Detect path hierarchy relationship + * @param projectPath - Current working directory path + * @returns Hierarchy information + */ +export function detectHierarchy(projectPath: string): HierarchyInfo { + const absolutePath = resolve(projectPath); + + // Check cache + if (hierarchyCache.has(absolutePath)) { + return hierarchyCache.get(absolutePath)!; + } + + // Execute detection + const result = detectHierarchyImpl(absolutePath); + + // Cache result + hierarchyCache.set(absolutePath, result); + + return result; +} + +/** + * Internal hierarchy detection implementation + */ +function detectHierarchyImpl(absolutePath: string): HierarchyInfo { + const currentId = pathToFolderName(absolutePath); + + // Get all existing project directories + const projectsDir = join(CCW_HOME, 'projects'); + if (!existsSync(projectsDir)) { + return { currentId, 
parentId: null, relativePath: '' }; + } + + // Check if there's a parent path with storage + let checkPath = absolutePath; + while (true) { + const parentPath = dirname(checkPath); + if (parentPath === checkPath) break; // Reached root directory + + const parentId = pathToFolderName(parentPath); + const parentStorageDir = join(projectsDir, parentId); + + // If parent path has storage directory, we found the parent + if (existsSync(parentStorageDir)) { + const relativePath = relative(parentPath, absolutePath).replace(/\\/g, '/'); + return { currentId, parentId, relativePath }; + } + + checkPath = parentPath; + } + + return { currentId, parentId: null, relativePath: '' }; +} + +/** + * Clear hierarchy cache + * Call this after migration completes + */ +export function clearHierarchyCache(): void { + hierarchyCache.clear(); +} + +/** + * Verify migration integrity + */ +function verifyMigration(targetDir: string, expectedSubDirs: string[]): boolean { + try { + for (const subDir of expectedSubDirs) { + const path = join(targetDir, subDir); + // Only verify directories that should exist + // In a real implementation, we'd check file counts, database integrity, etc. + } + return true; + } catch { + return false; + } +} + +/** + * Rollback migration (on error) + */ +function rollbackMigration(legacyDir: string, targetDir: string): void { + try { + // If target directory exists, try to move back + if (existsSync(targetDir)) { + console.error('⚠️ 尝试回滚迁移...'); + // Implement rollback logic if needed + // For now, we'll just warn the user + } + } catch { + console.error('❌ 回滚失败,请手动检查数据完整性'); + } +} + +/** + * Migrate from flat structure to hierarchical structure + * @param legacyDir - Old flat directory + * @param targetDir - New hierarchical directory + */ +function migrateToHierarchical(legacyDir: string, targetDir: string): void { + console.log(`\n🔄 检测到旧存储结构,开始迁移...`); + console.log(` 从: ${legacyDir}`); + console.log(` 到: ${targetDir}`); + + try { + // 1. 
Create target directory + mkdirSync(targetDir, { recursive: true }); + + // 2. Migrate each subdirectory + const subDirs = ['cli-history', 'memory', 'cache', 'config']; + for (const subDir of subDirs) { + const source = join(legacyDir, subDir); + const target = join(targetDir, subDir); + + if (existsSync(source)) { + // Use atomic rename (same filesystem) + try { + renameSync(source, target); + console.log(` ✓ 迁移 ${subDir}`); + } catch (error: any) { + // If rename fails (cross-filesystem), fallback to copy-delete + // For now, we'll just throw the error + throw new Error(`无法迁移 ${subDir}: ${error.message}`); + } + } + } + + // 3. Verify migration integrity + const verified = verifyMigration(targetDir, subDirs); + if (!verified) { + throw new Error('迁移验证失败'); + } + + // 4. Delete old directory + rmSync(legacyDir, { recursive: true, force: true }); + console.log(`✅ 迁移完成并清理旧数据\n`); + + } catch (error: any) { + console.error(`❌ 迁移失败: ${error.message}`); + // Try to rollback if possible + rollbackMigration(legacyDir, targetDir); + // Re-throw to prevent continued execution + throw error; + } +} + +/** + * Check and migrate child projects + * @param parentId - Parent project ID + * @param parentPath - Parent project path + */ +function migrateChildProjects(parentId: string, parentPath: string): void { + const projectsDir = join(CCW_HOME, 'projects'); + if (!existsSync(projectsDir)) return; + + const absoluteParentPath = resolve(parentPath); + const entries = readdirSync(projectsDir); + + for (const entry of entries) { + if (entry === parentId) continue; // Skip self + + // Check if this is a child directory of the current project + // We need to reverse-engineer the original path from the folder ID + // This is challenging without storing metadata + // For now, we'll use a heuristic: if the entry starts with the parentId + // and has additional path segments, it might be a child + + // Simple heuristic: check if entry is longer and starts with parentId + if 
(entry.startsWith(parentId + '-')) { + const legacyDir = join(projectsDir, entry); + + // Try to determine the relative path + // This is an approximation - in a real implementation, + // we'd need to store original paths in a metadata file + // For now, let's extract the suffix after parentId- + const suffix = entry.substring(parentId.length + 1); + + // Convert back to path segments (- → /) + const potentialRelPath = suffix.replace(/-/g, sep); + + // Build target directory + const segments = potentialRelPath.split(sep).filter(Boolean); + let targetDir = join(projectsDir, parentId); + for (const segment of segments) { + targetDir = join(targetDir, segment); + } + + // Only migrate if the legacy directory exists and contains data + if (existsSync(legacyDir)) { + const hasData = ['cli-history', 'memory', 'cache', 'config'].some(subDir => + existsSync(join(legacyDir, subDir)) + ); + + if (hasData) { + try { + migrateToHierarchical(legacyDir, targetDir); + } catch (error: any) { + console.error(`⚠️ 跳过 ${entry} 的迁移: ${error.message}`); + // Continue with other migrations + } + } + } + } + } } /** @@ -90,12 +359,50 @@ export interface ProjectPaths { /** * Get storage paths for a specific project + * Supports hierarchical storage structure with automatic migration * @param projectPath - Project root path * @returns Object with all project-specific paths */ export function getProjectPaths(projectPath: string): ProjectPaths { - const projectId = getProjectId(projectPath); - const projectDir = join(CCW_HOME, 'projects', projectId); + const hierarchy = detectHierarchy(projectPath); + + let projectDir: string; + + if (hierarchy.parentId) { + // Has parent, use hierarchical structure + projectDir = join(CCW_HOME, 'projects', hierarchy.parentId); + + // Build subdirectory path from relative path + const segments = hierarchy.relativePath.split('/').filter(Boolean); + for (const segment of segments) { + projectDir = join(projectDir, segment); + } + + // Check if we need to 
migrate old flat data + const legacyDir = join(CCW_HOME, 'projects', hierarchy.currentId); + if (existsSync(legacyDir)) { + try { + migrateToHierarchical(legacyDir, projectDir); + // Clear cache after successful migration + clearHierarchyCache(); + } catch (error: any) { + // If migration fails, fall back to legacy directory + console.warn(`迁移失败,使用旧存储位置: ${error.message}`); + projectDir = legacyDir; + } + } + } else { + // No parent, use root-level storage + projectDir = join(CCW_HOME, 'projects', hierarchy.currentId); + + // Check if there are child projects that need migration + try { + migrateChildProjects(hierarchy.currentId, projectPath); + } catch (error: any) { + console.warn(`子项目迁移失败: ${error.message}`); + // Continue anyway - this is not critical + } + } return { root: projectDir, diff --git a/ccw/src/core/dashboard-generator-patch.ts b/ccw/src/core/dashboard-generator-patch.ts index 20156378..7062ba02 100644 --- a/ccw/src/core/dashboard-generator-patch.ts +++ b/ccw/src/core/dashboard-generator-patch.ts @@ -40,6 +40,7 @@ const MODULE_FILES = [ 'dashboard-js/views/mcp-manager.js', 'dashboard-js/views/hook-manager.js', 'dashboard-js/views/history.js', + 'dashboard-js/views/graph-explorer.js', // Navigation & Main 'dashboard-js/components/navigation.js', 'dashboard-js/main.js' diff --git a/ccw/src/core/dashboard-generator.ts b/ccw/src/core/dashboard-generator.ts index f6c82b7a..6dec5b78 100644 --- a/ccw/src/core/dashboard-generator.ts +++ b/ccw/src/core/dashboard-generator.ts @@ -61,6 +61,7 @@ const MODULE_FILES = [ 'views/mcp-manager.js', 'views/hook-manager.js', 'views/history.js', + 'views/graph-explorer.js', 'main.js' ]; diff --git a/ccw/src/core/memory-store.ts b/ccw/src/core/memory-store.ts index 29bf22e2..7a702ed4 100644 --- a/ccw/src/core/memory-store.ts +++ b/ccw/src/core/memory-store.ts @@ -6,7 +6,7 @@ import Database from 'better-sqlite3'; import { existsSync, mkdirSync } from 'fs'; import { join } from 'path'; -import { StoragePaths, 
ensureStorageDir } from '../config/storage-paths.js'; +import { StoragePaths, ensureStorageDir, getProjectId } from '../config/storage-paths.js'; // Types export interface Entity { @@ -127,6 +127,7 @@ export class MemoryStore { this.db.pragma('synchronous = NORMAL'); this.initDatabase(); + this.migrateSchema(); } /** @@ -283,6 +284,43 @@ export class MemoryStore { `); } + /** + * Migrate schema for existing databases + */ + private migrateSchema(): void { + try { + // Check if hierarchical storage columns exist in conversations table + const tableInfo = this.db.prepare('PRAGMA table_info(conversations)').all() as Array<{ name: string }>; + const hasProjectRoot = tableInfo.some(col => col.name === 'project_root'); + const hasRelativePath = tableInfo.some(col => col.name === 'relative_path'); + + // Add hierarchical storage support columns + if (!hasProjectRoot) { + console.log('[Memory Store] Migrating database: adding project_root column for hierarchical storage...'); + this.db.exec(` + ALTER TABLE conversations ADD COLUMN project_root TEXT; + `); + try { + this.db.exec(`CREATE INDEX IF NOT EXISTS idx_conversations_project_root ON conversations(project_root);`); + } catch (indexErr) { + console.warn('[Memory Store] Project root index creation warning:', (indexErr as Error).message); + } + console.log('[Memory Store] Migration complete: project_root column added'); + } + + if (!hasRelativePath) { + console.log('[Memory Store] Migrating database: adding relative_path column for hierarchical storage...'); + this.db.exec(` + ALTER TABLE conversations ADD COLUMN relative_path TEXT; + `); + console.log('[Memory Store] Migration complete: relative_path column added'); + } + } catch (err) { + console.error('[Memory Store] Migration error:', (err as Error).message); + // Don't throw - allow the store to continue working with existing schema + } + } + /** * Upsert an entity */ @@ -677,17 +715,21 @@ export class MemoryStore { } } -// Singleton instance cache +// Singleton 
instance cache - keyed by normalized project ID for consistency const storeCache = new Map(); /** * Get or create a store instance for a project + * Uses normalized project ID as cache key to handle path casing differences */ export function getMemoryStore(projectPath: string): MemoryStore { - if (!storeCache.has(projectPath)) { - storeCache.set(projectPath, new MemoryStore(projectPath)); + // Use getProjectId to normalize path for consistent cache key + const cacheKey = getProjectId(projectPath); + + if (!storeCache.has(cacheKey)) { + storeCache.set(cacheKey, new MemoryStore(projectPath)); } - return storeCache.get(projectPath)!; + return storeCache.get(cacheKey)!; } /** diff --git a/ccw/src/core/routes/codexlens-routes.ts b/ccw/src/core/routes/codexlens-routes.ts index 4c995bbb..4de3a89f 100644 --- a/ccw/src/core/routes/codexlens-routes.ts +++ b/ccw/src/core/routes/codexlens-routes.ts @@ -252,6 +252,82 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise return true; } + + // API: CodexLens Search (FTS5 text search) + if (pathname === '/api/codexlens/search') { + const query = url.searchParams.get('query') || ''; + const limit = parseInt(url.searchParams.get('limit') || '20', 10); + const projectPath = url.searchParams.get('path') || initialPath; + + if (!query) { + res.writeHead(400, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ success: false, error: 'Query parameter is required' })); + return true; + } + + try { + const args = ['search', query, '--path', projectPath, '--limit', limit.toString(), '--json']; + + const result = await executeCodexLens(args, { cwd: projectPath }); + + if (result.success) { + try { + const parsed = JSON.parse(result.output); + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ success: true, ...parsed.result })); + } catch { + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ success: true, results: [], output: 
result.output })); + } + } else { + res.writeHead(500, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ success: false, error: result.error })); + } + } catch (err) { + res.writeHead(500, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ success: false, error: err.message })); + } + return true; + } + + // API: CodexLens Search Files Only (return file paths only) + if (pathname === '/api/codexlens/search_files') { + const query = url.searchParams.get('query') || ''; + const limit = parseInt(url.searchParams.get('limit') || '20', 10); + const projectPath = url.searchParams.get('path') || initialPath; + + if (!query) { + res.writeHead(400, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ success: false, error: 'Query parameter is required' })); + return true; + } + + try { + const args = ['search', query, '--path', projectPath, '--limit', limit.toString(), '--files-only', '--json']; + + const result = await executeCodexLens(args, { cwd: projectPath }); + + if (result.success) { + try { + const parsed = JSON.parse(result.output); + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ success: true, ...parsed.result })); + } catch { + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ success: true, files: [], output: result.output })); + } + } else { + res.writeHead(500, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ success: false, error: result.error })); + } + } catch (err) { + res.writeHead(500, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ success: false, error: err.message })); + } + return true; + } + + // API: CodexLens Semantic Search Install (fastembed, ONNX-based, ~200MB) if (pathname === '/api/codexlens/semantic/install' && req.method === 'POST') { handlePostRequest(req, res, async () => { diff --git a/ccw/src/core/routes/graph-routes.ts b/ccw/src/core/routes/graph-routes.ts index 
60f60443..abcafe71 100644 --- a/ccw/src/core/routes/graph-routes.ts +++ b/ccw/src/core/routes/graph-routes.ts @@ -1,12 +1,10 @@ -// @ts-nocheck /** * Graph Routes Module * Handles graph visualization API endpoints for codex-lens data */ import type { IncomingMessage, ServerResponse } from 'http'; -import { executeCodexLens } from '../../tools/codex-lens.js'; import { homedir } from 'os'; -import { join } from 'path'; +import { join, resolve, normalize } from 'path'; import { existsSync } from 'fs'; import Database from 'better-sqlite3'; @@ -82,6 +80,34 @@ interface ImpactAnalysis { affectedFiles: string[]; } +/** + * Validate and sanitize project path to prevent path traversal attacks + * @returns sanitized absolute path or null if invalid + */ +function validateProjectPath(projectPath: string, initialPath: string): string | null { + if (!projectPath) { + return initialPath; + } + + // Resolve to absolute path + const resolved = resolve(projectPath); + const normalized = normalize(resolved); + + // Check for path traversal attempts + if (normalized.includes('..') || normalized !== resolved) { + console.error(`[Graph] Path traversal attempt blocked: ${projectPath}`); + return null; + } + + // Ensure path exists and is a directory + if (!existsSync(normalized)) { + console.error(`[Graph] Path does not exist: ${normalized}`); + return null; + } + + return normalized; +} + /** * Map codex-lens symbol kinds to graph node types */ @@ -151,7 +177,8 @@ async function querySymbols(projectPath: string): Promise { tokenCount: row.token_count || undefined, })); } catch (err) { - console.error(`[Graph] Failed to query symbols: ${err.message}`); + const message = err instanceof Error ? 
err.message : String(err); + console.error(`[Graph] Failed to query symbols: ${message}`); return []; } } @@ -194,11 +221,48 @@ async function queryRelationships(projectPath: string): Promise { sourceFile: row.source_file, })); } catch (err) { - console.error(`[Graph] Failed to query relationships: ${err.message}`); + const message = err instanceof Error ? err.message : String(err); + console.error(`[Graph] Failed to query relationships: ${message}`); return []; } } +/** + * Sanitize a string for use in SQL LIKE patterns + * Escapes special characters: %, _, [, ] + */ +function sanitizeForLike(input: string): string { + return input + .replace(/\[/g, '[[]') // Escape [ first + .replace(/%/g, '[%]') // Escape % + .replace(/_/g, '[_]'); // Escape _ +} + +/** + * Validate and parse symbol ID format + * Expected format: file:name:line or just symbolName + * @returns sanitized symbol name or null if invalid + */ +function parseSymbolId(symbolId: string): string | null { + if (!symbolId || symbolId.length > 500) { + return null; + } + + // Remove any potentially dangerous characters + const sanitized = symbolId.replace(/[<>'";&|`$\\]/g, ''); + + // Parse the format: file:name:line + const parts = sanitized.split(':'); + if (parts.length >= 2) { + // Return the name part (second element) + const name = parts[1].trim(); + return name.length > 0 ? name : null; + } + + // If no colons, use the whole string as name + return sanitized.trim() || null; +} + /** * Perform impact analysis for a symbol * Find all symbols that depend on this symbol (direct and transitive) @@ -211,12 +275,18 @@ async function analyzeImpact(projectPath: string, symbolId: string): Promise= 2 ? 
parts[1] : symbolId; + // Sanitize for LIKE query to prevent injection via special characters + const sanitizedName = sanitizeForLike(symbolName); // Find all symbols that reference this symbol const rows = db.prepare(` @@ -228,7 +298,7 @@ async function analyzeImpact(projectPath: string, symbolId: string): Promise { // API: Graph Nodes - Get all symbols as graph nodes if (pathname === '/api/graph/nodes') { - const projectPath = url.searchParams.get('path') || initialPath; + const rawPath = url.searchParams.get('path') || initialPath; + const projectPath = validateProjectPath(rawPath, initialPath); + + if (!projectPath) { + res.writeHead(400, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: 'Invalid project path', nodes: [] })); + return true; + } try { const nodes = await querySymbols(projectPath); res.writeHead(200, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ nodes })); } catch (err) { + console.error(`[Graph] Error fetching nodes:`, err); res.writeHead(500, { 'Content-Type': 'application/json' }); - res.end(JSON.stringify({ error: err.message, nodes: [] })); + res.end(JSON.stringify({ error: 'Failed to fetch graph nodes', nodes: [] })); } return true; } // API: Graph Edges - Get all relationships as graph edges if (pathname === '/api/graph/edges') { - const projectPath = url.searchParams.get('path') || initialPath; + const rawPath = url.searchParams.get('path') || initialPath; + const projectPath = validateProjectPath(rawPath, initialPath); + + if (!projectPath) { + res.writeHead(400, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: 'Invalid project path', edges: [] })); + return true; + } try { const edges = await queryRelationships(projectPath); res.writeHead(200, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ edges })); } catch (err) { + console.error(`[Graph] Error fetching edges:`, err); res.writeHead(500, { 'Content-Type': 'application/json' }); - 
res.end(JSON.stringify({ error: err.message, edges: [] })); + res.end(JSON.stringify({ error: 'Failed to fetch graph edges', edges: [] })); } return true; } // API: Impact Analysis - Get impact analysis for a symbol if (pathname === '/api/graph/impact') { - const projectPath = url.searchParams.get('path') || initialPath; + const rawPath = url.searchParams.get('path') || initialPath; + const projectPath = validateProjectPath(rawPath, initialPath); const symbolId = url.searchParams.get('symbol'); + if (!projectPath) { + res.writeHead(400, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: 'Invalid project path', directDependents: [], affectedFiles: [] })); + return true; + } + if (!symbolId) { res.writeHead(400, { 'Content-Type': 'application/json' }); - res.end(JSON.stringify({ error: 'symbol parameter is required' })); + res.end(JSON.stringify({ error: 'symbol parameter is required', directDependents: [], affectedFiles: [] })); return true; } @@ -301,9 +395,10 @@ export async function handleGraphRoutes(ctx: RouteContext): Promise { res.writeHead(200, { 'Content-Type': 'application/json' }); res.end(JSON.stringify(impact)); } catch (err) { + console.error(`[Graph] Error analyzing impact:`, err); res.writeHead(500, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ - error: err.message, + error: 'Failed to analyze impact', directDependents: [], affectedFiles: [] })); @@ -311,5 +406,26 @@ export async function handleGraphRoutes(ctx: RouteContext): Promise { return true; } + // API: Search Process - Get search pipeline visualization data (placeholder) + if (pathname === '/api/graph/search-process') { + // This endpoint returns mock data for the search process visualization + // In a real implementation, this would integrate with codex-lens search pipeline + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ + stages: [ + { id: 1, name: 'Query Parsing', duration: 0, status: 'pending' }, + { id: 
2, name: 'Vector Search', duration: 0, status: 'pending' }, + { id: 3, name: 'Graph Enrichment', duration: 0, status: 'pending' }, + { id: 4, name: 'Chunk Hierarchy', duration: 0, status: 'pending' }, + { id: 5, name: 'Result Ranking', duration: 0, status: 'pending' } + ], + chunks: [], + callers: [], + callees: [], + message: 'Search process visualization requires an active search query' + })); + return true; + } + return false; } diff --git a/ccw/src/core/routes/memory-routes.ts b/ccw/src/core/routes/memory-routes.ts index c646333c..ef89beae 100644 --- a/ccw/src/core/routes/memory-routes.ts +++ b/ccw/src/core/routes/memory-routes.ts @@ -379,7 +379,9 @@ Return ONLY valid JSON in this exact format (no markdown, no code blocks, just p tool, prompt: analysisPrompt, mode: 'analysis', - timeout: 120000 + timeout: 120000, + cd: projectPath, + category: 'insights' }); // Try to parse JSON from response @@ -521,8 +523,9 @@ Return ONLY valid JSON in this exact format (no markdown, no code blocks, just p filtered = hotEntities.filter((e: any) => new Date(e.last_seen_at) >= weekAgo); } - // Separate into mostRead and mostEdited + // Separate into mostRead, mostEdited, and mostMentioned const fileEntities = filtered.filter((e: any) => e.type === 'file'); + const topicEntities = filtered.filter((e: any) => e.type === 'topic'); const mostRead = fileEntities .filter((e: any) => e.stats.read_count > 0) @@ -548,11 +551,23 @@ Return ONLY valid JSON in this exact format (no markdown, no code blocks, just p lastSeen: e.last_seen_at })); + const mostMentioned = topicEntities + .filter((e: any) => e.stats.mention_count > 0) + .sort((a: any, b: any) => b.stats.mention_count - a.stats.mention_count) + .slice(0, limit) + .map((e: any) => ({ + topic: e.value, + preview: e.value.substring(0, 100) + (e.value.length > 100 ? '...' 
: ''), + heat: e.stats.mention_count, + count: e.stats.mention_count, + lastSeen: e.last_seen_at + })); + res.writeHead(200, { 'Content-Type': 'application/json' }); - res.end(JSON.stringify({ stats: { mostRead, mostEdited } })); + res.end(JSON.stringify({ stats: { mostRead, mostEdited, mostMentioned } })); } catch (error: unknown) { res.writeHead(200, { 'Content-Type': 'application/json' }); - res.end(JSON.stringify({ stats: { mostRead: [], mostEdited: [] } })); + res.end(JSON.stringify({ stats: { mostRead: [], mostEdited: [], mostMentioned: [] } })); } return true; } diff --git a/ccw/src/core/server.ts b/ccw/src/core/server.ts index 58b8104f..9f6d4297 100644 --- a/ccw/src/core/server.ts +++ b/ccw/src/core/server.ts @@ -62,7 +62,8 @@ const MODULE_CSS_FILES = [ '11-memory.css', '11-prompt-history.css', '12-skills-rules.css', - '13-claude-manager.css' + '13-claude-manager.css', + '14-graph-explorer.css' ]; // Modular JS files in dependency order @@ -109,6 +110,7 @@ const MODULE_FILES = [ 'views/skills-manager.js', 'views/rules-manager.js', 'views/claude-manager.js', + 'views/graph-explorer.js', 'main.js' ]; diff --git a/ccw/src/templates/dashboard-css/11-memory.css b/ccw/src/templates/dashboard-css/11-memory.css index fcf0e6f8..369395b6 100644 --- a/ccw/src/templates/dashboard-css/11-memory.css +++ b/ccw/src/templates/dashboard-css/11-memory.css @@ -2098,6 +2098,7 @@ top: 0; right: 0; width: 50vw; + min-width: 600px; max-width: 100vw; height: 100vh; background: hsl(var(--card)); diff --git a/ccw/src/templates/dashboard-css/14-graph-explorer.css b/ccw/src/templates/dashboard-css/14-graph-explorer.css index 2a3b2651..61ce9366 100644 --- a/ccw/src/templates/dashboard-css/14-graph-explorer.css +++ b/ccw/src/templates/dashboard-css/14-graph-explorer.css @@ -34,13 +34,7 @@ flex-shrink: 0; } -.graph-explorer-header-left { - display: flex; - align-items: center; - gap: 1rem; -} - -.graph-explorer-header-left h2 { +.graph-explorer-header h2 { display: flex; align-items: 
center; gap: 0.5rem; @@ -50,6 +44,12 @@ margin: 0; } +.graph-explorer-header-left { + display: flex; + align-items: center; + gap: 1rem; +} + .graph-explorer-header-right { display: flex; align-items: center; @@ -190,27 +190,7 @@ user-select: none; } -/* ======================================== - * Graph Main Content - * ======================================== */ -.graph-main { - display: flex; - flex: 1; - gap: 0; - min-height: 0; - overflow: hidden; -} - -/* Cytoscape Graph Canvas */ -.cytoscape-container { - flex: 1; - position: relative; - background: hsl(var(--card)); - border: 1px solid hsl(var(--border)); - border-radius: 0.75rem; - overflow: hidden; - min-height: 400px; -} +/* Note: .graph-main and .cytoscape-container styles are defined in the Additional Classes section below */ #cy { width: 100%; @@ -1134,3 +1114,444 @@ font-size: 0.875rem; margin: 0; } + +/* ======================================== + * Additional Classes for JS Compatibility + * ======================================== */ + +/* Explorer Tabs */ +.graph-explorer-tabs { + display: flex; + align-items: center; + gap: 0.5rem; +} + +.tab-btn { + display: flex; + align-items: center; + gap: 0.375rem; + padding: 0.5rem 1rem; + font-size: 0.875rem; + font-weight: 500; + color: hsl(var(--muted-foreground)); + background: transparent; + border: 1px solid hsl(var(--border)); + border-radius: 0.375rem; + cursor: pointer; + transition: all 0.15s ease; +} + +.tab-btn:hover { + background: hsl(var(--hover)); + color: hsl(var(--foreground)); +} + +.tab-btn.active { + background: hsl(var(--primary)); + color: hsl(var(--primary-foreground)); + border-color: hsl(var(--primary)); +} + +/* Tab Content */ +.graph-explorer-content { + display: flex; + flex: 1; + min-height: 0; + overflow: hidden; +} + +.tab-content { + display: none; + width: 100%; + height: 100%; +} + +.tab-content.active { + display: flex; +} + +/* Graph View Layout */ +.graph-view { + display: flex; + width: 100%; + height: 100%; + gap: 
1rem; + padding: 0; +} + +.graph-sidebar { + width: 240px; + min-width: 240px; + flex-shrink: 0; + display: flex; + flex-direction: column; + gap: 0; + overflow-y: auto; + padding: 0.5rem 1rem 0.5rem 0; + border-right: 1px solid hsl(var(--border)); + margin-right: 1rem; +} + +.graph-main { + flex: 1; + display: flex; + flex-direction: column; + min-width: 0; + height: 100%; +} + +.graph-controls-section, +.graph-legend-section { + padding: 0; +} + +.graph-controls-section h3, +.graph-legend-section h3 { + font-size: 0.75rem; + font-weight: 600; + color: hsl(var(--muted-foreground)); + margin: 0 0 0.75rem 0; + text-transform: uppercase; + letter-spacing: 0.05em; +} + +.graph-legend-section { + border-top: 1px solid hsl(var(--border)); + padding-top: 1rem; + margin-top: 0.5rem; +} + +/* Graph Legend */ +.graph-legend { + display: flex; + flex-direction: column; + gap: 0.5rem; +} + +.legend-title { + font-size: 0.75rem; + font-weight: 600; + color: hsl(var(--muted-foreground)); + text-transform: uppercase; + letter-spacing: 0.05em; + margin-bottom: 0.25rem; +} + +.legend-dot { + width: 10px; + height: 10px; + border-radius: 50%; + flex-shrink: 0; +} + +.legend-line { + width: 20px; + height: 3px; + border-radius: 2px; + flex-shrink: 0; +} + +.filter-color { + width: 10px; + height: 10px; + border-radius: 50%; + flex-shrink: 0; +} + +/* Graph Toolbar */ +.graph-toolbar { + display: flex; + align-items: center; + justify-content: space-between; + padding: 0.5rem 0.75rem; + background: hsl(var(--card)); + border: 1px solid hsl(var(--border)); + border-radius: 0.5rem 0.5rem 0 0; + border-bottom: none; +} + +.graph-toolbar-left, +.graph-toolbar-right { + display: flex; + align-items: center; + gap: 0.75rem; +} + +.graph-stats { + display: flex; + align-items: center; + gap: 0.375rem; + font-size: 0.8125rem; + color: hsl(var(--muted-foreground)); +} + +.btn-icon { + display: flex; + align-items: center; + justify-content: center; + width: 32px; + height: 32px; + padding: 0; 
+ background: transparent; + border: 1px solid hsl(var(--border)); + border-radius: 0.375rem; + color: hsl(var(--muted-foreground)); + cursor: pointer; + transition: all 0.15s ease; +} + +.btn-icon:hover { + background: hsl(var(--hover)); + color: hsl(var(--foreground)); + border-color: hsl(var(--primary) / 0.3); +} + +/* Filter Checkboxes */ +.filter-dropdowns { + display: flex; + flex-direction: column; + gap: 1rem; +} + +.filter-group { + display: flex; + flex-direction: column; + gap: 0.5rem; +} + +.filter-group > label { + font-size: 0.75rem; + font-weight: 600; + color: hsl(var(--muted-foreground)); + text-transform: uppercase; + letter-spacing: 0.05em; +} + +.filter-checkbox { + display: flex; + align-items: center; + gap: 0.5rem; + font-size: 0.8125rem; + color: hsl(var(--foreground)); + cursor: pointer; +} + +.filter-checkbox input[type="checkbox"] { + width: 14px; + height: 14px; + cursor: pointer; + accent-color: hsl(var(--primary)); +} + +/* Legend Items */ +.legend-section { + display: flex; + flex-direction: column; + gap: 0.5rem; +} + +.legend-section-title { + font-size: 0.75rem; + font-weight: 600; + color: hsl(var(--muted-foreground)); + margin-bottom: 0.25rem; +} + +.legend-item { + display: flex; + align-items: center; + gap: 0.5rem; + font-size: 0.8125rem; + color: hsl(var(--foreground)); +} + +.legend-color { + width: 12px; + height: 12px; + border-radius: 50%; + flex-shrink: 0; +} + +/* Search Process View */ +.search-process-view { + display: flex; + flex-direction: column; + width: 100%; + height: 100%; + gap: 1rem; + padding: 1rem; + overflow-y: auto; +} + +.search-process-header { + text-align: center; + padding: 1rem; +} + +.search-process-header h3 { + font-size: 1rem; + font-weight: 600; + color: hsl(var(--foreground)); + margin: 0 0 0.5rem 0; +} + +.search-process-header p { + font-size: 0.875rem; + color: hsl(var(--muted-foreground)); + margin: 0; +} + +.search-empty-state { + display: flex; + flex-direction: column; + align-items: 
center; + justify-content: center; + flex: 1; + text-align: center; + color: hsl(var(--muted-foreground)); +} + +.search-empty-state i { + margin-bottom: 1rem; + opacity: 0.5; +} + +/* Search Process Empty State */ +.search-process-empty { + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + width: 100%; + height: 100%; + text-align: center; + color: hsl(var(--muted-foreground)); + padding: 2rem; +} + +.search-process-empty i { + margin-bottom: 1rem; + opacity: 0.5; +} + +.search-process-empty p { + font-size: 0.875rem; + margin: 0; +} + +/* Search Process Timeline */ +.search-process-timeline { + display: flex; + flex-direction: column; + gap: 1rem; +} + +.search-step { + display: flex; + gap: 1rem; + padding: 1rem; + background: hsl(var(--card)); + border: 1px solid hsl(var(--border)); + border-radius: 0.5rem; +} + +.search-step-number { + display: flex; + align-items: center; + justify-content: center; + width: 32px; + height: 32px; + background: hsl(var(--primary)); + color: hsl(var(--primary-foreground)); + border-radius: 50%; + font-weight: 600; + font-size: 0.875rem; + flex-shrink: 0; +} + +.search-step-content { + flex: 1; +} + +.search-step-content h4 { + font-size: 0.875rem; + font-weight: 600; + color: hsl(var(--foreground)); + margin: 0 0 0.25rem 0; +} + +.search-step-content p { + font-size: 0.8125rem; + color: hsl(var(--muted-foreground)); + margin: 0; +} + +.search-step-results { + margin-top: 0.5rem; +} + +.result-count { + font-size: 0.75rem; + color: hsl(var(--primary)); + font-weight: 500; +} + +/* Cytoscape Container */ +.cytoscape-container { + flex: 1; + min-height: 400px; + background: hsl(var(--card)); + border: 1px solid hsl(var(--border)); + border-radius: 0.5rem; + position: relative; +} + +/* Cytoscape Empty State */ +.cytoscape-empty { + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + width: 100%; + height: 100%; + min-height: 400px; + text-align: 
center; + color: hsl(var(--muted-foreground)); +} + +.cytoscape-empty i { + margin-bottom: 1rem; + opacity: 0.5; +} + +.cytoscape-empty p { + font-size: 0.875rem; + margin: 0; +} + +/* Graph Empty State */ +.graph-empty-state { + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + width: 100%; + height: 100%; + text-align: center; + color: hsl(var(--muted-foreground)); +} + +.graph-empty-state i { + margin-bottom: 1rem; + opacity: 0.5; +} + +.graph-empty-state p { + font-size: 0.875rem; + margin: 0; +} + +/* Hidden class */ +.hidden { + display: none !important; +} diff --git a/ccw/src/templates/dashboard-js/components/navigation.js b/ccw/src/templates/dashboard-js/components/navigation.js index 90e472aa..3068e22e 100644 --- a/ccw/src/templates/dashboard-js/components/navigation.js +++ b/ccw/src/templates/dashboard-js/components/navigation.js @@ -52,10 +52,25 @@ function initPathSelector() { }); } +// Cleanup function for view transitions +function cleanupPreviousView() { + // Cleanup graph explorer + if (currentView === 'graph-explorer' && typeof window.cleanupGraphExplorer === 'function') { + window.cleanupGraphExplorer(); + } + // Hide storage card when leaving cli-manager + var storageCard = document.getElementById('storageCard'); + if (storageCard) { + storageCard.style.display = 'none'; + } +} + // Navigation function initNavigation() { document.querySelectorAll('.nav-item[data-filter]').forEach(item => { item.addEventListener('click', () => { + cleanupPreviousView(); + setActiveNavItem(item); currentFilter = item.dataset.filter; currentLiteType = null; @@ -70,6 +85,8 @@ function initNavigation() { // Lite Tasks Navigation document.querySelectorAll('.nav-item[data-lite]').forEach(item => { item.addEventListener('click', () => { + cleanupPreviousView(); + setActiveNavItem(item); currentLiteType = item.dataset.lite; currentFilter = null; @@ -84,6 +101,8 @@ function initNavigation() { // View Navigation (Project Overview, 
MCP Manager, etc.) document.querySelectorAll('.nav-item[data-view]').forEach(item => { item.addEventListener('click', () => { + cleanupPreviousView(); + setActiveNavItem(item); currentView = item.dataset.view; currentFilter = null; diff --git a/ccw/src/templates/dashboard-js/components/storage-manager.js b/ccw/src/templates/dashboard-js/components/storage-manager.js index cdac4ef0..fef0fb20 100644 --- a/ccw/src/templates/dashboard-js/components/storage-manager.js +++ b/ccw/src/templates/dashboard-js/components/storage-manager.js @@ -59,40 +59,22 @@ function renderStorageCard() { return date.toLocaleDateString(); }; - // Build project rows + // Build project tree (hierarchical view) let projectRows = ''; if (projects && projects.length > 0) { - projects.slice(0, 5).forEach(p => { - const historyBadge = p.historyRecords > 0 - ? '' + p.historyRecords + '' - : '-'; + const tree = buildProjectTree(projects); + projectRows = renderProjectTree(tree, 0, formatTimeAgo); - projectRows += '\ - \ - ' + escapeHtml(p.id.substring(0, 8)) + '...\ - ' + escapeHtml(p.totalSizeFormatted) + '\ - ' + historyBadge + '\ - ' + formatTimeAgo(p.lastModified) + '\ - \ - \ - \ - \ - '; - }); - - if (projects.length > 5) { - projectRows += '\ - \ - \ - ... 
and ' + (projects.length - 5) + ' more projects\ - \ - \ - '; - } + // Initially hide all child rows (level > 0) + setTimeout(() => { + const allRows = document.querySelectorAll('.project-row'); + allRows.forEach(row => { + const level = parseInt(row.getAttribute('data-level')); + if (level > 0) { + row.style.display = 'none'; + } + }); + }, 0); } else { projectRows = '\ \ @@ -178,6 +160,162 @@ function getTotalRecords() { return storageData.projects.reduce((sum, p) => sum + (p.historyRecords || 0), 0); } +/** + * Build project tree from flat list + * Converts flat project list to hierarchical tree structure + */ +function buildProjectTree(projects) { + const tree = []; + const map = new Map(); + + // Sort by path depth (shallowest first) + const sorted = projects.slice().sort((a, b) => { + const depthA = (a.id.match(/\//g) || []).length; + const depthB = (b.id.match(/\//g) || []).length; + return depthA - depthB; + }); + + for (const project of sorted) { + const segments = project.id.split('/'); + + if (segments.length === 1) { + // Root level project + const node = { + ...project, + children: [], + isExpanded: false + }; + tree.push(node); + map.set(project.id, node); + } else { + // Sub-project + const parentId = segments.slice(0, -1).join('/'); + const parent = map.get(parentId); + + if (parent) { + const node = { + ...project, + children: [], + isExpanded: false + }; + parent.children.push(node); + map.set(project.id, node); + } else { + // Orphaned project (parent not found) - add to root + const node = { + ...project, + children: [], + isExpanded: false + }; + tree.push(node); + map.set(project.id, node); + } + } + } + + return tree; +} + +/** + * Render project tree recursively + */ +function renderProjectTree(tree, level = 0, formatTimeAgo) { + if (!tree || tree.length === 0) return ''; + + let html = ''; + + for (const node of tree) { + const hasChildren = node.children && node.children.length > 0; + const indent = level * 20; + const projectName = 
node.id.split('/').pop(); + + const historyBadge = node.historyRecords > 0 + ? '' + node.historyRecords + '' + : '-'; + + const toggleIcon = hasChildren + ? '' + : ''; + + html += '\ + \ + \ +
\ + ' + (hasChildren ? '' : '') + '\ + ' + escapeHtml(projectName) + '\ +
\ + \ + ' + escapeHtml(node.totalSizeFormatted) + '\ + ' + historyBadge + '\ + ' + formatTimeAgo(node.lastModified) + '\ + \ + \ + \ + \ + '; + + // Render children (initially hidden) + if (hasChildren) { + html += renderProjectTree(node.children, level + 1, formatTimeAgo); + } + } + + return html; +} + +/** + * Toggle project node expansion + */ +function toggleProjectNode(projectId) { + const row = document.querySelector('[data-project-id="' + projectId + '"]'); + if (!row) return; + + const icon = row.querySelector('.toggle-icon'); + const level = parseInt(row.getAttribute('data-level')); + + // Find all child rows + let nextRow = row.nextElementSibling; + const childRows = []; + + while (nextRow && nextRow.classList.contains('project-row')) { + const nextLevel = parseInt(nextRow.getAttribute('data-level')); + if (nextLevel <= level) break; + childRows.push(nextRow); + nextRow = nextRow.nextElementSibling; + } + + // Toggle visibility + const isExpanded = row.classList.contains('expanded'); + + if (isExpanded) { + // Collapse + row.classList.remove('expanded'); + if (icon) icon.style.transform = 'rotate(0deg)'; + childRows.forEach(child => { + child.style.display = 'none'; + }); + } else { + // Expand (only immediate children) + row.classList.add('expanded'); + if (icon) icon.style.transform = 'rotate(90deg)'; + childRows.forEach(child => { + const childLevel = parseInt(child.getAttribute('data-level')); + if (childLevel === level + 1) { + child.style.display = ''; + } + }); + } + + // Reinitialize Lucide icons + if (typeof lucide !== 'undefined') { + lucide.createIcons(); + } +} + /** * Render error state for storage card */ diff --git a/ccw/src/templates/dashboard-js/i18n.js b/ccw/src/templates/dashboard-js/i18n.js index cd7a272d..152def23 100644 --- a/ccw/src/templates/dashboard-js/i18n.js +++ b/ccw/src/templates/dashboard-js/i18n.js @@ -506,6 +506,27 @@ const i18n = { 'mcp.codex.enabledTools': 'Tools', 'mcp.codex.tools': 'tools enabled', + // Claude to Codex 
copy + 'mcp.claude.copyFromCodex': 'Copy Codex Servers to Claude', + 'mcp.claude.alreadyAdded': 'Already in Claude', + 'mcp.claude.copyToClaude': 'Copy to Claude Global', + + // MCP Edit Modal + 'mcp.editModal.title': 'Edit MCP Server', + 'mcp.editModal.serverNamePlaceholder': 'server-name', + 'mcp.editModal.onePerLine': 'one per line', + 'mcp.editModal.save': 'Save Changes', + 'mcp.editModal.delete': 'Delete', + 'mcp.editModal.nameRequired': 'Server name is required', + 'mcp.editModal.commandRequired': 'Command or URL is required', + 'mcp.editModal.saved': 'MCP server "{name}" updated', + 'mcp.editModal.saveFailed': 'Failed to save MCP server', + 'mcp.editModal.deleteConfirm': 'Delete MCP server "{name}"?', + 'mcp.editModal.deleted': 'MCP server "{name}" deleted', + 'mcp.editModal.deleteFailed': 'Failed to delete MCP server', + 'mcp.clickToEdit': 'Click to edit', + 'mcp.clickToViewDetails': 'Click to view details', + // Hook Manager 'hook.projectHooks': 'Project Hooks', 'hook.projectFile': '.claude/settings.json', @@ -729,6 +750,7 @@ const i18n = { 'memory.contextHotspots': 'Context Hotspots', 'memory.mostRead': 'Most Read Files', 'memory.mostEdited': 'Most Edited Files', + 'memory.mostMentioned': 'Most Mentioned Topics', 'memory.today': 'Today', 'memory.week': 'Week', 'memory.allTime': 'All Time', @@ -958,6 +980,29 @@ const i18n = { 'graph.zoomIn': 'Zoom In', 'graph.zoomOut': 'Zoom Out', 'graph.resetLayout': 'Reset Layout', + 'graph.title': 'Code Graph', + 'graph.filters': 'Filters', + 'graph.legend': 'Legend', + 'graph.nodes': 'nodes', + 'graph.edges': 'edges', + 'graph.noGraphData': 'No graph data available. 
Index this project with codex-lens first.', + 'graph.noSearchData': 'No search process data available.', + 'graph.center': 'Center', + 'graph.resetFilters': 'Reset Filters', + 'graph.cytoscapeNotLoaded': 'Graph library not loaded', + 'graph.impactAnalysisError': 'Failed to load impact analysis', + 'graph.searchProcessDesc': 'Visualize how search queries flow through the system', + 'graph.searchProcessTitle': 'Search Pipeline', + 'graph.resultsFound': 'results found', + 'graph.type': 'Type', + 'graph.line': 'Line', + 'graph.path': 'Path', + 'graph.depth': 'Depth', + 'graph.exports': 'Exports', + 'graph.imports': 'Imports', + 'graph.references': 'References', + 'graph.symbolType': 'Symbol Type', + 'graph.affectedSymbols': 'Affected Symbols', // CLI Sync (used in claude-manager.js) 'claude.cliSync': 'CLI Auto-Sync', @@ -1025,7 +1070,7 @@ const i18n = { zh: { // App title and brand 'app.title': 'CCW 控制面板', - 'app.brand': 'Claude Code 工作流', + 'app.brand': 'Claude Code Workflow', // Header 'header.project': '项目:', @@ -1498,6 +1543,27 @@ const i18n = { 'mcp.codex.enabledTools': '工具', 'mcp.codex.tools': '个工具已启用', + // Claude to Codex copy + 'mcp.claude.copyFromCodex': '从 Codex 复制服务器到 Claude', + 'mcp.claude.alreadyAdded': '已在 Claude 中', + 'mcp.claude.copyToClaude': '复制到 Claude 全局', + + // MCP Edit Modal + 'mcp.editModal.title': '编辑 MCP 服务器', + 'mcp.editModal.serverNamePlaceholder': 'server-name', + 'mcp.editModal.onePerLine': '每行一个', + 'mcp.editModal.save': '保存更改', + 'mcp.editModal.delete': '删除', + 'mcp.editModal.nameRequired': '服务器名称必填', + 'mcp.editModal.commandRequired': '命令或 URL 必填', + 'mcp.editModal.saved': 'MCP 服务器 "{name}" 已更新', + 'mcp.editModal.saveFailed': '保存 MCP 服务器失败', + 'mcp.editModal.deleteConfirm': '删除 MCP 服务器 "{name}"?', + 'mcp.editModal.deleted': 'MCP 服务器 "{name}" 已删除', + 'mcp.editModal.deleteFailed': '删除 MCP 服务器失败', + 'mcp.clickToEdit': '点击编辑', + 'mcp.clickToViewDetails': '点击查看详情', + // Hook Manager 'hook.projectHooks': '项目钩子', 'hook.projectFile': 
'.claude/settings.json', @@ -1721,6 +1787,7 @@ const i18n = { 'memory.contextHotspots': '上下文热点', 'memory.mostRead': '最常读取的文件', 'memory.mostEdited': '最常编辑的文件', + 'memory.mostMentioned': '最常提及的话题', 'memory.today': '今天', 'memory.week': '本周', 'memory.allTime': '全部时间', @@ -1950,6 +2017,29 @@ const i18n = { 'graph.zoomIn': '放大', 'graph.zoomOut': '缩小', 'graph.resetLayout': '重置布局', + 'graph.title': '代码图谱', + 'graph.filters': '筛选器', + 'graph.legend': '图例', + 'graph.nodes': '个节点', + 'graph.edges': '条边', + 'graph.noGraphData': '无图谱数据。请先使用 codex-lens 为此项目建立索引。', + 'graph.noSearchData': '无搜索过程数据。', + 'graph.center': '居中', + 'graph.resetFilters': '重置筛选', + 'graph.cytoscapeNotLoaded': '图谱库未加载', + 'graph.impactAnalysisError': '加载影响分析失败', + 'graph.searchProcessDesc': '可视化搜索查询在系统中的流转过程', + 'graph.searchProcessTitle': '搜索管道', + 'graph.resultsFound': '个结果', + 'graph.type': '类型', + 'graph.line': '行号', + 'graph.path': '路径', + 'graph.depth': '深度', + 'graph.exports': '导出', + 'graph.imports': '导入', + 'graph.references': '引用', + 'graph.symbolType': '符号类型', + 'graph.affectedSymbols': '受影响符号', // CLI Sync (used in claude-manager.js) 'claude.cliSync': 'CLI 自动同步', diff --git a/ccw/src/templates/dashboard-js/views/graph-explorer.js b/ccw/src/templates/dashboard-js/views/graph-explorer.js index 2253c94e..16cf36a3 100644 --- a/ccw/src/templates/dashboard-js/views/graph-explorer.js +++ b/ccw/src/templates/dashboard-js/views/graph-explorer.js @@ -222,7 +222,7 @@ function renderGraphLegend() { function renderSearchProcessView() { if (!searchProcessData) { return '
' + - '' + + '' + '

' + t('graph.noSearchData') + '

' + '
'; } @@ -280,7 +280,7 @@ function initializeCytoscape() { // Check if Cytoscape.js is loaded if (typeof cytoscape === 'undefined') { container.innerHTML = '
' + - '' + + '' + '

' + t('graph.cytoscapeNotLoaded') + '

' + '
'; if (window.lucide) lucide.createIcons(); @@ -289,7 +289,7 @@ function initializeCytoscape() { if (graphData.nodes.length === 0) { container.innerHTML = '
' + - '' + + '' + '

' + t('graph.noGraphData') + '

' + '
'; if (window.lucide) lucide.createIcons(); @@ -493,6 +493,15 @@ function selectNode(nodeData) { panel.classList.remove('hidden'); panel.innerHTML = renderNodeDetails(nodeData); if (window.lucide) lucide.createIcons(); + + // Attach event listener for impact analysis button (prevents XSS) + var impactBtn = document.getElementById('impactAnalysisBtn'); + if (impactBtn) { + impactBtn.addEventListener('click', function() { + var nodeId = this.getAttribute('data-node-id'); + if (nodeId) showImpactAnalysis(nodeId); + }); + } } } @@ -559,7 +568,7 @@ function renderNodeDetails(node) { '' + '' + '
' + - '' + '
' + @@ -629,7 +638,7 @@ function centerCytoscape() { // ========== Impact Analysis ========== async function showImpactAnalysis(symbolId) { try { - var response = await fetch('/api/graph/impact/' + encodeURIComponent(symbolId)); + var response = await fetch('/api/graph/impact?symbol=' + encodeURIComponent(symbolId)); if (!response.ok) throw new Error('Failed to fetch impact analysis'); var data = await response.json(); @@ -727,3 +736,22 @@ function hideStatsAndCarousel() { if (statsGrid) statsGrid.style.display = 'none'; if (carousel) carousel.style.display = 'none'; } + +// ========== Cleanup Function ========== +/** + * Clean up Cytoscape instance to prevent memory leaks + * Should be called when navigating away from the graph explorer view + */ +function cleanupGraphExplorer() { + if (cyInstance) { + cyInstance.destroy(); + cyInstance = null; + } + selectedNode = null; + searchProcessData = null; +} + +// Register cleanup on navigation (called by navigation.js before switching views) +if (typeof window !== 'undefined') { + window.cleanupGraphExplorer = cleanupGraphExplorer; +} diff --git a/ccw/src/templates/dashboard-js/views/mcp-manager.js b/ccw/src/templates/dashboard-js/views/mcp-manager.js index 24d4a254..b9d53ee4 100644 --- a/ccw/src/templates/dashboard-js/views/mcp-manager.js +++ b/ccw/src/templates/dashboard-js/views/mcp-manager.js @@ -139,8 +139,9 @@ async function renderMcpManager() { Claude - @@ -228,6 +229,7 @@ async function renderMcpManager() {

${escapeHtml(serverName)}

+ Claude ${alreadyInCodex ? `${t('mcp.codex.alreadyAdded')}` : ''}
${!alreadyInCodex ? ` @@ -250,6 +252,26 @@ async function renderMcpManager() { ` : ''} + + +
+
+

${t('mcp.availableOther')}

+ ${otherProjectServers.length} ${t('mcp.serversAvailable')} +
+ + ${otherProjectServers.length === 0 ? ` +
+

${t('empty.noAdditionalMcp')}

+
+ ` : ` +
+ ${otherProjectServers.map(([serverName, serverInfo]) => { + return renderAvailableServerCardForCodex(serverName, serverInfo); + }).join('')} +
+ `} +
` : `
@@ -486,6 +508,55 @@ async function renderMcpManager() {
` : ''} + + ${currentCliMode === 'claude' && Object.keys(codexMcpServers || {}).length > 0 ? ` +
+
+

+ + ${t('mcp.claude.copyFromCodex')} +

+ ${Object.keys(codexMcpServers || {}).length} ${t('mcp.serversAvailable')} +
+
+ ${Object.entries(codexMcpServers || {}).map(([serverName, serverConfig]) => { + const alreadyInClaude = mcpUserServers && mcpUserServers[serverName]; + const isStdio = !!serverConfig.command; + const isHttp = !!serverConfig.url; + return ` +
+
+
+ +

${escapeHtml(serverName)}

+ Codex + ${isHttp + ? 'HTTP' + : 'STDIO' + } + ${alreadyInClaude ? '' + t('mcp.claude.alreadyAdded') + '' : ''} +
+ ${!alreadyInClaude ? ` + + ` : ''} +
+
+
+ ${isHttp ? t('mcp.url') : t('mcp.cmd')} + ${escapeHtml(serverConfig.command || serverConfig.url || 'N/A')} +
+
+
+ `; + }).join('')} +
+
+ ` : ''} + ${currentCliMode === 'claude' ? `
@@ -676,7 +747,12 @@ function renderGlobalManagementCard(serverName, serverConfig) { const serverType = serverConfig.type || 'stdio'; return ` -
+
@@ -706,7 +782,7 @@ function renderGlobalManagementCard(serverName, serverConfig) {
-
+
+ ` : ''} +
+ +
+
+ ${t('mcp.cmd')} + ${escapeHtml(command)} +
+ ${argsPreview ? ` +
+ ${t('mcp.args')} + ${escapeHtml(argsPreview)} +
+ ` : ''} +
+ ${t('mcp.usedInCount').replace('{count}', usedIn.length).replace('{s}', usedIn.length !== 1 ? 's' : '')} + ${sourceProjectName ? `• ${t('mcp.from')} ${escapeHtml(sourceProjectName)}` : ''} +
+
+
+ `; +} + // ======================================== // Codex MCP Server Card Renderer // ======================================== @@ -825,14 +965,17 @@ function renderCodexServerCard(serverName, serverConfig) { : `STDIO`; return ` -
+ data-cli-type="codex" + data-action="view-details-codex" + title="${t('mcp.clickToEdit')}">
-
+
${isEnabled ? '' : ''}

${escapeHtml(serverName)}

+ Codex ${typeBadge}
'; @@ -380,7 +385,7 @@ function renderHotspotsColumn() { function renderHotspotList(items, type) { if (!items || items.length === 0) { return '
' + - '' + + '' + '

' + t('memory.noData') + '

' + '
'; } @@ -407,6 +412,34 @@ function renderHotspotList(items, type) { '
'; } +function renderTopicList(items) { + if (!items || items.length === 0) { + return '
' + + '' + + '

' + t('memory.noData') + '

' + + '
'; + } + + return '
' + + items.map(function(item, index) { + var heat = item.heat || item.count || 0; + var heatClass = heat > 10 ? 'high' : heat > 5 ? 'medium' : 'low'; + var preview = item.preview || item.topic || 'Unknown'; + + return '
' + + '
' + (index + 1) + '
' + + '
' + + '
' + escapeHtml(preview) + '
' + + '
' + + '
' + + '' + heat + '' + + '' + + '
' + + '
'; + }).join('') + + '
'; +} + // ========== Center Column: Memory Graph ========== // Store graph state for zoom/pan var graphZoom = null; @@ -458,7 +491,7 @@ function renderMemoryGraph(graphData) { var container = document.getElementById('memoryGraphSvg'); if (container) { container.innerHTML = '
' + - '' + + '' + '

' + t('memory.noGraphData') + '

' + '
'; if (window.lucide) lucide.createIcons(); @@ -471,7 +504,7 @@ function renderMemoryGraph(graphData) { var container = document.getElementById('memoryGraphSvg'); if (container) { container.innerHTML = '
' + - '' + + '' + '

' + t('memory.d3NotLoaded') + '

' + '
'; if (window.lucide) lucide.createIcons(); @@ -767,7 +800,7 @@ function renderContextColumn() { function renderContextTimeline(prompts) { if (!prompts || prompts.length === 0) { return '
' + - '' + + '' + '

' + t('memory.noRecentActivity') + '

' + '
'; } diff --git a/ccw/src/templates/dashboard.html b/ccw/src/templates/dashboard.html index 38999d39..c08c88ae 100644 --- a/ccw/src/templates/dashboard.html +++ b/ccw/src/templates/dashboard.html @@ -248,7 +248,7 @@
- + Claude Code Workflow
diff --git a/ccw/src/tools/cli-config-manager.ts b/ccw/src/tools/cli-config-manager.ts index dd54780a..7d3e5812 100644 --- a/ccw/src/tools/cli-config-manager.ts +++ b/ccw/src/tools/cli-config-manager.ts @@ -27,7 +27,7 @@ export type CliToolName = 'gemini' | 'qwen' | 'codex'; export const PREDEFINED_MODELS: Record = { gemini: ['gemini-2.5-pro', 'gemini-2.5-flash', 'gemini-2.0-flash', 'gemini-1.5-pro', 'gemini-1.5-flash'], qwen: ['coder-model', 'vision-model', 'qwen2.5-coder-32b'], - codex: ['gpt5-codex', 'gpt-4.1', 'o4-mini', 'o3'] + codex: ['gpt-5.2', 'gpt-4.1', 'o4-mini', 'o3'] }; export const DEFAULT_CONFIG: CliConfig = { @@ -45,8 +45,8 @@ export const DEFAULT_CONFIG: CliConfig = { }, codex: { enabled: true, - primaryModel: 'gpt5-codex', - secondaryModel: 'gpt5-codex' + primaryModel: 'gpt-5.2', + secondaryModel: 'gpt-5.2' } } }; diff --git a/ccw/src/tools/cli-executor.ts b/ccw/src/tools/cli-executor.ts index 54ff3ab6..15ae321a 100644 --- a/ccw/src/tools/cli-executor.ts +++ b/ccw/src/tools/cli-executor.ts @@ -319,6 +319,8 @@ function buildCommand(params: { break; case 'codex': + // Codex does NOT support stdin - prompt must be passed as command line argument + useStdin = false; // Native resume: codex resume [prompt] or --last if (nativeResume?.enabled) { args.push('resume'); @@ -343,6 +345,10 @@ function buildCommand(params: { args.push('--add-dir', addDir); } } + // Add prompt as positional argument for resume + if (prompt) { + args.push(prompt); + } } else { // Standard exec mode args.push('exec'); @@ -362,6 +368,10 @@ function buildCommand(params: { args.push('--add-dir', addDir); } } + // Add prompt as positional argument (codex exec "prompt") + if (prompt) { + args.push(prompt); + } } break; @@ -379,9 +389,11 @@ function buildCommand(params: { if (model) { args.push('--model', model); } - // Permission modes for write/auto + // Permission modes: write/auto → bypassPermissions, analysis → default if (mode === 'write' || mode === 'auto') { - 
args.push('--dangerously-skip-permissions'); + args.push('--permission-mode', 'bypassPermissions'); + } else { + args.push('--permission-mode', 'default'); } // Output format for better parsing args.push('--output-format', 'text'); @@ -570,7 +582,7 @@ async function executeCliTool( // Determine working directory early (needed for conversation lookup) const workingDir = cd || process.cwd(); - const historyDir = ensureHistoryDir(workingDir); + ensureHistoryDir(workingDir); // Ensure history directory exists // Get SQLite store for native session lookup const store = await getSqliteStore(workingDir); @@ -722,16 +734,8 @@ async function executeCliTool( } } - // Determine effective model (use config's primaryModel if not explicitly provided) - let effectiveModel = model; - if (!effectiveModel) { - try { - effectiveModel = getPrimaryModel(workingDir, tool); - } catch { - // Config not available, use default (let the CLI tool use its own default) - effectiveModel = undefined; - } - } + // Only pass model if explicitly provided - let CLI tools use their own defaults + const effectiveModel = model; // Build command const { command, args, useStdin } = buildCommand({ @@ -864,7 +868,7 @@ async function executeCliTool( // Save all source conversations try { for (const conv of savedConversations) { - saveConversation(historyDir, conv); + saveConversation(workingDir, conv); } } catch (err) { console.error('[CLI Executor] Failed to save merged histories:', (err as Error).message); @@ -906,7 +910,7 @@ async function executeCliTool( }; // Save merged conversation try { - saveConversation(historyDir, conversation); + saveConversation(workingDir, conversation); } catch (err) { console.error('[CLI Executor] Failed to save merged conversation:', (err as Error).message); } @@ -937,7 +941,7 @@ async function executeCliTool( }; // Try to save conversation to history try { - saveConversation(historyDir, conversation); + saveConversation(workingDir, conversation); } catch (err) { // 
Non-fatal: continue even if history save fails console.error('[CLI Executor] Failed to save history:', (err as Error).message); @@ -945,7 +949,8 @@ async function executeCliTool( } // Track native session after execution (async, non-blocking) - trackNewSession(tool, new Date(startTime), workingDir) + // Pass prompt for precise matching in parallel execution scenarios + trackNewSession(tool, new Date(startTime), workingDir, prompt) .then((nativeSession) => { if (nativeSession) { // Save native session mapping @@ -1211,8 +1216,8 @@ export function getExecutionHistory(baseDir: string, options: { * Get conversation detail by ID (returns ConversationRecord) */ export function getConversationDetail(baseDir: string, conversationId: string): ConversationRecord | null { - const paths = StoragePaths.project(baseDir); - return loadConversation(paths.cliHistory, conversationId); + // Pass baseDir directly - loadConversation will resolve the correct storage path + return loadConversation(baseDir, conversationId); } /** diff --git a/ccw/src/tools/cli-history-store.ts b/ccw/src/tools/cli-history-store.ts index b70e58f2..203bd8ff 100644 --- a/ccw/src/tools/cli-history-store.ts +++ b/ccw/src/tools/cli-history-store.ts @@ -7,7 +7,7 @@ import Database from 'better-sqlite3'; import { existsSync, mkdirSync, readdirSync, readFileSync, statSync, unlinkSync, rmdirSync } from 'fs'; import { join } from 'path'; import { parseSessionFile, formatConversation, extractConversationPairs, type ParsedSession, type ParsedTurn } from './session-content-parser.js'; -import { StoragePaths, ensureStorageDir } from '../config/storage-paths.js'; +import { StoragePaths, ensureStorageDir, getProjectId } from '../config/storage-paths.js'; // Types export interface ConversationTurn { @@ -239,10 +239,12 @@ export class CliHistoryStore { */ private migrateSchema(): void { try { - // Check if category column exists + // Check if columns exist const tableInfo = this.db.prepare('PRAGMA 
table_info(conversations)').all() as Array<{ name: string }>; const hasCategory = tableInfo.some(col => col.name === 'category'); const hasParentExecutionId = tableInfo.some(col => col.name === 'parent_execution_id'); + const hasProjectRoot = tableInfo.some(col => col.name === 'project_root'); + const hasRelativePath = tableInfo.some(col => col.name === 'relative_path'); if (!hasCategory) { console.log('[CLI History] Migrating database: adding category column...'); @@ -270,6 +272,28 @@ export class CliHistoryStore { } console.log('[CLI History] Migration complete: parent_execution_id column added'); } + + // Add hierarchical storage support columns + if (!hasProjectRoot) { + console.log('[CLI History] Migrating database: adding project_root column for hierarchical storage...'); + this.db.exec(` + ALTER TABLE conversations ADD COLUMN project_root TEXT; + `); + try { + this.db.exec(`CREATE INDEX IF NOT EXISTS idx_conversations_project_root ON conversations(project_root);`); + } catch (indexErr) { + console.warn('[CLI History] Project root index creation warning:', (indexErr as Error).message); + } + console.log('[CLI History] Migration complete: project_root column added'); + } + + if (!hasRelativePath) { + console.log('[CLI History] Migrating database: adding relative_path column for hierarchical storage...'); + this.db.exec(` + ALTER TABLE conversations ADD COLUMN relative_path TEXT; + `); + console.log('[CLI History] Migration complete: relative_path column added'); + } } catch (err) { console.error('[CLI History] Migration error:', (err as Error).message); // Don't throw - allow the store to continue working with existing schema @@ -1115,17 +1139,21 @@ export class CliHistoryStore { } } -// Singleton instance cache +// Singleton instance cache - keyed by normalized project ID for consistency const storeCache = new Map(); /** * Get or create a store instance for a directory + * Uses normalized project ID as cache key to handle path casing differences */ export 
function getHistoryStore(baseDir: string): CliHistoryStore { - if (!storeCache.has(baseDir)) { - storeCache.set(baseDir, new CliHistoryStore(baseDir)); + // Use getProjectId to normalize path for consistent cache key + const cacheKey = getProjectId(baseDir); + + if (!storeCache.has(cacheKey)) { + storeCache.set(cacheKey, new CliHistoryStore(baseDir)); } - return storeCache.get(baseDir)!; + return storeCache.get(cacheKey)!; } /** diff --git a/ccw/src/tools/native-session-discovery.ts b/ccw/src/tools/native-session-discovery.ts index bbc9fe72..799dcbe1 100644 --- a/ccw/src/tools/native-session-discovery.ts +++ b/ccw/src/tools/native-session-discovery.ts @@ -70,18 +70,60 @@ abstract class SessionDiscoverer { /** * Track new session created during execution + * @param beforeTimestamp - Filter sessions created after this time + * @param workingDir - Project working directory + * @param prompt - Optional prompt content for precise matching (fallback) */ async trackNewSession( beforeTimestamp: Date, - workingDir: string + workingDir: string, + prompt?: string ): Promise { const sessions = this.getSessions({ workingDir, afterTimestamp: beforeTimestamp, - limit: 1 + limit: 10 // Get more candidates for prompt matching }); - return sessions.length > 0 ? 
sessions[0] : null; + + if (sessions.length === 0) return null; + + // If only one session or no prompt provided, return the latest + if (sessions.length === 1 || !prompt) { + return sessions[0]; + } + + // Try to match by prompt content (fallback for parallel execution) + const matched = this.matchSessionByPrompt(sessions, prompt); + return matched || sessions[0]; // Fallback to latest if no match } + + /** + * Match session by prompt content + * Searches for the prompt in session's user messages + */ + matchSessionByPrompt(sessions: NativeSession[], prompt: string): NativeSession | null { + // Normalize prompt for comparison (first 200 chars) + const promptPrefix = prompt.substring(0, 200).trim(); + if (!promptPrefix) return null; + + for (const session of sessions) { + try { + const userMessage = this.extractFirstUserMessage(session.filePath); + if (userMessage && userMessage.includes(promptPrefix)) { + return session; + } + } catch { + // Skip sessions that can't be read + } + } + return null; + } + + /** + * Extract first user message from session file + * Override in subclass for tool-specific format + */ + abstract extractFirstUserMessage(filePath: string): string | null; } /** @@ -157,6 +199,23 @@ class GeminiSessionDiscoverer extends SessionDiscoverer { const sessions = this.getSessions(); return sessions.find(s => s.sessionId === sessionId) || null; } + + /** + * Extract first user message from Gemini session file + * Format: { "messages": [{ "type": "user", "content": "..." 
}] } + */ + extractFirstUserMessage(filePath: string): string | null { + try { + const content = JSON.parse(readFileSync(filePath, 'utf8')); + if (content.messages && Array.isArray(content.messages)) { + const userMsg = content.messages.find((m: { type: string }) => m.type === 'user'); + return userMsg?.content || null; + } + return null; + } catch { + return null; + } + } } /** @@ -330,6 +389,46 @@ class QwenSessionDiscoverer extends SessionDiscoverer { const sessions = this.getSessions(); return sessions.find(s => s.sessionId === sessionId) || null; } + + /** + * Extract first user message from Qwen session file + * New format (.jsonl): { type: "user", message: { role: "user", parts: [{ text: "..." }] } } + * Legacy format (.json): { "messages": [{ "type": "user", "content": "..." }] } + */ + extractFirstUserMessage(filePath: string): string | null { + try { + const content = readFileSync(filePath, 'utf8'); + + // Check if JSONL (new format) or JSON (legacy) + if (filePath.endsWith('.jsonl')) { + // JSONL format - find first user message + const lines = content.split('\n').filter(l => l.trim()); + for (const line of lines) { + try { + const entry = JSON.parse(line); + // New Qwen format: { type: "user", message: { parts: [{ text: "..." 
}] } } + if (entry.type === 'user' && entry.message?.parts?.[0]?.text) { + return entry.message.parts[0].text; + } + // Alternative format + if (entry.role === 'user' && entry.content) { + return entry.content; + } + } catch { /* skip invalid lines */ } + } + } else { + // Legacy JSON format + const data = JSON.parse(content); + if (data.messages && Array.isArray(data.messages)) { + const userMsg = data.messages.find((m: { type: string }) => m.type === 'user'); + return userMsg?.content || null; + } + } + return null; + } catch { + return null; + } + } } /** @@ -430,6 +529,32 @@ class CodexSessionDiscoverer extends SessionDiscoverer { const sessions = this.getSessions(); return sessions.find(s => s.sessionId === sessionId) || null; } + + /** + * Extract first user message from Codex session file (.jsonl) + * Format: {"type":"event_msg","payload":{"type":"user_message","message":"..."}} + */ + extractFirstUserMessage(filePath: string): string | null { + try { + const content = readFileSync(filePath, 'utf8'); + const lines = content.split('\n').filter(l => l.trim()); + + for (const line of lines) { + try { + const entry = JSON.parse(line); + // Look for user_message event + if (entry.type === 'event_msg' && + entry.payload?.type === 'user_message' && + entry.payload?.message) { + return entry.payload.message; + } + } catch { /* skip invalid lines */ } + } + return null; + } catch { + return null; + } + } } /** @@ -462,15 +587,17 @@ class ClaudeSessionDiscoverer extends SessionDiscoverer { } for (const projectHash of projectDirs) { - const sessionsDir = join(this.basePath, projectHash, 'sessions'); - if (!existsSync(sessionsDir)) continue; + // Claude Code stores session files directly in project folder (not in 'sessions' subdirectory) + // e.g., ~/.claude/projects/D--Claude-dms3/.jsonl + const projectDir = join(this.basePath, projectHash); + if (!existsSync(projectDir)) continue; - const sessionFiles = readdirSync(sessionsDir) + const sessionFiles = 
readdirSync(projectDir) .filter(f => f.endsWith('.jsonl') || f.endsWith('.json')) .map(f => ({ name: f, - path: join(sessionsDir, f), - stat: statSync(join(sessionsDir, f)) + path: join(projectDir, f), + stat: statSync(join(projectDir, f)) })) .sort((a, b) => b.stat.mtimeMs - a.stat.mtimeMs); @@ -521,6 +648,35 @@ class ClaudeSessionDiscoverer extends SessionDiscoverer { const sessions = this.getSessions(); return sessions.find(s => s.sessionId === sessionId) || null; } + + /** + * Extract first user message from Claude Code session file (.jsonl) + * Format: {"type":"user","message":{"role":"user","content":"..."},"isMeta":false,...} + */ + extractFirstUserMessage(filePath: string): string | null { + try { + const content = readFileSync(filePath, 'utf8'); + const lines = content.split('\n').filter(l => l.trim()); + + for (const line of lines) { + try { + const entry = JSON.parse(line); + // Claude Code format: type="user", message.role="user", message.content="..." + // Skip meta messages and command messages + if (entry.type === 'user' && + entry.message?.role === 'user' && + entry.message?.content && + !entry.isMeta && + !entry.message.content.startsWith(' { const discoverer = discoverers[tool]; if (!discoverer) return null; - return discoverer.trackNewSession(beforeTimestamp, workingDir); + return discoverer.trackNewSession(beforeTimestamp, workingDir, prompt); } /** diff --git a/ccw/src/tools/storage-manager.ts b/ccw/src/tools/storage-manager.ts index dc993236..da52608d 100644 --- a/ccw/src/tools/storage-manager.ts +++ b/ccw/src/tools/storage-manager.ts @@ -137,47 +137,112 @@ function getDbRecordCount(dbPath: string, tableName: string): number { } /** - * Get storage statistics for a specific project by ID + * Check if a directory is a project data directory + * A project data directory contains at least one of: cli-history, memory, cache, config */ -export function getProjectStorageStats(projectId: string): ProjectStorageStats { - const paths = 
StoragePaths.projectById(projectId); +function isProjectDataDirectory(dirPath: string): boolean { + const dataMarkers = ['cli-history', 'memory', 'cache', 'config']; + return dataMarkers.some(marker => existsSync(join(dirPath, marker))); +} - const cliHistorySize = getDirSize(paths.cliHistory); - const memorySize = getDirSize(paths.memory); - const cacheSize = getDirSize(paths.cache); - const configSize = getDirSize(paths.config); +/** + * Get storage statistics for a specific project by path + * @param projectId - Project ID (can be hierarchical like "parent/child") + * @param projectDir - Actual directory path in storage + */ +function getProjectStats(projectId: string, projectDir: string): ProjectStorageStats { + const cliHistoryDir = join(projectDir, 'cli-history'); + const memoryDir = join(projectDir, 'memory'); + const cacheDir = join(projectDir, 'cache'); + const configDir = join(projectDir, 'config'); + + const cliHistorySize = getDirSize(cliHistoryDir); + const memorySize = getDirSize(memoryDir); + const cacheSize = getDirSize(cacheDir); + const configSize = getDirSize(configDir); let recordCount: number | undefined; - if (existsSync(paths.historyDb)) { - recordCount = getDbRecordCount(paths.historyDb, 'conversations'); + const historyDb = join(cliHistoryDir, 'history.db'); + if (existsSync(historyDb)) { + recordCount = getDbRecordCount(historyDb, 'conversations'); } return { projectId, totalSize: cliHistorySize + memorySize + cacheSize + configSize, cliHistory: { - exists: existsSync(paths.cliHistory), + exists: existsSync(cliHistoryDir), size: cliHistorySize, recordCount }, memory: { - exists: existsSync(paths.memory), + exists: existsSync(memoryDir), size: memorySize }, cache: { - exists: existsSync(paths.cache), + exists: existsSync(cacheDir), size: cacheSize }, config: { - exists: existsSync(paths.config), + exists: existsSync(configDir), size: configSize }, - lastModified: getLatestModTime(paths.root) + lastModified: getLatestModTime(projectDir) }; } 
+/** + * Get storage statistics for a specific project by ID (legacy) + */ +export function getProjectStorageStats(projectId: string): ProjectStorageStats { + const paths = StoragePaths.projectById(projectId); + return getProjectStats(projectId, paths.root); +} + +/** + * Recursively scan project directory for hierarchical structure + * @param basePath - Base directory to scan + * @param relativePath - Relative path from projects root + * @param results - Array to accumulate results + */ +function scanProjectDirectory( + basePath: string, + relativePath: string, + results: ProjectStorageStats[] +): void { + if (!existsSync(basePath)) return; + + try { + const entries = readdirSync(basePath, { withFileTypes: true }); + + for (const entry of entries) { + if (!entry.isDirectory()) continue; + + const fullPath = join(basePath, entry.name); + const currentRelPath = relativePath ? `${relativePath}/${entry.name}` : entry.name; + + // Check if this is a project data directory + if (isProjectDataDirectory(fullPath)) { + const projectId = currentRelPath; + const stats = getProjectStats(projectId, fullPath); + results.push(stats); + } + + // Recursively scan subdirectories (excluding data directories) + const dataDirs = ['cli-history', 'memory', 'cache', 'config']; + if (!dataDirs.includes(entry.name)) { + scanProjectDirectory(fullPath, currentRelPath, results); + } + } + } catch (err) { + // Ignore read errors + if (process.env.DEBUG) console.error(`[Storage] Failed to scan ${basePath}: ${err}`); + } +} + /** * Get all storage statistics + * Supports hierarchical project structure */ export function getStorageStats(): StorageStats { const rootPath = CCW_HOME; @@ -187,19 +252,10 @@ export function getStorageStats(): StorageStats { const mcpTemplatesPath = StoragePaths.global.mcpTemplates(); const globalDbSize = getFileSize(mcpTemplatesPath); - // Projects + // Projects - use recursive scanning for hierarchical structure const projects: ProjectStorageStats[] = []; if 
(existsSync(projectsDir)) { - try { - const entries = readdirSync(projectsDir, { withFileTypes: true }); - for (const entry of entries) { - if (entry.isDirectory()) { - projects.push(getProjectStorageStats(entry.name)); - } - } - } catch { - // Ignore read errors - } + scanProjectDirectory(projectsDir, '', projects); } // Sort by last modified (most recent first) diff --git a/ccw/tests/storage-paths.test.js b/ccw/tests/storage-paths.test.js new file mode 100644 index 00000000..bc53c905 --- /dev/null +++ b/ccw/tests/storage-paths.test.js @@ -0,0 +1,293 @@ +/** + * Storage Paths Hierarchical Structure Tests + * Tests for hierarchical storage path generation and migration + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { join, resolve } from 'path'; +import { existsSync, mkdirSync, rmSync, writeFileSync } from 'fs'; +import { homedir } from 'os'; + +// Mock CCW_HOME for testing +const TEST_CCW_HOME = join(homedir(), '.ccw-test'); +process.env.CCW_DATA_DIR = TEST_CCW_HOME; + +// Import after setting env var +import { + detectHierarchy, + getProjectPaths, + clearHierarchyCache, + getProjectId +} from '../src/config/storage-paths.js'; + +describe('Storage Paths - Hierarchical Structure', () => { + beforeEach(() => { + // Clean test directory + if (existsSync(TEST_CCW_HOME)) { + rmSync(TEST_CCW_HOME, { recursive: true, force: true }); + } + mkdirSync(TEST_CCW_HOME, { recursive: true }); + clearHierarchyCache(); + }); + + afterEach(() => { + // Cleanup + if (existsSync(TEST_CCW_HOME)) { + rmSync(TEST_CCW_HOME, { recursive: true, force: true }); + } + clearHierarchyCache(); + }); + + describe('Project ID Generation', () => { + it('should generate consistent project IDs', () => { + const path1 = 'D:\\Claude_dms3'; + const path2 = 'D:\\Claude_dms3'; + + const id1 = getProjectId(path1); + const id2 = getProjectId(path2); + + expect(id1).toBe(id2); + expect(id1).toContain('d--claude_dms3'); + }); + + it('should handle different path 
formats', () => { + // Test Windows path + const winId = getProjectId('D:\\Claude_dms3'); + expect(winId).toBeTruthy(); + + // Test Unix-like path + const unixId = getProjectId('/home/user/project'); + expect(unixId).toBeTruthy(); + + // Different paths should have different IDs + expect(winId).not.toBe(unixId); + }); + }); + + describe('Hierarchy Detection', () => { + it('should detect no parent for root project', () => { + const hierarchy = detectHierarchy('D:\\Claude_dms3'); + + expect(hierarchy.parentId).toBeNull(); + expect(hierarchy.relativePath).toBe(''); + expect(hierarchy.currentId).toBeTruthy(); + }); + + it('should detect parent when parent storage exists', () => { + // Create parent storage + const parentPath = 'D:\\Claude_dms3'; + const parentId = getProjectId(parentPath); + const parentStorageDir = join(TEST_CCW_HOME, 'projects', parentId); + mkdirSync(parentStorageDir, { recursive: true }); + + // Detect hierarchy for child + const childPath = 'D:\\Claude_dms3\\ccw'; + const hierarchy = detectHierarchy(childPath); + + expect(hierarchy.parentId).toBe(parentId); + expect(hierarchy.relativePath).toBe('ccw'); + }); + + it('should detect nested hierarchy', () => { + // Create parent storage + const rootPath = 'D:\\Claude_dms3'; + const rootId = getProjectId(rootPath); + const rootStorageDir = join(TEST_CCW_HOME, 'projects', rootId); + mkdirSync(rootStorageDir, { recursive: true }); + + // Detect hierarchy for nested child + const nestedPath = 'D:\\Claude_dms3\\ccw\\src'; + const hierarchy = detectHierarchy(nestedPath); + + expect(hierarchy.parentId).toBe(rootId); + expect(hierarchy.relativePath).toBe('ccw/src'); + }); + + it('should cache detection results', () => { + const path = 'D:\\Claude_dms3\\ccw'; + + const result1 = detectHierarchy(path); + const result2 = detectHierarchy(path); + + // Should return exact same object (cached) + expect(result1).toBe(result2); + }); + + it('should clear cache when requested', () => { + const path = 
'D:\\Claude_dms3\\ccw'; + + const result1 = detectHierarchy(path); + clearHierarchyCache(); + const result2 = detectHierarchy(path); + + // Should return different object instances after cache clear + expect(result1).not.toBe(result2); + // But same values + expect(result1.currentId).toBe(result2.currentId); + }); + }); + + describe('Hierarchical Path Generation', () => { + it('should generate flat path for root project', () => { + const projectPath = 'D:\\Claude_dms3'; + const paths = getProjectPaths(projectPath); + + expect(paths.root).toContain('projects'); + expect(paths.root).toContain('d--claude_dms3'); + expect(paths.root).not.toContain('ccw'); + }); + + it('should generate hierarchical path when parent exists', () => { + // Create parent storage + const parentPath = 'D:\\Claude_dms3'; + const parentId = getProjectId(parentPath); + const parentStorageDir = join(TEST_CCW_HOME, 'projects', parentId); + mkdirSync(parentStorageDir, { recursive: true }); + + // Generate paths for child + const childPath = 'D:\\Claude_dms3\\ccw'; + const paths = getProjectPaths(childPath); + + expect(paths.root).toContain(parentId); + expect(paths.root).toContain('ccw'); + expect(paths.root.endsWith('ccw')).toBe(true); + }); + + it('should generate nested hierarchical paths', () => { + // Create parent storage + const parentPath = 'D:\\Claude_dms3'; + const parentId = getProjectId(parentPath); + const parentStorageDir = join(TEST_CCW_HOME, 'projects', parentId); + mkdirSync(parentStorageDir, { recursive: true }); + + // Generate paths for nested child + const nestedPath = 'D:\\Claude_dms3\\ccw\\src'; + const paths = getProjectPaths(nestedPath); + + expect(paths.root).toContain(parentId); + expect(paths.root).toContain('ccw'); + expect(paths.root).toContain('src'); + expect(paths.root.endsWith('src')).toBe(true); + }); + + it('should include all required subdirectories', () => { + const projectPath = 'D:\\Claude_dms3'; + const paths = getProjectPaths(projectPath); + + 
expect(paths.cliHistory).toContain('cli-history'); + expect(paths.memory).toContain('memory'); + expect(paths.cache).toContain('cache'); + expect(paths.config).toContain('config'); + expect(paths.historyDb).toContain('history.db'); + expect(paths.memoryDb).toContain('memory.db'); + }); + }); + + describe('Migration from Flat to Hierarchical', () => { + it('should migrate flat structure to hierarchical', () => { + // Setup: Create parent storage + const parentPath = 'D:\\Claude_dms3'; + const parentId = getProjectId(parentPath); + const parentStorageDir = join(TEST_CCW_HOME, 'projects', parentId); + mkdirSync(parentStorageDir, { recursive: true }); + + // Create old flat structure for child + const childPath = 'D:\\Claude_dms3\\ccw'; + const childId = getProjectId(childPath); + const flatStorageDir = join(TEST_CCW_HOME, 'projects', childId); + const flatCliHistoryDir = join(flatStorageDir, 'cli-history'); + mkdirSync(flatCliHistoryDir, { recursive: true }); + + // Create a test file to verify migration + const testFile = join(flatCliHistoryDir, 'test.txt'); + writeFileSync(testFile, 'test data'); + + // Trigger migration by calling getProjectPaths + const paths = getProjectPaths(childPath); + + // Verify hierarchical path structure + expect(paths.root).toContain('ccw'); + expect(paths.root.endsWith('ccw')).toBe(true); + + // Verify data was migrated + const migratedFile = join(paths.cliHistory, 'test.txt'); + expect(existsSync(migratedFile)).toBe(true); + + // Verify old flat structure was deleted + expect(existsSync(flatStorageDir)).toBe(false); + }); + + it('should handle migration failures gracefully', () => { + // Create scenario that might fail migration + const parentPath = 'D:\\Claude_dms3'; + const parentId = getProjectId(parentPath); + const parentStorageDir = join(TEST_CCW_HOME, 'projects', parentId); + mkdirSync(parentStorageDir, { recursive: true }); + + const childPath = 'D:\\Claude_dms3\\ccw'; + + // Should not throw error even if migration fails + 
expect(() => { + const paths = getProjectPaths(childPath); + expect(paths).toBeTruthy(); + }).not.toThrow(); + }); + }); + + describe('Path Normalization', () => { + it('should normalize Windows path separators', () => { + const hierarchy = detectHierarchy('D:\\Claude_dms3\\ccw\\src'); + + // Relative path should use forward slashes + if (hierarchy.relativePath) { + expect(hierarchy.relativePath).not.toContain('\\'); + expect(hierarchy.relativePath).toContain('/'); + } + }); + + it('should handle trailing slashes', () => { + const path1 = 'D:\\Claude_dms3\\ccw'; + const path2 = 'D:\\Claude_dms3\\ccw\\'; + + const id1 = getProjectId(path1); + const id2 = getProjectId(path2); + + // Should produce same ID regardless of trailing slash + expect(id1).toBe(id2); + }); + }); + + describe('Edge Cases', () => { + it('should handle very deep nesting', () => { + // Create deep parent storage + const parentPath = 'D:\\Claude_dms3'; + const parentId = getProjectId(parentPath); + const parentStorageDir = join(TEST_CCW_HOME, 'projects', parentId); + mkdirSync(parentStorageDir, { recursive: true }); + + // Generate paths for deeply nested child + const deepPath = 'D:\\Claude_dms3\\a\\b\\c\\d\\e'; + const paths = getProjectPaths(deepPath); + + expect(paths.root).toContain(parentId); + expect(paths.root).toContain('a'); + expect(paths.root).toContain('e'); + }); + + it('should handle special characters in path names', () => { + const specialPath = 'D:\\Claude_dms3\\my-project_v2'; + const id = getProjectId(specialPath); + + expect(id).toBeTruthy(); + expect(id).toContain('my-project_v2'); + }); + + it('should handle relative paths by resolving them', () => { + const relativePath = './ccw'; + const paths = getProjectPaths(relativePath); + + // Should resolve to absolute path + expect(paths.root).toBeTruthy(); + }); + }); +}); diff --git a/codex-lens/docs/HYBRID_SEARCH_ARCHITECTURE.md b/codex-lens/docs/HYBRID_SEARCH_ARCHITECTURE.md new file mode 100644 index 00000000..0834efc6 --- 
/dev/null +++ b/codex-lens/docs/HYBRID_SEARCH_ARCHITECTURE.md @@ -0,0 +1,711 @@ +# CodexLens Hybrid Search Architecture Design + +> **Version**: 1.0 +> **Date**: 2025-12-15 +> **Authors**: Gemini + Qwen + Claude (Collaborative Design) +> **Status**: Design Proposal + +--- + +## Executive Summary + +本设计方案针对 CodexLens 当前文本搜索效果差、乱码问题、无增量索引等痛点,综合借鉴 **Codanna** (Tantivy N-gram + 复合排序) 和 **Code-Index-MCP** (双重索引 + AST解析) 的设计思想,提出全新的 **Dual-FTS Hybrid Search** 架构。 + +### 核心改进 +| 问题 | 现状 | 目标方案 | +|------|------|----------| +| 乱码 | `errors="ignore"` 丢弃字节 | chardet 编码检测 + `errors="replace"` | +| 搜索效果差 | 单一 unicode61 分词 | Dual-FTS (精确 + Trigram 模糊) | +| 无模糊搜索 | 仅BM25精确匹配 | 复合排序 (Exact + Fuzzy + Prefix) | +| 重复索引 | 全量重建 | mtime 增量检测 | +| 语义割裂 | FTS与向量独立 | RRF 混合融合 | + +--- + +## Part 1: Architecture Overview + +### 1.1 Target Architecture Diagram + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ User Query: "auth login" │ +└─────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────┐ +│ Query Preprocessor (NEW) │ +│ • CamelCase split: UserAuth → "UserAuth" OR "User Auth" │ +│ • snake_case split: user_auth → "user_auth" OR "user auth" │ +│ • Encoding normalization │ +└─────────────────────────────────────────────────────────────────────────┘ + │ + ┌───────────────┼───────────────┐ + ▼ ▼ ▼ +┌──────────────────────┐ ┌──────────────────────┐ ┌──────────────────────┐ +│ FTS Exact Search │ │ FTS Fuzzy Search │ │ Vector Search │ +│ (files_fts_exact) │ │ (files_fts_fuzzy) │ │ (VectorStore) │ +│ unicode61 + '_' │ │ trigram tokenizer │ │ Cosine similarity │ +│ BM25 scoring │ │ Substring match │ │ 0.0 - 1.0 range │ +└──────────────────────┘ └──────────────────────┘ └──────────────────────┘ + │ │ │ + │ Results E │ Results F │ Results V + └───────────────────────┼───────────────────────┘ + ▼ 
+┌─────────────────────────────────────────────────────────────────────────┐ +│ Ranking Fusion Engine (NEW) │ +│ • Reciprocal Rank Fusion (RRF): score = Σ 1/(k + rank_i) │ +│ • Score normalization (BM25 unbounded → 0-1) │ +│ • Weighted linear fusion: w1*exact + w2*fuzzy + w3*vector │ +└─────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────┐ +│ Final Sorted Results │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + +### 1.2 Component Architecture + +``` +codexlens/ +├── storage/ +│ ├── schema.py # (NEW) Centralized schema definitions +│ ├── dir_index.py # (MODIFY) Add Dual-FTS, incremental indexing +│ ├── sqlite_store.py # (MODIFY) Add encoding detection +│ └── migrations/ +│ └── migration_004_dual_fts.py # (NEW) Schema migration +│ +├── search/ +│ ├── hybrid_search.py # (NEW) HybridSearchEngine +│ ├── ranking.py # (NEW) RRF and fusion algorithms +│ ├── query_parser.py # (NEW) Query preprocessing +│ └── chain_search.py # (MODIFY) Integrate hybrid search +│ +├── parsers/ +│ └── encoding.py # (NEW) Encoding detection utility +│ +└── semantic/ + └── vector_store.py # (MODIFY) Integration with hybrid search +``` + +--- + +## Part 2: Detailed Component Design + +### 2.1 Encoding Detection Module + +**File**: `codexlens/parsers/encoding.py` (NEW) + +```python +"""Robust encoding detection for file content.""" +from pathlib import Path +from typing import Tuple, Optional + +# Optional: chardet or charset-normalizer +try: + import chardet + HAS_CHARDET = True +except ImportError: + HAS_CHARDET = False + + +def detect_encoding(content: bytes, default: str = "utf-8") -> str: + """Detect encoding of byte content with fallback.""" + if HAS_CHARDET: + result = chardet.detect(content[:10000]) # Sample first 10KB + if result and result.get("confidence", 0) > 0.7: + return result["encoding"] or default + return default + + +def 
read_file_safe(path: Path) -> Tuple[str, str]: + """Read file with encoding detection. + + Returns: + Tuple of (content, detected_encoding) + """ + raw_bytes = path.read_bytes() + encoding = detect_encoding(raw_bytes) + + try: + content = raw_bytes.decode(encoding, errors="replace") + except (UnicodeDecodeError, LookupError): + content = raw_bytes.decode("utf-8", errors="replace") + encoding = "utf-8" + + return content, encoding +``` + +**Integration Point**: `dir_index.py:add_file()`, `index_tree.py:_build_single_dir()` + +--- + +### 2.2 Dual-FTS Schema Design + +**File**: `codexlens/storage/schema.py` (NEW) + +```python +"""Centralized database schema definitions for Dual-FTS architecture.""" + +# Schema version for migration tracking +SCHEMA_VERSION = 4 + +# Standard FTS5 for exact matching (code symbols, identifiers) +FTS_EXACT_SCHEMA = """ +CREATE VIRTUAL TABLE IF NOT EXISTS files_fts_exact USING fts5( + name, full_path UNINDEXED, content, + content='files', + content_rowid='id', + tokenize="unicode61 tokenchars '_-'" +) +""" + +# Trigram FTS5 for fuzzy/substring matching (requires SQLite 3.34+) +FTS_FUZZY_SCHEMA = """ +CREATE VIRTUAL TABLE IF NOT EXISTS files_fts_fuzzy USING fts5( + name, full_path UNINDEXED, content, + content='files', + content_rowid='id', + tokenize="trigram" +) +""" + +# Fallback if trigram not available +FTS_FUZZY_FALLBACK = """ +CREATE VIRTUAL TABLE IF NOT EXISTS files_fts_fuzzy USING fts5( + name, full_path UNINDEXED, content, + content='files', + content_rowid='id', + tokenize="unicode61 tokenchars '_-' separators '.'" +) +""" + +def check_trigram_support(conn) -> bool: + """Check if SQLite supports trigram tokenizer.""" + try: + conn.execute("CREATE VIRTUAL TABLE _test_trigram USING fts5(x, tokenize='trigram')") + conn.execute("DROP TABLE _test_trigram") + return True + except Exception: + return False + + +def create_dual_fts_schema(conn) -> dict: + """Create Dual-FTS tables with fallback. 
+ + Returns: + dict with 'exact_table', 'fuzzy_table', 'trigram_enabled' keys + """ + result = {"exact_table": "files_fts_exact", "fuzzy_table": "files_fts_fuzzy"} + + # Create exact FTS (always available) + conn.execute(FTS_EXACT_SCHEMA) + + # Create fuzzy FTS (with trigram if supported) + if check_trigram_support(conn): + conn.execute(FTS_FUZZY_SCHEMA) + result["trigram_enabled"] = True + else: + conn.execute(FTS_FUZZY_FALLBACK) + result["trigram_enabled"] = False + + # Create triggers for dual-table sync + conn.execute(""" + CREATE TRIGGER IF NOT EXISTS files_ai_exact AFTER INSERT ON files BEGIN + INSERT INTO files_fts_exact(rowid, name, full_path, content) + VALUES (new.id, new.name, new.full_path, new.content); + END + """) + conn.execute(""" + CREATE TRIGGER IF NOT EXISTS files_ai_fuzzy AFTER INSERT ON files BEGIN + INSERT INTO files_fts_fuzzy(rowid, name, full_path, content) + VALUES (new.id, new.name, new.full_path, new.content); + END + """) + # ... similar triggers for UPDATE and DELETE + + return result +``` + +--- + +### 2.3 Hybrid Search Engine + +**File**: `codexlens/search/hybrid_search.py` (NEW) + +```python +"""Hybrid search engine combining FTS and semantic search with RRF fusion.""" +from dataclasses import dataclass +from typing import List, Optional +from concurrent.futures import ThreadPoolExecutor + +from codexlens.entities import SearchResult +from codexlens.search.ranking import reciprocal_rank_fusion, normalize_scores + + +@dataclass +class HybridSearchConfig: + """Configuration for hybrid search.""" + enable_exact: bool = True + enable_fuzzy: bool = True + enable_vector: bool = True + exact_weight: float = 0.4 + fuzzy_weight: float = 0.3 + vector_weight: float = 0.3 + rrf_k: int = 60 # RRF constant + max_results: int = 20 + + +class HybridSearchEngine: + """Multi-modal search with RRF fusion.""" + + def __init__(self, dir_index_store, vector_store=None, config: HybridSearchConfig = None): + self.store = dir_index_store + self.vector_store 
= vector_store + self.config = config or HybridSearchConfig() + + def search(self, query: str, limit: int = 20) -> List[SearchResult]: + """Execute hybrid search with parallel retrieval and RRF fusion.""" + results_map = {} + + # Parallel retrieval + with ThreadPoolExecutor(max_workers=3) as executor: + futures = {} + + if self.config.enable_exact: + futures["exact"] = executor.submit( + self._search_exact, query, limit * 2 + ) + if self.config.enable_fuzzy: + futures["fuzzy"] = executor.submit( + self._search_fuzzy, query, limit * 2 + ) + if self.config.enable_vector and self.vector_store: + futures["vector"] = executor.submit( + self._search_vector, query, limit * 2 + ) + + for name, future in futures.items(): + try: + results_map[name] = future.result(timeout=10) + except Exception: + results_map[name] = [] + + # Apply RRF fusion + fused = reciprocal_rank_fusion( + results_map, + weights={ + "exact": self.config.exact_weight, + "fuzzy": self.config.fuzzy_weight, + "vector": self.config.vector_weight, + }, + k=self.config.rrf_k + ) + + return fused[:limit] + + def _search_exact(self, query: str, limit: int) -> List[SearchResult]: + """Exact FTS search with BM25.""" + return self.store.search_fts_exact(query, limit) + + def _search_fuzzy(self, query: str, limit: int) -> List[SearchResult]: + """Fuzzy FTS search with trigram.""" + return self.store.search_fts_fuzzy(query, limit) + + def _search_vector(self, query: str, limit: int) -> List[SearchResult]: + """Semantic vector search.""" + if not self.vector_store: + return [] + return self.vector_store.search_similar(query, limit) +``` + +--- + +### 2.4 RRF Ranking Fusion + +**File**: `codexlens/search/ranking.py` (NEW) + +```python +"""Ranking fusion algorithms for hybrid search.""" +from typing import Dict, List +from collections import defaultdict + +from codexlens.entities import SearchResult + + +def reciprocal_rank_fusion( + results_map: Dict[str, List[SearchResult]], + weights: Dict[str, float] = None, + k: 
int = 60 +) -> List[SearchResult]: + """Reciprocal Rank Fusion (RRF) algorithm. + + Formula: score(d) = Σ weight_i / (k + rank_i(d)) + + Args: + results_map: Dict mapping source name to ranked results + weights: Optional weights per source (default equal) + k: RRF constant (default 60) + + Returns: + Fused and re-ranked results + """ + if weights is None: + weights = {name: 1.0 for name in results_map} + + # Normalize weights + total_weight = sum(weights.values()) + weights = {k: v / total_weight for k, v in weights.items()} + + # Calculate RRF scores + rrf_scores = defaultdict(float) + path_to_result = {} + + for source_name, results in results_map.items(): + weight = weights.get(source_name, 1.0) + for rank, result in enumerate(results, start=1): + rrf_scores[result.path] += weight / (k + rank) + if result.path not in path_to_result: + path_to_result[result.path] = result + + # Sort by RRF score + sorted_paths = sorted(rrf_scores.keys(), key=lambda p: rrf_scores[p], reverse=True) + + # Build final results with updated scores + fused_results = [] + for path in sorted_paths: + result = path_to_result[path] + fused_results.append(SearchResult( + path=result.path, + score=rrf_scores[path], + excerpt=result.excerpt, + )) + + return fused_results + + +def normalize_bm25_score(score: float, max_score: float = 100.0) -> float: + """Normalize BM25 score to 0-1 range. + + BM25 scores are unbounded and typically negative in SQLite FTS5. + This normalizes them for fusion with other score types. + """ + if score >= 0: + return 0.0 + # BM25 in SQLite is negative; more negative = better match + return min(1.0, abs(score) / max_score) +``` + +--- + +### 2.5 Incremental Indexing + +**File**: `codexlens/storage/dir_index.py` (MODIFY) + +```python +# Add to DirIndexStore class: + +def needs_reindex(self, path: Path) -> bool: + """Check if file needs re-indexing based on mtime. 
+ + Returns: + True if file should be reindexed, False to skip + """ + with self._lock: + conn = self._get_connection() + row = conn.execute( + "SELECT mtime FROM files WHERE full_path = ?", + (str(path.resolve()),) + ).fetchone() + + if row is None: + return True # New file + + stored_mtime = row["mtime"] + if stored_mtime is None: + return True + + try: + current_mtime = path.stat().st_mtime + # Allow 1ms tolerance for floating point comparison + return abs(current_mtime - stored_mtime) > 0.001 + except OSError: + return False # File doesn't exist anymore + + +def add_file_incremental( + self, + file_path: Path, + content: str, + indexed_file: IndexedFile, +) -> Optional[int]: + """Add file to index only if changed. + + Returns: + file_id of the indexed file (the existing id when the file is unchanged and re-indexing is skipped); None only if a skipped file has no row in the index + """ + if not self.needs_reindex(file_path): + # Return existing file_id without re-indexing + with self._lock: + conn = self._get_connection() + row = conn.execute( + "SELECT id FROM files WHERE full_path = ?", + (str(file_path.resolve()),) + ).fetchone() + return int(row["id"]) if row else None + + # Proceed with full indexing + return self.add_file(file_path, content, indexed_file) +``` + +--- + +### 2.6 Query Preprocessor + +**File**: `codexlens/search/query_parser.py` (NEW) + +```python +"""Query preprocessing for improved search recall.""" +import re +from typing import List + + +def split_camel_case(text: str) -> List[str]: + """Split CamelCase into words: UserAuth -> ['User', 'Auth']""" + return re.findall(r'[A-Z]?[a-z]+|[A-Z]+(?=[A-Z]|$)', text) + + +def split_snake_case(text: str) -> List[str]: + """Split snake_case into words: user_auth -> ['user', 'auth']""" + return text.split('_') + + +def preprocess_query(query: str) -> str: + """Preprocess query for better recall. 
+ + Transforms: + - UserAuth -> "UserAuth" OR "User Auth" + - user_auth -> "user_auth" OR "user auth" + """ + terms = [] + + for word in query.split(): + # Handle CamelCase + if re.match(r'^[A-Z][a-z]+[A-Z]', word): + parts = split_camel_case(word) + terms.append(f'"{word}"') # Original + terms.append(f'"{" ".join(parts)}"') # Split + + # Handle snake_case + elif '_' in word: + parts = split_snake_case(word) + terms.append(f'"{word}"') # Original + terms.append(f'"{" ".join(parts)}"') # Split + + else: + terms.append(word) + + # Combine with OR for recall + return " OR ".join(terms) if len(terms) > 1 else terms[0] +``` + +--- + +## Part 3: Database Schema Changes + +### 3.1 New Tables + +```sql +-- Exact FTS table (code-friendly tokenizer) +CREATE VIRTUAL TABLE files_fts_exact USING fts5( + name, full_path UNINDEXED, content, + content='files', + content_rowid='id', + tokenize="unicode61 tokenchars '_-'" +); + +-- Fuzzy FTS table (trigram for substring matching) +CREATE VIRTUAL TABLE files_fts_fuzzy USING fts5( + name, full_path UNINDEXED, content, + content='files', + content_rowid='id', + tokenize="trigram" +); + +-- File hash for robust change detection (optional enhancement) +ALTER TABLE files ADD COLUMN content_hash TEXT; +CREATE INDEX idx_files_hash ON files(content_hash); +``` + +### 3.2 Migration Script + +**File**: `codexlens/storage/migrations/migration_004_dual_fts.py` (NEW) + +```python +"""Migration 004: Dual-FTS architecture.""" + +def upgrade(db_conn): + """Upgrade to Dual-FTS schema.""" + cursor = db_conn.cursor() + + # Check current schema + tables = cursor.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'files_fts%'" + ).fetchall() + existing = {t[0] for t in tables} + + # Drop legacy single FTS table + if "files_fts" in existing and "files_fts_exact" not in existing: + cursor.execute("DROP TABLE IF EXISTS files_fts") + + # Create new Dual-FTS tables + from codexlens.storage.schema import create_dual_fts_schema + result 
= create_dual_fts_schema(db_conn) + + # Rebuild indexes from existing content + cursor.execute(""" + INSERT INTO files_fts_exact(rowid, name, full_path, content) + SELECT id, name, full_path, content FROM files + """) + cursor.execute(""" + INSERT INTO files_fts_fuzzy(rowid, name, full_path, content) + SELECT id, name, full_path, content FROM files + """) + + db_conn.commit() + return result +``` + +--- + +## Part 4: API Contracts + +### 4.1 Search API + +```python +# New unified search interface +class SearchOptions: + query: str + limit: int = 20 + offset: int = 0 + enable_exact: bool = True # FTS exact matching + enable_fuzzy: bool = True # Trigram fuzzy matching + enable_vector: bool = False # Semantic vector search + exact_weight: float = 0.4 + fuzzy_weight: float = 0.3 + vector_weight: float = 0.3 + +# API endpoint signature +def search(options: SearchOptions) -> SearchResponse: + """Unified hybrid search.""" + pass + +class SearchResponse: + results: List[SearchResult] + total: int + search_modes: List[str] # ["exact", "fuzzy", "vector"] + trigram_available: bool +``` + +### 4.2 Indexing API + +```python +# Enhanced indexing with incremental support +class IndexOptions: + path: Path + incremental: bool = True # Skip unchanged files + force: bool = False # Force reindex all + detect_encoding: bool = True # Auto-detect file encoding + +def index_directory(options: IndexOptions) -> IndexResult: + """Index directory with incremental support.""" + pass + +class IndexResult: + total_files: int + indexed_files: int + skipped_files: int # Unchanged files skipped + encoding_errors: int +``` + +--- + +## Part 5: Implementation Roadmap + +### Phase 1: Foundation (Week 1) +- [ ] Implement encoding detection module +- [ ] Update file reading in `dir_index.py` and `index_tree.py` +- [ ] Add chardet/charset-normalizer dependency +- [ ] Write unit tests for encoding detection + +### Phase 2: Dual-FTS (Week 2) +- [ ] Create `schema.py` with Dual-FTS definitions +- [ ] 
Implement trigram compatibility check +- [ ] Write migration script +- [ ] Update `DirIndexStore` with dual search methods +- [ ] Test FTS5 trigram on target platforms + +### Phase 3: Hybrid Search (Week 3) +- [ ] Implement `HybridSearchEngine` +- [ ] Implement `ranking.py` with RRF +- [ ] Create `query_parser.py` +- [ ] Integrate with `ChainSearchEngine` +- [ ] Write integration tests + +### Phase 4: Incremental Indexing (Week 4) +- [ ] Add `needs_reindex()` method +- [ ] Implement `add_file_incremental()` +- [ ] Update `IndexTreeBuilder` to use incremental API +- [ ] Add optional content hash column +- [ ] Performance benchmarking + +### Phase 5: Vector Integration (Week 5) +- [ ] Update `VectorStore` for hybrid integration +- [ ] Implement vector search in `HybridSearchEngine` +- [ ] Tune RRF weights for optimal results +- [ ] End-to-end testing + +--- + +## Part 6: Performance Considerations + +### 6.1 Indexing Performance +- **Incremental indexing**: Skip ~90% of files on re-index +- **Parallel file processing**: ThreadPoolExecutor for parsing +- **Batch commits**: Commit every 100 files to reduce I/O + +### 6.2 Search Performance +- **Parallel retrieval**: Execute FTS + Vector searches concurrently +- **Early termination**: Stop after finding enough high-confidence matches +- **Result caching**: LRU cache for frequent queries + +### 6.3 Storage Overhead +- **Dual-FTS**: ~2x FTS index size (exact + fuzzy) +- **Trigram**: ~3-5x content size (due to trigram expansion) +- **Mitigation**: Optional fuzzy index, configurable per project + +--- + +## Part 7: Risk Assessment + +| Risk | Probability | Impact | Mitigation | +|------|-------------|--------|------------| +| SQLite trigram not available | Medium | High | Fallback to extended unicode61 | +| Performance degradation | Low | Medium | Parallel search, caching | +| Migration data loss | Low | High | Backup before migration | +| Encoding detection false positives | Medium | Low | Use replace mode, log warnings | 
+ +--- + +## Appendix: Reference Project Learnings + +### From Codanna (Rust) +- **N-gram tokenizer (3-10)**: Enables partial matching for code symbols +- **Compound BooleanQuery**: Combines exact + fuzzy + prefix in single query +- **File hash change detection**: More robust than mtime alone + +### From Code-Index-MCP (Python) +- **Dual-index architecture**: Fast shallow index + rich deep index +- **External tool integration**: Wrap ripgrep for performance +- **AST-based parsing**: Single-pass symbol extraction +- **ReDoS protection**: Validate regex patterns before execution diff --git a/codex-lens/src/codexlens/cli/commands.py b/codex-lens/src/codexlens/cli/commands.py index 2c66bf38..dbc7a53d 100644 --- a/codex-lens/src/codexlens/cli/commands.py +++ b/codex-lens/src/codexlens/cli/commands.py @@ -801,8 +801,17 @@ def enhance( try: # Check if ccw is available import subprocess + import shutil + import sys try: - subprocess.run(["ccw", "--version"], capture_output=True, check=True) + ccw_cmd = shutil.which("ccw") + if not ccw_cmd: + raise FileNotFoundError("ccw not in PATH") + # On Windows, .cmd files need shell=True + if sys.platform == "win32": + subprocess.run("ccw --version", shell=True, capture_output=True, check=True) + else: + subprocess.run(["ccw", "--version"], capture_output=True, check=True) except (subprocess.CalledProcessError, FileNotFoundError): raise CodexLensError("ccw CLI not found. Please install ccw first.") @@ -815,7 +824,7 @@ def enhance( mapper = PathMapper() # Find project - project_info = registry.find_project(base_path) + project_info = registry.get_project(base_path) if not project_info: raise CodexLensError(f"No index found for: {base_path}. 
Run 'codex-lens init' first.") diff --git a/codex-lens/src/codexlens/config.py b/codex-lens/src/codexlens/config.py index 0005cdc8..fb96c74d 100644 --- a/codex-lens/src/codexlens/config.py +++ b/codex-lens/src/codexlens/config.py @@ -66,6 +66,8 @@ class Config: "go": {"extensions": [".go"], "tree_sitter_language": "go"}, "zig": {"extensions": [".zig"], "tree_sitter_language": "zig"}, "objective-c": {"extensions": [".m", ".mm"], "tree_sitter_language": "objc"}, + "markdown": {"extensions": [".md", ".mdx"], "tree_sitter_language": None}, + "text": {"extensions": [".txt"], "tree_sitter_language": None}, } ) parsing_rules: Dict[str, Dict[str, Any]] = field( diff --git a/codex-lens/src/codexlens/parsers/factory.py b/codex-lens/src/codexlens/parsers/factory.py index a46251a2..cd868ba6 100644 --- a/codex-lens/src/codexlens/parsers/factory.py +++ b/codex-lens/src/codexlens/parsers/factory.py @@ -48,6 +48,10 @@ class SimpleRegexParser: symbols = _parse_java_symbols(text) elif self.language_id == "go": symbols = _parse_go_symbols(text) + elif self.language_id == "markdown": + symbols = _parse_markdown_symbols(text) + elif self.language_id == "text": + symbols = _parse_text_symbols(text) else: symbols = _parse_generic_symbols(text) @@ -221,3 +225,32 @@ def _parse_generic_symbols(text: str) -> List[Symbol]: if def_match: symbols.append(Symbol(name=def_match.group(1), kind="function", range=(i, i))) return symbols + + +# Markdown heading regex: # Heading, ## Heading, etc. +_MD_HEADING_RE = re.compile(r"^(#{1,6})\s+(.+)$") + + +def _parse_markdown_symbols(text: str) -> List[Symbol]: + """Parse Markdown headings as symbols. + + Extracts # headings as symbols whose kind encodes the heading level ('h1' through 'h6'). 
+ """ + symbols: List[Symbol] = [] + for i, line in enumerate(text.splitlines(), start=1): + heading_match = _MD_HEADING_RE.match(line) + if heading_match: + level = len(heading_match.group(1)) + title = heading_match.group(2).strip() + # Kind encodes the heading level: 'h1' through 'h6' + kind = f"h{level}" + symbols.append(Symbol(name=title, kind=kind, range=(i, i))) + return symbols + + +def _parse_text_symbols(text: str) -> List[Symbol]: + """Parse plain text files - no symbols, just index content.""" + # Text files don't have structured symbols, return empty list + # The file content will still be indexed for FTS search + return [] + diff --git a/codex-lens/src/codexlens/storage/dir_index.py b/codex-lens/src/codexlens/storage/dir_index.py index 5dc14599..b85cd7d4 100644 --- a/codex-lens/src/codexlens/storage/dir_index.py +++ b/codex-lens/src/codexlens/storage/dir_index.py @@ -15,7 +15,7 @@ from dataclasses import dataclass from pathlib import Path from typing import Any, Dict, List, Optional, Tuple -from codexlens.entities import SearchResult, Symbol +from codexlens.entities import CodeRelationship, SearchResult, Symbol from codexlens.errors import StorageError @@ -225,6 +225,99 @@ class DirIndexStore: conn.rollback() raise StorageError(f"Failed to add file {name}: {exc}") from exc + def add_relationships( + self, + file_path: str | Path, + relationships: List[CodeRelationship], + ) -> int: + """Store code relationships for a file. 
+ + Args: + file_path: Path to the source file + relationships: List of CodeRelationship objects to store + + Returns: + Number of relationships stored + + Raises: + StorageError: If database operations fail + """ + if not relationships: + return 0 + + with self._lock: + conn = self._get_connection() + file_path_str = str(Path(file_path).resolve()) + + try: + # Get file_id + row = conn.execute( + "SELECT id FROM files WHERE full_path=?", (file_path_str,) + ).fetchone() + if not row: + return 0 + + file_id = int(row["id"]) + + # Delete existing relationships for symbols in this file + conn.execute( + """ + DELETE FROM code_relationships + WHERE source_symbol_id IN ( + SELECT id FROM symbols WHERE file_id=? + ) + """, + (file_id,), + ) + + # Insert new relationships + relationship_rows = [] + for rel in relationships: + # Find symbol_id by name and file + symbol_row = conn.execute( + """ + SELECT id FROM symbols + WHERE file_id=? AND name=? AND start_line<=? AND end_line>=? + LIMIT 1 + """, + (file_id, rel.source_symbol, rel.source_line, rel.source_line), + ).fetchone() + + if not symbol_row: + # Try matching by name only + symbol_row = conn.execute( + "SELECT id FROM symbols WHERE file_id=? AND name=? LIMIT 1", + (file_id, rel.source_symbol), + ).fetchone() + + if symbol_row: + relationship_rows.append(( + int(symbol_row["id"]), + rel.target_symbol, + rel.relationship_type, + rel.source_line, + rel.target_file, + )) + + if relationship_rows: + conn.executemany( + """ + INSERT INTO code_relationships( + source_symbol_id, target_qualified_name, relationship_type, + source_line, target_file + ) + VALUES(?, ?, ?, ?, ?) 
+ """, + relationship_rows, + ) + + conn.commit() + return len(relationship_rows) + + except sqlite3.DatabaseError as exc: + conn.rollback() + raise StorageError(f"Failed to add relationships: {exc}") from exc + def add_files_batch( self, files: List[Tuple[str, Path, str, str, Optional[List[Symbol]]]] ) -> int: @@ -1143,6 +1236,21 @@ class DirIndexStore: """ ) + # Code relationships table for graph visualization + conn.execute( + """ + CREATE TABLE IF NOT EXISTS code_relationships ( + id INTEGER PRIMARY KEY, + source_symbol_id INTEGER NOT NULL, + target_qualified_name TEXT NOT NULL, + relationship_type TEXT NOT NULL, + source_line INTEGER NOT NULL, + target_file TEXT, + FOREIGN KEY (source_symbol_id) REFERENCES symbols (id) ON DELETE CASCADE + ) + """ + ) + # Indexes conn.execute("CREATE INDEX IF NOT EXISTS idx_files_name ON files(name)") conn.execute("CREATE INDEX IF NOT EXISTS idx_files_path ON files(full_path)") @@ -1154,6 +1262,9 @@ class DirIndexStore: conn.execute("CREATE INDEX IF NOT EXISTS idx_keywords_keyword ON keywords(keyword)") conn.execute("CREATE INDEX IF NOT EXISTS idx_file_keywords_file_id ON file_keywords(file_id)") conn.execute("CREATE INDEX IF NOT EXISTS idx_file_keywords_keyword_id ON file_keywords(keyword_id)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_rel_source ON code_relationships(source_symbol_id)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_rel_target ON code_relationships(target_qualified_name)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_rel_type ON code_relationships(relationship_type)") except sqlite3.DatabaseError as exc: raise StorageError(f"Failed to create schema: {exc}") from exc diff --git a/codex-lens/src/codexlens/storage/index_tree.py b/codex-lens/src/codexlens/storage/index_tree.py index d6618546..f8a5cd42 100644 --- a/codex-lens/src/codexlens/storage/index_tree.py +++ b/codex-lens/src/codexlens/storage/index_tree.py @@ -16,6 +16,7 @@ from typing import Dict, List, Optional, Set from codexlens.config import 
Config from codexlens.parsers.factory import ParserFactory +from codexlens.semantic.graph_analyzer import GraphAnalyzer from codexlens.storage.dir_index import DirIndexStore from codexlens.storage.path_mapper import PathMapper from codexlens.storage.registry import ProjectInfo, RegistryStore @@ -456,6 +457,22 @@ class IndexTreeBuilder: symbols=indexed_file.symbols, ) + # Extract and store code relationships for graph visualization + if language_id in {"python", "javascript", "typescript"}: + try: + graph_analyzer = GraphAnalyzer(language_id) + if graph_analyzer.is_available(): + relationships = graph_analyzer.analyze_with_symbols( + text, file_path, indexed_file.symbols + ) + if relationships: + store.add_relationships(file_path, relationships) + except Exception as rel_exc: + self.logger.debug( + "Failed to extract relationships from %s: %s", + file_path, rel_exc + ) + files_count += 1 symbols_count += len(indexed_file.symbols) @@ -653,6 +670,19 @@ def _build_dir_worker(args: tuple) -> DirBuildResult: symbols=indexed_file.symbols, ) + # Extract and store code relationships for graph visualization + if language_id in {"python", "javascript", "typescript"}: + try: + graph_analyzer = GraphAnalyzer(language_id) + if graph_analyzer.is_available(): + relationships = graph_analyzer.analyze_with_symbols( + text, item, indexed_file.symbols + ) + if relationships: + store.add_relationships(item, relationships) + except Exception: + pass # Silently skip relationship extraction errors + files_count += 1 symbols_count += len(indexed_file.symbols) diff --git a/codex-lens/tests/test_config.py b/codex-lens/tests/test_config.py index 5557cf84..1562ac28 100644 --- a/codex-lens/tests/test_config.py +++ b/codex-lens/tests/test_config.py @@ -195,7 +195,7 @@ class TestConfig: try: config = Config() assert config.language_for_path("test.xyz") is None - assert config.language_for_path("README.md") is None + assert config.language_for_path("data.csv") is None finally: del 
os.environ["CODEXLENS_DATA_DIR"]