From 3e2cb036de82e20eb1d27093989077b1fd0d02a6 Mon Sep 17 00:00:00 2001 From: catlog22 Date: Wed, 18 Feb 2026 11:16:42 +0800 Subject: [PATCH] feat: add CcwLitellmStatus component for installation management and package discovery utility - Implemented CcwLitellmStatus component to display installation status and provide install/uninstall actions. - Integrated hooks for managing installation and uninstallation processes. - Added package discovery utility to locate local Python packages with environment variable and configuration support. - Enhanced diagnostics with detailed search results for package paths. --- .../commands/workflow/analyze-with-file.md | 23 - .../api-settings/CcwLitellmStatus.tsx | 152 +++ .../src/components/api-settings/index.ts | 1 + ccw/frontend/src/pages/ApiSettingsPage.tsx | 4 + ccw/src/core/routes/litellm-api-routes.ts | 160 +--- ccw/src/tools/codex-lens.ts | 351 ++++--- ccw/src/utils/package-discovery.ts | 318 +++++++ ccw/src/utils/uv-manager.ts | 11 +- codex-lens/src/codexlens/api/semantic.py | 27 +- codex-lens/src/codexlens/config.py | 899 +++++++++++++++++- 10 files changed, 1597 insertions(+), 349 deletions(-) create mode 100644 ccw/frontend/src/components/api-settings/CcwLitellmStatus.tsx create mode 100644 ccw/src/utils/package-discovery.ts diff --git a/.claude/commands/workflow/analyze-with-file.md b/.claude/commands/workflow/analyze-with-file.md index 7729d5fa..0e702213 100644 --- a/.claude/commands/workflow/analyze-with-file.md +++ b/.claude/commands/workflow/analyze-with-file.md @@ -686,29 +686,6 @@ User agrees with current direction, wants deeper code analysis - Quick information gathering without multi-round iteration - Follow-up analysis building on existing session -**Use `Skill(skill="workflow:analyze-with-file", args="\"topic\"")` when:** -- Exploring a complex topic collaboratively -- Need documented discussion trail -- Decision-making requires multiple perspectives -- Want to iterate on understanding with user input -- 
Building shared understanding before implementation - -**Use `Skill(skill="workflow:debug-with-file", args="\"bug description\"")` when:** -- Diagnosing specific bugs -- Need hypothesis-driven investigation -- Focus on evidence and verification - -**Use `Skill(skill="brainstorm", args="\"topic or question\"")` when:** -- Generating new ideas or solutions -- Need creative exploration -- Want divergent thinking before convergence - -**Use `Skill(skill="workflow-plan", args="\"task description\"")` when:** -- Complex planning requiring multiple perspectives -- Large scope needing parallel sub-domain analysis -- Want shared collaborative planning document -- Need structured task breakdown with agent coordination - **Use `Skill(skill="workflow-lite-plan", args="\"task description\"")` when:** - Ready to implement (past analysis phase) - Need simple task breakdown diff --git a/ccw/frontend/src/components/api-settings/CcwLitellmStatus.tsx b/ccw/frontend/src/components/api-settings/CcwLitellmStatus.tsx new file mode 100644 index 00000000..130fedde --- /dev/null +++ b/ccw/frontend/src/components/api-settings/CcwLitellmStatus.tsx @@ -0,0 +1,152 @@ +// ======================================== +// CCW-LiteLLM Status & Install Component +// ======================================== +// Shows ccw-litellm installation status with install/uninstall actions + +import { useState } from 'react'; +import { useIntl } from 'react-intl'; +import { + Download, + Trash2, + RefreshCw, + CheckCircle2, + XCircle, + Loader2, + Package, +} from 'lucide-react'; +import { Button } from '@/components/ui/Button'; +import { Card, CardContent } from '@/components/ui/Card'; +import { Badge } from '@/components/ui/Badge'; +import { + useCcwLitellmStatus, + useInstallCcwLitellm, + useUninstallCcwLitellm, +} from '@/hooks/useApiSettings'; +import { useNotifications } from '@/hooks/useNotifications'; + +export function CcwLitellmStatus() { + const { formatMessage } = useIntl(); + const { success, error: 
notifyError } = useNotifications(); + const [refresh, setRefresh] = useState(false); + + const { data: status, isLoading, refetch } = useCcwLitellmStatus({ refresh }); + const { install, isInstalling } = useInstallCcwLitellm(); + const { uninstall, isUninstalling } = useUninstallCcwLitellm(); + + const isBusy = isInstalling || isUninstalling; + + const handleInstall = async () => { + try { + await install(); + success(formatMessage({ id: 'apiSettings.ccwLitellm.messages.installSuccess' })); + setRefresh(true); + refetch(); + } catch { + notifyError(formatMessage({ id: 'apiSettings.ccwLitellm.messages.installFailed' })); + } + }; + + const handleUninstall = async () => { + try { + await uninstall(); + success(formatMessage({ id: 'apiSettings.ccwLitellm.messages.uninstallSuccess' })); + setRefresh(true); + refetch(); + } catch { + notifyError(formatMessage({ id: 'apiSettings.ccwLitellm.messages.uninstallFailed' })); + } + }; + + const handleRefresh = () => { + setRefresh(true); + refetch(); + }; + + const installed = status?.installed ?? false; + const version = status?.version; + + return ( + + +
+
+ +
+

+ {formatMessage({ id: 'apiSettings.ccwLitellm.title' })} +

+

+ {formatMessage({ id: 'apiSettings.ccwLitellm.description' })} +

+
+
+ +
+ {/* Status badge */} + {isLoading ? ( + + + ... + + ) : installed ? ( + + + {formatMessage({ id: 'apiSettings.ccwLitellm.status.installed' })} + {version && ` v${version}`} + + ) : ( + + + {formatMessage({ id: 'apiSettings.ccwLitellm.status.notInstalled' })} + + )} + + {/* Refresh */} + + + {/* Install / Uninstall */} + {installed ? ( + + ) : ( + + )} +
+
+
+
+ ); +} + +export default CcwLitellmStatus; diff --git a/ccw/frontend/src/components/api-settings/index.ts b/ccw/frontend/src/components/api-settings/index.ts index e673f1f9..a221bc47 100644 --- a/ccw/frontend/src/components/api-settings/index.ts +++ b/ccw/frontend/src/components/api-settings/index.ts @@ -13,3 +13,4 @@ export { CliSettingsList } from './CliSettingsList'; export { CliSettingsModal } from './CliSettingsModal'; export { MultiKeySettingsModal } from './MultiKeySettingsModal'; export { ManageModelsModal } from './ManageModelsModal'; +export { CcwLitellmStatus } from './CcwLitellmStatus'; diff --git a/ccw/frontend/src/pages/ApiSettingsPage.tsx b/ccw/frontend/src/pages/ApiSettingsPage.tsx index 70cdab0e..0ca5c0a4 100644 --- a/ccw/frontend/src/pages/ApiSettingsPage.tsx +++ b/ccw/frontend/src/pages/ApiSettingsPage.tsx @@ -23,6 +23,7 @@ import { CliSettingsModal, MultiKeySettingsModal, ManageModelsModal, + CcwLitellmStatus, } from '@/components/api-settings'; import { useProviders, useEndpoints, useModelPools, useCliSettings, useSyncApiConfig } from '@/hooks/useApiSettings'; import { useNotifications } from '@/hooks/useNotifications'; @@ -207,6 +208,9 @@ export function ApiSettingsPage() { + {/* CCW-LiteLLM Status */} + + {/* Tabbed Interface */} core -> src -> ccw -> package root -const PACKAGE_ROOT = pathJoin(__dirname, '..', '..', '..', '..'); import { getAllProviders, @@ -105,24 +97,6 @@ export function clearCcwLitellmStatusCache() { ccwLitellmStatusCache.timestamp = 0; } -/** - * Install ccw-litellm using UV package manager - * Delegates to ensureLiteLLMEmbedderReady for consistent dependency handling - * This ensures ccw-litellm installation doesn't break fastembed's onnxruntime dependencies - * @param _packagePath - Ignored, ensureLiteLLMEmbedderReady handles path discovery - * @returns Installation result - */ -async function installCcwLitellmWithUv(_packagePath: string | null): Promise<{ success: boolean; message?: string; error?: string }> { - // 
Delegate to the robust installation logic in codex-lens.ts - // This ensures consistent dependency handling within the shared venv, - // preventing onnxruntime conflicts that would break fastembed - const result = await ensureLiteLLMEmbedderReady(); - if (result.success) { - clearCcwLitellmStatusCache(); - } - return result; -} - function sanitizeProviderForResponse(provider: any): any { if (!provider) return provider; return { @@ -877,28 +851,41 @@ export async function handleLiteLLMApiRoutes(ctx: RouteContext): Promise((resolve) => { + const child = spawn(venvPython, ['-c', 'import ccw_litellm; print(ccw_litellm.__version__)'], { + stdio: ['ignore', 'pipe', 'pipe'], + timeout: statusTimeout, + windowsHide: true, + }); + let stdout = ''; + child.stdout.on('data', (data: Buffer) => { stdout += data.toString(); }); + child.on('close', (code: number | null) => { + if (code === 0) { + const version = stdout.trim(); + if (version) { + console.log(`[ccw-litellm status] Found in CodexLens venv: ${version}`); + resolve({ installed: true, version }); + return; + } + } + console.log('[ccw-litellm status] Not found in CodexLens venv'); + resolve({ installed: false }); + }); + child.on('error', () => { + console.log('[ccw-litellm status] Spawn error checking venv'); + resolve({ installed: false }); + }); }); - const version = stdout.trim(); - if (version) { - result = { installed: true, version }; - console.log(`[ccw-litellm status] Found in CodexLens venv: ${version}`); - } } catch (venvErr) { console.log('[ccw-litellm status] Not found in CodexLens venv'); result = { installed: false }; @@ -1320,95 +1307,19 @@ export async function handleLiteLLMApiRoutes(ctx: RouteContext): Promise { try { - const { spawn } = await import('child_process'); - const path = await import('path'); - const fs = await import('fs'); + // Delegate entirely to ensureLiteLLMEmbedderReady for consistent installation + // This uses unified package discovery and handles UV → pip fallback + const result = 
await ensureLiteLLMEmbedderReady(); - // Try to find ccw-litellm package in distribution - const possiblePaths = [ - path.join(initialPath, 'ccw-litellm'), - path.join(initialPath, '..', 'ccw-litellm'), - path.join(process.cwd(), 'ccw-litellm'), - path.join(PACKAGE_ROOT, 'ccw-litellm'), // npm package internal path - ]; - - let packagePath = ''; - for (const p of possiblePaths) { - const pyproject = path.join(p, 'pyproject.toml'); - if (fs.existsSync(pyproject)) { - packagePath = p; - break; - } - } - - // Priority: Use UV if available - if (await isUvAvailable()) { - const uvResult = await installCcwLitellmWithUv(packagePath || null); - if (uvResult.success) { - // Broadcast installation event - broadcastToClients({ - type: 'CCW_LITELLM_INSTALLED', - payload: { timestamp: new Date().toISOString(), method: 'uv' } - }); - return { ...uvResult, path: packagePath || undefined }; - } - // UV install failed, fall through to pip fallback - console.log('[ccw-litellm install] UV install failed, falling back to pip:', uvResult.error); - } - - // Fallback: Use pip for installation - // Use shared Python detection for consistent cross-platform behavior - const pythonCmd = getSystemPython(); - - if (!packagePath) { - // Try pip install from PyPI as fallback - return new Promise((resolve) => { - const proc = spawn(pythonCmd, ['-m', 'pip', 'install', 'ccw-litellm'], { shell: true, timeout: 300000 }); - let output = ''; - let error = ''; - proc.stdout?.on('data', (data) => { output += data.toString(); }); - proc.stderr?.on('data', (data) => { error += data.toString(); }); - proc.on('close', (code) => { - if (code === 0) { - // Clear status cache after successful installation - clearCcwLitellmStatusCache(); - broadcastToClients({ - type: 'CCW_LITELLM_INSTALLED', - payload: { timestamp: new Date().toISOString(), method: 'pip' } - }); - resolve({ success: true, message: 'ccw-litellm installed from PyPI' }); - } else { - resolve({ success: false, error: error || 'Installation failed' 
}); - } - }); - proc.on('error', (err) => resolve({ success: false, error: err.message })); + if (result.success) { + clearCcwLitellmStatusCache(); + broadcastToClients({ + type: 'CCW_LITELLM_INSTALLED', + payload: { timestamp: new Date().toISOString(), method: 'unified' } }); } - // Install from local package - return new Promise((resolve) => { - const proc = spawn(pythonCmd, ['-m', 'pip', 'install', '-e', packagePath], { shell: true, timeout: 300000 }); - let output = ''; - let error = ''; - proc.stdout?.on('data', (data) => { output += data.toString(); }); - proc.stderr?.on('data', (data) => { error += data.toString(); }); - proc.on('close', (code) => { - if (code === 0) { - // Clear status cache after successful installation - clearCcwLitellmStatusCache(); - - // Broadcast installation event - broadcastToClients({ - type: 'CCW_LITELLM_INSTALLED', - payload: { timestamp: new Date().toISOString(), method: 'pip' } - }); - resolve({ success: true, message: 'ccw-litellm installed successfully', path: packagePath }); - } else { - resolve({ success: false, error: error || output || 'Installation failed' }); - } - }); - proc.on('error', (err) => resolve({ success: false, error: err.message })); - }); + return result; } catch (err) { return { success: false, error: (err as Error).message }; } @@ -1441,7 +1352,6 @@ export async function handleLiteLLMApiRoutes(ctx: RouteContext): Promise { diff --git a/ccw/src/tools/codex-lens.ts b/ccw/src/tools/codex-lens.ts index 600c0e0a..8c697008 100644 --- a/ccw/src/tools/codex-lens.ts +++ b/ccw/src/tools/codex-lens.ts @@ -12,11 +12,9 @@ import { z } from 'zod'; import type { ToolSchema, ToolResult } from '../types/tool.js'; import { spawn, execSync, exec } from 'child_process'; -import { existsSync, mkdirSync } from 'fs'; -import { join, dirname } from 'path'; -import { homedir } from 'os'; -import { fileURLToPath } from 'url'; -import { getSystemPython } from '../utils/python-utils.js'; +import { existsSync, mkdirSync, rmSync } 
from 'fs'; +import { join } from 'path'; +import { getSystemPython, parsePythonVersion, isPythonVersionCompatible } from '../utils/python-utils.js'; import { EXEC_TIMEOUTS } from '../utils/exec-constants.js'; import { UvManager, @@ -30,94 +28,15 @@ import { getCodexLensPython, getCodexLensPip, } from '../utils/codexlens-path.js'; - -// Get directory of this module -const __filename = fileURLToPath(import.meta.url); -const __dirname = dirname(__filename); - -/** - * Check if a path is inside node_modules (unstable for editable installs) - * Paths inside node_modules will change when npm reinstalls packages, - * breaking editable (-e) pip installs that reference them. - */ -function isInsideNodeModules(pathToCheck: string): boolean { - const normalizedPath = pathToCheck.replace(/\\/g, '/').toLowerCase(); - return normalizedPath.includes('/node_modules/'); -} - -/** - * Check if we're running in a development environment (not from node_modules) - * Also detects Yarn PnP (Plug'n'Play) which doesn't use node_modules. - */ -function isDevEnvironment(): boolean { - // Yarn PnP detection: if pnp version exists, it's a managed production environment - if ((process.versions as any).pnp) { - return false; - } - return !isInsideNodeModules(__dirname); -} - -/** - * Find valid local package path for development installs. - * Returns null if running from node_modules (should use PyPI instead). - * - * IMPORTANT: When running from node_modules, local paths are unstable - * because npm reinstall will delete and recreate the node_modules directory, - * breaking any editable (-e) pip installs that reference them. - */ -function findLocalPackagePath(packageName: string): string | null { - // Always try to find local paths first, even when running from node_modules. - // codex-lens is a local development package not published to PyPI, - // so we must find it locally regardless of execution context. 
- - const possiblePaths = [ - join(process.cwd(), packageName), - join(__dirname, '..', '..', '..', packageName), // ccw/src/tools -> project root - join(homedir(), packageName), - ]; - - // Also check common workspace locations - const cwd = process.cwd(); - const cwdParent = dirname(cwd); - if (cwdParent !== cwd) { - possiblePaths.push(join(cwdParent, packageName)); - } - - // First pass: prefer non-node_modules paths (development environment) - for (const localPath of possiblePaths) { - if (isInsideNodeModules(localPath)) { - continue; - } - if (existsSync(join(localPath, 'pyproject.toml'))) { - console.log(`[CodexLens] Found local ${packageName} at: ${localPath}`); - return localPath; - } - } - - // Second pass: allow node_modules paths (NPM global install) - for (const localPath of possiblePaths) { - if (existsSync(join(localPath, 'pyproject.toml'))) { - console.log(`[CodexLens] Found ${packageName} in node_modules at: ${localPath}`); - return localPath; - } - } - - return null; -} - -/** - * Find valid local codex-lens package path for development installs. - */ -function findLocalCodexLensPath(): string | null { - return findLocalPackagePath('codex-lens'); -} - -/** - * Find valid local ccw-litellm package path for development installs. - */ -function findLocalCcwLitellmPath(): string | null { - return findLocalPackagePath('ccw-litellm'); -} +import { + findCodexLensPath, + findCcwLitellmPath, + formatSearchResults, + isDevEnvironment, + isInsideNodeModules, + type PackageDiscoveryResult, + type SearchAttempt, +} from '../utils/package-discovery.js'; // Bootstrap status cache let bootstrapChecked = false; @@ -143,6 +62,62 @@ const SEMANTIC_STATUS_TTL = 5 * 60 * 1000; // 5 minutes TTL let currentIndexingProcess: ReturnType | null = null; let currentIndexingAborted = false; +// Spawn timeout for checkVenvStatus (Windows cold start is slower) +const VENV_CHECK_TIMEOUT = process.platform === 'win32' ? 
15000 : 10000; + +/** + * Pre-flight check: verify Python 3.9+ is available before attempting bootstrap. + * Returns an error message if Python is not suitable, or null if OK. + */ +function preFlightCheck(): string | null { + try { + const pythonCmd = getSystemPython(); + const version = execSync(`${pythonCmd} --version 2>&1`, { + encoding: 'utf8', + timeout: EXEC_TIMEOUTS.PYTHON_VERSION, + }).trim(); + const parsed = parsePythonVersion(version); + if (!parsed) { + return `Cannot parse Python version from: "${version}". Ensure Python 3.9+ is installed.`; + } + if (parsed.major !== 3 || parsed.minor < 9) { + return `Python ${parsed.major}.${parsed.minor} found, but 3.9+ is required. Install Python 3.9-3.12 or set CCW_PYTHON.`; + } + return null; + } catch (err) { + return `Python not found: ${(err as Error).message}. Install Python 3.9-3.12 and ensure it is in PATH.`; + } +} + +/** + * Detect and repair a corrupted venv. + * A venv is considered corrupted if the directory exists but the Python executable is missing. + * @returns true if venv was repaired (deleted), false if no repair needed + */ +function repairVenvIfCorrupted(): boolean { + const venvPath = getCodexLensVenvDir(); + if (!existsSync(venvPath)) { + return false; // No venv at all — nothing to repair + } + + const pythonPath = getCodexLensPython(); + if (existsSync(pythonPath)) { + return false; // Venv looks healthy + } + + // Venv dir exists but python is missing — corrupted + console.warn(`[CodexLens] Corrupted venv detected: ${venvPath} exists but Python executable missing. 
Removing for recreation...`); + try { + rmSync(venvPath, { recursive: true, force: true }); + clearVenvStatusCache(); + console.log('[CodexLens] Corrupted venv removed successfully.'); + return true; + } catch (err) { + console.error(`[CodexLens] Failed to remove corrupted venv: ${(err as Error).message}`); + return false; + } +} + // Define Zod schema for validation const ParamsSchema = z.object({ action: z.enum([ @@ -194,6 +169,15 @@ interface BootstrapResult { success: boolean; error?: string; message?: string; + warnings?: string[]; + diagnostics?: { + pythonVersion?: string; + venvPath?: string; + packagePath?: string; + installer?: 'uv' | 'pip'; + editable?: boolean; + searchedPaths?: SearchAttempt[]; + }; } interface ExecuteResult { @@ -276,7 +260,7 @@ async function checkVenvStatus(force = false): Promise { return new Promise((resolve) => { const child = spawn(pythonPath, ['-c', 'import sys; import codexlens; import watchdog; print(f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"); print(codexlens.__version__)'], { stdio: ['ignore', 'pipe', 'pipe'], - timeout: 10000, + timeout: VENV_CHECK_TIMEOUT, }); let stdout = ''; @@ -476,13 +460,16 @@ async function ensureLiteLLMEmbedderReady(): Promise { }); if (importStatus.ok) { - return { success: true }; + return { success: true, diagnostics: { venvPath: getCodexLensVenvDir() } }; } console.log('[CodexLens] Installing ccw-litellm for LiteLLM embedding backend...'); - // Find local ccw-litellm package path (only in development, not from node_modules) - const localPath = findLocalCcwLitellmPath(); + // Find local ccw-litellm package path using unified discovery + const discovery = findCcwLitellmPath(); + const localPath = discovery.path; + const editable = localPath ? 
(isDevEnvironment() && !discovery.insideNodeModules) : false; + const warnings: string[] = []; // Priority: Use UV if available (faster, better dependency resolution) if (await isUvAvailable()) { @@ -495,6 +482,7 @@ async function ensureLiteLLMEmbedderReady(): Promise { const venvResult = await uv.createVenv(); if (!venvResult.success) { console.log('[CodexLens] UV venv creation failed, falling back to pip:', venvResult.error); + warnings.push(`UV venv creation failed: ${venvResult.error}`); // Fall through to pip fallback } } @@ -502,20 +490,26 @@ async function ensureLiteLLMEmbedderReady(): Promise { if (uv.isVenvValid()) { let uvResult; if (localPath) { - console.log(`[CodexLens] Installing ccw-litellm from local path with UV: ${localPath}`); - uvResult = await uv.installFromProject(localPath); + console.log(`[CodexLens] Installing ccw-litellm from local path with UV: ${localPath} (editable: ${editable})`); + uvResult = await uv.installFromProject(localPath, undefined, editable); } else { console.log('[CodexLens] Installing ccw-litellm from PyPI with UV...'); uvResult = await uv.install(['ccw-litellm']); } if (uvResult.success) { - return { success: true }; + return { + success: true, + diagnostics: { packagePath: localPath || undefined, venvPath: getCodexLensVenvDir(), installer: 'uv', editable }, + warnings: warnings.length > 0 ? 
warnings : undefined, + }; } console.log('[CodexLens] UV install failed, falling back to pip:', uvResult.error); + warnings.push(`UV install failed: ${uvResult.error}`); } } catch (uvErr) { console.log('[CodexLens] UV error, falling back to pip:', (uvErr as Error).message); + warnings.push(`UV error: ${(uvErr as Error).message}`); } } @@ -524,16 +518,33 @@ async function ensureLiteLLMEmbedderReady(): Promise { try { if (localPath) { - console.log(`[CodexLens] Installing ccw-litellm from local path with pip: ${localPath}`); - execSync(`"${pipPath}" install -e "${localPath}"`, { stdio: 'inherit', timeout: EXEC_TIMEOUTS.PACKAGE_INSTALL }); + const pipFlag = editable ? '-e' : ''; + const pipInstallSpec = editable ? `"${localPath}"` : `"${localPath}"`; + console.log(`[CodexLens] Installing ccw-litellm from local path with pip: ${localPath} (editable: ${editable})`); + execSync(`"${pipPath}" install ${pipFlag} ${pipInstallSpec}`.replace(/ +/g, ' '), { stdio: 'inherit', timeout: EXEC_TIMEOUTS.PACKAGE_INSTALL }); } else { console.log('[CodexLens] Installing ccw-litellm from PyPI with pip...'); execSync(`"${pipPath}" install ccw-litellm`, { stdio: 'inherit', timeout: EXEC_TIMEOUTS.PACKAGE_INSTALL }); } - return { success: true }; + return { + success: true, + diagnostics: { packagePath: localPath || undefined, venvPath: getCodexLensVenvDir(), installer: 'pip', editable }, + warnings: warnings.length > 0 ? warnings : undefined, + }; } catch (err) { - return { success: false, error: `Failed to install ccw-litellm: ${(err as Error).message}` }; + return { + success: false, + error: `Failed to install ccw-litellm: ${(err as Error).message}`, + diagnostics: { + packagePath: localPath || undefined, + venvPath: getCodexLensVenvDir(), + installer: 'pip', + editable, + searchedPaths: !localPath ? discovery.searchedPaths : undefined, + }, + warnings: warnings.length > 0 ? 
warnings : undefined, + }; } } @@ -660,6 +671,15 @@ async function detectGpuSupport(): Promise<{ mode: GpuMode; available: GpuMode[] async function bootstrapWithUv(gpuMode: GpuMode = 'cpu'): Promise { console.log('[CodexLens] Bootstrapping with UV package manager...'); + // Pre-flight: verify Python is available and compatible + const preFlightError = preFlightCheck(); + if (preFlightError) { + return { success: false, error: `Pre-flight failed: ${preFlightError}` }; + } + + // Auto-repair corrupted venv before proceeding + repairVenvIfCorrupted(); + // Ensure UV is installed const uvInstalled = await ensureUvInstalled(); if (!uvInstalled) { @@ -678,49 +698,42 @@ async function bootstrapWithUv(gpuMode: GpuMode = 'cpu'): Promise ` - ${p}`).join('\n'); - - const errorMsg = `Cannot find codex-lens directory for local installation.\n\n` + - `codex-lens is a local development package (not published to PyPI) and must be installed from local files.\n\n` + - `To fix this:\n` + - `1. Ensure 'codex-lens' directory exists at one of these locations:\n${pathsList}\n` + - `2. Verify pyproject.toml exists in the codex-lens directory\n` + - `3. Run ccw from the correct working directory\n` + - `4. 
Or manually install: cd /path/to/codex-lens && pip install -e .[${extras.join(',')}]`; - return { success: false, error: errorMsg }; + if (!discovery.path) { + return { + success: false, + error: formatSearchResults(discovery, 'codex-lens'), + diagnostics: { searchedPaths: discovery.searchedPaths, venvPath: getCodexLensVenvDir(), installer: 'uv' }, + }; } - console.log(`[CodexLens] Installing from local path with UV: ${codexLensPath}`); + // Use non-editable install for production stability (editable only in dev) + const editable = isDevEnvironment() && !discovery.insideNodeModules; + console.log(`[CodexLens] Installing from local path with UV: ${discovery.path} (editable: ${editable})`); console.log(`[CodexLens] Extras: ${extras.join(', ')}`); - const installResult = await uv.installFromProject(codexLensPath, extras); + const installResult = await uv.installFromProject(discovery.path, extras, editable); if (!installResult.success) { - return { success: false, error: `Failed to install codex-lens: ${installResult.error}` }; + return { + success: false, + error: `Failed to install codex-lens: ${installResult.error}`, + diagnostics: { packagePath: discovery.path, venvPath: getCodexLensVenvDir(), installer: 'uv', editable }, + }; } // Clear cache after successful installation clearVenvStatusCache(); clearSemanticStatusCache(); console.log(`[CodexLens] Bootstrap with UV complete (${gpuMode} mode)`); - return { success: true, message: `Installed with UV (${gpuMode} mode)` }; + return { + success: true, + message: `Installed with UV (${gpuMode} mode)`, + diagnostics: { packagePath: discovery.path, venvPath: getCodexLensVenvDir(), installer: 'uv', editable }, + }; } /** @@ -754,8 +767,8 @@ async function installSemanticWithUv(gpuMode: GpuMode = 'cpu'): Promise ` - ${p}`).join('\n'); - - const errorMsg = `Cannot find codex-lens directory for local installation.\n\n` + - `codex-lens is a local development package (not published to PyPI) and must be installed from local 
files.\n\n` + - `To fix this:\n` + - `1. Ensure 'codex-lens' directory exists at one of these locations:\n${pathsList}\n` + - `2. Verify pyproject.toml exists in the codex-lens directory\n` + - `3. Run ccw from the correct working directory\n` + - `4. Or manually install: cd /path/to/codex-lens && pip install -e .[${extras.join(',')}]`; - return { success: false, error: errorMsg }; + if (!discovery.path) { + return { success: false, error: formatSearchResults(discovery, 'codex-lens') }; } - console.log(`[CodexLens] Reinstalling from local path with semantic extras...`); - const installResult = await uv.installFromProject(codexLensPath, extras); + const editable = isDevEnvironment() && !discovery.insideNodeModules; + console.log(`[CodexLens] Reinstalling from local path with semantic extras (editable: ${editable})...`); + const installResult = await uv.installFromProject(discovery.path, extras, editable); if (!installResult.success) { return { success: false, error: `Installation failed: ${installResult.error}` }; } @@ -965,6 +958,15 @@ async function bootstrapVenv(): Promise { return bootstrapWithUv(); } + // Pre-flight: verify Python is available and compatible + const preFlightError = preFlightCheck(); + if (preFlightError) { + return { success: false, error: `Pre-flight failed: ${preFlightError}` }; + } + + // Auto-repair corrupted venv before proceeding + repairVenvIfCorrupted(); + // Fall back to pip logic... 
// Ensure data directory exists const dataDir = getCodexLensDataDir(); @@ -989,35 +991,24 @@ async function bootstrapVenv(): Promise { console.log('[CodexLens] Installing codex-lens package...'); const pipPath = getCodexLensPip(); - // Try local path - codex-lens is local-only, not published to PyPI - const codexLensPath = findLocalCodexLensPath(); + // Try local path using unified discovery + const discovery = findCodexLensPath(); - if (!codexLensPath) { - // codex-lens is a local-only package, not published to PyPI - const errorMsg = `Cannot find codex-lens directory for local installation.\n\n` + - `codex-lens is a local development package (not published to PyPI) and must be installed from local files.\n\n` + - `To fix this:\n` + - `1. Ensure the 'codex-lens' directory exists in your project root\n` + - `2. Verify pyproject.toml exists in codex-lens directory\n` + - `3. Run ccw from the correct working directory\n` + - `4. Or manually install: cd codex-lens && pip install -e .`; - throw new Error(errorMsg); + if (!discovery.path) { + throw new Error(formatSearchResults(discovery, 'codex-lens')); } - console.log(`[CodexLens] Installing from local path: ${codexLensPath}`); - execSync(`"${pipPath}" install -e "${codexLensPath}"`, { stdio: 'inherit', timeout: EXEC_TIMEOUTS.PACKAGE_INSTALL }); + const editable = isDevEnvironment() && !discovery.insideNodeModules; + const pipFlag = editable ? ' -e' : ''; + console.log(`[CodexLens] Installing from local path: ${discovery.path} (editable: ${editable})`); + execSync(`"${pipPath}" install${pipFlag} "${discovery.path}"`, { stdio: 'inherit', timeout: EXEC_TIMEOUTS.PACKAGE_INSTALL }); // Clear cache after successful installation clearVenvStatusCache(); clearSemanticStatusCache(); return { success: true }; } catch (err) { - const errorMsg = `Failed to install codex-lens: ${(err as Error).message}\n\n` + - `codex-lens is a local development package. To fix this:\n` + - `1. 
Ensure the 'codex-lens' directory exists in your project root\n` + - `2. Run the installation from the correct working directory\n` + - `3. Or manually install: cd codex-lens && pip install -e .`; - return { success: false, error: errorMsg }; + return { success: false, error: `Failed to install codex-lens: ${(err as Error).message}` }; } } diff --git a/ccw/src/utils/package-discovery.ts b/ccw/src/utils/package-discovery.ts new file mode 100644 index 00000000..b3dd4b79 --- /dev/null +++ b/ccw/src/utils/package-discovery.ts @@ -0,0 +1,318 @@ +/** + * Unified Package Discovery for local Python packages (codex-lens, ccw-litellm) + * + * Provides a single, transparent path discovery mechanism with: + * - Environment variable overrides (highest priority) + * - ~/.codexlens/config.json configuration + * - Extended search paths (npm global, PACKAGE_ROOT, siblings, etc.) + * - Full search result transparency for diagnostics + */ + +import { existsSync, readFileSync } from 'fs'; +import { join, dirname, resolve } from 'path'; +import { homedir } from 'os'; +import { execSync } from 'child_process'; +import { fileURLToPath } from 'url'; +import { getCodexLensDataDir } from './codexlens-path.js'; +import { EXEC_TIMEOUTS } from './exec-constants.js'; + +// Get directory of this module (src/utils/) +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +// ======================================== +// Types +// ======================================== + +/** Source that found the package path */ +export type PackageSource = + | 'env' // Environment variable override + | 'config' // ~/.codexlens/config.json + | 'sibling' // Sibling directory to ccw project root + | 'npm-global' // npm global prefix + | 'cwd' // Current working directory + | 'cwd-parent' // Parent of current working directory + | 'homedir' // User home directory + | 'package-root'; // npm package internal path + +/** A single search attempt result */ +export interface 
SearchAttempt { + path: string; + source: PackageSource; + exists: boolean; +} + +/** Result of package discovery */ +export interface PackageDiscoveryResult { + /** Resolved package path, or null if not found */ + path: string | null; + /** Source that found the package */ + source: PackageSource | null; + /** All paths searched (for diagnostics) */ + searchedPaths: SearchAttempt[]; + /** Whether the found path is inside node_modules */ + insideNodeModules: boolean; +} + +/** Known local package names */ +export type LocalPackageName = 'codex-lens' | 'ccw-litellm'; + +/** Environment variable mapping for each package */ +const PACKAGE_ENV_VARS: Record = { + 'codex-lens': 'CODEXLENS_PACKAGE_PATH', + 'ccw-litellm': 'CCW_LITELLM_PATH', +}; + +/** Config key mapping for each package */ +const PACKAGE_CONFIG_KEYS: Record = { + 'codex-lens': 'codexLensPath', + 'ccw-litellm': 'ccwLitellmPath', +}; + +// ======================================== +// Helpers +// ======================================== + +/** + * Check if a path is inside node_modules + */ +export function isInsideNodeModules(pathToCheck: string): boolean { + const normalized = pathToCheck.replace(/\\/g, '/').toLowerCase(); + return normalized.includes('/node_modules/'); +} + +/** + * Check if running in a development environment (not from node_modules) + */ +export function isDevEnvironment(): boolean { + // Yarn PnP detection + if ((process.versions as Record).pnp) { + return false; + } + return !isInsideNodeModules(__dirname); +} + +/** + * Read package paths from ~/.codexlens/config.json + */ +function readConfigPath(packageName: LocalPackageName): string | null { + try { + const configPath = join(getCodexLensDataDir(), 'config.json'); + if (!existsSync(configPath)) return null; + + const config = JSON.parse(readFileSync(configPath, 'utf-8')); + const key = PACKAGE_CONFIG_KEYS[packageName]; + const value = config?.packagePaths?.[key]; + return typeof value === 'string' && value.trim() ? 
value.trim() : null; + } catch { + return null; + } +} + +/** + * Get npm global prefix directory + */ +let _npmGlobalPrefix: string | null | undefined; +function getNpmGlobalPrefix(): string | null { + if (_npmGlobalPrefix !== undefined) return _npmGlobalPrefix; + + try { + const result = execSync('npm prefix -g', { + encoding: 'utf-8', + timeout: EXEC_TIMEOUTS.SYSTEM_INFO, + stdio: ['pipe', 'pipe', 'pipe'], + }); + _npmGlobalPrefix = result.trim() || null; + } catch { + _npmGlobalPrefix = null; + } + return _npmGlobalPrefix; +} + +/** + * Check if a directory contains a valid Python package (has pyproject.toml) + */ +function isValidPackageDir(dir: string): boolean { + return existsSync(join(dir, 'pyproject.toml')); +} + +// ======================================== +// Main Discovery Function +// ======================================== + +/** + * Find a local Python package path with unified search logic. + * + * Search priority: + * 1. Environment variable (CODEXLENS_PACKAGE_PATH / CCW_LITELLM_PATH) + * 2. ~/.codexlens/config.json packagePaths + * 3. Sibling directory to ccw project root (src/utils -> ../../..) + * 4. npm global prefix node_modules path + * 5. Current working directory + * 6. Parent of current working directory + * 7. Home directory + * + * Two-pass search: first pass skips node_modules paths, second pass allows them. + * + * @param packageName - Package to find ('codex-lens' or 'ccw-litellm') + * @returns Discovery result with path, source, and all searched paths + */ +export function findPackagePath(packageName: LocalPackageName): PackageDiscoveryResult { + const searched: SearchAttempt[] = []; + + // Helper to check and record a path + const check = (path: string, source: PackageSource): boolean => { + const resolvedPath = resolve(path); + const exists = isValidPackageDir(resolvedPath); + searched.push({ path: resolvedPath, source, exists }); + return exists; + }; + + // 1. 
Environment variable (highest priority, skip two-pass) + const envKey = PACKAGE_ENV_VARS[packageName]; + const envPath = process.env[envKey]; + if (envPath) { + if (check(envPath, 'env')) { + return { + path: resolve(envPath), + source: 'env', + searchedPaths: searched, + insideNodeModules: isInsideNodeModules(envPath), + }; + } + // Env var set but path invalid — continue searching but warn + console.warn(`[PackageDiscovery] ${envKey}="${envPath}" set but pyproject.toml not found, continuing search...`); + } + + // 2. Config file + const configPath = readConfigPath(packageName); + if (configPath) { + if (check(configPath, 'config')) { + return { + path: resolve(configPath), + source: 'config', + searchedPaths: searched, + insideNodeModules: isInsideNodeModules(configPath), + }; + } + } + + // Build candidate paths for two-pass search + const candidates: { path: string; source: PackageSource }[] = []; + + // 3. Sibling directory to ccw project root + // __dirname = src/utils/ → project root = ../../.. + // Also try one more level up for nested structures + const projectRoot = join(__dirname, '..', '..', '..'); + candidates.push({ path: join(projectRoot, packageName), source: 'sibling' }); + candidates.push({ path: join(projectRoot, '..', packageName), source: 'sibling' }); + + // 4. npm global prefix + const npmPrefix = getNpmGlobalPrefix(); + if (npmPrefix) { + // npm global: prefix/node_modules/claude-code-workflow/ + candidates.push({ + path: join(npmPrefix, 'node_modules', 'claude-code-workflow', packageName), + source: 'npm-global', + }); + // npm global: prefix/lib/node_modules/claude-code-workflow/ (Linux/Mac) + candidates.push({ + path: join(npmPrefix, 'lib', 'node_modules', 'claude-code-workflow', packageName), + source: 'npm-global', + }); + // npm global sibling: prefix/node_modules/ + candidates.push({ + path: join(npmPrefix, 'node_modules', packageName), + source: 'npm-global', + }); + } + + // 5. 
Current working directory + const cwd = process.cwd(); + candidates.push({ path: join(cwd, packageName), source: 'cwd' }); + + // 6. Parent of cwd (common workspace layout) + const cwdParent = dirname(cwd); + if (cwdParent !== cwd) { + candidates.push({ path: join(cwdParent, packageName), source: 'cwd-parent' }); + } + + // 7. Home directory + candidates.push({ path: join(homedir(), packageName), source: 'homedir' }); + + // Two-pass search: prefer non-node_modules paths first + // First pass: skip node_modules + for (const candidate of candidates) { + const resolvedPath = resolve(candidate.path); + if (isInsideNodeModules(resolvedPath)) continue; + if (check(resolvedPath, candidate.source)) { + console.log(`[PackageDiscovery] Found ${packageName} at: ${resolvedPath} (source: ${candidate.source})`); + return { + path: resolvedPath, + source: candidate.source, + searchedPaths: searched, + insideNodeModules: false, + }; + } + } + + // Second pass: allow node_modules paths + for (const candidate of candidates) { + const resolvedPath = resolve(candidate.path); + if (!isInsideNodeModules(resolvedPath)) continue; + // Skip if already checked in first pass + if (searched.some(s => s.path === resolvedPath)) continue; + if (check(resolvedPath, candidate.source)) { + console.log(`[PackageDiscovery] Found ${packageName} in node_modules at: ${resolvedPath} (source: ${candidate.source})`); + return { + path: resolvedPath, + source: candidate.source, + searchedPaths: searched, + insideNodeModules: true, + }; + } + } + + // Not found + return { + path: null, + source: null, + searchedPaths: searched, + insideNodeModules: false, + }; +} + +/** + * Find codex-lens package path (convenience wrapper) + */ +export function findCodexLensPath(): PackageDiscoveryResult { + return findPackagePath('codex-lens'); +} + +/** + * Find ccw-litellm package path (convenience wrapper) + */ +export function findCcwLitellmPath(): PackageDiscoveryResult { + return findPackagePath('ccw-litellm'); +} + 
+/** + * Format search results for error messages + */ +export function formatSearchResults(result: PackageDiscoveryResult, packageName: string): string { + const lines = [`Cannot find '${packageName}' package directory.\n`]; + lines.push('Searched locations:'); + for (const attempt of result.searchedPaths) { + const status = attempt.exists ? '✓' : '✗'; + lines.push(` ${status} [${attempt.source}] ${attempt.path}`); + } + lines.push(''); + lines.push('To fix this:'); + + const envKey = PACKAGE_ENV_VARS[packageName as LocalPackageName] || `${packageName.toUpperCase().replace(/-/g, '_')}_PATH`; + lines.push(` 1. Set environment variable: ${envKey}=/path/to/${packageName}`); + lines.push(` 2. Or add to ~/.codexlens/config.json: { "packagePaths": { "${PACKAGE_CONFIG_KEYS[packageName as LocalPackageName] || packageName}": "/path/to/${packageName}" } }`); + lines.push(` 3. Or ensure '${packageName}' directory exists as a sibling to the ccw project`); + + return lines.join('\n'); +} diff --git a/ccw/src/utils/uv-manager.ts b/ccw/src/utils/uv-manager.ts index bff0630a..467e2283 100644 --- a/ccw/src/utils/uv-manager.ts +++ b/ccw/src/utils/uv-manager.ts @@ -356,12 +356,13 @@ export class UvManager { /** * Install packages from a local project with optional extras - * Uses `uv pip install -e` for editable installs + * Uses `uv pip install` for standard installs, or `-e` for editable installs * @param projectPath - Path to the project directory (must contain pyproject.toml or setup.py) * @param extras - Optional array of extras to install (e.g., ['semantic', 'dev']) + * @param editable - Whether to install in editable mode (default: false for stability) * @returns Installation result */ - async installFromProject(projectPath: string, extras?: string[]): Promise { + async installFromProject(projectPath: string, extras?: string[], editable = false): Promise { const startTime = Date.now(); // Ensure UV is available @@ -383,9 +384,11 @@ export class UvManager { } return new 
Promise((resolve) => { - const args = ['pip', 'install', '-e', installSpec, '--python', this.getVenvPython()]; + const args = editable + ? ['pip', 'install', '-e', installSpec, '--python', this.getVenvPython()] + : ['pip', 'install', installSpec, '--python', this.getVenvPython()]; - console.log(`[UV] Installing from project: ${installSpec}`); + console.log(`[UV] Installing from project: ${installSpec} (editable: ${editable})`); const child = spawn(uvPath, args, { stdio: ['ignore', 'pipe', 'pipe'], diff --git a/codex-lens/src/codexlens/api/semantic.py b/codex-lens/src/codexlens/api/semantic.py index 4af03074..c442364f 100644 --- a/codex-lens/src/codexlens/api/semantic.py +++ b/codex-lens/src/codexlens/api/semantic.py @@ -114,13 +114,6 @@ def semantic_search( logger.debug("Ignoring invalid staged_stage2_mode: %r", staged_stage2_mode) # Get or create registry and mapper - try: - registry = RegistryStore.default() - mapper = PathMapper(registry) - except Exception as exc: - logger.error("Failed to initialize search infrastructure: %s", exc) - return [] - # Build search options based on mode search_options = _build_search_options( mode=mode, @@ -132,15 +125,17 @@ def semantic_search( # Execute search based on fusion_strategy try: - with ChainSearchEngine(registry, mapper, config=config) as engine: - chain_result = _execute_search( - engine=engine, - query=query, - source_path=project_path, - fusion_strategy=fusion_strategy, - options=search_options, - limit=limit, - ) + with RegistryStore() as registry: + mapper = PathMapper() + with ChainSearchEngine(registry, mapper, config=config) as engine: + chain_result = _execute_search( + engine=engine, + query=query, + source_path=project_path, + fusion_strategy=fusion_strategy, + options=search_options, + limit=limit, + ) except Exception as exc: logger.error("Search execution failed: %s", exc) return [] diff --git a/codex-lens/src/codexlens/config.py b/codex-lens/src/codexlens/config.py index 988eb355..77b5b055 100644 --- 
a/codex-lens/src/codexlens/config.py +++ b/codex-lens/src/codexlens/config.py @@ -1 +1,898 @@ -uid=1_0 RootWebArea url="about:blank" +"""Configuration system for CodexLens.""" + +from __future__ import annotations + +import json +import logging +import os +from dataclasses import dataclass, field +from functools import cached_property +from pathlib import Path +from typing import Any, Dict, List, Optional + +from .errors import ConfigError + + +# Workspace-local directory name +WORKSPACE_DIR_NAME = ".codexlens" + +# Settings file name +SETTINGS_FILE_NAME = "settings.json" + +# Dense vector storage names (centralized storage) +VECTORS_HNSW_NAME = "_vectors.hnsw" +VECTORS_META_DB_NAME = "_vectors_meta.db" +BINARY_VECTORS_MMAP_NAME = "_binary_vectors.mmap" + +log = logging.getLogger(__name__) + + +def _default_global_dir() -> Path: + """Get global CodexLens data directory.""" + env_override = os.getenv("CODEXLENS_DATA_DIR") + if env_override: + return Path(env_override).expanduser().resolve() + return (Path.home() / ".codexlens").resolve() + + +def find_workspace_root(start_path: Path) -> Optional[Path]: + """Find the workspace root by looking for .codexlens directory. + + Searches from start_path upward to find an existing .codexlens directory. + Returns None if not found. + """ + current = start_path.resolve() + + # Search up to filesystem root + while current != current.parent: + workspace_dir = current / WORKSPACE_DIR_NAME + if workspace_dir.is_dir(): + return current + current = current.parent + + # Check root as well + workspace_dir = current / WORKSPACE_DIR_NAME + if workspace_dir.is_dir(): + return current + + return None + + +@dataclass +class Config: + """Runtime configuration for CodexLens. + + - data_dir: Base directory for all persistent CodexLens data. + - venv_path: Optional virtualenv used for language tooling. + - supported_languages: Language IDs and their associated file extensions. + - parsing_rules: Per-language parsing and chunking hints. 
+ """ + + data_dir: Path = field(default_factory=_default_global_dir) + venv_path: Path = field(default_factory=lambda: _default_global_dir() / "venv") + supported_languages: Dict[str, Dict[str, Any]] = field( + default_factory=lambda: { + # Source code languages (category: "code") + "python": {"extensions": [".py"], "tree_sitter_language": "python", "category": "code"}, + "javascript": {"extensions": [".js", ".jsx"], "tree_sitter_language": "javascript", "category": "code"}, + "typescript": {"extensions": [".ts", ".tsx"], "tree_sitter_language": "typescript", "category": "code"}, + "java": {"extensions": [".java"], "tree_sitter_language": "java", "category": "code"}, + "go": {"extensions": [".go"], "tree_sitter_language": "go", "category": "code"}, + "zig": {"extensions": [".zig"], "tree_sitter_language": "zig", "category": "code"}, + "objective-c": {"extensions": [".m", ".mm"], "tree_sitter_language": "objc", "category": "code"}, + "c": {"extensions": [".c", ".h"], "tree_sitter_language": "c", "category": "code"}, + "cpp": {"extensions": [".cc", ".cpp", ".hpp", ".cxx"], "tree_sitter_language": "cpp", "category": "code"}, + "rust": {"extensions": [".rs"], "tree_sitter_language": "rust", "category": "code"}, + } + ) + parsing_rules: Dict[str, Dict[str, Any]] = field( + default_factory=lambda: { + "default": { + "max_chunk_chars": 4000, + "max_chunk_lines": 200, + "overlap_lines": 20, + } + } + ) + + llm_enabled: bool = False + llm_tool: str = "gemini" + llm_timeout_ms: int = 300000 + llm_batch_size: int = 5 + + # Hybrid chunker configuration + hybrid_max_chunk_size: int = 2000 # Max characters per chunk before LLM refinement + hybrid_llm_refinement: bool = False # Enable LLM-based semantic boundary refinement + + # Embedding configuration + embedding_backend: str = "fastembed" # "fastembed" (local) or "litellm" (API) + embedding_model: str = "code" # For fastembed: profile (fast/code/multilingual/balanced) + # For litellm: model name from config (e.g., 
"qwen3-embedding") + embedding_use_gpu: bool = True # For fastembed: whether to use GPU acceleration + + # Indexing/search optimizations + global_symbol_index_enabled: bool = True # Enable project-wide symbol index fast path + enable_merkle_detection: bool = True # Enable content-hash based incremental indexing + + # Graph expansion (search-time, uses precomputed neighbors) + enable_graph_expansion: bool = False + graph_expansion_depth: int = 2 + + # Optional search reranking (disabled by default) + enable_reranking: bool = False + reranking_top_k: int = 50 + symbol_boost_factor: float = 1.5 + + # Optional cross-encoder reranking (second stage; requires optional reranker deps) + enable_cross_encoder_rerank: bool = False + reranker_backend: str = "onnx" + reranker_model: str = "cross-encoder/ms-marco-MiniLM-L-6-v2" + reranker_top_k: int = 50 + reranker_max_input_tokens: int = 8192 # Maximum tokens for reranker API batching + reranker_chunk_type_weights: Optional[Dict[str, float]] = None # Weights for chunk types: {"code": 1.0, "docstring": 0.7} + reranker_test_file_penalty: float = 0.0 # Penalty for test files (0.0-1.0, e.g., 0.2 = 20% reduction) + + # Chunk stripping configuration (for semantic embedding) + chunk_strip_comments: bool = True # Strip comments from code chunks + chunk_strip_docstrings: bool = True # Strip docstrings from code chunks + + # Cascade search configuration (two-stage retrieval) + enable_cascade_search: bool = False # Enable cascade search (coarse + fine ranking) + cascade_coarse_k: int = 100 # Number of coarse candidates from first stage + cascade_fine_k: int = 10 # Number of final results after reranking + cascade_strategy: str = "binary" # "binary", "binary_rerank", "dense_rerank", or "staged" + + # Staged cascade search configuration (4-stage pipeline) + staged_coarse_k: int = 200 # Number of coarse candidates from Stage 1 binary search + staged_lsp_depth: int = 2 # LSP relationship expansion depth in Stage 2 + staged_stage2_mode: str = 
"precomputed" # "precomputed" (graph_neighbors) | "realtime" (LSP) | "static_global_graph" (global_relationships) + + # Static graph configuration (write relationships to global index during build) + static_graph_enabled: bool = False + static_graph_relationship_types: List[str] = field(default_factory=lambda: ["imports", "inherits"]) + + staged_realtime_lsp_timeout_s: float = 30.0 # Max time budget for realtime LSP expansion + staged_realtime_lsp_depth: int = 1 # BFS depth for realtime LSP expansion + staged_realtime_lsp_max_nodes: int = 50 # Node cap for realtime graph expansion + staged_realtime_lsp_max_seeds: int = 1 # Seed cap for realtime graph expansion + staged_realtime_lsp_max_concurrent: int = 2 # Max concurrent LSP requests during graph expansion + staged_realtime_lsp_warmup_s: float = 3.0 # Wait for server analysis after opening seed docs + staged_realtime_lsp_resolve_symbols: bool = False # If True, resolves symbol names via documentSymbol (slower) + staged_clustering_strategy: str = "auto" # "auto", "hdbscan", "dbscan", "frequency", "noop", "score", "dir_rr", "path" + staged_clustering_min_size: int = 3 # Minimum cluster size for Stage 3 grouping + enable_staged_rerank: bool = True # Enable optional cross-encoder reranking in Stage 4 + + # RRF fusion configuration + fusion_method: str = "rrf" # "simple" (weighted sum) or "rrf" (reciprocal rank fusion) + rrf_k: int = 60 # RRF constant (default 60) + + # Category-based filtering to separate code/doc results + enable_category_filter: bool = True # Enable code/doc result separation + + # Multi-endpoint configuration for litellm backend + embedding_endpoints: List[Dict[str, Any]] = field(default_factory=list) + # List of endpoint configs: [{"model": "...", "api_key": "...", "api_base": "...", "weight": 1.0}] + embedding_pool_enabled: bool = False # Enable high availability pool for embeddings + embedding_strategy: str = "latency_aware" # round_robin, latency_aware, weighted_random + embedding_cooldown: 
float = 60.0 # Default cooldown seconds for rate-limited endpoints + + # Reranker multi-endpoint configuration + reranker_pool_enabled: bool = False # Enable high availability pool for reranker + reranker_strategy: str = "latency_aware" # round_robin, latency_aware, weighted_random + reranker_cooldown: float = 60.0 # Default cooldown seconds for rate-limited endpoints + + # API concurrency settings + api_max_workers: int = 4 # Max concurrent API calls for embedding/reranking + api_batch_size: int = 8 # Batch size for API requests + api_batch_size_dynamic: bool = False # Enable dynamic batch size calculation + api_batch_size_utilization_factor: float = 0.8 # Use 80% of model token capacity + api_batch_size_max: int = 2048 # Absolute upper limit for batch size + chars_per_token_estimate: int = 4 # Characters per token estimation ratio + + # Parser configuration + use_astgrep: bool = False # Use ast-grep for Python relationship extraction (tree-sitter is default) + + def __post_init__(self) -> None: + try: + self.data_dir = self.data_dir.expanduser().resolve() + self.venv_path = self.venv_path.expanduser().resolve() + self.data_dir.mkdir(parents=True, exist_ok=True) + except PermissionError as exc: + raise ConfigError( + f"Permission denied initializing paths (data_dir={self.data_dir}, venv_path={self.venv_path}) " + f"[{type(exc).__name__}]: {exc}" + ) from exc + except OSError as exc: + raise ConfigError( + f"Filesystem error initializing paths (data_dir={self.data_dir}, venv_path={self.venv_path}) " + f"[{type(exc).__name__}]: {exc}" + ) from exc + except Exception as exc: + raise ConfigError( + f"Unexpected error initializing paths (data_dir={self.data_dir}, venv_path={self.venv_path}) " + f"[{type(exc).__name__}]: {exc}" + ) from exc + + @cached_property + def cache_dir(self) -> Path: + """Directory for transient caches.""" + return self.data_dir / "cache" + + @cached_property + def index_dir(self) -> Path: + """Directory where index artifacts are stored.""" + 
return self.data_dir / "index" + + @cached_property + def db_path(self) -> Path: + """Default SQLite index path.""" + return self.index_dir / "codexlens.db" + + def ensure_runtime_dirs(self) -> None: + """Create standard runtime directories if missing.""" + for directory in (self.cache_dir, self.index_dir): + try: + directory.mkdir(parents=True, exist_ok=True) + except PermissionError as exc: + raise ConfigError( + f"Permission denied creating directory {directory} [{type(exc).__name__}]: {exc}" + ) from exc + except OSError as exc: + raise ConfigError( + f"Filesystem error creating directory {directory} [{type(exc).__name__}]: {exc}" + ) from exc + except Exception as exc: + raise ConfigError( + f"Unexpected error creating directory {directory} [{type(exc).__name__}]: {exc}" + ) from exc + + def language_for_path(self, path: str | Path) -> str | None: + """Infer a supported language ID from a file path.""" + extension = Path(path).suffix.lower() + for language_id, spec in self.supported_languages.items(): + extensions: List[str] = spec.get("extensions", []) + if extension in extensions: + return language_id + return None + + def category_for_path(self, path: str | Path) -> str | None: + """Get file category ('code' or 'doc') from a file path.""" + language = self.language_for_path(path) + if language is None: + return None + spec = self.supported_languages.get(language, {}) + return spec.get("category") + + def rules_for_language(self, language_id: str) -> Dict[str, Any]: + """Get parsing rules for a specific language, falling back to defaults.""" + return {**self.parsing_rules.get("default", {}), **self.parsing_rules.get(language_id, {})} + + @cached_property + def settings_path(self) -> Path: + """Path to the settings file.""" + return self.data_dir / SETTINGS_FILE_NAME + + def save_settings(self) -> None: + """Save embedding and other settings to file.""" + embedding_config = { + "backend": self.embedding_backend, + "model": self.embedding_model, + "use_gpu": 
self.embedding_use_gpu, + "pool_enabled": self.embedding_pool_enabled, + "strategy": self.embedding_strategy, + "cooldown": self.embedding_cooldown, + } + # Include multi-endpoint config if present + if self.embedding_endpoints: + embedding_config["endpoints"] = self.embedding_endpoints + + settings = { + "embedding": embedding_config, + "llm": { + "enabled": self.llm_enabled, + "tool": self.llm_tool, + "timeout_ms": self.llm_timeout_ms, + "batch_size": self.llm_batch_size, + }, + "parsing": { + # Prefer ast-grep processors when available (experimental). + "use_astgrep": self.use_astgrep, + }, + "indexing": { + # Persist global relationship edges during index build for static graph expansion. + "static_graph_enabled": self.static_graph_enabled, + "static_graph_relationship_types": self.static_graph_relationship_types, + }, + "reranker": { + "enabled": self.enable_cross_encoder_rerank, + "backend": self.reranker_backend, + "model": self.reranker_model, + "top_k": self.reranker_top_k, + "max_input_tokens": self.reranker_max_input_tokens, + "pool_enabled": self.reranker_pool_enabled, + "strategy": self.reranker_strategy, + "cooldown": self.reranker_cooldown, + }, + "cascade": { + "strategy": self.cascade_strategy, + "coarse_k": self.cascade_coarse_k, + "fine_k": self.cascade_fine_k, + }, + "api": { + "max_workers": self.api_max_workers, + "batch_size": self.api_batch_size, + "batch_size_dynamic": self.api_batch_size_dynamic, + "batch_size_utilization_factor": self.api_batch_size_utilization_factor, + "batch_size_max": self.api_batch_size_max, + "chars_per_token_estimate": self.chars_per_token_estimate, + }, + } + with open(self.settings_path, "w", encoding="utf-8") as f: + json.dump(settings, f, indent=2) + + def load_settings(self) -> None: + """Load settings from file if exists.""" + if self.settings_path.exists(): + try: + with open(self.settings_path, "r", encoding="utf-8") as f: + settings = json.load(f) + + # Load embedding settings + embedding = 
settings.get("embedding", {}) + if "backend" in embedding: + backend = embedding["backend"] + # Support 'api' as alias for 'litellm' + if backend == "api": + backend = "litellm" + if backend in {"fastembed", "litellm"}: + self.embedding_backend = backend + else: + log.warning( + "Invalid embedding backend in %s: %r (expected 'fastembed' or 'litellm')", + self.settings_path, + embedding["backend"], + ) + if "model" in embedding: + self.embedding_model = embedding["model"] + if "use_gpu" in embedding: + self.embedding_use_gpu = embedding["use_gpu"] + + # Load multi-endpoint configuration + if "endpoints" in embedding: + self.embedding_endpoints = embedding["endpoints"] + if "pool_enabled" in embedding: + self.embedding_pool_enabled = embedding["pool_enabled"] + if "strategy" in embedding: + self.embedding_strategy = embedding["strategy"] + if "cooldown" in embedding: + self.embedding_cooldown = embedding["cooldown"] + + # Load LLM settings + llm = settings.get("llm", {}) + if "enabled" in llm: + self.llm_enabled = llm["enabled"] + if "tool" in llm: + self.llm_tool = llm["tool"] + if "timeout_ms" in llm: + self.llm_timeout_ms = llm["timeout_ms"] + if "batch_size" in llm: + self.llm_batch_size = llm["batch_size"] + + # Load reranker settings + reranker = settings.get("reranker", {}) + if "enabled" in reranker: + self.enable_cross_encoder_rerank = reranker["enabled"] + if "backend" in reranker: + backend = reranker["backend"] + if backend in {"fastembed", "onnx", "api", "litellm", "legacy"}: + self.reranker_backend = backend + else: + log.warning( + "Invalid reranker backend in %s: %r (expected 'fastembed', 'onnx', 'api', 'litellm', or 'legacy')", + self.settings_path, + backend, + ) + if "model" in reranker: + self.reranker_model = reranker["model"] + if "top_k" in reranker: + self.reranker_top_k = reranker["top_k"] + if "max_input_tokens" in reranker: + self.reranker_max_input_tokens = reranker["max_input_tokens"] + if "pool_enabled" in reranker: + 
self.reranker_pool_enabled = reranker["pool_enabled"] + if "strategy" in reranker: + self.reranker_strategy = reranker["strategy"] + if "cooldown" in reranker: + self.reranker_cooldown = reranker["cooldown"] + + # Load cascade settings + cascade = settings.get("cascade", {}) + if "strategy" in cascade: + strategy = cascade["strategy"] + if strategy in {"binary", "binary_rerank", "dense_rerank", "staged"}: + self.cascade_strategy = strategy + else: + log.warning( + "Invalid cascade strategy in %s: %r (expected 'binary', 'binary_rerank', 'dense_rerank', or 'staged')", + self.settings_path, + strategy, + ) + if "coarse_k" in cascade: + self.cascade_coarse_k = cascade["coarse_k"] + if "fine_k" in cascade: + self.cascade_fine_k = cascade["fine_k"] + + # Load parsing settings + parsing = settings.get("parsing", {}) + if isinstance(parsing, dict) and "use_astgrep" in parsing: + self.use_astgrep = bool(parsing["use_astgrep"]) + + # Load indexing settings + indexing = settings.get("indexing", {}) + if isinstance(indexing, dict): + if "static_graph_enabled" in indexing: + self.static_graph_enabled = bool(indexing["static_graph_enabled"]) + if "static_graph_relationship_types" in indexing: + raw_types = indexing["static_graph_relationship_types"] + if isinstance(raw_types, list): + allowed = {"imports", "inherits", "calls"} + cleaned = [] + for item in raw_types: + val = str(item).strip().lower() + if val and val in allowed: + cleaned.append(val) + if cleaned: + self.static_graph_relationship_types = cleaned + else: + log.warning( + "Invalid indexing.static_graph_relationship_types in %s: %r (expected list)", + self.settings_path, + raw_types, + ) + + # Load API settings + api = settings.get("api", {}) + if "max_workers" in api: + self.api_max_workers = api["max_workers"] + if "batch_size" in api: + self.api_batch_size = api["batch_size"] + if "batch_size_dynamic" in api: + self.api_batch_size_dynamic = api["batch_size_dynamic"] + if "batch_size_utilization_factor" in api: + 
self.api_batch_size_utilization_factor = api["batch_size_utilization_factor"] + if "batch_size_max" in api: + self.api_batch_size_max = api["batch_size_max"] + if "chars_per_token_estimate" in api: + self.chars_per_token_estimate = api["chars_per_token_estimate"] + except Exception as exc: + log.warning( + "Failed to load settings from %s (%s): %s", + self.settings_path, + type(exc).__name__, + exc, + ) + + # Apply .env overrides (highest priority) + self._apply_env_overrides() + + def _apply_env_overrides(self) -> None: + """Apply environment variable overrides from .env file. + + Priority: default → settings.json → .env (highest) + + Supported variables (with or without CODEXLENS_ prefix): + EMBEDDING_MODEL: Override embedding model/profile + EMBEDDING_BACKEND: Override embedding backend (fastembed/litellm) + EMBEDDING_POOL_ENABLED: Enable embedding high availability pool + EMBEDDING_STRATEGY: Load balance strategy for embedding + EMBEDDING_COOLDOWN: Rate limit cooldown for embedding + RERANKER_MODEL: Override reranker model + RERANKER_BACKEND: Override reranker backend + RERANKER_ENABLED: Override reranker enabled state (true/false) + RERANKER_POOL_ENABLED: Enable reranker high availability pool + RERANKER_STRATEGY: Load balance strategy for reranker + RERANKER_COOLDOWN: Rate limit cooldown for reranker + """ + from .env_config import load_env_file + + env_vars = load_env_file(self.data_dir / ".env") + if not env_vars: + return + + def get_env(key: str) -> str | None: + """Get env var with or without CODEXLENS_ prefix.""" + # Check prefixed version first (Dashboard format), then unprefixed + return env_vars.get(f"CODEXLENS_{key}") or env_vars.get(key) + + def _parse_bool(value: str) -> bool: + return value.strip().lower() in {"true", "1", "yes", "on"} + + # Cascade overrides + cascade_enabled = get_env("ENABLE_CASCADE_SEARCH") + if cascade_enabled: + self.enable_cascade_search = _parse_bool(cascade_enabled) + log.debug( + "Overriding enable_cascade_search from 
.env: %s", + self.enable_cascade_search, + ) + + cascade_strategy = get_env("CASCADE_STRATEGY") + if cascade_strategy: + strategy = cascade_strategy.strip().lower() + if strategy in {"binary", "binary_rerank", "dense_rerank", "staged"}: + self.cascade_strategy = strategy + log.debug("Overriding cascade_strategy from .env: %s", self.cascade_strategy) + else: + log.warning("Invalid CASCADE_STRATEGY in .env: %r", cascade_strategy) + + cascade_coarse_k = get_env("CASCADE_COARSE_K") + if cascade_coarse_k: + try: + self.cascade_coarse_k = int(cascade_coarse_k) + log.debug("Overriding cascade_coarse_k from .env: %s", self.cascade_coarse_k) + except ValueError: + log.warning("Invalid CASCADE_COARSE_K in .env: %r", cascade_coarse_k) + + cascade_fine_k = get_env("CASCADE_FINE_K") + if cascade_fine_k: + try: + self.cascade_fine_k = int(cascade_fine_k) + log.debug("Overriding cascade_fine_k from .env: %s", self.cascade_fine_k) + except ValueError: + log.warning("Invalid CASCADE_FINE_K in .env: %r", cascade_fine_k) + + # Embedding overrides + embedding_model = get_env("EMBEDDING_MODEL") + if embedding_model: + self.embedding_model = embedding_model + log.debug("Overriding embedding_model from .env: %s", self.embedding_model) + + embedding_backend = get_env("EMBEDDING_BACKEND") + if embedding_backend: + backend = embedding_backend.lower() + # Support 'api' as alias for 'litellm' + if backend == "api": + backend = "litellm" + if backend in {"fastembed", "litellm"}: + self.embedding_backend = backend + log.debug("Overriding embedding_backend from .env: %s", backend) + else: + log.warning("Invalid EMBEDDING_BACKEND in .env: %r", embedding_backend) + + embedding_pool = get_env("EMBEDDING_POOL_ENABLED") + if embedding_pool: + value = embedding_pool.lower() + self.embedding_pool_enabled = value in {"true", "1", "yes", "on"} + log.debug("Overriding embedding_pool_enabled from .env: %s", self.embedding_pool_enabled) + + embedding_strategy = get_env("EMBEDDING_STRATEGY") + if 
embedding_strategy: + strategy = embedding_strategy.lower() + if strategy in {"round_robin", "latency_aware", "weighted_random"}: + self.embedding_strategy = strategy + log.debug("Overriding embedding_strategy from .env: %s", strategy) + else: + log.warning("Invalid EMBEDDING_STRATEGY in .env: %r", embedding_strategy) + + embedding_cooldown = get_env("EMBEDDING_COOLDOWN") + if embedding_cooldown: + try: + self.embedding_cooldown = float(embedding_cooldown) + log.debug("Overriding embedding_cooldown from .env: %s", self.embedding_cooldown) + except ValueError: + log.warning("Invalid EMBEDDING_COOLDOWN in .env: %r", embedding_cooldown) + + # Reranker overrides + reranker_model = get_env("RERANKER_MODEL") + if reranker_model: + self.reranker_model = reranker_model + log.debug("Overriding reranker_model from .env: %s", self.reranker_model) + + reranker_backend = get_env("RERANKER_BACKEND") + if reranker_backend: + backend = reranker_backend.lower() + if backend in {"fastembed", "onnx", "api", "litellm", "legacy"}: + self.reranker_backend = backend + log.debug("Overriding reranker_backend from .env: %s", backend) + else: + log.warning("Invalid RERANKER_BACKEND in .env: %r", reranker_backend) + + reranker_enabled = get_env("RERANKER_ENABLED") + if reranker_enabled: + value = reranker_enabled.lower() + self.enable_cross_encoder_rerank = value in {"true", "1", "yes", "on"} + log.debug("Overriding reranker_enabled from .env: %s", self.enable_cross_encoder_rerank) + + reranker_pool = get_env("RERANKER_POOL_ENABLED") + if reranker_pool: + value = reranker_pool.lower() + self.reranker_pool_enabled = value in {"true", "1", "yes", "on"} + log.debug("Overriding reranker_pool_enabled from .env: %s", self.reranker_pool_enabled) + + reranker_strategy = get_env("RERANKER_STRATEGY") + if reranker_strategy: + strategy = reranker_strategy.lower() + if strategy in {"round_robin", "latency_aware", "weighted_random"}: + self.reranker_strategy = strategy + log.debug("Overriding 
reranker_strategy from .env: %s", strategy) + else: + log.warning("Invalid RERANKER_STRATEGY in .env: %r", reranker_strategy) + + reranker_cooldown = get_env("RERANKER_COOLDOWN") + if reranker_cooldown: + try: + self.reranker_cooldown = float(reranker_cooldown) + log.debug("Overriding reranker_cooldown from .env: %s", self.reranker_cooldown) + except ValueError: + log.warning("Invalid RERANKER_COOLDOWN in .env: %r", reranker_cooldown) + + reranker_max_tokens = get_env("RERANKER_MAX_INPUT_TOKENS") + if reranker_max_tokens: + try: + self.reranker_max_input_tokens = int(reranker_max_tokens) + log.debug("Overriding reranker_max_input_tokens from .env: %s", self.reranker_max_input_tokens) + except ValueError: + log.warning("Invalid RERANKER_MAX_INPUT_TOKENS in .env: %r", reranker_max_tokens) + + # Reranker tuning from environment + test_penalty = get_env("RERANKER_TEST_FILE_PENALTY") + if test_penalty: + try: + self.reranker_test_file_penalty = float(test_penalty) + log.debug("Overriding reranker_test_file_penalty from .env: %s", self.reranker_test_file_penalty) + except ValueError: + log.warning("Invalid RERANKER_TEST_FILE_PENALTY in .env: %r", test_penalty) + + docstring_weight = get_env("RERANKER_DOCSTRING_WEIGHT") + if docstring_weight: + try: + weight = float(docstring_weight) + self.reranker_chunk_type_weights = {"code": 1.0, "docstring": weight} + log.debug("Overriding reranker docstring weight from .env: %s", weight) + except ValueError: + log.warning("Invalid RERANKER_DOCSTRING_WEIGHT in .env: %r", docstring_weight) + + # Chunk stripping from environment + strip_comments = get_env("CHUNK_STRIP_COMMENTS") + if strip_comments: + self.chunk_strip_comments = strip_comments.lower() in ("true", "1", "yes") + log.debug("Overriding chunk_strip_comments from .env: %s", self.chunk_strip_comments) + + strip_docstrings = get_env("CHUNK_STRIP_DOCSTRINGS") + if strip_docstrings: + self.chunk_strip_docstrings = strip_docstrings.lower() in ("true", "1", "yes") + 
log.debug("Overriding chunk_strip_docstrings from .env: %s", self.chunk_strip_docstrings) + + # Staged cascade overrides + staged_stage2_mode = get_env("STAGED_STAGE2_MODE") + if staged_stage2_mode: + mode = staged_stage2_mode.strip().lower() + if mode in {"precomputed", "realtime", "static_global_graph"}: + self.staged_stage2_mode = mode + log.debug("Overriding staged_stage2_mode from .env: %s", self.staged_stage2_mode) + elif mode in {"live"}: + self.staged_stage2_mode = "realtime" + log.debug("Overriding staged_stage2_mode from .env: %s", self.staged_stage2_mode) + else: + log.warning("Invalid STAGED_STAGE2_MODE in .env: %r", staged_stage2_mode) + + staged_clustering_strategy = get_env("STAGED_CLUSTERING_STRATEGY") + if staged_clustering_strategy: + strategy = staged_clustering_strategy.strip().lower() + if strategy in {"auto", "hdbscan", "dbscan", "frequency", "noop", "score", "dir_rr", "path"}: + self.staged_clustering_strategy = strategy + log.debug( + "Overriding staged_clustering_strategy from .env: %s", + self.staged_clustering_strategy, + ) + elif strategy in {"none", "off"}: + self.staged_clustering_strategy = "noop" + log.debug( + "Overriding staged_clustering_strategy from .env: %s", + self.staged_clustering_strategy, + ) + else: + log.warning( + "Invalid STAGED_CLUSTERING_STRATEGY in .env: %r", + staged_clustering_strategy, + ) + + staged_clustering_min_size = get_env("STAGED_CLUSTERING_MIN_SIZE") + if staged_clustering_min_size: + try: + self.staged_clustering_min_size = int(staged_clustering_min_size) + log.debug( + "Overriding staged_clustering_min_size from .env: %s", + self.staged_clustering_min_size, + ) + except ValueError: + log.warning( + "Invalid STAGED_CLUSTERING_MIN_SIZE in .env: %r", + staged_clustering_min_size, + ) + + enable_staged_rerank = get_env("ENABLE_STAGED_RERANK") + if enable_staged_rerank: + self.enable_staged_rerank = _parse_bool(enable_staged_rerank) + log.debug("Overriding enable_staged_rerank from .env: %s", 
self.enable_staged_rerank) + + rt_timeout = get_env("STAGED_REALTIME_LSP_TIMEOUT_S") + if rt_timeout: + try: + self.staged_realtime_lsp_timeout_s = float(rt_timeout) + log.debug( + "Overriding staged_realtime_lsp_timeout_s from .env: %s", + self.staged_realtime_lsp_timeout_s, + ) + except ValueError: + log.warning("Invalid STAGED_REALTIME_LSP_TIMEOUT_S in .env: %r", rt_timeout) + + rt_depth = get_env("STAGED_REALTIME_LSP_DEPTH") + if rt_depth: + try: + self.staged_realtime_lsp_depth = int(rt_depth) + log.debug( + "Overriding staged_realtime_lsp_depth from .env: %s", + self.staged_realtime_lsp_depth, + ) + except ValueError: + log.warning("Invalid STAGED_REALTIME_LSP_DEPTH in .env: %r", rt_depth) + + rt_max_nodes = get_env("STAGED_REALTIME_LSP_MAX_NODES") + if rt_max_nodes: + try: + self.staged_realtime_lsp_max_nodes = int(rt_max_nodes) + log.debug( + "Overriding staged_realtime_lsp_max_nodes from .env: %s", + self.staged_realtime_lsp_max_nodes, + ) + except ValueError: + log.warning("Invalid STAGED_REALTIME_LSP_MAX_NODES in .env: %r", rt_max_nodes) + + rt_max_seeds = get_env("STAGED_REALTIME_LSP_MAX_SEEDS") + if rt_max_seeds: + try: + self.staged_realtime_lsp_max_seeds = int(rt_max_seeds) + log.debug( + "Overriding staged_realtime_lsp_max_seeds from .env: %s", + self.staged_realtime_lsp_max_seeds, + ) + except ValueError: + log.warning("Invalid STAGED_REALTIME_LSP_MAX_SEEDS in .env: %r", rt_max_seeds) + + rt_max_concurrent = get_env("STAGED_REALTIME_LSP_MAX_CONCURRENT") + if rt_max_concurrent: + try: + self.staged_realtime_lsp_max_concurrent = int(rt_max_concurrent) + log.debug( + "Overriding staged_realtime_lsp_max_concurrent from .env: %s", + self.staged_realtime_lsp_max_concurrent, + ) + except ValueError: + log.warning( + "Invalid STAGED_REALTIME_LSP_MAX_CONCURRENT in .env: %r", + rt_max_concurrent, + ) + + rt_warmup = get_env("STAGED_REALTIME_LSP_WARMUP_S") + if rt_warmup: + try: + self.staged_realtime_lsp_warmup_s = float(rt_warmup) + log.debug( + "Overriding 
@dataclass
class WorkspaceConfig:
    """Workspace-local configuration for CodexLens.

    Index data is kept in the project/.codexlens/ directory; every path
    exposed here is derived from ``workspace_root``.
    """

    workspace_root: Path

    def __post_init__(self) -> None:
        # Accept anything Path() understands and keep the resolved form.
        root = Path(self.workspace_root)
        self.workspace_root = root.resolve()

    @property
    def codexlens_dir(self) -> Path:
        """Workspace data directory: ``workspace_root / WORKSPACE_DIR_NAME``."""
        return self.workspace_root.joinpath(WORKSPACE_DIR_NAME)

    @property
    def db_path(self) -> Path:
        """Location of this workspace's SQLite index (``index.db``)."""
        return self.codexlens_dir.joinpath("index.db")

    @property
    def cache_dir(self) -> Path:
        """Location of this workspace's ``cache`` directory."""
        return self.codexlens_dir.joinpath("cache")

    @property
    def env_path(self) -> Path:
        """Location of this workspace's ``.env`` file."""
        return self.codexlens_dir.joinpath(".env")

    def load_env(self, *, override: bool = False) -> int:
        """Load the workspace .env file and apply it to os.environ.

        Delegates to ``env_config.apply_workspace_env`` for this workspace
        root.

        Args:
            override: If True, override existing environment variables

        Returns:
            Number of variables applied
        """
        from .env_config import apply_workspace_env as _apply_env

        return _apply_env(self.workspace_root, override=override)

    def get_api_config(self, prefix: str) -> dict:
        """Look up API configuration for ``prefix`` in the environment.

        Delegates to ``env_config.get_api_config`` with this workspace root.

        Args:
            prefix: Environment variable prefix (e.g., "RERANKER", "EMBEDDING")

        Returns:
            Dictionary with api_key, api_base, model, etc.
        """
        from .env_config import get_api_config as _lookup

        return _lookup(prefix, workspace_root=self.workspace_root)

    def initialize(self) -> None:
        """Create the .codexlens directory structure and a default .gitignore.

        Existing directories are accepted, and an existing .gitignore is
        left as it is.

        Raises:
            ConfigError: If any step fails; the original exception is chained.
        """
        # Written only when the workspace has no .gitignore yet:
        # excludes the cache, logs and .env from git but keeps the index.
        default_gitignore = (
            "# CodexLens workspace data\n"
            "cache/\n"
            "*.log\n"
            ".env\n"
        )
        try:
            for directory in (self.codexlens_dir, self.cache_dir):
                directory.mkdir(parents=True, exist_ok=True)

            ignore_file = self.codexlens_dir / ".gitignore"
            if not ignore_file.exists():
                ignore_file.write_text(default_gitignore)
        except Exception as exc:
            raise ConfigError(f"Failed to initialize workspace at {self.codexlens_dir}: {exc}") from exc

    def exists(self) -> bool:
        """Return True only when both the data directory and the index file are present."""
        if not self.codexlens_dir.is_dir():
            return False
        return self.db_path.exists()

    @classmethod
    def from_path(cls, path: Path) -> Optional["WorkspaceConfig"]:
        """Build a WorkspaceConfig for the workspace that contains ``path``.

        The root is located with ``find_workspace_root``; None is returned
        when that lookup finds nothing.
        """
        located = find_workspace_root(path)
        return None if located is None else cls(workspace_root=located)

    @classmethod
    def create_at(cls, path: Path) -> "WorkspaceConfig":
        """Create and initialize a new workspace rooted at ``path``."""
        workspace = cls(workspace_root=path)
        workspace.initialize()
        return workspace