Compare commits

...

48 Commits

Author SHA1 Message Date
catlog22
f64f619713 chore: bump version to 6.3.1
🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-25 20:19:30 +08:00
catlog22
a742fa0f8a feat: Change the Code Index MCP provider buttons to a dropdown select and refine the UI 2025-12-25 20:15:15 +08:00
catlog22
6894c7e80b feat: Update Code Index MCP provider support; revise CLAUDE.md and related styles 2025-12-25 20:12:45 +08:00
catlog22
203100431b feat: Add Code Index MCP provider support; update related APIs and configuration 2025-12-25 19:58:42 +08:00
catlog22
e8b9bcae92 feat: Add ccw-litellm uninstall button and fix npm install path resolution
- Fix ccw-litellm path lookup for npm distribution by adding PACKAGE_ROOT
- Add uninstall button to API Settings page for ccw-litellm
- Add /api/litellm-api/ccw-litellm/uninstall endpoint

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-25 18:29:57 +08:00
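The PACKAGE_ROOT change itself is not shown in this compare view; as a loose illustration of such a lookup (directory layout and names are assumptions, not the actual ccw code), a TypeScript sketch:
```typescript
// Hypothetical sketch: resolve the package root so bundled resources are found
// whether ccw runs from a git checkout or from a global npm install.
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
import { existsSync } from 'fs';

const __dirname = dirname(fileURLToPath(import.meta.url));

// Walk up from the compiled module until a package.json is found.
function findPackageRoot(startDir: string): string {
  let dir = startDir;
  while (!existsSync(join(dir, 'package.json'))) {
    const parent = dirname(dir);
    if (parent === dir) throw new Error('package.json not found');
    dir = parent;
  }
  return dir;
}

export const PACKAGE_ROOT = findPackageRoot(__dirname);
// Assumed layout: ccw-litellm sources shipped inside the npm package.
export const CCW_LITELLM_DIR = join(PACKAGE_ROOT, 'ccw-litellm');
```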
catlog22
052351ab5b fix: Remove the no-longer-needed prompts.zip file 2025-12-25 18:17:49 +08:00
catlog22
9dd84e3416 fix: Update agent execution instructions and improve rendering layout in API settings 2025-12-25 18:13:28 +08:00
catlog22
211c25d969 fix: Use pip show for more reliable ccw-litellm detection
- Primary method: pip show ccw-litellm (most reliable)
- Fallback: Python import with simpler syntax
- Increased timeout for pip show to 10s

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-25 17:57:34 +08:00
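A minimal TypeScript sketch of the detection order this commit describes (pip show first, then a Python import fallback), run from Node; helper names are illustrative:
```typescript
import { execFile } from 'child_process';
import { promisify } from 'util';

const run = promisify(execFile);

async function isCcwLitellmInstalled(): Promise<boolean> {
  try {
    // Primary: pip show exits 0 only when the package is installed (10s timeout).
    await run('pip', ['show', 'ccw-litellm'], { timeout: 10_000 });
    return true;
  } catch {
    try {
      // Fallback: a plain import check via the Python interpreter.
      await run('python', ['-c', 'import ccw_litellm'], { timeout: 5_000 });
      return true;
    } catch {
      return false;
    }
  }
}
```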
catlog22
275684d319 fix: Load embedding pool config before rendering sidebar
Ensures the sidebar summary displays correctly on page load

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-25 17:55:05 +08:00
catlog22
0f8a47e8f6 fix: Add shell:true for Windows PATH resolution in ccw-litellm status check
🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-25 17:50:00 +08:00
catlog22
303c840464 fix: Remove invalid i18n key from Embedding Pool UI
Removed 'apiSettings.configuration' heading that was showing raw key

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-25 17:43:29 +08:00
catlog22
b15008fbce feat: Enhance Embedding Pool UI with sidebar summary
- Add renderEmbeddingPoolSidebar() for config summary display
- Show status, target model, strategy, and provider stats
- Improve visual hierarchy with icon indicators
- Update sidebar rendering for embedding-pool tab

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-25 17:39:15 +08:00
catlog22
a8cf3e1ad6 feat: Refactor embedding pool sidebar rendering and always load semantic dependencies status 2025-12-25 17:36:11 +08:00
catlog22
0515ef6e8b refactor: Simplify CodexLens rotation UI, link to API Settings
- Simplify rotation section to show status only
- Add link to navigate to API Settings Embedding Pool
- Update loadRotationStatus to read from embedding-pool API
- Remove detailed modal in favor of API Settings config
- Add i18n translations for 'Configure in API Settings'

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-25 17:29:44 +08:00
catlog22
777d5df573 feat: Add aggregated endpoint for CodexLens dashboard initialization and improve loading performance 2025-12-25 17:22:42 +08:00
catlog22
c5f379ba01 fix: Force refresh ccw-litellm status on first page load
- Add isFirstApiSettingsRender flag to track first load
- Force refresh ccw-litellm status on first page load
- Add ?refresh=true query param support to backend API
- Frontend passes refresh param to bypass backend cache
- Subsequent tab switches still use cache for performance

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-25 17:18:23 +08:00
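Roughly how a first-load force refresh can be wired on the frontend (a sketch; the status route is assumed from the surrounding commits, not copied from the source):
```typescript
// Track whether this is the first render of the API Settings page.
let isFirstApiSettingsRender = true;

async function fetchCcwLitellmStatus(): Promise<unknown> {
  // Bypass the backend cache only on the very first render.
  const url = isFirstApiSettingsRender
    ? '/api/litellm-api/ccw-litellm/status?refresh=true'   // assumed route
    : '/api/litellm-api/ccw-litellm/status';
  isFirstApiSettingsRender = false;
  const res = await fetch(url);
  return res.json();
}
```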
catlog22
145d38c9bd feat: Implement venv status caching with TTL and clear cache on install/uninstall 2025-12-25 17:13:07 +08:00
catlog22
eab957ce00 perf: Optimize API Settings page loading performance
- Add forceRefresh parameter to loadApiSettings() with caching
- Implement 60s TTL cache for ccwLitellmStatus check
- Tab switching now uses cached data (0 network requests)
- Clear cache after save/delete operations for data consistency
- Response time reduced from 100-500ms to <10ms on tab switch

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-25 17:11:58 +08:00
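A minimal sketch of the 60s TTL caching pattern described here, assuming a JSON status endpoint; the actual implementation may differ:
```typescript
interface CacheEntry<T> { value: T; expiresAt: number; }

let statusCache: CacheEntry<unknown> | null = null;
const STATUS_TTL_MS = 60_000;

async function getCcwLitellmStatus(forceRefresh = false): Promise<unknown> {
  const now = Date.now();
  if (!forceRefresh && statusCache && statusCache.expiresAt > now) {
    return statusCache.value;            // tab switch: no network request
  }
  const value = await (await fetch('/api/litellm-api/ccw-litellm/status')).json();
  statusCache = { value, expiresAt: now + STATUS_TTL_MS };
  return value;
}

function clearStatusCache(): void {      // call after save/delete operations
  statusCache = null;
}
```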
catlog22
b5fb077ad6 fix: Improve Embedding Pool UI styling
- Add dedicated CSS for embedding-pool-main-panel
- Style discovered providers list with proper cards
- Fix toggle switch visibility
- Add info-message component styling
- Update renderDiscoveredProviders() to use CSS classes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-25 16:57:33 +08:00
catlog22
ebcbb11cb2 feat: Enhance CodexLens search functionality with new parameters and result handling
- Added search limit, content length, and extra files input fields in the CodexLens manager UI.
- Updated API request parameters to include new fields: max_content_length and extra_files_count.
- Refactored smart-search.ts to support new parameters with default values.
- Implemented result splitting logic to return both full content and additional file paths.
- Updated CLI commands to remove worker limits and allow dynamic scaling based on endpoint count.
- Introduced EmbeddingPoolConfig for improved embedding management and auto-discovery of providers.
- Enhanced search engines to utilize new parameters for fuzzy and exact searches.
- Added support for embedding single texts in the LiteLLM embedder.
2025-12-25 16:16:44 +08:00
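The result-splitting idea can be sketched as follows (TypeScript; parameter names follow the commit message, while defaults and shapes are assumptions):
```typescript
interface SearchHit { path: string; content: string; }

interface SplitResults {
  results: SearchHit[];   // full content, up to `limit` entries
  extraFiles: string[];   // paths only, for the next `extraFilesCount` hits
}

function splitResults(
  hits: SearchHit[],
  limit = 10,
  maxContentLength = 2000,
  extraFilesCount = 5,
): SplitResults {
  const results = hits.slice(0, limit).map(h => ({
    path: h.path,
    content: h.content.slice(0, maxContentLength),  // truncate long bodies
  }));
  const extraFiles = hits.slice(limit, limit + extraFilesCount).map(h => h.path);
  return { results, extraFiles };
}
```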
catlog22
a1413dd1b3 feat: Unified Embedding Pool with auto-discovery
Architecture refactoring for multi-provider rotation:

Backend:
- Add EmbeddingPoolConfig type with autoDiscover support
- Implement discoverProvidersForModel() for auto-aggregation
- Add GET/PUT /api/litellm-api/embedding-pool endpoints
- Add GET /api/litellm-api/embedding-pool/discover/:model preview
- Convert ccw-litellm status check to async with 5-min cache
- Maintain backward compatibility with legacy rotation config

Frontend:
- Add "Embedding Pool" tab in API Settings
- Auto-discover providers when target model selected
- Show provider/key count with include/exclude controls
- Increase sidebar width (280px → 320px)
- Add sync result feedback on save

Other:
- Remove worker count limits (was max=32)
- Add i18n translations (EN/CN)
- Update .gitignore for .mcp.json

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-25 16:06:49 +08:00
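A rough TypeScript sketch of the auto-discovery step named above; only `autoDiscover` and `discoverProvidersForModel` come from the commit message, the remaining fields are guesses:
```typescript
interface EmbeddingPoolConfig {
  enabled: boolean;
  targetModel: string;          // e.g. "text-embedding-3-small"
  autoDiscover: boolean;
  excludedProviderIds: string[];
}

interface Provider { id: string; models: string[]; apiKeys: string[]; }

// Aggregate every configured provider that can serve the target model.
function discoverProvidersForModel(
  providers: Provider[],
  config: EmbeddingPoolConfig,
): Provider[] {
  if (!config.autoDiscover) return [];
  return providers.filter(
    p => p.models.includes(config.targetModel) &&
         !config.excludedProviderIds.includes(p.id),
  );
}
```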
catlog22
4e6ee2db25 chore: stop tracking .mcp.json (already in .gitignore) 2025-12-25 16:05:18 +08:00
catlog22
8e744597d1 feat: Implement CodexLens multi-provider embedding rotation management
- Added functions to get and update CodexLens embedding rotation configuration.
- Introduced functionality to retrieve enabled embedding providers for rotation.
- Created endpoints for managing rotation configuration via API.
- Enhanced dashboard UI to support multi-provider rotation configuration.
- Updated internationalization strings for new rotation features.
- Adjusted CLI commands and embedding manager to support increased concurrency limits.
- Modified hybrid search weights for improved ranking behavior.
2025-12-25 14:13:27 +08:00
catlog22
dfa8b541b4 fix: Fix Python indentation error in semantic dependency detection
Commit 3c3ce55 mistakenly prepended a space to every line of the Python
code in checkSemanticStatus, causing a Python IndentationError and making
the frontend always report "semantic search dependencies not installed".

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-25 13:21:51 +08:00
catlog22
1dc55f8811 feat(ui): Support custom API concurrency (1-32 workers)
- Add codexlens.concurrency and concurrencyHint translations (EN/CN)
- Replace the worker dropdown with a number input supporting the 1-32 range
- Add validateConcurrencyInput input-validation function
- Default to 4 workers and show a recommendation hint

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-25 13:11:51 +08:00
catlog22
501d9a05d4 fix: Fix Ollama connection errors caused by a ModelScope API routing bug
- Add a _sanitize_text() method to handle text starting with 'import'
- The ModelScope backend incorrectly routed such text to the local Ollama endpoint
- Bypass the routing check by prepending a space to the text, without affecting embedding quality
- Strengthen retry logic and error handling in embedding_manager.py
- Invoke global model locking in commands.py after successful generation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-25 12:52:43 +08:00
catlog22
229d51cd18 feat: Add global model locking to prevent mixing different models and improve embedding-generation stability 2025-12-25 11:20:05 +08:00
catlog22
40e61b30d6 feat: Add multi-endpoint support and load balancing to enhance LiteLLM embedding management 2025-12-25 11:01:08 +08:00
catlog22
3c3ce55842 feat: Add support for the LiteLLM embedding backend and improve concurrent API call capacity 2025-12-24 22:20:13 +08:00
catlog22
e3e61bcae9 feat: Enhance LiteLLM integration and CLI management
- Added token estimation and batching functionality in LiteLLMEmbedder to handle large text inputs efficiently.
- Updated embed method to support max_tokens_per_batch parameter for better API call management.
- Introduced new API routes for managing custom CLI endpoints, including GET, POST, PUT, and DELETE methods.
- Enhanced CLI history component to support source directory context for native session content.
- Improved error handling and logging in various components for better debugging and user feedback.
- Added internationalization support for new API endpoint features in the i18n module.
- Updated CodexLens CLI commands to allow for concurrent API calls with a max_workers option.
- Enhanced embedding manager to track model information and handle embeddings generation more robustly.
- Added entry points for CLI commands in the package configuration.
2025-12-24 18:01:26 +08:00
catlog22
dfca4d60ee feat: Add LiteLLM embedding backend support and related configuration options 2025-12-24 16:41:04 +08:00
catlog22
e671b45948 feat: Enhance configuration management and embedding capabilities
- Added JSON-based settings management in Config class for embedding and LLM configurations.
- Introduced methods to save and load settings from a JSON file.
- Updated BaseEmbedder and its subclasses to include max_tokens property for better token management.
- Enhanced chunking strategy to support recursive splitting of large symbols with improved overlap handling.
- Implemented comprehensive tests for recursive splitting and chunking behavior.
- Added CLI tools configuration management for better integration with external tools.
- Introduced a new command for compacting session memory into structured text for recovery.
2025-12-24 16:32:27 +08:00
catlog22
b00113d212 feat: Enhance embedding management and model configuration
- Updated embedding_manager.py to include backend parameter in model configuration.
- Modified model_manager.py to utilize cache_name for ONNX models.
- Refactored hybrid_search.py to improve embedder initialization based on backend type.
- Added backend column to vector_store.py for better model configuration management.
- Implemented migration for existing database to include backend information.
- Enhanced API settings implementation with comprehensive provider and endpoint management.
- Introduced LiteLLM integration guide detailing configuration and usage.
- Added examples for LiteLLM usage in TypeScript.
2025-12-24 14:03:59 +08:00
catlog22
9b926d1a1e docs: Sync README_CN.md with English version
- Add Smithery badge
- Update project description to match English (JSON-driven multi-agent framework)
- Change installation from script to npm install (recommended)
- Minor text adjustments for consistency

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-24 14:03:45 +08:00
catlog22
98c9f1a830 fix: Add api-settings to server.ts MODULE_FILES array
server.ts has a duplicate MODULE_FILES/MODULE_CSS_FILES array separate
from dashboard-generator.ts. The api-settings files were missing from
this duplicate list, causing renderApiSettings to be undefined.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-23 21:14:27 +08:00
catlog22
46ac591fe8 Merge branch 'main' of https://github.com/catlog22/Claude-Code-Workflow 2025-12-23 20:46:01 +08:00
catlog22
bf66b095c7 feat: Add unified LiteLLM API management with dashboard UI and CLI integration
- Create ccw-litellm Python package with AbstractEmbedder and AbstractLLMClient interfaces
- Add BaseEmbedder abstraction and factory pattern to codex-lens for pluggable backends
- Implement API Settings dashboard page for provider credentials and custom endpoints
- Add REST API routes for CRUD operations on providers and endpoints
- Extend CLI with --model parameter for custom endpoint routing
- Integrate existing context-cache for @pattern file resolution
- Add provider model registry with predefined models per provider type
- Include i18n translations (en/zh) for all new UI elements

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-23 20:36:32 +08:00
catlog22
5228581324 feat: Add context_cache MCP tool with simplified CLI options
Add context_cache MCP tool for caching files by @patterns:
- pattern-parser.ts: Parse @expressions using glob
- context-cache-store.ts: In-memory cache with TTL/LRU
- context-cache.ts: MCP tool with pack/read/status/release/cleanup

Simplify CLI cache options:
- --cache now uses comma-separated format instead of JSON
- Items starting with @ are patterns, others are text content
- Add --inject-mode option (none/full/progressive)
- Default: codex=full, gemini/qwen=none

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-23 19:54:05 +08:00
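The comma-separated `--cache` rule can be sketched like this (simplified; the real pattern-parser.ts likely handles quoting and glob expansion differently):
```typescript
interface CacheItem { kind: 'pattern' | 'text'; value: string; }

function parseCacheOption(raw: string): CacheItem[] {
  return raw
    .split(',')
    .map(s => s.trim())
    .filter(Boolean)
    .map(s =>
      s.startsWith('@')
        ? { kind: 'pattern', value: s.slice(1) }   // @src/**/*.ts -> glob pattern
        : { kind: 'text', value: s },              // anything else -> literal text
    );
}

// Example: parseCacheOption('@src/**/*.ts, remember the auth flow')
// -> [{ kind: 'pattern', value: 'src/**/*.ts' },
//     { kind: 'text', value: 'remember the auth flow' }]
```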
catlog22
c9c704e671 Merge pull request #42 from rhyme227/fix/cross-platform-path-handling
fix(hooks): correct cross-platform path handling in getProjectSettingsPath
2025-12-23 18:49:35 +08:00
catlog22
16d4c7c646 feat: Add prompt structure to the write-mode protocol 2025-12-23 18:49:04 +08:00
catlog22
39056292b7 feat: Add CodexLens Manager to dashboard and enhance GPU management
- Introduced a new CodexLens Manager item in the dashboard for easier access.
- Implemented GPU management commands in the CLI, including listing available GPUs, selecting a specific GPU, and resetting to automatic detection.
- Enhanced the embedding generation process to utilize GPU resources more effectively, including batch size optimization for better performance.
- Updated the embedder to support device ID options for GPU selection, ensuring compatibility with DirectML and CUDA.
- Added detailed logging and error handling for GPU detection and selection processes.
- Updated package version to 6.2.9 and added comprehensive documentation for Codex Agent Execution Protocol.
2025-12-23 18:35:30 +08:00
rhyme
87ffd283ce fix(hooks): correct cross-platform path handling in getProjectSettingsPath
Remove incorrect path separator conversion that caused directory creation
issues on Linux/WSL platforms. The function was converting forward slashes
to backslashes, which are treated as literal filename characters on Unix
systems rather than path separators.

Changes:
- Remove manual path normalization in getProjectSettingsPath()
- Rely on Node.js path.join() for cross-platform compatibility
- Fix affects both hooks-routes.ts and mcp-routes.ts

Impact:
- Linux/WSL: Fixes incorrect directory creation
- Windows: No behavior change, maintains correct functionality

Fixes project-level hook settings being saved to wrong location when
using Dashboard frontend on Linux/WSL systems.
2025-12-23 17:58:33 +08:00
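A before/after sketch of the kind of bug described, with an assumed settings path (the actual function in hooks-routes.ts is not reproduced here):
```typescript
import { join } from 'path';

// Problematic pattern: hand-converting separators produces literal
// backslashes on Linux/WSL, where '\' is a filename character.
function getProjectSettingsPathOld(projectRoot: string): string {
  return join(projectRoot, '.claude', 'settings.json').replace(/\//g, '\\');
}

// Fixed pattern: let path.join pick the platform separator.
function getProjectSettingsPath(projectRoot: string): string {
  return join(projectRoot, '.claude', 'settings.json');
}
```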
catlog22
8eb42816f1 Merge pull request #40 from rhyme227/fix/cache-manager-esm-compatibility
fix(core): replace require() with ESM imports in cache-manager
2025-12-23 16:36:31 +08:00
rhyme
ebdf64c0b9 fix(core): replace require() with ESM imports in cache-manager
Remove CommonJS require() calls that caused "require is not defined"
errors when scanning .workflow directories in ESM context.

Changes:
- Add unlinkSync and readdirSync to fs import statement
- Replace require('fs').unlinkSync() with direct unlinkSync() call
- Replace require('fs').readdirSync() with direct readdirSync() call

Fixes: Cannot scan directory .workflow/active: require is not defined

File: ccw/src/core/cache-manager.ts
2025-12-23 15:33:29 +08:00
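The same change pattern in sketch form (illustrative helper, not the literal cache-manager.ts code):
```typescript
// Before (fails under ESM): const files = require('fs').readdirSync(dir);
import { unlinkSync, readdirSync } from 'fs';
import { join } from 'path';

// Remove every entry in a .workflow directory using direct ESM imports,
// with no require() calls.
function cleanWorkflowDir(dir: string): void {
  for (const name of readdirSync(dir)) {
    unlinkSync(join(dir, name));
  }
}
```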
catlog22
caab5f476e Merge pull request #39 from rhyme227/fix/codexlens-model-cache-detection
fix(codexlens): correct fastembed 0.7.4 cache path and download trigger
2025-12-23 15:23:13 +08:00
rhyme
1998f3ae8a fix(codexlens): correct fastembed 0.7.4 cache path and download trigger
- Update cache path to ~/.cache/huggingface (HuggingFace Hub default)
- Fix model path format: models--{org}--{model}
- Add .embed() call to trigger actual download in download_model()
- Ensure cross-platform compatibility (Linux/Windows)
2025-12-23 14:51:08 +08:00
catlog22
5ff2a43b70 bump version to 6.2.9 2025-12-23 10:28:48 +08:00
catlog22
3cd842ca1a fix: ccw package.json removal - add root build script and fix cli.ts path resolution
- Fix cli.ts loadPackageInfo() to try root package.json first (../../package.json)
- Add build script and devDependencies to root package.json
- Remove ccw/package.json and ccw/package-lock.json (no longer needed)
- CodexLens: add config.json support for index_dir configuration

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-23 10:25:15 +08:00
108 changed files with 20547 additions and 4435 deletions

View File

@@ -2,9 +2,24 @@
- **CLI Tools Usage**: @~/.claude/workflows/cli-tools-usage.md
- **Coding Philosophy**: @~/.claude/workflows/coding-philosophy.md
- **Context Requirements**: @~/.claude/workflows/context-tools.md
- **Context Requirements**: @~/.claude/workflows/context-tools-ace.md
- **File Modification**: @~/.claude/workflows/file-modification.md
- **CLI Endpoints Config**: @.claude/cli-tools.json
## CLI Endpoints
**Strictly follow the @.claude/cli-tools.json configuration**
Available CLI endpoints are dynamically defined by the config file:
- Built-in tools and their enable/disable status
- Custom API endpoints registered via the Dashboard
- Managed through the CCW Dashboard Status page
## Agent Execution
- **Always use `run_in_background = false`** for Task tool agent calls to ensure synchronous execution and immediate result visibility
- **Always use `run_in_background: false`** for Task tool agent calls: `Task({ subagent_type: "xxx", prompt: "...", run_in_background: false })` to ensure synchronous execution and immediate result visibility
- **TaskOutput usage**: Only use `TaskOutput({ task_id: "xxx", block: false })` + sleep loop to poll completion status. NEVER read intermediate output during agent/CLI execution - wait for final result only
## Code Diagnostics
- **Prefer `mcp__ide__getDiagnostics`** for code error checking over shell-based TypeScript compilation

47
.claude/cli-tools.json Normal file
View File

@@ -0,0 +1,47 @@
{
"version": "1.0.0",
"tools": {
"gemini": {
"enabled": true,
"isBuiltin": true,
"command": "gemini",
"description": "Google AI for code analysis"
},
"qwen": {
"enabled": true,
"isBuiltin": true,
"command": "qwen",
"description": "Alibaba AI assistant"
},
"codex": {
"enabled": true,
"isBuiltin": true,
"command": "codex",
"description": "OpenAI code generation"
},
"claude": {
"enabled": true,
"isBuiltin": true,
"command": "claude",
"description": "Anthropic AI assistant"
}
},
"customEndpoints": [],
"defaultTool": "gemini",
"settings": {
"promptFormat": "plain",
"smartContext": {
"enabled": false,
"maxFiles": 10
},
"nativeResume": true,
"recursiveQuery": true,
"cache": {
"injectionMode": "auto",
"defaultPrefix": "",
"defaultSuffix": ""
},
"codeIndexMcp": "ace"
},
"$schema": "./cli-tools.schema.json"
}

View File

@@ -140,11 +140,17 @@ function selectAngles(taskDescription, count) {
const selectedAngles = selectAngles(task_description, complexity === 'High' ? 4 : (complexity === 'Medium' ? 3 : 1))
// Planning strategy determination
const planningStrategy = complexity === 'Low'
? 'Direct Claude Planning'
: 'cli-lite-planning-agent'
console.log(`
## Exploration Plan
Task Complexity: ${complexity}
Selected Angles: ${selectedAngles.join(', ')}
Planning Strategy: ${planningStrategy}
Launching ${selectedAngles.length} parallel explorations...
`)
@@ -358,10 +364,7 @@ if (dedupedClarifications.length > 0) {
```javascript
// Assignment rules (highest priority first):
// 1. User explicitly specifies: "analyze with gemini ..." → gemini, "implement with codex ..." → codex
// 2. Inferred from task type:
// - analyze|review|evaluate|explore → gemini
// - implement|create|modify|fix → codex (complex) or agent (simple)
// 3. Default → agent
// 2. Default → agent
const executorAssignments = {} // { taskId: { executor: 'gemini'|'codex'|'agent', reason: string } }
plan.tasks.forEach(task => {

View File

@@ -1,10 +1,17 @@
# Analysis Mode Protocol
## Mode Definition
**Mode**: `analysis` (READ-ONLY)
**Tools**: Gemini, Qwen (default mode)
## Prompt Structure
```
PURPOSE: [development goal]
TASK: [specific implementation task]
MODE: [auto|write]
CONTEXT: [file patterns]
EXPECTED: [deliverables]
RULES: [templates | additional constraints]
```
## Operation Boundaries
### ALLOWED Operations
@@ -27,8 +34,8 @@
2. **Read** and analyze CONTEXT files thoroughly
3. **Identify** patterns, issues, and dependencies
4. **Generate** insights and recommendations
5. **Output** structured analysis (text response only)
6. **Validate** EXPECTED deliverables met
5. **Validate** EXPECTED deliverables met
6. **Output** structured analysis (text response only)
## Core Requirements

View File

@@ -1,10 +1,14 @@
# Write Mode Protocol
## Prompt Structure
## Mode Definition
**Mode**: `write` (FILE OPERATIONS) / `auto` (FULL OPERATIONS)
**Tools**: Codex (auto), Gemini/Qwen (write)
```
PURPOSE: [development goal]
TASK: [specific implementation task]
MODE: [auto|write]
CONTEXT: [file patterns]
EXPECTED: [deliverables]
RULES: [templates | additional constraints]
```
## Operation Boundaries
### MODE: write
@@ -15,12 +19,6 @@
**Restrictions**: Follow project conventions, cannot break existing functionality
### MODE: auto (Codex only)
- All `write` mode operations
- Run tests and builds
- Commit code incrementally
- Full autonomous development
**Constraint**: Must test every change
## Execution Flow
@@ -33,16 +31,6 @@
5. **Validate** changes
6. **Report** file changes
### MODE: auto
1. **Parse** all 6 fields
2. **Analyze** CONTEXT files - find 3+ similar patterns
3. **Plan** implementation following RULES
4. **Generate** code with tests
5. **Run** tests continuously
6. **Commit** working code incrementally
7. **Validate** EXPECTED deliverables
8. **Report** results
## Core Requirements
**ALWAYS**:
@@ -61,17 +49,6 @@
- Break backward compatibility
- Exceed 3 failed attempts without stopping
## Multi-Task Execution (Resume)
**First subtask**: Standard execution flow
**Subsequent subtasks** (via `resume`):
- Recall context from previous subtasks
- Build on previous work
- Maintain consistency
- Test integration
- Report context for next subtask
## Error Handling
**Three-Attempt Rule**: On 3rd failure, stop and report what attempted, what failed, root cause
@@ -92,7 +69,7 @@
**If template has no format** → Use default format below
### Single Task Implementation
### Task Implementation
```markdown
# Implementation: [TASK Title]
@@ -124,48 +101,6 @@
[Recommendations if any]
```
### Multi-Task (First Subtask)
```markdown
# Subtask 1/N: [TASK Title]
## Changes
[List of file changes]
## Implementation
[Details with code references]
## Testing
✅ Tests: X passing
## Context for Next Subtask
- Key decisions: [established patterns]
- Files created: [paths and purposes]
- Integration points: [where next subtask should connect]
```
### Multi-Task (Subsequent Subtasks)
```markdown
# Subtask N/M: [TASK Title]
## Changes
[List of file changes]
## Integration Notes
✅ Compatible with previous subtask
✅ Maintains established patterns
## Implementation
[Details with code references]
## Testing
✅ Tests: X passing
## Context for Next Subtask
[If not final, provide context]
```
### Partial Completion
```markdown

View File

@@ -362,10 +362,6 @@ ccw cli -p "RULES: \$(cat ~/.claude/workflows/cli-templates/protocols/analysis-p
- Description: Additional directories (comma-separated)
- Default: none
- **`--timeout <ms>`**
- Description: Timeout in milliseconds
- Default: 300000
- **`--resume [id]`**
- Description: Resume previous session
- Default: -
@@ -430,7 +426,7 @@ MODE: analysis
CONTEXT: @src/auth/**/* @src/middleware/auth.ts | Memory: Using bcrypt for passwords, JWT for sessions
EXPECTED: Security report with: severity matrix, file:line references, CVE mappings where applicable, remediation code snippets prioritized by risk
RULES: $(cat ~/.claude/workflows/cli-templates/protocols/analysis-protocol.md) $(cat ~/.claude/workflows/cli-templates/prompts/analysis/03-assess-security-risks.txt) | Focus on authentication | Ignore test files
" --tool gemini --cd src/auth --timeout 600000
" --tool gemini --mode analysis --cd src/auth
```
**Implementation Task** (New Feature):
@@ -442,7 +438,7 @@ MODE: write
CONTEXT: @src/middleware/**/* @src/config/**/* | Memory: Using Express.js, Redis already configured, existing middleware pattern in auth.ts
EXPECTED: Production-ready code with: TypeScript types, unit tests, integration test, configuration example, migration guide
RULES: $(cat ~/.claude/workflows/cli-templates/protocols/write-protocol.md) $(cat ~/.claude/workflows/cli-templates/prompts/development/02-implement-feature.txt) | Follow existing middleware patterns | No breaking changes
" --tool codex --mode write --timeout 1800000
" --tool codex --mode write
```
**Bug Fix Task**:
@@ -454,7 +450,7 @@ MODE: analysis
CONTEXT: @src/websocket/**/* @src/services/connection-manager.ts | Memory: Using ws library, ~5000 concurrent connections in production
EXPECTED: Root cause analysis with: memory profile, leak source (file:line), fix recommendation with code, verification steps
RULES: $(cat ~/.claude/workflows/cli-templates/protocols/analysis-protocol.md) $(cat ~/.claude/workflows/cli-templates/prompts/analysis/01-diagnose-bug-root-cause.txt) | Focus on resource cleanup
" --tool gemini --cd src --timeout 900000
" --tool gemini --mode analysis --cd src
```
**Refactoring Task**:
@@ -466,30 +462,25 @@ MODE: write
CONTEXT: @src/payments/**/* @src/types/payment.ts | Memory: Currently only Stripe, adding PayPal next sprint, must support future gateways
EXPECTED: Refactored code with: strategy interface, concrete implementations, factory class, updated tests, migration checklist
RULES: $(cat ~/.claude/workflows/cli-templates/protocols/write-protocol.md) $(cat ~/.claude/workflows/cli-templates/prompts/development/02-refactor-codebase.txt) | Preserve all existing behavior | Tests must pass
" --tool gemini --mode write --timeout 1200000
" --tool gemini --mode write
```
---
## Configuration
## ⚙️ Execution Configuration
### Timeout Allocation
### Dynamic Timeout Allocation
**Minimum**: 5 minutes (300000ms)
**Minimum timeout: 5 minutes (300000ms)** - Never set below this threshold.
- **Simple**: 5-10min (300000-600000ms)
- Examples: Analysis, search
**Timeout Ranges**:
- **Simple** (analysis, search): 5-10min (300000-600000ms)
- **Medium** (refactoring, documentation): 10-20min (600000-1200000ms)
- **Complex** (implementation, migration): 20-60min (1200000-3600000ms)
- **Heavy** (large codebase, multi-file): 60-120min (3600000-7200000ms)
- **Medium**: 10-20min (600000-1200000ms)
- Examples: Refactoring, documentation
- **Complex**: 20-60min (1200000-3600000ms)
- Examples: Implementation, migration
- **Heavy**: 60-120min (3600000-7200000ms)
- Examples: Large codebase, multi-file
**Codex Multiplier**: 3x allocated time (minimum 15min / 900000ms)
**Codex Multiplier**: 3x of allocated time (minimum 15min / 900000ms)
**Auto-detection**: Analyze PURPOSE and TASK fields to determine timeout
### Permission Framework
@@ -523,4 +514,3 @@ RULES: $(cat ~/.claude/workflows/cli-templates/protocols/write-protocol.md) $(ca
- [ ] **Tool selected** - `--tool gemini|qwen|codex`
- [ ] **Template applied (REQUIRED)** - Use specific or universal fallback template
- [ ] **Constraints specified** - Scope, requirements
- [ ] **Timeout configured** - Based on complexity

View File

@@ -0,0 +1,105 @@
## MCP Tools Usage
### search_context (ACE) - Code Search (REQUIRED - HIGHEST PRIORITY)
**OVERRIDES**: All other search/discovery rules in other workflow files
**When**: ANY code discovery task, including:
- Find code, understand codebase structure, locate implementations
- Explore unknown locations
- Verify file existence before reading
- Pattern-based file discovery
- Semantic code understanding
**Priority Rule**:
1. **Always use mcp__ace-tool__search_context FIRST** for any code/file discovery
2. Only use Built-in Grep for single-file exact line search (after location confirmed)
3. Only use Built-in Read for known, confirmed file paths
**How**:
```javascript
// Natural language code search - best for understanding and exploration
mcp__ace-tool__search_context({
project_root_path: "/path/to/project",
query: "authentication logic"
})
// With keywords for better semantic matching
mcp__ace-tool__search_context({
project_root_path: "/path/to/project",
query: "I want to find where the server handles user login. Keywords: auth, login, session"
})
```
**Good Query Examples**:
- "Where is the function that handles user authentication?"
- "What tests are there for the login functionality?"
- "How is the database connected to the application?"
- "I want to find where the server handles chunk merging. Keywords: upload chunk merge"
- "Locate where the system refreshes cached data. Keywords: cache refresh, invalidation"
**Bad Query Examples** (use grep or file view instead):
- "Find definition of constructor of class Foo" (use grep tool instead)
- "Find all references to function bar" (use grep tool instead)
- "Show me how Checkout class is used in services/payment.py" (use file view tool instead)
**Key Features**:
- Real-time index of the codebase (always up-to-date)
- Cross-language retrieval support
- Semantic search with embeddings
- No manual index initialization required
---
### read_file - Read File Contents
**When**: Read files found by search_context
**How**:
```javascript
read_file(path="/path/to/file.ts") // Single file
read_file(path="/src/**/*.config.ts") // Pattern matching
```
---
### edit_file - Modify Files
**When**: Built-in Edit tool fails or need advanced features
**How**:
```javascript
edit_file(path="/file.ts", old_string="...", new_string="...", mode="update")
edit_file(path="/file.ts", line=10, content="...", mode="insert_after")
```
**Modes**: `update` (replace text), `insert_after`, `insert_before`, `delete_line`
---
### write_file - Create/Overwrite Files
**When**: Create new files or completely replace content
**How**:
```javascript
write_file(path="/new-file.ts", content="...")
```
---
### Exa - External Search
**When**: Find documentation/examples outside codebase
**How**:
```javascript
mcp__exa__search(query="React hooks 2025 documentation")
mcp__exa__search(query="FastAPI auth example", numResults=10)
mcp__exa__search(query="latest API docs", livecrawl="always")
```
**Parameters**:
- `query` (required): Search query string
- `numResults` (optional): Number of results to return (default: 5)
- `livecrawl` (optional): `"always"` or `"fallback"` for live crawling

View File

@@ -21,8 +21,11 @@
- Graceful degradation
- Don't expose sensitive info
## Core Principles
**Incremental Progress**:
- Small, testable changes
- Commit working code frequently
@@ -43,11 +46,58 @@
- Maintain established patterns
- Test integration between subtasks
## System Optimization
**Direct Binary Calls**: Always call binaries directly in `functions.shell`, set `workdir`, avoid shell wrappers (`bash -lc`, `cmd /c`, etc.)
**Text Editing Priority**:
1. Use `apply_patch` tool for all routine text edits
2. Fall back to `sed` for single-line substitutions if unavailable
3. Avoid Python editing scripts unless both fail
**apply_patch invocation**:
```json
{
"command": ["apply_patch", "*** Begin Patch\n*** Update File: path/to/file\n@@\n- old\n+ new\n*** End Patch\n"],
"workdir": "<workdir>",
"justification": "Brief reason"
}
```
**Windows UTF-8 Encoding** (before commands):
```powershell
[Console]::InputEncoding = [Text.UTF8Encoding]::new($false)
[Console]::OutputEncoding = [Text.UTF8Encoding]::new($false)
chcp 65001 > $null
```
## Context Acquisition (MCP Tools Priority)
**For task context gathering and analysis, ALWAYS prefer MCP tools**:
1. **smart_search** - First choice for code discovery
- Use `smart_search(query="...")` for semantic/keyword search
- Use `smart_search(action="find_files", pattern="*.ts")` for file discovery
- Supports modes: `auto`, `hybrid`, `exact`, `ripgrep`
2. **read_file** - Batch file reading
- Read multiple files in parallel: `read_file(path="file1.ts")`, `read_file(path="file2.ts")`
- Supports glob patterns: `read_file(path="src/**/*.config.ts")`
**Priority Order**:
```
smart_search (discovery) → read_file (batch read) → shell commands (fallback)
```
**NEVER** use shell commands (`cat`, `find`, `grep`) when MCP tools are available.
## Execution Checklist
**Before**:
- [ ] Understand PURPOSE and TASK clearly
- [ ] Review CONTEXT files, find 3+ patterns
- [ ] Use smart_search to discover relevant files
- [ ] Use read_file to batch read context files, find 3+ patterns
- [ ] Check RULES templates and constraints
**During**:

Binary file not shown.

378
.codex/prompts/compact.md Normal file
View File

@@ -0,0 +1,378 @@
---
description: Compact current session memory into structured text for session recovery
argument-hint: "[optional: session description]"
---
# Memory Compact Command (/memory:compact)
## 1. Overview
The `memory:compact` command **compresses current session working memory** into structured text optimized for **session recovery**, extracts critical information, and saves it to persistent storage via MCP `core_memory` tool.
**Core Philosophy**:
- **Session Recovery First**: Capture everything needed to resume work seamlessly
- **Minimize Re-exploration**: Include file paths, decisions, and state to avoid redundant analysis
- **Preserve Train of Thought**: Keep notes and hypotheses for complex debugging
- **Actionable State**: Record last action result and known issues
## 2. Parameters
- `"session description"` (Optional): Session description to supplement objective
- Example: "completed core-memory module"
- Example: "debugging JWT refresh - suspected memory leak"
## 3. Structured Output Format
```markdown
## Session ID
[WFS-ID if workflow session active, otherwise (none)]
## Project Root
[Absolute path to project root, e.g., D:\Claude_dms3]
## Objective
[High-level goal - the "North Star" of this session]
## Execution Plan
[CRITICAL: Embed the LATEST plan in its COMPLETE and DETAILED form]
### Source: [workflow | todo | user-stated | inferred]
<details>
<summary>Full Execution Plan (Click to expand)</summary>
[PRESERVE COMPLETE PLAN VERBATIM - DO NOT SUMMARIZE]
- ALL phases, tasks, subtasks
- ALL file paths (absolute)
- ALL dependencies and prerequisites
- ALL acceptance criteria
- ALL status markers ([x] done, [ ] pending)
- ALL notes and context
Example:
## Phase 1: Setup
- [x] Initialize project structure
- Created D:\Claude_dms3\src\core\index.ts
- Added dependencies: lodash, zod
- [ ] Configure TypeScript
- Update tsconfig.json for strict mode
## Phase 2: Implementation
- [ ] Implement core API
- Target: D:\Claude_dms3\src\api\handler.ts
- Dependencies: Phase 1 complete
- Acceptance: All tests pass
</details>
## Working Files (Modified)
[Absolute paths to actively modified files]
- D:\Claude_dms3\src\file1.ts (role: main implementation)
- D:\Claude_dms3\tests\file1.test.ts (role: unit tests)
## Reference Files (Read-Only)
[Absolute paths to context files - NOT modified but essential for understanding]
- D:\Claude_dms3\.claude\CLAUDE.md (role: project instructions)
- D:\Claude_dms3\src\types\index.ts (role: type definitions)
- D:\Claude_dms3\package.json (role: dependencies)
## Last Action
[Last significant action and its result/status]
## Decisions
- [Decision]: [Reasoning]
- [Decision]: [Reasoning]
## Constraints
- [User-specified limitation or preference]
## Dependencies
- [Added/changed packages or environment requirements]
## Known Issues
- [Deferred bug or edge case]
## Changes Made
- [Completed modification]
## Pending
- [Next step] or (none)
## Notes
[Unstructured thoughts, hypotheses, debugging trails]
```
## 4. Field Definitions
| Field | Purpose | Recovery Value |
|-------|---------|----------------|
| **Session ID** | Workflow session identifier (WFS-*) | Links memory to specific stateful task execution |
| **Project Root** | Absolute path to project directory | Enables correct path resolution in new sessions |
| **Objective** | Ultimate goal of the session | Prevents losing track of broader feature |
| **Execution Plan** | Complete plan from any source (verbatim) | Preserves full planning context, avoids re-planning |
| **Working Files** | Actively modified files (absolute paths) | Immediately identifies where work was happening |
| **Reference Files** | Read-only context files (absolute paths) | Eliminates re-exploration for critical context |
| **Last Action** | Final tool output/status | Immediate state awareness (success/failure) |
| **Decisions** | Architectural choices + reasoning | Prevents re-litigating settled decisions |
| **Constraints** | User-imposed limitations | Maintains personalized coding style |
| **Dependencies** | Package/environment changes | Prevents missing dependency errors |
| **Known Issues** | Deferred bugs/edge cases | Ensures issues aren't forgotten |
| **Changes Made** | Completed modifications | Clear record of what was done |
| **Pending** | Next steps | Immediate action items |
| **Notes** | Hypotheses, debugging trails | Preserves "train of thought" |
## 5. Execution Flow
### Step 1: Analyze Current Session
Extract the following from conversation history:
```javascript
const sessionAnalysis = {
sessionId: "", // WFS-* if workflow session active, null otherwise
projectRoot: "", // Absolute path: D:\Claude_dms3
objective: "", // High-level goal (1-2 sentences)
executionPlan: {
source: "workflow" | "todo" | "user-stated" | "inferred",
content: "" // Full plan content - ALWAYS preserve COMPLETE and DETAILED form
},
workingFiles: [], // {absolutePath, role} - modified files
referenceFiles: [], // {absolutePath, role} - read-only context files
lastAction: "", // Last significant action + result
decisions: [], // {decision, reasoning}
constraints: [], // User-specified limitations
dependencies: [], // Added/changed packages
knownIssues: [], // Deferred bugs
changesMade: [], // Completed modifications
pending: [], // Next steps
notes: "" // Unstructured thoughts
}
```
### Step 2: Generate Structured Text
```javascript
// Helper: Generate execution plan section
const generateExecutionPlan = (plan) => {
const sourceLabels = {
'workflow': 'workflow (IMPL_PLAN.md)',
'todo': 'todo (TodoWrite)',
'user-stated': 'user-stated',
'inferred': 'inferred'
};
// CRITICAL: Preserve complete plan content verbatim - DO NOT summarize
return `### Source: ${sourceLabels[plan.source] || plan.source}
<details>
<summary>Full Execution Plan (Click to expand)</summary>
${plan.content}
</details>`;
};
const structuredText = `## Session ID
${sessionAnalysis.sessionId || '(none)'}
## Project Root
${sessionAnalysis.projectRoot}
## Objective
${sessionAnalysis.objective}
## Execution Plan
${generateExecutionPlan(sessionAnalysis.executionPlan)}
## Working Files (Modified)
${sessionAnalysis.workingFiles.map(f => `- ${f.absolutePath} (role: ${f.role})`).join('\n') || '(none)'}
## Reference Files (Read-Only)
${sessionAnalysis.referenceFiles.map(f => `- ${f.absolutePath} (role: ${f.role})`).join('\n') || '(none)'}
## Last Action
${sessionAnalysis.lastAction}
## Decisions
${sessionAnalysis.decisions.map(d => `- ${d.decision}: ${d.reasoning}`).join('\n') || '(none)'}
## Constraints
${sessionAnalysis.constraints.map(c => `- ${c}`).join('\n') || '(none)'}
## Dependencies
${sessionAnalysis.dependencies.map(d => `- ${d}`).join('\n') || '(none)'}
## Known Issues
${sessionAnalysis.knownIssues.map(i => `- ${i}`).join('\n') || '(none)'}
## Changes Made
${sessionAnalysis.changesMade.map(c => `- ${c}`).join('\n') || '(none)'}
## Pending
${sessionAnalysis.pending.length > 0
? sessionAnalysis.pending.map(p => `- ${p}`).join('\n')
: '(none)'}
## Notes
${sessionAnalysis.notes || '(none)'}`
```
### Step 3: Import to Core Memory via MCP
Use the MCP `core_memory` tool to save the structured text:
```javascript
mcp__ccw-tools__core_memory({
operation: "import",
text: structuredText
})
```
Or via CLI (pipe structured text to import):
```bash
# Write structured text to temp file, then import
echo "$structuredText" | ccw core-memory import
# Or from a file
ccw core-memory import --file /path/to/session-memory.md
```
**Response Format**:
```json
{
"operation": "import",
"id": "CMEM-YYYYMMDD-HHMMSS",
"message": "Created memory: CMEM-YYYYMMDD-HHMMSS"
}
```
### Step 4: Report Recovery ID
After successful import, **clearly display the Recovery ID** to the user:
```
╔════════════════════════════════════════════════════════════════════════════╗
║ ✓ Session Memory Saved ║
║ ║
║ Recovery ID: CMEM-YYYYMMDD-HHMMSS ║
║ ║
║ To restore: "Please import memory <ID>" ║
║ (MCP: core_memory export | CLI: ccw core-memory export --id <ID>) ║
╚════════════════════════════════════════════════════════════════════════════╝
```
## 6. Quality Checklist
Before generating:
- [ ] Session ID captured if workflow session active (WFS-*)
- [ ] Project Root is absolute path (e.g., D:\Claude_dms3)
- [ ] Objective clearly states the "North Star" goal
- [ ] Execution Plan: COMPLETE plan preserved VERBATIM (no summarization)
- [ ] Plan Source: Clearly identified (workflow | todo | user-stated | inferred)
- [ ] Plan Details: ALL phases, tasks, file paths, dependencies, status markers included
- [ ] All file paths are ABSOLUTE (not relative)
- [ ] Working Files: 3-8 modified files with roles
- [ ] Reference Files: Key context files (CLAUDE.md, types, configs)
- [ ] Last Action captures final state (success/failure)
- [ ] Decisions include reasoning, not just choices
- [ ] Known Issues separates deferred from forgotten bugs
- [ ] Notes preserve debugging hypotheses if any
## 7. Path Resolution Rules
### Project Root Detection
1. Check current working directory from environment
2. Look for project markers: `.git/`, `package.json`, `.claude/`
3. Use the topmost directory containing these markers
### Absolute Path Conversion
```javascript
// Convert relative to absolute
const toAbsolutePath = (relativePath, projectRoot) => {
if (path.isAbsolute(relativePath)) return relativePath;
return path.join(projectRoot, relativePath);
};
// Example: "src/api/auth.ts" → "D:\Claude_dms3\src\api\auth.ts"
```
### Reference File Categories
| Category | Examples | Priority |
|----------|----------|----------|
| Project Config | `.claude/CLAUDE.md`, `package.json`, `tsconfig.json` | High |
| Type Definitions | `src/types/*.ts`, `*.d.ts` | High |
| Related Modules | Parent/sibling modules with shared interfaces | Medium |
| Test Files | Corresponding test files for modified code | Medium |
| Documentation | `README.md`, `ARCHITECTURE.md` | Low |
## 8. Plan Detection (Priority Order)
### Priority 1: Workflow Session (IMPL_PLAN.md)
```javascript
// Check for active workflow session
const manifest = await mcp__ccw-tools__session_manager({
operation: "list",
location: "active"
});
if (manifest.sessions?.length > 0) {
const session = manifest.sessions[0];
const plan = await mcp__ccw-tools__session_manager({
operation: "read",
session_id: session.id,
content_type: "plan"
});
sessionAnalysis.sessionId = session.id;
sessionAnalysis.executionPlan.source = "workflow";
sessionAnalysis.executionPlan.content = plan.content;
}
```
### Priority 2: TodoWrite (Current Session Todos)
```javascript
// Extract from conversation - look for TodoWrite tool calls
// Preserve COMPLETE todo list with all details
const todos = extractTodosFromConversation();
if (todos.length > 0) {
sessionAnalysis.executionPlan.source = "todo";
// Format todos with full context - preserve status markers
sessionAnalysis.executionPlan.content = todos.map(t =>
`- [${t.status === 'completed' ? 'x' : t.status === 'in_progress' ? '>' : ' '}] ${t.content}`
).join('\n');
}
```
### Priority 3: User-Stated Plan
```javascript
// Look for explicit plan statements in user messages:
// - "Here's my plan: 1. ... 2. ... 3. ..."
// - "I want to: first..., then..., finally..."
// - Numbered or bulleted lists describing steps
const userPlan = extractUserStatedPlan();
if (userPlan) {
sessionAnalysis.executionPlan.source = "user-stated";
sessionAnalysis.executionPlan.content = userPlan;
}
```
### Priority 4: Inferred Plan
```javascript
// If no explicit plan, infer from:
// - Task description and breakdown discussion
// - Sequence of actions taken
// - Outstanding work mentioned
const inferredPlan = inferPlanFromDiscussion();
if (inferredPlan) {
sessionAnalysis.executionPlan.source = "inferred";
sessionAnalysis.executionPlan.content = inferredPlan;
}
```
## 9. Notes
- **Timing**: Execute at task completion or before context switch
- **Frequency**: Once per independent task or milestone
- **Recovery**: New session can immediately continue with full context
- **Knowledge Graph**: Entity relationships auto-extracted for visualization
- **Absolute Paths**: Critical for cross-session recovery on different machines

View File

@@ -1,25 +1,62 @@
# Gemini Code Guidelines
## Code Quality Standards
### Code Quality
- Follow project's existing patterns
- Match import style and naming conventions
- Single responsibility per function/class
- DRY (Don't Repeat Yourself)
- YAGNI (You Aren't Gonna Need It)
### Testing
- Test all public functions
- Test edge cases and error conditions
- Mock external dependencies
- Target 80%+ coverage
### Error Handling
- Proper try-catch blocks
- Clear error messages
- Graceful degradation
- Don't expose sensitive info
## Core Principles
**Thoroughness**:
- Analyze ALL CONTEXT files completely
- Check cross-file patterns and dependencies
- Identify edge cases and quantify metrics
**Incremental Progress**:
- Small, testable changes
- Commit working code frequently
- Build on previous work (subtasks)
**Evidence-Based**:
- Quote relevant code with `file:line` references
- Link related patterns across files
- Support all claims with concrete examples
- Study 3+ similar patterns before implementing
- Match project style exactly
- Verify with existing code
**Actionable**:
- Clear, specific recommendations (not vague)
- Prioritized by impact
- Incremental changes over big rewrites
**Pragmatic**:
- Boring solutions over clever code
- Simple over complex
- Adapt to project reality
**Philosophy**:
- **Simple over complex** - Avoid over-engineering
- **Clear over clever** - Prefer obvious solutions
- **Learn from existing** - Reference project patterns
- **Pragmatic over dogmatic** - Adapt to project reality
- **Incremental progress** - Small, testable changes
**Context Continuity** (Multi-Task):
- Leverage resume for consistency
- Maintain established patterns
- Test integration between subtasks
## Execution Checklist
**Before**:
- [ ] Understand PURPOSE and TASK clearly
- [ ] Review CONTEXT files, find 3+ patterns
- [ ] Check RULES templates and constraints
**During**:
- [ ] Follow existing patterns exactly
- [ ] Write tests alongside code
- [ ] Run tests after every change
- [ ] Commit working code incrementally
**After**:
- [ ] All tests pass
- [ ] Coverage meets target
- [ ] Build succeeds
- [ ] All EXPECTED deliverables met

1
.gitignore vendored
View File

@@ -29,3 +29,4 @@ COMMAND_TEMPLATE_ORCHESTRATOR.md
settings.json
*.mcp.json
.mcp.json
.ace-tool/

View File

@@ -1,19 +0,0 @@
{
"mcpServers": {
"chrome-devtools": {
"type": "stdio",
"command": "npx",
"args": [
"chrome-devtools-mcp@latest"
],
"env": {}
},
"ccw-tools": {
"command": "ccw-mcp",
"args": [],
"env": {
"CCW_ENABLED_TOOLS": "write_file,edit_file,smart_search,core_memory"
}
}
}
}

331
AGENTS.md Normal file
View File

@@ -0,0 +1,331 @@
# Codex Agent Execution Protocol
## Overview
**Role**: Autonomous development, implementation, and testing specialist
## Prompt Structure
All prompts follow this 6-field format:
```
PURPOSE: [development goal]
TASK: [specific implementation task]
MODE: [auto|write]
CONTEXT: [file patterns]
EXPECTED: [deliverables]
RULES: [templates | additional constraints]
```
**Subtask indicator**: `Subtask N of M: [title]` or `CONTINUE TO NEXT SUBTASK`
## MODE Definitions
### MODE: auto (default)
**Permissions**:
- Full file operations (create/modify/delete)
- Run tests and builds
- Commit code incrementally
**Execute**:
1. Parse PURPOSE and TASK
2. Analyze CONTEXT files - find 3+ similar patterns
3. Plan implementation following RULES
4. Generate code with tests
5. Run tests continuously
6. Commit working code incrementally
7. Validate EXPECTED deliverables
8. Report results (with context for next subtask if multi-task)
**Constraint**: Must test every change
### MODE: write
**Permissions**:
- Focused file operations
- Create/modify specific files
- Run tests for validation
**Execute**:
1. Analyze CONTEXT files
2. Make targeted changes
3. Validate tests pass
4. Report file changes
## Execution Protocol
### Core Requirements
**ALWAYS**:
- Parse all 6 fields (PURPOSE, TASK, MODE, CONTEXT, EXPECTED, RULES)
- Study CONTEXT files - find 3+ similar patterns before implementing
- Apply RULES (templates + constraints) exactly
- Test continuously after every change
- Commit incrementally with working code
- Match project style and patterns exactly
- List all created/modified files at output beginning
- Use direct binary calls (avoid shell wrappers)
- Prefer apply_patch for text edits
- Configure Windows UTF-8 encoding for Chinese support
**NEVER**:
- Make assumptions without code verification
- Ignore existing patterns
- Skip tests
- Use clever tricks over boring solutions
- Over-engineer solutions
- Break existing code or backward compatibility
- Exceed 3 failed attempts without stopping
### RULES Processing
- Parse RULES field to extract template content and constraints
- Recognize `|` as separator: `template content | additional constraints`
- Apply ALL template guidelines as mandatory
- Apply ALL additional constraints as mandatory
- Treat rule violations as task failures
### Multi-Task Execution (Resume Pattern)
**First subtask**: Standard execution flow above
**Subsequent subtasks** (via `resume --last`):
- Recall context from previous subtasks
- Build on previous work (don't repeat)
- Maintain consistency with established patterns
- Focus on current subtask scope only
- Test integration with previous work
- Report context for next subtask
## System Optimization
**Direct Binary Calls**: Always call binaries directly in `functions.shell`, set `workdir`, avoid shell wrappers (`bash -lc`, `cmd /c`, etc.)
**Text Editing Priority**:
1. Use `apply_patch` tool for all routine text edits
2. Fall back to `sed` for single-line substitutions if unavailable
3. Avoid Python editing scripts unless both fail
**apply_patch invocation**:
```json
{
"command": ["apply_patch", "*** Begin Patch\n*** Update File: path/to/file\n@@\n- old\n+ new\n*** End Patch\n"],
"workdir": "<workdir>",
"justification": "Brief reason"
}
```
**Windows UTF-8 Encoding** (before commands):
```powershell
[Console]::InputEncoding = [Text.UTF8Encoding]::new($false)
[Console]::OutputEncoding = [Text.UTF8Encoding]::new($false)
chcp 65001 > $null
```
## Output Standards
### Format Priority
**If template defines output format** → Follow template format EXACTLY (all sections mandatory)
**If template has no format** → Use default format below based on task type
### Default Output Formats
#### Single Task Implementation
```markdown
# Implementation: [TASK Title]
## Changes
- Created: `path/to/file1.ext` (X lines)
- Modified: `path/to/file2.ext` (+Y/-Z lines)
- Deleted: `path/to/file3.ext`
## Summary
[2-3 sentence overview of what was implemented]
## Key Decisions
1. [Decision] - Rationale and reference to similar pattern
2. [Decision] - path/to/reference:line
## Implementation Details
[Evidence-based description with code references]
## Testing
- Tests written: X new tests
- Tests passing: Y/Z tests
- Coverage: N%
## Validation
✅ Tests: X passing
✅ Coverage: Y%
✅ Build: Success
## Next Steps
[Recommendations or future improvements]
```
#### Multi-Task Execution (with Resume)
**First Subtask**:
```markdown
# Subtask 1/N: [TASK Title]
## Changes
[List of file changes]
## Implementation
[Details with code references]
## Testing
✅ Tests: X passing
✅ Integration: Compatible with existing code
## Context for Next Subtask
- Key decisions: [established patterns]
- Files created: [paths and purposes]
- Integration points: [where next subtask should connect]
```
**Subsequent Subtasks**:
```markdown
# Subtask N/M: [TASK Title]
## Changes
[List of file changes]
## Integration Notes
✅ Compatible with subtask N-1
✅ Maintains established patterns
✅ Tests pass with previous work
## Implementation
[Details with code references]
## Testing
✅ Tests: X passing
✅ Total coverage: Y%
## Context for Next Subtask
[If not final subtask, provide context for continuation]
```
#### Partial Completion
```markdown
# Task Status: Partially Completed
## Completed
- [What worked successfully]
- Files: `path/to/completed.ext`
## Blocked
- **Issue**: [What failed]
- **Root Cause**: [Analysis of failure]
- **Attempted**: [Solutions tried - attempt X of 3]
## Required
[What's needed to proceed]
## Recommendation
[Suggested next steps or alternative approaches]
```
### Code References
**Format**: `path/to/file:line_number`
**Example**: `src/auth/jwt.ts:45` - Implemented token validation following pattern from `src/auth/session.ts:78`
### Related Files Section
**Always include at output beginning** - List ALL files analyzed, created, or modified:
```markdown
## Related Files
- `path/to/file1.ext` - [Role in implementation]
- `path/to/file2.ext` - [Reference pattern used]
- `path/to/file3.ext` - [Modified for X reason]
```
## Error Handling
### Three-Attempt Rule
**On 3rd failed attempt**:
1. Stop execution
2. Report: What attempted, what failed, root cause
3. Request guidance or suggest alternatives
### Recovery Strategies
| Error Type | Response |
|------------|----------|
| **Syntax/Type** | Review errors → Fix → Re-run tests → Validate build |
| **Runtime** | Analyze stack trace → Add error handling → Test error cases |
| **Test Failure** | Debug in isolation → Review setup → Fix implementation/test |
| **Build Failure** | Check messages → Fix incrementally → Validate each fix |
## Quality Standards
### Code Quality
- Follow project's existing patterns
- Match import style and naming conventions
- Single responsibility per function/class
- DRY (Don't Repeat Yourself)
- YAGNI (You Aren't Gonna Need It)
### Testing
- Test all public functions
- Test edge cases and error conditions
- Mock external dependencies
- Target 80%+ coverage
### Error Handling
- Proper try-catch blocks
- Clear error messages
- Graceful degradation
- Don't expose sensitive info
## Core Principles
**Incremental Progress**:
- Small, testable changes
- Commit working code frequently
- Build on previous work (subtasks)
**Evidence-Based**:
- Study 3+ similar patterns before implementing
- Match project style exactly
- Verify with existing code
**Pragmatic**:
- Boring solutions over clever code
- Simple over complex
- Adapt to project reality
**Context Continuity** (Multi-Task):
- Leverage resume for consistency
- Maintain established patterns
- Test integration between subtasks
## Execution Checklist
**Before**:
- [ ] Understand PURPOSE and TASK clearly
- [ ] Review CONTEXT files, find 3+ patterns
- [ ] Check RULES templates and constraints
**During**:
- [ ] Follow existing patterns exactly
- [ ] Write tests alongside code
- [ ] Run tests after every change
- [ ] Commit working code incrementally
**After**:
- [ ] All tests pass
- [ ] Coverage meets target
- [ ] Build succeeds
- [ ] All EXPECTED deliverables met

View File

@@ -0,0 +1,196 @@
# API Settings Page Implementation Complete
## Files Created
### 1. JavaScript File
**Location**: `ccw/src/templates/dashboard-js/views/api-settings.js` (28KB)
**Key features**:
- ✅ Provider Management
  - Add, edit, and delete providers
  - Supports OpenAI, Anthropic, Google, Ollama, Azure, Mistral, DeepSeek, and custom providers
  - API key management (environment variables supported)
  - Connection testing
- ✅ Endpoint Management
  - Create custom endpoints
  - Link providers and models
  - Cache strategy configuration
  - Show CLI usage examples
- ✅ Cache Management
  - Global cache toggle
  - Cache statistics display
  - Clear-cache action
### 2. CSS Stylesheet
**Location**: `ccw/src/templates/dashboard-css/31-api-settings.css` (6.8KB)
**Styles include**:
- Card-based layout
- Form styles
- Progress bars
- Responsive design
- Empty-state display
### 3. Internationalization Support
**Location**: `ccw/src/templates/dashboard-js/i18n.js`
**Translations added**:
- English: 54 translation keys
- Chinese: 54 translation keys
- Covers all UI text, hints, and error messages
### 4. Configuration Updates
#### dashboard-generator.ts
- ✅ Added `31-api-settings.css` to the CSS module list
- ✅ Added `views/api-settings.js` to the JS module list
#### navigation.js
- ✅ Added `api-settings` route handling
- ✅ Added title-update logic
#### dashboard.html
- ✅ Added a navigation menu item (Settings icon)
## Backend APIs Used
The page uses the following existing backend APIs:
### Provider APIs
- `GET /api/litellm-api/providers` - List all providers
- `POST /api/litellm-api/providers` - Create a provider
- `PUT /api/litellm-api/providers/:id` - Update a provider
- `DELETE /api/litellm-api/providers/:id` - Delete a provider
- `POST /api/litellm-api/providers/:id/test` - Test the connection
### Endpoint APIs
- `GET /api/litellm-api/endpoints` - List all endpoints
- `POST /api/litellm-api/endpoints` - Create an endpoint
- `PUT /api/litellm-api/endpoints/:id` - Update an endpoint
- `DELETE /api/litellm-api/endpoints/:id` - Delete an endpoint
### Model Discovery
- `GET /api/litellm-api/models/:providerType` - Get the models supported by a provider
### Cache APIs
- `GET /api/litellm-api/cache/stats` - Get cache statistics
- `POST /api/litellm-api/cache/clear` - Clear the cache
### Config APIs
- `GET /api/litellm-api/config` - Get the full configuration
- `PUT /api/litellm-api/config/cache` - Update global cache settings
## Page Features
### Provider Management
```
+-- Provider Card ------------------------+
| OpenAI Production [Edit] [Del] |
| Type: openai |
| Key: sk-...abc |
| URL: https://api.openai.com/v1 |
| Status: ✓ Enabled |
+-----------------------------------------+
```
### Endpoint Management
```
+-- Endpoint Card ------------------------+
| GPT-4o Code Review [Edit] [Del]|
| ID: my-gpt4o |
| Provider: OpenAI Production |
| Model: gpt-4-turbo |
| Cache: Enabled (60 min) |
| Usage: ccw cli -p "..." --model my-gpt4o|
+-----------------------------------------+
```
### Form Features
- **Provider Form**:
  - Type selection (8 provider types)
  - API key input (with show/hide)
  - Environment variable support
  - Custom base URL
  - Enable/disable toggle
- **Endpoint Form**:
  - Endpoint ID (used by the CLI)
  - Display name
  - Provider selection (loaded dynamically)
  - Model selection (loaded dynamically per provider)
  - Cache strategy configuration
    - TTL (minutes)
    - Max size (KB)
    - Auto-cache file patterns
## Usage Flow
### 1. Add a Provider
1. Click "Add Provider"
2. Select the provider type (e.g. OpenAI)
3. Enter a display name
4. Enter the API key (or use an environment variable)
5. Optional: enter a custom API base URL
6. Save
### 2. Create a Custom Endpoint
1. Click "Add Endpoint"
2. Enter an endpoint ID (used by the CLI)
3. Enter a display name
4. Select a provider
5. Select a model (the provider's supported models load automatically)
6. Optional: configure the cache strategy
7. Save
### 3. Use the Endpoint
```bash
ccw cli -p "Analyze this code..." --model my-gpt4o
```
## Code Quality
- ✅ Follows the existing code style
- ✅ Uses i18n functions for internationalization
- ✅ Responsive design (mobile friendly)
- ✅ Complete form validation
- ✅ User-friendly error messages
- ✅ Uses Lucide icons
- ✅ Modals reuse existing styles
- ✅ Fully integrated with the backend APIs
## Testing Suggestions
1. **Basic functionality**:
   - Add/edit/delete providers
   - Add/edit/delete endpoints
   - Clear the cache
2. **Form validation**:
   - Required-field validation
   - API key show/hide
   - Environment variable toggle
3. **Data loading**:
   - Dynamic model list loading
   - Cache statistics display
   - Empty-state display
4. **Internationalization**:
   - Switch languages (English/Chinese)
   - Verify all text renders correctly
## Next Steps
The page is complete and integrated into the project. After starting the CCW Dashboard:
1. The navigation bar shows an "API Settings" menu item (Settings icon)
2. Click it to access all features
3. All operations sync to the configuration file in real time
## Notes
- The page uses the existing LiteLLM API routes (`litellm-api-routes.ts`)
- Configuration is saved in the project's LiteLLM config file
- Environment variable references use the `${VARIABLE_NAME}` format
- API keys are masked when displayed (only the first 4 and last 4 characters are shown)
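For reference, the masking rule in the last note can be sketched as follows; this is an illustrative Python sketch of the behavior, not the dashboard's actual JavaScript implementation:
```python
def mask_api_key(key: str) -> str:
    """Keep only the first 4 and last 4 characters, e.g. 'sk-a...wxyz'."""
    if len(key) <= 8:
        # Too short to reveal anything useful; mask it entirely.
        return "*" * len(key)
    return f"{key[:4]}...{key[-4:]}"

print(mask_api_key("sk-abcdefghijklmnopqrstuvwxyz"))  # sk-a...wxyz
```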

View File

@@ -1,5 +1,8 @@
# 🚀 Claude Code Workflow (CCW)
[![Run in Smithery](https://smithery.ai/badge/skills/catlog22)](https://smithery.ai/skills?ns=catlog22&utm_source=github&utm_medium=badge)
<div align="center">
[![Version](https://img.shields.io/badge/version-v6.2.0-blue.svg)](https://github.com/catlog22/Claude-Code-Workflow/releases)
@@ -13,7 +16,7 @@
---
**Claude Code Workflow (CCW)** transforms AI development from simple prompt chaining into a powerful, context-first orchestration system. Through structured planning, deterministic execution, and intelligent multi-model orchestration, it solves the problems of execution uncertainty and error accumulation.
**Claude Code Workflow (CCW)** is a JSON-driven multi-agent development framework featuring intelligent CLI orchestration (Gemini/Qwen/Codex), a context-first architecture, and automated workflow execution. It transforms AI development from simple prompt chaining into a powerful orchestration system.
> **🎉 Version 6.2.0: Native CodexLens and Dashboard Overhaul**
>
@@ -38,8 +41,8 @@
CCW is built on a set of core principles that distinguish it from traditional AI development approaches:
- **Context-First Architecture**: Eliminates uncertainty during execution through predefined context gathering, ensuring agents have the right information *before* implementation.
- **JSON-First State Management**: Task state is stored entirely in `.task/IMPL-*.json` files as the single source of truth, enabling programmatic orchestration without state drift.
- **Autonomous Multi-Stage Orchestration**: Commands chain specialized subcommands and agents to automate complex workflows with zero user intervention.
- **Multi-Model Strategy**: Leverages the unique strengths of different AI models (e.g. Gemini for analysis, Codex for implementation) for superior results.
- **Layered Memory System**: A 4-tier documentation system that provides context at the right level of abstraction, preventing information overload.
@@ -49,18 +52,23 @@ CCW is built on a set of core principles that distinguish it from traditional AI
## ⚙️ Installation
For detailed installation instructions, see the [**INSTALL_CN.md**](INSTALL_CN.md) guide.
### **📦 npm Installation (Recommended)**
### **🚀 One-Click Quick Install**
**Windows (PowerShell):**
```powershell
Invoke-Expression (Invoke-WebRequest -Uri "https://raw.githubusercontent.com/catlog22/Claude-Code-Workflow/main/install-remote.ps1" -UseBasicParsing).Content
Install globally via npm:
```bash
npm install -g claude-code-workflow
```
**Linux/macOS (Bash/Zsh):**
Then install the workflow files to your system:
```bash
bash <(curl -fsSL https://raw.githubusercontent.com/catlog22/Claude-Code-Workflow/main/install-remote.sh)
# Interactive installation
ccw install
# Global installation (to ~/.claude)
ccw install -m Global
# Project-specific installation
ccw install -m Path -p /path/to/project
```
### **✅ Verify the Installation**
@@ -283,4 +291,4 @@ CCW provides comprehensive documentation to help you get started quickly and master advanced features:
## 📄 License
This project is licensed under the **MIT License**. See the [LICENSE](LICENSE) file for details.

180
ccw-litellm/README.md Normal file
View File

@@ -0,0 +1,180 @@
# ccw-litellm
Unified LiteLLM interface layer shared by ccw and codex-lens projects.
## Features
- **Unified LLM Interface**: Abstract interface for LLM operations (chat, completion)
- **Unified Embedding Interface**: Abstract interface for text embeddings
- **Multi-Provider Support**: OpenAI, Anthropic, Azure, and more via LiteLLM
- **Configuration Management**: YAML-based configuration with environment variable substitution
- **Type Safety**: Full type annotations with Pydantic models
## Installation
```bash
pip install -e .
```
## Quick Start
### Configuration
Create a configuration file at `~/.ccw/config/litellm-config.yaml`:
```yaml
version: 1
default_provider: openai

providers:
  openai:
    api_key: ${OPENAI_API_KEY}
    api_base: https://api.openai.com/v1

llm_models:
  default:
    provider: openai
    model: gpt-4

embedding_models:
  default:
    provider: openai
    model: text-embedding-3-small
    dimensions: 1536
```
### Usage
#### LLM Client
```python
from ccw_litellm import LiteLLMClient, ChatMessage
# Initialize client with default model
client = LiteLLMClient(model="default")
# Chat completion
messages = [
ChatMessage(role="user", content="Hello, how are you?")
]
response = client.chat(messages)
print(response.content)
# Text completion
response = client.complete("Once upon a time")
print(response.content)
```
#### Embedder
```python
from ccw_litellm import LiteLLMEmbedder
# Initialize embedder with default model
embedder = LiteLLMEmbedder(model="default")
# Embed single text
vector = embedder.embed("Hello world")
print(vector.shape) # (1, 1536)
# Embed multiple texts
vectors = embedder.embed(["Text 1", "Text 2", "Text 3"])
print(vectors.shape) # (3, 1536)
```
#### Custom Configuration
```python
from ccw_litellm import LiteLLMClient, load_config
# Load custom configuration
config = load_config("/path/to/custom-config.yaml")
# Use custom configuration
client = LiteLLMClient(model="fast", config=config)
```
## Configuration Reference
### Provider Configuration
```yaml
providers:
  <provider_name>:
    api_key: <api_key_or_${ENV_VAR}>
    api_base: <base_url>
```
Supported providers: `openai`, `anthropic`, `azure`, `vertex_ai`, `bedrock`, etc.
### LLM Model Configuration
```yaml
llm_models:
  <model_name>:
    provider: <provider_name>
    model: <model_identifier>
```
### Embedding Model Configuration
```yaml
embedding_models:
  <model_name>:
    provider: <provider_name>
    model: <model_identifier>
    dimensions: <embedding_dimensions>
```
## Environment Variables
The configuration supports environment variable substitution using the `${VAR}` or `${VAR:-default}` syntax:
```yaml
providers:
  openai:
    api_key: ${OPENAI_API_KEY}  # Required
    api_base: ${OPENAI_API_BASE:-https://api.openai.com/v1}  # With default
```
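The packaged loader implements this in `_substitute_env_vars` (see `config/loader.py`); a minimal standalone sketch of the resolution rules:
```python
import os
import re

def substitute(value: str) -> str:
    # Resolve ${VAR} and ${VAR:-default}; an unset variable with no default
    # becomes an empty string, mirroring config/loader.py.
    pattern = r"\$\{([^:}]+)(?::-(.*?))?\}"
    return re.sub(pattern, lambda m: os.environ.get(m.group(1), m.group(2) or ""), value)

os.environ["OPENAI_API_KEY"] = "sk-demo"
print(substitute("${OPENAI_API_KEY}"))                              # sk-demo
print(substitute("${OPENAI_API_BASE:-https://api.openai.com/v1}"))  # falls back to the default
```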
## API Reference
### Interfaces
- `AbstractLLMClient`: Abstract base class for LLM clients
- `AbstractEmbedder`: Abstract base class for embedders
- `ChatMessage`: Message data class (role, content)
- `LLMResponse`: Response data class (content, raw)
### Implementations
- `LiteLLMClient`: LiteLLM implementation of AbstractLLMClient
- `LiteLLMEmbedder`: LiteLLM implementation of AbstractEmbedder
### Configuration
- `LiteLLMConfig`: Root configuration model
- `ProviderConfig`: Provider configuration model
- `LLMModelConfig`: LLM model configuration model
- `EmbeddingModelConfig`: Embedding model configuration model
- `load_config(path)`: Load configuration from YAML file
- `get_config(path, reload)`: Get global configuration singleton
- `reset_config()`: Reset global configuration (for testing)
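For example, the configuration helpers can be combined as follows (a short sketch; the custom path is a placeholder):
```python
from ccw_litellm import get_config, load_config, reset_config

# get_config() loads once and then returns the cached singleton.
config = get_config()
print(config.default_provider)
print(list(config.llm_models))

# load_config() reads an alternative file without touching the singleton.
other = load_config("/path/to/custom-config.yaml")

# In tests, reset the singleton so the next call reloads from disk.
reset_config()
fresh = get_config(reload=True)
```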
## Development
### Running Tests
```bash
pytest tests/ -v
```
### Type Checking
```bash
mypy src/ccw_litellm
```
## License
MIT

View File

@@ -0,0 +1,53 @@
# LiteLLM Unified Configuration
# Copy to ~/.ccw/config/litellm-config.yaml
version: 1
# Default provider for LLM calls
default_provider: openai
# Provider configurations
providers:
  openai:
    api_key: ${OPENAI_API_KEY}
    api_base: https://api.openai.com/v1
  anthropic:
    api_key: ${ANTHROPIC_API_KEY}
  ollama:
    api_base: http://localhost:11434
  azure:
    api_key: ${AZURE_API_KEY}
    api_base: ${AZURE_API_BASE}

# LLM model configurations
llm_models:
  default:
    provider: openai
    model: gpt-4o
  fast:
    provider: openai
    model: gpt-4o-mini
  claude:
    provider: anthropic
    model: claude-sonnet-4-20250514
  local:
    provider: ollama
    model: llama3.2

# Embedding model configurations
embedding_models:
  default:
    provider: openai
    model: text-embedding-3-small
    dimensions: 1536
  large:
    provider: openai
    model: text-embedding-3-large
    dimensions: 3072
  ada:
    provider: openai
    model: text-embedding-ada-002
    dimensions: 1536
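Assuming this file is saved as `~/.ccw/config/litellm-config.yaml`, the named models above can then be selected by name; a short sketch based on the ccw-litellm README:
```python
from ccw_litellm import ChatMessage, LiteLLMClient, LiteLLMEmbedder

# "fast" and "claude" are the llm_models entries defined above.
fast = LiteLLMClient(model="fast")
print(fast.chat([ChatMessage(role="user", content="Say hello.")]).content)

claude = LiteLLMClient(model="claude")
print(claude.complete("Name one use of embeddings.").content)

# "large" is the 3072-dimension embedding model defined above.
embedder = LiteLLMEmbedder(model="large")
print(embedder.embed(["hello", "world"]).shape)  # (2, 3072)
```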

View File

@@ -0,0 +1,35 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "ccw-litellm"
version = "0.1.0"
description = "Unified LiteLLM interface layer shared by ccw and codex-lens"
requires-python = ">=3.10"
authors = [{ name = "ccw-litellm contributors" }]
dependencies = [
"litellm>=1.0.0",
"pyyaml",
"numpy",
"pydantic>=2.0",
]
[project.optional-dependencies]
dev = [
"pytest>=7.0",
]
[project.scripts]
ccw-litellm = "ccw_litellm.cli:main"
[tool.setuptools]
package-dir = { "" = "src" }
[tool.setuptools.packages.find]
where = ["src"]
include = ["ccw_litellm*"]
[tool.pytest.ini_options]
testpaths = ["tests"]
addopts = "-q"

View File

@@ -0,0 +1,12 @@
Metadata-Version: 2.4
Name: ccw-litellm
Version: 0.1.0
Summary: Unified LiteLLM interface layer shared by ccw and codex-lens
Author: ccw-litellm contributors
Requires-Python: >=3.10
Requires-Dist: litellm>=1.0.0
Requires-Dist: pyyaml
Requires-Dist: numpy
Requires-Dist: pydantic>=2.0
Provides-Extra: dev
Requires-Dist: pytest>=7.0; extra == "dev"

View File

@@ -0,0 +1,20 @@
README.md
pyproject.toml
src/ccw_litellm/__init__.py
src/ccw_litellm/cli.py
src/ccw_litellm.egg-info/PKG-INFO
src/ccw_litellm.egg-info/SOURCES.txt
src/ccw_litellm.egg-info/dependency_links.txt
src/ccw_litellm.egg-info/entry_points.txt
src/ccw_litellm.egg-info/requires.txt
src/ccw_litellm.egg-info/top_level.txt
src/ccw_litellm/clients/__init__.py
src/ccw_litellm/clients/litellm_embedder.py
src/ccw_litellm/clients/litellm_llm.py
src/ccw_litellm/config/__init__.py
src/ccw_litellm/config/loader.py
src/ccw_litellm/config/models.py
src/ccw_litellm/interfaces/__init__.py
src/ccw_litellm/interfaces/embedder.py
src/ccw_litellm/interfaces/llm.py
tests/test_interfaces.py

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,2 @@
[console_scripts]
ccw-litellm = ccw_litellm.cli:main

View File

@@ -0,0 +1,7 @@
litellm>=1.0.0
pyyaml
numpy
pydantic>=2.0
[dev]
pytest>=7.0

View File

@@ -0,0 +1 @@
ccw_litellm

View File

@@ -0,0 +1,47 @@
"""ccw-litellm package.
This package provides a small, stable interface layer around LiteLLM to share
between the ccw and codex-lens projects.
"""
from __future__ import annotations
from .clients import LiteLLMClient, LiteLLMEmbedder
from .config import (
EmbeddingModelConfig,
LiteLLMConfig,
LLMModelConfig,
ProviderConfig,
get_config,
load_config,
reset_config,
)
from .interfaces import (
AbstractEmbedder,
AbstractLLMClient,
ChatMessage,
LLMResponse,
)
__version__ = "0.1.0"
__all__ = [
"__version__",
# Abstract interfaces
"AbstractEmbedder",
"AbstractLLMClient",
"ChatMessage",
"LLMResponse",
# Client implementations
"LiteLLMClient",
"LiteLLMEmbedder",
# Configuration
"LiteLLMConfig",
"ProviderConfig",
"LLMModelConfig",
"EmbeddingModelConfig",
"load_config",
"get_config",
"reset_config",
]

View File

@@ -0,0 +1,108 @@
"""CLI entry point for ccw-litellm."""
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
def main() -> int:
"""Main CLI entry point."""
parser = argparse.ArgumentParser(
prog="ccw-litellm",
description="Unified LiteLLM interface for ccw and codex-lens",
)
subparsers = parser.add_subparsers(dest="command", help="Available commands")
# config command
config_parser = subparsers.add_parser("config", help="Show configuration")
config_parser.add_argument(
"--path",
type=Path,
help="Configuration file path",
)
# embed command
embed_parser = subparsers.add_parser("embed", help="Generate embeddings")
embed_parser.add_argument("texts", nargs="+", help="Texts to embed")
embed_parser.add_argument(
"--model",
default="default",
help="Embedding model name (default: default)",
)
embed_parser.add_argument(
"--output",
choices=["json", "shape"],
default="shape",
help="Output format (default: shape)",
)
# chat command
chat_parser = subparsers.add_parser("chat", help="Chat with LLM")
chat_parser.add_argument("message", help="Message to send")
chat_parser.add_argument(
"--model",
default="default",
help="LLM model name (default: default)",
)
# version command
subparsers.add_parser("version", help="Show version")
args = parser.parse_args()
if args.command == "version":
from . import __version__
print(f"ccw-litellm {__version__}")
return 0
if args.command == "config":
from .config import get_config
try:
config = get_config(config_path=args.path if hasattr(args, "path") else None)
print(config.model_dump_json(indent=2))
except Exception as e:
print(f"Error loading config: {e}", file=sys.stderr)
return 1
return 0
if args.command == "embed":
from .clients import LiteLLMEmbedder
try:
embedder = LiteLLMEmbedder(model=args.model)
vectors = embedder.embed(args.texts)
if args.output == "json":
print(json.dumps(vectors.tolist()))
else:
print(f"Shape: {vectors.shape}")
print(f"Dimensions: {embedder.dimensions}")
except Exception as e:
print(f"Error: {e}", file=sys.stderr)
return 1
return 0
if args.command == "chat":
from .clients import LiteLLMClient
from .interfaces import ChatMessage
try:
client = LiteLLMClient(model=args.model)
response = client.chat([ChatMessage(role="user", content=args.message)])
print(response.content)
except Exception as e:
print(f"Error: {e}", file=sys.stderr)
return 1
return 0
parser.print_help()
return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -0,0 +1,12 @@
"""Client implementations for ccw-litellm."""
from __future__ import annotations
from .litellm_embedder import LiteLLMEmbedder
from .litellm_llm import LiteLLMClient
__all__ = [
"LiteLLMClient",
"LiteLLMEmbedder",
]

View File

@@ -0,0 +1,251 @@
"""LiteLLM embedder implementation for text embeddings."""
from __future__ import annotations
import logging
from typing import Any, Sequence
import litellm
import numpy as np
from numpy.typing import NDArray
from ..config import LiteLLMConfig, get_config
from ..interfaces.embedder import AbstractEmbedder
logger = logging.getLogger(__name__)
class LiteLLMEmbedder(AbstractEmbedder):
"""LiteLLM embedder implementation.
Supports multiple embedding providers (OpenAI, etc.) through LiteLLM's unified interface.
Example:
embedder = LiteLLMEmbedder(model="default")
vectors = embedder.embed(["Hello world", "Another text"])
print(vectors.shape) # (2, 1536)
"""
def __init__(
self,
model: str = "default",
config: LiteLLMConfig | None = None,
**litellm_kwargs: Any,
) -> None:
"""Initialize LiteLLM embedder.
Args:
model: Model name from configuration (default: "default")
config: Configuration instance (default: use global config)
**litellm_kwargs: Additional arguments to pass to litellm.embedding()
"""
self._config = config or get_config()
self._model_name = model
self._litellm_kwargs = litellm_kwargs
# Get embedding model configuration
try:
self._model_config = self._config.get_embedding_model(model)
except ValueError as e:
logger.error(f"Failed to get embedding model configuration: {e}")
raise
# Get provider configuration
try:
self._provider_config = self._config.get_provider(self._model_config.provider)
except ValueError as e:
logger.error(f"Failed to get provider configuration: {e}")
raise
# Set up LiteLLM environment
self._setup_litellm()
def _setup_litellm(self) -> None:
"""Configure LiteLLM with provider settings."""
provider = self._model_config.provider
# Set API key
if self._provider_config.api_key:
litellm.api_key = self._provider_config.api_key
# Also set environment-specific keys
if provider == "openai":
litellm.openai_key = self._provider_config.api_key
elif provider == "anthropic":
litellm.anthropic_key = self._provider_config.api_key
# Set API base
if self._provider_config.api_base:
litellm.api_base = self._provider_config.api_base
def _format_model_name(self) -> str:
"""Format model name for LiteLLM.
Returns:
Formatted model name (e.g., "openai/text-embedding-3-small")
"""
provider = self._model_config.provider
model = self._model_config.model
# For some providers, LiteLLM expects explicit prefix
if provider in ["azure", "vertex_ai", "bedrock"]:
return f"{provider}/{model}"
# For providers with custom api_base (OpenAI-compatible endpoints),
# use openai/ prefix to tell LiteLLM to use OpenAI API format
if self._provider_config.api_base and provider not in ["openai", "anthropic"]:
return f"openai/{model}"
return model
@property
def dimensions(self) -> int:
"""Embedding vector size."""
return self._model_config.dimensions
def _estimate_tokens(self, text: str) -> int:
"""Estimate token count for a text using fast heuristic.
Args:
text: Text to estimate tokens for
Returns:
Estimated token count (len/4 is a reasonable approximation)
"""
return len(text) // 4
def _create_batches(
self,
texts: list[str],
max_tokens: int = 30000
) -> list[list[str]]:
"""Split texts into batches that fit within token limits.
Args:
texts: List of texts to batch
max_tokens: Maximum tokens per batch (default: 30000, safe margin for 40960 limit)
Returns:
List of text batches
"""
batches = []
current_batch = []
current_tokens = 0
for text in texts:
text_tokens = self._estimate_tokens(text)
# If single text exceeds limit, truncate it
if text_tokens > max_tokens:
logger.warning(f"Text with {text_tokens} estimated tokens exceeds limit, truncating")
# Truncate to fit (rough estimate: 4 chars per token)
max_chars = max_tokens * 4
text = text[:max_chars]
text_tokens = self._estimate_tokens(text)
# Start new batch if current would exceed limit
if current_tokens + text_tokens > max_tokens and current_batch:
batches.append(current_batch)
current_batch = []
current_tokens = 0
current_batch.append(text)
current_tokens += text_tokens
# Add final batch
if current_batch:
batches.append(current_batch)
return batches
def embed(
self,
texts: str | Sequence[str],
*,
batch_size: int | None = None,
max_tokens_per_batch: int = 30000,
**kwargs: Any,
) -> NDArray[np.floating]:
"""Embed one or more texts.
Args:
texts: Single text or sequence of texts
batch_size: Batch size for processing (deprecated, use max_tokens_per_batch)
max_tokens_per_batch: Maximum estimated tokens per API call (default: 30000)
**kwargs: Additional arguments for litellm.embedding()
Returns:
A numpy array of shape (n_texts, dimensions).
Raises:
Exception: If LiteLLM embedding fails
"""
# Normalize input to list
if isinstance(texts, str):
text_list = [texts]
else:
text_list = list(texts)
if not text_list:
# Return empty array with correct shape
return np.empty((0, self.dimensions), dtype=np.float32)
# Merge kwargs
embedding_kwargs = {**self._litellm_kwargs, **kwargs}
# For OpenAI-compatible endpoints, ensure encoding_format is set
if self._provider_config.api_base and "encoding_format" not in embedding_kwargs:
embedding_kwargs["encoding_format"] = "float"
# Split into token-aware batches
batches = self._create_batches(text_list, max_tokens_per_batch)
if len(batches) > 1:
logger.info(f"Split {len(text_list)} texts into {len(batches)} batches for embedding")
all_embeddings = []
for batch_idx, batch in enumerate(batches):
try:
# Build call kwargs with explicit api_base
call_kwargs = {**embedding_kwargs}
if self._provider_config.api_base:
call_kwargs["api_base"] = self._provider_config.api_base
if self._provider_config.api_key:
call_kwargs["api_key"] = self._provider_config.api_key
# Call LiteLLM embedding for this batch
response = litellm.embedding(
model=self._format_model_name(),
input=batch,
**call_kwargs,
)
# Extract embeddings
batch_embeddings = [item["embedding"] for item in response.data]
all_embeddings.extend(batch_embeddings)
except Exception as e:
logger.error(f"LiteLLM embedding failed for batch {batch_idx + 1}/{len(batches)}: {e}")
raise
# Convert to numpy array
result = np.array(all_embeddings, dtype=np.float32)
# Validate dimensions
if result.shape[1] != self.dimensions:
logger.warning(
f"Expected {self.dimensions} dimensions, got {result.shape[1]}. "
f"Configuration may be incorrect."
)
return result
@property
def model_name(self) -> str:
"""Get configured model name."""
return self._model_name
@property
def provider(self) -> str:
"""Get configured provider name."""
return self._model_config.provider

View File

@@ -0,0 +1,165 @@
"""LiteLLM client implementation for LLM operations."""
from __future__ import annotations
import logging
from typing import Any, Sequence
import litellm
from ..config import LiteLLMConfig, get_config
from ..interfaces.llm import AbstractLLMClient, ChatMessage, LLMResponse
logger = logging.getLogger(__name__)
class LiteLLMClient(AbstractLLMClient):
"""LiteLLM client implementation.
Supports multiple providers (OpenAI, Anthropic, etc.) through LiteLLM's unified interface.
Example:
client = LiteLLMClient(model="default")
response = client.chat([
ChatMessage(role="user", content="Hello!")
])
print(response.content)
"""
def __init__(
self,
model: str = "default",
config: LiteLLMConfig | None = None,
**litellm_kwargs: Any,
) -> None:
"""Initialize LiteLLM client.
Args:
model: Model name from configuration (default: "default")
config: Configuration instance (default: use global config)
**litellm_kwargs: Additional arguments to pass to litellm.completion()
"""
self._config = config or get_config()
self._model_name = model
self._litellm_kwargs = litellm_kwargs
# Get model configuration
try:
self._model_config = self._config.get_llm_model(model)
except ValueError as e:
logger.error(f"Failed to get model configuration: {e}")
raise
# Get provider configuration
try:
self._provider_config = self._config.get_provider(self._model_config.provider)
except ValueError as e:
logger.error(f"Failed to get provider configuration: {e}")
raise
# Set up LiteLLM environment
self._setup_litellm()
def _setup_litellm(self) -> None:
"""Configure LiteLLM with provider settings."""
provider = self._model_config.provider
# Set API key
if self._provider_config.api_key:
env_var = f"{provider.upper()}_API_KEY"
litellm.api_key = self._provider_config.api_key
# Also set environment-specific keys
if provider == "openai":
litellm.openai_key = self._provider_config.api_key
elif provider == "anthropic":
litellm.anthropic_key = self._provider_config.api_key
# Set API base
if self._provider_config.api_base:
litellm.api_base = self._provider_config.api_base
def _format_model_name(self) -> str:
"""Format model name for LiteLLM.
Returns:
Formatted model name (e.g., "gpt-4", "claude-3-opus-20240229")
"""
# LiteLLM expects model names in format: "provider/model" or just "model"
# If provider is explicit, use provider/model format
provider = self._model_config.provider
model = self._model_config.model
# For some providers, LiteLLM expects explicit prefix
if provider in ["anthropic", "azure", "vertex_ai", "bedrock"]:
return f"{provider}/{model}"
return model
def chat(
self,
messages: Sequence[ChatMessage],
**kwargs: Any,
) -> LLMResponse:
"""Chat completion for a sequence of messages.
Args:
messages: Sequence of chat messages
**kwargs: Additional arguments for litellm.completion()
Returns:
LLM response with content and raw response
Raises:
Exception: If LiteLLM completion fails
"""
# Convert messages to LiteLLM format
litellm_messages = [
{"role": msg.role, "content": msg.content} for msg in messages
]
# Merge kwargs
completion_kwargs = {**self._litellm_kwargs, **kwargs}
try:
# Call LiteLLM
response = litellm.completion(
model=self._format_model_name(),
messages=litellm_messages,
**completion_kwargs,
)
# Extract content
content = response.choices[0].message.content or ""
return LLMResponse(content=content, raw=response)
except Exception as e:
logger.error(f"LiteLLM completion failed: {e}")
raise
def complete(self, prompt: str, **kwargs: Any) -> LLMResponse:
"""Text completion for a prompt.
Args:
prompt: Input prompt
**kwargs: Additional arguments for litellm.completion()
Returns:
LLM response with content and raw response
Raises:
Exception: If LiteLLM completion fails
"""
# Convert to chat format (most modern models use chat interface)
messages = [ChatMessage(role="user", content=prompt)]
return self.chat(messages, **kwargs)
@property
def model_name(self) -> str:
"""Get configured model name."""
return self._model_name
@property
def provider(self) -> str:
"""Get configured provider name."""
return self._model_config.provider

View File

@@ -0,0 +1,22 @@
"""Configuration management for LiteLLM integration."""
from __future__ import annotations
from .loader import get_config, load_config, reset_config
from .models import (
EmbeddingModelConfig,
LiteLLMConfig,
LLMModelConfig,
ProviderConfig,
)
__all__ = [
"LiteLLMConfig",
"ProviderConfig",
"LLMModelConfig",
"EmbeddingModelConfig",
"load_config",
"get_config",
"reset_config",
]

View File

@@ -0,0 +1,316 @@
"""Configuration loader with environment variable substitution."""
from __future__ import annotations
import json
import os
import re
from pathlib import Path
from typing import Any
import yaml
from .models import LiteLLMConfig
# Default configuration paths
# JSON format (UI config) takes priority over YAML format
DEFAULT_JSON_CONFIG_PATH = Path.home() / ".ccw" / "config" / "litellm-api-config.json"
DEFAULT_YAML_CONFIG_PATH = Path.home() / ".ccw" / "config" / "litellm-config.yaml"
# Keep backward compatibility
DEFAULT_CONFIG_PATH = DEFAULT_YAML_CONFIG_PATH
# Global configuration singleton
_config_instance: LiteLLMConfig | None = None
def _substitute_env_vars(value: Any) -> Any:
"""Recursively substitute environment variables in configuration values.
Supports ${ENV_VAR} and ${ENV_VAR:-default} syntax.
Args:
value: Configuration value (str, dict, list, or primitive)
Returns:
Value with environment variables substituted
"""
if isinstance(value, str):
# Pattern: ${VAR} or ${VAR:-default}
pattern = r"\$\{([^:}]+)(?::-(.*?))?\}"
def replace_var(match: re.Match) -> str:
var_name = match.group(1)
default_value = match.group(2) if match.group(2) is not None else ""
return os.environ.get(var_name, default_value)
return re.sub(pattern, replace_var, value)
if isinstance(value, dict):
return {k: _substitute_env_vars(v) for k, v in value.items()}
if isinstance(value, list):
return [_substitute_env_vars(item) for item in value]
return value
def _get_default_config() -> dict[str, Any]:
"""Get default configuration when no config file exists.
Returns:
Default configuration dictionary
"""
return {
"version": 1,
"default_provider": "openai",
"providers": {
"openai": {
"api_key": "${OPENAI_API_KEY}",
"api_base": "https://api.openai.com/v1",
},
},
"llm_models": {
"default": {
"provider": "openai",
"model": "gpt-4",
},
"fast": {
"provider": "openai",
"model": "gpt-3.5-turbo",
},
},
"embedding_models": {
"default": {
"provider": "openai",
"model": "text-embedding-3-small",
"dimensions": 1536,
},
},
}
def _convert_json_to_internal_format(json_config: dict[str, Any]) -> dict[str, Any]:
"""Convert UI JSON config format to internal format.
The UI stores config in a different structure:
- providers: array of {id, name, type, apiKey, apiBase, llmModels[], embeddingModels[]}
Internal format uses:
- providers: dict of {provider_id: {api_key, api_base}}
- llm_models: dict of {model_id: {provider, model}}
- embedding_models: dict of {model_id: {provider, model, dimensions}}
Args:
json_config: Configuration in UI JSON format
Returns:
Configuration in internal format
"""
providers: dict[str, Any] = {}
llm_models: dict[str, Any] = {}
embedding_models: dict[str, Any] = {}
default_provider: str | None = None
for provider in json_config.get("providers", []):
if not provider.get("enabled", True):
continue
provider_id = provider.get("id", "")
if not provider_id:
continue
# Set first enabled provider as default
if default_provider is None:
default_provider = provider_id
# Convert provider with advanced settings
provider_config: dict[str, Any] = {
"api_key": provider.get("apiKey", ""),
"api_base": provider.get("apiBase"),
}
# Map advanced settings
adv = provider.get("advancedSettings", {})
if adv.get("timeout"):
provider_config["timeout"] = adv["timeout"]
if adv.get("maxRetries"):
provider_config["max_retries"] = adv["maxRetries"]
if adv.get("organization"):
provider_config["organization"] = adv["organization"]
if adv.get("apiVersion"):
provider_config["api_version"] = adv["apiVersion"]
if adv.get("customHeaders"):
provider_config["custom_headers"] = adv["customHeaders"]
providers[provider_id] = provider_config
# Convert LLM models
for model in provider.get("llmModels", []):
if not model.get("enabled", True):
continue
model_id = model.get("id", "")
if not model_id:
continue
llm_model_config: dict[str, Any] = {
"provider": provider_id,
"model": model.get("name", ""),
}
# Add model-specific endpoint settings
endpoint = model.get("endpointSettings", {})
if endpoint.get("baseUrl"):
llm_model_config["api_base"] = endpoint["baseUrl"]
if endpoint.get("timeout"):
llm_model_config["timeout"] = endpoint["timeout"]
if endpoint.get("maxRetries"):
llm_model_config["max_retries"] = endpoint["maxRetries"]
# Add capabilities
caps = model.get("capabilities", {})
if caps.get("contextWindow"):
llm_model_config["context_window"] = caps["contextWindow"]
if caps.get("maxOutputTokens"):
llm_model_config["max_output_tokens"] = caps["maxOutputTokens"]
llm_models[model_id] = llm_model_config
# Convert embedding models
for model in provider.get("embeddingModels", []):
if not model.get("enabled", True):
continue
model_id = model.get("id", "")
if not model_id:
continue
embedding_model_config: dict[str, Any] = {
"provider": provider_id,
"model": model.get("name", ""),
"dimensions": model.get("capabilities", {}).get("embeddingDimension", 1536),
}
# Add model-specific endpoint settings
endpoint = model.get("endpointSettings", {})
if endpoint.get("baseUrl"):
embedding_model_config["api_base"] = endpoint["baseUrl"]
if endpoint.get("timeout"):
embedding_model_config["timeout"] = endpoint["timeout"]
embedding_models[model_id] = embedding_model_config
# Ensure we have defaults if no models found
if not llm_models:
llm_models["default"] = {
"provider": default_provider or "openai",
"model": "gpt-4",
}
if not embedding_models:
embedding_models["default"] = {
"provider": default_provider or "openai",
"model": "text-embedding-3-small",
"dimensions": 1536,
}
return {
"version": json_config.get("version", 1),
"default_provider": default_provider or "openai",
"providers": providers,
"llm_models": llm_models,
"embedding_models": embedding_models,
}
def load_config(config_path: Path | str | None = None) -> LiteLLMConfig:
"""Load LiteLLM configuration from JSON or YAML file.
Priority order:
1. Explicit config_path if provided
2. JSON config (UI format): ~/.ccw/config/litellm-api-config.json
3. YAML config: ~/.ccw/config/litellm-config.yaml
4. Default configuration
Args:
config_path: Path to configuration file (optional)
Returns:
Parsed and validated configuration
Raises:
FileNotFoundError: If config file not found and no default available
ValueError: If configuration is invalid
"""
raw_config: dict[str, Any] | None = None
is_json_format = False
if config_path is not None:
config_path = Path(config_path)
if config_path.exists():
try:
with open(config_path, "r", encoding="utf-8") as f:
if config_path.suffix == ".json":
raw_config = json.load(f)
is_json_format = True
else:
raw_config = yaml.safe_load(f)
except Exception as e:
raise ValueError(f"Failed to load configuration from {config_path}: {e}") from e
# Check JSON config first (UI format)
if raw_config is None and DEFAULT_JSON_CONFIG_PATH.exists():
try:
with open(DEFAULT_JSON_CONFIG_PATH, "r", encoding="utf-8") as f:
raw_config = json.load(f)
is_json_format = True
except Exception:
pass # Fall through to YAML
# Check YAML config
if raw_config is None and DEFAULT_YAML_CONFIG_PATH.exists():
try:
with open(DEFAULT_YAML_CONFIG_PATH, "r", encoding="utf-8") as f:
raw_config = yaml.safe_load(f)
except Exception:
pass # Fall through to default
# Use default configuration
if raw_config is None:
raw_config = _get_default_config()
# Convert JSON format to internal format if needed
if is_json_format:
raw_config = _convert_json_to_internal_format(raw_config)
# Substitute environment variables
config_data = _substitute_env_vars(raw_config)
# Validate and parse with Pydantic
try:
return LiteLLMConfig.model_validate(config_data)
except Exception as e:
raise ValueError(f"Invalid configuration: {e}") from e
def get_config(config_path: Path | str | None = None, reload: bool = False) -> LiteLLMConfig:
"""Get global configuration singleton.
Args:
config_path: Path to configuration file (default: ~/.ccw/config/litellm-config.yaml)
reload: Force reload configuration from disk
Returns:
Global configuration instance
"""
global _config_instance
if _config_instance is None or reload:
_config_instance = load_config(config_path)
return _config_instance
def reset_config() -> None:
"""Reset global configuration singleton.
Useful for testing.
"""
global _config_instance
_config_instance = None

View File

@@ -0,0 +1,130 @@
"""Pydantic configuration models for LiteLLM integration."""
from __future__ import annotations
from typing import Any
from pydantic import BaseModel, Field
class ProviderConfig(BaseModel):
"""Provider API configuration.
Supports environment variable substitution in the format ${ENV_VAR}.
"""
api_key: str | None = None
api_base: str | None = None
model_config = {"extra": "allow"}
class LLMModelConfig(BaseModel):
"""LLM model configuration."""
provider: str
model: str
model_config = {"extra": "allow"}
class EmbeddingModelConfig(BaseModel):
"""Embedding model configuration."""
provider: str # "openai", "fastembed", "ollama", etc.
model: str
dimensions: int
model_config = {"extra": "allow"}
class LiteLLMConfig(BaseModel):
"""Root configuration for LiteLLM integration.
Example YAML:
version: 1
default_provider: openai
providers:
openai:
api_key: ${OPENAI_API_KEY}
api_base: https://api.openai.com/v1
anthropic:
api_key: ${ANTHROPIC_API_KEY}
llm_models:
default:
provider: openai
model: gpt-4
fast:
provider: openai
model: gpt-3.5-turbo
embedding_models:
default:
provider: openai
model: text-embedding-3-small
dimensions: 1536
"""
version: int = 1
default_provider: str = "openai"
providers: dict[str, ProviderConfig] = Field(default_factory=dict)
llm_models: dict[str, LLMModelConfig] = Field(default_factory=dict)
embedding_models: dict[str, EmbeddingModelConfig] = Field(default_factory=dict)
model_config = {"extra": "allow"}
def get_llm_model(self, model: str = "default") -> LLMModelConfig:
"""Get LLM model configuration by name.
Args:
model: Model name or "default"
Returns:
LLM model configuration
Raises:
ValueError: If model not found
"""
if model not in self.llm_models:
raise ValueError(
f"LLM model '{model}' not found in configuration. "
f"Available models: {list(self.llm_models.keys())}"
)
return self.llm_models[model]
def get_embedding_model(self, model: str = "default") -> EmbeddingModelConfig:
"""Get embedding model configuration by name.
Args:
model: Model name or "default"
Returns:
Embedding model configuration
Raises:
ValueError: If model not found
"""
if model not in self.embedding_models:
raise ValueError(
f"Embedding model '{model}' not found in configuration. "
f"Available models: {list(self.embedding_models.keys())}"
)
return self.embedding_models[model]
def get_provider(self, provider: str) -> ProviderConfig:
"""Get provider configuration by name.
Args:
provider: Provider name
Returns:
Provider configuration
Raises:
ValueError: If provider not found
"""
if provider not in self.providers:
raise ValueError(
f"Provider '{provider}' not found in configuration. "
f"Available providers: {list(self.providers.keys())}"
)
return self.providers[provider]

View File

@@ -0,0 +1,14 @@
"""Abstract interfaces for ccw-litellm."""
from __future__ import annotations
from .embedder import AbstractEmbedder
from .llm import AbstractLLMClient, ChatMessage, LLMResponse
__all__ = [
"AbstractEmbedder",
"AbstractLLMClient",
"ChatMessage",
"LLMResponse",
]

View File

@@ -0,0 +1,52 @@
from __future__ import annotations
import asyncio
from abc import ABC, abstractmethod
from typing import Any, Sequence
import numpy as np
from numpy.typing import NDArray
class AbstractEmbedder(ABC):
    """Embedding interface compatible with fastembed-style embedders.

    Implementers only need to provide the synchronous `embed` method; an
    asynchronous `aembed` wrapper is provided for convenience.
    """

    @property
    @abstractmethod
    def dimensions(self) -> int:
        """Embedding vector size."""

    @abstractmethod
    def embed(
        self,
        texts: str | Sequence[str],
        *,
        batch_size: int | None = None,
        **kwargs: Any,
    ) -> NDArray[np.floating]:
        """Embed one or more texts.

        Returns:
            A numpy array of shape (n_texts, dimensions).
        """

    async def aembed(
        self,
        texts: str | Sequence[str],
        *,
        batch_size: int | None = None,
        **kwargs: Any,
    ) -> NDArray[np.floating]:
        """Async wrapper around `embed` using a worker thread by default."""
        return await asyncio.to_thread(
            self.embed,
            texts,
            batch_size=batch_size,
            **kwargs,
        )

View File

@@ -0,0 +1,45 @@
from __future__ import annotations
import asyncio
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Literal, Sequence
@dataclass(frozen=True, slots=True)
class ChatMessage:
    role: Literal["system", "user", "assistant", "tool"]
    content: str


@dataclass(frozen=True, slots=True)
class LLMResponse:
    content: str
    raw: Any | None = None


class AbstractLLMClient(ABC):
    """LiteLLM-like client interface.

    Implementers only need to provide synchronous methods; async wrappers are
    provided via `asyncio.to_thread`.
    """

    @abstractmethod
    def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> LLMResponse:
        """Chat completion for a sequence of messages."""

    @abstractmethod
    def complete(self, prompt: str, **kwargs: Any) -> LLMResponse:
        """Text completion for a prompt."""

    async def achat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> LLMResponse:
        """Async wrapper around `chat` using a worker thread by default."""
        return await asyncio.to_thread(self.chat, messages, **kwargs)

    async def acomplete(self, prompt: str, **kwargs: Any) -> LLMResponse:
        """Async wrapper around `complete` using a worker thread by default."""
        return await asyncio.to_thread(self.complete, prompt, **kwargs)

View File

@@ -0,0 +1,11 @@
from __future__ import annotations
import sys
from pathlib import Path
def pytest_configure() -> None:
    project_root = Path(__file__).resolve().parents[1]
    src_dir = project_root / "src"
    sys.path.insert(0, str(src_dir))

View File

@@ -0,0 +1,64 @@
from __future__ import annotations
import asyncio
from typing import Any, Sequence
import numpy as np
from ccw_litellm.interfaces import AbstractEmbedder, AbstractLLMClient, ChatMessage, LLMResponse
class _DummyEmbedder(AbstractEmbedder):
@property
def dimensions(self) -> int:
return 3
def embed(
self,
texts: str | Sequence[str],
*,
batch_size: int | None = None,
**kwargs: Any,
) -> np.ndarray:
if isinstance(texts, str):
texts = [texts]
_ = batch_size
_ = kwargs
return np.zeros((len(texts), self.dimensions), dtype=np.float32)
class _DummyLLM(AbstractLLMClient):
def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> LLMResponse:
_ = kwargs
return LLMResponse(content="".join(m.content for m in messages))
def complete(self, prompt: str, **kwargs: Any) -> LLMResponse:
_ = kwargs
return LLMResponse(content=prompt)
def test_embed_sync_shape_and_dtype() -> None:
emb = _DummyEmbedder()
out = emb.embed(["a", "b"])
assert out.shape == (2, 3)
assert out.dtype == np.float32
def test_embed_async_wrapper() -> None:
emb = _DummyEmbedder()
out = asyncio.run(emb.aembed("x"))
assert out.shape == (1, 3)
def test_llm_sync() -> None:
llm = _DummyLLM()
out = llm.chat([ChatMessage(role="user", content="hi")])
assert out == LLMResponse(content="hi")
def test_llm_async_wrappers() -> None:
llm = _DummyLLM()
out1 = asyncio.run(llm.achat([ChatMessage(role="user", content="a")]))
out2 = asyncio.run(llm.acomplete("b"))
assert out1.content == "a"
assert out2.content == "b"

308
ccw/LITELLM_INTEGRATION.md Normal file
View File

@@ -0,0 +1,308 @@
# LiteLLM Integration Guide
## Overview
CCW now supports custom LiteLLM endpoints with integrated context caching. You can configure multiple providers (OpenAI, Anthropic, Ollama, etc.) and create custom endpoints with file-based caching strategies.
## Architecture
```
┌─────────────────────────────────────────────────────────────┐
│ CLI Executor │
│ │
│ ┌─────────────┐ ┌──────────────────────────────┐ │
│ │ --model │────────>│ Route Decision: │ │
│ │ flag │ │ - gemini/qwen/codex → CLI │ │
│ └─────────────┘ │ - custom ID → LiteLLM │ │
│ └──────────────────────────────┘ │
└─────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────┐
│ LiteLLM Executor │
│ │
│ 1. Load endpoint config (litellm-api-config.json) │
│ 2. Extract @patterns from prompt │
│ 3. Pack files via context-cache │
│ 4. Call LiteLLM client with cached content + prompt │
│ 5. Return result │
└─────────────────────────────────────────────────────────────┘
```
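The routing decision shown above can be illustrated with a simplified sketch (Python pseudocode for clarity; the real logic lives in the TypeScript CLI executor, and `my-gpt4o` is just an example endpoint ID):
```python
BUILTIN_TOOLS = {"gemini", "qwen", "codex"}

def route(model_flag: str) -> str:
    # Built-in CLI tools are dispatched directly; any other value is treated
    # as a custom endpoint ID and handed to the LiteLLM executor.
    return "cli" if model_flag in BUILTIN_TOOLS else "litellm"

print(route("gemini"))    # cli
print(route("my-gpt4o"))  # litellm
```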
## Configuration
### File Location
Configuration is stored per-project:
```
<project>/.ccw/storage/config/litellm-api-config.json
```
### Configuration Structure
```json
{
"version": 1,
"providers": [
{
"id": "openai-1234567890",
"name": "My OpenAI",
"type": "openai",
"apiKey": "${OPENAI_API_KEY}",
"enabled": true,
"createdAt": "2025-01-01T00:00:00.000Z",
"updatedAt": "2025-01-01T00:00:00.000Z"
}
],
"endpoints": [
{
"id": "my-gpt4o",
"name": "GPT-4o with Context Cache",
"providerId": "openai-1234567890",
"model": "gpt-4o",
"description": "GPT-4o with automatic file caching",
"cacheStrategy": {
"enabled": true,
"ttlMinutes": 60,
"maxSizeKB": 512,
"filePatterns": ["*.md", "*.ts", "*.js"]
},
"enabled": true,
"createdAt": "2025-01-01T00:00:00.000Z",
"updatedAt": "2025-01-01T00:00:00.000Z"
}
],
"defaultEndpoint": "my-gpt4o",
"globalCacheSettings": {
"enabled": true,
"cacheDir": "~/.ccw/cache/context",
"maxTotalSizeMB": 100
}
}
```
## Usage
### Via CLI
```bash
# Use custom endpoint with --model flag
ccw cli -p "Analyze authentication flow" --tool litellm --model my-gpt4o
# With context patterns (automatically cached)
ccw cli -p "@src/auth/**/*.ts Review security" --tool litellm --model my-gpt4o
# Disable caching for specific call
ccw cli -p "Quick question" --tool litellm --model my-gpt4o --no-cache
```
### Via Dashboard API
#### Create Provider
```bash
curl -X POST http://localhost:3000/api/litellm-api/providers \
-H "Content-Type: application/json" \
-d '{
"name": "My OpenAI",
"type": "openai",
"apiKey": "${OPENAI_API_KEY}",
"enabled": true
}'
```
#### Create Endpoint
```bash
curl -X POST http://localhost:3000/api/litellm-api/endpoints \
-H "Content-Type: application/json" \
-d '{
"id": "my-gpt4o",
"name": "GPT-4o with Cache",
"providerId": "openai-1234567890",
"model": "gpt-4o",
"cacheStrategy": {
"enabled": true,
"ttlMinutes": 60,
"maxSizeKB": 512,
"filePatterns": ["*.md", "*.ts"]
},
"enabled": true
}'
```
#### Test Provider Connection
```bash
curl -X POST http://localhost:3000/api/litellm-api/providers/openai-1234567890/test
```
## Context Caching
### How It Works
1. **Pattern Detection**: LiteLLM executor scans prompt for `@patterns`
```
@src/**/*.ts
@CLAUDE.md
@../shared/**/*
```
2. **File Packing**: Files matching patterns are packed via `context-cache` tool
- Respects `max_file_size` limit (default: 1MB per file)
- Applies TTL from endpoint config
- Generates session ID for retrieval
3. **Cache Integration**: Cached content is prepended to prompt
```
<cached files>
---
<original prompt>
```
4. **LLM Call**: Combined prompt sent to LiteLLM with provider credentials
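Steps 1 and 3 can be sketched as follows; this is an illustrative Python rendering of the TypeScript executor's behavior, using the same `@[^\s|]+` pattern the CLI applies to the CONTEXT line:
```python
import re

def extract_patterns(prompt: str) -> list[str]:
    # Step 1: collect @patterns such as @src/**/*.ts or @CLAUDE.md.
    return re.findall(r"@[^\s|]+", prompt)

def assemble_prompt(packed_context: str, prompt: str) -> str:
    # Step 3: prepend the packed file contents to the original prompt.
    return f"{packed_context}\n---\n{prompt}"

print(extract_patterns("@src/auth/**/*.ts Review security"))  # ['@src/auth/**/*.ts']
```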
### Cache Strategy Configuration
```typescript
interface CacheStrategy {
enabled: boolean; // Enable/disable caching for this endpoint
ttlMinutes: number; // Cache lifetime (default: 60)
maxSizeKB: number; // Max cache size (default: 512KB)
filePatterns: string[]; // Glob patterns to cache
}
```
### Example: Security Audit with Cache
```bash
ccw cli -p "
PURPOSE: OWASP Top 10 security audit of authentication module
TASK: • Check SQL injection • Verify session management • Test XSS vectors
CONTEXT: @src/auth/**/*.ts @src/middleware/auth.ts
EXPECTED: Security report with severity levels and remediation steps
" --tool litellm --model my-security-scanner --mode analysis
```
**What happens:**
1. Executor detects `@src/auth/**/*.ts` and `@src/middleware/auth.ts`
2. Packs matching files into context cache
3. Cache entry valid for 60 minutes (per endpoint config)
4. Subsequent calls reuse cached files (no re-packing)
5. LiteLLM receives full context without manual file specification
## Environment Variables
### Provider API Keys
LiteLLM uses standard environment variable names:
| Provider | Env Var Name |
|------------|-----------------------|
| OpenAI | `OPENAI_API_KEY` |
| Anthropic | `ANTHROPIC_API_KEY` |
| Google | `GOOGLE_API_KEY` |
| Azure | `AZURE_API_KEY` |
| Mistral | `MISTRAL_API_KEY` |
| DeepSeek | `DEEPSEEK_API_KEY` |
### Configuration Syntax
Use `${ENV_VAR}` syntax in config:
```json
{
"apiKey": "${OPENAI_API_KEY}"
}
```
The executor resolves these at runtime via `resolveEnvVar()`.
## API Reference
### Config Manager (`litellm-api-config-manager.ts`)
#### Provider Management
```typescript
getAllProviders(baseDir: string): ProviderCredential[]
getProvider(baseDir: string, providerId: string): ProviderCredential | null
getProviderWithResolvedEnvVars(baseDir: string, providerId: string): ProviderCredential & { resolvedApiKey: string } | null
addProvider(baseDir: string, providerData): ProviderCredential
updateProvider(baseDir: string, providerId: string, updates): ProviderCredential
deleteProvider(baseDir: string, providerId: string): boolean
```
#### Endpoint Management
```typescript
getAllEndpoints(baseDir: string): CustomEndpoint[]
getEndpoint(baseDir: string, endpointId: string): CustomEndpoint | null
findEndpointById(baseDir: string, endpointId: string): CustomEndpoint | null
addEndpoint(baseDir: string, endpointData): CustomEndpoint
updateEndpoint(baseDir: string, endpointId: string, updates): CustomEndpoint
deleteEndpoint(baseDir: string, endpointId: string): boolean
```
### Executor (`litellm-executor.ts`)
```typescript
interface LiteLLMExecutionOptions {
prompt: string;
endpointId: string;
baseDir: string;
cwd?: string;
includeDirs?: string[];
enableCache?: boolean;
onOutput?: (data: { type: string; data: string }) => void;
}
interface LiteLLMExecutionResult {
success: boolean;
output: string;
model: string;
provider: string;
cacheUsed: boolean;
cachedFiles?: string[];
error?: string;
}
executeLiteLLMEndpoint(options: LiteLLMExecutionOptions): Promise<LiteLLMExecutionResult>
extractPatterns(prompt: string): string[]
```
## Dashboard Integration
The dashboard provides UI for managing LiteLLM configuration:
- **Providers**: Add/edit/delete provider credentials
- **Endpoints**: Configure custom endpoints with cache strategies
- **Cache Stats**: View cache usage and clear entries
- **Test Connections**: Verify provider API access
Routes are handled by `litellm-api-routes.ts`.
## Limitations
1. **Python Dependency**: Requires `ccw-litellm` Python package installed
2. **Model Support**: Limited to models supported by LiteLLM library
3. **Cache Scope**: Context cache is in-memory (not persisted across restarts)
4. **Pattern Syntax**: Only supports glob-style `@patterns`, not regex
## Troubleshooting
### Error: "Endpoint not found"
- Verify endpoint ID matches config file
- Check `litellm-api-config.json` exists in `.ccw/storage/config/`
### Error: "API key not configured"
- Ensure environment variable is set
- Verify `${ENV_VAR}` syntax in config
- Test with `echo $OPENAI_API_KEY`
### Error: "Failed to spawn Python process"
- Install ccw-litellm: `pip install ccw-litellm`
- Verify Python accessible: `python --version`
### Cache Not Applied
- Check endpoint has `cacheStrategy.enabled: true`
- Verify prompt contains `@patterns`
- Check cache TTL hasn't expired
## Examples
See `examples/litellm-config.json` for complete configuration template.

View File

@@ -0,0 +1,77 @@
/**
* LiteLLM Usage Examples
* Demonstrates how to use the LiteLLM TypeScript client
*/
import { getLiteLLMClient, getLiteLLMStatus } from '../src/tools/litellm-client';
async function main() {
console.log('=== LiteLLM TypeScript Bridge Examples ===\n');
// Example 1: Check availability
console.log('1. Checking LiteLLM availability...');
const status = await getLiteLLMStatus();
console.log(' Status:', status);
console.log('');
if (!status.available) {
console.log('❌ LiteLLM is not available. Please install ccw-litellm:');
console.log(' pip install ccw-litellm');
return;
}
const client = getLiteLLMClient();
// Example 2: Get configuration
console.log('2. Getting configuration...');
try {
const config = await client.getConfig();
console.log(' Config:', config);
} catch (error) {
console.log(' Error:', error.message);
}
console.log('');
// Example 3: Generate embeddings
console.log('3. Generating embeddings...');
try {
const texts = ['Hello world', 'Machine learning is amazing'];
const embedResult = await client.embed(texts, 'default');
console.log(' Dimensions:', embedResult.dimensions);
console.log(' Vectors count:', embedResult.vectors.length);
console.log(' First vector (first 5 dims):', embedResult.vectors[0]?.slice(0, 5));
} catch (error) {
console.log(' Error:', error.message);
}
console.log('');
// Example 4: Single message chat
console.log('4. Single message chat...');
try {
const response = await client.chat('What is 2+2?', 'default');
console.log(' Response:', response);
} catch (error) {
console.log(' Error:', error.message);
}
console.log('');
// Example 5: Multi-turn chat
console.log('5. Multi-turn chat...');
try {
const chatResponse = await client.chatMessages([
{ role: 'system', content: 'You are a helpful math tutor.' },
{ role: 'user', content: 'What is the Pythagorean theorem?' }
], 'default');
console.log(' Content:', chatResponse.content);
console.log(' Model:', chatResponse.model);
console.log(' Usage:', chatResponse.usage);
} catch (error) {
console.log(' Error:', error.message);
}
console.log('');
console.log('=== Examples completed ===');
}
// Run examples
main().catch(console.error);

3854
ccw/package-lock.json generated

File diff suppressed because it is too large

View File

@@ -28,20 +28,32 @@ interface PackageInfo {
/**
* Load package.json with error handling
* Tries root package.json first (../../package.json from dist),
* then falls back to ccw package.json (../package.json from dist)
* @returns Package info with version
*/
function loadPackageInfo(): PackageInfo {
const pkgPath = join(__dirname, '../package.json');
// First try root package.json (parent of ccw directory)
const rootPkgPath = join(__dirname, '../../package.json');
// Fallback to ccw package.json
const ccwPkgPath = join(__dirname, '../package.json');
try {
if (!existsSync(pkgPath)) {
console.error('Fatal Error: package.json not found.');
console.error(`Expected location: ${pkgPath}`);
process.exit(1);
// Try root package.json first
if (existsSync(rootPkgPath)) {
const content = readFileSync(rootPkgPath, 'utf8');
return JSON.parse(content) as PackageInfo;
}
const content = readFileSync(pkgPath, 'utf8');
return JSON.parse(content) as PackageInfo;
// Fallback to ccw package.json
if (existsSync(ccwPkgPath)) {
const content = readFileSync(ccwPkgPath, 'utf8');
return JSON.parse(content) as PackageInfo;
}
console.error('Fatal Error: package.json not found.');
console.error(`Tried locations:\n - ${rootPkgPath}\n - ${ccwPkgPath}`);
process.exit(1);
} catch (error) {
if (error instanceof SyntaxError) {
console.error('Fatal Error: package.json contains invalid JSON.');
@@ -169,12 +181,14 @@ export function run(argv: string[]): void {
.option('--resume [id]', 'Resume previous session (empty=last, or execution ID, or comma-separated IDs for merge)')
.option('--id <id>', 'Custom execution ID (e.g., IMPL-001-step1)')
.option('--no-native', 'Force prompt concatenation instead of native resume')
.option('--cache [items]', 'Cache: comma-separated @patterns and text content')
.option('--inject-mode <mode>', 'Inject mode: none, full, progressive (default: codex=full, others=none)')
// Storage options
.option('--project <path>', 'Project path for storage operations')
.option('--force', 'Confirm destructive operations')
.option('--cli-history', 'Target CLI history storage')
.option('--memory', 'Target memory storage')
.option('--cache', 'Target cache storage')
.option('--storage-cache', 'Target cache storage')
.option('--config', 'Target config storage')
.action((subcommand, args, options) => cliCommand(subcommand, args, options));

View File

@@ -78,6 +78,14 @@ interface CliExecOptions {
resume?: string | boolean; // true = last, string = execution ID, comma-separated for merge
id?: string; // Custom execution ID (e.g., IMPL-001-step1)
noNative?: boolean; // Force prompt concatenation instead of native resume
cache?: string | boolean; // Cache: true = auto from CONTEXT, string = comma-separated patterns/content
injectMode?: 'none' | 'full' | 'progressive'; // Inject mode for cached content
}
/** Cache configuration parsed from --cache */
interface CacheConfig {
patterns?: string[]; // @patterns to pack (items starting with @)
content?: string; // Additional text content (items not starting with @)
}
interface HistoryOptions {
@@ -91,7 +99,7 @@ interface StorageOptions {
project?: string;
cliHistory?: boolean;
memory?: boolean;
cache?: boolean;
storageCache?: boolean;
config?: boolean;
force?: boolean;
}
@@ -173,15 +181,15 @@ async function showStorageInfo(): Promise<void> {
* Clean storage
*/
async function cleanStorage(options: StorageOptions): Promise<void> {
const { all, project, force, cliHistory, memory, cache, config } = options;
const { all, project, force, cliHistory, memory, storageCache, config } = options;
// Determine what to clean
const cleanTypes = {
cliHistory: cliHistory || (!cliHistory && !memory && !cache && !config),
memory: memory || (!cliHistory && !memory && !cache && !config),
cache: cache || (!cliHistory && !memory && !cache && !config),
cliHistory: cliHistory || (!cliHistory && !memory && !storageCache && !config),
memory: memory || (!cliHistory && !memory && !storageCache && !config),
cache: storageCache || (!cliHistory && !memory && !storageCache && !config),
config: config || false, // Config requires explicit flag
all: !cliHistory && !memory && !cache && !config
all: !cliHistory && !memory && !storageCache && !config
};
if (project) {
@@ -383,7 +391,7 @@ async function statusAction(): Promise<void> {
* @param {Object} options - CLI options
*/
async function execAction(positionalPrompt: string | undefined, options: CliExecOptions): Promise<void> {
const { prompt: optionPrompt, file, tool = 'gemini', mode = 'analysis', model, cd, includeDirs, timeout, noStream, resume, id, noNative } = options;
const { prompt: optionPrompt, file, tool = 'gemini', mode = 'analysis', model, cd, includeDirs, timeout, noStream, resume, id, noNative, cache, injectMode } = options;
// Priority: 1. --file, 2. --prompt/-p option, 3. positional argument
let finalPrompt: string | undefined;
@@ -421,6 +429,128 @@ async function execAction(positionalPrompt: string | undefined, options: CliExec
const prompt_to_use = finalPrompt || '';
// Handle cache option: pack @patterns and/or content
let cacheSessionId: string | undefined;
let actualPrompt = prompt_to_use;
if (cache) {
const { handler: contextCacheHandler } = await import('../tools/context-cache.js');
// Parse cache config from comma-separated string
// Items starting with @ are patterns, others are text content
let cacheConfig: CacheConfig = {};
if (cache === true) {
// --cache without value: auto-extract from CONTEXT field
const contextMatch = prompt_to_use.match(/CONTEXT:\s*([^\n]+)/i);
if (contextMatch) {
const contextLine = contextMatch[1];
const patternMatches = contextLine.matchAll(/@[^\s|]+/g);
cacheConfig.patterns = Array.from(patternMatches).map(m => m[0]);
}
} else if (typeof cache === 'string') {
// Parse comma-separated items: @patterns and text content
const items = cache.split(',').map(s => s.trim()).filter(Boolean);
const patterns: string[] = [];
const contentParts: string[] = [];
for (const item of items) {
if (item.startsWith('@')) {
patterns.push(item);
} else {
contentParts.push(item);
}
}
if (patterns.length > 0) {
cacheConfig.patterns = patterns;
}
if (contentParts.length > 0) {
cacheConfig.content = contentParts.join('\n');
}
}
// Also extract patterns from CONTEXT if not provided
if ((!cacheConfig.patterns || cacheConfig.patterns.length === 0) && prompt_to_use) {
const contextMatch = prompt_to_use.match(/CONTEXT:\s*([^\n]+)/i);
if (contextMatch) {
const contextLine = contextMatch[1];
const patternMatches = contextLine.matchAll(/@[^\s|]+/g);
cacheConfig.patterns = Array.from(patternMatches).map(m => m[0]);
}
}
// Pack if we have patterns or content
if ((cacheConfig.patterns && cacheConfig.patterns.length > 0) || cacheConfig.content) {
const patternCount = cacheConfig.patterns?.length || 0;
const hasContent = !!cacheConfig.content;
console.log(chalk.gray(` Caching: ${patternCount} pattern(s)${hasContent ? ' + text content' : ''}...`));
const cacheResult = await contextCacheHandler({
operation: 'pack',
patterns: cacheConfig.patterns,
content: cacheConfig.content,
cwd: cd || process.cwd(),
include_dirs: includeDirs ? includeDirs.split(',') : undefined,
});
if (cacheResult.success && cacheResult.result) {
const packResult = cacheResult.result as { session_id: string; files_packed: number; total_bytes: number };
cacheSessionId = packResult.session_id;
console.log(chalk.gray(` Cached: ${packResult.files_packed} files, ${packResult.total_bytes} bytes`));
console.log(chalk.gray(` Session: ${cacheSessionId}`));
// Determine inject mode:
// --inject-mode explicitly set > tool default (codex=full, others=none)
const effectiveInjectMode = injectMode ?? (tool === 'codex' ? 'full' : 'none');
if (effectiveInjectMode !== 'none' && cacheSessionId) {
if (effectiveInjectMode === 'full') {
// Read full cache content
const readResult = await contextCacheHandler({
operation: 'read',
session_id: cacheSessionId,
offset: 0,
limit: 1024 * 1024, // 1MB max
});
if (readResult.success && readResult.result) {
const { content: cachedContent, total_bytes } = readResult.result as { content: string; total_bytes: number };
console.log(chalk.gray(` Injecting ${total_bytes} bytes (full mode)...`));
actualPrompt = `=== CACHED CONTEXT (${packResult.files_packed} files) ===\n${cachedContent}\n\n=== USER PROMPT ===\n${prompt_to_use}`;
}
} else if (effectiveInjectMode === 'progressive') {
// Progressive mode: read first page only (64KB default)
const pageLimit = 65536;
const readResult = await contextCacheHandler({
operation: 'read',
session_id: cacheSessionId,
offset: 0,
limit: pageLimit,
});
if (readResult.success && readResult.result) {
const { content: cachedContent, total_bytes, has_more, next_offset } = readResult.result as {
content: string; total_bytes: number; has_more: boolean; next_offset: number | null
};
console.log(chalk.gray(` Injecting ${cachedContent.length}/${total_bytes} bytes (progressive mode)...`));
const moreInfo = has_more
? `\n[... ${total_bytes - cachedContent.length} more bytes available via: context_cache(operation="read", session_id="${cacheSessionId}", offset=${next_offset}) ...]`
: '';
actualPrompt = `=== CACHED CONTEXT (${packResult.files_packed} files, progressive) ===\n${cachedContent}${moreInfo}\n\n=== USER PROMPT ===\n${prompt_to_use}`;
}
}
}
console.log();
} else {
console.log(chalk.yellow(` Cache warning: ${cacheResult.error}`));
}
}
}
// Parse resume IDs for merge scenario
const resumeIds = resume && typeof resume === 'string' ? resume.split(',').map(s => s.trim()).filter(Boolean) : [];
const isMerge = resumeIds.length > 1;
@@ -462,7 +592,7 @@ async function execAction(positionalPrompt: string | undefined, options: CliExec
try {
const result = await cliExecutorTool.execute({
tool,
prompt: prompt_to_use,
prompt: actualPrompt,
mode,
model,
cd,
@@ -725,14 +855,28 @@ export async function cliCommand(
console.log(chalk.gray(' --model <model> Model override'));
console.log(chalk.gray(' --cd <path> Working directory'));
console.log(chalk.gray(' --includeDirs <dirs> Additional directories'));
console.log(chalk.gray(' --timeout <ms> Timeout (default: 300000)'));
console.log(chalk.gray(' --timeout <ms> Timeout (default: 0=disabled)'));
console.log(chalk.gray(' --resume [id] Resume previous session'));
console.log(chalk.gray(' --cache <items> Cache: comma-separated @patterns and text'));
console.log(chalk.gray(' --inject-mode <m> Inject mode: none, full, progressive'));
console.log();
console.log(' Cache format:');
console.log(chalk.gray(' --cache "@src/**/*.ts,@CLAUDE.md" # @patterns to pack'));
console.log(chalk.gray(' --cache "@src/**/*,extra context" # patterns + text content'));
console.log(chalk.gray(' --cache # auto from CONTEXT field'));
console.log();
console.log(' Inject modes:');
console.log(chalk.gray(' none: cache only, no injection (default for gemini/qwen)'));
console.log(chalk.gray(' full: inject all cached content (default for codex)'));
console.log(chalk.gray(' progressive: inject first 64KB with MCP continuation hint'));
console.log();
console.log(' Examples:');
console.log(chalk.gray(' ccw cli -p "Analyze auth module" --tool gemini'));
console.log(chalk.gray(' ccw cli -f prompt.txt --tool codex --mode write'));
console.log(chalk.gray(' ccw cli -p "$(cat template.md)" --tool gemini'));
console.log(chalk.gray(' ccw cli --resume --tool gemini'));
console.log(chalk.gray(' ccw cli -p "..." --cache "@src/**/*.ts" --tool codex'));
console.log(chalk.gray(' ccw cli -p "..." --cache "@src/**/*" --inject-mode progressive --tool gemini'));
console.log();
}
}
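A minimal sketch of the --cache parsing rule added above (items starting with @ become pack patterns, everything else is joined into text content); parseCacheArg is an illustrative helper name, not part of the codebase:

interface CacheConfig {
  patterns?: string[];   // @patterns to pack
  content?: string;      // free-text content
}

// Illustrative helper mirroring the split logic in execAction
function parseCacheArg(cache: string): CacheConfig {
  const items = cache.split(',').map((s) => s.trim()).filter(Boolean);
  const patterns = items.filter((i) => i.startsWith('@'));
  const contentParts = items.filter((i) => !i.startsWith('@'));
  return {
    patterns: patterns.length > 0 ? patterns : undefined,
    content: contentParts.length > 0 ? contentParts.join('\n') : undefined,
  };
}

// parseCacheArg('@src/**/*.ts,@CLAUDE.md,focus on auth')
// => { patterns: ['@src/**/*.ts', '@CLAUDE.md'], content: 'focus on auth' }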

View File

@@ -6,7 +6,7 @@
import chalk from 'chalk';
import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
import { join, dirname } from 'path';
import { tmpdir } from 'os';
import { homedir } from 'os';
interface HookOptions {
stdin?: boolean;
@@ -53,9 +53,10 @@ async function readStdin(): Promise<string> {
/**
* Get session state file path
* Uses ~/.claude/.ccw-sessions/ for reliable persistence across sessions
*/
function getSessionStateFile(sessionId: string): string {
const stateDir = join(tmpdir(), '.ccw-sessions');
const stateDir = join(homedir(), '.claude', '.ccw-sessions');
if (!existsSync(stateDir)) {
mkdirSync(stateDir, { recursive: true });
}
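A short sketch of the resulting state-file location after this change; the session-<id>.json filename is an assumption taken from the /api/hook/session-context route later in this diff:

import { join } from 'path';
import { homedir } from 'os';

// Assumed filename pattern, matching the hooks route shown further down
function sessionStateFilePath(sessionId: string): string {
  return join(homedir(), '.claude', '.ccw-sessions', `session-${sessionId}.json`);
}

// e.g. ~/.claude/.ccw-sessions/session-abc123.json on Linux/macOS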

View File

@@ -0,0 +1,441 @@
/**
* LiteLLM API Config Manager
* Manages provider credentials, endpoint configurations, and model discovery
*/
import { join } from 'path';
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
import { homedir } from 'os';
// ===========================
// Type Definitions
// ===========================
export type ProviderType =
| 'openai'
| 'anthropic'
| 'google'
| 'cohere'
| 'azure'
| 'bedrock'
| 'vertexai'
| 'huggingface'
| 'ollama'
| 'custom';
export interface ProviderCredential {
id: string;
name: string;
type: ProviderType;
apiKey?: string;
baseUrl?: string;
apiVersion?: string;
region?: string;
projectId?: string;
organizationId?: string;
enabled: boolean;
metadata?: Record<string, any>;
createdAt: string;
updatedAt: string;
}
export interface EndpointConfig {
id: string;
name: string;
providerId: string;
model: string;
alias?: string;
temperature?: number;
maxTokens?: number;
topP?: number;
enabled: boolean;
metadata?: Record<string, any>;
createdAt: string;
updatedAt: string;
}
export interface ModelInfo {
id: string;
name: string;
provider: ProviderType;
contextWindow: number;
supportsFunctions: boolean;
supportsStreaming: boolean;
inputCostPer1k?: number;
outputCostPer1k?: number;
}
export interface LiteLLMApiConfig {
version: string;
providers: ProviderCredential[];
endpoints: EndpointConfig[];
}
// ===========================
// Model Definitions
// ===========================
export const PROVIDER_MODELS: Record<ProviderType, ModelInfo[]> = {
openai: [
{
id: 'gpt-4-turbo',
name: 'GPT-4 Turbo',
provider: 'openai',
contextWindow: 128000,
supportsFunctions: true,
supportsStreaming: true,
inputCostPer1k: 0.01,
outputCostPer1k: 0.03,
},
{
id: 'gpt-4',
name: 'GPT-4',
provider: 'openai',
contextWindow: 8192,
supportsFunctions: true,
supportsStreaming: true,
inputCostPer1k: 0.03,
outputCostPer1k: 0.06,
},
{
id: 'gpt-3.5-turbo',
name: 'GPT-3.5 Turbo',
provider: 'openai',
contextWindow: 16385,
supportsFunctions: true,
supportsStreaming: true,
inputCostPer1k: 0.0005,
outputCostPer1k: 0.0015,
},
],
anthropic: [
{
id: 'claude-3-opus-20240229',
name: 'Claude 3 Opus',
provider: 'anthropic',
contextWindow: 200000,
supportsFunctions: true,
supportsStreaming: true,
inputCostPer1k: 0.015,
outputCostPer1k: 0.075,
},
{
id: 'claude-3-sonnet-20240229',
name: 'Claude 3 Sonnet',
provider: 'anthropic',
contextWindow: 200000,
supportsFunctions: true,
supportsStreaming: true,
inputCostPer1k: 0.003,
outputCostPer1k: 0.015,
},
{
id: 'claude-3-haiku-20240307',
name: 'Claude 3 Haiku',
provider: 'anthropic',
contextWindow: 200000,
supportsFunctions: true,
supportsStreaming: true,
inputCostPer1k: 0.00025,
outputCostPer1k: 0.00125,
},
],
google: [
{
id: 'gemini-pro',
name: 'Gemini Pro',
provider: 'google',
contextWindow: 32768,
supportsFunctions: true,
supportsStreaming: true,
},
{
id: 'gemini-pro-vision',
name: 'Gemini Pro Vision',
provider: 'google',
contextWindow: 16384,
supportsFunctions: false,
supportsStreaming: true,
},
],
cohere: [
{
id: 'command',
name: 'Command',
provider: 'cohere',
contextWindow: 4096,
supportsFunctions: false,
supportsStreaming: true,
},
{
id: 'command-light',
name: 'Command Light',
provider: 'cohere',
contextWindow: 4096,
supportsFunctions: false,
supportsStreaming: true,
},
],
azure: [],
bedrock: [],
vertexai: [],
huggingface: [],
ollama: [],
custom: [],
};
// ===========================
// Config File Management
// ===========================
const CONFIG_DIR = join(homedir(), '.claude', 'litellm');
const CONFIG_FILE = join(CONFIG_DIR, 'config.json');
function ensureConfigDir(): void {
if (!existsSync(CONFIG_DIR)) {
mkdirSync(CONFIG_DIR, { recursive: true });
}
}
function loadConfig(): LiteLLMApiConfig {
ensureConfigDir();
if (!existsSync(CONFIG_FILE)) {
const defaultConfig: LiteLLMApiConfig = {
version: '1.0.0',
providers: [],
endpoints: [],
};
saveConfig(defaultConfig);
return defaultConfig;
}
try {
const content = readFileSync(CONFIG_FILE, 'utf-8');
return JSON.parse(content);
} catch (err) {
throw new Error(`Failed to load config: ${(err as Error).message}`);
}
}
function saveConfig(config: LiteLLMApiConfig): void {
ensureConfigDir();
try {
writeFileSync(CONFIG_FILE, JSON.stringify(config, null, 2), 'utf-8');
} catch (err) {
throw new Error(`Failed to save config: ${(err as Error).message}`);
}
}
// ===========================
// Provider Management
// ===========================
export function getAllProviders(): ProviderCredential[] {
const config = loadConfig();
return config.providers;
}
export function getProvider(id: string): ProviderCredential | null {
const config = loadConfig();
return config.providers.find((p) => p.id === id) || null;
}
export function createProvider(
data: Omit<ProviderCredential, 'id' | 'createdAt' | 'updatedAt'>
): ProviderCredential {
const config = loadConfig();
const now = new Date().toISOString();
const provider: ProviderCredential = {
...data,
id: `provider-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
createdAt: now,
updatedAt: now,
};
config.providers.push(provider);
saveConfig(config);
return provider;
}
export function updateProvider(
id: string,
updates: Partial<ProviderCredential>
): ProviderCredential | null {
const config = loadConfig();
const index = config.providers.findIndex((p) => p.id === id);
if (index === -1) {
return null;
}
const updated: ProviderCredential = {
...config.providers[index],
...updates,
id,
updatedAt: new Date().toISOString(),
};
config.providers[index] = updated;
saveConfig(config);
return updated;
}
export function deleteProvider(id: string): { success: boolean } {
const config = loadConfig();
const index = config.providers.findIndex((p) => p.id === id);
if (index === -1) {
return { success: false };
}
config.providers.splice(index, 1);
// Also delete endpoints using this provider
config.endpoints = config.endpoints.filter((e) => e.providerId !== id);
saveConfig(config);
return { success: true };
}
export async function testProviderConnection(
providerId: string
): Promise<{ success: boolean; error?: string }> {
const provider = getProvider(providerId);
if (!provider) {
return { success: false, error: 'Provider not found' };
}
if (!provider.enabled) {
return { success: false, error: 'Provider is disabled' };
}
// Basic validation
if (!provider.apiKey && provider.type !== 'ollama' && provider.type !== 'custom') {
return { success: false, error: 'API key is required for this provider type' };
}
// TODO: Implement actual provider connection testing using litellm-client
// For now, just validate the configuration
return { success: true };
}
// ===========================
// Endpoint Management
// ===========================
export function getAllEndpoints(): EndpointConfig[] {
const config = loadConfig();
return config.endpoints;
}
export function getEndpoint(id: string): EndpointConfig | null {
const config = loadConfig();
return config.endpoints.find((e) => e.id === id) || null;
}
export function createEndpoint(
data: Omit<EndpointConfig, 'id' | 'createdAt' | 'updatedAt'>
): EndpointConfig {
const config = loadConfig();
// Validate provider exists
const provider = config.providers.find((p) => p.id === data.providerId);
if (!provider) {
throw new Error('Provider not found');
}
const now = new Date().toISOString();
const endpoint: EndpointConfig = {
...data,
id: `endpoint-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
createdAt: now,
updatedAt: now,
};
config.endpoints.push(endpoint);
saveConfig(config);
return endpoint;
}
export function updateEndpoint(
id: string,
updates: Partial<EndpointConfig>
): EndpointConfig | null {
const config = loadConfig();
const index = config.endpoints.findIndex((e) => e.id === id);
if (index === -1) {
return null;
}
// Validate provider if being updated
if (updates.providerId) {
const provider = config.providers.find((p) => p.id === updates.providerId);
if (!provider) {
throw new Error('Provider not found');
}
}
const updated: EndpointConfig = {
...config.endpoints[index],
...updates,
id,
updatedAt: new Date().toISOString(),
};
config.endpoints[index] = updated;
saveConfig(config);
return updated;
}
export function deleteEndpoint(id: string): { success: boolean } {
const config = loadConfig();
const index = config.endpoints.findIndex((e) => e.id === id);
if (index === -1) {
return { success: false };
}
config.endpoints.splice(index, 1);
saveConfig(config);
return { success: true };
}
// ===========================
// Model Discovery
// ===========================
export function getModelsForProviderType(providerType: ProviderType): ModelInfo[] | null {
return PROVIDER_MODELS[providerType] || null;
}
export function getAllModels(): Record<ProviderType, ModelInfo[]> {
return PROVIDER_MODELS;
}
// ===========================
// Config Access
// ===========================
export function getFullConfig(): LiteLLMApiConfig {
return loadConfig();
}
export function resetConfig(): void {
const defaultConfig: LiteLLMApiConfig = {
version: '1.0.0',
providers: [],
endpoints: [],
};
saveConfig(defaultConfig);
}
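A hedged usage sketch of the config manager above; the import path is illustrative and the API key is a placeholder:

import { createProvider, createEndpoint, getAllEndpoints } from './litellm-api-config.js'; // illustrative path

// Register a provider, then attach an endpoint that references it
const provider = createProvider({
  name: 'My OpenAI',
  type: 'openai',
  apiKey: 'sk-placeholder', // placeholder, not a real key
  enabled: true,
});

const endpoint = createEndpoint({
  name: 'fast-chat',
  providerId: provider.id,
  model: 'gpt-3.5-turbo',
  enabled: true,
});

console.log(endpoint.id, getAllEndpoints().length);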

File diff suppressed because it is too large

View File

@@ -0,0 +1,222 @@
/**
* Provider Model Presets
*
* Predefined model information for each supported LLM provider.
* Used for UI dropdowns and validation.
*/
import type { ProviderType } from '../types/litellm-api-config.js';
/**
* Model information metadata
*/
export interface ModelInfo {
/** Model identifier (used in API calls) */
id: string;
/** Human-readable display name */
name: string;
/** Context window size in tokens */
contextWindow: number;
/** Whether this model supports prompt caching */
supportsCaching: boolean;
}
/**
* Embedding model information metadata
*/
export interface EmbeddingModelInfo {
/** Model identifier (used in API calls) */
id: string;
/** Human-readable display name */
name: string;
/** Embedding dimensions */
dimensions: number;
/** Maximum input tokens */
maxTokens: number;
/** Provider identifier */
provider: string;
}
/**
* Predefined models for each API format
* Used for UI selection and validation
* Note: Most providers use OpenAI-compatible format
*/
export const PROVIDER_MODELS: Record<ProviderType, ModelInfo[]> = {
// OpenAI-compatible format (used by OpenAI, DeepSeek, Ollama, etc.)
openai: [
{
id: 'gpt-4o',
name: 'GPT-4o',
contextWindow: 128000,
supportsCaching: true
},
{
id: 'gpt-4o-mini',
name: 'GPT-4o Mini',
contextWindow: 128000,
supportsCaching: true
},
{
id: 'o1',
name: 'O1',
contextWindow: 200000,
supportsCaching: true
},
{
id: 'deepseek-chat',
name: 'DeepSeek Chat',
contextWindow: 64000,
supportsCaching: false
},
{
id: 'deepseek-coder',
name: 'DeepSeek Coder',
contextWindow: 64000,
supportsCaching: false
},
{
id: 'llama3.2',
name: 'Llama 3.2',
contextWindow: 128000,
supportsCaching: false
},
{
id: 'qwen2.5-coder',
name: 'Qwen 2.5 Coder',
contextWindow: 32000,
supportsCaching: false
}
],
// Anthropic format
anthropic: [
{
id: 'claude-sonnet-4-20250514',
name: 'Claude Sonnet 4',
contextWindow: 200000,
supportsCaching: true
},
{
id: 'claude-3-5-sonnet-20241022',
name: 'Claude 3.5 Sonnet',
contextWindow: 200000,
supportsCaching: true
},
{
id: 'claude-3-5-haiku-20241022',
name: 'Claude 3.5 Haiku',
contextWindow: 200000,
supportsCaching: true
},
{
id: 'claude-3-opus-20240229',
name: 'Claude 3 Opus',
contextWindow: 200000,
supportsCaching: false
}
],
// Custom format
custom: [
{
id: 'custom-model',
name: 'Custom Model',
contextWindow: 128000,
supportsCaching: false
}
]
};
/**
* Get models for a specific provider
* @param providerType - Provider type to get models for
* @returns Array of model information
*/
export function getModelsForProvider(providerType: ProviderType): ModelInfo[] {
return PROVIDER_MODELS[providerType] || [];
}
/**
* Predefined embedding models for each API format
* Used for UI selection and validation
*/
export const EMBEDDING_MODELS: Record<ProviderType, EmbeddingModelInfo[]> = {
// OpenAI embedding models
openai: [
{
id: 'text-embedding-3-small',
name: 'Text Embedding 3 Small',
dimensions: 1536,
maxTokens: 8191,
provider: 'openai'
},
{
id: 'text-embedding-3-large',
name: 'Text Embedding 3 Large',
dimensions: 3072,
maxTokens: 8191,
provider: 'openai'
},
{
id: 'text-embedding-ada-002',
name: 'Ada 002',
dimensions: 1536,
maxTokens: 8191,
provider: 'openai'
}
],
// Anthropic doesn't have embedding models
anthropic: [],
// Custom embedding models
custom: [
{
id: 'custom-embedding',
name: 'Custom Embedding',
dimensions: 1536,
maxTokens: 8192,
provider: 'custom'
}
]
};
/**
* Get embedding models for a specific provider
* @param providerType - Provider type to get embedding models for
* @returns Array of embedding model information
*/
export function getEmbeddingModelsForProvider(providerType: ProviderType): EmbeddingModelInfo[] {
return EMBEDDING_MODELS[providerType] || [];
}
/**
* Get model information by ID within a provider
* @param providerType - Provider type
* @param modelId - Model identifier
* @returns Model information or undefined if not found
*/
export function getModelInfo(providerType: ProviderType, modelId: string): ModelInfo | undefined {
const models = PROVIDER_MODELS[providerType] || [];
return models.find(m => m.id === modelId);
}
/**
* Validate if a model ID is supported by a provider
* @param providerType - Provider type
* @param modelId - Model identifier to validate
* @returns true if model is valid for provider
*/
export function isValidModel(providerType: ProviderType, modelId: string): boolean {
return getModelInfo(providerType, modelId) !== undefined;
}
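A brief usage sketch of the preset helpers defined above (import path illustrative):

import { getModelsForProvider, getEmbeddingModelsForProvider, isValidModel } from './provider-model-presets.js';

const chatModels = getModelsForProvider('openai');
console.log(chatModels.map((m) => m.id)); // ['gpt-4o', 'gpt-4o-mini', 'o1', ...]

console.log(isValidModel('anthropic', 'claude-3-opus-20240229')); // true
console.log(isValidModel('anthropic', 'gpt-4o'));                 // false

const embeddingModels = getEmbeddingModelsForProvider('openai');
console.log(embeddingModels[0]?.dimensions); // 1536 (text-embedding-3-small)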

View File

@@ -1,4 +1,4 @@
import { existsSync, mkdirSync, readFileSync, writeFileSync, statSync } from 'fs';
import { existsSync, mkdirSync, readFileSync, writeFileSync, statSync, unlinkSync, readdirSync } from 'fs';
import { join, dirname } from 'path';
import { StoragePaths, ensureStorageDir } from '../config/storage-paths.js';
@@ -118,8 +118,7 @@ export class CacheManager<T> {
invalidate(): void {
try {
if (existsSync(this.cacheFile)) {
const fs = require('fs');
fs.unlinkSync(this.cacheFile);
unlinkSync(this.cacheFile);
}
} catch (err) {
console.warn(`Cache invalidation error for ${this.cacheFile}:`, (err as Error).message);
@@ -180,8 +179,7 @@ export class CacheManager<T> {
if (depth > 3) return; // Limit recursion depth
try {
const fs = require('fs');
const entries = fs.readdirSync(dirPath, { withFileTypes: true });
const entries = readdirSync(dirPath, { withFileTypes: true });
for (const entry of entries) {
const fullPath = join(dirPath, entry.name);

View File

@@ -46,7 +46,8 @@ const MODULE_CSS_FILES = [
'27-graph-explorer.css',
'28-mcp-manager.css',
'29-help.css',
'30-core-memory.css'
'30-core-memory.css',
'31-api-settings.css'
];
const MODULE_FILES = [
@@ -95,6 +96,7 @@ const MODULE_FILES = [
'views/skills-manager.js',
'views/rules-manager.js',
'views/claude-manager.js',
'views/api-settings.js',
'views/help.js',
'main.js'
];

View File

@@ -33,6 +33,17 @@ import {
getFullConfigResponse,
PREDEFINED_MODELS
} from '../../tools/cli-config-manager.js';
import {
loadClaudeCliTools,
saveClaudeCliTools,
updateClaudeToolEnabled,
updateClaudeCacheSettings,
getClaudeCliToolsInfo,
addClaudeCustomEndpoint,
removeClaudeCustomEndpoint,
updateCodeIndexMcp,
getCodeIndexMcp
} from '../../tools/claude-cli-tools.js';
export interface RouteContext {
pathname: string;
@@ -204,6 +215,93 @@ export async function handleCliRoutes(ctx: RouteContext): Promise<boolean> {
}
}
// API: Get all custom endpoints
if (pathname === '/api/cli/endpoints' && req.method === 'GET') {
try {
const config = loadClaudeCliTools(initialPath);
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ endpoints: config.customEndpoints || [] }));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// API: Add/Update custom endpoint
if (pathname === '/api/cli/endpoints' && req.method === 'POST') {
handlePostRequest(req, res, async (body: unknown) => {
try {
const { id, name, enabled } = body as { id: string; name: string; enabled: boolean };
if (!id || !name) {
return { error: 'id and name are required', status: 400 };
}
const config = addClaudeCustomEndpoint(initialPath, { id, name, enabled: enabled !== false });
broadcastToClients({
type: 'CLI_ENDPOINT_UPDATED',
payload: { endpoint: { id, name, enabled }, timestamp: new Date().toISOString() }
});
return { success: true, endpoints: config.customEndpoints };
} catch (err) {
return { error: (err as Error).message, status: 500 };
}
});
return true;
}
// API: Update custom endpoint enabled status
if (pathname.match(/^\/api\/cli\/endpoints\/[^/]+$/) && req.method === 'PUT') {
const endpointId = pathname.split('/').pop() || '';
handlePostRequest(req, res, async (body: unknown) => {
try {
const { enabled, name } = body as { enabled?: boolean; name?: string };
const config = loadClaudeCliTools(initialPath);
const endpoint = config.customEndpoints.find(e => e.id === endpointId);
if (!endpoint) {
return { error: 'Endpoint not found', status: 404 };
}
if (typeof enabled === 'boolean') endpoint.enabled = enabled;
if (name) endpoint.name = name;
saveClaudeCliTools(initialPath, config);
broadcastToClients({
type: 'CLI_ENDPOINT_UPDATED',
payload: { endpoint, timestamp: new Date().toISOString() }
});
return { success: true, endpoint };
} catch (err) {
return { error: (err as Error).message, status: 500 };
}
});
return true;
}
// API: Delete custom endpoint
if (pathname.match(/^\/api\/cli\/endpoints\/[^/]+$/) && req.method === 'DELETE') {
const endpointId = pathname.split('/').pop() || '';
try {
const config = removeClaudeCustomEndpoint(initialPath, endpointId);
broadcastToClients({
type: 'CLI_ENDPOINT_DELETED',
payload: { endpointId, timestamp: new Date().toISOString() }
});
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: true, endpoints: config.customEndpoints }));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// API: CLI Execution History
if (pathname === '/api/cli/history') {
const projectPath = url.searchParams.get('path') || initialPath;
@@ -558,5 +656,141 @@ export async function handleCliRoutes(ctx: RouteContext): Promise<boolean> {
return true;
}
// API: Get CLI Tools Config from .claude/cli-tools.json (with fallback to global)
if (pathname === '/api/cli/tools-config' && req.method === 'GET') {
try {
const config = loadClaudeCliTools(initialPath);
const info = getClaudeCliToolsInfo(initialPath);
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({
...config,
_configInfo: info
}));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// API: Update CLI Tools Config
if (pathname === '/api/cli/tools-config' && req.method === 'PUT') {
handlePostRequest(req, res, async (body: unknown) => {
try {
const updates = body as Partial<any>;
const config = loadClaudeCliTools(initialPath);
// Merge updates
const updatedConfig = {
...config,
...updates,
tools: { ...config.tools, ...(updates.tools || {}) },
settings: {
...config.settings,
...(updates.settings || {}),
cache: {
...config.settings.cache,
...(updates.settings?.cache || {})
}
}
};
saveClaudeCliTools(initialPath, updatedConfig);
broadcastToClients({
type: 'CLI_TOOLS_CONFIG_UPDATED',
payload: { config: updatedConfig, timestamp: new Date().toISOString() }
});
return { success: true, config: updatedConfig };
} catch (err) {
return { error: (err as Error).message, status: 500 };
}
});
return true;
}
// API: Update specific tool enabled status
const toolsConfigMatch = pathname.match(/^\/api\/cli\/tools-config\/([a-zA-Z0-9_-]+)$/);
if (toolsConfigMatch && req.method === 'PUT') {
const toolName = toolsConfigMatch[1];
handlePostRequest(req, res, async (body: unknown) => {
try {
const { enabled } = body as { enabled: boolean };
const config = updateClaudeToolEnabled(initialPath, toolName, enabled);
broadcastToClients({
type: 'CLI_TOOL_TOGGLED',
payload: { tool: toolName, enabled, timestamp: new Date().toISOString() }
});
return { success: true, config };
} catch (err) {
return { error: (err as Error).message, status: 500 };
}
});
return true;
}
// API: Update cache settings
if (pathname === '/api/cli/tools-config/cache' && req.method === 'PUT') {
handlePostRequest(req, res, async (body: unknown) => {
try {
const cacheSettings = body as { injectionMode?: string; defaultPrefix?: string; defaultSuffix?: string };
const config = updateClaudeCacheSettings(initialPath, cacheSettings as any);
broadcastToClients({
type: 'CLI_CACHE_SETTINGS_UPDATED',
payload: { cache: config.settings.cache, timestamp: new Date().toISOString() }
});
return { success: true, config };
} catch (err) {
return { error: (err as Error).message, status: 500 };
}
});
return true;
}
// API: Get Code Index MCP provider
if (pathname === '/api/cli/code-index-mcp' && req.method === 'GET') {
try {
const provider = getCodeIndexMcp(initialPath);
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ provider }));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// API: Update Code Index MCP provider
if (pathname === '/api/cli/code-index-mcp' && req.method === 'PUT') {
handlePostRequest(req, res, async (body: unknown) => {
try {
const { provider } = body as { provider: 'codexlens' | 'ace' };
if (!provider || !['codexlens', 'ace'].includes(provider)) {
return { error: 'Invalid provider. Must be "codexlens" or "ace"', status: 400 };
}
const result = updateCodeIndexMcp(initialPath, provider);
if (result.success) {
broadcastToClients({
type: 'CODE_INDEX_MCP_UPDATED',
payload: { provider, timestamp: new Date().toISOString() }
});
return { success: true, provider };
} else {
return { error: result.error, status: 500 };
}
} catch (err) {
return { error: (err as Error).message, status: 500 };
}
});
return true;
}
return false;
}
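A hedged client-side sketch for the new Code Index MCP routes; the base URL is an assumption about where the dashboard server is listening:

const BASE = 'http://localhost:3000'; // assumed dashboard address

// Read the current provider
const current = await fetch(`${BASE}/api/cli/code-index-mcp`).then((r) => r.json());
console.log(current.provider); // 'codexlens' or 'ace'

// Switch providers
await fetch(`${BASE}/api/cli/code-index-mcp`, {
  method: 'PUT',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ provider: 'ace' }),
});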

View File

@@ -9,6 +9,7 @@ import {
bootstrapVenv,
executeCodexLens,
checkSemanticStatus,
ensureLiteLLMEmbedderReady,
installSemantic,
detectGpuSupport,
uninstallCodexLens,
@@ -80,10 +81,22 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise<boolean>
// API: CodexLens Index List - Get all indexed projects with details
if (pathname === '/api/codexlens/indexes') {
try {
// Get config for index directory path
const configResult = await executeCodexLens(['config', '--json']);
// Check if CodexLens is installed first (without auto-installing)
const venvStatus = await checkVenvStatus();
if (!venvStatus.ready) {
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: true, indexes: [], totalSize: 0, totalSizeFormatted: '0 B' }));
return true;
}
// Execute all CLI commands in parallel
const [configResult, projectsResult, statusResult] = await Promise.all([
executeCodexLens(['config', '--json']),
executeCodexLens(['projects', 'list', '--json']),
executeCodexLens(['status', '--json'])
]);
let indexDir = '';
if (configResult.success) {
try {
const config = extractJSON(configResult.output);
@@ -96,8 +109,6 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise<boolean>
}
}
// Get project list using 'projects list' command
const projectsResult = await executeCodexLens(['projects', 'list', '--json']);
let indexes: any[] = [];
let totalSize = 0;
let vectorIndexCount = 0;
@@ -107,7 +118,8 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise<boolean>
try {
const projectsData = extractJSON(projectsResult.output);
if (projectsData.success && Array.isArray(projectsData.result)) {
const { statSync, existsSync } = await import('fs');
const { stat, readdir } = await import('fs/promises');
const { existsSync } = await import('fs');
const { basename, join } = await import('path');
for (const project of projectsData.result) {
@@ -128,15 +140,14 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise<boolean>
// Try to get actual index size from index_root
if (project.index_root && existsSync(project.index_root)) {
try {
const { readdirSync } = await import('fs');
const files = readdirSync(project.index_root);
const files = await readdir(project.index_root);
for (const file of files) {
try {
const filePath = join(project.index_root, file);
const stat = statSync(filePath);
projectSize += stat.size;
if (!lastModified || stat.mtime > lastModified) {
lastModified = stat.mtime;
const fileStat = await stat(filePath);
projectSize += fileStat.size;
if (!lastModified || fileStat.mtime > lastModified) {
lastModified = fileStat.mtime;
}
// Check for vector/embedding files
if (file.includes('vector') || file.includes('embedding') ||
@@ -186,8 +197,7 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise<boolean>
}
}
// Also get summary stats from status command
const statusResult = await executeCodexLens(['status', '--json']);
// Parse summary stats from status command (already fetched in parallel)
let statusSummary: any = {};
if (statusResult.success) {
@@ -242,6 +252,71 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise<boolean>
return true;
}
// API: CodexLens Dashboard Init - Aggregated endpoint for page initialization
if (pathname === '/api/codexlens/dashboard-init') {
try {
const venvStatus = await checkVenvStatus();
if (!venvStatus.ready) {
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({
installed: false,
status: venvStatus,
config: { index_dir: '~/.codexlens/indexes', index_count: 0 },
semantic: { available: false }
}));
return true;
}
// Parallel fetch all initialization data
const [configResult, statusResult, semanticStatus] = await Promise.all([
executeCodexLens(['config', '--json']),
executeCodexLens(['status', '--json']),
checkSemanticStatus()
]);
// Parse config
let config = { index_dir: '~/.codexlens/indexes', index_count: 0 };
if (configResult.success) {
try {
const configData = extractJSON(configResult.output);
if (configData.success && configData.result) {
config.index_dir = configData.result.index_dir || configData.result.index_root || config.index_dir;
}
} catch (e) {
console.error('[CodexLens] Failed to parse config for dashboard init:', e.message);
}
}
// Parse status
let statusData: any = {};
if (statusResult.success) {
try {
const status = extractJSON(statusResult.output);
if (status.success && status.result) {
config.index_count = status.result.projects_count || 0;
statusData = status.result;
}
} catch (e) {
console.error('[CodexLens] Failed to parse status for dashboard init:', e.message);
}
}
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({
installed: true,
status: venvStatus,
config,
semantic: semanticStatus,
statusData
}));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: false, error: err.message }));
}
return true;
}
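// A minimal consumer sketch for the aggregated endpoint above (base URL assumed):
//
//   const init = await fetch('http://localhost:3000/api/codexlens/dashboard-init').then((r) => r.json());
//   if (!init.installed) {
//     console.log('CodexLens not installed');
//   } else {
//     console.log(`Indexes: ${init.config.index_count} in ${init.config.index_dir}`);
//     console.log('Semantic status:', init.semantic);
//   }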
// API: CodexLens Bootstrap (Install)
if (pathname === '/api/codexlens/bootstrap' && req.method === 'POST') {
handlePostRequest(req, res, async () => {
@@ -290,14 +365,24 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise<boolean>
// API: CodexLens Config - GET (Get current configuration with index count)
if (pathname === '/api/codexlens/config' && req.method === 'GET') {
try {
// Check if CodexLens is installed first (without auto-installing)
const venvStatus = await checkVenvStatus();
let responseData = { index_dir: '~/.codexlens/indexes', index_count: 0 };
// If not installed, return default config without executing CodexLens
if (!venvStatus.ready) {
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(responseData));
return true;
}
// Fetch both config and status to merge index_count
const [configResult, statusResult] = await Promise.all([
executeCodexLens(['config', '--json']),
executeCodexLens(['status', '--json'])
]);
let responseData = { index_dir: '~/.codexlens/indexes', index_count: 0 };
// Parse config (extract JSON from output that may contain log messages)
if (configResult.success) {
try {
@@ -388,9 +473,17 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise<boolean>
// API: CodexLens Init (Initialize workspace index)
if (pathname === '/api/codexlens/init' && req.method === 'POST') {
handlePostRequest(req, res, async (body) => {
const { path: projectPath, indexType = 'vector', embeddingModel = 'code' } = body;
const { path: projectPath, indexType = 'vector', embeddingModel = 'code', embeddingBackend = 'fastembed', maxWorkers = 1 } = body;
const targetPath = projectPath || initialPath;
// Ensure LiteLLM backend dependencies are installed before running the CLI
if (indexType !== 'normal' && embeddingBackend === 'litellm') {
const installResult = await ensureLiteLLMEmbedderReady();
if (!installResult.success) {
return { success: false, error: installResult.error || 'Failed to prepare LiteLLM embedder', status: 500 };
}
}
// Build CLI arguments based on index type
const args = ['init', targetPath, '--json'];
if (indexType === 'normal') {
@@ -398,6 +491,14 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise<boolean>
} else {
// Add embedding model selection for vector index
args.push('--embedding-model', embeddingModel);
// Add embedding backend if not using default fastembed
if (embeddingBackend && embeddingBackend !== 'fastembed') {
args.push('--embedding-backend', embeddingBackend);
}
// Add max workers for concurrent API calls (useful for litellm backend)
if (maxWorkers && maxWorkers > 1) {
args.push('--max-workers', String(maxWorkers));
}
}
// Broadcast start event
@@ -552,6 +653,8 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise<boolean>
const query = url.searchParams.get('query') || '';
const limit = parseInt(url.searchParams.get('limit') || '20', 10);
const mode = url.searchParams.get('mode') || 'exact'; // exact, fuzzy, hybrid, vector
const maxContentLength = parseInt(url.searchParams.get('max_content_length') || '200', 10);
const extraFilesCount = parseInt(url.searchParams.get('extra_files_count') || '10', 10);
const projectPath = url.searchParams.get('path') || initialPath;
if (!query) {
@@ -561,15 +664,46 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise<boolean>
}
try {
const args = ['search', query, '--path', projectPath, '--limit', limit.toString(), '--mode', mode, '--json'];
// Request more results to support split (full content + extra files)
const totalToFetch = limit + extraFilesCount;
const args = ['search', query, '--path', projectPath, '--limit', totalToFetch.toString(), '--mode', mode, '--json'];
const result = await executeCodexLens(args, { cwd: projectPath });
if (result.success) {
try {
const parsed = extractJSON(result.output);
const allResults = parsed.result?.results || [];
// Truncate content and split results
const truncateContent = (content: string | null | undefined): string => {
if (!content) return '';
if (content.length <= maxContentLength) return content;
return content.slice(0, maxContentLength) + '...';
};
// Split results: first N with full content, rest as file paths only
const resultsWithContent = allResults.slice(0, limit).map((r: any) => ({
...r,
content: truncateContent(r.content || r.excerpt),
excerpt: truncateContent(r.excerpt || r.content),
}));
const extraResults = allResults.slice(limit, limit + extraFilesCount);
const extraFiles = [...new Set(extraResults.map((r: any) => r.path || r.file))];
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: true, ...parsed.result }));
res.end(JSON.stringify({
success: true,
results: resultsWithContent,
extra_files: extraFiles.length > 0 ? extraFiles : undefined,
metadata: {
total: allResults.length,
limit,
max_content_length: maxContentLength,
extra_files_count: extraFilesCount,
},
}));
} catch {
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: true, results: [], output: result.output }));
@@ -682,6 +816,87 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise<boolean>
return true;
}
// API: List available GPU devices for selection
if (pathname === '/api/codexlens/gpu/list' && req.method === 'GET') {
try {
// Check if CodexLens is installed first (without auto-installing)
const venvStatus = await checkVenvStatus();
if (!venvStatus.ready) {
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: true, devices: [], selected_device_id: null }));
return true;
}
const result = await executeCodexLens(['gpu-list', '--json']);
if (result.success) {
try {
const parsed = extractJSON(result.output);
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(parsed));
} catch {
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: true, devices: [], output: result.output }));
}
} else {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: false, error: result.error }));
}
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: false, error: err.message }));
}
return true;
}
// API: Select GPU device for embedding
if (pathname === '/api/codexlens/gpu/select' && req.method === 'POST') {
handlePostRequest(req, res, async (body) => {
const { device_id } = body;
if (device_id === undefined || device_id === null) {
return { success: false, error: 'device_id is required', status: 400 };
}
try {
const result = await executeCodexLens(['gpu-select', String(device_id), '--json']);
if (result.success) {
try {
const parsed = extractJSON(result.output);
return parsed;
} catch {
return { success: true, message: 'GPU selected', output: result.output };
}
} else {
return { success: false, error: result.error, status: 500 };
}
} catch (err) {
return { success: false, error: err.message, status: 500 };
}
});
return true;
}
// API: Reset GPU selection to auto-detection
if (pathname === '/api/codexlens/gpu/reset' && req.method === 'POST') {
handlePostRequest(req, res, async () => {
try {
const result = await executeCodexLens(['gpu-reset', '--json']);
if (result.success) {
try {
const parsed = extractJSON(result.output);
return parsed;
} catch {
return { success: true, message: 'GPU selection reset', output: result.output };
}
} else {
return { success: false, error: result.error, status: 500 };
}
} catch (err) {
return { success: false, error: err.message, status: 500 };
}
});
return true;
}
// API: CodexLens Semantic Search Install (with GPU mode support)
if (pathname === '/api/codexlens/semantic/install' && req.method === 'POST') {
handlePostRequest(req, res, async (body) => {
@@ -721,6 +936,13 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise<boolean>
// API: CodexLens Model List (list available embedding models)
if (pathname === '/api/codexlens/models' && req.method === 'GET') {
try {
// Check if CodexLens is installed first (without auto-installing)
const venvStatus = await checkVenvStatus();
if (!venvStatus.ready) {
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: false, error: 'CodexLens not installed' }));
return true;
}
const result = await executeCodexLens(['model-list', '--json']);
if (result.success) {
try {

View File

@@ -31,8 +31,8 @@ const GLOBAL_SETTINGS_PATH = join(homedir(), '.claude', 'settings.json');
* @returns {string}
*/
function getProjectSettingsPath(projectPath) {
const normalizedPath = projectPath.replace(/\//g, '\\').replace(/^\\([a-zA-Z])\\/, '$1:\\');
return join(normalizedPath, '.claude', 'settings.json');
// path.join automatically handles cross-platform path separators
return join(projectPath, '.claude', 'settings.json');
}
/**
@@ -181,29 +181,13 @@ function deleteHookFromSettings(projectPath, scope, event, hookIndex) {
}
// ========================================
// Session State Tracking (for progressive disclosure)
// Session State Tracking
// ========================================
// Track sessions that have received startup context
// Key: sessionId, Value: timestamp of first context load
const sessionContextState = new Map<string, {
firstLoad: string;
loadCount: number;
lastPrompt?: string;
}>();
// Cleanup old sessions (older than 24 hours)
function cleanupOldSessions() {
const cutoff = Date.now() - 24 * 60 * 60 * 1000;
for (const [sessionId, state] of sessionContextState.entries()) {
if (new Date(state.firstLoad).getTime() < cutoff) {
sessionContextState.delete(sessionId);
}
}
}
// Run cleanup every hour
setInterval(cleanupOldSessions, 60 * 60 * 1000);
// NOTE: Session state is managed by the CLI command (src/commands/hook.ts)
// using file-based persistence (~/.claude/.ccw-sessions/).
// This ensures consistent state tracking across all invocation methods.
// The /api/hook endpoint delegates to SessionClusteringService without
// managing its own state, as the authoritative state lives in the CLI layer.
// ========================================
// Route Handler
@@ -286,7 +270,8 @@ export async function handleHooksRoutes(ctx: RouteContext): Promise<boolean> {
}
// API: Unified Session Context endpoint (Progressive Disclosure)
// Automatically detects first prompt vs subsequent prompts
// DEPRECATED: Use CLI command `ccw hook session-context --stdin` instead.
// This endpoint now uses file-based state (shared with CLI) for consistency.
// - First prompt: returns cluster-based session overview
// - Subsequent prompts: returns intent-matched sessions based on prompt
if (pathname === '/api/hook/session-context' && req.method === 'POST') {
@@ -306,21 +291,30 @@ export async function handleHooksRoutes(ctx: RouteContext): Promise<boolean> {
const { SessionClusteringService } = await import('../session-clustering-service.js');
const clusteringService = new SessionClusteringService(projectPath);
// Check if this is the first prompt for this session
const existingState = sessionContextState.get(sessionId);
// Use file-based session state (shared with CLI hook.ts)
const sessionStateDir = join(homedir(), '.claude', '.ccw-sessions');
const sessionStateFile = join(sessionStateDir, `session-${sessionId}.json`);
let existingState: { firstLoad: string; loadCount: number; lastPrompt?: string } | null = null;
if (existsSync(sessionStateFile)) {
try {
existingState = JSON.parse(readFileSync(sessionStateFile, 'utf-8'));
} catch {
existingState = null;
}
}
const isFirstPrompt = !existingState;
// Update session state
if (isFirstPrompt) {
sessionContextState.set(sessionId, {
firstLoad: new Date().toISOString(),
loadCount: 1,
lastPrompt: prompt
});
} else {
existingState.loadCount++;
existingState.lastPrompt = prompt;
// Update session state (file-based)
const newState = isFirstPrompt
? { firstLoad: new Date().toISOString(), loadCount: 1, lastPrompt: prompt }
: { ...existingState!, loadCount: existingState!.loadCount + 1, lastPrompt: prompt };
if (!existsSync(sessionStateDir)) {
mkdirSync(sessionStateDir, { recursive: true });
}
writeFileSync(sessionStateFile, JSON.stringify(newState, null, 2));
// Determine which type of context to return
let contextType: 'session-start' | 'context';
@@ -351,7 +345,7 @@ export async function handleHooksRoutes(ctx: RouteContext): Promise<boolean> {
success: true,
type: contextType,
isFirstPrompt,
loadCount: sessionContextState.get(sessionId)?.loadCount || 1,
loadCount: newState.loadCount,
content,
sessionId
};
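A small sketch of the shared file-based session state that both the CLI hook command and this route now read and write; the shape mirrors the code above:

import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
import { join } from 'path';
import { homedir } from 'os';

interface SessionContextState {
  firstLoad: string;    // ISO timestamp of the first context load
  loadCount: number;    // prompts seen so far in this session
  lastPrompt?: string;
}

// Read, bump, and persist the per-session counter (same layout as hook.ts / hooks routes)
function bumpSessionState(sessionId: string, prompt: string): SessionContextState {
  const dir = join(homedir(), '.claude', '.ccw-sessions');
  const file = join(dir, `session-${sessionId}.json`);
  let state: SessionContextState | null = null;
  if (existsSync(file)) {
    try { state = JSON.parse(readFileSync(file, 'utf-8')); } catch { state = null; }
  }
  const next: SessionContextState = state
    ? { ...state, loadCount: state.loadCount + 1, lastPrompt: prompt }
    : { firstLoad: new Date().toISOString(), loadCount: 1, lastPrompt: prompt };
  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
  writeFileSync(file, JSON.stringify(next, null, 2));
  return next;
}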

View File

@@ -0,0 +1,930 @@
// @ts-nocheck
/**
* LiteLLM API Routes Module
* Handles LiteLLM provider management, endpoint configuration, and cache management
*/
import type { IncomingMessage, ServerResponse } from 'http';
import { fileURLToPath } from 'url';
import { dirname, join as pathJoin } from 'path';
// Get current module path for package-relative lookups
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Package root: routes -> core -> src -> ccw -> package root
const PACKAGE_ROOT = pathJoin(__dirname, '..', '..', '..', '..');
import {
getAllProviders,
getProvider,
addProvider,
updateProvider,
deleteProvider,
getAllEndpoints,
getEndpoint,
addEndpoint,
updateEndpoint,
deleteEndpoint,
getDefaultEndpoint,
setDefaultEndpoint,
getGlobalCacheSettings,
updateGlobalCacheSettings,
loadLiteLLMApiConfig,
saveLiteLLMYamlConfig,
generateLiteLLMYamlConfig,
getCodexLensEmbeddingRotation,
updateCodexLensEmbeddingRotation,
getEmbeddingProvidersForRotation,
generateRotationEndpoints,
syncCodexLensConfig,
getEmbeddingPoolConfig,
updateEmbeddingPoolConfig,
discoverProvidersForModel,
type ProviderCredential,
type CustomEndpoint,
type ProviderType,
type CodexLensEmbeddingRotation,
type EmbeddingPoolConfig,
} from '../../config/litellm-api-config-manager.js';
import { getContextCacheStore } from '../../tools/context-cache-store.js';
import { getLiteLLMClient } from '../../tools/litellm-client.js';
// Cache for ccw-litellm status check
let ccwLitellmStatusCache: {
data: { installed: boolean; version?: string; error?: string } | null;
timestamp: number;
ttl: number;
} = {
data: null,
timestamp: 0,
ttl: 5 * 60 * 1000, // 5 minutes
};
// Clear cache (call after install)
export function clearCcwLitellmStatusCache() {
ccwLitellmStatusCache.data = null;
ccwLitellmStatusCache.timestamp = 0;
}
export interface RouteContext {
pathname: string;
url: URL;
req: IncomingMessage;
res: ServerResponse;
initialPath: string;
handlePostRequest: (req: IncomingMessage, res: ServerResponse, handler: (body: unknown) => Promise<any>) => void;
broadcastToClients: (data: unknown) => void;
}
// ===========================
// Model Information
// ===========================
interface ModelInfo {
id: string;
name: string;
provider: ProviderType;
description?: string;
}
const PROVIDER_MODELS: Record<ProviderType, ModelInfo[]> = {
openai: [
{ id: 'gpt-4-turbo', name: 'GPT-4 Turbo', provider: 'openai', description: '128K context' },
{ id: 'gpt-4', name: 'GPT-4', provider: 'openai', description: '8K context' },
{ id: 'gpt-3.5-turbo', name: 'GPT-3.5 Turbo', provider: 'openai', description: '16K context' },
],
anthropic: [
{ id: 'claude-3-opus-20240229', name: 'Claude 3 Opus', provider: 'anthropic', description: '200K context' },
{ id: 'claude-3-sonnet-20240229', name: 'Claude 3 Sonnet', provider: 'anthropic', description: '200K context' },
{ id: 'claude-3-haiku-20240307', name: 'Claude 3 Haiku', provider: 'anthropic', description: '200K context' },
],
google: [
{ id: 'gemini-pro', name: 'Gemini Pro', provider: 'google', description: '32K context' },
{ id: 'gemini-pro-vision', name: 'Gemini Pro Vision', provider: 'google', description: '16K context' },
],
ollama: [
{ id: 'llama2', name: 'Llama 2', provider: 'ollama', description: 'Local model' },
{ id: 'mistral', name: 'Mistral', provider: 'ollama', description: 'Local model' },
],
azure: [],
mistral: [
{ id: 'mistral-large-latest', name: 'Mistral Large', provider: 'mistral', description: '32K context' },
{ id: 'mistral-medium-latest', name: 'Mistral Medium', provider: 'mistral', description: '32K context' },
],
deepseek: [
{ id: 'deepseek-chat', name: 'DeepSeek Chat', provider: 'deepseek', description: '64K context' },
{ id: 'deepseek-coder', name: 'DeepSeek Coder', provider: 'deepseek', description: '64K context' },
],
custom: [],
};
/**
* Handle LiteLLM API routes
* @returns true if route was handled, false otherwise
*/
export async function handleLiteLLMApiRoutes(ctx: RouteContext): Promise<boolean> {
const { pathname, url, req, res, initialPath, handlePostRequest, broadcastToClients } = ctx;
// ===========================
// Provider Management Routes
// ===========================
// GET /api/litellm-api/providers - List all providers
if (pathname === '/api/litellm-api/providers' && req.method === 'GET') {
try {
const providers = getAllProviders(initialPath);
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ providers, count: providers.length }));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// POST /api/litellm-api/providers - Create provider
if (pathname === '/api/litellm-api/providers' && req.method === 'POST') {
handlePostRequest(req, res, async (body: unknown) => {
const providerData = body as Omit<ProviderCredential, 'id' | 'createdAt' | 'updatedAt'>;
if (!providerData.name || !providerData.type || !providerData.apiKey) {
return { error: 'Provider name, type, and apiKey are required', status: 400 };
}
try {
const provider = addProvider(initialPath, providerData);
broadcastToClients({
type: 'LITELLM_PROVIDER_CREATED',
payload: { provider, timestamp: new Date().toISOString() }
});
return { success: true, provider };
} catch (err) {
return { error: (err as Error).message, status: 500 };
}
});
return true;
}
// GET /api/litellm-api/providers/:id - Get provider by ID
const providerGetMatch = pathname.match(/^\/api\/litellm-api\/providers\/([^/]+)$/);
if (providerGetMatch && req.method === 'GET') {
const providerId = providerGetMatch[1];
try {
const provider = getProvider(initialPath, providerId);
if (!provider) {
res.writeHead(404, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: 'Provider not found' }));
return true;
}
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(provider));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// PUT /api/litellm-api/providers/:id - Update provider
const providerUpdateMatch = pathname.match(/^\/api\/litellm-api\/providers\/([^/]+)$/);
if (providerUpdateMatch && req.method === 'PUT') {
const providerId = providerUpdateMatch[1];
handlePostRequest(req, res, async (body: unknown) => {
const updates = body as Partial<Omit<ProviderCredential, 'id' | 'createdAt' | 'updatedAt'>>;
try {
const provider = updateProvider(initialPath, providerId, updates);
broadcastToClients({
type: 'LITELLM_PROVIDER_UPDATED',
payload: { provider, timestamp: new Date().toISOString() }
});
return { success: true, provider };
} catch (err) {
return { error: (err as Error).message, status: 404 };
}
});
return true;
}
// DELETE /api/litellm-api/providers/:id - Delete provider
const providerDeleteMatch = pathname.match(/^\/api\/litellm-api\/providers\/([^/]+)$/);
if (providerDeleteMatch && req.method === 'DELETE') {
const providerId = providerDeleteMatch[1];
try {
const success = deleteProvider(initialPath, providerId);
if (!success) {
res.writeHead(404, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: 'Provider not found' }));
return true;
}
broadcastToClients({
type: 'LITELLM_PROVIDER_DELETED',
payload: { providerId, timestamp: new Date().toISOString() }
});
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: true, message: 'Provider deleted' }));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// POST /api/litellm-api/providers/:id/test - Test provider connection
const providerTestMatch = pathname.match(/^\/api\/litellm-api\/providers\/([^/]+)\/test$/);
if (providerTestMatch && req.method === 'POST') {
const providerId = providerTestMatch[1];
try {
const provider = getProvider(initialPath, providerId);
if (!provider) {
res.writeHead(404, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: false, error: 'Provider not found' }));
return true;
}
if (!provider.enabled) {
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: false, error: 'Provider is disabled' }));
return true;
}
// Test connection using litellm client
const client = getLiteLLMClient();
const available = await client.isAvailable();
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: available, provider: provider.type }));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: false, error: (err as Error).message }));
}
return true;
}
// ===========================
// Endpoint Management Routes
// ===========================
// GET /api/litellm-api/endpoints - List all endpoints
if (pathname === '/api/litellm-api/endpoints' && req.method === 'GET') {
try {
const endpoints = getAllEndpoints(initialPath);
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ endpoints, count: endpoints.length }));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// POST /api/litellm-api/endpoints - Create endpoint
if (pathname === '/api/litellm-api/endpoints' && req.method === 'POST') {
handlePostRequest(req, res, async (body: unknown) => {
const endpointData = body as Omit<CustomEndpoint, 'createdAt' | 'updatedAt'>;
if (!endpointData.id || !endpointData.name || !endpointData.providerId || !endpointData.model) {
return { error: 'Endpoint id, name, providerId, and model are required', status: 400 };
}
try {
const endpoint = addEndpoint(initialPath, endpointData);
broadcastToClients({
type: 'LITELLM_ENDPOINT_CREATED',
payload: { endpoint, timestamp: new Date().toISOString() }
});
return { success: true, endpoint };
} catch (err) {
return { error: (err as Error).message, status: 500 };
}
});
return true;
}
// GET /api/litellm-api/endpoints/:id - Get endpoint by ID
const endpointGetMatch = pathname.match(/^\/api\/litellm-api\/endpoints\/([^/]+)$/);
if (endpointGetMatch && req.method === 'GET') {
const endpointId = endpointGetMatch[1];
try {
const endpoint = getEndpoint(initialPath, endpointId);
if (!endpoint) {
res.writeHead(404, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: 'Endpoint not found' }));
return true;
}
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(endpoint));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// PUT /api/litellm-api/endpoints/:id - Update endpoint
const endpointUpdateMatch = pathname.match(/^\/api\/litellm-api\/endpoints\/([^/]+)$/);
if (endpointUpdateMatch && req.method === 'PUT') {
const endpointId = endpointUpdateMatch[1];
handlePostRequest(req, res, async (body: unknown) => {
const updates = body as Partial<Omit<CustomEndpoint, 'id' | 'createdAt' | 'updatedAt'>>;
try {
const endpoint = updateEndpoint(initialPath, endpointId, updates);
broadcastToClients({
type: 'LITELLM_ENDPOINT_UPDATED',
payload: { endpoint, timestamp: new Date().toISOString() }
});
return { success: true, endpoint };
} catch (err) {
return { error: (err as Error).message, status: 404 };
}
});
return true;
}
// DELETE /api/litellm-api/endpoints/:id - Delete endpoint
const endpointDeleteMatch = pathname.match(/^\/api\/litellm-api\/endpoints\/([^/]+)$/);
if (endpointDeleteMatch && req.method === 'DELETE') {
const endpointId = endpointDeleteMatch[1];
try {
const success = deleteEndpoint(initialPath, endpointId);
if (!success) {
res.writeHead(404, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: 'Endpoint not found' }));
return true;
}
broadcastToClients({
type: 'LITELLM_ENDPOINT_DELETED',
payload: { endpointId, timestamp: new Date().toISOString() }
});
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: true, message: 'Endpoint deleted' }));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// ===========================
// Model Discovery Routes
// ===========================
// GET /api/litellm-api/models/:providerType - Get available models for provider type
const modelsMatch = pathname.match(/^\/api\/litellm-api\/models\/([^/]+)$/);
if (modelsMatch && req.method === 'GET') {
const providerType = modelsMatch[1] as ProviderType;
try {
const models = PROVIDER_MODELS[providerType];
if (!models) {
res.writeHead(404, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: 'Provider type not found' }));
return true;
}
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ providerType, models, count: models.length }));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// ===========================
// Cache Management Routes
// ===========================
// GET /api/litellm-api/cache/stats - Get cache statistics
if (pathname === '/api/litellm-api/cache/stats' && req.method === 'GET') {
try {
const cacheStore = getContextCacheStore();
const stats = cacheStore.getStatus();
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(stats));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// POST /api/litellm-api/cache/clear - Clear cache
if (pathname === '/api/litellm-api/cache/clear' && req.method === 'POST') {
try {
const cacheStore = getContextCacheStore();
const result = cacheStore.clear();
broadcastToClients({
type: 'LITELLM_CACHE_CLEARED',
payload: { removed: result.removed, timestamp: new Date().toISOString() }
});
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: true, removed: result.removed }));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// ===========================
// Config Management Routes
// ===========================
// GET /api/litellm-api/config - Get full config
if (pathname === '/api/litellm-api/config' && req.method === 'GET') {
try {
const config = loadLiteLLMApiConfig(initialPath);
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(config));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// PUT /api/litellm-api/config/cache - Update global cache settings
if (pathname === '/api/litellm-api/config/cache' && req.method === 'PUT') {
handlePostRequest(req, res, async (body: unknown) => {
const settings = body as Partial<{ enabled: boolean; cacheDir: string; maxTotalSizeMB: number }>;
try {
updateGlobalCacheSettings(initialPath, settings);
const updatedSettings = getGlobalCacheSettings(initialPath);
broadcastToClients({
type: 'LITELLM_CACHE_SETTINGS_UPDATED',
payload: { settings: updatedSettings, timestamp: new Date().toISOString() }
});
return { success: true, settings: updatedSettings };
} catch (err) {
return { error: (err as Error).message, status: 500 };
}
});
return true;
}
// PUT /api/litellm-api/config/default-endpoint - Set default endpoint
if (pathname === '/api/litellm-api/config/default-endpoint' && req.method === 'PUT') {
handlePostRequest(req, res, async (body: unknown) => {
const { endpointId } = body as { endpointId?: string };
try {
setDefaultEndpoint(initialPath, endpointId);
const defaultEndpoint = getDefaultEndpoint(initialPath);
broadcastToClients({
type: 'LITELLM_DEFAULT_ENDPOINT_UPDATED',
payload: { endpointId, defaultEndpoint, timestamp: new Date().toISOString() }
});
return { success: true, defaultEndpoint };
} catch (err) {
return { error: (err as Error).message, status: 500 };
}
});
return true;
}
// ===========================
// Config Sync Routes
// ===========================
// POST /api/litellm-api/config/sync - Sync UI config to ccw_litellm YAML config
if (pathname === '/api/litellm-api/config/sync' && req.method === 'POST') {
try {
const yamlPath = saveLiteLLMYamlConfig(initialPath);
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({
success: true,
message: 'Config synced to ccw_litellm',
yamlPath,
}));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// GET /api/litellm-api/config/yaml-preview - Preview YAML config without saving
if (pathname === '/api/litellm-api/config/yaml-preview' && req.method === 'GET') {
try {
const yamlConfig = generateLiteLLMYamlConfig(initialPath);
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({
success: true,
config: yamlConfig,
}));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// ===========================
// CCW-LiteLLM Package Management
// ===========================
// GET /api/litellm-api/ccw-litellm/status - Check ccw-litellm installation status
// Supports ?refresh=true to bypass cache
if (pathname === '/api/litellm-api/ccw-litellm/status' && req.method === 'GET') {
const forceRefresh = url.searchParams.get('refresh') === 'true';
// Check cache first (unless force refresh)
if (!forceRefresh && ccwLitellmStatusCache.data &&
Date.now() - ccwLitellmStatusCache.timestamp < ccwLitellmStatusCache.ttl) {
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(ccwLitellmStatusCache.data));
return true;
}
// Async check - use pip show for more reliable detection
try {
const { exec } = await import('child_process');
const { promisify } = await import('util');
const execAsync = promisify(exec);
let result: { installed: boolean; version?: string; error?: string } = { installed: false };
// Method 1: Try pip show ccw-litellm (most reliable)
try {
const { stdout } = await execAsync('pip show ccw-litellm', {
timeout: 10000,
windowsHide: true,
shell: true,
});
// Parse version from pip show output
const versionMatch = stdout.match(/Version:\s*(.+)/i);
if (versionMatch) {
result = { installed: true, version: versionMatch[1].trim() };
console.log(`[ccw-litellm status] Found via pip show: ${result.version}`);
}
} catch (pipErr) {
console.log('[ccw-litellm status] pip show failed, trying python import...');
// Method 2: Fallback to Python import
const pythonExecutables = ['python', 'python3', 'py'];
for (const pythonExe of pythonExecutables) {
try {
// Use simpler Python code without complex quotes
const { stdout } = await execAsync(`${pythonExe} -c "import ccw_litellm; print(ccw_litellm.__version__)"`, {
timeout: 5000,
windowsHide: true,
shell: true,
});
const version = stdout.trim();
if (version) {
result = { installed: true, version };
console.log(`[ccw-litellm status] Found with ${pythonExe}: ${version}`);
break;
}
} catch (err) {
result.error = (err as Error).message;
console.log(`[ccw-litellm status] ${pythonExe} failed:`, result.error.substring(0, 100));
}
}
}
// Update cache
ccwLitellmStatusCache = {
data: result,
timestamp: Date.now(),
ttl: 5 * 60 * 1000,
};
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(result));
} catch (err) {
const errorResult = { installed: false, error: (err as Error).message };
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(errorResult));
}
return true;
}
// ===========================
// CodexLens Embedding Rotation Routes
// ===========================
// GET /api/litellm-api/codexlens/rotation - Get rotation config
if (pathname === '/api/litellm-api/codexlens/rotation' && req.method === 'GET') {
try {
const rotationConfig = getCodexLensEmbeddingRotation(initialPath);
const availableProviders = getEmbeddingProvidersForRotation(initialPath);
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({
rotationConfig: rotationConfig || null,
availableProviders,
}));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// PUT /api/litellm-api/codexlens/rotation - Update rotation config
if (pathname === '/api/litellm-api/codexlens/rotation' && req.method === 'PUT') {
handlePostRequest(req, res, async (body: unknown) => {
const rotationConfig = body as CodexLensEmbeddingRotation | null;
try {
const { syncResult } = updateCodexLensEmbeddingRotation(initialPath, rotationConfig || undefined);
broadcastToClients({
type: 'CODEXLENS_ROTATION_UPDATED',
payload: { rotationConfig, syncResult, timestamp: new Date().toISOString() }
});
return { success: true, rotationConfig, syncResult };
} catch (err) {
return { error: (err as Error).message, status: 500 };
}
});
return true;
}
// GET /api/litellm-api/codexlens/rotation/endpoints - Get generated rotation endpoints
if (pathname === '/api/litellm-api/codexlens/rotation/endpoints' && req.method === 'GET') {
try {
const endpoints = generateRotationEndpoints(initialPath);
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({
endpoints,
count: endpoints.length,
}));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// POST /api/litellm-api/codexlens/rotation/sync - Manually sync rotation config to CodexLens
if (pathname === '/api/litellm-api/codexlens/rotation/sync' && req.method === 'POST') {
try {
const syncResult = syncCodexLensConfig(initialPath);
if (syncResult.success) {
broadcastToClients({
type: 'CODEXLENS_CONFIG_SYNCED',
payload: { ...syncResult, timestamp: new Date().toISOString() }
});
}
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(syncResult));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: false, message: (err as Error).message }));
}
return true;
}
// ===========================
// Embedding Pool Routes (New Generic API)
// ===========================
// GET /api/litellm-api/embedding-pool - Get pool config and available models
if (pathname === '/api/litellm-api/embedding-pool' && req.method === 'GET') {
try {
const poolConfig = getEmbeddingPoolConfig(initialPath);
// Get list of all available embedding models from all providers
const config = loadLiteLLMApiConfig(initialPath);
const availableModels: Array<{ modelId: string; modelName: string; providers: string[] }> = [];
const modelMap = new Map<string, { modelId: string; modelName: string; providers: string[] }>();
for (const provider of config.providers) {
if (!provider.enabled || !provider.embeddingModels) continue;
for (const model of provider.embeddingModels) {
if (!model.enabled) continue;
const key = model.id;
if (modelMap.has(key)) {
modelMap.get(key)!.providers.push(provider.name);
} else {
modelMap.set(key, {
modelId: model.id,
modelName: model.name,
providers: [provider.name],
});
}
}
}
availableModels.push(...Array.from(modelMap.values()));
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({
poolConfig: poolConfig || null,
availableModels,
}));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// PUT /api/litellm-api/embedding-pool - Update pool config
if (pathname === '/api/litellm-api/embedding-pool' && req.method === 'PUT') {
handlePostRequest(req, res, async (body: unknown) => {
const poolConfig = body as EmbeddingPoolConfig | null;
try {
const { syncResult } = updateEmbeddingPoolConfig(initialPath, poolConfig || undefined);
broadcastToClients({
type: 'EMBEDDING_POOL_UPDATED',
payload: { poolConfig, syncResult, timestamp: new Date().toISOString() }
});
return { success: true, poolConfig, syncResult };
} catch (err) {
return { error: (err as Error).message, status: 500 };
}
});
return true;
}
// GET /api/litellm-api/embedding-pool/discover/:model - Preview auto-discovery results
const discoverMatch = pathname.match(/^\/api\/litellm-api\/embedding-pool\/discover\/([^/]+)$/);
if (discoverMatch && req.method === 'GET') {
const targetModel = decodeURIComponent(discoverMatch[1]);
try {
const discovered = discoverProvidersForModel(initialPath, targetModel);
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({
targetModel,
discovered,
count: discovered.length,
}));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// POST /api/litellm-api/ccw-litellm/install - Install ccw-litellm package
if (pathname === '/api/litellm-api/ccw-litellm/install' && req.method === 'POST') {
handlePostRequest(req, res, async () => {
try {
const { spawn } = await import('child_process');
const path = await import('path');
const fs = await import('fs');
// Try to find ccw-litellm package in distribution
const possiblePaths = [
path.join(initialPath, 'ccw-litellm'),
path.join(initialPath, '..', 'ccw-litellm'),
path.join(process.cwd(), 'ccw-litellm'),
path.join(PACKAGE_ROOT, 'ccw-litellm'), // npm package internal path
];
let packagePath = '';
for (const p of possiblePaths) {
const pyproject = path.join(p, 'pyproject.toml');
if (fs.existsSync(pyproject)) {
packagePath = p;
break;
}
}
if (!packagePath) {
// Try pip install from PyPI as fallback
return new Promise((resolve) => {
const proc = spawn('pip', ['install', 'ccw-litellm'], { shell: true, timeout: 300000 });
let output = '';
let error = '';
proc.stdout?.on('data', (data) => { output += data.toString(); });
proc.stderr?.on('data', (data) => { error += data.toString(); });
proc.on('close', (code) => {
if (code === 0) {
// Clear status cache after successful installation
clearCcwLitellmStatusCache();
resolve({ success: true, message: 'ccw-litellm installed from PyPI' });
} else {
resolve({ success: false, error: error || 'Installation failed' });
}
});
proc.on('error', (err) => resolve({ success: false, error: err.message }));
});
}
// Install from local package
return new Promise((resolve) => {
const proc = spawn('pip', ['install', '-e', packagePath], { shell: true, timeout: 300000 });
let output = '';
let error = '';
proc.stdout?.on('data', (data) => { output += data.toString(); });
proc.stderr?.on('data', (data) => { error += data.toString(); });
proc.on('close', (code) => {
if (code === 0) {
// Clear status cache after successful installation
clearCcwLitellmStatusCache();
// Broadcast installation event
broadcastToClients({
type: 'CCW_LITELLM_INSTALLED',
payload: { timestamp: new Date().toISOString() }
});
resolve({ success: true, message: 'ccw-litellm installed successfully', path: packagePath });
} else {
resolve({ success: false, error: error || output || 'Installation failed' });
}
});
proc.on('error', (err) => resolve({ success: false, error: err.message }));
});
} catch (err) {
return { success: false, error: (err as Error).message };
}
});
return true;
}
// POST /api/litellm-api/ccw-litellm/uninstall - Uninstall ccw-litellm package
if (pathname === '/api/litellm-api/ccw-litellm/uninstall' && req.method === 'POST') {
handlePostRequest(req, res, async () => {
try {
const { spawn } = await import('child_process');
return new Promise((resolve) => {
const proc = spawn('pip', ['uninstall', '-y', 'ccw-litellm'], { shell: true, timeout: 120000 });
let output = '';
let error = '';
proc.stdout?.on('data', (data) => { output += data.toString(); });
proc.stderr?.on('data', (data) => { error += data.toString(); });
proc.on('close', (code) => {
// Clear status cache after uninstallation attempt
clearCcwLitellmStatusCache();
if (code === 0) {
broadcastToClients({
type: 'CCW_LITELLM_UNINSTALLED',
payload: { timestamp: new Date().toISOString() }
});
resolve({ success: true, message: 'ccw-litellm uninstalled successfully' });
} else {
// Check if package was not installed
if (error.includes('not installed') || output.includes('not installed')) {
resolve({ success: true, message: 'ccw-litellm was not installed' });
} else {
resolve({ success: false, error: error || output || 'Uninstallation failed' });
}
}
});
proc.on('error', (err) => resolve({ success: false, error: err.message }));
});
} catch (err) {
return { success: false, error: (err as Error).message };
}
});
return true;
}
return false;
}
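The litellm-api routes above are all plain JSON-over-HTTP, so they can be exercised directly from the dashboard with fetch. Below is a minimal client-side sketch for two of them, the provider connection test and the embedding-pool read; the function name and logging are illustrative, not part of the codebase.

async function testProviderAndLoadPool(providerId: string): Promise<void> {
  // POST /api/litellm-api/providers/:id/test -> { success, provider } or { success: false, error }
  const testRes = await fetch(`/api/litellm-api/providers/${encodeURIComponent(providerId)}/test`, {
    method: 'POST',
  });
  console.log('provider test:', await testRes.json());
  // GET /api/litellm-api/embedding-pool -> { poolConfig, availableModels }
  const poolRes = await fetch('/api/litellm-api/embedding-pool');
  const { poolConfig, availableModels } = await poolRes.json();
  console.log('pool config:', poolConfig, '| models available:', availableModels.length);
}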

View File

@@ -0,0 +1,107 @@
// @ts-nocheck
/**
* LiteLLM Routes Module
* Handles all LiteLLM-related API endpoints
*/
import type { IncomingMessage, ServerResponse } from 'http';
import { getLiteLLMClient, getLiteLLMStatus, checkLiteLLMAvailable } from '../../tools/litellm-client.js';
export interface RouteContext {
pathname: string;
url: URL;
req: IncomingMessage;
res: ServerResponse;
initialPath: string;
handlePostRequest: (req: IncomingMessage, res: ServerResponse, handler: (body: unknown) => Promise<any>) => void;
broadcastToClients: (data: unknown) => void;
}
/**
* Handle LiteLLM routes
* @returns true if route was handled, false otherwise
*/
export async function handleLiteLLMRoutes(ctx: RouteContext): Promise<boolean> {
const { pathname, url, req, res, initialPath, handlePostRequest } = ctx;
// API: LiteLLM Status - Check availability and version
if (pathname === '/api/litellm/status') {
try {
const status = await getLiteLLMStatus();
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(status));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ available: false, error: err.message }));
}
return true;
}
// API: LiteLLM Config - Get configuration
if (pathname === '/api/litellm/config' && req.method === 'GET') {
try {
const client = getLiteLLMClient();
const config = await client.getConfig();
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(config));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: err.message }));
}
return true;
}
// API: LiteLLM Embed - Generate embeddings
if (pathname === '/api/litellm/embed' && req.method === 'POST') {
handlePostRequest(req, res, async (body) => {
const { texts, model = 'default' } = body;
if (!texts || !Array.isArray(texts)) {
return { error: 'texts array is required', status: 400 };
}
if (texts.length === 0) {
return { error: 'texts array cannot be empty', status: 400 };
}
try {
const client = getLiteLLMClient();
const result = await client.embed(texts, model);
return { success: true, ...result };
} catch (err) {
return { error: err.message, status: 500 };
}
});
return true;
}
// API: LiteLLM Chat - Chat with LLM
if (pathname === '/api/litellm/chat' && req.method === 'POST') {
handlePostRequest(req, res, async (body) => {
const { message, messages, model = 'default' } = body;
// Support both single message and messages array
if (!message && (!messages || !Array.isArray(messages))) {
return { error: 'message or messages array is required', status: 400 };
}
try {
const client = getLiteLLMClient();
if (messages && Array.isArray(messages)) {
// Multi-turn chat
const result = await client.chatMessages(messages, model);
return { success: true, ...result };
} else {
// Single message chat
const content = await client.chat(message, model);
return { success: true, content, model };
}
} catch (err) {
return { error: err.message, status: 500 };
}
});
return true;
}
return false;
}
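A hedged usage sketch for the two POST routes in this module; it assumes the page is served from the same origin as the API, and the helper name is illustrative only.

async function embedAndChatExample(): Promise<void> {
  // POST /api/litellm/embed expects { texts: string[], model? }
  const embedRes = await fetch('/api/litellm/embed', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ texts: ['hello world'], model: 'default' }),
  });
  console.log(await embedRes.json()); // { success: true, ...embedding result } or { error }
  // POST /api/litellm/chat accepts a single message or a messages array
  const chatRes = await fetch('/api/litellm/chat', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ message: 'Summarize this project', model: 'default' }),
  });
  console.log(await chatRes.json()); // single-message path returns { success: true, content, model }
}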

View File

@@ -1000,8 +1000,8 @@ function writeSettingsFile(filePath, settings) {
* @returns {string}
*/
function getProjectSettingsPath(projectPath) {
const normalizedPath = projectPath.replace(/\//g, '\\').replace(/^\\([a-zA-Z])\\/, '$1:\\');
return join(normalizedPath, '.claude', 'settings.json');
// path.join automatically handles cross-platform path separators
return join(projectPath, '.claude', 'settings.json');
}
// ========================================
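The replacement above leans on Node's path.join normalizing separators for the current platform, which is why the manual backslash rewrite could be dropped. A small sketch of the behaviour being relied on, using the explicit win32/posix flavours so it runs the same on any OS; the paths are examples only.

import { win32, posix } from 'path';

// join() produces the platform's native separators and normalizes mixed input.
console.log(win32.join('C:\\work\\my-project', '.claude', 'settings.json'));
// -> C:\work\my-project\.claude\settings.json
console.log(posix.join('/home/user/my-project', '.claude', 'settings.json'));
// -> /home/user/my-project/.claude/settings.json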

View File

@@ -4,9 +4,56 @@
* Aggregated status endpoint for faster dashboard loading
*/
import type { IncomingMessage, ServerResponse } from 'http';
import { existsSync } from 'fs';
import { join } from 'path';
import { homedir } from 'os';
import { getCliToolsStatus } from '../../tools/cli-executor.js';
import { checkVenvStatus, checkSemanticStatus } from '../../tools/codex-lens.js';
/**
* Check CCW installation status
* Verifies that required workflow files are installed in user's home directory
*/
function checkCcwInstallStatus(): {
installed: boolean;
workflowsInstalled: boolean;
missingFiles: string[];
installPath: string;
} {
const claudeDir = join(homedir(), '.claude');
const workflowsDir = join(claudeDir, 'workflows');
// Required workflow files for full functionality
const requiredFiles = [
'chinese-response.md',
'windows-platform.md',
'cli-tools-usage.md',
'coding-philosophy.md',
'context-tools.md',
'file-modification.md'
];
const missingFiles: string[] = [];
// Check each required file
for (const file of requiredFiles) {
const filePath = join(workflowsDir, file);
if (!existsSync(filePath)) {
missingFiles.push(file);
}
}
const workflowsInstalled = existsSync(workflowsDir) && missingFiles.length === 0;
const installed = existsSync(claudeDir) && workflowsInstalled;
return {
installed,
workflowsInstalled,
missingFiles,
installPath: claudeDir
};
}
export interface RouteContext {
pathname: string;
url: URL;
@@ -27,6 +74,9 @@ export async function handleStatusRoutes(ctx: RouteContext): Promise<boolean> {
// API: Aggregated Status (all statuses in one call)
if (pathname === '/api/status/all') {
try {
// Check CCW installation status (sync, fast)
const ccwInstallStatus = checkCcwInstallStatus();
// Execute all status checks in parallel
const [cliStatus, codexLensStatus, semanticStatus] = await Promise.all([
getCliToolsStatus(),
@@ -39,6 +89,7 @@ export async function handleStatusRoutes(ctx: RouteContext): Promise<boolean> {
cli: cliStatus,
codexLens: codexLensStatus,
semantic: semanticStatus,
ccwInstall: ccwInstallStatus,
timestamp: new Date().toISOString()
};
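A brief sketch of how a client might consume the aggregated response assembled above; the field names come from the statuses object, while the helper name and warning text are illustrative.

async function loadDashboardStatus(): Promise<void> {
  const res = await fetch('/api/status/all');
  const data = await res.json();
  // data carries cli, codexLens, semantic, ccwInstall and timestamp in one payload
  if (!data.ccwInstall?.installed) {
    console.warn('CCW workflows incomplete; missing files:', data.ccwInstall?.missingFiles);
  }
}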

View File

@@ -22,6 +22,8 @@ import { handleSessionRoutes } from './routes/session-routes.js';
import { handleCcwRoutes } from './routes/ccw-routes.js';
import { handleClaudeRoutes } from './routes/claude-routes.js';
import { handleHelpRoutes } from './routes/help-routes.js';
import { handleLiteLLMRoutes } from './routes/litellm-routes.js';
import { handleLiteLLMApiRoutes } from './routes/litellm-api-routes.js';
// Import WebSocket handling
import { handleWebSocketUpgrade, broadcastToClients } from './websocket.js';
@@ -83,7 +85,8 @@ const MODULE_CSS_FILES = [
'27-graph-explorer.css',
'28-mcp-manager.css',
'29-help.css',
'30-core-memory.css'
'30-core-memory.css',
'31-api-settings.css'
];
// Modular JS files in dependency order
@@ -137,6 +140,7 @@ const MODULE_FILES = [
'views/skills-manager.js',
'views/rules-manager.js',
'views/claude-manager.js',
'views/api-settings.js',
'views/help.js',
'main.js'
];
@@ -311,6 +315,16 @@ export async function startServer(options: ServerOptions = {}): Promise<http.Ser
if (await handleCodexLensRoutes(routeContext)) return;
}
// LiteLLM routes (/api/litellm/*)
if (pathname.startsWith('/api/litellm/')) {
if (await handleLiteLLMRoutes(routeContext)) return;
}
// LiteLLM API routes (/api/litellm-api/*)
if (pathname.startsWith('/api/litellm-api/')) {
if (await handleLiteLLMApiRoutes(routeContext)) return;
}
// Graph routes (/api/graph/*)
if (pathname.startsWith('/api/graph/')) {
if (await handleGraphRoutes(routeContext)) return;

View File

@@ -22,7 +22,7 @@ const ENV_PROJECT_ROOT = 'CCW_PROJECT_ROOT';
const ENV_ALLOWED_DIRS = 'CCW_ALLOWED_DIRS';
// Default enabled tools (core set)
const DEFAULT_TOOLS: string[] = ['write_file', 'edit_file', 'read_file', 'smart_search', 'core_memory'];
const DEFAULT_TOOLS: string[] = ['write_file', 'edit_file', 'read_file', 'smart_search', 'core_memory', 'context_cache'];
/**
* Get list of enabled tools from environment or defaults

View File

@@ -170,6 +170,27 @@
letter-spacing: 0.03em;
}
.cli-tool-badge-disabled {
font-size: 0.5625rem;
font-weight: 600;
padding: 0.125rem 0.375rem;
background: hsl(38 92% 50% / 0.2);
color: hsl(38 92% 50%);
border-radius: 9999px;
text-transform: uppercase;
letter-spacing: 0.03em;
}
/* Disabled tool card state */
.cli-tool-card.disabled {
opacity: 0.7;
border-style: dashed;
}
.cli-tool-card.disabled .cli-tool-name {
color: hsl(var(--muted-foreground));
}
.cli-tool-info {
font-size: 0.6875rem;
margin-bottom: 0.3125rem;
@@ -773,6 +794,29 @@
border-color: hsl(var(--destructive) / 0.5);
}
/* Enable/Disable button variants */
.btn-sm.btn-outline-success {
background: transparent;
border: 1px solid hsl(142 76% 36% / 0.4);
color: hsl(142 76% 36%);
}
.btn-sm.btn-outline-success:hover {
background: hsl(142 76% 36% / 0.1);
border-color: hsl(142 76% 36% / 0.6);
}
.btn-sm.btn-outline-warning {
background: transparent;
border: 1px solid hsl(38 92% 50% / 0.4);
color: hsl(38 92% 50%);
}
.btn-sm.btn-outline-warning:hover {
background: hsl(38 92% 50% / 0.1);
border-color: hsl(38 92% 50% / 0.6);
}
/* Empty State */
.empty-state {
display: flex;

View File

@@ -158,3 +158,37 @@
pointer-events: none;
}
/* Code Index MCP Toggle Buttons */
.code-mcp-btn {
padding: 0.375rem 0.75rem;
font-size: 0.75rem;
font-weight: 500;
border-radius: 0.375rem;
border: none;
cursor: pointer;
transition: all 0.15s ease;
background: transparent;
color: hsl(var(--muted-foreground));
}
.code-mcp-btn:hover {
color: hsl(var(--foreground));
background: hsl(var(--muted) / 0.5);
}
.code-mcp-btn.active,
.code-mcp-btn[class*="bg-primary"] {
background: hsl(var(--primary));
color: hsl(var(--primary-foreground));
box-shadow: 0 1px 2px rgba(0, 0, 0, 0.1);
}
.code-mcp-toggle {
display: flex;
align-items: center;
gap: 0.25rem;
background: hsl(var(--muted));
border-radius: 0.5rem;
padding: 0.125rem;
}

File diff suppressed because it is too large.

View File

@@ -33,9 +33,13 @@ async function loadCliHistory(options = {}) {
}
// Load native session content for a specific execution
async function loadNativeSessionContent(executionId) {
async function loadNativeSessionContent(executionId, sourceDir) {
try {
const url = `/api/cli/native-session?path=${encodeURIComponent(projectPath)}&id=${encodeURIComponent(executionId)}`;
// If sourceDir provided, use it to build the correct path
const basePath = sourceDir && sourceDir !== '.'
? projectPath + '/' + sourceDir
: projectPath;
const url = `/api/cli/native-session?path=${encodeURIComponent(basePath)}&id=${encodeURIComponent(executionId)}`;
const response = await fetch(url);
if (!response.ok) return null;
return await response.json();
@@ -133,9 +137,12 @@ function renderCliHistory() {
</span>`
: '';
// Escape sourceDir for use in onclick
const sourceDirEscaped = exec.sourceDir ? exec.sourceDir.replace(/'/g, "\\'") : '';
return `
<div class="cli-history-item ${hasNative ? 'has-native' : ''}">
<div class="cli-history-item-content" onclick="showExecutionDetail('${exec.id}')">
<div class="cli-history-item-content" onclick="showExecutionDetail('${exec.id}', '${sourceDirEscaped}')">
<div class="cli-history-item-header">
<span class="cli-tool-tag cli-tool-${exec.tool}">${exec.tool.toUpperCase()}</span>
<span class="cli-mode-tag">${exec.mode || 'analysis'}</span>
@@ -154,14 +161,14 @@ function renderCliHistory() {
</div>
<div class="cli-history-actions">
${hasNative ? `
<button class="btn-icon" onclick="event.stopPropagation(); showNativeSessionDetail('${exec.id}')" title="View Native Session">
<button class="btn-icon" onclick="event.stopPropagation(); showNativeSessionDetail('${exec.id}', '${sourceDirEscaped}')" title="View Native Session">
<i data-lucide="file-json" class="w-3.5 h-3.5"></i>
</button>
` : ''}
<button class="btn-icon" onclick="event.stopPropagation(); showExecutionDetail('${exec.id}')" title="View Details">
<button class="btn-icon" onclick="event.stopPropagation(); showExecutionDetail('${exec.id}', '${sourceDirEscaped}')" title="View Details">
<i data-lucide="eye" class="w-3.5 h-3.5"></i>
</button>
<button class="btn-icon btn-danger" onclick="event.stopPropagation(); confirmDeleteExecution('${exec.id}')" title="Delete">
<button class="btn-icon btn-danger" onclick="event.stopPropagation(); confirmDeleteExecution('${exec.id}', '${sourceDirEscaped}')" title="Delete">
<i data-lucide="trash-2" class="w-3.5 h-3.5"></i>
</button>
</div>
@@ -650,9 +657,9 @@ async function copyConcatenatedPrompt(executionId) {
/**
* Show native session detail modal with full conversation content
*/
async function showNativeSessionDetail(executionId) {
async function showNativeSessionDetail(executionId, sourceDir) {
// Load native session content
const nativeSession = await loadNativeSessionContent(executionId);
const nativeSession = await loadNativeSessionContent(executionId, sourceDir);
if (!nativeSession) {
showRefreshToast('Native session not found', 'error');

View File

@@ -5,8 +5,11 @@
let cliToolStatus = { gemini: {}, qwen: {}, codex: {}, claude: {} };
let codexLensStatus = { ready: false };
let semanticStatus = { available: false };
let ccwInstallStatus = { installed: true, workflowsInstalled: true, missingFiles: [], installPath: '' };
let defaultCliTool = 'gemini';
let promptConcatFormat = localStorage.getItem('ccw-prompt-format') || 'plain'; // plain, yaml, json
let cliToolsConfig = {}; // CLI tools enable/disable config
let apiEndpoints = []; // API endpoints from LiteLLM config
// Smart Context settings
let smartContextEnabled = localStorage.getItem('ccw-smart-context') === 'true';
@@ -18,6 +21,9 @@ let nativeResumeEnabled = localStorage.getItem('ccw-native-resume') !== 'false';
// Recursive Query settings (for hierarchical storage aggregation)
let recursiveQueryEnabled = localStorage.getItem('ccw-recursive-query') !== 'false'; // default true
// Code Index MCP provider (codexlens or ace)
let codeIndexMcpProvider = 'codexlens';
// ========== Initialization ==========
function initCliStatus() {
// Load all statuses in one call using aggregated endpoint
@@ -38,10 +44,18 @@ async function loadAllStatuses() {
cliToolStatus = data.cli || { gemini: {}, qwen: {}, codex: {}, claude: {} };
codexLensStatus = data.codexLens || { ready: false };
semanticStatus = data.semantic || { available: false };
ccwInstallStatus = data.ccwInstall || { installed: true, workflowsInstalled: true, missingFiles: [], installPath: '' };
// Load CLI tools config and API endpoints
await Promise.all([
loadCliToolsConfig(),
loadApiEndpoints()
]);
// Update badges
updateCliBadge();
updateCodexLensBadge();
updateCcwInstallBadge();
return data;
} catch (err) {
@@ -118,6 +132,54 @@ async function loadCodexLensStatus() {
}
}
/**
* Load CodexLens dashboard data using aggregated endpoint (single API call)
* This is optimized for the CodexLens Manager page initialization
* @returns {Promise<object|null>} Dashboard init data or null on error
*/
async function loadCodexLensDashboardInit() {
try {
const response = await fetch('/api/codexlens/dashboard-init');
if (!response.ok) throw new Error('Failed to load CodexLens dashboard init');
const data = await response.json();
// Update status variables from aggregated response
codexLensStatus = data.status || { ready: false };
semanticStatus = data.semantic || { available: false };
// Expose to window for other modules
if (!window.cliToolsStatus) {
window.cliToolsStatus = {};
}
window.cliToolsStatus.codexlens = {
installed: data.installed || false,
version: data.status?.version || null,
installedModels: [],
config: data.config || {},
semantic: data.semantic || {}
};
// Store config globally for easy access
window.codexLensConfig = data.config || {};
window.codexLensStatusData = data.statusData || {};
// Update badges
updateCodexLensBadge();
console.log('[CLI Status] CodexLens dashboard init loaded:', {
installed: data.installed,
version: data.status?.version,
semanticAvailable: data.semantic?.available
});
return data;
} catch (err) {
console.error('Failed to load CodexLens dashboard init:', err);
// Fallback to individual calls
return await loadCodexLensStatus();
}
}
/**
* Legacy: Load semantic status individually
*/
@@ -165,6 +227,72 @@ async function loadInstalledModels() {
}
}
/**
* Load CLI tools config from .claude/cli-tools.json (project or global fallback)
*/
async function loadCliToolsConfig() {
try {
const response = await fetch('/api/cli/tools-config');
if (!response.ok) return null;
const data = await response.json();
// Store full config and extract tools for backward compatibility
cliToolsConfig = data.tools || {};
window.claudeCliToolsConfig = data; // Full config available globally
// Load default tool from config
if (data.defaultTool) {
defaultCliTool = data.defaultTool;
}
// Load Code Index MCP provider from config
if (data.settings?.codeIndexMcp) {
codeIndexMcpProvider = data.settings.codeIndexMcp;
}
console.log('[CLI Config] Loaded from:', data._configInfo?.source || 'unknown', '| Default:', data.defaultTool, '| CodeIndexMCP:', codeIndexMcpProvider);
return data;
} catch (err) {
console.error('Failed to load CLI tools config:', err);
return null;
}
}
/**
* Update CLI tool enabled status
*/
async function updateCliToolEnabled(tool, enabled) {
try {
const response = await fetch('/api/cli/tools-config/' + tool, {
method: 'PUT',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ enabled: enabled })
});
if (!response.ok) throw new Error('Failed to update');
showRefreshToast(tool + (enabled ? ' enabled' : ' disabled'), 'success');
return await response.json();
} catch (err) {
console.error('Failed to update CLI tool:', err);
showRefreshToast('Failed to update ' + tool, 'error');
return null;
}
}
/**
* Load API endpoints from LiteLLM config
*/
async function loadApiEndpoints() {
try {
const response = await fetch('/api/litellm-api/endpoints');
if (!response.ok) return [];
const data = await response.json();
apiEndpoints = data.endpoints || [];
return apiEndpoints;
} catch (err) {
console.error('Failed to load API endpoints:', err);
return [];
}
}
// ========== Badge Update ==========
function updateCliBadge() {
const badge = document.getElementById('badgeCliTools');
@@ -187,6 +315,25 @@ function updateCodexLensBadge() {
}
}
function updateCcwInstallBadge() {
const badge = document.getElementById('badgeCcwInstall');
if (badge) {
if (ccwInstallStatus.installed) {
badge.textContent = t('status.installed');
badge.classList.add('text-success');
badge.classList.remove('text-warning', 'text-destructive');
} else if (ccwInstallStatus.workflowsInstalled === false) {
badge.textContent = t('status.incomplete');
badge.classList.add('text-warning');
badge.classList.remove('text-success', 'text-destructive');
} else {
badge.textContent = t('status.notInstalled');
badge.classList.add('text-destructive');
badge.classList.remove('text-success', 'text-warning');
}
}
}
// ========== Rendering ==========
function renderCliStatus() {
const container = document.getElementById('cli-status-panel');
@@ -212,25 +359,41 @@ function renderCliStatus() {
const status = cliToolStatus[tool] || {};
const isAvailable = status.available;
const isDefault = defaultCliTool === tool;
const config = cliToolsConfig[tool] || { enabled: true };
const isEnabled = config.enabled !== false;
const canSetDefault = isAvailable && isEnabled && !isDefault;
return `
<div class="cli-tool-card tool-${tool} ${isAvailable ? 'available' : 'unavailable'}">
<div class="cli-tool-card tool-${tool} ${isAvailable ? 'available' : 'unavailable'} ${!isEnabled ? 'disabled' : ''}">
<div class="cli-tool-header">
<span class="cli-tool-status ${isAvailable ? 'status-available' : 'status-unavailable'}"></span>
<span class="cli-tool-status ${isAvailable && isEnabled ? 'status-available' : 'status-unavailable'}"></span>
<span class="cli-tool-name">${tool.charAt(0).toUpperCase() + tool.slice(1)}</span>
${isDefault ? '<span class="cli-tool-badge">Default</span>' : ''}
${!isEnabled && isAvailable ? '<span class="cli-tool-badge-disabled">Disabled</span>' : ''}
</div>
<div class="cli-tool-desc text-xs text-muted-foreground mt-1">
${toolDescriptions[tool]}
</div>
<div class="cli-tool-info mt-2">
${isAvailable
? `<span class="text-success flex items-center gap-1"><i data-lucide="check-circle" class="w-3 h-3"></i> Ready</span>`
: `<span class="text-muted-foreground flex items-center gap-1"><i data-lucide="circle-dashed" class="w-3 h-3"></i> Not Installed</span>`
}
<div class="cli-tool-info mt-2 flex items-center justify-between">
<div>
${isAvailable
? (isEnabled
? `<span class="text-success flex items-center gap-1"><i data-lucide="check-circle" class="w-3 h-3"></i> Ready</span>`
: `<span class="text-warning flex items-center gap-1"><i data-lucide="pause-circle" class="w-3 h-3"></i> Disabled</span>`)
: `<span class="text-muted-foreground flex items-center gap-1"><i data-lucide="circle-dashed" class="w-3 h-3"></i> Not Installed</span>`
}
</div>
</div>
<div class="cli-tool-actions mt-3">
${isAvailable && !isDefault
<div class="cli-tool-actions mt-3 flex gap-2">
${isAvailable ? (isEnabled
? `<button class="btn-sm btn-outline-warning flex items-center gap-1" onclick="toggleCliTool('${tool}', false)">
<i data-lucide="pause" class="w-3 h-3"></i> Disable
</button>`
: `<button class="btn-sm btn-outline-success flex items-center gap-1" onclick="toggleCliTool('${tool}', true)">
<i data-lucide="play" class="w-3 h-3"></i> Enable
</button>`
) : ''}
${canSetDefault
? `<button class="btn-sm btn-outline flex items-center gap-1" onclick="setDefaultCliTool('${tool}')">
<i data-lucide="star" class="w-3 h-3"></i> Set Default
</button>`
@@ -310,11 +473,75 @@ function renderCliStatus() {
</div>
` : '';
// CCW Installation Status card (show warning if not fully installed)
const ccwInstallHtml = !ccwInstallStatus.installed ? `
<div class="cli-tool-card tool-ccw-install unavailable" style="border: 1px solid var(--warning); background: rgba(var(--warning-rgb), 0.05);">
<div class="cli-tool-header">
<span class="cli-tool-status status-unavailable" style="background: var(--warning);"></span>
<span class="cli-tool-name">${t('status.ccwInstall')}</span>
<span class="badge px-1.5 py-0.5 text-xs rounded bg-warning/20 text-warning">${t('status.required')}</span>
</div>
<div class="cli-tool-desc text-xs text-muted-foreground mt-1">
${t('status.ccwInstallDesc')}
</div>
<div class="cli-tool-info mt-2">
<span class="text-warning flex items-center gap-1">
<i data-lucide="alert-triangle" class="w-3 h-3"></i>
${ccwInstallStatus.missingFiles.length} ${t('status.filesMissing')}
</span>
</div>
<div class="cli-tool-actions flex flex-col gap-2 mt-3">
<div class="text-xs text-muted-foreground">
<p class="mb-1">${t('status.missingFiles')}:</p>
<ul class="list-disc list-inside text-xs opacity-70">
${ccwInstallStatus.missingFiles.slice(0, 3).map(f => `<li>${f}</li>`).join('')}
${ccwInstallStatus.missingFiles.length > 3 ? `<li>+${ccwInstallStatus.missingFiles.length - 3} more...</li>` : ''}
</ul>
</div>
<div class="bg-muted/50 rounded p-2 mt-2">
<p class="text-xs font-medium mb-1">${t('status.runToFix')}:</p>
<code class="text-xs bg-background px-2 py-1 rounded block">ccw install</code>
</div>
</div>
</div>
` : '';
// API Endpoints section
const apiEndpointsHtml = apiEndpoints.length > 0 ? `
<div class="cli-api-endpoints-section" style="margin-top: 1.5rem;">
<div class="cli-section-header" style="display: flex; align-items: center; gap: 0.5rem; margin-bottom: 1rem;">
<h4 style="display: flex; align-items: center; gap: 0.5rem; font-weight: 600; margin: 0;">
<i data-lucide="link" class="w-4 h-4"></i> API Endpoints
</h4>
<span class="badge" style="padding: 0.125rem 0.5rem; font-size: 0.75rem; border-radius: 0.25rem; background: var(--muted); color: var(--muted-foreground);">${apiEndpoints.length}</span>
</div>
<div class="cli-endpoints-list" style="display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 0.75rem;">
${apiEndpoints.map(ep => `
<div class="cli-endpoint-card ${ep.enabled ? 'available' : 'unavailable'}" style="padding: 0.75rem; border: 1px solid var(--border); border-radius: 0.5rem; background: var(--card);">
<div class="cli-endpoint-header" style="display: flex; align-items: center; gap: 0.5rem; margin-bottom: 0.5rem;">
<span class="cli-tool-status ${ep.enabled ? 'status-available' : 'status-unavailable'}" style="width: 8px; height: 8px; border-radius: 50%; background: ${ep.enabled ? 'var(--success)' : 'var(--muted-foreground)'}; flex-shrink: 0;"></span>
<span class="cli-endpoint-id" style="font-weight: 500; font-size: 0.875rem;">${ep.id}</span>
</div>
<div class="cli-endpoint-info" style="margin-top: 0.25rem;">
<span class="text-xs text-muted-foreground" style="font-size: 0.75rem; color: var(--muted-foreground);">${ep.model}</span>
</div>
</div>
`).join('')}
</div>
</div>
` : '';
// Config source info
const configInfo = window.claudeCliToolsConfig?._configInfo || {};
const configSourceLabel = configInfo.source === 'project' ? 'Project' : configInfo.source === 'global' ? 'Global' : 'Default';
const configSourceClass = configInfo.source === 'project' ? 'text-success' : configInfo.source === 'global' ? 'text-primary' : 'text-muted-foreground';
// CLI Settings section
const settingsHtml = `
<div class="cli-settings-section">
<div class="cli-settings-header">
<h4><i data-lucide="settings" class="w-3.5 h-3.5"></i> Settings</h4>
<span class="badge text-xs ${configSourceClass}" title="${configInfo.activePath || ''}">${configSourceLabel}</span>
</div>
<div class="cli-settings-grid">
<div class="cli-setting-item">
@@ -381,6 +608,39 @@ function renderCliStatus() {
</div>
<p class="cli-setting-desc">Maximum files to include in smart context</p>
</div>
<div class="cli-setting-item">
<label class="cli-setting-label">
<i data-lucide="hard-drive" class="w-3 h-3"></i>
Cache Injection
</label>
<div class="cli-setting-control">
<select class="cli-setting-select" onchange="setCacheInjectionMode(this.value)">
<option value="auto" ${getCacheInjectionMode() === 'auto' ? 'selected' : ''}>Auto</option>
<option value="manual" ${getCacheInjectionMode() === 'manual' ? 'selected' : ''}>Manual</option>
<option value="disabled" ${getCacheInjectionMode() === 'disabled' ? 'selected' : ''}>Disabled</option>
</select>
</div>
<p class="cli-setting-desc">Cache prefix/suffix injection mode for prompts</p>
</div>
<div class="cli-setting-item">
<label class="cli-setting-label">
<i data-lucide="search" class="w-3 h-3"></i>
Code Index MCP
</label>
<div class="cli-setting-control">
<div class="flex items-center bg-muted rounded-lg p-0.5">
<button class="code-mcp-btn px-3 py-1.5 text-xs font-medium rounded-md transition-all ${codeIndexMcpProvider === 'codexlens' ? 'bg-primary text-primary-foreground shadow-sm' : 'text-muted-foreground hover:text-foreground'}"
onclick="setCodeIndexMcpProvider('codexlens')">
CodexLens
</button>
<button class="code-mcp-btn px-3 py-1.5 text-xs font-medium rounded-md transition-all ${codeIndexMcpProvider === 'ace' ? 'bg-primary text-primary-foreground shadow-sm' : 'text-muted-foreground hover:text-foreground'}"
onclick="setCodeIndexMcpProvider('ace')">
ACE
</button>
</div>
</div>
<p class="cli-setting-desc">Code search provider (updates CLAUDE.md context-tools reference)</p>
</div>
</div>
</div>
`;
@@ -392,11 +652,13 @@ function renderCliStatus() {
<i data-lucide="refresh-cw" class="w-4 h-4"></i>
</button>
</div>
${ccwInstallHtml}
<div class="cli-tools-grid">
${toolsHtml}
${codexLensHtml}
${semanticHtml}
</div>
${apiEndpointsHtml}
${settingsHtml}
`;
@@ -408,7 +670,30 @@ function renderCliStatus() {
// ========== Actions ==========
function setDefaultCliTool(tool) {
// Validate: tool must be available and enabled
const status = cliToolStatus[tool] || {};
const config = cliToolsConfig[tool] || { enabled: true };
if (!status.available) {
showRefreshToast(`Cannot set ${tool} as default: not installed`, 'error');
return;
}
if (config.enabled === false) {
showRefreshToast(`Cannot set ${tool} as default: tool is disabled`, 'error');
return;
}
defaultCliTool = tool;
// Save to config
if (window.claudeCliToolsConfig) {
window.claudeCliToolsConfig.defaultTool = tool;
fetch('/api/cli/tools-config', {
method: 'PUT',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ defaultTool: tool })
}).catch(err => console.error('Failed to save default tool:', err));
}
renderCliStatus();
showRefreshToast(`Default CLI tool set to ${tool}`, 'success');
}
@@ -449,11 +734,93 @@ function setRecursiveQueryEnabled(enabled) {
showRefreshToast(`Recursive Query ${enabled ? 'enabled' : 'disabled'}`, 'success');
}
function getCacheInjectionMode() {
if (window.claudeCliToolsConfig && window.claudeCliToolsConfig.settings) {
return window.claudeCliToolsConfig.settings.cache?.injectionMode || 'auto';
}
return localStorage.getItem('ccw-cache-injection-mode') || 'auto';
}
async function setCacheInjectionMode(mode) {
try {
const response = await fetch('/api/cli/tools-config/cache', {
method: 'PUT',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ injectionMode: mode })
});
if (response.ok) {
localStorage.setItem('ccw-cache-injection-mode', mode);
if (window.claudeCliToolsConfig) {
window.claudeCliToolsConfig.settings.cache.injectionMode = mode;
}
showRefreshToast(`Cache injection mode set to ${mode}`, 'success');
} else {
showRefreshToast('Failed to update cache settings', 'error');
}
} catch (err) {
console.error('Failed to update cache settings:', err);
showRefreshToast('Failed to update cache settings', 'error');
}
}
async function setCodeIndexMcpProvider(provider) {
try {
const response = await fetch('/api/cli/code-index-mcp', {
method: 'PUT',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ provider: provider })
});
if (response.ok) {
codeIndexMcpProvider = provider;
if (window.claudeCliToolsConfig && window.claudeCliToolsConfig.settings) {
window.claudeCliToolsConfig.settings.codeIndexMcp = provider;
}
showRefreshToast(`Code Index MCP switched to ${provider === 'ace' ? 'ACE (Augment)' : 'CodexLens'}`, 'success');
// Re-render both CLI status and settings section
if (typeof renderCliStatus === 'function') renderCliStatus();
if (typeof renderCliSettingsSection === 'function') renderCliSettingsSection();
} else {
const data = await response.json();
showRefreshToast(`Failed to switch Code Index MCP: ${data.error}`, 'error');
}
} catch (err) {
console.error('Failed to switch Code Index MCP:', err);
showRefreshToast('Failed to switch Code Index MCP', 'error');
}
}
async function refreshAllCliStatus() {
await loadAllStatuses();
renderCliStatus();
}
async function toggleCliTool(tool, enabled) {
// If disabling the current default tool, switch to another available+enabled tool
if (!enabled && defaultCliTool === tool) {
const tools = ['gemini', 'qwen', 'codex', 'claude'];
const newDefault = tools.find(t => {
if (t === tool) return false;
const status = cliToolStatus[t] || {};
const config = cliToolsConfig[t] || { enabled: true };
return status.available && config.enabled !== false;
});
if (newDefault) {
defaultCliTool = newDefault;
if (window.claudeCliToolsConfig) {
window.claudeCliToolsConfig.defaultTool = newDefault;
}
showRefreshToast(`Default tool switched to ${newDefault}`, 'info');
} else {
showRefreshToast(`Warning: No other enabled tool available for default`, 'warning');
}
}
await updateCliToolEnabled(tool, enabled);
await loadAllStatuses();
renderCliStatus();
}
function installCodexLens() {
openCodexLensInstallWizard();
}
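Taken together, the reads in loadCliToolsConfig, getCacheInjectionMode and setCodeIndexMcpProvider imply a cli-tools.json shape roughly like the sketch below. This is inferred from the fields the frontend touches, not an authoritative schema; values and paths are illustrative.

// Inferred example only -- reconstructed from the fields read above.
const exampleCliToolsConfig = {
  defaultTool: 'gemini',
  tools: {
    gemini: { enabled: true },
    qwen: { enabled: true },
    codex: { enabled: false },
    claude: { enabled: true },
  },
  settings: {
    codeIndexMcp: 'codexlens',         // or 'ace'
    cache: { injectionMode: 'auto' },  // 'auto' | 'manual' | 'disabled'
  },
  _configInfo: { source: 'project', activePath: '.claude/cli-tools.json' },
};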

View File

@@ -143,6 +143,18 @@ function initNavigation() {
} else {
console.error('renderCoreMemoryView not defined - please refresh the page');
}
} else if (currentView === 'codexlens-manager') {
if (typeof renderCodexLensManager === 'function') {
renderCodexLensManager();
} else {
console.error('renderCodexLensManager not defined - please refresh the page');
}
} else if (currentView === 'api-settings') {
if (typeof renderApiSettings === 'function') {
renderApiSettings();
} else {
console.error('renderApiSettings not defined - please refresh the page');
}
}
});
});
@@ -183,6 +195,10 @@ function updateContentTitle() {
titleEl.textContent = t('title.helpGuide');
} else if (currentView === 'core-memory') {
titleEl.textContent = t('title.coreMemory');
} else if (currentView === 'codexlens-manager') {
titleEl.textContent = t('title.codexLensManager');
} else if (currentView === 'api-settings') {
titleEl.textContent = t('title.apiSettings');
} else if (currentView === 'liteTasks') {
const names = { 'lite-plan': t('title.litePlanSessions'), 'lite-fix': t('title.liteFixSessions') };
titleEl.textContent = names[currentLiteType] || t('title.liteTasks');

View File

@@ -19,13 +19,18 @@ const i18n = {
'common.delete': 'Delete',
'common.cancel': 'Cancel',
'common.save': 'Save',
'common.include': 'Include',
'common.close': 'Close',
'common.loading': 'Loading...',
'common.error': 'Error',
'common.success': 'Success',
'common.deleteSuccess': 'Deleted successfully',
'common.deleteFailed': 'Delete failed',
'common.retry': 'Retry',
'common.refresh': 'Refresh',
'common.minutes': 'minutes',
'common.enabled': 'Enabled',
'common.disabled': 'Disabled',
// Header
'header.project': 'Project:',
@@ -41,6 +46,7 @@ const i18n = {
'nav.explorer': 'Explorer',
'nav.status': 'Status',
'nav.history': 'History',
'nav.codexLensManager': 'CodexLens',
'nav.memory': 'Memory',
'nav.contextMemory': 'Context',
'nav.coreMemory': 'Core Memory',
@@ -98,7 +104,8 @@ const i18n = {
'title.hookManager': 'Hook Manager',
'title.memoryModule': 'Memory Module',
'title.promptHistory': 'Prompt History',
'title.codexLensManager': 'CodexLens Manager',
// Search
'search.placeholder': 'Search...',
@@ -215,6 +222,7 @@ const i18n = {
'cli.default': 'Default',
'cli.install': 'Install',
'cli.uninstall': 'Uninstall',
'cli.openManager': 'Manager',
'cli.initIndex': 'Init Index',
'cli.geminiDesc': 'Google AI for code analysis',
'cli.qwenDesc': 'Alibaba AI assistant',
@@ -223,12 +231,19 @@ const i18n = {
'cli.codexLensDescFull': 'Full-text code search engine',
'cli.semanticDesc': 'AI-powered code understanding',
'cli.semanticDescFull': 'Natural language code search',
'cli.apiEndpoints': 'API Endpoints',
'cli.configured': 'configured',
'cli.addToCli': 'Add to CLI',
'cli.enabled': 'Enabled',
'cli.disabled': 'Disabled',
// CodexLens Configuration
'codexlens.config': 'CodexLens Configuration',
'codexlens.configDesc': 'Manage code indexing, semantic search, and embedding models',
'codexlens.status': 'Status',
'codexlens.installed': 'Installed',
'codexlens.notInstalled': 'Not Installed',
'codexlens.installFirst': 'Install CodexLens to access semantic search and model management features',
'codexlens.indexes': 'Indexes',
'codexlens.currentWorkspace': 'Current Workspace',
'codexlens.indexStoragePath': 'Index Storage Path',
@@ -237,6 +252,8 @@ const i18n = {
'codexlens.newStoragePath': 'New Storage Path',
'codexlens.pathPlaceholder': 'e.g., /path/to/indexes or ~/.codexlens/indexes',
'codexlens.pathInfo': 'Supports ~ for home directory. Changes take effect immediately.',
'codexlens.pathUnchanged': 'Path unchanged',
'codexlens.pathEmpty': 'Path cannot be empty',
'codexlens.migrationRequired': 'Migration Required',
'codexlens.migrationWarning': 'After changing the path, existing indexes will need to be re-initialized for each workspace.',
'codexlens.actions': 'Actions',
@@ -244,6 +261,50 @@ const i18n = {
'codexlens.cleanCurrentWorkspace': 'Clean Current Workspace',
'codexlens.cleanAllIndexes': 'Clean All Indexes',
'codexlens.installCodexLens': 'Install CodexLens',
'codexlens.createIndex': 'Create Index',
'codexlens.embeddingBackend': 'Embedding Backend',
'codexlens.localFastembed': 'Local (FastEmbed)',
'codexlens.apiLitellm': 'API (LiteLLM)',
'codexlens.backendHint': 'Select local model or remote API endpoint',
'codexlens.noApiModels': 'No API embedding models configured',
'codexlens.embeddingModel': 'Embedding Model',
'codexlens.modelHint': 'Select embedding model for vector search (models with ✓ are installed)',
'codexlens.concurrency': 'API Concurrency',
'codexlens.concurrencyHint': 'Number of parallel API calls. Higher values speed up indexing but may hit rate limits.',
'codexlens.concurrencyCustom': 'Custom',
'codexlens.rotation': 'Multi-Provider Rotation',
'codexlens.rotationDesc': 'Aggregate multiple API providers and keys for parallel embedding generation',
'codexlens.rotationEnabled': 'Enable Rotation',
'codexlens.rotationStrategy': 'Rotation Strategy',
'codexlens.strategyRoundRobin': 'Round Robin',
'codexlens.strategyLatencyAware': 'Latency Aware',
'codexlens.strategyWeightedRandom': 'Weighted Random',
'codexlens.targetModel': 'Target Model',
'codexlens.targetModelHint': 'Model name that all providers should support (e.g., qwen3-embedding)',
'codexlens.cooldownSeconds': 'Cooldown (seconds)',
'codexlens.cooldownHint': 'Default cooldown after rate limit (60s recommended)',
'codexlens.rotationProviders': 'Rotation Providers',
'codexlens.addProvider': 'Add Provider',
'codexlens.noRotationProviders': 'No providers configured for rotation',
'codexlens.providerWeight': 'Weight',
'codexlens.maxConcurrentPerKey': 'Max Concurrent/Key',
'codexlens.useAllKeys': 'Use All Keys',
'codexlens.selectKeys': 'Select Keys',
'codexlens.configureRotation': 'Configure Rotation',
'codexlens.configureInApiSettings': 'Configure in API Settings',
'codexlens.rotationSaved': 'Rotation config saved successfully',
'codexlens.endpointsSynced': 'endpoints synced to CodexLens',
'codexlens.syncFailed': 'Sync failed',
'codexlens.rotationDeleted': 'Rotation config deleted',
'codexlens.totalEndpoints': 'Total Endpoints',
'codexlens.fullIndex': 'Full',
'codexlens.vectorIndex': 'Vector',
'codexlens.ftsIndex': 'FTS',
'codexlens.fullIndexDesc': 'FTS + Semantic search (recommended)',
'codexlens.vectorIndexDesc': 'Semantic search with embeddings only',
'codexlens.ftsIndexDesc': 'Fast full-text search only',
'codexlens.indexTypeHint': 'Full index includes FTS + semantic search. FTS only is faster but without AI-powered search.',
'codexlens.maintenance': 'Maintenance',
'codexlens.testSearch': 'Test Search',
'codexlens.testFunctionality': 'test CodexLens functionality',
'codexlens.textSearch': 'Text Search',
@@ -257,6 +318,9 @@ const i18n = {
'codexlens.runSearch': 'Run Search',
'codexlens.results': 'Results',
'codexlens.resultsCount': 'results',
'codexlens.resultLimit': 'Limit',
'codexlens.contentLength': 'Content Length',
'codexlens.extraFiles': 'Extra Files',
'codexlens.saveConfig': 'Save Configuration',
'codexlens.searching': 'Searching...',
'codexlens.searchCompleted': 'Search completed',
@@ -291,6 +355,17 @@ const i18n = {
'codexlens.cudaModeDesc': 'NVIDIA GPU (requires CUDA Toolkit)',
'common.recommended': 'Recommended',
'common.unavailable': 'Unavailable',
'common.auto': 'Auto',
// GPU Device Selection
'codexlens.selectGpuDevice': 'Select GPU Device',
'codexlens.discrete': 'Discrete',
'codexlens.integrated': 'Integrated',
'codexlens.selectingGpu': 'Selecting GPU...',
'codexlens.gpuSelected': 'GPU selected',
'codexlens.resettingGpu': 'Resetting GPU selection...',
'codexlens.gpuReset': 'GPU selection reset to auto',
'codexlens.resetToAuto': 'Reset to Auto',
'codexlens.modelManagement': 'Model Management',
'codexlens.loadingModels': 'Loading models...',
'codexlens.downloadModel': 'Download',
@@ -342,6 +417,8 @@ const i18n = {
'codexlens.indexComplete': 'Index complete',
'codexlens.indexSuccess': 'Index created successfully',
'codexlens.indexFailed': 'Indexing failed',
'codexlens.embeddingsFailed': 'Embeddings generation failed',
'codexlens.ftsSuccessEmbeddingsFailed': 'FTS index created, but embeddings failed',
// CodexLens Install
'codexlens.installDesc': 'Python-based code indexing engine',
@@ -444,6 +521,19 @@ const i18n = {
'lang.windowsDisableSuccess': 'Windows platform guidelines disabled',
'lang.windowsEnableFailed': 'Failed to enable Windows platform guidelines',
'lang.windowsDisableFailed': 'Failed to disable Windows platform guidelines',
'lang.installRequired': 'Run "ccw install" to enable this feature',
// CCW Installation Status
'status.installed': 'Installed',
'status.incomplete': 'Incomplete',
'status.notInstalled': 'Not Installed',
'status.ccwInstall': 'CCW Workflows',
'status.ccwInstallDesc': 'Required workflow files for full functionality',
'status.required': 'Required',
'status.filesMissing': 'files missing',
'status.missingFiles': 'Missing files',
'status.runToFix': 'Run to fix',
'cli.promptFormat': 'Prompt Format',
'cli.promptFormatDesc': 'Format for multi-turn conversation concatenation',
'cli.storageBackend': 'Storage Backend',
@@ -456,7 +546,9 @@ const i18n = {
'cli.recursiveQueryDesc': 'Aggregate CLI history and memory data from parent and child projects',
'cli.maxContextFiles': 'Max Context Files',
'cli.maxContextFilesDesc': 'Maximum files to include in smart context',
'cli.codeIndexMcp': 'Code Index MCP',
'cli.codeIndexMcpDesc': 'Code search provider (updates CLAUDE.md context-tools reference)',
// CCW Install
'ccw.install': 'CCW Install',
'ccw.installations': 'installation',
@@ -1289,6 +1381,206 @@ const i18n = {
'claude.unsupportedFileType': 'Unsupported file type',
'claude.loadFileError': 'Failed to load file',
// API Settings
'nav.apiSettings': 'API Settings',
'title.apiSettings': 'API Settings',
'apiSettings.providers': 'Providers',
'apiSettings.customEndpoints': 'Custom Endpoints',
'apiSettings.cacheSettings': 'Cache Settings',
'apiSettings.addProvider': 'Add Provider',
'apiSettings.editProvider': 'Edit Provider',
'apiSettings.deleteProvider': 'Delete Provider',
'apiSettings.addEndpoint': 'Add Endpoint',
'apiSettings.editEndpoint': 'Edit Endpoint',
'apiSettings.deleteEndpoint': 'Delete Endpoint',
'apiSettings.providerType': 'Provider Type',
'apiSettings.apiFormat': 'API Format',
'apiSettings.compatible': 'Compatible',
'apiSettings.customFormat': 'Custom Format',
'apiSettings.apiFormatHint': 'Most providers (DeepSeek, Ollama, etc.) use OpenAI-compatible format',
'apiSettings.displayName': 'Display Name',
'apiSettings.apiKey': 'API Key',
'apiSettings.apiBaseUrl': 'API Base URL',
'apiSettings.useEnvVar': 'Use environment variable',
'apiSettings.enableProvider': 'Enable provider',
'apiSettings.advancedSettings': 'Advanced Settings',
'apiSettings.basicInfo': 'Basic Info',
'apiSettings.endpointSettings': 'Endpoint Settings',
'apiSettings.timeout': 'Timeout (seconds)',
'apiSettings.seconds': 'seconds',
'apiSettings.timeoutHint': 'Request timeout in seconds (default: 300)',
'apiSettings.maxRetries': 'Max Retries',
'apiSettings.maxRetriesHint': 'Maximum retry attempts on failure',
'apiSettings.organization': 'Organization ID',
'apiSettings.organizationHint': 'OpenAI organization ID (org-...)',
'apiSettings.apiVersion': 'API Version',
'apiSettings.apiVersionHint': 'Azure API version (e.g., 2024-02-01)',
'apiSettings.rpm': 'RPM Limit',
'apiSettings.tpm': 'TPM Limit',
'apiSettings.unlimited': 'Unlimited',
'apiSettings.proxy': 'Proxy Server',
'apiSettings.proxyHint': 'HTTP proxy server URL',
'apiSettings.customHeaders': 'Custom Headers',
'apiSettings.customHeadersHint': 'JSON object with custom HTTP headers',
'apiSettings.invalidJsonHeaders': 'Invalid JSON in custom headers',
'apiSettings.searchProviders': 'Search providers...',
'apiSettings.selectProvider': 'Select a Provider',
'apiSettings.selectProviderHint': 'Select a provider from the list to view and manage its settings',
'apiSettings.noProvidersFound': 'No providers found',
'apiSettings.llmModels': 'LLM Models',
'apiSettings.embeddingModels': 'Embedding Models',
'apiSettings.manageModels': 'Manage',
'apiSettings.addModel': 'Add Model',
'apiSettings.multiKeySettings': 'Multi-Key Settings',
'apiSettings.noModels': 'No models configured',
'apiSettings.previewModel': 'Preview',
'apiSettings.modelSettings': 'Model Settings',
'apiSettings.deleteModel': 'Delete Model',
'apiSettings.endpointPreview': 'Endpoint Preview',
'apiSettings.modelBaseUrlOverride': 'Base URL Override',
'apiSettings.modelBaseUrlHint': 'Override the provider base URL for this specific model (leave empty to use provider default)',
'apiSettings.providerUpdated': 'Provider updated',
'apiSettings.syncToCodexLens': 'Sync to CodexLens',
'apiSettings.configSynced': 'Config synced to CodexLens',
'apiSettings.sdkAutoAppends': 'SDK auto-appends',
'apiSettings.preview': 'Preview',
'apiSettings.used': 'used',
'apiSettings.total': 'total',
'apiSettings.testConnection': 'Test Connection',
'apiSettings.endpointId': 'Endpoint ID',
'apiSettings.endpointIdHint': 'Usage: ccw cli -p "..." --model <endpoint-id>',
'apiSettings.endpoints': 'Endpoints',
'apiSettings.addEndpointHint': 'Create custom endpoint aliases for CLI usage',
'apiSettings.endpointModel': 'Model',
'apiSettings.selectEndpoint': 'Select an endpoint',
'apiSettings.selectEndpointHint': 'Choose an endpoint from the list to view or edit its settings',
'apiSettings.provider': 'Provider',
'apiSettings.model': 'Model',
'apiSettings.selectModel': 'Select model',
'apiSettings.noModelsConfigured': 'No models configured for this provider',
'apiSettings.cacheStrategy': 'Cache Strategy',
'apiSettings.enableContextCaching': 'Enable Context Caching',
'apiSettings.cacheTTL': 'TTL (minutes)',
'apiSettings.cacheMaxSize': 'Max Size (KB)',
'apiSettings.autoCachePatterns': 'Auto-cache file patterns',
'apiSettings.enableGlobalCaching': 'Enable Global Caching',
'apiSettings.cacheUsed': 'Used',
'apiSettings.cacheEntries': 'Entries',
'apiSettings.clearCache': 'Clear Cache',
'apiSettings.noProviders': 'No providers configured',
'apiSettings.noEndpoints': 'No endpoints configured',
'apiSettings.enabled': 'Enabled',
'apiSettings.disabled': 'Disabled',
'apiSettings.cacheEnabled': 'Cache Enabled',
'apiSettings.cacheDisabled': 'Cache Disabled',
'apiSettings.providerSaved': 'Provider saved successfully',
'apiSettings.providerDeleted': 'Provider deleted successfully',
'apiSettings.apiBaseUpdated': 'API Base URL updated successfully',
'apiSettings.endpointSaved': 'Endpoint saved successfully',
'apiSettings.endpointDeleted': 'Endpoint deleted successfully',
'apiSettings.cacheCleared': 'Cache cleared successfully',
'apiSettings.cacheSettingsUpdated': 'Cache settings updated',
'apiSettings.embeddingPool': 'Embedding Pool',
'apiSettings.embeddingPoolDesc': 'Auto-rotate between providers with same model',
'apiSettings.targetModel': 'Target Model',
'apiSettings.discoveredProviders': 'Discovered Providers',
'apiSettings.autoDiscover': 'Auto-discover providers',
'apiSettings.excludeProvider': 'Exclude',
'apiSettings.defaultCooldown': 'Cooldown (seconds)',
'apiSettings.defaultConcurrent': 'Concurrent per key',
'apiSettings.poolEnabled': 'Enable Embedding Pool',
'apiSettings.noProvidersFound': 'No providers found for this model',
'apiSettings.poolSaved': 'Embedding pool config saved',
'apiSettings.strategy': 'Strategy',
'apiSettings.providerKeys': 'keys',
'apiSettings.selectTargetModel': 'Select target model',
'apiSettings.confirmDeleteProvider': 'Are you sure you want to delete this provider?',
'apiSettings.confirmDeleteEndpoint': 'Are you sure you want to delete this endpoint?',
'apiSettings.confirmClearCache': 'Are you sure you want to clear the cache?',
'apiSettings.connectionSuccess': 'Connection successful',
'apiSettings.connectionFailed': 'Connection failed',
'apiSettings.saveProviderFirst': 'Please save the provider first',
'apiSettings.addProviderFirst': 'Please add a provider first',
'apiSettings.failedToLoad': 'Failed to load API settings',
'apiSettings.toggleVisibility': 'Toggle visibility',
'apiSettings.noProvidersHint': 'Add an API provider to get started',
'apiSettings.noEndpointsHint': 'Create custom endpoints for quick access to models',
'apiSettings.cache': 'Cache',
'apiSettings.off': 'Off',
'apiSettings.used': 'used',
'apiSettings.total': 'total',
'apiSettings.cacheUsage': 'Usage',
'apiSettings.cacheSize': 'Size',
'apiSettings.endpointsDescription': 'Manage custom API endpoints for quick model access',
'apiSettings.totalEndpoints': 'Total Endpoints',
'apiSettings.cachedEndpoints': 'Cached Endpoints',
'apiSettings.cacheTabHint': 'Configure global cache settings and view statistics in the main panel',
'apiSettings.cacheDescription': 'Manage response caching to improve performance and reduce costs',
'apiSettings.cachedEntries': 'Cached Entries',
'apiSettings.storageUsed': 'Storage Used',
'apiSettings.cacheActions': 'Cache Actions',
'apiSettings.cacheStatistics': 'Cache Statistics',
'apiSettings.globalCache': 'Global Cache',
// Multi-key management
'apiSettings.apiKeys': 'API Keys',
'apiSettings.addKey': 'Add Key',
'apiSettings.keyLabel': 'Label',
'apiSettings.keyValue': 'API Key',
'apiSettings.keyWeight': 'Weight',
'apiSettings.removeKey': 'Remove',
'apiSettings.noKeys': 'No API keys configured',
'apiSettings.primaryKey': 'Primary Key',
// Routing strategy
'apiSettings.routingStrategy': 'Routing Strategy',
'apiSettings.simpleShuffleRouting': 'Simple Shuffle (Random)',
'apiSettings.weightedRouting': 'Weighted Distribution',
'apiSettings.latencyRouting': 'Latency-Based',
'apiSettings.costRouting': 'Cost-Based',
'apiSettings.leastBusyRouting': 'Least Busy',
'apiSettings.routingHint': 'How to distribute requests across multiple API keys',
// Health check
'apiSettings.healthCheck': 'Health Check',
'apiSettings.enableHealthCheck': 'Enable Health Check',
'apiSettings.healthInterval': 'Check Interval (seconds)',
'apiSettings.healthCooldown': 'Cooldown (seconds)',
'apiSettings.failureThreshold': 'Failure Threshold',
'apiSettings.healthStatus': 'Status',
'apiSettings.healthy': 'Healthy',
'apiSettings.unhealthy': 'Unhealthy',
'apiSettings.unknown': 'Unknown',
'apiSettings.lastCheck': 'Last Check',
'apiSettings.testKey': 'Test Key',
'apiSettings.testingKey': 'Testing...',
'apiSettings.keyValid': 'Key is valid',
'apiSettings.keyInvalid': 'Key is invalid',
// Embedding models
'apiSettings.embeddingDimensions': 'Dimensions',
'apiSettings.embeddingMaxTokens': 'Max Tokens',
'apiSettings.selectEmbeddingModel': 'Select Embedding Model',
// Model modal
'apiSettings.addLlmModel': 'Add LLM Model',
'apiSettings.addEmbeddingModel': 'Add Embedding Model',
'apiSettings.modelId': 'Model ID',
'apiSettings.modelName': 'Display Name',
'apiSettings.modelSeries': 'Series',
'apiSettings.selectFromPresets': 'Select from Presets',
'apiSettings.customModel': 'Custom Model',
'apiSettings.capabilities': 'Capabilities',
'apiSettings.streaming': 'Streaming',
'apiSettings.functionCalling': 'Function Calling',
'apiSettings.vision': 'Vision',
'apiSettings.contextWindow': 'Context Window',
'apiSettings.description': 'Description',
'apiSettings.optional': 'Optional',
'apiSettings.modelIdExists': 'Model ID already exists',
'apiSettings.useModelTreeToManage': 'Use the model tree to manage individual models',
// Common
'common.cancel': 'Cancel',
'common.optional': '(Optional)',
@@ -1312,6 +1604,7 @@ const i18n = {
'common.saveFailed': 'Failed to save',
'common.unknownError': 'Unknown error',
'common.exception': 'Exception',
'common.status': 'Status',
// Core Memory
'title.coreMemory': 'Core Memory',
@@ -1435,13 +1728,18 @@ const i18n = {
'common.delete': '删除',
'common.cancel': '取消',
'common.save': '保存',
'common.include': '包含',
'common.close': '关闭',
'common.loading': '加载中...',
'common.error': '错误',
'common.success': '成功',
'common.deleteSuccess': '删除成功',
'common.deleteFailed': '删除失败',
'common.retry': '重试',
'common.refresh': '刷新',
'common.minutes': '分钟',
'common.enabled': '已启用',
'common.disabled': '已禁用',
// Header
'header.project': '项目:',
@@ -1457,6 +1755,7 @@ const i18n = {
'nav.explorer': '文件浏览器',
'nav.status': '状态',
'nav.history': '历史',
'nav.codexLensManager': 'CodexLens',
'nav.memory': '记忆',
'nav.contextMemory': '活动',
'nav.coreMemory': '核心记忆',
@@ -1514,6 +1813,7 @@ const i18n = {
'title.hookManager': '钩子管理',
'title.memoryModule': '记忆模块',
'title.promptHistory': '提示历史',
'title.codexLensManager': 'CodexLens 管理',
// Search
'search.placeholder': '搜索...',
@@ -1631,6 +1931,7 @@ const i18n = {
'cli.default': '默认',
'cli.install': '安装',
'cli.uninstall': '卸载',
'cli.openManager': '管理',
'cli.initIndex': '初始化索引',
'cli.geminiDesc': 'Google AI 代码分析',
'cli.qwenDesc': '阿里通义 AI 助手',
@@ -1639,12 +1940,19 @@ const i18n = {
'cli.codexLensDescFull': '全文代码搜索引擎',
'cli.semanticDesc': 'AI 驱动的代码理解',
'cli.semanticDescFull': '自然语言代码搜索',
'cli.apiEndpoints': 'API 端点',
'cli.configured': '已配置',
'cli.addToCli': '添加到 CLI',
'cli.enabled': '已启用',
'cli.disabled': '已禁用',
// CodexLens 配置
'codexlens.config': 'CodexLens 配置',
'codexlens.configDesc': '管理代码索引、语义搜索和嵌入模型',
'codexlens.status': '状态',
'codexlens.installed': '已安装',
'codexlens.notInstalled': '未安装',
'codexlens.installFirst': '安装 CodexLens 以访问语义搜索和模型管理功能',
'codexlens.indexes': '索引',
'codexlens.currentWorkspace': '当前工作区',
'codexlens.indexStoragePath': '索引存储路径',
@@ -1653,6 +1961,8 @@ const i18n = {
'codexlens.newStoragePath': '新存储路径',
'codexlens.pathPlaceholder': '例如:/path/to/indexes 或 ~/.codexlens/indexes',
'codexlens.pathInfo': '支持 ~ 表示用户目录。更改立即生效。',
'codexlens.pathUnchanged': '路径未变更',
'codexlens.pathEmpty': '路径不能为空',
'codexlens.migrationRequired': '需要迁移',
'codexlens.migrationWarning': '更改路径后,需要为每个工作区重新初始化索引。',
'codexlens.actions': '操作',
@@ -1660,6 +1970,50 @@ const i18n = {
'codexlens.cleanCurrentWorkspace': '清理当前工作空间',
'codexlens.cleanAllIndexes': '清理所有索引',
'codexlens.installCodexLens': '安装 CodexLens',
'codexlens.createIndex': '创建索引',
'codexlens.embeddingBackend': '嵌入后端',
'codexlens.localFastembed': '本地 (FastEmbed)',
'codexlens.apiLitellm': 'API (LiteLLM)',
'codexlens.backendHint': '选择本地模型或远程 API 端点',
'codexlens.noApiModels': '未配置 API 嵌入模型',
'codexlens.embeddingModel': '嵌入模型',
'codexlens.modelHint': '选择向量搜索的嵌入模型(带 ✓ 的已安装)',
'codexlens.concurrency': 'API 并发数',
'codexlens.concurrencyHint': '并行 API 调用数量。较高的值可加速索引但可能触发速率限制。',
'codexlens.concurrencyCustom': '自定义',
'codexlens.rotation': '多供应商轮换',
'codexlens.rotationDesc': '聚合多个 API 供应商和密钥进行并行嵌入生成',
'codexlens.rotationEnabled': '启用轮换',
'codexlens.rotationStrategy': '轮换策略',
'codexlens.strategyRoundRobin': '轮询',
'codexlens.strategyLatencyAware': '延迟感知',
'codexlens.strategyWeightedRandom': '加权随机',
'codexlens.targetModel': '目标模型',
'codexlens.targetModelHint': '所有供应商应支持的模型名称(例如 qwen3-embedding)',
'codexlens.cooldownSeconds': '冷却时间(秒)',
'codexlens.cooldownHint': '速率限制后的默认冷却时间(推荐 60 秒)',
'codexlens.rotationProviders': '轮换供应商',
'codexlens.addProvider': '添加供应商',
'codexlens.noRotationProviders': '未配置轮换供应商',
'codexlens.providerWeight': '权重',
'codexlens.maxConcurrentPerKey': '每密钥最大并发',
'codexlens.useAllKeys': '使用所有密钥',
'codexlens.selectKeys': '选择密钥',
'codexlens.configureRotation': '配置轮换',
'codexlens.configureInApiSettings': '在 API 设置中配置',
'codexlens.rotationSaved': '轮换配置保存成功',
'codexlens.endpointsSynced': '个端点已同步到 CodexLens',
'codexlens.syncFailed': '同步失败',
'codexlens.rotationDeleted': '轮换配置已删除',
'codexlens.totalEndpoints': '总端点数',
'codexlens.fullIndex': '全部',
'codexlens.vectorIndex': '向量',
'codexlens.ftsIndex': 'FTS',
'codexlens.fullIndexDesc': 'FTS + 语义搜索(推荐)',
'codexlens.vectorIndexDesc': '仅语义嵌入搜索',
'codexlens.ftsIndexDesc': '仅快速全文搜索',
'codexlens.indexTypeHint': '完整索引包含 FTS + 语义搜索。仅 FTS 更快但无 AI 搜索功能。',
'codexlens.maintenance': '维护',
'codexlens.testSearch': '测试搜索',
'codexlens.testFunctionality': '测试 CodexLens 功能',
'codexlens.textSearch': '文本搜索',
@@ -1673,6 +2027,9 @@ const i18n = {
'codexlens.runSearch': '运行搜索',
'codexlens.results': '结果',
'codexlens.resultsCount': '个结果',
'codexlens.resultLimit': '数量限制',
'codexlens.contentLength': '内容长度',
'codexlens.extraFiles': '额外文件',
'codexlens.saveConfig': '保存配置',
'codexlens.searching': '搜索中...',
'codexlens.searchCompleted': '搜索完成',
@@ -1707,6 +2064,18 @@ const i18n = {
'codexlens.cudaModeDesc': 'NVIDIA GPU(需要 CUDA Toolkit)',
'common.recommended': '推荐',
'common.unavailable': '不可用',
'common.auto': '自动',
// GPU 设备选择
'codexlens.selectGpuDevice': '选择 GPU 设备',
'codexlens.discrete': '独立显卡',
'codexlens.integrated': '集成显卡',
'codexlens.selectingGpu': '选择 GPU 中...',
'codexlens.gpuSelected': 'GPU 已选择',
'codexlens.resettingGpu': '重置 GPU 选择中...',
'codexlens.gpuReset': 'GPU 选择已重置为自动',
'codexlens.resetToAuto': '重置为自动',
'codexlens.modelManagement': '模型管理',
'codexlens.loadingModels': '加载模型中...',
'codexlens.downloadModel': '下载',
@@ -1758,6 +2127,8 @@ const i18n = {
'codexlens.indexComplete': '索引完成',
'codexlens.indexSuccess': '索引创建成功',
'codexlens.indexFailed': '索引失败',
'codexlens.embeddingsFailed': '嵌入生成失败',
'codexlens.ftsSuccessEmbeddingsFailed': 'FTS 索引已创建,但嵌入生成失败',
// CodexLens 安装
'codexlens.installDesc': '基于 Python 的代码索引引擎',
@@ -1860,6 +2231,19 @@ const i18n = {
'lang.windowsDisableSuccess': 'Windows 平台规范已禁用',
'lang.windowsEnableFailed': '启用 Windows 平台规范失败',
'lang.windowsDisableFailed': '禁用 Windows 平台规范失败',
'lang.installRequired': '请运行 "ccw install" 以启用此功能',
// CCW 安装状态
'status.installed': '已安装',
'status.incomplete': '不完整',
'status.notInstalled': '未安装',
'status.ccwInstall': 'CCW 工作流',
'status.ccwInstallDesc': '完整功能所需的工作流文件',
'status.required': '必需',
'status.filesMissing': '个文件缺失',
'status.missingFiles': '缺失文件',
'status.runToFix': '修复命令',
'cli.promptFormat': '提示词格式',
'cli.promptFormatDesc': '多轮对话拼接格式',
'cli.storageBackend': '存储后端',
@@ -1872,7 +2256,9 @@ const i18n = {
'cli.recursiveQueryDesc': '聚合显示父项目和子项目的 CLI 历史与内存数据',
'cli.maxContextFiles': '最大上下文文件数',
'cli.maxContextFilesDesc': '智能上下文包含的最大文件数',
'cli.codeIndexMcp': '代码索引 MCP',
'cli.codeIndexMcpDesc': '代码搜索提供者 (更新 CLAUDE.md 的 context-tools 引用)',
// CCW Install
'ccw.install': 'CCW 安装',
'ccw.installations': '个安装',
@@ -2714,6 +3100,205 @@ const i18n = {
'claudeManager.saved': 'File saved successfully',
'claudeManager.saveError': 'Failed to save file',
// API Settings
'nav.apiSettings': 'API 设置',
'title.apiSettings': 'API 设置',
'apiSettings.providers': '提供商',
'apiSettings.customEndpoints': '自定义端点',
'apiSettings.cacheSettings': '缓存设置',
'apiSettings.addProvider': '添加提供商',
'apiSettings.editProvider': '编辑提供商',
'apiSettings.deleteProvider': '删除提供商',
'apiSettings.addEndpoint': '添加端点',
'apiSettings.editEndpoint': '编辑端点',
'apiSettings.deleteEndpoint': '删除端点',
'apiSettings.providerType': '提供商类型',
'apiSettings.apiFormat': 'API 格式',
'apiSettings.compatible': '兼容',
'apiSettings.customFormat': '自定义格式',
'apiSettings.apiFormatHint': '大多数供应商(DeepSeek、Ollama 等)使用 OpenAI 兼容格式',
'apiSettings.displayName': '显示名称',
'apiSettings.apiKey': 'API 密钥',
'apiSettings.apiBaseUrl': 'API 基础 URL',
'apiSettings.useEnvVar': '使用环境变量',
'apiSettings.enableProvider': '启用提供商',
'apiSettings.advancedSettings': '高级设置',
'apiSettings.basicInfo': '基本信息',
'apiSettings.endpointSettings': '端点设置',
'apiSettings.timeout': '超时时间(秒)',
'apiSettings.seconds': '秒',
'apiSettings.timeoutHint': '请求超时时间(单位:秒,默认 300)',
'apiSettings.maxRetries': '最大重试次数',
'apiSettings.maxRetriesHint': '失败后最大重试次数',
'apiSettings.organization': '组织 ID',
'apiSettings.organizationHint': 'OpenAI 组织 ID(org-...)',
'apiSettings.apiVersion': 'API 版本',
'apiSettings.apiVersionHint': 'Azure API 版本(如 2024-02-01)',
'apiSettings.rpm': 'RPM 限制',
'apiSettings.tpm': 'TPM 限制',
'apiSettings.unlimited': '无限制',
'apiSettings.proxy': '代理服务器',
'apiSettings.proxyHint': 'HTTP 代理服务器 URL',
'apiSettings.customHeaders': '自定义请求头',
'apiSettings.customHeadersHint': '自定义 HTTP 请求头的 JSON 对象',
'apiSettings.invalidJsonHeaders': '自定义请求头 JSON 格式无效',
'apiSettings.searchProviders': '搜索供应商...',
'apiSettings.selectProvider': '选择供应商',
'apiSettings.selectProviderHint': '从列表中选择一个供应商来查看和管理其设置',
'apiSettings.noProvidersFound': '未找到供应商',
'apiSettings.llmModels': '大语言模型',
'apiSettings.embeddingModels': '向量模型',
'apiSettings.manageModels': '管理',
'apiSettings.addModel': '添加模型',
'apiSettings.multiKeySettings': '多密钥设置',
'apiSettings.noModels': '暂无模型配置',
'apiSettings.previewModel': '预览',
'apiSettings.modelSettings': '模型设置',
'apiSettings.deleteModel': '删除模型',
'apiSettings.endpointPreview': '端点预览',
'apiSettings.modelBaseUrlOverride': '基础 URL 覆盖',
'apiSettings.modelBaseUrlHint': '为此模型覆盖供应商的基础 URL留空则使用供应商默认值',
'apiSettings.providerUpdated': '供应商已更新',
'apiSettings.syncToCodexLens': '同步到 CodexLens',
'apiSettings.configSynced': '配置已同步到 CodexLens',
'apiSettings.preview': '预览',
'apiSettings.used': '已使用',
'apiSettings.total': '总计',
'apiSettings.testConnection': '测试连接',
'apiSettings.endpointId': '端点 ID',
'apiSettings.endpointIdHint': '用法: ccw cli -p "..." --model <端点ID>',
'apiSettings.endpoints': '端点',
'apiSettings.addEndpointHint': '创建用于 CLI 的自定义端点别名',
'apiSettings.endpointModel': '模型',
'apiSettings.selectEndpoint': '选择端点',
'apiSettings.selectEndpointHint': '从列表中选择一个端点以查看或编辑其设置',
'apiSettings.provider': '提供商',
'apiSettings.model': '模型',
'apiSettings.selectModel': '选择模型',
'apiSettings.noModelsConfigured': '该供应商未配置模型',
'apiSettings.cacheStrategy': '缓存策略',
'apiSettings.enableContextCaching': '启用上下文缓存',
'apiSettings.cacheTTL': 'TTL (分钟)',
'apiSettings.cacheMaxSize': '最大大小 (KB)',
'apiSettings.autoCachePatterns': '自动缓存文件模式',
'apiSettings.enableGlobalCaching': '启用全局缓存',
'apiSettings.cacheUsed': '已使用',
'apiSettings.cacheEntries': '条目数',
'apiSettings.clearCache': '清除缓存',
'apiSettings.noProviders': '未配置提供商',
'apiSettings.noEndpoints': '未配置端点',
'apiSettings.enabled': '已启用',
'apiSettings.disabled': '已禁用',
'apiSettings.cacheEnabled': '缓存已启用',
'apiSettings.cacheDisabled': '缓存已禁用',
'apiSettings.providerSaved': '提供商保存成功',
'apiSettings.providerDeleted': '提供商删除成功',
'apiSettings.apiBaseUpdated': 'API 基础 URL 更新成功',
'apiSettings.endpointSaved': '端点保存成功',
'apiSettings.endpointDeleted': '端点删除成功',
'apiSettings.cacheCleared': '缓存清除成功',
'apiSettings.cacheSettingsUpdated': '缓存设置已更新',
'apiSettings.embeddingPool': '高可用嵌入',
'apiSettings.embeddingPoolDesc': '自动轮换使用相同模型的供应商',
'apiSettings.targetModel': '目标模型',
'apiSettings.discoveredProviders': '发现的供应商',
'apiSettings.autoDiscover': '自动发现供应商',
'apiSettings.excludeProvider': '排除',
'apiSettings.defaultCooldown': '冷却时间(秒)',
'apiSettings.defaultConcurrent': '每密钥并发数',
'apiSettings.poolEnabled': '启用嵌入池',
'apiSettings.noProvidersFound': '未找到提供此模型的供应商',
'apiSettings.poolSaved': '嵌入池配置已保存',
'apiSettings.strategy': '策略',
'apiSettings.providerKeys': '密钥',
'apiSettings.selectTargetModel': '选择目标模型',
'apiSettings.confirmDeleteProvider': '确定要删除此提供商吗?',
'apiSettings.confirmDeleteEndpoint': '确定要删除此端点吗?',
'apiSettings.confirmClearCache': '确定要清除缓存吗?',
'apiSettings.connectionSuccess': '连接成功',
'apiSettings.connectionFailed': '连接失败',
'apiSettings.saveProviderFirst': '请先保存提供商',
'apiSettings.addProviderFirst': '请先添加提供商',
'apiSettings.failedToLoad': '加载 API 设置失败',
'apiSettings.toggleVisibility': '切换可见性',
'apiSettings.noProvidersHint': '添加 API 提供商以开始使用',
'apiSettings.noEndpointsHint': '创建自定义端点以快速访问模型',
'apiSettings.cache': '缓存',
'apiSettings.off': '关闭',
'apiSettings.used': '已用',
'apiSettings.total': '总计',
'apiSettings.cacheUsage': '使用率',
'apiSettings.cacheSize': '大小',
'apiSettings.endpointsDescription': '管理自定义 API 端点以快速访问模型',
'apiSettings.totalEndpoints': '总端点数',
'apiSettings.cachedEndpoints': '缓存端点数',
'apiSettings.cacheTabHint': '在主面板中配置全局缓存设置并查看统计信息',
'apiSettings.cacheDescription': '管理响应缓存以提高性能并降低成本',
'apiSettings.cachedEntries': '缓存条目',
'apiSettings.storageUsed': '已用存储',
'apiSettings.cacheActions': '缓存操作',
'apiSettings.cacheStatistics': '缓存统计',
'apiSettings.globalCache': '全局缓存',
// Multi-key management
'apiSettings.apiKeys': 'API 密钥',
'apiSettings.addKey': '添加密钥',
'apiSettings.keyLabel': '标签',
'apiSettings.keyValue': 'API 密钥',
'apiSettings.keyWeight': '权重',
'apiSettings.removeKey': '移除',
'apiSettings.noKeys': '未配置 API 密钥',
'apiSettings.primaryKey': '主密钥',
// Routing strategy
'apiSettings.routingStrategy': '路由策略',
'apiSettings.simpleShuffleRouting': '简单随机',
'apiSettings.weightedRouting': '权重分配',
'apiSettings.latencyRouting': '延迟优先',
'apiSettings.costRouting': '成本优先',
'apiSettings.leastBusyRouting': '最少并发',
'apiSettings.routingHint': '如何在多个 API 密钥间分配请求',
// Health check
'apiSettings.healthCheck': '健康检查',
'apiSettings.enableHealthCheck': '启用健康检查',
'apiSettings.healthInterval': '检查间隔(秒)',
'apiSettings.healthCooldown': '冷却时间(秒)',
'apiSettings.failureThreshold': '失败阈值',
'apiSettings.healthStatus': '状态',
'apiSettings.healthy': '健康',
'apiSettings.unhealthy': '异常',
'apiSettings.unknown': '未知',
'apiSettings.lastCheck': '最后检查',
'apiSettings.testKey': '测试密钥',
'apiSettings.testingKey': '测试中...',
'apiSettings.keyValid': '密钥有效',
'apiSettings.keyInvalid': '密钥无效',
// Embedding models
'apiSettings.embeddingDimensions': '向量维度',
'apiSettings.embeddingMaxTokens': '最大 Token',
'apiSettings.selectEmbeddingModel': '选择嵌入模型',
// Model modal
'apiSettings.addLlmModel': '添加 LLM 模型',
'apiSettings.addEmbeddingModel': '添加嵌入模型',
'apiSettings.modelId': '模型 ID',
'apiSettings.modelName': '显示名称',
'apiSettings.modelSeries': '模型系列',
'apiSettings.selectFromPresets': '从预设选择',
'apiSettings.customModel': '自定义模型',
'apiSettings.capabilities': '能力',
'apiSettings.streaming': '流式输出',
'apiSettings.functionCalling': '函数调用',
'apiSettings.vision': '视觉能力',
'apiSettings.contextWindow': '上下文窗口',
'apiSettings.description': '描述',
'apiSettings.optional': '可选',
'apiSettings.modelIdExists': '模型 ID 已存在',
'apiSettings.useModelTreeToManage': '使用模型树管理各个模型',
// Common
'common.cancel': '取消',
'common.optional': '(可选)',
@@ -2737,6 +3322,7 @@ const i18n = {
'common.saveFailed': '保存失败',
'common.unknownError': '未知错误',
'common.exception': '异常',
'common.status': '状态',
// Core Memory
'title.coreMemory': '核心记忆',

File diff suppressed because it is too large

View File

@@ -9,6 +9,26 @@ var ccwEndpointTools = [];
var cliToolConfig = null; // Store loaded CLI config
var predefinedModels = {}; // Store predefined models per tool
// ========== Navigation Helpers ==========
/**
* Navigate to CodexLens Manager page
*/
function navigateToCodexLensManager() {
var navItem = document.querySelector('.nav-item[data-view="codexlens-manager"]');
if (navItem) {
navItem.click();
} else {
// Fallback: try to render directly
if (typeof renderCodexLensManager === 'function') {
currentView = 'codexlens-manager';
renderCodexLensManager();
} else {
showRefreshToast(t('common.error') + ': CodexLens Manager not available', 'error');
}
}
}
// ========== CCW Installations ==========
async function loadCcwInstallations() {
try {
@@ -39,6 +59,91 @@ async function loadCcwEndpointTools() {
}
}
// ========== LiteLLM API Endpoints ==========
var litellmApiEndpoints = [];
var cliCustomEndpoints = [];
async function loadLitellmApiEndpoints() {
try {
var response = await fetch('/api/litellm-api/config');
if (!response.ok) throw new Error('Failed to load LiteLLM endpoints');
var data = await response.json();
litellmApiEndpoints = data.endpoints || [];
window.litellmApiConfig = data;
return litellmApiEndpoints;
} catch (err) {
console.error('Failed to load LiteLLM endpoints:', err);
litellmApiEndpoints = [];
return [];
}
}
async function loadCliCustomEndpoints() {
try {
var response = await fetch('/api/cli/endpoints');
if (!response.ok) throw new Error('Failed to load CLI custom endpoints');
var data = await response.json();
cliCustomEndpoints = data.endpoints || [];
return cliCustomEndpoints;
} catch (err) {
console.error('Failed to load CLI custom endpoints:', err);
cliCustomEndpoints = [];
return [];
}
}
async function toggleEndpointEnabled(endpointId, enabled) {
try {
var response = await fetch('/api/cli/endpoints/' + endpointId, {
method: 'PUT',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ enabled: enabled })
});
if (!response.ok) throw new Error('Failed to update endpoint');
var data = await response.json();
if (data.success) {
// Update local state
var idx = cliCustomEndpoints.findIndex(function(e) { return e.id === endpointId; });
if (idx >= 0) {
cliCustomEndpoints[idx].enabled = enabled;
}
showRefreshToast((enabled ? 'Enabled' : 'Disabled') + ' endpoint: ' + endpointId, 'success');
}
return data;
} catch (err) {
showRefreshToast('Failed to update endpoint: ' + err.message, 'error');
throw err;
}
}
async function syncEndpointToCliTools(endpoint) {
try {
var response = await fetch('/api/cli/endpoints', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
id: endpoint.id,
name: endpoint.name,
enabled: true
})
});
if (!response.ok) throw new Error('Failed to sync endpoint');
var data = await response.json();
if (data.success) {
cliCustomEndpoints = data.endpoints;
showRefreshToast('Endpoint synced to CLI tools: ' + endpoint.id, 'success');
renderToolsSection();
}
return data;
} catch (err) {
showRefreshToast('Failed to sync endpoint: ' + err.message, 'error');
throw err;
}
}
window.toggleEndpointEnabled = toggleEndpointEnabled;
window.syncEndpointToCliTools = syncEndpointToCliTools;
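// Usage sketch for the two helpers above (the endpoint id and name are made-up
// example values; real ids come from loadLitellmApiEndpoints()):
//   await syncEndpointToCliTools({ id: 'example-endpoint', name: 'Example Endpoint' }); // registers it in cli-tools.json
//   await toggleEndpointEnabled('example-endpoint', false);                             // later disable it without removing it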
// ========== CLI Tool Configuration ==========
async function loadCliToolConfig() {
try {
@@ -302,7 +407,9 @@ async function renderCliManager() {
loadCliToolStatus(),
loadCodexLensStatus(),
loadCcwInstallations(),
loadCcwEndpointTools()
loadCcwEndpointTools(),
loadLitellmApiEndpoints(),
loadCliCustomEndpoints()
]);
container.innerHTML = '<div class="status-manager">' +
@@ -314,8 +421,7 @@ async function renderCliManager() {
'<div class="cli-settings-section" id="cli-settings-section" style="margin-top: 1.5rem;"></div>' +
'<div class="cli-section" id="ccw-endpoint-tools-section" style="margin-top: 1.5rem;"></div>' +
'</div>' +
'<section id="storageCard" class="mb-6"></section>' +
'<section id="indexCard" class="mb-6"></section>';
'<section id="storageCard" class="mb-6"></section>';
// Render sub-panels
renderToolsSection();
@@ -329,11 +435,6 @@ async function renderCliManager() {
initStorageManager();
}
// Initialize index manager card
if (typeof initIndexManager === 'function') {
initIndexManager();
}
// Initialize Lucide icons
if (window.lucide) lucide.createIcons();
}
@@ -434,28 +535,22 @@ function renderToolsSection() {
'</div>';
}).join('');
// CodexLens item
var codexLensHtml = '<div class="tool-item clickable ' + (codexLensStatus.ready ? 'available' : 'unavailable') + '" onclick="showCodexLensConfigModal()">' +
// CodexLens item - simplified view with link to manager page
var codexLensHtml = '<div class="tool-item clickable ' + (codexLensStatus.ready ? 'available' : 'unavailable') + '" onclick="navigateToCodexLensManager()">' +
'<div class="tool-item-left">' +
'<span class="tool-status-dot ' + (codexLensStatus.ready ? 'status-available' : 'status-unavailable') + '"></span>' +
'<div class="tool-item-info">' +
'<div class="tool-item-name">CodexLens <span class="tool-type-badge">Index</span>' +
'<i data-lucide="settings" class="w-3 h-3 tool-config-icon"></i></div>' +
'<i data-lucide="external-link" class="w-3 h-3 tool-config-icon"></i></div>' +
'<div class="tool-item-desc">' + (codexLensStatus.ready ? t('cli.codexLensDesc') : t('cli.codexLensDescFull')) + '</div>' +
'</div>' +
'</div>' +
'<div class="tool-item-right">' +
(codexLensStatus.ready
? '<span class="tool-status-text success"><i data-lucide="check-circle" class="w-3.5 h-3.5"></i> v' + (codexLensStatus.version || 'installed') + '</span>' +
'<select id="codexlensModelSelect" class="btn-sm bg-muted border border-border rounded text-xs" onclick="event.stopPropagation()" title="' + (t('index.selectModel') || 'Select embedding model') + '">' +
buildModelSelectOptions() +
'</select>' +
'<button class="btn-sm btn-primary" onclick="event.stopPropagation(); initCodexLensIndex(\'full\', getSelectedModel())" title="' + (t('index.fullDesc') || 'FTS + Semantic search (recommended)') + '"><i data-lucide="layers" class="w-3 h-3"></i> ' + (t('index.fullIndex') || '全部索引') + '</button>' +
'<button class="btn-sm btn-outline" onclick="event.stopPropagation(); initCodexLensIndex(\'vector\', getSelectedModel())" title="' + (t('index.vectorDesc') || 'Semantic search with embeddings') + '"><i data-lucide="sparkles" class="w-3 h-3"></i> ' + (t('index.vectorIndex') || '向量索引') + '</button>' +
'<button class="btn-sm btn-outline" onclick="event.stopPropagation(); initCodexLensIndex(\'normal\')" title="' + (t('index.normalDesc') || 'Fast full-text search only') + '"><i data-lucide="file-text" class="w-3 h-3"></i> ' + (t('index.normalIndex') || 'FTS索引') + '</button>' +
'<button class="btn-sm btn-outline btn-danger" onclick="event.stopPropagation(); uninstallCodexLens()"><i data-lucide="trash-2" class="w-3 h-3"></i> ' + t('cli.uninstall') + '</button>'
'<button class="btn-sm btn-primary" onclick="event.stopPropagation(); navigateToCodexLensManager()"><i data-lucide="settings" class="w-3 h-3"></i> ' + t('cli.openManager') + '</button>'
: '<span class="tool-status-text muted"><i data-lucide="circle-dashed" class="w-3.5 h-3.5"></i> ' + t('cli.notInstalled') + '</span>' +
'<button class="btn-sm btn-primary" onclick="event.stopPropagation(); installCodexLens()"><i data-lucide="download" class="w-3 h-3"></i> ' + t('cli.install') + '</button>') +
'<button class="btn-sm btn-primary" onclick="event.stopPropagation(); navigateToCodexLensManager()"><i data-lucide="settings" class="w-3 h-3"></i> ' + t('cli.openManager') + '</button>') +
'</div>' +
'</div>';
@@ -479,6 +574,51 @@ function renderToolsSection() {
'</div>';
}
// API Endpoints section
var apiEndpointsHtml = '';
if (litellmApiEndpoints.length > 0) {
var endpointItems = litellmApiEndpoints.map(function(endpoint) {
// Check if endpoint is synced to CLI tools
var cliEndpoint = cliCustomEndpoints.find(function(e) { return e.id === endpoint.id; });
var isSynced = !!cliEndpoint;
var isEnabled = cliEndpoint ? cliEndpoint.enabled : false;
// Find provider info
var provider = (window.litellmApiConfig?.providers || []).find(function(p) { return p.id === endpoint.providerId; });
var providerName = provider ? provider.name : endpoint.providerId;
return '<div class="tool-item ' + (isSynced && isEnabled ? 'available' : 'unavailable') + '">' +
'<div class="tool-item-left">' +
'<span class="tool-status-dot ' + (isSynced && isEnabled ? 'status-available' : 'status-unavailable') + '"></span>' +
'<div class="tool-item-info">' +
'<div class="tool-item-name">' + endpoint.id + ' <span class="tool-type-badge">API</span></div>' +
'<div class="tool-item-desc">' + endpoint.model + ' (' + providerName + ')</div>' +
'</div>' +
'</div>' +
'<div class="tool-item-right">' +
(isSynced
? '<label class="toggle-switch" onclick="event.stopPropagation()">' +
'<input type="checkbox" ' + (isEnabled ? 'checked' : '') + ' onchange="toggleEndpointEnabled(\'' + endpoint.id + '\', this.checked); renderToolsSection();">' +
'<span class="toggle-slider"></span>' +
'</label>'
: '<button class="btn-sm btn-primary" onclick="event.stopPropagation(); syncEndpointToCliTools({id: \'' + endpoint.id + '\', name: \'' + endpoint.name + '\'})">' +
'<i data-lucide="plus" class="w-3 h-3"></i> ' + (t('cli.addToCli') || 'Add to CLI') +
'</button>') +
'</div>' +
'</div>';
}).join('');
apiEndpointsHtml = '<div class="tools-subsection" style="margin-top: 1rem; padding-top: 1rem; border-top: 1px solid var(--border);">' +
'<div class="section-header-left" style="margin-bottom: 0.5rem;">' +
'<h4 style="font-size: 0.875rem; font-weight: 600; display: flex; align-items: center; gap: 0.5rem;">' +
'<i data-lucide="cloud" class="w-4 h-4"></i> ' + (t('cli.apiEndpoints') || 'API Endpoints') +
'</h4>' +
'<span class="section-count">' + litellmApiEndpoints.length + ' ' + (t('cli.configured') || 'configured') + '</span>' +
'</div>' +
'<div class="tools-list">' + endpointItems + '</div>' +
'</div>';
}
container.innerHTML = '<div class="section-header">' +
'<div class="section-header-left">' +
'<h3><i data-lucide="terminal" class="w-4 h-4"></i> ' + t('cli.tools') + '</h3>' +
@@ -492,7 +632,8 @@ function renderToolsSection() {
toolsHtml +
codexLensHtml +
semanticHtml +
'</div>';
'</div>' +
apiEndpointsHtml;
if (window.lucide) lucide.createIcons();
}
@@ -606,6 +747,16 @@ async function loadWindowsPlatformSettings() {
async function toggleChineseResponse(enabled) {
if (chineseResponseLoading) return;
// Pre-check: verify CCW workflows are installed (only when enabling)
if (enabled && typeof ccwInstallStatus !== 'undefined' && !ccwInstallStatus.installed) {
var missingFile = ccwInstallStatus.missingFiles.find(function(f) { return f === 'chinese-response.md'; });
if (missingFile) {
showRefreshToast(t('lang.installRequired'), 'warning');
return;
}
}
chineseResponseLoading = true;
try {
@@ -617,7 +768,14 @@ async function toggleChineseResponse(enabled) {
if (!response.ok) {
var errData = await response.json();
throw new Error(errData.error || 'Failed to update setting');
// Show specific error message from backend
var errorMsg = errData.error || 'Failed to update setting';
if (errorMsg.includes('not found')) {
showRefreshToast(t('lang.installRequired'), 'warning');
} else {
showRefreshToast((enabled ? t('lang.enableFailed') : t('lang.disableFailed')) + ': ' + errorMsg, 'error');
}
throw new Error(errorMsg);
}
var data = await response.json();
@@ -630,7 +788,7 @@ async function toggleChineseResponse(enabled) {
showRefreshToast(enabled ? t('lang.enableSuccess') : t('lang.disableSuccess'), 'success');
} catch (err) {
console.error('Failed to toggle Chinese response:', err);
showRefreshToast(enabled ? t('lang.enableFailed') : t('lang.disableFailed'), 'error');
// Error already shown in the !response.ok block
} finally {
chineseResponseLoading = false;
}
@@ -638,6 +796,16 @@ async function toggleChineseResponse(enabled) {
async function toggleWindowsPlatform(enabled) {
if (windowsPlatformLoading) return;
// Pre-check: verify CCW workflows are installed (only when enabling)
if (enabled && typeof ccwInstallStatus !== 'undefined' && !ccwInstallStatus.installed) {
var missingFile = ccwInstallStatus.missingFiles.find(function(f) { return f === 'windows-platform.md'; });
if (missingFile) {
showRefreshToast(t('lang.installRequired'), 'warning');
return;
}
}
windowsPlatformLoading = true;
try {
@@ -649,7 +817,14 @@ async function toggleWindowsPlatform(enabled) {
if (!response.ok) {
var errData = await response.json();
throw new Error(errData.error || 'Failed to update setting');
// Show specific error message from backend
var errorMsg = errData.error || 'Failed to update setting';
if (errorMsg.includes('not found')) {
showRefreshToast(t('lang.installRequired'), 'warning');
} else {
showRefreshToast((enabled ? t('lang.windowsEnableFailed') : t('lang.windowsDisableFailed')) + ': ' + errorMsg, 'error');
}
throw new Error(errorMsg);
}
var data = await response.json();
@@ -662,7 +837,7 @@ async function toggleWindowsPlatform(enabled) {
showRefreshToast(enabled ? t('lang.windowsEnableSuccess') : t('lang.windowsDisableSuccess'), 'success');
} catch (err) {
console.error('Failed to toggle Windows platform:', err);
showRefreshToast(enabled ? t('lang.windowsEnableFailed') : t('lang.windowsDisableFailed'), 'error');
// Error already shown in the !response.ok block
} finally {
windowsPlatformLoading = false;
}
@@ -812,6 +987,19 @@ function renderCliSettingsSection() {
'</div>' +
'<p class="cli-setting-desc">' + t('cli.maxContextFilesDesc') + '</p>' +
'</div>' +
'<div class="cli-setting-item">' +
'<label class="cli-setting-label">' +
'<i data-lucide="search" class="w-3 h-3"></i>' +
t('cli.codeIndexMcp') +
'</label>' +
'<div class="cli-setting-control">' +
'<select class="cli-setting-select" onchange="setCodeIndexMcpProvider(this.value)">' +
'<option value="codexlens"' + (codeIndexMcpProvider === 'codexlens' ? ' selected' : '') + '>CodexLens</option>' +
'<option value="ace"' + (codeIndexMcpProvider === 'ace' ? ' selected' : '') + '>ACE (Augment)</option>' +
'</select>' +
'</div>' +
'<p class="cli-setting-desc">' + t('cli.codeIndexMcpDesc') + '</p>' +
'</div>' +
'</div>';
container.innerHTML = settingsHtml;

File diff suppressed because it is too large

View File

@@ -331,6 +331,15 @@
<i data-lucide="history" class="nav-icon"></i>
<span class="nav-text flex-1" data-i18n="nav.history">History</span>
</li>
<li class="nav-item flex items-center gap-2 px-3 py-2.5 text-sm text-muted-foreground hover:bg-hover hover:text-foreground rounded cursor-pointer transition-colors" data-view="codexlens-manager" data-tooltip="CodexLens Manager">
<i data-lucide="search-code" class="nav-icon"></i>
<span class="nav-text flex-1" data-i18n="nav.codexLensManager">CodexLens</span>
<span class="badge px-2 py-0.5 text-xs font-semibold rounded-full bg-hover text-muted-foreground" id="badgeCodexLens">-</span>
</li>
<li class="nav-item flex items-center gap-2 px-3 py-2.5 text-sm text-muted-foreground hover:bg-hover hover:text-foreground rounded cursor-pointer transition-colors" data-view="api-settings" data-tooltip="API Settings">
<i data-lucide="settings" class="nav-icon"></i>
<span class="nav-text flex-1" data-i18n="nav.apiSettings">API Settings</span>
</li>
<!-- Hidden: Code Graph Explorer (feature disabled)
<li class="nav-item flex items-center gap-2 px-3 py-2.5 text-sm text-muted-foreground hover:bg-hover hover:text-foreground rounded cursor-pointer transition-colors" data-view="graph-explorer" data-tooltip="Code Graph Explorer">
<i data-lucide="git-branch" class="nav-icon"></i>

View File

@@ -0,0 +1,371 @@
/**
* Claude CLI Tools Configuration Manager
* Manages .claude/cli-tools.json with fallback:
* 1. Project workspace: {projectDir}/.claude/cli-tools.json (priority)
* 2. Global: ~/.claude/cli-tools.json (fallback)
*/
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
// ========== Types ==========
export interface ClaudeCliTool {
enabled: boolean;
isBuiltin: boolean;
command: string;
description: string;
}
export interface ClaudeCacheSettings {
injectionMode: 'auto' | 'manual' | 'disabled';
defaultPrefix: string;
defaultSuffix: string;
}
export interface ClaudeCliToolsConfig {
$schema?: string;
version: string;
tools: Record<string, ClaudeCliTool>;
customEndpoints: Array<{
id: string;
name: string;
enabled: boolean;
}>;
defaultTool: string;
settings: {
promptFormat: 'plain' | 'yaml' | 'json';
smartContext: {
enabled: boolean;
maxFiles: number;
};
nativeResume: boolean;
recursiveQuery: boolean;
cache: ClaudeCacheSettings;
codeIndexMcp: 'codexlens' | 'ace'; // Code Index MCP provider
};
}
// ========== Default Config ==========
const DEFAULT_CONFIG: ClaudeCliToolsConfig = {
version: '1.0.0',
tools: {
gemini: {
enabled: true,
isBuiltin: true,
command: 'gemini',
description: 'Google AI for code analysis'
},
qwen: {
enabled: true,
isBuiltin: true,
command: 'qwen',
description: 'Alibaba AI assistant'
},
codex: {
enabled: true,
isBuiltin: true,
command: 'codex',
description: 'OpenAI code generation'
},
claude: {
enabled: true,
isBuiltin: true,
command: 'claude',
description: 'Anthropic AI assistant'
}
},
customEndpoints: [],
defaultTool: 'gemini',
settings: {
promptFormat: 'plain',
smartContext: {
enabled: false,
maxFiles: 10
},
nativeResume: true,
recursiveQuery: true,
cache: {
injectionMode: 'auto',
defaultPrefix: '',
defaultSuffix: ''
},
codeIndexMcp: 'codexlens' // Default to CodexLens
}
};
// ========== Helper Functions ==========
function getProjectConfigPath(projectDir: string): string {
return path.join(projectDir, '.claude', 'cli-tools.json');
}
function getGlobalConfigPath(): string {
return path.join(os.homedir(), '.claude', 'cli-tools.json');
}
/**
* Resolve config path with fallback:
* 1. Project: {projectDir}/.claude/cli-tools.json
* 2. Global: ~/.claude/cli-tools.json
* Returns { path, source } where source is 'project' | 'global' | 'default'
*/
function resolveConfigPath(projectDir: string): { path: string; source: 'project' | 'global' | 'default' } {
const projectPath = getProjectConfigPath(projectDir);
if (fs.existsSync(projectPath)) {
return { path: projectPath, source: 'project' };
}
const globalPath = getGlobalConfigPath();
if (fs.existsSync(globalPath)) {
return { path: globalPath, source: 'global' };
}
return { path: projectPath, source: 'default' };
}
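// Example of the fallback order (the project directory is illustrative):
//   resolveConfigPath('/work/my-app')
//   -> { path: '/work/my-app/.claude/cli-tools.json', source: 'project' } when the project file exists,
//   -> the ~/.claude copy with source 'global' otherwise,
//   -> the project path with source 'default' when neither file is present.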
function ensureClaudeDir(projectDir: string): void {
const claudeDir = path.join(projectDir, '.claude');
if (!fs.existsSync(claudeDir)) {
fs.mkdirSync(claudeDir, { recursive: true });
}
}
// ========== Main Functions ==========
/**
* Load CLI tools configuration with fallback:
* 1. Project: {projectDir}/.claude/cli-tools.json
* 2. Global: ~/.claude/cli-tools.json
* 3. Default config
*/
export function loadClaudeCliTools(projectDir: string): ClaudeCliToolsConfig & { _source?: string } {
const resolved = resolveConfigPath(projectDir);
try {
if (resolved.source === 'default') {
// No config file found, return defaults
return { ...DEFAULT_CONFIG, _source: 'default' };
}
const content = fs.readFileSync(resolved.path, 'utf-8');
const parsed = JSON.parse(content) as Partial<ClaudeCliToolsConfig>;
// Merge with defaults
const config = {
...DEFAULT_CONFIG,
...parsed,
tools: { ...DEFAULT_CONFIG.tools, ...(parsed.tools || {}) },
settings: {
...DEFAULT_CONFIG.settings,
...(parsed.settings || {}),
smartContext: {
...DEFAULT_CONFIG.settings.smartContext,
...(parsed.settings?.smartContext || {})
},
cache: {
...DEFAULT_CONFIG.settings.cache,
...(parsed.settings?.cache || {})
}
},
_source: resolved.source
};
console.log(`[claude-cli-tools] Loaded config from ${resolved.source}: ${resolved.path}`);
return config;
} catch (err) {
console.error('[claude-cli-tools] Error loading config:', err);
return { ...DEFAULT_CONFIG, _source: 'default' };
}
}
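// Sketch of the merge behaviour above, assuming a partial project file:
//   a {projectDir}/.claude/cli-tools.json containing only { "defaultTool": "codex" }
//   still yields the full DEFAULT_CONFIG tools and settings, with defaultTool overridden:
//   const cfg = loadClaudeCliTools(projectDir);
//   cfg.defaultTool;            // 'codex'
//   cfg.settings.codeIndexMcp;  // 'codexlens' (default preserved)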
/**
* Save CLI tools configuration to project .claude/cli-tools.json
* Always saves to project directory (not global)
*/
export function saveClaudeCliTools(projectDir: string, config: ClaudeCliToolsConfig & { _source?: string }): void {
ensureClaudeDir(projectDir);
const configPath = getProjectConfigPath(projectDir);
// Remove internal _source field before saving
const { _source, ...configToSave } = config;
try {
fs.writeFileSync(configPath, JSON.stringify(configToSave, null, 2), 'utf-8');
console.log(`[claude-cli-tools] Saved config to project: ${configPath}`);
} catch (err) {
console.error('[claude-cli-tools] Error saving config:', err);
throw new Error(`Failed to save CLI tools config: ${err}`);
}
}
/**
* Update enabled status for a specific tool
*/
export function updateClaudeToolEnabled(
projectDir: string,
toolName: string,
enabled: boolean
): ClaudeCliToolsConfig {
const config = loadClaudeCliTools(projectDir);
if (config.tools[toolName]) {
config.tools[toolName].enabled = enabled;
saveClaudeCliTools(projectDir, config);
}
return config;
}
/**
* Update cache settings
*/
export function updateClaudeCacheSettings(
projectDir: string,
cacheSettings: Partial<ClaudeCacheSettings>
): ClaudeCliToolsConfig {
const config = loadClaudeCliTools(projectDir);
config.settings.cache = {
...config.settings.cache,
...cacheSettings
};
saveClaudeCliTools(projectDir, config);
return config;
}
/**
* Update default tool
*/
export function updateClaudeDefaultTool(
projectDir: string,
defaultTool: string
): ClaudeCliToolsConfig {
const config = loadClaudeCliTools(projectDir);
config.defaultTool = defaultTool;
saveClaudeCliTools(projectDir, config);
return config;
}
/**
* Add custom endpoint
*/
export function addClaudeCustomEndpoint(
projectDir: string,
endpoint: { id: string; name: string; enabled: boolean }
): ClaudeCliToolsConfig {
const config = loadClaudeCliTools(projectDir);
// Check if endpoint already exists
const existingIndex = config.customEndpoints.findIndex(e => e.id === endpoint.id);
if (existingIndex >= 0) {
config.customEndpoints[existingIndex] = endpoint;
} else {
config.customEndpoints.push(endpoint);
}
saveClaudeCliTools(projectDir, config);
return config;
}
/**
* Remove custom endpoint
*/
export function removeClaudeCustomEndpoint(
projectDir: string,
endpointId: string
): ClaudeCliToolsConfig {
const config = loadClaudeCliTools(projectDir);
config.customEndpoints = config.customEndpoints.filter(e => e.id !== endpointId);
saveClaudeCliTools(projectDir, config);
return config;
}
/**
* Get config source info
*/
export function getClaudeCliToolsInfo(projectDir: string): {
projectPath: string;
globalPath: string;
activePath: string;
source: 'project' | 'global' | 'default';
} {
const resolved = resolveConfigPath(projectDir);
return {
projectPath: getProjectConfigPath(projectDir),
globalPath: getGlobalConfigPath(),
activePath: resolved.path,
source: resolved.source
};
}
/**
* Update Code Index MCP provider and switch CLAUDE.md reference
* Strategy: Only modify global user-level CLAUDE.md (~/.claude/CLAUDE.md)
* This is consistent with Chinese response and Windows platform settings
*/
export function updateCodeIndexMcp(
projectDir: string,
provider: 'codexlens' | 'ace'
): { success: boolean; error?: string; config?: ClaudeCliToolsConfig } {
try {
// Update config
const config = loadClaudeCliTools(projectDir);
config.settings.codeIndexMcp = provider;
saveClaudeCliTools(projectDir, config);
// Only update global CLAUDE.md (consistent with Chinese response / Windows platform)
const globalClaudeMdPath = path.join(os.homedir(), '.claude', 'CLAUDE.md');
if (!fs.existsSync(globalClaudeMdPath)) {
// If global CLAUDE.md doesn't exist, check project-level
const projectClaudeMdPath = path.join(projectDir, '.claude', 'CLAUDE.md');
if (fs.existsSync(projectClaudeMdPath)) {
let content = fs.readFileSync(projectClaudeMdPath, 'utf-8');
// Define patterns for both formats
const codexlensPattern = /@~\/\.claude\/workflows\/context-tools\.md/g;
const acePattern = /@~\/\.claude\/workflows\/context-tools-ace\.md/g;
if (provider === 'ace') {
content = content.replace(codexlensPattern, '@~/.claude/workflows/context-tools-ace.md');
} else {
content = content.replace(acePattern, '@~/.claude/workflows/context-tools.md');
}
fs.writeFileSync(projectClaudeMdPath, content, 'utf-8');
console.log(`[claude-cli-tools] Updated project CLAUDE.md to use ${provider} (no global CLAUDE.md found)`);
}
} else {
// Update global CLAUDE.md (primary target)
let content = fs.readFileSync(globalClaudeMdPath, 'utf-8');
const codexlensPattern = /@~\/\.claude\/workflows\/context-tools\.md/g;
const acePattern = /@~\/\.claude\/workflows\/context-tools-ace\.md/g;
if (provider === 'ace') {
content = content.replace(codexlensPattern, '@~/.claude/workflows/context-tools-ace.md');
} else {
content = content.replace(acePattern, '@~/.claude/workflows/context-tools.md');
}
fs.writeFileSync(globalClaudeMdPath, content, 'utf-8');
console.log(`[claude-cli-tools] Updated global CLAUDE.md to use ${provider}`);
}
return { success: true, config };
} catch (err) {
console.error('[claude-cli-tools] Error updating Code Index MCP:', err);
return { success: false, error: (err as Error).message };
}
}
/**
* Get current Code Index MCP provider
*/
export function getCodeIndexMcp(projectDir: string): 'codexlens' | 'ace' {
const config = loadClaudeCliTools(projectDir);
return config.settings.codeIndexMcp || 'codexlens';
}
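// Usage sketch (the project path is illustrative): switching the provider rewrites the
// context-tools reference in CLAUDE.md via the regex pair in updateCodeIndexMcp —
//   updateCodeIndexMcp('/work/my-app', 'ace');
//   // @~/.claude/workflows/context-tools.md  ->  @~/.claude/workflows/context-tools-ace.md
//   getCodeIndexMcp('/work/my-app'); // 'ace'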

View File

@@ -10,6 +10,10 @@ import { spawn, ChildProcess } from 'child_process';
import { existsSync, mkdirSync, readFileSync, writeFileSync, unlinkSync, readdirSync, statSync } from 'fs';
import { join, relative } from 'path';
// LiteLLM integration
import { executeLiteLLMEndpoint } from './litellm-executor.js';
import { findEndpointById } from '../config/litellm-api-config-manager.js';
// Native resume support
import {
trackNewSession,
@@ -63,7 +67,7 @@ const ParamsSchema = z.object({
model: z.string().optional(),
cd: z.string().optional(),
includeDirs: z.string().optional(),
timeout: z.number().default(300000),
timeout: z.number().default(0), // 0 = no internal timeout, controlled by external caller (e.g., bash timeout)
resume: z.union([z.boolean(), z.string()]).optional(), // true = last, string = single ID or comma-separated IDs
id: z.string().optional(), // Custom execution ID (e.g., IMPL-001-step1)
noNative: z.boolean().optional(), // Force prompt concatenation instead of native resume
@@ -333,9 +337,8 @@ function buildCommand(params: {
args.push(nativeResume.sessionId);
}
// Codex resume still supports additional flags
if (dir) {
args.push('-C', dir);
}
// Note: -C is NOT used because spawn's cwd already sets the working directory
// Using both would cause path to be applied twice (e.g., codex-lens/codex-lens)
// Permission configuration based on mode:
// - analysis: --full-auto (read-only sandbox, no prompts) - safer for read operations
// - write/auto: --dangerously-bypass-approvals-and-sandbox (full access for modifications)
@@ -358,9 +361,8 @@ function buildCommand(params: {
} else {
// Standard exec mode
args.push('exec');
if (dir) {
args.push('-C', dir);
}
// Note: -C is NOT used because spawn's cwd already sets the working directory
// Using both would cause path to be applied twice (e.g., codex-lens/codex-lens)
// Permission configuration based on mode:
// - analysis: --full-auto (read-only sandbox, no prompts) - safer for read operations
// - write/auto: --dangerously-bypass-approvals-and-sandbox (full access for modifications)
@@ -592,6 +594,66 @@ async function executeCliTool(
const workingDir = cd || process.cwd();
ensureHistoryDir(workingDir); // Ensure history directory exists
// NEW: Check if model is a custom LiteLLM endpoint ID
if (model && !['gemini', 'qwen', 'codex'].includes(tool)) {
const endpoint = findEndpointById(workingDir, model);
if (endpoint) {
// Route to LiteLLM executor
if (onOutput) {
onOutput({ type: 'stderr', data: `[Routing to LiteLLM endpoint: ${model}]\n` });
}
const startTime = Date.now();
const result = await executeLiteLLMEndpoint({
prompt,
endpointId: model,
baseDir: workingDir,
cwd: cd,
includeDirs: includeDirs ? includeDirs.split(',').map(d => d.trim()) : undefined,
enableCache: true,
onOutput: onOutput || undefined,
});
// Convert LiteLLM result to ExecutionOutput format
const endTime = Date.now();
const duration = endTime - startTime;
const execution: ExecutionRecord = {
id: customId || `${Date.now()}-litellm`,
timestamp: new Date(startTime).toISOString(),
tool: 'litellm',
model: result.model,
mode,
prompt,
status: result.success ? 'success' : 'error',
exit_code: result.success ? 0 : 1,
duration_ms: duration,
output: {
stdout: result.output,
stderr: result.error || '',
truncated: false,
},
};
const conversation = convertToConversation(execution);
// Try to save to history
try {
saveConversation(workingDir, conversation);
} catch (err) {
console.error('[CLI Executor] Failed to save LiteLLM history:', (err as Error).message);
}
return {
success: result.success,
execution,
conversation,
stdout: result.output,
stderr: result.error || '',
};
}
}
// Get SQLite store for native session lookup
const store = await getSqliteStore(workingDir);
@@ -994,19 +1056,24 @@ async function executeCliTool(
reject(new Error(`Failed to spawn ${tool}: ${error.message}`));
});
// Timeout handling
const timeoutId = setTimeout(() => {
timedOut = true;
child.kill('SIGTERM');
setTimeout(() => {
if (!child.killed) {
child.kill('SIGKILL');
}
}, 5000);
}, timeout);
// Timeout handling (timeout=0 disables internal timeout, controlled by external caller)
let timeoutId: NodeJS.Timeout | null = null;
if (timeout > 0) {
timeoutId = setTimeout(() => {
timedOut = true;
child.kill('SIGTERM');
setTimeout(() => {
if (!child.killed) {
child.kill('SIGKILL');
}
}, 5000);
}, timeout);
}
child.on('close', () => {
clearTimeout(timeoutId);
if (timeoutId) {
clearTimeout(timeoutId);
}
});
});
}
@@ -1051,8 +1118,8 @@ Modes:
},
timeout: {
type: 'number',
description: 'Timeout in milliseconds (default: 300000 = 5 minutes)',
default: 300000
description: 'Timeout in milliseconds (default: 0 = disabled, controlled by external caller)',
default: 0
}
},
required: ['tool', 'prompt']
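// Call sketch against the schema above (values are assumptions): with the new default of
// timeout: 0 the server never kills the child process itself, so any wall-clock limit has
// to come from the caller, e.g. an MCP client timeout or a shell-level wrapper such as
// `timeout 600 <command>` as noted in the ParamsSchema comment.
//   { "tool": "codex", "prompt": "summarize the recent changes", "timeout": 0 }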

View File

@@ -33,6 +33,14 @@ const VENV_PYTHON =
let bootstrapChecked = false;
let bootstrapReady = false;
// Venv status cache with TTL
interface VenvStatusCache {
status: ReadyStatus;
timestamp: number;
}
let venvStatusCache: VenvStatusCache | null = null;
const VENV_STATUS_TTL = 5 * 60 * 1000; // 5 minutes TTL
// Track running indexing process for cancellation
let currentIndexingProcess: ReturnType<typeof spawn> | null = null;
let currentIndexingAborted = false;
@@ -77,6 +85,7 @@ interface SemanticStatus {
backend?: string;
accelerator?: string;
providers?: string[];
litellmAvailable?: boolean;
error?: string;
}
@@ -115,6 +124,13 @@ interface ProgressInfo {
totalFiles?: number;
}
/**
* Clear venv status cache (call after install/uninstall operations)
*/
function clearVenvStatusCache(): void {
venvStatusCache = null;
}
/**
* Detect available Python 3 executable
* @returns Python executable command
@@ -137,17 +153,27 @@ function getSystemPython(): string {
/**
* Check if CodexLens venv exists and has required packages
* @param force - Force refresh cache (default: false)
* @returns Ready status
*/
async function checkVenvStatus(): Promise<ReadyStatus> {
async function checkVenvStatus(force = false): Promise<ReadyStatus> {
// Use cached result if available and not expired
if (!force && venvStatusCache && (Date.now() - venvStatusCache.timestamp < VENV_STATUS_TTL)) {
return venvStatusCache.status;
}
// Check venv exists
if (!existsSync(CODEXLENS_VENV)) {
return { ready: false, error: 'Venv not found' };
const result = { ready: false, error: 'Venv not found' };
venvStatusCache = { status: result, timestamp: Date.now() };
return result;
}
// Check python executable exists
if (!existsSync(VENV_PYTHON)) {
return { ready: false, error: 'Python executable not found in venv' };
const result = { ready: false, error: 'Python executable not found in venv' };
venvStatusCache = { status: result, timestamp: Date.now() };
return result;
}
// Check codexlens is importable
@@ -168,15 +194,21 @@ async function checkVenvStatus(): Promise<ReadyStatus> {
});
child.on('close', (code) => {
let result: ReadyStatus;
if (code === 0) {
resolve({ ready: true, version: stdout.trim() });
result = { ready: true, version: stdout.trim() };
} else {
resolve({ ready: false, error: `CodexLens not installed: ${stderr}` });
result = { ready: false, error: `CodexLens not installed: ${stderr}` };
}
// Cache the result
venvStatusCache = { status: result, timestamp: Date.now() };
resolve(result);
});
child.on('error', (err) => {
resolve({ ready: false, error: `Failed to check venv: ${err.message}` });
const result = { ready: false, error: `Failed to check venv: ${err.message}` };
venvStatusCache = { status: result, timestamp: Date.now() };
resolve(result);
});
});
}
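The caching added to checkVenvStatus is a straightforward TTL memoization; a stripped-down sketch of the same pattern, with illustrative names that do not appear in the diff:

interface Cached<T> { value: T; timestamp: number; }

function withTtl<T>(ttlMs: number, compute: () => Promise<T>) {
  let cache: Cached<T> | null = null;
  return async (force = false): Promise<T> => {
    if (!force && cache && Date.now() - cache.timestamp < ttlMs) {
      return cache.value;           // still fresh: reuse the cached result
    }
    const value = await compute();  // recompute and refresh the timestamp
    cache = { value, timestamp: Date.now() };
    return value;
  };
}

// e.g. const cachedVenvStatus = withTtl(5 * 60 * 1000, () => checkVenvStatus(true));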
@@ -198,8 +230,15 @@ async function checkSemanticStatus(): Promise<SemanticStatus> {
import sys
import json
try:
from codexlens.semantic import SEMANTIC_AVAILABLE, SEMANTIC_BACKEND
result = {"available": SEMANTIC_AVAILABLE, "backend": SEMANTIC_BACKEND if SEMANTIC_AVAILABLE else None}
import codexlens.semantic as semantic
SEMANTIC_AVAILABLE = bool(getattr(semantic, "SEMANTIC_AVAILABLE", False))
SEMANTIC_BACKEND = getattr(semantic, "SEMANTIC_BACKEND", None)
LITELLM_AVAILABLE = bool(getattr(semantic, "LITELLM_AVAILABLE", False))
result = {
"available": SEMANTIC_AVAILABLE,
"backend": SEMANTIC_BACKEND if SEMANTIC_AVAILABLE else None,
"litellm_available": LITELLM_AVAILABLE,
}
# Get ONNX providers for accelerator info
try:
@@ -250,6 +289,7 @@ except Exception as e:
backend: result.backend,
accelerator: result.accelerator || 'CPU',
providers: result.providers || [],
litellmAvailable: result.litellm_available || false,
error: result.error
});
} catch {
@@ -263,6 +303,77 @@ except Exception as e:
});
}
/**
* Ensure LiteLLM embedder dependencies are available in the CodexLens venv.
* Installs ccw-litellm into the venv if needed.
*/
async function ensureLiteLLMEmbedderReady(): Promise<BootstrapResult> {
// Ensure CodexLens venv exists and CodexLens is installed.
const readyStatus = await ensureReady();
if (!readyStatus.ready) {
return { success: false, error: readyStatus.error || 'CodexLens not ready' };
}
// Check if ccw_litellm can be imported
const importStatus = await new Promise<{ ok: boolean; error?: string }>((resolve) => {
const child = spawn(VENV_PYTHON, ['-c', 'import ccw_litellm; print("OK")'], {
stdio: ['ignore', 'pipe', 'pipe'],
timeout: 15000,
});
let stderr = '';
child.stderr.on('data', (data) => {
stderr += data.toString();
});
child.on('close', (code) => {
resolve({ ok: code === 0, error: stderr.trim() || undefined });
});
child.on('error', (err) => {
resolve({ ok: false, error: err.message });
});
});
if (importStatus.ok) {
return { success: true };
}
const pipPath =
process.platform === 'win32'
? join(CODEXLENS_VENV, 'Scripts', 'pip.exe')
: join(CODEXLENS_VENV, 'bin', 'pip');
try {
console.log('[CodexLens] Installing ccw-litellm for LiteLLM embedding backend...');
const possiblePaths = [
join(process.cwd(), 'ccw-litellm'),
join(__dirname, '..', '..', '..', 'ccw-litellm'), // ccw/src/tools -> project root
join(homedir(), 'ccw-litellm'),
];
let installed = false;
for (const localPath of possiblePaths) {
if (existsSync(join(localPath, 'pyproject.toml'))) {
console.log(`[CodexLens] Installing ccw-litellm from local path: ${localPath}`);
execSync(`"${pipPath}" install -e "${localPath}"`, { stdio: 'inherit' });
installed = true;
break;
}
}
if (!installed) {
console.log('[CodexLens] Installing ccw-litellm from PyPI...');
execSync(`"${pipPath}" install ccw-litellm`, { stdio: 'inherit' });
}
return { success: true };
} catch (err) {
return { success: false, error: `Failed to install ccw-litellm: ${(err as Error).message}` };
}
}
/**
* GPU acceleration mode for semantic search
*/
@@ -421,6 +532,17 @@ async function installSemantic(gpuMode: GpuMode = 'cpu'): Promise<BootstrapResul
child.on('close', (code) => {
if (code === 0) {
// IMPORTANT: fastembed installs onnxruntime (CPU) as dependency, which conflicts
// with onnxruntime-directml/gpu. Reinstall the GPU version to ensure it takes precedence.
if (gpuMode !== 'cpu') {
try {
console.log(`[CodexLens] Reinstalling ${onnxPackage} to ensure GPU provider works...`);
execSync(`"${pipPath}" install --force-reinstall ${onnxPackage}`, { stdio: 'pipe', timeout: 300000 });
console.log(`[CodexLens] ${onnxPackage} reinstalled successfully`);
} catch (e) {
console.warn(`[CodexLens] Warning: Failed to reinstall ${onnxPackage}: ${(e as Error).message}`);
}
}
console.log(`[CodexLens] Semantic dependencies installed successfully (${gpuMode} mode)`);
resolve({ success: true, message: `Installed with ${modeDescription}` });
} else {
@@ -490,6 +612,8 @@ async function bootstrapVenv(): Promise<BootstrapResult> {
execSync(`"${pipPath}" install codexlens`, { stdio: 'inherit' });
}
// Clear cache after successful installation
clearVenvStatusCache();
return { success: true };
} catch (err) {
return { success: false, error: `Failed to install codexlens: ${(err as Error).message}` };
@@ -1209,6 +1333,7 @@ async function uninstallCodexLens(): Promise<BootstrapResult> {
// Reset bootstrap cache
bootstrapChecked = false;
bootstrapReady = false;
clearVenvStatusCache();
console.log('[CodexLens] CodexLens uninstalled successfully');
return { success: true, message: 'CodexLens uninstalled successfully' };
@@ -1273,7 +1398,19 @@ function isIndexingInProgress(): boolean {
export type { ProgressInfo, ExecuteOptions };
// Export for direct usage
export { ensureReady, executeCodexLens, checkVenvStatus, bootstrapVenv, checkSemanticStatus, installSemantic, detectGpuSupport, uninstallCodexLens, cancelIndexing, isIndexingInProgress };
export {
ensureReady,
executeCodexLens,
checkVenvStatus,
bootstrapVenv,
checkSemanticStatus,
ensureLiteLLMEmbedderReady,
installSemantic,
detectGpuSupport,
uninstallCodexLens,
cancelIndexing,
isIndexingInProgress,
};
export type { GpuMode };
// Backward-compatible export for tests
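A hedged sketch of how the newly exported ensureLiteLLMEmbedderReady might be combined with checkSemanticStatus; the wrapper function and its error handling are assumptions, only the imported names come from the diff above.

import { ensureLiteLLMEmbedderReady, checkSemanticStatus } from './codex-lens.js';

async function prepareLiteLLMEmbeddings(): Promise<boolean> {
  const setup = await ensureLiteLLMEmbedderReady();   // installs ccw-litellm into the venv if missing
  if (!setup.success) {
    console.warn(`[CodexLens] LiteLLM embedder unavailable: ${setup.error}`);
    return false;
  }
  const status = await checkSemanticStatus();         // now reports litellmAvailable
  return status.litellmAvailable === true;
}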

View File

@@ -0,0 +1,368 @@
/**
* Context Cache Store - In-memory cache with TTL and LRU eviction
* Stores packed file contents with session-based lifecycle management
*/
/** Cache entry metadata */
export interface CacheMetadata {
files: string[]; // Source file paths
patterns: string[]; // Original @patterns
total_bytes: number; // Total content bytes
file_count: number; // Number of files packed
}
/** Cache entry structure */
export interface CacheEntry {
session_id: string;
created_at: number; // Timestamp ms
accessed_at: number; // Last access timestamp
ttl: number; // TTL in ms
content: string; // Packed file content
metadata: CacheMetadata;
}
/** Paginated read result */
export interface PagedReadResult {
content: string; // Current page content
offset: number; // Current byte offset
limit: number; // Requested bytes
total_bytes: number; // Total content bytes
has_more: boolean; // Has more content
next_offset: number | null; // Next page offset (null if no more)
}
/** Cache status info */
export interface CacheStatus {
entries: number; // Total cache entries
total_bytes: number; // Total bytes cached
oldest_session: string | null;
newest_session: string | null;
}
/** Session status info */
export interface SessionStatus {
session_id: string;
exists: boolean;
files?: string[];
file_count?: number;
total_bytes?: number;
created_at?: string;
expires_at?: string;
accessed_at?: string;
ttl_remaining_ms?: number;
}
/** Default configuration */
const DEFAULT_MAX_ENTRIES = 100;
const DEFAULT_TTL_MS = 30 * 60 * 1000; // 30 minutes
const DEFAULT_PAGE_SIZE = 65536; // 64KB
/**
* Context Cache Store singleton
* Manages in-memory cache with TTL expiration and LRU eviction
*/
class ContextCacheStore {
private cache: Map<string, CacheEntry> = new Map();
private maxEntries: number;
private defaultTTL: number;
private cleanupInterval: NodeJS.Timeout | null = null;
constructor(options: {
maxEntries?: number;
defaultTTL?: number;
cleanupIntervalMs?: number;
} = {}) {
this.maxEntries = options.maxEntries ?? DEFAULT_MAX_ENTRIES;
this.defaultTTL = options.defaultTTL ?? DEFAULT_TTL_MS;
// Start periodic cleanup
const cleanupMs = options.cleanupIntervalMs ?? 60000; // 1 minute
this.cleanupInterval = setInterval(() => {
this.cleanupExpired();
}, cleanupMs);
// Allow cleanup to not keep process alive
if (this.cleanupInterval.unref) {
this.cleanupInterval.unref();
}
}
/**
* Store packed content in cache
*/
set(
sessionId: string,
content: string,
metadata: CacheMetadata,
ttl?: number
): CacheEntry {
const now = Date.now();
const entryTTL = ttl ?? this.defaultTTL;
// Evict if at capacity
if (this.cache.size >= this.maxEntries && !this.cache.has(sessionId)) {
this.evictOldest();
}
const entry: CacheEntry = {
session_id: sessionId,
created_at: now,
accessed_at: now,
ttl: entryTTL,
content,
metadata,
};
this.cache.set(sessionId, entry);
return entry;
}
/**
* Get cache entry by session ID
*/
get(sessionId: string): CacheEntry | null {
const entry = this.cache.get(sessionId);
if (!entry) {
return null;
}
// Check TTL expiration
if (this.isExpired(entry)) {
this.cache.delete(sessionId);
return null;
}
// Update access time (LRU)
entry.accessed_at = Date.now();
return entry;
}
/**
* Read content with pagination
*/
read(
sessionId: string,
offset: number = 0,
limit: number = DEFAULT_PAGE_SIZE
): PagedReadResult | null {
const entry = this.get(sessionId);
if (!entry) {
return null;
}
const content = entry.content;
const totalBytes = Buffer.byteLength(content, 'utf-8');
// Handle byte-based offset for UTF-8
// For simplicity, we use character-based slicing
// This is approximate but works for most use cases
const charOffset = Math.min(offset, content.length);
const charLimit = Math.min(limit, content.length - charOffset);
const pageContent = content.slice(charOffset, charOffset + charLimit);
const endOffset = charOffset + pageContent.length;
const hasMore = endOffset < content.length;
return {
content: pageContent,
offset: charOffset,
limit: charLimit,
total_bytes: totalBytes,
has_more: hasMore,
next_offset: hasMore ? endOffset : null,
};
}
/**
* Release (delete) cache entry
*/
release(sessionId: string): { released: boolean; freed_bytes: number } {
const entry = this.cache.get(sessionId);
if (!entry) {
return { released: false, freed_bytes: 0 };
}
const freedBytes = entry.metadata.total_bytes;
this.cache.delete(sessionId);
return { released: true, freed_bytes: freedBytes };
}
/**
* Get session status
*/
getSessionStatus(sessionId: string): SessionStatus {
const entry = this.cache.get(sessionId);
if (!entry) {
return { session_id: sessionId, exists: false };
}
// Check if expired
if (this.isExpired(entry)) {
this.cache.delete(sessionId);
return { session_id: sessionId, exists: false };
}
const now = Date.now();
const expiresAt = entry.created_at + entry.ttl;
const ttlRemaining = Math.max(0, expiresAt - now);
return {
session_id: sessionId,
exists: true,
files: entry.metadata.files,
file_count: entry.metadata.file_count,
total_bytes: entry.metadata.total_bytes,
created_at: new Date(entry.created_at).toISOString(),
expires_at: new Date(expiresAt).toISOString(),
accessed_at: new Date(entry.accessed_at).toISOString(),
ttl_remaining_ms: ttlRemaining,
};
}
/**
* Get overall cache status
*/
getStatus(): CacheStatus {
let totalBytes = 0;
let oldest: CacheEntry | null = null;
let newest: CacheEntry | null = null;
for (const entry of this.cache.values()) {
// Skip expired entries
if (this.isExpired(entry)) {
continue;
}
totalBytes += entry.metadata.total_bytes;
if (!oldest || entry.created_at < oldest.created_at) {
oldest = entry;
}
if (!newest || entry.created_at > newest.created_at) {
newest = entry;
}
}
return {
entries: this.cache.size,
total_bytes: totalBytes,
oldest_session: oldest?.session_id ?? null,
newest_session: newest?.session_id ?? null,
};
}
/**
* Cleanup expired entries
*/
cleanupExpired(): { removed: number } {
let removed = 0;
const now = Date.now();
for (const [sessionId, entry] of this.cache.entries()) {
if (this.isExpired(entry, now)) {
this.cache.delete(sessionId);
removed++;
}
}
return { removed };
}
/**
* Clear all cache entries
*/
clear(): { removed: number } {
const count = this.cache.size;
this.cache.clear();
return { removed: count };
}
/**
* Check if entry is expired
*/
private isExpired(entry: CacheEntry, now?: number): boolean {
const currentTime = now ?? Date.now();
return currentTime > entry.created_at + entry.ttl;
}
/**
* Evict oldest entry (LRU)
*/
private evictOldest(): void {
let oldest: [string, CacheEntry] | null = null;
for (const [sessionId, entry] of this.cache.entries()) {
if (!oldest || entry.accessed_at < oldest[1].accessed_at) {
oldest = [sessionId, entry];
}
}
if (oldest) {
this.cache.delete(oldest[0]);
}
}
/**
* Stop cleanup timer (for graceful shutdown)
*/
destroy(): void {
if (this.cleanupInterval) {
clearInterval(this.cleanupInterval);
this.cleanupInterval = null;
}
}
/**
* List all session IDs
*/
listSessions(): string[] {
return Array.from(this.cache.keys());
}
/**
* Check if session exists and is valid
*/
has(sessionId: string): boolean {
const entry = this.cache.get(sessionId);
if (!entry) return false;
if (this.isExpired(entry)) {
this.cache.delete(sessionId);
return false;
}
return true;
}
}
// Singleton instance
let cacheInstance: ContextCacheStore | null = null;
/**
* Get the singleton cache instance
*/
export function getContextCacheStore(options?: {
maxEntries?: number;
defaultTTL?: number;
cleanupIntervalMs?: number;
}): ContextCacheStore {
if (!cacheInstance) {
cacheInstance = new ContextCacheStore(options);
}
return cacheInstance;
}
/**
* Reset the cache instance (for testing)
*/
export function resetContextCacheStore(): void {
if (cacheInstance) {
cacheInstance.destroy();
cacheInstance = null;
}
}
export { ContextCacheStore };
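A minimal usage sketch of the store defined above; the session id, file list, and content are placeholders.

import { getContextCacheStore } from './context-cache-store.js';

const store = getContextCacheStore();
const content = 'packed file contents...';

// Cache content under a session id (placeholder values throughout).
store.set('ctx-demo', content, {
  files: ['src/index.ts'],
  patterns: ['@src/**/*.ts'],
  total_bytes: Buffer.byteLength(content, 'utf-8'),
  file_count: 1,
});

// Read it back page by page (64KB default page size).
let page = store.read('ctx-demo', 0);
while (page) {
  process.stdout.write(page.content);
  if (!page.has_more || page.next_offset === null) break;
  page = store.read('ctx-demo', page.next_offset);
}

// Free the entry when done.
store.release('ctx-demo');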

View File

@@ -0,0 +1,393 @@
/**
* Context Cache MCP Tool
* Pack files by @patterns, cache in memory, paginated read by session ID
*
* Operations:
* - pack: Parse @patterns and cache file contents
* - read: Paginated read from cache by session ID
* - status: Get cache/session status
* - release: Release session cache
* - cleanup: Cleanup expired caches
*/
import { z } from 'zod';
import type { ToolSchema, ToolResult } from '../types/tool.js';
import { parseAndPack } from './pattern-parser.js';
import {
getContextCacheStore,
type CacheMetadata,
type PagedReadResult,
type CacheStatus,
type SessionStatus,
} from './context-cache-store.js';
// Zod schema for parameter validation
const OperationEnum = z.enum(['pack', 'read', 'status', 'release', 'cleanup']);
const ParamsSchema = z.object({
operation: OperationEnum,
// Pack parameters
patterns: z.array(z.string()).optional(),
content: z.string().optional(), // Direct text content to cache
session_id: z.string().optional(),
cwd: z.string().optional(),
include_dirs: z.array(z.string()).optional(),
ttl: z.number().optional(),
include_metadata: z.boolean().optional().default(true),
max_file_size: z.number().optional(),
// Read parameters
offset: z.number().optional().default(0),
limit: z.number().optional().default(65536), // 64KB default
});
type Params = z.infer<typeof ParamsSchema>;
// Result types
interface PackResult {
operation: 'pack';
session_id: string;
files_packed: number;
files_skipped: number;
total_bytes: number;
patterns_matched: number;
patterns_failed: number;
expires_at: string;
errors?: string[];
}
interface ReadResult {
operation: 'read';
session_id: string;
content: string;
offset: number;
limit: number;
total_bytes: number;
has_more: boolean;
next_offset: number | null;
}
interface StatusResult {
operation: 'status';
session_id?: string;
session?: SessionStatus;
cache?: CacheStatus;
}
interface ReleaseResult {
operation: 'release';
session_id: string;
released: boolean;
freed_bytes: number;
}
interface CleanupResult {
operation: 'cleanup';
removed: number;
remaining: number;
}
type OperationResult = PackResult | ReadResult | StatusResult | ReleaseResult | CleanupResult;
/**
* Generate session ID if not provided
*/
function generateSessionId(): string {
return `ctx-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
}
/**
* Operation: pack
* Parse @patterns and/or cache text content directly
*/
async function executePack(params: Params): Promise<PackResult> {
const {
patterns,
content,
session_id,
cwd,
include_dirs,
ttl,
include_metadata,
max_file_size,
} = params;
// Require at least patterns or content
if ((!patterns || patterns.length === 0) && !content) {
throw new Error('Either "patterns" or "content" is required for pack operation');
}
const sessionId = session_id || generateSessionId();
const store = getContextCacheStore();
let finalContent = '';
let filesPacked = 0;
let filesSkipped = 0;
let totalBytes = 0;
let patternsMatched = 0;
let patternsFailed = 0;
let errors: string[] = [];
let files: string[] = [];
let parsedPatterns: string[] = [];
// Pack files from patterns if provided
if (patterns && patterns.length > 0) {
const result = await parseAndPack(patterns, {
cwd: cwd || process.cwd(),
includeDirs: include_dirs,
includeMetadata: include_metadata,
maxFileSize: max_file_size,
});
finalContent = result.content;
filesPacked = result.packedFiles.length;
filesSkipped = result.skippedFiles.length;
totalBytes = result.totalBytes;
patternsMatched = result.parseResult.stats.matched_patterns;
patternsFailed = result.parseResult.stats.total_patterns - patternsMatched;
errors = result.parseResult.errors;
files = result.packedFiles;
parsedPatterns = result.parseResult.patterns;
}
// Append direct content if provided
if (content) {
if (finalContent) {
finalContent += '\n\n=== ADDITIONAL CONTENT ===\n' + content;
} else {
finalContent = content;
}
totalBytes += Buffer.byteLength(content, 'utf-8');
}
// Store in cache
const metadata: CacheMetadata = {
files,
patterns: parsedPatterns,
total_bytes: totalBytes,
file_count: filesPacked,
};
const entry = store.set(sessionId, finalContent, metadata, ttl);
const expiresAt = new Date(entry.created_at + entry.ttl).toISOString();
return {
operation: 'pack',
session_id: sessionId,
files_packed: filesPacked,
files_skipped: filesSkipped,
total_bytes: totalBytes,
patterns_matched: patternsMatched,
patterns_failed: patternsFailed,
expires_at: expiresAt,
errors: errors.length > 0 ? errors : undefined,
};
}
/**
* Operation: read
* Paginated read from cache
*/
function executeRead(params: Params): ReadResult {
const { session_id, offset, limit } = params;
if (!session_id) {
throw new Error('Parameter "session_id" is required for read operation');
}
const store = getContextCacheStore();
const result = store.read(session_id, offset, limit);
if (!result) {
throw new Error(`Session "${session_id}" not found or expired`);
}
return {
operation: 'read',
session_id,
content: result.content,
offset: result.offset,
limit: result.limit,
total_bytes: result.total_bytes,
has_more: result.has_more,
next_offset: result.next_offset,
};
}
/**
* Operation: status
* Get session or overall cache status
*/
function executeStatus(params: Params): StatusResult {
const { session_id } = params;
const store = getContextCacheStore();
if (session_id) {
// Session-specific status
const sessionStatus = store.getSessionStatus(session_id);
return {
operation: 'status',
session_id,
session: sessionStatus,
};
}
// Overall cache status
const cacheStatus = store.getStatus();
return {
operation: 'status',
cache: cacheStatus,
};
}
/**
* Operation: release
* Release session cache
*/
function executeRelease(params: Params): ReleaseResult {
const { session_id } = params;
if (!session_id) {
throw new Error('Parameter "session_id" is required for release operation');
}
const store = getContextCacheStore();
const result = store.release(session_id);
return {
operation: 'release',
session_id,
released: result.released,
freed_bytes: result.freed_bytes,
};
}
/**
* Operation: cleanup
* Cleanup expired caches
*/
function executeCleanup(): CleanupResult {
const store = getContextCacheStore();
const result = store.cleanupExpired();
const status = store.getStatus();
return {
operation: 'cleanup',
removed: result.removed,
remaining: status.entries,
};
}
/**
* Route to operation handler
*/
async function execute(params: Params): Promise<OperationResult> {
const { operation } = params;
switch (operation) {
case 'pack':
return executePack(params);
case 'read':
return executeRead(params);
case 'status':
return executeStatus(params);
case 'release':
return executeRelease(params);
case 'cleanup':
return executeCleanup();
default:
throw new Error(
`Unknown operation: ${operation}. Valid operations: pack, read, status, release, cleanup`
);
}
}
// MCP Tool Schema
export const schema: ToolSchema = {
name: 'context_cache',
description: `Context file cache with @pattern and text content support, paginated reading.
Usage:
context_cache(operation="pack", patterns=["@src/**/*.ts"], session_id="...")
context_cache(operation="pack", content="text to cache", session_id="...")
context_cache(operation="pack", patterns=["@src/**/*.ts"], content="extra text")
context_cache(operation="read", session_id="...", offset=0, limit=65536)
context_cache(operation="status", session_id="...")
context_cache(operation="release", session_id="...")
context_cache(operation="cleanup")
Pattern syntax:
@src/**/*.ts - All TypeScript files in src
@CLAUDE.md - Specific file
@../shared/**/* - Sibling directory (needs include_dirs)`,
inputSchema: {
type: 'object',
properties: {
operation: {
type: 'string',
enum: ['pack', 'read', 'status', 'release', 'cleanup'],
description: 'Operation to perform',
},
patterns: {
type: 'array',
items: { type: 'string' },
description: '@patterns to pack (e.g., ["@src/**/*.ts"]). Either patterns or content required for pack.',
},
content: {
type: 'string',
description: 'Direct text content to cache. Either patterns or content required for pack.',
},
session_id: {
type: 'string',
description: 'Cache session ID. Auto-generated for pack if not provided.',
},
cwd: {
type: 'string',
description: 'Working directory for pattern resolution (default: process.cwd())',
},
include_dirs: {
type: 'array',
items: { type: 'string' },
description: 'Additional directories to include for pattern matching',
},
ttl: {
type: 'number',
description: 'Cache TTL in milliseconds (default: 1800000 = 30min)',
},
include_metadata: {
type: 'boolean',
description: 'Include file metadata headers in packed content (default: true)',
},
max_file_size: {
type: 'number',
description: 'Max file size in bytes to include (default: 1MB). Larger files are skipped.',
},
offset: {
type: 'number',
description: 'Byte offset for paginated read (default: 0)',
},
limit: {
type: 'number',
description: 'Max bytes to read (default: 65536 = 64KB)',
},
},
required: ['operation'],
},
};
// Handler function
export async function handler(
params: Record<string, unknown>
): Promise<ToolResult<OperationResult>> {
const parsed = ParamsSchema.safeParse(params);
if (!parsed.success) {
return { success: false, error: `Invalid params: ${parsed.error.message}` };
}
try {
const result = await execute(parsed.data);
return { success: true, result };
} catch (error) {
return { success: false, error: (error as Error).message };
}
}
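A minimal sketch of driving the handler the way the MCP layer would; the pattern and page size are examples, and the result casts assume the ToolResult shape used in this file.

import { handler as contextCache } from './context-cache.js';

async function demo(): Promise<void> {
  // Pack TypeScript sources into a cache session.
  const packed = await contextCache({ operation: 'pack', patterns: ['@src/**/*.ts'] });
  if (!packed.success || !packed.result) throw new Error(packed.error ?? 'pack failed');
  const { session_id } = packed.result as { session_id: string };

  // Read the first 64KB page, then release the session.
  const page = await contextCache({ operation: 'read', session_id, offset: 0, limit: 65536 });
  console.log(page.success ? (page.result as { content: string }).content : page.error);

  await contextCache({ operation: 'release', session_id });
}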

View File

@@ -16,6 +16,8 @@ const OperationEnum = z.enum(['list', 'import', 'export', 'summary', 'embed', 's
const ParamsSchema = z.object({
operation: OperationEnum,
// Path parameter - highest priority for project resolution
path: z.string().optional(),
text: z.string().optional(),
id: z.string().optional(),
tool: z.enum(['gemini', 'qwen']).optional().default('gemini'),
@@ -106,17 +108,21 @@ interface EmbedStatusResult {
type OperationResult = ListResult | ImportResult | ExportResult | SummaryResult | EmbedResult | SearchResult | EmbedStatusResult;
/**
* Get project path from current working directory
* Get project path - uses explicit path if provided, otherwise falls back to current working directory
* Priority: path parameter > getProjectRoot()
*/
function getProjectPath(): string {
function getProjectPath(explicitPath?: string): string {
if (explicitPath) {
return explicitPath;
}
return getProjectRoot();
}
/**
* Get database path for current project
* Get database path for project
*/
function getDatabasePath(): string {
const projectPath = getProjectPath();
function getDatabasePath(explicitPath?: string): string {
const projectPath = getProjectPath(explicitPath);
const paths = StoragePaths.project(projectPath);
return join(paths.root, 'core-memory', 'core_memory.db');
}
@@ -129,8 +135,8 @@ const PREVIEW_MAX_LENGTH = 100;
* List all memories with compact output
*/
function executeList(params: Params): ListResult {
const { limit } = params;
const store = getCoreMemoryStore(getProjectPath());
const { limit, path } = params;
const store = getCoreMemoryStore(getProjectPath(path));
const memories = store.getMemories({ limit }) as CoreMemory[];
// Convert to compact format with truncated preview
@@ -160,13 +166,13 @@ function executeList(params: Params): ListResult {
* Import text as a new memory
*/
function executeImport(params: Params): ImportResult {
const { text } = params;
const { text, path } = params;
if (!text || text.trim() === '') {
throw new Error('Parameter "text" is required for import operation');
}
const store = getCoreMemoryStore(getProjectPath());
const store = getCoreMemoryStore(getProjectPath(path));
const memory = store.upsertMemory({
content: text.trim(),
});
@@ -184,14 +190,14 @@ function executeImport(params: Params): ImportResult {
* Searches current project first, then all projects if not found
*/
function executeExport(params: Params): ExportResult {
const { id } = params;
const { id, path } = params;
if (!id) {
throw new Error('Parameter "id" is required for export operation');
}
// Try current project first
const store = getCoreMemoryStore(getProjectPath());
// Try current project first (or explicit path if provided)
const store = getCoreMemoryStore(getProjectPath(path));
let memory = store.getMemory(id);
// If not found, search across all projects
@@ -218,13 +224,13 @@ function executeExport(params: Params): ExportResult {
* Generate AI summary for a memory
*/
async function executeSummary(params: Params): Promise<SummaryResult> {
const { id, tool = 'gemini' } = params;
const { id, tool = 'gemini', path } = params;
if (!id) {
throw new Error('Parameter "id" is required for summary operation');
}
const store = getCoreMemoryStore(getProjectPath());
const store = getCoreMemoryStore(getProjectPath(path));
const memory = store.getMemory(id);
if (!memory) {
@@ -245,8 +251,8 @@ async function executeSummary(params: Params): Promise<SummaryResult> {
* Generate embeddings for memory chunks
*/
async function executeEmbed(params: Params): Promise<EmbedResult> {
const { source_id, batch_size = 8, force = false } = params;
const dbPath = getDatabasePath();
const { source_id, batch_size = 8, force = false, path } = params;
const dbPath = getDatabasePath(path);
const result = await MemoryEmbedder.generateEmbeddings(dbPath, {
sourceId: source_id,
@@ -272,13 +278,13 @@ async function executeEmbed(params: Params): Promise<EmbedResult> {
* Search memory chunks using semantic search
*/
async function executeSearch(params: Params): Promise<SearchResult> {
const { query, top_k = 10, min_score = 0.3, source_type } = params;
const { query, top_k = 10, min_score = 0.3, source_type, path } = params;
if (!query) {
throw new Error('Parameter "query" is required for search operation');
}
const dbPath = getDatabasePath();
const dbPath = getDatabasePath(path);
const result = await MemoryEmbedder.searchMemories(dbPath, query, {
topK: top_k,
@@ -309,7 +315,8 @@ async function executeSearch(params: Params): Promise<SearchResult> {
* Get embedding status statistics
*/
async function executeEmbedStatus(params: Params): Promise<EmbedStatusResult> {
const dbPath = getDatabasePath();
const { path } = params;
const dbPath = getDatabasePath(path);
const result = await MemoryEmbedder.getEmbeddingStatus(dbPath);
@@ -368,6 +375,9 @@ Usage:
core_memory(operation="search", query="authentication") # Search memories semantically
core_memory(operation="embed_status") # Check embedding status
Path parameter (highest priority):
core_memory(operation="list", path="/path/to/project") # Use specific project path
Memory IDs use format: CMEM-YYYYMMDD-HHMMSS`,
inputSchema: {
type: 'object',
@@ -377,6 +387,10 @@ Memory IDs use format: CMEM-YYYYMMDD-HHMMSS`,
enum: ['list', 'import', 'export', 'summary', 'embed', 'search', 'embed_status'],
description: 'Operation to perform',
},
path: {
type: 'string',
description: 'Project path (highest priority - overrides auto-detected project root)',
},
text: {
type: 'string',
description: 'Text content to import (required for import operation)',
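A short illustration (not part of the diff) of the new path parameter; the project path is the same placeholder used in the tool description above.

// With an explicit path, getProjectPath(path) short-circuits getProjectRoot():
const params = {
  operation: 'search',
  query: 'authentication',
  path: '/path/to/project',   // optional; highest priority for project resolution
};
// Internally: getDatabasePath(params.path) -> StoragePaths.project(...)/core-memory/core_memory.db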

View File

@@ -22,6 +22,7 @@ import { executeInitWithProgress } from './smart-search.js';
// codex_lens removed - functionality integrated into smart_search
import * as readFileMod from './read-file.js';
import * as coreMemoryMod from './core-memory.js';
import * as contextCacheMod from './context-cache.js';
import type { ProgressInfo } from './codex-lens.js';
// Import legacy JS tools
@@ -357,6 +358,7 @@ registerTool(toLegacyTool(smartSearchMod));
// codex_lens removed - functionality integrated into smart_search
registerTool(toLegacyTool(readFileMod));
registerTool(toLegacyTool(coreMemoryMod));
registerTool(toLegacyTool(contextCacheMod));
// Register legacy JS tools
registerTool(uiGeneratePreviewTool);

View File

@@ -0,0 +1,246 @@
/**
* LiteLLM Client - Bridge between CCW and ccw-litellm Python package
* Provides LLM chat and embedding capabilities via spawned Python process
*
* Features:
* - Chat completions with multiple models
* - Text embeddings generation
* - Configuration management
* - JSON protocol communication
*/
import { spawn } from 'child_process';
export interface LiteLLMConfig {
pythonPath?: string; // Default 'python'
configPath?: string; // Configuration file path
timeout?: number; // Default 60000ms
}
export interface ChatMessage {
role: 'system' | 'user' | 'assistant';
content: string;
}
export interface ChatResponse {
content: string;
model: string;
usage?: {
prompt_tokens: number;
completion_tokens: number;
total_tokens: number;
};
}
export interface EmbedResponse {
vectors: number[][];
dimensions: number;
model: string;
}
export interface LiteLLMStatus {
available: boolean;
version?: string;
error?: string;
}
export class LiteLLMClient {
private pythonPath: string;
private configPath?: string;
private timeout: number;
constructor(config: LiteLLMConfig = {}) {
this.pythonPath = config.pythonPath || 'python';
this.configPath = config.configPath;
this.timeout = config.timeout || 60000;
}
/**
* Execute Python ccw-litellm command
*/
private async executePython(args: string[], options: { timeout?: number } = {}): Promise<string> {
const timeout = options.timeout || this.timeout;
return new Promise((resolve, reject) => {
const proc = spawn(this.pythonPath, ['-m', 'ccw_litellm.cli', ...args], {
stdio: ['pipe', 'pipe', 'pipe'],
env: { ...process.env }
});
let stdout = '';
let stderr = '';
let timedOut = false;
// Set up timeout
const timeoutId = setTimeout(() => {
timedOut = true;
proc.kill('SIGTERM');
reject(new Error(`Command timed out after ${timeout}ms`));
}, timeout);
proc.stdout.on('data', (data) => {
stdout += data.toString();
});
proc.stderr.on('data', (data) => {
stderr += data.toString();
});
proc.on('error', (error) => {
clearTimeout(timeoutId);
reject(new Error(`Failed to spawn Python process: ${error.message}`));
});
proc.on('close', (code) => {
clearTimeout(timeoutId);
if (timedOut) {
return; // Already rejected
}
if (code === 0) {
resolve(stdout.trim());
} else {
const errorMsg = stderr.trim() || `Process exited with code ${code}`;
reject(new Error(errorMsg));
}
});
});
}
/**
* Check if ccw-litellm is available
*/
async isAvailable(): Promise<boolean> {
try {
await this.executePython(['version'], { timeout: 5000 });
return true;
} catch {
return false;
}
}
/**
* Get status information
*/
async getStatus(): Promise<LiteLLMStatus> {
try {
const output = await this.executePython(['version'], { timeout: 5000 });
return {
available: true,
version: output.trim()
};
} catch (error: any) {
return {
available: false,
error: error.message
};
}
}
/**
* Get current configuration
*/
async getConfig(): Promise<any> {
const output = await this.executePython(['config', '--json']);
return JSON.parse(output);
}
/**
* Generate embeddings for texts
*/
async embed(texts: string[], model: string = 'default'): Promise<EmbedResponse> {
if (!texts || texts.length === 0) {
throw new Error('texts array cannot be empty');
}
const args = ['embed', '--model', model, '--output', 'json'];
// Add texts as arguments
for (const text of texts) {
args.push(text);
}
const output = await this.executePython(args, { timeout: this.timeout * 2 });
const vectors = JSON.parse(output);
return {
vectors,
dimensions: vectors[0]?.length || 0,
model
};
}
/**
* Chat with LLM
*/
async chat(message: string, model: string = 'default'): Promise<string> {
if (!message) {
throw new Error('message cannot be empty');
}
const args = ['chat', '--model', model, message];
return this.executePython(args, { timeout: this.timeout * 2 });
}
/**
* Multi-turn chat with messages array
*/
async chatMessages(messages: ChatMessage[], model: string = 'default'): Promise<ChatResponse> {
if (!messages || messages.length === 0) {
throw new Error('messages array cannot be empty');
}
// For now, just use the last user message
// TODO: Implement full message history support in ccw-litellm
const lastMessage = messages[messages.length - 1];
const content = await this.chat(lastMessage.content, model);
return {
content,
model,
usage: undefined // TODO: Add usage tracking
};
}
}
// Singleton instance
let _client: LiteLLMClient | null = null;
/**
* Get or create singleton LiteLLM client
*/
export function getLiteLLMClient(config?: LiteLLMConfig): LiteLLMClient {
if (!_client) {
_client = new LiteLLMClient(config);
}
return _client;
}
/**
* Check if LiteLLM is available
*/
export async function checkLiteLLMAvailable(): Promise<boolean> {
try {
const client = getLiteLLMClient();
return await client.isAvailable();
} catch {
return false;
}
}
/**
* Get LiteLLM status
*/
export async function getLiteLLMStatus(): Promise<LiteLLMStatus> {
try {
const client = getLiteLLMClient();
return await client.getStatus();
} catch (error: any) {
return {
available: false,
error: error.message
};
}
}
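A minimal usage sketch of the client above; the prompt, texts, and the 'default' model name are placeholders.

import { getLiteLLMClient } from './litellm-client.js';

async function demo(): Promise<void> {
  const client = getLiteLLMClient({ pythonPath: 'python', timeout: 60000 });

  const status = await client.getStatus();   // spawns `python -m ccw_litellm.cli version`
  if (!status.available) {
    console.warn(`ccw-litellm not available: ${status.error}`);
    return;
  }

  const reply = await client.chat('Hello from CCW', 'default');
  console.log(reply);

  const { vectors, dimensions } = await client.embed(['first text', 'second text']);
  console.log(`embedded ${vectors.length} texts at ${dimensions} dimensions`);
}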

View File

@@ -0,0 +1,241 @@
/**
* LiteLLM Executor - Execute LiteLLM endpoints with context caching
* Integrates with context-cache for file packing and LiteLLM client for API calls
*/
import { getLiteLLMClient } from './litellm-client.js';
import { handler as contextCacheHandler } from './context-cache.js';
import {
findEndpointById,
getProviderWithResolvedEnvVars,
} from '../config/litellm-api-config-manager.js';
import type { CustomEndpoint, ProviderCredential } from '../types/litellm-api-config.js';
export interface LiteLLMExecutionOptions {
prompt: string;
endpointId: string; // Custom endpoint ID (e.g., "my-gpt4o")
baseDir: string; // Project base directory
cwd?: string; // Working directory for file resolution
includeDirs?: string[]; // Additional directories for @patterns
enableCache?: boolean; // Override endpoint cache setting
onOutput?: (data: { type: string; data: string }) => void;
}
export interface LiteLLMExecutionResult {
success: boolean;
output: string;
model: string;
provider: string;
cacheUsed: boolean;
cachedFiles?: string[];
error?: string;
}
/**
* Extract @patterns from prompt text
*/
export function extractPatterns(prompt: string): string[] {
// Match @path patterns: @src/**/*.ts, @CLAUDE.md, @../shared/**/*
const regex = /@([^\s]+)/g;
const patterns: string[] = [];
let match;
while ((match = regex.exec(prompt)) !== null) {
patterns.push('@' + match[1]);
}
return patterns;
}
/**
* Execute LiteLLM endpoint with optional context caching
*/
export async function executeLiteLLMEndpoint(
options: LiteLLMExecutionOptions
): Promise<LiteLLMExecutionResult> {
const { prompt, endpointId, baseDir, cwd, includeDirs, enableCache, onOutput } = options;
// 1. Find endpoint configuration
const endpoint = findEndpointById(baseDir, endpointId);
if (!endpoint) {
return {
success: false,
output: '',
model: '',
provider: '',
cacheUsed: false,
error: `Endpoint not found: ${endpointId}`,
};
}
// 2. Get provider with resolved env vars
const provider = getProviderWithResolvedEnvVars(baseDir, endpoint.providerId);
if (!provider) {
return {
success: false,
output: '',
model: '',
provider: '',
cacheUsed: false,
error: `Provider not found: ${endpoint.providerId}`,
};
}
// Verify API key is available
if (!provider.resolvedApiKey) {
return {
success: false,
output: '',
model: endpoint.model,
provider: provider.type,
cacheUsed: false,
error: `API key not configured for provider: ${provider.name}`,
};
}
// 3. Process context cache if enabled
let finalPrompt = prompt;
let cacheUsed = false;
let cachedFiles: string[] = [];
const shouldCache = enableCache ?? endpoint.cacheStrategy.enabled;
if (shouldCache) {
const patterns = extractPatterns(prompt);
if (patterns.length > 0) {
if (onOutput) {
onOutput({ type: 'stderr', data: `[Context cache: Found ${patterns.length} @patterns]\n` });
}
// Pack files into cache
const packResult = await contextCacheHandler({
operation: 'pack',
patterns,
cwd: cwd || process.cwd(),
include_dirs: includeDirs,
ttl: endpoint.cacheStrategy.ttlMinutes * 60 * 1000,
max_file_size: endpoint.cacheStrategy.maxSizeKB * 1024,
});
if (packResult.success && packResult.result) {
const pack = packResult.result as any;
if (onOutput) {
onOutput({
type: 'stderr',
data: `[Context cache: Packed ${pack.files_packed} files, ${pack.total_bytes} bytes]\n`,
});
}
// Read cached content
const readResult = await contextCacheHandler({
operation: 'read',
session_id: pack.session_id,
limit: endpoint.cacheStrategy.maxSizeKB * 1024,
});
if (readResult.success && readResult.result) {
const read = readResult.result as any;
// Prepend cached content to prompt
finalPrompt = `${read.content}\n\n---\n\n${prompt}`;
cacheUsed = true;
// The pack result only exposes a file count, so record placeholders rather than actual paths
cachedFiles = pack.files_packed ? Array(pack.files_packed).fill('...') : [];
if (onOutput) {
onOutput({ type: 'stderr', data: `[Context cache: Applied to prompt]\n` });
}
}
} else if (packResult.error) {
if (onOutput) {
onOutput({ type: 'stderr', data: `[Context cache warning: ${packResult.error}]\n` });
}
}
}
}
// 4. Call LiteLLM
try {
if (onOutput) {
onOutput({
type: 'stderr',
data: `[LiteLLM: Calling ${provider.type}/${endpoint.model}]\n`,
});
}
const client = getLiteLLMClient({
pythonPath: 'python',
timeout: 120000, // 2 minutes
});
// Configure provider credentials via environment
// LiteLLM uses standard env vars like OPENAI_API_KEY, ANTHROPIC_API_KEY
const envVarName = getProviderEnvVarName(provider.type);
if (envVarName) {
process.env[envVarName] = provider.resolvedApiKey;
}
// Set base URL if custom
if (provider.apiBase) {
const baseUrlEnvVar = getProviderBaseUrlEnvVarName(provider.type);
if (baseUrlEnvVar) {
process.env[baseUrlEnvVar] = provider.apiBase;
}
}
// Use litellm-client to call chat
const response = await client.chat(finalPrompt, endpoint.model);
if (onOutput) {
onOutput({ type: 'stdout', data: response });
}
return {
success: true,
output: response,
model: endpoint.model,
provider: provider.type,
cacheUsed,
cachedFiles,
};
} catch (error) {
const errorMsg = (error as Error).message;
if (onOutput) {
onOutput({ type: 'stderr', data: `[LiteLLM error: ${errorMsg}]\n` });
}
return {
success: false,
output: '',
model: endpoint.model,
provider: provider.type,
cacheUsed,
error: errorMsg,
};
}
}
/**
* Get environment variable name for provider API key
*/
function getProviderEnvVarName(providerType: string): string | null {
const envVarMap: Record<string, string> = {
openai: 'OPENAI_API_KEY',
anthropic: 'ANTHROPIC_API_KEY',
google: 'GOOGLE_API_KEY',
azure: 'AZURE_API_KEY',
mistral: 'MISTRAL_API_KEY',
deepseek: 'DEEPSEEK_API_KEY',
};
return envVarMap[providerType] || null;
}
/**
* Get environment variable name for provider base URL
*/
function getProviderBaseUrlEnvVarName(providerType: string): string | null {
const envVarMap: Record<string, string> = {
openai: 'OPENAI_API_BASE',
anthropic: 'ANTHROPIC_API_BASE',
azure: 'AZURE_API_BASE',
};
return envVarMap[providerType] || null;
}
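A hedged usage sketch of executeLiteLLMEndpoint; the import path and the wrapper function are assumptions, while 'my-gpt4o' reuses the example id from the interface comment above.

import { executeLiteLLMEndpoint, extractPatterns } from './litellm-executor.js';

async function runEndpoint(prompt: string): Promise<void> {
  console.log('patterns found:', extractPatterns(prompt));   // e.g. ['@src/**/*.ts']

  const result = await executeLiteLLMEndpoint({
    prompt,
    endpointId: 'my-gpt4o',            // custom endpoint id, as in the interface comment above
    baseDir: process.cwd(),
    enableCache: true,                 // pack @patterns into the context cache first
    onOutput: (chunk) => process.stderr.write(chunk.data),
  });

  if (!result.success) throw new Error(result.error);
  console.log(result.output);
}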

View File

@@ -0,0 +1,329 @@
/**
* Pattern Parser - Parse @expression patterns to file lists
 * Supports glob patterns like @src/**/*.ts, @CLAUDE.md, @../shared/**/*
*/
import { glob } from 'glob';
import { resolve, isAbsolute, normalize } from 'path';
import { existsSync, statSync, readFileSync } from 'fs';
/** Result of parsing @patterns */
export interface PatternParseResult {
files: string[]; // Matched file paths (absolute)
patterns: string[]; // Original patterns
errors: string[]; // Parse errors
stats: {
total_files: number;
total_patterns: number;
matched_patterns: number;
};
}
/** Options for pattern parsing */
export interface PatternParseOptions {
cwd?: string; // Working directory
includeDirs?: string[]; // Additional directories to include
ignore?: string[]; // Ignore patterns
maxFiles?: number; // Max files to return (default: 1000)
followSymlinks?: boolean; // Follow symlinks (default: false)
}
/** Default ignore patterns */
const DEFAULT_IGNORE = [
'**/node_modules/**',
'**/.git/**',
'**/dist/**',
'**/build/**',
'**/.next/**',
'**/__pycache__/**',
'**/*.pyc',
'**/venv/**',
'**/.venv/**',
];
/**
* Extract pattern from @expression
* Example: "@src/**.ts" -> "src/**.ts"
*/
function extractPattern(expression: string): string | null {
const trimmed = expression.trim();
if (!trimmed.startsWith('@')) {
return null;
}
return trimmed.slice(1);
}
/**
* Check if a pattern is a glob pattern or exact file
*/
function isGlobPattern(pattern: string): boolean {
return pattern.includes('*') || pattern.includes('?') || pattern.includes('{') || pattern.includes('[');
}
/**
* Validate that a path is within allowed directories
*/
function isPathAllowed(filePath: string, allowedDirs: string[]): boolean {
const normalized = normalize(filePath);
return allowedDirs.some(dir => normalized.startsWith(normalize(dir)));
}
/**
* Build allowed directories list from options
*/
function buildAllowedDirs(cwd: string, includeDirs?: string[]): string[] {
const allowed = [cwd];
if (includeDirs) {
for (const dir of includeDirs) {
const absDir = isAbsolute(dir) ? dir : resolve(cwd, dir);
if (existsSync(absDir) && statSync(absDir).isDirectory()) {
allowed.push(absDir);
}
}
}
return allowed.map(d => normalize(d));
}
/**
* Parse @expressions and return matched files
*/
export async function parsePatterns(
patterns: string[],
options: PatternParseOptions = {}
): Promise<PatternParseResult> {
const {
cwd = process.cwd(),
includeDirs = [],
ignore = [],
maxFiles = 1000,
followSymlinks = false,
} = options;
const result: PatternParseResult = {
files: [],
patterns: [],
errors: [],
stats: {
total_files: 0,
total_patterns: patterns.length,
matched_patterns: 0,
},
};
// Build allowed directories
const allowedDirs = buildAllowedDirs(cwd, includeDirs);
// Merge ignore patterns
const allIgnore = [...DEFAULT_IGNORE, ...ignore];
// Track unique files
const fileSet = new Set<string>();
for (const expr of patterns) {
const pattern = extractPattern(expr);
if (!pattern) {
result.errors.push(`Invalid pattern: ${expr} (must start with @)`);
continue;
}
result.patterns.push(pattern);
try {
if (isGlobPattern(pattern)) {
// Glob pattern - use glob package
// Determine base directory for pattern
let baseDir = cwd;
let globPattern = pattern;
// Handle relative paths like ../shared/**
if (pattern.startsWith('../') || pattern.startsWith('./')) {
const parts = pattern.split('/');
const pathParts: string[] = [];
let i = 0;
// Extract path prefix
while (i < parts.length && (parts[i] === '..' || parts[i] === '.')) {
pathParts.push(parts[i]);
i++;
}
// Keep non-glob path parts
while (i < parts.length && !isGlobPattern(parts[i])) {
pathParts.push(parts[i]);
i++;
}
// Resolve base directory
if (pathParts.length > 0) {
baseDir = resolve(cwd, pathParts.join('/'));
globPattern = parts.slice(i).join('/') || '**/*';
}
}
// Check if base directory is allowed
if (!isPathAllowed(baseDir, allowedDirs)) {
result.errors.push(`Pattern ${expr}: base directory not in allowed paths`);
continue;
}
// Execute glob using the glob package
const matches = await glob(globPattern, {
cwd: baseDir,
absolute: true,
nodir: true,
follow: followSymlinks,
ignore: allIgnore,
dot: false,
});
let matchCount = 0;
for (const file of matches) {
// Validate each file is in allowed directories
if (isPathAllowed(file, allowedDirs)) {
fileSet.add(file);
matchCount++;
if (fileSet.size >= maxFiles) break;
}
}
if (matchCount > 0) {
result.stats.matched_patterns++;
}
} else {
// Exact file path
const absPath = isAbsolute(pattern) ? pattern : resolve(cwd, pattern);
// Validate path is allowed
if (!isPathAllowed(absPath, allowedDirs)) {
result.errors.push(`Pattern ${expr}: path not in allowed directories`);
continue;
}
// Check file exists
if (existsSync(absPath) && statSync(absPath).isFile()) {
fileSet.add(absPath);
result.stats.matched_patterns++;
} else {
result.errors.push(`Pattern ${expr}: file not found`);
}
}
} catch (err) {
result.errors.push(`Pattern ${expr}: ${(err as Error).message}`);
}
// Check max files limit
if (fileSet.size >= maxFiles) {
result.errors.push(`Max files limit (${maxFiles}) reached`);
break;
}
}
result.files = Array.from(fileSet);
result.stats.total_files = result.files.length;
return result;
}
/**
* Pack files into a single content string with metadata headers
*/
export async function packFiles(
files: string[],
options: {
includeMetadata?: boolean;
separator?: string;
maxFileSize?: number; // Max size per file in bytes (default: 1MB)
} = {}
): Promise<{
content: string;
packedFiles: string[];
skippedFiles: string[];
totalBytes: number;
}> {
const {
includeMetadata = true,
separator = '\n\n',
maxFileSize = 1024 * 1024, // 1MB default
} = options;
const parts: string[] = [];
const packedFiles: string[] = [];
const skippedFiles: string[] = [];
let totalBytes = 0;
for (const file of files) {
try {
const stats = statSync(file);
// Skip files that are too large
if (stats.size > maxFileSize) {
skippedFiles.push(file);
continue;
}
const content = readFileSync(file, 'utf-8');
if (includeMetadata) {
// Add file header with metadata
const header = [
`=== FILE: ${file} ===`,
`Size: ${stats.size} bytes`,
`Modified: ${stats.mtime.toISOString()}`,
'---',
].join('\n');
parts.push(header + '\n' + content);
} else {
parts.push(content);
}
packedFiles.push(file);
totalBytes += Buffer.byteLength(content, 'utf-8'); // count bytes, consistent with total_bytes metadata
} catch {
skippedFiles.push(file);
}
}
return {
content: parts.join(separator),
packedFiles,
skippedFiles,
totalBytes,
};
}
/**
* Parse patterns and pack files in one call
*/
export async function parseAndPack(
patterns: string[],
options: PatternParseOptions & {
includeMetadata?: boolean;
separator?: string;
maxFileSize?: number;
} = {}
): Promise<{
content: string;
parseResult: PatternParseResult;
packedFiles: string[];
skippedFiles: string[];
totalBytes: number;
}> {
const parseResult = await parsePatterns(patterns, options);
const packResult = await packFiles(parseResult.files, {
includeMetadata: options.includeMetadata,
separator: options.separator,
maxFileSize: options.maxFileSize,
});
return {
content: packResult.content,
parseResult,
packedFiles: packResult.packedFiles,
skippedFiles: packResult.skippedFiles,
totalBytes: packResult.totalBytes,
};
}
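A minimal usage sketch of parseAndPack with the defaults documented above; the patterns are examples.

import { parseAndPack } from './pattern-parser.js';

async function packSources(): Promise<void> {
  const result = await parseAndPack(['@src/**/*.ts', '@CLAUDE.md'], {
    cwd: process.cwd(),
    includeMetadata: true,      // prepend "=== FILE: ... ===" headers
    maxFileSize: 1024 * 1024,   // skip files larger than 1MB
  });

  console.log(
    `${result.packedFiles.length} packed, ${result.skippedFiles.length} skipped, ` +
    `${result.totalBytes} bytes, ${result.parseResult.errors.length} pattern errors`
  );
  // result.content is the concatenated payload handed to the context cache.
}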

View File

@@ -36,10 +36,12 @@ const ParamsSchema = z.object({
path: z.string().optional(),
paths: z.array(z.string()).default([]),
contextLines: z.number().default(0),
maxResults: z.number().default(20), // Increased default
maxResults: z.number().default(5), // Default 5 with full content
includeHidden: z.boolean().default(false),
languages: z.array(z.string()).optional(),
limit: z.number().default(20), // Increased default
limit: z.number().default(5), // Default 5 with full content
extraFilesCount: z.number().default(10), // Additional file-only results
maxContentLength: z.number().default(200), // Max content length for truncation (50-2000)
offset: z.number().default(0), // NEW: Pagination offset (start_index)
enrich: z.boolean().default(false),
// Search modifiers for ripgrep mode
@@ -268,6 +270,7 @@ interface SearchMetadata {
interface SearchResult {
success: boolean;
results?: ExactMatch[] | SemanticMatch[] | GraphMatch[] | FileMatch[] | unknown;
extra_files?: string[]; // Additional file paths without content
output?: string;
metadata?: SearchMetadata;
error?: string;
@@ -275,11 +278,22 @@ interface SearchResult {
message?: string;
}
interface ModelInfo {
model_profile?: string;
model_name?: string;
embedding_dim?: number;
backend?: string;
created_at?: string;
updated_at?: string;
}
interface IndexStatus {
indexed: boolean;
has_embeddings: boolean;
file_count?: number;
embeddings_coverage_percent?: number;
total_chunks?: number;
model_info?: ModelInfo | null;
warning?: string;
}
@@ -290,6 +304,42 @@ function stripAnsi(str: string): string {
return str.replace(/\x1b\[[0-9;]*m/g, '');
}
/** Default maximum content length to return (avoid excessive output) */
const DEFAULT_MAX_CONTENT_LENGTH = 200;
/**
* Truncate content to specified length with ellipsis
* @param content - The content to truncate
* @param maxLength - Maximum length (default: 200)
*/
function truncateContent(content: string | null | undefined, maxLength: number = DEFAULT_MAX_CONTENT_LENGTH): string {
if (!content) return '';
if (content.length <= maxLength) return content;
return content.slice(0, maxLength) + '...';
}
/**
* Split results into full content results and extra file-only results
* Generic function supporting both SemanticMatch and ExactMatch types
* @param allResults - All search results (must have 'file' property)
* @param fullContentLimit - Number of results with full content (default: 5)
* @param extraFilesCount - Number of additional file-only results (default: 10)
*/
function splitResultsWithExtraFiles<T extends { file: string }>(
allResults: T[],
fullContentLimit: number = 5,
extraFilesCount: number = 10
): { results: T[]; extra_files: string[] } {
// First N results with full content
const results = allResults.slice(0, fullContentLimit);
// Next M results as file paths only (deduplicated)
const extraResults = allResults.slice(fullContentLimit, fullContentLimit + extraFilesCount);
const extra_files = [...new Set(extraResults.map(r => r.file))];
return { results, extra_files };
}
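An illustrative call (not part of the diff) showing how the split behaves with the new defaults; the sample data is made up.

// With fullContentLimit=5 and extraFilesCount=10, the first 5 matches keep their
// (truncated) content and the following matches collapse to deduplicated file paths.
const sample = Array.from({ length: 12 }, (_, i) => ({ file: `src/f${i % 8}.ts`, score: 1 }));
const { results: top, extra_files: more } = splitResultsWithExtraFiles(sample, 5, 10);
// top.length === 5; more holds the unique paths from matches 6..12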
/**
* Check if CodexLens index exists for current directory
* @param path - Directory path to check
@@ -320,6 +370,18 @@ async function checkIndexStatus(path: string = '.'): Promise<IndexStatus> {
const embeddingsData = status.embeddings || {};
const embeddingsCoverage = embeddingsData.coverage_percent || 0;
const has_embeddings = embeddingsCoverage >= 50; // Threshold: 50%
const totalChunks = embeddingsData.total_chunks || 0;
// Extract model info if available
const modelInfoData = embeddingsData.model_info;
const modelInfo: ModelInfo | undefined = modelInfoData ? {
model_profile: modelInfoData.model_profile,
model_name: modelInfoData.model_name,
embedding_dim: modelInfoData.embedding_dim,
backend: modelInfoData.backend,
created_at: modelInfoData.created_at,
updated_at: modelInfoData.updated_at,
} : undefined;
let warning: string | undefined;
if (!indexed) {
@@ -335,6 +397,9 @@ async function checkIndexStatus(path: string = '.'): Promise<IndexStatus> {
has_embeddings,
file_count: status.total_files,
embeddings_coverage_percent: embeddingsCoverage,
total_chunks: totalChunks,
// Ensure model_info is null instead of undefined so it's included in JSON
model_info: modelInfo ?? null,
warning,
};
} catch {
@@ -688,7 +753,7 @@ async function executeAutoMode(params: Params): Promise<SearchResult> {
* Supports tokenized multi-word queries with OR matching and result ranking
*/
async function executeRipgrepMode(params: Params): Promise<SearchResult> {
const { query, paths = [], contextLines = 0, maxResults = 10, includeHidden = false, path = '.', regex = true, caseSensitive = true, tokenize = true } = params;
const { query, paths = [], contextLines = 0, maxResults = 5, extraFilesCount = 10, maxContentLength = 200, includeHidden = false, path = '.', regex = true, caseSensitive = true, tokenize = true } = params;
if (!query) {
return {
@@ -700,6 +765,9 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
// Check if ripgrep is available
const hasRipgrep = checkToolAvailability('rg');
// Calculate total to fetch for split (full content + extra files)
const totalToFetch = maxResults + extraFilesCount;
// If ripgrep not available, fall back to CodexLens exact mode
if (!hasRipgrep) {
const readyStatus = await ensureCodexLensReady();
@@ -711,7 +779,7 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
}
// Use CodexLens exact mode as fallback
const args = ['search', query, '--limit', maxResults.toString(), '--mode', 'exact', '--json'];
const args = ['search', query, '--limit', totalToFetch.toString(), '--mode', 'exact', '--json'];
const result = await executeCodexLens(args, { cwd: path });
if (!result.success) {
@@ -728,23 +796,27 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
}
// Parse results
let results: SemanticMatch[] = [];
let allResults: SemanticMatch[] = [];
try {
const parsed = JSON.parse(stripAnsi(result.output || '{}'));
const data = parsed.result?.results || parsed.results || parsed;
results = (Array.isArray(data) ? data : []).map((item: any) => ({
allResults = (Array.isArray(data) ? data : []).map((item: any) => ({
file: item.path || item.file,
score: item.score || 0,
content: item.excerpt || item.content || '',
content: truncateContent(item.content || item.excerpt, maxContentLength),
symbol: item.symbol || null,
}));
} catch {
// Keep empty results
}
// Split results: first N with full content, rest as file paths only
const { results, extra_files } = splitResultsWithExtraFiles(allResults, maxResults, extraFilesCount);
return {
success: true,
results,
extra_files: extra_files.length > 0 ? extra_files : undefined,
metadata: {
mode: 'ripgrep',
backend: 'codexlens-fallback',
@@ -755,12 +827,12 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
};
}
// Use ripgrep
// Use ripgrep - request more results to support split
const { command, args, tokens } = buildRipgrepCommand({
query,
paths: paths.length > 0 ? paths : [path],
contextLines,
maxResults,
maxResults: totalToFetch, // Fetch more to support split
includeHidden,
regex,
caseSensitive,
@@ -786,14 +858,14 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
});
child.on('close', (code) => {
const results: ExactMatch[] = [];
const allResults: ExactMatch[] = [];
const lines = stdout.split('\n').filter((line) => line.trim());
// Limit total results to prevent memory overflow (--max-count only limits per-file)
const effectiveLimit = maxResults > 0 ? maxResults : 500;
const effectiveLimit = totalToFetch > 0 ? totalToFetch : 500;
for (const line of lines) {
// Stop collecting if we've reached the limit
if (results.length >= effectiveLimit) {
if (allResults.length >= effectiveLimit) {
resultLimitReached = true;
break;
}
@@ -811,7 +883,7 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
: 1,
content: item.data.lines.text.trim(),
};
results.push(match);
allResults.push(match);
}
} catch {
continue;
@@ -824,9 +896,12 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
// Apply token-based scoring and sorting for multi-word queries
// Results matching more tokens are ranked higher (exact matches first)
const scoredResults = tokens.length > 1 ? scoreByTokenMatch(results, tokens) : results;
const scoredResults = tokens.length > 1 ? scoreByTokenMatch(allResults, tokens) : allResults;
if (code === 0 || code === 1 || (isWindowsDeviceError && scoredResults.length > 0)) {
// Split results: first N with full content, rest as file paths only
const { results, extra_files } = splitResultsWithExtraFiles(scoredResults, maxResults, extraFilesCount);
// Build warning message for various conditions
const warnings: string[] = [];
if (resultLimitReached) {
@@ -838,18 +913,19 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
resolve({
success: true,
results: scoredResults,
results,
extra_files: extra_files.length > 0 ? extra_files : undefined,
metadata: {
mode: 'ripgrep',
backend: 'ripgrep',
count: scoredResults.length,
count: results.length,
query,
tokens: tokens.length > 1 ? tokens : undefined, // Include tokens in metadata for debugging
tokenized: tokens.length > 1,
...(warnings.length > 0 && { warning: warnings.join('; ') }),
},
});
} else if (isWindowsDeviceError && results.length === 0) {
} else if (isWindowsDeviceError && allResults.length === 0) {
// Windows device error but no results - might be the only issue
resolve({
success: true,
@@ -886,7 +962,7 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
* Requires index
*/
async function executeCodexLensExactMode(params: Params): Promise<SearchResult> {
const { query, path = '.', maxResults = 10, enrich = false } = params;
const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false } = params;
if (!query) {
return {
@@ -907,7 +983,9 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
// Check index status
const indexStatus = await checkIndexStatus(path);
const args = ['search', query, '--limit', maxResults.toString(), '--mode', 'exact', '--json'];
// Request more results to support split (full content + extra files)
const totalToFetch = maxResults + extraFilesCount;
const args = ['search', query, '--limit', totalToFetch.toString(), '--mode', 'exact', '--json'];
if (enrich) {
args.push('--enrich');
}
@@ -928,14 +1006,14 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
}
// Parse results
let results: SemanticMatch[] = [];
let allResults: SemanticMatch[] = [];
try {
const parsed = JSON.parse(stripAnsi(result.output || '{}'));
const data = parsed.result?.results || parsed.results || parsed;
results = (Array.isArray(data) ? data : []).map((item: any) => ({
allResults = (Array.isArray(data) ? data : []).map((item: any) => ({
file: item.path || item.file,
score: item.score || 0,
content: item.excerpt || item.content || '',
content: truncateContent(item.content || item.excerpt, maxContentLength),
symbol: item.symbol || null,
}));
} catch {
@@ -943,8 +1021,8 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
}
// Fallback to fuzzy mode if exact returns no results
if (results.length === 0) {
const fuzzyArgs = ['search', query, '--limit', maxResults.toString(), '--mode', 'fuzzy', '--json'];
if (allResults.length === 0) {
const fuzzyArgs = ['search', query, '--limit', totalToFetch.toString(), '--mode', 'fuzzy', '--json'];
if (enrich) {
fuzzyArgs.push('--enrich');
}
@@ -954,20 +1032,23 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
try {
const parsed = JSON.parse(stripAnsi(fuzzyResult.output || '{}'));
const data = parsed.result?.results || parsed.results || parsed;
results = (Array.isArray(data) ? data : []).map((item: any) => ({
allResults = (Array.isArray(data) ? data : []).map((item: any) => ({
file: item.path || item.file,
score: item.score || 0,
content: item.excerpt || item.content || '',
content: truncateContent(item.content || item.excerpt, maxContentLength),
symbol: item.symbol || null,
}));
} catch {
// Keep empty results
}
if (results.length > 0) {
if (allResults.length > 0) {
// Split results: first N with full content, rest as file paths only
const { results, extra_files } = splitResultsWithExtraFiles(allResults, maxResults, extraFilesCount);
return {
success: true,
results,
extra_files: extra_files.length > 0 ? extra_files : undefined,
metadata: {
mode: 'exact',
backend: 'codexlens',
@@ -982,9 +1063,13 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
}
}
// Split results: first N with full content, rest as file paths only
const { results, extra_files } = splitResultsWithExtraFiles(allResults, maxResults, extraFilesCount);
return {
success: true,
results,
extra_files: extra_files.length > 0 ? extra_files : undefined,
metadata: {
mode: 'exact',
backend: 'codexlens',
@@ -1001,7 +1086,7 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
* Requires index with embeddings
*/
async function executeHybridMode(params: Params): Promise<SearchResult> {
const { query, path = '.', maxResults = 10, enrich = false } = params;
const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false } = params;
if (!query) {
return {
@@ -1022,7 +1107,9 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
// Check index status
const indexStatus = await checkIndexStatus(path);
const args = ['search', query, '--limit', maxResults.toString(), '--mode', 'hybrid', '--json'];
// Request more results to support split (full content + extra files)
const totalToFetch = maxResults + extraFilesCount;
const args = ['search', query, '--limit', totalToFetch.toString(), '--mode', 'hybrid', '--json'];
if (enrich) {
args.push('--enrich');
}
@@ -1043,14 +1130,14 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
}
// Parse results
let results: SemanticMatch[] = [];
let allResults: SemanticMatch[] = [];
let baselineInfo: { score: number; count: number } | null = null;
let initialCount = 0;
try {
const parsed = JSON.parse(stripAnsi(result.output || '{}'));
const data = parsed.result?.results || parsed.results || parsed;
results = (Array.isArray(data) ? data : []).map((item: any) => {
allResults = (Array.isArray(data) ? data : []).map((item: any) => {
const rawScore = item.score || 0;
// Hybrid mode returns distance scores (lower is better).
// Convert to similarity scores (higher is better) for consistency.
@@ -1059,27 +1146,27 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
return {
file: item.path || item.file,
score: similarityScore,
content: item.excerpt || item.content || '',
content: truncateContent(item.content || item.excerpt, maxContentLength),
symbol: item.symbol || null,
};
});
initialCount = results.length;
initialCount = allResults.length;
// Post-processing pipeline to improve semantic search quality
// 0. Filter dominant baseline scores (hot spot detection)
const baselineResult = filterDominantBaselineScores(results);
results = baselineResult.filteredResults;
const baselineResult = filterDominantBaselineScores(allResults);
allResults = baselineResult.filteredResults;
baselineInfo = baselineResult.baselineInfo;
// 1. Filter noisy files (coverage, node_modules, etc.)
results = filterNoisyFiles(results);
allResults = filterNoisyFiles(allResults);
// 2. Boost results containing query keywords
results = applyKeywordBoosting(results, query);
allResults = applyKeywordBoosting(allResults, query);
// 3. Enforce score diversity (penalize identical scores)
results = enforceScoreDiversity(results);
allResults = enforceScoreDiversity(allResults);
// 4. Re-sort by adjusted scores
results.sort((a, b) => b.score - a.score);
allResults.sort((a, b) => b.score - a.score);
} catch {
return {
success: true,
@@ -1095,15 +1182,19 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
};
}
// Split results: first N with full content, rest as file paths only
const { results, extra_files } = splitResultsWithExtraFiles(allResults, maxResults, extraFilesCount);
// Build metadata with baseline info if detected
let note = 'Hybrid mode uses RRF fusion (exact + fuzzy + vector) for best results';
if (baselineInfo) {
note += ` | Filtered ${initialCount - results.length} hot-spot results with baseline score ~${baselineInfo.score.toFixed(4)}`;
note += ` | Filtered ${initialCount - allResults.length} hot-spot results with baseline score ~${baselineInfo.score.toFixed(4)}`;
}
return {
success: true,
results,
extra_files: extra_files.length > 0 ? extra_files : undefined,
metadata: {
mode: 'hybrid',
backend: 'codexlens',
@@ -1514,7 +1605,7 @@ export const schema: ToolSchema = {
mode: {
type: 'string',
enum: SEARCH_MODES,
description: 'Search mode: auto (default), hybrid (best quality), exact (CodexLens FTS), ripgrep (fast, no index), priority (fallback: hybrid->exact->ripgrep)',
description: 'Search mode: auto, hybrid (best quality), exact (CodexLens FTS), ripgrep (fast, no index), priority (fallback chain)',
default: 'auto',
},
output_mode: {
@@ -1550,6 +1641,16 @@ export const schema: ToolSchema = {
description: 'Alias for maxResults (default: 20)',
default: 20,
},
extraFilesCount: {
type: 'number',
description: 'Number of additional file-only results (paths without content)',
default: 10,
},
maxContentLength: {
type: 'number',
description: 'Maximum content length for truncation (50-2000)',
default: 200,
},
offset: {
type: 'number',
description: 'Pagination offset - skip first N results (default: 0)',
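The hunks above call two helpers, `truncateContent` and `splitResultsWithExtraFiles`, that are defined elsewhere in `smart-search.ts` and do not appear in this diff. A minimal sketch of the behaviour they are assumed to have, with signatures inferred from the call sites, might look like this:

```ts
// Hypothetical sketch inferred from the call sites in this diff; the actual
// helpers in smart-search.ts may differ in naming details and return shape.
interface SplitResult<T> {
  results: T[];           // first `maxResults` hits, content preserved (already truncated)
  extra_files: string[];  // remaining hits reduced to file paths only
}

function splitResultsWithExtraFiles<T extends { file: string }>(
  allResults: T[],
  maxResults: number,
  extraFilesCount: number,
): SplitResult<T> {
  const results = allResults.slice(0, maxResults);
  const extra_files = allResults
    .slice(maxResults, maxResults + extraFilesCount)
    .map((r) => r.file);
  return { results, extra_files };
}

function truncateContent(content: string | undefined, maxLength: number): string {
  if (!content) return '';
  return content.length > maxLength ? `${content.slice(0, maxLength)}…` : content;
}
```

Under these assumptions, only the first `maxResults` matches carry (truncated) content, while up to `extraFilesCount` further matches are surfaced as path-only references, which keeps tool output compact.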

View File

@@ -0,0 +1,402 @@
/**
* LiteLLM API Configuration Type Definitions
*
* Defines types for provider credentials, cache strategies, custom endpoints,
* and the overall configuration structure for LiteLLM API integration.
*/
/**
* API format types (simplified)
* Most providers use OpenAI-compatible format
*/
export type ProviderType =
| 'openai' // OpenAI-compatible format (most providers)
| 'anthropic' // Anthropic format
| 'custom'; // Custom format
/**
* Advanced provider settings for LiteLLM compatibility
* Maps to LiteLLM's provider configuration options
*/
export interface ProviderAdvancedSettings {
/** Request timeout in seconds (default: 300) */
timeout?: number;
/** Maximum retry attempts on failure (default: 3) */
maxRetries?: number;
/** Organization ID (OpenAI-specific) */
organization?: string;
/** API version string (Azure-specific, e.g., "2024-02-01") */
apiVersion?: string;
/** Custom HTTP headers as JSON object */
customHeaders?: Record<string, string>;
/** Requests per minute rate limit */
rpm?: number;
/** Tokens per minute rate limit */
tpm?: number;
/** Proxy server URL (e.g., "http://proxy.example.com:8080") */
proxy?: string;
}
/**
* Model type classification
*/
export type ModelType = 'llm' | 'embedding';
/**
* Model capability metadata
*/
export interface ModelCapabilities {
/** Whether the model supports streaming responses */
streaming?: boolean;
/** Whether the model supports function/tool calling */
functionCalling?: boolean;
/** Whether the model supports vision/image input */
vision?: boolean;
/** Context window size in tokens */
contextWindow?: number;
/** Embedding dimension (for embedding models only) */
embeddingDimension?: number;
/** Maximum output tokens */
maxOutputTokens?: number;
}
/**
* Routing strategy for load balancing across multiple keys
*/
export type RoutingStrategy =
| 'simple-shuffle' // Random selection (default, recommended)
| 'weighted' // Weight-based distribution
| 'latency-based' // Route to lowest latency
| 'cost-based' // Route to lowest cost
| 'least-busy'; // Route to least concurrent
/**
* Individual API key configuration with optional weight
*/
export interface ApiKeyEntry {
/** Unique identifier */
id: string;
/** API key value or env var reference */
key: string;
/** Display label for this key */
label?: string;
/** Weight for weighted routing (default: 1) */
weight?: number;
/** Whether this key is enabled */
enabled: boolean;
/** Last health check status */
healthStatus?: 'healthy' | 'unhealthy' | 'unknown';
/** Last health check timestamp */
lastHealthCheck?: string;
/** Error message if unhealthy */
lastError?: string;
}
/**
* Health check configuration
*/
export interface HealthCheckConfig {
/** Enable automatic health checks */
enabled: boolean;
/** Check interval in seconds (default: 300) */
intervalSeconds: number;
/** Cooldown period after failure in seconds (default: 5) */
cooldownSeconds: number;
/** Number of failures before marking unhealthy (default: 3) */
failureThreshold: number;
}
/**
* Model-specific endpoint settings
* Allows per-model configuration overrides
*/
export interface ModelEndpointSettings {
/** Override base URL for this model */
baseUrl?: string;
/** Override timeout for this model */
timeout?: number;
/** Override max retries for this model */
maxRetries?: number;
/** Custom headers for this model */
customHeaders?: Record<string, string>;
/** Cache strategy for this model */
cacheStrategy?: CacheStrategy;
}
/**
* Model definition with type and grouping
*/
export interface ModelDefinition {
/** Unique identifier for this model */
id: string;
/** Display name for UI */
name: string;
/** Model type: LLM or Embedding */
type: ModelType;
/** Model series for grouping (e.g., "GPT-4", "Claude-3") */
series: string;
/** Whether this model is enabled */
enabled: boolean;
/** Model capabilities */
capabilities?: ModelCapabilities;
/** Model-specific endpoint settings */
endpointSettings?: ModelEndpointSettings;
/** Optional description */
description?: string;
/** Creation timestamp (ISO 8601) */
createdAt: string;
/** Last update timestamp (ISO 8601) */
updatedAt: string;
}
/**
* Provider credential configuration
* Stores API keys, base URLs, and provider metadata
*/
export interface ProviderCredential {
/** Unique identifier for this provider configuration */
id: string;
/** Display name for UI */
name: string;
/** Provider type */
type: ProviderType;
/** API key or environment variable reference (e.g., ${OPENAI_API_KEY}) */
apiKey: string;
/** Custom API base URL (optional, overrides provider default) */
apiBase?: string;
/** Whether this provider is enabled */
enabled: boolean;
/** Advanced provider settings (optional) */
advancedSettings?: ProviderAdvancedSettings;
/** Multiple API keys for load balancing */
apiKeys?: ApiKeyEntry[];
/** Routing strategy for multi-key load balancing */
routingStrategy?: RoutingStrategy;
/** Health check configuration */
healthCheck?: HealthCheckConfig;
/** LLM models configured for this provider */
llmModels?: ModelDefinition[];
/** Embedding models configured for this provider */
embeddingModels?: ModelDefinition[];
/** Creation timestamp (ISO 8601) */
createdAt: string;
/** Last update timestamp (ISO 8601) */
updatedAt: string;
}
/**
* Cache strategy for prompt context optimization
* Enables file-based caching to reduce token usage
*/
export interface CacheStrategy {
/** Whether caching is enabled for this endpoint */
enabled: boolean;
/** Time-to-live in minutes (default: 60) */
ttlMinutes: number;
/** Maximum cache size in KB (default: 512) */
maxSizeKB: number;
/** File patterns to cache (glob patterns like "*.md", "*.ts") */
filePatterns: string[];
}
/**
* Custom endpoint configuration
* Maps CLI identifiers to specific models and caching strategies
*/
export interface CustomEndpoint {
/** Unique CLI identifier (used in --model flag, e.g., "my-gpt4o") */
id: string;
/** Display name for UI */
name: string;
/** Reference to provider credential ID */
providerId: string;
/** Model identifier (e.g., "gpt-4o", "claude-3-5-sonnet-20241022") */
model: string;
/** Optional description */
description?: string;
/** Cache strategy for this endpoint */
cacheStrategy: CacheStrategy;
/** Whether this endpoint is enabled */
enabled: boolean;
/** Creation timestamp (ISO 8601) */
createdAt: string;
/** Last update timestamp (ISO 8601) */
updatedAt: string;
}
/**
* Global cache settings
* Applies to all endpoints unless overridden
*/
export interface GlobalCacheSettings {
/** Whether caching is globally enabled */
enabled: boolean;
/** Cache directory path (default: ~/.ccw/cache/context) */
cacheDir: string;
/** Maximum total cache size in MB (default: 100) */
maxTotalSizeMB: number;
}
/**
* CodexLens embedding provider selection for rotation
* Aggregates provider + model + all API keys
*/
export interface CodexLensEmbeddingProvider {
/** Reference to provider credential ID */
providerId: string;
/** Embedding model ID from the provider */
modelId: string;
/** Whether to use all API keys from this provider (default: true) */
useAllKeys: boolean;
/** Specific API key IDs to use (if useAllKeys is false) */
selectedKeyIds?: string[];
/** Weight for weighted routing (default: 1.0, applies to all keys from this provider) */
weight: number;
/** Maximum concurrent requests per key (default: 4) */
maxConcurrentPerKey: number;
/** Whether this provider is enabled for rotation */
enabled: boolean;
}
/**
* CodexLens multi-provider embedding rotation configuration
* Aggregates multiple providers with same model for parallel rotation
*/
export interface CodexLensEmbeddingRotation {
/** Whether multi-provider rotation is enabled */
enabled: boolean;
/** Selection strategy: round_robin, latency_aware, weighted_random */
strategy: 'round_robin' | 'latency_aware' | 'weighted_random';
/** Default cooldown seconds for rate-limited endpoints (default: 60) */
defaultCooldown: number;
/** Target model name that all providers should support (e.g., "qwen3-embedding") */
targetModel: string;
/** List of providers to aggregate for rotation */
providers: CodexLensEmbeddingProvider[];
}
/**
* Generic embedding pool configuration (refactored from CodexLensEmbeddingRotation)
* Supports automatic discovery of all providers offering a specific model
*/
export interface EmbeddingPoolConfig {
/** Whether embedding pool is enabled */
enabled: boolean;
/** Target embedding model name (e.g., "text-embedding-3-small") */
targetModel: string;
/** Selection strategy: round_robin, latency_aware, weighted_random */
strategy: 'round_robin' | 'latency_aware' | 'weighted_random';
/** Whether to automatically discover all providers offering targetModel */
autoDiscover: boolean;
/** Provider IDs to exclude from auto-discovery (optional) */
excludedProviderIds?: string[];
/** Default cooldown seconds for rate-limited endpoints (default: 60) */
defaultCooldown: number;
/** Default maximum concurrent requests per key (default: 4) */
defaultMaxConcurrentPerKey: number;
}
/**
* Complete LiteLLM API configuration
* Root configuration object stored in JSON file
*/
export interface LiteLLMApiConfig {
/** Configuration schema version */
version: number;
/** List of configured providers */
providers: ProviderCredential[];
/** List of custom endpoints */
endpoints: CustomEndpoint[];
/** Default endpoint ID (optional) */
defaultEndpoint?: string;
/** Global cache settings */
globalCacheSettings: GlobalCacheSettings;
/** CodexLens multi-provider embedding rotation config (deprecated, use embeddingPoolConfig) */
codexlensEmbeddingRotation?: CodexLensEmbeddingRotation;
/** Generic embedding pool configuration with auto-discovery support */
embeddingPoolConfig?: EmbeddingPoolConfig;
}
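Putting the pieces together, a minimal object satisfying `LiteLLMApiConfig` could look like the sketch below. All IDs, model names, timestamps, and the relative import path are illustrative placeholders, not values shipped with ccw:

```ts
import type { LiteLLMApiConfig } from './litellm-api-config';

// Illustrative example only; identifiers and models are made up.
const exampleConfig: LiteLLMApiConfig = {
  version: 1,
  providers: [
    {
      id: 'openai-main',
      name: 'OpenAI',
      type: 'openai',
      apiKey: '${OPENAI_API_KEY}',
      enabled: true,
      routingStrategy: 'simple-shuffle',
      apiKeys: [{ id: 'key-1', key: '${OPENAI_API_KEY}', enabled: true }],
      createdAt: '2025-12-25T00:00:00Z',
      updatedAt: '2025-12-25T00:00:00Z',
    },
  ],
  endpoints: [
    {
      id: 'my-gpt4o',
      name: 'My GPT-4o',
      providerId: 'openai-main',
      model: 'gpt-4o',
      cacheStrategy: { enabled: true, ttlMinutes: 60, maxSizeKB: 512, filePatterns: ['*.md'] },
      enabled: true,
      createdAt: '2025-12-25T00:00:00Z',
      updatedAt: '2025-12-25T00:00:00Z',
    },
  ],
  globalCacheSettings: { enabled: true, cacheDir: '~/.ccw/cache/context', maxTotalSizeMB: 100 },
  embeddingPoolConfig: {
    enabled: true,
    targetModel: 'text-embedding-3-small',
    strategy: 'round_robin',
    autoDiscover: true,
    defaultCooldown: 60,
    defaultMaxConcurrentPerKey: 4,
  },
};
```

Note that `embeddingPoolConfig` is the newer, auto-discovering replacement for `codexlensEmbeddingRotation`, which remains in the schema only for backward compatibility.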

View File

@@ -0,0 +1,96 @@
/**
* LiteLLM Client Tests
* Tests for the LiteLLM TypeScript bridge
*/
import { describe, it, expect, beforeEach } from '@jest/globals';
import { LiteLLMClient, getLiteLLMClient, checkLiteLLMAvailable, getLiteLLMStatus } from '../src/tools/litellm-client';
describe('LiteLLMClient', () => {
let client: LiteLLMClient;
beforeEach(() => {
client = new LiteLLMClient({ timeout: 5000 });
});
describe('Constructor', () => {
it('should create client with default config', () => {
const defaultClient = new LiteLLMClient();
expect(defaultClient).toBeDefined();
});
it('should create client with custom config', () => {
const customClient = new LiteLLMClient({
pythonPath: 'python3',
timeout: 10000
});
expect(customClient).toBeDefined();
});
});
describe('isAvailable', () => {
it('should check if ccw-litellm is available', async () => {
const available = await client.isAvailable();
expect(typeof available).toBe('boolean');
});
});
describe('getStatus', () => {
it('should return status object', async () => {
const status = await client.getStatus();
expect(status).toHaveProperty('available');
expect(typeof status.available).toBe('boolean');
});
});
describe('embed', () => {
it('should throw error for empty texts array', async () => {
await expect(client.embed([])).rejects.toThrow('texts array cannot be empty');
});
it('should throw error for null texts', async () => {
await expect(client.embed(null as any)).rejects.toThrow();
});
});
describe('chat', () => {
it('should throw error for empty message', async () => {
await expect(client.chat('')).rejects.toThrow('message cannot be empty');
});
});
describe('chatMessages', () => {
it('should throw error for empty messages array', async () => {
await expect(client.chatMessages([])).rejects.toThrow('messages array cannot be empty');
});
it('should throw error for null messages', async () => {
await expect(client.chatMessages(null as any)).rejects.toThrow();
});
});
});
describe('Singleton Functions', () => {
describe('getLiteLLMClient', () => {
it('should return singleton instance', () => {
const client1 = getLiteLLMClient();
const client2 = getLiteLLMClient();
expect(client1).toBe(client2);
});
});
describe('checkLiteLLMAvailable', () => {
it('should return boolean', async () => {
const available = await checkLiteLLMAvailable();
expect(typeof available).toBe('boolean');
});
});
describe('getLiteLLMStatus', () => {
it('should return status object', async () => {
const status = await getLiteLLMStatus();
expect(status).toHaveProperty('available');
expect(typeof status.available).toBe('boolean');
});
});
});
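For orientation, a plausible usage sketch of the client surface exercised by these tests (singleton accessor, availability check, `embed`, `chat`) is shown below. It only uses calls that appear in the test file, and the return value shapes are deliberately left unspecified:

```ts
import { getLiteLLMClient, checkLiteLLMAvailable } from '../src/tools/litellm-client';

// Usage sketch based only on the surface exercised by these tests; the exact
// shape of the embed/chat return values is not specified here, so they are
// simply logged.
async function demo(): Promise<void> {
  if (!(await checkLiteLLMAvailable())) {
    console.warn('ccw-litellm is not installed; skipping demo');
    return;
  }

  const client = getLiteLLMClient(); // shared singleton instance

  const embeddings = await client.embed(['function add(a, b) { return a + b; }']);
  console.log('embed() result:', embeddings);

  const reply = await client.chat('Summarize what ccw-litellm does in one sentence.');
  console.log('chat() result:', reply);
}

demo().catch(console.error);
```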

ccw/tsconfig.tsbuildinfo Normal file
View File

@@ -0,0 +1 @@
{"root":["./src/cli.ts","./src/index.ts","./src/commands/cli.ts","./src/commands/core-memory.ts","./src/commands/hook.ts","./src/commands/install.ts","./src/commands/list.ts","./src/commands/memory.ts","./src/commands/serve.ts","./src/commands/session-path-resolver.ts","./src/commands/session.ts","./src/commands/stop.ts","./src/commands/tool.ts","./src/commands/uninstall.ts","./src/commands/upgrade.ts","./src/commands/view.ts","./src/config/litellm-api-config-manager.ts","./src/config/provider-models.ts","./src/config/storage-paths.ts","./src/core/cache-manager.ts","./src/core/claude-freshness.ts","./src/core/core-memory-store.ts","./src/core/dashboard-generator-patch.ts","./src/core/dashboard-generator.ts","./src/core/data-aggregator.ts","./src/core/history-importer.ts","./src/core/lite-scanner-complete.ts","./src/core/lite-scanner.ts","./src/core/manifest.ts","./src/core/memory-embedder-bridge.ts","./src/core/memory-store.ts","./src/core/server.ts","./src/core/session-clustering-service.ts","./src/core/session-scanner.ts","./src/core/websocket.ts","./src/core/routes/ccw-routes.ts","./src/core/routes/claude-routes.ts","./src/core/routes/cli-routes.ts","./src/core/routes/codexlens-routes.ts","./src/core/routes/core-memory-routes.ts","./src/core/routes/files-routes.ts","./src/core/routes/graph-routes.ts","./src/core/routes/help-routes.ts","./src/core/routes/hooks-routes.ts","./src/core/routes/litellm-api-routes.ts","./src/core/routes/litellm-routes.ts","./src/core/routes/mcp-routes.ts","./src/core/routes/mcp-templates-db.ts","./src/core/routes/memory-routes.ts","./src/core/routes/rules-routes.ts","./src/core/routes/session-routes.ts","./src/core/routes/skills-routes.ts","./src/core/routes/status-routes.ts","./src/core/routes/system-routes.ts","./src/mcp-server/index.ts","./src/tools/classify-folders.ts","./src/tools/claude-cli-tools.ts","./src/tools/cli-config-manager.ts","./src/tools/cli-executor.ts","./src/tools/cli-history-store.ts","./src/tools/codex-lens.ts","./src/tools/context-cache-store.ts","./src/tools/context-cache.ts","./src/tools/convert-tokens-to-css.ts","./src/tools/core-memory.ts","./src/tools/detect-changed-modules.ts","./src/tools/discover-design-files.ts","./src/tools/edit-file.ts","./src/tools/generate-module-docs.ts","./src/tools/get-modules-by-depth.ts","./src/tools/index.ts","./src/tools/litellm-client.ts","./src/tools/litellm-executor.ts","./src/tools/native-session-discovery.ts","./src/tools/notifier.ts","./src/tools/pattern-parser.ts","./src/tools/read-file.ts","./src/tools/resume-strategy.ts","./src/tools/session-content-parser.ts","./src/tools/session-manager.ts","./src/tools/smart-context.ts","./src/tools/smart-search.ts","./src/tools/storage-manager.ts","./src/tools/ui-generate-preview.js","./src/tools/ui-instantiate-prototypes.js","./src/tools/update-module-claude.js","./src/tools/write-file.ts","./src/types/config.ts","./src/types/index.ts","./src/types/litellm-api-config.ts","./src/types/session.ts","./src/types/tool.ts","./src/utils/browser-launcher.ts","./src/utils/file-utils.ts","./src/utils/path-resolver.ts","./src/utils/path-validator.ts","./src/utils/ui.ts"],"version":"5.9.3"}

View File

@@ -29,10 +29,14 @@ src/codexlens/search/query_parser.py
src/codexlens/search/ranking.py
src/codexlens/semantic/__init__.py
src/codexlens/semantic/ann_index.py
src/codexlens/semantic/base.py
src/codexlens/semantic/chunker.py
src/codexlens/semantic/code_extractor.py
src/codexlens/semantic/embedder.py
src/codexlens/semantic/factory.py
src/codexlens/semantic/gpu_support.py
src/codexlens/semantic/litellm_embedder.py
src/codexlens/semantic/rotational_embedder.py
src/codexlens/semantic/vector_store.py
src/codexlens/storage/__init__.py
src/codexlens/storage/dir_index.py
@@ -54,6 +58,7 @@ tests/test_cli_output.py
tests/test_code_extractor.py
tests/test_config.py
tests/test_dual_fts.py
tests/test_embedding_backend_availability.py
tests/test_encoding.py
tests/test_enrichment.py
tests/test_entities.py
@@ -67,6 +72,7 @@ tests/test_parsers.py
tests/test_performance_optimizations.py
tests/test_pure_vector_search.py
tests/test_query_parser.py
tests/test_recursive_splitting.py
tests/test_result_grouping.py
tests/test_rrf_fusion.py
tests/test_schema_cleanup_migration.py

View File

@@ -62,10 +62,27 @@ def _parse_languages(raw: Optional[List[str]]) -> Optional[List[str]]:
def _get_index_root() -> Path:
"""Get the index root directory from config or default."""
"""Get the index root directory from config or default.
Priority order:
1. CODEXLENS_INDEX_DIR environment variable
2. index_dir from ~/.codexlens/config.json
3. Default: ~/.codexlens/indexes
"""
env_override = os.getenv("CODEXLENS_INDEX_DIR")
if env_override:
return Path(env_override).expanduser().resolve()
# Read from config.json
config_file = Path.home() / ".codexlens" / "config.json"
if config_file.exists():
try:
cfg = json.loads(config_file.read_text(encoding="utf-8"))
if "index_dir" in cfg:
return Path(cfg["index_dir"]).expanduser().resolve()
except (json.JSONDecodeError, OSError):
pass # Fall through to default
return Path.home() / ".codexlens" / "indexes"
@@ -86,10 +103,12 @@ def init(
"-l",
help="Limit indexing to specific languages (repeat or comma-separated).",
),
workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, max=16, help="Parallel worker processes (default: auto-detect based on CPU count, max 16)."),
workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, help="Parallel worker processes (default: auto-detect based on CPU count)."),
force: bool = typer.Option(False, "--force", "-f", help="Force full reindex (skip incremental mode)."),
no_embeddings: bool = typer.Option(False, "--no-embeddings", help="Skip automatic embedding generation (if semantic deps installed)."),
embedding_model: str = typer.Option("code", "--embedding-model", help="Embedding model profile: fast, code, multilingual, balanced."),
embedding_backend: str = typer.Option("fastembed", "--embedding-backend", help="Embedding backend: fastembed (local) or litellm (remote API)."),
embedding_model: str = typer.Option("code", "--embedding-model", help="Embedding model: profile name for fastembed (fast/code/multilingual/balanced) or model name for litellm (e.g. text-embedding-3-small)."),
max_workers: int = typer.Option(1, "--max-workers", min=1, help="Max concurrent API calls for embedding generation. Recommended: 4-8 for litellm backend."),
json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."),
) -> None:
@@ -103,6 +122,14 @@ def init(
If semantic search dependencies are installed, automatically generates embeddings
after indexing completes. Use --no-embeddings to skip this step.
Embedding Backend Options:
- fastembed: Local ONNX-based embeddings (default, no API calls)
- litellm: Remote API embeddings via ccw-litellm (requires API keys)
Embedding Model Options:
- For fastembed backend: Use profile names (fast, code, multilingual, balanced)
- For litellm backend: Use model names (e.g., text-embedding-3-small, text-embedding-ada-002)
"""
_configure_logging(verbose, json_mode)
config = Config()
@@ -139,26 +166,37 @@ def init(
"errors": len(build_result.errors),
}
if json_mode:
print_json(success=True, result=result)
else:
if not json_mode:
console.print(f"[green]OK[/green] Indexed [bold]{build_result.total_files}[/bold] files in [bold]{build_result.total_dirs}[/bold] directories")
console.print(f" Index root: {build_result.index_root}")
if build_result.errors:
console.print(f" [yellow]Warnings:[/yellow] {len(build_result.errors)} errors")
# Auto-generate embeddings if semantic search is available
# Auto-generate embeddings if the requested backend is available
if not no_embeddings:
try:
from codexlens.semantic import SEMANTIC_AVAILABLE
from codexlens.semantic import is_embedding_backend_available
from codexlens.cli.embedding_manager import generate_embeddings_recursive, get_embeddings_status
if SEMANTIC_AVAILABLE:
# Validate embedding backend
valid_backends = ["fastembed", "litellm"]
if embedding_backend not in valid_backends:
error_msg = f"Invalid embedding backend: {embedding_backend}. Must be one of: {', '.join(valid_backends)}"
if json_mode:
print_json(success=False, error=error_msg)
else:
console.print(f"[red]Error:[/red] {error_msg}")
raise typer.Exit(code=1)
backend_available, backend_error = is_embedding_backend_available(embedding_backend)
if backend_available:
# Use the index root directory (not the _index.db file)
index_root = Path(build_result.index_root)
if not json_mode:
console.print("\n[bold]Generating embeddings...[/bold]")
console.print(f"Backend: [cyan]{embedding_backend}[/cyan]")
console.print(f"Model: [cyan]{embedding_model}[/cyan]")
else:
# Output progress message for JSON mode (parsed by Node.js)
@@ -179,10 +217,12 @@ def init(
embed_result = generate_embeddings_recursive(
index_root,
embedding_backend=embedding_backend,
model_profile=embedding_model,
force=False, # Don't force regenerate during init
chunk_size=2000,
progress_callback=progress_update, # Always use callback
max_workers=max_workers,
)
if embed_result["success"]:
@@ -224,10 +264,10 @@ def init(
}
else:
if not json_mode and verbose:
console.print("[dim]Semantic search not available. Skipping embeddings.[/dim]")
console.print(f"[dim]Embedding backend '{embedding_backend}' not available. Skipping embeddings.[/dim]")
result["embeddings"] = {
"generated": False,
"error": "Semantic dependencies not installed",
"error": backend_error or "Embedding backend not available",
}
except Exception as e:
if not json_mode and verbose:
@@ -242,6 +282,10 @@ def init(
"error": "Skipped (--no-embeddings)",
}
# Output final JSON result with embeddings status
if json_mode:
print_json(success=True, result=result)
except StorageError as exc:
if json_mode:
print_json(success=False, error=f"Storage error: {exc}")
@@ -307,7 +351,7 @@ def search(
Use 'codexlens embeddings-generate' to create embeddings first.
Hybrid Mode:
Default weights: exact=0.4, fuzzy=0.3, vector=0.3
Default weights: exact=0.3, fuzzy=0.1, vector=0.6
Use --weights to customize (e.g., --weights 0.5,0.3,0.2)
Examples:
@@ -434,6 +478,7 @@ def search(
"path": r.path,
"score": r.score,
"excerpt": r.excerpt,
"content": r.content, # Full function/class body
"source": getattr(r, "search_source", None),
"symbol": getattr(r, "symbol", None),
}
@@ -715,6 +760,16 @@ def status(
console.print(f" Coverage: {embeddings_info['coverage_percent']:.1f}%")
console.print(f" Total Chunks: {embeddings_info['total_chunks']}")
# Display model information if available
model_info = embeddings_info.get('model_info')
if model_info:
console.print("\n[bold]Embedding Model:[/bold]")
console.print(f" Backend: [cyan]{model_info.get('backend', 'unknown')}[/cyan]")
console.print(f" Model: [cyan]{model_info.get('model_profile', 'unknown')}[/cyan] ({model_info.get('model_name', '')})")
console.print(f" Dimensions: {model_info.get('embedding_dim', 'unknown')}")
if model_info.get('updated_at'):
console.print(f" Last Updated: {model_info['updated_at']}")
except StorageError as exc:
if json_mode:
print_json(success=False, error=f"Storage error: {exc}")
@@ -1764,11 +1819,17 @@ def embeddings_generate(
exists=True,
help="Path to _index.db file or project directory.",
),
backend: str = typer.Option(
"fastembed",
"--backend",
"-b",
help="Embedding backend: fastembed (local) or litellm (remote API).",
),
model: str = typer.Option(
"code",
"--model",
"-m",
help="Model profile: fast, code, multilingual, balanced.",
help="Model: profile name for fastembed (fast/code/multilingual/balanced) or model name for litellm (e.g. text-embedding-3-small).",
),
force: bool = typer.Option(
False,
@@ -1787,6 +1848,13 @@ def embeddings_generate(
"-r",
help="Recursively process all _index.db files in directory tree.",
),
max_workers: int = typer.Option(
1,
"--max-workers",
"-w",
min=1,
help="Max concurrent API calls. Recommended: 4-8 for litellm backend. Default: 1 (sequential).",
),
json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose output."),
) -> None:
@@ -1796,20 +1864,48 @@ def embeddings_generate(
semantic search capabilities. Embeddings are stored in the same
database as the FTS index.
Model Profiles:
- fast: BAAI/bge-small-en-v1.5 (384 dims, ~80MB)
- code: jinaai/jina-embeddings-v2-base-code (768 dims, ~150MB) [recommended]
- multilingual: intfloat/multilingual-e5-large (1024 dims, ~1GB)
- balanced: mixedbread-ai/mxbai-embed-large-v1 (1024 dims, ~600MB)
Embedding Backend Options:
- fastembed: Local ONNX-based embeddings (default, no API calls)
- litellm: Remote API embeddings via ccw-litellm (requires API keys)
Model Options:
For fastembed backend (profiles):
- fast: BAAI/bge-small-en-v1.5 (384 dims, ~80MB)
- code: jinaai/jina-embeddings-v2-base-code (768 dims, ~150MB) [recommended]
- multilingual: intfloat/multilingual-e5-large (1024 dims, ~1GB)
- balanced: mixedbread-ai/mxbai-embed-large-v1 (1024 dims, ~600MB)
For litellm backend (model names):
- text-embedding-3-small, text-embedding-3-large (OpenAI)
- text-embedding-ada-002 (OpenAI legacy)
- Any model supported by ccw-litellm
Examples:
codexlens embeddings-generate ~/projects/my-app # Auto-find index for project
codexlens embeddings-generate ~/projects/my-app # Auto-find index (fastembed, code profile)
codexlens embeddings-generate ~/.codexlens/indexes/project/_index.db # Specific index
codexlens embeddings-generate ~/projects/my-app --model fast --force # Regenerate with fast model
codexlens embeddings-generate ~/projects/my-app --backend litellm --model text-embedding-3-small # Use LiteLLM
codexlens embeddings-generate ~/projects/my-app --model fast --force # Regenerate with fast profile
"""
_configure_logging(verbose, json_mode)
from codexlens.cli.embedding_manager import generate_embeddings, generate_embeddings_recursive
from codexlens.cli.embedding_manager import (
generate_embeddings,
generate_embeddings_recursive,
scan_for_model_conflicts,
check_global_model_lock,
set_locked_model_config,
)
# Validate backend
valid_backends = ["fastembed", "litellm"]
if backend not in valid_backends:
error_msg = f"Invalid backend: {backend}. Must be one of: {', '.join(valid_backends)}"
if json_mode:
print_json(success=False, error=error_msg)
else:
console.print(f"[red]Error:[/red] {error_msg}")
console.print(f"[dim]Valid backends: {', '.join(valid_backends)}[/dim]")
raise typer.Exit(code=1)
# Resolve path
target_path = path.expanduser().resolve()
@@ -1860,23 +1956,100 @@ def embeddings_generate(
console.print(f"Mode: [yellow]Recursive[/yellow]")
else:
console.print(f"Index: [dim]{index_path}[/dim]")
console.print(f"Model: [cyan]{model}[/cyan]\n")
console.print(f"Backend: [cyan]{backend}[/cyan]")
console.print(f"Model: [cyan]{model}[/cyan]")
if max_workers > 1:
console.print(f"Concurrency: [cyan]{max_workers} workers[/cyan]")
console.print()
# Check global model lock (prevents mixing different models)
if not force:
lock_result = check_global_model_lock(backend, model)
if lock_result["has_conflict"]:
locked = lock_result["locked_config"]
if json_mode:
print_json(
success=False,
error="Global model lock conflict",
code="MODEL_LOCKED",
locked_config=locked,
target_config=lock_result["target_config"],
hint="Use --force to override the lock and switch to a different model (will regenerate all embeddings)",
)
raise typer.Exit(code=1)
else:
console.print("[red]⛔ Global Model Lock Active[/red]")
console.print(f" Locked model: [cyan]{locked['backend']}/{locked['model']}[/cyan]")
console.print(f" Requested: [yellow]{backend}/{model}[/yellow]")
console.print(f" Locked at: {locked.get('locked_at', 'unknown')}")
console.print()
console.print("[dim]All indexes must use the same embedding model.[/dim]")
console.print("[dim]Use --force to switch models (will regenerate all embeddings).[/dim]")
raise typer.Exit(code=1)
# Pre-check for model conflicts (only if not forcing)
if not force:
# Determine the index root for conflict scanning
scan_root = index_root if use_recursive else (index_path.parent if index_path else None)
if scan_root:
conflict_result = scan_for_model_conflicts(scan_root, backend, model)
if conflict_result["has_conflict"]:
existing = conflict_result["existing_config"]
conflict_count = len(conflict_result["conflicts"])
if json_mode:
# JSON mode: return structured error for UI handling
print_json(
success=False,
error="Model conflict detected",
code="MODEL_CONFLICT",
existing_config=existing,
target_config=conflict_result["target_config"],
conflict_count=conflict_count,
conflicts=conflict_result["conflicts"][:5], # Show first 5 conflicts
hint="Use --force to overwrite existing embeddings with the new model",
)
raise typer.Exit(code=1)
else:
# Interactive mode: show warning and ask for confirmation
console.print("[yellow]⚠ Model Conflict Detected[/yellow]")
console.print(f" Existing: [red]{existing['backend']}/{existing['model']}[/red] ({existing.get('embedding_dim', '?')} dim)")
console.print(f" Requested: [green]{backend}/{model}[/green]")
console.print(f" Affected indexes: [yellow]{conflict_count}[/yellow]")
console.print()
console.print("[dim]Mixing different embedding models in the same index is not supported.[/dim]")
console.print("[dim]Overwriting will delete all existing embeddings and regenerate with the new model.[/dim]")
console.print()
# Ask for confirmation
if typer.confirm("Overwrite existing embeddings with the new model?", default=False):
force = True
console.print("[green]Confirmed.[/green] Proceeding with overwrite...\n")
else:
console.print("[yellow]Cancelled.[/yellow] Use --force to skip this prompt.")
raise typer.Exit(code=0)
if use_recursive:
result = generate_embeddings_recursive(
index_root,
embedding_backend=backend,
model_profile=model,
force=force,
chunk_size=chunk_size,
progress_callback=progress_update,
max_workers=max_workers,
)
else:
result = generate_embeddings(
index_path,
embedding_backend=backend,
model_profile=model,
force=force,
chunk_size=chunk_size,
progress_callback=progress_update,
max_workers=max_workers,
)
if json_mode:
@@ -1889,14 +2062,21 @@ def embeddings_generate(
# Provide helpful hints
if "already has" in error_msg:
console.print("\n[dim]Use --force to regenerate existing embeddings[/dim]")
elif "Semantic search not available" in error_msg:
elif "fastembed not available" in error_msg or "Semantic search not available" in error_msg:
console.print("\n[dim]Install semantic dependencies:[/dim]")
console.print(" [cyan]pip install codexlens[semantic][/cyan]")
elif "ccw-litellm not available" in error_msg:
console.print("\n[dim]Install LiteLLM backend dependencies:[/dim]")
console.print(" [cyan]pip install ccw-litellm[/cyan]")
raise typer.Exit(code=1)
data = result["result"]
# Set global model lock after successful generation
# This prevents using different models for future indexes
set_locked_model_config(backend, model)
if use_recursive:
# Recursive mode output
console.print(f"[green]✓[/green] Recursive embeddings generation complete!")
@@ -1938,3 +2118,178 @@ def embeddings_generate(
console.print("\n[dim]Use vector search with:[/dim]")
console.print(" [cyan]codexlens search 'your query' --mode pure-vector[/cyan]")
# ==================== GPU Management Commands ====================
@app.command(name="gpu-list")
def gpu_list(
json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
) -> None:
"""List available GPU devices for embedding acceleration.
Shows all detected GPU devices with their capabilities and selection status.
Discrete GPUs (NVIDIA, AMD) are automatically preferred over integrated GPUs.
Examples:
codexlens gpu-list # List all GPUs
codexlens gpu-list --json # JSON output for scripting
"""
from codexlens.semantic.gpu_support import get_gpu_devices, detect_gpu, get_selected_device_id
gpu_info = detect_gpu()
devices = get_gpu_devices()
selected_id = get_selected_device_id()
if json_mode:
print_json(
success=True,
result={
"devices": devices,
"selected_device_id": selected_id,
"gpu_available": gpu_info.gpu_available,
"providers": gpu_info.onnx_providers,
}
)
else:
if not devices:
console.print("[yellow]No GPU devices detected[/yellow]")
console.print(f"ONNX Providers: [dim]{', '.join(gpu_info.onnx_providers)}[/dim]")
return
console.print("[bold]Available GPU Devices[/bold]\n")
table = Table(show_header=True, header_style="bold")
table.add_column("ID", justify="center")
table.add_column("Name")
table.add_column("Vendor", justify="center")
table.add_column("Type", justify="center")
table.add_column("Status", justify="center")
for dev in devices:
type_str = "[green]Discrete[/green]" if dev["is_discrete"] else "[dim]Integrated[/dim]"
vendor_color = {
"nvidia": "green",
"amd": "red",
"intel": "blue"
}.get(dev["vendor"], "white")
vendor_str = f"[{vendor_color}]{dev['vendor'].upper()}[/{vendor_color}]"
status_parts = []
if dev["is_preferred"]:
status_parts.append("[cyan]Auto[/cyan]")
if dev["is_selected"]:
status_parts.append("[green]✓ Selected[/green]")
status_str = " ".join(status_parts) if status_parts else "[dim]—[/dim]"
table.add_row(
str(dev["device_id"]),
dev["name"],
vendor_str,
type_str,
status_str,
)
console.print(table)
console.print(f"\nONNX Providers: [dim]{', '.join(gpu_info.onnx_providers)}[/dim]")
console.print("\n[dim]Select GPU with:[/dim]")
console.print(" [cyan]codexlens gpu-select <device_id>[/cyan]")
@app.command(name="gpu-select")
def gpu_select(
device_id: int = typer.Argument(
...,
help="GPU device ID to use for embeddings. Use 'codexlens gpu-list' to see available IDs.",
),
json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
) -> None:
"""Select a specific GPU device for embedding generation.
By default, CodexLens automatically selects the most powerful GPU (discrete over integrated).
Use this command to override the selection.
Examples:
codexlens gpu-select 1 # Use GPU device 1
codexlens gpu-select 0 --json # Select GPU 0 with JSON output
"""
from codexlens.semantic.gpu_support import set_selected_device_id, get_gpu_devices
from codexlens.semantic.embedder import clear_embedder_cache
devices = get_gpu_devices()
valid_ids = [dev["device_id"] for dev in devices]
if device_id not in valid_ids:
if json_mode:
print_json(success=False, error=f"Invalid device_id {device_id}. Valid IDs: {valid_ids}")
else:
console.print(f"[red]Error:[/red] Invalid device_id {device_id}")
console.print(f"Valid IDs: {valid_ids}")
console.print("\n[dim]Use 'codexlens gpu-list' to see available devices[/dim]")
raise typer.Exit(code=1)
success = set_selected_device_id(device_id)
if success:
# Clear embedder cache to force reload with new GPU
clear_embedder_cache()
device_name = next((dev["name"] for dev in devices if dev["device_id"] == device_id), "Unknown")
if json_mode:
print_json(
success=True,
result={
"device_id": device_id,
"device_name": device_name,
"message": f"GPU selection set to device {device_id}: {device_name}",
}
)
else:
console.print(f"[green]✓[/green] GPU selection updated")
console.print(f" Device ID: {device_id}")
console.print(f" Device: [cyan]{device_name}[/cyan]")
console.print("\n[dim]New embeddings will use this GPU[/dim]")
else:
if json_mode:
print_json(success=False, error="Failed to set GPU selection")
else:
console.print("[red]Error:[/red] Failed to set GPU selection")
raise typer.Exit(code=1)
@app.command(name="gpu-reset")
def gpu_reset(
json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
) -> None:
"""Reset GPU selection to automatic detection.
Clears any manual GPU selection and returns to automatic selection
(discrete GPU preferred over integrated).
Examples:
codexlens gpu-reset # Reset to auto-detection
"""
from codexlens.semantic.gpu_support import set_selected_device_id, detect_gpu
from codexlens.semantic.embedder import clear_embedder_cache
set_selected_device_id(None)
clear_embedder_cache()
gpu_info = detect_gpu(force_refresh=True)
if json_mode:
print_json(
success=True,
result={
"message": "GPU selection reset to auto-detection",
"preferred_device_id": gpu_info.preferred_device_id,
"preferred_device_name": gpu_info.gpu_name,
}
)
else:
console.print("[green]✓[/green] GPU selection reset to auto-detection")
if gpu_info.preferred_device_id is not None:
console.print(f" Auto-selected device: {gpu_info.preferred_device_id}")
console.print(f" Device: [cyan]{gpu_info.gpu_name}[/cyan]")

View File

@@ -1,27 +1,36 @@
"""Embedding Manager - Manage semantic embeddings for code indexes."""
import gc
import json
import logging
import sqlite3
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from itertools import islice
from pathlib import Path
from typing import Dict, Generator, List, Optional, Tuple
from typing import Any, Dict, Generator, List, Optional, Tuple
try:
from codexlens.semantic import SEMANTIC_AVAILABLE
if SEMANTIC_AVAILABLE:
from codexlens.semantic.embedder import Embedder, get_embedder, clear_embedder_cache
from codexlens.semantic.vector_store import VectorStore
from codexlens.semantic.chunker import Chunker, ChunkConfig
from codexlens.semantic import SEMANTIC_AVAILABLE, is_embedding_backend_available
except ImportError:
SEMANTIC_AVAILABLE = False
def is_embedding_backend_available(_backend: str): # type: ignore[no-redef]
return False, "codexlens.semantic not available"
logger = logging.getLogger(__name__)
# Embedding batch size - larger values improve throughput on modern hardware
# Benchmark: 256 gives ~2.35x speedup over 64 with DirectML GPU acceleration
EMBEDDING_BATCH_SIZE = 256 # Optimized from 64 based on batch size benchmarks
EMBEDDING_BATCH_SIZE = 256
def _cleanup_fastembed_resources() -> None:
"""Best-effort cleanup for fastembed/ONNX resources (no-op for other backends)."""
try:
from codexlens.semantic.embedder import clear_embedder_cache
clear_embedder_cache()
except Exception:
pass
def _generate_chunks_from_cursor(
@@ -79,6 +88,44 @@ def _generate_chunks_from_cursor(
failed_files.append((file_path, str(e)))
def _create_token_aware_batches(
chunk_generator: Generator,
max_tokens_per_batch: int = 8000,
) -> Generator[List[Tuple], None, None]:
"""Group chunks by total token count instead of fixed count.
Uses fast token estimation (len(content) // 4) for efficiency.
Yields batches when approaching the token limit.
Args:
chunk_generator: Generator yielding (chunk, file_path) tuples
max_tokens_per_batch: Maximum tokens per batch (default: 8000)
Yields:
List of (chunk, file_path) tuples representing a batch
"""
current_batch = []
current_tokens = 0
for chunk, file_path in chunk_generator:
# Fast token estimation: len(content) // 4
chunk_tokens = len(chunk.content) // 4
# If adding this chunk would exceed limit and we have items, yield current batch
if current_tokens + chunk_tokens > max_tokens_per_batch and current_batch:
yield current_batch
current_batch = []
current_tokens = 0
# Add chunk to current batch
current_batch.append((chunk, file_path))
current_tokens += chunk_tokens
# Yield final batch if not empty
if current_batch:
yield current_batch
def _get_path_column(conn: sqlite3.Connection) -> str:
"""Detect whether files table uses 'path' or 'full_path' column.
@@ -189,33 +236,110 @@ def check_index_embeddings(index_path: Path) -> Dict[str, any]:
}
def _get_embedding_defaults() -> tuple[str, str, bool, List, str, float]:
"""Get default embedding settings from config.
Returns:
Tuple of (backend, model, use_gpu, endpoints, strategy, cooldown)
"""
try:
from codexlens.config import Config
config = Config.load()
return (
config.embedding_backend,
config.embedding_model,
config.embedding_use_gpu,
config.embedding_endpoints,
config.embedding_strategy,
config.embedding_cooldown,
)
except Exception:
return "fastembed", "code", True, [], "latency_aware", 60.0
def generate_embeddings(
index_path: Path,
model_profile: str = "code",
embedding_backend: Optional[str] = None,
model_profile: Optional[str] = None,
force: bool = False,
chunk_size: int = 2000,
overlap: int = 200,
progress_callback: Optional[callable] = None,
use_gpu: Optional[bool] = None,
max_tokens_per_batch: Optional[int] = None,
max_workers: Optional[int] = None,
endpoints: Optional[List] = None,
strategy: Optional[str] = None,
cooldown: Optional[float] = None,
) -> Dict[str, any]:
"""Generate embeddings for an index using memory-efficient batch processing.
This function processes files in small batches to keep memory usage under 2GB,
regardless of the total project size.
regardless of the total project size. Supports concurrent API calls for
LiteLLM backend to improve throughput.
Args:
index_path: Path to _index.db file
model_profile: Model profile (fast, code, multilingual, balanced)
embedding_backend: Embedding backend to use (fastembed or litellm).
Defaults to config setting.
model_profile: Model profile for fastembed (fast, code, multilingual, balanced)
or model name for litellm (e.g., qwen3-embedding).
Defaults to config setting.
force: If True, regenerate even if embeddings exist
chunk_size: Maximum chunk size in characters
overlap: Overlap size in characters for sliding window chunking (default: 200)
progress_callback: Optional callback for progress updates
use_gpu: Whether to use GPU acceleration (fastembed only).
Defaults to config setting.
max_tokens_per_batch: Maximum tokens per batch for token-aware batching.
If None, attempts to get from embedder.max_tokens,
then falls back to 8000. If set, overrides automatic detection.
max_workers: Maximum number of concurrent API calls.
If None, uses dynamic defaults based on backend and endpoint count.
endpoints: Optional list of endpoint configurations for multi-API load balancing.
Each dict has keys: model, api_key, api_base, weight.
strategy: Selection strategy for multi-endpoint mode (round_robin, latency_aware).
cooldown: Default cooldown seconds for rate-limited endpoints.
Returns:
Result dictionary with generation statistics
"""
if not SEMANTIC_AVAILABLE:
return {
"success": False,
"error": "Semantic search not available. Install with: pip install codexlens[semantic]",
}
# Get defaults from config if not specified
(default_backend, default_model, default_gpu,
default_endpoints, default_strategy, default_cooldown) = _get_embedding_defaults()
if embedding_backend is None:
embedding_backend = default_backend
if model_profile is None:
model_profile = default_model
if use_gpu is None:
use_gpu = default_gpu
if endpoints is None:
endpoints = default_endpoints
if strategy is None:
strategy = default_strategy
if cooldown is None:
cooldown = default_cooldown
# Calculate endpoint count for worker scaling
endpoint_count = len(endpoints) if endpoints else 1
# Set dynamic max_workers default based on backend type and endpoint count
# - FastEmbed: CPU-bound, sequential is optimal (1 worker)
# - LiteLLM single endpoint: 4 workers default
# - LiteLLM multi-endpoint: workers = endpoint_count * 2 (to saturate all APIs)
if max_workers is None:
if embedding_backend == "litellm":
if endpoint_count > 1:
max_workers = endpoint_count * 2 # No cap, scale with endpoints
else:
max_workers = 4
else:
max_workers = 1
backend_available, backend_error = is_embedding_backend_available(embedding_backend)
if not backend_available:
return {"success": False, "error": backend_error or "Embedding backend not available"}
if not index_path.exists():
return {
@@ -253,13 +377,43 @@ def generate_embeddings(
# Initialize components
try:
# Initialize embedder (singleton, reused throughout the function)
embedder = get_embedder(profile=model_profile)
# Import factory function to support both backends
from codexlens.semantic.factory import get_embedder as get_embedder_factory
from codexlens.semantic.vector_store import VectorStore
from codexlens.semantic.chunker import Chunker, ChunkConfig
# Initialize embedder using factory (supports fastembed, litellm, and rotational)
# For fastembed: model_profile is a profile name (fast/code/multilingual/balanced)
# For litellm: model_profile is a model name (e.g., qwen3-embedding)
# For multi-endpoint: endpoints list enables load balancing
if embedding_backend == "fastembed":
embedder = get_embedder_factory(backend="fastembed", profile=model_profile, use_gpu=use_gpu)
elif embedding_backend == "litellm":
embedder = get_embedder_factory(
backend="litellm",
model=model_profile,
endpoints=endpoints if endpoints else None,
strategy=strategy,
cooldown=cooldown,
)
else:
return {
"success": False,
"error": f"Invalid embedding backend: {embedding_backend}. Must be 'fastembed' or 'litellm'.",
}
# skip_token_count=True: Use fast estimation (len/4) instead of expensive tiktoken
# This significantly reduces CPU usage with minimal impact on metadata accuracy
chunker = Chunker(config=ChunkConfig(max_chunk_size=chunk_size, skip_token_count=True))
chunker = Chunker(config=ChunkConfig(
max_chunk_size=chunk_size,
overlap=overlap,
skip_token_count=True
))
# Log embedder info with endpoint count for multi-endpoint mode
if progress_callback:
if endpoint_count > 1:
progress_callback(f"Using {endpoint_count} API endpoints with {strategy} strategy")
progress_callback(f"Using model: {embedder.model_name} ({embedder.embedding_dim} dimensions)")
except Exception as e:
@@ -292,7 +446,7 @@ def generate_embeddings(
# Set/update model configuration for this index
vector_store.set_model_config(
model_profile, embedder.model_name, embedder.embedding_dim
model_profile, embedder.model_name, embedder.embedding_dim, backend=embedding_backend
)
# Use bulk insert mode for efficient batch ANN index building
# This defers ANN updates until end_bulk_insert() is called
@@ -319,42 +473,203 @@ def generate_embeddings(
cursor, chunker, path_column, FILE_BATCH_SIZE, failed_files
)
# Determine max tokens per batch
# Priority: explicit parameter > embedder.max_tokens > default 8000
if max_tokens_per_batch is None:
max_tokens_per_batch = getattr(embedder, 'max_tokens', 8000)
# Create token-aware batches or fall back to fixed-size batching
if max_tokens_per_batch:
batch_generator = _create_token_aware_batches(
chunk_generator, max_tokens_per_batch
)
else:
# Fallback to fixed-size batching for backward compatibility
def fixed_size_batches():
while True:
batch = list(islice(chunk_generator, EMBEDDING_BATCH_SIZE))
if not batch:
break
yield batch
batch_generator = fixed_size_batches()
batch_number = 0
files_seen = set()
        def compute_embeddings_only(batch_data: Tuple[int, List[Tuple]]):
            """Compute embeddings for a batch (no DB write) with retry logic.

            Args:
                batch_data: Tuple of (batch_number, chunk_batch)

            Returns:
                Tuple of (batch_num, chunk_batch, embeddings_numpy, batch_files, error)
            """
            import random

            batch_num, chunk_batch = batch_data
            batch_files = set()
            for _, file_path in chunk_batch:
                batch_files.add(file_path)

            max_retries = 5
            base_delay = 2.0

            for attempt in range(max_retries + 1):
                try:
                    batch_contents = [chunk.content for chunk, _ in chunk_batch]
                    embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
                    return batch_num, chunk_batch, embeddings_numpy, batch_files, None
                except Exception as e:
                    error_str = str(e).lower()
                    # Check for retryable errors (rate limit, connection, backend issues)
                    # Note: Some backends (e.g., ModelScope) return 400 with nested 500 errors
                    is_retryable = any(x in error_str for x in [
                        "429", "rate limit", "connection", "timeout",
                        "502", "503", "504", "service unavailable",
                        "500", "400", "badrequesterror", "internal server error",
                        "11434"  # Ollama port - indicates backend routing issue
                    ])
                    if attempt < max_retries and is_retryable:
                        sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
                        logger.warning(f"Batch {batch_num} failed (attempt {attempt+1}/{max_retries+1}). "
                                       f"Retrying in {sleep_time:.1f}s. Error: {e}")
                        time.sleep(sleep_time)
                        continue
                    error_msg = f"Batch {batch_num}: {str(e)}"
                    logger.error(f"Failed to compute embeddings for batch {batch_num}: {str(e)}")
                    return batch_num, chunk_batch, None, batch_files, error_msg

            # Should not reach here, but just in case
            return batch_num, chunk_batch, None, batch_files, f"Batch {batch_num}: Max retries exceeded"
# Process batches based on max_workers setting
if max_workers <= 1:
# Sequential processing - stream directly from generator (no pre-materialization)
for chunk_batch in batch_generator:
batch_number += 1
# Track files in this batch
batch_files = set()
for _, file_path in chunk_batch:
batch_files.add(file_path)
# Retry logic for transient backend errors
max_retries = 5
base_delay = 2.0
success = False
for attempt in range(max_retries + 1):
try:
# Generate embeddings
batch_contents = [chunk.content for chunk, _ in chunk_batch]
embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
# Store embeddings
vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
files_seen.update(batch_files)
total_chunks_created += len(chunk_batch)
total_files_processed = len(files_seen)
success = True
break
except Exception as e:
error_str = str(e).lower()
# Check for retryable errors (rate limit, connection, backend issues)
is_retryable = any(x in error_str for x in [
"429", "rate limit", "connection", "timeout",
"502", "503", "504", "service unavailable",
"500", "400", "badrequesterror", "internal server error",
"11434" # Ollama port - indicates backend routing issue
])
if attempt < max_retries and is_retryable:
import random
sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
logger.warning(f"Batch {batch_number} failed (attempt {attempt+1}/{max_retries+1}). "
f"Retrying in {sleep_time:.1f}s. Error: {e}")
time.sleep(sleep_time)
continue
logger.error(f"Failed to process batch {batch_number}: {str(e)}")
files_seen.update(batch_files)
break
if success and progress_callback and batch_number % 10 == 0:
progress_callback(f" Batch {batch_number}: {total_chunks_created} chunks, {total_files_processed} files")
        else:
            # Concurrent processing - main thread iterates batches (SQLite safe),
            # workers compute embeddings (parallel), main thread writes to DB (serial)
            if progress_callback:
                progress_callback(f"Processing with {max_workers} concurrent embedding workers...")

            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                pending_futures = {}  # future -> (batch_num, chunk_batch)
                completed_batches = 0
                last_reported_batch = 0
def process_completed_futures():
"""Process any completed futures and write to DB."""
nonlocal total_chunks_created, total_files_processed, completed_batches, last_reported_batch
done_futures = [f for f in pending_futures if f.done()]
for f in done_futures:
try:
batch_num, chunk_batch, embeddings_numpy, batch_files, error = f.result()
if embeddings_numpy is not None and error is None:
# Write to DB in main thread (no contention)
vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
total_chunks_created += len(chunk_batch)
files_seen.update(batch_files)
total_files_processed = len(files_seen)
completed_batches += 1
except Exception as e:
logger.error(f"Future raised exception: {e}")
completed_batches += 1
del pending_futures[f]
# Report progress based on completed batches (every 5 batches)
if progress_callback and completed_batches >= last_reported_batch + 5:
progress_callback(f" Batch {completed_batches}: {total_chunks_created} chunks, {total_files_processed} files")
last_reported_batch = completed_batches
# Iterate batches in main thread (SQLite cursor is main-thread bound)
for chunk_batch in batch_generator:
batch_number += 1
# Submit compute task to worker pool
future = executor.submit(compute_embeddings_only, (batch_number, chunk_batch))
pending_futures[future] = batch_number
# Process any completed futures to free memory and write to DB
process_completed_futures()
# Backpressure: wait if too many pending
while len(pending_futures) >= max_workers * 2:
process_completed_futures()
if len(pending_futures) >= max_workers * 2:
time.sleep(0.1) # time is imported at module level
# Wait for remaining futures
for future in as_completed(list(pending_futures.keys())):
try:
batch_num, chunk_batch, embeddings_numpy, batch_files, error = future.result()
if embeddings_numpy is not None and error is None:
vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
total_chunks_created += len(chunk_batch)
files_seen.update(batch_files)
total_files_processed = len(files_seen)
completed_batches += 1
# Report progress for remaining batches
if progress_callback and completed_batches >= last_reported_batch + 5:
progress_callback(f" Batch {completed_batches}: {total_chunks_created} chunks, {total_files_processed} files")
last_reported_batch = completed_batches
except Exception as e:
logger.error(f"Future raised exception: {e}")
# Notify before ANN index finalization (happens when bulk_insert context exits)
if progress_callback:
@@ -363,7 +678,7 @@ def generate_embeddings(
except Exception as e:
# Cleanup on error to prevent process hanging
try:
            _cleanup_fastembed_resources()
gc.collect()
except Exception:
pass
@@ -374,7 +689,7 @@ def generate_embeddings(
# Final cleanup: release ONNX resources to allow process exit
# This is critical - without it, ONNX Runtime threads prevent Python from exiting
try:
        _cleanup_fastembed_resources()
gc.collect()
except Exception:
pass
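# The concurrent branch above follows a producer/consumer split in which only the main
# thread touches SQLite and the vector store, while worker threads only compute embeddings.
# A minimal, self-contained sketch of that pattern; embed_batch and write_batch are
# illustrative stand-ins, not CodexLens APIs.
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

def embed_batch(batch):
    return [f"vec({item})" for item in batch]        # stands in for embedder.embed_to_numpy

def write_batch(vectors):
    print("wrote", len(vectors), "vectors")          # stands in for vector_store.add_chunks_batch_numpy

def run(batches, max_workers=4):
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        pending = {}
        for batch in batches:                        # main thread iterates the source
            pending[executor.submit(embed_batch, batch)] = True
            while len(pending) >= max_workers * 2:   # backpressure, as in the code above
                for f in [f for f in list(pending) if f.done()]:
                    write_batch(f.result())
                    del pending[f]
                time.sleep(0.01)
        for f in as_completed(list(pending)):        # drain remaining futures
            write_batch(f.result())

run([[1, 2], [3, 4], [5]], max_workers=2)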
@@ -427,23 +742,76 @@ def find_all_indexes(scan_dir: Path) -> List[Path]:
def generate_embeddings_recursive(
index_root: Path,
model_profile: str = "code",
embedding_backend: Optional[str] = None,
model_profile: Optional[str] = None,
force: bool = False,
chunk_size: int = 2000,
overlap: int = 200,
progress_callback: Optional[callable] = None,
use_gpu: Optional[bool] = None,
max_tokens_per_batch: Optional[int] = None,
max_workers: Optional[int] = None,
endpoints: Optional[List] = None,
strategy: Optional[str] = None,
cooldown: Optional[float] = None,
) -> Dict[str, any]:
"""Generate embeddings for all index databases in a project recursively.
Args:
index_root: Root index directory containing _index.db files
        embedding_backend: Embedding backend to use (fastembed or litellm).
            Defaults to config setting.
        model_profile: Model profile for fastembed (fast, code, multilingual, balanced)
            or model name for litellm (e.g., qwen3-embedding).
            Defaults to config setting.
force: If True, regenerate even if embeddings exist
chunk_size: Maximum chunk size in characters
overlap: Overlap size in characters for sliding window chunking (default: 200)
progress_callback: Optional callback for progress updates
use_gpu: Whether to use GPU acceleration (fastembed only).
Defaults to config setting.
max_tokens_per_batch: Maximum tokens per batch for token-aware batching.
If None, attempts to get from embedder.max_tokens,
then falls back to 8000. If set, overrides automatic detection.
max_workers: Maximum number of concurrent API calls.
If None, uses dynamic defaults based on backend and endpoint count.
endpoints: Optional list of endpoint configurations for multi-API load balancing.
strategy: Selection strategy for multi-endpoint mode.
cooldown: Default cooldown seconds for rate-limited endpoints.
Returns:
Aggregated result dictionary with generation statistics
"""
# Get defaults from config if not specified
(default_backend, default_model, default_gpu,
default_endpoints, default_strategy, default_cooldown) = _get_embedding_defaults()
if embedding_backend is None:
embedding_backend = default_backend
if model_profile is None:
model_profile = default_model
if use_gpu is None:
use_gpu = default_gpu
if endpoints is None:
endpoints = default_endpoints
if strategy is None:
strategy = default_strategy
if cooldown is None:
cooldown = default_cooldown
# Calculate endpoint count for worker scaling
endpoint_count = len(endpoints) if endpoints else 1
# Set dynamic max_workers default based on backend type and endpoint count
if max_workers is None:
if embedding_backend == "litellm":
if endpoint_count > 1:
max_workers = endpoint_count * 2 # No cap, scale with endpoints
else:
max_workers = 4
else:
max_workers = 1
# Discover all _index.db files
index_files = discover_all_index_dbs(index_root)
@@ -473,10 +841,18 @@ def generate_embeddings_recursive(
result = generate_embeddings(
index_path,
embedding_backend=embedding_backend,
model_profile=model_profile,
force=force,
chunk_size=chunk_size,
overlap=overlap,
progress_callback=None, # Don't cascade callbacks
use_gpu=use_gpu,
max_tokens_per_batch=max_tokens_per_batch,
max_workers=max_workers,
endpoints=endpoints,
strategy=strategy,
cooldown=cooldown,
)
all_results.append({
@@ -497,9 +873,8 @@ def generate_embeddings_recursive(
# Final cleanup after processing all indexes
# Each generate_embeddings() call does its own cleanup, but do a final one to be safe
try:
        _cleanup_fastembed_resources()
        gc.collect()
except Exception:
pass
@@ -525,7 +900,7 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
index_root: Root index directory
Returns:
        Aggregated status with coverage statistics, model info, and timestamps
"""
index_files = discover_all_index_dbs(index_root)
@@ -541,6 +916,7 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
"coverage_percent": 0.0,
"indexes_with_embeddings": 0,
"indexes_without_embeddings": 0,
"model_info": None,
},
}
@@ -548,6 +924,8 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
files_with_embeddings = 0
total_chunks = 0
indexes_with_embeddings = 0
model_info = None
latest_updated_at = None
for index_path in index_files:
status = check_index_embeddings(index_path)
@@ -559,6 +937,40 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
if result["has_embeddings"]:
indexes_with_embeddings += 1
# Get model config from first index with embeddings (they should all match)
if model_info is None:
try:
from codexlens.semantic.vector_store import VectorStore
with VectorStore(index_path) as vs:
config = vs.get_model_config()
if config:
model_info = {
"model_profile": config.get("model_profile"),
"model_name": config.get("model_name"),
"embedding_dim": config.get("embedding_dim"),
"backend": config.get("backend"),
"created_at": config.get("created_at"),
"updated_at": config.get("updated_at"),
}
latest_updated_at = config.get("updated_at")
except Exception:
pass
else:
# Track the latest updated_at across all indexes
try:
from codexlens.semantic.vector_store import VectorStore
with VectorStore(index_path) as vs:
config = vs.get_model_config()
if config and config.get("updated_at"):
if latest_updated_at is None or config["updated_at"] > latest_updated_at:
latest_updated_at = config["updated_at"]
except Exception:
pass
# Update model_info with latest timestamp
if model_info and latest_updated_at:
model_info["updated_at"] = latest_updated_at
return {
"success": True,
"result": {
@@ -570,6 +982,7 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
"coverage_percent": round((files_with_embeddings / total_files * 100) if total_files > 0 else 0, 1),
"indexes_with_embeddings": indexes_with_embeddings,
"indexes_without_embeddings": len(index_files) - indexes_with_embeddings,
"model_info": model_info,
},
}
@@ -633,3 +1046,194 @@ def get_embedding_stats_summary(index_root: Path) -> Dict[str, any]:
"indexes": index_stats,
},
}
def scan_for_model_conflicts(
index_root: Path,
target_backend: str,
target_model: str,
) -> Dict[str, any]:
"""Scan for model conflicts across all indexes in a directory.
Checks if any existing embeddings were generated with a different
backend or model than the target configuration.
Args:
index_root: Root index directory to scan
target_backend: Target embedding backend (fastembed or litellm)
target_model: Target model profile/name
Returns:
Dictionary with:
- has_conflict: True if any index has different model config
- existing_config: Config from first index with embeddings (if any)
- target_config: The requested configuration
- conflicts: List of conflicting index paths with their configs
- indexes_with_embeddings: Count of indexes that have embeddings
"""
index_files = discover_all_index_dbs(index_root)
if not index_files:
return {
"has_conflict": False,
"existing_config": None,
"target_config": {"backend": target_backend, "model": target_model},
"conflicts": [],
"indexes_with_embeddings": 0,
}
conflicts = []
existing_config = None
indexes_with_embeddings = 0
for index_path in index_files:
try:
from codexlens.semantic.vector_store import VectorStore
with VectorStore(index_path) as vs:
config = vs.get_model_config()
if config and config.get("model_profile"):
indexes_with_embeddings += 1
# Store first existing config as reference
if existing_config is None:
existing_config = {
"backend": config.get("backend"),
"model": config.get("model_profile"),
"model_name": config.get("model_name"),
"embedding_dim": config.get("embedding_dim"),
}
# Check for conflict: different backend OR different model
existing_backend = config.get("backend", "")
existing_model = config.get("model_profile", "")
if existing_backend != target_backend or existing_model != target_model:
conflicts.append({
"path": str(index_path),
"existing": {
"backend": existing_backend,
"model": existing_model,
"model_name": config.get("model_name"),
},
})
except Exception as e:
logger.debug(f"Failed to check model config for {index_path}: {e}")
continue
return {
"has_conflict": len(conflicts) > 0,
"existing_config": existing_config,
"target_config": {"backend": target_backend, "model": target_model},
"conflicts": conflicts,
"indexes_with_embeddings": indexes_with_embeddings,
}
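# Example (hypothetical paths and model names) of gating a backend switch on the conflict
# scan above; assumes this module's functions are in scope.
from pathlib import Path

report = scan_for_model_conflicts(
    index_root=Path.home() / ".codexlens" / "indexes" / "my-project",
    target_backend="litellm",
    target_model="qwen3-embedding",
)
if report["has_conflict"]:
    print("Existing embeddings use:", report["existing_config"])
    print("Conflicting indexes:", len(report["conflicts"]))
else:
    print("No conflict; indexes with embeddings:", report["indexes_with_embeddings"])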
def _get_global_settings_path() -> Path:
"""Get the path to global embedding settings file."""
return Path.home() / ".codexlens" / "embedding_lock.json"
def get_locked_model_config() -> Optional[Dict[str, Any]]:
"""Get the globally locked embedding model configuration.
Returns:
Dictionary with backend and model if locked, None otherwise.
"""
settings_path = _get_global_settings_path()
if not settings_path.exists():
return None
try:
with open(settings_path, "r", encoding="utf-8") as f:
data = json.load(f)
if data.get("locked"):
return {
"backend": data.get("backend"),
"model": data.get("model"),
"locked_at": data.get("locked_at"),
}
except (json.JSONDecodeError, OSError):
pass
return None
def set_locked_model_config(backend: str, model: str) -> None:
"""Set the globally locked embedding model configuration.
This is called after the first successful embedding generation
to lock the model for all future operations.
Args:
backend: Embedding backend (fastembed or litellm)
model: Model profile/name
"""
import datetime
settings_path = _get_global_settings_path()
settings_path.parent.mkdir(parents=True, exist_ok=True)
data = {
"locked": True,
"backend": backend,
"model": model,
"locked_at": datetime.datetime.now().isoformat(),
}
with open(settings_path, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2)
def clear_locked_model_config() -> bool:
"""Clear the globally locked embedding model configuration.
Returns:
True if lock was cleared, False if no lock existed.
"""
settings_path = _get_global_settings_path()
if settings_path.exists():
settings_path.unlink()
return True
return False
def check_global_model_lock(
target_backend: str,
target_model: str,
) -> Dict[str, Any]:
"""Check if the target model conflicts with the global lock.
Args:
target_backend: Requested embedding backend
target_model: Requested model profile/name
Returns:
Dictionary with:
- is_locked: True if a global lock exists
- has_conflict: True if target differs from locked config
- locked_config: The locked configuration (if any)
- target_config: The requested configuration
"""
locked_config = get_locked_model_config()
if locked_config is None:
return {
"is_locked": False,
"has_conflict": False,
"locked_config": None,
"target_config": {"backend": target_backend, "model": target_model},
}
has_conflict = (
locked_config["backend"] != target_backend or
locked_config["model"] != target_model
)
return {
"is_locked": True,
"has_conflict": has_conflict,
"locked_config": locked_config,
"target_config": {"backend": target_backend, "model": target_model},
}
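# Example (hypothetical backend/model) of guarding an embedding run with the lock helpers above.
check = check_global_model_lock("litellm", "qwen3-embedding")
if check["has_conflict"]:
    raise RuntimeError(f"Embedding model locked to {check['locked_config']}; clear the lock first")
# ... run embedding generation here ...
if not check["is_locked"]:
    set_locked_model_config("litellm", "qwen3-embedding")  # lock after the first successful run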

View File

@@ -79,36 +79,38 @@ def get_cache_dir() -> Path:
"""Get fastembed cache directory.
Returns:
        Path to cache directory (~/.cache/huggingface or custom path)
"""
# Check HF_HOME environment variable first
if "HF_HOME" in os.environ:
return Path(os.environ["HF_HOME"])
    # fastembed 0.7.4+ uses HuggingFace cache when cache_dir is specified
    # Models are stored directly under the cache directory
    return Path.home() / ".cache" / "huggingface"
def _get_model_cache_path(cache_dir: Path, info: Dict) -> Path:
"""Get the actual cache path for a model.
    fastembed 0.7.4+ uses HuggingFace Hub's naming convention:
    - Prefix: 'models--'
    - Replace '/' with '--' in model name

    Example: jinaai/jina-embeddings-v2-base-code
        -> models--jinaai--jina-embeddings-v2-base-code

    Args:
        cache_dir: The fastembed cache directory (HuggingFace hub path)
        info: Model profile info dictionary

    Returns:
        Path to the model cache directory
    """
    # HuggingFace Hub naming: models--{org}--{model}
    # Use cache_name if available (for mapped ONNX models), else model_name
    target_name = info.get("cache_name", info["model_name"])
    sanitized_name = f"models--{target_name.replace('/', '--')}"
    return cache_dir / sanitized_name
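# Example of the resulting cache layout; the profile dict here is illustrative.
from pathlib import Path

info = {"model_name": "jinaai/jina-embeddings-v2-base-code"}
path = _get_model_cache_path(Path.home() / ".cache" / "huggingface", info)
# -> ~/.cache/huggingface/models--jinaai--jina-embeddings-v2-base-code
print(path)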
def list_models() -> Dict[str, any]:
@@ -194,18 +196,29 @@ def download_model(profile: str, progress_callback: Optional[callable] = None) -
model_name = info["model_name"]
try:
        # Get cache directory
        cache_dir = get_cache_dir()

        # Download model by instantiating TextEmbedding with explicit cache_dir
        # This ensures fastembed uses the correct HuggingFace Hub cache location
        if progress_callback:
            progress_callback(f"Downloading {model_name}...")

        # CRITICAL: Must specify cache_dir to use HuggingFace cache
        # and call embed() to trigger actual download
        embedder = TextEmbedding(model_name=model_name, cache_dir=str(cache_dir))

        # Trigger actual download by calling embed
        # TextEmbedding.__init__ alone doesn't download files
        if progress_callback:
            progress_callback(f"Initializing {model_name}...")
        list(embedder.embed(["test"]))  # Trigger download

        if progress_callback:
            progress_callback(f"Model {model_name} downloaded successfully")

        # Get cache info using correct HuggingFace Hub path
model_cache_path = _get_model_cache_path(cache_dir, info)
cache_size = 0

View File

@@ -35,12 +35,23 @@ def _to_jsonable(value: Any) -> Any:
return value
def print_json(*, success: bool, result: Any = None, error: str | None = None, **kwargs: Any) -> None:
"""Print JSON output with optional additional fields.
Args:
success: Whether the operation succeeded
result: Result data (used when success=True)
error: Error message (used when success=False)
**kwargs: Additional fields to include in the payload (e.g., code, details)
"""
payload: dict[str, Any] = {"success": success}
if success:
payload["result"] = _to_jsonable(result)
else:
payload["error"] = error or "Unknown error"
# Include additional error details if provided
for key, value in kwargs.items():
payload[key] = _to_jsonable(value)
console.print_json(json.dumps(payload, ensure_ascii=False))
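# Example calls showing the extended signature; the extra field names below are illustrative
# (the docstring only suggests fields such as code and details).
print_json(success=True, result={"files": 12, "chunks": 340})
print_json(
    success=False,
    error="Embedding backend not available",
    code="BACKEND_MISSING",
    details={"backend": "litellm"},
)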

View File

@@ -2,6 +2,7 @@
from __future__ import annotations
import json
import os
from dataclasses import dataclass, field
from functools import cached_property
@@ -14,6 +15,9 @@ from .errors import ConfigError
# Workspace-local directory name
WORKSPACE_DIR_NAME = ".codexlens"
# Settings file name
SETTINGS_FILE_NAME = "settings.json"
def _default_global_dir() -> Path:
"""Get global CodexLens data directory."""
@@ -89,6 +93,19 @@ class Config:
# Hybrid chunker configuration
hybrid_max_chunk_size: int = 2000 # Max characters per chunk before LLM refinement
hybrid_llm_refinement: bool = False # Enable LLM-based semantic boundary refinement
# Embedding configuration
embedding_backend: str = "fastembed" # "fastembed" (local) or "litellm" (API)
embedding_model: str = "code" # For fastembed: profile (fast/code/multilingual/balanced)
# For litellm: model name from config (e.g., "qwen3-embedding")
embedding_use_gpu: bool = True # For fastembed: whether to use GPU acceleration
# Multi-endpoint configuration for litellm backend
embedding_endpoints: List[Dict[str, Any]] = field(default_factory=list)
# List of endpoint configs: [{"model": "...", "api_key": "...", "api_base": "...", "weight": 1.0}]
embedding_strategy: str = "latency_aware" # round_robin, latency_aware, weighted_random
embedding_cooldown: float = 60.0 # Default cooldown seconds for rate-limited endpoints
def __post_init__(self) -> None:
try:
self.data_dir = self.data_dir.expanduser().resolve()
@@ -133,6 +150,82 @@ class Config:
"""Get parsing rules for a specific language, falling back to defaults."""
return {**self.parsing_rules.get("default", {}), **self.parsing_rules.get(language_id, {})}
@cached_property
def settings_path(self) -> Path:
"""Path to the settings file."""
return self.data_dir / SETTINGS_FILE_NAME
def save_settings(self) -> None:
"""Save embedding and other settings to file."""
embedding_config = {
"backend": self.embedding_backend,
"model": self.embedding_model,
"use_gpu": self.embedding_use_gpu,
}
# Include multi-endpoint config if present
if self.embedding_endpoints:
embedding_config["endpoints"] = self.embedding_endpoints
embedding_config["strategy"] = self.embedding_strategy
embedding_config["cooldown"] = self.embedding_cooldown
settings = {
"embedding": embedding_config,
"llm": {
"enabled": self.llm_enabled,
"tool": self.llm_tool,
"timeout_ms": self.llm_timeout_ms,
"batch_size": self.llm_batch_size,
},
}
with open(self.settings_path, "w", encoding="utf-8") as f:
json.dump(settings, f, indent=2)
def load_settings(self) -> None:
"""Load settings from file if exists."""
if not self.settings_path.exists():
return
try:
with open(self.settings_path, "r", encoding="utf-8") as f:
settings = json.load(f)
# Load embedding settings
embedding = settings.get("embedding", {})
if "backend" in embedding:
self.embedding_backend = embedding["backend"]
if "model" in embedding:
self.embedding_model = embedding["model"]
if "use_gpu" in embedding:
self.embedding_use_gpu = embedding["use_gpu"]
# Load multi-endpoint configuration
if "endpoints" in embedding:
self.embedding_endpoints = embedding["endpoints"]
if "strategy" in embedding:
self.embedding_strategy = embedding["strategy"]
if "cooldown" in embedding:
self.embedding_cooldown = embedding["cooldown"]
# Load LLM settings
llm = settings.get("llm", {})
if "enabled" in llm:
self.llm_enabled = llm["enabled"]
if "tool" in llm:
self.llm_tool = llm["tool"]
if "timeout_ms" in llm:
self.llm_timeout_ms = llm["timeout_ms"]
if "batch_size" in llm:
self.llm_batch_size = llm["batch_size"]
except Exception:
pass # Silently ignore errors
@classmethod
def load(cls) -> "Config":
"""Load config with settings from file."""
config = cls()
config.load_settings()
return config
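# Sketch (all values are placeholders) of persisting the embedding settings defined above.
config = Config.load()                      # re-reads settings.json if it exists
config.embedding_backend = "litellm"
config.embedding_model = "qwen3-embedding"
config.embedding_endpoints = [
    {"model": "openai/text-embedding-3-small", "api_key": "sk-...", "weight": 1.0},
]
config.embedding_strategy = "round_robin"
config.save_settings()                      # writes settings.json under data_dir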
@dataclass
class WorkspaceConfig:

View File

@@ -494,9 +494,13 @@ class ChainSearchEngine:
else:
# Use fuzzy FTS if enable_fuzzy=True (mode="fuzzy"), otherwise exact FTS
if enable_fuzzy:
                fts_results = store.search_fts_fuzzy(
                    query, limit=limit, return_full_content=True
                )
else:
                fts_results = store.search_fts_exact(
                    query, limit=limit, return_full_content=True
                )
# Optionally add semantic keyword results
if include_semantic:

View File

@@ -27,11 +27,11 @@ class HybridSearchEngine:
default_weights: Default RRF weights for each source
"""
    # Default RRF weights (vector: 60%, exact: 30%, fuzzy: 10%)
    DEFAULT_WEIGHTS = {
        "exact": 0.3,
        "fuzzy": 0.1,
        "vector": 0.6,
}
def __init__(self, weights: Optional[Dict[str, float]] = None):
@@ -200,7 +200,9 @@ class HybridSearchEngine:
"""
try:
with DirIndexStore(index_path) as store:
                return store.search_fts_exact(
                    query, limit=limit, return_full_content=True
                )
except Exception as exc:
self.logger.debug("Exact search error: %s", exc)
return []
@@ -220,7 +222,9 @@ class HybridSearchEngine:
"""
try:
with DirIndexStore(index_path) as store:
                return store.search_fts_fuzzy(
                    query, limit=limit, return_full_content=True
                )
except Exception as exc:
self.logger.debug("Fuzzy search error: %s", exc)
return []
@@ -260,7 +264,7 @@ class HybridSearchEngine:
return []
# Initialize embedder and vector store
            from codexlens.semantic.factory import get_embedder
from codexlens.semantic.vector_store import VectorStore
vector_store = VectorStore(index_path)
@@ -277,32 +281,51 @@ class HybridSearchEngine:
# Get stored model configuration (preferred) or auto-detect from dimension
model_config = vector_store.get_model_config()
            if model_config:
                backend = model_config.get("backend", "fastembed")
                model_name = model_config["model_name"]
                model_profile = model_config["model_profile"]
                self.logger.debug(
                    "Using stored model config: %s backend, %s (%s, %dd)",
                    backend, model_profile, model_name, model_config["embedding_dim"]
                )
                # Get embedder based on backend
                if backend == "litellm":
                    embedder = get_embedder(backend="litellm", model=model_name)
                else:
                    embedder = get_embedder(backend="fastembed", profile=model_profile)
            else:
                # Fallback: auto-detect from embedding dimension
                detected_dim = vector_store.dimension
                if detected_dim is None:
                    self.logger.info("Vector store dimension unknown, using default profile")
                    embedder = get_embedder(backend="fastembed", profile="code")
                elif detected_dim == 384:
                    embedder = get_embedder(backend="fastembed", profile="fast")
                elif detected_dim == 768:
                    embedder = get_embedder(backend="fastembed", profile="code")
                elif detected_dim == 1024:
                    embedder = get_embedder(backend="fastembed", profile="multilingual")
                elif detected_dim == 1536:
                    # Likely OpenAI text-embedding-3-small or ada-002
                    self.logger.info(
                        "Detected 1536-dim embeddings (likely OpenAI), using litellm backend with text-embedding-3-small"
                    )
                    embedder = get_embedder(backend="litellm", model="text-embedding-3-small")
                elif detected_dim == 3072:
                    # Likely OpenAI text-embedding-3-large
                    self.logger.info(
                        "Detected 3072-dim embeddings (likely OpenAI), using litellm backend with text-embedding-3-large"
                    )
                    embedder = get_embedder(backend="litellm", model="text-embedding-3-large")
                else:
                    self.logger.debug(
                        "Unknown dimension %s, using default fastembed profile 'code'",
                        detected_dim
                    )
                    embedder = get_embedder(backend="fastembed", profile="code")
# Generate query embedding
query_embedding = embedder.embed_single(query)

View File

@@ -25,7 +25,7 @@ def reciprocal_rank_fusion(
results_map: Dictionary mapping source name to list of SearchResult objects
Sources: 'exact', 'fuzzy', 'vector'
weights: Dictionary mapping source name to weight (default: equal weights)
                 Example: {'exact': 0.3, 'fuzzy': 0.1, 'vector': 0.6}
k: Constant to avoid division by zero and control rank influence (default 60)
Returns:

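# Minimal sketch of weighted reciprocal rank fusion as described above; result items are
# plain ids here, whereas the real function fuses SearchResult objects.
def rrf_scores(results_map, weights, k=60):
    scores = {}
    for source, results in results_map.items():
        w = weights.get(source, 1.0)
        for rank, doc_id in enumerate(results, start=1):
            scores[doc_id] = scores.get(doc_id, 0.0) + w / (k + rank)
    return sorted(scores.items(), key=lambda kv: kv[1], reverse=True)

fused = rrf_scores(
    {"exact": ["a", "b"], "fuzzy": ["b", "c"], "vector": ["c", "a"]},
    {"exact": 0.3, "fuzzy": 0.1, "vector": 0.6},
)
print(fused)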
View File

@@ -14,6 +14,7 @@ from __future__ import annotations
SEMANTIC_AVAILABLE = False
SEMANTIC_BACKEND: str | None = None
GPU_AVAILABLE = False
LITELLM_AVAILABLE = False
_import_error: str | None = None
@@ -67,10 +68,51 @@ def check_gpu_available() -> tuple[bool, str]:
return False, "GPU support module not available"
# Export embedder components
# BaseEmbedder is always available (abstract base class)
from .base import BaseEmbedder
# Factory function for creating embedders
from .factory import get_embedder as get_embedder_factory
# Optional: LiteLLMEmbedderWrapper (only if ccw-litellm is installed)
try:
import ccw_litellm # noqa: F401
from .litellm_embedder import LiteLLMEmbedderWrapper
LITELLM_AVAILABLE = True
except ImportError:
LiteLLMEmbedderWrapper = None
LITELLM_AVAILABLE = False
def is_embedding_backend_available(backend: str) -> tuple[bool, str | None]:
"""Check whether a specific embedding backend can be used.
Notes:
- "fastembed" requires the optional semantic deps (pip install codexlens[semantic]).
- "litellm" requires ccw-litellm to be installed in the same environment.
"""
backend = (backend or "").strip().lower()
if backend == "fastembed":
if SEMANTIC_AVAILABLE:
return True, None
return False, _import_error or "fastembed not available. Install with: pip install codexlens[semantic]"
if backend == "litellm":
if LITELLM_AVAILABLE:
return True, None
return False, "ccw-litellm not available. Install with: pip install ccw-litellm"
return False, f"Invalid embedding backend: {backend}. Must be 'fastembed' or 'litellm'."
__all__ = [
"SEMANTIC_AVAILABLE",
"SEMANTIC_BACKEND",
"GPU_AVAILABLE",
"LITELLM_AVAILABLE",
"check_semantic_available",
"is_embedding_backend_available",
"check_gpu_available",
"BaseEmbedder",
"get_embedder_factory",
"LiteLLMEmbedderWrapper",
]
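# Example guard using the helper exported above (import path assumed to be codexlens.semantic).
from codexlens.semantic import is_embedding_backend_available

ok, err = is_embedding_backend_available("litellm")
if not ok:
    print(f"Cannot use the litellm backend: {err}")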

View File

@@ -0,0 +1,61 @@
"""Base class for embedders.
Defines the interface that all embedders must implement.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Iterable
import numpy as np
class BaseEmbedder(ABC):
"""Base class for all embedders.
All embedder implementations must inherit from this class and implement
the abstract methods to ensure a consistent interface.
"""
@property
@abstractmethod
def embedding_dim(self) -> int:
"""Return embedding dimensions.
Returns:
int: Dimension of the embedding vectors.
"""
...
@property
@abstractmethod
def model_name(self) -> str:
"""Return model name.
Returns:
str: Name or identifier of the underlying model.
"""
...
@property
def max_tokens(self) -> int:
"""Return maximum token limit for embeddings.
Returns:
int: Maximum number of tokens that can be embedded at once.
Default is 8192 if not overridden by implementation.
"""
return 8192
@abstractmethod
def embed_to_numpy(self, texts: str | Iterable[str]) -> np.ndarray:
"""Embed texts to numpy array.
Args:
texts: Single text or iterable of texts to embed.
Returns:
numpy.ndarray: Array of shape (n_texts, embedding_dim) containing embeddings.
"""
...
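# Minimal concrete embedder satisfying the BaseEmbedder interface above (illustration only,
# not a real backend): deterministic hash-based 8-dimensional vectors.
import numpy as np

class HashEmbedder(BaseEmbedder):
    @property
    def embedding_dim(self) -> int:
        return 8

    @property
    def model_name(self) -> str:
        return "hash-demo"

    def embed_to_numpy(self, texts):
        if isinstance(texts, str):
            texts = [texts]
        rows = [[float((hash(t) >> i) & 0xFF) for i in range(0, 64, 8)] for t in texts]
        return np.array(rows, dtype=np.float32)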

View File

@@ -39,7 +39,7 @@ from codexlens.parsers.tokenizer import get_default_tokenizer
class ChunkConfig:
"""Configuration for chunking strategies."""
max_chunk_size: int = 1000 # Max characters per chunk
    overlap: int = 200  # Overlap for sliding window (increased from 100 for better context)
strategy: str = "auto" # Chunking strategy: auto, symbol, sliding_window, hybrid
min_chunk_size: int = 50 # Minimum chunk size
skip_token_count: bool = False # Skip expensive token counting (use char/4 estimate)
@@ -80,6 +80,7 @@ class Chunker:
"""Chunk code by extracted symbols (functions, classes).
Each symbol becomes one chunk with its full content.
Large symbols exceeding max_chunk_size are recursively split using sliding window.
Args:
content: Source code content
@@ -101,27 +102,49 @@ class Chunker:
if len(chunk_content.strip()) < self.config.min_chunk_size:
continue
                # Check if symbol content exceeds max_chunk_size
                if len(chunk_content) > self.config.max_chunk_size:
                    # Create line mapping for correct line number tracking
                    line_mapping = list(range(start_line, end_line + 1))

                    # Use sliding window to split large symbol
                    sub_chunks = self.chunk_sliding_window(
                        chunk_content,
                        file_path=file_path,
                        language=language,
                        line_mapping=line_mapping
                    )

                    # Update sub_chunks with parent symbol metadata
                    for sub_chunk in sub_chunks:
                        sub_chunk.metadata["symbol_name"] = symbol.name
                        sub_chunk.metadata["symbol_kind"] = symbol.kind
                        sub_chunk.metadata["strategy"] = "symbol_split"
                        sub_chunk.metadata["parent_symbol_range"] = (start_line, end_line)
                    chunks.extend(sub_chunks)
                else:
                    # Calculate token count if not provided
                    token_count = None
                    if symbol_token_counts and symbol.name in symbol_token_counts:
                        token_count = symbol_token_counts[symbol.name]
                    else:
                        token_count = self._estimate_token_count(chunk_content)

                    chunks.append(SemanticChunk(
                        content=chunk_content,
                        embedding=None,
                        metadata={
                            "file": str(file_path),
                            "language": language,
                            "symbol_name": symbol.name,
                            "symbol_kind": symbol.kind,
                            "start_line": start_line,
                            "end_line": end_line,
                            "strategy": "symbol",
                            "token_count": token_count,
                        }
                    ))
return chunks

View File

@@ -14,7 +14,8 @@ from typing import Dict, Iterable, List, Optional
import numpy as np
from . import SEMANTIC_AVAILABLE
from .base import BaseEmbedder
from .gpu_support import get_optimal_providers, is_gpu_available, get_gpu_summary, get_selected_device_id
logger = logging.getLogger(__name__)
@@ -84,7 +85,7 @@ def clear_embedder_cache() -> None:
gc.collect()
class Embedder(BaseEmbedder):
"""Generate embeddings for code chunks using fastembed (ONNX-based).
Supported Model Profiles:
@@ -138,25 +139,58 @@ class Embedder:
# Resolve model name from profile or use explicit name
        if model_name:
            self._model_name = model_name
        elif profile and profile in self.MODELS:
            self._model_name = self.MODELS[profile]
        else:
            self._model_name = self.DEFAULT_MODEL
        # Configure ONNX execution providers with device_id options for GPU selection
        # Using with_device_options=True ensures DirectML/CUDA device_id is passed correctly
        if providers is not None:
            self._providers = providers
        else:
            self._providers = get_optimal_providers(use_gpu=use_gpu, with_device_options=True)
self._use_gpu = use_gpu
self._model = None
@property
def model_name(self) -> str:
"""Get model name."""
return self._model_name
@property
def embedding_dim(self) -> int:
"""Get embedding dimension for current model."""
        return self.MODEL_DIMS.get(self._model_name, 768)  # Default to 768 if unknown
@property
def max_tokens(self) -> int:
"""Get maximum token limit for current model.
Returns:
int: Maximum number of tokens based on model profile.
- fast: 512 (lightweight, optimized for speed)
- code: 8192 (code-optimized, larger context)
- multilingual: 512 (standard multilingual model)
- balanced: 512 (general purpose)
"""
# Determine profile from model name
profile = None
for prof, model in self.MODELS.items():
if model == self._model_name:
profile = prof
break
# Return token limit based on profile
if profile == "code":
return 8192
elif profile in ("fast", "multilingual", "balanced"):
return 512
else:
# Default for unknown models
return 512
@property
def providers(self) -> List[str]:
@@ -168,7 +202,12 @@ class Embedder:
"""Check if GPU acceleration is enabled for this embedder."""
gpu_providers = {"CUDAExecutionProvider", "TensorrtExecutionProvider",
"DmlExecutionProvider", "ROCMExecutionProvider", "CoreMLExecutionProvider"}
        # Handle both string providers and tuple providers (name, options)
for p in self._providers:
provider_name = p[0] if isinstance(p, tuple) else p
if provider_name in gpu_providers:
return True
return False
def _load_model(self) -> None:
"""Lazy load the embedding model with configured providers."""
@@ -177,7 +216,9 @@ class Embedder:
from fastembed import TextEmbedding
# fastembed supports 'providers' parameter for ONNX execution providers
# providers already include device_id options via get_optimal_providers(with_device_options=True)
# DO NOT pass device_ids separately - fastembed ignores it when providers is specified
# See: fastembed/text/onnx_embedding.py - device_ids is only used with cuda=True
try:
self._model = TextEmbedding(
model_name=self.model_name,
@@ -215,7 +256,7 @@ class Embedder:
embeddings = list(self._model.embed(texts))
return [emb.tolist() for emb in embeddings]
    def embed_to_numpy(self, texts: str | Iterable[str], batch_size: Optional[int] = None) -> np.ndarray:
"""Generate embeddings for one or more texts (returns numpy arrays).
This method is more memory-efficient than embed() as it avoids converting
@@ -224,6 +265,8 @@ class Embedder:
Args:
texts: Single text or iterable of texts to embed.
batch_size: Optional batch size for fastembed processing.
Larger values improve GPU utilization but use more memory.
Returns:
numpy.ndarray of shape (n_texts, embedding_dim) containing embeddings.
@@ -235,8 +278,12 @@ class Embedder:
else:
texts = list(texts)
        # Pass batch_size to fastembed for optimal GPU utilization
        # Default batch_size in fastembed is 256, but larger values can improve throughput
        if batch_size is not None:
            embeddings = list(self._model.embed(texts, batch_size=batch_size))
        else:
            embeddings = list(self._model.embed(texts))
return np.array(embeddings)
def embed_single(self, text: str) -> List[float]:

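# Example (assumed constructor arguments) of the new batch_size pass-through on the
# fastembed-backed Embedder.
emb = Embedder(profile="code", use_gpu=True)
vectors = emb.embed_to_numpy(["def add(a, b):\n    return a + b"], batch_size=512)
print(vectors.shape)  # (1, embedding_dim)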
View File

@@ -0,0 +1,98 @@
"""Factory for creating embedders.
Provides a unified interface for instantiating different embedder backends.
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional
from .base import BaseEmbedder
def get_embedder(
backend: str = "fastembed",
profile: str = "code",
model: str = "default",
use_gpu: bool = True,
endpoints: Optional[List[Dict[str, Any]]] = None,
strategy: str = "latency_aware",
cooldown: float = 60.0,
**kwargs: Any,
) -> BaseEmbedder:
"""Factory function to create embedder based on backend.
Args:
backend: Embedder backend to use. Options:
- "fastembed": Use fastembed (ONNX-based) embedder (default)
- "litellm": Use ccw-litellm embedder
profile: Model profile for fastembed backend ("fast", "code", "multilingual", "balanced")
Used only when backend="fastembed". Default: "code"
model: Model identifier for litellm backend.
Used only when backend="litellm". Default: "default"
use_gpu: Whether to use GPU acceleration when available (default: True).
Used only when backend="fastembed".
endpoints: Optional list of endpoint configurations for multi-endpoint load balancing.
Each endpoint is a dict with keys: model, api_key, api_base, weight.
Used only when backend="litellm" and multiple endpoints provided.
strategy: Selection strategy for multi-endpoint mode:
"round_robin", "latency_aware", "weighted_random".
Default: "latency_aware"
cooldown: Default cooldown seconds for rate-limited endpoints (default: 60.0)
**kwargs: Additional backend-specific arguments
Returns:
BaseEmbedder: Configured embedder instance
Raises:
ValueError: If backend is not recognized
ImportError: If required backend dependencies are not installed
Examples:
Create fastembed embedder with code profile:
>>> embedder = get_embedder(backend="fastembed", profile="code")
Create fastembed embedder with fast profile and CPU only:
>>> embedder = get_embedder(backend="fastembed", profile="fast", use_gpu=False)
Create litellm embedder:
>>> embedder = get_embedder(backend="litellm", model="text-embedding-3-small")
Create rotational embedder with multiple endpoints:
>>> endpoints = [
... {"model": "openai/text-embedding-3-small", "api_key": "sk-..."},
... {"model": "azure/my-embedding", "api_base": "https://...", "api_key": "..."},
... ]
>>> embedder = get_embedder(backend="litellm", endpoints=endpoints)
"""
if backend == "fastembed":
from .embedder import Embedder
return Embedder(profile=profile, use_gpu=use_gpu, **kwargs)
elif backend == "litellm":
# Check if multi-endpoint mode is requested
if endpoints and len(endpoints) > 1:
from .rotational_embedder import create_rotational_embedder
return create_rotational_embedder(
endpoints_config=endpoints,
strategy=strategy,
default_cooldown=cooldown,
)
elif endpoints and len(endpoints) == 1:
# Single endpoint in list - use it directly
ep = endpoints[0]
ep_kwargs = {**kwargs}
if "api_key" in ep:
ep_kwargs["api_key"] = ep["api_key"]
if "api_base" in ep:
ep_kwargs["api_base"] = ep["api_base"]
from .litellm_embedder import LiteLLMEmbedderWrapper
return LiteLLMEmbedderWrapper(model=ep["model"], **ep_kwargs)
else:
# No endpoints list - use model parameter
from .litellm_embedder import LiteLLMEmbedderWrapper
return LiteLLMEmbedderWrapper(model=model, **kwargs)
else:
raise ValueError(
f"Unknown backend: {backend}. "
f"Supported backends: 'fastembed', 'litellm'"
)

View File

@@ -13,6 +13,15 @@ from typing import List, Optional
logger = logging.getLogger(__name__)
@dataclass
class GPUDevice:
"""Individual GPU device info."""
device_id: int
name: str
is_discrete: bool # True for discrete GPU (NVIDIA, AMD), False for integrated (Intel UHD)
vendor: str # "nvidia", "amd", "intel", "unknown"
@dataclass
class GPUInfo:
"""GPU availability and configuration info."""
@@ -22,15 +31,117 @@ class GPUInfo:
gpu_count: int = 0
gpu_name: Optional[str] = None
onnx_providers: List[str] = None
devices: List[GPUDevice] = None # List of detected GPU devices
preferred_device_id: Optional[int] = None # Preferred GPU for embedding
def __post_init__(self):
if self.onnx_providers is None:
self.onnx_providers = ["CPUExecutionProvider"]
if self.devices is None:
self.devices = []
_gpu_info_cache: Optional[GPUInfo] = None
def _enumerate_gpus() -> List[GPUDevice]:
"""Enumerate available GPU devices using WMI on Windows.
Returns:
List of GPUDevice with device info, ordered by device_id.
"""
devices = []
try:
import subprocess
import sys
if sys.platform == "win32":
# Use PowerShell to query GPU information via WMI
cmd = [
"powershell", "-NoProfile", "-Command",
"Get-WmiObject Win32_VideoController | Select-Object DeviceID, Name, AdapterCompatibility | ConvertTo-Json"
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
if result.returncode == 0 and result.stdout.strip():
import json
gpu_data = json.loads(result.stdout)
# Handle single GPU case (returns dict instead of list)
if isinstance(gpu_data, dict):
gpu_data = [gpu_data]
for idx, gpu in enumerate(gpu_data):
name = gpu.get("Name", "Unknown GPU")
compat = gpu.get("AdapterCompatibility", "").lower()
# Determine vendor
name_lower = name.lower()
if "nvidia" in name_lower or "nvidia" in compat:
vendor = "nvidia"
is_discrete = True
elif "amd" in name_lower or "radeon" in name_lower or "amd" in compat:
vendor = "amd"
is_discrete = True
elif "intel" in name_lower or "intel" in compat:
vendor = "intel"
# Intel UHD/Iris are integrated, Intel Arc is discrete
is_discrete = "arc" in name_lower
else:
vendor = "unknown"
is_discrete = False
devices.append(GPUDevice(
device_id=idx,
name=name,
is_discrete=is_discrete,
vendor=vendor
))
logger.debug(f"Detected GPU {idx}: {name} (vendor={vendor}, discrete={is_discrete})")
except Exception as e:
logger.debug(f"GPU enumeration failed: {e}")
return devices
def _get_preferred_device_id(devices: List[GPUDevice]) -> Optional[int]:
"""Determine the preferred GPU device_id for embedding.
Preference order:
1. NVIDIA discrete GPU (best DirectML/CUDA support)
2. AMD discrete GPU
3. Intel Arc (discrete)
4. Intel integrated (fallback)
Returns:
device_id of preferred GPU, or None to use default.
"""
if not devices:
return None
# Priority: NVIDIA > AMD > Intel Arc > Intel integrated
priority_order = [
("nvidia", True), # NVIDIA discrete
("amd", True), # AMD discrete
("intel", True), # Intel Arc (discrete)
("intel", False), # Intel integrated (fallback)
]
for target_vendor, target_discrete in priority_order:
for device in devices:
if device.vendor == target_vendor and device.is_discrete == target_discrete:
logger.info(f"Preferred GPU: {device.name} (device_id={device.device_id})")
return device.device_id
# If no match, use first device
if devices:
return devices[0].device_id
return None
def detect_gpu(force_refresh: bool = False) -> GPUInfo:
"""Detect available GPU resources for embedding acceleration.
@@ -47,6 +158,18 @@ def detect_gpu(force_refresh: bool = False) -> GPUInfo:
info = GPUInfo()
# Enumerate GPU devices first
info.devices = _enumerate_gpus()
info.gpu_count = len(info.devices)
if info.devices:
# Set preferred device (discrete GPU preferred over integrated)
info.preferred_device_id = _get_preferred_device_id(info.devices)
# Set gpu_name to preferred device name
for dev in info.devices:
if dev.device_id == info.preferred_device_id:
info.gpu_name = dev.name
break
# Check PyTorch CUDA availability (most reliable detection)
try:
import torch
@@ -143,21 +266,48 @@ def detect_gpu(force_refresh: bool = False) -> GPUInfo:
return info
def get_optimal_providers(use_gpu: bool = True, with_device_options: bool = False) -> list:
"""Get optimal ONNX execution providers based on availability.
Args:
use_gpu: If True, include GPU providers when available.
If False, force CPU-only execution.
with_device_options: If True, return providers as tuples with device_id options
for proper GPU device selection (required for DirectML).
Returns:
        List of provider names or tuples (provider_name, options_dict) in priority order.
"""
if not use_gpu:
return ["CPUExecutionProvider"]
gpu_info = detect_gpu()
    if not with_device_options:
        return gpu_info.onnx_providers
# Build providers with device_id options for GPU providers
device_id = get_selected_device_id()
providers = []
for provider in gpu_info.onnx_providers:
if provider == "DmlExecutionProvider" and device_id is not None:
# DirectML requires device_id in provider_options tuple
providers.append(("DmlExecutionProvider", {"device_id": device_id}))
logger.debug(f"DmlExecutionProvider configured with device_id={device_id}")
elif provider == "CUDAExecutionProvider" and device_id is not None:
# CUDA also supports device_id in provider_options
providers.append(("CUDAExecutionProvider", {"device_id": device_id}))
logger.debug(f"CUDAExecutionProvider configured with device_id={device_id}")
elif provider == "ROCMExecutionProvider" and device_id is not None:
# ROCm supports device_id
providers.append(("ROCMExecutionProvider", {"device_id": device_id}))
logger.debug(f"ROCMExecutionProvider configured with device_id={device_id}")
else:
# CPU and other providers don't need device_id
providers.append(provider)
return providers
def is_gpu_available() -> bool:
@@ -190,3 +340,75 @@ def clear_gpu_cache() -> None:
"""Clear cached GPU detection info."""
global _gpu_info_cache
_gpu_info_cache = None
# User-selected device ID (overrides auto-detection)
_selected_device_id: Optional[int] = None
def get_gpu_devices() -> List[dict]:
"""Get list of available GPU devices for frontend selection.
Returns:
List of dicts with device info for each GPU.
"""
info = detect_gpu()
devices = []
for dev in info.devices:
devices.append({
"device_id": dev.device_id,
"name": dev.name,
"vendor": dev.vendor,
"is_discrete": dev.is_discrete,
"is_preferred": dev.device_id == info.preferred_device_id,
"is_selected": dev.device_id == get_selected_device_id(),
})
return devices
def get_selected_device_id() -> Optional[int]:
"""Get the user-selected GPU device_id.
Returns:
User-selected device_id, or auto-detected preferred device_id if not set.
"""
global _selected_device_id
if _selected_device_id is not None:
return _selected_device_id
# Fall back to auto-detected preferred device
info = detect_gpu()
return info.preferred_device_id
def set_selected_device_id(device_id: Optional[int]) -> bool:
"""Set the GPU device_id to use for embeddings.
Args:
device_id: GPU device_id to use, or None to use auto-detection.
Returns:
True if device_id is valid, False otherwise.
"""
global _selected_device_id
if device_id is None:
_selected_device_id = None
logger.info("GPU selection reset to auto-detection")
return True
# Validate device_id exists
info = detect_gpu()
valid_ids = [dev.device_id for dev in info.devices]
if device_id in valid_ids:
_selected_device_id = device_id
device_name = next((dev.name for dev in info.devices if dev.device_id == device_id), "Unknown")
logger.info(f"GPU selection set to device {device_id}: {device_name}")
return True
else:
logger.warning(f"Invalid device_id {device_id}. Valid IDs: {valid_ids}")
return False
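# Example of selecting a GPU with the helpers above; device 0 is only a placeholder id.
for dev in get_gpu_devices():
    print(dev["device_id"], dev["name"], "(preferred)" if dev["is_preferred"] else "")

if set_selected_device_id(0):
    providers = get_optimal_providers(use_gpu=True, with_device_options=True)
    # e.g. [("DmlExecutionProvider", {"device_id": 0}), "CPUExecutionProvider"]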

View File

@@ -0,0 +1,144 @@
"""LiteLLM embedder wrapper for CodexLens.
Provides integration with ccw-litellm's LiteLLMEmbedder for embedding generation.
"""
from __future__ import annotations
from typing import Iterable
import numpy as np
from .base import BaseEmbedder
class LiteLLMEmbedderWrapper(BaseEmbedder):
"""Wrapper for ccw-litellm LiteLLMEmbedder.
This wrapper adapts the ccw-litellm LiteLLMEmbedder to the CodexLens
BaseEmbedder interface, enabling seamless integration with CodexLens
semantic search functionality.
Args:
model: Model identifier for LiteLLM (default: "default")
**kwargs: Additional arguments passed to LiteLLMEmbedder
Raises:
ImportError: If ccw-litellm package is not installed
"""
def __init__(self, model: str = "default", **kwargs) -> None:
"""Initialize LiteLLM embedder wrapper.
Args:
model: Model identifier for LiteLLM (default: "default")
**kwargs: Additional arguments passed to LiteLLMEmbedder
Raises:
ImportError: If ccw-litellm package is not installed
"""
try:
from ccw_litellm import LiteLLMEmbedder
self._embedder = LiteLLMEmbedder(model=model, **kwargs)
except ImportError as e:
raise ImportError(
"ccw-litellm not installed. Install with: pip install ccw-litellm"
) from e
@property
def embedding_dim(self) -> int:
"""Return embedding dimensions from LiteLLMEmbedder.
Returns:
int: Dimension of the embedding vectors.
"""
return self._embedder.dimensions
@property
def model_name(self) -> str:
"""Return model name from LiteLLMEmbedder.
Returns:
str: Name or identifier of the underlying model.
"""
return self._embedder.model_name
@property
def max_tokens(self) -> int:
"""Return maximum token limit for the embedding model.
Returns:
int: Maximum number of tokens that can be embedded at once.
Inferred from model config or model name patterns.
"""
# Try to get from LiteLLM config first
if hasattr(self._embedder, 'max_input_tokens') and self._embedder.max_input_tokens:
return self._embedder.max_input_tokens
# Infer from model name
model_name_lower = self.model_name.lower()
# Large models (8B or "large" in name)
if '8b' in model_name_lower or 'large' in model_name_lower:
return 32768
# OpenAI text-embedding-3-* models
if 'text-embedding-3' in model_name_lower:
return 8191
# Default fallback
return 8192
def _sanitize_text(self, text: str) -> str:
"""Sanitize text to work around ModelScope API routing bug.
ModelScope incorrectly routes text starting with lowercase 'import'
to an Ollama endpoint, causing failures. This adds a leading space
to work around the issue without affecting embedding quality.
Args:
text: Text to sanitize.
Returns:
Sanitized text safe for embedding API.
"""
if text.startswith('import'):
return ' ' + text
return text
def embed_to_numpy(self, texts: str | Iterable[str], **kwargs) -> np.ndarray:
"""Embed texts to numpy array using LiteLLMEmbedder.
Args:
texts: Single text or iterable of texts to embed.
**kwargs: Additional arguments (ignored for LiteLLM backend).
Accepts batch_size for API compatibility with fastembed.
Returns:
numpy.ndarray: Array of shape (n_texts, embedding_dim) containing embeddings.
"""
if isinstance(texts, str):
texts = [texts]
else:
texts = list(texts)
# Sanitize texts to avoid ModelScope routing bug
texts = [self._sanitize_text(t) for t in texts]
# LiteLLM handles batching internally, ignore batch_size parameter
return self._embedder.embed(texts)
def embed_single(self, text: str) -> list[float]:
"""Generate embedding for a single text.
Args:
text: Text to embed.
Returns:
list[float]: Embedding vector as a list of floats.
"""
# Sanitize text before embedding
sanitized = self._sanitize_text(text)
embedding = self._embedder.embed([sanitized])
return embedding[0].tolist()
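# Example use of the wrapper (placeholder model/key; assumes the underlying LiteLLMEmbedder
# accepts api_key via **kwargs, as the factory's endpoint handling suggests).
embedder = LiteLLMEmbedderWrapper(
    model="openai/text-embedding-3-small",
    api_key="sk-...",
)
vecs = embedder.embed_to_numpy(["import numpy as np", "def main(): pass"])
print(embedder.model_name, embedder.embedding_dim, vecs.shape)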

View File

@@ -0,0 +1,434 @@
"""Rotational embedder for multi-endpoint API load balancing.
Provides intelligent load balancing across multiple LiteLLM embedding endpoints
to maximize throughput while respecting rate limits.
"""
from __future__ import annotations
import logging
import random
import threading
import time
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, Iterable, List, Optional
import numpy as np
from .base import BaseEmbedder
logger = logging.getLogger(__name__)
class EndpointStatus(Enum):
"""Status of an API endpoint."""
AVAILABLE = "available"
COOLING = "cooling" # Rate limited, temporarily unavailable
FAILED = "failed" # Permanent failure (auth error, etc.)
class SelectionStrategy(Enum):
"""Strategy for selecting endpoints."""
ROUND_ROBIN = "round_robin"
LATENCY_AWARE = "latency_aware"
WEIGHTED_RANDOM = "weighted_random"
@dataclass
class EndpointConfig:
"""Configuration for a single API endpoint."""
model: str
api_key: Optional[str] = None
api_base: Optional[str] = None
weight: float = 1.0 # Higher weight = more requests
max_concurrent: int = 4 # Max concurrent requests to this endpoint
@dataclass
class EndpointState:
"""Runtime state for an endpoint."""
config: EndpointConfig
embedder: Any = None # LiteLLMEmbedderWrapper instance
# Health metrics
status: EndpointStatus = EndpointStatus.AVAILABLE
cooldown_until: float = 0.0 # Unix timestamp when cooldown ends
# Performance metrics
total_requests: int = 0
total_failures: int = 0
avg_latency_ms: float = 0.0
last_latency_ms: float = 0.0
# Concurrency tracking
active_requests: int = 0
lock: threading.Lock = field(default_factory=threading.Lock)
def is_available(self) -> bool:
"""Check if endpoint is available for requests."""
if self.status == EndpointStatus.FAILED:
return False
if self.status == EndpointStatus.COOLING:
if time.time() >= self.cooldown_until:
self.status = EndpointStatus.AVAILABLE
return True
return False
return True
def set_cooldown(self, seconds: float) -> None:
"""Put endpoint in cooldown state."""
self.status = EndpointStatus.COOLING
self.cooldown_until = time.time() + seconds
logger.warning(f"Endpoint {self.config.model} cooling down for {seconds:.1f}s")
def mark_failed(self) -> None:
"""Mark endpoint as permanently failed."""
self.status = EndpointStatus.FAILED
logger.error(f"Endpoint {self.config.model} marked as failed")
def record_success(self, latency_ms: float) -> None:
"""Record successful request."""
self.total_requests += 1
self.last_latency_ms = latency_ms
# Exponential moving average for latency
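# e.g. with alpha=0.3, a previous average of 100 ms and a new sample of 200 ms
# give 0.3 * 200 + 0.7 * 100 = 130 ms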
alpha = 0.3
if self.avg_latency_ms == 0:
self.avg_latency_ms = latency_ms
else:
self.avg_latency_ms = alpha * latency_ms + (1 - alpha) * self.avg_latency_ms
def record_failure(self) -> None:
"""Record failed request."""
self.total_requests += 1
self.total_failures += 1
@property
def health_score(self) -> float:
"""Calculate health score (0-1) based on metrics."""
if not self.is_available():
return 0.0
# Base score from success rate
if self.total_requests > 0:
success_rate = 1 - (self.total_failures / self.total_requests)
else:
success_rate = 1.0
# Latency factor (faster = higher score)
# Normalize: 100ms = 1.0, 1000ms = 0.1
if self.avg_latency_ms > 0:
latency_factor = min(1.0, 100 / self.avg_latency_ms)
else:
latency_factor = 1.0
# Availability factor (less concurrent = more available)
if self.config.max_concurrent > 0:
availability = 1 - (self.active_requests / self.config.max_concurrent)
else:
availability = 1.0
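# Illustrative numbers: success_rate=0.9, avg_latency_ms=200 (latency_factor=0.5),
# and 1 of 4 slots busy (availability=0.75) yield
# 0.9*0.4 + 0.5*0.3 + 0.75*0.3 = 0.735, before scaling by config.weight.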
# Combined score with weights
return (success_rate * 0.4 + latency_factor * 0.3 + availability * 0.3) * self.config.weight
class RotationalEmbedder(BaseEmbedder):
"""Embedder that load balances across multiple API endpoints.
Features:
- Intelligent endpoint selection based on latency and health
- Automatic failover on rate limits (429) and server errors
- Cooldown management to respect rate limits
- Thread-safe concurrent request handling
Args:
endpoints: List of endpoint configurations
strategy: Selection strategy (default: latency_aware)
default_cooldown: Default cooldown seconds for rate limits (default: 60)
max_retries: Maximum retry attempts across all endpoints (default: 3)
"""
def __init__(
self,
endpoints: List[EndpointConfig],
strategy: SelectionStrategy = SelectionStrategy.LATENCY_AWARE,
default_cooldown: float = 60.0,
max_retries: int = 3,
) -> None:
if not endpoints:
raise ValueError("At least one endpoint must be provided")
self.strategy = strategy
self.default_cooldown = default_cooldown
self.max_retries = max_retries
# Initialize endpoint states
self._endpoints: List[EndpointState] = []
self._lock = threading.Lock()
self._round_robin_index = 0
# Create embedder instances for each endpoint
from .litellm_embedder import LiteLLMEmbedderWrapper
for config in endpoints:
# Build kwargs for LiteLLMEmbedderWrapper
kwargs: Dict[str, Any] = {}
if config.api_key:
kwargs["api_key"] = config.api_key
if config.api_base:
kwargs["api_base"] = config.api_base
try:
embedder = LiteLLMEmbedderWrapper(model=config.model, **kwargs)
state = EndpointState(config=config, embedder=embedder)
self._endpoints.append(state)
logger.info(f"Initialized endpoint: {config.model}")
except Exception as e:
logger.error(f"Failed to initialize endpoint {config.model}: {e}")
if not self._endpoints:
raise ValueError("Failed to initialize any endpoints")
# Cache embedding properties from first endpoint
self._embedding_dim = self._endpoints[0].embedder.embedding_dim
self._model_name = f"rotational({len(self._endpoints)} endpoints)"
self._max_tokens = self._endpoints[0].embedder.max_tokens
@property
def embedding_dim(self) -> int:
"""Return embedding dimensions."""
return self._embedding_dim
@property
def model_name(self) -> str:
"""Return model name."""
return self._model_name
@property
def max_tokens(self) -> int:
"""Return maximum token limit."""
return self._max_tokens
@property
def endpoint_count(self) -> int:
"""Return number of configured endpoints."""
return len(self._endpoints)
@property
def available_endpoint_count(self) -> int:
"""Return number of available endpoints."""
return sum(1 for ep in self._endpoints if ep.is_available())
def get_endpoint_stats(self) -> List[Dict[str, Any]]:
"""Get statistics for all endpoints."""
stats = []
for ep in self._endpoints:
stats.append({
"model": ep.config.model,
"status": ep.status.value,
"total_requests": ep.total_requests,
"total_failures": ep.total_failures,
"avg_latency_ms": round(ep.avg_latency_ms, 2),
"health_score": round(ep.health_score, 3),
"active_requests": ep.active_requests,
})
return stats
def _select_endpoint(self) -> Optional[EndpointState]:
"""Select best available endpoint based on strategy."""
available = [ep for ep in self._endpoints if ep.is_available()]
if not available:
return None
if self.strategy == SelectionStrategy.ROUND_ROBIN:
with self._lock:
endpoint = available[self._round_robin_index % len(available)]
self._round_robin_index += 1
return endpoint
elif self.strategy == SelectionStrategy.LATENCY_AWARE:
# Sort by health score (descending) and pick top candidate
# Add small random factor to prevent thundering herd
scored = [(ep, ep.health_score + random.uniform(0, 0.1)) for ep in available]
scored.sort(key=lambda x: x[1], reverse=True)
return scored[0][0]
elif self.strategy == SelectionStrategy.WEIGHTED_RANDOM:
# Weighted random selection based on health scores
scores = [ep.health_score for ep in available]
total = sum(scores)
if total == 0:
return random.choice(available)
weights = [s / total for s in scores]
return random.choices(available, weights=weights, k=1)[0]
return available[0]
def _parse_retry_after(self, error: Exception) -> Optional[float]:
"""Extract Retry-After value from error if available."""
error_str = str(error)
# Try to find Retry-After in error message
import re
match = re.search(r'[Rr]etry[- ][Aa]fter[:\s]+(\d+)', error_str)
if match:
return float(match.group(1))
return None
def _is_rate_limit_error(self, error: Exception) -> bool:
"""Check if error is a rate limit error."""
error_str = str(error).lower()
return any(x in error_str for x in ["429", "rate limit", "too many requests"])
def _is_retryable_error(self, error: Exception) -> bool:
"""Check if error is retryable (not auth/config error)."""
error_str = str(error).lower()
# Retryable errors
if any(x in error_str for x in ["429", "rate limit", "502", "503", "504",
"timeout", "connection", "service unavailable"]):
return True
# Non-retryable errors (auth, config)
if any(x in error_str for x in ["401", "403", "invalid", "authentication",
"unauthorized", "api key"]):
return False
# Default to retryable for unknown errors
return True
def embed_to_numpy(self, texts: str | Iterable[str], **kwargs) -> np.ndarray:
"""Embed texts using load-balanced endpoint selection.
Args:
texts: Single text or iterable of texts to embed.
**kwargs: Additional arguments passed to underlying embedder.
Returns:
numpy.ndarray: Array of shape (n_texts, embedding_dim) containing embeddings.
Raises:
RuntimeError: If all endpoints fail after retries.
"""
if isinstance(texts, str):
texts = [texts]
else:
texts = list(texts)
last_error: Optional[Exception] = None
tried_endpoints: set = set()
for attempt in range(self.max_retries + 1):
endpoint = self._select_endpoint()
if endpoint is None:
# All endpoints unavailable, wait for shortest cooldown
min_cooldown = min(
(ep.cooldown_until - time.time() for ep in self._endpoints
if ep.status == EndpointStatus.COOLING),
default=self.default_cooldown
)
if min_cooldown > 0 and attempt < self.max_retries:
wait_time = min(min_cooldown, 30) # Cap wait at 30s
logger.warning(f"All endpoints busy, waiting {wait_time:.1f}s...")
time.sleep(wait_time)
continue
break
# Track tried endpoints to avoid infinite loops
endpoint_id = id(endpoint)
if endpoint_id in tried_endpoints and len(tried_endpoints) >= len(self._endpoints):
# Already tried all endpoints
break
tried_endpoints.add(endpoint_id)
# Acquire slot
with endpoint.lock:
endpoint.active_requests += 1
try:
start_time = time.time()
result = endpoint.embedder.embed_to_numpy(texts, **kwargs)
latency_ms = (time.time() - start_time) * 1000
# Record success
endpoint.record_success(latency_ms)
return result
except Exception as e:
last_error = e
endpoint.record_failure()
if self._is_rate_limit_error(e):
# Rate limited - set cooldown
retry_after = self._parse_retry_after(e) or self.default_cooldown
endpoint.set_cooldown(retry_after)
logger.warning(f"Endpoint {endpoint.config.model} rate limited, "
f"cooling for {retry_after}s")
elif not self._is_retryable_error(e):
# Permanent failure (auth error, etc.)
endpoint.mark_failed()
logger.error(f"Endpoint {endpoint.config.model} failed permanently: {e}")
else:
# Temporary error - short cooldown
endpoint.set_cooldown(5.0)
logger.warning(f"Endpoint {endpoint.config.model} error: {e}")
finally:
with endpoint.lock:
endpoint.active_requests -= 1
# All retries exhausted
available = self.available_endpoint_count
raise RuntimeError(
f"All embedding attempts failed after {self.max_retries + 1} tries. "
f"Available endpoints: {available}/{len(self._endpoints)}. "
f"Last error: {last_error}"
)
def create_rotational_embedder(
endpoints_config: List[Dict[str, Any]],
strategy: str = "latency_aware",
default_cooldown: float = 60.0,
) -> RotationalEmbedder:
"""Factory function to create RotationalEmbedder from config dicts.
Args:
endpoints_config: List of endpoint configuration dicts with keys:
- model: Model identifier (required)
- api_key: API key (optional)
- api_base: API base URL (optional)
- weight: Request weight (optional, default 1.0)
- max_concurrent: Max concurrent requests (optional, default 4)
strategy: Selection strategy name (round_robin, latency_aware, weighted_random)
default_cooldown: Default cooldown seconds for rate limits
Returns:
Configured RotationalEmbedder instance
Example config:
endpoints_config = [
{"model": "openai/text-embedding-3-small", "api_key": "sk-..."},
{"model": "azure/my-embedding", "api_base": "https://...", "api_key": "..."},
]
"""
endpoints = []
for cfg in endpoints_config:
endpoints.append(EndpointConfig(
model=cfg["model"],
api_key=cfg.get("api_key"),
api_base=cfg.get("api_base"),
weight=cfg.get("weight", 1.0),
max_concurrent=cfg.get("max_concurrent", 4),
))
strategy_enum = SelectionStrategy[strategy.upper()]
return RotationalEmbedder(
endpoints=endpoints,
strategy=strategy_enum,
default_cooldown=default_cooldown,
)
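
For completeness, a hedged sketch of how the factory above could be wired up; the model names, environment variables, and import path are placeholders, not values taken from the diff:

import os

# Hypothetical import path; create_rotational_embedder is the factory defined above.
from codexlens.embedders.rotational import create_rotational_embedder

endpoints_config = [
    {"model": "openai/text-embedding-3-small", "api_key": os.environ.get("OPENAI_API_KEY")},
    {"model": "openai/text-embedding-3-small", "api_key": os.environ.get("OPENAI_API_KEY_2"),
     "weight": 2.0, "max_concurrent": 8},
]

embedder = create_rotational_embedder(
    endpoints_config,
    strategy="latency_aware",
    default_cooldown=30.0,
)
vectors = embedder.embed_to_numpy(["first snippet", "second snippet"])
print(vectors.shape)                   # (2, embedder.embedding_dim)
print(embedder.get_endpoint_stats())   # per-endpoint status, latency, and health score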

Some files were not shown because too many files have changed in this diff.