diff --git a/.claude/skills/skill-generator/phases/03-phase-generation.md b/.claude/skills/skill-generator/phases/03-phase-generation.md
index fd360479..09a2abea 100644
--- a/.claude/skills/skill-generator/phases/03-phase-generation.md
+++ b/.claude/skills/skill-generator/phases/03-phase-generation.md
@@ -1,17 +1,42 @@
 # Phase 3: Phase Generation
 
-根据执行模式生成 Phase 文件。
+根据执行模式生成 Phase 文件，包含声明式工作流编排和上下文策略支持。
 
 ## Objective
 
-- Sequential 模式：生成顺序 Phase 文件 (`01-xx.md`, `02-xx.md`, ...)
+- Sequential 模式：生成顺序 Phase 文件 + **声明式编排器**
 - Autonomous 模式：生成编排器和动作文件
+- 支持 **文件上下文** 和 **内存上下文** 两种策略
 
 ## Input
 
 - 依赖: `skill-config.json`, SKILL.md (Phase 1-2 产出)
 - 模板: `templates/sequential-phase.md`, `templates/autonomous-*.md`
 
+## 上下文策略 (P0 增强)
+
+根据 `config.context_strategy` 生成不同的上下文管理代码：
+
+| 策略 | 适用场景 | 优点 | 缺点 |
+|------|----------|------|------|
+| `file` | 复杂多阶段任务 | 持久化、可调试、可恢复 | IO 开销 |
+| `memory` | 简单线性任务 | 速度快 | 无法恢复、调试困难 |
+
+```javascript
+const CONTEXT_STRATEGIES = {
+  file: {  // snippets that keep context as JSON files under ${workDir}/context
+    read(name) { return `JSON.parse(Read(\`\${workDir}/context/${name}.json\`))`; },
+    write(name, expr) { return `Write(\`\${workDir}/context/${name}.json\`, JSON.stringify(${expr}, null, 2))`; },
+    init: 'Bash(`mkdir -p "${workDir}/context"`)'
+  },
+  memory: {  // snippets that keep context on the in-memory state.context object
+    read(name) { return `state.context.${name}`; },
+    write(name, expr) { return `state.context.${name} = ${expr}`; },
+    init: "state.context = {}"
+  }
+};
+```
+
 ## Execution Steps
 
 ### Step 1: 读取配置和模板
@@ -19,19 +44,27 @@
 ```javascript
 const config = JSON.parse(Read(`${workDir}/skill-config.json`));
 const skillDir = `.claude/skills/${config.skill_name}`;
+const contextStrategy = config.context_strategy || 'file'; // 默认文件策略
 
 // 读取模板
-const sequentialTemplate = Read(`${skillRoot}/templates/sequential-phase.md`);
-const orchestratorTemplate = Read(`${skillRoot}/templates/autonomous-orchestrator.md`);
-const actionTemplate = Read(`${skillRoot}/templates/autonomous-action.md`);
+const skillRoot = '.claude/skills/skill-generator';
 ```
 
-### Step 2: Sequential 模式 - 生成阶段文件
+### Step 2: Sequential 模式 - 生成阶段文件 + 声明式编排器
 
 ```javascript
 if (config.execution_mode === 'sequential') {
   const phases = config.sequential_config.phases;
   
+  // ========== P0 增强: 生成声明式编排器 ==========
+  const workflowOrchestrator = generateSequentialOrchestrator(config, phases);
+  Write(`${skillDir}/phases/_orchestrator.md`, workflowOrchestrator);
+  
+  // ========== P0 增强: 生成工作流定义 ==========
+  const workflowDef = generateWorkflowDefinition(config, phases);
+  Write(`${skillDir}/workflow.json`, JSON.stringify(workflowDef, null, 2));
+  
+  // 生成各阶段文件
   for (let i = 0; i < phases.length; i++) {
     const phase = phases[i];
     const prevPhase = i > 0 ? phases[i-1] : null;
@@ -45,14 +78,204 @@ if (config.execution_mode === 'sequential') {
       input: prevPhase ? prevPhase.output : "user input",
       output: phase.output,
       nextPhase: nextPhase ? nextPhase.id : null,
-      config: config
+      config: config,
+      contextStrategy: contextStrategy
     });
     
     Write(`${skillDir}/phases/${phase.id}.md`, content);
   }
 }
 
+// P0 enhancement: builds the declarative workflow definition that gets written to workflow.json.
+function generateWorkflowDefinition(config, phases) {
+  // Agent settings applied to any phase that does not declare its own.
+  const fallbackAgent = () => ({ type: "universal-executor", run_in_background: false });
+
+  // Per-phase settings; each phase takes the previous phase's output as its input.
+  const describePhase = (phase, index) => {
+    const input = index > 0 ? phases[index - 1].output : null;
+    return {
+      id: phase.id,
+      name: phase.name,
+      order: index + 1,
+      input,
+      output: phase.output,
+      parallel: phase.parallel || false,    // optional parallel execution
+      condition: phase.condition || null,   // optional conditional execution
+      agent: phase.agent || fallbackAgent()
+    };
+  };
+
+  return {
+    skill_name: config.skill_name,
+    version: "1.0.0",
+    execution_mode: "sequential",
+    context_strategy: config.context_strategy || "file",
+
+    // Declarative phase order (similar to software-manual's agents_to_run)
+    phases_to_run: phases.map(({ id }) => id),
+    phases: phases.map(describePhase),
+    // Termination policy
+    termination: {
+      on_success: "all_phases_completed",
+      on_error: "stop_and_report",
+      max_retries: 3
+    }
+  };
+}
+
+// P0 enhancement: returns the markdown for phases/_orchestrator.md; an unset config.context_strategy is treated as 'file', matching workflow.json.
+function generateSequentialOrchestrator(config, phases) {
+  return `# Sequential Orchestrator
+
+声明式工作流编排器，按 \`workflow.json\` 定义顺序执行阶段。
+
+## 工作流定义
+
+\`\`\`javascript
+const workflow = JSON.parse(Read(\`\${skillDir}/workflow.json\`));
+\`\`\`
+
+## 编排逻辑
+
+\`\`\`javascript
+async function runSequentialWorkflow(workDir) {
+  const workflow = JSON.parse(Read(\`\${skillDir}/workflow.json\`));
+  const contextStrategy = workflow.context_strategy;
+  
+  // 初始化上下文
+  ${(config.context_strategy || 'file') === 'file' ? 
+    `Bash(\`mkdir -p "\${workDir}/context"\`);` :
+    `const state = { context: {} };`}
+  
+  // 执行状态追踪
+  const execution = {
+    started_at: new Date().toISOString(),
+    phases_completed: [],
+    current_phase: null,
+    errors: []
+  };
+  
+  Write(\`\${workDir}/execution-state.json\`, JSON.stringify(execution, null, 2));
+  
+  // 按声明顺序执行阶段
+  for (const phaseId of workflow.phases_to_run) {
+    const phaseConfig = workflow.phases.find(p => p.id === phaseId);
+    
+    // 更新执行状态
+    execution.current_phase = phaseId;
+    Write(\`\${workDir}/execution-state.json\`, JSON.stringify(execution, null, 2));
+    
+    console.log(\`[Orchestrator] Executing: \${phaseId}\`);
+    
+    try {
+      // 检查条件执行
+      if (phaseConfig.condition) {
+        const shouldRun = evaluateCondition(phaseConfig.condition, execution);
+        if (!shouldRun) {
+          console.log(\`[Orchestrator] Skipping \${phaseId} (condition not met)\`);
+          continue;
+        }
+      }
+      
+      // 执行阶段
+      const result = await executePhase(phaseId, phaseConfig, workDir);
+      
+      // 记录完成
+      execution.phases_completed.push({
+        id: phaseId,
+        completed_at: new Date().toISOString(),
+        output: phaseConfig.output
+      });
+      
+    } catch (error) {
+      execution.errors.push({
+        phase: phaseId,
+        message: error.message,
+        timestamp: new Date().toISOString()
+      });
+      
+      // 错误处理策略
+      if (workflow.termination.on_error === 'stop_and_report') {
+        console.error(\`[Orchestrator] Failed at \${phaseId}: \${error.message}\`);
+        break;
+      }
+    }
+    
+    Write(\`\${workDir}/execution-state.json\`, JSON.stringify(execution, null, 2));
+  }
+  
+  // 完成
+  execution.current_phase = null;
+  execution.completed_at = new Date().toISOString();
+  Write(\`\${workDir}/execution-state.json\`, JSON.stringify(execution, null, 2));
+  
+  return execution;
+}
+
+async function executePhase(phaseId, phaseConfig, workDir) {
+  const phasePrompt = Read(\`\${skillDir}/phases/\${phaseId}.md\`);
+  
+  // 使用 Task 调用 Agent
+  const result = await Task({
+    subagent_type: phaseConfig.agent?.type || 'universal-executor',
+    run_in_background: phaseConfig.agent?.run_in_background || false,
+    prompt: \`
+[PHASE] \${phaseId}
+[WORK_DIR] \${workDir}
+[INPUT] \${phaseConfig.input ? \`\${workDir}/\${phaseConfig.input}\` : 'None'}
+[OUTPUT] \${workDir}/\${phaseConfig.output}
+
+\${phasePrompt}
+\`
+  });
+  
+  return JSON.parse(result);
+}
+\`\`\`
+
+## 阶段执行计划
+
+| Order | Phase | Input | Output | Agent |
+|-------|-------|-------|--------|-------|
+${phases.map((p, i) => 
+  `| ${i+1} | ${p.id} | ${i > 0 ? phases[i-1].output : '-'} | ${p.output} | ${p.agent?.type || 'universal-executor'} |`
+).join('\n')}
+
+## 错误恢复
+
+\`\`\`javascript
+// 从指定阶段恢复执行
+async function resumeFromPhase(phaseId, workDir) {
+  const workflow = JSON.parse(Read(\`\${skillDir}/workflow.json\`));
+  const startIndex = workflow.phases_to_run.indexOf(phaseId);
+  
+  if (startIndex === -1) {
+    throw new Error(\`Phase not found: \${phaseId}\`);
+  }
+  
+  // 从指定阶段开始执行
+  const remainingPhases = workflow.phases_to_run.slice(startIndex);
+  // ...继续执行
+}
+\`\`\`
+`;
+}
+
+// 生成阶段文件（增强上下文策略支持）
 function generateSequentialPhase(params) {
+  const contextCode = params.contextStrategy === 'file' ? {
+    readPrev: `const prevOutput = JSON.parse(Read(\`\${workDir}/${params.input}\`));`,
+    writeResult: `Write(\`\${workDir}/${params.output}\`, JSON.stringify(result, null, 2));`,
+    readContext: (key) => `JSON.parse(Read(\`\${workDir}/context/${key}.json\`))`,
+    writeContext: (key) => `Write(\`\${workDir}/context/${key}.json\`, JSON.stringify(data, null, 2))`
+  } : {
+    readPrev: `const prevOutput = state.context.prevPhaseOutput;`,
+    writeResult: `state.context.${params.phaseId.replace(/-/g, '_')}_output = result;`,
+    readContext: (key) => `state.context.${key}`,
+    writeContext: (key) => `state.context.${key} = data`
+  };
+
   return `# Phase ${params.phaseNumber}: ${params.phaseName}
 
 ${params.phaseDescription}
@@ -66,16 +289,15 @@ ${params.phaseDescription}
 
 - 依赖: \`${params.input}\`
 - 配置: \`{workDir}/skill-config.json\`
+- 上下文策略: \`${params.contextStrategy}\`
 
 ## Execution Steps
 
-### Step 1: 准备工作
+### Step 1: 读取输入
 
 \`\`\`javascript
-// 读取上一阶段产出
-${params.phaseNumber > 1 ? 
-  `const prevOutput = JSON.parse(Read(\`\${workDir}/${params.input}\`));` : 
-  `// 首阶段，直接从配置开始`}
+// 上下文策略: ${params.contextStrategy}
+${params.phaseNumber > 1 ? contextCode.readPrev : '// 首阶段，直接从配置开始'}
 \`\`\`
 
 ### Step 2: 核心处理
@@ -83,26 +305,43 @@ ${params.phaseNumber > 1 ?
 \`\`\`javascript
 // TODO: 实现核心逻辑
 const result = {
-  // 处理结果
+  status: 'completed',
+  data: {
+    // 处理结果
+  },
+  metadata: {
+    phase: '${params.phaseId}',
+    timestamp: new Date().toISOString()
+  }
 };
 \`\`\`
 
 ### Step 3: 输出结果
 
 \`\`\`javascript
-Write(\`\${workDir}/${params.output}\`, JSON.stringify(result, null, 2));
+// 写入阶段产出 (上下文策略: ${params.contextStrategy})
+${contextCode.writeResult}
+
+// 返回简要信息给编排器
+return {
+  status: 'completed',
+  output_file: '${params.output}',
+  summary: '阶段 ${params.phaseNumber} 完成'
+};
 \`\`\`
 
 ## Output
 
 - **File**: \`${params.output}\`
 - **Format**: ${params.output.endsWith('.json') ? 'JSON' : 'Markdown'}
+- **Context Strategy**: ${params.contextStrategy}
 
 ## Quality Checklist
 
 - [ ] 输入数据验证通过
 - [ ] 核心逻辑执行成功
 - [ ] 输出格式正确
+- [ ] 上下文正确保存
 
 ${params.nextPhase ? 
   `## Next Phase\n\n→ [Phase ${params.phaseNumber + 1}: ${params.nextPhase}](${params.nextPhase}.md)` : 
@@ -111,186 +350,271 @@ ${params.nextPhase ?
 }
 ```
 
-### Step 3: Autonomous 模式 - 生成编排器
+### Step 3: Autonomous 模式 - 生成编排器 (增强版)
 
 ```javascript
 if (config.execution_mode === 'autonomous' || config.execution_mode === 'hybrid') {
+  const contextStrategy = config.context_strategy || 'file';
   
-  // 生成状态 Schema
-  const stateSchema = generateStateSchema(config);
+  // 生成状态 Schema (增强文件策略支持)
+  const stateSchema = generateStateSchema(config, contextStrategy);
   Write(`${skillDir}/phases/state-schema.md`, stateSchema);
   
-  // 生成编排器
-  const orchestrator = generateOrchestrator(config);
+  // 生成编排器 (增强版)
+  const orchestrator = generateEnhancedOrchestrator(config, contextStrategy);
   Write(`${skillDir}/phases/orchestrator.md`, orchestrator);
   
+  // 生成动作目录
+  const actionCatalog = generateActionCatalog(config);
+  Write(`${skillDir}/specs/action-catalog.md`, actionCatalog);
+  
   // 生成动作文件
   for (const action of config.autonomous_config.actions) {
-    const actionContent = generateAction(action, config);
+    const actionContent = generateEnhancedAction(action, config, contextStrategy);
     Write(`${skillDir}/phases/actions/${action.id}.md`, actionContent);
   }
 }
 
-function generateStateSchema(config) {
-  return `# State Schema
-
-## 状态文件
-
-位置: \`{workDir}/state.json\`
-
-## 结构定义
-
-\`\`\`typescript
-interface ${toPascalCase(config.skill_name)}State {
-  // 元信息
-  skill_name: "${config.skill_name}";
-  started_at: string;
-  updated_at: string;
-  
-  // 执行状态
-  status: 'pending' | 'running' | 'completed' | 'failed';
-  current_action: string | null;
-  completed_actions: string[];
-  
-  // 业务数据
-${config.autonomous_config.state_schema?.fields?.map(f => 
-  `  ${f.name}: ${f.type};  // ${f.description}`
-).join('\n') || '  context: Record<string, any>;'}
-  
-  // 错误追踪
-  errors: Array<{
-    action: string;
-    message: string;
-    timestamp: string;
-  }>;
-  error_count: number;
-}
-\`\`\`
-
-## 初始状态
-
-\`\`\`json
-{
-  "skill_name": "${config.skill_name}",
-  "started_at": "",
-  "updated_at": "",
-  "status": "pending",
-  "current_action": null,
-  "completed_actions": [],
-${config.autonomous_config.state_schema?.fields?.map(f => 
-  `  "${f.name}": ${getDefaultValue(f.type)}`
-).join(',\n') || '  "context": {}'}
-  "errors": [],
-  "error_count": 0
-}
-\`\`\`
-
-## 状态转换规则
-
-| 当前状态 | 触发条件 | 目标状态 |
-|----------|----------|----------|
-| pending | 首次执行 | running |
-| running | 动作完成 | running |
-| running | 所有任务完成 | completed |
-| running | 错误超限 | failed |
-`;
-}
-
-function generateOrchestrator(config) {
+// 增强版编排器生成
+function generateEnhancedOrchestrator(config, contextStrategy) {
   const actions = config.autonomous_config.actions;
   
-  return `# Orchestrator
+  return `# Orchestrator (Enhanced)
 
-## Role
+增强版编排器，支持声明式动作调度和文件上下文策略。
 
-根据当前状态选择并执行下一个动作。
+## 配置
 
-## State Reading
+- **上下文策略**: ${contextStrategy}
+- **终止条件**: ${config.autonomous_config.termination_conditions?.join(', ') || 'task_completed'}
+
+## 声明式动作目录
 
 \`\`\`javascript
-const state = JSON.parse(Read(\`\${workDir}/state.json\`));
+const ACTION_CATALOG = ${JSON.stringify(actions.map(a => ({
+  id: a.id,
+  name: a.name,
+  preconditions: a.preconditions || [],
+  effects: a.effects || [],
+  priority: a.priority || 0
+})), null, 2)};
 \`\`\`
 
-## Decision Logic
+## 上下文管理 (${contextStrategy} 策略)
+
+\`\`\`javascript
+const ContextManager = {
+  ${contextStrategy === 'file' ? `
+  // 文件策略: 持久化到 .scratchpad
+  init: (workDir) => {
+    Bash(\`mkdir -p "\${workDir}/context"\`);
+    Write(\`\${workDir}/state.json\`, JSON.stringify(initialState, null, 2));
+  },
+  
+  readState: (workDir) => JSON.parse(Read(\`\${workDir}/state.json\`)),
+  
+  writeState: (workDir, state) => {
+    state.updated_at = new Date().toISOString();
+    Write(\`\${workDir}/state.json\`, JSON.stringify(state, null, 2));
+  },
+  
+  readContext: (workDir, key) => {
+    try {
+      return JSON.parse(Read(\`\${workDir}/context/\${key}.json\`));
+    } catch { return null; }
+  },
+  
+  writeContext: (workDir, key, data) => {
+    Write(\`\${workDir}/context/\${key}.json\`, JSON.stringify(data, null, 2));
+  }` : `
+  // 内存策略: 仅在运行时保持
+  state: null,
+  context: {},
+  
+  init: (workDir) => {
+    ContextManager.state = { ...initialState };
+    ContextManager.context = {};
+  },
+  
+  readState: () => ContextManager.state,
+  
+  writeState: (workDir, state) => {
+    state.updated_at = new Date().toISOString();
+    ContextManager.state = state;
+  },
+  
+  readContext: (workDir, key) => ContextManager.context[key],
+  
+  writeContext: (workDir, key, data) => {
+    ContextManager.context[key] = data;
+  }`}
+};
+\`\`\`
+
+## 决策逻辑
 
 \`\`\`javascript
 function selectNextAction(state) {
-  // 1. 检查终止条件
+  // 1. 终止条件检查
 ${config.autonomous_config.termination_conditions?.map(c => 
   `  if (${getTerminationCheck(c)}) return null;`
 ).join('\n') || '  if (state.status === "completed") return null;'}
   
-  // 2. 错误检查
+  // 2. 错误限制检查
   if (state.error_count >= 3) return 'action-abort';
   
-  // 3. 根据状态选择动作
-${actions.map(a => 
-  `  if (${getPreconditionCheck(a)}) return '${a.id}';`
-).join('\n')}
+  // 3. 按优先级选择满足前置条件的动作
+  const availableActions = ACTION_CATALOG
+    .filter(a => checkPreconditions(a.preconditions, state))
+    .filter(a => !state.completed_actions.includes(a.id))
+    .sort((a, b) => b.priority - a.priority);
   
-  // 4. 默认: 完成
+  if (availableActions.length > 0) {
+    return availableActions[0].id;
+  }
+  
+  // 4. 默认完成
   return 'action-complete';
 }
+
+function checkPreconditions(conditions, state) {
+  if (!conditions || conditions.length === 0) return true;
+  return conditions.every(cond => {
+    // 支持多种条件格式
+    if (cond.includes('===')) {
+      const [left, right] = cond.split('===').map(s => s.trim());
+      return eval(\`state.\${left}\`) === eval(right);
+    }
+    return state[cond] === true;
+  });
+}
 \`\`\`
 
-## Execution Loop
+## 执行循环 (增强版)
 
 \`\`\`javascript
-async function runOrchestrator() {
-  while (true) {
-    // 读取状态
-    const state = JSON.parse(Read(\`\${workDir}/state.json\`));
+async function runOrchestrator(workDir) {
+  console.log('=== Orchestrator Started ===');
+  console.log(\`Context Strategy: ${contextStrategy}\`);
+  
+  // 初始化
+  ContextManager.init(workDir);
+  
+  let iteration = 0;
+  const MAX_ITERATIONS = 100;
+  
+  while (iteration < MAX_ITERATIONS) {
+    iteration++;
     
-    // 选择动作
+    // 1. 读取状态
+    const state = ContextManager.readState(workDir);
+    console.log(\`[Iteration \${iteration}] Status: \${state.status}, Completed: \${state.completed_actions.length}\`);
+    
+    // 2. 选择动作
     const actionId = selectNextAction(state);
+    
     if (!actionId) {
-      console.log("任务完成或终止");
+      console.log('=== All actions completed ===');
+      state.status = 'completed';
+      ContextManager.writeState(workDir, state);
       break;
     }
     
-    // 更新当前动作
-    state.current_action = actionId;
-    state.updated_at = new Date().toISOString();
-    Write(\`\${workDir}/state.json\`, JSON.stringify(state, null, 2));
+    console.log(\`[Iteration \${iteration}] Executing: \${actionId}\`);
     
-    // 执行动作
+    // 3. 更新当前动作
+    state.current_action = actionId;
+    ContextManager.writeState(workDir, state);
+    
+    // 4. 执行动作
     try {
-      const result = await executeAction(actionId, state);
+      const actionPrompt = Read(\`\${skillDir}/phases/actions/\${actionId}.md\`);
       
-      // 更新状态
+      const result = await Task({
+        subagent_type: 'universal-executor',
+        run_in_background: false,
+        prompt: \`
+[STATE]
+\${JSON.stringify(state, null, 2)}
+
+[WORK_DIR]
+\${workDir}
+
+[CONTEXT_STRATEGY]
+${contextStrategy}
+
+[ACTION]
+\${actionPrompt}
+
+[RETURN FORMAT]
+Return JSON: { "status": "completed"|"failed", "stateUpdates": {...}, "summary": "..." }
+\`
+      });
+      
+      const actionResult = JSON.parse(result);
+      
+      // 5. 更新状态
       state.completed_actions.push(actionId);
       state.current_action = null;
-      Object.assign(state, result.stateUpdates);
+      Object.assign(state, actionResult.stateUpdates || {});
+      
+      console.log(\`[Iteration \${iteration}] Completed: \${actionResult.summary || actionId}\`);
       
     } catch (error) {
+      console.error(\`[Iteration \${iteration}] Error: \${error.message}\`);
       state.errors.push({
         action: actionId,
         message: error.message,
         timestamp: new Date().toISOString()
       });
       state.error_count++;
+      state.current_action = null;
     }
     
-    Write(\`\${workDir}/state.json\`, JSON.stringify(state, null, 2));
+    ContextManager.writeState(workDir, state);
   }
+  
+  console.log('=== Orchestrator Finished ===');
+  return ContextManager.readState(workDir);
 }
 \`\`\`
 
-## Action Catalog
+## 动作目录
 
-| Action | Purpose | Preconditions |
-|--------|---------|---------------|
+| Action | Priority | Preconditions | Effects |
+|--------|----------|---------------|---------|
 ${actions.map(a => 
-  `| [${a.id}](actions/${a.id}.md) | ${a.description || a.name} | ${a.preconditions?.join(', ') || '-'} |`
+  `| [${a.id}](actions/${a.id}.md) | ${a.priority || 0} | ${a.preconditions?.join(', ') || '-'} | ${a.effects?.join(', ') || '-'} |`
 ).join('\n')}
 
-## Termination Conditions
+## 调试与恢复
 
-${config.autonomous_config.termination_conditions?.map(c => `- ${c}`).join('\n') || '- status === "completed"'}
+\`\`\`javascript
+// 从特定状态恢复
+async function resumeFromState(workDir) {
+  const state = ContextManager.readState(workDir);
+  console.log(\`Resuming from: \${state.current_action || 'start'}\`);
+  console.log(\`Completed actions: \${state.completed_actions.join(', ')}\`);
+  return runOrchestrator(workDir);
+}
+
+// 重试失败的动作
+async function retryFailedAction(workDir) {
+  const state = ContextManager.readState(workDir);
+  if (state.errors.length > 0) {
+    const lastError = state.errors[state.errors.length - 1];
+    console.log(\`Retrying: \${lastError.action}\`);
+    state.error_count = Math.max(0, state.error_count - 1);
+    ContextManager.writeState(workDir, state);
+    return runOrchestrator(workDir);
+  }
+}
+\`\`\`
 `;
 }
 
-function generateAction(action, config) {
+// 增强版动作生成
+function generateEnhancedAction(action, config, contextStrategy) {
   return `# Action: ${action.name}
 
 ${action.description || '执行 ' + action.name + ' 操作'}
@@ -301,23 +625,50 @@ ${action.description || 'TODO: 描述此动作的目的'}
 
 ## Preconditions
 
-${action.preconditions?.map(p => `- [ ] ${p}`).join('\n') || '- [ ] 无特殊前置条件'}
+${action.preconditions?.map(p => `- [ ] \`${p}\``).join('\n') || '- [ ] 无特殊前置条件'}
+
+## Context Access (${contextStrategy} 策略)
+
+\`\`\`javascript
+// 读取共享上下文
+${contextStrategy === 'file' ?
+  `const sharedData = JSON.parse(Read(\`\${workDir}/context/shared.json\`));` :
+  `const sharedData = state.context.shared || {};`}
+
+// 写入共享上下文
+${contextStrategy === 'file' ?
+  `Write(\`\${workDir}/context/shared.json\`, JSON.stringify(updatedData, null, 2));` :
+  `state.context.shared = updatedData;`}
+\`\`\`
 
 ## Execution
 
 \`\`\`javascript
-async function execute(state) {
-  // TODO: 实现动作逻辑
-  
+async function execute(state, workDir) {
   // 1. 读取必要数据
+  ${contextStrategy === 'file' ?
+    `const input = JSON.parse(Read(\`\${workDir}/context/input.json\`));` :
+    `const input = state.context.input || {};`}
   
   // 2. 执行核心逻辑
+  // TODO: 实现动作逻辑
+  const result = {
+    // 处理结果
+  };
   
-  // 3. 返回状态更新
+  // 3. 保存结果 (${contextStrategy} 策略)
+  ${contextStrategy === 'file' ?
+    `Write(\`\${workDir}/context/${action.id.replace(/-/g, '_')}_result.json\`, JSON.stringify(result, null, 2));` :
+    `// 结果通过 stateUpdates 返回`}
+  
+  // 4. 返回状态更新
   return {
+    status: 'completed',
     stateUpdates: {
-      // 更新的状态字段
-    }
+      completed_actions: [...state.completed_actions, '${action.id}'],
+      ${contextStrategy === 'memory' ? `context: { ...state.context, ${action.id.replace(/-/g, '_')}_result: result }` : '// 文件策略：结果已保存到文件'}
+    },
+    summary: '${action.name} 完成'
   };
 }
 \`\`\`
@@ -326,9 +677,11 @@ async function execute(state) {
 
 \`\`\`javascript
 return {
-  completed_actions: [...state.completed_actions, '${action.id}'],
-  // 其他状态更新
-${action.effects?.map(e => `  // Effect: ${e}`).join('\n') || ''}
+  status: 'completed',
+  stateUpdates: {
+    completed_actions: [...state.completed_actions, '${action.id}'],
+${action.effects?.map(e => `    // Effect: ${e}`).join('\n') || '    // 无额外效果'}
+  }
 };
 \`\`\`
 
@@ -338,11 +691,62 @@ ${action.effects?.map(e => `  // Effect: ${e}`).join('\n') || ''}
 |----------|----------|
 | 数据验证失败 | 返回错误，不更新状态 |
 | 执行异常 | 记录错误，增加 error_count |
+| 上下文读取失败 | 使用默认值或跳过 |
 
 ## Next Actions (Hints)
 
-- 成功时: 由编排器根据状态决定
-- 失败时: 重试或 \`action-abort\`
+- 成功: 由编排器根据 \`ACTION_CATALOG\` 优先级决定
+- 失败: 重试或 \`action-abort\`
+`;
+}
+
+// Builds specs/action-catalog.md (JSON catalog, dependency graph, priority table); sorts a copy so config's action order is untouched.
+function generateActionCatalog(config) {
+  const actions = config.autonomous_config.actions;
+  
+  return `# Action Catalog
+
+${config.display_name} 的可用动作目录（声明式）。
+
+## 动作定义
+
+\`\`\`json
+${JSON.stringify(actions.map(a => ({
+  id: a.id,
+  name: a.name,
+  description: a.description,
+  preconditions: a.preconditions || [],
+  effects: a.effects || [],
+  priority: a.priority || 0
+})), null, 2)}
+\`\`\`
+
+## 动作依赖图
+
+\`\`\`mermaid
+graph TD
+${actions.map((a, i) => {
+  const deps = a.preconditions?.filter(p => p.startsWith('completed_actions.includes'))
+    .map(p => p.match(/'([^']+)'/)?.[1])
+    .filter(Boolean) || [];
+  
+  if (deps.length === 0 && i === 0) {
+    return `    START((Start)) --> ${a.id.replace(/-/g, '_')}[${a.name}]`;
+  } else if (deps.length > 0) {
+    return deps.map(d => `    ${d.replace(/-/g, '_')} --> ${a.id.replace(/-/g, '_')}[${a.name}]`).join('\n');
+  }
+  return '';
+}).filter(Boolean).join('\n')}
+    ${actions[actions.length-1]?.id.replace(/-/g, '_') || 'last'} --> END((End))
+\`\`\`
+
+## 选择优先级
+
+| Priority | Action | Description |
+|----------|--------|-------------|
+${[...actions].sort((a, b) => (b.priority || 0) - (a.priority || 0)).map(a => 
+  `| ${a.priority || 0} | ${a.id} | ${a.description || a.name} |`
+).join('\n')}
+`;
+}
 ```
@@ -366,7 +770,8 @@ function getTerminationCheck(condition) {
   const checks = {
     'user_exit': 'state.status === "user_exit"',
     'error_limit': 'state.error_count >= 3',
-    'task_completed': 'state.status === "completed"'
+    'task_completed': 'state.status === "completed"',
+    'max_iterations': 'iteration >= MAX_ITERATIONS'
   };
   return checks[condition] || `state.${condition}`;
 }
@@ -381,14 +786,15 @@ function getPreconditionCheck(action) {
 
 ### Sequential 模式
 
-- `phases/01-{step}.md`
-- `phases/02-{step}.md`
-- ...
+- `phases/_orchestrator.md` (声明式编排器)
+- `workflow.json` (工作流定义)
+- `phases/01-{step}.md`, `phases/02-{step}.md`, ...
 
 ### Autonomous 模式
 
-- `phases/orchestrator.md`
+- `phases/orchestrator.md` (增强版编排器)
 - `phases/state-schema.md`
+- `specs/action-catalog.md` (声明式动作目录)
 - `phases/actions/action-{name}.md` (多个)
 
 ## Next Phase
diff --git a/.claude/skills/skill-generator/specs/cli-integration.md b/.claude/skills/skill-generator/specs/cli-integration.md
new file mode 100644
index 00000000..0cef186e
--- /dev/null
+++ b/.claude/skills/skill-generator/specs/cli-integration.md
@@ -0,0 +1,448 @@
+# CLI Integration Specification
+
+CCW CLI 集成规范，定义 Skill 中如何正确调用外部 CLI 工具。
+
+---
+
+## 执行模式
+
+### 1. 同步执行 (Blocking)
+
+适用于需要立即获取结果的场景。
+
+```javascript
+// Agent 调用 - 同步
+const result = Task({
+  subagent_type: 'universal-executor',
+  prompt: '执行任务...',
+  run_in_background: false  // 关键: 同步执行
+});
+
+// 结果立即可用
+console.log(result);
+```
+
+### 2. 异步执行 (Background)
+
+适用于长时间运行的 CLI 命令。
+
+```javascript
+// CLI 调用 - 异步
+const task = Bash({
+  command: 'ccw cli -p "..." --tool gemini --mode analysis',
+  run_in_background: true  // 关键: 后台执行
+});
+
+// 立即返回，不等待结果
+// task.task_id 可用于后续查询
+```
+
+---
+
+## CCW CLI 调用规范
+
+### 基础命令结构
+
+```bash
+ccw cli -p "<PROMPT>" --tool <gemini|qwen|codex> --mode <analysis|write>
+```
+
+### 参数说明
+
+| 参数 | 必需 | 说明 |
+|------|------|------|
+| `-p "<prompt>"` | ✓ | 提示词（使用双引号） |
+| `--tool <tool>` | ✓ | 工具选择: gemini, qwen, codex |
+| `--mode <mode>` | ✓ | 执行模式: analysis, write |
+| `--cd <path>` | - | 工作目录 |
+| `--includeDirs <dirs>` | - | 包含额外目录（逗号分隔） |
+| `--resume [id]` | - | 恢复会话 |
+
+### 模式选择
+
+```
+┌─ 分析/文档任务?
+│  └─→ --mode analysis (只读)
+│
+└─ 实现/修改任务?
+   └─→ --mode write (读写)
+```
+
+---
+
+## Agent 类型与选择
+
+### universal-executor
+
+通用执行器，最常用的 Agent 类型。
+
+```javascript
+Task({
+  subagent_type: 'universal-executor',
+  prompt: `
+执行任务:
+1. 读取配置文件
+2. 分析依赖关系
+3. 生成报告到 ${outputPath}
+  `,
+  run_in_background: false
+});
+```
+
+**适用场景**:
+- 多步骤任务执行
+- 文件操作（读/写/编辑）
+- 需要工具调用的任务
+
+### Explore
+
+代码探索 Agent，快速理解代码库。
+
+```javascript
+Task({
+  subagent_type: 'Explore',
+  prompt: `
+探索 src/ 目录:
+- 识别主要模块
+- 理解目录结构
+- 找到入口点
+
+Thoroughness: medium
+  `,
+  run_in_background: false
+});
+```
+
+**适用场景**:
+- 代码库探索
+- 文件发现
+- 结构理解
+
+### cli-explore-agent
+
+深度代码分析 Agent。
+
+```javascript
+Task({
+  subagent_type: 'cli-explore-agent',
+  prompt: `
+深度分析 src/auth/ 模块:
+- 认证流程
+- 会话管理
+- 安全机制
+  `,
+  run_in_background: false
+});
+```
+
+**适用场景**:
+- 深度代码理解
+- 设计模式识别
+- 复杂逻辑分析
+
+---
+
+## 会话管理
+
+### 会话恢复
+
+```javascript
+// 保存会话 ID
+const session = Bash({
+  command: 'ccw cli -p "初始分析..." --tool gemini --mode analysis',
+  run_in_background: true
+});
+
+// 后续恢复
+const continuation = Bash({
+  command: `ccw cli -p "继续分析..." --tool gemini --mode analysis --resume ${session.id}`,
+  run_in_background: true
+});
+```
+
+### 多会话合并
+
+```javascript
+// 合并多个会话的上下文
+const merged = Bash({
+  command: `ccw cli -p "汇总分析..." --tool gemini --mode analysis --resume ${id1},${id2}`,
+  run_in_background: true
+});
+```
+
+---
+
+## Skill 中的 CLI 集成模式
+
+### 模式 1: 单次调用
+
+简单任务，一次调用完成。
+
+```javascript
+// Phase execution: starts one background analysis call and waits for it to finish.
+async function executePhase(context) {
+  const result = Bash({
+    command: `ccw cli -p "
+PURPOSE: 分析项目结构
+TASK: 识别模块、依赖、入口点
+MODE: analysis
+CONTEXT: @src/**/*
+EXPECTED: JSON 格式的结构报告
+" --tool gemini --mode analysis --cd "${context.projectRoot}"`,
+    run_in_background: true,
+    timeout: 600000
+  });
+
+  // Wait for the background task; --cd above is quoted so a root containing spaces stays one argument.
+  return await waitForCompletion(result.task_id);
+}
+```
+
+### 模式 2: 链式调用
+
+多步骤任务，每步依赖前一步结果。
+
+```javascript
+async function executeChain(context) {
+  // Collect, then analyze, then generate: every step after the first resumes from
+  // the value returned by the step before it, so the calls run strictly in sequence.
+  const steps = ['collect', 'analyze', 'generate'];
+  let lastId;
+  for (const [index, step] of steps.entries()) {
+    lastId = index === 0
+      ? await runCLI(step, context)
+      : await runCLI(step, context, `--resume ${lastId}`);
+  }
+  return lastId;
+}
+
+async function runCLI(step, context, resumeFlag = '') {
+  const prompts = {
+    collect: 'PURPOSE: 收集代码文件...',
+    analyze: 'PURPOSE: 分析代码模式...',
+    generate: 'PURPOSE: 生成文档...'
+  };
+  // Runs one ccw step in the background; an unknown step fails fast instead of sending the prompt "undefined".
+  if (typeof prompts[step] !== 'string') throw new Error(`Unknown CLI step: ${step}`);
+  const result = Bash({
+    command: `ccw cli -p "${prompts[step]}" --tool gemini --mode analysis ${resumeFlag}`,
+    run_in_background: true
+  });
+  return await waitForCompletion(result.task_id);
+}
+```
+
+### 模式 3: 并行调用
+
+独立任务并行执行。
+
+```javascript
+async function executeParallel(context) {
+  // [analysis type, tool] pairs; the analyses are independent of one another.
+  const jobs = [
+    ['structure', 'gemini'],
+    ['dependencies', 'gemini'],
+    ['patterns', 'qwen']
+  ];
+
+  // Launch every CLI call in the background first and keep only its task id.
+  const launched = [];
+  for (const [type, tool] of jobs) {
+    const handle = Bash({
+      command: `ccw cli -p "分析 ${type}..." --tool ${tool} --mode analysis`,
+      run_in_background: true
+    });
+    launched.push(handle.task_id);
+  }
+
+  // Then wait for all of them together; results keep the launch order.
+  const pending = launched.map((taskId) => waitForCompletion(taskId));
+  return await Promise.all(pending);
+}
+```
+
+### 模式 4: Fallback 链
+
+工具失败时自动切换。
+
+```javascript
+async function executeWithFallback(context) {
+  // Try each tool in preference order and stop at the first successful result.
+  const candidates = ['gemini', 'qwen', 'codex'];
+  let result = null;
+  let index = 0;
+  let succeeded = false;
+  while (!succeeded && index < candidates.length) {
+    const tool = candidates[index++];
+    try {
+      result = await runWithTool(tool, context);
+      succeeded = Boolean(result.success);
+    } catch (error) {
+      // A thrown failure is only logged; the loop moves on to the next tool.
+      console.log(`${tool} failed, trying next...`);
+    }
+  }
+  if (result?.success) return result;
+  throw new Error('All tools failed');
+}
+
+async function runWithTool(tool, context) {
+  // Start the analysis with the given tool in the background, then wait on its task id.
+  const options = { run_in_background: true, timeout: 600000 };
+  const { task_id: taskId } = Bash({
+    command: `ccw cli -p "..." --tool ${tool} --mode analysis`,
+    ...options
+  });
+  return await waitForCompletion(taskId);
+}
+```
+
+---
+
+## 提示词模板集成
+
+### 引用协议模板
+
+```bash
+# 分析模式 - 必须引用 analysis-protocol.md
+ccw cli -p "
+RULES: $(cat ~/.claude/workflows/cli-templates/protocols/analysis-protocol.md)
+$(cat ~/.claude/workflows/cli-templates/prompts/analysis/02-analyze-code-patterns.txt)
+..." --tool gemini --mode analysis
+
+# 写入模式 - 必须引用 write-protocol.md
+ccw cli -p "
+RULES: $(cat ~/.claude/workflows/cli-templates/protocols/write-protocol.md)
+$(cat ~/.claude/workflows/cli-templates/prompts/development/02-implement-feature.txt)
+..." --tool codex --mode write
+```
+
+### 动态模板构建
+
+```javascript
+function buildPrompt(config) {
+  const { purpose, task, mode, context, expected, template } = config;
+  // Builds the ccw prompt text; the template include is emitted only when config.template is set.
+  const templateInclude = template ? ` $(cat ${template})` : '';
+  const protocolPath = mode === 'write'
+    ? '~/.claude/workflows/cli-templates/protocols/write-protocol.md'
+    : '~/.claude/workflows/cli-templates/protocols/analysis-protocol.md';
+  // Any mode other than 'write' falls back to the analysis protocol.
+  return `
+PURPOSE: ${purpose}
+TASK: ${task.map(t => `• ${t}`).join('\n')}
+MODE: ${mode}
+CONTEXT: ${context}
+EXPECTED: ${expected}
+RULES: $(cat ${protocolPath})${templateInclude}
+`;
+}
+```
+
+---
+
+## 超时配置
+
+### 推荐超时值
+
+| 任务类型 | 超时 (ms) | 说明 |
+|---------|----------|------|
+| 快速分析 | 300000 | 5 分钟 |
+| 标准分析 | 600000 | 10 分钟 |
+| 深度分析 | 1200000 | 20 分钟 |
+| 代码生成 | 1800000 | 30 分钟 |
+| 复杂任务 | 3600000 | 60 分钟 |
+
+### Codex 特殊处理
+
+Codex 需要更长的超时时间（建议 3x）。
+
+```javascript
+const timeout = tool === 'codex' ? baseTimeout * 3 : baseTimeout;
+
+Bash({
+  command: `ccw cli -p "..." --tool ${tool} --mode write`,
+  run_in_background: true,
+  timeout: timeout
+});
+```
+
+---
+
+## 错误处理
+
+### 常见错误
+
+| 错误 | 原因 | 处理 |
+|------|------|------|
+| ETIMEDOUT | 网络超时 | 重试或切换工具 |
+| Exit code 1 | 命令执行失败 | 检查参数，切换工具 |
+| Context overflow | 上下文过大 | 减少输入范围 |
+
+### 重试策略
+
+```javascript
+async function executeWithRetry(command, maxRetries = 3) {
+  // Runs `command` in the background up to maxRetries times; rethrows the last failure.
+  let lastError = null;
+  for (let attempt = 1; attempt <= maxRetries; attempt++) {
+    try {
+      const task = Bash({
+        command,
+        run_in_background: true,
+        timeout: 600000
+      });
+
+      return await waitForCompletion(task.task_id);
+    } catch (error) {
+      lastError = error;
+      console.log(`Attempt ${attempt} failed: ${error.message}`);
+
+      // Exponential backoff: the delay doubles with each attempt; no wait after the last one.
+      if (attempt < maxRetries) {
+        await sleep(Math.pow(2, attempt) * 1000);
+      }
+    }
+  }
+  // With maxRetries < 1 no attempt ran, so there is no captured error to rethrow.
+  throw lastError ?? new Error(`executeWithRetry made no attempts (maxRetries=${maxRetries})`);
+}
+```
+
+---
+
+## 最佳实践
+
+### 1. run_in_background 规则
+
+```
+Agent 调用 (Task):
+  run_in_background: false  → 同步，立即获取结果
+
+CLI 调用 (Bash + ccw cli):
+  run_in_background: true   → 异步，后台执行
+```
+
+### 2. 工具选择
+
+```
+分析任务: gemini > qwen
+生成任务: codex > gemini > qwen
+代码修改: codex > gemini
+```
+
+### 3. 会话管理
+
+- 相关任务使用 `--resume` 保持上下文
+- 独立任务不使用 `--resume`
+
+### 4. 提示词规范
+
+- 始终使用 PURPOSE/TASK/MODE/CONTEXT/EXPECTED/RULES 结构
+- 必须引用协议模板（analysis-protocol 或 write-protocol）
+- 使用 `$(cat ...)` 动态加载模板
+
+### 5. 结果处理
+
+- 持久化重要结果到 workDir
+- Brief returns: 路径 + 摘要，避免上下文溢出
+- JSON 格式便于后续处理
diff --git a/.claude/skills/skill-generator/specs/skill-requirements.md b/.claude/skills/skill-generator/specs/skill-requirements.md
index c37573fd..5b35510e 100644
--- a/.claude/skills/skill-generator/specs/skill-requirements.md
+++ b/.claude/skills/skill-generator/specs/skill-requirements.md
@@ -19,10 +19,32 @@
 
 | 字段 | 类型 | 必需 | 说明 |
 |------|------|------|------|
-| `execution_mode` | enum | ✓ | `sequential` \| `autonomous` |
+| `execution_mode` | enum | ✓ | `sequential` \| `autonomous` \| `hybrid` |
 | `phase_count` | number | 条件 | Sequential 模式下的阶段数 |
 | `action_count` | number | 条件 | Autonomous 模式下的动作数 |
 
+### 2.5 上下文策略 (P0 增强)
+
+| 字段 | 类型 | 必需 | 说明 |
+|------|------|------|------|
+| `context_strategy` | enum | ✓ | `file` \| `memory`（未指定时默认 `file`） |
+
+**策略对比**:
+
+| 策略 | 持久化 | 可调试 | 可恢复 | 适用场景 |
+|------|--------|--------|--------|----------|
+| `file` | ✓ | ✓ | ✓ | 复杂多阶段任务（推荐） |
+| `memory` | ✗ | ✗ | ✗ | 简单线性任务 |
+
+### 2.6 LLM 集成配置 (P1 增强)
+
+| 字段 | 类型 | 必需 | 说明 |
+|------|------|------|------|
+| `llm_integration` | object | 可选 | LLM 调用配置 |
+| `llm_integration.enabled` | boolean | - | 是否启用 LLM 调用 |
+| `llm_integration.default_tool` | enum | - | `gemini` \| `qwen` \| `codex` |
+| `llm_integration.fallback_chain` | string[] | - | 失败时的备选工具链 |
+| `llm_integration.mode` | enum | - | 默认调用模式：`analysis` \| `write` |
+
 ### 3. 工具依赖
 
 | 字段 | 类型 | 必需 | 说明 |
@@ -50,8 +72,19 @@ interface SkillConfig {
   triggers: string[];           // ["keyword1", "keyword2"]
   
   // 执行模式
-  execution_mode: 'sequential' | 'autonomous';
-  
+  execution_mode: 'sequential' | 'autonomous' | 'hybrid';
+
+  // 上下文策略 (P0 增强)
+  context_strategy: 'file' | 'memory';  // 默认: 'file'
+
+  // LLM 集成配置 (P1 增强)
+  llm_integration?: {
+    enabled: boolean;                    // 是否启用 LLM 调用
+    default_tool: 'gemini' | 'qwen' | 'codex';
+    fallback_chain: string[];            // ['gemini', 'qwen', 'codex']
+    mode: 'analysis' | 'write';          // 默认 mode
+  };
+
   // Sequential 模式配置
   sequential_config?: {
     phases: Array<{
@@ -211,7 +244,73 @@ AskUserQuestion({
 });
 ```
 
-### Phase 4: 工具依赖
+### Phase 4: 上下文策略 (P0 增强)
+
+```javascript
+AskUserQuestion({
+  questions: [
+    {
+      question: "选择上下文管理策略：",
+      header: "上下文策略",
+      multiSelect: false,
+      options: [
+        {
+          label: "文件策略 (file)",
+          description: "持久化到 .scratchpad，支持调试和恢复（推荐）"
+        },
+        {
+          label: "内存策略 (memory)",
+          description: "仅在运行时保持，速度快但无法恢复"
+        }
+      ]
+    }
+  ]
+});
+```
+
+### Phase 5: LLM 集成 (P1 增强)
+
+```javascript
+AskUserQuestion({
+  questions: [
+    {
+      question: "是否需要 LLM 调用能力？",
+      header: "LLM 集成",
+      multiSelect: false,
+      options: [
+        {
+          label: "启用 LLM 调用",
+          description: "使用 gemini/qwen/codex 进行分析或生成"
+        },
+        {
+          label: "不需要",
+          description: "仅使用本地工具"
+        }
+      ]
+    }
+  ]
+});
+
+// 如果启用 LLM
+if (llmEnabled) {
+  AskUserQuestion({
+    questions: [
+      {
+        question: "选择默认 LLM 工具：",
+        header: "LLM 工具",
+        multiSelect: false,
+        options: [
+          { label: "Gemini", description: "大上下文，适合分析任务（推荐）" },
+          { label: "Qwen", description: "代码生成能力强" },
+          { label: "Codex", description: "自主执行能力强，适合实现任务" }
+        ]
+      }
+    ]
+  });
+}
+```
+
+### Phase 6: 工具依赖
 
 ```javascript
 AskUserQuestion({
@@ -224,7 +323,8 @@ AskUserQuestion({
         { label: "基础工具", description: "Task, Read, Write, Glob, Grep, Bash" },
         { label: "用户交互", description: "AskUserQuestion" },
         { label: "Chrome 截图", description: "mcp__chrome__*" },
-        { label: "外部搜索", description: "mcp__exa__search" }
+        { label: "外部搜索", description: "mcp__exa__search" },
+        { label: "CCW CLI 调用", description: "ccw cli (gemini/qwen/codex)" }
       ]
     }
   ]
@@ -285,7 +385,7 @@ function validateSkillConfig(config) {
 
 ## 示例配置
 
-### Sequential 模式示例
+### Sequential 模式示例 (增强版)
 
 ```json
 {
@@ -294,11 +394,33 @@ function validateSkillConfig(config) {
   "description": "Generate API documentation from source code",
   "triggers": ["generate api docs", "api documentation"],
   "execution_mode": "sequential",
+  "context_strategy": "file",
+  "llm_integration": {
+    "enabled": true,
+    "default_tool": "gemini",
+    "fallback_chain": ["gemini", "qwen"],
+    "mode": "analysis"
+  },
   "sequential_config": {
     "phases": [
-      { "id": "01-scan", "name": "Code Scanning", "output": "endpoints.json" },
-      { "id": "02-parse", "name": "Schema Parsing", "output": "schemas.json" },
-      { "id": "03-generate", "name": "Doc Generation", "output": "api-docs.md" }
+      {
+        "id": "01-scan",
+        "name": "Code Scanning",
+        "output": "endpoints.json",
+        "agent": { "type": "universal-executor", "run_in_background": false }
+      },
+      {
+        "id": "02-analyze",
+        "name": "LLM Analysis",
+        "output": "analysis.json",
+        "agent": { "type": "llm", "tool": "gemini", "mode": "analysis" }
+      },
+      {
+        "id": "03-generate",
+        "name": "Doc Generation",
+        "output": "api-docs.md",
+        "agent": { "type": "universal-executor", "run_in_background": false }
+      }
     ]
   },
   "allowed_tools": ["Task", "Read", "Write", "Glob", "Grep", "Bash"],
diff --git a/.claude/skills/skill-generator/templates/code-analysis-action.md b/.claude/skills/skill-generator/templates/code-analysis-action.md
new file mode 100644
index 00000000..c0233186
--- /dev/null
+++ b/.claude/skills/skill-generator/templates/code-analysis-action.md
@@ -0,0 +1,503 @@
+# Code Analysis Action Template
+
+代码分析动作模板，用于在 Skill 中集成代码探索和分析能力。
+
+---
+
+## 配置结构
+
+```typescript
+interface CodeAnalysisActionConfig {
+  id: string;                    // "analyze-structure", "explore-patterns"
+  name: string;                  // "Code Structure Analysis"
+  type: 'code-analysis';         // 动作类型标识
+
+  // 分析范围
+  scope: {
+    paths: string[];             // 目标路径
+    patterns: string[];          // Glob 模式
+    excludes?: string[];         // 排除模式
+  };
+
+  // 分析类型
+  analysis_type: 'structure' | 'patterns' | 'dependencies' | 'quality' | 'security';
+
+  // Agent 配置
+  agent: {
+    type: 'Explore' | 'cli-explore-agent' | 'universal-executor';
+    thoroughness: 'quick' | 'medium' | 'very thorough';
+  };
+
+  // 输出配置
+  output: {
+    format: 'json' | 'markdown';
+    file: string;
+  };
+
+  // MCP 工具增强
+  mcp_tools?: string[];          // ['mcp__ace-tool__search_context']
+}
+```
+
+---
+
+## 模板生成函数
+
+```javascript
+/**
+ * Render the Markdown document for one code-analysis action.
+ *
+ * The returned string contains a heading, the scope bullets, and a fenced
+ * JavaScript block defining `execute<PascalCase(id)>(context)`. That generated
+ * function discovers files, optionally runs a semantic search, launches a
+ * `Task` with `agent.type`, aggregates the results and writes them to
+ * `<workDir>/<output.file>` as JSON or Markdown depending on `output.format`.
+ *
+ * @param {Object} config - Action config; reads `id`, `name`, `scope`,
+ *   `analysis_type`, `agent`, `output` and `mcp_tools` (defaults to `[]`).
+ * @returns {string} Markdown source of the action file.
+ *
+ * NOTE(review): `agent.thoroughness` is never emitted into the generated text.
+ * NOTE(review): any non-empty `mcp_tools` emits the same hard-coded
+ *   `mcp__ace_tool__search_context` call; the entries themselves are not read.
+ * NOTE(review): an empty `scope.excludes` array is truthy, so the excludes
+ *   bullet is emitted with no values.
+ * NOTE(review): `toPascalCase` is called but not defined in this snippet;
+ *   presumably it is shared with the LLM action template - confirm.
+ */
+function generateCodeAnalysisAction(config) {
+  const { id, name, scope, analysis_type, agent, output, mcp_tools = [] } = config;
+
+  return `
+# ${name}
+
+## Action: ${id}
+
+### 分析范围
+
+- **路径**: ${scope.paths.join(', ')}
+- **模式**: ${scope.patterns.join(', ')}
+${scope.excludes ? `- **排除**: ${scope.excludes.join(', ')}` : ''}
+
+### 执行逻辑
+
+\`\`\`javascript
+async function execute${toPascalCase(id)}(context) {
+  const workDir = context.workDir;
+  const results = [];
+
+  // 1. 文件发现
+  const files = await discoverFiles({
+    paths: ${JSON.stringify(scope.paths)},
+    patterns: ${JSON.stringify(scope.patterns)},
+    excludes: ${JSON.stringify(scope.excludes || [])}
+  });
+
+  console.log(\`Found \${files.length} files to analyze\`);
+
+  // 2. 使用 MCP 工具进行语义搜索（如果配置）
+  ${mcp_tools.length > 0 ? `
+  const semanticResults = await mcp__ace_tool__search_context({
+    project_root_path: context.projectRoot,
+    query: '${getQueryForAnalysisType(analysis_type)}'
+  });
+  results.push({ type: 'semantic', data: semanticResults });
+  ` : '// No MCP tools configured'}
+
+  // 3. 启动 Agent 进行深度分析
+  const agentResult = await Task({
+    subagent_type: '${agent.type}',
+    prompt: \`
+${generateAgentPrompt(analysis_type, scope)}
+    \`,
+    run_in_background: false
+  });
+
+  results.push({ type: 'agent', data: agentResult });
+
+  // 4. 汇总结果
+  const summary = aggregateResults(results);
+
+  // 5. 输出结果
+  const outputPath = \`\${workDir}/${output.file}\`;
+  ${output.format === 'json'
+    ? `Write(outputPath, JSON.stringify(summary, null, 2));`
+    : `Write(outputPath, formatAsMarkdown(summary));`}
+
+  return {
+    success: true,
+    output: '${output.file}',
+    files_analyzed: files.length,
+    analysis_type: '${analysis_type}'
+  };
+}
+\`\`\`
+`;
+}
+
+function getQueryForAnalysisType(type) {
+  const queries = {
+    structure: 'main entry points, module organization, exports',
+    patterns: 'design patterns, abstractions, reusable components',
+    dependencies: 'imports, external dependencies, coupling',
+    quality: 'code complexity, test coverage, documentation',
+    security: 'authentication, authorization, input validation, secrets'
+  };
+  return queries[type] || queries.structure;
+}
+
+/**
+ * Build the agent prompt text for one analysis type.
+ *
+ * Each prompt lists `scope.paths` as `- <path>` bullets, followed by a fixed
+ * task list and the expected JSON output shape. Types without an entry fall
+ * back to the `structure` prompt.
+ *
+ * The bullets are joined with the two-character sequence backslash + `n`
+ * (`'\\n'`), not a real newline.
+ * NOTE(review): presumably intentional, so the sequence turns into a newline
+ * once the prompt is embedded in the generated template literal - confirm.
+ *
+ * @param {string} type - structure | patterns | dependencies | quality | security.
+ * @param {{paths: string[]}} scope - Only `scope.paths` is read here.
+ * @returns {string} Prompt text.
+ */
+function generateAgentPrompt(type, scope) {
+  const prompts = {
+    structure: `分析以下路径的代码结构:
+${scope.paths.map(p => `- ${p}`).join('\\n')}
+
+任务:
+1. 识别主要模块和入口点
+2. 分析目录组织结构
+3. 提取模块间的导入导出关系
+4. 生成结构概览图 (Mermaid)
+
+输出格式: JSON
+{
+  "modules": [...],
+  "entry_points": [...],
+  "structure_diagram": "mermaid code"
+}`,
+
+    patterns: `分析以下路径的设计模式:
+${scope.paths.map(p => `- ${p}`).join('\\n')}
+
+任务:
+1. 识别使用的设计模式 (Factory, Strategy, Observer 等)
+2. 分析抽象层级
+3. 评估模式使用的恰当性
+4. 提取可复用的模式实例
+
+输出格式: JSON
+{
+  "patterns": [{ "name": "...", "location": "...", "usage": "..." }],
+  "abstractions": [...],
+  "reusable_components": [...]
+}`,
+
+    dependencies: `分析以下路径的依赖关系:
+${scope.paths.map(p => `- ${p}`).join('\\n')}
+
+任务:
+1. 提取内部模块依赖
+2. 识别外部包依赖
+3. 分析耦合度
+4. 检测循环依赖
+
+输出格式: JSON
+{
+  "internal_deps": [...],
+  "external_deps": [...],
+  "coupling_score": 0-100,
+  "circular_deps": [...]
+}`,
+
+    quality: `分析以下路径的代码质量:
+${scope.paths.map(p => `- ${p}`).join('\\n')}
+
+任务:
+1. 评估代码复杂度
+2. 检查测试覆盖率
+3. 分析文档完整性
+4. 识别技术债务
+
+输出格式: JSON
+{
+  "complexity": { "avg": 0, "max": 0, "hotspots": [...] },
+  "test_coverage": { "percentage": 0, "gaps": [...] },
+  "documentation": { "score": 0, "missing": [...] },
+  "tech_debt": [...]
+}`,
+
+    security: `分析以下路径的安全性:
+${scope.paths.map(p => `- ${p}`).join('\\n')}
+
+任务:
+1. 检查认证授权实现
+2. 分析输入验证
+3. 检测敏感数据处理
+4. 识别常见漏洞模式
+
+输出格式: JSON
+{
+  "auth": { "methods": [...], "issues": [...] },
+  "input_validation": { "coverage": 0, "gaps": [...] },
+  "sensitive_data": { "found": [...], "protected": true/false },
+  "vulnerabilities": [{ "type": "...", "severity": "...", "location": "..." }]
+}`
+  };
+
+  return prompts[type] || prompts.structure;
+}
+```
+
+---
+
+## 预置代码分析动作
+
+### 1. 项目结构分析
+
+```yaml
+id: analyze-project-structure
+name: Project Structure Analysis
+type: code-analysis
+scope:
+  paths:
+    - src/
+  patterns:
+    - "**/*.ts"
+    - "**/*.js"
+  excludes:
+    - "**/node_modules/**"
+    - "**/*.test.*"
+analysis_type: structure
+agent:
+  type: Explore
+  thoroughness: medium
+output:
+  format: json
+  file: structure-analysis.json
+mcp_tools:
+  - mcp__ace-tool__search_context
+```
+
+### 2. 设计模式提取
+
+```yaml
+id: extract-design-patterns
+name: Design Pattern Extraction
+type: code-analysis
+scope:
+  paths:
+    - src/
+  patterns:
+    - "**/*.ts"
+analysis_type: patterns
+agent:
+  type: cli-explore-agent
+  thoroughness: very thorough
+output:
+  format: markdown
+  file: patterns-report.md
+```
+
+### 3. 依赖关系分析
+
+```yaml
+id: analyze-dependencies
+name: Dependency Analysis
+type: code-analysis
+scope:
+  paths:
+    - src/
+    - packages/
+  patterns:
+    - "**/package.json"
+    - "**/*.ts"
+analysis_type: dependencies
+agent:
+  type: Explore
+  thoroughness: medium
+output:
+  format: json
+  file: dependency-graph.json
+```
+
+### 4. 安全审计
+
+```yaml
+id: security-audit
+name: Security Audit
+type: code-analysis
+scope:
+  paths:
+    - src/auth/
+    - src/api/
+  patterns:
+    - "**/*.ts"
+analysis_type: security
+agent:
+  type: universal-executor
+  thoroughness: very thorough
+output:
+  format: json
+  file: security-report.json
+mcp_tools:
+  - mcp__ace-tool__search_context
+```
+
+---
+
+## 使用示例
+
+### 在 Phase 中使用
+
+```javascript
+// phases/01-code-exploration.md
+
+const analysisConfig = {
+  id: 'explore-skill-structure',
+  name: 'Skill Structure Exploration',
+  type: 'code-analysis',
+  scope: {
+    paths: ['D:\\Claude_dms3\\.claude\\skills\\software-manual'],
+    patterns: ['**/*.md'],
+    excludes: ['**/node_modules/**']
+  },
+  analysis_type: 'structure',
+  agent: {
+    type: 'Explore',
+    thoroughness: 'medium'
+  },
+  output: {
+    format: 'json',
+    file: 'skill-structure.json'
+  }
+};
+
+// 执行
+const result = await executeCodeAnalysis(analysisConfig, context);
+```
+
+### 组合多种分析
+
+```javascript
+// 串行执行多种分析
+const analyses = [
+  { type: 'structure', file: 'structure.json' },
+  { type: 'patterns', file: 'patterns.json' },
+  { type: 'dependencies', file: 'deps.json' }
+];
+
+for (const analysis of analyses) {
+  await executeCodeAnalysis({
+    ...baseConfig,
+    analysis_type: analysis.type,
+    output: { format: 'json', file: analysis.file }
+  }, context);
+}
+
+// 并行执行（独立分析）
+const parallelResults = await Promise.all(
+  analyses.map(a => executeCodeAnalysis({
+    ...baseConfig,
+    analysis_type: a.type,
+    output: { format: 'json', file: a.file }
+  }, context))
+);
+```
+
+---
+
+## Agent 选择指南
+
+| 分析类型 | 推荐 Agent | Thoroughness | 原因 |
+|---------|-----------|--------------|------|
+| structure | Explore | medium | 快速获取目录结构 |
+| patterns | cli-explore-agent | very thorough | 需要深度代码理解 |
+| dependencies | Explore | medium | 主要分析 import 语句 |
+| quality | universal-executor | medium | 需要运行分析工具 |
+| security | universal-executor | very thorough | 需要全面扫描 |
+
+---
+
+## MCP 工具集成
+
+### 语义搜索增强
+
+```javascript
+// 使用 ACE 工具进行语义搜索
+const semanticContext = await mcp__ace_tool__search_context({
+  project_root_path: projectRoot,
+  query: 'authentication logic, user session management'
+});
+
+// 将语义搜索结果作为 Agent 的输入上下文
+const agentResult = await Task({
+  subagent_type: 'Explore',
+  prompt: `
+基于以下语义搜索结果进行深度分析:
+
+${semanticContext}
+
+任务: 分析认证逻辑的实现细节...
+  `,
+  run_in_background: false
+});
+```
+
+### smart_search 集成
+
+```javascript
+// 使用 smart_search 进行精确搜索
+const exactMatches = await mcp__ccw_tools__smart_search({
+  action: 'search',
+  query: 'class.*Controller',
+  mode: 'ripgrep',
+  path: 'src/'
+});
+
+// 使用 find_files 发现文件
+const configFiles = await mcp__ccw_tools__smart_search({
+  action: 'find_files',
+  pattern: '**/*.config.ts',
+  path: 'src/'
+});
+```
+
+---
+
+## 结果聚合
+
+```javascript
+function aggregateResults(results) {
+  const aggregated = {
+    timestamp: new Date().toISOString(),
+    sources: [],
+    summary: {},
+    details: []
+  };
+
+  for (const result of results) {
+    aggregated.sources.push(result.type);
+
+    if (result.type === 'semantic') {
+      aggregated.summary.semantic_matches = result.data.length;
+      aggregated.details.push({
+        source: 'semantic',
+        data: result.data.slice(0, 10)  // Top 10
+      });
+    }
+
+    if (result.type === 'agent') {
+      aggregated.summary.agent_findings = extractKeyFindings(result.data);
+      aggregated.details.push({
+        source: 'agent',
+        data: result.data
+      });
+    }
+  }
+
+  return aggregated;
+}
+
+function extractKeyFindings(agentResult) {
+  // 从 Agent 结果中提取关键发现
+  // 实现取决于 Agent 的输出格式
+  return {
+    modules: agentResult.modules?.length || 0,
+    patterns: agentResult.patterns?.length || 0,
+    issues: agentResult.issues?.length || 0
+  };
+}
+```
+
+---
+
+## 最佳实践
+
+1. **范围控制**
+   - 使用精确的 patterns 减少分析范围
+   - 配置 excludes 排除无关文件
+
+2. **Agent 选择**
+   - 快速探索用 Explore
+   - 深度分析用 cli-explore-agent
+   - 需要执行操作用 universal-executor
+
+3. **MCP 工具组合**
+   - 先用 mcp__ace-tool__search_context 获取语义上下文
+   - 再用 Agent 进行深度分析
+   - 最后用 smart_search 补充精确匹配
+
+4. **结果缓存**
+   - 将分析结果持久化到 workDir
+   - 后续阶段可直接读取，避免重复分析
+
+5. **Brief Returns**
+   - Agent 返回路径 + 摘要，而非完整内容
+   - 避免上下文溢出
diff --git a/.claude/skills/skill-generator/templates/llm-action.md b/.claude/skills/skill-generator/templates/llm-action.md
new file mode 100644
index 00000000..f003a618
--- /dev/null
+++ b/.claude/skills/skill-generator/templates/llm-action.md
@@ -0,0 +1,355 @@
+# LLM Action Template
+
+LLM 动作模板，用于在 Skill 中集成 LLM 调用能力。
+
+---
+
+## 配置结构
+
+```typescript
+interface LLMActionConfig {
+  id: string;                    // "llm-analyze", "llm-generate"
+  name: string;                  // "LLM Analysis"
+  type: 'llm';                   // 动作类型标识
+
+  // LLM 工具配置
+  tool: {
+    primary: 'gemini' | 'qwen' | 'codex';
+    fallback_chain: string[];    // ['gemini', 'qwen', 'codex']
+  };
+
+  // 执行模式
+  mode: 'analysis' | 'write';
+
+  // 提示词配置
+  prompt: {
+    template: string;            // 提示词模板路径或内联
+    variables: string[];         // 需要替换的变量
+  };
+
+  // 输入输出
+  input: string[];               // 依赖的上下文文件
+  output: string;                // 输出文件路径
+
+  // 超时配置
+  timeout?: number;              // 毫秒，默认 600000 (10min)
+}
+```
+
+---
+
+## 模板生成函数
+
+```javascript
+/**
+ * Render the Markdown document for one LLM action.
+ *
+ * The returned string contains a fenced JavaScript block defining
+ * `execute<PascalCase(id)>(context)` plus the helpers `callLLM` and
+ * `escapePrompt`, followed by the raw prompt template and a variable list.
+ * The generated function tries `tool.primary` and then every entry of
+ * `tool.fallback_chain` in order, writes the first successful result to
+ * `<workDir>/<output>` and records the call in `context.state`.
+ *
+ * @param {Object} config - Action config; reads `id`, `name`, `tool`, `mode`,
+ *   `prompt`, `input`, `output` and `timeout` (defaults to 600000 ms).
+ * @returns {string} Markdown source of the action file.
+ *
+ * NOTE(review): inside this template literal `\$` is an escape for `$`, so the
+ *   emitted `escapePrompt` regex is `/$/g` (end of input), not a literal
+ *   dollar sign: `$` is left unescaped and `\$` is appended to the prompt.
+ *   Writing `/\\$/g` here would emit `/\$/g`, but the preset prompts rely on
+ *   unescaped `$(cat ...)` - decide the intended behavior before changing.
+ * NOTE(review): the generated `inputContext` is built but never referenced.
+ * NOTE(review): the emitted `.replace('{{name}}', ...)` takes a string pattern,
+ *   so only the first occurrence of each variable is substituted.
+ * NOTE(review): `prompt.template` is interpolated raw into a generated
+ *   template literal; backticks or `${` in it would alter the generated code.
+ * NOTE(review): a `fallback_chain` that repeats `tool.primary` makes the
+ *   generated loop try that tool twice.
+ */
+function generateLLMAction(config) {
+  const { id, name, tool, mode, prompt, input, output, timeout = 600000 } = config;
+
+  return `
+# ${name}
+
+## Action: ${id}
+
+### 执行逻辑
+
+\`\`\`javascript
+async function execute${toPascalCase(id)}(context) {
+  const workDir = context.workDir;
+  const state = context.state;
+
+  // 1. 收集输入上下文
+  const inputContext = ${JSON.stringify(input)}.map(f => {
+    const path = \`\${workDir}/\${f}\`;
+    return Read(path);
+  }).join('\\n\\n---\\n\\n');
+
+  // 2. 构建提示词
+  const promptTemplate = \`${prompt.template}\`;
+  const finalPrompt = promptTemplate
+    ${prompt.variables.map(v => `.replace('{{${v}}}', context.${v} || '')`).join('\n    ')};
+
+  // 3. 执行 LLM 调用 (带 fallback)
+  const tools = ['${tool.primary}', ${tool.fallback_chain.map(t => `'${t}'`).join(', ')}];
+  let result = null;
+  let usedTool = null;
+
+  for (const t of tools) {
+    try {
+      result = await callLLM(t, finalPrompt, '${mode}', ${timeout});
+      usedTool = t;
+      break;
+    } catch (error) {
+      console.log(\`\${t} failed: \${error.message}, trying next...\`);
+    }
+  }
+
+  if (!result) {
+    throw new Error('All LLM tools failed');
+  }
+
+  // 4. 保存结果
+  Write(\`\${workDir}/${output}\`, result);
+
+  // 5. 更新状态
+  state.llm_calls = (state.llm_calls || 0) + 1;
+  state.last_llm_tool = usedTool;
+
+  return {
+    success: true,
+    output: '${output}',
+    tool_used: usedTool
+  };
+}
+
+// LLM 调用封装
+async function callLLM(tool, prompt, mode, timeout) {
+  const modeFlag = mode === 'write' ? '--mode write' : '--mode analysis';
+
+  // 使用 CCW CLI 统一接口
+  const command = \`ccw cli -p "\${escapePrompt(prompt)}" --tool \${tool} \${modeFlag}\`;
+
+  const result = Bash({
+    command,
+    timeout,
+    run_in_background: true  // 异步执行
+  });
+
+  // 等待完成
+  return await waitForResult(result.task_id, timeout);
+}
+
+function escapePrompt(prompt) {
+  // 转义双引号和特殊字符
+  return prompt.replace(/"/g, '\\\\"').replace(/\$/g, '\\\\$');
+}
+\`\`\`
+
+### Prompt 模板
+
+\`\`\`
+${prompt.template}
+\`\`\`
+
+### 变量说明
+
+${prompt.variables.map(v => `- \`{{${v}}}\`: ${v} 变量`).join('\n')}
+`;
+}
+
+function toPascalCase(str) {
+  return str.split('-').map(s => s.charAt(0).toUpperCase() + s.slice(1)).join('');
+}
+```
+
+---
+
+## 预置 LLM 动作模板
+
+### 1. 代码分析动作
+
+```yaml
+id: llm-code-analysis
+name: LLM Code Analysis
+type: llm
+tool:
+  primary: gemini
+  fallback_chain: [qwen]
+mode: analysis
+prompt:
+  template: |
+    PURPOSE: 分析代码结构和模式，提取关键设计特征
+    TASK:
+    • 识别主要模块和组件
+    • 分析依赖关系
+    • 提取设计模式
+    • 评估代码质量
+    MODE: analysis
+    CONTEXT: {{code_context}}
+    EXPECTED: JSON 格式的分析报告，包含 modules, dependencies, patterns, quality_score
+    RULES: $(cat ~/.claude/workflows/cli-templates/protocols/analysis-protocol.md)
+  variables:
+    - code_context
+input:
+  - collected-code.md
+output: analysis-report.json
+timeout: 900000
+```
+
+### 2. 文档生成动作
+
+```yaml
+id: llm-doc-generation
+name: LLM Documentation Generation
+type: llm
+tool:
+  primary: gemini
+  fallback_chain: [qwen, codex]
+mode: write
+prompt:
+  template: |
+    PURPOSE: 根据分析结果生成高质量文档
+    TASK:
+    • 基于分析报告生成文档大纲
+    • 填充各章节内容
+    • 添加代码示例和说明
+    • 生成 Mermaid 图表
+    MODE: write
+    CONTEXT: {{analysis_report}}
+    EXPECTED: 完整的 Markdown 文档，包含目录、章节、图表
+    RULES: $(cat ~/.claude/workflows/cli-templates/protocols/write-protocol.md)
+  variables:
+    - analysis_report
+input:
+  - analysis-report.json
+output: generated-doc.md
+timeout: 1200000
+```
+
+### 3. 代码重构建议动作
+
+```yaml
+id: llm-refactor-suggest
+name: LLM Refactoring Suggestions
+type: llm
+tool:
+  primary: codex
+  fallback_chain: [gemini]
+mode: analysis
+prompt:
+  template: |
+    PURPOSE: 分析代码并提供重构建议
+    TASK:
+    • 识别代码异味 (code smells)
+    • 评估复杂度热点
+    • 提出具体重构方案
+    • 估算重构影响范围
+    MODE: analysis
+    CONTEXT: {{source_code}}
+    EXPECTED: 重构建议列表，每项包含 location, issue, suggestion, impact
+    RULES: $(cat ~/.claude/workflows/cli-templates/protocols/analysis-protocol.md)
+  variables:
+    - source_code
+input:
+  - source-files.md
+output: refactor-suggestions.json
+timeout: 600000
+```
+
+---
+
+## 使用示例
+
+### 在 Phase 中使用 LLM 动作
+
+```javascript
+// phases/02-llm-analysis.md
+
+const llmConfig = {
+  id: 'llm-analyze-skill',
+  name: 'Skill Pattern Analysis',
+  type: 'llm',
+  tool: {
+    primary: 'gemini',
+    fallback_chain: ['qwen']
+  },
+  mode: 'analysis',
+  prompt: {
+    template: `
+PURPOSE: 分析现有 Skill 的设计模式
+TASK:
+• 提取 Skill 结构规范
+• 识别 Phase 组织模式
+• 分析 Agent 调用模式
+MODE: analysis
+CONTEXT: {{skill_source}}
+EXPECTED: 结构化的设计模式分析
+`,
+    variables: ['skill_source']
+  },
+  input: ['collected-skills.md'],
+  output: 'skill-patterns.json'
+};
+
+// 执行
+const result = await executeLLMAction(llmConfig, {
+  workDir: '.workflow/.scratchpad/skill-gen-xxx',
+  skill_source: Read('.workflow/.scratchpad/skill-gen-xxx/collected-skills.md')
+});
+```
+
+### 在 Orchestrator 中调度 LLM 动作
+
+```javascript
+// autonomous-orchestrator 中的 LLM 动作调度
+
+const actions = [
+  { type: 'collect', priority: 100 },
+  { type: 'llm', id: 'llm-analyze', priority: 90 },  // LLM 分析
+  { type: 'process', priority: 80 },
+  { type: 'llm', id: 'llm-generate', priority: 70 }, // LLM 生成
+  { type: 'validate', priority: 60 }
+];
+
+for (const action of sortByPriority(actions)) {
+  if (action.type === 'llm') {
+    const llmResult = await executeLLMAction(
+      getLLMConfig(action.id),
+      context
+    );
+    context.state[action.id] = llmResult;
+  }
+}
+```
+
+---
+
+## 错误处理
+
+```javascript
+async function executeLLMActionWithRetry(config, context, maxRetries = 3) {
+  let lastError = null;
+
+  for (let attempt = 1; attempt <= maxRetries; attempt++) {
+    try {
+      return await executeLLMAction(config, context);
+    } catch (error) {
+      lastError = error;
+      console.log(`Attempt ${attempt} failed: ${error.message}`);
+
+      // 指数退避
+      if (attempt < maxRetries) {
+        await sleep(Math.pow(2, attempt) * 1000);
+      }
+    }
+  }
+
+  // 所有重试失败
+  return {
+    success: false,
+    error: lastError.message,
+    fallback: 'manual_review_required'
+  };
+}
+```
+
+---
+
+## 最佳实践
+
+1. **选择合适的工具**
+   - 分析任务：Gemini（大上下文）> Qwen
+   - 生成任务：Codex（自主执行）> Gemini > Qwen
+   - 代码修改：Codex > Gemini
+
+2. **配置 Fallback Chain**
+   - 总是配置至少一个 fallback
+   - 考虑工具特性选择 fallback 顺序
+
+3. **超时设置**
+   - 分析任务：10-15 分钟
+   - 生成任务：15-20 分钟
+   - 复杂任务：20-60 分钟
+
+4. **提示词设计**
+   - 使用 PURPOSE/TASK/MODE/CONTEXT/EXPECTED/RULES 结构
+   - 引用标准协议模板
+   - 明确输出格式要求
diff --git a/benchmark_search.py b/benchmark_search.py
new file mode 100644
index 00000000..9afdec07
--- /dev/null
+++ b/benchmark_search.py
@@ -0,0 +1,330 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Multi-dimensional search benchmark: Compare search methods across multiple queries.
+
+Dimensions:
+1. Speed (time_ms)
+2. Result Quality (relevance score distribution)
+3. Ranking Stability (position changes vs baseline)
+4. Coverage (unique files found)
+"""
+import subprocess
+import sys
+import os
+import re
+import json
+import time
+import io
+
+# Fix Windows console encoding
+sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
+sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
+from dataclasses import dataclass, field
+from typing import List, Dict, Any, Optional
+from pathlib import Path
+
+os.chdir(r"D:\dongdiankaifa9\hydro_generator_module")
+
+# Test queries covering different search intents
+TEST_QUERIES = [
+    ("热网络计算", "Chinese: thermal network calculation"),
+    ("ThermalResistance", "Code identifier"),
+    ("boundary condition handling", "Natural language"),
+    ("stator slot cooling", "Domain-specific"),
+    ("def build", "Code pattern"),
+]
+
+# Search methods to compare
+SEARCH_METHODS = [
+    ("hybrid", None, "Hybrid (FTS+Vector RRF)"),
+    ("vector", None, "Pure Vector"),
+    ("cascade", "binary", "Cascade Binary"),
+    ("cascade", "hybrid", "Cascade Hybrid (Cross-Encoder)"),
+]
+
+ansi_escape = re.compile(r'\x1b\[[0-9;]*m')
+
+
+@dataclass
+class SearchResult:
+    """Outcome of one `codexlens search` run for a (query, method, strategy) combination."""
+    # Value passed to --method.
+    method: str
+    # Value passed to --cascade-strategy (only sent when method is "cascade"); None otherwise.
+    strategy: Optional[str]
+    # The search query text.
+    query: str
+    # Time reported in the tool's stats when present, else the measured wall-clock time (ms).
+    time_ms: float
+    # Number of results kept after applying the limit.
+    count: int
+    # Base file names of the kept results, in result order.
+    top_files: List[str]
+    # Scores of the kept results, in the same order as top_files.
+    top_scores: List[float]
+    # False when no JSON was found, the tool reported failure, or parsing raised.
+    success: bool
+    # Failure description; stays None on success.
+    error: Optional[str] = None
+
+
+def run_search(query: str, method: str, strategy: Optional[str] = None, limit: int = 10) -> SearchResult:
+    """Run a search and return structured result."""
+    cmd = [sys.executable, "-m", "codexlens", "search", query,
+           "--method", method, "--limit", str(limit), "--json"]
+
+    if strategy and method == "cascade":
+        cmd.extend(["--cascade-strategy", strategy])
+
+    start = time.perf_counter()
+    result = subprocess.run(cmd, capture_output=True, text=True, encoding="utf-8")
+    elapsed = (time.perf_counter() - start) * 1000
+
+    # Strip ANSI codes
+    output = ansi_escape.sub('', result.stdout + result.stderr)
+
+    # Parse JSON
+    start_idx = output.find('{')
+    if start_idx < 0:
+        return SearchResult(
+            method=method, strategy=strategy, query=query,
+            time_ms=elapsed, count=0, top_files=[], top_scores=[],
+            success=False, error="No JSON found"
+        )
+
+    # Parse nested JSON properly
+    in_string = False
+    escaped = False
+    depth = 0
+    end_idx = start_idx
+
+    for i, c in enumerate(output[start_idx:]):
+        if escaped:
+            escaped = False
+            continue
+        if c == '\\':
+            escaped = True
+            continue
+        if c == '"' and not escaped:
+            in_string = not in_string
+            continue
+        if not in_string:
+            if c == '{':
+                depth += 1
+            elif c == '}':
+                depth -= 1
+                if depth == 0:
+                    end_idx = start_idx + i + 1
+                    break
+
+    try:
+        data = json.loads(output[start_idx:end_idx])
+        if not data.get("success"):
+            return SearchResult(
+                method=method, strategy=strategy, query=query,
+                time_ms=elapsed, count=0, top_files=[], top_scores=[],
+                success=False, error=data.get("error", "Unknown error")
+            )
+
+        results = data.get("result", {}).get("results", [])[:limit]
+        stats = data.get("result", {}).get("stats", {})
+
+        top_files = [os.path.basename(r.get("path", "")) for r in results]
+        top_scores = [r.get("score", 0) for r in results]
+
+        return SearchResult(
+            method=method, strategy=strategy, query=query,
+            time_ms=stats.get("time_ms", elapsed),
+            count=len(results),
+            top_files=top_files,
+            top_scores=top_scores,
+            success=True
+        )
+    except Exception as e:
+        return SearchResult(
+            method=method, strategy=strategy, query=query,
+            time_ms=elapsed, count=0, top_files=[], top_scores=[],
+            success=False, error=str(e)
+        )
+
+
+def calculate_ranking_similarity(baseline: List[str], candidate: List[str]) -> float:
+    """Calculate ranking similarity using normalized DCG."""
+    if not baseline or not candidate:
+        return 0.0
+
+    # Simple overlap-based similarity with position weighting
+    score = 0.0
+    for i, file in enumerate(candidate[:10]):
+        if file in baseline:
+            baseline_pos = baseline.index(file)
+            # Weight by position similarity
+            pos_diff = abs(i - baseline_pos)
+            score += 1.0 / (1 + pos_diff * 0.2)
+
+    return score / min(len(baseline), 10)
+
+
+def print_divider(char="=", width=80):
+    print(char * width)
+
+
+def main():
+    """Run every test query against every search method and print a report.
+
+    Steps: print a header, call run_search for each (query, method) pair while
+    echoing per-run status, then print four analysis sections (average speed,
+    average top-result score, ranking similarity against the "hybrid" results,
+    and the top-3 files per query and method) followed by a static summary.
+    Failed runs are excluded from the aggregate sections and listed as
+    failures in the per-query section.
+    """
+    print_divider()
+    print("🔬 CodexLens 搜索方法多维度对比测试")
+    print_divider()
+    print(f"测试目录: {os.getcwd()}")
+    print(f"测试查询数: {len(TEST_QUERIES)}")
+    print(f"对比方法数: {len(SEARCH_METHODS)}")
+    print_divider()
+
+    # query -> method key ("method" or "method_strategy") -> result
+    all_results: Dict[str, Dict[str, SearchResult]] = {}
+
+    # Run all tests
+    for query, query_desc in TEST_QUERIES:
+        print(f"\n📝 查询: \"{query}\" ({query_desc})")
+        print("-" * 60)
+
+        all_results[query] = {}
+
+        for method, strategy, method_name in SEARCH_METHODS:
+            method_key = f"{method}_{strategy}" if strategy else method
+            print(f"  ⏳ {method_name}...", end=" ", flush=True)
+
+            result = run_search(query, method, strategy)
+            all_results[query][method_key] = result
+
+            if result.success:
+                print(f"✓ {result.time_ms:.0f}ms, {result.count} results")
+            else:
+                print(f"✗ {result.error}")
+
+    # === Analysis ===
+    print("\n")
+    print_divider()
+    print("📊 综合分析报告")
+    print_divider()
+
+    # 1. Speed Comparison
+    print("\n### 1️⃣ 速度对比 (平均耗时 ms)")
+    print("-" * 60)
+
+    method_times: Dict[str, List[float]] = {f"{m}_{s}" if s else m: [] for m, s, _ in SEARCH_METHODS}
+
+    for query in all_results:
+        for method_key, result in all_results[query].items():
+            if result.success:
+                method_times[method_key].append(result.time_ms)
+
+    speed_ranking = []
+    for method, strategy, method_name in SEARCH_METHODS:
+        method_key = f"{method}_{strategy}" if strategy else method
+        times = method_times[method_key]
+        if times:
+            avg_time = sum(times) / len(times)
+            min_time = min(times)
+            max_time = max(times)
+            speed_ranking.append((method_name, avg_time, min_time, max_time))
+
+    # Fastest average first.
+    speed_ranking.sort(key=lambda x: x[1])
+
+    print(f"{'方法':<35} {'平均':>10} {'最快':>10} {'最慢':>10}")
+    print("-" * 65)
+    for method_name, avg, min_t, max_t in speed_ranking:
+        print(f"{method_name:<35} {avg:>10.0f} {min_t:>10.0f} {max_t:>10.0f}")
+
+    # Speed winner
+    if speed_ranking:
+        fastest = speed_ranking[0]
+        slowest = speed_ranking[-1]
+        speedup = slowest[1] / fastest[1] if fastest[1] > 0 else 0
+        print(f"\n🏆 最快: {fastest[0]} (比最慢快 {speedup:.1f}x)")
+
+    # 2. Score Distribution
+    print("\n### 2️⃣ 相关性得分分布 (Top-10 平均分)")
+    print("-" * 60)
+
+    method_scores: Dict[str, List[float]] = {f"{m}_{s}" if s else m: [] for m, s, _ in SEARCH_METHODS}
+
+    for query in all_results:
+        for method_key, result in all_results[query].items():
+            if result.success and result.top_scores:
+                avg_score = sum(result.top_scores) / len(result.top_scores)
+                method_scores[method_key].append(avg_score)
+
+    print(f"{'方法':<35} {'平均分':>12} {'分布范围':>20}")
+    print("-" * 67)
+    for method, strategy, method_name in SEARCH_METHODS:
+        method_key = f"{method}_{strategy}" if strategy else method
+        scores = method_scores[method_key]
+        if scores:
+            avg_score = sum(scores) / len(scores)
+            min_score = min(scores)
+            max_score = max(scores)
+            print(f"{method_name:<35} {avg_score:>12.4f} {min_score:.4f} - {max_score:.4f}")
+
+    # 3. Ranking Stability (vs Hybrid as baseline)
+    print("\n### 3️⃣ 排名稳定性 (与 Hybrid 基线对比)")
+    print("-" * 60)
+
+    print(f"{'方法':<35} {'相似度':>12} {'说明':>20}")
+    print("-" * 67)
+
+    for method, strategy, method_name in SEARCH_METHODS:
+        method_key = f"{method}_{strategy}" if strategy else method
+        if method_key == "hybrid":
+            print(f"{method_name:<35} {'1.0000':>12} {'(基线)':>20}")
+            continue
+
+        similarities = []
+        for query in all_results:
+            baseline = all_results[query].get("hybrid")
+            candidate = all_results[query].get(method_key)
+            # Only compare queries where both the baseline and this method succeeded.
+            if baseline and candidate and baseline.success and candidate.success:
+                sim = calculate_ranking_similarity(baseline.top_files, candidate.top_files)
+                similarities.append(sim)
+
+        if similarities:
+            avg_sim = sum(similarities) / len(similarities)
+            diff_level = "高度一致" if avg_sim > 0.7 else "中度差异" if avg_sim > 0.4 else "显著差异"
+            print(f"{method_name:<35} {avg_sim:>12.4f} {diff_level:>20}")
+
+    # 4. Detailed Query Comparison
+    print("\n### 4️⃣ 各查询详细对比")
+    print("-" * 60)
+
+    for query, query_desc in TEST_QUERIES:
+        print(f"\n📌 \"{query}\" ({query_desc})")
+        print()
+
+        # Show top-3 results for each method
+        for method, strategy, method_name in SEARCH_METHODS:
+            method_key = f"{method}_{strategy}" if strategy else method
+            result = all_results[query].get(method_key)
+
+            if result and result.success:
+                print(f"  [{method_name}] {result.time_ms:.0f}ms")
+                for i, (file, score) in enumerate(zip(result.top_files[:3], result.top_scores[:3]), 1):
+                    print(f"    {i}. {file:<40} {score:.4f}")
+            else:
+                print(f"  [{method_name}] 失败: {result.error if result else 'N/A'}")
+        print()
+
+    # 5. Summary
+    print_divider()
+    print("📋 总结")
+    print_divider()
+
+    # Static text: these statements are not derived from the measurements above.
+    print("""
+┌─────────────────────────────────────────────────────────────────────┐
+│ 方法特点总结                                                          │
+├─────────────────────────────────────────────────────────────────────┤
+│ Hybrid (FTS+Vector)     │ 基线方法，综合质量好，速度中等              │
+│ Pure Vector             │ 语义理解强，适合自然语言查询                │
+│ Cascade Binary          │ 速度最快，适合大代码库快速检索              │
+│ Cascade Hybrid          │ Cross-Encoder 精排，质量最高但速度较慢       │
+└─────────────────────────────────────────────────────────────────────┘
+
+推荐使用场景:
+• 日常搜索: hybrid (默认)
+• 大代码库快速检索: cascade --cascade-strategy binary
+• 追求最高质量: cascade --cascade-strategy hybrid
+• 自然语言查询: vector
+""")
+
+    print_divider()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ccw/src/tools/smart-search.ts b/ccw/src/tools/smart-search.ts
index 2fc0bf73..e74218ae 100644
--- a/ccw/src/tools/smart-search.ts
+++ b/ccw/src/tools/smart-search.ts
@@ -3,7 +3,7 @@
  *
  * Features:
  * - Intent classification with automatic mode selection
- * - CodexLens integration (init, hybrid, vector, semantic)
+ * - CodexLens integration (init, dense_rerank, fts)
  * - Ripgrep fallback for exact mode
  * - Index status checking and warnings
  * - Multi-backend search routing with RRF ranking
@@ -12,6 +12,8 @@
  * - init: Initialize CodexLens index
  * - search: Intelligent search with auto mode selection
  * - status: Check index status
+ * - update: Incremental index update for changed files
+ * - watch: Start file watcher for automatic updates
  */
 
 import { z } from 'zod';
@@ -59,9 +61,9 @@ function createTimer(): { mark: (name: string) => void; getTimings: () => Timing
 
 // Define Zod schema for validation
 const ParamsSchema = z.object({
-  // Action: search (content), find_files (path/name pattern), init, status
+  // Action: search (content), find_files (path/name pattern), init, status, update (incremental), watch
   // Note: search_files is deprecated, use search with output_mode='files_only'
-  action: z.enum(['init', 'search', 'search_files', 'find_files', 'status']).default('search'),
+  action: z.enum(['init', 'search', 'search_files', 'find_files', 'status', 'update', 'watch']).default('search'),
   query: z.string().optional().describe('Content search query (for action="search")'),
   pattern: z.string().optional().describe('Glob pattern for path matching (for action="find_files")'),
   mode: z.enum(['auto', 'hybrid', 'exact', 'ripgrep', 'priority']).default('auto'),
@@ -84,6 +86,8 @@ const ParamsSchema = z.object({
   // File type filtering
   excludeExtensions: z.array(z.string()).optional().describe('File extensions to exclude from results (e.g., ["md", "txt"])'),
   codeOnly: z.boolean().default(false).describe('Only return code files (excludes md, txt, json, yaml, xml, etc.)'),
+  // Watcher options
+  debounce: z.number().default(1000).describe('Debounce interval in ms for watch action'),
   // Fuzzy matching is implicit in hybrid mode (RRF fusion)
 });
 
@@ -720,6 +724,130 @@ async function executeStatusAction(params: Params): Promise<SearchResult> {
   };
 }
 
+/**
+ * Action: update - incremental index update.
+ *
+ * Re-runs the CodexLens `init` command for `params.path` without `--force`, once CodexLens
+ * is ready and the path is confirmed indexed; either failed check returns `success: false`.
+ * NOTE(review): this sends `--languages` while executeWatchAction sends `--language`; confirm the CLI flag name.
+ *
+ * @param params - Validated tool params; reads `path` (defaults to '.') and `languages`.
+ * @returns The CLI outcome plus metadata holding the latest and up to five most recent progress events.
+ */
+async function executeUpdateAction(params: Params): Promise<SearchResult> {
+  const { path = '.', languages } = params;
+
+  // Guard: CodexLens itself must be ready.
+  const codexLens = await ensureCodexLensReady();
+  if (!codexLens.ready) {
+    return {
+      success: false,
+      error: `CodexLens not available: ${codexLens.error}`,
+    };
+  }
+
+  // Guard: there must already be an index to update.
+  const { indexed } = await checkIndexStatus(path);
+  if (!indexed) {
+    return {
+      success: false,
+      error: `Directory not indexed. Run smart_search(action="init") first.`,
+    };
+  }
+
+  // `--force` is deliberately omitted; per the original note this makes `init` incremental.
+  const cliArgs = ['init', path];
+  if (languages && languages.length > 0) {
+    cliArgs.push('--languages', languages.join(','));
+  }
+
+  // Record every progress event; `latest` mirrors the newest one.
+  const history: ProgressInfo[] = [];
+  let latest: ProgressInfo | null = null;
+  const recordProgress = (progress: ProgressInfo): void => {
+    history.push(progress);
+    latest = progress;
+  };
+
+  const cliResult = await executeCodexLens(cliArgs, {
+    cwd: path,
+    timeout: 600000, // 10 minutes for incremental updates
+    onProgress: recordProgress,
+  });
+
+  const metadata: SearchMetadata = { action: 'update', path };
+
+  if (latest !== null) {
+    // TS control-flow analysis cannot see the callback's assignment, hence the cast.
+    const { stage, message, percent, filesProcessed, totalFiles } = latest as ProgressInfo;
+    metadata.progress = { stage, message, percent, filesProcessed, totalFiles };
+  }
+
+  // Attach at most the five most recent progress events.
+  if (history.length > 0) {
+    metadata.progressHistory = history.slice(-5);
+  }
+
+  const completionMessage = cliResult.success
+    ? `Incremental update completed for ${path}`
+    : undefined;
+  return {
+    success: cliResult.success,
+    error: cliResult.error,
+    message: completionMessage,
+    metadata,
+  };
+}
+
+/**
+ * Action: watch - start the CodexLens file watcher for `params.path`.
+ * Invokes the CLI as `watch <path> --debounce <ms>` after the readiness and index checks pass.
+ * NOTE(review): the CLI result is never inspected, so `success: true` is returned even if startup failed or hit the 5 s timeout.
+ * NOTE(review): sends `--language` while executeUpdateAction sends `--languages`; confirm the CLI flag name.
+ */
+async function executeWatchAction(params: Params): Promise<SearchResult> {
+  const { path = '.', languages, debounce = 1000 } = params;
+
+  // Guard: CodexLens itself must be ready.
+  const codexLens = await ensureCodexLensReady();
+  if (!codexLens.ready) {
+    return {
+      success: false,
+      error: `CodexLens not available: ${codexLens.error}`,
+    };
+  }
+
+  // Guard: the watcher is only started on a path that is already indexed.
+  const { indexed } = await checkIndexStatus(path);
+  if (!indexed) {
+    return {
+      success: false,
+      error: `Directory not indexed. Run smart_search(action="init") first.`,
+    };
+  }
+
+  const cliArgs = ['watch', path, '--debounce', debounce.toString()];
+  if (languages && languages.length > 0) {
+    cliArgs.push('--language', languages.join(','));
+  }
+
+  // Awaited with a short timeout as a startup check; the outcome is not inspected (see NOTE above).
+  const startup = await executeCodexLens(cliArgs, {
+    cwd: path,
+    timeout: 5000,
+  });
+
+  return {
+    success: true,
+    message: `File watcher started for ${path}. Use Ctrl+C or kill the process to stop.`,
+    metadata: {
+      action: 'watch',
+      path,
+      note: 'Watcher runs in background. Changes are indexed automatically with debounce.',
+    },
+  };
+}
+
 /**
  * Mode: auto - Intent classification and mode selection
  * Routes to: hybrid (NL + index) | exact (index) | ripgrep (no index)
@@ -816,8 +944,8 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
       };
     }
 
-    // Use CodexLens exact mode as fallback
-    const args = ['search', query, '--limit', totalToFetch.toString(), '--mode', 'exact', '--json'];
+    // Use CodexLens fts mode as fallback
+    const args = ['search', query, '--limit', totalToFetch.toString(), '--method', 'fts', '--json'];
     const result = await executeCodexLens(args, { cwd: path });
 
     if (!result.success) {
@@ -1023,7 +1151,7 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
 
   // Request more results to support split (full content + extra files)
   const totalToFetch = maxResults + extraFilesCount;
-  const args = ['search', query, '--limit', totalToFetch.toString(), '--mode', 'exact', '--json'];
+  const args = ['search', query, '--limit', totalToFetch.toString(), '--method', 'fts', '--json'];
   if (enrich) {
     args.push('--enrich');
   }
@@ -1060,7 +1188,7 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
 
   // Fallback to fuzzy mode if exact returns no results
   if (allResults.length === 0) {
-    const fuzzyArgs = ['search', query, '--limit', totalToFetch.toString(), '--mode', 'fuzzy', '--json'];
+    const fuzzyArgs = ['search', query, '--limit', totalToFetch.toString(), '--method', 'fts', '--use-fuzzy', '--json'];
     if (enrich) {
       fuzzyArgs.push('--enrich');
     }
@@ -1119,8 +1247,8 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
 }
 
 /**
- * Mode: hybrid - Best quality search with RRF fusion
- * Uses CodexLens hybrid mode (exact + fuzzy + vector)
+ * Mode: hybrid - Best quality semantic search
+ * Uses CodexLens dense_rerank method (dense coarse + cross-encoder rerank)
  * Requires index with embeddings
  */
 async function executeHybridMode(params: Params): Promise<SearchResult> {
@@ -1150,7 +1278,7 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
 
   // Request more results to support split (full content + extra files)
   const totalToFetch = maxResults + extraFilesCount;
-  const args = ['search', query, '--limit', totalToFetch.toString(), '--mode', 'hybrid', '--json'];
+  const args = ['search', query, '--limit', totalToFetch.toString(), '--method', 'dense_rerank', '--json'];
   if (enrich) {
     args.push('--enrich');
   }
@@ -1232,7 +1360,7 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
   timer.mark('split_results');
 
   // Build metadata with baseline info if detected
-  let note = 'Hybrid mode uses RRF fusion (exact + fuzzy + vector) for best results';
+  let note = 'Using dense_rerank (dense coarse + cross-encoder rerank) for semantic search';
   if (baselineInfo) {
     note += ` | Filtered ${initialCount - allResults.length} hot-spot results with baseline score ~${baselineInfo.score.toFixed(4)}`;
   }
@@ -1698,6 +1826,8 @@ export const schema: ToolSchema = {
 - find_files: Find files by path/name pattern (glob matching)
 - init: Create FTS index
 - status: Check index status
+- update: Incremental index update (for changed files)
+- watch: Start file watcher for automatic updates
 
 **Content Search (action="search"):**
   smart_search(query="authentication logic")        # auto mode - routes to best backend
@@ -1711,6 +1841,11 @@ export const schema: ToolSchema = {
   smart_search(action="find_files", pattern="test_*.py")      # find test files
   smart_search(action="find_files", pattern="*.tsx", offset=20, limit=10)  # pagination
 
+**Index Maintenance:**
+  smart_search(action="update", path="/project")              # incremental index update
+  smart_search(action="watch", path="/project")               # start file watcher
+  smart_search(action="watch", debounce=2000)                 # custom debounce interval
+
 **Pagination:** All actions support offset/limit for paginated results:
   smart_search(query="auth", limit=10, offset=0)    # first page
   smart_search(query="auth", limit=10, offset=10)   # second page
@@ -2168,6 +2303,16 @@ export async function handler(params: Record<string, unknown>): Promise<ToolResu
         result = await executeFindFilesAction(parsed.data);
         break;
 
+      case 'update':
+        // Incremental index update
+        result = await executeUpdateAction(parsed.data);
+        break;
+
+      case 'watch':
+        // Start file watcher (returns status, watcher runs in background)
+        result = await executeWatchAction(parsed.data);
+        break;
+
       case 'search_files':
         // DEPRECATED: Redirect to search with files_only output
         deprecationWarning = 'action="search_files" is deprecated. Use action="search" with output_mode="files_only" for content-to-files search, or action="find_files" for path pattern matching.';
diff --git a/codex-lens/src/codexlens/cli/commands.py b/codex-lens/src/codexlens/cli/commands.py
index fdb864ed..453dfeb7 100644
--- a/codex-lens/src/codexlens/cli/commands.py
+++ b/codex-lens/src/codexlens/cli/commands.py
@@ -432,75 +432,55 @@ def search(
     limit: int = typer.Option(20, "--limit", "-n", min=1, max=500, help="Max results."),
     depth: int = typer.Option(-1, "--depth", "-d", help="Search depth (-1 = unlimited, 0 = current only)."),
     files_only: bool = typer.Option(False, "--files-only", "-f", help="Return only file paths without content snippets."),
-    method: str = typer.Option("hybrid", "--method", "-m", help="Search method: fts, vector, splade, hybrid, cascade."),
+    method: str = typer.Option("dense_rerank", "--method", "-m", help="Search method: 'dense_rerank' (semantic, default), 'fts' (exact keyword)."),
     use_fuzzy: bool = typer.Option(False, "--use-fuzzy", help="Enable fuzzy matching in FTS method."),
+    # Hidden advanced options for backward compatibility
     weights: Optional[str] = typer.Option(
         None,
         "--weights", "-w",
-        help="RRF weights as key=value pairs (e.g., 'splade=0.4,vector=0.6' or 'fts=0.4,vector=0.6'). Default: auto-detect based on available backends."
+        hidden=True,
+        help="[Advanced] RRF weights as key=value pairs."
     ),
     cascade_strategy: Optional[str] = typer.Option(
         None,
         "--cascade-strategy",
-        help="Cascade search strategy: 'binary' (fast binary+dense) or 'hybrid' (FTS+cross-encoder). Only used with --method cascade."
+        hidden=True,
+        help="[Advanced] Cascade strategy for --method cascade."
     ),
     # Hidden deprecated parameter for backward compatibility
     mode: Optional[str] = typer.Option(None, "--mode", hidden=True, help="[DEPRECATED] Use --method instead."),
     json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
     verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."),
 ) -> None:
-    """Search indexed file contents using hybrid retrieval.
+    """Search indexed file contents.
 
     Uses chain search across directory indexes.
     Use --depth to limit search recursion (0 = current dir only).
 
     Search Methods:
-      - fts: Full-text search using FTS5 (unicode61 tokenizer). Use --use-fuzzy for typo tolerance.
-      - vector: Pure semantic vector search - for natural language queries.
-      - splade: SPLADE sparse neural search - semantic term expansion.
-      - hybrid: RRF fusion of sparse + dense search (default) - best recall.
-      - cascade: Two-stage retrieval (binary coarse + dense rerank) - fast + accurate.
+      - dense_rerank (default): Semantic search using Dense embedding coarse retrieval +
+        Cross-encoder reranking. Best for natural language queries and code understanding.
+      - fts: Full-text search using FTS5 (unicode61 tokenizer). Best for exact code
+        identifiers like function/class names. Use --use-fuzzy for typo tolerance.
 
     Method Selection Guide:
       - Code identifiers (function/class names): fts
-      - Natural language queries: vector or hybrid
+      - Natural language queries: dense_rerank (default)
       - Typo-tolerant search: fts --use-fuzzy
-      - Best overall quality: hybrid (default)
-      - Large codebase performance: cascade
 
-    Vector Search Requirements:
-      Vector, hybrid, and cascade methods require pre-generated embeddings.
+    Requirements:
+      The dense_rerank method requires pre-generated embeddings.
       Use 'codexlens embeddings-generate' to create embeddings first.
 
-    Hybrid Mode Weights:
-      Use --weights to adjust RRF fusion weights:
-      - SPLADE mode: 'splade=0.4,vector=0.6' (default)
-      - FTS mode: 'fts=0.4,vector=0.6' (default)
-
     Examples:
-      # Default hybrid search
-      codexlens search "authentication"
+      # Default semantic search (dense_rerank)
+      codexlens search "authentication logic"
 
       # Exact code identifier search
       codexlens search "authenticate_user" --method fts
 
       # Typo-tolerant fuzzy search
       codexlens search "authentcate" --method fts --use-fuzzy
-
-      # Pure semantic search
-      codexlens search "how to verify user credentials" --method vector
-
-      # SPLADE sparse neural search
-      codexlens search "user login flow" --method splade
-
-      # Fast cascade retrieval for large codebases (binary strategy)
-      codexlens search "authentication" --method cascade
-
-      # Cascade with cross-encoder reranking (hybrid strategy)
-      codexlens search "authentication" --method cascade --cascade-strategy hybrid
-
-      # Hybrid with custom weights
-      codexlens search "authentication" --method hybrid --weights splade=0.5,vector=0.5
     """
     _configure_logging(verbose, json_mode)
     search_path = path.expanduser().resolve()
@@ -538,29 +518,33 @@ def search(
     # Configure search (load settings from file)
     config = Config.load()
 
-    # Validate method
-    valid_methods = ["fts", "vector", "splade", "hybrid", "cascade"]
+    # Validate method - simplified interface exposes only dense_rerank and fts
+    # Other methods (vector, splade, hybrid, cascade) are hidden but still work for backward compatibility
+    valid_methods = ["fts", "dense_rerank", "vector", "splade", "hybrid", "cascade"]
     if actual_method not in valid_methods:
         if json_mode:
-            print_json(success=False, error=f"Invalid method: {actual_method}. Must be one of: {', '.join(valid_methods)}")
+            print_json(success=False, error=f"Invalid method: {actual_method}. Use 'dense_rerank' (semantic) or 'fts' (exact keyword).")
         else:
             console.print(f"[red]Invalid method:[/red] {actual_method}")
-            console.print(f"[dim]Valid methods: {', '.join(valid_methods)}[/dim]")
+            console.print("[dim]Use 'dense_rerank' (semantic, default) or 'fts' (exact keyword)[/dim]")
         raise typer.Exit(code=1)
 
-    # Validate cascade_strategy if provided
-    if cascade_strategy is not None:
-        valid_strategies = ["binary", "hybrid"]
-        if cascade_strategy not in valid_strategies:
+    # Map dense_rerank to cascade method internally
+    internal_cascade_strategy = cascade_strategy
+    if actual_method == "dense_rerank":
+        actual_method = "cascade"
+        internal_cascade_strategy = "dense_rerank"
+
+    # Validate cascade_strategy if provided (for advanced users)
+    if internal_cascade_strategy is not None:
+        valid_strategies = ["binary", "hybrid", "binary_rerank", "dense_rerank"]
+        if internal_cascade_strategy not in valid_strategies:
             if json_mode:
-                print_json(success=False, error=f"Invalid cascade strategy: {cascade_strategy}. Must be one of: {', '.join(valid_strategies)}")
+                print_json(success=False, error=f"Invalid cascade strategy: {internal_cascade_strategy}. Must be one of: {', '.join(valid_strategies)}")
             else:
-                console.print(f"[red]Invalid cascade strategy:[/red] {cascade_strategy}")
+                console.print(f"[red]Invalid cascade strategy:[/red] {internal_cascade_strategy}")
                 console.print(f"[dim]Valid strategies: {', '.join(valid_strategies)}[/dim]")
             raise typer.Exit(code=1)
-        # Warn if using cascade_strategy with non-cascade method
-        if actual_method != "cascade" and not json_mode:
-            console.print(f"[yellow]Warning: --cascade-strategy is only effective with --method cascade[/yellow]")
 
     # Parse custom weights if provided
     hybrid_weights = None
@@ -693,7 +677,7 @@ def search(
         else:
             # Dispatch to cascade_search for cascade method
             if actual_method == "cascade":
-                result = engine.cascade_search(query, search_path, k=limit, options=options, strategy=cascade_strategy)
+                result = engine.cascade_search(query, search_path, k=limit, options=options, strategy=internal_cascade_strategy)
             else:
                 result = engine.search(query, search_path, options)
             results_list = [
diff --git a/codex-lens/src/codexlens/config.py b/codex-lens/src/codexlens/config.py
index 46660aa7..c0a3f929 100644
--- a/codex-lens/src/codexlens/config.py
+++ b/codex-lens/src/codexlens/config.py
@@ -164,6 +164,10 @@ class Config:
     embedding_strategy: str = "latency_aware"  # round_robin, latency_aware, weighted_random
     embedding_cooldown: float = 60.0  # Default cooldown seconds for rate-limited endpoints
 
+    # API concurrency settings
+    api_max_workers: int = 4  # Max concurrent API calls for embedding/reranking
+    api_batch_size: int = 8  # Batch size for API requests
+
     def __post_init__(self) -> None:
         try:
             self.data_dir = self.data_dir.expanduser().resolve()
@@ -276,6 +280,10 @@ class Config:
                 "coarse_k": self.cascade_coarse_k,
                 "fine_k": self.cascade_fine_k,
             },
+            "api": {
+                "max_workers": self.api_max_workers,
+                "batch_size": self.api_batch_size,
+            },
         }
         with open(self.settings_path, "w", encoding="utf-8") as f:
             json.dump(settings, f, indent=2)
@@ -348,11 +356,11 @@ class Config:
             cascade = settings.get("cascade", {})
             if "strategy" in cascade:
                 strategy = cascade["strategy"]
-                if strategy in {"binary", "hybrid"}:
+                if strategy in {"binary", "hybrid", "binary_rerank", "dense_rerank"}:
                     self.cascade_strategy = strategy
                 else:
                     log.warning(
-                        "Invalid cascade strategy in %s: %r (expected 'binary' or 'hybrid')",
+                        "Invalid cascade strategy in %s: %r (expected 'binary', 'hybrid', 'binary_rerank', or 'dense_rerank')",
                         self.settings_path,
                         strategy,
                     )
@@ -360,6 +368,13 @@ class Config:
                 self.cascade_coarse_k = cascade["coarse_k"]
             if "fine_k" in cascade:
                 self.cascade_fine_k = cascade["fine_k"]
+
+            # Load API settings
+            api = settings.get("api", {})
+            if "max_workers" in api:
+                self.api_max_workers = api["max_workers"]
+            if "batch_size" in api:
+                self.api_batch_size = api["batch_size"]
         except Exception as exc:
             log.warning(
                 "Failed to load settings from %s (%s): %s",
diff --git a/codex-lens/src/codexlens/search/chain_search.py b/codex-lens/src/codexlens/search/chain_search.py
index f62166f0..e04455ee 100644
--- a/codex-lens/src/codexlens/search/chain_search.py
+++ b/codex-lens/src/codexlens/search/chain_search.py
@@ -797,13 +797,15 @@ class ChainSearchEngine:
         k: int = 10,
         coarse_k: int = 100,
         options: Optional[SearchOptions] = None,
-        strategy: Optional[Literal["binary", "hybrid"]] = None,
+        strategy: Optional[Literal["binary", "hybrid", "binary_rerank", "dense_rerank"]] = None,
     ) -> ChainSearchResult:
         """Unified cascade search entry point with strategy selection.
 
         Provides a single interface for cascade search with configurable strategy:
-        - "binary": Uses binary vector coarse ranking + dense fine ranking (faster)
+        - "binary": Uses binary vector coarse ranking + dense fine ranking (fastest)
         - "hybrid": Uses FTS+SPLADE+Vector coarse ranking + cross-encoder reranking (original)
+        - "binary_rerank": Uses binary vector coarse ranking + cross-encoder reranking (best balance)
+        - "dense_rerank": Uses dense vector coarse ranking + cross-encoder reranking
 
         The strategy is determined with the following priority:
         1. The `strategy` parameter (e.g., from CLI --cascade-strategy option)
@@ -816,36 +818,585 @@ class ChainSearchEngine:
             k: Number of final results to return (default 10)
             coarse_k: Number of coarse candidates from first stage (default 100)
             options: Search configuration (uses defaults if None)
-            strategy: Cascade strategy - "binary" or "hybrid". Overrides config if provided.
+            strategy: Cascade strategy - "binary", "hybrid", or "binary_rerank".
 
         Returns:
             ChainSearchResult with reranked results and statistics
 
         Examples:
             >>> engine = ChainSearchEngine(registry, mapper, config=config)
-            >>> # Use binary cascade (default, faster)
+            >>> # Use binary cascade (default, fastest)
             >>> result = engine.cascade_search("auth", Path("D:/project"))
             >>> # Use hybrid cascade (original behavior)
             >>> result = engine.cascade_search("auth", Path("D:/project"), strategy="hybrid")
+            >>> # Use binary + cross-encoder (best balance of speed and quality)
+            >>> result = engine.cascade_search("auth", Path("D:/project"), strategy="binary_rerank")
         """
         # Strategy priority: parameter > config > default
         effective_strategy = strategy
+        valid_strategies = ("binary", "hybrid", "binary_rerank", "dense_rerank")
         if effective_strategy is None:
             # Not passed via parameter, check config
             if self._config is not None:
                 config_strategy = getattr(self._config, "cascade_strategy", None)
-                if config_strategy in ("binary", "hybrid"):
+                if config_strategy in valid_strategies:
                     effective_strategy = config_strategy
 
         # If still not set, apply default
-        if effective_strategy not in ("binary", "hybrid"):
+        if effective_strategy not in valid_strategies:
             effective_strategy = "binary"
 
         if effective_strategy == "binary":
             return self.binary_cascade_search(query, source_path, k, coarse_k, options)
+        elif effective_strategy == "binary_rerank":
+            return self.binary_rerank_cascade_search(query, source_path, k, coarse_k, options)
+        elif effective_strategy == "dense_rerank":
+            return self.dense_rerank_cascade_search(query, source_path, k, coarse_k, options)
         else:
             return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)
 
+    def binary_rerank_cascade_search(
+        self,
+        query: str,
+        source_path: Path,
+        k: int = 10,
+        coarse_k: int = 100,
+        options: Optional[SearchOptions] = None,
+    ) -> ChainSearchResult:
+        """Run a binary-vector coarse search, then rerank with a cross-encoder.
+
+        Collects candidate chunks by binary-vector distance, loads their text,
+        and hands them to _cross_encoder_rerank to produce the final ordering.
+
+        Binary + Reranker cascade process:
+        1. Stage 1 (Coarse): Binary vector search (centralized searcher first,
+           then per-directory binary indexes), keeping the coarse_k nearest
+        2. Stage 2 (Fine): Cross-encoder reranking of the retrieved chunks,
+           then per-path deduplication that keeps the highest score
+
+        Falls back to hybrid_cascade_search when NumPy or BinaryEmbeddingBackend
+        is unavailable, the query cannot be embedded, or no candidates are found.
+
+        Notes:
+        - Distances are divided by 256 (assumed binary vector width) to seed a
+          provisional score; the reranker is expected to overwrite it.
+        - An empty result is returned when no index or chunk content is found.
+
+        Args:
+            query: Natural language or keyword query string
+            source_path: Starting directory path
+            k: Number of final results to return (default 10); a falsy value defers to config
+            coarse_k: Number of coarse candidates (default 100); a falsy value defers to config
+            options: Search configuration (uses defaults if None)
+
+        Returns:
+            ChainSearchResult with cross-encoder reranked results and statistics
+
+        Examples:
+            >>> engine = ChainSearchEngine(registry, mapper, config=config)
+            >>> result = engine.binary_rerank_cascade_search(
+            ...     "how to authenticate users",
+            ...     Path("D:/project/src"),
+            ...     k=10,
+            ...     coarse_k=100
+            ... )
+            >>> for r in result.results:
+            ...     print(f"{r.path}: {r.score:.3f}")
+        """
+        if not NUMPY_AVAILABLE:
+            self.logger.warning(
+                "NumPy not available, falling back to hybrid cascade search"
+            )
+            return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)
+
+        options = options or SearchOptions()
+        start_time = time.time()
+        stats = SearchStats()
+
+        # Config values only apply when the caller passes a falsy k / coarse_k
+        if self._config is not None:
+            if hasattr(self._config, "cascade_coarse_k"):
+                coarse_k = coarse_k or self._config.cascade_coarse_k
+            if hasattr(self._config, "cascade_fine_k"):
+                k = k or self._config.cascade_fine_k
+
+        # Step 1: Find starting index
+        start_index = self._find_start_index(source_path)
+        if not start_index:
+            self.logger.warning(f"No index found for {source_path}")
+            stats.time_ms = (time.time() - start_time) * 1000
+            return ChainSearchResult(
+                query=query,
+                results=[],
+                symbols=[],
+                stats=stats
+            )
+
+        # Step 2: Collect all index paths
+        index_paths = self._collect_index_paths(start_index, options.depth)
+        stats.dirs_searched = len(index_paths)
+
+        if not index_paths:
+            self.logger.warning(f"No indexes collected from {start_index}")
+            stats.time_ms = (time.time() - start_time) * 1000
+            return ChainSearchResult(
+                query=query,
+                results=[],
+                symbols=[],
+                stats=stats
+            )
+
+        # Step 3: Import the binary embedding backend (hybrid fallback if it is missing)
+        try:
+            from codexlens.indexing.embedding import BinaryEmbeddingBackend
+        except ImportError as exc:
+            self.logger.warning(
+                "BinaryEmbeddingBackend not available: %s, falling back to hybrid cascade",
+                exc
+            )
+            return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)
+
+        # Step 4: Binary coarse search; candidates are (chunk_id, distance, index_path)
+        binary_coarse_time = time.time()
+        coarse_candidates: List[Tuple[int, int, Path]] = []
+
+        # Try centralized BinarySearcher first (preferred for mmap indexes)
+        # The index root is the parent of the first index path
+        index_root = index_paths[0].parent if index_paths else None
+        used_centralized = False
+
+        if index_root:
+            binary_searcher = self._get_centralized_binary_searcher(index_root)
+            if binary_searcher is not None:
+                try:
+                    # BinarySearcher expects dense vector, not packed binary
+                    from codexlens.semantic.embedder import Embedder
+                    embedder = Embedder()
+                    query_dense = embedder.embed_to_numpy([query])[0]
+
+                    results = binary_searcher.search(query_dense, top_k=coarse_k)
+                    for chunk_id, distance in results:
+                        coarse_candidates.append((chunk_id, distance, index_root))
+                    # Only mark as used if we got actual results
+                    if coarse_candidates:
+                        used_centralized = True
+                        self.logger.debug(
+                            "Binary coarse search (centralized): %d candidates in %.2fms",
+                            len(results), (time.time() - binary_coarse_time) * 1000
+                        )
+                except Exception as exc:
+                    self.logger.debug(f"Centralized binary search failed: {exc}")
+
+        if not used_centralized:
+            # Get GPU preference from config
+            use_gpu = True
+            if self._config is not None:
+                use_gpu = getattr(self._config, "embedding_use_gpu", True)
+
+            try:
+                binary_backend = BinaryEmbeddingBackend(use_gpu=use_gpu)
+                query_binary = binary_backend.embed_packed([query])[0]
+            except Exception as exc:
+                self.logger.warning(f"Failed to generate binary query embedding: {exc}")
+                return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)
+
+            # Fallback to per-directory binary indexes
+            for index_path in index_paths:
+                try:
+                    binary_index = self._get_or_create_binary_index(index_path)
+                    if binary_index is None or binary_index.count() == 0:
+                        continue
+                    # BinaryANNIndex returns (ids, distances) arrays
+                    ids, distances = binary_index.search(query_binary, coarse_k)
+                    for chunk_id, dist in zip(ids, distances):
+                        coarse_candidates.append((chunk_id, dist, index_path))
+                except Exception as exc:
+                    self.logger.debug(
+                        "Binary search failed for %s: %s", index_path, exc
+                    )
+
+        if not coarse_candidates:
+            self.logger.info("No binary candidates found, falling back to hybrid cascade for reranking")
+            # Fall back to hybrid_cascade_search which uses FTS+Vector coarse + cross-encoder rerank
+            return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)
+
+        # Sort by Hamming distance and take top coarse_k
+        coarse_candidates.sort(key=lambda x: x[1])
+        coarse_candidates = coarse_candidates[:coarse_k]
+
+        self.logger.debug(
+            "Binary coarse search: %d candidates in %.2fms",
+            len(coarse_candidates), (time.time() - binary_coarse_time) * 1000
+        )
+
+        # Step 5: Build SearchResult objects for cross-encoder reranking
+        # Group candidates by index path for efficient retrieval
+        candidates_by_index: Dict[Path, List[int]] = {}
+        for chunk_id, distance, index_path in coarse_candidates:
+            if index_path not in candidates_by_index:
+                candidates_by_index[index_path] = []
+            candidates_by_index[index_path].append(chunk_id)
+
+        # Retrieve chunk content for reranking
+        # Prefer the centralized VectorMetadataStore; two fallbacks are tried below
+        import sqlite3
+        coarse_results: List[SearchResult] = []
+
+        # Find the centralized metadata store path (project root)
+        # index_root was computed earlier, use it for chunk retrieval
+        central_meta_path = index_root / VECTORS_META_DB_NAME if index_root else None
+        central_meta_store = None
+        if central_meta_path and central_meta_path.exists():
+            central_meta_store = VectorMetadataStore(central_meta_path)
+
+        for index_path, chunk_ids in candidates_by_index.items():
+            try:
+                chunks_data = []
+                if central_meta_store:
+                    # Try centralized VectorMetadataStore first (preferred)
+                    chunks_data = central_meta_store.get_chunks_by_ids(chunk_ids)
+
+                if not chunks_data and used_centralized:
+                    # NOTE(review): when used_centralized, index_path is index_root, so this re-checks the same DB path - confirm intent
+                    meta_db_path = index_path / VECTORS_META_DB_NAME
+                    if meta_db_path.exists():
+                        meta_store = VectorMetadataStore(meta_db_path)
+                        chunks_data = meta_store.get_chunks_by_ids(chunk_ids)
+
+                if not chunks_data:
+                    # Final fallback: query semantic_chunks table directly
+                    # This handles per-directory indexes with semantic_chunks table
+                    try:
+                        conn = sqlite3.connect(str(index_path))
+                        conn.row_factory = sqlite3.Row
+                        placeholders = ",".join("?" * len(chunk_ids))
+                        cursor = conn.execute(
+                            f"""
+                            SELECT id, file_path, content, metadata, category
+                            FROM semantic_chunks
+                            WHERE id IN ({placeholders})
+                            """,
+                            chunk_ids
+                        )
+                        chunks_data = [
+                            {
+                                "id": row["id"],
+                                "file_path": row["file_path"],
+                                "content": row["content"],
+                                "metadata": row["metadata"],
+                                "category": row["category"],
+                            }
+                            for row in cursor.fetchall()
+                        ]
+                        conn.close()
+                    except Exception:
+                        pass  # Best-effort: any error (e.g. missing table) is ignored. NOTE(review): conn is not closed on error
+
+                for chunk in chunks_data:
+                    # Look up this chunk's distance by id only (first match; index_path is not compared); 256 if absent
+                    chunk_id = chunk.get("id") or chunk.get("chunk_id")
+                    distance = next(
+                        (d for cid, d, _ in coarse_candidates if cid == chunk_id),
+                        256
+                    )
+                    # Initial score from Hamming distance (will be replaced by reranker)
+                    score = 1.0 - (distance / 256.0)
+
+                    content = chunk.get("content", "")
+                    result = SearchResult(
+                        path=chunk.get("file_path", ""),
+                        score=float(score),
+                        excerpt=content[:500] if content else "",
+                        content=content,
+                    )
+                    coarse_results.append(result)
+            except Exception as exc:
+                self.logger.debug(
+                    "Failed to retrieve chunks from %s: %s", index_path, exc
+                )
+
+        if not coarse_results:
+            stats.time_ms = (time.time() - start_time) * 1000
+            return ChainSearchResult(
+                query=query, results=[], symbols=[], stats=stats
+            )
+
+        self.logger.debug(
+            "Retrieved %d chunks for cross-encoder reranking", len(coarse_results)
+        )
+
+        # Step 6: Cross-encoder reranking, requesting the top k results
+        rerank_time = time.time()
+        reranked_results = self._cross_encoder_rerank(query, coarse_results, top_k=k)
+
+        self.logger.debug(
+            "Cross-encoder reranking: %d results in %.2fms",
+            len(reranked_results), (time.time() - rerank_time) * 1000
+        )
+
+        # Deduplicate by path (keep highest score). NOTE(review): if the reranker already truncated to k, fewer than k may remain
+        path_to_result: Dict[str, SearchResult] = {}
+        for result in reranked_results:
+            if result.path not in path_to_result or result.score > path_to_result[result.path].score:
+                path_to_result[result.path] = result
+
+        final_results = list(path_to_result.values())[:k]
+
+        stats.files_matched = len(final_results)
+        stats.time_ms = (time.time() - start_time) * 1000
+
+        self.logger.debug(
+            "Binary+Rerank cascade search complete: %d results in %.2fms",
+            len(final_results),
+            stats.time_ms,
+        )
+
+        return ChainSearchResult(
+            query=query,
+            results=final_results,
+            symbols=[],
+            stats=stats,
+        )
+
+    def dense_rerank_cascade_search(
+        self,
+        query: str,
+        source_path: Path,
+        k: int = 10,
+        coarse_k: int = 100,
+        options: Optional[SearchOptions] = None,
+    ) -> ChainSearchResult:
+        """Execute dense cascade search with cross-encoder reranking.
+
+        Dense-vector coarse search over per-directory ANN indexes, then rerank.
+        Falls back to hybrid_cascade_search if NumPy, the embedding, or candidates are missing.
+
+        Dense + Reranker cascade process:
+        1. Stage 1 (Coarse): ANNIndex search per index path, merged and sorted by
+           ascending distance, keeping coarse_k (dimension from the query embedding)
+        2. Stage 2 (Fine): Cross-encoder reranking for precise semantic ranking
+
+        Args:
+            query: Natural language or keyword query string
+            source_path: Starting directory path
+            k: Number of final results to return (default 10); a falsy value defers to config
+            coarse_k: Number of coarse candidates (default 100); a falsy value defers to config
+            options: Search configuration (uses defaults if None)
+
+        Returns:
+            ChainSearchResult with cross-encoder reranked results and statistics
+        """
+        if not NUMPY_AVAILABLE:
+            self.logger.warning(
+                "NumPy not available, falling back to hybrid cascade search"
+            )
+            return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)
+
+        options = options or SearchOptions()
+        start_time = time.time()
+        stats = SearchStats()
+
+        # Config values only apply when the caller passes a falsy k / coarse_k
+        if self._config is not None:
+            if hasattr(self._config, "cascade_coarse_k"):
+                coarse_k = coarse_k or self._config.cascade_coarse_k
+            if hasattr(self._config, "cascade_fine_k"):
+                k = k or self._config.cascade_fine_k
+
+        # Step 1: Find starting index
+        start_index = self._find_start_index(source_path)
+        if not start_index:
+            self.logger.warning(f"No index found for {source_path}")
+            stats.time_ms = (time.time() - start_time) * 1000
+            return ChainSearchResult(
+                query=query,
+                results=[],
+                symbols=[],
+                stats=stats
+            )
+
+        # Step 2: Collect all index paths
+        index_paths = self._collect_index_paths(start_index, options.depth)
+        stats.dirs_searched = len(index_paths)
+
+        if not index_paths:
+            self.logger.warning(f"No indexes collected from {start_index}")
+            stats.time_ms = (time.time() - start_time) * 1000
+            return ChainSearchResult(
+                query=query,
+                results=[],
+                symbols=[],
+                stats=stats
+            )
+
+        # Step 3: Generate query dense embedding using same model as index
+        # Read embedding config to match the model used during indexing
+        dense_coarse_time = time.time()
+        try:
+            from codexlens.semantic.factory import get_embedder
+
+            # Get embedding settings from config
+            embedding_backend = "litellm"  # Default to API for dense
+            embedding_model = "qwen3-embedding-sf"  # Default model
+            use_gpu = True
+
+            if self._config is not None:
+                embedding_backend = getattr(self._config, "embedding_backend", "litellm")
+                embedding_model = getattr(self._config, "embedding_model", "qwen3-embedding-sf")
+                use_gpu = getattr(self._config, "embedding_use_gpu", True)
+
+            # Create embedder matching index configuration
+            if embedding_backend == "litellm":
+                embedder = get_embedder(backend="litellm", model=embedding_model)
+            else:
+                embedder = get_embedder(backend="fastembed", profile=embedding_model, use_gpu=use_gpu)
+
+            query_dense = embedder.embed_to_numpy([query])[0]
+            self.logger.debug(f"Dense query embedding: {query_dense.shape[0]}-dim via {embedding_backend}/{embedding_model}")
+        except Exception as exc:
+            self.logger.warning(f"Failed to generate dense query embedding: {exc}")
+            return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)
+
+        # Step 4: Dense coarse search using HNSW indexes
+        coarse_candidates: List[Tuple[int, float, Path]] = []  # (chunk_id, distance, index_path)
+        index_root = index_paths[0].parent if index_paths else None  # NOTE(review): not read again in this method
+
+        for index_path in index_paths:
+            try:
+                # Load the ANN index sized to the query embedding; skip it if loading fails or it is empty
+                from codexlens.semantic.ann_index import ANNIndex
+                ann_index = ANNIndex(index_path, dim=query_dense.shape[0])
+                if not ann_index.load():
+                    continue
+
+                if ann_index.count() == 0:
+                    continue
+
+                # Search HNSW index
+                ids, distances = ann_index.search(query_dense, top_k=coarse_k)
+                for chunk_id, dist in zip(ids, distances):
+                    coarse_candidates.append((chunk_id, dist, index_path))
+
+            except Exception as exc:
+                self.logger.debug(
+                    "Dense search failed for %s: %s", index_path, exc
+                )
+
+        if not coarse_candidates:
+            self.logger.info("No dense candidates found, falling back to hybrid cascade")
+            return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)
+
+        # Sort by distance (ascending for cosine distance) and take top coarse_k
+        coarse_candidates.sort(key=lambda x: x[1])
+        coarse_candidates = coarse_candidates[:coarse_k]
+
+        self.logger.debug(
+            "Dense coarse search: %d candidates in %.2fms",
+            len(coarse_candidates), (time.time() - dense_coarse_time) * 1000
+        )
+
+        # Step 5: Build SearchResult objects for cross-encoder reranking
+        candidates_by_index: Dict[Path, List[int]] = {}
+        for chunk_id, distance, index_path in coarse_candidates:
+            if index_path not in candidates_by_index:
+                candidates_by_index[index_path] = []
+            candidates_by_index[index_path].append(chunk_id)
+
+        # Retrieve chunk content for reranking
+        import sqlite3
+        coarse_results: List[SearchResult] = []
+
+        for index_path, chunk_ids in candidates_by_index.items():
+            try:
+                # Query semantic_chunks directly from the index DB. NOTE(review): conn is not closed if the query raises
+                conn = sqlite3.connect(str(index_path))
+                conn.row_factory = sqlite3.Row
+                placeholders = ",".join("?" * len(chunk_ids))
+                cursor = conn.execute(
+                    f"""
+                    SELECT id, file_path, content, metadata, category
+                    FROM semantic_chunks
+                    WHERE id IN ({placeholders})
+                    """,
+                    chunk_ids
+                )
+                chunks_data = [
+                    {
+                        "id": row["id"],
+                        "file_path": row["file_path"],
+                        "content": row["content"],
+                        "metadata": row["metadata"],
+                        "category": row["category"],
+                    }
+                    for row in cursor.fetchall()
+                ]
+                conn.close()
+
+                for chunk in chunks_data:
+                    chunk_id = chunk.get("id")  # distance lookup below matches on id only (index_path is not compared); 1.0 if absent
+                    distance = next(
+                        (d for cid, d, _ in coarse_candidates if cid == chunk_id),
+                        1.0
+                    )
+                    # Convert cosine distance to score
+                    score = 1.0 - distance
+
+                    content = chunk.get("content", "")
+                    result = SearchResult(
+                        path=chunk.get("file_path", ""),
+                        score=float(score),
+                        excerpt=content[:500] if content else "",
+                        content=content,
+                    )
+                    coarse_results.append(result)
+            except Exception as exc:
+                self.logger.debug(
+                    "Failed to retrieve chunks from %s: %s", index_path, exc
+                )
+
+        if not coarse_results:
+            stats.time_ms = (time.time() - start_time) * 1000
+            return ChainSearchResult(
+                query=query, results=[], symbols=[], stats=stats
+            )
+
+        self.logger.debug(
+            "Retrieved %d chunks for cross-encoder reranking", len(coarse_results)
+        )
+
+        # Step 6: Cross-encoder reranking, requesting the top k results
+        rerank_time = time.time()
+        reranked_results = self._cross_encoder_rerank(query, coarse_results, top_k=k)
+
+        self.logger.debug(
+            "Cross-encoder reranking: %d results in %.2fms",
+            len(reranked_results), (time.time() - rerank_time) * 1000
+        )
+
+        # Deduplicate by path (keep highest score). NOTE(review): if the reranker already truncated to k, fewer than k may remain
+        path_to_result: Dict[str, SearchResult] = {}
+        for result in reranked_results:
+            if result.path not in path_to_result or result.score > path_to_result[result.path].score:
+                path_to_result[result.path] = result
+
+        final_results = list(path_to_result.values())[:k]
+
+        stats.files_matched = len(final_results)
+        stats.time_ms = (time.time() - start_time) * 1000
+
+        self.logger.debug(
+            "Dense+Rerank cascade search complete: %d results in %.2fms",
+            len(final_results),
+            stats.time_ms,
+        )
+
+        return ChainSearchResult(
+            query=query,
+            results=final_results,
+            symbols=[],
+            stats=stats,
+        )
+
     def _get_or_create_binary_index(self, index_path: Path) -> Optional[Any]:
         """Get or create a BinaryANNIndex for the given index path.