feat: enhance solution management with solution-ID filtering and brief output; improve embedding model config loading

Author: catlog22
Date:   2026-01-07 09:31:52 +08:00
parent 05f762117a
commit 86d3e36722
5 changed files with 267 additions and 97 deletions

View File

@@ -33,6 +33,7 @@ Queue formation command using **issue-queue-agent** that analyzes all bound solu
| Get next item | `ccw issue next --json` | `Read('queues/*.json')` |
| Update status | `ccw issue update <id> --status ...` | Direct file edit |
| Sync from queue | `ccw issue update --from-queue` | Direct file edit |
+| **Read solution (brief)** | `ccw issue solution <id> --brief` | `Read('solutions/*.jsonl')` |
**Output Options**:
- `--brief`: JSON with minimal fields (id, status, counts)
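For orientation, a minimal consumer of the two brief endpoints above (hypothetical sketch; assumes `ccw` is on PATH and that brief issue entries expose an `id` field as listed):

```python
import json
import subprocess

def run_json(args):
    """Run a ccw subcommand and parse its JSON stdout."""
    out = subprocess.run(args, capture_output=True, text=True, check=True).stdout
    return json.loads(out)

# Planned issues in brief form: minimal fields (id, status, counts)
planned = run_json(["ccw", "issue", "list", "--status", "planned", "--brief"])

for issue in planned:
    # Solution briefs: [{ solution_id, is_bound, task_count, files_touched[] }]
    briefs = run_json(["ccw", "issue", "solution", issue["id"], "--brief"])
    bound = [b for b in briefs if b.get("is_bound")]
```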
@@ -109,14 +110,14 @@ Phase 6: Status Update & Summary
### Phase 1: Solution Loading & Distribution
**Data Loading:**
-- Load `issues.jsonl` and filter issues with `status === 'planned'` and `bound_solution_id`
+- Use `ccw issue list --status planned --brief` to get planned issues with `bound_solution_id`
- If no planned issues found → display message, suggest `/issue:plan`
-**Solution Collection** (for each planned issue):
-- Read `solutions/{issue-id}.jsonl`
-- Find bound solution by `bound_solution_id`
-- If bound solution not found → warn and skip issue
-- Extract `files_touched` from all task `modification_points`
+**Solution Brief Loading** (for each planned issue):
+```bash
+ccw issue solution <issue-id> --brief
+# Returns: [{ solution_id, is_bound, task_count, files_touched[] }]
+```
**Build Solution Objects:**
```json
@@ -130,19 +131,8 @@ Phase 6: Status Update & Summary
```
**Multi-Queue Distribution** (if `--queues > 1`):
-```javascript
-const numQueues = args.queues || 1;
-if (numQueues > 1) {
-  // Partition solutions to minimize cross-group file conflicts
-  const groups = partitionByFileOverlap(solutions, numQueues);
-  // groups = [[sol1, sol2], [sol3, sol4], [sol5]]
-}
-```
**Partitioning Strategy:**
-- Group solutions with overlapping `files_touched` into same queue
-- Use greedy assignment: assign each solution to queue with most file overlap
-- If no overlap, assign to queue with fewest solutions (balance load)
+- Use `files_touched` from brief output for partitioning
+- Group solutions with overlapping files into same queue
**Output:** Array of solution objects (or N arrays if multi-queue)
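After this change the partitioning strategy is described only in prose; a language-agnostic sketch of the greedy assignment (Python for brevity; `partition_by_file_overlap` mirrors the `partitionByFileOverlap` helper named in the removed snippet and is illustrative, not the shipped implementation):

```python
def partition_by_file_overlap(solutions, num_queues):
    """Greedy grouping: each solution joins the queue sharing the most
    files_touched; with no overlap anywhere, the smallest queue wins."""
    queues = [{"solutions": [], "files": set()} for _ in range(num_queues)]
    # Place file-heavy solutions first so large conflict clusters seed the queues
    for sol in sorted(solutions, key=lambda s: len(s["files_touched"]), reverse=True):
        files = set(sol["files_touched"])
        overlaps = [len(files & q["files"]) for q in queues]
        best = max(range(num_queues), key=lambda i: overlaps[i])
        if overlaps[best] == 0:
            # No file overlap with any queue: balance load instead
            best = min(range(num_queues), key=lambda i: len(queues[i]["solutions"]))
        queues[best]["solutions"].append(sol)
        queues[best]["files"] |= files
    return [q["solutions"] for q in queues]
```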
@@ -168,10 +158,11 @@ const queueIds = numQueues === 1
### Input
${JSON.stringify(solutions)}
+// Each object: { issue_id, solution_id, task_count, files_touched[], priority }
### Workflow
-Step 1: Build dependency graph from solutions (nodes=solutions, edges=file conflicts)
+Step 1: Build dependency graph from solutions (nodes=solutions, edges=file conflicts via files_touched)
Step 2: Use Gemini CLI for conflict analysis (5 types: file, API, data, dependency, architecture)
Step 3: For high-severity conflicts without clear resolution → add to `clarifications`
Step 4: Calculate semantic priority (base from issue priority + task_count boost)
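Step 4's formula is only named, not specified; one plausible reading, with all constants hypothetical:

```python
PRIORITY_BASE = {"critical": 100, "high": 75, "medium": 50, "low": 25}  # hypothetical scale

def semantic_priority(issue_priority, task_count):
    """Base score from the issue's priority, boosted by solution size."""
    base = PRIORITY_BASE.get(issue_priority, 50)
    boost = min(2 * task_count, 20)  # cap the boost so task_count cannot dominate
    return base + boost
```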
@@ -201,6 +192,7 @@ Step 6: Write queue JSON + update index
- Queue Item ID format: S-1, S-2, S-3, ...
- Use provided Queue ID (do NOT generate new)
- `clarifications` only present if high-severity unresolved conflicts exist
+- Use `files_touched` from input (already extracted by orchestrator)
### Done Criteria
- [ ] Queue JSON written with all solutions ordered

View File

@@ -203,6 +203,7 @@ interface IssueOptions {
executor?: string;
priority?: string;
solution?: string;
+solutionId?: string; // --solution-id <id> for filtering solutions
result?: string;
reason?: string;
json?: boolean;
@@ -869,16 +870,16 @@ async function createAction(options: IssueOptions): Promise<void> {
}
/**
- * solution - Create solution from JSON data
- * Usage: ccw issue solution <issue-id> --data '{"tasks":[...]}'
- *        echo '{"tasks":[...]}' | ccw issue solution <issue-id>
- * Output: JSON with created solution (includes auto-generated ID)
+ * solution - Create or read solutions
+ * Create: ccw issue solution <issue-id> --data '{"tasks":[...]}'
+ * Read:   ccw issue solution <issue-id> [--brief] [--solution-id <id>]
+ * Brief:  Returns { solution_id, is_bound, task_count, files_touched[] } for each solution
 */
async function solutionAction(issueId: string | undefined, options: IssueOptions): Promise<void> {
if (!issueId) {
console.error(chalk.red('Issue ID required'));
-console.error(chalk.gray('Usage: ccw issue solution <issue-id> --data \'{"tasks":[...]}\''));
-console.error(chalk.gray('       echo \'{"tasks":[...]}\' | ccw issue solution <issue-id>'));
+console.error(chalk.gray('Usage: ccw issue solution <issue-id> [--brief] [--solution-id <id>]'));
+console.error(chalk.gray('       ccw issue solution <issue-id> --data \'{"tasks":[...]}\''));
process.exit(1);
}
@@ -893,20 +894,84 @@ async function solutionAction(issueId: string | undefined, options: IssueOptions
}
}
-if (!jsonData) {
-console.error(chalk.red('JSON data required'));
-console.error(chalk.gray('Usage: ccw issue solution <issue-id> --data \'{"tasks":[...]}\''));
-console.error(chalk.gray('       echo \'{"tasks":[...]}\' | ccw issue solution <issue-id>'));
+// CREATE mode: if --data provided
+if (jsonData) {
+try {
+const data = JSON.parse(jsonData);
+const solution = createSolution(issueId, data);
+console.log(JSON.stringify(solution, null, 2));
+} catch (err) {
+console.error(chalk.red((err as Error).message));
+process.exit(1);
+}
+return;
+}
+// READ mode: list solutions for issue
+const issue = findIssue(issueId);
+if (!issue) {
+console.error(chalk.red(`Issue "${issueId}" not found`));
+process.exit(1);
+}
-try {
-const data = JSON.parse(jsonData);
-const solution = createSolution(issueId, data);
-console.log(JSON.stringify(solution, null, 2));
-} catch (err) {
-console.error(chalk.red((err as Error).message));
-process.exit(1);
+const solutions = readSolutions(issueId);
+if (solutions.length === 0) {
+if (options.json || options.brief) {
+console.log('[]');
+} else {
+console.log(chalk.yellow(`No solutions found for ${issueId}`));
+}
+return;
+}
+// Filter by solution-id if specified
+let targetSolutions = solutions;
+if (options.solutionId) {
+targetSolutions = solutions.filter(s => s.id === options.solutionId);
+if (targetSolutions.length === 0) {
+console.error(chalk.red(`Solution "${options.solutionId}" not found`));
+process.exit(1);
+}
+}
+// Brief mode: extract files_touched from modification_points
+if (options.brief) {
+const briefSolutions = targetSolutions.map(sol => {
+const filesTouched = new Set<string>();
+for (const task of sol.tasks) {
+if (task.modification_points) {
+for (const mp of task.modification_points) {
+if (mp.file) filesTouched.add(mp.file);
+}
+}
+}
+return {
+solution_id: sol.id,
+is_bound: sol.is_bound,
+task_count: sol.tasks.length,
+files_touched: Array.from(filesTouched)
+};
+});
+console.log(JSON.stringify(briefSolutions, null, 2));
+return;
+}
+// JSON mode: full solutions
+if (options.json) {
+console.log(JSON.stringify(targetSolutions, null, 2));
+return;
+}
+// Human-readable output
+console.log(chalk.bold.cyan(`\nSolutions for ${issueId}:\n`));
+for (const sol of targetSolutions) {
+const marker = sol.is_bound ? chalk.green('◉ BOUND') : chalk.gray('○');
+console.log(`${marker} ${sol.id}`);
+console.log(chalk.gray(`  Tasks: ${sol.tasks.length}`));
+if (sol.description) {
+console.log(chalk.gray(`  ${sol.description.substring(0, 80)}...`));
+}
+console.log();
+}
}
@@ -2429,6 +2494,8 @@ export async function issueCommand(
console.log(chalk.gray(' list [issue-id] List issues or tasks'));
console.log(chalk.gray(' history List completed issues (from history)'));
console.log(chalk.gray(' status [issue-id] Show detailed status'));
+console.log(chalk.gray('  solution <id>                  List solutions for issue'));
+console.log(chalk.gray('  solution <id> --brief          Brief: solution_id, files_touched, task_count'));
+console.log(chalk.gray('  solution <id> --data \'{...}\'   Create solution (auto-generates ID)'));
console.log(chalk.gray(' bind <issue-id> [sol-id] Bind solution'));
console.log(chalk.gray(' update <issue-id> --status <s> Update issue status'));

View File

@@ -125,8 +125,8 @@ def index_init(
workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, help="Parallel worker processes (default: auto-detect based on CPU count)."),
force: bool = typer.Option(False, "--force", "-f", help="Force full reindex (skip incremental mode)."),
no_embeddings: bool = typer.Option(False, "--no-embeddings", help="Skip automatic embedding generation (if semantic deps installed)."),
-backend: str = typer.Option("fastembed", "--backend", "-b", help="Embedding backend: fastembed (local) or litellm (remote API)."),
-model: str = typer.Option("code", "--model", "-m", help="Embedding model: profile name for fastembed (fast/code/multilingual/balanced) or model name for litellm (e.g. text-embedding-3-small)."),
+backend: Optional[str] = typer.Option(None, "--backend", "-b", help="Embedding backend: fastembed (local) or litellm (remote API). Defaults to settings.json config."),
+model: Optional[str] = typer.Option(None, "--model", "-m", help="Embedding model: profile name for fastembed or model name for litellm. Defaults to settings.json config."),
max_workers: int = typer.Option(1, "--max-workers", min=1, help="Max concurrent API calls for embedding generation. Recommended: 4-8 for litellm backend."),
json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."),
@@ -152,6 +152,12 @@ def index_init(
"""
_configure_logging(verbose, json_mode)
config = Config()
+# Fallback to settings.json config if CLI params not provided
+config.load_settings()  # Ensure settings are loaded
+actual_backend = backend or config.embedding_backend
+actual_model = model or config.embedding_model
languages = _parse_languages(language)
base_path = path.expanduser().resolve()
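The switch from concrete defaults to `Optional[str]` is what makes this fallback possible: with `backend: str = "fastembed"`, the command cannot distinguish an explicit `--backend fastembed` from an omitted flag, so settings.json could never take effect. A `None` default acts as a "not provided" sentinel (sketch only; the `settings` dict stands in for whatever `Config` actually exposes, and the committed code uses `or`, which additionally treats an empty string as unset):

```python
from typing import Optional

def resolve_embedding_config(cli_backend: Optional[str],
                             cli_model: Optional[str],
                             settings: dict) -> tuple:
    """CLI flag wins when given; otherwise fall back to settings.json values."""
    backend = cli_backend if cli_backend is not None else settings.get("embedding_backend", "fastembed")
    model = cli_model if cli_model is not None else settings.get("embedding_model", "code")
    return backend, model

assert resolve_embedding_config(None, None, {"embedding_backend": "litellm"})[0] == "litellm"
assert resolve_embedding_config("fastembed", None, {"embedding_backend": "litellm"})[0] == "fastembed"
```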
@@ -199,15 +205,15 @@ def index_init(
# Validate embedding backend
valid_backends = ["fastembed", "litellm"]
-if backend not in valid_backends:
-error_msg = f"Invalid embedding backend: {backend}. Must be one of: {', '.join(valid_backends)}"
+if actual_backend not in valid_backends:
+error_msg = f"Invalid embedding backend: {actual_backend}. Must be one of: {', '.join(valid_backends)}"
if json_mode:
print_json(success=False, error=error_msg)
else:
console.print(f"[red]Error:[/red] {error_msg}")
raise typer.Exit(code=1)
-backend_available, backend_error = is_embedding_backend_available(backend)
+backend_available, backend_error = is_embedding_backend_available(actual_backend)
if backend_available:
# Use the index root directory (not the _index.db file)
@@ -215,8 +221,8 @@ def index_init(
if not json_mode:
console.print("\n[bold]Generating embeddings...[/bold]")
console.print(f"Backend: [cyan]{backend}[/cyan]")
console.print(f"Model: [cyan]{model}[/cyan]")
console.print(f"Backend: [cyan]{actual_backend}[/cyan]")
console.print(f"Model: [cyan]{actual_model}[/cyan]")
else:
# Output progress message for JSON mode (parsed by Node.js)
print("Generating embeddings...", flush=True)
@@ -236,8 +242,8 @@ def index_init(
embed_result = generate_embeddings_recursive(
index_root,
-embedding_backend=backend,
-model_profile=model,
+embedding_backend=actual_backend,
+model_profile=actual_model,
force=False, # Don't force regenerate during init
chunk_size=2000,
progress_callback=progress_update, # Always use callback
@@ -283,7 +289,7 @@ def index_init(
}
else:
if not json_mode and verbose:
console.print(f"[dim]Embedding backend '{backend}' not available. Skipping embeddings.[/dim]")
console.print(f"[dim]Embedding backend '{actual_backend}' not available. Skipping embeddings.[/dim]")
result["embeddings"] = {
"generated": False,
"error": backend_error or "Embedding backend not available",

View File

@@ -10,6 +10,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Optional, Dict, Any, Literal, Tuple, TYPE_CHECKING
+import json
import logging
import os
import time
@@ -1241,20 +1242,60 @@ class ChainSearchEngine:
stats=stats
)
-# Step 3: Generate query dense embedding using same model as index
+# Step 3: Find centralized HNSW index and read model config
+from codexlens.config import VECTORS_HNSW_NAME
+central_hnsw_path = None
+index_root = start_index.parent
+current_dir = index_root
+for _ in range(10):  # Limit search depth
+candidate = current_dir / VECTORS_HNSW_NAME
+if candidate.exists():
+central_hnsw_path = candidate
+index_root = current_dir  # Update to where HNSW was found
+break
+parent = current_dir.parent
+if parent == current_dir:  # Reached root
+break
+current_dir = parent
+# Step 4: Generate query dense embedding using same model as centralized index
# Read embedding config to match the model used during indexing
dense_coarse_time = time.time()
try:
from codexlens.semantic.factory import get_embedder
-# Get embedding settings from config
+# Get embedding settings from centralized index config (preferred) or fallback to self._config
embedding_backend = "litellm" # Default to API for dense
embedding_model = "qwen3-embedding-sf" # Default model
use_gpu = True
+# Try to read model config from centralized index's embeddings_config table
+central_index_db = index_root / "_index.db"
+if central_index_db.exists():
+try:
+from codexlens.semantic.vector_store import VectorStore
+with VectorStore(central_index_db) as vs:
+model_config = vs.get_model_config()
+if model_config:
+embedding_backend = model_config.get("backend", embedding_backend)
+embedding_model = model_config.get("model_name", embedding_model)
+self.logger.debug(
+"Read model config from centralized index: %s/%s",
+embedding_backend, embedding_model
+)
+except Exception as e:
+self.logger.debug("Failed to read centralized model config: %s", e)
+# Fallback to self._config if not read from index
if self._config is not None:
-embedding_backend = getattr(self._config, "embedding_backend", "litellm")
-embedding_model = getattr(self._config, "embedding_model", "qwen3-embedding-sf")
+if embedding_backend == "litellm" and embedding_model == "qwen3-embedding-sf":
+# Only use config values if we didn't read from centralized index
+config_backend = getattr(self._config, "embedding_backend", None)
+config_model = getattr(self._config, "embedding_model", None)
+if config_backend:
+embedding_backend = config_backend
+if config_model:
+embedding_model = config_model
use_gpu = getattr(self._config, "embedding_use_gpu", True)
# Create embedder matching index configuration
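The block above implements the precedence: model config persisted in the centralized `_index.db`, then `self._config`, then hard-coded defaults. It detects "nothing read from the index" by checking whether the values still equal the default strings, which misfires if an index really was built with `litellm`/`qwen3-embedding-sf`. An equivalent `None`-sentinel formulation (a sketch, not the committed code):

```python
def resolve_model_config(index_config, runtime_config):
    """index_config: dict from VectorStore.get_model_config(), or None.
    runtime_config: the engine's self._config, or None."""
    backend = (index_config or {}).get("backend")
    model = (index_config or {}).get("model_name")
    if backend is None and runtime_config is not None:
        backend = getattr(runtime_config, "embedding_backend", None)
    if model is None and runtime_config is not None:
        model = getattr(runtime_config, "embedding_model", None)
    # Same hard-coded defaults as the committed code
    return backend or "litellm", model or "qwen3-embedding-sf"
```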
@@ -1269,31 +1310,54 @@ class ChainSearchEngine:
self.logger.warning(f"Failed to generate dense query embedding: {exc}")
return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)
-# Step 4: Dense coarse search using HNSW indexes
+# Step 5: Dense coarse search using centralized HNSW index
coarse_candidates: List[Tuple[int, float, Path]] = [] # (chunk_id, distance, index_path)
index_root = index_paths[0].parent if index_paths else None
-for index_path in index_paths:
+if central_hnsw_path is not None:
+# Use centralized index
try:
-# Load HNSW index
-from codexlens.semantic.ann_index import ANNIndex
-ann_index = ANNIndex(index_path, dim=query_dense.shape[0])
-if not ann_index.load():
-continue
-if ann_index.count() == 0:
-continue
-# Search HNSW index
-ids, distances = ann_index.search(query_dense, top_k=coarse_k)
-for chunk_id, dist in zip(ids, distances):
-coarse_candidates.append((chunk_id, dist, index_path))
+ann_index = ANNIndex.create_central(
+index_root=index_root,
+dim=query_dense.shape[0],
+)
+if ann_index.load() and ann_index.count() > 0:
+# Search centralized HNSW index
+ids, distances = ann_index.search(query_dense, top_k=coarse_k)
+for chunk_id, dist in zip(ids, distances):
+coarse_candidates.append((chunk_id, dist, index_root / "_index.db"))
+self.logger.debug(
+"Centralized dense search: %d candidates from %s",
+len(ids), central_hnsw_path
+)
except Exception as exc:
self.logger.debug(
"Dense search failed for %s: %s", index_path, exc
"Centralized dense search failed for %s: %s", central_hnsw_path, exc
)
+# Fallback: try per-directory HNSW indexes if centralized not found
+if not coarse_candidates:
+for index_path in index_paths:
+try:
+# Load HNSW index
+from codexlens.semantic.ann_index import ANNIndex
+ann_index = ANNIndex(index_path, dim=query_dense.shape[0])
+if not ann_index.load():
+continue
+if ann_index.count() == 0:
+continue
+# Search HNSW index
+ids, distances = ann_index.search(query_dense, top_k=coarse_k)
+for chunk_id, dist in zip(ids, distances):
+coarse_candidates.append((chunk_id, dist, index_path))
+except Exception as exc:
+self.logger.debug(
+"Dense search failed for %s: %s", index_path, exc
+)
if not coarse_candidates:
self.logger.info("No dense candidates found, falling back to hybrid cascade")
return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)
@@ -1307,7 +1371,7 @@ class ChainSearchEngine:
len(coarse_candidates), (time.time() - dense_coarse_time) * 1000
)
-# Step 5: Build SearchResult objects for cross-encoder reranking
+# Step 6: Build SearchResult objects for cross-encoder reranking
candidates_by_index: Dict[Path, List[int]] = {}
for chunk_id, distance, index_path in coarse_candidates:
if index_path not in candidates_by_index:
@@ -1320,29 +1384,63 @@ class ChainSearchEngine:
for index_path, chunk_ids in candidates_by_index.items():
try:
-# Query semantic_chunks table directly
-conn = sqlite3.connect(str(index_path))
-conn.row_factory = sqlite3.Row
-placeholders = ",".join("?" * len(chunk_ids))
-cursor = conn.execute(
-f"""
-SELECT id, file_path, content, metadata, category
-FROM semantic_chunks
-WHERE id IN ({placeholders})
-""",
-chunk_ids
-)
-chunks_data = [
-{
-"id": row["id"],
-"file_path": row["file_path"],
-"content": row["content"],
-"metadata": row["metadata"],
-"category": row["category"],
-}
-for row in cursor.fetchall()
-]
-conn.close()
+# For centralized index, use _vectors_meta.db for chunk metadata
+# which contains file_path, content, start_line, end_line
+if central_hnsw_path is not None and index_path == index_root / "_index.db":
+# Use centralized metadata from _vectors_meta.db
+meta_db_path = index_root / "_vectors_meta.db"
+if meta_db_path.exists():
+conn = sqlite3.connect(str(meta_db_path))
+conn.row_factory = sqlite3.Row
+placeholders = ",".join("?" * len(chunk_ids))
+cursor = conn.execute(
+f"""
+SELECT chunk_id, file_path, content, start_line, end_line
+FROM chunk_metadata
+WHERE chunk_id IN ({placeholders})
+""",
+chunk_ids
+)
+chunks_data = [
+{
+"id": row["chunk_id"],
+"file_path": row["file_path"],
+"content": row["content"],
+"metadata": json.dumps({
+"start_line": row["start_line"],
+"end_line": row["end_line"]
+}),
+"category": "code" if row["file_path"].endswith(('.py', '.ts', '.js', '.java', '.go', '.rs', '.cpp', '.c')) else "doc",
+}
+for row in cursor.fetchall()
+]
+conn.close()
+else:
+chunks_data = []
+else:
+# Fall back to per-directory semantic_chunks table
+conn = sqlite3.connect(str(index_path))
+conn.row_factory = sqlite3.Row
+placeholders = ",".join("?" * len(chunk_ids))
+cursor = conn.execute(
+f"""
+SELECT id, file_path, content, metadata, category
+FROM semantic_chunks
+WHERE id IN ({placeholders})
+""",
+chunk_ids
+)
+chunks_data = [
+{
+"id": row["id"],
+"file_path": row["file_path"],
+"content": row["content"],
+"metadata": row["metadata"],
+"category": row["category"],
+}
+for row in cursor.fetchall()
+]
+conn.close()
for chunk in chunks_data:
chunk_id = chunk.get("id")

View File

@@ -605,13 +605,20 @@ class HybridSearchEngine:
index_root = hnsw_path.parent
model_config = None
-# Try to get model config from the provided index_path first
+# Try to get model config from the centralized index root first
+# (not the sub-directory index_path, which may have outdated config)
try:
from codexlens.semantic.vector_store import VectorStore
-with VectorStore(index_path) as vs:
-model_config = vs.get_model_config()
-except Exception:
-pass
+central_index_path = index_root / "_index.db"
+if central_index_path.exists():
+with VectorStore(central_index_path) as vs:
+model_config = vs.get_model_config()
+self.logger.debug(
+"Loaded model config from centralized index: %s",
+model_config
+)
+except Exception as e:
+self.logger.debug("Failed to load model config from centralized index: %s", e)
# Detect dimension from HNSW file if model config not found
if model_config is None: