diff --git a/.claude/commands/issue/queue.md b/.claude/commands/issue/queue.md
index 9679c342..a28e08ef 100644
--- a/.claude/commands/issue/queue.md
+++ b/.claude/commands/issue/queue.md
@@ -33,6 +33,7 @@ Queue formation command using **issue-queue-agent** that analyzes all bound solu
 | Get next item | `ccw issue next --json` | `Read('queues/*.json')` |
 | Update status | `ccw issue update --status ...` | Direct file edit |
 | Sync from queue | `ccw issue update --from-queue` | Direct file edit |
+| **Read solution (brief)** | `ccw issue solution --brief` | `Read('solutions/*.jsonl')` |
 
 **Output Options**:
 - `--brief`: JSON with minimal fields (id, status, counts)
@@ -109,14 +110,14 @@ Phase 6: Status Update & Summary
 ### Phase 1: Solution Loading & Distribution
 
 **Data Loading:**
-- Load `issues.jsonl` and filter issues with `status === 'planned'` and `bound_solution_id`
+- Use `ccw issue list --status planned --brief` to get planned issues with `bound_solution_id`
 - If no planned issues found → display message, suggest `/issue:plan`
 
-**Solution Collection** (for each planned issue):
-- Read `solutions/{issue-id}.jsonl`
-- Find bound solution by `bound_solution_id`
-- If bound solution not found → warn and skip issue
-- Extract `files_touched` from all task `modification_points`
+**Solution Brief Loading** (for each planned issue):
+```bash
+ccw issue solution <issue-id> --brief
+# Returns: [{ solution_id, is_bound, task_count, files_touched[] }]
+```
 
 **Build Solution Objects:**
 ```json
@@ -130,19 +131,8 @@ Phase 6: Status Update & Summary
 ```
 
 **Multi-Queue Distribution** (if `--queues > 1`):
-```javascript
-const numQueues = args.queues || 1;
-if (numQueues > 1) {
-  // Partition solutions to minimize cross-group file conflicts
-  const groups = partitionByFileOverlap(solutions, numQueues);
-  // groups = [[sol1, sol2], [sol3, sol4], [sol5]]
-}
-```
-
-**Partitioning Strategy:**
-- Group solutions with overlapping `files_touched` into same queue
-- Use greedy assignment: assign each solution to queue with most file overlap
-- If no overlap, assign to queue with fewest solutions (balance load)
+- Use `files_touched` from brief output for partitioning
+- Group solutions with overlapping files into same queue
 
 **Output:** Array of solution objects (or N arrays if multi-queue)
 
@@ -168,10 +158,11 @@ const queueIds = numQueues === 1
 
 ### Input
 ${JSON.stringify(solutions)}
+// Each object: { issue_id, solution_id, task_count, files_touched[], priority }
 
 ### Workflow
-Step 1: Build dependency graph from solutions (nodes=solutions, edges=file conflicts)
+Step 1: Build dependency graph from solutions (nodes=solutions, edges=file conflicts via files_touched)
 Step 2: Use Gemini CLI for conflict analysis (5 types: file, API, data, dependency, architecture)
 Step 3: For high-severity conflicts without clear resolution → add to `clarifications`
 Step 4: Calculate semantic priority (base from issue priority + task_count boost)
@@ -201,6 +192,7 @@ Step 6: Write queue JSON + update index
 - Queue Item ID format: S-1, S-2, S-3, ...
 - Use provided Queue ID (do NOT generate new)
 - `clarifications` only present if high-severity unresolved conflicts exist
+- Use `files_touched` from input (already extracted by orchestrator)
 
 ### Done Criteria
 - [ ] Queue JSON written with all solutions ordered
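Note on the partitioning bullets above: the removed JavaScript snippet is summarized rather than replaced. For reference, a minimal Python sketch of the greedy file-overlap strategy those bullets describe; the function name and `Solution` dict fields mirror the brief output, and none of this is project code:

```python
# Illustrative sketch of greedy partitioning by file overlap; not project code.
from typing import Dict, List


def partition_by_file_overlap(solutions: List[Dict], num_queues: int) -> List[List[Dict]]:
    """Greedily assign each solution to the queue sharing the most files_touched."""
    queues: List[List[Dict]] = [[] for _ in range(num_queues)]
    queue_files: List[set] = [set() for _ in range(num_queues)]

    # Larger solutions first, so big file clusters seed the queues
    for sol in sorted(solutions, key=lambda s: len(s["files_touched"]), reverse=True):
        files = set(sol["files_touched"])
        overlaps = [len(files & qf) for qf in queue_files]
        if max(overlaps) > 0:
            target = overlaps.index(max(overlaps))  # most file overlap wins
        else:
            # No overlap anywhere: balance load by queue size
            target = min(range(num_queues), key=lambda i: len(queues[i]))
        queues[target].append(sol)
        queue_files[target] |= files
    return queues
```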
diff --git a/ccw/src/commands/issue.ts b/ccw/src/commands/issue.ts
index 15fa2c07..57aa4cb0 100644
--- a/ccw/src/commands/issue.ts
+++ b/ccw/src/commands/issue.ts
@@ -203,6 +203,7 @@ interface IssueOptions {
   executor?: string;
   priority?: string;
   solution?: string;
+  solutionId?: string;  // --solution-id for filtering solutions
   result?: string;
   reason?: string;
   json?: boolean;
@@ -869,16 +870,16 @@ async function createAction(options: IssueOptions): Promise<void> {
 }
 
 /**
- * solution - Create solution from JSON data
- * Usage: ccw issue solution <issue-id> --data '{"tasks":[...]}'
- *        echo '{"tasks":[...]}' | ccw issue solution <issue-id>
- * Output: JSON with created solution (includes auto-generated ID)
+ * solution - Create or read solutions
+ * Create: ccw issue solution <issue-id> --data '{"tasks":[...]}'
+ * Read:   ccw issue solution <issue-id> [--brief] [--solution-id <id>]
+ * Brief:  Returns { solution_id, is_bound, task_count, files_touched[] } for each solution
 */
async function solutionAction(issueId: string | undefined, options: IssueOptions): Promise<void> {
  if (!issueId) {
    console.error(chalk.red('Issue ID required'));
-    console.error(chalk.gray('Usage: ccw issue solution <issue-id> --data \'{"tasks":[...]}\''));
-    console.error(chalk.gray('       echo \'{"tasks":[...]}\' | ccw issue solution <issue-id>'));
+    console.error(chalk.gray('Usage: ccw issue solution <issue-id> [--brief] [--solution-id <id>]'));
+    console.error(chalk.gray('       ccw issue solution <issue-id> --data \'{"tasks":[...]}\''));
    process.exit(1);
  }
@@ -893,20 +894,84 @@
    }
  }
 
-  if (!jsonData) {
-    console.error(chalk.red('JSON data required'));
-    console.error(chalk.gray('Usage: ccw issue solution <issue-id> --data \'{"tasks":[...]}\''));
-    console.error(chalk.gray('       echo \'{"tasks":[...]}\' | ccw issue solution <issue-id>'));
+  // CREATE mode: if --data provided
+  if (jsonData) {
+    try {
+      const data = JSON.parse(jsonData);
+      const solution = createSolution(issueId, data);
+      console.log(JSON.stringify(solution, null, 2));
+    } catch (err) {
+      console.error(chalk.red((err as Error).message));
+      process.exit(1);
+    }
+    return;
+  }
+
+  // READ mode: list solutions for issue
+  const issue = findIssue(issueId);
+  if (!issue) {
+    console.error(chalk.red(`Issue "${issueId}" not found`));
    process.exit(1);
  }
 
-  try {
-    const data = JSON.parse(jsonData);
-    const solution = createSolution(issueId, data);
-    console.log(JSON.stringify(solution, null, 2));
-  } catch (err) {
-    console.error(chalk.red((err as Error).message));
-    process.exit(1);
+  const solutions = readSolutions(issueId);
+  if (solutions.length === 0) {
+    if (options.json || options.brief) {
+      console.log('[]');
+    } else {
+      console.log(chalk.yellow(`No solutions found for ${issueId}`));
+    }
+    return;
+  }
+
+  // Filter by solution-id if specified
+  let targetSolutions = solutions;
+  if (options.solutionId) {
+    targetSolutions = solutions.filter(s => s.id === options.solutionId);
+    if (targetSolutions.length === 0) {
+      console.error(chalk.red(`Solution "${options.solutionId}" not found`));
+      process.exit(1);
+    }
+  }
+
+  // Brief mode: extract files_touched from modification_points
+  if (options.brief) {
+    const briefSolutions = targetSolutions.map(sol => {
+      const filesTouched = new Set<string>();
+      for (const task of sol.tasks) {
+        if (task.modification_points) {
+          for (const mp of task.modification_points) {
+            if (mp.file) filesTouched.add(mp.file);
+          }
+        }
+      }
+      return {
+        solution_id: sol.id,
+        is_bound: sol.is_bound,
+        task_count: sol.tasks.length,
+        files_touched: Array.from(filesTouched)
+      };
+    });
+    console.log(JSON.stringify(briefSolutions, null, 2));
+    return;
+  }
+
+  // JSON mode: full solutions
+  if (options.json) {
+    console.log(JSON.stringify(targetSolutions, null, 2));
+    return;
+  }
+
+  // Human-readable output
+  console.log(chalk.bold.cyan(`\nSolutions for ${issueId}:\n`));
+  for (const sol of targetSolutions) {
+    const marker = sol.is_bound ? chalk.green('◉ BOUND') : chalk.gray('○');
+    console.log(`${marker} ${sol.id}`);
+    console.log(chalk.gray(`  Tasks: ${sol.tasks.length}`));
+    if (sol.description) {
+      console.log(chalk.gray(`  ${sol.description.substring(0, 80)}...`));
+    }
+    console.log();
+  }
 }
@@ -2429,6 +2494,8 @@ export async function issueCommand(
      console.log(chalk.gray('  list [issue-id]              List issues or tasks'));
      console.log(chalk.gray('  history                      List completed issues (from history)'));
      console.log(chalk.gray('  status [issue-id]            Show detailed status'));
+      console.log(chalk.gray('  solution <issue-id>          List solutions for issue'));
+      console.log(chalk.gray('  solution <issue-id> --brief  Brief: solution_id, files_touched, task_count'));
      console.log(chalk.gray('  solution <issue-id> --data \'{...}\'  Create solution (auto-generates ID)'));
      console.log(chalk.gray('  bind <issue-id> [sol-id]     Bind solution'));
      console.log(chalk.gray('  update <issue-id> --status <status>  Update issue status'));
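Usage sketch for the new read mode, assuming only the JSON shape documented in the docstring above (the issue ID is a placeholder):

```python
# Illustrative consumer of `ccw issue solution <issue-id> --brief`; "ISSUE-42" is a placeholder.
import json
import subprocess

result = subprocess.run(
    ["ccw", "issue", "solution", "ISSUE-42", "--brief"],
    capture_output=True, text=True, check=True,
)
for brief in json.loads(result.stdout):
    # Each entry: { solution_id, is_bound, task_count, files_touched[] }
    if brief["is_bound"]:
        print(brief["solution_id"], brief["task_count"], brief["files_touched"])
```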
diff --git a/codex-lens/src/codexlens/cli/commands.py b/codex-lens/src/codexlens/cli/commands.py
index 8bfed9d5..7e019cf8 100644
--- a/codex-lens/src/codexlens/cli/commands.py
+++ b/codex-lens/src/codexlens/cli/commands.py
@@ -125,8 +125,8 @@ def index_init(
     workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, help="Parallel worker processes (default: auto-detect based on CPU count)."),
     force: bool = typer.Option(False, "--force", "-f", help="Force full reindex (skip incremental mode)."),
     no_embeddings: bool = typer.Option(False, "--no-embeddings", help="Skip automatic embedding generation (if semantic deps installed)."),
-    backend: str = typer.Option("fastembed", "--backend", "-b", help="Embedding backend: fastembed (local) or litellm (remote API)."),
-    model: str = typer.Option("code", "--model", "-m", help="Embedding model: profile name for fastembed (fast/code/multilingual/balanced) or model name for litellm (e.g. text-embedding-3-small)."),
+    backend: Optional[str] = typer.Option(None, "--backend", "-b", help="Embedding backend: fastembed (local) or litellm (remote API). Defaults to settings.json config."),
+    model: Optional[str] = typer.Option(None, "--model", "-m", help="Embedding model: profile name for fastembed or model name for litellm. Defaults to settings.json config."),
     max_workers: int = typer.Option(1, "--max-workers", min=1, help="Max concurrent API calls for embedding generation. Recommended: 4-8 for litellm backend."),
     json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
     verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."),
@@ -152,6 +152,12 @@ def index_init(
     """
     _configure_logging(verbose, json_mode)
     config = Config()
+
+    # Fall back to settings.json config when CLI params are not provided
+    config.load_settings()  # Ensure settings are loaded
+    actual_backend = backend or config.embedding_backend
+    actual_model = model or config.embedding_model
+
     languages = _parse_languages(language)
     base_path = path.expanduser().resolve()
@@ -199,15 +205,15 @@ def index_init(
 
     # Validate embedding backend
     valid_backends = ["fastembed", "litellm"]
-    if backend not in valid_backends:
-        error_msg = f"Invalid embedding backend: {backend}. Must be one of: {', '.join(valid_backends)}"
+    if actual_backend not in valid_backends:
+        error_msg = f"Invalid embedding backend: {actual_backend}. Must be one of: {', '.join(valid_backends)}"
         if json_mode:
             print_json(success=False, error=error_msg)
         else:
             console.print(f"[red]Error:[/red] {error_msg}")
         raise typer.Exit(code=1)
 
-    backend_available, backend_error = is_embedding_backend_available(backend)
+    backend_available, backend_error = is_embedding_backend_available(actual_backend)
 
     if backend_available:
         # Use the index root directory (not the _index.db file)
@@ -215,8 +221,8 @@ def index_init(
 
         if not json_mode:
             console.print("\n[bold]Generating embeddings...[/bold]")
-            console.print(f"Backend: [cyan]{backend}[/cyan]")
-            console.print(f"Model: [cyan]{model}[/cyan]")
+            console.print(f"Backend: [cyan]{actual_backend}[/cyan]")
+            console.print(f"Model: [cyan]{actual_model}[/cyan]")
         else:
             # Output progress message for JSON mode (parsed by Node.js)
             print("Generating embeddings...", flush=True)
@@ -236,8 +242,8 @@ def index_init(
 
         embed_result = generate_embeddings_recursive(
             index_root,
-            embedding_backend=backend,
-            model_profile=model,
+            embedding_backend=actual_backend,
+            model_profile=actual_model,
             force=False,  # Don't force regenerate during init
             chunk_size=2000,
             progress_callback=progress_update,  # Always use callback
@@ -283,7 +289,7 @@ def index_init(
     else:
         if not json_mode and verbose:
-            console.print(f"[dim]Embedding backend '{backend}' not available. Skipping embeddings.[/dim]")
+            console.print(f"[dim]Embedding backend '{actual_backend}' not available. Skipping embeddings.[/dim]")
         result["embeddings"] = {
             "generated": False,
             "error": backend_error or "Embedding backend not available",
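The `actual_backend` / `actual_model` resolution above is a plain two-level precedence: an explicit CLI flag wins, otherwise the settings.json value is used, and validation runs against the resolved value. A condensed, standalone sketch of the same pattern (illustrative only, not project code):

```python
# Minimal sketch of the CLI-over-settings precedence used above; not project code.
from typing import Optional, Tuple


def resolve_embedding_options(
    cli_backend: Optional[str],
    cli_model: Optional[str],
    settings_backend: str,
    settings_model: str,
) -> Tuple[str, str]:
    """CLI flags win; otherwise fall back to settings.json values."""
    backend = cli_backend or settings_backend
    model = cli_model or settings_model
    if backend not in ("fastembed", "litellm"):
        raise ValueError(f"Invalid embedding backend: {backend}")
    return backend, model
```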
diff --git a/codex-lens/src/codexlens/search/chain_search.py b/codex-lens/src/codexlens/search/chain_search.py
index a507458a..d23cdef4 100644
--- a/codex-lens/src/codexlens/search/chain_search.py
+++ b/codex-lens/src/codexlens/search/chain_search.py
@@ -10,6 +10,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import List, Optional, Dict, Any, Literal, Tuple, TYPE_CHECKING
+import json
 import logging
 import os
 import time
@@ -1241,20 +1242,60 @@ class ChainSearchEngine:
                 stats=stats
             )
 
-        # Step 3: Generate query dense embedding using same model as index
+        # Step 3: Find centralized HNSW index and read model config
+        from codexlens.config import VECTORS_HNSW_NAME
+        central_hnsw_path = None
+        index_root = start_index.parent
+        current_dir = index_root
+        for _ in range(10):  # Limit search depth
+            candidate = current_dir / VECTORS_HNSW_NAME
+            if candidate.exists():
+                central_hnsw_path = candidate
+                index_root = current_dir  # Update to where HNSW was found
+                break
+            parent = current_dir.parent
+            if parent == current_dir:  # Reached root
+                break
+            current_dir = parent
+
+        # Step 4: Generate query dense embedding using same model as centralized index
         # Read embedding config to match the model used during indexing
         dense_coarse_time = time.time()
         try:
             from codexlens.semantic.factory import get_embedder
 
-            # Get embedding settings from config
+            # Get embedding settings from centralized index config (preferred) or fall back to self._config
             embedding_backend = "litellm"  # Default to API for dense
             embedding_model = "qwen3-embedding-sf"  # Default model
             use_gpu = True
 
+            # Try to read model config from centralized index's embeddings_config table
+            central_index_db = index_root / "_index.db"
+            if central_index_db.exists():
+                try:
+                    from codexlens.semantic.vector_store import VectorStore
+                    with VectorStore(central_index_db) as vs:
+                        model_config = vs.get_model_config()
+                        if model_config:
+                            embedding_backend = model_config.get("backend", embedding_backend)
+                            embedding_model = model_config.get("model_name", embedding_model)
+                            self.logger.debug(
+                                "Read model config from centralized index: %s/%s",
+                                embedding_backend, embedding_model
+                            )
+                except Exception as e:
+                    self.logger.debug("Failed to read centralized model config: %s", e)
+
+            # Fall back to self._config if not read from index
             if self._config is not None:
-                embedding_backend = getattr(self._config, "embedding_backend", "litellm")
-                embedding_model = getattr(self._config, "embedding_model", "qwen3-embedding-sf")
+                if embedding_backend == "litellm" and embedding_model == "qwen3-embedding-sf":
+                    # Only use config values if we didn't read from centralized index
+                    config_backend = getattr(self._config, "embedding_backend", None)
+                    config_model = getattr(self._config, "embedding_model", None)
+                    if config_backend:
+                        embedding_backend = config_backend
+                    if config_model:
+                        embedding_model = config_model
                 use_gpu = getattr(self._config, "embedding_use_gpu", True)
 
             # Create embedder matching index configuration
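The `self._config` fallback above only fires while both values still equal the hard-coded defaults, using that equality as a stand-in for "nothing was read from the index". An equivalent but more explicit precedence chain, as an illustrative sketch (not project code, and omitting `use_gpu`):

```python
# Explicit precedence chain equivalent to the block above; illustrative only.
from typing import Optional, Tuple


def resolve_model(index_config: Optional[dict], engine_config) -> Tuple[str, str]:
    """Centralized index config > engine config > hard-coded defaults."""
    backend = "litellm"            # default for dense search
    model = "qwen3-embedding-sf"   # default model
    if index_config:
        backend = index_config.get("backend", backend)
        model = index_config.get("model_name", model)
    elif engine_config is not None:
        backend = getattr(engine_config, "embedding_backend", None) or backend
        model = getattr(engine_config, "embedding_model", None) or model
    return backend, model
```

An explicit "read from index" flag of this kind also avoids the corner case where a centralized config that happens to match the defaults is overridden by engine config.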
Skipping embeddings.[/dim]") result["embeddings"] = { "generated": False, "error": backend_error or "Embedding backend not available", diff --git a/codex-lens/src/codexlens/search/chain_search.py b/codex-lens/src/codexlens/search/chain_search.py index a507458a..d23cdef4 100644 --- a/codex-lens/src/codexlens/search/chain_search.py +++ b/codex-lens/src/codexlens/search/chain_search.py @@ -10,6 +10,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass, field from pathlib import Path from typing import List, Optional, Dict, Any, Literal, Tuple, TYPE_CHECKING +import json import logging import os import time @@ -1241,20 +1242,60 @@ class ChainSearchEngine: stats=stats ) - # Step 3: Generate query dense embedding using same model as index + # Step 3: Find centralized HNSW index and read model config + from codexlens.config import VECTORS_HNSW_NAME + central_hnsw_path = None + index_root = start_index.parent + current_dir = index_root + for _ in range(10): # Limit search depth + candidate = current_dir / VECTORS_HNSW_NAME + if candidate.exists(): + central_hnsw_path = candidate + index_root = current_dir # Update to where HNSW was found + break + parent = current_dir.parent + if parent == current_dir: # Reached root + break + current_dir = parent + + # Step 4: Generate query dense embedding using same model as centralized index # Read embedding config to match the model used during indexing dense_coarse_time = time.time() try: from codexlens.semantic.factory import get_embedder - # Get embedding settings from config + # Get embedding settings from centralized index config (preferred) or fallback to self._config embedding_backend = "litellm" # Default to API for dense embedding_model = "qwen3-embedding-sf" # Default model use_gpu = True + # Try to read model config from centralized index's embeddings_config table + central_index_db = index_root / "_index.db" + if central_index_db.exists(): + try: + from codexlens.semantic.vector_store import VectorStore + with VectorStore(central_index_db) as vs: + model_config = vs.get_model_config() + if model_config: + embedding_backend = model_config.get("backend", embedding_backend) + embedding_model = model_config.get("model_name", embedding_model) + self.logger.debug( + "Read model config from centralized index: %s/%s", + embedding_backend, embedding_model + ) + except Exception as e: + self.logger.debug("Failed to read centralized model config: %s", e) + + # Fallback to self._config if not read from index if self._config is not None: - embedding_backend = getattr(self._config, "embedding_backend", "litellm") - embedding_model = getattr(self._config, "embedding_model", "qwen3-embedding-sf") + if embedding_backend == "litellm" and embedding_model == "qwen3-embedding-sf": + # Only use config values if we didn't read from centralized index + config_backend = getattr(self._config, "embedding_backend", None) + config_model = getattr(self._config, "embedding_model", None) + if config_backend: + embedding_backend = config_backend + if config_model: + embedding_model = config_model use_gpu = getattr(self._config, "embedding_use_gpu", True) # Create embedder matching index configuration @@ -1269,31 +1310,54 @@ class ChainSearchEngine: self.logger.warning(f"Failed to generate dense query embedding: {exc}") return self.hybrid_cascade_search(query, source_path, k, coarse_k, options) - # Step 4: Dense coarse search using HNSW indexes + # Step 5: Dense coarse search using centralized HNSW index coarse_candidates: 
@@ -1320,29 +1384,63 @@ class ChainSearchEngine:
 
         for index_path, chunk_ids in candidates_by_index.items():
             try:
-                # Query semantic_chunks table directly
-                conn = sqlite3.connect(str(index_path))
-                conn.row_factory = sqlite3.Row
-                placeholders = ",".join("?" * len(chunk_ids))
-                cursor = conn.execute(
-                    f"""
-                    SELECT id, file_path, content, metadata, category
-                    FROM semantic_chunks
-                    WHERE id IN ({placeholders})
-                    """,
-                    chunk_ids
-                )
-                chunks_data = [
-                    {
-                        "id": row["id"],
-                        "file_path": row["file_path"],
-                        "content": row["content"],
-                        "metadata": row["metadata"],
-                        "category": row["category"],
-                    }
-                    for row in cursor.fetchall()
-                ]
-                conn.close()
+                # For centralized index, use _vectors_meta.db for chunk metadata
+                # which contains file_path, content, start_line, end_line
+                if central_hnsw_path is not None and index_path == index_root / "_index.db":
+                    # Use centralized metadata from _vectors_meta.db
+                    meta_db_path = index_root / "_vectors_meta.db"
+                    if meta_db_path.exists():
+                        conn = sqlite3.connect(str(meta_db_path))
+                        conn.row_factory = sqlite3.Row
+                        placeholders = ",".join("?" * len(chunk_ids))
+                        cursor = conn.execute(
+                            f"""
+                            SELECT chunk_id, file_path, content, start_line, end_line
+                            FROM chunk_metadata
+                            WHERE chunk_id IN ({placeholders})
+                            """,
+                            chunk_ids
+                        )
+                        chunks_data = [
+                            {
+                                "id": row["chunk_id"],
+                                "file_path": row["file_path"],
+                                "content": row["content"],
+                                "metadata": json.dumps({
+                                    "start_line": row["start_line"],
+                                    "end_line": row["end_line"]
+                                }),
+                                "category": "code" if row["file_path"].endswith(('.py', '.ts', '.js', '.java', '.go', '.rs', '.cpp', '.c')) else "doc",
+                            }
+                            for row in cursor.fetchall()
+                        ]
+                        conn.close()
+                    else:
+                        chunks_data = []
+                else:
+                    # Fall back to per-directory semantic_chunks table
+                    conn = sqlite3.connect(str(index_path))
+                    conn.row_factory = sqlite3.Row
+                    placeholders = ",".join("?" * len(chunk_ids))
+                    cursor = conn.execute(
+                        f"""
+                        SELECT id, file_path, content, metadata, category
+                        FROM semantic_chunks
+                        WHERE id IN ({placeholders})
+                        """,
+                        chunk_ids
+                    )
+                    chunks_data = [
+                        {
+                            "id": row["id"],
+                            "file_path": row["file_path"],
+                            "content": row["content"],
+                            "metadata": row["metadata"],
+                            "category": row["category"],
+                        }
+                        for row in cursor.fetchall()
+                    ]
+                    conn.close()
 
                 for chunk in chunks_data:
                     chunk_id = chunk.get("id")
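The centralized-metadata branch above rebuilds the `category` field from the file extension, since `_vectors_meta.db` stores no category column. The heuristic factored out for clarity (extension list copied from the diff; the helper name is illustrative):

```python
# Extension-based category heuristic used above, factored out; helper name is illustrative.
CODE_EXTENSIONS = ('.py', '.ts', '.js', '.java', '.go', '.rs', '.cpp', '.c')


def categorize(file_path: str) -> str:
    """Classify a chunk as code or doc from its file extension."""
    return "code" if file_path.endswith(CODE_EXTENSIONS) else "doc"
```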
diff --git a/codex-lens/src/codexlens/search/hybrid_search.py b/codex-lens/src/codexlens/search/hybrid_search.py
index b9bf2b2c..6be4d637 100644
--- a/codex-lens/src/codexlens/search/hybrid_search.py
+++ b/codex-lens/src/codexlens/search/hybrid_search.py
@@ -605,13 +605,20 @@ class HybridSearchEngine:
         index_root = hnsw_path.parent
         model_config = None
 
-        # Try to get model config from the provided index_path first
+        # Try to get model config from the centralized index root first
+        # (not the sub-directory index_path, which may have outdated config)
         try:
             from codexlens.semantic.vector_store import VectorStore
-            with VectorStore(index_path) as vs:
-                model_config = vs.get_model_config()
-        except Exception:
-            pass
+            central_index_path = index_root / "_index.db"
+            if central_index_path.exists():
+                with VectorStore(central_index_path) as vs:
+                    model_config = vs.get_model_config()
+                self.logger.debug(
+                    "Loaded model config from centralized index: %s",
+                    model_config
+                )
+        except Exception as e:
+            self.logger.debug("Failed to load model config from centralized index: %s", e)
 
         # Detect dimension from HNSW file if model config not found
         if model_config is None:
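With this change, both search engines resolve embedder configuration in the same order: stored model config from the centralized `_index.db` first, then dimension detection from the HNSW file as the fallback. A compact sketch of the first step (the `VectorStore` context-manager usage matches the diff; the helper itself is hypothetical, not project code):

```python
# Lookup-order sketch for embedder configuration; the helper is hypothetical.
from pathlib import Path
from typing import Optional


def load_model_config(index_root: Path) -> Optional[dict]:
    """Prefer the centralized index's stored model config; return None if unavailable."""
    central_index_path = index_root / "_index.db"
    if not central_index_path.exists():
        return None
    try:
        from codexlens.semantic.vector_store import VectorStore
        with VectorStore(central_index_path) as vs:
            return vs.get_model_config()
    except Exception:
        return None  # caller falls back to HNSW dimension detection
```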