From 5cae3cb3c8cdebf11d3b6456c205174aa35963d9 Mon Sep 17 00:00:00 2001 From: catlog22 Date: Mon, 23 Feb 2026 22:56:25 +0800 Subject: [PATCH] feat(memorycore): add tags system, session summaries, hook injection, tag filtering, and solidify compress mode Implement 6 interconnected memorycore enhancements: 1. Tags backend: add tags TEXT column to memories table with migration, JSON array storage, full CRUD support via upsertMemory/getMemory/getMemories 2. LLM auto-tag extraction: extend extraction prompt to produce tags, parse and validate in pipeline, create CMEM from extraction results 3. Session summary API: expose rollout_summary via new REST endpoints GET /api/core-memory/sessions/summaries and sessions/:id/summary 4. Hook injection: increase SESSION_START_LIMIT to 1500, add Component 5 (Recent Sessions) to UnifiedContextBuilder with 300-char budget 5. Tag filtering: add getMemoriesByTags() with json_each() for safe SQL matching, wire through MCP tool, CLI --tags flag, REST ?tags= param 6. Solidify compress mode: add --type compress to solidify.md with getRecentMemories(), archiveMemories(), buildCompressionMetadata() Security fixes: safeParseTags() for corrupt DB data, json_each() instead of LIKE injection, ESCAPE clause for searchSessionsByKeyword, singleton store in unified-context-builder. 
--- .claude/commands/workflow/session/solidify.md | 208 ++++++++++++++++-- ccw/src/commands/core-memory.ts | 45 +++- ccw/src/core/core-memory-store.ts | 185 +++++++++++++++- ccw/src/core/memory-extraction-pipeline.ts | 25 ++- ccw/src/core/memory-extraction-prompts.ts | 19 +- ccw/src/core/routes/core-memory-routes.ts | 61 ++++- ccw/src/core/routes/memory-routes.ts | 23 +- ccw/src/core/services/hook-context-service.ts | 4 +- ccw/src/core/unified-context-builder.ts | 56 ++++- ccw/src/tools/core-memory.ts | 18 +- 10 files changed, 582 insertions(+), 62 deletions(-) diff --git a/.claude/commands/workflow/session/solidify.md b/.claude/commands/workflow/session/solidify.md index c0eb8aa7..c2a5b5cf 100644 --- a/.claude/commands/workflow/session/solidify.md +++ b/.claude/commands/workflow/session/solidify.md @@ -1,12 +1,13 @@ --- name: solidify -description: Crystallize session learnings and user-defined constraints into permanent project guidelines -argument-hint: "[-y|--yes] [--type ] [--category ] \"rule or insight\"" +description: Crystallize session learnings and user-defined constraints into permanent project guidelines, or compress recent memories +argument-hint: "[-y|--yes] [--type ] [--category ] [--limit ] \"rule or insight\"" examples: - /workflow:session:solidify "Use functional components for all React code" --type convention - /workflow:session:solidify -y "No direct DB access from controllers" --type constraint --category architecture - /workflow:session:solidify "Cache invalidation requires event sourcing" --type learning --category architecture - /workflow:session:solidify --interactive + - /workflow:session:solidify --type compress --limit 10 --- ## Auto Mode @@ -29,10 +30,11 @@ Crystallizes ephemeral session context (insights, decisions, constraints) into p | Parameter | Type | Required | Description | |-----------|------|----------|-------------| -| `rule` | string | ✅ (unless --interactive) | The rule, convention, or insight to solidify | -| `--type` | 
enum | ❌ | Type: `convention`, `constraint`, `learning` (default: auto-detect) | -| `--category` | string | ❌ | Category for organization (see categories below) | -| `--interactive` | flag | ❌ | Launch guided wizard for adding rules | +| `rule` | string | Yes (unless --interactive or --type compress) | The rule, convention, or insight to solidify | +| `--type` | enum | No | Type: `convention`, `constraint`, `learning`, `compress` (default: auto-detect) | +| `--category` | string | No | Category for organization (see categories below) | +| `--interactive` | flag | No | Launch guided wizard for adding rules | +| `--limit` | number | No | Number of recent memories to compress (default: 20, only for --type compress) | ### Type Categories @@ -42,32 +44,68 @@ Crystallizes ephemeral session context (insights, decisions, constraints) into p **constraint** → Hard rules that must not be violated (goes to `constraints` section) - Subcategories: `architecture`, `tech_stack`, `performance`, `security` -**learning** → Session-specific insights (goes to `learnings` array) +**learning** -> Session-specific insights (goes to `learnings` array) - Subcategories: `architecture`, `performance`, `security`, `testing`, `process`, `other` +**compress** -> Compress/deduplicate recent memories into a single consolidated CMEM +- No subcategories (operates on core memories, not project guidelines) +- Fetches recent non-archived memories, LLM-compresses them, creates a new CMEM +- Source memories are archived after successful compression + ## Execution Process ``` Input Parsing: - ├─ Parse: rule text (required unless --interactive) - ├─ Parse: --type (convention|constraint|learning) - ├─ Parse: --category (subcategory) - └─ Parse: --interactive (flag) + |- Parse: rule text (required unless --interactive or --type compress) + |- Parse: --type (convention|constraint|learning|compress) + |- Parse: --category (subcategory) + |- Parse: --interactive (flag) + +- Parse: --limit (number, default 20, 
compress only) -Step 1: Ensure Guidelines File Exists - └─ If not exists → Create with empty structure +IF --type compress: + Step C1: Fetch Recent Memories + +- Call getRecentMemories(limit, excludeArchived=true) -Step 2: Auto-detect Type (if not specified) - └─ Analyze rule text for keywords + Step C2: Validate Candidates + +- If fewer than 2 memories found -> abort with message -Step 3: Validate and Format Entry - └─ Build entry object based on type + Step C3: LLM Compress + +- Build compression prompt with all memory contents + +- Send to LLM for consolidation + +- Receive compressed text -Step 4: Update Guidelines File - └─ Add entry to appropriate section + Step C4: Merge Tags + +- Collect tags from all source memories + +- Deduplicate into a single merged tag array -Step 5: Display Confirmation - └─ Show what was added and where + Step C5: Create Compressed CMEM + +- Generate new CMEM via upsertMemory with: + - content: compressed text from LLM + - summary: auto-generated + - tags: merged deduplicated tags + - metadata: buildCompressionMetadata(sourceIds, originalSize, compressedSize) + + Step C6: Archive Source Memories + +- Call archiveMemories(sourceIds) + + Step C7: Display Compression Report + +- Show source count, compression ratio, new CMEM ID + +ELSE (convention/constraint/learning): + Step 1: Ensure Guidelines File Exists + +- If not exists -> Create with empty structure + + Step 2: Auto-detect Type (if not specified) + +- Analyze rule text for keywords + + Step 3: Validate and Format Entry + +- Build entry object based on type + + Step 4: Update Guidelines File + +- Add entry to appropriate section + + Step 5: Display Confirmation + +- Show what was added and where ``` ## Implementation @@ -197,17 +235,127 @@ Write('.workflow/project-guidelines.json', JSON.stringify(guidelines, null, 2)); ### Step 5: Display Confirmation ``` -✓ Guideline solidified +Guideline solidified Type: ${type} Category: ${category} Rule: "${rule}" -Location: 
.workflow/project-guidelines.json → ${type}s.${category} +Location: .workflow/project-guidelines.json -> ${type}s.${category} Total ${type}s in ${category}: ${count} ``` +## Compress Mode (--type compress) + +When `--type compress` is specified, the command operates on core memories instead of project guidelines. It fetches recent memories, sends them to an LLM for consolidation, and creates a new compressed CMEM. + +### Step C1: Fetch Recent Memories + +```javascript +// Uses CoreMemoryStore.getRecentMemories() +const limit = parsedArgs.limit || 20; +const recentMemories = store.getRecentMemories(limit, /* excludeArchived */ true); + +if (recentMemories.length < 2) { + console.log("Not enough non-archived memories to compress (need at least 2)."); + return; +} +``` + +### Step C2: Build Compression Prompt + +Concatenate all memory contents and send to LLM with the following prompt: + +``` +Given these ${N} memories, produce a single consolidated memory that: +1. Preserves all key information and insights +2. Removes redundancy and duplicate concepts +3. Organizes content by theme/topic +4. Maintains specific technical details and decisions + +Source memories: +--- +[Memory CMEM-XXXXXXXX-XXXXXX]: +${memory.content} +--- +[Memory CMEM-XXXXXXXX-XXXXXX]: +${memory.content} +--- +... + +Output: A single comprehensive memory text. 
+``` + +### Step C3: Merge Tags from Source Memories + +```javascript +// Collect all tags from source memories and deduplicate +const allTags = new Set(); +for (const memory of recentMemories) { + if (memory.tags) { + for (const tag of memory.tags) { + allTags.add(tag); + } + } +} +const mergedTags = Array.from(allTags); +``` + +### Step C4: Create Compressed CMEM + +```javascript +const sourceIds = recentMemories.map(m => m.id); +const originalSize = recentMemories.reduce((sum, m) => sum + m.content.length, 0); +const compressedSize = compressedText.length; + +const metadata = store.buildCompressionMetadata(sourceIds, originalSize, compressedSize); + +const newMemory = store.upsertMemory({ + content: compressedText, + summary: `Compressed from ${sourceIds.length} memories`, + tags: mergedTags, + metadata: metadata +}); +``` + +### Step C5: Archive Source Memories + +```javascript +// Archive all source memories after successful compression +store.archiveMemories(sourceIds); +``` + +### Step C6: Display Compression Report + +``` +Memory compression complete + +New CMEM: ${newMemory.id} +Sources compressed: ${sourceIds.length} +Original size: ${originalSize} chars +Compressed size: ${compressedSize} chars +Compression ratio: ${(compressedSize / originalSize * 100).toFixed(1)}% +Tags merged: ${mergedTags.join(', ') || '(none)'} +Source memories archived: ${sourceIds.join(', ')} +``` + +### Compressed CMEM Metadata Format + +The compressed CMEM's `metadata` field contains a JSON string with: + +```json +{ + "compressed_from": ["CMEM-20260101-120000", "CMEM-20260102-140000", "..."], + "compression_ratio": 0.45, + "compressed_at": "2026-02-23T10:30:00.000Z" +} +``` + +- `compressed_from`: Array of source memory IDs that were consolidated +- `compression_ratio`: Ratio of compressed size to original size (lower = more compression) +- `compressed_at`: ISO timestamp of when the compression occurred + ## Interactive Mode When `--interactive` flag is provided: @@ -280,6 
+428,20 @@ Result: } ``` +### Compress Recent Memories +```bash +/workflow:session:solidify --type compress --limit 10 +``` + +Result: Creates a new CMEM with consolidated content from the 10 most recent non-archived memories. Source memories are archived. The new CMEM's metadata tracks which memories were compressed: +```json +{ + "compressed_from": ["CMEM-20260220-100000", "CMEM-20260221-143000", "..."], + "compression_ratio": 0.42, + "compressed_at": "2026-02-23T10:30:00.000Z" +} +``` + ## Integration with Planning The `project-guidelines.json` is consumed by: diff --git a/ccw/src/commands/core-memory.ts b/ccw/src/commands/core-memory.ts index 0ecd7cd5..83c83c83 100644 --- a/ccw/src/commands/core-memory.ts +++ b/ccw/src/commands/core-memory.ts @@ -39,6 +39,7 @@ interface CommandOptions { topK?: string; minScore?: string; category?: string; + tags?: string; } /** @@ -624,9 +625,49 @@ async function loadClusterAction(clusterId: string, options: CommandOptions): Pr * Search sessions by keyword */ async function searchAction(keyword: string, options: CommandOptions): Promise { + // When --tags is provided, use tag-based filtering instead of keyword search + if (options.tags) { + const parsedTags = options.tags.split(',').map(t => t.trim()).filter(Boolean); + if (parsedTags.length === 0) { + console.error(chalk.red('Error: --tags requires at least one tag')); + console.error(chalk.gray('Usage: ccw core-memory search --tags tag1,tag2')); + process.exit(1); + } + + try { + const store = getCoreMemoryStore(getProjectPath()); + const memories = store.getMemoriesByTags(parsedTags, { limit: 100 }); + + if (memories.length === 0) { + console.log(chalk.yellow(`\n No memories found with tags: [${parsedTags.join(', ')}]\n`)); + return; + } + + console.log(chalk.bold.cyan(`\n Memories with tags [${parsedTags.join(', ')}]\n`)); + console.log(chalk.gray(' ─────────────────────────────────────────────────────────────────')); + + for (const memory of memories) { + const date = new 
Date(memory.updated_at).toLocaleString(); + const archived = memory.archived ? chalk.gray(' [archived]') : ''; + const tagDisplay = (memory.tags && memory.tags.length > 0) ? chalk.gray(` [${memory.tags.join(', ')}]`) : ''; + console.log(chalk.cyan(` ${memory.id}`) + archived + tagDisplay); + console.log(chalk.white(` ${memory.summary || memory.content.substring(0, 80)}${memory.content.length > 80 ? '...' : ''}`)); + console.log(chalk.gray(` Updated: ${date}`)); + console.log(chalk.gray(' ─────────────────────────────────────────────────────────────────')); + } + + console.log(chalk.gray(`\n Total: ${memories.length}\n`)); + + } catch (error) { + console.error(chalk.red(`Error: ${(error as Error).message}`)); + process.exit(1); + } + return; + } + if (!keyword || keyword.trim() === '') { console.error(chalk.red('Error: Keyword is required')); - console.error(chalk.gray('Usage: ccw core-memory search [--type core|workflow|cli|all]')); + console.error(chalk.gray('Usage: ccw core-memory search [--type core|workflow|cli|all] [--tags tag1,tag2]')); process.exit(1); } @@ -1065,6 +1106,7 @@ export async function coreMemoryCommand( console.log(chalk.white(' context ') + chalk.gray('Get progressive index')); console.log(chalk.white(' load-cluster ') + chalk.gray('Load cluster context')); console.log(chalk.white(' search ') + chalk.gray('Search sessions')); + console.log(chalk.white(' search --tags tag1,tag2 ') + chalk.gray('Filter memories by tags')); console.log(chalk.white(' search --unified ') + chalk.gray('Unified vector+FTS search')); console.log(); console.log(chalk.bold(' Memory V2 Pipeline:')); @@ -1080,6 +1122,7 @@ export async function coreMemoryCommand( console.log(chalk.gray(' --json Output as JSON')); console.log(chalk.gray(' --scope Auto-cluster scope (all/recent/unclustered)')); console.log(chalk.gray(' --dedup Deduplicate similar clusters')); + console.log(chalk.gray(' --tags Filter by tags (comma-separated)')); console.log(chalk.gray(' --delete Delete a 
cluster')); console.log(chalk.gray(' --merge Merge source clusters into target')); console.log(); diff --git a/ccw/src/core/core-memory-store.ts b/ccw/src/core/core-memory-store.ts index c7518bfc..6b6c7398 100644 --- a/ccw/src/core/core-memory-store.ts +++ b/ccw/src/core/core-memory-store.ts @@ -10,6 +10,16 @@ import { StoragePaths, ensureStorageDir } from '../config/storage-paths.js'; import { UnifiedVectorIndex, isUnifiedEmbedderAvailable } from './unified-vector-index.js'; import type { ChunkMetadata } from './unified-vector-index.js'; +// Helpers +function safeParseTags(raw: string | null | undefined): string[] { + try { + const parsed = JSON.parse(raw || '[]'); + return Array.isArray(parsed) ? parsed : []; + } catch { + return []; + } +} + // Types export interface CoreMemory { id: string; // Format: CMEM-YYYYMMDD-HHMMSS @@ -20,6 +30,7 @@ export interface CoreMemory { updated_at: string; archived: boolean; metadata?: string; // JSON string + tags?: string[]; // JSON array stored as TEXT } export interface SessionCluster { @@ -137,7 +148,8 @@ export class CoreMemoryStore { created_at TEXT NOT NULL, updated_at TEXT NOT NULL, archived INTEGER DEFAULT 0, - metadata TEXT + metadata TEXT, + tags TEXT DEFAULT '[]' ); -- Session clusters table @@ -313,6 +325,16 @@ export class CoreMemoryStore { // Re-enable foreign key constraints this.db.pragma('foreign_keys = ON'); + + // Add tags column to existing memories table + try { + this.db.exec(`ALTER TABLE memories ADD COLUMN tags TEXT DEFAULT '[]'`); + } catch (e) { + const msg = (e as Error).message || ''; + if (!msg.includes('duplicate column name')) { + throw e; // Re-throw unexpected errors + } + } } catch (e) { // If migration fails, continue - tables may not exist try { @@ -408,10 +430,11 @@ export class CoreMemoryStore { // Update existing memory const stmt = this.db.prepare(` UPDATE memories - SET content = ?, summary = ?, raw_output = ?, updated_at = ?, archived = ?, metadata = ? 
+ SET content = ?, summary = ?, raw_output = ?, updated_at = ?, archived = ?, metadata = ?, tags = ? WHERE id = ? `); + const tags = memory.tags ?? existingMemory.tags ?? []; stmt.run( memory.content, memory.summary || existingMemory.summary, @@ -419,6 +442,7 @@ export class CoreMemoryStore { now, memory.archived !== undefined ? (memory.archived ? 1 : 0) : existingMemory.archived ? 1 : 0, memory.metadata || existingMemory.metadata, + JSON.stringify(tags), id ); @@ -429,8 +453,8 @@ export class CoreMemoryStore { } else { // Insert new memory const stmt = this.db.prepare(` - INSERT INTO memories (id, content, summary, raw_output, created_at, updated_at, archived, metadata) - VALUES (?, ?, ?, ?, ?, ?, ?, ?) + INSERT INTO memories (id, content, summary, raw_output, created_at, updated_at, archived, metadata, tags) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) `); stmt.run( @@ -441,7 +465,8 @@ export class CoreMemoryStore { now, now, memory.archived ? 1 : 0, - memory.metadata || null + memory.metadata || null, + JSON.stringify(memory.tags || []) ); // Sync new content to vector index @@ -467,12 +492,13 @@ export class CoreMemoryStore { created_at: row.created_at, updated_at: row.updated_at, archived: Boolean(row.archived), - metadata: row.metadata + metadata: row.metadata, + tags: safeParseTags(row.tags) }; } /** - * Get all memories + * Get memories with optional filtering by archived status */ getMemories(options: { archived?: boolean; limit?: number; offset?: number } = {}): CoreMemory[] { const { archived, limit = 50, offset = 0 } = options; @@ -506,7 +532,52 @@ export class CoreMemoryStore { created_at: row.created_at, updated_at: row.updated_at, archived: Boolean(row.archived), - metadata: row.metadata + metadata: row.metadata, + tags: safeParseTags(row.tags) + })); + } + + /** + * Get memories filtered by tags (AND logic - must contain ALL specified tags) + */ + getMemoriesByTags(tags: string[], options: { archived?: boolean; limit?: number; offset?: number } = {}): 
CoreMemory[] { + const { archived, limit = 50, offset = 0 } = options; + + if (tags.length === 0) { + return this.getMemories({ archived, limit, offset }); + } + + // Use json_each for proper structured matching (safe from injection) + const conditions = tags.map(() => `EXISTS (SELECT 1 FROM json_each(memories.tags) WHERE json_each.value = ?)`).join(' AND '); + const params: (string | number)[] = [...tags]; + + let archiveClause = ''; + if (archived !== undefined) { + archiveClause = ' AND archived = ?'; + params.push(archived ? 1 : 0); + } + + const query = ` + SELECT * FROM memories + WHERE ${conditions}${archiveClause} + ORDER BY updated_at DESC + LIMIT ? OFFSET ? + `; + params.push(limit, offset); + + const stmt = this.db.prepare(query); + const rows = stmt.all(...params) as any[]; + + return rows.map(row => ({ + id: row.id, + content: row.content, + summary: row.summary, + raw_output: row.raw_output, + created_at: row.created_at, + updated_at: row.updated_at, + archived: Boolean(row.archived), + metadata: row.metadata, + tags: safeParseTags(row.tags) })); } @@ -534,6 +605,60 @@ export class CoreMemoryStore { stmt.run(new Date().toISOString(), id); } + /** + * Get recent memories ordered by creation time (newest first) + * Used by compression flow to select candidates for consolidation + */ + getRecentMemories(limit: number = 20, excludeArchived: boolean = true): CoreMemory[] { + const query = excludeArchived + ? 
`SELECT * FROM memories WHERE archived = 0 ORDER BY created_at DESC LIMIT ?` + : `SELECT * FROM memories ORDER BY created_at DESC LIMIT ?`; + + const stmt = this.db.prepare(query); + const rows = stmt.all(limit) as any[]; + + return rows.map(row => ({ + id: row.id, + content: row.content, + summary: row.summary, + raw_output: row.raw_output, + created_at: row.created_at, + updated_at: row.updated_at, + archived: Boolean(row.archived), + metadata: row.metadata, + tags: safeParseTags(row.tags) + })); + } + + /** + * Archive multiple memories in a single transaction + * Used after successful compression to archive source memories + */ + archiveMemories(ids: string[]): void { + if (ids.length === 0) return; + + const now = new Date().toISOString(); + const placeholders = ids.map(() => '?').join(', '); + const stmt = this.db.prepare(` + UPDATE memories + SET archived = 1, updated_at = ? + WHERE id IN (${placeholders}) + `); + stmt.run(now, ...ids); + } + + /** + * Build metadata JSON for a compressed memory + * Tracks source memory IDs, compression ratio, and timestamp + */ + buildCompressionMetadata(sourceIds: string[], originalSize: number, compressedSize: number): string { + return JSON.stringify({ + compressed_from: sourceIds, + compression_ratio: compressedSize / originalSize, + compressed_at: new Date().toISOString() + }); + } + /** * Delete a memory */ @@ -969,11 +1094,12 @@ ${memory.content} searchSessionsByKeyword(keyword: string): SessionMetadataCache[] { const stmt = this.db.prepare(` SELECT * FROM session_metadata_cache - WHERE title LIKE ? OR summary LIKE ? OR keywords LIKE ? + WHERE title LIKE ? ESCAPE '\\' OR summary LIKE ? ESCAPE '\\' OR keywords LIKE ? 
ESCAPE '\\' ORDER BY access_count DESC, last_accessed DESC `); - const pattern = `%${keyword}%`; + const escaped = keyword.replace(/[%_\\]/g, c => '\\' + c); + const pattern = `%${escaped}%`; const rows = stmt.all(pattern, pattern, pattern) as any[]; return rows.map(row => ({ @@ -1413,6 +1539,39 @@ ${memory.content} })); } + /** + * Get session summaries from stage1_outputs, ordered by generated_at descending. + * Returns lightweight objects with thread_id, rollout_summary, and generated_at. + */ + getSessionSummaries(limit: number = 20): Array<{ thread_id: string; rollout_summary: string; generated_at: number }> { + const stmt = this.db.prepare( + `SELECT thread_id, rollout_summary, generated_at FROM stage1_outputs ORDER BY generated_at DESC LIMIT ?` + ); + const rows = stmt.all(limit) as any[]; + return rows.map(row => ({ + thread_id: row.thread_id, + rollout_summary: row.rollout_summary, + generated_at: row.generated_at, + })); + } + + /** + * Get a single session summary by thread_id. + * Returns null if no extraction output exists for the given thread. 
+ */ + getSessionSummary(threadId: string): { thread_id: string; rollout_summary: string; generated_at: number } | null { + const stmt = this.db.prepare( + `SELECT thread_id, rollout_summary, generated_at FROM stage1_outputs WHERE thread_id = ?` + ); + const row = stmt.get(threadId) as any; + if (!row) return null; + return { + thread_id: row.thread_id, + rollout_summary: row.rollout_summary, + generated_at: row.generated_at, + }; + } + /** * Count Phase 1 outputs */ @@ -1581,7 +1740,8 @@ export function getMemoriesFromProject(projectId: string): CoreMemory[] { created_at: row.created_at, updated_at: row.updated_at, archived: Boolean(row.archived), - metadata: row.metadata + metadata: row.metadata, + tags: safeParseTags(row.tags) })); } @@ -1621,7 +1781,8 @@ export function findMemoryAcrossProjects(memoryId: string): { memory: CoreMemory created_at: row.created_at, updated_at: row.updated_at, archived: Boolean(row.archived), - metadata: row.metadata + metadata: row.metadata, + tags: safeParseTags(row.tags) }, projectId }; diff --git a/ccw/src/core/memory-extraction-pipeline.ts b/ccw/src/core/memory-extraction-pipeline.ts index a1d2b8c0..55bac895 100644 --- a/ccw/src/core/memory-extraction-pipeline.ts +++ b/ccw/src/core/memory-extraction-pipeline.ts @@ -40,6 +40,7 @@ export interface ExtractionInput { export interface ExtractionOutput { raw_memory: string; rollout_summary: string; + tags: string[]; } export interface TranscriptFilterOptions { @@ -289,10 +290,10 @@ export class MemoryExtractionPipeline { * Applies secret redaction and size limit enforcement. 
* * @param llmOutput - Raw text output from the LLM - * @returns Validated ExtractionOutput with raw_memory and rollout_summary + * @returns Validated ExtractionOutput with raw_memory, rollout_summary, and tags */ postProcess(llmOutput: string): ExtractionOutput { - let parsed: { raw_memory?: string; rollout_summary?: string } | null = null; + let parsed: { raw_memory?: string; rollout_summary?: string; tags?: string[] } | null = null; // Mode 1: Pure JSON try { @@ -333,7 +334,17 @@ export class MemoryExtractionPipeline { rolloutSummary = rolloutSummary.substring(0, MAX_SUMMARY_CHARS); } - return { raw_memory: rawMemory, rollout_summary: rolloutSummary }; + // Extract and validate tags (fallback to empty array) + let tags: string[] = []; + if (parsed.tags && Array.isArray(parsed.tags)) { + tags = parsed.tags + .filter((t: unknown) => typeof t === 'string') + .map((t: string) => t.toLowerCase().trim()) + .filter((t: string) => t.length > 0) + .slice(0, 8); + } + + return { raw_memory: rawMemory, rollout_summary: rolloutSummary, tags }; } // ======================================================================== @@ -388,6 +399,14 @@ export class MemoryExtractionPipeline { const store = getCoreMemoryStore(this.projectPath); store.upsertStage1Output(output); + // Create/update a core memory (CMEM) from extraction results with tags + store.upsertMemory({ + id: `CMEM-EXT-${sessionId}`, + content: extracted.raw_memory, + summary: extracted.rollout_summary, + tags: extracted.tags, + }); + // Sync extracted content to vector index (fire-and-forget) this.syncExtractionToVectorIndex(output); diff --git a/ccw/src/core/memory-extraction-prompts.ts b/ccw/src/core/memory-extraction-prompts.ts index 2412a78e..865e8af4 100644 --- a/ccw/src/core/memory-extraction-prompts.ts +++ b/ccw/src/core/memory-extraction-prompts.ts @@ -3,7 +3,7 @@ * * Provides system and user prompt templates for extracting structured memory * from CLI session transcripts. 
The LLM output must conform to a JSON schema - * with raw_memory and rollout_summary fields. + * with raw_memory, rollout_summary, and tags fields. * * Design spec section 4.4: Prompt structure with outcome triage rules. */ @@ -12,20 +12,29 @@ * System prompt for the extraction LLM call. * * Instructs the model to: - * - Produce a JSON object with raw_memory and rollout_summary + * - Produce a JSON object with raw_memory, rollout_summary, and tags * - Follow structure markers in raw_memory (# summary, Memory context, etc.) * - Apply outcome triage rules for categorizing task results * - Keep rollout_summary concise (1-2 sentences) + * - Generate 3-8 lowercase tags capturing topic, action, and technology */ export const EXTRACTION_SYSTEM_PROMPT = `You are a memory extraction agent. Your job is to read a CLI session transcript and produce structured memory output. -You MUST respond with a valid JSON object containing exactly two fields: +You MUST respond with a valid JSON object containing exactly three fields: { "raw_memory": "", - "rollout_summary": "<1-2 sentence summary>" + "rollout_summary": "<1-2 sentence summary>", + "tags": ["", "", ...] 
} +## tags format + +An array of 3-8 short lowercase tags (1-3 words each) capturing: +- Main topic or domain (e.g., "authentication", "database migration") +- Action type (e.g., "bug fix", "refactoring", "new feature") +- Key technology (e.g., "react", "typescript", "sqlite") + ## raw_memory format The raw_memory field must follow this structure: @@ -87,5 +96,5 @@ Session ID: ${sessionId} ${transcript} --- END TRANSCRIPT --- -Respond with a JSON object containing "raw_memory" and "rollout_summary" fields.`; +Respond with a JSON object containing "raw_memory", "rollout_summary", and "tags" fields.`; } diff --git a/ccw/src/core/routes/core-memory-routes.ts b/ccw/src/core/routes/core-memory-routes.ts index 4c1610d3..56fce7bd 100644 --- a/ccw/src/core/routes/core-memory-routes.ts +++ b/ccw/src/core/routes/core-memory-routes.ts @@ -38,10 +38,21 @@ export async function handleCoreMemoryRoutes(ctx: RouteContext): Promise t.trim()).filter(Boolean); + memories = tags.length > 0 + ? store.getMemoriesByTags(tags, { archived, limit, offset }) + : store.getMemories({ archived, limit, offset }); + } else { + memories = store.getMemories({ archived, limit, offset }); + } res.writeHead(200, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ success: true, memories })); @@ -78,7 +89,7 @@ export async function handleCoreMemoryRoutes(ctx: RouteContext): Promise { - const { content, summary, raw_output, id, archived, metadata, path: projectPath } = body; + const { content, summary, raw_output, id, archived, metadata, tags, path: projectPath } = body; if (!content) { return { error: 'content is required', status: 400 }; @@ -94,7 +105,8 @@ export async function handleCoreMemoryRoutes(ctx: RouteContext): Promise { // API: Memory Module - Get all memories (core memory list) if (pathname === '/api/memory' && req.method === 'GET') { const projectPath = url.searchParams.get('path') || initialPath; + const tagsParam = url.searchParams.get('tags'); try { const store = 
getCoreMemoryStore(projectPath); - const memories = store.getMemories({ archived: false, limit: 100 }); + + // Use tag filter if tags query parameter is provided + let memories; + if (tagsParam) { + const tags = tagsParam.split(',').map(t => t.trim()).filter(Boolean); + memories = tags.length > 0 + ? store.getMemoriesByTags(tags, { archived: false, limit: 100 }) + : store.getMemories({ archived: false, limit: 100 }); + } else { + memories = store.getMemories({ archived: false, limit: 100 }); + } // Calculate total size const totalSize = memories.reduce((sum, m) => sum + (m.content?.length || 0), 0); @@ -109,7 +120,7 @@ export async function handleMemoryRoutes(ctx: RouteContext): Promise { createdAt: m.created_at, updatedAt: m.updated_at, source: m.metadata || undefined, - tags: [], // TODO: Extract tags from metadata if available + tags: m.tags || [], size: m.content?.length || 0 })); @@ -139,7 +150,7 @@ export async function handleMemoryRoutes(ctx: RouteContext): Promise { try { const store = getCoreMemoryStore(basePath); - const memory = store.upsertMemory({ content }); + const memory = store.upsertMemory({ content, tags }); // Broadcast update event broadcastToClients({ @@ -156,7 +167,7 @@ export async function handleMemoryRoutes(ctx: RouteContext): Promise { createdAt: memory.created_at, updatedAt: memory.updated_at, source: memory.metadata || undefined, - tags: tags || [], + tags: memory.tags || [], size: memory.content?.length || 0 }; } catch (error: unknown) { @@ -175,7 +186,7 @@ export async function handleMemoryRoutes(ctx: RouteContext): Promise { try { const store = getCoreMemoryStore(basePath); - const memory = store.upsertMemory({ id: memoryId, content }); + const memory = store.upsertMemory({ id: memoryId, content, tags }); // Broadcast update event broadcastToClients({ @@ -192,7 +203,7 @@ export async function handleMemoryRoutes(ctx: RouteContext): Promise { createdAt: memory.created_at, updatedAt: memory.updated_at, source: memory.metadata || 
undefined, - tags: tags || [], + tags: memory.tags || [], size: memory.content?.length || 0 }; } catch (error: unknown) { diff --git a/ccw/src/core/services/hook-context-service.ts b/ccw/src/core/services/hook-context-service.ts index 13fe477a..f73c6209 100644 --- a/ccw/src/core/services/hook-context-service.ts +++ b/ccw/src/core/services/hook-context-service.ts @@ -7,7 +7,7 @@ * - session-end: task generation for async background processing * * Character limits: - * - session-start: <= 1000 chars + * - session-start: <= 1500 chars * - per-prompt: <= 500 chars */ @@ -19,7 +19,7 @@ import { SessionStateService, type SessionState } from './session-state-service. // ============================================================================= /** Maximum character count for session-start context */ -const SESSION_START_LIMIT = 1000; +const SESSION_START_LIMIT = 1500; /** Maximum character count for per-prompt context */ const PER_PROMPT_LIMIT = 500; diff --git a/ccw/src/core/unified-context-builder.ts b/ccw/src/core/unified-context-builder.ts index ea111d07..f78d7513 100644 --- a/ccw/src/core/unified-context-builder.ts +++ b/ccw/src/core/unified-context-builder.ts @@ -7,7 +7,7 @@ * - session-end: incremental embedding + clustering + heat score update tasks * * Character limits: - * - session-start: <= 1000 chars + * - session-start: <= 1500 chars * - per-prompt: <= 500 chars */ @@ -29,7 +29,7 @@ import { SessionClusteringService } from './session-clustering-service.js'; // ============================================================================= /** Maximum character count for session-start context */ -const SESSION_START_LIMIT = 1000; +const SESSION_START_LIMIT = 1500; /** Maximum character count for per-prompt context */ const PER_PROMPT_LIMIT = 500; @@ -52,6 +52,12 @@ const VECTOR_TOP_K = 8; /** Minimum vector similarity score */ const VECTOR_MIN_SCORE = 0.3; +/** Maximum characters for the recent sessions component */ +const RECENT_SESSIONS_LIMIT = 300; + +/** 
Number of recent sessions to show */ +const RECENT_SESSIONS_COUNT = 5; + // ============================================================================= // Types // ============================================================================= @@ -84,11 +90,12 @@ export class UnifiedContextBuilder { /** * Build context for session-start hook injection. * - * Components (assembled in order, truncated to <= 1000 chars total): + * Components (assembled in order, truncated to <= 1500 chars total): * 1. MEMORY.md summary (up to 500 chars) * 2. Cluster overview (top 3 active clusters) * 3. Hot entities (top 5 within last 7 days) * 4. Solidified patterns (skills/*.md file list) + * 5. Recent sessions (last 5 session summaries) */ async buildSessionStartContext(): Promise<string> { const sections: string[] = []; @@ -117,6 +124,12 @@ export class UnifiedContextBuilder { sections.push(patterns); } + // Component 5: Recent sessions + const recentSessions = await this.buildRecentSessions(); + if (recentSessions) { + sections.push(recentSessions); + } + if (sections.length === 0) { return ''; } @@ -285,8 +298,8 @@ export class UnifiedContextBuilder { */ private async buildClusterOverview(): Promise<string> { try { - const { CoreMemoryStore } = await import('./core-memory-store.js'); - const store = new CoreMemoryStore(this.projectPath); + const { getCoreMemoryStore } = await import('./core-memory-store.js'); + const store = getCoreMemoryStore(this.projectPath); const clusters = store.listClusters('active'); if (clusters.length === 0) { @@ -380,6 +393,39 @@ export class UnifiedContextBuilder { } } + /** + * Build recent sessions component. + * Shows last N session summaries from CoreMemoryStore.
+ */ + private async buildRecentSessions(): Promise<string> { + try { + const { getCoreMemoryStore } = await import('./core-memory-store.js'); + const store = getCoreMemoryStore(this.projectPath); + const summaries = store.getSessionSummaries(RECENT_SESSIONS_COUNT); + + if (summaries.length === 0) { + return ''; + } + + let output = '## Recent Sessions\n'; + for (const s of summaries) { + const shortId = s.thread_id.substring(0, 12); + const summaryText = s.rollout_summary.length > 60 + ? s.rollout_summary.substring(0, 60) + '...' + : s.rollout_summary; + output += `- ${shortId}: ${summaryText}\n`; + } + + if (output.length > RECENT_SESSIONS_LIMIT) { + output = output.substring(0, RECENT_SESSIONS_LIMIT); + } + + return output; + } catch { + return ''; + } + } + // --------------------------------------------------------------------------- // Private: Formatting helpers // --------------------------------------------------------------------------- diff --git a/ccw/src/tools/core-memory.ts b/ccw/src/tools/core-memory.ts index 61cbcaa6..8baaaac0 100644 --- a/ccw/src/tools/core-memory.ts +++ b/ccw/src/tools/core-memory.ts @@ -38,6 +38,8 @@ const ParamsSchema = z.object({ force: z.boolean().optional().default(false), // V2 extract parameters max_sessions: z.number().optional(), + // Tags filter parameter + tags: z.array(z.string()).optional(), // V2 jobs parameters kind: z.string().optional(), status_filter: z.enum(['pending', 'running', 'done', 'error']).optional(), @@ -60,6 +62,7 @@ interface CoreMemoryCompact { preview: string; // Truncated content/summary preview archived: boolean; updated_at: string; + tags: string[]; } interface ListResult { @@ -182,9 +185,13 @@ const PREVIEW_MAX_LENGTH = 100; * List all memories with compact output */ function executeList(params: Params): ListResult { - const { limit, path } = params; + const { limit, path, tags } = params; const store = getCoreMemoryStore(getProjectPath(path)); - const memories = store.getMemories({ limit }) as
CoreMemory[]; + + // Use tag filter if tags provided, otherwise get all + const memories = tags && tags.length > 0 + ? store.getMemoriesByTags(tags, { limit }) as CoreMemory[] + : store.getMemories({ limit }) as CoreMemory[]; // Convert to compact format with truncated preview const compactMemories: CoreMemoryCompact[] = memories.map((m) => { @@ -198,6 +205,7 @@ function executeList(params: Params): ListResult { preview, archived: m.archived, updated_at: m.updated_at, + tags: (m as any).tags || [], }; }); @@ -584,6 +592,7 @@ export const schema: ToolSchema = { Usage: core_memory(operation="list") # List all memories + core_memory(operation="list", tags=["auth","api"]) # List memories with specific tags core_memory(operation="import", text="important context") # Import text as new memory core_memory(operation="export", id="CMEM-xxx") # Export memory as plain text core_memory(operation="summary", id="CMEM-xxx") # Generate AI summary @@ -674,6 +683,11 @@ Memory IDs use format: CMEM-YYYYMMDD-HHMMSS`, enum: ['pending', 'running', 'done', 'error'], description: 'Filter jobs by status (for jobs operation)', }, + tags: { + type: 'array', + items: { type: 'string' }, + description: 'Filter memories by tags (AND logic - must contain ALL specified tags). Used with list operation.', + }, }, required: ['operation'], },