/**
 * Pattern Detector - Detects recurring content patterns across sessions
 *
 * Uses vector clustering (cosine similarity > 0.85) to group semantically similar
 * chunks into patterns. Patterns appearing in N>=3 distinct sessions are flagged
 * as candidates. High-confidence patterns (>=0.8) are solidified into CoreMemory
 * and skills/*.md files.
 */

import { CoreMemoryStore, getCoreMemoryStore } from './core-memory-store.js';
import { UnifiedVectorIndex, isUnifiedEmbedderAvailable } from './unified-vector-index.js';
import type { VectorSearchMatch } from './unified-vector-index.js';
import { existsSync, mkdirSync, writeFileSync } from 'fs';
import { join } from 'path';

// -- Constants --

/** Minimum cosine similarity to group chunks into the same pattern */
const PATTERN_SIMILARITY_THRESHOLD = 0.85;

/** Minimum number of distinct sessions a pattern must appear in */
const MIN_SESSION_FREQUENCY = 3;

/** Confidence threshold for auto-solidification */
const SOLIDIFY_CONFIDENCE_THRESHOLD = 0.8;

/** Maximum number of chunks to analyze per detection run */
const MAX_CHUNKS_TO_ANALYZE = 200;

/** Top-K neighbors to search per chunk during clustering */
const NEIGHBOR_TOP_K = 15;

/** Maximum length of a derived pattern name and of a skill file slug */
const MAX_NAME_LENGTH = 50;

/**
 * Words ignored when deriving a pattern name: common English function words
 * plus generic programming terms that carry no distinguishing meaning.
 * Hoisted to module scope so the set is built once, not on every call.
 */
const PATTERN_NAME_STOPWORDS: ReadonlySet<string> = new Set([
  'the', 'and', 'for', 'that', 'this', 'with', 'from', 'have', 'will',
  'are', 'was', 'were', 'been', 'what', 'when', 'where', 'which',
  'there', 'their', 'they', 'them', 'then', 'than', 'into', 'some',
  'code', 'file', 'function', 'class', 'import', 'export', 'const',
  'async', 'await', 'return', 'type', 'interface', 'string', 'number',
  'true', 'false', 'null', 'undefined', 'object', 'array', 'value',
  'data', 'result', 'error', 'name', 'path', 'index', 'item', 'list',
  'should', 'would', 'could', 'does', 'make', 'like', 'just', 'also',
  'used', 'using', 'each', 'other', 'more', 'only', 'need', 'very',
]);

/** Extract a printable message from an arbitrary thrown value. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

// -- Types --

export interface DetectedPattern {
  /** Unique pattern identifier */
  id: string;
  /** Human-readable pattern name derived from content */
  name: string;
  /** Representative content snippet */
  representative: string;
  /** Source IDs (sessions) where this pattern appears */
  sourceIds: string[];
  /** Number of distinct sessions */
  sessionCount: number;
  /** Average similarity score within the pattern group */
  avgSimilarity: number;
  /** Confidence score (0-1), based on frequency and similarity */
  confidence: number;
  /** Category of the chunks in this pattern */
  category: string;
}

export interface PatternDetectionResult {
  /** All detected patterns */
  patterns: DetectedPattern[];
  /** Number of chunks analyzed */
  chunksAnalyzed: number;
  /** IDs of patterns that were solidified (written to CoreMemory; skill file is best-effort) */
  solidified: string[];
  /** Elapsed time in ms */
  elapsedMs: number;
}

export interface SolidifyResult {
  /** ID of the CoreMemory entry created/updated for the pattern */
  memoryId: string;
  /** Path of the written skill file, or null when writing it failed */
  skillPath: string | null;
}

// -- PatternDetector --

export class PatternDetector {
  private readonly projectPath: string;
  private readonly coreMemoryStore: CoreMemoryStore;
  /** Null when no embedder is available; detection then returns an empty result. */
  private readonly vectorIndex: UnifiedVectorIndex | null;

  constructor(projectPath: string) {
    this.projectPath = projectPath;
    this.coreMemoryStore = getCoreMemoryStore(projectPath);
    this.vectorIndex = isUnifiedEmbedderAvailable()
      ? new UnifiedVectorIndex(projectPath)
      : null;
  }

  /**
   * Detect recurring patterns across sessions by vector clustering.
   *
   * Algorithm:
   * 1. Gather a sample of chunks from the vector index (broad queries per category)
   * 2. For each chunk, search for nearest neighbors (minScore = PATTERN_SIMILARITY_THRESHOLD)
   * 3. Group each seed chunk with its qualifying neighbors into a cluster
   * 4. Count distinct source_ids per cluster (session frequency)
   * 5. Clusters with sessionCount >= MIN_SESSION_FREQUENCY become candidate patterns
   * 6. Candidates with confidence >= SOLIDIFY_CONFIDENCE_THRESHOLD are solidified
   *
   * @returns Detection result with candidate patterns, sorted by confidence descending
   */
  async detectPatterns(): Promise<PatternDetectionResult> {
    const startTime = Date.now();
    const result: PatternDetectionResult = {
      patterns: [],
      chunksAnalyzed: 0,
      solidified: [],
      elapsedMs: 0,
    };

    if (!this.vectorIndex) {
      result.elapsedMs = Date.now() - startTime;
      return result;
    }

    // Step 1: Gather chunks from the vector store via broad category searches
    const allChunks = await this.gatherChunksForAnalysis();
    result.chunksAnalyzed = allChunks.length;

    // Fewer chunks than the session threshold can never form a qualifying pattern
    if (allChunks.length < MIN_SESSION_FREQUENCY) {
      result.elapsedMs = Date.now() - startTime;
      return result;
    }

    // Step 2: Cluster chunks by vector similarity
    const patternGroups = await this.clusterChunksByVector(allChunks);

    // Step 3: Filter by session frequency and build DetectedPattern objects
    for (const group of patternGroups) {
      const representative = group[0]; // The seed chunk the cluster was built around
      if (!representative) continue;

      const uniqueSources = new Set(group.map(c => c.source_id));
      if (uniqueSources.size < MIN_SESSION_FREQUENCY) continue;

      // The seed's own score is its relevance to the broad gathering query, not a
      // similarity within this cluster, so only neighbor scores (similarity to the
      // seed) are averaged.
      const neighbors = group.slice(1);
      const avgSim =
        neighbors.length > 0
          ? neighbors.reduce((sum, c) => sum + c.score, 0) / neighbors.length
          : 0;

      // Confidence: combines frequency (normalized, saturating at 10 sessions) and avg similarity
      const frequencyScore = Math.min(uniqueSources.size / 10, 1.0);
      const confidence = avgSim * 0.6 + frequencyScore * 0.4;

      const patternName = this.derivePatternName(group);
      const patternId = `PAT-${Date.now()}-${Math.random().toString(36).substring(2, 6)}`;

      result.patterns.push({
        id: patternId,
        name: patternName,
        representative: representative.content.substring(0, 500),
        sourceIds: Array.from(uniqueSources),
        sessionCount: uniqueSources.size,
        avgSimilarity: Math.round(avgSim * 1000) / 1000,
        confidence: Math.round(confidence * 1000) / 1000,
        category: representative.category || 'unknown',
      });
    }

    // Sort by confidence descending
    result.patterns.sort((a, b) => b.confidence - a.confidence);

    // Step 4: Auto-solidify high-confidence patterns. Each one is awaited in turn;
    // a failure is logged and does not stop the remaining patterns.
    for (const pattern of result.patterns) {
      if (pattern.confidence < SOLIDIFY_CONFIDENCE_THRESHOLD) continue;
      try {
        await this.solidifyPattern(pattern);
        result.solidified.push(pattern.id);
      } catch (err: unknown) {
        console.warn(
          `[PatternDetector] Failed to solidify pattern ${pattern.id}:`,
          describeError(err)
        );
      }
    }

    result.elapsedMs = Date.now() - startTime;
    return result;
  }

  /**
   * Gather a representative set of chunks for pattern analysis.
   * Uses broad search queries across categories to collect diverse chunks,
   * capped at MAX_CHUNKS_TO_ANALYZE.
   *
   * @returns De-duplicated matches; empty when no vector index is available
   */
  private async gatherChunksForAnalysis(): Promise<VectorSearchMatch[]> {
    if (!this.vectorIndex) return [];

    const allChunks: VectorSearchMatch[] = [];
    const seenContent = new Set<string>();

    // Search across common categories with broad queries
    const broadQueries = [
      'implementation pattern',
      'configuration setup',
      'error handling',
      'testing approach',
      'workflow process',
    ];
    const categories = ['core_memory', 'cli_history', 'workflow'] as const;

    // Split the overall budget evenly across every (category, query) pair
    const perQueryTopK = Math.ceil(
      MAX_CHUNKS_TO_ANALYZE / (broadQueries.length * categories.length)
    );

    for (const category of categories) {
      if (allChunks.length >= MAX_CHUNKS_TO_ANALYZE) break;

      for (const query of broadQueries) {
        if (allChunks.length >= MAX_CHUNKS_TO_ANALYZE) break;

        try {
          const result = await this.vectorIndex.search(query, {
            topK: perQueryTopK,
            minScore: 0.1,
            category,
          });
          if (!result.success) continue;

          for (const match of result.matches) {
            // Deduplicate by content prefix (first 100 chars)
            const contentKey = match.content.substring(0, 100);
            if (!seenContent.has(contentKey)) {
              seenContent.add(contentKey);
              allChunks.push(match);
            }
            if (allChunks.length >= MAX_CHUNKS_TO_ANALYZE) break;
          }
        } catch {
          // Best-effort: a failed search for one query/category must not abort the run
        }
      }
    }

    return allChunks;
  }

  /**
   * Cluster chunks by vector similarity using nearest-neighbor search.
   *
   * Greedy single pass: each not-yet-claimed chunk becomes a seed, and the index is
   * queried with the seed's content (minScore = PATTERN_SIMILARITY_THRESHOLD).
   * Returned neighbors are attached to the seed's group; neighbors that are also in
   * `chunks` are marked as claimed so they do not seed a group of their own.
   *
   * In every returned group, element 0 is the seed (carrying its original score) and
   * all later elements carry their similarity score relative to the seed.
   *
   * @param chunks - Chunks to cluster
   * @returns Groups that span at least two distinct source_ids
   */
  private async clusterChunksByVector(
    chunks: VectorSearchMatch[]
  ): Promise<VectorSearchMatch[][]> {
    if (!this.vectorIndex) return [];

    const groups: VectorSearchMatch[][] = [];
    const processed = new Set<number>();

    for (let i = 0; i < chunks.length; i++) {
      if (processed.has(i)) continue;

      const seedChunk = chunks[i];
      if (!seedChunk) continue;

      const group: VectorSearchMatch[] = [seedChunk];
      processed.add(i);

      // Search for neighbors of this chunk's content
      try {
        const neighbors = await this.vectorIndex.search(seedChunk.content, {
          topK: NEIGHBOR_TOP_K,
          minScore: PATTERN_SIMILARITY_THRESHOLD,
        });

        if (neighbors.success) {
          for (const neighbor of neighbors.matches) {
            // Skip self-matches
            if (neighbor.content === seedChunk.content) continue;

            // Claim the first unclaimed chunk in our list that is this neighbor
            const matchIndex = chunks.findIndex(
              (candidate, idx) =>
                !processed.has(idx) &&
                candidate.source_id === neighbor.source_id &&
                candidate.chunk_index === neighbor.chunk_index
            );
            const matched = matchIndex >= 0 ? chunks[matchIndex] : undefined;
            if (matched) {
              // Record similarity-to-seed rather than the gathering-query score
              group.push({ ...matched, score: neighbor.score });
              processed.add(matchIndex);
            }

            // Also include cross-session neighbors that were not claimed above
            // (not in our list, or already claimed by an earlier seed)
            if (neighbor.source_id && neighbor.source_id !== seedChunk.source_id) {
              const alreadyInGroup = group.some(
                g =>
                  g.source_id === neighbor.source_id &&
                  g.chunk_index === neighbor.chunk_index
              );
              if (!alreadyInGroup) {
                group.push(neighbor);
              }
            }
          }
        }
      } catch {
        // Neighbor search failed; the seed stays alone and is filtered out below
      }

      // Only keep groups with chunks from multiple sources
      const uniqueSources = new Set(group.map(c => c.source_id));
      if (uniqueSources.size >= 2) {
        groups.push(group);
      }
    }

    return groups;
  }

  /**
   * Derive a human-readable pattern name from a group of similar chunks.
   *
   * Counts, per chunk, which significant words (>= 4 chars, not a stopword) and
   * which bigrams of consecutive significant words occur. A bigram shared by at
   * least two chunks is preferred, optionally extended by the most common other
   * shared word; otherwise the top three words are joined with '-'.
   *
   * @param group - Chunks belonging to one pattern cluster
   * @returns A dash-separated name, or 'unnamed-pattern' when no significant word exists
   */
  private derivePatternName(group: VectorSearchMatch[]): string {
    const isSignificant = (w: string): boolean =>
      w.length >= 4 && !PATTERN_NAME_STOPWORDS.has(w);

    // Both maps hold "number of chunks containing the key" (document frequency)
    const wordFreq = new Map<string, number>();
    const bigramFreq = new Map<string, number>();

    for (const chunk of group) {
      const words = chunk.content.toLowerCase().split(/[\s\W]+/).filter(isSignificant);

      for (const word of new Set(words)) {
        wordFreq.set(word, (wordFreq.get(word) || 0) + 1);
      }

      // De-duplicate bigrams per chunk so a phrase repeated inside one chunk
      // does not masquerade as appearing in multiple chunks
      const chunkBigrams = new Set<string>();
      for (let i = 0; i < words.length - 1; i++) {
        chunkBigrams.add(`${words[i]}-${words[i + 1]}`);
      }
      for (const bigram of chunkBigrams) {
        bigramFreq.set(bigram, (bigramFreq.get(bigram) || 0) + 1);
      }
    }

    // Prefer bigrams that appear in multiple chunks
    const [topBigram] = Array.from(bigramFreq.entries())
      .filter(([, count]) => count >= 2)
      .sort((a, b) => b[1] - a[1]);

    if (topBigram) {
      // Use top bigram, optionally append a distinguishing single word
      const name = topBigram[0];
      const bigramWords = new Set(name.split('-'));
      const [topExtra] = Array.from(wordFreq.entries())
        .filter(([w, count]) => count >= 2 && !bigramWords.has(w))
        .sort((a, b) => b[1] - a[1]);

      if (topExtra) {
        const candidate = `${name}-${topExtra[0]}`;
        return candidate.length <= MAX_NAME_LENGTH ? candidate : name;
      }
      return name;
    }

    // Fallback to top single words
    const topWords = Array.from(wordFreq.entries())
      .sort((a, b) => b[1] - a[1])
      .slice(0, 3)
      .map(([w]) => w);

    if (topWords.length === 0) return 'unnamed-pattern';

    const joined = topWords.join('-');
    if (topWords.length === 1 || joined.length <= MAX_NAME_LENGTH) return joined;
    return topWords.slice(0, 2).join('-');
  }

  /**
   * Solidify a detected pattern by writing it to CoreMemory and skills/*.md.
   *
   * Creates:
   * 1. A CoreMemory entry with the pattern content and metadata
   * 2. A `.claude/skills/{pattern_slug}.md` file with the pattern documentation
   *
   * Errors from the CoreMemory write propagate to the caller. Writing the skill
   * file is best-effort: a failure is logged and reported as `skillPath: null`.
   * An existing skill file with the same slug is overwritten.
   *
   * @param pattern - The detected pattern to solidify
   * @returns Result with memory ID and skill file path
   */
  async solidifyPattern(pattern: DetectedPattern): Promise<SolidifyResult> {
    // 1. Create CoreMemory entry
    const memoryContent = this.buildPatternMemoryContent(pattern);
    const memory = this.coreMemoryStore.upsertMemory({
      content: memoryContent,
      summary: `Detected pattern: ${pattern.name} (${pattern.sessionCount} sessions, confidence: ${pattern.confidence})`,
      metadata: JSON.stringify({
        type: 'detected_pattern',
        pattern_id: pattern.id,
        pattern_name: pattern.name,
        session_count: pattern.sessionCount,
        confidence: pattern.confidence,
        source_ids: pattern.sourceIds,
        detected_at: new Date().toISOString(),
      }),
    });

    // 2. Write skills file
    let skillPath: string | null = null;
    try {
      // Truncate before trimming so the slug never ends in '-'
      const toSlug = (text: string): string =>
        text
          .toLowerCase()
          .replace(/[^a-z0-9]+/g, '-')
          .substring(0, MAX_NAME_LENGTH)
          .replace(/^-|-$/g, '');
      // A name without any [a-z0-9] would otherwise yield the file name '.md'
      const slug = toSlug(pattern.name) || toSlug(pattern.id) || 'unnamed-pattern';

      const skillsDir = join(this.projectPath, '.claude', 'skills');
      if (!existsSync(skillsDir)) {
        mkdirSync(skillsDir, { recursive: true });
      }

      skillPath = join(skillsDir, `${slug}.md`);
      const skillContent = this.buildSkillContent(pattern);
      writeFileSync(skillPath, skillContent, 'utf-8');
    } catch (err: unknown) {
      console.warn(
        `[PatternDetector] Failed to write skill file for ${pattern.name}:`,
        describeError(err)
      );
      skillPath = null;
    }

    console.log(
      `[PatternDetector] Solidified pattern '${pattern.name}' -> memory=${memory.id}, skill=${skillPath || 'none'}`
    );

    return { memoryId: memory.id, skillPath };
  }

  /**
   * Build CoreMemory content (markdown) for a detected pattern.
   *
   * @param pattern - Pattern whose metadata and representative snippet are rendered
   * @returns Markdown document as a single newline-joined string
   */
  private buildPatternMemoryContent(pattern: DetectedPattern): string {
    const lines: string[] = [
      `# Detected Pattern: ${pattern.name}`,
      '',
      `**Confidence**: ${pattern.confidence}`,
      `**Sessions**: ${pattern.sessionCount} (${pattern.sourceIds.join(', ')})`,
      `**Category**: ${pattern.category}`,
      `**Avg Similarity**: ${pattern.avgSimilarity}`,
      '',
      '## Representative Content',
      '',
      pattern.representative,
      '',
      '## Usage',
      '',
      'This pattern was automatically detected across multiple sessions.',
      'It represents a recurring approach or concept in this project.',
    ];
    return lines.join('\n');
  }

  /**
   * Build skill file content (markdown) for a detected pattern.
   *
   * @param pattern - Pattern whose description and source sessions are rendered
   * @returns Markdown document as a single newline-joined string
   */
  private buildSkillContent(pattern: DetectedPattern): string {
    const lines: string[] = [
      `# ${pattern.name}`,
      '',
      `> Auto-detected pattern (confidence: ${pattern.confidence}, sessions: ${pattern.sessionCount})`,
      '',
      '## Description',
      '',
      pattern.representative,
      '',
      '## Context',
      '',
      `This pattern was detected across ${pattern.sessionCount} sessions:`,
      ...pattern.sourceIds.map(id => `- ${id}`),
      '',
      '## When to Apply',
      '',
      'Apply this pattern when working on similar tasks or encountering related concepts.',
      '',
      '---',
      `*Auto-generated by PatternDetector on ${new Date().toISOString()}*`,
    ];
    return lines.join('\n');
  }
}