mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-28 09:23:08 +08:00
Add comprehensive tests for ast-grep and tree-sitter relationship extraction
- Introduced test suite for AstGrepPythonProcessor covering pattern definitions, parsing, and relationship extraction. - Added comparison tests between tree-sitter and ast-grep for consistency in relationship extraction. - Implemented tests for ast-grep binding module to verify functionality and availability. - Ensured tests cover various scenarios including inheritance, function calls, and imports.
This commit is contained in:
488
ccw/src/core/unified-memory-service.ts
Normal file
488
ccw/src/core/unified-memory-service.ts
Normal file
@@ -0,0 +1,488 @@
|
||||
/**
|
||||
* Unified Memory Service - Cross-store search with RRF fusion
|
||||
*
|
||||
* Provides a single search() interface that combines:
|
||||
* - Vector search (HNSW via UnifiedVectorIndex)
|
||||
* - Full-text search (FTS5 via MemoryStore.searchPrompts)
|
||||
* - Heat-based scoring (entity heat from MemoryStore)
|
||||
*
|
||||
* Fusion: Reciprocal Rank Fusion (RRF)
|
||||
* score = sum(1 / (k + rank_i) * weight_i)
|
||||
* k = 60, weights = { vector: 0.6, fts: 0.3, heat: 0.1 }
|
||||
*/
|
||||
|
||||
import { UnifiedVectorIndex, isUnifiedEmbedderAvailable } from './unified-vector-index.js';
|
||||
import type {
|
||||
VectorCategory,
|
||||
VectorSearchMatch,
|
||||
VectorIndexStatus,
|
||||
} from './unified-vector-index.js';
|
||||
import { CoreMemoryStore, getCoreMemoryStore } from './core-memory-store.js';
|
||||
import type { CoreMemory } from './core-memory-store.js';
|
||||
import { MemoryStore, getMemoryStore } from './memory-store.js';
|
||||
import type { PromptHistory, HotEntity } from './memory-store.js';
|
||||
|
||||
// =============================================================================
|
||||
// Types
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Tuning knobs accepted by `UnifiedMemoryService.search()`.
 *
 * Every field is optional; the defaults quoted below are the ones applied
 * by `search()` when a field is omitted.
 */
export interface UnifiedSearchOptions {
  /** Upper bound on the number of fused results returned (default: 20). */
  limit?: number;

  /** Results whose fused score is below this value are dropped (default: 0.0). */
  minScore?: number;

  /** Category filter forwarded to the vector search. */
  category?: VectorCategory;

  /** How many vector matches to fetch as fusion candidates (default: 30). */
  vectorTopK?: number;

  /** How many full-text matches to fetch as fusion candidates (default: 30). */
  ftsLimit?: number;
}
|
||||
|
||||
/** One fused hit produced by `UnifiedMemoryService.search()`. */
export interface UnifiedSearchResult {
  /** Identifier of the underlying item (vector `source_id` or prompt `session_id`). */
  source_id: string;

  /** Kind of store the item came from, e.g. core_memory, cli_history, workflow, entity, pattern. */
  source_type: string;

  /**
   * Weighted Reciprocal Rank Fusion score; higher is better.
   *
   * Each contributing source adds `weight / (k + rank)`. With this module's
   * k = 60 and weights summing to 1 the value never exceeds 1/61 (about 0.0164),
   * so `minScore` thresholds must be chosen on that scale.
   */
  score: number;

  /** Text of the hit (snippet or full content, depending on the source). */
  content: string;

  /** Category label of the hit. */
  category: string;

  /** Per-source ranking details; a field is present only if that source contributed. */
  rank_sources: {
    /** 1-based position in the vector result list. */
    vector_rank?: number;
    /** Raw similarity score reported by the vector index. */
    vector_score?: number;
    /** 1-based position in the full-text result list. */
    fts_rank?: number;
    /** Entity heat score that produced a heat boost. */
    heat_score?: number;
  };
}
|
||||
|
||||
/** Counters gathered by `UnifiedMemoryService.getStats()` across every store and the vector index. */
export interface UnifiedMemoryStats {
  /** Core memory counts. */
  core_memories: {
    /** Number of core memories returned by the store. */
    total: number;
    /** How many of those are flagged as archived. */
    archived: number;
  };

  /** Number of stage-1 outputs reported by the core memory store. */
  stage1_outputs: number;

  /** Row count of the `entities` table (0 when it cannot be read). */
  entities: number;

  /** Row count of the `prompt_history` table (0 when it cannot be read). */
  prompts: number;

  /** Row count of the `conversations` table (0 when it cannot be read). */
  conversations: number;

  /** Snapshot of the vector index status; zeroed when no index is available. */
  vector_index: {
    /** Whether the status query succeeded. */
    available: boolean;
    /** Total number of indexed chunks. */
    total_chunks: number;
    /** Whether the HNSW index is usable. */
    hnsw_available: boolean;
    /** Number of entries held by the HNSW index. */
    hnsw_count: number;
    /** Embedding dimension. */
    dimension: number;
    /** Optional per-category chunk counts. */
    categories?: Record<string, number>;
  };
}
|
||||
|
||||
/** A single nearest-neighbour suggestion returned by `UnifiedMemoryService.getRecommendations()`. */
export interface RecommendationResult {
  /** Identifier of the recommended item. */
  source_id: string;

  /** Kind of store the recommended item came from. */
  source_type: string;

  /** Similarity score reported by the vector index for this match. */
  score: number;

  /** Text content of the matched item. */
  content: string;

  /** Category label of the matched item. */
  category: string;
}
|
||||
|
||||
// =============================================================================
|
||||
// RRF Constants
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Smoothing constant `k` in the RRF term `weight / (k + rank)`.
 * 60 is the value used in the original RRF paper.
 */
const RRF_K = 60;

/** Weight of the vector-similarity ranking in the fused score. */
const WEIGHT_VECTOR = 0.6;

/** Weight of the full-text ranking in the fused score. */
const WEIGHT_FTS = 0.3;

/** Weight of the entity-heat boost in the fused score. */
const WEIGHT_HEAT = 0.1;
|
||||
|
||||
// =============================================================================
|
||||
// UnifiedMemoryService
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Unified Memory Service providing cross-store search, recommendations and
 * statistics.
 *
 * `search()` merges three signals with weighted Reciprocal Rank Fusion (RRF):
 * vector similarity (UnifiedVectorIndex), full-text matches
 * (MemoryStore.searchPrompts) and entity heat (MemoryStore.getHotEntities).
 * Every signal is best-effort: if one source is unavailable or throws, it
 * simply contributes nothing to the fused ranking.
 */
export class UnifiedMemoryService {
  private readonly projectPath: string;
  /** Stays null when the unified embedder is unavailable; vector features are then skipped. */
  private readonly vectorIndex: UnifiedVectorIndex | null = null;
  private readonly coreMemoryStore: CoreMemoryStore;
  private readonly memoryStore: MemoryStore;

  /**
   * @param projectPath - Project root used to resolve the per-project stores and vector index
   */
  constructor(projectPath: string) {
    this.projectPath = projectPath;
    this.coreMemoryStore = getCoreMemoryStore(projectPath);
    this.memoryStore = getMemoryStore(projectPath);

    if (isUnifiedEmbedderAvailable()) {
      this.vectorIndex = new UnifiedVectorIndex(projectPath);
    }
  }

  // ==========================================================================
  // Search
  // ==========================================================================

  /**
   * Unified search across all memory stores.
   *
   * Pipeline:
   *   1. Vector search via UnifiedVectorIndex (semantic similarity)
   *   2. Full-text search via MemoryStore.searchPrompts (keyword matching)
   *   3. Heat boost via entity heat scores
   *   4. RRF fusion: score = sum(weight_i / (RRF_K + rank_i))
   *
   * When the same id occurs several times in one ranked list (for example
   * several chunks of one memory, or several prompts of one session), the
   * best-ranked occurrence is the one used for scoring and content.
   *
   * @param query - Natural language search query
   * @param options - Search options; `minScore` is compared against the fused
   *   RRF score, which is at most 1 / (RRF_K + 1)
   * @returns Fused results sorted by descending score, at most `limit` items
   */
  async search(
    query: string,
    options: UnifiedSearchOptions = {}
  ): Promise<UnifiedSearchResult[]> {
    const {
      limit = 20,
      minScore = 0.0,
      category,
      vectorTopK = 30,
      ftsLimit = 30,
    } = options;

    // The three lookups are independent, so run them concurrently.
    const [vectorResults, ftsResults, hotEntities] = await Promise.all([
      this.runVectorSearch(query, vectorTopK, category),
      this.runFtsSearch(query, ftsLimit),
      this.getHeatScores(),
    ]);

    // Heat lookup keyed by the entity's normalized value.
    const heatMap = new Map<string, number>();
    for (const entity of hotEntities) {
      heatMap.set(entity.normalized_value, entity.stats.heat_score);
    }

    const allSourceIds = new Set<string>();
    const vectorRankMap = new Map<string, { rank: number; score: number; match: VectorSearchMatch }>();
    const ftsRankMap = new Map<string, { rank: number; item: PromptHistory }>();

    // Result lists arrive best-first. Only the first occurrence of an id is
    // recorded so a later, worse-ranked duplicate cannot overwrite it.
    vectorResults.forEach((match, index) => {
      const id = match.source_id;
      allSourceIds.add(id);
      if (!vectorRankMap.has(id)) {
        vectorRankMap.set(id, { rank: index + 1, score: match.score, match });
      }
    });

    ftsResults.forEach((item, index) => {
      const id = item.session_id;
      allSourceIds.add(id);
      if (!ftsRankMap.has(id)) {
        ftsRankMap.set(id, { rank: index + 1, item });
      }
    });

    const results: UnifiedSearchResult[] = [];

    for (const sourceId of allSourceIds) {
      const vectorEntry = vectorRankMap.get(sourceId);
      const ftsEntry = ftsRankMap.get(sourceId);

      let rrfScore = 0;
      const rankSources: UnifiedSearchResult['rank_sources'] = {};

      // Vector component
      if (vectorEntry) {
        rrfScore += WEIGHT_VECTOR / (RRF_K + vectorEntry.rank);
        rankSources.vector_rank = vectorEntry.rank;
        rankSources.vector_score = vectorEntry.score;
      }

      // Full-text component
      if (ftsEntry) {
        rrfScore += WEIGHT_FTS / (RRF_K + ftsEntry.rank);
        rankSources.fts_rank = ftsEntry.rank;
      }

      // Heat component: map the heat score onto a pseudo-rank in 1..100
      // (hotter entity => smaller rank => larger contribution).
      const heatScore = this.lookupHeatScore(sourceId, heatMap);
      if (heatScore > 0) {
        const heatRank = Math.max(1, Math.ceil(100 / (1 + heatScore)));
        rrfScore += WEIGHT_HEAT / (RRF_K + heatRank);
        rankSources.heat_score = heatScore;
      }

      if (rrfScore < minScore) continue;

      // Prefer the vector match for display data; fall back to the prompt row.
      let content = '';
      let sourceType = '';
      let resultCategory = '';

      if (vectorEntry) {
        content = vectorEntry.match.content;
        sourceType = vectorEntry.match.source_type;
        resultCategory = vectorEntry.match.category;
      } else if (ftsEntry) {
        content = ftsEntry.item.prompt_text || ftsEntry.item.context_summary || '';
        sourceType = 'cli_history';
        resultCategory = 'cli_history';
      }

      // The category option is only forwarded to the vector search, so
      // full-text-only hits must be filtered here to honour it.
      if (category && resultCategory !== category) continue;

      results.push({
        source_id: sourceId,
        source_type: sourceType,
        score: rrfScore,
        content,
        category: resultCategory,
        rank_sources: rankSources,
      });
    }

    results.sort((a, b) => b.score - a.score);
    // Clamp so a negative limit yields no results instead of slicing from the end.
    return results.slice(0, Math.max(0, limit));
  }

  // ==========================================================================
  // Recommendations
  // ==========================================================================

  /**
   * Get recommendations based on a memory's vector neighbours (KNN).
   *
   * Uses the content of the given memory as the query for a vector search,
   * then drops the memory itself and any repeated `source_id`, so each
   * recommended item appears once (its first, i.e. best-ranked, match).
   *
   * @param memoryId - Core memory ID (CMEM-*)
   * @param limit - Maximum number of recommendations (default: 5)
   * @returns Recommendations in the order returned by the vector index; empty
   *   when `limit <= 0`, the memory is unknown, no vector index is available,
   *   or the search reports failure
   */
  async getRecommendations(
    memoryId: string,
    limit: number = 5
  ): Promise<RecommendationResult[]> {
    if (limit <= 0) {
      return [];
    }

    const memory = this.coreMemoryStore.getMemory(memoryId);
    if (!memory || !this.vectorIndex) {
      return [];
    }

    // Over-fetch a little so filtering out the source memory still leaves enough.
    const searchResult = await this.vectorIndex.search(memory.content, {
      topK: limit + 5,
      minScore: 0.3,
    });

    if (!searchResult.success) {
      return [];
    }

    // Seeding the set with memoryId excludes the source memory itself.
    const seen = new Set<string>([memoryId]);
    const recommendations: RecommendationResult[] = [];

    for (const match of searchResult.matches) {
      if (seen.has(match.source_id)) continue;
      seen.add(match.source_id);

      recommendations.push({
        source_id: match.source_id,
        source_type: match.source_type,
        score: match.score,
        content: match.content,
        category: match.category,
      });

      if (recommendations.length >= limit) break;
    }

    return recommendations;
  }

  // ==========================================================================
  // Statistics
  // ==========================================================================

  /**
   * Get aggregated statistics from all stores and the vector index.
   *
   * Table counts fall back to 0 and the vector status falls back to an
   * all-zero "unavailable" record when the underlying query fails.
   *
   * @returns Unified stats across core memories, stage-1 outputs, entities,
   *   prompts, conversations and vectors
   */
  async getStats(): Promise<UnifiedMemoryStats> {
    // NOTE(review): totals are derived by loading up to 100000 memories;
    // confirm whether getMemories() includes archived rows by default.
    const allMemories = this.coreMemoryStore.getMemories({ limit: 100000 });
    const archivedCount = allMemories.filter(m => m.archived).length;
    const stage1Count = this.coreMemoryStore.countStage1Outputs();

    let vectorStatus: VectorIndexStatus = {
      success: false,
      total_chunks: 0,
      hnsw_available: false,
      hnsw_count: 0,
      dimension: 0,
    };

    if (this.vectorIndex) {
      try {
        vectorStatus = await this.vectorIndex.getStatus();
      } catch {
        // Keep the zeroed status: the index is treated as unavailable.
      }
    }

    return {
      core_memories: {
        total: allMemories.length,
        archived: archivedCount,
      },
      stage1_outputs: stage1Count,
      entities: this.countTableRows('entities'),
      prompts: this.countTableRows('prompt_history'),
      conversations: this.countTableRows('conversations'),
      vector_index: {
        available: vectorStatus.success,
        total_chunks: vectorStatus.total_chunks,
        hnsw_available: vectorStatus.hnsw_available,
        hnsw_count: vectorStatus.hnsw_count,
        dimension: vectorStatus.dimension,
        categories: vectorStatus.categories,
      },
    };
  }

  // ==========================================================================
  // Internal helpers
  // ==========================================================================

  /**
   * Count the rows of one MemoryStore table.
   *
   * The table name is restricted to a fixed set of literals, so interpolating
   * it into the SQL text cannot inject anything.
   *
   * @returns The row count, or 0 if the table (or the db handle) is unavailable
   */
  private countTableRows(table: 'entities' | 'prompt_history' | 'conversations'): number {
    try {
      // MemoryStore exposes no count API here, so reach into its db handle.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const db = (this.memoryStore as any).db;
      const row = db.prepare(`SELECT COUNT(*) as count FROM ${table}`).get() as { count: number };
      return row.count;
    } catch {
      // Table may not exist.
      return 0;
    }
  }

  /**
   * Run vector search via UnifiedVectorIndex.
   *
   * @returns The index's matches, or an empty array when the index is not
   *   available, reports failure, or throws
   */
  private async runVectorSearch(
    query: string,
    topK: number,
    category?: VectorCategory
  ): Promise<VectorSearchMatch[]> {
    if (!this.vectorIndex) {
      return [];
    }

    try {
      const result = await this.vectorIndex.search(query, {
        topK,
        minScore: 0.1,
        category,
      });

      return result.success ? result.matches : [];
    } catch {
      // Deliberate best-effort: search degrades to the remaining sources.
      return [];
    }
  }

  /**
   * Run full-text search via MemoryStore.searchPrompts.
   *
   * @returns Matching prompt rows, or an empty array when the sanitized query
   *   is empty or the store throws
   */
  private async runFtsSearch(
    query: string,
    limit: number
  ): Promise<PromptHistory[]> {
    try {
      const sanitized = this.sanitizeFtsQuery(query);
      if (!sanitized) return [];

      return this.memoryStore.searchPrompts(sanitized, limit);
    } catch {
      // Deliberate best-effort: search degrades to the remaining sources.
      return [];
    }
  }

  /**
   * Get hot entities for heat-based scoring.
   *
   * @returns Result of `getHotEntities(50)`, or an empty array if it throws
   */
  private async getHeatScores(): Promise<HotEntity[]> {
    try {
      return this.memoryStore.getHotEntities(50);
    } catch {
      return [];
    }
  }

  /**
   * Look up the heat score for a source ID.
   *
   * An exact key match wins. Otherwise the first entity whose key contains
   * the source id, or is contained in it, supplies the score. Empty strings
   * never match, because the empty string is a substring of everything and
   * would otherwise hand an arbitrary entity's heat to every result.
   *
   * @returns The matching heat score, or 0 when nothing matches
   */
  private lookupHeatScore(
    sourceId: string,
    heatMap: Map<string, number>
  ): number {
    if (sourceId === '') {
      return 0;
    }

    const direct = heatMap.get(sourceId);
    if (direct !== undefined) {
      return direct;
    }

    for (const [key, score] of heatMap) {
      if (key === '') continue;
      if (sourceId.includes(key) || key.includes(sourceId)) {
        return score;
      }
    }

    return 0;
  }

  /**
   * Sanitize a query string for FTS5 MATCH syntax.
   *
   * FTS5 barewords may only contain ASCII letters, digits, underscore and
   * non-ASCII characters. Every other ASCII character (including `-` and `?`)
   * becomes a space, then whitespace is collapsed and trimmed.
   *
   * @returns Space-separated bareword tokens; empty string if nothing is left
   */
  private sanitizeFtsQuery(query: string): string {
    return query
      .replace(/[^A-Za-z0-9_\s\u0080-\uFFFF]/g, ' ')
      .replace(/\s+/g, ' ')
      .trim();
  }
}
|
||||
Reference in New Issue
Block a user