Mirror of https://github.com/catlog22/Claude-Code-Workflow.git (synced 2026-03-03 15:43:11 +08:00)
feat: unified task.json schema migration and multi-module updates
- Create task-schema.json (JSON Schema draft-07) with 10 field blocks fusing Unified JSONL, 6-field Task JSON, and Solution Schema advantages
- Migrate unified-execute-with-file from JSONL to .task/*.json directory scanning
- Migrate 3 producers (lite-plan, plan-converter, collaborative-plan) to .task/*.json multi-file output
- Add review-cycle Phase 7.5 export-to-tasks (FIX-*.json) and issue-resolve --export-tasks option
- Add schema compatibility annotations to action-planning-agent, workflow-plan, and tdd-plan
- Add spec-generator skill phases and templates
- Add memory v2 pipeline (consolidation, extraction, job scheduler, embedder)
- Add secret-redactor utility and core-memory enhancements
- Add codex-lens accuracy benchmarks and staged env config overrides
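The JSONL-to-directory migration means consumers enumerate individual task files instead of parsing one stream. A minimal sketch of the idea in TypeScript (illustrative only; `loadTasks` and the exact `.task/` layout are assumptions, not the actual CCW code):

import { readdirSync, readFileSync } from 'fs';
import { join } from 'path';

// Hypothetical helper: scan a .task/ directory and parse each *.json task file.
function loadTasks(taskDir: string): unknown[] {
  return readdirSync(taskDir)
    .filter((f) => f.endsWith('.json'))
    .map((f) => JSON.parse(readFileSync(join(taskDir, f), 'utf-8')));
}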
ccw/src/core/memory-consolidation-pipeline.ts (normal file, 474 lines added)
@@ -0,0 +1,474 @@
/**
 * Memory Consolidation Pipeline - Phase 2 Global Consolidation
 *
 * Orchestrates the global memory consolidation process:
 * Lock -> Materialize -> Agent -> Monitor -> Done
 *
 * Phase 1 outputs (per-session extractions stored in stage1_outputs DB table)
 * are materialized to disk as rollout_summaries/*.md + raw_memories.md,
 * then a CLI agent (--mode write) reads those files and produces MEMORY.md.
 *
 * The pipeline uses lease-based locking via MemoryJobScheduler to ensure
 * only one consolidation runs at a time, with heartbeat-based lease renewal.
 */

import { existsSync, readFileSync, readdirSync, unlinkSync, writeFileSync } from 'fs';
import { join } from 'path';
import { MemoryJobScheduler, type ClaimResult } from './memory-job-scheduler.js';
import { getCoreMemoryStore, type Stage1Output } from './core-memory-store.js';
import { getProjectPaths, ensureStorageDir } from '../config/storage-paths.js';
import {
  HEARTBEAT_INTERVAL_SECONDS,
  MAX_RAW_MEMORIES_FOR_GLOBAL,
} from './memory-v2-config.js';
import {
  CONSOLIDATION_SYSTEM_PROMPT,
  buildConsolidationPrompt,
} from './memory-consolidation-prompts.js';

// -- Types --

export interface ConsolidationStatus {
  status: 'idle' | 'running' | 'completed' | 'error';
  lastRun?: number;
  memoryMdExists: boolean;
  inputCount: number;
  lastError?: string;
}

export interface MaterializationResult {
  summariesWritten: number;
  summariesPruned: number;
  rawMemoriesSize: number;
}

// -- Constants --

const JOB_KIND = 'memory_consolidate_global';
const JOB_KEY = 'global';
const MAX_CONCURRENT = 1;
const AGENT_TIMEOUT_MS = 300_000; // 5 minutes
const DEFAULT_CLI_TOOL = 'gemini';

// -- Utility --

/**
 * Sanitize a thread ID for use as a filename.
 * Replaces filesystem-unsafe characters with underscores.
 */
function sanitizeFilename(name: string): string {
  return name.replace(/[<>:"/\\|?*\x00-\x1f]/g, '_');
}

// -- Standalone Functions --

/**
 * Write one .md file per stage1_output to rollout_summaries/, prune orphans.
 *
 * Each file is named {sanitized_thread_id}.md and contains the rollout_summary.
 * Files in the directory that do not correspond to any DB row are deleted.
 */
export function syncRolloutSummaries(
  memoryHome: string,
  outputs: Stage1Output[]
): MaterializationResult {
  const summariesDir = join(memoryHome, 'rollout_summaries');
  ensureStorageDir(summariesDir);

  // Build set of expected filenames
  const expectedFiles = new Set<string>();
  let summariesWritten = 0;

  for (const output of outputs) {
    const filename = `${sanitizeFilename(output.thread_id)}.md`;
    expectedFiles.add(filename);
    const filePath = join(summariesDir, filename);

    // Write summary content with thread header
    const content = [
      `# Session: ${output.thread_id}`,
      `> Generated: ${new Date(output.generated_at * 1000).toISOString()}`,
      `> Source updated: ${new Date(output.source_updated_at * 1000).toISOString()}`,
      '',
      output.rollout_summary,
    ].join('\n');

    writeFileSync(filePath, content, 'utf-8');
    summariesWritten++;
  }

  // Prune orphan files not in DB
  let summariesPruned = 0;
  const existingFiles = readdirSync(summariesDir);
  for (const file of existingFiles) {
    if (file.endsWith('.md') && !expectedFiles.has(file)) {
      unlinkSync(join(summariesDir, file));
      summariesPruned++;
    }
  }

  return {
    summariesWritten,
    summariesPruned,
    rawMemoriesSize: 0, // Not applicable for this function
  };
}

/**
 * Concatenate the latest raw_memory entries into raw_memories.md with thread headers.
 *
 * Entries are sorted by generated_at descending, limited to maxCount.
 * Format per entry:
 *   ## Thread: {thread_id}
 *   {raw_memory content}
 *   ---
 *
 * @returns The byte size of the written raw_memories.md file.
 */
export function rebuildRawMemories(
  memoryHome: string,
  outputs: Stage1Output[],
  maxCount: number
): number {
  ensureStorageDir(memoryHome);

  // Sort by generated_at descending, take up to maxCount
  const sorted = [...outputs]
    .sort((a, b) => b.generated_at - a.generated_at)
    .slice(0, maxCount);

  const sections: string[] = [];
  for (const output of sorted) {
    sections.push(
      `## Thread: ${output.thread_id}`,
      `> Generated: ${new Date(output.generated_at * 1000).toISOString()}`,
      '',
      output.raw_memory,
      '',
      '---',
      '',
    );
  }

  const content = sections.join('\n');
  const filePath = join(memoryHome, 'raw_memories.md');
  writeFileSync(filePath, content, 'utf-8');

  return Buffer.byteLength(content, 'utf-8');
}

/**
 * Read MEMORY.md content for session prompt injection.
 *
 * @param projectPath - Project root path (used to resolve storage paths)
 * @returns MEMORY.md content string, or null if the file does not exist
 */
export function getMemoryMdContent(projectPath: string): string | null {
  const paths = getProjectPaths(projectPath);
  const memoryMdPath = paths.memoryV2.memoryMd;

  if (!existsSync(memoryMdPath)) {
    return null;
  }

  try {
    return readFileSync(memoryMdPath, 'utf-8');
  } catch {
    return null;
  }
}

// -- Pipeline Class --

/**
 * MemoryConsolidationPipeline orchestrates global memory consolidation:
 * 1. Claim global lock via job scheduler
 * 2. Materialize Phase 1 outputs to disk (rollout_summaries/ + raw_memories.md)
 * 3. Invoke consolidation agent via executeCliTool --mode write
 * 4. Monitor with heartbeat lease renewal
 * 5. Mark job as succeeded or failed
 */
export class MemoryConsolidationPipeline {
  private projectPath: string;
  private store: ReturnType<typeof getCoreMemoryStore>;
  private scheduler: MemoryJobScheduler;
  private memoryHome: string;
  private cliTool: string;

  constructor(projectPath: string, cliTool?: string) {
    this.projectPath = projectPath;
    this.store = getCoreMemoryStore(projectPath);
    this.scheduler = new MemoryJobScheduler(this.store.getDb());
    this.memoryHome = getProjectPaths(projectPath).memoryV2.root;
    this.cliTool = cliTool || DEFAULT_CLI_TOOL;
  }

  /**
   * Attempt to claim the global consolidation lock.
   *
   * Before claiming, ensures the job row exists and checks dirtiness.
   * The job scheduler handles the actual concurrency control.
   */
  claimGlobalLock(): ClaimResult {
    return this.scheduler.claimJob(JOB_KIND, JOB_KEY, MAX_CONCURRENT);
  }

  /**
   * Materialize rollout summaries from DB to disk.
   * Writes one .md file per stage1_output row, prunes orphan files.
   */
  materializeSummaries(): MaterializationResult {
    const outputs = this.store.listStage1Outputs();
    return syncRolloutSummaries(this.memoryHome, outputs);
  }

  /**
   * Rebuild raw_memories.md from DB entries.
   * Concatenates the latest entries up to MAX_RAW_MEMORIES_FOR_GLOBAL.
   */
  materializeRawMemories(): number {
    const outputs = this.store.listStage1Outputs();
    return rebuildRawMemories(
      this.memoryHome,
      outputs,
      MAX_RAW_MEMORIES_FOR_GLOBAL
    );
  }

  /**
   * Run the consolidation agent via executeCliTool with --mode write.
   * Starts a heartbeat timer to keep the lease alive during agent execution.
   *
   * @param token - Ownership token from claimGlobalLock
   * @returns true if agent completed successfully, false otherwise
   */
  async runConsolidationAgent(token: string): Promise<boolean> {
    // Lazy import to avoid circular dependencies at module load time
    const { executeCliTool } = await import('../tools/cli-executor-core.js');

    // Determine input state for prompt
    const summariesDir = join(this.memoryHome, 'rollout_summaries');
    let summaryCount = 0;
    if (existsSync(summariesDir)) {
      summaryCount = readdirSync(summariesDir).filter(f => f.endsWith('.md')).length;
    }
    const hasExistingMemoryMd = existsSync(join(this.memoryHome, 'MEMORY.md'));

    // Ensure skills directory exists
    ensureStorageDir(join(this.memoryHome, 'skills'));

    // Build the full prompt
    const userPrompt = buildConsolidationPrompt(summaryCount, hasExistingMemoryMd);
    const fullPrompt = `${CONSOLIDATION_SYSTEM_PROMPT}\n\n${userPrompt}`;

    // Start heartbeat timer
    const heartbeatMs = HEARTBEAT_INTERVAL_SECONDS * 1000;
    const heartbeatTimer = setInterval(() => {
      const renewed = this.scheduler.heartbeat(JOB_KIND, JOB_KEY, token);
      if (!renewed) {
        // Heartbeat rejected - lease was lost
        clearInterval(heartbeatTimer);
      }
    }, heartbeatMs);

    try {
      // Execute the consolidation agent
      const result = await Promise.race([
        executeCliTool({
          tool: this.cliTool,
          prompt: fullPrompt,
          mode: 'write',
          cd: this.memoryHome,
          category: 'internal',
        }),
        new Promise<never>((_, reject) =>
          setTimeout(
            () => reject(new Error('Consolidation agent timed out')),
            AGENT_TIMEOUT_MS
          )
        ),
      ]);

      return result.success;
    } finally {
      clearInterval(heartbeatTimer);
    }
  }

  /**
   * Verify that Phase 1 artifacts were not modified by the agent.
   * Compares file count in rollout_summaries/ before and after agent run.
   *
   * @param expectedSummaryCount - Number of summaries before agent run
   * @returns true if artifacts are intact
   */
  verifyPhase1Integrity(expectedSummaryCount: number): boolean {
    const summariesDir = join(this.memoryHome, 'rollout_summaries');
    if (!existsSync(summariesDir)) return expectedSummaryCount === 0;

    const currentCount = readdirSync(summariesDir).filter(f => f.endsWith('.md')).length;
    return currentCount === expectedSummaryCount;
  }

  /**
   * Get the current consolidation status.
   */
  getStatus(): ConsolidationStatus {
    const jobStatus = this.scheduler.getJobStatus(JOB_KIND, JOB_KEY);
    const inputCount = this.store.countStage1Outputs();
    const memoryMdExists = existsSync(join(this.memoryHome, 'MEMORY.md'));

    if (!jobStatus) {
      return {
        status: 'idle',
        memoryMdExists,
        inputCount,
      };
    }

    const statusMap: Record<string, ConsolidationStatus['status']> = {
      pending: 'idle',
      running: 'running',
      done: 'completed',
      error: 'error',
    };

    return {
      status: statusMap[jobStatus.status] || 'idle',
      lastRun: jobStatus.finished_at,
      memoryMdExists,
      inputCount,
      lastError: jobStatus.last_error,
    };
  }

  /**
   * Read MEMORY.md content for session prompt injection.
   * Convenience method that delegates to the standalone function.
   */
  getMemoryMdContent(): string | null {
    return getMemoryMdContent(this.projectPath);
  }

  /**
   * Run the full consolidation pipeline.
   *
   * Pipeline flow:
   * 1. Check if there are inputs to process
   * 2. Claim global lock
   * 3. Materialize Phase 1 outputs to disk
   * 4. Run consolidation agent with heartbeat
   * 5. Verify Phase 1 integrity
   * 6. Mark job as succeeded or failed
   *
   * @returns Final consolidation status
   */
  async runConsolidation(): Promise<ConsolidationStatus> {
    // Step 1: Check inputs
    const inputCount = this.store.countStage1Outputs();
    if (inputCount === 0) {
      return {
        status: 'idle',
        memoryMdExists: existsSync(join(this.memoryHome, 'MEMORY.md')),
        inputCount: 0,
      };
    }

    // Step 2: Claim global lock
    const claim = this.claimGlobalLock();
    if (!claim.claimed || !claim.ownership_token) {
      return {
        status: claim.reason === 'already_running' ? 'running' : 'idle',
        memoryMdExists: existsSync(join(this.memoryHome, 'MEMORY.md')),
        inputCount,
        lastError: claim.reason
          ? `Lock not acquired: ${claim.reason}`
          : undefined,
      };
    }

    const token = claim.ownership_token;

    try {
      // Step 3: Materialize Phase 1 outputs to disk
      const matResult = this.materializeSummaries();
      const rawMemoriesSize = this.materializeRawMemories();

      const expectedSummaryCount = matResult.summariesWritten;

      // Step 4: Run consolidation agent with heartbeat
      const agentSuccess = await this.runConsolidationAgent(token);

      if (!agentSuccess) {
        this.scheduler.markFailed(
          JOB_KIND,
          JOB_KEY,
          token,
          'Consolidation agent returned failure'
        );
        return {
          status: 'error',
          memoryMdExists: existsSync(join(this.memoryHome, 'MEMORY.md')),
          inputCount,
          lastError: 'Consolidation agent returned failure',
        };
      }

      // Step 5: Verify Phase 1 integrity
      if (!this.verifyPhase1Integrity(expectedSummaryCount)) {
        this.scheduler.markFailed(
          JOB_KIND,
          JOB_KEY,
          token,
          'Phase 1 artifacts were modified during consolidation'
        );
        return {
          status: 'error',
          memoryMdExists: existsSync(join(this.memoryHome, 'MEMORY.md')),
          inputCount,
          lastError: 'Phase 1 artifacts were modified during consolidation',
        };
      }

      // Step 6: Check that MEMORY.md was actually produced
      const memoryMdExists = existsSync(join(this.memoryHome, 'MEMORY.md'));
      if (!memoryMdExists) {
        this.scheduler.markFailed(
          JOB_KIND,
          JOB_KEY,
          token,
          'Agent completed but MEMORY.md was not produced'
        );
        return {
          status: 'error',
          memoryMdExists: false,
          inputCount,
          lastError: 'Agent completed but MEMORY.md was not produced',
        };
      }

      // Step 7: Mark success with watermark
      // Use the current input count as the success watermark
      const watermark = inputCount;
      this.scheduler.markSucceeded(JOB_KIND, JOB_KEY, token, watermark);

      return {
        status: 'completed',
        lastRun: Math.floor(Date.now() / 1000),
        memoryMdExists: true,
        inputCount,
      };
    } catch (err) {
      const errorMessage =
        err instanceof Error ? err.message : String(err);
      this.scheduler.markFailed(JOB_KIND, JOB_KEY, token, errorMessage);

      return {
        status: 'error',
        memoryMdExists: existsSync(join(this.memoryHome, 'MEMORY.md')),
        inputCount,
        lastError: errorMessage,
      };
    }
  }
}
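For reference, a hedged usage sketch of the class above, derived only from the constructor and runConsolidation signatures shown in this diff (the project path is a placeholder, and this is not taken from the repo's own callers):

import { MemoryConsolidationPipeline } from './memory-consolidation-pipeline.js';

// Run one global consolidation pass and report the outcome.
const pipeline = new MemoryConsolidationPipeline('/path/to/project'); // cliTool defaults to 'gemini'
const status = await pipeline.runConsolidation();
console.log(`consolidation: ${status.status}`, {
  memoryMdExists: status.memoryMdExists,
  inputCount: status.inputCount,
});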