feat: Implement recursive core-memory database discovery and project listing

- Added `findAllCoreMemoryDatabases` function to recursively locate core-memory databases in nested project structures.
- Updated `listAllProjects` to utilize the new recursive function for improved project listing.
- Enhanced `getMemoriesFromProject` and `findMemoryAcrossProjects` to support nested project structures.

feat: Introduce spec context injection in hooks configuration

- Added a new hook configuration for "Spec Context Injection" to load project specs based on prompt keywords.

chore: Add gray-matter dependency for YAML frontmatter parsing

- Included `gray-matter` package in `package.json` for parsing YAML frontmatter in markdown files.

feat: Create Spec Index Builder tool for managing project specs

- Implemented `spec-index-builder.ts` to scan markdown files, extract YAML frontmatter, and generate index cache files for different spec dimensions.

feat: Develop Spec Init tool for initializing spec directories and seed documents

- Created `spec-init.ts` to set up the directory structure and seed documents for the spec system.

feat: Build Spec Keyword Extractor for keyword extraction from prompts

- Added `spec-keyword-extractor.ts` to extract keywords from user prompts, supporting both English and Chinese text.

feat: Implement Spec Loader for loading and filtering specs based on keywords

- Developed `spec-loader.ts` to handle loading of specs, filtering by read mode and keyword matches, and formatting output for CLI or hooks.
This commit is contained in:
catlog22
2026-02-26 12:51:29 +08:00
parent a35fb0fe8f
commit 2b5c334bc4
27 changed files with 2595 additions and 955 deletions

View File

@@ -1643,7 +1643,45 @@ function getCCWHome(): string {
}
/**
* List all projects with their memory counts
* Recursively find all core-memory databases in nested project structure
* Handles both flat structure (projects/my-project/) and nested structure (projects/d-/my-project/)
*/
/**
 * Recursively find all core-memory databases in a nested project structure.
 *
 * Handles both the flat layout (projects/my-project/) and the nested layout
 * (projects/d-/my-project/): any directory containing
 * core-memory/core_memory.db is treated as a project, and its path relative
 * to projectsDir becomes the project ID.
 *
 * @param projectsDir - Absolute path of the directory to scan
 * @param baseRelPath - Relative path accumulated so far (becomes the project-ID prefix)
 * @returns One entry per discovered project: { projectId, dbPath }
 */
function findAllCoreMemoryDatabases(
  projectsDir: string,
  baseRelPath: string = ''
): Array<{ projectId: string; dbPath: string }> {
  const results: Array<{ projectId: string; dbPath: string }> = [];

  // An unreadable directory (permissions, raced deletion) must not abort the
  // whole scan - treat it as empty instead of letting readdirSync throw.
  const entries = (() => {
    try {
      return readdirSync(projectsDir, { withFileTypes: true });
    } catch {
      return [];
    }
  })();

  for (const entry of entries) {
    if (!entry.isDirectory()) continue;
    // Skip hidden directories (e.g. .git)
    if (entry.name.startsWith('.')) continue;

    const currentPath = join(projectsDir, entry.name);
    const currentRelPath = baseRelPath ? join(baseRelPath, entry.name) : entry.name;

    // A directory containing core-memory/core_memory.db is a project; its
    // relative path from the scan root is used as the project ID.
    const coreMemoryDb = join(currentPath, 'core-memory', 'core_memory.db');
    if (existsSync(coreMemoryDb)) {
      results.push({
        projectId: currentRelPath,
        dbPath: coreMemoryDb
      });
    }

    // Never descend into the core-memory data directory itself - it holds
    // database files, not nested projects.
    if (entry.name === 'core-memory') continue;

    // Recurse into subdirectories to find nested projects
    results.push(...findAllCoreMemoryDatabases(currentPath, currentRelPath));
  }
  return results;
}
/**
* List all projects with their memory counts (supports nested project structure)
*/
export function listAllProjects(): ProjectInfo[] {
const projectsDir = join(getCCWHome(), 'projects');
@@ -1652,43 +1690,38 @@ export function listAllProjects(): ProjectInfo[] {
return [];
}
// Find all core-memory databases recursively
const allProjects = findAllCoreMemoryDatabases(projectsDir);
const projects: ProjectInfo[] = [];
const entries = readdirSync(projectsDir, { withFileTypes: true });
for (const entry of entries) {
if (!entry.isDirectory()) continue;
const projectId = entry.name;
const coreMemoryDb = join(projectsDir, projectId, 'core-memory', 'core_memory.db');
for (const { projectId, dbPath } of allProjects) {
let memoriesCount = 0;
let clustersCount = 0;
let lastUpdated: string | undefined;
if (existsSync(coreMemoryDb)) {
try {
const db = new Database(dbPath, { readonly: true });
// Count memories
const memResult = db.prepare('SELECT COUNT(*) as count FROM memories').get() as { count: number };
memoriesCount = memResult?.count || 0;
// Count clusters
try {
const db = new Database(coreMemoryDb, { readonly: true });
// Count memories
const memResult = db.prepare('SELECT COUNT(*) as count FROM memories').get() as { count: number };
memoriesCount = memResult?.count || 0;
// Count clusters
try {
const clusterResult = db.prepare('SELECT COUNT(*) as count FROM session_clusters').get() as { count: number };
clustersCount = clusterResult?.count || 0;
} catch {
// Table might not exist
}
// Get last update time
const lastMemory = db.prepare('SELECT MAX(updated_at) as last FROM memories').get() as { last: string };
lastUpdated = lastMemory?.last;
db.close();
const clusterResult = db.prepare('SELECT COUNT(*) as count FROM session_clusters').get() as { count: number };
clustersCount = clusterResult?.count || 0;
} catch {
// Database might be locked or corrupted
// Table might not exist
}
// Get last update time
const lastMemory = db.prepare('SELECT MAX(updated_at) as last FROM memories').get() as { last: string };
lastUpdated = lastMemory?.last;
db.close();
} catch {
// Database might be locked or corrupted
}
// Convert project ID back to approximate path
@@ -1715,7 +1748,8 @@ export function listAllProjects(): ProjectInfo[] {
}
/**
* Get memories from another project by ID
* Get memories from another project by ID (supports nested project structure)
* @param projectId - Project ID which can be a nested path like "d-/ccws"
*/
export function getMemoriesFromProject(projectId: string): CoreMemory[] {
const projectsDir = join(getCCWHome(), 'projects');
@@ -1746,8 +1780,8 @@ export function getMemoriesFromProject(projectId: string): CoreMemory[] {
}
/**
* Find a memory by ID across all projects
* Searches through all project databases to locate a specific memory
* Find a memory by ID across all projects (supports nested project structure)
* Searches through all project databases recursively to locate a specific memory
*/
export function findMemoryAcrossProjects(memoryId: string): { memory: CoreMemory; projectId: string } | null {
const projectsDir = join(getCCWHome(), 'projects');
@@ -1756,18 +1790,12 @@ export function findMemoryAcrossProjects(memoryId: string): { memory: CoreMemory
return null;
}
const entries = readdirSync(projectsDir, { withFileTypes: true });
for (const entry of entries) {
if (!entry.isDirectory()) continue;
const projectId = entry.name;
const coreMemoryDb = join(projectsDir, projectId, 'core-memory', 'core_memory.db');
if (!existsSync(coreMemoryDb)) continue;
// Find all core-memory databases recursively
const allProjects = findAllCoreMemoryDatabases(projectsDir);
for (const { projectId, dbPath } of allProjects) {
try {
const db = new Database(coreMemoryDb, { readonly: true });
const db = new Database(dbPath, { readonly: true });
const row = db.prepare('SELECT * FROM memories WHERE id = ?').get(memoryId) as any;
db.close();

View File

@@ -80,6 +80,14 @@
"Maps keywords to execution modes using ModeRegistryService",
"Injects systemMessage on mode activation"
]
},
{
"name": "Spec Context Injection",
"description": "Loads project specs matching prompt keywords and injects as system context",
"enabled": true,
"command": "ccw spec load --stdin",
"timeout": 5000,
"failMode": "silent"
}
],
"file-modified": [

View File

@@ -0,0 +1,417 @@
/**
* Spec Index Builder
*
* Scans .workflow/{dimension}/*.md files, parses YAML frontmatter via
* gray-matter, and writes .spec-index/{dimension}.index.json cache files.
*
* Supports 4 dimensions: specs, roadmap, changelog, personal
*
* YAML Frontmatter Schema:
* ---
* title: "Document Title"
* dimension: "specs"
* keywords: ["auth", "security"]
* readMode: "required" # required | optional
* priority: "high" # critical | high | medium | low
* ---
*/
import matter from 'gray-matter';
import { readFileSync, writeFileSync, existsSync, mkdirSync, readdirSync } from 'fs';
import { join, basename, extname, relative } from 'path';
// ============================================================================
// Types
// ============================================================================
/**
 * YAML frontmatter schema for spec MD files.
 *
 * NOTE(review): `dimension` is a free-form string here while SPEC_DIMENSIONS
 * only defines 4 values - presumably files should use one of those; confirm.
 */
export interface SpecFrontmatter {
  // Document title shown in index listings
  title: string;
  // Dimension the file belongs to (specs | roadmap | changelog | personal)
  dimension: string;
  // Keywords matched against keywords extracted from user prompts
  keywords: string[];
  // 'required' specs always load; 'optional' ones load on keyword match
  readMode: 'required' | 'optional';
  // Ordering hint for loaders
  priority: 'critical' | 'high' | 'medium' | 'low';
}
/**
 * Single entry in the dimension index cache (one per indexed MD file).
 */
export interface SpecIndexEntry {
  /** Document title from frontmatter */
  title: string;
  /** Relative file path from project root (forward-slash normalized) */
  file: string;
  /** Dimension this spec belongs to */
  dimension: string;
  /** Keywords for matching against user prompts */
  keywords: string[];
  /** Whether this spec is required or optional */
  readMode: 'required' | 'optional';
  /** Priority level for ordering */
  priority: 'critical' | 'high' | 'medium' | 'low';
}
/**
 * Complete index for one dimension, as persisted to
 * .spec-index/{dimension}.index.json.
 */
export interface DimensionIndex {
  /** Dimension name */
  dimension: string;
  /** All spec entries in this dimension */
  entries: SpecIndexEntry[];
  /** ISO timestamp when this index was built */
  built_at: string;
}
// ============================================================================
// Constants
// ============================================================================
/**
 * The 4 supported spec dimensions.
 */
export const SPEC_DIMENSIONS = ['specs', 'roadmap', 'changelog', 'personal'] as const;
/** Union of the supported dimension names, derived from SPEC_DIMENSIONS. */
export type SpecDimension = typeof SPEC_DIMENSIONS[number];
/**
 * Valid readMode values (used by the isValidReadMode type guard).
 */
const VALID_READ_MODES = ['required', 'optional'] as const;
/**
 * Valid priority values (used by the isValidPriority type guard).
 */
const VALID_PRIORITIES = ['critical', 'high', 'medium', 'low'] as const;
/**
 * Directory name for spec index cache files.
 */
const SPEC_INDEX_DIR = '.spec-index';
// ============================================================================
// Public API
// ============================================================================
/**
 * Resolve the cache-file location for a dimension's index.
 *
 * @param projectPath - Project root directory
 * @param dimension - The dimension name
 * @returns Absolute path to .spec-index/{dimension}.index.json
 */
export function getIndexPath(projectPath: string, dimension: string): string {
  const indexFileName = `${dimension}.index.json`;
  return join(projectPath, SPEC_INDEX_DIR, indexFileName);
}
/**
 * Resolve the source directory holding a dimension's MD files.
 *
 * @param projectPath - Project root directory
 * @param dimension - The dimension name
 * @returns Absolute path to .workflow/{dimension}/
 */
export function getDimensionDir(projectPath: string, dimension: string): string {
  const workflowRoot = join(projectPath, '.workflow');
  return join(workflowRoot, dimension);
}
/**
 * Build the index for a single dimension.
 *
 * Scans .workflow/{dimension}/*.md, parses each file's YAML frontmatter and
 * collects one SpecIndexEntry per usable file. A missing directory, a
 * readdir failure, or individual malformed files never fail the build - they
 * simply yield fewer entries.
 *
 * @param projectPath - Project root directory
 * @param dimension - The dimension to index (e.g., 'specs')
 * @returns DimensionIndex with all valid entries
 */
export async function buildDimensionIndex(
  projectPath: string,
  dimension: string
): Promise<DimensionIndex> {
  const dimensionDir = getDimensionDir(projectPath, dimension);

  // Produce a timestamped index so every exit path is uniform.
  const makeIndex = (entries: SpecIndexEntry[]): DimensionIndex => ({
    dimension,
    entries,
    built_at: new Date().toISOString(),
  });

  // Missing dimension directory -> empty index rather than an error.
  if (!existsSync(dimensionDir)) {
    return makeIndex([]);
  }

  // Collect candidate .md files; a readdir failure also yields an empty index.
  let mdFiles: string[] = [];
  try {
    mdFiles = readdirSync(dimensionDir).filter(
      name => extname(name).toLowerCase() === '.md'
    );
  } catch {
    return makeIndex([]);
  }

  // parseSpecFile returns null for unreadable/malformed files - skip those.
  const collected: SpecIndexEntry[] = [];
  for (const name of mdFiles) {
    const parsed = parseSpecFile(join(dimensionDir, name), dimension, projectPath);
    if (parsed !== null) {
      collected.push(parsed);
    }
  }
  return makeIndex(collected);
}
/**
* Build indices for all 4 dimensions and write to .spec-index/.
*
* Creates .spec-index/ directory if it doesn't exist.
* Writes {dimension}.index.json for each dimension.
*
* @param projectPath - Project root directory
*/
export async function buildAllIndices(projectPath: string): Promise<void> {
const indexDir = join(projectPath, SPEC_INDEX_DIR);
// Ensure .spec-index directory exists
if (!existsSync(indexDir)) {
mkdirSync(indexDir, { recursive: true });
}
for (const dimension of SPEC_DIMENSIONS) {
const index = await buildDimensionIndex(projectPath, dimension);
const indexPath = getIndexPath(projectPath, dimension);
try {
writeFileSync(indexPath, JSON.stringify(index, null, 2), 'utf-8');
} catch (err) {
// Log but continue with other dimensions
console.error(
`[spec-index-builder] Failed to write index for ${dimension}: ${(err as Error).message}`
);
}
}
}
/**
* Read a cached dimension index from disk.
*
* @param projectPath - Project root directory
* @param dimension - The dimension to read
* @returns DimensionIndex if cache exists and is valid, null otherwise
*/
export function readCachedIndex(
projectPath: string,
dimension: string
): DimensionIndex | null {
const indexPath = getIndexPath(projectPath, dimension);
if (!existsSync(indexPath)) {
return null;
}
try {
const content = readFileSync(indexPath, 'utf-8');
const parsed = JSON.parse(content) as DimensionIndex;
// Basic validation
if (
parsed &&
typeof parsed.dimension === 'string' &&
Array.isArray(parsed.entries) &&
typeof parsed.built_at === 'string'
) {
return parsed;
}
return null;
} catch {
return null;
}
}
/**
* Get the dimension index, using cache if available, otherwise building fresh.
*
* @param projectPath - Project root directory
* @param dimension - The dimension to get
* @param forceRebuild - Skip cache and rebuild from source files
* @returns DimensionIndex
*/
export async function getDimensionIndex(
projectPath: string,
dimension: string,
forceRebuild = false
): Promise<DimensionIndex> {
if (!forceRebuild) {
const cached = readCachedIndex(projectPath, dimension);
if (cached) {
return cached;
}
}
// Build fresh and cache
const index = await buildDimensionIndex(projectPath, dimension);
const indexDir = join(projectPath, SPEC_INDEX_DIR);
if (!existsSync(indexDir)) {
mkdirSync(indexDir, { recursive: true });
}
const indexPath = getIndexPath(projectPath, dimension);
try {
writeFileSync(indexPath, JSON.stringify(index, null, 2), 'utf-8');
} catch {
// Cache write failure is non-fatal
}
return index;
}
// ============================================================================
// Internal helpers
// ============================================================================
/**
 * Read one spec MD file and turn its frontmatter into a SpecIndexEntry.
 *
 * Unreadable files and files whose frontmatter fails to parse yield null.
 * A missing/empty title falls back to the file's base name (sans extension).
 *
 * @param filePath - Absolute path to the MD file
 * @param dimension - The dimension this file belongs to
 * @param projectPath - Project root for computing relative paths
 * @returns SpecIndexEntry, or null when the file cannot be used
 */
function parseSpecFile(
  filePath: string,
  dimension: string,
  projectPath: string
): SpecIndexEntry | null {
  let raw: string;
  try {
    raw = readFileSync(filePath, 'utf-8');
  } catch {
    return null;
  }

  let parsed: matter.GrayMatterFile<string>;
  try {
    parsed = matter(raw);
  } catch {
    // Malformed YAML frontmatter - skip this file.
    return null;
  }

  const data = parsed.data as Record<string, unknown>;
  // Title is required; fall back to the filename when absent or blank.
  const title = extractString(data, 'title') ?? basename(filePath, extname(filePath));
  return buildEntry(title, filePath, dimension, projectPath, data);
}
/**
 * Assemble a SpecIndexEntry from parsed frontmatter data, applying defaults
 * for missing/invalid fields ('optional' readMode, 'medium' priority).
 */
function buildEntry(
  title: string,
  filePath: string,
  dimension: string,
  projectPath: string,
  data: Record<string, unknown>
): SpecIndexEntry {
  // Store the path relative to the project root with forward slashes so the
  // cache is portable across platforms.
  const file = relative(projectPath, filePath).replace(/\\/g, '/');

  const readModeRaw = extractString(data, 'readMode');
  const priorityRaw = extractString(data, 'priority');

  return {
    title,
    file,
    dimension,
    // Keywords accept either a YAML list or a comma-separated string.
    keywords: extractStringArray(data, 'keywords'),
    readMode: isValidReadMode(readModeRaw) ? readModeRaw : 'optional',
    priority: isValidPriority(priorityRaw) ? priorityRaw : 'medium',
  };
}
/**
 * Read a non-empty, trimmed string field from parsed YAML data.
 * Returns null when the field is absent, not a string, or whitespace-only.
 */
function extractString(
  data: Record<string, unknown>,
  key: string
): string | null {
  const raw = data[key];
  if (typeof raw !== 'string') {
    return null;
  }
  const trimmed = raw.trim();
  return trimmed.length > 0 ? trimmed : null;
}
/**
 * Read a string-array field from parsed YAML data.
 * Accepts either a YAML list or a comma-separated string; non-string items
 * and empty fragments are dropped, remaining items are trimmed.
 */
function extractStringArray(
  data: Record<string, unknown>,
  key: string
): string[] {
  const raw = data[key];
  if (Array.isArray(raw)) {
    const out: string[] = [];
    for (const item of raw) {
      if (typeof item !== 'string') continue;
      const trimmed = item.trim();
      if (trimmed.length > 0) out.push(trimmed);
    }
    return out;
  }
  if (typeof raw === 'string') {
    const out: string[] = [];
    for (const piece of raw.split(',')) {
      const trimmed = piece.trim();
      if (trimmed.length > 0) out.push(trimmed);
    }
    return out;
  }
  return [];
}
/**
 * Type guard for valid readMode values.
 */
function isValidReadMode(value: string | null): value is 'required' | 'optional' {
  if (value === null) return false;
  return (VALID_READ_MODES as readonly string[]).includes(value);
}
/**
 * Type guard for valid priority values.
 */
function isValidPriority(value: string | null): value is 'critical' | 'high' | 'medium' | 'low' {
  if (value === null) return false;
  return (VALID_PRIORITIES as readonly string[]).includes(value);
}

296
ccw/src/tools/spec-init.ts Normal file
View File

@@ -0,0 +1,296 @@
/**
* Spec Init - Initialize the 4-dimension spec system
*
* Creates .workflow/specs/, .workflow/roadmap/, .workflow/changelog/,
* .workflow/personal/, and .workflow/.spec-index/ directories with
* seed MD documents containing YAML frontmatter templates.
*
* Idempotent: skips existing files, only creates missing directories/files.
*/
import { existsSync, mkdirSync, writeFileSync } from 'fs';
import { join } from 'path';
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/**
 * YAML frontmatter emitted for seed documents.
 *
 * NOTE(review): priority here omits 'critical', which the index builder's
 * frontmatter schema accepts - confirm whether the two should match.
 */
export interface SpecFrontmatter {
  // Document title (written as a quoted YAML scalar)
  title: string;
  // Dimension the seed belongs to (specs | roadmap | changelog | personal)
  dimension: string;
  // Keywords used for prompt matching
  keywords: string[];
  // 'required' specs always load; 'optional' load on keyword match
  readMode: 'required' | 'optional';
  // Ordering hint for loaders
  priority: 'high' | 'medium' | 'low';
}
/** One seed markdown document: target filename, frontmatter, and body text. */
export interface SeedDoc {
  filename: string;
  frontmatter: SpecFrontmatter;
  body: string;
}
/** Summary of an initSpecSystem() run. */
export interface InitResult {
  // Files written by this run
  created: string[];
  // Files left untouched because they already existed
  skipped: string[];
  // Directories created by this run
  directories: string[];
}
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
/** The 4 spec dimensions; one .workflow/{dim}/ directory is created for each. */
export const DIMENSIONS = ['specs', 'roadmap', 'changelog', 'personal'] as const;
/** Name of the index-cache directory created under .workflow/. */
export const INDEX_DIR = '.spec-index';
// ---------------------------------------------------------------------------
// Seed Documents
// ---------------------------------------------------------------------------
/**
 * Seed documents written per dimension on init.
 *
 * 'specs' and 'personal' get starter templates, 'roadmap' gets a stub, and
 * 'changelog' deliberately starts empty (its directory is still created).
 */
export const SEED_DOCS: Map<string, SeedDoc[]> = new Map([
  [
    'specs',
    [
      {
        filename: 'coding-conventions.md',
        frontmatter: {
          title: 'Coding Conventions',
          dimension: 'specs',
          keywords: ['typescript', 'naming', 'style', 'convention'],
          readMode: 'required',
          priority: 'high',
        },
        body: `# Coding Conventions
## Naming
- Use camelCase for variables and functions
- Use PascalCase for classes and interfaces
- Use UPPER_SNAKE_CASE for constants
## Formatting
- 2-space indentation
- Single quotes for strings
- Trailing commas in multi-line constructs
## Patterns
- Prefer composition over inheritance
- Use early returns to reduce nesting
- Keep functions under 30 lines when practical
## Error Handling
- Always handle errors explicitly
- Prefer typed errors over generic catch-all
- Log errors with sufficient context
`,
      },
      {
        filename: 'architecture-constraints.md',
        frontmatter: {
          title: 'Architecture Constraints',
          dimension: 'specs',
          keywords: ['architecture', 'module', 'layer', 'pattern'],
          readMode: 'required',
          priority: 'high',
        },
        body: `# Architecture Constraints
## Module Boundaries
- Each module owns its data and exposes a public API
- No circular dependencies between modules
- Shared utilities live in a dedicated shared layer
## Layer Separation
- Presentation layer must not import data layer directly
- Business logic must be independent of framework specifics
- Configuration must be externalized, not hardcoded
## Dependency Rules
- External dependencies require justification
- Prefer standard library when available
- Pin dependency versions for reproducibility
`,
      },
    ],
  ],
  [
    'personal',
    [
      {
        filename: 'coding-style.md',
        frontmatter: {
          title: 'Personal Coding Style',
          dimension: 'personal',
          keywords: ['style', 'preference'],
          readMode: 'optional',
          priority: 'medium',
        },
        body: `# Personal Coding Style
## Preferences
- Describe your preferred coding style here
- Example: verbose variable names vs terse, functional vs imperative
## Patterns I Prefer
- List patterns you reach for most often
- Example: builder pattern, factory functions, tagged unions
## Things I Avoid
- List anti-patterns or approaches you dislike
- Example: deep inheritance hierarchies, magic strings
`,
      },
      {
        filename: 'tool-preferences.md',
        frontmatter: {
          title: 'Tool Preferences',
          dimension: 'personal',
          keywords: ['tool', 'cli', 'editor'],
          readMode: 'optional',
          priority: 'low',
        },
        body: `# Tool Preferences
## Editor
- Preferred editor and key extensions/plugins
## CLI Tools
- Preferred shell, package manager, build tools
## Debugging
- Preferred debugging approach and tools
`,
      },
    ],
  ],
  [
    'roadmap',
    [
      {
        filename: 'current.md',
        frontmatter: {
          title: 'Current Roadmap',
          dimension: 'roadmap',
          keywords: ['roadmap', 'plan', 'milestone'],
          readMode: 'optional',
          priority: 'medium',
        },
        body: `# Current Roadmap
## Active Milestone
- Milestone name and target date
- Key deliverables
## Upcoming
- Next planned features or improvements
## Completed
- Recently completed milestones for reference
`,
      },
    ],
  ],
  // changelog has no seed docs: entries are expected to be added later.
  [
    'changelog',
    [],
  ],
]);
// ---------------------------------------------------------------------------
// Frontmatter Serializer
// ---------------------------------------------------------------------------
/**
 * Serialize a SpecFrontmatter object to a YAML frontmatter block.
 *
 * Uses string building to avoid a js-yaml dependency. The title is emitted
 * as a double-quoted scalar with backslashes and quotes escaped, and an
 * empty keyword list is emitted as `keywords: []` (the previous behavior
 * produced a bare `keywords:` key - which YAML parses as null - followed by
 * a stray blank line).
 *
 * @param fm - Frontmatter fields to serialize
 * @returns The frontmatter block delimited by `---` lines, no trailing newline
 */
export function formatFrontmatter(fm: SpecFrontmatter): string {
  // Escape backslashes before quotes so the quoted scalar stays valid YAML.
  const safeTitle = fm.title.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
  const lines = [
    '---',
    `title: "${safeTitle}"`,
    `dimension: ${fm.dimension}`,
  ];
  if (fm.keywords.length === 0) {
    lines.push('keywords: []');
  } else {
    lines.push('keywords:');
    for (const keyword of fm.keywords) {
      lines.push(` - ${keyword}`);
    }
  }
  lines.push(`readMode: ${fm.readMode}`, `priority: ${fm.priority}`, '---');
  return lines.join('\n');
}
// ---------------------------------------------------------------------------
// Init Function
// ---------------------------------------------------------------------------
/**
 * Initialize the spec system directory structure and seed documents.
 *
 * Idempotent: directories are created only when missing, and seed files are
 * written only when they do not already exist.
 *
 * @param projectPath - Absolute path to the project root
 * @returns InitResult listing created/skipped files and created directories
 */
export function initSpecSystem(projectPath: string): InitResult {
  const workflowDir = join(projectPath, '.workflow');
  const result: InitResult = { created: [], skipped: [], directories: [] };

  // Create a directory when absent, recording it in the result.
  const ensureDir = (path: string): void => {
    if (existsSync(path)) return;
    mkdirSync(path, { recursive: true });
    result.directories.push(path);
  };

  // The .workflow root itself is created silently (not recorded in result).
  if (!existsSync(workflowDir)) {
    mkdirSync(workflowDir, { recursive: true });
  }

  // One directory per dimension, plus the index-cache directory.
  for (const dim of DIMENSIONS) {
    ensureDir(join(workflowDir, dim));
  }
  ensureDir(join(workflowDir, INDEX_DIR));

  // Write each seed document unless it already exists on disk.
  for (const [dimension, docs] of SEED_DOCS) {
    for (const doc of docs) {
      const target = join(workflowDir, dimension, doc.filename);
      if (existsSync(target)) {
        result.skipped.push(target);
        continue;
      }
      const content = formatFrontmatter(doc.frontmatter) + '\n\n' + doc.body;
      writeFileSync(target, content, 'utf8');
      result.created.push(target);
    }
  }
  return result;
}

View File

@@ -0,0 +1,208 @@
/**
* Spec Keyword Extractor
*
* Extracts keywords from user prompt text for matching against
* spec document YAML frontmatter keywords.
*
* Supports:
* - English word tokenization (split by spaces/punctuation, remove stop words)
* - Chinese character segment extraction (CJK boundary splitting)
*/
/**
 * Common English stop words to filter out during keyword extraction.
 * These words appear frequently but carry little semantic meaning
 * for spec matching.
 *
 * All entries are lowercase; tokens are lowercased before the lookup in
 * extractEnglishKeywords, so membership checks are case-insensitive.
 */
export const STOP_WORDS = new Set([
  // Articles
  'a', 'an', 'the',
  // Pronouns
  'i', 'me', 'my', 'we', 'our', 'you', 'your', 'he', 'she', 'it', 'they', 'them',
  'this', 'that', 'these', 'those', 'what', 'which', 'who', 'whom',
  // Prepositions
  'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'from', 'as', 'into',
  'about', 'between', 'through', 'after', 'before', 'above', 'below',
  // Conjunctions
  'and', 'or', 'but', 'if', 'then', 'else', 'when', 'while', 'so', 'because',
  // Auxiliary verbs
  'is', 'am', 'are', 'was', 'were', 'be', 'been', 'being',
  'has', 'have', 'had', 'do', 'does', 'did',
  'will', 'would', 'shall', 'should', 'may', 'might', 'can', 'could', 'must',
  // Common verbs (too generic for matching)
  'get', 'got', 'make', 'made', 'let', 'go', 'going', 'come', 'take', 'give',
  // Adverbs
  'not', 'no', 'yes', 'also', 'just', 'only', 'very', 'too', 'now', 'here',
  'there', 'how', 'why', 'where', 'all', 'each', 'every', 'both', 'some',
  'any', 'most', 'more', 'less', 'much', 'many', 'few', 'other', 'such',
  // Misc
  'please', 'need', 'want', 'like', 'know', 'think', 'see', 'use', 'using',
  'way', 'thing', 'something', 'anything', 'nothing',
]);
/**
 * Regex to detect CJK (Chinese/Japanese/Korean) characters.
 *
 * NOTE(review): the ranges cover CJK Unified Ideographs, Extension A, and
 * Compatibility Ideographs only - Japanese kana and Korean hangul are NOT
 * matched; confirm whether that is intentional.
 */
const CJK_REGEX = /[\u4e00-\u9fff\u3400-\u4dbf\uf900-\ufaff]/;
/**
 * Regex to match contiguous CJK character sequences (same ranges as
 * CJK_REGEX, one-or-more, with the global flag for match()).
 */
const CJK_SEGMENT_REGEX = /[\u4e00-\u9fff\u3400-\u4dbf\uf900-\ufaff]+/g;
/**
 * Regex to split text into English word tokens.
 * Splits on whitespace, common ASCII punctuation, and full-width CJK
 * punctuation (the \u3001-\uff1f escapes).
 */
const WORD_SPLIT_REGEX = /[\s,;:!?.()\[\]{}<>"'`~@#$%^&*+=|\\/_\-\u3001\u3002\uff0c\uff1b\uff1a\uff01\uff1f]+/;
/**
 * Minimum word length to keep (filters out single-char English tokens).
 */
const MIN_WORD_LENGTH = 2;
/**
* Extract keywords from prompt text.
*
* For English text:
* Splits by whitespace/punctuation, lowercases, removes stop words,
* filters short tokens, and deduplicates.
*
* For Chinese text:
* Extracts contiguous CJK character sequences. For sequences longer
* than 2 characters, also generates 2-character sliding window bigrams
* to improve matching (since Chinese keywords in YAML are typically
* 2-4 character compounds).
*
* @param text - The user prompt text to extract keywords from
* @returns Array of unique keywords (lowercase for English, original for CJK)
*/
export function extractKeywords(text: string): string[] {
if (!text || typeof text !== 'string') {
return [];
}
const keywords = new Set<string>();
// Extract English keywords
const englishKeywords = extractEnglishKeywords(text);
for (const kw of englishKeywords) {
keywords.add(kw);
}
// Extract CJK keywords
const cjkKeywords = extractCjkKeywords(text);
for (const kw of cjkKeywords) {
keywords.add(kw);
}
return Array.from(keywords);
}
/**
 * Tokenize text into English keywords.
 *
 * CJK runs are blanked out first so they don't pollute English tokens; the
 * remainder is split on whitespace/punctuation, lowercased, and filtered:
 * stop words, tokens shorter than MIN_WORD_LENGTH, and pure numbers are
 * dropped. Deduplicates while preserving first-seen order.
 *
 * @param text - Input text
 * @returns Array of lowercase English keyword tokens
 */
function extractEnglishKeywords(text: string): string[] {
  const withoutCjk = text.replace(CJK_SEGMENT_REGEX, ' ');
  const seen = new Set<string>();
  for (const rawToken of withoutCjk.split(WORD_SPLIT_REGEX)) {
    const token = rawToken.toLowerCase().trim();
    if (token.length < MIN_WORD_LENGTH) continue;
    if (STOP_WORDS.has(token)) continue;
    if (/^\d+$/.test(token)) continue; // pure number tokens carry no meaning
    seen.add(token);
  }
  return Array.from(seen);
}
/**
 * Extract CJK keyword segments from text.
 *
 * Each contiguous CJK run is kept whole; runs longer than 2 characters also
 * contribute every 2-character sliding-window bigram, since Chinese spec
 * keywords are typically 2-4 character compounds.
 *
 * @param text - Input text
 * @returns Array of CJK keyword segments (deduplicated)
 */
function extractCjkKeywords(text: string): string[] {
  if (!CJK_REGEX.test(text)) {
    return [];
  }
  const segments = text.match(CJK_SEGMENT_REGEX) ?? [];
  const found = new Set<string>();
  for (const segment of segments) {
    found.add(segment);
    if (segment.length <= 2) continue;
    for (let start = 0; start + 2 <= segment.length; start++) {
      found.add(segment.substring(start, start + 2));
    }
  }
  return Array.from(found);
}
/**
 * Check whether a keyword matches any entry of a target keyword list.
 *
 * Matching is case-insensitive and succeeds on exact equality or when either
 * string contains the other (so 'auth' matches 'authentication' and vice
 * versa). CJK strings compare unchanged by lowercasing, i.e. exact/substring.
 *
 * @param keyword - The keyword to check
 * @param targetKeywords - The target keyword list from spec frontmatter
 * @returns true if keyword matches any target
 */
export function keywordMatches(keyword: string, targetKeywords: string[]): boolean {
  const needle = keyword.toLowerCase();
  for (const target of targetKeywords) {
    const candidate = target.toLowerCase();
    // Exact match (kept explicit for readability; includes() would also cover it)
    if (candidate === needle) return true;
    // Substring match in either direction
    if (candidate.includes(needle) || needle.includes(candidate)) return true;
  }
  return false;
}
/**
 * Count how many extracted keywords match the spec's keyword list.
 *
 * Higher score means a better match; 0 means no overlap (or either list is
 * empty). Each extracted keyword contributes at most 1 regardless of how many
 * spec keywords it matches.
 *
 * @param extractedKeywords - Keywords extracted from user prompt
 * @param specKeywords - Keywords from spec YAML frontmatter
 * @returns Number of matching extracted keywords (0 = no match)
 */
export function calculateMatchScore(
  extractedKeywords: string[],
  specKeywords: string[]
): number {
  if (extractedKeywords.length === 0 || specKeywords.length === 0) {
    return 0;
  }
  return extractedKeywords.reduce(
    (score, keyword) => (keywordMatches(keyword, specKeywords) ? score + 1 : score),
    0
  );
}

View File

@@ -0,0 +1,378 @@
/**
* Spec Loader
*
* Core loading logic for the spec system. Reads index caches, filters specs
* by readMode and keyword match, loads MD content, merges by dimension
* priority, and formats output for CLI or Hook consumption.
*
* Single entry point: loadSpecs(options) -> SpecLoadResult
*
* Data flow:
* Keywords -> IndexCache -> Filter(required + keyword-matched) ->
* MDLoader -> PriorityMerger -> OutputFormatter
*/
import matter from 'gray-matter';
import { readFileSync, existsSync } from 'fs';
import { join } from 'path';
import {
getDimensionIndex,
SpecIndexEntry,
DimensionIndex,
SPEC_DIMENSIONS,
type SpecDimension,
} from './spec-index-builder.js';
import {
extractKeywords,
calculateMatchScore,
} from './spec-keyword-extractor.js';
// ============================================================================
// Types
// ============================================================================
/**
* Input options for loadSpecs().
*/
export interface SpecLoadOptions {
/** Absolute path to the project root */
projectPath: string;
/** Specific dimension to load (loads all if omitted) */
dimension?: SpecDimension;
/** Pre-extracted keywords (skips extraction if provided) */
keywords?: string[];
/** Output format: 'cli' for markdown, 'hook' for JSON */
outputFormat: 'cli' | 'hook';
/** Raw stdin data from Claude Code hook (used to extract user_prompt) */
stdinData?: { user_prompt?: string; prompt?: string; [key: string]: unknown };
/** Enable debug logging to stderr */
debug?: boolean;
}
/**
 * Output from loadSpecs().
 */
export interface SpecLoadResult {
  /** Formatted content string (markdown or JSON, depending on format) */
  content: string;
  /** Output format that was used ('markdown' for cli, 'json' for hook) */
  format: 'markdown' | 'json';
  /** Titles of every loaded spec (required specs and keyword matches alike) */
  matchedSpecs: string[];
  /** Total number of spec files loaded */
  totalLoaded: number;
}
/**
 * Internal representation of a loaded spec's content.
 */
interface LoadedSpec {
  /** Spec title from the index entry */
  title: string;
  /** Owning dimension (keys of DIMENSION_PRIORITY) */
  dimension: string;
  /** Spec priority label (keys of SPEC_PRIORITY_WEIGHT) */
  priority: string;
  /** Markdown body with YAML frontmatter stripped */
  content: string;
}
// ============================================================================
// Constants
// ============================================================================
/**
 * Dimension priority for merge ordering.
 * Lower number = loaded first (lower priority, gets overridden).
 * Higher number = loaded last (higher priority, overrides).
 * Unknown dimensions fall back to 0 via `?? 0` at the lookup site, sorting first.
 */
const DIMENSION_PRIORITY: Record<string, number> = {
  personal: 1,
  changelog: 2,
  roadmap: 3,
  specs: 4,
};
/**
 * Priority weight for ordering specs within a dimension.
 * Higher weight sorts earlier; unknown labels fall back to 0 via `?? 0`.
 */
const SPEC_PRIORITY_WEIGHT: Record<string, number> = {
  critical: 4,
  high: 3,
  medium: 2,
  low: 1,
};
// ============================================================================
// Public API
// ============================================================================
/**
* Load specs based on options.
*
* Pipeline:
* 1. Extract keywords from options.keywords, stdinData, or empty
* 2. For each dimension: read index cache (fallback to on-the-fly build)
* 3. Filter: all required specs + optional specs with keyword match
* 4. Load MD file content (strip frontmatter)
* 5. Merge by dimension priority
* 6. Format for CLI (markdown) or Hook (JSON)
*
* @param options - Loading configuration
* @returns SpecLoadResult with formatted content
*/
export async function loadSpecs(options: SpecLoadOptions): Promise<SpecLoadResult> {
const { projectPath, outputFormat, debug } = options;
// Step 1: Resolve keywords
const keywords = resolveKeywords(options);
if (debug) {
debugLog(`Extracted ${keywords.length} keywords: [${keywords.join(', ')}]`);
}
// Step 2: Determine which dimensions to process
const dimensions = options.dimension
? [options.dimension]
: [...SPEC_DIMENSIONS];
// Step 3: For each dimension, read index and filter specs
const allLoadedSpecs: LoadedSpec[] = [];
let totalScanned = 0;
for (const dim of dimensions) {
const index = await getDimensionIndex(projectPath, dim);
totalScanned += index.entries.length;
const { required, matched } = filterSpecs(index, keywords);
if (debug) {
debugLog(
`[${dim}] scanned=${index.entries.length} required=${required.length} matched=${matched.length}`
);
}
// Step 4: Load content for filtered entries
const entriesToLoad = [...required, ...matched];
const loaded = loadSpecContent(projectPath, entriesToLoad);
allLoadedSpecs.push(...loaded);
}
if (debug) {
debugLog(
`Total: scanned=${totalScanned} loaded=${allLoadedSpecs.length}`
);
}
// Step 5: Merge by dimension priority
const mergedContent = mergeByPriority(allLoadedSpecs);
// Step 6: Format output
const matchedTitles = allLoadedSpecs.map(s => s.title);
const content = formatOutput(mergedContent, matchedTitles, outputFormat);
const format = outputFormat === 'cli' ? 'markdown' : 'json';
return {
content,
format,
matchedSpecs: matchedTitles,
totalLoaded: allLoadedSpecs.length,
};
}
// ============================================================================
// Core Functions
// ============================================================================
/**
 * Filter specs by readMode and keyword match.
 *
 * - required: every entry whose readMode === 'required'
 * - matched: 'optional' entries whose keywords intersect the prompt keywords
 *
 * An entry lands in at most one bucket, so the result is deduplicated.
 *
 * @param index - The dimension index to filter
 * @param keywords - Extracted prompt keywords
 * @returns Separated required and matched entries
 */
export function filterSpecs(
  index: DimensionIndex,
  keywords: string[]
): { required: SpecIndexEntry[]; matched: SpecIndexEntry[] } {
  const required: SpecIndexEntry[] = [];
  const matched: SpecIndexEntry[] = [];
  const hasPromptKeywords = keywords.length > 0;

  for (const entry of index.entries) {
    if (entry.readMode === 'required') {
      required.push(entry);
    } else if (
      hasPromptKeywords &&
      entry.keywords.length > 0 &&
      calculateMatchScore(keywords, entry.keywords) > 0
    ) {
      matched.push(entry);
    }
  }

  return { required, matched };
}
/**
 * Merge loaded spec content by dimension priority.
 *
 * Dimension priority order: personal(1) < changelog(2) < roadmap(3) < specs(4).
 * Within a dimension, specs are ordered by priority weight (critical > high > medium > low).
 *
 * @param specs - All loaded specs
 * @returns Merged content string ordered by priority, sections separated by ---
 */
export function mergeByPriority(specs: LoadedSpec[]): string {
  if (!specs.length) {
    return '';
  }

  const dimRank = (s: LoadedSpec): number => DIMENSION_PRIORITY[s.dimension] ?? 0;
  const weightRank = (s: LoadedSpec): number => SPEC_PRIORITY_WEIGHT[s.priority] ?? 0;

  // Stable copy-sort: dimension ascending, then spec weight descending.
  const ordered = [...specs].sort(
    (a, b) => (dimRank(a) - dimRank(b)) || (weightRank(b) - weightRank(a))
  );

  // Render each spec as a titled section and join with horizontal rules.
  return ordered
    .map(spec => `## ${spec.title}\n\n${spec.content.trim()}`)
    .join('\n\n---\n\n');
}
// ============================================================================
// Internal Helpers
// ============================================================================
/**
 * Resolve keywords from options.
 *
 * Resolution order:
 *   1. options.keywords when non-empty (pre-extracted)
 *   2. options.stdinData.user_prompt or options.stdinData.prompt (extracted)
 *   3. empty array (downstream, only required specs will load)
 */
function resolveKeywords(options: SpecLoadOptions): string[] {
  const explicit = options.keywords;
  if (explicit && explicit.length > 0) {
    return explicit;
  }

  // NOTE: `||` (not `??`) so an empty-string user_prompt falls through to prompt.
  const promptText = options.stdinData?.user_prompt || options.stdinData?.prompt;
  if (typeof promptText === 'string' && promptText) {
    return extractKeywords(promptText);
  }

  return [];
}
/**
 * Load MD file content for a list of spec entries.
 *
 * Reads each file, strips YAML frontmatter via gray-matter, returns body content.
 * Best-effort: missing, unreadable, or effectively-empty files are skipped
 * silently; malformed frontmatter falls back to the raw file body.
 *
 * @param projectPath - Project root directory
 * @param entries - Spec index entries to load
 * @returns Array of loaded specs with content
 */
function loadSpecContent(
  projectPath: string,
  entries: SpecIndexEntry[]
): LoadedSpec[] {
  const results: LoadedSpec[] = [];

  for (const entry of entries) {
    const mdPath = join(projectPath, entry.file);
    if (!existsSync(mdPath)) {
      continue;
    }

    let raw: string;
    try {
      raw = readFileSync(mdPath, 'utf-8');
    } catch {
      continue; // unreadable file — skip without failing the whole load
    }

    let body: string;
    try {
      body = matter(raw).content; // strip YAML frontmatter
    } catch {
      body = raw; // frontmatter parse failed — use the raw file content
    }

    // Ignore files whose body is empty after stripping frontmatter.
    if (!body.trim()) {
      continue;
    }

    results.push({
      title: entry.title,
      dimension: entry.dimension,
      priority: entry.priority,
      content: body,
    });
  }

  return results;
}
/**
 * Format the merged content for output.
 *
 * CLI format: markdown with a count header over the merged sections.
 * Hook format: JSON { continue: true, systemMessage: '<project-specs>...</project-specs>' }
 * When nothing matched: CLI gets a notice string, hooks get a bare pass-through JSON.
 *
 * @param mergedContent - Priority-merged spec content
 * @param matchedTitles - List of matched spec titles
 * @param format - Output format ('cli' or 'hook')
 * @returns Formatted string
 */
function formatOutput(
  mergedContent: string,
  matchedTitles: string[],
  format: 'cli' | 'hook'
): string {
  // Empty result: keep hooks non-blocking, tell CLI users nothing matched.
  if (!mergedContent) {
    return format === 'hook'
      ? JSON.stringify({ continue: true })
      : '(No matching specs found)';
  }

  if (format === 'hook') {
    // Wrap the specs in <project-specs> tags inside the hook's systemMessage.
    return JSON.stringify({
      continue: true,
      systemMessage: `<project-specs>\n${mergedContent}\n</project-specs>`,
    });
  }

  // CLI: markdown document with a loaded-count header.
  return `# Project Specs (${matchedTitles.length} loaded)\n\n${mergedContent}`;
}
/**
 * Emit a debug line on stderr, keeping stdout clean for hook JSON output.
 */
function debugLog(message: string): void {
  const line = '[spec-loader] ' + message + '\n';
  process.stderr.write(line);
}