Add comprehensive code review specifications and templates

- Introduced best practices requirements specification covering code quality, performance, maintainability, error handling, and documentation standards.
- Established quality standards with overall quality metrics and mandatory checks for security, code quality, performance, and maintainability.
- Created security requirements specification aligned with OWASP Top 10 and CWE Top 25, detailing checks and patterns for common vulnerabilities.
- Developed templates for documenting best practice findings, security findings, and generating reports, including structured markdown and JSON formats.
- Updated dependencies in the project, ensuring compatibility and stability.
- Added test files and README documentation for vector indexing tests.
This commit is contained in:
catlog22
2026-01-06 23:11:15 +08:00
parent 02d66325a0
commit ef770ff29b
32 changed files with 4530 additions and 164 deletions

View File

@@ -20,14 +20,15 @@ export interface CliConfig {
tools: Record<string, CliToolConfig>;
}
export type CliToolName = 'gemini' | 'qwen' | 'codex';
export type CliToolName = 'gemini' | 'qwen' | 'codex' | 'claude';
// ========== Constants ==========
export const PREDEFINED_MODELS: Record<CliToolName, string[]> = {
gemini: ['gemini-2.5-pro', 'gemini-2.5-flash', 'gemini-2.0-flash', 'gemini-1.5-pro', 'gemini-1.5-flash'],
qwen: ['coder-model', 'vision-model', 'qwen2.5-coder-32b'],
codex: ['gpt-5.2', 'gpt-4.1', 'o4-mini', 'o3']
codex: ['gpt-5.2', 'gpt-4.1', 'o4-mini', 'o3'],
claude: ['sonnet', 'opus', 'haiku', 'claude-sonnet-4-5-20250929', 'claude-opus-4-5-20251101']
};
export const DEFAULT_CONFIG: CliConfig = {
@@ -47,6 +48,11 @@ export const DEFAULT_CONFIG: CliConfig = {
enabled: true,
primaryModel: 'gpt-5.2',
secondaryModel: 'gpt-5.2'
},
claude: {
enabled: true,
primaryModel: 'sonnet',
secondaryModel: 'haiku'
}
}
};
@@ -63,7 +69,7 @@ function ensureConfigDirForProject(baseDir: string): void {
}
function isValidToolName(tool: string): tool is CliToolName {
return ['gemini', 'qwen', 'codex'].includes(tool);
return ['gemini', 'qwen', 'codex', 'claude'].includes(tool);
}
function validateConfig(config: unknown): config is CliConfig {
@@ -74,7 +80,7 @@ function validateConfig(config: unknown): config is CliConfig {
if (!c.tools || typeof c.tools !== 'object') return false;
const tools = c.tools as Record<string, unknown>;
for (const toolName of ['gemini', 'qwen', 'codex']) {
for (const toolName of ['gemini', 'qwen', 'codex', 'claude']) {
const tool = tools[toolName];
if (!tool || typeof tool !== 'object') return false;

View File

@@ -66,6 +66,309 @@ function errorLog(category: string, message: string, error?: Error | unknown, co
}
}
// ========== Unified Stream-JSON Parser ==========
/**
 * One line of Claude CLI `stream-json` output.
 *
 * Every field except `type` is optional; the parser in this file reads a
 * different subset of them depending on `type` / `subtype`.
 */
interface ClaudeStreamMessage {
  /** Message discriminator. */
  type: 'system' | 'assistant' | 'result' | 'error';
  /** Secondary discriminator (`init` is checked on `system` messages). */
  subtype?: 'init' | 'success' | 'error';

  // --- read from `system` / `init` messages ---
  session_id?: string;
  model?: string;

  // --- read from `assistant` messages ---
  /** Assistant payload: a list of text content items. */
  message?: {
    content: { type: 'text'; text: string }[];
  };

  // --- read from `result` messages ---
  result?: string;
  /** Cost in USD, as the field name indicates. */
  total_cost_usd?: number;
  /** Token counters. */
  usage?: {
    input_tokens?: number;
    output_tokens?: number;
  };

  // --- read from `error` messages ---
  error?: string;
}
/**
 * One line of Gemini or Qwen CLI `stream-json` output (both tools share this
 * shape in this file).
 */
interface GeminiStreamMessage {
  /** Message discriminator. */
  type: 'init' | 'message' | 'result';
  timestamp?: string;

  // --- read from `init` messages ---
  session_id?: string;
  model?: string;

  // --- read from `message` messages ---
  /** Author of the message; only `assistant` messages are rendered by the parser. */
  role?: 'user' | 'assistant';
  content?: string;
  /** Truthy when `content` is an incremental chunk rather than a full message. */
  delta?: boolean;

  // --- read from `result` messages ---
  status?: 'success' | 'error';
  /** Token counters. */
  stats?: {
    total_tokens?: number;
    input_tokens?: number;
    output_tokens?: number;
  };
}
/**
 * One line of Codex CLI JSON output.
 */
interface CodexStreamMessage {
  /** Event discriminator. */
  type: 'thread.started' | 'turn.started' | 'item.completed' | 'turn.completed';

  /** Thread identifier, read from `thread.started` events. */
  thread_id?: string;

  /** Item payload, read from `item.completed` events. */
  item?: {
    type: 'reasoning' | 'agent_message';
    text: string;
  };

  /** Token counters, read from `turn.completed` events. */
  usage?: {
    input_tokens?: number;
    output_tokens?: number;
  };
}
/**
 * Unified stream-JSON parser for the Claude, Gemini, Qwen, and Codex CLIs.
 *
 * Feed raw stdout text to `processChunk()` as it arrives and call `flush()`
 * once the stream ends. The parser splits the input into lines, decodes each
 * line as one JSON message in the selected tool's format, and returns
 * display text with a message-type prefix. Along the way it accumulates the
 * assistant's response text, session metadata, and usage counters, which are
 * available through the getters.
 *
 * Lines that are not a JSON object (plain text, or JSON scalars/arrays) are
 * passed through unchanged. JSON objects of a type the parser does not
 * render produce no output.
 */
class UnifiedStreamParser {
  private readonly tool: 'claude' | 'gemini' | 'qwen' | 'codex';
  /** Trailing partial line, held until the rest of it arrives. */
  private lineBuffer = '';
  /** Concatenation of all assistant response text seen so far. */
  private extractedText = '';
  private sessionInfo: { session_id?: string; model?: string; thread_id?: string } = {};
  private usageInfo: { cost?: number; tokens?: { input: number; output: number } } = {};
  /**
   * True while a Gemini/Qwen delta-streamed response line has been started
   * but not yet terminated with a newline.
   */
  private deltaLineOpen = false;

  /**
   * @param tool CLI whose output format should be decoded.
   */
  constructor(tool: 'claude' | 'gemini' | 'qwen' | 'codex') {
    this.tool = tool;
  }

  /**
   * Process an incoming data chunk.
   *
   * @param data Raw text; may contain zero, one, or many lines and may end
   *   in the middle of a line.
   * @returns Display text for every complete line in the input (possibly
   *   empty).
   */
  processChunk(data: string): string {
    this.lineBuffer += data;
    const lines = this.lineBuffer.split('\n');
    // The last element is whatever follows the final newline: keep it buffered.
    this.lineBuffer = lines.pop() ?? '';

    let output = '';
    for (const line of lines) {
      const trimmed = line.trim();
      if (!trimmed) continue;
      try {
        output += this.parseJsonLine(trimmed);
      } catch {
        // Not a stream-json message - pass the original line through as-is,
        // terminating any in-progress delta line first so the two don't fuse.
        debugLog('STREAM_PARSER', `Non-JSON line (passing through): ${trimmed.substring(0, 100)}`);
        output += `${this.closeDeltaLine()}${line}\n`;
      }
    }
    return output;
  }

  /**
   * Decode one line and dispatch it to the parser for the configured tool.
   *
   * @throws SyntaxError if the line is not valid JSON.
   * @throws TypeError if the line is valid JSON but not an object; without
   *   this check a bare number/string/array would decode to a "message" with
   *   no recognised type and be dropped from the output.
   */
  private parseJsonLine(line: string): string {
    const parsed: unknown = JSON.parse(line);
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      throw new TypeError('Line is not a stream-json object');
    }
    switch (this.tool) {
      case 'claude':
        return this.parseClaudeMessage(parsed as ClaudeStreamMessage);
      case 'gemini':
      case 'qwen':
        return this.parseGeminiQwenMessage(parsed as GeminiStreamMessage);
      case 'codex':
        return this.parseCodexMessage(parsed as CodexStreamMessage);
      default:
        return '';
    }
  }

  /**
   * Render a Claude stream-json message.
   */
  private parseClaudeMessage(msg: ClaudeStreamMessage): string {
    let output = '';

    // Session metadata
    if (msg.type === 'system' && msg.subtype === 'init') {
      this.sessionInfo.session_id = msg.session_id;
      this.sessionInfo.model = msg.model;
      debugLog('STREAM_PARSER', 'Claude session initialized', this.sessionInfo);
      output += `[系统] 会话初始化: ${msg.model || 'unknown'}\n`;
    }

    // Assistant response text
    const blocks = msg.message?.content;
    if (msg.type === 'assistant' && Array.isArray(blocks)) {
      for (const item of blocks) {
        // Skip anything that is not a non-blank text item
        if (item.type === 'text' && typeof item.text === 'string' && item.text.trim()) {
          this.extractedText += item.text;
          output += `[响应] ${item.text}\n`;
        }
      }
    }

    // Result metadata
    if (msg.type === 'result') {
      if (msg.total_cost_usd !== undefined) {
        this.usageInfo.cost = msg.total_cost_usd;
      }
      if (msg.usage) {
        this.usageInfo.tokens = {
          input: msg.usage.input_tokens ?? 0,
          output: msg.usage.output_tokens ?? 0
        };
      }
      debugLog('STREAM_PARSER', 'Claude execution result received', {
        subtype: msg.subtype,
        cost: this.usageInfo.cost,
        tokens: this.usageInfo.tokens
      });
      output += `[结果] 状态: ${msg.subtype || 'completed'}\n`;
    }

    // Errors reported inside the stream
    if (msg.type === 'error') {
      errorLog('STREAM_PARSER', `Claude error in stream: ${msg.error || 'Unknown error'}`);
      output += `[错误] ${msg.error || 'Unknown error'}\n`;
    }

    return output;
  }

  /**
   * Render a Gemini/Qwen stream-json message.
   */
  private parseGeminiQwenMessage(msg: GeminiStreamMessage): string {
    let output = '';

    // Session metadata
    if (msg.type === 'init') {
      this.sessionInfo.session_id = msg.session_id;
      this.sessionInfo.model = msg.model;
      debugLog('STREAM_PARSER', `${this.tool} session initialized`, this.sessionInfo);
      output += this.closeDeltaLine();
      output += `[系统] 会话初始化: ${msg.model || 'unknown'}\n`;
    }

    // Assistant message (full or delta chunk)
    if (msg.type === 'message' && msg.role === 'assistant' && typeof msg.content === 'string' && msg.content) {
      output += this.renderAssistantContent(msg.content, Boolean(msg.delta));
    }

    // Result statistics
    if (msg.type === 'result') {
      // Terminate a delta-streamed line; full messages already end in '\n'.
      output += this.closeDeltaLine();
      if (msg.stats) {
        this.usageInfo.tokens = {
          input: msg.stats.input_tokens ?? 0,
          output: msg.stats.output_tokens ?? 0
        };
      }
      debugLog('STREAM_PARSER', `${this.tool} execution result received`, {
        status: msg.status,
        tokens: this.usageInfo.tokens
      });
      output += `[结果] 状态: ${msg.status || 'success'}\n`;
    }

    return output;
  }

  /**
   * Render Gemini/Qwen assistant content and record it in `extractedText`.
   *
   * Blank content is ignored, except for delta chunks that arrive while a
   * response line is already in progress: there a chunk such as " " or "\n"
   * is real text, and dropping it would glue neighbouring words together.
   */
  private renderAssistantContent(content: string, isDelta: boolean): string {
    const isBlank = !content.trim();

    if (isDelta) {
      if (isBlank && !this.deltaLineOpen) return '';
      this.extractedText += content;
      if (this.deltaLineOpen) return content;
      // First chunk of a streamed response carries the prefix
      this.deltaLineOpen = true;
      return `[响应] ${content}`;
    }

    if (isBlank) return '';
    this.extractedText += content;
    return `${this.closeDeltaLine()}[响应] ${content}\n`;
  }

  /**
   * Render a Codex JSON message.
   */
  private parseCodexMessage(msg: CodexStreamMessage): string {
    let output = '';

    // Thread metadata
    if (msg.type === 'thread.started' && msg.thread_id) {
      this.sessionInfo.thread_id = msg.thread_id;
      debugLog('STREAM_PARSER', 'Codex thread started', { thread_id: msg.thread_id });
      output += `[系统] 线程启动: ${msg.thread_id}\n`;
    }

    // Completed items: skip missing/blank text instead of printing "undefined"
    if (msg.type === 'item.completed' && msg.item) {
      const text = msg.item.text;
      if (typeof text === 'string' && text.trim()) {
        if (msg.item.type === 'reasoning') {
          output += `[思考] ${text}\n`;
        } else if (msg.item.type === 'agent_message') {
          this.extractedText += text;
          output += `[响应] ${text}\n`;
        }
      }
    }

    // Usage statistics
    if (msg.type === 'turn.completed' && msg.usage) {
      this.usageInfo.tokens = {
        input: msg.usage.input_tokens ?? 0,
        output: msg.usage.output_tokens ?? 0
      };
      debugLog('STREAM_PARSER', 'Codex turn completed', {
        tokens: this.usageInfo.tokens
      });
      output += `[结果] 回合完成\n`;
    }

    return output;
  }

  /**
   * Return the newline needed to terminate an in-progress delta line, or an
   * empty string when no such line is open.
   */
  private closeDeltaLine(): string {
    if (!this.deltaLineOpen) return '';
    this.deltaLineOpen = false;
    return '\n';
  }

  /**
   * Flush the remaining buffer on stream end.
   *
   * @returns Display text for a final line that had no trailing newline,
   *   followed by a newline if a delta-streamed response was left open.
   */
  flush(): string {
    let output = '';
    if (this.lineBuffer.trim()) {
      output += this.processChunk('\n'); // Force processing of the last line
    }
    output += this.closeDeltaLine();
    return output;
  }

  /**
   * Get the full assistant response text collected so far.
   */
  getExtractedText(): string {
    return this.extractedText;
  }

  /**
   * Get session metadata (the parser's own object, not a copy).
   */
  getSessionInfo() {
    return this.sessionInfo;
  }

  /**
   * Get usage metadata (the parser's own object, not a copy).
   */
  getUsageInfo() {
    return this.usageInfo;
  }
}
// LiteLLM integration
import { executeLiteLLMEndpoint } from './litellm-executor.js';
import { findEndpointById } from '../config/litellm-api-config-manager.js';
@@ -116,7 +419,7 @@ function getSqliteStoreSync(baseDir: string) {
// Define Zod schema for validation
const ParamsSchema = z.object({
tool: z.enum(['gemini', 'qwen', 'codex']),
tool: z.enum(['gemini', 'qwen', 'codex', 'claude']),
prompt: z.string().min(1, 'Prompt is required'),
mode: z.enum(['analysis', 'write', 'auto']).default('analysis'),
format: z.enum(['plain', 'yaml', 'json']).default('plain'), // Multi-turn prompt concatenation format
@@ -255,6 +558,7 @@ interface ExecutionOutput {
conversation: ConversationRecord; // Full conversation record
stdout: string;
stderr: string;
parsedOutput?: string; // Parsed output from stream parser (for stream-json tools)
}
/**
@@ -380,6 +684,8 @@ function buildCommand(params: {
if (include) {
args.push('--include-directories', include);
}
// Enable stream-json output for unified parsing
args.push('--output-format', 'stream-json');
break;
case 'qwen':
@@ -400,6 +706,8 @@ function buildCommand(params: {
if (include) {
args.push('--include-directories', include);
}
// Enable stream-json output for unified parsing
args.push('--output-format', 'stream-json');
break;
case 'codex':
@@ -434,6 +742,8 @@ function buildCommand(params: {
args.push('--add-dir', addDir);
}
}
// Enable JSON output for unified parsing
args.push('--json');
// Use `-` to indicate reading prompt from stdin
args.push('-');
} else {
@@ -458,6 +768,8 @@ function buildCommand(params: {
args.push('--add-dir', addDir);
}
}
// Enable JSON output for unified parsing
args.push('--json');
// Use `-` to indicate reading prompt from stdin (avoids Windows escaping issues)
args.push('-');
}
@@ -483,8 +795,9 @@ function buildCommand(params: {
} else {
args.push('--permission-mode', 'default');
}
// Output format for better parsing
args.push('--output-format', 'text');
// Output format: stream-json, so output can be parsed in real time by the unified stream parser
args.push('--output-format', 'stream-json');
args.push('--verbose'); // Required for stream-json format
// Add directories
if (include) {
const dirs = include.split(',').map(d => d.trim()).filter(d => d);
@@ -962,11 +1275,23 @@ async function executeCliTool(
let stderr = '';
let timedOut = false;
// Initialize unified stream parser for all tools
const streamParser = ['claude', 'gemini', 'qwen', 'codex'].includes(tool)
? new UnifiedStreamParser(tool as 'claude' | 'gemini' | 'qwen' | 'codex')
: null;
// Handle stdout
child.stdout!.on('data', (data) => {
const text = data.toString();
stdout += text;
if (onOutput) {
// Parse stream-json for all supported tools
if (streamParser && onOutput) {
const parsedText = streamParser.processChunk(text);
if (parsedText) {
onOutput({ type: 'stdout', data: parsedText });
}
} else if (onOutput) {
onOutput({ type: 'stdout', data: text });
}
});
@@ -985,6 +1310,23 @@ async function executeCliTool(
// Clear current child process reference
currentChildProcess = null;
// Flush unified parser buffer if present
if (streamParser && onOutput) {
const remaining = streamParser.flush();
if (remaining) {
onOutput({ type: 'stdout', data: remaining });
}
// Log usage information if available
const usageInfo = streamParser.getUsageInfo();
if (usageInfo.cost !== undefined || usageInfo.tokens) {
debugLog('STREAM_USAGE', `${tool} execution usage`, {
cost_usd: usageInfo.cost,
tokens: usageInfo.tokens
});
}
}
const endTime = Date.now();
const duration = endTime - startTime;
@@ -1212,7 +1554,8 @@ async function executeCliTool(
execution,
conversation,
stdout,
stderr
stderr,
parsedOutput: streamParser?.getExtractedText() || undefined
});
});

View File

@@ -2,7 +2,8 @@
* Smart Search Tool - Unified intelligent search with CodexLens integration
*
* Features:
* - Intent classification with automatic mode selection
* - Fuzzy mode: FTS + ripgrep fusion with RRF ranking (default)
* - Semantic mode: Dense coarse retrieval + cross-encoder reranking
* - CodexLens integration (init, dense_rerank, fts)
* - Ripgrep fallback for exact mode
* - Index status checking and warnings
@@ -10,7 +11,7 @@
*
* Actions:
* - init: Initialize CodexLens index
* - search: Intelligent search with auto mode selection
* - search: Intelligent search with fuzzy (default) or semantic mode
* - status: Check index status
* - update: Incremental index update for changed files
* - watch: Start file watcher for automatic updates
@@ -66,7 +67,7 @@ const ParamsSchema = z.object({
action: z.enum(['init', 'search', 'search_files', 'find_files', 'status', 'update', 'watch']).default('search'),
query: z.string().optional().describe('Content search query (for action="search")'),
pattern: z.string().optional().describe('Glob pattern for path matching (for action="find_files")'),
mode: z.enum(['auto', 'hybrid', 'exact', 'ripgrep', 'priority']).default('auto'),
mode: z.enum(['fuzzy', 'semantic']).default('fuzzy'),
output_mode: z.enum(['full', 'files_only', 'count']).default('full'),
path: z.string().optional(),
paths: z.array(z.string()).default([]),
@@ -94,7 +95,7 @@ const ParamsSchema = z.object({
type Params = z.infer<typeof ParamsSchema>;
// Search mode constants
const SEARCH_MODES = ['auto', 'hybrid', 'exact', 'ripgrep', 'priority'] as const;
const SEARCH_MODES = ['fuzzy', 'semantic'] as const;
// Classification confidence threshold
const CONFIDENCE_THRESHOLD = 0.7;
@@ -850,6 +851,93 @@ async function executeWatchAction(params: Params): Promise<SearchResult> {
};
}
/**
 * Mode: fuzzy - FTS + ripgrep fusion with RRF ranking.
 *
 * Runs `executeCodexLensExactMode` and `executeRipgrepMode` concurrently,
 * keeps the results of whichever backends succeeded, merges them with
 * `applyRRFFusion` using equal weights, and splits the merged list into
 * full results plus extra file paths.
 *
 * @param params Parsed tool parameters; `query` is required, `maxResults`
 *   defaults to 5 and `extraFilesCount` to 10.
 * @returns A successful result with `metadata.mode === 'fuzzy'`, or a failed
 *   result when `query` is missing or neither backend produced results. In
 *   the latter case the error lists why each backend failed, whether it
 *   rejected or returned an unsuccessful result.
 */
async function executeFuzzyMode(params: Params): Promise<SearchResult> {
  const { query, maxResults = 5, extraFilesCount = 10 } = params;

  if (!query) {
    return {
      success: false,
      error: 'Query is required for search',
    };
  }

  const timer = createTimer();

  // Run both searches in parallel; allSettled so one failure can't sink the other
  const [ftsResult, ripgrepResult] = await Promise.allSettled([
    executeCodexLensExactMode(params),
    executeRipgrepMode(params),
  ]);
  timer.mark('parallel_search');

  // Collect usable results per backend, and a reason for each backend that has none
  const resultsMap = new Map<string, any[]>();
  const errors: string[] = [];
  const backends = [
    { key: 'exact', label: 'FTS', outcome: ftsResult },
    { key: 'ripgrep', label: 'Ripgrep', outcome: ripgrepResult },
  ];
  for (const { key, label, outcome } of backends) {
    if (outcome.status === 'rejected') {
      errors.push(`${label}: ${outcome.reason}`);
    } else if (outcome.value.success && outcome.value.results) {
      resultsMap.set(key, outcome.value.results as any[]);
    } else {
      // Fulfilled but unsuccessful: surface the backend's own error message
      errors.push(`${label}: ${outcome.value.error ?? 'no results returned'}`);
    }
  }

  // If both failed, return error
  if (resultsMap.size === 0) {
    return {
      success: false,
      error: `Both search backends failed: ${errors.join('; ')}`,
    };
  }

  // Apply RRF fusion with fuzzy-optimized weights
  // Fuzzy mode: balanced between exact and ripgrep
  const fusionWeights = { exact: 0.5, ripgrep: 0.5 };
  const totalToFetch = maxResults + extraFilesCount;
  const fusedResults = applyRRFFusion(resultsMap, fusionWeights, totalToFetch);
  timer.mark('rrf_fusion');

  // Normalize results format
  const normalizedResults = fusedResults.map((item: any) => ({
    file: item.file || item.path,
    line: item.line || 0,
    column: item.column || 0,
    content: item.content || '',
    score: item.fusion_score || 0,
    matchCount: item.matchCount,
    matchScore: item.matchScore,
  }));

  // Split results: first N with full content, rest as file paths only
  const { results, extra_files } = splitResultsWithExtraFiles(normalizedResults, maxResults, extraFilesCount);

  // Log timing
  timer.log();
  const timings = timer.getTimings();

  return {
    success: true,
    results,
    extra_files: extra_files.length > 0 ? extra_files : undefined,
    metadata: {
      mode: 'fuzzy',
      backend: 'fts+ripgrep',
      count: results.length,
      query,
      note: `Fuzzy search using RRF fusion of FTS and ripgrep (weights: exact=${fusionWeights.exact}, ripgrep=${fusionWeights.ripgrep})`,
      timing: TIMING_ENABLED ? timings : undefined,
    },
  };
}
/**
* Mode: auto - Intent classification and mode selection
* Routes to: hybrid (NL + index) | exact (index) | ripgrep (no index)
@@ -1832,10 +1920,9 @@ export const schema: ToolSchema = {
- watch: Start file watcher for automatic updates
**Content Search (action="search"):**
smart_search(query="authentication logic") # auto mode - routes to best backend
smart_search(query="MyClass", mode="exact") # exact mode - precise FTS matching
smart_search(query="auth", mode="ripgrep") # ripgrep mode - fast literal search
smart_search(query="how to auth", mode="hybrid") # hybrid mode - semantic + fuzzy search
smart_search(query="authentication logic") # fuzzy mode (default) - FTS + ripgrep fusion
smart_search(query="MyClass", mode="fuzzy") # fuzzy mode - fast hybrid search
smart_search(query="how to auth", mode="semantic") # semantic mode - dense + reranker
**File Discovery (action="find_files"):**
smart_search(action="find_files", pattern="*.ts") # find all TypeScript files
@@ -1852,17 +1939,7 @@ export const schema: ToolSchema = {
smart_search(query="auth", limit=10, offset=0) # first page
smart_search(query="auth", limit=10, offset=10) # second page
**Multi-Word Search (ripgrep mode with tokenization):**
smart_search(query="CCW_PROJECT_ROOT CCW_ALLOWED_DIRS", mode="ripgrep") # tokenized OR matching
smart_search(query="auth login user", mode="ripgrep") # matches any token, ranks by match count
smart_search(query="exact phrase", mode="ripgrep", tokenize=false) # disable tokenization
**Regex Search (ripgrep mode):**
smart_search(query="class.*Builder") # auto-detects regex pattern
smart_search(query="def.*\\(.*\\):") # find function definitions
smart_search(query="import.*from", caseSensitive=false) # case-insensitive
**Modes:** auto (intelligent routing), hybrid (semantic+fuzzy), exact (FTS), ripgrep (fast with tokenization), priority (fallback chain)`,
**Modes:** fuzzy (FTS + ripgrep fusion, default), semantic (dense + reranker)`,
inputSchema: {
type: 'object',
properties: {
@@ -1883,8 +1960,8 @@ export const schema: ToolSchema = {
mode: {
type: 'string',
enum: SEARCH_MODES,
description: 'Search mode: auto, hybrid (best quality), exact (CodexLens FTS), ripgrep (fast, no index), priority (fallback chain)',
default: 'auto',
description: 'Search mode: fuzzy (FTS + ripgrep fusion, default), semantic (dense + reranker for natural language queries)',
default: 'fuzzy',
},
output_mode: {
type: 'string',
@@ -2323,25 +2400,16 @@ export async function handler(params: Record<string, unknown>): Promise<ToolResu
case 'search':
default:
// Handle search modes: auto | hybrid | exact | ripgrep | priority
// Handle search modes: fuzzy | semantic
switch (mode) {
case 'auto':
result = await executeAutoMode(parsed.data);
case 'fuzzy':
result = await executeFuzzyMode(parsed.data);
break;
case 'hybrid':
case 'semantic':
result = await executeHybridMode(parsed.data);
break;
case 'exact':
result = await executeCodexLensExactMode(parsed.data);
break;
case 'ripgrep':
result = await executeRipgrepMode(parsed.data);
break;
case 'priority':
result = await executePriorityFallbackMode(parsed.data);
break;
default:
throw new Error(`Unsupported mode: ${mode}. Use: auto, hybrid, exact, ripgrep, or priority`);
throw new Error(`Unsupported mode: ${mode}. Use: fuzzy or semantic`);
}
break;
}