From 99eeeff6f7b38fa012d9b3194dbf680e7815b95b Mon Sep 17 00:00:00 2001 From: catlog22 Date: Mon, 29 Dec 2025 17:36:50 +0800 Subject: [PATCH] test(cli-executor): add qwen/codex and multi-tool workflow tests Solution-ID: SOL-1735410003 Issue-ID: ISS-1766921318981-23 Task-ID: T3 --- .../cli-executor/multi-tool-workflow.test.ts | 586 ++++++++++++++++++ .../integration/cli-executor/tool-stub.js | 12 +- multi-tool-workflow.test.ts | 2 + 3 files changed, 598 insertions(+), 2 deletions(-) create mode 100644 ccw/tests/integration/cli-executor/multi-tool-workflow.test.ts create mode 100644 multi-tool-workflow.test.ts diff --git a/ccw/tests/integration/cli-executor/multi-tool-workflow.test.ts b/ccw/tests/integration/cli-executor/multi-tool-workflow.test.ts new file mode 100644 index 00000000..fb0bdffa --- /dev/null +++ b/ccw/tests/integration/cli-executor/multi-tool-workflow.test.ts @@ -0,0 +1,586 @@ +/** + * Integration tests for cli-executor: qwen/codex and multi-tool scenarios. + * + * Notes: + * - Targets the runtime implementation shipped in `ccw/dist`. + * - Uses stub CLI shims (gemini/qwen/codex) to avoid external dependencies. + */ + +import { after, afterEach, before, beforeEach, describe, it, mock } from 'node:test'; +import assert from 'node:assert/strict'; + +import { + closeCliHistoryStores, + makeEnhancedPrompt, + setupTestEnv, + setupTestProject, + validateExecutionResult, + type CliToolName, + type TestEnv, + type TestProject, +} from './setup.ts'; + +const cliExecutorUrl = new URL('../../../dist/tools/cli-executor.js', import.meta.url); +cliExecutorUrl.searchParams.set('t', String(Date.now())); + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +let cliExecutor: any; + +let env: TestEnv; +let project: TestProject; + +function parseFirstJsonLine(text: string): any { + const line = text.split(/\r?\n/).find((l) => l.trim().length > 0); + return JSON.parse(line || '{}'); +} + +function normalizeSlash(value: string): string { + return value.replace(/\\/g, '/'); +} + +async function executeWithFallback(params: { + prompt: string; + mode: 'analysis' | 'write' | 'auto'; + model?: string; + cd: string; + includeDirs?: string; + timeout?: number; +}): Promise<{ attempts: Array<{ tool: CliToolName; success: boolean }>; result: any }> { + const attempts: Array<{ tool: CliToolName; success: boolean }> = []; + const tools: CliToolName[] = ['gemini', 'qwen', 'codex']; + + let last: any = null; + for (const tool of tools) { + const res = await cliExecutor.executeCliTool({ tool, ...params }); + last = res; + attempts.push({ tool, success: Boolean(res?.success) }); + if (res?.success) return { attempts, result: res }; + } + + return { attempts, result: last }; +} + +function selectToolForTask(task: { intent: string; complexity: 'low' | 'medium' | 'high' }): CliToolName { + const intent = task.intent.toLowerCase(); + if (intent.includes('implement') || intent.includes('refactor') || task.complexity === 'high') return 'codex'; + if (intent.includes('analyze') || intent.includes('explain') || intent.includes('plan')) return 'gemini'; + return 'qwen'; +} + +describe('cli-executor integration: qwen/codex + multi-tool', () => { + before(async () => { + mock.method(console, 'log', () => {}); + mock.method(console, 'error', () => {}); + cliExecutor = await import(cliExecutorUrl.href); + }); + + beforeEach(() => { + cliExecutor?.clearToolCache?.(); + env = setupTestEnv(['gemini', 'qwen', 'codex']); + project = setupTestProject(); + }); + + afterEach(async () => { + await closeCliHistoryStores(); + env.restore(); + env.cleanup(); + project.cleanup(); + }); + + after(() => { + mock.restoreAll(); + }); + + it('qwen analysis mode passes -m model and no approval flag', async () => { + const prompt = makeEnhancedPrompt({ + purpose: 'Qwen analysis', + task: 'Inspect code', + mode: 'analysis', + context: '@src/**/*.ts', + expected: 'OK', + rules: 'analysis=READ-ONLY', + }); + + const res = await cliExecutor.executeCliTool({ + tool: 'qwen', + prompt, + mode: 'analysis', + model: 'qwen-test-model', + cd: project.projectDir, + }); + + validateExecutionResult(res, { success: true, tool: 'qwen' }); + const payload = parseFirstJsonLine(res.stdout); + assert.ok(payload.args.includes('-m')); + assert.ok(payload.args.includes('qwen-test-model')); + assert.equal(payload.args.includes('--approval-mode'), false); + }); + + it('qwen write mode includes --approval-mode yolo', async () => { + const prompt = makeEnhancedPrompt({ + purpose: 'Qwen write', + task: 'Create file', + mode: 'write', + context: '@src/index.ts', + expected: 'file written', + rules: 'write=CREATE', + directives: { write_files: { 'qwen.txt': 'hello' } }, + }); + + const res = await cliExecutor.executeCliTool({ + tool: 'qwen', + prompt, + mode: 'write', + model: 'qwen-test-model', + cd: project.projectDir, + }); + + validateExecutionResult(res, { success: true, tool: 'qwen' }); + const payload = parseFirstJsonLine(res.stdout); + assert.ok(payload.args.includes('--approval-mode')); + assert.ok(payload.args.includes('yolo')); + assert.ok(payload.wrote_files.includes('qwen.txt')); + }); + + it('qwen includeDirs maps to --include-directories', async () => { + const prompt = makeEnhancedPrompt({ + purpose: 'Qwen includeDirs', + task: 'Resolve shared files', + mode: 'analysis', + context: '@../shared/**/*', + expected: 'Resolved shared files list', + rules: 'analysis=READ-ONLY', + directives: { resolve_patterns: true }, + }); + + const res = await cliExecutor.executeCliTool({ + tool: 'qwen', + prompt, + mode: 'analysis', + model: 'qwen-test-model', + cd: project.projectDir, + includeDirs: '../shared', + }); + + const payload = parseFirstJsonLine(res.stdout); + assert.ok(payload.args.includes('--include-directories')); + assert.ok(payload.args.includes('../shared')); + assert.ok(payload.resolved_files.some((p: string) => String(p).startsWith('../shared/'))); + }); + + it('qwen resume=true uses native --continue', async () => { + const prompt = makeEnhancedPrompt({ + purpose: 'Qwen resume latest', + task: 'Use native resume', + mode: 'analysis', + context: '@src/index.ts', + expected: 'Args include --continue', + rules: 'analysis=READ-ONLY', + }); + + const res = await cliExecutor.executeCliTool({ + tool: 'qwen', + prompt, + mode: 'analysis', + model: 'qwen-test-model', + cd: project.projectDir, + resume: true, + }); + + const payload = parseFirstJsonLine(res.stdout); + assert.ok(payload.args.includes('--continue')); + }); + + it('qwen noNative=true disables native resume flags', async () => { + const prompt = makeEnhancedPrompt({ + purpose: 'Qwen resume disabled', + task: 'Force prompt concat', + mode: 'analysis', + context: '@src/index.ts', + expected: 'No --continue flag', + rules: 'analysis=READ-ONLY', + }); + + const res = await cliExecutor.executeCliTool({ + tool: 'qwen', + prompt, + mode: 'analysis', + model: 'qwen-test-model', + cd: project.projectDir, + resume: true, + noNative: true, + }); + + const payload = parseFirstJsonLine(res.stdout); + assert.equal(payload.args.includes('--continue'), false); + assert.equal(payload.args.includes('--resume'), false); + }); + + it('codex analysis mode uses exec + --full-auto and reads prompt from stdin (-)', async () => { + const prompt = makeEnhancedPrompt({ + purpose: 'Codex analysis', + task: 'Read-only review', + mode: 'analysis', + context: '@src/index.ts', + expected: 'OK', + rules: 'analysis=READ-ONLY', + }); + + const res = await cliExecutor.executeCliTool({ + tool: 'codex', + prompt, + mode: 'analysis', + model: 'codex-test-model', + cd: project.projectDir, + }); + + validateExecutionResult(res, { success: true, tool: 'codex' }); + const payload = parseFirstJsonLine(res.stdout); + assert.deepEqual(payload.args.slice(0, 2), ['exec', '--full-auto']); + assert.equal(payload.args.includes('--dangerously-bypass-approvals-and-sandbox'), false); + assert.equal(payload.args.at(-1), '-'); + assert.ok(String(payload.prompt).includes('PURPOSE: Codex analysis')); + assert.equal(payload.args.join(' ').includes('PURPOSE: Codex analysis'), false); + }); + + it('codex write mode uses --dangerously-bypass-approvals-and-sandbox', async () => { + const prompt = makeEnhancedPrompt({ + purpose: 'Codex write', + task: 'Write file', + mode: 'write', + context: '@src/index.ts', + expected: 'file written', + rules: 'write=CREATE', + directives: { write_files: { 'codex.txt': 'hello' } }, + }); + + const res = await cliExecutor.executeCliTool({ + tool: 'codex', + prompt, + mode: 'write', + model: 'codex-test-model', + cd: project.projectDir, + }); + + const payload = parseFirstJsonLine(res.stdout); + assert.equal(payload.args[0], 'exec'); + assert.ok(payload.args.includes('--dangerously-bypass-approvals-and-sandbox')); + assert.ok(payload.wrote_files.includes('codex.txt')); + }); + + it('codex auto mode uses --dangerously-bypass-approvals-and-sandbox', async () => { + const prompt = makeEnhancedPrompt({ + purpose: 'Codex auto', + task: 'Autonomous execution', + mode: 'auto', + context: '@src/index.ts', + expected: 'OK', + rules: 'auto=ALLOW', + }); + + const res = await cliExecutor.executeCliTool({ + tool: 'codex', + prompt, + mode: 'auto', + model: 'codex-test-model', + cd: project.projectDir, + }); + + const payload = parseFirstJsonLine(res.stdout); + assert.ok(payload.args.includes('--dangerously-bypass-approvals-and-sandbox')); + }); + + it('codex includeDirs maps to repeated --add-dir flags', async () => { + const prompt = makeEnhancedPrompt({ + purpose: 'Codex includeDirs', + task: 'Resolve shared files', + mode: 'analysis', + context: '@../shared/**/*', + expected: 'Resolved shared files list', + rules: 'analysis=READ-ONLY', + directives: { resolve_patterns: true }, + }); + + const res = await cliExecutor.executeCliTool({ + tool: 'codex', + prompt, + mode: 'analysis', + model: 'codex-test-model', + cd: project.projectDir, + includeDirs: '../shared,../shared', // duplicates should still map to flags + }); + + const payload = parseFirstJsonLine(res.stdout); + const addDirCount = payload.args.filter((a: string) => a === '--add-dir').length; + assert.ok(addDirCount >= 1); + assert.ok(payload.resolved_files.some((p: string) => String(p).startsWith('../shared/'))); + }); + + it('codex resume=true uses `resume --last` and respects analysis permissions', async () => { + const prompt = makeEnhancedPrompt({ + purpose: 'Codex resume latest', + task: 'Use native resume', + mode: 'analysis', + context: '@src/index.ts', + expected: 'Args include resume --last', + rules: 'analysis=READ-ONLY', + }); + + const res = await cliExecutor.executeCliTool({ + tool: 'codex', + prompt, + mode: 'analysis', + model: 'codex-test-model', + cd: project.projectDir, + resume: true, + }); + + const payload = parseFirstJsonLine(res.stdout); + assert.equal(payload.args[0], 'resume'); + assert.ok(payload.args.includes('--last')); + assert.ok(payload.args.includes('--full-auto')); + assert.equal(payload.args.includes('--dangerously-bypass-approvals-and-sandbox'), false); + }); + + it('working directory is isolated per execution via --cd', async () => { + const prompt = makeEnhancedPrompt({ + purpose: 'Working directory', + task: 'Echo cwd', + mode: 'analysis', + context: '@src/index.ts', + expected: 'cwd matches', + rules: 'analysis=READ-ONLY', + }); + + const res = await cliExecutor.executeCliTool({ + tool: 'qwen', + prompt, + mode: 'analysis', + model: 'qwen-test-model', + cd: project.projectDir, + }); + + const payload = parseFirstJsonLine(res.stdout); + assert.equal(payload.cwd, normalizeSlash(project.projectDir)); + }); + + it('model override is passed as -m for codex', async () => { + const prompt = makeEnhancedPrompt({ + purpose: 'Model override', + task: 'Use model', + mode: 'analysis', + context: '@src/index.ts', + expected: 'args include -m', + rules: 'analysis=READ-ONLY', + }); + + const res = await cliExecutor.executeCliTool({ + tool: 'codex', + prompt, + mode: 'analysis', + model: 'codex-model-override', + cd: project.projectDir, + }); + + const payload = parseFirstJsonLine(res.stdout); + const idx = payload.args.indexOf('-m'); + assert.ok(idx >= 0); + assert.equal(payload.args[idx + 1], 'codex-model-override'); + }); + + it('non-zero exit with output and no fatal stderr is treated as success (qwen)', async () => { + const prompt = makeEnhancedPrompt({ + purpose: 'Non-fatal exit', + task: 'exit=1 without fatal stderr', + mode: 'analysis', + context: '@src/index.ts', + expected: 'success', + rules: 'analysis=READ-ONLY', + directives: { exit_code: 1 }, + }); + + const res = await cliExecutor.executeCliTool({ + tool: 'qwen', + prompt, + mode: 'analysis', + model: 'qwen-test-model', + cd: project.projectDir, + }); + + assert.equal(res.success, true); + assert.equal(res.execution.status, 'success'); + }); + + it('rate limit exceeded is treated as fatal error', async () => { + const prompt = makeEnhancedPrompt({ + purpose: 'Fatal exit', + task: 'stderr contains rate limit exceeded', + mode: 'analysis', + context: '@src/index.ts', + expected: 'error', + rules: 'analysis=READ-ONLY', + directives: { exit_code: 1, stderr: 'rate limit exceeded\n' }, + }); + + const res = await cliExecutor.executeCliTool({ + tool: 'qwen', + prompt, + mode: 'analysis', + model: 'qwen-test-model', + cd: project.projectDir, + }); + + assert.equal(res.success, false); + assert.equal(res.execution.status, 'error'); + }); + + it('fallback chain: gemini fatal error -> qwen success', async () => { + const prompt = makeEnhancedPrompt({ + purpose: 'Fallback test', + task: 'Try tools in order', + mode: 'analysis', + context: '@src/index.ts', + expected: 'qwen used', + rules: 'analysis=READ-ONLY', + directives: { + tool_overrides: { + gemini: { exit_code: 1, stderr: 'FATAL: Authentication failed: API key\n' }, + }, + }, + }); + + const { attempts, result } = await executeWithFallback({ + prompt, + mode: 'analysis', + model: 'test-model', + cd: project.projectDir, + }); + + assert.deepEqual(attempts.map((a) => a.tool), ['gemini', 'qwen']); + assert.equal(attempts[0].success, false); + assert.equal(attempts[1].success, true); + assert.equal(result.execution.tool, 'qwen'); + }); + + it('fallback chain: gemini fail + qwen fail -> codex success', async () => { + const prompt = makeEnhancedPrompt({ + purpose: 'Fallback test 2', + task: 'Try tools in order', + mode: 'analysis', + context: '@src/index.ts', + expected: 'codex used', + rules: 'analysis=READ-ONLY', + directives: { + tool_overrides: { + gemini: { exit_code: 1, stderr: 'FATAL: rate limit exceeded\n' }, + qwen: { exit_code: 1, stderr: 'FATAL: rate limit exceeded\n' }, + }, + }, + }); + + const { attempts, result } = await executeWithFallback({ + prompt, + mode: 'analysis', + model: 'test-model', + cd: project.projectDir, + }); + + assert.deepEqual(attempts.map((a) => a.tool), ['gemini', 'qwen', 'codex']); + assert.equal(attempts[0].success, false); + assert.equal(attempts[1].success, false); + assert.equal(attempts[2].success, true); + assert.equal(result.execution.tool, 'codex'); + }); + + it('tool selection heuristic chooses expected tool for 5+ task types', () => { + const cases: Array<{ intent: string; complexity: 'low' | 'medium' | 'high'; expected: CliToolName }> = [ + { intent: 'analyze architecture', complexity: 'low', expected: 'gemini' }, + { intent: 'explain error', complexity: 'medium', expected: 'gemini' }, + { intent: 'plan migration steps', complexity: 'medium', expected: 'gemini' }, + { intent: 'implement new feature', complexity: 'medium', expected: 'codex' }, + { intent: 'refactor core module', complexity: 'high', expected: 'codex' }, + { intent: 'summarize notes', complexity: 'low', expected: 'qwen' }, + ]; + + for (const c of cases) { + assert.equal(selectToolForTask({ intent: c.intent, complexity: c.complexity }), c.expected); + } + }); + + it('compares enhanced prompt parsing across gemini/qwen/codex for consistency', async () => { + const prompt = makeEnhancedPrompt({ + purpose: 'Compare tools', + task: 'Parse enhanced prompt', + mode: 'analysis', + context: '@src/index.ts', + expected: 'same parsed fields', + rules: 'analysis=READ-ONLY', + }); + + const results = await Promise.all( + (['gemini', 'qwen', 'codex'] as CliToolName[]).map((tool) => + cliExecutor.executeCliTool({ tool, prompt, mode: 'analysis', model: 'm', cd: project.projectDir }), + ), + ); + + const payloads = results.map((r) => parseFirstJsonLine(r.stdout)); + const parsed = payloads.map((p) => p.parsed); + + assert.deepEqual(parsed[0], parsed[1]); + assert.deepEqual(parsed[1], parsed[2]); + }); + + it('parallel execution returns results from at least two tools', async () => { + const projectA = setupTestProject(); + const projectB = setupTestProject(); + try { + const prompt = makeEnhancedPrompt({ + purpose: 'Parallel', + task: 'Run two tools', + mode: 'analysis', + context: '@src/index.ts', + expected: 'two results', + rules: 'analysis=READ-ONLY', + }); + + const [geminiRes, codexRes] = await Promise.all([ + cliExecutor.executeCliTool({ tool: 'gemini', prompt, mode: 'analysis', model: 'm', cd: projectA.projectDir }), + cliExecutor.executeCliTool({ tool: 'codex', prompt, mode: 'analysis', model: 'm', cd: projectB.projectDir }), + ]); + + assert.equal(geminiRes.success, true); + assert.equal(codexRes.success, true); + assert.equal(geminiRes.execution.tool, 'gemini'); + assert.equal(codexRes.execution.tool, 'codex'); + } finally { + await closeCliHistoryStores(); + projectA.cleanup(); + projectB.cleanup(); + } + }); + + it('stdin vs args: gemini/qwen do not use "-" marker, codex does', async () => { + const prompt = makeEnhancedPrompt({ + purpose: 'stdin vs args', + task: 'Validate prompt delivery mechanism', + mode: 'analysis', + context: '@src/index.ts', + expected: 'codex uses -', + rules: 'analysis=READ-ONLY', + }); + + const [geminiRes, qwenRes, codexRes] = await Promise.all([ + cliExecutor.executeCliTool({ tool: 'gemini', prompt, mode: 'analysis', model: 'm', cd: project.projectDir }), + cliExecutor.executeCliTool({ tool: 'qwen', prompt, mode: 'analysis', model: 'm', cd: project.projectDir }), + cliExecutor.executeCliTool({ tool: 'codex', prompt, mode: 'analysis', model: 'm', cd: project.projectDir }), + ]); + + const geminiArgs = parseFirstJsonLine(geminiRes.stdout).args; + const qwenArgs = parseFirstJsonLine(qwenRes.stdout).args; + const codexArgs = parseFirstJsonLine(codexRes.stdout).args; + + assert.equal(geminiArgs.includes('-'), false); + assert.equal(qwenArgs.includes('-'), false); + assert.equal(codexArgs.at(-1), '-'); + }); +}); diff --git a/ccw/tests/integration/cli-executor/tool-stub.js b/ccw/tests/integration/cli-executor/tool-stub.js index 347a5c0e..a9c0fab4 100644 --- a/ccw/tests/integration/cli-executor/tool-stub.js +++ b/ccw/tests/integration/cli-executor/tool-stub.js @@ -130,7 +130,16 @@ async function main() { const args = process.argv.slice(3).map(String); const prompt = await readStdin(); - const directives = parseDirectives(prompt) || {}; + const baseDirectives = parseDirectives(prompt) || {}; + const overrides = + baseDirectives.tool_overrides && + typeof baseDirectives.tool_overrides === 'object' && + baseDirectives.tool_overrides[tool] && + typeof baseDirectives.tool_overrides[tool] === 'object' + ? baseDirectives.tool_overrides[tool] + : {}; + const directives = { ...baseDirectives, ...overrides }; + const resolvedFiles = directives.resolve_patterns ? resolvePatterns(prompt, tool, args) : []; const wroteFiles = directives.write_files ? safeWriteFiles(directives.write_files) : []; @@ -163,4 +172,3 @@ main().catch((err) => { process.stderr.write(String(err?.stack || err?.message || err)); process.exit(1); }); - diff --git a/multi-tool-workflow.test.ts b/multi-tool-workflow.test.ts new file mode 100644 index 00000000..376d0be8 --- /dev/null +++ b/multi-tool-workflow.test.ts @@ -0,0 +1,2 @@ +import './ccw/tests/integration/cli-executor/multi-tool-workflow.test.ts'; +