test(cli-executor): add qwen/codex and multi-tool workflow tests

Solution-ID: SOL-1735410003

Issue-ID: ISS-1766921318981-23

Task-ID: T3
This commit is contained in:
catlog22
2025-12-29 17:36:50 +08:00
parent 883b9f0672
commit 99eeeff6f7
3 changed files with 598 additions and 2 deletions

View File

@@ -0,0 +1,586 @@
/**
* Integration tests for cli-executor: qwen/codex and multi-tool scenarios.
*
* Notes:
* - Targets the runtime implementation shipped in `ccw/dist`.
* - Uses stub CLI shims (gemini/qwen/codex) to avoid external dependencies.
*/
import { after, afterEach, before, beforeEach, describe, it, mock } from 'node:test';
import assert from 'node:assert/strict';
import {
closeCliHistoryStores,
makeEnhancedPrompt,
setupTestEnv,
setupTestProject,
validateExecutionResult,
type CliToolName,
type TestEnv,
type TestProject,
} from './setup.ts';
// Location of the compiled executor, resolved relative to this test file.
const cliExecutorUrl = new URL('../../../dist/tools/cli-executor.js', import.meta.url);
// Tag the URL with the current timestamp as a `t` query parameter so this load has a unique URL
// (presumably to bypass the ESM module cache — confirm).
cliExecutorUrl.searchParams.set('t', String(Date.now()));
// Module namespace of the executor under test; assigned by the dynamic import in the `before` hook.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let cliExecutor: any;
// Value returned by `setupTestEnv`, reassigned in `beforeEach`.
let env: TestEnv;
// Value returned by `setupTestProject`, reassigned in `beforeEach`.
let project: TestProject;
/**
 * Parses the first non-blank line of `text` as JSON.
 *
 * @param text - Raw multi-line output (LF or CRLF separated).
 * @returns The parsed value, or an empty object when every line is blank.
 * @throws SyntaxError when the first non-blank line is not valid JSON.
 */
function parseFirstJsonLine(text: string): any {
  for (const candidate of text.split(/\r?\n/)) {
    // Whitespace-only lines are skipped; the first real line decides the result.
    if (candidate.trim().length > 0) {
      return JSON.parse(candidate);
    }
  }
  return {};
}
/**
 * Converts every backslash in `value` to a forward slash.
 *
 * @param value - Path-like string that may contain backslash separators.
 * @returns The same string with `/` in place of each `\`.
 */
function normalizeSlash(value: string): string {
  return value.split('\\').join('/');
}
/**
 * Runs the same request against gemini, then qwen, then codex, stopping at the
 * first tool whose result reports a truthy `success`.
 *
 * @param params - Options forwarded to `cliExecutor.executeCliTool` alongside the tool name.
 * @returns The per-tool attempt log (in execution order) and the result of the
 *          last tool that was tried; `result` is `null` only if no tool ran.
 */
async function executeWithFallback(params: {
  prompt: string;
  mode: 'analysis' | 'write' | 'auto';
  model?: string;
  cd: string;
  includeDirs?: string;
  timeout?: number;
}): Promise<{ attempts: Array<{ tool: CliToolName; success: boolean }>; result: any }> {
  const attempts: Array<{ tool: CliToolName; success: boolean }> = [];
  const candidates: CliToolName[] = ['gemini', 'qwen', 'codex'];
  let latest: any = null;
  for (const tool of candidates) {
    // Strictly sequential: the next tool is only tried after the previous one has settled.
    latest = await cliExecutor.executeCliTool({ tool, ...params });
    const succeeded = Boolean(latest?.success);
    attempts.push({ tool, success: succeeded });
    if (succeeded) break;
  }
  return { attempts, result: latest };
}
/**
 * Test-local heuristic that maps a task description to a CLI tool.
 *
 * - `codex` for high-complexity tasks or intents mentioning "implement"/"refactor";
 * - `gemini` for intents mentioning "analyze"/"explain"/"plan";
 * - `qwen` otherwise.
 *
 * Keyword matching is case-insensitive substring matching on `task.intent`.
 */
function selectToolForTask(task: { intent: string; complexity: 'low' | 'medium' | 'high' }): CliToolName {
  const normalized = task.intent.toLowerCase();
  const mentions = (...keywords: string[]): boolean =>
    keywords.some((keyword) => normalized.includes(keyword));

  if (task.complexity === 'high' || mentions('implement', 'refactor')) {
    return 'codex';
  }
  if (mentions('analyze', 'explain', 'plan')) {
    return 'gemini';
  }
  return 'qwen';
}
describe('cli-executor integration: qwen/codex + multi-tool', () => {
// Suite setup: runs once before any test in this `describe`.
before(async () => {
// Replace console.log / console.error with no-ops until `mock.restoreAll()` is called.
mock.method(console, 'log', () => {});
mock.method(console, 'error', () => {});
// Load the compiled executor only after the console mocks are in place.
cliExecutor = await import(cliExecutorUrl.href);
});
// Per-test setup: every test gets new env and project values.
beforeEach(() => {
// Optional-chained, so this is a no-op when the module or `clearToolCache` export is absent.
cliExecutor?.clearToolCache?.();
// Set up the test environment for the three tools, then a project fixture.
env = setupTestEnv(['gemini', 'qwen', 'codex']);
project = setupTestProject();
});
// Per-test teardown. Every step runs even if an earlier one throws, so a failure
// while closing the history stores can no longer skip the env restore and cleanups
// and leave their effects in place for subsequent tests.
afterEach(async () => {
  try {
    // Close the history stores first, before the env and project cleanups below
    // (presumably they hold open files in those directories — confirm).
    await closeCliHistoryStores();
  } finally {
    try {
      env.restore();
      env.cleanup();
    } finally {
      project.cleanup();
    }
  }
});
// Suite teardown: undo every mock installed through `mock` (here, the console.log /
// console.error replacements from the `before` hook).
after(() => {
mock.restoreAll();
});
// qwen, analysis mode: the model must be forwarded via `-m`, with no approval override.
it('qwen analysis mode passes -m model and no approval flag', async () => {
  const analysisPrompt = makeEnhancedPrompt({
    purpose: 'Qwen analysis',
    task: 'Inspect code',
    mode: 'analysis',
    context: '@src/**/*.ts',
    expected: 'OK',
    rules: 'analysis=READ-ONLY',
  });
  const outcome = await cliExecutor.executeCliTool({
    tool: 'qwen',
    prompt: analysisPrompt,
    mode: 'analysis',
    model: 'qwen-test-model',
    cd: project.projectDir,
  });
  validateExecutionResult(outcome, { success: true, tool: 'qwen' });

  // `args` is read from the first JSON line the tool printed to stdout.
  const { args } = parseFirstJsonLine(outcome.stdout);
  for (const required of ['-m', 'qwen-test-model']) {
    assert.ok(args.includes(required));
  }
  assert.equal(args.includes('--approval-mode'), false);
});
// qwen, write mode: expects `--approval-mode yolo` and the requested file to be reported as written.
it('qwen write mode includes --approval-mode yolo', async () => {
  const writePrompt = makeEnhancedPrompt({
    purpose: 'Qwen write',
    task: 'Create file',
    mode: 'write',
    context: '@src/index.ts',
    expected: 'file written',
    rules: 'write=CREATE',
    directives: { write_files: { 'qwen.txt': 'hello' } },
  });
  const outcome = await cliExecutor.executeCliTool({
    tool: 'qwen',
    prompt: writePrompt,
    mode: 'write',
    model: 'qwen-test-model',
    cd: project.projectDir,
  });
  validateExecutionResult(outcome, { success: true, tool: 'qwen' });

  const { args, wrote_files: written } = parseFirstJsonLine(outcome.stdout);
  assert.ok(args.includes('--approval-mode'));
  assert.ok(args.includes('yolo'));
  assert.ok(written.includes('qwen.txt'));
});
// qwen: `includeDirs` must surface as `--include-directories <dir>` and let the
// `@../shared/**/*` pattern resolve to files under `../shared/`.
it('qwen includeDirs maps to --include-directories', async () => {
  const sharedPrompt = makeEnhancedPrompt({
    purpose: 'Qwen includeDirs',
    task: 'Resolve shared files',
    mode: 'analysis',
    context: '@../shared/**/*',
    expected: 'Resolved shared files list',
    rules: 'analysis=READ-ONLY',
    directives: { resolve_patterns: true },
  });
  const outcome = await cliExecutor.executeCliTool({
    tool: 'qwen',
    prompt: sharedPrompt,
    mode: 'analysis',
    model: 'qwen-test-model',
    cd: project.projectDir,
    includeDirs: '../shared',
  });

  const { args, resolved_files: resolved } = parseFirstJsonLine(outcome.stdout);
  assert.ok(args.includes('--include-directories'));
  assert.ok(args.includes('../shared'));
  assert.ok(resolved.some((p: string) => String(p).startsWith('../shared/')));
});
// qwen: `resume: true` is expected to translate into the `--continue` flag.
it('qwen resume=true uses native --continue', async () => {
  const resumePrompt = makeEnhancedPrompt({
    purpose: 'Qwen resume latest',
    task: 'Use native resume',
    mode: 'analysis',
    context: '@src/index.ts',
    expected: 'Args include --continue',
    rules: 'analysis=READ-ONLY',
  });
  const outcome = await cliExecutor.executeCliTool({
    tool: 'qwen',
    prompt: resumePrompt,
    mode: 'analysis',
    model: 'qwen-test-model',
    cd: project.projectDir,
    resume: true,
  });

  const { args } = parseFirstJsonLine(outcome.stdout);
  assert.ok(args.includes('--continue'));
});
// qwen: with `noNative: true`, neither `--continue` nor `--resume` may be passed
// even though `resume: true` is requested.
it('qwen noNative=true disables native resume flags', async () => {
  const noNativePrompt = makeEnhancedPrompt({
    purpose: 'Qwen resume disabled',
    task: 'Force prompt concat',
    mode: 'analysis',
    context: '@src/index.ts',
    expected: 'No --continue flag',
    rules: 'analysis=READ-ONLY',
  });
  const outcome = await cliExecutor.executeCliTool({
    tool: 'qwen',
    prompt: noNativePrompt,
    mode: 'analysis',
    model: 'qwen-test-model',
    cd: project.projectDir,
    resume: true,
    noNative: true,
  });

  const { args } = parseFirstJsonLine(outcome.stdout);
  for (const forbidden of ['--continue', '--resume']) {
    assert.equal(args.includes(forbidden), false);
  }
});
// codex, analysis mode: argv starts with `exec --full-auto`, ends with the `-`
// marker, and the prompt text shows up in the reported prompt rather than in argv.
it('codex analysis mode uses exec + --full-auto and reads prompt from stdin (-)', async () => {
  const analysisPrompt = makeEnhancedPrompt({
    purpose: 'Codex analysis',
    task: 'Read-only review',
    mode: 'analysis',
    context: '@src/index.ts',
    expected: 'OK',
    rules: 'analysis=READ-ONLY',
  });
  const outcome = await cliExecutor.executeCliTool({
    tool: 'codex',
    prompt: analysisPrompt,
    mode: 'analysis',
    model: 'codex-test-model',
    cd: project.projectDir,
  });
  validateExecutionResult(outcome, { success: true, tool: 'codex' });

  const echoed = parseFirstJsonLine(outcome.stdout);
  const argv = echoed.args;
  assert.deepEqual(argv.slice(0, 2), ['exec', '--full-auto']);
  assert.equal(argv.includes('--dangerously-bypass-approvals-and-sandbox'), false);
  assert.equal(argv.at(-1), '-');
  assert.ok(String(echoed.prompt).includes('PURPOSE: Codex analysis'));
  assert.equal(argv.join(' ').includes('PURPOSE: Codex analysis'), false);
});
// codex, write mode: `exec` subcommand plus the sandbox-bypass flag, and the
// requested file must be reported as written.
it('codex write mode uses --dangerously-bypass-approvals-and-sandbox', async () => {
  const writePrompt = makeEnhancedPrompt({
    purpose: 'Codex write',
    task: 'Write file',
    mode: 'write',
    context: '@src/index.ts',
    expected: 'file written',
    rules: 'write=CREATE',
    directives: { write_files: { 'codex.txt': 'hello' } },
  });
  const outcome = await cliExecutor.executeCliTool({
    tool: 'codex',
    prompt: writePrompt,
    mode: 'write',
    model: 'codex-test-model',
    cd: project.projectDir,
  });

  const { args, wrote_files: written } = parseFirstJsonLine(outcome.stdout);
  assert.equal(args[0], 'exec');
  assert.ok(args.includes('--dangerously-bypass-approvals-and-sandbox'));
  assert.ok(written.includes('codex.txt'));
});
// codex, auto mode: expects the sandbox-bypass flag in argv.
it('codex auto mode uses --dangerously-bypass-approvals-and-sandbox', async () => {
  const autoPrompt = makeEnhancedPrompt({
    purpose: 'Codex auto',
    task: 'Autonomous execution',
    mode: 'auto',
    context: '@src/index.ts',
    expected: 'OK',
    rules: 'auto=ALLOW',
  });
  const outcome = await cliExecutor.executeCliTool({
    tool: 'codex',
    prompt: autoPrompt,
    mode: 'auto',
    model: 'codex-test-model',
    cd: project.projectDir,
  });

  const { args } = parseFirstJsonLine(outcome.stdout);
  assert.ok(args.includes('--dangerously-bypass-approvals-and-sandbox'));
});
// codex: a comma-separated `includeDirs` must yield at least one `--add-dir`
// flag and let the `@../shared/**/*` pattern resolve under `../shared/`.
it('codex includeDirs maps to repeated --add-dir flags', async () => {
  const sharedPrompt = makeEnhancedPrompt({
    purpose: 'Codex includeDirs',
    task: 'Resolve shared files',
    mode: 'analysis',
    context: '@../shared/**/*',
    expected: 'Resolved shared files list',
    rules: 'analysis=READ-ONLY',
    directives: { resolve_patterns: true },
  });
  const outcome = await cliExecutor.executeCliTool({
    tool: 'codex',
    prompt: sharedPrompt,
    mode: 'analysis',
    model: 'codex-test-model',
    cd: project.projectDir,
    includeDirs: '../shared,../shared', // duplicates should still map to flags
  });

  const { args, resolved_files: resolved } = parseFirstJsonLine(outcome.stdout);
  const addDirFlags = args.filter((a: string) => a === '--add-dir');
  assert.ok(addDirFlags.length >= 1);
  assert.ok(resolved.some((p: string) => String(p).startsWith('../shared/')));
});
// codex: `resume: true` switches the subcommand to `resume --last` while keeping
// the analysis-mode permission flag (`--full-auto`, no sandbox bypass).
it('codex resume=true uses `resume --last` and respects analysis permissions', async () => {
  const resumePrompt = makeEnhancedPrompt({
    purpose: 'Codex resume latest',
    task: 'Use native resume',
    mode: 'analysis',
    context: '@src/index.ts',
    expected: 'Args include resume --last',
    rules: 'analysis=READ-ONLY',
  });
  const outcome = await cliExecutor.executeCliTool({
    tool: 'codex',
    prompt: resumePrompt,
    mode: 'analysis',
    model: 'codex-test-model',
    cd: project.projectDir,
    resume: true,
  });

  const { args } = parseFirstJsonLine(outcome.stdout);
  assert.equal(args[0], 'resume');
  for (const required of ['--last', '--full-auto']) {
    assert.ok(args.includes(required));
  }
  assert.equal(args.includes('--dangerously-bypass-approvals-and-sandbox'), false);
});
// The tool must run with the requested project directory as its working directory.
it('working directory is isolated per execution via --cd', async () => {
  const prompt = makeEnhancedPrompt({
    purpose: 'Working directory',
    task: 'Echo cwd',
    mode: 'analysis',
    context: '@src/index.ts',
    expected: 'cwd matches',
    rules: 'analysis=READ-ONLY',
  });
  const res = await cliExecutor.executeCliTool({
    tool: 'qwen',
    prompt,
    mode: 'analysis',
    model: 'qwen-test-model',
    cd: project.projectDir,
  });
  const payload = parseFirstJsonLine(res.stdout);
  // Normalize BOTH sides: comparing a raw reported cwd against a slash-normalized
  // expectation would fail spuriously whenever the tool reports backslash separators.
  // `String(...)` keeps a missing `cwd` a plain assertion failure instead of a TypeError.
  assert.equal(normalizeSlash(String(payload.cwd)), normalizeSlash(project.projectDir));
});
// codex: the requested model must appear as the value immediately following `-m`.
it('model override is passed as -m for codex', async () => {
  const modelPrompt = makeEnhancedPrompt({
    purpose: 'Model override',
    task: 'Use model',
    mode: 'analysis',
    context: '@src/index.ts',
    expected: 'args include -m',
    rules: 'analysis=READ-ONLY',
  });
  const outcome = await cliExecutor.executeCliTool({
    tool: 'codex',
    prompt: modelPrompt,
    mode: 'analysis',
    model: 'codex-model-override',
    cd: project.projectDir,
  });

  const { args } = parseFirstJsonLine(outcome.stdout);
  const flagAt = args.indexOf('-m');
  assert.ok(flagAt >= 0);
  assert.equal(args[flagAt + 1], 'codex-model-override');
});
// Exit code 1 with no stderr override in the directives must still be reported as success.
it('non-zero exit with output and no fatal stderr is treated as success (qwen)', async () => {
  const nonFatalPrompt = makeEnhancedPrompt({
    purpose: 'Non-fatal exit',
    task: 'exit=1 without fatal stderr',
    mode: 'analysis',
    context: '@src/index.ts',
    expected: 'success',
    rules: 'analysis=READ-ONLY',
    directives: { exit_code: 1 },
  });
  const outcome = await cliExecutor.executeCliTool({
    tool: 'qwen',
    prompt: nonFatalPrompt,
    mode: 'analysis',
    model: 'qwen-test-model',
    cd: project.projectDir,
  });

  const { success, execution } = outcome;
  assert.equal(success, true);
  assert.equal(execution.status, 'success');
});
// Exit code 1 together with "rate limit exceeded" on stderr must be reported as an error.
it('rate limit exceeded is treated as fatal error', async () => {
  const fatalPrompt = makeEnhancedPrompt({
    purpose: 'Fatal exit',
    task: 'stderr contains rate limit exceeded',
    mode: 'analysis',
    context: '@src/index.ts',
    expected: 'error',
    rules: 'analysis=READ-ONLY',
    directives: { exit_code: 1, stderr: 'rate limit exceeded\n' },
  });
  const outcome = await cliExecutor.executeCliTool({
    tool: 'qwen',
    prompt: fatalPrompt,
    mode: 'analysis',
    model: 'qwen-test-model',
    cd: project.projectDir,
  });

  const { success, execution } = outcome;
  assert.equal(success, false);
  assert.equal(execution.status, 'error');
});
// Fallback chain stops at qwen when only gemini is overridden to fail.
it('fallback chain: gemini fatal error -> qwen success', async () => {
  const fallbackPrompt = makeEnhancedPrompt({
    purpose: 'Fallback test',
    task: 'Try tools in order',
    mode: 'analysis',
    context: '@src/index.ts',
    expected: 'qwen used',
    rules: 'analysis=READ-ONLY',
    directives: {
      tool_overrides: {
        gemini: { exit_code: 1, stderr: 'FATAL: Authentication failed: API key\n' },
      },
    },
  });
  const chain = await executeWithFallback({
    prompt: fallbackPrompt,
    mode: 'analysis',
    model: 'test-model',
    cd: project.projectDir,
  });

  // codex must never be reached, so exactly two attempts are recorded.
  assert.deepEqual(chain.attempts.map(({ tool }) => tool), ['gemini', 'qwen']);
  const [first, second] = chain.attempts;
  assert.equal(first.success, false);
  assert.equal(second.success, true);
  assert.equal(chain.result.execution.tool, 'qwen');
});
// Fallback chain reaches codex when both gemini and qwen are overridden to fail.
it('fallback chain: gemini fail + qwen fail -> codex success', async () => {
  const fallbackPrompt = makeEnhancedPrompt({
    purpose: 'Fallback test 2',
    task: 'Try tools in order',
    mode: 'analysis',
    context: '@src/index.ts',
    expected: 'codex used',
    rules: 'analysis=READ-ONLY',
    directives: {
      tool_overrides: {
        gemini: { exit_code: 1, stderr: 'FATAL: rate limit exceeded\n' },
        qwen: { exit_code: 1, stderr: 'FATAL: rate limit exceeded\n' },
      },
    },
  });
  const chain = await executeWithFallback({
    prompt: fallbackPrompt,
    mode: 'analysis',
    model: 'test-model',
    cd: project.projectDir,
  });

  assert.deepEqual(chain.attempts.map(({ tool }) => tool), ['gemini', 'qwen', 'codex']);
  const [first, second, third] = chain.attempts;
  assert.equal(first.success, false);
  assert.equal(second.success, false);
  assert.equal(third.success, true);
  assert.equal(chain.result.execution.tool, 'codex');
});
// Table-driven check of `selectToolForTask` across six representative intents.
it('tool selection heuristic chooses expected tool for 5+ task types', () => {
  const table: Array<{ intent: string; complexity: 'low' | 'medium' | 'high'; expected: CliToolName }> = [
    { intent: 'analyze architecture', complexity: 'low', expected: 'gemini' },
    { intent: 'explain error', complexity: 'medium', expected: 'gemini' },
    { intent: 'plan migration steps', complexity: 'medium', expected: 'gemini' },
    { intent: 'implement new feature', complexity: 'medium', expected: 'codex' },
    { intent: 'refactor core module', complexity: 'high', expected: 'codex' },
    { intent: 'summarize notes', complexity: 'low', expected: 'qwen' },
  ];
  table.forEach(({ intent, complexity, expected }) => {
    const chosen = selectToolForTask({ intent, complexity });
    assert.equal(chosen, expected);
  });
});
// All three tools must report identical `parsed` prompt fields for the same prompt.
it('compares enhanced prompt parsing across gemini/qwen/codex for consistency', async () => {
  const prompt = makeEnhancedPrompt({
    purpose: 'Compare tools',
    task: 'Parse enhanced prompt',
    mode: 'analysis',
    context: '@src/index.ts',
    expected: 'same parsed fields',
    rules: 'analysis=READ-ONLY',
  });
  const tools: CliToolName[] = ['gemini', 'qwen', 'codex'];
  const results = await Promise.all(
    tools.map((tool) =>
      cliExecutor.executeCliTool({ tool, prompt, mode: 'analysis', model: 'm', cd: project.projectDir }),
    ),
  );
  const parsed = results.map((r) => parseFirstJsonLine(r.stdout).parsed);
  // Guard against a vacuous pass: `deepEqual(undefined, undefined)` succeeds, so
  // without this check the test would stay green if no tool reported `parsed` at all.
  parsed.forEach((fields, index) => {
    assert.notEqual(fields, undefined, `${tools[index]} output has no "parsed" field`);
  });
  assert.deepEqual(parsed[0], parsed[1]);
  assert.deepEqual(parsed[1], parsed[2]);
});
// Runs gemini and codex concurrently, each in its own project fixture, and
// expects both to succeed and to be attributed to the right tool.
it('parallel execution returns results from at least two tools', async () => {
  const projectA = setupTestProject();
  // Created inside the `try` so that a failure here still cleans up `projectA`.
  let projectB: TestProject | undefined;
  try {
    projectB = setupTestProject();
    const prompt = makeEnhancedPrompt({
      purpose: 'Parallel',
      task: 'Run two tools',
      mode: 'analysis',
      context: '@src/index.ts',
      expected: 'two results',
      rules: 'analysis=READ-ONLY',
    });
    const [geminiRes, codexRes] = await Promise.all([
      cliExecutor.executeCliTool({ tool: 'gemini', prompt, mode: 'analysis', model: 'm', cd: projectA.projectDir }),
      cliExecutor.executeCliTool({ tool: 'codex', prompt, mode: 'analysis', model: 'm', cd: projectB.projectDir }),
    ]);
    assert.equal(geminiRes.success, true);
    assert.equal(codexRes.success, true);
    assert.equal(geminiRes.execution.tool, 'gemini');
    assert.equal(codexRes.execution.tool, 'codex');
  } finally {
    // Each teardown step runs even if the previous one throws, so neither
    // extra fixture is leaked when closing the history stores fails.
    try {
      await closeCliHistoryStores();
    } finally {
      try {
        projectA.cleanup();
      } finally {
        projectB?.cleanup();
      }
    }
  }
});
// Prompt delivery: only codex is expected to end its argv with the `-` marker;
// gemini and qwen must not contain it anywhere.
it('stdin vs args: gemini/qwen do not use "-" marker, codex does', async () => {
  const prompt = makeEnhancedPrompt({
    purpose: 'stdin vs args',
    task: 'Validate prompt delivery mechanism',
    mode: 'analysis',
    context: '@src/index.ts',
    expected: 'codex uses -',
    rules: 'analysis=READ-ONLY',
  });
  const tools: CliToolName[] = ['gemini', 'qwen', 'codex'];
  const outcomes = await Promise.all(
    tools.map((tool) =>
      cliExecutor.executeCliTool({ tool, prompt, mode: 'analysis', model: 'm', cd: project.projectDir }),
    ),
  );
  const [geminiArgs, qwenArgs, codexArgs] = outcomes.map((r) => parseFirstJsonLine(r.stdout).args);

  assert.equal(geminiArgs.includes('-'), false);
  assert.equal(qwenArgs.includes('-'), false);
  assert.equal(codexArgs.at(-1), '-');
});
});

View File

@@ -130,7 +130,16 @@ async function main() {
const args = process.argv.slice(3).map(String);
const prompt = await readStdin();
const directives = parseDirectives(prompt) || {};
const baseDirectives = parseDirectives(prompt) || {};
const overrides =
baseDirectives.tool_overrides &&
typeof baseDirectives.tool_overrides === 'object' &&
baseDirectives.tool_overrides[tool] &&
typeof baseDirectives.tool_overrides[tool] === 'object'
? baseDirectives.tool_overrides[tool]
: {};
const directives = { ...baseDirectives, ...overrides };
const resolvedFiles = directives.resolve_patterns ? resolvePatterns(prompt, tool, args) : [];
const wroteFiles = directives.write_files ? safeWriteFiles(directives.write_files) : [];
@@ -163,4 +172,3 @@ main().catch((err) => {
process.stderr.write(String(err?.stack || err?.message || err));
process.exit(1);
});

View File

@@ -0,0 +1,2 @@
import './ccw/tests/integration/cli-executor/multi-tool-workflow.test.ts';