// Mirror of https://github.com/catlog22/Claude-Code-Workflow.git
// Synced 2026-02-05 01:50:27 +08:00
// 587 lines, 18 KiB, TypeScript
/**
 * Integration tests for cli-executor: qwen/codex and multi-tool scenarios.
 *
 * Notes:
 * - Targets the runtime implementation shipped in `ccw/dist`.
 * - Uses stub CLI shims (gemini/qwen/codex) to avoid external dependencies.
 */
|
|
|
|
import assert from 'node:assert/strict';
import { after, afterEach, before, beforeEach, describe, it, mock } from 'node:test';

import {
  closeCliHistoryStores,
  makeEnhancedPrompt,
  setupTestEnv,
  setupTestProject,
  validateExecutionResult,
  type CliToolName,
  type TestEnv,
  type TestProject,
} from './setup.ts';
|
|
|
|
// URL of the built cli-executor module, resolved relative to this test file.
// A timestamp query parameter makes the specifier unique for each run
// (presumably so the dynamic import is not served from the ESM module cache — confirm).
const cliExecutorUrl = new URL('../../../dist/tools/cli-executor.js', import.meta.url);
cliExecutorUrl.searchParams.set('t', String(Date.now()));

// Module namespace of the dynamically imported cli-executor; assigned in the `before` hook.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let cliExecutor: any;

// Per-test fixtures, re-created in `beforeEach` and torn down in `afterEach`.
let env: TestEnv;
let project: TestProject;
|
|
|
|
/**
 * Parses the first non-blank line of `text` as JSON.
 *
 * @param text - Raw output, split on LF or CRLF line endings.
 * @returns The parsed value of the first line that is not empty or whitespace-only,
 *   or an empty object when every line is blank.
 * @throws SyntaxError when that line is not valid JSON; the message includes the
 *   parser's reason and the start of the offending line to ease debugging.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function parseFirstJsonLine(text: string): any {
  const firstLine = text.split(/\r?\n/).find((candidate) => candidate.trim().length > 0);
  if (firstLine === undefined) return {};

  try {
    return JSON.parse(firstLine);
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    // Keep the SyntaxError type so callers that match on it are unaffected.
    throw new SyntaxError(
      `First non-empty output line is not valid JSON (${reason}): ${firstLine.slice(0, 200)}`,
    );
  }
}
|
|
|
|
/**
 * Converts every backslash in `value` to a forward slash.
 *
 * @param value - A path-like string, possibly using Windows separators.
 * @returns The same string with each `\` replaced by `/`.
 */
function normalizeSlash(value: string): string {
  return value.replace(/\\/g, '/');
}
|
|
|
|
/**
 * Runs the same request against gemini, qwen and codex, in that order,
 * stopping at the first tool whose result has a truthy `success`.
 *
 * @param params - Options forwarded unchanged to `cliExecutor.executeCliTool`
 *   together with the tool name being tried.
 * @returns `attempts` lists every tool tried, in order, with its success flag.
 *   `result` is the first successful result, or the last result received when
 *   no tool succeeded.
 */
async function executeWithFallback(params: {
  prompt: string;
  mode: 'analysis' | 'write' | 'auto';
  model?: string;
  cd: string;
  includeDirs?: string;
  timeout?: number;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
}): Promise<{ attempts: Array<{ tool: CliToolName; success: boolean }>; result: any }> {
  const attempts: Array<{ tool: CliToolName; success: boolean }> = [];
  const tools: CliToolName[] = ['gemini', 'qwen', 'codex'];

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  let last: any = null;
  for (const tool of tools) {
    const res = await cliExecutor.executeCliTool({ tool, ...params });
    last = res;

    const success = Boolean(res?.success);
    attempts.push({ tool, success });
    if (success) return { attempts, result: res };
  }

  // Every tool failed: surface the final attempt's result.
  return { attempts, result: last };
}
|
|
|
|
/**
 * Picks a CLI tool for a task from its intent text and complexity.
 *
 * Rules are checked in order; the first match wins:
 * 1. intent mentions "implement" or "refactor", or complexity is `high` -> `codex`
 * 2. intent mentions "analyze", "explain" or "plan" -> `gemini`
 * 3. anything else -> `qwen`
 *
 * Matching is a case-insensitive substring test on `intent`.
 *
 * @param task - The task's free-text intent and its complexity level.
 * @returns The name of the selected tool.
 */
function selectToolForTask(task: { intent: string; complexity: 'low' | 'medium' | 'high' }): CliToolName {
  const intent = task.intent.toLowerCase();
  const mentionsAny = (keywords: readonly string[]): boolean =>
    keywords.some((keyword) => intent.includes(keyword));

  if (mentionsAny(['implement', 'refactor']) || task.complexity === 'high') {
    return 'codex';
  }
  if (mentionsAny(['analyze', 'explain', 'plan'])) {
    return 'gemini';
  }
  return 'qwen';
}
|
|
|
|
// Suite exercising executeCliTool with qwen and codex individually and with
// several tools combined. Each test parses the first stdout line as JSON and
// asserts on its fields (args, cwd, prompt, wrote_files, resolved_files, parsed).
describe('cli-executor integration: qwen/codex + multi-tool', () => {
  before(async () => {
    // Silence console output for the whole suite; restored in `after`.
    mock.method(console, 'log', () => {});
    mock.method(console, 'error', () => {});
    cliExecutor = await import(cliExecutorUrl.href);
  });

  beforeEach(() => {
    cliExecutor?.clearToolCache?.();
    env = setupTestEnv(['gemini', 'qwen', 'codex']);
    project = setupTestProject();
  });

  afterEach(async () => {
    try {
      await closeCliHistoryStores();
    } finally {
      // Always undo the environment and remove temp fixtures, even when closing
      // the history stores fails, so one failure cannot leak into later tests.
      env.restore();
      env.cleanup();
      project.cleanup();
    }
  });

  after(() => {
    mock.restoreAll();
  });

  it('qwen analysis mode passes -m model and no approval flag', async () => {
    const prompt = makeEnhancedPrompt({
      purpose: 'Qwen analysis',
      task: 'Inspect code',
      mode: 'analysis',
      context: '@src/**/*.ts',
      expected: 'OK',
      rules: 'analysis=READ-ONLY',
    });

    const res = await cliExecutor.executeCliTool({
      tool: 'qwen',
      prompt,
      mode: 'analysis',
      model: 'qwen-test-model',
      cd: project.projectDir,
    });

    validateExecutionResult(res, { success: true, tool: 'qwen' });
    const payload = parseFirstJsonLine(res.stdout);
    assert.ok(payload.args.includes('-m'));
    assert.ok(payload.args.includes('qwen-test-model'));
    assert.equal(payload.args.includes('--approval-mode'), false);
  });

  it('qwen write mode includes --approval-mode yolo', async () => {
    const prompt = makeEnhancedPrompt({
      purpose: 'Qwen write',
      task: 'Create file',
      mode: 'write',
      context: '@src/index.ts',
      expected: 'file written',
      rules: 'write=CREATE',
      directives: { write_files: { 'qwen.txt': 'hello' } },
    });

    const res = await cliExecutor.executeCliTool({
      tool: 'qwen',
      prompt,
      mode: 'write',
      model: 'qwen-test-model',
      cd: project.projectDir,
    });

    validateExecutionResult(res, { success: true, tool: 'qwen' });
    const payload = parseFirstJsonLine(res.stdout);
    assert.ok(payload.args.includes('--approval-mode'));
    assert.ok(payload.args.includes('yolo'));
    assert.ok(payload.wrote_files.includes('qwen.txt'));
  });

  it('qwen includeDirs maps to --include-directories', async () => {
    const prompt = makeEnhancedPrompt({
      purpose: 'Qwen includeDirs',
      task: 'Resolve shared files',
      mode: 'analysis',
      context: '@../shared/**/*',
      expected: 'Resolved shared files list',
      rules: 'analysis=READ-ONLY',
      directives: { resolve_patterns: true },
    });

    const res = await cliExecutor.executeCliTool({
      tool: 'qwen',
      prompt,
      mode: 'analysis',
      model: 'qwen-test-model',
      cd: project.projectDir,
      includeDirs: '../shared',
    });

    const payload = parseFirstJsonLine(res.stdout);
    assert.ok(payload.args.includes('--include-directories'));
    assert.ok(payload.args.includes('../shared'));
    assert.ok(payload.resolved_files.some((p: string) => String(p).startsWith('../shared/')));
  });

  it('qwen resume=true uses native --continue', async () => {
    const prompt = makeEnhancedPrompt({
      purpose: 'Qwen resume latest',
      task: 'Use native resume',
      mode: 'analysis',
      context: '@src/index.ts',
      expected: 'Args include --continue',
      rules: 'analysis=READ-ONLY',
    });

    const res = await cliExecutor.executeCliTool({
      tool: 'qwen',
      prompt,
      mode: 'analysis',
      model: 'qwen-test-model',
      cd: project.projectDir,
      resume: true,
    });

    const payload = parseFirstJsonLine(res.stdout);
    assert.ok(payload.args.includes('--continue'));
  });

  it('qwen noNative=true disables native resume flags', async () => {
    const prompt = makeEnhancedPrompt({
      purpose: 'Qwen resume disabled',
      task: 'Force prompt concat',
      mode: 'analysis',
      context: '@src/index.ts',
      expected: 'No --continue flag',
      rules: 'analysis=READ-ONLY',
    });

    const res = await cliExecutor.executeCliTool({
      tool: 'qwen',
      prompt,
      mode: 'analysis',
      model: 'qwen-test-model',
      cd: project.projectDir,
      resume: true,
      noNative: true,
    });

    const payload = parseFirstJsonLine(res.stdout);
    assert.equal(payload.args.includes('--continue'), false);
    assert.equal(payload.args.includes('--resume'), false);
  });

  it('codex analysis mode uses exec + --full-auto and reads prompt from stdin (-)', async () => {
    const prompt = makeEnhancedPrompt({
      purpose: 'Codex analysis',
      task: 'Read-only review',
      mode: 'analysis',
      context: '@src/index.ts',
      expected: 'OK',
      rules: 'analysis=READ-ONLY',
    });

    const res = await cliExecutor.executeCliTool({
      tool: 'codex',
      prompt,
      mode: 'analysis',
      model: 'codex-test-model',
      cd: project.projectDir,
    });

    validateExecutionResult(res, { success: true, tool: 'codex' });
    const payload = parseFirstJsonLine(res.stdout);
    assert.deepEqual(payload.args.slice(0, 2), ['exec', '--full-auto']);
    assert.equal(payload.args.includes('--dangerously-bypass-approvals-and-sandbox'), false);
    // A trailing "-" argument marks the prompt as delivered on stdin, not in argv.
    assert.equal(payload.args.at(-1), '-');
    assert.ok(String(payload.prompt).includes('PURPOSE: Codex analysis'));
    assert.equal(payload.args.join(' ').includes('PURPOSE: Codex analysis'), false);
  });

  it('codex write mode uses --dangerously-bypass-approvals-and-sandbox', async () => {
    const prompt = makeEnhancedPrompt({
      purpose: 'Codex write',
      task: 'Write file',
      mode: 'write',
      context: '@src/index.ts',
      expected: 'file written',
      rules: 'write=CREATE',
      directives: { write_files: { 'codex.txt': 'hello' } },
    });

    const res = await cliExecutor.executeCliTool({
      tool: 'codex',
      prompt,
      mode: 'write',
      model: 'codex-test-model',
      cd: project.projectDir,
    });

    const payload = parseFirstJsonLine(res.stdout);
    assert.equal(payload.args[0], 'exec');
    assert.ok(payload.args.includes('--dangerously-bypass-approvals-and-sandbox'));
    assert.ok(payload.wrote_files.includes('codex.txt'));
  });

  it('codex auto mode uses --dangerously-bypass-approvals-and-sandbox', async () => {
    const prompt = makeEnhancedPrompt({
      purpose: 'Codex auto',
      task: 'Autonomous execution',
      mode: 'auto',
      context: '@src/index.ts',
      expected: 'OK',
      rules: 'auto=ALLOW',
    });

    const res = await cliExecutor.executeCliTool({
      tool: 'codex',
      prompt,
      mode: 'auto',
      model: 'codex-test-model',
      cd: project.projectDir,
    });

    const payload = parseFirstJsonLine(res.stdout);
    assert.ok(payload.args.includes('--dangerously-bypass-approvals-and-sandbox'));
  });

  it('codex includeDirs maps to repeated --add-dir flags', async () => {
    const prompt = makeEnhancedPrompt({
      purpose: 'Codex includeDirs',
      task: 'Resolve shared files',
      mode: 'analysis',
      context: '@../shared/**/*',
      expected: 'Resolved shared files list',
      rules: 'analysis=READ-ONLY',
      directives: { resolve_patterns: true },
    });

    const res = await cliExecutor.executeCliTool({
      tool: 'codex',
      prompt,
      mode: 'analysis',
      model: 'codex-test-model',
      cd: project.projectDir,
      includeDirs: '../shared,../shared', // duplicates should still map to flags
    });

    const payload = parseFirstJsonLine(res.stdout);
    const addDirCount = payload.args.filter((a: string) => a === '--add-dir').length;
    assert.ok(addDirCount >= 1);
    assert.ok(payload.resolved_files.some((p: string) => String(p).startsWith('../shared/')));
  });

  it('codex resume=true uses `resume --last` and respects analysis permissions', async () => {
    const prompt = makeEnhancedPrompt({
      purpose: 'Codex resume latest',
      task: 'Use native resume',
      mode: 'analysis',
      context: '@src/index.ts',
      expected: 'Args include resume --last',
      rules: 'analysis=READ-ONLY',
    });

    const res = await cliExecutor.executeCliTool({
      tool: 'codex',
      prompt,
      mode: 'analysis',
      model: 'codex-test-model',
      cd: project.projectDir,
      resume: true,
    });

    const payload = parseFirstJsonLine(res.stdout);
    assert.equal(payload.args[0], 'resume');
    assert.ok(payload.args.includes('--last'));
    assert.ok(payload.args.includes('--full-auto'));
    assert.equal(payload.args.includes('--dangerously-bypass-approvals-and-sandbox'), false);
  });

  it('working directory is isolated per execution via --cd', async () => {
    const prompt = makeEnhancedPrompt({
      purpose: 'Working directory',
      task: 'Echo cwd',
      mode: 'analysis',
      context: '@src/index.ts',
      expected: 'cwd matches',
      rules: 'analysis=READ-ONLY',
    });

    const res = await cliExecutor.executeCliTool({
      tool: 'qwen',
      prompt,
      mode: 'analysis',
      model: 'qwen-test-model',
      cd: project.projectDir,
    });

    const payload = parseFirstJsonLine(res.stdout);
    assert.equal(payload.cwd, normalizeSlash(project.projectDir));
  });

  it('model override is passed as -m for codex', async () => {
    const prompt = makeEnhancedPrompt({
      purpose: 'Model override',
      task: 'Use model',
      mode: 'analysis',
      context: '@src/index.ts',
      expected: 'args include -m',
      rules: 'analysis=READ-ONLY',
    });

    const res = await cliExecutor.executeCliTool({
      tool: 'codex',
      prompt,
      mode: 'analysis',
      model: 'codex-model-override',
      cd: project.projectDir,
    });

    const payload = parseFirstJsonLine(res.stdout);
    const idx = payload.args.indexOf('-m');
    assert.ok(idx >= 0);
    // The model name must be the value immediately following the -m flag.
    assert.equal(payload.args[idx + 1], 'codex-model-override');
  });

  it('non-zero exit with output and no fatal stderr is treated as success (qwen)', async () => {
    const prompt = makeEnhancedPrompt({
      purpose: 'Non-fatal exit',
      task: 'exit=1 without fatal stderr',
      mode: 'analysis',
      context: '@src/index.ts',
      expected: 'success',
      rules: 'analysis=READ-ONLY',
      directives: { exit_code: 1 },
    });

    const res = await cliExecutor.executeCliTool({
      tool: 'qwen',
      prompt,
      mode: 'analysis',
      model: 'qwen-test-model',
      cd: project.projectDir,
    });

    assert.equal(res.success, true);
    assert.equal(res.execution.status, 'success');
  });

  it('rate limit exceeded is treated as fatal error', async () => {
    const prompt = makeEnhancedPrompt({
      purpose: 'Fatal exit',
      task: 'stderr contains rate limit exceeded',
      mode: 'analysis',
      context: '@src/index.ts',
      expected: 'error',
      rules: 'analysis=READ-ONLY',
      directives: { exit_code: 1, stderr: 'rate limit exceeded\n' },
    });

    const res = await cliExecutor.executeCliTool({
      tool: 'qwen',
      prompt,
      mode: 'analysis',
      model: 'qwen-test-model',
      cd: project.projectDir,
    });

    assert.equal(res.success, false);
    assert.equal(res.execution.status, 'error');
  });

  it('fallback chain: gemini fatal error -> qwen success', async () => {
    const prompt = makeEnhancedPrompt({
      purpose: 'Fallback test',
      task: 'Try tools in order',
      mode: 'analysis',
      context: '@src/index.ts',
      expected: 'qwen used',
      rules: 'analysis=READ-ONLY',
      directives: {
        tool_overrides: {
          gemini: { exit_code: 1, stderr: 'FATAL: Authentication failed: API key\n' },
        },
      },
    });

    const { attempts, result } = await executeWithFallback({
      prompt,
      mode: 'analysis',
      model: 'test-model',
      cd: project.projectDir,
    });

    assert.deepEqual(attempts.map((a) => a.tool), ['gemini', 'qwen']);
    assert.equal(attempts[0].success, false);
    assert.equal(attempts[1].success, true);
    assert.equal(result.execution.tool, 'qwen');
  });

  it('fallback chain: gemini fail + qwen fail -> codex success', async () => {
    const prompt = makeEnhancedPrompt({
      purpose: 'Fallback test 2',
      task: 'Try tools in order',
      mode: 'analysis',
      context: '@src/index.ts',
      expected: 'codex used',
      rules: 'analysis=READ-ONLY',
      directives: {
        tool_overrides: {
          gemini: { exit_code: 1, stderr: 'FATAL: rate limit exceeded\n' },
          qwen: { exit_code: 1, stderr: 'FATAL: rate limit exceeded\n' },
        },
      },
    });

    const { attempts, result } = await executeWithFallback({
      prompt,
      mode: 'analysis',
      model: 'test-model',
      cd: project.projectDir,
    });

    assert.deepEqual(attempts.map((a) => a.tool), ['gemini', 'qwen', 'codex']);
    assert.equal(attempts[0].success, false);
    assert.equal(attempts[1].success, false);
    assert.equal(attempts[2].success, true);
    assert.equal(result.execution.tool, 'codex');
  });

  it('tool selection heuristic chooses expected tool for 5+ task types', () => {
    const cases: Array<{ intent: string; complexity: 'low' | 'medium' | 'high'; expected: CliToolName }> = [
      { intent: 'analyze architecture', complexity: 'low', expected: 'gemini' },
      { intent: 'explain error', complexity: 'medium', expected: 'gemini' },
      { intent: 'plan migration steps', complexity: 'medium', expected: 'gemini' },
      { intent: 'implement new feature', complexity: 'medium', expected: 'codex' },
      { intent: 'refactor core module', complexity: 'high', expected: 'codex' },
      { intent: 'summarize notes', complexity: 'low', expected: 'qwen' },
    ];

    for (const c of cases) {
      assert.equal(selectToolForTask({ intent: c.intent, complexity: c.complexity }), c.expected);
    }
  });

  it('compares enhanced prompt parsing across gemini/qwen/codex for consistency', async () => {
    const prompt = makeEnhancedPrompt({
      purpose: 'Compare tools',
      task: 'Parse enhanced prompt',
      mode: 'analysis',
      context: '@src/index.ts',
      expected: 'same parsed fields',
      rules: 'analysis=READ-ONLY',
    });

    const results = await Promise.all(
      (['gemini', 'qwen', 'codex'] as CliToolName[]).map((tool) =>
        cliExecutor.executeCliTool({ tool, prompt, mode: 'analysis', model: 'm', cd: project.projectDir }),
      ),
    );

    const payloads = results.map((r) => parseFirstJsonLine(r.stdout));
    const parsed = payloads.map((p) => p.parsed);

    // Pairwise equality across the three tools.
    assert.deepEqual(parsed[0], parsed[1]);
    assert.deepEqual(parsed[1], parsed[2]);
  });

  it('parallel execution returns results from at least two tools', async () => {
    // Separate project directories so the two concurrent runs do not share a cwd.
    const projectA = setupTestProject();
    const projectB = setupTestProject();
    try {
      const prompt = makeEnhancedPrompt({
        purpose: 'Parallel',
        task: 'Run two tools',
        mode: 'analysis',
        context: '@src/index.ts',
        expected: 'two results',
        rules: 'analysis=READ-ONLY',
      });

      const [geminiRes, codexRes] = await Promise.all([
        cliExecutor.executeCliTool({ tool: 'gemini', prompt, mode: 'analysis', model: 'm', cd: projectA.projectDir }),
        cliExecutor.executeCliTool({ tool: 'codex', prompt, mode: 'analysis', model: 'm', cd: projectB.projectDir }),
      ]);

      assert.equal(geminiRes.success, true);
      assert.equal(codexRes.success, true);
      assert.equal(geminiRes.execution.tool, 'gemini');
      assert.equal(codexRes.execution.tool, 'codex');
    } finally {
      // Close the history stores before the extra project directories are removed.
      await closeCliHistoryStores();
      projectA.cleanup();
      projectB.cleanup();
    }
  });

  it('stdin vs args: gemini/qwen do not use "-" marker, codex does', async () => {
    const prompt = makeEnhancedPrompt({
      purpose: 'stdin vs args',
      task: 'Validate prompt delivery mechanism',
      mode: 'analysis',
      context: '@src/index.ts',
      expected: 'codex uses -',
      rules: 'analysis=READ-ONLY',
    });

    const [geminiRes, qwenRes, codexRes] = await Promise.all([
      cliExecutor.executeCliTool({ tool: 'gemini', prompt, mode: 'analysis', model: 'm', cd: project.projectDir }),
      cliExecutor.executeCliTool({ tool: 'qwen', prompt, mode: 'analysis', model: 'm', cd: project.projectDir }),
      cliExecutor.executeCliTool({ tool: 'codex', prompt, mode: 'analysis', model: 'm', cd: project.projectDir }),
    ]);

    const geminiArgs = parseFirstJsonLine(geminiRes.stdout).args;
    const qwenArgs = parseFirstJsonLine(qwenRes.stdout).args;
    const codexArgs = parseFirstJsonLine(codexRes.stdout).args;

    assert.equal(geminiArgs.includes('-'), false);
    assert.equal(qwenArgs.includes('-'), false);
    assert.equal(codexArgs.at(-1), '-');
  });
});
|