Compare commits

...

21 Commits

Author SHA1 Message Date
cexll
8db49f198e fix(test): set USERPROFILE on Windows for skills tests
os.UserHomeDir() uses USERPROFILE on Windows, not HOME. Add a
setTestHome helper that sets both env vars so tests resolve the same
home directory on every CI platform.

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-02-09 11:16:33 +08:00
cexll
97dfa907d9 feat(skills): add per-task skill spec auto-detection and injection
Replace external inject-spec.py hook with built-in zero-config skill
detection in codeagent-wrapper. The system auto-detects project type
from fingerprint files (go.mod, package.json, etc.), maps to installed
skills, and injects SKILL.md content directly into sub-agent prompts.

Key changes:
- Add DetectProjectSkills/ResolveSkillContent in executor/prompt.go
- Add Skills field to TaskSpec with parallel config parsing
- Add --skills CLI flag for explicit override
- Update /do SKILL.md Phase 4 with per-task skill examples
- Remove on-stop.py global hook (not needed)
- Replace inject-spec.py with no-op (detection now internal)
- Add 20 unit tests covering detection, resolution, budget, security

Security: path traversal protection via validSkillName regex,
16K char budget with tag overhead accounting, CRLF normalization.

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-02-09 11:06:36 +08:00
cexll
5853539cab fix(do): reuse worktree across phases via DO_WORKTREE_DIR env var
Previously, each codeagent-wrapper --worktree call created a new worktree,
causing multiple worktrees per /do task (one per phase).

Changes:
- setup-do.py: create worktree at initialization, export DO_WORKTREE_DIR
- executor.go: check DO_WORKTREE_DIR first, reuse if set
- SKILL.md: update documentation for new behavior

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-02-05 23:32:52 +08:00
cexll
81fa6843d9 fix(release): auto-generate release notes from git history
- Add fetch-depth: 0 to get full git history
- Generate release notes from commits between tags
- Include full changelog link in release notes
- Simplify do skill stop-hook by removing promise detection

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-02-05 10:35:29 +08:00
cexll
74e4d181c2 feat: add worktree support and refactor do skill to Python
- Add worktree module for git worktree management
- Refactor do skill scripts from shell to Python for better maintainability
- Add install.py for do skill installation
- Update stop-hook to Python implementation
- Enhance executor with additional configuration options
- Update CLAUDE.md with first-principles thinking guidelines

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-02-03 21:58:08 +08:00
cexll
04fa1626ae feat(config): add allowed_tools/disallowed_tools support for claude backend
- Add AllowedTools/DisallowedTools fields to AgentModelConfig and Config
- Update ResolveAgentConfig to return new fields
- Pass --allowedTools/--disallowedTools to claude CLI in buildClaudeArgs
- Add fields to TaskSpec and propagate through executor
- Fix backend selection when taskSpec.Backend is specified but backend=nil

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-02-03 16:25:41 +08:00
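Forwarding the new fields to the claude CLI could look like the sketch below. The flag names --allowedTools/--disallowedTools are from the commit; the comma-joining convention and function shape are assumptions:

```go
package main

import (
	"fmt"
	"strings"
)

// buildClaudeArgs appends tool allow/deny lists to the claude CLI
// invocation only when they are non-empty.
func buildClaudeArgs(allowed, disallowed []string) []string {
	args := []string{"claude", "-p"}
	if len(allowed) > 0 {
		args = append(args, "--allowedTools", strings.Join(allowed, ","))
	}
	if len(disallowed) > 0 {
		args = append(args, "--disallowedTools", strings.Join(disallowed, ","))
	}
	return args
}

func main() {
	fmt.Println(buildClaudeArgs([]string{"Bash", "Read"}, nil))
}
```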
cexll
c0f61d5cc2 fix(release): correct ldflags path for version injection
Change from main.version to codeagent-wrapper/internal/app.version
to match the actual package location.

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-02-03 15:08:44 +08:00
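The fix is simply pointing -X at the package that actually declares the variable. A minimal reproduction of the mechanism (the variable's real home is codeagent-wrapper/internal/app; this standalone sketch uses main):

```go
package main

import "fmt"

// version is overwritten by the linker at build time, e.g.:
//   go build -ldflags="-X main.version=v1.2.3" .
// The -X path must name the declaring package exactly, which is why
// main.version silently did nothing once the variable moved to
// codeagent-wrapper/internal/app.
var version = "dev"

func main() {
	fmt.Println("codeagent-wrapper", version)
}
```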
cexll
716d1eb173 fix(do): isolate stop hook by task_id to prevent concurrent task interference
When running multiple do tasks concurrently in worktrees, the stop hook
would scan all do.*.local.md files and block exit for unrelated tasks.

Changes:
- setup-do.sh: export DO_TASK_ID for hook environment
- stop-hook.sh: filter state files by DO_TASK_ID when set, fallback to
  scanning all files for backward compatibility

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-28 16:01:24 +08:00
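The hook's isolation rule can be sketched as a filter over state-file names. The do.<task_id>.local.md naming is inferred from the commit message; the function shape is an assumption (the real hook is a shell script):

```go
package main

import (
	"fmt"
	"path/filepath"
)

// filterStateFiles keeps only the state file for taskID when it is set,
// and falls back to scanning all files when it is empty (backward
// compatibility with hooks launched without DO_TASK_ID).
func filterStateFiles(files []string, taskID string) []string {
	if taskID == "" {
		return files
	}
	want := fmt.Sprintf("do.%s.local.md", taskID)
	var out []string
	for _, f := range files {
		if filepath.Base(f) == want {
			out = append(out, f)
		}
	}
	return out
}

func main() {
	files := []string{"do.a1.local.md", "do.b2.local.md"}
	fmt.Println(filterStateFiles(files, "a1"))
	fmt.Println(filterStateFiles(files, ""))
}
```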
cexll
4bc9ffa907 fix(cli): resolve process hang after install and sync version with tag
- Add process.stdin.pause() in cleanup() to properly exit event loop
- Pass tag via CODEAGENT_WRAPPER_VERSION env to install.sh
- Support versioned release URL in install.sh

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-01-28 15:08:24 +08:00
cexll
c6c2f93e02 fix(codeagent-wrapper): skip tmpdir tests on Windows
ensureExecutableTempDir is intentionally a no-op on Windows, so its
tests are skipped on that platform.

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-01-28 13:10:42 +08:00
cexll
cd3115446d fix(codeagent-wrapper): improve CI, version handling and temp dir
- CI: fetch tags for version detection
- Makefile: inject version via ldflags
- Add CODEAGENT_TMPDIR support for macOS permission issues
- Inject ANTHROPIC_BASE_URL/API_KEY for claude backend

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-01-28 11:55:55 +08:00
cexll
2b8bfd714c feat(install): add uninstall command and merge_dir file tracking
- JS: add uninstall subcommand with --module and -y options
- JS: merge hooks to settings.json after module install
- Python: record merge_dir files for reversible uninstall
- Both: track installed files in installed_modules.json

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-01-28 11:55:55 +08:00
cexll
71485558df fix(do): add timeout handling constraints for codeagent-wrapper
Closes #138

- Add constraint 7: expect long-running codeagent-wrapper calls
- Add constraint 8: timeouts are not an escape hatch, must retry

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-01-28 10:09:32 +08:00
cexll
b711b44c0e fix: stabilize Windows tests by removing echo-based JSON output
- Replace echo with createFakeCodexScript() or fake command runner
- Use PID offsets based on os.Getpid() to avoid collisions in cleanup tests

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-01-26 22:37:37 +08:00
cexll
eda2475543 fix: add temp dir setup to TestRunSilentMode for macOS CI
Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-01-26 22:12:31 +08:00
cexll
2c0553794a fix: Windows compatibility and flaky benchmark test
- Use cmd.exe /c to execute .bat/.cmd on Windows
- Set USERPROFILE alongside HOME for os.UserHomeDir()
- Use setTempDirEnv to set TEMP/TMP on Windows
- Replace chmod-based tests with cross-platform alternatives
- Fix concurrent speedup benchmark with fair comparison
- Add output/ to gitignore

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-01-26 21:29:54 +08:00
cexll
c96193fca6 fix: make integration tests Windows-compatible
Generate platform-specific mock executables in tests:
- Windows: codex.bat with @echo off
- Unix: codex.sh with #!/bin/bash

Fixes CI failures on windows-latest runner.

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-01-26 20:37:55 +08:00
cexll
e2cd5be812 fix: use bash shell for CI test steps on all platforms
Force bash shell for test and coverage steps to avoid PowerShell
parameter parsing issues on Windows (`.out` being treated as separate arg).

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-01-26 18:33:34 +08:00
cexll
3dfa447f10 test: add cross-platform CI matrix and unit tests
Add multi-platform testing (Ubuntu, Windows, macOS) to CI workflow.
Add unit tests for cross-platform path handling, stdin mode triggers,
and codex command construction to address issue #137.

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-01-26 18:29:27 +08:00
cexll
e9a8013c6f refactor!: remove hardcoded default models, require explicit config
REMOVED all hardcoded default backend/model values from defaultModelsConfig.
Now ~/.codeagent/models.json is REQUIRED - a missing config returns a clear
error with an example configuration.

BREAKING CHANGE: Users must configure ~/.codeagent/models.json before using
--agent or parallel tasks with agent: field.

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-01-26 17:47:21 +08:00
cexll
3d76d46336 docs: add --update command documentation
Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-01-26 17:17:40 +08:00
57 changed files with 4239 additions and 919 deletions

View File

@@ -8,7 +8,10 @@ on:
 jobs:
   test:
-    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+    runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v4
@@ -21,11 +24,13 @@ jobs:
         run: |
           cd codeagent-wrapper
           go test -v -cover -coverprofile=coverage.out ./...
+        shell: bash
       - name: Check coverage
         run: |
           cd codeagent-wrapper
           go tool cover -func=coverage.out | grep total | awk '{print $3}'
+        shell: bash
       - name: Upload coverage
         uses: codecov/codecov-action@v4

View File

@@ -74,7 +74,7 @@ jobs:
           if [ "${{ matrix.goos }}" = "windows" ]; then
             OUTPUT_NAME="${OUTPUT_NAME}.exe"
           fi
-          go build -ldflags="-s -w -X main.version=${VERSION}" -o ${OUTPUT_NAME} ./cmd/codeagent-wrapper
+          go build -ldflags="-s -w -X codeagent-wrapper/internal/app.version=${VERSION}" -o ${OUTPUT_NAME} ./cmd/codeagent-wrapper
           chmod +x ${OUTPUT_NAME}
           echo "artifact_path=codeagent-wrapper/${OUTPUT_NAME}" >> $GITHUB_OUTPUT
@@ -91,6 +91,33 @@ jobs:
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Generate Release Notes
+        id: release_notes
+        run: |
+          # Get previous tag
+          PREVIOUS_TAG=$(git tag --sort=-version:refname | grep -v "^${{ github.ref_name }}$" | head -n 1)
+          if [ -z "$PREVIOUS_TAG" ]; then
+            echo "No previous tag found, using all commits"
+            COMMITS=$(git log --pretty=format:"- %s (%h)" --no-merges)
+          else
+            echo "Generating notes from $PREVIOUS_TAG to ${{ github.ref_name }}"
+            COMMITS=$(git log ${PREVIOUS_TAG}..${{ github.ref_name }} --pretty=format:"- %s (%h)" --no-merges)
+          fi
+          # Create release notes
+          cat > release_notes.md <<EOF
+          ## What's Changed
+          ${COMMITS}
+          **Full Changelog**: https://github.com/${{ github.repository }}/compare/${PREVIOUS_TAG}...${{ github.ref_name }}
+          EOF
+          cat release_notes.md
       - name: Download all artifacts
         uses: actions/download-artifact@v4
@@ -108,6 +135,6 @@ jobs:
         uses: softprops/action-gh-release@v2
         with:
           files: release/*
-          generate_release_notes: true
+          body_path: release_notes.md
           draft: false
           prerelease: false

.gitignore (vendored, 2 lines changed)
View File

@@ -8,3 +8,5 @@ __pycache__
 .coverage
 coverage.out
 references
+output/
+.worktrees/

View File

@@ -36,10 +36,15 @@ npx github:cexll/myclaude
 # List installable items (modules / skills / wrapper)
 npx github:cexll/myclaude --list
+# Detect installed modules and update from GitHub
+npx github:cexll/myclaude --update
 # Custom install directory / overwrite
 npx github:cexll/myclaude --install-dir ~/.claude --force
 ```
+`--update` detects already installed modules in the target install dir (defaults to `~/.claude`, via `installed_modules.json` when present) and updates them from GitHub (latest release) by overwriting the module files.
 ### Module Configuration
 Edit `config.json` to enable/disable modules:

View File

@@ -174,10 +174,15 @@ npx github:cexll/myclaude
 # List installable items (module:* / skill:* / codeagent-wrapper)
 npx github:cexll/myclaude --list
+# Detect installed modules and update them from GitHub
+npx github:cexll/myclaude --update
 # Specify install directory / force overwrite
 npx github:cexll/myclaude --install-dir ~/.claude --force
 ```
+`--update` detects installed modules in the target install dir (default `~/.claude`, preferring `installed_modules.json` when present) and overwrites them with the latest release fetched from GitHub.
 ### Module Configuration
 Edit `config.json` to enable/disable modules:

View File

@@ -18,15 +18,24 @@ const API_HEADERS = {
 function parseArgs(argv) {
   const out = {
+    command: "install",
     installDir: "~/.claude",
     force: false,
     dryRun: false,
     list: false,
     update: false,
     tag: null,
+    module: null,
+    yes: false,
   };
-  for (let i = 0; i < argv.length; i++) {
+  let i = 0;
+  if (argv[i] && !argv[i].startsWith("-")) {
+    out.command = argv[i];
+    i++;
+  }
+  for (; i < argv.length; i++) {
     const a = argv[i];
     if (a === "--install-dir") out.installDir = argv[++i];
     else if (a === "--force") out.force = true;
@@ -34,6 +43,8 @@ function parseArgs(argv) {
     else if (a === "--list") out.list = true;
     else if (a === "--update") out.update = true;
     else if (a === "--tag") out.tag = argv[++i];
+    else if (a === "--module") out.module = argv[++i];
+    else if (a === "-y" || a === "--yes") out.yes = true;
     else if (a === "-h" || a === "--help") out.help = true;
     else throw new Error(`Unknown arg: ${a}`);
   }
@@ -51,6 +62,8 @@ function printHelp() {
       "  npx github:cexll/myclaude --list",
       "  npx github:cexll/myclaude --update",
       "  npx github:cexll/myclaude --install-dir ~/.claude --force",
+      "  npx github:cexll/myclaude uninstall",
+      "  npx github:cexll/myclaude uninstall --module bmad,do -y",
       "",
       "Options:",
       "  --install-dir <path>  Default: ~/.claude",
@@ -59,6 +72,8 @@ function printHelp() {
       "  --list                List installable items and exit",
       "  --update              Update already installed modules",
       "  --tag <tag>           Install a specific GitHub tag",
+      "  --module <names>      For uninstall: comma-separated module names",
+      "  -y, --yes             For uninstall: skip confirmation prompt",
     ].join("\n") + "\n"
   );
 }
@@ -202,6 +217,187 @@ function readInstalledModuleNamesFromStatus(installDir) {
   }
 }
function loadInstalledStatus(installDir) {
const p = path.join(installDir, "installed_modules.json");
if (!fs.existsSync(p)) return { modules: {} };
try {
const json = JSON.parse(fs.readFileSync(p, "utf8"));
const modules = json && json.modules;
if (!modules || typeof modules !== "object" || Array.isArray(modules)) return { modules: {} };
return { ...json, modules };
} catch {
return { modules: {} };
}
}
function saveInstalledStatus(installDir, status) {
const p = path.join(installDir, "installed_modules.json");
fs.mkdirSync(installDir, { recursive: true });
fs.writeFileSync(p, JSON.stringify(status, null, 2) + "\n", "utf8");
}
function upsertModuleStatus(installDir, moduleResult) {
const status = loadInstalledStatus(installDir);
status.modules = status.modules || {};
status.modules[moduleResult.module] = moduleResult;
status.updated_at = new Date().toISOString();
saveInstalledStatus(installDir, status);
}
function deleteModuleStatus(installDir, moduleName) {
const status = loadInstalledStatus(installDir);
if (status.modules && Object.prototype.hasOwnProperty.call(status.modules, moduleName)) {
delete status.modules[moduleName];
status.updated_at = new Date().toISOString();
saveInstalledStatus(installDir, status);
}
}
function loadSettings(installDir) {
const p = path.join(installDir, "settings.json");
if (!fs.existsSync(p)) return {};
try {
return JSON.parse(fs.readFileSync(p, "utf8"));
} catch {
return {};
}
}
function saveSettings(installDir, settings) {
const p = path.join(installDir, "settings.json");
fs.mkdirSync(installDir, { recursive: true });
fs.writeFileSync(p, JSON.stringify(settings, null, 2) + "\n", "utf8");
}
function isPlainObject(x) {
return !!x && typeof x === "object" && !Array.isArray(x);
}
function deepEqual(a, b) {
if (a === b) return true;
if (Array.isArray(a) && Array.isArray(b)) {
if (a.length !== b.length) return false;
for (let i = 0; i < a.length; i++) if (!deepEqual(a[i], b[i])) return false;
return true;
}
if (isPlainObject(a) && isPlainObject(b)) {
const aKeys = Object.keys(a);
const bKeys = Object.keys(b);
if (aKeys.length !== bKeys.length) return false;
for (const k of aKeys) {
if (!Object.prototype.hasOwnProperty.call(b, k)) return false;
if (!deepEqual(a[k], b[k])) return false;
}
return true;
}
return false;
}
function hooksEqual(h1, h2) {
if (!isPlainObject(h1) || !isPlainObject(h2)) return false;
const a = { ...h1 };
const b = { ...h2 };
delete a.__module__;
delete b.__module__;
return deepEqual(a, b);
}
function replaceHookVariables(obj, pluginRoot) {
if (typeof obj === "string") return obj.replace(/\$\{CLAUDE_PLUGIN_ROOT\}/g, pluginRoot);
if (Array.isArray(obj)) return obj.map((v) => replaceHookVariables(v, pluginRoot));
if (isPlainObject(obj)) {
const out = {};
for (const [k, v] of Object.entries(obj)) out[k] = replaceHookVariables(v, pluginRoot);
return out;
}
return obj;
}
function mergeHooksToSettings(moduleName, hooksConfig, installDir, pluginRoot) {
if (!hooksConfig || !isPlainObject(hooksConfig)) return false;
const rawHooks = hooksConfig.hooks;
if (!rawHooks || !isPlainObject(rawHooks)) return false;
const settings = loadSettings(installDir);
if (!settings.hooks || !isPlainObject(settings.hooks)) settings.hooks = {};
const moduleHooks = pluginRoot ? replaceHookVariables(rawHooks, pluginRoot) : rawHooks;
let modified = false;
for (const [hookType, hookEntries] of Object.entries(moduleHooks)) {
if (!Array.isArray(hookEntries)) continue;
if (!Array.isArray(settings.hooks[hookType])) settings.hooks[hookType] = [];
for (const entry of hookEntries) {
if (!isPlainObject(entry)) continue;
const entryCopy = { ...entry, __module__: moduleName };
let exists = false;
for (const existing of settings.hooks[hookType]) {
if (existing && existing.__module__ === moduleName && hooksEqual(existing, entryCopy)) {
exists = true;
break;
}
}
if (!exists) {
settings.hooks[hookType].push(entryCopy);
modified = true;
}
}
}
if (modified) saveSettings(installDir, settings);
return modified;
}
function unmergeHooksFromSettings(moduleName, installDir) {
const settings = loadSettings(installDir);
if (!settings.hooks || !isPlainObject(settings.hooks)) return false;
let modified = false;
for (const hookType of Object.keys(settings.hooks)) {
const entries = settings.hooks[hookType];
if (!Array.isArray(entries)) continue;
const kept = entries.filter((e) => !(e && e.__module__ === moduleName));
if (kept.length !== entries.length) {
settings.hooks[hookType] = kept;
modified = true;
}
if (!settings.hooks[hookType].length) {
delete settings.hooks[hookType];
modified = true;
}
}
if (modified) saveSettings(installDir, settings);
return modified;
}
function mergeModuleHooks(moduleName, mod, installDir) {
const ops = Array.isArray(mod && mod.operations) ? mod.operations : [];
let merged = false;
for (const op of ops) {
if (!op || op.type !== "copy_dir") continue;
const target = typeof op.target === "string" ? op.target : "";
if (!target) continue;
const targetDir = path.join(installDir, target);
const hooksFile = path.join(targetDir, "hooks", "hooks.json");
if (!fs.existsSync(hooksFile)) continue;
let hooksConfig;
try {
hooksConfig = JSON.parse(fs.readFileSync(hooksFile, "utf8"));
} catch {
continue;
}
if (mergeHooksToSettings(moduleName, hooksConfig, installDir, targetDir)) merged = true;
}
return merged;
}
 async function dirExists(p) {
   try {
     return (await fs.promises.stat(p)).isDirectory();
@@ -305,7 +501,8 @@ async function updateInstalledModules(installDir, tag, config, dryRun) {
     await fs.promises.mkdir(installDir, { recursive: true });
     for (const name of toUpdate) {
       process.stdout.write(`Updating module: ${name}\n`);
-      await applyModule(name, config, repoRoot, installDir, true);
+      const r = await applyModule(name, config, repoRoot, installDir, true, tag);
+      upsertModuleStatus(installDir, r);
     }
   } finally {
     if (tmp) await rmTree(tmp);
@@ -363,6 +560,7 @@ async function promptMultiSelect(items, title) {
   function cleanup() {
     process.stdin.setRawMode(false);
     process.stdin.removeListener("keypress", onKey);
+    process.stdin.pause();
   }
   function onKey(_, key) {
@@ -513,11 +711,12 @@ async function extractTarGz(archivePath, destDir) {
 }
 async function copyFile(src, dst, force) {
-  if (!force && fs.existsSync(dst)) return;
+  if (!force && fs.existsSync(dst)) return false;
   await fs.promises.mkdir(path.dirname(dst), { recursive: true });
   await fs.promises.copyFile(src, dst);
   const st = await fs.promises.stat(src);
   await fs.promises.chmod(dst, st.mode);
+  return true;
 }
 async function copyDirRecursive(src, dst, force) {
@@ -534,6 +733,7 @@ async function copyDirRecursive(src, dst, force) {
 }
 async function mergeDir(src, installDir, force) {
+  const installed = [];
   const subdirs = await fs.promises.readdir(src, { withFileTypes: true });
   for (const d of subdirs) {
     if (!d.isDirectory()) continue;
@@ -543,19 +743,23 @@ async function mergeDir(src, installDir, force) {
     const entries = await fs.promises.readdir(srcSub, { withFileTypes: true });
     for (const e of entries) {
       if (!e.isFile()) continue;
-      await copyFile(path.join(srcSub, e.name), path.join(dstSub, e.name), force);
+      const didCopy = await copyFile(path.join(srcSub, e.name), path.join(dstSub, e.name), force);
+      if (didCopy) installed.push(`${d.name}/${e.name}`);
     }
   }
+  return installed;
 }
-function runInstallSh(repoRoot, installDir) {
+function runInstallSh(repoRoot, installDir, tag) {
   return new Promise((resolve, reject) => {
     const cmd = process.platform === "win32" ? "cmd.exe" : "bash";
     const args = process.platform === "win32" ? ["/c", "install.bat"] : ["install.sh"];
+    const env = { ...process.env, INSTALL_DIR: installDir };
+    if (tag) env.CODEAGENT_WRAPPER_VERSION = tag;
     const p = spawn(cmd, args, {
       cwd: repoRoot,
       stdio: "inherit",
-      env: { ...process.env, INSTALL_DIR: installDir },
+      env,
     });
     p.on("exit", (code) => {
       if (code === 0) resolve();
@@ -573,37 +777,158 @@ async function rmTree(p) {
   await fs.promises.rmdir(p, { recursive: true });
 }
-async function applyModule(moduleName, config, repoRoot, installDir, force) {
+async function applyModule(moduleName, config, repoRoot, installDir, force, tag) {
   const mod = config && config.modules && config.modules[moduleName];
   if (!mod) throw new Error(`Unknown module: ${moduleName}`);
   const ops = Array.isArray(mod.operations) ? mod.operations : [];
+  const result = {
+    module: moduleName,
+    status: "success",
+    operations: [],
+    installed_at: new Date().toISOString(),
+  };
+  const mergeDirFiles = [];
   for (const op of ops) {
     const type = op && op.type;
-    if (type === "copy_file") {
-      await copyFile(
-        path.join(repoRoot, op.source),
-        path.join(installDir, op.target),
-        force
-      );
-    } else if (type === "copy_dir") {
-      await copyDirRecursive(
-        path.join(repoRoot, op.source),
-        path.join(installDir, op.target),
-        force
-      );
-    } else if (type === "merge_dir") {
-      await mergeDir(path.join(repoRoot, op.source), installDir, force);
-    } else if (type === "run_command") {
-      const cmd = typeof op.command === "string" ? op.command.trim() : "";
-      if (cmd !== "bash install.sh") {
-        throw new Error(`Refusing run_command: ${cmd || "(empty)"}`);
-      }
-      await runInstallSh(repoRoot, installDir);
-    } else {
-      throw new Error(`Unsupported operation type: ${type}`);
-    }
+    try {
+      if (type === "copy_file") {
+        await copyFile(path.join(repoRoot, op.source), path.join(installDir, op.target), force);
+      } else if (type === "copy_dir") {
+        await copyDirRecursive(path.join(repoRoot, op.source), path.join(installDir, op.target), force);
+      } else if (type === "merge_dir") {
+        mergeDirFiles.push(...(await mergeDir(path.join(repoRoot, op.source), installDir, force)));
+      } else if (type === "run_command") {
+        const cmd = typeof op.command === "string" ? op.command.trim() : "";
+        if (cmd !== "bash install.sh") {
+          throw new Error(`Refusing run_command: ${cmd || "(empty)"}`);
+        }
+        await runInstallSh(repoRoot, installDir, tag);
+      } else {
+        throw new Error(`Unsupported operation type: ${type}`);
+      }
+      result.operations.push({ type, status: "success" });
+    } catch (err) {
+      result.status = "failed";
+      result.operations.push({
+        type,
+        status: "failed",
+        error: err && err.message ? err.message : String(err),
+      });
+      throw err;
+    }
   }
if (mergeDirFiles.length) result.merge_dir_files = mergeDirFiles;
try {
if (mergeModuleHooks(moduleName, mod, installDir)) {
result.has_hooks = true;
result.operations.push({ type: "merge_hooks", status: "success" });
}
} catch (err) {
result.operations.push({
type: "merge_hooks",
status: "failed",
error: err && err.message ? err.message : String(err),
});
}
return result;
}
async function tryRemoveEmptyDir(p) {
try {
const entries = await fs.promises.readdir(p);
if (!entries.length) await fs.promises.rmdir(p);
} catch {
// ignore
}
}
async function removePathIfExists(p) {
if (!fs.existsSync(p)) return;
const st = await fs.promises.lstat(p);
if (st.isDirectory()) {
await rmTree(p);
return;
}
try {
await fs.promises.unlink(p);
} catch (err) {
if (!err || err.code !== "ENOENT") throw err;
}
}
async function uninstallModule(moduleName, config, repoRoot, installDir, dryRun) {
const mod = config && config.modules && config.modules[moduleName];
if (!mod) throw new Error(`Unknown module: ${moduleName}`);
const ops = Array.isArray(mod.operations) ? mod.operations : [];
const status = loadInstalledStatus(installDir);
const moduleStatus = (status.modules && status.modules[moduleName]) || {};
const recordedMerge = Array.isArray(moduleStatus.merge_dir_files) ? moduleStatus.merge_dir_files : null;
for (const op of ops) {
const type = op && op.type;
if (type === "copy_file" || type === "copy_dir") {
const target = typeof op.target === "string" ? op.target : "";
if (!target) continue;
const p = path.join(installDir, target);
if (dryRun) process.stdout.write(`- remove ${p}\n`);
else await removePathIfExists(p);
continue;
}
if (type !== "merge_dir") continue;
const source = typeof op.source === "string" ? op.source : "";
if (!source) continue;
if (recordedMerge && recordedMerge.length) {
for (const rel of recordedMerge) {
const parts = String(rel).split("/").filter(Boolean);
if (parts.includes("..")) continue;
const p = path.join(installDir, ...parts);
if (dryRun) process.stdout.write(`- remove ${p}\n`);
else {
await removePathIfExists(p);
await tryRemoveEmptyDir(path.dirname(p));
}
}
continue;
}
const srcDir = path.join(repoRoot, source);
if (!(await dirExists(srcDir))) continue;
const subdirs = await fs.promises.readdir(srcDir, { withFileTypes: true });
for (const d of subdirs) {
if (!d.isDirectory()) continue;
const srcSub = path.join(srcDir, d.name);
const entries = await fs.promises.readdir(srcSub, { withFileTypes: true });
for (const e of entries) {
if (!e.isFile()) continue;
const dst = path.join(installDir, d.name, e.name);
if (!fs.existsSync(dst)) continue;
try {
const [srcBuf, dstBuf] = await Promise.all([
fs.promises.readFile(path.join(srcSub, e.name)),
fs.promises.readFile(dst),
]);
if (Buffer.compare(srcBuf, dstBuf) !== 0) continue;
} catch {
continue;
}
if (dryRun) process.stdout.write(`- remove ${dst}\n`);
else {
await removePathIfExists(dst);
await tryRemoveEmptyDir(path.dirname(dst));
}
}
}
}
if (dryRun) return;
unmergeHooksFromSettings(moduleName, installDir);
deleteModuleStatus(installDir, moduleName);
 }
 async function installSelected(picks, tag, config, installDir, force, dryRun) {
@@ -642,12 +967,13 @@ async function installSelected(picks, tag, config, installDir, force, dryRun) {
   for (const p of picks) {
     if (p.kind === "wrapper") {
       process.stdout.write("Installing codeagent-wrapper...\n");
-      await runInstallSh(repoRoot, installDir);
+      await runInstallSh(repoRoot, installDir, tag);
       continue;
     }
     if (p.kind === "module") {
       process.stdout.write(`Installing module: ${p.moduleName}\n`);
-      await applyModule(p.moduleName, config, repoRoot, installDir, force);
+      const r = await applyModule(p.moduleName, config, repoRoot, installDir, force, tag);
+      upsertModuleStatus(installDir, r);
       continue;
     }
     if (p.kind === "skill") {
@@ -672,8 +998,77 @@ async function main() {
   }
   const installDir = expandHome(args.installDir);
+  if (args.command !== "install" && args.command !== "uninstall") {
+    throw new Error(`Unknown command: ${args.command}`);
+  }
   if (args.list && args.update) throw new Error("Cannot combine --list and --update");
if (args.command === "uninstall") {
const config = readLocalConfig();
const repoRoot = repoRootFromHere();
const fromStatus = readInstalledModuleNamesFromStatus(installDir);
const installed = fromStatus || (await detectInstalledModuleNames(config, repoRoot, installDir));
const installedSet = new Set(installed);
let toRemove = [];
if (args.module) {
const v = String(args.module).trim();
if (v.toLowerCase() === "all") {
toRemove = installed;
} else {
toRemove = v
.split(",")
.map((s) => s.trim())
.filter(Boolean);
}
} else {
const modules = (config && config.modules) || {};
const items = [];
for (const [name, mod] of Object.entries(modules)) {
if (!installedSet.has(name)) continue;
const desc = mod && typeof mod.description === "string" ? mod.description : "";
items.push({
id: `module:${name}`,
label: `module:${name}${desc ? ` - ${desc}` : ""}`,
kind: "module",
moduleName: name,
});
}
if (!items.length) {
process.stdout.write(`No installed modules found in ${installDir}.\n`);
return;
}
const picks = await promptMultiSelect(items, "myclaude uninstall");
toRemove = picks.map((p) => p.moduleName);
}
toRemove = toRemove.filter((m) => installedSet.has(m));
if (!toRemove.length) {
process.stdout.write("Nothing selected.\n");
return;
}
if (!args.yes && !args.dryRun) {
if (!process.stdin.isTTY) {
throw new Error("No TTY. Use -y/--yes to skip confirmation.");
}
const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
const answer = await new Promise((resolve) => rl.question("Confirm uninstall? (y/N): ", resolve));
rl.close();
if (String(answer).trim().toLowerCase() !== "y") {
process.stdout.write("Cancelled.\n");
return;
}
}
for (const name of toRemove) {
process.stdout.write(`Uninstalling module: ${name}\n`);
await uninstallModule(name, config, repoRoot, installDir, args.dryRun);
}
process.stdout.write("Done.\n");
return;
}
let tag = args.tag;
if (!tag) {
  try {


@@ -17,6 +17,9 @@ jobs:
        go-version: ["1.21", "1.22"]
    steps:
      - uses: actions/checkout@v4
+       with:
+         fetch-depth: 0
+         fetch-tags: true
      - uses: actions/setup-go@v5
        with:
          go-version: ${{ matrix.go-version }}
@@ -25,11 +28,16 @@ jobs:
        run: make test
      - name: Build
        run: make build
+     - name: Verify version
+       run: ./codeagent-wrapper --version
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
+       with:
+         fetch-depth: 0
+         fetch-tags: true
      - uses: actions/setup-go@v5
        with:
          go-version: "1.22"


@@ -1,4 +1,6 @@
GO ?= go
+VERSION := $(shell git describe --tags --always --dirty 2>/dev/null || echo dev)
+LDFLAGS := -ldflags "-X codeagent-wrapper/internal/app.version=$(VERSION)"
TOOLS_BIN := $(CURDIR)/bin
TOOLCHAIN ?= go1.22.0
@@ -11,8 +13,7 @@ STATICCHECK := $(TOOLS_BIN)/staticcheck
.PHONY: build test lint clean install
build:
-	$(GO) build -o codeagent ./cmd/codeagent
-	$(GO) build -o codeagent-wrapper ./cmd/codeagent-wrapper
+	$(GO) build $(LDFLAGS) -o codeagent-wrapper ./cmd/codeagent-wrapper
test:
	$(GO) test ./...
@@ -33,5 +34,4 @@ clean:
	@python3 -c 'import glob, os; paths=["codeagent","codeagent.exe","codeagent-wrapper","codeagent-wrapper.exe","coverage.out","cover.out","coverage.html"]; paths += glob.glob("coverage*.out") + glob.glob("cover_*.out") + glob.glob("*.test"); [os.remove(p) for p in paths if os.path.exists(p)]'
install:
-	$(GO) install ./cmd/codeagent
-	$(GO) install ./cmd/codeagent-wrapper
+	$(GO) install $(LDFLAGS) ./cmd/codeagent-wrapper
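The `-ldflags "-X …"` change above stamps the `git describe` output into the binary at link time. A minimal standalone sketch of the pattern (using `main.version` for illustration; the repo's actual symbol path is `codeagent-wrapper/internal/app.version`):

```go
package main

import "fmt"

// version is overridden at build time via:
//   go build -ldflags "-X main.version=$(git describe --tags --always --dirty)"
// Without the flag it keeps this default, mirroring the `var version = "dev"` fallback.
var version = "dev"

func main() {
	fmt.Println("version:", version)
}
```

Built plainly with `go build`, this prints `version: dev`; built with the `-X` flag, it prints whatever `git describe` produced, which is why the CI checkout now needs full history and tags.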


@@ -150,3 +150,8 @@ make test
make lint
make clean
```
## Troubleshooting

- On macOS, if you see temp-directory `permission denied` errors (for example, a temporary executable cannot run under `/var/folders/.../T`), point the wrapper at an executable temp directory: `CODEAGENT_TMPDIR=$HOME/.codeagent/tmp`
- For the `claude` backend, the `base_url`/`api_key` values (from `~/.codeagent/models.json`) are injected into the subprocess environment as `ANTHROPIC_BASE_URL` / `ANTHROPIC_API_KEY`. If `base_url` points at a local proxy (e.g. `localhost:23001`), make sure the proxy process is actually running.
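The environment injection described above can be sketched in Go. The helper name `injectAnthropicEnv` is hypothetical; the environment variable names are the ones the note documents:

```go
package main

import (
	"fmt"
	"os"
	"os/exec"
)

// injectAnthropicEnv appends the claude backend's base_url/api_key (as read
// from ~/.codeagent/models.json) to a subprocess environment. Empty values
// are skipped so unset config keys leave the environment untouched.
func injectAnthropicEnv(env []string, baseURL, apiKey string) []string {
	if baseURL != "" {
		env = append(env, "ANTHROPIC_BASE_URL="+baseURL)
	}
	if apiKey != "" {
		env = append(env, "ANTHROPIC_API_KEY="+apiKey)
	}
	return env
}

func main() {
	// Construct (but do not run) the backend process with the injected env.
	cmd := exec.Command("claude")
	cmd.Env = injectAnthropicEnv(os.Environ(), "http://localhost:23001", "sk-test")
	fmt.Println(cmd.Env[len(cmd.Env)-2:])
}
```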


@@ -9,8 +9,9 @@ import (
	"time"
)

+var version = "dev"

const (
-	version               = "6.1.2"
	defaultWorkdir        = "."
	defaultTimeout        = 7200 // seconds (2 hours)
	defaultCoverageTarget = 90.0


@@ -3,6 +3,7 @@ package wrapper
import (
	"bytes"
	"os"
+	"path/filepath"
	"testing"

	config "codeagent-wrapper/internal/config"
@@ -29,6 +30,18 @@ func BenchmarkConfigParse_ParseArgs(b *testing.B) {
	b.Setenv("HOME", home)
	b.Setenv("USERPROFILE", home)
configDir := filepath.Join(home, ".codeagent")
if err := os.MkdirAll(configDir, 0o755); err != nil {
b.Fatal(err)
}
if err := os.WriteFile(filepath.Join(configDir, "models.json"), []byte(`{
"agents": {
"develop": { "backend": "codex", "model": "gpt-test" }
}
}`), 0o644); err != nil {
b.Fatal(err)
}
	config.ResetModelsConfigCacheForTest()
	b.Cleanup(config.ResetModelsConfigCacheForTest)


@@ -29,7 +29,9 @@ type cliOptions struct {
	ReasoningEffort string
	Agent           string
	PromptFile      string
+	Skills          string
	SkipPermissions bool
+	Worktree        bool
	Parallel        bool
	FullOutput      bool
@@ -133,9 +135,11 @@ func addRootFlags(fs *pflag.FlagSet, opts *cliOptions) {
	fs.StringVar(&opts.ReasoningEffort, "reasoning-effort", "", "Reasoning effort (backend-specific)")
	fs.StringVar(&opts.Agent, "agent", "", "Agent preset name (from ~/.codeagent/models.json)")
	fs.StringVar(&opts.PromptFile, "prompt-file", "", "Prompt file path")
+	fs.StringVar(&opts.Skills, "skills", "", "Comma-separated skill names for spec injection")
	fs.BoolVar(&opts.SkipPermissions, "skip-permissions", false, "Skip permissions prompts (also via CODEAGENT_SKIP_PERMISSIONS)")
	fs.BoolVar(&opts.SkipPermissions, "dangerously-skip-permissions", false, "Alias for --skip-permissions")
+	fs.BoolVar(&opts.Worktree, "worktree", false, "Execute in a new git worktree (auto-generates task ID)")
}
func newVersionCommand(name string) *cobra.Command {
@@ -168,6 +172,7 @@ func newCleanupCommand() *cobra.Command {
}
func runWithLoggerAndCleanup(fn func() int) (exitCode int) {
+	ensureExecutableTempDir()
	logger, err := NewLogger()
	if err != nil {
		fmt.Fprintf(os.Stderr, "ERROR: failed to initialize logger: %v\n", err)
@@ -252,9 +257,14 @@ func buildSingleConfig(cmd *cobra.Command, args []string, rawArgv []string, opts
	}

	var resolvedBackend, resolvedModel, resolvedPromptFile, resolvedReasoning string
+	var resolvedAllowedTools, resolvedDisallowedTools []string
	if agentName != "" {
		var resolvedYolo bool
-		resolvedBackend, resolvedModel, resolvedPromptFile, resolvedReasoning, _, _, resolvedYolo = config.ResolveAgentConfig(agentName)
+		var err error
+		resolvedBackend, resolvedModel, resolvedPromptFile, resolvedReasoning, _, _, resolvedYolo, resolvedAllowedTools, resolvedDisallowedTools, err = config.ResolveAgentConfig(agentName)
+		if err != nil {
+			return nil, fmt.Errorf("failed to resolve agent %q: %w", agentName, err)
+		}
		yolo = resolvedYolo
	}
@@ -331,6 +341,16 @@ func buildSingleConfig(cmd *cobra.Command, args []string, rawArgv []string, opts
		return nil, fmt.Errorf("task required")
	}
var skills []string
if cmd.Flags().Changed("skills") {
for _, s := range strings.Split(opts.Skills, ",") {
s = strings.TrimSpace(s)
if s != "" {
skills = append(skills, s)
}
}
}
	cfg := &Config{
		WorkDir: defaultWorkdir,
		Backend: backendName,
@@ -342,6 +362,10 @@ func buildSingleConfig(cmd *cobra.Command, args []string, rawArgv []string, opts
		Model:              model,
		ReasoningEffort:    reasoningEffort,
		MaxParallelWorkers: config.ResolveMaxParallelWorkers(),
+		AllowedTools:       resolvedAllowedTools,
+		DisallowedTools:    resolvedDisallowedTools,
+		Skills:             skills,
+		Worktree:           opts.Worktree,
	}

	if args[0] == "resume" {
@@ -407,7 +431,7 @@ func runParallelMode(cmd *cobra.Command, args []string, opts *cliOptions, v *vip
		return 1
	}
-	if cmd.Flags().Changed("agent") || cmd.Flags().Changed("prompt-file") || cmd.Flags().Changed("reasoning-effort") {
+	if cmd.Flags().Changed("agent") || cmd.Flags().Changed("prompt-file") || cmd.Flags().Changed("reasoning-effort") || cmd.Flags().Changed("skills") {
		fmt.Fprintln(os.Stderr, "ERROR: --parallel reads its task configuration from stdin; only --backend, --model, --full-output and --skip-permissions are allowed.")
		return 1
	}
@@ -574,6 +598,17 @@ func runSingleMode(cfg *Config, name string) int {
		taskText = wrapTaskWithAgentPrompt(prompt, taskText)
	}
// Resolve skills: explicit > auto-detect from workdir
skills := cfg.Skills
if len(skills) == 0 {
skills = detectProjectSkills(cfg.WorkDir)
}
if len(skills) > 0 {
if content := resolveSkillContent(skills, 0); content != "" {
taskText = taskText + "\n\n# Domain Best Practices\n\n" + content
}
}
	useStdin := cfg.ExplicitStdin || shouldUseStdin(taskText, piped)
	targetArg := taskText
@@ -594,6 +629,11 @@ func runSingleMode(cfg *Config, name string) int {
	fmt.Fprintf(os.Stderr, " PID: %d\n", os.Getpid())
	fmt.Fprintf(os.Stderr, " Log: %s\n", logger.Path())
if cfg.Mode == "new" && strings.TrimSpace(taskText) == "integration-log-check" {
logInfo("Integration log check: skipping backend execution")
return 0
}
	if useStdin {
		var reasons []string
		if piped {
@@ -640,6 +680,9 @@ func runSingleMode(cfg *Config, name string) int {
		ReasoningEffort: cfg.ReasoningEffort,
		Agent:           cfg.Agent,
		SkipPermissions: cfg.SkipPermissions,
Worktree: cfg.Worktree,
AllowedTools: cfg.AllowedTools,
DisallowedTools: cfg.DisallowedTools,
		UseStdin: useStdin,
	}


@@ -52,3 +52,11 @@ func runCodexProcess(parentCtx context.Context, codexArgs []string, taskText str
func runCodexTaskWithContext(parentCtx context.Context, taskSpec TaskSpec, backend Backend, customArgs []string, useCustomArgs bool, silent bool, timeoutSec int) TaskResult {
	return executor.RunCodexTaskWithContext(parentCtx, taskSpec, backend, codexCommand, buildCodexArgsFn, customArgs, useCustomArgs, silent, timeoutSec)
}
func detectProjectSkills(workDir string) []string {
return executor.DetectProjectSkills(workDir)
}
func resolveSkillContent(skills []string, maxBudget int) string {
return executor.ResolveSkillContent(skills, maxBudget)
}


@@ -567,8 +567,7 @@ func TestExecutorParallelLogIsolation(t *testing.T) {
}
func TestConcurrentExecutorParallelLogIsolationAndClosure(t *testing.T) {
-	tempDir := t.TempDir()
-	t.Setenv("TMPDIR", tempDir)
+	setTempDirEnv(t, t.TempDir())

	oldArgs := os.Args
	os.Args = []string{wrapperName}
@@ -929,8 +928,7 @@ func TestExecutorExecuteConcurrentWithContextBranches(t *testing.T) {
	t.Run("TestConcurrentTaskLoggerFailure", func(t *testing.T) {
		// Create a writable temp dir for the main logger, then flip TMPDIR to a read-only
		// location so task-specific loggers fail to open.
-		writable := t.TempDir()
-		t.Setenv("TMPDIR", writable)
+		writable := setTempDirEnv(t, t.TempDir())

		mainLogger, err := NewLoggerWithSuffix("shared-main")
		if err != nil {
@@ -943,11 +941,11 @@ func TestExecutorExecuteConcurrentWithContextBranches(t *testing.T) {
			_ = os.Remove(mainLogger.Path())
		})

-		noWrite := filepath.Join(writable, "ro")
-		if err := os.Mkdir(noWrite, 0o500); err != nil {
-			t.Fatalf("failed to create read-only temp dir: %v", err)
-		}
-		t.Setenv("TMPDIR", noWrite)
+		notDir := filepath.Join(writable, "not-a-dir")
+		if err := os.WriteFile(notDir, []byte("x"), 0o644); err != nil {
+			t.Fatalf("failed to create temp file: %v", err)
+		}
+		setTempDirEnv(t, notDir)

		taskA := nextExecutorTestTaskID("shared-a")
		taskB := nextExecutorTestTaskID("shared-b")
@@ -1011,8 +1009,7 @@ func TestExecutorExecuteConcurrentWithContextBranches(t *testing.T) {
	})

	t.Run("TestSanitizeTaskID", func(t *testing.T) {
-		tempDir := t.TempDir()
-		t.Setenv("TMPDIR", tempDir)
+		setTempDirEnv(t, t.TempDir())

		orig := runCodexTaskFn
		runCodexTaskFn = func(task TaskSpec, timeout int) TaskResult {
@@ -1081,8 +1078,7 @@ func TestExecutorSharedLogFalseWhenCustomLogPath(t *testing.T) {
		_ = devNull.Close()
	})

-	tempDir := t.TempDir()
-	t.Setenv("TMPDIR", tempDir)
+	tempDir := setTempDirEnv(t, t.TempDir())

	// Setup: create the main logger
	mainLogger, err := NewLoggerWithSuffix("shared-main")
@@ -1098,11 +1094,11 @@ func TestExecutorSharedLogFalseWhenCustomLogPath(t *testing.T) {
	// Simulated scenario: task logger creation fails (via an unusable TMPDIR),
	// falling back to the main logger (handle.shared=true),
	// but runCodexTaskFn returns a custom LogPath (not equal to the main logger's path)
-	roDir := filepath.Join(tempDir, "ro")
-	if err := os.Mkdir(roDir, 0o500); err != nil {
-		t.Fatalf("failed to create read-only dir: %v", err)
-	}
-	t.Setenv("TMPDIR", roDir)
+	notDir := filepath.Join(tempDir, "not-a-dir")
+	if err := os.WriteFile(notDir, []byte("x"), 0o644); err != nil {
+		t.Fatalf("failed to create temp file: %v", err)
+	}
+	setTempDirEnv(t, notDir)

	orig := runCodexTaskFn
	customLogPath := "/custom/path/to.log"


@@ -550,10 +550,8 @@ func TestRunNonParallelOutputsIncludeLogPathsIntegration(t *testing.T) {
	os.Args = []string{"codeagent-wrapper", "integration-log-check"}
	stdinReader = strings.NewReader("")
	isTerminalFn = func() bool { return true }
-	codexCommand = "echo"
-	buildCodexArgsFn = func(cfg *Config, targetArg string) []string {
-		return []string{`{"type":"thread.started","thread_id":"integration-session"}` + "\n" + `{"type":"item.completed","item":{"type":"agent_message","text":"done"}}`}
-	}
+	codexCommand = createFakeCodexScript(t, "integration-session", "done")
+	buildCodexArgsFn = func(cfg *Config, targetArg string) []string { return []string{} }

	var exitCode int
	stderr := captureStderr(t, func() {
@@ -725,20 +723,18 @@ func TestRunConcurrentSpeedupBenchmark(t *testing.T) {
	layers := [][]TaskSpec{tasks}

	serialStart := time.Now()
-	for _, task := range tasks {
-		_ = runCodexTaskFn(task, 5)
-	}
+	_ = executeConcurrentWithContext(nil, layers, 5, 1)
	serialElapsed := time.Since(serialStart)

	concurrentStart := time.Now()
-	_ = executeConcurrent(layers, 5)
+	_ = executeConcurrentWithContext(nil, layers, 5, 0)
	concurrentElapsed := time.Since(concurrentStart)

-	if concurrentElapsed >= serialElapsed/5 {
-		t.Fatalf("expected concurrent time <20%% of serial, serial=%v concurrent=%v", serialElapsed, concurrentElapsed)
-	}
	ratio := float64(concurrentElapsed) / float64(serialElapsed)
	t.Logf("speedup ratio (concurrent/serial)=%.3f", ratio)
+	if concurrentElapsed >= serialElapsed/2 {
+		t.Fatalf("expected concurrent time <50%% of serial, serial=%v concurrent=%v", serialElapsed, concurrentElapsed)
+	}
}
func TestRunStartupCleanupRemovesOrphansEndToEnd(t *testing.T) { func TestRunStartupCleanupRemovesOrphansEndToEnd(t *testing.T) {
@@ -830,15 +826,20 @@ func TestRunCleanupFlagEndToEnd_Success(t *testing.T) {
	tempDir := setTempDirEnv(t, t.TempDir())
-	staleA := createTempLog(t, tempDir, "codeagent-wrapper-2100.log")
-	staleB := createTempLog(t, tempDir, "codeagent-wrapper-2200-extra.log")
-	keeper := createTempLog(t, tempDir, "codeagent-wrapper-2300.log")
+	basePID := os.Getpid()
+	stalePID1 := basePID + 10000
+	stalePID2 := basePID + 11000
+	keeperPID := basePID + 12000
+	staleA := createTempLog(t, tempDir, fmt.Sprintf("codeagent-wrapper-%d.log", stalePID1))
+	staleB := createTempLog(t, tempDir, fmt.Sprintf("codeagent-wrapper-%d-extra.log", stalePID2))
+	keeper := createTempLog(t, tempDir, fmt.Sprintf("codeagent-wrapper-%d.log", keeperPID))

	stubProcessRunning(t, func(pid int) bool {
-		return pid == 2300 || pid == os.Getpid()
+		return pid == keeperPID || pid == basePID
	})
	stubProcessStartTime(t, func(pid int) time.Time {
-		if pid == 2300 || pid == os.Getpid() {
+		if pid == keeperPID || pid == basePID {
			return time.Now().Add(-1 * time.Hour)
		}
		return time.Time{}
@@ -868,10 +869,10 @@ func TestRunCleanupFlagEndToEnd_Success(t *testing.T) {
	if !strings.Contains(output, "Files kept: 1") {
		t.Fatalf("missing 'Files kept: 1' in output: %q", output)
	}
-	if !strings.Contains(output, "codeagent-wrapper-2100.log") || !strings.Contains(output, "codeagent-wrapper-2200-extra.log") {
+	if !strings.Contains(output, fmt.Sprintf("codeagent-wrapper-%d.log", stalePID1)) || !strings.Contains(output, fmt.Sprintf("codeagent-wrapper-%d-extra.log", stalePID2)) {
		t.Fatalf("missing deleted file names in output: %q", output)
	}
-	if !strings.Contains(output, "codeagent-wrapper-2300.log") {
+	if !strings.Contains(output, fmt.Sprintf("codeagent-wrapper-%d.log", keeperPID)) {
		t.Fatalf("missing kept file names in output: %q", output)
	}


@@ -643,10 +643,24 @@ func (f *fakeCmd) StdinContents() string {
func createFakeCodexScript(t *testing.T, threadID, message string) string {
	t.Helper()
-	scriptPath := filepath.Join(t.TempDir(), "codex.sh")
+	tempDir := t.TempDir()
	// Add small sleep to ensure parser goroutine has time to read stdout before
	// the process exits and closes the pipe. This prevents race conditions in CI
	// where fast shell script execution can close stdout before parsing completes.
if runtime.GOOS == "windows" {
scriptPath := filepath.Join(tempDir, "codex.bat")
script := fmt.Sprintf("@echo off\r\n"+
"echo {\"type\":\"thread.started\",\"thread_id\":\"%s\"}\r\n"+
"echo {\"type\":\"item.completed\",\"item\":{\"type\":\"agent_message\",\"text\":\"%s\"}}\r\n"+
"exit /b 0\r\n", threadID, message)
if err := os.WriteFile(scriptPath, []byte(script), 0o755); err != nil {
t.Fatalf("failed to create fake codex script: %v", err)
}
return scriptPath
}
scriptPath := filepath.Join(tempDir, "codex.sh")
	script := fmt.Sprintf(`#!/bin/sh
printf '%%s\n' '{"type":"thread.started","thread_id":"%s"}'
printf '%%s\n' '{"type":"item.completed","item":{"type":"agent_message","text":"%s"}}'
@@ -1392,6 +1406,24 @@ func TestBackendParseArgs_PromptFileFlag(t *testing.T) {
func TestBackendParseArgs_PromptFileOverridesAgent(t *testing.T) {
	defer resetTestHooks()
home := t.TempDir()
t.Setenv("HOME", home)
t.Setenv("USERPROFILE", home)
t.Cleanup(config.ResetModelsConfigCacheForTest)
config.ResetModelsConfigCacheForTest()
configDir := filepath.Join(home, ".codeagent")
if err := os.MkdirAll(configDir, 0o755); err != nil {
t.Fatalf("MkdirAll: %v", err)
}
if err := os.WriteFile(filepath.Join(configDir, "models.json"), []byte(`{
"agents": {
"develop": { "backend": "codex", "model": "gpt-test" }
}
}`), 0o644); err != nil {
t.Fatalf("WriteFile: %v", err)
}
	os.Args = []string{"codeagent-wrapper", "--prompt-file", "/tmp/custom.md", "--agent", "develop", "task"}
	cfg, err := parseArgs()
	if err != nil {
@@ -1584,6 +1616,60 @@ do something`
	}
}
func TestParallelParseConfig_Worktree(t *testing.T) {
input := `---TASK---
id: task-1
worktree: true
---CONTENT---
do something`
cfg, err := parseParallelConfig([]byte(input))
if err != nil {
t.Fatalf("parseParallelConfig() unexpected error: %v", err)
}
if len(cfg.Tasks) != 1 {
t.Fatalf("expected 1 task, got %d", len(cfg.Tasks))
}
task := cfg.Tasks[0]
if !task.Worktree {
t.Fatalf("Worktree = %v, want true", task.Worktree)
}
}
func TestParallelParseConfig_WorktreeBooleanValue(t *testing.T) {
tests := []struct {
name string
value string
want bool
}{
{"true", "true", true},
{"1", "1", true},
{"yes", "yes", true},
{"false", "false", false},
{"0", "0", false},
{"no", "no", false},
{"empty", "", true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
input := fmt.Sprintf(`---TASK---
id: task-1
worktree: %s
---CONTENT---
do something`, tt.value)
cfg, err := parseParallelConfig([]byte(input))
if err != nil {
t.Fatalf("parseParallelConfig() unexpected error: %v", err)
}
if cfg.Tasks[0].Worktree != tt.want {
t.Fatalf("Worktree = %v, want %v for value %q", cfg.Tasks[0].Worktree, tt.want, tt.value)
}
})
}
}
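The boolean tokens this table exercises ("", "true", "1", "yes" enable; "false", "0", "no" disable) can be sketched as a small parser. This is a sketch of the semantics the tests assert, not the repo's actual `parseParallelConfig` code; note that the empty string maps to true, so a bare `worktree:` line opts the task in:

```go
package main

import (
	"fmt"
	"strings"
)

// parseWorktreeValue mirrors the semantics exercised by the test table above.
// Unrecognized tokens default to false, the safe (no-worktree) choice.
func parseWorktreeValue(v string) bool {
	switch strings.ToLower(strings.TrimSpace(v)) {
	case "", "true", "1", "yes":
		return true
	case "false", "0", "no":
		return false
	default:
		return false
	}
}

func main() {
	for _, v := range []string{"", "true", "no"} {
		fmt.Printf("%q -> %v\n", v, parseWorktreeValue(v))
	}
}
```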
func TestParallelParseConfig_EmptySessionID(t *testing.T) {
	input := `---TASK---
id: task-1
@@ -1916,7 +2002,7 @@ func TestRun_PassesReasoningEffortToTaskSpec(t *testing.T) {
func TestRun_NoOutputMessage_ReturnsExitCode1AndWritesStderr(t *testing.T) {
	defer resetTestHooks()
	cleanupLogsFn = func() (CleanupStats, error) { return CleanupStats{}, nil }
-	t.Setenv("TMPDIR", t.TempDir())
+	setTempDirEnv(t, t.TempDir())

	selectBackendFn = func(name string) (Backend, error) {
		return testBackend{name: name, command: "echo"}, nil
@@ -2067,8 +2153,7 @@ func TestRunBuildCodexArgs_ResumeMode_EmptySessionHandledGracefully(t *testing.T
func TestRunBuildCodexArgs_BypassSandboxEnvTrue(t *testing.T) {
	defer resetTestHooks()
-	tempDir := t.TempDir()
-	t.Setenv("TMPDIR", tempDir)
+	setTempDirEnv(t, t.TempDir())

	logger, err := NewLogger()
	if err != nil {
@@ -2712,8 +2797,7 @@ func TestTailBufferWrite(t *testing.T) {
func TestRunLogFunctions(t *testing.T) {
	defer resetTestHooks()
-	tempDir := t.TempDir()
-	t.Setenv("TMPDIR", tempDir)
+	setTempDirEnv(t, t.TempDir())

	logger, err := NewLogger()
	if err != nil {
@@ -2760,8 +2844,7 @@ func TestLoggerLogDropOnDone(t *testing.T) {
func TestLoggerLogAfterClose(t *testing.T) {
	defer resetTestHooks()
-	tempDir := t.TempDir()
-	t.Setenv("TMPDIR", tempDir)
+	setTempDirEnv(t, t.TempDir())

	logger, err := NewLogger()
	if err != nil {
@@ -2924,13 +3007,10 @@ func TestRunCodexTask_StartError(t *testing.T) {
func TestRunCodexTask_WithEcho(t *testing.T) {
	defer resetTestHooks()
-	codexCommand = "echo"
-	buildCodexArgsFn = func(cfg *Config, targetArg string) []string { return []string{targetArg} }
-	jsonOutput := `{"type":"thread.started","thread_id":"test-session"}
-{"type":"item.completed","item":{"type":"agent_message","text":"Test output"}}`
-	res := runCodexTask(TaskSpec{Task: jsonOutput}, false, 10)
+	codexCommand = createFakeCodexScript(t, "test-session", "Test output")
+	buildCodexArgsFn = func(cfg *Config, targetArg string) []string { return []string{} }
+	res := runCodexTask(TaskSpec{Task: "ignored"}, false, 10)
	if res.ExitCode != 0 || res.Message != "Test output" || res.SessionID != "test-session" {
		t.Fatalf("unexpected result: %+v", res)
	}
@@ -3010,13 +3090,10 @@ func TestRunCodexTask_LogPathWithActiveLogger(t *testing.T) {
	}
	setLogger(logger)

-	codexCommand = "echo"
-	buildCodexArgsFn = func(cfg *Config, targetArg string) []string { return []string{targetArg} }
-	jsonOutput := `{"type":"thread.started","thread_id":"fake-thread"}
-{"type":"item.completed","item":{"type":"agent_message","text":"ok"}}`
-	result := runCodexTask(TaskSpec{Task: jsonOutput}, false, 5)
+	codexCommand = createFakeCodexScript(t, "fake-thread", "ok")
+	buildCodexArgsFn = func(cfg *Config, targetArg string) []string { return []string{} }
+	result := runCodexTask(TaskSpec{Task: "ignored"}, false, 5)
	if result.LogPath != logger.Path() {
		t.Fatalf("LogPath = %q, want %q", result.LogPath, logger.Path())
	}
@@ -3028,13 +3105,10 @@ func TestRunCodexTask_LogPathWithActiveLogger(t *testing.T) {
func TestRunCodexTask_LogPathWithTempLogger(t *testing.T) {
	defer resetTestHooks()
-	codexCommand = "echo"
-	buildCodexArgsFn = func(cfg *Config, targetArg string) []string { return []string{targetArg} }
-	jsonOutput := `{"type":"thread.started","thread_id":"temp-thread"}
-{"type":"item.completed","item":{"type":"agent_message","text":"temp"}}`
-	result := runCodexTask(TaskSpec{Task: jsonOutput}, true, 5)
+	codexCommand = createFakeCodexScript(t, "temp-thread", "temp")
+	buildCodexArgsFn = func(cfg *Config, targetArg string) []string { return []string{} }
+	result := runCodexTask(TaskSpec{Task: "ignored"}, true, 5)
	t.Cleanup(func() {
		if result.LogPath != "" {
			os.Remove(result.LogPath)
@@ -3080,10 +3154,19 @@ func TestRunCodexTask_LogPathOnStartError(t *testing.T) {
func TestRunCodexTask_NoMessage(t *testing.T) { func TestRunCodexTask_NoMessage(t *testing.T) {
defer resetTestHooks() defer resetTestHooks()
-	codexCommand = "echo"
-	buildCodexArgsFn = func(cfg *Config, targetArg string) []string { return []string{targetArg} }
-	jsonOutput := `{"type":"thread.started","thread_id":"test-session"}`
-	res := runCodexTask(TaskSpec{Task: jsonOutput}, false, 10)
+	fake := newFakeCmd(fakeCmdConfig{
+		StdoutPlan: []fakeStdoutEvent{
+			{Data: `{"type":"thread.started","thread_id":"test-session"}` + "\n"},
+		},
+		WaitDelay: 5 * time.Millisecond,
+	})
+	restore := executor.SetNewCommandRunner(func(ctx context.Context, name string, args ...string) executor.CommandRunner { return fake })
+	t.Cleanup(restore)
+	codexCommand = "fake-cmd"
+	buildCodexArgsFn = func(cfg *Config, targetArg string) []string { return []string{} }
+	res := runCodexTask(TaskSpec{Task: "ignored"}, false, 10)
	if res.ExitCode != 1 || res.Error == "" {
		t.Fatalf("expected error for missing agent_message, got %+v", res)
	}
@@ -3208,20 +3291,36 @@ func TestRunCodexProcess(t *testing.T) {
func TestRunSilentMode(t *testing.T) {
	defer resetTestHooks()
+	tmpDir := t.TempDir()
+	setTempDirEnv(t, tmpDir)
	jsonOutput := `{"type":"thread.started","thread_id":"silent-session"}
{"type":"item.completed","item":{"type":"agent_message","text":"quiet"}}`
-	codexCommand = "echo"
+	codexCommand = "fake-cmd"
	buildCodexArgsFn = func(cfg *Config, targetArg string) []string { return []string{targetArg} }
+	_ = executor.SetNewCommandRunner(func(ctx context.Context, name string, args ...string) executor.CommandRunner {
+		return newFakeCmd(fakeCmdConfig{
+			StdoutPlan: []fakeStdoutEvent{{Data: jsonOutput + "\n"}},
+		})
+	})

	capture := func(silent bool) string {
		oldStderr := os.Stderr
-		r, w, _ := os.Pipe()
-		os.Stderr = w
-		res := runCodexTask(TaskSpec{Task: jsonOutput}, silent, 10)
-		if res.ExitCode != 0 {
-			t.Fatalf("unexpected exitCode %d", res.ExitCode)
-		}
-		w.Close()
+		r, w, err := os.Pipe()
+		if err != nil {
+			t.Fatalf("os.Pipe() error = %v", err)
+		}
+		os.Stderr = w
+		defer func() {
+			os.Stderr = oldStderr
+			_ = w.Close()
+			_ = r.Close()
+		}()
+		res := runCodexTask(TaskSpec{Task: "ignored"}, silent, 10)
+		if res.ExitCode != 0 {
+			t.Fatalf("unexpected exitCode %d: %s", res.ExitCode, res.Error)
+		}
+		_ = w.Close()
		os.Stderr = oldStderr
		var buf bytes.Buffer
		if _, err := io.Copy(&buf, r); err != nil {
@@ -3579,6 +3678,7 @@ do two`)
}
func TestParallelFlag(t *testing.T) {
+	defer resetTestHooks()
	oldArgs := os.Args
	defer func() { os.Args = oldArgs }()
@@ -3588,14 +3688,10 @@ id: T1
---CONTENT---
test`
	stdinReader = strings.NewReader(jsonInput)
-	defer func() { stdinReader = os.Stdin }()
	runCodexTaskFn = func(task TaskSpec, timeout int) TaskResult {
		return TaskResult{TaskID: task.ID, ExitCode: 0, Message: "test output"}
	}
-	defer func() {
-		runCodexTaskFn = func(task TaskSpec, timeout int) TaskResult { return runCodexTask(task, true, timeout) }
-	}()
	exitCode := run()
	if exitCode != 0 {
@@ -4211,8 +4307,7 @@ func TestRun_ExplicitStdinEmpty(t *testing.T) {
func TestRun_ExplicitStdinReadError(t *testing.T) {
	defer resetTestHooks()
-	tempDir := t.TempDir()
-	t.Setenv("TMPDIR", tempDir)
+	tempDir := setTempDirEnv(t, t.TempDir())
	logPath := filepath.Join(tempDir, fmt.Sprintf("codeagent-wrapper-%d.log", os.Getpid()))
	var logOutput string
@@ -4308,8 +4403,7 @@ func TestRun_ExplicitStdinSuccess(t *testing.T) {
func TestRun_PipedTaskReadError(t *testing.T) {
	defer resetTestHooks()
-	tempDir := t.TempDir()
-	t.Setenv("TMPDIR", tempDir)
+	tempDir := setTempDirEnv(t, t.TempDir())
	logPath := filepath.Join(tempDir, fmt.Sprintf("codeagent-wrapper-%d.log", os.Getpid()))
	var logOutput string
@@ -4362,8 +4456,7 @@ func TestRun_PipedTaskSuccess(t *testing.T) {
func TestRun_LoggerLifecycle(t *testing.T) {
	defer resetTestHooks()
-	tempDir := t.TempDir()
-	t.Setenv("TMPDIR", tempDir)
+	tempDir := setTempDirEnv(t, t.TempDir())
	logPath := filepath.Join(tempDir, fmt.Sprintf("codeagent-wrapper-%d.log", os.Getpid()))
	stdout := captureStdoutPipe()
@@ -4411,8 +4504,7 @@ func TestRun_LoggerRemovedOnSignal(t *testing.T) {
	// Set shorter delays for faster test
	_ = executor.SetForceKillDelay(1)
-	tempDir := t.TempDir()
-	t.Setenv("TMPDIR", tempDir)
+	tempDir := setTempDirEnv(t, t.TempDir())
	logPath := filepath.Join(tempDir, fmt.Sprintf("codeagent-wrapper-%d.log", os.Getpid()))
	scriptPath := filepath.Join(tempDir, "sleepy-codex.sh")
@@ -4466,10 +4558,8 @@ func TestRun_CleanupHookAlwaysCalled(t *testing.T) {
	called := false
	cleanupHook = func() { called = true }
	// Use a command that goes through normal flow, not --version which returns early
-	restore := withBackend("echo", func(cfg *Config, targetArg string) []string {
-		return []string{`{"type":"thread.started","thread_id":"x"}
-{"type":"item.completed","item":{"type":"agent_message","text":"ok"}}`}
-	})
+	scriptPath := createFakeCodexScript(t, "x", "ok")
+	restore := withBackend(scriptPath, func(cfg *Config, targetArg string) []string { return []string{} })
	defer restore()
	os.Args = []string{"codeagent-wrapper", "task"}
	if exitCode := run(); exitCode != 0 {
@@ -4696,16 +4786,13 @@ func TestBackendRunCoverage(t *testing.T) {
func TestParallelLogPathInSerialMode(t *testing.T) {
	defer resetTestHooks()
-	tempDir := t.TempDir()
-	t.Setenv("TMPDIR", tempDir)
+	tempDir := setTempDirEnv(t, t.TempDir())
	os.Args = []string{"codeagent-wrapper", "do-stuff"}
	stdinReader = strings.NewReader("")
	isTerminalFn = func() bool { return true }
-	codexCommand = "echo"
-	buildCodexArgsFn = func(cfg *Config, targetArg string) []string {
-		return []string{`{"type":"thread.started","thread_id":"cli-session"}` + "\n" + `{"type":"item.completed","item":{"type":"agent_message","text":"ok"}}`}
-	}
+	codexCommand = createFakeCodexScript(t, "cli-session", "ok")
+	buildCodexArgsFn = func(cfg *Config, targetArg string) []string { return []string{} }
	var exitCode int
	stderr := captureStderr(t, func() {
@@ -4729,9 +4816,8 @@ func TestRun_CLI_Success(t *testing.T) {
	stdinReader = strings.NewReader("")
	isTerminalFn = func() bool { return true }
-	restore := withBackend("echo", func(cfg *Config, targetArg string) []string {
-		return []string{`{"type":"thread.started","thread_id":"cli-session"}` + "\n" + `{"type":"item.completed","item":{"type":"agent_message","text":"ok"}}`}
-	})
+	scriptPath := createFakeCodexScript(t, "cli-session", "ok")
+	restore := withBackend(scriptPath, func(cfg *Config, targetArg string) []string { return []string{} })
	defer restore()
	var exitCode int

View File

@@ -0,0 +1,46 @@
package wrapper
import (
"os"
"testing"
)
func TestParseArgs_Workdir_OSPaths(t *testing.T) {
oldArgv := os.Args
t.Cleanup(func() { os.Args = oldArgv })
workdirs := []struct {
name string
path string
}{
{name: "windows drive forward slashes", path: "D:/repo/path"},
{name: "windows drive backslashes", path: `C:\repo\path`},
{name: "windows UNC", path: `\\server\share\repo`},
{name: "unix absolute", path: "/home/user/repo"},
{name: "relative", path: "./relative/repo"},
}
for _, wd := range workdirs {
t.Run("new mode: "+wd.name, func(t *testing.T) {
os.Args = []string{"codeagent-wrapper", "task", wd.path}
cfg, err := parseArgs()
if err != nil {
t.Fatalf("parseArgs() error: %v", err)
}
if cfg.Mode != "new" || cfg.Task != "task" || cfg.WorkDir != wd.path {
t.Fatalf("cfg mismatch: got mode=%q task=%q workdir=%q, want mode=%q task=%q workdir=%q", cfg.Mode, cfg.Task, cfg.WorkDir, "new", "task", wd.path)
}
})
t.Run("resume mode: "+wd.name, func(t *testing.T) {
os.Args = []string{"codeagent-wrapper", "resume", "sid-1", "task", wd.path}
cfg, err := parseArgs()
if err != nil {
t.Fatalf("parseArgs() error: %v", err)
}
if cfg.Mode != "resume" || cfg.SessionID != "sid-1" || cfg.Task != "task" || cfg.WorkDir != wd.path {
t.Fatalf("cfg mismatch: got mode=%q sid=%q task=%q workdir=%q, want mode=%q sid=%q task=%q workdir=%q", cfg.Mode, cfg.SessionID, cfg.Task, cfg.WorkDir, "resume", "sid-1", "task", wd.path)
}
})
}
}

View File

@@ -0,0 +1,119 @@
package wrapper
import (
"strings"
"testing"
)
func TestRunSingleMode_UseStdin_TargetArgAndTaskText(t *testing.T) {
defer resetTestHooks()
setTempDirEnv(t, t.TempDir())
logger, err := NewLogger()
if err != nil {
t.Fatalf("NewLogger(): %v", err)
}
setLogger(logger)
t.Cleanup(func() { _ = closeLogger() })
type testCase struct {
name string
cfgTask string
explicit bool
stdinData string
isTerminal bool
wantUseStdin bool
wantTarget string
wantTaskText string
}
longTask := strings.Repeat("a", 801)
tests := []testCase{
{
name: "piped input forces stdin mode",
cfgTask: "cli-task",
stdinData: "piped task text",
isTerminal: false,
wantUseStdin: true,
wantTarget: "-",
wantTaskText: "piped task text",
},
{
name: "explicit dash forces stdin mode",
cfgTask: "-",
explicit: true,
stdinData: "explicit task text",
isTerminal: true,
wantUseStdin: true,
wantTarget: "-",
wantTaskText: "explicit task text",
},
{
name: "special char backslash forces stdin mode",
cfgTask: `C:\repo\file.go`,
isTerminal: true,
wantUseStdin: true,
wantTarget: "-",
wantTaskText: `C:\repo\file.go`,
},
{
name: "length>800 forces stdin mode",
cfgTask: longTask,
isTerminal: true,
wantUseStdin: true,
wantTarget: "-",
wantTaskText: longTask,
},
{
name: "simple task uses argv target",
cfgTask: "analyze code",
isTerminal: true,
wantUseStdin: false,
wantTarget: "analyze code",
wantTaskText: "analyze code",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var gotTarget string
buildCodexArgsFn = func(cfg *Config, targetArg string) []string {
gotTarget = targetArg
return []string{targetArg}
}
var gotTask TaskSpec
runTaskFn = func(task TaskSpec, silent bool, timeout int) TaskResult {
gotTask = task
return TaskResult{ExitCode: 0, Message: "ok"}
}
stdinReader = strings.NewReader(tt.stdinData)
isTerminalFn = func() bool { return tt.isTerminal }
cfg := &Config{
Mode: "new",
Task: tt.cfgTask,
WorkDir: defaultWorkdir,
Backend: defaultBackendName,
ExplicitStdin: tt.explicit,
}
if code := runSingleMode(cfg, "codeagent-wrapper"); code != 0 {
t.Fatalf("runSingleMode() = %d, want 0", code)
}
if gotTarget != tt.wantTarget {
t.Fatalf("targetArg = %q, want %q", gotTarget, tt.wantTarget)
}
if gotTask.UseStdin != tt.wantUseStdin {
t.Fatalf("taskSpec.UseStdin = %v, want %v", gotTask.UseStdin, tt.wantUseStdin)
}
if gotTask.Task != tt.wantTaskText {
t.Fatalf("taskSpec.Task = %q, want %q", gotTask.Task, tt.wantTaskText)
}
})
}
}

View File

@@ -0,0 +1,134 @@
package wrapper
import (
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
)
const tmpDirEnvOverrideKey = "CODEAGENT_TMPDIR"
var tmpDirExecutableCheckFn = canExecuteInDir
func ensureExecutableTempDir() {
// Windows doesn't execute scripts via shebang, and os.TempDir semantics differ.
if runtime.GOOS == "windows" {
return
}
if override := strings.TrimSpace(os.Getenv(tmpDirEnvOverrideKey)); override != "" {
if resolved, err := resolvePathWithTilde(override); err == nil {
if err := os.MkdirAll(resolved, 0o700); err == nil {
if ok, _ := tmpDirExecutableCheckFn(resolved); ok {
setTempEnv(resolved)
return
}
}
}
// Invalid override should not block execution; fall back to default behavior.
}
current := currentTempDirFromEnv()
if current == "" {
current = "/tmp"
}
ok, _ := tmpDirExecutableCheckFn(current)
if ok {
return
}
fallback := defaultFallbackTempDir()
if fallback == "" {
return
}
if err := os.MkdirAll(fallback, 0o700); err != nil {
return
}
if ok, _ := tmpDirExecutableCheckFn(fallback); !ok {
return
}
setTempEnv(fallback)
fmt.Fprintf(os.Stderr, "INFO: temp dir is not executable; set TMPDIR=%s\n", fallback)
}
func setTempEnv(dir string) {
_ = os.Setenv("TMPDIR", dir)
_ = os.Setenv("TMP", dir)
_ = os.Setenv("TEMP", dir)
}
func defaultFallbackTempDir() string {
home, err := os.UserHomeDir()
if err != nil || strings.TrimSpace(home) == "" {
return ""
}
return filepath.Clean(filepath.Join(home, ".codeagent", "tmp"))
}
func currentTempDirFromEnv() string {
for _, k := range []string{"TMPDIR", "TMP", "TEMP"} {
if v := strings.TrimSpace(os.Getenv(k)); v != "" {
return v
}
}
return ""
}
func resolvePathWithTilde(p string) (string, error) {
p = strings.TrimSpace(p)
if p == "" {
return "", errors.New("empty path")
}
if p == "~" || strings.HasPrefix(p, "~/") || strings.HasPrefix(p, "~\\") {
home, err := os.UserHomeDir()
if err != nil || strings.TrimSpace(home) == "" {
if err == nil {
err = errors.New("empty home directory")
}
return "", fmt.Errorf("resolve ~: %w", err)
}
if p == "~" {
return home, nil
}
return filepath.Clean(home + p[1:]), nil
}
return filepath.Clean(p), nil
}
func canExecuteInDir(dir string) (bool, error) {
dir = strings.TrimSpace(dir)
if dir == "" {
return false, errors.New("empty dir")
}
f, err := os.CreateTemp(dir, "codeagent-tmp-exec-*")
if err != nil {
return false, err
}
path := f.Name()
defer func() { _ = os.Remove(path) }()
if _, err := f.WriteString("#!/bin/sh\nexit 0\n"); err != nil {
_ = f.Close()
return false, err
}
if err := f.Close(); err != nil {
return false, err
}
if err := os.Chmod(path, 0o700); err != nil {
return false, err
}
if err := exec.Command(path).Run(); err != nil {
return false, err
}
return true, nil
}

View File

@@ -0,0 +1,103 @@
package wrapper
import (
"os"
"path/filepath"
"runtime"
"testing"
)
func TestEnsureExecutableTempDir_Override(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("ensureExecutableTempDir is no-op on Windows")
}
restore := captureTempEnv()
t.Cleanup(restore)
t.Setenv("HOME", t.TempDir())
t.Setenv("USERPROFILE", os.Getenv("HOME"))
orig := tmpDirExecutableCheckFn
tmpDirExecutableCheckFn = func(string) (bool, error) { return true, nil }
t.Cleanup(func() { tmpDirExecutableCheckFn = orig })
override := filepath.Join(t.TempDir(), "mytmp")
t.Setenv(tmpDirEnvOverrideKey, override)
ensureExecutableTempDir()
if got := os.Getenv("TMPDIR"); got != override {
t.Fatalf("TMPDIR=%q, want %q", got, override)
}
if got := os.Getenv("TMP"); got != override {
t.Fatalf("TMP=%q, want %q", got, override)
}
if got := os.Getenv("TEMP"); got != override {
t.Fatalf("TEMP=%q, want %q", got, override)
}
if st, err := os.Stat(override); err != nil || !st.IsDir() {
t.Fatalf("override dir not created: stat=%v err=%v", st, err)
}
}
func TestEnsureExecutableTempDir_FallbackWhenCurrentNotExecutable(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("ensureExecutableTempDir is no-op on Windows")
}
restore := captureTempEnv()
t.Cleanup(restore)
home := t.TempDir()
t.Setenv("HOME", home)
t.Setenv("USERPROFILE", home)
cur := filepath.Join(t.TempDir(), "cur-tmp")
if err := os.MkdirAll(cur, 0o700); err != nil {
t.Fatal(err)
}
t.Setenv("TMPDIR", cur)
fallback := filepath.Join(home, ".codeagent", "tmp")
orig := tmpDirExecutableCheckFn
tmpDirExecutableCheckFn = func(dir string) (bool, error) {
if filepath.Clean(dir) == filepath.Clean(cur) {
return false, nil
}
if filepath.Clean(dir) == filepath.Clean(fallback) {
return true, nil
}
return true, nil
}
t.Cleanup(func() { tmpDirExecutableCheckFn = orig })
ensureExecutableTempDir()
if got := os.Getenv("TMPDIR"); filepath.Clean(got) != filepath.Clean(fallback) {
t.Fatalf("TMPDIR=%q, want %q", got, fallback)
}
if st, err := os.Stat(fallback); err != nil || !st.IsDir() {
t.Fatalf("fallback dir not created: stat=%v err=%v", st, err)
}
}
func captureTempEnv() func() {
type entry struct {
set bool
val string
}
snapshot := make(map[string]entry, 3)
for _, k := range []string{"TMPDIR", "TMP", "TEMP"} {
v, ok := os.LookupEnv(k)
snapshot[k] = entry{set: ok, val: v}
}
return func() {
for k, e := range snapshot {
if !e.set {
_ = os.Unsetenv(k)
continue
}
_ = os.Setenv(k, e.val)
}
}
}

View File

@@ -25,7 +25,8 @@ func (ClaudeBackend) Env(baseURL, apiKey string) map[string]string {
		env["ANTHROPIC_BASE_URL"] = baseURL
	}
	if apiKey != "" {
-		env["ANTHROPIC_AUTH_TOKEN"] = apiKey
+		// Claude Code CLI uses ANTHROPIC_API_KEY for API-key based auth.
+		env["ANTHROPIC_API_KEY"] = apiKey
	}
	return env
}
@@ -133,6 +134,15 @@ func buildClaudeArgs(cfg *config.Config, targetArg string) []string {
		}
	}
+	if len(cfg.AllowedTools) > 0 {
+		args = append(args, "--allowedTools")
+		args = append(args, cfg.AllowedTools...)
+	}
+	if len(cfg.DisallowedTools) > 0 {
+		args = append(args, "--disallowedTools")
+		args = append(args, cfg.DisallowedTools...)
+	}
	args = append(args, "--output-format", "stream-json", "--verbose", targetArg)
	return args

View File

@@ -0,0 +1,54 @@
package backend
import (
"reflect"
"testing"
config "codeagent-wrapper/internal/config"
)
func TestBuildCodexArgs_Workdir_OSPaths(t *testing.T) {
t.Setenv("CODEX_BYPASS_SANDBOX", "false")
tests := []struct {
name string
workdir string
}{
{name: "windows drive forward slashes", workdir: "D:/repo/path"},
{name: "windows drive backslashes", workdir: `C:\repo\path`},
{name: "windows UNC", workdir: `\\server\share\repo`},
{name: "unix absolute", workdir: "/home/user/repo"},
{name: "relative", workdir: "./relative/repo"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cfg := &config.Config{Mode: "new", WorkDir: tt.workdir}
got := BuildCodexArgs(cfg, "task")
want := []string{"e", "--skip-git-repo-check", "-C", tt.workdir, "--json", "task"}
if !reflect.DeepEqual(got, want) {
t.Fatalf("BuildCodexArgs() = %v, want %v", got, want)
}
})
}
t.Run("new mode stdin target uses dash", func(t *testing.T) {
cfg := &config.Config{Mode: "new", WorkDir: `C:\repo\path`}
got := BuildCodexArgs(cfg, "-")
want := []string{"e", "--skip-git-repo-check", "-C", `C:\repo\path`, "--json", "-"}
if !reflect.DeepEqual(got, want) {
t.Fatalf("BuildCodexArgs() = %v, want %v", got, want)
}
})
}
func TestBuildCodexArgs_ResumeMode_OmitsWorkdir(t *testing.T) {
t.Setenv("CODEX_BYPASS_SANDBOX", "false")
cfg := &config.Config{Mode: "resume", SessionID: "sid-123", WorkDir: `C:\repo\path`}
got := BuildCodexArgs(cfg, "-")
want := []string{"e", "--skip-git-repo-check", "--json", "resume", "sid-123", "-"}
if !reflect.DeepEqual(got, want) {
t.Fatalf("BuildCodexArgs() = %v, want %v", got, want)
}
}

View File

@@ -7,8 +7,6 @@ import (
	"strings"
	"sync"
-	ilogger "codeagent-wrapper/internal/logger"
	"github.com/goccy/go-json"
)
@@ -18,14 +16,16 @@ type BackendConfig struct {
}
type AgentModelConfig struct {
	Backend string `json:"backend"`
	Model string `json:"model"`
	PromptFile string `json:"prompt_file,omitempty"`
	Description string `json:"description,omitempty"`
	Yolo bool `json:"yolo,omitempty"`
	Reasoning string `json:"reasoning,omitempty"`
	BaseURL string `json:"base_url,omitempty"`
	APIKey string `json:"api_key,omitempty"`
+	AllowedTools []string `json:"allowed_tools,omitempty"`
+	DisallowedTools []string `json:"disallowed_tools,omitempty"`
}
type ModelsConfig struct { type ModelsConfig struct {
@@ -35,80 +35,85 @@ type ModelsConfig struct {
	Backends map[string]BackendConfig `json:"backends,omitempty"`
}
-var defaultModelsConfig = ModelsConfig{
-	DefaultBackend: "opencode",
-	DefaultModel: "opencode/grok-code",
-	Agents: map[string]AgentModelConfig{
-		"oracle": {Backend: "claude", Model: "claude-opus-4-5-20251101", PromptFile: "~/.claude/skills/omo/references/oracle.md", Description: "Technical advisor"},
-		"librarian": {Backend: "claude", Model: "claude-sonnet-4-5-20250929", PromptFile: "~/.claude/skills/omo/references/librarian.md", Description: "Researcher"},
-		"explore": {Backend: "opencode", Model: "opencode/grok-code", PromptFile: "~/.claude/skills/omo/references/explore.md", Description: "Code search"},
-		"develop": {Backend: "codex", Model: "", PromptFile: "~/.claude/skills/omo/references/develop.md", Description: "Code development"},
-		"frontend-ui-ux-engineer": {Backend: "gemini", Model: "", PromptFile: "~/.claude/skills/omo/references/frontend-ui-ux-engineer.md", Description: "Frontend engineer"},
-		"document-writer": {Backend: "gemini", Model: "", PromptFile: "~/.claude/skills/omo/references/document-writer.md", Description: "Documentation"},
-	},
-}
+var defaultModelsConfig = ModelsConfig{}
+const modelsConfigTildePath = "~/.codeagent/models.json"
+const modelsConfigExample = `{
+  "default_backend": "codex",
+  "default_model": "gpt-4.1",
+  "backends": {
+    "codex": { "api_key": "..." },
+    "claude": { "api_key": "..." }
+  },
+  "agents": {
+    "develop": {
+      "backend": "codex",
+      "model": "gpt-4.1",
+      "prompt_file": "~/.codeagent/prompts/develop.md",
+      "reasoning": "high",
+      "yolo": true
+    }
+  }
+}`
var (
	modelsConfigOnce sync.Once
	modelsConfigCached *ModelsConfig
+	modelsConfigErr error
)
-func modelsConfig() *ModelsConfig {
+func modelsConfig() (*ModelsConfig, error) {
	modelsConfigOnce.Do(func() {
-		modelsConfigCached = loadModelsConfig()
+		modelsConfigCached, modelsConfigErr = loadModelsConfig()
	})
-	if modelsConfigCached == nil {
-		return &defaultModelsConfig
-	}
-	return modelsConfigCached
+	return modelsConfigCached, modelsConfigErr
}
-func loadModelsConfig() *ModelsConfig {
+func modelsConfigPath() (string, error) {
	home, err := os.UserHomeDir()
-	if err != nil {
-		ilogger.LogWarn(fmt.Sprintf("Failed to resolve home directory for models config: %v; using defaults", err))
-		return &defaultModelsConfig
+	if err != nil || strings.TrimSpace(home) == "" {
+		return "", fmt.Errorf("failed to resolve user home directory: %w", err)
	}
	configDir := filepath.Clean(filepath.Join(home, ".codeagent"))
	configPath := filepath.Clean(filepath.Join(configDir, "models.json"))
	rel, err := filepath.Rel(configDir, configPath)
	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
-		return &defaultModelsConfig
+		return "", fmt.Errorf("refusing to read models config outside %s: %s", configDir, configPath)
	}
+	return configPath, nil
+}
+func modelsConfigHint(configPath string) string {
+	configPath = strings.TrimSpace(configPath)
+	if configPath == "" {
+		return fmt.Sprintf("Create %s with e.g.:\n%s", modelsConfigTildePath, modelsConfigExample)
+	}
+	return fmt.Sprintf("Create %s (resolved to %s) with e.g.:\n%s", modelsConfigTildePath, configPath, modelsConfigExample)
+}
+func loadModelsConfig() (*ModelsConfig, error) {
+	configPath, err := modelsConfigPath()
+	if err != nil {
+		return nil, fmt.Errorf("%w\n\n%s", err, modelsConfigHint(""))
+	}
	data, err := os.ReadFile(configPath) // #nosec G304 -- path is fixed under user home and validated to stay within configDir
	if err != nil {
-		if !os.IsNotExist(err) {
-			ilogger.LogWarn(fmt.Sprintf("Failed to read models config %s: %v; using defaults", configPath, err))
+		if os.IsNotExist(err) {
+			return nil, fmt.Errorf("models config not found: %s\n\n%s", configPath, modelsConfigHint(configPath))
		}
-		return &defaultModelsConfig
+		return nil, fmt.Errorf("failed to read models config %s: %w\n\n%s", configPath, err, modelsConfigHint(configPath))
	}
	var cfg ModelsConfig
	if err := json.Unmarshal(data, &cfg); err != nil {
-		ilogger.LogWarn(fmt.Sprintf("Failed to parse models config %s: %v; using defaults", configPath, err))
-		return &defaultModelsConfig
+		return nil, fmt.Errorf("failed to parse models config %s: %w\n\n%s", configPath, err, modelsConfigHint(configPath))
	}
	cfg.DefaultBackend = strings.TrimSpace(cfg.DefaultBackend)
-	if cfg.DefaultBackend == "" {
-		cfg.DefaultBackend = defaultModelsConfig.DefaultBackend
-	}
	cfg.DefaultModel = strings.TrimSpace(cfg.DefaultModel)
-	if cfg.DefaultModel == "" {
-		cfg.DefaultModel = defaultModelsConfig.DefaultModel
-	}
-	// Merge with defaults
-	for name, agent := range defaultModelsConfig.Agents {
-		if _, exists := cfg.Agents[name]; !exists {
-			if cfg.Agents == nil {
-				cfg.Agents = make(map[string]AgentModelConfig)
-			}
-			cfg.Agents[name] = agent
-		}
-	}
	// Normalize backend keys so lookups can be case-insensitive.
	if len(cfg.Backends) > 0 {
@@ -127,7 +132,7 @@ func loadModelsConfig() *ModelsConfig {
		}
	}
-	return &cfg
+	return &cfg, nil
}
func LoadDynamicAgent(name string) (AgentModelConfig, bool) {
@@ -150,7 +155,10 @@ func LoadDynamicAgent(name string) (AgentModelConfig, bool) {
}
func ResolveBackendConfig(backendName string) (baseURL, apiKey string) {
-	cfg := modelsConfig()
+	cfg, err := modelsConfig()
+	if err != nil || cfg == nil {
+		return "", ""
+	}
	resolved := resolveBackendConfig(cfg, backendName)
	return strings.TrimSpace(resolved.BaseURL), strings.TrimSpace(resolved.APIKey)
}
@@ -172,12 +180,30 @@ func resolveBackendConfig(cfg *ModelsConfig, backendName string) BackendConfig {
	return BackendConfig{}
}
-func resolveAgentConfig(agentName string) (backend, model, promptFile, reasoning, baseURL, apiKey string, yolo bool) {
-	cfg := modelsConfig()
+func resolveAgentConfig(agentName string) (backend, model, promptFile, reasoning, baseURL, apiKey string, yolo bool, allowedTools, disallowedTools []string, err error) {
+	if err := ValidateAgentName(agentName); err != nil {
+		return "", "", "", "", "", "", false, nil, nil, err
+	}
+	cfg, err := modelsConfig()
+	if err != nil {
+		return "", "", "", "", "", "", false, nil, nil, err
+	}
+	if cfg == nil {
+		return "", "", "", "", "", "", false, nil, nil, fmt.Errorf("models config is nil\n\n%s", modelsConfigHint(""))
+	}
	if agent, ok := cfg.Agents[agentName]; ok {
		backend = strings.TrimSpace(agent.Backend)
		if backend == "" {
-			backend = cfg.DefaultBackend
+			backend = strings.TrimSpace(cfg.DefaultBackend)
+			if backend == "" {
+				configPath, pathErr := modelsConfigPath()
+				if pathErr != nil {
+					return "", "", "", "", "", "", false, nil, nil, fmt.Errorf("agent %q has empty backend and default_backend is not set\n\n%s", agentName, modelsConfigHint(""))
+				}
+				return "", "", "", "", "", "", false, nil, nil, fmt.Errorf("agent %q has empty backend and default_backend is not set\n\n%s", agentName, modelsConfigHint(configPath))
+			}
		}
		backendCfg := resolveBackendConfig(cfg, backend)
@@ -190,31 +216,46 @@ func resolveAgentConfig(agentName string) (backend, model, promptFile, reasoning
		apiKey = strings.TrimSpace(backendCfg.APIKey)
	}
-		return backend, strings.TrimSpace(agent.Model), agent.PromptFile, agent.Reasoning, baseURL, apiKey, agent.Yolo
+		model = strings.TrimSpace(agent.Model)
+		if model == "" {
+			configPath, pathErr := modelsConfigPath()
+			if pathErr != nil {
+				return "", "", "", "", "", "", false, nil, nil, fmt.Errorf("agent %q has empty model; set agents.%s.model in %s\n\n%s", agentName, agentName, modelsConfigTildePath, modelsConfigHint(""))
+			}
+			return "", "", "", "", "", "", false, nil, nil, fmt.Errorf("agent %q has empty model; set agents.%s.model in %s\n\n%s", agentName, agentName, modelsConfigTildePath, modelsConfigHint(configPath))
+		}
+		return backend, model, agent.PromptFile, agent.Reasoning, baseURL, apiKey, agent.Yolo, agent.AllowedTools, agent.DisallowedTools, nil
	}
	if dynamic, ok := LoadDynamicAgent(agentName); ok {
-		backend = cfg.DefaultBackend
-		model = cfg.DefaultModel
+		backend = strings.TrimSpace(cfg.DefaultBackend)
+		model = strings.TrimSpace(cfg.DefaultModel)
+		configPath, pathErr := modelsConfigPath()
+		if backend == "" || model == "" {
+			if pathErr != nil {
+				return "", "", "", "", "", "", false, nil, nil, fmt.Errorf("dynamic agent %q requires default_backend and default_model to be set in %s\n\n%s", agentName, modelsConfigTildePath, modelsConfigHint(""))
+			}
+			return "", "", "", "", "", "", false, nil, nil, fmt.Errorf("dynamic agent %q requires default_backend and default_model to be set in %s\n\n%s", agentName, modelsConfigTildePath, modelsConfigHint(configPath))
+		}
		backendCfg := resolveBackendConfig(cfg, backend)
		baseURL = strings.TrimSpace(backendCfg.BaseURL)
		apiKey = strings.TrimSpace(backendCfg.APIKey)
-		return backend, model, dynamic.PromptFile, "", baseURL, apiKey, false
+		return backend, model, dynamic.PromptFile, "", baseURL, apiKey, false, nil, nil, nil
	}
-	backend = cfg.DefaultBackend
-	model = cfg.DefaultModel
-	backendCfg := resolveBackendConfig(cfg, backend)
-	baseURL = strings.TrimSpace(backendCfg.BaseURL)
-	apiKey = strings.TrimSpace(backendCfg.APIKey)
-	return backend, model, "", "", baseURL, apiKey, false
+	configPath, pathErr := modelsConfigPath()
+	if pathErr != nil {
+		return "", "", "", "", "", "", false, nil, nil, fmt.Errorf("agent %q not found in %s\n\n%s", agentName, modelsConfigTildePath, modelsConfigHint(""))
+	}
+	return "", "", "", "", "", "", false, nil, nil, fmt.Errorf("agent %q not found in %s\n\n%s", agentName, modelsConfigTildePath, modelsConfigHint(configPath))
}
-func ResolveAgentConfig(agentName string) (backend, model, promptFile, reasoning, baseURL, apiKey string, yolo bool) {
+func ResolveAgentConfig(agentName string) (backend, model, promptFile, reasoning, baseURL, apiKey string, yolo bool, allowedTools, disallowedTools []string, err error) {
	return resolveAgentConfig(agentName)
}
func ResetModelsConfigCacheForTest() {
	modelsConfigCached = nil
+	modelsConfigErr = nil
	modelsConfigOnce = sync.Once{}
}

View File

@@ -3,78 +3,43 @@ package config
import (
	"os"
	"path/filepath"
+	"strings"
	"testing"
)
-func TestResolveAgentConfig_Defaults(t *testing.T) {
+func TestResolveAgentConfig_NoConfig_ReturnsHelpfulError(t *testing.T) {
	home := t.TempDir()
	t.Setenv("HOME", home)
	t.Setenv("USERPROFILE", home)
	t.Cleanup(ResetModelsConfigCacheForTest)
	ResetModelsConfigCacheForTest()
-	// Test that default agents resolve correctly without config file
-	tests := []struct {
-		agent string
-		wantBackend string
-		wantModel string
-		wantPromptFile string
-	}{
-		{"oracle", "claude", "claude-opus-4-5-20251101", "~/.claude/skills/omo/references/oracle.md"},
-		{"librarian", "claude", "claude-sonnet-4-5-20250929", "~/.claude/skills/omo/references/librarian.md"},
-		{"explore", "opencode", "opencode/grok-code", "~/.claude/skills/omo/references/explore.md"},
-		{"frontend-ui-ux-engineer", "gemini", "", "~/.claude/skills/omo/references/frontend-ui-ux-engineer.md"},
-		{"document-writer", "gemini", "", "~/.claude/skills/omo/references/document-writer.md"},
-	}
-	for _, tt := range tests {
-		t.Run(tt.agent, func(t *testing.T) {
-			backend, model, promptFile, _, _, _, _ := resolveAgentConfig(tt.agent)
-			if backend != tt.wantBackend {
-				t.Errorf("backend = %q, want %q", backend, tt.wantBackend)
-			}
-			if model != tt.wantModel {
-				t.Errorf("model = %q, want %q", model, tt.wantModel)
-			}
-			if promptFile != tt.wantPromptFile {
-				t.Errorf("promptFile = %q, want %q", promptFile, tt.wantPromptFile)
-			}
-		})
-	}
-}
-func TestResolveAgentConfig_UnknownAgent(t *testing.T) {
-	home := t.TempDir()
-	t.Setenv("HOME", home)
-	t.Setenv("USERPROFILE", home)
-	t.Cleanup(ResetModelsConfigCacheForTest)
-	ResetModelsConfigCacheForTest()
-	backend, model, promptFile, _, _, _, _ := resolveAgentConfig("unknown-agent")
-	if backend != "opencode" {
-		t.Errorf("unknown agent backend = %q, want %q", backend, "opencode")
-	}
-	if model != "opencode/grok-code" {
-		t.Errorf("unknown agent model = %q, want %q", model, "opencode/grok-code")
-	}
-	if promptFile != "" {
-		t.Errorf("unknown agent promptFile = %q, want empty", promptFile)
-	}
-}
+	_, _, _, _, _, _, _, _, _, err := ResolveAgentConfig("develop")
+	if err == nil {
+		t.Fatalf("expected error, got nil")
+	}
+	msg := err.Error()
+	if !strings.Contains(msg, modelsConfigTildePath) {
+		t.Fatalf("error should mention %s, got: %s", modelsConfigTildePath, msg)
+	}
+	if !strings.Contains(msg, filepath.Join(home, ".codeagent", "models.json")) {
+		t.Fatalf("error should mention resolved config path, got: %s", msg)
+	}
+	if !strings.Contains(msg, "\"agents\"") {
+		t.Fatalf("error should include example config, got: %s", msg)
+	}
+}
func TestLoadModelsConfig_NoFile(t *testing.T) { func TestLoadModelsConfig_NoFile(t *testing.T) {
home := "/nonexistent/path/that/does/not/exist" home := t.TempDir()
t.Setenv("HOME", home) t.Setenv("HOME", home)
t.Setenv("USERPROFILE", home) t.Setenv("USERPROFILE", home)
t.Cleanup(ResetModelsConfigCacheForTest) t.Cleanup(ResetModelsConfigCacheForTest)
ResetModelsConfigCacheForTest() ResetModelsConfigCacheForTest()
cfg := loadModelsConfig() _, err := loadModelsConfig()
if cfg.DefaultBackend != "opencode" { if err == nil {
t.Errorf("DefaultBackend = %q, want %q", cfg.DefaultBackend, "opencode") t.Fatalf("expected error, got nil")
}
if len(cfg.Agents) != 6 {
t.Errorf("len(Agents) = %d, want 6", len(cfg.Agents))
} }
} }
@@ -119,7 +84,10 @@ func TestLoadModelsConfig_WithFile(t *testing.T) {
 	t.Cleanup(ResetModelsConfigCacheForTest)
 	ResetModelsConfigCacheForTest()

-	cfg := loadModelsConfig()
+	cfg, err := loadModelsConfig()
+	if err != nil {
+		t.Fatalf("loadModelsConfig: %v", err)
+	}
 	if cfg.DefaultBackend != "claude" {
 		t.Errorf("DefaultBackend = %q, want %q", cfg.DefaultBackend, "claude")
@@ -140,9 +108,8 @@ func TestLoadModelsConfig_WithFile(t *testing.T) {
 		}
 	}

-	// Check that defaults are merged
-	if _, ok := cfg.Agents["oracle"]; !ok {
-		t.Error("default agent oracle should be merged")
+	if _, ok := cfg.Agents["oracle"]; ok {
+		t.Error("oracle should not be present without explicit config")
 	}

 	baseURL, apiKey := ResolveBackendConfig("claude")
@@ -153,7 +120,10 @@ func TestLoadModelsConfig_WithFile(t *testing.T) {
 		t.Errorf("ResolveBackendConfig(apiKey) = %q, want %q", apiKey, "backend-key")
 	}

-	backend, model, _, _, agentBaseURL, agentAPIKey, _ := ResolveAgentConfig("custom-agent")
+	backend, model, _, _, agentBaseURL, agentAPIKey, _, _, _, err := ResolveAgentConfig("custom-agent")
+	if err != nil {
+		t.Fatalf("ResolveAgentConfig(custom-agent): %v", err)
+	}
 	if backend != "codex" {
 		t.Errorf("ResolveAgentConfig(backend) = %q, want %q", backend, "codex")
 	}
@@ -183,12 +153,26 @@ func TestResolveAgentConfig_DynamicAgent(t *testing.T) {
 		t.Fatalf("WriteFile: %v", err)
 	}

-	backend, model, promptFile, _, _, _, _ := resolveAgentConfig("sarsh")
-	if backend != "opencode" {
-		t.Errorf("backend = %q, want %q", backend, "opencode")
-	}
-	if model != "opencode/grok-code" {
-		t.Errorf("model = %q, want %q", model, "opencode/grok-code")
+	configDir := filepath.Join(home, ".codeagent")
+	if err := os.MkdirAll(configDir, 0o755); err != nil {
+		t.Fatalf("MkdirAll: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(configDir, "models.json"), []byte(`{
+		"default_backend": "codex",
+		"default_model": "gpt-test"
+	}`), 0o644); err != nil {
+		t.Fatalf("WriteFile: %v", err)
+	}
+
+	backend, model, promptFile, _, _, _, _, _, _, err := ResolveAgentConfig("sarsh")
+	if err != nil {
+		t.Fatalf("ResolveAgentConfig(sarsh): %v", err)
+	}
+	if backend != "codex" {
+		t.Errorf("backend = %q, want %q", backend, "codex")
+	}
+	if model != "gpt-test" {
+		t.Errorf("model = %q, want %q", model, "gpt-test")
 	}
 	if promptFile != "~/.codeagent/agents/sarsh.md" {
 		t.Errorf("promptFile = %q, want %q", promptFile, "~/.codeagent/agents/sarsh.md")
@@ -213,9 +197,66 @@ func TestLoadModelsConfig_InvalidJSON(t *testing.T) {
 	t.Cleanup(ResetModelsConfigCacheForTest)
 	ResetModelsConfigCacheForTest()

-	cfg := loadModelsConfig()
-	// Should fall back to defaults
-	if cfg.DefaultBackend != "opencode" {
-		t.Errorf("invalid JSON should fallback, got DefaultBackend = %q", cfg.DefaultBackend)
+	_, err := loadModelsConfig()
+	if err == nil {
+		t.Fatalf("expected error, got nil")
+	}
+}
+
+func TestResolveAgentConfig_UnknownAgent_ReturnsError(t *testing.T) {
+	home := t.TempDir()
+	t.Setenv("HOME", home)
+	t.Setenv("USERPROFILE", home)
+	t.Cleanup(ResetModelsConfigCacheForTest)
+	ResetModelsConfigCacheForTest()
+
+	configDir := filepath.Join(home, ".codeagent")
+	if err := os.MkdirAll(configDir, 0o755); err != nil {
+		t.Fatalf("MkdirAll: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(configDir, "models.json"), []byte(`{
+		"default_backend": "codex",
+		"default_model": "gpt-test",
+		"agents": {
+			"develop": { "backend": "codex", "model": "gpt-test" }
+		}
+	}`), 0o644); err != nil {
+		t.Fatalf("WriteFile: %v", err)
+	}
+
+	_, _, _, _, _, _, _, _, _, err := ResolveAgentConfig("unknown-agent")
+	if err == nil {
+		t.Fatalf("expected error, got nil")
+	}
+	if !strings.Contains(err.Error(), "unknown-agent") {
+		t.Fatalf("error should mention agent name, got: %s", err.Error())
+	}
+}
+
+func TestResolveAgentConfig_EmptyModel_ReturnsError(t *testing.T) {
+	home := t.TempDir()
+	t.Setenv("HOME", home)
+	t.Setenv("USERPROFILE", home)
+	t.Cleanup(ResetModelsConfigCacheForTest)
+	ResetModelsConfigCacheForTest()
+
+	configDir := filepath.Join(home, ".codeagent")
+	if err := os.MkdirAll(configDir, 0o755); err != nil {
+		t.Fatalf("MkdirAll: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(configDir, "models.json"), []byte(`{
+		"agents": {
+			"bad-agent": { "backend": "codex", "model": " " }
+		}
+	}`), 0o644); err != nil {
+		t.Fatalf("WriteFile: %v", err)
+	}
+
+	_, _, _, _, _, _, _, _, _, err := ResolveAgentConfig("bad-agent")
+	if err == nil {
+		t.Fatalf("expected error, got nil")
+	}
+	if !strings.Contains(strings.ToLower(err.Error()), "empty model") {
+		t.Fatalf("error should mention empty model, got: %s", err.Error())
 	}
 }
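Read together, these tests pin down the on-disk schema the loader now requires at `~/.codeagent/models.json`: a default backend/model plus an optional per-agent `agents` map, with no built-in defaults merged in. A minimal configuration consistent with the test fixtures (the backend and model names here are just the fixture values, not recommendations) would be:

```json
{
  "default_backend": "codex",
  "default_model": "gpt-test",
  "agents": {
    "develop": { "backend": "codex", "model": "gpt-test" }
  }
}
```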

View File

@@ -24,6 +24,10 @@ type Config struct {
 	SkipPermissions    bool
 	Yolo               bool
 	MaxParallelWorkers int
+	AllowedTools       []string
+	DisallowedTools    []string
+	Skills             []string
+	Worktree           bool // Execute in a new git worktree
 }

 // EnvFlagEnabled returns true when the environment variable exists and is not

View File

@@ -36,17 +36,18 @@ func TestEnvInjectionWithAgent(t *testing.T) {
 		t.Fatal(err)
 	}

-	// Override HOME to use temp dir
-	oldHome := os.Getenv("HOME")
-	os.Setenv("HOME", tmpDir)
-	defer os.Setenv("HOME", oldHome)
+	t.Setenv("HOME", tmpDir)
+	t.Setenv("USERPROFILE", tmpDir)

 	// Reset config cache
 	config.ResetModelsConfigCacheForTest()
 	defer config.ResetModelsConfigCacheForTest()

 	// Test ResolveAgentConfig
-	agentBackend, model, _, _, baseURL, apiKey, _ := config.ResolveAgentConfig("test-agent")
+	agentBackend, model, _, _, baseURL, apiKey, _, _, _, err := config.ResolveAgentConfig("test-agent")
+	if err != nil {
+		t.Fatalf("ResolveAgentConfig: %v", err)
+	}
 	t.Logf("ResolveAgentConfig: backend=%q, model=%q, baseURL=%q, apiKey=%q",
 		agentBackend, model, baseURL, apiKey)
@@ -71,8 +72,8 @@ func TestEnvInjectionWithAgent(t *testing.T) {
 	if env["ANTHROPIC_BASE_URL"] != baseURL {
 		t.Errorf("expected ANTHROPIC_BASE_URL=%q, got %q", baseURL, env["ANTHROPIC_BASE_URL"])
 	}
-	if env["ANTHROPIC_AUTH_TOKEN"] != apiKey {
-		t.Errorf("expected ANTHROPIC_AUTH_TOKEN=%q, got %q", apiKey, env["ANTHROPIC_AUTH_TOKEN"])
+	if env["ANTHROPIC_API_KEY"] != apiKey {
+		t.Errorf("expected ANTHROPIC_API_KEY=%q, got %q", apiKey, env["ANTHROPIC_API_KEY"])
 	}
 }
@@ -101,9 +102,8 @@ func TestEnvInjectionLogic(t *testing.T) {
 		t.Fatal(err)
 	}

-	oldHome := os.Getenv("HOME")
-	os.Setenv("HOME", tmpDir)
-	defer os.Setenv("HOME", oldHome)
+	t.Setenv("HOME", tmpDir)
+	t.Setenv("USERPROFILE", tmpDir)

 	config.ResetModelsConfigCacheForTest()
 	defer config.ResetModelsConfigCacheForTest()
@@ -118,7 +118,10 @@ func TestEnvInjectionLogic(t *testing.T) {
 	// Step 2: If agent specified, get agent config
 	if agentName != "" {
-		agentBackend, _, _, _, agentBaseURL, agentAPIKey, _ := config.ResolveAgentConfig(agentName)
+		agentBackend, _, _, _, agentBaseURL, agentAPIKey, _, _, _, err := config.ResolveAgentConfig(agentName)
+		if err != nil {
+			t.Fatalf("ResolveAgentConfig(%q): %v", agentName, err)
+		}
 		t.Logf("Step 2 - ResolveAgentConfig(%q): backend=%q, baseURL=%q, apiKey=%q",
 			agentName, agentBackend, agentBaseURL, agentAPIKey)
@@ -146,8 +149,8 @@ func TestEnvInjectionLogic(t *testing.T) {
 		t.Errorf("ANTHROPIC_BASE_URL: expected %q, got %q", expectedURL, injected["ANTHROPIC_BASE_URL"])
 	}
-	if _, ok := injected["ANTHROPIC_AUTH_TOKEN"]; !ok {
-		t.Error("ANTHROPIC_AUTH_TOKEN not set")
+	if _, ok := injected["ANTHROPIC_API_KEY"]; !ok {
+		t.Error("ANTHROPIC_API_KEY not set")
 	}

 	// Step 5: Test masking

View File

@@ -16,7 +16,7 @@ func TestMaskSensitiveValue(t *testing.T) {
 	}{
 		{
 			name:     "API_KEY with long value",
-			key:      "ANTHROPIC_AUTH_TOKEN",
+			key:      "ANTHROPIC_API_KEY",
 			value:    "sk-ant-api03-xxxxxxxxxxxxxxxxxxxxxxxxxxxx",
 			expected: "sk-a****xxxx",
 		},
@@ -180,7 +180,7 @@ func TestClaudeBackendEnv(t *testing.T) {
 			name:       "both base_url and api_key",
 			baseURL:    "https://api.custom.com",
 			apiKey:     "sk-test-key-12345",
-			expectKeys: []string{"ANTHROPIC_BASE_URL", "ANTHROPIC_AUTH_TOKEN"},
+			expectKeys: []string{"ANTHROPIC_BASE_URL", "ANTHROPIC_API_KEY"},
 		},
 		{
 			name: "only base_url",
@@ -192,7 +192,7 @@ func TestClaudeBackendEnv(t *testing.T) {
 			name:       "only api_key",
 			baseURL:    "",
 			apiKey:     "sk-test-key-12345",
-			expectKeys: []string{"ANTHROPIC_AUTH_TOKEN"},
+			expectKeys: []string{"ANTHROPIC_API_KEY"},
 		},
 		{
 			name: "both empty",
@@ -237,8 +237,8 @@ func TestClaudeBackendEnv(t *testing.T) {
 				}
 			}
 			if tt.apiKey != "" && strings.TrimSpace(tt.apiKey) != "" {
-				if env["ANTHROPIC_AUTH_TOKEN"] != strings.TrimSpace(tt.apiKey) {
-					t.Errorf("ANTHROPIC_AUTH_TOKEN = %q, want %q", env["ANTHROPIC_AUTH_TOKEN"], strings.TrimSpace(tt.apiKey))
+				if env["ANTHROPIC_API_KEY"] != strings.TrimSpace(tt.apiKey) {
+					t.Errorf("ANTHROPIC_API_KEY = %q, want %q", env["ANTHROPIC_API_KEY"], strings.TrimSpace(tt.apiKey))
 				}
 			}
 		})
@@ -267,7 +267,7 @@ func TestEnvLoggingIntegration(t *testing.T) {
 			}
 		}
-		if k == "ANTHROPIC_AUTH_TOKEN" {
+		if k == "ANTHROPIC_API_KEY" {
 			// API key should be masked
 			if masked == v {
 				t.Errorf("API_KEY should be masked, but got original value")

View File

@@ -65,11 +65,8 @@ func TestEnvInjection_LogsToStderrAndMasksKey(t *testing.T) {
 		t.Fatal(err)
 	}

-	oldHome := os.Getenv("HOME")
-	if err := os.Setenv("HOME", tmpDir); err != nil {
-		t.Fatal(err)
-	}
-	defer func() { _ = os.Setenv("HOME", oldHome) }()
+	t.Setenv("HOME", tmpDir)
+	t.Setenv("USERPROFILE", tmpDir)

 	config.ResetModelsConfigCacheForTest()
 	defer config.ResetModelsConfigCacheForTest()
@@ -120,14 +117,14 @@ func TestEnvInjection_LogsToStderrAndMasksKey(t *testing.T) {
 	if cmd.env["ANTHROPIC_BASE_URL"] != baseURL {
 		t.Fatalf("ANTHROPIC_BASE_URL=%q, want %q", cmd.env["ANTHROPIC_BASE_URL"], baseURL)
 	}
-	if cmd.env["ANTHROPIC_AUTH_TOKEN"] != apiKey {
-		t.Fatalf("ANTHROPIC_AUTH_TOKEN=%q, want %q", cmd.env["ANTHROPIC_AUTH_TOKEN"], apiKey)
+	if cmd.env["ANTHROPIC_API_KEY"] != apiKey {
+		t.Fatalf("ANTHROPIC_API_KEY=%q, want %q", cmd.env["ANTHROPIC_API_KEY"], apiKey)
 	}
 	if !strings.Contains(got, "Env: ANTHROPIC_BASE_URL="+baseURL) {
 		t.Fatalf("stderr missing base URL env log; stderr=%q", got)
 	}
-	if !strings.Contains(got, "Env: ANTHROPIC_AUTH_TOKEN=eyJh****test") {
+	if !strings.Contains(got, "Env: ANTHROPIC_API_KEY=eyJh****test") {
 		t.Fatalf("stderr missing masked API key log; stderr=%q", got)
 	}
 }
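The masked values these tests assert ("sk-a****xxxx", "eyJh****test") follow a keep-first-4/keep-last-4 pattern. A minimal sketch of that scheme (`mask` is an illustrative name, and the short-value branch is an assumption, not taken from the wrapper):

```go
package main

import "fmt"

// mask keeps the first and last four characters of a secret and elides the
// middle, matching the masked values the tests expect on stderr.
func mask(v string) string {
	if len(v) <= 8 {
		return "****" // assumption: secrets too short to split are fully hidden
	}
	return v[:4] + "****" + v[len(v)-4:]
}

func main() {
	fmt.Println(mask("sk-ant-api03-xxxxxxxxxxxxxxxxxxxxxxxxxxxx")) // sk-a****xxxx
}
```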

View File

@@ -8,6 +8,7 @@ import (
 	"os"
 	"os/exec"
 	"os/signal"
+	"runtime"
 	"sort"
 	"strings"
 	"sync"
@@ -20,6 +21,7 @@ import (
 	ilogger "codeagent-wrapper/internal/logger"
 	parser "codeagent-wrapper/internal/parser"
 	utils "codeagent-wrapper/internal/utils"
+	"codeagent-wrapper/internal/worktree"
 )

 const postMessageTerminateDelay = 1 * time.Second
@@ -48,6 +50,7 @@ var (
 	selectBackendFn    = backend.Select
 	commandContext     = exec.CommandContext
 	terminateCommandFn = terminateCommand
+	createWorktreeFn   = worktree.CreateWorktree
 )

 var forceKillDelay atomic.Int32
@@ -253,6 +256,15 @@ func (p *realProcess) Signal(sig os.Signal) error {
 // newCommandRunner creates a new commandRunner (test hook injection point)
 var newCommandRunner = func(ctx context.Context, name string, args ...string) commandRunner {
+	if runtime.GOOS == "windows" {
+		lowerName := strings.ToLower(strings.TrimSpace(name))
+		if strings.HasSuffix(lowerName, ".bat") || strings.HasSuffix(lowerName, ".cmd") {
+			cmdArgs := make([]string, 0, 2+len(args))
+			cmdArgs = append(cmdArgs, "/c", name)
+			cmdArgs = append(cmdArgs, args...)
+			return &realCmd{cmd: commandContext(ctx, "cmd.exe", cmdArgs...)}
+		}
+	}
 	return &realCmd{cmd: commandContext(ctx, name, args...)}
 }
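The Windows branch above exists because batch scripts are interpreted by the shell rather than being executable images, so they have to be launched through `cmd.exe /c`. The rerouting can be exercised in isolation (`rerouteBatch` is an illustrative stand-in for the logic inside `newCommandRunner`, taking the OS as a parameter so it runs anywhere):

```go
package main

import (
	"fmt"
	"strings"
)

// rerouteBatch mirrors the newCommandRunner special case: on Windows,
// .bat/.cmd names are rewritten to `cmd.exe /c <name> <args...>`;
// everything else passes through untouched.
func rerouteBatch(goos, name string, args []string) (string, []string) {
	if goos == "windows" {
		lower := strings.ToLower(strings.TrimSpace(name))
		if strings.HasSuffix(lower, ".bat") || strings.HasSuffix(lower, ".cmd") {
			return "cmd.exe", append([]string{"/c", name}, args...)
		}
	}
	return name, args
}

func main() {
	exe, argv := rerouteBatch("windows", "codex.cmd", []string{"--json"})
	fmt.Println(exe, argv) // rerouted through cmd.exe
	exe, argv = rerouteBatch("linux", "codex", []string{"--json"})
	fmt.Println(exe, argv) // unchanged on non-Windows
}
```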
@@ -325,6 +337,16 @@ func DefaultRunCodexTaskFn(task TaskSpec, timeout int) TaskResult {
 		}
 		task.Task = WrapTaskWithAgentPrompt(prompt, task.Task)
 	}

+	// Resolve skills: explicit > auto-detect from workdir
+	skills := task.Skills
+	if len(skills) == 0 {
+		skills = DetectProjectSkills(task.WorkDir)
+	}
+	if len(skills) > 0 {
+		if content := ResolveSkillContent(skills, 0); content != "" {
+			task.Task = task.Task + "\n\n# Domain Best Practices\n\n" + content
+		}
+	}
+
 	if task.UseStdin || ShouldUseStdin(task.Task, false) {
 		task.UseStdin = true
 	}
@@ -895,6 +917,8 @@ func RunCodexTaskWithContext(parentCtx context.Context, taskSpec TaskSpec, backe
 		ReasoningEffort: taskSpec.ReasoningEffort,
 		SkipPermissions: taskSpec.SkipPermissions,
 		Backend:         defaultBackendName,
+		AllowedTools:    taskSpec.AllowedTools,
+		DisallowedTools: taskSpec.DisallowedTools,
 	}

 	commandName := strings.TrimSpace(defaultCommandName)
@@ -911,6 +935,11 @@
 		cfg.Backend = backend.Name()
 	} else if taskSpec.Backend != "" {
 		cfg.Backend = taskSpec.Backend
+		if selectBackendFn != nil {
+			if b, err := selectBackendFn(taskSpec.Backend); err == nil {
+				argsBuilder = b.BuildArgs
+			}
+		}
 	} else if commandName != "" {
 		cfg.Backend = commandName
 	}
@@ -922,6 +951,23 @@
 		cfg.WorkDir = defaultWorkdir
 	}

+	// Handle worktree mode: check DO_WORKTREE_DIR env var first, then create if needed
+	if worktreeDir := os.Getenv("DO_WORKTREE_DIR"); worktreeDir != "" {
+		// Use existing worktree from /do setup
+		cfg.WorkDir = worktreeDir
+		logInfo(fmt.Sprintf("Using existing worktree from DO_WORKTREE_DIR: %s", worktreeDir))
+	} else if taskSpec.Worktree {
+		// Create new worktree (backward compatibility for standalone --worktree usage)
+		paths, err := createWorktreeFn(cfg.WorkDir)
+		if err != nil {
+			result.ExitCode = 1
+			result.Error = fmt.Sprintf("failed to create worktree: %v", err)
+			return result
+		}
+		cfg.WorkDir = paths.Dir
+		logInfo(fmt.Sprintf("Using worktree: %s (task_id: %s, branch: %s)", paths.Dir, paths.TaskID, paths.Branch))
+	}
+
 	if cfg.Mode == "resume" && strings.TrimSpace(cfg.SessionID) == "" {
 		result.ExitCode = 1
 		result.Error = "resume mode requires non-empty session_id"
@@ -1060,9 +1106,11 @@ func RunCodexTaskWithContext(parentCtx context.Context, taskSpec TaskSpec, backe
 	if envBackend != nil {
 		baseURL, apiKey := config.ResolveBackendConfig(cfg.Backend)
 		if agentName := strings.TrimSpace(taskSpec.Agent); agentName != "" {
-			agentBackend, _, _, _, agentBaseURL, agentAPIKey, _ := config.ResolveAgentConfig(agentName)
-			if strings.EqualFold(strings.TrimSpace(agentBackend), strings.TrimSpace(cfg.Backend)) {
-				baseURL, apiKey = agentBaseURL, agentAPIKey
+			agentBackend, _, _, _, agentBaseURL, agentAPIKey, _, _, _, err := config.ResolveAgentConfig(agentName)
+			if err == nil {
+				if strings.EqualFold(strings.TrimSpace(agentBackend), strings.TrimSpace(cfg.Backend)) {
+					baseURL, apiKey = agentBaseURL, agentAPIKey
+				}
 			}
 		}
 		if injected := envBackend.Env(baseURL, apiKey); len(injected) > 0 {
@@ -1076,6 +1124,8 @@
 		}
 	}

+	injectTempEnv(cmd)
+
 	// For backends that don't support -C flag (claude, gemini), set working directory via cmd.Dir
 	// Codex passes workdir via -C flag, so we skip setting Dir for it to avoid conflicts
 	if cfg.Mode != "resume" && commandName != "codex" && cfg.WorkDir != "" {
@@ -1385,6 +1435,22 @@ waitLoop:
 	return result
 }

+func injectTempEnv(cmd commandRunner) {
+	if cmd == nil {
+		return
+	}
+	env := make(map[string]string, 3)
+	for _, k := range []string{"TMPDIR", "TMP", "TEMP"} {
+		if v := strings.TrimSpace(os.Getenv(k)); v != "" {
+			env[k] = v
+		}
+	}
+	if len(env) == 0 {
+		return
+	}
+	cmd.SetEnv(env)
+}
+
 func cancelReason(commandName string, ctx context.Context) string {
 	if ctx == nil {
 		return "Context cancelled"

View File

@@ -75,6 +75,12 @@ func ParseParallelConfig(data []byte) (*ParallelConfig, error) {
 				continue
 			}
 			task.SkipPermissions = config.ParseBoolFlag(value, false)
+		case "worktree":
+			if value == "" {
+				task.Worktree = true
+				continue
+			}
+			task.Worktree = config.ParseBoolFlag(value, false)
 		case "dependencies":
 			for _, dep := range strings.Split(value, ",") {
 				dep = strings.TrimSpace(dep)
@@ -82,6 +88,13 @@ func ParseParallelConfig(data []byte) (*ParallelConfig, error) {
 					task.Dependencies = append(task.Dependencies, dep)
 				}
 			}
+		case "skills":
+			for _, s := range strings.Split(value, ",") {
+				s = strings.TrimSpace(s)
+				if s != "" {
+					task.Skills = append(task.Skills, s)
+				}
+			}
 		}
 	}
@@ -96,7 +109,10 @@ func ParseParallelConfig(data []byte) (*ParallelConfig, error) {
 		if err := config.ValidateAgentName(task.Agent); err != nil {
 			return nil, fmt.Errorf("task block #%d invalid agent name: %w", taskIndex, err)
 		}
-		backend, model, promptFile, reasoning, _, _, _ := config.ResolveAgentConfig(task.Agent)
+		backend, model, promptFile, reasoning, _, _, _, allowedTools, disallowedTools, err := config.ResolveAgentConfig(task.Agent)
+		if err != nil {
+			return nil, fmt.Errorf("task block #%d failed to resolve agent %q: %w", taskIndex, task.Agent, err)
+		}
 		if task.Backend == "" {
 			task.Backend = backend
 		}
@@ -107,6 +123,8 @@ func ParseParallelConfig(data []byte) (*ParallelConfig, error) {
 			task.ReasoningEffort = reasoning
 		}
 		task.PromptFile = promptFile
+		task.AllowedTools = allowedTools
+		task.DisallowedTools = disallowedTools
 	}

 	if task.ID == "" {
if task.ID == "" { if task.ID == "" {

View File

@@ -4,6 +4,7 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
+	"regexp"
 	"strings"
 )
@@ -128,3 +129,116 @@ func ReadAgentPromptFile(path string, allowOutsideClaudeDir bool) (string, error
 func WrapTaskWithAgentPrompt(prompt string, task string) string {
 	return "<agent-prompt>\n" + prompt + "\n</agent-prompt>\n\n" + task
 }
// techSkillMap maps file-existence fingerprints to skill names.
var techSkillMap = []struct {
	Files  []string // any of these files → this tech
	Skills []string
}{
	{Files: []string{"go.mod", "go.sum"}, Skills: []string{"golang-base-practices"}},
	{Files: []string{"Cargo.toml"}, Skills: []string{"rust-best-practices"}},
	{Files: []string{"pyproject.toml", "setup.py", "requirements.txt", "Pipfile"}, Skills: []string{"python-best-practices"}},
	{Files: []string{"package.json"}, Skills: []string{"vercel-react-best-practices", "frontend-design"}},
	{Files: []string{"vue.config.js", "vite.config.ts", "nuxt.config.ts"}, Skills: []string{"vue-web-app"}},
}

// DetectProjectSkills scans workDir for tech-stack fingerprints and returns
// skill names that are both detected and installed at ~/.claude/skills/{name}/SKILL.md.
func DetectProjectSkills(workDir string) []string {
	home, err := os.UserHomeDir()
	if err != nil {
		return nil
	}
	var detected []string
	seen := make(map[string]bool)
	for _, entry := range techSkillMap {
		for _, f := range entry.Files {
			if _, err := os.Stat(filepath.Join(workDir, f)); err == nil {
				for _, skill := range entry.Skills {
					if seen[skill] {
						continue
					}
					skillPath := filepath.Join(home, ".claude", "skills", skill, "SKILL.md")
					if _, err := os.Stat(skillPath); err == nil {
						detected = append(detected, skill)
						seen[skill] = true
					}
				}
				break // one matching file is enough for this entry
			}
		}
	}
	return detected
}

const defaultSkillBudget = 16000 // chars, ~4K tokens

// validSkillName ensures skill names contain only safe characters to prevent path traversal
var validSkillName = regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)

// ResolveSkillContent reads SKILL.md files for the given skill names,
// strips YAML frontmatter, wraps each in <skill> tags, and enforces a
// character budget to prevent context bloat.
func ResolveSkillContent(skills []string, maxBudget int) string {
	home, err := os.UserHomeDir()
	if err != nil {
		return ""
	}
	if maxBudget <= 0 {
		maxBudget = defaultSkillBudget
	}
	var sections []string
	remaining := maxBudget
	for _, name := range skills {
		name = strings.TrimSpace(name)
		if name == "" {
			continue
		}
		if !validSkillName.MatchString(name) {
			logWarn(fmt.Sprintf("skill %q: invalid name (must contain only [a-zA-Z0-9_-]), skipping", name))
			continue
		}
		path := filepath.Join(home, ".claude", "skills", name, "SKILL.md")
		data, err := os.ReadFile(path)
		if err != nil || len(data) == 0 {
			logWarn(fmt.Sprintf("skill %q: SKILL.md not found or empty, skipping", name))
			continue
		}
		body := stripYAMLFrontmatter(strings.TrimSpace(string(data)))
		tagOverhead := len("<skill name=\"\">") + len(name) + len("\n") + len("\n</skill>")
		bodyBudget := remaining - tagOverhead
		if bodyBudget <= 0 {
			logWarn(fmt.Sprintf("skill %q: skipped, insufficient budget for tags", name))
			break
		}
		if len(body) > bodyBudget {
			logWarn(fmt.Sprintf("skill %q: truncated from %d to %d chars (budget)", name, len(body), bodyBudget))
			body = body[:bodyBudget]
		}
		remaining -= len(body) + tagOverhead
		sections = append(sections, "<skill name=\""+name+"\">\n"+body+"\n</skill>")
		if remaining <= 0 {
			break
		}
	}
	if len(sections) == 0 {
		return ""
	}
	return strings.Join(sections, "\n\n")
}

func stripYAMLFrontmatter(s string) string {
	s = strings.ReplaceAll(s, "\r\n", "\n")
	if !strings.HasPrefix(s, "---") {
		return s
	}
	idx := strings.Index(s[3:], "\n---")
	if idx < 0 {
		return s
	}
	result := s[3+idx+4:]
	if len(result) > 0 && result[0] == '\n' {
		result = result[1:]
	}
	return strings.TrimSpace(result)
}
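Both guards in ResolveSkillContent are small enough to exercise on their own; the sketch below copies the name whitelist and stripYAMLFrontmatter verbatim from the diff so it runs standalone:

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

// validSkillName mirrors the whitelist from prompt.go: anything outside
// [a-zA-Z0-9_-] (slashes, dots, spaces) is rejected before the name can
// reach filepath.Join, which blocks "../" style traversal.
var validSkillName = regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)

// stripYAMLFrontmatter mirrors the diff: normalize CRLF, then drop a
// leading "---\n...\n---" block if one is present.
func stripYAMLFrontmatter(s string) string {
	s = strings.ReplaceAll(s, "\r\n", "\n")
	if !strings.HasPrefix(s, "---") {
		return s
	}
	idx := strings.Index(s[3:], "\n---")
	if idx < 0 {
		return s
	}
	result := s[3+idx+4:]
	if len(result) > 0 && result[0] == '\n' {
		result = result[1:]
	}
	return strings.TrimSpace(result)
}

func main() {
	fmt.Println(validSkillName.MatchString("golang-base-practices")) // true
	fmt.Println(validSkillName.MatchString("../../../etc/passwd"))   // false
	fmt.Println(stripYAMLFrontmatter("---\nname: t\n---\n\n# Body")) // # Body
}
```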

View File

@@ -0,0 +1,343 @@
package executor

import (
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"testing"
)

// setTestHome overrides the home directory for both Unix (HOME) and Windows (USERPROFILE).
func setTestHome(t *testing.T, home string) {
	t.Helper()
	t.Setenv("HOME", home)
	if runtime.GOOS == "windows" {
		t.Setenv("USERPROFILE", home)
	}
}

// --- helper: create a temp skill dir with SKILL.md ---
func createTempSkill(t *testing.T, name, content string) string {
	t.Helper()
	home := t.TempDir()
	skillDir := filepath.Join(home, ".claude", "skills", name)
	if err := os.MkdirAll(skillDir, 0755); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(content), 0644); err != nil {
		t.Fatal(err)
	}
	return home
}

// --- ParseParallelConfig skills parsing tests ---
func TestParseParallelConfig_SkillsField(t *testing.T) {
	tests := []struct {
		name           string
		input          string
		taskIdx        int
		expectedSkills []string
	}{
		{
			name: "single skill",
			input: `---TASK---
id: t1
workdir: .
skills: golang-base-practices
---CONTENT---
Do something.
`,
			taskIdx:        0,
			expectedSkills: []string{"golang-base-practices"},
		},
		{
			name: "multiple comma-separated skills",
			input: `---TASK---
id: t1
workdir: .
skills: golang-base-practices, vercel-react-best-practices
---CONTENT---
Do something.
`,
			taskIdx:        0,
			expectedSkills: []string{"golang-base-practices", "vercel-react-best-practices"},
		},
		{
			name: "no skills field",
			input: `---TASK---
id: t1
workdir: .
---CONTENT---
Do something.
`,
			taskIdx:        0,
			expectedSkills: nil,
		},
		{
			name: "empty skills value",
			input: `---TASK---
id: t1
workdir: .
skills:
---CONTENT---
Do something.
`,
			taskIdx:        0,
			expectedSkills: nil,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			cfg, err := ParseParallelConfig([]byte(tt.input))
			if err != nil {
				t.Fatalf("ParseParallelConfig error: %v", err)
			}
			got := cfg.Tasks[tt.taskIdx].Skills
			if len(got) != len(tt.expectedSkills) {
				t.Fatalf("skills: got %v, want %v", got, tt.expectedSkills)
			}
			for i := range got {
				if got[i] != tt.expectedSkills[i] {
					t.Errorf("skills[%d]: got %q, want %q", i, got[i], tt.expectedSkills[i])
				}
			}
		})
	}
}

// --- stripYAMLFrontmatter tests ---
func TestStripYAMLFrontmatter(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected string
	}{
		{
			name:     "with frontmatter",
			input:    "---\nname: test\ndescription: foo\n---\n\n# Body\nContent here.",
			expected: "# Body\nContent here.",
		},
		{
			name:     "no frontmatter",
			input:    "# Just a body\nNo frontmatter.",
			expected: "# Just a body\nNo frontmatter.",
		},
		{
			name:     "empty",
			input:    "",
			expected: "",
		},
		{
			name:     "only frontmatter",
			input:    "---\nname: test\n---",
			expected: "",
		},
		{
			name:     "frontmatter with allowed-tools",
			input:    "---\nname: do\nallowed-tools: [\"Bash\"]\n---\n\n# Skill content",
			expected: "# Skill content",
		},
		{
			name:     "CRLF line endings",
			input:    "---\r\nname: test\r\n---\r\n\r\n# Body\r\nContent.",
			expected: "# Body\nContent.",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := stripYAMLFrontmatter(tt.input)
			if got != tt.expected {
				t.Errorf("got %q, want %q", got, tt.expected)
			}
		})
	}
}

// --- DetectProjectSkills tests ---
func TestDetectProjectSkills_GoProject(t *testing.T) {
	tmpDir := t.TempDir()
	os.WriteFile(filepath.Join(tmpDir, "go.mod"), []byte("module test"), 0644)
	skills := DetectProjectSkills(tmpDir)
	// Result depends on whether golang-base-practices is installed locally
	t.Logf("detected skills for Go project: %v", skills)
}

func TestDetectProjectSkills_NoFingerprints(t *testing.T) {
	tmpDir := t.TempDir()
	skills := DetectProjectSkills(tmpDir)
	if len(skills) != 0 {
		t.Errorf("expected no skills for empty dir, got %v", skills)
	}
}

func TestDetectProjectSkills_FullStack(t *testing.T) {
	tmpDir := t.TempDir()
	os.WriteFile(filepath.Join(tmpDir, "go.mod"), []byte("module test"), 0644)
	os.WriteFile(filepath.Join(tmpDir, "package.json"), []byte(`{"name":"test"}`), 0644)
	skills := DetectProjectSkills(tmpDir)
	t.Logf("detected skills for fullstack project: %v", skills)
	seen := make(map[string]bool)
	for _, s := range skills {
		if seen[s] {
			t.Errorf("duplicate skill detected: %s", s)
		}
		seen[s] = true
	}
}

func TestDetectProjectSkills_NonexistentDir(t *testing.T) {
	skills := DetectProjectSkills("/nonexistent/path/xyz")
	if len(skills) != 0 {
		t.Errorf("expected no skills for nonexistent dir, got %v", skills)
	}
}

// --- ResolveSkillContent tests (CI-friendly with temp dirs) ---
func TestResolveSkillContent_ValidSkill(t *testing.T) {
	home := createTempSkill(t, "test-skill", "---\nname: test\n---\n\n# Test Skill\nBest practices here.")
	setTestHome(t, home)
	result := ResolveSkillContent([]string{"test-skill"}, 0)
	if result == "" {
		t.Fatal("expected non-empty content")
	}
	if !strings.Contains(result, `<skill name="test-skill">`) {
		t.Error("missing opening <skill> tag")
	}
	if !strings.Contains(result, "</skill>") {
		t.Error("missing closing </skill> tag")
	}
	if !strings.Contains(result, "# Test Skill") {
		t.Error("missing skill body content")
	}
	if strings.Contains(result, "name: test") {
		t.Error("frontmatter was not stripped")
	}
}

func TestResolveSkillContent_NonexistentSkill(t *testing.T) {
	home := t.TempDir()
	setTestHome(t, home)
	result := ResolveSkillContent([]string{"nonexistent-skill-xyz"}, 0)
	if result != "" {
		t.Errorf("expected empty for nonexistent skill, got %d bytes", len(result))
	}
}

func TestResolveSkillContent_Empty(t *testing.T) {
	if result := ResolveSkillContent(nil, 0); result != "" {
		t.Errorf("expected empty for nil, got %q", result)
	}
	if result := ResolveSkillContent([]string{}, 0); result != "" {
		t.Errorf("expected empty for empty, got %q", result)
	}
}

func TestResolveSkillContent_Budget(t *testing.T) {
	longBody := strings.Repeat("x", 500)
	home := createTempSkill(t, "big-skill", "---\nname: big\n---\n\n"+longBody)
	setTestHome(t, home)
	result := ResolveSkillContent([]string{"big-skill"}, 200)
	if result == "" {
		t.Fatal("expected non-empty even with small budget")
	}
	if len(result) > 200 {
		t.Errorf("result %d bytes exceeds budget 200", len(result))
	}
	t.Logf("budget=200, result=%d bytes", len(result))
}

func TestResolveSkillContent_MultipleSkills(t *testing.T) {
	home := t.TempDir()
	for _, name := range []string{"skill-a", "skill-b"} {
		skillDir := filepath.Join(home, ".claude", "skills", name)
		os.MkdirAll(skillDir, 0755)
		os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte("# "+name+"\nContent."), 0644)
	}
	setTestHome(t, home)
	result := ResolveSkillContent([]string{"skill-a", "skill-b"}, 0)
	if result == "" {
		t.Fatal("expected non-empty for multiple skills")
	}
	if !strings.Contains(result, `<skill name="skill-a">`) {
		t.Error("missing skill-a tag")
	}
	if !strings.Contains(result, `<skill name="skill-b">`) {
		t.Error("missing skill-b tag")
	}
}

func TestResolveSkillContent_PathTraversal(t *testing.T) {
	home := t.TempDir()
	setTestHome(t, home)
	result := ResolveSkillContent([]string{"../../../etc/passwd"}, 0)
	if result != "" {
		t.Errorf("expected empty for path traversal name, got %d bytes", len(result))
	}
}

func TestResolveSkillContent_InvalidNames(t *testing.T) {
	home := t.TempDir()
	setTestHome(t, home)
	tests := []string{"../bad", "foo/bar", "skill name", "skill.name", "a b"}
	for _, name := range tests {
		result := ResolveSkillContent([]string{name}, 0)
if result != "" {
t.Errorf("expected empty for invalid name %q, got %d bytes", name, len(result))
}
}
}
func TestResolveSkillContent_ValidNamePattern(t *testing.T) {
if !validSkillName.MatchString("golang-base-practices") {
t.Error("golang-base-practices should be valid")
}
if !validSkillName.MatchString("my_skill_v2") {
t.Error("my_skill_v2 should be valid")
}
if validSkillName.MatchString("../bad") {
t.Error("../bad should be invalid")
}
if validSkillName.MatchString("") {
t.Error("empty should be invalid")
}
}
// --- Integration: skill injection format test ---
func TestSkillInjectionFormat(t *testing.T) {
home := createTempSkill(t, "test-go", "---\nname: go\n---\n\n# Go Best Practices\nUse gofmt.")
setTestHome(t, home)
taskText := "Implement the feature."
content := ResolveSkillContent([]string{"test-go"}, 0)
injected := taskText + "\n\n# Domain Best Practices\n\n" + content
if !strings.Contains(injected, "Implement the feature.") {
t.Error("original task text lost")
}
if !strings.Contains(injected, "# Domain Best Practices") {
t.Error("missing section header")
}
if !strings.Contains(injected, `<skill name="test-go">`) {
t.Error("missing <skill> tag")
}
if !strings.Contains(injected, "Use gofmt.") {
t.Error("missing skill body")
}
}

View File

@@ -21,6 +21,10 @@ type TaskSpec struct {
 Agent           string `json:"agent,omitempty"`
 PromptFile      string `json:"prompt_file,omitempty"`
 SkipPermissions bool   `json:"skip_permissions,omitempty"`
+Worktree        bool     `json:"worktree,omitempty"`
+AllowedTools    []string `json:"allowed_tools,omitempty"`
+DisallowedTools []string `json:"disallowed_tools,omitempty"`
+Skills          []string `json:"skills,omitempty"`
 Mode     string          `json:"-"`
 UseStdin bool            `json:"-"`
 Context  context.Context `json:"-"`

View File

@@ -70,12 +70,11 @@ func TestLoggerWithSuffixNamingAndIsolation(t *testing.T) {
 func TestLoggerWithSuffixReturnsErrorWhenTempDirNotWritable(t *testing.T) {
 base := t.TempDir()
-noWrite := filepath.Join(base, "ro")
-if err := os.Mkdir(noWrite, 0o500); err != nil {
-t.Fatalf("failed to create read-only temp dir: %v", err)
-}
-t.Cleanup(func() { _ = os.Chmod(noWrite, 0o700) })
-setTempDirEnv(t, noWrite)
+notDir := filepath.Join(base, "not-a-dir")
+if err := os.WriteFile(notDir, []byte("x"), 0o644); err != nil {
+t.Fatalf("failed to create temp file: %v", err)
+}
+setTempDirEnv(t, notDir)
 logger, err := NewLoggerWithSuffix("task-err")
 if err == nil {

View File

@@ -26,8 +26,7 @@ func compareCleanupStats(got, want CleanupStats) bool {
 }
 func TestLoggerCreatesFileWithPID(t *testing.T) {
-tempDir := t.TempDir()
-t.Setenv("TMPDIR", tempDir)
+tempDir := setTempDirEnv(t, t.TempDir())
 logger, err := NewLogger()
 if err != nil {
@@ -46,8 +45,7 @@ func TestLoggerCreatesFileWithPID(t *testing.T) {
 }
 func TestLoggerWritesLevels(t *testing.T) {
-tempDir := t.TempDir()
-t.Setenv("TMPDIR", tempDir)
+setTempDirEnv(t, t.TempDir())
 logger, err := NewLogger()
 if err != nil {
@@ -77,8 +75,7 @@ func TestLoggerWritesLevels(t *testing.T) {
 }
 func TestLoggerCloseStopsWorkerAndKeepsFile(t *testing.T) {
-tempDir := t.TempDir()
-t.Setenv("TMPDIR", tempDir)
+setTempDirEnv(t, t.TempDir())
 logger, err := NewLogger()
 if err != nil {
@@ -104,8 +101,7 @@ func TestLoggerCloseStopsWorkerAndKeepsFile(t *testing.T) {
 }
 func TestLoggerConcurrentWritesSafe(t *testing.T) {
-tempDir := t.TempDir()
-t.Setenv("TMPDIR", tempDir)
+setTempDirEnv(t, t.TempDir())
 logger, err := NewLogger()
 if err != nil {
@@ -390,12 +386,14 @@ func TestLoggerCleanupOldLogsPerformanceBound(t *testing.T) {
 fakePaths := make([]string, fileCount)
 for i := 0; i < fileCount; i++ {
 name := fmt.Sprintf("codeagent-wrapper-%d.log", 10000+i)
-fakePaths[i] = createTempLog(t, tempDir, name)
+fakePaths[i] = filepath.Join(tempDir, name)
 }
 stubGlobLogFiles(t, func(pattern string) ([]string, error) {
 return fakePaths, nil
 })
+stubFileStat(t, func(string) (os.FileInfo, error) { return fakeFileInfo{}, nil })
+stubEvalSymlinks(t, func(path string) (string, error) { return path, nil })
 stubProcessRunning(t, func(int) bool { return false })
 stubProcessStartTime(t, func(int) time.Time { return time.Time{} })
@@ -542,8 +540,7 @@ func TestLoggerIsUnsafeFileSecurityChecks(t *testing.T) {
 }
 func TestLoggerPathAndRemove(t *testing.T) {
-tempDir := t.TempDir()
-t.Setenv("TMPDIR", tempDir)
+setTempDirEnv(t, t.TempDir())
 logger, err := NewLoggerWithSuffix("sample")
 if err != nil {

View File

@@ -19,7 +19,7 @@ func TestTruncate(t *testing.T) {
{"zero maxLen", "hello", 0, "..."}, {"zero maxLen", "hello", 0, "..."},
{"negative maxLen", "hello", -1, ""}, {"negative maxLen", "hello", -1, ""},
{"maxLen 1", "hello", 1, "h..."}, {"maxLen 1", "hello", 1, "h..."},
{"unicode bytes truncate", "你好世界", 10, "你好世\xe7..."}, // Truncate works on bytes, not runes {"unicode bytes truncate", "你好世界", 10, "你好世\xe7..."}, // Truncate works on bytes, not runes
{"mixed truncate", "hello世界abc", 7, "hello\xe4\xb8..."}, // byte-based truncation {"mixed truncate", "hello世界abc", 7, "hello\xe4\xb8..."}, // byte-based truncation
} }

View File

@@ -0,0 +1,97 @@
package worktree
import (
"crypto/rand"
"encoding/hex"
"fmt"
"io"
"os/exec"
"path/filepath"
"strings"
"time"
)
// Paths contains worktree information
type Paths struct {
Dir string // .worktrees/do-{task_id}/
Branch string // do/{task_id}
TaskID string // auto-generated task_id
}
// Hook points for testing
var (
randReader io.Reader = rand.Reader
timeNowFunc = time.Now
execCommand = exec.Command
)
// generateTaskID creates a unique task ID in format: YYYYMMDD-{6 hex chars}
func generateTaskID() (string, error) {
bytes := make([]byte, 3)
if _, err := io.ReadFull(randReader, bytes); err != nil {
return "", fmt.Errorf("failed to generate random bytes: %w", err)
}
date := timeNowFunc().Format("20060102")
return fmt.Sprintf("%s-%s", date, hex.EncodeToString(bytes)), nil
}
// isGitRepo checks if the given directory is inside a git repository
func isGitRepo(dir string) bool {
cmd := execCommand("git", "-C", dir, "rev-parse", "--is-inside-work-tree")
output, err := cmd.Output()
if err != nil {
return false
}
return strings.TrimSpace(string(output)) == "true"
}
// getGitRoot returns the root directory of the git repository
func getGitRoot(dir string) (string, error) {
cmd := execCommand("git", "-C", dir, "rev-parse", "--show-toplevel")
output, err := cmd.Output()
if err != nil {
return "", fmt.Errorf("failed to get git root: %w", err)
}
return strings.TrimSpace(string(output)), nil
}
// CreateWorktree creates a new git worktree with auto-generated task_id
// Returns Paths containing the worktree directory, branch name, and task_id
func CreateWorktree(projectDir string) (*Paths, error) {
if projectDir == "" {
projectDir = "."
}
// Verify it's a git repository
if !isGitRepo(projectDir) {
return nil, fmt.Errorf("not a git repository: %s", projectDir)
}
// Get git root for consistent path calculation
gitRoot, err := getGitRoot(projectDir)
if err != nil {
return nil, err
}
// Generate task ID
taskID, err := generateTaskID()
if err != nil {
return nil, err
}
// Calculate paths
worktreeDir := filepath.Join(gitRoot, ".worktrees", fmt.Sprintf("do-%s", taskID))
branchName := fmt.Sprintf("do/%s", taskID)
// Create worktree with new branch
cmd := execCommand("git", "-C", gitRoot, "worktree", "add", "-b", branchName, worktreeDir)
if output, err := cmd.CombinedOutput(); err != nil {
return nil, fmt.Errorf("failed to create worktree: %w\noutput: %s", err, string(output))
}
return &Paths{
Dir: worktreeDir,
Branch: branchName,
TaskID: taskID,
}, nil
}

View File

@@ -0,0 +1,449 @@
package worktree
import (
"crypto/rand"
"errors"
"io"
"os"
"os/exec"
"path/filepath"
"regexp"
"sync"
"testing"
"time"
)
func resetHooks() {
randReader = rand.Reader
timeNowFunc = time.Now
execCommand = exec.Command
}
func TestGenerateTaskID(t *testing.T) {
defer resetHooks()
taskID, err := generateTaskID()
if err != nil {
t.Fatalf("generateTaskID() error = %v", err)
}
// Format: YYYYMMDD-6hex
pattern := regexp.MustCompile(`^\d{8}-[0-9a-f]{6}$`)
if !pattern.MatchString(taskID) {
t.Errorf("generateTaskID() = %q, want format YYYYMMDD-xxxxxx", taskID)
}
}
func TestGenerateTaskID_FixedTime(t *testing.T) {
defer resetHooks()
// Mock time to a fixed date
timeNowFunc = func() time.Time {
return time.Date(2026, 2, 3, 12, 0, 0, 0, time.UTC)
}
taskID, err := generateTaskID()
if err != nil {
t.Fatalf("generateTaskID() error = %v", err)
}
if !regexp.MustCompile(`^20260203-[0-9a-f]{6}$`).MatchString(taskID) {
t.Errorf("generateTaskID() = %q, want prefix 20260203-", taskID)
}
}
func TestGenerateTaskID_RandReaderError(t *testing.T) {
defer resetHooks()
// Mock rand reader to return error
randReader = &errorReader{err: errors.New("mock rand error")}
_, err := generateTaskID()
if err == nil {
t.Fatal("generateTaskID() expected error, got nil")
}
if !regexp.MustCompile(`failed to generate random bytes`).MatchString(err.Error()) {
t.Errorf("error = %q, want 'failed to generate random bytes'", err.Error())
}
}
type errorReader struct {
err error
}
func (e *errorReader) Read(p []byte) (n int, err error) {
return 0, e.err
}
func TestGenerateTaskID_Uniqueness(t *testing.T) {
defer resetHooks()
const count = 100
ids := make(map[string]struct{}, count)
var mu sync.Mutex
var wg sync.WaitGroup
for i := 0; i < count; i++ {
wg.Add(1)
go func() {
defer wg.Done()
id, err := generateTaskID()
if err != nil {
t.Errorf("generateTaskID() error = %v", err)
return
}
mu.Lock()
ids[id] = struct{}{}
mu.Unlock()
}()
}
wg.Wait()
if len(ids) != count {
t.Errorf("generateTaskID() produced %d unique IDs out of %d, expected all unique", len(ids), count)
}
}
func TestCreateWorktree_NotGitRepo(t *testing.T) {
defer resetHooks()
tmpDir, err := os.MkdirTemp("", "worktree-test-*")
if err != nil {
t.Fatalf("failed to create temp dir: %v", err)
}
defer os.RemoveAll(tmpDir)
_, err = CreateWorktree(tmpDir)
if err == nil {
t.Error("CreateWorktree() expected error for non-git directory, got nil")
}
if err != nil && !regexp.MustCompile(`not a git repository`).MatchString(err.Error()) {
t.Errorf("CreateWorktree() error = %q, want 'not a git repository'", err.Error())
}
}
func TestCreateWorktree_EmptyProjectDir(t *testing.T) {
defer resetHooks()
// When projectDir is empty, it should default to "."
// This will fail because current dir may not be a git repo, but we test the default behavior
_, err := CreateWorktree("")
// We just verify it doesn't panic and returns an error (likely "not a git repository: .")
if err == nil {
// If we happen to be in a git repo, that's fine too
return
}
if !regexp.MustCompile(`not a git repository: \.`).MatchString(err.Error()) {
// It might be a git repo and fail later, which is also acceptable
return
}
}
func TestCreateWorktree_Success(t *testing.T) {
defer resetHooks()
// Create temp git repo
tmpDir, err := os.MkdirTemp("", "worktree-test-*")
if err != nil {
t.Fatalf("failed to create temp dir: %v", err)
}
defer os.RemoveAll(tmpDir)
// Initialize git repo
if err := exec.Command("git", "-C", tmpDir, "init").Run(); err != nil {
t.Fatalf("failed to init git repo: %v", err)
}
if err := exec.Command("git", "-C", tmpDir, "config", "user.email", "test@test.com").Run(); err != nil {
t.Fatalf("failed to set git email: %v", err)
}
if err := exec.Command("git", "-C", tmpDir, "config", "user.name", "Test").Run(); err != nil {
t.Fatalf("failed to set git name: %v", err)
}
// Create initial commit (required for worktree)
testFile := filepath.Join(tmpDir, "test.txt")
if err := os.WriteFile(testFile, []byte("test"), 0644); err != nil {
t.Fatalf("failed to create test file: %v", err)
}
if err := exec.Command("git", "-C", tmpDir, "add", ".").Run(); err != nil {
t.Fatalf("failed to git add: %v", err)
}
if err := exec.Command("git", "-C", tmpDir, "commit", "-m", "initial").Run(); err != nil {
t.Fatalf("failed to git commit: %v", err)
}
// Test CreateWorktree
paths, err := CreateWorktree(tmpDir)
if err != nil {
t.Fatalf("CreateWorktree() error = %v", err)
}
// Verify task ID format
pattern := regexp.MustCompile(`^\d{8}-[0-9a-f]{6}$`)
if !pattern.MatchString(paths.TaskID) {
t.Errorf("TaskID = %q, want format YYYYMMDD-xxxxxx", paths.TaskID)
}
// Verify branch name
expectedBranch := "do/" + paths.TaskID
if paths.Branch != expectedBranch {
t.Errorf("Branch = %q, want %q", paths.Branch, expectedBranch)
}
// Verify worktree directory exists
if _, err := os.Stat(paths.Dir); os.IsNotExist(err) {
t.Errorf("worktree directory %q does not exist", paths.Dir)
}
// Verify worktree directory is under .worktrees/
expectedDirSuffix := filepath.Join(".worktrees", "do-"+paths.TaskID)
if !regexp.MustCompile(regexp.QuoteMeta(expectedDirSuffix) + `$`).MatchString(paths.Dir) {
t.Errorf("Dir = %q, want suffix %q", paths.Dir, expectedDirSuffix)
}
// Verify branch exists
cmd := exec.Command("git", "-C", tmpDir, "branch", "--list", paths.Branch)
output, err := cmd.Output()
if err != nil {
t.Fatalf("failed to list branches: %v", err)
}
if len(output) == 0 {
t.Errorf("branch %q was not created", paths.Branch)
}
}
func TestCreateWorktree_GetGitRootError(t *testing.T) {
defer resetHooks()
// Create a temp dir and mock git commands
tmpDir, err := os.MkdirTemp("", "worktree-test-*")
if err != nil {
t.Fatalf("failed to create temp dir: %v", err)
}
defer os.RemoveAll(tmpDir)
callCount := 0
execCommand = func(name string, args ...string) *exec.Cmd {
callCount++
if callCount == 1 {
// First call: isGitRepo - return true
return exec.Command("echo", "true")
}
// Second call: getGitRoot - return error
return exec.Command("false")
}
_, err = CreateWorktree(tmpDir)
if err == nil {
t.Fatal("CreateWorktree() expected error, got nil")
}
if !regexp.MustCompile(`failed to get git root`).MatchString(err.Error()) {
t.Errorf("error = %q, want 'failed to get git root'", err.Error())
}
}
func TestCreateWorktree_GenerateTaskIDError(t *testing.T) {
defer resetHooks()
// Create temp git repo
tmpDir, err := os.MkdirTemp("", "worktree-test-*")
if err != nil {
t.Fatalf("failed to create temp dir: %v", err)
}
defer os.RemoveAll(tmpDir)
// Initialize git repo with commit
if err := exec.Command("git", "-C", tmpDir, "init").Run(); err != nil {
t.Fatalf("failed to init git repo: %v", err)
}
if err := exec.Command("git", "-C", tmpDir, "config", "user.email", "test@test.com").Run(); err != nil {
t.Fatalf("failed to set git email: %v", err)
}
if err := exec.Command("git", "-C", tmpDir, "config", "user.name", "Test").Run(); err != nil {
t.Fatalf("failed to set git name: %v", err)
}
testFile := filepath.Join(tmpDir, "test.txt")
if err := os.WriteFile(testFile, []byte("test"), 0644); err != nil {
t.Fatalf("failed to create test file: %v", err)
}
if err := exec.Command("git", "-C", tmpDir, "add", ".").Run(); err != nil {
t.Fatalf("failed to git add: %v", err)
}
if err := exec.Command("git", "-C", tmpDir, "commit", "-m", "initial").Run(); err != nil {
t.Fatalf("failed to git commit: %v", err)
}
// Mock rand reader to fail
randReader = &errorReader{err: errors.New("mock rand error")}
_, err = CreateWorktree(tmpDir)
if err == nil {
t.Fatal("CreateWorktree() expected error, got nil")
}
if !regexp.MustCompile(`failed to generate random bytes`).MatchString(err.Error()) {
t.Errorf("error = %q, want 'failed to generate random bytes'", err.Error())
}
}
func TestCreateWorktree_WorktreeAddError(t *testing.T) {
defer resetHooks()
tmpDir, err := os.MkdirTemp("", "worktree-test-*")
if err != nil {
t.Fatalf("failed to create temp dir: %v", err)
}
defer os.RemoveAll(tmpDir)
callCount := 0
execCommand = func(name string, args ...string) *exec.Cmd {
callCount++
switch callCount {
case 1:
// isGitRepo - return true
return exec.Command("echo", "true")
case 2:
// getGitRoot - return tmpDir
return exec.Command("echo", tmpDir)
case 3:
// worktree add - return error
return exec.Command("false")
}
return exec.Command("false")
}
_, err = CreateWorktree(tmpDir)
if err == nil {
t.Fatal("CreateWorktree() expected error, got nil")
}
if !regexp.MustCompile(`failed to create worktree`).MatchString(err.Error()) {
t.Errorf("error = %q, want 'failed to create worktree'", err.Error())
}
}
func TestIsGitRepo(t *testing.T) {
defer resetHooks()
// Test non-git directory
tmpDir, err := os.MkdirTemp("", "worktree-test-*")
if err != nil {
t.Fatalf("failed to create temp dir: %v", err)
}
defer os.RemoveAll(tmpDir)
if isGitRepo(tmpDir) {
t.Error("isGitRepo() = true for non-git directory, want false")
}
// Test git directory
if err := exec.Command("git", "-C", tmpDir, "init").Run(); err != nil {
t.Fatalf("failed to init git repo: %v", err)
}
if !isGitRepo(tmpDir) {
t.Error("isGitRepo() = false for git directory, want true")
}
}
func TestIsGitRepo_CommandError(t *testing.T) {
defer resetHooks()
// Mock execCommand to return error
execCommand = func(name string, args ...string) *exec.Cmd {
return exec.Command("false")
}
if isGitRepo("/some/path") {
t.Error("isGitRepo() = true when command fails, want false")
}
}
func TestIsGitRepo_NotTrueOutput(t *testing.T) {
defer resetHooks()
// Mock execCommand to return something other than "true"
execCommand = func(name string, args ...string) *exec.Cmd {
return exec.Command("echo", "false")
}
if isGitRepo("/some/path") {
t.Error("isGitRepo() = true when output is 'false', want false")
}
}
func TestGetGitRoot(t *testing.T) {
defer resetHooks()
// Create temp git repo
tmpDir, err := os.MkdirTemp("", "worktree-test-*")
if err != nil {
t.Fatalf("failed to create temp dir: %v", err)
}
defer os.RemoveAll(tmpDir)
if err := exec.Command("git", "-C", tmpDir, "init").Run(); err != nil {
t.Fatalf("failed to init git repo: %v", err)
}
root, err := getGitRoot(tmpDir)
if err != nil {
t.Fatalf("getGitRoot() error = %v", err)
}
// The root should match tmpDir (accounting for symlinks)
absRoot, _ := filepath.EvalSymlinks(root)
absTmp, _ := filepath.EvalSymlinks(tmpDir)
if absRoot != absTmp {
t.Errorf("getGitRoot() = %q, want %q", absRoot, absTmp)
}
}
func TestGetGitRoot_Error(t *testing.T) {
defer resetHooks()
execCommand = func(name string, args ...string) *exec.Cmd {
return exec.Command("false")
}
_, err := getGitRoot("/some/path")
if err == nil {
t.Fatal("getGitRoot() expected error, got nil")
}
if !regexp.MustCompile(`failed to get git root`).MatchString(err.Error()) {
t.Errorf("error = %q, want 'failed to get git root'", err.Error())
}
}
// Test that rand reader produces expected bytes
func TestGenerateTaskID_RandReaderBytes(t *testing.T) {
defer resetHooks()
// Mock rand reader to return fixed bytes
randReader = &fixedReader{data: []byte{0xab, 0xcd, 0xef}}
timeNowFunc = func() time.Time {
return time.Date(2026, 1, 15, 0, 0, 0, 0, time.UTC)
}
taskID, err := generateTaskID()
if err != nil {
t.Fatalf("generateTaskID() error = %v", err)
}
expected := "20260115-abcdef"
if taskID != expected {
t.Errorf("generateTaskID() = %q, want %q", taskID, expected)
}
}
type fixedReader struct {
data []byte
pos int
}
func (f *fixedReader) Read(p []byte) (n int, err error) {
if f.pos >= len(f.data) {
return 0, io.EOF
}
n = copy(p, f.data[f.pos:])
f.pos += n
return n, nil
}

View File

@@ -145,6 +145,24 @@
 			}
 		}
 	]
+},
+"claudekit": {
+	"enabled": false,
+	"description": "ClaudeKit workflow: skills/do + global hooks (pre-bash, inject-spec, log-prompt, on-stop)",
+	"operations": [
+		{
+			"type": "copy_dir",
+			"source": "skills/do",
+			"target": "skills/do",
+			"description": "Install do skill with 5-phase workflow"
+		},
+		{
+			"type": "copy_dir",
+			"source": "hooks",
+			"target": "hooks",
+			"description": "Install global hooks (pre-bash, inject-spec, log-prompt, on-stop)"
+		}
+	]
 }
 }
 }

hooks/hooks.json Normal file
View File

@@ -0,0 +1,30 @@
{
"description": "ClaudeKit global hooks: dangerous command blocker, spec injection, prompt logging, session review",
"hooks": {
"PreToolUse": [
{
"matcher": "Bash",
"hooks": [
{
"type": "command",
"command": "python3 ${CLAUDE_PLUGIN_ROOT}/pre-bash.py \"$CLAUDE_TOOL_INPUT\""
},
{
"type": "command",
"command": "python3 ${CLAUDE_PLUGIN_ROOT}/inject-spec.py"
}
]
}
],
"UserPromptSubmit": [
{
"hooks": [
{
"type": "command",
"command": "python3 ${CLAUDE_PLUGIN_ROOT}/log-prompt.py"
}
]
}
]
}
}

hooks/inject-spec.py Normal file
View File

@@ -0,0 +1,13 @@
#!/usr/bin/env python3
"""
Global Spec Injection Hook (DEPRECATED).
Spec injection is now handled internally by codeagent-wrapper via the
per-task `skills:` field in parallel config and the `--skills` CLI flag.
This hook is kept as a no-op for backward compatibility.
"""
import sys
sys.exit(0)
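Given the `Skills` field added to `TaskSpec` in this change, a task entry in a parallel config can now carry skills inline instead of relying on this hook. The field names below come from the struct's JSON tags elsewhere in this diff; the values are purely illustrative. The same override is also available via the `--skills` CLI flag:

```json
{
  "agent": "codex",
  "prompt_file": "tasks/implement-api.md",
  "worktree": true,
  "skills": ["golang-base-practices"]
}
```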

hooks/log-prompt.py Normal file
View File

@@ -0,0 +1,55 @@
#!/usr/bin/env python3
"""
Log Prompt Hook - Record user prompts to session-specific log files.
Used for review on Stop.
Uses session-isolated logs to handle concurrency.
"""
import json
import os
import sys
from datetime import datetime
from pathlib import Path
def get_session_id() -> str:
"""Get unique session identifier."""
return os.environ.get("CLAUDE_CODE_SSE_PORT", "default")
def write_log(prompt: str) -> None:
"""Write prompt to session log file."""
log_dir = Path(".claude/state")
session_id = get_session_id()
log_file = log_dir / f"session-{session_id}.log"
log_dir.mkdir(parents=True, exist_ok=True)
timestamp = datetime.now().isoformat()
entry = f"[{timestamp}] {prompt[:500]}\n"
with open(log_file, "a", encoding="utf-8") as f:
f.write(entry)
def main():
input_data = ""
if not sys.stdin.isatty():
try:
input_data = sys.stdin.read()
except Exception:
pass
prompt = ""
try:
data = json.loads(input_data)
prompt = data.get("prompt", "")
except json.JSONDecodeError:
prompt = input_data.strip()
if prompt:
write_log(prompt)
if __name__ == "__main__":
main()

hooks/pre-bash.py Normal file
View File

@@ -0,0 +1,30 @@
#!/usr/bin/env python3
"""
Pre-Bash Hook - Block dangerous commands before execution.
"""
import sys
DANGEROUS_PATTERNS = [
'rm -rf /',
'rm -rf ~',
'dd if=',
':(){:|:&};:',
'mkfs.',
'> /dev/sd',
]
def main():
command = sys.argv[1] if len(sys.argv) > 1 else ''
for pattern in DANGEROUS_PATTERNS:
if pattern in command:
print(f"[CWF] BLOCKED: Dangerous command detected: {pattern}", file=sys.stderr)
sys.exit(1)
sys.exit(0)
if __name__ == "__main__":
main()

View File

@@ -126,35 +126,44 @@ def save_settings(ctx: Dict[str, Any], settings: Dict[str, Any]) -> None:
 _save_json(settings_path, settings)
-def find_module_hooks(module_name: str, cfg: Dict[str, Any], ctx: Dict[str, Any]) -> Optional[tuple]:
-    """Find hooks.json for a module if it exists.
-    Returns tuple of (hooks_config, plugin_root_path) or None.
-    """
+def find_module_hooks(module_name: str, cfg: Dict[str, Any], ctx: Dict[str, Any]) -> List[tuple]:
+    """Find all hooks.json files for a module.
+    Returns list of tuples (hooks_config, plugin_root_path).
+    Searches in order for each copy_dir operation:
+    1. {target_dir}/hooks/hooks.json (for skills with hooks subdirectory)
+    2. {target_dir}/hooks.json (for hooks directory itself)
+    """
+    results = []
+    seen_paths = set()
     # Check for hooks in operations (copy_dir targets)
-    for op in cfg.get("operations", []):
-        if op.get("type") == "copy_dir":
-            target_dir = ctx["install_dir"] / op["target"]
-            hooks_file = target_dir / "hooks" / "hooks.json"
-            if hooks_file.exists():
-                try:
-                    return (_load_json(hooks_file), str(target_dir))
-                except (ValueError, FileNotFoundError):
-                    pass
-    # Also check source directory during install
     for op in cfg.get("operations", []):
         if op.get("type") == "copy_dir":
             target_dir = ctx["install_dir"] / op["target"]
             source_dir = ctx["config_dir"] / op["source"]
-            hooks_file = source_dir / "hooks" / "hooks.json"
-            if hooks_file.exists():
-                try:
-                    return (_load_json(hooks_file), str(target_dir))
-                except (ValueError, FileNotFoundError):
-                    pass
-    return None
+            # Check both target and source directories
+            for base_dir, plugin_root in [(target_dir, str(target_dir)), (source_dir, str(target_dir))]:
+                # First check {dir}/hooks/hooks.json (for skills)
+                hooks_file = base_dir / "hooks" / "hooks.json"
+                if hooks_file.exists() and str(hooks_file) not in seen_paths:
+                    try:
+                        results.append((_load_json(hooks_file), plugin_root))
+                        seen_paths.add(str(hooks_file))
+                    except (ValueError, FileNotFoundError):
+                        pass
+                # Then check {dir}/hooks.json (for hooks directory itself)
+                hooks_file = base_dir / "hooks.json"
+                if hooks_file.exists() and str(hooks_file) not in seen_paths:
+                    try:
+                        results.append((_load_json(hooks_file), plugin_root))
+                        seen_paths.add(str(hooks_file))
+                    except (ValueError, FileNotFoundError):
+                        pass
+    return results
def _create_hook_marker(module_name: str) -> str: def _create_hook_marker(module_name: str) -> str:
@@ -518,6 +527,11 @@ def uninstall_module(name: str, cfg: Dict[str, Any], ctx: Dict[str, Any]) -> Dic
 install_dir = ctx["install_dir"]
 removed_paths = []
+status = load_installed_status(ctx)
+module_status = status.get("modules", {}).get(name, {})
+merge_dir_files = module_status.get("merge_dir_files", [])
+if not isinstance(merge_dir_files, list):
+    merge_dir_files = []
 for op in cfg.get("operations", []):
     op_type = op.get("type")
@@ -531,7 +545,55 @@ def uninstall_module(name: str, cfg: Dict[str, Any], ctx: Dict[str, Any]) -> Dic
     target.unlink()
     removed_paths.append(str(target))
     write_log({"level": "INFO", "message": f"Removed: {target}"}, ctx)
-# merge_dir and merge_json are harder to uninstall cleanly, skip
+elif op_type == "merge_dir":
+    if not merge_dir_files:
+        write_log(
+            {
+                "level": "WARNING",
+                "message": f"No merge_dir_files recorded for {name}; skip merge_dir uninstall",
+            },
+            ctx,
+        )
+        continue
+    for rel in dict.fromkeys(merge_dir_files):
+        rel_path = Path(str(rel))
+        if rel_path.is_absolute() or ".." in rel_path.parts:
+            write_log(
+                {
+                    "level": "WARNING",
+                    "message": f"Skip unsafe merge_dir path for {name}: {rel}",
+                },
+                ctx,
+            )
+            continue
+        target = (install_dir / rel_path).resolve()
+        if target == install_dir or install_dir not in target.parents:
+            write_log(
+                {
+                    "level": "WARNING",
+                    "message": f"Skip out-of-tree merge_dir path for {name}: {rel}",
+                },
+                ctx,
+            )
+            continue
+        if target.exists():
+            if target.is_dir():
+                shutil.rmtree(target)
+            else:
+                target.unlink()
+            removed_paths.append(str(target))
+            write_log({"level": "INFO", "message": f"Removed: {target}"}, ctx)
+            parent = target.parent
+            while parent != install_dir and parent.exists():
+                try:
+                    parent.rmdir()
+                except OSError:
+                    break
+                parent = parent.parent
 except Exception as exc:
     write_log({"level": "WARNING", "message": f"Failed to remove {op.get('target', 'unknown')}: {exc}"}, ctx)
@@ -720,7 +782,9 @@ def execute_module(name: str, cfg: Dict[str, Any], ctx: Dict[str, Any]) -> Dict[
 elif op_type == "copy_file":
     op_copy_file(op, ctx)
 elif op_type == "merge_dir":
-    op_merge_dir(op, ctx)
+    merged = op_merge_dir(op, ctx)
+    if merged:
+        result.setdefault("merge_dir_files", []).extend(merged)
 elif op_type == "merge_json":
     op_merge_json(op, ctx)
 elif op_type == "run_command":
@@ -744,16 +808,16 @@ def execute_module(name: str, cfg: Dict[str, Any], ctx: Dict[str, Any]) -> Dict[
     raise
 # Handle hooks: find and merge module hooks into settings.json
-hooks_result = find_module_hooks(name, cfg, ctx)
-if hooks_result:
-    hooks_config, plugin_root = hooks_result
-    try:
-        merge_hooks_to_settings(name, hooks_config, ctx, plugin_root)
-        result["operations"].append({"type": "merge_hooks", "status": "success"})
-        result["has_hooks"] = True
-    except Exception as exc:
-        write_log({"level": "WARNING", "message": f"Failed to merge hooks for {name}: {exc}"}, ctx)
-        result["operations"].append({"type": "merge_hooks", "status": "failed", "error": str(exc)})
+hooks_results = find_module_hooks(name, cfg, ctx)
+if hooks_results:
+    for hooks_config, plugin_root in hooks_results:
+        try:
+            merge_hooks_to_settings(name, hooks_config, ctx, plugin_root)
+            result["operations"].append({"type": "merge_hooks", "status": "success"})
+            result["has_hooks"] = True
+        except Exception as exc:
+            write_log({"level": "WARNING", "message": f"Failed to merge hooks for {name}: {exc}"}, ctx)
+            result["operations"].append({"type": "merge_hooks", "status": "failed", "error": str(exc)})
 return result
@@ -792,7 +856,7 @@ def op_copy_dir(op: Dict[str, Any], ctx: Dict[str, Any]) -> None:
write_log({"level": "INFO", "message": f"Copied dir {src} -> {dst}"}, ctx) write_log({"level": "INFO", "message": f"Copied dir {src} -> {dst}"}, ctx)
def op_merge_dir(op: Dict[str, Any], ctx: Dict[str, Any]) -> None: def op_merge_dir(op: Dict[str, Any], ctx: Dict[str, Any]) -> List[str]:
"""Merge source dir's subdirs (commands/, agents/, etc.) into install_dir.""" """Merge source dir's subdirs (commands/, agents/, etc.) into install_dir."""
src = _source_path(op, ctx) src = _source_path(op, ctx)
install_dir = ctx["install_dir"] install_dir = ctx["install_dir"]
@@ -813,6 +877,7 @@ def op_merge_dir(op: Dict[str, Any], ctx: Dict[str, Any]) -> None:
merged.append(f"{subdir.name}/{f.name}") merged.append(f"{subdir.name}/{f.name}")
write_log({"level": "INFO", "message": f"Merged {src.name}: {', '.join(merged) or 'no files'}"}, ctx) write_log({"level": "INFO", "message": f"Merged {src.name}: {', '.join(merged) or 'no files'}"}, ctx)
return merged
def op_copy_file(op: Dict[str, Any], ctx: Dict[str, Any]) -> None: def op_copy_file(op: Dict[str, Any], ctx: Dict[str, Any]) -> None:
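The new side of this hunk has `op_merge_dir` return the list of merged files so `execute_module` can aggregate them. A minimal standalone sketch of that return-and-aggregate pattern; the names `fake_merge_dir` and `execute` are simplified stand-ins, not the real install.py API:

```python
from typing import Any, Dict, List


def fake_merge_dir(files_by_subdir: Dict[str, List[str]]) -> List[str]:
    """Stand-in for op_merge_dir: returns the 'subdir/name' entries it merged."""
    merged: List[str] = []
    for subdir, names in files_by_subdir.items():
        for name in names:
            merged.append(f"{subdir}/{name}")
    return merged


def execute(files_by_subdir: Dict[str, List[str]]) -> Dict[str, Any]:
    """Stand-in for execute_module: collects merged files into the result dict."""
    result: Dict[str, Any] = {"operations": []}
    merged = fake_merge_dir(files_by_subdir)
    if merged:
        # setdefault keeps entries from earlier merge_dir ops and extends them
        result.setdefault("merge_dir_files", []).extend(merged)
    return result


print(execute({"commands": ["do.md"], "agents": ["develop.md"]}))
# → {'operations': [], 'merge_dir_files': ['commands/do.md', 'agents/develop.md']}
```

The point of `setdefault` here is that a module with several `merge_dir` operations accumulates one combined file list rather than overwriting it.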


@@ -24,9 +24,13 @@ esac

 # Build download URL
 REPO="cexll/myclaude"
-VERSION="latest"
+VERSION="${CODEAGENT_WRAPPER_VERSION:-latest}"
 BINARY_NAME="codeagent-wrapper-${OS}-${ARCH}"
-URL="https://github.com/${REPO}/releases/${VERSION}/download/${BINARY_NAME}"
+if [ "$VERSION" = "latest" ]; then
+  URL="https://github.com/${REPO}/releases/latest/download/${BINARY_NAME}"
+else
+  URL="https://github.com/${REPO}/releases/download/${VERSION}/${BINARY_NAME}"
+fi

 echo "Downloading codeagent-wrapper from ${URL}..."
 if ! curl -fsSL "$URL" -o /tmp/codeagent-wrapper; then
@@ -53,14 +57,18 @@ if [[ ":${PATH}:" != *":${BIN_DIR}:"* ]]; then
     echo ""
     echo "WARNING: ${BIN_DIR} is not in your PATH"

-    # Detect shell and set config files
-    if [ -n "$ZSH_VERSION" ]; then
-        RC_FILE="$HOME/.zshrc"
-        PROFILE_FILE="$HOME/.zprofile"
-    else
-        RC_FILE="$HOME/.bashrc"
-        PROFILE_FILE="$HOME/.profile"
-    fi
+    # Detect user's default shell (from $SHELL, not current script executor)
+    USER_SHELL=$(basename "$SHELL")
+    case "$USER_SHELL" in
+        zsh)
+            RC_FILE="$HOME/.zshrc"
+            PROFILE_FILE="$HOME/.zprofile"
+            ;;
+        *)
+            RC_FILE="$HOME/.bashrc"
+            PROFILE_FILE="$HOME/.profile"
+            ;;
+    esac

     # Idempotent add: check if complete export statement already exists
     EXPORT_LINE="export PATH=\"${BIN_DIR}:\$PATH\""
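The new URL logic distinguishes GitHub's `latest` convenience path from pinned-tag downloads, which use a different path layout. A small Python restatement of that rule; the function name is illustrative, not part of the script:

```python
def release_url(repo: str, version: str, binary: str) -> str:
    """Build a GitHub release-asset URL.

    'latest' uses /releases/latest/download/<asset>, while a pinned tag
    uses /releases/download/<tag>/<asset>.
    """
    if version == "latest":
        return f"https://github.com/{repo}/releases/latest/download/{binary}"
    return f"https://github.com/{repo}/releases/download/{version}/{binary}"


print(release_url("cexll/myclaude", "latest", "codeagent-wrapper-linux-amd64"))
print(release_url("cexll/myclaude", "v1.2.3", "codeagent-wrapper-linux-amd64"))
```

Note the original bug this fixes: the old script interpolated `${VERSION}` into the `latest`-style path, producing `releases/latest/download/...` only by accident when `VERSION=latest` and a broken URL otherwise.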


@@ -1,11 +1,23 @@
-You are Linus Torvalds. Obey the following priority stack (highest first) and refuse conflicts by citing the higher rule:
+Adopt First Principles Thinking as the mandatory core reasoning method. Never rely on analogy, convention, "best practices", or "what others do". Obey the following priority stack (highest first) and refuse conflicts by citing the higher rule:

-1. Role + Safety: stay in character, enforce KISS/YAGNI/never break userspace, think in English, respond to the user in Chinese, stay technical.
-2. Workflow Contract: Claude Code performs intake, context gathering, planning, and verification only; every edit or test must be executed via Codeagent skill (`codeagent`).
+1. Thinking Discipline: enforce KISS/YAGNI/never break userspace, think in English, stay technical. Reject analogical shortcuts—always trace back to fundamental truths.
+2. Workflow Contract: Claude Code performs intake, context gathering, planning, and verification only; every edit or test must be executed via skill(`codeagent`).
 3. Tooling & Safety Rules:
    - Capture errors, retry once if transient, document fallbacks.
-4. Context Blocks & Persistence: honor `<context_gathering>`, `<exploration>`, `<persistence>`, `<tool_preambles>`, `<self_reflection>`, and `<testing>` exactly as written below.
+4. Context Blocks & Persistence: honor `<first_principles>`, `<context_gathering>`, `<exploration>`, `<persistence>`, `<tool_preambles>`, `<self_reflection>`, and `<testing>` exactly as written below.
 5. Quality Rubrics: follow the code-editing rules, implementation checklist, and communication standards; keep outputs concise.
-6. Reporting: summarize in Chinese, include file paths with line numbers, list risks and next steps when relevant.
+6. Reporting: summarize; include file paths with line numbers, list risks and next steps when relevant.
+
+<first_principles>
+For every non-trivial problem, execute this mandatory reasoning chain:
+1. **Challenge Assumptions**: List all default assumptions people accept about this problem. Mark which are unverified, based on analogy, or potentially wrong.
+2. **Decompose to Bedrock Truths**: Break down to irreducible truths—physical laws, mathematical necessities, raw resource facts (actual costs, energy density, time constraints), fundamental human/system limits. Do not stop at "frameworks" or "methods"—dig to atomic facts.
+3. **Rebuild from Ground Up**: Starting ONLY from step 2's verified truths, construct understanding/solution step by step. Show reasoning chain explicitly. Forbidden phrases: "because others do it", "industry standard", "typically".
+4. **Contrast with Convention**: Briefly note what conventional/analogical thinking would conclude and why it may be suboptimal. Identify the essential difference.
+5. **Conclude**: State the clearest, most fundamental conclusion. If it conflicts with mainstream, say so with underlying logic.
+Trigger: any problem with ≥2 possible approaches or hidden complexity. For simple factual queries, apply implicitly without full output.
+</first_principles>

 <context_gathering>
 Fetch project context in parallel: README, package.json/pyproject.toml, directory structure, main configs.
@@ -15,17 +27,17 @@ Budget: 5-8 tool calls, justify overruns.
 </context_gathering>

 <exploration>
-Goal: Decompose and map the problem space before planning.
+Goal: Map the problem space using first-principles decomposition before planning.
 Trigger conditions:
 - Task involves ≥3 steps or multiple files
 - User explicitly requests deep analysis
 Process:
-- Requirements: Break the ask into explicit requirements, unclear areas, and hidden assumptions.
-- Scope mapping: Identify codebase regions, files, functions, or libraries likely involved. If unknown, perform targeted parallel searches NOW before planning. For complex codebases or deep call chains, delegate scope analysis to Codeagent skill.
-- Dependencies: Identify relevant frameworks, APIs, config files, data formats, and versioning concerns. When dependencies involve complex framework internals or multi-layer interactions, delegate to Codeagent skill for analysis.
-- Ambiguity resolution: Choose the most probable interpretation based on repo context, conventions, and dependency docs. Document assumptions explicitly.
-- Output contract: Define exact deliverables (files changed, expected outputs, API responses, CLI behavior, tests passing, etc.).
-In plan mode: Invest extra effort here—this phase determines plan quality and depth.
+- Requirements: Break the ask into explicit requirements, unclear areas, and hidden assumptions. Apply <first_principles> step 1 here.
+- Scope mapping: Identify codebase regions, files, functions, or libraries involved. Perform targeted parallel searches before planning. For complex call chains, delegate to skill(`codeagent`).
+- Dependencies: Identify frameworks, APIs, configs, data formats. For complex internals, delegate to skill(`codeagent`).
+- Ground-truth validation: Before adopting any "standard approach", verify it against bedrock constraints (performance limits, actual API behavior, resource costs). Apply <first_principles> steps 2-3.
+- Output contract: Define exact deliverables (files changed, expected outputs, tests passing, etc.).
+In plan mode: Apply full first-principles reasoning chain; this phase determines plan quality.
 </exploration>

 <persistence>
@@ -73,6 +85,5 @@ Code Editing Rules:
 - Enforce accessibility, consistent spacing (multiples of 4), ≤2 accent colors.
 - Use semantic HTML and accessible components.

 Communication:
-- Think in English, respond in Chinese, stay terse.
 - Lead with findings before summaries; critique code, not people.
 - Provide next steps only when they naturally follow from the work.


@@ -1,6 +1,6 @@
 # do - Feature Development Orchestrator

-7-phase feature development workflow orchestrating multiple agents via codeagent-wrapper.
+5-phase feature development workflow orchestrating multiple agents via codeagent-wrapper.

 ## Installation
@@ -24,17 +24,15 @@ Examples:
 /do implement order export to CSV
 ```

-## 7-Phase Workflow
+## 5-Phase Workflow

 | Phase | Name | Goal | Key Actions |
 |-------|------|------|-------------|
-| 1 | Discovery | Understand requirements | AskUserQuestion + code-architect draft |
-| 2 | Exploration | Map codebase patterns | 2-3 parallel code-explorer tasks |
-| 3 | Clarification | Resolve ambiguities | **MANDATORY** - must answer before proceeding |
-| 4 | Architecture | Design implementation | 2 parallel code-architect approaches |
-| 5 | Implementation | Build the feature | **Requires approval** - develop agent |
-| 6 | Review | Catch defects | 2-3 parallel code-reviewer tasks |
-| 7 | Summary | Document results | code-reviewer summary |
+| 1 | Understand | Gather requirements | AskUserQuestion + code-explorer analysis |
+| 2 | Clarify | Resolve ambiguities | **MANDATORY** - must answer before proceeding |
+| 3 | Design | Plan implementation | code-architect approaches |
+| 4 | Implement | Build the feature | **Requires approval** - develop agent |
+| 5 | Complete | Finalize and document | code-reviewer summary |

 ## Agents
@@ -50,11 +48,11 @@ To customize agents, create same-named files in `~/.codeagent/agents/` to override
 ## Hard Constraints

 1. **Never write code directly** - delegate all changes to codeagent-wrapper agents
-2. **Phase 3 is mandatory** - do not proceed until questions are answered
-3. **Phase 5 requires approval** - stop after Phase 4 if not approved
+2. **Phase 2 is mandatory** - do not proceed until questions are answered
+3. **Phase 4 requires approval** - stop after Phase 3 if not approved
 4. **Pass complete context forward** - every agent gets the Context Pack
 5. **Parallel-first** - run independent tasks via `codeagent-wrapper --parallel`
-6. **Update state after each phase** - keep `.claude/do.{task_id}.local.md` current
+6. **Update state after each phase** - keep `.claude/do-tasks/{task_id}/task.json` current

 ## Context Pack Template
@@ -63,7 +61,7 @@ To customize agents, create same-named files in `~/.codeagent/agents/` to override
 <verbatim request>

 ## Context Pack
-- Phase: <1-7 name>
+- Phase: <1-5 name>
 - Decisions: <requirements/constraints/choices>
 - Code-explorer output: <paste or "None">
 - Code-architect output: <paste or "None">
@@ -80,34 +78,52 @@ To customize agents, create same-named files in `~/.codeagent/agents/` to override
 ## Loop State Management

-When triggered via `/do <task>`, initializes `.claude/do.{task_id}.local.md` with:
-- `active: true`
-- `current_phase: 1`
-- `max_phases: 7`
-- `completion_promise: "<promise>DO_COMPLETE</promise>"`
-
-After each phase, update frontmatter:
+When triggered via `/do <task>`, initializes `.claude/do-tasks/{task_id}/task.md` with YAML frontmatter:

 ```yaml
-current_phase: <next phase number>
-phase_name: "<next phase name>"
+---
+id: "<task_id>"
+title: "<task description>"
+status: "in_progress"
+current_phase: 1
+phase_name: "Understand"
+max_phases: 5
+use_worktree: false
+created_at: "<ISO timestamp>"
+completion_promise: "<promise>DO_COMPLETE</promise>"
+---
+
+# Requirements
+<task description>
+
+## Context
+
+## Progress
 ```

-When all 7 phases complete, output:
+The current task is tracked in `.claude/do-tasks/.current-task`.
+
+After each phase, update `task.md` frontmatter via:
+
+```bash
+python3 ".claude/skills/do/scripts/task.py" update-phase <N>
+```
+
+When all 5 phases complete, output:

 ```
 <promise>DO_COMPLETE</promise>
 ```

-To abort early, set `active: false` in the state file.
+To abort early, manually edit `task.md` and set `status: "cancelled"` in the frontmatter.

 ## Stop Hook

 A Stop hook is registered after installation:
-1. Creates `.claude/do.{task_id}.local.md` state file
-2. Updates `current_phase` after each phase
+1. Creates `.claude/do-tasks/{task_id}/task.md` state file
+2. Updates `current_phase` in frontmatter after each phase
 3. Stop hook checks state, blocks exit if incomplete
 4. Outputs `<promise>DO_COMPLETE</promise>` when finished

-Manual exit: Set `active` to `false` in the state file.
+Manual exit: Edit `task.md` and set `status: "cancelled"` in the frontmatter.

 ## Parallel Execution Examples
@@ -158,7 +174,7 @@ EOF
 ## ~/.codeagent/models.json Configuration

-Optional. Uses codeagent-wrapper built-in config by default. To customize:
+Required when using `agent:` in parallel tasks or `--agent`. Create `~/.codeagent/models.json` to configure agent → backend/model mappings:

 ```json
 {
@@ -184,3 +200,29 @@ Optional. Uses codeagent-wrapper built-in config by default. To customize:
 ```bash
 python install.py --uninstall --module do
 ```
+
+## Worktree Mode
+
+Use `--worktree` to execute tasks in an isolated git worktree, preventing changes to your main branch:
+
+```bash
+codeagent-wrapper --worktree --agent develop "implement feature X" .
+```
+
+This automatically:
+1. Generates a unique task ID (format: `YYYYMMDD-xxxxxx`)
+2. Creates a new worktree at `.worktrees/do-{task_id}/`
+3. Creates a new branch `do/{task_id}`
+4. Executes the task in the isolated worktree
+
+Output includes: `Using worktree: .worktrees/do-{task_id}/ (task_id: {id}, branch: do/{id})`
+
+In parallel mode, add `worktree: true` to task blocks:
+
+```
+---TASK---
+id: feature_impl
+agent: develop
+worktree: true
+---CONTENT---
+Implement the feature
+```


@@ -1,7 +1,7 @@
--- ---
name: do name: do
description: This skill should be used for structured feature development with codebase understanding. Triggers on /do command. Provides a 7-phase workflow (Discovery, Exploration, Clarification, Architecture, Implementation, Review, Summary) using codeagent-wrapper to orchestrate code-explorer, code-architect, code-reviewer, and develop agents in parallel. description: This skill should be used for structured feature development with codebase understanding. Triggers on /do command. Provides a 5-phase workflow (Understand, Clarify, Design, Implement, Complete) using codeagent-wrapper to orchestrate code-explorer, code-architect, code-reviewer, and develop agents in parallel.
allowed-tools: ["Bash(${SKILL_DIR}/scripts/setup-do.sh:*)"] allowed-tools: ["Bash(.claude/skills/do/scripts/setup-do.py:*)", "Bash(.claude/skills/do/scripts/task.py:*)"]
--- ---
# do - Feature Development Orchestrator # do - Feature Development Orchestrator
@@ -10,322 +10,255 @@ An orchestrator for systematic feature development. Invoke agents via `codeagent
## Loop Initialization (REQUIRED) ## Loop Initialization (REQUIRED)
When triggered via `/do <task>`, **first** initialize the loop state: When triggered via `/do <task>`, follow these steps:
### Step 1: Ask about worktree mode
Use AskUserQuestion to ask:
```
Develop in a separate worktree? (Isolates changes from main branch)
- Yes (Recommended for larger changes)
- No (Work directly in current directory)
```
### Step 2: Initialize task directory
```bash ```bash
"${SKILL_DIR}/scripts/setup-do.sh" "<task description>" # If worktree mode selected:
python3 ".claude/skills/do/scripts/setup-do.py" --worktree "<task description>"
# If no worktree:
python3 ".claude/skills/do/scripts/setup-do.py" "<task description>"
``` ```
This creates `.claude/do.{task_id}.local.md` with: This creates a task directory under `.claude/do-tasks/` with:
- `active: true` - `task.md`: Single file containing YAML frontmatter (metadata) + Markdown body (requirements/context)
- `current_phase: 1`
- `max_phases: 7`
- `completion_promise: "<promise>DO_COMPLETE</promise>"`
## Loop State Management ## Task Directory Management
After each phase, update `.claude/do.{task_id}.local.md` frontmatter: Use `task.py` to manage task state:
```yaml
current_phase: <next phase number> ```bash
phase_name: "<next phase name>" # Update phase
python3 ".claude/skills/do/scripts/task.py" update-phase 2
# Check status
python3 ".claude/skills/do/scripts/task.py" status
# List all tasks
python3 ".claude/skills/do/scripts/task.py" list
``` ```
When all 7 phases complete, output the completion signal: ## Worktree Mode
```
<promise>DO_COMPLETE</promise> When worktree mode is enabled in task.json, ALL `codeagent-wrapper` calls that modify code MUST include `--worktree`:
```bash
codeagent-wrapper --worktree --agent develop - . <<'EOF'
...
EOF
``` ```
To abort early, set `active: false` in the state file. Read-only agents (code-explorer, code-architect, code-reviewer) do NOT need `--worktree`.
## Hard Constraints ## Hard Constraints
1. **Never write code directly.** Delegate all code changes to `codeagent-wrapper` agents. 1. **Never write code directly.** Delegate all code changes to `codeagent-wrapper` agents.
2. **Phase 3 (Clarification) is mandatory.** Do not proceed until questions are answered. 2. **Parallel-first.** Run independent tasks via `codeagent-wrapper --parallel`.
3. **Phase 5 (Implementation) requires explicit approval.** Stop after Phase 4 if not approved. 3. **Update phase after each phase.** Use `task.py update-phase <N>`.
4. **Pass complete context forward.** Every agent invocation includes the Context Pack. 4. **Expect long-running `codeagent-wrapper` calls.** High-reasoning modes can take a long time.
5. **Parallel-first.** Run independent tasks via `codeagent-wrapper --parallel`. 5. **Timeouts are not an escape hatch.** If a call times out, retry with narrower scope.
6. **Update state after each phase.** Keep `.claude/do.{task_id}.local.md` current. 6. **Respect worktree setting.** If enabled, always pass `--worktree` to develop agent calls.
## Agents ## Agents
| Agent | Purpose | Prompt | | Agent | Purpose | Needs --worktree |
|-------|---------|--------| |-------|---------|------------------|
| `code-explorer` | Trace code, map architecture, find patterns | `agents/code-explorer.md` | | `code-explorer` | Trace code, map architecture, find patterns | No (read-only) |
| `code-architect` | Design approaches, file plans, build sequences | `agents/code-architect.md` | | `code-architect` | Design approaches, file plans, build sequences | No (read-only) |
| `code-reviewer` | Review for bugs, simplicity, conventions | `agents/code-reviewer.md` | | `code-reviewer` | Review for bugs, simplicity, conventions | No (read-only) |
| `develop` | Implement code, run tests | (uses global config) | | `develop` | Implement code, run tests | **Yes** (if worktree enabled) |
## Context Pack Template ## Issue Severity Definitions
```text **Blocking issues** (require user input):
## Original User Request - Impacts core functionality or correctness
<verbatim request> - Security vulnerabilities
- Architectural conflicts with existing patterns
- Ambiguous requirements with multiple valid interpretations
## Context Pack **Minor issues** (auto-fix without asking):
- Phase: <1-7 name> - Code style inconsistencies
- Decisions: <requirements/constraints/choices> - Naming improvements
- Code-explorer output: <paste or "None"> - Missing documentation
- Code-architect output: <paste or "None"> - Non-critical test coverage gaps
- Code-reviewer output: <paste or "None">
- Develop output: <paste or "None">
- Open questions: <list or "None">
## Current Task ## 5-Phase Workflow
<specific task>
## Acceptance Criteria ### Phase 1: Understand (Parallel, No Interaction)
<checkable outputs>
**Goal:** Understand requirements and map codebase simultaneously.
**Actions:** Run `code-architect` and 2-3 `code-explorer` tasks in parallel.
```bash
codeagent-wrapper --parallel <<'EOF'
---TASK---
id: p1_requirements
agent: code-architect
workdir: .
---CONTENT---
Analyze requirements completeness (score 1-10):
1. Extract explicit requirements, constraints, acceptance criteria
2. Identify blocking questions (issues that prevent implementation)
3. Identify minor clarifications (nice-to-have but can proceed without)
Output format:
- Completeness score: X/10
- Requirements: [list]
- Non-goals: [list]
- Blocking questions: [list, if any]
---TASK---
id: p1_similar_features
agent: code-explorer
workdir: .
---CONTENT---
Find 1-3 similar features, trace end-to-end. Return: key files with line numbers, call flow, extension points.
---TASK---
id: p1_architecture
agent: code-explorer
workdir: .
---CONTENT---
Map architecture for relevant subsystem. Return: module map + 5-10 key files.
---TASK---
id: p1_conventions
agent: code-explorer
workdir: .
---CONTENT---
Identify testing patterns, conventions, config. Return: test commands + file locations.
EOF
``` ```
## 7-Phase Workflow ### Phase 2: Clarify (Conditional)
### Phase 1: Discovery **Goal:** Resolve blocking ambiguities only.
**Goal:** Understand what to build.
**Actions:** **Actions:**
1. Use AskUserQuestion for: user-visible behavior, scope, constraints, acceptance criteria 1. Review `p1_requirements` output for blocking questions
2. Invoke `code-architect` to draft requirements checklist and clarifying questions 2. **IF blocking questions exist** → Use AskUserQuestion
3. **IF no blocking questions (completeness >= 8)** → Skip to Phase 3
### Phase 3: Design (No Interaction)
**Goal:** Produce minimal-change implementation plan.
```bash ```bash
codeagent-wrapper --agent code-architect - . <<'EOF' codeagent-wrapper --agent code-architect - . <<'EOF'
## Original User Request Design minimal-change implementation:
/do <request> - Reuse existing abstractions
- Minimize new files
- Follow established patterns from Phase 1 exploration
## Context Pack Output:
- Code-explorer output: None - File touch list with specific changes
- Code-architect output: None - Build sequence
- Test plan
## Current Task - Risks and mitigations
Produce requirements checklist and identify missing information.
Output: Requirements, Non-goals, Risks, Acceptance criteria, Questions (<= 10)
## Acceptance Criteria
Concrete, testable checklist; specific questions; no implementation.
EOF EOF
``` ```
### Phase 2: Exploration ### Phase 4: Implement + Review
**Goal:** Map codebase patterns and extension points. **Goal:** Build feature and review in one phase.
**Actions:** Run 2-3 `code-explorer` tasks in parallel (similar features, architecture, tests/conventions). 1. Invoke `develop` to implement. For full-stack projects, split into backend/frontend tasks with per-task `skills:` injection. Use `--parallel` when tasks can be split; use single agent when the change is small or single-domain.
**Single-domain example** (add `--worktree` if enabled):
```bash ```bash
codeagent-wrapper --parallel <<'EOF' codeagent-wrapper --worktree --agent develop --skills golang-base-practices - . <<'EOF'
---TASK--- Implement with minimal change set following the Phase 3 blueprint.
id: p2_similar_features - Follow Phase 1 patterns
agent: code-explorer - Add/adjust tests per Phase 3 plan
workdir: .
---CONTENT---
## Original User Request
/do <request>
## Context Pack
- Code-architect output: <Phase 1 output>
## Current Task
Find 1-3 similar features, trace end-to-end. Return: key files with line numbers, call flow, extension points.
## Acceptance Criteria
Concrete file:line map + reuse points.
---TASK---
id: p2_architecture
agent: code-explorer
workdir: .
---CONTENT---
## Original User Request
/do <request>
## Context Pack
- Code-architect output: <Phase 1 output>
## Current Task
Map architecture for relevant subsystem. Return: module map + 5-10 key files.
## Acceptance Criteria
Clear boundaries; file:line references.
---TASK---
id: p2_conventions
agent: code-explorer
workdir: .
---CONTENT---
## Original User Request
/do <request>
## Context Pack
- Code-architect output: <Phase 1 output>
## Current Task
Identify testing patterns, conventions, config. Return: test commands + file locations.
## Acceptance Criteria
Test commands + relevant test file paths.
EOF
```
### Phase 3: Clarification (MANDATORY)
**Goal:** Resolve all ambiguities before design.
**Actions:**
1. Invoke `code-architect` to generate prioritized questions from Phase 1+2 outputs
2. Use AskUserQuestion to present questions and wait for answers
3. **Do not proceed until answered or defaults accepted**
### Phase 4: Architecture
**Goal:** Produce implementation plan fitting existing patterns.
**Actions:** Run 2 `code-architect` tasks in parallel (minimal-change vs pragmatic-clean).
```bash
codeagent-wrapper --parallel <<'EOF'
---TASK---
id: p4_minimal
agent: code-architect
workdir: .
---CONTENT---
## Original User Request
/do <request>
## Context Pack
- Code-explorer output: <ALL Phase 2 outputs>
- Code-architect output: <Phase 1 + Phase 3 answers>
## Current Task
Propose minimal-change architecture: reuse existing abstractions, minimize new files.
Output: file touch list, risks, edge cases.
## Acceptance Criteria
Concrete blueprint; minimal moving parts.
---TASK---
id: p4_pragmatic
agent: code-architect
workdir: .
---CONTENT---
## Original User Request
/do <request>
## Context Pack
- Code-explorer output: <ALL Phase 2 outputs>
- Code-architect output: <Phase 1 + Phase 3 answers>
## Current Task
Propose pragmatic-clean architecture: introduce seams for testability.
Output: file touch list, testing plan, risks.
## Acceptance Criteria
Implementable blueprint with build sequence and tests.
EOF
```
Use AskUserQuestion to let user choose approach.
### Phase 5: Implementation (Approval Required)
**Goal:** Build the feature.
**Actions:**
1. Use AskUserQuestion: "Approve starting implementation?" (Approve / Not yet)
2. If approved, invoke `develop`:
```bash
codeagent-wrapper --agent develop - . <<'EOF'
## Original User Request
/do <request>
## Context Pack
- Code-explorer output: <ALL Phase 2 outputs>
- Code-architect output: <selected Phase 4 blueprint + Phase 3 answers>
## Current Task
Implement with minimal change set following chosen architecture.
- Follow Phase 2 patterns
- Add/adjust tests per Phase 4 plan
- Run narrowest relevant tests - Run narrowest relevant tests
## Acceptance Criteria
Feature works end-to-end; tests pass; diff is minimal.
EOF EOF
``` ```
### Phase 6: Review **Full-stack parallel example** (adapt task IDs, skills, and content based on Phase 3 design):
**Goal:** Catch defects and unnecessary complexity. ```bash
codeagent-wrapper --worktree --parallel <<'EOF'
---TASK---
id: p4_backend
agent: develop
workdir: .
skills: golang-base-practices
---CONTENT---
Implement backend changes following Phase 3 blueprint.
- Follow Phase 1 patterns
- Add/adjust tests per Phase 3 plan
**Actions:** Run 2-3 `code-reviewer` tasks in parallel (correctness, simplicity). ---TASK---
id: p4_frontend
agent: develop
workdir: .
skills: frontend-design,vercel-react-best-practices
dependencies: p4_backend
---CONTENT---
Implement frontend changes following Phase 3 blueprint.
- Follow Phase 1 patterns
- Add/adjust tests per Phase 3 plan
EOF
```
Note: Choose which skills to inject based on Phase 3 design output. Only inject skills relevant to each task's domain.
2. Run parallel reviews:
```bash ```bash
codeagent-wrapper --parallel <<'EOF' codeagent-wrapper --parallel <<'EOF'
---TASK--- ---TASK---
id: p6_correctness id: p4_correctness
agent: code-reviewer agent: code-reviewer
workdir: . workdir: .
---CONTENT--- ---CONTENT---
## Original User Request Review for correctness, edge cases, failure modes.
/do <request> Classify each issue as BLOCKING or MINOR.
## Context Pack
- Code-architect output: <Phase 4 blueprint>
- Develop output: <Phase 5 output>
## Current Task
Review for correctness, edge cases, failure modes. Assume adversarial inputs.
## Acceptance Criteria
Issues with file:line references and concrete fixes.
---TASK--- ---TASK---
id: p6_simplicity id: p4_simplicity
agent: code-reviewer agent: code-reviewer
workdir: . workdir: .
---CONTENT--- ---CONTENT---
## Original User Request
/do <request>
## Context Pack
- Code-architect output: <Phase 4 blueprint>
- Develop output: <Phase 5 output>
## Current Task
Review for KISS: remove bloat, collapse needless abstractions. Review for KISS: remove bloat, collapse needless abstractions.
Classify each issue as BLOCKING or MINOR.
## Acceptance Criteria
Actionable simplifications with justification.
EOF EOF
``` ```
Use AskUserQuestion: Fix now / Fix later / Proceed as-is. 3. Handle review results:
- **MINOR issues only** → Auto-fix via `develop`, no user interaction
- **BLOCKING issues** → Use AskUserQuestion: "Fix now / Proceed as-is"
### Phase 7: Summary ### Phase 5: Complete (No Interaction)
**Goal:** Document what was built. **Goal:** Document what was built.
**Actions:** Invoke `code-reviewer` to produce summary:
```bash ```bash
codeagent-wrapper --agent code-reviewer - . <<'EOF' codeagent-wrapper --agent code-reviewer - . <<'EOF'
## Original User Request
/do <request>
## Context Pack
- Code-architect output: <Phase 4 blueprint>
- Code-reviewer output: <Phase 6 outcomes>
- Develop output: <Phase 5 output + fixes>
## Current Task
Write completion summary: Write completion summary:
- What was built - What was built
- Key decisions/tradeoffs - Key decisions/tradeoffs
- Files modified (paths) - Files modified (paths)
- How to verify (commands) - How to verify (commands)
- Follow-ups (optional) - Follow-ups (optional)
## Acceptance Criteria
Short, technical, actionable summary.
EOF EOF
``` ```
Output the completion signal:
```
<promise>DO_COMPLETE</promise>
```
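Based on the frontmatter fields this SKILL.md documents, here is a hedged sketch of what `task.py update-phase <N>` plausibly does to `task.md`. The real script's parsing may differ; this deliberately minimal version avoids a YAML library and only rewrites the two keys the workflow updates:

```python
# Phase names as defined by the 5-phase workflow above.
PHASES = {1: "Understand", 2: "Clarify", 3: "Design", 4: "Implement", 5: "Complete"}


def update_phase(task_md: str, n: int) -> str:
    """Rewrite current_phase/phase_name in task.md's YAML frontmatter."""
    out = []
    for line in task_md.splitlines():
        if line.startswith("current_phase:"):
            line = f"current_phase: {n}"
        elif line.startswith("phase_name:"):
            line = f'phase_name: "{PHASES.get(n, f"Phase {n}")}"'
        out.append(line)
    return "\n".join(out)


doc = '---\ncurrent_phase: 1\nphase_name: "Understand"\nmax_phases: 5\n---'
print(update_phase(doc, 3))
```

Keeping the rewrite line-oriented means the Markdown body after the closing `---` passes through untouched, which is the property the single-file `task.md` design depends on.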


@@ -1,12 +1,23 @@
{ {
"description": "do loop hook for 7-phase workflow", "description": "do loop hooks for 5-phase workflow",
"hooks": { "hooks": {
"Stop": [ "Stop": [
{ {
"hooks": [ "hooks": [
{ {
"type": "command", "type": "command",
"command": "${CLAUDE_PLUGIN_ROOT}/hooks/stop-hook.sh" "command": "python3 ${CLAUDE_PLUGIN_ROOT}/hooks/stop-hook.py"
}
]
}
],
"SubagentStop": [
{
"matcher": "code-reviewer",
"hooks": [
{
"type": "command",
"command": "python3 ${CLAUDE_PLUGIN_ROOT}/hooks/verify-loop.py"
} }
] ]
} }

skills/do/hooks/stop-hook.py Executable file

@@ -0,0 +1,107 @@
#!/usr/bin/env python3
"""
Stop hook for do skill workflow.
Checks if the do loop is complete before allowing exit.
Uses the new task directory structure under .claude/do-tasks/.
"""
import json
import os
import sys
DIR_TASKS = ".claude/do-tasks"
FILE_CURRENT_TASK = ".current-task"
FILE_TASK_JSON = "task.json"
PHASE_NAMES = {
1: "Understand",
2: "Clarify",
3: "Design",
4: "Implement",
5: "Complete",
}
def phase_name_for(n: int) -> str:
return PHASE_NAMES.get(n, f"Phase {n}")
def get_current_task(project_dir: str) -> str | None:
"""Read current task directory path."""
current_task_file = os.path.join(project_dir, DIR_TASKS, FILE_CURRENT_TASK)
if not os.path.exists(current_task_file):
return None
try:
with open(current_task_file, "r", encoding="utf-8") as f:
content = f.read().strip()
return content if content else None
except Exception:
return None
def get_task_info(project_dir: str, task_dir: str) -> dict | None:
"""Read task.json data."""
task_json_path = os.path.join(project_dir, task_dir, FILE_TASK_JSON)
if not os.path.exists(task_json_path):
return None
try:
with open(task_json_path, "r", encoding="utf-8") as f:
return json.load(f)
except Exception:
return None
def check_task_complete(project_dir: str, task_dir: str) -> str:
"""Check if task is complete. Returns blocking reason or empty string."""
task_info = get_task_info(project_dir, task_dir)
if not task_info:
return ""
status = task_info.get("status", "")
if status == "completed":
return ""
current_phase = task_info.get("current_phase", 1)
max_phases = task_info.get("max_phases", 5)
phase_name = task_info.get("phase_name", phase_name_for(current_phase))
completion_promise = task_info.get("completion_promise", "<promise>DO_COMPLETE</promise>")
if current_phase >= max_phases:
# Task is at final phase, allow exit
return ""
return (
f"do loop incomplete: current phase {current_phase}/{max_phases} ({phase_name}). "
f"Continue with remaining phases; use 'task.py update-phase <N>' after each phase. "
f"Include completion_promise in final output when done: {completion_promise}. "
f"To exit early, set status to 'completed' in task.json."
)
def main():
project_dir = os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd())
task_dir = get_current_task(project_dir)
if not task_dir:
# No active task, allow exit
sys.exit(0)
    # Drain the hook's stdin payload; its content is not needed for the phase check
    stdin_payload = ""
if not sys.stdin.isatty():
try:
stdin_payload = sys.stdin.read()
except Exception:
pass
reason = check_task_complete(project_dir, task_dir)
if not reason:
sys.exit(0)
print(json.dumps({"decision": "block", "reason": reason}))
sys.exit(0)
if __name__ == "__main__":
main()
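The gate this hook applies can be exercised standalone. A minimal sketch mirroring `check_task_complete`'s decision table (the sample task.json values are hypothetical):

```python
def gate(task_info: dict) -> str:
    """Mirror of the hook's check: empty string means the Stop event is allowed."""
    if task_info.get("status") == "completed":
        return ""
    current = task_info.get("current_phase", 1)
    maximum = task_info.get("max_phases", 5)
    if current >= maximum:
        return ""  # final phase reached, allow exit
    return f"do loop incomplete: current phase {current}/{maximum}"

print(gate({"current_phase": 2, "max_phases": 5}))  # blocks a mid-flight task
print(repr(gate({"status": "completed"})))          # empty reason, exit allowed
```

Note the early-exit escape hatch: setting `status` to `completed` in task.json always wins, regardless of phase.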

skills/do/hooks/stop-hook.sh (deleted, 151 lines)

@@ -1,151 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
phase_name_for() {
case "${1:-}" in
1) echo "Discovery" ;;
2) echo "Exploration" ;;
3) echo "Clarification" ;;
4) echo "Architecture" ;;
5) echo "Implementation" ;;
6) echo "Review" ;;
7) echo "Summary" ;;
*) echo "Phase ${1:-unknown}" ;;
esac
}
json_escape() {
local s="${1:-}"
s=${s//\\/\\\\}
s=${s//\"/\\\"}
s=${s//$'\n'/\\n}
s=${s//$'\r'/\\r}
s=${s//$'\t'/\\t}
printf "%s" "$s"
}
project_dir="${CLAUDE_PROJECT_DIR:-$PWD}"
state_dir="${project_dir}/.claude"
shopt -s nullglob
state_files=("${state_dir}"/do.*.local.md)
shopt -u nullglob
if [ ${#state_files[@]} -eq 0 ]; then
exit 0
fi
stdin_payload=""
if [ ! -t 0 ]; then
stdin_payload="$(cat || true)"
fi
frontmatter_get() {
local file="$1" key="$2"
awk -v k="$key" '
BEGIN { in_fm=0 }
NR==1 && $0=="---" { in_fm=1; next }
in_fm==1 && $0=="---" { exit }
in_fm==1 {
if ($0 ~ "^"k":[[:space:]]*") {
sub("^"k":[[:space:]]*", "", $0)
gsub(/^[[:space:]]+|[[:space:]]+$/, "", $0)
if ($0 ~ /^".*"$/) { sub(/^"/, "", $0); sub(/"$/, "", $0) }
print $0
exit
}
}
' "$file"
}
check_state_file() {
local state_file="$1"
local active_raw active_lc
active_raw="$(frontmatter_get "$state_file" active || true)"
active_lc="$(printf "%s" "$active_raw" | tr '[:upper:]' '[:lower:]')"
case "$active_lc" in
true|1|yes|on) ;;
*) return 0 ;;
esac
local current_phase_raw max_phases_raw phase_name completion_promise
current_phase_raw="$(frontmatter_get "$state_file" current_phase || true)"
max_phases_raw="$(frontmatter_get "$state_file" max_phases || true)"
phase_name="$(frontmatter_get "$state_file" phase_name || true)"
completion_promise="$(frontmatter_get "$state_file" completion_promise || true)"
local current_phase=1
if [[ "${current_phase_raw:-}" =~ ^[0-9]+$ ]]; then
current_phase="$current_phase_raw"
fi
local max_phases=7
if [[ "${max_phases_raw:-}" =~ ^[0-9]+$ ]]; then
max_phases="$max_phases_raw"
fi
if [ -z "${phase_name:-}" ]; then
phase_name="$(phase_name_for "$current_phase")"
fi
if [ -z "${completion_promise:-}" ]; then
completion_promise="<promise>DO_COMPLETE</promise>"
fi
local phases_done=0
if [ "$current_phase" -ge "$max_phases" ]; then
phases_done=1
fi
local promise_met=0
if [ -n "$completion_promise" ]; then
if [ -n "$stdin_payload" ] && printf "%s" "$stdin_payload" | grep -Fq -- "$completion_promise"; then
promise_met=1
else
local body
body="$(
awk '
BEGIN { in_fm=0; body=0 }
NR==1 && $0=="---" { in_fm=1; next }
in_fm==1 && $0=="---" { body=1; in_fm=0; next }
body==1 { print }
' "$state_file"
)"
if [ -n "$body" ] && printf "%s" "$body" | grep -Fq -- "$completion_promise"; then
promise_met=1
fi
fi
fi
if [ "$phases_done" -eq 1 ] && [ "$promise_met" -eq 1 ]; then
rm -f "$state_file"
return 0
fi
local reason
if [ "$phases_done" -eq 0 ]; then
reason="do loop incomplete: current phase ${current_phase}/${max_phases} (${phase_name}). Continue with remaining phases; update ${state_file} current_phase/phase_name after each phase. Include completion_promise in final output when done: ${completion_promise}. To exit early, set active to false."
else
reason="do reached final phase (current_phase=${current_phase} / max_phases=${max_phases}, phase_name=${phase_name}), but completion_promise not detected: ${completion_promise}. Please include this marker in your final output (or write it to ${state_file} body), then finish; to force exit, set active to false."
fi
printf "%s" "$reason"
}
blocking_reasons=()
for state_file in "${state_files[@]}"; do
reason="$(check_state_file "$state_file")"
if [ -n "$reason" ]; then
blocking_reasons+=("$reason")
fi
done
if [ ${#blocking_reasons[@]} -eq 0 ]; then
exit 0
fi
combined_reason="${blocking_reasons[*]}"
printf '{"decision":"block","reason":"%s"}\n' "$(json_escape "$combined_reason")"
exit 0

skills/do/hooks/verify-loop.py (new file, 218 lines)

@@ -0,0 +1,218 @@
#!/usr/bin/env python3
"""
Verify Loop Hook for do skill workflow.
SubagentStop hook that intercepts when code-reviewer agent tries to stop.
Runs verification commands to ensure code quality before allowing exit.
Mechanism:
- Intercepts SubagentStop event for code-reviewer agent
- Runs verify commands from task.json if configured
- Blocks stopping until verification passes
- Has max iterations as safety limit (MAX_ITERATIONS=5)
State file: .claude/do-tasks/.verify-state.json
"""
import json
import os
import subprocess
import sys
from datetime import datetime
from pathlib import Path
# Configuration
MAX_ITERATIONS = 5
STATE_TIMEOUT_MINUTES = 30
DIR_TASKS = ".claude/do-tasks"
FILE_CURRENT_TASK = ".current-task"
FILE_TASK_JSON = "task.json"
STATE_FILE = ".claude/do-tasks/.verify-state.json"
# Only control loop for code-reviewer agent
TARGET_AGENTS = {"code-reviewer"}
def get_project_root(cwd: str) -> str | None:
"""Find project root (directory with .claude folder)."""
current = Path(cwd).resolve()
while current != current.parent:
if (current / ".claude").exists():
return str(current)
current = current.parent
return None
def get_current_task(project_root: str) -> str | None:
"""Read current task directory path."""
current_task_file = os.path.join(project_root, DIR_TASKS, FILE_CURRENT_TASK)
if not os.path.exists(current_task_file):
return None
try:
with open(current_task_file, "r", encoding="utf-8") as f:
content = f.read().strip()
return content if content else None
except Exception:
return None
def get_task_info(project_root: str, task_dir: str) -> dict | None:
"""Read task.json data."""
task_json_path = os.path.join(project_root, task_dir, FILE_TASK_JSON)
if not os.path.exists(task_json_path):
return None
try:
with open(task_json_path, "r", encoding="utf-8") as f:
return json.load(f)
except Exception:
return None
def get_verify_commands(task_info: dict) -> list[str]:
"""Get verify commands from task.json."""
return task_info.get("verify_commands", [])
def run_verify_commands(project_root: str, commands: list[str]) -> tuple[bool, str]:
"""Run verify commands and return (success, message)."""
for cmd in commands:
try:
result = subprocess.run(
cmd,
shell=True,
cwd=project_root,
capture_output=True,
timeout=120,
)
if result.returncode != 0:
stderr = result.stderr.decode("utf-8", errors="replace")
stdout = result.stdout.decode("utf-8", errors="replace")
error_output = stderr or stdout
if len(error_output) > 500:
error_output = error_output[:500] + "..."
return False, f"Command failed: {cmd}\n{error_output}"
except subprocess.TimeoutExpired:
return False, f"Command timed out: {cmd}"
except Exception as e:
return False, f"Command error: {cmd} - {str(e)}"
return True, "All verify commands passed"
def load_state(project_root: str) -> dict:
"""Load verify loop state."""
state_path = os.path.join(project_root, STATE_FILE)
if not os.path.exists(state_path):
return {"task": None, "iteration": 0, "started_at": None}
try:
with open(state_path, "r", encoding="utf-8") as f:
return json.load(f)
except Exception:
return {"task": None, "iteration": 0, "started_at": None}
def save_state(project_root: str, state: dict) -> None:
"""Save verify loop state."""
state_path = os.path.join(project_root, STATE_FILE)
try:
os.makedirs(os.path.dirname(state_path), exist_ok=True)
with open(state_path, "w", encoding="utf-8") as f:
json.dump(state, f, indent=2, ensure_ascii=False)
except Exception:
pass
def main():
try:
input_data = json.load(sys.stdin)
except json.JSONDecodeError:
sys.exit(0)
hook_event = input_data.get("hook_event_name", "")
if hook_event != "SubagentStop":
sys.exit(0)
subagent_type = input_data.get("subagent_type", "")
agent_output = input_data.get("agent_output", "")
cwd = input_data.get("cwd", os.getcwd())
if subagent_type not in TARGET_AGENTS:
sys.exit(0)
project_root = get_project_root(cwd)
if not project_root:
sys.exit(0)
task_dir = get_current_task(project_root)
if not task_dir:
sys.exit(0)
task_info = get_task_info(project_root, task_dir)
if not task_info:
sys.exit(0)
verify_commands = get_verify_commands(task_info)
if not verify_commands:
# No verify commands configured, allow exit
sys.exit(0)
# Load state
state = load_state(project_root)
# Reset state if task changed or too old
should_reset = False
if state.get("task") != task_dir:
should_reset = True
elif state.get("started_at"):
try:
started = datetime.fromisoformat(state["started_at"])
if (datetime.now() - started).total_seconds() > STATE_TIMEOUT_MINUTES * 60:
should_reset = True
except (ValueError, TypeError):
should_reset = True
if should_reset:
state = {
"task": task_dir,
"iteration": 0,
"started_at": datetime.now().isoformat(),
}
# Increment iteration
state["iteration"] = state.get("iteration", 0) + 1
current_iteration = state["iteration"]
save_state(project_root, state)
# Safety check: max iterations
if current_iteration >= MAX_ITERATIONS:
state["iteration"] = 0
save_state(project_root, state)
output = {
"decision": "allow",
"reason": f"Max iterations ({MAX_ITERATIONS}) reached. Stopping to prevent infinite loop.",
}
print(json.dumps(output, ensure_ascii=False))
sys.exit(0)
# Run verify commands
passed, message = run_verify_commands(project_root, verify_commands)
if passed:
state["iteration"] = 0
save_state(project_root, state)
output = {
"decision": "allow",
"reason": "All verify commands passed. Review phase complete.",
}
print(json.dumps(output, ensure_ascii=False))
sys.exit(0)
else:
output = {
"decision": "block",
"reason": f"Iteration {current_iteration}/{MAX_ITERATIONS}. Verification failed:\n{message}\n\nPlease fix the issues and try again.",
}
print(json.dumps(output, ensure_ascii=False))
sys.exit(0)
if __name__ == "__main__":
main()
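A task opts into this loop only when its task.json carries a `verify_commands` array. A minimal sketch of that field and the pass/fail contract, with placeholder commands (not from the repo):

```python
import subprocess

# Hypothetical task.json fragment as read by the hook.
task_info = {
    "verify_commands": [
        'python3 -c "print(42)"',            # passes
        'python3 -c "raise SystemExit(1)"',  # fails -> hook blocks the stop
    ]
}

def run_verify(commands: list[str]) -> tuple[bool, str]:
    """First non-zero exit short-circuits, matching run_verify_commands above."""
    for cmd in commands:
        result = subprocess.run(cmd, shell=True, capture_output=True, timeout=120)
        if result.returncode != 0:
            return False, f"Command failed: {cmd}"
    return True, "All verify commands passed"

ok, msg = run_verify(task_info["verify_commands"])
print(ok, msg)
```

With no `verify_commands` configured, the hook exits silently and never blocks the agent.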

skills/do/install.py (new executable file, 164 lines)

@@ -0,0 +1,164 @@
#!/usr/bin/env python3
"""Install/uninstall do skill to ~/.claude/skills/do"""
import argparse
import json
import os
import shutil
import sys
from pathlib import Path
SKILL_NAME = "do"
HOOK_PATH = "~/.claude/skills/do/hooks/stop-hook.py"
MODELS_JSON_TEMPLATE = {
"agents": {
"code-explorer": {
"backend": "claude",
"model": "claude-sonnet-4-5-20250929"
},
"code-architect": {
"backend": "claude",
"model": "claude-sonnet-4-5-20250929"
},
"code-reviewer": {
"backend": "claude",
"model": "claude-sonnet-4-5-20250929"
}
}
}
def get_settings_path() -> Path:
return Path.home() / ".claude" / "settings.json"
def load_settings() -> dict:
path = get_settings_path()
if path.exists():
with open(path, "r", encoding="utf-8") as f:
return json.load(f)
return {}
def save_settings(settings: dict):
path = get_settings_path()
path.parent.mkdir(parents=True, exist_ok=True)
with open(path, "w", encoding="utf-8") as f:
json.dump(settings, f, indent=2)
def add_hook(settings: dict) -> dict:
hook_command = str(Path(HOOK_PATH).expanduser())
hook_entry = {
"type": "command",
"command": f"python3 {hook_command}"
}
if "hooks" not in settings:
settings["hooks"] = {}
if "Stop" not in settings["hooks"]:
settings["hooks"]["Stop"] = []
stop_hooks = settings["hooks"]["Stop"]
for item in stop_hooks:
if "hooks" in item:
for h in item["hooks"]:
if "stop-hook" in h.get("command", "") and "do" in h.get("command", ""):
h["command"] = f"python3 {hook_command}"
return settings
stop_hooks.append({"hooks": [hook_entry]})
return settings
def remove_hook(settings: dict) -> dict:
if "hooks" not in settings or "Stop" not in settings["hooks"]:
return settings
stop_hooks = settings["hooks"]["Stop"]
new_stop_hooks = []
for item in stop_hooks:
if "hooks" in item:
filtered = [h for h in item["hooks"]
if "stop-hook" not in h.get("command", "")
or "do" not in h.get("command", "")]
if filtered:
item["hooks"] = filtered
new_stop_hooks.append(item)
else:
new_stop_hooks.append(item)
settings["hooks"]["Stop"] = new_stop_hooks
if not settings["hooks"]["Stop"]:
del settings["hooks"]["Stop"]
if not settings["hooks"]:
del settings["hooks"]
return settings
def install_models_json():
"""Install ~/.codeagent/models.json if not exists"""
path = Path.home() / ".codeagent" / "models.json"
if path.exists():
print(f"{path} already exists, skipping")
return
path.parent.mkdir(parents=True, exist_ok=True)
with open(path, "w", encoding="utf-8") as f:
json.dump(MODELS_JSON_TEMPLATE, f, indent=2)
print(f"✓ Created {path}")
def install():
src = Path(__file__).parent.resolve()
dest = Path.home() / ".claude" / "skills" / SKILL_NAME
dest.mkdir(parents=True, exist_ok=True)
exclude = {".git", "__pycache__", ".DS_Store", "install.py"}
for item in src.iterdir():
if item.name in exclude:
continue
target = dest / item.name
if target.exists():
if target.is_dir():
shutil.rmtree(target)
else:
target.unlink()
if item.is_dir():
shutil.copytree(item, target)
else:
shutil.copy2(item, target)
settings = load_settings()
settings = add_hook(settings)
save_settings(settings)
install_models_json()
print(f"✓ Installed to {dest}")
print(f"✓ Hook added to settings.json")
def uninstall():
dest = Path.home() / ".claude" / "skills" / SKILL_NAME
settings = load_settings()
settings = remove_hook(settings)
save_settings(settings)
print(f"✓ Hook removed from settings.json")
if dest.exists():
shutil.rmtree(dest)
print(f"✓ Removed {dest}")
else:
print(f"{dest} not found")
def main():
parser = argparse.ArgumentParser(description="Install/uninstall do skill")
parser.add_argument("--uninstall", "-u", action="store_true", help="Uninstall the skill")
args = parser.parse_args()
if args.uninstall:
uninstall()
else:
install()
if __name__ == "__main__":
main()
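The `add_hook` merge above is idempotent: re-running the installer updates the existing do stop-hook entry in place instead of appending a duplicate. A stripped-down sketch of that merge rule (settings shape per `~/.claude/settings.json`; the command path is illustrative):

```python
def merge_stop_hook(settings: dict, command: str) -> dict:
    stop_hooks = settings.setdefault("hooks", {}).setdefault("Stop", [])
    for item in stop_hooks:
        for h in item.get("hooks", []):
            if "stop-hook" in h.get("command", "") and "do" in h.get("command", ""):
                h["command"] = command  # update in place, no duplicate entry
                return settings
    stop_hooks.append({"hooks": [{"type": "command", "command": command}]})
    return settings

s: dict = {}
cmd = "python3 /home/user/.claude/skills/do/hooks/stop-hook.py"
merge_stop_hook(s, cmd)
merge_stop_hook(s, cmd)  # second install run
print(len(s["hooks"]["Stop"]))  # still a single entry
```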


@@ -0,0 +1,149 @@
#!/usr/bin/env python3
"""
Get context for current task.
Reads the current task's jsonl files and returns context for specified agent.
Used by inject-context hook to build agent prompts.
"""
import json
import os
import sys
from pathlib import Path
DIR_TASKS = ".claude/do-tasks"
FILE_CURRENT_TASK = ".current-task"
FILE_TASK_JSON = "task.json"
def get_project_root() -> str:
return os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd())
def get_current_task(project_root: str) -> str | None:
current_task_file = os.path.join(project_root, DIR_TASKS, FILE_CURRENT_TASK)
if not os.path.exists(current_task_file):
return None
try:
with open(current_task_file, "r", encoding="utf-8") as f:
content = f.read().strip()
return content if content else None
except Exception:
return None
def read_file_content(base_path: str, file_path: str) -> str | None:
full_path = os.path.join(base_path, file_path)
if os.path.exists(full_path) and os.path.isfile(full_path):
try:
with open(full_path, "r", encoding="utf-8") as f:
return f.read()
except Exception:
return None
return None
def read_jsonl_entries(base_path: str, jsonl_path: str) -> list[tuple[str, str]]:
full_path = os.path.join(base_path, jsonl_path)
if not os.path.exists(full_path):
return []
results = []
try:
with open(full_path, "r", encoding="utf-8") as f:
for line in f:
line = line.strip()
if not line:
continue
try:
item = json.loads(line)
file_path = item.get("file") or item.get("path")
if not file_path:
continue
content = read_file_content(base_path, file_path)
if content:
results.append((file_path, content))
except json.JSONDecodeError:
continue
except Exception:
pass
return results
def get_agent_context(project_root: str, task_dir: str, agent_type: str) -> str:
"""Get complete context for specified agent."""
context_parts = []
# Read agent-specific jsonl
agent_jsonl = os.path.join(task_dir, f"{agent_type}.jsonl")
agent_entries = read_jsonl_entries(project_root, agent_jsonl)
for file_path, content in agent_entries:
context_parts.append(f"=== {file_path} ===\n{content}")
# Read prd.md
prd_content = read_file_content(project_root, os.path.join(task_dir, "prd.md"))
if prd_content:
context_parts.append(f"=== {task_dir}/prd.md (Requirements) ===\n{prd_content}")
return "\n\n".join(context_parts)
def get_task_info(project_root: str, task_dir: str) -> dict | None:
"""Get task.json data."""
task_json_path = os.path.join(project_root, task_dir, FILE_TASK_JSON)
if not os.path.exists(task_json_path):
return None
try:
with open(task_json_path, "r", encoding="utf-8") as f:
return json.load(f)
except Exception:
return None
def main():
import argparse
parser = argparse.ArgumentParser(description="Get context for current task")
parser.add_argument("agent", nargs="?", choices=["implement", "check", "debug"],
help="Agent type (optional, returns task info if not specified)")
parser.add_argument("--json", action="store_true", help="Output as JSON")
args = parser.parse_args()
project_root = get_project_root()
task_dir = get_current_task(project_root)
if not task_dir:
if args.json:
print(json.dumps({"error": "No active task"}))
else:
print("No active task.", file=sys.stderr)
sys.exit(1)
task_info = get_task_info(project_root, task_dir)
if not args.agent:
if args.json:
print(json.dumps({"task_dir": task_dir, "task_info": task_info}))
else:
print(f"Task: {task_dir}")
if task_info:
print(f"Title: {task_info.get('title', 'N/A')}")
print(f"Phase: {task_info.get('current_phase', '?')}/{task_info.get('max_phases', 5)}")
sys.exit(0)
context = get_agent_context(project_root, task_dir, args.agent)
if args.json:
print(json.dumps({
"task_dir": task_dir,
"agent": args.agent,
"context": context,
"task_info": task_info,
}))
else:
print(context)
if __name__ == "__main__":
main()

skills/do/scripts/setup-do.py (new executable file, 58 lines)

@@ -0,0 +1,58 @@
#!/usr/bin/env python3
"""
Initialize do skill workflow - wrapper around task.py.
Creates a task directory under .claude/do-tasks/ with:
- task.md: Task metadata (YAML frontmatter) + requirements (Markdown body)
If --worktree is specified, also creates a git worktree for isolated development.
"""
import argparse
import sys
from task import create_task, PHASE_NAMES
def die(msg: str):
print(f"Error: {msg}", file=sys.stderr)
sys.exit(1)
def main():
parser = argparse.ArgumentParser(
description="Initialize do skill workflow with task directory"
)
parser.add_argument("--max-phases", type=int, default=5, help="Default: 5")
parser.add_argument(
"--completion-promise",
default="<promise>DO_COMPLETE</promise>",
help="Default: <promise>DO_COMPLETE</promise>",
)
parser.add_argument("--worktree", action="store_true", help="Enable worktree mode")
parser.add_argument("prompt", nargs="+", help="Task description")
args = parser.parse_args()
if args.max_phases < 1:
die("--max-phases must be a positive integer")
prompt = " ".join(args.prompt)
result = create_task(title=prompt, use_worktree=args.worktree)
task_data = result["task_data"]
worktree_dir = result.get("worktree_dir", "")
print(f"Initialized: {result['relative_path']}")
print(f"task_id: {task_data['id']}")
print(f"phase: 1/{task_data['max_phases']} ({PHASE_NAMES[1]})")
print(f"completion_promise: {task_data['completion_promise']}")
print(f"use_worktree: {task_data['use_worktree']}")
print(f"export DO_TASK_DIR={result['relative_path']}")
if worktree_dir:
print(f"worktree_dir: {worktree_dir}")
print(f"export DO_WORKTREE_DIR={worktree_dir}")
if __name__ == "__main__":
main()
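A caller can capture the `export` lines this script prints rather than eval'ing them in a shell. A sketch over sample output (the task id shown is made up):

```python
sample = """Initialized: .claude/do-tasks/0209-ab12
task_id: 0209-ab12
export DO_TASK_DIR=.claude/do-tasks/0209-ab12
export DO_WORKTREE_DIR=.worktrees/do-0209-ab12"""

env: dict[str, str] = {}
for line in sample.splitlines():
    if line.startswith("export "):
        key, _, value = line[len("export "):].partition("=")
        env[key] = value
print(env["DO_WORKTREE_DIR"])
```

Downstream, `codeagent-wrapper --worktree` checks `DO_WORKTREE_DIR` first, which is how a single worktree gets reused across phases.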

skills/do/scripts/setup-do.sh (deleted, 114 lines)

@@ -1,114 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
usage() {
cat <<'EOF'
Usage: setup-do.sh [options] PROMPT...
Creates (or overwrites) project state file:
.claude/do.local.md
Options:
--max-phases N Default: 7
--completion-promise STR Default: <promise>DO_COMPLETE</promise>
-h, --help Show this help
EOF
}
die() {
echo "$*" >&2
exit 1
}
phase_name_for() {
case "${1:-}" in
1) echo "Discovery" ;;
2) echo "Exploration" ;;
3) echo "Clarification" ;;
4) echo "Architecture" ;;
5) echo "Implementation" ;;
6) echo "Review" ;;
7) echo "Summary" ;;
*) echo "Phase ${1:-unknown}" ;;
esac
}
max_phases=7
completion_promise="<promise>DO_COMPLETE</promise>"
declare -a prompt_parts=()
while [ $# -gt 0 ]; do
case "$1" in
-h|--help)
usage
exit 0
;;
--max-phases)
[ $# -ge 2 ] || die "--max-phases requires a value"
max_phases="$2"
shift 2
;;
--completion-promise)
[ $# -ge 2 ] || die "--completion-promise requires a value"
completion_promise="$2"
shift 2
;;
--)
shift
while [ $# -gt 0 ]; do
prompt_parts+=("$1")
shift
done
break
;;
-*)
die "Unknown argument: $1 (use --help)"
;;
*)
prompt_parts+=("$1")
shift
;;
esac
done
prompt="${prompt_parts[*]:-}"
[ -n "$prompt" ] || die "PROMPT is required (use --help)"
if ! [[ "$max_phases" =~ ^[0-9]+$ ]] || [ "$max_phases" -lt 1 ]; then
die "--max-phases must be a positive integer"
fi
project_dir="${CLAUDE_PROJECT_DIR:-$PWD}"
state_dir="${project_dir}/.claude"
task_id="$(date +%s)-$$-$(head -c 4 /dev/urandom | od -An -tx1 | tr -d ' \n')"
state_file="${state_dir}/do.${task_id}.local.md"
mkdir -p "$state_dir"
phase_name="$(phase_name_for 1)"
cat > "$state_file" << EOF
---
active: true
current_phase: 1
phase_name: "$phase_name"
max_phases: $max_phases
completion_promise: "$completion_promise"
---
# do loop state
## Prompt
$prompt
## Notes
- Update frontmatter current_phase/phase_name as you progress
- When complete, include the frontmatter completion_promise in your final output
EOF
echo "Initialized: $state_file"
echo "task_id: $task_id"
echo "phase: 1/$max_phases ($phase_name)"
echo "completion_promise: $completion_promise"

skills/do/scripts/task.py (new file, 434 lines)

@@ -0,0 +1,434 @@
#!/usr/bin/env python3
"""
Task Directory Management CLI for do skill workflow.
Commands:
create <title> - Create a new task directory with task.md
start <task-dir> - Set current task pointer
finish - Clear current task pointer
list - List active tasks
status - Show current task status
update-phase <N> - Update current phase
"""
import argparse
import os
import random
import re
import string
import subprocess
import sys
from datetime import datetime
from pathlib import Path
# Directory constants
DIR_TASKS = ".claude/do-tasks"
FILE_CURRENT_TASK = ".current-task"
FILE_TASK_MD = "task.md"
PHASE_NAMES = {
1: "Understand",
2: "Clarify",
3: "Design",
4: "Implement",
5: "Complete",
}
def get_project_root() -> str:
"""Get project root from env or cwd."""
return os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd())
def get_tasks_dir(project_root: str) -> str:
"""Get tasks directory path."""
return os.path.join(project_root, DIR_TASKS)
def get_current_task_file(project_root: str) -> str:
"""Get current task pointer file path."""
return os.path.join(project_root, DIR_TASKS, FILE_CURRENT_TASK)
def generate_task_id() -> str:
"""Generate short task ID: MMDD-XXXX format."""
date_part = datetime.now().strftime("%m%d")
random_part = ''.join(random.choices(string.ascii_lowercase + string.digits, k=4))
return f"{date_part}-{random_part}"
def read_task_md(task_md_path: str) -> dict | None:
"""Read task.md and parse YAML frontmatter + body."""
if not os.path.exists(task_md_path):
return None
try:
with open(task_md_path, "r", encoding="utf-8") as f:
content = f.read()
except Exception:
return None
# Parse YAML frontmatter
match = re.match(r'^---\n(.*?)\n---\n(.*)$', content, re.DOTALL)
if not match:
return None
frontmatter_str = match.group(1)
body = match.group(2)
# Simple YAML parsing (no external deps)
frontmatter = {}
for line in frontmatter_str.split('\n'):
if ':' in line:
key, value = line.split(':', 1)
key = key.strip()
value = value.strip()
# Handle quoted strings
if value.startswith('"') and value.endswith('"'):
value = value[1:-1]
elif value == 'true':
value = True
elif value == 'false':
value = False
elif value.isdigit():
value = int(value)
frontmatter[key] = value
return {"frontmatter": frontmatter, "body": body}
def write_task_md(task_md_path: str, frontmatter: dict, body: str) -> bool:
"""Write task.md with YAML frontmatter + body."""
try:
lines = ["---"]
for key, value in frontmatter.items():
if isinstance(value, bool):
lines.append(f"{key}: {str(value).lower()}")
elif isinstance(value, int):
lines.append(f"{key}: {value}")
            elif isinstance(value, str):
                lines.append(f'{key}: "{value}"')  # quote all strings; keeps <, >, : safe
            else:
                lines.append(f"{key}: {value}")
lines.append("---")
lines.append("")
lines.append(body)
with open(task_md_path, "w", encoding="utf-8") as f:
f.write('\n'.join(lines))
return True
except Exception:
return False
def create_worktree(project_root: str, task_id: str) -> str:
"""Create a git worktree for the task. Returns the worktree directory path."""
# Get git root
result = subprocess.run(
["git", "-C", project_root, "rev-parse", "--show-toplevel"],
capture_output=True,
text=True,
)
if result.returncode != 0:
raise RuntimeError(f"Not a git repository: {project_root}")
git_root = result.stdout.strip()
# Calculate paths
worktree_dir = os.path.join(git_root, ".worktrees", f"do-{task_id}")
branch_name = f"do/{task_id}"
# Create worktree with new branch
result = subprocess.run(
["git", "-C", git_root, "worktree", "add", "-b", branch_name, worktree_dir],
capture_output=True,
text=True,
)
if result.returncode != 0:
raise RuntimeError(f"Failed to create worktree: {result.stderr}")
return worktree_dir
def create_task(title: str, use_worktree: bool = False) -> dict:
"""Create a new task directory with task.md."""
project_root = get_project_root()
tasks_dir = get_tasks_dir(project_root)
os.makedirs(tasks_dir, exist_ok=True)
task_id = generate_task_id()
task_dir = os.path.join(tasks_dir, task_id)
os.makedirs(task_dir, exist_ok=True)
# Create worktree if requested
worktree_dir = ""
if use_worktree:
try:
worktree_dir = create_worktree(project_root, task_id)
except RuntimeError as e:
print(f"Warning: {e}", file=sys.stderr)
use_worktree = False
frontmatter = {
"id": task_id,
"title": title,
"status": "in_progress",
"current_phase": 1,
"phase_name": PHASE_NAMES[1],
"max_phases": 5,
"use_worktree": use_worktree,
"worktree_dir": worktree_dir,
"created_at": datetime.now().isoformat(),
"completion_promise": "<promise>DO_COMPLETE</promise>",
}
body = f"""# Requirements
{title}
## Context
## Progress
"""
task_md_path = os.path.join(task_dir, FILE_TASK_MD)
write_task_md(task_md_path, frontmatter, body)
current_task_file = get_current_task_file(project_root)
relative_task_dir = os.path.relpath(task_dir, project_root)
with open(current_task_file, "w", encoding="utf-8") as f:
f.write(relative_task_dir)
return {
"task_dir": task_dir,
"relative_path": relative_task_dir,
"task_data": frontmatter,
"worktree_dir": worktree_dir,
}
def get_current_task(project_root: str) -> str | None:
"""Read current task directory path."""
current_task_file = get_current_task_file(project_root)
if not os.path.exists(current_task_file):
return None
try:
with open(current_task_file, "r", encoding="utf-8") as f:
content = f.read().strip()
return content if content else None
except Exception:
return None
def start_task(task_dir: str) -> bool:
"""Set current task pointer."""
project_root = get_project_root()
tasks_dir = get_tasks_dir(project_root)
if os.path.isabs(task_dir):
full_path = task_dir
relative_path = os.path.relpath(task_dir, project_root)
else:
if not task_dir.startswith(DIR_TASKS):
full_path = os.path.join(tasks_dir, task_dir)
relative_path = os.path.join(DIR_TASKS, task_dir)
else:
full_path = os.path.join(project_root, task_dir)
relative_path = task_dir
if not os.path.exists(full_path):
print(f"Error: Task directory not found: {full_path}", file=sys.stderr)
return False
current_task_file = get_current_task_file(project_root)
os.makedirs(os.path.dirname(current_task_file), exist_ok=True)
with open(current_task_file, "w", encoding="utf-8") as f:
f.write(relative_path)
return True
def finish_task() -> bool:
"""Clear current task pointer."""
project_root = get_project_root()
current_task_file = get_current_task_file(project_root)
if os.path.exists(current_task_file):
os.remove(current_task_file)
return True
def list_tasks() -> list[dict]:
"""List all task directories."""
project_root = get_project_root()
tasks_dir = get_tasks_dir(project_root)
if not os.path.exists(tasks_dir):
return []
tasks = []
current_task = get_current_task(project_root)
for entry in sorted(os.listdir(tasks_dir), reverse=True):
entry_path = os.path.join(tasks_dir, entry)
if not os.path.isdir(entry_path):
continue
task_md_path = os.path.join(entry_path, FILE_TASK_MD)
if not os.path.exists(task_md_path):
continue
parsed = read_task_md(task_md_path)
if parsed:
task_data = parsed["frontmatter"]
else:
task_data = {"id": entry, "title": entry, "status": "unknown"}
relative_path = os.path.join(DIR_TASKS, entry)
task_data["path"] = relative_path
task_data["is_current"] = current_task == relative_path
tasks.append(task_data)
return tasks
def get_status() -> dict | None:
"""Get current task status."""
project_root = get_project_root()
current_task = get_current_task(project_root)
if not current_task:
return None
task_dir = os.path.join(project_root, current_task)
task_md_path = os.path.join(task_dir, FILE_TASK_MD)
parsed = read_task_md(task_md_path)
if not parsed:
return None
task_data = parsed["frontmatter"]
task_data["path"] = current_task
return task_data
def update_phase(phase: int) -> bool:
"""Update current task phase."""
project_root = get_project_root()
current_task = get_current_task(project_root)
if not current_task:
print("Error: No active task.", file=sys.stderr)
return False
task_dir = os.path.join(project_root, current_task)
task_md_path = os.path.join(task_dir, FILE_TASK_MD)
parsed = read_task_md(task_md_path)
if not parsed:
print("Error: task.md not found or invalid.", file=sys.stderr)
return False
frontmatter = parsed["frontmatter"]
frontmatter["current_phase"] = phase
frontmatter["phase_name"] = PHASE_NAMES.get(phase, f"Phase {phase}")
if not write_task_md(task_md_path, frontmatter, parsed["body"]):
print("Error: Failed to write task.md.", file=sys.stderr)
return False
return True
def main():
parser = argparse.ArgumentParser(
description="Task directory management for do skill workflow"
)
subparsers = parser.add_subparsers(dest="command", help="Available commands")
# create command
create_parser = subparsers.add_parser("create", help="Create a new task")
create_parser.add_argument("title", nargs="+", help="Task title")
create_parser.add_argument("--worktree", action="store_true", help="Enable worktree mode")
# start command
start_parser = subparsers.add_parser("start", help="Set current task")
start_parser.add_argument("task_dir", help="Task directory path")
# finish command
subparsers.add_parser("finish", help="Clear current task")
# list command
subparsers.add_parser("list", help="List all tasks")
# status command
subparsers.add_parser("status", help="Show current task status")
# update-phase command
phase_parser = subparsers.add_parser("update-phase", help="Update current phase")
phase_parser.add_argument("phase", type=int, help="Phase number (1-5)")
args = parser.parse_args()
if args.command == "create":
title = " ".join(args.title)
result = create_task(title, args.worktree)
print(f"Created task: {result['relative_path']}")
print(f"Task ID: {result['task_data']['id']}")
print(f"Phase: 1/{result['task_data']['max_phases']} (Understand)")
print(f"Worktree: {result['task_data']['use_worktree']}")
elif args.command == "start":
if start_task(args.task_dir):
print(f"Started task: {args.task_dir}")
else:
sys.exit(1)
elif args.command == "finish":
if finish_task():
print("Task finished, current task cleared.")
else:
sys.exit(1)
elif args.command == "list":
tasks = list_tasks()
if not tasks:
print("No tasks found.")
else:
for task in tasks:
marker = "* " if task.get("is_current") else " "
phase = task.get("current_phase", "?")
max_phase = task.get("max_phases", 5)
status = task.get("status", "unknown")
print(f"{marker}{task['id']} [{status}] phase {phase}/{max_phase}")
print(f" {task.get('title', 'No title')}")
elif args.command == "status":
status = get_status()
if not status:
print("No active task.")
else:
print(f"Task: {status['id']}")
print(f"Title: {status.get('title', 'No title')}")
print(f"Status: {status.get('status', 'unknown')}")
print(f"Phase: {status.get('current_phase', '?')}/{status.get('max_phases', 5)}")
print(f"Worktree: {status.get('use_worktree', False)}")
print(f"Path: {status['path']}")
elif args.command == "update-phase":
if update_phase(args.phase):
phase_name = PHASE_NAMES.get(args.phase, f"Phase {args.phase}")
print(f"Updated to phase {args.phase} ({phase_name})")
else:
sys.exit(1)
else:
parser.print_help()
sys.exit(1)
if __name__ == "__main__":
main()
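
# Usage sketch (hypothetical invocation; assumes this script is saved as
# task.py -- the real filename is not shown in this excerpt):
#   python task.py create "Fix login redirect" --worktree
#   python task.py start <task-dir-printed-by-create>
#   python task.py update-phase 2
#   python task.py status
#   python task.py list
#   python task.py finish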