From e9cbb15c2d4daf7cff743f799277cccd357e4bef Mon Sep 17 00:00:00 2001 From: kurihada Date: Thu, 19 Mar 2026 18:36:31 +0800 Subject: [PATCH] Add orch skill forward test evidence --- docs/implementation-roadmap.md | 1 + .../archive/orch-skill-direct-replay.md | 66 ++ .../archive/orch-skill-real-forward-test.md | 67 ++ docs/tests/orch-skill/README.md | 20 + ...ocked-answer-resume-through-bundled-cli.md | 27 + ...ssigns-blocked-task-through-bundled-cli.md | 31 + ...retries-failed-task-through-bundled-cli.md | 30 + ...-dispatch-reconcile-through-bundled-cli.md | 26 + ...dispatch-to-cleanup-through-bundled-cli.md | 29 + scripts/run_orch_skill_forward_tests.sh | 739 ++++++++++++++++++ 10 files changed, 1036 insertions(+) create mode 100644 docs/roadmaps/archive/orch-skill-direct-replay.md create mode 100644 docs/roadmaps/archive/orch-skill-real-forward-test.md create mode 100755 scripts/run_orch_skill_forward_tests.sh diff --git a/docs/implementation-roadmap.md b/docs/implementation-roadmap.md index 6d31be3..1aa6c7f 100644 --- a/docs/implementation-roadmap.md +++ b/docs/implementation-roadmap.md @@ -27,6 +27,7 @@ As of now: - an inbox skill forward-test plan directory now exists under `docs/tests/inbox-skill/`, with a shared execution template and multiple scenario cases - an orch skill forward-test plan directory now exists under `docs/tests/orch-skill/`, with a shared execution contract and initial leader-side workflow scenarios - a repo-local replay runner now exists at `scripts/run_orch_skill_forward_tests.sh`, and the five `docs/tests/orch-skill/` cases now include recorded example runs from a bundled-CLI replay captured on `2026-03-19` +- the five `docs/tests/orch-skill/` cases now also include recorded real subagent-forward runs captured on `2026-03-19`, with spawned leader and worker agents using the packaged `skills/orch/` and `skills/inbox/` bundles - a council-review skill forward-test plan directory now exists under `docs/tests/council-review-skill/`, with a 
shared execution contract and nine council workflow scenarios covering end-to-end flow, unanimous-only defaults, timeout/before-tally errors, explicit minority reporting, invalid report filters, strict tally semantics, malformed reviewer JSON, and target-file inputs - an execution-roadmap workflow now exists under `docs/roadmaps/active/` and `docs/roadmaps/archive/` for agent-level work traces and completion archives - a repo-local `scripts/package_skill_clis.sh` packaging flow now builds bundled skill CLI assets for `inbox`, `orch`, and `council-review` diff --git a/docs/roadmaps/archive/orch-skill-direct-replay.md b/docs/roadmaps/archive/orch-skill-direct-replay.md new file mode 100644 index 0000000..0cfe146 --- /dev/null +++ b/docs/roadmaps/archive/orch-skill-direct-replay.md @@ -0,0 +1,66 @@ +# Title + +Direct Replay For Orch Skill Cases + +## Status + +- `completed` + +## Owner + +- codex + +## Started At + +- `2026-03-19` + +## Goal + +- Execute the documented `docs/tests/orch-skill/` scenarios against the bundled `skills/orch/assets/orch` and `skills/inbox/assets/inbox` binaries, capture concrete evidence, and sync the repo docs with the observed results. + +## Scope + +- add a reusable local runner for the five documented orch-skill scenarios +- run the scenarios and capture per-case evidence +- update the orch-skill docs with recorded runs and note the execution mode +- update the implementation roadmap to reflect the new replay coverage + +## Checklist + +- [x] Review the orch-skill case docs and bundled CLI surfaces. +- [x] Add a reusable direct replay runner for the five orch-skill scenarios. +- [x] Execute the runner and collect evidence for all five cases. +- [x] Update the orch-skill docs with recorded example runs and execution notes. +- [x] Update the implementation roadmap and archive this execution roadmap. 
+ +## Files + +- `scripts/run_orch_skill_forward_tests.sh` +- `docs/tests/orch-skill/README.md` +- `docs/tests/orch-skill/leader-run-dispatch-reconcile-through-bundled-cli.md` +- `docs/tests/orch-skill/leader-blocked-answer-resume-through-bundled-cli.md` +- `docs/tests/orch-skill/strict-worktree-dispatch-to-cleanup-through-bundled-cli.md` +- `docs/tests/orch-skill/leader-retries-failed-task-through-bundled-cli.md` +- `docs/tests/orch-skill/leader-reassigns-blocked-task-through-bundled-cli.md` +- `docs/implementation-roadmap.md` +- `docs/roadmaps/archive/orch-skill-direct-replay.md` + +## Decisions + +- Use direct bundled-CLI replay instead of spawning Codex role agents in this turn, because the current session does not permit sub-agent delegation unless the user explicitly asks for it. +- Keep the replay runner repo-local so the same scenarios can be rerun later without reconstructing the command flow by hand. + +## Blockers + +- none + +## Next Step + +- rerun `scripts/run_orch_skill_forward_tests.sh` when the bundled skill binaries or orch-skill case docs change, and add true multi-agent forward coverage later if explicit sub-agent execution is needed + +## Completion Summary + +- Added `scripts/run_orch_skill_forward_tests.sh` as a reusable direct bundled-CLI replay runner for the five documented orch-skill scenarios. +- Executed the runner on `2026-03-19`; all five scenarios passed and produced per-case JSON evidence under a temporary output root. +- Updated `docs/tests/orch-skill/README.md` plus all five case files with recorded example runs and explicit execution-mode notes. +- Updated `docs/implementation-roadmap.md` to record the new replay runner and captured orch-skill execution evidence. 
diff --git a/docs/roadmaps/archive/orch-skill-real-forward-test.md b/docs/roadmaps/archive/orch-skill-real-forward-test.md new file mode 100644 index 0000000..8d87ff4 --- /dev/null +++ b/docs/roadmaps/archive/orch-skill-real-forward-test.md @@ -0,0 +1,67 @@ +# Title + +Real Subagent Forward Tests For Orch Skill + +## Status + +- `completed` + +## Owner + +- codex + +## Started At + +- `2026-03-19` + +## Goal + +- Execute the documented `docs/tests/orch-skill/` scenarios using real spawned role agents with injected `skills/orch/` and `skills/inbox/`, then record concrete pass/fail evidence and sync the repository docs. + +## Scope + +- validate subagent skill injection for project-local orch and inbox skills +- run the five documented orch-skill forward cases with real leader and worker subagents +- collect main-thread validation evidence and agent summaries +- update the orch-skill docs and implementation roadmap with the real forward-test results + +## Checklist + +- [x] Re-read the orch-skill shared execution contract and worker skill constraints. +- [x] Validate project-local skill injection with a small spawned-agent probe. +- [x] Execute the five orch-skill cases with real spawned role agents and collect evidence. +- [x] Update the orch-skill docs and implementation roadmap with the real forward-test results. +- [x] Archive this execution roadmap with a completion summary. 
+ +## Files + +- `docs/tests/orch-skill/README.md` +- `docs/tests/orch-skill/leader-run-dispatch-reconcile-through-bundled-cli.md` +- `docs/tests/orch-skill/leader-blocked-answer-resume-through-bundled-cli.md` +- `docs/tests/orch-skill/strict-worktree-dispatch-to-cleanup-through-bundled-cli.md` +- `docs/tests/orch-skill/leader-retries-failed-task-through-bundled-cli.md` +- `docs/tests/orch-skill/leader-reassigns-blocked-task-through-bundled-cli.md` +- `docs/implementation-roadmap.md` +- `docs/roadmaps/archive/orch-skill-real-forward-test.md` + +## Decisions + +- Use real spawned role agents per case instead of the direct replay runner, because the user explicitly asked for true tests with subagents. +- Keep the main thread responsible for DB setup, fixture creation, and independent validation so the final judgment does not rely only on role-agent self-reporting. +- Fall back from `fork_context: true` to `fork_context: false` for the real case runs after the first wider-context attempt stalled and mis-executed the worker-side contract in this repo. +- For the longer `retry` and `reassign` cases, keep one leader agent active across staged prompts instead of one long monolithic prompt, because staged execution proved more reliable while still preserving a real agent-owned `orch` flow. + +## Blockers + +- none + +## Next Step + +- rerun the same five cases when the packaged skill binaries or case docs change, and consider adding the same real subagent coverage for `council-review` if that surface needs parity + +## Completion Summary + +- Verified both project-local skill bundles with spawned-agent help-command probes before the real runs. +- Collected successful real subagent evidence for all five orch-skill cases under `/tmp/orch-skill-subagents.J1XWgs`. +- Main-thread validation confirmed all five final successful runs reached the expected `orch` and `inbox` states. 
+- Updated `docs/tests/orch-skill/README.md`, all five case files, and `docs/implementation-roadmap.md` to record the new real forward-test coverage. diff --git a/docs/tests/orch-skill/README.md b/docs/tests/orch-skill/README.md index 839b03b..766c2fb 100644 --- a/docs/tests/orch-skill/README.md +++ b/docs/tests/orch-skill/README.md @@ -122,6 +122,26 @@ Use these defaults unless a case file explicitly overrides them: - keep the temporary DB, repo fixture, and working directory on failure for debugging - cleanup the temporary working directory on success only if the caller does not need replay artifacts +## Direct CLI Replay + +The repository also includes a reusable direct replay runner at `scripts/run_orch_skill_forward_tests.sh`. + +This runner executes the bundled `skills/orch/assets/orch` and `skills/inbox/assets/inbox` binaries against temporary SQLite DBs and Git fixtures without spawning Codex role agents. + +Use it to validate packaged CLI behavior and record concrete evidence quickly, but do not treat it as a full replacement for the real subagent-forward model described above. + +The case files in this directory now include recorded example runs captured through that direct replay path on `2026-03-19`. + +## Real Subagent Forward Runs + +The five cases in this directory were also executed with real spawned role agents on `2026-03-19`. + +That run used injected project-local `skills/orch/` and `skills/inbox/` bundles with a narrow-context fallback (`fork_context: false`) after an earlier wider-context attempt proved unreliable for this repo. + +The successful evidence root for those runs was `/tmp/orch-skill-subagents.J1XWgs`. + +Some longer cases used staged leader progression while keeping the same leader agent active across phases so the run still exercised real agent-driven `orch` control flow instead of a main-thread direct replay. 
+ ## Per-Case Template Each case file should use this structure: diff --git a/docs/tests/orch-skill/leader-blocked-answer-resume-through-bundled-cli.md b/docs/tests/orch-skill/leader-blocked-answer-resume-through-bundled-cli.md index 0d60365..f4f6853 100644 --- a/docs/tests/orch-skill/leader-blocked-answer-resume-through-bundled-cli.md +++ b/docs/tests/orch-skill/leader-blocked-answer-resume-through-bundled-cli.md @@ -87,3 +87,30 @@ INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_I - use the default cleanup policy from [README.md](./README.md) - if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection + +## Recorded Example Run + +- recorded on: `2026-03-19` +- execution mode: `direct_cli_replay` via `scripts/run_orch_skill_forward_tests.sh` +- result: `pass` +- observed run id: `run_blog_skill_002` +- observed thread id: `thr_42ce634f273745e9b95badc14ce52708` +- evidence summary: +- `orch wait --for task_blocked` woke on the worker question, and `inbox wait-reply` later woke on the leader answer +- final `orch status --run run_blog_skill_002 --json` returned `run.status == "done"` and `tasks[0].status == "done"` +- final `inbox show --thread thr_42ce634f273745e9b95badc14ce52708 --json` contained `question`, `answer`, and `result` messages +- the recorded `question` payload was `Should logging go to stdout or stderr?`, and the recorded `answer` body was `Use stdout for MVP.` +- note: this recorded run exercised the packaged binaries directly in a temporary DB and did not spawn separate Codex role agents + +## Recorded Real Forward Run + +- recorded on: `2026-03-19` +- execution mode: `real_subagent_forward_test` +- result: `pass` +- evidence root: `/tmp/orch-skill-subagents.J1XWgs/leader-blocked-answer-resume-through-bundled-cli` +- observed run id: `run_blog_skill_002` +- observed thread id: `thr_fd11536a0b2f4c668f6e78c38090816e` +- evidence summary: +- a real leader agent using `skills/orch/` completed `wait 
--for task_blocked`, `blocked`, `answer`, `wait --for task_done`, `reconcile`, and `status` +- a real worker agent using `skills/inbox/` completed `claim`, `update --status in_progress`, `update --status blocked`, `wait-reply`, resume `update`, and `done` +- main-thread validation confirmed `run.status == "done"`, `task.status == "done"`, the blocked question payload `Should logging go to stdout or stderr?`, and the answer body `Use stdout for MVP.` diff --git a/docs/tests/orch-skill/leader-reassigns-blocked-task-through-bundled-cli.md b/docs/tests/orch-skill/leader-reassigns-blocked-task-through-bundled-cli.md index 937ea96..0525b6d 100644 --- a/docs/tests/orch-skill/leader-reassigns-blocked-task-through-bundled-cli.md +++ b/docs/tests/orch-skill/leader-reassigns-blocked-task-through-bundled-cli.md @@ -96,3 +96,34 @@ INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_I - use the default cleanup policy from [README.md](./README.md) - if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection + +## Recorded Example Run + +- recorded on: `2026-03-19` +- execution mode: `direct_cli_replay` via `scripts/run_orch_skill_forward_tests.sh` +- result: `pass` +- observed run id: `run_blog_skill_reassign_001` +- observed original thread id: `thr_0a61240412134de3b3d9ab219b6c8f19` +- observed reassigned thread id: `thr_12fbcf6d89d948548306198d013d77a5` +- evidence summary: +- `orch wait --for task_blocked` woke after worker-a posted a blocked question with payload `Proceed with v1 scope?` +- `orch reassign --run run_blog_skill_reassign_001 --task T1 --to worker-b --json` returned `attempt_no == 2` and assigned the new attempt to `worker-b` +- final `inbox show` on the original thread returned `thread.status == "cancelled"` and preserved the blocked `question` message +- final `inbox show` on the reassigned thread returned `thread.status == "done"` +- final `orch status --run run_blog_skill_reassign_001 --json` returned 
`run.status == "done"` and `tasks[0].status == "done"` +- note: this recorded run exercised the packaged binaries directly in a temporary DB and did not spawn separate Codex role agents + +## Recorded Real Forward Run + +- recorded on: `2026-03-19` +- execution mode: `real_subagent_forward_test` +- result: `pass` +- evidence root: `/tmp/orch-skill-subagents.J1XWgs/leader-reassigns-blocked-task-through-bundled-cli-phased` +- observed run id: `run_blog_skill_reassign_001` +- observed original thread id: `thr_7d43af5bc1f7467da98a39adb0de5808` +- observed reassigned thread id: `thr_eba253db8965423b855d0c784a29702c` +- evidence summary: +- the same real leader agent using `skills/orch/` completed the case in three phases: initial `run/task/dispatch`, then `wait --for task_blocked` plus `reassign`, then final `wait --for task_done` plus `status` +- a real `worker-a` agent using `skills/inbox/` claimed the original thread and posted the blocked question `Proceed with v1 scope?` +- a real `worker-b` agent using `skills/inbox/` claimed the reassigned thread and completed it +- main-thread validation confirmed the original thread finished `cancelled`, the reassigned thread finished `done`, and the original blocked question remained visible in thread history diff --git a/docs/tests/orch-skill/leader-retries-failed-task-through-bundled-cli.md b/docs/tests/orch-skill/leader-retries-failed-task-through-bundled-cli.md index f842987..483a250 100644 --- a/docs/tests/orch-skill/leader-retries-failed-task-through-bundled-cli.md +++ b/docs/tests/orch-skill/leader-retries-failed-task-through-bundled-cli.md @@ -89,3 +89,33 @@ INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_I - use the default cleanup policy from [README.md](./README.md) - if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection + +## Recorded Example Run + +- recorded on: `2026-03-19` +- execution mode: `direct_cli_replay` via 
`scripts/run_orch_skill_forward_tests.sh` +- result: `pass` +- observed run id: `run_blog_skill_retry_001` +- observed first thread id: `thr_8dbf2d2e46d7469891cc1ef604da476f` +- observed second thread id: `thr_bdd86f4fe08e4ebfb39b8151ac41a3bb` +- evidence summary: +- `orch wait --for task_failed` woke after the first worker-owned thread failed +- `orch retry --run run_blog_skill_retry_001 --task T1 --json` returned `attempt_no == 2` with a distinct replacement thread for the same worker +- final `inbox show` on the first thread returned `thread.status == "failed"` +- final `inbox show` on the second thread returned `thread.status == "done"` +- final `orch status --run run_blog_skill_retry_001 --json` returned `run.status == "done"` and `tasks[0].status == "done"` +- note: this recorded run exercised the packaged binaries directly in a temporary DB and did not spawn separate Codex role agents + +## Recorded Real Forward Run + +- recorded on: `2026-03-19` +- execution mode: `real_subagent_forward_test` +- result: `pass` +- evidence root: `/tmp/orch-skill-subagents.J1XWgs/leader-retries-failed-task-through-bundled-cli-phased` +- observed run id: `run_blog_skill_retry_001` +- observed first thread id: `thr_1e22121642294b56aae351ddec5180d1` +- observed second thread id: `thr_f2ab1f1899964007b2447796204e1928` +- evidence summary: +- the same real leader agent using `skills/orch/` completed the case in three phases: initial `run/task/dispatch`, then `wait --for task_failed` plus `retry`, then final `wait --for task_done` plus `status` +- a real worker agent using `skills/inbox/` failed the first thread, polled for the retried pending thread, then claimed and completed the second thread +- main-thread validation confirmed the two thread ids were distinct, the first thread finished `failed`, the second thread finished `done`, and the run/task both finished `done` diff --git a/docs/tests/orch-skill/leader-run-dispatch-reconcile-through-bundled-cli.md 
b/docs/tests/orch-skill/leader-run-dispatch-reconcile-through-bundled-cli.md index 1d93aca..f97b5bb 100644 --- a/docs/tests/orch-skill/leader-run-dispatch-reconcile-through-bundled-cli.md +++ b/docs/tests/orch-skill/leader-run-dispatch-reconcile-through-bundled-cli.md @@ -88,3 +88,29 @@ INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_I - use the default cleanup policy from [README.md](./README.md) - if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection + +## Recorded Example Run + +- recorded on: `2026-03-19` +- execution mode: `direct_cli_replay` via `scripts/run_orch_skill_forward_tests.sh` +- result: `pass` +- observed run id: `run_blog_skill_001` +- observed thread id: `thr_eced1b8cb1254065a7cd3aaff6dc0bcb` +- evidence summary: +- final `orch status --run run_blog_skill_001 --json` returned `run.status == "done"` with a single task `T1` in state `done` +- final `inbox show --thread thr_eced1b8cb1254065a7cd3aaff6dc0bcb --json` returned thread state `done` and message kinds `task`, `progress`, and `result` +- the replay also observed `orch wait --for task_done` wake successfully before the final reconcile +- note: this recorded run exercised the packaged binaries directly in a temporary DB and did not spawn separate Codex role agents + +## Recorded Real Forward Run + +- recorded on: `2026-03-19` +- execution mode: `real_subagent_forward_test` +- result: `pass` +- evidence root: `/tmp/orch-skill-subagents.J1XWgs/leader-run-dispatch-reconcile-through-bundled-cli` +- observed run id: `run_blog_skill_001` +- observed thread id: `thr_7c64e75bbcce4143a7fc425242f7e7d3` +- evidence summary: +- a real leader agent using `skills/orch/` completed `run init`, `task add`, `dispatch`, `wait`, `reconcile`, and `status` +- a real worker agent using `skills/inbox/` completed `fetch`, `claim`, `update --status in_progress`, and `done` +- main-thread validation confirmed `status.data.run.status == "done"`, 
`status.data.tasks[0].status == "done"`, and thread history kinds `task`, `progress`, and `result` diff --git a/docs/tests/orch-skill/strict-worktree-dispatch-to-cleanup-through-bundled-cli.md b/docs/tests/orch-skill/strict-worktree-dispatch-to-cleanup-through-bundled-cli.md index 8f0ac24..d45e9c4 100644 --- a/docs/tests/orch-skill/strict-worktree-dispatch-to-cleanup-through-bundled-cli.md +++ b/docs/tests/orch-skill/strict-worktree-dispatch-to-cleanup-through-bundled-cli.md @@ -88,3 +88,32 @@ test ! -d WORKTREE_PATH - use the default cleanup policy from [README.md](./README.md) - if the run fails, retain `TMPDIR`, `coord.db`, and the Git repo fixture for replay and manual inspection + +## Recorded Example Run + +- recorded on: `2026-03-19` +- execution mode: `direct_cli_replay` via `scripts/run_orch_skill_forward_tests.sh` +- result: `pass` +- observed run id: `run_blog_skill_worktree_001` +- observed thread id: `thr_5743259fdccb41f9bb33dce0040b27a5` +- observed worktree suffix: `.orch/worktrees/run-blog-skill-worktree-001/T1/attempt-1` +- evidence summary: +- `orch dispatch --strict-worktree` returned `base_ref == "HEAD"`, a concrete `base_commit`, branch `orch/run-blog-skill-worktree-001/T1/attempt-1`, and a non-empty `worktree_path` +- the task payload stored on the worker thread exposed the same `worktree_path` +- final `orch status --run run_blog_skill_worktree_001 --json` returned `run.status == "done"` and `tasks[0].status == "done"` +- final `orch cleanup --run run_blog_skill_worktree_001 --task T1 --json` returned one cleaned attempt and the worktree directory no longer existed afterward +- note: this recorded run exercised the packaged binaries directly in a temporary DB and Git fixture and did not spawn separate Codex role agents + +## Recorded Real Forward Run + +- recorded on: `2026-03-19` +- execution mode: `real_subagent_forward_test` +- result: `pass` +- evidence root: 
`/tmp/orch-skill-subagents.J1XWgs/strict-worktree-dispatch-to-cleanup-through-bundled-cli` +- observed run id: `run_blog_skill_worktree_001` +- observed thread id: `thr_089527cd07f74b52a524ba07ed74c2e4` +- observed worktree path: `/private/tmp/orch-skill-subagents.J1XWgs/strict-worktree-dispatch-to-cleanup-through-bundled-cli/repo/.orch/worktrees/run-blog-skill-worktree-001/T1/attempt-1` +- evidence summary: +- a real leader agent using `skills/orch/` completed strict `dispatch`, `wait`, `reconcile`, `cleanup`, and `status` +- a real worker agent using `skills/inbox/` claimed the thread and finished it with `done` +- main-thread validation confirmed that the task payload did include the same `worktree_path` even though the worker agent summary failed to notice it, and also confirmed the worktree directory no longer existed after cleanup diff --git a/scripts/run_orch_skill_forward_tests.sh b/scripts/run_orch_skill_forward_tests.sh new file mode 100755 index 0000000..6d5e7b2 --- /dev/null +++ b/scripts/run_orch_skill_forward_tests.sh @@ -0,0 +1,739 @@ +#!/usr/bin/env bash + +set -euo pipefail + +readonly SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +readonly REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" +readonly ORCH_BIN="${REPO_ROOT}/skills/orch/assets/orch" +readonly INBOX_BIN="${REPO_ROOT}/skills/inbox/assets/inbox" + +OUTPUT_ROOT="" +LAST_BG_PID="" + +usage() { + cat <<'EOF' +Usage: scripts/run_orch_skill_forward_tests.sh [--output-root PATH] + +Runs the documented orch-skill forward scenarios as direct bundled-CLI replays. +Each case gets its own temporary workspace, SQLite DB, and JSON evidence files. + +Options: + --output-root PATH Write results under PATH instead of a temporary directory + -h, --help Show this help text +EOF +} + +require_command() { + local cmd="$1" + if ! 
command -v "${cmd}" >/dev/null 2>&1; then + printf 'missing required command: %s\n' "${cmd}" >&2 + exit 1 + fi +} + +run_json() { + local output_path="$1" + shift + + "$@" >"${output_path}" +} + +json_get() { + local file_path="$1" + local filter="$2" + + jq -r "${filter}" "${file_path}" +} + +json_check() { + local file_path="$1" + local filter="$2" + local label="$3" + + if jq -e "${filter}" "${file_path}" >/dev/null; then + printf 'PASS: %s\n' "${label}" + else + printf 'FAIL: %s\n' "${label}" >&2 + printf ' file: %s\n' "${file_path}" >&2 + printf ' jq: %s\n' "${filter}" >&2 + exit 1 + fi +} + +start_wait() { + local output_path="$1" + shift + + "$@" >"${output_path}" & + LAST_BG_PID="$!" +} + +wait_for_pid() { + local pid="$1" + local label="$2" + + if ! wait "${pid}"; then + printf 'background command failed: %s\n' "${label}" >&2 + exit 1 + fi +} + +init_case_dir() { + local case_slug="$1" + local base_dir="${OUTPUT_ROOT}/${case_slug}" + mkdir -p "${base_dir}" + printf '%s\n' "${base_dir}" +} + +init_db() { + local db_path="$1" + run_json "${db_path%.db}.init.json" "${INBOX_BIN}" --db "${db_path}" --json init +} + +init_git_repo() { + local repo_path="$1" + + mkdir -p "${repo_path}" + git -C "${repo_path}" init >/dev/null + git -C "${repo_path}" config user.name "Orch Skill Replay" + git -C "${repo_path}" config user.email "orch-skill-replay@example.com" + printf '# Replay Fixture\n' >"${repo_path}/README.md" + git -C "${repo_path}" add README.md + git -C "${repo_path}" commit -m "Initial commit" >/dev/null +} + +join_json_array() { + if [ "$#" -eq 0 ]; then + printf '[]' + return + fi + printf '%s\n' "$@" | jq -R . | jq -s . 
+} + +write_result_json() { + local case_dir="$1" + local case_slug="$2" + local db_path="$3" + local run_id="$4" + local result="$5" + local duration_seconds="$6" + local thread_ids_json="$7" + local worktree_paths_json="$8" + local notes="$9" + + jq -n \ + --arg case "${case_slug}" \ + --arg db_path "${db_path}" \ + --arg run_id "${run_id}" \ + --arg result "${result}" \ + --arg notes "${notes}" \ + --arg mode "direct_cli_replay" \ + --arg runner_script "scripts/run_orch_skill_forward_tests.sh" \ + --argjson duration_seconds "${duration_seconds}" \ + --argjson thread_ids "${thread_ids_json}" \ + --argjson worktree_paths "${worktree_paths_json}" \ + '{ + case: $case, + db_path: $db_path, + run_id: $run_id, + thread_ids: $thread_ids, + worktree_paths: $worktree_paths, + result: $result, + duration_seconds: $duration_seconds, + execution_mode: $mode, + runner_script: $runner_script, + notes: $notes + }' >"${case_dir}/result.json" +} + +print_case_summary() { + local case_slug="$1" + local case_dir="$2" + + printf '%s\t%s\n' "${case_slug}" "$(json_get "${case_dir}/result.json" '.result')" +} + +run_case_happy_path() { + local case_slug="leader-run-dispatch-reconcile-through-bundled-cli" + local run_id="run_blog_skill_001" + local case_dir + case_dir="$(init_case_dir "${case_slug}")" + local db_path="${case_dir}/coord.db" + local started_at + started_at="$(date +%s)" + + init_db "${db_path}" + + run_json "${case_dir}/run.json" \ + "${ORCH_BIN}" --db "${db_path}" --json run init \ + --run "${run_id}" --goal "Build blog MVP" --summary "Public blog plus admin CRUD" + + run_json "${case_dir}/task.json" \ + "${ORCH_BIN}" --db "${db_path}" --json task add \ + --run "${run_id}" --task T1 --title "Implement retry policy" \ + --summary "Add retry policy to HTTP client" --default-to worker-a + + run_json "${case_dir}/dispatch.json" \ + "${ORCH_BIN}" --db "${db_path}" --json dispatch \ + --run "${run_id}" --task T1 --to worker-a \ + --body "Implement retry handling for the HTTP 
client." + + local thread_id + thread_id="$(json_get "${case_dir}/dispatch.json" '.data.attempt.thread_id')" + + local wait_pid + start_wait "${case_dir}/wait-task-done.json" \ + "${ORCH_BIN}" --db "${db_path}" --json wait \ + --run "${run_id}" --for task_done --timeout-seconds 15 + wait_pid="${LAST_BG_PID}" + + sleep 0.2 + + run_json "${case_dir}/fetch.json" \ + "${INBOX_BIN}" --db "${db_path}" --json fetch \ + --agent worker-a --limit 1 + + run_json "${case_dir}/claim.json" \ + "${INBOX_BIN}" --db "${db_path}" --json claim \ + --agent worker-a --thread "${thread_id}" + + run_json "${case_dir}/update.json" \ + "${INBOX_BIN}" --db "${db_path}" --json update \ + --agent worker-a --thread "${thread_id}" \ + --status in_progress --summary "Implementation started" + + run_json "${case_dir}/done.json" \ + "${INBOX_BIN}" --db "${db_path}" --json done \ + --agent worker-a --thread "${thread_id}" \ + --summary "Retry policy implemented" \ + --body "The HTTP client now retries transient failures." 
+ + wait_for_pid "${wait_pid}" "${case_slug}: wait task_done" + + run_json "${case_dir}/reconcile.json" \ + "${ORCH_BIN}" --db "${db_path}" --json reconcile --run "${run_id}" + + run_json "${case_dir}/status.json" \ + "${ORCH_BIN}" --db "${db_path}" --json status --run "${run_id}" + + run_json "${case_dir}/show.json" \ + "${INBOX_BIN}" --db "${db_path}" --json show --thread "${thread_id}" + + json_check "${case_dir}/wait-task-done.json" '.data.woke == true' "wait woke on task_done" + json_check "${case_dir}/status.json" '.data.run.status == "done"' "run status done" + json_check "${case_dir}/status.json" '.data.tasks | length == 1 and .[0].task_id == "T1" and .[0].status == "done"' "single done task" + json_check "${case_dir}/show.json" '.data.thread.status == "done"' "thread status done" + json_check "${case_dir}/show.json" '[.data.messages[].kind] | index("task") != null and index("progress") != null and index("result") != null' "thread history includes task progress result" + + local duration_seconds + duration_seconds="$(( $(date +%s) - started_at ))" + write_result_json \ + "${case_dir}" "${case_slug}" "${db_path}" "${run_id}" pass "${duration_seconds}" \ + "$(join_json_array "${thread_id}")" \ + "$(join_json_array)" \ + "Direct CLI replay of the documented happy path." 
+}
+
+#######################################
+# Case: leader-blocked-answer-resume-through-bundled-cli.
+# Replays a blocked-question round trip for task T1: dispatch to worker-a,
+# worker-a reports `blocked` with a question payload, the leader answers via
+# `orch answer`, and worker-a resumes and completes the thread.
+# Each waiter (orch `wait`, inbox `wait-reply`) is launched before the action
+# that is expected to wake it.
+# NOTE(review): init_case_dir, init_db, run_json, json_get, start_wait,
+# wait_for_pid, json_check, write_result_json and join_json_array are defined
+# outside this section; run_json/start_wait presumably capture the command's
+# JSON output into the file given as first argument - confirm there.
+# Globals:   ORCH_BIN, INBOX_BIN (read); LAST_BG_PID (read after start_wait)
+# Arguments: none
+# Returns:   relies on the helpers to abort the script when a step fails.
+#######################################
+run_case_blocked_answer() {
+  local case_slug="leader-blocked-answer-resume-through-bundled-cli"
+  local run_id="run_blog_skill_002"
+  local case_dir
+  case_dir="$(init_case_dir "${case_slug}")"
+  local db_path="${case_dir}/coord.db"
+  local started_at
+  started_at="$(date +%s)"
+
+  init_db "${db_path}"
+
+  run_json "${case_dir}/run.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json run init \
+    --run "${run_id}" --goal "Build dependency-aware workflow" \
+    --summary "Exercise blocked question handling"
+
+  run_json "${case_dir}/task.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json task add \
+    --run "${run_id}" --task T1 --title "Build frontend" \
+    --summary "Implement frontend flow" --default-to worker-a
+
+  run_json "${case_dir}/dispatch.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json dispatch \
+    --run "${run_id}" --task T1 --to worker-a \
+    --body "Implement the worker flow and stop if a logging decision is needed."
+
+  local thread_id
+  thread_id="$(json_get "${case_dir}/dispatch.json" '.data.attempt.thread_id')"
+
+  # Leader side: start waiting for the blocked event before the worker acts.
+  local wait_blocked_pid
+  start_wait "${case_dir}/wait-task-blocked.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json wait \
+    --run "${run_id}" --for task_blocked --timeout-seconds 15
+  wait_blocked_pid="${LAST_BG_PID}"
+
+  # Presumably gives the background waiter time to start - TODO confirm.
+  sleep 0.2
+
+  run_json "${case_dir}/claim.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json claim \
+    --agent worker-a --thread "${thread_id}"
+
+  run_json "${case_dir}/progress.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json update \
+    --agent worker-a --thread "${thread_id}" \
+    --status in_progress --summary "Implementation started"
+
+  run_json "${case_dir}/blocked.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json update \
+    --agent worker-a --thread "${thread_id}" \
+    --status blocked --summary "Need logging decision" \
+    --payload-json '{"question":"Should logging go to stdout or stderr?"}'
+
+  wait_for_pid "${wait_blocked_pid}" "${case_slug}: wait task_blocked"
+
+  # The worker's wait-reply is anchored after its own blocked message.
+  local blocked_message_id
+  blocked_message_id="$(json_get "${case_dir}/blocked.json" '.data.message.message_id')"
+
+  run_json "${case_dir}/orch-blocked.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json blocked --run "${run_id}"
+
+  local wait_reply_pid
+  start_wait "${case_dir}/wait-reply.json" \
+    "${INBOX_BIN}" --db "${db_path}" --agent worker-a --json wait-reply \
+    --thread "${thread_id}" --after-message "${blocked_message_id}" --timeout-seconds 15
+  wait_reply_pid="${LAST_BG_PID}"
+
+  sleep 0.2
+
+  run_json "${case_dir}/answer.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json answer \
+    --run "${run_id}" --task T1 --body "Use stdout for MVP."
+
+  wait_for_pid "${wait_reply_pid}" "${case_slug}: inbox wait-reply"
+
+  local wait_done_pid
+  start_wait "${case_dir}/wait-task-done.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json wait \
+    --run "${run_id}" --for task_done --timeout-seconds 15
+  wait_done_pid="${LAST_BG_PID}"
+
+  sleep 0.2
+
+  run_json "${case_dir}/resume.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json update \
+    --agent worker-a --thread "${thread_id}" \
+    --status in_progress --summary "Decision applied"
+
+  run_json "${case_dir}/done.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json done \
+    --agent worker-a --thread "${thread_id}" \
+    --summary "Frontend complete" \
+    --body "Worker resumed after the leader answer."
+
+  wait_for_pid "${wait_done_pid}" "${case_slug}: wait task_done"
+
+  run_json "${case_dir}/reconcile.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json reconcile --run "${run_id}"
+
+  run_json "${case_dir}/status.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json status --run "${run_id}"
+
+  run_json "${case_dir}/show.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json show --thread "${thread_id}"
+
+  json_check "${case_dir}/wait-task-blocked.json" '.data.woke == true and (.data.events | length) >= 1 and .data.events[0].type == "task_blocked"' "wait woke on task_blocked"
+  json_check "${case_dir}/orch-blocked.json" '.data.blocked | length == 1 and .[0].task.task_id == "T1"' "blocked queue lists task"
+  json_check "${case_dir}/wait-reply.json" '.data.woke == true and .data.message.kind == "answer"' "wait-reply woke on answer"
+  json_check "${case_dir}/status.json" '.data.run.status == "done" and .data.tasks[0].status == "done"' "blocked flow run completes"
+  json_check "${case_dir}/show.json" '[.data.messages[].kind] | index("question") != null and index("answer") != null and index("result") != null' "history includes question answer result"
+  json_check "${case_dir}/show.json" 'any(.data.messages[]; .kind == "question" and .payload_json.question == "Should logging go to stdout or stderr?")' "question payload matches"
+  json_check "${case_dir}/show.json" 'any(.data.messages[]; .kind == "answer" and .body == "Use stdout for MVP.")' "answer body matches"
+
+  local duration_seconds
+  duration_seconds="$(( $(date +%s) - started_at ))"
+  write_result_json \
+    "${case_dir}" "${case_slug}" "${db_path}" "${run_id}" pass "${duration_seconds}" \
+    "$(join_json_array "${thread_id}")" \
+    "$(join_json_array)" \
+    "Direct CLI replay of the blocked-question workflow, including orch wait and inbox wait-reply."
+}
+
+#######################################
+# Case: strict-worktree-dispatch-to-cleanup-through-bundled-cli.
+# Dispatches T1 with --repo-path/--workspace-root/--strict-worktree against a
+# repo prepared by init_git_repo, has worker-a claim and finish the thread,
+# then runs `orch cleanup` and verifies the reported worktree directory no
+# longer exists.
+# Globals:   ORCH_BIN, INBOX_BIN (read); LAST_BG_PID (read after start_wait)
+# Arguments: none
+# Outputs:   PASS/FAIL lines for the two inline checks (FAIL goes to stderr)
+# Returns:   exits 1 when an inline check fails.
+#######################################
+run_case_strict_worktree_cleanup() {
+  local case_slug="strict-worktree-dispatch-to-cleanup-through-bundled-cli"
+  local run_id="run_blog_skill_worktree_001"
+  local case_dir
+  case_dir="$(init_case_dir "${case_slug}")"
+  local db_path="${case_dir}/coord.db"
+  local repo_path="${case_dir}/repo"
+  local started_at
+  started_at="$(date +%s)"
+
+  init_db "${db_path}"
+  init_git_repo "${repo_path}"
+
+  run_json "${case_dir}/run.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json run init \
+    --run "${run_id}" --goal "Validate strict worktree dispatch" \
+    --summary "Exercise worktree allocation and cleanup"
+
+  run_json "${case_dir}/task.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json task add \
+    --run "${run_id}" --task T1 --title "Implement backend" \
+    --summary "Implement inside an isolated worktree" --default-to worker-a
+
+  run_json "${case_dir}/dispatch.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json dispatch \
+    --run "${run_id}" --task T1 --to worker-a \
+    --repo-path "${repo_path}" --workspace-root .orch/worktrees --strict-worktree \
+    --body "Implement inside isolated worktree."
+
+  local thread_id
+  local worktree_path
+  thread_id="$(json_get "${case_dir}/dispatch.json" '.data.attempt.thread_id')"
+  worktree_path="$(json_get "${case_dir}/dispatch.json" '.data.attempt.worktree_path')"
+
+  local wait_pid
+  start_wait "${case_dir}/wait-task-done.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json wait \
+    --run "${run_id}" --for task_done --timeout-seconds 15
+  wait_pid="${LAST_BG_PID}"
+
+  # Presumably gives the background waiter time to start - TODO confirm.
+  sleep 0.2
+
+  run_json "${case_dir}/claim.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json claim \
+    --agent worker-a --thread "${thread_id}"
+
+  # Snapshot of the thread before completion; used below to check that the
+  # task message payload carries the worktree path.
+  run_json "${case_dir}/thread-before-done.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json show --thread "${thread_id}"
+
+  run_json "${case_dir}/done.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json done \
+    --agent worker-a --thread "${thread_id}" \
+    --summary "Backend complete" \
+    --body "Confirmed the assigned worktree path from the task payload."
+
+  wait_for_pid "${wait_pid}" "${case_slug}: wait task_done"
+
+  run_json "${case_dir}/reconcile.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json reconcile --run "${run_id}"
+
+  run_json "${case_dir}/cleanup.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json cleanup --run "${run_id}" --task T1
+
+  run_json "${case_dir}/status.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json status --run "${run_id}"
+
+  run_json "${case_dir}/show.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json show --thread "${thread_id}"
+
+  json_check "${case_dir}/dispatch.json" '.data.attempt.worktree_path != ""' "dispatch returns worktree path"
+  # Done inline rather than through json_check because the filter needs the
+  # shell value passed in with --arg.
+  if jq -e --arg worktree "${worktree_path}" 'any(.data.messages[]; .kind == "task" and .payload_json.worktree_path == $worktree)' "${case_dir}/thread-before-done.json" >/dev/null; then
+    printf 'PASS: task payload exposes worktree path\n'
+  else
+    printf 'FAIL: task payload exposes worktree path\n' >&2
+    exit 1
+  fi
+  json_check "${case_dir}/status.json" '.data.run.status == "done" and .data.tasks[0].status == "done"' "worktree flow run completes"
+  json_check "${case_dir}/cleanup.json" '.data.cleaned | length == 1' "cleanup removes one attempt"
+  if [ -d "${worktree_path}" ]; then
+    printf 'FAIL: worktree path still exists after cleanup: %s\n' "${worktree_path}" >&2
+    exit 1
+  fi
+  printf 'PASS: cleaned worktree removed\n'
+
+  local duration_seconds
+  duration_seconds="$(( $(date +%s) - started_at ))"
+  write_result_json \
+    "${case_dir}" "${case_slug}" "${db_path}" "${run_id}" pass "${duration_seconds}" \
+    "$(join_json_array "${thread_id}")" \
+    "$(join_json_array "${worktree_path}")" \
+    "Direct CLI replay of strict worktree dispatch, completion, and cleanup."
+}
+
+#######################################
+# Case: leader-retries-failed-task-through-bundled-cli.
+# worker-a fails the first attempt of T1; after reconcile the leader issues
+# `orch retry`, worker-a claims and completes the second thread. Verifies the
+# retry is attempt 2, the first thread ends `failed`, the second `done`, and
+# that the two thread IDs differ.
+# Globals:   ORCH_BIN, INBOX_BIN (read); LAST_BG_PID (read after start_wait)
+# Arguments: none
+# Outputs:   PASS/FAIL line for the thread-ID check (FAIL goes to stderr)
+# Returns:   exits 1 when the retry reused the original thread ID.
+#######################################
+run_case_retry() {
+  local case_slug="leader-retries-failed-task-through-bundled-cli"
+  local run_id="run_blog_skill_retry_001"
+  local case_dir
+  case_dir="$(init_case_dir "${case_slug}")"
+  local db_path="${case_dir}/coord.db"
+  local started_at
+  started_at="$(date +%s)"
+
+  init_db "${db_path}"
+
+  run_json "${case_dir}/run.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json run init \
+    --run "${run_id}" --goal "Validate retry behavior" \
+    --summary "Exercise failed attempt retry"
+
+  run_json "${case_dir}/task.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json task add \
+    --run "${run_id}" --task T1 --title "Implement backend" \
+    --summary "Retry after a simulated failure" --default-to worker-a
+
+  run_json "${case_dir}/dispatch.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json dispatch \
+    --run "${run_id}" --task T1 --to worker-a \
+    --body "Initial attempt expected to fail for retry validation."
+
+  local thread_id_1
+  thread_id_1="$(json_get "${case_dir}/dispatch.json" '.data.attempt.thread_id')"
+
+  local wait_failed_pid
+  start_wait "${case_dir}/wait-task-failed.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json wait \
+    --run "${run_id}" --for task_failed --timeout-seconds 15
+  wait_failed_pid="${LAST_BG_PID}"
+
+  # Presumably gives the background waiter time to start - TODO confirm.
+  sleep 0.2
+
+  run_json "${case_dir}/claim-1.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json claim \
+    --agent worker-a --thread "${thread_id_1}"
+
+  run_json "${case_dir}/fail.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json fail \
+    --agent worker-a --thread "${thread_id_1}" \
+    --summary "Build failed" --body "Simulated first-attempt failure."
+
+  wait_for_pid "${wait_failed_pid}" "${case_slug}: wait task_failed"
+
+  run_json "${case_dir}/reconcile-failed.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json reconcile --run "${run_id}"
+
+  run_json "${case_dir}/retry.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json retry \
+    --run "${run_id}" --task T1 --to worker-a \
+    --body "Retry after fixing the failure."
+
+  local thread_id_2
+  thread_id_2="$(json_get "${case_dir}/retry.json" '.data.attempt.thread_id')"
+
+  local wait_done_pid
+  start_wait "${case_dir}/wait-task-done.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json wait \
+    --run "${run_id}" --for task_done --timeout-seconds 15
+  wait_done_pid="${LAST_BG_PID}"
+
+  sleep 0.2
+
+  run_json "${case_dir}/fetch-retry.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json fetch \
+    --agent worker-a --status pending --limit 5
+
+  run_json "${case_dir}/claim-2.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json claim \
+    --agent worker-a --thread "${thread_id_2}"
+
+  run_json "${case_dir}/done-2.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json done \
+    --agent worker-a --thread "${thread_id_2}" \
+    --summary "Retry completed" --body "Second attempt succeeded."
+
+  wait_for_pid "${wait_done_pid}" "${case_slug}: wait task_done"
+
+  run_json "${case_dir}/reconcile-done.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json reconcile --run "${run_id}"
+
+  run_json "${case_dir}/status.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json status --run "${run_id}"
+
+  run_json "${case_dir}/show-1.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json show --thread "${thread_id_1}"
+
+  run_json "${case_dir}/show-2.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json show --thread "${thread_id_2}"
+
+  json_check "${case_dir}/wait-task-failed.json" '.data.woke == true and (.data.events | length) >= 1 and .data.events[0].type == "task_failed"' "wait woke on task_failed"
+  json_check "${case_dir}/retry.json" '.data.attempt.attempt_no == 2 and .data.task.status == "dispatched"' "retry creates second dispatched attempt"
+  json_check "${case_dir}/status.json" '.data.run.status == "done" and .data.tasks[0].status == "done"' "retry flow run completes"
+  json_check "${case_dir}/show-1.json" '.data.thread.status == "failed"' "first thread failed"
+  json_check "${case_dir}/show-2.json" '.data.thread.status == "done"' "second thread done"
+  if [ "${thread_id_1}" = "${thread_id_2}" ]; then
+    printf 'FAIL: retry reused thread ID %s\n' "${thread_id_1}" >&2
+    exit 1
+  fi
+  printf 'PASS: retry created distinct thread IDs\n'
+
+  local duration_seconds
+  duration_seconds="$(( $(date +%s) - started_at ))"
+  write_result_json \
+    "${case_dir}" "${case_slug}" "${db_path}" "${run_id}" pass "${duration_seconds}" \
+    "$(join_json_array "${thread_id_1}" "${thread_id_2}")" \
+    "$(join_json_array)" \
+    "Direct CLI replay of failed attempt reconciliation followed by retry."
+}
+
+#######################################
+# Case: leader-reassigns-blocked-task-through-bundled-cli.
+# worker-a blocks on T1 with a question; after reconcile the leader runs
+# `orch reassign --to worker-b`, and worker-b claims and completes the new
+# thread. Verifies attempt 2 belongs to worker-b, the original thread ends
+# `cancelled` with its question preserved, and the two thread IDs differ.
+# Globals:   ORCH_BIN, INBOX_BIN (read); LAST_BG_PID (read after start_wait)
+# Arguments: none
+# Outputs:   PASS/FAIL line for the thread-ID check (FAIL goes to stderr)
+# Returns:   exits 1 when the reassignment reused the original thread ID.
+#######################################
+run_case_reassign() {
+  local case_slug="leader-reassigns-blocked-task-through-bundled-cli"
+  local run_id="run_blog_skill_reassign_001"
+  local case_dir
+  case_dir="$(init_case_dir "${case_slug}")"
+  local db_path="${case_dir}/coord.db"
+  local started_at
+  started_at="$(date +%s)"
+
+  init_db "${db_path}"
+
+  run_json "${case_dir}/run.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json run init \
+    --run "${run_id}" --goal "Validate reassign behavior" \
+    --summary "Exercise blocked-task reassignment"
+
+  run_json "${case_dir}/task.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json task add \
+    --run "${run_id}" --task T1 --title "Implement backend" \
+    --summary "Reassign after worker-a blocks" --default-to worker-a
+
+  run_json "${case_dir}/dispatch.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json dispatch \
+    --run "${run_id}" --task T1 --to worker-a \
+    --body "Initial attempt expected to be reassigned."
+
+  local thread_id_1
+  thread_id_1="$(json_get "${case_dir}/dispatch.json" '.data.attempt.thread_id')"
+
+  local wait_blocked_pid
+  start_wait "${case_dir}/wait-task-blocked.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json wait \
+    --run "${run_id}" --for task_blocked --timeout-seconds 15
+  wait_blocked_pid="${LAST_BG_PID}"
+
+  # Presumably gives the background waiter time to start - TODO confirm.
+  sleep 0.2
+
+  run_json "${case_dir}/claim-1.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json claim \
+    --agent worker-a --thread "${thread_id_1}"
+
+  run_json "${case_dir}/blocked.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json update \
+    --agent worker-a --thread "${thread_id_1}" \
+    --status blocked --summary "Need product decision" \
+    --payload-json '{"question":"Proceed with v1 scope?"}'
+
+  wait_for_pid "${wait_blocked_pid}" "${case_slug}: wait task_blocked"
+
+  run_json "${case_dir}/reconcile-blocked.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json reconcile --run "${run_id}"
+
+  run_json "${case_dir}/orch-blocked.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json blocked --run "${run_id}"
+
+  run_json "${case_dir}/reassign.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json reassign \
+    --run "${run_id}" --task T1 --to worker-b \
+    --reason "Try another worker with clearer ownership."
+
+  local thread_id_2
+  thread_id_2="$(json_get "${case_dir}/reassign.json" '.data.attempt.thread_id')"
+
+  local wait_done_pid
+  start_wait "${case_dir}/wait-task-done.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json wait \
+    --run "${run_id}" --for task_done --timeout-seconds 15
+  wait_done_pid="${LAST_BG_PID}"
+
+  sleep 0.2
+
+  run_json "${case_dir}/fetch-worker-b.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json fetch \
+    --agent worker-b --status pending --limit 5
+
+  run_json "${case_dir}/claim-2.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json claim \
+    --agent worker-b --thread "${thread_id_2}"
+
+  run_json "${case_dir}/done-2.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json done \
+    --agent worker-b --thread "${thread_id_2}" \
+    --summary "Reassigned work complete" --body "Worker-b completed the reassigned attempt."
+
+  wait_for_pid "${wait_done_pid}" "${case_slug}: wait task_done"
+
+  run_json "${case_dir}/reconcile-done.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json reconcile --run "${run_id}"
+
+  run_json "${case_dir}/status.json" \
+    "${ORCH_BIN}" --db "${db_path}" --json status --run "${run_id}"
+
+  run_json "${case_dir}/show-1.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json show --thread "${thread_id_1}"
+
+  run_json "${case_dir}/show-2.json" \
+    "${INBOX_BIN}" --db "${db_path}" --json show --thread "${thread_id_2}"
+
+  json_check "${case_dir}/wait-task-blocked.json" '.data.woke == true and (.data.events | length) >= 1 and .data.events[0].type == "task_blocked"' "wait woke on blocked event for reassign"
+  json_check "${case_dir}/orch-blocked.json" '.data.blocked | length == 1 and .[0].task.task_id == "T1"' "blocked queue lists reassigned task"
+  json_check "${case_dir}/reassign.json" '.data.attempt.attempt_no == 2 and .data.attempt.assigned_to == "worker-b"' "reassign creates attempt 2 for worker-b"
+  json_check "${case_dir}/status.json" '.data.run.status == "done" and .data.tasks[0].status == "done"' "reassign flow run completes"
+  json_check "${case_dir}/show-1.json" '.data.thread.status == "cancelled"' "original thread cancelled"
+  json_check "${case_dir}/show-2.json" '.data.thread.status == "done"' "new thread done"
+  json_check "${case_dir}/show-1.json" 'any(.data.messages[]; .kind == "question" and .payload_json.question == "Proceed with v1 scope?")' "original blocked question preserved"
+  if [ "${thread_id_1}" = "${thread_id_2}" ]; then
+    printf 'FAIL: reassign reused thread ID %s\n' "${thread_id_1}" >&2
+    exit 1
+  fi
+  printf 'PASS: reassign created distinct thread IDs\n'
+
+  local duration_seconds
+  duration_seconds="$(( $(date +%s) - started_at ))"
+  write_result_json \
+    "${case_dir}" "${case_slug}" "${db_path}" "${run_id}" pass "${duration_seconds}" \
+    "$(join_json_array "${thread_id_1}" "${thread_id_2}")" \
+    "$(join_json_array)" \
+    "Direct CLI replay of blocked-task reassignment from worker-a to worker-b."
+}
+
+#######################################
+# Entry point: parses options, checks prerequisites, runs the five replay
+# cases in order and prints a per-case summary table.
+# Globals:   OUTPUT_ROOT (read/written), ORCH_BIN, INBOX_BIN (read)
+# Arguments: --output-root <dir>  directory that receives the case results;
+#                                 created if missing. When OUTPUT_ROOT is
+#                                 empty a fresh mktemp directory is used.
+#            -h | --help          print usage and exit 0
+# Outputs:   results location and a "case<TAB>result" table on stdout;
+#            argument and prerequisite errors on stderr
+# Returns:   exits 1 on an unknown argument, a missing option value, or a
+#            non-executable orch/inbox binary.
+#######################################
+main() {
+  while [ "$#" -gt 0 ]; do
+    case "$1" in
+      --output-root)
+        # Without this guard a trailing --output-root either trips `set -u`
+        # on "$2" or leaves `shift 2` failing with "$#" stuck at 1.
+        if [ "$#" -lt 2 ]; then
+          printf 'missing value for %s\n' "$1" >&2
+          usage >&2
+          exit 1
+        fi
+        OUTPUT_ROOT="$2"
+        shift 2
+        ;;
+      -h|--help)
+        usage
+        exit 0
+        ;;
+      *)
+        printf 'unknown argument: %s\n' "$1" >&2
+        usage >&2
+        exit 1
+        ;;
+    esac
+  done
+
+  require_command jq
+  require_command git
+  require_command mktemp
+
+  if [ ! -x "${ORCH_BIN}" ]; then
+    printf 'orch skill binary is not executable: %s\n' "${ORCH_BIN}" >&2
+    exit 1
+  fi
+  if [ ! -x "${INBOX_BIN}" ]; then
+    printf 'inbox skill binary is not executable: %s\n' "${INBOX_BIN}" >&2
+    exit 1
+  fi
+
+  if [ -z "${OUTPUT_ROOT}" ]; then
+    OUTPUT_ROOT="$(mktemp -d "${TMPDIR:-/tmp}/orch-skill-forward.XXXXXX")"
+  else
+    mkdir -p "${OUTPUT_ROOT}"
+  fi
+
+  run_case_happy_path
+  run_case_blocked_answer
+  run_case_strict_worktree_cleanup
+  run_case_retry
+  run_case_reassign
+
+  printf '\nResults written to %s\n' "${OUTPUT_ROOT}"
+  printf 'case\tresult\n'
+  # Summaries are listed in execution order; each case's directory under
+  # OUTPUT_ROOT is named after its slug.
+  local case_slug
+  for case_slug in \
+    "leader-run-dispatch-reconcile-through-bundled-cli" \
+    "leader-blocked-answer-resume-through-bundled-cli" \
+    "strict-worktree-dispatch-to-cleanup-through-bundled-cli" \
+    "leader-retries-failed-task-through-bundled-cli" \
+    "leader-reassigns-blocked-task-through-bundled-cli"; do
+    print_case_summary "${case_slug}" "${OUTPUT_ROOT}/${case_slug}"
+  done
+}
+
+main "$@"