From fd2b57feaf08b08e07549f84e8cba9a03851ab59 Mon Sep 17 00:00:00 2001
From: kurihada <kurihada@qq.com>
Date: Tue, 24 Mar 2026 02:30:38 +0800
Subject: [PATCH] Remove Markdown test docs and document tests inline

---
 AGENTS.md                                     |  16 +-
 docs/skill-workspace-monorepo.md              |  39 +-
 docs/tests/council-review-skill/README.md     | 182 --------
 ...ainstorm-end-to-end-through-bundled-cli.md | 108 -----
 ...ejects-before-tally-through-bundled-cli.md |  73 ----
 ...ejects-invalid-show-through-bundled-cli.md | 102 -----
 ...l-includes-minority-through-bundled-cli.md | 107 -----
 ...id-json-fails-tally-through-bundled-cli.md | 126 ------
 ...rt-with-target-file-through-bundled-cli.md | 113 -----
 ...-distinct-proposals-through-bundled-cli.md | 120 ------
 ...only-default-report-through-bundled-cli.md |  88 ----
 ...ouncil-wait-timeout-through-bundled-cli.md |  77 ----
 docs/tests/inbox-skill/README.md              | 162 --------
 .../artifact-roundtrip-through-bundled-cli.md |  83 ----
 ...meout-without-reply-through-bundled-cli.md |  88 ----
 ...cels-claimed-thread-through-bundled-cli.md |  84 ----
 ...lti-agent-roundtrip-through-bundled-cli.md | 106 -----
 ...kers-claim-conflict-through-bundled-cli.md |  93 -----
 docs/tests/inbox/README.md                    |  71 ----
 docs/tests/inbox/ROADMAP.md                   | 364 ----------------
 docs/tests/inbox/_shared/README.md            | 130 ------
 docs/tests/inbox/cancel/README.md             |   9 -
 .../cancel/cancel-marks-thread-cancelled.md   |  29 --
 .../cancel-persists-reason-and-artifact.md    |  30 --
 .../cancel-rejects-when-thread-missing.md     |  27 --
 docs/tests/inbox/claim/README.md              |  10 -
 .../claim/claim-acquires-thread-lease.md      |  33 --
 .../claim-records-requested-lease-duration.md |  25 --
 ...aim-rejects-when-thread-already-claimed.md |  28 --
 .../claim-rejects-when-thread-missing.md      |  28 --
 docs/tests/inbox/done/README.md               |  10 -
 .../inbox/done/done-marks-thread-terminal.md  |  33 --
 .../done-persists-result-body-and-artifact.md |  34 --
 .../inbox/done/done-rejects-non-owner.md      |  25 --
 .../done/done-rejects-on-terminal-thread.md   |  25 --
 docs/tests/inbox/fail/README.md               |  10 -
 .../inbox/fail/fail-marks-thread-failed.md    |  33 --
 ...fail-persists-failure-body-and-artifact.md |  34 --
 .../inbox/fail/fail-rejects-non-owner.md      |  25 --
 .../fail/fail-rejects-on-terminal-thread.md   |  25 --
 docs/tests/inbox/fetch/README.md              |  10 -
 ...fetch-respects-status-and-limit-filters.md |  31 --
 ...tch-returns-no-matching-work-when-empty.md |  24 --
 ...returns-pending-thread-for-target-agent.md |  30 --
 .../fetch/fetch-unread-uses-read-cursor.md    |  34 --
 docs/tests/inbox/init/README.md               |   8 -
 .../init/init-creates-schema-on-empty-db.md   |  28 --
 .../init/init-is-idempotent-on-existing-db.md |  27 --
 docs/tests/inbox/list/README.md               |  10 -
 .../inbox/list/list-filters-by-assigned-to.md |  25 --
 .../inbox/list/list-filters-by-created-by.md  |  26 --
 .../inbox/list/list-filters-by-status.md      |  26 --
 docs/tests/inbox/list/list-respects-limit.md  |  26 --
 docs/tests/inbox/renew/README.md              |   9 -
 .../inbox/renew/renew-extends-active-lease.md |  33 --
 .../inbox/renew/renew-rejects-non-owner.md    |  24 --
 .../renew-rejects-without-active-lease.md     |  26 --
 docs/tests/inbox/reply/README.md              |  10 -
 .../inbox/reply/reply-adds-answer-message.md  |  34 --
 .../inbox/reply/reply-attaches-artifact.md    |  31 --
 .../reply-rejects-invalid-payload-json.md     |  25 --
 .../reply/reply-supports-control-kind.md      |  25 --
 docs/tests/inbox/send/README.md               |  12 -
 ...send-appends-message-to-existing-thread.md |  33 --
 .../send-attaches-artifact-with-metadata.md   |  27 --
 .../inbox/send/send-creates-new-thread.md     |  36 --
 .../send/send-reads-body-from-body-file.md    |  30 --
 ...-rejects-invalid-artifact-metadata-json.md |  24 --
 .../send/send-rejects-invalid-payload-json.md |  25 --
 docs/tests/inbox/show/README.md               |  10 -
 .../show-includes-artifacts-per-message.md    |  26 --
 .../show-mark-read-advances-read-cursor.md    |  27 --
 .../show/show-rejects-when-thread-missing.md  |  26 --
 ...show-returns-thread-and-message-history.md |  29 --
 docs/tests/inbox/update/README.md             |  11 -
 .../update-accepts-body-file-and-artifact.md  |  33 --
 ...te-moves-thread-to-blocked-with-payload.md |  27 --
 .../update-moves-thread-to-in-progress.md     |  34 --
 .../update-rejects-invalid-payload-json.md    |  25 --
 .../inbox/update/update-rejects-non-owner.md  |  25 --
 docs/tests/inbox/wait-reply/README.md         |   9 -
 .../wait-reply-can-start-from-after-event.md  |  29 --
 .../wait-reply-times-out-when-no-reply.md     |  28 --
 ...ait-reply-wakes-on-answer-after-message.md |  31 --
 docs/tests/inbox/watch/README.md              |   9 -
 .../watch/watch-respects-status-filter.md     |  29 --
 .../watch/watch-times-out-with-no-activity.md |  27 --
 .../watch/watch-wakes-on-matching-thread.md   |  30 --
 docs/tests/inbox/workflows/README.md          | 276 -------------
 docs/tests/orch-skill/README.md               | 210 ----------
 ...k-with-payload-json-through-bundled-cli.md | 107 -----
 ...ocked-answer-resume-through-bundled-cli.md | 116 ------
 ...cancels-active-task-through-bundled-cli.md | 105 -----
 ...nd-launches-worker-through-codex-bridge.md |  97 -----
 ...-after-prerequisite-through-bundled-cli.md | 115 ------
 ...ssigns-blocked-task-through-bundled-cli.md | 129 ------
 ...retries-failed-task-through-bundled-cli.md | 121 ------
 ...-dispatch-reconcile-through-bundled-cli.md | 116 ------
 ...ch-launches-worker-through-codex-bridge.md |  97 -----
 ...dispatch-to-cleanup-through-bundled-cli.md | 119 ------
 docs/tests/orch/README.md                     |  77 ----
 docs/tests/orch/ROADMAP.md                    | 389 ------------------
 docs/tests/orch/_shared/README.md             | 137 ------
 docs/tests/orch/answer/README.md              |   9 -
 ...nswer-accepts-payload-json-without-body.md |  37 --
 .../answer-appends-answer-to-active-thread.md |  36 --
 .../answer-rejects-empty-body-and-payload.md  |  29 --
 docs/tests/orch/blocked/README.md             |   7 -
 ...-lists-latest-question-for-blocked-task.md |  38 --
 docs/tests/orch/cancel/README.md              |   8 -
 .../orch/cancel/cancel-cancels-entire-run.md  |  30 --
 .../orch/cancel/cancel-cancels-single-task.md |  32 --
 docs/tests/orch/cleanup/README.md             |   9 -
 .../cleanup-rejects-attempt-without-task.md   |  29 --
 .../cleanup-removes-completed-worktree.md     |  37 --
 ...urns-no-matching-work-when-filters-miss.md |  33 --
 docs/tests/orch/council-report/README.md      |  12 -
 ...port-defaults-to-consensus-and-majority.md |  70 ----
 ...to-consensus-when-run-is-only-unanimous.md |  73 ----
 .../council-report-json-shape-is-stable.md    |  41 --
 .../council-report-rejects-before-tally.md    |  32 --
 .../council-report-rejects-invalid-show.md    |  29 --
 ...uncil-report-show-all-includes-minority.md |  29 --
 docs/tests/orch/council-start/README.md       |   7 -
 ...ouncil-start-dispatches-three-reviewers.md |  46 ---
 docs/tests/orch/council-tally/README.md       |   8 -
 ...groups-reviewer-findings-in-normal-mode.md |  67 ---
 ...keeps-distinct-proposals-in-strict-mode.md |  61 ---
 docs/tests/orch/council-wait/README.md        |   8 -
 ...ait-times-out-when-reviewers-incomplete.md |  35 --
 ...-wait-wakes-when-all-reviewers-complete.md |  53 ---
 docs/tests/orch/dep-add/README.md             |   7 -
 ...ndent-task-until-prerequisite-completes.md |  38 --
 docs/tests/orch/dispatch/README.md            |  13 -
 ...-allows-explicit-base-ref-on-dirty-repo.md |  31 --
 .../dispatch-analysis-mode-skips-worktree.md  |  31 --
 ...eates-attempt-and-thread-for-ready-task.md |  39 --
 .../dispatch-creates-strict-worktree.md       |  40 --
 ...tch-rejects-dirty-repo-without-base-ref.md |  30 --
 .../dispatch-rejects-non-ready-task.md        |  34 --
 ...spatch-requires-explicit-execution-mode.md |  29 --
 docs/tests/orch/ready/README.md               |   8 -
 .../ready/ready-lists-only-eligible-tasks.md  |  38 --
 ...y-orders-by-priority-and-respects-limit.md |  39 --
 docs/tests/orch/reassign/README.md            |   7 -
 ...s-old-thread-and-dispatches-new-attempt.md |  39 --
 docs/tests/orch/reconcile/README.md           |   9 -
 ...laimed-or-in-progress-thread-to-running.md |  34 --
 ...or-failed-thread-to-terminal-task-state.md |  43 --
 ...verifying-when-task-has-required-checks.md |  42 --
 docs/tests/orch/retry/README.md               |   7 -
 ...try-creates-new-attempt-for-failed-task.md |  40 --
 docs/tests/orch/run-init/README.md            |   7 -
 .../orch/run-init/run-init-creates-new-run.md |  34 --
 docs/tests/orch/run-show/README.md            |   7 -
 ...how-returns-run-summary-and-task-counts.md |  36 --
 docs/tests/orch/status/README.md              |   8 -
 ...reconciles-and-includes-blocked-context.md |  41 --
 ...tatus-returns-run-summary-and-task-list.md |  42 --
 docs/tests/orch/task-add/README.md            |  11 -
 .../task-add-creates-ready-root-task.md       |  36 --
 ...ask-add-rejects-invalid-acceptance-json.md |  27 --
 .../task-add-rejects-invalid-priority.md      |  27 --
 .../task-add-rejects-spec-sha-mismatch.md     |  34 --
 ...-snapshots-spec-and-verification-policy.md |  49 ---
 docs/tests/orch/verify/README.md              |   8 -
 ...rks-task-done-when-required-checks-pass.md |  35 --
 ...y-status-returns-spec-and-gate-for-task.md |  32 --
 docs/tests/orch/wait/README.md                |   8 -
 .../wait-times-out-without-matching-event.md  |  33 --
 .../wait/wait-wakes-on-matching-run-event.md  |  40 --
 docs/tests/orch/workflows/README.md           | 175 --------
 docs/tests/repo-memory-skill/README.md        | 135 ------
 .../ingest-and-search-through-bundled-cli.md  |  72 ----
 .../link-two-entries-through-bundled-cli.md   |  69 ----
 .../search-and-add-through-bundled-cli.md     |  70 ----
 ...e-after-file-change-through-bundled-cli.md |  71 ----
 ...ing-hard-dependency-through-bundled-cli.md |  70 ----
 docs/tests/repo-memory/README.md              |  94 -----
 docs/tests/repo-memory/ROADMAP.md             | 311 --------------
 docs/tests/repo-memory/_shared/README.md      | 137 ------
 docs/tests/repo-memory/add/README.md          |   9 -
 ...-failed-validation-still-registers-repo.md |  32 --
 .../add/add-registers-repo-and-entry.md       |  31 --
 ...tes-existing-entry-on-same-kind-and-key.md |  35 --
 docs/tests/repo-memory/events/README.md       |   9 -
 .../events/events-reads-history-by-id.md      |  28 --
 .../events-rejects-missing-entry-selector.md  |  25 --
 .../events-resolves-entry-by-repo-kind-key.md |  27 --
 docs/tests/repo-memory/ingest/README.md       |   9 -
 .../ingest/ingest-imports-docs-ai-markdown.md |  45 --
 ...ts-headingless-markdown-as-single-entry.md |  38 --
 .../ingest-rejects-when-no-markdown-found.md  |  27 --
 docs/tests/repo-memory/init/README.md         |   8 -
 .../init/init-creates-schema-on-empty-db.md   |  25 --
 .../init/init-is-idempotent-on-existing-db.md |  26 --
 docs/tests/repo-memory/link/README.md         |   9 -
 .../link-creates-relation-between-entries.md  |  28 --
 .../link/link-rejects-missing-relation.md     |  26 --
 .../link-rejects-when-entry-id-missing.md     |  26 --
 docs/tests/repo-memory/list/README.md         |   8 -
 .../list/list-filters-by-kind-and-status.md   |  34 --
 .../list-returns-no-entries-when-empty.md     |  25 --
 docs/tests/repo-memory/repos/README.md        |   8 -
 .../repos/repos-lists-tracked-repositories.md |  28 --
 .../repos/repos-prints-no-repos-when-empty.md |  25 --
 docs/tests/repo-memory/search/README.md       |  10 -
 .../search-matches-alias-with-repo-filter.md  |  34 --
 .../search/search-rejects-missing-query.md    |  25 --
 .../search-returns-matching-entry-snippet.md  |  31 --
 .../search-returns-no-results-when-empty.md   |  25 --
 docs/tests/repo-memory/verify/README.md       |  11 -
 ...rify-downgrades-changed-file-dependency.md |  36 --
 ...grades-entry-missing-verified-on-commit.md |  37 --
 ...ify-marks-missing-hard-dependency-stale.md |  36 --
 .../verify-prints-no-repos-when-empty.md      |  25 --
 ...fy-skips-explicit-repo-without-git-head.md |  28 --
 docs/tests/repo-memory/workflows/README.md    | 138 -------
 packages/coord-core/store/inbox_test.go       |   1 +
 packages/coord-core/store/orch_test.go        |   1 +
 .../cli/inbox/cancel_integration_test.go      |   4 +-
 .../cli/inbox/claim_integration_test.go       |   4 +
 .../cli/inbox/done_integration_test.go        |   4 +
 .../cli/inbox/fail_integration_test.go        |   4 +
 .../cli/inbox/fetch_integration_test.go       |   4 +
 .../internal/cli/inbox/help_contracts_test.go |   4 +
 .../cli/inbox/init_integration_test.go        |   2 +
 .../internal/cli/inbox/integration_test.go    |   6 +
 .../cli/inbox/list_integration_test.go        |   5 +-
 .../cli/inbox/renew_integration_test.go       |   3 +
 .../cli/inbox/reply_integration_test.go       |   4 +
 .../cli/inbox/send_integration_test.go        |   6 +
 .../cli/inbox/show_integration_test.go        |   5 +-
 .../cli/inbox/update_integration_test.go      |   5 +
 .../cli/inbox/wait_reply_integration_test.go  |   4 +-
 .../cli/inbox/watch_integration_test.go       |   4 +-
 .../internal/httpapi/router_test.go           |   4 +-
 .../cli/orch/command_contracts_core_test.go   |   7 +
 .../cli/orch/command_contracts_edges_test.go  |   4 +
 .../orch/command_contracts_remaining_test.go  |   8 +
 .../cli/orch/council_report_contracts_test.go |   3 +
 .../internal/cli/orch/help_contracts_test.go  |   6 +
 .../internal/cli/orch/integration_test.go     |  24 ++
 .../cmd/repo-memory/add_integration_test.go   |   3 +
 .../repo-memory/events_integration_test.go    |   3 +
 .../cmd/repo-memory/help_test.go              |   3 +
 .../repo-memory/ingest_integration_test.go    |   3 +
 .../cmd/repo-memory/init_integration_test.go  |   2 +
 .../cmd/repo-memory/link_integration_test.go  |   3 +
 .../cmd/repo-memory/list_integration_test.go  |   2 +
 .../cmd/repo-memory/main_test.go              |   2 +
 .../cmd/repo-memory/repos_integration_test.go |   2 +
 .../repo-memory/search_integration_test.go    |   4 +
 .../repo-memory/verify_integration_test.go    |   5 +
 .../repo-memory/workflow_integration_test.go  |   4 +
 .../internal/documents/load_test.go           |   1 +
 .../internal/store/store_test.go              |   3 +
 257 files changed, 174 insertions(+), 10431 deletions(-)
 delete mode 100644 docs/tests/council-review-skill/README.md
 delete mode 100644 docs/tests/council-review-skill/council-brainstorm-end-to-end-through-bundled-cli.md
 delete mode 100644 docs/tests/council-review-skill/council-report-rejects-before-tally-through-bundled-cli.md
 delete mode 100644 docs/tests/council-review-skill/council-report-rejects-invalid-show-through-bundled-cli.md
 delete mode 100644 docs/tests/council-review-skill/council-report-show-all-includes-minority-through-bundled-cli.md
 delete mode 100644 docs/tests/council-review-skill/council-reviewer-output-invalid-json-fails-tally-through-bundled-cli.md
 delete mode 100644 docs/tests/council-review-skill/council-start-with-target-file-through-bundled-cli.md
 delete mode 100644 docs/tests/council-review-skill/council-tally-strict-keeps-distinct-proposals-through-bundled-cli.md
 delete mode 100644 docs/tests/council-review-skill/council-unanimous-only-default-report-through-bundled-cli.md
 delete mode 100644 docs/tests/council-review-skill/council-wait-timeout-through-bundled-cli.md
 delete mode 100644 docs/tests/inbox-skill/README.md
 delete mode 100644 docs/tests/inbox-skill/artifact-roundtrip-through-bundled-cli.md
 delete mode 100644 docs/tests/inbox-skill/blocked-worker-timeout-without-reply-through-bundled-cli.md
 delete mode 100644 docs/tests/inbox-skill/leader-cancels-claimed-thread-through-bundled-cli.md
 delete mode 100644 docs/tests/inbox-skill/multi-agent-roundtrip-through-bundled-cli.md
 delete mode 100644 docs/tests/inbox-skill/parallel-workers-claim-conflict-through-bundled-cli.md
 delete mode 100644 docs/tests/inbox/README.md
 delete mode 100644 docs/tests/inbox/ROADMAP.md
 delete mode 100644 docs/tests/inbox/_shared/README.md
 delete mode 100644 docs/tests/inbox/cancel/README.md
 delete mode 100644 docs/tests/inbox/cancel/cancel-marks-thread-cancelled.md
 delete mode 100644 docs/tests/inbox/cancel/cancel-persists-reason-and-artifact.md
 delete mode 100644 docs/tests/inbox/cancel/cancel-rejects-when-thread-missing.md
 delete mode 100644 docs/tests/inbox/claim/README.md
 delete mode 100644 docs/tests/inbox/claim/claim-acquires-thread-lease.md
 delete mode 100644 docs/tests/inbox/claim/claim-records-requested-lease-duration.md
 delete mode 100644 docs/tests/inbox/claim/claim-rejects-when-thread-already-claimed.md
 delete mode 100644 docs/tests/inbox/claim/claim-rejects-when-thread-missing.md
 delete mode 100644 docs/tests/inbox/done/README.md
 delete mode 100644 docs/tests/inbox/done/done-marks-thread-terminal.md
 delete mode 100644 docs/tests/inbox/done/done-persists-result-body-and-artifact.md
 delete mode 100644 docs/tests/inbox/done/done-rejects-non-owner.md
 delete mode 100644 docs/tests/inbox/done/done-rejects-on-terminal-thread.md
 delete mode 100644 docs/tests/inbox/fail/README.md
 delete mode 100644 docs/tests/inbox/fail/fail-marks-thread-failed.md
 delete mode 100644 docs/tests/inbox/fail/fail-persists-failure-body-and-artifact.md
 delete mode 100644 docs/tests/inbox/fail/fail-rejects-non-owner.md
 delete mode 100644 docs/tests/inbox/fail/fail-rejects-on-terminal-thread.md
 delete mode 100644 docs/tests/inbox/fetch/README.md
 delete mode 100644 docs/tests/inbox/fetch/fetch-respects-status-and-limit-filters.md
 delete mode 100644 docs/tests/inbox/fetch/fetch-returns-no-matching-work-when-empty.md
 delete mode 100644 docs/tests/inbox/fetch/fetch-returns-pending-thread-for-target-agent.md
 delete mode 100644 docs/tests/inbox/fetch/fetch-unread-uses-read-cursor.md
 delete mode 100644 docs/tests/inbox/init/README.md
 delete mode 100644 docs/tests/inbox/init/init-creates-schema-on-empty-db.md
 delete mode 100644 docs/tests/inbox/init/init-is-idempotent-on-existing-db.md
 delete mode 100644 docs/tests/inbox/list/README.md
 delete mode 100644 docs/tests/inbox/list/list-filters-by-assigned-to.md
 delete mode 100644 docs/tests/inbox/list/list-filters-by-created-by.md
 delete mode 100644 docs/tests/inbox/list/list-filters-by-status.md
 delete mode 100644 docs/tests/inbox/list/list-respects-limit.md
 delete mode 100644 docs/tests/inbox/renew/README.md
 delete mode 100644 docs/tests/inbox/renew/renew-extends-active-lease.md
 delete mode 100644 docs/tests/inbox/renew/renew-rejects-non-owner.md
 delete mode 100644 docs/tests/inbox/renew/renew-rejects-without-active-lease.md
 delete mode 100644 docs/tests/inbox/reply/README.md
 delete mode 100644 docs/tests/inbox/reply/reply-adds-answer-message.md
 delete mode 100644 docs/tests/inbox/reply/reply-attaches-artifact.md
 delete mode 100644 docs/tests/inbox/reply/reply-rejects-invalid-payload-json.md
 delete mode 100644 docs/tests/inbox/reply/reply-supports-control-kind.md
 delete mode 100644 docs/tests/inbox/send/README.md
 delete mode 100644 docs/tests/inbox/send/send-appends-message-to-existing-thread.md
 delete mode 100644 docs/tests/inbox/send/send-attaches-artifact-with-metadata.md
 delete mode 100644 docs/tests/inbox/send/send-creates-new-thread.md
 delete mode 100644 docs/tests/inbox/send/send-reads-body-from-body-file.md
 delete mode 100644 docs/tests/inbox/send/send-rejects-invalid-artifact-metadata-json.md
 delete mode 100644 docs/tests/inbox/send/send-rejects-invalid-payload-json.md
 delete mode 100644 docs/tests/inbox/show/README.md
 delete mode 100644 docs/tests/inbox/show/show-includes-artifacts-per-message.md
 delete mode 100644 docs/tests/inbox/show/show-mark-read-advances-read-cursor.md
 delete mode 100644 docs/tests/inbox/show/show-rejects-when-thread-missing.md
 delete mode 100644 docs/tests/inbox/show/show-returns-thread-and-message-history.md
 delete mode 100644 docs/tests/inbox/update/README.md
 delete mode 100644 docs/tests/inbox/update/update-accepts-body-file-and-artifact.md
 delete mode 100644 docs/tests/inbox/update/update-moves-thread-to-blocked-with-payload.md
 delete mode 100644 docs/tests/inbox/update/update-moves-thread-to-in-progress.md
 delete mode 100644 docs/tests/inbox/update/update-rejects-invalid-payload-json.md
 delete mode 100644 docs/tests/inbox/update/update-rejects-non-owner.md
 delete mode 100644 docs/tests/inbox/wait-reply/README.md
 delete mode 100644 docs/tests/inbox/wait-reply/wait-reply-can-start-from-after-event.md
 delete mode 100644 docs/tests/inbox/wait-reply/wait-reply-times-out-when-no-reply.md
 delete mode 100644 docs/tests/inbox/wait-reply/wait-reply-wakes-on-answer-after-message.md
 delete mode 100644 docs/tests/inbox/watch/README.md
 delete mode 100644 docs/tests/inbox/watch/watch-respects-status-filter.md
 delete mode 100644 docs/tests/inbox/watch/watch-times-out-with-no-activity.md
 delete mode 100644 docs/tests/inbox/watch/watch-wakes-on-matching-thread.md
 delete mode 100644 docs/tests/inbox/workflows/README.md
 delete mode 100644 docs/tests/orch-skill/README.md
 delete mode 100644 docs/tests/orch-skill/leader-answers-blocked-task-with-payload-json-through-bundled-cli.md
 delete mode 100644 docs/tests/orch-skill/leader-blocked-answer-resume-through-bundled-cli.md
 delete mode 100644 docs/tests/orch-skill/leader-cancels-active-task-through-bundled-cli.md
 delete mode 100644 docs/tests/orch-skill/leader-dispatches-and-launches-worker-through-codex-bridge.md
 delete mode 100644 docs/tests/orch-skill/leader-dispatches-dependent-task-after-prerequisite-through-bundled-cli.md
 delete mode 100644 docs/tests/orch-skill/leader-reassigns-blocked-task-through-bundled-cli.md
 delete mode 100644 docs/tests/orch-skill/leader-retries-failed-task-through-bundled-cli.md
 delete mode 100644 docs/tests/orch-skill/leader-run-dispatch-reconcile-through-bundled-cli.md
 delete mode 100644 docs/tests/orch-skill/strict-worktree-dispatch-launches-worker-through-codex-bridge.md
 delete mode 100644 docs/tests/orch-skill/strict-worktree-dispatch-to-cleanup-through-bundled-cli.md
 delete mode 100644 docs/tests/orch/README.md
 delete mode 100644 docs/tests/orch/ROADMAP.md
 delete mode 100644 docs/tests/orch/_shared/README.md
 delete mode 100644 docs/tests/orch/answer/README.md
 delete mode 100644 docs/tests/orch/answer/answer-accepts-payload-json-without-body.md
 delete mode 100644 docs/tests/orch/answer/answer-appends-answer-to-active-thread.md
 delete mode 100644 docs/tests/orch/answer/answer-rejects-empty-body-and-payload.md
 delete mode 100644 docs/tests/orch/blocked/README.md
 delete mode 100644 docs/tests/orch/blocked/blocked-lists-latest-question-for-blocked-task.md
 delete mode 100644 docs/tests/orch/cancel/README.md
 delete mode 100644 docs/tests/orch/cancel/cancel-cancels-entire-run.md
 delete mode 100644 docs/tests/orch/cancel/cancel-cancels-single-task.md
 delete mode 100644 docs/tests/orch/cleanup/README.md
 delete mode 100644 docs/tests/orch/cleanup/cleanup-rejects-attempt-without-task.md
 delete mode 100644 docs/tests/orch/cleanup/cleanup-removes-completed-worktree.md
 delete mode 100644 docs/tests/orch/cleanup/cleanup-returns-no-matching-work-when-filters-miss.md
 delete mode 100644 docs/tests/orch/council-report/README.md
 delete mode 100644 docs/tests/orch/council-report/council-report-defaults-to-consensus-and-majority.md
 delete mode 100644 docs/tests/orch/council-report/council-report-defaults-to-consensus-when-run-is-only-unanimous.md
 delete mode 100644 docs/tests/orch/council-report/council-report-json-shape-is-stable.md
 delete mode 100644 docs/tests/orch/council-report/council-report-rejects-before-tally.md
 delete mode 100644 docs/tests/orch/council-report/council-report-rejects-invalid-show.md
 delete mode 100644 docs/tests/orch/council-report/council-report-show-all-includes-minority.md
 delete mode 100644 docs/tests/orch/council-start/README.md
 delete mode 100644 docs/tests/orch/council-start/council-start-dispatches-three-reviewers.md
 delete mode 100644 docs/tests/orch/council-tally/README.md
 delete mode 100644 docs/tests/orch/council-tally/council-tally-groups-reviewer-findings-in-normal-mode.md
 delete mode 100644 docs/tests/orch/council-tally/council-tally-keeps-distinct-proposals-in-strict-mode.md
 delete mode 100644 docs/tests/orch/council-wait/README.md
 delete mode 100644 docs/tests/orch/council-wait/council-wait-times-out-when-reviewers-incomplete.md
 delete mode 100644 docs/tests/orch/council-wait/council-wait-wakes-when-all-reviewers-complete.md
 delete mode 100644 docs/tests/orch/dep-add/README.md
 delete mode 100644 docs/tests/orch/dep-add/dep-add-blocks-dependent-task-until-prerequisite-completes.md
 delete mode 100644 docs/tests/orch/dispatch/README.md
 delete mode 100644 docs/tests/orch/dispatch/dispatch-allows-explicit-base-ref-on-dirty-repo.md
 delete mode 100644 docs/tests/orch/dispatch/dispatch-analysis-mode-skips-worktree.md
 delete mode 100644 docs/tests/orch/dispatch/dispatch-creates-attempt-and-thread-for-ready-task.md
 delete mode 100644 docs/tests/orch/dispatch/dispatch-creates-strict-worktree.md
 delete mode 100644 docs/tests/orch/dispatch/dispatch-rejects-dirty-repo-without-base-ref.md
 delete mode 100644 docs/tests/orch/dispatch/dispatch-rejects-non-ready-task.md
 delete mode 100644 docs/tests/orch/dispatch/dispatch-requires-explicit-execution-mode.md
 delete mode 100644 docs/tests/orch/ready/README.md
 delete mode 100644 docs/tests/orch/ready/ready-lists-only-eligible-tasks.md
 delete mode 100644 docs/tests/orch/ready/ready-orders-by-priority-and-respects-limit.md
 delete mode 100644 docs/tests/orch/reassign/README.md
 delete mode 100644 docs/tests/orch/reassign/reassign-cancels-old-thread-and-dispatches-new-attempt.md
 delete mode 100644 docs/tests/orch/reconcile/README.md
 delete mode 100644 docs/tests/orch/reconcile/reconcile-maps-claimed-or-in-progress-thread-to-running.md
 delete mode 100644 docs/tests/orch/reconcile/reconcile-maps-done-or-failed-thread-to-terminal-task-state.md
 delete mode 100644 docs/tests/orch/reconcile/reconcile-maps-done-thread-to-verifying-when-task-has-required-checks.md
 delete mode 100644 docs/tests/orch/retry/README.md
 delete mode 100644 docs/tests/orch/retry/retry-creates-new-attempt-for-failed-task.md
 delete mode 100644 docs/tests/orch/run-init/README.md
 delete mode 100644 docs/tests/orch/run-init/run-init-creates-new-run.md
 delete mode 100644 docs/tests/orch/run-show/README.md
 delete mode 100644 docs/tests/orch/run-show/run-show-returns-run-summary-and-task-counts.md
 delete mode 100644 docs/tests/orch/status/README.md
 delete mode 100644 docs/tests/orch/status/status-auto-reconciles-and-includes-blocked-context.md
 delete mode 100644 docs/tests/orch/status/status-returns-run-summary-and-task-list.md
 delete mode 100644 docs/tests/orch/task-add/README.md
 delete mode 100644 docs/tests/orch/task-add/task-add-creates-ready-root-task.md
 delete mode 100644 docs/tests/orch/task-add/task-add-rejects-invalid-acceptance-json.md
 delete mode 100644 docs/tests/orch/task-add/task-add-rejects-invalid-priority.md
 delete mode 100644 docs/tests/orch/task-add/task-add-rejects-spec-sha-mismatch.md
 delete mode 100644 docs/tests/orch/task-add/task-add-snapshots-spec-and-verification-policy.md
 delete mode 100644 docs/tests/orch/verify/README.md
 delete mode 100644 docs/tests/orch/verify/verify-record-updates-gate-and-marks-task-done-when-required-checks-pass.md
 delete mode 100644 docs/tests/orch/verify/verify-status-returns-spec-and-gate-for-task.md
 delete mode 100644 docs/tests/orch/wait/README.md
 delete mode 100644 docs/tests/orch/wait/wait-times-out-without-matching-event.md
 delete mode 100644 docs/tests/orch/wait/wait-wakes-on-matching-run-event.md
 delete mode 100644 docs/tests/orch/workflows/README.md
 delete mode 100644 docs/tests/repo-memory-skill/README.md
 delete mode 100644 docs/tests/repo-memory-skill/ingest-and-search-through-bundled-cli.md
 delete mode 100644 docs/tests/repo-memory-skill/link-two-entries-through-bundled-cli.md
 delete mode 100644 docs/tests/repo-memory-skill/search-and-add-through-bundled-cli.md
 delete mode 100644 docs/tests/repo-memory-skill/verify-downgrade-after-file-change-through-bundled-cli.md
 delete mode 100644 docs/tests/repo-memory-skill/verify-stale-missing-hard-dependency-through-bundled-cli.md
 delete mode 100644 docs/tests/repo-memory/README.md
 delete mode 100644 docs/tests/repo-memory/ROADMAP.md
 delete mode 100644 docs/tests/repo-memory/_shared/README.md
 delete mode 100644 docs/tests/repo-memory/add/README.md
 delete mode 100644 docs/tests/repo-memory/add/add-failed-validation-still-registers-repo.md
 delete mode 100644 docs/tests/repo-memory/add/add-registers-repo-and-entry.md
 delete mode 100644 docs/tests/repo-memory/add/add-updates-existing-entry-on-same-kind-and-key.md
 delete mode 100644 docs/tests/repo-memory/events/README.md
 delete mode 100644 docs/tests/repo-memory/events/events-reads-history-by-id.md
 delete mode 100644 docs/tests/repo-memory/events/events-rejects-missing-entry-selector.md
 delete mode 100644 docs/tests/repo-memory/events/events-resolves-entry-by-repo-kind-key.md
 delete mode 100644 docs/tests/repo-memory/ingest/README.md
 delete mode 100644 docs/tests/repo-memory/ingest/ingest-imports-docs-ai-markdown.md
 delete mode 100644 docs/tests/repo-memory/ingest/ingest-imports-headingless-markdown-as-single-entry.md
 delete mode 100644 docs/tests/repo-memory/ingest/ingest-rejects-when-no-markdown-found.md
 delete mode 100644 docs/tests/repo-memory/init/README.md
 delete mode 100644 docs/tests/repo-memory/init/init-creates-schema-on-empty-db.md
 delete mode 100644 docs/tests/repo-memory/init/init-is-idempotent-on-existing-db.md
 delete mode 100644 docs/tests/repo-memory/link/README.md
 delete mode 100644 docs/tests/repo-memory/link/link-creates-relation-between-entries.md
 delete mode 100644 docs/tests/repo-memory/link/link-rejects-missing-relation.md
 delete mode 100644 docs/tests/repo-memory/link/link-rejects-when-entry-id-missing.md
 delete mode 100644 docs/tests/repo-memory/list/README.md
 delete mode 100644 docs/tests/repo-memory/list/list-filters-by-kind-and-status.md
 delete mode 100644 docs/tests/repo-memory/list/list-returns-no-entries-when-empty.md
 delete mode 100644 docs/tests/repo-memory/repos/README.md
 delete mode 100644 docs/tests/repo-memory/repos/repos-lists-tracked-repositories.md
 delete mode 100644 docs/tests/repo-memory/repos/repos-prints-no-repos-when-empty.md
 delete mode 100644 docs/tests/repo-memory/search/README.md
 delete mode 100644 docs/tests/repo-memory/search/search-matches-alias-with-repo-filter.md
 delete mode 100644 docs/tests/repo-memory/search/search-rejects-missing-query.md
 delete mode 100644 docs/tests/repo-memory/search/search-returns-matching-entry-snippet.md
 delete mode 100644 docs/tests/repo-memory/search/search-returns-no-results-when-empty.md
 delete mode 100644 docs/tests/repo-memory/verify/README.md
 delete mode 100644 docs/tests/repo-memory/verify/verify-downgrades-changed-file-dependency.md
 delete mode 100644 docs/tests/repo-memory/verify/verify-downgrades-entry-missing-verified-on-commit.md
 delete mode 100644 docs/tests/repo-memory/verify/verify-marks-missing-hard-dependency-stale.md
 delete mode 100644 docs/tests/repo-memory/verify/verify-prints-no-repos-when-empty.md
 delete mode 100644 docs/tests/repo-memory/verify/verify-skips-explicit-repo-without-git-head.md
 delete mode 100644 docs/tests/repo-memory/workflows/README.md

diff --git a/AGENTS.md b/AGENTS.md
index 429ea26..b23f5ed 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -9,7 +9,6 @@ This file applies to the entire repository.
 Before starting substantial work, read the source-of-truth docs that match the task:
 
 - implementation and repository-structure work: [docs/architecture.md](/home/kurihada/project/ai-workflow-skill/docs/architecture.md), [docs/skill-workspace-monorepo.md](/home/kurihada/project/ai-workflow-skill/docs/skill-workspace-monorepo.md)
-- inbox Markdown test-plan work: [docs/tests/inbox/ROADMAP.md](/home/kurihada/project/ai-workflow-skill/docs/tests/inbox/ROADMAP.md)
 
 ## Documentation Update Rule
 
@@ -20,18 +19,15 @@ Do not finish a task and leave its documentation stale.
 Required behavior:
 
 - if you complete or materially change implementation behavior, architecture, runtime ownership, or product structure, update the relevant source-of-truth docs in the same change
-- if you add, remove, or materially revise inbox Markdown test cases or test-plan documents, update [docs/tests/inbox/ROADMAP.md](/home/kurihada/project/ai-workflow-skill/docs/tests/inbox/ROADMAP.md) in the same change
-- when a test-plan document is created, update document progress
-- when a test case is written, update authored-case tracking and pending backlog
-- when a planned item is no longer needed, mark it as removed or deferred instead of silently dropping it
+- if you materially change automated test coverage or test structure, keep the relevant source files and repository docs consistent in the same change
 
-## Inbox Test-Plan Specific Rule
+## Test Documentation Rule
 
-For `docs/tests/inbox/`:
+For automated tests in this repository:
 
-- organize by folder plus `README.md`
-- do not use numeric test IDs
-- use stable case slugs and keep the roadmap synchronized with the actual files on disk
+- do not create or maintain standalone Markdown test-plan documents under `docs/tests/`
+- keep test intent close to the executable test by adding a short comment above each top-level `Test...` function
+- keep test comments concise and focused on the behavior or contract the test is protecting
 
 ## Project Skills
 
diff --git a/docs/skill-workspace-monorepo.md b/docs/skill-workspace-monorepo.md
index 72afdf7..f52ac12 100644
--- a/docs/skill-workspace-monorepo.md
+++ b/docs/skill-workspace-monorepo.md
@@ -96,15 +96,6 @@ Examples:
 │  ├─ package_skill_runtimes.sh
 │  ├─ skill-bundles.json
 │  └─ ...
-└─ docs/tests/
-   ├─ inbox/
-   ├─ orch/
-   ├─ repo-memory/
-   ├─ inbox-skill/
-   ├─ orch-skill/
-   ├─ council-review-skill/
-   ├─ repo-memory-skill/
-   └─ ...
 ```
 
 ## Package Boundaries
@@ -314,28 +305,16 @@ Each runtime package owns:
 - integration tests
 - package-local fixtures
 
-### CLI Markdown Test Plans
+### Test Intent Documentation
 
-Standalone CLIs with user-facing contracts should also keep a Markdown test-plan
-set under `docs/tests/<cli>/`.
+Test intent for user-facing contracts should live with the executable tests, not
+in a separate Markdown plan tree.
 
-Examples:
+Required shape:
 
-- `docs/tests/inbox/`
-- `docs/tests/orch/`
-- `docs/tests/repo-memory/`
-
-### Skill Forward Tests
-
-`docs/tests/*-skill/` remains skill-oriented.
-These tests validate the bundled skill behavior, not only the runtime package.
-
-Examples:
-
-- `docs/tests/inbox-skill/`
-- `docs/tests/orch-skill/`
-- `docs/tests/council-review-skill/`
-- `docs/tests/repo-memory-skill/`
+- add a short comment above each top-level test describing the behavior it protects
+- prefer package-local fixtures and helpers over cross-repo prose test plans
+- keep bundled-skill verification as executable tests or scripts, not as standalone Markdown inventories
 
 ### Cross-Package Validation
 
@@ -351,7 +330,7 @@ Keep documentation split by concern:
 
 - runtime/package docs live under the owning package when tightly tied to implementation
 - cross-workspace architecture docs stay in root `docs/`
-- skill forward-test plans stay in `docs/tests/*-skill/`
+- test intent stays in the executable test source, as short comments above top-level test cases
 
 This document becomes the repository-level source of truth for the workspace
 split.
@@ -424,8 +403,6 @@ Changes:
 - move the exploratory repo-memory runtime into `packages/repo-memory-runtime`
 - normalize module pathing, tests, and packaging
 - add `skills/repo-memory`
-- add `docs/tests/repo-memory/`
-- add `docs/tests/repo-memory-skill/`
 
 Exit criteria:
 
diff --git a/docs/tests/council-review-skill/README.md b/docs/tests/council-review-skill/README.md
deleted file mode 100644
index 133ab5b..0000000
--- a/docs/tests/council-review-skill/README.md
+++ /dev/null
@@ -1,182 +0,0 @@
-# Council Review Skill Test Plan
-
-## Purpose
-
-This directory tracks human-readable test plans for the `skills/council-review/` Codex skill bundle.
-
-These documents are not command-contract specs for the `orch council` CLI itself.
-That coverage already lives under [../orch/](../orch/).
-
-This directory exists to describe a different test surface:
-
-- whether a leader agent can actually use the packaged `council-review` skill
-- whether the bundled `./assets/orch` CLI works inside real skill-guided council workflows
-- whether a council run driven by the skill reaches the expected reviewer, grouping, tally, and report state
-
-## Test Model
-
-- `README.md` is the index for this directory
-- each skill test case lives in its own Markdown file
-- use stable case slugs in filenames
-
-## Shared Execution Contract
-
-Use these defaults unless a case file explicitly overrides them:
-
-- run the scenario with real subagents, not simulated transcripts
-- inject `skills/council-review/` into the leader agent
-- inject `skills/inbox/` into reviewer agents whenever reviewer task completion is required
-- initialize the shared SQLite DB before launching role agents with `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-- require the leader to coordinate through the bundled `./assets/orch` CLI from the council-review skill instead of ordinary chat
-- require reviewer agents to coordinate through the bundled `./assets/inbox` CLI from their skill instead of ordinary chat
-- validate final council run, reviewer task state, and report state independently from the main thread after the agents stop
-- create any required target-file or repo fixture before launching agents for target-file, mixed, or repo-target cases
-
-## How An Agent Runs These Cases
-
-Use one test-runner agent to execute each case.
-
-The test-runner agent is responsible for:
-
-- reading this `README.md` first, then one specific case file
-- creating an isolated temporary directory and DB path for that run
-- initializing the DB once through the bundled inbox CLI before launching role agents
-- creating any required temporary target file or Git repo fixture before launching role agents
-- launching the role agents described in `Agent Topology`
-- injecting `skills/council-review/` into the leader and `skills/inbox/` into reviewers
-- passing each role agent the prompt text from the case file with concrete values substituted for `COUNCIL_SKILL_PATH`, `INBOX_SKILL_PATH`, `TMPDIR`, `RUN_ID`, `THREAD_ID`, and `REPORT_PATH` when needed
-- coordinating launch order or parallel start according to the case file
-- collecting agent final summaries as evidence
-- resolving final run ids, thread ids, and report artifact paths from agent outputs
-- running the `Validation Commands` from the main thread after the role agents stop
-- comparing the observed results against `Expected Outcomes` and `Assertions`
-- returning a final pass/fail judgment with concrete evidence
-
-The role agents are responsible for:
-
-- acting only within the role assigned in the case file
-- using the injected skill bundle rather than ad hoc repository discovery
-- coordinating through the bundled CLI and shared DB
-- reporting concrete run ids, thread ids, report artifact paths, and key command outcomes back to the test-runner agent
-
-The test-runner agent should treat a case as passed only when:
-
-- all role agents reach a final state without violating the case contract
-- the independent validation commands succeed
-- the final council, orch, and inbox state matches the assertions in the case file
-
-The test-runner agent should treat a case as failed when:
-
-- any required agent times out or stalls
-- a required council, orch, or inbox action is skipped
-- the leader falls back to ordinary chat for workflow control that should go through the bundled council-review skill
-- reviewer agents fall back to ordinary chat instead of returning results through inbox
-- the final council grouping, summary, or report state conflicts with the documented assertions
-
-The test-runner agent should report results in this shape:
-
-- `case`
-- `db_path`
-- `run_id`
-- `thread_ids`
-- `report_paths`
-- `result`: `pass` or `fail`
-- `agent_summaries`
-- `validation_evidence`
-- `assertion_checklist`
-- `notes`
-
-## Default Timeouts
-
-Use these defaults unless a case file explicitly overrides them:
-
-- per-agent timeout: `4m`
-- overall scenario timeout: `6m`
-- async wait margin for the main thread: `45s`
-
-## Default Failure Conditions
-
-Treat the test as failed if any of the following happens:
-
-- any required agent does not reach a final state before timeout
-- any required council, orch, or inbox command returns a non-success result unless the case expects that failure
-- the final `council report --json` output does not match the expected grouped recommendations
-- the final `orch status` output does not match the expected reviewer task state
-- a required markdown report artifact is missing when the case expects one
-- the agents fall back to ordinary chat for critical coordination instead of the bundled CLIs
-
-## Evidence Capture
-
-Collect at least the following artifacts for every run:
-
-- agent final summaries
-- final `council report --json` output when the case reaches report stage
-- final `orch status --run RUN_ID --json` output
-- final `inbox show --thread THREAD_ID --json` output for every relevant reviewer thread when reviewers participated
-- any `council wait` or `council tally` output relevant to the case
-- the temporary DB path, resolved run id, resolved thread ids, and any report artifact paths
-
-## Cleanup Policy
-
-Use these defaults unless a case file explicitly overrides them:
-
-- keep the temporary DB, repo fixture, and working directory on failure for debugging
-- cleanup the temporary working directory on success only if the caller does not need replay artifacts
-
-## Per-Case Template
-
-Each case file should use this structure:
-
-- `Test Type`
-- `Purpose`
-- `Preconditions`
-- `Agent Topology`
-- `Inputs`
-- `Execution Parameters`
-- `Execution Steps`
-- `Validation Commands`
-- `Expected Outcomes`
-- `Assertions`
-- `Cleanup`
-- `Recorded Example Run` when a real run has already been captured
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `council-brainstorm-end-to-end-through-bundled-cli` | [council-brainstorm-end-to-end-through-bundled-cli.md](./council-brainstorm-end-to-end-through-bundled-cli.md) | validates that the council-review skill can drive `start -> wait -> tally -> report` with three real reviewer agents |
-| `council-unanimous-only-default-report-through-bundled-cli` | [council-unanimous-only-default-report-through-bundled-cli.md](./council-unanimous-only-default-report-through-bundled-cli.md) | validates that unanimous-only runs default to `consensus` output while preserving the underlying summary counts |
-| `council-wait-timeout-through-bundled-cli` | [council-wait-timeout-through-bundled-cli.md](./council-wait-timeout-through-bundled-cli.md) | validates that the leader sees the expected timeout contract when reviewer tasks do not complete |
-| `council-report-rejects-before-tally-through-bundled-cli` | [council-report-rejects-before-tally-through-bundled-cli.md](./council-report-rejects-before-tally-through-bundled-cli.md) | validates that the skill surfaces the stable invalid-state error when report is attempted before tally |
-| `council-report-show-all-includes-minority-through-bundled-cli` | [council-report-show-all-includes-minority-through-bundled-cli.md](./council-report-show-all-includes-minority-through-bundled-cli.md) | validates that an explicit `--show all` report includes the otherwise hidden minority group |
-| `council-report-rejects-invalid-show-through-bundled-cli` | [council-report-rejects-invalid-show-through-bundled-cli.md](./council-report-rejects-invalid-show-through-bundled-cli.md) | validates that the leader sees the stable `invalid_input` contract for an invalid report bucket selection |
-| `council-tally-strict-keeps-distinct-proposals-through-bundled-cli` | [council-tally-strict-keeps-distinct-proposals-through-bundled-cli.md](./council-tally-strict-keeps-distinct-proposals-through-bundled-cli.md) | validates that strict similarity preserves near-duplicate wording as separate minority groups |
-| `council-reviewer-output-invalid-json-fails-tally-through-bundled-cli` | [council-reviewer-output-invalid-json-fails-tally-through-bundled-cli.md](./council-reviewer-output-invalid-json-fails-tally-through-bundled-cli.md) | validates that malformed reviewer result JSON reaches the leader as the stable tally-time `invalid_input` contract |
-| `council-start-with-target-file-through-bundled-cli` | [council-start-with-target-file-through-bundled-cli.md](./council-start-with-target-file-through-bundled-cli.md) | validates that the skill can start a council run from explicit `--target-file` context instead of a pure inline prompt |
-
-## Scope
-
-In scope:
-
-- explicit `$council-review` skill invocation
-- bundled `./assets/orch` CLI usage for `orch council ...`
-- end-to-end council start, wait, tally, and report flows
-- interaction between a leader using `skills/council-review/` and reviewers using `skills/inbox/`
-- default report policy, explicit minority inclusion, and invalid report-filter validation
-- normal and strict tally behavior
-- malformed reviewer-output failure paths
-- non-prompt target context including `target-file`
-
-Out of scope:
-
-- per-command flag and JSON contract coverage for `orch council`
-- generic leader orchestration flows that already belong under [../orch-skill/](../orch-skill/)
-- worker-only skill behavior that belongs under [../inbox-skill/](../inbox-skill/)
-- implicit skill triggering without `$council-review`
-
-## Relationship To Other Test Docs
-
-- [../orch/](../orch/) covers CLI command behavior
-- [../orch-skill/](../orch-skill/) covers generic leader-side orchestration behavior on top of `orch`
-- [../inbox-skill/](../inbox-skill/) covers worker-side skill-guided behavior on top of inbox
-- this directory covers the separate user-facing `council-review` skill on top of `orch council`
diff --git a/docs/tests/council-review-skill/council-brainstorm-end-to-end-through-bundled-cli.md b/docs/tests/council-review-skill/council-brainstorm-end-to-end-through-bundled-cli.md
deleted file mode 100644
index bb50073..0000000
--- a/docs/tests/council-review-skill/council-brainstorm-end-to-end-through-bundled-cli.md
+++ /dev/null
@@ -1,108 +0,0 @@
-# Case: `council-brainstorm-end-to-end-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and a high-level council workflow validation.
-
-The goal is to verify that a leader using the packaged `council-review` skill can drive `council start -> wait -> tally -> report` while three real reviewer agents return structured outputs through the packaged inbox skill.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can use the bundled `./assets/orch` CLI through the council-review skill
-- three reviewer agents can claim and complete their fixed-role inbox tasks
-- the leader can wait, tally, and report after all reviewer outputs arrive
-- the final report defaults to `consensus,majority`
-- a markdown report artifact is written
-
-## Preconditions
-
-- council-review skill path exists: `COUNCIL_SKILL_PATH=skills/council-review`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- bundled CLI executables exist at `COUNCIL_SKILL_PATH/assets/orch` and `INBOX_SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching role agents through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-
-## Agent Topology
-
-- `leader`
-- `architecture-reviewer`
-- `implementation-reviewer`
-- `risk-reviewer`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $council-review at COUNCIL_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) start council run council_skill_001 with a short architecture review prompt, 2) wait until all three reviewers complete, 3) tally with normal similarity, 4) report with default settings, 5) stop after reporting RUN_ID and REPORT_PATH. Do not use ordinary chat to coordinate with the reviewers.
-```
-
-### Architecture Reviewer Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as architecture-reviewer on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch and claim your assigned council task, 2) complete it with done using this exact JSON body: {"reviewer_role":"architecture-reviewer","findings":[{"title":"Split contracts","summary":"Transport contracts are mixed into UI code.","proposal":"Move API contract definitions into a dedicated module.","rationale":"This lowers coupling.","confidence":"high","tags":["architecture"],"target_refs":{"repo_path":"."}},{"title":"Share helpers","summary":"Council report rendering paths are repeated.","proposal":"Introduce shared council coordinator helpers for report rendering.","rationale":"This keeps report assembly consistent.","confidence":"medium","tags":["reporting"],"target_refs":{"repo_path":"."}}]}, 3) stop after reporting THREAD_ID. Do not use ordinary chat to coordinate with the leader.
-```
-
-### Implementation Reviewer Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as implementation-reviewer on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch and claim your assigned council task, 2) complete it with done using this exact JSON body: {"reviewer_role":"implementation-reviewer","findings":[{"title":"Extract contracts","summary":"Shared transport shapes are duplicated.","proposal":"Move API contract definitions into dedicated module","rationale":"This reduces duplication.","confidence":"high","tags":["maintainability"],"target_refs":{"repo_path":"."}},{"title":"Reuse report helpers","summary":"Formatting logic should stay shared.","proposal":"Introduce shared council coordinator helpers for report rendering","rationale":"This avoids formatter drift.","confidence":"medium","tags":["reporting"],"target_refs":{"repo_path":"."}}]}, 3) stop after reporting THREAD_ID. Do not use ordinary chat to coordinate with the leader.
-```
-
-### Risk Reviewer Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as risk-reviewer on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch and claim your assigned council task, 2) complete it with done using this exact JSON body: {"reviewer_role":"risk-reviewer","findings":[{"title":"Lock contracts","summary":"Contract drift becomes risky over time.","proposal":"Move API contract definitions into a dedicated module.","rationale":"This reduces integration regressions.","confidence":"high","tags":["risk"],"target_refs":{"repo_path":"."}},{"title":"Cover JSON output","summary":"The council report response should stay stable.","proposal":"Add regression tests for council report JSON output.","rationale":"This catches contract regressions earlier.","confidence":"high","tags":["testing"],"target_refs":{"repo_path":"."}}]}, 3) stop after reporting THREAD_ID. Do not use ordinary chat to coordinate with the leader.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Inject `skills/council-review/` into `leader`
-3. Inject `skills/inbox/` into the three reviewer agents
-4. Point all agents at the same database path `TMPDIR/coord.db`
-5. Launch `leader`, `architecture-reviewer`, `implementation-reviewer`, and `risk-reviewer` in parallel
-6. Wait for all agents to finish
-7. Resolve `RUN_ID=council_skill_001`, reviewer `THREAD_ID`s, and `REPORT_PATH` from the agent outputs
-8. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-COUNCIL_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json status --run council_skill_001
-COUNCIL_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json council report --run council_skill_001
-test -f REPORT_PATH
-```
-
-## Expected Outcomes
-
-- the leader successfully starts `council_skill_001`
-- all three reviewers complete their fixed-role tasks
-- `council wait` returns `all_complete == true`
-- `council tally` returns one `consensus`, one `majority`, and one `minority`
-- `council report` defaults to showing `consensus,majority`
-- a markdown report artifact exists on disk
-
-## Assertions
-
-- `status.data.run.status == "done"`
-- `status.data.tasks` contains exactly three reviewer tasks and all are `done`
-- `report.data.show == ["consensus","majority"]`
-- `report.data.summary.consensus == 1`
-- `report.data.summary.majority == 1`
-- `report.data.summary.minority == 1`
-- `report.data.grouped_recommendations` length is `2`
-- `REPORT_PATH` exists
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
diff --git a/docs/tests/council-review-skill/council-report-rejects-before-tally-through-bundled-cli.md b/docs/tests/council-review-skill/council-report-rejects-before-tally-through-bundled-cli.md
deleted file mode 100644
index 0fdccbe..0000000
--- a/docs/tests/council-review-skill/council-report-rejects-before-tally-through-bundled-cli.md
+++ /dev/null
@@ -1,73 +0,0 @@
-# Case: `council-report-rejects-before-tally-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and an invalid-state council workflow validation.
-
-The goal is to verify that a leader using the packaged `council-review` skill sees the expected stable error when report is attempted before grouped recommendations have been persisted.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can start a council run through the bundled council-review skill
-- the leader can attempt report without tally
-- the command returns the stable invalid-state contract rather than fabricating an empty report
-
-## Preconditions
-
-- council-review skill path exists: `COUNCIL_SKILL_PATH=skills/council-review`
-- bundled CLI executable exists at `COUNCIL_SKILL_PATH/assets/orch`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching role agents through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-
-## Agent Topology
-
-- `leader`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $council-review at COUNCIL_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) start council run council_skill_004 with a short review target, 2) attempt council report immediately without running tally, 3) stop after reporting RUN_ID, exit code, and error payload. Do not use ordinary chat to simulate reviewer output.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Inject `skills/council-review/` into `leader`
-3. Point the leader at the database path `TMPDIR/coord.db`
-4. Launch the leader
-5. Wait for the leader to finish
-6. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-COUNCIL_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json council report --run council_skill_004
-```
-
-## Expected Outcomes
-
-- the leader successfully starts `council_skill_004`
-- the report command exits with the stable invalid-state contract
-- the error message indicates that council tally must run first
-
-## Assertions
-
-- command exit code is `30`
-- error code is `invalid_state`
-- the error message mentions that grouped recommendations are not available yet or that `council tally` must run first
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
diff --git a/docs/tests/council-review-skill/council-report-rejects-invalid-show-through-bundled-cli.md b/docs/tests/council-review-skill/council-report-rejects-invalid-show-through-bundled-cli.md
deleted file mode 100644
index 65b2108..0000000
--- a/docs/tests/council-review-skill/council-report-rejects-invalid-show-through-bundled-cli.md
+++ /dev/null
@@ -1,102 +0,0 @@
-# Case: `council-report-rejects-invalid-show-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and an invalid-input report-filter validation.
-
-The goal is to verify that a leader using the packaged `council-review` skill reaches the stable `invalid_input` error contract when it asks `council report` for an unsupported bucket list.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can drive a real council run through `start -> wait -> tally`
-- three reviewer agents can complete their tasks through the packaged inbox skill
-- the leader can attempt `council report --show consensus,invalid`
-- the skill surfaces the stable `invalid_input` error instead of silently dropping the bad bucket
-
-## Preconditions
-
-- council-review skill path exists: `COUNCIL_SKILL_PATH=skills/council-review`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- bundled CLI executables exist at `COUNCIL_SKILL_PATH/assets/orch` and `INBOX_SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching role agents through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-
-## Agent Topology
-
-- `leader`
-- `architecture-reviewer`
-- `implementation-reviewer`
-- `risk-reviewer`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $council-review at COUNCIL_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) start council run council_skill_006 with a short architecture review prompt, 2) wait until all three reviewers complete, 3) tally with normal similarity, 4) attempt council report with --show consensus,invalid, 5) stop after reporting RUN_ID, exit code, and the error payload you observed. Do not use ordinary chat to coordinate with the reviewers.
-```
-
-### Reviewer Prompts
-
-- Reuse the same reviewer body JSON and inbox-only workflow as in [council-brainstorm-end-to-end-through-bundled-cli.md](./council-brainstorm-end-to-end-through-bundled-cli.md), but target run `council_skill_006`.
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Inject `skills/council-review/` into `leader`
-3. Inject `skills/inbox/` into the three reviewer agents
-4. Point all agents at the same database path `TMPDIR/coord.db`
-5. Launch `leader`, `architecture-reviewer`, `implementation-reviewer`, and `risk-reviewer` in parallel
-6. Wait for all agents to finish
-7. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-COUNCIL_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json council report --run council_skill_006 --show consensus,invalid
-```
-
-## Expected Outcomes
-
-- the leader successfully starts `council_skill_006`
-- reviewer completion and tally both succeed before the invalid report attempt
-- the report command exits with the stable invalid-input contract
-- the error message names the accepted bucket values
-
-## Assertions
-
-- command exit code is `30`
-- error code is `invalid_input`
-- the error message mentions `consensus`
-- the error message mentions `majority`
-- the error message mentions `minority`
-- the error message mentions `all`
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
-
-## Recorded Real Forward Run
-
-- recorded on: `2026-03-19`
-- execution mode: `real_subagent_forward_test`
-- result: `pass`
-- evidence root: `/tmp/council-skill-invalid-show-narrow.Sw6so6`
-- observed run id: `council_skill_006`
-- observed thread ids:
-- `architecture-reviewer`: `thr_7fad634dd9d245239d4fbd2287992d54`
-- `implementation-reviewer`: `thr_fc76cff125f04fc491064b828a18ff69`
-- `risk-reviewer`: `thr_f421bf49fa1240beb5c7a2d5f38aab6b`
-- evidence summary:
-- main-thread `status --run council_skill_006 --json` returned `run.status == "done"` and `task_counts.done == 3`
-- main-thread `council report --run council_skill_006 --show consensus,invalid --json` exited with code `30`
-- the returned error payload was `invalid_input` with message `show must contain consensus, majority, minority, or all`
diff --git a/docs/tests/council-review-skill/council-report-show-all-includes-minority-through-bundled-cli.md b/docs/tests/council-review-skill/council-report-show-all-includes-minority-through-bundled-cli.md
deleted file mode 100644
index 66d8a5d..0000000
--- a/docs/tests/council-review-skill/council-report-show-all-includes-minority-through-bundled-cli.md
+++ /dev/null
@@ -1,107 +0,0 @@
-# Case: `council-report-show-all-includes-minority-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and an explicit report-filter validation.
-
-The goal is to verify that a leader using the packaged `council-review` skill can override the default report buckets and explicitly request the minority group through the bundled CLI.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can drive a complete `start -> wait -> tally -> report` council flow through the bundled council-review skill
-- three reviewer agents can complete their tasks through the packaged inbox skill
-- the leader can request `council report --show all`
-- the final report includes `consensus`, `majority`, and `minority`
-
-## Preconditions
-
-- council-review skill path exists: `COUNCIL_SKILL_PATH=skills/council-review`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- bundled CLI executables exist at `COUNCIL_SKILL_PATH/assets/orch` and `INBOX_SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching role agents through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-
-## Agent Topology
-
-- `leader`
-- `architecture-reviewer`
-- `implementation-reviewer`
-- `risk-reviewer`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $council-review at COUNCIL_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) start council run council_skill_005 with a short architecture review prompt, 2) wait until all three reviewers complete, 3) tally with normal similarity, 4) report with --show all, 5) stop after reporting RUN_ID, REPORT_PATH, and the show buckets you observed. Do not use ordinary chat to coordinate with the reviewers.
-```
-
-### Reviewer Prompts
-
-- Reuse the same reviewer body JSON and inbox-only workflow as in [council-brainstorm-end-to-end-through-bundled-cli.md](./council-brainstorm-end-to-end-through-bundled-cli.md), but target run `council_skill_005`.
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Inject `skills/council-review/` into `leader`
-3. Inject `skills/inbox/` into the three reviewer agents
-4. Point all agents at the same database path `TMPDIR/coord.db`
-5. Launch `leader`, `architecture-reviewer`, `implementation-reviewer`, and `risk-reviewer` in parallel
-6. Wait for all agents to finish
-7. Resolve `RUN_ID=council_skill_005` and `REPORT_PATH` from the agent outputs
-8. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-COUNCIL_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json council report --run council_skill_005 --show all
-test -f REPORT_PATH
-```
-
-## Expected Outcomes
-
-- the leader successfully starts `council_skill_005`
-- all three reviewers complete their fixed-role tasks
-- the report succeeds with explicit `show == ["consensus","majority","minority"]`
-- the minority recommendation is present in `grouped_recommendations`
-- a markdown report artifact exists on disk
-
-## Assertions
-
-- `report.data.show == ["consensus","majority","minority"]`
-- `report.data.summary.consensus == 1`
-- `report.data.summary.majority == 1`
-- `report.data.summary.minority == 1`
-- `report.data.grouped_recommendations` length is `3`
-- at least one returned recommendation has `bucket == "minority"`
-- `REPORT_PATH` exists
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
-
-## Recorded Real Forward Run
-
-- recorded on: `2026-03-19`
-- execution mode: `real_subagent_forward_test`
-- result: `pass`
-- evidence root: `/tmp/council-skill-show-all-narrow.Uk0ThB`
-- observed run id: `council_skill_005`
-- observed thread ids:
-- `architecture-reviewer`: `thr_c4cb0a9a5dd142619e854fc0f3864ea8`
-- `implementation-reviewer`: `thr_3a54f2e1bc6945f38627958f7f6b4728`
-- `risk-reviewer`: `thr_16765453dedf45b4a6ccf4ecfab710db`
-- observed report path: `/tmp/council-skill-show-all-narrow.Uk0ThB/.orch/reports/council_skill_005.md`
-- evidence summary:
-- main-thread `status --run council_skill_005 --json` returned `run.status == "done"` and `task_counts.done == 3`
-- main-thread `council report --run council_skill_005 --show all --json` returned `show == ["consensus","majority","minority"]`, summary counts `1/1/1`, and `grouped_recommendations` length `3`
-- the returned groups included a `minority` bucket and the markdown artifact existed on disk
diff --git a/docs/tests/council-review-skill/council-reviewer-output-invalid-json-fails-tally-through-bundled-cli.md b/docs/tests/council-review-skill/council-reviewer-output-invalid-json-fails-tally-through-bundled-cli.md
deleted file mode 100644
index 51a0a7a..0000000
--- a/docs/tests/council-review-skill/council-reviewer-output-invalid-json-fails-tally-through-bundled-cli.md
+++ /dev/null
@@ -1,126 +0,0 @@
-# Case: `council-reviewer-output-invalid-json-fails-tally-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and a malformed-reviewer-output validation.
-
-The goal is to verify that a leader using the packaged `council-review` skill reaches the stable tally-time `invalid_input` contract when one reviewer completes its inbox task with malformed council JSON.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can start a real council run through the bundled council-review skill
-- all three reviewer tasks can still reach terminal `done` state through the packaged inbox skill
-- one reviewer can return malformed JSON in the result body
-- the leader sees `council tally` fail with the expected invalid-input error instead of a silent partial tally
-- malformed JSON is exercised as the most realistic representative of the same reviewer-output validation layer that also rejects missing `reviewer_role` and role mismatches
-
-## Preconditions
-
-- council-review skill path exists: `COUNCIL_SKILL_PATH=skills/council-review`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- bundled CLI executables exist at `COUNCIL_SKILL_PATH/assets/orch` and `INBOX_SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching role agents through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-
-## Agent Topology
-
-- `leader`
-- `architecture-reviewer`
-- `implementation-reviewer`
-- `risk-reviewer`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $council-review at COUNCIL_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) start council run council_skill_008 with a short architecture review prompt, 2) wait until all three reviewers complete, 3) attempt council tally with normal similarity, 4) stop after reporting RUN_ID, exit code, and the error payload you observed. Do not use ordinary chat to coordinate with the reviewers.
-```
-
-### Architecture Reviewer Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as architecture-reviewer on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill.
-
-Workflow:
-1) fetch and claim your assigned council task
-2) write TMPDIR/architecture-invalid.json containing exactly this invalid JSON body:
-{"reviewer_role":"architecture-reviewer","findings":[{"title":"Split contracts","summary":"Transport contracts are mixed into UI code.","proposal":"Move API contract definitions into a dedicated module."}
-3) complete the task with done using summary "Review complete" and --body-file TMPDIR/architecture-invalid.json
-4) stop after reporting THREAD_ID and the body file path
-
-Do not use ordinary chat to coordinate with the leader.
-```
-
-### Implementation Reviewer Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as implementation-reviewer on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch and claim your assigned council task, 2) complete it with done using this exact JSON body: {"reviewer_role":"implementation-reviewer","findings":[{"title":"Extract API contracts","summary":"Shared transport shapes are duplicated.","proposal":"Move API contract definitions into dedicated module","rationale":"This reduces duplication.","confidence":"medium","tags":["maintainability"],"target_refs":{"repo_path":"."}}]}, 3) stop after reporting THREAD_ID. Do not use ordinary chat to coordinate with the leader.
-```
-
-### Risk Reviewer Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as risk-reviewer on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch and claim your assigned council task, 2) complete it with done using this exact JSON body: {"reviewer_role":"risk-reviewer","findings":[{"title":"Add auth integration tests","summary":"Login regressions are hard to catch.","proposal":"Add integration tests for auth flows.","rationale":"This catches regressions earlier.","confidence":"high","tags":["risk"],"target_refs":{"repo_path":"."}}]}, 3) stop after reporting THREAD_ID. Do not use ordinary chat to coordinate with the leader.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Inject `skills/council-review/` into `leader`
-3. Inject `skills/inbox/` into the three reviewer agents
-4. Point all agents at the same database path `TMPDIR/coord.db`
-5. Launch `leader`, `architecture-reviewer`, `implementation-reviewer`, and `risk-reviewer` in parallel
-6. Wait for all agents to finish
-7. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-COUNCIL_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json council wait --run council_skill_008 --timeout-seconds 2
-COUNCIL_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json council tally --run council_skill_008 --similarity normal
-```
-
-## Expected Outcomes
-
-- all three reviewer tasks still reach terminal `done`
-- `council wait` returns `all_complete == true`
-- `council tally` exits with the stable invalid-input contract
-- the error message indicates that reviewer output must be valid JSON
-
-## Assertions
-
-- `wait.data.all_complete == true`
-- command exit code for `council tally` is `30`
-- error code is `invalid_input`
-- the error message mentions `reviewer output must be valid JSON`
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
-
-## Recorded Real Forward Run
-
-- recorded on: `2026-03-19`
-- execution mode: `real_subagent_forward_test`
-- result: `pass`
-- evidence root: `/tmp/council-reviewer-output-invalid-json-fails-tally-through-bundled-cli.narrow1.i6ZP98`
-- observed run id: `council_skill_008`
-- observed thread ids:
-- `architecture-reviewer`: `thr_350c43fdf8a449228b8611ce5114326d`
-- `implementation-reviewer`: `thr_db858b530cb044a7bceeaa417f1cea75`
-- `risk-reviewer`: `thr_1c93381b070c47c49e312039b8343655`
-- evidence summary:
-- main-thread `council wait --run council_skill_008 --timeout-seconds 2 --json` returned `woke == true` and `all_complete == true`
-- main-thread `council tally --run council_skill_008 --similarity normal --json` exited with code `30`
-- the returned error payload was `invalid_input` with message `reviewer output must be valid JSON`
-- this run confirmed the negative path where reviewer tasks are all `done` but tally still fails on stored reviewer-output validation
diff --git a/docs/tests/council-review-skill/council-start-with-target-file-through-bundled-cli.md b/docs/tests/council-review-skill/council-start-with-target-file-through-bundled-cli.md
deleted file mode 100644
index faf5a84..0000000
--- a/docs/tests/council-review-skill/council-start-with-target-file-through-bundled-cli.md
+++ /dev/null
@@ -1,113 +0,0 @@
-# Case: `council-start-with-target-file-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and a non-prompt target-context validation.
-
-The goal is to verify that a leader using the packaged `council-review` skill can start a council run from explicit `--target-file` context instead of relying on a pure inline prompt.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the test runner can prepare a concrete brief file before launching the leader
-- the leader can start a council run through the bundled council-review skill using `--target-file`
-- the target-file path is persisted in council input metadata
-- reviewer tasks are still dispatched normally from the file-based target
-
-## Preconditions
-
-- council-review skill path exists: `COUNCIL_SKILL_PATH=skills/council-review`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- bundled CLI executables exist at `COUNCIL_SKILL_PATH/assets/orch` and `INBOX_SKILL_PATH/assets/inbox`
-- `sqlite3` is available locally for metadata validation
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching the leader through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-
-## Agent Topology
-
-- `leader`
-
-## Inputs
-
-### Target File Fixture
-
-Create `TMPDIR/brief.md` before launching the leader with contents similar to:
-
-```md
-# Brief
-
-Review the current council-review packaging flow.
-
-- Confirm the skill can carry file-based context.
-- Focus on documentation quality and report semantics.
-```
-
-### Leader Prompt
-
-```text
-Use $council-review at COUNCIL_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) start council run council_skill_009 using --target-file TMPDIR/brief.md, --target-type mixed, and --mode review, 2) stop after reporting RUN_ID and the target metadata you observed from the start response. Do not use ordinary chat to simulate reviewer work.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Create `TMPDIR/brief.md` with the target file contents
-3. Inject `skills/council-review/` into `leader`
-4. Point the leader at the database path `TMPDIR/coord.db`
-5. Launch the leader
-6. Wait for the leader to finish
-7. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-COUNCIL_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json run show --run council_skill_009
-COUNCIL_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json status --run council_skill_009
-sqlite3 TMPDIR/coord.db "SELECT prompt, target_file, repo_path, target_task_id FROM council_inputs WHERE run_id = 'council_skill_009';"
-sqlite3 TMPDIR/coord.db "SELECT acceptance_json FROM tasks WHERE run_id = 'council_skill_009' AND task_id = 'CR1';"
-```
-
-## Expected Outcomes
-
-- the leader successfully starts `council_skill_009`
-- the run goal references the target file rather than an inline prompt
-- the stored council input row keeps `target_file == TMPDIR/brief.md`
-- reviewer task dispatch still produces the usual three council tasks
-- reviewer task acceptance metadata carries the `target_file` reference forward
-
-## Assertions
-
-- `run_show.data.run.goal` mentions `brief.md`
-- `status.data.tasks` length is `3`
-- `status.data.run.status` is not terminal
-- the `council_inputs` row has empty `prompt`, `repo_path`, and `target_task_id`
-- the `council_inputs` row has `target_file == "TMPDIR/brief.md"`
-- the `CR1` acceptance JSON contains `"target_file":"TMPDIR/brief.md"`
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR`, `brief.md`, and `coord.db` for replay and manual inspection
-
-## Recorded Real Forward Run
-
-- recorded on: `2026-03-19`
-- execution mode: `real_subagent_forward_test`
-- result: `pass`
-- evidence root: `/tmp/council-skill-target-file.ikPOLP`
-- observed run id: `council_skill_009`
-- observed thread ids:
-- `CR1`: `thr_32df58f9b55945b899257f583708b7ef`
-- `CR2`: `thr_c5f8c552cb1240649546df8386be3668`
-- `CR3`: `thr_172eabff13eb48ed9af2deee928a9438`
-- evidence summary:
-- main-thread `status --run council_skill_009 --json` returned three `dispatched` council tasks and a non-terminal run
-- main-thread `sqlite3` validation showed `council_inputs.target_file == "/tmp/council-skill-target-file.ikPOLP/brief.md"` with empty `prompt`, `repo_path`, and `target_task_id`
-- main-thread `sqlite3` validation of `CR1` acceptance JSON showed the same `target_file` persisted into the council task payload
diff --git a/docs/tests/council-review-skill/council-tally-strict-keeps-distinct-proposals-through-bundled-cli.md b/docs/tests/council-review-skill/council-tally-strict-keeps-distinct-proposals-through-bundled-cli.md
deleted file mode 100644
index 683ae02..0000000
--- a/docs/tests/council-review-skill/council-tally-strict-keeps-distinct-proposals-through-bundled-cli.md
+++ /dev/null
@@ -1,120 +0,0 @@
-# Case: `council-tally-strict-keeps-distinct-proposals-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and a strict-similarity tally validation.
-
-The goal is to verify that a leader using the packaged `council-review` skill can request `--similarity strict` and preserve wording-level proposal differences that would normally collapse in `normal` mode.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can drive `start -> wait -> tally` through the bundled council-review skill
-- three reviewer agents can complete their tasks through the packaged inbox skill
-- the architecture and implementation reviewers can submit near-duplicate but not identical proposals
-- strict tally keeps all three proposals as separate minority groups
-
-## Preconditions
-
-- council-review skill path exists: `COUNCIL_SKILL_PATH=skills/council-review`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- bundled CLI executables exist at `COUNCIL_SKILL_PATH/assets/orch` and `INBOX_SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching role agents through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-
-## Agent Topology
-
-- `leader`
-- `architecture-reviewer`
-- `implementation-reviewer`
-- `risk-reviewer`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $council-review at COUNCIL_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) start council run council_skill_007 with a short architecture review prompt, 2) wait until all three reviewers complete, 3) tally with --similarity strict, 4) stop after reporting RUN_ID, tally counts, and the grouped proposals you observed. Do not use ordinary chat to coordinate with the reviewers.
-```
-
-### Architecture Reviewer Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as architecture-reviewer on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch and claim your assigned council task, 2) complete it with done using this exact JSON body: {"reviewer_role":"architecture-reviewer","findings":[{"title":"Split contracts","summary":"Transport contracts are mixed into UI code.","proposal":"Move API contract definitions into a dedicated module.","rationale":"This lowers coupling.","confidence":"high","tags":["architecture"],"target_refs":{"repo_path":"."}}]}, 3) stop after reporting THREAD_ID. Do not use ordinary chat to coordinate with the leader.
-```
-
-### Implementation Reviewer Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as implementation-reviewer on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch and claim your assigned council task, 2) complete it with done using this exact JSON body: {"reviewer_role":"implementation-reviewer","findings":[{"title":"Extract API contracts","summary":"Shared transport shapes are duplicated.","proposal":"Move API contract definitions into dedicated module","rationale":"This reduces duplication.","confidence":"medium","tags":["maintainability"],"target_refs":{"repo_path":"."}}]}, 3) stop after reporting THREAD_ID. Do not use ordinary chat to coordinate with the leader.
-```
-
-### Risk Reviewer Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as risk-reviewer on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch and claim your assigned council task, 2) complete it with done using this exact JSON body: {"reviewer_role":"risk-reviewer","findings":[{"title":"Add auth integration tests","summary":"Login regressions are hard to catch.","proposal":"Add integration tests for auth flows.","rationale":"This catches regressions earlier.","confidence":"high","tags":["risk"],"target_refs":{"repo_path":"."}}]}, 3) stop after reporting THREAD_ID. Do not use ordinary chat to coordinate with the leader.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Inject `skills/council-review/` into `leader`
-3. Inject `skills/inbox/` into the three reviewer agents
-4. Point all agents at the same database path `TMPDIR/coord.db`
-5. Launch `leader`, `architecture-reviewer`, `implementation-reviewer`, and `risk-reviewer` in parallel
-6. Wait for all agents to finish
-7. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-COUNCIL_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json council wait --run council_skill_007 --timeout-seconds 2
-COUNCIL_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json council tally --run council_skill_007 --similarity strict
-```
-
-## Expected Outcomes
-
-- all three reviewers complete their fixed-role tasks
-- `council wait` returns `all_complete == true`
-- `council tally` succeeds with `similarity == "strict"`
-- the two nearly identical contract proposals remain separate rather than merging
-- every resulting recommendation lands in `minority`
-
-## Assertions
-
-- `wait.data.all_complete == true`
-- `tally.data.similarity == "strict"`
-- `tally.data.counts.minority == 3`
-- `tally.data.grouped_recommendations` length is `3`
-- every returned recommendation has `bucket == "minority"`
-- the returned proposal set contains `Move API contract definitions into a dedicated module.`
-- the returned proposal set contains `Move API contract definitions into dedicated module`
-- the returned proposal set contains `Add integration tests for auth flows.`
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
-
-## Recorded Real Forward Run
-
-- recorded on: `2026-03-19`
-- execution mode: `real_subagent_forward_test`
-- result: `pass`
-- evidence root: `/tmp/council-tally-strict-keeps-distinct-proposals-through-bundled-cli.narrow4.UCbqOc`
-- observed run id: `council_skill_007`
-- observed thread ids:
-- `architecture-reviewer`: `thr_9e153f61692b4475a55f5c3068842ea5`
-- `implementation-reviewer`: `thr_abbd9a2961374b13b3d3e27720fe27ab`
-- `risk-reviewer`: `thr_3f2d64211f274f64b606bd8b8c6be5f7`
-- evidence summary:
-- main-thread `council wait --run council_skill_007 --timeout-seconds 2 --json` returned `woke == true` and `all_complete == true`
-- main-thread `council tally --run council_skill_007 --similarity strict --json` returned `similarity == "strict"` and `counts.minority == 3`
-- the returned proposal set preserved all three distinct values, including both `Move API contract definitions into a dedicated module.` and `Move API contract definitions into dedicated module`
diff --git a/docs/tests/council-review-skill/council-unanimous-only-default-report-through-bundled-cli.md b/docs/tests/council-review-skill/council-unanimous-only-default-report-through-bundled-cli.md
deleted file mode 100644
index 31408a7..0000000
--- a/docs/tests/council-review-skill/council-unanimous-only-default-report-through-bundled-cli.md
+++ /dev/null
@@ -1,88 +0,0 @@
-# Case: `council-unanimous-only-default-report-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and a unanimous-only reporting validation.
-
-The goal is to verify that a leader using the packaged `council-review` skill can run a unanimous-only council and observe the expected default report behavior after tally.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can start a council run with `--only-unanimous`
-- three reviewer agents can complete their tasks through the packaged inbox skill
-- the leader can tally and report through the bundled council-review skill
-- the final report defaults to `consensus` only while preserving the full summary counts
-
-## Preconditions
-
-- council-review skill path exists: `COUNCIL_SKILL_PATH=skills/council-review`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- bundled CLI executables exist at `COUNCIL_SKILL_PATH/assets/orch` and `INBOX_SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching role agents through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-
-## Agent Topology
-
-- `leader`
-- `architecture-reviewer`
-- `implementation-reviewer`
-- `risk-reviewer`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $council-review at COUNCIL_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) start council run council_skill_002 with --only-unanimous, 2) wait until all three reviewers complete, 3) tally with normal similarity, 4) report with default settings, 5) stop after reporting RUN_ID and the default show buckets you observed. Do not use ordinary chat to coordinate with the reviewers.
-```
-
-### Reviewer Prompts
-
-- Reuse the same reviewer body JSON and inbox-only workflow as in [council-brainstorm-end-to-end-through-bundled-cli.md](./council-brainstorm-end-to-end-through-bundled-cli.md), but target run `council_skill_002`.
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Inject `skills/council-review/` into `leader`
-3. Inject `skills/inbox/` into the three reviewer agents
-4. Point all agents at the same database path `TMPDIR/coord.db`
-5. Launch `leader`, `architecture-reviewer`, `implementation-reviewer`, and `risk-reviewer` in parallel
-6. Wait for all agents to finish
-7. Resolve `RUN_ID=council_skill_002` from the agent outputs
-8. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-COUNCIL_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json council report --run council_skill_002
-COUNCIL_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json status --run council_skill_002
-```
-
-## Expected Outcomes
-
-- the unanimous-only run completes successfully
-- the report default `show` value is only `consensus`
-- the underlying summary still contains `consensus`, `majority`, and `minority` counts
-- only the consensus group is returned in `grouped_recommendations`
-
-## Assertions
-
-- `report.data.show == ["consensus"]`
-- `report.data.summary.consensus == 1`
-- `report.data.summary.majority == 1`
-- `report.data.summary.minority == 1`
-- `report.data.grouped_recommendations` length is `1`
-- the sole returned recommendation has `bucket == "consensus"`
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
diff --git a/docs/tests/council-review-skill/council-wait-timeout-through-bundled-cli.md b/docs/tests/council-review-skill/council-wait-timeout-through-bundled-cli.md
deleted file mode 100644
index 4c7752a..0000000
--- a/docs/tests/council-review-skill/council-wait-timeout-through-bundled-cli.md
+++ /dev/null
@@ -1,77 +0,0 @@
-# Case: `council-wait-timeout-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and a timeout-path council workflow validation.
-
-The goal is to verify that a leader using the packaged `council-review` skill sees the expected timeout contract when reviewer tasks do not complete.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can start a council run through the bundled skill CLI
-- the leader can call `council wait` with a short timeout
-- the command reports `woke == false` and `all_complete == false`
-- reviewer task metadata remains visible for later follow-up
-
-## Preconditions
-
-- council-review skill path exists: `COUNCIL_SKILL_PATH=skills/council-review`
-- bundled CLI executable exists at `COUNCIL_SKILL_PATH/assets/orch`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching role agents through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-
-## Agent Topology
-
-- `leader`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $council-review at COUNCIL_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) start council run council_skill_003 with a short review target, 2) immediately call council wait with a short timeout such as 1 second, 3) stop after reporting RUN_ID and the wait result you observed. Do not use ordinary chat to simulate reviewer output.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- override the council wait timeout to a short interval such as `1s`
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Inject `skills/council-review/` into `leader`
-3. Point the leader at the database path `TMPDIR/coord.db`
-4. Launch the leader
-5. Wait for the leader to finish
-6. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-COUNCIL_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json council wait --run council_skill_003 --timeout-seconds 1
-COUNCIL_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json status --run council_skill_003
-```
-
-## Expected Outcomes
-
-- the leader successfully starts `council_skill_003`
-- `council wait` times out cleanly
-- the wait response still includes three reviewer statuses
-- the run remains non-terminal because reviewers have not completed
-
-## Assertions
-
-- `wait.data.woke == false`
-- `wait.data.all_complete == false`
-- `wait.data.reviewers` length is `3`
-- `status.data.run.status` is not `done`
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
diff --git a/docs/tests/inbox-skill/README.md b/docs/tests/inbox-skill/README.md
deleted file mode 100644
index ac7fed8..0000000
--- a/docs/tests/inbox-skill/README.md
+++ /dev/null
@@ -1,162 +0,0 @@
-# Inbox Skill Test Plan
-
-## Purpose
-
-This directory tracks human-readable test plans for the `skills/inbox/` Codex skill bundle.
-
-These documents are not command-contract specs for the `inbox` CLI itself.
-That coverage already lives under [../inbox/](../inbox/).
-
-This directory exists to describe a different test surface:
-
-- whether an agent can actually use the packaged inbox skill
-- whether multiple agents can coordinate through the bundled CLI asset
-- whether a real skill-guided conversation reaches the expected inbox state
-
-## Test Model
-
-- `README.md` is the index for this directory
-- each skill test case lives in its own Markdown file
-- use stable case slugs in filenames
-
-## Shared Execution Contract
-
-Use these defaults unless a case file explicitly overrides them:
-
-- run the scenario with real subagents, not simulated transcripts
-- inject the same skill bundle into every participating agent
-- launch all role agents in parallel when the scenario depends on agent-to-agent timing
-- require every agent to coordinate through the bundled CLI and shared SQLite DB instead of ordinary chat
-- validate the final inbox state independently from the main thread after the agents stop
-
-## How An Agent Runs These Cases
-
-Use one test-runner agent to execute each case.
-
-The test-runner agent is responsible for:
-
-- reading this `README.md` first, then one specific case file
-- creating an isolated temporary directory and SQLite DB path for that run
-- launching the role agents described in `Agent Topology`
-- injecting the same `skills/inbox/` bundle into every role agent
-- passing each role agent the prompt text from the case file with concrete values substituted for `SKILL_PATH`, `TMPDIR`, and `THREAD_ID` when needed
-- coordinating launch order or parallel start according to the case file
-- collecting agent final summaries as evidence
-- resolving the final `THREAD_ID`
-- running the `Validation Commands` from the main thread after the role agents stop
-- comparing the observed results against `Expected Outcomes` and `Assertions`
-- returning a final pass/fail judgment with concrete evidence
-
-The role agents are responsible for:
-
-- acting only within the role assigned in the case file
-- using the injected inbox skill rather than ad hoc repository discovery
-- coordinating through the bundled CLI and shared DB
-- reporting the concrete thread id, key command outcomes, and final observed state back to the test-runner agent
-
-The test-runner agent should treat a case as passed only when:
-
-- all role agents reach a final state without violating the case contract
-- the independent validation commands succeed
-- the final inbox state matches the assertions in the case file
-
-The test-runner agent should treat a case as failed when:
-
-- any role agent times out or stalls
-- a required inbox action is skipped
-- a role agent falls back to ordinary chat for critical coordination
-- the final inbox state conflicts with the documented assertions
-
-The test-runner agent should report results in this shape:
-
-- `case`
-- `db_path`
-- `thread_id`
-- `result`: `pass` or `fail`
-- `agent_summaries`
-- `validation_evidence`
-- `assertion_checklist`
-- `notes`
-
-## Default Timeouts
-
-Use these defaults unless a case file explicitly overrides them:
-
-- per-agent timeout: `3m`
-- overall scenario timeout: `5m`
-- async wait margin for the main thread: `30s`
-
-## Default Failure Conditions
-
-Treat the test as failed if any of the following happens:
-
-- any required agent does not reach a final state before timeout
-- any required inbox command returns a non-success result unless the case expects that failure
-- the final `show` output does not match the expected thread state
-- the expected message sequence or key message bodies do not appear
-- the agents fall back to ordinary chat for critical coordination instead of inbox messages
-
-## Evidence Capture
-
-Collect at least the following artifacts for every run:
-
-- agent final summaries
-- final `show --thread THREAD_ID --json` output
-- the output of at least one independent listing or lookup command such as `list` or `fetch`
-- the temporary DB path and resolved thread id
-
-## Cleanup Policy
-
-Use these defaults unless a case file explicitly overrides them:
-
-- keep the temporary DB and working directory on failure for debugging
-- clean up the temporary DB and working directory on success only if the caller does not need replay artifacts
-
-## Per-Case Template
-
-Each case file should use this structure:
-
-- `Test Type`
-- `Purpose`
-- `Preconditions`
-- `Agent Topology`
-- `Inputs`
-- `Execution Parameters`
-- `Execution Steps`
-- `Validation Commands`
-- `Expected Outcomes`
-- `Assertions`
-- `Cleanup`
-- `Recorded Example Run` when a real run has already been captured
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `multi-agent-roundtrip-through-bundled-cli` | [multi-agent-roundtrip-through-bundled-cli.md](./multi-agent-roundtrip-through-bundled-cli.md) | validates that two agents can use the bundled inbox skill to complete a blocked question and done result roundtrip |
-| `parallel-workers-claim-conflict-through-bundled-cli` | [parallel-workers-claim-conflict-through-bundled-cli.md](./parallel-workers-claim-conflict-through-bundled-cli.md) | validates that two workers using the skill observe a real `lease_conflict` on the same thread |
-| `blocked-worker-timeout-without-reply-through-bundled-cli` | [blocked-worker-timeout-without-reply-through-bundled-cli.md](./blocked-worker-timeout-without-reply-through-bundled-cli.md) | validates that a blocked worker using the skill receives the expected `wait-reply` timeout outcome when no leader reply arrives |
-| `leader-cancels-claimed-thread-through-bundled-cli` | [leader-cancels-claimed-thread-through-bundled-cli.md](./leader-cancels-claimed-thread-through-bundled-cli.md) | validates that a leader can cancel an actively claimed thread and that both agents observe the cancelled terminal state |
-| `artifact-roundtrip-through-bundled-cli` | [artifact-roundtrip-through-bundled-cli.md](./artifact-roundtrip-through-bundled-cli.md) | validates that bundled CLI usage through the skill preserves body-file and artifact data across task and result messages |
-
-## Scope
-
-In scope:
-
-- explicit `$inbox` skill invocation
-- bundled `./assets/inbox` CLI usage
-- shared SQLite DB coordination between multiple agents
-- end-to-end thread state and message history validation
-- negative-path skill scenarios such as lease conflicts and reply timeouts
-- skill-guided artifact and body-file roundtrips
-
-Out of scope:
-
-- per-command flag and JSON contract coverage
-- store-level race conditions
-- implicit skill triggering without `$inbox`
-
-## Relationship To Other Test Docs
-
-- [../inbox/](../inbox/) covers CLI command behavior
-- this directory covers skill-guided multi-agent behavior on top of that CLI
diff --git a/docs/tests/inbox-skill/artifact-roundtrip-through-bundled-cli.md b/docs/tests/inbox-skill/artifact-roundtrip-through-bundled-cli.md
deleted file mode 100644
index 51292c2..0000000
--- a/docs/tests/inbox-skill/artifact-roundtrip-through-bundled-cli.md
+++ /dev/null
@@ -1,83 +0,0 @@
-# Case: `artifact-roundtrip-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and an artifact-preservation validation.
-
-The goal is to verify that agents using the packaged inbox skill can exchange body-file content and artifacts through the bundled CLI without losing message data.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can create task input files and send them through the bundled CLI
-- the worker can inspect those artifacts through inbox history
-- the worker can return a final result using body-file or artifact inputs
-- the final thread history preserves both task-side and result-side file references
-
-## Preconditions
-
-- skill path exists: `SKILL_PATH=skills/inbox`
-- bundled CLI executable exists: `SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- test database path is `TMPDIR/coord.db`
-
-## Agent Topology
-
-- `leader`
-- `worker-a`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $inbox at SKILL_PATH to act as leader on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) initialize the DB, 2) create a small task file under TMPDIR, 3) send one task to worker-a using body-file plus at least one artifact and artifact metadata, 4) wait until worker-a marks the thread done, 5) inspect the final thread with show, 6) stop. Do not use ordinary chat to coordinate with the other agent.
-```
-
-### Worker Prompt
-
-```text
-Use $inbox at SKILL_PATH to act as worker-a on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch and claim the task, 2) inspect the task message with show and confirm the artifact is visible, 3) create a small result file under TMPDIR, 4) finish the thread with done using body-file or artifact input, 5) stop after reporting what files were preserved. Do not use ordinary chat to coordinate with the other agent.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Inject the same `skills/inbox/` skill into both real agents
-2. Point both agents at the same database path `TMPDIR/coord.db`
-3. Launch `leader` and `worker-a` in parallel
-4. Wait for both agents to finish
-5. Resolve `THREAD_ID` from the agent outputs or inbox history
-6. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-```
-
-## Expected Outcomes
-
-- `leader` successfully creates a task file and sends it through `body-file`
-- the initial task message contains at least one artifact reference
-- `worker-a` successfully inspects the task artifact through `show`
-- `worker-a` completes the thread with `done`
-- the final `show` output preserves task-side and result-side file content or artifact references
-
-## Assertions
-
-- the first task message contains non-empty body content sourced from a file
-- the first task message contains at least one artifact entry
-- the final `result` message contains either body-file content or at least one artifact entry
-- the final thread status is `done`
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR`, created files, and `coord.db` for replay and manual inspection
diff --git a/docs/tests/inbox-skill/blocked-worker-timeout-without-reply-through-bundled-cli.md b/docs/tests/inbox-skill/blocked-worker-timeout-without-reply-through-bundled-cli.md
deleted file mode 100644
index 1ab1bed..0000000
--- a/docs/tests/inbox-skill/blocked-worker-timeout-without-reply-through-bundled-cli.md
+++ /dev/null
@@ -1,88 +0,0 @@
-# Case: `blocked-worker-timeout-without-reply-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and a timeout-path skill validation.
-
-The goal is to verify that a blocked worker using the bundled inbox skill sees the correct `wait-reply` timeout behavior when no answer arrives.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- a worker can use the skill to fetch, claim, and block a real thread
-- the worker can call `wait-reply` through the bundled CLI
-- the leader intentionally does not answer
-- the worker receives the expected timeout contract instead of silently succeeding
-- the thread remains in a blocked state with the question preserved
-
-## Preconditions
-
-- skill path exists: `SKILL_PATH=skills/inbox`
-- bundled CLI executable exists: `SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- test database path is `TMPDIR/coord.db`
-
-## Agent Topology
-
-- `leader`
-- `worker-a`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $inbox at SKILL_PATH to act as leader on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) initialize the DB, 2) send exactly one task to worker-a, 3) monitor until worker-a asks one blocked question, 4) intentionally do not reply, 5) stop after confirming the thread is still blocked. Do not use ordinary chat to coordinate with the other agent.
-```
-
-### Worker Prompt
-
-```text
-Use $inbox at SKILL_PATH to act as worker-a on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch pending work, 2) claim it, 3) send a blocked update with exactly this question: 'Should logging go to stdout or stderr?', 4) call wait-reply with a short timeout, 5) stop after reporting the timeout result exactly as observed. Do not use ordinary chat to coordinate with the other agent.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- override the worker-side wait timeout to a short interval such as `10s`
-- keep the default cleanup policy
-
-## Execution Steps
-
-1. Inject the same `skills/inbox/` skill into both real agents
-2. Point both agents at the same database path `TMPDIR/coord.db`
-3. Launch `leader` and `worker-a` in parallel
-4. Wait for both agents to finish
-5. Resolve `THREAD_ID` from the agent outputs or inbox history
-6. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json list --status blocked
-```
-
-## Expected Outcomes
-
-- `leader` successfully creates one thread for `worker-a`
-- `worker-a` successfully fetches and claims it
-- `worker-a` emits one blocked `question`
-- the blocked question is preserved at least in `message.payload_json.question`
-- `worker-a` runs `wait-reply` and receives the no-match timeout contract
-- the leader emits no `answer` message
-- the final thread status remains `blocked`
-
-## Assertions
-
-- the worker reports exit code `10` and JSON error code `no_matching_work` from `wait-reply`
-- `show` includes the blocked `question` message
-- `show.data.messages[*].payload_json.question` contains `Should logging go to stdout or stderr?`
-- `show` does not include any `answer` message
-- `list --status blocked` returns the thread
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
diff --git a/docs/tests/inbox-skill/leader-cancels-claimed-thread-through-bundled-cli.md b/docs/tests/inbox-skill/leader-cancels-claimed-thread-through-bundled-cli.md
deleted file mode 100644
index f4c7c28..0000000
--- a/docs/tests/inbox-skill/leader-cancels-claimed-thread-through-bundled-cli.md
+++ /dev/null
@@ -1,84 +0,0 @@
-# Case: `leader-cancels-claimed-thread-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and a terminal-state intervention validation.
-
-The goal is to verify that a leader and worker can both observe a thread transition to `cancelled` through the bundled inbox skill while the thread is actively claimed.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the worker can fetch and claim a real thread through the skill
-- the leader can cancel that thread through the same bundled CLI
-- the final thread state is `cancelled`
-- both parties can inspect the terminal state from inbox history
-
-## Preconditions
-
-- skill path exists: `SKILL_PATH=skills/inbox`
-- bundled CLI executable exists: `SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- test database path is `TMPDIR/coord.db`
-
-## Agent Topology
-
-- `leader`
-- `worker-a`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $inbox at SKILL_PATH to act as leader on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) initialize the DB, 2) send exactly one task to worker-a, 3) wait until worker-a has claimed the thread or reported in_progress, 4) cancel the thread with a clear reason, 5) inspect the final thread with show, 6) stop. Do not use ordinary chat to coordinate with the other agent.
-```
-
-### Worker Prompt
-
-```text
-Use $inbox at SKILL_PATH to act as worker-a on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch pending work, 2) claim it, 3) send an in_progress update, 4) keep monitoring the thread until it reaches a terminal state, 5) stop after reporting the final status you observed. Do not use ordinary chat to coordinate with the other agent.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Inject the same `skills/inbox/` skill into both real agents
-2. Point both agents at the same database path `TMPDIR/coord.db`
-3. Launch `leader` and `worker-a` in parallel
-4. Wait for both agents to finish
-5. Resolve `THREAD_ID` from the agent outputs or inbox history
-6. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json list --status cancelled
-```
-
-## Expected Outcomes
-
-- `worker-a` successfully claims the thread
-- `worker-a` emits one `progress` message
-- `leader` successfully emits `cancel` with a reason
-- the final thread status is `cancelled`
-- the worker reports that it observed the cancelled terminal state
-
-## Assertions
-
-- `show` contains at least the message kinds `task -> event -> progress -> control`, in that order
-- the final thread status is `cancelled`
-- the terminal message or thread history captures the cancel reason
-- `list --status cancelled` returns the thread
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
diff --git a/docs/tests/inbox-skill/multi-agent-roundtrip-through-bundled-cli.md b/docs/tests/inbox-skill/multi-agent-roundtrip-through-bundled-cli.md
deleted file mode 100644
index f8bb904..0000000
--- a/docs/tests/inbox-skill/multi-agent-roundtrip-through-bundled-cli.md
+++ /dev/null
@@ -1,106 +0,0 @@
-# Case: `multi-agent-roundtrip-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and a multi-agent end-to-end skill validation.
-
-The goal is not to validate one CLI subcommand in isolation. The goal is to validate that two real agents can complete a closed-loop coordination flow through the packaged `skills/inbox/` skill and bundled CLI.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- both agents can explicitly use `$inbox`
-- both agents coordinate through the bundled `./assets/inbox` against the same SQLite DB
-- the worker follows the protocol `fetch -> claim -> update -> wait-reply -> done`
-- the leader follows the protocol `init -> send -> show/reply -> show`
-- the final inbox thread state and message history match the expected contract
-
-## Preconditions
-
-- skill path exists: `SKILL_PATH=skills/inbox`
-- bundled CLI executable exists: `SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- test database path is `TMPDIR/coord.db`
-
-## Agent Topology
-
-- `leader`
-- `worker-a`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $inbox at SKILL_PATH to act as leader on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) initialize the DB, 2) send exactly one task to worker-a asking them to implement a small logging choice, 3) monitor the thread until worker-a asks one blocked question, 4) answer the blocked question with a clear decision ('use stdout'), 5) wait until worker-a marks the thread done, 6) inspect the final thread with show, then stop. Do not use ordinary chat to coordinate with the other agent.
-```
-
-### Worker Prompt
-
-```text
-Use $inbox at SKILL_PATH to act as worker-a on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) wait until there is pending work for worker-a, 2) fetch it, 3) claim it, 4) send an in_progress update, 5) send a blocked update with one precise question asking whether logging should go to stdout or stderr, 6) wait for a reply, 7) finish the task with done using the received decision, 8) stop. Do not use ordinary chat to coordinate with the other agent.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Inject the same `skills/inbox/` skill into both real agents
-2. Point both agents at the same database path `TMPDIR/coord.db`
-3. Launch `leader` and `worker-a` in parallel
-4. Wait for both agents to finish
-5. Resolve `THREAD_ID` from the agent outputs or inbox history
-6. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json list --assigned-to worker-a
-```
-
-## Expected Outcomes
-
-- `leader` successfully runs `init`
-- `leader` successfully `send`s one new thread to `worker-a`
-- `worker-a` successfully `fetch`es that thread and successfully `claim`s it
-- `worker-a` emits one `progress` message
-- `worker-a` emits one `question` message focused on `stdout` vs `stderr`
-- `leader` successfully emits one `answer` message with the explicit decision `Use stdout.`
-- `worker-a` successfully consumes that answer through `wait-reply`
-- `worker-a` successfully emits `done`
-- `show` returns `thread.status == "done"`
-
-## Assertions
-
-- `show` contains at least the following message kinds in order:
-  - `task`
-  - `event` (`thread claimed`)
-  - `progress`
-  - `question`
-  - `answer`
-  - `result`
-- `question.body == "Should logging go to stdout or stderr?"`
-- `answer.body == "Use stdout."`
-- the final `result` message explicitly states that logging uses `stdout`
-- `list --assigned-to worker-a` shows the thread and its status is `done`
-- coordination happens primarily through the inbox thread rather than ordinary chat
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
-
-## Recorded Example Run
-
-This case already has one reference forward-test run:
-
-- DB: `/tmp/inbox-skill-fwd.j9kKvp/coord.db`
-- Thread: `thr_48d6f6a77eff4c2e88ce80e8fdc05da3`
-
-That run passed. The thread history contained `task -> event -> progress -> question -> answer -> result`, and the final thread state was `done`.
diff --git a/docs/tests/inbox-skill/parallel-workers-claim-conflict-through-bundled-cli.md b/docs/tests/inbox-skill/parallel-workers-claim-conflict-through-bundled-cli.md
deleted file mode 100644
index 2474cb1..0000000
--- a/docs/tests/inbox-skill/parallel-workers-claim-conflict-through-bundled-cli.md
+++ /dev/null
@@ -1,93 +0,0 @@
-# Case: `parallel-workers-claim-conflict-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and a multi-agent negative-path validation.
-
-The goal is to verify that two workers using the same bundled inbox skill can exercise a real claim conflict through the SQLite-backed inbox instead of simulating the outcome.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- multiple workers can use the same `skills/inbox/` bundle against one shared DB
-- one worker can successfully claim the thread
-- a competing worker can observe and attempt to claim that same thread
-- the competing worker receives the expected `lease_conflict` contract
-- the thread remains owned by the original worker
-
-## Preconditions
-
-- skill path exists: `SKILL_PATH=skills/inbox`
-- bundled CLI executable exists: `SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- test database path is `TMPDIR/coord.db`
-
-## Agent Topology
-
-- `leader`
-- `worker-a`
-- `worker-b`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $inbox at SKILL_PATH to act as leader on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) initialize the DB, 2) send exactly one task assigned to worker-a, 3) stop after confirming the thread exists and report the thread id. Do not use ordinary chat to coordinate with the workers.
-```
-
-### Worker A Prompt
-
-```text
-Use $inbox at SKILL_PATH to act as worker-a on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) wait for pending work assigned to worker-a, 2) fetch it, 3) claim it, 4) stop after confirming the claim succeeded and report the thread id and lease result. Do not use ordinary chat to coordinate with the other agents.
-```
-
-### Worker B Prompt
-
-```text
-Use $inbox at SKILL_PATH to act as worker-b on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. This is a conflict test. Workflow: 1) wait until there is a thread assigned to worker-a visible through inbox inspection, 2) resolve its thread id, 3) attempt to claim that thread as worker-b, 4) stop after reporting the exact error contract you observed. Do not use ordinary chat to coordinate with the other agents.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Inject the same `skills/inbox/` skill into all three real agents
-2. Point all three agents at the same database path `TMPDIR/coord.db`
-3. Launch `leader`, `worker-a`, and `worker-b` in parallel
-4. Wait for all agents to finish
-5. Resolve `THREAD_ID` from the agent outputs or inbox history
-6. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json list --assigned-to worker-a
-```
-
-## Expected Outcomes
-
-- `leader` successfully runs `init`
-- `leader` successfully creates one thread for `worker-a`
-- `worker-a` successfully `claim`s that thread
-- `worker-b` attempts `claim --agent worker-b --thread THREAD_ID`
-- `worker-b` receives exit code `20` and JSON error code `lease_conflict`
-- the final thread remains assigned to `worker-a`
-
-## Assertions
-
-- `show` contains a worker-side `event` message with summary `thread claimed`
-- the final thread status is still `claimed` or `in_progress`, and ownership has not transferred to `worker-b`
-- `list --assigned-to worker-a` still returns the thread
-- no agent reports successful ownership transfer to `worker-b`
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
diff --git a/docs/tests/inbox/README.md b/docs/tests/inbox/README.md
deleted file mode 100644
index da44949..0000000
--- a/docs/tests/inbox/README.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# Inbox Markdown Test Plan
-
-## Purpose
-
-This directory contains the human-readable Markdown test plan for the `inbox` CLI.
-
-It complements automated Go tests. The goal is not to restate implementation details, but to preserve the user-visible CLI contract in a form that can be reviewed, extended, and executed manually when needed.
-
-## Directory Rules
-
-- one folder per command or shared area
-- each folder keeps a `README.md` entrypoint
-- command folders use `README.md` as an index only
-- each command test case lives in its own Markdown file named after the case slug
-- no numeric test IDs
-- each command case is identified by its concrete file path
-
-Case file naming pattern:
-
-```text
-<case-slug>.md
-```
-
-## Authoring Principles
-
-- focus on externally visible behavior of the CLI
-- prefer stable command examples that a new agent can replay against a temp database
-- describe both success shape and failure contract
-- when a case already exists in automated Go tests, reuse its scenario rather than inventing a new one
-- keep terminology consistent with command flags and JSON fields exposed by the CLI
-
-## Common Execution Model
-
-Most cases in this directory assume the same baseline:
-
-1. create an isolated temporary directory
-2. choose a database path such as `TMPDIR/coord.db`
-3. run `inbox --db TMPDIR/coord.db --json init`
-4. run the target command sequence against that database
-
-Unless a case says otherwise:
-
-- commands should use `--json`
-- assertions should check both exit code and JSON payload
-- examples may use explicit `--agent`, or rely on the root `--agent` flag when that is the behavior under test
-
-## Folder Map
-
-- `README.md`: global conventions and glossary
-- `_shared/README.md`: reusable fixtures, JSON assertions, exit codes, payload rules
-- `workflows/README.md`: cross-command end-to-end scenarios
-- per-command folders: command-specific index `README.md` files plus one case document per test case
-
-## Glossary
-
-- `thread`: the durable coordination unit tracked by `thread_id`
-- `message`: an event-bearing entry appended to a thread
-- `artifact`: a file attachment associated with a message
-- `read cursor`: the per-agent marker used by unread flows
-- `lease`: the temporary ownership granted by `claim` and extended by `renew`
-- `terminal state`: a thread state such as `done`, `failed`, or `cancelled`
-
-## Relationship To Automated Tests
-
-The current best executable reference is [integration_test.go](../../../packages/inbox-runtime/internal/cli/inbox/integration_test.go).
-
-When this Markdown plan is expanded:
-
-- prefer matching an existing automated scenario first
-- record any additional manual-only contract coverage explicitly in the relevant command case file and keep the folder index synchronized
-- keep `docs/tests/inbox/ROADMAP.md` synchronized with authored files and case slugs
diff --git a/docs/tests/inbox/ROADMAP.md b/docs/tests/inbox/ROADMAP.md
deleted file mode 100644
index 5f2cda0..0000000
--- a/docs/tests/inbox/ROADMAP.md
+++ /dev/null
@@ -1,364 +0,0 @@
-# Inbox Test Documentation Roadmap
-
-## Purpose
-
-This roadmap tracks the human-readable Markdown test plan for `inbox`.
-
-It exists so a new agent can immediately answer four questions without re-reading the whole codebase:
-
-- which test-plan documents already exist
-- which cases have already been written down
-- which cases are still missing
-- what file should be updated next
-
-This roadmap is for the Markdown test-plan set under `docs/tests/inbox/`.
-It is not a replacement for automated Go tests.
-
-## Current Snapshot
-
-Snapshot date:
-
-- `2026-03-19`
-
-Current state:
-
-- `inbox` CLI is implemented end-to-end
-- automated Go integration tests already exist for the main lifecycle, wait flows, unread behavior, artifacts, and JSON error contracts
-- this roadmap now exists under `docs/tests/inbox/ROADMAP.md`
-- all planned global, shared, workflow, command-index, and command-case Markdown documents have been authored
-- command-level documents have been audited once per command against current CLI and store behavior, with edge-contract notes added for defaults, fallbacks, and error boundaries where needed
-- every inbox command folder now uses `README.md` as an index plus one Markdown file per case
-
-Progress summary for planned test-plan documents, excluding `ROADMAP.md`:
-
-- planned document files: `70`
-- authored document files: `70`
-- planned case slugs in this roadmap: `61`
-- authored case slugs in this roadmap: `61`
-
-## Scope
-
-In scope:
-
-- `inbox init`
-- `inbox send`
-- `inbox fetch`
-- `inbox claim`
-- `inbox renew`
-- `inbox update`
-- `inbox reply`
-- `inbox done`
-- `inbox fail`
-- `inbox cancel`
-- `inbox list`
-- `inbox show`
-- `inbox watch`
-- `inbox wait-reply`
-- cross-command workflows
-- shared test conventions for JSON output, exit codes, fixtures, and assertions
-
-Out of scope:
-
-- `orch`
-- `council-review`
-- implementation details that are not visible through the CLI contract
-
-## Tracking Rules
-
-Directory model:
-
-- one folder per command or shared area
-- each folder keeps a `README.md` entrypoint
-- command folders use `README.md` as an index only
-- each command case lives in its own Markdown file named after the case slug
-- cross-command workflow cases remain grouped in `docs/tests/inbox/workflows/README.md`
-
-Case identity:
-
-- do not use numeric IDs
-- identify each command case by its concrete file path
-- identify each workflow case by `path + case slug`
-- command case file naming pattern:
-
-```text
-<case-slug>.md
-```
-
-- workflow case heading pattern:
-
-```md
-## case: send-rejects-invalid-payload-json
-```
-
-Per-case structure inside the case document:
-
-- `用例意义`
-- `前置条件`
-- `输入`
-- `预期输出`
-- `断言结论`
-
-How to update this roadmap when a new case is written:
-
-1. if it is a command case, create or update the target `<case-slug>.md` file under the relevant command folder
-2. if it is a command case, add or update the entry in that folder `README.md` index
-3. if it is a workflow case, add or update the case inside `docs/tests/inbox/workflows/README.md`
-4. move the case slug from `Pending Case Backlog` to `Authored Case Register`
-5. update the authored counts in `Current Snapshot`
-6. if a new Markdown file is created, update `Document Progress`
-
-Allowed status values in this roadmap:
-
-- `pending`
-- `in_progress`
-- `done`
-- `deferred`
-
-## Existing Automated Coverage Reference
-
-The Markdown test-plan set started from zero, but these automated tests already existed and should be used as source material when writing or revising the docs:
-
-- [integration_test.go](../../../packages/inbox-runtime/internal/cli/inbox/integration_test.go#L12) `TestInboxLifecycle`
-- [integration_test.go](../../../packages/inbox-runtime/internal/cli/inbox/integration_test.go#L176) `TestInboxFailLifecycle`
-- [integration_test.go](../../../packages/inbox-runtime/internal/cli/inbox/integration_test.go#L243) `TestInboxRenewWaitReplyAndCancel`
-- [integration_test.go](../../../packages/inbox-runtime/internal/cli/inbox/integration_test.go#L392) `TestInboxWatchListUnreadAndAppend`
-- [integration_test.go](../../../packages/inbox-runtime/internal/cli/inbox/integration_test.go#L549) `TestInboxUnreadReadCursor`
-- [integration_test.go](../../../packages/inbox-runtime/internal/cli/inbox/integration_test.go#L639) `TestInboxJSONErrorsAndExitCodes`
-
-These tests do not remove the need for the Markdown plan. They only reduce discovery work.
-
-## Planned Directory Tree
-
-```text
-docs/tests/inbox/
-  ROADMAP.md
-  README.md
-  _shared/
-    README.md
-  workflows/
-    README.md
-  init/
-    README.md
-    <case-slug>.md
-  send/
-    README.md
-    <case-slug>.md
-  fetch/
-    README.md
-    <case-slug>.md
-  claim/
-    README.md
-    <case-slug>.md
-  renew/
-    README.md
-    <case-slug>.md
-  update/
-    README.md
-    <case-slug>.md
-  reply/
-    README.md
-    <case-slug>.md
-  done/
-    README.md
-    <case-slug>.md
-  fail/
-    README.md
-    <case-slug>.md
-  cancel/
-    README.md
-    <case-slug>.md
-  list/
-    README.md
-    <case-slug>.md
-  show/
-    README.md
-    <case-slug>.md
-  watch/
-    README.md
-    <case-slug>.md
-  wait-reply/
-    README.md
-    <case-slug>.md
-```
-
-## Document Progress
-
-| Path | Purpose | Planned Cases | Authored Cases | Status |
-| --- | --- | ---: | ---: | --- |
-| `docs/tests/inbox/README.md` | Global testing conventions and glossary | 0 | 0 | done |
-| `docs/tests/inbox/_shared/README.md` | Shared fixtures, JSON assertions, exit-code rules | 0 | 0 | done |
-| `docs/tests/inbox/workflows/README.md` | Cross-command scenarios | 8 | 8 | done |
-| `docs/tests/inbox/init/README.md` | `init` command case index | 0 | 0 | done |
-| `docs/tests/inbox/init/init-creates-schema-on-empty-db.md` | `init` command case | 1 | 1 | done |
-| `docs/tests/inbox/init/init-is-idempotent-on-existing-db.md` | `init` command case | 1 | 1 | done |
-| `docs/tests/inbox/send/README.md` | `send` command case index | 0 | 0 | done |
-| `docs/tests/inbox/send/send-creates-new-thread.md` | `send` command case | 1 | 1 | done |
-| `docs/tests/inbox/send/send-appends-message-to-existing-thread.md` | `send` command case | 1 | 1 | done |
-| `docs/tests/inbox/send/send-reads-body-from-body-file.md` | `send` command case | 1 | 1 | done |
-| `docs/tests/inbox/send/send-attaches-artifact-with-metadata.md` | `send` command case | 1 | 1 | done |
-| `docs/tests/inbox/send/send-rejects-invalid-payload-json.md` | `send` command case | 1 | 1 | done |
-| `docs/tests/inbox/send/send-rejects-invalid-artifact-metadata-json.md` | `send` command case | 1 | 1 | done |
-| `docs/tests/inbox/fetch/README.md` | `fetch` command case index | 0 | 0 | done |
-| `docs/tests/inbox/fetch/fetch-returns-pending-thread-for-target-agent.md` | `fetch` command case | 1 | 1 | done |
-| `docs/tests/inbox/fetch/fetch-respects-status-and-limit-filters.md` | `fetch` command case | 1 | 1 | done |
-| `docs/tests/inbox/fetch/fetch-unread-uses-read-cursor.md` | `fetch` command case | 1 | 1 | done |
-| `docs/tests/inbox/fetch/fetch-returns-no-matching-work-when-empty.md` | `fetch` command case | 1 | 1 | done |
-| `docs/tests/inbox/claim/README.md` | `claim` command case index | 0 | 0 | done |
-| `docs/tests/inbox/claim/claim-acquires-thread-lease.md` | `claim` command case | 1 | 1 | done |
-| `docs/tests/inbox/claim/claim-rejects-when-thread-missing.md` | `claim` command case | 1 | 1 | done |
-| `docs/tests/inbox/claim/claim-rejects-when-thread-already-claimed.md` | `claim` command case | 1 | 1 | done |
-| `docs/tests/inbox/claim/claim-records-requested-lease-duration.md` | `claim` command case | 1 | 1 | done |
-| `docs/tests/inbox/renew/README.md` | `renew` command case index | 0 | 0 | done |
-| `docs/tests/inbox/renew/renew-extends-active-lease.md` | `renew` command case | 1 | 1 | done |
-| `docs/tests/inbox/renew/renew-rejects-non-owner.md` | `renew` command case | 1 | 1 | done |
-| `docs/tests/inbox/renew/renew-rejects-without-active-lease.md` | `renew` command case | 1 | 1 | done |
-| `docs/tests/inbox/update/README.md` | `update` command case index | 0 | 0 | done |
-| `docs/tests/inbox/update/update-moves-thread-to-in-progress.md` | `update` command case | 1 | 1 | done |
-| `docs/tests/inbox/update/update-moves-thread-to-blocked-with-payload.md` | `update` command case | 1 | 1 | done |
-| `docs/tests/inbox/update/update-accepts-body-file-and-artifact.md` | `update` command case | 1 | 1 | done |
-| `docs/tests/inbox/update/update-rejects-invalid-payload-json.md` | `update` command case | 1 | 1 | done |
-| `docs/tests/inbox/update/update-rejects-non-owner.md` | `update` command case | 1 | 1 | done |
-| `docs/tests/inbox/reply/README.md` | `reply` command case index | 0 | 0 | done |
-| `docs/tests/inbox/reply/reply-adds-answer-message.md` | `reply` command case | 1 | 1 | done |
-| `docs/tests/inbox/reply/reply-supports-control-kind.md` | `reply` command case | 1 | 1 | done |
-| `docs/tests/inbox/reply/reply-attaches-artifact.md` | `reply` command case | 1 | 1 | done |
-| `docs/tests/inbox/reply/reply-rejects-invalid-payload-json.md` | `reply` command case | 1 | 1 | done |
-| `docs/tests/inbox/done/README.md` | `done` command case index | 0 | 0 | done |
-| `docs/tests/inbox/done/done-marks-thread-terminal.md` | `done` command case | 1 | 1 | done |
-| `docs/tests/inbox/done/done-persists-result-body-and-artifact.md` | `done` command case | 1 | 1 | done |
-| `docs/tests/inbox/done/done-rejects-non-owner.md` | `done` command case | 1 | 1 | done |
-| `docs/tests/inbox/done/done-rejects-on-terminal-thread.md` | `done` command case | 1 | 1 | done |
-| `docs/tests/inbox/fail/README.md` | `fail` command case index | 0 | 0 | done |
-| `docs/tests/inbox/fail/fail-marks-thread-failed.md` | `fail` command case | 1 | 1 | done |
-| `docs/tests/inbox/fail/fail-persists-failure-body-and-artifact.md` | `fail` command case | 1 | 1 | done |
-| `docs/tests/inbox/fail/fail-rejects-non-owner.md` | `fail` command case | 1 | 1 | done |
-| `docs/tests/inbox/fail/fail-rejects-on-terminal-thread.md` | `fail` command case | 1 | 1 | done |
-| `docs/tests/inbox/cancel/README.md` | `cancel` command case index | 0 | 0 | done |
-| `docs/tests/inbox/cancel/cancel-marks-thread-cancelled.md` | `cancel` command case | 1 | 1 | done |
-| `docs/tests/inbox/cancel/cancel-persists-reason-and-artifact.md` | `cancel` command case | 1 | 1 | done |
-| `docs/tests/inbox/cancel/cancel-rejects-when-thread-missing.md` | `cancel` command case | 1 | 1 | done |
-| `docs/tests/inbox/list/README.md` | `list` command case index | 0 | 0 | done |
-| `docs/tests/inbox/list/list-filters-by-status.md` | `list` command case | 1 | 1 | done |
-| `docs/tests/inbox/list/list-filters-by-created-by.md` | `list` command case | 1 | 1 | done |
-| `docs/tests/inbox/list/list-filters-by-assigned-to.md` | `list` command case | 1 | 1 | done |
-| `docs/tests/inbox/list/list-respects-limit.md` | `list` command case | 1 | 1 | done |
-| `docs/tests/inbox/show/README.md` | `show` command case index | 0 | 0 | done |
-| `docs/tests/inbox/show/show-returns-thread-and-message-history.md` | `show` command case | 1 | 1 | done |
-| `docs/tests/inbox/show/show-includes-artifacts-per-message.md` | `show` command case | 1 | 1 | done |
-| `docs/tests/inbox/show/show-mark-read-advances-read-cursor.md` | `show` command case | 1 | 1 | done |
-| `docs/tests/inbox/show/show-rejects-when-thread-missing.md` | `show` command case | 1 | 1 | done |
-| `docs/tests/inbox/watch/README.md` | `watch` command case index | 0 | 0 | done |
-| `docs/tests/inbox/watch/watch-wakes-on-matching-thread.md` | `watch` command case | 1 | 1 | done |
-| `docs/tests/inbox/watch/watch-respects-status-filter.md` | `watch` command case | 1 | 1 | done |
-| `docs/tests/inbox/watch/watch-times-out-with-no-activity.md` | `watch` command case | 1 | 1 | done |
-| `docs/tests/inbox/wait-reply/README.md` | `wait-reply` command case index | 0 | 0 | done |
-| `docs/tests/inbox/wait-reply/wait-reply-wakes-on-answer-after-message.md` | `wait-reply` command case | 1 | 1 | done |
-| `docs/tests/inbox/wait-reply/wait-reply-can-start-from-after-event.md` | `wait-reply` command case | 1 | 1 | done |
-| `docs/tests/inbox/wait-reply/wait-reply-times-out-when-no-reply.md` | `wait-reply` command case | 1 | 1 | done |
-
-## Authoring Order
-
-Recommended order:
-
-1. `docs/tests/inbox/README.md`
-2. `docs/tests/inbox/_shared/README.md`
-3. `docs/tests/inbox/workflows/README.md`
-4. `docs/tests/inbox/send/README.md` plus its linked case files
-5. `docs/tests/inbox/fetch/README.md` plus its linked case files
-6. `docs/tests/inbox/claim/README.md` plus its linked case files
-7. `docs/tests/inbox/reply/README.md` plus its linked case files
-8. `docs/tests/inbox/done/README.md` plus its linked case files
-9. `docs/tests/inbox/show/README.md` plus its linked case files
-10. the remaining command indexes and case files
-
-Reason:
-
-- the workflow file captures the highest-value end-to-end behavior first
-- the command documents can then reuse shared conventions and already-fixed terminology
-
-## Authored Case Register
-
-| Path | Case Slug | Coverage Note | Status |
-| --- | --- | --- | --- |
-| `docs/tests/inbox/workflows/README.md` | `thread-lifecycle-happy-path` | end-to-end happy path from send to show after done | done |
-| `docs/tests/inbox/workflows/README.md` | `blocked-question-reply-resume-to-done` | blocked thread receives answer and resumes to done | done |
-| `docs/tests/inbox/workflows/README.md` | `fail-lifecycle-from-claim-to-terminal` | claimed thread transitions to failed terminal state | done |
-| `docs/tests/inbox/workflows/README.md` | `cancel-lifecycle-after-worker-claim` | claimed thread can be cancelled by initiator | done |
-| `docs/tests/inbox/workflows/README.md` | `watch-wakes-then-fetch-sees-new-thread` | watch wake-up remains consistent with unread fetch visibility | done |
-| `docs/tests/inbox/workflows/README.md` | `artifact-visible-through-send-and-show` | body-file and artifact data survive send and show | done |
-| `docs/tests/inbox/workflows/README.md` | `unread-clears-after-mark-read-and-reappears-on-new-message` | read cursor clears unread and new message restores it | done |
-| `docs/tests/inbox/workflows/README.md` | `wait-reply-clears-blocked-unread-for-agent` | wait-reply consumes reply and clears blocked unread view | done |
-| `docs/tests/inbox/init/init-creates-schema-on-empty-db.md` | `init-creates-schema-on-empty-db` | initializes an empty database path and returns initialized status | done |
-| `docs/tests/inbox/init/init-is-idempotent-on-existing-db.md` | `init-is-idempotent-on-existing-db` | repeated init succeeds on the same database path | done |
-| `docs/tests/inbox/send/send-creates-new-thread.md` | `send-creates-new-thread` | creates a pending thread with an initial task message | done |
-| `docs/tests/inbox/send/send-appends-message-to-existing-thread.md` | `send-appends-message-to-existing-thread` | appends a message to an existing non-terminal thread | done |
-| `docs/tests/inbox/send/send-reads-body-from-body-file.md` | `send-reads-body-from-body-file` | reads message body from a file path | done |
-| `docs/tests/inbox/send/send-attaches-artifact-with-metadata.md` | `send-attaches-artifact-with-metadata` | persists artifact path, kind, and metadata on send | done |
-| `docs/tests/inbox/send/send-rejects-invalid-payload-json.md` | `send-rejects-invalid-payload-json` | rejects malformed payload JSON with `invalid_input` | done |
-| `docs/tests/inbox/send/send-rejects-invalid-artifact-metadata-json.md` | `send-rejects-invalid-artifact-metadata-json` | rejects malformed artifact metadata JSON | done |
-| `docs/tests/inbox/fetch/fetch-returns-pending-thread-for-target-agent.md` | `fetch-returns-pending-thread-for-target-agent` | returns pending candidate work for the target agent | done |
-| `docs/tests/inbox/fetch/fetch-respects-status-and-limit-filters.md` | `fetch-respects-status-and-limit-filters` | enforces status filtering and max row count | done |
-| `docs/tests/inbox/fetch/fetch-unread-uses-read-cursor.md` | `fetch-unread-uses-read-cursor` | unread filtering depends on per-agent read cursor state | done |
-| `docs/tests/inbox/fetch/fetch-returns-no-matching-work-when-empty.md` | `fetch-returns-no-matching-work-when-empty` | empty fetch result returns no_matching_work | done |
-| `docs/tests/inbox/claim/claim-acquires-thread-lease.md` | `claim-acquires-thread-lease` | claims a pending thread and records a claim event message | done |
-| `docs/tests/inbox/claim/claim-rejects-when-thread-missing.md` | `claim-rejects-when-thread-missing` | missing thread returns not_found | done |
-| `docs/tests/inbox/claim/claim-rejects-when-thread-already-claimed.md` | `claim-rejects-when-thread-already-claimed` | active lease conflict returns lease_conflict | done |
-| `docs/tests/inbox/claim/claim-records-requested-lease-duration.md` | `claim-records-requested-lease-duration` | claim event payload records requested lease duration | done |
-| `docs/tests/inbox/renew/renew-extends-active-lease.md` | `renew-extends-active-lease` | owner renews an active lease and gets a renewal event | done |
-| `docs/tests/inbox/renew/renew-rejects-non-owner.md` | `renew-rejects-non-owner` | non-owner renew attempt returns lease_conflict | done |
-| `docs/tests/inbox/renew/renew-rejects-without-active-lease.md` | `renew-rejects-without-active-lease` | missing active lease returns invalid_state | done |
-| `docs/tests/inbox/update/update-moves-thread-to-in-progress.md` | `update-moves-thread-to-in-progress` | moves a claimed thread to `in_progress` and emits a progress message | done |
-| `docs/tests/inbox/update/update-moves-thread-to-blocked-with-payload.md` | `update-moves-thread-to-blocked-with-payload` | moves a claimed thread to `blocked` with structured question payload | done |
-| `docs/tests/inbox/update/update-accepts-body-file-and-artifact.md` | `update-accepts-body-file-and-artifact` | persists update body from file plus artifacts | done |
-| `docs/tests/inbox/update/update-rejects-invalid-payload-json.md` | `update-rejects-invalid-payload-json` | rejects malformed `--payload-json` input | done |
-| `docs/tests/inbox/update/update-rejects-non-owner.md` | `update-rejects-non-owner` | rejects update when caller is not the active lease owner | done |
-| `docs/tests/inbox/reply/reply-adds-answer-message.md` | `reply-adds-answer-message` | appends default `answer` message to an existing non-terminal thread | done |
-| `docs/tests/inbox/reply/reply-supports-control-kind.md` | `reply-supports-control-kind` | supports explicit `--kind control` reply message | done |
-| `docs/tests/inbox/reply/reply-attaches-artifact.md` | `reply-attaches-artifact` | appends reply message with artifact payload | done |
-| `docs/tests/inbox/reply/reply-rejects-invalid-payload-json.md` | `reply-rejects-invalid-payload-json` | rejects malformed `--payload-json` input | done |
-| `docs/tests/inbox/done/done-marks-thread-terminal.md` | `done-marks-thread-terminal` | marks a claimed thread as `done` with a result message | done |
-| `docs/tests/inbox/done/done-persists-result-body-and-artifact.md` | `done-persists-result-body-and-artifact` | persists result body and artifact for follow-up reads | done |
-| `docs/tests/inbox/done/done-rejects-non-owner.md` | `done-rejects-non-owner` | rejects `done` from non-owner agent | done |
-| `docs/tests/inbox/done/done-rejects-on-terminal-thread.md` | `done-rejects-on-terminal-thread` | rejects `done` on terminal thread states | done |
-| `docs/tests/inbox/fail/fail-marks-thread-failed.md` | `fail-marks-thread-failed` | marks a claimed thread as `failed` with a result message | done |
-| `docs/tests/inbox/fail/fail-persists-failure-body-and-artifact.md` | `fail-persists-failure-body-and-artifact` | persists failure body and artifacts for diagnosis | done |
-| `docs/tests/inbox/fail/fail-rejects-non-owner.md` | `fail-rejects-non-owner` | rejects `fail` from non-owner agent | done |
-| `docs/tests/inbox/fail/fail-rejects-on-terminal-thread.md` | `fail-rejects-on-terminal-thread` | rejects `fail` on terminal thread states | done |
-| `docs/tests/inbox/cancel/cancel-marks-thread-cancelled.md` | `cancel-marks-thread-cancelled` | moves a non-terminal thread into `cancelled` and emits a control message | done |
-| `docs/tests/inbox/cancel/cancel-persists-reason-and-artifact.md` | `cancel-persists-reason-and-artifact` | persists cancel reason text and attached artifacts | done |
-| `docs/tests/inbox/cancel/cancel-rejects-when-thread-missing.md` | `cancel-rejects-when-thread-missing` | returns stable not-found contract when thread does not exist | done |
-| `docs/tests/inbox/list/list-filters-by-status.md` | `list-filters-by-status` | filters returned threads by status set | done |
-| `docs/tests/inbox/list/list-filters-by-created-by.md` | `list-filters-by-created-by` | filters returned threads by creator | done |
-| `docs/tests/inbox/list/list-filters-by-assigned-to.md` | `list-filters-by-assigned-to` | filters returned threads by current assignee | done |
-| `docs/tests/inbox/list/list-respects-limit.md` | `list-respects-limit` | enforces hard cap on returned thread count | done |
-| `docs/tests/inbox/show/show-returns-thread-and-message-history.md` | `show-returns-thread-and-message-history` | returns thread details and full time-ordered message history | done |
-| `docs/tests/inbox/show/show-includes-artifacts-per-message.md` | `show-includes-artifacts-per-message` | expands per-message artifacts in the show payload | done |
-| `docs/tests/inbox/show/show-mark-read-advances-read-cursor.md` | `show-mark-read-advances-read-cursor` | advances caller read cursor when `--mark-read` is used | done |
-| `docs/tests/inbox/show/show-rejects-when-thread-missing.md` | `show-rejects-when-thread-missing` | returns stable not-found contract for missing thread | done |
-| `docs/tests/inbox/watch/watch-wakes-on-matching-thread.md` | `watch-wakes-on-matching-thread` | wakes when a matching post-start event arrives and returns event context | done |
-| `docs/tests/inbox/watch/watch-respects-status-filter.md` | `watch-respects-status-filter` | wakes only when thread transitions into requested status | done |
-| `docs/tests/inbox/watch/watch-times-out-with-no-activity.md` | `watch-times-out-with-no-activity` | returns timeout contract when no matching activity arrives | done |
-| `docs/tests/inbox/wait-reply/wait-reply-wakes-on-answer-after-message.md` | `wait-reply-wakes-on-answer-after-message` | wakes for a qualifying reply after known message boundary | done |
-| `docs/tests/inbox/wait-reply/wait-reply-can-start-from-after-event.md` | `wait-reply-can-start-from-after-event` | resumes waiting from a known event cursor | done |
-| `docs/tests/inbox/wait-reply/wait-reply-times-out-when-no-reply.md` | `wait-reply-times-out-when-no-reply` | returns timeout contract when no qualifying reply arrives | done |
-
-## Pending Case Backlog
-
-No pending case slugs remain in the current plan.
-
-When a new CLI contract or workflow needs coverage:
-
-1. if it is a command case, create a new `<case-slug>.md` file under the relevant command folder and add it to that folder `README.md` index
-2. if it is a workflow case, add it to `docs/tests/inbox/workflows/README.md`
-3. add the new slug to `Authored Case Register`
-4. update `Current Snapshot` and `Document Progress`
-
-## Definition Of Done
-
-This roadmap is complete only when all of the following are true:
-
-- every implemented inbox command has a corresponding document folder
-- each planned command index and case document exists
-- each pending case slug has been either authored or explicitly deferred
-- the authored-case register matches the actual Markdown files on disk
-- a new agent can pick any pending case and know exactly where it should be written
diff --git a/docs/tests/inbox/_shared/README.md b/docs/tests/inbox/_shared/README.md
deleted file mode 100644
index 80ca409..0000000
--- a/docs/tests/inbox/_shared/README.md
+++ /dev/null
@@ -1,130 +0,0 @@
-# Inbox Shared Test Conventions
-
-## Purpose
-
-This document captures shared assumptions used by multiple `inbox` test-plan documents so command and workflow files can stay focused on behavior rather than repeating setup boilerplate.
-
-## Recommended Fixture Shape
-
-Use an isolated temp workspace per case:
-
-- database path: `TMPDIR/coord.db`
-- optional body file: `TMPDIR/body.md`
-- optional artifact file: `TMPDIR/artifact.txt`
-
-Recommended bootstrap command:
-
-```bash
-inbox --db TMPDIR/coord.db --json init
-```
-
-## Global Flags
-
-Root-level flags apply to every subcommand:
-
-- `--db`: SQLite database path, default `.agents/coord.db`
-- `--json`: emit machine-readable JSON
-- `--agent`: acting agent identity shortcut used by commands that accept agent context
-
-When a command-specific `--agent` or `--from` flag is omitted, the root `--agent` value may be used instead. Cases that verify fallback behavior should state that explicitly.
-
-## Success JSON Contract
-
-Successful JSON output uses this shape:
-
-```json
-{
-  "ok": true,
-  "command": "send",
-  "data": {}
-}
-```
-
-Shared assertion points:
-
-- `ok` is `true`
-- `command` matches the invoked subcommand
-- `data` contains the command-specific payload
-
-## Error JSON Contract
-
-Failure JSON output uses this shape:
-
-```json
-{
-  "ok": false,
-  "error": {
-    "code": "invalid_input",
-    "message": "..."
-  }
-}
-```
-
-Shared assertion points:
-
-- `ok` is `false`
-- `error.code` matches the stable contract
-- `error.message` is present and human-readable
-
-## Exit Code Contract
-
-The current CLI contract uses these exit codes:
-
-| Exit Code | Meaning | Typical Error Code |
-| --- | --- | --- |
-| `0` | success | none |
-| `10` | no matching work / timeout without match | `no_matching_work` |
-| `20` | lease conflict | `lease_conflict` |
-| `30` | invalid input, invalid state, usage-style error | `invalid_input` or `invalid_state` |
-| `40` | referenced thread or message missing | `not_found` |
-| `50` | unexpected internal failure | `internal_error` |
-
-When a case expects no matching result (for example, exit code `10` with `no_matching_work`), assert both the exit code and the JSON error code.
-
-## Body Input Rules
-
-Commands that support `--body` and `--body-file` follow these rules:
-
-- `--body` and `--body-file` are mutually exclusive
-- `--body-file` content is read verbatim into the message body
-- unreadable `--body-file` should be treated as `invalid_input`
-
-Relevant commands:
-
-- `send`
-- `update`
-- `reply`
-- `done`
-- `fail`
-
-## Artifact Rules
-
-Commands with artifact support use these shared rules:
-
-- `--artifact` may be repeated
-- `--artifact-kind` may be specified once for all artifacts, or once per artifact
-- `--artifact-metadata-json` may be specified once for all artifacts, or once per artifact
-- `--artifact-kind` and `--artifact-metadata-json` are invalid without at least one `--artifact`
-- an empty artifact path is invalid input
-
-When artifact behavior is under test, assert at least:
-
-- artifact count
-- artifact `path`
-- artifact `kind`
-- metadata presence when supplied
-
-## Read And Unread Assertions
-
-Unread-related cases should verify behavior from the agent's point of view, not only raw message existence.
-
-Recommended checks:
-
-- `fetch --unread` returns a thread before read acknowledgement
-- `show --mark-read` clears unread state for that agent
-- a new message to the same thread makes the thread unread again
-- `wait-reply` may clear blocked unread state for the waiting agent when the reply is consumed
-
-## Workflow Authoring Rule
-
-If a case spans multiple commands, place the end-to-end narrative in `workflows/README.md` first, then add narrower command-level cases only when they introduce behavior that is easier to reason about in isolation.
diff --git a/docs/tests/inbox/cancel/README.md b/docs/tests/inbox/cancel/README.md
deleted file mode 100644
index 32c3e26..0000000
--- a/docs/tests/inbox/cancel/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Inbox `cancel` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `cancel-marks-thread-cancelled` | [cancel-marks-thread-cancelled.md](./cancel-marks-thread-cancelled.md) | moves a non-terminal thread into `cancelled` and emits a control message |
-| `cancel-persists-reason-and-artifact` | [cancel-persists-reason-and-artifact.md](./cancel-persists-reason-and-artifact.md) | persists cancel reason text and attached artifacts |
-| `cancel-rejects-when-thread-missing` | [cancel-rejects-when-thread-missing.md](./cancel-rejects-when-thread-missing.md) | returns stable not-found contract when thread does not exist |
diff --git a/docs/tests/inbox/cancel/cancel-marks-thread-cancelled.md b/docs/tests/inbox/cancel/cancel-marks-thread-cancelled.md
deleted file mode 100644
index b42167d..0000000
--- a/docs/tests/inbox/cancel/cancel-marks-thread-cancelled.md
+++ /dev/null
@@ -1,29 +0,0 @@
-# case: cancel-marks-thread-cancelled
-
-### 用例意义
-
-验证 `cancel` 可以把非终态线程推进到 `cancelled` 终态，并生成控制消息。
-
-### 前置条件
-
-- 已存在一个非终态线程 `THREAD_ID`
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json cancel --agent leader --thread THREAD_ID --reason "Task superseded by a larger refactor"
-```
-
-### 预期输出
-
-- 命令退出码为 `0`
-- `thread.status == "cancelled"`
-- `message.kind == "control"`
-
-### 断言结论
-
-- `cancel` 是线程级终态转换
-- 取消时会释放活跃 lease
-- `cancel` 不要求调用方持有活跃 lease；只要线程存在且尚未进入终态，就可以被取消
-- 如果线程已经是 `done`、`failed` 或 `cancelled`，应返回 `invalid_state`，而不是 `lease_conflict`
-
diff --git a/docs/tests/inbox/cancel/cancel-persists-reason-and-artifact.md b/docs/tests/inbox/cancel/cancel-persists-reason-and-artifact.md
deleted file mode 100644
index b26d294..0000000
--- a/docs/tests/inbox/cancel/cancel-persists-reason-and-artifact.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# case: cancel-persists-reason-and-artifact
-
-### 用例意义
-
-验证 `cancel` 的原因文本与附件会被完整持久化。
-
-### 前置条件
-
-- 已存在一个非终态线程 `THREAD_ID`
-- `TMPDIR/cancel.md` 已存在
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json cancel --agent leader --thread THREAD_ID --reason "Task superseded by a larger refactor" --artifact TMPDIR/cancel.md --artifact-kind brief
-inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-```
-
-### 预期输出
-
-- `cancel` 成功
-- 取消消息 `summary` 与 `body` 都保留取消原因
-- 取消消息包含 1 个 artifact
-
-### 断言结论
-
-- `cancel` 既保留人类可读原因，也支持附带上下文材料
-- 当 `--reason` 为空时，取消消息的 `summary` 会回退为 `thread cancelled`，而 `body` 保持空字符串
-- `--artifact-kind` 与 `--artifact-metadata-json` 需要至少一个 `--artifact`，且多值数量必须是 `1` 或与 artifact 数量一致；否则应返回 `invalid_input`
-
diff --git a/docs/tests/inbox/cancel/cancel-rejects-when-thread-missing.md b/docs/tests/inbox/cancel/cancel-rejects-when-thread-missing.md
deleted file mode 100644
index b671867..0000000
--- a/docs/tests/inbox/cancel/cancel-rejects-when-thread-missing.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# case: cancel-rejects-when-thread-missing
-
-### 用例意义
-
-验证 `cancel` 对不存在线程返回稳定的 not-found 错误契约。
-
-### 前置条件
-
-- 空数据库已完成 `init`
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json cancel --agent leader --thread thr_missing
-```
-
-### 预期输出
-
-- 退出码为 `40`
-- JSON 错误码为 `not_found`
-
-### 断言结论
-
-- `cancel` 不会为缺失线程隐式创建控制消息
-- 当命令级 `--agent` 未显式提供时，可以回退使用根级 `--agent`；两者都缺失时应返回 `invalid_input`
-- `--thread` 是必填 flag；缺失时属于 `invalid_input` 类 usage error
-
diff --git a/docs/tests/inbox/claim/README.md b/docs/tests/inbox/claim/README.md
deleted file mode 100644
index 210b21f..0000000
--- a/docs/tests/inbox/claim/README.md
+++ /dev/null
@@ -1,10 +0,0 @@
-# Inbox `claim` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `claim-acquires-thread-lease` | [claim-acquires-thread-lease.md](./claim-acquires-thread-lease.md) | claims a pending thread and records a claim event message |
-| `claim-rejects-when-thread-missing` | [claim-rejects-when-thread-missing.md](./claim-rejects-when-thread-missing.md) | missing thread returns not_found |
-| `claim-rejects-when-thread-already-claimed` | [claim-rejects-when-thread-already-claimed.md](./claim-rejects-when-thread-already-claimed.md) | active lease conflict returns lease_conflict |
-| `claim-records-requested-lease-duration` | [claim-records-requested-lease-duration.md](./claim-records-requested-lease-duration.md) | claim event payload records requested lease duration |
diff --git a/docs/tests/inbox/claim/claim-acquires-thread-lease.md b/docs/tests/inbox/claim/claim-acquires-thread-lease.md
deleted file mode 100644
index b6079c3..0000000
--- a/docs/tests/inbox/claim/claim-acquires-thread-lease.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# Case: `claim-acquires-thread-lease`
-
-## 用例意义
-
-验证 `claim` 可以把 `pending` 线程切换到 `claimed`，并生成租约事件消息。
-
-## 前置条件
-
-- 已存在一个 `pending` 线程 `THREAD_ID`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json claim --agent worker-a --thread THREAD_ID --lease-seconds 300
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- `thread.status == "claimed"`
-- `thread.assigned_to == "worker-a"`
-- `message.kind == "event"`
-- `message.summary == "thread claimed"`
-
-## 断言结论
-
-- `claim` 同时更新线程状态与活跃租约
-- 成功领取会附带一条事件消息，而不是静默改状态
-- 未显式传 `--lease-seconds`，或传入非正值时，租约时长应回退到默认 `900` 秒
-
-## 补充约束
-
-- 当 `--agent` 未显式提供时，可以回退使用根级 `--agent`
diff --git a/docs/tests/inbox/claim/claim-records-requested-lease-duration.md b/docs/tests/inbox/claim/claim-records-requested-lease-duration.md
deleted file mode 100644
index 5a34c12..0000000
--- a/docs/tests/inbox/claim/claim-records-requested-lease-duration.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Case: `claim-records-requested-lease-duration`
-
-## 用例意义
-
-验证 `claim --lease-seconds` 的请求值会进入事件消息 payload，便于后续审计。
-
-## 前置条件
-
-- 已存在一个 `pending` 线程 `THREAD_ID`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json claim --agent worker-a --thread THREAD_ID --lease-seconds 300
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- `message.payload_json.lease_seconds == 300`
-- `message.payload_json.lease_token` 存在
-
-## 断言结论
-
-- 请求的租约时长不是仅用于内部计算，也会被持久化到事件消息中
diff --git a/docs/tests/inbox/claim/claim-rejects-when-thread-already-claimed.md b/docs/tests/inbox/claim/claim-rejects-when-thread-already-claimed.md
deleted file mode 100644
index f3d474e..0000000
--- a/docs/tests/inbox/claim/claim-rejects-when-thread-already-claimed.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# Case: `claim-rejects-when-thread-already-claimed`
-
-## 用例意义
-
-验证同一线程在已有活跃租约时，其他执行者无法重复领取。
-
-## 前置条件
-
-- `worker-z` 已成功 `claim` 线程 `THREAD_ID`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json claim --agent worker-y --thread THREAD_ID
-```
-
-## 预期输出
-
-- 退出码为 `20`
-- JSON 错误码为 `lease_conflict`
-
-## 断言结论
-
-- 活跃 lease 是 `claim` 的排他条件
-
-## 补充约束
-
-- `claim` 只允许作用在 `pending` 线程上；如果线程已是 `claimed`、`in_progress`、`blocked`，或已进入任一终态，则应返回 `invalid_state`，而不是 `lease_conflict`
diff --git a/docs/tests/inbox/claim/claim-rejects-when-thread-missing.md b/docs/tests/inbox/claim/claim-rejects-when-thread-missing.md
deleted file mode 100644
index 6b269fd..0000000
--- a/docs/tests/inbox/claim/claim-rejects-when-thread-missing.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# Case: `claim-rejects-when-thread-missing`
-
-## 用例意义
-
-验证 `claim` 对不存在的线程返回稳定的 not-found 错误契约。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json claim --agent worker-z --thread thr_missing
-```
-
-## 预期输出
-
-- 退出码为 `40`
-- JSON 错误码为 `not_found`
-
-## 断言结论
-
-- 缺失线程会被明确区分为引用错误，而不是 lease 冲突
-
-## 补充约束
-
-- `--thread` 是必填 flag；缺失时属于 `invalid_input` 类 usage error
diff --git a/docs/tests/inbox/done/README.md b/docs/tests/inbox/done/README.md
deleted file mode 100644
index 300554f..0000000
--- a/docs/tests/inbox/done/README.md
+++ /dev/null
@@ -1,10 +0,0 @@
-# Inbox `done` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `done-marks-thread-terminal` | [done-marks-thread-terminal.md](./done-marks-thread-terminal.md) | marks a claimed thread as `done` with a result message |
-| `done-persists-result-body-and-artifact` | [done-persists-result-body-and-artifact.md](./done-persists-result-body-and-artifact.md) | persists result body and artifact for follow-up reads |
-| `done-rejects-non-owner` | [done-rejects-non-owner.md](./done-rejects-non-owner.md) | rejects `done` from non-owner agent |
-| `done-rejects-on-terminal-thread` | [done-rejects-on-terminal-thread.md](./done-rejects-on-terminal-thread.md) | rejects `done` on terminal thread states |
diff --git a/docs/tests/inbox/done/done-marks-thread-terminal.md b/docs/tests/inbox/done/done-marks-thread-terminal.md
deleted file mode 100644
index aeb37de..0000000
--- a/docs/tests/inbox/done/done-marks-thread-terminal.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# Case: `done-marks-thread-terminal`
-
-## 用例意义
-
-验证租约拥有者可以将线程推进到 `done` 终态，并生成结果消息。
-
-## 前置条件
-
-- `worker-a` 已成功 `claim` 线程 `THREAD_ID`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json done --agent worker-a --thread THREAD_ID --summary "Retry policy implemented" --body "The HTTP client now retries the selected transient failures."
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- `thread.status == "done"`
-- `message.kind == "result"`
-
-## 断言结论
-
-- `done` 会把线程推进到成功终态
-- 完成时会释放活跃 lease
-
-## 补充约束
-
-- 当 `--agent` 未显式提供时，可以回退使用根级 `--agent`
-- 若线程存在但当前没有活跃 lease，例如 lease 已释放或过期，`done` 应返回 `invalid_state`，而不是 `lease_conflict`
-- `--thread` 与 `--summary` 是必填 flag；缺失时属于 `invalid_input` 类 usage error
-
diff --git a/docs/tests/inbox/done/done-persists-result-body-and-artifact.md b/docs/tests/inbox/done/done-persists-result-body-and-artifact.md
deleted file mode 100644
index ec03ec2..0000000
--- a/docs/tests/inbox/done/done-persists-result-body-and-artifact.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# Case: `done-persists-result-body-and-artifact`
-
-## 用例意义
-
-验证 `done` 能持久化结果正文与附件，并被后续 `show` 读取。
-
-## 前置条件
-
-- `worker-a` 已成功 `claim` 线程 `THREAD_ID`
-- `TMPDIR/result.md` 已存在
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json done --agent worker-a --thread THREAD_ID --summary "Retry policy implemented" --body-file TMPDIR/result.md --artifact TMPDIR/result.md --artifact-kind report
-inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-```
-
-## 预期输出
-
-- `done` 成功
-- 最终结果消息 `body` 等于文件内容
-- 结果消息包含 1 个 `report` artifact
-
-## 断言结论
-
-- `done` 是结果交付命令，不只是状态切换命令
-- `done` 也支持 `--payload-json`；若传入非法 JSON，应返回 `invalid_input`
-
-## 补充约束
-
-- `--body` 与 `--body-file` 互斥；不可读的 `--body-file` 也属于 `invalid_input`
-- artifact 相关 flag 依赖至少一个 `--artifact`，并遵守“指定一次或按 artifact 数量逐个指定”的计数规则
-
diff --git a/docs/tests/inbox/done/done-rejects-non-owner.md b/docs/tests/inbox/done/done-rejects-non-owner.md
deleted file mode 100644
index e08c9e8..0000000
--- a/docs/tests/inbox/done/done-rejects-non-owner.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Case: `done-rejects-non-owner`
-
-## 用例意义
-
-验证非租约拥有者不能代替执行者完成线程。
-
-## 前置条件
-
-- `worker-a` 已成功 `claim` 线程 `THREAD_ID`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json done --agent worker-b --thread THREAD_ID --summary "Retry policy implemented"
-```
-
-## 预期输出
-
-- 退出码为 `20`
-- JSON 错误码为 `lease_conflict`
-
-## 断言结论
-
-- `done` 受活跃 lease 所属者约束
-
diff --git a/docs/tests/inbox/done/done-rejects-on-terminal-thread.md b/docs/tests/inbox/done/done-rejects-on-terminal-thread.md
deleted file mode 100644
index ce8a46b..0000000
--- a/docs/tests/inbox/done/done-rejects-on-terminal-thread.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Case: `done-rejects-on-terminal-thread`
-
-## 用例意义
-
-验证已进入终态的线程不能再次执行 `done`。
-
-## 前置条件
-
-- 线程 `THREAD_ID` 已经是 `done`、`failed` 或 `cancelled`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json done --agent worker-a --thread THREAD_ID --summary "Retry policy implemented"
-```
-
-## 预期输出
-
-- 退出码为 `30`
-- JSON 错误码为 `invalid_state`
-
-## 断言结论
-
-- `done` 对终态线程是幂等失败，而不是重复成功
-
diff --git a/docs/tests/inbox/fail/README.md b/docs/tests/inbox/fail/README.md
deleted file mode 100644
index 649d091..0000000
--- a/docs/tests/inbox/fail/README.md
+++ /dev/null
@@ -1,10 +0,0 @@
-# Inbox `fail` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `fail-marks-thread-failed` | [fail-marks-thread-failed.md](./fail-marks-thread-failed.md) | marks a claimed thread as `failed` with a result message |
-| `fail-persists-failure-body-and-artifact` | [fail-persists-failure-body-and-artifact.md](./fail-persists-failure-body-and-artifact.md) | persists failure body and artifacts for diagnosis |
-| `fail-rejects-non-owner` | [fail-rejects-non-owner.md](./fail-rejects-non-owner.md) | rejects `fail` from non-owner agent |
-| `fail-rejects-on-terminal-thread` | [fail-rejects-on-terminal-thread.md](./fail-rejects-on-terminal-thread.md) | rejects `fail` on terminal thread states |
diff --git a/docs/tests/inbox/fail/fail-marks-thread-failed.md b/docs/tests/inbox/fail/fail-marks-thread-failed.md
deleted file mode 100644
index 54346b3..0000000
--- a/docs/tests/inbox/fail/fail-marks-thread-failed.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# Case: `fail-marks-thread-failed`
-
-## 用例意义
-
-验证租约拥有者可以把线程推进到 `failed` 终态，并生成失败结果消息。
-
-## 前置条件
-
-- `worker-b` 已成功 `claim` 线程 `THREAD_ID`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json fail --agent worker-b --thread THREAD_ID --summary "Migration failed" --body "The migration cannot proceed because the prior schema is inconsistent."
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- `thread.status == "failed"`
-- `message.kind == "result"`
-
-## 断言结论
-
-- `fail` 与 `done` 共享结果消息模型，但进入的是失败终态
-- 成功 `fail` 后会释放当前活跃 lease，避免线程停留在失败终态却仍显示被占用
-
-## 补充约束
-
-- 当 `--agent` 未显式提供时，可以回退使用根级 `--agent`
-- `fail` 生成的 `result` 消息会发回线程创建者，而不是发给当前执行者自己
-- 如果线程没有活跃 lease，`fail` 应返回 `invalid_state`，而不是 `lease_conflict`
-
diff --git a/docs/tests/inbox/fail/fail-persists-failure-body-and-artifact.md b/docs/tests/inbox/fail/fail-persists-failure-body-and-artifact.md
deleted file mode 100644
index 629dc1b..0000000
--- a/docs/tests/inbox/fail/fail-persists-failure-body-and-artifact.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# Case: `fail-persists-failure-body-and-artifact`
-
-## 用例意义
-
-验证 `fail` 能持久化失败说明与附件。
-
-## 前置条件
-
-- `worker-b` 已成功 `claim` 线程 `THREAD_ID`
-- `TMPDIR/failure.md` 已存在
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json fail --agent worker-b --thread THREAD_ID --summary "Migration failed" --body-file TMPDIR/failure.md --artifact TMPDIR/failure.md --artifact-kind report
-inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-```
-
-## 预期输出
-
-- `fail` 成功
-- 最终结果消息 `body` 等于文件内容
-- 结果消息包含 1 个 `report` artifact
-
-## 断言结论
-
-- 失败终态同样要能完整交付排障材料
-
-## 补充约束
-
-- `--payload-json` 需要是合法 JSON；空值会按 `{}` 处理
-- `--body` 与 `--body-file` 互斥；不可读的 `--body-file` 属于 `invalid_input`
-- `artifact-kind` 和 `artifact-metadata-json` 不能脱离 `--artifact` 单独使用，且多值数量必须满足“一次全量应用”或“逐 artifact 对齐”
-
diff --git a/docs/tests/inbox/fail/fail-rejects-non-owner.md b/docs/tests/inbox/fail/fail-rejects-non-owner.md
deleted file mode 100644
index 06f0acc..0000000
--- a/docs/tests/inbox/fail/fail-rejects-non-owner.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Case: `fail-rejects-non-owner`
-
-## 用例意义
-
-验证非租约拥有者不能把线程标记为失败。
-
-## 前置条件
-
-- `worker-b` 已成功 `claim` 线程 `THREAD_ID`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json fail --agent worker-x --thread THREAD_ID --summary "Migration failed"
-```
-
-## 预期输出
-
-- 退出码为 `20`
-- JSON 错误码为 `lease_conflict`
-
-## 断言结论
-
-- `fail` 与 `done` 一样受 lease owner 约束
-
diff --git a/docs/tests/inbox/fail/fail-rejects-on-terminal-thread.md b/docs/tests/inbox/fail/fail-rejects-on-terminal-thread.md
deleted file mode 100644
index 8eee49e..0000000
--- a/docs/tests/inbox/fail/fail-rejects-on-terminal-thread.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Case: `fail-rejects-on-terminal-thread`
-
-## 用例意义
-
-验证已进入终态的线程不能再次执行 `fail`。
-
-## 前置条件
-
-- 线程 `THREAD_ID` 已经是 `done`、`failed` 或 `cancelled`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json fail --agent worker-b --thread THREAD_ID --summary "Migration failed"
-```
-
-## 预期输出
-
-- 退出码为 `30`
-- JSON 错误码为 `invalid_state`
-
-## 断言结论
-
-- `fail` 对终态线程不会重复成功
-
diff --git a/docs/tests/inbox/fetch/README.md b/docs/tests/inbox/fetch/README.md
deleted file mode 100644
index 5a5ecfd..0000000
--- a/docs/tests/inbox/fetch/README.md
+++ /dev/null
@@ -1,10 +0,0 @@
-# Inbox `fetch` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `fetch-returns-pending-thread-for-target-agent` | [fetch-returns-pending-thread-for-target-agent.md](./fetch-returns-pending-thread-for-target-agent.md) | returns pending candidate work for the target agent |
-| `fetch-respects-status-and-limit-filters` | [fetch-respects-status-and-limit-filters.md](./fetch-respects-status-and-limit-filters.md) | enforces status filtering and max row count |
-| `fetch-unread-uses-read-cursor` | [fetch-unread-uses-read-cursor.md](./fetch-unread-uses-read-cursor.md) | unread filtering depends on per-agent read cursor state |
-| `fetch-returns-no-matching-work-when-empty` | [fetch-returns-no-matching-work-when-empty.md](./fetch-returns-no-matching-work-when-empty.md) | empty fetch result returns no_matching_work |
diff --git a/docs/tests/inbox/fetch/fetch-respects-status-and-limit-filters.md b/docs/tests/inbox/fetch/fetch-respects-status-and-limit-filters.md
deleted file mode 100644
index 23a5c97..0000000
--- a/docs/tests/inbox/fetch/fetch-respects-status-and-limit-filters.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# Case: `fetch-respects-status-and-limit-filters`
-
-## 用例意义
-
-验证 `fetch` 同时遵守状态过滤与返回上限。
-
-## 前置条件
-
-- `worker-a` 拥有多个不同状态的线程
-- 其中至少两个线程满足目标状态过滤条件
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json fetch --agent worker-a --status pending,blocked --limit 1
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- 返回线程数不超过 `1`
-- 返回的每条线程都满足 `status in ["pending","blocked"]`
-
-## 断言结论
-
-- `fetch` 的 `status` 与 `limit` 会同时生效
-- 返回顺序按 `updated_at` 倒序，优先暴露最新线程
-
-## 补充约束
-
-- `--limit` 传入 `0` 或负数时，实际会回退到默认上限 `20`
diff --git a/docs/tests/inbox/fetch/fetch-returns-no-matching-work-when-empty.md b/docs/tests/inbox/fetch/fetch-returns-no-matching-work-when-empty.md
deleted file mode 100644
index dda29c0..0000000
--- a/docs/tests/inbox/fetch/fetch-returns-no-matching-work-when-empty.md
+++ /dev/null
@@ -1,24 +0,0 @@
-# Case: `fetch-returns-no-matching-work-when-empty`
-
-## 用例意义
-
-验证 `fetch` 在没有匹配线程时返回稳定的“无工作”错误契约。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json fetch --agent worker-z --status pending
-```
-
-## 预期输出
-
-- 退出码为 `10`
-- JSON 错误码为 `no_matching_work`
-
-## 断言结论
-
-- 空结果不是成功空数组，而是显式的“无匹配工作”信号
diff --git a/docs/tests/inbox/fetch/fetch-returns-pending-thread-for-target-agent.md b/docs/tests/inbox/fetch/fetch-returns-pending-thread-for-target-agent.md
deleted file mode 100644
index ea46c26..0000000
--- a/docs/tests/inbox/fetch/fetch-returns-pending-thread-for-target-agent.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# Case: `fetch-returns-pending-thread-for-target-agent`
-
-## 用例意义
-
-验证 `fetch` 能按目标执行者拉取待处理线程。
-
-## 前置条件
-
-- `leader` 已向 `worker-a` 发送至少一个 `pending` 线程
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json fetch --agent worker-a --status pending
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- 返回 `data.threads`
-- 至少包含一个 `assigned_to == "worker-a"` 且 `status == "pending"` 的线程
-
-## 断言结论
-
-- `fetch` 默认是执行者视角的候选工作列表，不是全局线程扫描
-
-## 补充约束
-
-- 未显式传 `--status` 时，`fetch` 默认只查询 `pending` 线程
-- 未显式传命令级 `--agent` 时，可回退到根级 `--agent`
diff --git a/docs/tests/inbox/fetch/fetch-unread-uses-read-cursor.md b/docs/tests/inbox/fetch/fetch-unread-uses-read-cursor.md
deleted file mode 100644
index 9ec3a72..0000000
--- a/docs/tests/inbox/fetch/fetch-unread-uses-read-cursor.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# Case: `fetch-unread-uses-read-cursor`
-
-## 用例意义
-
-验证 `fetch --unread` 基于 agent 的 read cursor 计算未读，而不是仅按线程是否存在新消息。
-
-## 前置条件
-
-- `leader` 已向 `worker-e` 发送一个 `pending` 线程 `THREAD_ID`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json fetch --agent worker-e --status pending --unread
-inbox --db TMPDIR/coord.db --agent worker-e --json show --thread THREAD_ID --mark-read
-inbox --db TMPDIR/coord.db --json fetch --agent worker-e --status pending --unread
-inbox --db TMPDIR/coord.db --json send --from leader --to worker-e --thread THREAD_ID --summary "Use sentence case" --body "Keep the nav labels in sentence case."
-inbox --db TMPDIR/coord.db --json fetch --agent worker-e --status pending --unread
-```
-
-## 预期输出
-
-- 第一次 `fetch --unread` 返回该线程
-- `show --mark-read` 后，第二次 `fetch --unread` 无匹配结果
-- 新消息追加后，第三次 `fetch --unread` 再次返回该线程
-
-## 断言结论
-
-- 未读判断依赖 `thread_reads.last_read_message_id`
-- 新消息到达会让同线程重新进入未读结果集
-
-## 补充约束
-
-- 使用 `--unread` 时必须具备 agent 身份，否则会返回 `invalid_input`
diff --git a/docs/tests/inbox/init/README.md b/docs/tests/inbox/init/README.md
deleted file mode 100644
index 94e5ee0..0000000
--- a/docs/tests/inbox/init/README.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# Inbox `init` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `init-creates-schema-on-empty-db` | [init-creates-schema-on-empty-db.md](./init-creates-schema-on-empty-db.md) | initializes an empty database path and returns initialized status |
-| `init-is-idempotent-on-existing-db` | [init-is-idempotent-on-existing-db.md](./init-is-idempotent-on-existing-db.md) | repeated init succeeds on the same database path |
diff --git a/docs/tests/inbox/init/init-creates-schema-on-empty-db.md b/docs/tests/inbox/init/init-creates-schema-on-empty-db.md
deleted file mode 100644
index 481a40c..0000000
--- a/docs/tests/inbox/init/init-creates-schema-on-empty-db.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# Case: `init-creates-schema-on-empty-db`
-
-## 用例意义
-
-验证在空数据库路径上执行 `init` 会创建可用的 inbox schema，并返回稳定的初始化响应。
-
-## 前置条件
-
-- 选择一个尚不存在的数据库路径 `TMPDIR/coord.db`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json init
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- 返回 `ok=true`
-- `command` 为 `init`
-- `data.db_path` 等于传入路径
-- `data.status` 为 `initialized`
-
-## 断言结论
-
-- `init` 在空路径上可以直接完成 schema 初始化
-- 初始化结果足以让后续 `send`、`fetch` 等命令继续使用同一数据库
diff --git a/docs/tests/inbox/init/init-is-idempotent-on-existing-db.md b/docs/tests/inbox/init/init-is-idempotent-on-existing-db.md
deleted file mode 100644
index 435b7b7..0000000
--- a/docs/tests/inbox/init/init-is-idempotent-on-existing-db.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# Case: `init-is-idempotent-on-existing-db`
-
-## 用例意义
-
-验证 `init` 可以对已初始化过的数据库重复执行，而不会报错或破坏已有 schema。
-
-## 前置条件
-
-- `TMPDIR/coord.db` 已经执行过一次 `inbox --db TMPDIR/coord.db --json init`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json init
-inbox --db TMPDIR/coord.db --json init
-```
-
-## 预期输出
-
-- 两次命令都退出码为 `0`
-- 两次响应都返回 `data.status == "initialized"`
-- 两次响应都返回相同的 `data.db_path`
-
-## 断言结论
-
-- `init` 是幂等操作
-- 对已存在 schema 的重复初始化不应引入额外迁移失败或状态漂移
diff --git a/docs/tests/inbox/list/README.md b/docs/tests/inbox/list/README.md
deleted file mode 100644
index e1626c8..0000000
--- a/docs/tests/inbox/list/README.md
+++ /dev/null
@@ -1,10 +0,0 @@
-# Inbox `list` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `list-filters-by-status` | [list-filters-by-status.md](./list-filters-by-status.md) | filters returned threads by status set |
-| `list-filters-by-created-by` | [list-filters-by-created-by.md](./list-filters-by-created-by.md) | filters returned threads by creator |
-| `list-filters-by-assigned-to` | [list-filters-by-assigned-to.md](./list-filters-by-assigned-to.md) | filters returned threads by current assignee |
-| `list-respects-limit` | [list-respects-limit.md](./list-respects-limit.md) | enforces hard cap on returned thread count |
diff --git a/docs/tests/inbox/list/list-filters-by-assigned-to.md b/docs/tests/inbox/list/list-filters-by-assigned-to.md
deleted file mode 100644
index 00ecd6c..0000000
--- a/docs/tests/inbox/list/list-filters-by-assigned-to.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# case: list-filters-by-assigned-to
-
-### 用例意义
-
-验证 `list --assigned-to` 能按当前指派执行者筛选线程。
-
-### 前置条件
-
-- 数据库中存在多个不同 `assigned_to` 的线程
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json list --assigned-to worker-d --status pending
-```
-
-### 预期输出
-
-- 命令退出码为 `0`
-- 返回的每条线程都满足 `assigned_to == "worker-d"`
-
-### 断言结论
-
-- `list` 可用于管理侧查看某位执行者当前承担的线程集合
-
diff --git a/docs/tests/inbox/list/list-filters-by-created-by.md b/docs/tests/inbox/list/list-filters-by-created-by.md
deleted file mode 100644
index bbb5ebb..0000000
--- a/docs/tests/inbox/list/list-filters-by-created-by.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# case: list-filters-by-created-by
-
-### 用例意义
-
-验证 `list --created-by` 能按线程创建者筛选结果。
-
-### 前置条件
-
-- 至少有两位不同创建者产生的线程
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json list --created-by leader
-```
-
-### 预期输出
-
-- 命令退出码为 `0`
-- 返回的每条线程都满足 `created_by == "leader"`
-
-### 断言结论
-
-- `created-by` 过滤条件直接作用在线程元数据上
-- 没有任何匹配线程时，`list` 返回退出码 `10` 和错误码 `no_matching_work`，而不是成功空数组
-
diff --git a/docs/tests/inbox/list/list-filters-by-status.md b/docs/tests/inbox/list/list-filters-by-status.md
deleted file mode 100644
index 4de38b8..0000000
--- a/docs/tests/inbox/list/list-filters-by-status.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# case: list-filters-by-status
-
-### 用例意义
-
-验证 `list --status` 只返回指定状态集合内的线程。
-
-### 前置条件
-
-- 数据库中存在多个不同状态的线程
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json list --status pending,blocked
-```
-
-### 预期输出
-
-- 命令退出码为 `0`
-- 返回的每条线程都满足 `status in ["pending","blocked"]`
-
-### 断言结论
-
-- `list` 会严格应用状态过滤
-- 当未显式传 `--assigned-to` 时，`list` 可以作为全局视角，也可以在提供 `--agent` 或根级 `--agent` 时退化为“按 assigned-to 过滤”的快捷入口
-
diff --git a/docs/tests/inbox/list/list-respects-limit.md b/docs/tests/inbox/list/list-respects-limit.md
deleted file mode 100644
index 89739ee..0000000
--- a/docs/tests/inbox/list/list-respects-limit.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# case: list-respects-limit
-
-### 用例意义
-
-验证 `list --limit` 会约束返回条数，并按更新时间倒序返回最新线程。
-
-### 前置条件
-
-- 存在多个满足过滤条件的线程
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json list --assigned-to worker-d --limit 1
-```
-
-### 预期输出
-
-- 命令退出码为 `0`
-- 返回线程数不超过 `1`
-
-### 断言结论
-
-- `list` 的 limit 是硬上限，不会返回超量结果
-- `--limit <= 0` 时会回退到默认值 `20`
-
diff --git a/docs/tests/inbox/renew/README.md b/docs/tests/inbox/renew/README.md
deleted file mode 100644
index 3f54bf3..0000000
--- a/docs/tests/inbox/renew/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Inbox `renew` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `renew-extends-active-lease` | [renew-extends-active-lease.md](./renew-extends-active-lease.md) | owner renews an active lease and gets a renewal event |
-| `renew-rejects-non-owner` | [renew-rejects-non-owner.md](./renew-rejects-non-owner.md) | non-owner renew attempt returns lease_conflict |
-| `renew-rejects-without-active-lease` | [renew-rejects-without-active-lease.md](./renew-rejects-without-active-lease.md) | missing active lease returns invalid_state |
diff --git a/docs/tests/inbox/renew/renew-extends-active-lease.md b/docs/tests/inbox/renew/renew-extends-active-lease.md
deleted file mode 100644
index 8bb8bed..0000000
--- a/docs/tests/inbox/renew/renew-extends-active-lease.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# Case: `renew-extends-active-lease`
-
-## 用例意义
-
-验证租约拥有者可以对活跃 lease 执行续租，并生成续租事件消息。
-
-## 前置条件
-
-- `worker-c` 已成功 `claim` 线程 `THREAD_ID`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json renew --agent worker-c --thread THREAD_ID --lease-seconds 600
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- `thread.status` 保持原状态
-- `message.kind == "event"`
-- `message.summary == "lease renewed"`
-- `message.payload_json.lease_seconds == 600`
-- `message.payload_json.lease_token` 存在
-
-## 断言结论
-
-- `renew` 是在原线程上追加续租事件，而不是重新 claim
-
-## 补充约束
-
-- `renew` 需要 agent 身份；可以通过命令级 `--agent` 提供，也可以回退到根级 `--agent`
-- `--lease-seconds` 传入 `0` 或负数时，CLI 会按 `900` 秒默认值处理
diff --git a/docs/tests/inbox/renew/renew-rejects-non-owner.md b/docs/tests/inbox/renew/renew-rejects-non-owner.md
deleted file mode 100644
index 0ab580e..0000000
--- a/docs/tests/inbox/renew/renew-rejects-non-owner.md
+++ /dev/null
@@ -1,24 +0,0 @@
-# Case: `renew-rejects-non-owner`
-
-## 用例意义
-
-验证非租约拥有者不能续租别人的活跃 lease。
-
-## 前置条件
-
-- `worker-c` 已成功 `claim` 线程 `THREAD_ID`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json renew --agent worker-x --thread THREAD_ID --lease-seconds 600
-```
-
-## 预期输出
-
-- 退出码为 `20`
-- JSON 错误码为 `lease_conflict`
-
-## 断言结论
-
-- `renew` 与 `claim` 一样受 lease owner 约束
diff --git a/docs/tests/inbox/renew/renew-rejects-without-active-lease.md b/docs/tests/inbox/renew/renew-rejects-without-active-lease.md
deleted file mode 100644
index 527134d..0000000
--- a/docs/tests/inbox/renew/renew-rejects-without-active-lease.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# Case: `renew-rejects-without-active-lease`
-
-## 用例意义
-
-验证线程没有活跃租约时，`renew` 会明确失败。
-
-## 前置条件
-
-- 已存在线程 `THREAD_ID`
-- 该线程当前没有活跃 lease
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json renew --agent worker-c --thread THREAD_ID --lease-seconds 600
-```
-
-## 预期输出
-
-- 退出码为 `30`
-- JSON 错误码为 `invalid_state`
-
-## 断言结论
-
-- `renew` 依赖已有活跃租约
-- 没有 lease 属于状态错误，不是 not-found
diff --git a/docs/tests/inbox/reply/README.md b/docs/tests/inbox/reply/README.md
deleted file mode 100644
index 8d93726..0000000
--- a/docs/tests/inbox/reply/README.md
+++ /dev/null
@@ -1,10 +0,0 @@
-# Inbox `reply` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `reply-adds-answer-message` | [reply-adds-answer-message.md](./reply-adds-answer-message.md) | appends default `answer` message to an existing non-terminal thread |
-| `reply-supports-control-kind` | [reply-supports-control-kind.md](./reply-supports-control-kind.md) | supports explicit `--kind control` reply message |
-| `reply-attaches-artifact` | [reply-attaches-artifact.md](./reply-attaches-artifact.md) | appends reply message with artifact payload |
-| `reply-rejects-invalid-payload-json` | [reply-rejects-invalid-payload-json.md](./reply-rejects-invalid-payload-json.md) | rejects malformed `--payload-json` input |
diff --git a/docs/tests/inbox/reply/reply-adds-answer-message.md b/docs/tests/inbox/reply/reply-adds-answer-message.md
deleted file mode 100644
index dfa7906..0000000
--- a/docs/tests/inbox/reply/reply-adds-answer-message.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# Case: `reply-adds-answer-message`
-
-## 用例意义
-
-验证 `reply` 默认会向现有线程追加一条 `answer` 消息，并保持线程状态不变。
-
-## 前置条件
-
-- 已存在一个非终态线程 `THREAD_ID`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json reply --from leader --to worker-a --thread THREAD_ID --summary "Retry read timeouts" --body "Yes, include read timeouts in the retry policy."
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- `message.kind == "answer"`
-- `thread.thread_id == THREAD_ID`
-- 线程状态保持原值
-
-## 断言结论
-
-- `reply` 是线程内追加消息，而不是状态转换命令
-
-## 补充约束
-
-- `--from` 未显式提供时，可以回退使用根级 `--agent`；如果两者都缺失，应返回 `invalid_input`
-- `--thread`、`--to`、`--summary` 都是必填 flag；缺失时属于 `invalid_input` 类 usage error
-- `reply` 只允许作用在既有非终态线程上；缺失线程应返回 `not_found`，终态线程应返回 `invalid_state`
-- `--body` 与 `--body-file` 互斥；不可读的 `--body-file` 应返回 `invalid_input`
-
diff --git a/docs/tests/inbox/reply/reply-attaches-artifact.md b/docs/tests/inbox/reply/reply-attaches-artifact.md
deleted file mode 100644
index 9585736..0000000
--- a/docs/tests/inbox/reply/reply-attaches-artifact.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# Case: `reply-attaches-artifact`
-
-## 用例意义
-
-验证 `reply` 支持追加带附件的答复消息。
-
-## 前置条件
-
-- 已存在一个非终态线程 `THREAD_ID`
-- `TMPDIR/decision.md` 已存在
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json reply --from leader --to worker-a --thread THREAD_ID --summary "Retry read timeouts" --artifact TMPDIR/decision.md --artifact-kind brief --artifact-metadata-json '{"label":"decision"}'
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- `message.artifacts` 长度为 `1`
-- artifact 路径、kind、metadata 都可读
-
-## 断言结论
-
-- `reply` 与 `send/update/done/fail` 共享附件写入契约
-
-## 补充约束
-
-- `artifact-kind` 与 `artifact-metadata-json` 依赖至少一个 `--artifact`；数量不匹配也应返回 `invalid_input`
-
diff --git a/docs/tests/inbox/reply/reply-rejects-invalid-payload-json.md b/docs/tests/inbox/reply/reply-rejects-invalid-payload-json.md
deleted file mode 100644
index 756e98b..0000000
--- a/docs/tests/inbox/reply/reply-rejects-invalid-payload-json.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Case: `reply-rejects-invalid-payload-json`
-
-## 用例意义
-
-验证 `reply` 对非法 `--payload-json` 输入返回稳定错误契约。
-
-## 前置条件
-
-- 已存在一个非终态线程 `THREAD_ID`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json reply --from leader --to worker-a --thread THREAD_ID --summary "Retry read timeouts" --payload-json not-json
-```
-
-## 预期输出
-
-- 退出码为 `30`
-- JSON 错误码为 `invalid_input`
-
-## 断言结论
-
-- `reply` 的 payload 与其他消息写入命令一样需要通过 JSON 校验
-
diff --git a/docs/tests/inbox/reply/reply-supports-control-kind.md b/docs/tests/inbox/reply/reply-supports-control-kind.md
deleted file mode 100644
index 6b8c635..0000000
--- a/docs/tests/inbox/reply/reply-supports-control-kind.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Case: `reply-supports-control-kind`
-
-## 用例意义
-
-验证 `reply --kind control` 可以发送控制类消息，而不局限于默认 `answer`。
-
-## 前置条件
-
-- 已存在一个非终态线程 `THREAD_ID`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json reply --from leader --to worker-a --thread THREAD_ID --kind control --summary "Pause rollout" --body "Pause rollout until QA confirms the fix."
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- `message.kind == "control"`
-
-## 断言结论
-
-- `reply` 的消息种类可由调用方显式指定
-
diff --git a/docs/tests/inbox/send/README.md b/docs/tests/inbox/send/README.md
deleted file mode 100644
index e8b4af0..0000000
--- a/docs/tests/inbox/send/README.md
+++ /dev/null
@@ -1,12 +0,0 @@
-# Inbox `send` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `send-creates-new-thread` | [send-creates-new-thread.md](./send-creates-new-thread.md) | creates a pending thread with an initial task message |
-| `send-appends-message-to-existing-thread` | [send-appends-message-to-existing-thread.md](./send-appends-message-to-existing-thread.md) | appends a message to an existing non-terminal thread |
-| `send-reads-body-from-body-file` | [send-reads-body-from-body-file.md](./send-reads-body-from-body-file.md) | reads message body from a file path |
-| `send-attaches-artifact-with-metadata` | [send-attaches-artifact-with-metadata.md](./send-attaches-artifact-with-metadata.md) | persists artifact path, kind, and metadata on send |
-| `send-rejects-invalid-payload-json` | [send-rejects-invalid-payload-json.md](./send-rejects-invalid-payload-json.md) | rejects malformed payload JSON with `invalid_input` |
-| `send-rejects-invalid-artifact-metadata-json` | [send-rejects-invalid-artifact-metadata-json.md](./send-rejects-invalid-artifact-metadata-json.md) | rejects malformed artifact metadata JSON |
diff --git a/docs/tests/inbox/send/send-appends-message-to-existing-thread.md b/docs/tests/inbox/send/send-appends-message-to-existing-thread.md
deleted file mode 100644
index 9d62571..0000000
--- a/docs/tests/inbox/send/send-appends-message-to-existing-thread.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# Case: `send-appends-message-to-existing-thread`
-
-## 用例意义
-
-验证 `send` 在指定既有 `--thread` 时会向原线程追加消息，而不是重建线程。
-
-## 前置条件
-
-- 已存在一个由 `leader` 发给 `worker-d` 的非终态线程 `THREAD_ID`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json send --from leader --to worker-d --thread THREAD_ID --summary "Use a markdown editor" --body "Prefer a textarea-based markdown editor for v1."
-inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-```
-
-## 预期输出
-
-- `send` 成功，返回的 `thread.thread_id` 仍为 `THREAD_ID`
-- 线程状态保持原值，不被强制改写为新状态
-- `show` 可见消息数增加
-
-## 断言结论
-
-- 追加消息不会重置线程生命周期
-- 线程历史按时间顺序保留旧消息与新消息
-
-## 补充约束
-
-- `--to` 是 CLI 必填参数；即使是向既有线程追加消息也不能省略
-- 对既有线程执行追加时，如果传入了不同的 `--to`，线程的 `assigned_to` 会更新为新的接收者
-- 终态线程不允许继续通过 `send` 追加消息，预期错误类型为 `invalid_state`
diff --git a/docs/tests/inbox/send/send-attaches-artifact-with-metadata.md b/docs/tests/inbox/send/send-attaches-artifact-with-metadata.md
deleted file mode 100644
index 22814f3..0000000
--- a/docs/tests/inbox/send/send-attaches-artifact-with-metadata.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# Case: `send-attaches-artifact-with-metadata`
-
-## 用例意义
-
-验证 `send` 支持附带 artifact、kind 和 metadata，并可在返回值或后续 `show` 中读取。
-
-## 前置条件
-
-- `TMPDIR/task.md` 已存在
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json send --from leader --to worker-d --subject "Build admin editor" --summary "Create the first editor screen" --artifact TMPDIR/task.md --artifact-kind brief --artifact-metadata-json '{"label":"task-brief"}'
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- `message.artifacts` 长度为 `1`
-- artifact `path == "TMPDIR/task.md"`
-- artifact `kind == "brief"`
-- artifact `metadata_json.label == "task-brief"`
-
-## 断言结论
-
-- `send` 可以在创建消息时持久化附件及其结构化元数据
diff --git a/docs/tests/inbox/send/send-creates-new-thread.md b/docs/tests/inbox/send/send-creates-new-thread.md
deleted file mode 100644
index d1985a9..0000000
--- a/docs/tests/inbox/send/send-creates-new-thread.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# Case: `send-creates-new-thread`
-
-## 用例意义
-
-验证 `send` 在未指定既有线程时会创建新线程，并写入首条任务消息。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json send --from leader --to worker-a --subject "Implement feature X" --summary "Add retry policy" --body "Implement retry handling for the HTTP client." --run run_blog_001 --task T1
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- 返回 `thread.thread_id`
-- `thread.status == "pending"`
-- `thread.created_by == "leader"`
-- `thread.assigned_to == "worker-a"`
-- `message.kind == "task"`
-
-## 断言结论
-
-- `send` 会新建线程而不是只插入孤立消息
-- 新线程的默认初始状态是 `pending`
-
-## 补充约束
-
-- `--from` 未显式传入时，会回退使用根级 `--agent`
-- 新建线程时未显式传 `--summary`，会回退到 `--subject`
-- 新建线程时 `--kind` 默认是 `task`，`--priority` 默认是 `normal`
-- 当 `--thread` 指向不存在的线程时，`send` 会使用该 thread ID 新建线程，而不是返回 `not_found`
diff --git a/docs/tests/inbox/send/send-reads-body-from-body-file.md b/docs/tests/inbox/send/send-reads-body-from-body-file.md
deleted file mode 100644
index 2b977cd..0000000
--- a/docs/tests/inbox/send/send-reads-body-from-body-file.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# Case: `send-reads-body-from-body-file`
-
-## 用例意义
-
-验证 `send --body-file` 会把文件内容写入消息正文。
-
-## 前置条件
-
-- `TMPDIR/task.md` 已存在，内容为测试正文
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json send --from leader --to worker-d --subject "Build admin editor" --summary "Create the first editor screen" --body-file TMPDIR/task.md
-inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-```
-
-## 预期输出
-
-- `send` 成功
-- `show` 首条消息的 `body` 与文件内容一致
-
-## 断言结论
-
-- `body-file` 内容会被原样读取
-- 该行为与直接传 `--body` 的最终存储结果等价
-
-## 补充约束
-
-- `--body` 与 `--body-file` 互斥；该约束由 shared 文档统一说明
diff --git a/docs/tests/inbox/send/send-rejects-invalid-artifact-metadata-json.md b/docs/tests/inbox/send/send-rejects-invalid-artifact-metadata-json.md
deleted file mode 100644
index 598fadc..0000000
--- a/docs/tests/inbox/send/send-rejects-invalid-artifact-metadata-json.md
+++ /dev/null
@@ -1,24 +0,0 @@
-# Case: `send-rejects-invalid-artifact-metadata-json`
-
-## 用例意义
-
-验证 `send` 对非法 artifact metadata JSON 给出稳定错误契约。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json send --from leader --to worker-z --subject "Invalid artifact json" --artifact TMPDIR/report.md --artifact-metadata-json not-json
-```
-
-## 预期输出
-
-- 退出码为 `30`
-- JSON 错误码为 `invalid_input`
-
-## 断言结论
-
-- artifact metadata 会在写入前校验 JSON 合法性
diff --git a/docs/tests/inbox/send/send-rejects-invalid-payload-json.md b/docs/tests/inbox/send/send-rejects-invalid-payload-json.md
deleted file mode 100644
index 6d7d615..0000000
--- a/docs/tests/inbox/send/send-rejects-invalid-payload-json.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Case: `send-rejects-invalid-payload-json`
-
-## 用例意义
-
-验证 `send` 对非法 `--payload-json` 输入给出稳定错误契约。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json send --from leader --to worker-z --subject "Invalid payload json" --payload-json not-json
-```
-
-## 预期输出
-
-- 退出码为 `30`
-- JSON 错误码为 `invalid_input`
-
-## 断言结论
-
-- 非法 payload 在写库前就会被拒绝
-- 错误归类为输入问题，而不是内部错误
diff --git a/docs/tests/inbox/show/README.md b/docs/tests/inbox/show/README.md
deleted file mode 100644
index 2af13ae..0000000
--- a/docs/tests/inbox/show/README.md
+++ /dev/null
@@ -1,10 +0,0 @@
-# Inbox `show` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `show-returns-thread-and-message-history` | [show-returns-thread-and-message-history.md](./show-returns-thread-and-message-history.md) | returns thread details and full time-ordered message history |
-| `show-includes-artifacts-per-message` | [show-includes-artifacts-per-message.md](./show-includes-artifacts-per-message.md) | expands per-message artifacts in the show payload |
-| `show-mark-read-advances-read-cursor` | [show-mark-read-advances-read-cursor.md](./show-mark-read-advances-read-cursor.md) | advances caller read cursor when `--mark-read` is used |
-| `show-rejects-when-thread-missing` | [show-rejects-when-thread-missing.md](./show-rejects-when-thread-missing.md) | returns stable not-found contract for missing thread |
diff --git a/docs/tests/inbox/show/show-includes-artifacts-per-message.md b/docs/tests/inbox/show/show-includes-artifacts-per-message.md
deleted file mode 100644
index d86f890..0000000
--- a/docs/tests/inbox/show/show-includes-artifacts-per-message.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# case: show-includes-artifacts-per-message
-
-### 用例意义
-
-验证 `show` 返回的每条消息都包含其关联 artifact 列表。
-
-### 前置条件
-
-- 线程 `THREAD_ID` 中至少一条消息附带 artifact
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-```
-
-### 预期输出
-
-- 命令退出码为 `0`
-- 相关消息节点包含 `artifacts`
-- artifact 的 `path`、`kind`、`metadata_json` 可读
-
-### 断言结论
-
-- `show` 需要把附件一并展开，而不是只返回 message 基本字段
-
diff --git a/docs/tests/inbox/show/show-mark-read-advances-read-cursor.md b/docs/tests/inbox/show/show-mark-read-advances-read-cursor.md
deleted file mode 100644
index a20fbb3..0000000
--- a/docs/tests/inbox/show/show-mark-read-advances-read-cursor.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# case: show-mark-read-advances-read-cursor
-
-### 用例意义
-
-验证 `show --mark-read` 会推进调用 agent 的 read cursor，并影响后续 unread 查询。
-
-### 前置条件
-
-- `worker-e` 有一个未读线程 `THREAD_ID`
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --agent worker-e --json show --thread THREAD_ID --mark-read
-inbox --db TMPDIR/coord.db --json fetch --agent worker-e --status pending --unread
-```
-
-### 预期输出
-
-- `show` 成功
-- 随后的 `fetch --unread` 对该线程不再返回结果
-
-### 断言结论
-
-- `mark-read` 的副作用是推进该 agent 的 `last_read_message_id`
-- 使用 `--mark-read` 时必须提供 agent 身份，可通过根级 `--agent` 或命令参数传入
-
diff --git a/docs/tests/inbox/show/show-rejects-when-thread-missing.md b/docs/tests/inbox/show/show-rejects-when-thread-missing.md
deleted file mode 100644
index 98f554c..0000000
--- a/docs/tests/inbox/show/show-rejects-when-thread-missing.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# case: show-rejects-when-thread-missing
-
-### 用例意义
-
-验证 `show` 对不存在线程返回稳定的 not-found 错误契约。
-
-### 前置条件
-
-- 空数据库已完成 `init`
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json show --thread thr_missing
-```
-
-### 预期输出
-
-- 退出码为 `40`
-- JSON 错误码为 `not_found`
-
-### 断言结论
-
-- `show` 不会对缺失线程返回空对象
-- `--thread` 是必填 flag；缺失时属于 `invalid_input` 类 usage error
-
diff --git a/docs/tests/inbox/show/show-returns-thread-and-message-history.md b/docs/tests/inbox/show/show-returns-thread-and-message-history.md
deleted file mode 100644
index 4d962ed..0000000
--- a/docs/tests/inbox/show/show-returns-thread-and-message-history.md
+++ /dev/null
@@ -1,29 +0,0 @@
-# case: show-returns-thread-and-message-history
-
-### 用例意义
-
-验证 `show` 会返回线程详情和完整消息历史。
-
-### 前置条件
-
-- 已存在一个含多条消息的线程 `THREAD_ID`
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-```
-
-### 预期输出
-
-- 命令退出码为 `0`
-- 返回 `data.thread`
-- 返回 `data.messages`
-- 消息按创建时间升序排列
-
-### 断言结论
-
-- `show` 是线程详情与时间序历史的读取入口
-- `show` 不依赖线程是否处于活动态；只要线程存在，就应能读取包括终态线程在内的完整历史
-- 未使用 `--mark-read` 时，`show` 不要求提供 agent 身份
-
diff --git a/docs/tests/inbox/update/README.md b/docs/tests/inbox/update/README.md
deleted file mode 100644
index 38f34fd..0000000
--- a/docs/tests/inbox/update/README.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# Inbox `update` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `update-moves-thread-to-in-progress` | [update-moves-thread-to-in-progress.md](./update-moves-thread-to-in-progress.md) | moves a claimed thread to `in_progress` and emits a progress message |
-| `update-moves-thread-to-blocked-with-payload` | [update-moves-thread-to-blocked-with-payload.md](./update-moves-thread-to-blocked-with-payload.md) | moves a claimed thread to `blocked` with structured question payload |
-| `update-accepts-body-file-and-artifact` | [update-accepts-body-file-and-artifact.md](./update-accepts-body-file-and-artifact.md) | persists update body from file plus artifacts |
-| `update-rejects-invalid-payload-json` | [update-rejects-invalid-payload-json.md](./update-rejects-invalid-payload-json.md) | rejects malformed `--payload-json` input |
-| `update-rejects-non-owner` | [update-rejects-non-owner.md](./update-rejects-non-owner.md) | rejects update when caller is not the active lease owner |
diff --git a/docs/tests/inbox/update/update-accepts-body-file-and-artifact.md b/docs/tests/inbox/update/update-accepts-body-file-and-artifact.md
deleted file mode 100644
index 0a4745a..0000000
--- a/docs/tests/inbox/update/update-accepts-body-file-and-artifact.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# Case: `update-accepts-body-file-and-artifact`
-
-## 用例意义
-
-验证 `update` 支持通过 `body-file` 与 artifact 发送结构化进度材料。
-
-## 前置条件
-
-- `worker-a` 已成功 `claim` 线程 `THREAD_ID`
-- `TMPDIR/progress.md` 已存在
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json update --agent worker-a --thread THREAD_ID --status in_progress --summary "Implementation started" --body-file TMPDIR/progress.md --artifact TMPDIR/progress.md --artifact-kind note
-inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-```
-
-## 预期输出
-
-- `update` 成功
-- 对应消息 `body` 等于文件内容
-- 对应消息包含 1 个 artifact，kind 为 `note`
-
-## 断言结论
-
-- `update` 的正文与 artifact 支持与 `send/reply/done/fail` 保持一致
-
-## 补充约束
-
-- `--body` 与 `--body-file` 互斥；读取 `body-file` 失败时应返回 `invalid_input`
-- `artifact-kind` 与 `artifact-metadata-json` 不能脱离 `--artifact` 单独使用；数量不匹配时也应返回 `invalid_input`
-
diff --git a/docs/tests/inbox/update/update-moves-thread-to-blocked-with-payload.md b/docs/tests/inbox/update/update-moves-thread-to-blocked-with-payload.md
deleted file mode 100644
index 66b9e45..0000000
--- a/docs/tests/inbox/update/update-moves-thread-to-blocked-with-payload.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# Case: `update-moves-thread-to-blocked-with-payload`
-
-## 用例意义
-
-验证 `update --status blocked` 会写入阻塞问题消息，并保留结构化 payload。
-
-## 前置条件
-
-- `worker-a` 已成功 `claim` 线程 `THREAD_ID`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json update --agent worker-a --thread THREAD_ID --status blocked --summary "Need timeout decision" --payload-json '{"question":"Should retries apply to read timeouts?"}'
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- `thread.status == "blocked"`
-- `message.kind == "question"`
-- `message.payload_json.question` 保存提问内容
-
-## 断言结论
-
-- `blocked` 更新会生成面向创建者的问题消息
-
diff --git a/docs/tests/inbox/update/update-moves-thread-to-in-progress.md b/docs/tests/inbox/update/update-moves-thread-to-in-progress.md
deleted file mode 100644
index 062c3cb..0000000
--- a/docs/tests/inbox/update/update-moves-thread-to-in-progress.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# Case: `update-moves-thread-to-in-progress`
-
-## 用例意义
-
-验证租约拥有者可以把线程推进到 `in_progress`，并生成进度消息。
-
-## 前置条件
-
-- `worker-a` 已成功 `claim` 线程 `THREAD_ID`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json update --agent worker-a --thread THREAD_ID --status in_progress --summary "Implementation started" --body "Scanning current HTTP client usage."
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- `thread.status == "in_progress"`
-- `message.kind == "progress"`
-- `message.to_agent` 指向线程创建者
-
-## 断言结论
-
-- `update` 会把状态推进和消息追加合并为同一次事务
-
-## 补充约束
-
-- `update` 只接受 `in_progress` 和 `blocked` 两种 `--status`；其他值应返回退出码 `30` 与错误码 `invalid_input`
-- `update` 依赖活跃 lease：
-- 若线程存在活跃 lease 但归属其他 agent，应返回 `lease_conflict`
-- 若线程当前没有活跃 lease，应返回 `invalid_state`
-
diff --git a/docs/tests/inbox/update/update-rejects-invalid-payload-json.md b/docs/tests/inbox/update/update-rejects-invalid-payload-json.md
deleted file mode 100644
index e820d8b..0000000
--- a/docs/tests/inbox/update/update-rejects-invalid-payload-json.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Case: `update-rejects-invalid-payload-json`
-
-## 用例意义
-
-验证 `update` 对非法 `--payload-json` 输入返回稳定错误契约。
-
-## 前置条件
-
-- `worker-a` 已成功 `claim` 线程 `THREAD_ID`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json update --agent worker-a --thread THREAD_ID --status blocked --summary "Need timeout decision" --payload-json not-json
-```
-
-## 预期输出
-
-- 退出码为 `30`
-- JSON 错误码为 `invalid_input`
-
-## 断言结论
-
-- 阻塞问题的 payload 需要满足合法 JSON 约束
-
diff --git a/docs/tests/inbox/update/update-rejects-non-owner.md b/docs/tests/inbox/update/update-rejects-non-owner.md
deleted file mode 100644
index 616590b..0000000
--- a/docs/tests/inbox/update/update-rejects-non-owner.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Case: `update-rejects-non-owner`
-
-## 用例意义
-
-验证非租约拥有者不能更新线程状态。
-
-## 前置条件
-
-- `worker-a` 已成功 `claim` 线程 `THREAD_ID`
-
-## 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json update --agent worker-b --thread THREAD_ID --status in_progress --summary "Implementation started"
-```
-
-## 预期输出
-
-- 退出码为 `20`
-- JSON 错误码为 `lease_conflict`
-
-## 断言结论
-
-- `update` 明确依赖活跃 lease 所属者
-
diff --git a/docs/tests/inbox/wait-reply/README.md b/docs/tests/inbox/wait-reply/README.md
deleted file mode 100644
index a483f30..0000000
--- a/docs/tests/inbox/wait-reply/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Inbox `wait-reply` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `wait-reply-wakes-on-answer-after-message` | [wait-reply-wakes-on-answer-after-message.md](./wait-reply-wakes-on-answer-after-message.md) | wakes for a qualifying reply after known message boundary |
-| `wait-reply-can-start-from-after-event` | [wait-reply-can-start-from-after-event.md](./wait-reply-can-start-from-after-event.md) | resumes waiting from a known event cursor |
-| `wait-reply-times-out-when-no-reply` | [wait-reply-times-out-when-no-reply.md](./wait-reply-times-out-when-no-reply.md) | returns timeout contract when no qualifying reply arrives |
diff --git a/docs/tests/inbox/wait-reply/wait-reply-can-start-from-after-event.md b/docs/tests/inbox/wait-reply/wait-reply-can-start-from-after-event.md
deleted file mode 100644
index 72f982d..0000000
--- a/docs/tests/inbox/wait-reply/wait-reply-can-start-from-after-event.md
+++ /dev/null
@@ -1,29 +0,0 @@
-# case: wait-reply-can-start-from-after-event
-
-### 用例意义
-
-验证 `wait-reply --after-event` 支持从既知事件游标之后恢复等待。
-
-### 前置条件
-
-- 已通过先前的 `watch` 或 `wait-reply` 结果拿到某个 `NEXT_EVENT_ID`
-- 线程 `THREAD_ID` 后续还会收到新的回复类消息
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --agent worker-c --json wait-reply --thread THREAD_ID --after-event NEXT_EVENT_ID --timeout-seconds 2
-inbox --db TMPDIR/coord.db --json reply --from leader --to worker-c --thread THREAD_ID --summary "Redirect to login" --body "Redirect guests to login for the MVP."
-```
-
-### 预期输出
-
-- `wait-reply` 在事件游标之后的新回复出现时被唤醒
-- 返回新的 `next_event_id`
-
-### 断言结论
-
-- `after-event` 允许等待逻辑在断点之后继续，而不会重复消费旧回复
-- `--kinds` 支持自定义逗号分隔的唤醒消息类型；未显式提供时默认使用 `answer,control,result`
-- 默认唤醒 kinds 为 `answer,control,result`
-
diff --git a/docs/tests/inbox/wait-reply/wait-reply-times-out-when-no-reply.md b/docs/tests/inbox/wait-reply/wait-reply-times-out-when-no-reply.md
deleted file mode 100644
index 178e6f2..0000000
--- a/docs/tests/inbox/wait-reply/wait-reply-times-out-when-no-reply.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# case: wait-reply-times-out-when-no-reply
-
-### 用例意义
-
-验证在超时时间内没有匹配回复出现时，`wait-reply` 返回稳定超时契约。
-
-### 前置条件
-
-- 存在一个线程 `THREAD_ID`
-- 不会有新的 `answer/control/result` 消息到达
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --agent worker-c --json wait-reply --thread THREAD_ID --timeout-seconds 1
-```
-
-### 预期输出
-
-- 退出码为 `10`
-- JSON 错误码为 `no_matching_work`
-
-### 断言结论
-
-- `wait-reply` 超时被视为“没有等到匹配回复”
-- `--thread` 是必填 flag；缺失时属于 `invalid_input` 类 usage error
-- `--timeout-seconds=0` 表示无限等待，而不是立即超时
-
diff --git a/docs/tests/inbox/wait-reply/wait-reply-wakes-on-answer-after-message.md b/docs/tests/inbox/wait-reply/wait-reply-wakes-on-answer-after-message.md
deleted file mode 100644
index 9a1a638..0000000
--- a/docs/tests/inbox/wait-reply/wait-reply-wakes-on-answer-after-message.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# case: wait-reply-wakes-on-answer-after-message
-
-### 用例意义
-
-验证 `wait-reply` 可以从某条已知消息之后开始等待，并在答复到达后唤醒。
-
-### 前置条件
-
-- `worker-c` 已拥有一个 `blocked` 线程 `THREAD_ID`
-- 阻塞消息的 `message_id` 为 `BLOCKED_MESSAGE_ID`
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --agent worker-c --json wait-reply --thread THREAD_ID --after-message BLOCKED_MESSAGE_ID --timeout-seconds 2
-inbox --db TMPDIR/coord.db --json reply --from leader --to worker-c --thread THREAD_ID --summary "Redirect to login" --body "Redirect guests to login for the MVP."
-```
-
-### 预期输出
-
-- `wait-reply` 退出码为 `0`
-- `wait-reply.data.woke == true`
-- 返回的 `message.kind == "answer"`
-
-### 断言结论
-
-- `wait-reply` 可以可靠地从既知消息边界之后等待后续答复
-- `--agent` 不是必填；它主要用于在命中外来消息时推进该 agent 的 read cursor
-- `--after-message` 必须引用该线程中已知的消息；如果消息不存在，应返回 `not_found`
-- 当返回消息是发给等待 agent 的外来消息时，`wait-reply` 会顺带推进该 agent 的 read cursor
-
diff --git a/docs/tests/inbox/watch/README.md b/docs/tests/inbox/watch/README.md
deleted file mode 100644
index f850bfa..0000000
--- a/docs/tests/inbox/watch/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Inbox `watch` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `watch-wakes-on-matching-thread` | [watch-wakes-on-matching-thread.md](./watch-wakes-on-matching-thread.md) | wakes when a matching post-start event arrives and returns event context |
-| `watch-respects-status-filter` | [watch-respects-status-filter.md](./watch-respects-status-filter.md) | wakes only when thread transitions into requested status |
-| `watch-times-out-with-no-activity` | [watch-times-out-with-no-activity.md](./watch-times-out-with-no-activity.md) | returns timeout contract when no matching activity arrives |
diff --git a/docs/tests/inbox/watch/watch-respects-status-filter.md b/docs/tests/inbox/watch/watch-respects-status-filter.md
deleted file mode 100644
index ba2d5ab..0000000
--- a/docs/tests/inbox/watch/watch-respects-status-filter.md
+++ /dev/null
@@ -1,29 +0,0 @@
-# case: watch-respects-status-filter
-
-### 用例意义
-
-验证 `watch --status` 只会对匹配状态的后续事件唤醒。
-
-### 前置条件
-
-- 存在一个会被推进到 `blocked` 的线程 `THREAD_ID`
-- `watch` 以 `--status blocked` 先启动
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json watch --agent worker-c --status blocked --timeout-seconds 2
-inbox --db TMPDIR/coord.db --json update --agent worker-c --thread THREAD_ID --status blocked --summary "Need policy decision"
-```
-
-### 预期输出
-
-- `watch` 只在线程进入 `blocked` 后返回
-- 返回的 `thread.status == "blocked"`
-
-### 断言结论
-
-- `watch` 的状态过滤作用在“事件发生后的线程状态”上
-- `--status` 默认值为 `pending,blocked,done,failed`；未显式传入时，`watch` 不是只观察 `pending`
-- 显式传入 `--after-event` 时，`watch` 会从该事件游标之后恢复，允许调用方断点续看
-
diff --git a/docs/tests/inbox/watch/watch-times-out-with-no-activity.md b/docs/tests/inbox/watch/watch-times-out-with-no-activity.md
deleted file mode 100644
index 368d64e..0000000
--- a/docs/tests/inbox/watch/watch-times-out-with-no-activity.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# case: watch-times-out-with-no-activity
-
-### 用例意义
-
-验证在超时时间内没有匹配活动时，`watch` 返回稳定超时契约。
-
-### 前置条件
-
-- 没有新匹配事件会发生
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json watch --agent worker-d --status pending --timeout-seconds 1
-```
-
-### 预期输出
-
-- 退出码为 `10`
-- JSON 错误码为 `no_matching_work`
-
-### 断言结论
-
-- `watch` 超时被归类为“无匹配工作”，而不是内部错误
-- `--timeout-seconds 0` 表示无限等待，而不是立即超时
-- 未传 `--after-event` 时，`watch` 默认从“当前时刻之后”开始等待，不会回放既有事件
-
diff --git a/docs/tests/inbox/watch/watch-wakes-on-matching-thread.md b/docs/tests/inbox/watch/watch-wakes-on-matching-thread.md
deleted file mode 100644
index 04bd0b1..0000000
--- a/docs/tests/inbox/watch/watch-wakes-on-matching-thread.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# case: watch-wakes-on-matching-thread
-
-### 用例意义
-
-验证 `watch` 在新匹配线程到达时会被唤醒，并返回线程、消息与事件信息。
-
-### 前置条件
-
-- `worker-d` 当前没有匹配 `pending` 线程
-- `watch` 先于 `send` 启动
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json watch --agent worker-d --status pending --timeout-seconds 2
-inbox --db TMPDIR/coord.db --json send --from leader --to worker-d --subject "Build admin editor" --summary "Create the first editor screen"
-```
-
-### 预期输出
-
-- `watch` 退出码为 `0`
-- `watch.data.woke == true`
-- 返回 `thread`、`message`、`event`
-
-### 断言结论
-
-- `watch` 唤醒结果不仅说明“醒了”，还提供触发该唤醒的具体事件上下文
-- `--agent` 未显式提供时，可以回退使用根级 `--agent`；如果两者都未提供，则 `watch` 变为不按 `assigned_to` 过滤的全局观察
-- 成功唤醒时返回的 `next_event_id` 应等于触发唤醒的 `event.event_id`
-
diff --git a/docs/tests/inbox/workflows/README.md b/docs/tests/inbox/workflows/README.md
deleted file mode 100644
index 11faddd..0000000
--- a/docs/tests/inbox/workflows/README.md
+++ /dev/null
@@ -1,276 +0,0 @@
-# Inbox Workflow Test Plan
-
-## Scope
-
-This document tracks cross-command scenarios where the main value is the interaction between multiple `inbox` subcommands.
-
-All examples assume:
-
-- isolated temp database
-- `inbox --db TMPDIR/coord.db --json init` already executed
-- assertions follow the shared rules in [../_shared/README.md](../_shared/README.md)
-
-## case: thread-lifecycle-happy-path
-
-### 用例意义
-
-验证 `send -> fetch -> claim -> update(in_progress) -> update(blocked) -> reply -> done -> show` 的主干链路可用，且线程与消息历史一致。
-
-### 前置条件
-
-- 空数据库已完成 `init`
-- 发送方为 `leader`
-- 执行方为 `worker-a`
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json send --from leader --to worker-a --subject "Implement feature X" --summary "Add retry policy" --body "Implement retry handling for the HTTP client." --run run_blog_001 --task T1
-inbox --db TMPDIR/coord.db --json fetch --agent worker-a --status pending
-inbox --db TMPDIR/coord.db --json claim --agent worker-a --thread THREAD_ID --lease-seconds 300
-inbox --db TMPDIR/coord.db --json update --agent worker-a --thread THREAD_ID --status in_progress --summary "Implementation started" --body "Scanning current HTTP client usage."
-inbox --db TMPDIR/coord.db --json update --agent worker-a --thread THREAD_ID --status blocked --summary "Need timeout decision" --payload-json '{"question":"Should retries apply to read timeouts?"}'
-inbox --db TMPDIR/coord.db --json reply --from leader --to worker-a --thread THREAD_ID --summary "Retry read timeouts" --body "Yes, include read timeouts in the retry policy."
-inbox --db TMPDIR/coord.db --json done --agent worker-a --thread THREAD_ID --summary "Retry policy implemented" --body "The HTTP client now retries the selected transient failures."
-inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-```
-
-### 预期输出
-
-- `send` 返回新建线程，线程状态为 `pending`
-- `fetch` 返回唯一匹配线程
-- `claim` 后线程状态为 `claimed`
-- 第一次 `update` 后线程状态为 `in_progress`
-- 第二次 `update` 后线程状态为 `blocked`
-- `reply` 返回一条 `kind=answer` 的消息
-- `done` 后线程状态为 `done`
-- `show` 返回线程状态 `done`，并包含完整消息历史
-
-### 断言结论
-
-- 全链路所有命令退出码为 `0`
-- `show.data.thread.status == "done"`
-- `show.data.messages` 长度为 `6`
-- 历史中的状态推进顺序与执行顺序一致，不出现丢消息或状态回退
-
-## case: blocked-question-reply-resume-to-done
-
-### 用例意义
-
-验证被阻塞线程在收到答复后可以继续推进，并最终进入完成态。
-
-### 前置条件
-
-- 已存在由 `leader` 发给 `worker-c` 的线程
-- `worker-c` 已经成功 `claim`
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json update --agent worker-c --thread THREAD_ID --status blocked --summary "Need policy decision" --body "Should guest users be redirected to login or shown a 403 page?"
-inbox --db TMPDIR/coord.db --agent worker-c --json wait-reply --thread THREAD_ID --after-message BLOCKED_MESSAGE_ID --timeout-seconds 2
-inbox --db TMPDIR/coord.db --json reply --from leader --to worker-c --thread THREAD_ID --summary "Redirect to login" --body "Redirect guests to login for the MVP."
-inbox --db TMPDIR/coord.db --json done --agent worker-c --thread THREAD_ID --summary "Policy applied" --body "The flow now redirects guests to login."
-```
-
-### 预期输出
-
-- `update` 将线程推进到 `blocked`
-- `wait-reply` 在答复出现后唤醒
-- 唤醒结果包含答复消息
-- `done` 成功将线程推进到 `done`
-
-### 断言结论
-
-- `wait-reply.data.woke == true`
-- `wait-reply.data.message.kind == "answer"`
-- 最终 `done.data.thread.status == "done"`
-- 该用例强调“阻塞后可恢复”，不是单纯验证 reply 本身
-
-## case: fail-lifecycle-from-claim-to-terminal
-
-### 用例意义
-
-验证线程在被领取后可以直接进入失败终态，并且 `show` 对终态读取一致。
-
-### 前置条件
-
-- 空数据库已完成 `init`
-- `leader` 已向 `worker-b` 发送任务
-- `worker-b` 已 `claim` 该线程
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json fail --agent worker-b --thread THREAD_ID --summary "Migration failed" --body "The migration cannot proceed because the prior schema is inconsistent."
-inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-```
-
-### 预期输出
-
-- `fail` 返回线程状态 `failed`
-- `show` 返回相同终态
-
-### 断言结论
-
-- `fail.data.thread.status == "failed"`
-- `show.data.thread.status == "failed"`
-- 失败消息保留在线程历史中，可被后续排障读取
-
-## case: cancel-lifecycle-after-worker-claim
-
-### 用例意义
-
-验证线程在执行者已领取后，发起方仍可以取消任务，并进入 `cancelled` 终态。
-
-### 前置条件
-
-- `leader` 已向 `worker-c` 发送任务
-- `worker-c` 已成功 `claim`
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json cancel --agent leader --thread THREAD_ID --reason "Task superseded by a larger refactor"
-```
-
-### 预期输出
-
-- `cancel` 成功
-- 返回线程状态 `cancelled`
-- 返回的消息记录取消原因
-
-### 断言结论
-
-- `cancel.data.thread.status == "cancelled"`
-- 取消属于终态转换，不要求执行者先主动释放 lease
-- 原因字段可被后续 `show` 或审计场景消费
-
-## case: watch-wakes-then-fetch-sees-new-thread
-
-### 用例意义
-
-验证 `watch` 的等待语义与 `fetch --unread` 的可见性一致，确保新线程到达时执行者既会被唤醒，也能随后拉到未读任务。
-
-### 前置条件
-
-- `worker-d` 尚无匹配 `pending` 线程
-- `watch` 先于 `send` 启动
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json watch --agent worker-d --status pending --timeout-seconds 2
-inbox --db TMPDIR/coord.db --json send --from leader --to worker-d --subject "Build admin editor" --summary "Create the first editor screen" --body-file TMPDIR/task.md --artifact TMPDIR/task.md --artifact-kind brief --artifact-metadata-json '{"label":"task-brief"}' --run run_blog_004 --task T4
-inbox --db TMPDIR/coord.db --json fetch --agent worker-d --status pending --unread
-```
-
-### 预期输出
-
-- `watch` 因新线程到达而唤醒
-- 唤醒结果中的 `thread_id` 与 `send` 返回值一致
-- 随后 `fetch --unread` 仍能看到该 `pending` 线程
-
-### 断言结论
-
-- `watch.data.woke == true`
-- `watch.data.thread.thread_id == send.data.thread.thread_id`
-- `fetch.data.threads` 长度为 `1`
-- `watch` 唤醒不应提前消费掉线程的未读可见性
-
-## case: artifact-visible-through-send-and-show
-
-### 用例意义
-
-验证 `send` 写入的 body-file 与 artifact 信息能被后续 `show` 完整读回。
-
-### 前置条件
-
-- `TMPDIR/task.md` 已存在，内容为测试任务正文
-- 空数据库已完成 `init`
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json send --from leader --to worker-d --subject "Build admin editor" --summary "Create the first editor screen" --body-file TMPDIR/task.md --artifact TMPDIR/task.md --artifact-kind brief --artifact-metadata-json '{"label":"task-brief"}' --run run_blog_004 --task T4
-inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-```
-
-### 预期输出
-
-- `send` 成功创建线程并附带一条 artifact
-- `show` 的首条消息包含从文件读取的正文与 artifact 列表
-
-### 断言结论
-
-- 首条消息 `body` 等于 `TMPDIR/task.md` 的文件内容
-- 首条消息 `artifacts` 长度为 `1`
-- 首个 artifact 的 `path` 等于 `TMPDIR/task.md`
-- 首个 artifact 的 `kind` 等于 `brief`
-
-## case: unread-clears-after-mark-read-and-reappears-on-new-message
-
-### 用例意义
-
-验证 read cursor 的最关键用户感知行为：未读任务可被显式清空，并会在同线程新消息到达后重新出现。
-
-### 前置条件
-
-- `leader` 已向 `worker-e` 发送一个 `pending` 线程
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --json fetch --agent worker-e --status pending --unread
-inbox --db TMPDIR/coord.db --agent worker-e --json show --thread THREAD_ID --mark-read
-inbox --db TMPDIR/coord.db --json fetch --agent worker-e --status pending --unread
-inbox --db TMPDIR/coord.db --json send --from leader --to worker-e --thread THREAD_ID --summary "Use sentence case" --body "Keep the nav labels in sentence case."
-inbox --db TMPDIR/coord.db --json fetch --agent worker-e --status pending --unread
-```
-
-### 预期输出
-
-- 第一次 `fetch --unread` 返回该线程
-- `show --mark-read` 成功推进 `worker-e` 的 read cursor
-- 第二次 `fetch --unread` 无匹配结果
-- 新消息追加后，第三次 `fetch --unread` 再次返回该线程
-
-### 断言结论
-
-- 第一次 `fetch` 返回 1 条线程
-- 第二次 `fetch` 退出码为 `10`，错误码为 `no_matching_work`
-- 追加消息后第三次 `fetch` 再次返回 1 条线程
-- 未读状态是按 agent 视角计算，而不是线程级布尔值
-
-## case: wait-reply-clears-blocked-unread-for-agent
-
-### 用例意义
-
-验证等待答复的消费者在收到答复后，其阻塞线程未读状态会被消费，避免“已经处理过回复但列表仍显示未读”的错觉。
-
-### 前置条件
-
-- `worker-c` 已拥有一个 `blocked` 线程
-- 该线程阻塞消息对应的 `message_id` 已知
-- `worker-c` 使用 `wait-reply` 等待答复
-
-### 输入
-
-```bash
-inbox --db TMPDIR/coord.db --agent worker-c --json wait-reply --thread THREAD_ID --after-message BLOCKED_MESSAGE_ID --timeout-seconds 2
-inbox --db TMPDIR/coord.db --json reply --from leader --to worker-c --thread THREAD_ID --summary "Redirect to login" --body "Redirect guests to login for the MVP."
-inbox --db TMPDIR/coord.db --agent worker-c --json fetch --status blocked --unread
-```
-
-### 预期输出
-
-- `wait-reply` 在答复后唤醒
-- 唤醒结果携带 `answer` 消息
-- 随后的 `fetch --status blocked --unread` 不再返回该线程
-
-### 断言结论
-
-- `wait-reply.data.woke == true`
-- `wait-reply.data.message.kind == "answer"`
-- 后续 `fetch` 退出码为 `10`
-- 对等待中的 agent 来说，答复消费与未读清理是同一条用户契约链路
diff --git a/docs/tests/orch-skill/README.md b/docs/tests/orch-skill/README.md
deleted file mode 100644
index 805b33d..0000000
--- a/docs/tests/orch-skill/README.md
+++ /dev/null
@@ -1,210 +0,0 @@
-# Orch Skill Test Plan
-
-## Purpose
-
-This directory tracks human-readable test plans for the `skills/orch/` Codex skill bundle.
-
-These documents are not command-contract specs for the `orch` CLI itself.
-That coverage already lives under [../orch/](../orch/).
-
-This directory exists to describe a different test surface:
-
-- whether a leader agent can actually use the packaged `orch` skill
-- whether the bundled `./assets/orch` CLI works inside real skill-guided conversations
-- whether leader-side orchestration driven by the skill reaches the expected run, task, thread, and worktree state
-
-## Test Model
-
-- `README.md` is the index for this directory
-- each skill test case lives in its own Markdown file
-- use stable case slugs in filenames
-
-## Shared Execution Contract
-
-Use these defaults unless a case file explicitly overrides them:
-
-- run the scenario with real subagents, not simulated transcripts
-- inject `skills/orch/` into the leader agent
-- inject `skills/inbox/` into worker agents whenever worker-side thread progress is required
-- initialize the shared SQLite DB before launching role agents with `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-- require the leader to coordinate through the bundled `./assets/orch` CLI from the skill instead of ordinary chat
-- require workers to coordinate through the bundled `./assets/inbox` CLI from their skill instead of ordinary chat
-- launch-bridge cases may use a leader-only topology where the leader spawns worker subagents after dispatch instead of relying on the test-runner to launch separate worker roles
-- validate final run and thread state independently from the main thread after the agents stop
-- create any required Git repo fixture before launching agents for worktree cases
-
-## How An Agent Runs These Cases
-
-Use one test-runner agent to execute each case.
-
-The test-runner agent is responsible for:
-
-- reading this `README.md` first, then one specific case file
-- creating an isolated temporary directory and DB path for that run
-- initializing the DB once through the bundled inbox CLI before launching role agents
-- creating any required temporary Git repo fixture before launching role agents
-- launching the role agents described in `Agent Topology`
-- injecting `skills/orch/` into the leader and `skills/inbox/` into workers
-- passing each role agent the prompt text from the case file with concrete values substituted for `ORCH_SKILL_PATH`, `INBOX_SKILL_PATH`, `TMPDIR`, `RUN_ID`, `THREAD_ID`, and `WORKTREE_PATH` when needed
-- coordinating launch order or parallel start according to the case file
-- collecting agent final summaries as evidence
-- resolving final run ids, thread ids, and worktree paths from agent outputs
-- running the `Validation Commands` from the main thread after the role agents stop
-- comparing the observed results against `Expected Outcomes` and `Assertions`
-- returning a final pass/fail judgment with concrete evidence
-
-The role agents are responsible for:
-
-- acting only within the role assigned in the case file
-- using the injected skill bundle rather than ad hoc repository discovery
-- coordinating through the bundled CLI and shared DB
-- reporting concrete run ids, thread ids, worktree paths, and key command outcomes back to the test-runner agent
-
-For launch-bridge cases:
-
-- the leader may be the only top-level role agent
-- that leader is responsible for spawning any worker subagents itself after `dispatch`
-- spawned worker subagents should use the generated worker brief plus `skills/inbox/`, not ordinary chat
-
-The test-runner agent should treat a case as passed only when:
-
-- all role agents reach a final state without violating the case contract
-- the independent validation commands succeed
-- the final orch and inbox state matches the assertions in the case file
-
-The test-runner agent should treat a case as failed when:
-
-- any required agent times out or stalls
-- a required orch or inbox action is skipped
-- the leader falls back to ordinary chat for orchestration decisions that should go through `orch`
-- workers fall back to ordinary chat for progress that should go through `inbox`
-- the final run, task, thread, or worktree state conflicts with the documented assertions
-
-The test-runner agent should report results in this shape:
-
-- `case`
-- `db_path`
-- `run_id`
-- `thread_ids`
-- `worktree_paths`
-- `result`: `pass` or `fail`
-- `agent_summaries`
-- `validation_evidence`
-- `assertion_checklist`
-- `notes`
-
-## Default Timeouts
-
-Use these defaults unless a case file explicitly overrides them:
-
-- per-agent timeout: `4m`
-- overall scenario timeout: `6m`
-- async wait margin for the main thread: `45s`
-
-## Default Failure Conditions
-
-Treat the test as failed if any of the following happens:
-
-- any required agent does not reach a final state before timeout
-- any required orch or inbox command returns a non-success result unless the case expects that failure
-- the final `orch status` output does not match the expected run or task state
-- the final `inbox show` output does not match the expected thread or message history
-- a required worktree is removed before the case expects it to be, or is still present after cleanup in a cleanup case
-- the agents fall back to ordinary chat for critical coordination instead of the bundled CLIs
-
-## Evidence Capture
-
-Collect at least the following artifacts for every run:
-
-- agent final summaries
-- final `orch status --run RUN_ID --json` output
-- final `inbox show --thread THREAD_ID --json` output for every relevant thread
-- any `blocked`, `wait`, `retry`, `reassign`, or `cleanup` output relevant to the case
-- the temporary DB path, resolved run id, resolved thread ids, and any worktree paths
-
-## Cleanup Policy
-
-Use these defaults unless a case file explicitly overrides them:
-
-- keep the temporary DB, repo fixture, and working directory on failure for debugging
-- clean up the temporary working directory on success only if the caller does not need replay artifacts
-
-## Direct CLI Replay
-
-The repository also includes a reusable direct replay runner at `scripts/run_orch_skill_forward_tests.sh`.
-
-This runner executes the bundled `skills/orch/assets/orch` and `skills/inbox/assets/inbox` binaries against temporary SQLite DBs and Git fixtures without spawning Codex role agents.
-
-Use it to validate packaged CLI behavior and record concrete evidence quickly, but do not treat it as a full replacement for the real subagent-forward model described above.
-
-Eight of the case files in this directory (the original five plus the three gap-fill cases) now include recorded example runs captured through that direct replay path on `2026-03-19`.
-
-## Real Subagent Forward Runs
-
-The original five cases in this directory were also executed with real spawned role agents on `2026-03-19`.
-
-That run used injected project-local `skills/orch/` and `skills/inbox/` bundles with a narrow-context fallback (`fork_context: false`) after an earlier wider-context attempt proved unreliable for this repo.
-
-The successful evidence root for those runs was `/tmp/orch-skill-subagents.J1XWgs`.
-
-Some longer cases used staged leader progression while keeping the same leader agent active across phases so the run still exercised real agent-driven `orch` control flow instead of a main-thread direct replay.
-
-The three gap-fill cases added later on `2026-03-19` currently have direct replay evidence only and have not yet been rerun through the real subagent-forward path.
-
-## Per-Case Template
-
-Each case file should use this structure:
-
-- `Test Type`
-- `Purpose`
-- `Preconditions`
-- `Agent Topology`
-- `Inputs`
-- `Execution Parameters`
-- `Execution Steps`
-- `Validation Commands`
-- `Expected Outcomes`
-- `Assertions`
-- `Cleanup`
-- `Recorded Example Run` when a real run has already been captured
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `leader-run-dispatch-reconcile-through-bundled-cli` | [leader-run-dispatch-reconcile-through-bundled-cli.md](./leader-run-dispatch-reconcile-through-bundled-cli.md) | validates that a leader can drive a complete `run -> task -> dispatch -> reconcile -> status` happy path through the packaged orch skill |
-| `leader-blocked-answer-resume-through-bundled-cli` | [leader-blocked-answer-resume-through-bundled-cli.md](./leader-blocked-answer-resume-through-bundled-cli.md) | validates that a leader can observe a blocked task, answer it through `orch`, and reach final completion with a real worker |
-| `strict-worktree-dispatch-to-cleanup-through-bundled-cli` | [strict-worktree-dispatch-to-cleanup-through-bundled-cli.md](./strict-worktree-dispatch-to-cleanup-through-bundled-cli.md) | validates that the skill can drive `execution-mode code` worktree allocation, reconcile completion, and cleanup through the bundled orch CLI |
-| `leader-dispatches-dependent-task-after-prerequisite-through-bundled-cli` | [leader-dispatches-dependent-task-after-prerequisite-through-bundled-cli.md](./leader-dispatches-dependent-task-after-prerequisite-through-bundled-cli.md) | validates that a leader can use `dep add` and `ready` to hold back dependent work until a prerequisite completes, then dispatch the newly ready task |
-| `leader-cancels-active-task-through-bundled-cli` | [leader-cancels-active-task-through-bundled-cli.md](./leader-cancels-active-task-through-bundled-cli.md) | validates that a leader can cancel an already active task through the packaged orch skill without cancelling unrelated ready work |
-| `leader-answers-blocked-task-with-payload-json-through-bundled-cli` | [leader-answers-blocked-task-with-payload-json-through-bundled-cli.md](./leader-answers-blocked-task-with-payload-json-through-bundled-cli.md) | validates that a leader can answer a blocked task with structured payload data only and still drive the run to completion |
-| `leader-retries-failed-task-through-bundled-cli` | [leader-retries-failed-task-through-bundled-cli.md](./leader-retries-failed-task-through-bundled-cli.md) | validates that a leader can reconcile a failed attempt and create a successful retry through the packaged orch skill |
-| `leader-reassigns-blocked-task-through-bundled-cli` | [leader-reassigns-blocked-task-through-bundled-cli.md](./leader-reassigns-blocked-task-through-bundled-cli.md) | validates that a leader can reassign a blocked task from one worker to another and close the run through the packaged orch skill |
-| `leader-dispatches-and-launches-worker-through-codex-bridge` | [leader-dispatches-and-launches-worker-through-codex-bridge.md](./leader-dispatches-and-launches-worker-through-codex-bridge.md) | validates that a leader can dispatch a task, render a standardized worker brief, and launch a worker subagent from the same Codex thread |
-| `strict-worktree-dispatch-launches-worker-through-codex-bridge` | [strict-worktree-dispatch-launches-worker-through-codex-bridge.md](./strict-worktree-dispatch-launches-worker-through-codex-bridge.md) | validates that a leader can launch a code-writing worker subagent from saved `execution-mode code` dispatch metadata while preserving the assigned worktree contract |
-
-## Scope
-
-In scope:
-
-- explicit `$orch` skill invocation
-- bundled `./assets/orch` CLI usage
-- leader-side run, task, dependency, dispatch, reconcile, answer, retry, reassign, wait, status, and cleanup flows
-- interaction between a leader using `skills/orch/` and workers using `skills/inbox/`
-- leader-side launch-bridge workflows where the leader spawns worker subagents after `dispatch`
-- worktree-backed dispatch and cleanup validation
-- end-to-end run state and thread history validation
-
-Out of scope:
-
-- per-command flag and JSON contract coverage for `orch`
-- worker-only skill behavior that already belongs under [../inbox-skill/](../inbox-skill/)
-- the separate `council-review` skill package
-- implicit skill triggering without `$orch`
-- changing the core `orch` CLI so it launches workers by itself
-
-## Relationship To Other Test Docs
-
-- [../orch/](../orch/) covers CLI command behavior
-- [../inbox-skill/](../inbox-skill/) covers worker-side skill-guided behavior on top of inbox
-- this directory covers leader-side skill-guided behavior on top of `orch`
diff --git a/docs/tests/orch-skill/leader-answers-blocked-task-with-payload-json-through-bundled-cli.md b/docs/tests/orch-skill/leader-answers-blocked-task-with-payload-json-through-bundled-cli.md
deleted file mode 100644
index 796aed7..0000000
--- a/docs/tests/orch-skill/leader-answers-blocked-task-with-payload-json-through-bundled-cli.md
+++ /dev/null
@@ -1,107 +0,0 @@
-# Case: `leader-answers-blocked-task-with-payload-json-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and a structured-answer skill validation.
-
-The goal is to verify that a leader using the packaged `orch` skill can answer a blocked task with pure `--payload-json`, allowing the worker to resume without relying on a freeform answer body.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can use `wait`, `blocked`, `answer --payload-json`, `reconcile`, and `status` through the bundled orch skill
-- a worker can post a blocked question through the bundled inbox skill
-- the answer reaches the active thread as structured payload data
-- the worker resumes after reading that payload and completes the task
-- the final run reaches `done`
-
-## Preconditions
-
-- orch skill path exists: `ORCH_SKILL_PATH=skills/orch`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- bundled CLI executables exist at `ORCH_SKILL_PATH/assets/orch` and `INBOX_SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching role agents through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-
-## Agent Topology
-
-- `leader`
-- `worker-a`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $orch at ORCH_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) create run run_blog_skill_payload_answer_001, 2) add and dispatch one task T1 to worker-a with --execution-mode analysis, 3) wait until the task becomes blocked, 4) inspect blocked tasks, 5) answer the blocked question using payload-json only with decision=stdout, source=leader, and format=structured, 6) wait until the task completes, 7) reconcile and inspect final status, 8) stop after reporting RUN_ID and THREAD_ID. Do not use ordinary chat to coordinate with the worker.
-```
-
-### Worker Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as worker-a on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch and claim the assigned task, 2) send a blocked update asking for a structured logging decision, 3) wait for a reply, 4) confirm the reply payload tells you to use stdout, 5) finish the task with done, 6) stop after reporting the THREAD_ID you handled. Do not use ordinary chat to coordinate with the leader.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Inject `skills/orch/` into `leader`
-3. Inject `skills/inbox/` into `worker-a`
-4. Point both agents at the same database path `TMPDIR/coord.db`
-5. Launch `leader` and `worker-a` in parallel
-6. Wait for both agents to finish
-7. Resolve `THREAD_ID` from the agent outputs
-8. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-ORCH_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json status --run run_blog_skill_payload_answer_001
-INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-```
-
-## Expected Outcomes
-
-- the leader successfully observes a blocked event and inspects the blocked queue
-- the leader successfully emits one payload-only answer through `orch`
-- `worker-a` receives that answer through inbox history and sees `payload_json.decision == "stdout"`
-- `worker-a` completes the task after the structured answer arrives
-- the final run state is `done`
-
-## Assertions
-
-- `status.data.run.status == "done"`
-- `status.data.tasks[0].status == "done"`
-- `show.data.messages[*].kind` includes `question`, `answer`, and `result`
-- one `question` message contains `payload_json.question == "Use stdout or stderr for structured logs?"`
-- one `answer` message contains `payload_json.decision == "stdout"`
-- one `answer` message contains `payload_json.source == "leader"`
-- one `answer` message contains `payload_json.format == "structured"`
-- the final thread status is `done`
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
-
-## Recorded Example Run
-
-- recorded on: `2026-03-19`
-- execution mode: `direct_cli_replay` via `scripts/run_orch_skill_forward_tests.sh`
-- result: `pass`
-- observed run id: `run_blog_skill_payload_answer_001`
-- observed thread id: `thr_735bde0f91794174b2b85fbe89e80581`
-- evidence summary:
-- `orch wait --for task_blocked` woke after the worker question, and `orch blocked` listed task `T1` as the active blocked task
-- `orch answer --payload-json '{"decision":"stdout","source":"leader","format":"structured"}'` appended an `answer` message with those exact payload fields and an empty body
-- `inbox wait-reply` woke on that structured answer and exposed `payload_json.decision == "stdout"`
-- final `orch status --run run_blog_skill_payload_answer_001 --json` returned `run.status == "done"` and `tasks[0].status == "done"`
-- final `inbox show --thread thr_735bde0f91794174b2b85fbe89e80581 --json` contained the blocked `question`, the structured `answer`, and the terminal `result`
-- note: this recorded run exercised the packaged binaries directly in a temporary DB and did not spawn separate Codex role agents
diff --git a/docs/tests/orch-skill/leader-blocked-answer-resume-through-bundled-cli.md b/docs/tests/orch-skill/leader-blocked-answer-resume-through-bundled-cli.md
deleted file mode 100644
index f4508b9..0000000
--- a/docs/tests/orch-skill/leader-blocked-answer-resume-through-bundled-cli.md
+++ /dev/null
@@ -1,116 +0,0 @@
-# Case: `leader-blocked-answer-resume-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and a blocked-question resolution skill validation.
-
-The goal is to verify that a leader using the packaged `orch` skill can observe a blocked task, answer it through `orch`, and reach final completion with a real worker using the packaged inbox skill.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can use `orch wait`, `blocked`, `answer`, `reconcile`, and `status` through the bundled skill CLI
-- a worker can ask a blocked question through the bundled inbox skill
-- the answer reaches the active attempt thread
-- the worker resumes after the answer and completes the task
-- the final run reaches `done`
-
-## Preconditions
-
-- orch skill path exists: `ORCH_SKILL_PATH=skills/orch`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- bundled CLI executables exist at `ORCH_SKILL_PATH/assets/orch` and `INBOX_SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching role agents through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-
-## Agent Topology
-
-- `leader`
-- `worker-a`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $orch at ORCH_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) create run run_blog_skill_002, 2) add and dispatch one task T1 to worker-a with --execution-mode analysis, 3) wait until the task becomes blocked, 4) inspect blocked tasks, 5) answer the blocked question with the decision "Use stdout for MVP.", 6) wait until the task completes, 7) reconcile and inspect final status, 8) stop after reporting RUN_ID and THREAD_ID. Do not use ordinary chat to coordinate with the worker.
-```
-
-### Worker Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as worker-a on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch and claim the assigned task, 2) send one in_progress update, 3) send a blocked update asking "Should logging go to stdout or stderr?", 4) wait for a reply, 5) finish the task with done after you receive the leader decision, 6) stop after reporting the THREAD_ID you handled. Do not use ordinary chat to coordinate with the leader.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Inject `skills/orch/` into `leader`
-3. Inject `skills/inbox/` into `worker-a`
-4. Point both agents at the same database path `TMPDIR/coord.db`
-5. Launch `leader` and `worker-a` in parallel
-6. Wait for both agents to finish
-7. Resolve `THREAD_ID` from the agent outputs
-8. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-ORCH_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json status --run run_blog_skill_002
-INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-```
-
-## Expected Outcomes
-
-- `leader` successfully observes a blocked event through `orch`
-- `leader` successfully inspects the blocked queue and emits one `answer`
-- `worker-a` receives that answer through inbox history and completes the task
-- the final run state is `done`
-
-## Assertions
-
-- `status.data.run.status == "done"`
-- `status.data.tasks[0].status == "done"`
-- `show.data.messages[*].kind` includes `question`, `answer`, and `result`
-- one `question` message contains `payload_json.question == "Should logging go to stdout or stderr?"`
-- one `answer` message contains body `Use stdout for MVP.`
-- the final thread status is `done`
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
-
-## Recorded Example Run
-
-- recorded on: `2026-03-19`
-- execution mode: `direct_cli_replay` via `scripts/run_orch_skill_forward_tests.sh`
-- result: `pass`
-- observed run id: `run_blog_skill_002`
-- observed thread id: `thr_42ce634f273745e9b95badc14ce52708`
-- evidence summary:
-- `orch wait --for task_blocked` woke on the worker question, and `inbox wait-reply` later woke on the leader answer
-- final `orch status --run run_blog_skill_002 --json` returned `run.status == "done"` and `tasks[0].status == "done"`
-- final `inbox show --thread thr_42ce634f273745e9b95badc14ce52708 --json` contained `question`, `answer`, and `result` messages
-- the recorded `question` payload was `Should logging go to stdout or stderr?`, and the recorded `answer` body was `Use stdout for MVP.`
-- note: this recorded run exercised the packaged binaries directly in a temporary DB and did not spawn separate Codex role agents
-
-## Recorded Real Forward Run
-
-- recorded on: `2026-03-19`
-- execution mode: `real_subagent_forward_test`
-- result: `pass`
-- evidence root: `/tmp/orch-skill-subagents.J1XWgs/leader-blocked-answer-resume-through-bundled-cli`
-- observed run id: `run_blog_skill_002`
-- observed thread id: `thr_fd11536a0b2f4c668f6e78c38090816e`
-- evidence summary:
-- a real leader agent using `skills/orch/` completed `wait --for task_blocked`, `blocked`, `answer`, `wait --for task_done`, `reconcile`, and `status`
-- a real worker agent using `skills/inbox/` completed `claim`, `update --status in_progress`, `update --status blocked`, `wait-reply`, resume `update`, and `done`
-- main-thread validation confirmed `run.status == "done"`, `task.status == "done"`, the blocked question payload `Should logging go to stdout or stderr?`, and the answer body `Use stdout for MVP.`
diff --git a/docs/tests/orch-skill/leader-cancels-active-task-through-bundled-cli.md b/docs/tests/orch-skill/leader-cancels-active-task-through-bundled-cli.md
deleted file mode 100644
index db27653..0000000
--- a/docs/tests/orch-skill/leader-cancels-active-task-through-bundled-cli.md
+++ /dev/null
@@ -1,105 +0,0 @@
-# Case: `leader-cancels-active-task-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and a direct task-cancel skill validation.
-
-The goal is to verify that a leader using the packaged `orch` skill can cancel an already active task attempt without cancelling unrelated ready work in the same run.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can use `dispatch`, `cancel`, `ready`, and `status` through the bundled orch skill
-- `worker-a` can claim the original thread and report active progress through the bundled inbox skill
-- the leader can cancel that active task through `orch cancel --task`
-- the original thread reaches `cancelled`
-- another task in the same run remains actionable instead of being implicitly cancelled
-
-## Preconditions
-
-- orch skill path exists: `ORCH_SKILL_PATH=skills/orch`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- bundled CLI executables exist at `ORCH_SKILL_PATH/assets/orch` and `INBOX_SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching role agents through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-
-## Agent Topology
-
-- `leader`
-- `worker-a`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $orch at ORCH_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) create run run_blog_skill_cancel_001, 2) add task T1 for worker-a and a second task T2 that should remain untouched, 3) dispatch T1 with --execution-mode analysis, 4) wait until worker-a has claimed it or marked it in progress, 5) cancel T1 with a clear reason through orch, 6) inspect ready work and final run status, 7) stop after reporting THREAD_ID_1. Do not use ordinary chat to coordinate with the worker.
-```
-
-### Worker Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as worker-a on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch and claim the assigned thread, 2) send one in_progress update, 3) stop after reporting THREAD_ID_1 and that the task became active. Do not use ordinary chat to coordinate with the leader.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Inject `skills/orch/` into `leader`
-3. Inject `skills/inbox/` into `worker-a`
-4. Point both agents at the same database path `TMPDIR/coord.db`
-5. Launch `leader` and `worker-a` in parallel
-6. Wait for both agents to finish
-7. Resolve `THREAD_ID_1` from the agent outputs
-8. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-ORCH_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json status --run run_blog_skill_cancel_001
-ORCH_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json ready --run run_blog_skill_cancel_001
-INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_ID_1
-```
-
-## Expected Outcomes
-
-- `worker-a` successfully claims the original thread and reports `in_progress`
-- the leader successfully cancels `T1` through `orch cancel --task`
-- the original thread reaches `cancelled`
-- the untouched task `T2` remains available in the ready queue
-- the run remains open rather than collapsing into a fully cancelled run
-
-## Assertions
-
-- `status.data.tasks` contains `T1` with status `cancelled`
-- `status.data.tasks` contains `T2` with status `ready`
-- `status.data.run.status == "ready"`
-- `ready.data.tasks` contains only `T2`
-- `show.data.thread.status == "cancelled"`
-- the thread history preserves the worker `progress` message before the cancel
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
-
-## Recorded Example Run
-
-- recorded on: `2026-03-19`
-- execution mode: `direct_cli_replay` via `scripts/run_orch_skill_forward_tests.sh`
-- result: `pass`
-- observed run id: `run_blog_skill_cancel_001`
-- observed thread id: `thr_175e00bca76549ea8529cb4c92d99fd4`
-- evidence summary:
-- final `orch status --run run_blog_skill_cancel_001 --json` returned `run.status == "ready"` with task counts `cancelled: 1` and `ready: 1`
-- that same `status` output showed `T1.status == "cancelled"` while `T2.status == "ready"`
-- final `orch ready --run run_blog_skill_cancel_001 --json` returned only `T2`, confirming the untouched task remained dispatchable
-- final `inbox show --thread thr_175e00bca76549ea8529cb4c92d99fd4 --json` returned `thread.status == "cancelled"` and preserved the worker `progress` message before the cancel
-- note: this recorded run exercised the packaged binaries directly in a temporary DB and did not spawn separate Codex role agents
diff --git a/docs/tests/orch-skill/leader-dispatches-and-launches-worker-through-codex-bridge.md b/docs/tests/orch-skill/leader-dispatches-and-launches-worker-through-codex-bridge.md
deleted file mode 100644
index a4170b4..0000000
--- a/docs/tests/orch-skill/leader-dispatches-and-launches-worker-through-codex-bridge.md
+++ /dev/null
@@ -1,97 +0,0 @@
-# Case: `leader-dispatches-and-launches-worker-through-codex-bridge`
-
-## Test Type
-
-This is a `forward-test` and a leader-side launch-bridge validation.
-
-The goal is to verify that a leader using the packaged `orch` skill can dispatch work, render a standardized worker brief through the skill assets, and launch a worker subagent from the same Codex thread without hand-writing the inbox handoff.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can use the bundled `./assets/orch` CLI through the skill
-- the leader can save `dispatch --json` output and turn it into a stable worker brief through `./assets/orch-worker-brief`
-- the leader can spawn a worker subagent that uses `skills/inbox/` instead of ordinary chat
-- the launched worker claims the dispatched thread and completes it
-- the final orch run state and inbox thread state both reach `done`
-
-## Preconditions
-
-- orch skill path exists: `ORCH_SKILL_PATH=skills/orch`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- bundled CLI executables exist at `ORCH_SKILL_PATH/assets/orch` and `INBOX_SKILL_PATH/assets/inbox`
-- the helper asset exists at `ORCH_SKILL_PATH/assets/orch-worker-brief`
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching role agents through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-
-## Agent Topology
-
-- `leader`
-
-The leader is responsible for spawning the worker subagent after dispatch.
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $orch at ORCH_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) create run run_blog_skill_launch_001, 2) add exactly one task T1 assigned to worker-a, 3) dispatch it with --execution-mode analysis and save --json to TMPDIR/dispatch.json, 4) render a worker brief with ORCH_SKILL_PATH/assets/orch-worker-brief into TMPDIR/worker-brief.txt, 5) spawn one worker subagent that uses INBOX_SKILL_PATH and the generated worker brief, 6) wait or poll until the worker reports completion, 7) inspect final status, 8) stop after reporting RUN_ID and THREAD_ID. Do not use ordinary chat to coordinate with the worker; the launched worker must use inbox only.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Inject `skills/orch/` into `leader`
-3. Ensure `leader` can also reference `skills/inbox/` by path when it spawns the worker subagent
-4. Point the leader at the same database path `TMPDIR/coord.db`
-5. Launch `leader`
-6. Wait for `leader` and any spawned worker subagent(s) to finish
-7. Resolve `RUN_ID=run_blog_skill_launch_001` and `THREAD_ID` from the leader output
-8. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-ORCH_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json status --run run_blog_skill_launch_001
-INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-test -f TMPDIR/dispatch.json
-test -f TMPDIR/worker-brief.txt
-```
-
-## Expected Outcomes
-
-- the leader successfully creates `run_blog_skill_launch_001`
-- the leader successfully dispatches `T1` and saves the JSON response
-- the leader successfully renders a non-empty worker brief from that JSON response
-- the leader successfully spawns a worker subagent that uses `skills/inbox/`
-- the launched worker successfully claims the dispatched thread
-- the launched worker completes the thread with `done`
-- the final run state is `done`
-
-## Assertions
-
-- `status.data.run.run_id == "run_blog_skill_launch_001"`
-- `status.data.run.status == "done"`
-- `status.data.tasks` contains exactly one task `T1`
-- `status.data.tasks[0].status == "done"`
-- `status.data.tasks[0].latest_attempt.assigned_to == "worker-a"`
-- `show.data.thread.status == "done"`
-- `show.data.messages[*].kind` includes `task`, `progress`, and `result`
-- `TMPDIR/worker-brief.txt` mentions the expected `thread_id`
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
-
-## Recorded Example Run
-
-- no recorded run yet
-- this case should be captured with a real leader agent plus leader-launched worker subagent after the launch bridge assets are adopted
diff --git a/docs/tests/orch-skill/leader-dispatches-dependent-task-after-prerequisite-through-bundled-cli.md b/docs/tests/orch-skill/leader-dispatches-dependent-task-after-prerequisite-through-bundled-cli.md
deleted file mode 100644
index b9ae556..0000000
--- a/docs/tests/orch-skill/leader-dispatches-dependent-task-after-prerequisite-through-bundled-cli.md
+++ /dev/null
@@ -1,115 +0,0 @@
-# Case: `leader-dispatches-dependent-task-after-prerequisite-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and a dependency-gated ready-queue skill validation.
-
-The goal is to verify that a leader using the packaged `orch` skill can create a dependency edge, observe the correct `ready` set before and after prerequisite completion, and dispatch the dependent task only after it becomes eligible.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can use `dep add`, `ready`, `dispatch`, `wait`, `reconcile`, and `status` through the bundled orch skill
-- `worker-a` can complete the prerequisite task on the bundled inbox skill
-- the dependent task stays out of the initial `ready` queue
-- the dependent task appears in `ready` only after the prerequisite reaches `done`
-- the leader can dispatch that newly ready dependent task to `worker-b` and close the run
-
-## Preconditions
-
-- orch skill path exists: `ORCH_SKILL_PATH=skills/orch`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- bundled CLI executables exist at `ORCH_SKILL_PATH/assets/orch` and `INBOX_SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching role agents through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-
-## Agent Topology
-
-- `leader`
-- `worker-a`
-- `worker-b`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $orch at ORCH_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) create run run_blog_skill_deps_001, 2) add prerequisite task T1 for worker-a and dependent task T2 for worker-b, 3) make T2 depend on T1, 4) inspect ready work and confirm only T1 is dispatchable at first, 5) dispatch T1 with --execution-mode analysis, 6) wait until T1 completes, 7) reconcile and inspect ready work again, 8) dispatch T2 only after it becomes ready with --execution-mode analysis, 9) wait until T2 completes, 10) reconcile and inspect final status, 11) stop after reporting THREAD_ID_1 and THREAD_ID_2. Do not use ordinary chat to coordinate with the workers.
-```
-
-### Worker A Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as worker-a on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch and claim the prerequisite thread assigned to worker-a, 2) send one in_progress update, 3) finish it with done, 4) stop after reporting THREAD_ID_1. Do not use ordinary chat to coordinate with the leader or worker-b.
-```
-
-### Worker B Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as worker-b on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) wait until dependent work assigned to worker-b appears, 2) fetch and claim that thread, 3) finish it with done, 4) stop after reporting THREAD_ID_2. Do not use ordinary chat to coordinate with the leader or worker-a.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Inject `skills/orch/` into `leader`
-3. Inject `skills/inbox/` into `worker-a` and `worker-b`
-4. Point all agents at the same database path `TMPDIR/coord.db`
-5. Launch `leader`, `worker-a`, and `worker-b` in parallel
-6. Wait for all agents to finish
-7. Resolve `THREAD_ID_1` and `THREAD_ID_2` from the agent outputs
-8. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-ORCH_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json status --run run_blog_skill_deps_001
-INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_ID_1
-INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_ID_2
-```
-
-## Expected Outcomes
-
-- the leader initially sees only `T1` in the `ready` output
-- `worker-a` completes the prerequisite thread for `T1`
-- after reconcile, the leader sees `T2` become ready
-- `worker-b` receives a distinct thread for `T2` and completes it
-- the final run reaches `done`
-
-## Assertions
-
-- the initial `ready` output contains `T1` and does not contain `T2`
-- the post-reconcile `ready` output contains `T2`
-- `THREAD_ID_1 != THREAD_ID_2`
-- `status.data.run.status == "done"`
-- `status.data.tasks` contains `T1` and `T2`, both with status `done`
-- `show THREAD_ID_1` reports a terminal done thread state
-- `show THREAD_ID_2` reports a terminal done thread state
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
-
-## Recorded Example Run
-
-- recorded on: `2026-03-19`
-- execution mode: `direct_cli_replay` via `scripts/run_orch_skill_forward_tests.sh`
-- result: `pass`
-- observed run id: `run_blog_skill_deps_001`
-- observed first thread id: `thr_7f57b577e5ce4cc094341e7d2eae4570`
-- observed second thread id: `thr_5dbc81f2fe234b6dbf0c57a176e13acf`
-- evidence summary:
-- the initial `ready` output returned only `T1`, confirming that dependent task `T2` stayed gated before prerequisite completion
-- after `worker-a` completed `T1` and the leader ran `reconcile`, the next `ready` output returned only `T2`
-- final `orch status --run run_blog_skill_deps_001 --json` returned `run.status == "done"` with both tasks `T1` and `T2` in state `done`
-- final `inbox show` on both thread ids returned terminal thread state `done`
-- the replay also observed `orch wait --for task_done` wake on the prerequisite completion before the dependent dispatch
-- note: this recorded run exercised the packaged binaries directly in a temporary DB and did not spawn separate Codex role agents
diff --git a/docs/tests/orch-skill/leader-reassigns-blocked-task-through-bundled-cli.md b/docs/tests/orch-skill/leader-reassigns-blocked-task-through-bundled-cli.md
deleted file mode 100644
index a3a1743..0000000
--- a/docs/tests/orch-skill/leader-reassigns-blocked-task-through-bundled-cli.md
+++ /dev/null
@@ -1,129 +0,0 @@
-# Case: `leader-reassigns-blocked-task-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and a reassignment-path skill validation.
-
-The goal is to verify that a leader using the packaged `orch` skill can observe a blocked task, reassign it from one worker to another, and drive the run to completion through the new attempt.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can use `blocked`, `reassign`, `reconcile`, and `status` through the bundled orch skill
-- `worker-a` can claim the original attempt and block on a question
-- `worker-b` can receive the reassigned attempt as a new thread
-- the original thread is cancelled and the new thread reaches `done`
-- the final run reaches `done`
-
-## Preconditions
-
-- orch skill path exists: `ORCH_SKILL_PATH=skills/orch`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- bundled CLI executables exist at `ORCH_SKILL_PATH/assets/orch` and `INBOX_SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching role agents through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-
-## Agent Topology
-
-- `leader`
-- `worker-a`
-- `worker-b`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $orch at ORCH_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) create run run_blog_skill_reassign_001, 2) add and dispatch one task T1 to worker-a with --execution-mode analysis, 3) wait until worker-a blocks, 4) inspect blocked tasks, 5) reassign T1 to worker-b with a short reason, 6) wait until worker-b completes the new attempt, 7) reconcile and inspect final status, 8) stop after reporting THREAD_ID_1 and THREAD_ID_2. Do not use ordinary chat to coordinate with the workers.
-```
-
-### Worker A Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as worker-a on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch and claim the initial assigned thread, 2) send one blocked update with a precise question, 3) stop after reporting THREAD_ID_1 and the blocked summary you sent. Do not use ordinary chat to coordinate with the leader or worker-b.
-```
-
-### Worker B Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as worker-b on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) wait until reassigned work for worker-b appears, 2) fetch and claim it, 3) complete it with done, 4) stop after reporting THREAD_ID_2. Do not use ordinary chat to coordinate with the leader or worker-a.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Inject `skills/orch/` into `leader`
-3. Inject `skills/inbox/` into `worker-a` and `worker-b`
-4. Point all agents at the same database path `TMPDIR/coord.db`
-5. Launch `leader`, `worker-a`, and `worker-b` in parallel
-6. Wait for all agents to finish
-7. Resolve `THREAD_ID_1` and `THREAD_ID_2` from the agent outputs
-8. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-ORCH_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json status --run run_blog_skill_reassign_001
-INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_ID_1
-INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_ID_2
-```
-
-## Expected Outcomes
-
-- `worker-a` successfully claims the original thread and blocks it
-- the leader successfully reassigns the task to `worker-b`
-- the original thread reaches `cancelled`
-- `worker-b` receives a distinct reassigned thread and completes it
-- the final run reaches `done`
-
-## Assertions
-
-- `THREAD_ID_1 != THREAD_ID_2`
-- `status.data.run.status == "done"`
-- `status.data.tasks[0].status == "done"`
-- `show THREAD_ID_1` reports a terminal cancelled thread state
-- `show THREAD_ID_2` reports a terminal done thread state
-- the blocked question remains visible in the original thread history
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
-
-## Recorded Example Run
-
-- recorded on: `2026-03-19`
-- execution mode: `direct_cli_replay` via `scripts/run_orch_skill_forward_tests.sh`
-- result: `pass`
-- observed run id: `run_blog_skill_reassign_001`
-- observed original thread id: `thr_0a61240412134de3b3d9ab219b6c8f19`
-- observed reassigned thread id: `thr_12fbcf6d89d948548306198d013d77a5`
-- evidence summary:
-- `orch wait --for task_blocked` woke after worker-a posted a blocked question with payload `Proceed with v1 scope?`
-- `orch reassign --run run_blog_skill_reassign_001 --task T1 --to worker-b --json` returned `attempt_no == 2` and assigned the new attempt to `worker-b`
-- final `inbox show` on the original thread returned `thread.status == "cancelled"` and preserved the blocked `question` message
-- final `inbox show` on the reassigned thread returned `thread.status == "done"`
-- final `orch status --run run_blog_skill_reassign_001 --json` returned `run.status == "done"` and `tasks[0].status == "done"`
-- note: this recorded run exercised the packaged binaries directly in a temporary DB and did not spawn separate Codex role agents
-
-## Recorded Real Forward Run
-
-- recorded on: `2026-03-19`
-- execution mode: `real_subagent_forward_test`
-- result: `pass`
-- evidence root: `/tmp/orch-skill-subagents.J1XWgs/leader-reassigns-blocked-task-through-bundled-cli-phased`
-- observed run id: `run_blog_skill_reassign_001`
-- observed original thread id: `thr_7d43af5bc1f7467da98a39adb0de5808`
-- observed reassigned thread id: `thr_eba253db8965423b855d0c784a29702c`
-- evidence summary:
-- the same real leader agent using `skills/orch/` completed the case in three phases: initial `run/task/dispatch`, then `wait --for task_blocked` plus `reassign`, then final `wait --for task_done` plus `status`
-- a real `worker-a` agent using `skills/inbox/` claimed the original thread and posted the blocked question `Proceed with v1 scope?`
-- a real `worker-b` agent using `skills/inbox/` claimed the reassigned thread and completed it
-- main-thread validation confirmed the original thread finished `cancelled`, the reassigned thread finished `done`, and the original blocked question remained visible in thread history
diff --git a/docs/tests/orch-skill/leader-retries-failed-task-through-bundled-cli.md b/docs/tests/orch-skill/leader-retries-failed-task-through-bundled-cli.md
deleted file mode 100644
index dd64a9c..0000000
--- a/docs/tests/orch-skill/leader-retries-failed-task-through-bundled-cli.md
+++ /dev/null
@@ -1,121 +0,0 @@
-# Case: `leader-retries-failed-task-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and a retry-path skill validation.
-
-The goal is to verify that a leader using the packaged `orch` skill can reconcile a failed attempt, issue `retry`, and drive the task to success through a second attempt handled by a real worker.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can use the bundled orch skill to dispatch an initial attempt
-- a worker can fail the first attempt through inbox
-- the leader can reconcile that failure and create a fresh retry attempt
-- the worker can complete the retried attempt
-- the final run reaches `done` and the two attempts map to different threads
-
-## Preconditions
-
-- orch skill path exists: `ORCH_SKILL_PATH=skills/orch`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- bundled CLI executables exist at `ORCH_SKILL_PATH/assets/orch` and `INBOX_SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching role agents through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-
-## Agent Topology
-
-- `leader`
-- `worker-a`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $orch at ORCH_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) create run run_blog_skill_retry_001, 2) add and dispatch one task T1 to worker-a with --execution-mode analysis, 3) wait until the first attempt fails, 4) reconcile, 5) retry T1 with a short retry note, 6) wait until the retried attempt completes, 7) reconcile again and inspect final status, 8) stop after reporting RUN_ID, THREAD_ID_1, and THREAD_ID_2. Do not use ordinary chat to coordinate with the worker.
-```
-
-### Worker Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as worker-a on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch and claim the first assigned thread, 2) fail that first attempt with a clear summary, 3) keep watching for retried work assigned to worker-a, 4) fetch and claim the retried thread, 5) finish the retried attempt with done, 6) stop after reporting both THREAD_ID_1 and THREAD_ID_2. Do not use ordinary chat to coordinate with the leader.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Inject `skills/orch/` into `leader`
-3. Inject `skills/inbox/` into `worker-a`
-4. Point both agents at the same database path `TMPDIR/coord.db`
-5. Launch `leader` and `worker-a` in parallel
-6. Wait for both agents to finish
-7. Resolve `THREAD_ID_1` and `THREAD_ID_2` from the agent outputs
-8. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-ORCH_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json status --run run_blog_skill_retry_001
-INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_ID_1
-INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_ID_2
-```
-
-## Expected Outcomes
-
-- the first worker-owned thread reaches `failed`
-- the leader successfully issues `retry`
-- the second worker-owned thread is distinct from the first
-- the second worker-owned thread reaches `done`
-- the final run state is `done`
-
-## Assertions
-
-- `THREAD_ID_1 != THREAD_ID_2`
-- `status.data.run.status == "done"`
-- `status.data.tasks[0].status == "done"`
-- `show THREAD_ID_1` reports a terminal failed thread state
-- `show THREAD_ID_2` reports a terminal done thread state
-- the worker summary confirms that the retried attempt was a new thread rather than a reused one
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
-
-## Recorded Example Run
-
-- recorded on: `2026-03-19`
-- execution mode: `direct_cli_replay` via `scripts/run_orch_skill_forward_tests.sh`
-- result: `pass`
-- observed run id: `run_blog_skill_retry_001`
-- observed first thread id: `thr_8dbf2d2e46d7469891cc1ef604da476f`
-- observed second thread id: `thr_bdd86f4fe08e4ebfb39b8151ac41a3bb`
-- evidence summary:
-- `orch wait --for task_failed` woke after the first worker-owned thread failed
-- `orch retry --run run_blog_skill_retry_001 --task T1 --json` returned `attempt_no == 2` with a distinct replacement thread for the same worker
-- final `inbox show` on the first thread returned `thread.status == "failed"`
-- final `inbox show` on the second thread returned `thread.status == "done"`
-- final `orch status --run run_blog_skill_retry_001 --json` returned `run.status == "done"` and `tasks[0].status == "done"`
-- note: this recorded run exercised the packaged binaries directly in a temporary DB and did not spawn separate Codex role agents
-
-## Recorded Real Forward Run
-
-- recorded on: `2026-03-19`
-- execution mode: `real_subagent_forward_test`
-- result: `pass`
-- evidence root: `/tmp/orch-skill-subagents.J1XWgs/leader-retries-failed-task-through-bundled-cli-phased`
-- observed run id: `run_blog_skill_retry_001`
-- observed first thread id: `thr_1e22121642294b56aae351ddec5180d1`
-- observed second thread id: `thr_f2ab1f1899964007b2447796204e1928`
-- evidence summary:
-- the same real leader agent using `skills/orch/` completed the case in three phases: initial `run/task/dispatch`, then `wait --for task_failed` plus `retry`, then final `wait --for task_done` plus `status`
-- a real worker agent using `skills/inbox/` failed the first thread, polled for the retried pending thread, then claimed and completed the second thread
-- main-thread validation confirmed the two thread ids were distinct, the first thread finished `failed`, the second thread finished `done`, and the run/task both finished `done`
diff --git a/docs/tests/orch-skill/leader-run-dispatch-reconcile-through-bundled-cli.md b/docs/tests/orch-skill/leader-run-dispatch-reconcile-through-bundled-cli.md
deleted file mode 100644
index a996e5a..0000000
--- a/docs/tests/orch-skill/leader-run-dispatch-reconcile-through-bundled-cli.md
+++ /dev/null
@@ -1,116 +0,0 @@
-# Case: `leader-run-dispatch-reconcile-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and a leader-side happy-path skill validation.
-
-The goal is to verify that a leader using the packaged `orch` skill can drive a complete run lifecycle while a worker uses the packaged `inbox` skill for thread progress.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can use the bundled `./assets/orch` CLI through the skill
-- the leader can create a run, add a task, dispatch it, reconcile worker progress, and inspect final status
-- a worker using the bundled inbox skill can claim the dispatched thread and finish it
-- the final orch run state and inbox thread state both reach `done`
-
-## Preconditions
-
-- orch skill path exists: `ORCH_SKILL_PATH=skills/orch`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- bundled CLI executables exist at `ORCH_SKILL_PATH/assets/orch` and `INBOX_SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching role agents through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-
-## Agent Topology
-
-- `leader`
-- `worker-a`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $orch at ORCH_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) create run run_blog_skill_001, 2) add exactly one task T1 assigned to worker-a, 3) dispatch it with --execution-mode analysis, 4) wait or poll until the worker reports completion, 5) reconcile the run, 6) inspect final status, 7) stop after reporting RUN_ID and THREAD_ID. Do not use ordinary chat to coordinate with the worker.
-```
-
-### Worker Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as worker-a on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch pending work for worker-a, 2) claim it, 3) send one in_progress update, 4) finish it with done, 5) stop after reporting the THREAD_ID you handled. Do not use ordinary chat to coordinate with the leader.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Inject `skills/orch/` into `leader`
-3. Inject `skills/inbox/` into `worker-a`
-4. Point both agents at the same database path `TMPDIR/coord.db`
-5. Launch `leader` and `worker-a` in parallel
-6. Wait for both agents to finish
-7. Resolve `RUN_ID=run_blog_skill_001` and `THREAD_ID` from the agent outputs
-8. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-ORCH_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json status --run run_blog_skill_001
-INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-```
-
-## Expected Outcomes
-
-- `leader` successfully creates `run_blog_skill_001`
-- `leader` successfully adds and dispatches `T1`
-- `worker-a` successfully claims the dispatched thread
-- `worker-a` emits at least one `in_progress` update
-- `worker-a` completes the thread with `done`
-- `leader` successfully reconciles and sees `run.status == "done"`
-
-## Assertions
-
-- `status.data.run.run_id == "run_blog_skill_001"`
-- `status.data.run.status == "done"`
-- `status.data.tasks` contains exactly one task `T1`
-- `status.data.tasks[0].status == "done"`
-- `show.data.thread.status == "done"`
-- `show.data.messages[*].kind` includes `task`, `progress`, and `result`
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR` and `coord.db` for replay and manual inspection
-
-## Recorded Example Run
-
-- recorded on: `2026-03-19`
-- execution mode: `direct_cli_replay` via `scripts/run_orch_skill_forward_tests.sh`
-- result: `pass`
-- observed run id: `run_blog_skill_001`
-- observed thread id: `thr_eced1b8cb1254065a7cd3aaff6dc0bcb`
-- evidence summary:
-- final `orch status --run run_blog_skill_001 --json` returned `run.status == "done"` with a single task `T1` in state `done`
-- final `inbox show --thread thr_eced1b8cb1254065a7cd3aaff6dc0bcb --json` returned thread state `done` and message kinds `task`, `progress`, and `result`
-- the replay also observed `orch wait --for task_done` wake successfully before the final reconcile
-- note: this recorded run exercised the packaged binaries directly in a temporary DB and did not spawn separate Codex role agents
-
-## Recorded Real Forward Run
-
-- recorded on: `2026-03-19`
-- execution mode: `real_subagent_forward_test`
-- result: `pass`
-- evidence root: `/tmp/orch-skill-subagents.J1XWgs/leader-run-dispatch-reconcile-through-bundled-cli`
-- observed run id: `run_blog_skill_001`
-- observed thread id: `thr_7c64e75bbcce4143a7fc425242f7e7d3`
-- evidence summary:
-- a real leader agent using `skills/orch/` completed `run init`, `task add`, `dispatch`, `wait`, `reconcile`, and `status`
-- a real worker agent using `skills/inbox/` completed `fetch`, `claim`, `update --status in_progress`, and `done`
-- main-thread validation confirmed `status.data.run.status == "done"`, `status.data.tasks[0].status == "done"`, and thread history kinds `task`, `progress`, and `result`
diff --git a/docs/tests/orch-skill/strict-worktree-dispatch-launches-worker-through-codex-bridge.md b/docs/tests/orch-skill/strict-worktree-dispatch-launches-worker-through-codex-bridge.md
deleted file mode 100644
index 686e747..0000000
--- a/docs/tests/orch-skill/strict-worktree-dispatch-launches-worker-through-codex-bridge.md
+++ /dev/null
@@ -1,97 +0,0 @@
-# Case: `strict-worktree-dispatch-launches-worker-through-codex-bridge`
-
-## Test Type
-
-This is a `forward-test` and a worktree launch-bridge validation.
-
-The goal is to verify that a leader using the packaged `orch` skill can dispatch a code task, render a standardized worker brief from the saved dispatch JSON, and launch a worker subagent that respects the assigned worktree contract.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can dispatch a code task with `--execution-mode code` through the bundled orch skill
-- the leader can turn that dispatch JSON into a stable worker brief through `./assets/orch-worker-brief`
-- the launched worker subagent uses `skills/inbox/` and reports through inbox
-- the launched worker observes the assigned `worktree_path` and completes the attempt
-- the leader can reconcile the finished task and clean the attempt worktree
-
-## Preconditions
-
-- orch skill path exists: `ORCH_SKILL_PATH=skills/orch`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- bundled CLI executables exist at `ORCH_SKILL_PATH/assets/orch` and `INBOX_SKILL_PATH/assets/inbox`
-- the helper asset exists at `ORCH_SKILL_PATH/assets/orch-worker-brief`
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching role agents through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-- create `TMPDIR/repo` as a Git repository with one committed file before launching agents
-
-## Agent Topology
-
-- `leader`
-
-The leader is responsible for spawning the code-writing worker subagent after dispatch.
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $orch at ORCH_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) create run run_blog_skill_launch_worktree_001, 2) add one code task T1 for worker-a, 3) dispatch it with --execution-mode code --repo-path TMPDIR/repo --workspace-root .orch/worktrees while saving --json to TMPDIR/dispatch.json, 4) render a worker brief with ORCH_SKILL_PATH/assets/orch-worker-brief into TMPDIR/worker-brief.txt, 5) spawn one worker subagent that uses INBOX_SKILL_PATH and the generated worker brief, 6) wait until the worker completes, 7) inspect final status, 8) clean up attempt 1, 9) stop after reporting RUN_ID, THREAD_ID, and WORKTREE_PATH. Do not use ordinary chat to coordinate with the worker; the launched worker must use inbox only and should respect the assigned worktree.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Create `TMPDIR/repo` with an initial commit before launching agents
-3. Inject `skills/orch/` into `leader`
-4. Ensure `leader` can also reference `skills/inbox/` by path when it spawns the worker subagent
-5. Point the leader at the same database path `TMPDIR/coord.db`
-6. Launch `leader`
-7. Wait for `leader` and any spawned worker subagent(s) to finish
-8. Resolve `THREAD_ID` and `WORKTREE_PATH` from the leader output
-9. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-ORCH_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json status --run run_blog_skill_launch_worktree_001
-INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-test -f TMPDIR/dispatch.json
-test -f TMPDIR/worker-brief.txt
-test ! -d WORKTREE_PATH
-```
-
-## Expected Outcomes
-
-- the leader reports a non-empty `WORKTREE_PATH` from dispatch
-- the rendered worker brief includes that same `worktree_path`
-- the launched worker subagent claims the assigned thread and completes it through inbox
-- the final run status is `done`
-- the cleanup step removes the worktree directory
-
-## Assertions
-
-- `status.data.run.status == "done"`
-- `status.data.tasks[0].status == "done"`
-- `status.data.tasks[0].latest_attempt.worktree_path == WORKTREE_PATH`
-- `show.data.thread.status == "done"`
-- the task-side thread history includes a payload field or body content referencing the worktree path
-- `TMPDIR/worker-brief.txt` mentions the expected `WORKTREE_PATH`
-- `WORKTREE_PATH` does not exist after cleanup
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR`, `coord.db`, and the Git repo fixture for replay and manual inspection
-
-## Recorded Example Run
-
-- no recorded run yet
-- this case should be captured with a real leader agent plus a leader-launched worker subagent after the launch bridge assets are adopted
diff --git a/docs/tests/orch-skill/strict-worktree-dispatch-to-cleanup-through-bundled-cli.md b/docs/tests/orch-skill/strict-worktree-dispatch-to-cleanup-through-bundled-cli.md
deleted file mode 100644
index fcd851b..0000000
--- a/docs/tests/orch-skill/strict-worktree-dispatch-to-cleanup-through-bundled-cli.md
+++ /dev/null
@@ -1,119 +0,0 @@
-# Case: `strict-worktree-dispatch-to-cleanup-through-bundled-cli`
-
-## Test Type
-
-This is a `forward-test` and a worktree-lifecycle skill validation.
-
-The goal is to verify that a leader using the packaged `orch` skill can allocate a code-mode worktree, reconcile completion, and clean that worktree up through the bundled CLI while a worker completes the task through inbox.
-
-## Purpose
-
-Validate that all of the following can be true at the same time:
-
-- the leader can dispatch a code task with `--execution-mode code` through the bundled orch skill
-- the worker can complete the resulting attempt thread through inbox
-- the leader can reconcile the finished task and clean the attempt worktree
-- the final filesystem state matches the cleanup contract
-
-## Preconditions
-
-- orch skill path exists: `ORCH_SKILL_PATH=skills/orch`
-- inbox skill path exists: `INBOX_SKILL_PATH=skills/inbox`
-- bundled CLI executables exist at `ORCH_SKILL_PATH/assets/orch` and `INBOX_SKILL_PATH/assets/inbox`
-- use an empty temporary directory `TMPDIR`
-- initialize `TMPDIR/coord.db` before launching role agents through `INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json init`
-- create `TMPDIR/repo` as a Git repository with one committed file before launching role agents
-
-## Agent Topology
-
-- `leader`
-- `worker-a`
-
-## Inputs
-
-### Leader Prompt
-
-```text
-Use $orch at ORCH_SKILL_PATH to act as leader on the already initialized SQLite DB TMPDIR/coord.db. Only coordinate through the bundled orch CLI from the skill. Workflow: 1) create run run_blog_skill_worktree_001, 2) add one code task T1 for worker-a, 3) dispatch it with --execution-mode code --repo-path TMPDIR/repo --workspace-root .orch/worktrees, 4) record the returned THREAD_ID and WORKTREE_PATH, 5) wait until the worker completes, 6) reconcile, 7) clean up attempt 1, 8) stop after reporting RUN_ID, THREAD_ID, and WORKTREE_PATH. Do not use ordinary chat to coordinate with the worker.
-```
-
-### Worker Prompt
-
-```text
-Use $inbox at INBOX_SKILL_PATH to act as worker-a on SQLite DB TMPDIR/coord.db. Only coordinate through the bundled inbox CLI from the skill. Workflow: 1) fetch and claim the assigned task, 2) inspect the task payload enough to confirm a worktree path was provided, 3) finish the task with done, 4) stop after reporting the THREAD_ID you handled and whether you observed a worktree path. Do not use ordinary chat to coordinate with the leader.
-```
-
-## Execution Parameters
-
-- use the shared execution contract from [README.md](./README.md)
-- use the shared timeout defaults from [README.md](./README.md)
-- do not override the default cleanup policy
-
-## Execution Steps
-
-1. Initialize `TMPDIR/coord.db` once through the bundled inbox CLI before launching agents
-2. Create `TMPDIR/repo` with an initial commit before launching agents
-3. Inject `skills/orch/` into `leader`
-4. Inject `skills/inbox/` into `worker-a`
-5. Point both agents at the same database path `TMPDIR/coord.db`
-6. Launch `leader` and `worker-a` in parallel
-7. Wait for both agents to finish
-8. Resolve `THREAD_ID` and `WORKTREE_PATH` from the agent outputs
-9. Independently run the validation commands from the main thread
-
-## Validation Commands
-
-```bash
-ORCH_SKILL_PATH/assets/orch --db TMPDIR/coord.db --json status --run run_blog_skill_worktree_001
-INBOX_SKILL_PATH/assets/inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-test ! -d WORKTREE_PATH
-```
-
-## Expected Outcomes
-
-- the leader reports a non-empty `WORKTREE_PATH` from dispatch
-- the worker reports that the task payload exposed a worktree path
-- the final run status is `done`
-- the cleanup step removes the worktree directory
-
-## Assertions
-
-- `status.data.run.status == "done"`
-- `status.data.tasks[0].status == "done"`
-- `show.data.thread.status == "done"`
-- the task-side thread history includes a payload field or body content referencing the worktree path
-- `WORKTREE_PATH` does not exist after cleanup
-
-## Cleanup
-
-- use the default cleanup policy from [README.md](./README.md)
-- if the run fails, retain `TMPDIR`, `coord.db`, and the Git repo fixture for replay and manual inspection
-
-## Recorded Example Run
-
-- recorded on: `2026-03-19`
-- execution mode: `direct_cli_replay` via `scripts/run_orch_skill_forward_tests.sh`
-- result: `pass`
-- observed run id: `run_blog_skill_worktree_001`
-- observed thread id: `thr_5743259fdccb41f9bb33dce0040b27a5`
-- observed worktree suffix: `.orch/worktrees/run-blog-skill-worktree-001/T1/attempt-1`
-- evidence summary:
-- `orch dispatch --execution-mode code` returned `base_ref == "HEAD"`, a concrete `base_commit`, branch `orch/run-blog-skill-worktree-001/T1/attempt-1`, and a non-empty `worktree_path`
-- the task payload stored on the worker thread exposed the same `worktree_path`
-- final `orch status --run run_blog_skill_worktree_001 --json` returned `run.status == "done"` and `tasks[0].status == "done"`
-- final `orch cleanup --run run_blog_skill_worktree_001 --task T1 --json` returned one cleaned attempt and the worktree directory no longer existed afterward
-- note: this recorded run exercised the packaged binaries directly in a temporary DB and Git fixture and did not spawn separate Codex role agents
-
-## Recorded Real Forward Run
-
-- recorded on: `2026-03-19`
-- execution mode: `real_subagent_forward_test`
-- result: `pass`
-- evidence root: `/tmp/orch-skill-subagents.J1XWgs/strict-worktree-dispatch-to-cleanup-through-bundled-cli`
-- observed run id: `run_blog_skill_worktree_001`
-- observed thread id: `thr_089527cd07f74b52a524ba07ed74c2e4`
-- observed worktree path: `/private/tmp/orch-skill-subagents.J1XWgs/strict-worktree-dispatch-to-cleanup-through-bundled-cli/repo/.orch/worktrees/run-blog-skill-worktree-001/T1/attempt-1`
-- evidence summary:
-- a real leader agent using `skills/orch/` completed code-mode `dispatch`, `wait`, `reconcile`, `cleanup`, and `status`
-- a real worker agent using `skills/inbox/` claimed the thread and finished it with `done`
-- main-thread validation confirmed that the task payload did include the same `worktree_path` even though the worker agent summary failed to notice it, and also confirmed the worktree directory no longer existed after cleanup
diff --git a/docs/tests/orch/README.md b/docs/tests/orch/README.md
deleted file mode 100644
index c073a4c..0000000
--- a/docs/tests/orch/README.md
+++ /dev/null
@@ -1,77 +0,0 @@
-# Orch Markdown Test Plan
-
-## Purpose
-
-This directory contains the human-readable Markdown test plan for the `orch` CLI.
-
-It complements automated Go tests. The goal is to preserve the user-visible scheduler contract in a form that can be reviewed, extended, and executed manually without re-deriving command behavior from implementation code.
-
-## Directory Rules
-
-- one folder per `orch` leaf command or shared area
-- each folder keeps a `README.md` entrypoint
-- command folders use `README.md` as an index only
-- each command test case lives in its own Markdown file named after the case slug
-- no numeric test IDs
-- each command case is identified by its concrete file path
-
-Case file naming pattern:
-
-```text
-<case-slug>.md
-```
-
-## Authoring Principles
-
-- focus on externally visible CLI behavior rather than store internals
-- prefer stable command sequences that a new agent can replay against a temp database
-- document both success contracts and failure boundaries
-- reuse scenarios from automated `orch` integration tests before inventing new cases
-- keep terminology consistent with the scheduler concepts exposed by `orch`: run, task, dependency, attempt, blocked task, worktree, and council review
-
-## Common Execution Model
-
-Most cases in this directory assume the same baseline:
-
-1. create an isolated temporary directory
-2. choose a database path such as `TMPDIR/coord.db`
-3. run the target `orch` command sequence with `--db TMPDIR/coord.db --json`
-4. when a case needs worker-side state transitions, drive them through `inbox` against the same database
-
-Unless a case says otherwise:
-
-- commands should use `--json`
-- assertions should check both exit code and JSON payload
-- `orch` may be pointed at an empty database path; schema bootstrapping happens automatically on open
-
-## Folder Map
-
-- `README.md`: global conventions and glossary
-- `ROADMAP.md`: document progress, planned case backlog, and authored-case register
-- `_shared/README.md`: reusable fixtures, JSON assertions, exit-code rules, and worktree conventions
-- `workflows/README.md`: cross-command end-to-end scenarios
-- per-command folders: one leaf-command directory per implemented `orch` command surface
-- `verify/`: verification-gate command cases
-
-## Glossary
-
-- `run`: one coordinated execution for a user request
-- `task`: one schedulable unit of work inside a run
-- `dependency`: an edge that gates one task on another
-- `attempt`: one execution try for a task
-- `dispatch`: the act of materializing a task into an inbox thread
-- `workspace`: the branch and worktree assigned to a code-writing attempt
-- `verification gate`: the check aggregation state between worker `done` and final task completion
-- `verifying`: the task state used while required checks are still pending or being recorded
-- `blocked task`: a task whose active attempt requires clarification or another external decision
-- `council review`: a higher-level workflow built on top of `orch` that dispatches fixed reviewer roles and tallies recommendations
-
-## Relationship To Automated Tests
-
-The current best executable reference is [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go).
-
-When this Markdown plan expands:
-
-- prefer matching an existing automated scenario first
-- record any additional manual-only contract coverage explicitly in the relevant command case file
-- keep [ROADMAP.md](./ROADMAP.md) synchronized with authored files and case slugs
diff --git a/docs/tests/orch/ROADMAP.md b/docs/tests/orch/ROADMAP.md
deleted file mode 100644
index 6907ce5..0000000
--- a/docs/tests/orch/ROADMAP.md
+++ /dev/null
@@ -1,389 +0,0 @@
-# Orch Test Documentation Roadmap
-
-## Purpose
-
-This roadmap tracks the human-readable Markdown test plan for `orch`.
-
-It exists so a new agent can immediately answer four questions without re-reading the whole codebase:
-
-- which test-plan documents already exist
-- which cases have already been written down
-- which cases are still missing
-- what file should be updated next
-
-This roadmap is for the Markdown test-plan set under `docs/tests/orch/`.
-It is not a replacement for automated Go tests.
-
-## Current Snapshot
-
-Snapshot date:
-
-- `2026-03-23`
-
-Current state:
-
-- `orch` CLI now covers scheduler control, explicit execution-mode dispatch, verification gates, wait, and council review surfaces
-- automated Go tests now cover every currently documented `orch` command case and workflow case, combining the original integration suite with focused contract tests for run/task/ready/dispatch/verify/blocked/answer/cleanup/status/reconcile/workflow/council-report edges
-- `status` coverage now also documents the richer leader view: auto-reconcile plus latest attempt, latest message, blocked-question context, and task gate context
-- this roadmap now exists under `docs/tests/orch/ROADMAP.md`
-- all planned global, shared, workflow, command-index, and command-case Markdown documents in the current `orch` test-plan set have been authored
-- every implemented `orch` leaf-command folder now uses `README.md` as an index plus one Markdown file per planned case
-- workflow cases now exist in `docs/tests/orch/workflows/README.md`, and the automated suite now explicitly covers both command-level contracts and the remaining end-to-end workflow gaps
-
-Progress summary for planned test-plan documents, excluding `ROADMAP.md`:
-
-- planned document files: `71`
-- authored document files: `71`
-- planned case slugs in this roadmap: `52`
-- authored case slugs in this roadmap: `52`
-
-## Scope
-
-In scope:
-
-- `orch run init`
-- `orch run show`
-- `orch task add`
-- `orch dep add`
-- `orch ready`
-- `orch dispatch`
-- `orch reconcile`
-- `orch verify`
-- `orch wait`
-- `orch blocked`
-- `orch answer`
-- `orch retry`
-- `orch reassign`
-- `orch cancel`
-- `orch cleanup`
-- `orch status`
-- `orch council start`
-- `orch council wait`
-- `orch council tally`
-- `orch council report`
-- cross-command workflows
-- shared test conventions for JSON output, exit codes, temp databases, repo fixtures, and worktree assertions
-
-Out of scope:
-
-- `inbox` command behavior except as supporting setup for `orch` scenarios
-- implementation details that are not visible through the `orch` CLI contract
-- future `orch` commands that are not currently implemented
-
-## Tracking Rules
-
-Directory model:
-
-- one folder per leaf command or shared area
-- each folder keeps a `README.md` entrypoint
-- command folders use `README.md` as an index only
-- each command case lives in its own Markdown file named after the case slug
-- cross-command workflow cases remain grouped in `docs/tests/orch/workflows/README.md`
-
-Case identity:
-
-- do not use numeric IDs
-- identify each command case by its concrete file path
-- identify each workflow case by `path + case slug`
-- command case file naming pattern:
-
-```text
-<case-slug>.md
-```
-
-- workflow case heading pattern:
-
-```md
-## case: run-dispatch-reconcile-status-happy-path
-```
-
-Per-case structure inside the case document:
-
-- `用例意义`
-- `前置条件`
-- `输入`
-- `预期输出`
-- `断言结论`
-
-How to update this roadmap when a new case is written:
-
-1. if it is a command case, create or update the target `<case-slug>.md` file under the relevant leaf-command folder
-2. if it is a command case, add or update the entry in that folder's `README.md` index
-3. if it is a workflow case, add or update the case inside `docs/tests/orch/workflows/README.md`
-4. move the case slug from `Pending Case Backlog` to `Authored Case Register`
-5. update the authored counts in `Current Snapshot`
-6. if a new Markdown file is created, update `Document Progress`
-
-Allowed status values in this roadmap:
-
-- `pending`
-- `in_progress`
-- `done`
-- `deferred`
-
-## Existing Automated Coverage Reference
-
-The Markdown test-plan set started from zero; the automated tests listed below already existed and should be used as source material when writing or extending the docs:
-
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L12) `TestOrchRunDispatchReconcileLifecycle`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L184) `TestOrchDependencyBlockedAndAnswerFlow`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L458) `TestOrchDispatchRejectsNonReadyTask`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L512) `TestOrchDispatchCreatesStrictWorktree`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L631) `TestOrchStrictWorktreeRejectsDirtyRepoWithoutBaseRef`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L680) `TestOrchStrictWorktreeAllowsExplicitBaseRefOnDirtyRepo`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L733) `TestOrchDispatchAutoEnablesWorktreeForCodeLikeTask`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L782) `TestOrchDispatchDoesNotAutoEnableWorktreeForNonCodeTask`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L829) `TestOrchWaitWakesOnBlockedEvent`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L944) `TestOrchWaitTimesOutWithoutMatchingEvent`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L981) `TestOrchRetryCreatesNewAttempt`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L1079) `TestOrchReassignCancelsOldThreadAndDispatchesNewAttempt`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L1186) `TestOrchCancelTaskAndRun`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L1315) `TestOrchCleanupRemovesCompletedWorktree`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L1402) `TestOrchCouncilStartDispatchesThreeReviewers`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L1545) `TestOrchCouncilWaitWakesWhenAllReviewersComplete`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L1678) `TestOrchCouncilWaitTimesOutWhenReviewersIncomplete`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L1715) `TestOrchCouncilTallyGroupsReviewerFindingsNormal`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L1813) `TestOrchCouncilTallyStrictKeepsDistinctProposals`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L1873) `TestOrchCouncilReportDefaultShowsConsensusAndMajority`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L1950) `TestOrchCouncilReportShowAllIncludesMinority`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L1979) `TestOrchCouncilReportJSONShape`
-- [command_contracts_core_test.go](../../../packages/orch-runtime/internal/cli/orch/command_contracts_core_test.go) `TestOrchRunShowReturnsRunSummaryAndTaskCounts`
-- [command_contracts_core_test.go](../../../packages/orch-runtime/internal/cli/orch/command_contracts_core_test.go) `TestOrchRunShowRejectsMissingRun`
-- [command_contracts_core_test.go](../../../packages/orch-runtime/internal/cli/orch/command_contracts_core_test.go) `TestOrchTaskAddRejectsInvalidAcceptanceJSON`
-- [command_contracts_core_test.go](../../../packages/orch-runtime/internal/cli/orch/command_contracts_core_test.go) `TestOrchTaskAddRejectsInvalidPriority`
-- [command_contracts_core_test.go](../../../packages/orch-runtime/internal/cli/orch/command_contracts_core_test.go#L147) `TestOrchTaskAddSnapshotsSpecAndVerificationPolicy`
-- [command_contracts_core_test.go](../../../packages/orch-runtime/internal/cli/orch/command_contracts_core_test.go#L203) `TestOrchTaskAddRejectsSpecSHAMismatch`
-- [command_contracts_core_test.go](../../../packages/orch-runtime/internal/cli/orch/command_contracts_core_test.go) `TestOrchReadyOrdersByPriorityAndRespectsLimit`
-- [integration_test.go](../../../packages/orch-runtime/internal/cli/orch/integration_test.go#L185) `TestOrchVerificationGateLifecycle`
-- [command_contracts_edges_test.go](../../../packages/orch-runtime/internal/cli/orch/command_contracts_edges_test.go) `TestOrchAnswerAcceptsPayloadJSONWithoutBody`
-- [command_contracts_edges_test.go](../../../packages/orch-runtime/internal/cli/orch/command_contracts_edges_test.go) `TestOrchAnswerRejectsEmptyBodyAndPayload`
-- [command_contracts_edges_test.go](../../../packages/orch-runtime/internal/cli/orch/command_contracts_edges_test.go) `TestOrchCleanupRejectsAttemptWithoutTask`
-- [command_contracts_edges_test.go](../../../packages/orch-runtime/internal/cli/orch/command_contracts_edges_test.go) `TestOrchCleanupReturnsNoMatchingWorkWhenFiltersMiss`
-- [command_contracts_remaining_test.go](../../../packages/orch-runtime/internal/cli/orch/command_contracts_remaining_test.go) `TestOrchRunInitCreatesNewRun`
-- [command_contracts_remaining_test.go](../../../packages/orch-runtime/internal/cli/orch/command_contracts_remaining_test.go) `TestOrchDispatchCreatesAttemptAndThreadForReadyTask`
-- [command_contracts_remaining_test.go](../../../packages/orch-runtime/internal/cli/orch/command_contracts_remaining_test.go) `TestOrchBlockedListsLatestQuestionForBlockedTask`
-- [command_contracts_remaining_test.go](../../../packages/orch-runtime/internal/cli/orch/command_contracts_remaining_test.go) `TestOrchStatusReturnsRunSummaryAndTaskList`
-- [command_contracts_remaining_test.go](../../../packages/orch-runtime/internal/cli/orch/command_contracts_remaining_test.go) `TestOrchReconcileMapsFailedThreadToTerminalTaskState`
-- [command_contracts_remaining_test.go](../../../packages/orch-runtime/internal/cli/orch/command_contracts_remaining_test.go) `TestOrchWorkflowStrictWorktreeDispatchToCleanup`
-- [command_contracts_remaining_test.go](../../../packages/orch-runtime/internal/cli/orch/command_contracts_remaining_test.go) `TestOrchWorkflowCouncilReviewEndToEnd`
-- [council_report_contracts_test.go](../../../packages/orch-runtime/internal/cli/orch/council_report_contracts_test.go) `TestOrchCouncilReportRejectsBeforeTally`
-- [council_report_contracts_test.go](../../../packages/orch-runtime/internal/cli/orch/council_report_contracts_test.go) `TestOrchCouncilReportRejectsInvalidShow`
-- [council_report_contracts_test.go](../../../packages/orch-runtime/internal/cli/orch/council_report_contracts_test.go) `TestOrchCouncilReportDefaultsToConsensusForOnlyUnanimousRun`
-
-These tests do not remove the need for the Markdown plan. They only reduce discovery work.
-
-## Planned Directory Tree
-
-```text
-docs/tests/orch/
-  ROADMAP.md
-  README.md
-  _shared/
-    README.md
-  workflows/
-    README.md
-  run-init/
-    README.md
-  run-show/
-    README.md
-  task-add/
-    README.md
-  dep-add/
-    README.md
-  ready/
-    README.md
-  dispatch/
-    README.md
-  reconcile/
-    README.md
-  verify/
-    README.md
-  wait/
-    README.md
-  blocked/
-    README.md
-  answer/
-    README.md
-  retry/
-    README.md
-  reassign/
-    README.md
-  cancel/
-    README.md
-  cleanup/
-    README.md
-  status/
-    README.md
-  council-start/
-    README.md
-  council-wait/
-    README.md
-  council-tally/
-    README.md
-  council-report/
-    README.md
-```
-
-## Document Progress
-
-| Path | Purpose | Planned Cases | Authored Cases | Status |
-| --- | --- | ---: | ---: | --- |
-| `docs/tests/orch/README.md` | Global testing conventions and glossary | 0 | 0 | done |
-| `docs/tests/orch/_shared/README.md` | Shared fixtures, JSON assertions, exit-code rules, and worktree conventions | 0 | 0 | done |
-| `docs/tests/orch/workflows/README.md` | Cross-command scenarios | 4 | 4 | done |
-| `docs/tests/orch/run-init/README.md` | `run init` command case index | 0 | 0 | done |
-| `docs/tests/orch/run-init/run-init-creates-new-run.md` | `run init` command case | 1 | 1 | done |
-| `docs/tests/orch/run-show/README.md` | `run show` command case index | 0 | 0 | done |
-| `docs/tests/orch/run-show/run-show-returns-run-summary-and-task-counts.md` | `run show` command case | 1 | 1 | done |
-| `docs/tests/orch/task-add/README.md` | `task add` command case index | 0 | 0 | done |
-| `docs/tests/orch/task-add/task-add-creates-ready-root-task.md` | `task add` command case | 1 | 1 | done |
-| `docs/tests/orch/task-add/task-add-rejects-invalid-acceptance-json.md` | `task add` command case | 1 | 1 | done |
-| `docs/tests/orch/task-add/task-add-rejects-invalid-priority.md` | `task add` command case | 1 | 1 | done |
-| `docs/tests/orch/task-add/task-add-snapshots-spec-and-verification-policy.md` | `task add` command case | 1 | 1 | done |
-| `docs/tests/orch/task-add/task-add-rejects-spec-sha-mismatch.md` | `task add` command case | 1 | 1 | done |
-| `docs/tests/orch/dep-add/README.md` | `dep add` command case index | 0 | 0 | done |
-| `docs/tests/orch/dep-add/dep-add-blocks-dependent-task-until-prerequisite-completes.md` | `dep add` command case | 1 | 1 | done |
-| `docs/tests/orch/ready/README.md` | `ready` command case index | 0 | 0 | done |
-| `docs/tests/orch/ready/ready-lists-only-eligible-tasks.md` | `ready` command case | 1 | 1 | done |
-| `docs/tests/orch/ready/ready-orders-by-priority-and-respects-limit.md` | `ready` command case | 1 | 1 | done |
-| `docs/tests/orch/dispatch/README.md` | `dispatch` command case index | 0 | 0 | done |
-| `docs/tests/orch/dispatch/dispatch-creates-attempt-and-thread-for-ready-task.md` | `dispatch` command case | 1 | 1 | done |
-| `docs/tests/orch/dispatch/dispatch-rejects-non-ready-task.md` | `dispatch` command case | 1 | 1 | done |
-| `docs/tests/orch/dispatch/dispatch-creates-strict-worktree.md` | `dispatch` command case | 1 | 1 | done |
-| `docs/tests/orch/dispatch/dispatch-rejects-dirty-repo-without-base-ref.md` | `dispatch` command case | 1 | 1 | done |
-| `docs/tests/orch/dispatch/dispatch-allows-explicit-base-ref-on-dirty-repo.md` | `dispatch` command case | 1 | 1 | done |
-| `docs/tests/orch/dispatch/dispatch-requires-explicit-execution-mode.md` | `dispatch` command case | 1 | 1 | done |
-| `docs/tests/orch/dispatch/dispatch-analysis-mode-skips-worktree.md` | `dispatch` command case | 1 | 1 | done |
-| `docs/tests/orch/reconcile/README.md` | `reconcile` command case index | 0 | 0 | done |
-| `docs/tests/orch/reconcile/reconcile-maps-claimed-or-in-progress-thread-to-running.md` | `reconcile` command case | 1 | 1 | done |
-| `docs/tests/orch/reconcile/reconcile-maps-done-or-failed-thread-to-terminal-task-state.md` | `reconcile` command case | 1 | 1 | done |
-| `docs/tests/orch/reconcile/reconcile-maps-done-thread-to-verifying-when-task-has-required-checks.md` | `reconcile` command case | 1 | 1 | done |
-| `docs/tests/orch/verify/README.md` | `verify` command case index | 0 | 0 | done |
-| `docs/tests/orch/verify/verify-status-returns-spec-and-gate-for-task.md` | `verify` command case | 1 | 1 | done |
-| `docs/tests/orch/verify/verify-record-updates-gate-and-marks-task-done-when-required-checks-pass.md` | `verify` command case | 1 | 1 | done |
-| `docs/tests/orch/wait/README.md` | `wait` command case index | 0 | 0 | done |
-| `docs/tests/orch/wait/wait-wakes-on-matching-run-event.md` | `wait` command case | 1 | 1 | done |
-| `docs/tests/orch/wait/wait-times-out-without-matching-event.md` | `wait` command case | 1 | 1 | done |
-| `docs/tests/orch/blocked/README.md` | `blocked` command case index | 0 | 0 | done |
-| `docs/tests/orch/blocked/blocked-lists-latest-question-for-blocked-task.md` | `blocked` command case | 1 | 1 | done |
-| `docs/tests/orch/answer/README.md` | `answer` command case index | 0 | 0 | done |
-| `docs/tests/orch/answer/answer-appends-answer-to-active-thread.md` | `answer` command case | 1 | 1 | done |
-| `docs/tests/orch/answer/answer-accepts-payload-json-without-body.md` | `answer` command case | 1 | 1 | done |
-| `docs/tests/orch/answer/answer-rejects-empty-body-and-payload.md` | `answer` command case | 1 | 1 | done |
-| `docs/tests/orch/retry/README.md` | `retry` command case index | 0 | 0 | done |
-| `docs/tests/orch/retry/retry-creates-new-attempt-for-failed-task.md` | `retry` command case | 1 | 1 | done |
-| `docs/tests/orch/reassign/README.md` | `reassign` command case index | 0 | 0 | done |
-| `docs/tests/orch/reassign/reassign-cancels-old-thread-and-dispatches-new-attempt.md` | `reassign` command case | 1 | 1 | done |
-| `docs/tests/orch/cancel/README.md` | `cancel` command case index | 0 | 0 | done |
-| `docs/tests/orch/cancel/cancel-cancels-single-task.md` | `cancel` command case | 1 | 1 | done |
-| `docs/tests/orch/cancel/cancel-cancels-entire-run.md` | `cancel` command case | 1 | 1 | done |
-| `docs/tests/orch/cleanup/README.md` | `cleanup` command case index | 0 | 0 | done |
-| `docs/tests/orch/cleanup/cleanup-removes-completed-worktree.md` | `cleanup` command case | 1 | 1 | done |
-| `docs/tests/orch/cleanup/cleanup-rejects-attempt-without-task.md` | `cleanup` command case | 1 | 1 | done |
-| `docs/tests/orch/cleanup/cleanup-returns-no-matching-work-when-filters-miss.md` | `cleanup` command case | 1 | 1 | done |
-| `docs/tests/orch/status/README.md` | `status` command case index | 0 | 0 | done |
-| `docs/tests/orch/status/status-returns-run-summary-and-task-list.md` | `status` command case | 1 | 1 | done |
-| `docs/tests/orch/status/status-auto-reconciles-and-includes-blocked-context.md` | `status` command case | 1 | 1 | done |
-| `docs/tests/orch/council-start/README.md` | `council start` command case index | 0 | 0 | done |
-| `docs/tests/orch/council-start/council-start-dispatches-three-reviewers.md` | `council start` command case | 1 | 1 | done |
-| `docs/tests/orch/council-wait/README.md` | `council wait` command case index | 0 | 0 | done |
-| `docs/tests/orch/council-wait/council-wait-wakes-when-all-reviewers-complete.md` | `council wait` command case | 1 | 1 | done |
-| `docs/tests/orch/council-wait/council-wait-times-out-when-reviewers-incomplete.md` | `council wait` command case | 1 | 1 | done |
-| `docs/tests/orch/council-tally/README.md` | `council tally` command case index | 0 | 0 | done |
-| `docs/tests/orch/council-tally/council-tally-groups-reviewer-findings-in-normal-mode.md` | `council tally` command case | 1 | 1 | done |
-| `docs/tests/orch/council-tally/council-tally-keeps-distinct-proposals-in-strict-mode.md` | `council tally` command case | 1 | 1 | done |
-| `docs/tests/orch/council-report/README.md` | `council report` command case index | 0 | 0 | done |
-| `docs/tests/orch/council-report/council-report-defaults-to-consensus-and-majority.md` | `council report` command case | 1 | 1 | done |
-| `docs/tests/orch/council-report/council-report-show-all-includes-minority.md` | `council report` command case | 1 | 1 | done |
-| `docs/tests/orch/council-report/council-report-json-shape-is-stable.md` | `council report` command case | 1 | 1 | done |
-| `docs/tests/orch/council-report/council-report-rejects-before-tally.md` | `council report` command case | 1 | 1 | done |
-| `docs/tests/orch/council-report/council-report-rejects-invalid-show.md` | `council report` command case | 1 | 1 | done |
-| `docs/tests/orch/council-report/council-report-defaults-to-consensus-when-run-is-only-unanimous.md` | `council report` command case | 1 | 1 | done |
-
-## Authoring Order
-
-1. global conventions in `docs/tests/orch/README.md`
-2. shared fixtures and assertion helpers in `docs/tests/orch/_shared/README.md`
-3. workflow cases in `docs/tests/orch/workflows/README.md`
-4. core scheduler command docs: `run-init`, `task-add`, `dep-add`, `ready`, `dispatch`, `reconcile`, `status`
-5. verification command docs: `verify`
-6. interactive leader command docs: `wait`, `blocked`, `answer`, `retry`, `reassign`, `cancel`, `cleanup`
-7. council workflow docs: `council-start`, `council-wait`, `council-tally`, `council-report`
-
-## Authored Case Register
-
-| Path | Case Slug | Coverage Note | Status |
-| --- | --- | --- | --- |
-| `docs/tests/orch/workflows/README.md` | `run-dispatch-reconcile-status-happy-path` | end-to-end happy path from run creation through final status | done |
-| `docs/tests/orch/workflows/README.md` | `dependency-blocked-answer-resume-flow` | dependency gating plus blocked question and answer recovery | done |
-| `docs/tests/orch/workflows/README.md` | `code-mode-dispatch-to-cleanup` | worktree-backed code task flows from dispatch through cleanup | done |
-| `docs/tests/orch/workflows/README.md` | `council-review-end-to-end` | council workflow runs from reviewer dispatch through final report | done |
-| `docs/tests/orch/run-init/run-init-creates-new-run.md` | `run-init-creates-new-run` | creates a run with goal and optional summary | done |
-| `docs/tests/orch/run-show/run-show-returns-run-summary-and-task-counts.md` | `run-show-returns-run-summary-and-task-counts` | shows aggregate run metadata after activity | done |
-| `docs/tests/orch/task-add/task-add-creates-ready-root-task.md` | `task-add-creates-ready-root-task` | dependency-free task becomes ready immediately | done |
-| `docs/tests/orch/task-add/task-add-rejects-invalid-acceptance-json.md` | `task-add-rejects-invalid-acceptance-json` | malformed `--acceptance-json` returns stable invalid_input | done |
-| `docs/tests/orch/task-add/task-add-rejects-invalid-priority.md` | `task-add-rejects-invalid-priority` | unsupported priorities are rejected with invalid_input | done |
-| `docs/tests/orch/task-add/task-add-snapshots-spec-and-verification-policy.md` | `task-add-snapshots-spec-and-verification-policy` | task add snapshots spec content, verification profile, and scope policy onto the task | done |
-| `docs/tests/orch/task-add/task-add-rejects-spec-sha-mismatch.md` | `task-add-rejects-spec-sha-mismatch` | explicit spec hash mismatch returns invalid_input | done |
-| `docs/tests/orch/dep-add/dep-add-blocks-dependent-task-until-prerequisite-completes.md` | `dep-add-blocks-dependent-task-until-prerequisite-completes` | dependency edge prevents immediate readiness | done |
-| `docs/tests/orch/ready/ready-lists-only-eligible-tasks.md` | `ready-lists-only-eligible-tasks` | ready list excludes dependency-gated tasks | done |
-| `docs/tests/orch/ready/ready-orders-by-priority-and-respects-limit.md` | `ready-orders-by-priority-and-respects-limit` | ready output orders by priority and applies explicit limit truncation | done |
-| `docs/tests/orch/dispatch/dispatch-creates-attempt-and-thread-for-ready-task.md` | `dispatch-creates-attempt-and-thread-for-ready-task` | ready task dispatch creates attempt, thread, and task message | done |
-| `docs/tests/orch/dispatch/dispatch-rejects-non-ready-task.md` | `dispatch-rejects-non-ready-task` | dispatch on gated task returns invalid_state | done |
-| `docs/tests/orch/dispatch/dispatch-creates-strict-worktree.md` | `dispatch-creates-strict-worktree` | explicit `execution-mode code` dispatch provisions isolated workspace metadata | done |
-| `docs/tests/orch/dispatch/dispatch-rejects-dirty-repo-without-base-ref.md` | `dispatch-rejects-dirty-repo-without-base-ref` | dirty repository without explicit base ref is rejected in `execution-mode code` | done |
-| `docs/tests/orch/dispatch/dispatch-allows-explicit-base-ref-on-dirty-repo.md` | `dispatch-allows-explicit-base-ref-on-dirty-repo` | explicit base ref allows `execution-mode code` dispatch from a dirty repository | done |
-| `docs/tests/orch/dispatch/dispatch-requires-explicit-execution-mode.md` | `dispatch-requires-explicit-execution-mode` | dispatch rejects calls that omit `--execution-mode analysis|code` | done |
-| `docs/tests/orch/dispatch/dispatch-analysis-mode-skips-worktree.md` | `dispatch-analysis-mode-skips-worktree` | analysis mode stays on the normal non-worktree path | done |
-| `docs/tests/orch/reconcile/reconcile-maps-claimed-or-in-progress-thread-to-running.md` | `reconcile-maps-claimed-or-in-progress-thread-to-running` | reconcile maps active inbox execution to running task state | done |
-| `docs/tests/orch/reconcile/reconcile-maps-done-or-failed-thread-to-terminal-task-state.md` | `reconcile-maps-done-or-failed-thread-to-terminal-task-state` | reconcile maps terminal inbox states to terminal task states | done |
-| `docs/tests/orch/reconcile/reconcile-maps-done-thread-to-verifying-when-task-has-required-checks.md` | `reconcile-maps-done-thread-to-verifying-when-task-has-required-checks` | reconcile routes worker done into verifying when the task has required checks | done |
-| `docs/tests/orch/verify/verify-status-returns-spec-and-gate-for-task.md` | `verify-status-returns-spec-and-gate-for-task` | verify status returns the task spec snapshot, selected attempt, and current gate state | done |
-| `docs/tests/orch/verify/verify-record-updates-gate-and-marks-task-done-when-required-checks-pass.md` | `verify-record-updates-gate-and-marks-task-done-when-required-checks-pass` | verify record recomputes the gate and promotes the task to done when all required checks pass | done |
-| `docs/tests/orch/wait/wait-wakes-on-matching-run-event.md` | `wait-wakes-on-matching-run-event` | wait wakes on a later matching run-scoped event | done |
-| `docs/tests/orch/wait/wait-times-out-without-matching-event.md` | `wait-times-out-without-matching-event` | wait timeout returns a normal non-woken result | done |
-| `docs/tests/orch/blocked/blocked-lists-latest-question-for-blocked-task.md` | `blocked-lists-latest-question-for-blocked-task` | blocked view includes latest question payload for the task | done |
-| `docs/tests/orch/answer/answer-appends-answer-to-active-thread.md` | `answer-appends-answer-to-active-thread` | answer appends an inbox answer message to the blocked attempt thread | done |
-| `docs/tests/orch/answer/answer-accepts-payload-json-without-body.md` | `answer-accepts-payload-json-without-body` | payload-only answers stay valid and machine-readable | done |
-| `docs/tests/orch/answer/answer-rejects-empty-body-and-payload.md` | `answer-rejects-empty-body-and-payload` | empty answer requests fail with invalid_input | done |
-| `docs/tests/orch/retry/retry-creates-new-attempt-for-failed-task.md` | `retry-creates-new-attempt-for-failed-task` | retry dispatches a successor attempt after failure | done |
-| `docs/tests/orch/reassign/reassign-cancels-old-thread-and-dispatches-new-attempt.md` | `reassign-cancels-old-thread-and-dispatches-new-attempt` | reassign cancels old execution and opens a new attempt | done |
-| `docs/tests/orch/cancel/cancel-cancels-single-task.md` | `cancel-cancels-single-task` | single-task cancel moves only the targeted task to cancelled | done |
-| `docs/tests/orch/cancel/cancel-cancels-entire-run.md` | `cancel-cancels-entire-run` | run cancel cascades terminal state across the run | done |
-| `docs/tests/orch/cleanup/cleanup-removes-completed-worktree.md` | `cleanup-removes-completed-worktree` | cleanup removes completed attempt worktree artifacts | done |
-| `docs/tests/orch/cleanup/cleanup-rejects-attempt-without-task.md` | `cleanup-rejects-attempt-without-task` | cleanup enforces `--task` when `--attempt` is specified | done |
-| `docs/tests/orch/cleanup/cleanup-returns-no-matching-work-when-filters-miss.md` | `cleanup-returns-no-matching-work-when-filters-miss` | cleanup returns no_matching_work when selectors find no candidates | done |
-| `docs/tests/orch/status/status-returns-run-summary-and-task-list.md` | `status-returns-run-summary-and-task-list` | status reports aggregate run state, per-task statuses, and latest attempt context | done |
-| `docs/tests/orch/status/status-auto-reconciles-and-includes-blocked-context.md` | `status-auto-reconciles-and-includes-blocked-context` | status auto-reconciles inbox state and exposes blocked-task question context | done |
-| `docs/tests/orch/council-start/council-start-dispatches-three-reviewers.md` | `council-start-dispatches-three-reviewers` | council start creates and dispatches three fixed reviewer tasks | done |
-| `docs/tests/orch/council-wait/council-wait-wakes-when-all-reviewers-complete.md` | `council-wait-wakes-when-all-reviewers-complete` | council wait wakes when all reviewer tasks complete | done |
-| `docs/tests/orch/council-wait/council-wait-times-out-when-reviewers-incomplete.md` | `council-wait-times-out-when-reviewers-incomplete` | council wait timeout stays machine-readable | done |
-| `docs/tests/orch/council-tally/council-tally-groups-reviewer-findings-in-normal-mode.md` | `council-tally-groups-reviewer-findings-in-normal-mode` | normal similarity groups semantically aligned reviewer findings | done |
-| `docs/tests/orch/council-tally/council-tally-keeps-distinct-proposals-in-strict-mode.md` | `council-tally-keeps-distinct-proposals-in-strict-mode` | strict similarity preserves wording-level proposal separation | done |
-| `docs/tests/orch/council-report/council-report-defaults-to-consensus-and-majority.md` | `council-report-defaults-to-consensus-and-majority` | default report keeps main output on consensus and majority buckets | done |
-| `docs/tests/orch/council-report/council-report-show-all-includes-minority.md` | `council-report-show-all-includes-minority` | `--show all` includes minority recommendations in final report | done |
-| `docs/tests/orch/council-report/council-report-json-shape-is-stable.md` | `council-report-json-shape-is-stable` | JSON response shape and report artifact metadata remain stable | done |
-| `docs/tests/orch/council-report/council-report-rejects-before-tally.md` | `council-report-rejects-before-tally` | report generation before tally fails with invalid_state | done |
-| `docs/tests/orch/council-report/council-report-rejects-invalid-show.md` | `council-report-rejects-invalid-show` | unsupported `--show` values return invalid_input | done |
-| `docs/tests/orch/council-report/council-report-defaults-to-consensus-when-run-is-only-unanimous.md` | `council-report-defaults-to-consensus-when-run-is-only-unanimous` | omitted `--show` collapses to consensus for only-unanimous runs | done |
-
-## Pending Case Backlog
-
-No pending case slugs remain in the current plan.
-
-When a new `orch` CLI contract or workflow needs coverage:
-
-1. if it is a command case, create a new `<case-slug>.md` file under the relevant leaf-command folder and add it to that folder `README.md` index
-2. if it is a workflow case, add it to `docs/tests/orch/workflows/README.md`
-3. add the new slug to `Authored Case Register`
-4. update `Current Snapshot` and `Document Progress`
-
-## Definition Of Done
-
-This roadmap is complete only when all of the following are true:
-
-- every implemented `orch` leaf command has a corresponding document folder
-- each planned command index and case document exists
-- each pending case slug has been either authored or explicitly deferred
-- the authored-case register matches the actual Markdown files on disk
-- a new agent can pick any future case and know exactly where it should be written
diff --git a/docs/tests/orch/_shared/README.md b/docs/tests/orch/_shared/README.md
deleted file mode 100644
index cf22b9a..0000000
--- a/docs/tests/orch/_shared/README.md
+++ /dev/null
@@ -1,137 +0,0 @@
-# Orch Shared Test Conventions
-
-## Purpose
-
-This document captures shared assumptions used by multiple `orch` test-plan documents so command and workflow files can stay focused on behavior instead of repeating setup boilerplate.
-
-## Recommended Fixture Shape
-
-Use an isolated temp workspace per case:
-
-- database path: `TMPDIR/coord.db`
-- optional repository path: `TMPDIR/repo`
-- optional workspace root: `TMPDIR/worktrees` or `REPO/.orch/worktrees`
-- optional body file: `TMPDIR/body.md`
-
-Recommended minimal bootstrap command:
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_demo_001 --goal "Demo run"
-```
-
-Some cases need additional bootstrap:
-
-- `task add` for task-oriented flows
-- `dep add` for dependency-gating flows
-- `inbox` commands when simulating worker claim, progress, blocked, done, or fail transitions
-
-## Global Flags
-
-Root-level flags apply to every `orch` subcommand:
-
-- `--db`: SQLite database path, default `.agents/coord.db`
-- `--json`: emit machine-readable JSON
-
-The current root command does not define a global `--agent`; worker-side activity should be modeled through `inbox` using the same database.
-
-## Success JSON Contract
-
-Successful JSON output uses this shape:
-
-```json
-{
-  "ok": true,
-  "command": "dispatch",
-  "data": {}
-}
-```
-
-Shared assertion points:
-
-- `ok` is `true`
-- `command` matches the invoked leaf command
-- `data` contains the command-specific payload
-
-## Error JSON Contract
-
-Failure JSON output uses this shape:
-
-```json
-{
-  "ok": false,
-  "error": {
-    "code": "invalid_input",
-    "message": "..."
-  }
-}
-```
-
-Shared assertion points:
-
-- `ok` is `false`
-- `error.code` matches the stable contract
-- `error.message` is present and human-readable
-
-## Exit Code Contract
-
-The current `orch` CLI contract primarily uses these exit codes:
-
-| Exit Code | Meaning | Typical Error Code |
-| --- | --- | --- |
-| `0` | success | none |
-| `30` | invalid input, invalid state, or usage-style error | `invalid_input` or `invalid_state` |
-| `40` | referenced run, task, or thread missing | `not_found` |
-| `50` | unexpected internal failure | `internal_error` |
-
-When a case expects failure, assert both the exit code and the JSON error code.
-
-## Body Input Rules
-
-Commands that support `--body` and `--body-file` should be documented with these shared rules:
-
-- `--body` and `--body-file` are mutually exclusive
-- `--body-file` content is read verbatim into the dispatched or answer body
-- unreadable `--body-file` should be treated as `invalid_input`
-
-Relevant commands:
-
-- `dispatch`
-- `answer`
-- `retry`
-
-## Worktree And Repo Rules
-
-Cases covering worktree behavior should state:
-
-- whether the source repository is clean or dirty
-- whether `--execution-mode code` is selected explicitly
-- whether `--base-ref` is omitted or explicitly provided
-- where the expected worktree path should be created
-
-When worktree behavior is under test, assert at least:
-
-- attempt `base_ref`
-- attempt `base_commit`
-- attempt `branch_name`
-- attempt `worktree_path`
-- attempt `workspace_status`
-
-## Direct DB Inspection
-
-Most `orch` cases should stay at the CLI contract level, but a few manual reproduction flows need direct SQL reads to recover attempt-to-thread mappings that the current `orch` CLI does not print in a standalone query command.
-
-When a case truly needs that mapping:
-
-- use a read-only `sqlite3` query against `TMPDIR/coord.db`
-- prefer querying `task_attempts` by stable keys such as `run_id`, `task_id`, and `attempt_no`
-- treat the SQL read as fixture setup for the next CLI command, not as the main assertion target
-
-Typical example:
-
-```bash
-sqlite3 TMPDIR/coord.db "SELECT thread_id FROM task_attempts WHERE run_id = 'council_blog_001' AND task_id = 'CR1' AND attempt_no = 1;"
-```
-
-## Workflow Authoring Rule
-
-If a case spans multiple `orch` commands, place the end-to-end narrative in `workflows/README.md` first, then add narrower command-level cases only when they are easier to reason about in isolation.
diff --git a/docs/tests/orch/answer/README.md b/docs/tests/orch/answer/README.md
deleted file mode 100644
index ac48dbe..0000000
--- a/docs/tests/orch/answer/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Orch `answer` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `answer-appends-answer-to-active-thread` | [answer-appends-answer-to-active-thread.md](./answer-appends-answer-to-active-thread.md) | appends an inbox answer message onto the active blocked attempt thread |
-| `answer-accepts-payload-json-without-body` | [answer-accepts-payload-json-without-body.md](./answer-accepts-payload-json-without-body.md) | accepts structured `--payload-json` input even when no body text is provided |
-| `answer-rejects-empty-body-and-payload` | [answer-rejects-empty-body-and-payload.md](./answer-rejects-empty-body-and-payload.md) | rejects an answer request that provides neither body text nor payload JSON |
diff --git a/docs/tests/orch/answer/answer-accepts-payload-json-without-body.md b/docs/tests/orch/answer/answer-accepts-payload-json-without-body.md
deleted file mode 100644
index b64f3d3..0000000
--- a/docs/tests/orch/answer/answer-accepts-payload-json-without-body.md
+++ /dev/null
@@ -1,37 +0,0 @@
-# Case: `answer-accepts-payload-json-without-body`
-
-## 用例意义
-
-验证 `answer` 在未提供 `--body` 的情况下，仍可通过纯 `--payload-json` 向当前阻塞尝试写回结构化决策。
-
-## 前置条件
-
-- 运行 `run_blog_002` 中的任务 `T2` 已处于 `blocked`
-- `blocked` 列表中可见 `T2`
-- 已知该阻塞尝试对应线程为 `THREAD_ID`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json answer --run run_blog_002 --task T2 --payload-json '{"decision":"stdout","source":"leader"}'
-inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-```
-
-## 预期输出
-
-- `answer` 退出码为 `0`
-- `answer.data.message.kind == "answer"`
-- `answer.data.message.payload_json.decision == "stdout"`
-- `answer.data.message.payload_json.source == "leader"`
-- `show.data.messages` 末尾新增一条 `kind=answer` 的消息
-- 末尾消息的 `payload_json.decision == "stdout"`
-
-## 断言结论
-
-- `answer` 不要求 leader 必须提供纯文本正文；结构化 payload 本身就可以构成有效答复
-- worker 可以从同一条 `answer` 消息里读取结构化决策，而不必依赖约定俗成的正文格式
-
-## 补充约束
-
-- `--payload-json` 必须是合法 JSON；非法值应返回 `invalid_input`
-- `--body` 与 `--body-file` 仍然互斥，即使本用例不使用它们
diff --git a/docs/tests/orch/answer/answer-appends-answer-to-active-thread.md b/docs/tests/orch/answer/answer-appends-answer-to-active-thread.md
deleted file mode 100644
index ee7b615..0000000
--- a/docs/tests/orch/answer/answer-appends-answer-to-active-thread.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# Case: `answer-appends-answer-to-active-thread`
-
-## 用例意义
-
-验证 `answer` 会把 leader 的答复写回当前阻塞尝试的 inbox 线程，并以 `answer` 消息形式供 worker 继续消费。
-
-## 前置条件
-
-- 运行 `run_blog_002` 中的任务 `T2` 已处于 `blocked`
-- `blocked` 列表中可见 `T2`
-- 已知该阻塞尝试对应线程为 `THREAD_ID`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json answer --run run_blog_002 --task T2 --body "Use stdout for MVP."
-inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-```
-
-## 预期输出
-
-- `answer` 退出码为 `0`
-- `answer.data.message.kind == "answer"`
-- `answer.data.task.task_id == "T2"`
-- `show.data.messages` 末尾新增一条 `kind=answer` 的消息
-- 末尾消息 `body == "Use stdout for MVP."`
-
-## 断言结论
-
-- `answer` 的本质是向活动线程追加 leader 决策消息，而不是直接修改任务状态
-- worker 仍需继续通过 `inbox` 或后续 `reconcile` 推进任务状态
-
-## 补充约束
-
-- `answer` 支持 `--body-file` 与 `--payload-json`
-- `--body` 与 `--body-file` 互斥；若两者都为空，则至少需要提供 `--payload-json`
diff --git a/docs/tests/orch/answer/answer-rejects-empty-body-and-payload.md b/docs/tests/orch/answer/answer-rejects-empty-body-and-payload.md
deleted file mode 100644
index aebe648..0000000
--- a/docs/tests/orch/answer/answer-rejects-empty-body-and-payload.md
+++ /dev/null
@@ -1,29 +0,0 @@
-# Case: `answer-rejects-empty-body-and-payload`
-
-## 用例意义
-
-验证 `answer` 在既没有正文也没有结构化 payload 时返回稳定输入错误，而不是写入空答复消息。
-
-## 前置条件
-
-- 运行 `run_blog_002` 中的任务 `T2` 已处于 `blocked`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json answer --run run_blog_002 --task T2
-```
-
-## 预期输出
-
-- 退出码为 `30`
-- JSON 错误码为 `invalid_input`
-
-## 断言结论
-
-- `answer` 至少需要一种有效输入载荷：正文或 `payload-json`
-- 空答复会在写入线程前被拒绝，而不是生成一条语义不明的 `answer` 消息
-
-## 补充约束
-
-- 若同时传入 `--body` 和 `--body-file`，也应返回 `invalid_input`
diff --git a/docs/tests/orch/blocked/README.md b/docs/tests/orch/blocked/README.md
deleted file mode 100644
index e092694..0000000
--- a/docs/tests/orch/blocked/README.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Orch `blocked` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `blocked-lists-latest-question-for-blocked-task` | [blocked-lists-latest-question-for-blocked-task.md](./blocked-lists-latest-question-for-blocked-task.md) | lists blocked tasks together with the latest worker question payload |
diff --git a/docs/tests/orch/blocked/blocked-lists-latest-question-for-blocked-task.md b/docs/tests/orch/blocked/blocked-lists-latest-question-for-blocked-task.md
deleted file mode 100644
index 2a79071..0000000
--- a/docs/tests/orch/blocked/blocked-lists-latest-question-for-blocked-task.md
+++ /dev/null
@@ -1,38 +0,0 @@
-# Case: `blocked-lists-latest-question-for-blocked-task`
-
-## 用例意义
-
-验证 `blocked` 会列出当前阻塞任务，并附带最新问题消息，便于 leader 直接做决策。
-
-## 前置条件
-
-- 已创建运行 `run_blog_002`
-- 已创建任务 `T1`、`T2`，且 `T2` 依赖 `T1`
-- `T1` 已完成并经 `reconcile` 推进，使 `T2` 变为 `ready`
-- `T2` 已完成 `dispatch`
-- `worker-b` 已 `claim` `T2` 对应线程，并通过 `inbox update --status blocked` 写入问题
-- 最近一次 `reconcile` 已执行
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json blocked --run run_blog_002
-```
-
-## 预期输出
-
-- 退出码为 `0`
-- `blocked.data.blocked` 长度为 `1`
-- 唯一条目的 `task.task_id == "T2"`
-- `question.kind == "question"`
-- `question.summary == "Need logging decision"`
-- `question.payload_json.question == "stdout or stderr?"`
-
-## 断言结论
-
-- `blocked` 返回的不只是任务状态，还会附带 leader 真正需要回答的问题消息
-- 该命令适合作为 leader 的“待答复队列”入口，而不是只做状态列表展示
-
-## 补充约束
-
-- 若没有阻塞任务，非 JSON 输出会打印 `no blocked tasks`
diff --git a/docs/tests/orch/cancel/README.md b/docs/tests/orch/cancel/README.md
deleted file mode 100644
index 5dbd0cd..0000000
--- a/docs/tests/orch/cancel/README.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# Orch `cancel` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `cancel-cancels-single-task` | [cancel-cancels-single-task.md](./cancel-cancels-single-task.md) | cancels one task without implicitly cancelling unrelated tasks in the same run |
-| `cancel-cancels-entire-run` | [cancel-cancels-entire-run.md](./cancel-cancels-entire-run.md) | cancels the run and forces every task into the cancelled terminal state |
diff --git a/docs/tests/orch/cancel/cancel-cancels-entire-run.md b/docs/tests/orch/cancel/cancel-cancels-entire-run.md
deleted file mode 100644
index d97b166..0000000
--- a/docs/tests/orch/cancel/cancel-cancels-entire-run.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# Case: `cancel-cancels-entire-run`
-
-## 用例意义
-
-验证不带 `--task` 的 `cancel` 会取消整个运行，并把所有任务推进到 `cancelled`。
-
-## 前置条件
-
-- 运行 `run_blog_cancel_001` 已存在
-- 该运行下至少有 `T1`、`T2` 两个任务
-- 在执行本用例前，可能已有单任务取消发生
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json cancel --run run_blog_cancel_001 --reason "Stop the run."
-orch --db TMPDIR/coord.db --json status --run run_blog_cancel_001
-```
-
-## 预期输出
-
-- `cancel` 退出码为 `0`
-- `cancel.data.run.status == "cancelled"`
-- `status.data.run.status == "cancelled"`
-- `status.data.tasks` 中所有任务的 `status` 都为 `cancelled`
-
-## 断言结论
-
-- 运行级取消会级联终止运行下的全部任务
-- 该命令是 leader 主动停止整个调度的主入口，而不是只做标记
diff --git a/docs/tests/orch/cancel/cancel-cancels-single-task.md b/docs/tests/orch/cancel/cancel-cancels-single-task.md
deleted file mode 100644
index 9bb045c..0000000
--- a/docs/tests/orch/cancel/cancel-cancels-single-task.md
+++ /dev/null
@@ -1,32 +0,0 @@
-# Case: `cancel-cancels-single-task`
-
-## 用例意义
-
-验证 `cancel --task` 只取消指定任务，不会隐式取消同一运行中的其他任务。
-
-## 前置条件
-
-- 已创建运行 `run_blog_cancel_001`
-- 已创建任务 `T1`、`T2`
-- `T1` 已完成 `dispatch`
-- 已知 `T1` 对应线程为 `THREAD_ID`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json cancel --run run_blog_cancel_001 --task T1 --reason "Task is no longer needed."
-orch --db TMPDIR/coord.db --json status --run run_blog_cancel_001
-inbox --db TMPDIR/coord.db --json show --thread THREAD_ID
-```
-
-## 预期输出
-
-- `cancel` 退出码为 `0`
-- `status` 中 `T1.status == "cancelled"`
-- `status` 中 `T2` 仍保持非 `cancelled` 状态
-- `show.data.thread.status == "cancelled"`，指向 `T1` 的原线程
-
-## 断言结论
-
-- 单任务取消是局部控制动作，不会把运行整体终止
-- 对已分派任务，取消也会同步终止对应 inbox 线程，避免 worker 继续执行
diff --git a/docs/tests/orch/cleanup/README.md b/docs/tests/orch/cleanup/README.md
deleted file mode 100644
index 465cd59..0000000
--- a/docs/tests/orch/cleanup/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Orch `cleanup` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `cleanup-removes-completed-worktree` | [cleanup-removes-completed-worktree.md](./cleanup-removes-completed-worktree.md) | removes a completed attempt worktree and records the cleanup result |
-| `cleanup-rejects-attempt-without-task` | [cleanup-rejects-attempt-without-task.md](./cleanup-rejects-attempt-without-task.md) | rejects `--attempt` when no matching `--task` selector is provided |
-| `cleanup-returns-no-matching-work-when-filters-miss` | [cleanup-returns-no-matching-work-when-filters-miss.md](./cleanup-returns-no-matching-work-when-filters-miss.md) | returns the stable no-matching-work contract when cleanup filters yield no candidates |
diff --git a/docs/tests/orch/cleanup/cleanup-rejects-attempt-without-task.md b/docs/tests/orch/cleanup/cleanup-rejects-attempt-without-task.md
deleted file mode 100644
index 6b3aaeb..0000000
--- a/docs/tests/orch/cleanup/cleanup-rejects-attempt-without-task.md
+++ /dev/null
@@ -1,29 +0,0 @@
-# Case: `cleanup-rejects-attempt-without-task`
-
-## 用例意义
-
-验证 `cleanup` 在使用 `--attempt` 精确选择尝试时，要求同时提供 `--task`，避免对 run 级别尝试号产生歧义。
-
-## 前置条件
-
-- 已创建运行 `run_blog_cleanup_002`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json cleanup --run run_blog_cleanup_002 --attempt 1
-```
-
-## 预期输出
-
-- 退出码为 `30`
-- JSON 错误码为 `invalid_input`
-
-## 断言结论
-
-- `cleanup` 的选择器组合在查询前就会进行基本输入校验
-- `--attempt` 不是独立的 run 级过滤器，必须依附具体 `task`
-
-## 补充约束
-
-- 若既未提供 `--task`，也未提供 `--attempt` 或 `--all-completed`，同样应返回 `invalid_input`
diff --git a/docs/tests/orch/cleanup/cleanup-removes-completed-worktree.md b/docs/tests/orch/cleanup/cleanup-removes-completed-worktree.md
deleted file mode 100644
index e2c2c94..0000000
--- a/docs/tests/orch/cleanup/cleanup-removes-completed-worktree.md
+++ /dev/null
@@ -1,37 +0,0 @@
-# Case: `cleanup-removes-completed-worktree`
-
-## 用例意义
-
-验证 `cleanup` 会移除已完成尝试的 worktree，并把清理结果返回给 leader。
-
-## 前置条件
-
-- 已创建运行 `run_blog_cleanup_001`
-- 已创建任务 `T1`
-- `T1` 已通过严格 worktree 模式完成 `dispatch`
-- `worker-a` 已完成 `claim` 并通过 `inbox done` 把线程推进到 `done`
-- 最近一次 `reconcile` 已执行，使任务状态同步为 `done`
-- 已知当前尝试的 worktree 路径为 `WORKTREE_PATH`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json cleanup --run run_blog_cleanup_001 --task T1
-```
-
-## 预期输出
-
-- 退出码为 `0`
-- `cleanup.data.cleaned` 长度为 `1`
-- 唯一记录对应 `T1` 的已完成尝试
-- `WORKTREE_PATH` 在文件系统上已不存在
-
-## 断言结论
-
-- `cleanup` 针对的是尝试工作区资源，不会改变任务的完成结果
-- 成功清理后，leader 可以安全回收已终态尝试占用的 worktree
-
-## 补充约束
-
-- `cleanup` 支持按 `--task`、`--attempt` 或 `--all-completed` 选择范围
-- `--force` 用于非常规清理；本用例验证的是常规完成态清理路径
diff --git a/docs/tests/orch/cleanup/cleanup-returns-no-matching-work-when-filters-miss.md b/docs/tests/orch/cleanup/cleanup-returns-no-matching-work-when-filters-miss.md
deleted file mode 100644
index 9142beb..0000000
--- a/docs/tests/orch/cleanup/cleanup-returns-no-matching-work-when-filters-miss.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# Case: `cleanup-returns-no-matching-work-when-filters-miss`
-
-## 用例意义
-
-验证 `cleanup` 在筛选条件没有命中任何可清理 worktree 时，返回稳定的“无匹配工作”契约，而不是成功空列表。
-
-## 前置条件
-
-- 已创建运行 `run_blog_cleanup_003`
-- 已创建任务 `T1`
-- 当前 run 中不存在 `workspace_status` 为 `completed` 或 `abandoned` 的 worktree 尝试
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_cleanup_003 --goal "Validate cleanup empty result"
-orch --db TMPDIR/coord.db --json task add --run run_blog_cleanup_003 --task T1 --title "Prepare cleanup target"
-orch --db TMPDIR/coord.db --json cleanup --run run_blog_cleanup_003 --task T1
-```
-
-## 预期输出
-
-- `cleanup` 退出码为 `10`
-- JSON 错误码为 `no_matching_work`
-
-## 断言结论
-
-- `cleanup` 对空筛选结果使用显式 no-matching-work 信号，而不是返回成功空数组
-- leader 或脚本可以据此区分“没有候选 worktree”与“清理已成功完成”
-
-## 补充约束
-
-- 该契约同样适用于使用 `--all-completed` 或 `--attempt` 时筛选不到候选的场景
diff --git a/docs/tests/orch/council-report/README.md b/docs/tests/orch/council-report/README.md
deleted file mode 100644
index 0b40252..0000000
--- a/docs/tests/orch/council-report/README.md
+++ /dev/null
@@ -1,12 +0,0 @@
-# Orch `council report` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `council-report-defaults-to-consensus-and-majority` | [council-report-defaults-to-consensus-and-majority.md](./council-report-defaults-to-consensus-and-majority.md) | renders a markdown report that omits minority recommendations by default and writes a markdown artifact |
-| `council-report-show-all-includes-minority` | [council-report-show-all-includes-minority.md](./council-report-show-all-includes-minority.md) | includes minority recommendations when `--show all` is requested |
-| `council-report-json-shape-is-stable` | [council-report-json-shape-is-stable.md](./council-report-json-shape-is-stable.md) | returns the stable JSON report contract with summary, filtered groups, and artifact metadata |
-| `council-report-rejects-before-tally` | [council-report-rejects-before-tally.md](./council-report-rejects-before-tally.md) | rejects report generation with `invalid_state` when grouped recommendations have not been tallied yet |
-| `council-report-rejects-invalid-show` | [council-report-rejects-invalid-show.md](./council-report-rejects-invalid-show.md) | rejects unsupported `--show` bucket values with `invalid_input` |
-| `council-report-defaults-to-consensus-when-run-is-only-unanimous` | [council-report-defaults-to-consensus-when-run-is-only-unanimous.md](./council-report-defaults-to-consensus-when-run-is-only-unanimous.md) | defaults omitted `--show` to `consensus` when the run was started with `--only-unanimous` |
diff --git a/docs/tests/orch/council-report/council-report-defaults-to-consensus-and-majority.md b/docs/tests/orch/council-report/council-report-defaults-to-consensus-and-majority.md
deleted file mode 100644
index 5746d02..0000000
--- a/docs/tests/orch/council-report/council-report-defaults-to-consensus-and-majority.md
+++ /dev/null
@@ -1,70 +0,0 @@
-# Case: `council-report-defaults-to-consensus-and-majority`
-
-## 用例意义
-
-验证 `council report` 默认只展示 `consensus` 与 `majority` bucket，同时生成 markdown artifact。
-
-## 前置条件
-
-- 使用隔离的临时目录 `TMPDIR`
-- 已准备好能产出 `consensus`、`majority`、`minority` 三类 recommendation 的 reviewer 输出 JSON
-- 本地可使用 `sqlite3` 从 `task_attempts` 中读取 reviewer thread ID
-
-## 输入
-
-```bash
-cat <<'EOF' > TMPDIR/architecture-review.json
-{"reviewer_role":"architecture-reviewer","findings":[{"title":"Split contracts","summary":"Transport contracts are mixed into UI code.","proposal":"Move API contract definitions into a dedicated module.","rationale":"This lowers coupling.","confidence":"high","tags":["architecture"],"target_refs":{"repo_path":"."}},{"title":"Share helpers","summary":"Council report rendering paths are repeated.","proposal":"Introduce shared council coordinator helpers for report rendering.","rationale":"This keeps report assembly consistent.","confidence":"medium","tags":["reporting"],"target_refs":{"repo_path":"."}}]}
-EOF
-
-cat <<'EOF' > TMPDIR/implementation-review.json
-{"reviewer_role":"implementation-reviewer","findings":[{"title":"Extract contracts","summary":"Shared transport shapes are duplicated.","proposal":"Move API contract definitions into dedicated module","rationale":"This reduces duplication.","confidence":"high","tags":["maintainability"],"target_refs":{"repo_path":"."}},{"title":"Reuse report helpers","summary":"Formatting logic should stay shared.","proposal":"Introduce shared council coordinator helpers for report rendering","rationale":"This avoids formatter drift.","confidence":"medium","tags":["reporting"],"target_refs":{"repo_path":"."}}]}
-EOF
-
-cat <<'EOF' > TMPDIR/risk-review.json
-{"reviewer_role":"risk-reviewer","findings":[{"title":"Lock contracts","summary":"Contract drift becomes risky over time.","proposal":"Move API contract definitions into a dedicated module.","rationale":"This reduces integration regressions.","confidence":"high","tags":["risk"],"target_refs":{"repo_path":"."}},{"title":"Cover JSON output","summary":"The council report response should stay stable.","proposal":"Add regression tests for council report JSON output.","rationale":"This catches contract regressions earlier.","confidence":"high","tags":["testing"],"target_refs":{"repo_path":"."}}]}
-EOF
-
-orch --db TMPDIR/coord.db --json council start \
-  --run council_blog_report_001 \
-  --target "Review the council reporting flow."
-
-THREAD_ID_CR1=$(sqlite3 TMPDIR/coord.db "SELECT thread_id FROM task_attempts WHERE run_id = 'council_blog_report_001' AND task_id = 'CR1' AND attempt_no = 1;")
-THREAD_ID_CR2=$(sqlite3 TMPDIR/coord.db "SELECT thread_id FROM task_attempts WHERE run_id = 'council_blog_report_001' AND task_id = 'CR2' AND attempt_no = 1;")
-THREAD_ID_CR3=$(sqlite3 TMPDIR/coord.db "SELECT thread_id FROM task_attempts WHERE run_id = 'council_blog_report_001' AND task_id = 'CR3' AND attempt_no = 1;")
-
-inbox --db TMPDIR/coord.db --json claim --agent architecture-reviewer --thread "$THREAD_ID_CR1"
-inbox --db TMPDIR/coord.db --json done --agent architecture-reviewer --thread "$THREAD_ID_CR1" --summary "Review complete" --body-file TMPDIR/architecture-review.json
-
-inbox --db TMPDIR/coord.db --json claim --agent implementation-reviewer --thread "$THREAD_ID_CR2"
-inbox --db TMPDIR/coord.db --json done --agent implementation-reviewer --thread "$THREAD_ID_CR2" --summary "Review complete" --body-file TMPDIR/implementation-review.json
-
-inbox --db TMPDIR/coord.db --json claim --agent risk-reviewer --thread "$THREAD_ID_CR3"
-inbox --db TMPDIR/coord.db --json done --agent risk-reviewer --thread "$THREAD_ID_CR3" --summary "Review complete" --body-file TMPDIR/risk-review.json
-
-orch --db TMPDIR/coord.db --json council tally \
-  --run council_blog_report_001 \
-  --similarity normal
-
-orch --db TMPDIR/coord.db council report \
-  --run council_blog_report_001
-```
-
-## 预期输出
-
-- `council report` 退出码为 `0`
-- stdout 是 markdown，而不是 JSON
-- 报告正文包含 `# Council Review Report`
-- 报告正文包含 `## Consensus`
-- 报告正文包含 `## Majority`
-- 报告正文不包含 `## Minority`
-- `TMPDIR/.orch/reports/council_blog_report_001.md` 被创建，且内容与 stdout 一致
-
-## 断言结论
-
-- `council report` 的默认呈现策略是“主报告展示 consensus + majority，隐藏 minority”
-- 该命令既是渲染命令，也是 artifact 产出命令
-
-## 补充约束
-
-- 默认 bucket 行为受 council run 的 `only_unanimous` 配置影响（该配置由 `council start --only-unanimous` 设置）；当前常规路径默认仍是 `consensus,majority`
diff --git a/docs/tests/orch/council-report/council-report-defaults-to-consensus-when-run-is-only-unanimous.md b/docs/tests/orch/council-report/council-report-defaults-to-consensus-when-run-is-only-unanimous.md
deleted file mode 100644
index 698d4c4..0000000
--- a/docs/tests/orch/council-report/council-report-defaults-to-consensus-when-run-is-only-unanimous.md
+++ /dev/null
@@ -1,73 +0,0 @@
-# Case: `council-report-defaults-to-consensus-when-run-is-only-unanimous`
-
-## 用例意义
-
-验证当 council run 以 `--only-unanimous` 启动时，省略 `--show` 的 `council report --json` 默认只返回 `consensus` bucket。
-
-## 前置条件
-
-- 使用隔离的临时目录 `TMPDIR`
-- 已准备好与 `council-report-defaults-to-consensus-and-majority` 相同的 3 份 reviewer 输出 JSON
-- 本地可使用 `sqlite3` 从 `task_attempts` 中读取 reviewer thread ID
-
-## 输入
-
-```bash
-cat <<'EOF' > TMPDIR/architecture-review.json
-{"reviewer_role":"architecture-reviewer","findings":[{"title":"Split contracts","summary":"Transport contracts are mixed into UI code.","proposal":"Move API contract definitions into a dedicated module.","rationale":"This lowers coupling.","confidence":"high","tags":["architecture"],"target_refs":{"repo_path":"."}},{"title":"Share helpers","summary":"Council report rendering paths are repeated.","proposal":"Introduce shared council coordinator helpers for report rendering.","rationale":"This keeps report assembly consistent.","confidence":"medium","tags":["reporting"],"target_refs":{"repo_path":"."}}]}
-EOF
-
-cat <<'EOF' > TMPDIR/implementation-review.json
-{"reviewer_role":"implementation-reviewer","findings":[{"title":"Extract contracts","summary":"Shared transport shapes are duplicated.","proposal":"Move API contract definitions into dedicated module","rationale":"This reduces duplication.","confidence":"high","tags":["maintainability"],"target_refs":{"repo_path":"."}},{"title":"Reuse report helpers","summary":"Formatting logic should stay shared.","proposal":"Introduce shared council coordinator helpers for report rendering","rationale":"This avoids formatter drift.","confidence":"medium","tags":["reporting"],"target_refs":{"repo_path":"."}}]}
-EOF
-
-cat <<'EOF' > TMPDIR/risk-review.json
-{"reviewer_role":"risk-reviewer","findings":[{"title":"Lock contracts","summary":"Contract drift becomes risky over time.","proposal":"Move API contract definitions into a dedicated module.","rationale":"This reduces integration regressions.","confidence":"high","tags":["risk"],"target_refs":{"repo_path":"."}},{"title":"Cover JSON output","summary":"The council report response should stay stable.","proposal":"Add regression tests for council report JSON output.","rationale":"This catches contract regressions earlier.","confidence":"high","tags":["testing"],"target_refs":{"repo_path":"."}}]}
-EOF
-
-orch --db TMPDIR/coord.db --json council start \
-  --run council_blog_report_011 \
-  --target "Review the council reporting flow." \
-  --only-unanimous
-
-THREAD_ID_CR1=$(sqlite3 TMPDIR/coord.db "SELECT thread_id FROM task_attempts WHERE run_id = 'council_blog_report_011' AND task_id = 'CR1' AND attempt_no = 1;")
-THREAD_ID_CR2=$(sqlite3 TMPDIR/coord.db "SELECT thread_id FROM task_attempts WHERE run_id = 'council_blog_report_011' AND task_id = 'CR2' AND attempt_no = 1;")
-THREAD_ID_CR3=$(sqlite3 TMPDIR/coord.db "SELECT thread_id FROM task_attempts WHERE run_id = 'council_blog_report_011' AND task_id = 'CR3' AND attempt_no = 1;")
-
-inbox --db TMPDIR/coord.db --json claim --agent architecture-reviewer --thread "$THREAD_ID_CR1"
-inbox --db TMPDIR/coord.db --json done --agent architecture-reviewer --thread "$THREAD_ID_CR1" --summary "Review complete" --body-file TMPDIR/architecture-review.json
-
-inbox --db TMPDIR/coord.db --json claim --agent implementation-reviewer --thread "$THREAD_ID_CR2"
-inbox --db TMPDIR/coord.db --json done --agent implementation-reviewer --thread "$THREAD_ID_CR2" --summary "Review complete" --body-file TMPDIR/implementation-review.json
-
-inbox --db TMPDIR/coord.db --json claim --agent risk-reviewer --thread "$THREAD_ID_CR3"
-inbox --db TMPDIR/coord.db --json done --agent risk-reviewer --thread "$THREAD_ID_CR3" --summary "Review complete" --body-file TMPDIR/risk-review.json
-
-orch --db TMPDIR/coord.db --json council tally \
-  --run council_blog_report_011 \
-  --similarity normal
-
-orch --db TMPDIR/coord.db --json council report \
-  --run council_blog_report_011
-```
-
-## 预期输出
-
-- 最后一条命令（`council report`）退出码为 `0`
-- `ok == true`
-- `data.run_id == "council_blog_report_011"`
-- `data.show == ["consensus"]`
-- `data.summary.consensus == 1`
-- `data.summary.majority == 1`
-- `data.summary.minority == 1`
-- `data.grouped_recommendations` 长度为 `1`
-- 唯一返回的 recommendation 的 `bucket == "consensus"`
-
-## 断言结论
-
-- `--only-unanimous` 不会删除持久化的 `majority` 或 `minority` 数据，但会改变省略 `--show` 时的默认输出策略
-- leader 若希望在 unanimous-only run 中仍查看 `majority`，必须显式传入 `--show`
-
-## 补充约束
-
-- 即使这里使用 `--json` 断言 `show` 默认值，命令仍会写出 markdown artifact
diff --git a/docs/tests/orch/council-report/council-report-json-shape-is-stable.md b/docs/tests/orch/council-report/council-report-json-shape-is-stable.md
deleted file mode 100644
index 699c80a..0000000
--- a/docs/tests/orch/council-report/council-report-json-shape-is-stable.md
+++ /dev/null
@@ -1,41 +0,0 @@
-# Case: `council-report-json-shape-is-stable`
-
-## 用例意义
-
-验证 `council report --json` 返回稳定 JSON 契约，包含 `show`、`summary`、过滤后的 grouped recommendations，以及 report artifact 元数据。
-
-## 前置条件
-
-- 已按 `council-report-defaults-to-consensus-and-majority` 的前置流程完成 reviewer 输出与 `council tally`
-- 运行 ID 为 `council_blog_report_003`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json council report \
-  --run council_blog_report_003
-```
-
-## 预期输出
-
-- 退出码为 `0`
-- `ok == true`
-- `command == "council report"`
-- `data.run_id == "council_blog_report_003"`
-- `data.show == ["consensus","majority"]`
-- `data.summary.consensus == 1`
-- `data.summary.majority == 1`
-- `data.summary.minority == 1`
-- `data.report_artifacts` 长度为 `1`
-- 首个 artifact 的 `kind == "markdown"`
-- `data.grouped_recommendations` 长度为 `2`
-- 第一组 recommendation 的 `bucket == "consensus"`
-
-## 断言结论
-
-- `--json` 模式返回的是 leader 可继续消费的稳定 machine-readable contract
-- 默认 JSON 输出只返回被当前 `show` 过滤后的 recommendation，而 summary 仍保留全量 bucket 统计
-
-## 补充约束
-
-- 即使 `--json` 模式返回 artifact path，markdown artifact 仍应实际落盘
diff --git a/docs/tests/orch/council-report/council-report-rejects-before-tally.md b/docs/tests/orch/council-report/council-report-rejects-before-tally.md
deleted file mode 100644
index 5f3d881..0000000
--- a/docs/tests/orch/council-report/council-report-rejects-before-tally.md
+++ /dev/null
@@ -1,32 +0,0 @@
-# Case: `council-report-rejects-before-tally`
-
-## 用例意义
-
-验证 `council report` 在还没有持久化 grouped recommendations 时会返回稳定的 `invalid_state` 契约，而不是生成空报告。
-
-## 前置条件
-
-- 使用隔离的临时目录 `TMPDIR`
-- 当前数据库中尚未对该 council run 执行 `council tally`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json council start \
-  --run council_blog_report_010 \
-  --target "Review the council reporting flow."
-
-orch --db TMPDIR/coord.db --json council report \
-  --run council_blog_report_010
-```
-
-## 预期输出
-
-- 第二条命令（`council report`）退出码为 `30`
-- JSON 错误码为 `invalid_state`
-- 错误消息指出 grouped recommendations 尚不可用，需先执行 `council tally`
-
-## 断言结论
-
-- `council report` 不是“边读 reviewer 输出边临时汇总”的命令
-- report 阶段依赖已持久化的 `council_groups`，因此 `tally -> report` 的顺序是稳定 CLI 契约
diff --git a/docs/tests/orch/council-report/council-report-rejects-invalid-show.md b/docs/tests/orch/council-report/council-report-rejects-invalid-show.md
deleted file mode 100644
index f2f084e..0000000
--- a/docs/tests/orch/council-report/council-report-rejects-invalid-show.md
+++ /dev/null
@@ -1,29 +0,0 @@
-# Case: `council-report-rejects-invalid-show`
-
-## 用例意义
-
-验证 `council report --show` 对非法 bucket 值返回稳定的 `invalid_input`，避免 leader 误以为未知 bucket 会被静默忽略。
-
-## 前置条件
-
-- 已按 `council-report-defaults-to-consensus-and-majority` 的前置流程完成 reviewer 输出与 `council tally`
-- 运行 ID 为 `council_blog_report_001`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json council report \
-  --run council_blog_report_001 \
-  --show consensus,invalid
-```
-
-## 预期输出
-
-- 退出码为 `30`
-- JSON 错误码为 `invalid_input`
-- 错误消息说明 `--show` 只接受 `consensus`、`majority`、`minority` 或 `all`
-
-## 断言结论
-
-- `--show` 不是宽松过滤参数；未知 bucket 会触发显式输入错误
-- leader 侧脚本可以依赖这一点来尽早发现错误配置，而不是事后对空报告排障
diff --git a/docs/tests/orch/council-report/council-report-show-all-includes-minority.md b/docs/tests/orch/council-report/council-report-show-all-includes-minority.md
deleted file mode 100644
index 334e3de..0000000
--- a/docs/tests/orch/council-report/council-report-show-all-includes-minority.md
+++ /dev/null
@@ -1,29 +0,0 @@
-# Case: `council-report-show-all-includes-minority`
-
-## 用例意义
-
-验证 `council report --show all` 会把默认被省略的 `minority` recommendation 一并展示出来。
-
-## 前置条件
-
-- 已按 `council-report-defaults-to-consensus-and-majority` 的前置流程完成 reviewer 输出与 `council tally`
-- 运行 ID 为 `council_blog_report_002`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db council report \
-  --run council_blog_report_002 \
-  --show all
-```
-
-## 预期输出
-
-- `council report` 退出码为 `0`
-- stdout markdown 同时包含 `## Consensus`、`## Majority`、`## Minority`
-- markdown 中出现 minority proposal，例如 `Add regression tests for council report JSON output.`
-
-## 断言结论
-
-- `--show all` 会覆盖默认的 bucket 过滤策略
-- `minority` recommendation 会保留在持久化数据里，只是默认不进入主报告
diff --git a/docs/tests/orch/council-start/README.md b/docs/tests/orch/council-start/README.md
deleted file mode 100644
index a960dc3..0000000
--- a/docs/tests/orch/council-start/README.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Orch `council start` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `council-start-dispatches-three-reviewers` | [council-start-dispatches-three-reviewers.md](./council-start-dispatches-three-reviewers.md) | creates the council run, dispatches the fixed three reviewer roles, and exposes the expected default metadata |
diff --git a/docs/tests/orch/council-start/council-start-dispatches-three-reviewers.md b/docs/tests/orch/council-start/council-start-dispatches-three-reviewers.md
deleted file mode 100644
index 618afa4..0000000
--- a/docs/tests/orch/council-start/council-start-dispatches-three-reviewers.md
+++ /dev/null
@@ -1,46 +0,0 @@
-# Case: `council-start-dispatches-three-reviewers`
-
-## 用例意义
-
-验证 `council start` 会创建一个新的 council run，并立即分派固定的三位 reviewer：`architecture-reviewer`、`implementation-reviewer`、`risk-reviewer`。
-
-## 前置条件
-
-- 使用隔离的临时目录 `TMPDIR`
-- 目标数据库 `TMPDIR/coord.db` 尚不存在
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json council start \
-  --run council_blog_001 \
-  --target "Review the current blog architecture and propose optimizations." \
-  --target-type mixed \
-  --output both
-
-orch --db TMPDIR/coord.db --json status --run council_blog_001
-```
-
-## 预期输出
-
-- `council start` 退出码为 `0`
-- `start.data.run_id == "council_blog_001"`
-- `start.data.mode == "brainstorm"`
-- `start.data.target_type == "mixed"`
-- `start.data.output == "both"`
-- `start.data.only_unanimous == false`
-- `start.data.reviewers` 长度为 `3`
-- 三个 reviewer 的 `reviewer_role` 分别为 `architecture-reviewer`、`implementation-reviewer`、`risk-reviewer`
-- 三个 reviewer 的 `status` 都是 `dispatched`
-- 后续 `status` 返回 `3` 个 task，run 处于活动中而不是终态
-
-## 断言结论
-
-- `council start` 不只是创建 run 元数据，还会直接完成 reviewer task 的创建与分派
-- v1 reviewer 集合是固定的三角色集合，而不是由用户动态指定
-
-## 补充约束
-
-- 未显式传入 `--mode` 时，默认回退到 `brainstorm`
-- 未显式传入 `--only-unanimous` 时，默认值是 `false`
-- council reviewer task 在当前实现里不应自动申请 code worktree
diff --git a/docs/tests/orch/council-tally/README.md b/docs/tests/orch/council-tally/README.md
deleted file mode 100644
index b104715..0000000
--- a/docs/tests/orch/council-tally/README.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# Orch `council tally` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `council-tally-groups-reviewer-findings-in-normal-mode` | [council-tally-groups-reviewer-findings-in-normal-mode.md](./council-tally-groups-reviewer-findings-in-normal-mode.md) | groups semantically similar reviewer outputs into majority and minority buckets in `normal` mode |
-| `council-tally-keeps-distinct-proposals-in-strict-mode` | [council-tally-keeps-distinct-proposals-in-strict-mode.md](./council-tally-keeps-distinct-proposals-in-strict-mode.md) | preserves wording differences as separate minority groups in `strict` mode |
diff --git a/docs/tests/orch/council-tally/council-tally-groups-reviewer-findings-in-normal-mode.md b/docs/tests/orch/council-tally/council-tally-groups-reviewer-findings-in-normal-mode.md
deleted file mode 100644
index 200bf2d..0000000
--- a/docs/tests/orch/council-tally/council-tally-groups-reviewer-findings-in-normal-mode.md
+++ /dev/null
@@ -1,67 +0,0 @@
-# Case: `council-tally-groups-reviewer-findings-in-normal-mode`
-
-## 用例意义
-
-验证 `council tally --similarity normal` 会把语义相近的 reviewer proposal 合并到同一组，并产出 `majority` / `minority` bucket。
-
-## 前置条件
-
-- 使用隔离的临时目录 `TMPDIR`
-- 本地可使用 `sqlite3` 从 `task_attempts` 中读取 reviewer thread ID
-- 已准备好三份 reviewer 输出 JSON；其中 architecture 与 implementation proposal 语义相近，risk proposal 独立
-
-## 输入
-
-```bash
-cat <<'EOF' > TMPDIR/architecture-review.json
-{"reviewer_role":"architecture-reviewer","findings":[{"title":"Split contracts","summary":"Transport contracts are mixed into UI code.","proposal":"Move API contract definitions into a dedicated module.","rationale":"This lowers coupling.","confidence":"high","tags":["architecture","coupling"],"target_refs":{"repo_path":"."}}]}
-EOF
-
-cat <<'EOF' > TMPDIR/implementation-review.json
-{"reviewer_role":"implementation-reviewer","findings":[{"title":"Extract API contracts","summary":"Shared transport shapes are duplicated.","proposal":"Move API contract definitions into dedicated module","rationale":"This reduces duplication.","confidence":"medium","tags":["maintainability"],"target_refs":{"repo_path":"."}}]}
-EOF
-
-cat <<'EOF' > TMPDIR/risk-review.json
-{"reviewer_role":"risk-reviewer","findings":[{"title":"Add auth integration tests","summary":"Login regressions are hard to catch.","proposal":"Add integration tests for auth flows.","rationale":"This catches regressions earlier.","confidence":"high","tags":["risk","testing"],"target_refs":{"repo_path":"."}}]}
-EOF
-
-orch --db TMPDIR/coord.db --json council start \
-  --run council_blog_tally_001 \
-  --target "Review the current blog architecture."
-
-THREAD_ID_CR1=$(sqlite3 TMPDIR/coord.db "SELECT thread_id FROM task_attempts WHERE run_id = 'council_blog_tally_001' AND task_id = 'CR1' AND attempt_no = 1;")
-THREAD_ID_CR2=$(sqlite3 TMPDIR/coord.db "SELECT thread_id FROM task_attempts WHERE run_id = 'council_blog_tally_001' AND task_id = 'CR2' AND attempt_no = 1;")
-THREAD_ID_CR3=$(sqlite3 TMPDIR/coord.db "SELECT thread_id FROM task_attempts WHERE run_id = 'council_blog_tally_001' AND task_id = 'CR3' AND attempt_no = 1;")
-
-inbox --db TMPDIR/coord.db --json claim --agent architecture-reviewer --thread "$THREAD_ID_CR1"
-inbox --db TMPDIR/coord.db --json done --agent architecture-reviewer --thread "$THREAD_ID_CR1" --summary "Review complete" --body-file TMPDIR/architecture-review.json
-
-inbox --db TMPDIR/coord.db --json claim --agent implementation-reviewer --thread "$THREAD_ID_CR2"
-inbox --db TMPDIR/coord.db --json done --agent implementation-reviewer --thread "$THREAD_ID_CR2" --summary "Review complete" --body-file TMPDIR/implementation-review.json
-
-inbox --db TMPDIR/coord.db --json claim --agent risk-reviewer --thread "$THREAD_ID_CR3"
-inbox --db TMPDIR/coord.db --json done --agent risk-reviewer --thread "$THREAD_ID_CR3" --summary "Review complete" --body-file TMPDIR/risk-review.json
-
-orch --db TMPDIR/coord.db --json council tally \
-  --run council_blog_tally_001 \
-  --similarity normal
-```
-
-## 预期输出
-
-- `council tally` 退出码为 `0`
-- `tally.data.similarity == "normal"`
-- `tally.data.counts.majority == 1`
-- `tally.data.counts.minority == 1`
-- `tally.data.grouped_recommendations` 长度为 `2`
-- 第一组 recommendation 的 `bucket == "majority"`
-- 第一组 recommendation 的 `support_count == 2`
-
-## 断言结论
-
-- `normal` 模式会优先按归一化意图合并 proposal，而不是逐字面比较
-- tally 输出不仅返回统计摘要，还返回分组后的 recommendation 明细
-
-## 补充约束
-
-- reviewer `done` 消息体必须是结构化 JSON；无效 JSON 或缺失 `reviewer_role`/`proposal` 会让 tally 返回 `invalid_input`
diff --git a/docs/tests/orch/council-tally/council-tally-keeps-distinct-proposals-in-strict-mode.md b/docs/tests/orch/council-tally/council-tally-keeps-distinct-proposals-in-strict-mode.md
deleted file mode 100644
index aa63933..0000000
--- a/docs/tests/orch/council-tally/council-tally-keeps-distinct-proposals-in-strict-mode.md
+++ /dev/null
@@ -1,61 +0,0 @@
-# Case: `council-tally-keeps-distinct-proposals-in-strict-mode`
-
-## 用例意义
-
-验证 `council tally --similarity strict` 不会合并 wording 不同的 proposal，即使它们语义接近，也会保留为独立 recommendation。
-
-## 前置条件
-
-- 使用隔离的临时目录 `TMPDIR`
-- 本地可使用 `sqlite3` 从 `task_attempts` 中读取 reviewer thread ID
-- 已准备好三份 reviewer 输出 JSON；其中 architecture 与 implementation proposal 语义相近但措辞不同
-
-## 输入
-
-```bash
-cat <<'EOF' > TMPDIR/architecture-review.json
-{"reviewer_role":"architecture-reviewer","findings":[{"title":"Split contracts","summary":"Transport contracts are mixed into UI code.","proposal":"Move API contract definitions into a dedicated module.","rationale":"This lowers coupling.","confidence":"high","tags":["architecture"],"target_refs":{"repo_path":"."}}]}
-EOF
-
-cat <<'EOF' > TMPDIR/implementation-review.json
-{"reviewer_role":"implementation-reviewer","findings":[{"title":"Extract API contracts","summary":"Shared transport shapes are duplicated.","proposal":"Move API contract definitions into dedicated module","rationale":"This reduces duplication.","confidence":"medium","tags":["maintainability"],"target_refs":{"repo_path":"."}}]}
-EOF
-
-cat <<'EOF' > TMPDIR/risk-review.json
-{"reviewer_role":"risk-reviewer","findings":[{"title":"Add auth integration tests","summary":"Login regressions are hard to catch.","proposal":"Add integration tests for auth flows.","rationale":"This catches regressions earlier.","confidence":"high","tags":["risk"],"target_refs":{"repo_path":"."}}]}
-EOF
-
-orch --db TMPDIR/coord.db --json council start \
-  --run council_blog_tally_002 \
-  --target "Review the current blog architecture."
-
-THREAD_ID_CR1=$(sqlite3 TMPDIR/coord.db "SELECT thread_id FROM task_attempts WHERE run_id = 'council_blog_tally_002' AND task_id = 'CR1' AND attempt_no = 1;")
-THREAD_ID_CR2=$(sqlite3 TMPDIR/coord.db "SELECT thread_id FROM task_attempts WHERE run_id = 'council_blog_tally_002' AND task_id = 'CR2' AND attempt_no = 1;")
-THREAD_ID_CR3=$(sqlite3 TMPDIR/coord.db "SELECT thread_id FROM task_attempts WHERE run_id = 'council_blog_tally_002' AND task_id = 'CR3' AND attempt_no = 1;")
-
-inbox --db TMPDIR/coord.db --json claim --agent architecture-reviewer --thread "$THREAD_ID_CR1"
-inbox --db TMPDIR/coord.db --json done --agent architecture-reviewer --thread "$THREAD_ID_CR1" --summary "Review complete" --body-file TMPDIR/architecture-review.json
-
-inbox --db TMPDIR/coord.db --json claim --agent implementation-reviewer --thread "$THREAD_ID_CR2"
-inbox --db TMPDIR/coord.db --json done --agent implementation-reviewer --thread "$THREAD_ID_CR2" --summary "Review complete" --body-file TMPDIR/implementation-review.json
-
-inbox --db TMPDIR/coord.db --json claim --agent risk-reviewer --thread "$THREAD_ID_CR3"
-inbox --db TMPDIR/coord.db --json done --agent risk-reviewer --thread "$THREAD_ID_CR3" --summary "Review complete" --body-file TMPDIR/risk-review.json
-
-orch --db TMPDIR/coord.db --json council tally \
-  --run council_blog_tally_002 \
-  --similarity strict
-```
-
-## 预期输出
-
-- `council tally` 退出码为 `0`
-- `tally.data.similarity == "strict"`
-- `tally.data.counts.minority == 3`
-- `tally.data.grouped_recommendations` 长度为 `3`
-- 三组 recommendation 都应落入 `minority`
-
-## 断言结论
-
-- `strict` 模式的目标是保留 proposal 的字面差异，而不是宽松合并
-- 当没有 proposal 被合并时，support count 会退化成单 reviewer 支持
diff --git a/docs/tests/orch/council-wait/README.md b/docs/tests/orch/council-wait/README.md
deleted file mode 100644
index 11c8037..0000000
--- a/docs/tests/orch/council-wait/README.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# Orch `council wait` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `council-wait-wakes-when-all-reviewers-complete` | [council-wait-wakes-when-all-reviewers-complete.md](./council-wait-wakes-when-all-reviewers-complete.md) | wakes successfully once all three reviewer threads reach terminal success |
-| `council-wait-times-out-when-reviewers-incomplete` | [council-wait-times-out-when-reviewers-incomplete.md](./council-wait-times-out-when-reviewers-incomplete.md) | returns a stable timeout result while reviewer work remains incomplete |
diff --git a/docs/tests/orch/council-wait/council-wait-times-out-when-reviewers-incomplete.md b/docs/tests/orch/council-wait/council-wait-times-out-when-reviewers-incomplete.md
deleted file mode 100644
index 91489d6..0000000
--- a/docs/tests/orch/council-wait/council-wait-times-out-when-reviewers-incomplete.md
+++ /dev/null
@@ -1,35 +0,0 @@
-# Case: `council-wait-times-out-when-reviewers-incomplete`
-
-## 用例意义
-
-验证 `council wait` 在 reviewer 尚未全部完成时返回稳定的超时结果，而不是误判为成功唤醒。
-
-## 前置条件
-
-- 使用隔离的临时目录 `TMPDIR`
-- 目标数据库 `TMPDIR/coord.db` 尚不存在
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json council start \
-  --run council_blog_wait_002 \
-  --target "Review the current blog architecture."
-
-orch --db TMPDIR/coord.db --json council wait \
-  --run council_blog_wait_002 \
-  --timeout-seconds 1
-```
-
-## 预期输出
-
-- `council wait` 退出码为 `0`
-- `wait.data.woke == false`
-- `wait.data.all_complete == false`
-- `wait.data.reviewers` 长度为 `3`
-- 返回 reviewer 状态集合时，不要求每个 reviewer 已完成
-
-## 断言结论
-
-- `council wait` 的超时结果是显式的“未唤醒”状态，而不是错误退出
-- leader 可以基于同一个返回结构同时处理唤醒与超时两种路径
diff --git a/docs/tests/orch/council-wait/council-wait-wakes-when-all-reviewers-complete.md b/docs/tests/orch/council-wait/council-wait-wakes-when-all-reviewers-complete.md
deleted file mode 100644
index 0b3ac9c..0000000
--- a/docs/tests/orch/council-wait/council-wait-wakes-when-all-reviewers-complete.md
+++ /dev/null
@@ -1,53 +0,0 @@
-# Case: `council-wait-wakes-when-all-reviewers-complete`
-
-## 用例意义
-
-验证 `council wait` 在三位 reviewer 都完成后会被唤醒，并返回完整 reviewer 状态集合。
-
-## 前置条件
-
-- 使用隔离的临时目录 `TMPDIR`
-- 已通过 `council start` 创建 run `council_blog_wait_001`
-- 本地可使用 `sqlite3` 从 `task_attempts` 中读取 reviewer thread ID，用于构造 `inbox` 完成态
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json council start \
-  --run council_blog_wait_001 \
-  --target "Review the current blog architecture."
-
-THREAD_ID_CR1=$(sqlite3 TMPDIR/coord.db "SELECT thread_id FROM task_attempts WHERE run_id = 'council_blog_wait_001' AND task_id = 'CR1' AND attempt_no = 1;")
-THREAD_ID_CR2=$(sqlite3 TMPDIR/coord.db "SELECT thread_id FROM task_attempts WHERE run_id = 'council_blog_wait_001' AND task_id = 'CR2' AND attempt_no = 1;")
-THREAD_ID_CR3=$(sqlite3 TMPDIR/coord.db "SELECT thread_id FROM task_attempts WHERE run_id = 'council_blog_wait_001' AND task_id = 'CR3' AND attempt_no = 1;")
-
-inbox --db TMPDIR/coord.db --json claim --agent architecture-reviewer --thread "$THREAD_ID_CR1"
-inbox --db TMPDIR/coord.db --json done --agent architecture-reviewer --thread "$THREAD_ID_CR1" --summary "Review complete"
-
-inbox --db TMPDIR/coord.db --json claim --agent implementation-reviewer --thread "$THREAD_ID_CR2"
-inbox --db TMPDIR/coord.db --json done --agent implementation-reviewer --thread "$THREAD_ID_CR2" --summary "Review complete"
-
-inbox --db TMPDIR/coord.db --json claim --agent risk-reviewer --thread "$THREAD_ID_CR3"
-inbox --db TMPDIR/coord.db --json done --agent risk-reviewer --thread "$THREAD_ID_CR3" --summary "Review complete"
-
-orch --db TMPDIR/coord.db --json council wait \
-  --run council_blog_wait_001 \
-  --timeout-seconds 2
-```
-
-## 预期输出
-
-- `council wait` 退出码为 `0`
-- `wait.data.woke == true`
-- `wait.data.all_complete == true`
-- `wait.data.reviewers` 长度为 `3`
-- 三个 reviewer 的 `status` 都是 `done`
-
-## 断言结论
-
-- `council wait` 的唤醒条件是“三位 reviewer 全部达到终态成功”
-- 返回结果不仅告知已唤醒，还会携带完整 reviewer 状态快照，便于 leader 继续执行 tally/report
-
-## 补充约束
-
-- 当前手工复现实例需要通过 `task_attempts` 提取 reviewer `thread_id`，因为 `orch` CLI 还不直接暴露 attempt-thread mapping
diff --git a/docs/tests/orch/dep-add/README.md b/docs/tests/orch/dep-add/README.md
deleted file mode 100644
index a690d56..0000000
--- a/docs/tests/orch/dep-add/README.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Orch `dep add` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `dep-add-blocks-dependent-task-until-prerequisite-completes` | [dep-add-blocks-dependent-task-until-prerequisite-completes.md](./dep-add-blocks-dependent-task-until-prerequisite-completes.md) | adds a dependency edge that keeps the dependent task out of the ready set |
diff --git a/docs/tests/orch/dep-add/dep-add-blocks-dependent-task-until-prerequisite-completes.md b/docs/tests/orch/dep-add/dep-add-blocks-dependent-task-until-prerequisite-completes.md
deleted file mode 100644
index 0e7d94b..0000000
--- a/docs/tests/orch/dep-add/dep-add-blocks-dependent-task-until-prerequisite-completes.md
+++ /dev/null
@@ -1,38 +0,0 @@
-# Case: `dep-add-blocks-dependent-task-until-prerequisite-completes`
-
-## 用例意义
-
-验证 `dep add` 会建立依赖边，并让依赖方任务（即依赖其他任务的那个任务）在其前置任务完成前保持不可调度。
-
-## 前置条件
-
-- 已存在 run `run_blog_002`
-- run 下已存在任务 `T1` 与 `T2`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_002 --goal "Build dependency-aware workflow"
-orch --db TMPDIR/coord.db --json task add --run run_blog_002 --task T1 --title "Build backend" --default-to worker-a
-orch --db TMPDIR/coord.db --json task add --run run_blog_002 --task T2 --title "Build frontend" --default-to worker-b
-orch --db TMPDIR/coord.db --json dep add --run run_blog_002 --task T2 --depends-on T1
-orch --db TMPDIR/coord.db --json ready --run run_blog_002
-```
-
-## 预期输出
-
-- `dep add` 退出码为 `0`
-- `data.dependency.task_id == "T2"`
-- `data.dependency.depends_on_task_id == "T1"`
-- 后续 `ready` 只返回 `T1`
-- `T2` 不出现在 `ready.data.tasks` 中
-
-## 断言结论
-
-- `dep add` 会立刻影响 ready 计算结果
-- 依赖关系属于调度门控，而不是仅供展示的元数据
-
-## 补充约束
-
-- `--task` 不能依赖自己；自依赖应返回 `invalid_input`
-- 重复添加同一条依赖边应返回 `invalid_state`
diff --git a/docs/tests/orch/dispatch/README.md b/docs/tests/orch/dispatch/README.md
deleted file mode 100644
index 5097378..0000000
--- a/docs/tests/orch/dispatch/README.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# Orch `dispatch` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `dispatch-creates-attempt-and-thread-for-ready-task` | [dispatch-creates-attempt-and-thread-for-ready-task.md](./dispatch-creates-attempt-and-thread-for-ready-task.md) | dispatches a ready task into a new attempt, inbox thread, and initial task message |
-| `dispatch-rejects-non-ready-task` | [dispatch-rejects-non-ready-task.md](./dispatch-rejects-non-ready-task.md) | rejects dispatch when the task is still gated by dependencies |
-| `dispatch-creates-strict-worktree` | [dispatch-creates-strict-worktree.md](./dispatch-creates-strict-worktree.md) | provisions a code-mode worktree and writes workspace metadata into the attempt and payload |
-| `dispatch-rejects-dirty-repo-without-base-ref` | [dispatch-rejects-dirty-repo-without-base-ref.md](./dispatch-rejects-dirty-repo-without-base-ref.md) | blocks code-mode worktree dispatch from a dirty repository without an explicit base ref |
-| `dispatch-allows-explicit-base-ref-on-dirty-repo` | [dispatch-allows-explicit-base-ref-on-dirty-repo.md](./dispatch-allows-explicit-base-ref-on-dirty-repo.md) | accepts dirty repository state when `--base-ref` resolves to a concrete commit |
-| `dispatch-requires-explicit-execution-mode` | [dispatch-requires-explicit-execution-mode.md](./dispatch-requires-explicit-execution-mode.md) | rejects dispatch when the caller does not declare `--execution-mode analysis|code` |
-| `dispatch-analysis-mode-skips-worktree` | [dispatch-analysis-mode-skips-worktree.md](./dispatch-analysis-mode-skips-worktree.md) | keeps analysis-mode tasks on the normal non-worktree dispatch path |
diff --git a/docs/tests/orch/dispatch/dispatch-allows-explicit-base-ref-on-dirty-repo.md b/docs/tests/orch/dispatch/dispatch-allows-explicit-base-ref-on-dirty-repo.md
deleted file mode 100644
index c6ca5f8..0000000
--- a/docs/tests/orch/dispatch/dispatch-allows-explicit-base-ref-on-dirty-repo.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# Case: `dispatch-allows-explicit-base-ref-on-dirty-repo`
-
-## 用例意义
-
-验证 strict worktree dispatch 在仓库已变脏时，只要显式给出可解析的 `--base-ref`，仍可继续创建 attempt。
-
-## 前置条件
-
-- `TMPDIR/repo` 是一个 Git 仓库
-- 仓库工作区存在未提交变更
-- `HEAD` 仍指向合法 commit
-- 已存在 run `run_blog_worktree_003` 与任务 `T1`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_worktree_003 --goal "Validate explicit base ref on dirty repo"
-orch --db TMPDIR/coord.db --json task add --run run_blog_worktree_003 --task T1 --title "Implement backend" --default-to worker-a
-orch --db TMPDIR/coord.db --json dispatch --run run_blog_worktree_003 --task T1 --execution-mode code --repo-path TMPDIR/repo --workspace-root .orch/worktrees --base-ref HEAD
-```
-
-## 预期输出
-
-- `dispatch` 退出码为 `0`
-- `data.attempt.base_ref == "HEAD"`
-- `data.attempt.base_commit` 等于当前 `HEAD` 所解析到的 commit，不包含工作区中未提交的变更
-
-## 断言结论
-
-- `--base-ref` 是 dirty repo strict dispatch 的显式解锁条件
-- worktree 基线来自 commit，而不是当前未提交工作区内容
diff --git a/docs/tests/orch/dispatch/dispatch-analysis-mode-skips-worktree.md b/docs/tests/orch/dispatch/dispatch-analysis-mode-skips-worktree.md
deleted file mode 100644
index 0960e8e..0000000
--- a/docs/tests/orch/dispatch/dispatch-analysis-mode-skips-worktree.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# Case: `dispatch-analysis-mode-skips-worktree`
-
-## 用例意义
-
-验证 `dispatch --execution-mode analysis` 会保持 thread-only，不会创建 worktree。
-
-## 前置条件
-
-- 已存在 run `run_blog_auto_worktree_002`
-- 已存在任务 `T1`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_auto_worktree_002 --goal "Validate analysis-mode dispatch fallback"
-orch --db TMPDIR/coord.db --json task add --run run_blog_auto_worktree_002 --task T1 --title "Review QA findings" --summary "Summarize test failures and next steps" --default-to qa-worker
-orch --db TMPDIR/coord.db --json dispatch --run run_blog_auto_worktree_002 --task T1 --execution-mode analysis
-```
-
-## 预期输出
-
-- `dispatch` 退出码为 `0`
-- `data.attempt.worktree_path == ""`
-- `data.attempt.workspace_status == ""`
-- `data.message.payload_json.execution_mode == "analysis"`
-- 仍会正常返回 `thread_id` 与首条任务消息
-
-## 断言结论
-
-- analysis mode 始终走标准 dispatch 路径，不会平白引入分支和工作目录
-- worker brief 和桥接层可以从 payload 中读取显式的 `execution_mode`
diff --git a/docs/tests/orch/dispatch/dispatch-creates-attempt-and-thread-for-ready-task.md b/docs/tests/orch/dispatch/dispatch-creates-attempt-and-thread-for-ready-task.md
deleted file mode 100644
index c7d3ff9..0000000
--- a/docs/tests/orch/dispatch/dispatch-creates-attempt-and-thread-for-ready-task.md
+++ /dev/null
@@ -1,39 +0,0 @@
-# Case: `dispatch-creates-attempt-and-thread-for-ready-task`
-
-## 用例意义
-
-验证 `dispatch` 在任务已 `ready` 时，会创建 attempt、映射 inbox thread，并写入首条任务消息。
-
-## 前置条件
-
-- 已存在 run `run_blog_001`
-- 已存在无依赖任务 `T1`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_001 --goal "Build blog MVP" --summary "Public blog plus admin CRUD"
-orch --db TMPDIR/coord.db --json task add --run run_blog_001 --task T1 --title "Implement retry policy" --summary "Add retry policy to HTTP client" --default-to worker-a
-orch --db TMPDIR/coord.db --json dispatch --run run_blog_001 --task T1 --execution-mode analysis --body "Implement retry handling for the HTTP client."
-```
-
-## 预期输出
-
-- `dispatch` 退出码为 `0`
-- `data.task.status == "dispatched"`
-- `data.attempt.attempt_no == 1`
-- 返回 `data.attempt.thread_id`
-- `data.attempt.assigned_to == "worker-a"`
-- `data.thread.thread_id` 与 `data.attempt.thread_id` 一致
-- `data.message.kind == "task"`
-- `data.message.payload_json.execution_mode == "analysis"`
-
-## 断言结论
-
-- `dispatch` 是把调度意图物化为一次 attempt 和 inbox thread 的命令
-- 任务进入 `dispatched` 后，leader 可以用 thread 映射等待 worker 侧进展
-
-## 补充约束
-
-- 未显式传 `--to` 时，会回退使用任务的 `default_to`
-- `--body` 与 `--body-file` 互斥；不可读的 `--body-file` 应返回 `invalid_input`
diff --git a/docs/tests/orch/dispatch/dispatch-creates-strict-worktree.md b/docs/tests/orch/dispatch/dispatch-creates-strict-worktree.md
deleted file mode 100644
index 8550fd6..0000000
--- a/docs/tests/orch/dispatch/dispatch-creates-strict-worktree.md
+++ /dev/null
@@ -1,40 +0,0 @@
-# Case: `dispatch-creates-strict-worktree`
-
-## 用例意义
-
-验证显式 `--execution-mode code` dispatch 会创建隔离 worktree，并把 workspace 元数据持久化到 attempt 与任务 payload 中。
-
-## 前置条件
-
-- `TMPDIR/repo` 是一个干净的 Git 仓库
-- 仓库内已存在至少一个已提交文件
-- 已存在 run `run_blog_worktree_001` 与任务 `T1`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_worktree_001 --goal "Validate strict worktree dispatch"
-orch --db TMPDIR/coord.db --json task add --run run_blog_worktree_001 --task T1 --title "Implement backend" --default-to worker-a
-orch --db TMPDIR/coord.db --json dispatch --run run_blog_worktree_001 --task T1 --execution-mode code --repo-path TMPDIR/repo --workspace-root .orch/worktrees --body "Implement inside isolated worktree."
-```
-
-## 预期输出
-
-- `dispatch` 退出码为 `0`
-- `data.attempt.base_ref == "HEAD"`
-- `data.attempt.base_commit` 等于仓库当前 `HEAD` commit
-- `data.attempt.branch_name == "orch/run-blog-worktree-001/T1/attempt-1"`
-- 返回非空 `data.attempt.worktree_path`
-- `data.attempt.workspace_status == "created"`
-- `data.message.payload_json.execution_mode == "code"`
-- `data.message.payload_json.worktree_path` 与 attempt 中的路径一致
-
-## 断言结论
-
-- code-mode dispatch 会创建真正的隔离工作目录，而不是只记录一组字符串元数据
-- worker 读取任务 payload 时可以拿到同一份 worktree 路径
-
-## 补充约束
-
-- 未显式传 `--base-ref` 且仓库干净时，会默认回退到 `HEAD`
-- `--workspace-root` 为相对路径时，会相对于仓库根目录解析
diff --git a/docs/tests/orch/dispatch/dispatch-rejects-dirty-repo-without-base-ref.md b/docs/tests/orch/dispatch/dispatch-rejects-dirty-repo-without-base-ref.md
deleted file mode 100644
index fe5fda6..0000000
--- a/docs/tests/orch/dispatch/dispatch-rejects-dirty-repo-without-base-ref.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# Case: `dispatch-rejects-dirty-repo-without-base-ref`
-
-## 用例意义
-
-验证 strict worktree dispatch 在仓库存在未提交修改且未显式指定 `--base-ref` 时，会拒绝继续执行。
-
-## 前置条件
-
-- `TMPDIR/repo` 是一个 Git 仓库
-- 仓库工作区存在未提交变更
-- 已存在 run `run_blog_worktree_002` 与任务 `T1`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_worktree_002 --goal "Validate dirty repo rejection"
-orch --db TMPDIR/coord.db --json task add --run run_blog_worktree_002 --task T1 --title "Implement backend" --default-to worker-a
-orch --db TMPDIR/coord.db --json dispatch --run run_blog_worktree_002 --task T1 --execution-mode code --repo-path TMPDIR/repo --workspace-root .orch/worktrees
-```
-
-## 预期输出
-
-- `dispatch` 退出码为 `30`
-- JSON 错误码为 `invalid_state`
-- `.orch/worktrees/run_blog_worktree_002/T1/attempt-1` 不应被创建
-
-## 断言结论
-
-- strict 模式不会隐式吞掉未提交工作区状态
-- 当 leader 依赖脏工作区时，必须显式给出 `--base-ref`
diff --git a/docs/tests/orch/dispatch/dispatch-rejects-non-ready-task.md b/docs/tests/orch/dispatch/dispatch-rejects-non-ready-task.md
deleted file mode 100644
index 8355c1a..0000000
--- a/docs/tests/orch/dispatch/dispatch-rejects-non-ready-task.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# Case: `dispatch-rejects-non-ready-task`
-
-## 用例意义
-
-验证 `dispatch` 在任务仍被依赖阻塞时会返回稳定的 `invalid_state` 契约，而不是偷偷创建 attempt。
-
-## 前置条件
-
-- 已存在 run `run_blog_003`
-- 任务 `T2` 依赖 `T1`
-- `T1` 尚未完成
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_003 --goal "Validate ready gating"
-orch --db TMPDIR/coord.db --json task add --run run_blog_003 --task T1 --title "Backend"
-orch --db TMPDIR/coord.db --json task add --run run_blog_003 --task T2 --title "Frontend"
-orch --db TMPDIR/coord.db --json dep add --run run_blog_003 --task T2 --depends-on T1
-orch --db TMPDIR/coord.db --json dispatch --run run_blog_003 --task T2 --execution-mode analysis
-```
-
-## 预期输出
-
-- `dispatch` 退出码为 `30`
-- JSON 错误码为 `invalid_state`
-
-## 断言结论
-
-- 依赖未满足时，`dispatch` 失败得很早，并且不会越过 ready gate
-
-## 补充约束
-
-- 该错误是调度状态错误，不是 `not_found`
diff --git a/docs/tests/orch/dispatch/dispatch-requires-explicit-execution-mode.md b/docs/tests/orch/dispatch/dispatch-requires-explicit-execution-mode.md
deleted file mode 100644
index 90aa0a4..0000000
--- a/docs/tests/orch/dispatch/dispatch-requires-explicit-execution-mode.md
+++ /dev/null
@@ -1,29 +0,0 @@
-# Case: `dispatch-requires-explicit-execution-mode`
-
-## 用例意义
-
-验证 `dispatch` 不再做 worktree heuristics，而是要求 caller 显式声明 `--execution-mode analysis|code`。
-
-## 前置条件
-
-- 已存在 run `run_blog_auto_worktree_001`
-- 已存在任务 `T1`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_auto_worktree_001 --goal "Validate explicit execution mode"
-orch --db TMPDIR/coord.db --json task add --run run_blog_auto_worktree_001 --task T1 --title "Implement backend API" --default-to worker-a
-orch --db TMPDIR/coord.db --json dispatch --run run_blog_auto_worktree_001 --task T1
-```
-
-## 预期输出
-
-- `dispatch` 退出码为 `30`
-- JSON 错误码为 `invalid_input`
-- 错误消息指出必须提供 `--execution-mode`
-
-## 断言结论
-
-- dispatch 的执行模式是显式契约，而不是由 runtime 根据任务元数据自行猜测
-- leader 必须对 thread-only 与 worktree-backed 执行路径负责
diff --git a/docs/tests/orch/ready/README.md b/docs/tests/orch/ready/README.md
deleted file mode 100644
index 2ba734e..0000000
--- a/docs/tests/orch/ready/README.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# Orch `ready` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `ready-lists-only-eligible-tasks` | [ready-lists-only-eligible-tasks.md](./ready-lists-only-eligible-tasks.md) | returns only dependency-cleared tasks in priority-aware ready order |
-| `ready-orders-by-priority-and-respects-limit` | [ready-orders-by-priority-and-respects-limit.md](./ready-orders-by-priority-and-respects-limit.md) | sorts ready tasks by priority and applies `--limit` after ordering |
diff --git a/docs/tests/orch/ready/ready-lists-only-eligible-tasks.md b/docs/tests/orch/ready/ready-lists-only-eligible-tasks.md
deleted file mode 100644
index b19fc20..0000000
--- a/docs/tests/orch/ready/ready-lists-only-eligible-tasks.md
+++ /dev/null
@@ -1,38 +0,0 @@
-# Case: `ready-lists-only-eligible-tasks`
-
-## 用例意义
-
-验证 `ready` 只返回当前真正可调度的任务，而不会把仍受依赖阻塞的任务混入结果。
-
-## 前置条件
-
-- 已存在 run `run_blog_002`
-- `T1` 与 `T2` 已创建
-- `T2` 已通过 `dep add` 依赖 `T1`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_002 --goal "Build dependency-aware workflow"
-orch --db TMPDIR/coord.db --json task add --run run_blog_002 --task T1 --title "Build backend" --default-to worker-a
-orch --db TMPDIR/coord.db --json task add --run run_blog_002 --task T2 --title "Build frontend" --default-to worker-b
-orch --db TMPDIR/coord.db --json dep add --run run_blog_002 --task T2 --depends-on T1
-orch --db TMPDIR/coord.db --json ready --run run_blog_002
-```
-
-## 预期输出
-
-- `ready` 退出码为 `0`
-- `data.tasks` 长度为 `1`
-- 唯一返回项是 `T1`
-- 返回任务状态为 `ready`
-
-## 断言结论
-
-- `ready` 是经过依赖和状态过滤后的结果，不是“所有未完成任务”的简单列表
-- 新 agent 可以依赖该命令决定可立即 dispatch 的工作
-
-## 补充约束
-
-- 未显式传 `--limit` 时，默认上限是 `20`
-- `--run` 指向不存在的 run 时，应返回 `not_found`
diff --git a/docs/tests/orch/ready/ready-orders-by-priority-and-respects-limit.md b/docs/tests/orch/ready/ready-orders-by-priority-and-respects-limit.md
deleted file mode 100644
index 6643701..0000000
--- a/docs/tests/orch/ready/ready-orders-by-priority-and-respects-limit.md
+++ /dev/null
@@ -1,39 +0,0 @@
-# Case: `ready-orders-by-priority-and-respects-limit`
-
-## 用例意义
-
-验证 `ready` 会先按优先级排序可调度任务，再应用 `--limit` 截断结果，而不是按创建顺序直接裁剪。
-
-## 前置条件
-
-- 已存在 run `run_blog_005`
-- 该 run 下有至少三个无依赖且处于 `ready` 的任务
-- 三个任务优先级分别为 `high`、`normal`、`low`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_005 --goal "Validate ready ordering"
-orch --db TMPDIR/coord.db --json task add --run run_blog_005 --task T1 --title "Low priority task" --priority low
-orch --db TMPDIR/coord.db --json task add --run run_blog_005 --task T2 --title "Normal priority task" --priority normal
-orch --db TMPDIR/coord.db --json task add --run run_blog_005 --task T3 --title "High priority task" --priority high
-orch --db TMPDIR/coord.db --json ready --run run_blog_005 --limit 2
-```
-
-## 预期输出
-
-- `ready` 退出码为 `0`
-- `data.tasks` 长度为 `2`
-- 第一个返回项是高优先级任务 `T3`
-- 第二个返回项是普通优先级任务 `T2`
-- 低优先级任务 `T1` 不出现在本次结果中
-
-## 断言结论
-
-- `ready` 的用户可见顺序是 `high -> normal -> low`
-- `--limit` 的截断发生在优先级排序之后，因此 leader 可以依赖该命令优先看到更重要的可调度任务
-
-## 补充约束
-
-- 当多个 ready 任务优先级相同时，当前实现会按创建时间升序稳定返回
-- 未显式传 `--limit` 时，默认上限仍是 `20`
diff --git a/docs/tests/orch/reassign/README.md b/docs/tests/orch/reassign/README.md
deleted file mode 100644
index 5b34e2c..0000000
--- a/docs/tests/orch/reassign/README.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Orch `reassign` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `reassign-cancels-old-thread-and-dispatches-new-attempt` | [reassign-cancels-old-thread-and-dispatches-new-attempt.md](./reassign-cancels-old-thread-and-dispatches-new-attempt.md) | cancels the old blocked thread and creates a new attempt for another worker |
diff --git a/docs/tests/orch/reassign/reassign-cancels-old-thread-and-dispatches-new-attempt.md b/docs/tests/orch/reassign/reassign-cancels-old-thread-and-dispatches-new-attempt.md
deleted file mode 100644
index 673da7d..0000000
--- a/docs/tests/orch/reassign/reassign-cancels-old-thread-and-dispatches-new-attempt.md
+++ /dev/null
@@ -1,39 +0,0 @@
-# Case: `reassign-cancels-old-thread-and-dispatches-new-attempt`
-
-## 用例意义
-
-验证 `reassign` 会取消旧的阻塞线程，并为新 worker 创建新的尝试与线程。
-
-## 前置条件
-
-- 已创建运行 `run_blog_reassign_001`
-- 已创建任务 `T1`
-- `T1` 已通过严格 worktree 模式完成首次 `dispatch`
-- `worker-a` 已 `claim` 首次尝试线程，并通过 `inbox update --status blocked` 写入问题
-- 最近一次 `reconcile` 已执行，使任务进入 `blocked`
-- 已知旧线程为 `OLD_THREAD_ID`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json reassign --run run_blog_reassign_001 --task T1 --to worker-b --reason "Try another worker with clearer ownership."
-inbox --db TMPDIR/coord.db --json show --thread OLD_THREAD_ID
-```
-
-## 预期输出
-
-- `reassign` 退出码为 `0`
-- `reassign.data.attempt.assigned_to == "worker-b"`
-- `reassign.data.attempt.attempt_no == 2`
-- `reassign.data.attempt.thread_id != OLD_THREAD_ID`
-- `show.data.thread.status == "cancelled"`，指向旧线程
-
-## 断言结论
-
-- `reassign` 不是简单修改 `assigned_to` 字段，而是显式终止旧尝试并派生新尝试
-- 旧线程被取消后，worker 侧不会继续在过期上下文上执行
-
-## 补充约束
-
-- `reassign` 只接受 `blocked` 或 `failed` 任务
-- `--to` 是必填参数；`--reason` 建议始终填写，便于审计和人工排障
diff --git a/docs/tests/orch/reconcile/README.md b/docs/tests/orch/reconcile/README.md
deleted file mode 100644
index a378f82..0000000
--- a/docs/tests/orch/reconcile/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Orch `reconcile` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `reconcile-maps-claimed-or-in-progress-thread-to-running` | [reconcile-maps-claimed-or-in-progress-thread-to-running.md](./reconcile-maps-claimed-or-in-progress-thread-to-running.md) | maps worker claim or in-progress inbox state back into a running orch task |
-| `reconcile-maps-done-or-failed-thread-to-terminal-task-state` | [reconcile-maps-done-or-failed-thread-to-terminal-task-state.md](./reconcile-maps-done-or-failed-thread-to-terminal-task-state.md) | maps `done` without a gate or `failed` inbox states into terminal task states and updates run aggregates |
-| `reconcile-maps-done-thread-to-verifying-when-task-has-required-checks` | [reconcile-maps-done-thread-to-verifying-when-task-has-required-checks.md](./reconcile-maps-done-thread-to-verifying-when-task-has-required-checks.md) | routes worker `done` into `verifying` when the task has required checks |
diff --git a/docs/tests/orch/reconcile/reconcile-maps-claimed-or-in-progress-thread-to-running.md b/docs/tests/orch/reconcile/reconcile-maps-claimed-or-in-progress-thread-to-running.md
deleted file mode 100644
index c27960c..0000000
--- a/docs/tests/orch/reconcile/reconcile-maps-claimed-or-in-progress-thread-to-running.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# Case: `reconcile-maps-claimed-or-in-progress-thread-to-running`
-
-## 用例意义
-
-验证 `reconcile` 会把 worker 侧的 `claim` / `in_progress` 进展同步回 `orch`，将任务推进到 `running`。
-
-## 前置条件
-
-- 已存在 run `run_blog_001`
-- 任务 `T1` 已通过 `dispatch` 创建 attempt 和 thread
-- worker 已对该 thread 完成 `claim`，并可选地追加 `in_progress` 更新
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_001 --goal "Build blog MVP"
-orch --db TMPDIR/coord.db --json task add --run run_blog_001 --task T1 --title "Implement retry policy" --default-to worker-a
-orch --db TMPDIR/coord.db --json dispatch --run run_blog_001 --task T1 --execution-mode analysis --body "Implement retry handling for the HTTP client."
-inbox --db TMPDIR/coord.db --json claim --agent worker-a --thread THREAD_ID
-inbox --db TMPDIR/coord.db --json update --agent worker-a --thread THREAD_ID --status in_progress --summary "Implementation started"
-orch --db TMPDIR/coord.db --json reconcile --run run_blog_001
-```
-
-## 预期输出
-
-- `reconcile` 退出码为 `0`
-- `data.updated_tasks` 长度为 `1`
-- 唯一更新任务的 `status == "running"`
-- `data.run.run_id == "run_blog_001"`
-
-## 断言结论
-
-- `reconcile` 是 leader 侧把 inbox 执行状态投影回 scheduler 状态机的关键同步点
-- claim 与 in-progress 的 worker 信号不会停留在 inbox 层
diff --git a/docs/tests/orch/reconcile/reconcile-maps-done-or-failed-thread-to-terminal-task-state.md b/docs/tests/orch/reconcile/reconcile-maps-done-or-failed-thread-to-terminal-task-state.md
deleted file mode 100644
index 1a9c350..0000000
--- a/docs/tests/orch/reconcile/reconcile-maps-done-or-failed-thread-to-terminal-task-state.md
+++ /dev/null
@@ -1,43 +0,0 @@
-# Case: `reconcile-maps-done-or-failed-thread-to-terminal-task-state`
-
-## 用例意义
-
-验证 `reconcile` 会把 worker 侧 thread 的终态同步到 `orch` 任务，并刷新 run 聚合状态。
-这个 case 只覆盖两类终态：
-
-- worker `done` 且 task 没有 required checks
-- worker `fail`
-
-## 前置条件
-
-- 已存在 run 和已 dispatch 的任务
-- 该任务没有 configured verification gate，或者输入使用的是 `fail`
-- worker 已对该 thread 完成 `done` 或 `fail`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_001 --goal "Build blog MVP"
-orch --db TMPDIR/coord.db --json task add --run run_blog_001 --task T1 --title "Implement retry policy" --default-to worker-a
-orch --db TMPDIR/coord.db --json dispatch --run run_blog_001 --task T1 --execution-mode analysis --body "Implement retry handling for the HTTP client."
-inbox --db TMPDIR/coord.db --json claim --agent worker-a --thread THREAD_ID
-inbox --db TMPDIR/coord.db --json done --agent worker-a --thread THREAD_ID --summary "Retry policy implemented" --body "The HTTP client now retries transient failures."
-orch --db TMPDIR/coord.db --json reconcile --run run_blog_001
-orch --db TMPDIR/coord.db --json status --run run_blog_001
-```
-
-## 预期输出
-
-- `reconcile` 退出码为 `0`
-- `data.updated_tasks` 包含 `T1`
-- `T1.status == "done"`；若输入是 `fail`，则应为 `failed`
-- 后续 `status.data.run.status` 与终态任务聚合结果一致
-
-## 断言结论
-
-- 任务终态依赖 `reconcile` 落回 `orch`，而不是由 worker 直接改写 task 表
-- run 级聚合状态会随终态任务一并刷新
-
-## 补充约束
-
-- 如果 task 声明了 required checks，worker `done` 不应再直接进入 `done`；那条分支由 `reconcile-maps-done-thread-to-verifying-when-task-has-required-checks.md` 覆盖
diff --git a/docs/tests/orch/reconcile/reconcile-maps-done-thread-to-verifying-when-task-has-required-checks.md b/docs/tests/orch/reconcile/reconcile-maps-done-thread-to-verifying-when-task-has-required-checks.md
deleted file mode 100644
index c4d96bf..0000000
--- a/docs/tests/orch/reconcile/reconcile-maps-done-thread-to-verifying-when-task-has-required-checks.md
+++ /dev/null
@@ -1,42 +0,0 @@
-# Case: `reconcile-maps-done-thread-to-verifying-when-task-has-required-checks`
-
-## 用例意义
-
-验证 `reconcile` 在 worker 报 `done` 之后，如果任务声明了 required checks，不会直接把 task 置为 `done`，而是先推进到 `verifying`。
-
-## 前置条件
-
-- 已存在带 required checks 的任务
-- 该任务已经 dispatch 并被 worker claim
-- worker 已对该 thread 执行 `done`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_verify_001 --goal "Exercise verification gates"
-orch --db TMPDIR/coord.db --json task add \
-  --run run_verify_001 \
-  --task T1 \
-  --title "Implement verifier-backed task" \
-  --default-to worker-a \
-  --spec-file TMPDIR/task.md \
-  --check-profile cadence_component \
-  --required-check lint \
-  --required-check test
-orch --db TMPDIR/coord.db --json dispatch --run run_verify_001 --task T1 --execution-mode analysis --body "Implement the gated task."
-inbox --db TMPDIR/coord.db --json claim --agent worker-a --thread THREAD_ID
-inbox --db TMPDIR/coord.db --json done --agent worker-a --thread THREAD_ID --summary "Implementation finished" --body "Ready for verification."
-orch --db TMPDIR/coord.db --json reconcile --run run_verify_001
-```
-
-## 预期输出
-
-- `reconcile` 退出码为 `0`
-- `data.updated_tasks` 包含 `T1`
-- `T1.status == "verifying"`
-- 后续 `orch verify status --run run_verify_001 --task T1` 返回 `data.gate.status == "pending"`
-
-## 断言结论
-
-- worker 的 `done` 不再自动等同于 task `done`
-- 一旦 task 定义了 required checks，`reconcile` 的职责是把它送入验证门，而不是直接宣布完成
diff --git a/docs/tests/orch/retry/README.md b/docs/tests/orch/retry/README.md
deleted file mode 100644
index 6b46360..0000000
--- a/docs/tests/orch/retry/README.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Orch `retry` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `retry-creates-new-attempt-for-failed-task` | [retry-creates-new-attempt-for-failed-task.md](./retry-creates-new-attempt-for-failed-task.md) | creates a successor attempt, thread, and worktree after a failed attempt |
diff --git a/docs/tests/orch/retry/retry-creates-new-attempt-for-failed-task.md b/docs/tests/orch/retry/retry-creates-new-attempt-for-failed-task.md
deleted file mode 100644
index d7fab47..0000000
--- a/docs/tests/orch/retry/retry-creates-new-attempt-for-failed-task.md
+++ /dev/null
@@ -1,40 +0,0 @@
-# Case: `retry-creates-new-attempt-for-failed-task`
-
-## 用例意义
-
-验证 `retry` 会在失败任务上创建新的尝试记录，而不是复用旧线程或旧 worktree。
-
-## 前置条件
-
-- 已创建运行 `run_blog_retry_001`
-- 已创建任务 `T1`
-- `T1` 已通过严格 worktree 模式完成首次 `dispatch`
-- `worker-a` 已 `claim` 首次尝试线程并通过 `inbox fail` 把线程推进到 `failed`
-- 最近一次 `reconcile` 已执行，使任务状态同步为 `failed`
-- 已知首次尝试的线程为 `OLD_THREAD_ID`，worktree 为 `OLD_WORKTREE_PATH`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json retry --run run_blog_retry_001 --task T1 --body "Retry after fixing the failure."
-```
-
-## 预期输出
-
-- 退出码为 `0`
-- `retry.data.task.status == "dispatched"`
-- `retry.data.attempt.attempt_no == 2`
-- `retry.data.attempt.thread_id != OLD_THREAD_ID`
-- `retry.data.attempt.worktree_path != OLD_WORKTREE_PATH`
-- 新 worktree 路径在文件系统上存在
-- `retry.data.previous_attempt.attempt_no == 1`
-
-## 断言结论
-
-- `retry` 会为失败任务生成新的执行尝试，而不是把旧尝试重新打开
-- 对代码任务而言，重试会分配新的 worktree，避免旧失败环境污染下一次执行
-
-## 补充约束
-
-- `--to` 可选；未显式传入时，默认沿用当前任务/尝试的既有分配信息
-- `retry` 支持 `--body-file`，并遵守与 `--body` 的互斥规则
diff --git a/docs/tests/orch/run-init/README.md b/docs/tests/orch/run-init/README.md
deleted file mode 100644
index c16eaed..0000000
--- a/docs/tests/orch/run-init/README.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Orch `run init` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `run-init-creates-new-run` | [run-init-creates-new-run.md](./run-init-creates-new-run.md) | creates a new active run and persists goal and summary |
diff --git a/docs/tests/orch/run-init/run-init-creates-new-run.md b/docs/tests/orch/run-init/run-init-creates-new-run.md
deleted file mode 100644
index f63a64a..0000000
--- a/docs/tests/orch/run-init/run-init-creates-new-run.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# Case: `run-init-creates-new-run`
-
-## 用例意义
-
-验证 `run init` 会创建新的 orchestration run，并返回稳定的 run 元数据。
-
-## 前置条件
-
-- `TMPDIR/coord.db` 尚不存在或为空路径
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_001 --goal "Build blog MVP" --summary "Public blog plus admin CRUD"
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- `data.run.run_id == "run_blog_001"`
-- `data.run.goal == "Build blog MVP"`
-- `data.run.summary == "Public blog plus admin CRUD"`
-- `data.run.status == "active"`
-- 返回 `created_at` 与 `updated_at`
-
-## 断言结论
-
-- `run init` 会创建 run 记录，而不是只做内存态初始化
-- 新建 run 的默认状态是 `active`
-
-## 补充约束
-
-- `--run` 与 `--goal` 是必填；缺失任一项都应返回 `invalid_input`
-- 当同一 `run_id` 已存在时，应返回 `invalid_state`，而不是覆盖旧 run
diff --git a/docs/tests/orch/run-show/README.md b/docs/tests/orch/run-show/README.md
deleted file mode 100644
index ebb01c0..0000000
--- a/docs/tests/orch/run-show/README.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Orch `run show` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `run-show-returns-run-summary-and-task-counts` | [run-show-returns-run-summary-and-task-counts.md](./run-show-returns-run-summary-and-task-counts.md) | returns aggregate run metadata and task-count summary without listing task rows |
diff --git a/docs/tests/orch/run-show/run-show-returns-run-summary-and-task-counts.md b/docs/tests/orch/run-show/run-show-returns-run-summary-and-task-counts.md
deleted file mode 100644
index 45834ca..0000000
--- a/docs/tests/orch/run-show/run-show-returns-run-summary-and-task-counts.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# Case: `run-show-returns-run-summary-and-task-counts`
-
-## 用例意义
-
-验证 `run show` 会返回 run 元数据与聚合任务统计，适合作为 leader 端的轻量总览命令。
-
-## 前置条件
-
-- 已存在 `run_blog_001`
-- 该 run 下至少已有一个任务，以便产生非空 `task_counts`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_001 --goal "Build blog MVP" --summary "Public blog plus admin CRUD"
-orch --db TMPDIR/coord.db --json task add --run run_blog_001 --task T1 --title "Implement retry policy" --summary "Add retry policy to HTTP client" --default-to worker-a
-orch --db TMPDIR/coord.db --json run show --run run_blog_001
-```
-
-## 预期输出
-
-- `run show` 退出码为 `0`
-- `data.run.run_id == "run_blog_001"`
-- `data.run.status == "ready"`
-- `data.task_counts.ready >= 1`
-- 返回值不包含 `tasks` 数组
-
-## 断言结论
-
-- `run show` 提供的是聚合视图，而不是完整任务明细
-- run 级状态会反映当前任务聚合结果；当 run 下已有 `ready` 任务时，返回状态会是 `ready`
-- 任务计数可以在不调用 `status` 的情况下被读取
-
-## 补充约束
-
-- 当 `--run` 指向不存在的 run 时，应返回 `not_found`
diff --git a/docs/tests/orch/status/README.md b/docs/tests/orch/status/README.md
deleted file mode 100644
index 71eb3ef..0000000
--- a/docs/tests/orch/status/README.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# Orch `status` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `status-returns-run-summary-and-task-list` | [status-returns-run-summary-and-task-list.md](./status-returns-run-summary-and-task-list.md) | returns aggregate run status plus the per-task status list and latest attempt context |
-| `status-auto-reconciles-and-includes-blocked-context` | [status-auto-reconciles-and-includes-blocked-context.md](./status-auto-reconciles-and-includes-blocked-context.md) | auto-reconciles inbox state and exposes blocked-task attempt and question context |
diff --git a/docs/tests/orch/status/status-auto-reconciles-and-includes-blocked-context.md b/docs/tests/orch/status/status-auto-reconciles-and-includes-blocked-context.md
deleted file mode 100644
index bb182f6..0000000
--- a/docs/tests/orch/status/status-auto-reconciles-and-includes-blocked-context.md
+++ /dev/null
@@ -1,41 +0,0 @@
-# Case: `status-auto-reconciles-and-includes-blocked-context`
-
-## 用例意义
-
-验证 `status` 在返回结果前会先 reconcile 当前 inbox 线程状态，并附带 blocked 任务的 latest attempt、latest message 与 latest blocked question 上下文，方便 leader 直接判断谁在执行、卡在什么问题上。
-
-## 前置条件
-
-- 已存在 run `run_blog_002`
-- 任务 `T1` 已 dispatch 到 `worker-a`
-- `worker-a` 已 `claim` 对应线程，并写入一次 `blocked` 问题
-- leader 尚未显式执行 `reconcile`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_002 --goal "Build blog MVP"
-orch --db TMPDIR/coord.db --json task add --run run_blog_002 --task T1 --title "Implement retry policy" --default-to worker-a
-orch --db TMPDIR/coord.db --json dispatch --run run_blog_002 --task T1 --execution-mode analysis --body "Implement retry handling for the HTTP client."
-inbox --db TMPDIR/coord.db --json claim --agent worker-a --thread THREAD_ID
-inbox --db TMPDIR/coord.db --json update --agent worker-a --thread THREAD_ID --status blocked --summary "Need logging decision" --payload-json '{"question":"Should retry attempts be logged?"}'
-orch --db TMPDIR/coord.db --json status --run run_blog_002
-```
-
-## 预期输出
-
-- `status` 退出码为 `0`
-- `data.run.status == "blocked"`
-- `data.tasks[0].status == "blocked"`
-- `data.tasks[0].latest_attempt.thread_id == THREAD_ID`
-- `data.tasks[0].latest_attempt.status == "blocked"`
-- `data.tasks[0].latest_message.kind == "question"`
-- `data.tasks[0].latest_message.summary == "Need logging decision"`
-- `data.tasks[0].blocked_question.kind == "question"`
-- `data.tasks[0].blocked_question.summary == "Need logging decision"`
-
-## 断言结论
-
-- `status` 现在是更偏 operational 的 leader 视图，而不是只读的任务列表查询
-- leader 在常见排障场景里，不必先手工 `reconcile` 再额外跑 `blocked`
-- enriched task context 能直接暴露当前 attempt 与问题摘要，减少二次查询
diff --git a/docs/tests/orch/status/status-returns-run-summary-and-task-list.md b/docs/tests/orch/status/status-returns-run-summary-and-task-list.md
deleted file mode 100644
index 9b39a1a..0000000
--- a/docs/tests/orch/status/status-returns-run-summary-and-task-list.md
+++ /dev/null
@@ -1,42 +0,0 @@
-# Case: `status-returns-run-summary-and-task-list`
-
-## 用例意义
-
-验证 `status` 会返回 run 聚合视图、任务明细列表以及最新 attempt/message 上下文，是 leader 端的完整状态检查入口。
-
-## 前置条件
-
-- 已存在 run `run_blog_001`
-- 任务 `T1` 已经过完整的 dispatch -> worker done -> reconcile 流程
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_001 --goal "Build blog MVP"
-orch --db TMPDIR/coord.db --json task add --run run_blog_001 --task T1 --title "Implement retry policy" --default-to worker-a
-orch --db TMPDIR/coord.db --json dispatch --run run_blog_001 --task T1 --execution-mode analysis --body "Implement retry handling for the HTTP client."
-inbox --db TMPDIR/coord.db --json claim --agent worker-a --thread THREAD_ID
-inbox --db TMPDIR/coord.db --json done --agent worker-a --thread THREAD_ID --summary "Retry policy implemented" --body "The HTTP client now retries transient failures."
-orch --db TMPDIR/coord.db --json reconcile --run run_blog_001
-orch --db TMPDIR/coord.db --json status --run run_blog_001
-```
-
-## 预期输出
-
-- `status` 退出码为 `0`
-- `data.run.run_id == "run_blog_001"`
-- `data.run.status == "done"`
-- 返回 `data.task_counts`
-- 返回 `data.tasks` 数组
-- `data.tasks[0].task_id == "T1"`
-- `data.tasks[0].status == "done"`
-- `data.tasks[0].latest_attempt.assigned_to == "worker-a"`
-- `data.tasks[0].latest_attempt.status == "done"`
-- `data.tasks[0].latest_message.kind == "result"`
-- `data.tasks[0].latest_message.summary == "Retry policy implemented"`
-
-## 断言结论
-
-- `status` 比 `run show` 更完整，适合做 run 级收口检查
-- 任务清单与 run 聚合状态应保持一致，不应出现 run 已完成而任务仍显示旧状态的结果
-- leader 不必再单独查询 attempt 或 thread 历史，常见收口信息可直接从 `status` 拿到
diff --git a/docs/tests/orch/task-add/README.md b/docs/tests/orch/task-add/README.md
deleted file mode 100644
index c7b4285..0000000
--- a/docs/tests/orch/task-add/README.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# Orch `task add` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `task-add-creates-ready-root-task` | [task-add-creates-ready-root-task.md](./task-add-creates-ready-root-task.md) | creates a dependency-free task that becomes ready immediately |
-| `task-add-rejects-invalid-acceptance-json` | [task-add-rejects-invalid-acceptance-json.md](./task-add-rejects-invalid-acceptance-json.md) | rejects malformed `--acceptance-json` with `invalid_input` |
-| `task-add-rejects-invalid-priority` | [task-add-rejects-invalid-priority.md](./task-add-rejects-invalid-priority.md) | rejects priorities outside `low\|normal\|high` |
-| `task-add-snapshots-spec-and-verification-policy` | [task-add-snapshots-spec-and-verification-policy.md](./task-add-snapshots-spec-and-verification-policy.md) | snapshots spec file content, verification profile, and scope policy onto the task |
-| `task-add-rejects-spec-sha-mismatch` | [task-add-rejects-spec-sha-mismatch.md](./task-add-rejects-spec-sha-mismatch.md) | rejects explicit spec hashes that do not match the task spec file content |
diff --git a/docs/tests/orch/task-add/task-add-creates-ready-root-task.md b/docs/tests/orch/task-add/task-add-creates-ready-root-task.md
deleted file mode 100644
index c6d26a7..0000000
--- a/docs/tests/orch/task-add/task-add-creates-ready-root-task.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# Case: `task-add-creates-ready-root-task`
-
-## 用例意义
-
-验证 `task add` 为无依赖任务创建记录时，会在同一事务里把任务推进为 `ready`。
-
-## 前置条件
-
-- 已存在 run `run_blog_001`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_001 --goal "Build blog MVP"
-orch --db TMPDIR/coord.db --json task add --run run_blog_001 --task T1 --title "Implement retry policy" --summary "Add retry policy to HTTP client" --default-to worker-a
-```
-
-## 预期输出
-
-- `task add` 退出码为 `0`
-- `data.task.task_id == "T1"`
-- `data.task.title == "Implement retry policy"`
-- `data.task.status == "ready"`
-- `data.task.default_to == "worker-a"`
-- `data.task.priority == "normal"`
-
-## 断言结论
-
-- `task add` 不只是插入 `planned` 任务；对无依赖任务会立即刷新为 `ready`
-- 默认优先级会稳定回退到 `normal`
-
-## 补充约束
-
-- `--run`、`--task`、`--title` 是必填
-- 未显式传 `--acceptance-json` 时，会回退为合法 JSON 默认值，而不是空字符串
-- 同一 run 下重复的 `task_id` 应返回 `invalid_state`
diff --git a/docs/tests/orch/task-add/task-add-rejects-invalid-acceptance-json.md b/docs/tests/orch/task-add/task-add-rejects-invalid-acceptance-json.md
deleted file mode 100644
index 22a4758..0000000
--- a/docs/tests/orch/task-add/task-add-rejects-invalid-acceptance-json.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# Case: `task-add-rejects-invalid-acceptance-json`
-
-## 用例意义
-
-验证 `task add` 会拒绝格式非法的 `--acceptance-json`，并返回稳定的 `invalid_input` 错误契约。
-
-## 前置条件
-
-- 已存在 run `run_blog_003`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_003 --goal "Validate task add input guards"
-orch --db TMPDIR/coord.db --json task add --run run_blog_003 --task T1 --title "Implement retry policy" --acceptance-json '{"done":true'
-```
-
-## 预期输出
-
-- `task add` 退出码为 `30`
-- JSON 错误码为 `invalid_input`
-- 错误消息指出 `acceptance-json` 必须是合法 JSON
-
-## 断言结论
-
-- `task add` 不会把格式错误的 acceptance 条件静默写入数据库
-- `--acceptance-json` 的校验属于稳定的 CLI 输入契约，而不是存储层偶然失败
diff --git a/docs/tests/orch/task-add/task-add-rejects-invalid-priority.md b/docs/tests/orch/task-add/task-add-rejects-invalid-priority.md
deleted file mode 100644
index f817d88..0000000
--- a/docs/tests/orch/task-add/task-add-rejects-invalid-priority.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# Case: `task-add-rejects-invalid-priority`
-
-## 用例意义
-
-验证 `task add` 只接受 `low|normal|high` 三种优先级值，并在其他输入下返回稳定错误。
-
-## 前置条件
-
-- 已存在 run `run_blog_004`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_004 --goal "Validate task priority input"
-orch --db TMPDIR/coord.db --json task add --run run_blog_004 --task T1 --title "Implement retry policy" --priority urgent
-```
-
-## 预期输出
-
-- `task add` 退出码为 `30`
-- JSON 错误码为 `invalid_input`
-- 错误消息指出 `priority` 必须是 `low`、`normal` 或 `high`
-
-## 断言结论
-
-- `task add` 的优先级枚举是明确而稳定的 CLI 契约
-- 非法优先级会在任务写入前被拒绝，而不是退回到默认值或被静默接受
diff --git a/docs/tests/orch/task-add/task-add-rejects-spec-sha-mismatch.md b/docs/tests/orch/task-add/task-add-rejects-spec-sha-mismatch.md
deleted file mode 100644
index 4cfcc83..0000000
--- a/docs/tests/orch/task-add/task-add-rejects-spec-sha-mismatch.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# Case: `task-add-rejects-spec-sha-mismatch`
-
-## 用例意义
-
-验证 `task add` 在接收 `--spec-file` 与 `--spec-sha` 时，会拒绝内容摘要不匹配的任务定义，避免 task spec 漂移。
-
-## 前置条件
-
-- 已存在 run `run_blog_007`
-- 临时目录内存在可读取的 spec 文件 `TMPDIR/task.md`
-- 调用时传入的 `--spec-sha` 与文件实际 SHA256 不一致
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_007 --goal "Validate spec sha mismatch"
-orch --db TMPDIR/coord.db --json task add \
-  --run run_blog_007 \
-  --task T1 \
-  --title "Implement verifier" \
-  --spec-file TMPDIR/task.md \
-  --spec-sha deadbeef
-```
-
-## 预期输出
-
-- `task add` 退出码为 `30`
-- JSON error payload 的 `error.code == "invalid_input"`
-- `error.message` 包含 `spec-sha does not match spec-file contents`
-
-## 断言结论
-
-- task spec 快照不是“尽力而为”的附带字段；当显式声明 SHA 时，CLI 会把它当成契约校验
-- leader 不能在 spec 内容与预期摘要不一致时继续创建 task
diff --git a/docs/tests/orch/task-add/task-add-snapshots-spec-and-verification-policy.md b/docs/tests/orch/task-add/task-add-snapshots-spec-and-verification-policy.md
deleted file mode 100644
index 0640376..0000000
--- a/docs/tests/orch/task-add/task-add-snapshots-spec-and-verification-policy.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# Case: `task-add-snapshots-spec-and-verification-policy`
-
-## 用例意义
-
-验证 `task add` 在创建任务时，不只是写入基础调度字段，还会快照 task spec 与验证策略。
-
-## 前置条件
-
-- 已存在 run `run_blog_006`
-- 临时目录内存在可读取的 spec 文件 `TMPDIR/task.md`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_006 --goal "Validate spec-aware task add"
-orch --db TMPDIR/coord.db --json task add \
-  --run run_blog_006 \
-  --task T1 \
-  --title "Implement verifier" \
-  --spec-file TMPDIR/task.md \
-  --check-profile cadence_component \
-  --required-check lint \
-  --required-check test \
-  --allowed-path packages/ui \
-  --blocked-path scripts/release-metadata.mjs \
-  --metadata-json '{"repo":"cadence-ui"}'
-```
-
-## 预期输出
-
-- `task add` 退出码为 `0`
-- `data.task.status == "ready"`
-- `data.task.spec.spec_file == "TMPDIR/task.md"`
-- `data.task.spec.check_profile == "cadence_component"`
-- `data.task.spec.required_checks` 包含 `lint` 与 `test`
-- `data.task.spec.allowed_paths` 包含 `packages/ui`
-- `data.task.spec.blocked_paths` 包含 `scripts/release-metadata.mjs`
-- `data.task.gate.status == "pending"`
-- `data.task.gate.required_checks` 与 spec 中的 required checks 一致
-
-## 断言结论
-
-- `task add` 现在会把任务说明和验证策略一起固化到 task spec，而不是只保存 `title`/`summary`
-- required checks 一旦存在，task 会立即带上 `pending` gate，而不是等到 worker 完成后才临时推断
-
-## 补充约束
-
-- `spec_file` 对应内容应作为快照随 task 保存，而不是只保存路径引用
-- `check_profile` 目前只是任务策略名，后续 profile/adapter 机制会负责把它解释成真正的执行计划
diff --git a/docs/tests/orch/verify/README.md b/docs/tests/orch/verify/README.md
deleted file mode 100644
index 2e10ae9..0000000
--- a/docs/tests/orch/verify/README.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# Orch `verify` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `verify-status-returns-spec-and-gate-for-task` | [verify-status-returns-spec-and-gate-for-task.md](./verify-status-returns-spec-and-gate-for-task.md) | returns the selected task, latest attempt, spec snapshot, and current gate state |
-| `verify-record-updates-gate-and-marks-task-done-when-required-checks-pass` | [verify-record-updates-gate-and-marks-task-done-when-required-checks-pass.md](./verify-record-updates-gate-and-marks-task-done-when-required-checks-pass.md) | records named checks, recomputes the gate, and promotes the task to `done` when all required checks pass |
diff --git a/docs/tests/orch/verify/verify-record-updates-gate-and-marks-task-done-when-required-checks-pass.md b/docs/tests/orch/verify/verify-record-updates-gate-and-marks-task-done-when-required-checks-pass.md
deleted file mode 100644
index e71a7a6..0000000
--- a/docs/tests/orch/verify/verify-record-updates-gate-and-marks-task-done-when-required-checks-pass.md
+++ /dev/null
@@ -1,35 +0,0 @@
-# Case: `verify-record-updates-gate-and-marks-task-done-when-required-checks-pass`
-
-## 用例意义
-
-验证 `verify record` 在逐个记录 required checks 后，会重新计算 gate，并在所有必过项通过时把 task 从 `verifying` 推进到 `done`。
-
-## 前置条件
-
-- 已存在处于 `verifying` 的任务 `T1`
-- 该任务的 required checks 为 `lint` 与 `test`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json verify record --run run_verify_001 --task T1 --check lint --status passed --summary "lint clean"
-orch --db TMPDIR/coord.db --json verify record --run run_verify_001 --task T1 --check test --status passed --summary "tests clean"
-orch --db TMPDIR/coord.db --json status --run run_verify_001
-```
-
-## 预期输出
-
-- 第一次 `verify record` 后：
-  - `data.task.status == "verifying"`
-  - `data.gate.status == "pending"`
-  - `data.gate.pending_checks` 仍包含 `test`
-- 第二次 `verify record` 后：
-  - `data.task.status == "done"`
-  - `data.gate.status == "passed"`
-  - `data.gate.pending_checks` 为空
-- 后续 `status.data.run.status == "done"`
-
-## 断言结论
-
-- `verify record` 不是单纯写一条 check 日志；它会驱动 gate 和 task 状态机前进
-- 只有所有 required checks 通过，task 才会真正完成
diff --git a/docs/tests/orch/verify/verify-status-returns-spec-and-gate-for-task.md b/docs/tests/orch/verify/verify-status-returns-spec-and-gate-for-task.md
deleted file mode 100644
index cbc1e39..0000000
--- a/docs/tests/orch/verify/verify-status-returns-spec-and-gate-for-task.md
+++ /dev/null
@@ -1,32 +0,0 @@
-# Case: `verify-status-returns-spec-and-gate-for-task`
-
-## 用例意义
-
-验证 `verify status` 能把 task 的验证上下文一次性展示出来，而不是要求 leader 手工拼装 task、attempt、spec 与 check 结果。
-
-## 前置条件
-
-- 已存在带 required checks 的任务
-- 该任务已经经过 `reconcile` 进入 `verifying`
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json verify status --run run_verify_001 --task T1
-```
-
-## 预期输出
-
-- `verify status` 退出码为 `0`
-- `data.task.task_id == "T1"`
-- `data.attempt.attempt_no == 1`
-- `data.spec.spec_file` 非空
-- `data.spec.check_profile == "cadence_component"`
-- `data.gate.status == "pending"`
-- `data.gate.required_checks` 包含 `lint` 与 `test`
-- `data.gate.pending_checks` 在首次查询时仍包含所有未通过检查
-
-## 断言结论
-
-- `verify status` 是 leader 查看 gate 的主入口，而不是只返回 task 表里的裸状态
-- gate 是否仍在等待检查、已经失败、还是已经通过，都应在一个响应里可见
diff --git a/docs/tests/orch/wait/README.md b/docs/tests/orch/wait/README.md
deleted file mode 100644
index 435bb4f..0000000
--- a/docs/tests/orch/wait/README.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# Orch `wait` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `wait-wakes-on-matching-run-event` | [wait-wakes-on-matching-run-event.md](./wait-wakes-on-matching-run-event.md) | wakes on a later matching task event and returns that event payload |
-| `wait-times-out-without-matching-event` | [wait-times-out-without-matching-event.md](./wait-times-out-without-matching-event.md) | returns a stable timeout payload when no later matching event appears |
diff --git a/docs/tests/orch/wait/wait-times-out-without-matching-event.md b/docs/tests/orch/wait/wait-times-out-without-matching-event.md
deleted file mode 100644
index b02ddcb..0000000
--- a/docs/tests/orch/wait/wait-times-out-without-matching-event.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# Case: `wait-times-out-without-matching-event`
-
-## 用例意义
-
-验证 `wait` 在没有后续匹配事件时返回稳定的超时结果，而不是把超时视为命令失败。
-
-## 前置条件
-
-- 空数据库已初始化
-- 已创建运行 `run_blog_wait_002`
-- 当前没有会产生 `task_done` 的后续事件
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json wait --run run_blog_wait_002 --for task_done --after-event 0 --timeout-seconds 1
-```
-
-## 预期输出
-
-- 退出码为 `0`
-- `wait.data.woke == false`
-- `wait.data.next_event_id == 0`
-- `wait.data.events` 为空或缺省
-
-## 断言结论
-
-- `wait` 的超时是可消费的正常结果，不是错误态
-- leader 可以基于 `woke=false` 决定继续轮询、切换过滤条件，或退出当前控制循环
-
-## 补充约束
-
-- 该用例强调超时契约，不要求系统中存在任何任务
diff --git a/docs/tests/orch/wait/wait-wakes-on-matching-run-event.md b/docs/tests/orch/wait/wait-wakes-on-matching-run-event.md
deleted file mode 100644
index f188696..0000000
--- a/docs/tests/orch/wait/wait-wakes-on-matching-run-event.md
+++ /dev/null
@@ -1,40 +0,0 @@
-# Case: `wait-wakes-on-matching-run-event`
-
-## 用例意义
-
-验证 `wait` 能在后续匹配事件出现时被唤醒，并返回稳定的事件载荷。
-
-## 前置条件
-
-- 空数据库已初始化
-- 已创建运行 `run_blog_wait_001`
-- 已添加任务 `T1` 并完成一次 `dispatch`
-- 已知当前尝试线程为 `THREAD_ID`
-- `wait` 在工作线程写入阻塞状态前启动
-
-## 输入
-
-```bash
-orch --db TMPDIR/coord.db --json wait --run run_blog_wait_001 --for task_blocked --after-event 0 --timeout-seconds 2
-inbox --db TMPDIR/coord.db --json claim --agent worker-a --thread THREAD_ID
-inbox --db TMPDIR/coord.db --json update --agent worker-a --thread THREAD_ID --status blocked --summary "Need logging decision" --payload-json '{"question":"stdout or stderr?"}'
-```
-
-## 预期输出
-
-- `wait` 退出码为 `0`
-- `wait.data.woke == true`
-- `wait.data.events` 长度为 `1`
-- 唯一事件的 `type == "task_blocked"`
-- 事件 `summary == "Need logging decision"`
-- 事件 `payload.question == "stdout or stderr?"`
-
-## 断言结论
-
-- `wait` 不是简单睡眠，而是面向 run 事件流的阻塞读取接口
-- `task_blocked` 事件会把 worker 提问摘要和结构化 payload 暴露给 leader
-
-## 补充约束
-
-- `--for` 支持逗号分隔的事件类型列表；该用例验证的是单事件过滤
-- `wait` 返回成功时也会给出 `next_event_id`，便于后续增量等待
diff --git a/docs/tests/orch/workflows/README.md b/docs/tests/orch/workflows/README.md
deleted file mode 100644
index f82a0b2..0000000
--- a/docs/tests/orch/workflows/README.md
+++ /dev/null
@@ -1,175 +0,0 @@
-# Orch Workflow Test Plan
-
-## Scope
-
-This document tracks cross-command scenarios where the main value is the interaction between multiple `orch` commands, and often between `orch` and `inbox`.
-
-All examples assume:
-
-- isolated temp database
-- `orch --db TMPDIR/coord.db --json` is used consistently
-- assertions follow the shared rules in [../_shared/README.md](../_shared/README.md)
-
-## case: run-dispatch-reconcile-status-happy-path
-
-### 用例意义
-
-验证 `orch` 的主干领导者流程可用：创建 run、加入 task、查看 ready、dispatch、通过 `inbox` 推进 worker 状态、reconcile，再用 `status` 看到最终完成态。
-
-### 前置条件
-
-- 空数据库路径 `TMPDIR/coord.db`
-- 执行者为 `worker-a`
-
-### 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_001 --goal "Build blog MVP" --summary "Public blog plus admin CRUD"
-orch --db TMPDIR/coord.db --json task add --run run_blog_001 --task T1 --title "Implement retry policy" --summary "Add retry policy to HTTP client" --default-to worker-a
-orch --db TMPDIR/coord.db --json ready --run run_blog_001
-orch --db TMPDIR/coord.db --json dispatch --run run_blog_001 --task T1 --execution-mode analysis --body "Implement retry handling for the HTTP client."
-inbox --db TMPDIR/coord.db --json claim --agent worker-a --thread THREAD_ID
-inbox --db TMPDIR/coord.db --json update --agent worker-a --thread THREAD_ID --status in_progress --summary "Implementation started"
-orch --db TMPDIR/coord.db --json reconcile --run run_blog_001
-inbox --db TMPDIR/coord.db --json done --agent worker-a --thread THREAD_ID --summary "Retry policy implemented" --body "The HTTP client now retries transient failures."
-orch --db TMPDIR/coord.db --json reconcile --run run_blog_001
-orch --db TMPDIR/coord.db --json status --run run_blog_001
-```
-
-### 预期输出
-
-- `run init` 成功创建 `run_blog_001`
-- `task add` 返回的新 task 初始状态为 `ready`
-- `ready` 只返回 `T1`
-- `dispatch` 创建 attempt 与 inbox thread，并将 task 推进到 `dispatched`
-- 第一次 `reconcile` 后 task 状态变为 `running`
-- 第二次 `reconcile` 后 task 状态变为 `done`
-- `status` 返回 `run.status == "done"`
-
-### 断言结论
-
-- `orch` 的主干 happy path 不是单命令行为，而是 `orch` 与 `inbox` 共同完成的闭环
-- `reconcile` 是把 worker-side 线程状态折叠回 leader-side task 状态的关键步骤
-
-## case: dependency-blocked-answer-resume-flow
-
-### 用例意义
-
-验证依赖门控、blocked 列表、`answer` 反馈以及最终恢复到完成态的完整交互链路。
-
-### 前置条件
-
-- 空数据库路径 `TMPDIR/coord.db`
-- `worker-a` 负责 `T1`
-- `worker-b` 负责 `T2`
-
-### 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_002 --goal "Build dependency-aware workflow"
-orch --db TMPDIR/coord.db --json task add --run run_blog_002 --task T1 --title "Build backend" --summary "Implement backend APIs" --default-to worker-a
-orch --db TMPDIR/coord.db --json task add --run run_blog_002 --task T2 --title "Build frontend" --summary "Implement frontend flows" --default-to worker-b
-orch --db TMPDIR/coord.db --json dep add --run run_blog_002 --task T2 --depends-on T1
-orch --db TMPDIR/coord.db --json ready --run run_blog_002
-orch --db TMPDIR/coord.db --json dispatch --run run_blog_002 --task T1 --execution-mode analysis
-inbox --db TMPDIR/coord.db --json claim --agent worker-a --thread THREAD_BACKEND
-inbox --db TMPDIR/coord.db --json done --agent worker-a --thread THREAD_BACKEND --summary "Backend complete"
-orch --db TMPDIR/coord.db --json reconcile --run run_blog_002
-orch --db TMPDIR/coord.db --json ready --run run_blog_002
-orch --db TMPDIR/coord.db --json dispatch --run run_blog_002 --task T2 --execution-mode analysis
-inbox --db TMPDIR/coord.db --json claim --agent worker-b --thread THREAD_FRONTEND
-inbox --db TMPDIR/coord.db --json update --agent worker-b --thread THREAD_FRONTEND --status blocked --summary "Need logging decision" --payload-json '{"question":"stdout or stderr?"}'
-orch --db TMPDIR/coord.db --json reconcile --run run_blog_002
-orch --db TMPDIR/coord.db --json blocked --run run_blog_002
-orch --db TMPDIR/coord.db --json answer --run run_blog_002 --task T2 --body "Use stdout for MVP."
-inbox --db TMPDIR/coord.db --json update --agent worker-b --thread THREAD_FRONTEND --status in_progress --summary "Decision applied"
-inbox --db TMPDIR/coord.db --json done --agent worker-b --thread THREAD_FRONTEND --summary "Frontend complete"
-orch --db TMPDIR/coord.db --json reconcile --run run_blog_002
-orch --db TMPDIR/coord.db --json status --run run_blog_002
-```
-
-### 预期输出
-
-- 初始 `ready` 仅包含 `T1`
-- `T1` 完成并 `reconcile` 后，`T2` 才出现在 `ready`
-- `blocked` 返回 `T2` 与最新 question
-- `answer` 向活跃 thread 追加一条 `kind=answer` 消息
-- 最终 `status` 中 run 进入 `done`
-
-### 断言结论
-
-- 依赖门控和 blocked-answer 机制在同一个 run 中可以顺序衔接
-- `answer` 不直接改 task 状态；真正的状态恢复仍依赖 worker 继续推进线程并由 `reconcile` 采集
-
-## case: code-mode-dispatch-to-cleanup
-
-### 用例意义
-
-验证代码任务的 `execution-mode code` worktree 路径能从 dispatch 一直走到 cleanup，确保隔离工作区既会被创建，也能在完成后被移除。
-
-### 前置条件
-
-- `TMPDIR/repo` 是一个已提交初始内容的 Git 仓库
-- `worker-a` 负责代码任务
-
-### 输入
-
-```bash
-orch --db TMPDIR/coord.db --json run init --run run_blog_worktree_001 --goal "Validate code-mode worktree dispatch"
-orch --db TMPDIR/coord.db --json task add --run run_blog_worktree_001 --task T1 --title "Implement backend" --default-to worker-a
-orch --db TMPDIR/coord.db --json dispatch --run run_blog_worktree_001 --task T1 --execution-mode code --repo-path TMPDIR/repo --workspace-root .orch/worktrees --body "Implement inside isolated worktree."
-inbox --db TMPDIR/coord.db --json claim --agent worker-a --thread THREAD_ID
-inbox --db TMPDIR/coord.db --json done --agent worker-a --thread THREAD_ID --summary "Backend complete"
-orch --db TMPDIR/coord.db --json reconcile --run run_blog_worktree_001
-orch --db TMPDIR/coord.db --json cleanup --run run_blog_worktree_001 --task T1 --attempt 1
-```
-
-### 预期输出
-
-- `dispatch` 返回非空的 `attempt.base_ref`、`attempt.base_commit`、`attempt.branch_name`、`attempt.worktree_path`
-- `attempt.workspace_status == "created"`
-- `cleanup` 返回被清理的 attempt 记录
-- 清理后 `worktree_path` 不再存在于文件系统
-
-### 断言结论
-
-- `execution-mode code` 的 worktree 不是单次 dispatch 细节，而是完整 attempt 生命周期的一部分
-- `cleanup` 的目标是已完成或废弃的工作区，不应误删仍在活动中的执行目录
-
-## case: council-review-end-to-end
-
-### 用例意义
-
-验证 `orch council` 高层工作流可从 reviewer dispatch 一直走到 final report，且 grouped recommendations 与最终输出衔接一致。
-
-### 前置条件
-
-- 空数据库路径 `TMPDIR/coord.db`
-- 三个固定 reviewer 分别为 `architecture-reviewer`、`implementation-reviewer`、`risk-reviewer`
-
-### 输入
-
-```bash
-orch --db TMPDIR/coord.db --json council start --run council_blog_001 --target "Review the current blog architecture."
-inbox --db TMPDIR/coord.db --json claim --agent architecture-reviewer --thread THREAD_CR1
-inbox --db TMPDIR/coord.db --json done --agent architecture-reviewer --thread THREAD_CR1 --summary "Review complete" --body '{"reviewer_role":"architecture-reviewer","findings":[...]}'
-inbox --db TMPDIR/coord.db --json claim --agent implementation-reviewer --thread THREAD_CR2
-inbox --db TMPDIR/coord.db --json done --agent implementation-reviewer --thread THREAD_CR2 --summary "Review complete" --body '{"reviewer_role":"implementation-reviewer","findings":[...]}'
-inbox --db TMPDIR/coord.db --json claim --agent risk-reviewer --thread THREAD_CR3
-inbox --db TMPDIR/coord.db --json done --agent risk-reviewer --thread THREAD_CR3 --summary "Review complete" --body '{"reviewer_role":"risk-reviewer","findings":[...]}'
-orch --db TMPDIR/coord.db --json council wait --run council_blog_001 --timeout-seconds 2
-orch --db TMPDIR/coord.db --json council tally --run council_blog_001 --similarity normal
-orch --db TMPDIR/coord.db --json council report --run council_blog_001
-```
-
-### 预期输出
-
-- `council start` 创建 3 个 reviewer task 并完成 dispatch
-- `council wait` 在 3 个 reviewer 全部完成后返回 `all_complete == true`
-- `council tally` 返回 grouped recommendations，并按 `consensus|majority|minority` 分桶
-- `council report` 返回默认 `show == ["consensus","majority"]`，并产出 markdown artifact
-
-### 断言结论
-
-- council workflow 是建立在 `orch` 调度面之上的高层流程，而不是独立基础设施
-- final report 依赖已持久化的 grouped recommendations，因此 `tally` 与 `report` 必须在契约上连续
diff --git a/docs/tests/repo-memory-skill/README.md b/docs/tests/repo-memory-skill/README.md
deleted file mode 100644
index 47be397..0000000
--- a/docs/tests/repo-memory-skill/README.md
+++ /dev/null
@@ -1,135 +0,0 @@
-# Repo Memory Skill Test Plan
-
-## Purpose
-
-This directory tracks human-readable test plans for the `skills/repo-memory/`
-Codex skill bundle.
-
-These documents are not direct CLI command-contract specs for `repo-memory`.
-That coverage now lives under [../repo-memory/](../repo-memory/).
-
-These documents are also not package-level unit tests for the runtime.
-Those live under `packages/repo-memory-runtime/`.
-
-This directory covers a different surface:
-
-- whether an agent can actually use the packaged `repo-memory` skill
-- whether the bundled `./assets/repo-memory` CLI works inside real skill-guided
-  repository work
-- whether durable repository knowledge is stored and retrieved correctly
-
-## Test Model
-
-- `README.md` is the index for this directory
-- each skill test case lives in its own Markdown file
-- use stable case slugs in filenames
-
-## Shared Execution Contract
-
-Use these defaults unless a case file explicitly overrides them:
-
-- run the scenario with one real agent using the bundled `repo-memory` skill
-- create an isolated temporary directory, repository fixture, and SQLite DB path
-- require the agent to use the bundled `./assets/repo-memory` CLI instead of ad hoc
-  notes
-- validate final database state independently from the main thread after the
-  agent stops
-
-## How An Agent Runs These Cases
-
-Use one test-runner agent to execute each case.
-
-The test-runner agent is responsible for:
-
-- reading this `README.md` first, then one specific case file
-- creating an isolated temporary directory, repository fixture, and SQLite DB path
-- injecting `skills/repo-memory/` into the role agent
-- passing the concrete `SKILL_PATH`, `TMPDIR`, `DB_PATH`, and `REPO_PATH` values from the case file
-- requiring the role agent to use the bundled `./assets/repo-memory` CLI instead of free-form notes
-- collecting the role agent final summary as evidence
-- running the case `Validation Commands` from the main thread after the role agent stops
-- comparing the observed results against `Expected Outcomes` and `Assertions`
-
-The role agent is responsible for:
-
-- acting only within the case scope
-- using the injected `repo-memory` skill rather than ad hoc repository discovery
-- coordinating through the bundled CLI and SQLite DB
-- reporting concrete keys, entry ids, and final observed state back to the test-runner agent
-
-## Default Timeouts
-
-Use these defaults unless a case file explicitly overrides them:
-
-- per-agent timeout: `3m`
-- overall scenario timeout: `4m`
-
-## Default Failure Conditions
-
-Treat the test as failed if any of the following happens:
-
-- the role agent does not reach a final state before timeout
-- a required bundled CLI command returns a non-success result unless the case expects that failure
-- the final repo-memory DB state conflicts with the documented assertions
-- the role agent falls back to free-form notes for durable knowledge that should go through the bundled CLI
-
-## Evidence Capture
-
-Collect at least the following artifacts for every run:
-
-- the role agent final summary
-- the temporary DB path and repository path
-- the outputs of the case `Validation Commands`
-- any resolved entry ids, keys, or relation rows needed to verify the case
-
-## Cleanup Policy
-
-Use these defaults unless a case file explicitly overrides them:
-
-- keep the temporary DB and repo fixture on failure for debugging
-- cleanup on success only if replay artifacts are not needed
-
-## Per-Case Template
-
-Each case file should use this structure:
-
-- `Test Type`
-- `Purpose`
-- `Preconditions`
-- `Inputs`
-- `Execution Parameters`
-- `Execution Steps`
-- `Validation Commands`
-- `Expected Outcomes`
-- `Assertions`
-- `Cleanup`
-- `Recorded Example Run` when a real run has already been captured
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `search-and-add-through-bundled-cli` | [search-and-add-through-bundled-cli.md](./search-and-add-through-bundled-cli.md) | validates that an agent can miss on search, add one durable entry, then retrieve it through the packaged `repo-memory` skill |
-| `ingest-and-search-through-bundled-cli` | [ingest-and-search-through-bundled-cli.md](./ingest-and-search-through-bundled-cli.md) | validates that an agent can ingest `docs/ai` markdown through the bundled CLI and then retrieve imported knowledge through search and list |
-| `verify-downgrade-after-file-change-through-bundled-cli` | [verify-downgrade-after-file-change-through-bundled-cli.md](./verify-downgrade-after-file-change-through-bundled-cli.md) | validates that an agent can record confirmed knowledge, mutate the tracked file, run verify, and observe a `needs_review` downgrade |
-| `verify-stale-missing-hard-dependency-through-bundled-cli` | [verify-stale-missing-hard-dependency-through-bundled-cli.md](./verify-stale-missing-hard-dependency-through-bundled-cli.md) | validates that an agent can detect a missing hard dependency through `verify` and observe a `stale` result |
-| `link-two-entries-through-bundled-cli` | [link-two-entries-through-bundled-cli.md](./link-two-entries-through-bundled-cli.md) | validates that an agent can add two entries, link them, and leave a durable relation in the packaged repo-memory database |
-
-## Scope
-
-In scope:
-
-- explicit `$repo-memory` skill invocation
-- bundled `./assets/repo-memory` CLI usage
-- durable knowledge add/search/list/event flows
-- markdown ingest through `docs/ai`
-- verify downgrade and stale transitions
-- entry relation/link flows
-- package-backed SQLite memory database behavior as surfaced through the skill
-
-Out of scope:
-
-- direct CLI contract coverage that now belongs under [../repo-memory/](../repo-memory/)
-- package-level unit tests for `packages/repo-memory-runtime`
-- future auto-export flows such as `repo-brief` generation
-- implicit skill triggering without `$repo-memory`
diff --git a/docs/tests/repo-memory-skill/ingest-and-search-through-bundled-cli.md b/docs/tests/repo-memory-skill/ingest-and-search-through-bundled-cli.md
deleted file mode 100644
index 6a02d6d..0000000
--- a/docs/tests/repo-memory-skill/ingest-and-search-through-bundled-cli.md
+++ /dev/null
@@ -1,72 +0,0 @@
-# Ingest And Search Through Bundled CLI
-
-## Test Type
-
-- forward skill execution
-
-## Purpose
-
-- validate that a single agent can use `skills/repo-memory/` to ingest
-  repository-local `docs/ai` markdown through the bundled CLI and retrieve the
-  imported knowledge afterwards through `search` and `list`
-
-## Preconditions
-
-- `skills/repo-memory/assets/repo-memory` exists and is executable
-- the test runner can create a temporary Git repository fixture
-- the test runner can create a temporary SQLite DB path
-- the repository fixture includes one `docs/ai/repo-memory.md` file with at
-  least `Module Map` and `Danger Zones` sections
-
-## Inputs
-
-- `SKILL_PATH=/.../skills/repo-memory`
-- `TMPDIR=/tmp/...`
-- `DB_PATH=TMPDIR/repo-memory.db`
-- `REPO_PATH=TMPDIR/repo-fixture`
-
-## Execution Parameters
-
-- one agent only
-- per-agent timeout: `3m`
-- overall timeout: `4m`
-
-## Execution Steps
-
-1. Create a temporary Git repository fixture under `REPO_PATH`.
-2. Add `docs/ai/repo-memory.md` with markdown content that describes module and
-   danger knowledge.
-3. Ask the agent to use `$repo-memory` against `DB_PATH`.
-4. Have the agent initialize or bootstrap the DB as needed, run `ingest`
-   against `REPO_PATH`, then use `search` and `list` to confirm the imported
-   knowledge is visible.
-5. Capture the agent summary and the concrete imported entry keys it reports.
-
-## Validation Commands
-
-Run these from the main thread after the agent stops:
-
-```bash
-SKILL_PATH/assets/repo-memory ingest --db DB_PATH --repo REPO_PATH
-SKILL_PATH/assets/repo-memory search --db DB_PATH --repo REPO_PATH --query "gateway"
-SKILL_PATH/assets/repo-memory list --db DB_PATH --repo REPO_PATH
-```
-
-## Expected Outcomes
-
-- `ingest` succeeds and reports one imported markdown document
-- `search` returns the imported `module` entry for the `Module Map` section
-- `list` returns at least one `module` entry and one `danger` entry for the
-  fixture repo
-
-## Assertions
-
-- the agent used the bundled CLI instead of copying markdown into ad hoc notes
-- the imported knowledge is attached to the target repo path
-- the imported keys match the expected `repo-memory:<slug>` style generated from
-  the markdown sections
-
-## Cleanup
-
-- keep the temporary DB and repo on failure
-- remove temporary artifacts on success only if replay evidence is not needed
diff --git a/docs/tests/repo-memory-skill/link-two-entries-through-bundled-cli.md b/docs/tests/repo-memory-skill/link-two-entries-through-bundled-cli.md
deleted file mode 100644
index 90fc1ce..0000000
--- a/docs/tests/repo-memory-skill/link-two-entries-through-bundled-cli.md
+++ /dev/null
@@ -1,69 +0,0 @@
-# Link Two Entries Through Bundled CLI
-
-## Test Type
-
-- forward skill execution
-
-## Purpose
-
-- validate that a single agent can use `skills/repo-memory/` to add two durable
-  knowledge entries, create a relation between them through the bundled CLI,
-  and leave a durable graph edge in the SQLite database
-
-## Preconditions
-
-- `skills/repo-memory/assets/repo-memory` exists and is executable
-- the test runner can create a temporary Git repository fixture
-- the test runner can create a temporary SQLite DB path
-- the repository fixture includes any evidence files needed for the two entries
-
-## Inputs
-
-- `SKILL_PATH=/.../skills/repo-memory`
-- `TMPDIR=/tmp/...`
-- `DB_PATH=TMPDIR/repo-memory.db`
-- `REPO_PATH=TMPDIR/repo-fixture`
-
-## Execution Parameters
-
-- one agent only
-- per-agent timeout: `3m`
-- overall timeout: `4m`
-
-## Execution Steps
-
-1. Create a temporary Git repository fixture under `REPO_PATH`.
-2. Add any files needed to justify two durable knowledge entries.
-3. Ask the agent to use `$repo-memory` against `DB_PATH`.
-4. Have the agent add one `term` entry and one `chain` entry for the same repo.
-5. Have the agent link the first entry to the second with relation
-   `related_to`.
-6. Capture the agent summary and the concrete entry ids it reports.
-
-## Validation Commands
-
-Run these from the main thread after the agent stops:
-
-```bash
-SKILL_PATH/assets/repo-memory list --db DB_PATH --repo REPO_PATH
-SKILL_PATH/assets/repo-memory events --db DB_PATH --id 1
-SKILL_PATH/assets/repo-memory events --db DB_PATH --id 2
-sqlite3 DB_PATH "SELECT relation FROM knowledge_links WHERE from_entry_id = 1 AND to_entry_id = 2;"
-```
-
-## Expected Outcomes
-
-- both `add` calls succeed and leave two queryable entries
-- `link` succeeds and reports the relation textually
-- the final SQL validation returns one `related_to` row
-
-## Assertions
-
-- the agent used the bundled CLI for entry creation and relation creation
-- the relation is durable in the packaged SQLite DB, not just mentioned in the summary
-- both entries remain independently inspectable through `events`
-
-## Cleanup
-
-- keep the temporary DB and repo on failure
-- remove temporary artifacts on success only if replay evidence is not needed
diff --git a/docs/tests/repo-memory-skill/search-and-add-through-bundled-cli.md b/docs/tests/repo-memory-skill/search-and-add-through-bundled-cli.md
deleted file mode 100644
index 025bc87..0000000
--- a/docs/tests/repo-memory-skill/search-and-add-through-bundled-cli.md
+++ /dev/null
@@ -1,70 +0,0 @@
-# Search And Add Through Bundled CLI
-
-## Test Type
-
-- forward skill execution
-
-## Purpose
-
-- validate that a single agent can use `skills/repo-memory/` to search an empty
-  memory DB, write one durable entry through the bundled CLI, and retrieve the
-  same knowledge afterwards
-
-## Preconditions
-
-- `skills/repo-memory/assets/repo-memory` exists and is executable
-- the test runner can create a temporary Git repository fixture
-- the test runner can create a temporary SQLite DB path
-
-## Inputs
-
-- `SKILL_PATH=/.../skills/repo-memory`
-- `TMPDIR=/tmp/...`
-- `DB_PATH=TMPDIR/repo-memory.db`
-- `REPO_PATH=TMPDIR/repo-fixture`
-
-## Execution Parameters
-
-- one agent only
-- per-agent timeout: `3m`
-- overall timeout: `4m`
-
-## Execution Steps
-
-1. Create a temporary Git repository fixture under `REPO_PATH`.
-2. Add one file that will serve as evidence for the durable knowledge entry.
-3. Ask the agent to use `$repo-memory` against `DB_PATH`.
-4. Have the agent initialize the DB, search for a key that does not yet exist,
-   add one `term` entry with evidence, then search again for the same key.
-5. Capture the agent summary and the concrete entry key used.
-
-## Validation Commands
-
-Run these from the main thread after the agent stops:
-
-```bash
-SKILL_PATH/assets/repo-memory init --db DB_PATH
-SKILL_PATH/assets/repo-memory search --db DB_PATH --repo REPO_PATH --query "plan task"
-SKILL_PATH/assets/repo-memory list --db DB_PATH --repo REPO_PATH --kind term
-SKILL_PATH/assets/repo-memory events --db DB_PATH --id 1
-```
-
-## Expected Outcomes
-
-- the first search misses before the entry is written
-- the `add` command succeeds and creates entry `1`
-- the second search returns the new `term`
-- `list` returns exactly one `term` entry for the fixture repo
-- `events` includes a `created` event for the new entry
-
-## Assertions
-
-- the stored entry key matches the one the agent added
-- the stored entry summary matches the durable fact the agent recorded
-- the stored entry is linked to the target repo path
-- the agent used the bundled CLI rather than free-form notes
-
-## Cleanup
-
-- keep the temporary DB and repo on failure
-- remove temporary artifacts on success only if replay evidence is not needed
diff --git a/docs/tests/repo-memory-skill/verify-downgrade-after-file-change-through-bundled-cli.md b/docs/tests/repo-memory-skill/verify-downgrade-after-file-change-through-bundled-cli.md
deleted file mode 100644
index 0b82180..0000000
--- a/docs/tests/repo-memory-skill/verify-downgrade-after-file-change-through-bundled-cli.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# Verify Downgrade After File Change Through Bundled CLI
-
-## Test Type
-
-- forward skill execution
-
-## Purpose
-
-- validate that a single agent can use `skills/repo-memory/` to record
-  confirmed knowledge with a hard file dependency, change that file, run
-  `verify`, and observe the expected `needs_review` downgrade
-
-## Preconditions
-
-- `skills/repo-memory/assets/repo-memory` exists and is executable
-- the test runner can create a temporary Git repository fixture
-- the repository fixture contains one evidence file committed in Git before the
-  agent starts
-- the test runner can modify the evidence file before or during the scenario
-
-## Inputs
-
-- `SKILL_PATH=/.../skills/repo-memory`
-- `TMPDIR=/tmp/...`
-- `DB_PATH=TMPDIR/repo-memory.db`
-- `REPO_PATH=TMPDIR/repo-fixture`
-- `EVIDENCE_PATH=REPO_PATH/foo.txt`
-
-## Execution Parameters
-
-- one agent only
-- per-agent timeout: `3m`
-- overall timeout: `4m`
-
-## Execution Steps
-
-1. Create a temporary Git repository fixture under `REPO_PATH`.
-2. Commit one evidence file at `EVIDENCE_PATH`.
-3. Ask the agent to use `$repo-memory` against `DB_PATH`.
-4. Have the agent add one `confirmed` entry that depends on `EVIDENCE_PATH`.
-5. Mutate `EVIDENCE_PATH` after the entry is recorded.
-6. Have the agent run `verify`, then inspect the result with `list` and
-   `events`.
-7. Capture the agent summary and the final entry status it reports.
-
-## Validation Commands
-
-Run these from the main thread after the agent stops:
-
-```bash
-SKILL_PATH/assets/repo-memory verify --db DB_PATH --repo REPO_PATH
-SKILL_PATH/assets/repo-memory list --db DB_PATH --repo REPO_PATH --status needs_review
-SKILL_PATH/assets/repo-memory events --db DB_PATH --id 1
-```
-
-## Expected Outcomes
-
-- `verify` reports one downgraded entry
-- `list` returns the target entry in `needs_review`
-- `events` includes a `downgraded` event for the target entry
-
-## Assertions
-
-- the agent used the bundled CLI for both the write and the verification flow
-- the downgrade reason is driven by real repository state, not by chat-only reasoning
-- the final state transition is visible both in the current listing and the event history
-
-## Cleanup
-
-- keep the temporary DB and repo on failure
-- remove temporary artifacts on success only if replay evidence is not needed
diff --git a/docs/tests/repo-memory-skill/verify-stale-missing-hard-dependency-through-bundled-cli.md b/docs/tests/repo-memory-skill/verify-stale-missing-hard-dependency-through-bundled-cli.md
deleted file mode 100644
index 08d0ae3..0000000
--- a/docs/tests/repo-memory-skill/verify-stale-missing-hard-dependency-through-bundled-cli.md
+++ /dev/null
@@ -1,70 +0,0 @@
-# Verify Stale Missing Hard Dependency Through Bundled CLI
-
-## Test Type
-
-- forward skill execution
-
-## Purpose
-
-- validate that a single agent can use `skills/repo-memory/` to record
-  confirmed knowledge with a missing hard dependency, run `verify`, and observe
-  the expected `stale` outcome
-
-## Preconditions
-
-- `skills/repo-memory/assets/repo-memory` exists and is executable
-- the test runner can create a temporary Git repository fixture
-- the repository fixture has a valid Git HEAD before verification starts
-- the hard dependency path referenced by the entry does not exist
-
-## Inputs
-
-- `SKILL_PATH=/.../skills/repo-memory`
-- `TMPDIR=/tmp/...`
-- `DB_PATH=TMPDIR/repo-memory.db`
-- `REPO_PATH=TMPDIR/repo-fixture`
-- `MISSING_PATH=REPO_PATH/missing.txt`
-
-## Execution Parameters
-
-- one agent only
-- per-agent timeout: `3m`
-- overall timeout: `4m`
-
-## Execution Steps
-
-1. Create a temporary Git repository fixture under `REPO_PATH` and ensure it
-   has an initial commit.
-2. Ask the agent to use `$repo-memory` against `DB_PATH`.
-3. Have the agent add one `confirmed` entry that declares `MISSING_PATH` as a
-   hard dependency.
-4. Have the agent run `verify`, then inspect the result with `list` and
-   `events`.
-5. Capture the agent summary and the final entry status it reports.
-
-## Validation Commands
-
-Run these from the main thread after the agent stops:
-
-```bash
-SKILL_PATH/assets/repo-memory verify --db DB_PATH --repo REPO_PATH
-SKILL_PATH/assets/repo-memory list --db DB_PATH --repo REPO_PATH --status stale
-SKILL_PATH/assets/repo-memory events --db DB_PATH --id 1
-```
-
-## Expected Outcomes
-
-- `verify` reports one stale entry
-- `list` returns the target entry in `stale`
-- `events` includes a `marked_stale` event for the target entry
-
-## Assertions
-
-- the agent used the bundled CLI for the full verify flow
-- the stale result is driven by the missing hard dependency, not by a generic command failure
-- the final state is visible in both current listing output and event history
-
-## Cleanup
-
-- keep the temporary DB and repo on failure
-- remove temporary artifacts on success only if replay evidence is not needed
diff --git a/docs/tests/repo-memory/README.md b/docs/tests/repo-memory/README.md
deleted file mode 100644
index cae5353..0000000
--- a/docs/tests/repo-memory/README.md
+++ /dev/null
@@ -1,94 +0,0 @@
-# Repo Memory Markdown Test Plan
-
-## Purpose
-
-This directory contains the human-readable Markdown test plan for the
-`repo-memory` CLI.
-
-It complements package-level Go tests. The goal is to preserve the user-visible
-command contract in a form that can be reviewed, extended, and replayed without
-re-deriving behavior from implementation code.
-
-## Directory Rules
-
-- one folder per `repo-memory` command or shared area
-- each folder keeps a `README.md` entrypoint
-- command folders use `README.md` as an index only
-- each command test case lives in its own Markdown file named after the case slug
-- no numeric test IDs
-- each command case is identified by its concrete file path
-
-Case file naming pattern:
-
-```text
-<case-slug>.md
-```
-
-## Authoring Principles
-
-- focus on externally visible CLI behavior rather than store internals
-- prefer stable command sequences that a new agent can replay against a temp repo and SQLite DB
-- document both success contracts and failure boundaries
-- reuse scenarios from existing Go tests before inventing new cases
-- keep terminology consistent with the CLI: repo, entry, alias, dependency, event, verify, stale, and needs_review
-
-## Common Execution Model
-
-Most cases in this directory assume the same baseline:
-
-1. create an isolated temporary directory
-2. create a Git repository fixture such as `TMPDIR/repo`
-3. choose a database path such as `TMPDIR/repo-memory.db`
-4. run `repo-memory init --db TMPDIR/repo-memory.db`
-5. run the target command sequence against that database and repository
-
-Unless a case says otherwise:
-
-- assertions should check both exit code and human-readable stdout or stderr
-- repo-scoped cases should use an actual Git repo so `verified_on_commit` and repo registration are meaningful
-- `add` and `ingest` may bootstrap the schema automatically, but most read commands assume `init` already ran
-
-## Folder Map
-
-- `README.md`: global conventions and glossary
-- `ROADMAP.md`: document progress, planned case backlog, and authored-case register
-- `_shared/README.md`: reusable fixtures, output assertions, exit-code rules, and repo conventions
-- `workflows/README.md`: cross-command end-to-end scenarios
-- per-command folders: command-specific index `README.md` files plus one case document per test case
-
-## Glossary
-
-- `repo`: one tracked repository root stored in the memory database
-- `entry`: one durable knowledge record keyed by repo, kind, key, and optional scope
-- `alias`: an alternate search term attached to one entry
-- `dependency`: a file, dir, or glob evidence locator used during verification
-- `event`: a historical record such as `created`, `updated`, `downgraded`, or `marked_stale`
-- `needs_review`: knowledge that may still be useful but should be re-checked
-- `stale`: knowledge that no longer has valid hard evidence and should not be treated as current
-
-## Relationship To Automated Tests
-
-The current executable references are:
-
-- [init_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/init_integration_test.go) for `init`
-- [add_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/add_integration_test.go) for `add`
-- [ingest_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/ingest_integration_test.go) for `ingest`
-- [search_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/search_integration_test.go) for `search`
-- [list_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/list_integration_test.go) for `list`
-- [events_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/events_integration_test.go) for `events`
-- [link_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/link_integration_test.go) for `link`
-- [verify_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/verify_integration_test.go) for `verify`
-- [repos_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/repos_integration_test.go) for `repos`
-- [workflow_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/workflow_integration_test.go) for the four documented workflow cases
-- [store_test.go](../../../packages/repo-memory-runtime/internal/store/store_test.go) for import, search, alias, dependency, link, and verification-state transitions
-- [load_test.go](../../../packages/repo-memory-runtime/internal/documents/load_test.go) for markdown parsing
-- [main_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/main_test.go) for verify downgrade heuristics
-
-These tests do not replace the Markdown plan. They are the executable companion
-to it.
-
-When this Markdown plan expands:
-
-- prefer matching an existing automated scenario first
-- record any additional manual-only CLI contract coverage explicitly in the relevant command case file
-- keep [ROADMAP.md](./ROADMAP.md) synchronized with authored files and case slugs
diff --git a/docs/tests/repo-memory/ROADMAP.md b/docs/tests/repo-memory/ROADMAP.md
deleted file mode 100644
index 9cbe8f7..0000000
--- a/docs/tests/repo-memory/ROADMAP.md
+++ /dev/null
@@ -1,311 +0,0 @@
-# Repo Memory Test Documentation Roadmap
-
-## Purpose
-
-This roadmap tracks the human-readable Markdown test plan for `repo-memory`.
-
-It exists so a new agent can immediately answer four questions without
-re-reading the whole codebase:
-
-- which test-plan documents already exist
-- which cases have already been written down
-- which cases are still missing
-- what file should be updated next
-
-This roadmap is for the Markdown test-plan set under `docs/tests/repo-memory/`.
-It is not a replacement for automated Go tests.
-
-## Current Snapshot
-
-Snapshot date:
-
-- `2026-03-20`
-
-Current state:
-
-- `repo-memory` CLI is implemented for `init`, `add`, `ingest`, `search`, `list`, `events`, `link`, `verify`, and `repos`
-- package-local automated Go tests now cover every currently documented `repo-memory` command case and workflow case through dedicated CLI integration tests, plus the existing markdown parser and store-level tests
-- this roadmap now exists under `docs/tests/repo-memory/ROADMAP.md`
-- all planned global, shared, workflow, command-index, and command-case Markdown documents in the current `repo-memory` test-plan set have been authored
-- each implemented `repo-memory` command folder now uses `README.md` as an index plus one Markdown file per planned case
-- `docs/tests/repo-memory-skill/` can now stay focused on skill-forward behavior while `docs/tests/repo-memory/` owns direct CLI contract coverage
-- a follow-up edge audit on `2026-03-20` identified seven additional boundary cases, and those cases are now authored in the Markdown plan
-- `verify` automation also exposed and fixed a store-level deadlock in `ListVerifyCandidates`, so the documented verify workflows now execute end-to-end in tests
-
-Progress summary for planned test-plan documents, excluding `ROADMAP.md`:
-
-- planned document files: `39`
-- authored document files: `39`
-- planned case slugs in this roadmap: `31`
-- authored case slugs in this roadmap: `31`
-
-## Scope
-
-In scope:
-
-- `repo-memory init`
-- `repo-memory add`
-- `repo-memory ingest`
-- `repo-memory search`
-- `repo-memory list`
-- `repo-memory events`
-- `repo-memory link`
-- `repo-memory verify`
-- `repo-memory repos`
-- cross-command workflows
-- shared test conventions for text output, exit codes, temp repos, repo fixtures, and read-only DB inspection where the CLI has no inspection surface
-
-Out of scope:
-
-- `skills/repo-memory/` forward execution behavior except as a related reference
-- implementation details that are not visible through the `repo-memory` CLI contract
-- future commands that are not currently implemented
-
-## Tracking Rules
-
-Directory model:
-
-- one folder per command or shared area
-- each folder keeps a `README.md` entrypoint
-- command folders use `README.md` as an index only
-- each command case lives in its own Markdown file named after the case slug
-- cross-command workflow cases remain grouped in `docs/tests/repo-memory/workflows/README.md`
-
-Case identity:
-
-- do not use numeric IDs
-- identify each command case by its concrete file path
-- identify each workflow case by `path + case slug`
-- command case file naming pattern:
-
-```text
-<case-slug>.md
-```
-
-- workflow case heading pattern:
-
-```md
-## case: add-search-events-roundtrip
-```
-
-Per-case structure inside the case document:
-
-- `用例意义`
-- `前置条件`
-- `输入`
-- `预期输出`
-- `断言结论`
-
-How to update this roadmap when a new case is written:
-
-1. if it is a command case, create or update the target `<case-slug>.md` file under the relevant command folder
-2. if it is a command case, add or update the entry in that folder's `README.md` index
-3. if it is a workflow case, add or update the case inside `docs/tests/repo-memory/workflows/README.md`
-4. move the case slug from `Pending Case Backlog` to `Authored Case Register`
-5. update the authored counts in `Current Snapshot`
-6. if a new Markdown file is created, update `Document Progress`
-
-Allowed status values in this roadmap:
-
-- `pending`
-- `in_progress`
-- `done`
-- `deferred`
-
-## Existing Automated Coverage Reference
-
-The Markdown test-plan set starts from explicit CLI contract docs, but these
-automated tests already exist and should be used as source material when
-writing the docs:
-
-- [init_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/init_integration_test.go) for `init`
-- [add_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/add_integration_test.go) for `add`
-- [ingest_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/ingest_integration_test.go) for `ingest`
-- [search_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/search_integration_test.go) for `search`
-- [list_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/list_integration_test.go) for `list`
-- [events_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/events_integration_test.go) for `events`
-- [link_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/link_integration_test.go) for `link`
-- [verify_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/verify_integration_test.go) for `verify`
-- [repos_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/repos_integration_test.go) for `repos`
-- [workflow_integration_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/workflow_integration_test.go) for the documented workflow cases
-- [store_test.go](../../../packages/repo-memory-runtime/internal/store/store_test.go#L11) `TestImportDocumentAndSearch`
-- [store_test.go](../../../packages/repo-memory-runtime/internal/store/store_test.go#L67) `TestUpsertEntryWithAliasesAndDependencies`
-- [store_test.go](../../../packages/repo-memory-runtime/internal/store/store_test.go#L178) `TestApplyVerificationResult`
-- [load_test.go](../../../packages/repo-memory-runtime/internal/documents/load_test.go#L10) `TestParseFile`
-- [main_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/main_test.go#L12) `TestVerifyCandidateDetectsFileChange`
-- [main_test.go](../../../packages/repo-memory-runtime/cmd/repo-memory/main_test.go#L50) `TestVerifyCandidateMarksMissingDependencyStale`
-
-These tests do not remove the need for the Markdown plan. They only reduce
-discovery work.
-
-## Planned Directory Tree
-
-```text
-docs/tests/repo-memory/
-  ROADMAP.md
-  README.md
-  _shared/
-    README.md
-  workflows/
-    README.md
-  init/
-    README.md
-    <case-slug>.md
-  add/
-    README.md
-    <case-slug>.md
-  ingest/
-    README.md
-    <case-slug>.md
-  search/
-    README.md
-    <case-slug>.md
-  list/
-    README.md
-    <case-slug>.md
-  events/
-    README.md
-    <case-slug>.md
-  link/
-    README.md
-    <case-slug>.md
-  verify/
-    README.md
-    <case-slug>.md
-  repos/
-    README.md
-    <case-slug>.md
-```
-
-## Document Progress
-
-| Path | Purpose | Planned Cases | Authored Cases | Status |
-| --- | --- | ---: | ---: | --- |
-| `docs/tests/repo-memory/README.md` | Global testing conventions and glossary | 0 | 0 | done |
-| `docs/tests/repo-memory/_shared/README.md` | Shared fixtures, output assertions, exit codes, and repo rules | 0 | 0 | done |
-| `docs/tests/repo-memory/workflows/README.md` | Cross-command scenarios | 4 | 4 | done |
-| `docs/tests/repo-memory/init/README.md` | `init` command case index | 0 | 0 | done |
-| `docs/tests/repo-memory/init/init-creates-schema-on-empty-db.md` | `init` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/init/init-is-idempotent-on-existing-db.md` | `init` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/add/README.md` | `add` command case index | 0 | 0 | done |
-| `docs/tests/repo-memory/add/add-registers-repo-and-entry.md` | `add` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/add/add-updates-existing-entry-on-same-kind-and-key.md` | `add` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/add/add-failed-validation-still-registers-repo.md` | `add` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/ingest/README.md` | `ingest` command case index | 0 | 0 | done |
-| `docs/tests/repo-memory/ingest/ingest-imports-docs-ai-markdown.md` | `ingest` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/ingest/ingest-rejects-when-no-markdown-found.md` | `ingest` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/ingest/ingest-imports-headingless-markdown-as-single-entry.md` | `ingest` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/search/README.md` | `search` command case index | 0 | 0 | done |
-| `docs/tests/repo-memory/search/search-returns-matching-entry-snippet.md` | `search` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/search/search-matches-alias-with-repo-filter.md` | `search` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/search/search-returns-no-results-when-empty.md` | `search` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/search/search-rejects-missing-query.md` | `search` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/list/README.md` | `list` command case index | 0 | 0 | done |
-| `docs/tests/repo-memory/list/list-filters-by-kind-and-status.md` | `list` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/list/list-returns-no-entries-when-empty.md` | `list` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/events/README.md` | `events` command case index | 0 | 0 | done |
-| `docs/tests/repo-memory/events/events-reads-history-by-id.md` | `events` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/events/events-resolves-entry-by-repo-kind-key.md` | `events` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/events/events-rejects-missing-entry-selector.md` | `events` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/link/README.md` | `link` command case index | 0 | 0 | done |
-| `docs/tests/repo-memory/link/link-creates-relation-between-entries.md` | `link` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/link/link-rejects-missing-relation.md` | `link` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/link/link-rejects-when-entry-id-missing.md` | `link` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/verify/README.md` | `verify` command case index | 0 | 0 | done |
-| `docs/tests/repo-memory/verify/verify-downgrades-changed-file-dependency.md` | `verify` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/verify/verify-marks-missing-hard-dependency-stale.md` | `verify` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/verify/verify-prints-no-repos-when-empty.md` | `verify` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/verify/verify-skips-explicit-repo-without-git-head.md` | `verify` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/verify/verify-downgrades-entry-missing-verified-on-commit.md` | `verify` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/repos/README.md` | `repos` command case index | 0 | 0 | done |
-| `docs/tests/repo-memory/repos/repos-lists-tracked-repositories.md` | `repos` command case | 1 | 1 | done |
-| `docs/tests/repo-memory/repos/repos-prints-no-repos-when-empty.md` | `repos` command case | 1 | 1 | done |
-
-## Authoring Order
-
-Recommended order:
-
-1. `docs/tests/repo-memory/README.md`
-2. `docs/tests/repo-memory/_shared/README.md`
-3. `docs/tests/repo-memory/workflows/README.md`
-4. `docs/tests/repo-memory/add/README.md` plus its linked case files
-5. `docs/tests/repo-memory/ingest/README.md` plus its linked case files
-6. `docs/tests/repo-memory/search/README.md` plus its linked case files
-7. `docs/tests/repo-memory/verify/README.md` plus its linked case files
-8. the remaining command indexes and case files
-
-Reason:
-
-- the workflow file captures the highest-value repo-memory lifecycle behavior first
-- command documents can then reuse shared conventions and already-fixed terminology
-
-## Edge Audit Notes
-
-Per-command review status after auditing the current CLI source and authored docs:
-
-- `init`: current coverage is sufficient for now; no additional high-value boundary was found beyond ordinary filesystem failure paths
-- `add`: now covers both normal upsert flow and the surprising zero-entry repo side effect on failed validation
-- `ingest`: now covers both ordinary section import and the headingless fallback-entry parser path
-- `search`: now covers both positive lookup behavior and the required `--query` failure contract
-- `list`: current coverage is acceptable for now; remaining gaps are mostly lower-priority default-limit behavior and shared uninitialized-schema behavior
-- `events`: now covers both selector styles and the missing-selector failure contract
-- `link`: now covers both successful relation writes and missing-input rejection for ids and relation
-- `verify`: now covers changed-file downgrade, missing-hard-dependency stale, an empty set of tracked repos, explicit skip without Git HEAD, and missing `verified_on_commit`
-- `repos`: current coverage is acceptable for now; the zero-entry repo side effect is intentionally documented from the `add` boundary case
-
-## Authored Case Register
-
-| Path | Case Slug | Coverage Note | Status |
-| --- | --- | --- | --- |
-| `docs/tests/repo-memory/workflows/README.md` | `add-search-events-roundtrip` | end-to-end write, search, and history roundtrip for one durable entry | done |
-| `docs/tests/repo-memory/workflows/README.md` | `ingest-search-list-across-sections` | markdown ingest surfaces multiple imported entries through search and list | done |
-| `docs/tests/repo-memory/workflows/README.md` | `add-link-and-resolve-related-entry` | two entries are linked and the relation is persisted | done |
-| `docs/tests/repo-memory/workflows/README.md` | `verify-downgrades-after-repo-change` | repo changes propagate into downgraded or stale durable knowledge | done |
-| `docs/tests/repo-memory/init/init-creates-schema-on-empty-db.md` | `init-creates-schema-on-empty-db` | initializes an empty database path and prints the initialized path | done |
-| `docs/tests/repo-memory/init/init-is-idempotent-on-existing-db.md` | `init-is-idempotent-on-existing-db` | repeated init succeeds on the same database path | done |
-| `docs/tests/repo-memory/add/add-registers-repo-and-entry.md` | `add-registers-repo-and-entry` | auto-bootstraps schema, registers the repo, and creates one durable entry | done |
-| `docs/tests/repo-memory/add/add-updates-existing-entry-on-same-kind-and-key.md` | `add-updates-existing-entry-on-same-kind-and-key` | repeated upsert reuses the same entry id and records an update event | done |
-| `docs/tests/repo-memory/add/add-failed-validation-still-registers-repo.md` | `add-failed-validation-still-registers-repo` | failed validation still leaves a zero-entry repo row visible through `repos` | done |
-| `docs/tests/repo-memory/ingest/ingest-imports-docs-ai-markdown.md` | `ingest-imports-docs-ai-markdown` | imports markdown sections under `docs/ai` as confirmed knowledge entries | done |
-| `docs/tests/repo-memory/ingest/ingest-rejects-when-no-markdown-found.md` | `ingest-rejects-when-no-markdown-found` | rejects an empty scan tree with a stable error message | done |
-| `docs/tests/repo-memory/ingest/ingest-imports-headingless-markdown-as-single-entry.md` | `ingest-imports-headingless-markdown-as-single-entry` | headingless markdown falls back to one imported entry instead of being skipped | done |
-| `docs/tests/repo-memory/search/search-returns-matching-entry-snippet.md` | `search-returns-matching-entry-snippet` | returns a ranked text result with status and snippet lines | done |
-| `docs/tests/repo-memory/search/search-matches-alias-with-repo-filter.md` | `search-matches-alias-with-repo-filter` | matches alias terms while narrowing by repo substring | done |
-| `docs/tests/repo-memory/search/search-returns-no-results-when-empty.md` | `search-returns-no-results-when-empty` | prints `no results` for an empty search result set | done |
-| `docs/tests/repo-memory/search/search-rejects-missing-query.md` | `search-rejects-missing-query` | rejects empty search invocation without `--query` | done |
-| `docs/tests/repo-memory/list/list-filters-by-kind-and-status.md` | `list-filters-by-kind-and-status` | narrows entries by repo substring, kind, and status | done |
-| `docs/tests/repo-memory/list/list-returns-no-entries-when-empty.md` | `list-returns-no-entries-when-empty` | prints `no entries` when the filtered result set is empty | done |
-| `docs/tests/repo-memory/events/events-reads-history-by-id.md` | `events-reads-history-by-id` | prints newest-first history for one entry id | done |
-| `docs/tests/repo-memory/events/events-resolves-entry-by-repo-kind-key.md` | `events-resolves-entry-by-repo-kind-key` | resolves an entry without `--id` when repo, kind, and key are provided | done |
-| `docs/tests/repo-memory/events/events-rejects-missing-entry-selector.md` | `events-rejects-missing-entry-selector` | rejects calls that omit both `--id` and `--repo+--kind+--key` | done |
-| `docs/tests/repo-memory/link/link-creates-relation-between-entries.md` | `link-creates-relation-between-entries` | persists one directed relation between two existing entries | done |
-| `docs/tests/repo-memory/link/link-rejects-missing-relation.md` | `link-rejects-missing-relation` | rejects empty relation input before write | done |
-| `docs/tests/repo-memory/link/link-rejects-when-entry-id-missing.md` | `link-rejects-when-entry-id-missing` | rejects link requests without both entry ids | done |
-| `docs/tests/repo-memory/verify/verify-downgrades-changed-file-dependency.md` | `verify-downgrades-changed-file-dependency` | downgrades a confirmed entry to `needs_review` when a tracked file changed | done |
-| `docs/tests/repo-memory/verify/verify-marks-missing-hard-dependency-stale.md` | `verify-marks-missing-hard-dependency-stale` | marks a confirmed entry `stale` when a hard dependency disappears | done |
-| `docs/tests/repo-memory/verify/verify-prints-no-repos-when-empty.md` | `verify-prints-no-repos-when-empty` | prints `no repos` when the initialized DB has no tracked repositories | done |
-| `docs/tests/repo-memory/verify/verify-skips-explicit-repo-without-git-head.md` | `verify-skips-explicit-repo-without-git-head` | explicit repo verification reports a skip when the repo is not a Git repo or has no HEAD | done |
-| `docs/tests/repo-memory/verify/verify-downgrades-entry-missing-verified-on-commit.md` | `verify-downgrades-entry-missing-verified-on-commit` | entries created without `verified_on_commit` downgrade to `needs_review` once the repo becomes verifiable | done |
-| `docs/tests/repo-memory/repos/repos-lists-tracked-repositories.md` | `repos-lists-tracked-repositories` | lists every tracked repository with entry counts and update timestamps | done |
-| `docs/tests/repo-memory/repos/repos-prints-no-repos-when-empty.md` | `repos-prints-no-repos-when-empty` | prints `no repos` for an initialized but unused database | done |
-
-## Pending Case Backlog
-
-No pending case slugs remain in the current plan.
-
-When a new CLI contract or workflow needs coverage:
-
-1. if it is a command case, create a new `<case-slug>.md` file under the relevant command folder and add it to that folder's `README.md` index
-2. if it is a workflow case, add it to `docs/tests/repo-memory/workflows/README.md`
-3. add the new slug to `Authored Case Register`
-4. update `Current Snapshot` and `Document Progress`
-
-## Definition Of Done
-
-This roadmap is complete only when all of the following are true:
-
-- every implemented `repo-memory` command has a corresponding document folder
-- each planned command index and case document exists
-- each pending case slug has been either authored or explicitly deferred
-- the authored-case register matches the actual Markdown files on disk
-- a new agent can pick any pending case and know exactly where it should be written
diff --git a/docs/tests/repo-memory/_shared/README.md b/docs/tests/repo-memory/_shared/README.md
deleted file mode 100644
index 8308ec1..0000000
--- a/docs/tests/repo-memory/_shared/README.md
+++ /dev/null
@@ -1,137 +0,0 @@
-# Repo Memory Shared Test Conventions
-
-## Purpose
-
-This document captures shared assumptions used by multiple `repo-memory`
-test-plan documents so command and workflow files can stay focused on behavior
-instead of repeating setup boilerplate.
-
-## Recommended Fixture Shape
-
-Use an isolated temp workspace per case:
-
-- database path: `TMPDIR/repo-memory.db`
-- repository path: `TMPDIR/repo`
-- optional second repository path: `TMPDIR/repo-b`
-- default ingest path: `TMPDIR/repo/docs/ai`
-- optional evidence file: `TMPDIR/repo/path/to/file`
-
-Recommended bootstrap command:
-
-```bash
-repo-memory init --db TMPDIR/repo-memory.db
-```
-
-Recommended repo bootstrap for Git-aware cases:
-
-```bash
-git -C TMPDIR/repo init
-git -C TMPDIR/repo config user.email test@example.com
-git -C TMPDIR/repo config user.name Tester
-git -C TMPDIR/repo add .
-git -C TMPDIR/repo commit -m "init"
-```
-
-## Flag Model
-
-`repo-memory` uses one subcommand-specific flag set per command. There is no
-root `--json` mode and no shared global flag parser.
-
-Common flags across multiple commands:
-
-- `--db`: SQLite database path
-- `--repo`: repository root or repo-path substring filter depending on command
-- `--limit`: result limit on read commands
-
-Case files should call out whether `--repo` is:
-
-- a required absolute repo root for `add` and `ingest`
-- an optional substring filter for `search` and `list`
-- an optional absolute repo root selector for `verify`
-
-## Text Output Contract
-
-Successful output is plain text written to stdout.
-
-Shared assertion guidance:
-
-- assert stable prefixes or phrases, not timestamp values
-- prefer checking identifiers, kinds, keys, statuses, and count summaries
-- when output contains absolute repo paths, compare against the concrete fixture path used by the case
-
-Representative success phrases:
-
-- `initialized TMPDIR/repo-memory.db`
-- `upserted entry 1 (term:AITask)`
-- `ingested 1 docs from ABS_REPO`
-- `linked #1 -[related_to]-> #2`
-- `ABS_REPO: verified 1 entries, 1 downgraded, 0 stale`
-- `no results`
-- `no entries`
-- `no repos`
-
-## Exit Code Contract
-
-The current CLI contract uses these exit codes:
-
-| Exit Code | Meaning | Typical Trigger |
-| --- | --- | --- |
-| `0` | success | command completed normally, including empty-result text such as `no results` |
-| `1` | command failure | invalid flags, missing required values, missing repo docs, unresolved entry, or store/runtime error |
-| `2` | top-level usage failure | missing subcommand or unknown subcommand |
-
-When a case expects failure, assert both the non-zero exit code and the human-readable stderr message.
-
-## Schema Bootstrap Rules
-
-Current command behavior is intentionally uneven and should be documented:
-
-- `init` creates schema explicitly
-- `add` and `ingest` call `Init` internally and can succeed on a fresh DB path
-- `search`, `list`, `events`, `link`, `verify`, and `repos` assume schema already exists
-
-If a case relies on automatic schema bootstrap in `add` or `ingest`, state that explicitly in `前置条件` and `断言结论`.
-
-## Repo Fixture Rules
-
-Cases covering `add`, `ingest`, or `verify` should state:
-
-- whether the target repo is a real Git repo
-- whether a HEAD commit already exists
-- which file path is being used as evidence or dependency
-
-`verify` behavior depends on Git state:
-
-- if a repo has a readable HEAD commit, verification updates repo metadata and evaluates dependencies
-- if a repo is missing Git state or has no HEAD, the command prints `skipped (not a git repo or no HEAD)` for that repo
-
-## Markdown Ingest Rules
-
-`ingest` scans markdown recursively under `docs/ai` by default.
-
-Shared assertions for ingest cases:
-
-- `.md` files are discovered recursively
-- file base name influences imported key prefixes
-- heading text influences imported `kind` classification and key slug
-- an empty `docs/ai` tree fails with `no markdown files found under ...`
-
-## Direct DB Inspection
-
-Most cases should stay at the CLI contract level.
-
-One exception is `link`: the CLI currently writes the relation but does not have
-an inspection command for links. Link cases may use a read-only `sqlite3` query
-to confirm persistence after the CLI call.
-
-Typical example:
-
-```bash
-sqlite3 TMPDIR/repo-memory.db "SELECT relation FROM knowledge_links WHERE from_entry_id = 1 AND to_entry_id = 2;"
-```
-
-## Workflow Authoring Rule
-
-If a case spans multiple commands, place the end-to-end narrative in
-`workflows/README.md` first, then add narrower command-level cases only when
-they are easier to reason about in isolation.
diff --git a/docs/tests/repo-memory/add/README.md b/docs/tests/repo-memory/add/README.md
deleted file mode 100644
index 2037c07..0000000
--- a/docs/tests/repo-memory/add/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Repo Memory `add` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `add-registers-repo-and-entry` | [add-registers-repo-and-entry.md](./add-registers-repo-and-entry.md) | auto-bootstraps schema, registers the repo, and creates one durable entry |
-| `add-updates-existing-entry-on-same-kind-and-key` | [add-updates-existing-entry-on-same-kind-and-key.md](./add-updates-existing-entry-on-same-kind-and-key.md) | reuses the same entry id and records an update event on repeated upsert |
-| `add-failed-validation-still-registers-repo` | [add-failed-validation-still-registers-repo.md](./add-failed-validation-still-registers-repo.md) | failed validation still leaves a zero-entry repo row visible through `repos` |
diff --git a/docs/tests/repo-memory/add/add-failed-validation-still-registers-repo.md b/docs/tests/repo-memory/add/add-failed-validation-still-registers-repo.md
deleted file mode 100644
index d35f045..0000000
--- a/docs/tests/repo-memory/add/add-failed-validation-still-registers-repo.md
+++ /dev/null
@@ -1,32 +0,0 @@
-# Case: `add-failed-validation-still-registers-repo`
-
-## 用例意义
-
-验证 `add` 即使因为 entry 校验失败退出，仍会留下 repo 注册副作用，这个行为需要被明确记录。
-
-## 前置条件
-
-- `TMPDIR/repo` 是一个已提交初始 commit 的 Git 仓库
-- `TMPDIR/repo-memory.db` 尚不存在
-
-## 输入
-
-```bash
-repo-memory add --db TMPDIR/repo-memory.db --repo TMPDIR/repo --summary "missing kind"
-repo-memory repos --db TMPDIR/repo-memory.db
-repo-memory list --db TMPDIR/repo-memory.db --repo repo
-```
-
-## 预期输出
-
-- `add` 退出码为 `1`
-- `add` 的 stderr 包含 `kind is required`
-- `repos` 退出码为 `0`
-- `repos` 输出包含 `TMPDIR/repo (0 entries, updated `
-- `list` 输出 `no entries`
-
-## 断言结论
-
-- `add` 会先初始化数据库并注册 repo，再进入 entry 级校验
-- 失败的 `add` 不会创建 entry，但会留下可见的零条目 repo 记录
-- 后续测试或调用方如果依赖“失败无副作用”，必须显式考虑这个例外
diff --git a/docs/tests/repo-memory/add/add-registers-repo-and-entry.md b/docs/tests/repo-memory/add/add-registers-repo-and-entry.md
deleted file mode 100644
index f9e4648..0000000
--- a/docs/tests/repo-memory/add/add-registers-repo-and-entry.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# Case: `add-registers-repo-and-entry`
-
-## 用例意义
-
-验证 `add` 可以在新数据库路径上自动初始化 schema、注册 repo，并写入一条可枚举的 durable knowledge entry。
-
-## 前置条件
-
-- `TMPDIR/repo` 是一个已提交初始 commit 的 Git 仓库
-- 证据文件 `TMPDIR/repo/app/app/src/main/java/foo/AITask.java` 已存在
-- `TMPDIR/repo-memory.db` 尚不存在
-
-## 输入
-
-```bash
-repo-memory add --db TMPDIR/repo-memory.db --repo TMPDIR/repo --kind term --key AITask --summary "Plan 内嵌任务结构，不是独立表" --status confirmed --source-path TMPDIR/repo/app/app/src/main/java/foo/AITask.java --source-line 42 --alias "AI Task" --dep file:TMPDIR/repo/app/app/src/main/java/foo/AITask.java:hard
-repo-memory list --db TMPDIR/repo-memory.db --repo repo --kind term --status confirmed
-```
-
-## 预期输出
-
-- `add` 退出码为 `0`
-- `add` 输出 `upserted entry 1 (term:AITask)`
-- `list` 输出包含 `#1 [repo] term:AITask [confirmed]`
-- `list` 输出包含摘要 `Plan 内嵌任务结构，不是独立表`
-
-## 断言结论
-
-- `add` 会自动完成 schema bootstrap 与 repo 注册，不要求先单独跑 `init`
-- 新增 entry 立即可被 `list` 读取
-- 证据路径、别名、依赖等增强字段不会阻止主写入流程
diff --git a/docs/tests/repo-memory/add/add-updates-existing-entry-on-same-kind-and-key.md b/docs/tests/repo-memory/add/add-updates-existing-entry-on-same-kind-and-key.md
deleted file mode 100644
index f6545db..0000000
--- a/docs/tests/repo-memory/add/add-updates-existing-entry-on-same-kind-and-key.md
+++ /dev/null
@@ -1,35 +0,0 @@
-# Case: `add-updates-existing-entry-on-same-kind-and-key`
-
-## 用例意义
-
-验证同一 repo 下相同 `kind + key + scope` 再次执行 `add` 时会更新既有 entry，而不是生成重复记录。
-
-## 前置条件
-
-- `TMPDIR/repo` 是一个已提交初始 commit 的 Git 仓库
-- 空数据库已完成 `init`
-- 已执行过一次：
-
-```bash
-repo-memory add --db TMPDIR/repo-memory.db --repo TMPDIR/repo --kind term --key AITask --summary "初版摘要" --status draft
-```
-
-## 输入
-
-```bash
-repo-memory add --db TMPDIR/repo-memory.db --repo TMPDIR/repo --kind term --key AITask --summary "修订后的摘要" --status confirmed --alias "AI Task"
-repo-memory events --db TMPDIR/repo-memory.db --id 1
-repo-memory list --db TMPDIR/repo-memory.db --repo repo --kind term
-```
-
-## 预期输出
-
-- 第二次 `add` 仍输出 `upserted entry 1 (term:AITask)`
-- `events` 中同时包含 `updated (draft -> confirmed)` 与更早的 `created (- -> draft)`
-- `list` 中该 entry 的摘要为 `修订后的摘要`
-
-## 断言结论
-
-- `add` 的核心语义是 upsert，而不是 append-only create
-- 更新会保留同一 entry id，同时追加历史事件
-- 最新摘要、状态、别名会覆盖旧值而不是与旧值并存
diff --git a/docs/tests/repo-memory/events/README.md b/docs/tests/repo-memory/events/README.md
deleted file mode 100644
index e676b84..0000000
--- a/docs/tests/repo-memory/events/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Repo Memory `events` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `events-reads-history-by-id` | [events-reads-history-by-id.md](./events-reads-history-by-id.md) | prints newest-first history for one entry id |
-| `events-resolves-entry-by-repo-kind-key` | [events-resolves-entry-by-repo-kind-key.md](./events-resolves-entry-by-repo-kind-key.md) | resolves an entry without `--id` when repo, kind, and key are provided |
-| `events-rejects-missing-entry-selector` | [events-rejects-missing-entry-selector.md](./events-rejects-missing-entry-selector.md) | rejects calls that provide neither `--id` nor `--repo + --kind + --key` |
diff --git a/docs/tests/repo-memory/events/events-reads-history-by-id.md b/docs/tests/repo-memory/events/events-reads-history-by-id.md
deleted file mode 100644
index 9da0dc7..0000000
--- a/docs/tests/repo-memory/events/events-reads-history-by-id.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# Case: `events-reads-history-by-id`
-
-## 用例意义
-
-验证 `events --id` 会返回某个 entry 的历史记录，并按新到旧排序。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-- `TMPDIR/repo` 下已经执行过两次同 key 的 `add`，第二次把状态从 `draft` 更新为 `confirmed`
-
-## 输入
-
-```bash
-repo-memory events --db TMPDIR/repo-memory.db --id 1
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- 第一行包含 `term:AITask [confirmed] #1`
-- 事件列表包含 `updated (draft -> confirmed)`
-- 较早事件包含 `created (- -> draft)`
-
-## 断言结论
-
-- `events` 不只显示当前状态，也保留状态演进轨迹
-- 输出顺序是最新事件优先，便于人工快速读到最近变化
diff --git a/docs/tests/repo-memory/events/events-rejects-missing-entry-selector.md b/docs/tests/repo-memory/events/events-rejects-missing-entry-selector.md
deleted file mode 100644
index c968ea8..0000000
--- a/docs/tests/repo-memory/events/events-rejects-missing-entry-selector.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Case: `events-rejects-missing-entry-selector`
-
-## 用例意义
-
-验证 `events` 在既没有 `--id`，也没有完整 `--repo + --kind + --key` 选择器时，会返回稳定错误。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-
-## 输入
-
-```bash
-repo-memory events --db TMPDIR/repo-memory.db
-```
-
-## 预期输出
-
-- 命令退出码为 `1`
-- stderr 包含 `either --id or --repo+--kind+--key is required`
-
-## 断言结论
-
-- `events` 必须先能唯一定位 entry，才会进入历史读取路径
-- 两种定位方式是互补关系，但至少要提供其中一种
diff --git a/docs/tests/repo-memory/events/events-resolves-entry-by-repo-kind-key.md b/docs/tests/repo-memory/events/events-resolves-entry-by-repo-kind-key.md
deleted file mode 100644
index ae28219..0000000
--- a/docs/tests/repo-memory/events/events-resolves-entry-by-repo-kind-key.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# Case: `events-resolves-entry-by-repo-kind-key`
-
-## 用例意义
-
-验证 `events` 在没有 `--id` 时，仍可通过 `repo + kind + key` 解析目标 entry。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-- `TMPDIR/repo` 下已存在 `term:AITask` 一条 entry
-
-## 输入
-
-```bash
-repo-memory events --db TMPDIR/repo-memory.db --repo TMPDIR/repo --kind term --key AITask
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- 第一行包含 `term:AITask`
-- 事件列表至少包含一条 `created`
-
-## 断言结论
-
-- `events` 支持两种定位方式：`--id` 或 `--repo + --kind + --key`
-- 对调用方来说，repo-scoped natural key 足以定位单条 durable knowledge
diff --git a/docs/tests/repo-memory/ingest/README.md b/docs/tests/repo-memory/ingest/README.md
deleted file mode 100644
index f82a8ec..0000000
--- a/docs/tests/repo-memory/ingest/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Repo Memory `ingest` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `ingest-imports-docs-ai-markdown` | [ingest-imports-docs-ai-markdown.md](./ingest-imports-docs-ai-markdown.md) | imports markdown sections under `docs/ai` as confirmed knowledge entries |
-| `ingest-rejects-when-no-markdown-found` | [ingest-rejects-when-no-markdown-found.md](./ingest-rejects-when-no-markdown-found.md) | rejects an empty scan tree with a stable error message |
-| `ingest-imports-headingless-markdown-as-single-entry` | [ingest-imports-headingless-markdown-as-single-entry.md](./ingest-imports-headingless-markdown-as-single-entry.md) | imports headingless markdown as one fallback `Overview` entry |
diff --git a/docs/tests/repo-memory/ingest/ingest-imports-docs-ai-markdown.md b/docs/tests/repo-memory/ingest/ingest-imports-docs-ai-markdown.md
deleted file mode 100644
index 8be25c4..0000000
--- a/docs/tests/repo-memory/ingest/ingest-imports-docs-ai-markdown.md
+++ /dev/null
@@ -1,45 +0,0 @@
-# Case: `ingest-imports-docs-ai-markdown`
-
-## 用例意义
-
-验证 `ingest` 会扫描 `docs/ai` 下的 Markdown，并把 section 导入为可检索的 durable knowledge entry。
-
-## 前置条件
-
-- `TMPDIR/repo` 是一个已提交初始 commit 的 Git 仓库
-- `TMPDIR/repo/docs/ai/repo-memory.md` 内容至少包含：
-
-```md
-# Repo Memory
-
-## Module Map
-
-- gateway
-- app/app
-
-## Danger Zones
-
-- shared libs first
-```
-
-- `TMPDIR/repo-memory.db` 尚不存在
-
-## 输入
-
-```bash
-repo-memory ingest --db TMPDIR/repo-memory.db --repo TMPDIR/repo
-repo-memory list --db TMPDIR/repo-memory.db --repo repo
-```
-
-## 预期输出
-
-- `ingest` 退出码为 `0`
-- `ingest` 输出 `ingested 1 docs from ABS_REPO`
-- `list` 输出包含 `module:repo-memory:module-map [confirmed]`
-- `list` 输出包含 `danger:repo-memory:danger-zones [confirmed]`
-
-## 断言结论
-
-- 一个 Markdown 文件中的多个 section 会被拆成多条知识 entry
-- `ingest` 会自动初始化数据库并注册 repo
-- 从 `repo-memory.md` 导入的 `Module Map`、`Danger Zones` 会被分类为不同 kind
diff --git a/docs/tests/repo-memory/ingest/ingest-imports-headingless-markdown-as-single-entry.md b/docs/tests/repo-memory/ingest/ingest-imports-headingless-markdown-as-single-entry.md
deleted file mode 100644
index d4a2f3c..0000000
--- a/docs/tests/repo-memory/ingest/ingest-imports-headingless-markdown-as-single-entry.md
+++ /dev/null
@@ -1,38 +0,0 @@
-# Case: `ingest-imports-headingless-markdown-as-single-entry`
-
-## 用例意义
-
-验证 `ingest` 遇到没有任何 Markdown heading 的文档时，不会跳过，而是回退为单条导入 entry。
-
-## 前置条件
-
-- `TMPDIR/repo` 是一个已提交初始 commit 的 Git 仓库
-- `TMPDIR/repo/docs/ai/repo-memory.md` 存在，内容只有普通段落，没有任何 `#` heading，例如：
-
-```md
-This repository keeps AI memory notes near docs/ai.
-Gateway owns ingress and app/app owns orchestration.
-```
-
-- 空数据库已完成 `init`
-
-## 输入
-
-```bash
-repo-memory ingest --db TMPDIR/repo-memory.db --repo TMPDIR/repo
-repo-memory list --db TMPDIR/repo-memory.db --repo repo
-repo-memory search --db TMPDIR/repo-memory.db --repo repo --query "Gateway orchestration"
-```
-
-## 预期输出
-
-- `ingest` 退出码为 `0`
-- `ingest` 输出 `ingested 1 docs from ABS_REPO`
-- `list` 输出包含 `decision:repo-memory:overview [confirmed]`
-- `search` 输出包含 `decision:repo-memory:overview`
-
-## 断言结论
-
-- headingless markdown 不会被忽略
-- 该类文档会以回退 heading `Overview` 导入为单条 entry
-- 回退导入的 entry 仍然可以被 `list` 与 `search` 正常消费
diff --git a/docs/tests/repo-memory/ingest/ingest-rejects-when-no-markdown-found.md b/docs/tests/repo-memory/ingest/ingest-rejects-when-no-markdown-found.md
deleted file mode 100644
index c4cc4e7..0000000
--- a/docs/tests/repo-memory/ingest/ingest-rejects-when-no-markdown-found.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# Case: `ingest-rejects-when-no-markdown-found`
-
-## 用例意义
-
-验证 `ingest` 在扫描目录存在但没有 Markdown 文件时，会返回稳定的失败信息而不是静默成功。
-
-## 前置条件
-
-- `TMPDIR/repo` 是一个已提交初始 commit 的 Git 仓库
-- `TMPDIR/repo/docs/ai/` 目录存在但为空
-- 空数据库已完成 `init`
-
-## 输入
-
-```bash
-repo-memory ingest --db TMPDIR/repo-memory.db --repo TMPDIR/repo
-```
-
-## 预期输出
-
-- 命令退出码为 `1`
-- stderr 包含 `no markdown files found under ABS_REPO/docs/ai`
-
-## 断言结论
-
-- `ingest` 不会把“没有可导入文档”误报为成功
-- 错误边界发生在导入阶段，repo 路径本身仍然是合法的
diff --git a/docs/tests/repo-memory/init/README.md b/docs/tests/repo-memory/init/README.md
deleted file mode 100644
index 9075b57..0000000
--- a/docs/tests/repo-memory/init/README.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# Repo Memory `init` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `init-creates-schema-on-empty-db` | [init-creates-schema-on-empty-db.md](./init-creates-schema-on-empty-db.md) | initializes an empty database path and prints the initialized path |
-| `init-is-idempotent-on-existing-db` | [init-is-idempotent-on-existing-db.md](./init-is-idempotent-on-existing-db.md) | repeated init succeeds on the same database path |
diff --git a/docs/tests/repo-memory/init/init-creates-schema-on-empty-db.md b/docs/tests/repo-memory/init/init-creates-schema-on-empty-db.md
deleted file mode 100644
index 7bf0fbe..0000000
--- a/docs/tests/repo-memory/init/init-creates-schema-on-empty-db.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Case: `init-creates-schema-on-empty-db`
-
-## 用例意义
-
-验证在空数据库路径上执行 `init` 会创建可用的 repo-memory schema，并返回稳定的初始化文本。
-
-## 前置条件
-
-- 选择一个尚不存在的数据库路径 `TMPDIR/repo-memory.db`
-
-## 输入
-
-```bash
-repo-memory init --db TMPDIR/repo-memory.db
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- stdout 等于 `initialized TMPDIR/repo-memory.db`
-
-## 断言结论
-
-- `init` 在空路径上可以直接完成 schema 初始化
-- 初始化结果足以让后续 `search`、`list`、`repos` 等只读命令使用同一数据库
diff --git a/docs/tests/repo-memory/init/init-is-idempotent-on-existing-db.md b/docs/tests/repo-memory/init/init-is-idempotent-on-existing-db.md
deleted file mode 100644
index 8485dcf..0000000
--- a/docs/tests/repo-memory/init/init-is-idempotent-on-existing-db.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# Case: `init-is-idempotent-on-existing-db`
-
-## 用例意义
-
-验证 `init` 在已初始化数据库上重复执行仍然成功，不要求调用方先判断 schema 是否存在。
-
-## 前置条件
-
-- `TMPDIR/repo-memory.db` 已经执行过一次 `repo-memory init --db TMPDIR/repo-memory.db`
-
-## 输入
-
-```bash
-repo-memory init --db TMPDIR/repo-memory.db
-repo-memory init --db TMPDIR/repo-memory.db
-```
-
-## 预期输出
-
-- 两次命令退出码都为 `0`
-- 两次 stdout 都等于 `initialized TMPDIR/repo-memory.db`
-
-## 断言结论
-
-- `init` 是幂等操作
-- 测试夹具或调用脚本可以安全重复执行初始化而不破坏已有数据
diff --git a/docs/tests/repo-memory/link/README.md b/docs/tests/repo-memory/link/README.md
deleted file mode 100644
index b00571d..0000000
--- a/docs/tests/repo-memory/link/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Repo Memory `link` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `link-creates-relation-between-entries` | [link-creates-relation-between-entries.md](./link-creates-relation-between-entries.md) | persists one directed relation between two existing entries |
-| `link-rejects-missing-relation` | [link-rejects-missing-relation.md](./link-rejects-missing-relation.md) | rejects empty relation input before write |
-| `link-rejects-when-entry-id-missing` | [link-rejects-when-entry-id-missing.md](./link-rejects-when-entry-id-missing.md) | rejects link requests that omit either `--from-id` or `--to-id` |
diff --git a/docs/tests/repo-memory/link/link-creates-relation-between-entries.md b/docs/tests/repo-memory/link/link-creates-relation-between-entries.md
deleted file mode 100644
index 1b2e794..0000000
--- a/docs/tests/repo-memory/link/link-creates-relation-between-entries.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# Case: `link-creates-relation-between-entries`
-
-## 用例意义
-
-验证 `link` 可以在两条已存在 entry 之间建立一条可持久化的关系记录。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-- 已存在两条 entry：`#1 term:AITask` 与 `#2 chain:ai-insight.get`
-
-## 输入
-
-```bash
-repo-memory link --db TMPDIR/repo-memory.db --from-id 1 --to-id 2 --relation related_to
-sqlite3 TMPDIR/repo-memory.db "SELECT relation FROM knowledge_links WHERE from_entry_id = 1 AND to_entry_id = 2;"
-```
-
-## 预期输出
-
-- `link` 命令退出码为 `0`
-- `link` 输出 `linked #1 -[related_to]-> #2`
-- SQL 查询返回一行 `related_to`
-
-## 断言结论
-
-- `link` 的副作用已落库，而不是只打印成功提示
-- 关系是定向的，方向由 `from-id` 与 `to-id` 决定
diff --git a/docs/tests/repo-memory/link/link-rejects-missing-relation.md b/docs/tests/repo-memory/link/link-rejects-missing-relation.md
deleted file mode 100644
index 77349b0..0000000
--- a/docs/tests/repo-memory/link/link-rejects-missing-relation.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# Case: `link-rejects-missing-relation`
-
-## 用例意义
-
-验证 `link` 对空 relation 输入给出稳定错误，而不是写入无意义关系。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-- 已存在两条 entry：`#1` 与 `#2`
-
-## 输入
-
-```bash
-repo-memory link --db TMPDIR/repo-memory.db --from-id 1 --to-id 2
-```
-
-## 预期输出
-
-- 命令退出码为 `1`
-- stderr 包含 `relation is required`
-
-## 断言结论
-
-- `relation` 是必填输入
-- 错误发生在写库前，不会产生半有效的 link 记录
diff --git a/docs/tests/repo-memory/link/link-rejects-when-entry-id-missing.md b/docs/tests/repo-memory/link/link-rejects-when-entry-id-missing.md
deleted file mode 100644
index a331a31..0000000
--- a/docs/tests/repo-memory/link/link-rejects-when-entry-id-missing.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# Case: `link-rejects-when-entry-id-missing`
-
-## 用例意义
-
-验证 `link` 在缺少 `--from-id` 或 `--to-id` 时，会在写库前拒绝请求。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-- 已存在一条 entry：`#1 term:AITask`
-
-## 输入
-
-```bash
-repo-memory link --db TMPDIR/repo-memory.db --from-id 1 --relation related_to
-```
-
-## 预期输出
-
-- 命令退出码为 `1`
-- stderr 包含 `both entry ids are required`
-
-## 断言结论
-
-- `link` 需要完整的双端 entry id，不能只靠单端 id 建立半关系
-- 缺失 id 的错误发生在写库前，不会生成不完整 link 记录
diff --git a/docs/tests/repo-memory/list/README.md b/docs/tests/repo-memory/list/README.md
deleted file mode 100644
index 4fa710c..0000000
--- a/docs/tests/repo-memory/list/README.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# Repo Memory `list` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `list-filters-by-kind-and-status` | [list-filters-by-kind-and-status.md](./list-filters-by-kind-and-status.md) | narrows entries by repo substring, kind, and status |
-| `list-returns-no-entries-when-empty` | [list-returns-no-entries-when-empty.md](./list-returns-no-entries-when-empty.md) | prints `no entries` when the filtered result set is empty |
diff --git a/docs/tests/repo-memory/list/list-filters-by-kind-and-status.md b/docs/tests/repo-memory/list/list-filters-by-kind-and-status.md
deleted file mode 100644
index 63457c4..0000000
--- a/docs/tests/repo-memory/list/list-filters-by-kind-and-status.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# Case: `list-filters-by-kind-and-status`
-
-## 用例意义
-
-验证 `list` 会同时应用 repo、kind、status 过滤条件，而不是只看其中之一。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-- 同一 repo 下已存在三条 entry：
-
-```bash
-repo-memory add --db TMPDIR/repo-memory.db --repo TMPDIR/repo --kind term --key AITask --summary "Plan 内嵌任务结构" --status confirmed
-repo-memory add --db TMPDIR/repo-memory.db --repo TMPDIR/repo --kind term --key AIJob --summary "后台任务封装" --status draft
-repo-memory add --db TMPDIR/repo-memory.db --repo TMPDIR/repo --kind chain --key ai-insight.get --summary "gateway -> app service -> cache/db" --status confirmed
-```
-
-## 输入
-
-```bash
-repo-memory list --db TMPDIR/repo-memory.db --repo repo --kind term --status confirmed
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- 输出包含 `term:AITask [confirmed]`
-- 输出不包含 `AIJob`
-- 输出不包含 `chain:ai-insight.get`
-
-## 断言结论
-
-- `list` 过滤条件是交集语义
-- repo 路径过滤、kind 过滤、status 过滤可以叠加使用
diff --git a/docs/tests/repo-memory/list/list-returns-no-entries-when-empty.md b/docs/tests/repo-memory/list/list-returns-no-entries-when-empty.md
deleted file mode 100644
index 4e83e3e..0000000
--- a/docs/tests/repo-memory/list/list-returns-no-entries-when-empty.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Case: `list-returns-no-entries-when-empty`
-
-## 用例意义
-
-验证 `list` 在没有任何匹配 entry 时返回稳定空结果文本。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-
-## 输入
-
-```bash
-repo-memory list --db TMPDIR/repo-memory.db --repo repo --kind term --status confirmed
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- stdout 等于 `no entries`
-
-## 断言结论
-
-- 空列表属于正常读取路径，不应被当作错误
-- 调用方可以通过固定文本判断当前过滤条件下无匹配项
diff --git a/docs/tests/repo-memory/repos/README.md b/docs/tests/repo-memory/repos/README.md
deleted file mode 100644
index 14a19a3..0000000
--- a/docs/tests/repo-memory/repos/README.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# Repo Memory `repos` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `repos-lists-tracked-repositories` | [repos-lists-tracked-repositories.md](./repos-lists-tracked-repositories.md) | lists every tracked repository with entry counts and update timestamps |
-| `repos-prints-no-repos-when-empty` | [repos-prints-no-repos-when-empty.md](./repos-prints-no-repos-when-empty.md) | prints `no repos` for an initialized but unused database |
diff --git a/docs/tests/repo-memory/repos/repos-lists-tracked-repositories.md b/docs/tests/repo-memory/repos/repos-lists-tracked-repositories.md
deleted file mode 100644
index c32a06f..0000000
--- a/docs/tests/repo-memory/repos/repos-lists-tracked-repositories.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# Case: `repos-lists-tracked-repositories`
-
-## 用例意义
-
-验证 `repos` 会列出当前数据库中所有已注册仓库，并附带 entry 数量。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-- 已存在两个 Git 仓库：`TMPDIR/cupid-service` 与 `TMPDIR/mars-service`
-- 已分别执行一次 `add`，使两个 repo 都被注册到数据库
-
-## 输入
-
-```bash
-repo-memory repos --db TMPDIR/repo-memory.db
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- 输出包含 `TMPDIR/cupid-service (1 entries, updated `
-- 输出包含 `TMPDIR/mars-service (1 entries, updated `
-
-## 断言结论
-
-- `repos` 以 repo 为聚合维度展示当前 repo-memory 数据库的覆盖范围
-- 输出中的条目数来自持久化 entry 统计，而不是瞬时搜索结果
diff --git a/docs/tests/repo-memory/repos/repos-prints-no-repos-when-empty.md b/docs/tests/repo-memory/repos/repos-prints-no-repos-when-empty.md
deleted file mode 100644
index 214a4cb..0000000
--- a/docs/tests/repo-memory/repos/repos-prints-no-repos-when-empty.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Case: `repos-prints-no-repos-when-empty`
-
-## 用例意义
-
-验证 `repos` 在数据库已初始化但没有任何 repo 记录时返回稳定空结果文本。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-
-## 输入
-
-```bash
-repo-memory repos --db TMPDIR/repo-memory.db
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- stdout 等于 `no repos`
-
-## 断言结论
-
-- `repos` 的空结果是正常状态，而不是错误
-- 该命令可作为“数据库里是否已经有任何 repo memory” 的快速探针
diff --git a/docs/tests/repo-memory/search/README.md b/docs/tests/repo-memory/search/README.md
deleted file mode 100644
index 2cf5ab0..0000000
--- a/docs/tests/repo-memory/search/README.md
+++ /dev/null
@@ -1,10 +0,0 @@
-# Repo Memory `search` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `search-returns-matching-entry-snippet` | [search-returns-matching-entry-snippet.md](./search-returns-matching-entry-snippet.md) | returns a ranked text result with status and snippet lines |
-| `search-matches-alias-with-repo-filter` | [search-matches-alias-with-repo-filter.md](./search-matches-alias-with-repo-filter.md) | matches alias terms while narrowing by repo substring |
-| `search-returns-no-results-when-empty` | [search-returns-no-results-when-empty.md](./search-returns-no-results-when-empty.md) | prints `no results` for an empty search result set |
-| `search-rejects-missing-query` | [search-rejects-missing-query.md](./search-rejects-missing-query.md) | rejects invocations that omit the required `--query` flag |
diff --git a/docs/tests/repo-memory/search/search-matches-alias-with-repo-filter.md b/docs/tests/repo-memory/search/search-matches-alias-with-repo-filter.md
deleted file mode 100644
index f5d5e3f..0000000
--- a/docs/tests/repo-memory/search/search-matches-alias-with-repo-filter.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# Case: `search-matches-alias-with-repo-filter`
-
-## 用例意义
-
-验证 `search` 会命中 alias，同时 `--repo` 作为路径子串过滤器只返回目标仓库的结果。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-- 已存在两个 Git 仓库：`TMPDIR/cupid-service` 与 `TMPDIR/mars-service`
-- 已分别执行：
-
-```bash
-repo-memory add --db TMPDIR/repo-memory.db --repo TMPDIR/cupid-service --kind term --key AITask --summary "Plan 内嵌任务结构" --status confirmed --alias "plan task"
-repo-memory add --db TMPDIR/repo-memory.db --repo TMPDIR/mars-service --kind term --key DeployPlan --summary "发布计划" --status confirmed --alias "release plan"
-```
-
-## 输入
-
-```bash
-repo-memory search --db TMPDIR/repo-memory.db --repo cupid --query "plan task"
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- 输出包含 `[cupid-service] term:AITask [confirmed]`
-- 输出不包含 `[mars-service]`
-
-## 断言结论
-
-- alias 会进入搜索面
-- `--repo` 是 repo path substring filter，而不是必须传完整绝对路径
-- 过滤发生在结果集层面，不影响 alias 命中能力
diff --git a/docs/tests/repo-memory/search/search-rejects-missing-query.md b/docs/tests/repo-memory/search/search-rejects-missing-query.md
deleted file mode 100644
index 65e3d44..0000000
--- a/docs/tests/repo-memory/search/search-rejects-missing-query.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Case: `search-rejects-missing-query`
-
-## 用例意义
-
-验证 `search` 对缺失 `--query` 的调用给出稳定失败契约。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-
-## 输入
-
-```bash
-repo-memory search --db TMPDIR/repo-memory.db
-```
-
-## 预期输出
-
-- 命令退出码为 `1`
-- stderr 包含 `--query is required`
-
-## 断言结论
-
-- `search` 不支持“列出全部”式空查询
-- 缺失查询词属于输入错误，而不是空结果
diff --git a/docs/tests/repo-memory/search/search-returns-matching-entry-snippet.md b/docs/tests/repo-memory/search/search-returns-matching-entry-snippet.md
deleted file mode 100644
index ed7fa25..0000000
--- a/docs/tests/repo-memory/search/search-returns-matching-entry-snippet.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# Case: `search-returns-matching-entry-snippet`
-
-## 用例意义
-
-验证 `search` 返回的文本结果既包含 entry 身份信息，也包含便于人工判断的 snippet。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-- `TMPDIR/repo` 下已有一条 `confirmed` entry：
-
-```bash
-repo-memory add --db TMPDIR/repo-memory.db --repo TMPDIR/repo --kind chain --key ai-insight.get --summary "gateway -> app service -> cache/db" --detail "The AI insight read path goes through gateway before app service reaches cache and database." --status confirmed
-```
-
-## 输入
-
-```bash
-repo-memory search --db TMPDIR/repo-memory.db --repo repo --query "insight gateway"
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- 第一行包含 `1. [repo] chain:ai-insight.get [confirmed]`
-- 后续文本包含 `gateway` 的 snippet 片段
-
-## 断言结论
-
-- `search` 的核心输出不是纯 id 列表，而是可直接消费的人工排查文本
-- 查询会同时命中 `key`、`summary`、`detail`
diff --git a/docs/tests/repo-memory/search/search-returns-no-results-when-empty.md b/docs/tests/repo-memory/search/search-returns-no-results-when-empty.md
deleted file mode 100644
index 249bd16..0000000
--- a/docs/tests/repo-memory/search/search-returns-no-results-when-empty.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Case: `search-returns-no-results-when-empty`
-
-## 用例意义
-
-验证 `search` 在没有命中项时返回稳定空结果文本，而不是异常退出。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-
-## 输入
-
-```bash
-repo-memory search --db TMPDIR/repo-memory.db --query "missing term"
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- stdout 等于 `no results`
-
-## 断言结论
-
-- 空搜索结果被视为正常控制流
-- 调用方可以用退出码 `0` + 文本 `no results` 区分“没命中”和“命令失败”
diff --git a/docs/tests/repo-memory/verify/README.md b/docs/tests/repo-memory/verify/README.md
deleted file mode 100644
index 7661c2d..0000000
--- a/docs/tests/repo-memory/verify/README.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# Repo Memory `verify` Test Plan Index
-
-## Case Files
-
-| Case Slug | File | Coverage Note |
-| --- | --- | --- |
-| `verify-downgrades-changed-file-dependency` | [verify-downgrades-changed-file-dependency.md](./verify-downgrades-changed-file-dependency.md) | downgrades a confirmed entry to `needs_review` when a tracked file changed |
-| `verify-marks-missing-hard-dependency-stale` | [verify-marks-missing-hard-dependency-stale.md](./verify-marks-missing-hard-dependency-stale.md) | marks a confirmed entry `stale` when a hard dependency disappears |
-| `verify-prints-no-repos-when-empty` | [verify-prints-no-repos-when-empty.md](./verify-prints-no-repos-when-empty.md) | prints `no repos` when the initialized DB has no tracked repositories |
-| `verify-skips-explicit-repo-without-git-head` | [verify-skips-explicit-repo-without-git-head.md](./verify-skips-explicit-repo-without-git-head.md) | prints a skip line when the explicit repo is not a Git repo or has no HEAD |
-| `verify-downgrades-entry-missing-verified-on-commit` | [verify-downgrades-entry-missing-verified-on-commit.md](./verify-downgrades-entry-missing-verified-on-commit.md) | downgrades an entry to `needs_review` once the repo becomes verifiable but the entry lacks `verified_on_commit` |
diff --git a/docs/tests/repo-memory/verify/verify-downgrades-changed-file-dependency.md b/docs/tests/repo-memory/verify/verify-downgrades-changed-file-dependency.md
deleted file mode 100644
index 1c3bded..0000000
--- a/docs/tests/repo-memory/verify/verify-downgrades-changed-file-dependency.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# Case: `verify-downgrades-changed-file-dependency`
-
-## 用例意义
-
-验证 `verify` 在硬依赖文件内容发生变更时，会把 `confirmed` entry 降级为 `needs_review`。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-- `TMPDIR/repo` 是一个已提交初始 commit 的 Git 仓库
-- 已执行：
-
-```bash
-repo-memory add --db TMPDIR/repo-memory.db --repo TMPDIR/repo --kind term --key AITask --summary "Plan 内嵌任务结构" --status confirmed --dep file:TMPDIR/repo/foo.txt:hard
-```
-
-- 在执行 `verify` 前，`TMPDIR/repo/foo.txt` 已被修改但尚未重新验证
-
-## 输入
-
-```bash
-repo-memory verify --db TMPDIR/repo-memory.db --repo TMPDIR/repo
-repo-memory list --db TMPDIR/repo-memory.db --repo repo --status needs_review
-repo-memory events --db TMPDIR/repo-memory.db --id 1
-```
-
-## 预期输出
-
-- `verify` 输出包含 `verified 1 entries, 1 downgraded, 0 stale`
-- `list` 输出包含 `term:AITask [needs_review]`
-- `events` 输出包含 `downgraded (confirmed -> needs_review)`
-
-## 断言结论
-
-- 文件变更不会直接删除知识，而是先降级为 `needs_review`
-- `verify` 会同时更新当前状态与历史事件
diff --git a/docs/tests/repo-memory/verify/verify-downgrades-entry-missing-verified-on-commit.md b/docs/tests/repo-memory/verify/verify-downgrades-entry-missing-verified-on-commit.md
deleted file mode 100644
index 3862df1..0000000
--- a/docs/tests/repo-memory/verify/verify-downgrades-entry-missing-verified-on-commit.md
+++ /dev/null
@@ -1,37 +0,0 @@
-# Case: `verify-downgrades-entry-missing-verified-on-commit`
-
-## 用例意义
-
-验证 entry 一旦缺少 `verified_on_commit`，在 repo 变得可验证后会被 `verify` 降级到 `needs_review`。
-
-## 前置条件
-
-- `TMPDIR/repo` 目录最开始不是 Git repo，但包含证据文件 `foo.txt`
-- 已执行：
-
-```bash
-repo-memory add --db TMPDIR/repo-memory.db --repo TMPDIR/repo --kind term --key AITask --summary "Recorded before git init" --status confirmed --dep file:TMPDIR/repo/foo.txt:hard
-```
-
-- 之后才把 `TMPDIR/repo` 初始化为 Git repo，并完成第一次 commit
-
-## 输入
-
-```bash
-repo-memory verify --db TMPDIR/repo-memory.db --repo TMPDIR/repo
-repo-memory list --db TMPDIR/repo-memory.db --repo repo --status needs_review
-repo-memory events --db TMPDIR/repo-memory.db --id 1
-```
-
-## 预期输出
-
-- `verify` 退出码为 `0`
-- `verify` 输出包含 `verified 1 entries, 1 downgraded, 0 stale`
-- `list` 输出包含 `term:AITask [needs_review]`
-- `events` 输出包含 `downgraded (confirmed -> needs_review)`
-- `events` 输出包含原因 `missing verified_on_commit`
-
-## 断言结论
-
-- 只要 entry 缺少 `verified_on_commit`，即使依赖文件当前存在，也不能继续保持 `confirmed`
-- 这个边界主要出现在“先写 repo-memory，后补 Git 历史”的仓库演进阶段
diff --git a/docs/tests/repo-memory/verify/verify-marks-missing-hard-dependency-stale.md b/docs/tests/repo-memory/verify/verify-marks-missing-hard-dependency-stale.md
deleted file mode 100644
index f151b5c..0000000
--- a/docs/tests/repo-memory/verify/verify-marks-missing-hard-dependency-stale.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# Case: `verify-marks-missing-hard-dependency-stale`
-
-## 用例意义
-
-验证 `verify` 在硬依赖文件已经不存在时，会把 entry 标记为 `stale`。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-- `TMPDIR/repo` 是一个已提交初始 commit 的 Git 仓库
-- 已执行：
-
-```bash
-repo-memory add --db TMPDIR/repo-memory.db --repo TMPDIR/repo --kind term --key AITask --summary "Plan 内嵌任务结构" --status confirmed --dep file:TMPDIR/repo/missing.txt:hard
-```
-
-- `TMPDIR/repo/missing.txt` 不存在
-
-## 输入
-
-```bash
-repo-memory verify --db TMPDIR/repo-memory.db --repo TMPDIR/repo
-repo-memory list --db TMPDIR/repo-memory.db --repo repo --status stale
-repo-memory events --db TMPDIR/repo-memory.db --id 1
-```
-
-## 预期输出
-
-- `verify` 输出包含 `verified 1 entries, 0 downgraded, 1 stale`
-- `list` 输出包含 `term:AITask [stale]`
-- `events` 输出包含 `marked_stale (confirmed -> stale)`
-
-## 断言结论
-
-- 缺失的硬依赖会让知识条目直接过期，而不是只进入待复核状态
-- `stale` 与 `needs_review` 是两种不同的 verify 结果
diff --git a/docs/tests/repo-memory/verify/verify-prints-no-repos-when-empty.md b/docs/tests/repo-memory/verify/verify-prints-no-repos-when-empty.md
deleted file mode 100644
index 2838222..0000000
--- a/docs/tests/repo-memory/verify/verify-prints-no-repos-when-empty.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Case: `verify-prints-no-repos-when-empty`
-
-## 用例意义
-
-验证 `verify` 在已初始化但尚未注册任何 repo 的数据库上返回稳定空结果文本。
-
-## 前置条件
-
-- 空数据库已完成 `init`
-
-## 输入
-
-```bash
-repo-memory verify --db TMPDIR/repo-memory.db
-```
-
-## 预期输出
-
-- 命令退出码为 `0`
-- stdout 等于 `no repos`
-
-## 断言结论
-
-- 没有 tracked repo 时，`verify` 走正常空结果路径
-- 调用方可以先跑 `verify`，再根据 `no repos` 决定是否需要补充 `add` 或 `ingest`
diff --git a/docs/tests/repo-memory/verify/verify-skips-explicit-repo-without-git-head.md b/docs/tests/repo-memory/verify/verify-skips-explicit-repo-without-git-head.md
deleted file mode 100644
index f1f4acb..0000000
--- a/docs/tests/repo-memory/verify/verify-skips-explicit-repo-without-git-head.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# Case: `verify-skips-explicit-repo-without-git-head`
-
-## 用例意义
-
-验证 `verify --repo <path>` 在目标目录不是 Git repo 或没有 HEAD commit 时，会返回稳定 skip 文本，而不是失败。
-
-## 前置条件
-
-- `TMPDIR/repo` 目录存在，但不是 Git repo，或者已经 `git init` 但还没有第一次 commit
-- 空数据库已完成 `init`
-
-## 输入
-
-```bash
-repo-memory verify --db TMPDIR/repo-memory.db --repo TMPDIR/repo
-repo-memory repos --db TMPDIR/repo-memory.db
-```
-
-## 预期输出
-
-- `verify` 退出码为 `0`
-- `verify` 输出包含 `TMPDIR/repo: skipped (not a git repo or no HEAD)`
-- `repos` 输出 `no repos`
-
-## 断言结论
-
-- 对显式 repo 的 verify，缺失 Git HEAD 被视为可跳过状态，不是命令失败
-- skip 发生在 repo 注册之前，因此不会额外写入 repo 记录
diff --git a/docs/tests/repo-memory/workflows/README.md b/docs/tests/repo-memory/workflows/README.md
deleted file mode 100644
index 8e92014..0000000
--- a/docs/tests/repo-memory/workflows/README.md
+++ /dev/null
@@ -1,138 +0,0 @@
-# Repo Memory Workflow Test Plan
-
-## Scope
-
-This document tracks cross-command scenarios where the main value is the
-interaction between multiple `repo-memory` subcommands.
-
-All examples assume:
-
-- isolated temp DB and repo fixtures
-- assertions follow the shared rules in [../_shared/README.md](../_shared/README.md)
-- commands use the concrete fixture paths created for the case
-
-## case: add-search-events-roundtrip
-
-### 用例意义
-
-验证 `add -> search -> events` 的主干链路可用，确保新写入的 durable knowledge 能被立即检索并带有可追溯历史。
-
-### 前置条件
-
-- `TMPDIR/repo` 是一个已提交初始 commit 的 Git 仓库
-- 证据文件 `TMPDIR/repo/app/app/src/main/java/foo/AITask.java` 已存在
-- 空数据库已完成 `init`
-
-### 输入
-
-```bash
-repo-memory add --db TMPDIR/repo-memory.db --repo TMPDIR/repo --kind term --key AITask --summary "Plan 内嵌任务结构，不是独立表" --status confirmed --source-path TMPDIR/repo/app/app/src/main/java/foo/AITask.java --source-line 42 --alias "AI Task" --dep file:TMPDIR/repo/app/app/src/main/java/foo/AITask.java:hard
-repo-memory search --db TMPDIR/repo-memory.db --repo repo --query "AI Task"
-repo-memory events --db TMPDIR/repo-memory.db --id 1
-```
-
-### 预期输出
-
-- `add` 输出 `upserted entry 1 (term:AITask)`
-- `search` 返回 `term:AITask [confirmed]`
-- `events` 以 `term:AITask [confirmed] #1` 开头，并包含 `created`
-
-### 断言结论
-
-- 新增 entry 后无需额外同步即可被搜索到
-- alias 可参与搜索命中
-- 事件历史足以追溯 durable knowledge 的创建来源
-
-## case: ingest-search-list-across-sections
-
-### 用例意义
-
-验证 `ingest -> search -> list` 可以把 `docs/ai` Markdown 中的多个 section 转成可搜索、可枚举的知识条目。
-
-### 前置条件
-
-- `TMPDIR/repo` 是一个已提交初始 commit 的 Git 仓库
-- `TMPDIR/repo/docs/ai/repo-memory.md` 包含 `Module Map` 与 `Danger Zones` 等 section
-- 目标数据库路径尚未初始化也可，因为该用例验证 `ingest` 的自动 schema bootstrap
-
-### 输入
-
-```bash
-repo-memory ingest --db TMPDIR/repo-memory.db --repo TMPDIR/repo
-repo-memory search --db TMPDIR/repo-memory.db --repo repo --query "gateway"
-repo-memory list --db TMPDIR/repo-memory.db --repo repo
-```
-
-### 预期输出
-
-- `ingest` 输出 `ingested 1 docs from ABS_REPO`
-- `search` 命中从 `Module Map` 导入的 section
-- `list` 至少包含 `module:repo-memory:module-map` 与 `danger:repo-memory:danger-zones`
-
-### 断言结论
-
-- 一个 Markdown 文件可以导出多个 durable knowledge entry
-- 导入 entry 默认状态为 `confirmed`
-- `ingest` 既完成导入，也完成 repo 注册与 schema 初始化
-
-## case: add-link-and-resolve-related-entry
-
-### 用例意义
-
-验证两个 entry 可以通过 `link` 建立关系，同时各自的历史记录仍可独立读取。
-
-### 前置条件
-
-- 空数据库已完成 `init`
-- 同一 repo 下已存在 `term:AITask` 与 `chain:ai-insight.get` 两条 entry
-
-### 输入
-
-```bash
-repo-memory link --db TMPDIR/repo-memory.db --from-id 1 --to-id 2 --relation related_to
-sqlite3 TMPDIR/repo-memory.db "SELECT relation FROM knowledge_links WHERE from_entry_id = 1 AND to_entry_id = 2;"
-repo-memory events --db TMPDIR/repo-memory.db --id 1
-repo-memory events --db TMPDIR/repo-memory.db --id 2
-```
-
-### 预期输出
-
-- `link` 输出 `linked #1 -[related_to]-> #2`
-- SQL 查询返回一行 `related_to`
-- 两个 `events` 调用仍能分别读取各自历史
-
-### 断言结论
-
-- `link` 的副作用被持久化，而不是只回显成功文本
-- entry 关系与 entry 历史是两个独立维度
-
-## case: verify-downgrades-after-repo-change
-
-### 用例意义
-
-验证 `add -> verify -> list -> events` 在 repo 内容变更后会把已确认知识降级到需要复核或过期状态。
-
-### 前置条件
-
-- `TMPDIR/repo` 是一个已提交初始 commit 的 Git 仓库
-- 已存在一个 `confirmed` entry，硬依赖 `TMPDIR/repo/foo.txt`
-- 在 `verify` 前，`foo.txt` 已被修改或删除
-
-### 输入
-
-```bash
-repo-memory verify --db TMPDIR/repo-memory.db --repo TMPDIR/repo
-repo-memory list --db TMPDIR/repo-memory.db --repo repo
-repo-memory events --db TMPDIR/repo-memory.db --id 1
-```
-
-### 预期输出
-
-- `verify` 输出包含 `verified 1 entries`
-- `list` 中相应 entry 状态变为 `needs_review` 或 `stale`
-- `events` 中新增 `downgraded` 或 `marked_stale` 事件
-
-### 断言结论
-
-- `verify` 会根据 repo 当前状态重新评估 durable knowledge
-- 状态变化不仅更新当前 entry，也会追加历史事件供后续审计
diff --git a/packages/coord-core/store/inbox_test.go b/packages/coord-core/store/inbox_test.go
index fe6d9bb..9386c09 100644
--- a/packages/coord-core/store/inbox_test.go
+++ b/packages/coord-core/store/inbox_test.go
@@ -10,6 +10,7 @@ import (
 	dbpkg "ai-workflow-skill/packages/coord-core/db"
 )
 
+// TestClaimThreadReturnsLeaseConflictAfterBusyWrite verifies claim returns a lease conflict after a concurrent write wins.
 func TestClaimThreadReturnsLeaseConflictAfterBusyWrite(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/coord-core/store/orch_test.go b/packages/coord-core/store/orch_test.go
index c2c1a65..7d06ae7 100644
--- a/packages/coord-core/store/orch_test.go
+++ b/packages/coord-core/store/orch_test.go
@@ -8,6 +8,7 @@ import (
 	dbpkg "ai-workflow-skill/packages/coord-core/db"
 )
 
+// TestRecordCheckRefreshesDependentReadyStateWhenGatePasses verifies the final passing check unlocks dependent work.
 func TestRecordCheckRefreshesDependentReadyStateWhenGatePasses(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/inbox-runtime/internal/cli/inbox/cancel_integration_test.go b/packages/inbox-runtime/internal/cli/inbox/cancel_integration_test.go
index 0197d4d..6534bb0 100644
--- a/packages/inbox-runtime/internal/cli/inbox/cancel_integration_test.go
+++ b/packages/inbox-runtime/internal/cli/inbox/cancel_integration_test.go
@@ -6,6 +6,7 @@ import (
 	"testing"
 )
 
+// TestCancelMarksThreadCancelled verifies cancel marks thread cancelled.
 func TestCancelMarksThreadCancelled(t *testing.T) {
 	t.Parallel()
 
@@ -47,6 +48,7 @@ func TestCancelMarksThreadCancelled(t *testing.T) {
 	}
 }
 
+// TestCancelPersistsReasonAndArtifact verifies cancel persists reason and artifact.
 func TestCancelPersistsReasonAndArtifact(t *testing.T) {
 	t.Parallel()
 
@@ -127,6 +129,7 @@ func TestCancelPersistsReasonAndArtifact(t *testing.T) {
 	}
 }
 
+// TestCancelRejectsWhenThreadMissing verifies cancel returns not_found for an unknown thread.
 func TestCancelRejectsWhenThreadMissing(t *testing.T) {
 	t.Parallel()
 
@@ -145,4 +148,3 @@ func TestCancelRejectsWhenThreadMissing(t *testing.T) {
 	}
 	assertErrorJSON(t, stdout, "not_found")
 }
-
diff --git a/packages/inbox-runtime/internal/cli/inbox/claim_integration_test.go b/packages/inbox-runtime/internal/cli/inbox/claim_integration_test.go
index 2ee5875..fc9b960 100644
--- a/packages/inbox-runtime/internal/cli/inbox/claim_integration_test.go
+++ b/packages/inbox-runtime/internal/cli/inbox/claim_integration_test.go
@@ -2,6 +2,7 @@ package inbox
 
 import "testing"
 
+// TestClaimAcquiresThreadLease verifies claim acquires thread lease.
 func TestClaimAcquiresThreadLease(t *testing.T) {
 	t.Parallel()
 
@@ -34,6 +35,7 @@ func TestClaimAcquiresThreadLease(t *testing.T) {
 	}
 }
 
+// TestClaimRejectsWhenThreadMissing verifies claim returns not_found for an unknown thread.
 func TestClaimRejectsWhenThreadMissing(t *testing.T) {
 	t.Parallel()
 
@@ -51,6 +53,7 @@ func TestClaimRejectsWhenThreadMissing(t *testing.T) {
 	assertErrorJSON(t, stdout, "not_found")
 }
 
+// TestClaimRejectsWhenThreadAlreadyClaimed verifies claim returns lease_conflict when the thread is already claimed.
 func TestClaimRejectsWhenThreadAlreadyClaimed(t *testing.T) {
 	t.Parallel()
 
@@ -79,6 +82,7 @@ func TestClaimRejectsWhenThreadAlreadyClaimed(t *testing.T) {
 	assertErrorJSON(t, stdout, "lease_conflict")
 }
 
+// TestClaimRecordsRequestedLeaseDuration verifies claim records requested lease duration.
 func TestClaimRecordsRequestedLeaseDuration(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/inbox-runtime/internal/cli/inbox/done_integration_test.go b/packages/inbox-runtime/internal/cli/inbox/done_integration_test.go
index f0041e9..eb8b9be 100644
--- a/packages/inbox-runtime/internal/cli/inbox/done_integration_test.go
+++ b/packages/inbox-runtime/internal/cli/inbox/done_integration_test.go
@@ -6,6 +6,7 @@ import (
 	"testing"
 )
 
+// TestDoneMarksThreadTerminal verifies done marks thread terminal.
 func TestDoneMarksThreadTerminal(t *testing.T) {
 	t.Parallel()
 
@@ -33,6 +34,7 @@ func TestDoneMarksThreadTerminal(t *testing.T) {
 	}
 }
 
+// TestDonePersistsResultBodyAndArtifact verifies done persists result body and artifact.
 func TestDonePersistsResultBodyAndArtifact(t *testing.T) {
 	t.Parallel()
 
@@ -89,6 +91,7 @@ func TestDonePersistsResultBodyAndArtifact(t *testing.T) {
 	}
 }
 
+// TestDoneRejectsNonOwner verifies done returns lease_conflict for a non-owner.
 func TestDoneRejectsNonOwner(t *testing.T) {
 	t.Parallel()
 
@@ -109,6 +112,7 @@ func TestDoneRejectsNonOwner(t *testing.T) {
 	assertErrorJSON(t, stdout, "lease_conflict")
 }
 
+// TestDoneRejectsOnTerminalThread verifies done is rejected on a terminal thread.
 func TestDoneRejectsOnTerminalThread(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/inbox-runtime/internal/cli/inbox/fail_integration_test.go b/packages/inbox-runtime/internal/cli/inbox/fail_integration_test.go
index 62bf9d1..41040ce 100644
--- a/packages/inbox-runtime/internal/cli/inbox/fail_integration_test.go
+++ b/packages/inbox-runtime/internal/cli/inbox/fail_integration_test.go
@@ -6,6 +6,7 @@ import (
 	"testing"
 )
 
+// TestFailMarksThreadFailed verifies fail marks thread failed.
 func TestFailMarksThreadFailed(t *testing.T) {
 	t.Parallel()
 
@@ -36,6 +37,7 @@ func TestFailMarksThreadFailed(t *testing.T) {
 	}
 }
 
+// TestFailPersistsFailureBodyAndArtifact verifies fail persists failure body and artifact.
 func TestFailPersistsFailureBodyAndArtifact(t *testing.T) {
 	t.Parallel()
 
@@ -92,6 +94,7 @@ func TestFailPersistsFailureBodyAndArtifact(t *testing.T) {
 	}
 }
 
+// TestFailRejectsNonOwner verifies fail returns lease_conflict for a non-owner.
 func TestFailRejectsNonOwner(t *testing.T) {
 	t.Parallel()
 
@@ -112,6 +115,7 @@ func TestFailRejectsNonOwner(t *testing.T) {
 	assertErrorJSON(t, stdout, "lease_conflict")
 }
 
+// TestFailRejectsOnTerminalThread verifies fail is rejected on a terminal thread.
 func TestFailRejectsOnTerminalThread(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/inbox-runtime/internal/cli/inbox/fetch_integration_test.go b/packages/inbox-runtime/internal/cli/inbox/fetch_integration_test.go
index efdbf4e..5ddf483 100644
--- a/packages/inbox-runtime/internal/cli/inbox/fetch_integration_test.go
+++ b/packages/inbox-runtime/internal/cli/inbox/fetch_integration_test.go
@@ -2,6 +2,7 @@ package inbox
 
 import "testing"
 
+// TestFetchReturnsPendingThreadForTargetAgent verifies fetch returns pending thread for target agent.
 func TestFetchReturnsPendingThreadForTargetAgent(t *testing.T) {
 	t.Parallel()
 
@@ -35,6 +36,7 @@ func TestFetchReturnsPendingThreadForTargetAgent(t *testing.T) {
 	}
 }
 
+// TestFetchRespectsStatusAndLimitFilters verifies fetch respects status and limit filters.
 func TestFetchRespectsStatusAndLimitFilters(t *testing.T) {
 	t.Parallel()
 
@@ -94,6 +96,7 @@ func TestFetchRespectsStatusAndLimitFilters(t *testing.T) {
 	}
 }
 
+// TestFetchUnreadUsesReadCursor verifies fetch unread uses read cursor.
 func TestFetchUnreadUsesReadCursor(t *testing.T) {
 	t.Parallel()
 
@@ -169,6 +172,7 @@ func TestFetchUnreadUsesReadCursor(t *testing.T) {
 	}
 }
 
+// TestFetchReturnsNoMatchingWorkWhenEmpty verifies fetch returns no matching work when empty.
 func TestFetchReturnsNoMatchingWorkWhenEmpty(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/inbox-runtime/internal/cli/inbox/help_contracts_test.go b/packages/inbox-runtime/internal/cli/inbox/help_contracts_test.go
index 5ca3590..c5de4f4 100644
--- a/packages/inbox-runtime/internal/cli/inbox/help_contracts_test.go
+++ b/packages/inbox-runtime/internal/cli/inbox/help_contracts_test.go
@@ -5,6 +5,7 @@ import (
 	"testing"
 )
 
+// TestInboxRootHelpExplainsWorkerLoop verifies inbox root help explains worker loop.
 func TestInboxRootHelpExplainsWorkerLoop(t *testing.T) {
 	t.Parallel()
 
@@ -25,6 +26,7 @@ func TestInboxRootHelpExplainsWorkerLoop(t *testing.T) {
 	}
 }
 
+// TestInboxUpdateHelpExplainsBlockedQuestions verifies inbox update help explains blocked questions.
 func TestInboxUpdateHelpExplainsBlockedQuestions(t *testing.T) {
 	t.Parallel()
 
@@ -45,6 +47,7 @@ func TestInboxUpdateHelpExplainsBlockedQuestions(t *testing.T) {
 	}
 }
 
+// TestInboxWaitReplyHelpExplainsBlockingPrimitive verifies inbox wait reply help explains blocking primitive.
 func TestInboxWaitReplyHelpExplainsBlockingPrimitive(t *testing.T) {
 	t.Parallel()
 
@@ -62,6 +65,7 @@ func TestInboxWaitReplyHelpExplainsBlockingPrimitive(t *testing.T) {
 	}
 }
 
+// TestInboxListHelpExplainsDifferenceFromFetch verifies inbox list help explains difference from fetch.
 func TestInboxListHelpExplainsDifferenceFromFetch(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/inbox-runtime/internal/cli/inbox/init_integration_test.go b/packages/inbox-runtime/internal/cli/inbox/init_integration_test.go
index 99b8bb0..6e786c1 100644
--- a/packages/inbox-runtime/internal/cli/inbox/init_integration_test.go
+++ b/packages/inbox-runtime/internal/cli/inbox/init_integration_test.go
@@ -5,6 +5,7 @@ import (
 	"testing"
 )
 
+// TestInitCreatesSchemaOnEmptyDB verifies init creates schema on empty DB.
 func TestInitCreatesSchemaOnEmptyDB(t *testing.T) {
 	t.Parallel()
 
@@ -28,6 +29,7 @@ func TestInitCreatesSchemaOnEmptyDB(t *testing.T) {
 	}
 }
 
+// TestInitIsIdempotentOnExistingDB verifies init is idempotent on existing DB.
 func TestInitIsIdempotentOnExistingDB(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/inbox-runtime/internal/cli/inbox/integration_test.go b/packages/inbox-runtime/internal/cli/inbox/integration_test.go
index da84f05..a268e43 100644
--- a/packages/inbox-runtime/internal/cli/inbox/integration_test.go
+++ b/packages/inbox-runtime/internal/cli/inbox/integration_test.go
@@ -7,6 +7,7 @@ import (
 	"time"
 )
 
+// TestInboxLifecycle verifies inbox lifecycle.
 func TestInboxLifecycle(t *testing.T) {
 	t.Parallel()
 
@@ -171,6 +172,7 @@ func TestInboxLifecycle(t *testing.T) {
 	}
 }
 
+// TestInboxFailLifecycle verifies inbox fail lifecycle.
 func TestInboxFailLifecycle(t *testing.T) {
 	t.Parallel()
 
@@ -238,6 +240,7 @@ func TestInboxFailLifecycle(t *testing.T) {
 	}
 }
 
+// TestInboxRenewWaitReplyAndCancel verifies inbox renew wait reply and cancel.
 func TestInboxRenewWaitReplyAndCancel(t *testing.T) {
 	t.Parallel()
 
@@ -387,6 +390,7 @@ func TestInboxRenewWaitReplyAndCancel(t *testing.T) {
 	}
 }
 
+// TestInboxWatchListUnreadAndAppend verifies inbox watch list unread and append.
 func TestInboxWatchListUnreadAndAppend(t *testing.T) {
 	t.Parallel()
 
@@ -544,6 +548,7 @@ func TestInboxWatchListUnreadAndAppend(t *testing.T) {
 	}
 }
 
+// TestInboxUnreadReadCursor verifies inbox unread read cursor.
 func TestInboxUnreadReadCursor(t *testing.T) {
 	t.Parallel()
 
@@ -634,6 +639,7 @@ func TestInboxUnreadReadCursor(t *testing.T) {
 	}
 }
 
+// TestInboxJSONErrorsAndExitCodes verifies inbox JSON errors and exit codes.
 func TestInboxJSONErrorsAndExitCodes(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/inbox-runtime/internal/cli/inbox/list_integration_test.go b/packages/inbox-runtime/internal/cli/inbox/list_integration_test.go
index cd6081e..09ece95 100644
--- a/packages/inbox-runtime/internal/cli/inbox/list_integration_test.go
+++ b/packages/inbox-runtime/internal/cli/inbox/list_integration_test.go
@@ -6,6 +6,7 @@ import (
 	"time"
 )
 
+// TestListFiltersByStatus verifies list returns only threads matching the requested statuses.
 func TestListFiltersByStatus(t *testing.T) {
 	t.Parallel()
 
@@ -53,6 +54,7 @@ func TestListFiltersByStatus(t *testing.T) {
 	}
 }
 
+// TestListFiltersByCreatedBy verifies list filters threads by creator.
 func TestListFiltersByCreatedBy(t *testing.T) {
 	t.Parallel()
 
@@ -88,6 +90,7 @@ func TestListFiltersByCreatedBy(t *testing.T) {
 	}
 }
 
+// TestListFiltersByAssignedTo verifies list filters threads by assignee.
 func TestListFiltersByAssignedTo(t *testing.T) {
 	t.Parallel()
 
@@ -126,6 +129,7 @@ func TestListFiltersByAssignedTo(t *testing.T) {
 	}
 }
 
+// TestListRespectsLimit verifies list returns only the most recent rows up to the requested limit.
 func TestListRespectsLimit(t *testing.T) {
 	t.Parallel()
 
@@ -180,4 +184,3 @@ func createThreadForList(t *testing.T, dbPath, from, to, subject, summary string
 	mustDecodeJSON(t, sendOut, &sendResp)
 	return nestedString(t, sendResp, "data", "thread", "thread_id")
 }
-
diff --git a/packages/inbox-runtime/internal/cli/inbox/renew_integration_test.go b/packages/inbox-runtime/internal/cli/inbox/renew_integration_test.go
index 05ed1ee..8c4b642 100644
--- a/packages/inbox-runtime/internal/cli/inbox/renew_integration_test.go
+++ b/packages/inbox-runtime/internal/cli/inbox/renew_integration_test.go
@@ -2,6 +2,7 @@ package inbox
 
 import "testing"
 
+// TestRenewExtendsActiveLease verifies renew extends active lease.
 func TestRenewExtendsActiveLease(t *testing.T) {
 	t.Parallel()
 
@@ -53,6 +54,7 @@ func TestRenewExtendsActiveLease(t *testing.T) {
 	}
 }
 
+// TestRenewRejectsNonOwner verifies renew returns lease_conflict for a non-owner.
 func TestRenewRejectsNonOwner(t *testing.T) {
 	t.Parallel()
 
@@ -82,6 +84,7 @@ func TestRenewRejectsNonOwner(t *testing.T) {
 	assertErrorJSON(t, stdout, "lease_conflict")
 }
 
+// TestRenewRejectsWithoutActiveLease verifies renew is rejected when there is no active lease.
 func TestRenewRejectsWithoutActiveLease(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/inbox-runtime/internal/cli/inbox/reply_integration_test.go b/packages/inbox-runtime/internal/cli/inbox/reply_integration_test.go
index 3f68a62..fd31005 100644
--- a/packages/inbox-runtime/internal/cli/inbox/reply_integration_test.go
+++ b/packages/inbox-runtime/internal/cli/inbox/reply_integration_test.go
@@ -6,6 +6,7 @@ import (
 	"testing"
 )
 
+// TestReplyAddsAnswerMessage verifies reply appends an answer message to the thread.
 func TestReplyAddsAnswerMessage(t *testing.T) {
 	t.Parallel()
 
@@ -37,6 +38,7 @@ func TestReplyAddsAnswerMessage(t *testing.T) {
 	}
 }
 
+// TestReplySupportsControlKind verifies reply accepts control messages in addition to answers.
 func TestReplySupportsControlKind(t *testing.T) {
 	t.Parallel()
 
@@ -63,6 +65,7 @@ func TestReplySupportsControlKind(t *testing.T) {
 	}
 }
 
+// TestReplyAttachesArtifact verifies reply persists an attached artifact on the reply message.
 func TestReplyAttachesArtifact(t *testing.T) {
 	t.Parallel()
 
@@ -115,6 +118,7 @@ func TestReplyAttachesArtifact(t *testing.T) {
 	}
 }
 
+// TestReplyRejectsInvalidPayloadJSON verifies reply returns invalid_input for malformed payload JSON.
 func TestReplyRejectsInvalidPayloadJSON(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/inbox-runtime/internal/cli/inbox/send_integration_test.go b/packages/inbox-runtime/internal/cli/inbox/send_integration_test.go
index ef167e4..81369c3 100644
--- a/packages/inbox-runtime/internal/cli/inbox/send_integration_test.go
+++ b/packages/inbox-runtime/internal/cli/inbox/send_integration_test.go
@@ -6,6 +6,7 @@ import (
 	"testing"
 )
 
+// TestSendCreatesNewThread verifies send creates a pending thread with the initial task message.
 func TestSendCreatesNewThread(t *testing.T) {
 	t.Parallel()
 
@@ -45,6 +46,7 @@ func TestSendCreatesNewThread(t *testing.T) {
 	}
 }
 
+// TestSendAppendsMessageToExistingThread verifies send appends a message without creating a new thread.
 func TestSendAppendsMessageToExistingThread(t *testing.T) {
 	t.Parallel()
 
@@ -88,6 +90,7 @@ func TestSendAppendsMessageToExistingThread(t *testing.T) {
 	}
 }
 
+// TestSendReadsBodyFromBodyFile verifies send loads the message body from a body file.
 func TestSendReadsBodyFromBodyFile(t *testing.T) {
 	t.Parallel()
 
@@ -139,6 +142,7 @@ func TestSendReadsBodyFromBodyFile(t *testing.T) {
 	}
 }
 
+// TestSendAttachesArtifactWithMetadata verifies send persists an artifact and its metadata on the message.
 func TestSendAttachesArtifactWithMetadata(t *testing.T) {
 	t.Parallel()
 
@@ -190,6 +194,7 @@ func TestSendAttachesArtifactWithMetadata(t *testing.T) {
 	}
 }
 
+// TestSendRejectsInvalidPayloadJSON verifies send returns invalid_input for malformed payload JSON.
 func TestSendRejectsInvalidPayloadJSON(t *testing.T) {
 	t.Parallel()
 
@@ -209,6 +214,7 @@ func TestSendRejectsInvalidPayloadJSON(t *testing.T) {
 	assertErrorJSON(t, stdout, "invalid_input")
 }
 
+// TestSendRejectsInvalidArtifactMetadataJSON verifies send returns invalid_input for malformed artifact metadata JSON.
 func TestSendRejectsInvalidArtifactMetadataJSON(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/inbox-runtime/internal/cli/inbox/show_integration_test.go b/packages/inbox-runtime/internal/cli/inbox/show_integration_test.go
index f22868c..1ad7b9d 100644
--- a/packages/inbox-runtime/internal/cli/inbox/show_integration_test.go
+++ b/packages/inbox-runtime/internal/cli/inbox/show_integration_test.go
@@ -6,6 +6,7 @@ import (
 	"testing"
 )
 
+// TestShowReturnsThreadAndMessageHistory verifies show returns the thread with ordered message history.
 func TestShowReturnsThreadAndMessageHistory(t *testing.T) {
 	t.Parallel()
 
@@ -65,6 +66,7 @@ func TestShowReturnsThreadAndMessageHistory(t *testing.T) {
 	}
 }
 
+// TestShowIncludesArtifactsPerMessage verifies show includes message artifacts in the thread payload.
 func TestShowIncludesArtifactsPerMessage(t *testing.T) {
 	t.Parallel()
 
@@ -122,6 +124,7 @@ func TestShowIncludesArtifactsPerMessage(t *testing.T) {
 	}
 }
 
+// TestShowMarkReadAdvancesReadCursor verifies show --mark-read clears the thread from unread fetch results.
 func TestShowMarkReadAdvancesReadCursor(t *testing.T) {
 	t.Parallel()
 
@@ -176,6 +179,7 @@ func TestShowMarkReadAdvancesReadCursor(t *testing.T) {
 	assertErrorJSON(t, stdout, "no_matching_work")
 }
 
+// TestShowRejectsWhenThreadMissing verifies show returns not_found for an unknown thread.
 func TestShowRejectsWhenThreadMissing(t *testing.T) {
 	t.Parallel()
 
@@ -193,4 +197,3 @@ func TestShowRejectsWhenThreadMissing(t *testing.T) {
 	}
 	assertErrorJSON(t, stdout, "not_found")
 }
-
diff --git a/packages/inbox-runtime/internal/cli/inbox/update_integration_test.go b/packages/inbox-runtime/internal/cli/inbox/update_integration_test.go
index b0185b8..b1df7d2 100644
--- a/packages/inbox-runtime/internal/cli/inbox/update_integration_test.go
+++ b/packages/inbox-runtime/internal/cli/inbox/update_integration_test.go
@@ -59,6 +59,7 @@ func lastThreadMessageFromShow(t *testing.T, showResp map[string]any) map[string
 	return lastMessage
 }
 
+// TestUpdateMovesThreadToInProgress verifies update moves thread to in progress.
 func TestUpdateMovesThreadToInProgress(t *testing.T) {
 	t.Parallel()
 
@@ -90,6 +91,7 @@ func TestUpdateMovesThreadToInProgress(t *testing.T) {
 	}
 }
 
+// TestUpdateMovesThreadToBlockedWithPayload verifies update moves thread to blocked with payload.
 func TestUpdateMovesThreadToBlockedWithPayload(t *testing.T) {
 	t.Parallel()
 
@@ -125,6 +127,7 @@ func TestUpdateMovesThreadToBlockedWithPayload(t *testing.T) {
 	}
 }
 
+// TestUpdateAcceptsBodyFileAndArtifact verifies update accepts body file and artifact.
 func TestUpdateAcceptsBodyFileAndArtifact(t *testing.T) {
 	t.Parallel()
 
@@ -182,6 +185,7 @@ func TestUpdateAcceptsBodyFileAndArtifact(t *testing.T) {
 	}
 }
 
+// TestUpdateRejectsInvalidPayloadJSON verifies update returns invalid_input for malformed payload JSON.
 func TestUpdateRejectsInvalidPayloadJSON(t *testing.T) {
 	t.Parallel()
 
@@ -204,6 +208,7 @@ func TestUpdateRejectsInvalidPayloadJSON(t *testing.T) {
 	assertErrorJSON(t, stdout, "invalid_input")
 }
 
+// TestUpdateRejectsNonOwner verifies update rejects a non-owner.
 func TestUpdateRejectsNonOwner(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/inbox-runtime/internal/cli/inbox/wait_reply_integration_test.go b/packages/inbox-runtime/internal/cli/inbox/wait_reply_integration_test.go
index a2b31a1..efc5aea 100644
--- a/packages/inbox-runtime/internal/cli/inbox/wait_reply_integration_test.go
+++ b/packages/inbox-runtime/internal/cli/inbox/wait_reply_integration_test.go
@@ -13,6 +13,7 @@ type waitReplyCommandResult struct {
 	exit   int
 }
 
+// TestWaitReplyWakesOnAnswerAfterMessage verifies wait reply wakes on answer after message.
 func TestWaitReplyWakesOnAnswerAfterMessage(t *testing.T) {
 	t.Parallel()
 
@@ -67,6 +68,7 @@ func TestWaitReplyWakesOnAnswerAfterMessage(t *testing.T) {
 	}
 }
 
+// TestWaitReplyCanStartFromAfterEvent verifies wait reply can start from after event.
 func TestWaitReplyCanStartFromAfterEvent(t *testing.T) {
 	t.Parallel()
 
@@ -155,6 +157,7 @@ func TestWaitReplyCanStartFromAfterEvent(t *testing.T) {
 	}
 }
 
+// TestWaitReplyTimesOutWhenNoReply verifies wait reply times out when no reply.
 func TestWaitReplyTimesOutWhenNoReply(t *testing.T) {
 	t.Parallel()
 
@@ -218,4 +221,3 @@ func seedBlockedThreadForWaitReply(t *testing.T, dbPath string) (threadID string
 	blockedMessageID = nestedString(t, blockedResp, "data", "message", "message_id")
 	return threadID, blockedMessageID
 }
-
diff --git a/packages/inbox-runtime/internal/cli/inbox/watch_integration_test.go b/packages/inbox-runtime/internal/cli/inbox/watch_integration_test.go
index 8d87eb5..ceecf38 100644
--- a/packages/inbox-runtime/internal/cli/inbox/watch_integration_test.go
+++ b/packages/inbox-runtime/internal/cli/inbox/watch_integration_test.go
@@ -12,6 +12,7 @@ type watchCommandResult struct {
 	exit   int
 }
 
+// TestWatchWakesOnMatchingThread verifies watch wakes on matching thread.
 func TestWatchWakesOnMatchingThread(t *testing.T) {
 	t.Parallel()
 
@@ -78,6 +79,7 @@ func TestWatchWakesOnMatchingThread(t *testing.T) {
 	}
 }
 
+// TestWatchRespectsStatusFilter verifies watch respects status filter.
 func TestWatchRespectsStatusFilter(t *testing.T) {
 	t.Parallel()
 
@@ -149,6 +151,7 @@ func TestWatchRespectsStatusFilter(t *testing.T) {
 	}
 }
 
+// TestWatchTimesOutWithNoActivity verifies watch times out with no_matching_work when there is no activity.
 func TestWatchTimesOutWithNoActivity(t *testing.T) {
 	t.Parallel()
 
@@ -168,4 +171,3 @@ func TestWatchTimesOutWithNoActivity(t *testing.T) {
 	}
 	assertErrorJSON(t, stdout, "no_matching_work")
 }
-
diff --git a/packages/operator-api/internal/httpapi/router_test.go b/packages/operator-api/internal/httpapi/router_test.go
index a968d85..57e9422 100644
--- a/packages/operator-api/internal/httpapi/router_test.go
+++ b/packages/operator-api/internal/httpapi/router_test.go
@@ -9,11 +9,12 @@ import (
 	"testing"
 	"time"
 
-	"ai-workflow-skill/packages/operator-api/internal/app"
 	dbpkg "ai-workflow-skill/packages/coord-core/db"
 	"ai-workflow-skill/packages/coord-core/store"
+	"ai-workflow-skill/packages/operator-api/internal/app"
 )
 
+// TestRouterExposesReadOnlyWebEndpoints verifies the read-only web endpoints return seeded coordination data.
 func TestRouterExposesReadOnlyWebEndpoints(t *testing.T) {
 	t.Parallel()
 
@@ -107,6 +108,7 @@ func TestRouterExposesReadOnlyWebEndpoints(t *testing.T) {
 	assertStatusAndJSONField(t, handler, "/api/threads/"+dispatch.Attempt.ThreadID, http.StatusOK, []string{"thread", "thread", "thread_id"}, dispatch.Attempt.ThreadID)
 }
 
+// TestRouterMapsNotFoundErrors verifies missing resources map to a not_found API error.
 func TestRouterMapsNotFoundErrors(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/orch-runtime/internal/cli/orch/command_contracts_core_test.go b/packages/orch-runtime/internal/cli/orch/command_contracts_core_test.go
index d3099ce..57f0b1b 100644
--- a/packages/orch-runtime/internal/cli/orch/command_contracts_core_test.go
+++ b/packages/orch-runtime/internal/cli/orch/command_contracts_core_test.go
@@ -7,6 +7,7 @@ import (
 	"testing"
 )
 
+// TestOrchRunShowReturnsRunSummaryAndTaskCounts verifies orch run show returns run summary and task counts.
 func TestOrchRunShowReturnsRunSummaryAndTaskCounts(t *testing.T) {
 	t.Parallel()
 
@@ -67,6 +68,7 @@ func TestOrchRunShowReturnsRunSummaryAndTaskCounts(t *testing.T) {
 	}
 }
 
+// TestOrchRunShowRejectsMissingRun verifies orch run show rejects a missing run with a not_found error.
 func TestOrchRunShowRejectsMissingRun(t *testing.T) {
 	t.Parallel()
 
@@ -84,6 +86,7 @@ func TestOrchRunShowRejectsMissingRun(t *testing.T) {
 	assertErrorJSON(t, stdout, "not_found")
 }
 
+// TestOrchTaskAddRejectsInvalidAcceptanceJSON verifies orch task add rejects invalid acceptance JSON.
 func TestOrchTaskAddRejectsInvalidAcceptanceJSON(t *testing.T) {
 	t.Parallel()
 
@@ -114,6 +117,7 @@ func TestOrchTaskAddRejectsInvalidAcceptanceJSON(t *testing.T) {
 	assertErrorMessageContains(t, stdout, "acceptance-json must be valid JSON")
 }
 
+// TestOrchTaskAddRejectsInvalidPriority verifies orch task add rejects invalid priority.
 func TestOrchTaskAddRejectsInvalidPriority(t *testing.T) {
 	t.Parallel()
 
@@ -144,6 +148,7 @@ func TestOrchTaskAddRejectsInvalidPriority(t *testing.T) {
 	assertErrorMessageContains(t, stdout, "priority must be one of low, normal, high")
 }
 
+// TestOrchTaskAddSnapshotsSpecAndVerificationPolicy verifies orch task add snapshots spec and verification policy.
 func TestOrchTaskAddSnapshotsSpecAndVerificationPolicy(t *testing.T) {
 	t.Parallel()
 
@@ -200,6 +205,7 @@ func TestOrchTaskAddSnapshotsSpecAndVerificationPolicy(t *testing.T) {
 	}
 }
 
+// TestOrchTaskAddRejectsSpecSHAMismatch verifies orch task add rejects spec SHA mismatch.
 func TestOrchTaskAddRejectsSpecSHAMismatch(t *testing.T) {
 	t.Parallel()
 
@@ -236,6 +242,7 @@ func TestOrchTaskAddRejectsSpecSHAMismatch(t *testing.T) {
 	assertErrorMessageContains(t, stdout, "spec-sha does not match spec-file contents")
 }
 
+// TestOrchReadyOrdersByPriorityAndRespectsLimit verifies orch ready orders by priority and respects limit.
 func TestOrchReadyOrdersByPriorityAndRespectsLimit(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/orch-runtime/internal/cli/orch/command_contracts_edges_test.go b/packages/orch-runtime/internal/cli/orch/command_contracts_edges_test.go
index 642e4ae..7aa989e 100644
--- a/packages/orch-runtime/internal/cli/orch/command_contracts_edges_test.go
+++ b/packages/orch-runtime/internal/cli/orch/command_contracts_edges_test.go
@@ -5,6 +5,7 @@ import (
 	"testing"
 )
 
+// TestOrchAnswerAcceptsPayloadJSONWithoutBody verifies orch answer accepts payload JSON without body.
 func TestOrchAnswerAcceptsPayloadJSONWithoutBody(t *testing.T) {
 	t.Parallel()
 
@@ -71,6 +72,7 @@ func TestOrchAnswerAcceptsPayloadJSONWithoutBody(t *testing.T) {
 	}
 }
 
+// TestOrchAnswerRejectsEmptyBodyAndPayload verifies orch answer rejects an empty body and payload with an invalid_input error.
 func TestOrchAnswerRejectsEmptyBodyAndPayload(t *testing.T) {
 	t.Parallel()
 
@@ -90,6 +92,7 @@ func TestOrchAnswerRejectsEmptyBodyAndPayload(t *testing.T) {
 	assertErrorJSON(t, stdout, "invalid_input")
 }
 
+// TestOrchCleanupRejectsAttemptWithoutTask verifies orch cleanup rejects an attempt given without a task with an invalid_input error.
 func TestOrchCleanupRejectsAttemptWithoutTask(t *testing.T) {
 	t.Parallel()
 
@@ -117,6 +120,7 @@ func TestOrchCleanupRejectsAttemptWithoutTask(t *testing.T) {
 	assertErrorJSON(t, stdout, "invalid_input")
 }
 
+// TestOrchCleanupReturnsNoMatchingWorkWhenFiltersMiss verifies orch cleanup returns no matching work when filters miss.
 func TestOrchCleanupReturnsNoMatchingWorkWhenFiltersMiss(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/orch-runtime/internal/cli/orch/command_contracts_remaining_test.go b/packages/orch-runtime/internal/cli/orch/command_contracts_remaining_test.go
index d2d3bec..0498b49 100644
--- a/packages/orch-runtime/internal/cli/orch/command_contracts_remaining_test.go
+++ b/packages/orch-runtime/internal/cli/orch/command_contracts_remaining_test.go
@@ -6,6 +6,7 @@ import (
 	"testing"
 )
 
+// TestOrchRunInitCreatesNewRun verifies `run init` persists a new run that `run show` can read back.
 func TestOrchRunInitCreatesNewRun(t *testing.T) {
 	t.Parallel()
 
@@ -56,6 +57,7 @@ func TestOrchRunInitCreatesNewRun(t *testing.T) {
 	}
 }
 
+// TestOrchDispatchCreatesAttemptAndThreadForReadyTask verifies `dispatch` creates the first attempt, thread, and task message for a ready task.
 func TestOrchDispatchCreatesAttemptAndThreadForReadyTask(t *testing.T) {
 	t.Parallel()
 
@@ -116,6 +118,7 @@ func TestOrchDispatchCreatesAttemptAndThreadForReadyTask(t *testing.T) {
 	}
 }
 
+// TestOrchBlockedListsLatestQuestionForBlockedTask verifies `blocked` returns the latest blocked question for a task after reconcile.
 func TestOrchBlockedListsLatestQuestionForBlockedTask(t *testing.T) {
 	t.Parallel()
 
@@ -277,6 +280,7 @@ func TestOrchBlockedListsLatestQuestionForBlockedTask(t *testing.T) {
 	}
 }
 
+// TestOrchStatusReturnsRunSummaryAndTaskList verifies `status` returns the run summary together with the current task list.
 func TestOrchStatusReturnsRunSummaryAndTaskList(t *testing.T) {
 	t.Parallel()
 
@@ -393,6 +397,7 @@ func TestOrchStatusReturnsRunSummaryAndTaskList(t *testing.T) {
 	}
 }
 
+// TestOrchStatusAutoReconcilesAndIncludesBlockedContext verifies `status` reconciles inbox state first and includes blocked context.
 func TestOrchStatusAutoReconcilesAndIncludesBlockedContext(t *testing.T) {
 	t.Parallel()
 
@@ -496,6 +501,7 @@ func TestOrchStatusAutoReconcilesAndIncludesBlockedContext(t *testing.T) {
 	}
 }
 
+// TestOrchReconcileMapsFailedThreadToTerminalTaskState verifies `reconcile` maps a failed inbox thread into the terminal task state.
 func TestOrchReconcileMapsFailedThreadToTerminalTaskState(t *testing.T) {
 	t.Parallel()
 
@@ -601,6 +607,7 @@ func TestOrchReconcileMapsFailedThreadToTerminalTaskState(t *testing.T) {
 	}
 }
 
+// TestOrchWorkflowCodeModeDispatchToCleanup verifies the code-mode workflow runs from dispatch through cleanup.
 func TestOrchWorkflowCodeModeDispatchToCleanup(t *testing.T) {
 	t.Parallel()
 
@@ -703,6 +710,7 @@ func TestOrchWorkflowCodeModeDispatchToCleanup(t *testing.T) {
 	}
 }
 
+// TestOrchWorkflowCouncilReviewEndToEnd verifies the council review workflow completes end to end.
 func TestOrchWorkflowCouncilReviewEndToEnd(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/orch-runtime/internal/cli/orch/council_report_contracts_test.go b/packages/orch-runtime/internal/cli/orch/council_report_contracts_test.go
index 5114b5c..ec8d97c 100644
--- a/packages/orch-runtime/internal/cli/orch/council_report_contracts_test.go
+++ b/packages/orch-runtime/internal/cli/orch/council_report_contracts_test.go
@@ -7,6 +7,7 @@ import (
 	"testing"
 )
 
+// TestOrchCouncilReportRejectsBeforeTally verifies orch council report rejects before tally.
 func TestOrchCouncilReportRejectsBeforeTally(t *testing.T) {
 	t.Parallel()
 
@@ -38,6 +39,7 @@ func TestOrchCouncilReportRejectsBeforeTally(t *testing.T) {
 	}
 }
 
+// TestOrchCouncilReportRejectsInvalidShow verifies orch council report rejects invalid show.
 func TestOrchCouncilReportRejectsInvalidShow(t *testing.T) {
 	t.Parallel()
 
@@ -65,6 +67,7 @@ func TestOrchCouncilReportRejectsInvalidShow(t *testing.T) {
 	}
 }
 
+// TestOrchCouncilReportDefaultsToConsensusForOnlyUnanimousRun verifies orch council report defaults to consensus for only unanimous run.
 func TestOrchCouncilReportDefaultsToConsensusForOnlyUnanimousRun(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/orch-runtime/internal/cli/orch/help_contracts_test.go b/packages/orch-runtime/internal/cli/orch/help_contracts_test.go
index c6d0d2b..fc9a41b 100644
--- a/packages/orch-runtime/internal/cli/orch/help_contracts_test.go
+++ b/packages/orch-runtime/internal/cli/orch/help_contracts_test.go
@@ -5,6 +5,7 @@ import (
 	"testing"
 )
 
+// TestOrchRootHelpExplainsLeaderWorkflow verifies orch root help explains leader workflow.
 func TestOrchRootHelpExplainsLeaderWorkflow(t *testing.T) {
 	t.Parallel()
 
@@ -25,6 +26,7 @@ func TestOrchRootHelpExplainsLeaderWorkflow(t *testing.T) {
 	}
 }
 
+// TestOrchDispatchHelpExplainsExecutionModes verifies orch dispatch help explains execution modes.
 func TestOrchDispatchHelpExplainsExecutionModes(t *testing.T) {
 	t.Parallel()
 
@@ -45,6 +47,7 @@ func TestOrchDispatchHelpExplainsExecutionModes(t *testing.T) {
 	}
 }
 
+// TestOrchCouncilStartHelpExplainsWorkflow verifies orch council start help explains workflow.
 func TestOrchCouncilStartHelpExplainsWorkflow(t *testing.T) {
 	t.Parallel()
 
@@ -62,6 +65,7 @@ func TestOrchCouncilStartHelpExplainsWorkflow(t *testing.T) {
 	}
 }
 
+// TestOrchStatusHelpExplainsDashboardRole verifies orch status help explains dashboard role.
 func TestOrchStatusHelpExplainsDashboardRole(t *testing.T) {
 	t.Parallel()
 
@@ -79,6 +83,7 @@ func TestOrchStatusHelpExplainsDashboardRole(t *testing.T) {
 	}
 }
 
+// TestOrchCleanupHelpExplainsScopeFlags verifies orch cleanup help explains scope flags.
 func TestOrchCleanupHelpExplainsScopeFlags(t *testing.T) {
 	t.Parallel()
 
@@ -99,6 +104,7 @@ func TestOrchCleanupHelpExplainsScopeFlags(t *testing.T) {
 	}
 }
 
+// TestOrchVerifyHelpExplainsGateWorkflow verifies orch verify help explains gate workflow.
 func TestOrchVerifyHelpExplainsGateWorkflow(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/orch-runtime/internal/cli/orch/integration_test.go b/packages/orch-runtime/internal/cli/orch/integration_test.go
index 721cce2..db02210 100644
--- a/packages/orch-runtime/internal/cli/orch/integration_test.go
+++ b/packages/orch-runtime/internal/cli/orch/integration_test.go
@@ -10,6 +10,7 @@ import (
 	"time"
 )
 
+// TestOrchRunDispatchReconcileLifecycle verifies a task moves from ready to done through dispatch and reconcile.
 func TestOrchRunDispatchReconcileLifecycle(t *testing.T) {
 	t.Parallel()
 
@@ -183,6 +184,7 @@ func TestOrchRunDispatchReconcileLifecycle(t *testing.T) {
 	}
 }
 
+// TestOrchVerificationGateLifecycle verifies required checks keep a task gated until every required result passes.
 func TestOrchVerificationGateLifecycle(t *testing.T) {
 	t.Parallel()
 
@@ -360,6 +362,7 @@ func TestOrchVerificationGateLifecycle(t *testing.T) {
 	}
 }
 
+// TestOrchDependencyBlockedAndAnswerFlow verifies blocked dependency work resumes after the leader answers the worker question.
 func TestOrchDependencyBlockedAndAnswerFlow(t *testing.T) {
 	t.Parallel()
 
@@ -636,6 +639,7 @@ func TestOrchDependencyBlockedAndAnswerFlow(t *testing.T) {
 	}
 }
 
+// TestOrchDispatchRejectsNonReadyTask verifies `dispatch` rejects a task that is not ready with an invalid_state error.
 func TestOrchDispatchRejectsNonReadyTask(t *testing.T) {
 	t.Parallel()
 
@@ -691,6 +695,7 @@ func TestOrchDispatchRejectsNonReadyTask(t *testing.T) {
 	assertErrorJSON(t, stdout, "invalid_state")
 }
 
+// TestOrchDispatchCodeModeCreatesWorktree verifies code-mode dispatch allocates and records a worktree for the attempt.
 func TestOrchDispatchCodeModeCreatesWorktree(t *testing.T) {
 	t.Parallel()
 
@@ -810,6 +815,7 @@ func TestOrchDispatchCodeModeCreatesWorktree(t *testing.T) {
 	}
 }
 
+// TestOrchDispatchCodeModeRejectsDirtyRepoWithoutBaseRef verifies code-mode dispatch rejects a dirty repo when no base ref is provided.
 func TestOrchDispatchCodeModeRejectsDirtyRepoWithoutBaseRef(t *testing.T) {
 	t.Parallel()
 
@@ -859,6 +865,7 @@ func TestOrchDispatchCodeModeRejectsDirtyRepoWithoutBaseRef(t *testing.T) {
 	}
 }
 
+// TestOrchDispatchCodeModeAllowsExplicitBaseRefOnDirtyRepo verifies code-mode dispatch accepts a dirty repo when an explicit base ref is set.
 func TestOrchDispatchCodeModeAllowsExplicitBaseRefOnDirtyRepo(t *testing.T) {
 	t.Parallel()
 
@@ -912,6 +919,7 @@ func TestOrchDispatchCodeModeAllowsExplicitBaseRefOnDirtyRepo(t *testing.T) {
 	}
 }
 
+// TestOrchDispatchRequiresExplicitExecutionMode verifies `dispatch` requires an explicit execution mode.
 func TestOrchDispatchRequiresExplicitExecutionMode(t *testing.T) {
 	dbPath := filepath.Join(t.TempDir(), "coord.db")
 
@@ -947,6 +955,7 @@ func TestOrchDispatchRequiresExplicitExecutionMode(t *testing.T) {
 	assertErrorJSON(t, stdout, "invalid_input")
 }
 
+// TestOrchDispatchAnalysisModeSkipsWorktree verifies analysis-mode dispatch skips worktree creation.
 func TestOrchDispatchAnalysisModeSkipsWorktree(t *testing.T) {
 	t.Parallel()
 
@@ -999,6 +1008,7 @@ func TestOrchDispatchAnalysisModeSkipsWorktree(t *testing.T) {
 	}
 }
 
+// TestOrchWaitWakesOnBlockedEvent verifies `wait` wakes when a blocked event is emitted for the run.
 func TestOrchWaitWakesOnBlockedEvent(t *testing.T) {
 	t.Parallel()
 
@@ -1115,6 +1125,7 @@ func TestOrchWaitWakesOnBlockedEvent(t *testing.T) {
 	}
 }
 
+// TestOrchWaitTimesOutWithoutMatchingEvent verifies `wait` times out when no matching run event arrives.
 func TestOrchWaitTimesOutWithoutMatchingEvent(t *testing.T) {
 	t.Parallel()
 
@@ -1152,6 +1163,7 @@ func TestOrchWaitTimesOutWithoutMatchingEvent(t *testing.T) {
 	}
 }
 
+// TestOrchWaitDefaultsWakeOnVerifyingEvent verifies `wait` wakes on verifying events when no explicit event filter is set.
 func TestOrchWaitDefaultsWakeOnVerifyingEvent(t *testing.T) {
 	t.Parallel()
 
@@ -1277,6 +1289,7 @@ func TestOrchWaitDefaultsWakeOnVerifyingEvent(t *testing.T) {
 	}
 }
 
+// TestOrchRetryCreatesNewAttempt verifies `retry` creates a new attempt after a failed task.
 func TestOrchRetryCreatesNewAttempt(t *testing.T) {
 	t.Parallel()
 
@@ -1375,6 +1388,7 @@ func TestOrchRetryCreatesNewAttempt(t *testing.T) {
 	}
 }
 
+// TestOrchReassignCancelsOldThreadAndDispatchesNewAttempt verifies `reassign` cancels the old thread and dispatches a replacement attempt.
 func TestOrchReassignCancelsOldThreadAndDispatchesNewAttempt(t *testing.T) {
 	t.Parallel()
 
@@ -1482,6 +1496,7 @@ func TestOrchReassignCancelsOldThreadAndDispatchesNewAttempt(t *testing.T) {
 	}
 }
 
+// TestOrchCancelTaskAndRun verifies `cancel` can stop both a single task and the whole run.
 func TestOrchCancelTaskAndRun(t *testing.T) {
 	t.Parallel()
 
@@ -1612,6 +1627,7 @@ func TestOrchCancelTaskAndRun(t *testing.T) {
 	}
 }
 
+// TestOrchCleanupRemovesCompletedWorktree verifies `cleanup` removes the worktree for a completed attempt.
 func TestOrchCleanupRemovesCompletedWorktree(t *testing.T) {
 	t.Parallel()
 
@@ -1699,6 +1715,7 @@ func TestOrchCleanupRemovesCompletedWorktree(t *testing.T) {
 	}
 }
 
+// TestOrchCouncilStartDispatchesThreeReviewers verifies `council start` dispatches three reviewer tasks.
 func TestOrchCouncilStartDispatchesThreeReviewers(t *testing.T) {
 	t.Parallel()
 
@@ -1842,6 +1859,7 @@ func TestOrchCouncilStartDispatchesThreeReviewers(t *testing.T) {
 	}
 }
 
+// TestOrchCouncilWaitWakesWhenAllReviewersComplete verifies `council wait` wakes once all reviewers finish.
 func TestOrchCouncilWaitWakesWhenAllReviewersComplete(t *testing.T) {
 	t.Parallel()
 
@@ -1975,6 +1993,7 @@ func TestOrchCouncilWaitWakesWhenAllReviewersComplete(t *testing.T) {
 	}
 }
 
+// TestOrchCouncilWaitTimesOutWhenReviewersIncomplete verifies `council wait` times out while reviewers are still incomplete.
 func TestOrchCouncilWaitTimesOutWhenReviewersIncomplete(t *testing.T) {
 	t.Parallel()
 
@@ -2012,6 +2031,7 @@ func TestOrchCouncilWaitTimesOutWhenReviewersIncomplete(t *testing.T) {
 	}
 }
 
+// TestOrchCouncilTallyGroupsReviewerFindingsNormal verifies normal council tally groups compatible reviewer findings.
 func TestOrchCouncilTallyGroupsReviewerFindingsNormal(t *testing.T) {
 	t.Parallel()
 
@@ -2110,6 +2130,7 @@ func TestOrchCouncilTallyGroupsReviewerFindingsNormal(t *testing.T) {
 	}
 }
 
+// TestOrchCouncilTallyStrictKeepsDistinctProposals verifies strict council tally keeps distinct proposals separate.
 func TestOrchCouncilTallyStrictKeepsDistinctProposals(t *testing.T) {
 	t.Parallel()
 
@@ -2170,6 +2191,7 @@ func TestOrchCouncilTallyStrictKeepsDistinctProposals(t *testing.T) {
 	}
 }
 
+// TestOrchCouncilReportDefaultShowsConsensusAndMajority verifies the default council report shows consensus and majority sections.
 func TestOrchCouncilReportDefaultShowsConsensusAndMajority(t *testing.T) {
 	t.Parallel()
 
@@ -2247,6 +2269,7 @@ func TestOrchCouncilReportDefaultShowsConsensusAndMajority(t *testing.T) {
 	}
 }
 
+// TestOrchCouncilReportShowAllIncludesMinority verifies `council report --show all` includes minority findings.
 func TestOrchCouncilReportShowAllIncludesMinority(t *testing.T) {
 	t.Parallel()
 
@@ -2276,6 +2299,7 @@ func TestOrchCouncilReportShowAllIncludesMinority(t *testing.T) {
 	}
 }
 
+// TestOrchCouncilReportJSONShape verifies `council report` keeps its JSON response shape stable.
 func TestOrchCouncilReportJSONShape(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/repo-memory-runtime/cmd/repo-memory/add_integration_test.go b/packages/repo-memory-runtime/cmd/repo-memory/add_integration_test.go
index 5e083bf..44a3792 100644
--- a/packages/repo-memory-runtime/cmd/repo-memory/add_integration_test.go
+++ b/packages/repo-memory-runtime/cmd/repo-memory/add_integration_test.go
@@ -5,6 +5,7 @@ import (
 	"testing"
 )
 
+// TestAddRegistersRepoAndEntry verifies add persists a new entry and registers its repo.
 func TestAddRegistersRepoAndEntry(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	initGitRepo(t, fixture.RepoPath)
@@ -42,6 +43,7 @@ func TestAddRegistersRepoAndEntry(t *testing.T) {
 	assertContains(t, listOut, "Plan 内嵌任务结构，不是独立表")
 }
 
+// TestAddUpdatesExistingEntryOnSameKindAndKey verifies add upserts an existing entry when kind and key match.
 func TestAddUpdatesExistingEntryOnSameKindAndKey(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
@@ -92,6 +94,7 @@ func TestAddUpdatesExistingEntryOnSameKindAndKey(t *testing.T) {
 	assertContains(t, listOut, "修订后的摘要")
 }
 
+// TestAddFailedValidationStillRegistersRepo verifies add still records the repo when validation fails.
 func TestAddFailedValidationStillRegistersRepo(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	initGitRepo(t, fixture.RepoPath)
diff --git a/packages/repo-memory-runtime/cmd/repo-memory/events_integration_test.go b/packages/repo-memory-runtime/cmd/repo-memory/events_integration_test.go
index bd0d99c..e875dc4 100644
--- a/packages/repo-memory-runtime/cmd/repo-memory/events_integration_test.go
+++ b/packages/repo-memory-runtime/cmd/repo-memory/events_integration_test.go
@@ -5,6 +5,7 @@ import (
 	"testing"
 )
 
+// TestEventsReadsHistoryByID verifies events returns newest-first history for an entry ID.
 func TestEventsReadsHistoryByID(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
@@ -43,6 +44,7 @@ func TestEventsReadsHistoryByID(t *testing.T) {
 	}
 }
 
+// TestEventsResolvesEntryByRepoKindKey verifies events resolves an entry from the repo-kind-key selector.
 func TestEventsResolvesEntryByRepoKindKey(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
@@ -70,6 +72,7 @@ func TestEventsResolvesEntryByRepoKindKey(t *testing.T) {
 	assertContains(t, eventsOut, "created (- -> confirmed)")
 }
 
+// TestEventsRejectsMissingEntrySelector verifies events requires either an ID or repo-kind-key selector.
 func TestEventsRejectsMissingEntrySelector(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
diff --git a/packages/repo-memory-runtime/cmd/repo-memory/help_test.go b/packages/repo-memory-runtime/cmd/repo-memory/help_test.go
index 35915db..b453039 100644
--- a/packages/repo-memory-runtime/cmd/repo-memory/help_test.go
+++ b/packages/repo-memory-runtime/cmd/repo-memory/help_test.go
@@ -5,6 +5,7 @@ import (
 	"testing"
 )
 
+// TestRepoMemoryRootHelpShowsWorkflowAndCommands verifies root help describes the workflow and available commands.
 func TestRepoMemoryRootHelpShowsWorkflowAndCommands(t *testing.T) {
 	t.Parallel()
 
@@ -25,6 +26,7 @@ func TestRepoMemoryRootHelpShowsWorkflowAndCommands(t *testing.T) {
 	}
 }
 
+// TestRepoMemoryCommandHelpWorksThroughHelpSubcommand verifies the help subcommand renders command-specific help.
 func TestRepoMemoryCommandHelpWorksThroughHelpSubcommand(t *testing.T) {
 	t.Parallel()
 
@@ -45,6 +47,7 @@ func TestRepoMemoryCommandHelpWorksThroughHelpSubcommand(t *testing.T) {
 	}
 }
 
+// TestRepoMemoryCommandHelpWorksWithDashHelp verifies --help renders command-specific help.
 func TestRepoMemoryCommandHelpWorksWithDashHelp(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/repo-memory-runtime/cmd/repo-memory/ingest_integration_test.go b/packages/repo-memory-runtime/cmd/repo-memory/ingest_integration_test.go
index 02a2c7c..e2210cf 100644
--- a/packages/repo-memory-runtime/cmd/repo-memory/ingest_integration_test.go
+++ b/packages/repo-memory-runtime/cmd/repo-memory/ingest_integration_test.go
@@ -6,6 +6,7 @@ import (
 	"testing"
 )
 
+// TestIngestImportsDocsAIMarkdown verifies ingest imports markdown from docs/ai.
 func TestIngestImportsDocsAIMarkdown(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
@@ -31,6 +32,7 @@ func TestIngestImportsDocsAIMarkdown(t *testing.T) {
 	mustContain(t, listOut, "danger:repo-memory:danger-zones [confirmed]")
 }
 
+// TestIngestRejectsWhenNoMarkdownFound verifies ingest fails when no markdown files are found under docs/ai.
 func TestIngestRejectsWhenNoMarkdownFound(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
@@ -52,6 +54,7 @@ func TestIngestRejectsWhenNoMarkdownFound(t *testing.T) {
 	mustContain(t, stderr, "no markdown files found under "+filepath.Join(fixture.RepoPath, "docs", "ai"))
 }
 
+// TestIngestImportsHeadinglessMarkdownAsSingleEntry verifies ingest imports headingless markdown as single entry.
 func TestIngestImportsHeadinglessMarkdownAsSingleEntry(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
diff --git a/packages/repo-memory-runtime/cmd/repo-memory/init_integration_test.go b/packages/repo-memory-runtime/cmd/repo-memory/init_integration_test.go
index c1f5839..2674e67 100644
--- a/packages/repo-memory-runtime/cmd/repo-memory/init_integration_test.go
+++ b/packages/repo-memory-runtime/cmd/repo-memory/init_integration_test.go
@@ -6,6 +6,7 @@ import (
 	"testing"
 )
 
+// TestInitCreatesSchemaOnEmptyDB verifies init creates a new database schema.
 func TestInitCreatesSchemaOnEmptyDB(t *testing.T) {
 	dbPath := filepath.Join(t.TempDir(), "repo-memory.db")
 
@@ -19,6 +20,7 @@ func TestInitCreatesSchemaOnEmptyDB(t *testing.T) {
 	}
 }
 
+// TestInitIsIdempotentOnExistingDB verifies init can be rerun on an existing database.
 func TestInitIsIdempotentOnExistingDB(t *testing.T) {
 	dbPath := filepath.Join(t.TempDir(), "repo-memory.db")
 
diff --git a/packages/repo-memory-runtime/cmd/repo-memory/link_integration_test.go b/packages/repo-memory-runtime/cmd/repo-memory/link_integration_test.go
index 908c6fd..d4278a6 100644
--- a/packages/repo-memory-runtime/cmd/repo-memory/link_integration_test.go
+++ b/packages/repo-memory-runtime/cmd/repo-memory/link_integration_test.go
@@ -4,6 +4,7 @@ import (
 	"testing"
 )
 
+// TestLinkCreatesRelationBetweenEntries verifies link creates relation between entries.
 func TestLinkCreatesRelationBetweenEntries(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
@@ -43,6 +44,7 @@ func TestLinkCreatesRelationBetweenEntries(t *testing.T) {
 	}
 }
 
+// TestLinkRejectsMissingRelation verifies link rejects a missing relation with a "relation is required" error.
 func TestLinkRejectsMissingRelation(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
@@ -79,6 +81,7 @@ func TestLinkRejectsMissingRelation(t *testing.T) {
 	assertContains(t, stderr, "relation is required")
 }
 
+// TestLinkRejectsWhenEntryIDMissing verifies link rejects when entry ID missing.
 func TestLinkRejectsWhenEntryIDMissing(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
diff --git a/packages/repo-memory-runtime/cmd/repo-memory/list_integration_test.go b/packages/repo-memory-runtime/cmd/repo-memory/list_integration_test.go
index 4bbeca7..f1a9a05 100644
--- a/packages/repo-memory-runtime/cmd/repo-memory/list_integration_test.go
+++ b/packages/repo-memory-runtime/cmd/repo-memory/list_integration_test.go
@@ -5,6 +5,7 @@ import (
 	"testing"
 )
 
+// TestListFiltersByKindAndStatus verifies list filters by kind and status.
 func TestListFiltersByKindAndStatus(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
@@ -57,6 +58,7 @@ func TestListFiltersByKindAndStatus(t *testing.T) {
 	mustNotContain(t, listOut, "chain:ai-insight.get")
 }
 
+// TestListReturnsNoEntriesWhenEmpty verifies list returns no entries when empty.
 func TestListReturnsNoEntriesWhenEmpty(t *testing.T) {
 	fixture := initRepoMemoryTestDB(t)
 
diff --git a/packages/repo-memory-runtime/cmd/repo-memory/main_test.go b/packages/repo-memory-runtime/cmd/repo-memory/main_test.go
index a7586d2..2b10101 100644
--- a/packages/repo-memory-runtime/cmd/repo-memory/main_test.go
+++ b/packages/repo-memory-runtime/cmd/repo-memory/main_test.go
@@ -9,6 +9,7 @@ import (
 	"ai-workflow-skill/packages/repo-memory-runtime/internal/store"
 )
 
+// TestVerifyCandidateDetectsFileChange verifies verify candidate detects file change.
 func TestVerifyCandidateDetectsFileChange(t *testing.T) {
 	t.Parallel()
 
@@ -47,6 +48,7 @@ func TestVerifyCandidateDetectsFileChange(t *testing.T) {
 	}
 }
 
+// TestVerifyCandidateMarksMissingDependencyStale verifies verify candidate marks missing dependency stale.
 func TestVerifyCandidateMarksMissingDependencyStale(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/repo-memory-runtime/cmd/repo-memory/repos_integration_test.go b/packages/repo-memory-runtime/cmd/repo-memory/repos_integration_test.go
index 1524dc3..eece7f2 100644
--- a/packages/repo-memory-runtime/cmd/repo-memory/repos_integration_test.go
+++ b/packages/repo-memory-runtime/cmd/repo-memory/repos_integration_test.go
@@ -5,6 +5,7 @@ import (
 	"testing"
 )
 
+// TestReposListsTrackedRepositories verifies repos lists tracked repositories.
 func TestReposListsTrackedRepositories(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
@@ -40,6 +41,7 @@ func TestReposListsTrackedRepositories(t *testing.T) {
 	assertContains(t, reposOut, marsRepo+" (1 entries, updated ")
 }
 
+// TestReposPrintsNoReposWhenEmpty verifies repos prints no repos when empty.
 func TestReposPrintsNoReposWhenEmpty(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
diff --git a/packages/repo-memory-runtime/cmd/repo-memory/search_integration_test.go b/packages/repo-memory-runtime/cmd/repo-memory/search_integration_test.go
index db433c2..cfea4e6 100644
--- a/packages/repo-memory-runtime/cmd/repo-memory/search_integration_test.go
+++ b/packages/repo-memory-runtime/cmd/repo-memory/search_integration_test.go
@@ -5,6 +5,7 @@ import (
 	"testing"
 )
 
+// TestSearchReturnsMatchingEntrySnippet verifies search returns matching entry snippet.
 func TestSearchReturnsMatchingEntrySnippet(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
@@ -35,6 +36,7 @@ func TestSearchReturnsMatchingEntrySnippet(t *testing.T) {
 	mustContain(t, searchOut, "gateway")
 }
 
+// TestSearchMatchesAliasWithRepoFilter verifies search matches alias with repo filter.
 func TestSearchMatchesAliasWithRepoFilter(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
@@ -83,6 +85,7 @@ func TestSearchMatchesAliasWithRepoFilter(t *testing.T) {
 	mustNotContain(t, searchOut, "[mars-service]")
 }
 
+// TestSearchReturnsNoResultsWhenEmpty verifies search returns no results when empty.
 func TestSearchReturnsNoResultsWhenEmpty(t *testing.T) {
 	fixture := initRepoMemoryTestDB(t)
 
@@ -98,6 +101,7 @@ func TestSearchReturnsNoResultsWhenEmpty(t *testing.T) {
 	}
 }
 
+// TestSearchRejectsMissingQuery verifies search rejects missing query.
 func TestSearchRejectsMissingQuery(t *testing.T) {
 	fixture := initRepoMemoryTestDB(t)
 
diff --git a/packages/repo-memory-runtime/cmd/repo-memory/verify_integration_test.go b/packages/repo-memory-runtime/cmd/repo-memory/verify_integration_test.go
index baf5839..9e21307 100644
--- a/packages/repo-memory-runtime/cmd/repo-memory/verify_integration_test.go
+++ b/packages/repo-memory-runtime/cmd/repo-memory/verify_integration_test.go
@@ -6,6 +6,7 @@ import (
 	"testing"
 )
 
+// TestVerifyDowngradesChangedFileDependency verifies verify downgrades changed file dependency.
 func TestVerifyDowngradesChangedFileDependency(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
@@ -51,6 +52,7 @@ func TestVerifyDowngradesChangedFileDependency(t *testing.T) {
 	assertContains(t, eventsOut, "downgraded (confirmed -> needs_review)")
 }
 
+// TestVerifyMarksMissingHardDependencyStale verifies verify marks missing hard dependency stale.
 func TestVerifyMarksMissingHardDependencyStale(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
@@ -92,6 +94,7 @@ func TestVerifyMarksMissingHardDependencyStale(t *testing.T) {
 	assertContains(t, eventsOut, "marked_stale (confirmed -> stale)")
 }
 
+// TestVerifyPrintsNoReposWhenEmpty verifies verify prints no repos when empty.
 func TestVerifyPrintsNoReposWhenEmpty(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
@@ -105,6 +108,7 @@ func TestVerifyPrintsNoReposWhenEmpty(t *testing.T) {
 	}
 }
 
+// TestVerifySkipsExplicitRepoWithoutGitHead verifies verify skips an explicitly named repo that has no Git HEAD.
 func TestVerifySkipsExplicitRepoWithoutGitHead(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
@@ -127,6 +131,7 @@ func TestVerifySkipsExplicitRepoWithoutGitHead(t *testing.T) {
 	}
 }
 
+// TestVerifyDowngradesEntryMissingVerifiedOnCommit verifies verify downgrades entry missing verified on commit.
 func TestVerifyDowngradesEntryMissingVerifiedOnCommit(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	runRepoMemoryCommand(t, "init", "--db", fixture.DBPath)
diff --git a/packages/repo-memory-runtime/cmd/repo-memory/workflow_integration_test.go b/packages/repo-memory-runtime/cmd/repo-memory/workflow_integration_test.go
index e018091..3d95ac0 100644
--- a/packages/repo-memory-runtime/cmd/repo-memory/workflow_integration_test.go
+++ b/packages/repo-memory-runtime/cmd/repo-memory/workflow_integration_test.go
@@ -6,6 +6,7 @@ import (
 	"testing"
 )
 
+// TestWorkflowAddSearchEventsRoundtrip verifies workflow add search events roundtrip.
 func TestWorkflowAddSearchEventsRoundtrip(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	evidencePath := filepath.Join(fixture.RepoPath, "app", "app", "src", "main", "java", "foo", "AITask.java")
@@ -48,6 +49,7 @@ func TestWorkflowAddSearchEventsRoundtrip(t *testing.T) {
 	assertContains(t, eventsOut, "created")
 }
 
+// TestWorkflowIngestSearchListAcrossSections verifies the ingest, search, and list workflow across document sections.
 func TestWorkflowIngestSearchListAcrossSections(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	docPath := filepath.Join(fixture.RepoPath, "docs", "ai", "repo-memory.md")
@@ -92,6 +94,7 @@ func TestWorkflowIngestSearchListAcrossSections(t *testing.T) {
 	assertContains(t, listOut, "danger:repo-memory:danger-zones [confirmed]")
 }
 
+// TestWorkflowAddLinkAndResolveRelatedEntry verifies the workflow of adding entries, linking them, and resolving the related entry.
 func TestWorkflowAddLinkAndResolveRelatedEntry(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	evidencePath := filepath.Join(fixture.RepoPath, "docs", "term.md")
@@ -148,6 +151,7 @@ func TestWorkflowAddLinkAndResolveRelatedEntry(t *testing.T) {
 	assertContains(t, eventsTwo, "chain:ai-insight.get [confirmed] #2")
 }
 
+// TestWorkflowVerifyDowngradesAfterRepoChange verifies that the verify workflow downgrades an entry after the repo changes.
 func TestWorkflowVerifyDowngradesAfterRepoChange(t *testing.T) {
 	fixture := newRepoMemoryFixture(t)
 	evidencePath := filepath.Join(fixture.RepoPath, "foo.txt")
diff --git a/packages/repo-memory-runtime/internal/documents/load_test.go b/packages/repo-memory-runtime/internal/documents/load_test.go
index 1aaf519..1fbd059 100644
--- a/packages/repo-memory-runtime/internal/documents/load_test.go
+++ b/packages/repo-memory-runtime/internal/documents/load_test.go
@@ -7,6 +7,7 @@ import (
 	"testing"
 )
 
+// TestParseFile verifies ParseFile loads front matter and markdown sections.
 func TestParseFile(t *testing.T) {
 	t.Parallel()
 
diff --git a/packages/repo-memory-runtime/internal/store/store_test.go b/packages/repo-memory-runtime/internal/store/store_test.go
index e10c6fe..32034d1 100644
--- a/packages/repo-memory-runtime/internal/store/store_test.go
+++ b/packages/repo-memory-runtime/internal/store/store_test.go
@@ -8,6 +8,7 @@ import (
 	"ai-workflow-skill/packages/repo-memory-runtime/internal/documents"
 )
 
+// TestImportDocumentAndSearch verifies imported document sections are searchable.
 func TestImportDocumentAndSearch(t *testing.T) {
 	t.Parallel()
 
@@ -64,6 +65,7 @@ func TestImportDocumentAndSearch(t *testing.T) {
 	}
 }
 
+// TestUpsertEntryWithAliasesAndDependencies verifies upsert stores aliases, dependencies, events, and links.
 func TestUpsertEntryWithAliasesAndDependencies(t *testing.T) {
 	t.Parallel()
 
@@ -175,6 +177,7 @@ func TestUpsertEntryWithAliasesAndDependencies(t *testing.T) {
 	}
 }
 
+// TestApplyVerificationResult verifies verification can downgrade an entry and record the downgrade event.
 func TestApplyVerificationResult(t *testing.T) {
 	t.Parallel()