From 1938eb8f0788d212d1f1b1c40d934cd93ec42464 Mon Sep 17 00:00:00 2001 From: kurihada Date: Fri, 20 Mar 2026 13:01:16 +0800 Subject: [PATCH] refactor(monorepo): extract coord-core --- cmd/orchd/main.go | 2 +- docs/implementation-roadmap.md | 11 +- .../skill-workspace-monorepo-migration.md | 4 +- internal/app/web.go | 2 +- internal/cli/inbox/artifact.go | 4 +- internal/cli/inbox/body.go | 2 +- internal/cli/inbox/cancel.go | 4 +- internal/cli/inbox/claim.go | 4 +- internal/cli/inbox/db.go | 2 +- internal/cli/inbox/done.go | 4 +- internal/cli/inbox/execute.go | 4 +- internal/cli/inbox/fetch.go | 4 +- internal/cli/inbox/init.go | 2 +- internal/cli/inbox/list.go | 4 +- internal/cli/inbox/renew.go | 4 +- internal/cli/inbox/reply.go | 4 +- internal/cli/inbox/send.go | 4 +- internal/cli/inbox/show.go | 4 +- internal/cli/inbox/update.go | 4 +- internal/cli/inbox/wait_reply.go | 4 +- internal/cli/inbox/watch.go | 4 +- internal/cli/orch/answer.go | 4 +- internal/cli/orch/blocked.go | 4 +- internal/cli/orch/body.go | 2 +- internal/cli/orch/cancel.go | 4 +- internal/cli/orch/cleanup.go | 4 +- internal/cli/orch/council_report.go | 4 +- internal/cli/orch/council_start.go | 4 +- internal/cli/orch/council_tally.go | 4 +- internal/cli/orch/council_wait.go | 4 +- internal/cli/orch/db.go | 2 +- internal/cli/orch/dep.go | 4 +- internal/cli/orch/dispatch.go | 4 +- internal/cli/orch/execute.go | 4 +- internal/cli/orch/ready.go | 4 +- internal/cli/orch/reassign.go | 4 +- internal/cli/orch/reconcile.go | 4 +- internal/cli/orch/retry.go | 4 +- internal/cli/orch/run.go | 4 +- internal/cli/orch/status.go | 4 +- internal/cli/orch/task.go | 4 +- internal/cli/orch/wait.go | 4 +- internal/cli/orch/worktree.go | 4 +- internal/httpapi/response.go | 2 +- internal/httpapi/router.go | 2 +- internal/httpapi/router_test.go | 4 +- internal/query/read_service.go | 2 +- packages/coord-core/db/migrate.go | 50 + packages/coord-core/db/open.go | 38 + packages/coord-core/db/pragmas.go | 23 + 
packages/coord-core/db/schema/001_inbox.sql | 51 + packages/coord-core/db/schema/002_orch.sql | 52 + packages/coord-core/db/schema/003_events.sql | 18 + packages/coord-core/db/schema/004_council.sql | 45 + .../coord-core/db/schema/005_inbox_reads.sql | 12 + .../db/schema/006_council_inputs.sql | 9 + .../db/schema/007_council_reports.sql | 8 + packages/coord-core/go.mod | 5 + packages/coord-core/protocol/cli_error.go | 33 + packages/coord-core/protocol/json.go | 28 + packages/coord-core/store/council.go | 1503 ++++++++++ packages/coord-core/store/doc.go | 3 + packages/coord-core/store/inbox.go | 1932 ++++++++++++ packages/coord-core/store/inbox_test.go | 107 + packages/coord-core/store/orch.go | 2579 +++++++++++++++++ 65 files changed, 6586 insertions(+), 85 deletions(-) create mode 100644 packages/coord-core/db/migrate.go create mode 100644 packages/coord-core/db/open.go create mode 100644 packages/coord-core/db/pragmas.go create mode 100644 packages/coord-core/db/schema/001_inbox.sql create mode 100644 packages/coord-core/db/schema/002_orch.sql create mode 100644 packages/coord-core/db/schema/003_events.sql create mode 100644 packages/coord-core/db/schema/004_council.sql create mode 100644 packages/coord-core/db/schema/005_inbox_reads.sql create mode 100644 packages/coord-core/db/schema/006_council_inputs.sql create mode 100644 packages/coord-core/db/schema/007_council_reports.sql create mode 100644 packages/coord-core/protocol/cli_error.go create mode 100644 packages/coord-core/protocol/json.go create mode 100644 packages/coord-core/store/council.go create mode 100644 packages/coord-core/store/doc.go create mode 100644 packages/coord-core/store/inbox.go create mode 100644 packages/coord-core/store/inbox_test.go create mode 100644 packages/coord-core/store/orch.go diff --git a/cmd/orchd/main.go b/cmd/orchd/main.go index 71a4948..d305bb2 100644 --- a/cmd/orchd/main.go +++ b/cmd/orchd/main.go @@ -12,7 +12,7 @@ import ( "time" "ai-workflow-skill/internal/app" - 
"ai-workflow-skill/internal/db" + "ai-workflow-skill/packages/coord-core/db" "ai-workflow-skill/internal/httpapi" ) diff --git a/docs/implementation-roadmap.md b/docs/implementation-roadmap.md index 815d9da..9162fd4 100644 --- a/docs/implementation-roadmap.md +++ b/docs/implementation-roadmap.md @@ -38,6 +38,7 @@ As of now: - the first real Phase 2 read-only operator UI is now implemented in `apps/web`, including routed runs list, run detail, blocked queue, and thread timeline views backed by the existing `orchd` HTTP API, plus Tailwind v4 consumer wiring so the source-owned Cadence UI components render correctly in the app - a repository-level skill workspace monorepo migration plan now exists under `docs/skill-workspace-monorepo.md`, defining the target split between runtime packages under `packages/`, agent-facing skill bundles under `skills/`, support apps under `apps/`, and package-based skill packaging flows - the first migration phase for the skill workspace monorepo is now complete: root `go.work` exists, `pnpm-workspace.yaml` now discovers `packages/*`, empty runtime module roots now exist under `packages/`, and a declarative `scripts/skill-bundles.json` plus `scripts/package_skill_runtimes.sh` scaffold now define package-oriented skill bundle metadata from the repo root +- `packages/coord-core` now exists as the first real extracted runtime package, containing shared coordination DB/schema, protocol, and store code, and the active coordination runtimes now import `coord-core` instead of root `internal/db`, `internal/store`, and `internal/protocol` - a repo-local `scripts/package_skill_clis.sh` packaging flow now builds bundled skill CLI assets for `inbox`, `orch`, and `council-review` - `orch` now implements `run init/show`, `task add`, `dep add`, `ready`, `dispatch`, `reconcile`, `wait`, `blocked`, `answer`, `retry`, `reassign`, `cancel`, `cleanup`, and `status` - `orch` can create runs, gate tasks through dependencies, dispatch work through `inbox`, 
reconcile worker thread state back into task state, answer blocked tasks, retry or reassign work, cancel tasks or runs, clean attempt worktrees, and create per-attempt Git worktrees during strict dispatch @@ -495,10 +496,14 @@ Completed so far: - initial module roots now exist for `packages/coord-core`, `packages/inbox-runtime`, `packages/orch-runtime`, `packages/orchd-runtime`, and `packages/repo-memory-runtime` - `scripts/skill-bundles.json` now records the first package-oriented skill bundle metadata, including the future `repo-memory` runtime mapping - `scripts/package_skill_runtimes.sh` now provides a declarative bundle plan/validate scaffold that targets package paths rather than hardcoded root runtime paths +- `packages/coord-core/db` now owns the shared SQLite open, pragmas, migrations, and coordination schema files +- `packages/coord-core/protocol` now owns the shared JSON and CLI error helpers used across the coordination stack +- `packages/coord-core/store` now owns the shared inbox, orch, and council store logic plus its coordination-domain tests +- root coordination runtimes under `cmd/`, `internal/cli/`, `internal/app/`, `internal/httpapi/`, and `internal/query/` now import `coord-core` instead of depending on root `internal/db`, `internal/store`, or root `internal/protocol` +- `go test ./...` still passes for the root module, and `go test ./...` passes inside `packages/coord-core` Remaining: -- extract shared coordination code into `packages/coord-core` - extract `inbox`, `orch`, and `orchd` into package-owned runtimes - import `repo-memory` as its own runtime package and add the corresponding skill bundle - graduate the bundle scaffold into the primary packaging flow once package-owned runtime entrypoints exist @@ -509,12 +514,12 @@ If a new agent is taking over now, the next concrete step should be: 1. 
treat `Milestone 9: Web Product Phase 2 Read-Only Operator UI` as complete for the initial operator surface and do not expand web feature scope further until the workspace split is decided package-by-package 2. treat the Phase 1 workspace bootstrap for `Milestone 10` as complete and keep the new `go.work`, `packages/`, and declarative bundle metadata as the baseline for all further migration steps -3. extract the shared coordination kernel into `packages/coord-core` before moving `inbox`, `orch`, or `orchd` into package-owned runtimes +3. treat the shared coordination kernel extraction into `packages/coord-core` as complete and move `inbox` plus `orch` into package-owned runtimes next 4. keep the authored skill forward-test plans under `docs/tests/*-skill/` synchronized as runtime ownership moves from root paths to package paths 5. keep the legacy hardcoded packaging flow working temporarily, but evolve the new declarative bundle scaffold into the primary packaging path before adding `repo-memory` 6. import `repo-memory` only after the package-based runtime and skill packaging pattern exists -The inbox implementation and its human-readable test-plan set are already in place, `orch` supports the main scheduler loop plus the complete council start/wait/tally/report workflow, the web product now has its first real operator-facing read surfaces, and the repository has finished the first workspace-bootstrap phase of the skill monorepo migration, so the next step should be package extraction rather than continuing to accrete new root-owned runtimes. 
+The inbox implementation and its human-readable test-plan set are already in place, `orch` supports the main scheduler loop plus the complete council start/wait/tally/report workflow, the web product now has its first real operator-facing read surfaces, and the repository has completed both the workspace bootstrap and the shared coordination-kernel extraction phases of the skill monorepo migration, so the next step should be runtime extraction rather than continuing to accrete new root-owned runtimes. ## Recommended Driver Choices diff --git a/docs/roadmaps/active/skill-workspace-monorepo-migration.md b/docs/roadmaps/active/skill-workspace-monorepo-migration.md index 0c74ecd..1504744 100644 --- a/docs/roadmaps/active/skill-workspace-monorepo-migration.md +++ b/docs/roadmaps/active/skill-workspace-monorepo-migration.md @@ -26,7 +26,7 @@ - [x] create or adopt an active execution roadmap for the migration workstream - [x] Phase 1: bootstrap `go.work`, expanded workspace manifests, package roots, and declarative skill bundle metadata -- [ ] Phase 2: extract shared coordination code into `packages/coord-core` +- [x] Phase 2: extract shared coordination code into `packages/coord-core` - [ ] Phase 3: extract `inbox-runtime` and `orch-runtime` - [ ] Phase 4: extract `orchd-runtime` - [ ] Phase 5: import `repo-memory-runtime` and add `skills/repo-memory` @@ -54,4 +54,4 @@ ## Next Step -- start Phase 2 by extracting the shared coordination kernel into `packages/coord-core`, using the new workspace and bundle metadata scaffold as the stable base for subsequent package moves +- start Phase 3 by moving `inbox` and `orch` into package-owned runtimes on top of the now-shared `packages/coord-core` kernel diff --git a/internal/app/web.go b/internal/app/web.go index f8d66fa..d0ea287 100644 --- a/internal/app/web.go +++ b/internal/app/web.go @@ -5,7 +5,7 @@ import ( "database/sql" "ai-workflow-skill/internal/query" - "ai-workflow-skill/internal/store" + 
"ai-workflow-skill/packages/coord-core/store" ) type WebService struct { diff --git a/internal/cli/inbox/artifact.go b/internal/cli/inbox/artifact.go index 5c886df..54a6eb0 100644 --- a/internal/cli/inbox/artifact.go +++ b/internal/cli/inbox/artifact.go @@ -3,8 +3,8 @@ package inbox import ( "strings" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/inbox/body.go b/internal/cli/inbox/body.go index d3bb85d..3dfe9f1 100644 --- a/internal/cli/inbox/body.go +++ b/internal/cli/inbox/body.go @@ -3,7 +3,7 @@ package inbox import ( "os" - "ai-workflow-skill/internal/protocol" + "ai-workflow-skill/packages/coord-core/protocol" ) func resolveBodyValue(body, bodyFile string) (string, error) { diff --git a/internal/cli/inbox/cancel.go b/internal/cli/inbox/cancel.go index 0e3ad82..c980f67 100644 --- a/internal/cli/inbox/cancel.go +++ b/internal/cli/inbox/cancel.go @@ -3,8 +3,8 @@ package inbox import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/inbox/claim.go b/internal/cli/inbox/claim.go index c43c87c..0ddb6d9 100644 --- a/internal/cli/inbox/claim.go +++ b/internal/cli/inbox/claim.go @@ -4,8 +4,8 @@ import ( "errors" "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/inbox/db.go b/internal/cli/inbox/db.go index a26d111..c187f8c 100644 --- a/internal/cli/inbox/db.go +++ b/internal/cli/inbox/db.go @@ -4,7 +4,7 @@ import ( "context" "database/sql" - "ai-workflow-skill/internal/db" + "ai-workflow-skill/packages/coord-core/db" ) func 
openInboxDB(ctx context.Context, dbPath string) (*sql.DB, error) { diff --git a/internal/cli/inbox/done.go b/internal/cli/inbox/done.go index 093aa5c..8ba49db 100644 --- a/internal/cli/inbox/done.go +++ b/internal/cli/inbox/done.go @@ -3,8 +3,8 @@ package inbox import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/inbox/execute.go b/internal/cli/inbox/execute.go index 88374a9..67e5561 100644 --- a/internal/cli/inbox/execute.go +++ b/internal/cli/inbox/execute.go @@ -6,8 +6,8 @@ import ( "io" "strings" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" ) func Execute(args []string, stdout, stderr io.Writer) int { diff --git a/internal/cli/inbox/fetch.go b/internal/cli/inbox/fetch.go index 98f5cd1..0564621 100644 --- a/internal/cli/inbox/fetch.go +++ b/internal/cli/inbox/fetch.go @@ -4,8 +4,8 @@ import ( "fmt" "strings" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/inbox/init.go b/internal/cli/inbox/init.go index da5ad71..2be90d4 100644 --- a/internal/cli/inbox/init.go +++ b/internal/cli/inbox/init.go @@ -3,7 +3,7 @@ package inbox import ( "fmt" - "ai-workflow-skill/internal/protocol" + "ai-workflow-skill/packages/coord-core/protocol" "github.com/spf13/cobra" ) diff --git a/internal/cli/inbox/list.go b/internal/cli/inbox/list.go index 081c833..4c99f28 100644 --- a/internal/cli/inbox/list.go +++ b/internal/cli/inbox/list.go @@ -3,8 +3,8 @@ package inbox import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + 
"ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/inbox/renew.go b/internal/cli/inbox/renew.go index c0f9f69..735d1b0 100644 --- a/internal/cli/inbox/renew.go +++ b/internal/cli/inbox/renew.go @@ -3,8 +3,8 @@ package inbox import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/inbox/reply.go b/internal/cli/inbox/reply.go index f8b10c5..db1387a 100644 --- a/internal/cli/inbox/reply.go +++ b/internal/cli/inbox/reply.go @@ -3,8 +3,8 @@ package inbox import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/inbox/send.go b/internal/cli/inbox/send.go index e079039..cf46a03 100644 --- a/internal/cli/inbox/send.go +++ b/internal/cli/inbox/send.go @@ -3,8 +3,8 @@ package inbox import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/inbox/show.go b/internal/cli/inbox/show.go index 6171cff..f08dba0 100644 --- a/internal/cli/inbox/show.go +++ b/internal/cli/inbox/show.go @@ -3,8 +3,8 @@ package inbox import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/inbox/update.go b/internal/cli/inbox/update.go index 0e88fbd..4cdfe17 100644 --- a/internal/cli/inbox/update.go +++ b/internal/cli/inbox/update.go @@ -3,8 +3,8 @@ package inbox import ( 
"fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/inbox/wait_reply.go b/internal/cli/inbox/wait_reply.go index 33fc432..63c3b1e 100644 --- a/internal/cli/inbox/wait_reply.go +++ b/internal/cli/inbox/wait_reply.go @@ -4,8 +4,8 @@ import ( "fmt" "time" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/inbox/watch.go b/internal/cli/inbox/watch.go index a48cb0b..2a128f8 100644 --- a/internal/cli/inbox/watch.go +++ b/internal/cli/inbox/watch.go @@ -4,8 +4,8 @@ import ( "fmt" "time" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/orch/answer.go b/internal/cli/orch/answer.go index 373dce7..5f80508 100644 --- a/internal/cli/orch/answer.go +++ b/internal/cli/orch/answer.go @@ -3,8 +3,8 @@ package orch import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/orch/blocked.go b/internal/cli/orch/blocked.go index 5e0cb93..dd59880 100644 --- a/internal/cli/orch/blocked.go +++ b/internal/cli/orch/blocked.go @@ -3,8 +3,8 @@ package orch import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/orch/body.go b/internal/cli/orch/body.go index 4663691..0a53195 100644 --- 
a/internal/cli/orch/body.go +++ b/internal/cli/orch/body.go @@ -3,7 +3,7 @@ package orch import ( "os" - "ai-workflow-skill/internal/protocol" + "ai-workflow-skill/packages/coord-core/protocol" ) func resolveBodyValue(body, bodyFile string) (string, error) { diff --git a/internal/cli/orch/cancel.go b/internal/cli/orch/cancel.go index 155d38d..74ce970 100644 --- a/internal/cli/orch/cancel.go +++ b/internal/cli/orch/cancel.go @@ -3,8 +3,8 @@ package orch import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/orch/cleanup.go b/internal/cli/orch/cleanup.go index 9fd2693..d652041 100644 --- a/internal/cli/orch/cleanup.go +++ b/internal/cli/orch/cleanup.go @@ -3,8 +3,8 @@ package orch import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/orch/council_report.go b/internal/cli/orch/council_report.go index df27919..ff1a0bf 100644 --- a/internal/cli/orch/council_report.go +++ b/internal/cli/orch/council_report.go @@ -6,8 +6,8 @@ import ( "path/filepath" "strings" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/orch/council_start.go b/internal/cli/orch/council_start.go index 7dc59a4..105d5e4 100644 --- a/internal/cli/orch/council_start.go +++ b/internal/cli/orch/council_start.go @@ -3,8 +3,8 @@ package orch import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff 
--git a/internal/cli/orch/council_tally.go b/internal/cli/orch/council_tally.go index db2ce38..a246003 100644 --- a/internal/cli/orch/council_tally.go +++ b/internal/cli/orch/council_tally.go @@ -3,8 +3,8 @@ package orch import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/orch/council_wait.go b/internal/cli/orch/council_wait.go index a6b9130..ce4417f 100644 --- a/internal/cli/orch/council_wait.go +++ b/internal/cli/orch/council_wait.go @@ -4,8 +4,8 @@ import ( "fmt" "time" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/orch/db.go b/internal/cli/orch/db.go index a6e1861..9449570 100644 --- a/internal/cli/orch/db.go +++ b/internal/cli/orch/db.go @@ -4,7 +4,7 @@ import ( "context" "database/sql" - "ai-workflow-skill/internal/db" + "ai-workflow-skill/packages/coord-core/db" ) func openOrchDB(ctx context.Context, dbPath string) (*sql.DB, error) { diff --git a/internal/cli/orch/dep.go b/internal/cli/orch/dep.go index 14bcac3..5f2e7f1 100644 --- a/internal/cli/orch/dep.go +++ b/internal/cli/orch/dep.go @@ -3,8 +3,8 @@ package orch import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/orch/dispatch.go b/internal/cli/orch/dispatch.go index 4b23a23..c90623e 100644 --- a/internal/cli/orch/dispatch.go +++ b/internal/cli/orch/dispatch.go @@ -3,8 +3,8 @@ package orch import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + 
"ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/orch/execute.go b/internal/cli/orch/execute.go index 5805455..1e1ac1e 100644 --- a/internal/cli/orch/execute.go +++ b/internal/cli/orch/execute.go @@ -6,8 +6,8 @@ import ( "io" "strings" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" ) func Execute(args []string, stdout, stderr io.Writer) int { diff --git a/internal/cli/orch/ready.go b/internal/cli/orch/ready.go index 4a55b8b..99645de 100644 --- a/internal/cli/orch/ready.go +++ b/internal/cli/orch/ready.go @@ -3,8 +3,8 @@ package orch import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/orch/reassign.go b/internal/cli/orch/reassign.go index 04419db..f872100 100644 --- a/internal/cli/orch/reassign.go +++ b/internal/cli/orch/reassign.go @@ -3,8 +3,8 @@ package orch import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/orch/reconcile.go b/internal/cli/orch/reconcile.go index ba07374..d1e218c 100644 --- a/internal/cli/orch/reconcile.go +++ b/internal/cli/orch/reconcile.go @@ -3,8 +3,8 @@ package orch import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/orch/retry.go b/internal/cli/orch/retry.go index c4457f5..704294c 100644 --- a/internal/cli/orch/retry.go +++ b/internal/cli/orch/retry.go @@ -3,8 +3,8 @@ package orch import ( "fmt" - 
"ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/orch/run.go b/internal/cli/orch/run.go index fed6eec..8a4b226 100644 --- a/internal/cli/orch/run.go +++ b/internal/cli/orch/run.go @@ -3,8 +3,8 @@ package orch import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/orch/status.go b/internal/cli/orch/status.go index 91d45d8..4c93b61 100644 --- a/internal/cli/orch/status.go +++ b/internal/cli/orch/status.go @@ -3,8 +3,8 @@ package orch import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/orch/task.go b/internal/cli/orch/task.go index 0a9e843..76187f5 100644 --- a/internal/cli/orch/task.go +++ b/internal/cli/orch/task.go @@ -3,8 +3,8 @@ package orch import ( "fmt" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/orch/wait.go b/internal/cli/orch/wait.go index fbaf0a9..9041d6e 100644 --- a/internal/cli/orch/wait.go +++ b/internal/cli/orch/wait.go @@ -5,8 +5,8 @@ import ( "strings" "time" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/cli/orch/worktree.go b/internal/cli/orch/worktree.go index 9b7ac63..380518a 100644 --- a/internal/cli/orch/worktree.go +++ 
b/internal/cli/orch/worktree.go @@ -9,8 +9,8 @@ import ( "path/filepath" "strings" - "ai-workflow-skill/internal/protocol" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/protocol" + "ai-workflow-skill/packages/coord-core/store" "github.com/spf13/cobra" ) diff --git a/internal/httpapi/response.go b/internal/httpapi/response.go index 123691e..1b3aa3e 100644 --- a/internal/httpapi/response.go +++ b/internal/httpapi/response.go @@ -6,7 +6,7 @@ import ( "fmt" "net/http" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/store" ) type errorEnvelope struct { diff --git a/internal/httpapi/router.go b/internal/httpapi/router.go index f581cac..d9d8f55 100644 --- a/internal/httpapi/router.go +++ b/internal/httpapi/router.go @@ -9,7 +9,7 @@ import ( "github.com/go-chi/chi/v5/middleware" "ai-workflow-skill/internal/query" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/store" ) type readService interface { diff --git a/internal/httpapi/router_test.go b/internal/httpapi/router_test.go index f01c66f..b1376e0 100644 --- a/internal/httpapi/router_test.go +++ b/internal/httpapi/router_test.go @@ -10,8 +10,8 @@ import ( "time" "ai-workflow-skill/internal/app" - dbpkg "ai-workflow-skill/internal/db" - "ai-workflow-skill/internal/store" + dbpkg "ai-workflow-skill/packages/coord-core/db" + "ai-workflow-skill/packages/coord-core/store" ) func TestRouterExposesReadOnlyWebEndpoints(t *testing.T) { diff --git a/internal/query/read_service.go b/internal/query/read_service.go index 4abb5eb..bc4093d 100644 --- a/internal/query/read_service.go +++ b/internal/query/read_service.go @@ -6,7 +6,7 @@ import ( "fmt" "time" - "ai-workflow-skill/internal/store" + "ai-workflow-skill/packages/coord-core/store" ) type ReadService struct { diff --git a/packages/coord-core/db/migrate.go b/packages/coord-core/db/migrate.go new file mode 100644 index 0000000..1cf8e11 --- /dev/null +++ b/packages/coord-core/db/migrate.go @@ 
-0,0 +1,55 @@
+package db
+
+import (
+	"context"
+	"database/sql"
+	"embed"
+	"fmt"
+	"sort"
+)
+
+//go:embed schema/*.sql
+var schemaFS embed.FS
+
+// ApplyMigrations executes every embedded schema/*.sql file against db in
+// sorted filename order, all inside one transaction. If any file cannot be
+// read or executed the function returns early and the deferred Rollback
+// discards the work done so far.
+func ApplyMigrations(ctx context.Context, db *sql.DB) error {
+	files, err := schemaFS.ReadDir("schema")
+	if err != nil {
+		return fmt.Errorf("read embedded schema directory: %w", err)
+	}
+
+	names := make([]string, 0, len(files))
+	for _, file := range files {
+		if file.IsDir() {
+			continue
+		}
+		names = append(names, file.Name())
+	}
+	sort.Strings(names)
+
+	tx, err := db.BeginTx(ctx, nil)
+	if err != nil {
+		return fmt.Errorf("begin schema transaction: %w", err)
+	}
+	// After a successful Commit this Rollback only returns sql.ErrTxDone.
+	defer tx.Rollback()
+
+	for _, name := range names {
+		content, err := schemaFS.ReadFile("schema/" + name)
+		if err != nil {
+			return fmt.Errorf("read embedded schema file %q: %w", name, err)
+		}
+		if _, err := tx.ExecContext(ctx, string(content)); err != nil {
+			return fmt.Errorf("apply schema file %q: %w", name, err)
+		}
+	}
+
+	if err := tx.Commit(); err != nil {
+		return fmt.Errorf("commit schema transaction: %w", err)
+	}
+
+	return nil
+}
diff --git a/packages/coord-core/db/open.go b/packages/coord-core/db/open.go
new file mode 100644
index 0000000..401a937
--- /dev/null
+++ b/packages/coord-core/db/open.go
@@ -0,0 +1,77 @@
+package db
+
+import (
+	"context"
+	"database/sql"
+	"fmt"
+	"net/url"
+	"os"
+	"path/filepath"
+	"strings"
+
+	_ "modernc.org/sqlite"
+)
+
+// connectionPragmas are scoped to a single SQLite connection rather than to
+// the database file, so Open puts them in the DSN and lets the driver run them
+// on every connection the database/sql pool creates.
+var connectionPragmas = []string{
+	"foreign_keys(1)",
+	"busy_timeout(5000)",
+}
+
+// Open creates the parent directory of dbPath when one is needed, opens the
+// SQLite database at dbPath, and applies the package pragmas. On success the
+// caller owns the returned handle and is responsible for closing it; on
+// failure nil is returned together with the error.
+func Open(ctx context.Context, dbPath string) (*sql.DB, error) {
+	if err := ensureParentDir(dbPath); err != nil {
+		return nil, err
+	}
+
+	db, err := sql.Open("sqlite", connectionDSN(dbPath))
+	if err != nil {
+		return nil, fmt.Errorf("open sqlite database: %w", err)
+	}
+
+	if err := applyPragmas(ctx, db); err != nil {
+		// Best-effort cleanup; the pragma failure is the error worth reporting.
+		_ = db.Close()
+		return nil, err
+	}
+
+	return db, nil
+}
+
+// connectionDSN appends connectionPragmas to dbPath as the "_pragma" query
+// parameters documented by modernc.org/sqlite.
+func connectionDSN(dbPath string) string {
+	if dbPath == "" {
+		// NOTE(review): the driver appears to parse a query string only after a
+		// non-empty name, so the empty DSN is passed through untouched.
+		return dbPath
+	}
+
+	sep := "?"
+	if strings.Contains(dbPath, "?") {
+		// dbPath already carries parameters (for example a file: URI).
+		sep = "&"
+	}
+
+	return dbPath + sep + url.Values{"_pragma": connectionPragmas}.Encode()
+}
+
+// ensureParentDir creates the directory that will hold dbPath. A path whose
+// directory component is "." or empty needs nothing created.
+func ensureParentDir(dbPath string) error {
+	parent := filepath.Dir(dbPath)
+	if parent == "." || parent == "" {
+		return nil
+	}
+
+	if err := os.MkdirAll(parent, 0o755); err != nil {
+		return fmt.Errorf("create database directory %q: %w", parent, err)
+	}
+
+	return nil
+}
diff --git a/packages/coord-core/db/pragmas.go b/packages/coord-core/db/pragmas.go
new file mode 100644
index 0000000..a6efc84
--- /dev/null
+++ b/packages/coord-core/db/pragmas.go
@@ -0,0 +1,27 @@
+package db
+
+import (
+	"context"
+	"database/sql"
+	"fmt"
+)
+
+// applyPragmas executes the package's PRAGMA statements through db and stops
+// at the first failure. database/sql runs each statement on whichever pooled
+// connection it picks, so this call alone does not configure connections the
+// pool opens later; Open repeats the connection-scoped settings in the DSN.
+func applyPragmas(ctx context.Context, db *sql.DB) error {
+	pragmas := []string{
+		"PRAGMA foreign_keys = ON;",
+		"PRAGMA journal_mode = WAL;",
+		"PRAGMA busy_timeout = 5000;",
+	}
+
+	for _, pragma := range pragmas {
+		if _, err := db.ExecContext(ctx, pragma); err != nil {
+			return fmt.Errorf("apply pragma %q: %w", pragma, err)
+		}
+	}
+
+	return nil
+}
diff --git a/packages/coord-core/db/schema/001_inbox.sql b/packages/coord-core/db/schema/001_inbox.sql
new file mode 100644
index 0000000..6ebf2b2
--- /dev/null
+++ b/packages/coord-core/db/schema/001_inbox.sql
@@ -0,0 +1,51 @@
+CREATE TABLE IF NOT EXISTS threads (
+  thread_id TEXT PRIMARY KEY,
+  run_id TEXT NOT NULL,
+  task_id TEXT NOT NULL,
+  subject TEXT NOT NULL,
+  created_by TEXT NOT NULL,
+  assigned_to TEXT NOT NULL,
+  status TEXT NOT NULL,
+  priority TEXT NOT NULL DEFAULT 'normal',
+  latest_message_id TEXT,
+  created_at TEXT NOT NULL,
+  updated_at TEXT NOT NULL
+);
+
+CREATE TABLE IF NOT EXISTS messages (
+  message_id TEXT PRIMARY KEY,
+  thread_id TEXT NOT NULL,
+  from_agent TEXT NOT NULL,
+  to_agent TEXT NOT NULL,
+  kind TEXT NOT NULL,
+  summary TEXT NOT NULL,
+  body TEXT NOT NULL DEFAULT '',
+  payload_json TEXT NOT NULL DEFAULT '{}',
+  created_at TEXT NOT NULL,
+  FOREIGN KEY(thread_id) REFERENCES threads(thread_id)
+);
+
+CREATE TABLE IF NOT EXISTS leases (
+  thread_id TEXT PRIMARY KEY,
+  agent_id TEXT NOT NULL,
+  lease_token TEXT NOT NULL,
+  claimed_at TEXT NOT NULL,
+  expires_at TEXT NOT NULL,
+  released_at TEXT
+);
+
+CREATE TABLE IF NOT EXISTS artifacts (
+  artifact_id TEXT PRIMARY KEY,
+  message_id TEXT NOT NULL,
+  path TEXT NOT NULL,
+  kind TEXT NOT NULL,
+
metadata_json TEXT NOT NULL DEFAULT '{}', + created_at TEXT NOT NULL, + FOREIGN KEY(message_id) REFERENCES messages(message_id) +); + +CREATE INDEX IF NOT EXISTS idx_threads_status_assigned + ON threads(status, assigned_to, updated_at); + +CREATE INDEX IF NOT EXISTS idx_messages_thread_created + ON messages(thread_id, created_at); diff --git a/packages/coord-core/db/schema/002_orch.sql b/packages/coord-core/db/schema/002_orch.sql new file mode 100644 index 0000000..e0cfeaf --- /dev/null +++ b/packages/coord-core/db/schema/002_orch.sql @@ -0,0 +1,52 @@ +CREATE TABLE IF NOT EXISTS runs ( + run_id TEXT PRIMARY KEY, + goal TEXT NOT NULL, + summary TEXT NOT NULL DEFAULT '', + status TEXT NOT NULL DEFAULT 'active', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS tasks ( + run_id TEXT NOT NULL, + task_id TEXT NOT NULL, + title TEXT NOT NULL, + summary TEXT NOT NULL DEFAULT '', + status TEXT NOT NULL, + default_to TEXT, + priority TEXT NOT NULL DEFAULT 'normal', + acceptance_json TEXT NOT NULL DEFAULT '[]', + latest_attempt_no INTEGER, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + PRIMARY KEY (run_id, task_id), + FOREIGN KEY(run_id) REFERENCES runs(run_id) +); + +CREATE TABLE IF NOT EXISTS task_dependencies ( + run_id TEXT NOT NULL, + task_id TEXT NOT NULL, + depends_on_task_id TEXT NOT NULL, + PRIMARY KEY (run_id, task_id, depends_on_task_id) +); + +CREATE TABLE IF NOT EXISTS task_attempts ( + run_id TEXT NOT NULL, + task_id TEXT NOT NULL, + attempt_no INTEGER NOT NULL, + assigned_to TEXT NOT NULL, + thread_id TEXT NOT NULL, + base_ref TEXT, + base_commit TEXT, + branch_name TEXT, + worktree_path TEXT, + workspace_status TEXT, + result_commit TEXT, + status TEXT NOT NULL, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + PRIMARY KEY (run_id, task_id, attempt_no) +); + +CREATE INDEX IF NOT EXISTS idx_tasks_run_status + ON tasks(run_id, status, priority, updated_at); diff --git 
a/packages/coord-core/db/schema/003_events.sql b/packages/coord-core/db/schema/003_events.sql new file mode 100644 index 0000000..f1ceb3a --- /dev/null +++ b/packages/coord-core/db/schema/003_events.sql @@ -0,0 +1,18 @@ +CREATE TABLE IF NOT EXISTS events ( + event_id INTEGER PRIMARY KEY AUTOINCREMENT, + run_id TEXT NOT NULL, + task_id TEXT NOT NULL, + thread_id TEXT, + source TEXT NOT NULL, + event_type TEXT NOT NULL, + message_id TEXT, + summary TEXT NOT NULL DEFAULT '', + payload_json TEXT NOT NULL DEFAULT '{}', + created_at TEXT NOT NULL +); + +CREATE INDEX IF NOT EXISTS idx_events_run_event + ON events(run_id, event_id); + +CREATE INDEX IF NOT EXISTS idx_events_thread_event + ON events(thread_id, event_id); diff --git a/packages/coord-core/db/schema/004_council.sql b/packages/coord-core/db/schema/004_council.sql new file mode 100644 index 0000000..21695c8 --- /dev/null +++ b/packages/coord-core/db/schema/004_council.sql @@ -0,0 +1,45 @@ +CREATE TABLE IF NOT EXISTS council_runs ( + run_id TEXT PRIMARY KEY, + mode TEXT NOT NULL, + target_type TEXT NOT NULL, + output_mode TEXT NOT NULL, + only_unanimous INTEGER NOT NULL DEFAULT 0, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS council_reviewers ( + run_id TEXT NOT NULL, + reviewer_role TEXT NOT NULL, + task_id TEXT NOT NULL, + status TEXT NOT NULL, + PRIMARY KEY (run_id, reviewer_role) +); + +CREATE TABLE IF NOT EXISTS council_findings ( + run_id TEXT NOT NULL, + reviewer_role TEXT NOT NULL, + finding_id TEXT NOT NULL, + title TEXT NOT NULL, + summary TEXT NOT NULL, + proposal TEXT NOT NULL, + rationale TEXT NOT NULL, + confidence TEXT NOT NULL, + tags_json TEXT NOT NULL DEFAULT '[]', + target_refs_json TEXT NOT NULL DEFAULT '{}', + PRIMARY KEY (run_id, reviewer_role, finding_id) +); + +CREATE TABLE IF NOT EXISTS council_groups ( + run_id TEXT NOT NULL, + group_id TEXT NOT NULL, + proposal TEXT NOT NULL, + bucket TEXT NOT NULL, + support_count INTEGER NOT NULL, + 
supporters_json TEXT NOT NULL DEFAULT '[]', + dissenters_json TEXT NOT NULL DEFAULT '[]', + rationale_summary TEXT NOT NULL DEFAULT '', + tags_json TEXT NOT NULL DEFAULT '[]', + source_finding_ids_json TEXT NOT NULL DEFAULT '[]', + PRIMARY KEY (run_id, group_id) +); diff --git a/packages/coord-core/db/schema/005_inbox_reads.sql b/packages/coord-core/db/schema/005_inbox_reads.sql new file mode 100644 index 0000000..d830226 --- /dev/null +++ b/packages/coord-core/db/schema/005_inbox_reads.sql @@ -0,0 +1,12 @@ +CREATE TABLE IF NOT EXISTS thread_reads ( + thread_id TEXT NOT NULL, + agent_id TEXT NOT NULL, + last_read_message_id TEXT NOT NULL, + last_read_at TEXT NOT NULL, + PRIMARY KEY(thread_id, agent_id), + FOREIGN KEY(thread_id) REFERENCES threads(thread_id), + FOREIGN KEY(last_read_message_id) REFERENCES messages(message_id) +); + +CREATE INDEX IF NOT EXISTS idx_thread_reads_agent + ON thread_reads(agent_id, last_read_at); diff --git a/packages/coord-core/db/schema/006_council_inputs.sql b/packages/coord-core/db/schema/006_council_inputs.sql new file mode 100644 index 0000000..b1709b1 --- /dev/null +++ b/packages/coord-core/db/schema/006_council_inputs.sql @@ -0,0 +1,9 @@ +CREATE TABLE IF NOT EXISTS council_inputs ( + run_id TEXT PRIMARY KEY, + prompt TEXT NOT NULL DEFAULT '', + target_file TEXT NOT NULL DEFAULT '', + repo_path TEXT NOT NULL DEFAULT '', + target_task_id TEXT NOT NULL DEFAULT '', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); diff --git a/packages/coord-core/db/schema/007_council_reports.sql b/packages/coord-core/db/schema/007_council_reports.sql new file mode 100644 index 0000000..cec16f4 --- /dev/null +++ b/packages/coord-core/db/schema/007_council_reports.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS council_reports ( + run_id TEXT PRIMARY KEY, + show_json TEXT NOT NULL DEFAULT '[]', + summary_json TEXT NOT NULL DEFAULT '{}', + markdown_path TEXT NOT NULL DEFAULT '', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); diff 
// CLIError is an error that carries a machine-readable code and the process
// exit code to use, alongside the human-readable message and an optional
// wrapped cause.
type CLIError struct {
	Code     string
	ExitCode int
	Message  string
	Err      error
}

// Error returns the message only; the wrapped cause is reachable via Unwrap.
func (e *CLIError) Error() string {
	return e.Message
}

// Unwrap exposes the wrapped cause to errors.Is and errors.As.
func (e *CLIError) Unwrap() error {
	return e.Err
}

// NewCLIError builds a *CLIError from its four parts and returns it as error.
func NewCLIError(code string, exitCode int, message string, err error) error {
	cliErr := new(CLIError)
	cliErr.Code = code
	cliErr.ExitCode = exitCode
	cliErr.Message = message
	cliErr.Err = err
	return cliErr
}

// InvalidInput returns a CLIError with code "invalid_input" and exit code 30.
func InvalidInput(message string, err error) error {
	return &CLIError{Code: "invalid_input", ExitCode: 30, Message: message, Err: err}
}

// NoMatchingWork returns a CLIError with code "no_matching_work" and exit
// code 10; it never wraps a cause.
func NoMatchingWork(message string) error {
	return &CLIError{Code: "no_matching_work", ExitCode: 10, Message: message}
}

type (
	// Success is the JSON envelope for a command that succeeded. Data is
	// omitted from the output when empty.
	Success struct {
		OK      bool           `json:"ok"`
		Command string         `json:"command"`
		Data    map[string]any `json:"data,omitempty"`
	}

	// Error is the JSON envelope for a command that failed.
	Error struct {
		OK    bool         `json:"ok"`
		Error ErrorPayload `json:"error"`
	}

	// ErrorPayload is the code/message pair nested inside Error.
	ErrorPayload struct {
		Code    string `json:"code"`
		Message string `json:"message"`
	}
)

// WriteJSON encodes v to w as indented JSON followed by a newline and returns
// the encoder's error, if any.
func WriteJSON(w io.Writer, v any) error {
	encoder := json.NewEncoder(w)
	// NOTE(review): the indent string is copied as it appears in this view of
	// the file, where runs of whitespace look collapsed; confirm its width
	// against the original source before applying.
	encoder.SetIndent("", " ")
	if err := encoder.Encode(v); err != nil {
		return err
	}
	return nil
}
// councilReviewerRoles lists the reviewer roles of a council in dispatch
// order.
var councilReviewerRoles = []string{
	"architecture-reviewer",
	"implementation-reviewer",
	"risk-reviewer",
}

type (
	// CouncilRun holds the council-specific settings of a run.
	CouncilRun struct {
		RunID         string `json:"run_id"`
		Mode          string `json:"mode"`
		TargetType    string `json:"target_type"`
		OutputMode    string `json:"output_mode"`
		OnlyUnanimous bool   `json:"only_unanimous"`
	}

	// CouncilInput describes what the council reviews. Empty fields are
	// omitted from JSON; TargetTaskID is serialized as "task_id".
	CouncilInput struct {
		RunID        string `json:"run_id"`
		Prompt       string `json:"prompt,omitempty"`
		TargetFile   string `json:"target_file,omitempty"`
		RepoPath     string `json:"repo_path,omitempty"`
		TargetTaskID string `json:"task_id,omitempty"`
	}

	// CouncilReviewer pairs a reviewer role with its task and that task's
	// status.
	CouncilReviewer struct {
		ReviewerRole string `json:"reviewer_role"`
		TaskID       string `json:"task_id"`
		Status       string `json:"status"`
	}

	// CouncilStartInput is the caller-supplied request for StartCouncil.
	CouncilStartInput struct {
		RunID         string
		Target        string
		TargetFile    string
		RepoPath      string
		TargetTaskID  string
		TargetType    string
		Mode          string
		OutputMode    string
		OnlyUnanimous bool
	}

	// CouncilStartResult reports the run, its input and the reviewers.
	CouncilStartResult struct {
		Run       CouncilRun        `json:"run"`
		Input     CouncilInput      `json:"input"`
		Reviewers []CouncilReviewer `json:"reviewers"`
	}

	// CouncilWaitInput names the run to wait for and an optional timeout.
	CouncilWaitInput struct {
		RunID   string
		Timeout time.Duration
	}

	// CouncilWaitResult reports whether the wait woke up and the reviewer
	// statuses; ReviewerStatuses is serialized as "reviewers".
	CouncilWaitResult struct {
		Woke             bool              `json:"woke"`
		RunID            string            `json:"run_id"`
		AllComplete      bool              `json:"all_complete"`
		ReviewerStatuses []CouncilReviewer `json:"reviewers"`
	}

	// CouncilFinding is one reviewer finding; tags and target references are
	// kept as raw JSON.
	CouncilFinding struct {
		RunID          string          `json:"run_id"`
		ReviewerRole   string          `json:"reviewer_role"`
		FindingID      string          `json:"finding_id"`
		Title          string          `json:"title"`
		Summary        string          `json:"summary"`
		Proposal       string          `json:"proposal"`
		Rationale      string          `json:"rationale"`
		Confidence     string          `json:"confidence"`
		TagsJSON       json.RawMessage `json:"tags_json"`
		TargetRefsJSON json.RawMessage `json:"target_refs_json"`
	}

	// CouncilGroup is one grouped recommendation with its bucket and support
	// count; list-valued fields are kept as raw JSON.
	CouncilGroup struct {
		RunID                string          `json:"run_id"`
		GroupID              string          `json:"group_id"`
		Proposal             string          `json:"proposal"`
		Bucket               string          `json:"bucket"`
		SupportCount         int             `json:"support_count"`
		SupportersJSON       json.RawMessage `json:"supporters_json"`
		DissentersJSON       json.RawMessage `json:"dissenters_json"`
		RationaleSummary     string          `json:"rationale_summary"`
		TagsJSON             json.RawMessage `json:"tags_json"`
		SourceFindingIDsJSON json.RawMessage `json:"source_finding_ids_json"`
	}

	// CouncilTallyInput is the request for TallyCouncil.
	CouncilTallyInput struct {
		RunID      string
		Similarity string
	}

	// CouncilTallyResult carries the groups and the number of groups per
	// bucket.
	CouncilTallyResult struct {
		RunID                  string         `json:"run_id"`
		Similarity             string         `json:"similarity"`
		Counts                 map[string]int `json:"counts"`
		GroupedRecommendations []CouncilGroup `json:"grouped_recommendations"`
	}

	// CouncilReportInput is the request for BuildCouncilReport.
	CouncilReportInput struct {
		RunID string
		Show  string
	}

	// CouncilReportArtifact names a report artifact by kind and path.
	CouncilReportArtifact struct {
		Kind string `json:"kind"`
		Path string `json:"path"`
	}

	// CouncilReportResult is the rendered report; Markdown and
	// ReportArtifacts are omitted from JSON when empty.
	CouncilReportResult struct {
		RunID                  string                  `json:"run_id"`
		Show                   []string                `json:"show"`
		Summary                map[string]int          `json:"summary"`
		GroupedRecommendations []CouncilGroup          `json:"grouped_recommendations"`
		Markdown               string                  `json:"markdown,omitempty"`
		ReportArtifacts        []CouncilReportArtifact `json:"report_artifacts,omitempty"`
	}

	// CouncilPersistReportInput is the request for PersistCouncilReport.
	CouncilPersistReportInput struct {
		RunID        string
		Show         []string
		Summary      map[string]int
		MarkdownPath string
	}

	// councilReviewerOutput is the JSON document a reviewer returns.
	councilReviewerOutput struct {
		ReviewerRole string                 `json:"reviewer_role"`
		Findings     []councilFindingOutput `json:"findings"`
	}

	// councilFindingOutput is one finding inside councilReviewerOutput; tags
	// and target_refs stay raw so they can be validated separately.
	councilFindingOutput struct {
		Title      string          `json:"title"`
		Summary    string          `json:"summary"`
		Proposal   string          `json:"proposal"`
		Rationale  string          `json:"rationale"`
		Confidence string          `json:"confidence"`
		Tags       json.RawMessage `json:"tags"`
		TargetRefs json.RawMessage `json:"target_refs"`
	}
)
strings.TrimSpace(input.RunID) + if runID == "" { + return CouncilStartResult{}, fmt.Errorf("%w: run id is required", ErrInvalidInput) + } + + councilInput, err := normalizeCouncilInput(input) + if err != nil { + return CouncilStartResult{}, err + } + + councilRun, err := normalizeCouncilRun(input) + if err != nil { + return CouncilStartResult{}, err + } + + now := nowUTC() + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return CouncilStartResult{}, fmt.Errorf("begin council start transaction: %w", err) + } + defer tx.Rollback() + + if _, err := selectRun(ctx, tx, runID); err == nil { + return CouncilStartResult{}, fmt.Errorf("%w: run %s already exists", ErrInvalidState, runID) + } else if !errors.Is(err, ErrRunNotFound) { + return CouncilStartResult{}, err + } + + goal := buildCouncilRunGoal(councilInput) + summary := buildCouncilRunSummary(councilRun, councilInput) + + _, err = tx.ExecContext( + ctx, + `INSERT INTO runs (run_id, goal, summary, status, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?)`, + runID, + goal, + summary, + "active", + formatTime(now), + formatTime(now), + ) + if err != nil { + return CouncilStartResult{}, fmt.Errorf("insert council run into runs: %w", err) + } + + if err := insertEvent(ctx, tx, eventInput{ + RunID: runID, + Source: "orch", + EventType: "run_initialized", + Summary: summary, + PayloadJSON: marshalJSON(map[string]any{"goal": goal, "summary": summary}), + CreatedAt: now, + }); err != nil { + return CouncilStartResult{}, err + } + + _, err = tx.ExecContext( + ctx, + `INSERT INTO council_runs ( + run_id, mode, target_type, output_mode, only_unanimous, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?)`, + runID, + councilRun.Mode, + councilRun.TargetType, + councilRun.OutputMode, + boolToInt(councilRun.OnlyUnanimous), + formatTime(now), + formatTime(now), + ) + if err != nil { + return CouncilStartResult{}, fmt.Errorf("insert council run metadata: %w", err) + } + + _, err = tx.ExecContext( + ctx, + `INSERT INTO 
council_inputs ( + run_id, prompt, target_file, repo_path, target_task_id, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?)`, + runID, + councilInput.Prompt, + councilInput.TargetFile, + councilInput.RepoPath, + councilInput.TargetTaskID, + formatTime(now), + formatTime(now), + ) + if err != nil { + return CouncilStartResult{}, fmt.Errorf("insert council input metadata: %w", err) + } + + reviewers := make([]CouncilReviewer, 0, len(councilReviewerRoles)) + for i, reviewerRole := range councilReviewerRoles { + taskID := fmt.Sprintf("CR%d", i+1) + task := Task{ + RunID: runID, + TaskID: taskID, + Title: buildCouncilTaskTitle(reviewerRole), + Summary: buildCouncilTaskSummary(reviewerRole), + Status: "ready", + DefaultTo: reviewerRole, + Priority: "normal", + AcceptanceJSON: []byte(buildCouncilTaskAcceptanceJSON(councilRun, councilInput, reviewerRole)), + CreatedAt: now, + UpdatedAt: now, + } + + _, err = tx.ExecContext( + ctx, + `INSERT INTO tasks ( + run_id, task_id, title, summary, status, default_to, priority, + acceptance_json, latest_attempt_no, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, NULL, ?, ?)`, + task.RunID, + task.TaskID, + task.Title, + task.Summary, + task.Status, + nullIfEmpty(task.DefaultTo), + task.Priority, + string(task.AcceptanceJSON), + formatTime(task.CreatedAt), + formatTime(task.UpdatedAt), + ) + if err != nil { + return CouncilStartResult{}, fmt.Errorf("insert council reviewer task: %w", err) + } + + if err := insertEvent(ctx, tx, eventInput{ + RunID: runID, + TaskID: taskID, + Source: "orch", + EventType: "task_added", + Summary: task.Title, + PayloadJSON: marshalJSON(map[string]any{"title": task.Title, "priority": task.Priority}), + CreatedAt: now, + }); err != nil { + return CouncilStartResult{}, err + } + if err := insertEvent(ctx, tx, eventInput{ + RunID: runID, + TaskID: taskID, + Source: "orch", + EventType: "task_ready", + Summary: task.Title, + PayloadJSON: marshalJSON(map[string]any{"task_id": taskID}), + 
CreatedAt: now, + }); err != nil { + return CouncilStartResult{}, err + } + + dispatchResult, finalizeWorkspace, err := s.dispatchTaskTx( + ctx, + tx, + task, + reviewerRole, + buildCouncilTaskBody(councilRun, councilInput, reviewerRole), + "", + nil, + now, + ) + if err != nil { + return CouncilStartResult{}, err + } + defer finalizeWorkspace(false) + + _, err = tx.ExecContext( + ctx, + `INSERT INTO council_reviewers (run_id, reviewer_role, task_id, status) + VALUES (?, ?, ?, ?)`, + runID, + reviewerRole, + taskID, + dispatchResult.Task.Status, + ) + if err != nil { + return CouncilStartResult{}, fmt.Errorf("insert council reviewer row: %w", err) + } + + reviewers = append(reviewers, CouncilReviewer{ + ReviewerRole: reviewerRole, + TaskID: taskID, + Status: dispatchResult.Task.Status, + }) + } + + if err := insertEvent(ctx, tx, eventInput{ + RunID: runID, + Source: "orch", + EventType: "council_started", + Summary: "council reviewers dispatched", + PayloadJSON: marshalJSON(map[string]any{ + "mode": councilRun.Mode, + "target_type": councilRun.TargetType, + "output_mode": councilRun.OutputMode, + "only_unanimous": councilRun.OnlyUnanimous, + "reviewers": reviewers, + }), + CreatedAt: now, + }); err != nil { + return CouncilStartResult{}, err + } + + if err := updateRunAggregateStatus(ctx, tx, runID, now); err != nil { + return CouncilStartResult{}, err + } + + if err := tx.Commit(); err != nil { + return CouncilStartResult{}, fmt.Errorf("commit council start transaction: %w", err) + } + + return CouncilStartResult{ + Run: councilRun, + Input: councilInput, + Reviewers: reviewers, + }, nil +} + +func normalizeCouncilRun(input CouncilStartInput) (CouncilRun, error) { + mode := defaultString(strings.TrimSpace(input.Mode), "brainstorm") + switch mode { + case "brainstorm", "review": + default: + return CouncilRun{}, fmt.Errorf("%w: mode must be brainstorm or review", ErrInvalidInput) + } + + targetType := defaultString(strings.TrimSpace(input.TargetType), "mixed") + 
// buildCouncilTaskTitle returns the task title for a reviewer role, falling
// back to a generic title for roles it does not recognise.
func buildCouncilTaskTitle(reviewerRole string) string {
	title := "Council review"
	switch reviewerRole {
	case "architecture-reviewer":
		title = "Council architecture review"
	case "implementation-reviewer":
		title = "Council implementation review"
	case "risk-reviewer":
		title = "Council risk review"
	}
	return title
}

// buildCouncilTaskSummary returns the one-line task summary for a reviewer
// role, falling back to a generic summary for roles it does not recognise.
func buildCouncilTaskSummary(reviewerRole string) string {
	summary := "Review the target"
	switch reviewerRole {
	case "architecture-reviewer":
		summary = "Review the target for architecture, boundaries, and interfaces"
	case "implementation-reviewer":
		summary = "Review the target for simplicity, maintainability, and practicality"
	case "risk-reviewer":
		summary = "Review the target for regressions, correctness, and operability risks"
	}
	return summary
}
// truncateSingleLine flattens value onto a single line and caps it at maxLen
// bytes.
//
// Every run of whitespace (including newlines and carriage returns) becomes a
// single space and leading/trailing whitespace is dropped. When the flattened
// text is longer than maxLen it is shortened and, if maxLen is greater than 3,
// ends in "...". A maxLen of zero or less disables truncation.
//
// The cut never lands inside a multi-byte UTF-8 sequence, so valid input
// always yields valid output; the result may therefore be a few bytes shorter
// than maxLen. For ASCII text the result is exactly maxLen bytes long.
func truncateSingleLine(value string, maxLen int) string {
	// strings.Fields splits on any Unicode whitespace and ignores leading and
	// trailing runs, so joining its result both trims and flattens.
	value = strings.Join(strings.Fields(value), " ")
	if maxLen <= 0 || len(value) <= maxLen {
		return value
	}
	if maxLen <= 3 {
		// Too short for an ellipsis: hard cut.
		return runeSafePrefix(value, maxLen)
	}
	return runeSafePrefix(value, maxLen-3) + "..."
}

// runeSafePrefix returns at most limit bytes from the start of value, moving
// the cut left while it would fall on a UTF-8 continuation byte (10xxxxxx).
func runeSafePrefix(value string, limit int) string {
	if limit >= len(value) {
		return value
	}
	for limit > 0 && value[limit]&0xC0 == 0x80 {
		limit--
	}
	return value[:limit]
}

// boolToInt maps true to 1 and false to 0.
func boolToInt(value bool) int {
	if value {
		return 1
	}
	return 0
}
s.GetCouncilReviewerStatuses(ctx, runID) + return CouncilWaitResult{ + Woke: false, + RunID: runID, + AllComplete: false, + ReviewerStatuses: reviewers, + }, nil + } + return CouncilWaitResult{}, waitErr + } + if !ok { + reviewers, _, _ := s.GetCouncilReviewerStatuses(ctx, runID) + return CouncilWaitResult{ + Woke: false, + RunID: runID, + AllComplete: false, + ReviewerStatuses: reviewers, + }, nil + } + continue + } + if isDeadlineExceeded(waitCtx) { + reviewers, _, _ := s.GetCouncilReviewerStatuses(ctx, runID) + return CouncilWaitResult{ + Woke: false, + RunID: runID, + AllComplete: false, + ReviewerStatuses: reviewers, + }, nil + } + return CouncilWaitResult{}, err + } + + ok, err := waitForNextPoll(waitCtx, 200*time.Millisecond) + if err != nil { + if errors.Is(err, context.DeadlineExceeded) { + reviewers, _, _ := s.GetCouncilReviewerStatuses(ctx, runID) + return CouncilWaitResult{ + Woke: false, + RunID: runID, + AllComplete: false, + ReviewerStatuses: reviewers, + }, nil + } + return CouncilWaitResult{}, err + } + if !ok { + reviewers, _, _ := s.GetCouncilReviewerStatuses(ctx, runID) + return CouncilWaitResult{ + Woke: false, + RunID: runID, + AllComplete: false, + ReviewerStatuses: reviewers, + }, nil + } + } +} + +func (s *OrchStore) GetCouncilRun(ctx context.Context, runID string) (CouncilRun, error) { + row := s.db.QueryRowContext( + ctx, + `SELECT run_id, mode, target_type, output_mode, only_unanimous + FROM council_runs + WHERE run_id = ?`, + runID, + ) + + var ( + run CouncilRun + onlyUnanimous int + ) + err := row.Scan(&run.RunID, &run.Mode, &run.TargetType, &run.OutputMode, &onlyUnanimous) + if errors.Is(err, sql.ErrNoRows) { + return CouncilRun{}, fmt.Errorf("%w: council run %s not found", ErrRunNotFound, runID) + } + if err != nil { + return CouncilRun{}, fmt.Errorf("scan council run: %w", err) + } + run.OnlyUnanimous = onlyUnanimous != 0 + return run, nil +} + +func (s *OrchStore) GetCouncilReviewerStatuses(ctx context.Context, runID string) 
([]CouncilReviewer, bool, error) { + rows, err := s.db.QueryContext( + ctx, + `SELECT cr.reviewer_role, cr.task_id, t.status + FROM council_reviewers cr + JOIN tasks t + ON t.run_id = cr.run_id + AND t.task_id = cr.task_id + WHERE cr.run_id = ? + ORDER BY cr.reviewer_role ASC`, + runID, + ) + if err != nil { + return nil, false, fmt.Errorf("query council reviewer statuses: %w", err) + } + defer rows.Close() + + reviewers := make([]CouncilReviewer, 0, len(councilReviewerRoles)) + allComplete := true + for rows.Next() { + var reviewer CouncilReviewer + if err := rows.Scan(&reviewer.ReviewerRole, &reviewer.TaskID, &reviewer.Status); err != nil { + return nil, false, fmt.Errorf("scan council reviewer status: %w", err) + } + if reviewer.Status != "done" && reviewer.Status != "failed" && reviewer.Status != "cancelled" { + allComplete = false + } + reviewers = append(reviewers, reviewer) + } + if err := rows.Err(); err != nil { + return nil, false, fmt.Errorf("iterate council reviewer statuses: %w", err) + } + if len(reviewers) == 0 { + return nil, false, fmt.Errorf("%w: council reviewers for run %s not found", ErrRunNotFound, runID) + } + + return reviewers, allComplete, nil +} + +func (s *OrchStore) TallyCouncil(ctx context.Context, input CouncilTallyInput) (CouncilTallyResult, error) { + runID := strings.TrimSpace(input.RunID) + if runID == "" { + return CouncilTallyResult{}, fmt.Errorf("%w: run id is required", ErrInvalidInput) + } + + similarity := defaultString(strings.TrimSpace(input.Similarity), "normal") + if similarity != "normal" && similarity != "strict" { + return CouncilTallyResult{}, fmt.Errorf("%w: similarity must be strict or normal", ErrInvalidInput) + } + + if _, err := s.GetCouncilRun(ctx, runID); err != nil { + return CouncilTallyResult{}, err + } + + if _, err := s.ReconcileRun(ctx, runID); err != nil && !isSQLiteBusyError(err) { + return CouncilTallyResult{}, err + } + + reviewers, allComplete, err := s.GetCouncilReviewerStatuses(ctx, runID) + if 
err != nil { + return CouncilTallyResult{}, err + } + if !allComplete { + return CouncilTallyResult{}, fmt.Errorf("%w: council reviewers are not complete yet", ErrInvalidState) + } + + findings, err := s.collectCouncilFindings(ctx, runID, reviewers) + if err != nil { + return CouncilTallyResult{}, err + } + groups := groupCouncilFindings(runID, findings, reviewers, similarity) + counts := make(map[string]int) + for _, group := range groups { + counts[group.Bucket]++ + } + + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return CouncilTallyResult{}, fmt.Errorf("begin council tally transaction: %w", err) + } + defer tx.Rollback() + + if _, err := tx.ExecContext(ctx, `DELETE FROM council_findings WHERE run_id = ?`, runID); err != nil { + return CouncilTallyResult{}, fmt.Errorf("clear council findings: %w", err) + } + if _, err := tx.ExecContext(ctx, `DELETE FROM council_groups WHERE run_id = ?`, runID); err != nil { + return CouncilTallyResult{}, fmt.Errorf("clear council groups: %w", err) + } + + for _, finding := range findings { + if _, err := tx.ExecContext( + ctx, + `INSERT INTO council_findings ( + run_id, reviewer_role, finding_id, title, summary, proposal, rationale, + confidence, tags_json, target_refs_json + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + finding.RunID, + finding.ReviewerRole, + finding.FindingID, + finding.Title, + finding.Summary, + finding.Proposal, + finding.Rationale, + finding.Confidence, + string(finding.TagsJSON), + string(finding.TargetRefsJSON), + ); err != nil { + return CouncilTallyResult{}, fmt.Errorf("insert council finding: %w", err) + } + } + + for _, group := range groups { + if _, err := tx.ExecContext( + ctx, + `INSERT INTO council_groups ( + run_id, group_id, proposal, bucket, support_count, supporters_json, + dissenters_json, rationale_summary, tags_json, source_finding_ids_json + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + group.RunID, + group.GroupID, + group.Proposal, + group.Bucket, + group.SupportCount, + 
string(group.SupportersJSON), + string(group.DissentersJSON), + group.RationaleSummary, + string(group.TagsJSON), + string(group.SourceFindingIDsJSON), + ); err != nil { + return CouncilTallyResult{}, fmt.Errorf("insert council group: %w", err) + } + } + + if err := insertEvent(ctx, tx, eventInput{ + RunID: runID, + Source: "orch", + EventType: "council_tallied", + Summary: "council recommendations grouped", + PayloadJSON: marshalJSON(map[string]any{ + "similarity": similarity, + "counts": counts, + }), + CreatedAt: nowUTC(), + }); err != nil { + return CouncilTallyResult{}, err + } + + if err := tx.Commit(); err != nil { + return CouncilTallyResult{}, fmt.Errorf("commit council tally transaction: %w", err) + } + + return CouncilTallyResult{ + RunID: runID, + Similarity: similarity, + Counts: counts, + GroupedRecommendations: groups, + }, nil +} + +func (s *OrchStore) BuildCouncilReport(ctx context.Context, input CouncilReportInput) (CouncilReportResult, error) { + runID := strings.TrimSpace(input.RunID) + if runID == "" { + return CouncilReportResult{}, fmt.Errorf("%w: run id is required", ErrInvalidInput) + } + + run, err := s.GetCouncilRun(ctx, runID) + if err != nil { + return CouncilReportResult{}, err + } + councilInput, err := s.GetCouncilInput(ctx, runID) + if err != nil { + return CouncilReportResult{}, err + } + + show, err := normalizeCouncilReportShow(input.Show, run.OnlyUnanimous) + if err != nil { + return CouncilReportResult{}, err + } + + groups, tallied, err := s.ListCouncilGroups(ctx, runID) + if err != nil { + return CouncilReportResult{}, err + } + if !tallied { + return CouncilReportResult{}, fmt.Errorf("%w: council groups are not available; run council tally first", ErrInvalidState) + } + + summary := councilGroupSummary(groups) + selectedGroups := selectCouncilGroupsForReport(groups, show) + markdown := renderCouncilReportMarkdown(run, councilInput, show, summary, selectedGroups) + + return CouncilReportResult{ + RunID: runID, + Show: show, + 
Summary: summary, + GroupedRecommendations: selectedGroups, + Markdown: markdown, + }, nil +} + +func (s *OrchStore) PersistCouncilReport(ctx context.Context, input CouncilPersistReportInput) error { + runID := strings.TrimSpace(input.RunID) + if runID == "" { + return fmt.Errorf("%w: run id is required", ErrInvalidInput) + } + if _, err := s.GetCouncilRun(ctx, runID); err != nil { + return err + } + + showJSON := marshalJSON(input.Show) + summaryJSON := marshalJSON(normalizeCouncilSummary(input.Summary)) + markdownPath := strings.TrimSpace(input.MarkdownPath) + now := nowUTC() + + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return fmt.Errorf("begin council report transaction: %w", err) + } + defer tx.Rollback() + + if _, err := tx.ExecContext( + ctx, + `INSERT INTO council_reports ( + run_id, show_json, summary_json, markdown_path, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?) + ON CONFLICT(run_id) DO UPDATE SET + show_json = excluded.show_json, + summary_json = excluded.summary_json, + markdown_path = excluded.markdown_path, + updated_at = excluded.updated_at`, + runID, + showJSON, + summaryJSON, + markdownPath, + formatTime(now), + formatTime(now), + ); err != nil { + return fmt.Errorf("persist council report metadata: %w", err) + } + + if err := insertEvent(ctx, tx, eventInput{ + RunID: runID, + Source: "orch", + EventType: "council_reported", + Summary: "council report generated", + PayloadJSON: marshalJSON(map[string]any{ + "show": input.Show, + "markdown_path": markdownPath, + }), + CreatedAt: now, + }); err != nil { + return err + } + + if err := tx.Commit(); err != nil { + return fmt.Errorf("commit council report transaction: %w", err) + } + return nil +} + +func (s *OrchStore) collectCouncilFindings(ctx context.Context, runID string, reviewers []CouncilReviewer) ([]CouncilFinding, error) { + findings := make([]CouncilFinding, 0) + for _, reviewer := range reviewers { + if reviewer.Status != "done" { + return nil, fmt.Errorf("%w: reviewer 
%s did not finish successfully", ErrInvalidState, reviewer.ReviewerRole) + } + + message, err := s.loadCouncilReviewerResultMessage(ctx, runID, reviewer.TaskID) + if err != nil { + return nil, err + } + output, err := parseCouncilReviewerOutput(reviewer.ReviewerRole, message) + if err != nil { + return nil, err + } + for i, finding := range output.Findings { + tagsJSON, err := normalizeOptionalJSONArray(finding.Tags) + if err != nil { + return nil, fmt.Errorf("%w: reviewer %s finding %d tags must be a JSON array", ErrInvalidInput, reviewer.ReviewerRole, i+1) + } + targetRefsJSON, err := normalizeOptionalJSONObject(finding.TargetRefs) + if err != nil { + return nil, fmt.Errorf("%w: reviewer %s finding %d target_refs must be a JSON object", ErrInvalidInput, reviewer.ReviewerRole, i+1) + } + confidence := strings.TrimSpace(finding.Confidence) + switch confidence { + case "low", "medium", "high": + default: + return nil, fmt.Errorf("%w: reviewer %s finding %d confidence must be low, medium, or high", ErrInvalidInput, reviewer.ReviewerRole, i+1) + } + if strings.TrimSpace(finding.Proposal) == "" { + return nil, fmt.Errorf("%w: reviewer %s finding %d proposal is required", ErrInvalidInput, reviewer.ReviewerRole, i+1) + } + findings = append(findings, CouncilFinding{ + RunID: runID, + ReviewerRole: reviewer.ReviewerRole, + FindingID: fmt.Sprintf("f%02d", i+1), + Title: strings.TrimSpace(finding.Title), + Summary: strings.TrimSpace(finding.Summary), + Proposal: strings.TrimSpace(finding.Proposal), + Rationale: strings.TrimSpace(finding.Rationale), + Confidence: confidence, + TagsJSON: json.RawMessage(tagsJSON), + TargetRefsJSON: json.RawMessage(targetRefsJSON), + }) + } + } + return findings, nil +} + +func (s *OrchStore) GetCouncilInput(ctx context.Context, runID string) (CouncilInput, error) { + row := s.db.QueryRowContext( + ctx, + `SELECT run_id, prompt, target_file, repo_path, target_task_id + FROM council_inputs + WHERE run_id = ?`, + runID, + ) + + var input 
CouncilInput + if err := row.Scan( + &input.RunID, + &input.Prompt, + &input.TargetFile, + &input.RepoPath, + &input.TargetTaskID, + ); errors.Is(err, sql.ErrNoRows) { + return CouncilInput{RunID: runID}, nil + } else if err != nil { + return CouncilInput{}, fmt.Errorf("scan council input: %w", err) + } + return input, nil +} + +func (s *OrchStore) ListCouncilGroups(ctx context.Context, runID string) ([]CouncilGroup, bool, error) { + rows, err := s.db.QueryContext( + ctx, + `SELECT + run_id, group_id, proposal, bucket, support_count, supporters_json, + dissenters_json, rationale_summary, tags_json, source_finding_ids_json + FROM council_groups + WHERE run_id = ? + ORDER BY + CASE bucket + WHEN 'consensus' THEN 1 + WHEN 'majority' THEN 2 + WHEN 'minority' THEN 3 + ELSE 4 + END, + support_count DESC, + group_id ASC`, + runID, + ) + if err != nil { + return nil, false, fmt.Errorf("query council groups: %w", err) + } + defer rows.Close() + + groups := make([]CouncilGroup, 0) + for rows.Next() { + var group CouncilGroup + var supportersJSON string + var dissentersJSON string + var tagsJSON string + var sourceFindingIDsJSON string + if err := rows.Scan( + &group.RunID, + &group.GroupID, + &group.Proposal, + &group.Bucket, + &group.SupportCount, + &supportersJSON, + &dissentersJSON, + &group.RationaleSummary, + &tagsJSON, + &sourceFindingIDsJSON, + ); err != nil { + return nil, false, fmt.Errorf("scan council group: %w", err) + } + group.SupportersJSON = json.RawMessage(supportersJSON) + group.DissentersJSON = json.RawMessage(dissentersJSON) + group.TagsJSON = json.RawMessage(tagsJSON) + group.SourceFindingIDsJSON = json.RawMessage(sourceFindingIDsJSON) + groups = append(groups, group) + } + if err := rows.Err(); err != nil { + return nil, false, fmt.Errorf("iterate council groups: %w", err) + } + + if len(groups) > 0 { + return groups, true, nil + } + + tallied, err := s.hasCouncilTallyEvent(ctx, runID) + if err != nil { + return nil, false, err + } + return groups, 
tallied, nil +} + +func (s *OrchStore) hasCouncilTallyEvent(ctx context.Context, runID string) (bool, error) { + var count int + if err := s.db.QueryRowContext( + ctx, + `SELECT COUNT(*) + FROM events + WHERE run_id = ? AND event_type = 'council_tallied'`, + runID, + ).Scan(&count); err != nil { + return false, fmt.Errorf("query council tally events: %w", err) + } + return count > 0, nil +} + +func (s *OrchStore) loadCouncilReviewerResultMessage(ctx context.Context, runID, taskID string) (Message, error) { + task, err := selectTask(ctx, s.db, runID, taskID) + if err != nil { + return Message{}, err + } + if task.LatestAttemptNo == 0 { + return Message{}, fmt.Errorf("%w: reviewer task %s has no attempt", ErrInvalidState, taskID) + } + + attempt, err := selectAttempt(ctx, s.db, runID, taskID, task.LatestAttemptNo) + if err != nil { + return Message{}, err + } + + row := s.db.QueryRowContext( + ctx, + `SELECT + message_id, thread_id, from_agent, to_agent, kind, summary, body, + payload_json, created_at + FROM messages + WHERE thread_id = ? 
AND kind = 'result' + ORDER BY created_at DESC + LIMIT 1`, + attempt.ThreadID, + ) + message, err := scanMessage(row) + if errors.Is(err, sql.ErrNoRows) { + return Message{}, fmt.Errorf("%w: reviewer task %s has no result message", ErrInvalidState, taskID) + } + if err != nil { + return Message{}, err + } + return message, nil +} + +func parseCouncilReviewerOutput(expectedRole string, message Message) (councilReviewerOutput, error) { + candidates := []string{strings.TrimSpace(message.Body), strings.TrimSpace(string(message.PayloadJSON))} + var lastErr error + for _, candidate := range candidates { + if candidate == "" || candidate == "{}" { + continue + } + var output councilReviewerOutput + if err := json.Unmarshal([]byte(candidate), &output); err != nil { + lastErr = err + continue + } + if strings.TrimSpace(output.ReviewerRole) == "" { + return councilReviewerOutput{}, fmt.Errorf("%w: reviewer output must include reviewer_role", ErrInvalidInput) + } + if output.ReviewerRole != expectedRole { + return councilReviewerOutput{}, fmt.Errorf("%w: reviewer output role %s does not match expected %s", ErrInvalidInput, output.ReviewerRole, expectedRole) + } + return output, nil + } + if lastErr != nil { + return councilReviewerOutput{}, fmt.Errorf("%w: reviewer output must be valid JSON", ErrInvalidInput) + } + return councilReviewerOutput{}, fmt.Errorf("%w: reviewer result message did not contain council output JSON", ErrInvalidInput) +} + +func normalizeOptionalJSONArray(raw json.RawMessage) (string, error) { + if len(raw) == 0 || strings.TrimSpace(string(raw)) == "" || strings.TrimSpace(string(raw)) == "null" { + return "[]", nil + } + var value []any + if err := json.Unmarshal(raw, &value); err != nil { + return "", err + } + return marshalJSON(value), nil +} + +func normalizeOptionalJSONObject(raw json.RawMessage) (string, error) { + if len(raw) == 0 || strings.TrimSpace(string(raw)) == "" || strings.TrimSpace(string(raw)) == "null" { + return "{}", nil + } + var 
value map[string]any + if err := json.Unmarshal(raw, &value); err != nil { + return "", err + } + return marshalJSON(value), nil +} + +func groupCouncilFindings(runID string, findings []CouncilFinding, reviewers []CouncilReviewer, similarity string) []CouncilGroup { + type groupedFinding struct { + key string + proposal string + findings []CouncilFinding + } + + order := make([]string, 0) + groupsByKey := make(map[string]*groupedFinding) + for _, finding := range findings { + key := councilProposalGroupKey(finding.Proposal, similarity) + group, ok := groupsByKey[key] + if !ok { + group = &groupedFinding{ + key: key, + proposal: finding.Proposal, + } + groupsByKey[key] = group + order = append(order, key) + } + group.findings = append(group.findings, finding) + } + + sortedReviewers := make([]string, 0, len(reviewers)) + for _, reviewer := range reviewers { + sortedReviewers = append(sortedReviewers, reviewer.ReviewerRole) + } + sort.Strings(sortedReviewers) + + result := make([]CouncilGroup, 0, len(order)) + for idx, key := range order { + group := groupsByKey[key] + supporterSet := make(map[string]struct{}) + tagSet := make(map[string]struct{}) + sourceFindingIDs := make([]string, 0, len(group.findings)) + rationaleSummary := "" + + for _, finding := range group.findings { + supporterSet[finding.ReviewerRole] = struct{}{} + sourceFindingIDs = append(sourceFindingIDs, finding.ReviewerRole+":"+finding.FindingID) + if rationaleSummary == "" && finding.Rationale != "" { + rationaleSummary = finding.Rationale + } + + var tags []string + if len(finding.TagsJSON) > 0 { + _ = json.Unmarshal(finding.TagsJSON, &tags) + } + for _, tag := range tags { + tag = strings.TrimSpace(tag) + if tag != "" { + tagSet[tag] = struct{}{} + } + } + } + + supporters := make([]string, 0, len(supporterSet)) + for _, reviewer := range sortedReviewers { + if _, ok := supporterSet[reviewer]; ok { + supporters = append(supporters, reviewer) + } + } + dissenters := make([]string, 0, 
len(sortedReviewers)-len(supporters)) + for _, reviewer := range sortedReviewers { + if _, ok := supporterSet[reviewer]; !ok { + dissenters = append(dissenters, reviewer) + } + } + + tags := make([]string, 0, len(tagSet)) + for tag := range tagSet { + tags = append(tags, tag) + } + sort.Strings(tags) + sort.Strings(sourceFindingIDs) + + supportCount := len(supporters) + bucket := "minority" + if supportCount == 3 { + bucket = "consensus" + } else if supportCount == 2 { + bucket = "majority" + } + + result = append(result, CouncilGroup{ + RunID: runID, + GroupID: fmt.Sprintf("grp_%02d", idx+1), + Proposal: group.proposal, + Bucket: bucket, + SupportCount: supportCount, + SupportersJSON: json.RawMessage(marshalJSON(supporters)), + DissentersJSON: json.RawMessage(marshalJSON(dissenters)), + RationaleSummary: rationaleSummary, + TagsJSON: json.RawMessage(marshalJSON(tags)), + SourceFindingIDsJSON: json.RawMessage(marshalJSON(sourceFindingIDs)), + }) + } + + sort.SliceStable(result, func(i, j int) bool { + if result[i].SupportCount != result[j].SupportCount { + return result[i].SupportCount > result[j].SupportCount + } + return result[i].Proposal < result[j].Proposal + }) + for i := range result { + result[i].GroupID = fmt.Sprintf("grp_%02d", i+1) + } + return result +} + +func normalizeCouncilReportShow(raw string, onlyUnanimous bool) ([]string, error) { + if strings.TrimSpace(raw) == "" { + if onlyUnanimous { + return []string{"consensus"}, nil + } + return []string{"consensus", "majority"}, nil + } + + parts := strings.Split(raw, ",") + show := make([]string, 0, len(parts)) + seen := make(map[string]struct{}, len(parts)) + for _, part := range parts { + value := strings.ToLower(strings.TrimSpace(part)) + if value == "" { + continue + } + if value == "all" { + return []string{"consensus", "majority", "minority"}, nil + } + switch value { + case "consensus", "majority", "minority": + default: + return nil, fmt.Errorf("%w: show must contain consensus, majority, 
minority, or all", ErrInvalidInput) + } + if _, ok := seen[value]; ok { + continue + } + seen[value] = struct{}{} + show = append(show, value) + } + if len(show) == 0 { + return nil, fmt.Errorf("%w: show must contain at least one bucket", ErrInvalidInput) + } + return show, nil +} + +func councilGroupSummary(groups []CouncilGroup) map[string]int { + summary := normalizeCouncilSummary(nil) + for _, group := range groups { + summary[group.Bucket]++ + } + return summary +} + +func normalizeCouncilSummary(summary map[string]int) map[string]int { + result := map[string]int{ + "consensus": 0, + "majority": 0, + "minority": 0, + } + for key, value := range summary { + result[key] = value + } + return result +} + +func selectCouncilGroupsForReport(groups []CouncilGroup, show []string) []CouncilGroup { + groupedByBucket := make(map[string][]CouncilGroup, len(show)) + for _, group := range groups { + groupedByBucket[group.Bucket] = append(groupedByBucket[group.Bucket], group) + } + + selected := make([]CouncilGroup, 0, len(groups)) + for _, bucket := range show { + selected = append(selected, groupedByBucket[bucket]...) 
+ } + return selected +} + +func renderCouncilReportMarkdown(run CouncilRun, input CouncilInput, show []string, summary map[string]int, groups []CouncilGroup) string { + var builder strings.Builder + + builder.WriteString("# Council Review Report\n\n") + builder.WriteString(fmt.Sprintf("- Run ID: `%s`\n", run.RunID)) + builder.WriteString(fmt.Sprintf("- Mode: `%s`\n", run.Mode)) + builder.WriteString(fmt.Sprintf("- Target Type: `%s`\n", run.TargetType)) + builder.WriteString(fmt.Sprintf("- Report Buckets: `%s`\n\n", strings.Join(show, "`, `"))) + + builder.WriteString("## Target\n\n") + if strings.TrimSpace(input.Prompt) != "" { + builder.WriteString(fmt.Sprintf("- Prompt: %s\n", input.Prompt)) + } + if strings.TrimSpace(input.TargetFile) != "" { + builder.WriteString(fmt.Sprintf("- Target File: `%s`\n", input.TargetFile)) + } + if strings.TrimSpace(input.RepoPath) != "" { + builder.WriteString(fmt.Sprintf("- Repo Path: `%s`\n", input.RepoPath)) + } + if strings.TrimSpace(input.TargetTaskID) != "" { + builder.WriteString(fmt.Sprintf("- Task ID: `%s`\n", input.TargetTaskID)) + } + if strings.TrimSpace(input.Prompt) == "" && + strings.TrimSpace(input.TargetFile) == "" && + strings.TrimSpace(input.RepoPath) == "" && + strings.TrimSpace(input.TargetTaskID) == "" { + builder.WriteString("- No explicit target metadata was recorded.\n") + } + builder.WriteString("\n") + + builder.WriteString("## Summary\n\n") + builder.WriteString(fmt.Sprintf("- Consensus: %d\n", summary["consensus"])) + builder.WriteString(fmt.Sprintf("- Majority: %d\n", summary["majority"])) + builder.WriteString(fmt.Sprintf("- Minority: %d\n\n", summary["minority"])) + + groupedByBucket := make(map[string][]CouncilGroup, len(show)) + for _, group := range groups { + groupedByBucket[group.Bucket] = append(groupedByBucket[group.Bucket], group) + } + + for _, bucket := range show { + builder.WriteString(fmt.Sprintf("## %s\n\n", councilBucketHeading(bucket))) + bucketGroups := groupedByBucket[bucket] + if 
len(bucketGroups) == 0 { + builder.WriteString(fmt.Sprintf("No %s recommendations.\n\n", bucket)) + continue + } + + for _, group := range bucketGroups { + supporters := decodeCouncilStringSlice(group.SupportersJSON) + dissenters := decodeCouncilStringSlice(group.DissentersJSON) + tags := decodeCouncilStringSlice(group.TagsJSON) + sourceFindingIDs := decodeCouncilStringSlice(group.SourceFindingIDsJSON) + + builder.WriteString(fmt.Sprintf("### %s\n\n", group.GroupID)) + builder.WriteString(group.Proposal) + builder.WriteString("\n\n") + builder.WriteString(fmt.Sprintf("- Support: %d of 3 reviewers", group.SupportCount)) + if len(supporters) > 0 { + builder.WriteString(fmt.Sprintf(" (`%s`)", strings.Join(supporters, "`, `"))) + } + builder.WriteString("\n") + if len(dissenters) > 0 { + builder.WriteString(fmt.Sprintf("- Dissenters: `%s`\n", strings.Join(dissenters, "`, `"))) + } + if strings.TrimSpace(group.RationaleSummary) != "" { + builder.WriteString(fmt.Sprintf("- Rationale: %s\n", group.RationaleSummary)) + } + if len(tags) > 0 { + builder.WriteString(fmt.Sprintf("- Tags: `%s`\n", strings.Join(tags, "`, `"))) + } + if len(sourceFindingIDs) > 0 { + builder.WriteString(fmt.Sprintf("- Source Findings: `%s`\n", strings.Join(sourceFindingIDs, "`, `"))) + } + builder.WriteString("\n") + } + } + + return builder.String() +} + +func councilBucketHeading(bucket string) string { + switch bucket { + case "consensus": + return "Consensus" + case "majority": + return "Majority" + case "minority": + return "Minority" + default: + if bucket == "" { + return "Recommendations" + } + return strings.ToUpper(bucket[:1]) + bucket[1:] + } +} + +func decodeCouncilStringSlice(raw json.RawMessage) []string { + if len(raw) == 0 || strings.TrimSpace(string(raw)) == "" || strings.TrimSpace(string(raw)) == "null" { + return nil + } + + var values []string + if err := json.Unmarshal(raw, &values); err != nil { + return nil + } + + result := make([]string, 0, len(values)) + for _, value := 
range values { + value = strings.TrimSpace(value) + if value != "" { + result = append(result, value) + } + } + return result +} + +func councilProposalGroupKey(proposal, similarity string) string { + tokens := proposalTokens(proposal) + if similarity == "strict" { + return strings.Join(tokens, " ") + } + + stopWords := map[string]struct{}{ + "a": {}, "an": {}, "the": {}, "to": {}, "into": {}, "and": {}, "or": {}, "of": {}, "for": {}, "in": {}, "on": {}, "with": {}, "from": {}, "that": {}, "this": {}, "it": {}, "is": {}, "are": {}, "be": {}, "by": {}, "as": {}, "keep": {}, "use": {}, "add": {}, + } + set := make(map[string]struct{}) + filtered := make([]string, 0, len(tokens)) + for _, token := range tokens { + if _, stop := stopWords[token]; stop { + continue + } + if len(token) <= 2 { + continue + } + if _, seen := set[token]; seen { + continue + } + set[token] = struct{}{} + filtered = append(filtered, token) + } + sort.Strings(filtered) + if len(filtered) == 0 { + return strings.Join(tokens, " ") + } + return strings.Join(filtered, " ") +} + +func proposalTokens(value string) []string { + lower := strings.ToLower(strings.TrimSpace(value)) + fields := strings.FieldsFunc(lower, func(r rune) bool { + return !((r >= 'a' && r <= 'z') || (r >= '0' && r <= '9')) + }) + result := make([]string, 0, len(fields)) + for _, field := range fields { + if field != "" { + result = append(result, field) + } + } + return result +} diff --git a/packages/coord-core/store/doc.go b/packages/coord-core/store/doc.go new file mode 100644 index 0000000..bbd69c5 --- /dev/null +++ b/packages/coord-core/store/doc.go @@ -0,0 +1,3 @@ +package store + +// Package store contains higher-level database access helpers. 
diff --git a/packages/coord-core/store/inbox.go b/packages/coord-core/store/inbox.go new file mode 100644 index 0000000..727cae8 --- /dev/null +++ b/packages/coord-core/store/inbox.go @@ -0,0 +1,1932 @@ +package store + +import ( + "bytes" + "context" + "database/sql" + "encoding/json" + "errors" + "fmt" + "strings" + "time" + + "github.com/google/uuid" +) + +var ErrLeaseConflict = errors.New("thread already claimed by another worker") +var ErrThreadNotFound = errors.New("thread not found") +var ErrMessageNotFound = errors.New("message not found") +var ErrNoActiveLease = errors.New("no active lease") +var ErrInvalidInput = errors.New("invalid input") +var ErrInvalidState = errors.New("invalid state") + +type InboxStore struct { + db *sql.DB +} + +type Thread struct { + ThreadID string `json:"thread_id"` + RunID string `json:"run_id"` + TaskID string `json:"task_id"` + Subject string `json:"subject"` + CreatedBy string `json:"created_by"` + AssignedTo string `json:"assigned_to"` + Status string `json:"status"` + Priority string `json:"priority"` + LatestMessageID string `json:"latest_message_id,omitempty"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` +} + +type Message struct { + MessageID string `json:"message_id"` + ThreadID string `json:"thread_id"` + FromAgent string `json:"from_agent"` + ToAgent string `json:"to_agent"` + Kind string `json:"kind"` + Summary string `json:"summary"` + Body string `json:"body"` + PayloadJSON json.RawMessage `json:"payload_json"` + CreatedAt time.Time `json:"created_at"` + Artifacts []Artifact `json:"artifacts,omitempty"` +} + +type Artifact struct { + ArtifactID string `json:"artifact_id"` + MessageID string `json:"message_id"` + Path string `json:"path"` + Kind string `json:"kind"` + MetadataJSON json.RawMessage `json:"metadata_json"` + CreatedAt time.Time `json:"created_at"` +} + +type ArtifactInput struct { + Path string + Kind string + MetadataJSON string +} + +type ThreadDetail struct { 
+ Thread Thread `json:"thread"` + Messages []Message `json:"messages"` +} + +type Event struct { + EventID int64 `json:"event_id"` + RunID string `json:"run_id"` + TaskID string `json:"task_id"` + ThreadID string `json:"thread_id,omitempty"` + Source string `json:"source"` + EventType string `json:"event_type"` + MessageID string `json:"message_id,omitempty"` + Summary string `json:"summary"` + PayloadJSON json.RawMessage `json:"payload_json"` + CreatedAt time.Time `json:"created_at"` +} + +type SendInput struct { + ThreadID string + RunID string + TaskID string + Subject string + FromAgent string + ToAgent string + Kind string + Summary string + Body string + PayloadJSON string + Priority string + Artifacts []ArtifactInput +} + +type FetchInput struct { + Agent string + Statuses []string + Limit int + Unread bool +} + +type ClaimInput struct { + ThreadID string + Agent string + LeaseSeconds int +} + +type RenewInput struct { + ThreadID string + Agent string + LeaseSeconds int +} + +type ClaimResult struct { + Thread Thread `json:"thread"` + Message Message `json:"message"` +} + +type UpdateInput struct { + ThreadID string + Agent string + Status string + Summary string + Body string + PayloadJSON string + Artifacts []ArtifactInput +} + +type ReplyInput struct { + ThreadID string + FromAgent string + ToAgent string + Kind string + Summary string + Body string + PayloadJSON string + Artifacts []ArtifactInput +} + +type CompleteInput struct { + ThreadID string + Agent string + Summary string + Body string + PayloadJSON string + Failed bool + Artifacts []ArtifactInput +} + +type CancelInput struct { + ThreadID string + Agent string + Reason string + Artifacts []ArtifactInput +} + +type ListInput struct { + Agent string + Statuses []string + CreatedBy string + AssignedTo string + Limit int + Unread bool +} + +type WatchInput struct { + Agent string + Statuses []string + AfterEventID int64 + StartFromNow bool + Timeout time.Duration +} + +type WatchResult struct { + 
Woke bool `json:"woke"` + NextEventID int64 `json:"next_event_id"` + Thread *Thread `json:"thread,omitempty"` + Message *Message `json:"message,omitempty"` + Event *Event `json:"event,omitempty"` +} + +type WaitReplyInput struct { + ThreadID string + AfterMessageID string + AfterEventID int64 + Kinds []string + Agent string + Timeout time.Duration +} + +type WaitReplyResult struct { + Woke bool `json:"woke"` + NextEventID int64 `json:"next_event_id"` + Message *Message `json:"message,omitempty"` +} + +func NewInboxStore(db *sql.DB) *InboxStore { + return &InboxStore{db: db} +} + +func (s *InboxStore) Send(ctx context.Context, input SendInput) (Thread, Message, error) { + if input.ThreadID != "" { + thread, err := selectThread(ctx, s.db, input.ThreadID) + if err == nil { + return s.appendThreadMessage(ctx, thread, input) + } + if !errors.Is(err, ErrThreadNotFound) { + return Thread{}, Message{}, err + } + } + + return s.createThread(ctx, input) +} + +func (s *InboxStore) createThread(ctx context.Context, input SendInput) (Thread, Message, error) { + now := nowUTC() + + threadID := defaultID(input.ThreadID, "thr") + runID := defaultID(input.RunID, "run") + taskID := defaultID(input.TaskID, "task") + kind := defaultString(input.Kind, "task") + priority := defaultString(input.Priority, "normal") + summary := defaultString(input.Summary, input.Subject) + payload, err := validateAndNormalizeJSON("payload-json", input.PayloadJSON) + if err != nil { + return Thread{}, Message{}, err + } + messageID := newID("msg") + + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return Thread{}, Message{}, fmt.Errorf("begin send transaction: %w", err) + } + defer tx.Rollback() + + thread := Thread{ + ThreadID: threadID, + RunID: runID, + TaskID: taskID, + Subject: input.Subject, + CreatedBy: input.FromAgent, + AssignedTo: input.ToAgent, + Status: "pending", + Priority: priority, + LatestMessageID: messageID, + CreatedAt: now, + UpdatedAt: now, + } + + if _, err := tx.ExecContext( 
+ ctx, + `INSERT INTO threads ( + thread_id, run_id, task_id, subject, created_by, assigned_to, status, + priority, latest_message_id, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + thread.ThreadID, + thread.RunID, + thread.TaskID, + thread.Subject, + thread.CreatedBy, + thread.AssignedTo, + thread.Status, + thread.Priority, + thread.LatestMessageID, + formatTime(thread.CreatedAt), + formatTime(thread.UpdatedAt), + ); err != nil { + return Thread{}, Message{}, fmt.Errorf("insert thread: %w", err) + } + + message := Message{ + MessageID: messageID, + ThreadID: threadID, + FromAgent: input.FromAgent, + ToAgent: input.ToAgent, + Kind: kind, + Summary: summary, + Body: input.Body, + PayloadJSON: json.RawMessage(payload), + CreatedAt: now, + } + if err := insertMessage(ctx, tx, message); err != nil { + return Thread{}, Message{}, err + } + artifacts, err := insertArtifacts(ctx, tx, message.MessageID, input.Artifacts, now) + if err != nil { + return Thread{}, Message{}, err + } + message.Artifacts = artifacts + + if err := insertEvent(ctx, tx, eventInput{ + RunID: thread.RunID, + TaskID: thread.TaskID, + ThreadID: thread.ThreadID, + Source: "inbox", + EventType: "thread_created", + MessageID: message.MessageID, + Summary: summary, + PayloadJSON: payload, + CreatedAt: now, + }); err != nil { + return Thread{}, Message{}, err + } + + if err := tx.Commit(); err != nil { + return Thread{}, Message{}, fmt.Errorf("commit send transaction: %w", err) + } + + return thread, message, nil +} + +func (s *InboxStore) appendThreadMessage(ctx context.Context, existing Thread, input SendInput) (Thread, Message, error) { + now := nowUTC() + messageID := newID("msg") + payload, err := validateAndNormalizeJSON("payload-json", input.PayloadJSON) + if err != nil { + return Thread{}, Message{}, err + } + + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return Thread{}, Message{}, fmt.Errorf("begin append transaction: %w", err) + } + defer tx.Rollback() + + 
thread, err := selectThreadForUpdate(ctx, tx, existing.ThreadID) + if err != nil { + return Thread{}, Message{}, err + } + if isTerminalStatus(thread.Status) { + return Thread{}, Message{}, fmt.Errorf("%w: thread %s is already terminal", ErrInvalidState, thread.ThreadID) + } + + assignedTo := thread.AssignedTo + if input.ToAgent != "" { + assignedTo = input.ToAgent + } + + message := Message{ + MessageID: messageID, + ThreadID: thread.ThreadID, + FromAgent: input.FromAgent, + ToAgent: defaultString(input.ToAgent, thread.AssignedTo), + Kind: defaultString(input.Kind, "task"), + Summary: defaultString(input.Summary, thread.Subject), + Body: input.Body, + PayloadJSON: json.RawMessage(payload), + CreatedAt: now, + } + + if err := insertMessage(ctx, tx, message); err != nil { + return Thread{}, Message{}, err + } + artifacts, err := insertArtifacts(ctx, tx, message.MessageID, input.Artifacts, now) + if err != nil { + return Thread{}, Message{}, err + } + message.Artifacts = artifacts + + if err := updateThreadState(ctx, tx, thread.ThreadID, thread.Status, assignedTo, message.MessageID, now); err != nil { + return Thread{}, Message{}, err + } + + if err := insertEvent(ctx, tx, eventInput{ + RunID: thread.RunID, + TaskID: thread.TaskID, + ThreadID: thread.ThreadID, + Source: "inbox", + EventType: "thread_message_sent", + MessageID: message.MessageID, + Summary: message.Summary, + PayloadJSON: payload, + CreatedAt: now, + }); err != nil { + return Thread{}, Message{}, err + } + + if err := tx.Commit(); err != nil { + return Thread{}, Message{}, fmt.Errorf("commit append transaction: %w", err) + } + + thread.AssignedTo = assignedTo + thread.LatestMessageID = message.MessageID + thread.UpdatedAt = now + return thread, message, nil +} + +func (s *InboxStore) FetchThreads(ctx context.Context, input FetchInput) ([]Thread, error) { + statuses := input.Statuses + if len(statuses) == 0 { + statuses = []string{"pending"} + } + + return s.ListThreads(ctx, ListInput{ + Agent: 
input.Agent, + Statuses: statuses, + Limit: input.Limit, + Unread: input.Unread, + }) +} + +func (s *InboxStore) ListThreads(ctx context.Context, input ListInput) ([]Thread, error) { + limit := input.Limit + if limit <= 0 { + limit = 20 + } + + var ( + joinArgs []any + whereArgs []any + conditions []string + joins []string + ) + + assignedTo := input.AssignedTo + if assignedTo == "" { + assignedTo = input.Agent + } + + if assignedTo != "" { + conditions = append(conditions, "t.assigned_to = ?") + whereArgs = append(whereArgs, assignedTo) + } + if input.CreatedBy != "" { + conditions = append(conditions, "t.created_by = ?") + whereArgs = append(whereArgs, input.CreatedBy) + } + if len(input.Statuses) > 0 { + conditions = append(conditions, "t.status IN ("+placeholders(len(input.Statuses))+")") + for _, status := range input.Statuses { + whereArgs = append(whereArgs, status) + } + } + if input.Unread { + if input.Agent == "" { + return nil, fmt.Errorf("%w: agent is required when filtering unread threads", ErrInvalidInput) + } + joins = append(joins, "JOIN messages lm ON lm.message_id = t.latest_message_id") + joins = append(joins, "LEFT JOIN thread_reads tr ON tr.thread_id = t.thread_id AND tr.agent_id = ?") + joinArgs = append(joinArgs, input.Agent) + conditions = append(conditions, "lm.to_agent = ?") + whereArgs = append(whereArgs, input.Agent) + conditions = append(conditions, "lm.from_agent <> ?") + whereArgs = append(whereArgs, input.Agent) + conditions = append(conditions, "(tr.last_read_message_id IS NULL OR tr.last_read_message_id <> t.latest_message_id)") + } + + query := `SELECT + t.thread_id, t.run_id, t.task_id, t.subject, t.created_by, t.assigned_to, t.status, + t.priority, t.latest_message_id, t.created_at, t.updated_at + FROM threads t` + if len(joins) > 0 { + query += " " + strings.Join(joins, " ") + } + if len(conditions) > 0 { + query += " WHERE " + strings.Join(conditions, " AND ") + } + query += " ORDER BY t.updated_at DESC LIMIT ?" 
+ args := append(joinArgs, whereArgs...) + args = append(args, limit) + + rows, err := s.db.QueryContext(ctx, query, args...) + if err != nil { + return nil, fmt.Errorf("list threads: %w", err) + } + defer rows.Close() + + var threads []Thread + for rows.Next() { + thread, err := scanThread(rows) + if err != nil { + return nil, err + } + threads = append(threads, thread) + } + + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterate threads: %w", err) + } + + return threads, nil +} + +func (s *InboxStore) ClaimThread(ctx context.Context, input ClaimInput) (ClaimResult, error) { + if input.LeaseSeconds <= 0 { + input.LeaseSeconds = 900 + } + + var lastBusyErr error + for attempt := 0; attempt < 20; attempt++ { + result, err := s.claimThreadOnce(ctx, input) + if err == nil { + return result, nil + } + if !isSQLiteBusyError(err) { + return ClaimResult{}, err + } + lastBusyErr = err + + ok, waitErr := waitForNextPoll(ctx, 25*time.Millisecond) + if waitErr != nil { + return ClaimResult{}, waitErr + } + if !ok { + break + } + } + + if resolvedErr := s.classifyClaimConflict(ctx, input.ThreadID); resolvedErr != nil { + return ClaimResult{}, resolvedErr + } + + return ClaimResult{}, fmt.Errorf("claim thread: %w", lastBusyErr) +} + +func (s *InboxStore) claimThreadOnce(ctx context.Context, input ClaimInput) (ClaimResult, error) { + if input.LeaseSeconds <= 0 { + input.LeaseSeconds = 900 + } + + now := nowUTC() + expiresAt := now.Add(time.Duration(input.LeaseSeconds) * time.Second) + leaseToken := newID("lease") + messageID := newID("msg") + + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return ClaimResult{}, fmt.Errorf("begin claim transaction: %w", err) + } + defer tx.Rollback() + + thread, err := selectThreadForUpdate(ctx, tx, input.ThreadID) + if err != nil { + return ClaimResult{}, err + } + if isTerminalStatus(thread.Status) { + return ClaimResult{}, fmt.Errorf("%w: thread %s is already terminal", ErrInvalidState, input.ThreadID) + } + + var 
activeLease string + err = tx.QueryRowContext( + ctx, + `SELECT agent_id FROM leases + WHERE thread_id = ? + AND released_at IS NULL + AND expires_at > ?`, + input.ThreadID, + formatTime(now), + ).Scan(&activeLease) + if err != nil && !errors.Is(err, sql.ErrNoRows) { + return ClaimResult{}, fmt.Errorf("check active lease: %w", err) + } + if activeLease != "" { + return ClaimResult{}, ErrLeaseConflict + } + if thread.Status != "pending" { + return ClaimResult{}, fmt.Errorf("%w: thread %s is not pending", ErrInvalidState, input.ThreadID) + } + + result, err := tx.ExecContext( + ctx, + `INSERT INTO leases ( + thread_id, agent_id, lease_token, claimed_at, expires_at, released_at + ) VALUES (?, ?, ?, ?, ?, NULL) + ON CONFLICT(thread_id) DO UPDATE SET + agent_id = excluded.agent_id, + lease_token = excluded.lease_token, + claimed_at = excluded.claimed_at, + expires_at = excluded.expires_at, + released_at = NULL + WHERE leases.released_at IS NOT NULL + OR leases.expires_at <= excluded.claimed_at`, + input.ThreadID, + input.Agent, + leaseToken, + formatTime(now), + formatTime(expiresAt), + ) + if err != nil { + return ClaimResult{}, fmt.Errorf("upsert lease: %w", err) + } + if affected, err := result.RowsAffected(); err == nil && affected == 0 { + return ClaimResult{}, ErrLeaseConflict + } + + result, err = tx.ExecContext( + ctx, + `UPDATE threads + SET status = ?, assigned_to = ?, latest_message_id = ?, updated_at = ? + WHERE thread_id = ? 
// RenewLease extends the lease that input.Agent holds on input.ThreadID.
//
// A non-positive LeaseSeconds falls back to 900. Inside one transaction it
// loads the thread, rejects terminal threads with ErrInvalidState, checks via
// requireActiveLease that the agent currently holds an unexpired lease, then
// writes a fresh lease token and expiry, records a "lease renewed" event
// message, refreshes the thread's latest message/updated time and appends a
// "thread_renewed" event. The returned ClaimResult carries the thread (with
// LatestMessageID and UpdatedAt updated in memory) and the event message.
func (s *InboxStore) RenewLease(ctx context.Context, input RenewInput) (ClaimResult, error) {
	if input.LeaseSeconds <= 0 {
		input.LeaseSeconds = 900
	}

	now := nowUTC()
	expiresAt := now.Add(time.Duration(input.LeaseSeconds) * time.Second)
	leaseToken := newID("lease")
	messageID := newID("msg")

	tx, err := s.db.BeginTx(ctx, nil)
	if err != nil {
		return ClaimResult{}, fmt.Errorf("begin renew transaction: %w", err)
	}
	// Rollback after a successful Commit is a no-op, so deferring it covers
	// every early return below.
	defer tx.Rollback()

	thread, err := selectThreadForUpdate(ctx, tx, input.ThreadID)
	if err != nil {
		return ClaimResult{}, err
	}
	if isTerminalStatus(thread.Status) {
		return ClaimResult{}, fmt.Errorf("%w: thread %s is already terminal", ErrInvalidState, input.ThreadID)
	}

	// Only the current, unexpired lease holder may renew.
	if _, err := requireActiveLease(ctx, tx, input.ThreadID, input.Agent, now); err != nil {
		return ClaimResult{}, err
	}

	// Renewal rotates the lease token and moves the expiry; claimed_at and
	// agent_id are left as they were.
	if _, err := tx.ExecContext(
		ctx,
		`UPDATE leases
		 SET lease_token = ?, expires_at = ?, released_at = NULL
		 WHERE thread_id = ?`,
		leaseToken,
		formatTime(expiresAt),
		input.ThreadID,
	); err != nil {
		return ClaimResult{}, fmt.Errorf("renew lease: %w", err)
	}

	// The renewal is recorded as an event message from the agent to itself.
	message := Message{
		MessageID:   messageID,
		ThreadID:    input.ThreadID,
		FromAgent:   input.Agent,
		ToAgent:     input.Agent,
		Kind:        "event",
		Summary:     "lease renewed",
		Body:        "",
		PayloadJSON: json.RawMessage(fmt.Sprintf(`{"lease_seconds":%d,"lease_token":"%s"}`, input.LeaseSeconds, leaseToken)),
		CreatedAt:   now,
	}

	if err := insertMessage(ctx, tx, message); err != nil {
		return ClaimResult{}, err
	}

	// Status and assignee are written back unchanged; only the latest
	// message pointer and updated_at move.
	if err := updateThreadState(ctx, tx, thread.ThreadID, thread.Status, thread.AssignedTo, message.MessageID, now); err != nil {
		return ClaimResult{}, err
	}

	if err := insertEvent(ctx, tx, eventInput{
		RunID:       thread.RunID,
		TaskID:      thread.TaskID,
		ThreadID:    thread.ThreadID,
		Source:      "inbox",
		EventType:   "thread_renewed",
		MessageID:   message.MessageID,
		Summary:     message.Summary,
		PayloadJSON: string(message.PayloadJSON),
		CreatedAt:   now,
	}); err != nil {
		return ClaimResult{}, err
	}

	if err := tx.Commit(); err != nil {
		return ClaimResult{}, fmt.Errorf("commit renew transaction: %w", err)
	}

	// Mirror the committed changes on the in-memory copy returned to the caller.
	thread.LatestMessageID = message.MessageID
	thread.UpdatedAt = now
	return ClaimResult{
		Thread:  thread,
		Message: message,
	}, nil
}
// ReplyToThread appends a message from input.FromAgent to input.ToAgent on an
// existing, non-terminal thread.
//
// The payload must be valid JSON (empty input is normalized by
// validateAndNormalizeJSON); otherwise its error is returned before any
// database work. Inside one transaction the message and its artifacts are
// inserted, the thread's latest message/updated time are refreshed without
// changing status or assignee, and a "thread_replied" event is appended. No
// lease check is performed here. The message kind defaults to "answer" when
// input.Kind is empty.
func (s *InboxStore) ReplyToThread(ctx context.Context, input ReplyInput) (Thread, Message, error) {
	now := nowUTC()
	messageID := newID("msg")
	payload, err := validateAndNormalizeJSON("payload-json", input.PayloadJSON)
	if err != nil {
		return Thread{}, Message{}, err
	}

	tx, err := s.db.BeginTx(ctx, nil)
	if err != nil {
		return Thread{}, Message{}, fmt.Errorf("begin reply transaction: %w", err)
	}
	// Rollback after a successful Commit is a no-op.
	defer tx.Rollback()

	thread, err := selectThreadForUpdate(ctx, tx, input.ThreadID)
	if err != nil {
		return Thread{}, Message{}, err
	}
	if isTerminalStatus(thread.Status) {
		return Thread{}, Message{}, fmt.Errorf("%w: thread %s is already terminal", ErrInvalidState, input.ThreadID)
	}

	message := Message{
		MessageID:   messageID,
		ThreadID:    thread.ThreadID,
		FromAgent:   input.FromAgent,
		ToAgent:     input.ToAgent,
		Kind:        defaultString(input.Kind, "answer"),
		Summary:     input.Summary,
		Body:        input.Body,
		PayloadJSON: json.RawMessage(payload),
		CreatedAt:   now,
	}

	if err := insertMessage(ctx, tx, message); err != nil {
		return Thread{}, Message{}, err
	}
	artifacts, err := insertArtifacts(ctx, tx, message.MessageID, input.Artifacts, now)
	if err != nil {
		return Thread{}, Message{}, err
	}
	message.Artifacts = artifacts

	// A reply does not move the thread through its state machine: status and
	// assignee are written back as read.
	if err := updateThreadState(ctx, tx, thread.ThreadID, thread.Status, thread.AssignedTo, message.MessageID, now); err != nil {
		return Thread{}, Message{}, err
	}

	if err := insertEvent(ctx, tx, eventInput{
		RunID:       thread.RunID,
		TaskID:      thread.TaskID,
		ThreadID:    thread.ThreadID,
		Source:      "inbox",
		EventType:   "thread_replied",
		MessageID:   message.MessageID,
		Summary:     message.Summary,
		PayloadJSON: string(message.PayloadJSON),
		CreatedAt:   now,
	}); err != nil {
		return Thread{}, Message{}, err
	}

	if err := tx.Commit(); err != nil {
		return Thread{}, Message{}, fmt.Errorf("commit reply transaction: %w", err)
	}

	// Mirror the committed changes on the in-memory copy returned to the caller.
	thread.LatestMessageID = message.MessageID
	thread.UpdatedAt = now
	return thread, message, nil
}
// CancelThread moves a non-terminal thread to "cancelled" and releases any
// unreleased lease on it.
//
// Inside one transaction it inserts a "control" message from input.Agent to
// the thread's current assignee (summary is input.Reason, or "thread
// cancelled" when the reason is empty), stores input.Artifacts, updates the
// thread status, stamps released_at on the lease row and appends a
// "thread_cancelled" event. No lease ownership check is performed here.
// Threads that are already terminal yield ErrInvalidState.
func (s *InboxStore) CancelThread(ctx context.Context, input CancelInput) (Thread, Message, error) {
	now := nowUTC()
	messageID := newID("msg")

	tx, err := s.db.BeginTx(ctx, nil)
	if err != nil {
		return Thread{}, Message{}, fmt.Errorf("begin cancel transaction: %w", err)
	}
	// Rollback after a successful Commit is a no-op.
	defer tx.Rollback()

	thread, err := selectThreadForUpdate(ctx, tx, input.ThreadID)
	if err != nil {
		return Thread{}, Message{}, err
	}
	if isTerminalStatus(thread.Status) {
		return Thread{}, Message{}, fmt.Errorf("%w: thread %s is already terminal", ErrInvalidState, input.ThreadID)
	}

	summary := defaultString(input.Reason, "thread cancelled")
	message := Message{
		MessageID:   messageID,
		ThreadID:    thread.ThreadID,
		FromAgent:   input.Agent,
		ToAgent:     thread.AssignedTo,
		Kind:        "control",
		Summary:     summary,
		Body:        input.Reason,
		PayloadJSON: json.RawMessage(`{}`),
		CreatedAt:   now,
	}

	if err := insertMessage(ctx, tx, message); err != nil {
		return Thread{}, Message{}, err
	}
	artifacts, err := insertArtifacts(ctx, tx, message.MessageID, input.Artifacts, now)
	if err != nil {
		return Thread{}, Message{}, err
	}
	message.Artifacts = artifacts

	if err := updateThreadState(ctx, tx, thread.ThreadID, "cancelled", thread.AssignedTo, message.MessageID, now); err != nil {
		return Thread{}, Message{}, err
	}

	// Release whatever lease is still open; a thread without an open lease
	// simply matches no rows.
	if _, err := tx.ExecContext(
		ctx,
		`UPDATE leases
		 SET released_at = ?
		 WHERE thread_id = ?
		   AND released_at IS NULL`,
		formatTime(now),
		thread.ThreadID,
	); err != nil {
		return Thread{}, Message{}, fmt.Errorf("release lease on cancel: %w", err)
	}

	if err := insertEvent(ctx, tx, eventInput{
		RunID:       thread.RunID,
		TaskID:      thread.TaskID,
		ThreadID:    thread.ThreadID,
		Source:      "inbox",
		EventType:   "thread_cancelled",
		MessageID:   message.MessageID,
		Summary:     message.Summary,
		PayloadJSON: string(message.PayloadJSON),
		CreatedAt:   now,
	}); err != nil {
		return Thread{}, Message{}, err
	}

	if err := tx.Commit(); err != nil {
		return Thread{}, Message{}, fmt.Errorf("commit cancel transaction: %w", err)
	}

	// Mirror the committed changes on the in-memory copy returned to the caller.
	thread.Status = "cancelled"
	thread.LatestMessageID = message.MessageID
	thread.UpdatedAt = now
	return thread, message, nil
}
// WatchThreads blocks until an event newer than the cursor matches input, the
// optional timeout elapses, or ctx ends.
//
// The cursor starts at input.AfterEventID; when StartFromNow is set and the
// cursor is zero it starts at the current highest event id instead, so only
// later events are reported. The store is polled every 200ms. On a match the
// result has Woke=true and NextEventID set to the matched event's id, along
// with the thread, message and event. When the deadline passes the result has
// Woke=false and NextEventID equal to the unchanged cursor, with a nil error.
// Other errors, including cancellation, are returned as errors.
func (s *InboxStore) WatchThreads(ctx context.Context, input WatchInput) (WatchResult, error) {
	cursor := input.AfterEventID
	if input.StartFromNow && cursor == 0 {
		current, err := s.currentMaxEventID(ctx)
		if err != nil {
			return WatchResult{}, err
		}
		cursor = current
	}

	// Without a timeout the caller's context is used directly and cancel
	// stays a no-op so the defer below is always safe.
	waitCtx := ctx
	cancel := func() {}
	if input.Timeout > 0 {
		waitCtx, cancel = context.WithTimeout(ctx, input.Timeout)
	}
	defer cancel()

	for {
		thread, message, event, found, err := s.findWatchEventAfter(waitCtx, input, cursor)
		if err != nil {
			// A query interrupted by the deadline is reported as a normal
			// timeout rather than as a failure.
			if isDeadlineExceeded(waitCtx) {
				return WatchResult{Woke: false, NextEventID: cursor}, nil
			}
			return WatchResult{}, err
		}
		if found {
			return WatchResult{
				Woke:        true,
				NextEventID: event.EventID,
				Thread:      &thread,
				Message:     &message,
				Event:       &event,
			}, nil
		}

		ok, err := waitForNextPoll(waitCtx, 200*time.Millisecond)
		if err != nil {
			if errors.Is(err, context.DeadlineExceeded) {
				return WatchResult{Woke: false, NextEventID: cursor}, nil
			}
			return WatchResult{}, err
		}
		if !ok {
			return WatchResult{Woke: false, NextEventID: cursor}, nil
		}
	}
}
&updatedAt, + ); err != nil { + return Thread{}, fmt.Errorf("scan thread: %w", err) + } + + thread.CreatedAt = parseTime(createdAt) + thread.UpdatedAt = parseTime(updatedAt) + if latestMessageID.Valid { + thread.LatestMessageID = latestMessageID.String + } + + return thread, nil +} + +func scanMessage(scanner threadScanner) (Message, error) { + var ( + message Message + payload, createdAt string + ) + + if err := scanner.Scan( + &message.MessageID, + &message.ThreadID, + &message.FromAgent, + &message.ToAgent, + &message.Kind, + &message.Summary, + &message.Body, + &payload, + &createdAt, + ); err != nil { + return Message{}, fmt.Errorf("scan message: %w", err) + } + + message.PayloadJSON = json.RawMessage(payload) + message.CreatedAt = parseTime(createdAt) + return message, nil +} + +func scanArtifact(scanner threadScanner) (Artifact, error) { + var ( + artifact Artifact + metadata, created string + ) + + if err := scanner.Scan( + &artifact.ArtifactID, + &artifact.MessageID, + &artifact.Path, + &artifact.Kind, + &metadata, + &created, + ); err != nil { + return Artifact{}, fmt.Errorf("scan artifact: %w", err) + } + + artifact.MetadataJSON = json.RawMessage(metadata) + artifact.CreatedAt = parseTime(created) + return artifact, nil +} + +func scanEvent(scanner threadScanner) (Event, error) { + var ( + event Event + messageID sql.NullString + payload, createdAt string + ) + + if err := scanner.Scan( + &event.EventID, + &event.RunID, + &event.TaskID, + &event.ThreadID, + &event.Source, + &event.EventType, + &messageID, + &event.Summary, + &payload, + &createdAt, + ); err != nil { + return Event{}, fmt.Errorf("scan event: %w", err) + } + + if messageID.Valid { + event.MessageID = messageID.String + } + event.PayloadJSON = json.RawMessage(payload) + event.CreatedAt = parseTime(createdAt) + return event, nil +} + +func selectThread(ctx context.Context, db queryRower, threadID string) (Thread, error) { + row := db.QueryRowContext( + ctx, + `SELECT + thread_id, run_id, 
// selectThreadForUpdate loads a thread inside tx. It delegates to
// selectThread with the transaction as the query source; no additional
// locking clause is issued here.
func selectThreadForUpdate(ctx context.Context, tx *sql.Tx, threadID string) (Thread, error) {
	return selectThread(ctx, tx, threadID)
}

// queryRower is the single-row query capability that selectThread needs; it
// is called with both s.db and a transaction in this file.
type queryRower interface {
	QueryRowContext(ctx context.Context, query string, args ...any) *sql.Row
}

// execContexter is the statement-execution capability used by helpers that
// only write, such as markThreadRead.
type execContexter interface {
	ExecContext(ctx context.Context, query string, args ...any) (sql.Result, error)
}

// eventInput carries the column values for one row of the events table.
type eventInput struct {
	RunID       string
	TaskID      string
	ThreadID    string
	Source      string
	EventType   string
	MessageID   string
	Summary     string
	PayloadJSON string
	CreatedAt   time.Time
}

// insertEvent appends one row to the events table inside tx. A blank
// PayloadJSON is stored as "{}" (see normalizeJSON) and CreatedAt is stored
// in the textual form produced by formatTime. Failures are wrapped with
// "insert event".
func insertEvent(ctx context.Context, tx *sql.Tx, input eventInput) error {
	_, err := tx.ExecContext(
		ctx,
		`INSERT INTO events (
			run_id, task_id, thread_id, source, event_type, message_id, summary,
			payload_json, created_at
		) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
		input.RunID,
		input.TaskID,
		input.ThreadID,
		input.Source,
		input.EventType,
		input.MessageID,
		input.Summary,
		normalizeJSON(input.PayloadJSON),
		formatTime(input.CreatedAt),
	)
	if err != nil {
		return fmt.Errorf("insert event: %w", err)
	}
	return nil
}
nil +} + +func insertArtifacts(ctx context.Context, tx *sql.Tx, messageID string, inputs []ArtifactInput, createdAt time.Time) ([]Artifact, error) { + if len(inputs) == 0 { + return nil, nil + } + + artifacts := make([]Artifact, 0, len(inputs)) + for _, input := range inputs { + metadataJSON, err := validateAndNormalizeJSON("artifact-metadata-json", input.MetadataJSON) + if err != nil { + return nil, err + } + + artifact := Artifact{ + ArtifactID: newID("art"), + MessageID: messageID, + Path: input.Path, + Kind: defaultString(input.Kind, "file"), + MetadataJSON: json.RawMessage(metadataJSON), + CreatedAt: createdAt, + } + + _, err = tx.ExecContext( + ctx, + `INSERT INTO artifacts ( + artifact_id, message_id, path, kind, metadata_json, created_at + ) VALUES (?, ?, ?, ?, ?, ?)`, + artifact.ArtifactID, + artifact.MessageID, + artifact.Path, + artifact.Kind, + string(artifact.MetadataJSON), + formatTime(artifact.CreatedAt), + ) + if err != nil { + return nil, fmt.Errorf("insert artifact: %w", err) + } + + artifacts = append(artifacts, artifact) + } + + return artifacts, nil +} + +func updateThreadState(ctx context.Context, tx *sql.Tx, threadID, status, assignedTo, latestMessageID string, updatedAt time.Time) error { + _, err := tx.ExecContext( + ctx, + `UPDATE threads + SET status = ?, assigned_to = ?, latest_message_id = ?, updated_at = ? + WHERE thread_id = ?`, + status, + assignedTo, + latestMessageID, + formatTime(updatedAt), + threadID, + ) + if err != nil { + return fmt.Errorf("update thread state: %w", err) + } + return nil +} + +func markThreadRead(ctx context.Context, execer execContexter, threadID, agent, messageID string, readAt time.Time) error { + if agent == "" || messageID == "" { + return nil + } + + _, err := execer.ExecContext( + ctx, + `INSERT INTO thread_reads ( + thread_id, agent_id, last_read_message_id, last_read_at + ) VALUES (?, ?, ?, ?) 
+ ON CONFLICT(thread_id, agent_id) DO UPDATE SET + last_read_message_id = excluded.last_read_message_id, + last_read_at = excluded.last_read_at`, + threadID, + agent, + messageID, + formatTime(readAt), + ) + if err != nil { + return fmt.Errorf("mark thread read: %w", err) + } + return nil +} + +func loadArtifactsForMessageIDs(ctx context.Context, db *sql.DB, messageIDs []string) (map[string][]Artifact, error) { + result := make(map[string][]Artifact) + if len(messageIDs) == 0 { + return result, nil + } + + args := make([]any, 0, len(messageIDs)) + for _, messageID := range messageIDs { + args = append(args, messageID) + } + + rows, err := db.QueryContext( + ctx, + `SELECT + artifact_id, message_id, path, kind, metadata_json, created_at + FROM artifacts + WHERE message_id IN (`+placeholders(len(messageIDs))+`) + ORDER BY created_at ASC`, + args..., + ) + if err != nil { + return nil, fmt.Errorf("query artifacts: %w", err) + } + defer rows.Close() + + for rows.Next() { + artifact, err := scanArtifact(rows) + if err != nil { + return nil, err + } + result[artifact.MessageID] = append(result[artifact.MessageID], artifact) + } + + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterate artifacts: %w", err) + } + + return result, nil +} + +func attachArtifacts(messages []Message, artifactsByMessageID map[string][]Artifact) { + for i := range messages { + messages[i].Artifacts = artifactsByMessageID[messages[i].MessageID] + } +} + +func messageIDs(messages []Message) []string { + ids := make([]string, 0, len(messages)) + for _, message := range messages { + ids = append(ids, message.MessageID) + } + return ids +} + +func (s *InboxStore) classifyClaimConflict(ctx context.Context, threadID string) error { + thread, err := selectThread(ctx, s.db, threadID) + if err != nil { + return err + } + + now := nowUTC() + var activeLease string + err = s.db.QueryRowContext( + ctx, + `SELECT agent_id + FROM leases + WHERE thread_id = ? 
// requireActiveLease verifies inside tx that agent holds a live lease on
// threadID as of now and returns that lease's token.
//
// It returns ErrNoActiveLease when the thread has no lease row, when the row
// has been released, or when its expiry is not after now. It returns
// ErrLeaseConflict when a live lease exists but belongs to a different agent.
// Any other read failure is wrapped with "read lease".
func requireActiveLease(ctx context.Context, tx *sql.Tx, threadID, agent string, now time.Time) (string, error) {
	var (
		activeAgent string
		leaseToken  string
		expiresAt   string
		releasedAt  sql.NullString
	)

	err := tx.QueryRowContext(
		ctx,
		`SELECT agent_id, lease_token, expires_at, released_at
		 FROM leases
		 WHERE thread_id = ?`,
		threadID,
	).Scan(&activeAgent, &leaseToken, &expiresAt, &releasedAt)
	if errors.Is(err, sql.ErrNoRows) {
		return "", ErrNoActiveLease
	}
	if err != nil {
		return "", fmt.Errorf("read lease: %w", err)
	}

	// parseTime yields the zero time for an unparseable value, which is never
	// after now, so a corrupt expiry is treated as expired.
	if releasedAt.Valid || !parseTime(expiresAt).After(now) {
		return "", ErrNoActiveLease
	}
	// Liveness is checked before ownership, so a foreign but expired lease
	// reports "no active lease" rather than a conflict.
	if activeAgent != agent {
		return "", ErrLeaseConflict
	}

	return leaseToken, nil
}
+ ORDER BY event_id DESC + LIMIT 1`, + threadID, + messageID, + ).Scan(&eventID) + if errors.Is(err, sql.ErrNoRows) { + return 0, fmt.Errorf("%w: message %s not found in thread %s", ErrMessageNotFound, messageID, threadID) + } + if err != nil { + return 0, fmt.Errorf("lookup message event: %w", err) + } + return eventID, nil +} + +func (s *InboxStore) currentMaxEventID(ctx context.Context) (int64, error) { + var maxEventID int64 + if err := s.db.QueryRowContext(ctx, `SELECT COALESCE(MAX(event_id), 0) FROM events`).Scan(&maxEventID); err != nil { + return 0, fmt.Errorf("query max event id: %w", err) + } + return maxEventID, nil +} + +func (s *InboxStore) findReplyAfter(ctx context.Context, threadID string, afterEventID int64, kinds []string) (Message, int64, bool, error) { + args := []any{threadID, afterEventID} + query := `SELECT + e.event_id, + m.message_id, m.thread_id, m.from_agent, m.to_agent, m.kind, m.summary, m.body, m.payload_json, m.created_at + FROM events e + JOIN messages m ON m.message_id = e.message_id + WHERE e.thread_id = ? + AND e.event_id > ?` + if len(kinds) > 0 { + query += " AND m.kind IN (" + placeholders(len(kinds)) + ")" + for _, kind := range kinds { + args = append(args, kind) + } + } + query += " ORDER BY e.event_id ASC LIMIT 1" + + row := s.db.QueryRowContext(ctx, query, args...) 
// findWatchEventAfter returns the oldest event with an id greater than
// afterEventID that matches input, together with its thread and message.
//
// Events are inner-joined to threads and messages, so an event whose
// message_id matches no message row is never returned. When input.Agent is
// non-empty only threads assigned to that agent match; when input.Statuses is
// non-empty the thread's status as stored at query time must be one of them.
// The boolean result is false, with a nil error, when nothing matches. The
// returned message has its artifacts loaded.
func (s *InboxStore) findWatchEventAfter(ctx context.Context, input WatchInput, afterEventID int64) (Thread, Message, Event, bool, error) {
	args := []any{afterEventID}
	query := `SELECT
			t.thread_id, t.run_id, t.task_id, t.subject, t.created_by, t.assigned_to, t.status,
			t.priority, t.latest_message_id, t.created_at, t.updated_at,
			e.event_id, e.run_id, e.task_id, e.thread_id, e.source, e.event_type, e.message_id, e.summary, e.payload_json, e.created_at,
			m.message_id, m.thread_id, m.from_agent, m.to_agent, m.kind, m.summary, m.body, m.payload_json, m.created_at
		 FROM events e
		 JOIN threads t ON t.thread_id = e.thread_id
		 JOIN messages m ON m.message_id = e.message_id
		 WHERE e.event_id > ?`

	// Optional filters are appended together with their bind values so the
	// placeholder order always matches args.
	if input.Agent != "" {
		query += " AND t.assigned_to = ?"
		args = append(args, input.Agent)
	}
	if len(input.Statuses) > 0 {
		query += " AND t.status IN (" + placeholders(len(input.Statuses)) + ")"
		for _, status := range input.Statuses {
			args = append(args, status)
		}
	}
	query += " ORDER BY e.event_id ASC LIMIT 1"

	row := s.db.QueryRowContext(ctx, query, args...)

	var (
		thread              Thread
		threadCreatedAt     string
		threadUpdatedAt     string
		threadLatestMessage sql.NullString
		event               Event
		eventMessageID      sql.NullString
		eventPayload        string
		eventCreatedAt      string
		message             Message
		messagePayload      string
		messageCreatedAt    string
	)

	// Scan destinations follow the SELECT list exactly: 11 thread columns,
	// 10 event columns, then 9 message columns.
	err := row.Scan(
		&thread.ThreadID,
		&thread.RunID,
		&thread.TaskID,
		&thread.Subject,
		&thread.CreatedBy,
		&thread.AssignedTo,
		&thread.Status,
		&thread.Priority,
		&threadLatestMessage,
		&threadCreatedAt,
		&threadUpdatedAt,
		&event.EventID,
		&event.RunID,
		&event.TaskID,
		&event.ThreadID,
		&event.Source,
		&event.EventType,
		&eventMessageID,
		&event.Summary,
		&eventPayload,
		&eventCreatedAt,
		&message.MessageID,
		&message.ThreadID,
		&message.FromAgent,
		&message.ToAgent,
		&message.Kind,
		&message.Summary,
		&message.Body,
		&messagePayload,
		&messageCreatedAt,
	)
	if errors.Is(err, sql.ErrNoRows) {
		return Thread{}, Message{}, Event{}, false, nil
	}
	if err != nil {
		return Thread{}, Message{}, Event{}, false, fmt.Errorf("query watch event after %d: %w", afterEventID, err)
	}

	if threadLatestMessage.Valid {
		thread.LatestMessageID = threadLatestMessage.String
	}
	thread.CreatedAt = parseTime(threadCreatedAt)
	thread.UpdatedAt = parseTime(threadUpdatedAt)
	if eventMessageID.Valid {
		event.MessageID = eventMessageID.String
	}
	event.PayloadJSON = json.RawMessage(eventPayload)
	event.CreatedAt = parseTime(eventCreatedAt)
	message.PayloadJSON = json.RawMessage(messagePayload)
	message.CreatedAt = parseTime(messageCreatedAt)
	artifactsByMessageID, err := loadArtifactsForMessageIDs(ctx, s.db, []string{message.MessageID})
	if err != nil {
		return Thread{}, Message{}, Event{}, false, err
	}
	message.Artifacts = artifactsByMessageID[message.MessageID]
	return thread, message, event, true, nil
}
{ + case <-ctx.Done(): + return false, ctx.Err() + case <-timer.C: + return true, nil + } +} + +func isTerminalStatus(status string) bool { + return status == "done" || status == "failed" || status == "cancelled" +} + +func isDeadlineExceeded(ctx context.Context) bool { + return ctx.Err() != nil && errors.Is(ctx.Err(), context.DeadlineExceeded) +} + +func isSQLiteBusyError(err error) bool { + message := strings.ToLower(err.Error()) + return strings.Contains(message, "sqlite_busy") || + strings.Contains(message, "database is locked") || + strings.Contains(message, "database table is locked") +} + +func shouldMarkMessageRead(message Message, agent string) bool { + if agent == "" { + return false + } + return message.ToAgent == agent && message.FromAgent != agent +} + +func defaultID(value, prefix string) string { + if value != "" { + return value + } + return newID(prefix) +} + +func newID(prefix string) string { + return prefix + "_" + strings.ReplaceAll(uuid.NewString(), "-", "") +} + +func defaultString(value, fallback string) string { + if value != "" { + return value + } + return fallback +} + +func normalizeJSON(value string) string { + if strings.TrimSpace(value) == "" { + return "{}" + } + return value +} + +func validateAndNormalizeJSON(fieldName, value string) (string, error) { + normalized := normalizeJSON(value) + if !json.Valid([]byte(normalized)) { + return "", fmt.Errorf("%w: %s must be valid JSON", ErrInvalidInput, fieldName) + } + + var compact bytes.Buffer + if err := json.Compact(&compact, []byte(normalized)); err != nil { + return "", fmt.Errorf("%w: %s must be valid JSON", ErrInvalidInput, fieldName) + } + + return compact.String(), nil +} + +func placeholders(n int) string { + if n <= 0 { + return "" + } + parts := make([]string, n) + for i := range parts { + parts[i] = "?" 
+ } + return strings.Join(parts, ",") +} + +func nowUTC() time.Time { + return time.Now().UTC() +} + +func formatTime(t time.Time) string { + return t.UTC().Format(time.RFC3339Nano) +} + +func parseTime(value string) time.Time { + parsed, err := time.Parse(time.RFC3339Nano, value) + if err != nil { + return time.Time{} + } + return parsed +} diff --git a/packages/coord-core/store/inbox_test.go b/packages/coord-core/store/inbox_test.go new file mode 100644 index 0000000..fe6d9bb --- /dev/null +++ b/packages/coord-core/store/inbox_test.go @@ -0,0 +1,107 @@ +package store + +import ( + "context" + "errors" + "path/filepath" + "testing" + "time" + + dbpkg "ai-workflow-skill/packages/coord-core/db" +) + +func TestClaimThreadReturnsLeaseConflictAfterBusyWrite(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + + dbPath := filepath.Join(t.TempDir(), "coord.db") + + sqlDB, err := dbpkg.Open(ctx, dbPath) + if err != nil { + t.Fatalf("open base db: %v", err) + } + defer sqlDB.Close() + + if err := dbpkg.ApplyMigrations(ctx, sqlDB); err != nil { + t.Fatalf("apply migrations: %v", err) + } + + baseStore := NewInboxStore(sqlDB) + thread, _, err := baseStore.Send(ctx, SendInput{ + FromAgent: "leader", + ToAgent: "worker-a", + Subject: "race claim", + Summary: "race claim", + }) + if err != nil { + t.Fatalf("seed thread: %v", err) + } + + lockerDB, err := dbpkg.Open(ctx, dbPath) + if err != nil { + t.Fatalf("open locker db: %v", err) + } + defer lockerDB.Close() + + lockTx, err := lockerDB.BeginTx(ctx, nil) + if err != nil { + t.Fatalf("begin locker tx: %v", err) + } + + now := nowUTC() + if _, err := lockTx.ExecContext( + ctx, + `INSERT INTO leases ( + thread_id, agent_id, lease_token, claimed_at, expires_at, released_at + ) VALUES (?, ?, ?, ?, ?, NULL)`, + thread.ThreadID, + "worker-a", + "lease_locked", + formatTime(now), + formatTime(now.Add(5*time.Minute)), + ); err != nil { + t.Fatalf("seed active 
lease in tx: %v", err) + } + + if _, err := lockTx.ExecContext( + ctx, + `UPDATE threads + SET status = ?, assigned_to = ?, latest_message_id = ?, updated_at = ? + WHERE thread_id = ?`, + "claimed", + "worker-a", + "msg_locked", + formatTime(now), + thread.ThreadID, + ); err != nil { + t.Fatalf("seed claimed thread in tx: %v", err) + } + + commitDone := make(chan error, 1) + go func() { + time.Sleep(100 * time.Millisecond) + commitDone <- lockTx.Commit() + }() + + claimDB, err := dbpkg.Open(ctx, dbPath) + if err != nil { + t.Fatalf("open claim db: %v", err) + } + defer claimDB.Close() + + claimStore := NewInboxStore(claimDB) + _, err = claimStore.ClaimThread(ctx, ClaimInput{ + ThreadID: thread.ThreadID, + Agent: "worker-b", + LeaseSeconds: 300, + }) + if !errors.Is(err, ErrLeaseConflict) { + t.Fatalf("expected lease conflict after busy retry, got %v", err) + } + + if err := <-commitDone; err != nil { + t.Fatalf("commit locker tx: %v", err) + } +} diff --git a/packages/coord-core/store/orch.go b/packages/coord-core/store/orch.go new file mode 100644 index 0000000..d2dd19b --- /dev/null +++ b/packages/coord-core/store/orch.go @@ -0,0 +1,2579 @@ +package store + +import ( + "bytes" + "context" + "database/sql" + "encoding/json" + "errors" + "fmt" + "strings" + "time" + + "ai-workflow-skill/packages/coord-core/protocol" +) + +var ErrRunNotFound = errors.New("run not found") +var ErrTaskNotFound = errors.New("task not found") + +type OrchStore struct { + db *sql.DB +} + +type Run struct { + RunID string `json:"run_id"` + Goal string `json:"goal"` + Summary string `json:"summary"` + Status string `json:"status"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` +} + +type Task struct { + RunID string `json:"run_id"` + TaskID string `json:"task_id"` + Title string `json:"title"` + Summary string `json:"summary"` + Status string `json:"status"` + DefaultTo string `json:"default_to,omitempty"` + Priority string `json:"priority"` + 
AcceptanceJSON json.RawMessage `json:"acceptance_json"` + LatestAttemptNo int `json:"latest_attempt_no,omitempty"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` +} + +type TaskDependency struct { + RunID string `json:"run_id"` + TaskID string `json:"task_id"` + DependsOnTaskID string `json:"depends_on_task_id"` +} + +type TaskAttempt struct { + RunID string `json:"run_id"` + TaskID string `json:"task_id"` + AttemptNo int `json:"attempt_no"` + AssignedTo string `json:"assigned_to"` + ThreadID string `json:"thread_id"` + BaseRef string `json:"base_ref,omitempty"` + BaseCommit string `json:"base_commit,omitempty"` + BranchName string `json:"branch_name,omitempty"` + WorktreePath string `json:"worktree_path,omitempty"` + WorkspaceStatus string `json:"workspace_status,omitempty"` + ResultCommit string `json:"result_commit,omitempty"` + Status string `json:"status"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` +} + +type RunOverview struct { + Run Run `json:"run"` + TaskCounts map[string]int `json:"task_counts"` + Tasks []Task `json:"tasks,omitempty"` +} + +type CreateRunInput struct { + RunID string + Goal string + Summary string +} + +type AddTaskInput struct { + RunID string + TaskID string + Title string + Summary string + DefaultTo string + AcceptanceJSON string + Priority string +} + +type AddDependencyInput struct { + RunID string + TaskID string + DependsOnTaskID string +} + +type ListReadyInput struct { + RunID string + Limit int +} + +type DispatchInput struct { + RunID string + TaskID string + ToAgent string + Body string + BaseRef string + PrepareWorkspace DispatchWorkspacePreparer +} + +type DispatchResult struct { + Task Task `json:"task"` + Attempt TaskAttempt `json:"attempt"` + Thread Thread `json:"thread"` + Message Message `json:"message"` +} + +type ReconcileResult struct { + Run Run `json:"run"` + TaskCounts map[string]int `json:"task_counts"` + UpdatedTasks []Task 
`json:"updated_tasks"` +} + +type RunEvent struct { + EventID int64 `json:"event_id"` + Type string `json:"type"` + RunID string `json:"run_id"` + TaskID string `json:"task_id"` + ThreadID string `json:"thread_id,omitempty"` + Summary string `json:"summary"` + Payload json.RawMessage `json:"payload"` + CreatedAt time.Time `json:"created_at"` +} + +type WaitInput struct { + RunID string + EventTypes []string + AfterEventID int64 + Timeout time.Duration +} + +type WaitResult struct { + Woke bool `json:"woke"` + NextEventID int64 `json:"next_event_id"` + Events []RunEvent `json:"events,omitempty"` +} + +type DispatchWorkspace struct { + BaseRef string `json:"base_ref,omitempty"` + BaseCommit string `json:"base_commit,omitempty"` + BranchName string `json:"branch_name,omitempty"` + WorktreePath string `json:"worktree_path,omitempty"` + WorkspaceStatus string `json:"workspace_status,omitempty"` +} + +type DispatchWorkspacePreparer func(task Task, attemptNo int) (DispatchWorkspace, func(), error) + +type BlockedTask struct { + Task Task `json:"task"` + Attempt TaskAttempt `json:"attempt"` + Question Message `json:"question"` +} + +type AnswerInput struct { + RunID string + TaskID string + Body string + PayloadJSON string +} + +type AnswerResult struct { + Task Task `json:"task"` + Attempt TaskAttempt `json:"attempt"` + Thread Thread `json:"thread"` + Message Message `json:"message"` +} + +type RetryInput struct { + RunID string + TaskID string + ToAgent string + Body string + PrepareWorkspace DispatchWorkspacePreparer +} + +type RetryResult struct { + Task Task `json:"task"` + Attempt TaskAttempt `json:"attempt"` + Thread Thread `json:"thread"` + Message Message `json:"message"` + PreviousAttempt TaskAttempt `json:"previous_attempt"` +} + +type ReassignInput struct { + RunID string + TaskID string + ToAgent string + Reason string + PrepareWorkspace DispatchWorkspacePreparer +} + +type ReassignResult struct { + Task Task `json:"task"` + Attempt TaskAttempt 
`json:"attempt"` + Thread Thread `json:"thread"` + Message Message `json:"message"` + PreviousAttempt TaskAttempt `json:"previous_attempt"` +} + +type CancelControlInput struct { + RunID string + TaskID string + Reason string +} + +type CancelResult struct { + Run Run `json:"run"` + CancelledTasks []Task `json:"cancelled_tasks"` +} + +type CleanupInput struct { + RunID string + TaskID string + AttemptNo int + AllCompleted bool + Force bool +} + +type CleanupCandidate struct { + Attempt TaskAttempt `json:"attempt"` +} + +type CleanupRecord struct { + Attempt TaskAttempt `json:"attempt"` +} + +func NewOrchStore(db *sql.DB) *OrchStore { + return &OrchStore{db: db} +} + +func (s *OrchStore) CreateRun(ctx context.Context, input CreateRunInput) (Run, error) { + runID := strings.TrimSpace(input.RunID) + goal := strings.TrimSpace(input.Goal) + if runID == "" { + return Run{}, fmt.Errorf("%w: run id is required", ErrInvalidInput) + } + if goal == "" { + return Run{}, fmt.Errorf("%w: goal is required", ErrInvalidInput) + } + + now := nowUTC() + run := Run{ + RunID: runID, + Goal: goal, + Summary: strings.TrimSpace(input.Summary), + Status: "active", + CreatedAt: now, + UpdatedAt: now, + } + + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return Run{}, fmt.Errorf("begin create run transaction: %w", err) + } + defer tx.Rollback() + + _, err = tx.ExecContext( + ctx, + `INSERT INTO runs (run_id, goal, summary, status, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?)`, + run.RunID, + run.Goal, + run.Summary, + run.Status, + formatTime(run.CreatedAt), + formatTime(run.UpdatedAt), + ) + if err != nil { + if isUniqueConstraintError(err) { + return Run{}, fmt.Errorf("%w: run %s already exists", ErrInvalidState, run.RunID) + } + return Run{}, fmt.Errorf("insert run: %w", err) + } + + if err := insertEvent(ctx, tx, eventInput{ + RunID: run.RunID, + TaskID: "", + Source: "orch", + EventType: "run_initialized", + Summary: defaultString(run.Summary, run.Goal), + PayloadJSON: 
marshalJSON(map[string]any{"goal": run.Goal, "summary": run.Summary}), + CreatedAt: now, + }); err != nil { + return Run{}, err + } + + if err := tx.Commit(); err != nil { + return Run{}, fmt.Errorf("commit create run transaction: %w", err) + } + + return run, nil +} + +func (s *OrchStore) GetRun(ctx context.Context, runID string) (Run, error) { + return selectRun(ctx, s.db, runID) +} + +func (s *OrchStore) AddTask(ctx context.Context, input AddTaskInput) (Task, error) { + if strings.TrimSpace(input.RunID) == "" { + return Task{}, fmt.Errorf("%w: run id is required", ErrInvalidInput) + } + if strings.TrimSpace(input.TaskID) == "" { + return Task{}, fmt.Errorf("%w: task id is required", ErrInvalidInput) + } + if strings.TrimSpace(input.Title) == "" { + return Task{}, fmt.Errorf("%w: title is required", ErrInvalidInput) + } + + priority, err := normalizePriority(input.Priority) + if err != nil { + return Task{}, err + } + acceptanceJSON, err := validateAndNormalizeJSONDefault("acceptance-json", input.AcceptanceJSON, "[]") + if err != nil { + return Task{}, err + } + + now := nowUTC() + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return Task{}, fmt.Errorf("begin add task transaction: %w", err) + } + defer tx.Rollback() + + if _, err := selectRun(ctx, tx, input.RunID); err != nil { + return Task{}, err + } + + _, err = tx.ExecContext( + ctx, + `INSERT INTO tasks ( + run_id, task_id, title, summary, status, default_to, priority, + acceptance_json, latest_attempt_no, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, NULL, ?, ?)`, + input.RunID, + input.TaskID, + input.Title, + input.Summary, + "planned", + nullIfEmpty(input.DefaultTo), + priority, + acceptanceJSON, + formatTime(now), + formatTime(now), + ) + if err != nil { + if isUniqueConstraintError(err) { + return Task{}, fmt.Errorf("%w: task %s already exists in run %s", ErrInvalidState, input.TaskID, input.RunID) + } + return Task{}, fmt.Errorf("insert task: %w", err) + } + + if err := 
insertEvent(ctx, tx, eventInput{ + RunID: input.RunID, + TaskID: input.TaskID, + Source: "orch", + EventType: "task_added", + Summary: input.Title, + PayloadJSON: marshalJSON(map[string]any{"title": input.Title, "priority": priority}), + CreatedAt: now, + }); err != nil { + return Task{}, err + } + + if err := refreshReadyStates(ctx, tx, input.RunID, now); err != nil { + return Task{}, err + } + if err := updateRunAggregateStatus(ctx, tx, input.RunID, now); err != nil { + return Task{}, err + } + + task, err := selectTask(ctx, tx, input.RunID, input.TaskID) + if err != nil { + return Task{}, err + } + + if err := tx.Commit(); err != nil { + return Task{}, fmt.Errorf("commit add task transaction: %w", err) + } + + return task, nil +} + +func (s *OrchStore) AddDependency(ctx context.Context, input AddDependencyInput) (TaskDependency, error) { + if strings.TrimSpace(input.RunID) == "" { + return TaskDependency{}, fmt.Errorf("%w: run id is required", ErrInvalidInput) + } + if strings.TrimSpace(input.TaskID) == "" { + return TaskDependency{}, fmt.Errorf("%w: task id is required", ErrInvalidInput) + } + if strings.TrimSpace(input.DependsOnTaskID) == "" { + return TaskDependency{}, fmt.Errorf("%w: depends-on task id is required", ErrInvalidInput) + } + if input.TaskID == input.DependsOnTaskID { + return TaskDependency{}, fmt.Errorf("%w: task cannot depend on itself", ErrInvalidInput) + } + + now := nowUTC() + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return TaskDependency{}, fmt.Errorf("begin add dependency transaction: %w", err) + } + defer tx.Rollback() + + if _, err := selectRun(ctx, tx, input.RunID); err != nil { + return TaskDependency{}, err + } + if _, err := selectTask(ctx, tx, input.RunID, input.TaskID); err != nil { + return TaskDependency{}, err + } + if _, err := selectTask(ctx, tx, input.RunID, input.DependsOnTaskID); err != nil { + return TaskDependency{}, err + } + + _, err = tx.ExecContext( + ctx, + `INSERT INTO task_dependencies (run_id, 
task_id, depends_on_task_id) + VALUES (?, ?, ?)`, + input.RunID, + input.TaskID, + input.DependsOnTaskID, + ) + if err != nil { + if isUniqueConstraintError(err) { + return TaskDependency{}, fmt.Errorf("%w: dependency %s -> %s already exists", ErrInvalidState, input.TaskID, input.DependsOnTaskID) + } + return TaskDependency{}, fmt.Errorf("insert dependency: %w", err) + } + + if err := insertEvent(ctx, tx, eventInput{ + RunID: input.RunID, + TaskID: input.TaskID, + Source: "orch", + EventType: "task_dependency_added", + Summary: fmt.Sprintf("%s depends on %s", input.TaskID, input.DependsOnTaskID), + PayloadJSON: marshalJSON(map[string]any{"depends_on_task_id": input.DependsOnTaskID}), + CreatedAt: now, + }); err != nil { + return TaskDependency{}, err + } + + if err := refreshReadyStates(ctx, tx, input.RunID, now); err != nil { + return TaskDependency{}, err + } + if err := updateRunAggregateStatus(ctx, tx, input.RunID, now); err != nil { + return TaskDependency{}, err + } + + if err := tx.Commit(); err != nil { + return TaskDependency{}, fmt.Errorf("commit add dependency transaction: %w", err) + } + + return TaskDependency{ + RunID: input.RunID, + TaskID: input.TaskID, + DependsOnTaskID: input.DependsOnTaskID, + }, nil +} + +func (s *OrchStore) ListReadyTasks(ctx context.Context, input ListReadyInput) ([]Task, error) { + if strings.TrimSpace(input.RunID) == "" { + return nil, fmt.Errorf("%w: run id is required", ErrInvalidInput) + } + + limit := input.Limit + if limit <= 0 { + limit = 20 + } + + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return nil, fmt.Errorf("begin list ready transaction: %w", err) + } + defer tx.Rollback() + + if _, err := selectRun(ctx, tx, input.RunID); err != nil { + return nil, err + } + if err := refreshReadyStates(ctx, tx, input.RunID, nowUTC()); err != nil { + return nil, err + } + if err := updateRunAggregateStatus(ctx, tx, input.RunID, nowUTC()); err != nil { + return nil, err + } + + rows, err := tx.QueryContext( + ctx, + 
`SELECT + run_id, task_id, title, summary, status, default_to, priority, + acceptance_json, latest_attempt_no, created_at, updated_at + FROM tasks + WHERE run_id = ? AND status = 'ready' + ORDER BY CASE priority + WHEN 'high' THEN 0 + WHEN 'normal' THEN 1 + ELSE 2 + END, created_at ASC + LIMIT ?`, + input.RunID, + limit, + ) + if err != nil { + return nil, fmt.Errorf("query ready tasks: %w", err) + } + defer rows.Close() + + var tasks []Task + for rows.Next() { + task, err := scanTask(rows) + if err != nil { + return nil, err + } + tasks = append(tasks, task) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterate ready tasks: %w", err) + } + + if err := tx.Commit(); err != nil { + return nil, fmt.Errorf("commit list ready transaction: %w", err) + } + + return tasks, nil +} + +func (s *OrchStore) DispatchTask(ctx context.Context, input DispatchInput) (DispatchResult, error) { + if strings.TrimSpace(input.RunID) == "" { + return DispatchResult{}, fmt.Errorf("%w: run id is required", ErrInvalidInput) + } + if strings.TrimSpace(input.TaskID) == "" { + return DispatchResult{}, fmt.Errorf("%w: task id is required", ErrInvalidInput) + } + + now := nowUTC() + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return DispatchResult{}, fmt.Errorf("begin dispatch transaction: %w", err) + } + defer tx.Rollback() + + if _, err := selectRun(ctx, tx, input.RunID); err != nil { + return DispatchResult{}, err + } + if err := refreshReadyStates(ctx, tx, input.RunID, now); err != nil { + return DispatchResult{}, err + } + + task, err := selectTask(ctx, tx, input.RunID, input.TaskID) + if err != nil { + return DispatchResult{}, err + } + if task.Status != "ready" { + return DispatchResult{}, fmt.Errorf("%w: task %s is not ready for dispatch", ErrInvalidState, task.TaskID) + } + + result, finalizeWorkspace, err := s.dispatchTaskTx(ctx, tx, task, strings.TrimSpace(input.ToAgent), input.Body, strings.TrimSpace(input.BaseRef), input.PrepareWorkspace, now) + if err 
!= nil { + return DispatchResult{}, err + } + workspaceCommitted := false + defer func() { + finalizeWorkspace(workspaceCommitted) + }() + + if err := updateRunAggregateStatus(ctx, tx, task.RunID, now); err != nil { + return DispatchResult{}, err + } + + if err := tx.Commit(); err != nil { + return DispatchResult{}, fmt.Errorf("commit dispatch transaction: %w", err) + } + workspaceCommitted = true + return result, nil +} + +func (s *OrchStore) GetTaskWithLatestAttempt(ctx context.Context, runID, taskID string) (Task, *TaskAttempt, error) { + task, err := selectTask(ctx, s.db, runID, taskID) + if err != nil { + return Task{}, nil, err + } + if task.LatestAttemptNo == 0 { + return task, nil, nil + } + + attempt, err := selectAttempt(ctx, s.db, runID, taskID, task.LatestAttemptNo) + if err != nil { + return Task{}, nil, err + } + return task, &attempt, nil +} + +func (s *OrchStore) RetryTask(ctx context.Context, input RetryInput) (RetryResult, error) { + if strings.TrimSpace(input.RunID) == "" { + return RetryResult{}, fmt.Errorf("%w: run id is required", ErrInvalidInput) + } + if strings.TrimSpace(input.TaskID) == "" { + return RetryResult{}, fmt.Errorf("%w: task id is required", ErrInvalidInput) + } + + now := nowUTC() + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return RetryResult{}, fmt.Errorf("begin retry transaction: %w", err) + } + defer tx.Rollback() + + if _, err := selectRun(ctx, tx, input.RunID); err != nil { + return RetryResult{}, err + } + + task, err := selectTask(ctx, tx, input.RunID, input.TaskID) + if err != nil { + return RetryResult{}, err + } + if task.Status != "failed" { + return RetryResult{}, fmt.Errorf("%w: task %s is not failed", ErrInvalidState, task.TaskID) + } + if task.LatestAttemptNo == 0 { + return RetryResult{}, fmt.Errorf("%w: task %s has no attempt to retry", ErrInvalidState, task.TaskID) + } + + previousAttempt, err := selectAttempt(ctx, tx, task.RunID, task.TaskID, task.LatestAttemptNo) + if err != nil { + return 
RetryResult{}, err + } + + result, finalizeWorkspace, err := s.dispatchTaskTx( + ctx, + tx, + task, + strings.TrimSpace(input.ToAgent), + input.Body, + defaultString(previousAttempt.BaseRef, previousAttempt.BaseCommit), + input.PrepareWorkspace, + now, + ) + if err != nil { + return RetryResult{}, err + } + workspaceCommitted := false + defer func() { + finalizeWorkspace(workspaceCommitted) + }() + + _, err = tx.ExecContext( + ctx, + `UPDATE task_attempts + SET workspace_status = CASE + WHEN workspace_status = 'cleaned' THEN workspace_status + ELSE ? + END, + updated_at = ? + WHERE run_id = ? AND task_id = ? AND attempt_no = ?`, + "abandoned", + formatTime(now), + previousAttempt.RunID, + previousAttempt.TaskID, + previousAttempt.AttemptNo, + ) + if err != nil { + return RetryResult{}, fmt.Errorf("mark previous retry attempt abandoned: %w", err) + } + + if err := insertEvent(ctx, tx, eventInput{ + RunID: task.RunID, + TaskID: task.TaskID, + ThreadID: result.Thread.ThreadID, + Source: "orch", + EventType: "task_retried", + MessageID: result.Message.MessageID, + Summary: result.Message.Summary, + PayloadJSON: marshalJSON(map[string]any{ + "previous_attempt_no": previousAttempt.AttemptNo, + "previous_thread_id": previousAttempt.ThreadID, + "attempt_no": result.Attempt.AttemptNo, + "thread_id": result.Attempt.ThreadID, + }), + CreatedAt: now, + }); err != nil { + return RetryResult{}, err + } + + if err := updateRunAggregateStatus(ctx, tx, task.RunID, now); err != nil { + return RetryResult{}, err + } + + if err := tx.Commit(); err != nil { + return RetryResult{}, fmt.Errorf("commit retry transaction: %w", err) + } + workspaceCommitted = true + + return RetryResult{ + Task: result.Task, + Attempt: result.Attempt, + Thread: result.Thread, + Message: result.Message, + PreviousAttempt: previousAttempt, + }, nil +} + +func (s *OrchStore) ReassignTask(ctx context.Context, input ReassignInput) (ReassignResult, error) { + if strings.TrimSpace(input.RunID) == "" { + return 
ReassignResult{}, fmt.Errorf("%w: run id is required", ErrInvalidInput) + } + if strings.TrimSpace(input.TaskID) == "" { + return ReassignResult{}, fmt.Errorf("%w: task id is required", ErrInvalidInput) + } + if strings.TrimSpace(input.ToAgent) == "" { + return ReassignResult{}, fmt.Errorf("%w: destination agent is required", ErrInvalidInput) + } + + now := nowUTC() + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return ReassignResult{}, fmt.Errorf("begin reassign transaction: %w", err) + } + defer tx.Rollback() + + if _, err := selectRun(ctx, tx, input.RunID); err != nil { + return ReassignResult{}, err + } + + task, err := selectTask(ctx, tx, input.RunID, input.TaskID) + if err != nil { + return ReassignResult{}, err + } + if task.Status != "blocked" && task.Status != "failed" { + return ReassignResult{}, fmt.Errorf("%w: task %s is not blocked or failed", ErrInvalidState, task.TaskID) + } + if task.LatestAttemptNo == 0 { + return ReassignResult{}, fmt.Errorf("%w: task %s has no attempt to reassign", ErrInvalidState, task.TaskID) + } + + previousAttempt, err := selectAttempt(ctx, tx, task.RunID, task.TaskID, task.LatestAttemptNo) + if err != nil { + return ReassignResult{}, err + } + + if task.Status == "blocked" && previousAttempt.ThreadID != "" { + thread, err := selectThread(ctx, tx, previousAttempt.ThreadID) + if err != nil && !errors.Is(err, ErrThreadNotFound) { + return ReassignResult{}, err + } + if err == nil && !isTerminalStatus(thread.Status) { + if err := cancelThreadTx(ctx, tx, thread, defaultString(input.Reason, "task reassigned"), now); err != nil { + return ReassignResult{}, err + } + } + _, err = tx.ExecContext( + ctx, + `UPDATE task_attempts + SET status = ?, workspace_status = CASE + WHEN workspace_status = 'cleaned' THEN workspace_status + ELSE ? + END, + updated_at = ? + WHERE run_id = ? AND task_id = ? 
AND attempt_no = ?`, + "cancelled", + "abandoned", + formatTime(now), + previousAttempt.RunID, + previousAttempt.TaskID, + previousAttempt.AttemptNo, + ) + if err != nil { + return ReassignResult{}, fmt.Errorf("mark previous blocked attempt abandoned: %w", err) + } + } else { + _, err = tx.ExecContext( + ctx, + `UPDATE task_attempts + SET workspace_status = CASE + WHEN workspace_status = 'cleaned' THEN workspace_status + ELSE ? + END, + updated_at = ? + WHERE run_id = ? AND task_id = ? AND attempt_no = ?`, + "abandoned", + formatTime(now), + previousAttempt.RunID, + previousAttempt.TaskID, + previousAttempt.AttemptNo, + ) + if err != nil { + return ReassignResult{}, fmt.Errorf("mark previous attempt abandoned: %w", err) + } + } + + result, finalizeWorkspace, err := s.dispatchTaskTx( + ctx, + tx, + task, + strings.TrimSpace(input.ToAgent), + input.Reason, + defaultString(previousAttempt.BaseRef, previousAttempt.BaseCommit), + input.PrepareWorkspace, + now, + ) + if err != nil { + return ReassignResult{}, err + } + workspaceCommitted := false + defer func() { + finalizeWorkspace(workspaceCommitted) + }() + + if err := insertEvent(ctx, tx, eventInput{ + RunID: task.RunID, + TaskID: task.TaskID, + ThreadID: result.Thread.ThreadID, + Source: "orch", + EventType: "task_reassigned", + MessageID: result.Message.MessageID, + Summary: defaultString(input.Reason, result.Message.Summary), + PayloadJSON: marshalJSON(map[string]any{ + "previous_attempt_no": previousAttempt.AttemptNo, + "previous_thread_id": previousAttempt.ThreadID, + "from_agent": previousAttempt.AssignedTo, + "to_agent": result.Attempt.AssignedTo, + "attempt_no": result.Attempt.AttemptNo, + "thread_id": result.Attempt.ThreadID, + }), + CreatedAt: now, + }); err != nil { + return ReassignResult{}, err + } + + if err := updateRunAggregateStatus(ctx, tx, task.RunID, now); err != nil { + return ReassignResult{}, err + } + + if err := tx.Commit(); err != nil { + return ReassignResult{}, fmt.Errorf("commit reassign 
transaction: %w", err) + } + workspaceCommitted = true + + return ReassignResult{ + Task: result.Task, + Attempt: result.Attempt, + Thread: result.Thread, + Message: result.Message, + PreviousAttempt: previousAttempt, + }, nil +} + +func (s *OrchStore) Cancel(ctx context.Context, input CancelControlInput) (CancelResult, error) { + if strings.TrimSpace(input.RunID) == "" { + return CancelResult{}, fmt.Errorf("%w: run id is required", ErrInvalidInput) + } + + now := nowUTC() + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return CancelResult{}, fmt.Errorf("begin cancel transaction: %w", err) + } + defer tx.Rollback() + + run, err := selectRun(ctx, tx, input.RunID) + if err != nil { + return CancelResult{}, err + } + + var tasks []Task + if strings.TrimSpace(input.TaskID) != "" { + task, err := selectTask(ctx, tx, input.RunID, input.TaskID) + if err != nil { + return CancelResult{}, err + } + tasks = append(tasks, task) + } else { + tasks, err = listTasksForRun(ctx, tx, input.RunID) + if err != nil { + return CancelResult{}, err + } + } + + cancelledTasks := make([]Task, 0, len(tasks)) + for _, task := range tasks { + if task.Status == "cancelled" { + if strings.TrimSpace(input.TaskID) != "" { + return CancelResult{}, fmt.Errorf("%w: task %s is already cancelled", ErrInvalidState, task.TaskID) + } + continue + } + + cancelledTask, err := cancelTaskTx(ctx, tx, task, defaultString(input.Reason, "task cancelled"), now) + if err != nil { + return CancelResult{}, err + } + cancelledTasks = append(cancelledTasks, cancelledTask) + } + + if len(cancelledTasks) == 0 && len(tasks) == 0 { + _, err = tx.ExecContext( + ctx, + `UPDATE runs SET status = ?, updated_at = ? 
WHERE run_id = ?`, + "cancelled", + formatTime(now), + run.RunID, + ) + if err != nil { + return CancelResult{}, fmt.Errorf("cancel empty run: %w", err) + } + } + + if err := insertEvent(ctx, tx, eventInput{ + RunID: run.RunID, + Source: "orch", + EventType: "run_cancelled", + Summary: defaultString(input.Reason, "run cancelled"), + PayloadJSON: marshalJSON(map[string]any{ + "task_id": input.TaskID, + "reason": input.Reason, + }), + CreatedAt: now, + }); err != nil { + return CancelResult{}, err + } + + if err := updateRunAggregateStatus(ctx, tx, run.RunID, now); err != nil { + return CancelResult{}, err + } + + run, err = selectRun(ctx, tx, run.RunID) + if err != nil { + return CancelResult{}, err + } + + if err := tx.Commit(); err != nil { + return CancelResult{}, fmt.Errorf("commit cancel transaction: %w", err) + } + + return CancelResult{ + Run: run, + CancelledTasks: cancelledTasks, + }, nil +} + +func (s *OrchStore) ListCleanupCandidates(ctx context.Context, input CleanupInput) ([]CleanupCandidate, error) { + if strings.TrimSpace(input.RunID) == "" { + return nil, fmt.Errorf("%w: run id is required", ErrInvalidInput) + } + if input.AttemptNo > 0 && strings.TrimSpace(input.TaskID) == "" { + return nil, fmt.Errorf("%w: task id is required when attempt is specified", ErrInvalidInput) + } + if !input.AllCompleted && strings.TrimSpace(input.TaskID) == "" && input.AttemptNo == 0 { + return nil, fmt.Errorf("%w: specify --task, --attempt, or --all-completed", ErrInvalidInput) + } + + if _, err := s.GetRun(ctx, input.RunID); err != nil { + return nil, err + } + + conditions := []string{"run_id = ?", "worktree_path <> ''", "workspace_status <> 'cleaned'"} + args := []any{input.RunID} + if strings.TrimSpace(input.TaskID) != "" { + conditions = append(conditions, "task_id = ?") + args = append(args, strings.TrimSpace(input.TaskID)) + } + if input.AttemptNo > 0 { + conditions = append(conditions, "attempt_no = ?") + args = append(args, input.AttemptNo) + } + if 
!input.Force { + conditions = append(conditions, "workspace_status IN (?, ?)") + args = append(args, "completed", "abandoned") + } + + query := `SELECT + run_id, task_id, attempt_no, assigned_to, thread_id, base_ref, base_commit, + branch_name, worktree_path, workspace_status, result_commit, status, + created_at, updated_at + FROM task_attempts + WHERE ` + strings.Join(conditions, " AND ") + ` + ORDER BY run_id, task_id, attempt_no ASC` + + rows, err := s.db.QueryContext(ctx, query, args...) + if err != nil { + return nil, fmt.Errorf("query cleanup candidates: %w", err) + } + defer rows.Close() + + var candidates []CleanupCandidate + for rows.Next() { + attempt, err := scanAttempt(rows) + if err != nil { + return nil, err + } + candidates = append(candidates, CleanupCandidate{Attempt: attempt}) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterate cleanup candidates: %w", err) + } + + if len(candidates) == 0 { + return nil, protocol.NoMatchingWork("no cleanup candidates matched the requested filters") + } + return candidates, nil +} + +func (s *OrchStore) MarkAttemptsCleaned(ctx context.Context, records []CleanupRecord) ([]TaskAttempt, error) { + if len(records) == 0 { + return nil, nil + } + + now := nowUTC() + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return nil, fmt.Errorf("begin cleanup commit transaction: %w", err) + } + defer tx.Rollback() + + cleaned := make([]TaskAttempt, 0, len(records)) + for _, record := range records { + attempt := record.Attempt + _, err := tx.ExecContext( + ctx, + `UPDATE task_attempts + SET workspace_status = ?, updated_at = ? + WHERE run_id = ? AND task_id = ? 
AND attempt_no = ?`, + "cleaned", + formatTime(now), + attempt.RunID, + attempt.TaskID, + attempt.AttemptNo, + ) + if err != nil { + return nil, fmt.Errorf("mark attempt cleaned: %w", err) + } + if err := insertEvent(ctx, tx, eventInput{ + RunID: attempt.RunID, + TaskID: attempt.TaskID, + ThreadID: attempt.ThreadID, + Source: "orch", + EventType: "workspace_cleaned", + Summary: fmt.Sprintf("cleaned workspace for %s/%s attempt %d", attempt.RunID, attempt.TaskID, attempt.AttemptNo), + PayloadJSON: marshalJSON(map[string]any{ + "attempt_no": attempt.AttemptNo, + "worktree_path": attempt.WorktreePath, + }), + CreatedAt: now, + }); err != nil { + return nil, err + } + attempt.WorkspaceStatus = "cleaned" + attempt.UpdatedAt = now + cleaned = append(cleaned, attempt) + } + + if err := tx.Commit(); err != nil { + return nil, fmt.Errorf("commit cleanup transaction: %w", err) + } + + return cleaned, nil +} + +func (s *OrchStore) dispatchTaskTx( + ctx context.Context, + tx *sql.Tx, + task Task, + toAgent string, + body string, + baseRef string, + prepareWorkspace DispatchWorkspacePreparer, + now time.Time, +) (DispatchResult, func(bool), error) { + assignedTo := defaultString(strings.TrimSpace(toAgent), task.DefaultTo) + if assignedTo == "" { + return DispatchResult{}, nil, fmt.Errorf("%w: dispatch target agent is required", ErrInvalidInput) + } + + attemptNo := task.LatestAttemptNo + 1 + workspace := DispatchWorkspace{ + BaseRef: strings.TrimSpace(baseRef), + } + finalizeWorkspace := func(success bool) {} + if prepareWorkspace != nil { + cleanupWorkspace := func() {} + var err error + workspace, cleanupWorkspace, err = prepareWorkspace(task, attemptNo) + if err != nil { + return DispatchResult{}, nil, err + } + if cleanupWorkspace == nil { + cleanupWorkspace = func() {} + } + finalizeWorkspace = func(success bool) { + if !success { + cleanupWorkspace() + } + } + } + + threadID := newID("thr") + messageID := newID("msg") + payloadJSON := buildDispatchPayload(task, attemptNo, 
workspace) + thread := Thread{ + ThreadID: threadID, + RunID: task.RunID, + TaskID: task.TaskID, + Subject: task.Title, + CreatedBy: "orch", + AssignedTo: assignedTo, + Status: "pending", + Priority: task.Priority, + LatestMessageID: messageID, + CreatedAt: now, + UpdatedAt: now, + } + + _, err := tx.ExecContext( + ctx, + `INSERT INTO threads ( + thread_id, run_id, task_id, subject, created_by, assigned_to, status, + priority, latest_message_id, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + thread.ThreadID, + thread.RunID, + thread.TaskID, + thread.Subject, + thread.CreatedBy, + thread.AssignedTo, + thread.Status, + thread.Priority, + thread.LatestMessageID, + formatTime(thread.CreatedAt), + formatTime(thread.UpdatedAt), + ) + if err != nil { + return DispatchResult{}, finalizeWorkspace, fmt.Errorf("insert dispatch thread: %w", err) + } + + message := Message{ + MessageID: messageID, + ThreadID: threadID, + FromAgent: "orch", + ToAgent: assignedTo, + Kind: "task", + Summary: defaultString(task.Summary, task.Title), + Body: body, + PayloadJSON: json.RawMessage(payloadJSON), + CreatedAt: now, + } + if err := insertMessage(ctx, tx, message); err != nil { + return DispatchResult{}, finalizeWorkspace, err + } + if err := insertEvent(ctx, tx, eventInput{ + RunID: thread.RunID, + TaskID: thread.TaskID, + ThreadID: thread.ThreadID, + Source: "inbox", + EventType: "thread_created", + MessageID: message.MessageID, + Summary: message.Summary, + PayloadJSON: payloadJSON, + CreatedAt: now, + }); err != nil { + return DispatchResult{}, finalizeWorkspace, err + } + + attempt := TaskAttempt{ + RunID: task.RunID, + TaskID: task.TaskID, + AttemptNo: attemptNo, + AssignedTo: assignedTo, + ThreadID: threadID, + BaseRef: workspace.BaseRef, + BaseCommit: workspace.BaseCommit, + BranchName: workspace.BranchName, + WorktreePath: workspace.WorktreePath, + WorkspaceStatus: workspace.WorkspaceStatus, + Status: "dispatched", + CreatedAt: now, + UpdatedAt: now, + } + 
_, err = tx.ExecContext( + ctx, + `INSERT INTO task_attempts ( + run_id, task_id, attempt_no, assigned_to, thread_id, base_ref, base_commit, + branch_name, worktree_path, workspace_status, result_commit, status, + created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + attempt.RunID, + attempt.TaskID, + attempt.AttemptNo, + attempt.AssignedTo, + attempt.ThreadID, + nullIfEmpty(attempt.BaseRef), + nullIfEmpty(attempt.BaseCommit), + nullIfEmpty(attempt.BranchName), + nullIfEmpty(attempt.WorktreePath), + nullIfEmpty(attempt.WorkspaceStatus), + nil, + attempt.Status, + formatTime(attempt.CreatedAt), + formatTime(attempt.UpdatedAt), + ) + if err != nil { + return DispatchResult{}, finalizeWorkspace, fmt.Errorf("insert task attempt: %w", err) + } + + _, err = tx.ExecContext( + ctx, + `UPDATE tasks + SET status = ?, latest_attempt_no = ?, updated_at = ? + WHERE run_id = ? AND task_id = ?`, + "dispatched", + attempt.AttemptNo, + formatTime(now), + task.RunID, + task.TaskID, + ) + if err != nil { + return DispatchResult{}, finalizeWorkspace, fmt.Errorf("update task dispatch status: %w", err) + } + + if err := insertEvent(ctx, tx, eventInput{ + RunID: task.RunID, + TaskID: task.TaskID, + ThreadID: thread.ThreadID, + Source: "orch", + EventType: "task_dispatched", + MessageID: message.MessageID, + Summary: message.Summary, + PayloadJSON: payloadJSON, + CreatedAt: now, + }); err != nil { + return DispatchResult{}, finalizeWorkspace, err + } + + task.Status = "dispatched" + task.LatestAttemptNo = attempt.AttemptNo + task.UpdatedAt = now + + return DispatchResult{ + Task: task, + Attempt: attempt, + Thread: thread, + Message: message, + }, finalizeWorkspace, nil +} + +func cancelTaskTx(ctx context.Context, tx *sql.Tx, task Task, reason string, now time.Time) (Task, error) { + if task.LatestAttemptNo > 0 { + attempt, err := selectAttempt(ctx, tx, task.RunID, task.TaskID, task.LatestAttemptNo) + if err != nil { + return Task{}, err + } + if 
attempt.ThreadID != "" { + thread, err := selectThread(ctx, tx, attempt.ThreadID) + if err != nil && !errors.Is(err, ErrThreadNotFound) { + return Task{}, err + } + if err == nil && !isTerminalStatus(thread.Status) { + if err := cancelThreadTx(ctx, tx, thread, reason, now); err != nil { + return Task{}, err + } + } + } + + attemptStatus := attempt.Status + if attemptStatus != "done" && attemptStatus != "failed" && attemptStatus != "cancelled" { + attemptStatus = "cancelled" + } + workspaceStatus := attempt.WorkspaceStatus + if workspaceStatus != "cleaned" { + workspaceStatus = "abandoned" + } + _, err = tx.ExecContext( + ctx, + `UPDATE task_attempts + SET status = ?, workspace_status = ?, updated_at = ? + WHERE run_id = ? AND task_id = ? AND attempt_no = ?`, + attemptStatus, + nullIfEmpty(workspaceStatus), + formatTime(now), + attempt.RunID, + attempt.TaskID, + attempt.AttemptNo, + ) + if err != nil { + return Task{}, fmt.Errorf("update cancelled attempt: %w", err) + } + } + + _, err := tx.ExecContext( + ctx, + `UPDATE tasks + SET status = ?, updated_at = ? + WHERE run_id = ? 
AND task_id = ?`, + "cancelled", + formatTime(now), + task.RunID, + task.TaskID, + ) + if err != nil { + return Task{}, fmt.Errorf("update cancelled task: %w", err) + } + + if err := insertEvent(ctx, tx, eventInput{ + RunID: task.RunID, + TaskID: task.TaskID, + Source: "orch", + EventType: "task_cancelled", + Summary: defaultString(reason, "task cancelled"), + PayloadJSON: marshalJSON(map[string]any{"reason": reason}), + CreatedAt: now, + }); err != nil { + return Task{}, err + } + + task.Status = "cancelled" + task.UpdatedAt = now + return task, nil +} + +func cancelThreadTx(ctx context.Context, tx *sql.Tx, thread Thread, reason string, now time.Time) error { + messageID := newID("msg") + summary := defaultString(reason, "thread cancelled") + message := Message{ + MessageID: messageID, + ThreadID: thread.ThreadID, + FromAgent: "orch", + ToAgent: thread.AssignedTo, + Kind: "control", + Summary: summary, + Body: reason, + PayloadJSON: json.RawMessage(`{}`), + CreatedAt: now, + } + + if err := insertMessage(ctx, tx, message); err != nil { + return err + } + if err := updateThreadState(ctx, tx, thread.ThreadID, "cancelled", thread.AssignedTo, message.MessageID, now); err != nil { + return err + } + if _, err := tx.ExecContext( + ctx, + `UPDATE leases + SET released_at = ? + WHERE thread_id = ? 
+ AND released_at IS NULL`, + formatTime(now), + thread.ThreadID, + ); err != nil { + return fmt.Errorf("release lease on orch cancel: %w", err) + } + if err := insertEvent(ctx, tx, eventInput{ + RunID: thread.RunID, + TaskID: thread.TaskID, + ThreadID: thread.ThreadID, + Source: "inbox", + EventType: "thread_cancelled", + MessageID: message.MessageID, + Summary: message.Summary, + PayloadJSON: string(message.PayloadJSON), + CreatedAt: now, + }); err != nil { + return err + } + return nil +} + +func (s *OrchStore) ReconcileRun(ctx context.Context, runID string) (ReconcileResult, error) { + if strings.TrimSpace(runID) == "" { + return ReconcileResult{}, fmt.Errorf("%w: run id is required", ErrInvalidInput) + } + + now := nowUTC() + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return ReconcileResult{}, fmt.Errorf("begin reconcile transaction: %w", err) + } + defer tx.Rollback() + + if _, err := selectRun(ctx, tx, runID); err != nil { + return ReconcileResult{}, err + } + + rows, err := tx.QueryContext( + ctx, + `SELECT + t.task_id, + t.status, + a.attempt_no, + a.status, + a.thread_id, + th.status + FROM tasks t + JOIN task_attempts a + ON a.run_id = t.run_id + AND a.task_id = t.task_id + AND a.attempt_no = t.latest_attempt_no + JOIN threads th ON th.thread_id = a.thread_id + WHERE t.run_id = ? 
+ AND t.latest_attempt_no IS NOT NULL`, + runID, + ) + if err != nil { + return ReconcileResult{}, fmt.Errorf("query reconcile candidates: %w", err) + } + defer rows.Close() + + var updatedIDs []string + for rows.Next() { + var ( + taskID string + taskStatus string + attemptNo int + attemptStatus string + threadID string + threadStatus string + ) + if err := rows.Scan(&taskID, &taskStatus, &attemptNo, &attemptStatus, &threadID, &threadStatus); err != nil { + return ReconcileResult{}, fmt.Errorf("scan reconcile candidate: %w", err) + } + + nextStatus := reconcileTaskStatus(threadStatus) + if nextStatus == "" { + continue + } + if nextStatus == taskStatus && nextStatus == attemptStatus { + continue + } + + _, err = tx.ExecContext( + ctx, + `UPDATE tasks + SET status = ?, updated_at = ? + WHERE run_id = ? AND task_id = ?`, + nextStatus, + formatTime(now), + runID, + taskID, + ) + if err != nil { + return ReconcileResult{}, fmt.Errorf("update reconciled task status: %w", err) + } + _, err = tx.ExecContext( + ctx, + `UPDATE task_attempts + SET status = ?, workspace_status = COALESCE(?, workspace_status), updated_at = ? + WHERE run_id = ? AND task_id = ? 
AND attempt_no = ?`, + nextStatus, + nullIfEmpty(reconcileWorkspaceStatus(threadStatus)), + formatTime(now), + runID, + taskID, + attemptNo, + ) + if err != nil { + return ReconcileResult{}, fmt.Errorf("update reconciled attempt status: %w", err) + } + + summary := fmt.Sprintf("%s -> %s", taskID, nextStatus) + payloadJSON := marshalJSON(map[string]any{ + "thread_id": threadID, + "thread_status": threadStatus, + "previous_status": taskStatus, + "previous_attempt": attemptStatus, + }) + if nextStatus == "blocked" { + question, err := selectLatestQuestionMessage(ctx, tx, threadID) + if err != nil { + return ReconcileResult{}, err + } + summary = question.Summary + payloadJSON = string(question.PayloadJSON) + } + + if err := insertEvent(ctx, tx, eventInput{ + RunID: runID, + TaskID: taskID, + ThreadID: threadID, + Source: "orch", + EventType: "task_" + nextStatus, + Summary: summary, + PayloadJSON: payloadJSON, + CreatedAt: now, + }); err != nil { + return ReconcileResult{}, err + } + + updatedIDs = append(updatedIDs, taskID) + } + if err := rows.Err(); err != nil { + return ReconcileResult{}, fmt.Errorf("iterate reconcile candidates: %w", err) + } + + if err := refreshReadyStates(ctx, tx, runID, now); err != nil { + return ReconcileResult{}, err + } + if err := updateRunAggregateStatus(ctx, tx, runID, now); err != nil { + return ReconcileResult{}, err + } + + run, err := selectRun(ctx, tx, runID) + if err != nil { + return ReconcileResult{}, err + } + taskCounts, err := collectTaskCounts(ctx, tx, runID) + if err != nil { + return ReconcileResult{}, err + } + + updatedTasks := make([]Task, 0, len(updatedIDs)) + for _, taskID := range updatedIDs { + task, err := selectTask(ctx, tx, runID, taskID) + if err != nil { + return ReconcileResult{}, err + } + updatedTasks = append(updatedTasks, task) + } + + if err := tx.Commit(); err != nil { + return ReconcileResult{}, fmt.Errorf("commit reconcile transaction: %w", err) + } + + return ReconcileResult{ + Run: run, + 
TaskCounts: taskCounts, + UpdatedTasks: updatedTasks, + }, nil +} + +func (s *OrchStore) ListBlockedTasks(ctx context.Context, runID string) ([]BlockedTask, error) { + if strings.TrimSpace(runID) == "" { + return nil, fmt.Errorf("%w: run id is required", ErrInvalidInput) + } + + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return nil, fmt.Errorf("begin list blocked transaction: %w", err) + } + defer tx.Rollback() + + if _, err := selectRun(ctx, tx, runID); err != nil { + return nil, err + } + + rows, err := tx.QueryContext( + ctx, + `SELECT + t.run_id, t.task_id, t.title, t.summary, t.status, t.default_to, t.priority, + t.acceptance_json, t.latest_attempt_no, t.created_at, t.updated_at, + a.run_id, a.task_id, a.attempt_no, a.assigned_to, a.thread_id, a.base_ref, + a.base_commit, a.branch_name, a.worktree_path, a.workspace_status, + a.result_commit, a.status, a.created_at, a.updated_at + FROM tasks t + JOIN task_attempts a + ON a.run_id = t.run_id + AND a.task_id = t.task_id + AND a.attempt_no = t.latest_attempt_no + WHERE t.run_id = ? 
+ AND t.status = 'blocked' + ORDER BY t.updated_at ASC`, + runID, + ) + if err != nil { + return nil, fmt.Errorf("query blocked tasks: %w", err) + } + defer rows.Close() + + var blocked []BlockedTask + for rows.Next() { + task, attempt, err := scanTaskAndAttempt(rows) + if err != nil { + return nil, err + } + question, err := selectLatestQuestionMessage(ctx, tx, attempt.ThreadID) + if err != nil { + return nil, err + } + blocked = append(blocked, BlockedTask{ + Task: task, + Attempt: attempt, + Question: question, + }) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterate blocked tasks: %w", err) + } + + if err := tx.Commit(); err != nil { + return nil, fmt.Errorf("commit list blocked transaction: %w", err) + } + + return blocked, nil +} + +func (s *OrchStore) AnswerTask(ctx context.Context, input AnswerInput) (AnswerResult, error) { + if strings.TrimSpace(input.RunID) == "" { + return AnswerResult{}, fmt.Errorf("%w: run id is required", ErrInvalidInput) + } + if strings.TrimSpace(input.TaskID) == "" { + return AnswerResult{}, fmt.Errorf("%w: task id is required", ErrInvalidInput) + } + + payloadJSON, err := validateAndNormalizeJSON("payload-json", input.PayloadJSON) + if err != nil { + return AnswerResult{}, err + } + if strings.TrimSpace(input.Body) == "" && payloadJSON == "{}" { + return AnswerResult{}, fmt.Errorf("%w: body or payload-json is required", ErrInvalidInput) + } + + now := nowUTC() + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return AnswerResult{}, fmt.Errorf("begin answer transaction: %w", err) + } + defer tx.Rollback() + + task, err := selectTask(ctx, tx, input.RunID, input.TaskID) + if err != nil { + return AnswerResult{}, err + } + if task.Status != "blocked" { + return AnswerResult{}, fmt.Errorf("%w: task %s is not blocked", ErrInvalidState, task.TaskID) + } + if task.LatestAttemptNo == 0 { + return AnswerResult{}, fmt.Errorf("%w: task %s has no active attempt", ErrInvalidState, task.TaskID) + } + + attempt, err 
:= selectAttempt(ctx, tx, input.RunID, input.TaskID, task.LatestAttemptNo) + if err != nil { + return AnswerResult{}, err + } + thread, err := selectThread(ctx, tx, attempt.ThreadID) + if err != nil { + return AnswerResult{}, err + } + if isTerminalStatus(thread.Status) { + return AnswerResult{}, fmt.Errorf("%w: thread %s is already terminal", ErrInvalidState, thread.ThreadID) + } + + message := Message{ + MessageID: newID("msg"), + ThreadID: thread.ThreadID, + FromAgent: "orch", + ToAgent: attempt.AssignedTo, + Kind: "answer", + Summary: summarizeAnswer(input.Body), + Body: input.Body, + PayloadJSON: json.RawMessage(payloadJSON), + CreatedAt: now, + } + if err := insertMessage(ctx, tx, message); err != nil { + return AnswerResult{}, err + } + if err := updateThreadState(ctx, tx, thread.ThreadID, thread.Status, thread.AssignedTo, message.MessageID, now); err != nil { + return AnswerResult{}, err + } + if err := insertEvent(ctx, tx, eventInput{ + RunID: thread.RunID, + TaskID: thread.TaskID, + ThreadID: thread.ThreadID, + Source: "inbox", + EventType: "thread_reply", + MessageID: message.MessageID, + Summary: message.Summary, + PayloadJSON: payloadJSON, + CreatedAt: now, + }); err != nil { + return AnswerResult{}, err + } + if err := insertEvent(ctx, tx, eventInput{ + RunID: task.RunID, + TaskID: task.TaskID, + ThreadID: thread.ThreadID, + Source: "orch", + EventType: "task_answered", + MessageID: message.MessageID, + Summary: message.Summary, + PayloadJSON: payloadJSON, + CreatedAt: now, + }); err != nil { + return AnswerResult{}, err + } + + _, err = tx.ExecContext( + ctx, + `UPDATE tasks + SET updated_at = ? + WHERE run_id = ? AND task_id = ?`, + formatTime(now), + task.RunID, + task.TaskID, + ) + if err != nil { + return AnswerResult{}, fmt.Errorf("touch answered task: %w", err) + } + _, err = tx.ExecContext( + ctx, + `UPDATE task_attempts + SET updated_at = ? + WHERE run_id = ? AND task_id = ? 
AND attempt_no = ?`, + formatTime(now), + attempt.RunID, + attempt.TaskID, + attempt.AttemptNo, + ) + if err != nil { + return AnswerResult{}, fmt.Errorf("touch answered attempt: %w", err) + } + if err := updateRunAggregateStatus(ctx, tx, task.RunID, now); err != nil { + return AnswerResult{}, err + } + + task.UpdatedAt = now + attempt.UpdatedAt = now + thread.LatestMessageID = message.MessageID + thread.UpdatedAt = now + + if err := tx.Commit(); err != nil { + return AnswerResult{}, fmt.Errorf("commit answer transaction: %w", err) + } + + return AnswerResult{ + Task: task, + Attempt: attempt, + Thread: thread, + Message: message, + }, nil +} + +func (s *OrchStore) GetRunOverview(ctx context.Context, runID string) (RunOverview, error) { + if strings.TrimSpace(runID) == "" { + return RunOverview{}, fmt.Errorf("%w: run id is required", ErrInvalidInput) + } + + now := nowUTC() + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return RunOverview{}, fmt.Errorf("begin run overview transaction: %w", err) + } + defer tx.Rollback() + + if _, err := selectRun(ctx, tx, runID); err != nil { + return RunOverview{}, err + } + if err := refreshReadyStates(ctx, tx, runID, now); err != nil { + return RunOverview{}, err + } + if err := updateRunAggregateStatus(ctx, tx, runID, now); err != nil { + return RunOverview{}, err + } + + run, err := selectRun(ctx, tx, runID) + if err != nil { + return RunOverview{}, err + } + taskCounts, err := collectTaskCounts(ctx, tx, runID) + if err != nil { + return RunOverview{}, err + } + tasks, err := listTasksForRun(ctx, tx, runID) + if err != nil { + return RunOverview{}, err + } + + if err := tx.Commit(); err != nil { + return RunOverview{}, fmt.Errorf("commit run overview transaction: %w", err) + } + + return RunOverview{ + Run: run, + TaskCounts: taskCounts, + Tasks: tasks, + }, nil +} + +func (s *OrchStore) WaitForEvents(ctx context.Context, input WaitInput) (WaitResult, error) { + if strings.TrimSpace(input.RunID) == "" { + return 
WaitResult{}, fmt.Errorf("%w: run id is required", ErrInvalidInput) + } + + eventTypes := normalizeWaitEventTypes(input.EventTypes) + if _, err := s.GetRun(ctx, input.RunID); err != nil { + return WaitResult{}, err + } + + cursor := input.AfterEventID + waitCtx := ctx + cancel := func() {} + if input.Timeout > 0 { + waitCtx, cancel = context.WithTimeout(ctx, input.Timeout) + } + defer cancel() + + for { + events, nextEventID, found, err := s.findRunEventsAfter(waitCtx, input.RunID, cursor, eventTypes) + if err != nil { + if isDeadlineExceeded(waitCtx) { + return WaitResult{Woke: false, NextEventID: cursor}, nil + } + return WaitResult{}, err + } + if found { + return WaitResult{ + Woke: true, + NextEventID: nextEventID, + Events: events, + }, nil + } + + if _, err := s.ReconcileRun(waitCtx, input.RunID); err != nil { + if isSQLiteBusyError(err) { + ok, waitErr := waitForNextPoll(waitCtx, 25*time.Millisecond) + if waitErr != nil { + if errors.Is(waitErr, context.DeadlineExceeded) { + return WaitResult{Woke: false, NextEventID: cursor}, nil + } + return WaitResult{}, waitErr + } + if !ok { + return WaitResult{Woke: false, NextEventID: cursor}, nil + } + continue + } + if isDeadlineExceeded(waitCtx) { + return WaitResult{Woke: false, NextEventID: cursor}, nil + } + return WaitResult{}, err + } + + events, nextEventID, found, err = s.findRunEventsAfter(waitCtx, input.RunID, cursor, eventTypes) + if err != nil { + if isDeadlineExceeded(waitCtx) { + return WaitResult{Woke: false, NextEventID: cursor}, nil + } + return WaitResult{}, err + } + if found { + return WaitResult{ + Woke: true, + NextEventID: nextEventID, + Events: events, + }, nil + } + + ok, err := waitForNextPoll(waitCtx, 200*time.Millisecond) + if err != nil { + if errors.Is(err, context.DeadlineExceeded) { + return WaitResult{Woke: false, NextEventID: cursor}, nil + } + return WaitResult{}, err + } + if !ok { + return WaitResult{Woke: false, NextEventID: cursor}, nil + } + } +} + +func listTasksForRun(ctx 
context.Context, db queryRowsContexter, runID string) ([]Task, error) { + rows, err := db.QueryContext( + ctx, + `SELECT + run_id, task_id, title, summary, status, default_to, priority, + acceptance_json, latest_attempt_no, created_at, updated_at + FROM tasks + WHERE run_id = ? + ORDER BY created_at ASC`, + runID, + ) + if err != nil { + return nil, fmt.Errorf("query tasks for run: %w", err) + } + defer rows.Close() + + var tasks []Task + for rows.Next() { + task, err := scanTask(rows) + if err != nil { + return nil, err + } + tasks = append(tasks, task) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterate tasks for run: %w", err) + } + return tasks, nil +} + +func (s *OrchStore) findRunEventsAfter(ctx context.Context, runID string, afterEventID int64, eventTypes []string) ([]RunEvent, int64, bool, error) { + args := []any{runID, afterEventID} + query := `SELECT + event_id, event_type, run_id, task_id, thread_id, summary, payload_json, created_at + FROM events + WHERE run_id = ? + AND event_id > ?` + if len(eventTypes) > 0 { + query += " AND event_type IN (" + placeholders(len(eventTypes)) + ")" + for _, eventType := range eventTypes { + args = append(args, eventType) + } + } + query += " ORDER BY event_id ASC LIMIT 1" + + row := s.db.QueryRowContext(ctx, query, args...) 
+ + var ( + event RunEvent + threadID sql.NullString + payload string + createdAt string + ) + err := row.Scan( + &event.EventID, + &event.Type, + &event.RunID, + &event.TaskID, + &threadID, + &event.Summary, + &payload, + &createdAt, + ) + if errors.Is(err, sql.ErrNoRows) { + return nil, 0, false, nil + } + if err != nil { + return nil, 0, false, fmt.Errorf("query run events after %d: %w", afterEventID, err) + } + + if threadID.Valid { + event.ThreadID = threadID.String + } + event.Payload = json.RawMessage(payload) + event.CreatedAt = parseTime(createdAt) + + return []RunEvent{event}, event.EventID, true, nil +} + +type queryRowsContexter interface { + QueryContext(ctx context.Context, query string, args ...any) (*sql.Rows, error) +} + +func scanRun(scanner threadScanner) (Run, error) { + var ( + run Run + createdAt, updated string + ) + + if err := scanner.Scan( + &run.RunID, + &run.Goal, + &run.Summary, + &run.Status, + &createdAt, + &updated, + ); err != nil { + return Run{}, fmt.Errorf("scan run: %w", err) + } + + run.CreatedAt = parseTime(createdAt) + run.UpdatedAt = parseTime(updated) + return run, nil +} + +func scanTask(scanner threadScanner) (Task, error) { + var ( + task Task + defaultTo sql.NullString + latestAttempt sql.NullInt64 + acceptanceJSON string + createdAt, updatedAt string + ) + + if err := scanner.Scan( + &task.RunID, + &task.TaskID, + &task.Title, + &task.Summary, + &task.Status, + &defaultTo, + &task.Priority, + &acceptanceJSON, + &latestAttempt, + &createdAt, + &updatedAt, + ); err != nil { + return Task{}, fmt.Errorf("scan task: %w", err) + } + + task.DefaultTo = defaultTo.String + task.AcceptanceJSON = json.RawMessage(acceptanceJSON) + if latestAttempt.Valid { + task.LatestAttemptNo = int(latestAttempt.Int64) + } + task.CreatedAt = parseTime(createdAt) + task.UpdatedAt = parseTime(updatedAt) + return task, nil +} + +func scanAttempt(scanner threadScanner) (TaskAttempt, error) { + var ( + attempt TaskAttempt + baseRef sql.NullString + 
baseCommit sql.NullString + branchName sql.NullString + worktreePath sql.NullString + workspaceStatus sql.NullString + resultCommit sql.NullString + createdAt, updated string + ) + + if err := scanner.Scan( + &attempt.RunID, + &attempt.TaskID, + &attempt.AttemptNo, + &attempt.AssignedTo, + &attempt.ThreadID, + &baseRef, + &baseCommit, + &branchName, + &worktreePath, + &workspaceStatus, + &resultCommit, + &attempt.Status, + &createdAt, + &updated, + ); err != nil { + return TaskAttempt{}, fmt.Errorf("scan attempt: %w", err) + } + + attempt.BaseRef = baseRef.String + attempt.BaseCommit = baseCommit.String + attempt.BranchName = branchName.String + attempt.WorktreePath = worktreePath.String + attempt.WorkspaceStatus = workspaceStatus.String + attempt.ResultCommit = resultCommit.String + attempt.CreatedAt = parseTime(createdAt) + attempt.UpdatedAt = parseTime(updated) + return attempt, nil +} + +func scanTaskAndAttempt(scanner threadScanner) (Task, TaskAttempt, error) { + var ( + task Task + taskDefaultTo sql.NullString + taskLatestAttempt sql.NullInt64 + taskAcceptanceJSON string + taskCreatedAt string + taskUpdatedAt string + attempt TaskAttempt + attemptBaseRef sql.NullString + attemptBaseCommit sql.NullString + attemptBranchName sql.NullString + attemptWorktreePath sql.NullString + attemptWorkspaceState sql.NullString + attemptResultCommit sql.NullString + attemptCreatedAt string + attemptUpdatedAt string + ) + + if err := scanner.Scan( + &task.RunID, + &task.TaskID, + &task.Title, + &task.Summary, + &task.Status, + &taskDefaultTo, + &task.Priority, + &taskAcceptanceJSON, + &taskLatestAttempt, + &taskCreatedAt, + &taskUpdatedAt, + &attempt.RunID, + &attempt.TaskID, + &attempt.AttemptNo, + &attempt.AssignedTo, + &attempt.ThreadID, + &attemptBaseRef, + &attemptBaseCommit, + &attemptBranchName, + &attemptWorktreePath, + &attemptWorkspaceState, + &attemptResultCommit, + &attempt.Status, + &attemptCreatedAt, + &attemptUpdatedAt, + ); err != nil { + return Task{}, 
TaskAttempt{}, fmt.Errorf("scan task and attempt: %w", err) + } + + task.DefaultTo = taskDefaultTo.String + task.AcceptanceJSON = json.RawMessage(taskAcceptanceJSON) + if taskLatestAttempt.Valid { + task.LatestAttemptNo = int(taskLatestAttempt.Int64) + } + task.CreatedAt = parseTime(taskCreatedAt) + task.UpdatedAt = parseTime(taskUpdatedAt) + + attempt.BaseRef = attemptBaseRef.String + attempt.BaseCommit = attemptBaseCommit.String + attempt.BranchName = attemptBranchName.String + attempt.WorktreePath = attemptWorktreePath.String + attempt.WorkspaceStatus = attemptWorkspaceState.String + attempt.ResultCommit = attemptResultCommit.String + attempt.CreatedAt = parseTime(attemptCreatedAt) + attempt.UpdatedAt = parseTime(attemptUpdatedAt) + + return task, attempt, nil +} + +func selectRun(ctx context.Context, db queryRower, runID string) (Run, error) { + row := db.QueryRowContext( + ctx, + `SELECT run_id, goal, summary, status, created_at, updated_at + FROM runs + WHERE run_id = ?`, + runID, + ) + run, err := scanRun(row) + if errors.Is(err, sql.ErrNoRows) { + return Run{}, fmt.Errorf("%w: %s", ErrRunNotFound, runID) + } + return run, err +} + +func selectTask(ctx context.Context, db queryRower, runID, taskID string) (Task, error) { + row := db.QueryRowContext( + ctx, + `SELECT + run_id, task_id, title, summary, status, default_to, priority, + acceptance_json, latest_attempt_no, created_at, updated_at + FROM tasks + WHERE run_id = ? 
AND task_id = ?`, + runID, + taskID, + ) + task, err := scanTask(row) + if errors.Is(err, sql.ErrNoRows) { + return Task{}, fmt.Errorf("%w: %s/%s", ErrTaskNotFound, runID, taskID) + } + return task, err +} + +func selectAttempt(ctx context.Context, db queryRower, runID, taskID string, attemptNo int) (TaskAttempt, error) { + row := db.QueryRowContext( + ctx, + `SELECT + run_id, task_id, attempt_no, assigned_to, thread_id, base_ref, base_commit, + branch_name, worktree_path, workspace_status, result_commit, status, + created_at, updated_at + FROM task_attempts + WHERE run_id = ? AND task_id = ? AND attempt_no = ?`, + runID, + taskID, + attemptNo, + ) + attempt, err := scanAttempt(row) + if errors.Is(err, sql.ErrNoRows) { + return TaskAttempt{}, fmt.Errorf("%w: attempt %s/%s/%d not found", ErrInvalidState, runID, taskID, attemptNo) + } + return attempt, err +} + +func selectLatestQuestionMessage(ctx context.Context, db queryRowsAndRower, threadID string) (Message, error) { + row := db.QueryRowContext( + ctx, + `SELECT + message_id, thread_id, from_agent, to_agent, kind, summary, body, + payload_json, created_at + FROM messages + WHERE thread_id = ? 
AND kind = 'question'
		ORDER BY created_at DESC
		LIMIT 1`,
		threadID,
	)
	message, err := scanMessage(row)
	if errors.Is(err, sql.ErrNoRows) {
		return Message{}, fmt.Errorf("%w: blocked thread %s has no question message", ErrInvalidState, threadID)
	}
	if err != nil {
		return Message{}, err
	}
	artifactsByMessageID, err := loadArtifactsForMessageIDsFromQueryer(ctx, db, []string{message.MessageID})
	if err != nil {
		return Message{}, err
	}
	message.Artifacts = artifactsByMessageID[message.MessageID]
	return message, nil
}

// queryRowsAndRower combines queryRower (declared elsewhere in this package)
// with a multi-row QueryContext method.
type queryRowsAndRower interface {
	queryRower
	QueryContext(ctx context.Context, query string, args ...any) (*sql.Rows, error)
}

// loadArtifactsForMessageIDsFromQueryer fetches the artifacts attached to the
// given messages and groups them by message ID.
//
// Artifacts are queried in ascending created_at order, so each per-message
// slice keeps that order. An empty messageIDs yields an empty, non-nil map
// without touching the database. Messages without artifacts have no map entry.
func loadArtifactsForMessageIDsFromQueryer(ctx context.Context, db queryRowsContexter, messageIDs []string) (map[string][]Artifact, error) {
	result := make(map[string][]Artifact)
	if len(messageIDs) == 0 {
		return result, nil
	}

	// database/sql takes variadic args as []any, so the IDs are re-boxed.
	args := make([]any, 0, len(messageIDs))
	for _, messageID := range messageIDs {
		args = append(args, messageID)
	}

	// placeholders is defined elsewhere; presumably it renders one bind
	// marker per ID for the IN list (verify against its definition).
	rows, err := db.QueryContext(
		ctx,
		`SELECT
			artifact_id, message_id, path, kind, metadata_json, created_at
		FROM artifacts
		WHERE message_id IN (`+placeholders(len(messageIDs))+`)
		ORDER BY created_at ASC`,
		args...,
	)
	if err != nil {
		return nil, fmt.Errorf("query artifacts: %w", err)
	}
	defer rows.Close()

	for rows.Next() {
		artifact, err := scanArtifact(rows)
		if err != nil {
			return nil, err
		}
		result[artifact.MessageID] = append(result[artifact.MessageID], artifact)
	}

	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterate artifacts: %w", err)
	}

	return result, nil
}

// refreshReadyStates re-evaluates every task of runID that is currently
// 'planned' or 'ready'.
//
// A task whose dependencies are all done is set to 'ready'; otherwise it is
// set to 'planned'. Rows already in the desired state are left untouched.
// Promotion to 'ready' records a task_ready event; a move back to 'planned'
// only updates the row.
func refreshReadyStates(ctx context.Context, tx *sql.Tx, runID string, now time.Time) error {
	rows, err := tx.QueryContext(
		ctx,
		`SELECT task_id, status, title
		FROM tasks
		WHERE run_id = ?
		  AND status IN ('planned', 'ready')`,
		runID,
	)
	if err != nil {
		return fmt.Errorf("query tasks for readiness refresh: %w", err)
	}
	defer rows.Close()

	type readinessRow struct {
		taskID string
		status string
		title  string
	}

	// Drain the result set into memory so the per-task queries and updates
	// below run on tx only after iteration has finished.
	var tasks []readinessRow
	for rows.Next() {
		var row readinessRow
		if err := rows.Scan(&row.taskID, &row.status, &row.title); err != nil {
			return fmt.Errorf("scan readiness refresh row: %w", err)
		}
		tasks = append(tasks, row)
	}
	if err := rows.Err(); err != nil {
		return fmt.Errorf("iterate readiness refresh rows: %w", err)
	}

	for _, task := range tasks {
		ready, err := dependenciesSatisfied(ctx, tx, runID, task.taskID)
		if err != nil {
			return err
		}

		desired := "planned"
		if ready {
			desired = "ready"
		}
		if desired == task.status {
			continue
		}

		_, err = tx.ExecContext(
			ctx,
			`UPDATE tasks
			SET status = ?, updated_at = ?
			WHERE run_id = ? AND task_id = ?`,
			desired,
			formatTime(now),
			runID,
			task.taskID,
		)
		if err != nil {
			return fmt.Errorf("update task readiness: %w", err)
		}

		if desired == "ready" {
			if err := insertEvent(ctx, tx, eventInput{
				RunID:       runID,
				TaskID:      task.taskID,
				Source:      "orch",
				EventType:   "task_ready",
				Summary:     defaultString(task.title, task.taskID),
				PayloadJSON: marshalJSON(map[string]any{"task_id": task.taskID}),
				CreatedAt:   now,
			}); err != nil {
				return err
			}
		}
	}

	return nil
}

// dependenciesSatisfied reports whether none of taskID's dependencies within
// runID are in a status other than 'done'.
//
// NOTE(review): the inner JOIN means a dependency row whose target task does
// not exist is not counted and therefore does not hold the task back.
func dependenciesSatisfied(ctx context.Context, tx *sql.Tx, runID, taskID string) (bool, error) {
	var pendingCount int
	err := tx.QueryRowContext(
		ctx,
		`SELECT COUNT(*)
		FROM task_dependencies d
		JOIN tasks dep
		  ON dep.run_id = d.run_id
		 AND dep.task_id = d.depends_on_task_id
		WHERE d.run_id = ?
		  AND d.task_id = ?
		  AND dep.status <> 'done'`,
		runID,
		taskID,
	).Scan(&pendingCount)
	if err != nil {
		return false, fmt.Errorf("query dependency readiness: %w", err)
	}
	return pendingCount == 0, nil
}

// updateRunAggregateStatus recomputes the status of runID from its tasks'
// statuses (see deriveRunStatus) and writes it, with updated_at, to runs.
func updateRunAggregateStatus(ctx context.Context, tx *sql.Tx, runID string, now time.Time) error {
	counts, err := collectTaskCounts(ctx, tx, runID)
	if err != nil {
		return err
	}
	nextStatus := deriveRunStatus(counts)

	_, err = tx.ExecContext(
		ctx,
		`UPDATE runs
		SET status = ?, updated_at = ?
		WHERE run_id = ?`,
		nextStatus,
		formatTime(now),
		runID,
	)
	if err != nil {
		return fmt.Errorf("update run aggregate status: %w", err)
	}
	return nil
}

// collectTaskCounts returns the number of tasks of runID per status. Statuses
// with no tasks are absent from the map, so lookups for them yield zero.
func collectTaskCounts(ctx context.Context, db queryRowsContexter, runID string) (map[string]int, error) {
	rows, err := db.QueryContext(
		ctx,
		`SELECT status, COUNT(*)
		FROM tasks
		WHERE run_id = ?
		GROUP BY status`,
		runID,
	)
	if err != nil {
		return nil, fmt.Errorf("query task counts: %w", err)
	}
	defer rows.Close()

	counts := make(map[string]int)
	for rows.Next() {
		var (
			status string
			count  int
		)
		if err := rows.Scan(&status, &count); err != nil {
			return nil, fmt.Errorf("scan task count: %w", err)
		}
		counts[status] = count
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterate task counts: %w", err)
	}

	return counts, nil
}

// deriveRunStatus folds per-status task counts into a single run status.
//
// The first matching rule wins: no tasks -> "active"; any blocked ->
// "blocked"; any failed -> "failed"; any running or dispatched -> "running";
// any ready -> "ready"; any planned -> "planned"; any done -> "done"; every
// task cancelled -> "cancelled"; anything else -> "active".
func deriveRunStatus(counts map[string]int) string {
	total := 0
	for _, count := range counts {
		total += count
	}

	switch {
	case total == 0:
		return "active"
	case counts["blocked"] > 0:
		return "blocked"
	case counts["failed"] > 0:
		return "failed"
	case counts["running"] > 0 || counts["dispatched"] > 0:
		return "running"
	case counts["ready"] > 0:
		return "ready"
	case counts["planned"] > 0:
		return "planned"
	case counts["done"] > 0:
		return "done"
	case counts["cancelled"] == total:
		return "cancelled"
	default:
		return "active"
	}
}

// reconcileTaskStatus maps an inbox thread status to the task status it
// implies. It returns "" for a thread status it does not recognize.
func reconcileTaskStatus(threadStatus string) string {
	switch threadStatus {
	case "pending":
		return "dispatched"
	case "claimed", "in_progress":
		return "running"
	case "blocked":
		return "blocked"
	case "done":
		return "done"
	case "failed":
		return "failed"
	case "cancelled":
		return "cancelled"
	default:
		return ""
	}
}

// normalizePriority trims priority, substitutes "normal" via defaultString,
// and accepts only "low", "normal" or "high". Any other value is rejected
// with an error wrapping ErrInvalidInput.
func normalizePriority(priority string) (string, error) {
	priority = defaultString(strings.TrimSpace(priority), "normal")
	switch priority {
	case "low", "normal", "high":
		return priority, nil
	default:
		return "", fmt.Errorf("%w: priority must be one of low, normal, high", ErrInvalidInput)
	}
}

// normalizeWaitEventTypes trims the given event types, drops blanks and
// duplicates (first occurrence wins, order preserved), and falls back to the
// default set when nothing usable remains. A fresh slice is always returned.
func normalizeWaitEventTypes(eventTypes []string) []string {
	normalized := make([]string, 0, len(eventTypes))
	seen := make(map[string]struct{}, len(eventTypes))
	for _, eventType := range eventTypes {
		eventType = strings.TrimSpace(eventType)
		if eventType == "" {
			continue
		}
		if _, ok := seen[eventType]; ok {
			continue
		}
		seen[eventType] = struct{}{}
		normalized = append(normalized, eventType)
	}

	// Covers both a nil/empty input and an input made only of blanks.
	if len(normalized) == 0 {
		return []string{"task_ready", "task_blocked", "task_done", "task_failed"}
	}
	return normalized
}

// validateAndNormalizeJSONDefault returns value as compact JSON.
//
// value is trimmed first; if it is then empty, defaultValue is used instead.
// Input that is not valid JSON is rejected with an error that wraps
// ErrInvalidInput and names fieldName.
func validateAndNormalizeJSONDefault(fieldName, value, defaultValue string) (string, error) {
	normalized := strings.TrimSpace(value)
	if normalized == "" {
		normalized = defaultValue
	}

	// json.Compact rejects invalid input itself, so one pass both validates
	// and normalizes.
	var compact bytes.Buffer
	if err := json.Compact(&compact, []byte(normalized)); err != nil {
		return "", fmt.Errorf("%w: %s must be valid JSON", ErrInvalidInput, fieldName)
	}
	return compact.String(), nil
}

// buildDispatchPayload renders the JSON payload describing one dispatch
// attempt of task.
//
// The run/task identifiers, attempt number, title, summary and priority are
// always present. "acceptance" is added only when task.AcceptanceJSON is
// non-empty and decodes successfully. Each workspace field is added, trimmed,
// only when it is not blank.
func buildDispatchPayload(task Task, attemptNo int, workspace DispatchWorkspace) string {
	payload := map[string]any{
		"run_id":     task.RunID,
		"task_id":    task.TaskID,
		"attempt_no": attemptNo,
		"title":      task.Title,
		"summary":    task.Summary,
		"priority":   task.Priority,
	}

	if len(task.AcceptanceJSON) > 0 {
		var acceptance any
		// Undecodable acceptance data is left out rather than failing the
		// whole dispatch payload.
		if err := json.Unmarshal(task.AcceptanceJSON, &acceptance); err == nil {
			payload["acceptance"] = acceptance
		}
	}

	optional := []struct {
		key   string
		value string
	}{
		{"base_ref", workspace.BaseRef},
		{"base_commit", workspace.BaseCommit},
		{"branch_name", workspace.BranchName},
		{"worktree_path", workspace.WorktreePath},
		{"workspace_status", workspace.WorkspaceStatus},
	}
	for _, field := range optional {
		if trimmed := strings.TrimSpace(field.value); trimmed != "" {
			payload[field.key] = trimmed
		}
	}

	return marshalJSON(payload)
}

// marshalJSON encodes v as JSON text. It is best-effort: if encoding fails it
// returns "{}" instead of an error.
func marshalJSON(v any) string {
	data, err := json.Marshal(v)
	if err != nil {
		return "{}"
	}
	return string(data)
}

// nullIfEmpty returns nil when value is empty or only whitespace, and the
// original (untrimmed) value otherwise.
func nullIfEmpty(value string) any {
	if strings.TrimSpace(value) == "" {
		return nil
	}
	return value
}

// summarizeAnswer returns the trimmed first line of body, or "task answer"
// when body is empty or only whitespace.
func summarizeAnswer(body string) string {
	body = strings.TrimSpace(body)
	if body == "" {
		return "task answer"
	}
	// body now starts with a non-space character, so its first line cannot
	// trim down to the empty string.
	firstLine, _, _ := strings.Cut(body, "\n")
	return strings.TrimSpace(firstLine)
}

// reconcileWorkspaceStatus maps an inbox thread status to the workspace
// status it implies. It returns "" for a thread status it does not recognize.
func reconcileWorkspaceStatus(threadStatus string) string {
	switch threadStatus {
	case "pending":
		return "created"
	case "claimed", "in_progress", "blocked":
		return "active"
	case "done", "failed":
		return "completed"
	case "cancelled":
		return "abandoned"
	default:
		return ""
	}
}

// isUniqueConstraintError reports whether err's message contains
// "unique constraint failed", compared case-insensitively. A nil err is not a
// constraint violation.
func isUniqueConstraintError(err error) bool {
	if err == nil {
		return false
	}
	return strings.Contains(strings.ToLower(err.Error()), "unique constraint failed")
}