feat(tasks): add coalescing queue and task lifecycle guards

- Coalescing queue: use HasPendingTaskForIssue (queued/dispatched only)
  instead of HasActiveTaskForIssue so comments during a running task
  enqueue exactly one follow-up task that picks up all new comments.
- Stale task cleanup: runtime sweeper now fails orphaned tasks when
  their runtime goes offline (daemon crash/network partition).
- Cancel-aware daemon: handleTask checks task status after execution
  and discards results if the task was cancelled mid-run (e.g. reassign).
- Terminal issue guard: ClaimTaskForRuntime auto-cancels pending tasks
  for done/cancelled issues instead of executing them.
- Race condition safety net: unique partial index ensures at most one
  pending task per issue at the DB level.
This commit is contained in:
Jiayuan 2026-03-29 17:52:35 +08:00
parent 32f795e1ef
commit b112d1f1ae
13 changed files with 148 additions and 3 deletions

View file

@ -94,6 +94,7 @@ func NewRouter(pool *pgxpool.Pool, hub *realtime.Hub, bus *events.Bus) chi.Route
r.Post("/runtimes/{runtimeId}/usage", h.ReportRuntimeUsage)
r.Post("/runtimes/{runtimeId}/ping/{pingId}/result", h.ReportPingResult)
r.Get("/tasks/{taskId}/status", h.GetTaskStatus)
r.Post("/tasks/{taskId}/start", h.StartTask)
r.Post("/tasks/{taskId}/progress", h.ReportTaskProgress)
r.Post("/tasks/{taskId}/complete", h.CompleteTask)

View file

@ -49,6 +49,26 @@ func runRuntimeSweeper(ctx context.Context, queries *db.Queries, bus *events.Bus
slog.Info("runtime sweeper: marked stale runtimes offline", "count", len(staleRows), "workspaces", len(workspaces))
// Fail orphaned tasks (dispatched/running) whose runtimes just went offline.
failedTasks, err := queries.FailTasksForOfflineRuntimes(ctx)
if err != nil {
slog.Warn("runtime sweeper: failed to clean up stale tasks", "error", err)
} else if len(failedTasks) > 0 {
slog.Info("runtime sweeper: failed orphaned tasks", "count", len(failedTasks))
for _, ft := range failedTasks {
bus.Publish(events.Event{
Type: protocol.EventTaskFailed,
ActorType: "system",
Payload: map[string]any{
"task_id": util.UUIDToString(ft.ID),
"agent_id": util.UUIDToString(ft.AgentID),
"issue_id": util.UUIDToString(ft.IssueID),
"status": "failed",
},
})
}
}
// Notify frontend clients so they re-fetch runtime list.
for wsID := range workspaces {
bus.Publish(events.Event{