feat(tasks): add coalescing queue and task lifecycle guards
- Coalescing queue: use HasPendingTaskForIssue (queued/dispatched only) instead of HasActiveTaskForIssue, so comments posted during a running task enqueue exactly one follow-up task that picks up all new comments.
- Stale task cleanup: the runtime sweeper now fails orphaned tasks when their runtime goes offline (daemon crash/network partition).
- Cancel-aware daemon: handleTask checks the task status after execution and discards results if the task was cancelled mid-run (e.g. reassign).
- Terminal issue guard: ClaimTaskForRuntime auto-cancels pending tasks for done/cancelled issues instead of executing them.
- Race condition safety net: a unique partial index ensures at most one pending task per issue at the DB level.
This commit is contained in:
parent
32f795e1ef
commit
b112d1f1ae
13 changed files with 148 additions and 3 deletions
|
|
@@ -94,6 +94,7 @@ func NewRouter(pool *pgxpool.Pool, hub *realtime.Hub, bus *events.Bus) chi.Route
|
|||
r.Post("/runtimes/{runtimeId}/usage", h.ReportRuntimeUsage)
|
||||
r.Post("/runtimes/{runtimeId}/ping/{pingId}/result", h.ReportPingResult)
|
||||
|
||||
r.Get("/tasks/{taskId}/status", h.GetTaskStatus)
|
||||
r.Post("/tasks/{taskId}/start", h.StartTask)
|
||||
r.Post("/tasks/{taskId}/progress", h.ReportTaskProgress)
|
||||
r.Post("/tasks/{taskId}/complete", h.CompleteTask)
|
||||
|
|
|
|||
|
|
@@ -49,6 +49,26 @@ func runRuntimeSweeper(ctx context.Context, queries *db.Queries, bus *events.Bus
|
|||
|
||||
slog.Info("runtime sweeper: marked stale runtimes offline", "count", len(staleRows), "workspaces", len(workspaces))
|
||||
|
||||
// Fail orphaned tasks (dispatched/running) whose runtimes just went offline.
|
||||
failedTasks, err := queries.FailTasksForOfflineRuntimes(ctx)
|
||||
if err != nil {
|
||||
slog.Warn("runtime sweeper: failed to clean up stale tasks", "error", err)
|
||||
} else if len(failedTasks) > 0 {
|
||||
slog.Info("runtime sweeper: failed orphaned tasks", "count", len(failedTasks))
|
||||
for _, ft := range failedTasks {
|
||||
bus.Publish(events.Event{
|
||||
Type: protocol.EventTaskFailed,
|
||||
ActorType: "system",
|
||||
Payload: map[string]any{
|
||||
"task_id": util.UUIDToString(ft.ID),
|
||||
"agent_id": util.UUIDToString(ft.AgentID),
|
||||
"issue_id": util.UUIDToString(ft.IssueID),
|
||||
"status": "failed",
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Notify frontend clients so they re-fetch runtime list.
|
||||
for wsID := range workspaces {
|
||||
bus.Publish(events.Event{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue