fix(daemon): prevent stuck tasks from blocking queue and add concurrent execution
- Expand FailAgentTask SQL to accept dispatched OR running status
- Add FailStaleTasks server-side sweeper (dispatched >5min, running >2.5h)
- Fix daemon handleTask to fail tasks on all error paths (StartTask, CompleteTask)
- Make daemon poll loop concurrent with semaphore (default 20 parallel tasks)
- Raise default agent max_concurrent_tasks from 1 to 6 (migration 023)
- Add --max-concurrent-tasks CLI flag and MULTICA_DAEMON_MAX_CONCURRENT_TASKS environment variable
This commit is contained in:
parent
00c54232ad
commit
67f1f49b09
11 changed files with 261 additions and 88 deletions
|
|
@@ -90,9 +90,19 @@ LIMIT 1;
|
|||
-- name: FailAgentTask :one
-- Marks the task with id $1 as failed: sets status to 'failed', stamps
-- completed_at with now(), stores $2 as the error text, and returns the
-- updated row. A task whose status does not pass the WHERE filter is left
-- untouched and no row is returned.
-- NOTE(review): two WHERE lines are shown below. They look like the removed
-- and added sides of a diff hunk (status = 'running' widened to
-- IN ('dispatched', 'running')); confirm that only the second one is
-- present in the actual query file.
|
||||
UPDATE agent_task_queue
|
||||
SET status = 'failed', completed_at = now(), error = $2
|
||||
WHERE id = $1 AND status = 'running'
|
||||
WHERE id = $1 AND status IN ('dispatched', 'running')
|
||||
RETURNING *;
|
||||
|
||||
-- name: FailStaleTasks :many
|
||||
-- Fails tasks stuck in dispatched/running beyond the given thresholds.
|
||||
-- Handles cases where the daemon is alive but the task is orphaned
|
||||
-- (e.g. agent process hung, daemon failed to report completion).
--
-- Parameters:
--   @dispatch_timeout_secs  a 'dispatched' task is failed once its
--                           dispatched_at is more than this many seconds old.
--   @running_timeout_secs   a 'running' task is failed once its started_at
--                           is more than this many seconds old.
-- Every matching row gets status 'failed', completed_at = now() and the
-- fixed error text 'task timed out'; the id, agent_id and issue_id of each
-- failed row are returned.
-- NOTE(review): a comparison against a NULL timestamp is never true, so a
-- row whose dispatched_at / started_at is NULL would never be swept here;
-- confirm those columns are always set when the status is entered.
|
||||
UPDATE agent_task_queue
|
||||
SET status = 'failed', completed_at = now(), error = 'task timed out'
|
||||
WHERE (status = 'dispatched' AND dispatched_at < now() - make_interval(secs => @dispatch_timeout_secs::double precision))
|
||||
OR (status = 'running' AND started_at < now() - make_interval(secs => @running_timeout_secs::double precision))
|
||||
RETURNING id, agent_id, issue_id;
|
||||
|
||||
-- name: CountRunningTasks :one
-- Returns the number of tasks belonging to agent $1 whose status is
-- 'dispatched' or 'running'. Despite the name, tasks that are dispatched
-- but not yet running are included in the count.
|
||||
SELECT count(*) FROM agent_task_queue
|
||||
WHERE agent_id = $1 AND status IN ('dispatched', 'running');
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue