fix(daemon): prevent stuck tasks from blocking queue and add concurrent execution

- Expand FailAgentTask SQL to accept dispatched OR running status
- Add FailStaleTasks server-side sweeper (dispatched >5min, running >2.5h)
- Fix daemon handleTask to fail tasks on all error paths (StartTask, CompleteTask)
- Make daemon poll loop concurrent with semaphore (default 20 parallel tasks)
- Raise default agent max_concurrent_tasks from 1 to 6 (migration 023)
- Add --max-concurrent-tasks CLI flag and MULTICA_DAEMON_MAX_CONCURRENT_TASKS env
This commit is contained in:
Jiayuan 2026-03-30 03:08:52 +08:00
parent 00c54232ad
commit 67f1f49b09
11 changed files with 261 additions and 88 deletions

View file

@@ -90,9 +90,19 @@ LIMIT 1;
-- name: FailAgentTask :one
-- Marks a single task as failed and records the error message.
-- $1 is the task id; $2 is the text stored in the error column.
-- Matches tasks in either 'dispatched' or 'running' status, so a task that
-- fails before it starts running is not left stuck in the queue.
-- Tasks in any other status are left untouched and no row is returned.
UPDATE agent_task_queue
SET status = 'failed', completed_at = now(), error = $2
WHERE id = $1 AND status IN ('dispatched', 'running')
RETURNING *;
-- name: FailStaleTasks :many
-- Sweeps the queue for tasks that have stayed in one status for too long and
-- marks them failed with the error 'task timed out'.
--   * 'dispatched' tasks are compared on dispatched_at against dispatch_timeout_secs
--   * 'running' tasks are compared on started_at against running_timeout_secs
-- Both thresholds are given in seconds. This covers a daemon that is still
-- alive but whose task was orphaned (e.g. agent process hung, daemon failed
-- to report completion).
-- Returns id, agent_id and issue_id of every task that was failed.
UPDATE agent_task_queue
SET
    status = 'failed',
    completed_at = now(),
    error = 'task timed out'
WHERE
    (
        status = 'dispatched'
        AND dispatched_at < now() - make_interval(secs => @dispatch_timeout_secs::double precision)
    )
    OR (
        status = 'running'
        AND started_at < now() - make_interval(secs => @running_timeout_secs::double precision)
    )
RETURNING id, agent_id, issue_id;
-- name: CountRunningTasks :one
-- Counts the tasks of one agent ($1 = agent_id) that are currently in flight,
-- i.e. whose status is 'dispatched' or 'running'.
SELECT count(*)
FROM agent_task_queue
WHERE agent_id = $1
  AND (status = 'dispatched' OR status = 'running');