fix(daemon): prevent stuck tasks from blocking queue and add concurrent execution

- Expand FailAgentTask SQL to accept dispatched OR running status - Add FailStaleTasks server-side sweeper (dispatched >5min, running >2.5h) - Fix daemon handleTask to fail tasks on all error paths (StartTask, CompleteTask) - Make daemon poll loop concurrent with semaphore (default 20 parallel tasks) - Raise default agent max_concurrent_tasks from 1 to 6 (migration 023) - Add --max-concurrent-tasks CLI flag and MULTICA_DAEMON_MAX_CONCURRENT_TASKS env
2026-03-30 03:08:52 +08:00 · 2026-03-30 03:08:52 +08:00 · 67f1f49b09
commit 67f1f49b09
parent 00c54232ad
11 changed files with 261 additions and 88 deletions
--- a/server/cmd/multica/cmd_daemon.go
+++ b/server/cmd/multica/cmd_daemon.go
@ -61,6 +61,7 @@ func init() {
 	f.Duration("poll-interval", 0, "Task poll interval (env: MULTICA_DAEMON_POLL_INTERVAL)")
 	f.Duration("heartbeat-interval", 0, "Heartbeat interval (env: MULTICA_DAEMON_HEARTBEAT_INTERVAL)")
 	f.Duration("agent-timeout", 0, "Per-task timeout (env: MULTICA_AGENT_TIMEOUT)")
 	f.Int("max-concurrent-tasks", 0, "Max tasks running in parallel (env: MULTICA_DAEMON_MAX_CONCURRENT_TASKS)")
 	daemonLogsCmd.Flags().BoolP("follow", "f", false, "Follow log output")
 	daemonLogsCmd.Flags().IntP("lines", "n", 50, "Number of lines to show")
@ -187,6 +188,9 @@ func buildDaemonStartArgs(cmd *cobra.Command) []string {
 	if d, _ := cmd.Flags().GetDuration("agent-timeout"); d > 0 {
 		args = append(args, "--agent-timeout", d.String())
 	}
 	if n, _ := cmd.Flags().GetInt("max-concurrent-tasks"); n > 0 {
 		args = append(args, "--max-concurrent-tasks", strconv.Itoa(n))
 	}
 	// Forward global persistent flags.
 	if v, _ := cmd.Flags().GetString("server-url"); v != "" {
@ -212,6 +216,9 @@ func runDaemonForeground(cmd *cobra.Command) error {
 	if d, _ := cmd.Flags().GetDuration("agent-timeout"); d > 0 {
 		overrides.AgentTimeout = d
 	}
 	if n, _ := cmd.Flags().GetInt("max-concurrent-tasks"); n > 0 {
 		overrides.MaxConcurrentTasks = n
 	}
 	cfg, err := daemon.LoadConfig(overrides)
 	if err != nil {
--- a/server/cmd/server/runtime_sweeper.go
+++ b/server/cmd/server/runtime_sweeper.go
@ -12,16 +12,24 @@ import (
 )
 const (
-	// sweepInterval is how often we check for stale runtimes.
+	// sweepInterval is how often we check for stale runtimes and tasks.
 	sweepInterval = 30 * time.Second
 	// staleThresholdSeconds marks runtimes offline if no heartbeat for this long.
 	// The daemon heartbeat interval is 15s, so 45s = 3 missed heartbeats.
 	staleThresholdSeconds = 45.0
 	// dispatchTimeoutSeconds fails tasks stuck in 'dispatched' beyond this.
 	// The dispatched→running transition should be near-instant, so 5 minutes
 	// means something went wrong (e.g. StartTask API call failed silently).
 	dispatchTimeoutSeconds = 300.0
 	// runningTimeoutSeconds fails tasks stuck in 'running' beyond this.
 	// The default agent timeout is 2h, so 2.5h gives a generous buffer.
 	runningTimeoutSeconds = 9000.0
 )
 // runRuntimeSweeper periodically marks runtimes as offline if their
-// last_seen_at exceeds the stale threshold. This handles cases where the
+// last_seen_at exceeds the stale threshold, and fails orphaned tasks.
-// daemon crashes or is killed without calling the deregister endpoint.
+// This handles cases where the daemon crashes, is killed without calling
 // the deregister endpoint, or leaves tasks in a non-terminal state.
 func runRuntimeSweeper(ctx context.Context, queries *db.Queries, bus *events.Bus) {
 	ticker := time.NewTicker(sweepInterval)
 	defer ticker.Stop()
@ -31,55 +39,95 @@ func runRuntimeSweeper(ctx context.Context, queries *db.Queries, bus *events.Bus
 		case <-ctx.Done():
 			return
 		case <-ticker.C:
-			staleRows, err := queries.MarkStaleRuntimesOffline(ctx, staleThresholdSeconds)
+			sweepStaleRuntimes(ctx, queries, bus)
-			if err != nil {
+			sweepStaleTasks(ctx, queries, bus)
 				slog.Warn("runtime sweeper: failed to mark stale runtimes offline", "error", err)
 				continue
 			}
 			if len(staleRows) == 0 {
 				continue
 			}
 			// Collect unique workspace IDs to notify.
 			workspaces := make(map[string]bool)
 			for _, row := range staleRows {
 				wsID := util.UUIDToString(row.WorkspaceID)
 				workspaces[wsID] = true
 			}
 			slog.Info("runtime sweeper: marked stale runtimes offline", "count", len(staleRows), "workspaces", len(workspaces))
 			// Fail orphaned tasks (dispatched/running) whose runtimes just went offline.
 			failedTasks, err := queries.FailTasksForOfflineRuntimes(ctx)
 			if err != nil {
 				slog.Warn("runtime sweeper: failed to clean up stale tasks", "error", err)
 			} else if len(failedTasks) > 0 {
 				slog.Info("runtime sweeper: failed orphaned tasks", "count", len(failedTasks))
 				for _, ft := range failedTasks {
 					bus.Publish(events.Event{
 						Type:      protocol.EventTaskFailed,
 						ActorType: "system",
 						Payload: map[string]any{
 							"task_id":  util.UUIDToString(ft.ID),
 							"agent_id": util.UUIDToString(ft.AgentID),
 							"issue_id": util.UUIDToString(ft.IssueID),
 							"status":   "failed",
 						},
 					})
 				}
 			}
 			// Notify frontend clients so they re-fetch runtime list.
 			for wsID := range workspaces {
 				bus.Publish(events.Event{
 					Type:        protocol.EventDaemonRegister,
 					WorkspaceID: wsID,
 					ActorType:   "system",
 					Payload: map[string]any{
 						"action": "stale_sweep",
 					},
 				})
 			}
 		}
 	}
 }
 // sweepStaleRuntimes marks runtimes offline if they haven't heartbeated,
 // then fails any tasks belonging to those offline runtimes.
 func sweepStaleRuntimes(ctx context.Context, queries *db.Queries, bus *events.Bus) {
 	staleRows, err := queries.MarkStaleRuntimesOffline(ctx, staleThresholdSeconds)
 	if err != nil {
 		slog.Warn("runtime sweeper: failed to mark stale runtimes offline", "error", err)
 		return
 	}
 	if len(staleRows) == 0 {
 		return
 	}
 	// Collect unique workspace IDs to notify.
 	workspaces := make(map[string]bool)
 	for _, row := range staleRows {
 		wsID := util.UUIDToString(row.WorkspaceID)
 		workspaces[wsID] = true
 	}
 	slog.Info("runtime sweeper: marked stale runtimes offline", "count", len(staleRows), "workspaces", len(workspaces))
 	// Fail orphaned tasks (dispatched/running) whose runtimes just went offline.
 	failedTasks, err := queries.FailTasksForOfflineRuntimes(ctx)
 	if err != nil {
 		slog.Warn("runtime sweeper: failed to clean up stale tasks", "error", err)
 	} else if len(failedTasks) > 0 {
 		slog.Info("runtime sweeper: failed orphaned tasks", "count", len(failedTasks))
 		for _, ft := range failedTasks {
 			bus.Publish(events.Event{
 				Type:      protocol.EventTaskFailed,
 				ActorType: "system",
 				Payload: map[string]any{
 					"task_id":  util.UUIDToString(ft.ID),
 					"agent_id": util.UUIDToString(ft.AgentID),
 					"issue_id": util.UUIDToString(ft.IssueID),
 					"status":   "failed",
 				},
 			})
 		}
 	}
 	// Notify frontend clients so they re-fetch runtime list.
 	for wsID := range workspaces {
 		bus.Publish(events.Event{
 			Type:        protocol.EventDaemonRegister,
 			WorkspaceID: wsID,
 			ActorType:   "system",
 			Payload: map[string]any{
 				"action": "stale_sweep",
 			},
 		})
 	}
 }
 // sweepStaleTasks fails tasks stuck in dispatched/running for too long,
 // even when the runtime is still online. This handles cases where:
 // - The agent process hangs and the daemon is still heartbeating
 // - The daemon failed to report task completion/failure
 // - A server restart left tasks in a non-terminal state
 func sweepStaleTasks(ctx context.Context, queries *db.Queries, bus *events.Bus) {
 	failedTasks, err := queries.FailStaleTasks(ctx, db.FailStaleTasksParams{
 		DispatchTimeoutSecs: dispatchTimeoutSeconds,
 		RunningTimeoutSecs:  runningTimeoutSeconds,
 	})
 	if err != nil {
 		slog.Warn("task sweeper: failed to clean up stale tasks", "error", err)
 		return
 	}
 	if len(failedTasks) == 0 {
 		return
 	}
 	slog.Info("task sweeper: failed stale tasks", "count", len(failedTasks))
 	for _, ft := range failedTasks {
 		bus.Publish(events.Event{
 			Type:      protocol.EventTaskFailed,
 			ActorType: "system",
 			Payload: map[string]any{
 				"task_id":  util.UUIDToString(ft.ID),
 				"agent_id": util.UUIDToString(ft.AgentID),
 				"issue_id": util.UUIDToString(ft.IssueID),
 				"status":   "failed",
 			},
 		})
 	}
 }
--- a/server/internal/daemon/config.go
+++ b/server/internal/daemon/config.go
@ -18,34 +18,37 @@ const (
 	DefaultRuntimeName          = "Local Agent"
 	DefaultConfigReloadInterval = 5 * time.Second
 	DefaultHealthPort           = 19514
 	DefaultMaxConcurrentTasks   = 20
 )
 // Config holds all daemon configuration.
 type Config struct {
-	ServerBaseURL     string
+	ServerBaseURL      string
-	DaemonID          string
+	DaemonID           string
-	DeviceName        string
+	DeviceName         string
-	RuntimeName       string
+	RuntimeName        string
-	Agents            map[string]AgentEntry // "claude" -> entry, "codex" -> entry
+	Agents             map[string]AgentEntry // "claude" -> entry, "codex" -> entry
-	WorkspacesRoot    string                // base path for execution envs (default: ~/multica_workspaces)
+	WorkspacesRoot     string                // base path for execution envs (default: ~/multica_workspaces)
-	KeepEnvAfterTask  bool                  // preserve env after task for debugging
+	KeepEnvAfterTask   bool                  // preserve env after task for debugging
-	HealthPort        int                   // local HTTP port for health checks (default: 19514)
+	HealthPort         int                   // local HTTP port for health checks (default: 19514)
-	PollInterval      time.Duration
+	MaxConcurrentTasks int                   // max tasks running in parallel (default: 20)
-	HeartbeatInterval time.Duration
+	PollInterval       time.Duration
-	AgentTimeout      time.Duration
+	HeartbeatInterval  time.Duration
 	AgentTimeout       time.Duration
 }
 // Overrides allows CLI flags to override environment variables and defaults.
 // Zero values are ignored and the env/default value is used instead.
 type Overrides struct {
-	ServerURL         string
+	ServerURL          string
-	WorkspacesRoot    string
+	WorkspacesRoot     string
-	PollInterval      time.Duration
+	PollInterval       time.Duration
-	HeartbeatInterval time.Duration
+	HeartbeatInterval  time.Duration
-	AgentTimeout      time.Duration
+	AgentTimeout       time.Duration
-	DaemonID          string
+	MaxConcurrentTasks int
-	DeviceName        string
+	DaemonID           string
-	RuntimeName       string
+	DeviceName         string
 	RuntimeName        string
 }
 // LoadConfig builds the daemon configuration from environment variables
@ -112,6 +115,14 @@ func LoadConfig(overrides Overrides) (Config, error) {
 		agentTimeout = overrides.AgentTimeout
 	}
 	maxConcurrentTasks, err := intFromEnv("MULTICA_DAEMON_MAX_CONCURRENT_TASKS", DefaultMaxConcurrentTasks)
 	if err != nil {
 		return Config{}, err
 	}
 	if overrides.MaxConcurrentTasks > 0 {
 		maxConcurrentTasks = overrides.MaxConcurrentTasks
 	}
 	// String overrides
 	daemonID := envOrDefault("MULTICA_DAEMON_ID", host)
 	if overrides.DaemonID != "" {
@ -149,17 +160,18 @@ func LoadConfig(overrides Overrides) (Config, error) {
 	keepEnv := os.Getenv("MULTICA_KEEP_ENV_AFTER_TASK") == "true" || os.Getenv("MULTICA_KEEP_ENV_AFTER_TASK") == "1"
 	return Config{
-		ServerBaseURL:     serverBaseURL,
+		ServerBaseURL:      serverBaseURL,
-		DaemonID:          daemonID,
+		DaemonID:           daemonID,
-		DeviceName:        deviceName,
+		DeviceName:         deviceName,
-		RuntimeName:       runtimeName,
+		RuntimeName:        runtimeName,
-		Agents:            agents,
+		Agents:             agents,
-		WorkspacesRoot:    workspacesRoot,
+		WorkspacesRoot:     workspacesRoot,
-		KeepEnvAfterTask:  keepEnv,
+		KeepEnvAfterTask:   keepEnv,
-		HealthPort:        DefaultHealthPort,
+		HealthPort:         DefaultHealthPort,
-		PollInterval:      pollInterval,
+		MaxConcurrentTasks: maxConcurrentTasks,
-		HeartbeatInterval: heartbeatInterval,
+		PollInterval:       pollInterval,
-		AgentTimeout:      agentTimeout,
+		HeartbeatInterval:  heartbeatInterval,
 		AgentTimeout:       agentTimeout,
 	}, nil
 }
--- a/server/internal/daemon/daemon.go
+++ b/server/internal/daemon/daemon.go
@ -503,11 +503,22 @@ func (d *Daemon) usageScanLoop(ctx context.Context) {
 }
 func (d *Daemon) pollLoop(ctx context.Context) error {
 	sem := make(chan struct{}, d.cfg.MaxConcurrentTasks)
 	var wg sync.WaitGroup
 	pollOffset := 0
 	pollCount := 0
 	for {
 		select {
 		case <-ctx.Done():
 			d.logger.Info("poll loop stopping, waiting for in-flight tasks", "max_wait", "30s")
 			waitDone := make(chan struct{})
 			go func() { wg.Wait(); close(waitDone) }()
 			select {
 			case <-waitDone:
 			case <-time.After(30 * time.Second):
 				d.logger.Warn("timed out waiting for in-flight tasks")
 			}
 			return ctx.Err()
 		default:
 		}
@ -515,6 +526,7 @@ func (d *Daemon) pollLoop(ctx context.Context) error {
 		runtimeIDs := d.allRuntimeIDs()
 		if len(runtimeIDs) == 0 {
 			if err := sleepWithContext(ctx, d.cfg.PollInterval); err != nil {
 				wg.Wait()
 				return err
 			}
 			continue
@ -523,21 +535,40 @@ func (d *Daemon) pollLoop(ctx context.Context) error {
 		claimed := false
 		n := len(runtimeIDs)
 		for i := 0; i < n; i++ {
 			// Check if we have capacity before claiming.
 			select {
 			case sem <- struct{}{}:
 				// Acquired a slot.
 			default:
 				// All slots occupied, stop trying to claim.
 				d.logger.Debug("poll: at capacity", "running", d.cfg.MaxConcurrentTasks)
 				goto sleep
 			}
 			rid := runtimeIDs[(pollOffset+i)%n]
 			task, err := d.client.ClaimTask(ctx, rid)
 			if err != nil {
 				<-sem // Release the slot.
 				d.logger.Warn("claim task failed", "runtime_id", rid, "error", err)
 				continue
 			}
 			if task != nil {
 				d.logger.Info("task received", "task_id", task.ID, "issue_id", task.IssueID)
-				d.handleTask(ctx, *task)
+				wg.Add(1)
 				go func(t Task) {
 					defer wg.Done()
 					defer func() { <-sem }()
 					d.handleTask(ctx, t)
 				}(*task)
 				claimed = true
 				pollOffset = (pollOffset + i + 1) % n
 				break
 			}
 			// No task for this runtime, release the slot and try next.
 			<-sem
 		}
 	sleep:
 		if !claimed {
 			pollCount++
 			if pollCount%20 == 1 {
@ -545,6 +576,7 @@ func (d *Daemon) pollLoop(ctx context.Context) error {
 			}
 			pollOffset = (pollOffset + 1) % n
 			if err := sleepWithContext(ctx, d.cfg.PollInterval); err != nil {
 				wg.Wait()
 				return err
 			}
 		} else {
@ -562,6 +594,9 @@ func (d *Daemon) handleTask(ctx context.Context, task Task) {
 	if err := d.client.StartTask(ctx, task.ID); err != nil {
 		d.logger.Error("start task failed", "task_id", task.ID, "error", err)
 		if failErr := d.client.FailTask(ctx, task.ID, fmt.Sprintf("start task failed: %s", err.Error())); failErr != nil {
 			d.logger.Error("fail task after start error", "task_id", task.ID, "error", failErr)
 		}
 		return
 	}
@ -594,7 +629,10 @@ func (d *Daemon) handleTask(ctx context.Context, task Task) {
 	default:
 		d.logger.Info("task completed", "task_id", task.ID, "status", result.Status)
 		if err := d.client.CompleteTask(ctx, task.ID, result.Comment, result.BranchName, result.SessionID, result.WorkDir); err != nil {
-			d.logger.Error("complete task failed", "task_id", task.ID, "error", err)
+			d.logger.Error("complete task failed, falling back to fail", "task_id", task.ID, "error", err)
 			if failErr := d.client.FailTask(ctx, task.ID, fmt.Sprintf("complete task failed: %s", err.Error())); failErr != nil {
 				d.logger.Error("fail task fallback also failed", "task_id", task.ID, "error", failErr)
 			}
 		}
 	}
 }
--- a/server/internal/daemon/helpers.go
+++ b/server/internal/daemon/helpers.go
@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"os"
 	"strconv"
 	"strings"
 	"time"
 )
@ -28,6 +29,18 @@ func durationFromEnv(key string, fallback time.Duration) (time.Duration, error)
 	return d, nil
 }
 func intFromEnv(key string, fallback int) (int, error) {
 	value := strings.TrimSpace(os.Getenv(key))
 	if value == "" {
 		return fallback, nil
 	}
 	n, err := strconv.Atoi(value)
 	if err != nil {
 		return 0, fmt.Errorf("%s: invalid integer %q: %w", key, value, err)
 	}
 	return n, nil
 }
 func sleepWithContext(ctx context.Context, d time.Duration) error {
 	timer := time.NewTimer(d)
 	defer timer.Stop()
--- a/server/internal/handler/agent.go
+++ b/server/internal/handler/agent.go
@ -251,7 +251,7 @@ func (h *Handler) CreateAgent(w http.ResponseWriter, r *http.Request) {
 		req.Visibility = "workspace"
 	}
 	if req.MaxConcurrentTasks == 0 {
-		req.MaxConcurrentTasks = 1
+		req.MaxConcurrentTasks = 6
 	}
 	runtime, err := h.Queries.GetAgentRuntimeForWorkspace(r.Context(), db.GetAgentRuntimeForWorkspaceParams{
--- a/server/internal/service/task.go
+++ b/server/internal/service/task.go
@ -209,7 +209,7 @@ func (s *TaskService) FailTask(ctx context.Context, taskID pgtype.UUID, errMsg s
 	})
 	if err != nil {
 		if existing, lookupErr := s.Queries.GetAgentTask(ctx, taskID); lookupErr == nil {
-			slog.Warn("fail task failed: task not in running state",
+			slog.Warn("fail task failed: task not in dispatched/running state",
 				"task_id", util.UUIDToString(taskID),
 				"current_status", existing.Status,
 				"issue_id", util.UUIDToString(existing.IssueID),
--- a/server/migrations/023_agent_concurrency_default.down.sql
+++ b/server/migrations/023_agent_concurrency_default.down.sql
@ -0,0 +1 @@
 ALTER TABLE agent ALTER COLUMN max_concurrent_tasks SET DEFAULT 1;
--- a/server/migrations/023_agent_concurrency_default.up.sql
+++ b/server/migrations/023_agent_concurrency_default.up.sql
@ -0,0 +1,2 @@
 ALTER TABLE agent ALTER COLUMN max_concurrent_tasks SET DEFAULT 6;
 UPDATE agent SET max_concurrent_tasks = 6 WHERE max_concurrent_tasks = 1;
--- a/server/pkg/db/generated/agent.sql.go
+++ b/server/pkg/db/generated/agent.sql.go
@ -229,7 +229,7 @@ func (q *Queries) DeleteAgent(ctx context.Context, id pgtype.UUID) error {
 const failAgentTask = `-- name: FailAgentTask :one
 UPDATE agent_task_queue
 SET status = 'failed', completed_at = now(), error = $2
-WHERE id = $1 AND status = 'running'
+WHERE id = $1 AND status IN ('dispatched', 'running')
 RETURNING id, agent_id, issue_id, status, priority, dispatched_at, started_at, completed_at, result, error, created_at, context, runtime_id, session_id, work_dir
 `
@ -261,6 +261,48 @@ func (q *Queries) FailAgentTask(ctx context.Context, arg FailAgentTaskParams) (A
 	return i, err
 }
 const failStaleTasks = `-- name: FailStaleTasks :many
 UPDATE agent_task_queue
 SET status = 'failed', completed_at = now(), error = 'task timed out'
 WHERE (status = 'dispatched' AND dispatched_at < now() - make_interval(secs => $1::double precision))
   OR (status = 'running' AND started_at < now() - make_interval(secs => $2::double precision))
 RETURNING id, agent_id, issue_id
 `
 type FailStaleTasksParams struct {
 	DispatchTimeoutSecs float64 `json:"dispatch_timeout_secs"`
 	RunningTimeoutSecs  float64 `json:"running_timeout_secs"`
 }
 type FailStaleTasksRow struct {
 	ID      pgtype.UUID `json:"id"`
 	AgentID pgtype.UUID `json:"agent_id"`
 	IssueID pgtype.UUID `json:"issue_id"`
 }
 // Fails tasks stuck in dispatched/running beyond the given thresholds.
 // Handles cases where the daemon is alive but the task is orphaned
 // (e.g. agent process hung, daemon failed to report completion).
 func (q *Queries) FailStaleTasks(ctx context.Context, arg FailStaleTasksParams) ([]FailStaleTasksRow, error) {
 	rows, err := q.db.Query(ctx, failStaleTasks, arg.DispatchTimeoutSecs, arg.RunningTimeoutSecs)
 	if err != nil {
 		return nil, err
 	}
 	defer rows.Close()
 	items := []FailStaleTasksRow{}
 	for rows.Next() {
 		var i FailStaleTasksRow
 		if err := rows.Scan(&i.ID, &i.AgentID, &i.IssueID); err != nil {
 			return nil, err
 		}
 		items = append(items, i)
 	}
 	if err := rows.Err(); err != nil {
 		return nil, err
 	}
 	return items, nil
 }
 const getAgent = `-- name: GetAgent :one
 SELECT id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, tools, triggers, runtime_id, instructions FROM agent
 WHERE id = $1
--- a/server/pkg/db/queries/agent.sql
+++ b/server/pkg/db/queries/agent.sql
@ -90,9 +90,19 @@ LIMIT 1;
 -- name: FailAgentTask :one
 UPDATE agent_task_queue
 SET status = 'failed', completed_at = now(), error = $2
-WHERE id = $1 AND status = 'running'
+WHERE id = $1 AND status IN ('dispatched', 'running')
 RETURNING *;
 -- name: FailStaleTasks :many
 -- Fails tasks stuck in dispatched/running beyond the given thresholds.
 -- Handles cases where the daemon is alive but the task is orphaned
 -- (e.g. agent process hung, daemon failed to report completion).
 UPDATE agent_task_queue
 SET status = 'failed', completed_at = now(), error = 'task timed out'
 WHERE (status = 'dispatched' AND dispatched_at < now() - make_interval(secs => @dispatch_timeout_secs::double precision))
   OR (status = 'running' AND started_at < now() - make_interval(secs => @running_timeout_secs::double precision))
 RETURNING id, agent_id, issue_id;
 -- name: CountRunningTasks :one
 SELECT count(*) FROM agent_task_queue
 WHERE agent_id = $1 AND status IN ('dispatched', 'running');
		`@ -0,0 +1 @@`
							`ALTER TABLE agent ALTER COLUMN max_concurrent_tasks SET DEFAULT 1;`
		`@ -0,0 +1,2 @@`
							`ALTER TABLE agent ALTER COLUMN max_concurrent_tasks SET DEFAULT 6;`
							`UPDATE agent SET max_concurrent_tasks = 6 WHERE max_concurrent_tasks = 1;`