multica/server/internal/daemon/health.go
LinYushen 6d2a0b45d2
refactor: decouple task lifecycle from issue status (#151)
* refactor: decouple task lifecycle from issue status, add daemon health server

- Remove automatic issue status changes from StartTask (in_progress),
  CompleteTask (in_review), and FailTask (blocked) in task service.
  Issue status is now fully managed by the agent via `multica issue status`.
- Update agent prompt and meta skill to instruct agents to manage issue
  status themselves (in_progress → done/in_review/blocked).
- Add daemon health HTTP server on 127.0.0.1:19514 with /health endpoint
  exposing pid, uptime, agents, and workspaces. Fail fast if port is taken
  (another daemon already running).
- Update `multica status` to check both server and daemon health.
- Add Save button to repos section in workspace settings UI.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* refactor(daemon): simplify prompt, fix runtime config path, improve task error logging

- Slim down BuildPrompt to a minimal hint; detailed workflow now lives in CLAUDE.md/AGENTS.md
- Write CLAUDE.md to workDir root instead of .claude/CLAUDE.md
- Fix git-exclude pattern (.claude → CLAUDE.md)
- Decouple task queue reconciliation from issue status changes (agents manage status via CLI)
- Add diagnostic logging when CompleteTask/FailTask fail due to unexpected task state

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(task): use task_completed/task_failed inbox notification types

FailTask was sending "agent_blocked" which conflates agent crash with
issue-level blocked status. Align notification types with the new
decoupled model: task_completed and task_failed. Update frontend types
and labels accordingly.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-27 18:30:21 +08:00

87 lines
2.3 KiB
Go

package daemon
import (
"context"
"encoding/json"
"fmt"
"net"
"net/http"
"os"
"time"
)
// HealthResponse is returned by the daemon's local health endpoint.
// It is the JSON body written by the /health handler in serveHealth.
type HealthResponse struct {
// Status is the daemon state; serveHealth always reports "running".
Status string `json:"status"`
// PID is the process ID of the daemon (os.Getpid).
PID int `json:"pid"`
// Uptime is the time elapsed since the daemon's start time, truncated to
// whole seconds and formatted with time.Duration's String method.
Uptime string `json:"uptime"`
// DaemonID is copied from the daemon configuration.
DaemonID string `json:"daemon_id"`
// DeviceName is copied from the daemon configuration.
DeviceName string `json:"device_name"`
// ServerURL is the configured server base URL.
ServerURL string `json:"server_url"`
// Agents lists the keys of the configured agents map; serveHealth does not
// sort them, so the order follows map iteration.
Agents []string `json:"agents"`
// Workspaces has one entry per workspace currently tracked by the daemon.
Workspaces []healthWorkspace `json:"workspaces"`
}
// healthWorkspace is the per-workspace entry embedded in HealthResponse.
type healthWorkspace struct {
// ID is the key under which the workspace is stored in the daemon's
// workspaces map.
ID string `json:"id"`
// Runtimes holds the runtime IDs recorded for the workspace.
Runtimes []string `json:"runtimes"`
}
// listenHealth opens a TCP listener on 127.0.0.1 at the configured health
// port. On success the listener is returned. Any failure to bind is reported
// as "another daemon is already running" with the underlying error wrapped,
// since the port being taken is the expected cause.
func (d *Daemon) listenHealth() (net.Listener, error) {
	healthAddr := fmt.Sprintf("127.0.0.1:%d", d.cfg.HealthPort)

	listener, listenErr := net.Listen("tcp", healthAddr)
	if listenErr == nil {
		return listener, nil
	}
	return nil, fmt.Errorf("another daemon is already running on %s: %w", healthAddr, listenErr)
}
// serveHealth runs the health HTTP server on the given listener.
//
// It blocks until ctx is cancelled (the server is then closed) or until the
// server stops on its own with an error, which is logged as a warning.
//
// The single endpoint, /health, responds with a JSON-encoded HealthResponse
// describing this process: pid, uptime since startedAt, the configured
// daemon identity and agents, and the workspaces currently tracked.
func (d *Daemon) serveHealth(ctx context.Context, ln net.Listener, startedAt time.Time) {
	// Bound how long a single connection may take so a stalled client cannot
	// hold server resources indefinitely.
	const (
		readHeaderTimeout = 5 * time.Second
		readTimeout       = 10 * time.Second
		writeTimeout      = 10 * time.Second
		idleTimeout       = 60 * time.Second
	)

	mux := http.NewServeMux()
	mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
		// Snapshot workspace state while holding the lock. The runtime ID
		// slices are copied so that JSON encoding, which happens after the
		// lock is released, never reads memory still owned by the daemon.
		// Slices are allocated non-nil so they encode as [] rather than null,
		// matching how the agents list is encoded.
		d.mu.Lock()
		wsList := make([]healthWorkspace, 0, len(d.workspaces))
		for id, ws := range d.workspaces {
			runtimes := make([]string, len(ws.runtimeIDs))
			copy(runtimes, ws.runtimeIDs)
			wsList = append(wsList, healthWorkspace{
				ID:       id,
				Runtimes: runtimes,
			})
		}
		d.mu.Unlock()

		agents := make([]string, 0, len(d.cfg.Agents))
		for name := range d.cfg.Agents {
			agents = append(agents, name)
		}

		resp := HealthResponse{
			Status:     "running",
			PID:        os.Getpid(),
			Uptime:     time.Since(startedAt).Truncate(time.Second).String(),
			DaemonID:   d.cfg.DaemonID,
			DeviceName: d.cfg.DeviceName,
			ServerURL:  d.cfg.ServerBaseURL,
			Agents:     agents,
			Workspaces: wsList,
		}

		w.Header().Set("Content-Type", "application/json")
		// The status line may already be on the wire when Encode fails, so
		// the failure can only be logged, not reported to the client.
		if err := json.NewEncoder(w).Encode(resp); err != nil {
			d.logger.Warn("health response write failed", "error", err)
		}
	})

	srv := &http.Server{
		Handler:           mux,
		ReadHeaderTimeout: readHeaderTimeout,
		ReadTimeout:       readTimeout,
		WriteTimeout:      writeTimeout,
		IdleTimeout:       idleTimeout,
	}

	// Close the server when ctx is cancelled. The stopped channel lets the
	// watcher exit if Serve returns first, so it never outlives this call.
	stopped := make(chan struct{})
	defer close(stopped)
	go func() {
		select {
		case <-ctx.Done():
			if err := srv.Close(); err != nil {
				d.logger.Warn("health server close error", "error", err)
			}
		case <-stopped:
		}
	}()

	d.logger.Info("health server listening", "addr", ln.Addr().String())
	if err := srv.Serve(ln); err != nil && err != http.ErrServerClosed {
		d.logger.Warn("health server error", "error", err)
	}
}