Auto-heal missing CLI listener socket (#679)

* Auto-heal missing CLI socket listener

* Add Sentry socket listener breadcrumbs and failure capture
This commit is contained in:
Lawrence Chen 2026-02-28 01:19:38 -08:00 committed by GitHub
parent be89812bea
commit 168e6b9b25
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 348 additions and 7 deletions

View file

@ -1099,6 +1099,10 @@ final class AppDelegate: NSObject, NSApplicationDelegate, UNUserNotificationCent
private var didAttemptStartupSessionRestore = false
private var isApplyingStartupSessionRestore = false
private var sessionAutosaveTimer: DispatchSourceTimer?
private var socketListenerHealthTimer: DispatchSourceTimer?
private static let socketListenerHealthCheckInterval: DispatchTimeInterval = .seconds(5)
private var lastSocketListenerUnhealthyCaptureAt: Date = .distantPast
private static let socketListenerUnhealthyCaptureCooldown: TimeInterval = 60
private let sessionPersistenceQueue = DispatchQueue(
label: "com.cmuxterm.app.sessionPersistence",
qos: .utility
@ -1354,6 +1358,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate, UNUserNotificationCent
isTerminatingApp = true
_ = saveSessionSnapshot(includeScrollback: true, removeWhenEmpty: false)
stopSessionAutosaveTimer()
stopSocketListenerHealthMonitor()
TerminalController.shared.stop()
BrowserHistoryStore.shared.flushPendingSaves()
if TelemetrySettings.enabledForCurrentLaunch {
@ -1381,6 +1386,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate, UNUserNotificationCent
installLifecycleSnapshotObserversIfNeeded()
prepareStartupSessionSnapshotIfNeeded()
startSessionAutosaveTimerIfNeeded()
startSocketListenerHealthMonitorIfNeeded()
#if DEBUG
setupJumpUnreadUITestIfNeeded()
setupGotoSplitUITestIfNeeded()
@ -1991,6 +1997,58 @@ final class AppDelegate: NSObject, NSApplicationDelegate, UNUserNotificationCent
TerminalController.shared.start(tabManager: tabManager, socketPath: config.path, accessMode: config.mode)
}
/// Starts the repeating timer that checks the CLI socket listener's health.
///
/// Does nothing when a monitor timer is already installed. The timer fires on
/// the main queue every `socketListenerHealthCheckInterval`, with the first
/// tick one interval from now, and each tick runs
/// `restartSocketListenerIfNeededForHealthCheck(source:)` with the source
/// label `"health.timer"`.
private func startSocketListenerHealthMonitorIfNeeded() {
    guard socketListenerHealthTimer == nil else { return }
    let timer = DispatchSource.makeTimerSource(queue: .main)
    timer.schedule(
        deadline: .now() + Self.socketListenerHealthCheckInterval,
        repeating: Self.socketListenerHealthCheckInterval
    )
    timer.setEventHandler { [weak self] in
        Task { @MainActor [weak self] in
            // The check runs in a separately enqueued task, so a tick that fired
            // just before `stopSocketListenerHealthMonitor()` could otherwise
            // still execute afterwards and restart a listener that was stopped
            // on purpose (for example while the app is terminating).
            guard let self, self.socketListenerHealthTimer != nil else { return }
            self.restartSocketListenerIfNeededForHealthCheck(source: "health.timer")
        }
    }
    timer.resume()
    socketListenerHealthTimer = timer
}
/// Cancels the socket listener health timer, if one is installed, and clears
/// the stored reference.
private func stopSocketListenerHealthMonitor() {
    guard let activeTimer = socketListenerHealthTimer else { return }
    activeTimer.cancel()
    socketListenerHealthTimer = nil
}
/// Checks the CLI socket listener and restarts it when any health signal fails.
///
/// Does nothing when `socketListenerConfigurationIfEnabled()` returns `nil`.
/// Every unhealthy observation records a Sentry breadcrumb. The warning capture
/// is limited by `socketListenerUnhealthyCaptureCooldown`, and that cooldown is
/// cleared as soon as a healthy check is observed.
///
/// - Parameter source: Label for whatever triggered the check. It is included
///   in the telemetry payload and forwarded to
///   `restartSocketListenerIfEnabled(source:)`.
private func restartSocketListenerIfNeededForHealthCheck(source: String) {
    guard let config = socketListenerConfigurationIfEnabled() else { return }
    let health = TerminalController.shared.socketListenerHealth(expectedSocketPath: config.path)
    if health.isHealthy {
        // Healthy again: the next unhealthy observation is captured immediately.
        lastSocketListenerUnhealthyCaptureAt = .distantPast
        return
    }

    // Booleans are reported as 1/0 in the telemetry payload.
    func flag(_ value: Bool) -> Int { value ? 1 : 0 }

    var payload: [String: Any] = ["source": source, "path": config.path]
    payload["isRunning"] = flag(health.isRunning)
    payload["acceptLoopAlive"] = flag(health.acceptLoopAlive)
    payload["socketPathMatches"] = flag(health.socketPathMatches)
    payload["socketPathExists"] = flag(health.socketPathExists)
    payload["failureSignals"] = health.failureSignals
    sentryBreadcrumb("socket.listener.unhealthy", category: "socket", data: payload)

    let checkedAt = Date()
    let sinceLastCapture = checkedAt.timeIntervalSince(lastSocketListenerUnhealthyCaptureAt)
    if sinceLastCapture >= Self.socketListenerUnhealthyCaptureCooldown {
        lastSocketListenerUnhealthyCaptureAt = checkedAt
        sentryCaptureWarning(
            "socket.listener.unhealthy",
            category: "socket",
            data: payload,
            contextKey: "socket_listener_health"
        )
    }

    restartSocketListenerIfEnabled(source: source)
}
private func disableSuddenTerminationIfNeeded() {
guard !didDisableSuddenTermination else { return }
ProcessInfo.processInfo.disableSuddenTermination()

View file

@ -8,3 +8,38 @@ func sentryBreadcrumb(_ message: String, category: String = "ui", data: [String:
crumb.data = data
SentrySDK.addBreadcrumb(crumb)
}
/// Sends `message` to Sentry as an event at the given level.
///
/// Nothing is sent unless `TelemetrySettings.enabledForCurrentLaunch` is true.
///
/// - Parameters:
///   - message: Event message.
///   - level: Sentry level applied to the event scope.
///   - category: Value stored under the `"category"` tag.
///   - data: Optional payload attached as a scope context.
///   - contextKey: Key for the context payload; `category` is used when `nil`.
private func sentryCaptureMessage(
    _ message: String,
    level: SentryLevel,
    category: String,
    data: [String: Any]?,
    contextKey: String?
) {
    if !TelemetrySettings.enabledForCurrentLaunch {
        return
    }
    _ = SentrySDK.capture(message: message) { scope in
        scope.setLevel(level)
        scope.setTag(value: category, key: "category")
        guard let payload = data else { return }
        scope.setContext(value: payload, key: contextKey ?? category)
    }
}
/// Captures `message` as a warning-level Sentry event.
///
/// - Parameters:
///   - message: Event message.
///   - category: Category tag for the event; defaults to `"ui"`.
///   - data: Optional payload attached as context.
///   - contextKey: Optional key for the context payload.
func sentryCaptureWarning(
    _ message: String,
    category: String = "ui",
    data: [String: Any]? = nil,
    contextKey: String? = nil
) {
    sentryCaptureMessage(
        message,
        level: SentryLevel.warning,
        category: category,
        data: data,
        contextKey: contextKey
    )
}
/// Captures `message` as an error-level Sentry event.
///
/// - Parameters:
///   - message: Event message.
///   - category: Category tag for the event; defaults to `"ui"`.
///   - data: Optional payload attached as context.
///   - contextKey: Optional key for the context payload.
func sentryCaptureError(
    _ message: String,
    category: String = "ui",
    data: [String: Any]? = nil,
    contextKey: String? = nil
) {
    sentryCaptureMessage(
        message,
        level: SentryLevel.error,
        category: category,
        data: data,
        contextKey: contextKey
    )
}

View file

@ -8,6 +8,26 @@ import WebKit
/// Allows automated testing and external control of terminal tabs
@MainActor
class TerminalController {
/// Snapshot of the individual checks that make up socket listener health.
struct SocketListenerHealth: Sendable {
    let isRunning: Bool
    let acceptLoopAlive: Bool
    let socketPathMatches: Bool
    let socketPathExists: Bool

    /// Names of the failed checks, in a fixed order: running, accept loop,
    /// path match, path existence. Empty when every check passed.
    var failureSignals: [String] {
        let checks: [(passed: Bool, signal: String)] = [
            (isRunning, "not_running"),
            (acceptLoopAlive, "accept_loop_dead"),
            (socketPathMatches, "socket_path_mismatch"),
            (socketPathExists, "socket_missing")
        ]
        return checks.filter { !$0.passed }.map { $0.signal }
    }

    /// True only when all four checks passed.
    var isHealthy: Bool {
        isRunning && acceptLoopAlive && socketPathMatches && socketPathExists
    }
}
static let shared = TerminalController()
private nonisolated(unsafe) var socketPath = "/tmp/cmux.sock"
@ -344,6 +364,39 @@ class TerminalController {
return info.kp_eproc.e_ppid
}
/// Builds the telemetry payload shared by socket listener events.
///
/// - Parameters:
///   - stage: Label for the step being reported.
///   - errnoCode: Optional errno value; when present, both the code and its
///     `strerror` description are added.
///   - extra: Additional entries; on a key collision the `extra` value wins.
/// - Returns: A dictionary holding the stage, the current socket path, the
///   running and accept-loop flags as 1/0, the server socket descriptor, and
///   any errno or extra entries.
private nonisolated func socketListenerEventData(
    stage: String,
    errnoCode: Int32? = nil,
    extra: [String: Any] = [:]
) -> [String: Any] {
    var payload: [String: Any] = [
        "stage": stage,
        "path": socketPath,
        "isRunning": isRunning ? 1 : 0,
        "acceptLoopAlive": acceptLoopAlive ? 1 : 0,
        "serverSocket": Int(serverSocket)
    ]
    if let code = errnoCode {
        payload["errno"] = Int(code)
        payload["errnoDescription"] = String(cString: strerror(code))
    }
    // Caller-supplied entries override the defaults above.
    payload.merge(extra) { _, incoming in incoming }
    return payload
}
/// Reports a socket listener failure as both a Sentry breadcrumb and an
/// error-level capture, using the payload from `socketListenerEventData`.
///
/// - Parameters:
///   - message: Event name used for the breadcrumb and the capture.
///   - stage: Label for the step that failed.
///   - errnoCode: Optional errno value observed at the failure.
///   - extra: Additional payload entries.
private nonisolated func reportSocketListenerFailure(
    message: String,
    stage: String,
    errnoCode: Int32? = nil,
    extra: [String: Any] = [:]
) {
    let payload = socketListenerEventData(
        stage: stage,
        errnoCode: errnoCode,
        extra: extra
    )
    sentryBreadcrumb(message, category: "socket", data: payload)
    sentryCaptureError(
        message,
        category: "socket",
        data: payload,
        contextKey: "socket_listener"
    )
}
func start(tabManager: TabManager, socketPath: String, accessMode: SocketControlMode) {
self.tabManager = tabManager
self.accessMode = accessMode
@ -365,7 +418,13 @@ class TerminalController {
// Create socket
serverSocket = socket(AF_UNIX, SOCK_STREAM, 0)
guard serverSocket >= 0 else {
let errnoCode = errno
print("TerminalController: Failed to create socket")
reportSocketListenerFailure(
message: "socket.listener.start.failed",
stage: "create_socket",
errnoCode: errnoCode
)
return
}
@ -386,8 +445,14 @@ class TerminalController {
}
guard bindResult >= 0 else {
let errnoCode = errno
print("TerminalController: Failed to bind socket")
close(serverSocket)
reportSocketListenerFailure(
message: "socket.listener.start.failed",
stage: "bind",
errnoCode: errnoCode
)
return
}
@ -395,13 +460,27 @@ class TerminalController {
// Listen
guard listen(serverSocket, 5) >= 0 else {
let errnoCode = errno
print("TerminalController: Failed to listen on socket")
close(serverSocket)
reportSocketListenerFailure(
message: "socket.listener.start.failed",
stage: "listen",
errnoCode: errnoCode
)
return
}
isRunning = true
print("TerminalController: Listening on \(socketPath)")
sentryBreadcrumb(
"socket.listener.listening",
category: "socket",
data: [
"path": socketPath,
"mode": accessMode.rawValue
]
)
// Wire batched port scanner results back to workspace state.
PortScanner.shared.onPortsUpdated = { [weak self] workspaceId, panelId, ports in
@ -428,6 +507,22 @@ class TerminalController {
}
}
/// Collects the current health signals for the socket listener.
///
/// - Parameter expectedSocketPath: Path the listener is expected to serve.
/// - Returns: A snapshot containing the running flag, the accept-loop flag,
///   whether the controller's socket path equals `expectedSocketPath`, and
///   whether `lstat` reports a socket-type node at that path.
nonisolated func socketListenerHealth(expectedSocketPath: String) -> SocketListenerHealth {
    // Read the listener state first, then probe the filesystem.
    let listenerRunning = isRunning
    let loopRunning = acceptLoopAlive
    let pathIsExpected = socketPath == expectedSocketPath

    var info = stat()
    let isSocketNode: Bool
    if lstat(expectedSocketPath, &info) == 0 {
        // Only a socket node counts as existing.
        isSocketNode = (info.st_mode & S_IFMT) == S_IFSOCK
    } else {
        isSocketNode = false
    }

    return SocketListenerHealth(
        isRunning: listenerRunning,
        acceptLoopAlive: loopRunning,
        socketPathMatches: pathIsExpected,
        socketPathExists: isSocketNode
    )
}
nonisolated func stop() {
isRunning = false
if serverSocket >= 0 {
@ -440,7 +535,17 @@ class TerminalController {
/// Applies the access mode's file permissions to the socket path with `chmod`.
///
/// On failure the error is printed and recorded as a Sentry breadcrumb; the
/// failure is not propagated to the caller.
private func applySocketPermissions() {
    let mode = mode_t(accessMode.socketFilePermissions)
    guard chmod(socketPath, mode) != 0 else { return }

    // Capture errno before any other call can overwrite it.
    let chmodErrno = errno
    let octalMode = String(mode, radix: 8)
    print("TerminalController: Failed to set socket permissions to \(octalMode) for \(socketPath)")

    let payload = socketListenerEventData(
        stage: "chmod",
        errnoCode: chmodErrno,
        extra: ["permissions": octalMode]
    )
    sentryBreadcrumb(
        "socket.listener.permissions.failed",
        category: "socket",
        data: payload
    )
}
@ -537,9 +642,34 @@ class TerminalController {
private nonisolated func acceptLoop() {
acceptLoopAlive = true
sentryBreadcrumb(
"socket.listener.accept_loop.started",
category: "socket",
data: socketListenerEventData(stage: "accept_loop_start")
)
var exitReason = "stopped"
var lastAcceptErrno: Int32?
defer {
if isRunning && exitReason == "stopped" {
exitReason = "unexpected_loop_exit"
}
let shouldCaptureExit = exitReason != "stopped"
acceptLoopAlive = false
isRunning = false
if shouldCaptureExit {
let data = socketListenerEventData(
stage: "accept_loop_exit",
errnoCode: lastAcceptErrno,
extra: ["reason": exitReason]
)
sentryBreadcrumb("socket.listener.accept_loop.exited", category: "socket", data: data)
sentryCaptureError(
"socket.listener.accept_loop.exited",
category: "socket",
data: data,
contextKey: "socket_listener"
)
}
}
var consecutiveFailures = 0
@ -555,10 +685,24 @@ class TerminalController {
guard clientSocket >= 0 else {
if isRunning {
let errnoCode = errno
lastAcceptErrno = errnoCode
consecutiveFailures += 1
print("TerminalController: Accept failed (\(consecutiveFailures) consecutive)")
if consecutiveFailures == 1 || consecutiveFailures % 10 == 0 {
sentryBreadcrumb(
"socket.listener.accept.failed",
category: "socket",
data: socketListenerEventData(
stage: "accept",
errnoCode: errnoCode,
extra: ["consecutiveFailures": consecutiveFailures]
)
)
}
if consecutiveFailures >= 50 {
print("TerminalController: Too many consecutive accept failures, exiting accept loop")
exitReason = "too_many_accept_failures"
break
}
usleep(10_000) // 10ms backoff

View file

@ -8065,3 +8065,107 @@ final class GhosttyTerminalViewVisibilityPolicyTests: XCTestCase {
)
}
}
/// Tests for `TerminalController.socketListenerHealth(expectedSocketPath:)` and
/// the `SocketListenerHealth` value it produces.
final class TerminalControllerSocketListenerHealthTests: XCTestCase {
    /// Returns a unique socket path under `/tmp` for a single test.
    private func makeTempSocketPath() -> String {
        "/tmp/cmux-socket-health-\(UUID().uuidString).sock"
    }

    /// Builds an error in the POSIX domain with the given code and description.
    private func posixError(code: Int32, description: String) -> NSError {
        NSError(
            domain: NSPOSIXErrorDomain,
            code: Int(code),
            userInfo: [NSLocalizedDescriptionKey: description]
        )
    }

    /// Creates a Unix stream socket bound to `path` and puts it in listening state.
    ///
    /// Any existing file at `path` is unlinked first.
    ///
    /// - Parameter path: Filesystem path to bind; it must fit in `sun_path`
    ///   together with its NUL terminator.
    /// - Returns: The listening descriptor. The caller closes it and unlinks `path`.
    /// - Throws: An `NSPOSIXErrorDomain` error when the path is too long or when
    ///   `socket`, `bind` or `listen` fails.
    private func bindUnixSocket(at path: String) throws -> Int32 {
        var addr = sockaddr_un()
        let pathCapacity = MemoryLayout.size(ofValue: addr.sun_path)
        // `sun_path` is a fixed-size buffer: refuse paths that would not fit
        // rather than writing past its end.
        guard path.utf8.count < pathCapacity else {
            throw posixError(code: ENAMETOOLONG, description: "Unix socket path is too long")
        }

        unlink(path)

        let fd = socket(AF_UNIX, SOCK_STREAM, 0)
        guard fd >= 0 else {
            throw posixError(code: errno, description: "Failed to create Unix socket")
        }

        addr.sun_family = sa_family_t(AF_UNIX)
        path.withCString { ptr in
            withUnsafeMutablePointer(to: &addr.sun_path) { pathPtr in
                let pathBuf = UnsafeMutableRawPointer(pathPtr).assumingMemoryBound(to: CChar.self)
                // Bounded copy; the length was validated above.
                _ = strlcpy(pathBuf, ptr, pathCapacity)
            }
        }

        let bindResult = withUnsafePointer(to: &addr) { ptr in
            ptr.withMemoryRebound(to: sockaddr.self, capacity: 1) { sockaddrPtr in
                Darwin.bind(fd, sockaddrPtr, socklen_t(MemoryLayout<sockaddr_un>.size))
            }
        }
        guard bindResult == 0 else {
            // Read errno before close() can overwrite it.
            let code = errno
            Darwin.close(fd)
            throw posixError(code: code, description: "Failed to bind Unix socket")
        }

        guard Darwin.listen(fd, 1) == 0 else {
            let code = errno
            Darwin.close(fd)
            throw posixError(code: code, description: "Failed to listen on Unix socket")
        }

        return fd
    }

    /// A real socket node at the expected path is reported as existing, while
    /// the overall health of the shared controller is still expected to be false.
    func testSocketListenerHealthRecognizesSocketPath() throws {
        let path = makeTempSocketPath()
        let fd = try bindUnixSocket(at: path)
        defer {
            Darwin.close(fd)
            unlink(path)
        }

        let health = TerminalController.shared.socketListenerHealth(expectedSocketPath: path)
        XCTAssertTrue(health.socketPathExists)
        XCTAssertFalse(health.isHealthy)
    }

    /// A regular file at the expected path does not count as an existing socket.
    func testSocketListenerHealthRejectsRegularFile() throws {
        let path = makeTempSocketPath()
        let url = URL(fileURLWithPath: path)
        try "not-a-socket".write(to: url, atomically: true, encoding: .utf8)
        defer { try? FileManager.default.removeItem(at: url) }

        let health = TerminalController.shared.socketListenerHealth(expectedSocketPath: path)
        XCTAssertFalse(health.socketPathExists)
        XCTAssertFalse(health.isHealthy)
    }

    /// With every check passing, the snapshot is healthy and has no failure signals.
    func testSocketListenerHealthFailureSignalsAreEmptyWhenHealthy() {
        let health = TerminalController.SocketListenerHealth(
            isRunning: true,
            acceptLoopAlive: true,
            socketPathMatches: true,
            socketPathExists: true
        )
        XCTAssertTrue(health.isHealthy)
        XCTAssertEqual(health.failureSignals, [])
    }

    /// With every check failing, all four signals are reported in declaration order.
    func testSocketListenerHealthFailureSignalsIncludeAllDetectedProblems() {
        let health = TerminalController.SocketListenerHealth(
            isRunning: false,
            acceptLoopAlive: false,
            socketPathMatches: false,
            socketPathExists: false
        )
        XCTAssertFalse(health.isHealthy)
        XCTAssertEqual(
            health.failureSignals,
            ["not_running", "accept_loop_dead", "socket_path_mismatch", "socket_missing"]
        )
    }
}

View file

@ -81,26 +81,26 @@ def main() -> int:
)
require(
app_delegate,
"let mode = SocketControlSettings.effectiveMode(userMode: userMode)",
"`restartSocketListener` no longer uses effective socket control mode",
"private func socketListenerConfigurationIfEnabled() -> (mode: SocketControlMode, path: String)? {",
"Missing shared socket listener configuration helper",
failures,
)
require(
app_delegate,
"let socketPath = SocketControlSettings.socketPath()",
"`restartSocketListener` no longer uses configured socket path",
'restartSocketListenerIfEnabled(source: "menu.command")',
"`restartSocketListener` no longer delegates to restart helper",
failures,
)
require(
app_delegate,
"TerminalController.shared.stop()",
"`restartSocketListener` no longer stops current listener before restart",
"`restartSocketListenerIfEnabled` no longer stops current listener before restart",
failures,
)
require(
app_delegate,
"TerminalController.shared.start(tabManager: tabManager, socketPath: socketPath, accessMode: mode)",
"`restartSocketListener` no longer starts listener with current settings",
"TerminalController.shared.start(tabManager: tabManager, socketPath: config.path, accessMode: config.mode)",
"`restartSocketListenerIfEnabled` no longer starts listener with current settings",
failures,
)