Auto-heal missing CLI listener socket (#679)
* Auto-heal missing CLI socket listener * Add Sentry socket listener breadcrumbs and failure capture
This commit is contained in:
parent
be89812bea
commit
168e6b9b25
5 changed files with 348 additions and 7 deletions
|
|
@ -1099,6 +1099,10 @@ final class AppDelegate: NSObject, NSApplicationDelegate, UNUserNotificationCent
|
|||
private var didAttemptStartupSessionRestore = false
|
||||
private var isApplyingStartupSessionRestore = false
|
||||
private var sessionAutosaveTimer: DispatchSourceTimer?
|
||||
private var socketListenerHealthTimer: DispatchSourceTimer?
|
||||
private static let socketListenerHealthCheckInterval: DispatchTimeInterval = .seconds(5)
|
||||
private var lastSocketListenerUnhealthyCaptureAt: Date = .distantPast
|
||||
private static let socketListenerUnhealthyCaptureCooldown: TimeInterval = 60
|
||||
private let sessionPersistenceQueue = DispatchQueue(
|
||||
label: "com.cmuxterm.app.sessionPersistence",
|
||||
qos: .utility
|
||||
|
|
@ -1354,6 +1358,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate, UNUserNotificationCent
|
|||
isTerminatingApp = true
|
||||
_ = saveSessionSnapshot(includeScrollback: true, removeWhenEmpty: false)
|
||||
stopSessionAutosaveTimer()
|
||||
stopSocketListenerHealthMonitor()
|
||||
TerminalController.shared.stop()
|
||||
BrowserHistoryStore.shared.flushPendingSaves()
|
||||
if TelemetrySettings.enabledForCurrentLaunch {
|
||||
|
|
@ -1381,6 +1386,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate, UNUserNotificationCent
|
|||
installLifecycleSnapshotObserversIfNeeded()
|
||||
prepareStartupSessionSnapshotIfNeeded()
|
||||
startSessionAutosaveTimerIfNeeded()
|
||||
startSocketListenerHealthMonitorIfNeeded()
|
||||
#if DEBUG
|
||||
setupJumpUnreadUITestIfNeeded()
|
||||
setupGotoSplitUITestIfNeeded()
|
||||
|
|
@ -1991,6 +1997,58 @@ final class AppDelegate: NSObject, NSApplicationDelegate, UNUserNotificationCent
|
|||
TerminalController.shared.start(tabManager: tabManager, socketPath: config.path, accessMode: config.mode)
|
||||
}
|
||||
|
||||
/// Starts the periodic socket-listener health check, unless one is already scheduled.
///
/// A repeating timer is created on the main queue; its first tick and every
/// later tick are spaced by `socketListenerHealthCheckInterval`. Each tick
/// runs `restartSocketListenerIfNeededForHealthCheck(source:)` with the source
/// tag `"health.timer"`.
private func startSocketListenerHealthMonitorIfNeeded() {
    if socketListenerHealthTimer != nil { return }

    let interval = Self.socketListenerHealthCheckInterval
    let healthTimer = DispatchSource.makeTimerSource(queue: .main)
    healthTimer.schedule(deadline: .now() + interval, repeating: interval)
    healthTimer.setEventHandler { [weak self] in
        // Re-enter main-actor isolation before touching AppDelegate state.
        Task { @MainActor [weak self] in
            self?.restartSocketListenerIfNeededForHealthCheck(source: "health.timer")
        }
    }
    healthTimer.resume()

    // Retain the timer; it is also the "already started" marker checked above.
    socketListenerHealthTimer = healthTimer
}
|
||||
|
||||
/// Cancels the socket-listener health timer (if any) and forgets it, so the
/// monitor can be started again later.
private func stopSocketListenerHealthMonitor() {
    if let activeTimer = socketListenerHealthTimer {
        activeTimer.cancel()
    }
    socketListenerHealthTimer = nil
}
|
||||
|
||||
/// Checks the CLI socket listener and restarts it when it is unhealthy.
///
/// Does nothing when `socketListenerConfigurationIfEnabled()` returns `nil`.
/// A healthy check resets the capture cooldown. An unhealthy check always
/// records a Sentry breadcrumb, sends a Sentry warning at most once per
/// `socketListenerUnhealthyCaptureCooldown` seconds, and then calls
/// `restartSocketListenerIfEnabled(source:)`.
///
/// - Parameter source: Tag describing what triggered the check; it is attached
///   to the telemetry payload and forwarded to the restart helper.
private func restartSocketListenerIfNeededForHealthCheck(source: String) {
    guard let config = socketListenerConfigurationIfEnabled() else { return }

    let health = TerminalController.shared.socketListenerHealth(expectedSocketPath: config.path)
    if health.isHealthy {
        // Back to healthy: the next unhealthy check is reported immediately.
        lastSocketListenerUnhealthyCaptureAt = .distantPast
        return
    }

    // Booleans are reported as 0/1 integers in the telemetry payload.
    func flag(_ value: Bool) -> Int { value ? 1 : 0 }

    let payload: [String: Any] = [
        "source": source,
        "path": config.path,
        "isRunning": flag(health.isRunning),
        "acceptLoopAlive": flag(health.acceptLoopAlive),
        "socketPathMatches": flag(health.socketPathMatches),
        "socketPathExists": flag(health.socketPathExists),
        "failureSignals": health.failureSignals
    ]
    sentryBreadcrumb("socket.listener.unhealthy", category: "socket", data: payload)

    // Rate-limit the full warning capture so a persistently broken listener
    // does not produce one event per health-check tick.
    let checkedAt = Date()
    let sinceLastCapture = checkedAt.timeIntervalSince(lastSocketListenerUnhealthyCaptureAt)
    if sinceLastCapture >= Self.socketListenerUnhealthyCaptureCooldown {
        lastSocketListenerUnhealthyCaptureAt = checkedAt
        sentryCaptureWarning(
            "socket.listener.unhealthy",
            category: "socket",
            data: payload,
            contextKey: "socket_listener_health"
        )
    }

    restartSocketListenerIfEnabled(source: source)
}
|
||||
|
||||
private func disableSuddenTerminationIfNeeded() {
|
||||
guard !didDisableSuddenTermination else { return }
|
||||
ProcessInfo.processInfo.disableSuddenTermination()
|
||||
|
|
|
|||
|
|
@ -8,3 +8,38 @@ func sentryBreadcrumb(_ message: String, category: String = "ui", data: [String:
|
|||
crumb.data = data
|
||||
SentrySDK.addBreadcrumb(crumb)
|
||||
}
|
||||
|
||||
/// Sends `message` to Sentry as a standalone event with the given severity.
///
/// Nothing is sent unless `TelemetrySettings.enabledForCurrentLaunch` is true.
///
/// - Parameters:
///   - message: Event message.
///   - level: Sentry severity applied to the event's scope.
///   - category: Value stored under the `"category"` tag.
///   - data: Optional payload attached as a scope context.
///   - contextKey: Key for the context payload; falls back to `category` when `nil`.
private func sentryCaptureMessage(
    _ message: String,
    level: SentryLevel,
    category: String,
    data: [String: Any]?,
    contextKey: String?
) {
    if !TelemetrySettings.enabledForCurrentLaunch { return }

    _ = SentrySDK.capture(message: message) { scope in
        scope.setLevel(level)
        scope.setTag(value: category, key: "category")

        guard let data else { return }
        scope.setContext(value: data, key: contextKey ?? category)
    }
}
|
||||
|
||||
/// Captures `message` in Sentry at warning level.
///
/// - Parameters:
///   - message: Event message.
///   - category: Tag value for the event; defaults to `"ui"`.
///   - data: Optional payload attached to the event.
///   - contextKey: Optional key under which `data` is attached.
func sentryCaptureWarning(
    _ message: String,
    category: String = "ui",
    data: [String: Any]? = nil,
    contextKey: String? = nil
) {
    sentryCaptureMessage(
        message,
        level: .warning,
        category: category,
        data: data,
        contextKey: contextKey
    )
}
|
||||
|
||||
/// Captures `message` in Sentry at error level.
///
/// - Parameters:
///   - message: Event message.
///   - category: Tag value for the event; defaults to `"ui"`.
///   - data: Optional payload attached to the event.
///   - contextKey: Optional key under which `data` is attached.
func sentryCaptureError(
    _ message: String,
    category: String = "ui",
    data: [String: Any]? = nil,
    contextKey: String? = nil
) {
    sentryCaptureMessage(
        message,
        level: .error,
        category: category,
        data: data,
        contextKey: contextKey
    )
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,26 @@ import WebKit
|
|||
/// Allows automated testing and external control of terminal tabs
|
||||
@MainActor
|
||||
class TerminalController {
|
||||
/// Point-in-time snapshot of the socket listener's health checks.
struct SocketListenerHealth: Sendable {
    /// Whether the listener reports itself as running.
    let isRunning: Bool
    /// Whether the accept loop is currently alive.
    let acceptLoopAlive: Bool
    /// Whether the listener's socket path equals the expected path.
    let socketPathMatches: Bool
    /// Whether a socket node exists at the expected path.
    let socketPathExists: Bool

    /// Identifiers of every failed check, in a fixed order:
    /// running, accept loop, path match, path existence.
    var failureSignals: [String] {
        let checks: [(passed: Bool, signal: String)] = [
            (isRunning, "not_running"),
            (acceptLoopAlive, "accept_loop_dead"),
            (socketPathMatches, "socket_path_mismatch"),
            (socketPathExists, "socket_missing")
        ]
        return checks.compactMap { check in
            check.passed ? nil : check.signal
        }
    }

    /// `true` only when every individual check passed.
    var isHealthy: Bool {
        isRunning && acceptLoopAlive && socketPathMatches && socketPathExists
    }
}
|
||||
|
||||
static let shared = TerminalController()
|
||||
|
||||
private nonisolated(unsafe) var socketPath = "/tmp/cmux.sock"
|
||||
|
|
@ -344,6 +364,39 @@ class TerminalController {
|
|||
return info.kp_eproc.e_ppid
|
||||
}
|
||||
|
||||
/// Builds the telemetry payload shared by socket-listener events.
///
/// The payload always carries the stage name, the current socket path, the
/// running / accept-loop flags (as 0/1) and the server socket descriptor.
///
/// - Parameters:
///   - stage: Name of the listener stage the event refers to.
///   - errnoCode: Optional errno value; when present, both the numeric code and
///     its `strerror` description are added.
///   - extra: Additional entries; on a key collision these override the base entries.
/// - Returns: The assembled payload dictionary.
private nonisolated func socketListenerEventData(
    stage: String,
    errnoCode: Int32? = nil,
    extra: [String: Any] = [:]
) -> [String: Any] {
    var payload: [String: Any] = [
        "stage": stage,
        "path": socketPath,
        "isRunning": isRunning ? 1 : 0,
        "acceptLoopAlive": acceptLoopAlive ? 1 : 0,
        "serverSocket": Int(serverSocket)
    ]

    if let code = errnoCode {
        payload["errno"] = Int(code)
        payload["errnoDescription"] = String(cString: strerror(code))
    }

    // Caller-supplied entries are applied last, so they win over the defaults.
    payload.merge(extra) { _, overriding in overriding }
    return payload
}
|
||||
|
||||
/// Reports a socket-listener failure to Sentry, as both a breadcrumb and an
/// error-level event that share the same payload.
///
/// - Parameters:
///   - message: Event / breadcrumb message.
///   - stage: Listener stage that failed; forwarded to `socketListenerEventData`.
///   - errnoCode: Optional errno value captured at the failure site.
///   - extra: Additional payload entries.
private nonisolated func reportSocketListenerFailure(
    message: String,
    stage: String,
    errnoCode: Int32? = nil,
    extra: [String: Any] = [:]
) {
    let sentryCategory = "socket"
    let eventData = socketListenerEventData(stage: stage, errnoCode: errnoCode, extra: extra)

    sentryBreadcrumb(message, category: sentryCategory, data: eventData)
    sentryCaptureError(
        message,
        category: sentryCategory,
        data: eventData,
        contextKey: "socket_listener"
    )
}
|
||||
|
||||
func start(tabManager: TabManager, socketPath: String, accessMode: SocketControlMode) {
|
||||
self.tabManager = tabManager
|
||||
self.accessMode = accessMode
|
||||
|
|
@ -365,7 +418,13 @@ class TerminalController {
|
|||
// Create socket
|
||||
serverSocket = socket(AF_UNIX, SOCK_STREAM, 0)
|
||||
guard serverSocket >= 0 else {
|
||||
let errnoCode = errno
|
||||
print("TerminalController: Failed to create socket")
|
||||
reportSocketListenerFailure(
|
||||
message: "socket.listener.start.failed",
|
||||
stage: "create_socket",
|
||||
errnoCode: errnoCode
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
|
|
@ -386,8 +445,14 @@ class TerminalController {
|
|||
}
|
||||
|
||||
guard bindResult >= 0 else {
|
||||
let errnoCode = errno
|
||||
print("TerminalController: Failed to bind socket")
|
||||
close(serverSocket)
|
||||
reportSocketListenerFailure(
|
||||
message: "socket.listener.start.failed",
|
||||
stage: "bind",
|
||||
errnoCode: errnoCode
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
|
|
@ -395,13 +460,27 @@ class TerminalController {
|
|||
|
||||
// Listen
|
||||
guard listen(serverSocket, 5) >= 0 else {
|
||||
let errnoCode = errno
|
||||
print("TerminalController: Failed to listen on socket")
|
||||
close(serverSocket)
|
||||
reportSocketListenerFailure(
|
||||
message: "socket.listener.start.failed",
|
||||
stage: "listen",
|
||||
errnoCode: errnoCode
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
isRunning = true
|
||||
print("TerminalController: Listening on \(socketPath)")
|
||||
sentryBreadcrumb(
|
||||
"socket.listener.listening",
|
||||
category: "socket",
|
||||
data: [
|
||||
"path": socketPath,
|
||||
"mode": accessMode.rawValue
|
||||
]
|
||||
)
|
||||
|
||||
// Wire batched port scanner results back to workspace state.
|
||||
PortScanner.shared.onPortsUpdated = { [weak self] workspaceId, panelId, ports in
|
||||
|
|
@ -428,6 +507,22 @@ class TerminalController {
|
|||
}
|
||||
}
|
||||
|
||||
/// Takes a snapshot of the listener's health relative to `expectedSocketPath`.
///
/// - Parameter expectedSocketPath: Path the listener is expected to be bound to.
///   It is compared with the controller's current socket path and also probed
///   on disk.
/// - Returns: A `SocketListenerHealth` holding the running flag, the accept-loop
///   flag, whether the paths match, and whether a socket node exists at the
///   expected path.
nonisolated func socketListenerHealth(expectedSocketPath: String) -> SocketListenerHealth {
    let listenerRunning = isRunning
    let loopRunning = acceptLoopAlive
    let boundToExpectedPath = (socketPath == expectedSocketPath)

    // lstat inspects the path entry itself; only an entry whose file type is
    // "socket" counts, so a regular file at the same path is rejected.
    var fileInfo = stat()
    var socketNodePresent = false
    if lstat(expectedSocketPath, &fileInfo) == 0 {
        socketNodePresent = (fileInfo.st_mode & S_IFMT) == S_IFSOCK
    }

    return SocketListenerHealth(
        isRunning: listenerRunning,
        acceptLoopAlive: loopRunning,
        socketPathMatches: boundToExpectedPath,
        socketPathExists: socketNodePresent
    )
}
|
||||
|
||||
nonisolated func stop() {
|
||||
isRunning = false
|
||||
if serverSocket >= 0 {
|
||||
|
|
@ -440,7 +535,17 @@ class TerminalController {
|
|||
/// Applies the access mode's file permissions to the socket path.
///
/// A `chmod` failure is not fatal: it is printed and recorded as a Sentry
/// breadcrumb (stage `"chmod"`, with errno and the octal permissions).
private func applySocketPermissions() {
    let permissions = mode_t(accessMode.socketFilePermissions)
    guard chmod(socketPath, permissions) != 0 else { return }

    // Read errno immediately, before any other call can overwrite it.
    let chmodErrno = errno
    let octalPermissions = String(permissions, radix: 8)
    print("TerminalController: Failed to set socket permissions to \(octalPermissions) for \(socketPath)")

    let eventData = socketListenerEventData(
        stage: "chmod",
        errnoCode: chmodErrno,
        extra: ["permissions": octalPermissions]
    )
    sentryBreadcrumb(
        "socket.listener.permissions.failed",
        category: "socket",
        data: eventData
    )
}
|
||||
|
||||
|
|
@ -537,9 +642,34 @@ class TerminalController {
|
|||
|
||||
private nonisolated func acceptLoop() {
|
||||
acceptLoopAlive = true
|
||||
sentryBreadcrumb(
|
||||
"socket.listener.accept_loop.started",
|
||||
category: "socket",
|
||||
data: socketListenerEventData(stage: "accept_loop_start")
|
||||
)
|
||||
var exitReason = "stopped"
|
||||
var lastAcceptErrno: Int32?
|
||||
defer {
|
||||
if isRunning && exitReason == "stopped" {
|
||||
exitReason = "unexpected_loop_exit"
|
||||
}
|
||||
let shouldCaptureExit = exitReason != "stopped"
|
||||
acceptLoopAlive = false
|
||||
isRunning = false
|
||||
if shouldCaptureExit {
|
||||
let data = socketListenerEventData(
|
||||
stage: "accept_loop_exit",
|
||||
errnoCode: lastAcceptErrno,
|
||||
extra: ["reason": exitReason]
|
||||
)
|
||||
sentryBreadcrumb("socket.listener.accept_loop.exited", category: "socket", data: data)
|
||||
sentryCaptureError(
|
||||
"socket.listener.accept_loop.exited",
|
||||
category: "socket",
|
||||
data: data,
|
||||
contextKey: "socket_listener"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
var consecutiveFailures = 0
|
||||
|
|
@ -555,10 +685,24 @@ class TerminalController {
|
|||
|
||||
guard clientSocket >= 0 else {
|
||||
if isRunning {
|
||||
let errnoCode = errno
|
||||
lastAcceptErrno = errnoCode
|
||||
consecutiveFailures += 1
|
||||
print("TerminalController: Accept failed (\(consecutiveFailures) consecutive)")
|
||||
if consecutiveFailures == 1 || consecutiveFailures % 10 == 0 {
|
||||
sentryBreadcrumb(
|
||||
"socket.listener.accept.failed",
|
||||
category: "socket",
|
||||
data: socketListenerEventData(
|
||||
stage: "accept",
|
||||
errnoCode: errnoCode,
|
||||
extra: ["consecutiveFailures": consecutiveFailures]
|
||||
)
|
||||
)
|
||||
}
|
||||
if consecutiveFailures >= 50 {
|
||||
print("TerminalController: Too many consecutive accept failures, exiting accept loop")
|
||||
exitReason = "too_many_accept_failures"
|
||||
break
|
||||
}
|
||||
usleep(10_000) // 10ms backoff
|
||||
|
|
|
|||
|
|
@ -8065,3 +8065,107 @@ final class GhosttyTerminalViewVisibilityPolicyTests: XCTestCase {
|
|||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Tests for `TerminalController.socketListenerHealth(expectedSocketPath:)` and
/// the derived properties of `TerminalController.SocketListenerHealth`.
final class TerminalControllerSocketListenerHealthTests: XCTestCase {
    /// Returns a unique socket path under `/tmp`.
    private func makeTempSocketPath() -> String {
        "/tmp/cmux-socket-health-\(UUID().uuidString).sock"
    }

    /// Builds an `NSError` in the POSIX domain for a failed socket call.
    private func posixError(_ code: Int32, description: String) -> NSError {
        NSError(
            domain: NSPOSIXErrorDomain,
            code: Int(code),
            userInfo: [NSLocalizedDescriptionKey: description]
        )
    }

    /// Creates a Unix stream socket, binds it to `path` and starts listening.
    ///
    /// Any existing entry at `path` is unlinked first.
    ///
    /// - Parameter path: Filesystem path to bind. It must fit in
    ///   `sockaddr_un.sun_path` including the terminating NUL.
    /// - Returns: The listening file descriptor; the caller owns and closes it.
    /// - Throws: An `NSPOSIXErrorDomain` error when the path is too long or when
    ///   `socket`, `bind` or `listen` fails.
    private func bindUnixSocket(at path: String) throws -> Int32 {
        var addr = sockaddr_un()
        addr.sun_family = sa_family_t(AF_UNIX)

        // `sun_path` is a fixed-size buffer; reject paths that would not fit
        // (plus the trailing NUL) instead of letting `strcpy` write past its end.
        let pathCapacity = MemoryLayout.size(ofValue: addr.sun_path)
        guard path.utf8.count < pathCapacity else {
            throw posixError(ENAMETOOLONG, description: "Unix socket path is too long")
        }

        unlink(path)

        let fd = socket(AF_UNIX, SOCK_STREAM, 0)
        guard fd >= 0 else {
            throw posixError(errno, description: "Failed to create Unix socket")
        }

        path.withCString { ptr in
            withUnsafeMutablePointer(to: &addr.sun_path) { pathPtr in
                let pathBuf = UnsafeMutableRawPointer(pathPtr).assumingMemoryBound(to: CChar.self)
                strcpy(pathBuf, ptr)
            }
        }

        let bindResult = withUnsafePointer(to: &addr) { ptr in
            ptr.withMemoryRebound(to: sockaddr.self, capacity: 1) { sockaddrPtr in
                Darwin.bind(fd, sockaddrPtr, socklen_t(MemoryLayout<sockaddr_un>.size))
            }
        }
        guard bindResult == 0 else {
            // Capture errno before close() can overwrite it.
            let code = errno
            Darwin.close(fd)
            throw posixError(code, description: "Failed to bind Unix socket")
        }

        guard Darwin.listen(fd, 1) == 0 else {
            let code = errno
            Darwin.close(fd)
            throw posixError(code, description: "Failed to listen on Unix socket")
        }

        return fd
    }

    /// A real socket node at the expected path is reported as existing. The
    /// overall health is still expected to be unhealthy, because this socket
    /// was not created by the shared controller.
    func testSocketListenerHealthRecognizesSocketPath() throws {
        let path = makeTempSocketPath()
        let fd = try bindUnixSocket(at: path)
        defer {
            Darwin.close(fd)
            unlink(path)
        }

        let health = TerminalController.shared.socketListenerHealth(expectedSocketPath: path)
        XCTAssertTrue(health.socketPathExists)
        XCTAssertFalse(health.isHealthy)
    }

    /// A regular file at the expected path must not count as a socket.
    func testSocketListenerHealthRejectsRegularFile() throws {
        let path = makeTempSocketPath()
        let url = URL(fileURLWithPath: path)
        try "not-a-socket".write(to: url, atomically: true, encoding: .utf8)
        defer { try? FileManager.default.removeItem(at: url) }

        let health = TerminalController.shared.socketListenerHealth(expectedSocketPath: path)
        XCTAssertFalse(health.socketPathExists)
        XCTAssertFalse(health.isHealthy)
    }

    /// All checks passing yields a healthy snapshot with no failure signals.
    func testSocketListenerHealthFailureSignalsAreEmptyWhenHealthy() {
        let health = TerminalController.SocketListenerHealth(
            isRunning: true,
            acceptLoopAlive: true,
            socketPathMatches: true,
            socketPathExists: true
        )
        XCTAssertTrue(health.isHealthy)
        XCTAssertEqual(health.failureSignals, [])
    }

    /// Every failed check contributes its signal, in declaration order.
    func testSocketListenerHealthFailureSignalsIncludeAllDetectedProblems() {
        let health = TerminalController.SocketListenerHealth(
            isRunning: false,
            acceptLoopAlive: false,
            socketPathMatches: false,
            socketPathExists: false
        )
        XCTAssertFalse(health.isHealthy)
        XCTAssertEqual(
            health.failureSignals,
            ["not_running", "accept_loop_dead", "socket_path_mismatch", "socket_missing"]
        )
    }
}
|
||||
|
|
|
|||
|
|
@ -81,26 +81,26 @@ def main() -> int:
|
|||
)
|
||||
require(
|
||||
app_delegate,
|
||||
"let mode = SocketControlSettings.effectiveMode(userMode: userMode)",
|
||||
"`restartSocketListener` no longer uses effective socket control mode",
|
||||
"private func socketListenerConfigurationIfEnabled() -> (mode: SocketControlMode, path: String)? {",
|
||||
"Missing shared socket listener configuration helper",
|
||||
failures,
|
||||
)
|
||||
require(
|
||||
app_delegate,
|
||||
"let socketPath = SocketControlSettings.socketPath()",
|
||||
"`restartSocketListener` no longer uses configured socket path",
|
||||
'restartSocketListenerIfEnabled(source: "menu.command")',
|
||||
"`restartSocketListener` no longer delegates to restart helper",
|
||||
failures,
|
||||
)
|
||||
require(
|
||||
app_delegate,
|
||||
"TerminalController.shared.stop()",
|
||||
"`restartSocketListener` no longer stops current listener before restart",
|
||||
"`restartSocketListenerIfEnabled` no longer stops current listener before restart",
|
||||
failures,
|
||||
)
|
||||
require(
|
||||
app_delegate,
|
||||
"TerminalController.shared.start(tabManager: tabManager, socketPath: socketPath, accessMode: mode)",
|
||||
"`restartSocketListener` no longer starts listener with current settings",
|
||||
"TerminalController.shared.start(tabManager: tabManager, socketPath: config.path, accessMode: config.mode)",
|
||||
"`restartSocketListenerIfEnabled` no longer starts listener with current settings",
|
||||
failures,
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue