- Add 5-minute per-host cooldown for remote error notifications - Add exponential backoff (capped at 60s) to proxy broker and session controller retries - Add default SSH ConnectTimeout/ServerAliveInterval/ServerAliveCountMax to detect dead connections faster - Fix error status clearing to only reset on actual .connected state Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
9e75355525
commit
e419fd9164
3 changed files with 100 additions and 24 deletions
|
|
@ -4163,6 +4163,15 @@ struct CMUXCLI {
|
|||
remoteRelayPort: options.remoteRelayPort
|
||||
)
|
||||
var parts: [String] = ["ssh"]
|
||||
if !hasSSHOptionKey(effectiveSSHOptions, key: "ConnectTimeout") {
|
||||
parts += ["-o", "ConnectTimeout=6"]
|
||||
}
|
||||
if !hasSSHOptionKey(effectiveSSHOptions, key: "ServerAliveInterval") {
|
||||
parts += ["-o", "ServerAliveInterval=20"]
|
||||
}
|
||||
if !hasSSHOptionKey(effectiveSSHOptions, key: "ServerAliveCountMax") {
|
||||
parts += ["-o", "ServerAliveCountMax=2"]
|
||||
}
|
||||
if !hasSSHOptionKey(effectiveSSHOptions, key: "SetEnv") {
|
||||
parts += ["-o", "SetEnv COLORTERM=truecolor"]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -733,6 +733,7 @@ final class TerminalNotificationStore: ObservableObject {
|
|||
notification in
|
||||
store.playSuppressedNotificationFeedback(for: notification)
|
||||
}
|
||||
private var lastNotificationDateByCooldownKey: [String: Date] = [:]
|
||||
private var indexes = NotificationIndexes()
|
||||
|
||||
private init() {
|
||||
|
|
@ -890,7 +891,29 @@ final class TerminalNotificationStore: ObservableObject {
|
|||
focusedReadIndicatorByTabId[tabId]
|
||||
}
|
||||
|
||||
func addNotification(tabId: UUID, surfaceId: UUID?, title: String, subtitle: String, body: String) {
|
||||
func addNotification(
|
||||
tabId: UUID,
|
||||
surfaceId: UUID?,
|
||||
title: String,
|
||||
subtitle: String,
|
||||
body: String,
|
||||
cooldownKey: String? = nil,
|
||||
cooldownInterval: TimeInterval? = nil
|
||||
) {
|
||||
let now = Date()
|
||||
let resolvedCooldownInterval: TimeInterval?
|
||||
if let cooldownInterval, cooldownInterval.isFinite, cooldownInterval > 0 {
|
||||
resolvedCooldownInterval = cooldownInterval
|
||||
} else {
|
||||
resolvedCooldownInterval = nil
|
||||
}
|
||||
if let cooldownKey,
|
||||
let resolvedCooldownInterval,
|
||||
let lastNotificationDate = lastNotificationDateByCooldownKey[cooldownKey],
|
||||
now.timeIntervalSince(lastNotificationDate) < resolvedCooldownInterval {
|
||||
return
|
||||
}
|
||||
|
||||
var updated = notifications
|
||||
var idsToClear: [String] = []
|
||||
updated.removeAll { existing in
|
||||
|
|
@ -925,11 +948,14 @@ final class TerminalNotificationStore: ObservableObject {
|
|||
title: title,
|
||||
subtitle: subtitle,
|
||||
body: body,
|
||||
createdAt: Date(),
|
||||
createdAt: now,
|
||||
isRead: false
|
||||
)
|
||||
updated.insert(notification, at: 0)
|
||||
notifications = updated
|
||||
if let cooldownKey, resolvedCooldownInterval != nil {
|
||||
lastNotificationDateByCooldownKey[cooldownKey] = now
|
||||
}
|
||||
if !idsToClear.isEmpty {
|
||||
center.removeDeliveredNotificationsOffMain(withIdentifiers: idsToClear)
|
||||
center.removePendingNotificationRequestsOffMain(withIdentifiers: idsToClear)
|
||||
|
|
|
|||
|
|
@ -2489,6 +2489,7 @@ private final class WorkspaceRemoteProxyBroker {
|
|||
var tunnel: WorkspaceRemoteDaemonProxyTunnel?
|
||||
var endpoint: BrowserProxyEndpoint?
|
||||
var restartWorkItem: DispatchWorkItem?
|
||||
var restartRetryCount = 0
|
||||
var subscribers: [UUID: (Update) -> Void] = [:]
|
||||
|
||||
init(configuration: WorkspaceRemoteConfiguration, remotePath: String) {
|
||||
|
|
@ -2515,6 +2516,7 @@ private final class WorkspaceRemoteProxyBroker {
|
|||
entry = existing
|
||||
if existing.remotePath != remotePath {
|
||||
existing.remotePath = remotePath
|
||||
existing.restartRetryCount = 0
|
||||
if existing.tunnel != nil {
|
||||
stopEntryRuntimeLocked(existing)
|
||||
notifyLocked(existing, update: .connecting)
|
||||
|
|
@ -2558,12 +2560,13 @@ private final class WorkspaceRemoteProxyBroker {
|
|||
// Internal deterministic test hook used by docker regressions to force bind conflicts.
|
||||
localPort = forcedLocalPort
|
||||
} else {
|
||||
let retryDelay = Self.retryDelay(baseDelay: 3.0, retry: entry.restartRetryCount + 1)
|
||||
guard let allocatedPort = Self.allocateLoopbackPort() else {
|
||||
notifyLocked(
|
||||
entry,
|
||||
update: .error("Failed to allocate local proxy port\(Self.retrySuffix(delay: 3.0))")
|
||||
update: .error("Failed to allocate local proxy port\(Self.retrySuffix(delay: retryDelay))")
|
||||
)
|
||||
scheduleRestartLocked(key: key, entry: entry, delay: 3.0)
|
||||
scheduleRestartLocked(key: key, entry: entry, baseDelay: 3.0)
|
||||
return
|
||||
}
|
||||
localPort = allocatedPort
|
||||
|
|
@ -2583,28 +2586,33 @@ private final class WorkspaceRemoteProxyBroker {
|
|||
entry.tunnel = tunnel
|
||||
let endpoint = BrowserProxyEndpoint(host: "127.0.0.1", port: localPort)
|
||||
entry.endpoint = endpoint
|
||||
entry.restartRetryCount = 0
|
||||
notifyLocked(entry, update: .ready(endpoint))
|
||||
} catch {
|
||||
stopEntryRuntimeLocked(entry)
|
||||
let detail = "Failed to start local daemon proxy: \(error.localizedDescription)"
|
||||
notifyLocked(entry, update: .error("\(detail)\(Self.retrySuffix(delay: 3.0))"))
|
||||
scheduleRestartLocked(key: key, entry: entry, delay: 3.0)
|
||||
let retryDelay = Self.retryDelay(baseDelay: 3.0, retry: entry.restartRetryCount + 1)
|
||||
notifyLocked(entry, update: .error("\(detail)\(Self.retrySuffix(delay: retryDelay))"))
|
||||
scheduleRestartLocked(key: key, entry: entry, baseDelay: 3.0)
|
||||
}
|
||||
}
|
||||
|
||||
private func handleTunnelFailureLocked(key: String, detail: String) {
|
||||
guard let entry = entries[key], entry.tunnel != nil else { return }
|
||||
stopEntryRuntimeLocked(entry)
|
||||
notifyLocked(entry, update: .error("\(detail)\(Self.retrySuffix(delay: 3.0))"))
|
||||
scheduleRestartLocked(key: key, entry: entry, delay: 3.0)
|
||||
let retryDelay = Self.retryDelay(baseDelay: 3.0, retry: entry.restartRetryCount + 1)
|
||||
notifyLocked(entry, update: .error("\(detail)\(Self.retrySuffix(delay: retryDelay))"))
|
||||
scheduleRestartLocked(key: key, entry: entry, baseDelay: 3.0)
|
||||
}
|
||||
|
||||
private func scheduleRestartLocked(key: String, entry: Entry, delay: TimeInterval) {
|
||||
private func scheduleRestartLocked(key: String, entry: Entry, baseDelay: TimeInterval) {
|
||||
guard !entry.subscribers.isEmpty else {
|
||||
teardownEntryLocked(key: key, entry: entry)
|
||||
return
|
||||
}
|
||||
guard entry.restartWorkItem == nil else { return }
|
||||
entry.restartRetryCount += 1
|
||||
let retryDelay = Self.retryDelay(baseDelay: baseDelay, retry: entry.restartRetryCount)
|
||||
|
||||
let workItem = DispatchWorkItem { [weak self] in
|
||||
guard let self, let currentEntry = self.entries[key] else { return }
|
||||
|
|
@ -2618,7 +2626,7 @@ private final class WorkspaceRemoteProxyBroker {
|
|||
}
|
||||
|
||||
entry.restartWorkItem = workItem
|
||||
queue.asyncAfter(deadline: .now() + delay, execute: workItem)
|
||||
queue.asyncAfter(deadline: .now() + retryDelay, execute: workItem)
|
||||
}
|
||||
|
||||
private func teardownEntryLocked(key: String, entry: Entry) {
|
||||
|
|
@ -2687,6 +2695,11 @@ private final class WorkspaceRemoteProxyBroker {
|
|||
let seconds = max(1, Int(delay.rounded()))
|
||||
return " (retry in \(seconds)s)"
|
||||
}
|
||||
|
||||
private static func retryDelay(baseDelay: TimeInterval, retry: Int) -> TimeInterval {
|
||||
let exponent = Double(max(0, retry - 1))
|
||||
return min(baseDelay * pow(2.0, exponent), 60.0)
|
||||
}
|
||||
}
|
||||
|
||||
private final class WorkspaceRemoteCLIRelayServer {
|
||||
|
|
@ -3170,6 +3183,11 @@ private final class WorkspaceRemoteCLIRelayServer {
|
|||
}
|
||||
|
||||
final class WorkspaceRemoteSessionController {
|
||||
private struct RetrySchedule {
|
||||
let retry: Int
|
||||
let delay: TimeInterval
|
||||
}
|
||||
|
||||
private struct CommandResult {
|
||||
let status: Int32
|
||||
let stdout: String
|
||||
|
|
@ -3357,8 +3375,8 @@ final class WorkspaceRemoteSessionController {
|
|||
daemonReady = false
|
||||
daemonBootstrapVersion = nil
|
||||
daemonRemotePath = nil
|
||||
let nextRetry = scheduleReconnectLocked(delay: 4.0)
|
||||
let retrySuffix = Self.retrySuffix(retry: nextRetry, delay: 4.0)
|
||||
let retrySchedule = scheduleReconnectLocked(baseDelay: 4.0)
|
||||
let retrySuffix = Self.retrySuffix(retry: retrySchedule.retry, delay: retrySchedule.delay)
|
||||
let detail = "Remote daemon bootstrap failed: \(error.localizedDescription)\(retrySuffix)"
|
||||
publishDaemonStatus(.error, detail: detail)
|
||||
publishState(.error, detail: detail)
|
||||
|
|
@ -3371,8 +3389,8 @@ final class WorkspaceRemoteSessionController {
|
|||
guard proxyLease == nil else { return }
|
||||
guard let remotePath = daemonRemotePath,
|
||||
!remotePath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
|
||||
let nextRetry = scheduleReconnectLocked(delay: 4.0)
|
||||
let retrySuffix = Self.retrySuffix(retry: nextRetry, delay: 4.0)
|
||||
let retrySchedule = scheduleReconnectLocked(baseDelay: 4.0)
|
||||
let retrySuffix = Self.retrySuffix(retry: retrySchedule.retry, delay: retrySchedule.delay)
|
||||
let detail = "Remote daemon did not provide a valid remote path\(retrySuffix)"
|
||||
publishDaemonStatus(.error, detail: detail)
|
||||
publishState(.error, detail: detail)
|
||||
|
|
@ -3588,8 +3606,8 @@ final class WorkspaceRemoteSessionController {
|
|||
daemonBootstrapVersion = nil
|
||||
daemonRemotePath = nil
|
||||
|
||||
let nextRetry = scheduleReconnectLocked(delay: 2.0)
|
||||
let retrySuffix = Self.retrySuffix(retry: nextRetry, delay: 2.0)
|
||||
let retrySchedule = scheduleReconnectLocked(baseDelay: 2.0)
|
||||
let retrySuffix = Self.retrySuffix(retry: retrySchedule.retry, delay: retrySchedule.delay)
|
||||
publishDaemonStatus(
|
||||
.error,
|
||||
detail: "Remote daemon transport needs re-bootstrap after proxy failure\(retrySuffix)"
|
||||
|
|
@ -3598,11 +3616,12 @@ final class WorkspaceRemoteSessionController {
|
|||
}
|
||||
|
||||
@discardableResult
|
||||
private func scheduleReconnectLocked(delay: TimeInterval) -> Int {
|
||||
guard !isStopping else { return reconnectRetryCount }
|
||||
private func scheduleReconnectLocked(baseDelay: TimeInterval) -> RetrySchedule {
|
||||
let retryNumber = reconnectRetryCount + 1
|
||||
let retryDelay = Self.retryDelay(baseDelay: baseDelay, retry: retryNumber)
|
||||
guard !isStopping else { return RetrySchedule(retry: retryNumber, delay: retryDelay) }
|
||||
reconnectWorkItem?.cancel()
|
||||
reconnectRetryCount += 1
|
||||
let retryNumber = reconnectRetryCount
|
||||
reconnectRetryCount = retryNumber
|
||||
let workItem = DispatchWorkItem { [weak self] in
|
||||
guard let self else { return }
|
||||
self.reconnectWorkItem = nil
|
||||
|
|
@ -3611,8 +3630,8 @@ final class WorkspaceRemoteSessionController {
|
|||
self.beginConnectionAttemptLocked()
|
||||
}
|
||||
reconnectWorkItem = workItem
|
||||
queue.asyncAfter(deadline: .now() + delay, execute: workItem)
|
||||
return retryNumber
|
||||
queue.asyncAfter(deadline: .now() + retryDelay, execute: workItem)
|
||||
return RetrySchedule(retry: retryNumber, delay: retryDelay)
|
||||
}
|
||||
|
||||
private func publishState(_ state: WorkspaceRemoteConnectionState, detail: String?) {
|
||||
|
|
@ -4853,6 +4872,11 @@ final class WorkspaceRemoteSessionController {
|
|||
return " (retry \(retry) in \(seconds)s)"
|
||||
}
|
||||
|
||||
private static func retryDelay(baseDelay: TimeInterval, retry: Int) -> TimeInterval {
|
||||
let exponent = Double(max(0, retry - 1))
|
||||
return min(baseDelay * pow(2.0, exponent), 60.0)
|
||||
}
|
||||
|
||||
private static func shouldEscalateProxyErrorToBootstrap(_ detail: String) -> Bool {
|
||||
let lowered = detail.lowercased()
|
||||
return lowered.contains("remote daemon transport failed")
|
||||
|
|
@ -5542,6 +5566,7 @@ final class Workspace: Identifiable, ObservableObject {
|
|||
|
||||
private static let remoteErrorStatusKey = "remote.error"
|
||||
private static let remotePortConflictStatusKey = "remote.port_conflicts"
|
||||
private static let remoteNotificationCooldown: TimeInterval = 5 * 60
|
||||
private static let sshControlMasterCleanupQueue = DispatchQueue(
|
||||
label: "com.cmux.remote-ssh.control-master-cleanup",
|
||||
qos: .utility
|
||||
|
|
@ -5577,6 +5602,20 @@ final class Workspace: Identifiable, ObservableObject {
|
|||
return entry.lowercased().contains("remote proxy unavailable")
|
||||
}
|
||||
|
||||
private func remoteNotificationCooldownKey(target: String) -> String? {
|
||||
let rawTarget = (remoteConfiguration?.destination ?? target)
|
||||
.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
guard !rawTarget.isEmpty else { return nil }
|
||||
let normalizedHost = rawTarget
|
||||
.split(separator: "@", maxSplits: 1, omittingEmptySubsequences: false)
|
||||
.last
|
||||
.map(String.init)?
|
||||
.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
.lowercased()
|
||||
guard let normalizedHost, !normalizedHost.isEmpty else { return nil }
|
||||
return "remote-host:\(normalizedHost)"
|
||||
}
|
||||
|
||||
var focusedSurfaceId: UUID? { focusedPanelId }
|
||||
var surfaceDirectories: [UUID: String] {
|
||||
get { panelDirectories }
|
||||
|
|
@ -7165,13 +7204,15 @@ final class Workspace: Identifiable, ObservableObject {
|
|||
surfaceId: nil,
|
||||
title: notificationTitle,
|
||||
subtitle: target,
|
||||
body: trimmedDetail
|
||||
body: trimmedDetail,
|
||||
cooldownKey: remoteNotificationCooldownKey(target: target),
|
||||
cooldownInterval: Self.remoteNotificationCooldown
|
||||
)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if !preserveConnectedStateForRetry && state != .error {
|
||||
if state == .connected {
|
||||
statusEntries.removeValue(forKey: Self.remoteErrorStatusKey)
|
||||
remoteLastErrorFingerprint = nil
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue