diff --git a/Sources/Workspace.swift b/Sources/Workspace.swift index 4ece795b..a9825ab5 100644 --- a/Sources/Workspace.swift +++ b/Sources/Workspace.swift @@ -54,6 +54,8 @@ private final class WorkspaceRemoteSessionController { private var daemonReady = false private var daemonBootstrapVersion: String? private var daemonRemotePath: String? + private var reconnectRetryCount = 0 + private var reconnectWorkItem: DispatchWorkItem? init(workspace: Workspace, configuration: WorkspaceRemoteConfiguration) { self.workspace = workspace @@ -76,6 +78,9 @@ private final class WorkspaceRemoteSessionController { private func stopAllLocked() { isStopping = true + reconnectWorkItem?.cancel() + reconnectWorkItem = nil + reconnectRetryCount = 0 if let probeProcess { probeStdoutPipe?.fileHandleForReading.readabilityHandler = nil @@ -107,8 +112,18 @@ private final class WorkspaceRemoteSessionController { private func beginConnectionAttemptLocked() { guard !isStopping else { return } - publishState(.connecting, detail: "Connecting to \(configuration.displayTarget)") - publishDaemonStatus(.bootstrapping, detail: "Bootstrapping remote daemon on \(configuration.displayTarget)") + reconnectWorkItem = nil + let connectDetail: String + let bootstrapDetail: String + if reconnectRetryCount > 0 { + connectDetail = "Reconnecting to \(configuration.displayTarget) (retry \(reconnectRetryCount))" + bootstrapDetail = "Bootstrapping remote daemon on \(configuration.displayTarget) (retry \(reconnectRetryCount))" + } else { + connectDetail = "Connecting to \(configuration.displayTarget)" + bootstrapDetail = "Bootstrapping remote daemon on \(configuration.displayTarget)" + } + publishState(.connecting, detail: connectDetail) + publishDaemonStatus(.bootstrapping, detail: bootstrapDetail) do { let hello = try bootstrapDaemonLocked() daemonReady = true @@ -127,10 +142,11 @@ private final class WorkspaceRemoteSessionController { daemonReady = false daemonBootstrapVersion = nil daemonRemotePath = nil - let 
detail = "Remote daemon bootstrap failed: \(error.localizedDescription)" + let nextRetry = scheduleProbeRestartLocked(delay: 4.0) + let retrySuffix = Self.retrySuffix(retry: nextRetry, delay: 4.0) + let detail = "Remote daemon bootstrap failed: \(error.localizedDescription)\(retrySuffix)" publishDaemonStatus(.error, detail: detail) publishState(.error, detail: detail) - scheduleProbeRestartLocked(delay: 4.0) } } @@ -183,8 +199,9 @@ private final class WorkspaceRemoteSessionController { probeStdoutPipe = stdoutPipe probeStderrPipe = stderrPipe } catch { - publishState(.error, detail: "Failed to start SSH probe: \(error.localizedDescription)") - scheduleProbeRestartLocked(delay: 3.0) + let nextRetry = scheduleProbeRestartLocked(delay: 3.0) + let retrySuffix = Self.retrySuffix(retry: nextRetry, delay: 3.0) + publishState(.error, detail: "Failed to start SSH probe: \(error.localizedDescription)\(retrySuffix)") } } @@ -209,18 +226,27 @@ private final class WorkspaceRemoteSessionController { let statusCode = process.terminationStatus let rawDetail = Self.bestErrorLine(stderr: probeStderrBuffer, stdout: probeStdoutBuffer) let detail = rawDetail ?? 
"SSH probe exited with status \(statusCode)" - publishState(.error, detail: "SSH probe to \(configuration.displayTarget) failed: \(detail)") - scheduleProbeRestartLocked(delay: 3.0) + let nextRetry = scheduleProbeRestartLocked(delay: 3.0) + let retrySuffix = Self.retrySuffix(retry: nextRetry, delay: 3.0) + publishState(.error, detail: "SSH probe to \(configuration.displayTarget) failed: \(detail)\(retrySuffix)") } - private func scheduleProbeRestartLocked(delay: TimeInterval) { - guard !isStopping else { return } - queue.asyncAfter(deadline: .now() + delay) { [weak self] in + @discardableResult + private func scheduleProbeRestartLocked(delay: TimeInterval) -> Int { + guard !isStopping else { return reconnectRetryCount } + reconnectWorkItem?.cancel() + reconnectRetryCount += 1 + let retryNumber = reconnectRetryCount + let workItem = DispatchWorkItem { [weak self] in guard let self else { return } + self.reconnectWorkItem = nil guard !self.isStopping else { return } guard self.probeProcess == nil else { return } self.beginConnectionAttemptLocked() } + reconnectWorkItem = workItem + queue.asyncAfter(deadline: .now() + delay, execute: workItem) + return retryNumber } private func consumeProbeStdoutData(_ data: Data) { @@ -248,6 +274,9 @@ private final class WorkspaceRemoteSessionController { let ports = Self.parseRemotePorts(line: line) desiredRemotePorts = Set(ports) portConflicts = portConflicts.intersection(desiredRemotePorts) + reconnectWorkItem?.cancel() + reconnectWorkItem = nil + reconnectRetryCount = 0 publishState(.connected, detail: "Connected to \(configuration.displayTarget)") reconcileForwardsLocked() } @@ -885,6 +914,11 @@ private final class WorkspaceRemoteSessionController { return false } + private static func retrySuffix(retry: Int, delay: TimeInterval) -> String { + let seconds = max(1, Int(delay.rounded())) + return " (retry \(retry) in \(seconds)s)" + } + private static func isLoopbackPortAvailable(port: Int) -> Bool { guard port > 0 && port <= 
65535 else { return false } diff --git a/docs/remote-daemon-spec.md b/docs/remote-daemon-spec.md index 8795dc1b..a676c0a2 100644 --- a/docs/remote-daemon-spec.md +++ b/docs/remote-daemon-spec.md @@ -64,16 +64,29 @@ Minimum RPC surface: Protocol requirement: 1. multiplexed framed streams (control + PTY + proxy data) -## 6. Proxying +## 6. Web Proxying (Browser-First) -Proxy endpoints (loopback only by default): -1. HTTP CONNECT -2. SOCKS5 +Goal: remote workspaces browse from the remote host network, without per-service local port forwards. -Behavior: -1. requests tunnel to daemon, daemon dials destinations -2. websocket must work in both proxy modes -3. local bind conflicts return structured errors (+ optional next-port fallback) +Model: +1. `cmux ssh` creates/uses one **proxy endpoint per SSH transport** (not per workspace, not per destination port). +2. Browser panels opened in remote workspaces are auto-wired to that endpoint. +3. Terminal/service port forwarding is **not** the browser path; keep it opt-in for explicit localhost workflows only. + +Implementation: +1. local `cmuxd` runs a transport-scoped proxy broker (`127.0.0.1:<port>`), supporting: + - HTTP CONNECT + - SOCKS5 +2. broker opens multiplexed proxy streams to `cmuxd-remote`; remote daemon performs outbound dials. +3. browser wiring uses workspace-scoped `WKWebsiteDataStore.proxyConfigurations`: + - primary: SOCKS5 (`ProxyConfiguration(socksv5Proxy:)`) + - fallback: HTTP CONNECT (`ProxyConfiguration(httpCONNECTProxy:)`) +4. browser panels in non-remote workspaces use no forced proxy config. + +Failure + reconnect: +1. if proxy endpoint bind fails, return structured `proxy_unavailable` with actionable detail. +2. if transport drops, browser requests fail fast, workspace status shows reconnect + retry count. +3. after reconnect, proxy broker and WKWebView proxy config are revalidated automatically. ## 7. Reconnect Semantics @@ -111,13 +124,13 @@ All cases require deterministic `MUST` assertions. 
| ID | Scenario | MUST Assertions | |---|---|---| -| W-001 | HTTP CONNECT | fixture response matches expected body | -| W-002 | SOCKS5 | response parity with direct remote | +| W-001 | browser auto wiring | remote workspace browser gets daemon-backed proxy automatically | +| W-002 | remote egress proof | remote workspace browser egress IP matches remote host, not local host | | W-003 | websocket via CONNECT | echo integrity, no unexpected close | | W-004 | websocket via SOCKS5 | echo integrity | -| W-005 | port conflict | structured conflict error + fallback behavior | +| W-005 | proxy listener conflict | structured `proxy_unavailable` + fallback bind behavior | | W-006 | concurrent PTY + proxy load | no PTY stall; proxy latency/error budget met | -| W-007 | browser auto wiring | browser workflow uses daemon-backed proxy automatically when remote session is active | +| W-007 | reconnect continuity | after transport reconnect, browser traffic resumes without manual proxy reconfiguration | ### 8.3 Reconnect @@ -147,5 +160,5 @@ All cases require deterministic `MUST` assertions. ## 10. Open Decisions -1. proxy endpoint scope: per daemon transport vs per workspace -2. reconnect retry budget and backoff profile +1. reconnect retry budget and backoff profile +2. 
proxy auth policy (none vs optional credentials for local broker) diff --git a/tests_v2/test_ssh_remote_cli_metadata.py b/tests_v2/test_ssh_remote_cli_metadata.py index de7f102c..c540ff62 100644 --- a/tests_v2/test_ssh_remote_cli_metadata.py +++ b/tests_v2/test_ssh_remote_cli_metadata.py @@ -171,6 +171,7 @@ def main() -> int: last_daemon = last_remote.get("daemon") or {} detail = str(last_daemon.get("detail") or "") _must("bootstrap failed" in detail.lower(), f"daemon error should mention bootstrap failure: {last_status}") + _must(re.search(r"retry\s+\d+", detail.lower()) is not None, f"daemon error should include retry count: {last_status}") # Lifecycle regression: disconnect with clear should reset remote/daemon metadata. disconnected = client._call(