Surface remote retry counts and specify browser proxy model

This commit is contained in:
Lawrence Chen 2026-02-21 02:20:58 -08:00
parent 5e14bfe087
commit bab7440e49
3 changed files with 73 additions and 25 deletions

View file

@ -54,6 +54,8 @@ private final class WorkspaceRemoteSessionController {
private var daemonReady = false
private var daemonBootstrapVersion: String?
private var daemonRemotePath: String?
private var reconnectRetryCount = 0
private var reconnectWorkItem: DispatchWorkItem?
init(workspace: Workspace, configuration: WorkspaceRemoteConfiguration) {
self.workspace = workspace
@ -76,6 +78,9 @@ private final class WorkspaceRemoteSessionController {
private func stopAllLocked() {
isStopping = true
reconnectWorkItem?.cancel()
reconnectWorkItem = nil
reconnectRetryCount = 0
if let probeProcess {
probeStdoutPipe?.fileHandleForReading.readabilityHandler = nil
@ -107,8 +112,18 @@ private final class WorkspaceRemoteSessionController {
private func beginConnectionAttemptLocked() {
guard !isStopping else { return }
publishState(.connecting, detail: "Connecting to \(configuration.displayTarget)")
publishDaemonStatus(.bootstrapping, detail: "Bootstrapping remote daemon on \(configuration.displayTarget)")
reconnectWorkItem = nil
let connectDetail: String
let bootstrapDetail: String
if reconnectRetryCount > 0 {
connectDetail = "Reconnecting to \(configuration.displayTarget) (retry \(reconnectRetryCount))"
bootstrapDetail = "Bootstrapping remote daemon on \(configuration.displayTarget) (retry \(reconnectRetryCount))"
} else {
connectDetail = "Connecting to \(configuration.displayTarget)"
bootstrapDetail = "Bootstrapping remote daemon on \(configuration.displayTarget)"
}
publishState(.connecting, detail: connectDetail)
publishDaemonStatus(.bootstrapping, detail: bootstrapDetail)
do {
let hello = try bootstrapDaemonLocked()
daemonReady = true
@ -127,10 +142,11 @@ private final class WorkspaceRemoteSessionController {
daemonReady = false
daemonBootstrapVersion = nil
daemonRemotePath = nil
let detail = "Remote daemon bootstrap failed: \(error.localizedDescription)"
let nextRetry = scheduleProbeRestartLocked(delay: 4.0)
let retrySuffix = Self.retrySuffix(retry: nextRetry, delay: 4.0)
let detail = "Remote daemon bootstrap failed: \(error.localizedDescription)\(retrySuffix)"
publishDaemonStatus(.error, detail: detail)
publishState(.error, detail: detail)
scheduleProbeRestartLocked(delay: 4.0)
}
}
@ -183,8 +199,9 @@ private final class WorkspaceRemoteSessionController {
probeStdoutPipe = stdoutPipe
probeStderrPipe = stderrPipe
} catch {
publishState(.error, detail: "Failed to start SSH probe: \(error.localizedDescription)")
scheduleProbeRestartLocked(delay: 3.0)
let nextRetry = scheduleProbeRestartLocked(delay: 3.0)
let retrySuffix = Self.retrySuffix(retry: nextRetry, delay: 3.0)
publishState(.error, detail: "Failed to start SSH probe: \(error.localizedDescription)\(retrySuffix)")
}
}
@ -209,18 +226,27 @@ private final class WorkspaceRemoteSessionController {
let statusCode = process.terminationStatus
let rawDetail = Self.bestErrorLine(stderr: probeStderrBuffer, stdout: probeStdoutBuffer)
let detail = rawDetail ?? "SSH probe exited with status \(statusCode)"
publishState(.error, detail: "SSH probe to \(configuration.displayTarget) failed: \(detail)")
scheduleProbeRestartLocked(delay: 3.0)
let nextRetry = scheduleProbeRestartLocked(delay: 3.0)
let retrySuffix = Self.retrySuffix(retry: nextRetry, delay: 3.0)
publishState(.error, detail: "SSH probe to \(configuration.displayTarget) failed: \(detail)\(retrySuffix)")
}
private func scheduleProbeRestartLocked(delay: TimeInterval) {
guard !isStopping else { return }
queue.asyncAfter(deadline: .now() + delay) { [weak self] in
/// Schedules a delayed reconnect attempt on `queue`, superseding any restart that is still pending.
///
/// - Parameter delay: Seconds to wait before the attempt is made.
/// - Returns: The retry number assigned to the newly scheduled attempt, or the current
///   `reconnectRetryCount` (unchanged) when the controller is stopping and nothing is scheduled.
@discardableResult
private func scheduleProbeRestartLocked(delay: TimeInterval) -> Int {
    if isStopping {
        return reconnectRetryCount
    }

    // Only one restart is tracked at a time: cancel the previous one before replacing it.
    reconnectWorkItem?.cancel()
    reconnectRetryCount += 1
    let attempt = reconnectRetryCount

    let restart = DispatchWorkItem { [weak self] in
        guard let self else { return }
        // Drop the stored handle first so it no longer refers to an item that has started running.
        self.reconnectWorkItem = nil
        // Do nothing if the controller is shutting down or a probe process already exists.
        guard !self.isStopping, self.probeProcess == nil else { return }
        self.beginConnectionAttemptLocked()
    }
    reconnectWorkItem = restart
    queue.asyncAfter(deadline: .now() + delay, execute: restart)
    return attempt
}
private func consumeProbeStdoutData(_ data: Data) {
@ -248,6 +274,9 @@ private final class WorkspaceRemoteSessionController {
let ports = Self.parseRemotePorts(line: line)
desiredRemotePorts = Set(ports)
portConflicts = portConflicts.intersection(desiredRemotePorts)
reconnectWorkItem?.cancel()
reconnectWorkItem = nil
reconnectRetryCount = 0
publishState(.connected, detail: "Connected to \(configuration.displayTarget)")
reconcileForwardsLocked()
}
@ -885,6 +914,11 @@ private final class WorkspaceRemoteSessionController {
return false
}
/// Builds the suffix appended to error details to describe the scheduled reconnect.
///
/// - Parameters:
///   - retry: The retry number to display.
///   - delay: Time until the retry, in seconds. It is rounded to the nearest whole second
///     and displayed as at least 1.
/// - Returns: A string of the form `" (retry N in Ss)"`, including the leading space.
private static func retrySuffix(retry: Int, delay: TimeInterval) -> String {
    // `Int(_:)` traps on NaN, infinite, or out-of-range doubles. `Int(exactly:)` returns nil
    // for those instead, so a malformed delay degrades to the 1-second floor rather than
    // crashing while formatting a status string.
    let wholeSeconds = Int(exactly: delay.rounded()) ?? 1
    let seconds = max(1, wholeSeconds)
    return " (retry \(retry) in \(seconds)s)"
}
private static func isLoopbackPortAvailable(port: Int) -> Bool {
guard port > 0 && port <= 65535 else { return false }

View file

@ -64,16 +64,29 @@ Minimum RPC surface:
Protocol requirement:
1. multiplexed framed streams (control + PTY + proxy data)
## 6. Proxying
## 6. Web Proxying (Browser-First)
Proxy endpoints (loopback only by default):
1. HTTP CONNECT
2. SOCKS5
Goal: remote workspaces browse from the remote host network, without per-service local port forwards.
Behavior:
1. requests tunnel to daemon, daemon dials destinations
2. websocket must work in both proxy modes
3. local bind conflicts return structured errors (+ optional next-port fallback)
Model:
1. `cmux ssh` creates/uses one **proxy endpoint per SSH transport** (not per workspace, not per destination port).
2. Browser panels opened in remote workspaces are auto-wired to that endpoint.
3. Terminal/service port forwarding is **not** the browser path; keep it opt-in for explicit localhost workflows only.
Implementation:
1. local `cmuxd` runs a transport-scoped proxy broker (`127.0.0.1:<ephemeral>`), supporting:
- HTTP CONNECT
- SOCKS5
2. broker opens multiplexed proxy streams to `cmuxd-remote`; remote daemon performs outbound dials.
3. browser wiring uses workspace-scoped `WKWebsiteDataStore.proxyConfigurations`:
- primary: SOCKS5 (`ProxyConfiguration(socksv5Proxy:)`)
- fallback: HTTP CONNECT (`ProxyConfiguration(httpCONNECTProxy:)`)
4. browser panels in non-remote workspaces use no forced proxy config.
Failure + reconnect:
1. if proxy endpoint bind fails, return structured `proxy_unavailable` with actionable detail.
2. if transport drops, browser requests fail fast and workspace status shows the reconnect state and retry count.
3. after reconnect, proxy broker and WKWebView proxy config are revalidated automatically.
## 7. Reconnect Semantics
@ -111,13 +124,13 @@ All cases require deterministic `MUST` assertions.
| ID | Scenario | MUST Assertions |
|---|---|---|
| W-001 | HTTP CONNECT | fixture response matches expected body |
| W-002 | SOCKS5 | response parity with direct remote |
| W-001 | browser auto wiring | remote workspace browser gets daemon-backed proxy automatically |
| W-002 | remote egress proof | remote workspace browser egress IP matches remote host, not local host |
| W-003 | websocket via CONNECT | echo integrity, no unexpected close |
| W-004 | websocket via SOCKS5 | echo integrity |
| W-005 | port conflict | structured conflict error + fallback behavior |
| W-005 | proxy listener conflict | structured `proxy_unavailable` + fallback bind behavior |
| W-006 | concurrent PTY + proxy load | no PTY stall; proxy latency/error budget met |
| W-007 | browser auto wiring | browser workflow uses daemon-backed proxy automatically when remote session is active |
| W-007 | reconnect continuity | after transport reconnect, browser traffic resumes without manual proxy reconfiguration |
### 8.3 Reconnect
@ -147,5 +160,5 @@ All cases require deterministic `MUST` assertions.
## 10. Open Decisions
1. proxy endpoint scope: per daemon transport vs per workspace
2. reconnect retry budget and backoff profile
1. reconnect retry budget and backoff profile
2. proxy auth policy (none vs optional credentials for local broker)

View file

@ -171,6 +171,7 @@ def main() -> int:
last_daemon = last_remote.get("daemon") or {}
detail = str(last_daemon.get("detail") or "")
_must("bootstrap failed" in detail.lower(), f"daemon error should mention bootstrap failure: {last_status}")
_must(re.search(r"retry\s+\d+", detail.lower()) is not None, f"daemon error should include retry count: {last_status}")
# Lifecycle regression: disconnect with clear should reset remote/daemon metadata.
disconnected = client._call(