Surface remote retry counts and specify browser proxy model
This commit is contained in:
parent
5e14bfe087
commit
bab7440e49
3 changed files with 73 additions and 25 deletions
|
|
@ -54,6 +54,8 @@ private final class WorkspaceRemoteSessionController {
|
||||||
private var daemonReady = false
|
private var daemonReady = false
|
||||||
private var daemonBootstrapVersion: String?
|
private var daemonBootstrapVersion: String?
|
||||||
private var daemonRemotePath: String?
|
private var daemonRemotePath: String?
|
||||||
|
private var reconnectRetryCount = 0
|
||||||
|
private var reconnectWorkItem: DispatchWorkItem?
|
||||||
|
|
||||||
init(workspace: Workspace, configuration: WorkspaceRemoteConfiguration) {
|
init(workspace: Workspace, configuration: WorkspaceRemoteConfiguration) {
|
||||||
self.workspace = workspace
|
self.workspace = workspace
|
||||||
|
|
@ -76,6 +78,9 @@ private final class WorkspaceRemoteSessionController {
|
||||||
|
|
||||||
private func stopAllLocked() {
|
private func stopAllLocked() {
|
||||||
isStopping = true
|
isStopping = true
|
||||||
|
reconnectWorkItem?.cancel()
|
||||||
|
reconnectWorkItem = nil
|
||||||
|
reconnectRetryCount = 0
|
||||||
|
|
||||||
if let probeProcess {
|
if let probeProcess {
|
||||||
probeStdoutPipe?.fileHandleForReading.readabilityHandler = nil
|
probeStdoutPipe?.fileHandleForReading.readabilityHandler = nil
|
||||||
|
|
@ -107,8 +112,18 @@ private final class WorkspaceRemoteSessionController {
|
||||||
private func beginConnectionAttemptLocked() {
|
private func beginConnectionAttemptLocked() {
|
||||||
guard !isStopping else { return }
|
guard !isStopping else { return }
|
||||||
|
|
||||||
publishState(.connecting, detail: "Connecting to \(configuration.displayTarget)")
|
reconnectWorkItem = nil
|
||||||
publishDaemonStatus(.bootstrapping, detail: "Bootstrapping remote daemon on \(configuration.displayTarget)")
|
let connectDetail: String
|
||||||
|
let bootstrapDetail: String
|
||||||
|
if reconnectRetryCount > 0 {
|
||||||
|
connectDetail = "Reconnecting to \(configuration.displayTarget) (retry \(reconnectRetryCount))"
|
||||||
|
bootstrapDetail = "Bootstrapping remote daemon on \(configuration.displayTarget) (retry \(reconnectRetryCount))"
|
||||||
|
} else {
|
||||||
|
connectDetail = "Connecting to \(configuration.displayTarget)"
|
||||||
|
bootstrapDetail = "Bootstrapping remote daemon on \(configuration.displayTarget)"
|
||||||
|
}
|
||||||
|
publishState(.connecting, detail: connectDetail)
|
||||||
|
publishDaemonStatus(.bootstrapping, detail: bootstrapDetail)
|
||||||
do {
|
do {
|
||||||
let hello = try bootstrapDaemonLocked()
|
let hello = try bootstrapDaemonLocked()
|
||||||
daemonReady = true
|
daemonReady = true
|
||||||
|
|
@ -127,10 +142,11 @@ private final class WorkspaceRemoteSessionController {
|
||||||
daemonReady = false
|
daemonReady = false
|
||||||
daemonBootstrapVersion = nil
|
daemonBootstrapVersion = nil
|
||||||
daemonRemotePath = nil
|
daemonRemotePath = nil
|
||||||
let detail = "Remote daemon bootstrap failed: \(error.localizedDescription)"
|
let nextRetry = scheduleProbeRestartLocked(delay: 4.0)
|
||||||
|
let retrySuffix = Self.retrySuffix(retry: nextRetry, delay: 4.0)
|
||||||
|
let detail = "Remote daemon bootstrap failed: \(error.localizedDescription)\(retrySuffix)"
|
||||||
publishDaemonStatus(.error, detail: detail)
|
publishDaemonStatus(.error, detail: detail)
|
||||||
publishState(.error, detail: detail)
|
publishState(.error, detail: detail)
|
||||||
scheduleProbeRestartLocked(delay: 4.0)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -183,8 +199,9 @@ private final class WorkspaceRemoteSessionController {
|
||||||
probeStdoutPipe = stdoutPipe
|
probeStdoutPipe = stdoutPipe
|
||||||
probeStderrPipe = stderrPipe
|
probeStderrPipe = stderrPipe
|
||||||
} catch {
|
} catch {
|
||||||
publishState(.error, detail: "Failed to start SSH probe: \(error.localizedDescription)")
|
let nextRetry = scheduleProbeRestartLocked(delay: 3.0)
|
||||||
scheduleProbeRestartLocked(delay: 3.0)
|
let retrySuffix = Self.retrySuffix(retry: nextRetry, delay: 3.0)
|
||||||
|
publishState(.error, detail: "Failed to start SSH probe: \(error.localizedDescription)\(retrySuffix)")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -209,18 +226,27 @@ private final class WorkspaceRemoteSessionController {
|
||||||
let statusCode = process.terminationStatus
|
let statusCode = process.terminationStatus
|
||||||
let rawDetail = Self.bestErrorLine(stderr: probeStderrBuffer, stdout: probeStdoutBuffer)
|
let rawDetail = Self.bestErrorLine(stderr: probeStderrBuffer, stdout: probeStdoutBuffer)
|
||||||
let detail = rawDetail ?? "SSH probe exited with status \(statusCode)"
|
let detail = rawDetail ?? "SSH probe exited with status \(statusCode)"
|
||||||
publishState(.error, detail: "SSH probe to \(configuration.displayTarget) failed: \(detail)")
|
let nextRetry = scheduleProbeRestartLocked(delay: 3.0)
|
||||||
scheduleProbeRestartLocked(delay: 3.0)
|
let retrySuffix = Self.retrySuffix(retry: nextRetry, delay: 3.0)
|
||||||
|
publishState(.error, detail: "SSH probe to \(configuration.displayTarget) failed: \(detail)\(retrySuffix)")
|
||||||
}
|
}
|
||||||
|
|
||||||
private func scheduleProbeRestartLocked(delay: TimeInterval) {
|
@discardableResult
|
||||||
guard !isStopping else { return }
|
private func scheduleProbeRestartLocked(delay: TimeInterval) -> Int {
|
||||||
queue.asyncAfter(deadline: .now() + delay) { [weak self] in
|
guard !isStopping else { return reconnectRetryCount }
|
||||||
|
reconnectWorkItem?.cancel()
|
||||||
|
reconnectRetryCount += 1
|
||||||
|
let retryNumber = reconnectRetryCount
|
||||||
|
let workItem = DispatchWorkItem { [weak self] in
|
||||||
guard let self else { return }
|
guard let self else { return }
|
||||||
|
self.reconnectWorkItem = nil
|
||||||
guard !self.isStopping else { return }
|
guard !self.isStopping else { return }
|
||||||
guard self.probeProcess == nil else { return }
|
guard self.probeProcess == nil else { return }
|
||||||
self.beginConnectionAttemptLocked()
|
self.beginConnectionAttemptLocked()
|
||||||
}
|
}
|
||||||
|
reconnectWorkItem = workItem
|
||||||
|
queue.asyncAfter(deadline: .now() + delay, execute: workItem)
|
||||||
|
return retryNumber
|
||||||
}
|
}
|
||||||
|
|
||||||
private func consumeProbeStdoutData(_ data: Data) {
|
private func consumeProbeStdoutData(_ data: Data) {
|
||||||
|
|
@ -248,6 +274,9 @@ private final class WorkspaceRemoteSessionController {
|
||||||
let ports = Self.parseRemotePorts(line: line)
|
let ports = Self.parseRemotePorts(line: line)
|
||||||
desiredRemotePorts = Set(ports)
|
desiredRemotePorts = Set(ports)
|
||||||
portConflicts = portConflicts.intersection(desiredRemotePorts)
|
portConflicts = portConflicts.intersection(desiredRemotePorts)
|
||||||
|
reconnectWorkItem?.cancel()
|
||||||
|
reconnectWorkItem = nil
|
||||||
|
reconnectRetryCount = 0
|
||||||
publishState(.connected, detail: "Connected to \(configuration.displayTarget)")
|
publishState(.connected, detail: "Connected to \(configuration.displayTarget)")
|
||||||
reconcileForwardsLocked()
|
reconcileForwardsLocked()
|
||||||
}
|
}
|
||||||
|
|
@ -885,6 +914,11 @@ private final class WorkspaceRemoteSessionController {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds the parenthesised suffix appended to error details to announce the next retry.
///
/// - Parameters:
///   - retry: Retry number to display.
///   - delay: Delay before that retry; rounded to whole seconds and shown as at least 1.
/// - Returns: A string of the form `" (retry <retry> in <seconds>s)"`, including the leading space.
private static func retrySuffix(retry: Int, delay: TimeInterval) -> String {
|
||||||
|
let seconds = max(1, Int(delay.rounded()))
|
||||||
|
return " (retry \(retry) in \(seconds)s)"
|
||||||
|
}
|
||||||
|
|
||||||
private static func isLoopbackPortAvailable(port: Int) -> Bool {
|
private static func isLoopbackPortAvailable(port: Int) -> Bool {
|
||||||
guard port > 0 && port <= 65535 else { return false }
|
guard port > 0 && port <= 65535 else { return false }
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -64,16 +64,29 @@ Minimum RPC surface:
|
||||||
Protocol requirement:
|
Protocol requirement:
|
||||||
1. multiplexed framed streams (control + PTY + proxy data)
|
1. multiplexed framed streams (control + PTY + proxy data)
|
||||||
|
|
||||||
## 6. Proxying
|
## 6. Web Proxying (Browser-First)
|
||||||
|
|
||||||
Proxy endpoints (loopback only by default):
|
Goal: browsers in remote workspaces reach the web through the remote host's network, without per-service local port forwards.
|
||||||
1. HTTP CONNECT
|
|
||||||
2. SOCKS5
|
|
||||||
|
|
||||||
Behavior:
|
Model:
|
||||||
1. requests tunnel to daemon, daemon dials destinations
|
1. `cmux ssh` creates/uses one **proxy endpoint per SSH transport** (not per workspace, not per destination port).
|
||||||
2. websocket must work in both proxy modes
|
2. Browser panels opened in remote workspaces are auto-wired to that endpoint.
|
||||||
3. local bind conflicts return structured errors (+ optional next-port fallback)
|
3. Terminal/service port forwarding is **not** the browser path; keep it opt-in for explicit localhost workflows only.
|
||||||
|
|
||||||
|
Implementation:
|
||||||
|
1. local `cmuxd` runs a transport-scoped proxy broker (`127.0.0.1:<ephemeral>`), supporting:
|
||||||
|
- HTTP CONNECT
|
||||||
|
- SOCKS5
|
||||||
|
2. broker opens multiplexed proxy streams to `cmuxd-remote`; remote daemon performs outbound dials.
|
||||||
|
3. browser wiring uses workspace-scoped `WKWebsiteDataStore.proxyConfigurations`:
|
||||||
|
- primary: SOCKS5 (`ProxyConfiguration(socksv5Proxy:)`)
|
||||||
|
- fallback: HTTP CONNECT (`ProxyConfiguration(httpCONNECTProxy:)`)
|
||||||
|
4. browser panels in non-remote workspaces use no forced proxy config.
|
||||||
|
|
||||||
|
Failure + reconnect:
|
||||||
|
1. if proxy endpoint bind fails, return structured `proxy_unavailable` with actionable detail.
|
||||||
|
2. if transport drops, browser requests fail fast and workspace status shows the reconnect state with its retry count.
|
||||||
|
3. after reconnect, proxy broker and WKWebView proxy config are revalidated automatically.
|
||||||
|
|
||||||
## 7. Reconnect Semantics
|
## 7. Reconnect Semantics
|
||||||
|
|
||||||
|
|
@ -111,13 +124,13 @@ All cases require deterministic `MUST` assertions.
|
||||||
|
|
||||||
| ID | Scenario | MUST Assertions |
|
| ID | Scenario | MUST Assertions |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| W-001 | HTTP CONNECT | fixture response matches expected body |
|
| W-001 | browser auto wiring | remote workspace browser gets daemon-backed proxy automatically |
|
||||||
| W-002 | SOCKS5 | response parity with direct remote |
|
| W-002 | remote egress proof | remote workspace browser egress IP matches remote host, not local host |
|
||||||
| W-003 | websocket via CONNECT | echo integrity, no unexpected close |
|
| W-003 | websocket via CONNECT | echo integrity, no unexpected close |
|
||||||
| W-004 | websocket via SOCKS5 | echo integrity |
|
| W-004 | websocket via SOCKS5 | echo integrity |
|
||||||
| W-005 | port conflict | structured conflict error + fallback behavior |
|
| W-005 | proxy listener conflict | structured `proxy_unavailable` + fallback bind behavior |
|
||||||
| W-006 | concurrent PTY + proxy load | no PTY stall; proxy latency/error budget met |
|
| W-006 | concurrent PTY + proxy load | no PTY stall; proxy latency/error budget met |
|
||||||
| W-007 | browser auto wiring | browser workflow uses daemon-backed proxy automatically when remote session is active |
|
| W-007 | reconnect continuity | after transport reconnect, browser traffic resumes without manual proxy reconfiguration |
|
||||||
|
|
||||||
### 8.3 Reconnect
|
### 8.3 Reconnect
|
||||||
|
|
||||||
|
|
@ -147,5 +160,5 @@ All cases require deterministic `MUST` assertions.
|
||||||
|
|
||||||
## 10. Open Decisions
|
## 10. Open Decisions
|
||||||
|
|
||||||
1. proxy endpoint scope: per daemon transport vs per workspace
|
1. reconnect retry budget and backoff profile
|
||||||
2. reconnect retry budget and backoff profile
|
2. proxy auth policy (none vs optional credentials for local broker)
|
||||||
|
|
|
||||||
|
|
@ -171,6 +171,7 @@ def main() -> int:
|
||||||
last_daemon = last_remote.get("daemon") or {}
|
last_daemon = last_remote.get("daemon") or {}
|
||||||
detail = str(last_daemon.get("detail") or "")
|
detail = str(last_daemon.get("detail") or "")
|
||||||
_must("bootstrap failed" in detail.lower(), f"daemon error should mention bootstrap failure: {last_status}")
|
_must("bootstrap failed" in detail.lower(), f"daemon error should mention bootstrap failure: {last_status}")
|
||||||
|
_must(re.search(r"retry\s+\d+", detail.lower()) is not None, f"daemon error should include retry count: {last_status}")
|
||||||
|
|
||||||
# Lifecycle regression: disconnect with clear should reset remote/daemon metadata.
|
# Lifecycle regression: disconnect with clear should reset remote/daemon metadata.
|
||||||
disconnected = client._call(
|
disconnected = client._call(
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue