diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c51b29a3..7c822148 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -217,3 +217,45 @@ jobs: exit 1 fi fi + + - name: Run workspace churn typing-lag regression + run: | + set -euo pipefail + + APP="$(find "$HOME/Library/Developer/Xcode/DerivedData" -path "*/Build/Products/Debug/cmux DEV.app" -print -quit)" + if [ -z "${APP:-}" ] || [ ! -d "$APP" ]; then + echo "cmux DEV.app not found in DerivedData" >&2 + exit 1 + fi + + TAG="ci-lag" + SOCK="/tmp/cmux-debug-${TAG}.sock" + BUNDLE_ID="$( + /usr/libexec/PlistBuddy -c 'Print :CFBundleIdentifier' "$APP/Contents/Info.plist" 2>/dev/null \ + || echo 'com.cmuxterm.app.debug' + )" + + pkill -x "cmux DEV" || true + rm -f "$SOCK" "/tmp/cmux-${TAG}.sock" || true + defaults write "$BUNDLE_ID" socketControlMode -string full >/dev/null 2>&1 || true + + CMUX_TAG="$TAG" CMUX_SOCKET_PATH="$SOCK" CMUX_UI_TEST_MODE=1 "$APP/Contents/MacOS/cmux DEV" >/tmp/cmux-ci-lag.log 2>&1 & + APP_PID=$! + trap 'kill "$APP_PID" >/dev/null 2>&1 || true' EXIT + + for _ in {1..240}; do + [ -S "$SOCK" ] && break + sleep 0.25 + done + [ -S "$SOCK" ] || { echo "Socket not ready at $SOCK" >&2; exit 1; } + + CMUX_SOCKET_PATH="$SOCK" \ + CMUX_LAG_MAX_P95_RATIO=1.70 \ + CMUX_LAG_MAX_AVG_RATIO=1.70 \ + CMUX_LAG_MIN_BASELINE_P95_MS_FOR_RATIO=6.0 \ + CMUX_LAG_MIN_BASELINE_AVG_MS_FOR_RATIO=4.0 \ + CMUX_LAG_MAX_P95_DELTA_MS=20.0 \ + CMUX_LAG_MAX_AVG_DELTA_MS=12.0 \ + CMUX_LAG_MAX_CHURN_P95_MS=35 \ + CMUX_LAG_KEY_EVENTS=180 \ + python3 tests/test_workspace_churn_up_arrow_lag.py diff --git a/Sources/AppDelegate.swift b/Sources/AppDelegate.swift index 9054dd74..b2bd485e 100644 --- a/Sources/AppDelegate.swift +++ b/Sources/AppDelegate.swift @@ -3684,6 +3684,18 @@ final class AppDelegate: NSObject, NSApplicationDelegate, UNUserNotificationCent ) #endif guard let context else { return tabManager } + let alreadyActive = + tabManager === context.tabManager + && sidebarState === context.sidebarState + && sidebarSelectionState === context.sidebarSelectionState + if alreadyActive { +#if DEBUG + dlog( + "shortcut.sync.post source=\(source) beforeMgr=\(beforeManagerToken) afterMgr=\(debugManagerToken(tabManager)) chosen={\(debugContextToken(context))} nochange=1 \(debugShortcutRouteSnapshot())" + ) +#endif + return context.tabManager + } if let window = context.window ?? windowForMainWindowId(context.windowId) { setActiveMainWindow(window) } else { @@ -4483,6 +4495,13 @@ final class AppDelegate: NSObject, NSApplicationDelegate, UNUserNotificationCent #if DEBUG private let debugColorWorkspaceTitlePrefix = "Debug Color - " + private let debugPerfWorkspaceTitlePrefix = "Debug Perf - " + private var debugStressWorkspaceCreationInProgress = false + private var debugStressLagProbeEnabled = false + private let debugStressWorkspaceCount = 20 + private let debugStressPaneCount = 4 + private let debugStressTabsPerPane = 4 + private let debugStressYieldInterval = 4 @objc func openDebugScrollbackTab(_ sender: Any?) { guard let tabManager else { return } @@ -4533,6 +4552,280 @@ final class AppDelegate: NSObject, NSApplicationDelegate, UNUserNotificationCent } } + @objc func openDebugStressWorkspacesWithLoadedSurfaces(_ sender: Any?) { + guard !debugStressWorkspaceCreationInProgress else { return } + guard let tabManager else { return } + + debugStressLagProbeEnabled = true + debugStressWorkspaceCreationInProgress = true + Task { @MainActor [weak self] in + guard let self else { return } + defer { self.debugStressWorkspaceCreationInProgress = false } + + let totalStart = ProcessInfo.processInfo.systemUptime + let originalSelectedWorkspaceId = tabManager.selectedTabId + var created: [Workspace] = [] + created.reserveCapacity(self.debugStressWorkspaceCount) + var layoutFailures = 0 + var cumulativeWorkspaceMs: Double = 0 + var slowWorkspaceCount = 0 + var worstWorkspaceMs: Double = 0 + + dlog( + "stress.setup.start workspaces=\(self.debugStressWorkspaceCount) panes=\(self.debugStressPaneCount) " + + "tabsPerPane=\(self.debugStressTabsPerPane) lagProbe=1" + ) + + for index in 0..= 35 { + slowWorkspaceCount += 1 + } + + if workspaceMs >= 35 || ((index + 1) % 5 == 0) { + let pending = self.pendingDebugTerminalSurfaceCount(in: created) + dlog( + "stress.setup.workspace idx=\(index + 1)/\(self.debugStressWorkspaceCount) " + + "ms=\(String(format: "%.2f", workspaceMs)) failures=\(layoutFailures) pending=\(pending)" + ) + } + + if ((index + 1) % self.debugStressYieldInterval) == 0 { + await Task.yield() + } + } + + let creationElapsedMs = (ProcessInfo.processInfo.systemUptime - totalStart) * 1000.0 + let primeStats = await self.primeDebugStressWorkspacesForSurfaceLoad(created) + // Avoid synchronous "load all surfaces" waiting in this command path. + // Waiting for every background surface to be ready creates sustained + // main-actor churn and can starve typing responsiveness. + let loadStats = DebugStressSurfaceLoadStats( + pendingSurfaces: self.pendingDebugTerminalSurfaceCount(in: created), + attempts: 0, + elapsedMs: 0 + ) + let totalElapsedMs = (ProcessInfo.processInfo.systemUptime - totalStart) * 1000.0 + let avgWorkspaceMs = created.isEmpty ? 0 : (cumulativeWorkspaceMs / Double(created.count)) + let expectedSurfaceCount = self.debugStressWorkspaceCount + * self.debugStressPaneCount + * self.debugStressTabsPerPane + if let originalSelectedWorkspaceId, + tabManager.tabs.contains(where: { $0.id == originalSelectedWorkspaceId }) { + tabManager.selectedTabId = originalSelectedWorkspaceId + } + + dlog( + "stress.setup.done createMs=\(String(format: "%.2f", creationElapsedMs)) " + + "primeMs=\(String(format: "%.2f", primeStats.elapsedMs)) primedTabs=\(primeStats.activatedTabs) " + + "waitMs=\(String(format: "%.2f", loadStats.elapsedMs)) totalMs=\(String(format: "%.2f", totalElapsedMs)) " + + "workspaceAvgMs=\(String(format: "%.2f", avgWorkspaceMs)) workspaceWorstMs=\(String(format: "%.2f", worstWorkspaceMs)) " + + "workspaceSlowCount=\(slowWorkspaceCount) waitAttempts=\(loadStats.attempts) " + + "pendingSurfaces=\(loadStats.pendingSurfaces) expectedSurfaces=\(expectedSurfaceCount)" + ) + + NSLog( + "Debug stress workspaces: created=%d panesPerWorkspace=%d tabsPerPane=%d expectedSurfaces=%d layoutFailures=%d pendingSurfaces=%d createMs=%.2f primeMs=%.2f primedTabs=%d waitMs=%.2f totalMs=%.2f workspaceAvgMs=%.2f workspaceWorstMs=%.2f waitAttempts=%d", + self.debugStressWorkspaceCount, + self.debugStressPaneCount, + self.debugStressTabsPerPane, + expectedSurfaceCount, + layoutFailures, + loadStats.pendingSurfaces, + creationElapsedMs, + primeStats.elapsedMs, + primeStats.activatedTabs, + loadStats.elapsedMs, + totalElapsedMs, + avgWorkspaceMs, + worstWorkspaceMs, + loadStats.attempts + ) + } + } + + private struct DebugStressSurfacePrimeStats { + let activatedTabs: Int + let elapsedMs: Double + } + + private func primeDebugStressWorkspacesForSurfaceLoad( + _ workspaces: [Workspace] + ) async -> DebugStressSurfacePrimeStats { + guard !workspaces.isEmpty else { + return DebugStressSurfacePrimeStats(activatedTabs: 0, elapsedMs: 0) + } + + let primeStart = ProcessInfo.processInfo.systemUptime + var activatedTabs = 0 + + for (index, workspace) in workspaces.enumerated() { + activatedTabs += workspace.panels.values.reduce(into: 0) { count, panel in + if panel is TerminalPanel { + count += 1 + } + } + + if (index + 1) % debugStressYieldInterval == 0 || index == workspaces.count - 1 { + dlog( + "stress.setup.mount idx=\(index + 1)/\(workspaces.count) activatedTabs=\(activatedTabs)" + ) + await Task.yield() + } + } + + let elapsedMs = (ProcessInfo.processInfo.systemUptime - primeStart) * 1000.0 + return DebugStressSurfacePrimeStats(activatedTabs: activatedTabs, elapsedMs: elapsedMs) + } + + private func configureDebugStressWorkspaceLayout( + _ workspace: Workspace, + paneCount: Int, + tabsPerPane: Int + ) async -> Bool { + guard let topLeftPanelId = workspace.focusedTerminalPanel?.id ?? workspace.focusedPanelId else { + return false + } + guard let topRight = workspace.newTerminalSplit( + from: topLeftPanelId, + orientation: .horizontal, + focus: false + ) else { + return false + } + await Task.yield() + guard workspace.newTerminalSplit( + from: topLeftPanelId, + orientation: .vertical, + focus: false + ) != nil else { + return false + } + await Task.yield() + guard workspace.newTerminalSplit( + from: topRight.id, + orientation: .vertical, + focus: false + ) != nil else { + return false + } + await Task.yield() + + let paneIds = workspace.bonsplitController.allPaneIds + guard paneIds.count == paneCount else { return false } + + let additionalTabsPerPane = max(0, tabsPerPane - 1) + if additionalTabsPerPane > 0 { + for (paneIndex, paneId) in paneIds.enumerated() { + for tabOffset in 0.. Int { + var pending = 0 + for workspace in workspaces { + for panel in workspace.panels.values { + guard let terminalPanel = panel as? TerminalPanel else { continue } + if terminalPanel.surface.surface == nil { + pending += 1 + } + } + } + return pending + } + + private func debugStressLagSnapshot() -> ( + workspaceCount: Int, + terminalPanelCount: Int, + loadedSurfaceCount: Int, + selectedWorkspace: String + ) { + guard let tabManager else { + return (0, 0, 0, "nil") + } + var terminalPanelCount = 0 + var loadedSurfaceCount = 0 + for workspace in tabManager.tabs { + for panel in workspace.panels.values { + guard let terminalPanel = panel as? TerminalPanel else { continue } + terminalPanelCount += 1 + if terminalPanel.surface.surface != nil { + loadedSurfaceCount += 1 + } + } + } + let selectedWorkspace = tabManager.selectedTabId.map { String($0.uuidString.prefix(5)) } ?? "nil" + return ( + tabManager.tabs.count, + terminalPanelCount, + loadedSurfaceCount, + selectedWorkspace + ) + } + + private func logSlowShortcutMonitorLatencyIfNeeded( + event: NSEvent, + handledByShortcut: Bool, + elapsedMs: Double + ) { + guard debugStressLagProbeEnabled else { return } + guard event.type == .keyDown else { return } + + let normalizedFlags = event.modifierFlags + .intersection(.deviceIndependentFlagsMask) + .subtracting([.numericPad, .function, .capsLock]) + let isPlainTyping = normalizedFlags.isDisjoint(with: [.command, .control, .option]) + let thresholdMs: Double = event.isARepeat ? 1.5 : (isPlainTyping ? 2.5 : 6.0) + guard elapsedMs >= thresholdMs else { return } + + let snapshot = debugStressLagSnapshot() + dlog( + "stress.inputLag path=appMonitor ms=\(String(format: "%.2f", elapsedMs)) " + + "threshold=\(String(format: "%.2f", thresholdMs)) handled=\(handledByShortcut ? 1 : 0) " + + "plain=\(isPlainTyping ? 1 : 0) repeat=\(event.isARepeat ? 1 : 0) keyCode=\(event.keyCode) " + + "mods=\(event.modifierFlags.rawValue) workspaces=\(snapshot.workspaceCount) " + + "terminals=\(snapshot.terminalPanelCount) surfacesReady=\(snapshot.loadedSurfaceCount) " + + "selected=\(snapshot.selectedWorkspace)" + ) + } + private func sendTextWhenReady(_ text: String, to tab: Tab, attempt: Int = 0) { let maxAttempts = 60 if let terminalPanel = tab.focusedTerminalPanel, terminalPanel.surface.surface != nil { @@ -5132,24 +5425,35 @@ final class AppDelegate: NSObject, NSApplicationDelegate, UNUserNotificationCent let delayText = String(format: "%.2f", delayMs) dlog("key.latency path=appMonitor ms=\(delayText) keyCode=\(event.keyCode) mods=\(event.modifierFlags.rawValue) repeat=\(event.isARepeat ? 1 : 0)") } - let frType = NSApp.keyWindow?.firstResponder.map { String(describing: type(of: $0)) } ?? "nil" - dlog( - "monitor.keyDown: \(NSWindow.keyDescription(event)) fr=\(frType) addrBarId=\(self.browserAddressBarFocusedPanelId?.uuidString.prefix(8) ?? "nil") \(self.debugShortcutRouteSnapshot(event: event))" - ) + let shortcutMonitorTraceEnabled = + ProcessInfo.processInfo.environment["CMUX_SHORTCUT_MONITOR_TRACE"] == "1" + || UserDefaults.standard.bool(forKey: "cmuxShortcutMonitorTrace") + if shortcutMonitorTraceEnabled { + let frType = NSApp.keyWindow?.firstResponder.map { String(describing: type(of: $0)) } ?? "nil" + dlog( + "monitor.keyDown: \(NSWindow.keyDescription(event)) fr=\(frType) addrBarId=\(self.browserAddressBarFocusedPanelId?.uuidString.prefix(8) ?? "nil") \(self.debugShortcutRouteSnapshot(event: event))" + ) + } if let probeKind = self.developerToolsShortcutProbeKind(event: event) { self.logDeveloperToolsShortcutSnapshot(phase: "monitor.pre.\(probeKind)", event: event) } #endif - if self.handleCustomShortcut(event: event) { + let shortcutStart = ProcessInfo.processInfo.systemUptime + let handledByShortcut = self.handleCustomShortcut(event: event) +#if DEBUG + let shortcutElapsedMs = (ProcessInfo.processInfo.systemUptime - shortcutStart) * 1000.0 + self.logSlowShortcutMonitorLatencyIfNeeded( + event: event, + handledByShortcut: handledByShortcut, + elapsedMs: shortcutElapsedMs + ) +#endif + if handledByShortcut { #if DEBUG dlog(" → consumed by handleCustomShortcut") - DebugEventLog.shared.dump() #endif return nil // Consume the event } -#if DEBUG - DebugEventLog.shared.dump() -#endif return event // Pass through } self.handleBrowserOmnibarSelectionRepeatLifecycleEvent(event) @@ -5573,6 +5877,13 @@ final class AppDelegate: NSObject, NSApplicationDelegate, UNUserNotificationCent return true } + // Fast path for normal typing and terminal navigation keys (for example Up-arrow + // history): after command-palette/notification handling and browser omnibar + // arrow navigation above, plain key events have no app-level shortcut behavior. + if normalizedFlags.isEmpty { + return false + } + // Let omnibar-local Emacs navigation (Cmd/Ctrl+N/P) win while the browser // address bar is focused. Without this, app-level Cmd+N can steal focus. if shouldBypassAppShortcutForFocusedBrowserAddressBar(flags: flags, chars: chars) { diff --git a/tests/test_workspace_churn_up_arrow_lag.py b/tests/test_workspace_churn_up_arrow_lag.py new file mode 100755 index 00000000..3cadc43d --- /dev/null +++ b/tests/test_workspace_churn_up_arrow_lag.py @@ -0,0 +1,466 @@ +#!/usr/bin/env python3 +""" +Regression harness: compare typing latency before and after workspace churn. + +Scenario A (baseline): +1) Keep only the first workspace. +2) Seed shell history. +3) Measure per-key latency for repeated Up-arrow shortcuts. + +Scenario B (churn): +1) Keep only the first workspace. +2) Create N workspaces. +3) Visit every workspace (simulates clicking each tab), then return to the first. +4) Seed shell history. +5) Measure Up-arrow latency again. + +The test fails when churn latency regresses too far relative to baseline. +""" + +from __future__ import annotations + +import os +import select +import socket +import statistics +import subprocess +import sys +import threading +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Callable, Optional + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from cmux import cmux, cmuxError + +NEW_WORKSPACES = int(os.environ.get("CMUX_LAG_NEW_WORKSPACES", "20")) +SWITCH_PASSES = int(os.environ.get("CMUX_LAG_SWITCH_PASSES", "1")) +SWITCH_DELAY_S = float(os.environ.get("CMUX_LAG_SWITCH_DELAY_S", "0.06")) +HISTORY_SEED_LINES = int(os.environ.get("CMUX_LAG_HISTORY_LINES", "120")) +KEY_EVENTS = int(os.environ.get("CMUX_LAG_KEY_EVENTS", "180")) +KEY_DELAY_S = float(os.environ.get("CMUX_LAG_KEY_DELAY_S", "0.0")) +KEY_COMBO = os.environ.get("CMUX_LAG_KEY_COMBO", "up") + +MAX_P95_RATIO = float(os.environ.get("CMUX_LAG_MAX_P95_RATIO", "1.70")) +MAX_AVG_RATIO = float(os.environ.get("CMUX_LAG_MAX_AVG_RATIO", "1.70")) +MAX_CHURN_P95_MS = float(os.environ.get("CMUX_LAG_MAX_CHURN_P95_MS", "35.0")) +MAX_P95_DELTA_MS = float(os.environ.get("CMUX_LAG_MAX_P95_DELTA_MS", "20.0")) +MAX_AVG_DELTA_MS = float(os.environ.get("CMUX_LAG_MAX_AVG_DELTA_MS", "12.0")) +MIN_BASELINE_P95_MS_FOR_RATIO = float(os.environ.get("CMUX_LAG_MIN_BASELINE_P95_MS_FOR_RATIO", "6.0")) +MIN_BASELINE_AVG_MS_FOR_RATIO = float(os.environ.get("CMUX_LAG_MIN_BASELINE_AVG_MS_FOR_RATIO", "4.0")) +MAX_CPU_PERCENT = float(os.environ.get("CMUX_LAG_MAX_CPU_PERCENT", "180.0")) +ENFORCE_CPU = os.environ.get("CMUX_LAG_ENFORCE_CPU", "0") == "1" +ALLOW_MAIN_SOCKET = os.environ.get("CMUX_LAG_ALLOW_MAIN_SOCKET", "0") == "1" + + +@dataclass +class LatencyStats: + n: int + avg_ms: float + p50_ms: float + p95_ms: float + p99_ms: float + max_ms: float + + +class RawSocketClient: + def __init__(self, socket_path: str): + self.socket_path = socket_path + self.sock: Optional[socket.socket] = None + self.recv_buffer = "" + + def connect(self) -> None: + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + sock.settimeout(3.0) + sock.connect(self.socket_path) + self.sock = sock + + def close(self) -> None: + if self.sock is not None: + try: + self.sock.close() + finally: + self.sock = None + + def __enter__(self) -> RawSocketClient: + self.connect() + return self + + def __exit__(self, exc_type, exc_val, exc_tb) -> None: + self.close() + + def command(self, command: str, timeout_s: float = 2.0) -> str: + if self.sock is None: + raise cmuxError("Raw socket client not connected") + + self.sock.sendall((command + "\n").encode("utf-8")) + deadline = time.time() + timeout_s + + while True: + if "\n" in self.recv_buffer: + line, self.recv_buffer = self.recv_buffer.split("\n", 1) + return line + + remaining = deadline - time.time() + if remaining <= 0: + raise cmuxError(f"Timed out waiting for response to: {command}") + + ready, _, _ = select.select([self.sock], [], [], remaining) + if not ready: + raise cmuxError(f"Timed out waiting for response to: {command}") + + chunk = self.sock.recv(8192) + if not chunk: + raise cmuxError("Socket closed while waiting for response") + self.recv_buffer += chunk.decode("utf-8", errors="replace") + + +def wait_for(predicate: Callable[[], bool], timeout_s: float, step_s: float = 0.05) -> None: + start = time.time() + while time.time() - start < timeout_s: + if predicate(): + return + time.sleep(step_s) + raise cmuxError("Timed out waiting for condition") + + +def percentile(values: list[float], p: float) -> float: + if not values: + return 0.0 + if len(values) == 1: + return values[0] + sorted_values = sorted(values) + idx = (len(sorted_values) - 1) * p + lower = int(idx) + upper = min(lower + 1, len(sorted_values) - 1) + fraction = idx - lower + return sorted_values[lower] * (1 - fraction) + sorted_values[upper] * fraction + + +def compute_stats(values_ms: list[float]) -> LatencyStats: + return LatencyStats( + n=len(values_ms), + avg_ms=statistics.mean(values_ms) if values_ms else 0.0, + p50_ms=percentile(values_ms, 0.50), + p95_ms=percentile(values_ms, 0.95), + p99_ms=percentile(values_ms, 0.99), + max_ms=max(values_ms) if values_ms else 0.0, + ) + + +def get_cmux_pid_for_socket(socket_path: Optional[str]) -> Optional[int]: + if socket_path and os.path.exists(socket_path): + result = subprocess.run(["lsof", "-t", socket_path], capture_output=True, text=True) + if result.returncode == 0: + for line in result.stdout.strip().split("\n"): + line = line.strip() + if not line: + continue + try: + pid = int(line) + except ValueError: + continue + if pid != os.getpid(): + return pid + + result = subprocess.run( + ["pgrep", "-f", r"cmux DEV.*\.app/Contents/MacOS/cmux DEV"], + capture_output=True, + text=True, + ) + if result.returncode != 0: + return None + lines = [line.strip() for line in result.stdout.splitlines() if line.strip()] + return int(lines[0]) if lines else None + + +def resolve_target_socket() -> str: + socket_path = os.environ.get("CMUX_SOCKET_PATH") + if not socket_path: + raise cmuxError( + "CMUX_SOCKET_PATH is required. Point it to a tagged dev socket (for example /tmp/cmux-debug-.sock)." + ) + base = os.path.basename(socket_path) + if not ALLOW_MAIN_SOCKET and base in {"cmux.sock", "cmux-debug.sock"}: + raise cmuxError( + f"Refusing to run against main socket '{socket_path}'. Set CMUX_SOCKET_PATH to a tagged dev instance." + ) + return socket_path + + +def get_cpu(pid: int) -> float: + result = subprocess.run(["ps", "-p", str(pid), "-o", "%cpu="], capture_output=True, text=True) + if result.returncode != 0: + return 0.0 + try: + return float(result.stdout.strip()) + except ValueError: + return 0.0 + + +class CPUMonitor: + def __init__(self, pid: int, interval_s: float = 0.2): + self.pid = pid + self.interval_s = interval_s + self._stop = threading.Event() + self._thread = threading.Thread(target=self._run, daemon=True) + self.samples: list[float] = [] + + def _run(self) -> None: + while not self._stop.is_set(): + self.samples.append(get_cpu(self.pid)) + time.sleep(self.interval_s) + + def start(self) -> None: + self._thread.start() + + def stop(self) -> None: + self._stop.set() + self._thread.join(timeout=2.0) + + +def keep_only_first_workspace(client: cmux) -> str: + workspaces = sorted(client.list_workspaces(), key=lambda row: row[0]) + if not workspaces: + first_id = client.new_workspace() + client.select_workspace(first_id) + return first_id + + first_id = workspaces[0][1] + client.select_workspace(first_id) + for _index, wid, _title, _selected in reversed(workspaces[1:]): + if wid == first_id: + continue + client.close_workspace(wid) + + def only_first() -> bool: + current = sorted(client.list_workspaces(), key=lambda row: row[0]) + return len(current) == 1 and current[0][1] == first_id + + wait_for(only_first, timeout_s=6.0) + return first_id + + +def create_workspaces(client: cmux, count: int) -> list[str]: + created: list[str] = [] + for _ in range(count): + wid = client.new_workspace() + created.append(wid) + time.sleep(0.04) + return created + + +def cycle_all_workspaces(client: cmux, passes: int, delay_s: float) -> list[str]: + ids = [wid for _idx, wid, _title, _selected in sorted(client.list_workspaces(), key=lambda row: row[0])] + for _ in range(passes): + for wid in ids: + client.select_workspace(wid) + time.sleep(delay_s) + return ids + + +def focused_terminal_panel(client: cmux) -> str: + surfaces = client.list_surfaces() + if not surfaces: + raise cmuxError("No surfaces available in selected workspace") + focused = next(((idx, sid) for idx, sid, is_focused in surfaces if is_focused), None) + if focused is None: + idx, sid, _ = surfaces[0] + client.focus_surface(idx) + return sid + return focused[1] + + +def seed_history(client: cmux, lines: int) -> None: + for i in range(lines): + client.send_line(f"echo cmux-lag-seed-{i}") + + +def run_shortcut_latency_burst( + socket_path: str, + combo: str, + count: int, + delay_s: float, +) -> list[float]: + latencies_ms: list[float] = [] + with RawSocketClient(socket_path) as raw: + # Warm up the command path and responder chain. + for _ in range(5): + response = raw.command(f"simulate_shortcut {combo}") + if not response.startswith("OK"): + raise cmuxError(response) + + for _ in range(count): + start = time.perf_counter() + response = raw.command(f"simulate_shortcut {combo}") + elapsed_ms = (time.perf_counter() - start) * 1000.0 + if not response.startswith("OK"): + raise cmuxError(response) + latencies_ms.append(elapsed_ms) + if delay_s > 0: + time.sleep(delay_s) + + return latencies_ms + + +def maybe_write_sample(pid: Optional[int], prefix: str) -> Optional[Path]: + if pid is None: + return None + out = Path(f"/tmp/{prefix}_{pid}.txt") + result = subprocess.run(["sample", str(pid), "2"], capture_output=True, text=True) + out.write_text(result.stdout + result.stderr) + return out + + +def print_stats(label: str, stats: LatencyStats) -> None: + print(f"\n{label}") + print(f" events: {stats.n}") + print(f" avg_ms: {stats.avg_ms:.2f}") + print(f" p50_ms: {stats.p50_ms:.2f}") + print(f" p95_ms: {stats.p95_ms:.2f}") + print(f" p99_ms: {stats.p99_ms:.2f}") + print(f" max_ms: {stats.max_ms:.2f}") + + +def run_baseline_scenario(client: cmux, socket_path: str) -> tuple[str, LatencyStats]: + first_workspace_id = keep_only_first_workspace(client) + client.select_workspace(first_workspace_id) + panel_id = focused_terminal_panel(client) + seed_history(client, HISTORY_SEED_LINES) + latencies = run_shortcut_latency_burst( + socket_path=socket_path, + combo=KEY_COMBO, + count=KEY_EVENTS, + delay_s=KEY_DELAY_S, + ) + return panel_id, compute_stats(latencies) + + +def run_churn_scenario(client: cmux, socket_path: str, first_workspace_id: str) -> tuple[str, LatencyStats]: + first_workspace_id = keep_only_first_workspace(client) + _ = create_workspaces(client, NEW_WORKSPACES) + ordered_ids = cycle_all_workspaces(client, SWITCH_PASSES, SWITCH_DELAY_S) + + if first_workspace_id in ordered_ids: + client.select_workspace(first_workspace_id) + elif ordered_ids: + client.select_workspace(ordered_ids[0]) + + panel_id = focused_terminal_panel(client) + seed_history(client, HISTORY_SEED_LINES) + latencies = run_shortcut_latency_burst( + socket_path=socket_path, + combo=KEY_COMBO, + count=KEY_EVENTS, + delay_s=KEY_DELAY_S, + ) + return panel_id, compute_stats(latencies) + + +def main() -> int: + print("=" * 64) + print("Workspace Churn + Up-Arrow Latency Regression") + print("=" * 64) + + client: Optional[cmux] = None + pid: Optional[int] = None + first_workspace_id: Optional[str] = None + + try: + target_socket = resolve_target_socket() + client = cmux(socket_path=target_socket) + client.connect() + print(f"Using socket: {client.socket_path}") + + pid = get_cmux_pid_for_socket(client.socket_path) + if pid is None: + print("SKIP: cmux process not found for socket") + return 0 + + cpu_monitor = CPUMonitor(pid) + cpu_monitor.start() + + first_workspace_id = keep_only_first_workspace(client) + baseline_panel_id, baseline = run_baseline_scenario(client, client.socket_path) + print(f"Baseline panel: {baseline_panel_id}") + + churn_panel_id, churn = run_churn_scenario(client, client.socket_path, first_workspace_id) + print(f"Churn panel: {churn_panel_id}") + + cpu_monitor.stop() + cpu_samples = cpu_monitor.samples + cpu_avg = statistics.mean(cpu_samples) if cpu_samples else 0.0 + cpu_max = max(cpu_samples) if cpu_samples else 0.0 + + print_stats("Baseline", baseline) + print_stats("After workspace churn", churn) + + p95_ratio = churn.p95_ms / max(baseline.p95_ms, 0.001) + avg_ratio = churn.avg_ms / max(baseline.avg_ms, 0.001) + p95_delta_ms = churn.p95_ms - baseline.p95_ms + avg_delta_ms = churn.avg_ms - baseline.avg_ms + enforce_p95_ratio = baseline.p95_ms >= MIN_BASELINE_P95_MS_FOR_RATIO + enforce_avg_ratio = baseline.avg_ms >= MIN_BASELINE_AVG_MS_FOR_RATIO + + print("\nComparison") + print( + f" p95_ratio: {p95_ratio:.2f}x (max {MAX_P95_RATIO:.2f}x, " + f"enabled when baseline p95 >= {MIN_BASELINE_P95_MS_FOR_RATIO:.2f}ms)" + ) + print( + f" avg_ratio: {avg_ratio:.2f}x (max {MAX_AVG_RATIO:.2f}x, " + f"enabled when baseline avg >= {MIN_BASELINE_AVG_MS_FOR_RATIO:.2f}ms)" + ) + print(f" churn_p95_ms: {churn.p95_ms:.2f} (max {MAX_CHURN_P95_MS:.2f})") + print(f" p95_delta_ms: {p95_delta_ms:.2f} (max {MAX_P95_DELTA_MS:.2f})") + print(f" avg_delta_ms: {avg_delta_ms:.2f} (max {MAX_AVG_DELTA_MS:.2f})") + print(f" cpu_avg_pct: {cpu_avg:.2f}") + print(f" cpu_max_pct: {cpu_max:.2f}") + + failures: list[str] = [] + if enforce_p95_ratio and p95_ratio > MAX_P95_RATIO: + failures.append(f"p95 ratio {p95_ratio:.2f}x > {MAX_P95_RATIO:.2f}x") + if enforce_avg_ratio and avg_ratio > MAX_AVG_RATIO: + failures.append(f"avg ratio {avg_ratio:.2f}x > {MAX_AVG_RATIO:.2f}x") + if p95_delta_ms > MAX_P95_DELTA_MS: + failures.append(f"p95 delta {p95_delta_ms:.2f}ms > {MAX_P95_DELTA_MS:.2f}ms") + if avg_delta_ms > MAX_AVG_DELTA_MS: + failures.append(f"avg delta {avg_delta_ms:.2f}ms > {MAX_AVG_DELTA_MS:.2f}ms") + if churn.p95_ms > MAX_CHURN_P95_MS: + failures.append(f"churn p95 {churn.p95_ms:.2f}ms > {MAX_CHURN_P95_MS:.2f}ms") + if ENFORCE_CPU and cpu_max > MAX_CPU_PERCENT: + failures.append(f"cpu max {cpu_max:.2f}% > {MAX_CPU_PERCENT:.2f}%") + + if failures: + print("\nFAIL") + for item in failures: + print(f" - {item}") + sample_path = maybe_write_sample(pid, "cmux_workspace_churn_up_arrow_lag") + if sample_path: + print(f" sample_path: {sample_path}") + return 1 + + print("\nPASS") + return 0 + + except cmuxError as e: + print(f"FAIL: {e}") + sample_path = maybe_write_sample(pid, "cmux_workspace_churn_up_arrow_error") + if sample_path: + print(f"sample_path: {sample_path}") + return 1 + + finally: + if client is not None: + try: + if first_workspace_id: + client.select_workspace(first_workspace_id) + keep_only_first_workspace(client) + except Exception: + pass + client.close() + + +if __name__ == "__main__": + raise SystemExit(main())