From b85cc38f4dcac98c627a0ec54df1b464289ec310 Mon Sep 17 00:00:00 2001 From: haritabh-z01 Date: Thu, 13 Nov 2025 02:37:45 +0530 Subject: [PATCH] chore: make recording start rpc async --- apps/desktop/src/hooks/useAudioCapture.ts | 46 ++++++++++++++++- apps/desktop/src/hooks/useRecording.ts | 6 +++ .../src/main/managers/recording-manager.ts | 45 +++++++++++++---- .../widget/components/FloatingButton.tsx | 23 ++++++++- .../platform/native-bridge-service.ts | 49 ++++++++++++++++--- .../Sources/SwiftHelper/main.swift | 3 +- 6 files changed, 152 insertions(+), 20 deletions(-) diff --git a/apps/desktop/src/hooks/useAudioCapture.ts b/apps/desktop/src/hooks/useAudioCapture.ts index ccfe12a..dbe0349 100644 --- a/apps/desktop/src/hooks/useAudioCapture.ts +++ b/apps/desktop/src/hooks/useAudioCapture.ts @@ -49,6 +49,7 @@ export const useAudioCapture = ({ const startCapture = useCallback(async () => { await mutexRef.current.runExclusive(async () => { try { + const overallStartTime = performance.now(); console.log("AudioCapture: Starting audio capture"); // Build audio constraints @@ -62,7 +63,13 @@ export const useAudioCapture = ({ // Add deviceId if user has a preference if (preferredMicrophoneName) { + const enumerateStartTime = performance.now(); const devices = await navigator.mediaDevices.enumerateDevices(); + const enumerateDuration = performance.now() - enumerateStartTime; + console.log( + `AudioCapture: enumerateDevices took ${enumerateDuration.toFixed(2)}ms`, + ); + const preferredDevice = devices.find( (device) => device.kind === "audioinput" && @@ -78,17 +85,33 @@ export const useAudioCapture = ({ } // Get microphone stream + const getUserMediaStartTime = performance.now(); streamRef.current = await navigator.mediaDevices.getUserMedia({ audio: audioConstraints, }); + const getUserMediaDuration = performance.now() - getUserMediaStartTime; + console.log( + `AudioCapture: getUserMedia took ${getUserMediaDuration.toFixed(2)}ms`, + ); // Create audio context + const audioContextStartTime = performance.now(); audioContextRef.current = new AudioContext({ sampleRate: SAMPLE_RATE }); + const audioContextDuration = performance.now() - audioContextStartTime; + console.log( + `AudioCapture: AudioContext creation took ${audioContextDuration.toFixed(2)}ms`, + ); // Load audio worklet + const workletStartTime = performance.now(); await audioContextRef.current.audioWorklet.addModule(audioWorkletUrl); + const workletDuration = performance.now() - workletStartTime; + console.log( + `AudioCapture: audioWorklet.addModule took ${workletDuration.toFixed(2)}ms`, + ); // Create nodes + const nodeCreationStartTime = performance.now(); sourceRef.current = audioContextRef.current.createMediaStreamSource( streamRef.current, ); @@ -96,10 +119,27 @@ export const useAudioCapture = ({ audioContextRef.current, "audio-recorder-processor", ); + const nodeCreationDuration = performance.now() - nodeCreationStartTime; + console.log( + `AudioCapture: Node creation took ${nodeCreationDuration.toFixed(2)}ms`, + ); + + // Track first frame timing + let firstFrameReceived = false; + const firstFrameStartTime = performance.now(); // Handle audio frames from worklet workletNodeRef.current.port.onmessage = async (event) => { if (event.data.type === "audioFrame") { + if (!firstFrameReceived) { + firstFrameReceived = true; + const firstFrameDuration = + performance.now() - firstFrameStartTime; + console.log( + `AudioCapture: First audio frame received after ${firstFrameDuration.toFixed(2)}ms`, + ); + } + const frame = event.data.frame; console.debug("AudioCapture: Received frame", { frameLength: frame.length, @@ -122,7 +162,11 @@ export const useAudioCapture = ({ // Connect audio graph sourceRef.current.connect(workletNodeRef.current); - console.log("AudioCapture: Audio capture started"); + const overallDuration = performance.now() - overallStartTime; + console.log( + `AudioCapture: Total startup took ${overallDuration.toFixed(2)}ms`, + ); + console.log("AudioCapture: Audio capture started successfully"); } catch (error) { console.error("AudioCapture: Failed to start capture:", error); throw error; diff --git a/apps/desktop/src/hooks/useRecording.ts b/apps/desktop/src/hooks/useRecording.ts index 24e1dae..a6fef88 100644 --- a/apps/desktop/src/hooks/useRecording.ts +++ b/apps/desktop/src/hooks/useRecording.ts @@ -70,8 +70,14 @@ export const useRecording = (): UseRecordingOutput => { }); const startRecording = useCallback(async () => { + const mutationStartTime = performance.now(); + console.log("Hook: Calling startRecording mutation"); // Request main process to start recording await startRecordingMutation.mutateAsync(); + const mutationDuration = performance.now() - mutationStartTime; + console.log( + `Hook: startRecording mutation took ${mutationDuration.toFixed(2)}ms`, + ); console.log("Hook: Recording fully started"); }, [startRecordingMutation]); diff --git a/apps/desktop/src/main/managers/recording-manager.ts b/apps/desktop/src/main/managers/recording-manager.ts index 9ceb3de..6b10833 100644 --- a/apps/desktop/src/main/managers/recording-manager.ts +++ b/apps/desktop/src/main/managers/recording-manager.ts @@ -156,7 +156,11 @@ export class RecordingManager extends EventEmitter { public async startRecording(mode: "ptt" | "hands-free") { await this.recordingMutex.runExclusive(async () => { + const startTime = performance.now(); + logger.audio.info("RecordingManager: startRecording called", { mode }); + // Check if transcription service is available and has models + const modelCheckStartTime = performance.now(); const transcriptionService = this.serviceManager.getService( "transcriptionService", ); @@ -171,6 +175,11 @@ export class RecordingManager extends EventEmitter { } const hasModels = await transcriptionService.isModelAvailable(); + const modelCheckDuration = performance.now() - modelCheckStartTime; + logger.audio.info( + `RecordingManager: Model availability check took ${modelCheckDuration.toFixed(2)}ms`, + ); + if (!hasModels) { logger.audio.error("No transcription models available"); // Show error dialog @@ -205,11 +214,20 @@ export class RecordingManager extends EventEmitter { const timestamp = new Date().toISOString().replace(/[:.]/g, "-"); this.currentSessionId = `session-${timestamp}`; - // Get accessibility context from global store + // Get accessibility context from global store (async, not awaited) appContextStore.refreshAccessibilityData(); + logger.audio.info( + "RecordingManager: Triggered accessibility context refresh (async)", + ); // Create audio file and WAV writer + const fileCreationStartTime = performance.now(); const audioFilePath = await this.createAudioFile(this.currentSessionId); + const fileCreationDuration = performance.now() - fileCreationStartTime; + logger.audio.info( + `RecordingManager: Audio file creation took ${fileCreationDuration.toFixed(2)}ms`, + ); + this.currentAudioRecording = { audioFilePath, wavWriter: new StreamingWavWriter(audioFilePath), @@ -220,19 +238,28 @@ export class RecordingManager extends EventEmitter { audioFilePath, }); - // Mute system audio - try { - const nativeBridge = this.serviceManager.getService("nativeBridge"); - if (nativeBridge) { - await nativeBridge.call("muteSystemAudio", {}); - } - } catch (error) { - logger.main.warn("Native bridge not available for audio muting"); + // Mute system audio (async, non-blocking) + const muteStartTime = performance.now(); + const nativeBridge = this.serviceManager.getService("nativeBridge"); + if (nativeBridge) { + nativeBridge + .call("muteSystemAudio", {}) + .then(() => { + const muteDuration = performance.now() - muteStartTime; + logger.audio.info( + `RecordingManager: System audio mute took ${muteDuration.toFixed(2)}ms`, + ); + }) + .catch((error) => { + logger.main.warn("Failed to mute system audio", { error }); + }); } this.setState("recording"); + const totalDuration = performance.now() - startTime; logger.audio.info("Recording started successfully", { sessionId: this.currentSessionId, + totalStartupDuration: `${totalDuration.toFixed(2)}ms`, }); return; diff --git a/apps/desktop/src/renderer/widget/pages/widget/components/FloatingButton.tsx b/apps/desktop/src/renderer/widget/pages/widget/components/FloatingButton.tsx index b3d57de..1f41428 100644 --- a/apps/desktop/src/renderer/widget/pages/widget/components/FloatingButton.tsx +++ b/apps/desktop/src/renderer/widget/pages/widget/components/FloatingButton.tsx @@ -51,6 +51,7 @@ const WaveformVisualization: React.FC<{ export const FloatingButton: React.FC = () => { const [isHovered, setIsHovered] = useState(false); const leaveTimeoutRef = useRef(null); // Ref for debounce timeout + const clickTimeRef = useRef(null); // Track when user clicked // tRPC mutation to control widget mouse events const setIgnoreMouseEvents = api.widget.setIgnoreMouseEvents.useMutation(); @@ -71,18 +72,38 @@ export const FloatingButton: React.FC = () => { const isStopping = recordingStatus.state === "stopping"; const isHandsFreeMode = recordingStatus.mode === "hands-free"; + // Track when recording state changes to "recording" after a click + useEffect(() => { + if (recordingStatus.state === "recording" && clickTimeRef.current) { + const timeSinceClick = performance.now() - clickTimeRef.current; + console.log( + `FAB: Recording state became 'recording' ${timeSinceClick.toFixed(2)}ms after user click`, + ); + clickTimeRef.current = null; // Reset + } + }, [recordingStatus.state]); + // Handler for widget click to start recording in hands-free mode const handleButtonClick = async (e: React.MouseEvent) => { e.preventDefault(); e.stopPropagation(); - console.log("FAB: Button clicked! Current status:", recordingStatus); + const clickTime = performance.now(); + clickTimeRef.current = clickTime; + console.log("FAB: Button clicked at", clickTime); + console.log("FAB: Current status:", recordingStatus); // Only start recording if not already recording if (recordingStatus.state === "idle") { + const startRecordingCallTime = performance.now(); await startRecording(); + const startRecordingReturnTime = performance.now(); + console.log( + `FAB: startRecording() call took ${(startRecordingReturnTime - startRecordingCallTime).toFixed(2)}ms to return`, + ); console.log("FAB: Started hands-free recording"); } else { console.log("FAB: Already recording, ignoring click"); + clickTimeRef.current = null; // Reset since we're not starting } }; diff --git a/apps/desktop/src/services/platform/native-bridge-service.ts b/apps/desktop/src/services/platform/native-bridge-service.ts index c6dddf6..7961728 100644 --- a/apps/desktop/src/services/platform/native-bridge-service.ts +++ b/apps/desktop/src/services/platform/native-bridge-service.ts @@ -232,11 +232,27 @@ export class NativeBridge extends EventEmitter { ); } - this.logger.debug("Sending RPC request", { - method, - id, - startedAt: new Date(startTime).toISOString(), - }); + // Log at INFO level for critical audio operations, DEBUG for others + const logLevel = + method === "muteSystemAudio" || method === "restoreSystemAudio" + ? "info" + : "debug"; + const logMessage = `Sending RPC request: ${method}`; + + if (logLevel === "info") { + this.logger.info(logMessage, { + method, + id, + startedAt: new Date(startTime).toISOString(), + }); + } else { + this.logger.debug(logMessage, { + method, + id, + startedAt: new Date(startTime).toISOString(), + }); + } + this.proc.stdin.write(JSON.stringify(requestPayload) + "\n", (err) => { if (err) { this.logger.error("Error writing to helper stdin", { @@ -247,7 +263,11 @@ export class NativeBridge extends EventEmitter { // Note: The promise might have already been set up, consider how to reject it. // For now, this error will be logged. The timeout will eventually reject. } else { - this.logger.debug("Successfully sent RPC request", { method, id }); + if (logLevel === "info") { + this.logger.info("Successfully sent RPC request", { method, id }); + } else { + this.logger.debug("Successfully sent RPC request", { method, id }); + } } }); @@ -265,15 +285,28 @@ export class NativeBridge extends EventEmitter { (error as any).data = resp.error.data; reject(error); } else { + // Log at INFO level for critical audio operations, DEBUG for others + const logLevel = + method === "muteSystemAudio" || method === "restoreSystemAudio" + ? "info" + : "debug"; + // Log the raw resp.result with timing information - this.logger.debug("Raw RPC response result received", { + const logData = { method, id, result: resp.result, startedAt: new Date(startTime).toISOString(), completedAt: new Date(completedAt).toISOString(), durationMs: duration, - }); + }; + + if (logLevel === "info") { + this.logger.info("RPC response received", logData); + } else { + this.logger.debug("Raw RPC response result received", logData); + } + // Here, we might need to validate resp.result against the specific method's result schema // For now, casting as any, but for type safety, validation is better. // Example: const resultValidation = RPCMethods[method].resultSchema.safeParse(resp.result); diff --git a/packages/native-helpers/swift-helper/Sources/SwiftHelper/main.swift b/packages/native-helpers/swift-helper/Sources/SwiftHelper/main.swift index 20e6165..5fa33d7 100644 --- a/packages/native-helpers/swift-helper/Sources/SwiftHelper/main.swift +++ b/packages/native-helpers/swift-helper/Sources/SwiftHelper/main.swift @@ -186,7 +186,8 @@ let swiftHelper = SwiftHelper() let ioBridge = IOBridge(jsonEncoder: JSONEncoder(), jsonDecoder: JSONDecoder()) // Start RPC processing in a background thread -DispatchQueue.global(qos: .userInitiated).async { +// Using .userInteractive QoS for high priority (reduces latency for audio muting) +DispatchQueue.global(qos: .userInteractive).async { FileHandle.standardError.write( "Starting IOBridge RPC processing in background thread...\n".data(using: .utf8)!) ioBridge.processRpcRequests()