From 95ce6bea36ffc52c9936299094dc43e6c0324f9d Mon Sep 17 00:00:00 2001 From: Naomi Chopra Date: Sat, 12 Jul 2025 10:06:12 +0530 Subject: [PATCH] chore: decouple audio file logging + transcription svc --- .../src/main/managers/recording-manager.ts | 76 ++++++++++++--- .../src/services/transcription-service.ts | 94 ++++++------------- .../desktop/src/utils/streaming-wav-writer.ts | 4 + 3 files changed, 96 insertions(+), 78 deletions(-) diff --git a/apps/desktop/src/main/managers/recording-manager.ts b/apps/desktop/src/main/managers/recording-manager.ts index 4b4396e..69a3900 100644 --- a/apps/desktop/src/main/managers/recording-manager.ts +++ b/apps/desktop/src/main/managers/recording-manager.ts @@ -1,10 +1,13 @@ -import { ipcMain } from "electron"; +import { ipcMain, app } from "electron"; import { EventEmitter } from "node:events"; import { logger, logPerformance } from "../logger"; import { ServiceManager } from "./service-manager"; import type { RecordingState } from "../../types/recording"; import { Mutex } from "async-mutex"; import type { ShortcutManager } from "../services/shortcut-manager"; +import { StreamingWavWriter } from "../../utils/streaming-wav-writer"; +import * as fs from "node:fs"; +import * as path from "node:path"; export type RecordingMode = "idle" | "ptt" | "hands-free"; @@ -17,6 +20,10 @@ export class RecordingManager extends EventEmitter { private recordingState: RecordingState = "idle"; private recordingMutex = new Mutex(); private recordingMode: RecordingMode = "idle"; + private currentAudioRecording: { + audioFilePath: string; + wavWriter: StreamingWavWriter; + } | null = null; constructor(private serviceManager: ServiceManager) { super(); @@ -87,6 +94,26 @@ export class RecordingManager extends EventEmitter { this.emit("mode-changed", this.getRecordingMode()); } + /** + * Create audio file for recording session + */ + private async createAudioFile(sessionId: string): Promise { + // Create audio directory in app temp path + const audioDir = path.join(app.getPath("temp"), "amical-audio"); + await fs.promises.mkdir(audioDir, { recursive: true }); + + // Create file path + const timestamp = new Date().toISOString().replace(/[:.]/g, "-"); + const filePath = path.join(audioDir, `audio-${sessionId}-${timestamp}.wav`); + + logger.audio.info("Created audio file for session", { + sessionId, + filePath, + }); + + return filePath; + } + private setupIPCHandlers(): void { // Handle audio data chunks from renderer ipcMain.handle( @@ -148,21 +175,28 @@ export class RecordingManager extends EventEmitter { const timestamp = new Date().toISOString().replace(/[:.]/g, "-"); this.currentSessionId = `session-${timestamp}`; + // Create audio file and WAV writer + const audioFilePath = await this.createAudioFile(this.currentSessionId); + this.currentAudioRecording = { + audioFilePath, + wavWriter: new StreamingWavWriter(audioFilePath), + }; + + logger.audio.info("Audio recording initialized", { + sessionId: this.currentSessionId, + audioFilePath, + }); + // Mute system audio try { const swiftBridge = this.serviceManager.getService("swiftIOBridge"); if (swiftBridge) { - await swiftBridge.call("muteSystemAudio", {}); + //await swiftBridge.call("muteSystemAudio", {}); } } catch (error) { logger.main.warn("Swift bridge not available for audio muting"); } - // Refresh accessibility context - fire and forget - // appContextStore.refreshAccessibilityData(); - - // TODO: Preload models if needed (Phase 2) - this.setState("recording"); logger.audio.info("Recording started successfully", { sessionId: this.currentSessionId, @@ -255,8 +289,10 @@ export class RecordingManager extends EventEmitter { } // Session should already exist from startRecording - if (!this.currentSessionId) { - logger.audio.error("No session ID found while handling audio chunk"); + if (!this.currentSessionId || !this.currentAudioRecording) { + logger.audio.error( + "No session ID or audio recording found while handling audio chunk", + ); return; } @@ -266,6 +302,8 @@ export class RecordingManager extends EventEmitter { return; } + await this.currentAudioRecording.wavWriter.appendAudio(chunk); + try { const transcriptionService = this.serviceManager.getService( "transcriptionService", @@ -275,12 +313,13 @@ export class RecordingManager extends EventEmitter { } const startTime = Date.now(); - // Process the chunk - pass isFinal flag + // Process the chunk - pass isFinal flag and audio file path const transcriptionResult = await transcriptionService.processStreamingChunk({ sessionId: this.currentSessionId, audioChunk: chunk, isFinal: isFinalChunk, + audioFilePath: this.currentAudioRecording.audioFilePath, }); logger.audio.debug("Processed audio chunk", { @@ -292,6 +331,14 @@ export class RecordingManager extends EventEmitter { // If this was the final chunk, handle completion if (isFinalChunk) { + // Finalize the WAV file + await this.currentAudioRecording.wavWriter.finalize(); + logger.audio.info("Finalized WAV file", { + sessionId: this.currentSessionId, + filePath: this.currentAudioRecording.audioFilePath, + dataSize: this.currentAudioRecording.wavWriter.getDataSize(), + }); + logPerformance("streaming transcription complete", startTime, { sessionId: this.currentSessionId, resultLength: transcriptionResult?.length || 0, @@ -308,8 +355,9 @@ export class RecordingManager extends EventEmitter { await this.pasteTranscription(transcriptionResult); } - // Clean up session + // Clean up session and audio recording this.currentSessionId = null; + this.currentAudioRecording = null; // Ensure state is idle after completion if (this.recordingState === "stopping") { @@ -320,8 +368,9 @@ export class RecordingManager extends EventEmitter { logger.audio.error("Error processing audio chunk:", error); if (isFinalChunk) { - // Clean up session on error + // Clean up session and audio recording on error this.currentSessionId = null; + this.currentAudioRecording = null; this.setState("error"); } } @@ -363,8 +412,9 @@ export class RecordingManager extends EventEmitter { await this.stopRecording(); } - // Clear any active session + // Clear any active session and audio recording this.currentSessionId = null; + this.currentAudioRecording = null; this.setState("idle"); } } diff --git a/apps/desktop/src/services/transcription-service.ts b/apps/desktop/src/services/transcription-service.ts index aaf92f4..b1d8284 100644 --- a/apps/desktop/src/services/transcription-service.ts +++ b/apps/desktop/src/services/transcription-service.ts @@ -13,26 +13,16 @@ import { createTranscription } from "../db/transcriptions"; import { logger } from "../main/logger"; import { v4 as uuid } from "uuid"; import { VADService } from "./vad-service"; -import { app } from "electron"; -import * as fs from "node:fs"; -import * as path from "node:path"; - -import { StreamingWavWriter } from "../utils/streaming-wav-writer"; import { Mutex } from "async-mutex"; /** * Service for audio transcription and optional formatting */ -interface ExtendedStreamingSession extends StreamingSession { - wavWriter?: StreamingWavWriter; - audioFilePath?: string; -} - export class TranscriptionService { private whisperProvider: WhisperProvider; private openRouterProvider: OpenRouterProvider | null = null; private formatterEnabled = false; - private streamingSessions: Map = new Map(); + private streamingSessions: Map = new Map(); private vadService: VADService | null; private settingsService: SettingsService; private vadMutex: Mutex; @@ -143,26 +133,6 @@ export class TranscriptionService { } } - /** - * Create audio file for recording session - */ - private async createAudioFile(sessionId: string): Promise { - // Create audio directory in app temp path - const audioDir = path.join(app.getPath("temp"), "amical-audio"); - await fs.promises.mkdir(audioDir, { recursive: true }); - - // Create file path - const timestamp = new Date().toISOString().replace(/[:.]/g, "-"); - const filePath = path.join(audioDir, `audio-${sessionId}-${timestamp}.wav`); - - logger.transcription.info("Created audio file for session", { - sessionId, - filePath, - }); - - return filePath; - } - /** * Process a single audio chunk in streaming mode */ @@ -170,8 +140,9 @@ export class TranscriptionService { sessionId: string; audioChunk: Float32Array; isFinal?: boolean; + audioFilePath?: string; }): Promise { - const { sessionId, audioChunk, isFinal = false } = options; + const { sessionId, audioChunk, isFinal = false, audioFilePath } = options; // Run VAD on the audio chunk let speechProbability = 0; @@ -198,6 +169,7 @@ export class TranscriptionService { // Acquire transcription mutex await this.transcriptionMutex.acquire(); + // Auto-create session if it doesn't exist let session = this.streamingSessions.get(sessionId); if (!session) { @@ -214,31 +186,18 @@ export class TranscriptionService { streamingContext.sharedData.accessibilityContext = appContextStore.getAccessibilityContext(); - // Create audio file for this session - const audioFilePath = await this.createAudioFile(sessionId); - - // Create streaming WAV writer - const wavWriter = new StreamingWavWriter(audioFilePath); - session = { context: streamingContext, transcriptionResults: [], - audioFilePath, - wavWriter, }; this.streamingSessions.set(sessionId, session); + logger.transcription.info("Started streaming session", { sessionId, - audioFilePath, }); } - // Write audio chunk to WAV file immediately - if (audioChunk.length > 0 && session.wavWriter) { - await session.wavWriter.appendAudio(audioChunk); - } - // Process chunk if it has content if (audioChunk.length > 0) { // Direct frame to Whisper - it will handle aggregation and VAD internally @@ -285,7 +244,7 @@ export class TranscriptionService { // Release transcription mutex this.transcriptionMutex.release(); - let completeTranscriptionTillNow = session.transcriptionResults + const completeTranscriptionTillNow = session.transcriptionResults .join(" ") .trim(); @@ -302,13 +261,11 @@ export class TranscriptionService { chunkCount: session.transcriptionResults.length, }); - // Format if enabled (currently disabled with && false) - // Commenting out to fix TypeScript errors since this code path is never executed - /* - if (this.formatterEnabled && this.openRouterProvider && false) { + if (this.formatterEnabled && this.openRouterProvider) { + try { const style = session.context.sharedData.userPreferences?.formattingStyle; - completeTranscription = await this.openRouterProvider.format({ + const formattedText = await this.openRouterProvider.format({ text: completeTranscription, context: { style, @@ -324,24 +281,31 @@ export class TranscriptionService { aggregatedTranscription: completeTranscription, }, }); - } - */ - // Finalize the WAV file - if (session.wavWriter) { - await session.wavWriter.finalize(); - logger.transcription.info("Finalized WAV file", { - sessionId, - filePath: session.audioFilePath, - dataSize: session.wavWriter.getDataSize(), - }); + logger.transcription.info("Text formatted successfully", { + sessionId, + originalLength: completeTranscription.length, + formattedLength: formattedText.length, + }); + + completeTranscription = formattedText; + } catch (error) { + logger.transcription.error( + "Formatting failed, using unformatted text", + { + sessionId, + error, + }, + ); + // Continue with unformatted text + } } // Save directly to database logger.transcription.info("Saving transcription with audio file", { sessionId, - audioFilePath: session.audioFilePath, - hasAudioFile: !!session.audioFilePath, + audioFilePath, + hasAudioFile: !!audioFilePath, }); await createTranscription({ @@ -350,7 +314,7 @@ export class TranscriptionService { duration: session.context.sharedData.audioMetadata?.duration, speechModel: "whisper-local", formattingModel: this.formatterEnabled ? "openrouter" : undefined, - audioFile: session.audioFilePath, + audioFile: audioFilePath, meta: { sessionId, source: session.context.sharedData.audioMetadata?.source, diff --git a/apps/desktop/src/utils/streaming-wav-writer.ts b/apps/desktop/src/utils/streaming-wav-writer.ts index b4bd6c9..1b6b791 100644 --- a/apps/desktop/src/utils/streaming-wav-writer.ts +++ b/apps/desktop/src/utils/streaming-wav-writer.ts @@ -66,6 +66,10 @@ export class StreamingWavWriter { * @param audioData Float32Array of audio samples */ async appendAudio(audioData: Float32Array): Promise { + if (!audioData.length) { + return; + } + if (this.isFinalized) { throw new Error("Cannot append to finalized WAV file"); }