From b32064c53c0c150fc8ddc6a5a24c08ec2b8d96c7 Mon Sep 17 00:00:00 2001 From: Naomi Chopra Date: Tue, 8 Jul 2025 06:19:20 +0530 Subject: [PATCH] chore: continously write to audio file to avoid memory consumption + audio loss on crash --- apps/desktop/src/main/logger.ts | 11 +- .../src/main/managers/recording-manager.ts | 1 - .../src/services/transcription-service.ts | 56 ++----- .../desktop/src/utils/streaming-wav-writer.ts | 147 ++++++++++++++++++ 4 files changed, 171 insertions(+), 44 deletions(-) create mode 100644 apps/desktop/src/utils/streaming-wav-writer.ts diff --git a/apps/desktop/src/main/logger.ts b/apps/desktop/src/main/logger.ts index 23a1b7f..d25404d 100644 --- a/apps/desktop/src/main/logger.ts +++ b/apps/desktop/src/main/logger.ts @@ -16,8 +16,15 @@ const envLogLevel = process.env.LOG_LEVEL as | "info" | "debug" | undefined; -const defaultFileLevel: "debug" | "info" = isDev ? "debug" : "info"; -const defaultConsoleLevel: "debug" | "warn" = isDev ? "debug" : "warn"; + +// If LOG_DEBUG_SCOPES is set, we need to allow debug messages through the transport +// so they can be filtered by scope later +const hasDebugScopes = !!process.env.LOG_DEBUG_SCOPES?.trim(); + +const defaultFileLevel: "debug" | "info" = + isDev || hasDebugScopes ? "debug" : "info"; +const defaultConsoleLevel: "debug" | "warn" = + isDev || hasDebugScopes ? "debug" : "warn"; log.transports.file.level = envLogLevel || defaultFileLevel; log.transports.console.level = envLogLevel || defaultConsoleLevel; diff --git a/apps/desktop/src/main/managers/recording-manager.ts b/apps/desktop/src/main/managers/recording-manager.ts index 92727f4..4b4396e 100644 --- a/apps/desktop/src/main/managers/recording-manager.ts +++ b/apps/desktop/src/main/managers/recording-manager.ts @@ -173,7 +173,6 @@ export class RecordingManager extends EventEmitter { } public async stopRecording() { - console.error("stopRecording called", this.recordingState); await this.recordingMutex.runExclusive(async () => { // Check if recording if (this.recordingState !== "recording") { diff --git a/apps/desktop/src/services/transcription-service.ts b/apps/desktop/src/services/transcription-service.ts index d1ee6b9..aaf92f4 100644 --- a/apps/desktop/src/services/transcription-service.ts +++ b/apps/desktop/src/services/transcription-service.ts @@ -16,16 +16,16 @@ import { VADService } from "./vad-service"; import { app } from "electron"; import * as fs from "node:fs"; import * as path from "node:path"; -import { convertRawToWav } from "../utils/audio-converter"; + +import { StreamingWavWriter } from "../utils/streaming-wav-writer"; import { Mutex } from "async-mutex"; /** * Service for audio transcription and optional formatting */ interface ExtendedStreamingSession extends StreamingSession { - audioFileStream?: fs.WriteStream; + wavWriter?: StreamingWavWriter; audioFilePath?: string; - audioBuffers?: Float32Array[]; } export class TranscriptionService { @@ -217,11 +217,14 @@ export class TranscriptionService { // Create audio file for this session const audioFilePath = await this.createAudioFile(sessionId); + // Create streaming WAV writer + const wavWriter = new StreamingWavWriter(audioFilePath); + session = { context: streamingContext, transcriptionResults: [], audioFilePath, - audioBuffers: [], + wavWriter, }; this.streamingSessions.set(sessionId, session); @@ -231,9 +234,9 @@ export class TranscriptionService { }); } - // Buffer audio chunks - we'll write WAV at the end - if (audioChunk.length > 0) { - session.audioBuffers!.push(audioChunk); + // Write audio chunk to WAV file immediately + if (audioChunk.length > 0 && session.wavWriter) { + await session.wavWriter.appendAudio(audioChunk); } // Process chunk if it has content @@ -324,42 +327,13 @@ export class TranscriptionService { } */ - // Convert buffered audio to WAV and save - if ( - session.audioBuffers && - session.audioBuffers.length > 0 && - session.audioFilePath - ) { - // Concatenate all Float32Arrays - const totalLength = session.audioBuffers.reduce( - (acc, arr) => acc + arr.length, - 0, - ); - const combinedFloat32 = new Float32Array(totalLength); - let offset = 0; - for (const arr of session.audioBuffers) { - combinedFloat32.set(arr, offset); - offset += arr.length; - } - - // Convert Float32Array to Buffer for WAV conversion - const buffer = Buffer.alloc(combinedFloat32.length * 2); - for (let i = 0; i < combinedFloat32.length; i++) { - const sample = Math.max(-1, Math.min(1, combinedFloat32[i])); - buffer.writeInt16LE(Math.floor(sample * 32767), i * 2); - } - const combinedBuffer = buffer; - - // Convert to WAV - const wavBuffer = convertRawToWav(combinedBuffer); - - // Write WAV file - await fs.promises.writeFile(session.audioFilePath, wavBuffer); - - logger.transcription.info("Saved audio as WAV file", { + // Finalize the WAV file + if (session.wavWriter) { + await session.wavWriter.finalize(); + logger.transcription.info("Finalized WAV file", { sessionId, filePath: session.audioFilePath, - size: wavBuffer.length, + dataSize: session.wavWriter.getDataSize(), }); } diff --git a/apps/desktop/src/utils/streaming-wav-writer.ts b/apps/desktop/src/utils/streaming-wav-writer.ts new file mode 100644 index 0000000..b4bd6c9 --- /dev/null +++ b/apps/desktop/src/utils/streaming-wav-writer.ts @@ -0,0 +1,147 @@ +import * as fs from "node:fs"; +import { logger } from "../main/logger"; + +/** + * StreamingWavWriter allows incremental writing of audio data to a WAV file. + * It writes a placeholder header initially and updates it when finalized. + */ +export class StreamingWavWriter { + private fileStream: fs.WriteStream; + private dataSize = 0; + private sampleRate: number; + private channels: number; + private bitDepth: number; + private isFinalized = false; + + constructor( + filePath: string, + sampleRate = 16000, + channels = 1, + bitDepth = 16, + ) { + this.sampleRate = sampleRate; + this.channels = channels; + this.bitDepth = bitDepth; + + // Create write stream + this.fileStream = fs.createWriteStream(filePath); + + // Write initial WAV header with placeholder sizes + this.writeHeader(); + } + + /** + * Write WAV header with current or placeholder sizes + */ + private writeHeader(): void { + const header = Buffer.alloc(44); + + // RIFF chunk + header.write("RIFF", 0); + header.writeUInt32LE(this.dataSize + 36, 4); // File size - 8 + header.write("WAVE", 8); + + // fmt sub-chunk + header.write("fmt ", 12); + header.writeUInt32LE(16, 16); // Sub-chunk size + header.writeUInt16LE(1, 20); // Audio format (1 = PCM) + header.writeUInt16LE(this.channels, 22); + header.writeUInt32LE(this.sampleRate, 24); + header.writeUInt32LE( + (this.sampleRate * this.channels * this.bitDepth) / 8, + 28, + ); // Byte rate + header.writeUInt16LE((this.channels * this.bitDepth) / 8, 32); // Block align + header.writeUInt16LE(this.bitDepth, 34); + + // data sub-chunk + header.write("data", 36); + header.writeUInt32LE(this.dataSize, 40); + + this.fileStream.write(header); + } + + /** + * Append audio data to the WAV file + * @param audioData Float32Array of audio samples + */ + async appendAudio(audioData: Float32Array): Promise { + if (this.isFinalized) { + throw new Error("Cannot append to finalized WAV file"); + } + + // Convert Float32Array to Int16 buffer + const buffer = Buffer.alloc(audioData.length * 2); + for (let i = 0; i < audioData.length; i++) { + const sample = Math.max(-1, Math.min(1, audioData[i])); + buffer.writeInt16LE(Math.floor(sample * 32767), i * 2); + } + + // Write to file + await new Promise((resolve, reject) => { + this.fileStream.write(buffer, (err) => { + if (err) reject(err); + else resolve(); + }); + }); + + this.dataSize += buffer.length; + + logger.transcription.debug("Appended audio to WAV file", { + samplesWritten: audioData.length, + bytesWritten: buffer.length, + totalDataSize: this.dataSize, + }); + } + + /** + * Finalize the WAV file by updating the header with correct sizes + */ + async finalize(): Promise { + if (this.isFinalized) return; + + this.isFinalized = true; + + // Close the stream + await new Promise((resolve) => { + this.fileStream.end(() => resolve()); + }); + + // Reopen file to update header with correct sizes + const fd = await fs.promises.open(this.fileStream.path as string, "r+"); + + try { + // Update file size in RIFF header + const fileSizeBuffer = Buffer.alloc(4); + fileSizeBuffer.writeUInt32LE(this.dataSize + 36, 0); + await fd.write(fileSizeBuffer, 0, 4, 4); + + // Update data size in data sub-chunk + const dataSizeBuffer = Buffer.alloc(4); + dataSizeBuffer.writeUInt32LE(this.dataSize, 0); + await fd.write(dataSizeBuffer, 0, 4, 40); + + logger.transcription.info("Finalized WAV file", { + path: this.fileStream.path, + dataSize: this.dataSize, + duration: this.dataSize / 2 / this.sampleRate, // seconds + }); + } finally { + await fd.close(); + } + } + + /** + * Get the current size of audio data written + */ + getDataSize(): number { + return this.dataSize; + } + + /** + * Get the file path + */ + getFilePath(): string { + return this.fileStream.path as string; + } +}