From c3fd9817735bb68a360010cb526ca7eea5f8dc24 Mon Sep 17 00:00:00 2001 From: haritabh-z01 Date: Mon, 17 Nov 2025 21:59:47 +0530 Subject: [PATCH] fix: regression issue with local whisper models --- .../src/pipeline/core/pipeline-types.ts | 2 +- .../transcription/amical-cloud-provider.ts | 24 +------------- .../transcription/whisper-provider.ts | 4 +-- .../src/services/transcription-service.ts | 31 +++++++------------ 4 files changed, 15 insertions(+), 46 deletions(-) diff --git a/apps/desktop/src/pipeline/core/pipeline-types.ts b/apps/desktop/src/pipeline/core/pipeline-types.ts index 6191e73..609dffe 100644 --- a/apps/desktop/src/pipeline/core/pipeline-types.ts +++ b/apps/desktop/src/pipeline/core/pipeline-types.ts @@ -11,6 +11,7 @@ export { PipelineContext, SharedPipelineData } from "./context"; export interface TranscribeParams { audioData: Float32Array; speechProbability?: number; // Speech probability from frontend VAD (0-1) + flush?: boolean; // Whether to flush any buffered audio context: { vocabulary?: Map; accessibilityContext?: GetAccessibilityContextResult | null; @@ -35,7 +36,6 @@ export interface FormatParams { export interface TranscriptionProvider { readonly name: string; transcribe(params: TranscribeParams): Promise; - flush?(): Promise; // Optional flush method for providers that buffer } // Formatting provider interface diff --git a/apps/desktop/src/pipeline/providers/transcription/amical-cloud-provider.ts b/apps/desktop/src/pipeline/providers/transcription/amical-cloud-provider.ts index 562b19a..c4be269 100644 --- a/apps/desktop/src/pipeline/providers/transcription/amical-cloud-provider.ts +++ b/apps/desktop/src/pipeline/providers/transcription/amical-cloud-provider.ts @@ -45,9 +45,7 @@ export class AmicalCloudProvider implements TranscriptionProvider { }); } - async transcribe( - params: TranscribeParams & { flush?: boolean }, - ): Promise { + async transcribe(params: TranscribeParams): Promise { try { const { audioData, speechProbability = 1, 
flush = false } = params; @@ -104,26 +102,6 @@ export class AmicalCloudProvider implements TranscriptionProvider { } } - async flush(): Promise { - if (this.frameBuffer.length === 0) { - return ""; - } - - try { - const result = await this.processAudio(); - - // Clear buffer - this.frameBuffer = []; - this.frameBufferSpeechProbabilities = []; - this.currentSilenceFrameCount = 0; - - return result; - } catch (error) { - logger.transcription.error("Cloud flush error:", error); - throw error; - } - } - private async processAudio(): Promise { if (this.frameBuffer.length === 0) { return ""; diff --git a/apps/desktop/src/pipeline/providers/transcription/whisper-provider.ts b/apps/desktop/src/pipeline/providers/transcription/whisper-provider.ts index 92d883d..ed9b0a5 100644 --- a/apps/desktop/src/pipeline/providers/transcription/whisper-provider.ts +++ b/apps/desktop/src/pipeline/providers/transcription/whisper-provider.ts @@ -74,9 +74,7 @@ export class WhisperProvider implements TranscriptionProvider { } } - async transcribe( - params: TranscribeParams & { flush?: boolean }, - ): Promise { + async transcribe(params: TranscribeParams): Promise { try { await this.initializeWhisper(); diff --git a/apps/desktop/src/services/transcription-service.ts b/apps/desktop/src/services/transcription-service.ts index c31784f..8369e64 100644 --- a/apps/desktop/src/services/transcription-service.ts +++ b/apps/desktop/src/services/transcription-service.ts @@ -318,25 +318,18 @@ export class TranscriptionService { // Select the appropriate provider const provider = await this.selectProvider(); - // For providers that support flush, call it separately when final - let chunkTranscription = ""; - - if (isFinal && provider.flush) { - // If final chunk, flush the provider buffer - chunkTranscription = await provider.flush(); - } else { - // Normal transcription - chunkTranscription = await provider.transcribe({ - audioData: audioChunk, - speechProbability: speechProbability, // Now from VAD 
service - context: { - vocabulary: session.context.sharedData.vocabulary, - accessibilityContext: session.context.sharedData.accessibilityContext, - previousChunk, - aggregatedTranscription: aggregatedTranscription || undefined, - }, - }); - } + // Transcribe with flush parameter for final chunks + const chunkTranscription = await provider.transcribe({ + audioData: audioChunk, + speechProbability: speechProbability, // Now from VAD service + flush: isFinal, // Pass flush flag for final chunks + context: { + vocabulary: session.context.sharedData.vocabulary, + accessibilityContext: session.context.sharedData.accessibilityContext, + previousChunk, + aggregatedTranscription: aggregatedTranscription || undefined, + }, + }); // Accumulate the result only if Whisper returned something // (it returns empty string while buffering)