fix: regression issue with local whisper models

2025-11-17 21:59:47 +05:30 · 2025-11-17 21:59:47 +05:30 · c3fd981773
commit c3fd981773
parent be7b2eda1e
4 changed files with 15 additions and 46 deletions
--- a/apps/desktop/src/pipeline/core/pipeline-types.ts
+++ b/apps/desktop/src/pipeline/core/pipeline-types.ts
@ -11,6 +11,7 @@ export { PipelineContext, SharedPipelineData } from "./context";
 export interface TranscribeParams {
  audioData: Float32Array;
  speechProbability?: number; // Speech probability from frontend VAD (0-1)
+  flush?: boolean; // Whether to flush any buffered audio
  context: {
    vocabulary?: Map<string, string>;
    accessibilityContext?: GetAccessibilityContextResult | null;
@ -35,7 +36,6 @@ export interface FormatParams {
 export interface TranscriptionProvider {
  readonly name: string;
  transcribe(params: TranscribeParams): Promise<string>;
-  flush?(): Promise<string>; // Optional flush method for providers that buffer
 }

 // Formatting provider interface
--- a/apps/desktop/src/pipeline/providers/transcription/amical-cloud-provider.ts
+++ b/apps/desktop/src/pipeline/providers/transcription/amical-cloud-provider.ts
@ -45,9 +45,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
    });
  }

-  async transcribe(
-    params: TranscribeParams & { flush?: boolean },
-  ): Promise<string> {
+  async transcribe(params: TranscribeParams): Promise<string> {
    try {
      const { audioData, speechProbability = 1, flush = false } = params;

@ -104,26 +102,6 @@ export class AmicalCloudProvider implements TranscriptionProvider {
    }
  }

-  async flush(): Promise<string> {
-    if (this.frameBuffer.length === 0) {
-      return "";
-    }
-
-    try {
-      const result = await this.processAudio();
-
-      // Clear buffer
-      this.frameBuffer = [];
-      this.frameBufferSpeechProbabilities = [];
-      this.currentSilenceFrameCount = 0;
-
-      return result;
-    } catch (error) {
-      logger.transcription.error("Cloud flush error:", error);
-      throw error;
-    }
-  }
-
  private async processAudio(): Promise<string> {
    if (this.frameBuffer.length === 0) {
      return "";
--- a/apps/desktop/src/pipeline/providers/transcription/whisper-provider.ts
+++ b/apps/desktop/src/pipeline/providers/transcription/whisper-provider.ts
@ -74,9 +74,7 @@ export class WhisperProvider implements TranscriptionProvider {
    }
  }

-  async transcribe(
-    params: TranscribeParams & { flush?: boolean },
-  ): Promise<string> {
+  async transcribe(params: TranscribeParams): Promise<string> {
    try {
      await this.initializeWhisper();

--- a/apps/desktop/src/services/transcription-service.ts
+++ b/apps/desktop/src/services/transcription-service.ts
@ -318,25 +318,18 @@ export class TranscriptionService {
    // Select the appropriate provider
    const provider = await this.selectProvider();

-    // For providers that support flush, call it separately when final
-    let chunkTranscription = "";
-
-    if (isFinal && provider.flush) {
-      // If final chunk, flush the provider buffer
-      chunkTranscription = await provider.flush();
-    } else {
-      // Normal transcription
-      chunkTranscription = await provider.transcribe({
-        audioData: audioChunk,
-        speechProbability: speechProbability, // Now from VAD service
-        context: {
-          vocabulary: session.context.sharedData.vocabulary,
-          accessibilityContext: session.context.sharedData.accessibilityContext,
-          previousChunk,
-          aggregatedTranscription: aggregatedTranscription || undefined,
-        },
-      });
-    }
+    // Transcribe with flush parameter for final chunks
+    const chunkTranscription = await provider.transcribe({
+      audioData: audioChunk,
+      speechProbability: speechProbability, // Now from VAD service
+      flush: isFinal, // Pass flush flag for final chunks
+      context: {
+        vocabulary: session.context.sharedData.vocabulary,
+        accessibilityContext: session.context.sharedData.accessibilityContext,
+        previousChunk,
+        aggregatedTranscription: aggregatedTranscription || undefined,
+      },
+    });

    // Accumulate the result only if Whisper returned something
    // (it returns empty string while buffering)