fix: regression issue with local whisper models

This commit is contained in:
haritabh-z01 2025-11-17 21:59:47 +05:30
parent be7b2eda1e
commit c3fd981773
4 changed files with 15 additions and 46 deletions

View file

@ -11,6 +11,7 @@ export { PipelineContext, SharedPipelineData } from "./context";
export interface TranscribeParams {
audioData: Float32Array;
speechProbability?: number; // Speech probability from frontend VAD (0-1)
flush?: boolean; // Whether to flush any buffered audio
context: {
vocabulary?: Map<string, string>;
accessibilityContext?: GetAccessibilityContextResult | null;
@ -35,7 +36,6 @@ export interface FormatParams {
/**
 * Contract for a transcription backend. In this view it is implemented by
 * AmicalCloudProvider and WhisperProvider.
 */
export interface TranscriptionProvider {
// Read-only provider name.
readonly name: string;
// Takes one TranscribeParams object and resolves to a string.
// NOTE(review): the caller-side comment says Whisper returns an empty string
// while buffering — confirm whether that holds for every provider.
transcribe(params: TranscribeParams): Promise<string>;
flush?(): Promise<string>; // Optional flush method for providers that buffer
}
// Formatting provider interface

View file

@ -45,9 +45,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
});
}
async transcribe(
params: TranscribeParams & { flush?: boolean },
): Promise<string> {
async transcribe(params: TranscribeParams): Promise<string> {
try {
const { audioData, speechProbability = 1, flush = false } = params;
@ -104,26 +102,6 @@ export class AmicalCloudProvider implements TranscriptionProvider {
}
}
/**
 * Processes whatever is currently held in `frameBuffer` and then resets the
 * buffering state.
 *
 * @returns The result of `processAudio()`, or "" when `frameBuffer` is empty.
 * @throws Rethrows any error raised while processing, after logging it.
 */
async flush(): Promise<string> {
// Nothing buffered: skip processing entirely.
if (this.frameBuffer.length === 0) {
return "";
}
try {
const result = await this.processAudio();
// Clear buffer
// The reset below runs only after processAudio() resolves; if it rejects,
// this method leaves the buffer fields untouched.
this.frameBuffer = [];
this.frameBufferSpeechProbabilities = [];
this.currentSilenceFrameCount = 0;
return result;
} catch (error) {
// Log for diagnostics, then propagate so the caller can react.
logger.transcription.error("Cloud flush error:", error);
throw error;
}
}
private async processAudio(): Promise<string> {
if (this.frameBuffer.length === 0) {
return "";

View file

@ -74,9 +74,7 @@ export class WhisperProvider implements TranscriptionProvider {
}
}
async transcribe(
params: TranscribeParams & { flush?: boolean },
): Promise<string> {
async transcribe(params: TranscribeParams): Promise<string> {
try {
await this.initializeWhisper();

View file

@ -318,25 +318,18 @@ export class TranscriptionService {
// Select the appropriate provider
const provider = await this.selectProvider();
// For providers that support flush, call it separately when final
let chunkTranscription = "";
if (isFinal && provider.flush) {
// If final chunk, flush the provider buffer
chunkTranscription = await provider.flush();
} else {
// Normal transcription
chunkTranscription = await provider.transcribe({
audioData: audioChunk,
speechProbability: speechProbability, // Now from VAD service
context: {
vocabulary: session.context.sharedData.vocabulary,
accessibilityContext: session.context.sharedData.accessibilityContext,
previousChunk,
aggregatedTranscription: aggregatedTranscription || undefined,
},
});
}
// Transcribe with flush parameter for final chunks
const chunkTranscription = await provider.transcribe({
audioData: audioChunk,
speechProbability: speechProbability, // Now from VAD service
flush: isFinal, // Pass flush flag for final chunks
context: {
vocabulary: session.context.sharedData.vocabulary,
accessibilityContext: session.context.sharedData.accessibilityContext,
previousChunk,
aggregatedTranscription: aggregatedTranscription || undefined,
},
});
// Accumulate the result only if Whisper returned something
// (it returns empty string while buffering)