fix: regression issue with local Whisper models
This commit is contained in:
parent
be7b2eda1e
commit
c3fd981773
4 changed files with 15 additions and 46 deletions
|
|
@ -11,6 +11,7 @@ export { PipelineContext, SharedPipelineData } from "./context";
|
|||
export interface TranscribeParams {
|
||||
audioData: Float32Array;
|
||||
speechProbability?: number; // Speech probability from frontend VAD (0-1)
|
||||
flush?: boolean; // Whether to flush any buffered audio
|
||||
context: {
|
||||
vocabulary?: Map<string, string>;
|
||||
accessibilityContext?: GetAccessibilityContextResult | null;
|
||||
|
|
@ -35,7 +36,6 @@ export interface FormatParams {
|
|||
export interface TranscriptionProvider {
|
||||
readonly name: string;
|
||||
transcribe(params: TranscribeParams): Promise<string>;
|
||||
flush?(): Promise<string>; // Optional flush method for providers that buffer
|
||||
}
|
||||
|
||||
// Formatting provider interface
|
||||
|
|
|
|||
|
|
@ -45,9 +45,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
|
|||
});
|
||||
}
|
||||
|
||||
async transcribe(
|
||||
params: TranscribeParams & { flush?: boolean },
|
||||
): Promise<string> {
|
||||
async transcribe(params: TranscribeParams): Promise<string> {
|
||||
try {
|
||||
const { audioData, speechProbability = 1, flush = false } = params;
|
||||
|
||||
|
|
@ -104,26 +102,6 @@ export class AmicalCloudProvider implements TranscriptionProvider {
|
|||
}
|
||||
}
|
||||
|
||||
async flush(): Promise<string> {
|
||||
if (this.frameBuffer.length === 0) {
|
||||
return "";
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await this.processAudio();
|
||||
|
||||
// Clear buffer
|
||||
this.frameBuffer = [];
|
||||
this.frameBufferSpeechProbabilities = [];
|
||||
this.currentSilenceFrameCount = 0;
|
||||
|
||||
return result;
|
||||
} catch (error) {
|
||||
logger.transcription.error("Cloud flush error:", error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
private async processAudio(): Promise<string> {
|
||||
if (this.frameBuffer.length === 0) {
|
||||
return "";
|
||||
|
|
|
|||
|
|
@ -74,9 +74,7 @@ export class WhisperProvider implements TranscriptionProvider {
|
|||
}
|
||||
}
|
||||
|
||||
async transcribe(
|
||||
params: TranscribeParams & { flush?: boolean },
|
||||
): Promise<string> {
|
||||
async transcribe(params: TranscribeParams): Promise<string> {
|
||||
try {
|
||||
await this.initializeWhisper();
|
||||
|
||||
|
|
|
|||
|
|
@ -318,25 +318,18 @@ export class TranscriptionService {
|
|||
// Select the appropriate provider
|
||||
const provider = await this.selectProvider();
|
||||
|
||||
// For providers that support flush, call it separately when final
|
||||
let chunkTranscription = "";
|
||||
|
||||
if (isFinal && provider.flush) {
|
||||
// If final chunk, flush the provider buffer
|
||||
chunkTranscription = await provider.flush();
|
||||
} else {
|
||||
// Normal transcription
|
||||
chunkTranscription = await provider.transcribe({
|
||||
audioData: audioChunk,
|
||||
speechProbability: speechProbability, // Now from VAD service
|
||||
context: {
|
||||
vocabulary: session.context.sharedData.vocabulary,
|
||||
accessibilityContext: session.context.sharedData.accessibilityContext,
|
||||
previousChunk,
|
||||
aggregatedTranscription: aggregatedTranscription || undefined,
|
||||
},
|
||||
});
|
||||
}
|
||||
// Transcribe with flush parameter for final chunks
|
||||
const chunkTranscription = await provider.transcribe({
|
||||
audioData: audioChunk,
|
||||
speechProbability: speechProbability, // Now from VAD service
|
||||
flush: isFinal, // Pass flush flag for final chunks
|
||||
context: {
|
||||
vocabulary: session.context.sharedData.vocabulary,
|
||||
accessibilityContext: session.context.sharedData.accessibilityContext,
|
||||
previousChunk,
|
||||
aggregatedTranscription: aggregatedTranscription || undefined,
|
||||
},
|
||||
});
|
||||
|
||||
// Accumulate the result only if Whisper returned something
|
||||
// (it returns empty string while buffering)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue