From 566cad7a5a0c02bc256cb3d9e5bb04f166308fc5 Mon Sep 17 00:00:00 2001 From: haritabh-z01 Date: Sat, 9 Aug 2025 01:28:52 +0530 Subject: [PATCH] Use pure node process to handle whisper to bypass electron cage --- apps/desktop/forge.config.ts | 9 +- .../{resources => }/models/silero_vad_v5.onnx | Bin .../{resources => }/node-binaries/.gitignore | 0 .../{resources => }/node-binaries/README.md | 0 apps/desktop/package.json | 3 +- .../desktop/scripts/download-node-binaries.js | 156 ++++++++++++++++++ .../desktop/scripts/download-node-binaries.ts | 2 +- apps/desktop/src/hooks/useRecording.ts | 4 +- .../src/main/managers/recording-manager.ts | 2 +- .../transcription/simple-fork-wrapper.ts | 137 +++++++++++++++ .../transcription/whisper-provider.ts | 84 ++++------ .../transcription/whisper-worker-fork.ts | 117 +++++++++++++ .../src/services/transcription-service.ts | 71 ++++---- apps/desktop/src/services/vad-service.ts | 2 +- apps/desktop/vite.main.config.mts | 5 +- pnpm-lock.yaml | 8 + 16 files changed, 498 insertions(+), 102 deletions(-) rename apps/desktop/{resources => }/models/silero_vad_v5.onnx (100%) rename apps/desktop/{resources => }/node-binaries/.gitignore (100%) rename apps/desktop/{resources => }/node-binaries/README.md (100%) create mode 100644 apps/desktop/scripts/download-node-binaries.js create mode 100644 apps/desktop/src/pipeline/providers/transcription/simple-fork-wrapper.ts create mode 100644 apps/desktop/src/pipeline/providers/transcription/whisper-worker-fork.ts diff --git a/apps/desktop/forge.config.ts b/apps/desktop/forge.config.ts index 6d72ab6..966884c 100644 --- a/apps/desktop/forge.config.ts +++ b/apps/desktop/forge.config.ts @@ -38,6 +38,7 @@ export const EXTERNAL_DEPENDENCIES = [ "@libsql/win32-x64-msvc", "libsql", "onnxruntime-node", + "workerpool", // Add any other native modules you need here ]; @@ -53,14 +54,12 @@ const config: ForgeConfig = { console.log(`Copying Node.js binary for ${platform}-${arch}...`); const nodeBinarySource = join( projectRoot, - "resources", "node-binaries", `${platform}-${arch}`, platform === "win32" ? "node.exe" : "node", ); const nodeBinaryDest = join( projectRoot, - "resources", "node-binaries", `${platform}-${arch}`, ); @@ -273,7 +272,8 @@ const config: ForgeConfig = { }, packagerConfig: { asar: { - unpack: "{*.node,*.dylib,*.so,*.dll,*.metal,**/whisper.cpp/**}", + unpack: + "{*.node,*.dylib,*.so,*.dll,*.metal,**/whisper.cpp/**,**/.vite/build/whisper-worker-fork.js,**/node_modules/smart-whisper/**,**/node_modules/jest-worker/**}", }, name: "Amical", executableName: "Amical", @@ -282,7 +282,8 @@ const config: ForgeConfig = { extraResource: [ "../../packages/native-helpers/swift-helper/bin", "./src/db/migrations", - "./resources", + "./node-binaries", + "./models", "./src/assets", ], extendInfo: { diff --git a/apps/desktop/resources/models/silero_vad_v5.onnx b/apps/desktop/models/silero_vad_v5.onnx similarity index 100% rename from apps/desktop/resources/models/silero_vad_v5.onnx rename to apps/desktop/models/silero_vad_v5.onnx diff --git a/apps/desktop/resources/node-binaries/.gitignore b/apps/desktop/node-binaries/.gitignore similarity index 100% rename from apps/desktop/resources/node-binaries/.gitignore rename to apps/desktop/node-binaries/.gitignore diff --git a/apps/desktop/resources/node-binaries/README.md b/apps/desktop/node-binaries/README.md similarity index 100% rename from apps/desktop/resources/node-binaries/README.md rename to apps/desktop/node-binaries/README.md diff --git a/apps/desktop/package.json b/apps/desktop/package.json index e1ad560..70b720d 100644 --- a/apps/desktop/package.json +++ b/apps/desktop/package.json @@ -1,6 +1,6 @@ { "name": "@amical/desktop", - "version": "0.0.5-test-publish", + "version": "0.0.6", "description": "Amical Desktop app", "main": ".vite/build/main.js", "productName": "Amical", @@ -150,6 +150,7 @@ "update-electron-app": "^3.1.1", "uuid": "^11.1.0", "vaul": "^1.1.2", + "workerpool": "^9.3.3", "zod": "^3.25.24" } } diff --git a/apps/desktop/scripts/download-node-binaries.js b/apps/desktop/scripts/download-node-binaries.js new file mode 100644 index 0000000..56c22e9 --- /dev/null +++ b/apps/desktop/scripts/download-node-binaries.js @@ -0,0 +1,156 @@ +#!/usr/bin/env node + +const https = require('https'); +const fs = require('fs'); +const path = require('path'); +const { execSync } = require('child_process'); +const { createWriteStream, mkdirSync, chmodSync } = fs; + +// Node.js version to download +const NODE_VERSION = '24.4.0'; + +// Platform configurations +const PLATFORMS = [ + { + platform: 'darwin', + arch: 'arm64', + url: `https://nodejs.org/dist/v${NODE_VERSION}/node-v${NODE_VERSION}-darwin-arm64.tar.gz`, + binary: 'bin/node' + }, + { + platform: 'darwin', + arch: 'x64', + url: `https://nodejs.org/dist/v${NODE_VERSION}/node-v${NODE_VERSION}-darwin-x64.tar.gz`, + binary: 'bin/node' + }, + { + platform: 'win32', + arch: 'x64', + url: `https://nodejs.org/dist/v${NODE_VERSION}/node-v${NODE_VERSION}-win-x64.zip`, + binary: 'node.exe' + }, + { + platform: 'linux', + arch: 'x64', + url: `https://nodejs.org/dist/v${NODE_VERSION}/node-v${NODE_VERSION}-linux-x64.tar.gz`, + binary: 'bin/node' + } +]; + +// Base directory for binaries +const RESOURCES_DIR = path.join(__dirname, '..', 'node-binaries'); + +async function downloadFile(url, dest) { + return new Promise((resolve, reject) => { + const file = createWriteStream(dest); + + https.get(url, (response) => { + if (response.statusCode === 302 || response.statusCode === 301) { + // Handle redirect + https.get(response.headers.location, (redirectResponse) => { + redirectResponse.pipe(file); + file.on('finish', () => { + file.close(resolve); + }); + }).on('error', reject); + } else { + response.pipe(file); + file.on('finish', () => { + file.close(resolve); + }); + } + }).on('error', reject); + }); +} + +async function extractArchive(archivePath, platform) { + const tempDir = path.join(path.dirname(archivePath), 'temp'); + mkdirSync(tempDir, { recursive: true }); + + if (platform === 'win32') { + // Use unzip command (available on macOS) to extract zip files + execSync(`unzip -q "${archivePath}" -d "${tempDir}"`, { stdio: 'inherit' }); + } else { + // Use tar for Unix-like systems + execSync(`tar -xzf "${archivePath}" -C "${tempDir}"`, { stdio: 'inherit' }); + } + + return tempDir; +} + +async function downloadNodeBinary(config) { + const { platform, arch, url, binary } = config; + const platformDir = path.join(RESOURCES_DIR, `${platform}-${arch}`); + const binaryPath = path.join(platformDir, platform === 'win32' ? 'node.exe' : 'node'); + + // Skip if already exists + if (fs.existsSync(binaryPath)) { + console.log(`✓ ${platform}-${arch} binary already exists`); + return; + } + + console.log(`Downloading Node.js for ${platform}-${arch}...`); + + // Create directory + mkdirSync(platformDir, { recursive: true }); + + // Download archive + const archiveExt = platform === 'win32' ? '.zip' : '.tar.gz'; + const archivePath = path.join(platformDir, `node-v${NODE_VERSION}${archiveExt}`); + + try { + await downloadFile(url, archivePath); + console.log(`Downloaded archive for ${platform}-${arch}`); + + // Extract archive + const tempDir = await extractArchive(archivePath, platform); + + // Find the node binary in extracted files + // Windows uses different directory naming convention (win instead of win32) + const extractedDirName = platform === 'win32' + ? `node-v${NODE_VERSION}-win-${arch}` + : `node-v${NODE_VERSION}-${platform}-${arch}`; + const extractedBinaryPath = path.join(tempDir, extractedDirName, binary); + + // Copy binary to final location + fs.copyFileSync(extractedBinaryPath, binaryPath); + + // Make executable on Unix-like systems + if (platform !== 'win32') { + chmodSync(binaryPath, '755'); + } + + // Clean up + fs.rmSync(tempDir, { recursive: true, force: true }); + fs.unlinkSync(archivePath); + + console.log(`✓ Successfully installed ${platform}-${arch} binary`); + } catch (error) { + console.error(`✗ Failed to download ${platform}-${arch}:`, error.message); + // Clean up on failure + if (fs.existsSync(archivePath)) { + fs.unlinkSync(archivePath); + } + } +} + +async function main() { + console.log(`Downloading Node.js v${NODE_VERSION} binaries for all platforms...\n`); + + // Create base directory + mkdirSync(RESOURCES_DIR, { recursive: true }); + + // Download binaries for all platforms + for (const platform of PLATFORMS) { + await downloadNodeBinary(platform); + } + + console.log('\nDone! Node.js binaries downloaded to:', RESOURCES_DIR); +} + +// Run if called directly +if (require.main === module) { + main().catch(console.error); +} + +module.exports = { downloadNodeBinary, PLATFORMS, NODE_VERSION }; \ No newline at end of file diff --git a/apps/desktop/scripts/download-node-binaries.ts b/apps/desktop/scripts/download-node-binaries.ts index d7debd1..9adb632 100644 --- a/apps/desktop/scripts/download-node-binaries.ts +++ b/apps/desktop/scripts/download-node-binaries.ts @@ -49,7 +49,7 @@ const PLATFORMS: PlatformConfig[] = [ }, ]; -const RESOURCES_DIR = path.join(__dirname, "..", "resources", "node-binaries"); +const RESOURCES_DIR = path.join(__dirname, "..", "node-binaries"); // Parse command line arguments const args = process.argv.slice(2); diff --git a/apps/desktop/src/hooks/useRecording.ts b/apps/desktop/src/hooks/useRecording.ts index 0d29d7a..24e1dae 100644 --- a/apps/desktop/src/hooks/useRecording.ts +++ b/apps/desktop/src/hooks/useRecording.ts @@ -62,9 +62,7 @@ export const useRecording = (): UseRecordingOutput => { ); // Manage audio capture when recording is active - const isActive = - recordingStatus.state === "recording" || - recordingStatus.state === "starting"; + const isActive = recordingStatus.state === "recording"; const { voiceDetected } = useAudioCapture({ onAudioChunk: handleAudioChunk, diff --git a/apps/desktop/src/main/managers/recording-manager.ts b/apps/desktop/src/main/managers/recording-manager.ts index bc5a603..0be4ef2 100644 --- a/apps/desktop/src/main/managers/recording-manager.ts +++ b/apps/desktop/src/main/managers/recording-manager.ts @@ -195,7 +195,7 @@ export class RecordingManager extends EventEmitter { try { const swiftBridge = this.serviceManager.getService("swiftIOBridge"); if (swiftBridge) { - //await swiftBridge.call("muteSystemAudio", {}); + await swiftBridge.call("muteSystemAudio", {}); } } catch (error) { logger.main.warn("Swift bridge not available for audio muting"); diff --git a/apps/desktop/src/pipeline/providers/transcription/simple-fork-wrapper.ts b/apps/desktop/src/pipeline/providers/transcription/simple-fork-wrapper.ts new file mode 100644 index 0000000..236e1dc --- /dev/null +++ b/apps/desktop/src/pipeline/providers/transcription/simple-fork-wrapper.ts @@ -0,0 +1,137 @@ +import { fork, ChildProcess } from "child_process"; +import { app } from "electron"; +import * as path from "path"; +import { logger } from "../../../main/logger"; + +interface WorkerMessage { + id: number; + method: string; + args: any[]; +} + +interface WorkerResponse { + id: number; + result?: any; + error?: string; +} + +export class SimpleForkWrapper { + private worker: ChildProcess | null = null; + private messageId = 0; + private pendingCalls = new Map< + number, + { + resolve: (value: any) => void; + reject: (error: any) => void; + } + >(); + + constructor( + private workerPath: string, + private nodeBinaryPath: string, + ) {} + + async initialize(): Promise { + if (this.worker) return; + + logger.transcription.info(`Starting worker process: ${this.workerPath}`); + + // When packaged, we need to extract the worker to a temp file + // because fork needs an actual file path, not an asar path + let actualWorkerPath = this.workerPath; + + // Set up environment for the worker + const workerEnv: any = { + ...process.env, + ELECTRON_RUN_AS_NODE: "1", + GGML_METAL_PATH_RESOURCES: process.env.GGML_METAL_PATH_RESOURCES, + NODE_OPTIONS: "--max-old-space-size=8192", + }; + + if (app.isPackaged && this.workerPath.includes(".asar")) { + // For packaged app, use the unpacked worker + actualWorkerPath = this.workerPath.replace( + "app.asar", + "app.asar.unpacked", + ); + workerEnv.APP_ASAR_PATH = path.join(process.resourcesPath, "app.asar"); + logger.transcription.info(`Using unpacked worker: ${actualWorkerPath}`); + } + + this.worker = fork(actualWorkerPath, [], { + execPath: this.nodeBinaryPath, + env: workerEnv, + silent: false, + cwd: app.isPackaged ? process.resourcesPath : process.cwd(), + }); + + this.worker.on("message", (message: WorkerResponse) => { + if (message.id !== undefined && this.pendingCalls.has(message.id)) { + const { resolve, reject } = this.pendingCalls.get(message.id)!; + this.pendingCalls.delete(message.id); + + if (message.error) { + reject(new Error(message.error)); + } else { + resolve(message.result); + } + } + }); + + this.worker.on("error", (error) => { + logger.transcription.error("Worker process error:", error); + this.rejectAllPending(error); + }); + + this.worker.on("exit", (code, signal) => { + logger.transcription.info( + `Worker process exited: code=${code}, signal=${signal}`, + ); + this.worker = null; + this.rejectAllPending(new Error(`Worker exited with code ${code}`)); + }); + } + + private rejectAllPending(error: Error): void { + for (const { reject } of this.pendingCalls.values()) { + reject(error); + } + this.pendingCalls.clear(); + } + + async exec(method: string, args: any[]): Promise { + if (!this.worker) { + await this.initialize(); + } + + return new Promise((resolve, reject) => { + const id = this.messageId++; + this.pendingCalls.set(id, { resolve, reject }); + + // Convert Float32Array to regular array for IPC + const serializedArgs = args.map((arg) => { + if (arg instanceof Float32Array) { + return { + __type: "Float32Array", + data: Array.from(arg), + }; + } + return arg; + }); + + this.worker!.send({ + id, + method, + args: serializedArgs, + } as WorkerMessage); + }); + } + + async terminate(): Promise { + if (this.worker) { + this.worker.kill(); + this.worker = null; + this.pendingCalls.clear(); + } + } +} diff --git a/apps/desktop/src/pipeline/providers/transcription/whisper-provider.ts b/apps/desktop/src/pipeline/providers/transcription/whisper-provider.ts index 1c296a8..7b3122a 100644 --- a/apps/desktop/src/pipeline/providers/transcription/whisper-provider.ts +++ b/apps/desktop/src/pipeline/providers/transcription/whisper-provider.ts @@ -4,30 +4,15 @@ import { } from "../../core/pipeline-types"; import { logger } from "../../../main/logger"; import { ModelManagerService } from "../../../services/model-manager"; -import { Worker as JestWorker } from "jest-worker"; +import { SimpleForkWrapper } from "./simple-fork-wrapper"; import * as path from "path"; import { app } from "electron"; -interface WhisperWorkerMethods { - initializeModel(modelPath: string): Promise; - transcribeAudio( - aggregatedAudio: Float32Array, - options: { - language: string; - initial_prompt: string; - suppress_blank: boolean; - suppress_non_speech_tokens: boolean; - no_timestamps: boolean; - }, - ): Promise; - dispose(): Promise; -} - export class WhisperProvider implements TranscriptionProvider { readonly name = "whisper-local"; private modelManager: ModelManagerService; - private whisperWorker: (JestWorker & WhisperWorkerMethods) | null = null; + private workerWrapper: SimpleForkWrapper | null = null; // Frame aggregation state private frameBuffer: Float32Array[] = []; @@ -52,7 +37,7 @@ export class WhisperProvider implements TranscriptionProvider { // In development, use the local binary return path.join( __dirname, - "../../resources/node-binaries", + "../../node-binaries", `${platform}-${arch}`, binaryName, ); @@ -90,7 +75,7 @@ export class WhisperProvider implements TranscriptionProvider { context, flush = false, } = params; - const { vocabulary, previousChunk, aggregatedTranscription } = context; + const { vocabulary, aggregatedTranscription } = context; // Audio data is already Float32Array @@ -140,8 +125,8 @@ export class WhisperProvider implements TranscriptionProvider { ); // Transcribe using smart-whisper - if (!this.whisperWorker) { - throw new Error("Whisper worker is not initialized"); + if (!this.workerWrapper) { + throw new Error("Worker wrapper is not initialized"); } // Generate initial prompt from vocabulary and recent context @@ -150,13 +135,16 @@ export class WhisperProvider implements TranscriptionProvider { aggregatedTranscription, ); - const text = await this.whisperWorker.transcribeAudio(aggregatedAudio, { - language: "auto", - initial_prompt: initialPrompt, - suppress_blank: true, - suppress_non_speech_tokens: true, - no_timestamps: true, - }); + const text = await this.workerWrapper!.exec("transcribeAudio", [ + aggregatedAudio, + { + language: "auto", + initial_prompt: initialPrompt, + suppress_blank: true, + suppress_non_speech_tokens: true, + no_timestamps: true, + }, + ]); logger.transcription.debug( `Transcription completed, length: ${text.length}`, @@ -290,30 +278,22 @@ export class WhisperProvider implements TranscriptionProvider { } async initializeWhisper(): Promise { - if (!this.whisperWorker) { - // Initialize jest-worker with single worker process + if (!this.workerWrapper) { // Determine the correct path for the worker script const workerPath = app.isPackaged - ? path.join(__dirname, "whisper-worker.js") // In production, same directory as main.js - : path.join(process.cwd(), ".vite/build/whisper-worker.js"); // In development + ? path.join(__dirname, "whisper-worker-fork.js") // In production, same directory as main.js + : path.join(process.cwd(), ".vite/build/whisper-worker-fork.js"); // In development logger.transcription.info( `Initializing Whisper worker at: ${workerPath}`, ); - this.whisperWorker = new JestWorker(workerPath, { - exposedMethods: ["initializeModel", "transcribeAudio", "dispose"], - numWorkers: 1, - enableWorkerThreads: false, - forkOptions: { - execPath: this.getNodeBinaryPath(), - env: { - ...process.env, - GGML_METAL_PATH_RESOURCES: process.env.GGML_METAL_PATH_RESOURCES, - NODE_OPTIONS: "--max-old-space-size=8192", - }, - silent: false, // Enable output from worker for debugging - }, - }) as JestWorker & WhisperWorkerMethods; + + this.workerWrapper = new SimpleForkWrapper( + workerPath, + this.getNodeBinaryPath(), + ); + + await this.workerWrapper.initialize(); } const modelPath = await this.modelManager.getBestAvailableModelPath(); @@ -324,7 +304,7 @@ export class WhisperProvider implements TranscriptionProvider { } try { - await this.whisperWorker.initializeModel(modelPath); + await this.workerWrapper.exec("initializeModel", [modelPath]); } catch (error) { logger.transcription.error(`Failed to initialize:`, error); throw new Error(`Failed to initialize smart-whisper: ${error}`); @@ -333,15 +313,15 @@ export class WhisperProvider implements TranscriptionProvider { // Simple cleanup method async dispose(): Promise { - if (this.whisperWorker) { + if (this.workerWrapper) { try { - await this.whisperWorker.dispose(); - await this.whisperWorker.end(); // Terminate the worker process + await this.workerWrapper.exec("dispose", []); + await this.workerWrapper.terminate(); // Terminate the worker logger.transcription.debug("Worker terminated"); } catch (error) { - logger.transcription.warn("Error disposing whisper worker:", error); + logger.transcription.warn("Error disposing worker:", error); } finally { - this.whisperWorker = null; + this.workerWrapper = null; } } diff --git a/apps/desktop/src/pipeline/providers/transcription/whisper-worker-fork.ts b/apps/desktop/src/pipeline/providers/transcription/whisper-worker-fork.ts new file mode 100644 index 0000000..8d0070f --- /dev/null +++ b/apps/desktop/src/pipeline/providers/transcription/whisper-worker-fork.ts @@ -0,0 +1,117 @@ +// Worker process entry point for fork +import { Whisper } from "smart-whisper"; + +// Simple console-based logging for worker process +const logger = { + transcription: { + info: (message: string, ...args: unknown[]) => + console.log(`[whisper-worker] INFO: ${message}`, ...args), + error: (message: string, ...args: unknown[]) => + console.error(`[whisper-worker] ERROR: ${message}`, ...args), + debug: (message: string, ...args: unknown[]) => + console.log(`[whisper-worker] DEBUG: ${message}`, ...args), + }, +}; + +let whisperInstance: Whisper | null = null; +let currentModelPath: string | null = null; + +// Worker methods +const methods = { + async initializeModel(modelPath: string): Promise { + if (whisperInstance && currentModelPath === modelPath) { + return; // Already initialized with same model + } + + // Cleanup existing instance + if (whisperInstance) { + await whisperInstance.free(); + whisperInstance = null; + } + + const { Whisper } = await import("smart-whisper"); + whisperInstance = new Whisper(modelPath, { gpu: true }); + try { + await whisperInstance.load(); + } catch (e) { + logger.transcription.error("Failed to load Whisper model:", e); + throw e; + } + currentModelPath = modelPath; + logger.transcription.info(`Initialized with model: ${modelPath}`); + }, + + async transcribeAudio( + aggregatedAudio: Float32Array, + options: { + language: string; + initial_prompt: string; + suppress_blank: boolean; + suppress_non_speech_tokens: boolean; + no_timestamps: boolean; + }, + ): Promise { + if (!whisperInstance) { + throw new Error("Whisper instance is not initialized"); + } + + // Pad audio with silence to ensure at least 1 second of audio (16k samples) + const SAMPLE_RATE = 16000; // Whisper expects 16kHz input + const MIN_DURATION_SAMPLES = SAMPLE_RATE * 1 + 4000; // 1 second + extra buffer + if (aggregatedAudio.length < MIN_DURATION_SAMPLES) { + const padded = new Float32Array(MIN_DURATION_SAMPLES); + // Copy the existing audio to the beginning + padded.set(aggregatedAudio, 0); + aggregatedAudio = padded; + } + + const { result } = await whisperInstance.transcribe( + aggregatedAudio, + options, + ); + const transcription = await result; + + return transcription + .map((segment) => segment.text) + .join(" ") + .trim(); + }, + + async dispose(): Promise { + if (whisperInstance) { + await whisperInstance.free(); + whisperInstance = null; + currentModelPath = null; + } + }, +}; + +// Handle messages from parent process +process.on("message", async (message: any) => { + const { id, method, args } = message; + + try { + // Deserialize Float32Array from IPC + const deserializedArgs = args.map((arg: any) => { + if (arg && arg.__type === "Float32Array" && Array.isArray(arg.data)) { + return new Float32Array(arg.data); + } + return arg; + }); + + if (method in methods) { + const result = await (methods as any)[method](...deserializedArgs); + process.send!({ id, result }); + } else { + process.send!({ id, error: `Unknown method: ${method}` }); + } + } catch (error) { + process.send!({ + id, + error: error instanceof Error ? error.message : String(error), + }); + } +}); + +// Send ready signal +logger.transcription.info("Worker process started"); diff --git a/apps/desktop/src/services/transcription-service.ts b/apps/desktop/src/services/transcription-service.ts index b3599c7..b8fb223 100644 --- a/apps/desktop/src/services/transcription-service.ts +++ b/apps/desktop/src/services/transcription-service.ts @@ -198,50 +198,45 @@ export class TranscriptionService { }); } - // Process chunk if it has content - if (audioChunk.length > 0) { - // Direct frame to Whisper - it will handle aggregation and VAD internally - const previousChunk = - session.transcriptionResults.length > 0 - ? session.transcriptionResults[ - session.transcriptionResults.length - 1 - ] - : undefined; - const aggregatedTranscription = session.transcriptionResults - .join(" ") - .trim(); + // Direct frame to Whisper - it will handle aggregation and VAD internally + const previousChunk = + session.transcriptionResults.length > 0 + ? session.transcriptionResults[session.transcriptionResults.length - 1] + : undefined; + const aggregatedTranscription = session.transcriptionResults + .join(" ") + .trim(); - const chunkTranscription = await this.whisperProvider.transcribe({ - audioData: audioChunk, - speechProbability: speechProbability, // Now from VAD service - context: { - vocabulary: session.context.sharedData.vocabulary, - accessibilityContext: session.context.sharedData.accessibilityContext, - previousChunk, - aggregatedTranscription: aggregatedTranscription || undefined, - }, - flush: isFinal, - }); + const chunkTranscription = await this.whisperProvider.transcribe({ + audioData: audioChunk, + speechProbability: speechProbability, // Now from VAD service + context: { + vocabulary: session.context.sharedData.vocabulary, + accessibilityContext: session.context.sharedData.accessibilityContext, + previousChunk, + aggregatedTranscription: aggregatedTranscription || undefined, + }, + flush: isFinal, + }); - // Accumulate the result only if Whisper returned something - // (it returns empty string while buffering) - if (chunkTranscription.trim()) { - session.transcriptionResults.push(chunkTranscription); - logger.transcription.info("Whisper returned transcription", { - sessionId, - transcriptionLength: chunkTranscription.length, - totalResults: session.transcriptionResults.length, - }); - } - - logger.transcription.debug("Processed frame", { + // Accumulate the result only if Whisper returned something + // (it returns empty string while buffering) + if (chunkTranscription.trim()) { + session.transcriptionResults.push(chunkTranscription); + logger.transcription.info("Whisper returned transcription", { sessionId, - frameSize: audioChunk.length, - hadTranscription: chunkTranscription.length > 0, - isFinal, + transcriptionLength: chunkTranscription.length, + totalResults: session.transcriptionResults.length, }); } + logger.transcription.debug("Processed frame", { + sessionId, + frameSize: audioChunk.length, + hadTranscription: chunkTranscription.length > 0, + isFinal, + }); + // Release transcription mutex this.transcriptionMutex.release(); const completeTranscriptionTillNow = session.transcriptionResults diff --git a/apps/desktop/src/services/vad-service.ts b/apps/desktop/src/services/vad-service.ts index 09279f1..6cb25c7 100644 --- a/apps/desktop/src/services/vad-service.ts +++ b/apps/desktop/src/services/vad-service.ts @@ -39,7 +39,7 @@ export class VADService extends EventEmitter { // In development, use the source path this.modelPath = path.join( __dirname, - "../../resources/models/silero_vad_v5.onnx", + "../../models/silero_vad_v5.onnx", ); } diff --git a/apps/desktop/vite.main.config.mts b/apps/desktop/vite.main.config.mts index 3507704..8f00912 100644 --- a/apps/desktop/vite.main.config.mts +++ b/apps/desktop/vite.main.config.mts @@ -11,13 +11,16 @@ export default defineConfig({ __dirname, "src/pipeline/providers/transcription/whisper-worker.ts", ), + "whisper-worker-fork": resolve( + __dirname, + "src/pipeline/providers/transcription/whisper-worker-fork.ts", + ), }, output: { entryFileNames: "[name].js", }, external: [ "smart-whisper", - "jest-worker", "@libsql/client", "@libsql/darwin-arm64", "@libsql/darwin-x64", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 8664f94..1a1f4f6 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -272,6 +272,9 @@ importers: vaul: specifier: ^1.1.2 version: 1.1.2(@types/react-dom@19.1.5(@types/react@19.1.5))(@types/react@19.1.5)(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + workerpool: + specifier: ^9.3.3 + version: 9.3.3 zod: specifier: ^3.25.24 version: 3.25.67 @@ -8168,6 +8171,9 @@ packages: engines: {node: '>=16'} hasBin: true + workerpool@9.3.3: + resolution: {integrity: sha512-slxCaKbYjEdFT/o2rH9xS1hf4uRDch1w7Uo+apxhZ+sf/1d9e0ZVkn42kPNGP2dgjIx6YFvSevj0zHvbWe2jdw==} + wrangler@4.20.3: resolution: {integrity: sha512-ugvmi43CFPbjeQFfhU7EqE1V0ek6ZFv80jzwHcPk/7jPFmOA4ahT5uUU1ga5ZP6vz6lUuG2bLnyl1T5qJah0cg==} engines: {node: '>=18.0.0'} @@ -17524,6 +17530,8 @@ snapshots: '@cloudflare/workerd-linux-arm64': 1.20250617.0 '@cloudflare/workerd-windows-64': 1.20250617.0 + workerpool@9.3.3: {} + wrangler@4.20.3: dependencies: '@cloudflare/kv-asset-handler': 0.4.0