Use pure node process to handle whisper to bypass electron cage

This commit is contained in:
haritabh-z01 2025-08-09 01:28:52 +05:30
parent 382b77ffe8
commit 566cad7a5a
16 changed files with 498 additions and 102 deletions

View file

@ -38,6 +38,7 @@ export const EXTERNAL_DEPENDENCIES = [
"@libsql/win32-x64-msvc",
"libsql",
"onnxruntime-node",
"workerpool",
// Add any other native modules you need here
];
@ -53,14 +54,12 @@ const config: ForgeConfig = {
console.log(`Copying Node.js binary for ${platform}-${arch}...`);
const nodeBinarySource = join(
projectRoot,
"resources",
"node-binaries",
`${platform}-${arch}`,
platform === "win32" ? "node.exe" : "node",
);
const nodeBinaryDest = join(
projectRoot,
"resources",
"node-binaries",
`${platform}-${arch}`,
);
@ -273,7 +272,8 @@ const config: ForgeConfig = {
},
packagerConfig: {
asar: {
unpack: "{*.node,*.dylib,*.so,*.dll,*.metal,**/whisper.cpp/**}",
unpack:
"{*.node,*.dylib,*.so,*.dll,*.metal,**/whisper.cpp/**,**/.vite/build/whisper-worker-fork.js,**/node_modules/smart-whisper/**,**/node_modules/jest-worker/**}",
},
name: "Amical",
executableName: "Amical",
@ -282,7 +282,8 @@ const config: ForgeConfig = {
extraResource: [
"../../packages/native-helpers/swift-helper/bin",
"./src/db/migrations",
"./resources",
"./node-binaries",
"./models",
"./src/assets",
],
extendInfo: {

View file

@ -1,6 +1,6 @@
{
"name": "@amical/desktop",
"version": "0.0.5-test-publish",
"version": "0.0.6",
"description": "Amical Desktop app",
"main": ".vite/build/main.js",
"productName": "Amical",
@ -150,6 +150,7 @@
"update-electron-app": "^3.1.1",
"uuid": "^11.1.0",
"vaul": "^1.1.2",
"workerpool": "^9.3.3",
"zod": "^3.25.24"
}
}

View file

@ -0,0 +1,156 @@
#!/usr/bin/env node
const https = require('https');
const fs = require('fs');
const path = require('path');
const { execSync } = require('child_process');
const { createWriteStream, mkdirSync, chmodSync } = fs;
// Pinned Node.js release; every archive URL below is derived from it.
const NODE_VERSION = '24.4.0';

// Download targets, one per supported platform/arch pair.
// Each row is [platform, arch, archive file suffix on nodejs.org, path of the
// node executable inside the extracted distribution folder].
const PLATFORMS = [
  ['darwin', 'arm64', 'darwin-arm64.tar.gz', 'bin/node'],
  ['darwin', 'x64', 'darwin-x64.tar.gz', 'bin/node'],
  ['win32', 'x64', 'win-x64.zip', 'node.exe'],
  ['linux', 'x64', 'linux-x64.tar.gz', 'bin/node'],
].map(([platform, arch, archiveSuffix, binary]) => ({
  platform,
  arch,
  url: `https://nodejs.org/dist/v${NODE_VERSION}/node-v${NODE_VERSION}-${archiveSuffix}`,
  binary,
}));
// Base directory for binaries: `node-binaries` one level above this script's folder.
// downloadNodeBinary() places each binary in a `<platform>-<arch>` subdirectory of it.
const RESOURCES_DIR = path.join(__dirname, '..', 'node-binaries');
/**
 * Downloads `url` over HTTPS and writes the response body to `dest`.
 *
 * - Follows up to `maxRedirects` 3xx redirects (relative `Location` headers
 *   are resolved against the current URL).
 * - Rejects on any non-200 final status instead of saving an error page as
 *   if it were the archive.
 * - Rejects on request, response, or file-write errors and removes the
 *   partially written file, so callers never see a truncated download.
 *
 * @param {string} url - HTTPS URL to fetch.
 * @param {string} dest - Path of the file to create.
 * @param {number} [maxRedirects=5] - Remaining redirects that may be followed.
 * @returns {Promise<void>} Resolves once the file is fully written and closed.
 */
async function downloadFile(url, dest, maxRedirects = 5) {
  return new Promise((resolve, reject) => {
    const request = https.get(url, (response) => {
      const { statusCode = 0, headers } = response;

      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        // Drain the redirect body so the socket is released.
        response.resume();
        if (maxRedirects <= 0) {
          reject(new Error(`Too many redirects while downloading ${url}`));
          return;
        }
        const nextUrl = new URL(headers.location, url).toString();
        downloadFile(nextUrl, dest, maxRedirects - 1).then(resolve, reject);
        return;
      }

      if (statusCode !== 200) {
        response.resume();
        reject(new Error(`Download failed for ${url}: HTTP ${statusCode}`));
        return;
      }

      // Only create the destination once we know there is a real payload.
      const file = createWriteStream(dest);
      let failed = false;
      const fail = (error) => {
        if (failed) return;
        failed = true;
        file.destroy();
        // Best-effort removal of the partial file before reporting the error.
        fs.rm(dest, { force: true }, () => reject(error));
      };

      response.on('error', fail);
      file.on('error', fail);
      file.on('finish', () => {
        file.close((closeError) => (closeError ? reject(closeError) : resolve(undefined)));
      });
      response.pipe(file);
    });

    request.on('error', reject);
  });
}
/**
 * Unpacks a downloaded Node.js archive into a `temp` directory that sits
 * next to the archive, shelling out to the system extraction tools.
 *
 * @param {string} archivePath - Path of the downloaded .zip / .tar.gz file.
 * @param {string} platform - Target platform; 'win32' archives are zip files.
 * @returns {Promise<string>} Path of the directory the archive was extracted into.
 */
async function extractArchive(archivePath, platform) {
  const tempDir = path.join(path.dirname(archivePath), 'temp');
  mkdirSync(tempDir, { recursive: true });

  // Windows distributions are .zip files (extracted with `unzip`);
  // every other platform ships a gzipped tarball (extracted with `tar`).
  const command =
    platform === 'win32'
      ? `unzip -q "${archivePath}" -d "${tempDir}"`
      : `tar -xzf "${archivePath}" -C "${tempDir}"`;
  execSync(command, { stdio: 'inherit' });

  return tempDir;
}
/**
 * Downloads, extracts and installs the Node.js binary for one platform entry.
 *
 * The binary ends up at `<RESOURCES_DIR>/<platform>-<arch>/node[.exe]`. If that
 * file already exists the function returns immediately. Failures are logged
 * and swallowed so the remaining platforms can still be processed; in that
 * case nothing is left behind (no archive, no extraction directory, no
 * half-installed binary), so a later run retries from scratch.
 *
 * @param {{platform: string, arch: string, url: string, binary: string}} config
 *   One entry of PLATFORMS.
 * @returns {Promise<void>}
 */
async function downloadNodeBinary(config) {
  const { platform, arch, url, binary } = config;
  const platformDir = path.join(RESOURCES_DIR, `${platform}-${arch}`);
  const binaryPath = path.join(platformDir, platform === 'win32' ? 'node.exe' : 'node');

  // Skip if already exists
  if (fs.existsSync(binaryPath)) {
    console.log(`${platform}-${arch} binary already exists`);
    return;
  }

  console.log(`Downloading Node.js for ${platform}-${arch}...`);
  mkdirSync(platformDir, { recursive: true });

  const archiveExt = platform === 'win32' ? '.zip' : '.tar.gz';
  const archivePath = path.join(platformDir, `node-v${NODE_VERSION}${archiveExt}`);
  let tempDir = null;

  try {
    await downloadFile(url, archivePath);
    console.log(`Downloaded archive for ${platform}-${arch}`);

    tempDir = await extractArchive(archivePath, platform);

    // Windows archives use "win" (not "win32") in their top-level folder name.
    const extractedDirName = platform === 'win32'
      ? `node-v${NODE_VERSION}-win-${arch}`
      : `node-v${NODE_VERSION}-${platform}-${arch}`;
    const extractedBinaryPath = path.join(tempDir, extractedDirName, binary);

    fs.copyFileSync(extractedBinaryPath, binaryPath);

    // Make executable on Unix-like systems
    if (platform !== 'win32') {
      chmodSync(binaryPath, '755');
    }

    console.log(`✓ Successfully installed ${platform}-${arch} binary`);
  } catch (error) {
    console.error(`✗ Failed to download ${platform}-${arch}:`, error.message);
    // Drop a partially installed binary; otherwise the "already exists"
    // check above would treat the broken file as a finished install.
    fs.rmSync(binaryPath, { force: true });
  } finally {
    // Always remove intermediate artifacts, on success and on failure alike.
    if (tempDir) {
      fs.rmSync(tempDir, { recursive: true, force: true });
    }
    fs.rmSync(archivePath, { force: true });
  }
}
/**
 * Script entry point: ensures the output directory exists, then installs the
 * binary for every entry in PLATFORMS, one after another.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log(`Downloading Node.js v${NODE_VERSION} binaries for all platforms...\n`);

  mkdirSync(RESOURCES_DIR, { recursive: true });

  // Processed sequentially so the per-platform log output stays grouped.
  for (let index = 0; index < PLATFORMS.length; index += 1) {
    await downloadNodeBinary(PLATFORMS[index]);
  }

  console.log('\nDone! Node.js binaries downloaded to:', RESOURCES_DIR);
}
// Run only when executed directly (`node <this file>`), not when required
// as a module by another script.
if (require.main === module) {
  main().catch((error) => {
    console.error(error);
    // Report the failure to the calling shell / CI step instead of exiting 0.
    process.exitCode = 1;
  });
}
module.exports = { downloadNodeBinary, PLATFORMS, NODE_VERSION };

View file

@ -49,7 +49,7 @@ const PLATFORMS: PlatformConfig[] = [
},
];
const RESOURCES_DIR = path.join(__dirname, "..", "resources", "node-binaries");
const RESOURCES_DIR = path.join(__dirname, "..", "node-binaries");
// Parse command line arguments
const args = process.argv.slice(2);

View file

@ -62,9 +62,7 @@ export const useRecording = (): UseRecordingOutput => {
);
// Manage audio capture when recording is active
const isActive =
recordingStatus.state === "recording" ||
recordingStatus.state === "starting";
const isActive = recordingStatus.state === "recording";
const { voiceDetected } = useAudioCapture({
onAudioChunk: handleAudioChunk,

View file

@ -195,7 +195,7 @@ export class RecordingManager extends EventEmitter {
try {
const swiftBridge = this.serviceManager.getService("swiftIOBridge");
if (swiftBridge) {
//await swiftBridge.call("muteSystemAudio", {});
await swiftBridge.call("muteSystemAudio", {});
}
} catch (error) {
logger.main.warn("Swift bridge not available for audio muting");

View file

@ -0,0 +1,137 @@
import { fork, ChildProcess } from "child_process";
import { app } from "electron";
import * as path from "path";
import { logger } from "../../../main/logger";
/**
 * Request sent from the parent process to the forked worker.
 */
interface WorkerMessage {
// Correlation id; the worker's response carries the same id.
id: number;
// Name of the worker method to invoke.
method: string;
// Positional arguments; Float32Array values are sent as
// `{ __type: "Float32Array", data: number[] }` objects.
args: any[];
}
/**
 * Reply received from the forked worker for a previously sent WorkerMessage.
 */
interface WorkerResponse {
// Id of the request this response answers.
id: number;
// Value the pending call is resolved with when no error is reported.
result?: any;
// Error message; when set, the pending call is rejected with `new Error(error)`.
error?: string;
}
/**
 * Minimal request/response RPC layer over a forked Node.js child process.
 *
 * The worker script is started with `child_process.fork` using an explicit
 * node binary (`execPath`). Each `exec()` call sends `{ id, method, args }`
 * over the IPC channel and settles when a response with the same `id`
 * arrives, when the worker errors or exits, or when `terminate()` is called.
 */
export class SimpleForkWrapper {
  private worker: ChildProcess | null = null;
  private messageId = 0;
  private pendingCalls = new Map<
    number,
    {
      resolve: (value: any) => void;
      reject: (error: any) => void;
    }
  >();

  /**
   * @param workerPath Path of the worker script to fork.
   * @param nodeBinaryPath Executable used to run the worker script.
   */
  constructor(
    private workerPath: string,
    private nodeBinaryPath: string,
  ) {}

  /**
   * Starts the worker process if it is not already running and wires up
   * the message/error/exit handlers. Safe to call repeatedly.
   */
  async initialize(): Promise<void> {
    if (this.worker) return;

    logger.transcription.info(`Starting worker process: ${this.workerPath}`);

    let actualWorkerPath = this.workerPath;

    // Set up environment for the worker
    const workerEnv: Record<string, string | undefined> = {
      ...process.env,
      ELECTRON_RUN_AS_NODE: "1",
      GGML_METAL_PATH_RESOURCES: process.env.GGML_METAL_PATH_RESOURCES,
      NODE_OPTIONS: "--max-old-space-size=8192",
    };

    if (app.isPackaged && this.workerPath.includes(".asar")) {
      // fork() needs a real file on disk, not a path inside the asar archive,
      // so point at the unpacked copy. The lookahead keeps an already
      // unpacked path from becoming "app.asar.unpacked.unpacked".
      actualWorkerPath = this.workerPath.replace(
        /app\.asar(?!\.unpacked)/,
        "app.asar.unpacked",
      );
      workerEnv.APP_ASAR_PATH = path.join(process.resourcesPath, "app.asar");
      logger.transcription.info(`Using unpacked worker: ${actualWorkerPath}`);
    }

    const worker = fork(actualWorkerPath, [], {
      execPath: this.nodeBinaryPath,
      env: workerEnv,
      silent: false,
      cwd: app.isPackaged ? process.resourcesPath : process.cwd(),
    });
    this.worker = worker;

    worker.on("message", (message: WorkerResponse) => {
      const pending = this.pendingCalls.get(message?.id);
      if (!pending) return;
      this.pendingCalls.delete(message.id);

      // Compare against null/undefined so an empty error string still rejects.
      if (message.error != null) {
        pending.reject(new Error(message.error));
      } else {
        pending.resolve(message.result);
      }
    });

    worker.on("error", (error) => {
      logger.transcription.error("Worker process error:", error);
      // Ignore events from a worker that has already been replaced.
      if (this.worker === worker) {
        this.rejectAllPending(error);
      }
    });

    worker.on("exit", (code, signal) => {
      logger.transcription.info(
        `Worker process exited: code=${code}, signal=${signal}`,
      );
      // A late exit of an old worker (e.g. after terminate() followed by a
      // new initialize()) must not tear down the current worker's state.
      if (this.worker !== worker) return;
      this.worker = null;
      this.rejectAllPending(new Error(`Worker exited with code ${code}`));
    });
  }

  /** Rejects every in-flight call with `error` and empties the pending map. */
  private rejectAllPending(error: Error): void {
    const pending = [...this.pendingCalls.values()];
    this.pendingCalls.clear();
    for (const { reject } of pending) {
      reject(error);
    }
  }

  /**
   * Invokes `method` in the worker with `args` and resolves with its result.
   * Starts the worker on first use.
   *
   * @param method Name of the worker method.
   * @param args Positional arguments; Float32Array values are converted to a
   *   tagged plain-array form because they are sent over IPC.
   * @returns The value reported by the worker.
   * @throws Error if the worker reports an error, cannot be reached, exits,
   *   or is terminated before responding.
   */
  async exec<T>(method: string, args: any[]): Promise<T> {
    if (!this.worker) {
      await this.initialize();
    }
    const worker = this.worker;
    if (!worker) {
      throw new Error("Worker process is not available");
    }

    return new Promise<T>((resolve, reject) => {
      const id = this.messageId++;
      this.pendingCalls.set(id, { resolve, reject });

      // Convert Float32Array to regular array for IPC
      const serializedArgs = args.map((arg) =>
        arg instanceof Float32Array
          ? { __type: "Float32Array", data: Array.from(arg) }
          : arg,
      );
      const message: WorkerMessage = { id, method, args: serializedArgs };

      // If the message cannot be sent, fail only this call and drop its entry.
      const failSend = (error: Error) => {
        if (this.pendingCalls.delete(id)) {
          reject(error);
        }
      };

      try {
        worker.send(message, (error) => {
          if (error) failSend(error);
        });
      } catch (error) {
        failSend(error instanceof Error ? error : new Error(String(error)));
      }
    });
  }

  /**
   * Kills the worker process. Calls still waiting for a response are
   * rejected so their callers do not hang forever.
   */
  async terminate(): Promise<void> {
    const worker = this.worker;
    if (!worker) return;

    // Detach first so the worker's own exit event is treated as stale.
    this.worker = null;
    this.rejectAllPending(new Error("Worker terminated"));
    worker.kill();
  }
}

View file

@ -4,30 +4,15 @@ import {
} from "../../core/pipeline-types";
import { logger } from "../../../main/logger";
import { ModelManagerService } from "../../../services/model-manager";
import { Worker as JestWorker } from "jest-worker";
import { SimpleForkWrapper } from "./simple-fork-wrapper";
import * as path from "path";
import { app } from "electron";
interface WhisperWorkerMethods {
initializeModel(modelPath: string): Promise<void>;
transcribeAudio(
aggregatedAudio: Float32Array,
options: {
language: string;
initial_prompt: string;
suppress_blank: boolean;
suppress_non_speech_tokens: boolean;
no_timestamps: boolean;
},
): Promise<string>;
dispose(): Promise<void>;
}
export class WhisperProvider implements TranscriptionProvider {
readonly name = "whisper-local";
private modelManager: ModelManagerService;
private whisperWorker: (JestWorker & WhisperWorkerMethods) | null = null;
private workerWrapper: SimpleForkWrapper | null = null;
// Frame aggregation state
private frameBuffer: Float32Array[] = [];
@ -52,7 +37,7 @@ export class WhisperProvider implements TranscriptionProvider {
// In development, use the local binary
return path.join(
__dirname,
"../../resources/node-binaries",
"../../node-binaries",
`${platform}-${arch}`,
binaryName,
);
@ -90,7 +75,7 @@ export class WhisperProvider implements TranscriptionProvider {
context,
flush = false,
} = params;
const { vocabulary, previousChunk, aggregatedTranscription } = context;
const { vocabulary, aggregatedTranscription } = context;
// Audio data is already Float32Array
@ -140,8 +125,8 @@ export class WhisperProvider implements TranscriptionProvider {
);
// Transcribe using smart-whisper
if (!this.whisperWorker) {
throw new Error("Whisper worker is not initialized");
if (!this.workerWrapper) {
throw new Error("Worker wrapper is not initialized");
}
// Generate initial prompt from vocabulary and recent context
@ -150,13 +135,16 @@ export class WhisperProvider implements TranscriptionProvider {
aggregatedTranscription,
);
const text = await this.whisperWorker.transcribeAudio(aggregatedAudio, {
language: "auto",
initial_prompt: initialPrompt,
suppress_blank: true,
suppress_non_speech_tokens: true,
no_timestamps: true,
});
const text = await this.workerWrapper!.exec<string>("transcribeAudio", [
aggregatedAudio,
{
language: "auto",
initial_prompt: initialPrompt,
suppress_blank: true,
suppress_non_speech_tokens: true,
no_timestamps: true,
},
]);
logger.transcription.debug(
`Transcription completed, length: ${text.length}`,
@ -290,30 +278,22 @@ export class WhisperProvider implements TranscriptionProvider {
}
async initializeWhisper(): Promise<void> {
if (!this.whisperWorker) {
// Initialize jest-worker with single worker process
if (!this.workerWrapper) {
// Determine the correct path for the worker script
const workerPath = app.isPackaged
? path.join(__dirname, "whisper-worker.js") // In production, same directory as main.js
: path.join(process.cwd(), ".vite/build/whisper-worker.js"); // In development
? path.join(__dirname, "whisper-worker-fork.js") // In production, same directory as main.js
: path.join(process.cwd(), ".vite/build/whisper-worker-fork.js"); // In development
logger.transcription.info(
`Initializing Whisper worker at: ${workerPath}`,
);
this.whisperWorker = new JestWorker(workerPath, {
exposedMethods: ["initializeModel", "transcribeAudio", "dispose"],
numWorkers: 1,
enableWorkerThreads: false,
forkOptions: {
execPath: this.getNodeBinaryPath(),
env: {
...process.env,
GGML_METAL_PATH_RESOURCES: process.env.GGML_METAL_PATH_RESOURCES,
NODE_OPTIONS: "--max-old-space-size=8192",
},
silent: false, // Enable output from worker for debugging
},
}) as JestWorker & WhisperWorkerMethods;
this.workerWrapper = new SimpleForkWrapper(
workerPath,
this.getNodeBinaryPath(),
);
await this.workerWrapper.initialize();
}
const modelPath = await this.modelManager.getBestAvailableModelPath();
@ -324,7 +304,7 @@ export class WhisperProvider implements TranscriptionProvider {
}
try {
await this.whisperWorker.initializeModel(modelPath);
await this.workerWrapper.exec("initializeModel", [modelPath]);
} catch (error) {
logger.transcription.error(`Failed to initialize:`, error);
throw new Error(`Failed to initialize smart-whisper: ${error}`);
@ -333,15 +313,15 @@ export class WhisperProvider implements TranscriptionProvider {
// Simple cleanup method
async dispose(): Promise<void> {
if (this.whisperWorker) {
if (this.workerWrapper) {
try {
await this.whisperWorker.dispose();
await this.whisperWorker.end(); // Terminate the worker process
await this.workerWrapper.exec("dispose", []);
await this.workerWrapper.terminate(); // Terminate the worker
logger.transcription.debug("Worker terminated");
} catch (error) {
logger.transcription.warn("Error disposing whisper worker:", error);
logger.transcription.warn("Error disposing worker:", error);
} finally {
this.whisperWorker = null;
this.workerWrapper = null;
}
}

View file

@ -0,0 +1,117 @@
// Worker process entry point for fork
import { Whisper } from "smart-whisper";
// Console-backed logger for the worker process. Every line is prefixed with
// "[whisper-worker]" and a level; info/debug go through console.log and
// error goes through console.error.
const logger = (() => {
  // The console method is looked up on each call rather than captured once.
  const writer =
    (channel: "log" | "error", level: string) =>
    (message: string, ...args: unknown[]) =>
      console[channel](`[whisper-worker] ${level}: ${message}`, ...args);

  return {
    transcription: {
      info: writer("log", "INFO"),
      error: writer("error", "ERROR"),
      debug: writer("log", "DEBUG"),
    },
  };
})();
// Currently loaded Whisper instance and the model path it was loaded from.
// Both are set together, and only after a model has loaded successfully.
let whisperInstance: Whisper | null = null;
let currentModelPath: string | null = null;

// Methods callable from the parent process by name.
const methods = {
  /**
   * Loads the model at `modelPath`, replacing any previously loaded model.
   * No-op when the same model is already loaded.
   *
   * @throws Re-throws the load error; in that case no instance is left
   *   registered, so later calls fail cleanly with "not initialized".
   */
  async initializeModel(modelPath: string): Promise<void> {
    if (whisperInstance && currentModelPath === modelPath) {
      return; // Already initialized with same model
    }

    // Release the existing instance. State is reset before awaiting so a
    // failure here cannot leave a stale instance/path pair behind.
    if (whisperInstance) {
      const previous = whisperInstance;
      whisperInstance = null;
      currentModelPath = null;
      await previous.free();
    }

    const { Whisper } = await import("smart-whisper");
    const candidate = new Whisper(modelPath, { gpu: true });

    try {
      await candidate.load();
    } catch (e) {
      logger.transcription.error("Failed to load Whisper model:", e);
      throw e;
    }

    // Publish only a fully loaded instance.
    whisperInstance = candidate;
    currentModelPath = modelPath;
    logger.transcription.info(`Initialized with model: ${modelPath}`);
  },

  /**
   * Transcribes `aggregatedAudio` and returns the segment texts joined by a
   * single space and trimmed.
   *
   * @param aggregatedAudio Audio samples; the padding below treats them as
   *   16 kHz. The caller's array is not modified.
   * @param options Passed through unchanged to `Whisper.transcribe`.
   * @throws Error if no model has been initialized.
   */
  async transcribeAudio(
    aggregatedAudio: Float32Array,
    options: {
      language: string;
      initial_prompt: string;
      suppress_blank: boolean;
      suppress_non_speech_tokens: boolean;
      no_timestamps: boolean;
    },
  ): Promise<string> {
    const whisper = whisperInstance;
    if (!whisper) {
      throw new Error("Whisper instance is not initialized");
    }

    // Pad short clips with trailing silence up to 1 second plus a 4000-sample
    // buffer (20000 samples, i.e. 1.25 s at 16 kHz).
    const SAMPLE_RATE = 16000; // Whisper expects 16kHz input
    const MIN_DURATION_SAMPLES = SAMPLE_RATE * 1 + 4000;

    let audio = aggregatedAudio;
    if (audio.length < MIN_DURATION_SAMPLES) {
      const padded = new Float32Array(MIN_DURATION_SAMPLES);
      // Existing audio goes first; the remainder stays zero (silence).
      padded.set(audio, 0);
      audio = padded;
    }

    const { result } = await whisper.transcribe(audio, options);
    const transcription = await result;

    return transcription
      .map((segment) => segment.text)
      .join(" ")
      .trim();
  },

  /** Frees the loaded model, if any, and resets the worker state. */
  async dispose(): Promise<void> {
    if (whisperInstance) {
      const instance = whisperInstance;
      // Reset first so a failing free() does not leave a dangling instance.
      whisperInstance = null;
      currentModelPath = null;
      await instance.free();
    }
  },
};
// Handle messages from parent process.
// Each request `{ id, method, args }` is answered with `{ id, result }` on
// success or `{ id, error }` (a message string) on failure.
process.on("message", async (message: any) => {
  const id = message?.id;

  // The parent may already have disconnected; sending then would throw and
  // surface as an unhandled rejection of this async handler.
  const reply = (payload: { result?: unknown; error?: string }): void => {
    if (process.send && process.connected) {
      process.send({ id, ...payload });
    }
  };

  try {
    const { method, args } = message ?? {};

    // Own-property check: `in` would also accept inherited names such as
    // "constructor" or "toString".
    if (
      typeof method !== "string" ||
      !Object.prototype.hasOwnProperty.call(methods, method)
    ) {
      reply({ error: `Unknown method: ${method}` });
      return;
    }

    // Deserialize Float32Array from IPC
    const deserializedArgs = (Array.isArray(args) ? args : []).map(
      (arg: any) => {
        if (arg && arg.__type === "Float32Array" && Array.isArray(arg.data)) {
          return new Float32Array(arg.data);
        }
        return arg;
      },
    );

    const result = await (methods as any)[method](...deserializedArgs);
    reply({ result });
  } catch (error) {
    // Never send an empty error string; fall back to the stringified error
    // so the failure is always reported with some text.
    const text =
      error instanceof Error ? error.message || String(error) : String(error);
    reply({ error: text });
  }
});
// Log that the worker has started (this only logs; no "ready" message is sent to the parent over IPC)
logger.transcription.info("Worker process started");

View file

@ -198,50 +198,45 @@ export class TranscriptionService {
});
}
// Process chunk if it has content
if (audioChunk.length > 0) {
// Direct frame to Whisper - it will handle aggregation and VAD internally
const previousChunk =
session.transcriptionResults.length > 0
? session.transcriptionResults[
session.transcriptionResults.length - 1
]
: undefined;
const aggregatedTranscription = session.transcriptionResults
.join(" ")
.trim();
// Direct frame to Whisper - it will handle aggregation and VAD internally
const previousChunk =
session.transcriptionResults.length > 0
? session.transcriptionResults[session.transcriptionResults.length - 1]
: undefined;
const aggregatedTranscription = session.transcriptionResults
.join(" ")
.trim();
const chunkTranscription = await this.whisperProvider.transcribe({
audioData: audioChunk,
speechProbability: speechProbability, // Now from VAD service
context: {
vocabulary: session.context.sharedData.vocabulary,
accessibilityContext: session.context.sharedData.accessibilityContext,
previousChunk,
aggregatedTranscription: aggregatedTranscription || undefined,
},
flush: isFinal,
});
const chunkTranscription = await this.whisperProvider.transcribe({
audioData: audioChunk,
speechProbability: speechProbability, // Now from VAD service
context: {
vocabulary: session.context.sharedData.vocabulary,
accessibilityContext: session.context.sharedData.accessibilityContext,
previousChunk,
aggregatedTranscription: aggregatedTranscription || undefined,
},
flush: isFinal,
});
// Accumulate the result only if Whisper returned something
// (it returns empty string while buffering)
if (chunkTranscription.trim()) {
session.transcriptionResults.push(chunkTranscription);
logger.transcription.info("Whisper returned transcription", {
sessionId,
transcriptionLength: chunkTranscription.length,
totalResults: session.transcriptionResults.length,
});
}
logger.transcription.debug("Processed frame", {
// Accumulate the result only if Whisper returned something
// (it returns empty string while buffering)
if (chunkTranscription.trim()) {
session.transcriptionResults.push(chunkTranscription);
logger.transcription.info("Whisper returned transcription", {
sessionId,
frameSize: audioChunk.length,
hadTranscription: chunkTranscription.length > 0,
isFinal,
transcriptionLength: chunkTranscription.length,
totalResults: session.transcriptionResults.length,
});
}
logger.transcription.debug("Processed frame", {
sessionId,
frameSize: audioChunk.length,
hadTranscription: chunkTranscription.length > 0,
isFinal,
});
// Release transcription mutex
this.transcriptionMutex.release();
const completeTranscriptionTillNow = session.transcriptionResults

View file

@ -39,7 +39,7 @@ export class VADService extends EventEmitter {
// In development, use the source path
this.modelPath = path.join(
__dirname,
"../../resources/models/silero_vad_v5.onnx",
"../../models/silero_vad_v5.onnx",
);
}

View file

@ -11,13 +11,16 @@ export default defineConfig({
__dirname,
"src/pipeline/providers/transcription/whisper-worker.ts",
),
"whisper-worker-fork": resolve(
__dirname,
"src/pipeline/providers/transcription/whisper-worker-fork.ts",
),
},
output: {
entryFileNames: "[name].js",
},
external: [
"smart-whisper",
"jest-worker",
"@libsql/client",
"@libsql/darwin-arm64",
"@libsql/darwin-x64",

8
pnpm-lock.yaml generated
View file

@ -272,6 +272,9 @@ importers:
vaul:
specifier: ^1.1.2
version: 1.1.2(@types/react-dom@19.1.5(@types/react@19.1.5))(@types/react@19.1.5)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
workerpool:
specifier: ^9.3.3
version: 9.3.3
zod:
specifier: ^3.25.24
version: 3.25.67
@ -8168,6 +8171,9 @@ packages:
engines: {node: '>=16'}
hasBin: true
workerpool@9.3.3:
resolution: {integrity: sha512-slxCaKbYjEdFT/o2rH9xS1hf4uRDch1w7Uo+apxhZ+sf/1d9e0ZVkn42kPNGP2dgjIx6YFvSevj0zHvbWe2jdw==}
wrangler@4.20.3:
resolution: {integrity: sha512-ugvmi43CFPbjeQFfhU7EqE1V0ek6ZFv80jzwHcPk/7jPFmOA4ahT5uUU1ga5ZP6vz6lUuG2bLnyl1T5qJah0cg==}
engines: {node: '>=18.0.0'}
@ -17524,6 +17530,8 @@ snapshots:
'@cloudflare/workerd-linux-arm64': 1.20250617.0
'@cloudflare/workerd-windows-64': 1.20250617.0
workerpool@9.3.3: {}
wrangler@4.20.3:
dependencies:
'@cloudflare/kv-asset-handler': 0.4.0