Use pure node process to handle whisper to bypass electron cage
This commit is contained in:
parent
382b77ffe8
commit
566cad7a5a
16 changed files with 498 additions and 102 deletions
|
|
@ -38,6 +38,7 @@ export const EXTERNAL_DEPENDENCIES = [
|
|||
"@libsql/win32-x64-msvc",
|
||||
"libsql",
|
||||
"onnxruntime-node",
|
||||
"workerpool",
|
||||
// Add any other native modules you need here
|
||||
];
|
||||
|
||||
|
|
@ -53,14 +54,12 @@ const config: ForgeConfig = {
|
|||
console.log(`Copying Node.js binary for ${platform}-${arch}...`);
|
||||
const nodeBinarySource = join(
|
||||
projectRoot,
|
||||
"resources",
|
||||
"node-binaries",
|
||||
`${platform}-${arch}`,
|
||||
platform === "win32" ? "node.exe" : "node",
|
||||
);
|
||||
const nodeBinaryDest = join(
|
||||
projectRoot,
|
||||
"resources",
|
||||
"node-binaries",
|
||||
`${platform}-${arch}`,
|
||||
);
|
||||
|
|
@ -273,7 +272,8 @@ const config: ForgeConfig = {
|
|||
},
|
||||
packagerConfig: {
|
||||
asar: {
|
||||
unpack: "{*.node,*.dylib,*.so,*.dll,*.metal,**/whisper.cpp/**}",
|
||||
unpack:
|
||||
"{*.node,*.dylib,*.so,*.dll,*.metal,**/whisper.cpp/**,**/.vite/build/whisper-worker-fork.js,**/node_modules/smart-whisper/**,**/node_modules/jest-worker/**}",
|
||||
},
|
||||
name: "Amical",
|
||||
executableName: "Amical",
|
||||
|
|
@ -282,7 +282,8 @@ const config: ForgeConfig = {
|
|||
extraResource: [
|
||||
"../../packages/native-helpers/swift-helper/bin",
|
||||
"./src/db/migrations",
|
||||
"./resources",
|
||||
"./node-binaries",
|
||||
"./models",
|
||||
"./src/assets",
|
||||
],
|
||||
extendInfo: {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@amical/desktop",
|
||||
"version": "0.0.5-test-publish",
|
||||
"version": "0.0.6",
|
||||
"description": "Amical Desktop app",
|
||||
"main": ".vite/build/main.js",
|
||||
"productName": "Amical",
|
||||
|
|
@ -150,6 +150,7 @@
|
|||
"update-electron-app": "^3.1.1",
|
||||
"uuid": "^11.1.0",
|
||||
"vaul": "^1.1.2",
|
||||
"workerpool": "^9.3.3",
|
||||
"zod": "^3.25.24"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
156
apps/desktop/scripts/download-node-binaries.js
Normal file
156
apps/desktop/scripts/download-node-binaries.js
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
const https = require('https');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const { execSync } = require('child_process');
|
||||
const { createWriteStream, mkdirSync, chmodSync } = fs;
|
||||
|
||||
// Node.js release that gets downloaded for every target below
const NODE_VERSION = '24.4.0';

// Download targets. Each entry carries the platform/arch pair, the archive URL
// on nodejs.org and the path of the node executable inside the extracted
// archive directory.
const PLATFORMS = [
  ['darwin', 'arm64'],
  ['darwin', 'x64'],
  ['win32', 'x64'],
  ['linux', 'x64'],
].map(([platform, arch]) => {
  const isWindows = platform === 'win32';
  // nodejs.org names Windows archives "win" (not "win32") and ships them as zip
  const distName = isWindows ? 'win' : platform;
  const archiveExt = isWindows ? 'zip' : 'tar.gz';

  return {
    platform,
    arch,
    url: `https://nodejs.org/dist/v${NODE_VERSION}/node-v${NODE_VERSION}-${distName}-${arch}.${archiveExt}`,
    binary: isWindows ? 'node.exe' : 'bin/node',
  };
});
|
||||
|
||||
// Base directory for binaries
|
||||
const RESOURCES_DIR = path.join(__dirname, '..', 'node-binaries');
|
||||
|
||||
/**
 * Download `url` into the file at `dest`.
 *
 * Redirect responses are followed (up to `maxRedirects` hops). Any final
 * status other than 200 rejects instead of saving the error page as if it
 * were the archive. On a stream failure the partially written file is removed
 * before the promise rejects.
 *
 * @param {string} url - HTTPS URL to fetch.
 * @param {string} dest - Path of the file to create.
 * @param {number} [maxRedirects=5] - Redirect hops still allowed.
 * @returns {Promise<void>} Resolves once the file has been written and closed.
 */
async function downloadFile(url, dest, maxRedirects = 5) {
  return new Promise((resolve, reject) => {
    const request = https.get(url, (response) => {
      const status = response.statusCode;
      const location = response.headers.location;

      if ([301, 302, 303, 307, 308].includes(status) && location) {
        // Drain the redirect body so the socket is released
        response.resume();
        if (maxRedirects <= 0) {
          reject(new Error(`Too many redirects while downloading ${url}`));
          return;
        }
        // Location may be relative; resolve it against the current URL
        const nextUrl = new URL(location, url).toString();
        downloadFile(nextUrl, dest, maxRedirects - 1).then(resolve, reject);
        return;
      }

      if (status !== 200) {
        response.resume();
        reject(new Error(`Download failed for ${url}: HTTP ${status}`));
        return;
      }

      // Only create the destination once we know there is a real payload
      const file = createWriteStream(dest);
      const fail = (error) => {
        response.destroy();
        file.destroy();
        fs.rm(dest, { force: true }, () => reject(error));
      };

      response.on('error', fail);
      file.on('error', fail);
      file.on('finish', () => {
        file.close((closeError) => (closeError ? fail(closeError) : resolve()));
      });
      response.pipe(file);
    });

    request.on('error', reject);
  });
}
|
||||
|
||||
/**
 * Unpack a downloaded archive into a `temp` directory next to it.
 *
 * The extractor is invoked without a shell, so archive paths containing
 * quotes, `$` or backticks are passed through verbatim.
 *
 * @param {string} archivePath - Path of the downloaded .zip / .tar.gz file.
 * @param {string} platform - Target platform; 'win32' archives are zip files,
 *   everything else is treated as a gzipped tarball.
 * @returns {Promise<string>} Path of the directory the archive was extracted to.
 */
async function extractArchive(archivePath, platform) {
  const { execFileSync } = require('child_process');
  const tempDir = path.join(path.dirname(archivePath), 'temp');

  // Start from an empty directory: leftovers from an interrupted run would
  // make unzip stop and ask whether to overwrite existing files.
  fs.rmSync(tempDir, { recursive: true, force: true });
  mkdirSync(tempDir, { recursive: true });

  if (platform === 'win32') {
    // Windows builds are distributed as zip; requires `unzip` on the host
    execFileSync('unzip', ['-q', archivePath, '-d', tempDir], { stdio: 'inherit' });
  } else {
    // Unix-like builds are distributed as gzipped tarballs
    execFileSync('tar', ['-xzf', archivePath, '-C', tempDir], { stdio: 'inherit' });
  }

  return tempDir;
}
|
||||
|
||||
/**
 * Download, extract and install the Node.js binary for one platform entry.
 *
 * The binary ends up at `<RESOURCES_DIR>/<platform>-<arch>/node[.exe]`.
 * Failures are logged and swallowed so the remaining platforms are still
 * attempted. The archive and extraction directory are removed in every case,
 * and a half-installed binary is deleted so the next run retries.
 *
 * @param {{platform: string, arch: string, url: string, binary: string}} config
 *   Platform entry; `binary` is the executable's path inside the extracted
 *   archive directory.
 * @returns {Promise<void>}
 */
async function downloadNodeBinary(config) {
  const { platform, arch, url, binary } = config;
  const platformDir = path.join(RESOURCES_DIR, `${platform}-${arch}`);
  const binaryPath = path.join(platformDir, platform === 'win32' ? 'node.exe' : 'node');

  // Skip if already exists
  if (fs.existsSync(binaryPath)) {
    console.log(`✓ ${platform}-${arch} binary already exists`);
    return;
  }

  console.log(`Downloading Node.js for ${platform}-${arch}...`);

  mkdirSync(platformDir, { recursive: true });

  const archiveExt = platform === 'win32' ? '.zip' : '.tar.gz';
  const archivePath = path.join(platformDir, `node-v${NODE_VERSION}${archiveExt}`);
  // extractArchive unpacks into a `temp` directory beside the archive; remember
  // that location up front so it can be cleaned even if extraction throws.
  let tempDir = path.join(platformDir, 'temp');

  try {
    await downloadFile(url, archivePath);
    console.log(`Downloaded archive for ${platform}-${arch}`);

    tempDir = await extractArchive(archivePath, platform);

    // Windows uses different directory naming convention (win instead of win32)
    const extractedDirName = platform === 'win32'
      ? `node-v${NODE_VERSION}-win-${arch}`
      : `node-v${NODE_VERSION}-${platform}-${arch}`;
    const extractedBinaryPath = path.join(tempDir, extractedDirName, binary);

    fs.copyFileSync(extractedBinaryPath, binaryPath);

    // Make executable on Unix-like systems
    if (platform !== 'win32') {
      chmodSync(binaryPath, '755');
    }

    console.log(`✓ Successfully installed ${platform}-${arch} binary`);
  } catch (error) {
    console.error(`✗ Failed to download ${platform}-${arch}:`, error.message);
    // A binary that was copied but not finalized would be mistaken for a
    // complete install by the existence check above on the next run.
    fs.rmSync(binaryPath, { force: true });
  } finally {
    fs.rmSync(tempDir, { recursive: true, force: true });
    fs.rmSync(archivePath, { force: true });
  }
}
|
||||
|
||||
/**
 * Download the Node.js binary for every entry in PLATFORMS, one after another,
 * then report which targets (if any) are still missing their binary.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log(`Downloading Node.js v${NODE_VERSION} binaries for all platforms...\n`);

  // Create base directory
  mkdirSync(RESOURCES_DIR, { recursive: true });

  // Sequential on purpose: keeps the per-platform log output readable
  for (const platform of PLATFORMS) {
    await downloadNodeBinary(platform);
  }

  // downloadNodeBinary logs and swallows its own failures, so verify the
  // outcome on disk instead of announcing success unconditionally.
  const missing = PLATFORMS.filter(({ platform, arch }) => {
    const binaryName = platform === 'win32' ? 'node.exe' : 'node';
    return !fs.existsSync(path.join(RESOURCES_DIR, `${platform}-${arch}`, binaryName));
  }).map(({ platform, arch }) => `${platform}-${arch}`);

  if (missing.length > 0) {
    console.error(`\nFinished with errors. Missing Node.js binaries for: ${missing.join(', ')}`);
    return;
  }

  console.log('\nDone! Node.js binaries downloaded to:', RESOURCES_DIR);
}
|
||||
|
||||
// Run if called directly (not when required as a module)
if (require.main === module) {
  main().catch((error) => {
    console.error(error);
    // An unexpected crash must not look like a successful run to the caller
    process.exitCode = 1;
  });
}

module.exports = { downloadNodeBinary, PLATFORMS, NODE_VERSION };
|
||||
|
|
@ -49,7 +49,7 @@ const PLATFORMS: PlatformConfig[] = [
|
|||
},
|
||||
];
|
||||
|
||||
const RESOURCES_DIR = path.join(__dirname, "..", "resources", "node-binaries");
|
||||
const RESOURCES_DIR = path.join(__dirname, "..", "node-binaries");
|
||||
|
||||
// Parse command line arguments
|
||||
const args = process.argv.slice(2);
|
||||
|
|
|
|||
|
|
@ -62,9 +62,7 @@ export const useRecording = (): UseRecordingOutput => {
|
|||
);
|
||||
|
||||
// Manage audio capture when recording is active
|
||||
const isActive =
|
||||
recordingStatus.state === "recording" ||
|
||||
recordingStatus.state === "starting";
|
||||
const isActive = recordingStatus.state === "recording";
|
||||
|
||||
const { voiceDetected } = useAudioCapture({
|
||||
onAudioChunk: handleAudioChunk,
|
||||
|
|
|
|||
|
|
@ -195,7 +195,7 @@ export class RecordingManager extends EventEmitter {
|
|||
try {
|
||||
const swiftBridge = this.serviceManager.getService("swiftIOBridge");
|
||||
if (swiftBridge) {
|
||||
//await swiftBridge.call("muteSystemAudio", {});
|
||||
await swiftBridge.call("muteSystemAudio", {});
|
||||
}
|
||||
} catch (error) {
|
||||
logger.main.warn("Swift bridge not available for audio muting");
|
||||
|
|
|
|||
|
|
@ -0,0 +1,137 @@
|
|||
import { fork, ChildProcess } from "child_process";
|
||||
import { app } from "electron";
|
||||
import * as path from "path";
|
||||
import { logger } from "../../../main/logger";
|
||||
|
||||
/** Request sent to the worker process over the IPC channel. */
interface WorkerMessage {
  id: number;
  method: string;
  args: any[];
}

/** Reply from the worker; `id` echoes the request it answers. */
interface WorkerResponse {
  id: number;
  result?: any;
  error?: string;
}

/**
 * Minimal request/response RPC layer over a forked child process.
 *
 * The worker script is run with the Node binary passed to the constructor.
 * Each `exec` call is tagged with a numeric id; the promise settles when the
 * worker replies with the same id, when the message cannot be sent, or when
 * the worker errors, exits or is terminated.
 */
export class SimpleForkWrapper {
  private worker: ChildProcess | null = null;
  private messageId = 0;
  private pendingCalls = new Map<
    number,
    {
      resolve: (value: any) => void;
      reject: (error: any) => void;
    }
  >();

  constructor(
    private workerPath: string,
    private nodeBinaryPath: string,
  ) {}

  /**
   * Fork the worker process if it is not already running and attach the
   * message / error / exit handlers. Calling it again while a worker exists
   * is a no-op.
   */
  async initialize(): Promise<void> {
    if (this.worker) return;

    logger.transcription.info(`Starting worker process: ${this.workerPath}`);

    // Set up environment for the worker
    const workerEnv: Record<string, string | undefined> = {
      ...process.env,
      ELECTRON_RUN_AS_NODE: "1",
      GGML_METAL_PATH_RESOURCES: process.env.GGML_METAL_PATH_RESOURCES,
      NODE_OPTIONS: "--max-old-space-size=8192",
    };

    // fork needs a real file on disk, not a path inside the asar archive, so
    // in a packaged app the path is pointed at app.asar.unpacked instead.
    let actualWorkerPath = this.workerPath;
    if (app.isPackaged && this.workerPath.includes(".asar")) {
      // The lookahead keeps an already-unpacked path from being suffixed twice
      actualWorkerPath = this.workerPath.replace(
        /app\.asar(?!\.unpacked)/,
        "app.asar.unpacked",
      );
      workerEnv.APP_ASAR_PATH = path.join(process.resourcesPath, "app.asar");
      logger.transcription.info(`Using unpacked worker: ${actualWorkerPath}`);
    }

    const child = fork(actualWorkerPath, [], {
      execPath: this.nodeBinaryPath,
      env: workerEnv,
      silent: false,
      cwd: app.isPackaged ? process.resourcesPath : process.cwd(),
    });
    this.worker = child;

    child.on("message", (message: WorkerResponse) => {
      const pending =
        message?.id !== undefined
          ? this.pendingCalls.get(message.id)
          : undefined;
      if (!pending) return;

      this.pendingCalls.delete(message.id);
      if (message.error) {
        pending.reject(new Error(message.error));
      } else {
        pending.resolve(message.result);
      }
    });

    child.on("error", (error) => {
      logger.transcription.error("Worker process error:", error);
      // Events from a child that has already been replaced must not touch
      // the current worker or its pending calls.
      if (this.worker !== child) return;

      // Drop the broken worker so the next exec() forks a fresh one
      this.worker = null;
      if (child.pid !== undefined && !child.killed) {
        child.kill();
      }
      this.rejectAllPending(error);
    });

    child.on("exit", (code, signal) => {
      logger.transcription.info(
        `Worker process exited: code=${code}, signal=${signal}`,
      );
      if (this.worker !== child) return;

      this.worker = null;
      this.rejectAllPending(new Error(`Worker exited with code ${code}`));
    });
  }

  /** Reject every in-flight call with `error` and forget them. */
  private rejectAllPending(error: Error): void {
    for (const { reject } of this.pendingCalls.values()) {
      reject(error);
    }
    this.pendingCalls.clear();
  }

  /**
   * Invoke `method` in the worker with `args` and resolve with its result.
   * Starts the worker first if necessary.
   *
   * @param method - Name of the worker method to call.
   * @param args - Arguments for the method; Float32Array values are sent as
   *   tagged plain arrays.
   * @returns The `result` field of the worker's reply.
   * @throws Rejects with the worker's error message, a send failure, or the
   *   error describing why the worker went away.
   */
  async exec<T>(method: string, args: any[]): Promise<T> {
    if (!this.worker) {
      await this.initialize();
    }

    const worker = this.worker;
    if (!worker) {
      throw new Error("Worker process is not available");
    }

    return new Promise<T>((resolve, reject) => {
      const id = this.messageId++;
      this.pendingCalls.set(id, { resolve, reject });

      // Convert Float32Array to regular array for IPC
      const serializedArgs = args.map((arg) => {
        if (arg instanceof Float32Array) {
          return {
            __type: "Float32Array",
            data: Array.from(arg),
          };
        }
        return arg;
      });

      const request: WorkerMessage = { id, method, args: serializedArgs };

      try {
        worker.send(request, (error) => {
          // Without this the caller would wait forever on a closed channel
          if (error && this.pendingCalls.delete(id)) {
            reject(error);
          }
        });
      } catch (error) {
        // send() can also throw synchronously (e.g. unserializable payload)
        this.pendingCalls.delete(id);
        reject(error);
      }
    });
  }

  /**
   * Kill the worker process, if any, and reject all calls that are still
   * waiting for a reply.
   */
  async terminate(): Promise<void> {
    const worker = this.worker;
    if (!worker) return;

    this.worker = null;
    worker.kill();
    // Clearing without rejecting would leave callers awaiting forever
    this.rejectAllPending(new Error("Worker terminated"));
  }
}
|
||||
|
|
@ -4,30 +4,15 @@ import {
|
|||
} from "../../core/pipeline-types";
|
||||
import { logger } from "../../../main/logger";
|
||||
import { ModelManagerService } from "../../../services/model-manager";
|
||||
import { Worker as JestWorker } from "jest-worker";
|
||||
import { SimpleForkWrapper } from "./simple-fork-wrapper";
|
||||
import * as path from "path";
|
||||
import { app } from "electron";
|
||||
|
||||
interface WhisperWorkerMethods {
|
||||
initializeModel(modelPath: string): Promise<void>;
|
||||
transcribeAudio(
|
||||
aggregatedAudio: Float32Array,
|
||||
options: {
|
||||
language: string;
|
||||
initial_prompt: string;
|
||||
suppress_blank: boolean;
|
||||
suppress_non_speech_tokens: boolean;
|
||||
no_timestamps: boolean;
|
||||
},
|
||||
): Promise<string>;
|
||||
dispose(): Promise<void>;
|
||||
}
|
||||
|
||||
export class WhisperProvider implements TranscriptionProvider {
|
||||
readonly name = "whisper-local";
|
||||
|
||||
private modelManager: ModelManagerService;
|
||||
private whisperWorker: (JestWorker & WhisperWorkerMethods) | null = null;
|
||||
private workerWrapper: SimpleForkWrapper | null = null;
|
||||
|
||||
// Frame aggregation state
|
||||
private frameBuffer: Float32Array[] = [];
|
||||
|
|
@ -52,7 +37,7 @@ export class WhisperProvider implements TranscriptionProvider {
|
|||
// In development, use the local binary
|
||||
return path.join(
|
||||
__dirname,
|
||||
"../../resources/node-binaries",
|
||||
"../../node-binaries",
|
||||
`${platform}-${arch}`,
|
||||
binaryName,
|
||||
);
|
||||
|
|
@ -90,7 +75,7 @@ export class WhisperProvider implements TranscriptionProvider {
|
|||
context,
|
||||
flush = false,
|
||||
} = params;
|
||||
const { vocabulary, previousChunk, aggregatedTranscription } = context;
|
||||
const { vocabulary, aggregatedTranscription } = context;
|
||||
|
||||
// Audio data is already Float32Array
|
||||
|
||||
|
|
@ -140,8 +125,8 @@ export class WhisperProvider implements TranscriptionProvider {
|
|||
);
|
||||
|
||||
// Transcribe using smart-whisper
|
||||
if (!this.whisperWorker) {
|
||||
throw new Error("Whisper worker is not initialized");
|
||||
if (!this.workerWrapper) {
|
||||
throw new Error("Worker wrapper is not initialized");
|
||||
}
|
||||
|
||||
// Generate initial prompt from vocabulary and recent context
|
||||
|
|
@ -150,13 +135,16 @@ export class WhisperProvider implements TranscriptionProvider {
|
|||
aggregatedTranscription,
|
||||
);
|
||||
|
||||
const text = await this.whisperWorker.transcribeAudio(aggregatedAudio, {
|
||||
language: "auto",
|
||||
initial_prompt: initialPrompt,
|
||||
suppress_blank: true,
|
||||
suppress_non_speech_tokens: true,
|
||||
no_timestamps: true,
|
||||
});
|
||||
const text = await this.workerWrapper!.exec<string>("transcribeAudio", [
|
||||
aggregatedAudio,
|
||||
{
|
||||
language: "auto",
|
||||
initial_prompt: initialPrompt,
|
||||
suppress_blank: true,
|
||||
suppress_non_speech_tokens: true,
|
||||
no_timestamps: true,
|
||||
},
|
||||
]);
|
||||
|
||||
logger.transcription.debug(
|
||||
`Transcription completed, length: ${text.length}`,
|
||||
|
|
@ -290,30 +278,22 @@ export class WhisperProvider implements TranscriptionProvider {
|
|||
}
|
||||
|
||||
async initializeWhisper(): Promise<void> {
|
||||
if (!this.whisperWorker) {
|
||||
// Initialize jest-worker with single worker process
|
||||
if (!this.workerWrapper) {
|
||||
// Determine the correct path for the worker script
|
||||
const workerPath = app.isPackaged
|
||||
? path.join(__dirname, "whisper-worker.js") // In production, same directory as main.js
|
||||
: path.join(process.cwd(), ".vite/build/whisper-worker.js"); // In development
|
||||
? path.join(__dirname, "whisper-worker-fork.js") // In production, same directory as main.js
|
||||
: path.join(process.cwd(), ".vite/build/whisper-worker-fork.js"); // In development
|
||||
|
||||
logger.transcription.info(
|
||||
`Initializing Whisper worker at: ${workerPath}`,
|
||||
);
|
||||
this.whisperWorker = new JestWorker(workerPath, {
|
||||
exposedMethods: ["initializeModel", "transcribeAudio", "dispose"],
|
||||
numWorkers: 1,
|
||||
enableWorkerThreads: false,
|
||||
forkOptions: {
|
||||
execPath: this.getNodeBinaryPath(),
|
||||
env: {
|
||||
...process.env,
|
||||
GGML_METAL_PATH_RESOURCES: process.env.GGML_METAL_PATH_RESOURCES,
|
||||
NODE_OPTIONS: "--max-old-space-size=8192",
|
||||
},
|
||||
silent: false, // Enable output from worker for debugging
|
||||
},
|
||||
}) as JestWorker & WhisperWorkerMethods;
|
||||
|
||||
this.workerWrapper = new SimpleForkWrapper(
|
||||
workerPath,
|
||||
this.getNodeBinaryPath(),
|
||||
);
|
||||
|
||||
await this.workerWrapper.initialize();
|
||||
}
|
||||
|
||||
const modelPath = await this.modelManager.getBestAvailableModelPath();
|
||||
|
|
@ -324,7 +304,7 @@ export class WhisperProvider implements TranscriptionProvider {
|
|||
}
|
||||
|
||||
try {
|
||||
await this.whisperWorker.initializeModel(modelPath);
|
||||
await this.workerWrapper.exec("initializeModel", [modelPath]);
|
||||
} catch (error) {
|
||||
logger.transcription.error(`Failed to initialize:`, error);
|
||||
throw new Error(`Failed to initialize smart-whisper: ${error}`);
|
||||
|
|
@ -333,15 +313,15 @@ export class WhisperProvider implements TranscriptionProvider {
|
|||
|
||||
// Simple cleanup method
|
||||
async dispose(): Promise<void> {
|
||||
if (this.whisperWorker) {
|
||||
if (this.workerWrapper) {
|
||||
try {
|
||||
await this.whisperWorker.dispose();
|
||||
await this.whisperWorker.end(); // Terminate the worker process
|
||||
await this.workerWrapper.exec("dispose", []);
|
||||
await this.workerWrapper.terminate(); // Terminate the worker
|
||||
logger.transcription.debug("Worker terminated");
|
||||
} catch (error) {
|
||||
logger.transcription.warn("Error disposing whisper worker:", error);
|
||||
logger.transcription.warn("Error disposing worker:", error);
|
||||
} finally {
|
||||
this.whisperWorker = null;
|
||||
this.workerWrapper = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,117 @@
|
|||
// Worker process entry point for fork
|
||||
import { Whisper } from "smart-whisper";
|
||||
|
||||
// Console-backed stand-in for the app logger, used inside the worker process.
// Every line is prefixed with "[whisper-worker]" and its level.
const logger = {
  transcription: {
    info(message: string, ...args: unknown[]): void {
      console.log(`[whisper-worker] INFO: ${message}`, ...args);
    },
    error(message: string, ...args: unknown[]): void {
      console.error(`[whisper-worker] ERROR: ${message}`, ...args);
    },
    debug(message: string, ...args: unknown[]): void {
      console.log(`[whisper-worker] DEBUG: ${message}`, ...args);
    },
  },
};
|
||||
|
||||
// Currently loaded Whisper instance and the model path it was loaded from.
// Both are null whenever no usable model is loaded.
let whisperInstance: Whisper | null = null;
let currentModelPath: string | null = null;

// Worker methods, looked up by name by the IPC message handler
const methods = {
  /**
   * Load the model at `modelPath`, replacing any previously loaded model.
   * Does nothing when that exact model is already loaded.
   *
   * @throws Re-throws the error from `load()`; in that case no model is
   *   considered loaded afterwards.
   */
  async initializeModel(modelPath: string): Promise<void> {
    if (whisperInstance && currentModelPath === modelPath) {
      return; // Already initialized with same model
    }

    // Cleanup existing instance. State is reset before freeing so that a
    // failure below can never leave the old path paired with a new instance.
    if (whisperInstance) {
      const previous = whisperInstance;
      whisperInstance = null;
      currentModelPath = null;
      await previous.free();
    }

    const { Whisper } = await import("smart-whisper");
    const instance = new Whisper(modelPath, { gpu: true });
    try {
      await instance.load();
    } catch (e) {
      logger.transcription.error("Failed to load Whisper model:", e);
      throw e;
    }

    // Publish the instance only once it has loaded successfully
    whisperInstance = instance;
    currentModelPath = modelPath;
    logger.transcription.info(`Initialized with model: ${modelPath}`);
  },

  /**
   * Transcribe `aggregatedAudio` and return the segment texts joined by
   * single spaces and trimmed.
   *
   * @param aggregatedAudio - Audio samples; assumed to be 16 kHz mono as the
   *   padding below is computed for that rate.
   * @param options - Passed through to `transcribe` unchanged.
   * @throws Error when no model has been initialized.
   */
  async transcribeAudio(
    aggregatedAudio: Float32Array,
    options: {
      language: string;
      initial_prompt: string;
      suppress_blank: boolean;
      suppress_non_speech_tokens: boolean;
      no_timestamps: boolean;
    },
  ): Promise<string> {
    if (!whisperInstance) {
      throw new Error("Whisper instance is not initialized");
    }

    // Pad short audio with trailing silence up to 1.25 seconds
    // (16000 samples for one second plus a 4000-sample buffer)
    const SAMPLE_RATE = 16000; // Whisper expects 16kHz input
    const MIN_DURATION_SAMPLES = SAMPLE_RATE * 1 + 4000;

    let audio = aggregatedAudio;
    if (audio.length < MIN_DURATION_SAMPLES) {
      // New buffers are zero-filled, so the tail stays silent
      const padded = new Float32Array(MIN_DURATION_SAMPLES);
      padded.set(audio, 0);
      audio = padded;
    }

    const { result } = await whisperInstance.transcribe(audio, options);
    const transcription = await result;

    return transcription
      .map((segment) => segment.text)
      .join(" ")
      .trim();
  },

  /** Free the loaded model, if any, and reset the worker state. */
  async dispose(): Promise<void> {
    if (whisperInstance) {
      const instance = whisperInstance;
      whisperInstance = null;
      currentModelPath = null;
      await instance.free();
    }
  },
};
|
||||
|
||||
// Handle messages from parent process.
// Each request is { id, method, args }; the reply echoes `id` and carries
// either `result` or an `error` string.
process.on("message", async (message: any) => {
  // Read the id defensively so a malformed message still gets an error reply
  // instead of throwing outside the try block (unhandled rejection).
  const id = message?.id;

  try {
    const method = message?.method;
    const args: unknown[] = Array.isArray(message?.args) ? message.args : [];

    // Own-property check: `in` would also accept inherited names such as
    // "toString" or "constructor".
    if (
      typeof method !== "string" ||
      !Object.prototype.hasOwnProperty.call(methods, method)
    ) {
      process.send!({ id, error: `Unknown method: ${method}` });
      return;
    }

    // Deserialize Float32Array from IPC
    const deserializedArgs = args.map((arg: any) => {
      if (arg && arg.__type === "Float32Array" && Array.isArray(arg.data)) {
        return new Float32Array(arg.data);
      }
      return arg;
    });

    const result = await (methods as any)[method](...deserializedArgs);
    process.send!({ id, result });
  } catch (error) {
    process.send!({
      id,
      error: error instanceof Error ? error.message : String(error),
    });
  }
});

// Log startup (no message is sent to the parent here)
logger.transcription.info("Worker process started");
|
||||
|
|
@ -198,50 +198,45 @@ export class TranscriptionService {
|
|||
});
|
||||
}
|
||||
|
||||
// Process chunk if it has content
|
||||
if (audioChunk.length > 0) {
|
||||
// Direct frame to Whisper - it will handle aggregation and VAD internally
|
||||
const previousChunk =
|
||||
session.transcriptionResults.length > 0
|
||||
? session.transcriptionResults[
|
||||
session.transcriptionResults.length - 1
|
||||
]
|
||||
: undefined;
|
||||
const aggregatedTranscription = session.transcriptionResults
|
||||
.join(" ")
|
||||
.trim();
|
||||
// Direct frame to Whisper - it will handle aggregation and VAD internally
|
||||
const previousChunk =
|
||||
session.transcriptionResults.length > 0
|
||||
? session.transcriptionResults[session.transcriptionResults.length - 1]
|
||||
: undefined;
|
||||
const aggregatedTranscription = session.transcriptionResults
|
||||
.join(" ")
|
||||
.trim();
|
||||
|
||||
const chunkTranscription = await this.whisperProvider.transcribe({
|
||||
audioData: audioChunk,
|
||||
speechProbability: speechProbability, // Now from VAD service
|
||||
context: {
|
||||
vocabulary: session.context.sharedData.vocabulary,
|
||||
accessibilityContext: session.context.sharedData.accessibilityContext,
|
||||
previousChunk,
|
||||
aggregatedTranscription: aggregatedTranscription || undefined,
|
||||
},
|
||||
flush: isFinal,
|
||||
});
|
||||
const chunkTranscription = await this.whisperProvider.transcribe({
|
||||
audioData: audioChunk,
|
||||
speechProbability: speechProbability, // Now from VAD service
|
||||
context: {
|
||||
vocabulary: session.context.sharedData.vocabulary,
|
||||
accessibilityContext: session.context.sharedData.accessibilityContext,
|
||||
previousChunk,
|
||||
aggregatedTranscription: aggregatedTranscription || undefined,
|
||||
},
|
||||
flush: isFinal,
|
||||
});
|
||||
|
||||
// Accumulate the result only if Whisper returned something
|
||||
// (it returns empty string while buffering)
|
||||
if (chunkTranscription.trim()) {
|
||||
session.transcriptionResults.push(chunkTranscription);
|
||||
logger.transcription.info("Whisper returned transcription", {
|
||||
sessionId,
|
||||
transcriptionLength: chunkTranscription.length,
|
||||
totalResults: session.transcriptionResults.length,
|
||||
});
|
||||
}
|
||||
|
||||
logger.transcription.debug("Processed frame", {
|
||||
// Accumulate the result only if Whisper returned something
|
||||
// (it returns empty string while buffering)
|
||||
if (chunkTranscription.trim()) {
|
||||
session.transcriptionResults.push(chunkTranscription);
|
||||
logger.transcription.info("Whisper returned transcription", {
|
||||
sessionId,
|
||||
frameSize: audioChunk.length,
|
||||
hadTranscription: chunkTranscription.length > 0,
|
||||
isFinal,
|
||||
transcriptionLength: chunkTranscription.length,
|
||||
totalResults: session.transcriptionResults.length,
|
||||
});
|
||||
}
|
||||
|
||||
logger.transcription.debug("Processed frame", {
|
||||
sessionId,
|
||||
frameSize: audioChunk.length,
|
||||
hadTranscription: chunkTranscription.length > 0,
|
||||
isFinal,
|
||||
});
|
||||
|
||||
// Release transcription mutex
|
||||
this.transcriptionMutex.release();
|
||||
const completeTranscriptionTillNow = session.transcriptionResults
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ export class VADService extends EventEmitter {
|
|||
// In development, use the source path
|
||||
this.modelPath = path.join(
|
||||
__dirname,
|
||||
"../../resources/models/silero_vad_v5.onnx",
|
||||
"../../models/silero_vad_v5.onnx",
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -11,13 +11,16 @@ export default defineConfig({
|
|||
__dirname,
|
||||
"src/pipeline/providers/transcription/whisper-worker.ts",
|
||||
),
|
||||
"whisper-worker-fork": resolve(
|
||||
__dirname,
|
||||
"src/pipeline/providers/transcription/whisper-worker-fork.ts",
|
||||
),
|
||||
},
|
||||
output: {
|
||||
entryFileNames: "[name].js",
|
||||
},
|
||||
external: [
|
||||
"smart-whisper",
|
||||
"jest-worker",
|
||||
"@libsql/client",
|
||||
"@libsql/darwin-arm64",
|
||||
"@libsql/darwin-x64",
|
||||
|
|
|
|||
8
pnpm-lock.yaml
generated
8
pnpm-lock.yaml
generated
|
|
@ -272,6 +272,9 @@ importers:
|
|||
vaul:
|
||||
specifier: ^1.1.2
|
||||
version: 1.1.2(@types/react-dom@19.1.5(@types/react@19.1.5))(@types/react@19.1.5)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
|
||||
workerpool:
|
||||
specifier: ^9.3.3
|
||||
version: 9.3.3
|
||||
zod:
|
||||
specifier: ^3.25.24
|
||||
version: 3.25.67
|
||||
|
|
@ -8168,6 +8171,9 @@ packages:
|
|||
engines: {node: '>=16'}
|
||||
hasBin: true
|
||||
|
||||
workerpool@9.3.3:
|
||||
resolution: {integrity: sha512-slxCaKbYjEdFT/o2rH9xS1hf4uRDch1w7Uo+apxhZ+sf/1d9e0ZVkn42kPNGP2dgjIx6YFvSevj0zHvbWe2jdw==}
|
||||
|
||||
wrangler@4.20.3:
|
||||
resolution: {integrity: sha512-ugvmi43CFPbjeQFfhU7EqE1V0ek6ZFv80jzwHcPk/7jPFmOA4ahT5uUU1ga5ZP6vz6lUuG2bLnyl1T5qJah0cg==}
|
||||
engines: {node: '>=18.0.0'}
|
||||
|
|
@ -17524,6 +17530,8 @@ snapshots:
|
|||
'@cloudflare/workerd-linux-arm64': 1.20250617.0
|
||||
'@cloudflare/workerd-windows-64': 1.20250617.0
|
||||
|
||||
workerpool@9.3.3: {}
|
||||
|
||||
wrangler@4.20.3:
|
||||
dependencies:
|
||||
'@cloudflare/kv-asset-handler': 0.4.0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue