feat: migrate from smart-whisper to custom binding + add cuda support

2025-09-19 16:09:08 +05:30 · 2025-09-19 16:09:08 +05:30 · 048915da61
commit 048915da61
parent 696193eb44
52 changed files with 1490 additions and 4353 deletions
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@ -4,6 +4,7 @@ on:
  push:
    branches:
      - feat.windows.support
+      - feat/whisper.migration
    tags:
      - 'v*'
  workflow_dispatch:
@ -70,6 +71,13 @@ jobs:
          node-version: '24.1.0'
          cache: 'pnpm'

+      - name: Install CUDA Toolkit
+        if: matrix.os == 'windows'
+        uses: Jimver/cuda-toolkit@v0.2.15
+        with:
+          cuda: '12.4.1'
+          method: 'network'
+
      - name: Log Node.js architecture and platform
        run: |
          echo "=== Node.js Process Information ==="
@ -78,8 +86,24 @@ jobs:
          echo ""

      - name: Install dependencies
+        env:
+          GGML_NATIVE: OFF # ensure postinstall builds avoid i8mm on CI runners
        run: pnpm install --frozen-lockfile

+      - name: Build whisper wrapper JS
+        run: pnpm --filter @amical/whisper-wrapper build
+
+      - name: Build whisper native binaries
+        env:
+          GGML_NATIVE: OFF # CI mac runners lack i8mm support; keep CPU features conservative here
+        run: pnpm --filter @amical/whisper-wrapper build:native
+
+      - name: Build whisper native binaries (cuda)
+        if: matrix.os == 'windows'
+        env:
+          GGML_NATIVE: OFF
+        run: pnpm --filter @amical/whisper-wrapper build:native:cuda
+
      - name: Download Node.js binaries
        working-directory: apps/desktop
        run: pnpm download-node
--- a/.gitignore
+++ b/.gitignore
@ -21,11 +21,20 @@ coverage
 # Vercel
 .vercel

+# CMake-js cache
+.cmake-js/
+**/.cmake-js/
+
+# Tool helpers
+.home/
+**/.home/
+
 # Build Outputs
 .next/
 out/
 build
 dist
+packages/whisper-wrapper/native/


 # Debug
@ -41,6 +50,7 @@ CLAUDE.md
 .local
 .claude
 amical.db
+AGENTS.md

 # Temp files
 /tmp
--- a/.gitmodules
+++ b/.gitmodules
@ -1,3 +1,3 @@
-[submodule "packages/smart-whisper/whisper.cpp"]
-	path = packages/smart-whisper/whisper.cpp
+[submodule "packages/whisper-wrapper/whisper.cpp"]
+	path = packages/whisper-wrapper/whisper.cpp
 	url = https://github.com/ggerganov/whisper.cpp.git
--- a/apps/desktop/forge.config.ts
+++ b/apps/desktop/forge.config.ts
@ -40,7 +40,7 @@ export const EXTERNAL_DEPENDENCIES = [
  "libsql",
  "onnxruntime-node",
  "workerpool",
-  "@amical/smart-whisper",
+  "@amical/whisper-wrapper",
  // Add any other native modules you need here
 ];

@ -160,6 +160,24 @@ const config: ForgeConfig = {
        }
      }

+      // Prune heavy native source and build directories that trigger MAX_PATH errors when packaging on Windows
+      const whisperWrapperPath = join(
+        localNodeModules,
+        "@amical",
+        "whisper-wrapper",
+      );
+      const whisperPruneTargets = [
+        join(whisperWrapperPath, "whisper.cpp"),
+        join(whisperWrapperPath, "build"),
+        join(whisperWrapperPath, ".cmake-js"),
+      ];
+      for (const target of whisperPruneTargets) {
+        if (existsSync(target)) {
+          console.log(`Pruning ${target} from packaged output`);
+          rmSync(target, { recursive: true, force: true });
+        }
+      }
+
      // Second pass: Replace any symlinks with dereferenced copies
      console.log("Checking for symlinks in copied dependencies...");
      for (const dep of nativeModuleDependenciesToPackage) {
@ -318,7 +336,7 @@ const config: ForgeConfig = {
  packagerConfig: {
    asar: {
      unpack:
-        "{*.node,*.dylib,*.so,*.dll,*.metal,**/node_modules/@amical/smart-whisper/**,**/whisper.cpp/**,**/.vite/build/whisper-worker-fork.js,**/node_modules/jest-worker/**,**/onnxruntime-node/bin/**}",
+        "{*.node,*.dylib,*.so,*.dll,*.metal,**/node_modules/@amical/whisper-wrapper/**,**/whisper.cpp/**,**/.vite/build/whisper-worker-fork.js,**/node_modules/jest-worker/**,**/onnxruntime-node/bin/**}",
    },
    name: "Amical",
    executableName: "Amical",
--- a/apps/desktop/package.json
+++ b/apps/desktop/package.json
@ -81,7 +81,7 @@
  "dependencies": {
    "@ai-sdk/openai": "^1.3.22",
    "@amical/eslint-config": "workspace:*",
-    "@amical/smart-whisper": "workspace:*",
+    "@amical/whisper-wrapper": "workspace:*",
    "@amical/types": "workspace:*",
    "@amical/y-libsql": "workspace:*",
    "@dnd-kit/core": "^6.3.1",
--- a/apps/desktop/src/main/main.ts
+++ b/apps/desktop/src/main/main.ts
@ -2,21 +2,7 @@ import dotenv from "dotenv";
 dotenv.config();

 import { app } from "electron";
-import * as path from "path";

-// Set GGML_METAL_PATH_RESOURCES before any other imports
-// This ensures @amical/smart-whisper can find its resources when unpacked from asar
-if (app.isPackaged) {
-  // Point to the unpacked whisper.cpp directory
-  process.env.GGML_METAL_PATH_RESOURCES = path.join(
-    process.resourcesPath,
-    "app.asar.unpacked",
-    "node_modules",
-    "@amical",
-    "smart-whisper",
-    "whisper.cpp",
-  );
-}
 import started from "electron-squirrel-startup";
 import { AppManager } from "./core/app-manager";
 import { updateElectronApp } from "update-electron-app";
--- a/apps/desktop/src/pipeline/providers/transcription/simple-fork-wrapper.ts
+++ b/apps/desktop/src/pipeline/providers/transcription/simple-fork-wrapper.ts
@ -44,7 +44,6 @@ export class SimpleForkWrapper {
    const workerEnv: any = {
      ...process.env,
      ELECTRON_RUN_AS_NODE: "1",
-      GGML_METAL_PATH_RESOURCES: process.env.GGML_METAL_PATH_RESOURCES,
      NODE_OPTIONS: "--max-old-space-size=8192",
    };

--- a/apps/desktop/src/pipeline/providers/transcription/whisper-provider.ts
+++ b/apps/desktop/src/pipeline/providers/transcription/whisper-provider.ts
@ -57,6 +57,21 @@ export class WhisperProvider implements TranscriptionProvider {
    await this.initializeWhisper();
  }

+  /**
+   * Asks the worker process which native Whisper binding it is using.
+   *
+   * @returns The `{ path, type }` record returned by the worker's
+   *   `getBindingInfo` method, or `null` when no worker wrapper exists or the
+   *   worker call rejects (the failure is logged as a warning, not rethrown).
+   */
+  async getBindingInfo(): Promise<{ path: string; type: string } | null> {
+    type BindingInfo = { path: string; type: string };
+
+    const worker = this.workerWrapper;
+    if (!worker) {
+      return null;
+    }
+
+    try {
+      const info = await worker.exec<BindingInfo | null>("getBindingInfo", []);
+      return info;
+    } catch (error) {
+      // Best-effort lookup: callers treat a missing value as "unknown binding".
+      logger.transcription.warn("Failed to get binding info:", error);
+      return null;
+    }
+  }
+
  async transcribe(
    params: TranscribeParams & { flush?: boolean },
  ): Promise<string> {
@ -119,7 +134,7 @@ export class WhisperProvider implements TranscriptionProvider {
        `Starting transcription of ${aggregatedAudio.length} samples (${((aggregatedAudio.length / this.SAMPLE_RATE) * 1000).toFixed(0)}ms)`,
      );

-      // Transcribe using smart-whisper
+      // Transcribe using the local Whisper wrapper
      if (!this.workerWrapper) {
        throw new Error("Worker wrapper is not initialized");
      }
@ -137,7 +152,7 @@ export class WhisperProvider implements TranscriptionProvider {
          initial_prompt: initialPrompt,
          suppress_blank: true,
          suppress_non_speech_tokens: true,
-          no_timestamps: true,
+          no_timestamps: false,
        },
      ]);

@ -302,7 +317,7 @@ export class WhisperProvider implements TranscriptionProvider {
      await this.workerWrapper.exec("initializeModel", [modelPath]);
    } catch (error) {
      logger.transcription.error(`Failed to initialize:`, error);
-      throw new Error(`Failed to initialize smart-whisper: ${error}`);
+      throw new Error(`Failed to initialize whisper wrapper: ${error}`);
    }
  }

--- a/apps/desktop/src/pipeline/providers/transcription/whisper-worker-fork.ts
+++ b/apps/desktop/src/pipeline/providers/transcription/whisper-worker-fork.ts
@ -1,5 +1,19 @@
 // Worker process entry point for fork
-import { Whisper } from "@amical/smart-whisper";
+import { Whisper, getLoadedBindingInfo } from "@amical/whisper-wrapper";
+
+// Type definitions for IPC communication
+
+/** Request envelope received from the parent process via `process.on("message")`. */
+interface WorkerMessage {
+  /** Correlation id; it is sent back alongside the result or error. */
+  id: number;
+  /** Name of the entry in `methods` to invoke. */
+  method: string;
+  /** Positional arguments, still in their serialized IPC form. */
+  args: unknown[];
+}
+
+/**
+ * Plain-object encoding of a `Float32Array` on the IPC channel. The message
+ * handler rebuilds the typed array with `new Float32Array(data)`.
+ */
+interface SerializedFloat32Array {
+  __type: "Float32Array";
+  data: number[];
+}
+
+/**
+ * A raw IPC argument before deserialization. Any value may arrive, so the
+ * type is `unknown`; the message handler narrows it to
+ * `SerializedFloat32Array` with runtime checks. (Writing
+ * `SerializedFloat32Array | unknown` is equivalent, because a union with
+ * `unknown` collapses to `unknown`.)
+ */
+type MethodArg = unknown;

 // Simple console-based logging for worker process
 const logger = {
@ -29,7 +43,6 @@ const methods = {
      whisperInstance = null;
    }

-    const { Whisper } = await import("@amical/smart-whisper");
    whisperInstance = new Whisper(modelPath, { gpu: true });
    try {
      await whisperInstance.load();
@ -71,8 +84,17 @@ const methods = {
    );
    const transcription = await result;

+    logger.transcription.debug(
+      `Transcription segments: ${Array.isArray(transcription) ? transcription.length : "?"}`,
+    );
+    if (Array.isArray(transcription)) {
+      logger.transcription.debug(
+        `First segment preview: ${transcription[0]?.text ?? "<none>"}`,
+      );
+    }
+
    return transcription
-      .map((segment) => segment.text)
+      .map((segment: { text: string }) => segment.text)
      .join(" ")
      .trim();
  },
@ -84,23 +106,39 @@ const methods = {
      currentModelPath = null;
    }
  },
+
+  /**
+   * Exposes the whisper wrapper's binding details to the parent process.
+   *
+   * @returns Whatever `getLoadedBindingInfo()` yields: a `{ path, type }`
+   *   record, or `null`.
+   */
+  getBindingInfo(): { path: string; type: string } | null {
+    const loadedBinding = getLoadedBindingInfo();
+    return loadedBinding;
+  },
 };

 // Handle messages from parent process
-process.on("message", async (message: any) => {
+process.on("message", async (message: WorkerMessage) => {
  const { id, method, args } = message;

  try {
    // Deserialize Float32Array from IPC
-    const deserializedArgs = args.map((arg: any) => {
-      if (arg && arg.__type === "Float32Array" && Array.isArray(arg.data)) {
-        return new Float32Array(arg.data);
+    const deserializedArgs = args.map((arg: MethodArg) => {
+      if (
+        arg &&
+        typeof arg === "object" &&
+        "__type" in arg &&
+        arg.__type === "Float32Array"
+      ) {
+        const serialized = arg as SerializedFloat32Array;
+        if (Array.isArray(serialized.data)) {
+          return new Float32Array(serialized.data);
+        }
      }
      return arg;
    });

    if (method in methods) {
-      const result = await (methods as any)[method](...deserializedArgs);
+      const methodName = method as keyof typeof methods;
+      const fn = methods[methodName] as (
+        ...args: unknown[]
+      ) => Promise<unknown>;
+      const result = await fn(...deserializedArgs);
      process.send!({ id, result });
    } else {
      process.send!({ id, error: `Unknown method: ${method}` });
--- a/apps/desktop/src/pipeline/providers/transcription/whisper-worker.ts
+++ b/apps/desktop/src/pipeline/providers/transcription/whisper-worker.ts
@ -1,5 +1,5 @@
 // This file contains just the Whisper-specific operations that need to run in a separate process
-import { Whisper } from "@amical/smart-whisper";
+import { Whisper } from "@amical/whisper-wrapper";

 // Simple console-based logging for worker process
 const logger = {
@ -27,7 +27,6 @@ export async function initializeModel(modelPath: string): Promise<void> {
    whisperInstance = null;
  }

-  const { Whisper } = await import("@amical/smart-whisper");
  whisperInstance = new Whisper(modelPath, { gpu: true });
  try {
    await whisperInstance.load();
@ -57,7 +56,7 @@ export async function transcribeAudio(
  const transcription = await result;

  return transcription
-    .map((segment) => segment.text)
+    .map((segment: { text: string }) => segment.text)
    .join(" ")
    .trim();
 }
--- a/apps/desktop/src/services/telemetry-service.ts
+++ b/apps/desktop/src/services/telemetry-service.ts
@ -9,6 +9,7 @@ export interface TranscriptionMetrics {
  session_id?: string;
  model_id: string;
  model_preloaded?: boolean;
+  whisper_native_binding?: string;
  total_duration_ms?: number;
  recording_duration_ms?: number;
  processing_duration_ms?: number;
--- a/apps/desktop/src/services/transcription-service.ts
+++ b/apps/desktop/src/services/transcription-service.ts
@ -417,10 +417,22 @@ export class TranscriptionService {
    const audioDurationSeconds =
      session.context.sharedData.audioMetadata?.duration;

+    // Get native binding info if using local whisper
+    let whisperNativeBinding: string | undefined;
+    if (this.whisperProvider && "getBindingInfo" in this.whisperProvider) {
+      const bindingInfo = await this.whisperProvider.getBindingInfo();
+      whisperNativeBinding = bindingInfo?.type;
+      logger.transcription.info(
+        "whisper native binding used",
+        whisperNativeBinding,
+      );
+    }
+
    this.telemetryService.trackTranscriptionCompleted({
      session_id: sessionId,
      model_id: selectedModel!,
      model_preloaded: this.modelWasPreloaded,
+      whisper_native_binding: whisperNativeBinding,
      total_duration_ms: totalDuration || 0,
      recording_duration_ms: recordingDuration,
      processing_duration_ms: processingDuration,
--- a/apps/desktop/vite.main.config.mts
+++ b/apps/desktop/vite.main.config.mts
@ -29,7 +29,7 @@ export default defineConfig({
        entryFileNames: "[name].js",
      },
      external: [
-        "@amical/smart-whisper",
+        "@amical/whisper-wrapper",
        "@libsql/client",
        "@libsql/darwin-arm64",
        "@libsql/darwin-x64",
@ -51,7 +51,7 @@ export default defineConfig({
  optimizeDeps: {
    exclude: [
      "better-sqlite3",
-      "smart-whisper",
+      "@amical/whisper-wrapper",
      "drizzle-orm",
      "@libsql/client",
    ],
--- a/package.json
+++ b/package.json
@ -32,13 +32,13 @@
      "keytar",
      "protobufjs",
      "sharp",
-      "smart-whisper",
+      "@amical/whisper-wrapper",
      "drizzle-orm/libsql"
    ],
    "onlyBuiltDependencies": [
      "electron",
      "electron-winstaller",
-      "smart-whisper",
+      "@amical/whisper-wrapper",
      "drizzle-orm/libsql",
      "@libsql",
      "macos-alias",
--- a/packages/smart-whisper/.gitignore
+++ b/packages/smart-whisper/.gitignore
@ -1,64 +0,0 @@
-# Dependencies
-node_modules/
-
-# Build outputs
-build/
-*.node
-bin/
-
-# TypeScript outputs
-dist/
-# Keep the build configuration file
-!dist/build.js
-*.tsbuildinfo
-
-# Native compilation artifacts
-*.o
-*.a
-*.so
-*.dylib
-*.dll
-*.lib
-*.exp
-*.ilk
-*.pdb
-
-# Logs
-*.log
-npm-debug.log*
-yarn-debug.log*
-yarn-error.log*
-lerna-debug.log*
-.pnpm-debug.log*
-
-# OS files
-.DS_Store
-Thumbs.db
-desktop.ini
-
-# IDE files
-.vscode/
-.idea/
-*.swp
-*.swo
-*~
-
-# Environment variables
-.env
-.env.local
-.env.*.local
-
-# Test coverage
-coverage/
-*.lcov
-.nyc_output/
-
-# Temporary files
-tmp/
-temp/
-*.tmp
-
-# whisper.cpp build artifacts (if any get generated)
-whisper.cpp/build/
-whisper.cpp/*.o
-whisper.cpp/**/*.o
--- a/packages/smart-whisper/WHISPER_CPP_VERSION
+++ b/packages/smart-whisper/WHISPER_CPP_VERSION
@ -1,10 +0,0 @@
-# Whisper.cpp Version Information
-
-Repository: https://github.com/ggerganov/whisper.cpp
-Commit: 2ef717b293fe93872cc3a03ca77942936a281959
-Date: November 2024
-Description: whisper : add large-v3-turbo (#2440)
-
-This file tracks the exact version of whisper.cpp used in this package.
-To update whisper.cpp, replace the contents of the whisper.cpp directory
-with a new version and update this file accordingly.
--- a/packages/smart-whisper/binding.gyp
+++ b/packages/smart-whisper/binding.gyp
@ -1,37 +0,0 @@
-{
-  'variables' : {
-    'openssl_fips': '',
-  },
-  "targets": [
-    {
-      "target_name": "smart-whisper",
-      "sources": [
-          "src/binding/binding.cc",
-          "src/binding/common.cc",
-          "src/binding/model.cc",
-          "src/binding/transcribe.cc",
-          "<!@(node -p \"require('./dist/build.js').sources\")"
-      ],
-      "libraries": [ "<!@(node -p \"require('./dist/build.js').libraries\")" ],
-      'defines': [ "<!@(node -p \"require('./dist/build.js').defines\")" ],
-      'include_dirs': ["<!@(node -p \"require('node-addon-api').include\")", "whisper.cpp/include", "whisper.cpp/ggml/include", "whisper.cpp/examples"],
-      'dependencies': ["<!(node -p \"require('node-addon-api').gyp\")"],
-      'cflags!': [ '-fno-exceptions' ],
-      'cflags_cc!': [ '-fno-exceptions' ],
-      'xcode_settings': {
-        'GCC_ENABLE_CPP_EXCEPTIONS': 'YES',
-        'CLANG_CXX_LIBRARY': 'libc++',
-      },
-      'msvs_settings': {
-        'VCCLCompilerTool': { 'ExceptionHandling': 1 },
-      },
-      'conditions': [
-        ['OS=="mac"', {
-            'xcode_settings': {
-                'GCC_SYMBOLS_PRIVATE_EXTERN': 'YES', # -fvisibility=hidden
-            }
-        }]
-      ]
-    }
-  ],
-}
--- a/packages/smart-whisper/package.json
+++ b/packages/smart-whisper/package.json
@ -1,41 +0,0 @@
-{
-  "name": "@amical/smart-whisper",
-  "version": "0.1.0",
-  "description": "Whisper.cpp Node.js binding with auto model offloading strategy.",
-  "main": "dist/index.js",
-  "types": "dist/index.d.ts",
-  "keywords": [
-    "whisper",
-    "whisper.cpp",
-    "native",
-    "binding",
-    "addon"
-  ],
-  "gypfile": true,
-  "files": [
-    "dist",
-    "src",
-    "scripts",
-    "binding.gyp",
-    "whisper.cpp/**/*.{c,h,cpp,hpp,m,cu,metal}",
-    "whisper.cpp/Makefile",
-    "whisper.cpp/LICENSE"
-  ],
-  "scripts": {
-    "install": "tsup",
-    "postinstall": "node-gyp rebuild",
-    "build": "tsup && node-gyp rebuild",
-    "build:ts": "tsup",
-    "build:native": "node-gyp rebuild"
-  },
-  "dependencies": {
-    "node-addon-api": "^8.5.0",
-    "minimatch": "10.0.3"
-  },
-  "devDependencies": {
-    "@amical/typescript-config": "workspace:*",
-    "@types/node": "^24.3.0",
-    "tsup": "^8.5.0",
-    "typescript": "^5.8.2"
-  }
-}
--- a/packages/smart-whisper/scripts/linker.js
+++ b/packages/smart-whisper/scripts/linker.js
@ -1,21 +0,0 @@
-"use strict";
-
-var libs = [];
-if (process.env.WHISPER_OPENBLAS) {
-	libs.push(`-lopenblas`);
-}
-if (process.env.WHISPER_CUBLAS) {
-	libs.push(
-		`-lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64`,
-	);
-}
-if (process.env.WHISPER_HIPBLAS) {
-	libs.push(
-		`lhipblas -lamdhip64 -lrocblas -L/opt/rocm/lib -L/opt/rocm/hipblas/lib -Wl,-rpath=/opt/rocm/lib`,
-	);
-}
-if (process.env.WHISPER_CLBLAST) {
-	libs.push(`-lclblast -lOpenCL`);
-}
-
-console.log(libs.join(" "));
--- a/packages/smart-whisper/src/binding.ts
+++ b/packages/smart-whisper/src/binding.ts
@ -1,78 +0,0 @@
-process.env.GGML_METAL_PATH_RESOURCES =
-  process.env.GGML_METAL_PATH_RESOURCES ||
-  path.join(__dirname, "../whisper.cpp/ggml/src");
-
-import path from "node:path";
-import { TranscribeFormat, TranscribeParams, TranscribeResult } from "./types";
-const module = require(path.join(__dirname, "../build/Release/smart-whisper"));
-
-/**
- * A external handle to a model.
- */
-export type Handle = {
-  readonly "": unique symbol;
-};
-
-export namespace Binding {
-  /**
-   * Load a model from a whisper weights file.
-   * @param file The path to the whisper weights file.
-   * @param gpu Whether to use the GPU or not.
-   * @param callback A callback that will be called with the handle to the model.
-   */
-  export declare function load(
-    file: string,
-    gpu: boolean,
-    callback: (handle: Handle) => void,
-  ): void;
-
-  /**
-   * Release the memory of the model, it will be unusable after this.
-   * @param handle The handle to the model.
-   * @param callback A callback that will be called when the model is freed.
-   */
-  export declare function free(handle: Handle, callback: () => void): void;
-
-  /**
-   * Transcribe a PCM buffer.
-   * @param handle The handle to the model.
-   * @param pcm The PCM buffer.
-   * @param params The parameters to use for transcription.
-   * @param finish A callback that will be called when the transcription is finished.
-   * @param progress A callback that will be called when a new result is available.
-   */
-  export declare function transcribe<
-    Format extends TranscribeFormat,
-    TokenTimestamp extends boolean,
-  >(
-    handle: Handle,
-    pcm: Float32Array,
-    params: Partial<TranscribeParams<Format, TokenTimestamp>>,
-    finish: (results: TranscribeResult<Format, TokenTimestamp>[]) => void,
-    progress: (result: TranscribeResult<Format, TokenTimestamp>) => void,
-  ): void;
-
-  export declare class WhisperModel {
-    private _ctx;
-    constructor(handle: Handle);
-    get handle(): Handle | null;
-    get freed(): boolean;
-    /**
-     * Release the memory of the model, it will be unusable after this.
-     * It's safe to call this multiple times, but it will only free the model once.
-     */
-    free(): Promise<void>;
-    /**
-     * Load a model from a whisper weights file.
-     * @param file The path to the whisper weights file.
-     * @param gpu Whether to use the GPU or not.
-     * @returns A promise that resolves to a {@link WhisperModel}.
-     */
-    static load(file: string, gpu?: boolean): Promise<WhisperModel>;
-  }
-}
-
-/**
- * The native binding for the underlying C++ addon.
- */
-export const binding: typeof Binding = module;
--- a/packages/smart-whisper/src/binding/binding.cc
+++ b/packages/smart-whisper/src/binding/binding.cc
@ -1,18 +0,0 @@
-#include <napi.h>
-
-#include "common.h"
-#include "model.h"
-#include "transcribe.h"
-
-Napi::Object Init(Napi::Env env, Napi::Object exports) {
-    exports.Set("transcribe", Napi::Function::New(env, Transcribe));
-    WhisperModel::Init(env, exports);
-
-    if (IsProduction(env.Global())) {
-        whisper_log_set([](ggml_log_level level, const char *text, void *user_data) {}, nullptr);
-    }
-
-    return exports;
-}
-
-NODE_API_MODULE(whisper, Init)
--- a/packages/smart-whisper/src/binding/common.cc
+++ b/packages/smart-whisper/src/binding/common.cc
@ -1,16 +0,0 @@
-#include "common.h"
-
-Napi::Promise PromiseWorker::Promise() { return promise.Promise(); }
-
-bool IsProduction(const Napi::Object global_env) {
-    Napi::Object process = global_env.Get("process").As<Napi::Object>();
-    Napi::Object env = process.Get("env").As<Napi::Object>();
-    Napi::Value  node_env = env.Get("NODE_ENV");
-
-    if (!node_env.IsString()) {
-        return false;
-    }
-
-    Napi::String node_env_str = node_env.As<Napi::String>();
-    return node_env_str.Utf8Value() == "production";
-}
--- a/packages/smart-whisper/src/binding/common.h
+++ b/packages/smart-whisper/src/binding/common.h
@ -1,22 +0,0 @@
-#ifndef _GUARD_SW_COMMON_H
-#define _GUARD_SW_COMMON_H
-
-#ifndef NAPI_VERSION
-// Support Node.js 16+
-#define NAPI_VERSION 8
-#endif
-#include <napi.h>
-
-class PromiseWorker : public Napi::AsyncWorker {
-   public:
-    PromiseWorker(Napi::Env &env) : AsyncWorker(env), promise(Napi::Promise::Deferred::New(env)) {}
-
-    Napi::Promise Promise();
-
-   protected:
-    Napi::Promise::Deferred promise;
-};
-
-bool IsProduction(const Napi::Object global_env);
-
-#endif
--- a/packages/smart-whisper/src/binding/model.cc
+++ b/packages/smart-whisper/src/binding/model.cc
@ -1,145 +0,0 @@
-#include "model.h"
-
-class LoadModelWorker : public PromiseWorker {
-   public:
-    LoadModelWorker(Napi::Env &env, const std::string &model_path,
-                    struct whisper_context_params params)
-        : PromiseWorker(env), model_path(model_path), params(params) {}
-
-    void Execute() override {
-        context = whisper_init_from_file_with_params_no_state(model_path.c_str(), params);
-        if (context == nullptr) {
-            SetError("Failed to initialize whisper context");
-        }
-        whisper_print_timings(context);
-    }
-
-    void OnOK() override {
-        Napi::HandleScope scope(Env());
-        auto              handle = Napi::External<whisper_context>::New(Env(), context);
-        auto              constructor = Env().GetInstanceData<Napi::FunctionReference>();
-        auto              model = constructor->New({handle});
-
-        promise.Resolve(model);
-    }
-
-   private:
-    std::string                   model_path;
-    struct whisper_context_params params;
-    whisper_context              *context;
-};
-
-class FreeModelWorker : public PromiseWorker {
-   public:
-    FreeModelWorker(Napi::Env &env, whisper_context *context)
-        : PromiseWorker(env), context(context) {}
-
-    void Execute() override { whisper_free(context); }
-
-    void OnOK() override {
-        Napi::HandleScope scope(Env());
-        promise.Resolve(Env().Undefined());
-    }
-
-   private:
-    whisper_context *context;
-};
-
-Napi::Object WhisperModel::Init(Napi::Env env, Napi::Object exports) {
-    Napi::Function func = DefineClass(
-        env, "WhisperModel",
-        {
-            StaticMethod<&WhisperModel::Load>(
-                "load", static_cast<napi_property_attributes>(napi_writable | napi_configurable)),
-            InstanceMethod<&WhisperModel::Free>(
-                "free", static_cast<napi_property_attributes>(napi_writable | napi_configurable)),
-            InstanceAccessor(
-                "freed", &WhisperModel::GetFreed, nullptr,
-                static_cast<napi_property_attributes>(napi_enumerable | napi_configurable)),
-            InstanceAccessor(
-                "handle", &WhisperModel::GetHandle, nullptr,
-                static_cast<napi_property_attributes>(napi_enumerable | napi_configurable)),
-        });
-
-    auto constructor = new Napi::FunctionReference();
-    *constructor = Napi::Persistent(func);
-    env.SetInstanceData<Napi::FunctionReference>(constructor);
-
-    exports.Set("WhisperModel", func);
-    return exports;
-}
-
-WhisperModel::WhisperModel(const Napi::CallbackInfo &info) : Napi::ObjectWrap<WhisperModel>(info) {
-    Napi::Env         env = info.Env();
-    Napi::HandleScope scope(env);
-
-    if (info.Length() != 1) {
-        Napi::TypeError::New(env, "Wrong number of arguments").ThrowAsJavaScriptException();
-        return;
-    }
-
-    whisper_context *context = info[0].As<Napi::External<whisper_context>>().Data();
-    this->context = context;
-}
-
-void WhisperModel::Finalize(Napi::Env env) {
-    if (context != nullptr) {
-        whisper_free(context);
-        context = nullptr;
-    }
-}
-
-Napi::Value WhisperModel::Load(const Napi::CallbackInfo &info) {
-    Napi::Env env = info.Env();
-
-    if (info.Length() < 1 || info.Length() > 2) {
-        Napi::TypeError::New(env, "Wrong number of arguments").ThrowAsJavaScriptException();
-        return env.Null();
-    }
-
-    std::string model_path = info[0].As<Napi::String>();
-
-    whisper_context_params params;
-    params.use_gpu = info.Length() == 2 ? info[1].As<Napi::Boolean>() : true;
-
-    auto worker = new LoadModelWorker(env, model_path, params);
-    worker->Queue();
-
-    return worker->Promise();
-}
-
-Napi::Value WhisperModel::Free(const Napi::CallbackInfo &info) {
-    Napi::Env env = info.Env();
-
-    if (info.Length() != 0) {
-        Napi::TypeError::New(env, "Wrong number of arguments").ThrowAsJavaScriptException();
-        return env.Null();
-    }
-
-    if (context == nullptr) {
-        auto deferred = Napi::Promise::Deferred::New(env);
-        deferred.Resolve(env.Undefined());
-        return deferred.Promise();
-    } else {
-        auto worker = new FreeModelWorker(env, context);
-        context = nullptr;
-        worker->Queue();
-        return worker->Promise();
-    }
-}
-
-Napi::Value WhisperModel::GetFreed(const Napi::CallbackInfo &info) {
-    Napi::Env env = info.Env();
-
-    return Napi::Boolean::New(env, context == nullptr);
-}
-
-Napi::Value WhisperModel::GetHandle(const Napi::CallbackInfo &info) {
-    Napi::Env env = info.Env();
-
-    if (context == nullptr) {
-        return env.Null();
-    }
-
-    return Napi::External<whisper_context>::New(env, context);
-}
--- a/packages/smart-whisper/src/binding/model.h
+++ b/packages/smart-whisper/src/binding/model.h
@ -1,22 +0,0 @@
-#ifndef _GUARD_SW_MODEL_H
-#define _GUARD_SW_MODEL_H
-
-#include "common.h"
-#include "whisper.h"
-
-class WhisperModel : public Napi::ObjectWrap<WhisperModel> {
-   public:
-    static Napi::Object Init(Napi::Env env, Napi::Object exports);
-
-    WhisperModel(const Napi::CallbackInfo &info);
-    void Finalize(Napi::Env env);
-
-   private:
-    whisper_context   *context;
-    static Napi::Value Load(const Napi::CallbackInfo &info);
-    Napi::Value        Free(const Napi::CallbackInfo &info);
-    Napi::Value        GetFreed(const Napi::CallbackInfo &info);
-    Napi::Value        GetHandle(const Napi::CallbackInfo &info);
-};
-
-#endif
--- a/packages/smart-whisper/src/binding/transcribe.cc
+++ b/packages/smart-whisper/src/binding/transcribe.cc
@ -1,358 +0,0 @@
-#include "transcribe.h"
-
-struct smart_whisper_transcribe_params {
-    const char* format;
-};
-
-struct whisper_full_params whisper_full_params_from_js(Napi::Object o) {
-    struct whisper_full_params params =
-        whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_BEAM_SEARCH);
-
-    if (o.Has("strategy")) {
-        params.strategy = static_cast<whisper_sampling_strategy>(
-            o.Get("strategy").As<Napi::Number>().Int32Value());
-    }
-    if (o.Has("n_threads")) {
-        params.n_threads = o.Get("n_threads").As<Napi::Number>();
-    }
-    if (o.Has("n_max_text_ctx")) {
-        params.n_max_text_ctx = o.Get("n_max_text_ctx").As<Napi::Number>();
-    }
-    if (o.Has("offset_ms")) {
-        params.offset_ms = o.Get("offset_ms").As<Napi::Number>();
-    }
-    if (o.Has("duration_ms")) {
-        params.duration_ms = o.Get("duration_ms").As<Napi::Number>();
-    }
-
-    if (o.Has("translate")) {
-        params.translate = o.Get("translate").As<Napi::Boolean>();
-    }
-    if (o.Has("no_context")) {
-        params.no_context = o.Get("no_context").As<Napi::Boolean>();
-    }
-    if (o.Has("no_timestamps")) {
-        params.no_timestamps = o.Get("no_timestamps").As<Napi::Boolean>();
-    }
-    if (o.Has("single_segment")) {
-        params.single_segment = o.Get("single_segment").As<Napi::Boolean>();
-    }
-    if (o.Has("print_special")) {
-        params.print_special = o.Get("print_special").As<Napi::Boolean>();
-    }
-    if (o.Has("print_progress")) {
-        params.print_progress = o.Get("print_progress").As<Napi::Boolean>();
-    }
-    if (o.Has("print_realtime")) {
-        params.print_realtime = o.Get("print_realtime").As<Napi::Boolean>();
-    }
-    if (o.Has("print_timestamps")) {
-        params.print_timestamps = o.Get("print_timestamps").As<Napi::Boolean>();
-    }
-
-    if (o.Has("token_timestamps")) {
-        params.token_timestamps = o.Get("token_timestamps").As<Napi::Boolean>();
-    }
-    if (o.Has("thold_pt")) {
-        params.thold_pt = o.Get("thold_pt").As<Napi::Number>();
-    }
-    if (o.Has("thold_ptsum")) {
-        params.thold_ptsum = o.Get("thold_ptsum").As<Napi::Number>();
-    }
-    if (o.Has("max_len")) {
-        params.max_len = o.Get("max_len").As<Napi::Number>();
-    }
-    if (o.Has("split_on_word")) {
-        params.split_on_word = o.Get("split_on_word").As<Napi::Boolean>();
-    }
-    if (o.Has("max_tokens")) {
-        params.max_tokens = o.Get("max_tokens").As<Napi::Number>();
-    }
-
-    if (o.Has("debug_mode")) {
-        params.debug_mode = o.Get("debug_mode").As<Napi::Boolean>();
-    }
-    if (o.Has("audio_ctx")) {
-        params.audio_ctx = o.Get("audio_ctx").As<Napi::Number>();
-    }
-
-    if (o.Has("tdrz_enable")) {
-        params.tdrz_enable = o.Get("tdrz_enable").As<Napi::Boolean>();
-    }
-
-    if (o.Has("initial_prompt") && o.Get("initial_prompt").IsString()) {
-        std::string initial_prompt = o.Get("initial_prompt").As<Napi::String>().Utf8Value();
-        params.initial_prompt = strdup(initial_prompt.c_str());
-    } else {
-        params.initial_prompt = nullptr;
-    }
-
-    if (o.Has("language") && o.Get("language").IsString()) {
-        std::string language = o.Get("language").As<Napi::String>().Utf8Value();
-        params.language = strdup(language.c_str());
-    } else {
-        params.language = strdup("auto");
-    }
-
-    if (o.Has("suppress_blank") && o.Get("suppress_blank").IsBoolean()) {
-        params.suppress_blank = o.Get("suppress_blank").As<Napi::Boolean>();
-    }
-    if (o.Has("suppress_non_speech_tokens") && o.Get("suppress_non_speech_tokens").IsBoolean()) {
-        params.suppress_non_speech_tokens = o.Get("suppress_non_speech_tokens").As<Napi::Boolean>();
-    }
-
-    if (o.Has("temperature")) {
-        params.temperature = o.Get("temperature").As<Napi::Number>();
-    }
-    if (o.Has("max_initial_ts")) {
-        params.max_initial_ts = o.Get("max_initial_ts").As<Napi::Number>();
-    }
-    if (o.Has("length_penalty")) {
-        params.length_penalty = o.Get("length_penalty").As<Napi::Number>();
-    }
-
-    if (o.Has("temperature_inc")) {
-        params.temperature_inc = o.Get("temperature_inc").As<Napi::Number>();
-    }
-    if (o.Has("entropy_thold")) {
-        params.entropy_thold = o.Get("entropy_thold").As<Napi::Number>();
-    }
-    if (o.Has("logprob_thold")) {
-        params.logprob_thold = o.Get("logprob_thold").As<Napi::Number>();
-    }
-    if (o.Has("no_speech_thold")) {
-        params.no_speech_thold = o.Get("no_speech_thold").As<Napi::Number>();
-    }
-
-    if (o.Has("best_of")) {
-        params.greedy.best_of = o.Get("best_of").As<Napi::Number>();
-    }
-
-    if (o.Has("beam_size")) {
-        params.beam_search.beam_size = o.Get("beam_size").As<Napi::Number>();
-    }
-
-    return params;
-}
-
-struct smart_whisper_transcribe_params smart_whisper_transcribe_params_from_js(Napi::Object o) {
-    struct smart_whisper_transcribe_params params;
-
-    if (o.Has("format") && o.Get("format").IsString()) {
-        std::string format = o.Get("format").As<Napi::String>().Utf8Value();
-        params.format = strdup(format.c_str());
-    } else {
-        params.format = strdup("simple");
-    }
-
-    return params;
-}
-
-class TranscribeWorker : public Napi::AsyncProgressQueueWorker<int> {
-   public:
-    TranscribeWorker(whisper_context* context, const float* samples, int n_samples,
-                     struct whisper_full_params             params,
-                     struct smart_whisper_transcribe_params smart_params,
-                     Napi::Function& finish_callback, Napi::Function& progress_callback)
-        : AsyncProgressQueueWorker(finish_callback),
-          context(context),
-          samples(samples),
-          n_samples(n_samples),
-          params(params),
-          smart_params(smart_params) {
-        this->progress_callback.Reset(progress_callback, 1);
-        state = nullptr;
-    }
-
-    ~TranscribeWorker() {
-        delete[] samples;
-        // whisper_free_params(&params); will lead to a double free
-        if (params.initial_prompt != nullptr) {
-            free((void*)params.initial_prompt);
-        }
-        if (params.language != nullptr) {
-            free((void*)params.language);
-        }
-        if (state != nullptr) {
-            whisper_free_state(state);
-        }
-
-        free((void*)smart_params.format);
-    }
-
-    void Execute(const ExecutionProgress& progress) override {
-        state = whisper_init_state(context);
-
-        params.new_segment_callback = [](struct whisper_context* ctx, struct whisper_state* state,
-                                         int n_new, void* user_data) {
-            const ExecutionProgress& progress = *(ExecutionProgress*)user_data;
-
-            const int i = whisper_full_n_segments_from_state(state) - 1;
-            progress.Send(&i, 1);
-        };
-        params.new_segment_callback_user_data = (void*)&progress;
-
-        int err = whisper_full_with_state(context, state, params, samples, n_samples);
-        if (err != 0) {
-            SetError("whisper_full operation failed");
-        }
-    }
-
-    void OnProgress(const int* data, size_t _count) override {
-        Napi::HandleScope scope(Env());
-
-        if (this->progress_callback.IsEmpty()) {
-            return;
-        }
-
-        int i = (*data);
-
-        Napi::Object segment = Napi::Object::New(Env());
-        segment.Set("from", Napi::Number::New(
-                                Env(), whisper_full_get_segment_t0_from_state(state, i) * 10));
-        segment.Set(
-            "to", Napi::Number::New(Env(), whisper_full_get_segment_t1_from_state(state, i) * 10));
-        segment.Set("text",
-                    Napi::String::New(Env(), whisper_full_get_segment_text_from_state(state, i)));
-
-        if (strcmp(smart_params.format, "detail") == 0) {
-            float       confidence = 0, min_p = 1, max_p = 0;
-            int         skips = 0;
-            int         tokens = whisper_full_n_tokens_from_state(state, i);
-            Napi::Array tokens_array = Napi::Array::New(Env(), tokens);
-            for (int j = 0; j < tokens; j++) {
-                auto         token = whisper_full_get_token_data_from_state(state, i, j);
-                Napi::Object token_object = Napi::Object::New(Env());
-                token_object.Set("text",
-                                 Napi::String::New(Env(), whisper_full_get_token_text_from_state(
-                                                              context, state, i, j)));
-                token_object.Set("id", Napi::Number::New(Env(), token.id));
-                token_object.Set("p", Napi::Number::New(Env(), token.p));
-                tokens_array.Set(j, token_object);
-
-                if (token.id > whisper_token_eot(context)) {
-                    skips++;
-                    continue;
-                }
-                confidence += token.p;
-                min_p = std::min(min_p, token.p);
-                max_p = std::max(max_p, token.p);
-            }
-
-            if (tokens > 2) {
-                confidence = (confidence - min_p - max_p) / (tokens - 2 - skips);
-            } else {
-                confidence = confidence / (tokens - skips);
-            }
-
-            segment.Set(
-                "lang",
-                Napi::String::New(Env(), whisper_lang_str(whisper_full_lang_id_from_state(state))));
-            segment.Set("confidence", Napi::Number::New(Env(), confidence));
-            segment.Set("tokens", tokens_array);
-        }
-
-        this->progress_callback.Call({segment});
-    }
-
-    void OnOK() override {
-        Napi::HandleScope scope(Env());
-
-        int         n_segments = whisper_full_n_segments_from_state(state);
-        Napi::Array segments = Napi::Array::New(Env(), n_segments);
-        for (int i = 0; i < n_segments; i++) {
-            Napi::Object segment = Napi::Object::New(Env());
-            segment.Set("from", Napi::Number::New(
-                                    Env(), whisper_full_get_segment_t0_from_state(state, i) * 10));
-            segment.Set("to", Napi::Number::New(
-                                  Env(), whisper_full_get_segment_t1_from_state(state, i) * 10));
-            segment.Set("text", Napi::String::New(
-                                    Env(), whisper_full_get_segment_text_from_state(state, i)));
-
-            if (strcmp(smart_params.format, "detail") == 0) {
-                float       confidence = 0, min_p = 1, max_p = 0;
-                int         skips = 0;
-                int         tokens = whisper_full_n_tokens_from_state(state, i);
-                Napi::Array tokens_array = Napi::Array::New(Env(), tokens);
-                for (int j = 0; j < tokens; j++) {
-                    auto         token = whisper_full_get_token_data_from_state(state, i, j);
-                    Napi::Object token_object = Napi::Object::New(Env());
-                    token_object.Set(
-                        "text", Napi::String::New(Env(), whisper_full_get_token_text_from_state(
-                                                             context, state, i, j)));
-                    token_object.Set("id", Napi::Number::New(Env(), token.id));
-                    token_object.Set("p", Napi::Number::New(Env(), token.p));
-                    if (params.token_timestamps) {
-                        token_object.Set("from", Napi::Number::New(Env(), token.t0 * 10));
-                        token_object.Set("to", Napi::Number::New(Env(), token.t1 * 10));
-                    }
-
-                    tokens_array.Set(j, token_object);
-
-                    if (token.id > whisper_token_eot(context)) {
-                        skips++;
-                        continue;
-                    }
-                    confidence += token.p;
-                    min_p = std::min(min_p, token.p);
-                    max_p = std::max(max_p, token.p);
-                }
-
-                if (tokens - skips > 2) {
-                    confidence = (confidence - min_p - max_p) / (tokens - skips - 2);
-                } else if (tokens - skips > 0) {
-                    confidence = confidence / (tokens - skips);
-                }
-
-                segment.Set("lang",
-                            Napi::String::New(
-                                Env(), whisper_lang_str(whisper_full_lang_id_from_state(state))));
-                segment.Set("confidence", Napi::Number::New(Env(), confidence));
-                segment.Set("tokens", tokens_array);
-            }
-
-            segments.Set(i, segment);
-        }
-
-        Callback().Call({segments});
-    }
-
-   private:
-    whisper_context*                       context;
-    whisper_state*                         state;
-    const float*                           samples;
-    int                                    n_samples;
-    struct whisper_full_params             params;
-    struct smart_whisper_transcribe_params smart_params;
-    Napi::FunctionReference                progress_callback;
-};
-
-Napi::Value Transcribe(const Napi::CallbackInfo& info) {
-    Napi::Env env = info.Env();
-
-    if (info.Length() != 5) {
-        Napi::TypeError::New(env, "Wrong number of arguments").ThrowAsJavaScriptException();
-        return env.Null();
-    }
-
-    whisper_context* context = info[0].As<Napi::External<whisper_context>>().Data();
-
-    Napi::Float32Array pcm = info[1].As<Napi::Float32Array>();
-    float*             samples = new float[pcm.ElementLength()];
-    memcpy(samples, pcm.Data(), pcm.ByteLength());
-
-    int n_samples = static_cast<int>(pcm.ElementLength());
-
-    Napi::Object params = info[2].As<Napi::Object>();
-    auto         whisper_params = whisper_full_params_from_js(params);
-    auto         smart_params = smart_whisper_transcribe_params_from_js(params);
-
-    Napi::Function finish_callback = info[3].As<Napi::Function>();
-    Napi::Function progress_callback = info[4].As<Napi::Function>();
-
-    auto worker = new TranscribeWorker(context, samples, n_samples, whisper_params, smart_params,
-                                       finish_callback, progress_callback);
-    worker->Queue();
-
-    return env.Undefined();
-}
--- a/packages/smart-whisper/src/binding/transcribe.h
+++ b/packages/smart-whisper/src/binding/transcribe.h
@ -1,9 +0,0 @@
-#ifndef _GUARD_SW_TRANSCRIBE_H
-#define _GUARD_SW_TRANSCRIBE_H
-
-#include "common.h"
-#include "whisper.h"
-
-Napi::Value Transcribe(const Napi::CallbackInfo& info);
-
-#endif
--- a/packages/smart-whisper/src/build.ts
+++ b/packages/smart-whisper/src/build.ts
@ -1,97 +0,0 @@
-import os from "node:os";
-import { execSync } from "node:child_process";
-
-type ComputeBackend = "cpu" | "accelerate" | "metal" | "clblast" | "openblas";
-
-const cfg = config();
-
-export const sources = cfg.sources.join(" ");
-export const defines = cfg.defines.join(" ");
-export const libraries = cfg.libraries.join(" ");
-
-function config(): {
-  sources: string[];
-  defines: string[];
-  libraries: string[];
-} {
-  if (process.env.BYOL) {
-    return {
-      sources: [],
-      defines: [],
-      libraries: [process.env.BYOL],
-    };
-  }
-
-  const COMPUTE_BACKEND: ComputeBackend =
-    (process.env.COMPUTE_BACKEND as ComputeBackend | undefined) ??
-    infer_backend();
-
-  const cfg = {
-    sources: [
-      "whisper.cpp/src/whisper.cpp",
-      "whisper.cpp/ggml/src/ggml.c",
-      "whisper.cpp/ggml/src/ggml-alloc.c",
-      "whisper.cpp/ggml/src/ggml-backend.c",
-      "whisper.cpp/ggml/src/ggml-quants.c",
-      "whisper.cpp/ggml/src/ggml-aarch64.c",
-    ] as string[],
-    defines: [] as string[],
-    libraries: [] as string[],
-  };
-
-  switch (COMPUTE_BACKEND) {
-    case "accelerate": {
-      cfg.defines.push("GGML_USE_ACCELERATE");
-
-      cfg.libraries.push('"-framework Foundation"');
-      cfg.libraries.push('"-framework Accelerate"');
-      break;
-    }
-    case "metal": {
-      cfg.sources.push("whisper.cpp/ggml/src/ggml-metal.m");
-
-      cfg.defines.push("GGML_USE_ACCELERATE");
-      cfg.defines.push("GGML_USE_METAL");
-
-      cfg.libraries.push('"-framework Foundation"');
-      cfg.libraries.push('"-framework Accelerate"');
-      cfg.libraries.push('"-framework Metal"');
-      cfg.libraries.push('"-framework MetalKit"');
-      break;
-    }
-    case "openblas": {
-      cfg.defines.push("GGML_USE_OPENBLAS");
-
-      cfg.libraries.push("-lopenblas");
-      break;
-    }
-    default: {
-    }
-  }
-
-  return cfg;
-}
-
-function infer_backend(): ComputeBackend {
-  let backend: ComputeBackend = "cpu";
-
-  try {
-    if (os.platform() === "darwin") {
-      backend = "accelerate";
-      if (os.arch() === "arm64") {
-        backend = "metal";
-      }
-    } else if (os.platform() === "linux") {
-      const has_libopenblas = !!execSync("ldconfig -p | grep libopenblas")
-        .toString()
-        .trim();
-      if (has_libopenblas) {
-        backend = "openblas";
-      }
-    }
-  } catch {
-    // if anything goes wrong, just use the default cpu backend
-  }
-
-  return backend;
-}
--- a/packages/smart-whisper/src/index.ts
+++ b/packages/smart-whisper/src/index.ts
@ -1,7 +0,0 @@
-export * from "./binding";
-export * from "./model";
-export * from "./transcribe";
-export * from "./types";
-export * from "./whisper";
-
-export * as manager from "./model-manager";
--- a/packages/smart-whisper/src/model-manager/index.ts
+++ b/packages/smart-whisper/src/model-manager/index.ts
@ -1,125 +0,0 @@
-import path from "node:path";
-import fs from "node:fs";
-import os from "node:os";
-import { Readable } from "node:stream";
-import type { ReadableStream } from "node:stream/web";
-
-const root = path.join(os.homedir(), ".smart-whisper");
-const models = path.join(root, "models");
-const ext = ".bin";
-
-fs.mkdirSync(models, { recursive: true });
-
-const BASE_MODELS_URL =
-  "https://huggingface.co/ggerganov/whisper.cpp/resolve/main";
-
-/**
- * MODELS is an object that contains the URLs of different ggml whisper models.
- * Each model is represented by a key-value pair, where the key is the model name
- * and the value is the URL of the model.
- */
-export const MODELS = {
-  tiny: `${BASE_MODELS_URL}/ggml-tiny.bin`,
-  "tiny.en": `${BASE_MODELS_URL}/ggml-tiny.en.bin`,
-  small: `${BASE_MODELS_URL}/ggml-small.bin`,
-  "small.en": `${BASE_MODELS_URL}/ggml-small.en.bin`,
-  base: `${BASE_MODELS_URL}/ggml-base.bin`,
-  "base.en": `${BASE_MODELS_URL}/ggml-base.en.bin`,
-  medium: `${BASE_MODELS_URL}/ggml-medium.bin`,
-  "medium.en": `${BASE_MODELS_URL}/ggml-medium.en.bin`,
-  "large-v1": `${BASE_MODELS_URL}/ggml-large-v1.bin`,
-  "large-v2": `${BASE_MODELS_URL}/ggml-large-v2.bin`,
-  "large-v3": `${BASE_MODELS_URL}/ggml-large-v3.bin`,
-  "large-v3-turbo": `${BASE_MODELS_URL}/ggml-large-v3-turbo.bin`,
-} as const;
-
-export type ModelName = keyof typeof MODELS | (string & {});
-
-/**
- * Downloads a ggml whisper model from a specified URL or shorthand.
- *
- * @param model - The model to download, specified either as a key of the {@link MODELS} object or as a URL.
- * @returns A promise that resolves to the name of the downloaded model.
- * @throws An error if the model URL or shorthand is invalid, or if the model fails to download.
- */
-export async function download(model: ModelName): Promise<string> {
-  let url = "",
-    name = "";
-  if (model in MODELS) {
-    url = MODELS[model as keyof typeof MODELS];
-    name = model;
-  } else {
-    try {
-      url = new URL(model).href;
-      name = new URL(url).pathname.split("/").pop() ?? "";
-    } catch {}
-  }
-
-  if (!url) {
-    throw new Error(`Invalid model URL or shorthand: ${model}`);
-  }
-
-  if (!name) {
-    throw new Error(`Failed to parse model name: ${url}`);
-  }
-
-  if (check(name)) {
-    return name;
-  }
-
-  const res = await fetch(url);
-  if (!res.ok || !res.body) {
-    throw new Error(`Failed to download model: ${res.statusText}`);
-  }
-
-  const stream = fs.createWriteStream(
-    path.join(models, name.endsWith(ext) ? name : name + ext),
-  );
-  Readable.fromWeb(res.body as ReadableStream<Uint8Array>).pipe(stream);
-
-  return new Promise((resolve) => stream.on("finish", () => resolve(name)));
-}
-
-/**
- * Removes a locally downloaded model.
- * @param model - The name of the model to remove.
- */
-export function remove(model: ModelName): void {
-  if (check(model)) {
-    fs.unlinkSync(path.join(models, model + ext));
-  }
-}
-
-/**
- * Retrieves a list of model names that are available locally.
- * @returns An array of model names.
- */
-export function list(): ModelName[] {
-  const files = fs.readdirSync(models).filter((file) => file.endsWith(ext));
-  return files.map((file) => file.slice(0, -ext.length));
-}
-
-/**
- * Checks if a model exists.
- * @param model - The name of the model.
- * @returns True if the model exists, false otherwise.
- */
-export function check(model: ModelName): boolean {
-  return fs.existsSync(path.join(models, model + ext));
-}
-
-/**
- * Resolves the absolute path of a model.
- * @param model - The name of the model.
- * @returns The resolved path of the model.
- * @throws Error if the model is not found.
- */
-export function resolve(model: ModelName): string {
-  if (check(model)) {
-    return path.join(models, model + ext);
-  } else {
-    throw new Error(`Model not found: ${model}`);
-  }
-}
-
-export const dir = { root, models };
--- a/packages/smart-whisper/src/model.ts
+++ b/packages/smart-whisper/src/model.ts
@ -1,3 +0,0 @@
-import { binding } from "./binding";
-
-export class WhisperModel extends binding.WhisperModel {}
--- a/packages/smart-whisper/src/transcribe.ts
+++ b/packages/smart-whisper/src/transcribe.ts
@ -1,114 +0,0 @@
-import EventEmitter from "node:events";
-import type { WhisperModel } from "./model";
-import { TranscribeFormat, TranscribeParams, TranscribeResult } from "./types";
-import { binding } from "./binding";
-
-export class TranscribeTask<
-  Format extends TranscribeFormat,
-  TokenTimestamp extends boolean,
-> extends EventEmitter {
-  private _model: WhisperModel;
-  private _result: Promise<TranscribeResult<Format, TokenTimestamp>[]> | null =
-    null;
-
-  /**
-   * You should not construct this class directly, use {@link TranscribeTask.run} instead.
-   */
-  constructor(model: WhisperModel) {
-    super();
-    this._model = model;
-  }
-
-  get model(): WhisperModel {
-    return this._model;
-  }
-
-  /**
-   * A promise that resolves to the result of the transcription task.
-   */
-  get result(): Promise<TranscribeResult<Format, TokenTimestamp>[]> {
-    if (this._result === null) {
-      throw new Error("Task has not been started");
-    }
-    return this._result;
-  }
-
-  private async _run(
-    pcm: Float32Array,
-    params: Partial<TranscribeParams<Format, TokenTimestamp>>,
-  ): Promise<TranscribeResult<Format, TokenTimestamp>[]> {
-    return new Promise((resolve) => {
-      const handle = this.model.handle;
-      if (!handle) {
-        throw new Error("Model has been freed");
-      }
-
-      binding.transcribe(
-        handle,
-        pcm,
-        params,
-        (results) => {
-          this.emit("finish");
-          resolve(results);
-        },
-        (result) => {
-          this.emit("transcribed", result);
-        },
-      );
-    });
-  }
-
-  static async run<
-    Format extends TranscribeFormat,
-    TokenTimestamp extends boolean,
-  >(
-    model: WhisperModel,
-    pcm: Float32Array,
-    params: Partial<TranscribeParams<Format, TokenTimestamp>>,
-  ): Promise<TranscribeTask<Format, TokenTimestamp>> {
-    if (model.freed) {
-      throw new Error("Model has been freed");
-    }
-
-    const task = new TranscribeTask(model);
-    task._result = task._run(pcm, params);
-
-    return task;
-  }
-
-  on(
-    event: "finish",
-    listener: (results: TranscribeResult<Format, TokenTimestamp>[]) => void,
-  ): this;
-  on(
-    event: "transcribed",
-    listener: (result: TranscribeResult<Format, TokenTimestamp>) => void,
-  ): this;
-  on(event: string, listener: (...args: any[]) => void): this {
-    return super.on(event, listener);
-  }
-
-  once(
-    event: "finish",
-    listener: (results: TranscribeResult<Format, TokenTimestamp>[]) => void,
-  ): this;
-  once(
-    event: "transcribed",
-    listener: (result: TranscribeResult<Format, TokenTimestamp>) => void,
-  ): this;
-  once(event: string, listener: (...args: any[]) => void): this {
-    return super.once(event, listener);
-  }
-
-  off(
-    event: "finish",
-    listener: (results: TranscribeResult<Format, TokenTimestamp>[]) => void,
-  ): this;
-  off(
-    event: "transcribed",
-    listener: (result: TranscribeResult<Format, TokenTimestamp>) => void,
-  ): this;
-  off(event: string, listener: (...args: any[]) => void): this {
-    return super.off(event, listener);
-  }
-}
--- a/packages/smart-whisper/src/types.ts
+++ b/packages/smart-whisper/src/types.ts
@ -1,104 +0,0 @@
-export enum WhisperSamplingStrategy {
-  WHISPER_SAMPLING_GREEDY,
-  WHISPER_SAMPLING_BEAM_SEARCH,
-}
-
-export type TranscribeFormat = "simple" | "detail";
-
-/**
- * See {@link https://github.com/ggerganov/whisper.cpp/blob/00b7a4be02ca82d53ac69dd2dd438c16e2af7658/whisper.h#L433C19-L433C19} for details.
- */
-export interface TranscribeParams<
-  Format extends TranscribeFormat = TranscribeFormat,
-  TokenTimestamp extends boolean = false,
-> {
-  strategy: WhisperSamplingStrategy;
-  n_threads: number;
-  n_max_text_ctx: number;
-  offset_ms: number;
-  duration_ms: number;
-
-  translate: boolean;
-  no_context: boolean;
-  no_timestamps: boolean;
-  single_segment: boolean;
-  print_special: boolean;
-  print_progress: boolean;
-  print_realtime: boolean;
-  print_timestamps: boolean;
-
-  token_timestamps: TokenTimestamp;
-  thold_pt: number;
-  thold_ptsum: number;
-  max_len: number;
-  split_on_word: boolean;
-  max_tokens: number;
-
-  speed_up: boolean;
-  debug_mode: boolean;
-  audio_ctx: number;
-
-  tdrz_enable: boolean;
-
-  initial_prompt: string;
-
-  /**
-   * Language code, e.g. "en", "de", "fr", "es", "it", "nl", "pt", "ru", "tr", "uk", "pl", "sv", "cs", "zh", "ja", "ko"
-   */
-  language: string;
-
-  suppress_blank: boolean;
-  suppress_non_speech_tokens: boolean;
-
-  temperature: number;
-  max_initial_ts: number;
-  length_penalty: number;
-
-  temperature_inc: number;
-  entropy_thold: number;
-  logprob_thold: number;
-  no_speech_thold: number;
-
-  best_of: number;
-
-  beam_size: number;
-
-  format: Format;
-}
-
-export interface TranscribeSimpleResult {
-  from: number;
-  to: number;
-  text: string;
-}
-
-/**
- * Represents a detailed result of transcription.
- */
-export interface TranscribeDetailedResult<TokenTimestamp extends boolean>
-  extends TranscribeSimpleResult {
-  /** The detected spoken language. */
-  lang: string;
-  /** The confidence level of the transcription, calculated by the average probability of the tokens. */
-  confidence: number;
-  /** The tokens generated during the transcription process. */
-  tokens: {
-    /** The text of the token, for CJK languages, due to the BPE encoding, the token text may not be readable. */
-    text: string;
-    /** The ID of the token. */
-    id: number;
-    /** The probability of the token. */
-    p: number;
-    /** The start timestamp of the token, in milliseconds. Only available when `token_timestamps` of {@link TranscribeParams} is `true`. */
-    from: TokenTimestamp extends true ? number : undefined;
-    /** The end timestamp of the token, in milliseconds. Only available when `token_timestamps` of {@link TranscribeParams} is `true`. */
-    to: TokenTimestamp extends true ? number : undefined;
-  }[];
-}
-
-export type TranscribeResult<
-  Format extends TranscribeFormat = TranscribeFormat,
-  TokenTimestamp extends boolean = boolean,
-> = Format extends "simple"
-  ? TranscribeSimpleResult
-  : TranscribeDetailedResult<TokenTimestamp>;
--- a/packages/smart-whisper/src/whisper.ts
+++ b/packages/smart-whisper/src/whisper.ts
@ -1,148 +0,0 @@
-import type {
-  TranscribeFormat,
-  TranscribeParams,
-  TranscribeResult,
-} from "./types";
-import { WhisperModel } from "./model";
-import { TranscribeTask } from "./transcribe";
-
-export interface WhisperConfig {
-  /**
-   * Time in seconds to wait before offloading the model if it's not being used.
-   */
-  offload: number;
-
-  /**
-   * Whether to use the GPU or not.
-   */
-  gpu: boolean;
-}
-
-/**
- * The Whisper class is responsible for managing the lifecycle and operations of whisper model.
- * It handles the loading and offloading of the model, managing transcription tasks, and configuring model parameters.
- */
-export class Whisper {
-  private _file: string;
-  private _available: WhisperModel | null = null;
-  private _loading: Promise<WhisperModel> | null = null;
-  private _tasks: Promise<TranscribeResult[]>[] = [];
-  private _config: WhisperConfig;
-  private _offload_timer: NodeJS.Timeout | null = null;
-
-  /**
-   * Constructs a new Whisper instance with a specified model file and configuration.
-   * @param file - The path to the Whisper model file.
-   * @param config - Optional configuration for the Whisper instance.
-   */
-  constructor(file: string, config: Partial<WhisperConfig> = {}) {
-    this._file = file;
-    this._config = {
-      offload: 300,
-      gpu: true,
-      ...config,
-    };
-  }
-
-  get file(): string {
-    return this._file;
-  }
-
-  set file(file: string) {
-    this._file = file;
-  }
-
-  get config(): WhisperConfig {
-    return this._config;
-  }
-
-  get tasks(): Promise<TranscribeResult[]>[] {
-    return this._tasks;
-  }
-
-  reset_offload_timer(): void {
-    this.clear_offload_timer();
-    this._offload_timer = setTimeout(() => {
-      this.free();
-    }, this.config.offload * 1000);
-  }
-
-  private clear_offload_timer(): void {
-    if (this._offload_timer !== null) {
-      clearTimeout(this._offload_timer);
-      this._offload_timer = null;
-    }
-  }
-
-  async model(): Promise<WhisperModel> {
-    if (this._available === null) {
-      return this.load();
-    }
-    this.reset_offload_timer();
-    return Promise.resolve(this._available);
-  }
-
-  /**
-   * Loads the whisper model asynchronously.
-   * If the model is already being loaded, returns the existing one.
-   *
-   * You don't need to call this method directly, it's called automatically if necessary when you call {@link Whisper.transcribe}.
-   *
-   * @returns A Promise that resolves to the loaded model.
-   */
-  async load(): Promise<WhisperModel> {
-    if (this._loading !== null) {
-      return this._loading;
-    }
-
-    const model = WhisperModel.load(this.file, this.config.gpu);
-    this._loading = model;
-    this._available = await model;
-    this._loading = null;
-    this.reset_offload_timer();
-    return this._available;
-  }
-
-  /**
-   * Transcribes the given PCM audio data using the Whisper model.
-   * @param pcm - The mono 16k PCM audio data to transcribe.
-   * @param params - Optional parameters for transcription.
-   * @returns A promise that resolves to the result of the transcription task.
-   */
-  async transcribe<
-    Format extends TranscribeFormat,
-    TokenTimestamp extends boolean,
-  >(
-    pcm: Float32Array,
-    params: Partial<TranscribeParams<Format, TokenTimestamp>> = {},
-  ): Promise<TranscribeTask<Format, TokenTimestamp>> {
-    const model = await this.model();
-    const task = await TranscribeTask.run<Format, TokenTimestamp>(
-      model,
-      pcm,
-      params,
-    );
-    this._tasks.push(task.result);
-    return task;
-  }
-
-  async free(): Promise<void> {
-    if (this._available === null) {
-      return;
-    }
-    const model = this._available;
-    this._available = null;
-    this.clear_offload_timer();
-    await Promise.all(this.tasks);
-    await model.free();
-  }
-}
-
-/**
- * Here's a life cycle diagram of a model:
- * | Method     | (0) Not Available | (1) Loading | (2) Available | (3) Freeing | (0) Not Available |
- * |------------|-------------------|-------------|---------------|-------------|-------------------|
- * | load       | V                 | -           | -             | -           | V                 |
- * | free       | -                 | -           | wait tasks, V | -           | -                 |
- * | transcribe | load              | load        | V             | load        | load              |
- */
--- a/packages/smart-whisper/tsconfig.json
+++ b/packages/smart-whisper/tsconfig.json
@ -1,9 +0,0 @@
-{
-  "extends": "@amical/typescript-config/base.json",
-  "compilerOptions": {
-    "outDir": "dist"
-  },
-  "include": ["src"],
-  "exclude": ["node_modules", "dist"],
-  "types": ["node"]
-}
--- a/packages/smart-whisper/tsup.config.ts
+++ b/packages/smart-whisper/tsup.config.ts
@ -1,18 +0,0 @@
-import { defineConfig } from "tsup";
-import { readFileSync, writeFileSync } from "node:fs";
-
-export default defineConfig({
-  entry: ["src/index.ts", "src/build.ts"],
-  outDir: "dist",
-  dts: true,
-  async onSuccess() {
-    // replace `#include "ggml-common.h" in whisper.cpp/ggml/src/ggml-metal.metal with full content
-    const metal = readFileSync(
-      "whisper.cpp/ggml/src/ggml-metal.metal",
-      "utf-8",
-    );
-    const common = readFileSync("whisper.cpp/ggml/src/ggml-common.h", "utf-8");
-    const replaced = metal.replace(/#include "ggml-common.h"/, common);
-    writeFileSync("whisper.cpp/ggml/src/ggml-metal.metal", replaced);
-  },
-});
--- a/packages/smart-whisper/whisper.cpp
+++ b/packages/smart-whisper/whisper.cpp
@ -1 +0,0 @@
-Subproject commit 2ef717b293fe93872cc3a03ca77942936a281959
--- a/packages/whisper-wrapper/README.md
+++ b/packages/whisper-wrapper/README.md
@ -0,0 +1,91 @@
+# @amical/whisper-wrapper
+
+This package wraps the `whisper.cpp` Node addon so the desktop app can call into
+Whisper from a forked worker process. The build and runtime layers are tuned for
+the desktop pipeline; the notes below capture the important knobs and the
+reasoning behind them.
+
+## Build workflow
+
+- `pnpm install` (postinstall) runs `bin/build-addon.js` via CMake.js and drops
+  the resulting `whisper.node` into `native/<platform-arch(-tag)>/`.
+- `pnpm --filter @amical/whisper-wrapper build:native` rebuilds the default
+  variants for this platform (Metal + CPU on macOS, CPU elsewhere).
+- `pnpm --filter @amical/whisper-wrapper build:native:cuda` builds an extra
+  `win32-x64-cuda` binary alongside the regular `win32-x64` fallback. Install
+  the CUDA toolkit (12.x tested) before running it.
+- Every macOS build is ad-hoc signed (`codesign -s -`) so Electron/Node can load
+  it without crashing.
+- Each variant is produced as a _single_ `.node` binary. We force static
+  libraries (`GGML_STATIC=ON`, `BUILD_SHARED_LIBS=OFF`) so all ggml/whisper
+  code is linked directly into the addon—no sidecar `.dylib/.dll` files ship
+  at runtime.
+- The full CMake build directory is deleted after each variant so Electron
+  Forge/Squirrel never sees the long `CMakeFiles/...` paths that blew past
+  Windows’ MAX_PATH limit during packaging.
+
+## GPU/CPU fallback
+
+`resolveBinding()` in `src/loader.ts` no longer throws if the first candidate
+fails. `loadBinding()` walks the list:
+
+1. `platform-arch-metal`
+2. `platform-arch-openblas`
+3. `platform-arch-cuda`
+4. `platform-arch`
+5. `cpu-fallback`
+
+If `require()` raises `ERR_DLOPEN_FAILED` (missing runtime, wrong driver, etc.)
+it logs a warning and tries the next candidate. That lets us ship CUDA/Metal
+binaries alongside CPU ones without breaking installs that lack the GPU stack.
+
+## GGML_NATIVE on macOS arm64
+
+GitHub’s hosted macOS runners expose `i8mm` but clang refuses to emit the
+`vmmlaq_s32` intrinsic when `-mcpu=native` is passed, so the build dies in
+`ggml-cpu/arch/arm/quants.c`. CI therefore exports `GGML_NATIVE=OFF` before
+calling the build scripts. Locally you can flip it back on if your toolchain
+supports those instructions:
+
+```bash
+GGML_NATIVE=ON pnpm --filter @amical/whisper-wrapper build:native
+```
+
+Leave it off in CI unless you control the runner.
+
+## Custom targets
+
+`WHISPER_TARGETS` lets you override which variants to build. The value is a
+comma-separated list of directory names that should map to `native/<name>`.
+Examples:
+
+```bash
+WHISPER_TARGETS="linux-x64-gnu" pnpm --filter @amical/whisper-wrapper build:native
+WHISPER_TARGETS="win32-x64-cuda,win32-x64" pnpm --filter @amical/whisper-wrapper build:native
+```
+
+When `WHISPER_TARGETS` is unset or empty, the script builds the Metal variant
+(on macOS only) followed by the plain CPU build.
+
+## Runtime API
+
+`src/index.ts` exposes a minimal class that mirrors the desktop worker protocol:
+
+- `new Whisper(modelPath, { gpu?: boolean })`
+- `await whisper.load()` (no-op placeholder)
+- `await whisper.transcribe(audioOrNull, options)`
+- `await whisper.free()`
+
+If you pass `null` (and a `fname_inp` in `options`) the addon reads the audio
+file directly, matching the CLI smoke tests.
+
+## Local expectations
+
+- `whisper.cpp` is tracked as a submodule under `packages/whisper-wrapper/`.
+- `cmake-js` / `node` / `pnpm` must be installed (the workspace root sets the
+  required versions).
+- The build creates `.cmake-js/` and `.home/` caches inside the package; they’re
+  ignored in git.
+
+For any tweaks (new build targets, additional fallbacks, etc.) update this file
+so the CI configuration stays discoverable.
--- a/packages/whisper-wrapper/WHISPER_CPP_VERSION
+++ b/packages/whisper-wrapper/WHISPER_CPP_VERSION
@ -0,0 +1 @@
+v1.7.6
--- a/packages/whisper-wrapper/addon/CMakeLists.txt
+++ b/packages/whisper-wrapper/addon/CMakeLists.txt
@ -0,0 +1,84 @@
+cmake_minimum_required(VERSION 3.20)
+project(whisper_node LANGUAGES C CXX)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+
+add_definitions(-DNAPI_VERSION=8)
+
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../whisper.cpp/cmake")
+
+set(WHISPER_CPP_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../whisper.cpp")
+
+# Only the whisper/ggml libraries are needed: switch off tests, examples, the
+# server and the optional curl / SDL2 / ffmpeg integrations.
+set(WHISPER_BUILD_TESTS OFF CACHE BOOL "" FORCE)
+set(WHISPER_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
+set(WHISPER_BUILD_SERVER OFF CACHE BOOL "" FORCE)
+set(WHISPER_CURL OFF CACHE BOOL "" FORCE)
+set(WHISPER_SDL2 OFF CACHE BOOL "" FORCE)
+set(WHISPER_FFMPEG OFF CACHE BOOL "" FORCE)
+# Force static libraries so whisper/ggml end up linked into the addon binary
+# itself instead of shipping as separate shared libraries.
+set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
+set(GGML_STATIC ON CACHE BOOL "" FORCE)
+set(GGML_SHARED OFF CACHE BOOL "" FORCE)
+
+# EXCLUDE_FROM_ALL: whisper.cpp targets are only built when a target defined
+# in this file depends on them.
+add_subdirectory(${WHISPER_CPP_DIR} whispercpp EXCLUDE_FROM_ALL)
+
+find_package(Threads REQUIRED)
+
+set(ADDON_SOURCES
+  ${CMAKE_CURRENT_SOURCE_DIR}/addon.cpp
+  ${WHISPER_CPP_DIR}/examples/common.cpp
+  ${WHISPER_CPP_DIR}/examples/common-ggml.cpp
+  ${WHISPER_CPP_DIR}/examples/common-whisper.cpp
+  ${WHISPER_CPP_DIR}/examples/grammar-parser.cpp
+)
+
+add_library(whisper_node SHARED ${ADDON_SOURCES})
+set_target_properties(whisper_node PROPERTIES PREFIX "" SUFFIX ".node" OUTPUT_NAME "whisper")
+
+if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+  target_compile_options(whisper_node PRIVATE -Wall -Wextra -Wno-unused-parameter)
+endif()
+
+set_target_properties(whisper_node PROPERTIES
+  CXX_VISIBILITY_PRESET hidden
+  VISIBILITY_INLINES_HIDDEN ON)
+
+# CMake-js variables
+# CMAKE_JS_INC / CMAKE_JS_LIB arrive as ordinary CMake lists (";"-separated).
+# Use them as lists: rewriting ";" to " " and re-splitting on whitespace would
+# cut any path that contains a space (e.g. "C:/Program Files/...") in two.
+if (CMAKE_JS_INC)
+  target_include_directories(whisper_node PRIVATE ${CMAKE_JS_INC})
+endif()
+
+# Include directories
+target_include_directories(whisper_node PRIVATE
+  ${CMAKE_CURRENT_SOURCE_DIR}
+  ${WHISPER_CPP_DIR}/include
+  ${WHISPER_CPP_DIR}/ggml/include
+  ${WHISPER_CPP_DIR}/examples
+)
+
+# Link libraries
+if (CMAKE_JS_LIB)
+  target_link_libraries(whisper_node PRIVATE ${CMAKE_JS_LIB})
+endif()
+
+target_link_libraries(whisper_node PRIVATE whisper Threads::Threads)
+
+# On macOS we need to allow undefined symbols for node addon
+if (APPLE)
+  target_link_options(whisper_node PRIVATE "-undefined" "dynamic_lookup")
+endif()
--- a/packages/whisper-wrapper/addon/addon.cpp
+++ b/packages/whisper-wrapper/addon/addon.cpp
@ -0,0 +1,455 @@
+#include "napi.h"
+
+#include "whisper.h"
+#include "common.h"
+#include "common-whisper.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <thread>
+#include <vector>
+
+namespace {
+
+// Native state behind the JS-visible external handle.
+// `mutex` is taken while the context is used for inference and while it is
+// released; `freed` records that whisper_free() has already been called.
+struct WhisperHandle {
+  std::mutex mutex;
+  whisper_context* ctx = nullptr;
+  bool freed = false;
+};
+
+// One decoded token of a segment (only collected in "detail" format).
+// from_ms / to_ms keep their -1 defaults unless token timestamps are enabled.
+struct TokenData {
+  std::string text;
+  int id = 0;
+  float p = 0.0f;
+  int from_ms = -1;
+  int to_ms = -1;
+};
+
+// Intermediate representation of one transcription segment.
+// `confidence`, `language` and `tokens` are only filled in "detail" format.
+struct SegmentData {
+  int from_ms = 0;
+  int to_ms = 0;
+  std::string text;
+  float confidence = 0.0f;
+  std::string language;
+  std::vector<TokenData> tokens;
+};
+
+// Parsed transcription options.
+// `initial_prompt` and `language` own the string storage; full_transcribe()
+// later points params.initial_prompt / params.language at their c_str(), so
+// this object must outlive the whisper call that uses `params`.
+struct FullParamConfig {
+  whisper_full_params params;
+  std::string initial_prompt;
+  std::string language;
+  bool detailed = false;           // true when options.format == "detail"
+  bool token_timestamps = false;   // mirror of params.token_timestamps
+};
+
+// Builds a FullParamConfig from the JS `options` object.
+//
+// Starts from whisper_full_default_params(WHISPER_SAMPLING_GREEDY) and
+// overrides a field only when the corresponding key is present on `options`.
+// Exceptions to the "keep the default" rule:
+//   - print_progress is forced to false unless explicitly provided;
+//   - language falls back to "auto" when missing, non-string or empty;
+//   - beam_size > 1 switches the strategy to beam search;
+//   - "prompt" is used as the initial prompt only if "initial_prompt" did not
+//     supply a non-empty string;
+//   - format == "detail" (case-insensitive) enables detailed output.
+//
+// The returned params.language / params.initial_prompt pointers are NOT set
+// here; the caller wires them to the strings owned by the returned config.
+FullParamConfig parse_full_params(const Napi::Env env, const Napi::Object& options) {
+  FullParamConfig cfg;
+  cfg.params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
+
+  // Each reader overwrites `dst` only when `key` exists on the options object.
+  const auto read_int = [&options](const char* key, auto& dst) {
+    if (options.Has(key)) {
+      dst = options.Get(key).As<Napi::Number>().Int32Value();
+    }
+  };
+  const auto read_float = [&options](const char* key, auto& dst) {
+    if (options.Has(key)) {
+      dst = options.Get(key).As<Napi::Number>().FloatValue();
+    }
+  };
+  const auto read_bool = [&options](const char* key, bool& dst) {
+    if (options.Has(key)) {
+      dst = options.Get(key).As<Napi::Boolean>().Value();
+    }
+  };
+  // Strings are only accepted when the value really is a JS string.
+  const auto read_string = [&options](const char* key, std::string& dst) {
+    if (options.Has(key) && options.Get(key).IsString()) {
+      dst = options.Get(key).As<Napi::String>().Utf8Value();
+    }
+  };
+
+  if (options.Has("strategy")) {
+    cfg.params.strategy = static_cast<whisper_sampling_strategy>(
+      options.Get("strategy").As<Napi::Number>().Int32Value());
+  }
+  read_int("n_threads", cfg.params.n_threads);
+  read_int("n_max_text_ctx", cfg.params.n_max_text_ctx);
+  read_int("offset_ms", cfg.params.offset_ms);
+  read_int("duration_ms", cfg.params.duration_ms);
+
+  read_bool("translate", cfg.params.translate);
+  read_bool("no_context", cfg.params.no_context);
+  read_bool("no_timestamps", cfg.params.no_timestamps);
+  read_bool("single_segment", cfg.params.single_segment);
+  read_bool("print_special", cfg.params.print_special);
+  cfg.params.print_progress = false;  // quiet unless the caller opts in
+  read_bool("print_progress", cfg.params.print_progress);
+  read_bool("print_realtime", cfg.params.print_realtime);
+  read_bool("print_timestamps", cfg.params.print_timestamps);
+
+  read_bool("token_timestamps", cfg.params.token_timestamps);
+  cfg.token_timestamps = cfg.params.token_timestamps;
+
+  read_float("thold_pt", cfg.params.thold_pt);
+  read_float("thold_ptsum", cfg.params.thold_ptsum);
+  read_int("max_len", cfg.params.max_len);
+  read_bool("split_on_word", cfg.params.split_on_word);
+  read_int("max_tokens", cfg.params.max_tokens);
+
+  read_bool("debug_mode", cfg.params.debug_mode);
+  read_int("audio_ctx", cfg.params.audio_ctx);
+
+  read_bool("tdrz_enable", cfg.params.tdrz_enable);
+
+  read_string("initial_prompt", cfg.initial_prompt);
+  read_string("language", cfg.language);
+
+  read_bool("suppress_blank", cfg.params.suppress_blank);
+  read_bool("suppress_non_speech_tokens", cfg.params.suppress_nst);
+
+  read_float("temperature", cfg.params.temperature);
+  // max_initial_ts is a float field in whisper_full_params; reading it as an
+  // int32 would truncate fractional values such as 0.5 to 0.
+  read_float("max_initial_ts", cfg.params.max_initial_ts);
+  read_float("length_penalty", cfg.params.length_penalty);
+
+  read_float("temperature_inc", cfg.params.temperature_inc);
+  read_float("entropy_thold", cfg.params.entropy_thold);
+  read_float("logprob_thold", cfg.params.logprob_thold);
+  read_float("no_speech_thold", cfg.params.no_speech_thold);
+
+  read_int("best_of", cfg.params.greedy.best_of);
+  if (options.Has("beam_size")) {
+    cfg.params.beam_search.beam_size = options.Get("beam_size").As<Napi::Number>().Int32Value();
+    if (cfg.params.beam_search.beam_size > 1) {
+      cfg.params.strategy = WHISPER_SAMPLING_BEAM_SEARCH;
+    }
+  }
+
+  // "prompt" is a fallback alias for "initial_prompt".
+  if (cfg.initial_prompt.empty()) {
+    read_string("prompt", cfg.initial_prompt);
+  }
+
+  if (options.Has("format") && options.Get("format").IsString()) {
+    std::string format = options.Get("format").As<Napi::String>();
+    // Go through unsigned char: tolower() on a negative char value is undefined.
+    std::transform(format.begin(), format.end(), format.begin(),
+                   [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+    cfg.detailed = (format == "detail");
+  }
+
+  read_bool("detect_language", cfg.params.detect_language);
+
+  if (cfg.language.empty()) {
+    cfg.language = "auto";
+  }
+
+  return cfg;
+}
+
+// Wraps a heap-allocated WhisperHandle in a Napi::External whose finalizer
+// releases the whisper context (if free_model() has not done so already) and
+// then deletes the handle.
+Napi::External<WhisperHandle> wrap_handle(Napi::Env env, WhisperHandle* handle) {
+  return Napi::External<WhisperHandle>::New(
+    env,
+    handle,
+    [](Napi::Env /*env*/, WhisperHandle* ptr) {
+      if (!ptr) return;
+      {
+        // Keep the guard in its own scope: the mutex lives inside *ptr, so it
+        // must be unlocked before `delete ptr` destroys it. Deleting while the
+        // guard is alive destroys a locked mutex and then unlocks freed memory.
+        std::lock_guard<std::mutex> guard(ptr->mutex);
+        if (!ptr->freed && ptr->ctx) {
+          whisper_free(ptr->ctx);
+          ptr->ctx = nullptr;
+          ptr->freed = true;
+        }
+      }
+      delete ptr;
+    });
+}
+
+// Returns the WhisperHandle stored in argument `index`, or throws a
+// TypeError when that argument is missing or is not an external value.
+WhisperHandle* unwrap_handle(const Napi::CallbackInfo& info, size_t index) {
+  const bool is_handle = index < info.Length() && info[index].IsExternal();
+  if (!is_handle) {
+    throw Napi::TypeError::New(info.Env(), "Invalid context handle");
+  }
+  auto external = info[index].As<Napi::External<WhisperHandle>>();
+  return external.Data();
+}
+
+// Copies the PCM samples from options.audio into a std::vector<float>.
+//
+// Returns an empty vector when "audio" is absent or is not a typed array, so
+// the caller can fall back to reading a file. Throws a TypeError when "audio"
+// is a typed array of any element type other than float32: reinterpreting
+// e.g. an Int16Array as floats would read past the end of its buffer.
+std::vector<float> extract_audio(const Napi::Env env, const Napi::Object& options) {
+  std::vector<float> pcmf32;
+  if (!options.Has("audio")) {
+    return pcmf32;
+  }
+
+  const Napi::Value value = options.Get("audio");
+  if (!value.IsTypedArray()) {
+    return pcmf32;
+  }
+  if (value.As<Napi::TypedArray>().TypedArrayType() != napi_float32_array) {
+    throw Napi::TypeError::New(env, "'audio' must be a Float32Array");
+  }
+
+  Napi::Float32Array array = value.As<Napi::Float32Array>();
+  const float* samples = array.Data();
+  const size_t count = array.ElementLength();
+  if (samples != nullptr && count > 0) {
+    pcmf32.assign(samples, samples + count);
+  }
+  return pcmf32;
+}
+
+// Returns a list holding options.fname_inp when it is a string; otherwise
+// the list is empty.
+std::vector<std::string> extract_files(const Napi::Object& options) {
+  std::vector<std::string> files;
+  if (!options.Has("fname_inp")) {
+    return files;
+  }
+
+  const auto input = options.Get("fname_inp");
+  if (input.IsString()) {
+    files.emplace_back(input.As<Napi::String>());
+  }
+  return files;
+}
+
+// JS: init({ model, gpu?, use_gpu?, flash_attn? }) -> external handle.
+// Loads the model file into a new whisper context. GPU use defaults to true
+// ("gpu" takes precedence over "use_gpu"); flash attention defaults to false.
+// Throws when the options are malformed or the context cannot be created.
+Napi::Value init_model(const Napi::CallbackInfo& info) {
+  Napi::Env env = info.Env();
+
+  const bool has_options = info.Length() >= 1 && info[0].IsObject();
+  if (!has_options) {
+    throw Napi::TypeError::New(env, "Expected init options object");
+  }
+
+  Napi::Object opts = info[0].As<Napi::Object>();
+  const bool has_model = opts.Has("model") && opts.Get("model").IsString();
+  if (!has_model) {
+    throw Napi::TypeError::New(env, "Missing 'model' path");
+  }
+  const std::string model_path = opts.Get("model").As<Napi::String>();
+
+  bool want_gpu = true;
+  if (opts.Has("gpu")) {
+    want_gpu = opts.Get("gpu").As<Napi::Boolean>();
+  } else if (opts.Has("use_gpu")) {
+    want_gpu = opts.Get("use_gpu").As<Napi::Boolean>();
+  }
+
+  bool want_flash_attn = false;
+  if (opts.Has("flash_attn")) {
+    want_flash_attn = opts.Get("flash_attn").As<Napi::Boolean>();
+  }
+
+  whisper_context_params context_params = whisper_context_default_params();
+  context_params.use_gpu = want_gpu;
+  context_params.flash_attn = want_flash_attn;
+
+  whisper_context* ctx =
+    whisper_init_from_file_with_params(model_path.c_str(), context_params);
+  if (ctx == nullptr) {
+    throw Napi::Error::New(env, "Failed to initialize whisper context");
+  }
+
+  // Ownership of the handle passes to the external's finalizer.
+  auto* handle = new WhisperHandle();
+  handle->ctx = ctx;
+  return wrap_handle(env, handle);
+}
+
+Napi::Value free_model(const Napi::CallbackInfo& info) {
+  Napi::Env env = info.Env();
+  WhisperHandle* handle = unwrap_handle(info, 0);
+
+  std::lock_guard<std::mutex> guard(handle->mutex);
+  if (!handle->freed && handle->ctx) {
+    whisper_free(handle->ctx);
+    handle->ctx = nullptr;
+    handle->freed = true;
+  }
+
+  return env.Undefined();
+}
+
+// Converts the results currently stored in `ctx` into a JS array of segments.
+//
+// Every segment carries { from, to, text }, with times converted to
+// milliseconds (whisper timestamps * 10). When cfg.detailed is set each
+// segment additionally carries { lang, confidence, tokens }, and tokens carry
+// { text, id, p } plus { from, to } when cfg.token_timestamps is set.
+//
+// Confidence is the mean token probability over tokens whose id is not above
+// the EOT token id; with more than two such tokens the single lowest and
+// highest probabilities are dropped before averaging.
+//
+// `pcmf32` / `pcmf32s` are not used here; they stay in the signature for
+// existing callers.
+Napi::Array build_segments(const Napi::Env env,
+                           whisper_context* ctx,
+                           const FullParamConfig& cfg,
+                           const std::vector<float>& pcmf32,
+                           const std::vector<std::vector<float>>& pcmf32s) {
+  const int n_segments = whisper_full_n_segments(ctx);
+  Napi::Array segments = Napi::Array::New(env, n_segments);
+
+  // whisper_lang_str() yields nullptr for a language id it does not know
+  // (e.g. after an unrecognised language code was requested). Building a
+  // std::string from nullptr is undefined behaviour, so report "" instead.
+  const char* language_name = whisper_lang_str(whisper_full_lang_id(ctx));
+  const std::string detected_language = language_name != nullptr ? language_name : "";
+
+  for (int i = 0; i < n_segments; ++i) {
+    SegmentData segment;
+    segment.from_ms = whisper_full_get_segment_t0(ctx, i) * 10;
+    segment.to_ms = whisper_full_get_segment_t1(ctx, i) * 10;
+    segment.text = whisper_full_get_segment_text(ctx, i);
+
+    if (cfg.detailed) {
+      const int n_tokens = whisper_full_n_tokens(ctx, i);
+      segment.tokens.reserve(n_tokens);
+
+      float confidence_sum = 0.0f;
+      float min_p = 1.0f;
+      float max_p = 0.0f;
+      int valid_tokens = 0;
+
+      for (int j = 0; j < n_tokens; ++j) {
+        whisper_token_data token = whisper_full_get_token_data(ctx, i, j);
+
+        TokenData token_data;
+        token_data.text = whisper_full_get_token_text(ctx, i, j);
+        token_data.id = token.id;
+        token_data.p = token.p;
+        if (cfg.token_timestamps) {
+          token_data.from_ms = token.t0 * 10;
+          token_data.to_ms = token.t1 * 10;
+        }
+
+        segment.tokens.push_back(std::move(token_data));
+
+        // Tokens above EOT are still reported, but do not count towards the
+        // confidence score.
+        if (token.id > whisper_token_eot(ctx)) {
+          continue;
+        }
+
+        confidence_sum += token.p;
+        min_p = std::min(min_p, token.p);
+        max_p = std::max(max_p, token.p);
+        ++valid_tokens;
+      }
+
+      if (valid_tokens > 2) {
+        // Trimmed mean: drop the single lowest and highest probability.
+        segment.confidence =
+          (confidence_sum - min_p - max_p) / static_cast<float>(valid_tokens - 2);
+      } else if (valid_tokens > 0) {
+        segment.confidence = confidence_sum / static_cast<float>(valid_tokens);
+      } else {
+        segment.confidence = 0.0f;
+      }
+
+      segment.language = detected_language;
+    }
+
+    Napi::Object jsSegment = Napi::Object::New(env);
+    jsSegment.Set("from", Napi::Number::New(env, segment.from_ms));
+    jsSegment.Set("to", Napi::Number::New(env, segment.to_ms));
+    jsSegment.Set("text", Napi::String::New(env, segment.text));
+
+    if (cfg.detailed) {
+      jsSegment.Set("lang", Napi::String::New(env, segment.language));
+      jsSegment.Set("confidence", Napi::Number::New(env, segment.confidence));
+
+      Napi::Array jsTokens = Napi::Array::New(env, segment.tokens.size());
+      for (size_t t = 0; t < segment.tokens.size(); ++t) {
+        const TokenData& token = segment.tokens[t];
+        Napi::Object jsToken = Napi::Object::New(env);
+        jsToken.Set("text", Napi::String::New(env, token.text));
+        jsToken.Set("id", Napi::Number::New(env, token.id));
+        jsToken.Set("p", Napi::Number::New(env, token.p));
+        if (cfg.token_timestamps) {
+          jsToken.Set("from", Napi::Number::New(env, token.from_ms));
+          jsToken.Set("to", Napi::Number::New(env, token.to_ms));
+        }
+        jsTokens.Set(t, jsToken);
+      }
+
+      jsSegment.Set("tokens", jsTokens);
+    }
+
+    segments.Set(i, jsSegment);
+  }
+
+  return segments;
+}
+
+// JS: full(handle, options) -> array of segments.
+//
+// Audio comes from options.audio when it yields samples; otherwise the file
+// named by options.fname_inp is read. The transcription is executed directly
+// in this call (no async worker is used here), so the calling JS thread is
+// blocked until whisper_full_parallel() returns.
+//
+// Throws when the arguments are malformed, the model has been freed, no audio
+// can be obtained, or whisper reports a non-zero result.
+Napi::Value full_transcribe(const Napi::CallbackInfo& info) {
+  Napi::Env env = info.Env();
+  if (info.Length() < 2 || !info[1].IsObject()) {
+    throw Napi::TypeError::New(env, "Expected arguments (handle, options)");
+  }
+
+  WhisperHandle* handle = unwrap_handle(info, 0);
+  // NOTE(review): this check runs before handle->mutex is taken below.
+  if (handle->freed || handle->ctx == nullptr) {
+    throw Napi::Error::New(env, "Model has been freed");
+  }
+
+  auto options = info[1].As<Napi::Object>();
+
+  std::vector<float> pcmf32 = extract_audio(env, options);
+  std::vector<std::vector<float>> pcmf32s;
+  std::vector<std::string> files = extract_files(options);
+
+  // An in-memory buffer wins; the file path is only a fallback.
+  if (pcmf32.empty()) {
+    if (files.empty()) {
+      throw Napi::Error::New(env, "No audio provided (audio buffer or fname_inp required)");
+    }
+    // Last argument presumably selects stereo decoding (off here) - confirm
+    // against common-whisper.h.
+    if (!::read_audio_data(files[0], pcmf32, pcmf32s, false)) {
+      throw Napi::Error::New(env, "Failed to read input audio file");
+    }
+  }
+
+  FullParamConfig cfg = parse_full_params(env, options);
+
+  if (cfg.language.empty()) {
+    cfg.language = "auto";
+  }
+
+  // Point the C params at the strings owned by `cfg`. This must happen after
+  // the strings have reached their final value, and `cfg` must stay alive
+  // (and unmodified) until the whisper call below has returned.
+  cfg.params.language = cfg.language.c_str();
+  cfg.params.initial_prompt = cfg.initial_prompt.empty() ? nullptr : cfg.initial_prompt.c_str();
+
+  // n_processors is clamped to at least 1.
+  int n_processors = 1;
+  if (options.Has("n_processors")) {
+    n_processors = std::max(1, options.Get("n_processors").As<Napi::Number>().Int32Value());
+  }
+
+  // Held until the function returns, i.e. across inference and while the
+  // results are read back out of the context.
+  std::lock_guard<std::mutex> guard(handle->mutex);
+
+  int result = whisper_full_parallel(
+    handle->ctx,
+    cfg.params,
+    pcmf32.data(),
+    static_cast<int>(pcmf32.size()),
+    n_processors);
+
+  if (result != 0) {
+    throw Napi::Error::New(env, "whisper_full_parallel failed");
+  }
+
+  return build_segments(env, handle->ctx, cfg, pcmf32, pcmf32s);
+}
+
+} // namespace
+
+// Module entry point: registers the native functions as exports
+// `init`, `full` and `free`.
+Napi::Object InitAll(Napi::Env env, Napi::Object exports) {
+  using Callback = Napi::Value (*)(const Napi::CallbackInfo&);
+  const auto expose = [&env, &exports](const char* name, Callback fn) {
+    exports.Set(name, Napi::Function::New(env, fn));
+  };
+
+  expose("init", init_model);
+  expose("full", full_transcribe);
+  expose("free", free_model);
+  return exports;
+}
+
+NODE_API_MODULE(whisper, InitAll)
--- a/packages/whisper-wrapper/addon/package.json
+++ b/packages/whisper-wrapper/addon/package.json
@ -0,0 +1,9 @@
+{
+  "name": "@amical/whisper-node-addon",
+  "private": true,
+  "binary": {
+    "napi_versions": [
+      8
+    ]
+  }
+}
--- a/packages/whisper-wrapper/bin/build-addon.js
+++ b/packages/whisper-wrapper/bin/build-addon.js
@ -0,0 +1,296 @@
+#!/usr/bin/env node
+/*
+ * build-addon.js
+ * --------------------------------------------------
+ * Compiles the whisper.cpp Node addon (examples/addon.node) for the current
+ * platform/arch with acceleration flags, then places the resulting
+ * `whisper.node` binary in native/<target>/.
+ *
+ * NOTE: This is an initial scaffold. It expects the whisper.cpp sources to be
+ * vendored at `./whisper.cpp` (git submodule or manual copy). You can refine
+ * the build flags as needed.
+ */
+
+const { execSync } = require("child_process");
+const path = require("path");
+const fs = require("fs");
+
+function run(cmd, opts = {}) {
+  console.log(`[build-addon] ${cmd}`);
+  execSync(cmd, { stdio: "inherit", ...opts });
+}
+
+const pkgDir = path.resolve(__dirname, "..");
+const addonDir = path.join(pkgDir, "addon");
+const whisperDir = path.join(pkgDir, "whisper.cpp");
+
+if (!fs.existsSync(addonDir) || !fs.existsSync(whisperDir)) {
+  console.error(
+    "whisper.cpp sources not found. Please add them to packages/whisper-wrapper/whisper.cpp",
+  );
+  process.exit(1);
+}
+
+const buildDir = path.join(pkgDir, "build");
+if (!fs.existsSync(buildDir)) fs.mkdirSync(buildDir);
+
+const cacheDir = path.join(pkgDir, ".cmake-js");
+if (!fs.existsSync(cacheDir)) fs.mkdirSync(cacheDir);
+
+const homeDir = path.join(pkgDir, ".home");
+if (!fs.existsSync(homeDir)) fs.mkdirSync(homeDir);
+
+/**
+ * Locates MSVC's `lib.exe`.
+ *
+ * Candidates are collected in priority order and the first one wins:
+ *   1. whatever `where lib.exe` finds on PATH (using `env`),
+ *   2. the directories named by VCToolsInstallDir, VCINSTALLDIR and
+ *      VSINSTALLDIR/VC,
+ *   3. the default Visual Studio 2022 install locations.
+ *
+ * @param env  environment used for the `where` lookup and the VC* variables
+ * @param arch Node arch ("x64", "ia32", "arm64"); selects the bin sub-folder
+ * @returns absolute path to lib.exe, or null when none was found
+ */
+function resolveLibExecutable(env, arch) {
+  // Any arch other than ia32/arm64 is treated as x64.
+  const archDir = arch === "ia32" ? "x86" : arch === "arm64" ? "arm64" : "x64";
+  const hostDir = arch === "ia32" ? "Hostx86" : "Hostx64";
+  const candidates = [];
+
+  // Records a path only if it exists and has not been recorded already.
+  const addIfExists = (candidate) => {
+    if (candidate && fs.existsSync(candidate) && !candidates.includes(candidate)) {
+      candidates.push(candidate);
+    }
+  };
+
+  try {
+    const whereOutput = execSync("where lib.exe", {
+      env,
+      stdio: ["ignore", "pipe", "ignore"],
+    })
+      .toString()
+      .split(/\r?\n/)
+      .map((line) => line.trim())
+      .filter(Boolean);
+    for (const line of whereOutput) {
+      addIfExists(line);
+    }
+  } catch (err) {
+    // ignore when lib.exe is not on PATH; fall back to manual probing
+  }
+
+  // `dir` holds one sub-folder per toolset version. Walk them in descending
+  // (numeric-aware) name order and keep the first one that ships lib.exe.
+  const probeVersionedDir = (dir) => {
+    if (!dir || !fs.existsSync(dir) || !fs.statSync(dir).isDirectory()) return;
+    const entries = fs
+      .readdirSync(dir, { withFileTypes: true })
+      .filter((entry) => entry.isDirectory())
+      .map((entry) => entry.name)
+      .sort((a, b) => b.localeCompare(a, undefined, { numeric: true, sensitivity: "base" }));
+    for (const entry of entries) {
+      const candidate = path.join(dir, entry, "bin", hostDir, archDir, "lib.exe");
+      if (fs.existsSync(candidate)) {
+        addIfExists(candidate);
+        break;
+      }
+    }
+  };
+
+  // Accepts a direct file path, a toolset root (<dir>/bin/Host*/<arch>), or a
+  // VC root that contains Tools/MSVC/<version>.
+  const probeInstallDir = (installDir) => {
+    if (!installDir) return;
+    if (fs.existsSync(installDir) && fs.statSync(installDir).isFile()) {
+      addIfExists(installDir);
+      return;
+    }
+
+    const directCandidate = path.join(installDir, "bin", hostDir, archDir, "lib.exe");
+    addIfExists(directCandidate);
+
+    const toolsDir = path.join(installDir, "Tools", "MSVC");
+    probeVersionedDir(toolsDir);
+  };
+
+  probeInstallDir(env.VCToolsInstallDir);
+  probeInstallDir(env.VCINSTALLDIR);
+  probeInstallDir(env.VSINSTALLDIR && path.join(env.VSINSTALLDIR, "VC"));
+  probeVersionedDir("C:/Program Files/Microsoft Visual Studio/2022/Enterprise/VC/Tools/MSVC");
+  probeVersionedDir("C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC");
+  probeVersionedDir("C:/Program Files/Microsoft Visual Studio/2022/Professional/VC/Tools/MSVC");
+  probeVersionedDir("C:/Program Files (x86)/Microsoft Visual Studio/2022/BuildTools/VC/Tools/MSVC");
+
+  return candidates[0] || null;
+}
+
+/**
+ * Windows only: makes sure `<buildVariantDir>/node.lib` exists by generating
+ * an import library from the `node_api.def` file shipped with the
+ * `node-api-headers` package, using MSVC's `lib.exe`.
+ *
+ * Does nothing on other platforms or when node.lib is already present.
+ *
+ * @param buildVariantDir build directory of the current variant
+ * @param arch            Node arch; mapped to the /machine: value (defaults to X64)
+ * @param env             environment used to locate and run lib.exe
+ * @throws Error when node-api-headers, node_api.def or lib.exe cannot be
+ *         found, or when lib.exe itself fails
+ */
+function ensureWindowsNodeImportLib(buildVariantDir, arch, env) {
+  if (process.platform !== "win32") return;
+
+  const nodeImportLib = path.join(buildVariantDir, "node.lib");
+  if (fs.existsSync(nodeImportLib)) return;
+
+  // Resolve node-api-headers relative to this package.
+  let headersPackageJson;
+  try {
+    headersPackageJson = require.resolve("node-api-headers/package.json", {
+      paths: [pkgDir],
+    });
+  } catch (err) {
+    throw new Error(
+      "node-api-headers package not found; cannot generate node.lib on Windows",
+    );
+  }
+
+  const defPath = path.join(path.dirname(headersPackageJson), "def", "node_api.def");
+  if (!fs.existsSync(defPath)) {
+    throw new Error(`node_api.def not found at ${defPath}`);
+  }
+
+  const machineMap = { x64: "X64", ia32: "X86", arm64: "ARM64" };
+  const machine = machineMap[arch] || "X64";
+
+  const libExecutable = resolveLibExecutable(env, arch);
+  if (!libExecutable) {
+    throw new Error(
+      "Unable to locate lib.exe. Ensure the Visual Studio Build Tools are installed and vcvarsall has been applied.",
+    );
+  }
+
+  console.log(
+    `[build-addon] Generating node import library using ${libExecutable} for ${machine} into ${nodeImportLib}`,
+  );
+  try {
+    run(`"${libExecutable}" /def:"${defPath}" /machine:${machine} /out:"${nodeImportLib}"`, {
+      env,
+    });
+  } catch (error) {
+    // Prefix the original failure with a hint, keeping the original error
+    // object (and its stack) when there is one.
+    const message =
+      "Failed to generate node import library. Ensure Visual Studio build tools are installed.";
+    if (error instanceof Error) {
+      error.message = `${message}\n${error.message}`;
+      throw error;
+    }
+    throw new Error(message);
+  }
+}
+
+function variantFromName(name, platform, arch) {
+  const envOverrides = {};
+  if (name === "cpu-fallback") {
+    return { name, env: envOverrides };
+  }
+
+  if (!name.includes("-")) {
+    // expand shorthand like "metal" to full name
+    name = `${platform}-${arch}-${name}`;
+  } else if (!name.startsWith(platform)) {
+    console.warn(
+      `[build-addon] Warning: variant '${name}' does not match current platform (${platform}), skipping.`,
+    );
+    return null;
+  }
+
+  if (name.includes("-metal")) {
+    envOverrides.GGML_METAL = "1";
+    envOverrides.GGML_USE_ACCELERATE = "1";
+  }
+  if (name.includes("-openblas")) {
+    envOverrides.GGML_OPENBLAS = "1";
+    envOverrides.GGML_BLAS = "1";
+  }
+  if (name.includes("-cuda")) {
+    envOverrides.GGML_CUDA = "1";
+  }
+  if (name.startsWith("darwin-")) {
+    envOverrides.GGML_USE_ACCELERATE = envOverrides.GGML_USE_ACCELERATE || "1";
+  }
+
+  return { name, env: envOverrides };
+}
+
+function computeVariants(platform, arch) {
+  const overrides = (process.env.WHISPER_TARGETS || "")
+    .split(",")
+    .map((v) => v.trim())
+    .filter(Boolean);
+
+  const result = [];
+
+  if (overrides.length > 0) {
+    for (const override of overrides) {
+      const variant = variantFromName(override, platform, arch);
+      if (variant) result.push(variant);
+    }
+    return result;
+  }
+
+  if (platform === "darwin") {
+    const metal = variantFromName(`${platform}-${arch}-metal`, platform, arch);
+    if (metal) result.push(metal);
+  }
+
+  const primary = variantFromName(`${platform}-${arch}`, platform, arch);
+  if (primary) result.push(primary);
+
+  return result;
+}
+
+const { platform, arch } = process;
+const variants = computeVariants(platform, arch);
+
+if (variants.length === 0) {
+  console.warn("[build-addon] No variants requested, building default cpu-fallback.");
+  const fallback = variantFromName("cpu-fallback", platform, arch);
+  if (fallback) variants.push(fallback);
+}
+
+for (const variant of variants) {
+  const buildVariantDir = path.join(buildDir, variant.name.replace(/[\\/]/g, "_"));
+  fs.rmSync(buildVariantDir, { recursive: true, force: true });
+  fs.mkdirSync(buildVariantDir, { recursive: true });
+
+  const env = {
+    ...process.env,
+    CMAKE_JS_CACHE: cacheDir,
+    HOME: homeDir,
+    CMAKE_JS_NODE_DIR: path.resolve(process.execPath, "..", ".."),
+    ...variant.env,
+  };
+
+  console.log(`[build-addon] Building variant ${variant.name}`);
+
+  ensureWindowsNodeImportLib(buildVariantDir, arch, env);
+
+  const cmakeParts = [
+    "npx cmake-js compile",
+    `-O "${buildVariantDir}"`,
+    "-B Release",
+    `-d "${addonDir}"`,
+    "-T whisper_node",
+    "--CD node_runtime=node",
+  ];
+
+  const propagateCMakeBool = (key) => {
+    const value = env[key];
+    if (typeof value === "string" && value.length > 0) {
+      cmakeParts.push(`--CD${key}=${value}`);
+    }
+  };
+
+  propagateCMakeBool("GGML_NATIVE");
+
+  run(cmakeParts.join(" "), {
+    cwd: addonDir,
+    env,
+  });
+
+  const builtBinary = path.join(buildVariantDir, "Release", "whisper.node");
+  if (!fs.existsSync(builtBinary)) {
+    throw new Error(`Build succeeded but whisper.node not found for variant ${variant.name}`);
+  }
+
+  const targetDir = path.join(pkgDir, "native", variant.name);
+  fs.mkdirSync(targetDir, { recursive: true });
+  fs.copyFileSync(builtBinary, path.join(targetDir, "whisper.node"));
+  console.log(`[build-addon] copied to native/${variant.name}/whisper.node`);
+
+  if (platform === "darwin") {
+    const targetBinary = path.join(targetDir, "whisper.node");
+    try {
+      run(`codesign --force --sign - "${targetBinary}"`);
+      console.log("[build-addon] codesigned", targetBinary);
+    } catch (err) {
+      console.warn(
+        `[build-addon] warning: codesign failed for ${targetBinary}: ${err.message}`,
+      );
+    }
+  }
+
+  // Remove intermediate build artifacts to keep the package footprint small and avoid
+  // extremely long CMake-generated paths that break Windows packaging tools.
+  fs.rmSync(buildVariantDir, { recursive: true, force: true });
+}
--- a/packages/whisper-wrapper/package.json
+++ b/packages/whisper-wrapper/package.json
@ -0,0 +1,33 @@
+{
+  "name": "@amical/whisper-wrapper",
+  "version": "0.0.0",
+  "private": true,
+  "main": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "files": [
+    "dist",
+    "native",
+    "src",
+    "addon"
+  ],
+  "binary": {
+    "napi_versions": [
+      8
+    ]
+  },
+  "scripts": {
+    "build": "tsc -p tsconfig.json",
+    "postinstall": "node ./bin/build-addon.js",
+    "build:native": "node ./scripts/build-native.js",
+    "build:native:cuda": "node ./scripts/build-native.js --cuda"
+  },
+  "dependencies": {
+    "cmake-js": "^7.3.1",
+    "minimatch": "10.0.3",
+    "node-api-headers": "^1.5.0"
+  },
+  "devDependencies": {
+    "@amical/typescript-config": "workspace:*",
+    "typescript": "^5.8.3"
+  }
+}
--- a/packages/whisper-wrapper/scripts/build-native.js
+++ b/packages/whisper-wrapper/scripts/build-native.js
@ -0,0 +1,19 @@
+#!/usr/bin/env node
+const { execSync } = require("node:child_process");
+const path = require("node:path");
+
+function build(targets) {
+  const baseEnv = { ...process.env };
+  baseEnv.WHISPER_TARGETS = targets.join(",");
+  execSync("node ./bin/build-addon.js", {
+    cwd: path.join(__dirname, ".."),
+    stdio: "inherit",
+    env: baseEnv,
+  });
+}
+
+if (process.argv.includes("--cuda")) {
+  build(["win32-x64-cuda", "win32-x64"]);
+} else {
+  build([]);
+}
--- a/packages/whisper-wrapper/scripts/test-addon.js
+++ b/packages/whisper-wrapper/scripts/test-addon.js
@ -0,0 +1,160 @@
+#!/usr/bin/env node
+// Quick smoke-test runner for the whisper.cpp Node addon build.
+//
+// Usage:
+//   node scripts/test-addon.js [--model=/path/to/model.bin] [--audio=/path/to/audio.wav]
+//
+// If no flags are provided the script will grab the largest *.bin model from
+// "~/Library/Application Support/amical/models" and the bundled jfk sample.
+
+const fs = require("node:fs");
+const os = require("node:os");
+const path = require("node:path");
+
+function resolveBinding() {
+  const nativeRoot = path.resolve(__dirname, "..", "native");
+  const { platform, arch } = process;
+  const candidates = [
+    `${platform}-${arch}-metal`,
+    `${platform}-${arch}-openblas`,
+    `${platform}-${arch}-cuda`,
+    `${platform}-${arch}`,
+    "cpu-fallback",
+  ];
+
+  for (const dir of candidates) {
+    const bindingPath = path.join(nativeRoot, dir, "whisper.node");
+    if (fs.existsSync(bindingPath)) {
+      return bindingPath;
+    }
+  }
+
+  throw new Error(
+    `Unable to locate a whisper.node binary for ${platform}-${arch}. ` +
+      `Expected one of: ${candidates.join(", ")}`,
+  );
+}
+
+function defaultModelPath() {
+  const modelsDir = path.join(
+    os.homedir(),
+    "Library",
+    "Application Support",
+    "amical",
+    "models",
+  );
+
+  if (!fs.existsSync(modelsDir)) {
+    throw new Error(
+      `Model directory not found at ${modelsDir}. Pass --model to override.`,
+    );
+  }
+
+  const candidates = fs
+    .readdirSync(modelsDir)
+    .filter((f) => f.toLowerCase().endsWith(".bin"))
+    .map((name) => {
+      const fullPath = path.join(modelsDir, name);
+      const stats = fs.statSync(fullPath);
+      return { name, fullPath, size: stats.size };
+    })
+    .sort((a, b) =>  - a.size + b.size);
+
+  if (candidates.length === 0) {
+    throw new Error(
+      `No .bin model files found in ${modelsDir}. Pass --model to override.`,
+    );
+  }
+
+  return candidates[0].fullPath;
+}
+
+function defaultAudioPath() {
+  const audio = path.resolve(
+    __dirname,
+    "..",
+    "whisper.cpp",
+    "samples",
+    "jfk.wav",
+  );
+
+  if (!fs.existsSync(audio)) {
+    throw new Error(
+      `Sample audio not found at ${audio}. Pass --audio to override.`,
+    );
+  }
+
+  return audio;
+}
+
+function parseArgs() {
+  const args = process.argv.slice(2);
+  const options = {};
+
+  for (const arg of args) {
+    if (!arg.startsWith("--")) continue;
+    const [key, value] = arg.slice(2).split("=");
+    if (!value) {
+      throw new Error(`Flag '${arg}' must be provided as --${key}=<value>`);
+    }
+    options[key] = value;
+  }
+
+  return options;
+}
+
+async function main() {
+  const opts = parseArgs();
+  const modelPath = path.resolve(opts.model || defaultModelPath());
+  const audioPath = path.resolve(opts.audio || defaultAudioPath());
+
+  if (!fs.existsSync(modelPath)) {
+    throw new Error(`Model file not found at ${modelPath}`);
+  }
+  if (!fs.existsSync(audioPath)) {
+    throw new Error(`Audio file not found at ${audioPath}`);
+  }
+
+  const bindingPath = resolveBinding();
+  console.log(`> Using addon: ${bindingPath}`);
+  console.log(`> Using model: ${modelPath}`);
+  console.log(`> Using audio: ${audioPath}`);
+
+  // eslint-disable-next-line @typescript-eslint/no-var-requires
+  const binding = require(bindingPath);
+
+  if (typeof binding.init !== "function" ||
+      typeof binding.full !== "function" ||
+      typeof binding.free !== "function") {
+    throw new Error(`Addon at ${bindingPath} does not expose init/full/free APIs.`);
+  }
+
+  const handle = binding.init({ model: modelPath, gpu: true });
+  try {
+    const segments = binding.full(handle, {
+      fname_inp: audioPath,
+      language: "en",
+      no_timestamps: false,
+      suppress_blank: true,
+      suppress_non_speech_tokens: true,
+    });
+
+    console.log("Transcription segments:\n");
+    for (const segment of segments) {
+      const from = typeof segment.from === "number" ? segment.from : "?";
+      const to = typeof segment.to === "number" ? segment.to : "?";
+      console.log(`  [${from} -> ${to}] ${segment.text}`);
+    }
+
+    console.log("\nDone.");
+  } finally {
+    binding.free(handle);
+  }
+
+  
+}
+
+main().catch((err) => {
+  console.error("Test run failed:", err);
+  process.exitCode = 1;
+});
--- a/packages/whisper-wrapper/src/index.ts
+++ b/packages/whisper-wrapper/src/index.ts
@ -0,0 +1,43 @@
+/* eslint-disable @typescript-eslint/no-var-requires */
+import { loadBinding, getLoadedBindingInfo } from "./loader";
+
+// Load the native addon once, when this module is first evaluated; loadBinding()
+// throws if no whisper.node can be loaded, so importing this module can throw.
+const binding = loadBinding();
+
+/** Options accepted by the {@link Whisper} constructor. */
+export interface WhisperOptions {
+  /**
+   * Forwarded to the native `init` call when set. When omitted, no `gpu` key
+   * is sent and the binding's own default applies.
+   */
+  gpu?: boolean;
+}
+
+export { getLoadedBindingInfo } from "./loader";
+
+/**
+ * Thin wrapper around a context created by the native whisper binding.
+ *
+ * The context is created in the constructor and must be released with
+ * {@link Whisper.free}. After `free()` the instance can no longer transcribe.
+ */
+export class Whisper {
+  // Handle returned by the native init(); its shape is owned by the addon.
+  private ctx: any;
+  // Set once free() has run so the handle is never given to native code again.
+  private freed = false;
+
+  /**
+   * @param modelPath Path of the model file passed to the native `init`.
+   * @param opts Optional settings; `gpu` is forwarded when it is a boolean.
+   */
+  constructor(
+    private modelPath: string,
+    opts?: WhisperOptions,
+  ) {
+    const initOptions: { model: string; gpu?: boolean } = { model: modelPath };
+    if (typeof opts?.gpu === "boolean") {
+      initOptions.gpu = opts.gpu;
+    }
+    this.ctx = binding.init(initOptions);
+  }
+
+  /** Currently a no-op: the context is already created by the constructor. */
+  async load(): Promise<void> {
+    return;
+  }
+
+  /**
+   * Runs the native `full` call on this context.
+   *
+   * @param audio Samples to transcribe. When this is a Float32Array it is
+   *   added to the payload under `audio` (a key of the same name in `options`
+   *   takes precedence); otherwise `options` is passed through unchanged.
+   * @param options Extra parameters handed to the native call.
+   * @returns An object whose `result` promise resolves to the segments. The
+   *   native call itself is invoked synchronously.
+   * @throws Error if the context has already been freed.
+   */
+  async transcribe(
+    audio: Float32Array | null,
+    options: Record<string, unknown>,
+  ): Promise<{ result: Promise<Array<{ text: string }>> }> {
+    if (this.freed) {
+      throw new Error("Whisper context has already been freed");
+    }
+    const payload =
+      audio instanceof Float32Array ? { audio, ...options } : options;
+    const segments = binding.full(this.ctx, payload);
+    return { result: Promise.resolve(segments) };
+  }
+
+  /** Releases the native context. Calling it more than once is a no-op. */
+  async free(): Promise<void> {
+    if (this.freed) {
+      return;
+    }
+    // Mark first so a released handle is never passed to the addon again.
+    this.freed = true;
+    binding.free(this.ctx);
+    this.ctx = null;
+  }
+
+  /** Returns the path and type of the binding that was loaded, if any. */
+  static getBindingInfo(): { path: string; type: string } | null {
+    return getLoadedBindingInfo();
+  }
+}
--- a/packages/whisper-wrapper/src/loader.ts
+++ b/packages/whisper-wrapper/src/loader.ts
@ -0,0 +1,106 @@
+import path from "node:path";
+import fs from "node:fs";
+
+// Build-variant tags probed ahead of the plain platform-arch build, in order.
+const GPU_FIRST_CANDIDATES = ["metal", "openblas", "cuda"] as const;
+
+/**
+ * Lists the native/ sub-directory names to probe, in priority order: each
+ * tagged variant for the given platform/arch, then the untagged
+ * platform-arch directory, then "cpu-fallback".
+ */
+function candidateDirs(platform: string, arch: string): string[] {
+  const base = `${platform}-${arch}`;
+  const dirs: string[] = [];
+  for (const tag of GPU_FIRST_CANDIDATES) {
+    dirs.push(`${base}-${tag}`);
+  }
+  dirs.push(base, "cpu-fallback");
+  return dirs;
+}
+
+/** Builds the path of whisper.node inside the given native/ sub-directory. */
+function bindingPathFor(dir: string): string {
+  const nativeRoot = path.join(__dirname, "..", "native");
+  return path.join(nativeRoot, dir, "whisper.node");
+}
+
+/**
+ * Tells whether `error` is an object whose `code` is "ERR_DLOPEN_FAILED",
+ * the only failure loadBinding() treats as "try the next candidate".
+ */
+function isLoadableError(error: unknown): boolean {
+  if (typeof error !== "object" || error === null) {
+    return false;
+  }
+  const { code } = error as { code?: unknown };
+  return code === "ERR_DLOPEN_FAILED";
+}
+
+/**
+ * Returns the path of the first candidate whisper.node that exists on disk
+ * for the current platform/arch, without trying to load it.
+ *
+ * @throws Error when no candidate file exists.
+ */
+export function resolveBinding(): string {
+  const target = `${process.platform}-${process.arch}`;
+  const existing = candidateDirs(process.platform, process.arch)
+    .map((dir) => bindingPathFor(dir))
+    .find((candidate) => fs.existsSync(candidate));
+
+  if (existing !== undefined) {
+    return existing;
+  }
+
+  throw new Error(`No suitable whisper.node binary found for ${target}`);
+}
+
+// Path and variant type of the binding that loadBinding() loaded; null until
+// a load has succeeded.
+type LoadedBindingInfo = { path: string; type: string };
+let loadedBindingInfo: LoadedBindingInfo | null = null;
+
+/** Returns the recorded binding info, or null if nothing has been loaded. */
+export function getLoadedBindingInfo(): LoadedBindingInfo | null {
+  return loadedBindingInfo;
+}
+
+/**
+ * Loads the first usable whisper.node for the current platform/arch.
+ *
+ * Candidates are tried in candidateDirs() order; directories without a binary
+ * are skipped. A candidate that fails with ERR_DLOPEN_FAILED is logged and
+ * the next one is tried; any other load error is rethrown immediately. On
+ * success the binding's path and variant type are recorded for
+ * getLoadedBindingInfo().
+ *
+ * @throws Error when no candidate exists, or when every existing candidate
+ *   failed to load (the last load error is attached as `cause`).
+ */
+export function loadBinding(): any {
+  const target = `${process.platform}-${process.arch}`;
+  const attempted: string[] = [];
+  let lastLoadError: unknown = null;
+
+  for (const dir of candidateDirs(process.platform, process.arch)) {
+    const candidate = bindingPathFor(dir);
+    if (!fs.existsSync(candidate)) {
+      continue;
+    }
+
+    attempted.push(candidate);
+    try {
+      const mod = require(candidate);
+
+      // More than one attempt means an earlier candidate failed to load.
+      if (attempted.length > 1) {
+        console.warn(
+          `[whisper-wrapper] loaded fallback binary: ${candidate} (attempted ${attempted.length} candidates)`,
+        );
+      }
+
+      // Derive the variant type from the directory name.
+      const acceleratedTag = ["cuda", "metal", "openblas"].find((tag) =>
+        dir.includes(`-${tag}`),
+      );
+      let bindingType: string;
+      if (acceleratedTag !== undefined) {
+        bindingType = acceleratedTag;
+      } else if (dir === "cpu-fallback") {
+        bindingType = "cpu-fallback";
+      } else {
+        bindingType = "cpu";
+      }
+
+      loadedBindingInfo = { path: candidate, type: bindingType };
+      return mod;
+    } catch (error) {
+      if (!isLoadableError(error)) {
+        throw error;
+      }
+
+      console.warn(
+        `[whisper-wrapper] failed to load ${candidate}: ${(error as Error).message}. Trying next candidate...`,
+      );
+      lastLoadError = error;
+    }
+  }
+
+  if (!lastLoadError) {
+    throw new Error(`No suitable whisper.node binary found for ${target}`);
+  }
+
+  throw new Error(
+    `Unable to load whisper.node for ${target}. Attempted: ${attempted.join(", ")}`,
+    { cause: lastLoadError },
+  );
+}
--- a/packages/whisper-wrapper/tsconfig.json
+++ b/packages/whisper-wrapper/tsconfig.json
@ -0,0 +1,8 @@
+{
+  "extends": "../typescript-config/base.json",
+  "compilerOptions": {
+    "outDir": "dist",
+    "rootDir": "src"
+  },
+  "include": ["src"]
+}
--- a/packages/whisper-wrapper/whisper.cpp
+++ b/packages/whisper-wrapper/whisper.cpp
@ -0,0 +1 @@
+Subproject commit a8d002cfd879315632a579e73f0148d06959de36
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
--- a/pnpm-workspace.yaml
+++ b/pnpm-workspace.yaml
@ -2,3 +2,4 @@ packages:
  - "apps/*"
  - "packages/*"
  - "packages/**"
+  - "!packages/**/whisper.cpp/**"
				`@ -1 +0,0 @@`
				`Subproject commit 2ef717b293fe93872cc3a03ca77942936a281959`
				`@ -0,0 +1 @@`
				`Subproject commit a8d002cfd879315632a579e73f0148d06959de36`