Squashed commit of the following:
commit d6c92ea0ad95c0b640ac9c7df48197412c7518e3
Author: haritabh-z01 <haritabh.z01+github@gmail.com>
Date: Thu Aug 21 23:27:44 2025 +0530
fix: unpacking amical/smart-whisper dep
commit 87819819bb12c07b94f5b52cbb0ea42a452c16e2
Author: haritabh-z01 <haritabh.z01+github@gmail.com>
Date: Thu Aug 21 17:41:02 2025 +0530
fix: unpacking of smart-whisper
commit 81cec166834606cbff2cdd2e750dcc1fb769d4f3
Author: haritabh-z01 <haritabh.z01+github@gmail.com>
Date: Thu Aug 21 16:08:39 2025 +0530
chore: re-enable mac builds
commit f13069c1f350fe06c69aa8f16af41f983f34131e
Author: haritabh-z01 <haritabh.z01+github@gmail.com>
Date: Thu Aug 21 13:06:26 2025 +0530
feat: add smart-whisper package with updated build configuration
commit a24e06856cc595f5e6c5d914090531716d208d2a
Author: haritabh-z01 <haritabh.z01+github@gmail.com>
Date: Thu Aug 21 11:37:25 2025 +0530
chore: bump smart-whisper ver
commit 98f84b6f89c873370f1bb356f11c97dab0185ab7
Author: haritabh-z01 <haritabh.z01+github@gmail.com>
Date: Wed Aug 20 08:59:55 2025 +0530
feat: release wf updates for win builds
commit a85825d362f2a27fdef7b49533a9139aea4785b7
Author: haritabh-z01 <haritabh.z01+github@gmail.com>
Date: Wed Aug 20 08:36:13 2025 +0530
feat: add windows support basics
This commit is contained in:
parent
2d852a0d14
commit
17d034be80
59 changed files with 10524 additions and 3079 deletions
72
.github/workflows/release.yml
vendored
72
.github/workflows/release.yml
vendored
|
|
@ -2,6 +2,8 @@ name: Release
|
|||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- feat.windows.support
|
||||
tags:
|
||||
- 'v*'
|
||||
workflow_dispatch:
|
||||
|
|
@ -12,18 +14,30 @@ on:
|
|||
type: string
|
||||
|
||||
jobs:
|
||||
build-macos:
|
||||
name: Build macOS (${{ matrix.arch }})
|
||||
runs-on: ${{ matrix.arch == 'x64' && 'macos-13' || 'macos-latest' }}
|
||||
build:
|
||||
name: Build ${{ matrix.os == 'macos' && 'macOS' || 'Windows' }} (${{ matrix.arch }})
|
||||
runs-on: ${{ matrix.runner }}
|
||||
strategy:
|
||||
matrix:
|
||||
arch: [arm64, x64]
|
||||
include:
|
||||
- os: macos
|
||||
arch: arm64
|
||||
runner: macos-latest
|
||||
- os: macos
|
||||
arch: x64
|
||||
runner: macos-13
|
||||
- os: windows
|
||||
arch: x64
|
||||
runner: windows-2025
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Verify architecture
|
||||
if: matrix.os == 'macos'
|
||||
run: |
|
||||
CURRENT_ARCH=$(uname -m)
|
||||
echo "Current shell architecture: $CURRENT_ARCH"
|
||||
|
|
@ -47,12 +61,13 @@ jobs:
|
|||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 10.13.1
|
||||
version: 10.15.0
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '24'
|
||||
# Node.js 24.2 through at least 24.6 (as of this writing) has issues dereferencing symlinks in nested directories
|
||||
node-version: '24.1.0'
|
||||
cache: 'pnpm'
|
||||
|
||||
- name: Log Node.js architecture and platform
|
||||
|
|
@ -70,15 +85,18 @@ jobs:
|
|||
run: pnpm download-node
|
||||
|
||||
- name: Import Developer ID cert
|
||||
if: matrix.os == 'macos'
|
||||
uses: apple-actions/import-codesign-certs@v3
|
||||
with:
|
||||
p12-file-base64: ${{ secrets.DEVELOPER_CERT_BASE64 }}
|
||||
p12-password: ${{ secrets.DEVELOPER_CERT_PASSPHRASE }}
|
||||
|
||||
- name: List signing identities (debug)
|
||||
if: matrix.os == 'macos'
|
||||
run: security find-identity -v -p codesigning
|
||||
|
||||
- name: Build artifacts
|
||||
- name: Build artifacts (macOS)
|
||||
if: matrix.os == 'macos'
|
||||
working-directory: apps/desktop
|
||||
env:
|
||||
SKIP_CODESIGNING: false
|
||||
|
|
@ -88,15 +106,24 @@ jobs:
|
|||
APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }}
|
||||
CODESIGNING_IDENTITY: ${{ secrets.CODESIGNING_IDENTITY }}
|
||||
run: |
|
||||
echo "Building ${{ matrix.arch }} artifacts"
|
||||
echo "Building macOS ${{ matrix.arch }} artifacts"
|
||||
pnpm make:${{ matrix.arch }}
|
||||
|
||||
- name: Build artifacts (Windows)
|
||||
if: matrix.os == 'windows'
|
||||
working-directory: apps/desktop
|
||||
run: |
|
||||
echo "Building Windows x64 artifacts"
|
||||
pnpm make:windows
|
||||
|
||||
- name: Get version from package.json
|
||||
id: package_version
|
||||
working-directory: apps/desktop
|
||||
shell: bash
|
||||
run: echo "version=$(node -p "require('./package.json').version")" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Upload artifacts
|
||||
- name: Upload artifacts (macOS)
|
||||
if: matrix.os == 'macos'
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: macos-${{ matrix.arch }}
|
||||
|
|
@ -104,9 +131,18 @@ jobs:
|
|||
apps/desktop/out/make/*-${{ matrix.arch }}.dmg
|
||||
apps/desktop/out/make/zip/darwin/${{ matrix.arch }}/*.zip
|
||||
|
||||
- name: Upload artifacts (Windows)
|
||||
if: matrix.os == 'windows'
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: windows-${{ matrix.arch }}
|
||||
path: |
|
||||
apps/desktop/out/make/squirrel.windows/${{ matrix.arch }}/*.exe
|
||||
apps/desktop/out/make/squirrel.windows/${{ matrix.arch }}/*.nupkg
|
||||
|
||||
release:
|
||||
name: Create Release
|
||||
needs: build-macos
|
||||
needs: build
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
|
|
@ -128,10 +164,10 @@ jobs:
|
|||
- name: List artifacts
|
||||
run: |
|
||||
echo "=== Full artifacts directory structure ==="
|
||||
find artifacts -type f -name "*.dmg" -o -name "*.zip" | sort
|
||||
find artifacts -type f \( -name "*.dmg" -o -name "*.zip" -o -name "*.exe" -o -name "*.nupkg" -o -name "RELEASES" \) | sort
|
||||
echo ""
|
||||
echo "=== Detailed file listing ==="
|
||||
find artifacts -type f \( -name "*.dmg" -o -name "*.zip" \) -exec ls -la {} \;
|
||||
find artifacts -type f \( -name "*.dmg" -o -name "*.zip" -o -name "*.exe" -o -name "*.nupkg" -o -name "RELEASES" \) -exec ls -la {} \;
|
||||
|
||||
- name: Create Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
|
|
@ -152,17 +188,27 @@ jobs:
|
|||
- **Apple Silicon (M1/M2/M3)**: Download the DMG or ZIP file for arm64
|
||||
- **Intel**: Download the DMG or ZIP file for x64
|
||||
|
||||
#### Windows
|
||||
- **Windows (x64)**: Download the .exe installer for 64-bit Windows
|
||||
|
||||
### Installation
|
||||
|
||||
**macOS**:
|
||||
- **DMG**: Download and open the DMG file, then drag Amical to your Applications folder
|
||||
- **ZIP**: Download and extract the ZIP file, then drag Amical to your Applications folder
|
||||
|
||||
The ZIP files are primarily for automatic updates. We recommend using the DMG files for initial installation.
|
||||
**Windows**:
|
||||
- Download and run the .exe installer
|
||||
- Follow the installation wizard
|
||||
- The app will be installed to your user AppData folder and a shortcut will be created
|
||||
|
||||
The ZIP files are primarily for automatic updates. We recommend using the DMG files for initial installation on macOS.
|
||||
files: |
|
||||
artifacts/macos-arm64/*.dmg
|
||||
artifacts/macos-arm64/zip/darwin/arm64/*.zip
|
||||
artifacts/macos-x64/*.dmg
|
||||
artifacts/macos-x64/zip/darwin/x64/*.zip
|
||||
artifacts/windows-x64/*.exe
|
||||
artifacts/windows-x64/*.nupkg
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
[submodule "packages/smart-whisper/whisper.cpp"]
|
||||
path = packages/smart-whisper/whisper.cpp
|
||||
url = https://github.com/ggerganov/whisper.cpp.git
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 46 KiB After Width: | Height: | Size: 34 KiB |
|
|
@ -17,6 +17,8 @@ import {
|
|||
mkdirSync,
|
||||
cpSync,
|
||||
rmSync,
|
||||
lstatSync,
|
||||
readlinkSync,
|
||||
} from "node:fs";
|
||||
import { join, normalize } from "node:path";
|
||||
// Use flora-colossus for finding all dependencies of EXTERNAL_DEPENDENCIES
|
||||
|
|
@ -29,7 +31,6 @@ let nativeModuleDependenciesToPackage: string[] = [];
|
|||
|
||||
export const EXTERNAL_DEPENDENCIES = [
|
||||
"electron-squirrel-startup",
|
||||
"smart-whisper",
|
||||
"@libsql/client",
|
||||
"@libsql/darwin-arm64",
|
||||
"@libsql/darwin-x64",
|
||||
|
|
@ -39,13 +40,13 @@ export const EXTERNAL_DEPENDENCIES = [
|
|||
"libsql",
|
||||
"onnxruntime-node",
|
||||
"workerpool",
|
||||
"@amical/smart-whisper",
|
||||
// Add any other native modules you need here
|
||||
];
|
||||
|
||||
const config: ForgeConfig = {
|
||||
hooks: {
|
||||
prePackage: async (_forgeConfig, platform, arch) => {
|
||||
console.error("prePackage", { platform, arch });
|
||||
const projectRoot = normalize(__dirname);
|
||||
// In a monorepo, node_modules are typically at the root level
|
||||
const monorepoRoot = join(projectRoot, "../../"); // Go up to monorepo root
|
||||
|
|
@ -148,15 +149,56 @@ const config: ForgeConfig = {
|
|||
|
||||
// Copy the package
|
||||
console.log(`Copying ${dep}...`);
|
||||
cpSync(rootDepPath, localDepPath, { recursive: true });
|
||||
cpSync(rootDepPath, localDepPath, { recursive: true, dereference: true, force: true });
|
||||
console.log(`✓ Successfully copied ${dep}`);
|
||||
} catch (error) {
|
||||
console.error(`Failed to copy ${dep}:`, error);
|
||||
}
|
||||
}
|
||||
|
||||
// Second pass: Replace any symlinks with dereferenced copies
|
||||
console.log("Checking for symlinks in copied dependencies...");
|
||||
for (const dep of nativeModuleDependenciesToPackage) {
|
||||
const localDepPath = join(localNodeModules, dep);
|
||||
|
||||
try {
|
||||
if (existsSync(localDepPath)) {
|
||||
const stats = lstatSync(localDepPath);
|
||||
if (stats.isSymbolicLink()) {
|
||||
console.log(`Found symlink for ${dep}, replacing with dereferenced copy...`);
|
||||
|
||||
// Read where the symlink points to
|
||||
const symlinkTarget = readlinkSync(localDepPath);
|
||||
const absoluteTarget = join(localDepPath, "..", symlinkTarget);
|
||||
const sourcePath = normalize(absoluteTarget);
|
||||
|
||||
console.log(` Symlink points to: ${sourcePath}`);
|
||||
|
||||
// Remove the symlink
|
||||
rmSync(localDepPath, { recursive: true, force: true });
|
||||
|
||||
// Copy with dereference to get actual content
|
||||
cpSync(sourcePath, localDepPath, {
|
||||
recursive: true,
|
||||
force: true,
|
||||
dereference: true // Follow symlinks and copy actual content
|
||||
});
|
||||
|
||||
console.log(`✓ Successfully replaced symlink for ${dep} with actual content`);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`Failed to check/replace symlink for ${dep}:`, error);
|
||||
}
|
||||
}
|
||||
|
||||
// Prune onnxruntime-node to keep only the required binary
|
||||
console.log("Pruning onnxruntime-node binaries...");
|
||||
const targetPlatform = platform;
|
||||
const targetArch = arch;
|
||||
|
||||
console.log(
|
||||
`Pruning onnxruntime-node binaries for ${targetPlatform}/${targetArch}...`,
|
||||
);
|
||||
const onnxBinRoot = join(localNodeModules, "onnxruntime-node", "bin");
|
||||
if (existsSync(onnxBinRoot)) {
|
||||
const napiVersionDirs = readdirSync(onnxBinRoot);
|
||||
|
|
@ -169,18 +211,18 @@ const config: ForgeConfig = {
|
|||
const platformPath = join(napiVersionPath, platformDir);
|
||||
if (!statSync(platformPath).isDirectory()) continue;
|
||||
|
||||
// Delete other platform directories
|
||||
if (platformDir !== process.platform) {
|
||||
// Delete unused platforms except Linux (keep for compatibility)
|
||||
if (platformDir !== targetPlatform && platformDir !== "linux") {
|
||||
console.log(`- Deleting unused platform: ${platformPath}`);
|
||||
rmSync(platformPath, { recursive: true, force: true });
|
||||
} else {
|
||||
} else if (platformDir === targetPlatform) {
|
||||
// Now in the correct platform dir, prune architectures
|
||||
const archDirs = readdirSync(platformPath);
|
||||
for (const archDir of archDirs) {
|
||||
const archPath = join(platformPath, archDir);
|
||||
if (!statSync(archPath).isDirectory()) continue;
|
||||
|
||||
if (archDir !== process.arch) {
|
||||
if (archDir !== targetArch) {
|
||||
console.log(`- Deleting unused arch: ${archPath}`);
|
||||
rmSync(archPath, { recursive: true, force: true });
|
||||
}
|
||||
|
|
@ -196,6 +238,7 @@ const config: ForgeConfig = {
|
|||
}
|
||||
},
|
||||
packageAfterPrune: async (_forgeConfig, buildPath) => {
|
||||
console.error("PRE PACKAGE");
|
||||
try {
|
||||
function getItemsFromFolder(
|
||||
path: string,
|
||||
|
|
@ -264,14 +307,14 @@ const config: ForgeConfig = {
|
|||
packagerConfig: {
|
||||
asar: {
|
||||
unpack:
|
||||
"{*.node,*.dylib,*.so,*.dll,*.metal,**/whisper.cpp/**,**/.vite/build/whisper-worker-fork.js,**/node_modules/smart-whisper/**,**/node_modules/jest-worker/**}",
|
||||
"{*.node,*.dylib,*.so,*.dll,*.metal,**/node_modules/@amical/smart-whisper/**,**/whisper.cpp/**,**/.vite/build/whisper-worker-fork.js,**/node_modules/jest-worker/**,**/onnxruntime-node/bin/**}",
|
||||
},
|
||||
name: "Amical",
|
||||
executableName: "Amical",
|
||||
icon: "./assets/logo", // Path to your icon file
|
||||
appBundleId: "com.amical.desktop", // Proper bundle ID
|
||||
extraResource: [
|
||||
"../../packages/native-helpers/swift-helper/bin",
|
||||
`${process.platform === "win32" ? "../../packages/native-helpers/windows-helper/bin" : "../../packages/native-helpers/swift-helper/bin"}`,
|
||||
"./src/db/migrations",
|
||||
// Only include the platform-specific node binary
|
||||
`./node-binaries/${process.platform}-${process.arch}/node${
|
||||
|
|
@ -356,8 +399,18 @@ const config: ForgeConfig = {
|
|||
}
|
||||
|
||||
// Handle scoped packages: if dep is @scope/package, also keep @scope/ directory
|
||||
// But not for our workspace packages
|
||||
if (dep.includes("/") && dep.startsWith("@")) {
|
||||
const scopeDir = dep.split("/")[0]; // @libsql/client -> @libsql
|
||||
// For workspace packages, keep only the package itself and its scope directory
|
||||
if (scopeDir === "@amical") {
|
||||
if (filePath.startsWith(`/node_modules/${dep}`) ||
|
||||
filePath === `/node_modules/${scopeDir}`) {
|
||||
KEEP_FILE.keep = true;
|
||||
KEEP_FILE.log = true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (
|
||||
filePath === `/node_modules/${scopeDir}/` ||
|
||||
filePath === `/node_modules/${scopeDir}` ||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@amical/desktop",
|
||||
"version": "0.0.9",
|
||||
"version": "0.0.9-windows-ci-test",
|
||||
"description": "Amical Desktop app",
|
||||
"main": ".vite/build/main.js",
|
||||
"productName": "Amical",
|
||||
|
|
@ -8,6 +8,11 @@
|
|||
"type": "git",
|
||||
"url": "https://github.com/amicalhq/amical"
|
||||
},
|
||||
"author": {
|
||||
"name": "Amical",
|
||||
"email": "contact@amical.ai",
|
||||
"url": "https://amical.ai"
|
||||
},
|
||||
"scripts": {
|
||||
"start": "pnpm build:deps && electron-forge start",
|
||||
"start:onboarding": "FORCE_ONBOARDING=true pnpm start",
|
||||
|
|
@ -22,6 +27,8 @@
|
|||
"make:zip:x64": "pnpm build:deps && electron-forge make --targets=@electron-forge/maker-zip --platform=darwin --arch=x64",
|
||||
"package:arm64": "pnpm build:deps && electron-forge package --platform=darwin --arch=arm64",
|
||||
"package:x64": "pnpm build:deps && electron-forge package --platform=darwin --arch=x64",
|
||||
"package:windows": "pnpm build:deps && electron-forge package --platform=win32 --arch=x64",
|
||||
"make:windows": "pnpm build:deps && electron-forge make --platform=win32 --arch=x64",
|
||||
"publish": "electron-forge publish",
|
||||
"lint": "eslint --ext .ts,.tsx .",
|
||||
"format:check": "prettier --check \"**/*.{ts,tsx,md,json,mjs,mts,css,mdx}\" --cache --ignore-path=../../.prettierignore",
|
||||
|
|
@ -29,9 +36,11 @@
|
|||
"db:generate": "drizzle-kit generate",
|
||||
"db:push": "drizzle-kit push",
|
||||
"db:migrate": "drizzle-kit migrate",
|
||||
"build:deps": "pnpm build:types && pnpm build:swift-helper",
|
||||
"build:deps": "pnpm build:types && pnpm build:native-helper",
|
||||
"build:types": "pnpm --filter @amical/types build",
|
||||
"build:swift-helper": "pnpm --filter @amical/swift-helper build",
|
||||
"build:windows-helper": "pnpm --filter @amical/windows-helper build",
|
||||
"build:native-helper": "node -p \"process.platform === 'darwin' ? 'build:swift-helper' : process.platform === 'win32' ? 'build:windows-helper' : 'echo No native helpers'\" | xargs pnpm run",
|
||||
"dev": "pnpm start",
|
||||
"download-node": "tsx scripts/download-node-binaries.ts",
|
||||
"download-node:all": "tsx scripts/download-node-binaries.ts --all"
|
||||
|
|
@ -39,16 +48,16 @@
|
|||
"keywords": [],
|
||||
"license": "MIT",
|
||||
"devDependencies": {
|
||||
"@electron-forge/cli": "^7.8.1",
|
||||
"@electron-forge/maker-deb": "^7.8.1",
|
||||
"@electron-forge/maker-dmg": "^7.8.1",
|
||||
"@electron-forge/maker-rpm": "^7.8.1",
|
||||
"@electron-forge/maker-squirrel": "^7.8.1",
|
||||
"@electron-forge/maker-zip": "^7.8.1",
|
||||
"@electron-forge/plugin-auto-unpack-natives": "^7.8.1",
|
||||
"@electron-forge/plugin-fuses": "^7.8.1",
|
||||
"@electron-forge/plugin-vite": "^7.8.1",
|
||||
"@electron-forge/publisher-github": "^7.8.1",
|
||||
"@electron-forge/cli": "7.8.2",
|
||||
"@electron-forge/maker-deb": "7.8.2",
|
||||
"@electron-forge/maker-dmg": "7.8.2",
|
||||
"@electron-forge/maker-rpm": "7.8.2",
|
||||
"@electron-forge/maker-squirrel": "7.8.2",
|
||||
"@electron-forge/maker-zip": "7.8.2",
|
||||
"@electron-forge/plugin-auto-unpack-natives": "7.8.2",
|
||||
"@electron-forge/plugin-fuses": "7.8.2",
|
||||
"@electron-forge/plugin-vite": "7.8.2",
|
||||
"@electron-forge/publisher-github": "7.8.2",
|
||||
"@electron/fuses": "^1.8.0",
|
||||
"@rollup/plugin-commonjs": "^28.0.6",
|
||||
"@tailwindcss/vite": "^4.1.6",
|
||||
|
|
@ -78,6 +87,7 @@
|
|||
"@hookform/resolvers": "^5.0.1",
|
||||
"@libsql/client": "^0.15.9",
|
||||
"@libsql/darwin-x64": "0.5.13",
|
||||
"@libsql/win32-x64-msvc": "^0.5.13",
|
||||
"@openrouter/ai-sdk-provider": "^0.7.2",
|
||||
"@radix-ui/react-accordion": "^1.2.10",
|
||||
"@radix-ui/react-alert-dialog": "^1.1.13",
|
||||
|
|
@ -142,7 +152,7 @@
|
|||
"react-hook-form": "^7.56.3",
|
||||
"react-resizable-panels": "^3.0.2",
|
||||
"recharts": "^2.15.3",
|
||||
"smart-whisper": "0.2.0",
|
||||
"@amical/smart-whisper": "workspace:*",
|
||||
"sonner": "^2.0.3",
|
||||
"split2": "^4.2.0",
|
||||
"superjson": "^2.2.2",
|
||||
|
|
|
|||
|
|
@ -167,8 +167,8 @@ export class AppManager {
|
|||
return this.serviceManager.getService("transcriptionService");
|
||||
}
|
||||
|
||||
getSwiftIOBridge() {
|
||||
return this.serviceManager.getService("swiftIOBridge");
|
||||
getNativeBridge() {
|
||||
return this.serviceManager.getService("nativeBridge");
|
||||
}
|
||||
|
||||
getAutoUpdaterService() {
|
||||
|
|
|
|||
|
|
@ -11,23 +11,23 @@ export class EventHandlers {
|
|||
}
|
||||
|
||||
setupEventHandlers(): void {
|
||||
this.setupSwiftBridgeEventHandlers();
|
||||
this.setupNativeBridgeEventHandlers();
|
||||
this.setupGeneralIPCHandlers();
|
||||
this.setupOnboardingIPCHandlers();
|
||||
// Note: Audio IPC handlers are now managed by RecordingService
|
||||
}
|
||||
|
||||
private setupSwiftBridgeEventHandlers(): void {
|
||||
private setupNativeBridgeEventHandlers(): void {
|
||||
try {
|
||||
const swiftBridge = this.appManager.getSwiftIOBridge();
|
||||
if (!swiftBridge) {
|
||||
logger.main.warn("Swift bridge not available for event handlers");
|
||||
const nativeBridge = this.appManager.getNativeBridge();
|
||||
if (!nativeBridge) {
|
||||
logger.main.warn("Native bridge not available for event handlers");
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle non-shortcut related events only
|
||||
swiftBridge.on("helperEvent", (event: HelperEvent) => {
|
||||
logger.swift.debug("Received helperEvent from SwiftIOBridge", {
|
||||
nativeBridge.on("helperEvent", (event: HelperEvent) => {
|
||||
logger.swift.debug("Received helperEvent from native bridge", {
|
||||
event,
|
||||
});
|
||||
|
||||
|
|
@ -35,15 +35,15 @@ export class EventHandlers {
|
|||
// This handler can process other helper events if needed
|
||||
});
|
||||
|
||||
swiftBridge.on("error", (error: Error) => {
|
||||
logger.main.error("SwiftIOBridge error:", error);
|
||||
nativeBridge.on("error", (error: Error) => {
|
||||
logger.main.error("Native bridge error:", error);
|
||||
});
|
||||
|
||||
swiftBridge.on("close", (code: number | null) => {
|
||||
logger.swift.warn("Swift helper process closed", { code });
|
||||
nativeBridge.on("close", (code: number | null) => {
|
||||
logger.swift.warn("Native helper process closed", { code });
|
||||
});
|
||||
} catch (error) {
|
||||
logger.main.warn("Swift bridge not available for event handlers");
|
||||
logger.main.warn("Native bridge not available for event handlers");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,13 +5,14 @@ import { app } from "electron";
|
|||
import * as path from "path";
|
||||
|
||||
// Set GGML_METAL_PATH_RESOURCES before any other imports
|
||||
// This ensures smart-whisper can find its resources when unpacked from asar
|
||||
// This ensures @amical/smart-whisper can find its resources when unpacked from asar
|
||||
if (app.isPackaged) {
|
||||
// Point to the unpacked whisper.cpp directory
|
||||
process.env.GGML_METAL_PATH_RESOURCES = path.join(
|
||||
process.resourcesPath,
|
||||
"app.asar.unpacked",
|
||||
"node_modules",
|
||||
"@amical",
|
||||
"smart-whisper",
|
||||
"whisper.cpp",
|
||||
);
|
||||
|
|
|
|||
|
|
@ -218,12 +218,12 @@ export class RecordingManager extends EventEmitter {
|
|||
|
||||
// Mute system audio
|
||||
try {
|
||||
const swiftBridge = this.serviceManager.getService("swiftIOBridge");
|
||||
if (swiftBridge) {
|
||||
await swiftBridge.call("muteSystemAudio", {});
|
||||
const nativeBridge = this.serviceManager.getService("nativeBridge");
|
||||
if (nativeBridge) {
|
||||
await nativeBridge.call("muteSystemAudio", {});
|
||||
}
|
||||
} catch (error) {
|
||||
logger.main.warn("Swift bridge not available for audio muting");
|
||||
logger.main.warn("Native bridge not available for audio muting");
|
||||
}
|
||||
|
||||
this.setState("recording");
|
||||
|
|
@ -252,12 +252,12 @@ export class RecordingManager extends EventEmitter {
|
|||
|
||||
// Restore system audio
|
||||
try {
|
||||
const swiftBridge = this.serviceManager.getService("swiftIOBridge");
|
||||
if (swiftBridge) {
|
||||
await swiftBridge.call("restoreSystemAudio", {});
|
||||
const nativeBridge = this.serviceManager.getService("nativeBridge");
|
||||
if (nativeBridge) {
|
||||
await nativeBridge.call("restoreSystemAudio", {});
|
||||
}
|
||||
} catch (error) {
|
||||
logger.main.warn("Swift bridge not available for audio restore");
|
||||
logger.main.warn("Native bridge not available for audio restore");
|
||||
}
|
||||
|
||||
logger.audio.info("Recording stop initiated", {
|
||||
|
|
@ -412,14 +412,14 @@ export class RecordingManager extends EventEmitter {
|
|||
}
|
||||
|
||||
try {
|
||||
const swiftBridge = this.serviceManager.getService("swiftIOBridge");
|
||||
const nativeBridge = this.serviceManager.getService("nativeBridge");
|
||||
|
||||
logger.main.info("Pasting transcription to active application", {
|
||||
textLength: transcription.length,
|
||||
});
|
||||
|
||||
if (swiftBridge) {
|
||||
swiftBridge.call("pasteText", {
|
||||
if (nativeBridge) {
|
||||
nativeBridge.call("pasteText", {
|
||||
transcript: transcription,
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ import { logger } from "../logger";
|
|||
import { ModelManagerService } from "../../services/model-manager";
|
||||
import { TranscriptionService } from "../../services/transcription-service";
|
||||
import { SettingsService } from "../../services/settings-service";
|
||||
import { SwiftIOBridge } from "../../services/platform/swift-bridge-service";
|
||||
import { NativeBridge } from "../../services/platform/native-bridge-service";
|
||||
import { AutoUpdaterService } from "../services/auto-updater";
|
||||
import { RecordingManager } from "./recording-manager";
|
||||
import { VADService } from "../../services/vad-service";
|
||||
|
|
@ -11,6 +11,7 @@ import { WindowManager } from "../core/window-manager";
|
|||
import { createIPCHandler } from "electron-trpc-experimental/main";
|
||||
import { router } from "../../trpc/router";
|
||||
import { createContext } from "../../trpc/context";
|
||||
import { isMacOS, isWindows } from "../../utils/platform";
|
||||
|
||||
/**
|
||||
* Service map for type-safe service access
|
||||
|
|
@ -20,7 +21,7 @@ export interface ServiceMap {
|
|||
transcriptionService: TranscriptionService;
|
||||
settingsService: SettingsService;
|
||||
vadService: VADService;
|
||||
swiftIOBridge: SwiftIOBridge;
|
||||
nativeBridge: NativeBridge;
|
||||
autoUpdaterService: AutoUpdaterService;
|
||||
recordingManager: RecordingManager;
|
||||
shortcutManager: ShortcutManager;
|
||||
|
|
@ -39,7 +40,7 @@ export class ServiceManager {
|
|||
private settingsService: SettingsService | null = null;
|
||||
private vadService: VADService | null = null;
|
||||
|
||||
private swiftIOBridge: SwiftIOBridge | null = null;
|
||||
private nativeBridge: NativeBridge | null = null;
|
||||
private autoUpdaterService: AutoUpdaterService | null = null;
|
||||
private recordingManager: RecordingManager | null = null;
|
||||
private shortcutManager: ShortcutManager | null = null;
|
||||
|
|
@ -145,9 +146,9 @@ export class ServiceManager {
|
|||
}
|
||||
|
||||
private initializePlatformServices(): void {
|
||||
// Initialize Swift bridge for macOS integration
|
||||
if (process.platform === "darwin") {
|
||||
this.swiftIOBridge = new SwiftIOBridge();
|
||||
// Initialize platform-specific bridge
|
||||
if (isMacOS() || isWindows()) {
|
||||
this.nativeBridge = new NativeBridge();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -163,7 +164,7 @@ export class ServiceManager {
|
|||
);
|
||||
}
|
||||
this.shortcutManager = new ShortcutManager(this.settingsService);
|
||||
await this.shortcutManager.initialize(this.swiftIOBridge);
|
||||
await this.shortcutManager.initialize(this.nativeBridge);
|
||||
|
||||
// Connect shortcut events to recording manager
|
||||
this.recordingManager.setupShortcutListeners(this.shortcutManager);
|
||||
|
|
@ -213,7 +214,7 @@ export class ServiceManager {
|
|||
transcriptionService: this.transcriptionService ?? undefined,
|
||||
settingsService: this.settingsService ?? undefined,
|
||||
vadService: this.vadService ?? undefined,
|
||||
swiftIOBridge: this.swiftIOBridge ?? undefined,
|
||||
nativeBridge: this.nativeBridge ?? undefined,
|
||||
autoUpdaterService: this.autoUpdaterService ?? undefined,
|
||||
recordingManager: this.recordingManager ?? undefined,
|
||||
shortcutManager: this.shortcutManager ?? undefined,
|
||||
|
|
@ -242,9 +243,9 @@ export class ServiceManager {
|
|||
await this.vadService.dispose();
|
||||
}
|
||||
|
||||
if (this.swiftIOBridge) {
|
||||
logger.main.info("Stopping Swift helper...");
|
||||
this.swiftIOBridge.stopHelper();
|
||||
if (this.nativeBridge) {
|
||||
logger.main.info("Stopping native helper...");
|
||||
this.nativeBridge.stopHelper();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
import { EventEmitter } from "events";
|
||||
import { globalShortcut } from "electron";
|
||||
import { SettingsService } from "@/services/settings-service";
|
||||
import { SwiftIOBridge } from "@/services/platform/swift-bridge-service";
|
||||
import { matchesShortcutKey, getKeyNameFromPayload } from "@/utils/keycode-map";
|
||||
import { NativeBridge } from "@/services/platform/native-bridge-service";
|
||||
import { getKeyNameFromPayload } from "@/utils/keycode-map";
|
||||
import { KeyEventPayload, HelperEvent } from "@amical/types";
|
||||
import { logger } from "@/main/logger";
|
||||
|
||||
|
|
@ -25,7 +25,7 @@ export class ShortcutManager extends EventEmitter {
|
|||
toggleRecording: "",
|
||||
};
|
||||
private settingsService: SettingsService;
|
||||
private swiftIOBridge: SwiftIOBridge | null = null;
|
||||
private nativeBridge: NativeBridge | null = null;
|
||||
private isRecordingShortcut: boolean = false;
|
||||
|
||||
constructor(settingsService: SettingsService) {
|
||||
|
|
@ -33,8 +33,8 @@ export class ShortcutManager extends EventEmitter {
|
|||
this.settingsService = settingsService;
|
||||
}
|
||||
|
||||
async initialize(swiftIOBridge: SwiftIOBridge | null) {
|
||||
this.swiftIOBridge = swiftIOBridge;
|
||||
async initialize(nativeBridge: NativeBridge | null) {
|
||||
this.nativeBridge = nativeBridge;
|
||||
await this.loadShortcuts();
|
||||
this.setupEventListeners();
|
||||
}
|
||||
|
|
@ -59,12 +59,12 @@ export class ShortcutManager extends EventEmitter {
|
|||
}
|
||||
|
||||
private setupEventListeners() {
|
||||
if (!this.swiftIOBridge) {
|
||||
log.warn("SwiftIOBridge not available, shortcuts will not work");
|
||||
if (!this.nativeBridge) {
|
||||
log.warn("Native bridge not available, shortcuts will not work");
|
||||
return;
|
||||
}
|
||||
|
||||
this.swiftIOBridge.on("helperEvent", (event: HelperEvent) => {
|
||||
this.nativeBridge.on("helperEvent", (event: HelperEvent) => {
|
||||
switch (event.type) {
|
||||
case "flagsChanged":
|
||||
this.handleFlagsChanged(event.payload);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
// Worker process entry point for fork
|
||||
import { Whisper } from "smart-whisper";
|
||||
import { Whisper } from "@amical/smart-whisper";
|
||||
|
||||
// Simple console-based logging for worker process
|
||||
const logger = {
|
||||
|
|
@ -29,7 +29,7 @@ const methods = {
|
|||
whisperInstance = null;
|
||||
}
|
||||
|
||||
const { Whisper } = await import("smart-whisper");
|
||||
const { Whisper } = await import("@amical/smart-whisper");
|
||||
whisperInstance = new Whisper(modelPath, { gpu: true });
|
||||
try {
|
||||
await whisperInstance.load();
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
// This file contains just the Whisper-specific operations that need to run in a separate process
|
||||
import { Whisper } from "smart-whisper";
|
||||
import { Whisper } from "@amical/smart-whisper";
|
||||
|
||||
// Simple console-based logging for worker process
|
||||
const logger = {
|
||||
|
|
@ -27,7 +27,7 @@ export async function initializeModel(modelPath: string): Promise<void> {
|
|||
whisperInstance = null;
|
||||
}
|
||||
|
||||
const { Whisper } = await import("smart-whisper");
|
||||
const { Whisper } = await import("@amical/smart-whisper");
|
||||
whisperInstance = new Whisper(modelPath, { gpu: true });
|
||||
try {
|
||||
await whisperInstance.load();
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
import { spawn, ChildProcessWithoutNullStreams } from "child_process";
|
||||
import path from "node:path";
|
||||
import fs from "node:fs";
|
||||
import process from "node:process"; // Added import for process
|
||||
import { app, app as electronApp } from "electron"; // electronApp for app.getAppPath() consistency
|
||||
import { app as electronApp } from "electron";
|
||||
import split2 from "split2";
|
||||
import { v4 as uuid } from "uuid";
|
||||
import { getNativeHelperName, getNativeHelperDir } from "../../utils/platform";
|
||||
|
||||
import { EventEmitter } from "events";
|
||||
import { createScopedLogger } from "../../main/logger";
|
||||
|
|
@ -54,21 +54,21 @@ interface RPCMethods {
|
|||
}
|
||||
|
||||
// Define event types for the client
|
||||
interface SwiftIOBridgeEvents {
|
||||
interface NativeBridgeEvents {
|
||||
helperEvent: (event: HelperEvent) => void;
|
||||
error: (error: Error) => void;
|
||||
close: (code: number | null, signal: NodeJS.Signals | null) => void;
|
||||
ready: () => void; // Emitted when the helper process is successfully spawned
|
||||
}
|
||||
|
||||
export class SwiftIOBridge extends EventEmitter {
|
||||
export class NativeBridge extends EventEmitter {
|
||||
private proc: ChildProcessWithoutNullStreams | null = null;
|
||||
private pending = new Map<
|
||||
string,
|
||||
{ callback: (resp: RpcResponse) => void; startTime: number }
|
||||
>();
|
||||
private helperPath: string;
|
||||
private logger = createScopedLogger("swift-bridge");
|
||||
private logger = createScopedLogger("native-bridge");
|
||||
|
||||
constructor() {
|
||||
super();
|
||||
|
|
@ -77,7 +77,9 @@ export class SwiftIOBridge extends EventEmitter {
|
|||
}
|
||||
|
||||
private determineHelperPath(): string {
|
||||
const helperName = "SwiftHelper"; // Swift native helper executable
|
||||
const helperName = getNativeHelperName();
|
||||
const helperDir = getNativeHelperDir();
|
||||
|
||||
return electronApp.isPackaged
|
||||
? path.join(process.resourcesPath, "bin", helperName)
|
||||
: path.join(
|
||||
|
|
@ -86,7 +88,7 @@ export class SwiftIOBridge extends EventEmitter {
|
|||
"..",
|
||||
"packages",
|
||||
"native-helpers",
|
||||
"swift-helper",
|
||||
helperDir,
|
||||
"bin",
|
||||
helperName,
|
||||
);
|
||||
|
|
@ -96,20 +98,33 @@ export class SwiftIOBridge extends EventEmitter {
|
|||
try {
|
||||
fs.accessSync(this.helperPath, fs.constants.X_OK);
|
||||
} catch (err) {
|
||||
this.logger.error("SwiftHelper executable not found or not executable", {
|
||||
helperPath: this.helperPath,
|
||||
});
|
||||
this.emit(
|
||||
"error",
|
||||
new Error(
|
||||
`Helper executable not found at ${this.helperPath}. Attempt to build it if in dev mode.`,
|
||||
),
|
||||
const helperName = getNativeHelperName();
|
||||
this.logger.error(
|
||||
`${helperName} executable not found or not executable`,
|
||||
{
|
||||
helperPath: this.helperPath,
|
||||
},
|
||||
);
|
||||
// In a real app, you might try to build it here or provide more robust error handling.
|
||||
// In production, provide a more user-friendly error message
|
||||
const errorMessage = electronApp.isPackaged
|
||||
? `${helperName} is not available. Some features may not work correctly.`
|
||||
: `Helper executable not found at ${this.helperPath}. Please build it first.`;
|
||||
|
||||
this.emit("error", new Error(errorMessage));
|
||||
|
||||
// Log detailed error for debugging
|
||||
this.logger.error("Helper initialization failed", {
|
||||
helperPath: this.helperPath,
|
||||
isPackaged: electronApp.isPackaged,
|
||||
platform: process.platform,
|
||||
error: err,
|
||||
});
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
this.logger.info("Spawning SwiftHelper", { helperPath: this.helperPath });
|
||||
const helperName = getNativeHelperName();
|
||||
this.logger.info(`Spawning ${helperName}`, { helperPath: this.helperPath });
|
||||
this.proc = spawn(this.helperPath, [], { stdio: ["pipe", "pipe", "pipe"] });
|
||||
|
||||
this.proc.stdout.pipe(split2()).on("data", (line: string) => {
|
||||
|
|
@ -150,19 +165,24 @@ export class SwiftIOBridge extends EventEmitter {
|
|||
|
||||
this.proc.stderr.on("data", (data: Buffer) => {
|
||||
const errorMsg = data.toString();
|
||||
this.logger.warn("SwiftHelper stderr output", { message: errorMsg });
|
||||
const helperName = getNativeHelperName();
|
||||
this.logger.warn(`${helperName} stderr output`, { message: errorMsg });
|
||||
// Don't emit as error since stderr is often just debug info
|
||||
// this.emit('error', new Error(`Helper stderr: ${errorMsg}`));
|
||||
});
|
||||
|
||||
this.proc.on("error", (err) => {
|
||||
this.logger.error("Failed to start SwiftHelper process", { error: err });
|
||||
const helperName = getNativeHelperName();
|
||||
this.logger.error(`Failed to start ${helperName} process`, {
|
||||
error: err,
|
||||
});
|
||||
this.emit("error", err);
|
||||
this.proc = null;
|
||||
});
|
||||
|
||||
this.proc.on("close", (code, signal) => {
|
||||
this.logger.info("SwiftHelper process exited", { code, signal });
|
||||
const helperName = getNativeHelperName();
|
||||
this.logger.info(`${helperName} process exited`, { code, signal });
|
||||
this.emit("close", code, signal);
|
||||
this.proc = null;
|
||||
// Optionally, implement retry logic or notify further
|
||||
|
|
@ -180,11 +200,18 @@ export class SwiftIOBridge extends EventEmitter {
|
|||
timeoutMs = 5000,
|
||||
): Promise<RPCMethods[M]["result"]> {
|
||||
if (!this.proc || !this.proc.stdin || !this.proc.stdin.writable) {
|
||||
return Promise.reject(
|
||||
new Error(
|
||||
"Swift helper process is not running or stdin is not writable.",
|
||||
),
|
||||
);
|
||||
const helperName = getNativeHelperName();
|
||||
const errorMessage = electronApp.isPackaged
|
||||
? `${helperName} is not available for this operation.`
|
||||
: "Native helper process is not running or stdin is not writable.";
|
||||
|
||||
this.logger.warn(`Cannot call ${method}: helper not available`, {
|
||||
method,
|
||||
isPackaged: electronApp.isPackaged,
|
||||
platform: process.platform,
|
||||
});
|
||||
|
||||
return Promise.reject(new Error(errorMessage));
|
||||
}
|
||||
|
||||
const id = uuid();
|
||||
|
|
@ -267,7 +294,7 @@ export class SwiftIOBridge extends EventEmitter {
|
|||
const duration = timedOutAt - startTime;
|
||||
reject(
|
||||
new Error(
|
||||
`SwiftIOBridge: RPC call "${method}" (id: ${id}) timed out after ${timeoutMs}ms (duration: ${duration}ms, started: ${new Date(startTime).toISOString()})`,
|
||||
`NativeBridge: RPC call "${method}" (id: ${id}) timed out after ${timeoutMs}ms (duration: ${duration}ms, started: ${new Date(startTime).toISOString()})`,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
|
@ -283,24 +310,25 @@ export class SwiftIOBridge extends EventEmitter {
|
|||
|
||||
public stopHelper(): void {
|
||||
if (this.proc) {
|
||||
this.logger.info("Stopping SwiftHelper process");
|
||||
const helperName = getNativeHelperName();
|
||||
this.logger.info(`Stopping ${helperName} process`);
|
||||
this.proc.kill();
|
||||
this.proc = null;
|
||||
}
|
||||
}
|
||||
|
||||
// Typed event emitter methods
|
||||
on<E extends keyof SwiftIOBridgeEvents>(
|
||||
on<E extends keyof NativeBridgeEvents>(
|
||||
event: E,
|
||||
listener: SwiftIOBridgeEvents[E],
|
||||
listener: NativeBridgeEvents[E],
|
||||
): this {
|
||||
super.on(event, listener);
|
||||
return this;
|
||||
}
|
||||
|
||||
emit<E extends keyof SwiftIOBridgeEvents>(
|
||||
emit<E extends keyof NativeBridgeEvents>(
|
||||
event: E,
|
||||
...args: Parameters<SwiftIOBridgeEvents[E]>
|
||||
...args: Parameters<NativeBridgeEvents[E]>
|
||||
): boolean {
|
||||
return super.emit(event, ...args);
|
||||
}
|
||||
|
|
@ -10,12 +10,12 @@ class AppContextStore {
|
|||
const serviceManager = ServiceManager.getInstance();
|
||||
if (!serviceManager) return; // Silent fail
|
||||
|
||||
const swiftBridge = serviceManager.getService("swiftIOBridge");
|
||||
if (!swiftBridge) {
|
||||
logger.main.warn("SwiftIOBridge not available");
|
||||
const nativeBridge = serviceManager.getService("nativeBridge");
|
||||
if (!nativeBridge) {
|
||||
logger.main.warn("Native bridge not available");
|
||||
return;
|
||||
}
|
||||
const context = await swiftBridge.call("getAccessibilityContext", {
|
||||
const context = await nativeBridge.call("getAccessibilityContext", {
|
||||
editableOnly: false,
|
||||
});
|
||||
this.accessibilityContext = context;
|
||||
|
|
|
|||
53
apps/desktop/src/utils/platform.ts
Normal file
53
apps/desktop/src/utils/platform.ts
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
import process from "node:process";
|
||||
|
||||
/**
|
||||
* Platform detection utilities
|
||||
*/
|
||||
|
||||
/**
 * Platform identifiers this module names explicitly. Each literal is compared
 * against (or cast from) `process.platform` by the helpers in this file.
 */
export type Platform = "darwin" | "win32" | "linux";
|
||||
|
||||
/**
 * Report the platform identifier of the running process.
 *
 * The value is read straight from `process.platform`. The `Platform` cast is a
 * compile-time assertion only, so an identifier outside the union is returned
 * unchanged rather than rejected.
 *
 * @returns The current `process.platform` value, typed as `Platform`.
 */
export function getPlatform(): Platform {
  const current = process.platform;
  return current as Platform;
}
|
||||
|
||||
/**
 * Tell whether the current process is running on Windows.
 *
 * @returns `true` when `process.platform` is `"win32"`, otherwise `false`.
 */
export function isWindows(): boolean {
  const { platform } = process;
  return platform === "win32";
}
|
||||
|
||||
/**
 * Tell whether the current process is running on macOS.
 *
 * @returns `true` when `process.platform` is `"darwin"`, otherwise `false`.
 */
export function isMacOS(): boolean {
  const { platform } = process;
  return platform === "darwin";
}
|
||||
|
||||
/**
 * Tell whether the current process is running on Linux.
 *
 * @returns `true` when `process.platform` is `"linux"`, otherwise `false`.
 */
export function isLinux(): boolean {
  const { platform } = process;
  return platform === "linux";
}
|
||||
|
||||
/**
 * Resolve the file name of the native helper executable for this platform.
 *
 * Windows gets `WindowsHelper.exe`; every other platform (not only macOS)
 * falls back to `SwiftHelper`.
 *
 * @returns The helper executable's file name, without any directory part.
 */
export function getNativeHelperName(): string {
  if (isWindows()) {
    return "WindowsHelper.exe";
  }
  return "SwiftHelper";
}
|
||||
|
||||
/**
 * Resolve the directory name that holds the native helper for this platform.
 *
 * Windows maps to `windows-helper`; every other platform (not only macOS)
 * maps to `swift-helper`.
 *
 * @returns The helper's directory name as a single path segment.
 */
export function getNativeHelperDir(): string {
  if (isWindows()) {
    return "windows-helper";
  }
  return "swift-helper";
}
|
||||
|
||||
/**
 * Produce a human-readable name for the platform the process is running on.
 *
 * @returns `"macOS"`, `"Windows"` or `"Linux"` for the matching
 *   `process.platform` value; for any other platform the raw
 *   `process.platform` identifier is returned as-is.
 */
export function getPlatformDisplayName(): string {
  // A Map (rather than a plain object) keeps the lookup free of inherited keys.
  const displayNames = new Map<string, string>([
    ["darwin", "macOS"],
    ["win32", "Windows"],
    ["linux", "Linux"],
  ]);
  const current = process.platform;
  return displayNames.get(current) ?? current;
}
|
||||
|
|
@ -20,7 +20,7 @@ export default defineConfig({
|
|||
entryFileNames: "[name].js",
|
||||
},
|
||||
external: [
|
||||
"smart-whisper",
|
||||
"@amical/smart-whisper",
|
||||
"@libsql/client",
|
||||
"@libsql/darwin-arm64",
|
||||
"@libsql/darwin-x64",
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@
|
|||
"turbo": "^2.5.3",
|
||||
"typescript": "5.8.2"
|
||||
},
|
||||
"packageManager": "pnpm@10.13.1",
|
||||
"packageManager": "pnpm@10.15.0",
|
||||
"engines": {
|
||||
"node": ">=24"
|
||||
},
|
||||
|
|
|
|||
74
packages/native-helpers/windows-helper/.gitignore
vendored
Normal file
74
packages/native-helpers/windows-helper/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
# Build outputs
|
||||
bin/
|
||||
obj/
|
||||
|
||||
# .NET build artifacts
|
||||
*.dll
|
||||
*.exe
|
||||
*.pdb
|
||||
*.cache
|
||||
*.log
|
||||
*.vsidx
|
||||
|
||||
# Visual Studio
|
||||
.vs/
|
||||
*.user
|
||||
*.suo
|
||||
*.userosscache
|
||||
*.sln.docstates
|
||||
|
||||
# Visual Studio Code
|
||||
.vscode/
|
||||
|
||||
# Rider
|
||||
.idea/
|
||||
*.sln.iml
|
||||
|
||||
# NuGet
|
||||
packages/
|
||||
*.nupkg
|
||||
|
||||
# Build results
|
||||
[Dd]ebug/
|
||||
[Dd]ebugPublic/
|
||||
[Rr]elease/
|
||||
[Rr]eleases/
|
||||
x64/
|
||||
x86/
|
||||
[Aa][Rr][Mm]/
|
||||
[Aa][Rr][Mm]64/
|
||||
bld/
|
||||
[Bb]in/
|
||||
[Oo]bj/
|
||||
[Ll]og/
|
||||
[Ll]ogs/
|
||||
|
||||
# MSTest test Results
|
||||
[Tt]est[Rr]esult*/
|
||||
[Bb]uild[Ll]og.*
|
||||
|
||||
# .NET Core
|
||||
project.lock.json
|
||||
project.fragment.lock.json
|
||||
artifacts/
|
||||
|
||||
# Files generated by popular IDEs
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Temporary files
|
||||
*.tmp
|
||||
*.temp
|
||||
*.bak
|
||||
|
||||
# OS generated files
|
||||
.DS_Store
|
||||
.DS_Store?
|
||||
._*
|
||||
.Spotlight-V100
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
BIN
packages/native-helpers/windows-helper/Resources/rec-start.mp3
Normal file
BIN
packages/native-helpers/windows-helper/Resources/rec-start.mp3
Normal file
Binary file not shown.
BIN
packages/native-helpers/windows-helper/Resources/rec-stop.mp3
Normal file
BIN
packages/native-helpers/windows-helper/Resources/rec-stop.mp3
Normal file
Binary file not shown.
29
packages/native-helpers/windows-helper/WindowsHelper.csproj
Normal file
29
packages/native-helpers/windows-helper/WindowsHelper.csproj
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net9.0</TargetFramework>
|
||||
<Nullable>enable</Nullable>
|
||||
<RuntimeIdentifier>win-x64</RuntimeIdentifier>
|
||||
<PublishSingleFile>true</PublishSingleFile>
|
||||
<SelfContained>false</SelfContained>
|
||||
<AssemblyName>WindowsHelper</AssemblyName>
|
||||
<RootNamespace>WindowsHelper</RootNamespace>
|
||||
<EnableWindowsTargeting>true</EnableWindowsTargeting>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="System.Text.Json" Version="9.0.7" />
|
||||
<PackageReference Include="NAudio" Version="2.2.1" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup Condition="'$(TargetFramework)' == 'net6.0-windows' Or '$(RuntimeIdentifier)' == 'win-x64'">
|
||||
<FrameworkReference Include="Microsoft.WindowsDesktop.App" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<EmbeddedResource Include="Resources\rec-start.mp3" />
|
||||
<EmbeddedResource Include="Resources\rec-stop.mp3" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
16
packages/native-helpers/windows-helper/package.json
Normal file
16
packages/native-helpers/windows-helper/package.json
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
"name": "@amical/windows-helper",
|
||||
"version": "0.0.1",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"build": "pnpm --filter @amical/types generate:all && dotnet publish -c Release -r win-x64 --self-contained false -o bin",
|
||||
"build:native": "dotnet publish -c Release -r win-x64 --self-contained false -o bin",
|
||||
"clean": "rm -rf bin obj"
|
||||
},
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"cpu": [
|
||||
"x64"
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,851 @@
|
|||
// <auto-generated />
|
||||
// This file was automatically generated by quicktype from JSON schemas.
|
||||
// DO NOT EDIT THIS FILE DIRECTLY! Instead, edit the TypeScript schemas and regenerate.
|
||||
|
||||
// <auto-generated />
|
||||
//
|
||||
// To parse this JSON data, add NuGet 'System.Text.Json' then do one of these:
|
||||
//
|
||||
// using WindowsHelper.Models;
|
||||
//
|
||||
// var rpcRequest = RpcRequest.FromJson(jsonString);
|
||||
// var rpcResponse = RpcResponse.FromJson(jsonString);
|
||||
// var getAccessibilityTreeDetailsParams = GetAccessibilityTreeDetailsParams.FromJson(jsonString);
|
||||
// var getAccessibilityTreeDetailsResult = GetAccessibilityTreeDetailsResult.FromJson(jsonString);
|
||||
// var getAccessibilityContextParams = GetAccessibilityContextParams.FromJson(jsonString);
|
||||
// var getAccessibilityContextResult = GetAccessibilityContextResult.FromJson(jsonString);
|
||||
// var pasteTextParams = PasteTextParams.FromJson(jsonString);
|
||||
// var pasteTextResult = PasteTextResult.FromJson(jsonString);
|
||||
// var muteSystemAudioParams = MuteSystemAudioParams.FromJson(jsonString);
|
||||
// var muteSystemAudioResult = MuteSystemAudioResult.FromJson(jsonString);
|
||||
// var restoreSystemAudioParams = RestoreSystemAudioParams.FromJson(jsonString);
|
||||
// var restoreSystemAudioResult = RestoreSystemAudioResult.FromJson(jsonString);
|
||||
// var keyDownEvent = KeyDownEvent.FromJson(jsonString);
|
||||
// var keyUpEvent = KeyUpEvent.FromJson(jsonString);
|
||||
// var flagsChangedEvent = FlagsChangedEvent.FromJson(jsonString);
|
||||
// var helperEvent = HelperEvent.FromJson(jsonString);
|
||||
#nullable enable
|
||||
#pragma warning disable CS8618
|
||||
#pragma warning disable CS8601
|
||||
#pragma warning disable CS8603
|
||||
|
||||
namespace WindowsHelper.Models
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Globalization;
|
||||
|
||||
public partial class RpcRequest
|
||||
{
|
||||
[JsonPropertyName("id")]
|
||||
public Guid Id { get; set; }
|
||||
|
||||
[JsonPropertyName("method")]
|
||||
public Method Method { get; set; }
|
||||
|
||||
[JsonPropertyName("params")]
|
||||
public object Params { get; set; }
|
||||
}
|
||||
|
||||
public partial class RpcResponse
|
||||
{
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("error")]
|
||||
public Error Error { get; set; }
|
||||
|
||||
[JsonPropertyName("id")]
|
||||
public string Id { get; set; }
|
||||
|
||||
[JsonPropertyName("result")]
|
||||
public object Result { get; set; }
|
||||
}
|
||||
|
||||
public partial class Error
|
||||
{
|
||||
[JsonPropertyName("code")]
|
||||
public long Code { get; set; }
|
||||
|
||||
[JsonPropertyName("data")]
|
||||
public object Data { get; set; }
|
||||
|
||||
[JsonPropertyName("message")]
|
||||
public string Message { get; set; }
|
||||
}
|
||||
|
||||
public partial class GetAccessibilityTreeDetailsParams
|
||||
{
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("rootId")]
|
||||
public string RootId { get; set; }
|
||||
}
|
||||
|
||||
public partial class GetAccessibilityTreeDetailsResult
|
||||
{
|
||||
[JsonPropertyName("tree")]
|
||||
public object Tree { get; set; }
|
||||
}
|
||||
|
||||
public partial class GetAccessibilityContextParams
|
||||
{
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("editableOnly")]
|
||||
public bool? EditableOnly { get; set; }
|
||||
}
|
||||
|
||||
public partial class GetAccessibilityContextResult
|
||||
{
|
||||
[JsonPropertyName("context")]
|
||||
public Context Context { get; set; }
|
||||
}
|
||||
|
||||
public partial class Context
|
||||
{
|
||||
[JsonPropertyName("application")]
|
||||
public Application Application { get; set; }
|
||||
|
||||
[JsonPropertyName("focusedElement")]
|
||||
public FocusedElement FocusedElement { get; set; }
|
||||
|
||||
[JsonPropertyName("textSelection")]
|
||||
public TextSelection TextSelection { get; set; }
|
||||
|
||||
[JsonPropertyName("timestamp")]
|
||||
public double Timestamp { get; set; }
|
||||
|
||||
[JsonPropertyName("windowInfo")]
|
||||
public WindowInfo WindowInfo { get; set; }
|
||||
}
|
||||
|
||||
public partial class Application
|
||||
{
|
||||
[JsonPropertyName("bundleIdentifier")]
|
||||
public string BundleIdentifier { get; set; }
|
||||
|
||||
[JsonPropertyName("name")]
|
||||
public string Name { get; set; }
|
||||
|
||||
[JsonPropertyName("version")]
|
||||
public string Version { get; set; }
|
||||
}
|
||||
|
||||
public partial class FocusedElement
|
||||
{
|
||||
[JsonPropertyName("description")]
|
||||
public string Description { get; set; }
|
||||
|
||||
[JsonPropertyName("isEditable")]
|
||||
public bool IsEditable { get; set; }
|
||||
|
||||
[JsonPropertyName("role")]
|
||||
public string Role { get; set; }
|
||||
|
||||
[JsonPropertyName("title")]
|
||||
public string Title { get; set; }
|
||||
|
||||
[JsonPropertyName("value")]
|
||||
public string Value { get; set; }
|
||||
}
|
||||
|
||||
public partial class TextSelection
|
||||
{
|
||||
[JsonPropertyName("fullContent")]
|
||||
public string FullContent { get; set; }
|
||||
|
||||
[JsonPropertyName("isEditable")]
|
||||
public bool IsEditable { get; set; }
|
||||
|
||||
[JsonPropertyName("postSelectionText")]
|
||||
public string PostSelectionText { get; set; }
|
||||
|
||||
[JsonPropertyName("preSelectionText")]
|
||||
public string PreSelectionText { get; set; }
|
||||
|
||||
[JsonPropertyName("selectedText")]
|
||||
public string SelectedText { get; set; }
|
||||
|
||||
[JsonPropertyName("selectionRange")]
|
||||
public SelectionRange SelectionRange { get; set; }
|
||||
}
|
||||
|
||||
public partial class SelectionRange
|
||||
{
|
||||
[JsonPropertyName("length")]
|
||||
public long Length { get; set; }
|
||||
|
||||
[JsonPropertyName("location")]
|
||||
public long Location { get; set; }
|
||||
}
|
||||
|
||||
public partial class WindowInfo
|
||||
{
|
||||
[JsonPropertyName("title")]
|
||||
public string Title { get; set; }
|
||||
|
||||
[JsonPropertyName("url")]
|
||||
public string Url { get; set; }
|
||||
}
|
||||
|
||||
public partial class PasteTextParams
|
||||
{
|
||||
[JsonPropertyName("transcript")]
|
||||
public string Transcript { get; set; }
|
||||
}
|
||||
|
||||
public partial class PasteTextResult
|
||||
{
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("message")]
|
||||
public string Message { get; set; }
|
||||
|
||||
[JsonPropertyName("success")]
|
||||
public bool Success { get; set; }
|
||||
}
|
||||
|
||||
public partial class MuteSystemAudioResult
|
||||
{
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("message")]
|
||||
public string Message { get; set; }
|
||||
|
||||
[JsonPropertyName("success")]
|
||||
public bool Success { get; set; }
|
||||
}
|
||||
|
||||
public partial class RestoreSystemAudioResult
|
||||
{
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("message")]
|
||||
public string Message { get; set; }
|
||||
|
||||
[JsonPropertyName("success")]
|
||||
public bool Success { get; set; }
|
||||
}
|
||||
|
||||
public partial class KeyDownEvent
|
||||
{
|
||||
[JsonPropertyName("payload")]
|
||||
public KeyDownEventPayload Payload { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("timestamp")]
|
||||
public DateTimeOffset? Timestamp { get; set; }
|
||||
|
||||
[JsonPropertyName("type")]
|
||||
public KeyDownEventType Type { get; set; }
|
||||
}
|
||||
|
||||
public partial class KeyDownEventPayload
|
||||
{
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("altKey")]
|
||||
public bool? AltKey { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("code")]
|
||||
public string Code { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("ctrlKey")]
|
||||
public bool? CtrlKey { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// State of the Fn key.
|
||||
/// </summary>
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("fnKeyPressed")]
|
||||
public bool? FnKeyPressed { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("key")]
|
||||
public string Key { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Raw key code, e.g., from CGEvent
|
||||
/// </summary>
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("keyCode")]
|
||||
public long? KeyCode { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("metaKey")]
|
||||
public bool? MetaKey { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("shiftKey")]
|
||||
public bool? ShiftKey { get; set; }
|
||||
}
|
||||
|
||||
public partial class KeyUpEvent
|
||||
{
|
||||
[JsonPropertyName("payload")]
|
||||
public KeyUpEventPayload Payload { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("timestamp")]
|
||||
public DateTimeOffset? Timestamp { get; set; }
|
||||
|
||||
[JsonPropertyName("type")]
|
||||
public KeyUpEventType Type { get; set; }
|
||||
}
|
||||
|
||||
public partial class KeyUpEventPayload
|
||||
{
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("altKey")]
|
||||
public bool? AltKey { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("code")]
|
||||
public string Code { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("ctrlKey")]
|
||||
public bool? CtrlKey { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// State of the Fn key.
|
||||
/// </summary>
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("fnKeyPressed")]
|
||||
public bool? FnKeyPressed { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("key")]
|
||||
public string Key { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Raw key code, e.g., from CGEvent
|
||||
/// </summary>
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("keyCode")]
|
||||
public long? KeyCode { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("metaKey")]
|
||||
public bool? MetaKey { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("shiftKey")]
|
||||
public bool? ShiftKey { get; set; }
|
||||
}
|
||||
|
||||
public partial class FlagsChangedEvent
|
||||
{
|
||||
[JsonPropertyName("payload")]
|
||||
public FlagsChangedEventPayload Payload { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("timestamp")]
|
||||
public DateTimeOffset? Timestamp { get; set; }
|
||||
|
||||
[JsonPropertyName("type")]
|
||||
public FlagsChangedEventType Type { get; set; }
|
||||
}
|
||||
|
||||
public partial class FlagsChangedEventPayload
|
||||
{
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("altKey")]
|
||||
public bool? AltKey { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("code")]
|
||||
public string Code { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("ctrlKey")]
|
||||
public bool? CtrlKey { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// State of the Fn key.
|
||||
/// </summary>
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("fnKeyPressed")]
|
||||
public bool? FnKeyPressed { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("key")]
|
||||
public string Key { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Raw key code, e.g., from CGEvent
|
||||
/// </summary>
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("keyCode")]
|
||||
public long? KeyCode { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("metaKey")]
|
||||
public bool? MetaKey { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("shiftKey")]
|
||||
public bool? ShiftKey { get; set; }
|
||||
}
|
||||
|
||||
public partial class HelperEvent
|
||||
{
|
||||
[JsonPropertyName("payload")]
|
||||
public HelperEventPayload Payload { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("timestamp")]
|
||||
public DateTimeOffset? Timestamp { get; set; }
|
||||
|
||||
[JsonPropertyName("type")]
|
||||
public HelperEventType Type { get; set; }
|
||||
}
|
||||
|
||||
public partial class HelperEventPayload
|
||||
{
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("altKey")]
|
||||
public bool? AltKey { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("code")]
|
||||
public string Code { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("ctrlKey")]
|
||||
public bool? CtrlKey { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// State of the Fn key.
|
||||
/// </summary>
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("fnKeyPressed")]
|
||||
public bool? FnKeyPressed { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("key")]
|
||||
public string Key { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Raw key code, e.g., from CGEvent
|
||||
/// </summary>
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("keyCode")]
|
||||
public long? KeyCode { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("metaKey")]
|
||||
public bool? MetaKey { get; set; }
|
||||
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
[JsonPropertyName("shiftKey")]
|
||||
public bool? ShiftKey { get; set; }
|
||||
}
|
||||
|
||||
public enum Method { GetAccessibilityContext, GetAccessibilityTreeDetails, MuteSystemAudio, PasteText, RestoreSystemAudio };
|
||||
|
||||
public enum KeyDownEventType { KeyDown };
|
||||
|
||||
public enum KeyUpEventType { KeyUp };
|
||||
|
||||
public enum FlagsChangedEventType { FlagsChanged };
|
||||
|
||||
public enum HelperEventType { FlagsChanged, KeyDown, KeyUp };
|
||||
|
||||
public partial class RpcRequest
|
||||
{
|
||||
public static RpcRequest FromJson(string json) => JsonSerializer.Deserialize<RpcRequest>(json, WindowsHelper.Models.Converter.Settings);
|
||||
}
|
||||
|
||||
public partial class RpcResponse
|
||||
{
|
||||
public static RpcResponse FromJson(string json) => JsonSerializer.Deserialize<RpcResponse>(json, WindowsHelper.Models.Converter.Settings);
|
||||
}
|
||||
|
||||
public partial class GetAccessibilityTreeDetailsParams
|
||||
{
|
||||
public static GetAccessibilityTreeDetailsParams FromJson(string json) => JsonSerializer.Deserialize<GetAccessibilityTreeDetailsParams>(json, WindowsHelper.Models.Converter.Settings);
|
||||
}
|
||||
|
||||
public partial class GetAccessibilityTreeDetailsResult
|
||||
{
|
||||
public static GetAccessibilityTreeDetailsResult FromJson(string json) => JsonSerializer.Deserialize<GetAccessibilityTreeDetailsResult>(json, WindowsHelper.Models.Converter.Settings);
|
||||
}
|
||||
|
||||
public partial class GetAccessibilityContextParams
|
||||
{
|
||||
public static GetAccessibilityContextParams FromJson(string json) => JsonSerializer.Deserialize<GetAccessibilityContextParams>(json, WindowsHelper.Models.Converter.Settings);
|
||||
}
|
||||
|
||||
public partial class GetAccessibilityContextResult
|
||||
{
|
||||
public static GetAccessibilityContextResult FromJson(string json) => JsonSerializer.Deserialize<GetAccessibilityContextResult>(json, WindowsHelper.Models.Converter.Settings);
|
||||
}
|
||||
|
||||
public partial class PasteTextParams
|
||||
{
|
||||
public static PasteTextParams FromJson(string json) => JsonSerializer.Deserialize<PasteTextParams>(json, WindowsHelper.Models.Converter.Settings);
|
||||
}
|
||||
|
||||
public partial class PasteTextResult
|
||||
{
|
||||
public static PasteTextResult FromJson(string json) => JsonSerializer.Deserialize<PasteTextResult>(json, WindowsHelper.Models.Converter.Settings);
|
||||
}
|
||||
|
||||
public class MuteSystemAudioParams
|
||||
{
|
||||
public static object FromJson(string json) => JsonSerializer.Deserialize<object>(json, WindowsHelper.Models.Converter.Settings);
|
||||
}
|
||||
|
||||
public partial class MuteSystemAudioResult
|
||||
{
|
||||
public static MuteSystemAudioResult FromJson(string json) => JsonSerializer.Deserialize<MuteSystemAudioResult>(json, WindowsHelper.Models.Converter.Settings);
|
||||
}
|
||||
|
||||
public class RestoreSystemAudioParams
|
||||
{
|
||||
public static object FromJson(string json) => JsonSerializer.Deserialize<object>(json, WindowsHelper.Models.Converter.Settings);
|
||||
}
|
||||
|
||||
public partial class RestoreSystemAudioResult
|
||||
{
|
||||
public static RestoreSystemAudioResult FromJson(string json) => JsonSerializer.Deserialize<RestoreSystemAudioResult>(json, WindowsHelper.Models.Converter.Settings);
|
||||
}
|
||||
|
||||
public partial class KeyDownEvent
|
||||
{
|
||||
public static KeyDownEvent FromJson(string json) => JsonSerializer.Deserialize<KeyDownEvent>(json, WindowsHelper.Models.Converter.Settings);
|
||||
}
|
||||
|
||||
public partial class KeyUpEvent
|
||||
{
|
||||
public static KeyUpEvent FromJson(string json) => JsonSerializer.Deserialize<KeyUpEvent>(json, WindowsHelper.Models.Converter.Settings);
|
||||
}
|
||||
|
||||
public partial class FlagsChangedEvent
|
||||
{
|
||||
public static FlagsChangedEvent FromJson(string json) => JsonSerializer.Deserialize<FlagsChangedEvent>(json, WindowsHelper.Models.Converter.Settings);
|
||||
}
|
||||
|
||||
public partial class HelperEvent
|
||||
{
|
||||
public static HelperEvent FromJson(string json) => JsonSerializer.Deserialize<HelperEvent>(json, WindowsHelper.Models.Converter.Settings);
|
||||
}
|
||||
|
||||
/// <summary>
/// <c>ToJson</c> extension methods for the model types. Every overload serializes with the
/// shared <c>Converter.Settings</c> options.
/// </summary>
public static class Serialize
{
    // Single funnel for all overloads; T is the overload's declared parameter type, so the
    // same generic Serialize<T> instantiation is used as when calling JsonSerializer directly.
    private static string Encode<T>(T value)
    {
        return JsonSerializer.Serialize(value, WindowsHelper.Models.Converter.Settings);
    }

    public static string ToJson(this RpcRequest self) { return Encode(self); }

    public static string ToJson(this RpcResponse self) { return Encode(self); }

    public static string ToJson(this GetAccessibilityTreeDetailsParams self) { return Encode(self); }

    public static string ToJson(this GetAccessibilityTreeDetailsResult self) { return Encode(self); }

    public static string ToJson(this GetAccessibilityContextParams self) { return Encode(self); }

    public static string ToJson(this GetAccessibilityContextResult self) { return Encode(self); }

    public static string ToJson(this PasteTextParams self) { return Encode(self); }

    public static string ToJson(this PasteTextResult self) { return Encode(self); }

    // Catch-all overload: applies to any object that has no more specific overload above/below.
    public static string ToJson(this object self) { return Encode(self); }

    public static string ToJson(this MuteSystemAudioResult self) { return Encode(self); }

    public static string ToJson(this RestoreSystemAudioResult self) { return Encode(self); }

    public static string ToJson(this KeyDownEvent self) { return Encode(self); }

    public static string ToJson(this KeyUpEvent self) { return Encode(self); }

    public static string ToJson(this FlagsChangedEvent self) { return Encode(self); }

    public static string ToJson(this HelperEvent self) { return Encode(self); }
}
|
||||
|
||||
/// <summary>
/// Holds the single <see cref="JsonSerializerOptions"/> instance used by every
/// <c>FromJson</c>/<c>ToJson</c> helper in this namespace.
/// </summary>
internal static class Converter
{
    public static readonly JsonSerializerOptions Settings = BuildSettings();

    // Registers the custom converters in the same order the options were originally declared with.
    private static JsonSerializerOptions BuildSettings()
    {
        var settings = new JsonSerializerOptions(JsonSerializerDefaults.General);
        var converters = settings.Converters;

        converters.Add(MethodConverter.Singleton);
        converters.Add(KeyDownEventTypeConverter.Singleton);
        converters.Add(KeyUpEventTypeConverter.Singleton);
        converters.Add(FlagsChangedEventTypeConverter.Singleton);
        converters.Add(HelperEventTypeConverter.Singleton);
        converters.Add(new DateOnlyConverter());
        converters.Add(new TimeOnlyConverter());
        converters.Add(IsoDateTimeOffsetConverter.Singleton);

        return settings;
    }
}
|
||||
|
||||
/// <summary>
/// Converts <see cref="Method"/> values to and from their camelCase wire names.
/// Unknown names/values raise <see cref="JsonException"/> so that callers which handle
/// malformed input via <c>catch (JsonException)</c> also handle an unrecognised method name.
/// </summary>
internal class MethodConverter : JsonConverter<Method>
{
    public override bool CanConvert(Type t) => t == typeof(Method);

    /// <summary>Reads a JSON string and maps it to the matching <see cref="Method"/>.</summary>
    /// <exception cref="JsonException">The string is not a known method name.</exception>
    public override Method Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        return reader.GetString() switch
        {
            "getAccessibilityContext" => Method.GetAccessibilityContext,
            "getAccessibilityTreeDetails" => Method.GetAccessibilityTreeDetails,
            "muteSystemAudio" => Method.MuteSystemAudio,
            "pasteText" => Method.PasteText,
            "restoreSystemAudio" => Method.RestoreSystemAudio,
            _ => throw new JsonException("Cannot unmarshal type Method"),
        };
    }

    /// <summary>Writes the wire name of <paramref name="value"/> as a JSON string.</summary>
    /// <exception cref="JsonException">The value is not a defined <see cref="Method"/>.</exception>
    public override void Write(Utf8JsonWriter writer, Method value, JsonSerializerOptions options)
    {
        var wireName = value switch
        {
            Method.GetAccessibilityContext => "getAccessibilityContext",
            Method.GetAccessibilityTreeDetails => "getAccessibilityTreeDetails",
            Method.MuteSystemAudio => "muteSystemAudio",
            Method.PasteText => "pasteText",
            Method.RestoreSystemAudio => "restoreSystemAudio",
            _ => throw new JsonException("Cannot marshal type Method"),
        };
        writer.WriteStringValue(wireName);
    }

    public static readonly MethodConverter Singleton = new MethodConverter();
}
|
||||
|
||||
/// <summary>
/// Converts <see cref="KeyDownEventType"/> to and from the wire string <c>keyDown</c>.
/// Anything else raises <see cref="JsonException"/> (rather than a bare <see cref="Exception"/>)
/// so it is reported like any other malformed-JSON error.
/// </summary>
internal class KeyDownEventTypeConverter : JsonConverter<KeyDownEventType>
{
    public override bool CanConvert(Type t) => t == typeof(KeyDownEventType);

    /// <exception cref="JsonException">The JSON string is not <c>keyDown</c>.</exception>
    public override KeyDownEventType Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        var wireName = reader.GetString();
        return wireName == "keyDown"
            ? KeyDownEventType.KeyDown
            : throw new JsonException("Cannot unmarshal type KeyDownEventType");
    }

    /// <exception cref="JsonException">The value is not <see cref="KeyDownEventType.KeyDown"/>.</exception>
    public override void Write(Utf8JsonWriter writer, KeyDownEventType value, JsonSerializerOptions options)
    {
        if (value != KeyDownEventType.KeyDown)
        {
            throw new JsonException("Cannot marshal type KeyDownEventType");
        }
        writer.WriteStringValue("keyDown");
    }

    public static readonly KeyDownEventTypeConverter Singleton = new KeyDownEventTypeConverter();
}
|
||||
|
||||
/// <summary>
/// Converts <see cref="KeyUpEventType"/> to and from the wire string <c>keyUp</c>.
/// Anything else raises <see cref="JsonException"/> (rather than a bare <see cref="Exception"/>)
/// so it is reported like any other malformed-JSON error.
/// </summary>
internal class KeyUpEventTypeConverter : JsonConverter<KeyUpEventType>
{
    public override bool CanConvert(Type t) => t == typeof(KeyUpEventType);

    /// <exception cref="JsonException">The JSON string is not <c>keyUp</c>.</exception>
    public override KeyUpEventType Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        var wireName = reader.GetString();
        return wireName == "keyUp"
            ? KeyUpEventType.KeyUp
            : throw new JsonException("Cannot unmarshal type KeyUpEventType");
    }

    /// <exception cref="JsonException">The value is not <see cref="KeyUpEventType.KeyUp"/>.</exception>
    public override void Write(Utf8JsonWriter writer, KeyUpEventType value, JsonSerializerOptions options)
    {
        if (value != KeyUpEventType.KeyUp)
        {
            throw new JsonException("Cannot marshal type KeyUpEventType");
        }
        writer.WriteStringValue("keyUp");
    }

    public static readonly KeyUpEventTypeConverter Singleton = new KeyUpEventTypeConverter();
}
|
||||
|
||||
/// <summary>
/// Converts <see cref="FlagsChangedEventType"/> to and from the wire string <c>flagsChanged</c>.
/// Anything else raises <see cref="JsonException"/> (rather than a bare <see cref="Exception"/>)
/// so it is reported like any other malformed-JSON error.
/// </summary>
internal class FlagsChangedEventTypeConverter : JsonConverter<FlagsChangedEventType>
{
    public override bool CanConvert(Type t) => t == typeof(FlagsChangedEventType);

    /// <exception cref="JsonException">The JSON string is not <c>flagsChanged</c>.</exception>
    public override FlagsChangedEventType Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        var wireName = reader.GetString();
        return wireName == "flagsChanged"
            ? FlagsChangedEventType.FlagsChanged
            : throw new JsonException("Cannot unmarshal type FlagsChangedEventType");
    }

    /// <exception cref="JsonException">The value is not <see cref="FlagsChangedEventType.FlagsChanged"/>.</exception>
    public override void Write(Utf8JsonWriter writer, FlagsChangedEventType value, JsonSerializerOptions options)
    {
        if (value != FlagsChangedEventType.FlagsChanged)
        {
            throw new JsonException("Cannot marshal type FlagsChangedEventType");
        }
        writer.WriteStringValue("flagsChanged");
    }

    public static readonly FlagsChangedEventTypeConverter Singleton = new FlagsChangedEventTypeConverter();
}
|
||||
|
||||
/// <summary>
/// Converts <see cref="HelperEventType"/> to and from its wire names
/// (<c>flagsChanged</c>, <c>keyDown</c>, <c>keyUp</c>). Unknown names/values raise
/// <see cref="JsonException"/> (rather than a bare <see cref="Exception"/>) so they are
/// reported like any other malformed-JSON error.
/// </summary>
internal class HelperEventTypeConverter : JsonConverter<HelperEventType>
{
    public override bool CanConvert(Type t) => t == typeof(HelperEventType);

    /// <exception cref="JsonException">The JSON string is not a known event type.</exception>
    public override HelperEventType Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        return reader.GetString() switch
        {
            "flagsChanged" => HelperEventType.FlagsChanged,
            "keyDown" => HelperEventType.KeyDown,
            "keyUp" => HelperEventType.KeyUp,
            _ => throw new JsonException("Cannot unmarshal type HelperEventType"),
        };
    }

    /// <exception cref="JsonException">The value is not a defined <see cref="HelperEventType"/>.</exception>
    public override void Write(Utf8JsonWriter writer, HelperEventType value, JsonSerializerOptions options)
    {
        var wireName = value switch
        {
            HelperEventType.FlagsChanged => "flagsChanged",
            HelperEventType.KeyDown => "keyDown",
            HelperEventType.KeyUp => "keyUp",
            _ => throw new JsonException("Cannot marshal type HelperEventType"),
        };
        writer.WriteStringValue(wireName);
    }

    public static readonly HelperEventTypeConverter Singleton = new HelperEventTypeConverter();
}
|
||||
|
||||
/// <summary>
/// Reads and writes <see cref="DateOnly"/> as a JSON string (default format <c>yyyy-MM-dd</c>).
/// Formatting and parsing use the invariant culture so the wire format does not depend on the
/// machine's regional settings (e.g. a non-Gregorian default calendar would otherwise change the year).
/// </summary>
public class DateOnlyConverter : JsonConverter<DateOnly>
{
    private readonly string serializationFormat;

    /// <summary>Creates a converter that writes dates as <c>yyyy-MM-dd</c>.</summary>
    public DateOnlyConverter() : this(null) { }

    /// <param name="serializationFormat">Custom format used when writing; null selects <c>yyyy-MM-dd</c>.</param>
    public DateOnlyConverter(string? serializationFormat)
    {
        this.serializationFormat = serializationFormat ?? "yyyy-MM-dd";
    }

    /// <summary>Parses the JSON string value as a date using the invariant culture.</summary>
    public override DateOnly Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        var value = reader.GetString();
        return DateOnly.Parse(value!, CultureInfo.InvariantCulture);
    }

    /// <summary>Writes the date as a JSON string in the configured format, culture-independently.</summary>
    public override void Write(Utf8JsonWriter writer, DateOnly value, JsonSerializerOptions options)
        => writer.WriteStringValue(value.ToString(serializationFormat, CultureInfo.InvariantCulture));
}
|
||||
|
||||
/// <summary>
/// Reads and writes <see cref="TimeOnly"/> as a JSON string (default format <c>HH:mm:ss.fff</c>).
/// Formatting and parsing use the invariant culture; with a culture-sensitive format the
/// <c>:</c> specifier is replaced by the culture's time separator, which would change the wire format.
/// </summary>
public class TimeOnlyConverter : JsonConverter<TimeOnly>
{
    private readonly string serializationFormat;

    /// <summary>Creates a converter that writes times as <c>HH:mm:ss.fff</c>.</summary>
    public TimeOnlyConverter() : this(null) { }

    /// <param name="serializationFormat">Custom format used when writing; null selects <c>HH:mm:ss.fff</c>.</param>
    public TimeOnlyConverter(string? serializationFormat)
    {
        this.serializationFormat = serializationFormat ?? "HH:mm:ss.fff";
    }

    /// <summary>Parses the JSON string value as a time of day using the invariant culture.</summary>
    public override TimeOnly Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        var value = reader.GetString();
        return TimeOnly.Parse(value!, CultureInfo.InvariantCulture);
    }

    /// <summary>Writes the time as a JSON string in the configured format, culture-independently.</summary>
    public override void Write(Utf8JsonWriter writer, TimeOnly value, JsonSerializerOptions options)
        => writer.WriteStringValue(value.ToString(serializationFormat, CultureInfo.InvariantCulture));
}
|
||||
|
||||
/// <summary>
/// Reads and writes <see cref="DateTimeOffset"/> as an ISO-8601 style JSON string.
/// The culture defaults to <see cref="CultureInfo.InvariantCulture"/> so the output and the
/// parser do not depend on the machine's regional settings; set <see cref="Culture"/> to override.
/// </summary>
internal class IsoDateTimeOffsetConverter : JsonConverter<DateTimeOffset>
{
    public override bool CanConvert(Type t) => t == typeof(DateTimeOffset);

    // Used by Write when no explicit DateTimeFormat has been configured.
    private const string DefaultDateTimeFormat = "yyyy'-'MM'-'dd'T'HH':'mm':'ss.FFFFFFFK";

    private DateTimeStyles _dateTimeStyles = DateTimeStyles.RoundtripKind;
    private string? _dateTimeFormat;
    private CultureInfo? _culture;

    /// <summary>Styles passed to the parser; the two "universal" flags also make Write emit UTC.</summary>
    public DateTimeStyles DateTimeStyles
    {
        get => _dateTimeStyles;
        set => _dateTimeStyles = value;
    }

    /// <summary>Custom format string; reads back as empty when unset, and empty/null input clears it.</summary>
    public string? DateTimeFormat
    {
        get => _dateTimeFormat ?? string.Empty;
        set => _dateTimeFormat = string.IsNullOrEmpty(value) ? null : value;
    }

    /// <summary>Culture used for formatting and parsing; invariant unless explicitly set.</summary>
    public CultureInfo Culture
    {
        get => _culture ?? CultureInfo.InvariantCulture;
        set => _culture = value;
    }

    /// <summary>Writes <paramref name="value"/> as a JSON string using the configured (or default) format.</summary>
    public override void Write(Utf8JsonWriter writer, DateTimeOffset value, JsonSerializerOptions options)
    {
        var emitAsUtc =
            (_dateTimeStyles & DateTimeStyles.AdjustToUniversal) == DateTimeStyles.AdjustToUniversal
            || (_dateTimeStyles & DateTimeStyles.AssumeUniversal) == DateTimeStyles.AssumeUniversal;

        if (emitAsUtc)
        {
            value = value.ToUniversalTime();
        }

        writer.WriteStringValue(value.ToString(_dateTimeFormat ?? DefaultDateTimeFormat, Culture));
    }

    /// <summary>
    /// Parses the JSON string value. A null or empty string yields <c>default(DateTimeOffset)</c>;
    /// when a custom format is configured the text must match it exactly.
    /// </summary>
    public override DateTimeOffset Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        string? dateText = reader.GetString();

        if (string.IsNullOrEmpty(dateText))
        {
            return default(DateTimeOffset);
        }

        if (!string.IsNullOrEmpty(_dateTimeFormat))
        {
            return DateTimeOffset.ParseExact(dateText, _dateTimeFormat, Culture, _dateTimeStyles);
        }

        return DateTimeOffset.Parse(dateText, Culture, _dateTimeStyles);
    }

    public static readonly IsoDateTimeOffsetConverter Singleton = new IsoDateTimeOffsetConverter();
}
|
||||
}
|
||||
#pragma warning restore CS8618
|
||||
#pragma warning restore CS8601
|
||||
#pragma warning restore CS8603
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace WindowsHelper.Models
|
||||
{
|
||||
// These models are platform-specific and intentionally not in the TypeScript schemas
|
||||
// They are internal implementation details that get serialized to generic JSON
|
||||
// Each platform (Windows/macOS) can structure these differently based on their needs
|
||||
|
||||
/// <summary>
/// One node of an accessibility tree as it is serialized to JSON. Every property carries an
/// explicit camelCase JSON name; <see cref="Children"/> makes the structure recursive.
/// </summary>
public class AccessibilityElementNode
{
    /// <summary>Serialized as <c>id</c>.</summary>
    [JsonPropertyName("id")] public string? Id { get; set; }

    /// <summary>Serialized as <c>role</c>.</summary>
    [JsonPropertyName("role")] public string? Role { get; set; }

    /// <summary>Serialized as <c>name</c>.</summary>
    [JsonPropertyName("name")] public string? Name { get; set; }

    /// <summary>Serialized as <c>value</c>.</summary>
    [JsonPropertyName("value")] public string? Value { get; set; }

    /// <summary>Serialized as <c>description</c>.</summary>
    [JsonPropertyName("description")] public string? Description { get; set; }

    /// <summary>Serialized as <c>isEditable</c>.</summary>
    [JsonPropertyName("isEditable")] public bool IsEditable { get; set; }

    /// <summary>Child nodes, serialized as <c>children</c>; may be null.</summary>
    [JsonPropertyName("children")] public List<AccessibilityElementNode>? Children { get; set; }
}
|
||||
|
||||
// Subclass of the generated Context class (not a true alias) kept so existing code can
// keep using the AccessibilityContext name.
/// <summary>
/// Extends <see cref="Context"/> with extra JSON properties that, per the original note,
/// might be missing from the generated model.
/// </summary>
public class AccessibilityContext : Context
{
    /// <summary>Serialized as <c>applicationName</c>.</summary>
    [JsonPropertyName("applicationName")] public string? ApplicationName { get; set; }

    /// <summary>Serialized as <c>windowTitle</c>.</summary>
    [JsonPropertyName("windowTitle")] public string? WindowTitle { get; set; }

    /// <summary>Serialized as <c>focusedElementRole</c>.</summary>
    [JsonPropertyName("focusedElementRole")] public string? FocusedElementRole { get; set; }

    /// <summary>Serialized as <c>isEditable</c>.</summary>
    [JsonPropertyName("isEditable")] public bool IsEditable { get; set; }

    /// <summary>Serialized as <c>isWebContent</c>.</summary>
    [JsonPropertyName("isWebContent")] public bool IsWebContent { get; set; }
}
|
||||
}
|
||||
97
packages/native-helpers/windows-helper/src/Program.cs
Normal file
97
packages/native-helpers/windows-helper/src/Program.cs
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using WindowsHelper.Models;
|
||||
|
||||
namespace WindowsHelper
|
||||
{
|
||||
/// <summary>
/// Process entry point. Wires a <see cref="ShortcutMonitor"/> (key events are written to stdout
/// as JSON lines) and an <see cref="RpcHandler"/> (requests are read from stdin on a background
/// task), then waits until cancellation is requested (Ctrl+C).
/// </summary>
class Program
{
    static ShortcutMonitor? shortcutMonitor;
    static RpcHandler? rpcHandler;
    static readonly CancellationTokenSource cancellationTokenSource = new();

    // Handle Ctrl+C gracefully: suppress the default process kill and request cancellation
    // instead, so Main unwinds through its cleanup block.
    static Program()
    {
        Console.CancelKeyPress += (sender, e) =>
        {
            e.Cancel = true;
            cancellationTokenSource.Cancel();
        };
    }

    /// <summary>
    /// Starts the helper and blocks until cancelled. On an unexpected exception the process
    /// exits with code 1, but only after the cleanup block has run.
    /// </summary>
    static async Task Main(string[] args)
    {
        // Set up console encoding for proper JSON communication
        Console.InputEncoding = System.Text.Encoding.UTF8;
        Console.OutputEncoding = System.Text.Encoding.UTF8;

        LogToStderr("WindowsHelper starting...");

        var exitCode = 0;
        try
        {
            // Initialize components
            var monitor = new ShortcutMonitor();
            shortcutMonitor = monitor;
            var handler = new RpcHandler();
            rpcHandler = handler;

            monitor.KeyEventOccurred += OnKeyEvent;

            // Start RPC processing in a background task. ProcessRpcRequests handles its own
            // exceptions, so the task is intentionally not awaited.
            // NOTE(review): when stdin reaches EOF the RPC loop ends but the process keeps
            // running; confirm whether the helper should shut down in that case.
            var token = cancellationTokenSource.Token;
            _ = Task.Run(() =>
            {
                LogToStderr("Starting RPC processing in background thread...");
                handler.ProcessRpcRequests(token);
            }, token);

            LogToStderr("Starting shortcut monitoring in main thread...");
            monitor.Start();

            // Wait for cancellation
            await Task.Delay(Timeout.Infinite, token);
        }
        catch (OperationCanceledException)
        {
            LogToStderr("WindowsHelper shutting down...");
        }
        catch (Exception ex)
        {
            LogToStderr($"Fatal error: {ex.Message}");
            // Do not call Environment.Exit here: it terminates the process without running
            // the finally block below, which would skip stopping the shortcut monitor.
            exitCode = 1;
        }
        finally
        {
            // Cleanup
            shortcutMonitor?.Stop();
            cancellationTokenSource.Cancel();
            LogToStderr("WindowsHelper stopped.");
        }

        if (exitCode != 0)
        {
            Environment.Exit(exitCode);
        }
    }

    /// <summary>Serializes a key event and writes it to stdout as a single line; failures are logged.</summary>
    private static void OnKeyEvent(object? sender, HelperEvent e)
    {
        try
        {
            // Serialize and send the event to stdout using generated serializer
            var json = e.ToJson();
            Console.WriteLine(json);
            Console.Out.Flush();
        }
        catch (Exception ex)
        {
            LogToStderr($"Error sending key event: {ex.Message}");
        }
    }

    /// <summary>Writes a timestamped diagnostic line to stderr (stdout is reserved for JSON).</summary>
    private static void LogToStderr(string message)
    {
        var timestamp = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff");
        Console.Error.WriteLine($"[{timestamp}] {message}");
        Console.Error.Flush();
    }
}
|
||||
}
|
||||
333
packages/native-helpers/windows-helper/src/RpcHandler.cs
Normal file
333
packages/native-helpers/windows-helper/src/RpcHandler.cs
Normal file
|
|
@ -0,0 +1,333 @@
|
|||
using System;
|
||||
using System.IO;
|
||||
using System.Text.Json;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using WindowsHelper.Models;
|
||||
using WindowsHelper.Services;
|
||||
|
||||
namespace WindowsHelper
|
||||
{
|
||||
/// <summary>
/// Reads line-delimited JSON RPC requests from stdin, dispatches them to the accessibility
/// and audio services, and writes one JSON response line per request to stdout.
/// Diagnostics go to stderr.
/// </summary>
public class RpcHandler
{
    private readonly JsonSerializerOptions jsonOptions;
    private readonly AccessibilityService accessibilityService;
    private readonly AudioService audioService;

    // Callback run when a sound finishes playing; set while a muteSystemAudio request is pending.
    private Action<string>? audioCompletionHandler;

    public RpcHandler()
    {
        // Use the generated converter settings from the models
        jsonOptions = WindowsHelper.Models.Converter.Settings;

        accessibilityService = new AccessibilityService();
        audioService = new AudioService();
        audioService.SoundPlaybackCompleted += OnSoundPlaybackCompleted;
    }

    /// <summary>
    /// Blocking loop: reads stdin line by line until EOF or cancellation and hands each decoded
    /// request to a thread-pool task. A line that cannot be decoded is logged and skipped; it
    /// never terminates the loop.
    /// </summary>
    /// <param name="cancellationToken">Checked before each read; does not interrupt a blocked read.</param>
    public void ProcessRpcRequests(CancellationToken cancellationToken)
    {
        LogToStderr("RpcHandler: Starting RPC request processing loop.");

        try
        {
            string? line;
            while (!cancellationToken.IsCancellationRequested && (line = Console.ReadLine()) != null)
            {
                if (string.IsNullOrWhiteSpace(line))
                {
                    LogToStderr("Warning: Received empty line on stdin.");
                    continue;
                }

                RpcRequest? request;
                try
                {
                    request = JsonSerializer.Deserialize<RpcRequest>(line, jsonOptions);
                }
                catch (Exception ex)
                {
                    // Catch every decode failure, not only JsonException: custom converters may
                    // throw other exception types, and one bad line must not stop the helper.
                    LogToStderr($"Error decoding RpcRequest from stdin: {ex.Message}. Line: {line}");
                    continue;
                }

                if (request == null)
                {
                    continue;
                }

                LogToStderr($"RpcHandler: Received RPC Request ID {request.Id}, Method: {request.Method}");
                _ = Task.Run(() => HandleRpcRequest(request), cancellationToken);
            }
        }
        catch (Exception ex)
        {
            LogToStderr($"Fatal error in RPC processing: {ex.Message}");
        }

        LogToStderr("RpcHandler: RPC request processing loop finished.");
    }

    /// <summary>
    /// Dispatches one request to its handler and sends the response. Handler exceptions are
    /// converted into a -32603 error response. muteSystemAudio sends its own response.
    /// </summary>
    private async Task HandleRpcRequest(RpcRequest request)
    {
        RpcResponse response;

        try
        {
            switch (request.Method)
            {
                case Method.GetAccessibilityTreeDetails:
                    response = await HandleGetAccessibilityTreeDetails(request);
                    break;

                case Method.GetAccessibilityContext:
                    response = await HandleGetAccessibilityContext(request);
                    break;

                case Method.PasteText:
                    response = HandlePasteText(request);
                    break;

                case Method.MuteSystemAudio:
                    // The handler sends the real response itself; its return value is a placeholder.
                    await HandleMuteSystemAudio(request);
                    return;

                case Method.RestoreSystemAudio:
                    response = HandleRestoreSystemAudio(request);
                    break;

                default:
                    LogToStderr($"Method not found: {request.Method} for ID: {request.Id}");
                    response = new RpcResponse
                    {
                        Id = request.Id.ToString(),
                        Error = new Error
                        {
                            Code = -32601,
                            Message = $"Method not found: {request.Method}"
                        }
                    };
                    break;
            }
        }
        catch (Exception ex)
        {
            LogToStderr($"Error handling request {request.Id}: {ex.Message}");
            response = new RpcResponse
            {
                Id = request.Id.ToString(),
                Error = new Error
                {
                    Code = -32603,
                    Message = $"Internal error: {ex.Message}"
                }
            };
        }

        SendRpcResponse(response);
    }

    /// <summary>
    /// Re-decodes <c>request.Params</c> as <typeparamref name="T"/> by round-tripping it through JSON.
    /// </summary>
    /// <param name="request">Request whose params should be decoded.</param>
    /// <param name="error">Set to a -32602 response when decoding fails; otherwise null.</param>
    /// <returns>The decoded params, or null when the request has none or decoding failed.</returns>
    private T? DecodeParams<T>(RpcRequest request, out RpcResponse? error) where T : class
    {
        error = null;
        if (request.Params == null)
        {
            return null;
        }

        try
        {
            var json = JsonSerializer.Serialize(request.Params, jsonOptions);
            return JsonSerializer.Deserialize<T>(json, jsonOptions);
        }
        catch (Exception ex)
        {
            LogToStderr($"Error decoding params: {ex.Message}");
            error = new RpcResponse
            {
                Id = request.Id.ToString(),
                Error = new Error
                {
                    Code = -32602,
                    Message = $"Invalid params: {ex.Message}",
                    Data = request.Params
                }
            };
            return null;
        }
    }

    /// <summary>Handles getAccessibilityTreeDetails; params are optional.</summary>
    private async Task<RpcResponse> HandleGetAccessibilityTreeDetails(RpcRequest request)
    {
        LogToStderr($"Handling getAccessibilityTreeDetails for ID: {request.Id}");

        var parameters = DecodeParams<GetAccessibilityTreeDetailsParams>(request, out var decodeError);
        if (decodeError != null)
        {
            return decodeError;
        }

        // Fetch the tree on a thread-pool thread so this handler is not blocked synchronously.
        var tree = await Task.Run(() => accessibilityService.FetchAccessibilityTree(parameters?.RootId));

        return new RpcResponse
        {
            Id = request.Id.ToString(),
            Result = new GetAccessibilityTreeDetailsResult { Tree = tree }
        };
    }

    /// <summary>Handles getAccessibilityContext; <c>EditableOnly</c> defaults to false when absent.</summary>
    private async Task<RpcResponse> HandleGetAccessibilityContext(RpcRequest request)
    {
        LogToStderr($"Handling getAccessibilityContext for ID: {request.Id}");

        var parameters = DecodeParams<GetAccessibilityContextParams>(request, out var decodeError);
        if (decodeError != null)
        {
            return decodeError;
        }

        var editableOnly = parameters?.EditableOnly ?? false;
        var context = await Task.Run(() => accessibilityService.GetAccessibilityContext(editableOnly));

        return new RpcResponse
        {
            Id = request.Id.ToString(),
            Result = new GetAccessibilityContextResult { Context = context }
        };
    }

    /// <summary>
    /// Handles pasteText. Missing params give -32602; undecodable params or an exception
    /// during the paste give -32603; otherwise the result reports whether the paste succeeded.
    /// </summary>
    private RpcResponse HandlePasteText(RpcRequest request)
    {
        LogToStderr($"Handling pasteText for ID: {request.Id}");

        if (request.Params == null)
        {
            return new RpcResponse
            {
                Id = request.Id.ToString(),
                Error = new Error
                {
                    Code = -32602,
                    Message = "Missing params for pasteText"
                }
            };
        }

        try
        {
            var json = JsonSerializer.Serialize(request.Params, jsonOptions);
            var parameters = JsonSerializer.Deserialize<PasteTextParams>(json, jsonOptions);

            if (parameters != null)
            {
                var success = accessibilityService.PasteText(parameters.Transcript);
                return new RpcResponse
                {
                    Id = request.Id.ToString(),
                    Result = new PasteTextResult
                    {
                        Success = success,
                        Message = success ? "Pasted successfully" : "Paste failed"
                    }
                };
            }
        }
        catch (Exception ex)
        {
            LogToStderr($"Error processing pasteText: {ex.Message}");
        }

        return new RpcResponse
        {
            Id = request.Id.ToString(),
            Error = new Error
            {
                Code = -32603,
                Message = "Error during paste operation"
            }
        };
    }

    /// <summary>
    /// Handles muteSystemAudio: plays the "rec-start" sound, then mutes system audio and sends
    /// the response. The mute-and-respond step runs exactly once, triggered either by the
    /// playback-completed event or, if that event never arrives for this request (for example
    /// the sound could not be played), right after <c>PlaySound</c> returns.
    /// </summary>
    /// <returns>A placeholder response; the real one has already been written to stdout.</returns>
    private async Task<RpcResponse> HandleMuteSystemAudio(RpcRequest request)
    {
        LogToStderr($"Handling muteSystemAudio for ID: {request.Id}");

        var requestId = request.Id.ToString();
        var completed = 0;

        Action<string>? completion = null;
        completion = (id) =>
        {
            // Ignore completions that belong to another request's sound (e.g. "rec-stop").
            if (id != requestId)
            {
                return;
            }

            // Once-guard: the event thread and the fallback call below may race.
            if (Interlocked.Exchange(ref completed, 1) != 0)
            {
                return;
            }

            LogToStderr($"rec-start.mp3 finished playing. Proceeding to mute system audio. ID: {id}");
            var success = audioService.MuteSystemAudio();
            SendRpcResponse(new RpcResponse
            {
                Id = id,
                Result = new MuteSystemAudioResult
                {
                    Success = success,
                    Message = success ? "Mute command sent" : "Failed to send mute command"
                }
            });
        };
        audioCompletionHandler = completion;

        try
        {
            // Play sound and wait for completion
            await audioService.PlaySound("rec-start", requestId);

            // Fallback: no-op if the completion event already ran the handler.
            completion(requestId);
        }
        finally
        {
            // Clear the field only if it still refers to this request's handler.
            Interlocked.CompareExchange(ref audioCompletionHandler, null, completion);
        }

        // Return dummy response (real response was sent by the completion handler)
        return new RpcResponse { Id = request.Id.ToString() };
    }

    /// <summary>
    /// Handles restoreSystemAudio: restores audio and, on success, starts the "rec-stop"
    /// sound without waiting for it to finish.
    /// </summary>
    private RpcResponse HandleRestoreSystemAudio(RpcRequest request)
    {
        LogToStderr($"Handling restoreSystemAudio for ID: {request.Id}");

        var success = audioService.RestoreSystemAudio();
        if (success)
        {
            // Play sound asynchronously (don't wait)
            _ = audioService.PlaySound("rec-stop", request.Id.ToString());
        }

        return new RpcResponse
        {
            Id = request.Id.ToString(),
            Result = new RestoreSystemAudioResult
            {
                Success = success,
                Message = success ? "Restore command sent" : "Failed to send restore command"
            }
        };
    }

    // Forwards the audio service's playback-completed event to the pending handler, if any.
    private void OnSoundPlaybackCompleted(object? sender, string requestId)
    {
        audioCompletionHandler?.Invoke(requestId);
    }

    /// <summary>Serializes the response and writes it to stdout as one line; failures are logged.</summary>
    private void SendRpcResponse(RpcResponse response)
    {
        try
        {
            var json = JsonSerializer.Serialize(response, jsonOptions);
            LogToStderr($"[RpcHandler] Sending response to stdout: {json}");
            Console.WriteLine(json);
            Console.Out.Flush();
        }
        catch (Exception ex)
        {
            LogToStderr($"Error encoding RpcResponse: {ex.Message}");
        }
    }

    /// <summary>Writes a timestamped diagnostic line to stderr (stdout is reserved for JSON).</summary>
    private void LogToStderr(string message)
    {
        var timestamp = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff");
        Console.Error.WriteLine($"[{timestamp}] {message}");
        Console.Error.Flush();
    }
}
|
||||
}
|
||||
|
|
@ -0,0 +1,130 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Windows.Forms;
|
||||
using WindowsHelper.Models;
|
||||
|
||||
namespace WindowsHelper.Services
|
||||
{
|
||||
/// <summary>
/// Facade over <see cref="UIAutomationService"/> for accessibility queries, plus a
/// clipboard-based paste implemented by placing text on the clipboard and synthesizing Ctrl+V.
/// </summary>
public class AccessibilityService
{
    #region Windows API
    [DllImport("user32.dll")]
    private static extern bool SetForegroundWindow(IntPtr hWnd);

    [DllImport("user32.dll")]
    private static extern IntPtr GetForegroundWindow();

    [DllImport("user32.dll", CharSet = CharSet.Auto)]
    private static extern int GetWindowText(IntPtr hWnd, StringBuilder lpString, int nMaxCount);

    [DllImport("user32.dll")]
    private static extern int GetWindowTextLength(IntPtr hWnd);

    [DllImport("user32.dll")]
    private static extern uint GetWindowThreadProcessId(IntPtr hWnd, out uint processId);

    [DllImport("user32.dll")]
    private static extern IntPtr GetFocus();

    // keybd_event has no return value in the Win32 API.
    [DllImport("user32.dll")]
    private static extern void keybd_event(byte bVk, byte bScan, uint dwFlags, UIntPtr dwExtraInfo);

    // Sleep is exported by kernel32.dll, not user32.dll.
    [DllImport("kernel32.dll")]
    private static extern void Sleep(int dwMilliseconds);

    private const byte VK_CONTROL = 0x11;
    private const byte VK_V = 0x56;
    private const uint KEYEVENTF_KEYUP = 0x0002;
    #endregion

    private readonly UIAutomationService uiAutomationService;

    public AccessibilityService()
    {
        uiAutomationService = new UIAutomationService();
    }

    /// <summary>Returns the accessibility tree as produced by the UI Automation service.</summary>
    /// <param name="rootId">Passed through unchanged to the UI Automation service; may be null.</param>
    public AccessibilityElementNode? FetchAccessibilityTree(string? rootId)
    {
        // Delegate to UI Automation service for real implementation
        return uiAutomationService.FetchAccessibilityTree(rootId);
    }

    /// <summary>Returns the accessibility context as produced by the UI Automation service.</summary>
    /// <param name="editableOnly">Passed through unchanged to the UI Automation service.</param>
    public AccessibilityContext GetAccessibilityContext(bool editableOnly)
    {
        // Delegate to UI Automation service for real implementation
        return uiAutomationService.GetAccessibilityContext(editableOnly);
    }

    /// <summary>
    /// Places <paramref name="text"/> on the clipboard and synthesizes Ctrl+V.
    /// </summary>
    /// <param name="text">Text to paste.</param>
    /// <returns>
    /// True when the clipboard was set and the key sequence was sent; false when the clipboard
    /// could not be set (no keystrokes are sent in that case, so stale clipboard content is
    /// never pasted) or when any other error occurred.
    /// </returns>
    public bool PasteText(string text)
    {
        try
        {
            LogToStderr($"PasteText called with text length: {text.Length}");

            // Clipboard access requires an STA thread, so use a dedicated one.
            var clipboardSet = false;
            var clipboardThread = new Thread(() =>
            {
                try
                {
                    Clipboard.SetText(text);
                    clipboardSet = true;
                }
                catch (Exception ex)
                {
                    LogToStderr($"Error setting clipboard: {ex.Message}");
                }
            });
            clipboardThread.SetApartmentState(ApartmentState.STA);
            clipboardThread.Start();
            clipboardThread.Join();

            if (!clipboardSet)
            {
                LogToStderr("Clipboard was not updated; paste command not sent");
                return false;
            }

            // Small delay to ensure clipboard is set
            Thread.Sleep(100);

            // Simulate Ctrl+V. Ctrl is released in a finally block so the modifier can never
            // be left logically pressed if the sequence is interrupted.
            keybd_event(VK_CONTROL, 0, 0, UIntPtr.Zero);
            try
            {
                Thread.Sleep(50);

                // Press V
                keybd_event(VK_V, 0, 0, UIntPtr.Zero);
                Thread.Sleep(50);

                // Release V
                keybd_event(VK_V, 0, KEYEVENTF_KEYUP, UIntPtr.Zero);
                Thread.Sleep(50);
            }
            finally
            {
                // Release Ctrl
                keybd_event(VK_CONTROL, 0, KEYEVENTF_KEYUP, UIntPtr.Zero);
            }

            LogToStderr("Paste command sent successfully");
            return true;
        }
        catch (Exception ex)
        {
            LogToStderr($"Error in PasteText: {ex.Message}");
            return false;
        }
    }

    // Returns the window's title text, or an empty string when the reported length is zero.
    private string GetWindowTitle(IntPtr hwnd)
    {
        int length = GetWindowTextLength(hwnd);
        if (length == 0) return string.Empty;

        // +1 leaves room for the terminating null character.
        StringBuilder sb = new StringBuilder(length + 1);
        GetWindowText(hwnd, sb, sb.Capacity);
        return sb.ToString();
    }

    /// <summary>Writes a timestamped, service-tagged diagnostic line to stderr.</summary>
    private void LogToStderr(string message)
    {
        var timestamp = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff");
        Console.Error.WriteLine($"[{timestamp}] [AccessibilityService] {message}");
        Console.Error.Flush();
    }
}
|
||||
}
|
||||
|
|
@ -0,0 +1,167 @@
|
|||
using System;
|
||||
using System.IO;
|
||||
using System.Reflection;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Threading.Tasks;
|
||||
using NAudio.Wave;
|
||||
using NAudio.CoreAudioApi;
|
||||
|
||||
namespace WindowsHelper.Services
|
||||
{
|
||||
/// <summary>
/// Plays the helper's embedded MP3 sounds and mutes / restores the default
/// multimedia render endpoint.
/// </summary>
public class AudioService
{
    // Player of the most recently started sound; null when no sound is active.
    private WaveOutEvent? waveOut;
    private MMDeviceEnumerator? deviceEnumerator;
    private float originalVolume = 1.0f;
    private bool originalMuteState = false;
    // True from a successful MuteSystemAudio until the matching RestoreSystemAudio.
    // Guards the saved state against being overwritten by a repeated mute and
    // against a restore that has nothing to restore.
    private bool hasSavedState = false;

    /// <summary>Raised with the request id when playback of a sound has stopped.</summary>
    public event EventHandler<string>? SoundPlaybackCompleted;

    /// <summary>
    /// Creates the device enumerator. A failure is logged and leaves the
    /// enumerator null, in which case mute / restore return false.
    /// </summary>
    public AudioService()
    {
        try
        {
            deviceEnumerator = new MMDeviceEnumerator();
        }
        catch (Exception ex)
        {
            LogToStderr($"Failed to initialize audio device enumerator: {ex.Message}");
        }
    }

    /// <summary>
    /// Plays the embedded resource <c>WindowsHelper.Resources.{soundName}.mp3</c>
    /// and completes once playback has stopped.
    /// </summary>
    /// <param name="soundName">Resource name without namespace prefix or extension.</param>
    /// <param name="requestId">Passed to <see cref="SoundPlaybackCompleted"/> when playback stops.</param>
    /// <remarks>
    /// Errors are logged, not thrown. When the resource is missing the method
    /// returns without raising <see cref="SoundPlaybackCompleted"/>.
    /// </remarks>
    public async Task PlaySound(string soundName, string requestId)
    {
        // Each call owns exactly one player. Only the owner disposes it, so a call
        // that was interrupted by a newer one can never dispose the newer player.
        WaveOutEvent? player = null;

        try
        {
            LogToStderr($"PlaySound called with soundName: {soundName}");

            // Stop any currently playing sound. Stopping raises PlaybackStopped on
            // that player, which lets the call that started it finish and clean up.
            var previous = waveOut;
            if (previous != null && previous.PlaybackState == PlaybackState.Playing)
            {
                previous.Stop();
            }

            // Get embedded resource
            var assembly = Assembly.GetExecutingAssembly();
            var resourceName = $"WindowsHelper.Resources.{soundName}.mp3";

            using var stream = assembly.GetManifestResourceStream(resourceName);
            if (stream == null)
            {
                LogToStderr($"Resource not found: {resourceName}");
                return;
            }

            // Copy the resource into memory before handing it to the MP3 reader
            using var memoryStream = new MemoryStream();
            await stream.CopyToAsync(memoryStream);
            memoryStream.Position = 0;

            using var audioFile = new Mp3FileReader(memoryStream);

            player = new WaveOutEvent();
            waveOut = player;
            player.Init(audioFile);

            // Set up completion handler
            var completionSource = new TaskCompletionSource<bool>();
            player.PlaybackStopped += (sender, args) =>
            {
                LogToStderr($"Sound playback finished for {soundName}");
                completionSource.TrySetResult(true);
                SoundPlaybackCompleted?.Invoke(this, requestId);
            };

            // Start playback
            player.Play();
            LogToStderr($"Playing embedded sound: {soundName}.mp3");

            // Wait for completion
            await completionSource.Task;
        }
        catch (Exception ex)
        {
            LogToStderr($"Error playing sound {soundName}: {ex.Message}");
        }
        finally
        {
            if (player != null)
            {
                // Clear the shared field only if it still refers to our player.
                if (ReferenceEquals(waveOut, player))
                {
                    waveOut = null;
                }
                player.Dispose();
            }
        }
    }

    /// <summary>
    /// Mutes the default multimedia render endpoint, remembering its volume and
    /// mute flag for <see cref="RestoreSystemAudio"/>.
    /// </summary>
    /// <returns>True when the endpoint was muted; false on any failure.</returns>
    public bool MuteSystemAudio()
    {
        try
        {
            LogToStderr("MuteSystemAudio called");

            if (deviceEnumerator == null) return false;

            // The device wrapper holds COM references; release them when done.
            using var device = deviceEnumerator.GetDefaultAudioEndpoint(DataFlow.Render, Role.Multimedia);
            if (device != null)
            {
                var endpointVolume = device.AudioEndpointVolume;

                // Store original state only once per mute/restore cycle; a repeated
                // mute would otherwise record "muted" as the state to restore.
                if (!hasSavedState)
                {
                    originalVolume = endpointVolume.MasterVolumeLevelScalar;
                    originalMuteState = endpointVolume.Mute;
                    hasSavedState = true;
                }

                // Mute the audio
                endpointVolume.Mute = true;

                LogToStderr($"System audio muted. Original volume: {originalVolume}, Original mute: {originalMuteState}");
                return true;
            }
        }
        catch (Exception ex)
        {
            LogToStderr($"Error muting system audio: {ex.Message}");
        }

        return false;
    }

    /// <summary>
    /// Restores the volume and mute flag captured by <see cref="MuteSystemAudio"/>.
    /// </summary>
    /// <returns>
    /// True when the state was restored or there was nothing to restore;
    /// false on any failure.
    /// </returns>
    public bool RestoreSystemAudio()
    {
        try
        {
            LogToStderr("RestoreSystemAudio called");

            if (deviceEnumerator == null) return false;

            if (!hasSavedState)
            {
                // Without a prior mute the stored values are only field defaults;
                // applying them would force the volume to 100%.
                LogToStderr("No saved audio state to restore");
                return true;
            }

            using var device = deviceEnumerator.GetDefaultAudioEndpoint(DataFlow.Render, Role.Multimedia);
            if (device != null)
            {
                var endpointVolume = device.AudioEndpointVolume;

                // Restore original state
                endpointVolume.Mute = originalMuteState;
                endpointVolume.MasterVolumeLevelScalar = originalVolume;
                hasSavedState = false;

                LogToStderr($"System audio restored. Volume: {originalVolume}, Mute: {originalMuteState}");
                return true;
            }
        }
        catch (Exception ex)
        {
            LogToStderr($"Error restoring system audio: {ex.Message}");
        }

        return false;
    }

    /// <summary>Writes a timestamped, service-tagged line to standard error and flushes.</summary>
    private void LogToStderr(string message)
    {
        var timestamp = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff");
        Console.Error.WriteLine($"[{timestamp}] [AudioService] {message}");
        Console.Error.Flush();
    }
}
|
||||
}
|
||||
|
|
@ -0,0 +1,323 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text;
|
||||
using System.Windows.Automation;
|
||||
using System.Windows.Forms;
|
||||
using WindowsHelper.Models;
|
||||
|
||||
namespace WindowsHelper.Services
|
||||
{
|
||||
/// <summary>
|
||||
/// Implements accessibility functionality using Windows UI Automation API
|
||||
/// </summary>
|
||||
public class UIAutomationService
|
||||
{
|
||||
private readonly int maxDepth = 10;
|
||||
|
||||
/// <summary>
/// Builds an accessibility tree rooted either at the element whose automation id
/// equals <paramref name="rootId"/> or, when no id is given, at the focused element.
/// </summary>
/// <param name="rootId">Automation id to search for among the desktop's descendants; null or empty selects the focused element.</param>
/// <returns>The tree, or null when no root was found or an error occurred (errors are logged).</returns>
public AccessibilityElementNode? FetchAccessibilityTree(string? rootId)
{
    try
    {
        LogToStderr("FetchAccessibilityTree called with UI Automation");

        if (string.IsNullOrEmpty(rootId))
        {
            // No id supplied: use the focused element as root
            var focused = AutomationElement.FocusedElement;
            if (focused != null)
            {
                return BuildAccessibilityTree(focused, 0);
            }

            LogToStderr("No focused element found");
            return null;
        }

        // Look the element up by automation id below the desktop root
        var byAutomationId = new PropertyCondition(AutomationElement.AutomationIdProperty, rootId);
        var match = AutomationElement.RootElement.FindFirst(TreeScope.Descendants, byAutomationId);
        if (match != null)
        {
            return BuildAccessibilityTree(match, 0);
        }

        LogToStderr($"Could not find element with ID: {rootId}");
        return null;
    }
    catch (Exception ex)
    {
        LogToStderr($"Error fetching accessibility tree: {ex.Message}");
        return null;
    }
}
|
||||
|
||||
/// <summary>
/// Recursively converts an automation element and its children into
/// <see cref="AccessibilityElementNode"/> objects.
/// </summary>
/// <param name="element">Element to convert.</param>
/// <param name="depth">Depth of <paramref name="element"/>; recursion stops once it exceeds maxDepth.</param>
/// <returns>The node, or null when the element is null, too deep, or could not be read.</returns>
private AccessibilityElementNode? BuildAccessibilityTree(AutomationElement element, int depth)
{
    if (element == null)
    {
        return null;
    }

    if (depth > maxDepth)
    {
        return null;
    }

    try
    {
        var info = element.Current;
        var result = new AccessibilityElementNode
        {
            Id = info.AutomationId,
            Role = info.ControlType.ProgrammaticName,
            Name = info.Name,
            Value = GetElementValue(element),
            Description = info.HelpText,
            IsEditable = IsElementEditable(element),
            Children = new List<AccessibilityElementNode>()
        };

        // Children that fail to convert are dropped rather than failing the parent
        foreach (AutomationElement child in element.FindAll(TreeScope.Children, Condition.TrueCondition))
        {
            var converted = BuildAccessibilityTree(child, depth + 1);
            if (converted == null)
            {
                continue;
            }

            result.Children.Add(converted);
        }

        return result;
    }
    catch (ElementNotAvailableException)
    {
        // The element went away while it was being read
        return null;
    }
    catch (Exception ex)
    {
        LogToStderr($"Error building tree node: {ex.Message}");
        return null;
    }
}
|
||||
|
||||
/// <summary>
/// Collects information about the focused element, its text selection, the
/// containing window and the owning application.
/// </summary>
/// <param name="editableOnly">
/// Only logged; it does not filter the result in this method.
/// NOTE(review): confirm whether callers expect filtering.
/// </param>
/// <returns>
/// A context object that is always non-null. Fields that could not be determined
/// keep their defaults; errors are logged rather than thrown.
/// </returns>
public AccessibilityContext GetAccessibilityContext(bool editableOnly)
{
    var context = new AccessibilityContext();

    try
    {
        LogToStderr($"GetAccessibilityContext called with editableOnly: {editableOnly}");

        // Get focused element
        var focusedElement = AutomationElement.FocusedElement;
        if (focusedElement != null)
        {
            // Query role and editability once and reuse them; each query is a
            // cross-process UI Automation call.
            var role = focusedElement.Current.ControlType.ProgrammaticName;
            var editable = IsElementEditable(focusedElement);

            context.FocusedElement = new FocusedElement
            {
                Role = role,
                Title = focusedElement.Current.Name,
                Value = GetElementValue(focusedElement),
                Description = focusedElement.Current.HelpText,
                IsEditable = editable
            };

            context.FocusedElementRole = role;
            context.IsEditable = editable;

            // Get text selection if available
            if (focusedElement.TryGetCurrentPattern(TextPattern.Pattern, out object textPattern)
                && textPattern is TextPattern tp)
            {
                var selection = tp.GetSelection();
                if (selection.Length > 0)
                {
                    context.TextSelection = new TextSelection
                    {
                        SelectedText = selection[0].GetText(-1),
                        IsEditable = context.IsEditable
                    };
                }
            }
        }

        // Get window information
        var window = GetWindowElement(focusedElement);
        if (window != null)
        {
            var windowTitle = window.Current.Name;
            var windowInfo = new WindowInfo
            {
                Title = windowTitle
            };
            context.WindowInfo = windowInfo;
            context.WindowTitle = windowTitle;

            // Get application info
            try
            {
                // Process holds an OS handle; dispose it when done.
                using var process = System.Diagnostics.Process.GetProcessById(window.Current.ProcessId);
                var processName = process.ProcessName;

                // Reading MainModule can throw (for example access denied on an
                // elevated process). Keep the process name in that case and only
                // leave path and version empty.
                var modulePath = "";
                var productVersion = "";
                try
                {
                    var mainModule = process.MainModule;
                    modulePath = mainModule?.FileName ?? "";
                    productVersion = mainModule?.FileVersionInfo.ProductVersion ?? "";
                }
                catch (Exception moduleEx)
                {
                    LogToStderr($"Could not read main module of {processName}: {moduleEx.Message}");
                }

                context.Application = new Models.Application
                {
                    Name = processName,
                    BundleIdentifier = modulePath,
                    Version = productVersion
                };
                context.ApplicationName = processName;

                // Detect if it's a web browser. Ordinal comparison keeps the match
                // independent of the current culture's casing rules.
                var browserNames = new[] { "chrome", "firefox", "edge", "msedge", "brave", "opera" };
                context.IsWebContent = Array.Exists(browserNames,
                    name => processName.IndexOf(name, StringComparison.OrdinalIgnoreCase) >= 0);

                // For browsers, try to get URL
                if (context.IsWebContent)
                {
                    var urlBar = FindUrlBar(window);
                    if (urlBar != null)
                    {
                        windowInfo.Url = GetElementValue(urlBar);
                    }
                }
            }
            catch
            {
                context.ApplicationName = "Unknown";
            }
        }

        // Seconds since the Unix epoch, with millisecond precision
        context.Timestamp = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds() / 1000.0;

        LogToStderr($"Accessibility context retrieved: App={context.ApplicationName}, Window={context.WindowTitle}");
    }
    catch (Exception ex)
    {
        LogToStderr($"Error getting accessibility context: {ex.Message}");
    }

    return context;
}
|
||||
|
||||
/// <summary>
/// Walks up the control view from <paramref name="element"/> and returns the
/// first element whose control type is Window.
/// </summary>
/// <param name="element">Starting element; may be null.</param>
/// <returns>
/// The nearest Window element (possibly <paramref name="element"/> itself), or
/// null when the walk reaches the desktop root, runs out of parents, or fails.
/// </returns>
private AutomationElement? GetWindowElement(AutomationElement? element)
{
    var node = element;
    while (node != null)
    {
        if (node.Current.ControlType == ControlType.Window)
        {
            return node;
        }

        try
        {
            var ancestor = TreeWalker.ControlViewWalker.GetParent(node);
            var reachedTop = ancestor == null || ancestor.Equals(AutomationElement.RootElement);
            if (reachedTop)
            {
                return null;
            }

            node = ancestor;
        }
        catch
        {
            // Parent lookup failed; give up on finding a window
            return null;
        }
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Searches a browser window's descendants for its address bar.
/// </summary>
/// <param name="window">Window element to search below.</param>
/// <returns>
/// The first element matching one of the known automation ids, or failing that
/// the first keyboard-focusable Edit control; null when nothing matches or the
/// search throws.
/// </returns>
private AutomationElement? FindUrlBar(AutomationElement window)
{
    try
    {
        // Tried in order: known automation ids first, generic edit box last
        Condition[] candidates =
        {
            new PropertyCondition(AutomationElement.AutomationIdProperty, "addressEditBox"),
            new PropertyCondition(AutomationElement.AutomationIdProperty, "urlbar"),
            new PropertyCondition(AutomationElement.AutomationIdProperty, "omnibox"),
            new AndCondition(
                new PropertyCondition(AutomationElement.ControlTypeProperty, ControlType.Edit),
                new PropertyCondition(AutomationElement.IsKeyboardFocusableProperty, true)
            )
        };

        for (var i = 0; i < candidates.Length; i++)
        {
            var hit = window.FindFirst(TreeScope.Descendants, candidates[i]);
            if (hit != null)
            {
                return hit;
            }
        }

        return null;
    }
    catch
    {
        // URL detection is best effort
        return null;
    }
}
|
||||
|
||||
/// <summary>
/// Reads an element's textual value from the first pattern it supports, tried in
/// the order Value, Text, RangeValue.
/// </summary>
/// <param name="element">Element to read.</param>
/// <returns>The value as a string, or null when no pattern is supported or reading fails.</returns>
private string? GetElementValue(AutomationElement element)
{
    try
    {
        object rawPattern;

        // Value pattern
        if (element.TryGetCurrentPattern(ValuePattern.Pattern, out rawPattern))
        {
            var value = rawPattern as ValuePattern;
            return value?.Current.Value;
        }

        // Text pattern: whole document text
        if (element.TryGetCurrentPattern(TextPattern.Pattern, out rawPattern))
        {
            var text = rawPattern as TextPattern;
            return text?.DocumentRange.GetText(-1);
        }

        // RangeValue pattern: numeric value rendered as a string
        if (element.TryGetCurrentPattern(RangeValuePattern.Pattern, out rawPattern))
        {
            var range = rawPattern as RangeValuePattern;
            return range?.Current.Value.ToString();
        }

        return null;
    }
    catch
    {
        // Pattern errors are ignored
        return null;
    }
}
|
||||
|
||||
/// <summary>
/// Decides whether an element accepts text input.
/// </summary>
/// <param name="element">Element to inspect.</param>
/// <returns>
/// For elements supporting the Value pattern: true unless the value is read-only.
/// Otherwise: the enabled state for Edit and Document controls, false for
/// everything else and on any error.
/// </returns>
private bool IsElementEditable(AutomationElement element)
{
    try
    {
        if (element.TryGetCurrentPattern(ValuePattern.Pattern, out object rawPattern))
        {
            var value = rawPattern as ValuePattern;
            if (value == null)
            {
                return false;
            }

            return !value.Current.IsReadOnly;
        }

        // No Value pattern: fall back to the control type
        var isTextControl = element.Current.ControlType == ControlType.Edit ||
                            element.Current.ControlType == ControlType.Document;
        return isTextControl ? element.Current.IsEnabled : false;
    }
    catch
    {
        // Pattern errors are ignored
        return false;
    }
}
|
||||
|
||||
/// <summary>
/// Writes one timestamped line tagged with this service's name to standard error
/// and flushes immediately.
/// </summary>
/// <param name="message">Text to log.</param>
private void LogToStderr(string message)
{
    var now = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff");
    var errorStream = Console.Error;
    errorStream.WriteLine($"[{now}] [UIAutomationService] {message}");
    errorStream.Flush();
}
|
||||
}
|
||||
}
|
||||
245
packages/native-helpers/windows-helper/src/ShortcutMonitor.cs
Normal file
245
packages/native-helpers/windows-helper/src/ShortcutMonitor.cs
Normal file
|
|
@ -0,0 +1,245 @@
|
|||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Threading;
|
||||
using WindowsHelper.Models;
|
||||
|
||||
namespace WindowsHelper
|
||||
{
|
||||
/// <summary>
/// Installs a low-level keyboard hook (WH_KEYBOARD_LL) on a dedicated thread and
/// reports key presses and modifier changes through <see cref="KeyEventOccurred"/>.
/// </summary>
public class ShortcutMonitor
{
    #region Windows API
    private const int WH_KEYBOARD_LL = 13;
    private const int WM_KEYDOWN = 0x0100;
    private const int WM_KEYUP = 0x0101;
    private const int WM_SYSKEYDOWN = 0x0104;
    private const int WM_SYSKEYUP = 0x0105;
    private const uint WM_QUIT = 0x0012;

    private delegate IntPtr LowLevelKeyboardProc(int nCode, IntPtr wParam, IntPtr lParam);

    [DllImport("user32.dll", CharSet = CharSet.Auto, SetLastError = true)]
    private static extern IntPtr SetWindowsHookEx(int idHook, LowLevelKeyboardProc lpfn, IntPtr hMod, uint dwThreadId);

    [DllImport("user32.dll", CharSet = CharSet.Auto, SetLastError = true)]
    private static extern bool UnhookWindowsHookEx(IntPtr hhk);

    [DllImport("user32.dll", CharSet = CharSet.Auto, SetLastError = true)]
    private static extern IntPtr CallNextHookEx(IntPtr hhk, int nCode, IntPtr wParam, IntPtr lParam);

    [DllImport("kernel32.dll", CharSet = CharSet.Auto, SetLastError = true)]
    private static extern IntPtr GetModuleHandle(string lpModuleName);

    [DllImport("user32.dll")]
    private static extern short GetAsyncKeyState(int vKey);

    [DllImport("user32.dll", SetLastError = true)]
    private static extern bool PostThreadMessage(uint idThread, uint msg, IntPtr wParam, IntPtr lParam);

    [DllImport("kernel32.dll")]
    private static extern uint GetCurrentThreadId();

    [StructLayout(LayoutKind.Sequential)]
    private struct KBDLLHOOKSTRUCT
    {
        public uint vkCode;
        public uint scanCode;
        public uint flags;
        public uint time;
        public IntPtr dwExtraInfo;
    }

    // Virtual key codes for modifier keys
    private const int VK_SHIFT = 0x10;
    private const int VK_CONTROL = 0x11;
    private const int VK_MENU = 0x12; // Alt key
    private const int VK_LWIN = 0x5B; // Left Windows key
    private const int VK_RWIN = 0x5C; // Right Windows key
    // Left/right specific codes; these are what a low-level hook reports in vkCode
    private const int VK_LSHIFT = 0xA0;
    private const int VK_RSHIFT = 0xA1;
    private const int VK_LCONTROL = 0xA2;
    private const int VK_RCONTROL = 0xA3;
    private const int VK_LMENU = 0xA4;
    private const int VK_RMENU = 0xA5;
    private const int VK_FUNCTION = 0xFF; // Fn key (not standard, varies by keyboard)
    #endregion

    private IntPtr hookId = IntPtr.Zero;
    private LowLevelKeyboardProc? hookProc;
    private Thread? messageLoopThread;
    // Written by Start/Stop and read by the hook thread
    private volatile bool isRunning = false;
    // Native id of the thread running MessageLoop; 0 while no loop is running
    private volatile uint messageLoopThreadId = 0;

    /// <summary>Raised on the hook thread for every key-down, key-up and modifier change.</summary>
    public event EventHandler<HelperEvent>? KeyEventOccurred;

    /// <summary>Starts the hook thread. Does nothing when already running.</summary>
    public void Start()
    {
        if (isRunning) return;

        isRunning = true;
        messageLoopThread = new Thread(MessageLoop)
        {
            Name = "ShortcutHook",
            IsBackground = false
        };
        messageLoopThread.SetApartmentState(ApartmentState.STA);
        messageLoopThread.Start();
    }

    /// <summary>
    /// Stops monitoring: removes the hook and asks the hook thread's message loop
    /// to exit.
    /// </summary>
    public void Stop()
    {
        isRunning = false;

        // The loop blocks inside GetMessage, so clearing the flag alone never ends
        // it. Posting WM_QUIT makes GetMessage return 0 and lets the (foreground)
        // thread exit.
        var threadId = messageLoopThreadId;
        if (threadId != 0)
        {
            PostThreadMessage(threadId, WM_QUIT, IntPtr.Zero, IntPtr.Zero);
        }

        if (hookId != IntPtr.Zero)
        {
            UnhookWindowsHookEx(hookId);
            hookId = IntPtr.Zero;
        }
    }

    /// <summary>
    /// Hook thread body: installs the hook, pumps messages until stopped, then
    /// removes the hook.
    /// </summary>
    private void MessageLoop()
    {
        try
        {
            messageLoopThreadId = GetCurrentThreadId();

            // Keep a reference to the delegate to prevent GC
            hookProc = HookCallback;

            using (Process curProcess = Process.GetCurrentProcess())
            using (ProcessModule? curModule = curProcess.MainModule)
            {
                if (curModule != null)
                {
                    hookId = SetWindowsHookEx(WH_KEYBOARD_LL, hookProc,
                        GetModuleHandle(curModule.ModuleName), 0);
                }
            }

            if (hookId == IntPtr.Zero)
            {
                LogToStderr("Failed to install shortcut hook");
                // Allow a later Start() to retry
                isRunning = false;
                return;
            }

            LogToStderr("Shortcut hook installed successfully");

            // Run Windows message loop
            while (isRunning && GetMessage(out MSG msg, IntPtr.Zero, 0, 0) > 0)
            {
                TranslateMessage(ref msg);
                DispatchMessage(ref msg);
            }
        }
        catch (Exception ex)
        {
            LogToStderr($"Error in shortcut message loop: {ex.Message}");
        }
        finally
        {
            messageLoopThreadId = 0;
            if (hookId != IntPtr.Zero)
            {
                UnhookWindowsHookEx(hookId);
                hookId = IntPtr.Zero;
            }
        }
    }

    /// <summary>
    /// Low-level keyboard hook procedure. Emits FlagsChanged for modifier keys and
    /// KeyDown / KeyUp for all other keys, then passes the event on.
    /// </summary>
    private IntPtr HookCallback(int nCode, IntPtr wParam, IntPtr lParam)
    {
        if (nCode >= 0)
        {
            try
            {
                int msg = wParam.ToInt32();
                bool isKeyDown = (msg == WM_KEYDOWN || msg == WM_SYSKEYDOWN);
                bool isKeyUp = (msg == WM_KEYUP || msg == WM_SYSKEYUP);

                if (isKeyDown || isKeyUp)
                {
                    var kbStruct = Marshal.PtrToStructure<KBDLLHOOKSTRUCT>(lParam);
                    uint vk = kbStruct.vkCode;
                    bool isModifier = IsModifierKey(vk);

                    // Create event matching Swift helper format
                    var helperEvent = new HelperEvent
                    {
                        Type = isModifier
                            ? HelperEventType.FlagsChanged
                            : (isKeyDown ? HelperEventType.KeyDown : HelperEventType.KeyUp),
                        Timestamp = DateTime.UtcNow,
                        Payload = new HelperEventPayload
                        {
                            KeyCode = (int)vk,
                            AltKey = ModifierStateAfterEvent(vk, isKeyDown, VK_LMENU, VK_RMENU, VK_MENU),
                            CtrlKey = ModifierStateAfterEvent(vk, isKeyDown, VK_LCONTROL, VK_RCONTROL, VK_CONTROL),
                            ShiftKey = ModifierStateAfterEvent(vk, isKeyDown, VK_LSHIFT, VK_RSHIFT, VK_SHIFT),
                            MetaKey = ModifierStateAfterEvent(vk, isKeyDown, VK_LWIN, VK_RWIN, 0),
                            FnKeyPressed = false // Windows doesn't have standard Fn key detection
                        }
                    };

                    KeyEventOccurred?.Invoke(this, helperEvent);
                }
            }
            catch (Exception ex)
            {
                LogToStderr($"Error in hook callback: {ex.Message}");
            }
        }

        return CallNextHookEx(hookId, nCode, wParam, lParam);
    }

    /// <summary>
    /// Returns whether a modifier is held once the current event is applied.
    /// The hook runs before the asynchronous key state is updated for the key
    /// being processed, so that key's state is taken from the event itself and
    /// only the other keys are read through GetAsyncKeyState.
    /// </summary>
    /// <param name="eventVk">Virtual key code of the event being processed.</param>
    /// <param name="isKeyDown">True for a key-down event, false for key-up.</param>
    /// <param name="leftVk">Left-hand key of the modifier.</param>
    /// <param name="rightVk">Right-hand key of the modifier.</param>
    /// <param name="genericVk">Side-independent code of the modifier, or 0 when there is none.</param>
    private bool ModifierStateAfterEvent(uint eventVk, bool isKeyDown, int leftVk, int rightVk, int genericVk)
    {
        if (eventVk == (uint)leftVk) return isKeyDown || IsKeyPressed(rightVk);
        if (eventVk == (uint)rightVk) return isKeyDown || IsKeyPressed(leftVk);

        if (genericVk != 0)
        {
            if (eventVk == (uint)genericVk) return isKeyDown;
            return IsKeyPressed(genericVk);
        }

        return IsKeyPressed(leftVk) || IsKeyPressed(rightVk);
    }

    /// <summary>True when the high bit of GetAsyncKeyState is set for the key.</summary>
    private bool IsKeyPressed(int vKey)
    {
        return (GetAsyncKeyState(vKey) & 0x8000) != 0;
    }

    /// <summary>
    /// True for Shift, Ctrl, Alt and Windows keys, in both their generic and
    /// left/right specific virtual key codes.
    /// </summary>
    private bool IsModifierKey(uint vkCode)
    {
        switch ((int)vkCode)
        {
            case VK_SHIFT:
            case VK_LSHIFT:
            case VK_RSHIFT:
            case VK_CONTROL:
            case VK_LCONTROL:
            case VK_RCONTROL:
            case VK_MENU:
            case VK_LMENU:
            case VK_RMENU:
            case VK_LWIN:
            case VK_RWIN:
                return true;
            default:
                return false;
        }
    }

    /// <summary>Writes a timestamped, class-tagged line to standard error and flushes.</summary>
    private void LogToStderr(string message)
    {
        var timestamp = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff");
        Console.Error.WriteLine($"[{timestamp}] [ShortcutMonitor] {message}");
        Console.Error.Flush();
    }

    #region Windows Message Loop
    [StructLayout(LayoutKind.Sequential)]
    private struct MSG
    {
        public IntPtr hwnd;
        public uint message;
        public IntPtr wParam;
        public IntPtr lParam;
        public uint time;
        public POINT pt;
    }

    [StructLayout(LayoutKind.Sequential)]
    private struct POINT
    {
        public int x;
        public int y;
    }

    [DllImport("user32.dll")]
    private static extern int GetMessage(out MSG lpMsg, IntPtr hWnd, uint wMsgFilterMin, uint wMsgFilterMax);

    [DllImport("user32.dll")]
    private static extern bool TranslateMessage(ref MSG lpMsg);

    [DllImport("user32.dll")]
    private static extern IntPtr DispatchMessage(ref MSG lpMsg);
    #endregion
}
|
||||
}
|
||||
64
packages/smart-whisper/.gitignore
vendored
Normal file
64
packages/smart-whisper/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
# Dependencies
|
||||
node_modules/
|
||||
|
||||
# Build outputs
|
||||
build/
|
||||
*.node
|
||||
bin/
|
||||
|
||||
# TypeScript outputs
# Ignore the contents of dist directories rather than the directories themselves:
# git cannot re-include a file whose parent directory is excluded, so a plain
# "dist/" rule would make the exception below ineffective.
**/dist/*
# Keep the build configuration file
!dist/build.js
*.tsbuildinfo
|
||||
|
||||
# Native compilation artifacts
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
*.dylib
|
||||
*.dll
|
||||
*.lib
|
||||
*.exp
|
||||
*.ilk
|
||||
*.pdb
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
lerna-debug.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# OS files
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
desktop.ini
|
||||
|
||||
# IDE files
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# Environment variables
|
||||
.env
|
||||
.env.local
|
||||
.env.*.local
|
||||
|
||||
# Test coverage
|
||||
coverage/
|
||||
*.lcov
|
||||
.nyc_output/
|
||||
|
||||
# Temporary files
|
||||
tmp/
|
||||
temp/
|
||||
*.tmp
|
||||
|
||||
# whisper.cpp build artifacts (if any get generated)
|
||||
whisper.cpp/build/
|
||||
whisper.cpp/*.o
|
||||
whisper.cpp/**/*.o
|
||||
10
packages/smart-whisper/WHISPER_CPP_VERSION
Normal file
10
packages/smart-whisper/WHISPER_CPP_VERSION
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
# Whisper.cpp Version Information
|
||||
|
||||
Repository: https://github.com/ggerganov/whisper.cpp
|
||||
Commit: 2ef717b293fe93872cc3a03ca77942936a281959
|
||||
Date: November 2024
|
||||
Description: whisper : add large-v3-turbo (#2440)
|
||||
|
||||
This file tracks the exact version of whisper.cpp used in this package.
|
||||
To update whisper.cpp, replace the contents of the whisper.cpp directory
|
||||
with a new version and update this file accordingly.
|
||||
37
packages/smart-whisper/binding.gyp
Normal file
37
packages/smart-whisper/binding.gyp
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
{
|
||||
'variables' : {
|
||||
'openssl_fips': '',
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"target_name": "smart-whisper",
|
||||
"sources": [
|
||||
"src/binding/binding.cc",
|
||||
"src/binding/common.cc",
|
||||
"src/binding/model.cc",
|
||||
"src/binding/transcribe.cc",
|
||||
"<!@(node -p \"require('./dist/build.js').sources\")"
|
||||
],
|
||||
"libraries": [ "<!@(node -p \"require('./dist/build.js').libraries\")" ],
|
||||
'defines': [ "<!@(node -p \"require('./dist/build.js').defines\")" ],
|
||||
'include_dirs': ["<!@(node -p \"require('node-addon-api').include\")", "whisper.cpp/include", "whisper.cpp/ggml/include", "whisper.cpp/examples"],
|
||||
'dependencies': ["<!(node -p \"require('node-addon-api').gyp\")"],
|
||||
'cflags!': [ '-fno-exceptions' ],
|
||||
'cflags_cc!': [ '-fno-exceptions' ],
|
||||
'xcode_settings': {
|
||||
'GCC_ENABLE_CPP_EXCEPTIONS': 'YES',
|
||||
'CLANG_CXX_LIBRARY': 'libc++',
|
||||
},
|
||||
'msvs_settings': {
|
||||
'VCCLCompilerTool': { 'ExceptionHandling': 1 },
|
||||
},
|
||||
'conditions': [
|
||||
['OS=="mac"', {
|
||||
'xcode_settings': {
|
||||
'GCC_SYMBOLS_PRIVATE_EXTERN': 'YES', # -fvisibility=hidden
|
||||
}
|
||||
}]
|
||||
]
|
||||
}
|
||||
],
|
||||
}
|
||||
41
packages/smart-whisper/package.json
Normal file
41
packages/smart-whisper/package.json
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
{
|
||||
"name": "@amical/smart-whisper",
|
||||
"version": "0.1.0",
|
||||
"description": "Whisper.cpp Node.js binding with auto model offloading strategy.",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
"keywords": [
|
||||
"whisper",
|
||||
"whisper.cpp",
|
||||
"native",
|
||||
"binding",
|
||||
"addon"
|
||||
],
|
||||
"gypfile": true,
|
||||
"files": [
|
||||
"dist",
|
||||
"src",
|
||||
"scripts",
|
||||
"binding.gyp",
|
||||
"whisper.cpp/**/*.{c,h,cpp,hpp,m,cu,metal}",
|
||||
"whisper.cpp/Makefile",
|
||||
"whisper.cpp/LICENSE"
|
||||
],
|
||||
"scripts": {
|
||||
"install": "tsup",
|
||||
"postinstall": "node-gyp rebuild",
|
||||
"build": "tsup && node-gyp rebuild",
|
||||
"build:ts": "tsup",
|
||||
"build:native": "node-gyp rebuild"
|
||||
},
|
||||
"dependencies": {
|
||||
"node-addon-api": "^8.5.0",
|
||||
"minimatch": "10.0.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@amical/typescript-config": "workspace:*",
|
||||
"@types/node": "^24.3.0",
|
||||
"tsup": "^8.5.0",
|
||||
"typescript": "^5.8.2"
|
||||
}
|
||||
}
|
||||
21
packages/smart-whisper/scripts/linker.js
Normal file
21
packages/smart-whisper/scripts/linker.js
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
"use strict";

/**
 * Linker flags contributed by each optional acceleration backend, keyed by the
 * environment variable that enables it. Insertion order is the output order.
 */
const BACKEND_LINKER_FLAGS = Object.freeze({
  WHISPER_OPENBLAS: `-lopenblas`,
  WHISPER_CUBLAS: `-lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64`,
  // Every library needs its own -l prefix; a bare "lhipblas" would be passed
  // to the linker as an input file name.
  WHISPER_HIPBLAS: `-lhipblas -lamdhip64 -lrocblas -L/opt/rocm/lib -L/opt/rocm/hipblas/lib -Wl,-rpath=/opt/rocm/lib`,
  WHISPER_CLBLAST: `-lclblast -lOpenCL`,
});

/**
 * Collects the linker flags for every backend enabled in `env`.
 *
 * @param {Record<string, string | undefined>} [env=process.env] Environment to
 *   inspect; a backend is enabled when its variable is set to a non-empty value.
 * @returns {string[]} One flag string per enabled backend.
 */
function collectLinkerFlags(env = process.env) {
  return Object.entries(BACKEND_LINKER_FLAGS)
    .filter(([variable]) => env[variable])
    .map(([, flags]) => flags);
}

console.log(collectLinkerFlags().join(" "));
|
||||
77
packages/smart-whisper/src/binding.ts
Normal file
77
packages/smart-whisper/src/binding.ts
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
import path from "node:path";
import { TranscribeFormat, TranscribeParams, TranscribeResult } from "./types";

// Default the Metal resource path unless the caller already set one. This runs
// before the native addon is loaded below (presumably so ggml can locate its
// Metal resources; confirm against whisper.cpp). The imports come first so the
// use of `path` does not depend on import hoisting by the bundler.
process.env.GGML_METAL_PATH_RESOURCES =
  process.env.GGML_METAL_PATH_RESOURCES || path.join(__dirname, "../whisper.cpp/ggml/src");

const module = require(path.join(__dirname, "../build/Release/smart-whisper"));
|
||||
|
||||
/**
|
||||
* A external handle to a model.
|
||||
*/
|
||||
export type Handle = {
|
||||
readonly "": unique symbol;
|
||||
};
|
||||
|
||||
export namespace Binding {
|
||||
/**
|
||||
* Load a model from a whisper weights file.
|
||||
* @param file The path to the whisper weights file.
|
||||
* @param gpu Whether to use the GPU or not.
|
||||
* @param callback A callback that will be called with the handle to the model.
|
||||
*/
|
||||
export declare function load(
|
||||
file: string,
|
||||
gpu: boolean,
|
||||
callback: (handle: Handle) => void,
|
||||
): void;
|
||||
|
||||
/**
|
||||
* Release the memory of the model, it will be unusable after this.
|
||||
* @param handle The handle to the model.
|
||||
* @param callback A callback that will be called when the model is freed.
|
||||
*/
|
||||
export declare function free(handle: Handle, callback: () => void): void;
|
||||
|
||||
/**
|
||||
* Transcribe a PCM buffer.
|
||||
* @param handle The handle to the model.
|
||||
* @param pcm The PCM buffer.
|
||||
* @param params The parameters to use for transcription.
|
||||
* @param finish A callback that will be called when the transcription is finished.
|
||||
* @param progress A callback that will be called when a new result is available.
|
||||
*/
|
||||
export declare function transcribe<
|
||||
Format extends TranscribeFormat,
|
||||
TokenTimestamp extends boolean,
|
||||
>(
|
||||
handle: Handle,
|
||||
pcm: Float32Array,
|
||||
params: Partial<TranscribeParams<Format, TokenTimestamp>>,
|
||||
finish: (results: TranscribeResult<Format, TokenTimestamp>[]) => void,
|
||||
progress: (result: TranscribeResult<Format, TokenTimestamp>) => void,
|
||||
): void;
|
||||
|
||||
export declare class WhisperModel {
|
||||
private _ctx;
|
||||
constructor(handle: Handle);
|
||||
get handle(): Handle | null;
|
||||
get freed(): boolean;
|
||||
/**
|
||||
* Release the memory of the model, it will be unusable after this.
|
||||
* It's safe to call this multiple times, but it will only free the model once.
|
||||
*/
|
||||
free(): Promise<void>;
|
||||
/**
|
||||
* Load a model from a whisper weights file.
|
||||
* @param file The path to the whisper weights file.
|
||||
* @param gpu Whether to use the GPU or not.
|
||||
* @returns A promise that resolves to a {@link WhisperModel}.
|
||||
*/
|
||||
static load(file: string, gpu?: boolean): Promise<WhisperModel>;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The native binding for the underlying C++ addon.
|
||||
*/
|
||||
export const binding: typeof Binding = module;
|
||||
18
packages/smart-whisper/src/binding/binding.cc
Normal file
18
packages/smart-whisper/src/binding/binding.cc
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
#include <napi.h>
|
||||
|
||||
#include "common.h"
|
||||
#include "model.h"
|
||||
#include "transcribe.h"
|
||||
|
||||
// Module initializer: registers `transcribe` and the WhisperModel class on
// `exports`, and silences whisper logging when NODE_ENV is "production".
Napi::Object Init(Napi::Env env, Napi::Object exports) {
    exports.Set("transcribe", Napi::Function::New(env, Transcribe));
    WhisperModel::Init(env, exports);

    const bool production = IsProduction(env.Global());
    if (production) {
        // Log sink that drops every message
        const auto discard_log = [](ggml_log_level, const char *, void *) {};
        whisper_log_set(discard_log, nullptr);
    }

    return exports;
}
|
||||
|
||||
NODE_API_MODULE(whisper, Init)
|
||||
16
packages/smart-whisper/src/binding/common.cc
Normal file
16
packages/smart-whisper/src/binding/common.cc
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
#include "common.h"
|
||||
|
||||
Napi::Promise PromiseWorker::Promise() { return promise.Promise(); }
|
||||
|
||||
// Returns true when `process.env.NODE_ENV`, looked up on the given global
// object, is the string "production". A missing or non-string value yields false.
bool IsProduction(const Napi::Object global_env) {
    const Napi::Object process_obj = global_env.Get("process").As<Napi::Object>();
    const Napi::Object env_obj = process_obj.Get("env").As<Napi::Object>();
    const Napi::Value mode = env_obj.Get("NODE_ENV");

    return mode.IsString() && mode.As<Napi::String>().Utf8Value() == "production";
}
|
||||
22
packages/smart-whisper/src/binding/common.h
Normal file
22
packages/smart-whisper/src/binding/common.h
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
#ifndef _GUARD_SW_COMMON_H
|
||||
#define _GUARD_SW_COMMON_H
|
||||
|
||||
#ifndef NAPI_VERSION
|
||||
// Support Node.js 16+
|
||||
#define NAPI_VERSION 8
|
||||
#endif
|
||||
#include <napi.h>
|
||||
|
||||
class PromiseWorker : public Napi::AsyncWorker {
|
||||
public:
|
||||
PromiseWorker(Napi::Env &env) : AsyncWorker(env), promise(Napi::Promise::Deferred::New(env)) {}
|
||||
|
||||
Napi::Promise Promise();
|
||||
|
||||
protected:
|
||||
Napi::Promise::Deferred promise;
|
||||
};
|
||||
|
||||
bool IsProduction(const Napi::Object global_env);
|
||||
|
||||
#endif
|
||||
145
packages/smart-whisper/src/binding/model.cc
Normal file
145
packages/smart-whisper/src/binding/model.cc
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
#include "model.h"
|
||||
|
||||
class LoadModelWorker : public PromiseWorker {
|
||||
public:
|
||||
LoadModelWorker(Napi::Env &env, const std::string &model_path,
|
||||
struct whisper_context_params params)
|
||||
: PromiseWorker(env), model_path(model_path), params(params) {}
|
||||
|
||||
void Execute() override {
|
||||
context = whisper_init_from_file_with_params_no_state(model_path.c_str(), params);
|
||||
if (context == nullptr) {
|
||||
SetError("Failed to initialize whisper context");
|
||||
}
|
||||
whisper_print_timings(context);
|
||||
}
|
||||
|
||||
void OnOK() override {
|
||||
Napi::HandleScope scope(Env());
|
||||
auto handle = Napi::External<whisper_context>::New(Env(), context);
|
||||
auto constructor = Env().GetInstanceData<Napi::FunctionReference>();
|
||||
auto model = constructor->New({handle});
|
||||
|
||||
promise.Resolve(model);
|
||||
}
|
||||
|
||||
private:
|
||||
std::string model_path;
|
||||
struct whisper_context_params params;
|
||||
whisper_context *context;
|
||||
};
|
||||
|
||||
class FreeModelWorker : public PromiseWorker {
|
||||
public:
|
||||
FreeModelWorker(Napi::Env &env, whisper_context *context)
|
||||
: PromiseWorker(env), context(context) {}
|
||||
|
||||
void Execute() override { whisper_free(context); }
|
||||
|
||||
void OnOK() override {
|
||||
Napi::HandleScope scope(Env());
|
||||
promise.Resolve(Env().Undefined());
|
||||
}
|
||||
|
||||
private:
|
||||
whisper_context *context;
|
||||
};
|
||||
|
||||
// Defines the JS class `WhisperModel` (static `load`, instance `free`, and the
// read-only accessors `freed` and `handle`), stores a persistent reference to
// its constructor as the environment's instance data, and exports the class.
Napi::Object WhisperModel::Init(Napi::Env env, Napi::Object exports) {
    const auto method_attrs =
        static_cast<napi_property_attributes>(napi_writable | napi_configurable);
    const auto accessor_attrs =
        static_cast<napi_property_attributes>(napi_enumerable | napi_configurable);

    Napi::Function func = DefineClass(
        env, "WhisperModel",
        {
            StaticMethod<&WhisperModel::Load>("load", method_attrs),
            InstanceMethod<&WhisperModel::Free>("free", method_attrs),
            InstanceAccessor("freed", &WhisperModel::GetFreed, nullptr, accessor_attrs),
            InstanceAccessor("handle", &WhisperModel::GetHandle, nullptr, accessor_attrs),
        });

    // LoadModelWorker::OnOK reads this reference back through GetInstanceData.
    auto ctor_ref = new Napi::FunctionReference();
    *ctor_ref = Napi::Persistent(func);
    env.SetInstanceData<Napi::FunctionReference>(ctor_ref);

    exports.Set("WhisperModel", func);
    return exports;
}
|
||||
|
||||
// Wraps an already-loaded whisper context. Expects exactly one argument: a
// Napi::External<whisper_context>. On any argument error a TypeError is thrown
// and the wrapper is left holding no context.
WhisperModel::WhisperModel(const Napi::CallbackInfo &info) : Napi::ObjectWrap<WhisperModel>(info) {
    Napi::Env env = info.Env();
    Napi::HandleScope scope(env);

    // Start from a defined state so the early-return paths below never leave
    // an indeterminate pointer for Free()/Finalize()/the accessors to act on.
    context = nullptr;

    if (info.Length() != 1) {
        Napi::TypeError::New(env, "Wrong number of arguments").ThrowAsJavaScriptException();
        return;
    }

    // Reading External data from a non-external value is invalid; reject it up front.
    if (!info[0].IsExternal()) {
        Napi::TypeError::New(env, "Expected a whisper context handle")
            .ThrowAsJavaScriptException();
        return;
    }

    context = info[0].As<Napi::External<whisper_context>>().Data();
}
|
||||
|
||||
// Finalizer hook: releases the context if it has not been released already and
// clears the pointer so it cannot be freed twice.
void WhisperModel::Finalize(Napi::Env env) {
    if (context == nullptr) {
        return;
    }
    whisper_free(context);
    context = nullptr;
}
|
||||
|
||||
// JS: WhisperModel.load(model_path[, use_gpu]) -> Promise<WhisperModel>.
// Queues a LoadModelWorker and returns its promise. `use_gpu` defaults to true
// when the second argument is omitted.
Napi::Value WhisperModel::Load(const Napi::CallbackInfo &info) {
    Napi::Env env = info.Env();

    if (info.Length() < 1 || info.Length() > 2) {
        Napi::TypeError::New(env, "Wrong number of arguments").ThrowAsJavaScriptException();
        return env.Null();
    }

    std::string model_path = info[0].As<Napi::String>();

    // Start from the library defaults: a default-constructed struct would hand
    // indeterminate values to whisper for every field other than use_gpu.
    whisper_context_params params = whisper_context_default_params();
    params.use_gpu = info.Length() == 2 ? info[1].As<Napi::Boolean>() : true;

    // The worker deletes itself after completion (AsyncWorker default).
    auto worker = new LoadModelWorker(env, model_path, params);
    worker->Queue();

    return worker->Promise();
}
|
||||
|
||||
// JS: model.free() -> Promise<void>.
// Hands the context to a FreeModelWorker and forgets it immediately, so the
// model reports `freed` from this point on. When there is no context, an
// already-resolved promise is returned.
Napi::Value WhisperModel::Free(const Napi::CallbackInfo &info) {
    Napi::Env env = info.Env();

    if (info.Length() != 0) {
        Napi::TypeError::New(env, "Wrong number of arguments").ThrowAsJavaScriptException();
        return env.Null();
    }

    if (context != nullptr) {
        auto worker = new FreeModelWorker(env, context);
        // Ownership moved to the worker; drop our pointer before it runs.
        context = nullptr;
        worker->Queue();
        return worker->Promise();
    }

    auto already_done = Napi::Promise::Deferred::New(env);
    already_done.Resolve(env.Undefined());
    return already_done.Promise();
}
|
||||
|
||||
// Accessor `freed`: true when this wrapper no longer holds a context.
Napi::Value WhisperModel::GetFreed(const Napi::CallbackInfo &info) {
    const bool is_freed = (context == nullptr);
    return Napi::Boolean::New(info.Env(), is_freed);
}
|
||||
|
||||
// Accessor `handle`: a fresh External wrapping the context pointer, or null
// when the model holds no context.
Napi::Value WhisperModel::GetHandle(const Napi::CallbackInfo &info) {
    Napi::Env env = info.Env();

    if (context != nullptr) {
        return Napi::External<whisper_context>::New(env, context);
    }
    return env.Null();
}
|
||||
22
packages/smart-whisper/src/binding/model.h
Normal file
22
packages/smart-whisper/src/binding/model.h
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
#ifndef _GUARD_SW_MODEL_H
|
||||
#define _GUARD_SW_MODEL_H
|
||||
|
||||
#include "common.h"
|
||||
#include "whisper.h"
|
||||
|
||||
// JS-visible wrapper around a whisper_context. Instances are created by the
// static `load` method (through LoadModelWorker) and release the context either
// explicitly via `free` or when the wrapper is finalized.
class WhisperModel : public Napi::ObjectWrap<WhisperModel> {
   public:
    // Defines the JS class and registers it on `exports`.
    static Napi::Object Init(Napi::Env env, Napi::Object exports);

    WhisperModel(const Napi::CallbackInfo &info);
    void Finalize(Napi::Env env);

   private:
    // Null means "no context held". Initialized here so that a constructor
    // that bails out early never leaves an indeterminate pointer behind.
    whisper_context *context = nullptr;

    static Napi::Value Load(const Napi::CallbackInfo &info);
    Napi::Value Free(const Napi::CallbackInfo &info);
    Napi::Value GetFreed(const Napi::CallbackInfo &info);
    Napi::Value GetHandle(const Napi::CallbackInfo &info);
};
|
||||
|
||||
#endif
|
||||
358
packages/smart-whisper/src/binding/transcribe.cc
Normal file
358
packages/smart-whisper/src/binding/transcribe.cc
Normal file
|
|
@ -0,0 +1,358 @@
|
|||
#include "transcribe.h"
|
||||
|
||||
// Binding-level options that are not part of whisper_full_params.
struct smart_whisper_transcribe_params {
    // Result format name ("simple" or "detail" on the JS side). Heap string
    // created with strdup() and released by TranscribeWorker's destructor.
    const char* format;
};
|
||||
|
||||
struct whisper_full_params whisper_full_params_from_js(Napi::Object o) {
|
||||
struct whisper_full_params params =
|
||||
whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_BEAM_SEARCH);
|
||||
|
||||
if (o.Has("strategy")) {
|
||||
params.strategy = static_cast<whisper_sampling_strategy>(
|
||||
o.Get("strategy").As<Napi::Number>().Int32Value());
|
||||
}
|
||||
if (o.Has("n_threads")) {
|
||||
params.n_threads = o.Get("n_threads").As<Napi::Number>();
|
||||
}
|
||||
if (o.Has("n_max_text_ctx")) {
|
||||
params.n_max_text_ctx = o.Get("n_max_text_ctx").As<Napi::Number>();
|
||||
}
|
||||
if (o.Has("offset_ms")) {
|
||||
params.offset_ms = o.Get("offset_ms").As<Napi::Number>();
|
||||
}
|
||||
if (o.Has("duration_ms")) {
|
||||
params.duration_ms = o.Get("duration_ms").As<Napi::Number>();
|
||||
}
|
||||
|
||||
if (o.Has("translate")) {
|
||||
params.translate = o.Get("translate").As<Napi::Boolean>();
|
||||
}
|
||||
if (o.Has("no_context")) {
|
||||
params.no_context = o.Get("no_context").As<Napi::Boolean>();
|
||||
}
|
||||
if (o.Has("no_timestamps")) {
|
||||
params.no_timestamps = o.Get("no_timestamps").As<Napi::Boolean>();
|
||||
}
|
||||
if (o.Has("single_segment")) {
|
||||
params.single_segment = o.Get("single_segment").As<Napi::Boolean>();
|
||||
}
|
||||
if (o.Has("print_special")) {
|
||||
params.print_special = o.Get("print_special").As<Napi::Boolean>();
|
||||
}
|
||||
if (o.Has("print_progress")) {
|
||||
params.print_progress = o.Get("print_progress").As<Napi::Boolean>();
|
||||
}
|
||||
if (o.Has("print_realtime")) {
|
||||
params.print_realtime = o.Get("print_realtime").As<Napi::Boolean>();
|
||||
}
|
||||
if (o.Has("print_timestamps")) {
|
||||
params.print_timestamps = o.Get("print_timestamps").As<Napi::Boolean>();
|
||||
}
|
||||
|
||||
if (o.Has("token_timestamps")) {
|
||||
params.token_timestamps = o.Get("token_timestamps").As<Napi::Boolean>();
|
||||
}
|
||||
if (o.Has("thold_pt")) {
|
||||
params.thold_pt = o.Get("thold_pt").As<Napi::Number>();
|
||||
}
|
||||
if (o.Has("thold_ptsum")) {
|
||||
params.thold_ptsum = o.Get("thold_ptsum").As<Napi::Number>();
|
||||
}
|
||||
if (o.Has("max_len")) {
|
||||
params.max_len = o.Get("max_len").As<Napi::Number>();
|
||||
}
|
||||
if (o.Has("split_on_word")) {
|
||||
params.split_on_word = o.Get("split_on_word").As<Napi::Boolean>();
|
||||
}
|
||||
if (o.Has("max_tokens")) {
|
||||
params.max_tokens = o.Get("max_tokens").As<Napi::Number>();
|
||||
}
|
||||
|
||||
if (o.Has("debug_mode")) {
|
||||
params.debug_mode = o.Get("debug_mode").As<Napi::Boolean>();
|
||||
}
|
||||
if (o.Has("audio_ctx")) {
|
||||
params.audio_ctx = o.Get("audio_ctx").As<Napi::Number>();
|
||||
}
|
||||
|
||||
if (o.Has("tdrz_enable")) {
|
||||
params.tdrz_enable = o.Get("tdrz_enable").As<Napi::Boolean>();
|
||||
}
|
||||
|
||||
if (o.Has("initial_prompt") && o.Get("initial_prompt").IsString()) {
|
||||
std::string initial_prompt = o.Get("initial_prompt").As<Napi::String>().Utf8Value();
|
||||
params.initial_prompt = strdup(initial_prompt.c_str());
|
||||
} else {
|
||||
params.initial_prompt = nullptr;
|
||||
}
|
||||
|
||||
if (o.Has("language") && o.Get("language").IsString()) {
|
||||
std::string language = o.Get("language").As<Napi::String>().Utf8Value();
|
||||
params.language = strdup(language.c_str());
|
||||
} else {
|
||||
params.language = strdup("auto");
|
||||
}
|
||||
|
||||
if (o.Has("suppress_blank") && o.Get("suppress_blank").IsBoolean()) {
|
||||
params.suppress_blank = o.Get("suppress_blank").As<Napi::Boolean>();
|
||||
}
|
||||
if (o.Has("suppress_non_speech_tokens") && o.Get("suppress_non_speech_tokens").IsBoolean()) {
|
||||
params.suppress_non_speech_tokens = o.Get("suppress_non_speech_tokens").As<Napi::Boolean>();
|
||||
}
|
||||
|
||||
if (o.Has("temperature")) {
|
||||
params.temperature = o.Get("temperature").As<Napi::Number>();
|
||||
}
|
||||
if (o.Has("max_initial_ts")) {
|
||||
params.max_initial_ts = o.Get("max_initial_ts").As<Napi::Number>();
|
||||
}
|
||||
if (o.Has("length_penalty")) {
|
||||
params.length_penalty = o.Get("length_penalty").As<Napi::Number>();
|
||||
}
|
||||
|
||||
if (o.Has("temperature_inc")) {
|
||||
params.temperature_inc = o.Get("temperature_inc").As<Napi::Number>();
|
||||
}
|
||||
if (o.Has("entropy_thold")) {
|
||||
params.entropy_thold = o.Get("entropy_thold").As<Napi::Number>();
|
||||
}
|
||||
if (o.Has("logprob_thold")) {
|
||||
params.logprob_thold = o.Get("logprob_thold").As<Napi::Number>();
|
||||
}
|
||||
if (o.Has("no_speech_thold")) {
|
||||
params.no_speech_thold = o.Get("no_speech_thold").As<Napi::Number>();
|
||||
}
|
||||
|
||||
if (o.Has("best_of")) {
|
||||
params.greedy.best_of = o.Get("best_of").As<Napi::Number>();
|
||||
}
|
||||
|
||||
if (o.Has("beam_size")) {
|
||||
params.beam_search.beam_size = o.Get("beam_size").As<Napi::Number>();
|
||||
}
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
// Extracts the binding-level options from the JS options object. `format` is
// copied with strdup() when it is a JS string and defaults to "simple"
// otherwise; the caller owns the returned string.
struct smart_whisper_transcribe_params smart_whisper_transcribe_params_from_js(Napi::Object o) {
    struct smart_whisper_transcribe_params params;

    const bool has_format = o.Has("format") && o.Get("format").IsString();
    if (!has_format) {
        params.format = strdup("simple");
        return params;
    }

    const std::string requested = o.Get("format").As<Napi::String>().Utf8Value();
    params.format = strdup(requested.c_str());
    return params;
}
|
||||
|
||||
// Runs whisper_full_with_state on a worker thread.
//
// - Every new segment index is queued as progress; OnProgress() turns it into
//   a JS object and passes it to `progress_callback`.
// - On success, OnOK() passes the array of all segments to `finish_callback`.
// - On failure (SetError), the inherited OnError() is used, which calls
//   `finish_callback` with the error value.
//
// The worker owns `samples`, the strdup()'d strings inside `params` and
// `smart_params`, and the whisper state it creates; all are released in the
// destructor. It does not own `context`.
class TranscribeWorker : public Napi::AsyncProgressQueueWorker<int> {
   public:
    TranscribeWorker(whisper_context* context, const float* samples, int n_samples,
                     struct whisper_full_params params,
                     struct smart_whisper_transcribe_params smart_params,
                     Napi::Function& finish_callback, Napi::Function& progress_callback)
        : AsyncProgressQueueWorker(finish_callback),
          context(context),
          samples(samples),
          n_samples(n_samples),
          params(params),
          smart_params(smart_params) {
        this->progress_callback.Reset(progress_callback, 1);
        state = nullptr;
    }

    ~TranscribeWorker() {
        delete[] samples;
        // whisper_free_params(&params); will lead to a double free
        if (params.initial_prompt != nullptr) {
            free((void*)params.initial_prompt);
        }
        if (params.language != nullptr) {
            free((void*)params.language);
        }
        if (state != nullptr) {
            whisper_free_state(state);
        }

        free((void*)smart_params.format);
    }

    // Worker thread: creates a fresh state and runs the full transcription.
    void Execute(const ExecutionProgress& progress) override {
        state = whisper_init_state(context);
        if (state == nullptr) {
            // Without a state there is nothing whisper_full_with_state can work on.
            SetError("Failed to initialize whisper state");
            return;
        }

        params.new_segment_callback = [](struct whisper_context* ctx, struct whisper_state* state,
                                         int n_new, void* user_data) {
            const ExecutionProgress& progress = *(ExecutionProgress*)user_data;

            // whisper reports how many segments were appended (`n_new`);
            // announce each of them, not just the last one.
            const int n_segments = whisper_full_n_segments_from_state(state);
            for (int i = std::max(0, n_segments - n_new); i < n_segments; i++) {
                progress.Send(&i, 1);
            }
        };
        params.new_segment_callback_user_data = (void*)&progress;

        int err = whisper_full_with_state(context, state, params, samples, n_samples);
        if (err != 0) {
            SetError("whisper_full operation failed");
        }
    }

    // JS thread: converts one announced segment index into a JS object and
    // hands it to the progress callback.
    // NOTE(review): this reads `state` while Execute() may still be running
    // whisper_full_with_state on the worker thread; there is no
    // synchronization here - confirm that is acceptable for whisper's accessors.
    void OnProgress(const int* data, size_t _count) override {
        Napi::HandleScope scope(Env());

        if (this->progress_callback.IsEmpty()) {
            return;
        }
        if (data == nullptr) {
            return;
        }

        this->progress_callback.Call({SegmentToJs(*data)});
    }

    // JS thread, after a successful Execute(): delivers all segments at once.
    void OnOK() override {
        Napi::HandleScope scope(Env());

        int n_segments = whisper_full_n_segments_from_state(state);
        Napi::Array segments = Napi::Array::New(Env(), n_segments);
        for (int i = 0; i < n_segments; i++) {
            segments.Set(i, SegmentToJs(i));
        }

        Callback().Call({segments});
    }

   private:
    // Builds the JS representation of segment `i`.
    //
    // Always sets `from`, `to` (whisper timestamps multiplied by 10) and
    // `text`. For the "detail" format it additionally sets `lang`,
    // `confidence` and `tokens`; each token has `text`, `id`, `p`, plus
    // `from`/`to` when token timestamps were requested.
    //
    // `confidence` averages the probabilities of the counted tokens (ids above
    // the end-of-text token are skipped). With more than two counted tokens
    // the lowest and highest probability are dropped first; with none it is 0.
    // Progress and final results share this code so both agree and neither
    // can divide by zero.
    Napi::Object SegmentToJs(int i) {
        Napi::Env env = Env();

        Napi::Object segment = Napi::Object::New(env);
        segment.Set("from",
                    Napi::Number::New(env, whisper_full_get_segment_t0_from_state(state, i) * 10));
        segment.Set("to",
                    Napi::Number::New(env, whisper_full_get_segment_t1_from_state(state, i) * 10));
        segment.Set("text",
                    Napi::String::New(env, whisper_full_get_segment_text_from_state(state, i)));

        if (strcmp(smart_params.format, "detail") != 0) {
            return segment;
        }

        float confidence = 0, min_p = 1, max_p = 0;
        int skips = 0;
        const int tokens = whisper_full_n_tokens_from_state(state, i);
        const whisper_token eot = whisper_token_eot(context);

        Napi::Array tokens_array = Napi::Array::New(env, tokens);
        for (int j = 0; j < tokens; j++) {
            auto token = whisper_full_get_token_data_from_state(state, i, j);

            Napi::Object token_object = Napi::Object::New(env);
            token_object.Set("text",
                             Napi::String::New(env, whisper_full_get_token_text_from_state(
                                                        context, state, i, j)));
            token_object.Set("id", Napi::Number::New(env, token.id));
            token_object.Set("p", Napi::Number::New(env, token.p));
            if (params.token_timestamps) {
                token_object.Set("from", Napi::Number::New(env, token.t0 * 10));
                token_object.Set("to", Napi::Number::New(env, token.t1 * 10));
            }
            tokens_array.Set(j, token_object);

            if (token.id > eot) {
                skips++;
                continue;
            }
            confidence += token.p;
            min_p = std::min(min_p, token.p);
            max_p = std::max(max_p, token.p);
        }

        const int counted = tokens - skips;
        if (counted > 2) {
            confidence = (confidence - min_p - max_p) / (counted - 2);
        } else if (counted > 0) {
            confidence = confidence / counted;
        }

        segment.Set("lang", Napi::String::New(
                                env, whisper_lang_str(whisper_full_lang_id_from_state(state))));
        segment.Set("confidence", Napi::Number::New(env, confidence));
        segment.Set("tokens", tokens_array);

        return segment;
    }

    whisper_context* context;
    whisper_state* state;
    const float* samples;
    int n_samples;
    struct whisper_full_params params;
    struct smart_whisper_transcribe_params smart_params;
    Napi::FunctionReference progress_callback;
};
|
||||
|
||||
// JS: transcribe(handle, pcm, params, finish_callback, progress_callback).
// Copies the PCM samples, converts the options object, and queues a
// TranscribeWorker. Returns undefined; results arrive through the callbacks.
Napi::Value Transcribe(const Napi::CallbackInfo& info) {
    Napi::Env env = info.Env();

    if (info.Length() != 5) {
        Napi::TypeError::New(env, "Wrong number of arguments").ThrowAsJavaScriptException();
        return env.Null();
    }

    auto* context = info[0].As<Napi::External<whisper_context>>().Data();

    // Take a private copy of the audio; the worker deletes it when it is destroyed.
    Napi::Float32Array pcm = info[1].As<Napi::Float32Array>();
    const size_t sample_count = pcm.ElementLength();
    float* samples = new float[sample_count];
    memcpy(samples, pcm.Data(), pcm.ByteLength());
    int n_samples = static_cast<int>(sample_count);

    // The same JS object supplies both whisper's and the binding's own options.
    Napi::Object options = info[2].As<Napi::Object>();
    auto whisper_params = whisper_full_params_from_js(options);
    auto smart_params = smart_whisper_transcribe_params_from_js(options);

    Napi::Function finish_callback = info[3].As<Napi::Function>();
    Napi::Function progress_callback = info[4].As<Napi::Function>();

    auto* worker = new TranscribeWorker(context, samples, n_samples, whisper_params, smart_params,
                                        finish_callback, progress_callback);
    worker->Queue();

    return env.Undefined();
}
|
||||
9
packages/smart-whisper/src/binding/transcribe.h
Normal file
9
packages/smart-whisper/src/binding/transcribe.h
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
#ifndef _GUARD_SW_TRANSCRIBE_H
|
||||
#define _GUARD_SW_TRANSCRIBE_H
|
||||
|
||||
#include "common.h"
|
||||
#include "whisper.h"
|
||||
|
||||
Napi::Value Transcribe(const Napi::CallbackInfo& info);
|
||||
|
||||
#endif
|
||||
94
packages/smart-whisper/src/build.ts
Normal file
94
packages/smart-whisper/src/build.ts
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
import os from "node:os";
|
||||
import { execSync } from "node:child_process";
|
||||
|
||||
/** Compute backends a build can be configured for. */
type ComputeBackend = "cpu" | "accelerate" | "metal" | "clblast" | "openblas";

// Resolved once at module load; `config` is a hoisted function declaration.
const cfg = config();

/** Extra source files to compile, joined with single spaces. */
export const sources = cfg.sources.join(" ");

/** Preprocessor defines, joined with single spaces. */
export const defines = cfg.defines.join(" ");

/** Linker inputs, joined with single spaces. */
export const libraries = cfg.libraries.join(" ");
|
||||
|
||||
/**
 * Computes the build inputs for the native addon.
 *
 * - When the `BYOL` environment variable is set, no sources or defines are
 *   produced and its value is the only library.
 * - Otherwise the backend comes from `COMPUTE_BACKEND`, falling back to
 *   {@link infer_backend}; the backend adds its own sources, defines and
 *   libraries on top of the common whisper.cpp / ggml sources. Backends
 *   without a dedicated branch add nothing.
 */
function config(): {
  sources: string[];
  defines: string[];
  libraries: string[];
} {
  if (process.env.BYOL) {
    return { sources: [], defines: [], libraries: [process.env.BYOL] };
  }

  const backend: ComputeBackend =
    (process.env.COMPUTE_BACKEND as ComputeBackend | undefined) ?? infer_backend();

  const sources: string[] = [
    "whisper.cpp/src/whisper.cpp",
    "whisper.cpp/ggml/src/ggml.c",
    "whisper.cpp/ggml/src/ggml-alloc.c",
    "whisper.cpp/ggml/src/ggml-backend.c",
    "whisper.cpp/ggml/src/ggml-quants.c",
    "whisper.cpp/ggml/src/ggml-aarch64.c",
  ];
  const defines: string[] = [];
  const libraries: string[] = [];

  if (backend === "accelerate") {
    defines.push("GGML_USE_ACCELERATE");
    libraries.push('"-framework Foundation"', '"-framework Accelerate"');
  } else if (backend === "metal") {
    sources.push("whisper.cpp/ggml/src/ggml-metal.m");
    defines.push("GGML_USE_ACCELERATE", "GGML_USE_METAL");
    libraries.push(
      '"-framework Foundation"',
      '"-framework Accelerate"',
      '"-framework Metal"',
      '"-framework MetalKit"',
    );
  } else if (backend === "openblas") {
    defines.push("GGML_USE_OPENBLAS");
    libraries.push("-lopenblas");
  }

  return { sources, defines, libraries };
}
|
||||
|
||||
/**
 * Picks a compute backend for the current machine.
 *
 * macOS uses "metal" on arm64 and "accelerate" otherwise; Linux uses
 * "openblas" when `ldconfig -p | grep libopenblas` prints anything. Every
 * other case, including a failing probe, ends up with "cpu" unless a backend
 * had already been chosen before the failure.
 */
function infer_backend(): ComputeBackend {
  let chosen: ComputeBackend = "cpu";

  try {
    switch (os.platform()) {
      case "darwin": {
        chosen = "accelerate";
        if (os.arch() === "arm64") {
          chosen = "metal";
        }
        break;
      }
      case "linux": {
        // grep exits non-zero when nothing matches, which makes execSync throw.
        const probe = execSync("ldconfig -p | grep libopenblas").toString().trim();
        if (probe) {
          chosen = "openblas";
        }
        break;
      }
    }
  } catch {
    // if anything goes wrong, just use the default cpu backend
  }

  return chosen;
}
|
||||
7
packages/smart-whisper/src/index.ts
Normal file
7
packages/smart-whisper/src/index.ts
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
export * from "./binding";
|
||||
export * from "./model";
|
||||
export * from "./transcribe";
|
||||
export * from "./types";
|
||||
export * from "./whisper";
|
||||
|
||||
export * as manager from "./model-manager";
|
||||
122
packages/smart-whisper/src/model-manager/index.ts
Normal file
122
packages/smart-whisper/src/model-manager/index.ts
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
import path from "node:path";
|
||||
import fs from "node:fs";
|
||||
import os from "node:os";
|
||||
import { Readable } from "node:stream";
|
||||
import type { ReadableStream } from "node:stream/web";
|
||||
|
||||
// Per-user storage for this package: `<home>/.smart-whisper`, with downloaded
// models kept in its `models` subdirectory.
const root = path.join(os.homedir(), ".smart-whisper");
const models = path.join(root, "models");

// Extension every stored model file carries.
const ext = ".bin";

// Side effect at import time: make sure the models directory exists.
fs.mkdirSync(models, { recursive: true });

const BASE_MODELS_URL = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main";
|
||||
|
||||
/**
 * Download URLs of the ggml whisper models known by shorthand.
 * Keys are the shorthand model names; values are the corresponding file URLs
 * below {@link BASE_MODELS_URL}.
 */
export const MODELS = {
  "tiny": `${BASE_MODELS_URL}/ggml-tiny.bin`,
  "tiny.en": `${BASE_MODELS_URL}/ggml-tiny.en.bin`,
  "small": `${BASE_MODELS_URL}/ggml-small.bin`,
  "small.en": `${BASE_MODELS_URL}/ggml-small.en.bin`,
  "base": `${BASE_MODELS_URL}/ggml-base.bin`,
  "base.en": `${BASE_MODELS_URL}/ggml-base.en.bin`,
  "medium": `${BASE_MODELS_URL}/ggml-medium.bin`,
  "medium.en": `${BASE_MODELS_URL}/ggml-medium.en.bin`,
  "large-v1": `${BASE_MODELS_URL}/ggml-large-v1.bin`,
  "large-v2": `${BASE_MODELS_URL}/ggml-large-v2.bin`,
  "large-v3": `${BASE_MODELS_URL}/ggml-large-v3.bin`,
  "large-v3-turbo": `${BASE_MODELS_URL}/ggml-large-v3-turbo.bin`,
} as const;

/**
 * A model identifier: one of the {@link MODELS} shorthands (kept in the union
 * for editor completion) or any other string.
 */
export type ModelName = keyof typeof MODELS | (string & {});
|
||||
|
||||
/**
 * Downloads a ggml whisper model from a specified URL or shorthand.
 *
 * The model is stored as `<name>.bin` in the models directory. For a URL, the
 * name is the last path segment with a trailing `.bin` removed, so the
 * returned name is the one {@link check}, {@link resolve}, {@link remove} and
 * {@link list} use for that file. If the model is already present, nothing is
 * downloaded.
 *
 * Data is first written to a temporary `<name>.bin.download` file and renamed
 * once the file has been fully written and closed, so an interrupted download
 * never leaves a truncated file that looks like a usable model.
 *
 * @param model - The model to download, specified either as a key of the {@link MODELS} object or as a URL.
 * @returns A promise that resolves to the name of the downloaded model.
 * @throws An error if the model URL or shorthand is invalid, or if the model fails to download.
 */
export async function download(model: ModelName): Promise<string> {
  let url = "";
  let name = "";

  // Own-property test: `in` would also accept inherited keys such as "constructor".
  if (Object.prototype.hasOwnProperty.call(MODELS, model)) {
    url = MODELS[model as keyof typeof MODELS];
    name = model;
  } else {
    try {
      const parsed = new URL(model);
      url = parsed.href;
      name = parsed.pathname.split("/").pop() ?? "";
    } catch {
      // Not a parseable URL: `url` stays empty and is reported just below.
    }
  }

  if (!url) {
    throw new Error(`Invalid model URL or shorthand: ${model}`);
  }

  // Model names never include the extension; the helpers in this module append it.
  if (name.endsWith(ext)) {
    name = name.slice(0, -ext.length);
  }

  if (!name) {
    throw new Error(`Failed to parse model name: ${url}`);
  }

  if (check(name)) {
    return name;
  }

  const res = await fetch(url);
  if (!res.ok || !res.body) {
    throw new Error(`Failed to download model: ${res.statusText}`);
  }

  const target = path.join(models, name + ext);
  const partial = `${target}.download`;
  const stream = fs.createWriteStream(partial);
  const body = Readable.fromWeb(res.body as ReadableStream<Uint8Array>);

  return new Promise<string>((resolve, reject) => {
    let failure: Error | null = null;

    const fail = (err: Error): void => {
      if (failure === null) {
        failure = err;
      }
      body.destroy();
      stream.destroy();
    };

    body.on("error", fail);
    stream.on("error", fail);

    // "close" fires after the file descriptor is released, on success and
    // after a destroy alike, so renaming or deleting the file is safe here.
    stream.on("close", () => {
      if (failure === null) {
        try {
          fs.renameSync(partial, target);
          resolve(name);
          return;
        } catch (err) {
          failure = err as Error;
        }
      }

      try {
        fs.rmSync(partial, { force: true });
      } catch {
        // Best-effort cleanup; the original failure is what gets reported.
      }
      reject(failure);
    });

    body.pipe(stream);
  });
}
|
||||
|
||||
/**
 * Deletes a locally stored model file. Does nothing when the model is not
 * present.
 * @param model - The name of the model to remove.
 */
export function remove(model: ModelName): void {
  if (!check(model)) {
    return;
  }
  const file = path.join(models, model + ext);
  fs.unlinkSync(file);
}
|
||||
|
||||
/**
 * Lists the models stored locally: every entry of the models directory whose
 * name ends with the model extension, with that extension removed.
 * @returns An array of model names.
 */
export function list(): ModelName[] {
  const names: ModelName[] = [];
  for (const file of fs.readdirSync(models)) {
    if (file.endsWith(ext)) {
      names.push(file.slice(0, -ext.length));
    }
  }
  return names;
}
|
||||
|
||||
/**
 * Tells whether a model file is present in the models directory.
 * @param model - The name of the model (without extension).
 * @returns True if the model exists, false otherwise.
 */
export function check(model: ModelName): boolean {
  const file = path.join(models, model + ext);
  return fs.existsSync(file);
}
|
||||
|
||||
/**
 * Returns the path of a locally stored model inside the models directory.
 * @param model - The name of the model (without extension).
 * @returns The resolved path of the model.
 * @throws Error if the model is not found.
 */
export function resolve(model: ModelName): string {
  if (!check(model)) {
    throw new Error(`Model not found: ${model}`);
  }
  return path.join(models, model + ext);
}
|
||||
|
||||
/** Directories used by the model manager: the storage root and its models folder. */
export const dir = {
  root,
  models,
};
|
||||
3
packages/smart-whisper/src/model.ts
Normal file
3
packages/smart-whisper/src/model.ts
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
import { binding } from "./binding";
|
||||
|
||||
/** Public model class; adds nothing on top of the native binding's `WhisperModel`. */
export class WhisperModel extends binding.WhisperModel {
}
|
||||
110
packages/smart-whisper/src/transcribe.ts
Normal file
110
packages/smart-whisper/src/transcribe.ts
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
import EventEmitter from "node:events";
|
||||
import type { WhisperModel } from "./model";
|
||||
import { TranscribeFormat, TranscribeParams, TranscribeResult } from "./types";
|
||||
import { binding } from "./binding";
|
||||
|
||||
/**
 * A running transcription.
 *
 * Events:
 * - `"transcribed"`: emitted with each result reported by the native binding
 *   while the transcription is in progress.
 * - `"finish"`: emitted once with the full list of results, right before
 *   {@link TranscribeTask.result} resolves.
 *
 * If the native side reports a failure, {@link TranscribeTask.result} rejects
 * with that error and `"finish"` is not emitted.
 */
export class TranscribeTask<
  Format extends TranscribeFormat,
  TokenTimestamp extends boolean,
> extends EventEmitter {
  private _model: WhisperModel;
  private _result: Promise<TranscribeResult<Format, TokenTimestamp>[]> | null = null;

  /**
   * You should not construct this class directly, use {@link TranscribeTask.run} instead.
   */
  constructor(model: WhisperModel) {
    super();
    this._model = model;
  }

  get model(): WhisperModel {
    return this._model;
  }

  /**
   * A promise that resolves to the result of the transcription task.
   * @throws Error if the task has not been started.
   */
  get result(): Promise<TranscribeResult<Format, TokenTimestamp>[]> {
    if (this._result === null) {
      throw new Error("Task has not been started");
    }
    return this._result;
  }

  private async _run(
    pcm: Float32Array,
    params: Partial<TranscribeParams<Format, TokenTimestamp>>,
  ): Promise<TranscribeResult<Format, TokenTimestamp>[]> {
    const handle = this.model.handle;
    if (!handle) {
      throw new Error("Model has been freed");
    }

    return new Promise((resolve, reject) => {
      binding.transcribe(
        handle,
        pcm,
        params,
        (results) => {
          // On failure the native worker invokes this same callback with the
          // error as its only argument; surface it as a rejection instead of
          // handing an Error to consumers as if it were the result list.
          if ((results as unknown) instanceof Error) {
            reject(results);
            return;
          }
          // Listeners are typed to receive the results, so pass them along.
          this.emit("finish", results);
          resolve(results);
        },
        (result) => {
          this.emit("transcribed", result);
        },
      );
    });
  }

  /**
   * Starts transcribing `pcm` with `model` and returns the task tracking it.
   * @throws Error if the model has already been freed.
   */
  static async run<Format extends TranscribeFormat, TokenTimestamp extends boolean>(
    model: WhisperModel,
    pcm: Float32Array,
    params: Partial<TranscribeParams<Format, TokenTimestamp>>,
  ): Promise<TranscribeTask<Format, TokenTimestamp>> {
    if (model.freed) {
      throw new Error("Model has been freed");
    }

    const task = new TranscribeTask(model);
    task._result = task._run(pcm, params);

    return task;
  }

  on(
    event: "finish",
    listener: (results: TranscribeResult<Format, TokenTimestamp>[]) => void,
  ): this;
  on(
    event: "transcribed",
    listener: (result: TranscribeResult<Format, TokenTimestamp>) => void,
  ): this;
  on(event: string, listener: (...args: any[]) => void): this {
    return super.on(event, listener);
  }

  once(
    event: "finish",
    listener: (results: TranscribeResult<Format, TokenTimestamp>[]) => void,
  ): this;
  once(
    event: "transcribed",
    listener: (result: TranscribeResult<Format, TokenTimestamp>) => void,
  ): this;
  once(event: string, listener: (...args: any[]) => void): this {
    return super.once(event, listener);
  }

  off(
    event: "finish",
    listener: (results: TranscribeResult<Format, TokenTimestamp>[]) => void,
  ): this;
  off(
    event: "transcribed",
    listener: (result: TranscribeResult<Format, TokenTimestamp>) => void,
  ): this;
  off(event: string, listener: (...args: any[]) => void): this {
    return super.off(event, listener);
  }
}
|
||||
102
packages/smart-whisper/src/types.ts
Normal file
102
packages/smart-whisper/src/types.ts
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
/**
 * Sampling strategies, passed to the native binding as their numeric value.
 */
export enum WhisperSamplingStrategy {
  WHISPER_SAMPLING_GREEDY = 0,
  WHISPER_SAMPLING_BEAM_SEARCH = 1,
}
|
||||
|
||||
/** Shape of each transcription result: plain segments or segments with per-token detail. */
export type TranscribeFormat =
  | "simple"
  | "detail";
|
||||
|
||||
/**
 * Options for a transcription. Apart from `format`, the fields mirror
 * whisper.cpp's `whisper_full_params`.
 *
 * See {@link https://github.com/ggerganov/whisper.cpp/blob/00b7a4be02ca82d53ac69dd2dd438c16e2af7658/whisper.h#L433C19-L433C19} for details.
 */
export interface TranscribeParams<
  Format extends TranscribeFormat = TranscribeFormat,
  TokenTimestamp extends boolean = false,
> {
  strategy: WhisperSamplingStrategy;
  n_threads: number;
  n_max_text_ctx: number;
  offset_ms: number;
  duration_ms: number;

  translate: boolean;
  no_context: boolean;
  no_timestamps: boolean;
  single_segment: boolean;
  print_special: boolean;
  print_progress: boolean;
  print_realtime: boolean;
  print_timestamps: boolean;

  /** When true, tokens of "detail" results carry their own `from`/`to`. */
  token_timestamps: TokenTimestamp;
  thold_pt: number;
  thold_ptsum: number;
  max_len: number;
  split_on_word: boolean;
  max_tokens: number;

  // NOTE(review): `speed_up` is not read by the native parameter parser in
  // this package - confirm whether it is still meaningful.
  speed_up: boolean;
  debug_mode: boolean;
  audio_ctx: number;

  tdrz_enable: boolean;

  initial_prompt: string;

  /**
   * Language code, e.g. "en", "de", "fr", "es", "it", "nl", "pt", "ru", "tr", "uk", "pl", "sv", "cs", "zh", "ja", "ko"
   */
  language: string;

  suppress_blank: boolean;
  suppress_non_speech_tokens: boolean;

  temperature: number;
  max_initial_ts: number;
  length_penalty: number;

  temperature_inc: number;
  entropy_thold: number;
  logprob_thold: number;
  no_speech_thold: number;

  /** Stored in the greedy-strategy settings on the native side. */
  best_of: number;

  /** Stored in the beam-search-strategy settings on the native side. */
  beam_size: number;

  /** Result format; the native side uses "simple" when this is not a string. */
  format: Format;
}
|
||||
|
||||
/**
 * Minimal transcription result: a piece of text and the span it covers.
 */
export interface TranscribeSimpleResult {
  /** Start of the span. NOTE(review): presumably milliseconds, like the token timestamps — confirm. */
  from: number;
  /** End of the span, in the same unit as `from`. */
  to: number;
  /** The transcribed text. */
  text: string;
}
|
||||
|
||||
/**
 * A transcription result with language, confidence and per-token details on
 * top of the fields of {@link TranscribeSimpleResult}.
 *
 * @typeParam TokenTimestamp - When `true`, token `from` / `to` are typed as
 *   `number`; otherwise they are typed as `undefined`.
 */
export interface TranscribeDetailedResult<TokenTimestamp extends boolean>
  extends TranscribeSimpleResult {
  /** The detected spoken language. */
  lang: string;

  /** The confidence level of the transcription, calculated by the average probability of the tokens. */
  confidence: number;

  /** The tokens generated during the transcription process. */
  tokens: Array<{
    /** The text of the token, for CJK languages, due to the BPE encoding, the token text may not be readable. */
    text: string;

    /** The ID of the token. */
    id: number;

    /** The probability of the token. */
    p: number;

    /** The start timestamp of the token, in milliseconds. Only available when `token_timestamps` of {@link TranscribeParams} is `true`. */
    from: TokenTimestamp extends true ? number : undefined;

    /** The end timestamp of the token, in milliseconds. Only available when `token_timestamps` of {@link TranscribeParams} is `true`. */
    to: TokenTimestamp extends true ? number : undefined;
  }>;
}
|
||||
|
||||
/**
 * Result type for a given output format.
 *
 * `"simple"` resolves to {@link TranscribeSimpleResult}; anything else resolves
 * to {@link TranscribeDetailedResult}. Because the conditional distributes over
 * unions, the default `Format` yields the union of both shapes.
 */
export type TranscribeResult<
  Format extends TranscribeFormat = TranscribeFormat,
  TokenTimestamp extends boolean = boolean,
> = Format extends "simple"
  ? TranscribeSimpleResult
  : TranscribeDetailedResult<TokenTimestamp>;
|
||||
137
packages/smart-whisper/src/whisper.ts
Normal file
137
packages/smart-whisper/src/whisper.ts
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
import type { TranscribeFormat, TranscribeParams, TranscribeResult } from "./types";
|
||||
import { WhisperModel } from "./model";
|
||||
import { TranscribeTask } from "./transcribe";
|
||||
|
||||
/**
 * Options of a {@link Whisper} instance. The `Whisper` constructor fills in
 * `offload: 300` and `gpu: true` for anything the caller leaves out.
 */
export interface WhisperConfig {
  /** Idle time, in seconds, after which an unused model is offloaded. */
  offload: number;

  /** Whether the model should be loaded with GPU support. */
  gpu: boolean;
}
|
||||
|
||||
/**
 * The Whisper class is responsible for managing the lifecycle and operations of whisper model.
 * It handles the loading and offloading of the model, managing transcription tasks, and configuring model parameters.
 */
export class Whisper {
  private _file: string;
  /** The loaded model, or `null` when nothing is loaded (or a free is in progress). */
  private _available: WhisperModel | null = null;
  /** The in-flight load, shared by concurrent {@link Whisper.load} calls. */
  private _loading: Promise<WhisperModel> | null = null;
  /** Result promises of transcriptions that {@link Whisper.free} has not waited for yet. */
  private _tasks: Promise<TranscribeResult[]>[] = [];
  private _config: WhisperConfig;
  private _offload_timer: NodeJS.Timeout | null = null;

  /**
   * Constructs a new Whisper instance with a specified model file and configuration.
   * @param file - The path to the Whisper model file.
   * @param config - Optional configuration for the Whisper instance.
   */
  constructor(file: string, config: Partial<WhisperConfig> = {}) {
    this._file = file;
    this._config = {
      offload: 300,
      gpu: true,
      ...config,
    };
  }

  get file(): string {
    return this._file;
  }

  set file(file: string) {
    this._file = file;
  }

  get config(): WhisperConfig {
    return this._config;
  }

  /** Result promises of the transcriptions started since the last {@link Whisper.free}. */
  get tasks(): Promise<TranscribeResult[]>[] {
    return this._tasks;
  }

  /** (Re)starts the idle countdown after which the model is freed automatically. */
  reset_offload_timer(): void {
    this.clear_offload_timer();
    this._offload_timer = setTimeout(() => {
      // Fire-and-forget: nobody can await a timer callback.
      void this.free();
    }, this.config.offload * 1000);
  }

  private clear_offload_timer(): void {
    if (this._offload_timer !== null) {
      clearTimeout(this._offload_timer);
      this._offload_timer = null;
    }
  }

  /**
   * Returns the loaded model, loading it first when none is available.
   * Handing out an already loaded model restarts the idle countdown.
   */
  async model(): Promise<WhisperModel> {
    if (this._available === null) {
      return this.load();
    }
    this.reset_offload_timer();
    return this._available;
  }

  /**
   * Loads the whisper model asynchronously.
   * If the model is already being loaded, returns the existing one.
   *
   * You don't need to call this method directly, it's called automatically if necessary when you call {@link Whisper.transcribe}.
   *
   * @returns A Promise that resolves to the loaded model.
   * @throws Whatever `WhisperModel.load` rejects with; a later call retries the load.
   */
  async load(): Promise<WhisperModel> {
    if (this._loading !== null) {
      return this._loading;
    }

    const pending = WhisperModel.load(this.file, this.config.gpu);
    this._loading = pending;
    try {
      const model = await pending;
      this._available = model;
      this.reset_offload_timer();
      return model;
    } finally {
      // Always drop the in-flight marker. Leaving a rejected promise here
      // would make every later load() return that same failure forever.
      this._loading = null;
    }
  }

  /**
   * Transcribes the given PCM audio data using the Whisper model.
   * @param pcm - The mono 16k PCM audio data to transcribe.
   * @param params - Optional parameters for transcription.
   * @returns A promise that resolves to the result of the transcription task.
   */
  async transcribe<Format extends TranscribeFormat, TokenTimestamp extends boolean>(
    pcm: Float32Array,
    params: Partial<TranscribeParams<Format, TokenTimestamp>> = {},
  ): Promise<TranscribeTask<Format, TokenTimestamp>> {
    const model = await this.model();
    const task = await TranscribeTask.run<Format, TokenTimestamp>(model, pcm, params);
    this._tasks.push(task.result);
    return task;
  }

  /**
   * Waits for the tracked transcriptions to finish, then frees the loaded model.
   * Does nothing when no model is loaded.
   *
   * A failed transcription does not prevent the model from being freed; its
   * error is still delivered to whoever awaits that task's own result.
   */
  async free(): Promise<void> {
    const model = this._available;
    if (model === null) {
      return;
    }
    this._available = null;
    this.clear_offload_timer();

    // Take ownership of everything tracked so far. This keeps the list from
    // growing for the lifetime of the instance, and tasks started while we
    // wait (against a freshly loaded model) are left for the next free().
    const pending = this._tasks.splice(0);
    const ignore = (): void => {};
    // Only completion matters here. Letting one rejection escape would skip
    // model.free() below and strand the model, since _available is already null.
    await Promise.all(pending.map((task) => task.then(ignore, ignore)));
    await model.free();
  }
}
|
||||
|
||||
/**
|
||||
* Here's a life cycle diagram of a model:
|
||||
* | Method | (0) Not Available | (1) Loading | (2) Available | (3) Freeing | (0) Not Available |
|
||||
* |------------|-------------------|-------------|---------------|-------------|-------------------|
|
||||
* | load | V | - | - | - | V |
|
||||
* | free | - | - | wait tasks, V | - | - |
|
||||
* | transcribe | load | load | V | load | load |
|
||||
*/
|
||||
9
packages/smart-whisper/tsconfig.json
Normal file
9
packages/smart-whisper/tsconfig.json
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
{
  "extends": "@amical/typescript-config/base.json",
  "compilerOptions": {
    "outDir": "dist",
    "types": ["node"]
  },
  "include": ["src"],
  "exclude": ["node_modules", "dist"]
}
|
||||
15
packages/smart-whisper/tsup.config.ts
Normal file
15
packages/smart-whisper/tsup.config.ts
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
import { defineConfig } from "tsup";
|
||||
import { readFileSync, writeFileSync } from "node:fs";
|
||||
|
||||
export default defineConfig({
  entry: ["src/index.ts", "src/build.ts"],
  outDir: "dist",
  dts: true,
  /**
   * After a successful build, replace the `#include "ggml-common.h"` line in
   * whisper.cpp/ggml/src/ggml-metal.metal with the full content of that header.
   * Paths are resolved against the current working directory.
   */
  async onSuccess() {
    const metalPath = "whisper.cpp/ggml/src/ggml-metal.metal";
    const metal = readFileSync(metalPath, "utf-8");
    const common = readFileSync("whisper.cpp/ggml/src/ggml-common.h", "utf-8");

    // A replacer function inserts the header verbatim. Passing it as a plain
    // replacement string would let `$&`, `$1`, `$$`, ... inside the header be
    // interpreted as substitution patterns.
    const replaced = metal.replace('#include "ggml-common.h"', () => common);

    // Nothing to write when the include was already inlined by an earlier build.
    if (replaced !== metal) {
      writeFileSync(metalPath, replaced);
    }
  },
});
|
||||
1
packages/smart-whisper/whisper.cpp
Submodule
1
packages/smart-whisper/whisper.cpp
Submodule
|
|
@ -0,0 +1 @@
|
|||
Subproject commit 2ef717b293fe93872cc3a03ca77942936a281959
|
||||
|
|
@ -11,7 +11,8 @@
|
|||
"clean": "rm -rf dist generated",
|
||||
"generate:json-schemas": "tsx scripts/generate-json-schemas.ts",
|
||||
"generate:swift": "tsx scripts/generate-swift-models.ts",
|
||||
"generate:all": "pnpm run generate:json-schemas && pnpm run generate:swift",
|
||||
"generate:csharp": "tsx scripts/generate-csharp-models.ts",
|
||||
"generate:all": "pnpm run generate:json-schemas && pnpm run generate:swift && pnpm run generate:csharp",
|
||||
"lint": "eslint .",
|
||||
"check-types": "tsc --noEmit"
|
||||
},
|
||||
|
|
@ -32,7 +33,8 @@
|
|||
"zod-to-json-schema": "^3.24.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"zod": "^3.24.4"
|
||||
"zod": "^3.24.4",
|
||||
"minimatch": "10.0.3"
|
||||
},
|
||||
"exports": {
|
||||
".": {
|
||||
|
|
|
|||
79
packages/types/scripts/generate-csharp-models.ts
Normal file
79
packages/types/scripts/generate-csharp-models.ts
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
import { execSync } from "child_process";
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
|
||||
// Output directory for the generated C# models, relative to the current
// working directory.
const generatedDir = "../native-helpers/windows-helper/src/Models/Generated";

// JSON schemas handed to quicktype, in the order they appear on the command line.
const schemaFiles = [
  "generated/json-schemas/rpc/rpc-request.schema.json",
  "generated/json-schemas/rpc/rpc-response.schema.json",
  "generated/json-schemas/methods/get-accessibility-tree-details-params.schema.json",
  "generated/json-schemas/methods/get-accessibility-tree-details-result.schema.json",
  "generated/json-schemas/methods/get-accessibility-context-params.schema.json",
  "generated/json-schemas/methods/get-accessibility-context-result.schema.json",
  "generated/json-schemas/methods/paste-text-params.schema.json",
  "generated/json-schemas/methods/paste-text-result.schema.json",
  "generated/json-schemas/methods/mute-system-audio-params.schema.json",
  "generated/json-schemas/methods/mute-system-audio-result.schema.json",
  "generated/json-schemas/methods/restore-system-audio-params.schema.json",
  "generated/json-schemas/methods/restore-system-audio-result.schema.json",
  "generated/json-schemas/events/key-down-event.schema.json",
  "generated/json-schemas/events/key-up-event.schema.json",
  "generated/json-schemas/events/flags-changed-event.schema.json",
  "generated/json-schemas/events/helper-event.schema.json",
];

try {
  // Start from an empty output directory. `force: true` already makes rmSync
  // a no-op when the directory does not exist, so no existence check is needed.
  fs.rmSync(generatedDir, { recursive: true, force: true });
  fs.mkdirSync(generatedDir, { recursive: true });

  console.log("Directory created/cleaned successfully.");

  // Generate C# models from JSON schemas using quicktype
  // Using System.Text.Json mode with proper nullable reference type support
  // quicktype only supports csharp 5 and 6, so using 6
  const command = [
    "quicktype",
    "--src-lang schema",
    "--lang csharp",
    "--namespace WindowsHelper.Models",
    "--framework SystemTextJson",
    "--array-type list",
    "--csharp-version 6",
    "--features complete",
    "--no-check-required",
    `-o ${generatedDir}/Models.cs`,
    ...schemaFiles,
  ].join(" ");

  console.log(`Executing quicktype...`);
  execSync(command, { stdio: "inherit" });

  // Post-process to clean up the generated code
  const modelsPath = path.join(generatedDir, "Models.cs");
  const generated = fs.readFileSync(modelsPath, "utf8");

  // Drop every "Schema" that directly follows at least one word character
  // (e.g. the "Schema" suffix quicktype derives from the *.schema.json names).
  const cleaned = generated.replace(/(\w+)Schema/g, "$1");

  // Header comment placed at the top of the generated file, followed by one
  // blank line.
  const header = [
    "// <auto-generated />",
    "// This file was automatically generated by quicktype from JSON schemas.",
    "// DO NOT EDIT THIS FILE DIRECTLY! Instead, edit the TypeScript schemas and regenerate.",
    "",
    "",
  ].join("\n");

  fs.writeFileSync(modelsPath, header + cleaned);

  console.log("C# models generated successfully.");
} catch (error) {
  console.error("Error generating C# models:", error);
  process.exit(1);
}
|
||||
|
|
@ -14,7 +14,7 @@
|
|||
"@amical/eslint-config": "workspace:*",
|
||||
"@amical/typescript-config": "workspace:*",
|
||||
"@turbo/gen": "^2.5.0",
|
||||
"@types/node": "^22.15.3",
|
||||
"@types/node": "^24.3.0",
|
||||
"@types/react": "19.1.0",
|
||||
"@types/react-dom": "19.1.1",
|
||||
"eslint": "^9.26.0",
|
||||
|
|
|
|||
9312
pnpm-lock.yaml
generated
9312
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load diff
|
|
@ -12,7 +12,7 @@
|
|||
"build": {
|
||||
"dependsOn": ["^build"],
|
||||
"inputs": ["$TURBO_DEFAULT$", ".env*"],
|
||||
"outputs": [".next/**", "!.next/cache/**", "bin/**", "out/**"],
|
||||
"outputs": [".next/**", "!.next/cache/**", "bin/**", "out/**", "dist/**", "build/**"],
|
||||
"env": [
|
||||
"AWS_ACCESS_KEY_ID",
|
||||
"AWS_SECRET_ACCESS_KEY",
|
||||
|
|
@ -23,8 +23,8 @@
|
|||
},
|
||||
"build:native": {
|
||||
"dependsOn": [],
|
||||
"inputs": ["Sources/**", "Package.swift", "main.swift", "scripts/**"],
|
||||
"outputs": ["bin/**"],
|
||||
"inputs": ["Sources/**", "Package.swift", "main.swift", "scripts/**", "src/binding/**", "binding.gyp", "whisper.cpp/**"],
|
||||
"outputs": ["bin/**", "build/**"],
|
||||
"cache": true
|
||||
},
|
||||
"format:check": {},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue