refactor: audio recording, processing, logging & app lifecycle

This commit is contained in:
haritabh-z01 2025-06-28 18:33:26 +05:30
parent 383a73833c
commit 834473877f
52 changed files with 2112 additions and 2361 deletions

1
.gitignore vendored
View file

@ -38,6 +38,7 @@ yarn-error.log*
*.pem
CLAUDE.md
.serena
.local
# Temp files
/tmp

View file

@ -1 +1,6 @@
/// <reference types="@electron-forge/plugin-vite/forge-vite-env" />
declare module "*?url" {
const url: string;
export default url;
}

View file

@ -59,6 +59,7 @@
"@dnd-kit/utilities": "^3.2.2",
"@hookform/resolvers": "^5.0.1",
"@libsql/client": "^0.15.9",
"@openrouter/ai-sdk-provider": "^0.7.2",
"@radix-ui/react-accordion": "^1.2.10",
"@radix-ui/react-alert-dialog": "^1.1.13",
"@radix-ui/react-aspect-ratio": "^1.1.6",
@ -95,6 +96,7 @@
"@types/split2": "^4.2.3",
"@types/uuid": "^10.0.0",
"ai": "^4.3.16",
"ansi-colors": "^4.1.3",
"async-mutex": "^0.5.0",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",

View file

@ -17,6 +17,8 @@ export const db = drizzle(`file:${dbPath}`, {
// Initialize database with migrations
let isInitialized = false;
import { logger } from "../main/logger";
export async function initializeDatabase() {
if (isInitialized) {
return;
@ -35,10 +37,10 @@ export async function initializeDatabase() {
migrationsPath = path.join(process.resourcesPath, "migrations");
}
console.log("Attempting to run migrations from:", migrationsPath);
console.log("__dirname:", __dirname);
console.log("process.cwd():", process.cwd());
console.log("isDev:", isDev);
logger.db.debug("Attempting to run migrations from:", migrationsPath);
logger.db.debug("__dirname:", __dirname);
logger.db.debug("process.cwd():", process.cwd());
logger.db.debug("isDev:", isDev);
// Check if the migrations path exists
if (!fs.existsSync(migrationsPath)) {
@ -55,11 +57,13 @@ export async function initializeDatabase() {
migrationsFolder: migrationsPath,
});
console.log("Database initialized and migrations completed successfully");
logger.db.info(
"Database initialized and migrations completed successfully",
);
isInitialized = true;
} catch (error) {
console.error("FATAL: Error initializing database:", error);
console.error(
logger.db.error("FATAL: Error initializing database:", error);
logger.db.error(
"Application cannot continue without a working database. Exiting...",
);

View file

@ -75,7 +75,7 @@ export async function deleteDownloadedModel(id: string) {
return result[0] || null;
}
// Get downloaded models as a record (for backward compatibility)
// Get downloaded models as a record
export async function getDownloadedModelsRecord(): Promise<
Record<string, DownloadedModel>
> {

View file

@ -1,13 +1,14 @@
import { migrate } from "drizzle-orm/libsql/migrator";
import { db } from "./config";
import { logger } from "../main/logger";
export async function runMigrations() {
try {
// Run migrations
await migrate(db, { migrationsFolder: "./src/db/migrations" });
console.log("Migrations completed successfully");
logger.db.info("Migrations completed successfully");
} catch (error) {
console.error("Error running migrations:", error);
logger.db.error("Error running migrations:", error);
throw error;
}
}

View file

@ -1,3 +1,5 @@
// AudioWorklet processor source code
export const audioRecorderWorkletSource = `
// AudioWorklet processor for real-time audio capture
// This runs in the audio rendering thread for low-latency processing
/* eslint-env worker */
@ -60,3 +62,4 @@ class AudioRecorderProcessor extends AudioWorkletProcessor {
// Register the processor
registerProcessor('audio-recorder-processor', AudioRecorderProcessor);
`;

View file

@ -1,6 +1,7 @@
import { useState, useEffect, useRef, useCallback } from "react";
import { MicVAD } from "@ricky0123/vad-web";
import { Mutex } from "async-mutex";
import { audioRecorderWorkletSource } from "./audio-recorder-worklet";
export interface UseRecordingParams {
onAudioChunk: (
@ -72,19 +73,6 @@ export const useRecording = ({
"Hook: Internal: Stopping recording and sending final chunk...",
);
// Send final audio chunk before cleanup
try {
// Access the sendAudioChunk function from the current recording session
// We need to store this reference when starting recording
const sendFinalChunk = (window as any).currentSendAudioChunk;
if (sendFinalChunk) {
await sendFinalChunk(true); // Send final chunk
console.log("Hook: Final audio chunk sent.");
}
} catch (error) {
console.error("Hook: Error sending final audio chunk:", error);
}
// Cleanup all resources
cleanupMediaResources(vadRef.current, streamRef.current);
@ -148,8 +136,13 @@ export const useRecording = ({
let chunkTimer: NodeJS.Timeout | null = null;
let pendingAudioChunks: Float32Array[] = [];
// Load AudioWorklet module
await audioContext.audioWorklet.addModule("/audio-recorder-worklet.js");
// Load AudioWorklet module using blob URL
const blob = new Blob([audioRecorderWorkletSource], {
type: "application/javascript",
});
const audioWorkletUrl = URL.createObjectURL(blob);
await audioContext.audioWorklet.addModule(audioWorkletUrl);
URL.revokeObjectURL(audioWorkletUrl); // Clean up blob URL
console.log("Hook: AudioWorklet module loaded successfully");
source = audioContext.createMediaStreamSource(localStream);

View file

@ -0,0 +1,194 @@
import {
app,
systemPreferences,
BrowserWindow,
globalShortcut,
} from "electron";
import { initializeDatabase } from "../../db/config";
import { logger } from "../logger";
import { WindowManager } from "./window-manager";
import { setupApplicationMenu } from "../menu";
import { ServiceManager } from "../managers/service-manager";
import { createIPCHandler } from "electron-trpc-experimental/main";
import { router } from "../../trpc/router";
import { EventHandlers } from "./event-handlers";
/**
 * Coordinates application startup and shutdown: database initialization,
 * OS permission prompts, service wiring, window creation, menu setup,
 * tRPC IPC registration and the delayed auto-update check.
 */
export class AppManager {
  private windowManager: WindowManager;
  private serviceManager: ServiceManager;

  constructor() {
    this.windowManager = new WindowManager();
    this.serviceManager = ServiceManager.createInstance();
    // Re-register the tRPC handler whenever a (new) main window is created.
    this.windowManager.setMainWindowCreatedCallback(
      this.onMainWindowCreated.bind(this),
    );
  }

  /**
   * Runs the full startup sequence in order. Any failure is logged and
   * rethrown so the caller can decide how to react.
   */
  async initialize(): Promise<void> {
    try {
      await this.initializeDatabase();
      await this.requestPermissions();
      await this.serviceManager.initialize(this.windowManager);
      this.exposeGlobalServices();
      await this.setupWindows();
      await this.setupMenu();

      // Setup event handlers
      const eventHandlers = new EventHandlers(this);
      eventHandlers.setupEventHandlers();

      // Schedule auto-update check after startup
      this.scheduleAutoUpdateCheck();

      logger.main.info("Application initialized successfully");
    } catch (error) {
      logger.main.error("Error initializing app:", error);
      throw error;
    }
  }

  /** Initializes the database (including migrations) and logs success. */
  private async initializeDatabase(): Promise<void> {
    await initializeDatabase();
    logger.db.info(
      "Database initialized and migrations completed successfully",
    );
  }

  /**
   * Checks accessibility (macOS) and microphone access.
   *
   * Permission handling is best-effort: failures are logged and never abort
   * startup. Platform guards are required because Electron only provides
   * `getMediaAccessStatus` on macOS/Windows and `askForMediaAccess` on macOS;
   * calling them elsewhere throws.
   */
  private async requestPermissions(): Promise<void> {
    const isMac = process.platform === "darwin";
    const isWindows = process.platform === "win32";

    try {
      if (isMac) {
        const accessibilityEnabled =
          systemPreferences.isTrustedAccessibilityClient(false);
        if (!accessibilityEnabled) {
          logger.main.debug(
            "Please enable accessibility permissions in System Preferences > Security & Privacy > Privacy > Accessibility",
          );
        }
      }

      if (!isMac && !isWindows) {
        // No media-access API is available on this platform.
        return;
      }

      const microphoneEnabled =
        systemPreferences.getMediaAccessStatus("microphone");
      logger.main.info("Microphone access status:", {
        status: microphoneEnabled,
      });

      // Only macOS can programmatically prompt for access.
      if (isMac && microphoneEnabled !== "granted") {
        await systemPreferences.askForMediaAccess("microphone");
      }
    } catch (error) {
      logger.main.error("Error requesting permissions:", error);
    }
  }

  /** Creates the widget window, registers tRPC and shows the dock icon on macOS. */
  private async setupWindows(): Promise<void> {
    this.windowManager.createWidgetWindow();
    this.setupTRPCHandler();

    if (process.platform === "darwin" && app.dock) {
      app.dock.show();
    }
  }

  /** Returns the windows that currently exist and have not been destroyed. */
  private getLiveWindows(): BrowserWindow[] {
    return this.windowManager
      .getAllWindows()
      .filter((w): w is BrowserWindow => w !== null && !w.isDestroyed());
  }

  /** Initial tRPC registration; kept as a promise-returning wrapper. */
  private setupTRPCHandler(): Promise<void> {
    this.updateTRPCHandler();
    return Promise.resolve();
  }

  /**
   * (Re)registers the tRPC IPC handler for all live windows.
   *
   * NOTE(review): every call creates a new handler instance. Confirm whether
   * the IPC library allows attaching a window to an existing handler instead,
   * which would avoid registering more than one handler over the app's life.
   */
  updateTRPCHandler(): void {
    createIPCHandler({ router, windows: this.getLiveWindows() });
  }

  /** Installs the application menu with its three action callbacks. */
  private async setupMenu(): Promise<void> {
    setupApplicationMenu(
      () => this.windowManager.createOrShowMainWindow(),
      () => {
        const autoUpdaterService = this.serviceManager.getAutoUpdaterService();
        if (autoUpdaterService) {
          autoUpdaterService.checkForUpdates(true);
        }
      },
      () => this.windowManager.openAllDevTools(),
    );
  }

  /** Publishes services on `globalThis` so tRPC routers can reach them. */
  private exposeGlobalServices(): void {
    // Make services available globally for tRPC (temporary solution)
    const transcriptionService = this.serviceManager.getTranscriptionService();
    const autoUpdaterService = this.serviceManager.getAutoUpdaterService();
    const settingsService = this.serviceManager.getSettingsService();
    const swiftBridge = this.serviceManager.getSwiftIOBridge();

    (globalThis as any).modelManagerService =
      this.serviceManager.getModelManagerService();
    (globalThis as any).transcriptionService = transcriptionService;
    (globalThis as any).settingsService = settingsService;
    (globalThis as any).logger = logger;
    (globalThis as any).autoUpdaterService = autoUpdaterService;
    (globalThis as any).swiftBridge = swiftBridge;
  }

  getWindowManager(): WindowManager {
    return this.windowManager;
  }

  getServiceManager(): ServiceManager {
    return this.serviceManager;
  }

  getTranscriptionService(): any {
    return this.serviceManager.getTranscriptionService();
  }

  getSwiftIOBridge(): any {
    return this.serviceManager.getSwiftIOBridge();
  }

  getAutoUpdaterService(): any {
    return this.serviceManager.getAutoUpdaterService();
  }

  /**
   * Triggers an update check five seconds after startup. Both synchronous
   * throws and rejected promises are reported as warnings so the check can
   * never surface as an unhandled rejection.
   */
  private scheduleAutoUpdateCheck(): void {
    const reportFailure = (error: unknown): void => {
      logger.main.warn("Auto-update check failed during startup", {
        error: error instanceof Error ? error.message : String(error),
      });
    };

    // Check for updates on startup (after a brief delay)
    setTimeout(() => {
      try {
        const autoUpdaterService = this.serviceManager.getAutoUpdaterService();
        if (!autoUpdaterService) {
          return;
        }
        // Promise.resolve covers both sync and async implementations.
        Promise.resolve(autoUpdaterService.checkForUpdatesAndNotify()).catch(
          reportFailure,
        );
      } catch (error) {
        reportFailure(error);
      }
    }, 5000); // Wait 5 seconds after startup
  }

  /** Callback from WindowManager: include the new main window in tRPC. */
  private onMainWindowCreated(_window: BrowserWindow): void {
    this.updateTRPCHandler();
  }

  /**
   * Releases global shortcuts, services and window-level subscriptions.
   * Window cleanup runs even if service cleanup rejects; the rejection
   * still propagates to the caller.
   */
  async cleanup(): Promise<void> {
    globalShortcut.unregisterAll();
    try {
      await this.serviceManager.cleanup();
    } finally {
      this.windowManager.cleanup();
    }
  }

  /**
   * Handles app activation: recreates the widget window if it is missing,
   * otherwise shows it, and opens/focuses the main window unless no window
   * existed at all.
   */
  handleActivate(): void {
    const allWindows = this.windowManager.getAllWindows();

    if (allWindows.every((w) => !w || w.isDestroyed())) {
      this.windowManager.createWidgetWindow();
      return;
    }

    const widgetWindow = this.windowManager.getWidgetWindow();
    if (!widgetWindow || widgetWindow.isDestroyed()) {
      this.windowManager.createWidgetWindow();
    } else {
      widgetWindow.show();
    }
    this.windowManager.createOrShowMainWindow();
  }
}

View file

@ -0,0 +1,63 @@
import { HelperEvent } from "@amical/types";
import { AppManager } from "./app-manager";
import { logger } from "../logger";
/**
 * Wires events coming from the Swift helper bridge to the rest of the app.
 */
export class EventHandlers {
  private appManager: AppManager;

  constructor(appManager: AppManager) {
    this.appManager = appManager;
  }

  /** Registers every event handler owned by this class. */
  setupEventHandlers(): void {
    this.setupSwiftBridgeEventHandlers();
    // Note: Audio IPC handlers are now managed by RecordingService
  }

  /**
   * Subscribes to `helperEvent`, `error` and `close` on the Swift bridge.
   * If the bridge cannot be obtained or subscribed to, a warning (including
   * the underlying reason) is logged and setup is skipped.
   */
  private setupSwiftBridgeEventHandlers(): void {
    try {
      const swiftBridge = this.appManager.getSwiftIOBridge();
      const windowManager = this.appManager.getWindowManager();

      swiftBridge.on("helperEvent", (event: HelperEvent) => {
        logger.swift.debug("Received helperEvent from SwiftIOBridge", {
          event,
        });

        switch (event.type) {
          case "flagsChanged": {
            const payload = event.payload;
            if (payload?.fnKeyPressed !== undefined) {
              logger.swift.info("Setting recording state", {
                state: payload.fnKeyPressed,
              });
              const widgetWindow = windowManager.getWidgetWindow();
              // A destroyed window still has a non-null reference; touching
              // its webContents would throw inside this event callback.
              if (widgetWindow && !widgetWindow.isDestroyed()) {
                widgetWindow.webContents.send(
                  "recording-state-changed",
                  payload.fnKeyPressed,
                );
              }
            }
            break;
          }
          case "keyDown":
          case "keyUp":
            // Key events are currently ignored.
            break;
          default:
            break;
        }
      });

      swiftBridge.on("error", (error: Error) => {
        logger.main.error("SwiftIOBridge error:", error);
      });

      swiftBridge.on("close", (code: number | null) => {
        logger.swift.warn("Swift helper process closed", { code });
      });
    } catch (error) {
      logger.main.warn("Swift bridge not available for event handlers", {
        error: error instanceof Error ? error.message : String(error),
      });
    }
  }
}

View file

@ -0,0 +1,195 @@
import { BrowserWindow, screen, systemPreferences } from "electron";
import path from "node:path";
import { logger } from "../logger";
declare const MAIN_WINDOW_VITE_DEV_SERVER_URL: string;
declare const MAIN_WINDOW_VITE_NAME: string;
declare const WIDGET_WINDOW_VITE_NAME: string;
/**
 * Owns the two application windows (main window and the transparent,
 * always-on-top widget overlay) and the macOS workspace notification that
 * moves the widget to the display under the cursor.
 */
export class WindowManager {
  private mainWindow: BrowserWindow | null = null;
  private widgetWindow: BrowserWindow | null = null;
  private currentWindowDisplayId: number | null = null;
  private activeSpaceChangeSubscriptionId: number | null = null;
  private onMainWindowCreated?: (window: BrowserWindow) => void;

  /**
   * Shows and focuses the main window if it exists; otherwise creates it,
   * loads the renderer and notifies the registered creation callback.
   */
  createOrShowMainWindow(): void {
    if (this.mainWindow && !this.mainWindow.isDestroyed()) {
      this.mainWindow.show();
      this.mainWindow.focus();
      return;
    }

    const mainWindow = new BrowserWindow({
      width: 1200,
      height: 800,
      frame: false,
      titleBarStyle: "hidden",
      trafficLightPosition: { x: 20, y: 16 },
      useContentSize: true,
      webPreferences: {
        preload: path.join(__dirname, "preload.js"),
        nodeIntegration: false,
        contextIsolation: true,
      },
    });
    this.mainWindow = mainWindow;

    if (MAIN_WINDOW_VITE_DEV_SERVER_URL) {
      mainWindow.loadURL(MAIN_WINDOW_VITE_DEV_SERVER_URL);
    } else {
      mainWindow.loadFile(
        path.join(__dirname, `../renderer/${MAIN_WINDOW_VITE_NAME}/index.html`),
      );
    }

    mainWindow.on("closed", () => {
      this.mainWindow = null;
    });

    if (this.onMainWindowCreated) {
      this.onMainWindowCreated(mainWindow);
    }
  }

  /**
   * Creates the click-through widget overlay sized to the primary display's
   * work area. On macOS it is additionally pinned above full-screen apps and
   * follows the active display.
   */
  createWidgetWindow(): void {
    const mainScreen = screen.getPrimaryDisplay();
    const { width, height } = mainScreen.workAreaSize;

    const widgetWindow = new BrowserWindow({
      width,
      height,
      frame: false,
      transparent: true,
      alwaysOnTop: true,
      resizable: false,
      maximizable: false,
      skipTaskbar: true,
      focusable: false,
      hasShadow: false,
      webPreferences: {
        preload: path.join(__dirname, "preload.js"),
        nodeIntegration: false,
        contextIsolation: true,
      },
    });
    this.widgetWindow = widgetWindow;
    this.currentWindowDisplayId = mainScreen.id;

    // Drop the reference once closed (mirrors the main window), but only if
    // it has not already been replaced by a newer widget window.
    widgetWindow.on("closed", () => {
      if (this.widgetWindow === widgetWindow) {
        this.widgetWindow = null;
      }
    });

    widgetWindow.setIgnoreMouseEvents(true, { forward: true });

    if (MAIN_WINDOW_VITE_DEV_SERVER_URL) {
      const devUrl = new URL(MAIN_WINDOW_VITE_DEV_SERVER_URL);
      devUrl.pathname = "widget.html";
      widgetWindow.loadURL(devUrl.toString());
    } else {
      widgetWindow.loadFile(
        path.join(
          __dirname,
          `../renderer/${WIDGET_WINDOW_VITE_NAME}/widget.html`,
        ),
      );
    }

    if (process.platform === "darwin") {
      widgetWindow.setAlwaysOnTop(true, "floating", 1);
      widgetWindow.setVisibleOnAllWorkspaces(true, {
        visibleOnFullScreen: true,
      });
      widgetWindow.setHiddenInMissionControl(true);
      this.setupDisplayChangeNotifications();
    }
  }

  /**
   * Subscribes (once) to the macOS active-display notification and moves the
   * widget to the display nearest the cursor when it changes.
   *
   * The subscription is shared across widget re-creations: the callback reads
   * `this.widgetWindow` at fire time, so re-subscribing would only leak the
   * previous subscription id.
   */
  private setupDisplayChangeNotifications(): void {
    if (process.platform !== "darwin") return;
    if (this.activeSpaceChangeSubscriptionId !== null) return;

    try {
      const subscriptionId = systemPreferences.subscribeWorkspaceNotification(
        "NSWorkspaceActiveDisplayDidChangeNotification",
        () => {
          const widget = this.widgetWindow;
          if (!widget || widget.isDestroyed()) return;

          try {
            const cursorPoint = screen.getCursorScreenPoint();
            const displayForCursor = screen.getDisplayNearestPoint(cursorPoint);
            if (this.currentWindowDisplayId !== displayForCursor.id) {
              logger.main.info("Moving floating window to display", {
                displayId: displayForCursor.id,
              });
              widget.setBounds(displayForCursor.workArea);
              this.currentWindowDisplayId = displayForCursor.id;
            }
          } catch (error) {
            logger.main.warn("Error handling display change:", error);
          }
        },
      );

      if (subscriptionId >= 0) {
        this.activeSpaceChangeSubscriptionId = subscriptionId;
        logger.main.info(
          "Successfully subscribed to display change notifications",
        );
      } else {
        // Do not keep an invalid id around; cleanup() would try to
        // unsubscribe it.
        this.activeSpaceChangeSubscriptionId = null;
        logger.main.error(
          "Failed to subscribe to display change notifications",
        );
      }
    } catch (error) {
      logger.main.error(
        "Error during subscription to display notifications:",
        error,
      );
      this.activeSpaceChangeSubscriptionId = null;
    }
  }

  getMainWindow(): BrowserWindow | null {
    return this.mainWindow;
  }

  getWidgetWindow(): BrowserWindow | null {
    return this.widgetWindow;
  }

  /** Returns `[mainWindow, widgetWindow]`; entries may be null. */
  getAllWindows(): (BrowserWindow | null)[] {
    return [this.mainWindow, this.widgetWindow];
  }

  /** Registers a callback invoked each time a new main window is created. */
  setMainWindowCreatedCallback(
    callback: (window: BrowserWindow) => void,
  ): void {
    this.onMainWindowCreated = callback;
  }

  /** Opens dev tools on every live window that does not already have them open. */
  openAllDevTools(): void {
    const windows = this.getAllWindows().filter(
      (window): window is BrowserWindow =>
        window !== null && !window.isDestroyed(),
    );

    for (const window of windows) {
      if (window.webContents && !window.webContents.isDevToolsOpened()) {
        window.webContents.openDevTools();
      }
    }

    logger.main.info(`Opened dev tools for ${windows.length} windows`);
  }

  /** Unsubscribes from the macOS display-change notification, if subscribed. */
  cleanup(): void {
    if (
      process.platform === "darwin" &&
      this.activeSpaceChangeSubscriptionId !== null
    ) {
      systemPreferences.unsubscribeWorkspaceNotification(
        this.activeSpaceChangeSubscriptionId,
      );
      logger.main.info("Unsubscribed from display change notifications");
      this.activeSpaceChangeSubscriptionId = null;
    }
  }
}

View file

@ -4,6 +4,7 @@ dotenv.config();
import log from "electron-log";
import { app } from "electron";
import path from "node:path";
import colors from "ansi-colors";
// Configure electron-log immediately when module is imported
const isDev = process.env.NODE_ENV === "development" || !app.isPackaged;
@ -35,9 +36,53 @@ log.transports.file.resolvePathFn = () => logPath;
// Configure console transport for better development experience
if (isDev) {
log.transports.console.format =
"[{y}-{m}-{d} {h}:{i}:{s}.{ms}] [{level}] [{scope}] {text}";
log.transports.console.useStyles = true;
// Color functions for different scopes using ansi-colors
const scopeColorFunctions: Record<string, (text: string) => string> = {
main: colors.blue.bold,
audio: colors.green.bold,
transcription: colors.magenta.bold,
swift: colors.yellow.bold,
pipeline: colors.red.bold,
widget: colors.cyan.bold,
mainWindow: colors.greenBright.bold,
renderer: colors.gray.bold,
ui: colors.magentaBright.bold,
db: colors.cyanBright.bold,
network: colors.yellowBright.bold,
updater: colors.blueBright.bold,
ipc: colors.green.bold,
default: colors.gray.bold,
};
// Color functions for different log levels
const levelColorFunctions: Record<string, (text: string) => string> = {
error: colors.red.bold,
warn: colors.yellow.bold,
info: colors.blue,
verbose: colors.cyan,
debug: colors.gray,
silly: colors.magenta,
};
// Override console transport with custom colored output
log.transports.console.format = "{text}"; // Minimal formatting - just pass through the text
// Custom console writer: prints each log entry as
// "[timestamp] [level] [scope] ...data" with ANSI colors applied per
// level and per scope.
log.transports.console.writeFn = (info) => {
const { message } = info;
// Entries without a scope are shown (and colored) as "default".
const scope = message.scope || "default";
const level = message.level;
// Get color functions
// Unknown scopes use the default scope color; unknown levels are left uncolored.
const scopeColorFn =
scopeColorFunctions[scope] || scopeColorFunctions.default;
const levelColorFn = levelColorFunctions[level] || ((text: string) => text);
// Zero-padded "YYYY-MM-DD HH:mm:ss.SSS" built from the entry's own date.
const timestamp = `${message.date.getFullYear()}-${String(message.date.getMonth() + 1).padStart(2, "0")}-${String(message.date.getDate()).padStart(2, "0")} ${String(message.date.getHours()).padStart(2, "0")}:${String(message.date.getMinutes()).padStart(2, "0")}:${String(message.date.getSeconds()).padStart(2, "0")}.${String(message.date.getMilliseconds()).padStart(3, "0")}`;
// Let console.log handle message serialization naturally
const prefix = `${colors.dim(`[${timestamp}]`)} ${levelColorFn(`[${level}]`)} ${scopeColorFn(`[${scope}]`)}`;
// The raw data items are passed through as separate console.log arguments.
console.log(prefix, ...message.data);
};
} else {
log.transports.console.format =
"[{y}-{m}-{d} {h}:{i}:{s}.{ms}] [{level}] [{scope}] {text}";
@ -115,11 +160,14 @@ export const logger = {
renderer: createLoggerForScope("renderer"),
network: createLoggerForScope("network"),
audio: createLoggerForScope("audio"),
ai: createLoggerForScope("ai"),
pipeline: createLoggerForScope("pipeline"),
swift: createLoggerForScope("swift"),
ui: createLoggerForScope("ui"),
db: createLoggerForScope("db"),
updater: createLoggerForScope("updater"),
transcription: createLoggerForScope("transcription"),
widget: createLoggerForScope("widget"),
mainWindow: createLoggerForScope("mainWindow"),
};
// Log startup information

View file

@ -1,804 +1,19 @@
// Load .env file FIRST before any other imports
import dotenv from "dotenv";
dotenv.config();
import {
app,
BrowserWindow,
systemPreferences,
globalShortcut,
ipcMain,
screen,
clipboard,
} from "electron";
import path from "node:path";
import fsPromises from "node:fs/promises"; // For reading the audio file (async)
import { app } from "electron";
import started from "electron-squirrel-startup";
import { initializeDatabase } from "../db/config";
import { HelperEvent, KeyEventPayload } from "@amical/types";
import { logger, logError, logPerformance } from "./logger";
import { AudioCapture } from "../modules/audio/audio-capture";
import { setupApplicationMenu } from "./menu";
import { AiService } from "../modules/ai/ai-service";
import { SwiftIOBridge } from "./swift-io-bridge"; // Added import
import { DownloadedModel } from "../constants/models";
import { ModelManagerService } from "../modules/models/model-manager";
import { LocalWhisperClient } from "../modules/ai/local-whisper-client";
import {
TranscriptionSession,
ChunkData,
} from "../modules/transcription/transcription-session";
import { ContextualTranscriptionManager } from "../modules/transcription/contextual-transcription-manager";
import { SettingsService } from "../modules/settings";
import { createIPCHandler } from "electron-trpc-experimental/main";
import { router } from "../trpc/router";
import { AutoUpdaterService } from "./services/auto-updater";
import { AppManager } from "./core/app-manager";
// Handle creating/removing shortcuts on Windows when installing/uninstalling.
if (started) {
app.quit();
}
declare const MAIN_WINDOW_VITE_DEV_SERVER_URL: string;
declare const MAIN_WINDOW_VITE_NAME: string;
declare const WIDGET_WINDOW_VITE_NAME: string;
const appManager = new AppManager();
let mainWindow: BrowserWindow | null = null;
let floatingButtonWindow: BrowserWindow | null = null;
let audioCapture: AudioCapture | null = null;
let aiService: AiService | null = null;
let swiftIOBridgeClientInstance: SwiftIOBridge | null = null;
let modelManagerService: ModelManagerService | null = null;
let localWhisperClient: LocalWhisperClient | null = null;
let currentWindowDisplayId: number | null = null; // For tracking current display
let activeSpaceChangeSubscriptionId: number | null = null; // For display change notifications
// New chunk-based transcription variables
let contextualTranscriptionManager: ContextualTranscriptionManager | null =
null;
const activeTranscriptionSessions: Map<string, TranscriptionSession> =
new Map();
let autoUpdaterService: AutoUpdaterService | null = null;
// Store is imported from '../lib/store' and is database-backed
// Function to create the local transcription client
const createTranscriptionClient = () => {
logger.ai.info("Using local Whisper inference");
if (!localWhisperClient) {
throw new Error("Local Whisper client not initialized");
}
return localWhisperClient;
};
// Formatter Configuration - Now handled by tRPC settings router
const requestPermissions = async () => {
try {
// Request accessibility permissions
if (process.platform === "darwin") {
const accessibilityEnabled =
systemPreferences.isTrustedAccessibilityClient(false);
if (!accessibilityEnabled) {
// On macOS, we need to use a different approach for accessibility permissions
// The user will need to grant accessibility permissions through System Preferences
console.log(
"Please enable accessibility permissions in System Preferences > Security & Privacy > Privacy > Accessibility",
);
}
}
// Request microphone permissions
const microphoneEnabled =
systemPreferences.getMediaAccessStatus("microphone");
logger.main.info("Microphone access status:", {
status: microphoneEnabled,
});
if (microphoneEnabled !== "granted") {
await systemPreferences.askForMediaAccess("microphone");
}
} catch (error) {
logError(
error instanceof Error ? error : new Error(String(error)),
"requesting permissions",
);
}
};
const createOrShowMainWindow = () => {
if (mainWindow && !mainWindow.isDestroyed()) {
mainWindow.show();
mainWindow.focus();
return;
}
mainWindow = new BrowserWindow({
width: 1200,
height: 800,
frame: false,
titleBarStyle: "hidden",
trafficLightPosition: { x: 20, y: 16 },
useContentSize: true,
webPreferences: {
preload: path.join(__dirname, "preload.js"),
nodeIntegration: false,
contextIsolation: true,
},
});
if (MAIN_WINDOW_VITE_DEV_SERVER_URL) {
mainWindow.loadURL(MAIN_WINDOW_VITE_DEV_SERVER_URL);
} else {
mainWindow.loadFile(
path.join(__dirname, `../renderer/${MAIN_WINDOW_VITE_NAME}/index.html`),
);
}
mainWindow.on("closed", () => {
mainWindow = null;
if (autoUpdaterService) {
autoUpdaterService.setMainWindow(null);
}
});
// Update tRPC handler to include the main window
createIPCHandler({
router,
windows: [mainWindow, floatingButtonWindow].filter(
Boolean,
) as BrowserWindow[],
});
// Set main window reference for auto-updater
if (autoUpdaterService) {
autoUpdaterService.setMainWindow(mainWindow);
}
};
const createFloatingButtonWindow = () => {
const mainScreen = screen.getPrimaryDisplay();
const { width, height } = mainScreen.workAreaSize;
floatingButtonWindow = new BrowserWindow({
width,
height,
frame: false,
transparent: true,
alwaysOnTop: true,
resizable: false,
maximizable: false,
skipTaskbar: true,
focusable: false,
hasShadow: false,
webPreferences: {
preload: path.join(__dirname, "preload.js"),
nodeIntegration: false,
contextIsolation: true,
},
});
currentWindowDisplayId = mainScreen.id; // Initialize with the primary display's ID
floatingButtonWindow.setIgnoreMouseEvents(true, { forward: true });
if (MAIN_WINDOW_VITE_DEV_SERVER_URL) {
const devUrl = new URL(MAIN_WINDOW_VITE_DEV_SERVER_URL);
devUrl.pathname = "widget.html";
floatingButtonWindow.loadURL(devUrl.toString());
} else {
floatingButtonWindow.loadFile(
path.join(
__dirname,
`../renderer/${WIDGET_WINDOW_VITE_NAME}/widget.html`,
),
);
}
// Set a higher level for macOS to stay on top of fullscreen apps
if (process.platform === "darwin") {
floatingButtonWindow.setAlwaysOnTop(true, "floating", 1);
floatingButtonWindow.setVisibleOnAllWorkspaces(true, {
visibleOnFullScreen: true,
});
floatingButtonWindow.setHiddenInMissionControl(true);
}
// floatingButtonWindow.webContents.openDevTools({ mode: 'detach' }); // For debugging the button
};
// This method will be called when Electron has finished
// initialization and is ready to create browser windows.
// Some APIs can only be used after this event occurs.
app.on("ready", async () => {
// Initialize database and run migrations first
try {
await initializeDatabase();
logger.db.info(
"Database initialized and migrations completed successfully",
);
} catch (error) {
logError(
error instanceof Error ? error : new Error(String(error)),
"initializing database",
);
// You might want to handle this error differently, perhaps showing a dialog to the user
}
await requestPermissions();
createFloatingButtonWindow();
// Setup tRPC IPC handler
createIPCHandler({
router,
windows: [floatingButtonWindow!],
});
if (process.platform === "darwin" && app.dock) {
app.dock.show();
}
audioCapture = new AudioCapture();
// Initialize Model Manager Service
modelManagerService = new ModelManagerService();
await modelManagerService.initialize();
// Initialize Local Whisper Client
localWhisperClient = new LocalWhisperClient(modelManagerService);
// Make services available globally for tRPC
(globalThis as any).modelManagerService = modelManagerService;
(globalThis as any).localWhisperClient = localWhisperClient;
(globalThis as any).aiService = aiService;
(globalThis as any).logger = logger;
// Initialize Contextual Transcription Manager
contextualTranscriptionManager = new ContextualTranscriptionManager(
modelManagerService,
);
// Initialize Auto-Updater Service
autoUpdaterService = new AutoUpdaterService();
// Make auto-updater service available globally for tRPC
(globalThis as any).autoUpdaterService = autoUpdaterService;
// Check for updates on startup (after a brief delay)
setTimeout(() => {
if (autoUpdaterService) {
autoUpdaterService.checkForUpdatesAndNotify();
}
}, 5000); // Wait 5 seconds after startup
// Initialize AI service with the appropriate client based on configuration
try {
const transcriptionClient = createTranscriptionClient();
aiService = new AiService(transcriptionClient);
// Load and configure formatter
try {
const settingsService = SettingsService.getInstance();
const formatterConfig = await settingsService.getFormatterConfig();
if (formatterConfig) {
aiService.configureFormatter(formatterConfig);
logger.ai.info("Formatter configured", {
provider: formatterConfig.provider,
enabled: formatterConfig.enabled,
});
}
} catch (formatterError) {
logger.ai.warn("Failed to load formatter configuration:", formatterError);
}
logger.ai.info("AI Service initialized", {
client: "Local Whisper",
});
} catch (error) {
logError(
error instanceof Error ? error : new Error(String(error)),
"initializing AI Service",
);
logger.ai.warn("Transcription will not work until configuration is fixed");
aiService = null;
}
audioCapture.on("recording-finished", async (filePath: string) => {
// Ensure AI service is available and up-to-date
if (!aiService) {
try {
const transcriptionClient = createTranscriptionClient();
aiService = new AiService(transcriptionClient);
// Load and configure formatter
try {
const settingsService = SettingsService.getInstance();
const formatterConfig = await settingsService.getFormatterConfig();
if (formatterConfig) {
aiService.configureFormatter(formatterConfig);
logger.ai.info("Formatter reconfigured", {
provider: formatterConfig.provider,
enabled: formatterConfig.enabled,
});
}
} catch (formatterError) {
logger.ai.warn(
"Failed to reload formatter configuration:",
formatterError,
);
}
logger.ai.info("AI Service reinitialized", {
client: "Local Whisper",
});
} catch (error) {
logError(
error instanceof Error ? error : new Error(String(error)),
"reinitializing AI Service",
);
}
}
logger.audio.info("Recording finished", { filePath });
if (aiService) {
try {
const startTime = Date.now();
const audioBuffer = await fsPromises.readFile(filePath);
logger.audio.info("Audio file read", {
size: audioBuffer.length,
sizeKB: Math.round(audioBuffer.length / 1024),
});
const transcription = await aiService.transcribeAudio(audioBuffer);
logPerformance("audio transcription", startTime, {
audioSizeKB: Math.round(audioBuffer.length / 1024),
transcriptionLength: transcription?.length || 0,
});
logger.ai.info("Transcription completed", {
resultLength: transcription?.length || 0,
hasResult: !!transcription,
});
// Save transcription to database
if (
transcription &&
typeof transcription === "string" &&
transcription.trim().length > 0
) {
try {
const { createTranscription } = await import(
"../db/transcriptions.js"
);
const savedTranscription = await createTranscription({
text: transcription,
timestamp: new Date(),
audioFile: filePath,
language: "en", // Default to English, could be made configurable
});
logger.db.info("Transcription saved to database", {
transcriptionId: savedTranscription.id,
textLength: transcription.length,
audioFile: filePath,
});
} catch (dbError) {
logError(
dbError instanceof Error ? dbError : new Error(String(dbError)),
"saving transcription to database",
);
}
}
// Copy transcription to clipboard
if (transcription && typeof transcription === "string") {
logger.main.info("Transcription pasted to active application");
// Attempt to paste into the active application
swiftIOBridgeClientInstance!.call("pasteText", {
transcript: transcription,
});
} else {
logger.main.warn(
"Transcription result was empty or not a string, not copying",
);
}
// Optionally, delete the audio file after processing
// await fs.unlink(filePath);
// console.log(`Main: Deleted audio file: ${filePath}`);
} catch (error) {
logError(
error instanceof Error ? error : new Error(String(error)),
"transcription or file handling",
);
}
} else {
logger.ai.warn("AI Service not available, cannot transcribe audio");
}
});
audioCapture.on("recording-error", (error: Error) => {
console.error("Main: Received recording error from AudioCapture:", error);
});
// Handle individual audio chunks for real-time transcription.
//
// One TranscriptionSession is created lazily per recording session (keyed by
// chunkData.sessionId) and kept in activeTranscriptionSessions until it emits
// "session-completed". Every failure is logged rather than rethrown, so a bad
// chunk cannot reject out of this async listener.
audioCapture.on("chunk-ready", async (chunkData: ChunkData) => {
  logger.audio.info("Received chunk for transcription", {
    sessionId: chunkData.sessionId,
    chunkId: chunkData.chunkId,
    audioDataSize: chunkData.audioData.length,
    isFinalChunk: chunkData.isFinalChunk,
  });

  try {
    // Get or create transcription session for this recording session
    let transcriptionSession = activeTranscriptionSessions.get(
      chunkData.sessionId,
    );

    if (!transcriptionSession) {
      // A missing manager throws here and is reported by the outer catch.
      const transcriptionClient =
        contextualTranscriptionManager!.createDefaultClient();
      transcriptionSession = new TranscriptionSession(
        chunkData.sessionId,
        transcriptionClient,
      );
      activeTranscriptionSessions.set(
        chunkData.sessionId,
        transcriptionSession,
      );

      // Set up session event handlers
      transcriptionSession.on("chunk-completed", (result) => {
        logger.ai.info("Chunk transcription completed", {
          sessionId: chunkData.sessionId,
          chunkId: result.chunkId,
          textLength: result.text.length,
          processingTimeMs: result.processingTimeMs,
        });
      });

      transcriptionSession.on("session-completed", async (sessionResult) => {
        try {
          logger.ai.info("Transcription session completed", {
            sessionId: sessionResult.sessionId,
            finalTextLength: sessionResult.finalText.length,
            totalChunks: sessionResult.chunkResults.length,
            totalProcessingTimeMs: sessionResult.totalProcessingTimeMs,
          });

          if (
            sessionResult.finalText &&
            sessionResult.finalText.trim().length > 0
          ) {
            // Save chunk-based transcription to database. A failed save is
            // logged but must not prevent the paste below.
            try {
              const { createTranscription } = await import(
                "../db/transcriptions.js"
              );
              const savedTranscription = await createTranscription({
                text: sessionResult.finalText,
                timestamp: new Date(),
                audioFile: null, // Chunk-based transcriptions don't have a single audio file
                language: "en", // Default to English, could be made configurable
              });
              logger.db.info("Chunk-based transcription saved to database", {
                transcriptionId: savedTranscription.id,
                sessionId: sessionResult.sessionId,
                textLength: sessionResult.finalText.length,
                totalChunks: sessionResult.chunkResults.length,
              });
            } catch (dbError) {
              logError(
                dbError instanceof Error ? dbError : new Error(String(dbError)),
                "saving chunk-based transcription to database",
              );
            }

            // Paste the final result to active application
            logger.main.info(
              "Final transcription pasted to active application",
              {
                textLength: sessionResult.finalText.length,
                sessionId: sessionResult.sessionId,
              },
            );
            // Awaited so that a failing bridge call is caught below instead of
            // surfacing as an unhandled promise rejection.
            await swiftIOBridgeClientInstance!.call("pasteText", {
              transcript: sessionResult.finalText,
            });
          } else {
            logger.main.warn("Final transcription was empty, not pasting");
          }
        } catch (pasteError) {
          logError(
            pasteError instanceof Error
              ? pasteError
              : new Error(String(pasteError)),
            "pasting final transcription",
          );
        } finally {
          // Clean up completed session, even when saving or pasting failed,
          // so finished sessions never accumulate in the map.
          activeTranscriptionSessions.delete(chunkData.sessionId);
        }
      });

      transcriptionSession.on("chunk-error", (errorInfo) => {
        logger.ai.error("Chunk transcription error", {
          sessionId: chunkData.sessionId,
          chunkId: errorInfo.chunkId,
          error: errorInfo.error,
        });
        // Continue processing other chunks even if one fails
      });

      logger.ai.info("Created new transcription session", {
        sessionId: chunkData.sessionId,
      });
    }

    // Add chunk to session for processing
    transcriptionSession.addChunk(chunkData);
  } catch (error) {
    logger.ai.error("Error handling chunk-ready event", {
      sessionId: chunkData.sessionId,
      chunkId: chunkData.chunkId,
      error: error instanceof Error ? error.message : String(error),
    });
  }
});
// Receive raw audio from the renderer and forward it to AudioCapture.
// Anything that is not an ArrayBuffer is rejected back to the caller; an empty
// buffer is only warned about and is still forwarded with its isFinalChunk flag.
ipcMain.handle(
  "audio-data-chunk",
  (_event, chunk: ArrayBuffer, isFinalChunk: boolean) => {
    if (!(chunk instanceof ArrayBuffer)) {
      console.error(
        "Main: Received audio chunk, but it is not an ArrayBuffer. Type:",
        typeof chunk,
      );
      throw new Error("Invalid audio chunk type received.");
    }

    console.log(
      `Main: IPC received audio-data-chunk (ArrayBuffer) of size: ${chunk.byteLength} bytes. isFinalChunk: ${isFinalChunk}`,
    );

    const audioBytes = Buffer.from(chunk);
    if (audioBytes.length === 0) {
      console.warn("Main: Received an empty audio chunk after conversion.");
    }

    // Buffering and final-chunk handling live inside AudioCapture.
    audioCapture?.handleAudioChunk(audioBytes, isFinalChunk);
  },
);
// Prepare for a new recording: make sure the transcription model is loaded,
// then send the "muteSystemAudio" command to the Swift helper. A rejection of
// that final call propagates to the renderer that invoked this handler.
ipcMain.handle("recording-starting", async () => {
  console.log("Main: Received recording-starting event.");

  // Preload the transcription model for fast processing. A preload failure is
  // logged and swallowed so the recording can still start.
  try {
    if (contextualTranscriptionManager) {
      if (contextualTranscriptionManager.isModelLoaded()) {
        logger.ai.info("Transcription model already loaded");
      } else {
        logger.ai.info(
          "Preloading transcription model for recording session",
        );
        await contextualTranscriptionManager.preloadModel();
        logger.ai.info("Transcription model preloaded successfully");
      }
    }
  } catch (error) {
    logger.ai.error("Error preloading transcription model", {
      error: error instanceof Error ? error.message : String(error),
    });
  }

  // Accessibility-context capture is currently disabled. The previous call is
  // kept for reference (it used to sit in a try/catch that could never fire):
  //const accessibilityContext = await swiftIOBridgeClientInstance!.call('getAccessibilityContext', { editableOnly: true });
  //console.log('Main: Accessibility context captured:', JSON.stringify(accessibilityContext, null, 2));

  await swiftIOBridgeClientInstance!.call("muteSystemAudio", {});
});
// Counterpart of the "recording-starting" handler: when the renderer reports
// that recording is stopping, send "restoreSystemAudio" to the Swift helper.
ipcMain.handle("recording-stopping", async () => {
console.log("Main: Received recording-stopping event.");
// The non-null assertion means a missing bridge throws here, which rejects the
// renderer's invoke() call.
await swiftIOBridgeClientInstance!.call("restoreSystemAudio", {});
});
// Initialize the SwiftIOBridgeClient and wire up its event listeners.
swiftIOBridgeClientInstance = new SwiftIOBridge();

swiftIOBridgeClientInstance.on("helperEvent", (event: HelperEvent) => {
  logger.swift.debug("Received helperEvent from SwiftIOBridge", { event });

  switch (event.type) {
    case "flagsChanged": {
      const payload = event.payload;
      logger.swift.debug("Received flagsChanged event", {
        fnKeyPressed: payload?.fnKeyPressed,
      });

      // Use flagsChanged for more reliable Fn key state tracking
      if (payload?.fnKeyPressed !== undefined) {
        logger.swift.info("Setting recording state", {
          state: payload.fnKeyPressed,
        });

        // The floating window may not exist yet or may already be destroyed;
        // sending to it in that state would throw inside this listener.
        if (floatingButtonWindow && !floatingButtonWindow.isDestroyed()) {
          floatingButtonWindow.webContents.send(
            "recording-state-changed",
            payload.fnKeyPressed,
          );
        } else {
          logger.swift.warn(
            "Floating button window unavailable, recording state not forwarded",
            { state: payload.fnKeyPressed },
          );
        }
      }
      break;
    }
    case "keyDown":
    case "keyUp":
      // Deliberately ignored: flagsChanged is the primary source of Fn key
      // state, so these events must not send recording-state-changed.
      break;
    default:
      // Other helper event types are not handled here.
      break;
  }
});

swiftIOBridgeClientInstance.on("error", (error) => {
  logError(
    error instanceof Error ? error : new Error(String(error)),
    "SwiftIOBridge error",
  );
  // Potentially notify the user or attempt to restart
});

swiftIOBridgeClientInstance.on("close", (code) => {
  logger.swift.warn("Swift helper process closed", { code });
  // Handle unexpected close, maybe attempt restart
});
// Install the application menu. The second callback backs the menu's
// update check and does nothing while no auto-updater service exists.
setupApplicationMenu(createOrShowMainWindow, () => {
  autoUpdaterService?.checkForUpdates(true);
});
// Keep the floating button window on the display nearest the cursor.
// Only macOS delivers the workspace notification used here.
if (process.platform !== "darwin") {
  console.log("Main: Display change tracking is a macOS-only feature");
} else {
  try {
    console.log("Main: Setting up display change notifications");

    // Runs on every NSWorkspaceActiveDisplayDidChangeNotification.
    const relocateFloatingWindow = (): void => {
      if (!floatingButtonWindow || floatingButtonWindow.isDestroyed()) {
        return;
      }
      try {
        const targetDisplay = screen.getDisplayNearestPoint(
          screen.getCursorScreenPoint(),
        );
        if (currentWindowDisplayId === targetDisplay.id) {
          return; // Already on the right display.
        }
        console.log(
          `[Main Process] Moving floating window to display ID: ${targetDisplay.id}`,
        );
        floatingButtonWindow.setBounds(targetDisplay.workArea);
        currentWindowDisplayId = targetDisplay.id;
      } catch (error) {
        console.warn("[Main Process] Error handling display change:", error);
      }
    };

    activeSpaceChangeSubscriptionId =
      systemPreferences.subscribeWorkspaceNotification(
        "NSWorkspaceActiveDisplayDidChangeNotification",
        relocateFloatingWindow,
      );

    const isSubscribed =
      activeSpaceChangeSubscriptionId !== undefined &&
      activeSpaceChangeSubscriptionId >= 0;
    if (isSubscribed) {
      console.log(
        `Main: Successfully subscribed to display change notifications`,
      );
    } else {
      console.error(
        "Main: Failed to subscribe to display change notifications",
      );
    }
  } catch (e) {
    console.error(
      "Main: Error during subscription to display notifications:",
      e,
    );
    // Leave no stale id behind for the will-quit cleanup to unsubscribe.
    activeSpaceChangeSubscriptionId = null;
  }
}
});
// Clean up intervals and subscriptions
// Runs on Electron's "will-quit" event. In order: unregister global shortcuts,
// stop the Swift helper, clean up the model manager, dispose the transcription
// manager, and drop the macOS workspace-notification subscription if one is held.
app.on("will-quit", () => {
// globalShortcut.unregisterAll();
globalShortcut.unregisterAll();
// Each service is torn down only if it was actually created.
if (swiftIOBridgeClientInstance) {
console.log("Main: Stopping Swift helper...");
swiftIOBridgeClientInstance.stopHelper();
}
if (modelManagerService) {
console.log("Main: Cleaning up model downloads...");
modelManagerService.cleanup();
}
if (contextualTranscriptionManager) {
console.log("Main: Cleaning up transcription models...");
contextualTranscriptionManager.dispose();
}
// The subscription id is null when no subscription is held (it is reset to
// null below and when subscribing threw).
if (
process.platform === "darwin" &&
activeSpaceChangeSubscriptionId !== null
) {
systemPreferences.unsubscribeWorkspaceNotification(
activeSpaceChangeSubscriptionId,
);
console.log("Main: Unsubscribed from display change notifications");
// Reset so the id is never unsubscribed twice.
activeSpaceChangeSubscriptionId = null;
}
});
// Quit when all windows are closed, except on macOS. There, it's common
// for applications and their menu bar to stay active until the user quits
// explicitly with Cmd + Q.
app.whenReady().then(() => appManager.initialize());
app.on("will-quit", () => appManager.cleanup());
// Quit once every window is closed, except on macOS, where applications
// conventionally stay alive until the user quits explicitly with Cmd + Q.
app.on("window-all-closed", () => {
  if (process.platform !== "darwin") {
    app.quit();
  }
});
// Dock-icon activation (macOS).
// With no windows open, only the floating button window is recreated.
// Otherwise the floating button is shown, or recreated if it is missing or
// destroyed, and the main window is then shown or created.
app.on("activate", () => {
  if (BrowserWindow.getAllWindows().length === 0) {
    createFloatingButtonWindow();
    return;
  }

  if (floatingButtonWindow && !floatingButtonWindow.isDestroyed()) {
    floatingButtonWindow.show();
  } else {
    createFloatingButtonWindow();
  }

  // Always show/create the main window when the dock icon is clicked.
  createOrShowMainWindow();
});
// In this file you can include the rest of your app's specific main process
// code. You can also put them in separate files and import them here.
/**
 * Requests the full accessibility tree from the Swift helper.
 *
 * Sends "getAccessibilityTreeDetails" with empty params (the whole tree). The
 * response is currently discarded because the logging of it is disabled, so
 * the only observable effects are the warning when the helper is unavailable
 * and the error log when the call fails. Never rejects.
 */
async function logAccessibilityTree() {
  if (!swiftIOBridgeClientInstance?.isHelperRunning()) {
    console.warn(
      "Main: SwiftIOBridge not ready or helper not running, cannot log accessibility tree.",
    );
    return;
  }

  try {
    // Call with empty params for the whole tree, as per schema for GetAccessibilityTreeDetailsParams
    await swiftIOBridgeClientInstance.call("getAccessibilityTreeDetails", {});
    // Disabled: console.log('Main: Accessibility tree received:', JSON.stringify(result, null, 2));
  } catch (error) {
    console.error("Main: Error calling getAccessibilityTreeDetails:", error);
  }
}
app.on("activate", () => appManager.handleActivate());

View file

@ -0,0 +1,208 @@
import { logger } from "../logger";
import { ModelManagerService } from "../../services/model-manager";
import { TranscriptionService } from "../../services/transcription-service";
import { SettingsService } from "../../services/settings-service";
import { SwiftIOBridge } from "../../services/platform/swift-bridge-service";
import { AutoUpdaterService } from "../services/auto-updater";
import { WindowManager } from "../core/window-manager";
import { RecordingService } from "../../services/recording-service";
/**
 * Manages service initialization and lifecycle.
 *
 * Services are created in a fixed order by {@link ServiceManager.initialize}
 * and exposed through getters that throw when the manager is not initialized
 * or the requested service is unavailable. A process-wide instance is
 * available through {@link ServiceManager.createInstance} and
 * {@link ServiceManager.getInstance}.
 */
export class ServiceManager {
  private static instance: ServiceManager | null = null;
  private isInitialized = false;
  private modelManagerService: ModelManagerService | null = null;
  private transcriptionService: TranscriptionService | null = null;
  private settingsService: SettingsService | null = null;
  private swiftIOBridge: SwiftIOBridge | null = null;
  private autoUpdaterService: AutoUpdaterService | null = null;
  private recordingService: RecordingService | null = null;

  /**
   * Creates every service once; repeated calls are ignored with a warning.
   *
   * Never rejects: a failure in any step is logged and swallowed so the app
   * can still start. NOTE(review): in that case `isInitialized` stays false
   * and the remaining steps are skipped, so every getter reports
   * "not initialized" — confirm this is the intended degraded mode.
   *
   * @param windowManager Passed to the auto-updater service.
   */
  async initialize(windowManager: WindowManager): Promise<void> {
    if (this.isInitialized) {
      logger.main.warn(
        "ServiceManager is already initialized, skipping initialization",
      );
      return;
    }

    try {
      this.initializeSettingsService();
      await this.initializeModelServices();
      this.initializePlatformServices();
      await this.initializeAIServices();
      this.initializeRecordingService();
      this.initializeAutoUpdater(windowManager);

      this.isInitialized = true;
      logger.main.info("Services initialized successfully");
    } catch (error) {
      logger.main.error("Failed to initialize services:", error);
      // Don't throw here - allow app to start even if some services fail
    }
  }

  private initializeSettingsService(): void {
    this.settingsService = new SettingsService();
    logger.main.info("Settings service initialized");
  }

  private async initializeModelServices(): Promise<void> {
    // Initialize Model Manager Service
    this.modelManagerService = new ModelManagerService();
    await this.modelManagerService.initialize();
  }

  /**
   * Builds the transcription service and applies the stored formatter config.
   * Failures are contained: a formatter problem only logs a warning, and any
   * other error leaves `transcriptionService` null instead of throwing.
   */
  private async initializeAIServices(): Promise<void> {
    try {
      if (!this.modelManagerService) {
        throw new Error("Model manager service not initialized");
      }

      this.transcriptionService = new TranscriptionService(
        this.modelManagerService,
      );

      // Load and configure formatter
      try {
        if (!this.settingsService) {
          throw new Error("SettingsService not initialized");
        }
        const formatterConfig = await this.settingsService.getFormatterConfig();
        if (formatterConfig) {
          this.transcriptionService.configureFormatter(formatterConfig);
          logger.transcription.info("Formatter configured", {
            provider: formatterConfig.provider,
            enabled: formatterConfig.enabled,
          });
        }
      } catch (formatterError) {
        logger.transcription.warn(
          "Failed to load formatter configuration:",
          formatterError,
        );
      }

      logger.transcription.info("Transcription Service initialized", {
        client: "Pipeline with Whisper",
      });
    } catch (error) {
      logger.transcription.error(
        "Error initializing Transcription Service:",
        error,
      );
      logger.transcription.warn(
        "Transcription will not work until configuration is fixed",
      );
      this.transcriptionService = null;
    }
  }

  private initializePlatformServices(): void {
    // Initialize Swift bridge for macOS integration
    if (process.platform === "darwin") {
      this.swiftIOBridge = new SwiftIOBridge();
    }
  }

  private initializeRecordingService(): void {
    // The recording service receives this manager while isInitialized is
    // still false (see the call order in initialize()).
    this.recordingService = new RecordingService(this);
    logger.main.info("Recording service initialized");
  }

  private initializeAutoUpdater(windowManager: WindowManager): void {
    this.autoUpdaterService = new AutoUpdaterService(windowManager);
  }

  /**
   * Shared guard behind every getter.
   *
   * @param service The service field to hand out.
   * @param unavailableMessage Error text used when the field is null.
   * @throws Error when the manager is not initialized, or the service is null.
   */
  private requireService<T>(service: T | null, unavailableMessage: string): T {
    if (!this.isInitialized) {
      throw new Error(
        "ServiceManager not initialized. Call initialize() first.",
      );
    }
    if (!service) {
      throw new Error(unavailableMessage);
    }
    return service;
  }

  // Getters for other managers to access services
  getModelManagerService(): ModelManagerService {
    return this.requireService(
      this.modelManagerService,
      "ModelManagerService failed to initialize",
    );
  }

  getTranscriptionService(): TranscriptionService {
    return this.requireService(
      this.transcriptionService,
      "TranscriptionService failed to initialize",
    );
  }

  getSettingsService(): SettingsService {
    return this.requireService(
      this.settingsService,
      "SettingsService failed to initialize",
    );
  }

  /** @throws Error on non-macOS platforms, where no bridge is created. */
  getSwiftIOBridge(): SwiftIOBridge {
    return this.requireService(
      this.swiftIOBridge,
      "SwiftIOBridge not available on this platform",
    );
  }

  getAutoUpdaterService(): AutoUpdaterService {
    return this.requireService(
      this.autoUpdaterService,
      "AutoUpdaterService failed to initialize",
    );
  }

  /**
   * Tears down the recording service, the model manager and the Swift helper,
   * in that order. Each step is isolated: a failure is logged and the
   * remaining steps still run, so the helper process is always asked to stop.
   */
  async cleanup(): Promise<void> {
    if (this.recordingService) {
      logger.main.info("Cleaning up recording service...");
      try {
        await this.recordingService.cleanup();
      } catch (error) {
        logger.main.error("Failed to clean up recording service:", error);
      }
    }

    if (this.modelManagerService) {
      logger.main.info("Cleaning up model downloads...");
      try {
        this.modelManagerService.cleanup();
      } catch (error) {
        logger.main.error("Failed to clean up model downloads:", error);
      }
    }

    if (this.swiftIOBridge) {
      logger.main.info("Stopping Swift helper...");
      try {
        this.swiftIOBridge.stopHelper();
      } catch (error) {
        logger.main.error("Failed to stop Swift helper:", error);
      }
    }
  }

  /** Returns the shared instance, or null if createInstance() has not run. */
  static getInstance(): ServiceManager | null {
    return ServiceManager.instance;
  }

  /** Returns the shared instance, creating it on first use. */
  static createInstance(): ServiceManager {
    if (!ServiceManager.instance) {
      ServiceManager.instance = new ServiceManager();
    }
    return ServiceManager.instance;
  }
}

View file

@ -6,6 +6,7 @@ import { app, Menu, MenuItemConstructorOptions, BrowserWindow } from "electron";
export const setupApplicationMenu = (
createOrShowSettingsWindow: () => void,
checkForUpdates?: () => void,
openAllDevTools?: () => void,
) => {
const menuTemplate: MenuItemConstructorOptions[] = [
// { role: 'appMenu' } for macOS
@ -97,6 +98,15 @@ export const setupApplicationMenu = (
{ role: "reload" as const },
{ role: "forceReload" as const },
{ role: "toggleDevTools" as const },
...(openAllDevTools
? [
{
label: "Open All Dev Tools",
accelerator: "CmdOrCtrl+Shift+I",
click: () => openAllDevTools(),
} as MenuItemConstructorOptions,
]
: []),
{ type: "separator" as const },
{ role: "resetZoom" as const },
{ role: "zoomIn" as const },

View file

@ -2,11 +2,9 @@
// https://www.electronjs.org/docs/latest/tutorial/process-model#preload-scripts
import { contextBridge, ipcRenderer, IpcRendererEvent } from "electron";
import log from "electron-log/renderer";
import { exposeElectronTRPC } from "electron-trpc-experimental/preload";
import type { ElectronAPI } from "../types/electron-api";
import type { FormatterConfig } from "../modules/formatter";
import type { Transcription, NewTranscription } from "../db/schema";
import type { FormatterConfig } from "../types/formatter";
interface ShortcutData {
shortcut: string;
@ -94,7 +92,6 @@ const api: ElectronAPI = {
// Transcription Database API (moved to tRPC)
// Vocabulary Database API
on: (channel: string, callback: (...args: any[]) => void) => {
const handler = (_event: IpcRendererEvent, ...args: any[]) =>
callback(...args);
@ -125,13 +122,26 @@ const api: ElectronAPI = {
}
},
// Logging API for renderer process
// Logging API for renderer process - sends to main process via IPC
log: {
info: (...args: any[]) => log.info(...args),
warn: (...args: any[]) => log.warn(...args),
error: (...args: any[]) => log.error(...args),
debug: (...args: any[]) => log.debug(...args),
scope: (name: string) => log.scope(name),
info: (...args: any[]) =>
ipcRenderer.invoke("log-message", "info", "renderer", ...args),
warn: (...args: any[]) =>
ipcRenderer.invoke("log-message", "warn", "renderer", ...args),
error: (...args: any[]) =>
ipcRenderer.invoke("log-message", "error", "renderer", ...args),
debug: (...args: any[]) =>
ipcRenderer.invoke("log-message", "debug", "renderer", ...args),
scope: (name: string) => ({
info: (...args: any[]) =>
ipcRenderer.invoke("log-message", "info", name, ...args),
warn: (...args: any[]) =>
ipcRenderer.invoke("log-message", "warn", name, ...args),
error: (...args: any[]) =>
ipcRenderer.invoke("log-message", "error", name, ...args),
debug: (...args: any[]) =>
ipcRenderer.invoke("log-message", "debug", name, ...args),
}),
},
};

View file

@ -2,13 +2,13 @@ import { autoUpdater } from "electron-updater";
import { app, dialog, BrowserWindow } from "electron";
import { EventEmitter } from "events";
import { logger } from "../logger";
import { WindowManager } from "../core/window-manager";
export class AutoUpdaterService extends EventEmitter {
private checkingForUpdate = false;
private updateAvailable = false;
private mainWindow: BrowserWindow | null = null;
constructor() {
constructor(private windowManager: WindowManager) {
super();
// Only set up auto-updater in production
@ -19,10 +19,6 @@ export class AutoUpdaterService extends EventEmitter {
}
}
setMainWindow(window: BrowserWindow | null) {
this.mainWindow = window;
}
private setupAutoUpdater() {
// Configure updater
autoUpdater.autoDownload = false; // Don't auto-download, ask user first
@ -62,7 +58,8 @@ export class AutoUpdaterService extends EventEmitter {
this.checkingForUpdate = false;
// Show error dialog only if user manually checked for updates
if (this.mainWindow && !this.mainWindow.isDestroyed()) {
const mainWindow = this.windowManager.getMainWindow();
if (mainWindow && !mainWindow.isDestroyed()) {
dialog.showErrorBox(
"Update Error",
`Error checking for updates: ${err.message}`,
@ -89,11 +86,12 @@ export class AutoUpdaterService extends EventEmitter {
}
private async showUpdateDialog(info: any) {
if (!this.mainWindow || this.mainWindow.isDestroyed()) {
const mainWindow = this.windowManager.getMainWindow();
if (!mainWindow || mainWindow.isDestroyed()) {
return;
}
const result = await dialog.showMessageBox(this.mainWindow, {
const result = await dialog.showMessageBox(mainWindow, {
type: "info",
title: "Update Available",
message: `A new version (${info.version}) is available.`,
@ -113,11 +111,12 @@ export class AutoUpdaterService extends EventEmitter {
}
private async showInstallDialog(info: any) {
if (!this.mainWindow || this.mainWindow.isDestroyed()) {
const mainWindow = this.windowManager.getMainWindow();
if (!mainWindow || mainWindow.isDestroyed()) {
return;
}
const result = await dialog.showMessageBox(this.mainWindow, {
const result = await dialog.showMessageBox(mainWindow, {
type: "info",
title: "Update Ready",
message: `Update ${info.version} has been downloaded.`,
@ -140,13 +139,16 @@ export class AutoUpdaterService extends EventEmitter {
// Skip in development
if (process.env.NODE_ENV === "development" || !app.isPackaged) {
logger.updater.info("Skipping update check in development mode");
if (userInitiated && this.mainWindow && !this.mainWindow.isDestroyed()) {
dialog.showMessageBox(this.mainWindow, {
type: "info",
title: "Development Mode",
message: "Update checking is disabled in development mode.",
buttons: ["OK"],
});
if (userInitiated) {
const mainWindow = this.windowManager.getMainWindow();
if (mainWindow && !mainWindow.isDestroyed()) {
dialog.showMessageBox(mainWindow, {
type: "info",
title: "Development Mode",
message: "Update checking is disabled in development mode.",
buttons: ["OK"],
});
}
}
return;
}
@ -164,11 +166,14 @@ export class AutoUpdaterService extends EventEmitter {
error: error instanceof Error ? error.message : String(error),
});
if (userInitiated && this.mainWindow && !this.mainWindow.isDestroyed()) {
dialog.showErrorBox(
"Update Check Failed",
"Failed to check for updates. Please try again later.",
);
if (userInitiated) {
const mainWindow = this.windowManager.getMainWindow();
if (mainWindow && !mainWindow.isDestroyed()) {
dialog.showErrorBox(
"Update Check Failed",
"Failed to check for updates. Please try again later.",
);
}
}
}
}

View file

@ -1,45 +0,0 @@
import { TranscriptionClient } from "./transcription-client";
import { FormatterService } from "../formatter";
/**
 * Two-step speech pipeline: transcribe audio with the injected client, then
 * pass the text through a FormatterService owned by this instance.
 */
export class AiService {
  private readonly transcriptionClient: TranscriptionClient;
  private readonly formatterService: FormatterService;

  /**
   * @param transcriptionClient Backend that turns audio bytes into text.
   */
  constructor(transcriptionClient: TranscriptionClient) {
    this.transcriptionClient = transcriptionClient;
    this.formatterService = new FormatterService();
  }

  /**
   * Transcribes the audio and returns the formatter's output for that text.
   *
   * @param audioData Audio bytes handed unchanged to the transcription client.
   * @returns The text returned by the formatter service.
   * @throws Error if no transcription client was supplied; errors from the
   *   client or the formatter propagate unchanged.
   */
  async transcribeAudio(audioData: Buffer): Promise<string> {
    if (!this.transcriptionClient) {
      throw new Error("Transcription client is not initialized.");
    }

    // Step 1: Transcribe audio
    const transcribedText =
      await this.transcriptionClient.transcribe(audioData);

    // Step 2: Hand the text to the formatter service
    return this.formatterService.formatText(transcribedText);
  }

  /**
   * Set formatter configuration.
   *
   * The parameter type is derived from FormatterService.configure, so a
   * malformed config is rejected at compile time instead of passing as `any`.
   */
  configureFormatter(
    config: Parameters<FormatterService["configure"]>[0],
  ): void {
    this.formatterService.configure(config);
  }

  /**
   * Get formatter service instance
   */
  getFormatterService(): FormatterService {
    return this.formatterService;
  }

  // Future methods for other AI functionalities can be added here
  // e.g., text summarization, sentiment analysis, etc.
}

View file

@ -1,195 +0,0 @@
import { TranscriptionClient } from "./transcription-client";
import * as fs from "fs";
import { logger } from "../../main/logger";
import { ModelManagerService } from "../models/model-manager";
/**
 * TranscriptionClient backed by a locally downloaded Whisper model.
 *
 * The `smart-whisper` module is imported lazily on the first transcription and
 * the resulting instance is reused until the selected model changes or
 * dispose() is called.
 */
export class LocalWhisperClient implements TranscriptionClient {
  private modelManager: ModelManagerService;
  private selectedModelId: string | null = null;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- smart-whisper is imported dynamically, so its types are not in scope here
  private whisperInstance: any = null;

  /**
   * @param modelManager Source of the downloaded-model registry.
   * @param selectedModelId Preferred model id; when omitted the best
   *   available model is picked at transcription time.
   */
  constructor(modelManager: ModelManagerService, selectedModelId?: string) {
    this.modelManager = modelManager;
    this.selectedModelId = selectedModelId || null;
  }

  /**
   * Creates the smart-whisper instance if none exists yet.
   *
   * @throws Error when no model file is available or smart-whisper fails to load.
   */
  private async initializeWhisper(): Promise<void> {
    if (this.whisperInstance) {
      return; // Already initialized
    }

    const modelPath = await this.getBestAvailableModel();
    if (!modelPath) {
      throw new Error(
        "No Whisper models available. Please download a model first.",
      );
    }

    try {
      const { Whisper } = await import("smart-whisper");
      this.whisperInstance = new Whisper(modelPath, { gpu: true });
      logger.ai.info("Smart-whisper initialized", { modelPath });
    } catch (error) {
      logger.ai.error("Failed to initialize smart-whisper", {
        error: error instanceof Error ? error.message : String(error),
        modelPath,
      });
      throw new Error(`Failed to initialize smart-whisper: ${error}`);
    }
  }

  /**
   * Transcribes a buffer of audio samples.
   *
   * @param audioData Raw sample bytes; see convertAudioBuffer for the layout
   *   this client assumes.
   * @returns The transcription produced by smart-whisper.
   * @throws Error prefixed with "Transcription failed:" on any failure.
   */
  async transcribe(audioData: Buffer): Promise<string> {
    try {
      await this.initializeWhisper();

      // Convert audio buffer to the format expected by smart-whisper
      const audioFloat32Array = await this.convertAudioBuffer(audioData);

      logger.ai.info("Starting smart-whisper transcription", {
        audioDataSize: audioData.length,
        convertedSize: audioFloat32Array.length,
      });

      // Transcribe using smart-whisper; `result` is itself awaited below.
      const { result } = await this.whisperInstance.transcribe(
        audioFloat32Array,
        {
          language: "auto",
        },
      );
      const transcription = await result;

      logger.ai.info("Smart-whisper transcription completed", {
        resultLength: transcription.length,
      });
      return transcription;
    } catch (error) {
      logger.ai.error("Smart-whisper transcription failed", {
        error: error instanceof Error ? error.message : String(error),
      });
      throw new Error(`Transcription failed: ${error}`);
    }
  }

  /**
   * Reinterprets the buffer as little-endian 32-bit float samples, falling
   * back to 16-bit signed PCM scaled to [-1, 1) if that read throws.
   *
   * No decoding or resampling happens here. NOTE(review): the original author
   * states smart-whisper expects 16 kHz mono Float32 audio — confirm that
   * callers already deliver that format.
   */
  private async convertAudioBuffer(audioData: Buffer): Promise<Float32Array> {
    try {
      // Trailing bytes that do not fill a whole 4-byte sample are ignored.
      const float32Array = new Float32Array(Math.floor(audioData.length / 4));
      for (let i = 0; i < float32Array.length; i++) {
        // Read 32-bit float from buffer (little-endian)
        float32Array[i] = audioData.readFloatLE(i * 4);
      }
      return float32Array;
    } catch (error) {
      logger.ai.warn("Audio conversion failed, trying alternative method", {
        error: error instanceof Error ? error.message : String(error),
      });

      // Fallback: convert as if it's PCM data
      const samples = new Float32Array(Math.floor(audioData.length / 2));
      for (let i = 0; i < samples.length; i++) {
        // Convert 16-bit signed PCM to float (-1 to 1)
        samples[i] = audioData.readInt16LE(i * 2) / 32768.0;
      }
      return samples;
    }
  }

  /**
   * Resolves the model file to load: the selected model when its file exists,
   * otherwise the first model from the preference list whose file exists.
   *
   * @returns The local model path, or null when nothing usable is downloaded.
   */
  private async getBestAvailableModel(): Promise<string | null> {
    const downloadedModels = await this.modelManager.getDownloadedModels();

    // If a specific model is selected and available, use it
    if (this.selectedModelId && downloadedModels[this.selectedModelId]) {
      const model = downloadedModels[this.selectedModelId];
      if (fs.existsSync(model.localPath)) {
        return model.localPath;
      }
    }

    // Otherwise, find the best available model (prioritize by quality)
    const preferredOrder = [
      "whisper-large-v1",
      "whisper-medium",
      "whisper-small",
      "whisper-base",
      "whisper-tiny",
    ];

    for (const modelId of preferredOrder) {
      const model = downloadedModels[modelId];
      if (model && fs.existsSync(model.localPath)) {
        return model.localPath;
      }
    }

    return null;
  }

  /**
   * Set the model to use for transcription.
   *
   * When the model changes, the current whisper instance is fully released
   * before this method resolves, so a following transcribe() can never pick
   * up an instance that is still being freed.
   *
   * @throws Error if the model has not been downloaded.
   */
  async setSelectedModel(modelId: string): Promise<void> {
    const downloadedModels = await this.modelManager.getDownloadedModels();
    if (!downloadedModels[modelId]) {
      throw new Error(`Model not downloaded: ${modelId}`);
    }

    // If we're changing models, free the current instance
    if (this.selectedModelId !== modelId && this.whisperInstance) {
      await this.freeWhisperInstance();
    }

    this.selectedModelId = modelId;
    logger.ai.info("Selected model for transcription", { modelId });
  }

  // Get the currently selected model
  getSelectedModel(): string | null {
    return this.selectedModelId;
  }

  /** True when at least one downloaded model file exists on disk. */
  async isAvailable(): Promise<boolean> {
    return (await this.getAvailableModels()).length > 0;
  }

  /** Ids of the downloaded models whose file exists on disk. */
  async getAvailableModels(): Promise<string[]> {
    const downloadedModels = await this.modelManager.getDownloadedModels();
    return Object.keys(downloadedModels).filter((modelId) =>
      fs.existsSync(downloadedModels[modelId].localPath),
    );
  }

  // Free resources
  async dispose(): Promise<void> {
    await this.freeWhisperInstance();
  }

  /**
   * Releases the whisper instance. Never throws: a failing free() is logged
   * and the reference is cleared regardless.
   */
  private async freeWhisperInstance(): Promise<void> {
    if (this.whisperInstance) {
      try {
        await this.whisperInstance.free();
        logger.ai.info("Smart-whisper instance freed");
      } catch (error) {
        logger.ai.warn("Error freeing smart-whisper instance", {
          error: error instanceof Error ? error.message : String(error),
        });
      } finally {
        this.whisperInstance = null;
      }
    }
  }
}

View file

@ -1,3 +0,0 @@
/**
 * Minimal contract for a speech-to-text backend: a single `transcribe`
 * method that takes audio bytes and resolves to text.
 */
export interface TranscriptionClient {
// Resolves to the transcribed text for the given audio bytes. The expected
// audio encoding is not specified by this interface; it is up to each
// implementation.
transcribe(audioData: Buffer): Promise<string>;
}

View file

@ -1,256 +0,0 @@
import fs, { statSync } from "node:fs"; // Import statSync
import path from "node:path";
import { app } from "electron"; // To get a writable path like appData
import { EventEmitter } from "node:events";
/**
 * Persists incoming audio chunks to a `.webm` file inside the `recordings`
 * folder of the app's `userData` directory and announces progress through
 * events.
 *
 * Events:
 * - `chunk-ready`: `{ sessionId, chunkId, audioData, isFinalChunk }`, emitted
 *   after a non-empty chunk was written, and for an empty final chunk that
 *   arrives while a recording is active.
 * - `recording-finished`: the recording path, emitted once the file stream
 *   has ended and the file could be stat'ed.
 * - `recording-error`: an `Error`, emitted when the stream, a write, or the
 *   final stat fails.
 */
export class AudioCapture extends EventEmitter {
  /** Directory every recording is written to. */
  private readonly recordingsDir: string;
  private currentRecordingPath: string | null = null;
  private writableStream: fs.WriteStream | null = null;
  private chunkCounter = 0;
  private sessionId: string | null = null;

  constructor() {
    super();
    this.recordingsDir = path.join(app.getPath("userData"), "recordings");
    // Ensure the recordings directory exists
    if (!fs.existsSync(this.recordingsDir)) {
      fs.mkdirSync(this.recordingsDir, { recursive: true });
    }
  }

  /** Returns `true` while a file stream for a recording is open. */
  public isCurrentlyRecording(): boolean {
    return this.writableStream !== null;
  }

  /**
   * Feeds one audio chunk into the recorder.
   *
   * The first non-empty chunk received while idle starts a new recording.
   * Later chunks are appended to it. A chunk flagged as final ends the
   * recording once it has been written (or immediately when it is empty).
   * Empty chunks received while idle are ignored.
   *
   * @param chunk - Audio bytes to append to the recording file.
   * @param isFinalChunk - Whether this chunk terminates the recording.
   */
  public handleAudioChunk(chunk: Buffer, isFinalChunk: boolean = false): void {
    if (this.writableStream) {
      this.appendToRecording(this.writableStream, chunk, isFinalChunk);
    } else if (chunk.length > 0) {
      this.startRecording(chunk, isFinalChunk);
    } else if (isFinalChunk) {
      console.log(
        "AudioCapture: Received an empty final chunk, but no recording was active. No action taken.",
      );
    } else {
      console.warn(
        "AudioCapture: Received an empty non-final chunk, but no recording was active. Ignoring.",
      );
    }
  }

  /** Opens a new recording file and writes the first chunk into it. */
  private startRecording(chunk: Buffer, isFinalChunk: boolean): void {
    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
    const recordingPath = path.join(
      this.recordingsDir,
      `recording-${timestamp}.webm`,
    );
    this.sessionId = `session-${timestamp}`;
    this.chunkCounter = 0;
    this.currentRecordingPath = recordingPath;

    const stream = fs.createWriteStream(recordingPath);
    console.log(
      `AudioCapture: New recording started by first chunk. Saving to: ${recordingPath}`,
    );

    stream.on("error", (err) => {
      console.error(
        `AudioCapture: Error on writable stream for ${recordingPath}:`,
        err,
      );
      this.emit("recording-error", err);
      this.abandonStream(stream, recordingPath);
    });

    // Publish the stream only after its error handler is attached.
    this.writableStream = stream;

    stream.write(chunk, (writeError) => {
      if (writeError) {
        console.error(
          `AudioCapture: Error writing initial audio chunk to ${recordingPath}:`,
          writeError,
        );
        this.emit("recording-error", writeError);
        // A failed first write means the stream is unusable; drop it.
        this.abandonStream(stream, recordingPath);
        return;
      }

      this.announceWrittenChunk(chunk, isFinalChunk);
      if (isFinalChunk) {
        console.log(
          "AudioCapture: First chunk is also the final chunk. Finalizing immediately.",
        );
        this.finalizeRecording();
      }
    });
  }

  /** Appends a chunk to the active recording, finalizing it when requested. */
  private appendToRecording(
    activeStream: fs.WriteStream,
    chunk: Buffer,
    isFinalChunk: boolean,
  ): void {
    const activePath = this.currentRecordingPath;

    if (chunk.length === 0) {
      console.warn(
        `AudioCapture: Received empty audio chunk while recording to ${activePath}. Not writing to file.`,
      );
      if (isFinalChunk) {
        console.log(
          "AudioCapture: Empty final chunk received during active recording. Finalizing recording.",
        );
        // The counter is deliberately not advanced for empty chunks.
        // NOTE(review): this reuses the id of the most recently announced
        // chunk; a consumer that waits for strictly increasing chunk ids may
        // never treat this event as the next chunk — confirm against the
        // consumer before relying on it to signal completion.
        this.emitChunkReady(chunk, true);
        this.finalizeRecording();
      }
      return;
    }

    activeStream.write(chunk, (writeError) => {
      if (writeError) {
        console.error(
          `AudioCapture: Error writing subsequent audio chunk to ${activePath}:`,
          writeError,
        );
        this.emit("recording-error", writeError);
        // Cleanup is left to the stream's own 'error' handler, which was
        // installed when the stream was created.
        return;
      }

      this.announceWrittenChunk(chunk, isFinalChunk);
      if (isFinalChunk) {
        console.log(
          "AudioCapture: Final chunk written successfully. Finalizing recording.",
        );
        this.finalizeRecording();
      }
    });
  }

  /** Advances the chunk counter and emits `chunk-ready` for a written chunk. */
  private announceWrittenChunk(audioData: Buffer, isFinalChunk: boolean): void {
    this.chunkCounter++;
    console.log(
      `AudioCapture: Emitting chunk-ready for chunk ${this.chunkCounter}`,
    );
    this.emitChunkReady(audioData, isFinalChunk);
  }

  /** Emits `chunk-ready` using the current session id and chunk counter. */
  private emitChunkReady(audioData: Buffer, isFinalChunk: boolean): void {
    this.emit("chunk-ready", {
      sessionId: this.sessionId,
      chunkId: this.chunkCounter,
      audioData,
      isFinalChunk,
    });
  }

  /**
   * Drops a failed stream: clears the instance state that still refers to it
   * and closes the stream if it has not been destroyed already.
   */
  private abandonStream(stream: fs.WriteStream, recordingPath: string): void {
    if (this.writableStream === stream) {
      this.writableStream = null;
    }
    this.resetSessionState(recordingPath);
    if (!stream.destroyed) {
      stream.end();
    }
  }

  /**
   * Clears path, session id and chunk counter, but only while they still
   * belong to `recordingPath`, so a recording started in the meantime is
   * left untouched.
   */
  private resetSessionState(recordingPath: string | null): void {
    if (this.currentRecordingPath === recordingPath) {
      this.currentRecordingPath = null;
      this.sessionId = null;
      this.chunkCounter = 0;
    }
  }

  /**
   * Ends the active file stream and, once it has ended, emits
   * `recording-finished` (or `recording-error` when the file cannot be
   * stat'ed) and resets the per-recording state.
   */
  private finalizeRecording(): void {
    const streamToClose = this.writableStream;
    if (!streamToClose) {
      console.warn(
        "AudioCapture: finalizeRecording called but no writableStream active. This might indicate a prior error or premature call.",
      );
      return;
    }

    console.log(
      "AudioCapture: finalizeRecording() called, ending writable stream.",
    );
    const recordingPathToFinalize = this.currentRecordingPath;
    // Prevent new writes and signal "not recording".
    this.writableStream = null;

    streamToClose.end(() => {
      console.log(
        `AudioCapture: Writable stream .end() callback for: ${recordingPathToFinalize}`,
      );
      if (!recordingPathToFinalize) {
        return;
      }

      try {
        const stats = statSync(recordingPathToFinalize);
        console.log(
          `AudioCapture: File size of ${recordingPathToFinalize} is ${stats.size} bytes before emitting 'recording-finished'.`,
        );
        if (stats.size === 0) {
          console.warn(
            `AudioCapture: File ${recordingPathToFinalize} is empty. Transcription will likely fail.`,
          );
        }
        this.emit("recording-finished", recordingPathToFinalize);
      } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        console.error(
          `AudioCapture: Error getting file stats for ${recordingPathToFinalize}:`,
          error,
        );
        this.emit(
          "recording-error",
          new Error(
            `Failed to get stats for ${recordingPathToFinalize}: ${message}`,
          ),
        );
      }

      this.resetSessionState(recordingPathToFinalize);
    });

    // The 'finish' listener is mostly for logging; the reset below is a
    // safety net in case the .end() callback did not clear the state.
    streamToClose.on("finish", () => {
      console.log(
        `AudioCapture: Writable stream 'finish' event for the recording at ${recordingPathToFinalize}.`,
      );
      this.resetSessionState(recordingPathToFinalize);
    });
  }
}

View file

@ -1,16 +0,0 @@
/**
 * Abstract base class for text formatting clients.
 *
 * Concrete subclasses provide the actual formatting by implementing
 * `formatText`.
 */
export abstract class FormatterClient {
/**
 * Formats the given text.
 *
 * @param text - Text to format.
 * @returns A promise resolving to a string (the formatted text, per the method name).
 */
abstract formatText(text: string): Promise<string>;
}
/**
 * Configuration interface for formatter clients
 */
export interface FormatterConfig {
/** Formatting backend; "openrouter" is the only value the type allows. */
provider: "openrouter";
/** Model identifier handed to the provider's client. */
model: string;
/** API key handed to the provider's client. */
apiKey: string;
/** Whether formatting is switched on. */
enabled: boolean;
}

View file

@ -1,65 +0,0 @@
import { FormatterClient, FormatterConfig } from "./formatter-client";
import { OpenRouterFormatterClient } from "./openrouter-formatter-client";
/**
 * Main formatter service that manages different formatting providers.
 *
 * The service holds at most one client. While it is unconfigured or disabled,
 * `formatText` passes text through unchanged.
 */
export class FormatterService {
  private client: FormatterClient | null = null;
  private config: FormatterConfig | null = null;

  /**
   * Configure the formatter service with the given configuration.
   *
   * The client is built before any state is replaced, so a configuration
   * that is rejected leaves the previous configuration and client intact
   * instead of pairing the new config with a stale client.
   *
   * @param config - Provider, model, API key and enabled flag.
   * @throws Error when `config.provider` is not a supported provider.
   */
  configure(config: FormatterConfig): void {
    const client = config.enabled ? FormatterService.createClient(config) : null;
    this.config = config;
    this.client = client;
  }

  /** Builds the client that corresponds to `config.provider`. */
  private static createClient(config: FormatterConfig): FormatterClient {
    switch (config.provider) {
      case "openrouter":
        return new OpenRouterFormatterClient(config.apiKey, config.model);
      default:
        throw new Error(`Unsupported formatter provider: ${config.provider}`);
    }
  }

  /**
   * Format the given text using the configured formatter.
   *
   * @param text - Text to format.
   * @returns The formatted text, or the original text when the formatter is
   * not configured, is disabled, or the client throws.
   */
  async formatText(text: string): Promise<string> {
    if (!this.client || !this.config?.enabled) {
      return text;
    }
    try {
      return await this.client.formatText(text);
    } catch (error) {
      console.error("Error in formatter service:", error);
      // Formatting is best-effort: fall back to the unformatted text.
      return text;
    }
  }

  /**
   * Check if the formatter is configured and enabled.
   */
  isEnabled(): boolean {
    return this.config?.enabled === true && this.client !== null;
  }

  /**
   * Get the current configuration, or `null` when `configure` has not
   * succeeded yet.
   */
  getConfiguration(): FormatterConfig | null {
    return this.config;
  }
}

View file

@ -1,3 +0,0 @@
// Runtime values.
export { FormatterService } from "./formatter-service";
export { FormatterClient } from "./formatter-client";
export { OpenRouterFormatterClient } from "./openrouter-formatter-client";
// `FormatterConfig` is an interface and has no runtime value, so it must be
// re-exported as a type to stay valid when files are transpiled one at a time.
export type { FormatterConfig } from "./formatter-client";

View file

@ -1,59 +0,0 @@
import { createOpenAI } from "@ai-sdk/openai";
import { generateText } from "ai";
import { FormatterClient } from "./formatter-client";
/**
 * OpenRouter-based text formatter client.
 *
 * Sends the text to an OpenRouter-hosted model through the OpenAI-compatible
 * provider and returns the model's cleaned-up version. Formatting is
 * best-effort: whenever a trustworthy result cannot be produced, the
 * original text is returned unchanged.
 */
export class OpenRouterFormatterClient extends FormatterClient {
  /** Upper bound on generated tokens for a single formatting request. */
  private static readonly MAX_OUTPUT_TOKENS = 2000;

  /** Instructions sent as the system message with every request. */
  private static readonly SYSTEM_PROMPT = `You are a professional text formatter. Your task is to clean up and improve the formatting of transcribed text while preserving the original meaning and content.
Please:
1. Fix obvious transcription errors and typos
2. Add proper punctuation where missing
3. Organize the text into proper paragraphs
4. Capitalize proper nouns and sentence beginnings
5. Remove unnecessary filler words (um, uh, etc.) but keep natural speech patterns
6. Maintain the speaker's original tone and style
Return only the formatted text without any explanations or additional commentary.`;

  private readonly provider: ReturnType<typeof createOpenAI>;
  private readonly model: string;

  /**
   * @param apiKey - OpenRouter API key.
   * @param model - Model identifier passed to the provider.
   */
  constructor(apiKey: string, model: string) {
    super();
    // Configure OpenRouter provider
    this.provider = createOpenAI({
      baseURL: "https://openrouter.ai/api/v1",
      apiKey: apiKey,
    });
    this.model = model;
  }

  /**
   * Formats transcribed text with the configured model.
   *
   * @param text - Raw transcription.
   * @returns The formatted text. The original `text` is returned when it is
   * blank, when the request fails, when the model's output was cut off by
   * the token limit, or when the model returns nothing.
   */
  async formatText(text: string): Promise<string> {
    // Nothing to format: skip the network round trip.
    if (text.trim().length === 0) {
      return text;
    }

    try {
      const { text: formattedText, finishReason } = await generateText({
        model: this.provider(this.model),
        messages: [
          {
            role: "system",
            content: OpenRouterFormatterClient.SYSTEM_PROMPT,
          },
          {
            role: "user",
            content: `Please format this transcribed text:\n\n${text}`,
          },
        ],
        temperature: 0.1, // Low temperature for consistent formatting
        maxTokens: OpenRouterFormatterClient.MAX_OUTPUT_TOKENS,
      });

      // A reply that stopped at the token limit is incomplete; returning it
      // would silently drop the tail of the transcription.
      if (finishReason === "length") {
        console.error(
          "Error formatting text with OpenRouter:",
          new Error("Formatted output was truncated by the token limit"),
        );
        return text;
      }

      // Never replace real content with an empty reply.
      return formattedText.trim().length > 0 ? formattedText : text;
    } catch (error) {
      console.error("Error formatting text with OpenRouter:", error);
      // Return original text if formatting fails
      return text;
    }
  }
}

View file

@ -1 +0,0 @@
export { SettingsService } from "./settings-service";

View file

@ -1,387 +0,0 @@
import { ContextualTranscriptionClient } from "./transcription-session";
import * as fs from "fs";
import { logger } from "../../main/logger";
import { ModelManagerService } from "../models/model-manager";
import { TranscribeFormat, TranscribeParams, Whisper } from "smart-whisper";
/**
 * Contextual transcription client backed by a local smart-whisper model.
 *
 * The model is loaded lazily on first use (or explicitly via `loadModel`)
 * and is freed automatically after `MODEL_CLEANUP_DELAY_MS` of inactivity.
 * Text from earlier chunks can be passed in as an initial prompt so that
 * consecutive chunks transcribe more coherently.
 */
export class ContextualLocalWhisperClient
  implements ContextualTranscriptionClient
{
  /** Model ids tried in this order when no usable model is selected. */
  private static readonly PREFERRED_MODEL_ORDER: readonly string[] = [
    "whisper-large-v1",
    "whisper-medium",
    "whisper-small",
    "whisper-base",
    "whisper-tiny",
  ];

  private modelManager: ModelManagerService;
  private selectedModelId: string | null = null;
  private whisperInstance: Whisper | null = null;
  private lastUsedTimestamp = 0;
  private cleanupTimer: NodeJS.Timeout | null = null;
  /** Number of transcriptions currently running on the loaded instance. */
  private activeTranscriptions = 0;
  private readonly MODEL_CLEANUP_DELAY_MS = 30000; // 30 seconds after last use (configurable)

  /**
   * @param modelManager - Source of information about downloaded models.
   * @param selectedModelId - Optional id of the model to prefer.
   */
  constructor(modelManager: ModelManagerService, selectedModelId?: string) {
    this.modelManager = modelManager;
    this.selectedModelId = selectedModelId || null;
  }

  /**
   * Returns the loaded whisper instance, creating and loading it first when
   * none exists.
   *
   * @throws Error when no model file is available or loading fails.
   */
  private async initializeWhisper(): Promise<Whisper> {
    if (this.whisperInstance) {
      return this.whisperInstance; // Already initialized
    }
    const modelPath = await this.getBestAvailableModel();
    if (!modelPath) {
      throw new Error(
        "No Whisper models available. Please download a model first.",
      );
    }

    let instance: Whisper | null = null;
    try {
      // Ensure the GPU is used if available
      instance = new Whisper(modelPath, { gpu: true });
      this.whisperInstance = instance;
      logger.ai.info(
        "Smart-whisper instance created for contextual transcription",
        { modelPath },
      );
      // Actually load the model into memory
      await instance.load();
      logger.ai.info(
        "Smart-whisper model loaded into memory for contextual transcription",
        {
          modelPath,
        },
      );
      return instance;
    } catch (error) {
      // Do not keep an instance whose load failed; otherwise later calls
      // would treat it as initialized and never retry.
      if (instance !== null && this.whisperInstance === instance) {
        this.whisperInstance = null;
      }
      logger.ai.error(
        "Failed to initialize and load smart-whisper for contextual transcription",
        {
          error: error instanceof Error ? error.message : String(error),
          modelPath,
        },
      );
      throw new Error(`Failed to initialize and load smart-whisper: ${error}`);
    }
  }

  /**
   * Transcribes one chunk of audio, using earlier text as context.
   *
   * @param audioData - Raw Float32 samples; the conversion code assumes
   * 16 kHz mono as produced by the renderer — TODO confirm with the caller.
   * @param previousContext - Text transcribed so far; its last ~50 words are
   * passed to whisper as the initial prompt.
   * @returns The concatenated text of all segments whisper produced.
   * @throws Error wrapping any failure during initialization or transcription.
   */
  async transcribeWithContext(
    audioData: Buffer,
    previousContext: string,
  ): Promise<string> {
    // Mark the instance as busy so the idle timer cannot free it mid-run.
    this.activeTranscriptions++;
    try {
      const whisper = await this.initializeWhisper();
      this.updateLastUsedTimestamp(); // Update timestamp when model is used

      // Convert audio buffer to the format expected by smart-whisper
      const audioFloat32Array = await this.convertAudioBuffer(audioData);

      // Use the last ~50 words of the previous text as prompt for continuity
      const maxWords = 50;
      const trimmedContext = previousContext ? previousContext.trim() : "";
      const contextWords =
        trimmedContext.length > 0 ? trimmedContext.split(/\s+/) : [];
      const prompt =
        contextWords.length > maxWords
          ? contextWords.slice(-maxWords).join(" ")
          : trimmedContext;

      const modelInfo = await this.getCurrentModelInfo();
      logger.ai.info("Starting smart-whisper contextual transcription", {
        audioDataSize: audioData.length,
        convertedSize: audioFloat32Array.length,
        hasContext: prompt.length > 0,
        contextLength: prompt.length,
        modelId: modelInfo.modelId,
        modelPath: modelInfo.modelPath,
      });

      const transcriptionOptions: Partial<TranscribeParams<TranscribeFormat>> =
        {
          language: "auto",
        };
      // Add initial prompt if we have context
      if (prompt) {
        transcriptionOptions.initial_prompt = prompt;
      }

      const { result } = await whisper.transcribe(
        audioFloat32Array,
        transcriptionOptions,
      );
      const transcription = await result;

      // Join the text of every returned segment
      const transcriptionText = transcription
        .map((segment) => segment.text)
        .join("");

      logger.ai.info("Smart-whisper contextual transcription completed", {
        resultLength: transcriptionText.length,
        hadContext: prompt.length > 0,
        resultType: typeof result,
        modelId: modelInfo.modelId,
        modelPath: modelInfo.modelPath,
      });
      return transcriptionText;
    } catch (error) {
      logger.ai.error("Smart-whisper contextual transcription failed", {
        error: error instanceof Error ? error.message : String(error),
      });
      throw new Error(`Contextual transcription failed: ${error}`);
    } finally {
      this.activeTranscriptions--;
      // Count inactivity from the end of the work, not from its start.
      if (this.whisperInstance) {
        this.updateLastUsedTimestamp();
      }
    }
  }

  /**
   * Reinterprets the incoming bytes as 32-bit float samples.
   *
   * A typed-array view needs a byte offset that is a multiple of the element
   * size. When the Buffer is not aligned that way the bytes are copied into
   * a fresh, aligned buffer, rather than letting the constructor throw and
   * the float data be misread as 16-bit PCM by the fallback below.
   * Trailing bytes that do not form a whole sample are ignored.
   */
  private async convertAudioBuffer(audioData: Buffer): Promise<Float32Array> {
    const bytesPerSample = Float32Array.BYTES_PER_ELEMENT;
    logger.ai.info("Converting audio buffer", {
      bufferLength: audioData.length,
      expectedFloat32Length: audioData.length / 4,
    });
    try {
      if (audioData.length % bytesPerSample !== 0) {
        logger.ai.warn(
          "Audio buffer length not divisible by 4, may not be Float32Array",
          {
            length: audioData.length,
            remainder: audioData.length % 4,
          },
        );
      }

      const sampleCount = Math.floor(audioData.length / bytesPerSample);
      let float32Array: Float32Array;
      if (audioData.byteOffset % bytesPerSample === 0) {
        // Aligned: view the existing memory without copying.
        float32Array = new Float32Array(
          audioData.buffer,
          audioData.byteOffset,
          sampleCount,
        );
      } else {
        // Unaligned: copy the whole samples into an aligned buffer.
        const aligned = new Uint8Array(sampleCount * bytesPerSample);
        aligned.set(audioData.subarray(0, aligned.length));
        float32Array = new Float32Array(aligned.buffer);
      }

      logger.ai.info("Successfully converted audio buffer", {
        sampleCount: float32Array.length,
        sampleRate: "16kHz (assumed)",
        format: "Float32Array",
      });
      return float32Array;
    } catch (error) {
      logger.ai.error("Audio conversion failed", {
        error: error instanceof Error ? error.message : String(error),
      });
      // Fallback kept from the previous behavior: interpret as 16-bit PCM
      try {
        const samples = new Float32Array(audioData.length / 2);
        for (let i = 0; i < samples.length; i++) {
          const sample = audioData.readInt16LE(i * 2);
          samples[i] = sample / 32768.0;
        }
        logger.ai.info("Fallback: converted as 16-bit PCM", {
          sampleCount: samples.length,
        });
        return samples;
      } catch (fallbackError) {
        logger.ai.error("All audio conversion methods failed", {
          originalError: error instanceof Error ? error.message : String(error),
          fallbackError:
            fallbackError instanceof Error
              ? fallbackError.message
              : String(fallbackError),
        });
        // Return empty array as last resort
        return new Float32Array(0);
      }
    }
  }

  /**
   * Path of the model file that would be used right now, or `null` when no
   * downloaded model file exists. Shares its selection logic with
   * `getCurrentModelInfo`.
   */
  private async getBestAvailableModel(): Promise<string | null> {
    const { modelPath } = await this.getCurrentModelInfo();
    return modelPath;
  }

  /**
   * Set the model to use for transcription.
   *
   * When the model changes while an instance is loaded, that instance is
   * freed first, and the method waits for this to finish so a following
   * transcription cannot pick up an instance that is being torn down.
   *
   * @throws Error when the model has not been downloaded.
   */
  async setSelectedModel(modelId: string): Promise<void> {
    const downloadedModels = await this.modelManager.getDownloadedModels();
    if (!downloadedModels[modelId]) {
      throw new Error(`Model not downloaded: ${modelId}`);
    }
    // If we're changing models, free the current instance
    if (this.selectedModelId !== modelId && this.whisperInstance) {
      await this.freeWhisperInstance();
    }
    this.selectedModelId = modelId;
    logger.ai.info("Selected model for contextual transcription", { modelId });
  }

  /** Get the currently selected model id, or `null` when none is selected. */
  getSelectedModel(): string | null {
    return this.selectedModelId;
  }

  /** Whether at least one downloaded model file exists on disk. */
  async isAvailable(): Promise<boolean> {
    const downloadedModels = await this.modelManager.getDownloadedModels();
    return Object.keys(downloadedModels).some((modelId) =>
      fs.existsSync(downloadedModels[modelId].localPath),
    );
  }

  /** Ids of all downloaded models whose file exists on disk. */
  async getAvailableModels(): Promise<string[]> {
    const downloadedModels = await this.modelManager.getDownloadedModels();
    return Object.keys(downloadedModels).filter((modelId) =>
      fs.existsSync(downloadedModels[modelId].localPath),
    );
  }

  /**
   * Resolves which model would be used: the selected model when its file
   * exists, otherwise the first model of `PREFERRED_MODEL_ORDER` whose file
   * exists. Both fields are `null` when nothing is available.
   */
  async getCurrentModelInfo(): Promise<{
    modelId: string | null;
    modelPath: string | null;
  }> {
    const downloadedModels = await this.modelManager.getDownloadedModels();

    // The explicitly selected model comes first, then the preference list.
    const candidateIds = this.selectedModelId
      ? [
          this.selectedModelId,
          ...ContextualLocalWhisperClient.PREFERRED_MODEL_ORDER,
        ]
      : ContextualLocalWhisperClient.PREFERRED_MODEL_ORDER;

    for (const modelId of candidateIds) {
      const model = downloadedModels[modelId];
      if (model && fs.existsSync(model.localPath)) {
        return {
          modelId: modelId,
          modelPath: model.localPath,
        };
      }
    }
    return { modelId: null, modelPath: null };
  }

  /** Public method to preload the model and start the inactivity countdown. */
  async loadModel(): Promise<void> {
    await this.initializeWhisper();
    this.updateLastUsedTimestamp();
    logger.ai.info("Model preloaded successfully", {
      modelLoaded: this.isModelLoaded(),
      cleanupDelayMs: this.MODEL_CLEANUP_DELAY_MS,
    });
  }

  /** Public method to free the model and cancel the inactivity countdown. */
  async freeModel(): Promise<void> {
    this.clearCleanupTimer();
    await this.freeWhisperInstance();
    logger.ai.info("Model freed manually");
  }

  /** Check if a whisper instance is currently held. */
  isModelLoaded(): boolean {
    return this.whisperInstance !== null;
  }

  /** Free resources: cancels the idle timer and frees the instance. */
  async dispose(): Promise<void> {
    this.clearCleanupTimer();
    await this.freeWhisperInstance();
  }

  /**
   * Frees the held instance, if any. Errors are logged as warnings and not
   * rethrown; the reference is cleared either way.
   */
  private async freeWhisperInstance(): Promise<void> {
    if (this.whisperInstance) {
      try {
        await this.whisperInstance.free();
        logger.ai.info("Smart-whisper contextual instance freed");
      } catch (error) {
        logger.ai.warn("Error freeing smart-whisper contextual instance", {
          error: error instanceof Error ? error.message : String(error),
        });
      } finally {
        this.whisperInstance = null;
      }
    }
  }

  /** Records "now" as the last use and restarts the inactivity countdown. */
  private updateLastUsedTimestamp(): void {
    this.lastUsedTimestamp = Date.now();
    this.scheduleCleanup();
  }

  /**
   * Arms the inactivity timer.
   *
   * When it fires, the model is freed if it has been idle for the full
   * delay and no transcription is running. Otherwise the timer is re-armed
   * for exactly the time that is still missing.
   *
   * @param delayMs - Time until the next check.
   */
  private scheduleCleanup(delayMs: number = this.MODEL_CLEANUP_DELAY_MS): void {
    this.clearCleanupTimer();
    this.cleanupTimer = setTimeout(() => {
      this.cleanupTimer = null;

      if (this.activeTranscriptions > 0) {
        // Busy: never free underneath a running transcription.
        this.scheduleCleanup();
        return;
      }

      const timeSinceLastUse = Date.now() - this.lastUsedTimestamp;
      if (timeSinceLastUse >= this.MODEL_CLEANUP_DELAY_MS) {
        logger.ai.info("Auto-freeing model after inactivity", {
          inactiveTimeMs: timeSinceLastUse,
          thresholdMs: this.MODEL_CLEANUP_DELAY_MS,
        });
        // freeWhisperInstance handles its own errors and does not reject.
        void this.freeWhisperInstance();
      } else {
        // Used recently: wait only for the remaining idle time.
        this.scheduleCleanup(this.MODEL_CLEANUP_DELAY_MS - timeSinceLastUse);
      }
    }, delayMs);
  }

  /** Cancels a pending inactivity timer, if any. */
  private clearCleanupTimer(): void {
    if (this.cleanupTimer) {
      clearTimeout(this.cleanupTimer);
      this.cleanupTimer = null;
    }
  }
}

View file

@ -1,81 +0,0 @@
import { ContextualTranscriptionClient } from "./transcription-session";
import { ContextualLocalWhisperClient } from "./contextual-local-whisper-client";
import { ModelManagerService } from "../models/model-manager";
import { createScopedLogger } from "../../main/logger";
/**
 * Creates contextual transcription clients and owns one lazily created
 * default client, whose model can be preloaded, freed and disposed of
 * through this manager.
 */
export class ContextualTranscriptionManager {
  private logger = createScopedLogger("contextual-transcription-manager");
  private defaultClient: ContextualLocalWhisperClient | null = null;

  constructor(private modelManagerService: ModelManagerService | null = null) {}

  /**
   * Builds a new transcription client for the given provider.
   *
   * @param provider - Only "local" is supported.
   * @param options - Optional id of the model the client should prefer.
   * @throws Error for an unknown provider, or when no ModelManagerService
   * was supplied to the manager.
   */
  createTranscriptionClient(
    provider: "local",
    options: { modelId?: string } = {},
  ): ContextualTranscriptionClient {
    if (provider !== "local") {
      throw new Error(`Unknown transcription provider: ${provider}`);
    }

    const modelManager = this.modelManagerService;
    if (!modelManager) {
      throw new Error(
        "ModelManagerService is required for local transcription client",
      );
    }

    const requestedModelId = options.modelId;
    this.logger.info("Creating local Whisper contextual transcription client", {
      selectedModelId: requestedModelId,
    });
    return new ContextualLocalWhisperClient(modelManager, requestedModelId);
  }

  /** Name of the provider used by default; always "local". */
  getDefaultProvider(): "local" {
    return "local";
  }

  /** Returns the shared default client, creating it on first use. */
  createDefaultClient(): ContextualTranscriptionClient {
    if (this.defaultClient) {
      return this.defaultClient;
    }
    const created = this.createTranscriptionClient(
      "local",
    ) as ContextualLocalWhisperClient;
    this.defaultClient = created;
    return created;
  }

  /** Loads the default client's model ahead of time. */
  async preloadModel(): Promise<void> {
    const whisperClient =
      this.createDefaultClient() as ContextualLocalWhisperClient;
    await whisperClient.loadModel();
    this.logger.info("Model preloaded for contextual transcription");
  }

  /** Frees the default client's model; does nothing without a default client. */
  async freeModel(): Promise<void> {
    if (!this.defaultClient) {
      return;
    }
    await this.defaultClient.freeModel();
    this.logger.info("Model freed for contextual transcription");
  }

  /** Whether the default client exists and reports its model as loaded. */
  isModelLoaded(): boolean {
    return this.defaultClient !== null && this.defaultClient.isModelLoaded();
  }

  /** Disposes of the default client, if any, and forgets it. */
  async dispose(): Promise<void> {
    if (!this.defaultClient) {
      return;
    }
    await this.defaultClient.dispose();
    this.defaultClient = null;
  }
}

View file

@ -1,298 +0,0 @@
import { EventEmitter } from "node:events";
import { createScopedLogger } from "../../main/logger";
/**
 * One chunk of audio handed to `TranscriptionSession.addChunk`.
 */
export interface ChunkData {
/** Session the chunk belongs to; chunks for another session are rejected. */
sessionId: string;
/** Sequence number; the session processes chunks in ascending order starting at 1. */
chunkId: number;
/** Audio bytes of the chunk; an empty buffer is recorded with empty text. */
audioData: Buffer;
/** Marks the chunk after which the session completes. */
isFinalChunk: boolean;
}
/**
 * Outcome of transcribing a single chunk.
 */
export interface ChunkResult {
/** Id of the chunk this result belongs to. */
chunkId: number;
/** Transcribed text; empty for chunks without audio data. */
text: string;
/** `endTime - startTime`, in milliseconds. */
processingTimeMs: number;
/** `Date.now()` value taken when processing of the chunk started. */
startTime: number;
/** `Date.now()` value taken when processing of the chunk ended. */
endTime: number;
/** Model reported by the client for this chunk, when the client exposes it. */
modelInfo?: {
modelId: string | null;
modelPath: string | null;
};
}
/**
 * Client that transcribes audio while taking previously transcribed text
 * into account.
 */
export interface ContextualTranscriptionClient {
/**
 * Transcribes one piece of audio.
 *
 * @param audioData - Audio bytes to transcribe.
 * @param previousContext - Text transcribed so far; `TranscriptionSession`
 * passes its accumulated text here.
 * @returns A promise resolving to the text for this piece of audio.
 */
transcribeWithContext(
audioData: Buffer,
previousContext: string,
): Promise<string>;
/**
 * Optional: reports which model the client uses. Both fields may be `null`.
 */
getCurrentModelInfo?: () => Promise<{
modelId: string | null;
modelPath: string | null;
}>;
}
/**
 * Transcribes the chunks of one recording session strictly in `chunkId`
 * order (starting at 1), one chunk at a time, feeding the text accumulated
 * so far into each transcription as context.
 *
 * Events:
 * - `chunk-completed`: a `ChunkResult`, after each processed chunk.
 * - `chunk-error`: `{ chunkId, error }`, when transcribing a chunk throws;
 *   the session then moves on to the next chunk.
 * - `session-completed`: `{ sessionId, finalText, chunkResults,
 *   totalProcessingTimeMs, totalSessionTimeMs, sessionStartTime,
 *   sessionEndTime }`, after the final chunk has been processed.
 */
export class TranscriptionSession extends EventEmitter {
  private logger = createScopedLogger("transcription-session");
  private sessionId: string;
  private transcriptionClient: ContextualTranscriptionClient;
  private chunkQueue: ChunkData[] = [];
  private results: ChunkResult[] = [];
  private accumulatedText = "";
  private isProcessing = false;
  private expectedChunkId = 1;
  private isComplete = false;
  private sessionStartTime: number;

  /**
   * @param sessionId - Id that incoming chunks must carry.
   * @param transcriptionClient - Client used to transcribe each chunk.
   */
  constructor(
    sessionId: string,
    transcriptionClient: ContextualTranscriptionClient,
  ) {
    super();
    this.sessionId = sessionId;
    this.transcriptionClient = transcriptionClient;
    this.sessionStartTime = Date.now();
    this.logger.info("TranscriptionSession created", {
      sessionId,
      sessionStartTime: this.sessionStartTime,
      sessionStartTimeISO: new Date(this.sessionStartTime).toISOString(),
    });
  }

  /**
   * Queues a chunk and starts processing if the session is idle.
   *
   * Chunks for another session, and chunks arriving after the session has
   * completed, are logged and dropped.
   */
  public addChunk(chunkData: ChunkData): void {
    if (chunkData.sessionId !== this.sessionId) {
      this.logger.warn("Received chunk for different session", {
        expected: this.sessionId,
        received: chunkData.sessionId,
      });
      return;
    }
    if (this.isComplete) {
      this.logger.warn("Session already complete, ignoring chunk", {
        sessionId: this.sessionId,
        chunkId: chunkData.chunkId,
      });
      return;
    }
    this.logger.info("Adding chunk to queue", {
      sessionId: this.sessionId,
      chunkId: chunkData.chunkId,
      isFinalChunk: chunkData.isFinalChunk,
      audioDataSize: chunkData.audioData.length,
    });
    this.chunkQueue.push(chunkData);
    this.startProcessing();
  }

  /**
   * Kicks off queue processing without awaiting it. A rejection (for
   * example from a throwing event listener) is logged instead of surfacing
   * as an unhandled promise rejection.
   */
  private startProcessing(): void {
    this.processNextChunk().catch((error: unknown) => {
      this.logger.error("Unexpected error while processing chunk queue", {
        sessionId: this.sessionId,
        error: error instanceof Error ? error.message : String(error),
      });
    });
  }

  /**
   * Processes the chunk with the next expected id, if it is queued and no
   * other chunk is being processed. Afterwards it either completes the
   * session (final chunk) or moves on to the following chunk.
   */
  private async processNextChunk(): Promise<void> {
    if (this.isProcessing || this.chunkQueue.length === 0) {
      return;
    }
    // Find the next expected chunk in sequence
    const nextChunkIndex = this.chunkQueue.findIndex(
      (chunk) => chunk.chunkId === this.expectedChunkId,
    );
    if (nextChunkIndex === -1) {
      this.logger.debug("Next expected chunk not yet available", {
        expectedChunkId: this.expectedChunkId,
        availableChunks: this.chunkQueue.map((c) => c.chunkId),
      });
      return;
    }
    const chunk = this.chunkQueue.splice(nextChunkIndex, 1)[0];
    this.isProcessing = true;
    try {
      await this.transcribeChunk(chunk);
    } catch (error) {
      this.logger.error("Error processing chunk", {
        sessionId: this.sessionId,
        chunkId: chunk.chunkId,
        error: error instanceof Error ? error.message : String(error),
      });
      this.emit("chunk-error", { chunkId: chunk.chunkId, error });
    } finally {
      this.isProcessing = false;
      // Advance even after a failure so one bad chunk cannot stall the queue.
      this.expectedChunkId++;
      if (chunk.isFinalChunk) {
        this.completeSession();
      } else {
        // Process next chunk if available
        this.startProcessing();
      }
    }
  }

  /**
   * Transcribes a single chunk, records its result and emits
   * `chunk-completed`. Chunks without audio data are recorded with empty
   * text and do not call the client's transcription.
   */
  private async transcribeChunk(chunk: ChunkData): Promise<void> {
    const startTime = Date.now();
    const modelInfo = this.transcriptionClient.getCurrentModelInfo
      ? await this.transcriptionClient.getCurrentModelInfo()
      : { modelId: null, modelPath: null };
    this.logger.info("Starting transcription for chunk", {
      sessionId: this.sessionId,
      chunkId: chunk.chunkId,
      audioDataSize: chunk.audioData.length,
      contextLength: this.accumulatedText.length,
      startTime,
      startTimeISO: new Date(startTime).toISOString(),
      modelId: modelInfo.modelId,
      modelPath: modelInfo.modelPath,
    });

    // Skip transcription for empty chunks (but still process them for completion)
    if (chunk.audioData.length === 0) {
      const endTime = Date.now();
      const processingTimeMs = endTime - startTime;
      this.logger.info("Skipping transcription for empty chunk", {
        sessionId: this.sessionId,
        chunkId: chunk.chunkId,
        startTime,
        endTime,
        processingTimeMs,
        startTimeISO: new Date(startTime).toISOString(),
        endTimeISO: new Date(endTime).toISOString(),
        modelId: modelInfo.modelId,
        modelPath: modelInfo.modelPath,
      });
      const emptyResult: ChunkResult = {
        chunkId: chunk.chunkId,
        text: "",
        processingTimeMs,
        startTime,
        endTime,
        modelInfo,
      };
      this.results.push(emptyResult);
      this.emit("chunk-completed", emptyResult);
      return;
    }

    const transcriptionText =
      await this.transcriptionClient.transcribeWithContext(
        chunk.audioData,
        this.accumulatedText,
      );
    const endTime = Date.now();
    const processingTimeMs = endTime - startTime;
    const result: ChunkResult = {
      chunkId: chunk.chunkId,
      text: transcriptionText,
      processingTimeMs,
      startTime,
      endTime,
      modelInfo,
    };

    // Accumulate the transcription text for context
    this.accumulatedText +=
      (this.accumulatedText ? " " : "") + transcriptionText;
    this.results.push(result);

    // Only sizes and timings are logged, not the transcribed text itself.
    this.logger.info("Chunk transcription completed", {
      sessionId: this.sessionId,
      chunkId: chunk.chunkId,
      textLength: transcriptionText.length,
      processingTimeMs,
      startTime,
      endTime,
      startTimeISO: new Date(startTime).toISOString(),
      endTimeISO: new Date(endTime).toISOString(),
      accumulatedTextLength: this.accumulatedText.length,
      modelId: modelInfo.modelId,
      modelPath: modelInfo.modelPath,
    });
    this.emit("chunk-completed", result);
  }

  /**
   * Marks the session complete, logs a timing summary and emits
   * `session-completed` with the accumulated text and per-chunk results.
   */
  private completeSession(): void {
    this.isComplete = true;
    const sessionEndTime = Date.now();
    const totalSessionTimeMs = sessionEndTime - this.sessionStartTime;
    const totalProcessingTime = this.results.reduce(
      (sum, result) => sum + result.processingTimeMs,
      0,
    );

    // Get model info from the last chunk result that carries it
    const lastChunkWithModel = [...this.results]
      .reverse()
      .find((r) => r.modelInfo);
    const sessionModelInfo = lastChunkWithModel?.modelInfo || {
      modelId: null,
      modelPath: null,
    };

    this.logger.info("Transcription session completed", {
      sessionId: this.sessionId,
      totalChunks: this.results.length,
      finalTextLength: this.accumulatedText.length,
      sessionStartTime: this.sessionStartTime,
      sessionEndTime,
      sessionStartTimeISO: new Date(this.sessionStartTime).toISOString(),
      sessionEndTimeISO: new Date(sessionEndTime).toISOString(),
      totalSessionTimeMs,
      totalProcessingTimeMs: totalProcessingTime,
      averageProcessingTimePerChunkMs:
        this.results.length > 0
          ? Math.round(totalProcessingTime / this.results.length)
          : 0,
      processingEfficiency:
        totalSessionTimeMs > 0
          ? Math.round((totalProcessingTime / totalSessionTimeMs) * 100)
          : 0,
      modelId: sessionModelInfo.modelId,
      modelPath: sessionModelInfo.modelPath,
      chunkTimings: this.results.map((r) => ({
        chunkId: r.chunkId,
        processingTimeMs: r.processingTimeMs,
        startTime: r.startTime,
        endTime: r.endTime,
        textLength: r.text.length,
      })),
    });

    this.emit("session-completed", {
      sessionId: this.sessionId,
      finalText: this.accumulatedText,
      chunkResults: this.results,
      totalProcessingTimeMs: totalProcessingTime,
      totalSessionTimeMs,
      sessionStartTime: this.sessionStartTime,
      sessionEndTime,
    });
  }

  /** Id of this session. */
  public getSessionId(): string {
    return this.sessionId;
  }

  /** Text of all chunks transcribed so far, joined by single spaces. */
  public getAccumulatedText(): string {
    return this.accumulatedText;
  }

  /** Shallow copy of the per-chunk results collected so far. */
  public getResults(): ChunkResult[] {
    return [...this.results];
  }

  /** Whether the final chunk has been processed. */
  public isSessionComplete(): boolean {
    return this.isComplete;
  }
}

View file

@ -0,0 +1,46 @@
/**
* Simple context management for the pipeline - no over-engineering
* Based on ARCHITECTURE.md specifications
*/
/**
 * Context object that accompanies one pipeline execution.
 */
export interface PipelineContext {
/** Identifier of the session this context belongs to. */
sessionId: string;
/** Data shared across the pipeline (see `SharedPipelineData`). */
sharedData: SharedPipelineData;
/** Free-form key/value store; values are untyped. */
metadata: Map<string, any>;
}
import { GetAccessibilityContextResult } from "@amical/types";
/**
 * Data made available to the whole pipeline through `PipelineContext`.
 */
export interface SharedPipelineData {
/** String-to-string vocabulary map — NOTE(review): meaning of keys vs. values is not visible here; confirm with the producer. */
vocabulary: Map<string, string>;
/** Language and formatting preferences. */
userPreferences: {
language: string;
formattingStyle: "formal" | "casual" | "technical";
};
/** Where the audio came from and, optionally, its duration (unit not stated here). */
audioMetadata: {
source: "microphone" | "file" | "stream";
duration?: number;
};
/** Accessibility context, or `null` when none has been set. */
accessibilityContext: GetAccessibilityContextResult | null;
}
/**
 * Builds a fresh pipeline context for the given session, filled with
 * default values: empty vocabulary and metadata maps, language "en",
 * formatting style "formal", microphone as the audio source, and no
 * accessibility context.
 *
 * @param sessionId - Identifier stored on the returned context.
 * @returns A new context; each call creates new `Map` instances.
 */
export function createDefaultContext(sessionId: string): PipelineContext {
  const userPreferences = {
    language: "en",
    formattingStyle: "formal" as const,
  };
  const audioMetadata = {
    source: "microphone" as const,
  };
  const sharedData = {
    vocabulary: new Map<string, string>(),
    userPreferences,
    audioMetadata,
    // Left empty here; per the original note it is populated asynchronously
    // by TranscriptionService.
    accessibilityContext: null,
  };

  return {
    sessionId,
    sharedData,
    metadata: new Map<string, any>(),
  };
}

View file

@ -0,0 +1,75 @@
/**
* Core pipeline types - Simple interfaces without over-engineering
*/
// Re-export context types from dedicated file
import { PipelineContext } from "./context";
import { GetAccessibilityContextResult } from "@amical/types";
// Both names are interfaces with no runtime value, so they are re-exported
// as types to stay valid when files are transpiled one at a time.
export type { PipelineContext, SharedPipelineData } from "./context";
// Transcription input parameters
/**
 * Input handed to `TranscriptionProvider.transcribe`.
 */
export interface TranscribeParams {
/** Audio bytes to transcribe; the expected encoding is not specified here. */
audioData: Buffer;
/** Optional hints for the provider; every field may be omitted. */
context: {
vocabulary?: Map<string, string>;
accessibilityContext?: GetAccessibilityContextResult | null;
/** Presumably the text of the preceding chunk — confirm with the caller. */
previousChunk?: string;
/** Presumably the transcription accumulated so far — confirm with the caller. */
aggregatedTranscription?: string;
};
}
// Formatting input parameters
/**
 * Input handed to `FormattingProvider.format`.
 */
export interface FormatParams {
/** Text to format. */
text: string;
/** Optional hints for the provider; every field may be omitted. */
context: {
/** Formatting style as a free-form string. */
style?: string;
vocabulary?: Map<string, string>;
accessibilityContext?: GetAccessibilityContextResult | null;
/** Presumably the text of the preceding chunk — confirm with the caller. */
previousChunk?: string;
/** Presumably the transcription accumulated so far — confirm with the caller. */
aggregatedTranscription?: string;
};
}
/**
 * Contract for a speech-to-text backend.
 */
export interface TranscriptionProvider {
// Provider identifier
readonly name: string;
// Resolves with the transcribed text for the supplied audio
transcribe(params: TranscribeParams): Promise<string>;
}
/**
 * Contract for a text post-processing (formatting) backend.
 */
export interface FormattingProvider {
// Provider identifier
readonly name: string;
// Resolves with the formatted version of `params.text`
format(params: FormatParams): Promise<string>;
}
/**
 * Result of one pipeline execution.
 */
export interface PipelineResult {
// Final transcription text
transcription: string;
// Session the result belongs to
sessionId: string;
metadata: {
// Optional duration; unit is not specified here — TODO confirm
duration?: number;
// Name of the provider that produced the transcription
provider: string;
// Whether a formatting step was applied
formatted: boolean;
};
}
/**
 * PipelineContext extended with streaming state flags.
 */
export interface StreamingPipelineContext extends PipelineContext {
// Redeclares the inherited PipelineContext.sessionId with the same type
sessionId: string;
isPartial: boolean;
isFinal: boolean;
accumulatedTranscription?: string[]; // Store all partial results
}
/**
 * State kept for one streaming transcription session.
 */
export interface StreamingSession {
// Pipeline context associated with the session
context: StreamingPipelineContext;
transcriptionResults: string[]; // Accumulate all transcription chunks
}
/**
 * Configuration describing how a pipeline is assembled.
 */
export interface PipelineConfig {
// Required speech-to-text backend
transcriptionProvider: TranscriptionProvider;
// Optional post-processing backend
formattingProvider?: FormattingProvider;
// Whether results should be persisted
saveToDatabase: boolean;
}

View file

@ -0,0 +1,24 @@
/**
* Pipeline module exports
*/
// Core types
export type {
TranscriptionProvider,
FormattingProvider,
PipelineResult,
PipelineConfig,
StreamingPipelineContext,
StreamingSession,
} from "./core/pipeline-types";
// Context management
export { createDefaultContext } from "./core/context";
export type { PipelineContext, SharedPipelineData } from "./core/context";
// Main service
export { TranscriptionService } from "../services/transcription-service";
// Providers (if needed externally)
export { WhisperProvider } from "./providers/transcription/whisper-provider";
export { OpenRouterProvider } from "./providers/formatting/openrouter-formatter";

View file

@ -0,0 +1,98 @@
import { FormatParams } from "../../core/pipeline-types";
import { GetAccessibilityContextResult, ApplicationInfo } from "@amical/types";
/**
 * Assemble the system prompt for the text formatter.
 *
 * The prompt always starts with the base formatting rules and ends with the
 * "return only the formatted text" instruction. When the accessibility context
 * yields a non-empty `<context>` block, that block and a sentence explaining
 * how to use it are inserted in between. Sections are separated by one blank
 * line.
 */
export function constructFormatterPrompt(context: FormatParams["context"]): {
  systemPrompt: string;
} {
  const sections: string[] = [
    `You are a professional text formatter. Your task is to clean up and improve the formatting of transcribed text while preserving the original meaning and content.
Please:
1. Fix obvious transcription errors and typos
2. Add proper punctuation where missing
3. Organize the text into proper paragraphs, with sufficient line breaks, etc.
4. Capitalize proper nouns and sentence beginnings
5. Remove unnecessary filler words (um, uh, etc.) but keep natural speech patterns
6. Maintain the speaker's original tone and style
7. If the text is empty, return an empty string
8. For formatting of emails make sure to use the correct email format`,
  ];

  // Optional environment description (active application / site)
  const contextXml = buildContextXml(context.accessibilityContext);
  if (contextXml) {
    sections.push(
      contextXml,
      `Use this context to better understand the environment where the text will be used and adjust formatting accordingly.`,
    );
  }

  sections.push(
    `Return only the formatted text without any explanations or additional commentary.`,
  );

  return { systemPrompt: sections.join("\n\n") };
}
/**
 * Render the accessibility context as a `<context>` block for the prompt.
 *
 * Returns null when there is no context at all, or when neither the
 * application nor the URL section produced any content.
 */
function buildContextXml(
  accessibilityContext: GetAccessibilityContextResult | null | undefined,
): string | null {
  const ctx = accessibilityContext?.context;
  if (!ctx) return null;

  // Application first, then URL; sections that came back empty are dropped
  const sections = [
    buildApplicationXml(ctx.application),
    buildUrlXml(ctx.windowInfo?.url || undefined),
  ].filter((section): section is string => Boolean(section));

  if (sections.length === 0) return null;
  return ["<context>", ...sections, "</context>"].join("\n");
}
/**
 * Render the `<application>` section: always the name, plus the bundle
 * identifier when one is present. Returns null when the application or its
 * name is missing.
 */
function buildApplicationXml(application: ApplicationInfo): string | null {
  if (!application?.name) return null;

  const bundleLines = application.bundleIdentifier
    ? [` <bundle>${application.bundleIdentifier}</bundle>`]
    : [];

  return [
    " <application>",
    ` <name>${application.name}</name>`,
    ...bundleLines,
    " </application>",
  ].join("\n");
}
/**
 * Render the `<url>` section containing only the URL's domain.
 * Returns null when no URL is given or no domain can be extracted from it.
 */
function buildUrlXml(url: string | undefined): string | null {
  const domain = url ? extractDomain(url) : null;
  if (!domain) return null;

  const lines = [" <url>", ` <domain>${domain}</domain>`, " </url>"];
  return lines.join("\n");
}
/**
 * Extract the host name from a URL-like string.
 *
 * Absolute URLs are handled by the standard URL parser. Anything the parser
 * rejects, or accepts without finding a host, goes through a lenient fallback
 * that takes the leading `host[:port]` segment and returns the host without
 * the port.
 *
 * @param url Absolute URL, protocol-relative URL, or bare `host[:port][/path]`.
 * @returns The host name, or null when no dotted host can be identified.
 */
function extractDomain(url: string): string | null {
  try {
    const { hostname } = new URL(url);
    if (hostname) return hostname;
    // An empty hostname means the input was read as "<scheme>:<path>". This
    // happens for scheme-less "example.com:8080/x" (scheme "example.com"), so
    // fall through and re-read the input as host[:port].
  } catch {
    // Not an absolute URL (no protocol, or malformed): use the fallback below
  }

  // Remove any leading slashes (e.g. protocol-relative "//domain.com/path")
  const cleanUrl = url.replace(/^\/+/, "");

  // Leading host segment, optionally followed by a numeric port, which must end
  // at a path/query/fragment delimiter, whitespace, or end of input. Requiring
  // that boundary keeps inputs such as "mailto:user@host" from being read as hosts.
  const match = cleanUrl.match(/^([^\/\s?#:]+)(?::\d+)?(?=[\/\s?#]|$)/);
  const host = match?.[1];

  // Only accept dotted names so single words ("localhost", "settings") are ignored
  return host && host.includes(".") ? host : null;
}

View file

@ -0,0 +1,62 @@
import { FormattingProvider, FormatParams } from "../../core/pipeline-types";
import { logger } from "../../../main/logger";
import { createOpenRouter } from "@openrouter/ai-sdk-provider";
import { constructFormatterPrompt } from "./formatter-prompt";
import { generateText } from "ai";
/**
 * FormattingProvider that sends transcribed text to a model served through
 * OpenRouter and returns the model's cleaned-up version.
 */
export class OpenRouterProvider implements FormattingProvider {
  readonly name = "openrouter";
  private provider: any;
  private model: string;

  /**
   * @param apiKey OpenRouter API key
   * @param model  OpenRouter model identifier passed to the provider on each call
   */
  constructor(apiKey: string, model: string) {
    this.model = model;
    this.provider = createOpenRouter({ apiKey });
  }

  /**
   * Format `params.text` using a system prompt derived from `params.context`.
   * Never rejects because of a provider failure: on any error the failure is
   * logged and the original text is returned unchanged.
   */
  async format(params: FormatParams): Promise<string> {
    try {
      const { text, context } = params;
      const { systemPrompt } = constructFormatterPrompt(context);

      const completion = await generateText({
        model: this.provider(this.model),
        messages: [
          { role: "system", content: systemPrompt },
          // The raw transcription is sent as-is as the user message
          { role: "user", content: text },
        ],
        temperature: 0.1, // Low temperature for consistent formatting
        maxTokens: 2000,
      });

      const formattedText = completion.text;
      logger.pipeline.debug("Formatting completed", {
        original: text,
        formatted: formattedText,
      });
      return formattedText;
    } catch (error) {
      logger.pipeline.error("Formatting failed:", error);
      // Simple fallback: hand back the unformatted input
      return params.text;
    }
  }
}

View file

@ -0,0 +1,167 @@
import {
TranscriptionProvider,
TranscribeParams,
} from "../../core/pipeline-types";
import { logger } from "../../../main/logger";
import { ModelManagerService } from "../../../services/model-manager";
import { Whisper } from "smart-whisper";
/**
 * TranscriptionProvider backed by a local smart-whisper model.
 *
 * The model is loaded lazily on the first `transcribe` call using the path
 * returned by the model manager, and kept until `dispose` is called.
 */
export class WhisperProvider implements TranscriptionProvider {
  readonly name = "whisper-local";
  private modelManager: ModelManagerService;
  private whisperInstance: Whisper | null = null;
  // In-flight initialization, shared so that concurrent transcribe() calls
  // (e.g. overlapping streaming chunks) load the model only once.
  private initPromise: Promise<void> | null = null;

  constructor(modelManager: ModelManagerService) {
    this.modelManager = modelManager;
  }

  /**
   * Transcribe one audio buffer.
   *
   * @param params Audio bytes plus optional context; the vocabulary keys and
   *   the tail of the aggregated transcription are used as Whisper's initial prompt.
   * @returns All recognized segments joined with single spaces and trimmed.
   * @throws Error prefixed with "Transcription failed:" for any failure,
   *   including model initialization failures.
   */
  async transcribe(params: TranscribeParams): Promise<string> {
    try {
      await this.initializeWhisper();

      // Extract parameters from the new structure
      const { audioData, context } = params;
      const { vocabulary, previousChunk, aggregatedTranscription } = context;

      // Convert audio buffer to the format expected by smart-whisper
      const audioFloat32Array = await this.convertAudioBuffer(audioData);

      logger.transcription.debug(
        `Starting transcription, audio size: ${audioData.length}`,
        previousChunk
          ? `Previous chunk: ${previousChunk.substring(0, 50)}...`
          : "No previous chunk",
        aggregatedTranscription
          ? `Aggregated length: ${aggregatedTranscription.length}`
          : "No aggregated transcription",
      );

      // Transcribe using smart-whisper
      if (!this.whisperInstance) {
        throw new Error("Whisper instance is not initialized");
      }

      // Generate initial prompt from vocabulary and recent context
      const initialPrompt = this.generateInitialPrompt(
        vocabulary,
        aggregatedTranscription,
      );

      const { result } = await this.whisperInstance.transcribe(
        audioFloat32Array,
        {
          language: "auto",
          initial_prompt: initialPrompt,
        },
      );
      const transcription = await result;

      // Combine all transcription segments into a single string
      const text = transcription
        .map((segment) => segment.text)
        .join(" ")
        .trim();

      logger.transcription.debug(
        `Transcription completed, length: ${text.length}`,
      );
      return text;
    } catch (error) {
      logger.transcription.error("Transcription failed:", error);
      throw new Error(`Transcription failed: ${error}`);
    }
  }

  /**
   * Build Whisper's initial prompt: the vocabulary terms (comma separated)
   * followed by the last 8 words already transcribed, joined with ". ".
   * Returns an empty string when neither is available.
   */
  private generateInitialPrompt(
    vocabulary?: Map<string, string>,
    aggregatedTranscription?: string,
  ): string {
    const promptParts: string[] = [];

    // Add vocabulary terms if available
    if (vocabulary && vocabulary.size > 0) {
      // Extract vocabulary keys (the actual terms) and join with commas
      const vocabularyTerms = Array.from(vocabulary.keys());
      const vocabularyText = vocabularyTerms.join(", ");
      promptParts.push(vocabularyText);
    }

    // Add last 8 words from aggregated transcription if available
    if (aggregatedTranscription && aggregatedTranscription.trim().length > 0) {
      const words = aggregatedTranscription.trim().split(/\s+/);
      const lastWords = words.slice(-8).join(" ");
      if (lastWords.length > 0) {
        promptParts.push(lastWords);
      }
    }

    // Combine parts with a separator, or return empty string if no context
    const prompt = promptParts.join(". ");
    logger.transcription.debug(`Generated initial prompt: "${prompt}"`);
    return prompt;
  }

  /**
   * Ensure a Whisper instance exists. Concurrent callers await the same
   * in-flight load rather than each constructing (and leaking) an instance.
   * A failed load is not cached, so the next call retries.
   */
  private async initializeWhisper(): Promise<void> {
    if (this.whisperInstance) {
      return; // Already initialized
    }
    if (!this.initPromise) {
      this.initPromise = this.loadWhisper().finally(() => {
        this.initPromise = null;
      });
    }
    await this.initPromise;
  }

  /**
   * Resolve the model path and construct the Whisper instance.
   *
   * @throws Error when no model is available or smart-whisper fails to load.
   */
  private async loadWhisper(): Promise<void> {
    const modelPath = await this.modelManager.getBestAvailableModelPath();
    if (!modelPath) {
      throw new Error(
        "No Whisper models available. Please download a model first.",
      );
    }

    try {
      const { Whisper } = await import("smart-whisper");
      this.whisperInstance = new Whisper(modelPath, { gpu: true });
      logger.transcription.info(`Initialized with model: ${modelPath}`);
    } catch (error) {
      logger.transcription.error(`Failed to initialize:`, error);
      throw new Error(`Failed to initialize smart-whisper: ${error}`);
    }
  }

  /**
   * Reinterpret the buffer as little-endian 32-bit float samples. Trailing
   * bytes that do not fill a whole sample are ignored.
   */
  private async convertAudioBuffer(audioData: Buffer): Promise<Float32Array> {
    try {
      // Convert buffer to Float32Array (simplified)
      const float32Array = new Float32Array(audioData.length / 4);
      for (let i = 0; i < float32Array.length; i++) {
        float32Array[i] = audioData.readFloatLE(i * 4);
      }
      return float32Array;
    } catch (error) {
      // NOTE(review): the float path reads only in-bounds offsets, so this
      // branch is not expected to run for a real Buffer — confirm whether the
      // 16-bit PCM fallback is still needed.
      logger.transcription.warn(
        "Audio conversion failed, trying alternative method",
      );
      // Fallback: convert as if it's PCM data
      const samples = new Float32Array(audioData.length / 2);
      for (let i = 0; i < samples.length; i++) {
        const sample = audioData.readInt16LE(i * 2);
        samples[i] = sample / 32768.0;
      }
      return samples;
    }
  }

  /**
   * Free the Whisper instance, if any. Errors while freeing are logged and
   * swallowed; the instance reference is cleared either way.
   */
  async dispose(): Promise<void> {
    if (this.whisperInstance) {
      try {
        await this.whisperInstance.free();
        logger.transcription.debug("Instance freed");
      } catch (error) {
        logger.transcription.warn("Error freeing instance:", error);
      } finally {
        this.whisperInstance = null;
      }
    }
  }
}

View file

@ -1,30 +1,4 @@
/**
* This file will automatically be loaded by vite and run in the "renderer" context.
* To learn more about the differences between the "main" and the "renderer" context in
* Electron, visit:
*
* https://electronjs.org/docs/tutorial/process-model
*
* By default, Node.js integration in this file is disabled. When enabling Node.js integration
* in a renderer process, please be aware of potential security implications. You can read
* more about security risks here:
*
* https://electronjs.org/docs/tutorial/security
*
* To enable Node.js integration in this file, open up `main.ts` and enable the `nodeIntegration`
* flag:
*
* ```
* // Create the browser window.
* mainWindow = new BrowserWindow({
* width: 800,
* height: 600,
* webPreferences: {
* nodeIntegration: true
* }
* });
* ```
*/
/* eslint-disable @typescript-eslint/no-explicit-any */
import React, { useState, useEffect } from "react";
import { createRoot } from "react-dom/client";
@ -46,6 +20,48 @@ import "@/styles/globals.css";
import { SiteHeader } from "@/components/site-header";
import { api } from "@/trpc/react";
// Extend the global Console interface with an `original` property so that the
// assignment to `console.original` later in this file type-checks. It holds the
// console methods as they were before this file replaced them.
declare global {
interface Console {
original: {
log: (...args: any[]) => void;
info: (...args: any[]) => void;
warn: (...args: any[]) => void;
error: (...args: any[]) => void;
debug: (...args: any[]) => void;
};
}
}
// Main window scoped logger setup: obtain a logger tagged "mainWindow" from the
// API exposed on window.electronAPI.
const mainWindowLogger = window.electronAPI.log.scope("mainWindow");
// Proxy console methods to use BOTH original console AND main window logger.
// The copy is taken before any method is replaced, so `originalConsole` keeps
// the unpatched implementations.
const originalConsole = { ...console };
// console.log has no matching logger method used here, so it is forwarded at info level
console.log = (...args: any[]) => {
originalConsole.log(...args); // Show in dev console
mainWindowLogger.info(...args); // Send via IPC
};
console.info = (...args: any[]) => {
originalConsole.info(...args);
mainWindowLogger.info(...args);
};
console.warn = (...args: any[]) => {
originalConsole.warn(...args);
mainWindowLogger.warn(...args);
};
console.error = (...args: any[]) => {
originalConsole.error(...args);
mainWindowLogger.error(...args);
};
console.debug = (...args: any[]) => {
originalConsole.debug(...args);
mainWindowLogger.debug(...args);
};
// Keep original methods available if needed (bypasses the logger forwarding)
console.original = originalConsole;
// import { Waveform } from '../components/Waveform'; // Waveform might not be needed if hook is removed
// import { useRecording } from '../hooks/useRecording'; // Remove hook import

View file

@ -38,8 +38,8 @@ export const ModelsManager: React.FC = () => {
const availableModelsQuery = api.models.getAvailableModels.useQuery();
const downloadedModelsQuery = api.models.getDownloadedModels.useQuery();
const activeDownloadsQuery = api.models.getActiveDownloads.useQuery();
const isLocalWhisperAvailableQuery =
api.models.isLocalWhisperAvailable.useQuery();
const isTranscriptionAvailableQuery =
api.models.isTranscriptionAvailable.useQuery();
const selectedModelQuery = api.models.getSelectedModel.useQuery();
const utils = api.useUtils();
@ -243,13 +243,13 @@ export const ModelsManager: React.FC = () => {
const loading =
availableModelsQuery.isLoading ||
downloadedModelsQuery.isLoading ||
isLocalWhisperAvailableQuery.isLoading ||
isTranscriptionAvailableQuery.isLoading ||
selectedModelQuery.isLoading;
// Data from queries
const availableModels = availableModelsQuery.data || [];
const downloadedModels = downloadedModelsQuery.data || {};
const isLocalWhisperAvailable = isLocalWhisperAvailableQuery.data || false;
const isTranscriptionAvailable = isTranscriptionAvailableQuery.data || false;
const selectedModel = selectedModelQuery.data;
if (loading) {
@ -298,7 +298,7 @@ export const ModelsManager: React.FC = () => {
<RadioGroupItem
value={model.id}
id={model.id}
disabled={!isDownloaded || !isLocalWhisperAvailable}
disabled={!isDownloaded || !isTranscriptionAvailable}
/>
<div className="flex-1">
<Label

View file

@ -19,12 +19,13 @@ import {
} from "@/components/ui/select";
import { Input } from "@/components/ui/input";
import { ThemeToggle } from "@/components/theme-toggle";
import { FormatterConfig } from "@/modules/formatter";
import { FormatterConfig } from "@/types/formatter";
import { api } from "@/trpc/react";
import { toast } from "sonner";
// OpenRouter models list
const OPENROUTER_MODELS = [
{ value: "google/gemini-2.0-flash-001", label: "Gemini 2.0 Flash" },
{ value: "anthropic/claude-3.5-sonnet", label: "Claude 3.5 Sonnet" },
{ value: "anthropic/claude-3-haiku", label: "Claude 3 Haiku" },
{ value: "openai/gpt-4o", label: "GPT-4o" },

View file

@ -1,8 +1,51 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import React from "react";
import { createRoot } from "react-dom/client";
import { WidgetPage } from "./pages/widget";
import "@/styles/globals.css";
// Extend the global Console interface with an `original` property so that the
// assignment to `console.original` later in this file type-checks. It holds the
// console methods as they were before this file replaced them.
declare global {
interface Console {
original: {
log: (...args: any[]) => void;
info: (...args: any[]) => void;
warn: (...args: any[]) => void;
error: (...args: any[]) => void;
debug: (...args: any[]) => void;
};
}
}
// Widget scoped logger setup: obtain a logger tagged "widget" from the API
// exposed on window.electronAPI.
const widgetLogger = window.electronAPI.log.scope("widget");
// Proxy console methods to use BOTH original console AND widget logger.
// The copy is taken before any method is replaced, so `originalConsole` keeps
// the unpatched implementations.
const originalConsole = { ...console };
// console.log has no matching logger method used here, so it is forwarded at info level
console.log = (...args: any[]) => {
originalConsole.log(...args); // Show in dev console
widgetLogger.info(...args); // Send via IPC
};
console.info = (...args: any[]) => {
originalConsole.info(...args);
widgetLogger.info(...args);
};
console.warn = (...args: any[]) => {
originalConsole.warn(...args);
widgetLogger.warn(...args);
};
console.error = (...args: any[]) => {
originalConsole.error(...args);
widgetLogger.error(...args);
};
console.debug = (...args: any[]) => {
originalConsole.debug(...args);
widgetLogger.debug(...args);
};
// Keep original methods available if needed (bypasses the logger forwarding)
console.original = originalConsole;
const container = document.getElementById("root");
if (container) {
const root = createRoot(container);

View file

@ -3,30 +3,39 @@ import { Waveform } from "@/components/Waveform";
import { useRecording, RecordingStatus } from "@/hooks/useRecording";
const NUM_WAVEFORM_BARS = 8; // Fewer bars for a smaller button
const DEBOUNCE_DELAY = 100; // milliseconds
const DEBOUNCE_DELAY = 100; // milliseconds;
export const FloatingButton: React.FC = () => {
const [isHovered, setIsHovered] = useState(false);
const fabRef = useRef<HTMLButtonElement>(null);
const leaveTimeoutRef = useRef<NodeJS.Timeout | null>(null); // Ref for debounce timeout
// Log component initialization
useEffect(() => {
console.log("FloatingButton component initialized");
return () => {
console.debug("FloatingButton component unmounting");
};
}, []);
const handleAudioChunk = useCallback(
async (audioChunk: ArrayBuffer, isFinalChunk: boolean) => {
try {
// Send the audio chunk regardless of whether it's final or not
await window.electronAPI.sendAudioChunk(audioChunk, isFinalChunk);
console.log(`FAB: Sent audio chunk. isFinalChunk: ${isFinalChunk}`);
console.debug(`Sent audio chunk`, {
chunkSize: audioChunk.byteLength,
isFinalChunk,
});
if (isFinalChunk) {
console.log(
"FAB: This was the final chunk. Informing main process to finalize transcription.",
);
console.log("Final chunk sent to main process");
// You might want to add a specific IPC call here if the main process needs an explicit signal
// to finalize transcription, e.g., window.electronAPI.finalizeTranscription();
// For now, we assume sendAudioChunk is enough and the main process handles the stream end.
}
} catch (error) {
console.error("FAB: Error sending audio chunk:", error);
console.error("Error sending audio chunk:", error);
}
},
[],
@ -44,15 +53,23 @@ export const FloatingButton: React.FC = () => {
const isRecording =
recordingStatus === "recording" || recordingStatus === "starting";
const isAwaitingFinalChunk = recordingStatus === "stopping";
console.log("FAB: recordingStatus:", recordingStatus);
// Log recording status changes
useEffect(() => {
console.debug("Recording status changed", { recordingStatus });
}, [recordingStatus]);
useEffect(() => {
const cleanup = window.electronAPI.onRecordingStateChanged(
(newState: boolean) => {
console.log("FAB: Received new recording state:", newState);
console.log("Received recording state change from main process", {
newState,
});
if (newState) {
console.debug("Starting recording via state change");
startRecording();
} else {
console.debug("Stopping recording via state change");
stopRecording();
}
},
@ -79,7 +96,7 @@ export const FloatingButton: React.FC = () => {
// Update window size when recording or hover state changes
useEffect(() => {
console.log("is hovered", isHovered);
console.debug("Widget state changed", { isHovered, isRecording });
updateWindowSizeToFab();
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [isRecording, isHovered]);

View file

@ -0,0 +1,101 @@
import { FormatterConfig } from "../../types/formatter";
import {
getSettingsSection,
updateSettingsSection,
getAppSettings,
updateAppSettings,
} from "../../db/app-settings";
import type { AppSettingsData } from "../../db/schema";
/**
 * Typed facade over the database-backed app settings.
 *
 * Exposed as a process-wide singleton; obtain it via `getInstance()`.
 */
export class SettingsService {
  private static instance: SettingsService;

  private constructor() {}

  /** Return the shared instance, creating it on first use. */
  static getInstance(): SettingsService {
    const existing = SettingsService.instance;
    if (existing) {
      return existing;
    }
    const created = new SettingsService();
    SettingsService.instance = created;
    return created;
  }

  /** Read the formatter configuration, or null when none is stored. */
  async getFormatterConfig(): Promise<FormatterConfig | null> {
    const stored = await getSettingsSection("formatterConfig");
    return stored ? stored : null;
  }

  /** Persist the formatter configuration. */
  async setFormatterConfig(config: FormatterConfig): Promise<void> {
    await updateSettingsSection("formatterConfig", config);
  }

  /** Read the complete settings object. */
  async getAllSettings(): Promise<AppSettingsData> {
    const allSettings = await getAppSettings();
    return allSettings;
  }

  /** Apply a partial update and return the resulting settings. */
  async updateSettings(
    settings: Partial<AppSettingsData>,
  ): Promise<AppSettingsData> {
    const updated = await updateAppSettings(settings);
    return updated;
  }

  /** Read the "ui" settings section. */
  async getUISettings(): Promise<AppSettingsData["ui"]> {
    const ui = await getSettingsSection("ui");
    return ui;
  }

  /** Replace the "ui" settings section. */
  async setUISettings(uiSettings: AppSettingsData["ui"]): Promise<void> {
    await updateSettingsSection("ui", uiSettings);
  }

  /** Read the "transcription" settings section. */
  async getTranscriptionSettings(): Promise<AppSettingsData["transcription"]> {
    const transcription = await getSettingsSection("transcription");
    return transcription;
  }

  /** Replace the "transcription" settings section. */
  async setTranscriptionSettings(
    transcriptionSettings: AppSettingsData["transcription"],
  ): Promise<void> {
    await updateSettingsSection("transcription", transcriptionSettings);
  }

  /** Read the "recording" settings section. */
  async getRecordingSettings(): Promise<AppSettingsData["recording"]> {
    const recording = await getSettingsSection("recording");
    return recording;
  }

  /** Replace the "recording" settings section. */
  async setRecordingSettings(
    recordingSettings: AppSettingsData["recording"],
  ): Promise<void> {
    await updateSettingsSection("recording", recordingSettings);
  }
}

View file

@ -8,8 +8,8 @@ import {
DownloadProgress,
ModelManagerState,
AVAILABLE_MODELS,
} from "../../constants/models";
import { DownloadedModel } from "../../db/schema";
} from "../constants/models";
import { DownloadedModel } from "../db/schema";
import {
getDownloadedModelsRecord,
createDownloadedModel,
@ -17,8 +17,8 @@ import {
validateDownloadedModels,
validateModelFile,
getValidDownloadedModels,
} from "../../db/downloaded-models";
import { logger } from "../../main/logger";
} from "../db/downloaded-models";
import { logger } from "../main/logger";
interface ModelManagerEvents {
"download-progress": (modelId: string, progress: DownloadProgress) => void;
@ -31,17 +31,6 @@ interface ModelManagerEvents {
"model-deleted": (modelId: string) => void;
}
declare interface ModelManagerService {
on<U extends keyof ModelManagerEvents>(
event: U,
listener: ModelManagerEvents[U],
): this;
emit<U extends keyof ModelManagerEvents>(
event: U,
...args: Parameters<ModelManagerEvents[U]>
): boolean;
}
class ModelManagerService extends EventEmitter {
private state: ModelManagerState;
private modelsDirectory: string;
@ -57,6 +46,35 @@ class ModelManagerService extends EventEmitter {
this.ensureModelsDirectory();
}
// Type-safe event emitter methods
/**
 * Typed wrapper around EventEmitter's `on`: the event name must be a key of
 * ModelManagerEvents and the listener must match that event's signature.
 * Delegates directly to the base implementation.
 */
on<U extends keyof ModelManagerEvents>(
event: U,
listener: ModelManagerEvents[U],
): this {
return super.on(event, listener);
}
/**
 * Typed wrapper around EventEmitter's `emit`: the arguments must match the
 * parameter list declared for the event in ModelManagerEvents.
 * Delegates directly to the base implementation and returns its result.
 */
emit<U extends keyof ModelManagerEvents>(
event: U,
...args: Parameters<ModelManagerEvents[U]>
): boolean {
return super.emit(event, ...args);
}
/**
 * Typed wrapper around EventEmitter's `off`: the event name must be a key of
 * ModelManagerEvents and the listener must match that event's signature.
 * Delegates directly to the base implementation.
 */
off<U extends keyof ModelManagerEvents>(
event: U,
listener: ModelManagerEvents[U],
): this {
return super.off(event, listener);
}
/**
 * Typed wrapper around EventEmitter's `once`: the event name must be a key of
 * ModelManagerEvents and the listener must match that event's signature.
 * Delegates directly to the base implementation.
 */
once<U extends keyof ModelManagerEvents>(
event: U,
listener: ModelManagerEvents[U],
): this {
return super.once(event, listener);
}
// Initialize and validate models on startup
async initialize(): Promise<void> {
try {
@ -364,6 +382,64 @@ class ModelManagerService extends EventEmitter {
}
}
// Model selection for transcription (moved from LocalWhisperClient)
private selectedModelId: string | null = null;
/**
 * Report whether transcription can run, i.e. whether at least one valid
 * downloaded model exists.
 */
async isAvailable(): Promise<boolean> {
  const modelIds = Object.keys(await this.getValidDownloadedModels());
  return modelIds.length > 0;
}
/**
 * List the ids of every valid downloaded model.
 */
async getAvailableModelsForTranscription(): Promise<string[]> {
  const validModels = await this.getValidDownloadedModels();
  const modelIds = Object.keys(validModels);
  return modelIds;
}
/**
 * Returns the id of the model chosen via `setSelectedModel`, or null when no
 * model has been selected. The value is held in memory on this instance.
 */
getSelectedModel(): string | null {
return this.selectedModelId;
}
/**
 * Choose the model used for transcription.
 *
 * @throws Error when `modelId` is not among the valid downloaded models.
 */
async setSelectedModel(modelId: string): Promise<void> {
  const validModels = await this.getValidDownloadedModels();
  const isDownloaded = Boolean(validModels[modelId]);
  if (!isDownloaded) {
    throw new Error(`Model not downloaded: ${modelId}`);
  }

  this.selectedModelId = modelId;
  logger.main.info("Selected model for transcription", { modelId });
}
/**
 * Resolve the model file to use for transcription (used by WhisperProvider).
 *
 * Resolution order:
 *  1. the explicitly selected model, when it is still a valid download;
 *  2. the first model from the quality-ordered preference list whose file exists;
 *  3. any other valid downloaded model whose file exists.
 *
 * Step 3 keeps this method consistent with `isAvailable()`: a downloaded model
 * whose id is not in the preference list is still usable instead of yielding null.
 *
 * @returns Absolute path of the model file, or null when no usable model exists.
 */
async getBestAvailableModelPath(): Promise<string | null> {
  const downloadedModels = await this.getValidDownloadedModels();

  // If a specific model is selected and available, use it
  if (this.selectedModelId && downloadedModels[this.selectedModelId]) {
    return downloadedModels[this.selectedModelId].localPath;
  }

  // Otherwise, find the best available model (prioritize by quality)
  const preferredOrder = [
    "whisper-large-v1",
    "whisper-medium",
    "whisper-small",
    "whisper-base",
    "whisper-tiny",
  ];
  for (const modelId of preferredOrder) {
    const model = downloadedModels[modelId];
    if (model && fs.existsSync(model.localPath)) {
      return model.localPath;
    }
  }

  // Last resort: any valid download not covered by the preference list
  for (const model of Object.values(downloadedModels)) {
    if (model && fs.existsSync(model.localPath)) {
      return model.localPath;
    }
  }

  return null;
}
// Cleanup - cancel all active downloads
cleanup(): void {
logger.main.info("Cleaning up model downloads", {

View file

@ -7,7 +7,7 @@ import split2 from "split2";
import { v4 as uuid } from "uuid";
import { EventEmitter } from "events";
import { createScopedLogger } from "./logger";
import { createScopedLogger } from "../../main/logger";
import {
RpcRequestSchema,
RpcRequest,

View file

@ -0,0 +1,190 @@
import { ipcMain } from "electron";
import { EventEmitter } from "node:events";
import { logger, logPerformance } from "../main/logger";
import { ServiceManager } from "../main/managers/service-manager";
import { appContextStore } from "../stores/app-context";
/**
 * Handles audio recording via IPC and coordinates with the pipeline system.
 * This service manages the recording flow but delegates actual processing to
 * the transcription service obtained from the ServiceManager.
 *
 * IPC channels registered on construction:
 *  - "audio-data-chunk"   (chunk: ArrayBuffer, isFinalChunk: boolean)
 *  - "recording-starting" / "recording-stopping"
 *  - "log-message"        (level, scope, ...args) forwarded to the main logger
 */
export class RecordingService extends EventEmitter {
  // Id of the streaming session in progress; null between recordings
  private currentSessionId: string | null = null;

  constructor(private serviceManager: ServiceManager) {
    super();
    this.setupIPCHandlers();
  }

  /** Register the ipcMain handlers listed in the class description. */
  private setupIPCHandlers(): void {
    // Handle audio data chunks from renderer
    ipcMain.handle(
      "audio-data-chunk",
      async (event, chunk: ArrayBuffer, isFinalChunk: boolean) => {
        if (!(chunk instanceof ArrayBuffer)) {
          logger.audio.error("Received invalid audio chunk type", {
            type: typeof chunk,
          });
          throw new Error("Invalid audio chunk type received.");
        }

        const buffer = Buffer.from(chunk);
        logger.audio.info("Received audio chunk", {
          size: buffer.byteLength,
          isFinalChunk,
        });
        await this.handleAudioChunk(buffer, isFinalChunk);
      },
    );

    ipcMain.handle("recording-starting", async () => {
      logger.audio.info("Recording starting");
      await this.handleRecordingStarting();
    });

    ipcMain.handle("recording-stopping", async () => {
      logger.audio.info("Recording stopping");
      await this.handleRecordingStopping();
    });

    // Handle log messages from renderer processes
    ipcMain.handle(
      "log-message",
      (event, level: string, scope: string, ...args: any[]) => {
        // Unknown scopes fall back to the generic renderer logger
        const scopedLogger =
          logger[scope as keyof typeof logger] || logger.renderer;
        const logMethod = scopedLogger[level as keyof typeof scopedLogger];
        // Unknown levels are ignored
        if (typeof logMethod === "function") {
          logMethod(...args);
        }
      },
    );
  }

  /**
   * Feed one audio chunk into the streaming transcription session, starting a
   * session if none is active. On the final chunk the resulting transcription
   * is pasted into the active application and the session is closed.
   *
   * Processing errors are logged, not rethrown; an error on the final chunk
   * still closes the session.
   */
  private async handleAudioChunk(
    chunk: Buffer,
    isFinalChunk: boolean,
  ): Promise<void> {
    // Start new session if needed
    if (!this.currentSessionId) {
      const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
      this.currentSessionId = `session-${timestamp}`;
      logger.audio.info("Started new streaming session", {
        sessionId: this.currentSessionId,
      });
    }

    // Skip empty chunks unless it's the final one
    if (chunk.length === 0 && !isFinalChunk) {
      logger.audio.debug("Skipping empty non-final chunk");
      return;
    }

    try {
      const transcriptionService =
        this.serviceManager.getTranscriptionService();
      const startTime = Date.now();

      // Process the chunk - pass isFinal flag
      const transcriptionResult =
        await transcriptionService.processStreamingChunk({
          sessionId: this.currentSessionId,
          audioChunk: chunk,
          isFinal: isFinalChunk,
        });

      // Guarded the same way everywhere so a missing result cannot throw here
      const resultLength = transcriptionResult?.length ?? 0;

      logger.audio.debug("Processed audio chunk", {
        chunkSize: chunk.length,
        processingTimeMs: Date.now() - startTime,
        resultLength,
        isFinal: isFinalChunk,
      });

      // If this was the final chunk, handle completion
      if (isFinalChunk) {
        const sessionId = this.currentSessionId;
        logPerformance("streaming transcription complete", startTime, {
          sessionId,
          resultLength,
        });
        logger.audio.info("Streaming transcription completed", {
          sessionId,
          resultLength,
          hasResult: !!transcriptionResult,
        });

        // Close the session before pasting so that a slow paste cannot cause
        // the next recording to reuse this session id
        this.currentSessionId = null;

        // Paste the final formatted transcription
        if (transcriptionResult) {
          await this.pasteTranscription(transcriptionResult);
        }
      }
    } catch (error) {
      logger.audio.error("Error processing audio chunk:", error);
      if (isFinalChunk) {
        // Clean up session on error
        this.currentSessionId = null;
      }
    }
  }

  /**
   * Ask the Swift bridge to paste `transcription` into the active application.
   * Failures (bridge unavailable or the call rejecting) are logged as warnings.
   */
  private async pasteTranscription(transcription: string): Promise<void> {
    if (!transcription || typeof transcription !== "string") {
      logger.main.warn("Invalid transcription, not pasting");
      return;
    }

    try {
      const swiftBridge = this.serviceManager.getSwiftIOBridge();
      logger.main.info("Pasting transcription to active application", {
        textLength: transcription.length,
      });
      // Awaited so that a rejected call is caught below instead of surfacing
      // as an unhandled rejection
      await swiftBridge.call("pasteText", {
        transcript: transcription,
      });
    } catch (error) {
      logger.main.warn(
        "Swift bridge not available, cannot paste transcription",
        { error: error instanceof Error ? error.message : String(error) },
      );
    }
  }

  /**
   * Prepare for recording: kick off an accessibility-context refresh (not
   * awaited) and mute system audio. Neither failure blocks recording.
   */
  private async handleRecordingStarting(): Promise<void> {
    // Refresh accessibility context - fire and forget, but never leave a
    // rejection unhandled
    void Promise.resolve(appContextStore.refreshAccessibilityData()).catch(
      (error: unknown) => {
        logger.main.warn("Failed to refresh accessibility context", {
          error: error instanceof Error ? error.message : String(error),
        });
      },
    );

    // Mute system audio
    try {
      const swiftBridge = this.serviceManager.getSwiftIOBridge();
      await swiftBridge.call("muteSystemAudio", {});
    } catch (error) {
      logger.main.warn("Swift bridge not available for audio muting", {
        error: error instanceof Error ? error.message : String(error),
      });
    }

    // TODO: Preload models if needed (Phase 2)
  }

  /** Restore system audio after recording; failures are logged as warnings. */
  private async handleRecordingStopping(): Promise<void> {
    // Restore system audio
    try {
      const swiftBridge = this.serviceManager.getSwiftIOBridge();
      await swiftBridge.call("restoreSystemAudio", {});
    } catch (error) {
      logger.main.warn("Swift bridge not available for audio restore", {
        error: error instanceof Error ? error.message : String(error),
      });
    }
  }

  /** Clean up resources: forgets any active session. */
  async cleanup(): Promise<void> {
    // Clear any active session
    this.currentSessionId = null;
  }
}

View file

@ -1,26 +1,17 @@
import { FormatterConfig } from "../formatter";
import { FormatterConfig } from "../types/formatter";
import {
getSettingsSection,
updateSettingsSection,
getAppSettings,
updateAppSettings,
} from "../../db/app-settings";
import type { AppSettingsData } from "../../db/schema";
} from "../db/app-settings";
import type { AppSettingsData } from "../db/schema";
/**
* Database-backed settings service with typed configuration
*/
export class SettingsService {
private static instance: SettingsService;
private constructor() {}
static getInstance(): SettingsService {
if (!SettingsService.instance) {
SettingsService.instance = new SettingsService();
}
return SettingsService.instance;
}
constructor() {}
/**
* Get formatter configuration

View file

@ -0,0 +1,215 @@
import {
PipelineContext,
StreamingPipelineContext,
StreamingSession,
} from "../pipeline/core/pipeline-types";
import { createDefaultContext } from "../pipeline/core/context";
import { WhisperProvider } from "../pipeline/providers/transcription/whisper-provider";
import { OpenRouterProvider } from "../pipeline/providers/formatting/openrouter-formatter";
import { ModelManagerService } from "../services/model-manager";
import { ServiceManager } from "../main/managers/service-manager";
import { appContextStore } from "../stores/app-context";
import { createTranscription } from "../db/transcriptions";
import { logger } from "../main/logger";
import { v4 as uuid } from "uuid";
/**
 * Service for audio transcription and optional formatting.
 *
 * Audio is fed in chunk by chunk through `processStreamingChunk`. Each chunk is
 * transcribed with the Whisper provider and accumulated per session. When the
 * final chunk arrives, the accumulated text is optionally formatted through
 * OpenRouter and saved with `createTranscription`.
 */
export class TranscriptionService {
  private whisperProvider: WhisperProvider;
  private openRouterProvider: OpenRouterProvider | null = null;
  private formatterEnabled = false;
  private streamingSessions: Map<string, StreamingSession> = new Map();

  constructor(modelManagerService: ModelManagerService) {
    this.whisperProvider = new WhisperProvider(modelManagerService);
  }

  /**
   * Configure formatter for post-processing.
   *
   * The formatter is enabled only when `config.enabled` is truthy, the provider
   * is "openrouter", and both `apiKey` and `model` are set. Any other input
   * disables formatting.
   */
  configureFormatter(config: any): void {
    if (!config?.enabled) {
      this.openRouterProvider = null;
      this.formatterEnabled = false;
      logger.transcription.info("Formatter disabled");
      return;
    }

    if (config.provider === "openrouter" && config.apiKey && config.model) {
      this.openRouterProvider = new OpenRouterProvider(
        config.apiKey,
        config.model,
      );
      this.formatterEnabled = true;
      logger.transcription.info("Formatter configured", {
        provider: config.provider,
      });
    } else {
      logger.transcription.warn("Invalid formatter configuration");
      this.openRouterProvider = null;
      this.formatterEnabled = false;
    }
  }

  /**
   * Process a single audio chunk in streaming mode.
   *
   * A session is created on first use of `sessionId`. Empty chunks are not
   * transcribed, which lets a caller send an empty final chunk just to close
   * the session.
   *
   * @returns For non-final chunks, the transcription accumulated so far (for
   *   UI feedback). For the final chunk, the complete (optionally formatted)
   *   transcription that was saved.
   * @throws Whatever the transcription provider or the database layer throws.
   *   A formatter failure does not throw; the unformatted text is used.
   */
  async processStreamingChunk(options: {
    sessionId: string;
    audioChunk: Buffer;
    isFinal?: boolean;
  }): Promise<string> {
    const { sessionId, audioChunk, isFinal = false } = options;
    const session = await this.getOrCreateSession(sessionId);

    try {
      // Process chunk if it has content
      if (audioChunk.length > 0) {
        await this.transcribeChunk(sessionId, session, audioChunk, isFinal);
      }

      if (!isFinal) {
        // Return accumulated transcription so far (for UI feedback)
        return session.transcriptionResults.join(" ");
      }

      return await this.finalizeSession(sessionId, session);
    } finally {
      // A finished session must never stay in the map, even when the final
      // transcription or the save failed; otherwise it would leak.
      if (isFinal) {
        this.streamingSessions.delete(sessionId);
      }
    }
  }

  /** Returns the session registered for `sessionId`, creating it if needed. */
  private async getOrCreateSession(
    sessionId: string,
  ): Promise<StreamingSession> {
    const existing = this.streamingSessions.get(sessionId);
    if (existing) {
      return existing;
    }

    const context = await this.buildContext();

    // buildContext() awaits, so an overlapping call for the same session may
    // have registered it in the meantime. Reuse that one instead of
    // overwriting it and losing its accumulated results.
    const registeredMeanwhile = this.streamingSessions.get(sessionId);
    if (registeredMeanwhile) {
      return registeredMeanwhile;
    }

    const streamingContext: StreamingPipelineContext = {
      ...context,
      sessionId,
      isPartial: true,
      isFinal: false,
      accumulatedTranscription: [],
    };

    // Get accessibility context from global store
    streamingContext.sharedData.accessibilityContext =
      appContextStore.getAccessibilityContext();

    const session: StreamingSession = {
      context: streamingContext,
      transcriptionResults: [],
    };
    this.streamingSessions.set(sessionId, session);
    logger.transcription.info("Started streaming session", { sessionId });
    return session;
  }

  /** Transcribes one non-empty chunk and appends non-blank text to the session. */
  private async transcribeChunk(
    sessionId: string,
    session: StreamingSession,
    audioChunk: Buffer,
    isFinal: boolean,
  ): Promise<void> {
    const results = session.transcriptionResults;
    const previousChunk =
      results.length > 0 ? results[results.length - 1] : undefined;
    const aggregatedTranscription = results.join(" ").trim();

    // Direct provider call - no step wrapper
    const chunkTranscription = await this.whisperProvider.transcribe({
      audioData: audioChunk,
      context: {
        vocabulary: session.context.sharedData.vocabulary,
        accessibilityContext: session.context.sharedData.accessibilityContext,
        previousChunk,
        aggregatedTranscription: aggregatedTranscription || undefined,
      },
    });

    // Accumulate the result; whitespace-only output is dropped
    if (chunkTranscription.trim()) {
      results.push(chunkTranscription);
    }

    logger.transcription.debug("Processed chunk", {
      sessionId,
      chunkSize: audioChunk.length,
      transcriptionLength: chunkTranscription.length,
      totalResults: results.length,
      isFinal,
    });
  }

  /** Formats (when enabled) and saves the complete transcription of a session. */
  private async finalizeSession(
    sessionId: string,
    session: StreamingSession,
  ): Promise<string> {
    const results = session.transcriptionResults;
    const sharedData = session.context.sharedData;
    const rawTranscription = results.join(" ").trim();

    logger.transcription.info("Finalizing streaming session", {
      sessionId,
      rawTranscriptionLength: rawTranscription.length,
      chunkCount: results.length,
    });

    let completeTranscription = rawTranscription;
    let formatted = false;
    const formatter = this.openRouterProvider;

    // Format if enabled. Empty text is skipped: there is nothing to format.
    if (this.formatterEnabled && formatter && rawTranscription) {
      try {
        completeTranscription = await formatter.format({
          text: rawTranscription,
          context: {
            style: sharedData.userPreferences?.formattingStyle,
            vocabulary: sharedData.vocabulary,
            accessibilityContext: sharedData.accessibilityContext,
            previousChunk:
              results.length > 1 ? results[results.length - 2] : undefined,
            aggregatedTranscription: rawTranscription,
          },
        });
        formatted = true;
      } catch (error) {
        // Formatting is optional post-processing; a formatter failure must not
        // discard what was transcribed, so fall back to the raw text.
        logger.transcription.warn(
          "Formatting failed, using unformatted transcription",
          {
            sessionId,
            error: error instanceof Error ? error.message : String(error),
          },
        );
      }
    }

    // Save directly to database
    await createTranscription({
      text: completeTranscription,
      language: sharedData.userPreferences?.language || "en",
      duration: sharedData.audioMetadata?.duration,
      speechModel: "whisper-local",
      // Only record a formatting model when formatting was actually applied
      formattingModel: formatted ? "openrouter" : undefined,
      meta: {
        sessionId,
        source: sharedData.audioMetadata?.source,
        vocabularySize: sharedData.vocabulary?.size ?? 0,
        formattingStyle: sharedData.userPreferences?.formattingStyle,
      },
    });

    logger.transcription.info("Streaming session completed", { sessionId });
    return completeTranscription;
  }

  /** Builds the pipeline context used for a new streaming session. */
  private async buildContext(): Promise<PipelineContext> {
    // Create default context
    const context = createDefaultContext(uuid());

    // Simple context building - no complex loading
    const serviceManager = ServiceManager.getInstance();
    if (serviceManager) {
      try {
        // The loaded config is not applied to the context yet (see TODOs
        // below); the read is kept so a failing settings load is still logged.
        await serviceManager.getSettingsService().getFormatterConfig();
      } catch (error) {
        logger.transcription.warn("Failed to load formatter config", {
          error: error instanceof Error ? error.message : String(error),
        });
      }
    }

    // TODO: Load actual vocabulary
    // TODO: Load user preferences from settings
    return context;
  }

  /**
   * Cleanup method: forgets all streaming sessions and disposes the Whisper
   * provider.
   */
  async dispose(): Promise<void> {
    this.streamingSessions.clear();
    await this.whisperProvider.dispose();
    logger.transcription.info("Transcription service disposed");
  }
}

View file

@ -0,0 +1,37 @@
import { GetAccessibilityContextResult } from "@amical/types";
import { ServiceManager } from "../main/managers/service-manager";
import { logger } from "../main/logger";
/**
 * Caches the latest accessibility context obtained from the Swift bridge so
 * other parts of the app can read it synchronously.
 */
class AppContextStore {
  private accessibilityContext: GetAccessibilityContextResult | null = null;

  /**
   * Fetches a fresh accessibility context through the Swift bridge and stores
   * it. Does nothing when no service manager exists. Failures are logged and
   * never thrown.
   */
  async refreshAccessibilityData(): Promise<void> {
    try {
      const manager = ServiceManager.getInstance();
      if (manager) {
        const bridge = manager.getSwiftIOBridge();
        const result = await bridge.call("getAccessibilityContext", {
          editableOnly: false,
        });
        this.accessibilityContext = result;

        const details = result.context;
        logger.main.debug("Accessibility context refreshed", {
          hasApplication: Boolean(details?.application?.name),
          hasFocusedElement: Boolean(details?.focusedElement?.role),
          hasTextSelection: Boolean(details?.textSelection?.selectedText),
          hasWindow: Boolean(details?.windowInfo?.title),
        });
      }
      // No service manager: silent fail
    } catch (failure) {
      const reason =
        failure instanceof Error ? failure.message : String(failure);
      logger.main.error("Failed to refresh accessibility context", {
        error: reason,
      });
    }
  }

  /** Returns the last stored context, or null when none has been stored. */
  getAccessibilityContext(): GetAccessibilityContextResult | null {
    return this.accessibilityContext;
  }
}

export const appContextStore = new AppContextStore();

View file

@ -18,7 +18,6 @@ const t = initTRPC.create({
declare global {
var modelManagerService: any;
var localWhisperClient: any;
}
export const modelsRouter = t.router({
@ -67,22 +66,22 @@ export const modelsRouter = t.router({
return globalThis.modelManagerService?.getModelsDirectory() || "";
}),
// Local Whisper methods
isLocalWhisperAvailable: t.procedure.query(async () => {
return globalThis.localWhisperClient
? await globalThis.localWhisperClient.isAvailable()
// Transcription model selection methods
isTranscriptionAvailable: t.procedure.query(async () => {
return globalThis.modelManagerService
? await globalThis.modelManagerService.isAvailable()
: false;
}),
getLocalWhisperModels: t.procedure.query(async () => {
return globalThis.localWhisperClient
? await globalThis.localWhisperClient.getAvailableModels()
getTranscriptionModels: t.procedure.query(async () => {
return globalThis.modelManagerService
? await globalThis.modelManagerService.getAvailableModelsForTranscription()
: [];
}),
getSelectedModel: t.procedure.query(async () => {
return globalThis.localWhisperClient
? globalThis.localWhisperClient.getSelectedModel()
return globalThis.modelManagerService
? globalThis.modelManagerService.getSelectedModel()
: null;
}),
@ -117,10 +116,10 @@ export const modelsRouter = t.router({
setSelectedModel: t.procedure
.input(z.object({ modelId: z.string() }))
.mutation(async ({ input }) => {
if (!globalThis.localWhisperClient) {
throw new Error("Local whisper client not initialized");
if (!globalThis.modelManagerService) {
throw new Error("Model manager service not initialized");
}
return await globalThis.localWhisperClient.setSelectedModel(
return await globalThis.modelManagerService.setSelectedModel(
input.modelId,
);
}),

View file

@ -1,7 +1,7 @@
import { initTRPC } from "@trpc/server";
import superjson from "superjson";
import { z } from "zod";
import { SettingsService } from "../../modules/settings";
import { SettingsService } from "../../services/settings-service";
const t = initTRPC.create({
isServer: true,
@ -18,7 +18,8 @@ const FormatterConfigSchema = z.object({
// We'll need to access these from the main process
declare global {
var aiService: any;
var transcriptionService: any;
var settingsService: any;
var logger: any;
}
@ -26,11 +27,16 @@ export const settingsRouter = t.router({
// Get formatter configuration
getFormatterConfig: t.procedure.query(async () => {
try {
const settingsService = SettingsService.getInstance();
return await settingsService.getFormatterConfig();
if (!globalThis.settingsService) {
throw new Error("SettingsService not available");
}
return await globalThis.settingsService.getFormatterConfig();
} catch (error) {
if (globalThis.logger) {
globalThis.logger.ai.error("Error getting formatter config:", error);
globalThis.logger.transcription.error(
"Error getting formatter config:",
error,
);
}
return null;
}
@ -41,21 +47,28 @@ export const settingsRouter = t.router({
.input(FormatterConfigSchema)
.mutation(async ({ input }) => {
try {
const settingsService = SettingsService.getInstance();
await settingsService.setFormatterConfig(input);
if (!globalThis.settingsService) {
throw new Error("SettingsService not available");
}
await globalThis.settingsService.setFormatterConfig(input);
// Update AI service with new formatter configuration
if (globalThis.aiService) {
globalThis.aiService.configureFormatter(input);
// Update transcription service with new formatter configuration
if (globalThis.transcriptionService) {
globalThis.transcriptionService.configureFormatter(input);
if (globalThis.logger) {
globalThis.logger.ai.info("Formatter configuration updated");
globalThis.logger.transcription.info(
"Formatter configuration updated",
);
}
}
return true;
} catch (error) {
if (globalThis.logger) {
globalThis.logger.ai.error("Error setting formatter config:", error);
globalThis.logger.transcription.error(
"Error setting formatter config:",
error,
);
}
throw error;
}

View file

@ -46,10 +46,10 @@ export interface ElectronAPI {
// Formatter Configuration API
getFormatterConfig: () => Promise<
import("../modules/formatter").FormatterConfig | null
import("../types/formatter").FormatterConfig | null
>;
setFormatterConfig: (
config: import("../modules/formatter").FormatterConfig,
config: import("../types/formatter").FormatterConfig,
) => Promise<void>;
// Transcription Database API (moved to tRPC)

View file

@ -0,0 +1,6 @@
/**
 * Settings for the optional formatting step applied to a transcription.
 */
export interface FormatterConfig {
/** Formatting backend; "openrouter" is the only value the type allows. */
provider: "openrouter";
/** Model identifier handed to the formatting provider. */
model: string;
/** API key handed to the formatting provider. */
apiKey: string;
/** Whether formatting should be applied at all. */
enabled: boolean;
}

26
pnpm-lock.yaml generated
View file

@ -47,6 +47,9 @@ importers:
'@libsql/client':
specifier: ^0.15.9
version: 0.15.9
'@openrouter/ai-sdk-provider':
specifier: ^0.7.2
version: 0.7.2(ai@4.3.16(react@19.1.0)(zod@3.25.67))(zod@3.25.67)
'@radix-ui/react-accordion':
specifier: ^1.2.10
version: 1.2.11(@types/react-dom@19.1.5(@types/react@19.1.5))(@types/react@19.1.5)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
@ -155,6 +158,9 @@ importers:
ai:
specifier: ^4.3.16
version: 4.3.16(react@19.1.0)(zod@3.25.67)
ansi-colors:
specifier: ^4.1.3
version: 4.1.3
async-mutex:
specifier: ^0.5.0
version: 0.5.0
@ -2014,6 +2020,13 @@ packages:
'@octokit/types@6.41.0':
resolution: {integrity: sha512-eJ2jbzjdijiL3B4PrSQaSjuF2sPEQPVCPzBvTHJD9Nz+9dw2SGH4K4xeQJ77YfTq5bRQ+bD8wT11JbeDPmxmGg==}
'@openrouter/ai-sdk-provider@0.7.2':
resolution: {integrity: sha512-Fry2mV7uGGJRmP9JntTZRc8ElESIk7AJNTacLbF6Syoeb5k8d7HPGkcK9rTXDlqBb8HgU1hOKtz23HojesTmnw==}
engines: {node: '>=18'}
peerDependencies:
ai: ^4.3.16
zod: ^3.25.34
'@opentelemetry/api@1.9.0':
resolution: {integrity: sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==}
engines: {node: '>=8.0.0'}
@ -3502,6 +3515,10 @@ packages:
ajv@6.12.6:
resolution: {integrity: sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==}
ansi-colors@4.1.3:
resolution: {integrity: sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw==}
engines: {node: '>=6'}
ansi-escapes@4.3.2:
resolution: {integrity: sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ==}
engines: {node: '>=8'}
@ -10113,6 +10130,13 @@ snapshots:
dependencies:
'@octokit/openapi-types': 12.11.0
'@openrouter/ai-sdk-provider@0.7.2(ai@4.3.16(react@19.1.0)(zod@3.25.67))(zod@3.25.67)':
dependencies:
'@ai-sdk/provider': 1.1.3
'@ai-sdk/provider-utils': 2.2.8(zod@3.25.67)
ai: 4.3.16(react@19.1.0)(zod@3.25.67)
zod: 3.25.67
'@opentelemetry/api@1.9.0': {}
'@orama/orama@3.1.7': {}
@ -11756,6 +11780,8 @@ snapshots:
json-schema-traverse: 0.4.1
uri-js: 4.4.1
ansi-colors@4.1.3: {}
ansi-escapes@4.3.2:
dependencies:
type-fest: 0.21.3