chore: decouple audio file logging + transcription svc

This commit is contained in:
Naomi Chopra 2025-07-12 10:06:12 +05:30 committed by haritabh-z01
parent c71241d264
commit 95ce6bea36
3 changed files with 96 additions and 78 deletions

View file

@ -1,10 +1,13 @@
import { ipcMain } from "electron";
import { ipcMain, app } from "electron";
import { EventEmitter } from "node:events";
import { logger, logPerformance } from "../logger";
import { ServiceManager } from "./service-manager";
import type { RecordingState } from "../../types/recording";
import { Mutex } from "async-mutex";
import type { ShortcutManager } from "../services/shortcut-manager";
import { StreamingWavWriter } from "../../utils/streaming-wav-writer";
import * as fs from "node:fs";
import * as path from "node:path";
export type RecordingMode = "idle" | "ptt" | "hands-free";
@ -17,6 +20,10 @@ export class RecordingManager extends EventEmitter {
private recordingState: RecordingState = "idle";
private recordingMutex = new Mutex();
private recordingMode: RecordingMode = "idle";
// Audio capture state for the session being recorded: the path of the WAV
// file and the writer that incoming audio chunks are appended to.
// Starts as null and is reset to null when the session is cleaned up.
private currentAudioRecording: {
audioFilePath: string;
wavWriter: StreamingWavWriter;
} | null = null;
constructor(private serviceManager: ServiceManager) {
super();
@ -87,6 +94,26 @@ export class RecordingManager extends EventEmitter {
this.emit("mode-changed", this.getRecordingMode());
}
/**
 * Build the destination path of the WAV file for a recording session.
 *
 * Ensures the `amical-audio` directory exists under the app's temp path and
 * returns a path of the form `audio-<sessionId>-<timestamp>.wav` inside it.
 * Only the directory is created here; nothing is written at the returned path.
 *
 * @param sessionId Identifier of the recording session, embedded in the file name.
 * @returns The path to use for the session's WAV file.
 */
private async createAudioFile(sessionId: string): Promise<string> {
  const tempRoot = app.getPath("temp");
  const targetDir = path.join(tempRoot, "amical-audio");
  await fs.promises.mkdir(targetDir, { recursive: true });

  // ISO timestamp with ":" and "." swapped for "-" before use in the file name.
  const stamp = new Date().toISOString().replace(/[:.]/g, "-");
  const fileName = `audio-${sessionId}-${stamp}.wav`;
  const filePath = path.join(targetDir, fileName);

  logger.audio.info("Created audio file for session", { sessionId, filePath });

  return filePath;
}
private setupIPCHandlers(): void {
// Handle audio data chunks from renderer
ipcMain.handle(
@ -148,21 +175,28 @@ export class RecordingManager extends EventEmitter {
const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
this.currentSessionId = `session-${timestamp}`;
// Create audio file and WAV writer
const audioFilePath = await this.createAudioFile(this.currentSessionId);
this.currentAudioRecording = {
audioFilePath,
wavWriter: new StreamingWavWriter(audioFilePath),
};
logger.audio.info("Audio recording initialized", {
sessionId: this.currentSessionId,
audioFilePath,
});
// Mute system audio
try {
const swiftBridge = this.serviceManager.getService("swiftIOBridge");
if (swiftBridge) {
await swiftBridge.call("muteSystemAudio", {});
//await swiftBridge.call("muteSystemAudio", {});
}
} catch (error) {
logger.main.warn("Swift bridge not available for audio muting");
}
// Refresh accessibility context - fire and forget
// appContextStore.refreshAccessibilityData();
// TODO: Preload models if needed (Phase 2)
this.setState("recording");
logger.audio.info("Recording started successfully", {
sessionId: this.currentSessionId,
@ -255,8 +289,10 @@ export class RecordingManager extends EventEmitter {
}
// Session should already exist from startRecording
if (!this.currentSessionId) {
logger.audio.error("No session ID found while handling audio chunk");
if (!this.currentSessionId || !this.currentAudioRecording) {
logger.audio.error(
"No session ID or audio recording found while handling audio chunk",
);
return;
}
@ -266,6 +302,8 @@ export class RecordingManager extends EventEmitter {
return;
}
await this.currentAudioRecording.wavWriter.appendAudio(chunk);
try {
const transcriptionService = this.serviceManager.getService(
"transcriptionService",
@ -275,12 +313,13 @@ export class RecordingManager extends EventEmitter {
}
const startTime = Date.now();
// Process the chunk - pass isFinal flag
// Process the chunk - pass isFinal flag and audio file path
const transcriptionResult =
await transcriptionService.processStreamingChunk({
sessionId: this.currentSessionId,
audioChunk: chunk,
isFinal: isFinalChunk,
audioFilePath: this.currentAudioRecording.audioFilePath,
});
logger.audio.debug("Processed audio chunk", {
@ -292,6 +331,14 @@ export class RecordingManager extends EventEmitter {
// If this was the final chunk, handle completion
if (isFinalChunk) {
// Finalize the WAV file
await this.currentAudioRecording.wavWriter.finalize();
logger.audio.info("Finalized WAV file", {
sessionId: this.currentSessionId,
filePath: this.currentAudioRecording.audioFilePath,
dataSize: this.currentAudioRecording.wavWriter.getDataSize(),
});
logPerformance("streaming transcription complete", startTime, {
sessionId: this.currentSessionId,
resultLength: transcriptionResult?.length || 0,
@ -308,8 +355,9 @@ export class RecordingManager extends EventEmitter {
await this.pasteTranscription(transcriptionResult);
}
// Clean up session
// Clean up session and audio recording
this.currentSessionId = null;
this.currentAudioRecording = null;
// Ensure state is idle after completion
if (this.recordingState === "stopping") {
@ -320,8 +368,9 @@ export class RecordingManager extends EventEmitter {
logger.audio.error("Error processing audio chunk:", error);
if (isFinalChunk) {
// Clean up session on error
// Clean up session and audio recording on error
this.currentSessionId = null;
this.currentAudioRecording = null;
this.setState("error");
}
}
@ -363,8 +412,9 @@ export class RecordingManager extends EventEmitter {
await this.stopRecording();
}
// Clear any active session
// Clear any active session and audio recording
this.currentSessionId = null;
this.currentAudioRecording = null;
this.setState("idle");
}
}

View file

@ -13,26 +13,16 @@ import { createTranscription } from "../db/transcriptions";
import { logger } from "../main/logger";
import { v4 as uuid } from "uuid";
import { VADService } from "./vad-service";
import { app } from "electron";
import * as fs from "node:fs";
import * as path from "node:path";
import { StreamingWavWriter } from "../utils/streaming-wav-writer";
import { Mutex } from "async-mutex";
/**
* Service for audio transcription and optional formatting
*/
interface ExtendedStreamingSession extends StreamingSession {
// Writer that streams the session's audio chunks into the WAV file.
wavWriter?: StreamingWavWriter;
// Path of the session's WAV file; stored with the saved transcription.
audioFilePath?: string;
}
export class TranscriptionService {
private whisperProvider: WhisperProvider;
private openRouterProvider: OpenRouterProvider | null = null;
private formatterEnabled = false;
private streamingSessions: Map<string, ExtendedStreamingSession> = new Map();
private streamingSessions: Map<string, StreamingSession> = new Map();
private vadService: VADService | null;
private settingsService: SettingsService;
private vadMutex: Mutex;
@ -143,26 +133,6 @@ export class TranscriptionService {
}
}
/**
 * Resolve where the audio of a streaming session should be stored.
 *
 * Makes sure the `amical-audio` directory exists inside the app's temp path,
 * then composes a timestamped `.wav` file path for the session. The file
 * itself is not created by this method.
 *
 * @param sessionId Identifier of the streaming session, used in the file name.
 * @returns Path of the WAV file for this session.
 */
private async createAudioFile(sessionId: string): Promise<string> {
  const audioDir = path.join(app.getPath("temp"), "amical-audio");
  await fs.promises.mkdir(audioDir, { recursive: true });

  // ":" and "." in the ISO timestamp are replaced with "-" for the file name.
  const isoStamp = new Date().toISOString();
  const safeStamp = isoStamp.replace(/[:.]/g, "-");
  const filePath = path.join(
    audioDir,
    ["audio", sessionId, safeStamp].join("-") + ".wav",
  );

  logger.transcription.info("Created audio file for session", {
    sessionId,
    filePath,
  });
  return filePath;
}
/**
* Process a single audio chunk in streaming mode
*/
@ -170,8 +140,9 @@ export class TranscriptionService {
sessionId: string;
audioChunk: Float32Array;
isFinal?: boolean;
audioFilePath?: string;
}): Promise<string> {
const { sessionId, audioChunk, isFinal = false } = options;
const { sessionId, audioChunk, isFinal = false, audioFilePath } = options;
// Run VAD on the audio chunk
let speechProbability = 0;
@ -198,6 +169,7 @@ export class TranscriptionService {
// Acquire transcription mutex
await this.transcriptionMutex.acquire();
// Auto-create session if it doesn't exist
let session = this.streamingSessions.get(sessionId);
if (!session) {
@ -214,31 +186,18 @@ export class TranscriptionService {
streamingContext.sharedData.accessibilityContext =
appContextStore.getAccessibilityContext();
// Create audio file for this session
const audioFilePath = await this.createAudioFile(sessionId);
// Create streaming WAV writer
const wavWriter = new StreamingWavWriter(audioFilePath);
session = {
context: streamingContext,
transcriptionResults: [],
audioFilePath,
wavWriter,
};
this.streamingSessions.set(sessionId, session);
logger.transcription.info("Started streaming session", {
sessionId,
audioFilePath,
});
}
// Write audio chunk to WAV file immediately
if (audioChunk.length > 0 && session.wavWriter) {
await session.wavWriter.appendAudio(audioChunk);
}
// Process chunk if it has content
if (audioChunk.length > 0) {
// Direct frame to Whisper - it will handle aggregation and VAD internally
@ -285,7 +244,7 @@ export class TranscriptionService {
// Release transcription mutex
this.transcriptionMutex.release();
let completeTranscriptionTillNow = session.transcriptionResults
const completeTranscriptionTillNow = session.transcriptionResults
.join(" ")
.trim();
@ -302,13 +261,11 @@ export class TranscriptionService {
chunkCount: session.transcriptionResults.length,
});
// Format if enabled (currently disabled with && false)
// Commenting out to fix TypeScript errors since this code path is never executed
/*
if (this.formatterEnabled && this.openRouterProvider && false) {
if (this.formatterEnabled && this.openRouterProvider) {
try {
const style =
session.context.sharedData.userPreferences?.formattingStyle;
completeTranscription = await this.openRouterProvider.format({
const formattedText = await this.openRouterProvider.format({
text: completeTranscription,
context: {
style,
@ -324,24 +281,31 @@ export class TranscriptionService {
aggregatedTranscription: completeTranscription,
},
});
}
*/
// Finalize the WAV file
if (session.wavWriter) {
await session.wavWriter.finalize();
logger.transcription.info("Finalized WAV file", {
sessionId,
filePath: session.audioFilePath,
dataSize: session.wavWriter.getDataSize(),
});
logger.transcription.info("Text formatted successfully", {
sessionId,
originalLength: completeTranscription.length,
formattedLength: formattedText.length,
});
completeTranscription = formattedText;
} catch (error) {
logger.transcription.error(
"Formatting failed, using unformatted text",
{
sessionId,
error,
},
);
// Continue with unformatted text
}
}
// Save directly to database
logger.transcription.info("Saving transcription with audio file", {
sessionId,
audioFilePath: session.audioFilePath,
hasAudioFile: !!session.audioFilePath,
audioFilePath,
hasAudioFile: !!audioFilePath,
});
await createTranscription({
@ -350,7 +314,7 @@ export class TranscriptionService {
duration: session.context.sharedData.audioMetadata?.duration,
speechModel: "whisper-local",
formattingModel: this.formatterEnabled ? "openrouter" : undefined,
audioFile: session.audioFilePath,
audioFile: audioFilePath,
meta: {
sessionId,
source: session.context.sharedData.audioMetadata?.source,

View file

@ -66,6 +66,10 @@ export class StreamingWavWriter {
* @param audioData Float32Array of audio samples
*/
async appendAudio(audioData: Float32Array): Promise<void> {
if (!audioData.length) {
return;
}
if (this.isFinalized) {
throw new Error("Cannot append to finalized WAV file");
}