chore: decouple audio file logging + transcription svc
This commit is contained in:
parent
c71241d264
commit
95ce6bea36
3 changed files with 96 additions and 78 deletions
|
|
@ -1,10 +1,13 @@
|
|||
import { ipcMain } from "electron";
|
||||
import { ipcMain, app } from "electron";
|
||||
import { EventEmitter } from "node:events";
|
||||
import { logger, logPerformance } from "../logger";
|
||||
import { ServiceManager } from "./service-manager";
|
||||
import type { RecordingState } from "../../types/recording";
|
||||
import { Mutex } from "async-mutex";
|
||||
import type { ShortcutManager } from "../services/shortcut-manager";
|
||||
import { StreamingWavWriter } from "../../utils/streaming-wav-writer";
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
|
||||
export type RecordingMode = "idle" | "ptt" | "hands-free";
|
||||
|
||||
|
|
@ -17,6 +20,10 @@ export class RecordingManager extends EventEmitter {
|
|||
private recordingState: RecordingState = "idle";
|
||||
private recordingMutex = new Mutex();
|
||||
private recordingMode: RecordingMode = "idle";
|
||||
private currentAudioRecording: {
|
||||
audioFilePath: string;
|
||||
wavWriter: StreamingWavWriter;
|
||||
} | null = null;
|
||||
|
||||
constructor(private serviceManager: ServiceManager) {
|
||||
super();
|
||||
|
|
@ -87,6 +94,26 @@ export class RecordingManager extends EventEmitter {
|
|||
this.emit("mode-changed", this.getRecordingMode());
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare the location of the WAV file for a recording session.
*
* Ensures the "amical-audio" directory exists under Electron's temp path and
* builds a file name of the form `audio-<sessionId>-<timestamp>.wav`.
* Only the directory is created here; this method does not write the file.
* NOTE(review): presumably the StreamingWavWriter constructed by the caller
* writes the file itself — confirm.
*
* @param sessionId Identifier of the recording session, embedded in the file name.
* @returns The full path computed for the session's WAV file.
* @throws Rejects if the directory cannot be created (error from fs.promises.mkdir).
|
||||
*/
|
||||
private async createAudioFile(sessionId: string): Promise<string> {
|
||||
// Create audio directory in app temp path (recursive: no error if it already exists)
|
||||
const audioDir = path.join(app.getPath("temp"), "amical-audio");
|
||||
await fs.promises.mkdir(audioDir, { recursive: true });
|
||||
|
||||
// Build the file path; ":" and "." in the ISO timestamp are replaced with "-"
|
||||
const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
|
||||
const filePath = path.join(audioDir, `audio-${sessionId}-${timestamp}.wav`);
|
||||
|
||||
// Log the computed path together with the session it belongs to
logger.audio.info("Created audio file for session", {
|
||||
sessionId,
|
||||
filePath,
|
||||
});
|
||||
|
||||
return filePath;
|
||||
}
|
||||
|
||||
private setupIPCHandlers(): void {
|
||||
// Handle audio data chunks from renderer
|
||||
ipcMain.handle(
|
||||
|
|
@ -148,21 +175,28 @@ export class RecordingManager extends EventEmitter {
|
|||
const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
|
||||
this.currentSessionId = `session-${timestamp}`;
|
||||
|
||||
// Create audio file and WAV writer
|
||||
const audioFilePath = await this.createAudioFile(this.currentSessionId);
|
||||
this.currentAudioRecording = {
|
||||
audioFilePath,
|
||||
wavWriter: new StreamingWavWriter(audioFilePath),
|
||||
};
|
||||
|
||||
logger.audio.info("Audio recording initialized", {
|
||||
sessionId: this.currentSessionId,
|
||||
audioFilePath,
|
||||
});
|
||||
|
||||
// Mute system audio
|
||||
try {
|
||||
const swiftBridge = this.serviceManager.getService("swiftIOBridge");
|
||||
if (swiftBridge) {
|
||||
await swiftBridge.call("muteSystemAudio", {});
|
||||
//await swiftBridge.call("muteSystemAudio", {});
|
||||
}
|
||||
} catch (error) {
|
||||
logger.main.warn("Swift bridge not available for audio muting");
|
||||
}
|
||||
|
||||
// Refresh accessibility context - fire and forget
|
||||
// appContextStore.refreshAccessibilityData();
|
||||
|
||||
// TODO: Preload models if needed (Phase 2)
|
||||
|
||||
this.setState("recording");
|
||||
logger.audio.info("Recording started successfully", {
|
||||
sessionId: this.currentSessionId,
|
||||
|
|
@ -255,8 +289,10 @@ export class RecordingManager extends EventEmitter {
|
|||
}
|
||||
|
||||
// Session should already exist from startRecording
|
||||
if (!this.currentSessionId) {
|
||||
logger.audio.error("No session ID found while handling audio chunk");
|
||||
if (!this.currentSessionId || !this.currentAudioRecording) {
|
||||
logger.audio.error(
|
||||
"No session ID or audio recording found while handling audio chunk",
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -266,6 +302,8 @@ export class RecordingManager extends EventEmitter {
|
|||
return;
|
||||
}
|
||||
|
||||
await this.currentAudioRecording.wavWriter.appendAudio(chunk);
|
||||
|
||||
try {
|
||||
const transcriptionService = this.serviceManager.getService(
|
||||
"transcriptionService",
|
||||
|
|
@ -275,12 +313,13 @@ export class RecordingManager extends EventEmitter {
|
|||
}
|
||||
const startTime = Date.now();
|
||||
|
||||
// Process the chunk - pass isFinal flag
|
||||
// Process the chunk - pass isFinal flag and audio file path
|
||||
const transcriptionResult =
|
||||
await transcriptionService.processStreamingChunk({
|
||||
sessionId: this.currentSessionId,
|
||||
audioChunk: chunk,
|
||||
isFinal: isFinalChunk,
|
||||
audioFilePath: this.currentAudioRecording.audioFilePath,
|
||||
});
|
||||
|
||||
logger.audio.debug("Processed audio chunk", {
|
||||
|
|
@ -292,6 +331,14 @@ export class RecordingManager extends EventEmitter {
|
|||
|
||||
// If this was the final chunk, handle completion
|
||||
if (isFinalChunk) {
|
||||
// Finalize the WAV file
|
||||
await this.currentAudioRecording.wavWriter.finalize();
|
||||
logger.audio.info("Finalized WAV file", {
|
||||
sessionId: this.currentSessionId,
|
||||
filePath: this.currentAudioRecording.audioFilePath,
|
||||
dataSize: this.currentAudioRecording.wavWriter.getDataSize(),
|
||||
});
|
||||
|
||||
logPerformance("streaming transcription complete", startTime, {
|
||||
sessionId: this.currentSessionId,
|
||||
resultLength: transcriptionResult?.length || 0,
|
||||
|
|
@ -308,8 +355,9 @@ export class RecordingManager extends EventEmitter {
|
|||
await this.pasteTranscription(transcriptionResult);
|
||||
}
|
||||
|
||||
// Clean up session
|
||||
// Clean up session and audio recording
|
||||
this.currentSessionId = null;
|
||||
this.currentAudioRecording = null;
|
||||
|
||||
// Ensure state is idle after completion
|
||||
if (this.recordingState === "stopping") {
|
||||
|
|
@ -320,8 +368,9 @@ export class RecordingManager extends EventEmitter {
|
|||
logger.audio.error("Error processing audio chunk:", error);
|
||||
|
||||
if (isFinalChunk) {
|
||||
// Clean up session on error
|
||||
// Clean up session and audio recording on error
|
||||
this.currentSessionId = null;
|
||||
this.currentAudioRecording = null;
|
||||
this.setState("error");
|
||||
}
|
||||
}
|
||||
|
|
@ -363,8 +412,9 @@ export class RecordingManager extends EventEmitter {
|
|||
await this.stopRecording();
|
||||
}
|
||||
|
||||
// Clear any active session
|
||||
// Clear any active session and audio recording
|
||||
this.currentSessionId = null;
|
||||
this.currentAudioRecording = null;
|
||||
this.setState("idle");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,26 +13,16 @@ import { createTranscription } from "../db/transcriptions";
|
|||
import { logger } from "../main/logger";
|
||||
import { v4 as uuid } from "uuid";
|
||||
import { VADService } from "./vad-service";
|
||||
import { app } from "electron";
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
|
||||
import { StreamingWavWriter } from "../utils/streaming-wav-writer";
|
||||
import { Mutex } from "async-mutex";
|
||||
|
||||
/**
|
||||
* Service for audio transcription and optional formatting
|
||||
*/
|
||||
// StreamingSession extended with optional fields for the session's audio file.
interface ExtendedStreamingSession extends StreamingSession {
|
||||
// Writer that audio chunks are appended to and that is finalized with the session
wavWriter?: StreamingWavWriter;
|
||||
// Path of the session's WAV file; passed as `audioFile` when the transcription is saved
audioFilePath?: string;
|
||||
}
|
||||
|
||||
export class TranscriptionService {
|
||||
private whisperProvider: WhisperProvider;
|
||||
private openRouterProvider: OpenRouterProvider | null = null;
|
||||
private formatterEnabled = false;
|
||||
private streamingSessions: Map<string, ExtendedStreamingSession> = new Map();
|
||||
private streamingSessions: Map<string, StreamingSession> = new Map();
|
||||
private vadService: VADService | null;
|
||||
private settingsService: SettingsService;
|
||||
private vadMutex: Mutex;
|
||||
|
|
@ -143,26 +133,6 @@ export class TranscriptionService {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare the location of the WAV file for a streaming session.
*
* Ensures the "amical-audio" directory exists under Electron's temp path and
* builds a file name of the form `audio-<sessionId>-<timestamp>.wav`.
* Only the directory is created here; this method does not write the file.
*
* @param sessionId Identifier of the streaming session, embedded in the file name.
* @returns The full path computed for the session's WAV file.
* @throws Rejects if the directory cannot be created (error from fs.promises.mkdir).
|
||||
*/
|
||||
private async createAudioFile(sessionId: string): Promise<string> {
|
||||
// Create audio directory in app temp path (recursive: no error if it already exists)
|
||||
const audioDir = path.join(app.getPath("temp"), "amical-audio");
|
||||
await fs.promises.mkdir(audioDir, { recursive: true });
|
||||
|
||||
// Build the file path; ":" and "." in the ISO timestamp are replaced with "-"
|
||||
const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
|
||||
const filePath = path.join(audioDir, `audio-${sessionId}-${timestamp}.wav`);
|
||||
|
||||
// Log the computed path together with the session it belongs to
logger.transcription.info("Created audio file for session", {
|
||||
sessionId,
|
||||
filePath,
|
||||
});
|
||||
|
||||
return filePath;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a single audio chunk in streaming mode
|
||||
*/
|
||||
|
|
@ -170,8 +140,9 @@ export class TranscriptionService {
|
|||
sessionId: string;
|
||||
audioChunk: Float32Array;
|
||||
isFinal?: boolean;
|
||||
audioFilePath?: string;
|
||||
}): Promise<string> {
|
||||
const { sessionId, audioChunk, isFinal = false } = options;
|
||||
const { sessionId, audioChunk, isFinal = false, audioFilePath } = options;
|
||||
|
||||
// Run VAD on the audio chunk
|
||||
let speechProbability = 0;
|
||||
|
|
@ -198,6 +169,7 @@ export class TranscriptionService {
|
|||
|
||||
// Acquire transcription mutex
|
||||
await this.transcriptionMutex.acquire();
|
||||
|
||||
// Auto-create session if it doesn't exist
|
||||
let session = this.streamingSessions.get(sessionId);
|
||||
if (!session) {
|
||||
|
|
@ -214,31 +186,18 @@ export class TranscriptionService {
|
|||
streamingContext.sharedData.accessibilityContext =
|
||||
appContextStore.getAccessibilityContext();
|
||||
|
||||
// Create audio file for this session
|
||||
const audioFilePath = await this.createAudioFile(sessionId);
|
||||
|
||||
// Create streaming WAV writer
|
||||
const wavWriter = new StreamingWavWriter(audioFilePath);
|
||||
|
||||
session = {
|
||||
context: streamingContext,
|
||||
transcriptionResults: [],
|
||||
audioFilePath,
|
||||
wavWriter,
|
||||
};
|
||||
|
||||
this.streamingSessions.set(sessionId, session);
|
||||
|
||||
logger.transcription.info("Started streaming session", {
|
||||
sessionId,
|
||||
audioFilePath,
|
||||
});
|
||||
}
|
||||
|
||||
// Write audio chunk to WAV file immediately
|
||||
if (audioChunk.length > 0 && session.wavWriter) {
|
||||
await session.wavWriter.appendAudio(audioChunk);
|
||||
}
|
||||
|
||||
// Process chunk if it has content
|
||||
if (audioChunk.length > 0) {
|
||||
// Direct frame to Whisper - it will handle aggregation and VAD internally
|
||||
|
|
@ -285,7 +244,7 @@ export class TranscriptionService {
|
|||
|
||||
// Release transcription mutex
|
||||
this.transcriptionMutex.release();
|
||||
let completeTranscriptionTillNow = session.transcriptionResults
|
||||
const completeTranscriptionTillNow = session.transcriptionResults
|
||||
.join(" ")
|
||||
.trim();
|
||||
|
||||
|
|
@ -302,13 +261,11 @@ export class TranscriptionService {
|
|||
chunkCount: session.transcriptionResults.length,
|
||||
});
|
||||
|
||||
// Format if enabled (currently disabled with && false)
|
||||
// Commenting out to fix TypeScript errors since this code path is never executed
|
||||
/*
|
||||
if (this.formatterEnabled && this.openRouterProvider && false) {
|
||||
if (this.formatterEnabled && this.openRouterProvider) {
|
||||
try {
|
||||
const style =
|
||||
session.context.sharedData.userPreferences?.formattingStyle;
|
||||
completeTranscription = await this.openRouterProvider.format({
|
||||
const formattedText = await this.openRouterProvider.format({
|
||||
text: completeTranscription,
|
||||
context: {
|
||||
style,
|
||||
|
|
@ -324,24 +281,31 @@ export class TranscriptionService {
|
|||
aggregatedTranscription: completeTranscription,
|
||||
},
|
||||
});
|
||||
}
|
||||
*/
|
||||
|
||||
// Finalize the WAV file
|
||||
if (session.wavWriter) {
|
||||
await session.wavWriter.finalize();
|
||||
logger.transcription.info("Finalized WAV file", {
|
||||
sessionId,
|
||||
filePath: session.audioFilePath,
|
||||
dataSize: session.wavWriter.getDataSize(),
|
||||
});
|
||||
logger.transcription.info("Text formatted successfully", {
|
||||
sessionId,
|
||||
originalLength: completeTranscription.length,
|
||||
formattedLength: formattedText.length,
|
||||
});
|
||||
|
||||
completeTranscription = formattedText;
|
||||
} catch (error) {
|
||||
logger.transcription.error(
|
||||
"Formatting failed, using unformatted text",
|
||||
{
|
||||
sessionId,
|
||||
error,
|
||||
},
|
||||
);
|
||||
// Continue with unformatted text
|
||||
}
|
||||
}
|
||||
|
||||
// Save directly to database
|
||||
logger.transcription.info("Saving transcription with audio file", {
|
||||
sessionId,
|
||||
audioFilePath: session.audioFilePath,
|
||||
hasAudioFile: !!session.audioFilePath,
|
||||
audioFilePath,
|
||||
hasAudioFile: !!audioFilePath,
|
||||
});
|
||||
|
||||
await createTranscription({
|
||||
|
|
@ -350,7 +314,7 @@ export class TranscriptionService {
|
|||
duration: session.context.sharedData.audioMetadata?.duration,
|
||||
speechModel: "whisper-local",
|
||||
formattingModel: this.formatterEnabled ? "openrouter" : undefined,
|
||||
audioFile: session.audioFilePath,
|
||||
audioFile: audioFilePath,
|
||||
meta: {
|
||||
sessionId,
|
||||
source: session.context.sharedData.audioMetadata?.source,
|
||||
|
|
|
|||
|
|
@ -66,6 +66,10 @@ export class StreamingWavWriter {
|
|||
* @param audioData Float32Array of audio samples
|
||||
*/
|
||||
async appendAudio(audioData: Float32Array): Promise<void> {
|
||||
if (!audioData.length) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (this.isFinalized) {
|
||||
throw new Error("Cannot append to finalized WAV file");
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue