Optimise local transcription calls (#33)
* chore: move audio worklet file to assets * chore: get rid of rickyvad and use vad model directly * fix: handling of onnxruntime in packaged app * chore: run ci on macos * fix: formatting
This commit is contained in:
parent
e4b4e92be4
commit
5eb5777001
20 changed files with 775 additions and 521 deletions
2
.github/workflows/ci.yml
vendored
2
.github/workflows/ci.yml
vendored
|
|
@ -13,7 +13,7 @@ env:
|
|||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: macos-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ export const EXTERNAL_DEPENDENCIES = [
|
|||
"@libsql/linux-x64-musl",
|
||||
"@libsql/win32-x64-msvc",
|
||||
"libsql",
|
||||
"onnxruntime-node",
|
||||
// Add any other native modules you need here
|
||||
];
|
||||
|
||||
|
|
@ -195,13 +196,16 @@ const config: ForgeConfig = {
|
|||
},
|
||||
},
|
||||
packagerConfig: {
|
||||
asar: true,
|
||||
asar: {
|
||||
unpack: "{*.node,*.dylib,*.so,*.dll}",
|
||||
},
|
||||
name: "Amical",
|
||||
executableName: "Amical",
|
||||
icon: "./assets/logo.icns", // Path to your icon file
|
||||
extraResource: [
|
||||
"../../packages/native-helpers/swift-helper/bin",
|
||||
"./src/db/migrations",
|
||||
"./src/assets",
|
||||
],
|
||||
extendInfo: {
|
||||
NSMicrophoneUsageDescription:
|
||||
|
|
|
|||
|
|
@ -86,7 +86,6 @@
|
|||
"@radix-ui/react-toggle": "^1.1.9",
|
||||
"@radix-ui/react-toggle-group": "^1.1.10",
|
||||
"@radix-ui/react-tooltip": "^1.2.7",
|
||||
"@ricky0123/vad-web": "^0.0.24",
|
||||
"@tabler/icons-react": "^3.34.0",
|
||||
"@tanstack/react-query": "^5.81.2",
|
||||
"@tanstack/react-table": "^8.21.3",
|
||||
|
|
@ -116,6 +115,7 @@
|
|||
"libsql": "^0.5.13",
|
||||
"lucide-react": "^0.510.0",
|
||||
"next-themes": "^0.4.6",
|
||||
"onnxruntime-node": "^1.20.1",
|
||||
"openai": "^4.98.0",
|
||||
"react": "^19.1.0",
|
||||
"react-day-picker": "8.10.1",
|
||||
|
|
|
|||
56
apps/desktop/src/assets/audio-recorder-processor.js
Normal file
56
apps/desktop/src/assets/audio-recorder-processor.js
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
/**
 * AudioWorklet processor that regroups the incoming render-quantum samples of
 * the first channel of the first input into fixed-size frames and posts each
 * completed frame to the node's message port.
 *
 * Outgoing messages: `{ type: 'audioFrame', frame: Float32Array, isFinal: boolean }`.
 * Incoming messages: `{ type: 'flush' }` posts whatever partial frame is
 * currently buffered (possibly zero samples) with `isFinal: true`.
 */
class AudioRecorderProcessor extends AudioWorkletProcessor {
  constructor() {
    super();
    this.frameSize = 512; // 32ms at 16kHz
    this.sampleRate = 16000; // informational only; not read inside this class

    // Preallocated staging area for one frame. Using a typed array with a
    // fill counter avoids per-sample Array.push and per-frame Array.slice
    // allocations inside process().
    this.buffer = new Float32Array(this.frameSize);
    this.bufferLength = 0;

    // Listen for control messages
    this.port.onmessage = (event) => {
      // Guard against messages without a payload before reading `type`.
      if (event.data && event.data.type === 'flush') {
        this.flushBuffer();
      }
    };
  }

  /**
   * Posts the currently buffered partial frame as the final frame and empties
   * the buffer. A final frame is always sent, even when no samples are pending,
   * so the receiver gets an end-of-recording signal.
   */
  flushBuffer() {
    // slice() copies, so the posted frame is independent of the staging buffer.
    const finalFrame = this.buffer.slice(0, this.bufferLength);
    this.bufferLength = 0;

    this.port.postMessage({
      type: 'audioFrame',
      frame: finalFrame,
      isFinal: true
    });
  }

  /**
   * Appends the incoming samples to the staging buffer and posts one
   * `audioFrame` message (isFinal: false) each time `frameSize` samples have
   * accumulated. Leftover samples stay buffered for the next call.
   *
   * @param inputs - per-input arrays of per-channel Float32Array sample blocks;
   *   only inputs[0][0] is read.
   * @returns true in all cases, including when there is no input to read.
   */
  process(inputs, outputs, parameters) {
    const input = inputs[0];
    if (!input || !input[0]) return true;

    const channelData = input[0];
    let readOffset = 0;

    while (readOffset < channelData.length) {
      // Copy as many samples as fit into the remainder of the current frame.
      const count = Math.min(
        this.frameSize - this.bufferLength,
        channelData.length - readOffset
      );
      this.buffer.set(
        channelData.subarray(readOffset, readOffset + count),
        this.bufferLength
      );
      this.bufferLength += count;
      readOffset += count;

      if (this.bufferLength === this.frameSize) {
        // Send a copy of the full frame to the main thread, then reuse the
        // staging buffer for the next frame.
        this.port.postMessage({
          type: 'audioFrame',
          frame: this.buffer.slice(),
          isFinal: false
        });
        this.bufferLength = 0;
      }
    }

    return true;
  }
}

registerProcessor('audio-recorder-processor', AudioRecorderProcessor);
|
||||
BIN
apps/desktop/src/assets/silero_vad_v5.onnx
Normal file
BIN
apps/desktop/src/assets/silero_vad_v5.onnx
Normal file
Binary file not shown.
|
|
@ -1,65 +0,0 @@
|
|||
// AudioWorklet processor source code
|
||||
export const audioRecorderWorkletSource = `
|
||||
// AudioWorklet processor for real-time audio capture
|
||||
// This runs in the audio rendering thread for low-latency processing
|
||||
/* eslint-env worker */
|
||||
/* global AudioWorkletProcessor, registerProcessor */
|
||||
|
||||
class AudioRecorderProcessor extends AudioWorkletProcessor {
|
||||
constructor() {
|
||||
super();
|
||||
this.bufferSize = 4096;
|
||||
this.buffer = new Float32Array(this.bufferSize);
|
||||
this.bufferIndex = 0;
|
||||
|
||||
// Listen for messages from main thread
|
||||
this.port.onmessage = (event) => {
|
||||
if (event.data.command === 'stop') {
|
||||
this.sendBufferedAudio(true); // Send final chunk
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
process(inputs, _outputs, _parameters) {
|
||||
const input = inputs[0];
|
||||
|
||||
// Check if we have input audio
|
||||
if (input && input.length > 0) {
|
||||
const inputChannel = input[0]; // Get first (mono) channel
|
||||
|
||||
// Buffer the audio data
|
||||
for (let i = 0; i < inputChannel.length; i++) {
|
||||
this.buffer[this.bufferIndex] = inputChannel[i];
|
||||
this.bufferIndex++;
|
||||
|
||||
// When buffer is full, send it to main thread
|
||||
if (this.bufferIndex >= this.bufferSize) {
|
||||
this.sendBufferedAudio(false);
|
||||
this.bufferIndex = 0; // Reset buffer
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Keep the processor alive
|
||||
return true;
|
||||
}
|
||||
|
||||
sendBufferedAudio(isFinal) {
|
||||
if (this.bufferIndex > 0 || isFinal) {
|
||||
// Create a copy of the current buffer data
|
||||
const audioData = new Float32Array(this.bufferIndex);
|
||||
audioData.set(this.buffer.subarray(0, this.bufferIndex));
|
||||
|
||||
// Send to main thread
|
||||
this.port.postMessage({
|
||||
type: 'audioData',
|
||||
audioData: audioData,
|
||||
isFinal: isFinal,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Register the processor
|
||||
registerProcessor('audio-recorder-processor', AudioRecorderProcessor);
|
||||
`;
|
||||
|
|
@ -1,13 +1,17 @@
|
|||
import { useState, useRef, useEffect } from "react";
|
||||
import { MicVAD } from "@ricky0123/vad-web";
|
||||
import { audioRecorderWorkletSource } from "./audio-recorder-worklet";
|
||||
import { useRef, useEffect, useState, useCallback } from "react";
|
||||
import audioWorkletUrl from "@/assets/audio-recorder-processor.js?url";
|
||||
import { api } from "@/trpc/react";
|
||||
|
||||
// Audio configuration
|
||||
const FRAME_SIZE = 512; // 32ms at 16kHz
|
||||
const SAMPLE_RATE = 16000;
|
||||
|
||||
export interface UseAudioCaptureParams {
|
||||
onAudioChunk: (
|
||||
arrayBuffer: ArrayBuffer,
|
||||
speechProbability: number,
|
||||
isFinalChunk: boolean,
|
||||
) => Promise<void> | void;
|
||||
chunkDurationMs?: number;
|
||||
enabled: boolean;
|
||||
}
|
||||
|
||||
|
|
@ -15,268 +19,136 @@ export interface UseAudioCaptureOutput {
|
|||
voiceDetected: boolean;
|
||||
}
|
||||
|
||||
interface AudioCaptureState {
|
||||
stream: MediaStream | null;
|
||||
vad: MicVAD | null;
|
||||
audioContext: AudioContext | null;
|
||||
audioWorkletNode: AudioWorkletNode | null;
|
||||
source: MediaStreamAudioSourceNode | null;
|
||||
chunkTimer: NodeJS.Timeout | null;
|
||||
pendingAudioChunks: Float32Array[];
|
||||
sendAudioChunk: ((isFinal: boolean) => Promise<void>) | null;
|
||||
}
|
||||
|
||||
export const useAudioCapture = ({
|
||||
onAudioChunk,
|
||||
chunkDurationMs = 28000,
|
||||
enabled,
|
||||
}: UseAudioCaptureParams): UseAudioCaptureOutput => {
|
||||
const [voiceDetected, setVoiceDetected] = useState(false);
|
||||
const stateRef = useRef<AudioCaptureState>({
|
||||
stream: null,
|
||||
vad: null,
|
||||
audioContext: null,
|
||||
audioWorkletNode: null,
|
||||
source: null,
|
||||
chunkTimer: null,
|
||||
pendingAudioChunks: [],
|
||||
sendAudioChunk: null,
|
||||
const audioContextRef = useRef<AudioContext | null>(null);
|
||||
const sourceRef = useRef<MediaStreamAudioSourceNode | null>(null);
|
||||
const workletNodeRef = useRef<AudioWorkletNode | null>(null);
|
||||
const streamRef = useRef<MediaStream | null>(null);
|
||||
|
||||
// Subscribe to voice detection updates via tRPC
|
||||
api.recording.voiceDetectionUpdates.useSubscription(undefined, {
|
||||
onData: (detected: boolean) => {
|
||||
setVoiceDetected(detected);
|
||||
},
|
||||
onError: (err) => {
|
||||
console.error("Voice detection subscription error:", err);
|
||||
},
|
||||
});
|
||||
|
||||
// Main effect to handle enabled state changes
|
||||
useEffect(() => {
|
||||
let isCancelled = false;
|
||||
const startCapture = useCallback(async () => {
|
||||
try {
|
||||
console.log("AudioCapture: Starting audio capture");
|
||||
|
||||
const cleanup = async () => {
|
||||
const state = stateRef.current;
|
||||
|
||||
// Send final chunk if we have pending audio
|
||||
if (state.sendAudioChunk) {
|
||||
try {
|
||||
await state.sendAudioChunk(true);
|
||||
} catch (error) {
|
||||
console.error("AudioCapture: Error sending final chunk:", error);
|
||||
}
|
||||
}
|
||||
|
||||
// Clear chunk timer
|
||||
if (state.chunkTimer) {
|
||||
clearInterval(state.chunkTimer);
|
||||
state.chunkTimer = null;
|
||||
}
|
||||
|
||||
// Cleanup AudioWorklet
|
||||
if (state.audioWorkletNode) {
|
||||
state.audioWorkletNode.port.postMessage({ command: "stop" });
|
||||
state.audioWorkletNode.disconnect();
|
||||
state.audioWorkletNode = null;
|
||||
}
|
||||
|
||||
if (state.source) {
|
||||
state.source.disconnect();
|
||||
state.source = null;
|
||||
}
|
||||
|
||||
if (state.audioContext && state.audioContext.state !== "closed") {
|
||||
await state.audioContext.close();
|
||||
state.audioContext = null;
|
||||
}
|
||||
|
||||
// Cleanup VAD
|
||||
if (state.vad) {
|
||||
try {
|
||||
state.vad.destroy();
|
||||
console.log("AudioCapture: VAD destroyed");
|
||||
} catch (e) {
|
||||
console.error("Error destroying VAD:", e);
|
||||
}
|
||||
state.vad = null;
|
||||
}
|
||||
|
||||
// Stop media stream
|
||||
if (state.stream) {
|
||||
state.stream.getTracks().forEach((track) => {
|
||||
try {
|
||||
track.stop();
|
||||
} catch (e) {
|
||||
console.error("Error stopping stream track:", e);
|
||||
}
|
||||
});
|
||||
state.stream = null;
|
||||
}
|
||||
|
||||
// Reset state
|
||||
state.pendingAudioChunks = [];
|
||||
state.sendAudioChunk = null;
|
||||
setVoiceDetected(false);
|
||||
|
||||
console.log("AudioCapture: Cleaned up");
|
||||
};
|
||||
|
||||
const startCapture = async () => {
|
||||
console.log("AudioCapture: Starting capture...");
|
||||
|
||||
try {
|
||||
// Get microphone access
|
||||
const stream = await navigator.mediaDevices.getUserMedia({
|
||||
audio: true,
|
||||
});
|
||||
if (isCancelled) {
|
||||
stream.getTracks().forEach((track) => track.stop());
|
||||
return;
|
||||
}
|
||||
stateRef.current.stream = stream;
|
||||
|
||||
// Set up Web Audio API with AudioWorklet for raw PCM data
|
||||
const audioContext = new AudioContext({ sampleRate: 16000 });
|
||||
stateRef.current.audioContext = audioContext;
|
||||
|
||||
// Load AudioWorklet module using blob URL
|
||||
const blob = new Blob([audioRecorderWorkletSource], {
|
||||
type: "application/javascript",
|
||||
});
|
||||
const audioWorkletUrl = URL.createObjectURL(blob);
|
||||
|
||||
try {
|
||||
await audioContext.audioWorklet.addModule(audioWorkletUrl);
|
||||
} finally {
|
||||
URL.revokeObjectURL(audioWorkletUrl);
|
||||
}
|
||||
|
||||
if (isCancelled) {
|
||||
await cleanup();
|
||||
return;
|
||||
}
|
||||
|
||||
const source = audioContext.createMediaStreamSource(stream);
|
||||
stateRef.current.source = source;
|
||||
|
||||
// Create AudioWorklet node
|
||||
const audioWorkletNode = new AudioWorkletNode(
|
||||
audioContext,
|
||||
"audio-recorder-processor",
|
||||
);
|
||||
stateRef.current.audioWorkletNode = audioWorkletNode;
|
||||
|
||||
// Create function to send accumulated chunks
|
||||
const sendAudioChunk = async (isFinal = false) => {
|
||||
const pendingChunks = stateRef.current.pendingAudioChunks;
|
||||
if (pendingChunks.length > 0) {
|
||||
// Combine all pending chunks into one array
|
||||
const totalLength = pendingChunks.reduce(
|
||||
(sum, chunk) => sum + chunk.length,
|
||||
0,
|
||||
);
|
||||
const combinedChunk = new Float32Array(totalLength);
|
||||
let offset = 0;
|
||||
|
||||
for (const chunk of pendingChunks) {
|
||||
combinedChunk.set(chunk, offset);
|
||||
offset += chunk.length;
|
||||
}
|
||||
|
||||
// Convert Float32Array to ArrayBuffer for IPC
|
||||
const arrayBuffer = combinedChunk.buffer.slice(
|
||||
combinedChunk.byteOffset,
|
||||
combinedChunk.byteOffset + combinedChunk.byteLength,
|
||||
);
|
||||
|
||||
try {
|
||||
await onAudioChunk(arrayBuffer, isFinal);
|
||||
console.log(
|
||||
`AudioCapture: Sent chunk: ${combinedChunk.length} samples, final: ${isFinal}`,
|
||||
);
|
||||
} catch (error) {
|
||||
console.error("AudioCapture: Error processing chunk:", error);
|
||||
}
|
||||
|
||||
stateRef.current.pendingAudioChunks = []; // Clear chunks after sending
|
||||
}
|
||||
};
|
||||
|
||||
stateRef.current.sendAudioChunk = sendAudioChunk;
|
||||
|
||||
// Handle messages from AudioWorklet
|
||||
audioWorkletNode.port.onmessage = (event) => {
|
||||
if (event.data.type === "audioData") {
|
||||
const audioData = event.data.audioData as Float32Array;
|
||||
const isFinal = event.data.isFinal as boolean;
|
||||
|
||||
// Store the audio chunk
|
||||
stateRef.current.pendingAudioChunks.push(audioData);
|
||||
|
||||
if (isFinal) {
|
||||
// Send final chunk immediately
|
||||
sendAudioChunk(true);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Set up periodic chunk sending
|
||||
const chunkTimer = setInterval(() => {
|
||||
sendAudioChunk(false);
|
||||
}, chunkDurationMs);
|
||||
stateRef.current.chunkTimer = chunkTimer;
|
||||
|
||||
// Connect the audio processing chain
|
||||
source.connect(audioWorkletNode);
|
||||
console.log("AudioCapture: Connected AudioWorklet processing chain");
|
||||
|
||||
// Set up VAD
|
||||
const vad = await MicVAD.new({
|
||||
stream,
|
||||
model: "v5",
|
||||
onSpeechStart: () => {
|
||||
// Check if component is still mounted before updating state
|
||||
if (!isCancelled) {
|
||||
console.log("VAD: Speech started");
|
||||
setVoiceDetected(true);
|
||||
}
|
||||
},
|
||||
onSpeechEnd: () => {
|
||||
console.log("VAD: Speech ended");
|
||||
// Check if component is still mounted before updating state
|
||||
if (!isCancelled) {
|
||||
console.log("VAD: Speech ended");
|
||||
setVoiceDetected(false);
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
// Store VAD reference immediately to ensure proper cleanup
|
||||
stateRef.current.vad = vad;
|
||||
|
||||
if (isCancelled) {
|
||||
await cleanup();
|
||||
return;
|
||||
}
|
||||
|
||||
vad.start();
|
||||
console.log("AudioCapture: VAD started");
|
||||
|
||||
console.log("AudioCapture: Fully started");
|
||||
} catch (err) {
|
||||
console.error("AudioCapture: Error starting:", err);
|
||||
await cleanup();
|
||||
throw err;
|
||||
}
|
||||
};
|
||||
|
||||
// Handle enabled state
|
||||
if (enabled) {
|
||||
startCapture().catch((err) => {
|
||||
console.error("AudioCapture: Failed to start:", err);
|
||||
// Get microphone stream
|
||||
streamRef.current = await navigator.mediaDevices.getUserMedia({
|
||||
audio: {
|
||||
channelCount: 1,
|
||||
sampleRate: SAMPLE_RATE,
|
||||
echoCancellation: true,
|
||||
noiseSuppression: true,
|
||||
autoGainControl: true,
|
||||
},
|
||||
});
|
||||
|
||||
// Create audio context
|
||||
audioContextRef.current = new AudioContext({ sampleRate: SAMPLE_RATE });
|
||||
|
||||
// Load audio worklet
|
||||
await audioContextRef.current.audioWorklet.addModule(audioWorkletUrl);
|
||||
|
||||
// Create nodes
|
||||
sourceRef.current = audioContextRef.current.createMediaStreamSource(
|
||||
streamRef.current,
|
||||
);
|
||||
workletNodeRef.current = new AudioWorkletNode(
|
||||
audioContextRef.current,
|
||||
"audio-recorder-processor",
|
||||
);
|
||||
|
||||
// Handle audio frames from worklet
|
||||
workletNodeRef.current.port.onmessage = async (event) => {
|
||||
if (event.data.type === "audioFrame") {
|
||||
const frame = event.data.frame;
|
||||
const isFinal = event.data.isFinal || false;
|
||||
|
||||
// Convert to ArrayBuffer for IPC
|
||||
const arrayBuffer = frame.buffer.slice(
|
||||
frame.byteOffset,
|
||||
frame.byteOffset + frame.byteLength,
|
||||
);
|
||||
|
||||
// Send to main process for VAD processing
|
||||
// Main process will update voice detection state
|
||||
await onAudioChunk(arrayBuffer, 0, isFinal); // Speech probability will come from main
|
||||
|
||||
console.log(
|
||||
`AudioCapture: Sent frame: ${frame.length} samples, isFinal: ${isFinal}`,
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
// Connect audio graph
|
||||
sourceRef.current.connect(workletNodeRef.current);
|
||||
|
||||
console.log("AudioCapture: Audio capture started");
|
||||
} catch (error) {
|
||||
console.error("AudioCapture: Failed to start capture:", error);
|
||||
throw error;
|
||||
}
|
||||
}, [onAudioChunk]);
|
||||
|
||||
const stopCapture = useCallback(() => {
|
||||
console.log("AudioCapture: Stopping audio capture");
|
||||
|
||||
// Send flush command to worklet before disconnecting
|
||||
if (workletNodeRef.current) {
|
||||
workletNodeRef.current.port.postMessage({ type: "flush" });
|
||||
console.log("AudioCapture: Sent flush command to worklet");
|
||||
}
|
||||
|
||||
// Cleanup function
|
||||
return () => {
|
||||
isCancelled = true;
|
||||
cleanup().catch((err) => {
|
||||
console.error("AudioCapture: Cleanup error:", err);
|
||||
// Disconnect nodes
|
||||
if (sourceRef.current && workletNodeRef.current) {
|
||||
sourceRef.current.disconnect(workletNodeRef.current);
|
||||
}
|
||||
|
||||
// Close audio context
|
||||
if (audioContextRef.current && audioContextRef.current.state !== "closed") {
|
||||
audioContextRef.current.close();
|
||||
}
|
||||
|
||||
// Stop media stream
|
||||
if (streamRef.current) {
|
||||
streamRef.current.getTracks().forEach((track) => track.stop());
|
||||
}
|
||||
|
||||
// Clear refs
|
||||
audioContextRef.current = null;
|
||||
sourceRef.current = null;
|
||||
workletNodeRef.current = null;
|
||||
streamRef.current = null;
|
||||
|
||||
setVoiceDetected(false);
|
||||
console.log("AudioCapture: Audio capture stopped");
|
||||
}, []);
|
||||
|
||||
// Start/stop based on enabled state
|
||||
useEffect(() => {
|
||||
if (enabled) {
|
||||
startCapture().catch((error) => {
|
||||
console.error("AudioCapture: Failed to start:", error);
|
||||
});
|
||||
} else {
|
||||
stopCapture();
|
||||
}
|
||||
|
||||
return () => {
|
||||
stopCapture();
|
||||
};
|
||||
}, [enabled, onAudioChunk, chunkDurationMs]);
|
||||
}, [enabled, startCapture, stopCapture]);
|
||||
|
||||
return {
|
||||
voiceDetected,
|
||||
|
|
|
|||
|
|
@ -4,11 +4,11 @@ import { useAudioCapture } from "./useAudioCapture";
|
|||
import type { RecordingState } from "@/types/recording";
|
||||
|
||||
export interface UseRecordingParams {
|
||||
onAudioChunk: (
|
||||
arrayBuffer: ArrayBuffer,
|
||||
isFinalChunk: boolean,
|
||||
onAudioFrame: (
|
||||
audioBuffer: ArrayBuffer,
|
||||
speechProbability: number,
|
||||
isFinal: boolean,
|
||||
) => Promise<void> | void;
|
||||
chunkDurationMs?: number;
|
||||
onRecordingStartCallback?: () => Promise<void> | void;
|
||||
onRecordingStopCallback?: () => Promise<void> | void;
|
||||
}
|
||||
|
|
@ -21,8 +21,7 @@ export interface UseRecordingOutput {
|
|||
}
|
||||
|
||||
export const useRecording = ({
|
||||
onAudioChunk,
|
||||
chunkDurationMs = 28000,
|
||||
onAudioFrame,
|
||||
onRecordingStartCallback,
|
||||
onRecordingStopCallback,
|
||||
}: UseRecordingParams): UseRecordingOutput => {
|
||||
|
|
@ -33,13 +32,25 @@ export const useRecording = ({
|
|||
stopRecording: stopRecordingMutation,
|
||||
} = useRecordingState();
|
||||
|
||||
// Create handler for audio chunks - just pass through
|
||||
const handleAudioChunk = useCallback(
|
||||
async (
|
||||
arrayBuffer: ArrayBuffer,
|
||||
speechProbability: number,
|
||||
isFinalChunk: boolean,
|
||||
) => {
|
||||
// Direct pass-through - no aggregation needed
|
||||
await onAudioFrame(arrayBuffer, speechProbability, isFinalChunk);
|
||||
},
|
||||
[onAudioFrame],
|
||||
);
|
||||
|
||||
// Manage audio capture when recording is active
|
||||
const isActive =
|
||||
recordingStatus === "recording" || recordingStatus === "starting";
|
||||
|
||||
const { voiceDetected } = useAudioCapture({
|
||||
onAudioChunk,
|
||||
chunkDurationMs,
|
||||
onAudioChunk: handleAudioChunk,
|
||||
enabled: isActive,
|
||||
});
|
||||
|
||||
|
|
@ -121,7 +132,12 @@ export const useRecording = ({
|
|||
} catch (error) {
|
||||
console.error("Hook: Error stopping recording:", error);
|
||||
}
|
||||
}, [recordingStatus, stopRecordingMutation, onRecordingStopCallback]);
|
||||
}, [
|
||||
recordingStatus,
|
||||
stopRecordingMutation,
|
||||
onRecordingStopCallback,
|
||||
onAudioFrame,
|
||||
]);
|
||||
|
||||
return {
|
||||
recordingStatus,
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import { logger, logPerformance } from "../logger";
|
|||
import { ServiceManager } from "./service-manager";
|
||||
import { appContextStore } from "../../stores/app-context";
|
||||
import type { RecordingState, RecordingStatus } from "../../types/recording";
|
||||
import { WindowManager } from "../core/window-manager";
|
||||
|
||||
/**
|
||||
* Manages recording state and coordinates audio recording across the application
|
||||
|
|
@ -13,12 +14,17 @@ export class RecordingManager extends EventEmitter {
|
|||
private currentSessionId: string | null = null;
|
||||
private recordingState: RecordingState = "idle";
|
||||
private lastError: string | undefined;
|
||||
private windowManager: WindowManager | null = null;
|
||||
|
||||
constructor(private serviceManager: ServiceManager) {
|
||||
super();
|
||||
this.setupIPCHandlers();
|
||||
}
|
||||
|
||||
/**
 * Stores the window manager reference on this recording manager.
 *
 * @param windowManager - instance to keep in the `windowManager` field,
 *   replacing any previously stored one.
 */
public setWindowManager(windowManager: WindowManager): void {
  this.windowManager = windowManager;
}
|
||||
|
||||
private setState(newState: RecordingState, error?: string): void {
|
||||
const oldState = this.recordingState;
|
||||
this.recordingState = newState;
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import { SwiftIOBridge } from "../../services/platform/swift-bridge-service";
|
|||
import { AutoUpdaterService } from "../services/auto-updater";
|
||||
import { WindowManager } from "../core/window-manager";
|
||||
import { RecordingManager } from "./recording-manager";
|
||||
import { VADService } from "../../services/vad-service";
|
||||
|
||||
/**
|
||||
* Manages service initialization and lifecycle
|
||||
|
|
@ -17,6 +18,7 @@ export class ServiceManager {
|
|||
private modelManagerService: ModelManagerService | null = null;
|
||||
private transcriptionService: TranscriptionService | null = null;
|
||||
private settingsService: SettingsService | null = null;
|
||||
private vadService: VADService | null = null;
|
||||
|
||||
private swiftIOBridge: SwiftIOBridge | null = null;
|
||||
private autoUpdaterService: AutoUpdaterService | null = null;
|
||||
|
|
@ -34,8 +36,9 @@ export class ServiceManager {
|
|||
this.initializeSettingsService();
|
||||
await this.initializeModelServices();
|
||||
this.initializePlatformServices();
|
||||
await this.initializeVADService();
|
||||
await this.initializeAIServices();
|
||||
this.initializeRecordingManager();
|
||||
this.initializeRecordingManager(windowManager);
|
||||
this.initializeAutoUpdater(windowManager);
|
||||
|
||||
this.isInitialized = true;
|
||||
|
|
@ -57,6 +60,17 @@ export class ServiceManager {
|
|||
await this.modelManagerService.initialize();
|
||||
}
|
||||
|
||||
/**
 * Creates the VAD service and initializes it.
 *
 * Any failure is logged and swallowed so start-up can continue without VAD.
 * The instance is stored in `this.vadService` only after `initialize()` has
 * resolved; on failure the field is set to `null`, so a half-initialized
 * service is never exposed through this field.
 */
private async initializeVADService(): Promise<void> {
  try {
    const vadService = new VADService();
    await vadService.initialize();
    // Publish only a fully initialized instance.
    this.vadService = vadService;
    logger.main.info("VAD service initialized");
  } catch (error) {
    this.vadService = null;
    logger.main.error("Failed to initialize VAD service:", error);
    // Don't throw - VAD is not critical for basic functionality
  }
}
|
||||
|
||||
private async initializeAIServices(): Promise<void> {
|
||||
try {
|
||||
if (!this.modelManagerService) {
|
||||
|
|
@ -65,7 +79,9 @@ export class ServiceManager {
|
|||
|
||||
this.transcriptionService = new TranscriptionService(
|
||||
this.modelManagerService,
|
||||
this.vadService,
|
||||
);
|
||||
await this.transcriptionService.initialize();
|
||||
|
||||
// Load and configure formatter
|
||||
try {
|
||||
|
|
@ -109,8 +125,9 @@ export class ServiceManager {
|
|||
}
|
||||
}
|
||||
|
||||
private initializeRecordingManager(): void {
|
||||
private initializeRecordingManager(windowManager: WindowManager): void {
|
||||
this.recordingManager = new RecordingManager(this);
|
||||
this.recordingManager.setWindowManager(windowManager);
|
||||
logger.main.info("Recording manager initialized");
|
||||
}
|
||||
|
||||
|
|
@ -191,6 +208,15 @@ export class ServiceManager {
|
|||
return this.recordingManager;
|
||||
}
|
||||
|
||||
/**
 * Returns the VAD service held by this manager.
 *
 * @returns the stored service, or `null` when none is stored.
 * @throws Error when the manager has not been initialized yet.
 */
getVADService(): VADService | null {
  if (this.isInitialized) {
    return this.vadService;
  }

  throw new Error("ServiceManager not initialized. Call initialize() first.");
}
|
||||
|
||||
async cleanup(): Promise<void> {
|
||||
if (this.recordingManager) {
|
||||
logger.main.info("Cleaning up recording manager...");
|
||||
|
|
@ -201,6 +227,11 @@ export class ServiceManager {
|
|||
this.modelManagerService.cleanup();
|
||||
}
|
||||
|
||||
if (this.vadService) {
|
||||
logger.main.info("Cleaning up VAD service...");
|
||||
await this.vadService.dispose();
|
||||
}
|
||||
|
||||
if (this.swiftIOBridge) {
|
||||
logger.main.info("Stopping Swift helper...");
|
||||
this.swiftIOBridge.stopHelper();
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ export { PipelineContext, SharedPipelineData } from "./context";
|
|||
// Transcription input parameters
|
||||
export interface TranscribeParams {
|
||||
audioData: Buffer;
|
||||
speechProbability?: number; // Speech probability from frontend VAD (0-1)
|
||||
context: {
|
||||
vocabulary?: Map<string, string>;
|
||||
accessibilityContext?: GetAccessibilityContextResult | null;
|
||||
|
|
@ -34,6 +35,7 @@ export interface FormatParams {
|
|||
export interface TranscriptionProvider {
|
||||
readonly name: string;
|
||||
transcribe(params: TranscribeParams): Promise<string>;
|
||||
flush?(): Promise<string>; // Optional flush method for providers that buffer
|
||||
}
|
||||
|
||||
// Formatting provider interface
|
||||
|
|
|
|||
|
|
@ -12,6 +12,19 @@ export class WhisperProvider implements TranscriptionProvider {
|
|||
private modelManager: ModelManagerService;
|
||||
private whisperInstance: Whisper | null = null;
|
||||
|
||||
// Frame aggregation state
|
||||
private frameBuffer: Float32Array[] = [];
|
||||
private frameBufferSpeechProbabilities: number[] = []; // Track speech probabilities for each frame
|
||||
private silenceFrameCount = 0;
|
||||
private lastSpeechTimestamp = 0;
|
||||
|
||||
// Configuration
|
||||
private readonly FRAME_SIZE = 512; // 32ms at 16kHz
|
||||
private readonly MIN_SPEECH_DURATION_MS = 500; // Minimum speech duration to transcribe
|
||||
private readonly MAX_SILENCE_DURATION_MS = 2000; // Max silence before cutting
|
||||
private readonly SAMPLE_RATE = 16000;
|
||||
private readonly SPEECH_PROBABILITY_THRESHOLD = 0.2; // Threshold for speech detection
|
||||
|
||||
constructor(modelManager: ModelManagerService) {
|
||||
this.modelManager = modelManager;
|
||||
}
|
||||
|
|
@ -21,20 +34,53 @@ export class WhisperProvider implements TranscriptionProvider {
|
|||
await this.initializeWhisper();
|
||||
|
||||
// Extract parameters from the new structure
|
||||
const { audioData, context } = params;
|
||||
const { audioData, speechProbability = 0, context } = params;
|
||||
const { vocabulary, previousChunk, aggregatedTranscription } = context;
|
||||
|
||||
// Convert audio buffer to the format expected by smart-whisper
|
||||
const audioFloat32Array = await this.convertAudioBuffer(audioData);
|
||||
|
||||
// Add frame to buffer with speech probability
|
||||
this.frameBuffer.push(audioFloat32Array);
|
||||
this.frameBufferSpeechProbabilities.push(speechProbability);
|
||||
|
||||
// Consider it speech if probability is above threshold
|
||||
const isSpeech = speechProbability > this.SPEECH_PROBABILITY_THRESHOLD;
|
||||
|
||||
logger.transcription.debug(
|
||||
`Starting transcription, audio size: ${audioData.length}`,
|
||||
previousChunk
|
||||
? `Previous chunk: ${previousChunk.substring(0, 50)}...`
|
||||
: "No previous chunk",
|
||||
aggregatedTranscription
|
||||
? `Aggregated length: ${aggregatedTranscription.length}`
|
||||
: "No aggregated transcription",
|
||||
`Frame received - SpeechProb: ${speechProbability.toFixed(3)}, Buffer size: ${this.frameBuffer.length}, Silence count: ${this.silenceFrameCount}`,
|
||||
);
|
||||
|
||||
// Handle speech/silence logic
|
||||
if (isSpeech) {
|
||||
this.silenceFrameCount = 0;
|
||||
this.lastSpeechTimestamp = Date.now();
|
||||
} else {
|
||||
this.silenceFrameCount++;
|
||||
}
|
||||
|
||||
// Determine if we should transcribe
|
||||
const shouldTranscribe = this.shouldTranscribe();
|
||||
|
||||
if (!shouldTranscribe) {
|
||||
// Keep buffering
|
||||
return "";
|
||||
}
|
||||
|
||||
// Aggregate buffered frames
|
||||
const aggregatedAudio = this.aggregateFrames();
|
||||
|
||||
// Skip if too short or only silence
|
||||
if (aggregatedAudio.length < this.FRAME_SIZE * 2) {
|
||||
logger.transcription.debug("Skipping transcription - audio too short");
|
||||
this.frameBuffer = [];
|
||||
this.frameBufferSpeechProbabilities = [];
|
||||
this.silenceFrameCount = 0;
|
||||
return "";
|
||||
}
|
||||
|
||||
logger.transcription.debug(
|
||||
`Starting transcription of ${aggregatedAudio.length} samples (${((aggregatedAudio.length / this.SAMPLE_RATE) * 1000).toFixed(0)}ms)`,
|
||||
);
|
||||
|
||||
// Transcribe using smart-whisper
|
||||
|
|
@ -49,10 +95,13 @@ export class WhisperProvider implements TranscriptionProvider {
|
|||
);
|
||||
|
||||
const { result } = await this.whisperInstance.transcribe(
|
||||
audioFloat32Array,
|
||||
aggregatedAudio,
|
||||
{
|
||||
language: "auto",
|
||||
initial_prompt: initialPrompt,
|
||||
suppress_blank: true,
|
||||
suppress_non_speech_tokens: true,
|
||||
no_timestamps: true,
|
||||
},
|
||||
);
|
||||
|
||||
|
|
@ -68,6 +117,11 @@ export class WhisperProvider implements TranscriptionProvider {
|
|||
`Transcription completed, length: ${text.length}`,
|
||||
);
|
||||
|
||||
// Clear buffer after successful transcription
|
||||
this.frameBuffer = [];
|
||||
this.frameBufferSpeechProbabilities = [];
|
||||
this.silenceFrameCount = 0;
|
||||
|
||||
return text;
|
||||
} catch (error) {
|
||||
logger.transcription.error("Transcription failed:", error);
|
||||
|
|
@ -75,6 +129,112 @@ export class WhisperProvider implements TranscriptionProvider {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Decides whether the frames buffered so far should be transcribed now.
 *
 * Returns true when either:
 *  1. the buffer is non-empty and the current run of consecutive non-speech
 *     frames is longer than MAX_SILENCE_DURATION_MS, or
 *  2. the buffered audio is longer than 30 seconds.
 *
 * Durations are estimated as frame count * FRAME_SIZE / SAMPLE_RATE, i.e. every
 * buffered frame is counted as a full FRAME_SIZE samples. The silence check
 * does not verify that any buffered frame was classified as speech.
 */
private shouldTranscribe(): boolean {
  const bufferDurationMs =
    ((this.frameBuffer.length * this.FRAME_SIZE) / this.SAMPLE_RATE) * 1000;
  const silenceDurationMs =
    ((this.silenceFrameCount * this.FRAME_SIZE) / this.SAMPLE_RATE) * 1000;

  // Enough trailing silence: cut here and transcribe what we have.
  if (
    this.frameBuffer.length > 0 &&
    silenceDurationMs > this.MAX_SILENCE_DURATION_MS
  ) {
    logger.transcription.debug(
      `Transcribing due to ${silenceDurationMs}ms of silence`,
    );
    return true;
  }

  // If buffer is too large (e.g., 30 seconds), transcribe anyway
  if (bufferDurationMs > 30000) {
    logger.transcription.debug(
      `Transcribing due to buffer size: ${bufferDurationMs}ms`,
    );
    return true;
  }

  // This is the normal "keep buffering" path and is hit for almost every
  // incoming frame, so it is logged at debug level rather than as an error.
  logger.transcription.debug("Not transcribing", {
    bufferDurationMs,
    silenceDurationMs,
    frameBufferLength: this.frameBuffer.length,
    silenceFrameCount: this.silenceFrameCount,
  });

  return false;
}
|
||||
|
||||
/**
 * Concatenates every buffered frame, in buffer order, into one contiguous
 * Float32Array and passes the result through `trimSilence`.
 *
 * @returns The trimmed, concatenated audio.
 */
private aggregateFrames(): Float32Array {
  // First pass: how many samples do we need room for?
  let sampleCount = 0;
  for (const frame of this.frameBuffer) {
    sampleCount += frame.length;
  }

  // Second pass: copy each frame behind the previous one.
  const merged = new Float32Array(sampleCount);
  let writePos = 0;
  this.frameBuffer.forEach((frame) => {
    merged.set(frame, writePos);
    writePos += frame.length;
  });

  return this.trimSilence(merged);
}
|
||||
|
||||
/**
 * Cuts leading and trailing non-speech frames off `audio`.
 *
 * Frame `i` is treated as occupying samples
 * `[i * FRAME_SIZE, (i + 1) * FRAME_SIZE)`; a frame counts as speech when its
 * recorded probability is greater than `SPEECH_PROBABILITY_THRESHOLD`.
 * When no frame qualifies, the whole input is returned (as a copy).
 *
 * @param audio Concatenated samples of the buffered frames.
 * @returns A copy of the span from the first speech frame through the last
 *   speech frame, clamped to `audio.length`.
 */
private trimSilence(audio: Float32Array): Float32Array {
  const probabilities = this.frameBufferSpeechProbabilities;
  const isSpeech = (p: number): boolean =>
    p > this.SPEECH_PROBABILITY_THRESHOLD;

  // Scan forward for the first speech frame.
  let firstSpeech = 0;
  while (
    firstSpeech < probabilities.length &&
    !isSpeech(probabilities[firstSpeech])
  ) {
    firstSpeech++;
  }

  // Scan backward for the last speech frame.
  let lastSpeech = probabilities.length - 1;
  while (lastSpeech >= 0 && !isSpeech(probabilities[lastSpeech])) {
    lastSpeech--;
  }

  const from =
    firstSpeech < probabilities.length ? firstSpeech * this.FRAME_SIZE : 0;
  const to =
    lastSpeech >= 0 ? (lastSpeech + 1) * this.FRAME_SIZE : audio.length;

  return audio.slice(from, Math.min(to, audio.length));
}
|
||||
|
||||
/**
 * Forces transcription of any frames still sitting in the buffer.
 *
 * @returns An empty string when the frame buffer is empty; otherwise whatever
 *   `transcribe` resolves to for the buffered frames.
 */
async flush(): Promise<string> {
  if (this.frameBuffer.length === 0) {
    return "";
  }

  // Flushing is a routine end-of-stream step, so it is logged at debug level
  // rather than as an error.
  logger.transcription.debug(`Flushing ${this.frameBuffer.length} frames`);

  // Force transcription by setting a high silence count.
  this.silenceFrameCount = 999;

  return this.transcribe({
    audioData: Buffer.alloc(0), // Empty buffer, we'll use the buffered frames
    speechProbability: 0,
    context: {},
  });
}
|
||||
|
||||
private generateInitialPrompt(
|
||||
vocabulary?: Map<string, string>,
|
||||
aggregatedTranscription?: string,
|
||||
|
|
@ -163,5 +323,10 @@ export class WhisperProvider implements TranscriptionProvider {
|
|||
this.whisperInstance = null;
|
||||
}
|
||||
}
|
||||
|
||||
// Clear buffers
|
||||
this.frameBuffer = [];
|
||||
this.frameBufferSpeechProbabilities = [];
|
||||
this.silenceFrameCount = 0;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,24 +19,27 @@ export const FloatingButton: React.FC = () => {
|
|||
};
|
||||
}, []);
|
||||
|
||||
const handleAudioChunk = useCallback(
|
||||
async (audioChunk: ArrayBuffer, isFinalChunk: boolean) => {
|
||||
const handleAudioFrame = useCallback(
|
||||
async (
|
||||
audioBuffer: ArrayBuffer,
|
||||
speechProbability: number,
|
||||
isFinal: boolean,
|
||||
) => {
|
||||
try {
|
||||
// Send the audio chunk regardless of whether it's final or not
|
||||
await window.electronAPI.sendAudioChunk(audioChunk, isFinalChunk);
|
||||
console.debug(`Sent audio chunk`, {
|
||||
chunkSize: audioChunk.byteLength,
|
||||
isFinalChunk,
|
||||
// Send frame directly to main process
|
||||
// TODO: We need to update the IPC to include speech detection info
|
||||
await window.electronAPI.sendAudioChunk(audioBuffer, isFinal);
|
||||
console.debug(`Sent audio frame`, {
|
||||
size: audioBuffer.byteLength,
|
||||
speechProbability: speechProbability.toFixed(3),
|
||||
isFinal,
|
||||
});
|
||||
|
||||
if (isFinalChunk) {
|
||||
console.log("Final chunk sent to main process");
|
||||
// You might want to add a specific IPC call here if the main process needs an explicit signal
|
||||
// to finalize transcription, e.g., window.electronAPI.finalizeTranscription();
|
||||
// For now, we assume sendAudioChunk is enough and the main process handles the stream end.
|
||||
if (isFinal) {
|
||||
console.log("Final frame sent to main process");
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Error sending audio chunk:", error);
|
||||
console.error("Error sending audio frame:", error);
|
||||
}
|
||||
},
|
||||
[],
|
||||
|
|
@ -44,8 +47,7 @@ export const FloatingButton: React.FC = () => {
|
|||
|
||||
const { recordingStatus, startRecording, stopRecording, voiceDetected } =
|
||||
useRecording({
|
||||
onAudioChunk: handleAudioChunk,
|
||||
// Optionally, set chunkDurationMs here if needed, e.g., chunkDurationMs: 250
|
||||
onAudioFrame: handleAudioFrame,
|
||||
});
|
||||
const isRecording =
|
||||
recordingStatus === "recording" || recordingStatus === "starting";
|
||||
|
|
|
|||
|
|
@ -7,11 +7,11 @@ import { createDefaultContext } from "../pipeline/core/context";
|
|||
import { WhisperProvider } from "../pipeline/providers/transcription/whisper-provider";
|
||||
import { OpenRouterProvider } from "../pipeline/providers/formatting/openrouter-formatter";
|
||||
import { ModelManagerService } from "../services/model-manager";
|
||||
import { ServiceManager } from "../main/managers/service-manager";
|
||||
import { appContextStore } from "../stores/app-context";
|
||||
import { createTranscription } from "../db/transcriptions";
|
||||
import { logger } from "../main/logger";
|
||||
import { v4 as uuid } from "uuid";
|
||||
import { VADService } from "./vad-service";
|
||||
|
||||
/**
|
||||
* Service for audio transcription and optional formatting
|
||||
|
|
@ -21,9 +21,23 @@ export class TranscriptionService {
|
|||
private openRouterProvider: OpenRouterProvider | null = null;
|
||||
private formatterEnabled = false;
|
||||
private streamingSessions: Map<string, StreamingSession> = new Map();
|
||||
private vadService: VADService | null = null;
|
||||
|
||||
constructor(modelManagerService: ModelManagerService) {
|
||||
constructor(
|
||||
modelManagerService: ModelManagerService,
|
||||
vadService: VADService | null = null,
|
||||
) {
|
||||
this.whisperProvider = new WhisperProvider(modelManagerService);
|
||||
this.vadService = vadService;
|
||||
}
|
||||
|
||||
async initialize(): Promise<void> {
|
||||
if (this.vadService) {
|
||||
logger.transcription.info("Using VAD service");
|
||||
} else {
|
||||
logger.transcription.warn("VAD service not available");
|
||||
}
|
||||
logger.transcription.info("Transcription service initialized");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -62,6 +76,26 @@ export class TranscriptionService {
|
|||
isFinal?: boolean;
|
||||
}): Promise<string> {
|
||||
const { sessionId, audioChunk, isFinal = false } = options;
|
||||
console.error("processing streaming chunk", {
|
||||
length: audioChunk.length,
|
||||
});
|
||||
|
||||
// Run VAD on the audio chunk
|
||||
let speechProbability = 0;
|
||||
let isSpeaking = false;
|
||||
|
||||
if (audioChunk.length > 0 && this.vadService) {
|
||||
const vadResult = await this.vadService.processAudioFrame(
|
||||
audioChunk.buffer as ArrayBuffer,
|
||||
);
|
||||
speechProbability = vadResult.probability;
|
||||
isSpeaking = vadResult.isSpeaking;
|
||||
|
||||
logger.transcription.debug("VAD result", {
|
||||
probability: speechProbability.toFixed(3),
|
||||
isSpeaking,
|
||||
});
|
||||
}
|
||||
|
||||
// Auto-create session if it doesn't exist
|
||||
let session = this.streamingSessions.get(sessionId);
|
||||
|
|
@ -90,7 +124,7 @@ export class TranscriptionService {
|
|||
|
||||
// Process chunk if it has content
|
||||
if (audioChunk.length > 0) {
|
||||
// Direct provider call - no step wrapper
|
||||
// Direct frame to Whisper - it will handle aggregation and VAD internally
|
||||
const previousChunk =
|
||||
session.transcriptionResults.length > 0
|
||||
? session.transcriptionResults[
|
||||
|
|
@ -103,6 +137,7 @@ export class TranscriptionService {
|
|||
|
||||
const chunkTranscription = await this.whisperProvider.transcribe({
|
||||
audioData: audioChunk,
|
||||
speechProbability: speechProbability, // Now from VAD service
|
||||
context: {
|
||||
vocabulary: session.context.sharedData.vocabulary,
|
||||
accessibilityContext: session.context.sharedData.accessibilityContext,
|
||||
|
|
@ -111,22 +146,39 @@ export class TranscriptionService {
|
|||
},
|
||||
});
|
||||
|
||||
// Accumulate the result
|
||||
// Accumulate the result only if Whisper returned something
|
||||
// (it returns empty string while buffering)
|
||||
if (chunkTranscription.trim()) {
|
||||
session.transcriptionResults.push(chunkTranscription);
|
||||
logger.transcription.info("Whisper returned transcription", {
|
||||
sessionId,
|
||||
transcriptionLength: chunkTranscription.length,
|
||||
totalResults: session.transcriptionResults.length,
|
||||
});
|
||||
}
|
||||
|
||||
logger.transcription.debug("Processed chunk", {
|
||||
logger.transcription.error("Processed frame", {
|
||||
sessionId,
|
||||
chunkSize: audioChunk.length,
|
||||
transcriptionLength: chunkTranscription.length,
|
||||
totalResults: session.transcriptionResults.length,
|
||||
frameSize: audioChunk.length,
|
||||
hadTranscription: chunkTranscription.length > 0,
|
||||
isFinal,
|
||||
});
|
||||
}
|
||||
|
||||
// If this is the final chunk, apply formatting and save
|
||||
// If this is the final chunk, flush any remaining audio and apply formatting
|
||||
if (isFinal) {
|
||||
// Flush any remaining buffered audio in Whisper
|
||||
if (this.whisperProvider.flush) {
|
||||
const flushResult = await this.whisperProvider.flush();
|
||||
if (flushResult.trim()) {
|
||||
session.transcriptionResults.push(flushResult);
|
||||
logger.transcription.info("Flushed final audio", {
|
||||
sessionId,
|
||||
flushLength: flushResult.length,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Get complete transcription
|
||||
let completeTranscription = session.transcriptionResults.join(" ").trim();
|
||||
|
||||
|
|
@ -137,7 +189,7 @@ export class TranscriptionService {
|
|||
});
|
||||
|
||||
// Format if enabled
|
||||
if (this.formatterEnabled && this.openRouterProvider) {
|
||||
if (this.formatterEnabled && this.openRouterProvider && false) {
|
||||
const style =
|
||||
session.context.sharedData.userPreferences?.formattingStyle;
|
||||
completeTranscription = await this.openRouterProvider.format({
|
||||
|
|
@ -188,19 +240,9 @@ export class TranscriptionService {
|
|||
// Create default context
|
||||
const context = createDefaultContext(uuid());
|
||||
|
||||
// Simple context building - no complex loading
|
||||
const serviceManager = ServiceManager.getInstance();
|
||||
if (serviceManager) {
|
||||
try {
|
||||
const settingsService = serviceManager.getSettingsService();
|
||||
const formatterConfig = await settingsService.getFormatterConfig();
|
||||
} catch (error) {
|
||||
logger.transcription.warn("Failed to load formatter config", { error });
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Load actual vocabulary
|
||||
// TODO: Load user preferences from settings
|
||||
// TODO: Load formatter config from settings
|
||||
|
||||
return context;
|
||||
}
|
||||
|
|
@ -210,6 +252,7 @@ export class TranscriptionService {
|
|||
*/
|
||||
async dispose(): Promise<void> {
|
||||
await this.whisperProvider.dispose();
|
||||
// VAD service is managed by ServiceManager
|
||||
logger.transcription.info("Transcription service disposed");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
192
apps/desktop/src/services/vad-service.ts
Normal file
192
apps/desktop/src/services/vad-service.ts
Normal file
|
|
@ -0,0 +1,192 @@
|
|||
import * as ort from "onnxruntime-node";
|
||||
import { logger } from "../main/logger";
|
||||
import { app } from "electron";
|
||||
import * as path from "path";
|
||||
import { EventEmitter } from "node:events";
|
||||
import { existsSync } from "node:fs";
|
||||
|
||||
/**
 * Voice-activity detection backed by an ONNX model run through
 * onnxruntime-node.
 *
 * Each processed window yields a speech probability; a small smoothing state
 * machine turns those into a speaking / not-speaking flag and emits
 * `"voice-detected"` (with `true` or `false`) whenever that flag flips.
 */
export class VADService extends EventEmitter {
  private session: ort.InferenceSession | null = null;
  private modelPath: string | null = null;
  private state: ort.Tensor | null = null;
  private sr = 16000;

  // Configuration
  private readonly WINDOW_SIZE_SAMPLES = 512; // 32ms at 16kHz
  private readonly SPEECH_THRESHOLD = 0.2;
  private readonly REDEMPTION_FRAMES = 8;
  // Speech frames that must accumulate before "speaking" is reported.
  private readonly MIN_SPEECH_FRAMES = 3;

  // State
  private speechFrameCount = 0;
  private silenceFrameCount = 0;
  private isSpeaking = false;

  /**
   * Locates the model file, loads it into an inference session and resets the
   * recurrent state and the smoothing counters.
   *
   * @throws Error when the model file does not exist; any error raised while
   *   creating the session is logged and rethrown.
   */
  async initialize(): Promise<void> {
    try {
      // Handle both development and production paths
      const modelPath = app.isPackaged
        ? // In production, the assets are copied to the resources folder
          path.join(process.resourcesPath, "assets", "silero_vad_v5.onnx")
        : // In development, use the source path
          path.join(__dirname, "../../src/assets/silero_vad_v5.onnx");
      this.modelPath = modelPath;

      logger.main.info("Loading VAD model from", modelPath);

      // Check if the model file exists
      if (!existsSync(modelPath)) {
        throw new Error(
          `VAD model file not found at: ${modelPath}. ` +
            `Make sure the ONNX model is in the assets folder.`,
        );
      }

      // Load ONNX model
      this.session = await ort.InferenceSession.create(modelPath, {
        executionProviders: ["cpu"], // Use CPU provider for compatibility
      });

      this.resetStates();
      // A re-initialised service must not inherit the previous run's flags.
      this.resetDetectionState();

      logger.main.info("VAD service initialized successfully");
    } catch (error) {
      logger.main.error("Failed to initialize VAD service:", error);
      throw error;
    }
  }

  /** Replaces the recurrent state with a zero tensor of shape [2, 1, 128]. */
  private resetStates(): void {
    // Silero VAD uses a state tensor with shape [2, 1, 128]
    const stateSize = 2 * 1 * 128;
    // A freshly allocated Float32Array is already zero-filled.
    this.state = new ort.Tensor(
      "float32",
      new Float32Array(stateSize),
      [2, 1, 128],
    );
  }

  /** Clears the smoothing counters and the speaking flag (emits nothing). */
  private resetDetectionState(): void {
    this.speechFrameCount = 0;
    this.silenceFrameCount = 0;
    this.isSpeaking = false;
  }

  /**
   * Runs one inference step on `audioFrames` and updates the recurrent state.
   *
   * @param audioFrames Samples fed to the model as a [1, length] tensor.
   * @returns The raw speech probability and the smoothed speaking flag.
   * @throws Error when the service has not been initialized; inference errors
   *   are logged and rethrown.
   */
  async processBatch(
    audioFrames: Float32Array,
  ): Promise<{ probability: number; isSpeaking: boolean }> {
    if (!this.session || !this.state) {
      throw new Error("VAD service not initialized");
    }

    try {
      // Create input tensor - shape should be [1, audio_length]
      const inputTensor = new ort.Tensor("float32", audioFrames, [
        1,
        audioFrames.length,
      ]);

      const srTensor = new ort.Tensor(
        "int64",
        BigInt64Array.from([BigInt(this.sr)]),
        [],
      );

      // Run inference with input, state, and sr
      const results = await this.session.run({
        input: inputTensor,
        state: this.state,
        sr: srTensor,
      });

      // Carry the model's updated state into the next call
      this.state = results.stateN as ort.Tensor;

      // Get speech probability
      const output = results.output as ort.Tensor;
      const probability = output.data[0] as number;

      // Apply smoothing logic
      const isSpeaking = this.applySpeechDetectionLogic(probability);

      return { probability, isSpeaking };
    } catch (error) {
      logger.main.error("VAD inference failed:", error);
      throw error;
    }
  }

  /**
   * Folds one probability into the smoothing counters and flips the speaking
   * flag when the thresholds are met, emitting `"voice-detected"` on a flip.
   *
   * @param probability Speech probability of the frame just processed.
   * @returns The speaking flag after this frame.
   */
  private applySpeechDetectionLogic(probability: number): boolean {
    const isSpeechFrame = probability > this.SPEECH_THRESHOLD;

    if (isSpeechFrame) {
      this.speechFrameCount++;
      this.silenceFrameCount = 0;
    } else {
      this.silenceFrameCount++;
      // Only a silence run longer than the redemption window discards the
      // speech frames counted so far.
      if (this.silenceFrameCount > this.REDEMPTION_FRAMES) {
        this.speechFrameCount = 0;
      }
    }

    // Start speaking after enough speech frames
    if (!this.isSpeaking && this.speechFrameCount >= this.MIN_SPEECH_FRAMES) {
      this.isSpeaking = true;
      logger.main.debug("Speech started");
      this.emit("voice-detected", true);
    }

    // Stop speaking after enough silence
    if (this.isSpeaking && this.silenceFrameCount >= this.REDEMPTION_FRAMES) {
      this.isSpeaking = false;
      logger.main.debug("Speech ended");
      this.emit("voice-detected", false);
    }

    return this.isSpeaking;
  }

  /**
   * Interprets `audioBuffer` as float32 samples and runs VAD on exactly one
   * window of `WINDOW_SIZE_SAMPLES` samples.
   *
   * Shorter input is zero-padded at the end; longer input is truncated to its
   * first window (the remaining samples are not analysed).
   *
   * @param audioBuffer Raw float32 sample bytes.
   * @returns The raw speech probability and the smoothed speaking flag.
   */
  async processAudioFrame(
    audioBuffer: ArrayBuffer,
  ): Promise<{ probability: number; isSpeaking: boolean }> {
    // Convert ArrayBuffer to Float32Array
    const samples = new Float32Array(audioBuffer);

    // Silero VAD requires exactly 512 samples
    if (samples.length === this.WINDOW_SIZE_SAMPLES) {
      return this.processBatch(samples);
    }

    // One path covers both mismatches: copy at most one window's worth of
    // samples into a zero-initialised window-sized array.
    const frame = new Float32Array(this.WINDOW_SIZE_SAMPLES);
    frame.set(samples.subarray(0, this.WINDOW_SIZE_SAMPLES));
    return this.processBatch(frame);
  }

  /** @returns The current smoothed speaking flag. */
  getSpeechState(): boolean {
    return this.isSpeaking;
  }

  /**
   * Releases the inference session and clears all per-stream state so that
   * `getSpeechState()` does not keep reporting a stale `true` afterwards.
   */
  async dispose(): Promise<void> {
    if (this.session) {
      await this.session.release();
      this.session = null;
    }
    this.state = null;
    this.resetDetectionState();
    logger.main.info("VAD service disposed");
  }
}
|
||||
|
|
@ -3,6 +3,7 @@ import { observable } from "@trpc/server/observable";
|
|||
import superjson from "superjson";
|
||||
import { ServiceManager } from "../../main/managers/service-manager";
|
||||
import type { RecordingStatus } from "../../types/recording";
|
||||
import { logger } from "../../main/logger";
|
||||
|
||||
const t = initTRPC.create({
|
||||
isServer: true,
|
||||
|
|
@ -61,4 +62,36 @@ export const recordingRouter = t.router({
|
|||
};
|
||||
});
|
||||
}),
|
||||
|
||||
// Voice detection subscription
|
||||
voiceDetectionUpdates: t.procedure.subscription(() =>
  observable<boolean>((emit) => {
    const manager = ServiceManager.getInstance();
    if (!manager) {
      throw new Error("ServiceManager not initialized");
    }

    const vad = manager.getVADService();
    if (vad) {
      // Relay every "voice-detected" event from the VAD service to the
      // subscriber.
      const forward = (detected: boolean) => {
        emit.next(detected);
      };
      vad.on("voice-detected", forward);

      // Teardown: detach the listener when the subscription ends.
      return () => {
        vad.off("voice-detected", forward);
      };
    }

    logger.main.warn(
      "VAD service not available for voice detection subscription",
    );
    // Without a VAD service, report "no voice" once. The observable is not
    // completed here and there is nothing to tear down.
    emit.next(false);
    return () => {};
  }),
),
|
||||
});
|
||||
|
|
|
|||
13
apps/desktop/src/types/vite-env.d.ts
vendored
Normal file
13
apps/desktop/src/types/vite-env.d.ts
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
/// <reference types="vite/client" />
|
||||
|
||||
// Declare module for URL imports
|
||||
/** Ambient typing for imports ending in `?url`: the default export is a string. */
declare module "*?url" {
  const assetUrl: string;
  export default assetUrl;
}
|
||||
|
||||
// Declare module for raw imports
|
||||
/** Ambient typing for imports ending in `?raw`: the default export is a string. */
declare module "*?raw" {
  const rawText: string;
  export default rawText;
}
|
||||
|
|
@ -14,6 +14,7 @@ export default defineConfig({
|
|||
"@libsql/linux-x64-musl",
|
||||
"@libsql/win32-x64-msvc",
|
||||
"libsql",
|
||||
"onnxruntime-node",
|
||||
/^node:/,
|
||||
/^electron$/,
|
||||
],
|
||||
|
|
|
|||
|
|
@ -38,7 +38,8 @@
|
|||
"drizzle-orm/libsql",
|
||||
"@libsql",
|
||||
"macos-alias",
|
||||
"fs-xattr"
|
||||
"fs-xattr",
|
||||
"onnxruntime-node"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
|
|||
168
pnpm-lock.yaml
generated
168
pnpm-lock.yaml
generated
|
|
@ -128,9 +128,6 @@ importers:
|
|||
'@radix-ui/react-tooltip':
|
||||
specifier: ^1.2.7
|
||||
version: 1.2.7(@types/react-dom@19.1.5(@types/react@19.1.5))(@types/react@19.1.5)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
|
||||
'@ricky0123/vad-web':
|
||||
specifier: ^0.0.24
|
||||
version: 0.0.24
|
||||
'@tabler/icons-react':
|
||||
specifier: ^3.34.0
|
||||
version: 3.34.0(react@19.1.0)
|
||||
|
|
@ -218,6 +215,9 @@ importers:
|
|||
next-themes:
|
||||
specifier: ^0.4.6
|
||||
version: 0.4.6(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
|
||||
onnxruntime-node:
|
||||
specifier: ^1.20.1
|
||||
version: 1.22.0
|
||||
openai:
|
||||
specifier: ^4.98.0
|
||||
version: 4.103.0(encoding@0.1.13)(ws@8.18.0)(zod@3.25.67)
|
||||
|
|
@ -2042,36 +2042,6 @@ packages:
|
|||
resolution: {integrity: sha512-ROFF39F6ZrnzSUEmQQZUar0Jt4xVoP9WnDRdWwF4NNcXs3xBTLgBUDoOwW141y1jP+S8nahIbdxbFC7IShw9Iw==}
|
||||
engines: {node: ^12.20.0 || ^14.18.0 || >=16.0.0}
|
||||
|
||||
'@protobufjs/aspromise@1.1.2':
|
||||
resolution: {integrity: sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==}
|
||||
|
||||
'@protobufjs/base64@1.1.2':
|
||||
resolution: {integrity: sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==}
|
||||
|
||||
'@protobufjs/codegen@2.0.4':
|
||||
resolution: {integrity: sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==}
|
||||
|
||||
'@protobufjs/eventemitter@1.1.0':
|
||||
resolution: {integrity: sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==}
|
||||
|
||||
'@protobufjs/fetch@1.1.0':
|
||||
resolution: {integrity: sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==}
|
||||
|
||||
'@protobufjs/float@1.0.2':
|
||||
resolution: {integrity: sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==}
|
||||
|
||||
'@protobufjs/inquire@1.1.0':
|
||||
resolution: {integrity: sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==}
|
||||
|
||||
'@protobufjs/path@1.1.2':
|
||||
resolution: {integrity: sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==}
|
||||
|
||||
'@protobufjs/pool@1.1.0':
|
||||
resolution: {integrity: sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==}
|
||||
|
||||
'@protobufjs/utf8@1.1.0':
|
||||
resolution: {integrity: sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==}
|
||||
|
||||
'@radix-ui/number@1.1.1':
|
||||
resolution: {integrity: sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g==}
|
||||
|
||||
|
|
@ -2684,9 +2654,6 @@ packages:
|
|||
'@radix-ui/rect@1.1.1':
|
||||
resolution: {integrity: sha512-HPwpGIzkl28mWyZqG52jiqDJ12waP11Pa1lGoiyUkIEuMLBP0oeK/C89esbXrxsky5we7dfd8U58nm0SgAWpVw==}
|
||||
|
||||
'@ricky0123/vad-web@0.0.24':
|
||||
resolution: {integrity: sha512-uv6GWW/kq8BkVErMQzXp3uwSyYMT3w/3QJiUerVaaKp7EwhOTIRY+96EoyFdG2WOFU5RkLk/2CVGbI7nDlxhEg==}
|
||||
|
||||
'@rollup/plugin-commonjs@28.0.6':
|
||||
resolution: {integrity: sha512-XSQB1K7FUU5QP+3lOQmVCE3I0FcbbNvmNT4VJSj93iUjayaARrTQeoRdiYQoftAJBLrR9t2agwAd3ekaTgHNlw==}
|
||||
engines: {node: '>=16.0.0 || 14 >= 14.17'}
|
||||
|
|
@ -3317,9 +3284,6 @@ packages:
|
|||
'@types/keyv@3.1.4':
|
||||
resolution: {integrity: sha512-BQ5aZNSCpj7D6K2ksrRCTmKRLEpnPvWDiLPfoGyhZ++8YtiK9d/3DBKPJgry359X/P1PfruyYwvnvwFjuEiEIg==}
|
||||
|
||||
'@types/long@4.0.2':
|
||||
resolution: {integrity: sha512-MqTGEo5bj5t157U6fA/BiDynNkn0YknVdh48CMPkTSpFTVmvao5UQmm7uEF6xBEo7qIMAlY/JSleYaE6VOdpaA==}
|
||||
|
||||
'@types/mdast@4.0.4':
|
||||
resolution: {integrity: sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA==}
|
||||
|
||||
|
|
@ -3486,6 +3450,10 @@ packages:
|
|||
engines: {node: '>=0.4.0'}
|
||||
hasBin: true
|
||||
|
||||
adm-zip@0.5.16:
|
||||
resolution: {integrity: sha512-TGw5yVi4saajsSEgz25grObGHEUaDrniwvA2qwSC060KfqGPdglhvPMA2lPIoxs3PQIItj2iag35fONcQqgUaQ==}
|
||||
engines: {node: '>=12.0'}
|
||||
|
||||
agent-base@6.0.2:
|
||||
resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==}
|
||||
engines: {node: '>= 6.0.0'}
|
||||
|
|
@ -4953,9 +4921,6 @@ packages:
|
|||
resolution: {integrity: sha512-f7ccFPK3SXFHpx15UIGyRJ/FJQctuKZ0zVuN3frBo4HnK3cay9VEW0R6yPYFHC0AgqhukPzKjq22t5DmAyqGyw==}
|
||||
engines: {node: '>=16'}
|
||||
|
||||
flatbuffers@1.12.0:
|
||||
resolution: {integrity: sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ==}
|
||||
|
||||
flatted@3.3.3:
|
||||
resolution: {integrity: sha512-GX+ysw4PBCz0PzosHDepZGANEuFCMLrnRTiEy9McGjmkCQYwRq4A/X786G/fjM/+OjsWSU1ZrY5qyARZmO/uwg==}
|
||||
|
||||
|
|
@ -5266,9 +5231,6 @@ packages:
|
|||
resolution: {integrity: sha512-5v6yZd4JK3eMI3FqqCouswVqwugaA9r4dNZB1wwcmrD02QkV5H0y7XBQW8QwQqEaZY1pM9aqORSORhJRdNK44Q==}
|
||||
engines: {node: '>=6.0'}
|
||||
|
||||
guid-typescript@1.0.9:
|
||||
resolution: {integrity: sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==}
|
||||
|
||||
handlebars@4.7.7:
|
||||
resolution: {integrity: sha512-aAcXm5OAfE/8IXkcZvCepKU3VzW1/39Fb5ZuqMtgI/hT8X2YgoMvBY5dLhq/cpOvw7Lk1nK/UF71aLG/ZnVYRA==}
|
||||
engines: {node: '>=0.4.7'}
|
||||
|
|
@ -5918,9 +5880,6 @@ packages:
|
|||
resolution: {integrity: sha512-5UtUDQ/6edw4ofyljDNcOVJQ4c7OjDro4h3y8e1GQL5iYElYclVHJ3zeWchylvMaKnDbDilC8irOVyexnA/Slw==}
|
||||
engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
|
||||
|
||||
long@4.0.0:
|
||||
resolution: {integrity: sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA==}
|
||||
|
||||
longest-streak@3.1.0:
|
||||
resolution: {integrity: sha512-9Ri+o0JYgehTaVBBDoMqIl8GXtbWg711O3srftcHhZ0dqnETqLaoIK0x17fUw9rFSlK/0NlsKe0Ahhyl5pXE2g==}
|
||||
|
||||
|
|
@ -6506,14 +6465,12 @@ packages:
|
|||
oniguruma-to-es@4.3.3:
|
||||
resolution: {integrity: sha512-rPiZhzC3wXwE59YQMRDodUwwT9FZ9nNBwQQfsd1wfdtlKEyCdRV0avrTcSZ5xlIvGRVPd/cx6ZN45ECmS39xvg==}
|
||||
|
||||
onnx-proto@4.0.4:
|
||||
resolution: {integrity: sha512-aldMOB3HRoo6q/phyB6QRQxSt895HNNw82BNyZ2CMh4bjeKv7g/c+VpAFtJuEMVfYLMbRx61hbuqnKceLeDcDA==}
|
||||
onnxruntime-common@1.22.0:
|
||||
resolution: {integrity: sha512-vcuaNWgtF2dGQu/EP5P8UI5rEPEYqXG2sPPe5j9lg2TY/biJF8eWklTMwlDO08iuXq48xJo0awqIpK5mPG+IxA==}
|
||||
|
||||
onnxruntime-common@1.14.0:
|
||||
resolution: {integrity: sha512-3LJpegM2iMNRX2wUmtYfeX/ytfOzNwAWKSq1HbRrKc9+uqG/FsEA0bbKZl1btQeZaXhC26l44NWpNUeXPII7Ew==}
|
||||
|
||||
onnxruntime-web@1.14.0:
|
||||
resolution: {integrity: sha512-Kcqf43UMfW8mCydVGcX9OMXI2VN17c0p6XvR7IPSZzBf/6lteBzXHvcEVWDPmCKuGombl997HgLqj91F11DzXw==}
|
||||
onnxruntime-node@1.22.0:
|
||||
resolution: {integrity: sha512-QaAqr7PFekrmEsmu1rpw7OxJYyG+iACjNHoNtQIVt9Oh7st8WDPIIUe6KhF9l35HVJTJd9CV1rePoPmKhSV26g==}
|
||||
os: [win32, darwin, linux]
|
||||
|
||||
openai@4.103.0:
|
||||
resolution: {integrity: sha512-eWcz9kdurkGOFDtd5ySS5y251H2uBgq9+1a2lTBnjMMzlexJ40Am5t6Mu76SSE87VvitPa0dkIAp75F+dZVC0g==}
|
||||
|
|
@ -6715,9 +6672,6 @@ packages:
|
|||
resolution: {integrity: sha512-udgsAY+fTnvv7kI7aaxbqwWNb0AHiB0qBO89PZKPkoTmGOgdbrHDKD+0B2X4uTfJ/FT1R09r9gTsjUjNJotuog==}
|
||||
engines: {node: '>=0.10.0'}
|
||||
|
||||
platform@1.3.6:
|
||||
resolution: {integrity: sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==}
|
||||
|
||||
plist@3.1.0:
|
||||
resolution: {integrity: sha512-uysumyrvkUX0rX/dEVqt8gC3sTBzd4zoWfLeS29nb53imdaXVvLINYXTI2GNqzaMuvacNx4uJQ8+b3zXR0pkgQ==}
|
||||
engines: {node: '>=10.4.0'}
|
||||
|
|
@ -6804,10 +6758,6 @@ packages:
|
|||
property-information@7.1.0:
|
||||
resolution: {integrity: sha512-TwEZ+X+yCJmYfL7TPUOcvBZ4QfoT5YenQiJuX//0th53DE6w0xxLEtfK3iyryQFddXuvkIk51EEgrJQ0WJkOmQ==}
|
||||
|
||||
protobufjs@6.11.4:
|
||||
resolution: {integrity: sha512-5kQWPaJHi1WoCpjTGszzQ32PG2F4+wRY6BmAT4Vfw56Q2FZ4YZzK20xUYQH4YkfehY1e6QSICrJquM6xXZNcrw==}
|
||||
hasBin: true
|
||||
|
||||
proxy-addr@2.0.7:
|
||||
resolution: {integrity: sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==}
|
||||
engines: {node: '>= 0.10'}
|
||||
|
|
@ -10147,29 +10097,6 @@ snapshots:
|
|||
|
||||
'@pkgr/core@0.2.4': {}
|
||||
|
||||
'@protobufjs/aspromise@1.1.2': {}
|
||||
|
||||
'@protobufjs/base64@1.1.2': {}
|
||||
|
||||
'@protobufjs/codegen@2.0.4': {}
|
||||
|
||||
'@protobufjs/eventemitter@1.1.0': {}
|
||||
|
||||
'@protobufjs/fetch@1.1.0':
|
||||
dependencies:
|
||||
'@protobufjs/aspromise': 1.1.2
|
||||
'@protobufjs/inquire': 1.1.0
|
||||
|
||||
'@protobufjs/float@1.0.2': {}
|
||||
|
||||
'@protobufjs/inquire@1.1.0': {}
|
||||
|
||||
'@protobufjs/path@1.1.2': {}
|
||||
|
||||
'@protobufjs/pool@1.1.0': {}
|
||||
|
||||
'@protobufjs/utf8@1.1.0': {}
|
||||
|
||||
'@radix-ui/number@1.1.1': {}
|
||||
|
||||
'@radix-ui/primitive@1.1.2': {}
|
||||
|
|
@ -10823,10 +10750,6 @@ snapshots:
|
|||
|
||||
'@radix-ui/rect@1.1.1': {}
|
||||
|
||||
'@ricky0123/vad-web@0.0.24':
|
||||
dependencies:
|
||||
onnxruntime-web: 1.14.0
|
||||
|
||||
'@rollup/plugin-commonjs@28.0.6(rollup@4.41.0)':
|
||||
dependencies:
|
||||
'@rollup/pluginutils': 5.2.0(rollup@4.41.0)
|
||||
|
|
@ -11560,8 +11483,6 @@ snapshots:
|
|||
dependencies:
|
||||
'@types/node': 22.15.12
|
||||
|
||||
'@types/long@4.0.2': {}
|
||||
|
||||
'@types/mdast@4.0.4':
|
||||
dependencies:
|
||||
'@types/unist': 3.0.3
|
||||
|
|
@ -11744,6 +11665,8 @@ snapshots:
|
|||
|
||||
acorn@8.14.1: {}
|
||||
|
||||
adm-zip@0.5.16: {}
|
||||
|
||||
agent-base@6.0.2:
|
||||
dependencies:
|
||||
debug: 4.4.1
|
||||
|
|
@ -12006,8 +11929,7 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
boolean@3.2.0:
|
||||
optional: true
|
||||
boolean@3.2.0: {}
|
||||
|
||||
bottleneck@2.19.5: {}
|
||||
|
||||
|
|
@ -12583,8 +12505,7 @@ snapshots:
|
|||
|
||||
detect-node-es@1.1.0: {}
|
||||
|
||||
detect-node@2.1.0:
|
||||
optional: true
|
||||
detect-node@2.1.0: {}
|
||||
|
||||
devlop@1.1.0:
|
||||
dependencies:
|
||||
|
|
@ -12929,8 +12850,7 @@ snapshots:
|
|||
is-date-object: 1.1.0
|
||||
is-symbol: 1.1.1
|
||||
|
||||
es6-error@4.1.1:
|
||||
optional: true
|
||||
es6-error@4.1.1: {}
|
||||
|
||||
esast-util-from-estree@2.0.0:
|
||||
dependencies:
|
||||
|
|
@ -13474,8 +13394,6 @@ snapshots:
|
|||
flatted: 3.3.3
|
||||
keyv: 4.5.4
|
||||
|
||||
flatbuffers@1.12.0: {}
|
||||
|
||||
flatted@3.3.3: {}
|
||||
|
||||
flora-colossus@2.0.0:
|
||||
|
|
@ -13833,7 +13751,6 @@ snapshots:
|
|||
roarr: 2.15.4
|
||||
semver: 7.7.2
|
||||
serialize-error: 7.0.1
|
||||
optional: true
|
||||
|
||||
global-dirs@3.0.1:
|
||||
dependencies:
|
||||
|
|
@ -13904,8 +13821,6 @@ snapshots:
|
|||
section-matter: 1.0.0
|
||||
strip-bom-string: 1.0.0
|
||||
|
||||
guid-typescript@1.0.9: {}
|
||||
|
||||
handlebars@4.7.7:
|
||||
dependencies:
|
||||
minimist: 1.2.8
|
||||
|
|
@ -14443,8 +14358,7 @@ snapshots:
|
|||
|
||||
json-stable-stringify-without-jsonify@1.0.1: {}
|
||||
|
||||
json-stringify-safe@5.0.1:
|
||||
optional: true
|
||||
json-stringify-safe@5.0.1: {}
|
||||
|
||||
json5@1.0.2:
|
||||
dependencies:
|
||||
|
|
@ -14623,8 +14537,6 @@ snapshots:
|
|||
strip-ansi: 7.1.0
|
||||
wrap-ansi: 8.1.0
|
||||
|
||||
long@4.0.0: {}
|
||||
|
||||
longest-streak@3.1.0: {}
|
||||
|
||||
loose-envify@1.4.0:
|
||||
|
|
@ -14695,7 +14607,6 @@ snapshots:
|
|||
matcher@3.0.0:
|
||||
dependencies:
|
||||
escape-string-regexp: 4.0.0
|
||||
optional: true
|
||||
|
||||
math-intrinsics@1.1.0: {}
|
||||
|
||||
|
|
@ -15488,20 +15399,13 @@ snapshots:
|
|||
regex: 6.0.1
|
||||
regex-recursion: 6.0.2
|
||||
|
||||
onnx-proto@4.0.4:
|
||||
dependencies:
|
||||
protobufjs: 6.11.4
|
||||
onnxruntime-common@1.22.0: {}
|
||||
|
||||
onnxruntime-common@1.14.0: {}
|
||||
|
||||
onnxruntime-web@1.14.0:
|
||||
onnxruntime-node@1.22.0:
|
||||
dependencies:
|
||||
flatbuffers: 1.12.0
|
||||
guid-typescript: 1.0.9
|
||||
long: 4.0.0
|
||||
onnx-proto: 4.0.4
|
||||
onnxruntime-common: 1.14.0
|
||||
platform: 1.3.6
|
||||
adm-zip: 0.5.16
|
||||
global-agent: 3.0.0
|
||||
onnxruntime-common: 1.22.0
|
||||
|
||||
openai@4.103.0(encoding@0.1.13)(ws@8.18.0)(zod@3.25.67):
|
||||
dependencies:
|
||||
|
|
@ -15705,8 +15609,6 @@ snapshots:
|
|||
|
||||
pify@2.3.0: {}
|
||||
|
||||
platform@1.3.6: {}
|
||||
|
||||
plist@3.1.0:
|
||||
dependencies:
|
||||
'@xmldom/xmldom': 0.8.10
|
||||
|
|
@ -15790,22 +15692,6 @@ snapshots:
|
|||
|
||||
property-information@7.1.0: {}
|
||||
|
||||
protobufjs@6.11.4:
|
||||
dependencies:
|
||||
'@protobufjs/aspromise': 1.1.2
|
||||
'@protobufjs/base64': 1.1.2
|
||||
'@protobufjs/codegen': 2.0.4
|
||||
'@protobufjs/eventemitter': 1.1.0
|
||||
'@protobufjs/fetch': 1.1.0
|
||||
'@protobufjs/float': 1.0.2
|
||||
'@protobufjs/inquire': 1.1.0
|
||||
'@protobufjs/path': 1.1.2
|
||||
'@protobufjs/pool': 1.1.0
|
||||
'@protobufjs/utf8': 1.1.0
|
||||
'@types/long': 4.0.2
|
||||
'@types/node': 22.15.12
|
||||
long: 4.0.0
|
||||
|
||||
proxy-addr@2.0.7:
|
||||
dependencies:
|
||||
forwarded: 0.2.0
|
||||
|
|
@ -16341,7 +16227,6 @@ snapshots:
|
|||
json-stringify-safe: 5.0.1
|
||||
semver-compare: 1.0.0
|
||||
sprintf-js: 1.1.3
|
||||
optional: true
|
||||
|
||||
rollup@4.41.0:
|
||||
dependencies:
|
||||
|
|
@ -16425,8 +16310,7 @@ snapshots:
|
|||
|
||||
secure-json-parse@2.7.0: {}
|
||||
|
||||
semver-compare@1.0.0:
|
||||
optional: true
|
||||
semver-compare@1.0.0: {}
|
||||
|
||||
semver@5.7.2: {}
|
||||
|
||||
|
|
@ -16462,7 +16346,6 @@ snapshots:
|
|||
serialize-error@7.0.1:
|
||||
dependencies:
|
||||
type-fest: 0.13.1
|
||||
optional: true
|
||||
|
||||
serve-favicon@2.5.0:
|
||||
dependencies:
|
||||
|
|
@ -17180,8 +17063,7 @@ snapshots:
|
|||
dependencies:
|
||||
prelude-ls: 1.2.1
|
||||
|
||||
type-fest@0.13.1:
|
||||
optional: true
|
||||
type-fest@0.13.1: {}
|
||||
|
||||
type-fest@0.21.3: {}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue