diff --git a/apps/desktop/models/silero_vad_v5.onnx b/apps/desktop/models/silero_vad_v5.onnx
deleted file mode 100644
index b3e3a90..0000000
Binary files a/apps/desktop/models/silero_vad_v5.onnx and /dev/null differ
diff --git a/apps/desktop/models/silero_vad_v6.onnx b/apps/desktop/models/silero_vad_v6.onnx
new file mode 100644
index 0000000..80c5592
Binary files /dev/null and b/apps/desktop/models/silero_vad_v6.onnx differ
diff --git a/apps/desktop/src/pipeline/providers/transcription/amical-cloud-provider.ts b/apps/desktop/src/pipeline/providers/transcription/amical-cloud-provider.ts
index a9c44d4..6b5f93f 100644
--- a/apps/desktop/src/pipeline/providers/transcription/amical-cloud-provider.ts
+++ b/apps/desktop/src/pipeline/providers/transcription/amical-cloud-provider.ts
@@ -136,6 +136,9 @@ export class AmicalCloudProvider implements TranscriptionProvider {
       offset += frame.length;
     }
 
+    // Save VAD probabilities before clearing
+    const vadProbs = [...this.frameBufferSpeechProbabilities];
+
     // Clear frame buffers only (context values needed for API call below)
     this.frameBuffer = [];
     this.frameBufferSpeechProbabilities = [];
@@ -144,6 +147,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
     // Make the API request
     return this.makeTranscriptionRequest(
       combinedAudio,
+      vadProbs,
       false,
       enableFormatting,
     );
@@ -177,6 +181,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
 
   private async makeTranscriptionRequest(
     audioData: Float32Array,
+    vadProbs: number[],
     isRetry = false,
     enableFormatting = false,
   ): Promise<string> {
@@ -215,6 +220,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
       },
       body: JSON.stringify({
         audioData: Array.from(audioData),
+        vadProbs,
         language: this.currentLanguage,
         previousTranscription: this.currentAggregatedTranscription,
         formatting: {
@@ -262,6 +268,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
         // Retry the request once (preserve formatting flag)
         return await this.makeTranscriptionRequest(
           audioData,
+          vadProbs,
           true,
           enableFormatting,
         );
diff --git a/apps/desktop/src/services/vad-service.ts b/apps/desktop/src/services/vad-service.ts
index 9cdec94..6d47be3 100644
--- a/apps/desktop/src/services/vad-service.ts
+++ b/apps/desktop/src/services/vad-service.ts
@@ -13,10 +13,13 @@ export class VADService extends EventEmitter {
 
   // Configuration
   private readonly WINDOW_SIZE_SAMPLES = 512; // 32ms at 16kHz
+  private readonly CTX_SIZE = 64; // Context size for v6
+  private readonly INPUT_SIZE = 576; // CTX_SIZE + WINDOW_SIZE_SAMPLES
   private readonly SPEECH_THRESHOLD = 0.1;
   private readonly REDEMPTION_FRAMES = 8;
 
   // State
+  private context: Float32Array = new Float32Array(64).fill(0); // v6 context buffer
   private speechFrameCount = 0;
   private silenceFrameCount = 0;
   private isSpeaking = false;
@@ -33,13 +36,13 @@ export class VADService extends EventEmitter {
         this.modelPath = path.join(
           process.resourcesPath,
           "models",
-          "silero_vad_v5.onnx",
+          "silero_vad_v6.onnx",
         );
       } else {
         // In development, use the source path
         this.modelPath = path.join(
           __dirname,
-          "../../models/silero_vad_v5.onnx",
+          "../../models/silero_vad_v6.onnx",
         );
       }
 
@@ -90,10 +93,14 @@ export class VADService extends EventEmitter {
     }
 
     try {
-      // Create input tensor - shape should be [1, audio_length]
-      const inputTensor = new ort.Tensor("float32", audioFrames, [
+      // v6: Create combined input [context | frame] with fixed size 576
+      const input = new Float32Array(this.INPUT_SIZE);
+      input.set(this.context, 0);
+      input.set(audioFrames, this.CTX_SIZE);
+
+      const inputTensor = new ort.Tensor("float32", input, [
         1,
-        audioFrames.length,
+        this.INPUT_SIZE,
       ]);
 
       const srTensor = new ort.Tensor(
@@ -109,12 +116,18 @@ export class VADService extends EventEmitter {
         sr: srTensor,
       });
 
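+      // v6: Resolve output names from the session instead of hard-coding them; assumes outputNames[0] is the speech probability and the other output is the recurrent state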
+      const outName = this.session.outputNames[0];
+      const stateName = this.session.outputNames.find((n) => n !== outName)!;
+
       // Update state for next iteration
-      this.state = results.stateN as ort.Tensor;
+      this.state = results[stateName] as ort.Tensor;
 
       // Get speech probability
-      const output = results.output as ort.Tensor;
-      const probability = output.data[0] as number;
+      const probability = (results[outName].data as Float32Array)[0];
+
+      // v6: Update context = last CTX_SIZE samples of the input
+      this.context = input.slice(this.INPUT_SIZE - this.CTX_SIZE);
 
       // Apply smoothing logic
       const isSpeaking = this.applySpeechDetectionLogic(probability);
@@ -182,10 +195,11 @@ export class VADService extends EventEmitter {
 
   /**
    * Reset VAD state for a new recording session.
-   * This clears the LSTM state and speech detection counters.
+   * This clears the LSTM state, context buffer, and speech detection counters.
    */
   reset(): void {
     this.resetStates();
+    this.context = new Float32Array(this.CTX_SIZE).fill(0); // Reset v6 context buffer
     this.speechFrameCount = 0;
     this.silenceFrameCount = 0;
     this.isSpeaking = false;