From 7b885673da44ffd952af096bf64606e7517b5e57 Mon Sep 17 00:00:00 2001 From: Naiyuan Qing <145280634+NevilleQingNY@users.noreply.github.com> Date: Mon, 9 Feb 2026 18:13:47 +0800 Subject: [PATCH] docs(skills): update whisper skill with correct transcription priority Clarify that local whisper is the primary provider (free, offline), OpenAI API is the fallback, and the skill only activates when both are unavailable. Add setup instructions noting no restart is required. Co-Authored-By: Claude Opus 4.6 --- skills/whisper/SKILL.md | 29 ++++++++++++++++++++++++----- src/channels/manager.ts | 2 -- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/skills/whisper/SKILL.md b/skills/whisper/SKILL.md index 1748a79d..fa9a128d 100644 --- a/skills/whisper/SKILL.md +++ b/skills/whisper/SKILL.md @@ -1,7 +1,7 @@ --- name: Audio Transcription -description: Transcribe audio files using local Whisper CLI (fallback when API is unavailable) -version: 1.0.0 +description: Transcribe audio files using local Whisper CLI when automatic pre-processing is unavailable +version: 1.1.0 metadata: emoji: "🎙️" requires: @@ -23,14 +23,33 @@ userInvocable: false disableModelInvocation: false --- -## Audio Transcription (Local Fallback) +## Audio Transcription (Agent Fallback) -Voice messages from channels are normally transcribed automatically via the OpenAI Whisper API before reaching you. This skill is only needed when the API is unavailable. +Voice messages from channels are pre-processed before reaching you. The transcription +priority is: -If you receive `[audio message received]` with a `File:` path (instead of `[Voice Message]` with a transcript), it means the API transcription was not available. Use local whisper to transcribe: +1. **Local whisper CLI** (free, offline) — requires `whisper` or `whisper-cli` in PATH +2. **OpenAI Whisper API** — requires an OpenAI API key in credentials +3. 
**No provider available** — you receive a raw file path instead of a transcript + +When both providers are unavailable, you will receive `[audio message received]` with a +`File:` path instead of `[Voice Message]` with a transcript. Use local whisper to +transcribe manually: ``` whisper "FILE_PATH" --model base --output_format txt --output_dir /tmp ``` Then read the `.txt` file from `/tmp/` and respond based on the transcribed content. + +### Setup + +To enable automatic local transcription (recommended): + +```bash +brew install openai-whisper +``` + +The first run will download the `base` model (~139MB) to `~/.cache/whisper/`. +No app restart is required — the binary is detected automatically on the next +voice message. diff --git a/src/channels/manager.ts b/src/channels/manager.ts index 5cbe7425..a7d5f248 100644 --- a/src/channels/manager.ts +++ b/src/channels/manager.ts @@ -265,8 +265,6 @@ export class ChannelManager { // Keep heartbeat acknowledgements internal (same behavior as desktop/gateway stream path). if (isHeartbeatAckEvent(event)) { if (event.type === "message_end") { - this.stopTyping(); - this.removeAckReaction(); this.aggregator = null; } return;