diff --git a/apps/desktop/src/constants/models.ts b/apps/desktop/src/constants/models.ts
index e8e27ff..6c04e0e 100644
--- a/apps/desktop/src/constants/models.ts
+++ b/apps/desktop/src/constants/models.ts
@@ -91,4 +91,17 @@ export const AVAILABLE_MODELS: Model[] = [
filename: "ggml-large-v3.bin",
checksum: "ad82bf6a9043ceed055076d0fd39f5f186ff8062",
},
+ {
+ id: "whisper-large-v3-turbo",
+ name: "Whisper Large v3 Turbo",
+ type: "whisper",
+ size: 1.5 * 1024 * 1024 * 1024, // ~1.5 GB
+ sizeFormatted: "~1.5 GB",
+ description:
+ "Optimized Large v3 variant with only 4 decoder layers, offering significantly faster transcription with accuracy comparable to Large v2/v3.",
+ downloadUrl:
+ "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin",
+ filename: "ggml-large-v3-turbo.bin",
+ checksum: "4af2b29d7ec73d781377bfd1758ca957a807e941",
+ },
];
diff --git a/apps/desktop/src/main/managers/recording-manager.ts b/apps/desktop/src/main/managers/recording-manager.ts
index 69a3900..bc5a603 100644
--- a/apps/desktop/src/main/managers/recording-manager.ts
+++ b/apps/desktop/src/main/managers/recording-manager.ts
@@ -8,6 +8,7 @@ import type { ShortcutManager } from "../services/shortcut-manager";
import { StreamingWavWriter } from "../../utils/streaming-wav-writer";
import * as fs from "node:fs";
import * as path from "node:path";
+import { appContextStore } from "@/stores/app-context";
export type RecordingMode = "idle" | "ptt" | "hands-free";
@@ -175,6 +176,9 @@ export class RecordingManager extends EventEmitter {
const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
this.currentSessionId = `session-${timestamp}`;
+ // Get accessibility context from global store
+ appContextStore.refreshAccessibilityData();
+
// Create audio file and WAV writer
const audioFilePath = await this.createAudioFile(this.currentSessionId);
this.currentAudioRecording = {
diff --git a/apps/desktop/src/pipeline/providers/formatting/formatter-prompt.ts b/apps/desktop/src/pipeline/providers/formatting/formatter-prompt.ts
index e3198ac..d3d722d 100644
--- a/apps/desktop/src/pipeline/providers/formatting/formatter-prompt.ts
+++ b/apps/desktop/src/pipeline/providers/formatting/formatter-prompt.ts
@@ -1,98 +1,187 @@
import { FormatParams } from "../../core/pipeline-types";
-import { GetAccessibilityContextResult, ApplicationInfo } from "@amical/types";
+import { GetAccessibilityContextResult } from "@amical/types";
+
+// Opening line of every formatter system prompt; constructFormatterPrompt appends the vocabulary hint and numbered instructions after it
+const SYSTEM_PROMPT = `You are a professional text formatter. Your task is to format transcribed text to be clear, readable, and properly structured.`;
+
+// Base instructions that apply to all formatting; the reply must be wrapped in <formatted_text> tags, which the OpenRouter provider extracts
+const BASE_INSTRUCTIONS = [
+ "Fix any transcription errors based on context and custom vocabulary",
+ "Add proper punctuation and capitalization",
+ "Format paragraphs appropriately with sufficient line breaks",
+ "Maintain the original meaning and tone",
+ "Use the custom vocabulary to correct domain-specific terms",
+ "Remove unnecessary filler words (um, uh, etc.) but keep natural speech patterns",
+ "If the text is empty, return <formatted_text></formatted_text>",
+ "Return ONLY the formatted text enclosed in <formatted_text></formatted_text> tags",
+ "Do not include any commentary, explanations, or text outside the XML tags",
+];
+
+// Extra instructions per application type; keys are the type names returned by detectApplicationType
+const APPLICATION_TYPE_RULES: Record<string, string[]> = {
+ email: [
+ "Format with proper email structure (greeting, body paragraphs, closing)",
+ "Preserve email metadata if present (From, To, Subject, Date)",
+ "Ensure proper paragraph breaks between different topics",
+ "Maintain professional tone and formatting",
+ "Format any quoted or forwarded content clearly",
+ "Preserve email signatures and contact information",
+ ],
+ chat: [
+ "Preserve conversational tone and informal language",
+ "Keep messages concise and separate",
+ "Maintain emoji and emoticons if present",
+ "Format timestamps and usernames clearly if included",
+ "Preserve thread context and replies",
+ ],
+ notes: [
+ "Organize content with clear headings and sections",
+ "Use bullet points or numbered lists where appropriate",
+ "Maintain hierarchical structure of ideas",
+ "Format action items and tasks clearly",
+ "Preserve any existing formatting hints",
+ ],
+ general: [
+ "Apply standard formatting for general text",
+ "Create logical paragraph breaks based on content flow",
+ "Maintain consistent formatting throughout",
+ "Preserve the original tone and style",
+ ],
+};
+
+// Map bundle identifiers to application types (values are keys of APPLICATION_TYPE_RULES)
+const BUNDLE_TO_TYPE: Record<string, string> = {
+ "com.apple.mail": "email",
+ "com.microsoft.Outlook": "email",
+ "com.readdle.smartemail": "email",
+ "com.google.Gmail": "email",
+ "com.tinyspeck.slackmacgap": "chat",
+ "com.microsoft.teams": "chat",
+ "com.facebook.archon": "chat", // Messenger
+ "com.discord.Discord": "chat",
+ "com.telegram.desktop": "chat",
+ "com.apple.Notes": "notes",
+ "com.microsoft.onenote.mac": "notes",
+ "com.evernote.Evernote": "notes",
+ "notion.id": "notes",
+ "com.agiletortoise.Drafts-OSX": "notes",
+};
+
+// Bundle identifiers treated as web browsers; for these apps the window URL is checked against URL_PATTERNS before the bundle-id lookup
+const BROWSER_BUNDLE_IDS = [
+ "com.apple.Safari",
+ "com.google.Chrome",
+ "com.google.Chrome.canary",
+ "com.microsoft.edgemac",
+ "org.mozilla.firefox",
+ "com.brave.Browser",
+ "com.operasoftware.Opera",
+ "com.vivaldi.Vivaldi",
+];
+
+// URL patterns for web applications, keyed by application type; tested against the lower-cased window URL when the focused app is a browser
+const URL_PATTERNS: Record<string, RegExp[]> = {
+ email: [
+ /mail\.google\.com/,
+ /outlook\.live\.com/,
+ /outlook\.office\.com/,
+ /mail\.yahoo\.com/,
+ /mail\.proton\.me/,
+ /webmail\./,
+ /roundcube/,
+ /fastmail\.com/,
+ ],
+ chat: [
+ /web\.whatsapp\.com/,
+ /discord\.com\/channels/,
+ /teams\.microsoft\.com/,
+ /slack\.com/,
+ /web\.telegram\.org/,
+ /messenger\.com/,
+ /chat\.openai\.com/,
+ /claude\.ai/,
+ ],
+ notes: [
+ /notion\.so/,
+ /docs\.google\.com/,
+ /onenote\.com/,
+ /evernote\.com/,
+ /roamresearch\.com/,
+ /obsidian\.md/,
+ /workflowy\.com/,
+ /coda\.io/,
+ ],
+};
export function constructFormatterPrompt(context: FormatParams["context"]): {
systemPrompt: string;
} {
- const { accessibilityContext } = context;
-
- // Build enhanced system prompt with context information
- let systemPrompt = `You are a professional text formatter. Your task is to clean up and improve the formatting of transcribed text while preserving the original meaning and content.
-
-Please:
-1. Fix obvious transcription errors and typos
-2. Add proper punctuation where missing
-3. Organize the text into proper paragraphs, with sufficient line breaks, etc.
-4. Capitalize proper nouns and sentence beginnings
-5. Remove unnecessary filler words (um, uh, etc.) but keep natural speech patterns
-6. Maintain the speaker's original tone and style
-7. If the text is empty, return an empty string
-8. For formatting of emails make sure to use the correct email format`;
-
- // Build context information
- const contextXml = buildContextXml(accessibilityContext);
-
- if (contextXml) {
- systemPrompt += `\n\n${contextXml}`;
- systemPrompt += `\n\nUse this context to better understand the environment where the text will be used and adjust formatting accordingly.`;
+ const { accessibilityContext, vocabulary } = context;
+
+ // Classify the focused app (email / chat / notes / general) from the accessibility context
+ const applicationType = detectApplicationType(accessibilityContext);
+
+ // Base instructions first, then any rules registered for the detected type (none if the type has no entry)
+ const instructions = [
+ ...BASE_INSTRUCTIONS,
+ ...(APPLICATION_TYPE_RULES[applicationType] || [])
+ ];
+
+ // The prompt is collected as separate lines and joined with "\n" on return
+ const parts = [SYSTEM_PROMPT];
+
+ // List the custom vocabulary terms (the keys of the vocabulary collection) when there are any
+ if (vocabulary && vocabulary.size > 0) {
+ const vocabTerms = Array.from(vocabulary.keys()).join(", ");
+ parts.push(`\nCustom vocabulary to use for corrections: ${vocabTerms}`);
}
-
- systemPrompt += `\n\nReturn only the formatted text without any explanations or additional commentary.`;
-
- return { systemPrompt };
+
+ // Number the instructions from 1, one per line, under an "Instructions:" heading
+ parts.push("\nInstructions:");
+ instructions.forEach((instruction, index) => {
+ parts.push(`${index + 1}. ${instruction}`);
+ });
+
+ return { systemPrompt: parts.join("\n") };
}
-function buildContextXml(
+function detectApplicationType(
accessibilityContext: GetAccessibilityContextResult | null | undefined,
-): string | null {
- if (!accessibilityContext?.context) return null;
+): string {
+ if (!accessibilityContext?.context?.application?.bundleIdentifier) {
+ return "general";
+ }
- const contextParts: string[] = [""];
-
- // Add application info
- const appXml = buildApplicationXml(accessibilityContext.context.application);
- if (appXml) contextParts.push(appXml);
-
- // Add URL info
- const urlXml = buildUrlXml(
- accessibilityContext.context.windowInfo?.url || undefined,
+ const bundleId = accessibilityContext.context.application.bundleIdentifier;
+
+ // Browsers are classified by page URL before the bundle-id lookup, so detect them first (substring match in either direction)
+ const isBrowser = BROWSER_BUNDLE_IDS.some(browserId =>
+ bundleId.includes(browserId) || browserId.includes(bundleId)
);
- if (urlXml) contextParts.push(urlXml);
-
- contextParts.push("");
-
- // Only return context if we have actual content
- return contextParts.length > 2 ? contextParts.join("\n") : null;
-}
-
-function buildApplicationXml(application: ApplicationInfo): string | null {
- if (!application?.name) return null;
-
- const appParts = [" ", ` ${application.name}`];
-
- if (application.bundleIdentifier) {
- appParts.push(` ${application.bundleIdentifier}`);
- }
-
- appParts.push(" ");
- return appParts.join("\n");
-}
-
-function buildUrlXml(url: string | undefined): string | null {
- if (!url) return null;
-
- const domain = extractDomain(url);
- if (!domain) return null;
-
- return [" ", ` ${domain}`, " "].join("\n");
-}
-
-function extractDomain(url: string): string | null {
- try {
- // Try standard URL parsing first
- const parsedUrl = new URL(url);
- return parsedUrl.hostname;
- } catch {
- // Handle URLs without protocol or malformed URLs
- // Remove any leading slashes
- const cleanUrl = url.replace(/^\/+/, "");
-
- // Extract domain from patterns like "domain.com/path" or just "domain.com"
- const match = cleanUrl.match(/^([^\/\s?#]+)/);
- if (match && match[1].includes(".")) {
- return match[1];
+
+ if (isBrowser && accessibilityContext.context?.windowInfo?.url) {
+ // Test each pattern against the full lower-cased URL; first matching type wins. NOTE(review): path and query text can match too — confirm that is acceptable
+ const url = accessibilityContext.context.windowInfo.url.toLowerCase();
+
+ for (const [type, patterns] of Object.entries(URL_PATTERNS)) {
+ if (patterns.some(pattern => pattern.test(url))) {
+ return type;
+ }
}
-
- return null;
}
+
+ // Check for exact match in native apps (also reached by browsers whose URL matched no pattern)
+ if (BUNDLE_TO_TYPE[bundleId]) {
+ return BUNDLE_TO_TYPE[bundleId];
+ }
+
+ // Fall back to substring matching in either direction. NOTE(review): key.includes(bundleId) also accepts a bundle id that is only a fragment of a known key — confirm intended
+ for (const [key, type] of Object.entries(BUNDLE_TO_TYPE)) {
+ if (bundleId.includes(key) || key.includes(bundleId)) {
+ return type;
+ }
+ }
+
+ // Default to general
+ return "general";
}
diff --git a/apps/desktop/src/pipeline/providers/formatting/openrouter-formatter.ts b/apps/desktop/src/pipeline/providers/formatting/openrouter-formatter.ts
index e5da141..010f030 100644
--- a/apps/desktop/src/pipeline/providers/formatting/openrouter-formatter.ts
+++ b/apps/desktop/src/pipeline/providers/formatting/openrouter-formatter.ts
@@ -31,7 +31,7 @@ export class OpenRouterProvider implements FormattingProvider {
// Build user prompt with context
const userPrompt = text;
- const { text: formattedText } = await generateText({
+ const { text: aiResponse } = await generateText({
model: this.provider(this.model),
messages: [
{
@@ -47,9 +47,14 @@ export class OpenRouterProvider implements FormattingProvider {
maxTokens: 2000,
});
+ // Extract the text between <formatted_text> tags; fall back to the whole trimmed reply when the model omitted them
+ const match = aiResponse.match(/<formatted_text>([\s\S]*?)<\/formatted_text>/);
+ const formattedText = match ? match[1].trim() : aiResponse.trim();
+
logger.pipeline.debug("Formatting completed", {
original: text,
formatted: formattedText,
+ hadXmlTags: !!match,
});
return formattedText;
diff --git a/apps/desktop/src/services/model-manager.ts b/apps/desktop/src/services/model-manager.ts
index f531962..1e6091e 100644
--- a/apps/desktop/src/services/model-manager.ts
+++ b/apps/desktop/src/services/model-manager.ts
@@ -423,6 +423,7 @@ class ModelManagerService extends EventEmitter {
// Otherwise, find the best available model (prioritize by quality)
const preferredOrder = [
+ "whisper-large-v3-turbo",
"whisper-large-v1",
"whisper-medium",
"whisper-small",
diff --git a/apps/desktop/src/services/transcription-service.ts b/apps/desktop/src/services/transcription-service.ts
index b1d8284..11f059a 100644
--- a/apps/desktop/src/services/transcription-service.ts
+++ b/apps/desktop/src/services/transcription-service.ts
@@ -261,7 +261,7 @@ export class TranscriptionService {
chunkCount: session.transcriptionResults.length,
});
- if (this.formatterEnabled && this.openRouterProvider) {
+ if (this.formatterEnabled && this.openRouterProvider && completeTranscription.trim().length) {
try {
const style =
session.context.sharedData.userPreferences?.formattingStyle;
@@ -284,6 +284,8 @@ export class TranscriptionService {
logger.transcription.info("Text formatted successfully", {
sessionId,
+ originalTranscription: completeTranscription,
+ formattedTranscription: formattedText,
originalLength: completeTranscription.length,
formattedLength: formattedText.length,
});
diff --git a/packages/native-helpers/swift-helper/Sources/SwiftHelper/AccessibilityContextService.swift b/packages/native-helpers/swift-helper/Sources/SwiftHelper/AccessibilityContextService.swift
index 7faddc6..62e5711 100644
--- a/packages/native-helpers/swift-helper/Sources/SwiftHelper/AccessibilityContextService.swift
+++ b/packages/native-helpers/swift-helper/Sources/SwiftHelper/AccessibilityContextService.swift
@@ -84,7 +84,7 @@ class AccessibilityContextService {
// Enable manual accessibility for specific apps
if let bundleId: String = getBundleIdentifier(pid: pid),
appsManuallyEnableAx.contains(bundleId) {
- FileHandle.standardError.write("🔧 Enabling manual accessibility for \(bundleId)\n".data(using: .utf8)!)
+ // FileHandle.standardError.write("🔧 Enabling manual accessibility for \(bundleId)\n".data(using: .utf8)!)
AXUIElementSetAttributeValue(application, "AXManualAccessibility" as CFString, kCFBooleanTrue)
AXUIElementSetAttributeValue(application, "AXEnhancedUserInterface" as CFString, kCFBooleanTrue)
}
@@ -94,12 +94,12 @@ class AccessibilityContextService {
// Fallback to focused window if focused element fails
if error != .success {
- FileHandle.standardError.write("⚠️ Failed to get focused element, trying focused window...\n".data(using: .utf8)!)
+ // FileHandle.standardError.write("⚠️ Failed to get focused element, trying focused window...\n".data(using: .utf8)!)
error = AXUIElementCopyAttributeValue(application, kAXFocusedWindowAttribute as CFString, &focusedElement)
}
guard error == .success, let element = focusedElement else {
- FileHandle.standardError.write("❌ Failed to get focused element or window. Error: \(error.rawValue)\n".data(using: .utf8)!)
+ // FileHandle.standardError.write("❌ Failed to get focused element or window. Error: \(error.rawValue)\n".data(using: .utf8)!)
return nil
}
@@ -238,13 +238,13 @@ class AccessibilityContextService {
var urlSource = "none"
// Debug: Print all window attributes
- FileHandle.standardError.write("🔍 Window attributes:\n".data(using: .utf8)!)
+ // FileHandle.standardError.write("🔍 Window attributes:\n".data(using: .utf8)!)
let attributes = getAttributeNames(element: windowElement)
for attribute in attributes {
if let value = getAttributeValue(element: windowElement, attribute: attribute) {
- FileHandle.standardError.write(" \(attribute): \(value)\n".data(using: .utf8)!)
+ // FileHandle.standardError.write(" \(attribute): \(value)\n".data(using: .utf8)!)
} else {
- FileHandle.standardError.write(" \(attribute): \n".data(using: .utf8)!)
+ // FileHandle.standardError.write(" \(attribute): \n".data(using: .utf8)!)
}
}
@@ -258,15 +258,15 @@ class AccessibilityContextService {
let isFirefox = bundleId == "org.mozilla.firefox"
- FileHandle.standardError.write("🔍 Browser type - Chromium: \(isChromiumBrowser), Firefox: \(isFirefox), Bundle: \(bundleId ?? "unknown")\n".data(using: .utf8)!)
+ // FileHandle.standardError.write("🔍 Browser type - Chromium: \(isChromiumBrowser), Firefox: \(isFirefox), Bundle: \(bundleId ?? "unknown")\n".data(using: .utf8)!)
// For Chromium browsers and Firefox: Prioritize AXWebArea (live URL)
if isChromiumBrowser || isFirefox {
- FileHandle.standardError.write("🔍 Using AXWebArea priority for Chromium/Firefox browser\n".data(using: .utf8)!)
+ // FileHandle.standardError.write("🔍 Using AXWebArea priority for Chromium/Firefox browser\n".data(using: .utf8)!)
foundURL = findURLInChildren(element: windowElement, depth: 0, maxDepth: 30)
if foundURL != nil {
urlSource = "tree_walking_priority"
- FileHandle.standardError.write("🔍 Found URL from AXWebArea (priority): \(foundURL!)\n".data(using: .utf8)!)
+ // FileHandle.standardError.write("🔍 Found URL from AXWebArea (priority): \(foundURL!)\n".data(using: .utf8)!)
return foundURL
}
}
@@ -279,7 +279,7 @@ class AccessibilityContextService {
if docErr == .success, let urlString = urlRef as? String, !urlString.isEmpty {
foundURL = urlString
urlSource = "window_document"
- FileHandle.standardError.write("🔍 Found URL from window document: \(urlString)\n".data(using: .utf8)!)
+ // FileHandle.standardError.write("🔍 Found URL from window document: \(urlString)\n".data(using: .utf8)!)
// For Safari and other WebKit browsers, this is reliable, return immediately
if !isChromiumBrowser && !isFirefox {
@@ -295,7 +295,7 @@ class AccessibilityContextService {
if foundURL == nil {
foundURL = urlString
urlSource = "window_url"
- FileHandle.standardError.write("🔍 Found URL from window URL attribute: \(urlString)\n".data(using: .utf8)!)
+ // FileHandle.standardError.write("🔍 Found URL from window URL attribute: \(urlString)\n".data(using: .utf8)!)
// For Safari and other WebKit browsers, this is reliable, return immediately
if !isChromiumBrowser && !isFirefox {
@@ -309,17 +309,17 @@ class AccessibilityContextService {
foundURL = findURLInChildren(element: windowElement, depth: 0, maxDepth: 3)
if foundURL != nil {
urlSource = "tree_walking_fallback"
- FileHandle.standardError.write("🔍 Found URL from tree walking (fallback): \(foundURL!)\n".data(using: .utf8)!)
+ // FileHandle.standardError.write("🔍 Found URL from tree walking (fallback): \(foundURL!)\n".data(using: .utf8)!)
return foundURL
}
}
if foundURL != nil {
- FileHandle.standardError.write("🔍 Returning URL (\(urlSource)): \(foundURL!)\n".data(using: .utf8)!)
+ // FileHandle.standardError.write("🔍 Returning URL (\(urlSource)): \(foundURL!)\n".data(using: .utf8)!)
return foundURL
}
- FileHandle.standardError.write("🔍 No URL found from any method\n".data(using: .utf8)!)
+ // FileHandle.standardError.write("🔍 No URL found from any method\n".data(using: .utf8)!)
return nil
}
@@ -355,11 +355,11 @@ class AccessibilityContextService {
}
// log role
- FileHandle.standardError.write("🔍 Found element with role: \(role) at depth \(currentDepth + 1)\n".data(using: .utf8)!)
+ // FileHandle.standardError.write("🔍 Found element with role: \(role) at depth \(currentDepth + 1)\n".data(using: .utf8)!)
// log all attribute names
- FileHandle.standardError.write("🔍 Element attributes: \(getAttributeNames(element: child))\n".data(using: .utf8)!)
+ // FileHandle.standardError.write("🔍 Element attributes: \(getAttributeNames(element: child))\n".data(using: .utf8)!)
// log kAXURLAttribute
- FileHandle.standardError.write("🔍 kAXURLAttribute: \(getAttributeValue(element: child, attribute: kAXURLAttribute) ?? "none")\n".data(using: .utf8)!)
+ // FileHandle.standardError.write("🔍 kAXURLAttribute: \(getAttributeValue(element: child, attribute: kAXURLAttribute) ?? "none")\n".data(using: .utf8)!)
// Priority 1: Address/search fields (most current)
if role == "AXTextField" || role == "AXComboBox" || role == "AXSafariAddressAndSearchField" {
@@ -370,7 +370,7 @@ class AccessibilityContextService {
let value = valueRef as? String,
!value.isEmpty,
(value.hasPrefix("http://") || value.hasPrefix("https://") || value.contains(".")) {
- FileHandle.standardError.write("🔍 Found URL in address field (\(role)): \(value)\n".data(using: .utf8)!)
+ // FileHandle.standardError.write("🔍 Found URL in address field (\(role)): \(value)\n".data(using: .utf8)!)
return value
}
}
@@ -389,7 +389,7 @@ class AccessibilityContextService {
kAXURLAttribute as CFString,
&urlRef) == .success,
let urlString = urlRef as? String, !urlString.isEmpty {
- FileHandle.standardError.write("🔍 Found URL in web area: \(urlString)\n".data(using: .utf8)!)
+ // FileHandle.standardError.write("🔍 Found URL in web area: \(urlString)\n".data(using: .utf8)!)
return urlString
}
@@ -397,7 +397,7 @@ class AccessibilityContextService {
kAXDocumentAttribute as CFString,
&urlRef) == .success,
let urlString = urlRef as? String, !urlString.isEmpty {
- FileHandle.standardError.write("🔍 Found URL in web area document: \(urlString)\n".data(using: .utf8)!)
+ // FileHandle.standardError.write("🔍 Found URL in web area document: \(urlString)\n".data(using: .utf8)!)
return urlString
}
}