chore: formatter prompt updates

This commit is contained in:
haritabh-z01 2025-07-13 14:16:52 +05:30
parent 95ce6bea36
commit feebe5cae4
7 changed files with 220 additions and 106 deletions

View file

@ -91,4 +91,17 @@ export const AVAILABLE_MODELS: Model[] = [
filename: "ggml-large-v3.bin",
checksum: "ad82bf6a9043ceed055076d0fd39f5f186ff8062",
},
{
id: "whisper-large-v3-turbo",
name: "Whisper Large v3 Turbo",
type: "whisper",
size: 1.5 * 1024 * 1024 * 1024, // ~1.5 GB
sizeFormatted: "~1.5 GB",
description:
"Optimized Large v3 variant with only 4 decoder layers, offering significantly faster transcription with accuracy comparable to Large v2/v3.",
downloadUrl:
"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin",
filename: "ggml-large-v3-turbo.bin",
checksum: "4af2b29d7ec73d781377bfd1758ca957a807e941",
},
];

View file

@ -8,6 +8,7 @@ import type { ShortcutManager } from "../services/shortcut-manager";
import { StreamingWavWriter } from "../../utils/streaming-wav-writer";
import * as fs from "node:fs";
import * as path from "node:path";
import { appContextStore } from "@/stores/app-context";
export type RecordingMode = "idle" | "ptt" | "hands-free";
@ -175,6 +176,9 @@ export class RecordingManager extends EventEmitter {
const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
this.currentSessionId = `session-${timestamp}`;
// Get accessibility context from global store
appContextStore.refreshAccessibilityData();
// Create audio file and WAV writer
const audioFilePath = await this.createAudioFile(this.currentSessionId);
this.currentAudioRecording = {

View file

@ -1,98 +1,187 @@
import { FormatParams } from "../../core/pipeline-types";
import { GetAccessibilityContextResult, ApplicationInfo } from "@amical/types";
import { GetAccessibilityContextResult } from "@amical/types";
/**
 * Opening line of every formatter system prompt. It states the model's role;
 * the vocabulary hint and the numbered instructions are appended after it by
 * constructFormatterPrompt.
 */
const SYSTEM_PROMPT =
  "You are a professional text formatter. " +
  "Your task is to format transcribed text to be clear, readable, and properly structured.";
// Base instructions that apply to all formatting, regardless of the detected
// application type. constructFormatterPrompt spreads these first, so they are
// numbered ahead of any application-specific rules.
const BASE_INSTRUCTIONS = [
// Content clean-up rules.
"Fix any transcription errors based on context and custom vocabulary",
"Add proper punctuation and capitalization",
"Format paragraphs appropriately with sufficient line breaks",
"Maintain the original meaning and tone",
"Use the custom vocabulary to correct domain-specific terms",
"Remove unnecessary filler words (um, uh, etc.) but keep natural speech patterns",
// Output contract: the OpenRouter provider in this commit extracts the text
// found between <formatted_text> tags from the model response.
"If the text is empty, return <formatted_text></formatted_text>",
"Return ONLY the formatted text enclosed in <formatted_text></formatted_text> tags",
"Do not include any commentary, explanations, or text outside the XML tags",
];
// Application type specific rules.
// Keys are the application type strings returned by detectApplicationType
// ("email", "chat", "notes", "general"). constructFormatterPrompt appends the
// matching list after BASE_INSTRUCTIONS; a type with no entry here contributes
// no extra rules.
const APPLICATION_TYPE_RULES: Record<string, string[]> = {
// Rules for mail clients and webmail pages.
email: [
"Format with proper email structure (greeting, body paragraphs, closing)",
"Preserve email metadata if present (From, To, Subject, Date)",
"Ensure proper paragraph breaks between different topics",
"Maintain professional tone and formatting",
"Format any quoted or forwarded content clearly",
"Preserve email signatures and contact information",
],
// Rules for messaging apps and chat web apps.
chat: [
"Preserve conversational tone and informal language",
"Keep messages concise and separate",
"Maintain emoji and emoticons if present",
"Format timestamps and usernames clearly if included",
"Preserve thread context and replies",
],
// Rules for note-taking and document apps.
notes: [
"Organize content with clear headings and sections",
"Use bullet points or numbered lists where appropriate",
"Maintain hierarchical structure of ideas",
"Format action items and tasks clearly",
"Preserve any existing formatting hints",
],
// Fallback rules used when no more specific type is detected.
general: [
"Apply standard formatting for general text",
"Create logical paragraph breaks based on content flow",
"Maintain consistent formatting throughout",
"Preserve the original tone and style",
],
};
// Map bundle identifiers of native apps to application types.
// detectApplicationType first looks the focused app's bundle identifier up as
// an exact key, then falls back to a substring comparison in either direction
// against every key, so the declaration order decides ties between partial
// matches. Values must be keys of APPLICATION_TYPE_RULES.
// NOTE(review): the identifiers look like macOS bundle ids — confirm against
// the values the accessibility helper actually reports.
const BUNDLE_TO_TYPE: Record<string, string> = {
// Email clients
"com.apple.mail": "email",
"com.microsoft.Outlook": "email",
"com.readdle.smartemail": "email",
"com.google.Gmail": "email",
// Chat / messaging clients
"com.tinyspeck.slackmacgap": "chat",
"com.microsoft.teams": "chat",
"com.facebook.archon": "chat", // Messenger
"com.discord.Discord": "chat",
"com.telegram.desktop": "chat",
// Note-taking apps
"com.apple.Notes": "notes",
"com.microsoft.onenote.mac": "notes",
"com.evernote.Evernote": "notes",
"notion.id": "notes",
"com.agiletortoise.Drafts-OSX": "notes",
};
// Browser bundle identifiers.
// When the focused app's bundle identifier matches one of these (substring
// comparison in either direction) and a window URL is available,
// detectApplicationType classifies the app by URL via URL_PATTERNS before it
// consults BUNDLE_TO_TYPE.
const BROWSER_BUNDLE_IDS = [
"com.apple.Safari",
"com.google.Chrome",
"com.google.Chrome.canary",
"com.microsoft.edgemac",
"org.mozilla.firefox",
"com.brave.Browser",
"com.operasoftware.Opera",
"com.vivaldi.Vivaldi",
];
/**
 * URL patterns for web applications, keyed by application type.
 *
 * detectApplicationType walks the entries in declaration order (email, chat,
 * notes) and returns the first type that has a matching pattern, so an
 * earlier group wins when a URL could match more than one.
 *
 * The patterns are written in lowercase because the window URL is lowercased
 * before it is tested. None of them carries the `g` or `y` flag, so repeated
 * `test()` calls on these shared RegExp objects keep no `lastIndex` state.
 */
const URL_PATTERNS: Record<string, RegExp[]> = {
  email: [
    /mail\.google\.com/,
    /outlook\.live\.com/,
    /outlook\.office\.com/,
    /mail\.yahoo\.com/,
    /mail\.proton\.me/,
    /webmail\./,
    /roundcube/,
    /fastmail\.com/,
  ],
  chat: [
    /web\.whatsapp\.com/,
    /discord\.com\/channels/,
    /teams\.microsoft\.com/,
    /slack\.com/,
    /web\.telegram\.org/,
    /messenger\.com/,
    // ChatGPT is served from chatgpt.com; the legacy host is kept so URLs
    // that still report it continue to be classified.
    /chat\.openai\.com/,
    /chatgpt\.com/,
    /claude\.ai/,
  ],
  notes: [
    /notion\.so/,
    /docs\.google\.com/,
    /onenote\.com/,
    /evernote\.com/,
    /roamresearch\.com/,
    /obsidian\.md/,
    /workflowy\.com/,
    /coda\.io/,
  ],
};
/**
 * Builds the system prompt sent to the formatting model.
 *
 * The prompt is assembled line by line and joined with "\n":
 *   1. SYSTEM_PROMPT (role statement);
 *   2. an optional "Custom vocabulary" line listing the vocabulary keys;
 *   3. an "Instructions:" heading followed by a 1-based numbered list made of
 *      BASE_INSTRUCTIONS plus the rules for the detected application type.
 *
 * @param context Formatting context. Only `accessibilityContext` and
 *   `vocabulary` are read. `vocabulary` is used through `size` and `keys()`,
 *   i.e. it is assumed to be Map-like — TODO confirm against FormatParams.
 * @returns An object whose `systemPrompt` holds the assembled prompt text.
 */
export function constructFormatterPrompt(context: FormatParams["context"]): {
  systemPrompt: string;
} {
  const { accessibilityContext, vocabulary } = context;

  // Detect application type from the focused app / window URL.
  const applicationType = detectApplicationType(accessibilityContext);

  // Base rules first, then the type-specific ones; a type with no entry in
  // APPLICATION_TYPE_RULES adds nothing.
  const instructions = [
    ...BASE_INSTRUCTIONS,
    ...(APPLICATION_TYPE_RULES[applicationType] ?? []),
  ];

  const parts = [SYSTEM_PROMPT];

  // Add vocabulary context if available. The leading "\n" yields a blank
  // separator line once the parts are joined.
  if (vocabulary && vocabulary.size > 0) {
    const vocabTerms = Array.from(vocabulary.keys()).join(", ");
    parts.push(`\nCustom vocabulary to use for corrections: ${vocabTerms}`);
  }

  // Add numbered instructions.
  parts.push("\nInstructions:");
  instructions.forEach((instruction, index) => {
    parts.push(`${index + 1}. ${instruction}`);
  });

  return { systemPrompt: parts.join("\n") };
}
/**
 * Classifies the focused application so that matching formatting rules can be
 * selected.
 *
 * Resolution order:
 *   1. No bundle identifier available -> "general".
 *   2. The app is a browser (see BROWSER_BUNDLE_IDS) and a window URL is known
 *      -> first URL_PATTERNS group with a matching pattern.
 *   3. Exact BUNDLE_TO_TYPE key.
 *   4. First BUNDLE_TO_TYPE key that contains, or is contained in, the bundle
 *      identifier.
 *   5. Otherwise "general".
 *
 * @param accessibilityContext Accessibility snapshot of the focused app; may
 *   be null or undefined when no context was captured.
 * @returns An application type string: a key of URL_PATTERNS, a value of
 *   BUNDLE_TO_TYPE, or "general".
 */
function detectApplicationType(
  accessibilityContext: GetAccessibilityContextResult | null | undefined,
): string {
  const bundleId = accessibilityContext?.context?.application?.bundleIdentifier;
  if (!bundleId) {
    return "general";
  }

  // Check if it's a browser. The comparison is a substring test in both
  // directions, so variants such as a ".beta" suffix still match.
  const isBrowser = BROWSER_BUNDLE_IDS.some(
    (browserId) => bundleId.includes(browserId) || browserId.includes(bundleId),
  );

  const windowUrl = accessibilityContext?.context?.windowInfo?.url;
  if (isBrowser && windowUrl) {
    // Match on scheme/host/path only: the query string and fragment can carry
    // arbitrary text (e.g. a search for "slack.com") that would otherwise
    // trigger a false classification.
    const url = windowUrl.toLowerCase().replace(/[?#][\s\S]*$/, "");
    for (const [type, patterns] of Object.entries(URL_PATTERNS)) {
      if (patterns.some((pattern) => pattern.test(url))) {
        return type;
      }
    }
    // No pattern matched: fall through to the bundle-identifier lookups.
  }

  // Check for exact match in native apps.
  const exactMatch = BUNDLE_TO_TYPE[bundleId];
  if (exactMatch) {
    return exactMatch;
  }

  // Check for partial matches.
  for (const [key, type] of Object.entries(BUNDLE_TO_TYPE)) {
    if (bundleId.includes(key) || key.includes(bundleId)) {
      return type;
    }
  }

  // Default to general.
  return "general";
}

View file

@ -31,7 +31,7 @@ export class OpenRouterProvider implements FormattingProvider {
// Build user prompt with context
const userPrompt = text;
const { text: formattedText } = await generateText({
const { text: aiResponse } = await generateText({
model: this.provider(this.model),
messages: [
{
@ -47,9 +47,14 @@ export class OpenRouterProvider implements FormattingProvider {
maxTokens: 2000,
});
// Extract formatted text from XML tags
const match = aiResponse.match(/<formatted_text>([\s\S]*?)<\/formatted_text>/);
const formattedText = match ? match[1].trim() : aiResponse.trim();
logger.pipeline.debug("Formatting completed", {
original: text,
formatted: formattedText,
hadXmlTags: !!match,
});
return formattedText;

View file

@ -423,6 +423,7 @@ class ModelManagerService extends EventEmitter {
// Otherwise, find the best available model (prioritize by quality)
const preferredOrder = [
"whisper-large-v3-turbo",
"whisper-large-v1",
"whisper-medium",
"whisper-small",

View file

@ -261,7 +261,7 @@ export class TranscriptionService {
chunkCount: session.transcriptionResults.length,
});
if (this.formatterEnabled && this.openRouterProvider) {
if (this.formatterEnabled && this.openRouterProvider && completeTranscription.trim().length) {
try {
const style =
session.context.sharedData.userPreferences?.formattingStyle;
@ -284,6 +284,8 @@ export class TranscriptionService {
logger.transcription.info("Text formatted successfully", {
sessionId,
originalTranscription: completeTranscription,
formattedTranscription: formattedText,
originalLength: completeTranscription.length,
formattedLength: formattedText.length,
});

View file

@ -84,7 +84,7 @@ class AccessibilityContextService {
// Enable manual accessibility for specific apps
if let bundleId: String = getBundleIdentifier(pid: pid),
appsManuallyEnableAx.contains(bundleId) {
FileHandle.standardError.write("🔧 Enabling manual accessibility for \(bundleId)\n".data(using: .utf8)!)
// FileHandle.standardError.write("🔧 Enabling manual accessibility for \(bundleId)\n".data(using: .utf8)!)
AXUIElementSetAttributeValue(application, "AXManualAccessibility" as CFString, kCFBooleanTrue)
AXUIElementSetAttributeValue(application, "AXEnhancedUserInterface" as CFString, kCFBooleanTrue)
}
@ -94,12 +94,12 @@ class AccessibilityContextService {
// Fallback to focused window if focused element fails
if error != .success {
FileHandle.standardError.write("⚠️ Failed to get focused element, trying focused window...\n".data(using: .utf8)!)
// FileHandle.standardError.write(" Failed to get focused element, trying focused window...\n".data(using: .utf8)!)
error = AXUIElementCopyAttributeValue(application, kAXFocusedWindowAttribute as CFString, &focusedElement)
}
guard error == .success, let element = focusedElement else {
FileHandle.standardError.write("❌ Failed to get focused element or window. Error: \(error.rawValue)\n".data(using: .utf8)!)
// FileHandle.standardError.write(" Failed to get focused element or window. Error: \(error.rawValue)\n".data(using: .utf8)!)
return nil
}
@ -238,13 +238,13 @@ class AccessibilityContextService {
var urlSource = "none"
// Debug: Print all window attributes
FileHandle.standardError.write("🔍 Window attributes:\n".data(using: .utf8)!)
// FileHandle.standardError.write("🔍 Window attributes:\n".data(using: .utf8)!)
let attributes = getAttributeNames(element: windowElement)
for attribute in attributes {
if let value = getAttributeValue(element: windowElement, attribute: attribute) {
FileHandle.standardError.write(" \(attribute): \(value)\n".data(using: .utf8)!)
// FileHandle.standardError.write(" \(attribute): \(value)\n".data(using: .utf8)!)
} else {
FileHandle.standardError.write(" \(attribute): <no value>\n".data(using: .utf8)!)
// FileHandle.standardError.write(" \(attribute): <no value>\n".data(using: .utf8)!)
}
}
@ -258,15 +258,15 @@ class AccessibilityContextService {
let isFirefox = bundleId == "org.mozilla.firefox"
FileHandle.standardError.write("🔍 Browser type - Chromium: \(isChromiumBrowser), Firefox: \(isFirefox), Bundle: \(bundleId ?? "unknown")\n".data(using: .utf8)!)
// FileHandle.standardError.write("🔍 Browser type - Chromium: \(isChromiumBrowser), Firefox: \(isFirefox), Bundle: \(bundleId ?? "unknown")\n".data(using: .utf8)!)
// For Chromium browsers and Firefox: Prioritize AXWebArea (live URL)
if isChromiumBrowser || isFirefox {
FileHandle.standardError.write("🔍 Using AXWebArea priority for Chromium/Firefox browser\n".data(using: .utf8)!)
// FileHandle.standardError.write("🔍 Using AXWebArea priority for Chromium/Firefox browser\n".data(using: .utf8)!)
foundURL = findURLInChildren(element: windowElement, depth: 0, maxDepth: 30)
if foundURL != nil {
urlSource = "tree_walking_priority"
FileHandle.standardError.write("🔍 Found URL from AXWebArea (priority): \(foundURL!)\n".data(using: .utf8)!)
// FileHandle.standardError.write("🔍 Found URL from AXWebArea (priority): \(foundURL!)\n".data(using: .utf8)!)
return foundURL
}
}
@ -279,7 +279,7 @@ class AccessibilityContextService {
if docErr == .success, let urlString = urlRef as? String, !urlString.isEmpty {
foundURL = urlString
urlSource = "window_document"
FileHandle.standardError.write("🔍 Found URL from window document: \(urlString)\n".data(using: .utf8)!)
// FileHandle.standardError.write("🔍 Found URL from window document: \(urlString)\n".data(using: .utf8)!)
// For Safari and other WebKit browsers, this is reliable, return immediately
if !isChromiumBrowser && !isFirefox {
@ -295,7 +295,7 @@ class AccessibilityContextService {
if foundURL == nil {
foundURL = urlString
urlSource = "window_url"
FileHandle.standardError.write("🔍 Found URL from window URL attribute: \(urlString)\n".data(using: .utf8)!)
// FileHandle.standardError.write("🔍 Found URL from window URL attribute: \(urlString)\n".data(using: .utf8)!)
// For Safari and other WebKit browsers, this is reliable, return immediately
if !isChromiumBrowser && !isFirefox {
@ -309,17 +309,17 @@ class AccessibilityContextService {
foundURL = findURLInChildren(element: windowElement, depth: 0, maxDepth: 3)
if foundURL != nil {
urlSource = "tree_walking_fallback"
FileHandle.standardError.write("🔍 Found URL from tree walking (fallback): \(foundURL!)\n".data(using: .utf8)!)
// FileHandle.standardError.write("🔍 Found URL from tree walking (fallback): \(foundURL!)\n".data(using: .utf8)!)
return foundURL
}
}
if foundURL != nil {
FileHandle.standardError.write("🔍 Returning URL (\(urlSource)): \(foundURL!)\n".data(using: .utf8)!)
// FileHandle.standardError.write("🔍 Returning URL (\(urlSource)): \(foundURL!)\n".data(using: .utf8)!)
return foundURL
}
FileHandle.standardError.write("🔍 No URL found from any method\n".data(using: .utf8)!)
// FileHandle.standardError.write("🔍 No URL found from any method\n".data(using: .utf8)!)
return nil
}
@ -355,11 +355,11 @@ class AccessibilityContextService {
}
// log role
FileHandle.standardError.write("🔍 Found element with role: \(role) at depth \(currentDepth + 1)\n".data(using: .utf8)!)
// FileHandle.standardError.write("🔍 Found element with role: \(role) at depth \(currentDepth + 1)\n".data(using: .utf8)!)
// log all attribute names
FileHandle.standardError.write("🔍 Element attributes: \(getAttributeNames(element: child))\n".data(using: .utf8)!)
// FileHandle.standardError.write("🔍 Element attributes: \(getAttributeNames(element: child))\n".data(using: .utf8)!)
// log kAXURLAttribute
FileHandle.standardError.write("🔍 kAXURLAttribute: \(getAttributeValue(element: child, attribute: kAXURLAttribute) ?? "none")\n".data(using: .utf8)!)
// FileHandle.standardError.write("🔍 kAXURLAttribute: \(getAttributeValue(element: child, attribute: kAXURLAttribute) ?? "none")\n".data(using: .utf8)!)
// Priority 1: Address/search fields (most current)
if role == "AXTextField" || role == "AXComboBox" || role == "AXSafariAddressAndSearchField" {
@ -370,7 +370,7 @@ class AccessibilityContextService {
let value = valueRef as? String,
!value.isEmpty,
(value.hasPrefix("http://") || value.hasPrefix("https://") || value.contains(".")) {
FileHandle.standardError.write("🔍 Found URL in address field (\(role)): \(value)\n".data(using: .utf8)!)
// FileHandle.standardError.write("🔍 Found URL in address field (\(role)): \(value)\n".data(using: .utf8)!)
return value
}
}
@ -389,7 +389,7 @@ class AccessibilityContextService {
kAXURLAttribute as CFString,
&urlRef) == .success,
let urlString = urlRef as? String, !urlString.isEmpty {
FileHandle.standardError.write("🔍 Found URL in web area: \(urlString)\n".data(using: .utf8)!)
// FileHandle.standardError.write("🔍 Found URL in web area: \(urlString)\n".data(using: .utf8)!)
return urlString
}
@ -397,7 +397,7 @@ class AccessibilityContextService {
kAXDocumentAttribute as CFString,
&urlRef) == .success,
let urlString = urlRef as? String, !urlString.isEmpty {
FileHandle.standardError.write("🔍 Found URL in web area document: \(urlString)\n".data(using: .utf8)!)
// FileHandle.standardError.write("🔍 Found URL in web area document: \(urlString)\n".data(using: .utf8)!)
return urlString
}
}