chore: formatter prompt updates

This commit is contained in:
haritabh-z01 2025-07-13 14:16:52 +05:30
parent 95ce6bea36
commit feebe5cae4
7 changed files with 220 additions and 106 deletions

View file

@ -91,4 +91,17 @@ export const AVAILABLE_MODELS: Model[] = [
filename: "ggml-large-v3.bin",
checksum: "ad82bf6a9043ceed055076d0fd39f5f186ff8062",
},
{
id: "whisper-large-v3-turbo",
name: "Whisper Large v3 Turbo",
type: "whisper",
size: 1.5 * 1024 * 1024 * 1024, // ~1.5 GB
sizeFormatted: "~1.5 GB",
description:
"Optimized Large v3 variant with only 4 decoder layers, offering significantly faster transcription with accuracy comparable to Large v2/v3.",
downloadUrl:
"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin",
filename: "ggml-large-v3-turbo.bin",
checksum: "4af2b29d7ec73d781377bfd1758ca957a807e941",
},
];

View file

@ -8,6 +8,7 @@ import type { ShortcutManager } from "../services/shortcut-manager";
import { StreamingWavWriter } from "../../utils/streaming-wav-writer";
import * as fs from "node:fs";
import * as path from "node:path";
import { appContextStore } from "@/stores/app-context";
export type RecordingMode = "idle" | "ptt" | "hands-free";
@ -175,6 +176,9 @@ export class RecordingManager extends EventEmitter {
const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
this.currentSessionId = `session-${timestamp}`;
// Get accessibility context from global store
appContextStore.refreshAccessibilityData();
// Create audio file and WAV writer
const audioFilePath = await this.createAudioFile(this.currentSessionId);
this.currentAudioRecording = {

View file

@ -1,98 +1,187 @@
import { FormatParams } from "../../core/pipeline-types";
import { GetAccessibilityContextResult, ApplicationInfo } from "@amical/types";
import { GetAccessibilityContextResult } from "@amical/types";
/**
 * Role statement that opens the formatter system prompt.
 */
const SYSTEM_PROMPT =
  "You are a professional text formatter. " +
  "Your task is to format transcribed text to be clear, readable, and properly structured.";
/**
 * Formatting instructions that apply to every request, independent of the
 * application the text is destined for.
 *
 * The last three entries define the output contract: the model must wrap its
 * answer in `<formatted_text>` tags and emit nothing else.
 *
 * The list is frozen and typed `readonly` because it is shared module state;
 * consumers should copy it (e.g. with spread) rather than mutate it.
 */
const BASE_INSTRUCTIONS: readonly string[] = Object.freeze([
  "Fix any transcription errors based on context and custom vocabulary",
  "Add proper punctuation and capitalization",
  "Format paragraphs appropriately with sufficient line breaks",
  "Maintain the original meaning and tone",
  "Use the custom vocabulary to correct domain-specific terms",
  "Remove unnecessary filler words (um, uh, etc.) but keep natural speech patterns",
  "If the text is empty, return <formatted_text></formatted_text>",
  "Return ONLY the formatted text enclosed in <formatted_text></formatted_text> tags",
  "Do not include any commentary, explanations, or text outside the XML tags",
]);
/** Extra rules for text being dictated into an email client. */
const EMAIL_RULES: string[] = [
  "Format with proper email structure (greeting, body paragraphs, closing)",
  "Preserve email metadata if present (From, To, Subject, Date)",
  "Ensure proper paragraph breaks between different topics",
  "Maintain professional tone and formatting",
  "Format any quoted or forwarded content clearly",
  "Preserve email signatures and contact information",
];

/** Extra rules for text being dictated into a chat or messaging app. */
const CHAT_RULES: string[] = [
  "Preserve conversational tone and informal language",
  "Keep messages concise and separate",
  "Maintain emoji and emoticons if present",
  "Format timestamps and usernames clearly if included",
  "Preserve thread context and replies",
];

/** Extra rules for text being dictated into a note-taking app. */
const NOTES_RULES: string[] = [
  "Organize content with clear headings and sections",
  "Use bullet points or numbered lists where appropriate",
  "Maintain hierarchical structure of ideas",
  "Format action items and tasks clearly",
  "Preserve any existing formatting hints",
];

/** Fallback rules used when no more specific application type applies. */
const GENERAL_RULES: string[] = [
  "Apply standard formatting for general text",
  "Create logical paragraph breaks based on content flow",
  "Maintain consistent formatting throughout",
  "Preserve the original tone and style",
];

/**
 * Additional formatting rules, keyed by application type
 * ("email", "chat", "notes", "general").
 */
const APPLICATION_TYPE_RULES: Record<string, string[]> = {
  email: EMAIL_RULES,
  chat: CHAT_RULES,
  notes: NOTES_RULES,
  general: GENERAL_RULES,
};
/**
 * Known native application bundle identifiers, grouped by the application
 * type they belong to. Group and entry order is significant: it determines
 * the key order of BUNDLE_TO_TYPE.
 */
const BUNDLE_IDS_BY_TYPE: Array<[string, string[]]> = [
  [
    "email",
    [
      "com.apple.mail",
      "com.microsoft.Outlook",
      "com.readdle.smartemail",
      "com.google.Gmail",
    ],
  ],
  [
    "chat",
    [
      "com.tinyspeck.slackmacgap",
      "com.microsoft.teams",
      "com.facebook.archon", // Messenger
      "com.discord.Discord",
      "com.telegram.desktop",
    ],
  ],
  [
    "notes",
    [
      "com.apple.Notes",
      "com.microsoft.onenote.mac",
      "com.evernote.Evernote",
      "notion.id",
      "com.agiletortoise.Drafts-OSX",
    ],
  ],
];

/**
 * Lookup from application bundle identifier to application type
 * ("email", "chat" or "notes").
 */
const BUNDLE_TO_TYPE: Record<string, string> = {};
for (const [applicationType, bundleIds] of BUNDLE_IDS_BY_TYPE) {
  for (const bundleId of bundleIds) {
    BUNDLE_TO_TYPE[bundleId] = applicationType;
  }
}
/**
 * Bundle identifiers of web browsers. When the focused application matches
 * one of these, the window URL (not the bundle identifier) is used to decide
 * which kind of web application the user is typing into.
 *
 * Frozen and typed `readonly` because the list is shared module state that
 * should never be modified at runtime.
 */
const BROWSER_BUNDLE_IDS: readonly string[] = Object.freeze([
  "com.apple.Safari",
  "com.google.Chrome",
  "com.google.Chrome.canary",
  "com.microsoft.edgemac",
  "org.mozilla.firefox",
  "com.brave.Browser",
  "com.operasoftware.Opera",
  "com.vivaldi.Vivaldi",
]);
/**
 * URL patterns used to classify web applications opened in a browser, keyed
 * by application type. Types are checked in declaration order and the first
 * type with a matching pattern wins.
 *
 * Patterns are tested against a lowercased URL, so they must be written in
 * lowercase. They are intentionally unanchored so that subdomains
 * (e.g. "app.slack.com") match as well.
 */
const URL_PATTERNS: Record<string, RegExp[]> = {
  email: [
    /mail\.google\.com/,
    /outlook\.live\.com/,
    /outlook\.office\.com/,
    // Outlook on the web is also served from the office365.com host, which
    // the pattern above does not cover.
    /outlook\.office365\.com/,
    /mail\.yahoo\.com/,
    /mail\.proton\.me/,
    /webmail\./,
    /roundcube/,
    /fastmail\.com/,
  ],
  chat: [
    /web\.whatsapp\.com/,
    /discord\.com\/channels/,
    /teams\.microsoft\.com/,
    /slack\.com/,
    /web\.telegram\.org/,
    /messenger\.com/,
    /chat\.openai\.com/,
    // ChatGPT's current host; chat.openai.com is the legacy one.
    /chatgpt\.com/,
    /claude\.ai/,
  ],
  notes: [
    /notion\.so/,
    /docs\.google\.com/,
    /onenote\.com/,
    /evernote\.com/,
    /roamresearch\.com/,
    /obsidian\.md/,
    /workflowy\.com/,
    /coda\.io/,
  ],
};
/**
 * Builds the system prompt sent to the formatting model.
 *
 * The prompt is assembled, in order, from:
 *  1. the base role statement (SYSTEM_PROMPT),
 *  2. an optional line listing the custom vocabulary terms,
 *  3. a numbered instruction list: BASE_INSTRUCTIONS followed by the rules
 *     for the application type detected from the accessibility context.
 *
 * @param context - Formatting context. Only `accessibilityContext` and
 *   `vocabulary` are read; `vocabulary` is expected to expose `size` and
 *   `keys()` (Map-like), with the keys being the vocabulary terms.
 * @returns An object whose `systemPrompt` holds the newline-joined prompt.
 */
export function constructFormatterPrompt(context: FormatParams["context"]): {
  systemPrompt: string;
} {
  const { accessibilityContext, vocabulary } = context;

  // Detect application type
  const applicationType = detectApplicationType(accessibilityContext);

  // Own-property lookup so that a type name colliding with an inherited
  // Object.prototype member can never be spread into the instructions.
  const typeRules = Object.prototype.hasOwnProperty.call(
    APPLICATION_TYPE_RULES,
    applicationType,
  )
    ? (APPLICATION_TYPE_RULES[applicationType] ?? [])
    : [];

  // Build instructions array
  const instructions = [...BASE_INSTRUCTIONS, ...typeRules];

  // Build prompt parts
  const parts = [SYSTEM_PROMPT];

  // Add vocabulary context if available
  if (vocabulary && vocabulary.size > 0) {
    // Collapse whitespace (including newlines) inside each term so that a
    // vocabulary entry cannot break the one-line list or add prompt lines.
    const vocabTerms = Array.from(vocabulary.keys(), (term) =>
      String(term).replace(/\s+/g, " ").trim(),
    ).filter((term) => term.length > 0);

    if (vocabTerms.length > 0) {
      parts.push(
        `\nCustom vocabulary to use for corrections: ${vocabTerms.join(", ")}`,
      );
    }
  }

  // Add numbered instructions
  parts.push("\nInstructions:");
  instructions.forEach((instruction, index) => {
    parts.push(`${index + 1}. ${instruction}`);
  });

  return { systemPrompt: parts.join("\n") };
}
/**
 * Classifies the focused application as "email", "chat", "notes" or
 * "general" so that type-specific formatting rules can be selected.
 *
 * Resolution order:
 *  1. No bundle identifier available -> "general".
 *  2. Browser with a window URL -> first type in URL_PATTERNS that matches.
 *  3. Exact bundle identifier match in BUNDLE_TO_TYPE.
 *  4. Partial (substring, either direction) match against BUNDLE_TO_TYPE keys.
 *  5. Otherwise "general".
 *
 * A browser whose URL matches no pattern falls through to steps 3-5.
 *
 * @param accessibilityContext - Accessibility snapshot of the focused
 *   application; may be null/undefined when unavailable.
 * @returns The detected application type; never null.
 */
function detectApplicationType(
  accessibilityContext: GetAccessibilityContextResult | null | undefined,
): string {
  const bundleId = accessibilityContext?.context?.application?.bundleIdentifier;
  if (!bundleId) {
    return "general";
  }

  // Check if it's a browser.
  // NOTE(review): the reverse containment check (known id contains bundleId)
  // lets very short identifiers match broadly; kept as-is to preserve the
  // existing matching behavior — confirm whether it is still needed.
  const isBrowser = BROWSER_BUNDLE_IDS.some(
    (browserId) => bundleId.includes(browserId) || browserId.includes(bundleId),
  );

  const windowUrl = accessibilityContext?.context?.windowInfo?.url;
  if (isBrowser && windowUrl) {
    const typeFromUrl = detectTypeFromUrl(windowUrl);
    if (typeFromUrl !== null) {
      return typeFromUrl;
    }
  }

  // Check for exact match in native apps. Own-property check so identifiers
  // such as "constructor" cannot resolve to inherited Object members.
  if (Object.prototype.hasOwnProperty.call(BUNDLE_TO_TYPE, bundleId)) {
    const exactType = BUNDLE_TO_TYPE[bundleId];
    if (exactType) {
      return exactType;
    }
  }

  // Check for partial matches
  for (const [knownBundleId, type] of Object.entries(BUNDLE_TO_TYPE)) {
    if (bundleId.includes(knownBundleId) || knownBundleId.includes(bundleId)) {
      return type;
    }
  }

  // Default to general
  return "general";
}

/**
 * Returns the first application type in URL_PATTERNS with a pattern matching
 * the host and path of `rawUrl`, or null when nothing matches.
 */
function detectTypeFromUrl(rawUrl: string): string | null {
  const target = urlMatchTarget(rawUrl);
  for (const [type, patterns] of Object.entries(URL_PATTERNS)) {
    if (patterns.some((pattern) => pattern.test(target))) {
      return type;
    }
  }
  return null;
}

/**
 * Reduces a URL to lowercased "host + path" for pattern matching. Query
 * string and fragment are dropped so that a domain merely mentioned in them
 * (e.g. a search for "slack.com") does not classify the page.
 */
function urlMatchTarget(rawUrl: string): string {
  const lowered = rawUrl.trim().toLowerCase();
  try {
    const parsed = new URL(lowered);
    // Scheme-less input such as "slack.com:443/x" parses with an empty
    // hostname; fall back to manual trimming in that case.
    if (parsed.hostname) {
      return `${parsed.hostname}${parsed.pathname}`;
    }
  } catch {
    // Not an absolute URL (e.g. "mail.google.com/mail"); handled below.
  }
  return lowered.split(/[?#]/, 1)[0] ?? "";
}

View file

@ -31,7 +31,7 @@ export class OpenRouterProvider implements FormattingProvider {
// Build user prompt with context
const userPrompt = text;
const { text: formattedText } = await generateText({
const { text: aiResponse } = await generateText({
model: this.provider(this.model),
messages: [
{
@ -47,9 +47,14 @@ export class OpenRouterProvider implements FormattingProvider {
maxTokens: 2000,
});
// Extract formatted text from XML tags
const match = aiResponse.match(/<formatted_text>([\s\S]*?)<\/formatted_text>/);
const formattedText = match ? match[1].trim() : aiResponse.trim();
logger.pipeline.debug("Formatting completed", {
original: text,
formatted: formattedText,
hadXmlTags: !!match,
});
return formattedText;

View file

@ -423,6 +423,7 @@ class ModelManagerService extends EventEmitter {
// Otherwise, find the best available model (prioritize by quality)
const preferredOrder = [
"whisper-large-v3-turbo",
"whisper-large-v1",
"whisper-medium",
"whisper-small",

View file

@ -261,7 +261,7 @@ export class TranscriptionService {
chunkCount: session.transcriptionResults.length,
});
if (this.formatterEnabled && this.openRouterProvider) {
if (this.formatterEnabled && this.openRouterProvider && completeTranscription.trim().length) {
try {
const style =
session.context.sharedData.userPreferences?.formattingStyle;
@ -284,6 +284,8 @@ export class TranscriptionService {
logger.transcription.info("Text formatted successfully", {
sessionId,
originalTranscription: completeTranscription,
formattedTranscription: formattedText,
originalLength: completeTranscription.length,
formattedLength: formattedText.length,
});