fix: strip leading space after trailing ascii whitespace

This commit is contained in:
haritabh-z01 2026-01-16 21:56:02 +05:30
parent cc03a88fc4
commit cc7712b0a7
5 changed files with 48 additions and 11 deletions

View file

@ -9,6 +9,7 @@ export { PipelineContext, SharedPipelineData } from "./context";
// Context for transcription operations (shared between transcribe and flush)
export interface TranscribeContext {
sessionId?: string;
vocabulary?: string[];
accessibilityContext?: GetAccessibilityContextResult | null;
previousChunk?: string;

View file

@ -34,6 +34,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
null;
private currentAggregatedTranscription: string | undefined;
private currentVocabulary: string[] = [];
private currentSessionId: string | undefined;
// Configuration
private readonly FRAME_SIZE = 512; // 32ms at 16kHz
@ -65,6 +66,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
this.currentAccessibilityContext = context?.accessibilityContext ?? null;
this.currentAggregatedTranscription = context?.aggregatedTranscription;
this.currentVocabulary = context?.vocabulary ?? [];
this.currentSessionId = context?.sessionId;
// Check authentication
if (!(await this.authService.isAuthenticated())) {
@ -110,6 +112,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
this.currentAccessibilityContext = context?.accessibilityContext ?? null;
this.currentAggregatedTranscription = context?.aggregatedTranscription;
this.currentVocabulary = context?.vocabulary ?? [];
this.currentSessionId = context?.sessionId;
// Check authentication
if (!(await this.authService.isAuthenticated())) {
@ -117,7 +120,8 @@ export class AmicalCloudProvider implements TranscriptionProvider {
}
const enableFormatting = context.formattingEnabled ?? false;
return this.doTranscription(enableFormatting);
// flush() is called at session end, so this is the final call
return this.doTranscription(enableFormatting, true);
} catch (error) {
logger.transcription.error("Cloud transcription error:", error);
throw error;
@ -126,8 +130,13 @@ export class AmicalCloudProvider implements TranscriptionProvider {
/**
* Shared transcription logic - aggregates buffer, calls cloud API, clears state
* @param enableFormatting - Whether to enable formatting
* @param isFinal - Whether this is the final call for the session (default: false)
*/
private async doTranscription(enableFormatting: boolean): Promise<string> {
private async doTranscription(
enableFormatting: boolean,
isFinal = false,
): Promise<string> {
// Combine all frames into a single Float32Array
const totalLength = this.frameBuffer.reduce(
(acc, frame) => acc + frame.length,
@ -154,6 +163,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
vadProbs,
false,
enableFormatting,
isFinal,
);
}
@ -168,6 +178,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
this.currentLanguage = undefined;
this.currentAccessibilityContext = null;
this.currentAggregatedTranscription = undefined;
this.currentSessionId = undefined;
}
private shouldTranscribe(): boolean {
@ -188,6 +199,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
vadProbs: number[],
isRetry = false,
enableFormatting = false,
isFinal = false,
): Promise<string> {
// Skip API call if there's nothing to process
if (audioData.length === 0) {
@ -213,6 +225,8 @@ export class AmicalCloudProvider implements TranscriptionProvider {
duration,
isRetry,
formatting: enableFormatting,
sessionId: this.currentSessionId,
isFinal,
});
const response = await fetch(`${this.apiEndpoint}/transcribe`, {
@ -223,6 +237,8 @@ export class AmicalCloudProvider implements TranscriptionProvider {
"User-Agent": getUserAgent(),
},
body: JSON.stringify({
sessionId: this.currentSessionId,
isFinal,
audioData: Array.from(audioData),
vadProbs,
language: this.currentLanguage,
@ -270,12 +286,13 @@ export class AmicalCloudProvider implements TranscriptionProvider {
// Force token refresh
await this.authService.refreshTokenIfNeeded();
// Retry the request once (preserve formatting flag)
// Retry the request once (preserve formatting and isFinal flags)
return await this.makeTranscriptionRequest(
audioData,
vadProbs,
true,
enableFormatting,
isFinal,
);
} catch (refreshError) {
logger.transcription.error("Token refresh failed:", refreshError);

View file

@ -300,6 +300,7 @@ export class TranscriptionService {
audioData: audioChunk,
speechProbability: speechProbability,
context: {
sessionId,
vocabulary: session.context.sharedData.vocabulary,
accessibilityContext: session.context.sharedData.accessibilityContext,
previousChunk,
@ -391,11 +392,13 @@ export class TranscriptionService {
? session.transcriptionResults[
session.transcriptionResults.length - 1
]
: undefined; const aggregatedTranscription = session.transcriptionResults.join("");
: undefined;
const aggregatedTranscription = session.transcriptionResults.join("");
const provider = await this.selectProvider();
usedCloudProvider = provider.name === "amical-cloud";
const finalTranscription = await provider.flush({
sessionId,
vocabulary: session.context.sharedData.vocabulary,
accessibilityContext: session.context.sharedData.accessibilityContext,
previousChunk,
@ -682,15 +685,14 @@ export class TranscriptionService {
return transcription;
}
// Strip leading space if:
// 1. No previous text (start of document/field)
// 2. Previous text ends with whitespace (avoid double space)
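// Strip the leading space only when the text before the cursor is known
// (not null/undefined) and is either empty ("", i.e. nothing precedes the
// cursor) or ends with ASCII whitespace (space, tab, CR or LF).
// When the previous text is unknown (null/undefined), keep the leading space.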
const shouldStripLeadingSpace =
!preSelectionText ||
preSelectionText.length === 0 ||
/\s$/.test(preSelectionText);
preSelectionText !== undefined &&
preSelectionText !== null &&
(preSelectionText.length === 0 || /[ \t\r\n]$/.test(preSelectionText));
return shouldStripLeadingSpace ? transcription.trimStart() : transcription;
return shouldStripLeadingSpace ? transcription.slice(1) : transcription;
}
/**

View file

@ -207,6 +207,7 @@ class AccessibilityContextService {
let selectedText = getAttributeValue(element: element, attribute: kAXSelectedTextAttribute)
// Calculate pre and post selection/cursor text
// Return "" instead of nil when cursor is at start/end of document
var preSelectionText: String? = nil
var postSelectionText: String? = nil
@ -214,19 +215,25 @@ class AccessibilityContextService {
let nsString = fullContent as NSString
// Pre-selection text: last MAX_CONTEXT_LENGTH chars before cursor/selection
// Returns "" if cursor is at start of document (position 0)
if range.location > 0 {
let preLength = min(range.location, MAX_CONTEXT_LENGTH)
let preStart = range.location - preLength
let preRange = NSRange(location: preStart, length: preLength)
preSelectionText = nsString.substring(with: preRange)
} else {
preSelectionText = ""
}
// Post-selection text: first MAX_CONTEXT_LENGTH chars after cursor/selection
// Returns "" if cursor is at end of document
let postStart = range.location + range.length
if postStart < nsString.length {
let postLength = min(nsString.length - postStart, MAX_CONTEXT_LENGTH)
let postRange = NSRange(location: postStart, length: postLength)
postSelectionText = nsString.substring(with: postRange)
} else {
postSelectionText = ""
}
}

View file

@ -384,18 +384,28 @@ namespace WindowsHelper.Services
};
// Extract pre-selection context (last MAX_CONTEXT_LENGTH chars before cursor/selection)
// Falls back to "" when textBefore is null or empty (e.g. cursor at start of document)
if (!string.IsNullOrEmpty(textBefore))
{
int preStart = Math.Max(0, textBefore.Length - MAX_CONTEXT_LENGTH);
preSelectionText = textBefore.Substring(preStart);
}
else
{
preSelectionText = "";
}
// Extract post-selection context (first MAX_CONTEXT_LENGTH chars after cursor/selection)
// Falls back to "" when textAfter is null or empty (e.g. cursor at end of document)
if (!string.IsNullOrEmpty(textAfter))
{
int postLength = Math.Min(textAfter.Length, MAX_CONTEXT_LENGTH);
postSelectionText = textAfter.Substring(0, postLength);
}
else
{
postSelectionText = "";
}
}
catch (COMException ex)
{