fix: strip leading space after trailing ascii whitespace
This commit is contained in:
parent
cc03a88fc4
commit
cc7712b0a7
5 changed files with 48 additions and 11 deletions
|
|
@ -9,6 +9,7 @@ export { PipelineContext, SharedPipelineData } from "./context";
|
|||
|
||||
// Context for transcription operations (shared between transcribe and flush)
|
||||
export interface TranscribeContext {
|
||||
sessionId?: string;
|
||||
vocabulary?: string[];
|
||||
accessibilityContext?: GetAccessibilityContextResult | null;
|
||||
previousChunk?: string;
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
|
|||
null;
|
||||
private currentAggregatedTranscription: string | undefined;
|
||||
private currentVocabulary: string[] = [];
|
||||
private currentSessionId: string | undefined;
|
||||
|
||||
// Configuration
|
||||
private readonly FRAME_SIZE = 512; // 32ms at 16kHz
|
||||
|
|
@ -65,6 +66,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
|
|||
this.currentAccessibilityContext = context?.accessibilityContext ?? null;
|
||||
this.currentAggregatedTranscription = context?.aggregatedTranscription;
|
||||
this.currentVocabulary = context?.vocabulary ?? [];
|
||||
this.currentSessionId = context?.sessionId;
|
||||
|
||||
// Check authentication
|
||||
if (!(await this.authService.isAuthenticated())) {
|
||||
|
|
@ -110,6 +112,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
|
|||
this.currentAccessibilityContext = context?.accessibilityContext ?? null;
|
||||
this.currentAggregatedTranscription = context?.aggregatedTranscription;
|
||||
this.currentVocabulary = context?.vocabulary ?? [];
|
||||
this.currentSessionId = context?.sessionId;
|
||||
|
||||
// Check authentication
|
||||
if (!(await this.authService.isAuthenticated())) {
|
||||
|
|
@ -117,7 +120,8 @@ export class AmicalCloudProvider implements TranscriptionProvider {
|
|||
}
|
||||
|
||||
const enableFormatting = context.formattingEnabled ?? false;
|
||||
return this.doTranscription(enableFormatting);
|
||||
// flush() is called at session end, so this is the final call
|
||||
return this.doTranscription(enableFormatting, true);
|
||||
} catch (error) {
|
||||
logger.transcription.error("Cloud transcription error:", error);
|
||||
throw error;
|
||||
|
|
@ -126,8 +130,13 @@ export class AmicalCloudProvider implements TranscriptionProvider {
|
|||
|
||||
/**
|
||||
* Shared transcription logic - aggregates buffer, calls cloud API, clears state
|
||||
* @param enableFormatting - Whether to enable formatting
|
||||
* @param isFinal - Whether this is the final call for the session (default: false)
|
||||
*/
|
||||
private async doTranscription(enableFormatting: boolean): Promise<string> {
|
||||
private async doTranscription(
|
||||
enableFormatting: boolean,
|
||||
isFinal = false,
|
||||
): Promise<string> {
|
||||
// Combine all frames into a single Float32Array
|
||||
const totalLength = this.frameBuffer.reduce(
|
||||
(acc, frame) => acc + frame.length,
|
||||
|
|
@ -154,6 +163,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
|
|||
vadProbs,
|
||||
false,
|
||||
enableFormatting,
|
||||
isFinal,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -168,6 +178,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
|
|||
this.currentLanguage = undefined;
|
||||
this.currentAccessibilityContext = null;
|
||||
this.currentAggregatedTranscription = undefined;
|
||||
this.currentSessionId = undefined;
|
||||
}
|
||||
|
||||
private shouldTranscribe(): boolean {
|
||||
|
|
@ -188,6 +199,7 @@ export class AmicalCloudProvider implements TranscriptionProvider {
|
|||
vadProbs: number[],
|
||||
isRetry = false,
|
||||
enableFormatting = false,
|
||||
isFinal = false,
|
||||
): Promise<string> {
|
||||
// Skip API call if there's nothing to process
|
||||
if (audioData.length === 0) {
|
||||
|
|
@ -213,6 +225,8 @@ export class AmicalCloudProvider implements TranscriptionProvider {
|
|||
duration,
|
||||
isRetry,
|
||||
formatting: enableFormatting,
|
||||
sessionId: this.currentSessionId,
|
||||
isFinal,
|
||||
});
|
||||
|
||||
const response = await fetch(`${this.apiEndpoint}/transcribe`, {
|
||||
|
|
@ -223,6 +237,8 @@ export class AmicalCloudProvider implements TranscriptionProvider {
|
|||
"User-Agent": getUserAgent(),
|
||||
},
|
||||
body: JSON.stringify({
|
||||
sessionId: this.currentSessionId,
|
||||
isFinal,
|
||||
audioData: Array.from(audioData),
|
||||
vadProbs,
|
||||
language: this.currentLanguage,
|
||||
|
|
@ -270,12 +286,13 @@ export class AmicalCloudProvider implements TranscriptionProvider {
|
|||
// Force token refresh
|
||||
await this.authService.refreshTokenIfNeeded();
|
||||
|
||||
// Retry the request once (preserve formatting flag)
|
||||
// Retry the request once (preserve formatting and isFinal flags)
|
||||
return await this.makeTranscriptionRequest(
|
||||
audioData,
|
||||
vadProbs,
|
||||
true,
|
||||
enableFormatting,
|
||||
isFinal,
|
||||
);
|
||||
} catch (refreshError) {
|
||||
logger.transcription.error("Token refresh failed:", refreshError);
|
||||
|
|
|
|||
|
|
@ -300,6 +300,7 @@ export class TranscriptionService {
|
|||
audioData: audioChunk,
|
||||
speechProbability: speechProbability,
|
||||
context: {
|
||||
sessionId,
|
||||
vocabulary: session.context.sharedData.vocabulary,
|
||||
accessibilityContext: session.context.sharedData.accessibilityContext,
|
||||
previousChunk,
|
||||
|
|
@ -391,11 +392,13 @@ export class TranscriptionService {
|
|||
? session.transcriptionResults[
|
||||
session.transcriptionResults.length - 1
|
||||
]
|
||||
: undefined; const aggregatedTranscription = session.transcriptionResults.join("");
|
||||
: undefined;
|
||||
const aggregatedTranscription = session.transcriptionResults.join("");
|
||||
|
||||
const provider = await this.selectProvider();
|
||||
usedCloudProvider = provider.name === "amical-cloud";
|
||||
const finalTranscription = await provider.flush({
|
||||
sessionId,
|
||||
vocabulary: session.context.sharedData.vocabulary,
|
||||
accessibilityContext: session.context.sharedData.accessibilityContext,
|
||||
previousChunk,
|
||||
|
|
@ -682,15 +685,14 @@ export class TranscriptionService {
|
|||
return transcription;
|
||||
}
|
||||
|
||||
// Strip leading space if:
|
||||
// 1. No previous text (start of document/field)
|
||||
// 2. Previous text ends with whitespace (avoid double space)
|
||||
// Strip leading space only if previous text is known and is either empty (cursor at start of field) or ends with ASCII whitespace.
|
||||
// When the previous text is unknown (null/undefined), keep the leading space.
|
||||
const shouldStripLeadingSpace =
|
||||
!preSelectionText ||
|
||||
preSelectionText.length === 0 ||
|
||||
/\s$/.test(preSelectionText);
|
||||
preSelectionText !== undefined &&
|
||||
preSelectionText !== null &&
|
||||
(preSelectionText.length === 0 || /[ \t\r\n]$/.test(preSelectionText));
|
||||
|
||||
return shouldStripLeadingSpace ? transcription.trimStart() : transcription;
|
||||
return shouldStripLeadingSpace ? transcription.slice(1) : transcription;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -207,6 +207,7 @@ class AccessibilityContextService {
|
|||
let selectedText = getAttributeValue(element: element, attribute: kAXSelectedTextAttribute)
|
||||
|
||||
// Calculate pre and post selection/cursor text
|
||||
// Return "" instead of nil when cursor is at start/end of document
|
||||
var preSelectionText: String? = nil
|
||||
var postSelectionText: String? = nil
|
||||
|
||||
|
|
@ -214,19 +215,25 @@ class AccessibilityContextService {
|
|||
let nsString = fullContent as NSString
|
||||
|
||||
// Pre-selection text: last MAX_CONTEXT_LENGTH chars before cursor/selection
|
||||
// Returns "" if cursor is at start of document (position 0)
|
||||
if range.location > 0 {
|
||||
let preLength = min(range.location, MAX_CONTEXT_LENGTH)
|
||||
let preStart = range.location - preLength
|
||||
let preRange = NSRange(location: preStart, length: preLength)
|
||||
preSelectionText = nsString.substring(with: preRange)
|
||||
} else {
|
||||
preSelectionText = ""
|
||||
}
|
||||
|
||||
// Post-selection text: first MAX_CONTEXT_LENGTH chars after cursor/selection
|
||||
// Returns "" if cursor is at end of document
|
||||
let postStart = range.location + range.length
|
||||
if postStart < nsString.length {
|
||||
let postLength = min(nsString.length - postStart, MAX_CONTEXT_LENGTH)
|
||||
let postRange = NSRange(location: postStart, length: postLength)
|
||||
postSelectionText = nsString.substring(with: postRange)
|
||||
} else {
|
||||
postSelectionText = ""
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -384,18 +384,28 @@ namespace WindowsHelper.Services
|
|||
};
|
||||
|
||||
// Extract pre-selection context (last MAX_CONTEXT_LENGTH chars before cursor/selection)
|
||||
// Returns "" if cursor is at start of document
|
||||
if (!string.IsNullOrEmpty(textBefore))
|
||||
{
|
||||
int preStart = Math.Max(0, textBefore.Length - MAX_CONTEXT_LENGTH);
|
||||
preSelectionText = textBefore.Substring(preStart);
|
||||
}
|
||||
else
|
||||
{
|
||||
preSelectionText = "";
|
||||
}
|
||||
|
||||
// Extract post-selection context (first MAX_CONTEXT_LENGTH chars after cursor/selection)
|
||||
// Returns "" if cursor is at end of document
|
||||
if (!string.IsNullOrEmpty(textAfter))
|
||||
{
|
||||
int postLength = Math.Min(textAfter.Length, MAX_CONTEXT_LENGTH);
|
||||
postSelectionText = textAfter.Substring(0, postLength);
|
||||
}
|
||||
else
|
||||
{
|
||||
postSelectionText = "";
|
||||
}
|
||||
}
|
||||
catch (COMException ex)
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue