feat: ax rewrite for swift to improve context

This commit is contained in:
haritabh-z01 2026-01-20 00:29:07 +05:30
parent 1f5e3649b1
commit 7243bfcb8a
15 changed files with 2791 additions and 660 deletions

View file

@ -278,7 +278,7 @@ export class RecordingManager extends EventEmitter {
const vadService = this.serviceManager.getService("vadService");
vadService.reset();
// Refresh accessibility context
// Refresh accessibility context (TextMarker API for Electron support)
const nativeBridge = this.serviceManager.getService("nativeBridge");
nativeBridge.refreshAccessibilityContext();

View file

@ -20,6 +20,10 @@ import {
GetAccessibilityTreeDetailsResult,
GetAccessibilityContextParams,
GetAccessibilityContextResult,
GetAccessibilityStatusParams,
GetAccessibilityStatusResult,
RequestAccessibilityPermissionParams,
RequestAccessibilityPermissionResult,
PasteTextParams,
PasteTextResult,
MuteSystemAudioParams,
@ -28,6 +32,7 @@ import {
RestoreSystemAudioResult,
SetShortcutsParams,
SetShortcutsResult,
AppContext,
} from "@amical/types";
// Define the interface for RPC methods
@ -40,6 +45,14 @@ interface RPCMethods {
params: GetAccessibilityContextParams;
result: GetAccessibilityContextResult;
};
getAccessibilityStatus: {
params: GetAccessibilityStatusParams;
result: GetAccessibilityStatusResult;
};
requestAccessibilityPermission: {
params: RequestAccessibilityPermissionParams;
result: RequestAccessibilityPermissionResult;
};
pasteText: {
params: PasteTextParams;
result: PasteTextResult;
@ -74,7 +87,7 @@ export class NativeBridge extends EventEmitter {
>();
private helperPath: string;
private logger = createScopedLogger("native-bridge");
private accessibilityContext: GetAccessibilityContextResult | null = null;
private accessibilityContext: AppContext | null = null;
// Auto-restart configuration
private static readonly MAX_RESTARTS = 3;
@ -435,15 +448,16 @@ export class NativeBridge extends EventEmitter {
*/
async refreshAccessibilityContext(): Promise<void> {
try {
const context = await this.call("getAccessibilityContext", {
const result = await this.call("getAccessibilityContext", {
editableOnly: false,
});
this.accessibilityContext = context;
this.accessibilityContext = result.context;
this.logger.debug("Accessibility context refreshed", {
hasApplication: !!context.context?.application?.name,
hasFocusedElement: !!context.context?.focusedElement?.role,
hasTextSelection: !!context.context?.textSelection?.selectedText,
hasWindow: !!context.context?.windowInfo?.title,
hasApplication: !!result.context?.application?.name,
hasFocusedElement: !!result.context?.focusedElement?.role,
hasTextSelection: !!result.context?.textSelection?.selectedText,
extractionMethod: result.context?.textSelection?.extractionMethod,
metricsMs: result.context?.metrics?.totalTimeMs,
});
} catch (error) {
this.logger.error("Failed to refresh accessibility context", {
@ -454,9 +468,13 @@ export class NativeBridge extends EventEmitter {
/**
* Get the cached accessibility context.
* Returns in the result wrapper format for API consistency.
*/
getAccessibilityContext(): GetAccessibilityContextResult | null {
return this.accessibilityContext;
if (this.accessibilityContext === null) {
return null;
}
return { context: this.accessibilityContext };
}
/**
@ -481,6 +499,20 @@ export class NativeBridge extends EventEmitter {
}
}
/**
* Get accessibility permission status.
* Sends the "getAccessibilityStatus" RPC with an empty params object and
* resolves with whatever result that call returns.
*/
async getAccessibilityStatus(): Promise<GetAccessibilityStatusResult> {
return this.call("getAccessibilityStatus", {});
}
/**
* Request accessibility permission.
* Sends the "requestAccessibilityPermission" RPC with an empty params object
* and resolves with whatever result that call returns.
*/
async requestAccessibilityPermission(): Promise<RequestAccessibilityPermissionResult> {
return this.call("requestAccessibilityPermission", {});
}
// Typed event emitter methods
on<E extends keyof NativeBridgeEvents>(
event: E,

View file

@ -1,526 +0,0 @@
import Foundation
import ApplicationServices
import AppKit
// Apps that need manual accessibility enabling
let appsManuallyEnableAx: Set<String> = ["com.google.Chrome", "org.mozilla.firefox", "com.microsoft.edgemac", "com.apple.Safari"]
/// Identifying details for a process: its pid plus an optional name, bundle identifier and version.
/// NOTE(review): Foundation (imported by this file) already declares a `ProcessInfo` class; this
/// module-level struct takes precedence over it for unqualified uses — confirm that is intended.
struct ProcessInfo {
let pid: pid_t
let name: String?
let bundleIdentifier: String?
let version: String?
}
/// A text selection bundled with the process it was read from.
/// NOTE(review): nothing in the visible part of this file constructs `Selection`
/// (`getTextSelection` returns `TextSelection`) — check whether this type is still used.
struct Selection {
let text: String
let process: ProcessInfo
// Text surrounding the selection; presumably before/after it — TODO confirm against the producer.
let preSelection: String?
let postSelection: String?
let fullContent: String?
let selectionRange: NSRange?
let isEditable: Bool
let elementType: String?
}
class AccessibilityContextService {
/// Reports whether this process is trusted to use the Accessibility API.
///
/// - Parameter prompt: Value stored under `kAXTrustedCheckOptionPrompt` in the options
///   dictionary handed to the system; defaults to `false`.
/// - Returns: The result of `AXIsProcessTrustedWithOptions`.
static func checkAccessibilityPermissions(prompt: Bool = false) -> Bool {
    let promptKey = kAXTrustedCheckOptionPrompt.takeUnretainedValue() as String
    let trustOptions: [String: Any] = [promptKey: prompt]
    return AXIsProcessTrustedWithOptions(trustOptions as CFDictionary)
}
/// Returns the pid of the frontmost application as reported by `NSWorkspace`.
///
/// - Returns: The process identifier, or `0` (after logging to stderr) when there is no
///   frontmost application.
static func getFrontProcessID() -> pid_t {
    if let frontmostApp = NSWorkspace.shared.frontmostApplication {
        return frontmostApp.processIdentifier
    }
    FileHandle.standardError.write("❌ No frontmost application found\n".data(using: .utf8)!)
    return 0
}
/// Returns the last path component of the executable URL of the running application with `pid`.
///
/// - Returns: `nil` when no running application matches `pid` or it has no executable URL.
static func getProcessName(pid: pid_t) -> String? {
    return NSRunningApplication(processIdentifier: pid)?.executableURL?.lastPathComponent
}
/// Returns the bundle identifier of the running application with `pid`.
///
/// - Returns: `nil` when no running application matches `pid` or it has no bundle identifier.
static func getBundleIdentifier(pid: pid_t) -> String? {
    return NSRunningApplication(processIdentifier: pid)?.bundleIdentifier
}
/// Returns the `CFBundleShortVersionString` of the running application with `pid`.
///
/// - Returns: `nil` when the process is not a running application, has no bundle URL,
///   its bundle cannot be opened, or the key is missing or not a string.
static func getApplicationVersion(pid: pid_t) -> String? {
    // Bail out when there is no bundle URL. The previous fallback of
    // `URL(fileURLWithPath: "")` made `Bundle(url:)` inspect the current working
    // directory, which is unrelated to the target process.
    guard let bundleURL = NSRunningApplication(processIdentifier: pid)?.bundleURL,
          let bundle = Bundle(url: bundleURL) else {
        return nil
    }
    return bundle.infoDictionary?["CFBundleShortVersionString"] as? String
}
/// Recursively reads the `kAXChildrenAttribute` of `element` and of its descendants.
///
/// The only work performed is fetching each element's children; nothing is returned.
///
/// - Parameters:
///   - element: Root element whose children are fetched.
///   - maxDepth: Remaining levels to descend; the call is a no-op when it is not positive.
static func touchDescendantElements(_ element: AXUIElement, maxDepth: Int) {
    if maxDepth <= 0 { return }

    var childrenRef: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, kAXChildrenAttribute as CFString, &childrenRef)
    guard status == .success, let descendants = childrenRef as? [AXUIElement] else {
        return
    }

    // Only the first 8 children are visited at each level to bound the cost.
    for child in descendants.prefix(8) {
        touchDescendantElements(child, maxDepth: maxDepth - 1)
    }
}
/// Returns the focused UI element of the application with `pid`, falling back to its
/// focused window when the focused-element query fails.
///
/// For bundle identifiers listed in `appsManuallyEnableAx`, `AXManualAccessibility` and
/// `AXEnhancedUserInterface` are set to true on the application element before querying.
///
/// - Returns: `nil` when neither query succeeds or the returned value is not an `AXUIElement`.
static func _getFocusedElement(pid: pid_t) -> AXUIElement? {
    let application = AXUIElementCreateApplication(pid)

    // Enable manual accessibility for specific apps
    if let bundleId: String = getBundleIdentifier(pid: pid),
       appsManuallyEnableAx.contains(bundleId) {
        AXUIElementSetAttributeValue(application, "AXManualAccessibility" as CFString, kCFBooleanTrue)
        AXUIElementSetAttributeValue(application, "AXEnhancedUserInterface" as CFString, kCFBooleanTrue)
    }

    var focusedElement: CFTypeRef?
    var error = AXUIElementCopyAttributeValue(application, kAXFocusedUIElementAttribute as CFString, &focusedElement)

    // Fallback to focused window if focused element fails
    if error != .success {
        error = AXUIElementCopyAttributeValue(application, kAXFocusedWindowAttribute as CFString, &focusedElement)
    }

    // Verify the CF type before the forced cast, as getParentChain and getWindowInfo do;
    // a forced cast on a CF type cannot fail and would otherwise hide a wrong value.
    guard error == .success,
          let element = focusedElement,
          CFGetTypeID(element) == AXUIElementGetTypeID() else {
        return nil
    }
    return (element as! AXUIElement)
}
/// Reads `attribute` from `element` and renders it as a string.
///
/// - String values are returned unchanged.
/// - Boolean values (CFBoolean) are returned as `"true"` / `"false"`.
/// - Other numeric values are returned via `NSNumber.stringValue`.
///
/// - Returns: `nil` when the attribute cannot be read or has any other type.
static func getAttributeValue(element: AXUIElement, attribute: String) -> String? {
    var rawValue: CFTypeRef?
    guard AXUIElementCopyAttributeValue(element, attribute as CFString, &rawValue) == .success,
          let attributeValue = rawValue else {
        return nil
    }

    if let stringValue = attributeValue as? String {
        return stringValue
    }
    if let numberValue = attributeValue as? NSNumber {
        // CFBoolean bridges to NSNumber, so it has to be recognised here. Previously the
        // NSNumber branch swallowed booleans and the `as? Bool` branch was unreachable,
        // so booleans came out as "1"/"0" instead of the intended "true"/"false".
        if CFGetTypeID(numberValue) == CFBooleanGetTypeID() {
            return numberValue.boolValue ? "true" : "false"
        }
        return numberValue.stringValue
    }
    return nil
}
/// Lists the attribute names that `element` reports.
///
/// - Returns: The names, or an empty array when the query fails or the result is not
///   an array of strings.
static func getAttributeNames(element: AXUIElement) -> [String] {
    var namesRef: CFArray?
    guard AXUIElementCopyAttributeNames(element, &namesRef) == .success,
          let names = namesRef as? [String] else {
        return []
    }
    return names
}
/// Heuristically decides whether `element` accepts text input.
///
/// An element counts as editable when its role is `AXTextField`, `AXTextArea` or
/// `AXComboBox`, when its subrole is `AXSecureTextField` or `AXSearchField`, or — as a
/// last resort — when it exposes a `kAXValueAttribute` at all.
static func isElementEditable(element: AXUIElement) -> Bool {
    let editableRoles = ["AXTextField", "AXTextArea", "AXComboBox"]
    let editableSubroles = ["AXSecureTextField", "AXSearchField"]

    // Both attributes are read up front, before any decision is made.
    let role = getAttributeValue(element: element, attribute: kAXRoleAttribute)
    let subrole = getAttributeValue(element: element, attribute: kAXSubroleAttribute)

    let hasEditableRole = role.map { editableRoles.contains($0) } ?? false
    let hasEditableSubrole = subrole.map { editableSubroles.contains($0) } ?? false
    if hasEditableRole || hasEditableSubrole {
        return true
    }

    // Fallback: presence of AXValue is treated as a sign of editability.
    return getAttributeNames(element: element).contains(kAXValueAttribute)
}
/// Collects the roles of the ancestors of `element`, nearest parent first.
///
/// The walk stops after `maxDepth` steps, or as soon as a parent cannot be read or is not
/// an `AXUIElement`. Ancestors without a readable role are traversed but not recorded.
///
/// - Parameters:
///   - element: Element whose ancestors are inspected.
///   - maxDepth: Maximum number of parent hops; defaults to 10.
/// - Returns: Role strings of the visited ancestors.
static func getParentChain(element: AXUIElement, maxDepth: Int = 10) -> [String] {
    var roles: [String] = []
    var cursor = element

    for _ in 0..<maxDepth {
        var parentRef: CFTypeRef?
        let status = AXUIElementCopyAttributeValue(cursor, kAXParentAttribute as CFString, &parentRef)
        guard status == .success,
              let candidate = parentRef,
              CFGetTypeID(candidate) == AXUIElementGetTypeID() else {
            break
        }

        let parent = candidate as! AXUIElement
        if let parentRole = getAttributeValue(element: parent, attribute: kAXRoleAttribute) {
            roles.append(parentRole)
        }
        cursor = parent
    }

    return roles
}
static let MAX_CONTEXT_LENGTH = 500
/// Builds a `TextSelection` describing the content and cursor/selection of `element`.
///
/// The full content comes from `kAXValueAttribute`, the range from
/// `kAXSelectedTextRangeAttribute` and the selected text from `kAXSelectedTextAttribute`.
/// When both content and range are available, up to `MAX_CONTEXT_LENGTH` UTF-16 units
/// before and after the selection are returned as context; an empty string (not `nil`)
/// means the selection touches the start or end of the content.
///
/// - Returns: `nil` when the element provides neither content nor a selection range.
static func getTextSelection(element: AXUIElement) -> TextSelection? {
    // Get full content first - we need this to provide context
    let fullContent = getAttributeValue(element: element, attribute: kAXValueAttribute)

    // Get selection/cursor range
    var selectionRange: SelectionRange? = nil
    var rangeValue: CFTypeRef?
    let rangeError = AXUIElementCopyAttributeValue(element, kAXSelectedTextRangeAttribute as CFString, &rangeValue)
    if rangeError == .success,
       let rawRange = rangeValue,
       CFGetTypeID(rawRange) == AXValueGetTypeID() {
        // The type check guards the forced cast, which cannot fail on a CF type.
        var range = CFRange()
        if AXValueGetValue(rawRange as! AXValue, .cfRange, &range) {
            selectionRange = SelectionRange(length: Int(range.length), location: Int(range.location))
        }
    }

    // If we have no cursor/selection position and no content, return nil
    guard selectionRange != nil || fullContent != nil else {
        return nil
    }

    // Get selected text (may be empty if just cursor position)
    let selectedText = getAttributeValue(element: element, attribute: kAXSelectedTextAttribute)

    var preSelectionText: String? = nil
    var postSelectionText: String? = nil

    if let fullContent = fullContent, let range = selectionRange {
        let nsString = fullContent as NSString
        let contentLength = nsString.length

        // Clamp the reported range to the content. The range and the value are read by
        // separate calls and may disagree (or the location may be negative); an
        // out-of-bounds NSRange makes substring(with:) raise NSRangeException.
        let selectionStart = min(max(range.location, 0), contentLength)
        let selectionLength = min(max(range.length, 0), contentLength - selectionStart)
        let selectionEnd = selectionStart + selectionLength

        // Last MAX_CONTEXT_LENGTH units before the selection ("" at the start of the content).
        let preLength = min(selectionStart, MAX_CONTEXT_LENGTH)
        preSelectionText = nsString.substring(
            with: NSRange(location: selectionStart - preLength, length: preLength))

        // First MAX_CONTEXT_LENGTH units after the selection ("" at the end of the content).
        let postLength = min(contentLength - selectionEnd, MAX_CONTEXT_LENGTH)
        postSelectionText = nsString.substring(
            with: NSRange(location: selectionEnd, length: postLength))
    }

    let isEditable = isElementEditable(element: element)

    return TextSelection(
        fullContent: fullContent,
        isEditable: isEditable,
        postSelectionText: postSelectionText,
        preSelectionText: preSelectionText,
        selectedText: selectedText,
        selectionRange: selectionRange
    )
}
/// Tries to determine the URL shown in a browser window.
///
/// Strategy:
/// 1. Chromium-family browsers and Firefox: search the window's element tree (up to
///    30 levels) first, because that yields the live page URL.
/// 2. Any browser: read the window's `kAXDocumentAttribute`, then `kAXURLAttribute`.
/// 3. Other browsers only: a shallow (3 level) tree search as a last resort.
///
/// - Parameters:
///   - windowElement: The window to inspect.
///   - bundleId: Bundle identifier of the owning application, used to pick the strategy.
/// - Returns: The first non-empty URL string found, or `nil`.
static func getBrowserURL(windowElement: AXUIElement, bundleId: String?) -> String? {
    // Reads a window attribute as a non-empty string. kAXURLAttribute is documented as a
    // CFURL rather than a CFString, so URL values are accepted as well as strings.
    func windowString(_ attribute: String) -> String? {
        var ref: CFTypeRef?
        guard AXUIElementCopyAttributeValue(windowElement, attribute as CFString, &ref) == .success else {
            return nil
        }
        let candidate = (ref as? String) ?? (ref as? URL)?.absoluteString
        guard let text = candidate, !text.isEmpty else { return nil }
        return text
    }

    // Determine browser type for conditional logic
    let loweredId = bundleId?.lowercased()
    let isChromiumBrowser = loweredId?.contains("chrome") == true ||
        loweredId?.contains("chromium") == true ||
        bundleId == "com.microsoft.edgemac" ||
        bundleId == "com.brave.Browser" ||
        bundleId == "com.operasoftware.Opera" ||
        bundleId == "com.vivaldi.Vivaldi"
    let isFirefox = bundleId == "org.mozilla.firefox"
    let prefersTreeWalk = isChromiumBrowser || isFirefox

    // For Chromium browsers and Firefox: prioritize the web area / address field (live URL)
    if prefersTreeWalk,
       let liveURL = findURLInChildren(element: windowElement, depth: 0, maxDepth: 30) {
        return liveURL
    }

    // Window-level attributes (reliable for Safari, fallback for others); the document
    // attribute takes precedence over the URL attribute.
    if let windowURL = windowString(kAXDocumentAttribute) ?? windowString(kAXURLAttribute) {
        return windowURL
    }

    // The deep tree walk has already been tried for Chromium/Firefox.
    if prefersTreeWalk {
        return nil
    }

    // For other browsers that exposed no window URL, try a shallow tree walk.
    return findURLInChildren(element: windowElement, depth: 0, maxDepth: 3)
}
/// Breadth-first search below `element` for something that looks like the current URL.
///
/// For every child visited, in order:
/// 1. Address/search fields (`AXTextField`, `AXComboBox`, `AXSafariAddressAndSearchField`)
///    whose value starts with `http://` / `https://` or contains a dot win immediately.
/// 2. `AXWebArea` elements are asked for `kAXURLAttribute`, then `kAXDocumentAttribute`.
///
/// - Parameters:
///   - element: Root of the search; its own attributes are not inspected, only descendants.
///   - depth: Depth assigned to `element`.
///   - maxDepth: Elements at this depth or deeper are not expanded.
/// - Returns: The first match, or `nil` when nothing is found.
static func findURLInChildren(element: AXUIElement, depth: Int, maxDepth: Int) -> String? {
    guard depth < maxDepth else { return nil }

    // Reads an attribute as a non-empty string. kAXURLAttribute is documented as a CFURL
    // rather than a CFString, so URL values are accepted as well as strings.
    func nonEmptyString(_ attribute: String, of node: AXUIElement) -> String? {
        var ref: CFTypeRef?
        guard AXUIElementCopyAttributeValue(node, attribute as CFString, &ref) == .success else {
            return nil
        }
        let candidate = (ref as? String) ?? (ref as? URL)?.absoluteString
        guard let text = candidate, !text.isEmpty else { return nil }
        return text
    }

    let addressFieldRoles = ["AXTextField", "AXComboBox", "AXSafariAddressAndSearchField"]

    // Level-by-level BFS. This keeps the original visiting order without the O(n)
    // cost of Array.removeFirst() on every dequeue.
    var currentLevel: [AXUIElement] = [element]
    var currentDepth = depth

    while !currentLevel.isEmpty && currentDepth < maxDepth {
        var nextLevel: [AXUIElement] = []

        for node in currentLevel {
            var childrenRef: CFTypeRef?
            guard AXUIElementCopyAttributeValue(node,
                                                kAXChildrenAttribute as CFString,
                                                &childrenRef) == .success,
                  let children = childrenRef as? [AXUIElement] else {
                continue
            }

            for child in children {
                // Children without a readable role are neither inspected nor expanded.
                var roleRef: CFTypeRef?
                guard AXUIElementCopyAttributeValue(child,
                                                    kAXRoleAttribute as CFString,
                                                    &roleRef) == .success,
                      let role = roleRef as? String else {
                    continue
                }

                // Priority 1: Address/search fields (most current)
                if addressFieldRoles.contains(role),
                   let value = nonEmptyString(kAXValueAttribute, of: child),
                   value.hasPrefix("http://") || value.hasPrefix("https://") || value.contains(".") {
                    return value
                }

                // Priority 2: Web areas. The former unconditional dump of every AXWebArea
                // attribute to stderr was removed: it cost one AX round-trip per attribute
                // and wrote page-derived values into the log.
                if role == "AXWebArea",
                   let pageURL = nonEmptyString(kAXURLAttribute, of: child)
                       ?? nonEmptyString(kAXDocumentAttribute, of: child) {
                    return pageURL
                }

                nextLevel.append(child)
            }
        }

        currentLevel = nextLevel
        currentDepth += 1
    }

    return nil
}
/// Describes the main window of the application with `pid`.
///
/// - Returns: A `WindowInfo` carrying the window title and, when one can be found, the
///   browser URL; `nil` when the main window cannot be read or is not an `AXUIElement`.
static func getWindowInfo(pid: pid_t) -> WindowInfo? {
    let appElement = AXUIElementCreateApplication(pid)

    var mainWindowRef: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(appElement, kAXMainWindowAttribute as CFString, &mainWindowRef)
    guard status == .success,
          let candidate = mainWindowRef,
          CFGetTypeID(candidate) == AXUIElementGetTypeID() else {
        return nil
    }
    let window = candidate as! AXUIElement

    let windowTitle = getAttributeValue(element: window, attribute: kAXTitleAttribute)
    // The URL lookup is attempted for every application; it yields nil when nothing is found.
    let owningBundleId = getBundleIdentifier(pid: pid)
    let pageURL = getBrowserURL(windowElement: window, bundleId: owningBundleId)

    return WindowInfo(title: windowTitle, url: pageURL)
}
/// Captures a snapshot of the frontmost application: app identity, focused element,
/// text selection and main-window info, stamped with the current time.
///
/// - Parameter editableOnly: When `true`, the text selection is only included if the
///   focused element is considered editable. Defaults to `false`.
/// - Returns: The assembled `Context`, or `nil` (after logging to stderr) when
///   accessibility permission is missing or no frontmost pid is available.
static func getAccessibilityContext(editableOnly: Bool = false) -> Context? {
    if !checkAccessibilityPermissions() {
        FileHandle.standardError.write("❌ Accessibility permissions not granted\n".data(using: .utf8)!)
        return nil
    }

    let pid = getFrontProcessID()
    if pid <= 0 {
        FileHandle.standardError.write("❌ Could not get frontmost application PID\n".data(using: .utf8)!)
        return nil
    }

    // Application identity
    let appName = getProcessName(pid: pid)
    let appBundleId = getBundleIdentifier(pid: pid)
    let appVersion = getApplicationVersion(pid: pid)
    let applicationInfo = Application(
        bundleIdentifier: appBundleId,
        name: appName,
        version: appVersion
    )

    // Focused element and its text selection
    var focusedElementInfo: FocusedElement? = nil
    var textSelectionInfo: TextSelection? = nil

    if let focused = _getFocusedElement(pid: pid) {
        // Read the children below the focused element before querying it.
        touchDescendantElements(focused, maxDepth: 3)

        let elementRole = getAttributeValue(element: focused, attribute: kAXRoleAttribute)
        let elementTitle = getAttributeValue(element: focused, attribute: kAXTitleAttribute)
        let elementDescription = getAttributeValue(element: focused, attribute: kAXDescriptionAttribute)
        let elementValue = getAttributeValue(element: focused, attribute: kAXValueAttribute)
        let elementIsEditable = isElementEditable(element: focused)

        focusedElementInfo = FocusedElement(
            description: elementDescription,
            isEditable: elementIsEditable,
            role: elementRole,
            title: elementTitle,
            value: elementValue
        )

        // Keep the selection unless the caller asked for editable elements only
        // and this one is not editable.
        if let selection = getTextSelection(element: focused),
           !editableOnly || selection.isEditable {
            textSelectionInfo = selection
        }
    }

    let windowInfo = getWindowInfo(pid: pid)

    return Context(
        application: applicationInfo,
        focusedElement: focusedElementInfo,
        textSelection: textSelectionInfo,
        timestamp: Date().timeIntervalSince1970,
        windowInfo: windowInfo
    )
}

View file

@ -48,7 +48,7 @@ struct AccessibilityElementNode: Codable {
class AccessibilityService {
private let maxDepth = 10 // To prevent excessively deep recursion and large payloads
private let maxDepth = ACCESSIBILITY_TREE_MAX_DEPTH // To prevent excessively deep recursion and large payloads
private let dateFormatter: DateFormatter
// Properties to store original audio states
@ -478,23 +478,19 @@ class AccessibilityService {
return false
}
// Simulate Cmd+V
// Using deprecated kVK_Command might still work but kCGEventFlagMaskCommand is preferred.
// Virtual key code for 'v' is 9.
let vKeyCode: CGKeyCode = 9
// Simulate Cmd+V using virtual key codes from Constants.swift
let source = CGEventSource(stateID: .hidSystemState)
let cmdDown = CGEvent(keyboardEventSource: source, virtualKey: CGKeyCode(55), keyDown: true) // 55 is kVK_Command
let cmdDown = CGEvent(keyboardEventSource: source, virtualKey: VK_COMMAND, keyDown: true)
cmdDown?.flags = .maskCommand
let vDown = CGEvent(keyboardEventSource: source, virtualKey: vKeyCode, keyDown: true)
let vDown = CGEvent(keyboardEventSource: source, virtualKey: VK_V, keyDown: true)
vDown?.flags = .maskCommand // Keep command flag for the V press as well
let vUp = CGEvent(keyboardEventSource: source, virtualKey: vKeyCode, keyDown: false)
let vUp = CGEvent(keyboardEventSource: source, virtualKey: VK_V, keyDown: false)
vUp?.flags = .maskCommand
let cmdUp = CGEvent(keyboardEventSource: source, virtualKey: CGKeyCode(55), keyDown: false)
let cmdUp = CGEvent(keyboardEventSource: source, virtualKey: VK_COMMAND, keyDown: false)
// No flags needed for key up typically, or just .maskCommand if it was held
if cmdDown == nil || vDown == nil || vUp == nil || cmdUp == nil {
@ -516,7 +512,7 @@ class AccessibilityService {
// Restore the original pasteboard content after a short delay
// to allow the paste action to complete.
DispatchQueue.main.asyncAfter(deadline: .now() + 0.2) { // 200ms delay
DispatchQueue.main.asyncAfter(deadline: .now() + PASTE_RESTORE_DELAY_SECONDS) {
self.restorePasteboard(
pasteboard: pasteboard, items: originalPasteboardItems,
originalChangeCount: originalChangeCount)
@ -547,10 +543,6 @@ class AccessibilityService {
}
}
// Define kVK_Function if not available from a system framework directly in this context.
// 0x3F is the virtual key code for the Fn key on Apple keyboards.
private let kVK_Function: CGKeyCode = 0x3F
// Determines whether a keyboard event should be forwarded to the Electron application.
// This method should be called from the CGEventTap callback in main.swift or RpcHandler.swift.
public func shouldForwardKeyboardEvent(event: CGEvent) -> Bool {
@ -570,7 +562,7 @@ class AccessibilityService {
if type == .keyDown || type == .keyUp {
// For keyDown and keyUp events, only forward if the event is FOR THE Fn KEY ITSELF.
if keyCode == kVK_Function {
if keyCode == VK_FUNCTION {
// logToStderr("[AccessibilityService] Forwarding \(type == .keyDown ? "keyDown" : "keyUp") event because it IS the Fn key (keyCode: \(keyCode)).")
return true
} else {

View file

@ -1,12 +1,19 @@
import Foundation
import ObjCExceptionCatcher
/// Flexible RPC request that can parse any method string.
/// `method` is a plain `String`, so decoding does not depend on the method name matching
/// a predefined set; `params` is kept untyped as `JSONAny?`.
struct FlexibleRPCRequest: Codable {
let id: String
let method: String
let params: JSONAny?
}
class IOBridge: NSObject {
private let jsonEncoder: JSONEncoder
private let jsonDecoder: JSONDecoder
let jsonEncoder: JSONEncoder
let jsonDecoder: JSONDecoder
private let accessibilityService: AccessibilityService
private let audioService: AudioService
private let dateFormatter: DateFormatter
let dateFormatter: DateFormatter
init(jsonEncoder: JSONEncoder, jsonDecoder: JSONDecoder) {
self.jsonEncoder = jsonEncoder
@ -38,13 +45,21 @@ class IOBridge: NSObject {
return
case .getAccessibilityContext:
// Process accessibility context requests on dedicated thread
// Process accessibility context requests on dedicated thread (uses v2 service)
AccessibilityQueue.shared.async { [weak self] in
guard let self = self else { return }
self.handleAccessibilityContext(request)
self.handleGetAccessibilityContext(id: request.id, params: request.params)
}
return
case .getAccessibilityStatus:
handleGetAccessibilityStatus(id: request.id)
return
case .requestAccessibilityPermission:
handleRequestAccessibilityPermission(id: request.id)
return
case .pasteText:
logToStderr("[IOBridge] Handling pasteText for ID: \(request.id)")
guard let paramsAnyCodable = request.params else {
@ -308,71 +323,70 @@ class IOBridge: NSObject {
}
}
private func handleAccessibilityContext(_ request: RPCRequestSchema) {
var contextParams: GetAccessibilityContextParamsSchema? = nil
logToStderr("[IOBridge] Handling getAccessibilityContext for ID: \(request.id)")
// MARK: - Accessibility Handlers (using consolidated service)
if let paramsAnyCodable = request.params {
private func handleGetAccessibilityContext(id: String, params: JSONAny?) {
logToStderr("[IOBridge] Handling getAccessibilityContext for ID: \(id)")
// Parse params (default editableOnly = false per spec)
var editableOnly = false
if let paramsAnyCodable = params {
do {
let paramsData = try jsonEncoder.encode(paramsAnyCodable)
contextParams = try jsonDecoder.decode(
GetAccessibilityContextParamsSchema.self, from: paramsData)
logToStderr(
"[IOBridge] Decoded contextParams.editableOnly: \(contextParams?.editableOnly ?? false) for ID: \(request.id)"
)
let contextParams = try jsonDecoder.decode(GetAccessibilityContextParams.self, from: paramsData)
editableOnly = contextParams.editableOnly ?? false
} catch {
logToStderr(
"[IOBridge] Error decoding getAccessibilityContext params: \(error.localizedDescription)"
)
let errPayload = Error(
code: -32602, data: request.params,
message: "Invalid params: \(error.localizedDescription)")
let rpcResponse = RPCResponseSchema(error: errPayload, id: request.id, result: nil)
sendRpcResponse(rpcResponse)
return
logToStderr("[IOBridge] Error decoding params: \(error.localizedDescription)")
}
}
let editableOnly = contextParams?.editableOnly ?? false
// Call service with exception handling
switch ExceptionCatcher.try({
AccessibilityContextService.getAccessibilityContext(editableOnly: editableOnly)
}) {
case .success(let context):
logToStderr("[IOBridge] Retrieved context for ID: \(request.id)")
let resultPayload = GetAccessibilityContextResultSchema(context: context)
do {
let resultData = try jsonEncoder.encode(resultPayload)
let resultAsJsonAny = try jsonDecoder.decode(JSONAny.self, from: resultData)
let rpcResponse = RPCResponseSchema(error: nil, id: request.id, result: resultAsJsonAny)
sendRpcResponse(rpcResponse)
} catch {
logToStderr("[IOBridge] Error encoding result: \(error.localizedDescription) for ID: \(request.id)")
let errPayload = Error(code: -32603, data: nil, message: "Error encoding result: \(error.localizedDescription)")
let rpcResponse = RPCResponseSchema(error: errPayload, id: request.id, result: nil)
sendRpcResponse(rpcResponse)
}
logToStderr("[IOBridge] Retrieved context for ID: \(id)")
let result = GetAccessibilityContextResult(context: context)
sendResult(id: id, result: result)
case .exception(let exception):
logToStderr("[IOBridge] NSException in getAccessibilityContext: \(exception.name) - \(exception.reason)")
let exceptionData: [String: Any] = [
"name": exception.name,
"reason": exception.reason,
"callStack": exception.callStack.prefix(10).joined(separator: "\n")
]
var exceptionJsonAny: JSONAny? = nil
if let jsonData = try? JSONSerialization.data(withJSONObject: exceptionData),
let decoded = try? jsonDecoder.decode(JSONAny.self, from: jsonData) {
exceptionJsonAny = decoded
}
let errPayload = Error(
code: -32603,
data: exceptionJsonAny,
message: "\(exception.name): \(exception.reason)"
)
let rpcResponse = RPCResponseSchema(error: errPayload, id: request.id, result: nil)
sendRpcResponse(rpcResponse)
sendError(id: id, code: -32603, message: "\(exception.name): \(exception.reason)")
}
}
/// Handles a `getAccessibilityStatus` request: logs it, asks `AccessibilityContextService`
/// for the status and sends that value back as the result for `id`.
/// - Parameter id: Identifier of the RPC request being answered.
private func handleGetAccessibilityStatus(id: String) {
logToStderr("[IOBridge] Handling getAccessibilityStatus for ID: \(id)")
let result = AccessibilityContextService.getAccessibilityStatus()
sendResult(id: id, result: result)
}
/// Handles a `requestAccessibilityPermission` request: logs it, calls
/// `AccessibilityContextService.requestAccessibilityPermission()` and sends the returned
/// value back as the result for `id`.
/// - Parameter id: Identifier of the RPC request being answered.
private func handleRequestAccessibilityPermission(id: String) {
logToStderr("[IOBridge] Handling requestAccessibilityPermission for ID: \(id)")
let result = AccessibilityContextService.requestAccessibilityPermission()
sendResult(id: id, result: result)
}
// MARK: - Response Helpers
/// Sends `result` as the successful response to request `id`.
///
/// The value is encoded to JSON and decoded again as `JSONAny` so it fits the untyped
/// `result` slot of `RPCResponseSchema`. If that round-trip throws, the failure is logged
/// and a `-32603` error response is sent instead.
///
/// - Parameters:
///   - id: Identifier of the RPC request being answered.
///   - result: Payload to deliver.
private func sendResult<T: Encodable>(id: String, result: T) {
    let payload: JSONAny
    do {
        let encoded = try jsonEncoder.encode(result)
        payload = try jsonDecoder.decode(JSONAny.self, from: encoded)
    } catch {
        logToStderr("[IOBridge] Error encoding result: \(error.localizedDescription)")
        sendError(id: id, code: -32603, message: "Error encoding result: \(error.localizedDescription)")
        return
    }
    sendRpcResponse(RPCResponseSchema(error: nil, id: id, result: payload))
}
/// Sends an error response for request `id`.
/// Builds an error payload from `code` and `message` (its `data` field is always nil)
/// and sends it in a response whose `result` is nil.
/// - Parameters:
///   - id: Identifier of the RPC request being answered.
///   - code: Numeric error code placed in the payload.
///   - message: Error text placed in the payload.
private func sendError(id: String, code: Int, message: String) {
let errPayload = Error(code: code, data: nil, message: message)
let rpcResponse = RPCResponseSchema(error: errPayload, id: id, result: nil)
sendRpcResponse(rpcResponse)
}
}

View file

@ -0,0 +1,156 @@
import Foundation
// =============================================================================
// Accessibility Builders
// =============================================================================
// Builder pattern helpers for creating accessibility response types.
// These use the auto-generated types from models/generated/models.swift.
// =============================================================================
// MARK: - Type Aliases for Backward Compatibility
/// Maps to the generated `Context` type (AppContext in TypeScript)
typealias AppContext = Context
/// Maps to the generated `Application` type (ApplicationInfo in TypeScript)
typealias ApplicationInfo = Application
/// Maps to the generated `FocusedElement` type (AXElementInfo in TypeScript)
typealias AXElementInfo = FocusedElement
/// Maps to the generated `Metrics` type (ExtractionMetrics in TypeScript)
typealias ExtractionMetrics = Metrics
/// Maps to the generated `The0` enum (ExtractionMethod in TypeScript)
typealias ExtractionMethod = The0
/// Maps to the generated `SelectionRange` type (same name)
typealias AccessibilitySelectionRange = SelectionRange
/// Maps to the generated `TextSelection` type (same name)
typealias AccessibilityTextSelection = TextSelection
/// Maps to the generated `WindowInfo` type (same name)
typealias AccessibilityWindowInfo = WindowInfo
/// Maps to the generated params type
typealias GetAccessibilityContextParams = GetAccessibilityContextParamsSchema
/// Maps to the generated result type
typealias GetAccessibilityContextResult = GetAccessibilityContextResultSchema
// MARK: - Result Types for Other Methods
/// Response result for getAccessibilityStatus.
/// Conforms to `Codable` with synthesized coding, so it can be encoded and decoded
/// without hand-written mapping.
struct GetAccessibilityStatusResult: Codable {
/// Does the app have accessibility permission?
let hasPermission: Bool
/// Is accessibility enabled system-wide?
let isEnabled: Bool
}
/// Response result for requestAccessibilityPermission
///
/// Conforms to `Codable` via the synthesized implementation, so the encoded
/// key defaults to the property name below.
struct RequestAccessibilityPermissionResult: Codable {
/// Was permission granted?
let granted: Bool
}
// MARK: - Builder for TextSelection
/// Mutable builder that assembles a `TextSelection`.
///
/// Every property starts at its "nothing extracted" default (nil / false /
/// `.none`), so callers only set the fields they actually obtained.
class TextSelectionBuilder {
    // Text content
    var selectedText: String?
    var fullContent: String?
    var preSelectionText: String?
    var postSelectionText: String?
    var selectionRange: SelectionRange?

    // Flags and provenance
    var isEditable = false
    var extractionMethod: ExtractionMethod = .none
    var hasMultipleRanges = false
    var isPlaceholder = false
    var isSecure = false
    var fullContentTruncated = false

    /// Produce a `TextSelection` from the builder's current state.
    func build() -> TextSelection {
        let selection = TextSelection(
            extractionMethod: extractionMethod,
            fullContent: fullContent,
            fullContentTruncated: fullContentTruncated,
            hasMultipleRanges: hasMultipleRanges,
            isEditable: isEditable,
            isPlaceholder: isPlaceholder,
            isSecure: isSecure,
            postSelectionText: postSelectionText,
            preSelectionText: preSelectionText,
            selectedText: selectedText,
            selectionRange: selectionRange
        )
        return selection
    }

    /// Create a secure field result (all content fields suppressed).
    ///
    /// Only `isEditable` and `isSecure` carry information; every text field
    /// and the selection range stay nil so that nothing about the secret
    /// (including its length) is exposed.
    /// - Parameter isEditable: Whether the secure element is editable.
    static func secureField(isEditable: Bool) -> TextSelection {
        let secureBuilder = TextSelectionBuilder()
        secureBuilder.isEditable = isEditable
        secureBuilder.isSecure = true
        // All other properties keep their defaults: no text, no range, `.none` method.
        return secureBuilder.build()
    }
}
// MARK: - Builder for Metrics
/// Builder for creating Metrics
///
/// Starts a timer when it is created and collects flags, fallbacks and error
/// messages during an extraction; `build()` turns them into a `Metrics` value.
class ExtractionMetricsBuilder {
    /// Time at which this builder was created; the basis for `totalTimeMS`.
    private let startTime: CFAbsoluteTime
    var textMarkerAttempted: Bool = false
    var textMarkerSucceeded: Bool = false
    var fallbacksUsed: [ExtractionMethod] = []
    var errors: [String] = []
    /// Set by callers that gave up because of a timeout. `build()` also sets
    /// the reported flag when the elapsed time exceeds `EXTRACTION_TIMEOUT_MS`.
    var timedOut: Bool = false
    // WebArea retry path metrics
    var webAreaRetryAttempted: Bool = false
    var webAreaFound: Bool = false
    var webAreaRetrySucceeded: Bool = false

    init() {
        startTime = CFAbsoluteTimeGetCurrent()
    }

    /// Record that a fallback extraction path was tried.
    func recordFallback(_ method: ExtractionMethod) {
        fallbacksUsed.append(method)
    }

    /// Record an error message.
    ///
    /// The message is stored verbatim; callers are responsible for keeping
    /// PII (user text, titles, URLs) out of it.
    func recordError(_ message: String) {
        errors.append(message)
    }

    /// Snapshot the collected metrics.
    /// - Returns: `Metrics` whose `totalTimeMS` is the time since `init()` and
    ///   whose `timedOut` is true if a caller set `timedOut` or the elapsed
    ///   time exceeded the best-effort timeout.
    func build() -> Metrics {
        let endTime = CFAbsoluteTimeGetCurrent()
        let totalTimeMs = (endTime - startTime) * 1000
        // Set timedOut flag if we exceeded best-effort timeout (per spec)
        let exceededTimeout = totalTimeMs > EXTRACTION_TIMEOUT_MS
        return Metrics(
            errors: errors,
            fallbacksUsed: fallbacksUsed,
            textMarkerAttempted: textMarkerAttempted,
            textMarkerSucceeded: textMarkerSucceeded,
            // Honor an explicitly recorded timeout instead of discarding it.
            timedOut: timedOut || exceededTimeout,
            totalTimeMS: totalTimeMs,
            webAreaFound: webAreaFound,
            webAreaRetryAttempted: webAreaRetryAttempted,
            webAreaRetrySucceeded: webAreaRetrySucceeded
        )
    }
}

View file

@ -0,0 +1,113 @@
import Foundation
import ApplicationServices
import AppKit
// =============================================================================
// AccessibilityContextService - Main Entry Point for Accessibility API
// =============================================================================
// Coordinates all services to extract accessibility context.
// This is the main entry point called from RpcHandler.
// =============================================================================
/// Main service for accessibility context extraction
///
/// Stateless facade: every member is `static` and delegates to
/// `PermissionsService`, `FocusService`, `SelectionExtractor` and `AXHelpers`.
class AccessibilityContextService {
    // MARK: - Main API

    /// Get accessibility context using the extraction algorithm
    /// - Parameter editableOnly: Only return text selection if element is editable (default: false per spec)
    /// - Returns: AppContext with all accessibility context, or nil if
    ///   permissions are missing or the frontmost process cannot be determined
    static func getAccessibilityContext(editableOnly: Bool = false) -> AppContext? {
        // Start metrics tracking (the builder's timer starts here)
        let metricsBuilder = ExtractionMetricsBuilder()

        // Check permissions
        guard PermissionsService.checkPermissions() else {
            logError("Accessibility permissions not granted")
            return nil
        }

        // Get frontmost application
        let pid = AXHelpers.getFrontProcessID()
        guard pid > 0 else {
            logError("Could not get frontmost application PID")
            return nil
        }

        // Build application info (arguments in alphabetical order per generated types)
        let applicationInfo = ApplicationInfo(
            bundleIdentifier: AXHelpers.getBundleIdentifier(pid: pid),
            name: AXHelpers.getProcessName(pid: pid),
            pid: Int(pid),
            version: AXHelpers.getApplicationVersion(pid: pid)
        )

        // Get focused element
        var focusedElementInfo: AXElementInfo? = nil
        var textSelectionInfo: AccessibilityTextSelection? = nil
        if let focusedElement = FocusService.getFocusedElement(pid: pid) {
            // Touch descendants to ensure they're accessible (triggers lazy loading)
            AXHelpers.touchDescendants(focusedElement, maxDepth: TOUCH_DESCENDANTS_MAX_DEPTH)

            // Try to find a text-capable element
            if let focusResult = FocusService.findTextCapableElement(from: focusedElement, editableOnly: editableOnly) {
                // Element info describes the element text is extracted from,
                // which may be a descendant/ancestor of the focused element.
                focusedElementInfo = FocusService.getElementInfo(element: focusResult.element)

                // Extract text selection
                textSelectionInfo = SelectionExtractor.extract(from: focusResult.element, metricsBuilder: metricsBuilder)

                // Apply editableOnly filter: drop selections reported as non-editable
                if editableOnly, let selection = textSelectionInfo, !selection.isEditable {
                    textSelectionInfo = nil
                }
            } else {
                // No text-capable element found, but still get basic element info
                focusedElementInfo = FocusService.getElementInfo(element: focusedElement)
            }
        }

        // Get window info
        let windowInfo = FocusService.getWindowInfo(pid: pid)

        // Build metrics
        let metrics = metricsBuilder.build()

        // Build and return context (arguments in alphabetical order per generated types)
        return AppContext(
            application: applicationInfo,
            focusedElement: focusedElementInfo,
            metrics: metrics,
            schemaVersion: .the20,
            textSelection: textSelectionInfo,
            timestamp: Date().timeIntervalSince1970,
            windowInfo: windowInfo
        )
    }

    // MARK: - Permission APIs

    /// Get accessibility permission status
    static func getAccessibilityStatus() -> GetAccessibilityStatusResult {
        return PermissionsService.getStatus()
    }

    /// Request accessibility permission
    static func requestAccessibilityPermission() -> RequestAccessibilityPermissionResult {
        return PermissionsService.requestPermission()
    }

    // MARK: - Logging

    /// Write `message` plus a newline to standard error.
    private static func logError(_ message: String) {
        // `Data(_:)` over the UTF-8 view cannot fail, unlike `data(using:)!`.
        FileHandle.standardError.write(Data("\(message)\n".utf8))
    }

    /// Write a prefixed debug line to standard error (DEBUG builds only).
    private static func logDebug(_ message: String) {
        #if DEBUG
        FileHandle.standardError.write(Data("🔍 \(message)\n".utf8))
        #endif
    }
}

View file

@ -0,0 +1,306 @@
import Foundation
import ApplicationServices
import AppKit
// =============================================================================
// FocusService - Focus Resolution and Element Discovery
// =============================================================================
// Handles finding the focused element and searching for text-capable elements
// when the focused element is not directly text-capable.
// =============================================================================
/// Result of focus resolution
///
/// Returned by `FocusService.findTextCapableElement(from:editableOnly:)`.
struct FocusResult {
/// The element chosen for text extraction.
let element: AXUIElement
/// The chosen element's `kAXRoleAttribute` value; nil when it could not be read.
let role: String?
/// False when `element` is the starting element itself; true when it was
/// found among the starting element's descendants or ancestors.
let wasSearched: Bool // True if we had to search for a text-capable element
}
/// Service for resolving focus and finding text-capable elements
class FocusService {
// MARK: - Get Focused Element
/// Get the focused element for the frontmost application
///
/// Reads `kAXFocusedUIElementAttribute` from the application element and, if
/// that fails, falls back to `kAXFocusedWindowAttribute`.
/// - Parameter pid: Process ID of the application
/// - Returns: The focused AXUIElement, or nil if none found or the returned
///   value is not an `AXUIElement`
static func getFocusedElement(pid: pid_t) -> AXUIElement? {
    let application = AXHelpers.createApplicationElement(pid: pid)

    // Enable manual accessibility for specific apps (Chrome, Firefox, etc.)
    let bundleId = AXHelpers.getBundleIdentifier(pid: pid)
    AXHelpers.enableManualAccessibilityIfNeeded(application: application, bundleId: bundleId)

    // Try to get focused UI element
    var focusedElement: CFTypeRef?
    var error = AXUIElementCopyAttributeValue(
        application,
        kAXFocusedUIElementAttribute as CFString,
        &focusedElement
    )

    // Fallback to focused window if focused element fails
    if error != .success {
        error = AXUIElementCopyAttributeValue(
            application,
            kAXFocusedWindowAttribute as CFString,
            &focusedElement
        )
    }

    guard error == .success, let element = focusedElement else {
        return nil
    }

    // A cast to a CoreFoundation type is not checked at runtime, so verify the
    // type ID first (the same check getWindowInfo performs before its cast).
    guard CFGetTypeID(element) == AXUIElementGetTypeID() else {
        return nil
    }
    return (element as! AXUIElement)
}
// MARK: - Find Text-Capable Element
/// Locate an element that text can be extracted from, starting at `element`.
///
/// Candidates are tried in this order: the element itself, its descendants,
/// then its ancestors. As a last resort (only when `editableOnly` is false)
/// the starting element is accepted if it exposes `kAXValueAttribute`.
/// - Parameters:
///   - element: Starting element
///   - editableOnly: If true, only return editable elements
/// - Returns: FocusResult with the found element, or nil
static func findTextCapableElement(from element: AXUIElement, editableOnly: Bool) -> FocusResult? {
    let startRole = AXHelpers.getStringAttribute(element, kAXRoleAttribute)

    // 1. The starting element itself
    if AXHelpers.isTextCapable(element),
       !editableOnly || AXHelpers.isElementEditable(element) {
        return FocusResult(element: element, role: startRole, wasSearched: false)
    }

    // 2. Descendants first, 3. then ancestors (the ancestor walk only runs
    //    when the descendant search came back empty)
    if let match = searchDescendantsForTextCapable(element: element, editableOnly: editableOnly)
        ?? searchAncestorsForTextCapable(element: element, editableOnly: editableOnly) {
        let matchRole = AXHelpers.getStringAttribute(match, kAXRoleAttribute)
        return FocusResult(element: match, role: matchRole, wasSearched: true)
    }

    // 4. Last resort: accept the original element if it at least has a value
    guard !editableOnly, AXHelpers.hasAttribute(element, kAXValueAttribute) else {
        return nil
    }
    return FocusResult(element: element, role: startRole, wasSearched: false)
}
// MARK: - Descendant Search
/// Search descendants for a text-capable element using BFS
///
/// The starting element itself is not tested, only its descendants.
/// - Parameters:
///   - element: Root of the search
///   - editableOnly: If true, a match must also be editable
///   - maxDepth: Elements at this depth or deeper are not expanded
///   - maxElements: Upper bound on the number of elements dequeued
/// - Returns: The first matching descendant in breadth-first order, or nil
private static func searchDescendantsForTextCapable(
    element: AXUIElement,
    editableOnly: Bool,
    maxDepth: Int = TREE_WALK_MAX_DEPTH,
    maxElements: Int = TREE_WALK_MAX_ELEMENTS
) -> AXUIElement? {
    var queue: [(element: AXUIElement, depth: Int)] = [(element, 0)]
    // Read cursor into `queue`. Advancing an index instead of calling
    // `removeFirst()` keeps each dequeue O(1); it also equals the number of
    // elements visited so far.
    var visited = 0

    while visited < queue.count && visited < maxElements {
        let (current, currentDepth) = queue[visited]
        visited += 1

        // Skip if we've exceeded max depth
        guard currentDepth < maxDepth else { continue }

        for child in AXHelpers.getChildren(current) {
            // Check if child is text-capable (and editable, when required)
            if AXHelpers.isTextCapable(child),
               !editableOnly || AXHelpers.isElementEditable(child) {
                return child
            }
            // Add to queue for further search
            queue.append((child, currentDepth + 1))
        }
    }
    return nil
}
// MARK: - Ancestor Search
/// Walk up the parent chain looking for a text-capable element.
///
/// - Parameters:
///   - element: Element whose ancestors are inspected (it is not tested itself)
///   - editableOnly: If true, a match must also be editable
///   - maxDepth: Maximum number of parent hops
/// - Returns: The nearest qualifying ancestor, or nil if the chain ends or
///   the hop limit is reached first
private static func searchAncestorsForTextCapable(
    element: AXUIElement,
    editableOnly: Bool,
    maxDepth: Int = TREE_WALK_MAX_DEPTH
) -> AXUIElement? {
    var cursor = element
    for _ in 0..<maxDepth {
        // No parent means we reached the top of the hierarchy
        guard let parent = AXHelpers.getParent(cursor) else {
            return nil
        }
        let qualifies = AXHelpers.isTextCapable(parent)
            && (!editableOnly || AXHelpers.isElementEditable(parent))
        if qualifies {
            return parent
        }
        cursor = parent
    }
    return nil
}
// MARK: - Element Info Extraction
/// Build an `AXElementInfo` snapshot of `element`.
///
/// The value is withheld (nil) for secure fields. `isFocused` defaults to
/// true when `kAXFocusedAttribute` cannot be read.
/// - Parameter element: Element to describe
/// - Returns: Role, subrole, title, description, value and state flags
static func getElementInfo(element: AXUIElement) -> AXElementInfo {
    // Descriptive attributes
    let elementRole = AXHelpers.getStringAttribute(element, kAXRoleAttribute)
    let elementSubrole = AXHelpers.getStringAttribute(element, kAXSubroleAttribute)
    let elementTitle = AXHelpers.getStringAttribute(element, kAXTitleAttribute)
    let elementDescription = AXHelpers.getStringAttribute(element, kAXDescriptionAttribute)

    // State flags
    let editable = AXHelpers.isElementEditable(element)
    let secure = AXHelpers.isSecureField(element)

    // Suppress value for secure fields (the attribute is not even read)
    let elementValue: String?
    if secure {
        elementValue = nil
    } else {
        elementValue = AXHelpers.getStringAttribute(element, kAXValueAttribute)
    }

    // Placeholder check without a known selection length
    let showingPlaceholder = AXHelpers.isPlaceholderShowing(element, selectionLength: nil)

    // Check focus (AXFocused attribute)
    let focused: Bool
    if let reported = AXHelpers.getBoolAttribute(element, kAXFocusedAttribute) {
        focused = reported
    } else {
        focused = true
    }

    // Arguments in alphabetical order per generated types
    return AXElementInfo(
        description: elementDescription,
        isEditable: editable,
        isFocused: focused,
        isPlaceholder: showingPlaceholder,
        isSecure: secure,
        role: elementRole,
        subrole: elementSubrole,
        title: elementTitle,
        value: elementValue
    )
}
// MARK: - Window Info Extraction
/// Describe the main window of the application with the given `pid`.
///
/// - Parameter pid: Process ID of the application
/// - Returns: The window's title and (when found) URL, or nil if the main
///   window cannot be read or is not an `AXUIElement`
static func getWindowInfo(pid: pid_t) -> AccessibilityWindowInfo? {
    let appElement = AXHelpers.createApplicationElement(pid: pid)

    // Get main window
    var rawWindow: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(
        appElement,
        kAXMainWindowAttribute as CFString,
        &rawWindow
    )

    // Require success, a value, and that the value really is an AXUIElement
    guard status == .success,
          let windowRef = rawWindow,
          CFGetTypeID(windowRef) == AXUIElementGetTypeID() else {
        return nil
    }
    let windowElement = windowRef as! AXUIElement

    let windowTitle = AXHelpers.getStringAttribute(windowElement, kAXTitleAttribute)

    // Get URL if this is a browser
    let appBundleId = AXHelpers.getBundleIdentifier(pid: pid)
    let windowURL = getBrowserURL(windowElement: windowElement, bundleId: appBundleId)

    return AccessibilityWindowInfo(title: windowTitle, url: windowURL)
}
// MARK: - Browser URL Extraction
/// Resolve the URL shown in a browser window.
///
/// Chromium-family browsers and Firefox: walk the window's children first,
/// then fall back to the window's own document/URL attributes.
/// Everything else: window attributes first, then a shallower child walk.
/// - Parameters:
///   - windowElement: The window to inspect
///   - bundleId: Bundle identifier of the owning application, if known
/// - Returns: A non-empty URL string, or nil if none was found
private static func getBrowserURL(windowElement: AXUIElement, bundleId: String?) -> String? {
    /// First non-empty value among the window's document and URL attributes.
    func windowLevelURL() -> String? {
        for attribute in [kAXDocumentAttribute, kAXURLAttribute] {
            if let candidate = AXHelpers.getStringAttribute(windowElement, attribute), !candidate.isEmpty {
                return candidate
            }
        }
        return nil
    }

    // Determine browser type: substring match on the lowercased id, or an
    // exact (case-sensitive) match against known Chromium-based bundle ids
    let loweredId = bundleId?.lowercased()
    let knownChromiumIds = [
        "com.microsoft.edgemac",
        "com.brave.Browser",
        "com.operasoftware.Opera",
        "com.vivaldi.Vivaldi"
    ]
    let isChromiumBrowser = (loweredId?.contains("chrome") ?? false)
        || (loweredId?.contains("chromium") ?? false)
        || (bundleId.map { knownChromiumIds.contains($0) } ?? false)
    let isFirefox = bundleId == "org.mozilla.firefox"

    if isChromiumBrowser || isFirefox {
        // Prioritize the tree walk; window-level attributes are the fallback
        return findURLInChildren(element: windowElement, maxDepth: CHROMIUM_URL_SEARCH_DEPTH)
            ?? windowLevelURL()
    }

    // Other apps: window-level attributes first, shallow tree walk as fallback
    return windowLevelURL()
        ?? findURLInChildren(element: windowElement, maxDepth: NON_CHROMIUM_URL_SEARCH_DEPTH)
}
/// Find URL in children using BFS
///
/// Looks at each descendant in breadth-first order and returns the first URL
/// found either in an address-like field's value or in a web area's
/// URL/document attribute. The starting element itself is not inspected.
/// - Parameters:
///   - element: Root of the search
///   - maxDepth: Elements at this depth or deeper are not expanded
/// - Returns: The first URL-like string found, or nil
private static func findURLInChildren(element: AXUIElement, maxDepth: Int) -> String? {
    var queue: [(element: AXUIElement, depth: Int)] = [(element, 0)]
    // Read cursor into `queue`; avoids the O(n) cost of `removeFirst()` on
    // every dequeue while keeping the same visiting order.
    var head = 0

    while head < queue.count {
        let (currentElement, currentDepth) = queue[head]
        head += 1
        guard currentDepth < maxDepth else { continue }

        for child in AXHelpers.getChildren(currentElement) {
            let role = AXHelpers.getStringAttribute(child, kAXRoleAttribute)

            // Check address fields
            // NOTE(review): the `contains(".")` heuristic accepts any field
            // value containing a dot, not only URLs — confirm this is intended.
            if role == "AXTextField" || role == "AXComboBox" || role == "AXSafariAddressAndSearchField" {
                if let value = AXHelpers.getStringAttribute(child, kAXValueAttribute),
                   !value.isEmpty,
                   (value.hasPrefix("http://") || value.hasPrefix("https://") || value.contains(".")) {
                    return value
                }
            }

            // Check web areas
            if role == "AXWebArea" {
                if let url = AXHelpers.getStringAttribute(child, kAXURLAttribute), !url.isEmpty {
                    return url
                }
                if let url = AXHelpers.getStringAttribute(child, kAXDocumentAttribute), !url.isEmpty {
                    return url
                }
            }

            queue.append((child, currentDepth + 1))
        }
    }
    return nil
}
}

View file

@ -0,0 +1,49 @@
import Foundation
import ApplicationServices
// =============================================================================
// PermissionsService - Accessibility Permission Management
// =============================================================================
// Handles checking and requesting accessibility permissions.
// =============================================================================
/// Thin wrapper around the accessibility permission check in `AXHelpers`.
class PermissionsService {
    // MARK: - Permission Check

    /// Report whether accessibility permission is currently granted.
    /// - Parameter prompt: If true, show the system prompt to request permissions
    /// - Returns: True if permissions are granted
    static func checkPermissions(prompt: Bool = false) -> Bool {
        let isGranted = AXHelpers.checkAccessibilityPermissions(prompt: prompt)
        return isGranted
    }

    // MARK: - Permission Status

    /// Build the status payload without prompting the user.
    ///
    /// `isEnabled` is always reported as true: on macOS, accessibility is
    /// treated as enabled system-wide and only the per-app permission varies.
    /// - Returns: GetAccessibilityStatusResult with permission details
    static func getStatus() -> GetAccessibilityStatusResult {
        return GetAccessibilityStatusResult(
            hasPermission: checkPermissions(prompt: false),
            isEnabled: true
        )
    }

    // MARK: - Request Permission

    /// Trigger the system accessibility prompt and report the grant state.
    ///
    /// NOTE(review): `granted` is whatever the check returns at call time;
    /// presumably it does not wait for the user to respond — confirm.
    /// - Returns: RequestAccessibilityPermissionResult with grant status
    static func requestPermission() -> RequestAccessibilityPermissionResult {
        return RequestAccessibilityPermissionResult(
            granted: checkPermissions(prompt: true)
        )
    }
}

View file

@ -0,0 +1,792 @@
import Foundation
import ApplicationServices
// =============================================================================
// SelectionExtractor - Multi-Path Text Selection Extraction
// =============================================================================
// Implements the Phase 1 extraction algorithm with TextMarker as primary path.
// This enables text selection extraction in Electron/Chromium apps where
// AXSelectedTextRange fails.
// =============================================================================
/// Result from TextMarker extraction attempt
struct TextMarkerResult {
/// Text covered by the marker range. Empty string for a caret with no
/// selection; nil when the text lookup failed for a non-empty range.
let selectedText: String?
/// Selection expressed as location (start index) and length (end - start).
let selectionRange: SelectionRange?
/// True when this result was taken from the first of several marker ranges.
let hasMultipleRanges: Bool
}
/// Service for extracting text selection from focused elements
class SelectionExtractor {
// MARK: - Main Extraction Entry Point
/// Extract text selection from an element using multi-path algorithm
///
/// Paths are tried in priority order and the first one that succeeds wins:
/// TextMarker on the focused element, TextMarker on a nearby WebArea,
/// TextMarker / SelectedTextRange on the deepest text descendant, then the
/// SelectedTextRange, SelectedTextRanges, Value and StringForRange fallbacks.
/// After extraction the range is clamped to the content, oversized content is
/// windowed, and pre/post selection context is computed.
///
/// Secure fields short-circuit before any content is read.
/// - Parameters:
///   - element: The AXUIElement to extract from (focused element)
///   - metricsBuilder: Builder to record extraction metrics
/// - Returns: AccessibilityTextSelection or nil if no text selection available
static func extract(from element: AXUIElement, metricsBuilder: ExtractionMetricsBuilder) -> AccessibilityTextSelection? {
    let builder = TextSelectionBuilder()

    // Track both original focused element and the element we extract from
    let focusedElement = element
    var extractionElement = element

    // Step 2: Check if element is editable (check original focused element)
    let focusedIsEditable = AXHelpers.isElementEditable(focusedElement)

    // Step 2.1: SECURE FIELD CHECK - suppress all content if secure
    if AXHelpers.isSecureField(focusedElement) {
        return TextSelectionBuilder.secureField(isEditable: focusedIsEditable)
    }

    // Variables to track extraction state
    var selectionRange: AccessibilitySelectionRange? = nil
    var selectedText: String? = nil
    var fullContent: String? = nil
    var hasMultipleRanges = false
    var extractionMethod: ExtractionMethod = .none

    // Step 4: EXTRACTION (Priority Order)
    // Path A: TextMarker (PRIMARY - works in Electron)
    metricsBuilder.textMarkerAttempted = true
    if let textMarkerResult = extractViaTextMarker(element: focusedElement, metricsBuilder: metricsBuilder) {
        metricsBuilder.textMarkerSucceeded = true
        selectedText = textMarkerResult.selectedText
        selectionRange = textMarkerResult.selectionRange
        hasMultipleRanges = textMarkerResult.hasMultipleRanges
        extractionMethod = .textMarkerRange
    }

    // WebArea Retry Path: When TextMarker fails on focused element
    if extractionMethod == .none {
        // TextMarker failed - search for a better WebArea
        metricsBuilder.webAreaRetryAttempted = true
        if let webArea = findWebArea(from: focusedElement) {
            metricsBuilder.webAreaFound = true
            // Try TextMarker on WebArea
            if let webAreaTextMarkerResult = extractViaTextMarker(element: webArea, metricsBuilder: metricsBuilder) {
                // TextMarker SUCCEEDED on WebArea - now switch extraction element
                metricsBuilder.textMarkerSucceeded = true // Mark overall TextMarker as succeeded
                metricsBuilder.webAreaRetrySucceeded = true
                extractionElement = webArea
                selectedText = webAreaTextMarkerResult.selectedText
                selectionRange = webAreaTextMarkerResult.selectionRange
                hasMultipleRanges = webAreaTextMarkerResult.hasMultipleRanges
                extractionMethod = .textMarkerRange
            }
            // If TextMarker fails on WebArea, DON'T switch extractionElement
            // Keep using focusedElement for fallbacks (it has the content, even if noisy)
        }
    }

    // Descendant Text Element Path: When both TextMarker attempts fail
    // Try to find the actual text element inside the container (e.g., in Notion)
    if extractionMethod == .none {
        if let deepTextElement = AXHelpers.findDeepestTextElement(from: focusedElement) {
            // Found a deeper text element - try extraction on it
            if let textMarkerResult = extractViaTextMarker(element: deepTextElement, metricsBuilder: metricsBuilder) {
                metricsBuilder.textMarkerSucceeded = true // Mark TextMarker as succeeded
                extractionElement = deepTextElement
                selectedText = textMarkerResult.selectedText
                selectionRange = textMarkerResult.selectionRange
                hasMultipleRanges = textMarkerResult.hasMultipleRanges
                extractionMethod = .textMarkerRange
            } else if let rangeResult = extractViaSelectedTextRange(element: deepTextElement) {
                // TextMarker failed but SelectedTextRange works - use this element
                // This should give us cleaner content without UI labels
                extractionElement = deepTextElement
                selectedText = rangeResult.selectedText
                selectionRange = rangeResult.selectionRange
                extractionMethod = .selectedTextRange
            }
        }
    }

    // Path B: SelectedTextRange (Fallback 1) - use extractionElement
    if extractionMethod == .none {
        metricsBuilder.recordFallback(.selectedTextRange)
        if let result = extractViaSelectedTextRange(element: extractionElement) {
            selectedText = result.selectedText
            selectionRange = result.selectionRange
            extractionMethod = .selectedTextRange
        }
    }

    // Path C: SelectedTextRanges (Fallback 2 - Multi-select) - use extractionElement
    if extractionMethod == .none {
        metricsBuilder.recordFallback(.selectedTextRanges)
        if let result = extractViaSelectedTextRanges(element: extractionElement) {
            selectedText = result.selectedText
            selectionRange = result.selectionRange
            hasMultipleRanges = result.hasMultipleRanges
            extractionMethod = .selectedTextRanges
        }
    }

    // Path D: Value Attribute (Fallback 3) - use extractionElement
    if extractionMethod == .none {
        metricsBuilder.recordFallback(.valueAttribute)
        if let value = AXHelpers.getStringAttribute(extractionElement, kAXValueAttribute) {
            fullContent = value
            extractionMethod = .valueAttribute
            // Note: No selectionRange available from this path
        }
    }

    // Path E: StringForRange (Fallback 4) - use extractionElement
    if extractionMethod == .none {
        metricsBuilder.recordFallback(.stringForRange)
        if let charCount = AXHelpers.getNumberOfCharacters(extractionElement) {
            if charCount == 0 {
                fullContent = ""
                extractionMethod = .stringForRange
            } else if charCount > 0 {
                let range = CFRange(location: 0, length: charCount)
                if let content = AXHelpers.getStringForRange(extractionElement, range: range) {
                    fullContent = content
                    extractionMethod = .stringForRange
                }
            }
        }
    }

    // If no extraction succeeded at all, return nil
    if extractionMethod == .none {
        return nil
    }

    // Step 5: FULL CONTENT RETRIEVAL (if not already obtained) - use extractionElement
    if fullContent == nil && selectionRange != nil {
        // Try AXValue first
        fullContent = AXHelpers.getStringAttribute(extractionElement, kAXValueAttribute)
        // If fails, try AXStringForRange
        if fullContent == nil, let charCount = AXHelpers.getNumberOfCharacters(extractionElement), charCount > 0 {
            let range = CFRange(location: 0, length: charCount)
            fullContent = AXHelpers.getStringForRange(extractionElement, range: range)
        }
    }

    // Step 3: PLACEHOLDER CHECK (non-blocking)
    // Runs after extraction so the selection length is known.
    // Use TextMarker-derived length if available, fall back to AXSelectedTextRange
    var selectionLength: Int? = selectionRange?.length
    if selectionLength == nil {
        if let cfRange = AXHelpers.getSelectedTextRange(extractionElement) {
            selectionLength = cfRange.length
        }
    }

    // OR logic: check placeholder on BOTH elements
    let focusedIsPlaceholder = AXHelpers.isPlaceholderShowing(focusedElement, selectionLength: nil)
    let extractionIsPlaceholder = AXHelpers.isPlaceholderShowing(extractionElement, selectionLength: selectionLength)
    builder.isPlaceholder = focusedIsPlaceholder || extractionIsPlaceholder

    // OR logic for isEditable: editable if EITHER element is editable
    let extractionIsEditable = AXHelpers.isElementEditable(extractionElement)
    builder.isEditable = focusedIsEditable || extractionIsEditable

    // Step 5.1: SELECTION RANGE VALIDATION
    // All offsets below are UTF-16 code unit offsets into `content`.
    if let range = selectionRange, let content = fullContent {
        let contentLength = content.utf16.count
        let originalLocation = range.location
        let originalLength = range.length

        // Clamp to valid bounds
        let clampedLocation = AXHelpers.clamp(originalLocation, min: 0, max: contentLength)
        let maxLength = contentLength - clampedLocation
        let clampedLength = AXHelpers.clamp(originalLength, min: 0, max: maxLength)

        // Log if clamping occurred (no PII)
        if originalLocation != clampedLocation || originalLength != clampedLength {
            metricsBuilder.recordError("SelectionRange clamped: original exceeded content bounds")
        }
        selectionRange = SelectionRange(length: clampedLength, location: clampedLocation)

        // Step 5.2: RE-DERIVE selectedText when no windowing needed
        // (oversized content is handled by the windowing step below instead)
        if contentLength <= MAX_FULL_CONTENT_LENGTH {
            if clampedLength == 0 {
                selectedText = ""
            } else {
                selectedText = AXHelpers.substringUTF16(content, start: clampedLocation, length: clampedLength)
            }
        }
    }

    // Step 6: CONTENT WINDOWING
    var fullContentTruncated = false
    if let content = fullContent, content.utf16.count > MAX_FULL_CONTENT_LENGTH {
        let result = windowContent(
            content: content,
            selectionRange: selectionRange,
            metricsBuilder: metricsBuilder
        )
        fullContent = result.windowedContent
        selectionRange = result.adjustedRange
        selectedText = result.selectedText
        fullContentTruncated = true
    }

    // Step 7: CONTEXT COMPUTATION
    var preSelectionText: String? = nil
    var postSelectionText: String? = nil
    if let range = selectionRange, let content = fullContent {
        let location = range.location
        let length = range.length
        let contentLength = content.utf16.count

        // Pre-selection text: up to MAX_CONTEXT_LENGTH units before the selection
        if location == 0 {
            preSelectionText = ""
        } else {
            let preStart = max(0, location - MAX_CONTEXT_LENGTH)
            let preLength = location - preStart
            preSelectionText = AXHelpers.substringUTF16(content, start: preStart, length: preLength)
        }

        // Post-selection text: up to MAX_CONTEXT_LENGTH units after the selection
        let postStart = location + length
        if postStart >= contentLength {
            postSelectionText = ""
        } else {
            let postLength = min(MAX_CONTEXT_LENGTH, contentLength - postStart)
            postSelectionText = AXHelpers.substringUTF16(content, start: postStart, length: postLength)
        }
    } else if let range = selectionRange, fullContent == nil {
        // Per spec: when selectionRange exists but fullContent is nil,
        // compute pre/post via AXStringForRange
        let location = range.location
        let length = range.length

        // Pre-selection text via AXStringForRange
        if location == 0 {
            preSelectionText = ""
        } else {
            let preStart = max(0, location - MAX_CONTEXT_LENGTH)
            let preLength = location - preStart
            let preRange = CFRange(location: preStart, length: preLength)
            preSelectionText = AXHelpers.getStringForRange(extractionElement, range: preRange)
        }

        // Post-selection text via AXStringForRange
        let postStart = location + length
        // We don't know total length, so just try to get MAX_CONTEXT_LENGTH
        // NOTE(review): this request may extend past the end of the text;
        // whether getStringForRange tolerates that is not visible here — confirm.
        let postRange = CFRange(location: postStart, length: MAX_CONTEXT_LENGTH)
        postSelectionText = AXHelpers.getStringForRange(extractionElement, range: postRange)
    }

    // Build final result
    builder.selectedText = selectedText
    builder.fullContent = fullContent
    builder.preSelectionText = preSelectionText
    builder.postSelectionText = postSelectionText
    builder.selectionRange = selectionRange
    builder.extractionMethod = extractionMethod
    builder.hasMultipleRanges = hasMultipleRanges
    builder.fullContentTruncated = fullContentTruncated
    return builder.build()
}
// MARK: - Path A: TextMarker Extraction
/// Read the selection through the TextMarker APIs (works in Electron/Chromium).
///
/// The single-range attribute is tried first; the multi-range attribute (of
/// which only the first range is used) is consulted only if that fails.
/// - Returns: The extracted selection, or nil if both attempts fail
private static func extractViaTextMarker(element: AXUIElement, metricsBuilder: ExtractionMetricsBuilder) -> TextMarkerResult? {
    // `??` evaluates its right-hand side lazily, so the multi-range query
    // runs only when the single-range query returned nil.
    return extractViaSingleTextMarkerRange(element: element, metricsBuilder: metricsBuilder)
        ?? extractViaMultiTextMarkerRanges(element: element, metricsBuilder: metricsBuilder)
}
/// Read the selection from the element's `AXSelectedTextMarkerRange` attribute.
///
/// A failed attribute read is recorded on `metricsBuilder` with the AXError code.
/// - Returns: The extracted selection, or nil if the attribute is unavailable
///   or the marker range cannot be resolved
private static func extractViaSingleTextMarkerRange(element: AXUIElement, metricsBuilder: ExtractionMetricsBuilder) -> TextMarkerResult? {
    // 1. Get TextMarker range
    var rawRange: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, "AXSelectedTextMarkerRange" as CFString, &rawRange)

    if status == .success, let markerRange = rawRange {
        // Resolve markers, indices and text from the range
        return extractFromMarkerRange(
            markerRange,
            element: element,
            metricsBuilder: metricsBuilder,
            hasMultipleRanges: false
        )
    }

    metricsBuilder.recordError("TextMarker: AXSelectedTextMarkerRange failed, AXError=\(status.rawValue)")
    return nil
}
/// Read the selection from `AXSelectedTextMarkerRanges` (multi-cursor),
/// resolving only the first range in the array.
///
/// A failed read, a non-array value, or an empty array is recorded on
/// `metricsBuilder` and yields nil.
/// - Returns: The extracted selection with `hasMultipleRanges` set when the
///   array holds more than one range
private static func extractViaMultiTextMarkerRanges(element: AXUIElement, metricsBuilder: ExtractionMetricsBuilder) -> TextMarkerResult? {
    // 1. Get TextMarker ranges array
    var rawRanges: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, "AXSelectedTextMarkerRanges" as CFString, &rawRanges)

    guard status == .success,
          let markerRanges = rawRanges as? [AnyObject],
          let leadingRange = markerRanges.first else {
        metricsBuilder.recordError("TextMarker: AXSelectedTextMarkerRanges failed or empty, AXError=\(status.rawValue)")
        return nil
    }

    // Extract from the first marker range
    return extractFromMarkerRange(
        leadingRange as CFTypeRef,
        element: element,
        metricsBuilder: metricsBuilder,
        hasMultipleRanges: markerRanges.count > 1
    )
}
/// Resolve a TextMarker range into integer indices and the selected text.
///
/// Steps: start marker, end marker, index of each marker, then the text for
/// the range. Any failure up to and including index validation records an
/// error and returns nil. A failed text lookup does not fail the extraction:
/// the range is still returned, with `selectedText` nil (or "" for a caret).
/// - Parameters:
///   - markerRange: Opaque marker range obtained from the element
///   - element: Element that owns the marker range
///   - metricsBuilder: Receives error messages (codes and indices only)
///   - hasMultipleRanges: Passed through to the result unchanged
/// - Returns: Selection text and range, or nil if indices cannot be resolved
private static func extractFromMarkerRange(_ markerRange: CFTypeRef, element: AXUIElement, metricsBuilder: ExtractionMetricsBuilder, hasMultipleRanges: Bool) -> TextMarkerResult? {
    /// Run one parameterized attribute query against `element`.
    func query(_ attribute: String, _ parameter: CFTypeRef) -> (status: AXError, value: CFTypeRef?) {
        var output: CFTypeRef?
        let status = AXUIElementCopyParameterizedAttributeValue(
            element,
            attribute as CFString,
            parameter,
            &output
        )
        return (status, output)
    }

    // 2. Get start marker
    let startLookup = query("AXStartTextMarkerForTextMarkerRange", markerRange)
    guard startLookup.status == .success, let startMarker = startLookup.value else {
        metricsBuilder.recordError("TextMarker: AXStartTextMarkerForTextMarkerRange failed, AXError=\(startLookup.status.rawValue)")
        return nil
    }

    // 3. Get end marker
    let endLookup = query("AXEndTextMarkerForTextMarkerRange", markerRange)
    guard endLookup.status == .success, let endMarker = endLookup.value else {
        metricsBuilder.recordError("TextMarker: AXEndTextMarkerForTextMarkerRange failed, AXError=\(endLookup.status.rawValue)")
        return nil
    }

    // 4. Convert markers to indices
    let startIndexLookup = query("AXIndexForTextMarker", startMarker)
    guard startIndexLookup.status == .success,
          let startNumber = startIndexLookup.value as? NSNumber else {
        metricsBuilder.recordError("TextMarker: AXIndexForTextMarker (start) failed, AXError=\(startIndexLookup.status.rawValue)")
        return nil
    }

    let endIndexLookup = query("AXIndexForTextMarker", endMarker)
    guard endIndexLookup.status == .success,
          let endNumber = endIndexLookup.value as? NSNumber else {
        metricsBuilder.recordError("TextMarker: AXIndexForTextMarker (end) failed, AXError=\(endIndexLookup.status.rawValue)")
        return nil
    }

    let startIndex = startNumber.intValue
    let endIndex = endNumber.intValue

    // Validate indices per spec: negative or end < start should fail
    guard startIndex >= 0 && endIndex >= 0 else {
        metricsBuilder.recordError("TextMarker: Invalid indices - negative values (start=\(startIndex), end=\(endIndex))")
        return nil
    }
    guard endIndex >= startIndex else {
        metricsBuilder.recordError("TextMarker: Invalid indices - end < start (start=\(startIndex), end=\(endIndex))")
        return nil
    }
    let length = endIndex - startIndex

    // 5. Get text for marker range (attributed or plain string accepted)
    let textLookup = query("AXAttributedStringForTextMarkerRange", markerRange)
    let selectedText: String?
    if textLookup.status == .success, let attributed = textLookup.value as? NSAttributedString {
        selectedText = attributed.string
    } else if textLookup.status == .success, let plain = textLookup.value as? String {
        selectedText = plain
    } else if length == 0 {
        // Cursor only - no selection, this is fine
        selectedText = ""
    } else {
        // Non-empty range but no text: keep the range, report the failure
        metricsBuilder.recordError("TextMarker: AXAttributedStringForTextMarkerRange failed, AXError=\(textLookup.status.rawValue)")
        selectedText = nil
    }

    return TextMarkerResult(
        selectedText: selectedText,
        selectionRange: SelectionRange(length: length, location: startIndex),
        hasMultipleRanges: hasMultipleRanges
    )
}
// MARK: - Path B: SelectedTextRange Extraction
/// Path B: read the selection through the standard `AXSelectedTextRange` attribute.
///
/// For a non-empty selection the text is requested with `AXStringForRange` first and,
/// only if that yields nothing, with the `AXSelectedText` attribute.
///
/// - Parameter element: Element to query.
/// - Returns: `nil` when the element reports no selected-text range; otherwise a result
///   whose `hasMultipleRanges` is always `false`.
private static func extractViaSelectedTextRange(element: AXUIElement) -> TextMarkerResult? {
    guard let selection = AXHelpers.getSelectedTextRange(element) else {
        return nil
    }

    let text: String?
    if selection.length == 0 {
        // Cursor without selection: nothing to fetch.
        text = ""
    } else {
        // `??` evaluates the fallback lazily, so AXSelectedText is only queried
        // when AXStringForRange produced no string.
        text = AXHelpers.getStringForRange(element, range: selection)
            ?? AXHelpers.getStringAttribute(element, kAXSelectedTextAttribute)
    }

    return TextMarkerResult(
        selectedText: text,
        selectionRange: SelectionRange(length: selection.length, location: selection.location),
        hasMultipleRanges: false
    )
}
// MARK: - Path C: SelectedTextRanges Extraction
/// Path C: read the selection through `AXSelectedTextRanges` (multi-selection).
///
/// All ranges that decode as `CFRange` are ordered by location and the one with the
/// lowest location is used as the primary selection.
///
/// - Parameter element: Element to query.
/// - Returns: `nil` when the attribute is unavailable, empty, or contains no decodable
///   range. `selectedText` is `nil` when `AXStringForRange` fails for a non-empty range.
private static func extractViaSelectedTextRanges(element: AXUIElement) -> TextMarkerResult? {
    var rawRanges: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(
        element,
        "AXSelectedTextRanges" as CFString,
        &rawRanges
    )
    guard status == .success, let boxedRanges = rawRanges as? [AXValue], !boxedRanges.isEmpty else {
        return nil
    }

    // Unbox every AXValue that really holds a CFRange; silently drop the rest.
    let decoded: [CFRange] = boxedRanges.compactMap { boxed in
        var range = CFRange()
        return AXValueGetValue(boxed, .cfRange, &range) ? range : nil
    }

    // Ascending by location; the front element becomes the primary selection.
    let ordered = decoded.sorted { $0.location < $1.location }
    guard let primary = ordered.first else { return nil }

    let text: String? = primary.length == 0
        ? ""
        : AXHelpers.getStringForRange(element, range: primary)

    return TextMarkerResult(
        selectedText: text,
        selectionRange: SelectionRange(length: primary.length, location: primary.location),
        hasMultipleRanges: ordered.count > 1
    )
}
// MARK: - WebArea Search
/// Candidate structure for WebArea selection
///
/// One AXWebArea found while searching around the focused element, tagged with
/// where it was found so `selectBestWebArea` can rank it.
private struct WebAreaCandidate {
// The AXWebArea element itself.
let element: AXUIElement
// Signed distance from the focused element as reported by the search helpers.
let depth: Int // positive = descendant, negative = ancestor
// True when the candidate came from the ancestor search, false for descendants.
let isAncestor: Bool
}
/// Locate the most suitable AXWebArea near the focused element.
///
/// Ancestors are searched only when the focused element is not itself an AXWebArea;
/// descendants are searched whenever the focused element has children. The focused
/// element is excluded from both searches.
///
/// - Parameter focusedElement: The currently focused element.
/// - Returns: The AXWebArea chosen by `selectBestWebArea`, or `nil` if none was found.
private static func findWebArea(from focusedElement: AXUIElement) -> AXUIElement? {
    var candidates: [WebAreaCandidate] = []

    // Upwards: skipped when focus already sits on a WebArea.
    if AXHelpers.getRole(focusedElement) != "AXWebArea" {
        let above = AXHelpers.findWebAreasInAncestors(
            element: focusedElement,
            excludeElement: focusedElement,
            maxLevels: WEB_AREA_ANCESTOR_SEARCH_DEPTH
        )
        candidates += above.map { WebAreaCandidate(element: $0.0, depth: $0.1, isAncestor: true) }
    }

    // Downwards: always attempted, but pointless for a leaf element.
    if !AXHelpers.getChildren(focusedElement).isEmpty {
        let below = AXHelpers.findWebAreasInDescendants(
            element: focusedElement,
            excludeElement: focusedElement,
            maxDepth: FIND_WEB_AREAS_MAX_DEPTH,
            maxElements: FIND_WEB_AREAS_MAX_ELEMENTS
        )
        candidates += below.map { WebAreaCandidate(element: $0.0, depth: $0.1, isAncestor: false) }
    }

    return selectBestWebArea(from: candidates, focusedElement: focusedElement)
}
/// Rank WebArea candidates and return the preferred one.
///
/// Candidates are tried tier by tier; the first non-empty tier decides:
///   1. has a text-marker range AND is related to the app's focused element
///   2. has a text-marker range only
///   3. is related to the app's focused element only
///   4. any candidate
/// Inside a tier the deepest descendant wins; if the tier holds no descendant,
/// the nearest ancestor wins.
///
/// - Parameters:
///   - candidates: WebAreas collected by `findWebArea`.
///   - focusedElement: Element whose owning process is used to look up the app-level focus.
/// - Returns: The chosen element, or `nil` when `candidates` is empty.
private static func selectBestWebArea(
    from candidates: [WebAreaCandidate],
    focusedElement: AXUIElement
) -> AXUIElement? {
    guard !candidates.isEmpty else { return nil }

    // A candidate plus the two signals used for ranking.
    struct ScoredCandidate {
        let candidate: WebAreaCandidate
        let hasMarkerRange: Bool
        let containsFocus: Bool
    }

    // Deepest descendant if there is one, otherwise the nearest ancestor.
    // Ancestor depths are negative, so the largest depth is the closest ancestor.
    func preferred(among pool: [WebAreaCandidate]) -> AXUIElement? {
        let shallowerFirst: (WebAreaCandidate, WebAreaCandidate) -> Bool = { $0.depth < $1.depth }
        if let deepest = pool.filter({ !$0.isAncestor }).max(by: shallowerFirst) {
            return deepest.element
        }
        return pool.filter({ $0.isAncestor }).max(by: shallowerFirst)?.element
    }

    // App-level focus, used to judge whether a candidate is related to it.
    let appFocusedElement = AXHelpers.getAppFocusedElement(forPid: AXHelpers.getPid(focusedElement))

    let scored: [ScoredCandidate] = candidates.map { entry in
        // "Related" means either the focus lies inside the WebArea, or the
        // WebArea lies inside the focused container (equality counts for both).
        var related = false
        if let focused = appFocusedElement {
            related = AXHelpers.isDescendantOrEqual(focused, of: entry.element)
                || AXHelpers.isDescendantOrEqual(entry.element, of: focused)
        }
        let hasMarker = AXHelpers.hasTextMarkerRange(entry.element)
        return ScoredCandidate(candidate: entry, hasMarkerRange: hasMarker, containsFocus: related)
    }

    // Tiers 1-3, strongest signal first.
    let tiers: [(ScoredCandidate) -> Bool] = [
        { $0.hasMarkerRange && $0.containsFocus },
        { $0.hasMarkerRange && !$0.containsFocus },
        { $0.containsFocus && !$0.hasMarkerRange }
    ]
    for matchesTier in tiers {
        let pool = scored.filter(matchesTier).map { $0.candidate }
        if let choice = preferred(among: pool) {
            return choice
        }
    }

    // Tier 4: no signal at all, fall back to position alone.
    return preferred(among: candidates)
}
// MARK: - Content Windowing
/// Result of content windowing operation
///
/// Produced by `windowContent`.
struct WindowResult {
// The content after truncation/windowing.
let windowedContent: String
// Selection re-expressed relative to `windowedContent`; nil when no selection was supplied.
let adjustedRange: SelectionRange?
// Text covered by `adjustedRange` inside `windowedContent`; nil when no selection was supplied.
let selectedText: String?
}
/// Reduce `content` to a bounded window and re-express the selection relative to it.
///
/// All offsets are UTF-16 code-unit offsets. Three cases are handled:
///   - A: no selection. Content that already fits is returned unchanged; otherwise the
///        head and the tail are kept and joined with `"\n...\n"`.
///   - B: the selection alone is longer than `MAX_FULL_CONTENT_LENGTH`. The window starts
///        at the selection start and spans at most `MAX_FULL_CONTENT_LENGTH` units.
///   - C: the selection fits. The window is the selection plus `WINDOW_PADDING` on each
///        side, re-centred on the selection if that exceeds the maximum.
/// Window edges are moved off surrogate-pair boundaries before cutting.
///
/// - Parameters:
///   - content: Full text of the element.
///   - selectionRange: Selection inside `content`, or `nil` when unknown.
///   - metricsBuilder: Not used by this function at present; kept for interface stability.
/// - Returns: Windowed text plus, when a selection was supplied, the adjusted range and
///   the text it covers within the window.
private static func windowContent(
    content: String,
    selectionRange: SelectionRange?,
    metricsBuilder: ExtractionMetricsBuilder
) -> WindowResult {
    let totalLength = content.utf16.count

    // Shared tail of cases B and C: snap the edges, cut, then clamp the selection
    // into the cut (location first, so the length bound uses the clamped location).
    func cut(from rawStart: Int, to rawEnd: Int, location: Int, length: Int) -> WindowResult {
        let windowStart = AXHelpers.adjustForSurrogatePairs(content, offset: rawStart, direction: .forward)
        let windowEnd = AXHelpers.adjustForSurrogatePairs(content, offset: rawEnd, direction: .backward)
        let windowedContent = AXHelpers.substringUTF16(content, start: windowStart, length: windowEnd - windowStart) ?? ""
        let windowLength = windowedContent.utf16.count

        let adjustedLocation = AXHelpers.clamp(location - windowStart, min: 0, max: windowLength)
        let adjustedLength = AXHelpers.clamp(length, min: 0, max: windowLength - adjustedLocation)

        return WindowResult(
            windowedContent: windowedContent,
            adjustedRange: SelectionRange(length: adjustedLength, location: adjustedLocation),
            selectedText: AXHelpers.substringUTF16(windowedContent, start: adjustedLocation, length: adjustedLength)
        )
    }

    // CASE A: no selection.
    guard let range = selectionRange else {
        // Content that already fits must not be truncated. Without this check the
        // tail start below becomes negative (tail dropped, delimiter still appended)
        // or lands inside the head (overlapping text emitted twice).
        if totalLength <= MAX_FULL_CONTENT_LENGTH {
            return WindowResult(windowedContent: content, adjustedRange: nil, selectedText: nil)
        }

        let delimiter = "\n...\n"
        let availableSpace = MAX_FULL_CONTENT_LENGTH - delimiter.utf16.count
        let nominalHead = availableSpace / 2
        let nominalTail = availableSpace - nominalHead

        // Keep both cut points off the middle of a surrogate pair.
        let headSize = AXHelpers.adjustForSurrogatePairs(content, offset: nominalHead, direction: .backward)
        let tailStart = AXHelpers.adjustForSurrogatePairs(content, offset: totalLength - nominalTail, direction: .forward)

        let headContent = AXHelpers.substringUTF16(content, start: 0, length: headSize) ?? ""
        let tailContent = AXHelpers.substringUTF16(content, start: tailStart, length: totalLength - tailStart) ?? ""

        return WindowResult(
            windowedContent: headContent + delimiter + tailContent,
            adjustedRange: nil,
            selectedText: nil
        )
    }

    let location = range.location
    let length = range.length

    // CASE B: selection longer than the maximum - anchor the window at its start.
    if length > MAX_FULL_CONTENT_LENGTH {
        return cut(
            from: location,
            to: min(location + MAX_FULL_CONTENT_LENGTH, totalLength),
            location: location,
            length: length
        )
    }

    // CASE C: selection fits - pad it on both sides.
    var windowStart = max(0, location - WINDOW_PADDING)
    var windowEnd = min(totalLength, location + length + WINDOW_PADDING)

    // Padding pushed the window over the limit: re-centre on the selection, then
    // pull the start back if the end was clipped by the end of the content.
    if windowEnd - windowStart > MAX_FULL_CONTENT_LENGTH {
        let selectionCenter = location + length / 2
        windowStart = max(0, selectionCenter - MAX_FULL_CONTENT_LENGTH / 2)
        windowEnd = min(totalLength, windowStart + MAX_FULL_CONTENT_LENGTH)
        windowStart = max(0, windowEnd - MAX_FULL_CONTENT_LENGTH)
    }

    return cut(from: windowStart, to: windowEnd, location: location, length: length)
}
}

View file

@ -0,0 +1,656 @@
import Foundation
import ApplicationServices
import AppKit
// =============================================================================
// AXHelpers - Common Accessibility API Utilities
// =============================================================================
// Shared utilities for working with macOS Accessibility APIs.
// Extracted from AccessibilityContextService for reuse in v2 implementation.
// =============================================================================
// Note: Constants are defined in utils/Constants.swift
// MARK: - Surrogate Pair Direction
/// Direction for surrogate pair boundary adjustment
///
/// Tells `AXHelpers.adjustForSurrogatePairs` which way to move an offset that
/// would otherwise fall inside a UTF-16 surrogate pair.
enum SurrogatePairDirection {
// The offset is advanced by one code unit (offset + 1); callers use this for a window start.
case forward // For windowStart: move into content to include complete character
// The offset is moved back by one code unit (offset - 1); callers use this for a window end.
case backward // For windowEnd: move out of content to exclude incomplete character
}
// MARK: - AXHelpers
/// Utilities for working with macOS Accessibility APIs
enum AXHelpers {
// MARK: - Attribute Access
/// Read an attribute and render it as a string.
///
/// - Parameters:
///   - element: Element to query.
///   - attribute: Attribute name.
/// - Returns: The value itself for strings, `stringValue` for numbers, `"true"`/`"false"`
///   for booleans; `nil` when the query fails or the value has another type.
static func getStringAttribute(_ element: AXUIElement, _ attribute: String) -> String? {
    var raw: CFTypeRef?
    guard AXUIElementCopyAttributeValue(element, attribute as CFString, &raw) == .success else {
        return nil
    }

    // Cases are tried top to bottom; the order is significant.
    // NOTE(review): CFBoolean values bridge to NSNumber, so boolean attributes are
    // probably caught by the NSNumber case (yielding "1"/"0") before the Bool case
    // is ever reached - confirm whether "true"/"false" was the intended output.
    switch raw {
    case let text as String:
        return text
    case let number as NSNumber:
        return number.stringValue
    case let flag as Bool:
        return flag ? "true" : "false"
    default:
        return nil
    }
}
/// Read an attribute as a boolean.
///
/// - Returns: The value when it casts to `Bool`, otherwise the `boolValue` of a
///   numeric value; `nil` when the query fails or the value is neither.
static func getBoolAttribute(_ element: AXUIElement, _ attribute: String) -> Bool? {
    var raw: CFTypeRef?
    guard AXUIElementCopyAttributeValue(element, attribute as CFString, &raw) == .success else {
        return nil
    }
    // Direct Bool cast first, numeric fallback second.
    return (raw as? Bool) ?? (raw as? NSNumber)?.boolValue
}
/// Read a numeric attribute as an `Int`.
///
/// - Returns: `intValue` of the attribute when it is a number; `nil` when the query
///   fails or the value is not numeric.
static func getIntAttribute(_ element: AXUIElement, _ attribute: String) -> Int? {
    var raw: CFTypeRef?
    guard AXUIElementCopyAttributeValue(element, attribute as CFString, &raw) == .success else {
        return nil
    }
    return (raw as? NSNumber)?.intValue
}
/// List the attribute names an element reports.
///
/// - Returns: The names, or an empty array when the query fails.
static func getAttributeNames(_ element: AXUIElement) -> [String] {
    var rawNames: CFArray?
    guard AXUIElementCopyAttributeNames(element, &rawNames) == .success,
          let names = rawNames as? [String] else {
        return []
    }
    return names
}
/// Tell whether `attribute` appears in the element's attribute-name list.
///
/// Each call fetches the full name list via `getAttributeNames`.
static func hasAttribute(_ element: AXUIElement, _ attribute: String) -> Bool {
    let available = getAttributeNames(element)
    return available.contains(attribute)
}
/// Read an attribute without interpreting its type.
///
/// - Returns: The untyped value, or `nil` when the query fails.
static func getRawAttribute(_ element: AXUIElement, _ attribute: String) -> CFTypeRef? {
    var raw: CFTypeRef?
    guard AXUIElementCopyAttributeValue(element, attribute as CFString, &raw) == .success else {
        return nil
    }
    return raw
}
/// Read a parameterized attribute without interpreting its type.
///
/// - Parameters:
///   - element: Element to query.
///   - attribute: Parameterized attribute name.
///   - parameter: Argument passed along with the query.
/// - Returns: The untyped value, or `nil` when the query fails.
static func getParameterizedAttribute(_ element: AXUIElement, _ attribute: String, parameter: CFTypeRef) -> CFTypeRef? {
    var raw: CFTypeRef?
    let status = AXUIElementCopyParameterizedAttributeValue(element, attribute as CFString, parameter, &raw)
    guard status == .success else { return nil }
    return raw
}
// MARK: - Element Type Detection
/// Roles that are typically editable text fields
///
/// Consulted by `isElementEditable` and `isTextCapable`.
static let editableRoles: Set<String> = [
"AXTextField",
"AXTextArea",
"AXComboBox"
]
/// Subroles that indicate editable text fields
///
/// Consulted by `isElementEditable`.
static let editableSubroles: Set<String> = [
"AXSecureTextField",
"AXSearchField"
]
/// Heuristically decide whether an element accepts text editing.
///
/// - Returns: `true` when the role is in `editableRoles`, the subrole is in
///   `editableSubroles`, or - as a last resort - the element exposes an AXValue attribute.
static func isElementEditable(_ element: AXUIElement) -> Bool {
    let role = getStringAttribute(element, kAXRoleAttribute)
    let subrole = getStringAttribute(element, kAXSubroleAttribute)

    let roleMatches = role.map { editableRoles.contains($0) } ?? false
    let subroleMatches = subrole.map { editableSubroles.contains($0) } ?? false
    if roleMatches || subroleMatches {
        return true
    }

    // Neither role nor subrole matched: the presence of AXValue is taken as a hint.
    return hasAttribute(element, kAXValueAttribute)
}
/// Detect secure (password) fields.
///
/// - Returns: `true` when the subrole equals `"AXSecureTextField"` or the role
///   contains the substring `"Secure"`.
static func isSecureField(_ element: AXUIElement) -> Bool {
    // Subrole is the common signal, so it is checked first and the role is only
    // queried when the subrole did not already decide.
    if getStringAttribute(element, kAXSubroleAttribute) == "AXSecureTextField" {
        return true
    }
    return getStringAttribute(element, kAXRoleAttribute)?.contains("Secure") ?? false
}
/// Decide whether an element is currently displaying its placeholder text.
///
/// - Parameters:
///   - element: Element to inspect.
///   - selectionLength: Length of the current selection, or `nil` when unknown.
/// - Returns: `true` only when a non-empty placeholder exists, the current value is
///   missing, empty, or equal to the placeholder, and the selection length is zero
///   or unknown.
static func isPlaceholderShowing(_ element: AXUIElement, selectionLength: Int?) -> Bool {
    let placeholder = getStringAttribute(element, "AXPlaceholderValue")
    let current = getStringAttribute(element, kAXValueAttribute)

    guard let hint = placeholder, !hint.isEmpty else {
        return false
    }

    // An unknown selection length is treated the same as zero.
    let nothingSelected = (selectionLength ?? 0) == 0
    // Missing and empty values both count as "unset".
    let valueLooksUnset = (current ?? "").isEmpty || current == hint

    return valueLooksUnset && nothingSelected
}
/// Decide whether an element can carry a text selection.
///
/// The attribute-name list is fetched once and reused for every check, instead of
/// being re-queried from the target application for each attribute.
///
/// - Returns: `true` when the element exposes `AXSelectedTextMarkerRange` or the
///   standard selected-text-range attribute, when its role is `"AXWebArea"`, or when
///   it exposes AXValue and its role is in `editableRoles`.
static func isTextCapable(_ element: AXUIElement) -> Bool {
    let attributes = Set(getAttributeNames(element))

    // Either selection attribute is sufficient on its own.
    if attributes.contains("AXSelectedTextMarkerRange")
        || attributes.contains(kAXSelectedTextRangeAttribute) {
        return true
    }

    // Both remaining checks need the role; without one the element does not qualify.
    guard let role = getStringAttribute(element, kAXRoleAttribute) else {
        return false
    }
    if role == "AXWebArea" {
        return true
    }
    return attributes.contains(kAXValueAttribute) && editableRoles.contains(role)
}
// MARK: - Element Tree Navigation
/// Fetch the direct children of an element.
///
/// - Returns: The children, or an empty array when the query fails or the value is
///   not an array of elements.
static func getChildren(_ element: AXUIElement) -> [AXUIElement] {
    var raw: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, kAXChildrenAttribute as CFString, &raw)
    if status == .success, let kids = raw as? [AXUIElement] {
        return kids
    }
    return []
}
/// Fetch the parent of an element.
///
/// - Returns: The parent, or `nil` when the query fails or the returned value is not
///   an AXUIElement.
static func getParent(_ element: AXUIElement) -> AXUIElement? {
    var raw: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, kAXParentAttribute as CFString, &raw)
    // The CF type ID is checked first, which makes the forced cast below safe.
    guard status == .success,
          let candidate = raw,
          CFGetTypeID(candidate) == AXUIElementGetTypeID() else {
        return nil
    }
    return (candidate as! AXUIElement)
}
/// Collect the ancestors of an element, nearest first.
///
/// - Parameters:
///   - element: Starting element (not included in the result).
///   - maxDepth: Upper bound on the number of ancestors collected.
/// - Returns: Parent, grandparent, ... until an element has no parent or the bound is hit.
static func getParentChain(_ element: AXUIElement, maxDepth: Int = PARENT_CHAIN_MAX_DEPTH) -> [AXUIElement] {
    var lineage: [AXUIElement] = []
    var cursor = element
    for _ in 0..<maxDepth {
        if let next = getParent(cursor) {
            lineage.append(next)
            cursor = next
        } else {
            break
        }
    }
    return lineage
}
/// Recursively request the children of an element down to `maxDepth` levels.
///
/// Nothing is returned or stored; the only effect is the child queries themselves
/// (presumably so that lazily built subtrees get materialised - confirm with callers).
/// At each level only the first `TOUCH_DESCENDANTS_PREFIX_LIMIT` children are followed.
static func touchDescendants(_ element: AXUIElement, maxDepth: Int = TOUCH_DESCENDANTS_MAX_DEPTH) {
    if maxDepth <= 0 { return }
    getChildren(element)
        .prefix(TOUCH_DESCENDANTS_PREFIX_LIMIT)
        .forEach { touchDescendants($0, maxDepth: maxDepth - 1) }
}
// MARK: - Selection Range Helpers
/// Read the element's selected-text range as a `CFRange`.
///
/// The returned value's CF type is verified before it is treated as an `AXValue`,
/// mirroring the check `getParent` performs; a misbehaving application that returns
/// some other object therefore yields `nil` instead of being handed to
/// `AXValueGetValue` with the wrong type.
///
/// - Returns: The range, or `nil` when the query fails, the value is not an
///   `AXValue`, or it does not hold a `CFRange`.
static func getSelectedTextRange(_ element: AXUIElement) -> CFRange? {
    var raw: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, kAXSelectedTextRangeAttribute as CFString, &raw)
    guard status == .success,
          let candidate = raw,
          CFGetTypeID(candidate) == AXValueGetTypeID() else {
        return nil
    }

    // Safe: the type ID was checked just above.
    let boxed = candidate as! AXValue
    var range = CFRange()
    guard AXValueGetValue(boxed, .cfRange, &range) else {
        return nil
    }
    return range
}
/// Ask an element for the text covered by `range` via the AXStringForRange
/// parameterized attribute.
///
/// - Returns: The text, or `nil` when the range cannot be boxed, the query fails,
///   or the result is not a string.
static func getStringForRange(_ element: AXUIElement, range: CFRange) -> String? {
    // AXValueCreate needs a pointer, hence the mutable copy.
    var rangeCopy = range
    guard let boxedRange = AXValueCreate(.cfRange, &rangeCopy) else {
        return nil
    }

    var raw: CFTypeRef?
    let status = AXUIElementCopyParameterizedAttributeValue(
        element,
        kAXStringForRangeParameterizedAttribute as CFString,
        boxedRange,
        &raw
    )
    guard status == .success else { return nil }
    return raw as? String
}
/// Read the element's AXNumberOfCharacters attribute.
///
/// - Returns: The reported count, or `nil` when the attribute is unavailable or not numeric.
static func getNumberOfCharacters(_ element: AXUIElement) -> Int? {
    getIntAttribute(element, kAXNumberOfCharactersAttribute)
}
// MARK: - UTF-16 String Helpers
/// Nudge a UTF-16 offset by one code unit when it touches a surrogate, so that a cut
/// made at the returned offset does not split a surrogate pair.
///
/// The offset is shifted (`+1` for `.forward`, `-1` for `.backward`) when either
///   - the code unit AT the offset is a trail (low) surrogate, or
///   - the code unit just BEFORE the offset is a lead (high) surrogate.
/// In well-formed text the second condition implies the first, so it presumably only
/// matters for an unpaired lead surrogate.
///
/// - Parameters:
///   - content: Text the offset refers to.
///   - offset: UTF-16 code-unit offset into `content`.
///   - direction: Which way to move when an adjustment is needed.
/// - Returns: The adjusted offset. Offsets `<= 0` or `>= content.utf16.count` are
///   returned unchanged.
static func adjustForSurrogatePairs(_ content: String, offset: Int, direction: SurrogatePairDirection) -> Int {
    let units = content.utf16
    guard offset > 0, offset < units.count else { return offset }

    let shifted: Int
    switch direction {
    case .forward:
        shifted = offset + 1
    case .backward:
        shifted = offset - 1
    }

    // `offset > 0` is guaranteed by the guard, so a previous code unit always exists.
    let here = units.index(units.startIndex, offsetBy: offset)
    let touchesSurrogate = UTF16.isTrailSurrogate(units[here])
        || UTF16.isLeadSurrogate(units[units.index(before: here)])

    return touchesSurrogate ? shifted : offset
}
/// Restrict `value` to the closed interval `minValue...maxValue`.
///
/// The upper bound is applied first, then the lower bound; if the bounds are
/// inverted (`minValue > maxValue`) the result is therefore `minValue`.
static func clamp<T: Comparable>(_ value: T, min minValue: T, max maxValue: T) -> T {
    let capped = value < maxValue ? value : maxValue
    return capped >= minValue ? capped : minValue
}
/// Extract a substring addressed by UTF-16 code-unit offsets.
///
/// The requested length is clamped to what remains after `start`, so an oversized
/// `length` (up to `Int.max`) is safe and cannot overflow.
///
/// - Parameters:
///   - content: Source text.
///   - start: Offset of the first code unit; must lie in `0...content.utf16.count`.
///   - length: Number of code units wanted; must be non-negative.
/// - Returns: The substring, or `nil` when `start`/`length` are out of bounds or the
///   slice cannot be converted back into a `String`.
static func substringUTF16(_ content: String, start: Int, length: Int) -> String? {
    let utf16 = content.utf16
    let totalLength = utf16.count
    guard start >= 0, start <= totalLength, length >= 0 else { return nil }

    // `totalLength - start` is non-negative here, so this never overflows,
    // unlike computing `start + length` directly.
    let span = min(length, totalLength - start)
    let startIdx = utf16.index(utf16.startIndex, offsetBy: start)
    let endIdx = utf16.index(startIdx, offsetBy: span)
    return String(utf16[startIdx..<endIdx])
}
// MARK: - Process Helpers
/// Process ID of the frontmost application as reported by `NSWorkspace`.
///
/// - Returns: The PID, or `0` when no frontmost application is reported.
static func getFrontProcessID() -> pid_t {
    NSWorkspace.shared.frontmostApplication?.processIdentifier ?? 0
}
/// Look up the `NSRunningApplication` for a process ID.
///
/// - Returns: The application, or `nil` when none is found for `pid`.
static func getRunningApplication(pid: pid_t) -> NSRunningApplication? {
    NSRunningApplication(processIdentifier: pid)
}
/// Name of the process, taken from the last path component of its executable URL.
///
/// - Returns: The name, or `nil` when the application or its executable URL is unavailable.
static func getProcessName(pid: pid_t) -> String? {
    getRunningApplication(pid: pid)?.executableURL?.lastPathComponent
}
/// Bundle identifier of the application running under `pid`.
///
/// - Returns: The identifier, or `nil` when the application is not found or has none.
static func getBundleIdentifier(pid: pid_t) -> String? {
    let application = getRunningApplication(pid: pid)
    return application?.bundleIdentifier
}
/// Short version string (`CFBundleShortVersionString`) of the application running under `pid`.
///
/// - Returns: The version, or `nil` when the application, its bundle, its info
///   dictionary, or the key is unavailable.
static func getApplicationVersion(pid: pid_t) -> String? {
    guard let bundleURL = getRunningApplication(pid: pid)?.bundleURL,
          let info = Bundle(url: bundleURL)?.infoDictionary else {
        return nil
    }
    return info["CFBundleShortVersionString"] as? String
}
/// Build the top-level accessibility element for the application running under `pid`.
static func createApplicationElement(pid: pid_t) -> AXUIElement {
    AXUIElementCreateApplication(pid)
}
/// Switch on the `AXManualAccessibility` and `AXEnhancedUserInterface` flags for
/// applications listed in `appsRequiringManualAX`.
///
/// Does nothing when `bundleId` is `nil` or not in that list. The outcome of setting
/// each flag is deliberately ignored.
///
/// - Parameters:
///   - application: Application-level accessibility element.
///   - bundleId: Bundle identifier of that application, if known.
static func enableManualAccessibilityIfNeeded(application: AXUIElement, bundleId: String?) {
    guard let bundleId = bundleId, appsRequiringManualAX.contains(bundleId) else { return }

    for flag in ["AXManualAccessibility", "AXEnhancedUserInterface"] {
        // Best effort: failures are not reported.
        _ = AXUIElementSetAttributeValue(application, flag as CFString, kCFBooleanTrue)
    }
}
// MARK: - Permission Helpers
/// Ask the system whether this process is trusted for accessibility.
///
/// - Parameter prompt: Value passed for the `kAXTrustedCheckOptionPrompt` option.
/// - Returns: The result of `AXIsProcessTrustedWithOptions`.
static func checkAccessibilityPermissions(prompt: Bool = false) -> Bool {
    let promptKey = kAXTrustedCheckOptionPrompt.takeUnretainedValue() as String
    let options: [String: Any] = [promptKey: prompt]
    return AXIsProcessTrustedWithOptions(options as CFDictionary)
}
// MARK: - WebArea Search Helpers
/// Read the element's AXRole attribute as a string.
///
/// - Returns: The role, or `nil` when it cannot be read.
static func getRole(_ element: AXUIElement) -> String? {
    getStringAttribute(element, kAXRoleAttribute)
}
/// Process ID of the application that owns an element.
///
/// - Returns: The PID, or `nil` when `AXUIElementGetPid` fails.
static func getPid(_ element: AXUIElement) -> pid_t? {
    var owner: pid_t = 0
    guard AXUIElementGetPid(element, &owner) == .success else {
        return nil
    }
    return owner
}
/// Fetch the element that currently has focus inside the application running under `pid`.
///
/// The returned value's CF type is verified before it is treated as an `AXUIElement`,
/// matching the check `getParent` performs, so an unexpected payload yields `nil`
/// rather than an unchecked cast.
///
/// - Parameter pid: Process ID of the application; `nil` and non-positive values are rejected.
/// - Returns: The focused element, or `nil` when the PID is invalid, the query fails,
///   or the value is not an accessibility element.
static func getAppFocusedElement(forPid pid: pid_t?) -> AXUIElement? {
    guard let pid = pid, pid > 0 else { return nil }

    let application = AXUIElementCreateApplication(pid)
    var raw: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(
        application,
        kAXFocusedUIElementAttribute as CFString,
        &raw
    )
    guard status == .success,
          let candidate = raw,
          CFGetTypeID(candidate) == AXUIElementGetTypeID() else {
        return nil
    }
    // Safe: the type ID was checked just above.
    return (candidate as! AXUIElement)
}
/// Tell whether `elementA` is `elementB` itself or sits somewhere below it.
///
/// The parent chain of `elementA` is followed upwards for at most
/// `DESCENDANT_CHECK_MAX_DEPTH` steps, so a relationship deeper than that bound is
/// reported as `false`.
static func isDescendantOrEqual(_ elementA: AXUIElement, of elementB: AXUIElement) -> Bool {
    if CFEqual(elementA, elementB) {
        return true
    }

    var cursor = elementA
    var hops = 0
    // The bound also guards against a parent chain that never terminates.
    while hops < DESCENDANT_CHECK_MAX_DEPTH, let parent = getParent(cursor) {
        if CFEqual(parent, elementB) {
            return true
        }
        cursor = parent
        hops += 1
    }
    return false
}
/// Tell whether an element currently reports a TextMarker selection.
///
/// - Returns: `true` when `AXSelectedTextMarkerRange` can be read and is non-nil, or
///   when `AXSelectedTextMarkerRanges` can be read as a non-empty array. The second
///   attribute is only queried when the first did not already succeed.
static func hasTextMarkerRange(_ element: AXUIElement) -> Bool {
    // Single range: mere presence is enough (no length check is made).
    var single: CFTypeRef?
    let singleStatus = AXUIElementCopyAttributeValue(
        element,
        "AXSelectedTextMarkerRange" as CFString,
        &single
    )
    if singleStatus == .success, single != nil {
        return true
    }

    // Multi-range variant: needs at least one entry.
    var multi: CFTypeRef?
    let multiStatus = AXUIElementCopyAttributeValue(
        element,
        "AXSelectedTextMarkerRanges" as CFString,
        &multi
    )
    guard multiStatus == .success, let ranges = multi as? [Any] else {
        return false
    }
    return !ranges.isEmpty
}
/// Breadth-first search below `element` for the text element most likely to hold the cursor.
///
/// Only descendants that have BOTH a non-empty AXValue and a selected-text range are
/// considered. Among those, the result is chosen in this order:
///   1. the last visited one whose AXFocused attribute is true
///   2. the first visited one whose range has a non-zero location or length and whose
///      value is at least `location` UTF-16 units long
///   3. the one with the longest value
///
/// - Parameters:
///   - element: Container to start from (never returned itself).
///   - maxDepth: Elements at this depth or deeper are not expanded.
///   - maxElements: Upper bound on the number of elements expanded.
/// - Returns: The best match, or `nil` when no descendant qualifies.
static func findDeepestTextElement(
    from element: AXUIElement,
    maxDepth: Int = FIND_TEXT_ELEMENT_MAX_DEPTH,
    maxElements: Int = FIND_TEXT_ELEMENT_MAX_ELEMENTS
) -> AXUIElement? {
    var focusedHit: AXUIElement?
    var selectionHit: AXUIElement?
    var largestHit: AXUIElement?
    var largestLength = 0

    // FIFO work list; `head` marks the next entry to expand.
    var pending: [(node: AXUIElement, depth: Int)] = [(element, 0)]
    var head = 0
    var expanded = 0

    while head < pending.count && expanded < maxElements {
        let (node, depth) = pending[head]
        head += 1
        expanded += 1

        // Entries at the depth limit are counted but not expanded.
        guard depth < maxDepth else { continue }

        for child in getChildren(node) {
            let text = getStringAttribute(child, kAXValueAttribute)
            let selection = getSelectedTextRange(child)

            var focusedRef: CFTypeRef?
            let focusedStatus = AXUIElementCopyAttributeValue(child, kAXFocusedAttribute as CFString, &focusedRef)
            let isFocused = focusedStatus == .success && (focusedRef as? Bool) == true

            // All three priorities share the same precondition:
            // non-empty value AND a selected-text range.
            if let text = text, !text.isEmpty, let selection = selection {
                let unitCount = text.utf16.count

                // Priority 1: later focused matches overwrite earlier ones.
                if isFocused {
                    focusedHit = child
                }

                // Priority 2: first element whose range shows the cursor moved or
                // text selected, provided the value is long enough to hold it.
                if selectionHit == nil,
                   selection.location > 0 || selection.length > 0,
                   unitCount >= selection.location {
                    selectionHit = child
                }

                // Priority 3: strictly longest value seen so far.
                if unitCount > largestLength {
                    largestLength = unitCount
                    largestHit = child
                }
            }

            pending.append((child, depth + 1))
        }
    }

    return focusedHit ?? selectionHit ?? largestHit
}
/// Breadth-first search for AXWebArea elements below `element`.
///
/// - Parameters:
///   - element: Root of the search (never reported itself).
///   - excludeElement: Element to leave out of the results (typically the focused element).
///   - maxDepth: Elements at this depth or deeper are not expanded.
///   - maxElements: Upper bound on the number of elements expanded.
/// - Returns: `(webArea, depth)` pairs in discovery order, where depth 1 is a direct child.
static func findWebAreasInDescendants(
    element: AXUIElement,
    excludeElement: AXUIElement,
    maxDepth: Int = FIND_WEB_AREAS_MAX_DEPTH,
    maxElements: Int = FIND_WEB_AREAS_MAX_ELEMENTS
) -> [(AXUIElement, Int)] {
    var found: [(AXUIElement, Int)] = []

    // FIFO work list; `head` marks the next entry to expand.
    var pending: [(node: AXUIElement, depth: Int)] = [(element, 0)]
    var head = 0
    var expanded = 0

    while head < pending.count && expanded < maxElements {
        let (node, depth) = pending[head]
        head += 1
        expanded += 1

        // Entries at the depth limit are counted but their children are not inspected.
        guard depth < maxDepth else { continue }

        let childDepth = depth + 1
        for child in getChildren(node) {
            if getRole(child) == "AXWebArea", !CFEqual(child, excludeElement) {
                found.append((child, childDepth))
            }
            // WebAreas are searched too: they may contain nested WebAreas.
            pending.append((child, childDepth))
        }
    }

    return found
}
/// Climbs the parent chain of `element` and collects every ancestor whose role is `AXWebArea`.
///
/// The walk stops as soon as an element has no parent or `maxLevels` ancestors have been
/// inspected, whichever comes first. `element` itself is never reported.
/// - Parameters:
///   - element: Element whose ancestors are inspected.
///   - excludeElement: Element to leave out of the results (compared with `CFEqual`).
///   - maxLevels: Maximum number of ancestors to inspect (default 3).
/// - Returns: Array of (WebArea, depth) tuples, nearest ancestor first; depth is negative
///   (-1 = parent, -2 = grandparent, ...).
static func findWebAreasInAncestors(
    element: AXUIElement,
    excludeElement: AXUIElement,
    maxLevels: Int = 3
) -> [(AXUIElement, Int)] {
    var webAreas: [(AXUIElement, Int)] = []
    var cursor = element
    var climbed = 0

    // The level check comes first so no parent lookup is made once the budget is spent.
    while climbed < maxLevels, let ancestor = getParent(cursor) {
        climbed += 1
        if let role = getRole(ancestor), role == "AXWebArea", !CFEqual(ancestor, excludeElement) {
            // Ancestors are tagged with a negative depth to tell them apart from descendants.
            webAreas.append((ancestor, -climbed))
        }
        cursor = ancestor
    }
    return webAreas
}
}

View file

@ -0,0 +1,100 @@
import Foundation
import CoreGraphics
// =============================================================================
// Constants - Centralized Configuration for Accessibility Extraction
// =============================================================================
// All magic numbers, timeouts, depths, and configuration values in one place.
// This makes it easier to tune, document, and understand system behavior.
// =============================================================================
// MARK: - Content Limits
// NOTE: MAX_CONTEXT_LENGTH, MAX_FULL_CONTENT_LENGTH, WINDOW_PADDING, TREE_WALK_MAX_DEPTH,
// TREE_WALK_MAX_ELEMENTS and the extraction timeout are repeated (for reference only) in the
// TypeScript `ACCESSIBILITY_CONSTANTS` object; keep both sides in sync when tuning.
/// Maximum UTF-16 code units for pre/post selection context
let MAX_CONTEXT_LENGTH = 500
/// Maximum UTF-16 code units for full content before truncation
let MAX_FULL_CONTENT_LENGTH = 50_000
/// Padding around selection when windowing content (UTF-16 code units).
/// 2 x WINDOW_PADDING equals MAX_FULL_CONTENT_LENGTH.
/// NOTE(review): presumably applied on each side of the selection - confirm at the windowing call site.
let WINDOW_PADDING = 25_000
// MARK: - Tree Traversal Limits
/// Default maximum depth for generic tree walks (BFS)
let TREE_WALK_MAX_DEPTH = 8
/// Maximum elements to visit during tree searches
let TREE_WALK_MAX_ELEMENTS = 100
/// Depth for touching descendants to trigger lazy loading
let TOUCH_DESCENDANTS_MAX_DEPTH = 3
/// Maximum children to touch per level during lazy loading
let TOUCH_DESCENDANTS_PREFIX_LIMIT = 8
/// Default depth for parent chain traversal
let PARENT_CHAIN_MAX_DEPTH = 10
/// Depth limit for descendant-or-equal check (infinite loop guard)
let DESCENDANT_CHECK_MAX_DEPTH = 20
/// Default depth for finding deepest text element
let FIND_TEXT_ELEMENT_MAX_DEPTH = 10
/// Maximum elements to visit when finding text element
let FIND_TEXT_ELEMENT_MAX_ELEMENTS = 200
/// Default depth for finding WebAreas in descendants
/// (default value of `maxDepth` in `findWebAreasInDescendants`)
let FIND_WEB_AREAS_MAX_DEPTH = 10
/// Maximum elements to visit when finding WebAreas
/// (default value of `maxElements` in `findWebAreasInDescendants`)
let FIND_WEB_AREAS_MAX_ELEMENTS = 200
// MARK: - Browser-Specific Depths
/// Depth for Chromium browser URL search (deeper due to complex DOM)
let CHROMIUM_URL_SEARCH_DEPTH = 30
/// Depth for non-Chromium browser URL search
let NON_CHROMIUM_URL_SEARCH_DEPTH = 3
/// Depth for WebArea ancestor search (increased for deeply nested Electron apps like Notion)
let WEB_AREA_ANCESTOR_SEARCH_DEPTH = 15
// MARK: - Timeouts
/// Best-effort timeout for extraction (milliseconds)
let EXTRACTION_TIMEOUT_MS: Double = 600.0
/// Delay before restoring pasteboard after paste (seconds)
let PASTE_RESTORE_DELAY_SECONDS: Double = 0.2
// MARK: - Virtual Key Codes (macOS)
// These values match the Carbon HIToolbox constants kVK_ANSI_V (0x09),
// kVK_Command (0x37 = 55) and kVK_Function (0x3F).
/// Virtual key code for 'V' key
let VK_V: CGKeyCode = 9
/// Virtual key code for Command key
let VK_COMMAND: CGKeyCode = 55
/// Virtual key code for Function (Fn) key
let VK_FUNCTION: CGKeyCode = 0x3F
// MARK: - Accessibility Tree Building
/// Maximum recursion depth for building accessibility tree
let ACCESSIBILITY_TREE_MAX_DEPTH = 10
// MARK: - App Lists
/// Apps that need manual accessibility enabling (browsers), listed by bundle identifier.
/// NOTE(review): how "manual enabling" is performed is not visible in this file - see the call sites.
let appsRequiringManualAX: Set<String> = [
"com.google.Chrome",
"org.mozilla.firefox",
"com.microsoft.edgemac",
"com.apple.Safari",
"com.brave.Browser",
"com.operasoftware.Opera",
"com.vivaldi.Vivaldi"
]

View file

@ -111,6 +111,12 @@ namespace WindowsHelper.Models
[JsonPropertyName("focusedElement")]
public FocusedElement FocusedElement { get; set; }
[JsonPropertyName("metrics")]
public Metrics Metrics { get; set; }
[JsonPropertyName("schemaVersion")]
public SchemaVersion SchemaVersion { get; set; }
[JsonPropertyName("textSelection")]
public TextSelection TextSelection { get; set; }
@ -129,6 +135,9 @@ namespace WindowsHelper.Models
[JsonPropertyName("name")]
public string Name { get; set; }
[JsonPropertyName("pid")]
public long Pid { get; set; }
[JsonPropertyName("version")]
public string Version { get; set; }
}
@ -141,9 +150,21 @@ namespace WindowsHelper.Models
[JsonPropertyName("isEditable")]
public bool IsEditable { get; set; }
[JsonPropertyName("isFocused")]
public bool IsFocused { get; set; }
[JsonPropertyName("isPlaceholder")]
public bool IsPlaceholder { get; set; }
[JsonPropertyName("isSecure")]
public bool IsSecure { get; set; }
[JsonPropertyName("role")]
public string Role { get; set; }
[JsonPropertyName("subrole")]
public string Subrole { get; set; }
[JsonPropertyName("title")]
public string Title { get; set; }
@ -151,14 +172,60 @@ namespace WindowsHelper.Models
public string Value { get; set; }
}
/// <summary>
/// Extraction performance metrics carried on the accessibility context.
/// The JSON property names match the fields of the TypeScript ExtractionMetricsSchema.
/// NOTE(review): this file appears to be generated from the shared schemas - prefer
/// regenerating over hand-editing.
/// </summary>
public partial class Metrics
{
/// <summary>Technical error messages only (the schema forbids PII or content values).</summary>
[JsonPropertyName("errors")]
public List<string> Errors { get; set; }
/// <summary>Fallback extraction methods that were tried, in order.</summary>
[JsonPropertyName("fallbacksUsed")]
public List<The0> FallbacksUsed { get; set; }
/// <summary>Whether TextMarker extraction was attempted.</summary>
[JsonPropertyName("textMarkerAttempted")]
public bool TextMarkerAttempted { get; set; }
/// <summary>Whether TextMarker extraction succeeded.</summary>
[JsonPropertyName("textMarkerSucceeded")]
public bool TextMarkerSucceeded { get; set; }
/// <summary>Whether extraction exceeded the best-effort time budget.</summary>
[JsonPropertyName("timedOut")]
public bool TimedOut { get; set; }
/// <summary>
/// Total extraction time in milliseconds. The attached converter throws for
/// values that are not &gt;= 0 on both read and write.
/// </summary>
[JsonPropertyName("totalTimeMs")]
[JsonConverter(typeof(MinMaxValueCheckConverter))]
public double TotalTimeMs { get; set; }
/// <summary>Whether a different WebArea was found to switch to.</summary>
[JsonPropertyName("webAreaFound")]
public bool WebAreaFound { get; set; }
/// <summary>Whether WebArea candidates were searched for.</summary>
[JsonPropertyName("webAreaRetryAttempted")]
public bool WebAreaRetryAttempted { get; set; }
/// <summary>Whether TextMarker extraction worked on the switched WebArea.</summary>
[JsonPropertyName("webAreaRetrySucceeded")]
public bool WebAreaRetrySucceeded { get; set; }
}
public partial class TextSelection
{
[JsonPropertyName("extractionMethod")]
public The0 ExtractionMethod { get; set; }
[JsonPropertyName("fullContent")]
public string FullContent { get; set; }
[JsonPropertyName("fullContentTruncated")]
public bool FullContentTruncated { get; set; }
[JsonPropertyName("hasMultipleRanges")]
public bool HasMultipleRanges { get; set; }
[JsonPropertyName("isEditable")]
public bool IsEditable { get; set; }
[JsonPropertyName("isPlaceholder")]
public bool IsPlaceholder { get; set; }
[JsonPropertyName("isSecure")]
public bool IsSecure { get; set; }
[JsonPropertyName("postSelectionText")]
public string PostSelectionText { get; set; }
@ -457,7 +524,11 @@ namespace WindowsHelper.Models
public bool? ShiftKey { get; set; }
}
public enum Method { GetAccessibilityContext, GetAccessibilityTreeDetails, MuteSystemAudio, PasteText, RestoreSystemAudio, SetShortcuts };
public enum Method { GetAccessibilityContext, GetAccessibilityStatus, GetAccessibilityTreeDetails, MuteSystemAudio, PasteText, RequestAccessibilityPermission, RestoreSystemAudio, SetShortcuts };
public enum The0 { ClipboardCopy, None, SelectedTextRange, SelectedTextRanges, StringForRange, TextMarkerRange, ValueAttribute };
public enum SchemaVersion { The20 };
public enum KeyDownEventType { KeyDown };
@ -585,6 +656,8 @@ namespace WindowsHelper.Models
Converters =
{
MethodConverter.Singleton,
The0Converter.Singleton,
SchemaVersionConverter.Singleton,
KeyDownEventTypeConverter.Singleton,
KeyUpEventTypeConverter.Singleton,
FlagsChangedEventTypeConverter.Singleton,
@ -607,12 +680,16 @@ namespace WindowsHelper.Models
{
case "getAccessibilityContext":
return Method.GetAccessibilityContext;
case "getAccessibilityStatus":
return Method.GetAccessibilityStatus;
case "getAccessibilityTreeDetails":
return Method.GetAccessibilityTreeDetails;
case "muteSystemAudio":
return Method.MuteSystemAudio;
case "pasteText":
return Method.PasteText;
case "requestAccessibilityPermission":
return Method.RequestAccessibilityPermission;
case "restoreSystemAudio":
return Method.RestoreSystemAudio;
case "setShortcuts":
@ -628,6 +705,9 @@ namespace WindowsHelper.Models
case Method.GetAccessibilityContext:
JsonSerializer.Serialize(writer, "getAccessibilityContext", options);
return;
case Method.GetAccessibilityStatus:
JsonSerializer.Serialize(writer, "getAccessibilityStatus", options);
return;
case Method.GetAccessibilityTreeDetails:
JsonSerializer.Serialize(writer, "getAccessibilityTreeDetails", options);
return;
@ -637,6 +717,9 @@ namespace WindowsHelper.Models
case Method.PasteText:
JsonSerializer.Serialize(writer, "pasteText", options);
return;
case Method.RequestAccessibilityPermission:
JsonSerializer.Serialize(writer, "requestAccessibilityPermission", options);
return;
case Method.RestoreSystemAudio:
JsonSerializer.Serialize(writer, "restoreSystemAudio", options);
return;
@ -650,6 +733,119 @@ namespace WindowsHelper.Models
public static readonly MethodConverter Singleton = new MethodConverter();
}
/// <summary>
/// Converts <see cref="The0"/> (the extraction-method enum) to and from its
/// camelCase JSON string form, throwing for any unrecognised value.
/// </summary>
internal class The0Converter : JsonConverter<The0>
{
    public static readonly The0Converter Singleton = new The0Converter();

    public override bool CanConvert(Type t)
    {
        return typeof(The0) == t;
    }

    /// <summary>Reads a JSON string and maps it to the matching enum member.</summary>
    public override The0 Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        string text = reader.GetString();
        if (text == "clipboardCopy") { return The0.ClipboardCopy; }
        if (text == "none") { return The0.None; }
        if (text == "selectedTextRange") { return The0.SelectedTextRange; }
        if (text == "selectedTextRanges") { return The0.SelectedTextRanges; }
        if (text == "stringForRange") { return The0.StringForRange; }
        if (text == "textMarkerRange") { return The0.TextMarkerRange; }
        if (text == "valueAttribute") { return The0.ValueAttribute; }
        // Unknown strings (and a null token) are rejected rather than mapped to a default.
        throw new Exception("Cannot unmarshal type The0");
    }

    /// <summary>Writes the enum member as its JSON string name.</summary>
    public override void Write(Utf8JsonWriter writer, The0 value, JsonSerializerOptions options)
    {
        string text;
        switch (value)
        {
            case The0.ClipboardCopy:
                text = "clipboardCopy";
                break;
            case The0.None:
                text = "none";
                break;
            case The0.SelectedTextRange:
                text = "selectedTextRange";
                break;
            case The0.SelectedTextRanges:
                text = "selectedTextRanges";
                break;
            case The0.StringForRange:
                text = "stringForRange";
                break;
            case The0.TextMarkerRange:
                text = "textMarkerRange";
                break;
            case The0.ValueAttribute:
                text = "valueAttribute";
                break;
            default:
                // A value outside the declared members cannot be represented in JSON.
                throw new Exception("Cannot marshal type The0");
        }
        JsonSerializer.Serialize(writer, text, options);
    }
}
/// <summary>
/// Converter for <c>double</c> values that must be greater than or equal to zero.
/// Any other value (negative numbers, and NaN since it fails the comparison)
/// is rejected with an exception on both read and write.
/// </summary>
internal class MinMaxValueCheckConverter : JsonConverter<double>
{
    public static readonly MinMaxValueCheckConverter Singleton = new MinMaxValueCheckConverter();

    public override bool CanConvert(Type t)
    {
        return typeof(double) == t;
    }

    /// <summary>Reads a JSON number and returns it when it is &gt;= 0.</summary>
    public override double Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        double number = reader.GetDouble();
        // Written as a negated ">=" (not "< 0") so NaN is rejected as well.
        if (!(number >= 0))
        {
            throw new Exception("Cannot unmarshal type double");
        }
        return number;
    }

    /// <summary>Writes the value as a JSON number when it is &gt;= 0.</summary>
    public override void Write(Utf8JsonWriter writer, double value, JsonSerializerOptions options)
    {
        if (!(value >= 0))
        {
            throw new Exception("Cannot marshal type double");
        }
        JsonSerializer.Serialize(writer, value, options);
    }
}
/// <summary>
/// Converts <see cref="SchemaVersion"/> to and from its JSON string form.
/// Only the literal "2.0" is accepted; anything else throws.
/// </summary>
internal class SchemaVersionConverter : JsonConverter<SchemaVersion>
{
    public static readonly SchemaVersionConverter Singleton = new SchemaVersionConverter();

    public override bool CanConvert(Type t)
    {
        return typeof(SchemaVersion) == t;
    }

    /// <summary>Reads a JSON string and maps "2.0" to <see cref="SchemaVersion.The20"/>.</summary>
    public override SchemaVersion Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        string text = reader.GetString();
        if (text != "2.0")
        {
            throw new Exception("Cannot unmarshal type SchemaVersion");
        }
        return SchemaVersion.The20;
    }

    /// <summary>Writes <see cref="SchemaVersion.The20"/> as the JSON string "2.0".</summary>
    public override void Write(Utf8JsonWriter writer, SchemaVersion value, JsonSerializerOptions options)
    {
        if (value != SchemaVersion.The20)
        {
            throw new Exception("Cannot marshal type SchemaVersion");
        }
        JsonSerializer.Serialize(writer, "2.0", options);
    }
}
internal class KeyDownEventTypeConverter : JsonConverter<KeyDownEventType>
{
public override bool CanConvert(Type t) => t == typeof(KeyDownEventType);

View file

@ -1,67 +1,316 @@
import { z } from "zod";
// Request params
// =============================================================================
// Accessibility Context Schema
// =============================================================================
// Schema for the Swift helper accessibility layer.
// Key features:
// - TextMarker API support for Electron/Chromium apps
// - Extraction method tracking for debugging
// - Performance metrics
// - Secure field and placeholder detection
// - UTF-16 code unit semantics (documented)
// =============================================================================
// -----------------------------------------------------------------------------
// Enums
// -----------------------------------------------------------------------------
/**
* How the text selection was extracted.
* Priority order: textMarkerRange > selectedTextRange > selectedTextRanges > valueAttribute > stringForRange
*
* The two remaining members are outside that priority chain as written here:
* - "clipboardCopy" is marked below as a Phase 2 fallback.
* - "none" means no extraction was possible (e.g. a secure field).
*
* The same seven string values are used by the `fallbacksUsed` metric.
*/
export const ExtractionMethodSchema = z.enum([
"textMarkerRange", // Primary - AXSelectedTextMarkerRange (works in Electron)
"selectedTextRange", // Fallback 1 - AXSelectedTextRange
"selectedTextRanges", // Fallback 2 - AXSelectedTextRanges (multi-select)
"valueAttribute", // Fallback 3 - AXValue
"stringForRange", // Fallback 4 - AXStringForRange
"clipboardCopy", // Fallback 5 - Clipboard (Phase 2)
"none", // No extraction possible (secure field, etc.)
]);
export type ExtractionMethod = z.infer<typeof ExtractionMethodSchema>;
// -----------------------------------------------------------------------------
// Core Data Structures
// -----------------------------------------------------------------------------
/**
* Character range for text selection.
*
* IMPORTANT: UTF-16 Code Unit Semantics
* All `location` and `length` values are UTF-16 code unit offsets (equivalent to NSString indices),
* NOT Unicode scalar or grapheme cluster counts.
*
* This matches macOS Accessibility API semantics where CFRange and NSRange use UTF-16 code units.
* Each character outside the Basic Multilingual Plane (e.g., the emoji 😀) occupies 2 code units
* (a surrogate pair); sequences built from several such characters occupy more (see below).
*
* Examples:
* - "a" (U+0061) = 1 code unit
* - "😀" (U+1F600) = 2 code units
* - "👨‍👩‍👧‍👦" = 11 code units (4 emoji x 2 units + 3 zero-width joiners)
*
* Implications:
* - Swift: Use String.utf16 view for slicing
* - TypeScript/JS: string.length counts code units, so indices align correctly
*/
export const SelectionRangeSchema = z.object({
/** UTF-16 code unit offset from start (NOT grapheme count); must be a non-negative integer */
location: z.number().int().nonnegative(),
/** UTF-16 code unit count (0 = cursor only, no selection); must be a non-negative integer */
length: z.number().int().nonnegative(),
});
export type SelectionRange = z.infer<typeof SelectionRangeSchema>;
/**
* Text selection information.
*
* Null vs Empty String Semantics:
* - null = unavailable/unknown (API failed, attribute doesn't exist, or suppressed for security)
* - "" = available and empty (API succeeded, value exists, but is legitimately empty)
*
* Examples:
* - Cursor-only: selectedText = "" (not null), selectionRange.length = 0
* - Empty text field: fullContent = "" (not null)
* - Secure field: all text fields are null (suppressed)
*
* NOTE(review): when `fullContentTruncated` is true, it is not stated here whether
* `selectionRange` is relative to the windowed `fullContent` or to the original,
* untruncated content - confirm against the Swift implementation before slicing.
*/
export const TextSelectionSchema = z.object({
// Core data
/** Selected text ("" for cursor-only, null if unavailable/suppressed) */
selectedText: z.string().nullable(),
/** Full textbox content (window around selection if large, null if unavailable) */
fullContent: z.string().nullable(),
/** Up to 500 UTF-16 units before selection (null if unavailable); see ACCESSIBILITY_CONSTANTS.MAX_CONTEXT_LENGTH */
preSelectionText: z.string().nullable(),
/** Up to 500 UTF-16 units after selection (null if unavailable); see ACCESSIBILITY_CONSTANTS.MAX_CONTEXT_LENGTH */
postSelectionText: z.string().nullable(),
/** UTF-16 code unit range (null for secure fields or if unavailable) */
selectionRange: SelectionRangeSchema.nullable(),
// Metadata
/** Can user type in this element? */
isEditable: z.boolean(),
/** How was selection obtained? */
extractionMethod: ExtractionMethodSchema,
/** Multi-cursor/selection detected? */
hasMultipleRanges: z.boolean(),
// Safety flags
/** Is this showing placeholder text only (no user input)? */
isPlaceholder: z.boolean(),
/** Is this a password/secure field? (all content fields will be null) */
isSecure: z.boolean(),
// Truncation info
/** Was fullContent truncated/windowed due to size limits? */
fullContentTruncated: z.boolean(),
});
export type TextSelection = z.infer<typeof TextSelectionSchema>;
/**
* Focused element information.
*
* All string attributes are nullable; the four boolean flags are always present.
*/
export const AXElementInfoSchema = z.object({
/** AXRole (AXTextField, AXWebArea, etc.) */
role: z.string().nullable(),
/** AXSubrole if present */
subrole: z.string().nullable(),
/** AXTitle */
title: z.string().nullable(),
/** AXDescription */
description: z.string().nullable(),
/** AXValue (null for secure fields - suppressed for security) */
value: z.string().nullable(),
/** Can user type in this element? */
isEditable: z.boolean(),
/** Is this element focused? */
isFocused: z.boolean(),
/** Is this a secure/password field? */
isSecure: z.boolean(),
/** Is this showing placeholder text? */
isPlaceholder: z.boolean(),
});
export type AXElementInfo = z.infer<typeof AXElementInfoSchema>;
/**
* Application information.
*
* `pid` is the only required (non-nullable) field; the descriptive strings may be null.
*/
export const ApplicationInfoSchema = z.object({
/** Application name */
name: z.string().nullable(),
/** Bundle identifier (e.g., com.apple.Safari) */
bundleIdentifier: z.string().nullable(),
/** Application version */
version: z.string().nullable(),
/** Process ID (integer, always present) */
pid: z.number().int(),
});
export type ApplicationInfo = z.infer<typeof ApplicationInfoSchema>;
/**
* Window information.
*
* Both fields are nullable.
*/
export const WindowInfoSchema = z.object({
/** Window title */
title: z.string().nullable(),
/** Browser URL if detected (null otherwise) */
url: z.string().nullable(),
});
export type WindowInfo = z.infer<typeof WindowInfoSchema>;
/**
* Extraction performance metrics.
*
* Every field is required; none is nullable or optional.
*
* Note: Error strings must contain only technical error messages, never PII or content values.
* Allowed: "TextMarker: AXError -25204", "Timeout exceeded"
* Forbidden: "Failed to parse text: Hello World", "Value was: password123"
*/
export const ExtractionMetricsSchema = z.object({
/** Total extraction time in milliseconds (non-negative, may be fractional) */
totalTimeMs: z.number().nonnegative(),
/** Did we attempt TextMarker extraction? */
textMarkerAttempted: z.boolean(),
/** Did TextMarker extraction succeed? */
textMarkerSucceeded: z.boolean(),
/** Which fallback methods were tried (in order) */
fallbacksUsed: z.array(ExtractionMethodSchema),
/** Technical error messages only - NO PII/content */
errors: z.array(z.string()),
/** Did extraction exceed best-effort time budget? (see ACCESSIBILITY_CONSTANTS.BEST_EFFORT_TIMEOUT_MS) */
timedOut: z.boolean(),
// WebArea retry path metrics
/** Did we search for WebArea candidates? (true when TextMarker fails on focused element) */
webAreaRetryAttempted: z.boolean(),
/** Did we find a different WebArea to switch to? */
webAreaFound: z.boolean(),
/** Did TextMarker work on the switched WebArea? */
webAreaRetrySucceeded: z.boolean(),
});
export type ExtractionMetrics = z.infer<typeof ExtractionMetricsSchema>;
// -----------------------------------------------------------------------------
// Main Response Schema
// -----------------------------------------------------------------------------
/**
* Complete accessibility context response.
*
* `schemaVersion`, `application`, `timestamp` and `metrics` are always present;
* `windowInfo`, `focusedElement` and `textSelection` are present but may be null.
*/
export const AppContextSchema = z.object({
/** Schema version for future evolution (only the literal "2.0" validates) */
schemaVersion: z.literal("2.0"),
// Application context
/** Information about the frontmost application */
application: ApplicationInfoSchema,
/** Window information (may be null) */
windowInfo: WindowInfoSchema.nullable(),
// Focus and selection
/** Currently focused element (may be null if no focus) */
focusedElement: AXElementInfoSchema.nullable(),
/** Text selection information (may be null if no text field focused) */
textSelection: TextSelectionSchema.nullable(),
// Timing
/** Unix timestamp in seconds when context was captured (not constrained to an integer, so it may be fractional) */
timestamp: z.number(),
// Debugging
/** Performance metrics for this extraction */
metrics: ExtractionMetricsSchema,
});
export type AppContext = z.infer<typeof AppContextSchema>;
// -----------------------------------------------------------------------------
// RPC Method Schemas
// -----------------------------------------------------------------------------
/**
* Request params for getAccessibilityContext
*/
export const GetAccessibilityContextParamsSchema = z.object({
editableOnly: z.boolean().optional().default(true), // Only return text selection if element is editable
/**
* Only return text selection if element is editable.
* When true: searches for nearest editable element if current focus is not editable.
* When false: returns whatever element is focused, editable or not.
* Default: false
*/
editableOnly: z.boolean().optional().default(false),
});
export type GetAccessibilityContextParams = z.infer<
typeof GetAccessibilityContextParamsSchema
>;
// Data structures for the result
const SelectionRangeSchema = z.object({
location: z.number().int(),
length: z.number().int(),
});
const ApplicationInfoSchema = z.object({
name: z.string().nullable(),
bundleIdentifier: z.string().nullable(),
version: z.string().nullable(),
});
const FocusedElementInfoSchema = z.object({
role: z.string().nullable(), // Main accessibility role (e.g., "AXTextField", "AXButton")
isEditable: z.boolean(),
title: z.string().nullable(),
description: z.string().nullable(),
value: z.string().nullable(),
});
const TextSelectionInfoSchema = z.object({
selectedText: z.string().nullable(), // Nullable when only cursor position is available (no selection)
fullContent: z.string().nullable(),
preSelectionText: z.string().nullable(), // Last 500 chars before cursor/selection (closest to cursor)
postSelectionText: z.string().nullable(), // First 500 chars after cursor/selection (closest to cursor)
selectionRange: SelectionRangeSchema.nullable(),
isEditable: z.boolean(),
});
const WindowInfoSchema = z.object({
title: z.string().nullable(),
url: z.string().nullable(), // Browser URL if available
});
const AccessibilityContextSchema = z.object({
application: ApplicationInfoSchema,
focusedElement: FocusedElementInfoSchema.nullable(),
textSelection: TextSelectionInfoSchema.nullable(),
windowInfo: WindowInfoSchema.nullable(),
timestamp: z.number(),
});
// Response result
/**
* Response result for getAccessibilityContext
*/
export const GetAccessibilityContextResultSchema = z.object({
context: AccessibilityContextSchema.nullable(),
context: AppContextSchema.nullable(),
});
export type GetAccessibilityContextResult = z.infer<
typeof GetAccessibilityContextResultSchema
>;
// Export individual schemas for potential reuse
export type ApplicationInfo = z.infer<typeof ApplicationInfoSchema>;
export type FocusedElementInfo = z.infer<typeof FocusedElementInfoSchema>;
export type TextSelectionInfo = z.infer<typeof TextSelectionInfoSchema>;
export type WindowInfo = z.infer<typeof WindowInfoSchema>;
export type AccessibilityContext = z.infer<typeof AccessibilityContextSchema>;
export type SelectionRange = z.infer<typeof SelectionRangeSchema>;
/**
* Request params for getAccessibilityStatus.
*
* The method takes no parameters: the schema is an empty object.
* With zod's default object behavior, unrecognized keys are dropped on parse.
*/
export const GetAccessibilityStatusParamsSchema = z.object({});
export type GetAccessibilityStatusParams = z.infer<
typeof GetAccessibilityStatusParamsSchema
>;
/**
* Response result for getAccessibilityStatus.
*
* NOTE(review): how `hasPermission` and `isEnabled` are determined by the native
* helper is not visible from this schema - confirm before treating them as equivalent.
*/
export const GetAccessibilityStatusResultSchema = z.object({
/** Does the app have accessibility permission? */
hasPermission: z.boolean(),
/** Is accessibility enabled system-wide? */
isEnabled: z.boolean(),
});
export type GetAccessibilityStatusResult = z.infer<
typeof GetAccessibilityStatusResultSchema
>;
/**
* Request params for requestAccessibilityPermission.
*
* The method takes no parameters: the schema is an empty object.
* With zod's default object behavior, unrecognized keys are dropped on parse.
*/
export const RequestAccessibilityPermissionParamsSchema = z.object({});
export type RequestAccessibilityPermissionParams = z.infer<
typeof RequestAccessibilityPermissionParamsSchema
>;
/**
* Response result for requestAccessibilityPermission.
*
* NOTE(review): whether `granted` reflects the state at the moment of the call or
* after the user has answered a system prompt is not visible from this schema - confirm.
*/
export const RequestAccessibilityPermissionResultSchema = z.object({
/** Was permission granted? */
granted: z.boolean(),
});
export type RequestAccessibilityPermissionResult = z.infer<
typeof RequestAccessibilityPermissionResultSchema
>;
// -----------------------------------------------------------------------------
// Constants (for reference - actual values defined in Swift)
// -----------------------------------------------------------------------------
/**
* Context extraction limits (UTF-16 code units).
* These are documented here for reference; actual enforcement is in Swift.
*
* Each entry repeats a value from the Swift helper's constants file and must be
* updated by hand when the Swift value changes. The Swift names are the same,
* except that BEST_EFFORT_TIMEOUT_MS corresponds to Swift's EXTRACTION_TIMEOUT_MS.
*/
export const ACCESSIBILITY_CONSTANTS = {
/** Max UTF-16 units for pre/post selection context */
MAX_CONTEXT_LENGTH: 500,
/** Max UTF-16 units for fullContent window */
MAX_FULL_CONTENT_LENGTH: 50000,
/** UTF-16 units of padding around selection for windowing (2 x this value = MAX_FULL_CONTENT_LENGTH) */
WINDOW_PADDING: 25000,
/** Best-effort timeout target in milliseconds */
BEST_EFFORT_TIMEOUT_MS: 600,
/** Max depth for element tree search */
TREE_WALK_MAX_DEPTH: 8,
/** Max elements to search in tree walk */
TREE_WALK_MAX_ELEMENTS: 100,
} as const;

View file

@ -7,6 +7,8 @@ import { PasteTextParamsSchema } from "../methods/paste-text.js";
const RPCMethodNameSchema = z.union([
z.literal("getAccessibilityTreeDetails"),
z.literal("getAccessibilityContext"),
z.literal("getAccessibilityStatus"),
z.literal("requestAccessibilityPermission"),
z.literal("pasteText"),
z.literal("muteSystemAudio"),
z.literal("restoreSystemAudio"),