feat: ax rewrite for swift to improve context
This commit is contained in:
parent
1f5e3649b1
commit
7243bfcb8a
15 changed files with 2791 additions and 660 deletions
|
|
@ -278,7 +278,7 @@ export class RecordingManager extends EventEmitter {
|
|||
const vadService = this.serviceManager.getService("vadService");
|
||||
vadService.reset();
|
||||
|
||||
// Refresh accessibility context
|
||||
// Refresh accessibility context (TextMarker API for Electron support)
|
||||
const nativeBridge = this.serviceManager.getService("nativeBridge");
|
||||
nativeBridge.refreshAccessibilityContext();
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,10 @@ import {
|
|||
GetAccessibilityTreeDetailsResult,
|
||||
GetAccessibilityContextParams,
|
||||
GetAccessibilityContextResult,
|
||||
GetAccessibilityStatusParams,
|
||||
GetAccessibilityStatusResult,
|
||||
RequestAccessibilityPermissionParams,
|
||||
RequestAccessibilityPermissionResult,
|
||||
PasteTextParams,
|
||||
PasteTextResult,
|
||||
MuteSystemAudioParams,
|
||||
|
|
@ -28,6 +32,7 @@ import {
|
|||
RestoreSystemAudioResult,
|
||||
SetShortcutsParams,
|
||||
SetShortcutsResult,
|
||||
AppContext,
|
||||
} from "@amical/types";
|
||||
|
||||
// Define the interface for RPC methods
|
||||
|
|
@ -40,6 +45,14 @@ interface RPCMethods {
|
|||
params: GetAccessibilityContextParams;
|
||||
result: GetAccessibilityContextResult;
|
||||
};
|
||||
getAccessibilityStatus: {
|
||||
params: GetAccessibilityStatusParams;
|
||||
result: GetAccessibilityStatusResult;
|
||||
};
|
||||
requestAccessibilityPermission: {
|
||||
params: RequestAccessibilityPermissionParams;
|
||||
result: RequestAccessibilityPermissionResult;
|
||||
};
|
||||
pasteText: {
|
||||
params: PasteTextParams;
|
||||
result: PasteTextResult;
|
||||
|
|
@ -74,7 +87,7 @@ export class NativeBridge extends EventEmitter {
|
|||
>();
|
||||
private helperPath: string;
|
||||
private logger = createScopedLogger("native-bridge");
|
||||
private accessibilityContext: GetAccessibilityContextResult | null = null;
|
||||
private accessibilityContext: AppContext | null = null;
|
||||
|
||||
// Auto-restart configuration
|
||||
private static readonly MAX_RESTARTS = 3;
|
||||
|
|
@ -435,15 +448,16 @@ export class NativeBridge extends EventEmitter {
|
|||
*/
|
||||
async refreshAccessibilityContext(): Promise<void> {
|
||||
try {
|
||||
const context = await this.call("getAccessibilityContext", {
|
||||
const result = await this.call("getAccessibilityContext", {
|
||||
editableOnly: false,
|
||||
});
|
||||
this.accessibilityContext = context;
|
||||
this.accessibilityContext = result.context;
|
||||
this.logger.debug("Accessibility context refreshed", {
|
||||
hasApplication: !!context.context?.application?.name,
|
||||
hasFocusedElement: !!context.context?.focusedElement?.role,
|
||||
hasTextSelection: !!context.context?.textSelection?.selectedText,
|
||||
hasWindow: !!context.context?.windowInfo?.title,
|
||||
hasApplication: !!result.context?.application?.name,
|
||||
hasFocusedElement: !!result.context?.focusedElement?.role,
|
||||
hasTextSelection: !!result.context?.textSelection?.selectedText,
|
||||
extractionMethod: result.context?.textSelection?.extractionMethod,
|
||||
metricsMs: result.context?.metrics?.totalTimeMs,
|
||||
});
|
||||
} catch (error) {
|
||||
this.logger.error("Failed to refresh accessibility context", {
|
||||
|
|
@ -454,9 +468,13 @@ export class NativeBridge extends EventEmitter {
|
|||
|
||||
/**
 * Get the cached accessibility context.
 *
 * The cache stores the bare context, so it is re-wrapped in the
 * `{ context }` result shape before being handed to callers.
 *
 * @returns The cached context wrapped as a result object, or `null` when
 *          nothing has been cached.
 */
getAccessibilityContext(): GetAccessibilityContextResult | null {
  if (this.accessibilityContext === null) {
    return null;
  }
  return { context: this.accessibilityContext };
}
|
||||
|
||||
/**
|
||||
|
|
@ -481,6 +499,20 @@ export class NativeBridge extends EventEmitter {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Issue the `getAccessibilityStatus` RPC (with empty params) and resolve
 * with its result.
 */
async getAccessibilityStatus(): Promise<GetAccessibilityStatusResult> {
  const status = await this.call("getAccessibilityStatus", {});
  return status;
}
|
||||
|
||||
/**
 * Issue the `requestAccessibilityPermission` RPC (with empty params) and
 * resolve with its result.
 */
async requestAccessibilityPermission(): Promise<RequestAccessibilityPermissionResult> {
  const outcome = await this.call("requestAccessibilityPermission", {});
  return outcome;
}
|
||||
|
||||
// Typed event emitter methods
|
||||
on<E extends keyof NativeBridgeEvents>(
|
||||
event: E,
|
||||
|
|
|
|||
|
|
@ -1,526 +0,0 @@
|
|||
import Foundation
|
||||
import ApplicationServices
|
||||
import AppKit
|
||||
|
||||
/// Bundle identifiers of the applications for which the focused-element
/// lookup sets `AXManualAccessibility` and `AXEnhancedUserInterface` before
/// querying the accessibility tree.
let appsManuallyEnableAx: Set<String> = [
    "com.google.Chrome",
    "org.mozilla.firefox",
    "com.microsoft.edgemac",
    "com.apple.Safari",
]
|
||||
|
||||
/// Identity of a running process.
///
/// NOTE(review): this file imports Foundation, which also declares a
/// `ProcessInfo` type; inside this module the struct below takes precedence.
struct ProcessInfo {
    /// Process identifier.
    let pid: pid_t
    /// Process name, if known.
    let name: String?
    /// Bundle identifier, if known.
    let bundleIdentifier: String?
    /// Version string, if known.
    let version: String?
}
|
||||
|
||||
/// A text selection together with the process it was read from and the
/// text surrounding it.
struct Selection {
    /// The selected text.
    let text: String
    /// The process that owns the selection.
    let process: ProcessInfo
    /// Text before the selection, if available.
    let preSelection: String?
    /// Text after the selection, if available.
    let postSelection: String?
    /// Full content of the element holding the selection, if available.
    let fullContent: String?
    /// Range of the selection, if available.
    let selectionRange: NSRange?
    /// Whether the element holding the selection is editable.
    let isEditable: Bool
    /// Type of the element holding the selection, if available.
    let elementType: String?
}
|
||||
|
||||
class AccessibilityContextService {
|
||||
|
||||
/// Reports whether this process is trusted for accessibility access.
///
/// - Parameter prompt: Value passed under `kAXTrustedCheckOptionPrompt`
///   to `AXIsProcessTrustedWithOptions`. Defaults to `false`.
/// - Returns: The result of `AXIsProcessTrustedWithOptions`.
static func checkAccessibilityPermissions(prompt: Bool = false) -> Bool {
    let promptKey = kAXTrustedCheckOptionPrompt.takeUnretainedValue() as String
    let trustOptions: [String: Any] = [promptKey: prompt]
    return AXIsProcessTrustedWithOptions(trustOptions as CFDictionary)
}
|
||||
|
||||
/// Returns the process identifier of the frontmost application.
///
/// - Returns: The frontmost application's PID, or `0` (after writing a
///   message to standard error) when `NSWorkspace` reports none.
static func getFrontProcessID() -> pid_t {
    if let frontApp = NSWorkspace.shared.frontmostApplication {
        return frontApp.processIdentifier
    }
    FileHandle.standardError.write("❌ No frontmost application found\n".data(using: .utf8)!)
    return 0
}
|
||||
|
||||
/// Returns the last path component of the executable URL of the running
/// application with the given PID.
///
/// - Parameter pid: Process identifier to look up.
/// - Returns: The executable file name, or `nil` when there is no such
///   running application or it has no executable URL.
static func getProcessName(pid: pid_t) -> String? {
    return NSRunningApplication(processIdentifier: pid)?.executableURL?.lastPathComponent
}
|
||||
|
||||
/// Returns the bundle identifier of the running application with the
/// given PID.
///
/// - Parameter pid: Process identifier to look up.
/// - Returns: The bundle identifier, or `nil` when there is no such running
///   application or it has no bundle identifier.
static func getBundleIdentifier(pid: pid_t) -> String? {
    return NSRunningApplication(processIdentifier: pid)?.bundleIdentifier
}
|
||||
|
||||
/// Returns the `CFBundleShortVersionString` of the running application
/// with the given PID.
///
/// - Parameter pid: Process identifier to look up.
/// - Returns: The short version string, or `nil` when the application is
///   not running, has no bundle URL, its bundle cannot be opened, or the
///   key is missing or not a string.
static func getApplicationVersion(pid: pid_t) -> String? {
    // Require a real bundle URL: substituting a placeholder path would let
    // an unrelated directory be opened as the application's bundle.
    guard let bundleURL = NSRunningApplication(processIdentifier: pid)?.bundleURL,
          let bundle = Bundle(url: bundleURL) else {
        return nil
    }
    return bundle.infoDictionary?["CFBundleShortVersionString"] as? String
}
|
||||
|
||||
/// Recursively reads the `AXChildren` attribute of `element` and of its
/// descendants, discarding the values.
///
/// - Parameters:
///   - element: Root element of the walk.
///   - maxDepth: Number of levels to descend; nothing happens when it is
///     not positive.
static func touchDescendantElements(_ element: AXUIElement, maxDepth: Int) {
    if maxDepth <= 0 { return }

    var childrenValue: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, kAXChildrenAttribute as CFString, &childrenValue)
    guard status == .success, let kids = childrenValue as? [AXUIElement] else {
        return
    }

    // Only the first 8 children of each node are visited, to bound the cost.
    for kid in kids.prefix(8) {
        touchDescendantElements(kid, maxDepth: maxDepth - 1)
    }
}
|
||||
|
||||
/// Returns the focused UI element of the application with the given PID,
/// falling back to its focused window.
///
/// For applications listed in `appsManuallyEnableAx`, the
/// `AXManualAccessibility` and `AXEnhancedUserInterface` attributes are set
/// to true on the application element first (best effort; the results of
/// those calls are ignored).
///
/// - Parameter pid: Process identifier of the target application.
/// - Returns: The focused element (or focused window), or `nil` when neither
///   can be read or the returned value is not an `AXUIElement`.
static func _getFocusedElement(pid: pid_t) -> AXUIElement? {
    let application = AXUIElementCreateApplication(pid)

    if let bundleId = getBundleIdentifier(pid: pid), appsManuallyEnableAx.contains(bundleId) {
        AXUIElementSetAttributeValue(application, "AXManualAccessibility" as CFString, kCFBooleanTrue)
        AXUIElementSetAttributeValue(application, "AXEnhancedUserInterface" as CFString, kCFBooleanTrue)
    }

    var focusedElement: CFTypeRef?
    var error = AXUIElementCopyAttributeValue(application, kAXFocusedUIElementAttribute as CFString, &focusedElement)

    // Fall back to the focused window when no focused element is reported.
    if error != .success {
        error = AXUIElementCopyAttributeValue(application, kAXFocusedWindowAttribute as CFString, &focusedElement)
    }

    // Verify the CF type before the forced cast, as the other lookups in
    // this class do; a cast between CF types is not checked at runtime.
    guard error == .success,
          let element = focusedElement,
          CFGetTypeID(element) == AXUIElementGetTypeID() else {
        return nil
    }

    return (element as! AXUIElement)
}
|
||||
|
||||
/// Reads an accessibility attribute and renders it as a string.
///
/// - Parameters:
///   - element: Element to query.
///   - attribute: Attribute name, e.g. `kAXRoleAttribute`.
/// - Returns: `"true"`/`"false"` for boolean values, the string itself for
///   string values, `NSNumber.stringValue` for other numbers, and `nil` when
///   the read fails or the value has any other type.
static func getAttributeValue(element: AXUIElement, attribute: String) -> String? {
    var rawValue: CFTypeRef?
    guard AXUIElementCopyAttributeValue(element, attribute as CFString, &rawValue) == .success,
          let value = rawValue else {
        return nil
    }

    // Booleans must be recognised by CF type ID before the NSNumber cast:
    // CFBoolean also bridges to NSNumber, which would render it as "1"/"0".
    if CFGetTypeID(value) == CFBooleanGetTypeID(), let flag = value as? Bool {
        return flag ? "true" : "false"
    }
    if let stringValue = value as? String {
        return stringValue
    }
    if let numberValue = value as? NSNumber {
        return numberValue.stringValue
    }
    return nil
}
|
||||
|
||||
/// Lists the attribute names an element reports.
///
/// - Parameter element: Element to query.
/// - Returns: The attribute names, or an empty array when the query fails
///   or the result is not an array of strings.
static func getAttributeNames(element: AXUIElement) -> [String] {
    var namesValue: CFArray?
    guard AXUIElementCopyAttributeNames(element, &namesValue) == .success,
          let names = namesValue as? [String] else {
        return []
    }
    return names
}
|
||||
|
||||
/// Heuristically decides whether an element is editable.
///
/// An element counts as editable when its role is `AXTextField`,
/// `AXTextArea` or `AXComboBox`, when its subrole is `AXSecureTextField` or
/// `AXSearchField`, or when it exposes an `AXValue` attribute at all.
///
/// NOTE(review): the last rule only tests that `AXValue` exists, not that it
/// is settable, so it may also match read-only elements — confirm whether
/// that is intended.
///
/// - Parameter element: Element to classify.
/// - Returns: `true` when any of the rules above matches.
static func isElementEditable(element: AXUIElement) -> Bool {
    let role = getAttributeValue(element: element, attribute: kAXRoleAttribute)
    let subrole = getAttributeValue(element: element, attribute: kAXSubroleAttribute)

    let roleMatches = role.map { ["AXTextField", "AXTextArea", "AXComboBox"].contains($0) } ?? false
    if roleMatches {
        return true
    }

    let subroleMatches = subrole.map { ["AXSecureTextField", "AXSearchField"].contains($0) } ?? false
    if subroleMatches {
        return true
    }

    return getAttributeNames(element: element).contains(kAXValueAttribute)
}
|
||||
|
||||
/// Collects the roles of an element's ancestors, nearest parent first.
///
/// The walk stops at the first ancestor whose parent cannot be read or is
/// not an `AXUIElement`. Ancestors without a readable role are traversed
/// but contribute nothing to the result.
///
/// - Parameters:
///   - element: Element whose ancestors are inspected.
///   - maxDepth: Maximum number of ancestors to visit. Defaults to 10.
/// - Returns: The ancestor roles in walk order.
static func getParentChain(element: AXUIElement, maxDepth: Int = 10) -> [String] {
    var roles: [String] = []
    var cursor = element

    for _ in 0..<maxDepth {
        var parentValue: CFTypeRef?
        let status = AXUIElementCopyAttributeValue(cursor, kAXParentAttribute as CFString, &parentValue)

        // Stop when there is no parent or the value is not an element.
        guard status == .success,
              let candidate = parentValue,
              CFGetTypeID(candidate) == AXUIElementGetTypeID() else {
            break
        }

        let parent = candidate as! AXUIElement
        if let parentRole = getAttributeValue(element: parent, attribute: kAXRoleAttribute) {
            roles.append(parentRole)
        }
        cursor = parent
    }

    return roles
}
|
||||
|
||||
/// Maximum number of UTF-16 code units of text returned on each side of the
/// selection by `getTextSelection`.
static let MAX_CONTEXT_LENGTH = 500

/// Reads the text content, selection range and surrounding text of an
/// element.
///
/// The pre/post context is only computed when both the element's value and
/// its selected-text range are available. Each side is at most
/// `MAX_CONTEXT_LENGTH` UTF-16 code units and is `""` (not `nil`) when the
/// selection touches the start or end of the content. The reported range is
/// clamped to the bounds of the content before slicing, so an inconsistent
/// range cannot make the substring calls raise.
///
/// - Parameter element: Element to inspect.
/// - Returns: The selection description, or `nil` when the element exposes
///   neither a value nor a selected-text range.
static func getTextSelection(element: AXUIElement) -> TextSelection? {
    // Full content is needed to derive the surrounding context.
    let fullContent = getAttributeValue(element: element, attribute: kAXValueAttribute)

    // Selection / cursor range, as reported by the element.
    var selectionRange: SelectionRange? = nil
    var rangeValue: CFTypeRef?
    let rangeError = AXUIElementCopyAttributeValue(element, kAXSelectedTextRangeAttribute as CFString, &rangeValue)

    // Check the CF type before the forced cast; it is not verified at runtime.
    if rangeError == .success,
       let rawRange = rangeValue,
       CFGetTypeID(rawRange) == AXValueGetTypeID() {
        var range = CFRange()
        if AXValueGetValue(rawRange as! AXValue, .cfRange, &range) {
            selectionRange = SelectionRange(length: Int(range.length), location: Int(range.location))
        }
    }

    guard selectionRange != nil || fullContent != nil else {
        return nil
    }

    // May be empty when there is only a cursor position.
    let selectedText = getAttributeValue(element: element, attribute: kAXSelectedTextAttribute)

    var preSelectionText: String? = nil
    var postSelectionText: String? = nil

    if let fullContent = fullContent, let range = selectionRange {
        let nsString = fullContent as NSString
        let totalLength = nsString.length

        // Clamp to [0, totalLength]. The length is clamped against the
        // remaining space rather than added to the location first, so a
        // huge or negative reported value cannot overflow.
        let selectionStart = min(max(range.location, 0), totalLength)
        let selectionLength = min(max(range.length, 0), totalLength - selectionStart)
        let selectionEnd = selectionStart + selectionLength

        let preLength = min(selectionStart, MAX_CONTEXT_LENGTH)
        if preLength > 0 {
            let preRange = NSRange(location: selectionStart - preLength, length: preLength)
            preSelectionText = nsString.substring(with: preRange)
        } else {
            preSelectionText = ""
        }

        let postLength = min(totalLength - selectionEnd, MAX_CONTEXT_LENGTH)
        if postLength > 0 {
            let postRange = NSRange(location: selectionEnd, length: postLength)
            postSelectionText = nsString.substring(with: postRange)
        } else {
            postSelectionText = ""
        }
    }

    let isEditable = isElementEditable(element: element)

    return TextSelection(
        fullContent: fullContent,
        isEditable: isEditable,
        postSelectionText: postSelectionText,
        preSelectionText: preSelectionText,
        selectedText: selectedText,
        selectionRange: selectionRange
    )
}
|
||||
|
||||
/// Finds the URL shown in a browser window.
///
/// Lookup order:
/// 1. For Chromium-family browsers and Firefox, a walk of the window's
///    element tree (up to 30 levels) via `findURLInChildren`.
/// 2. The window's `AXDocument` attribute, then its `AXURL` attribute.
/// 3. For all other applications, a shallow tree walk (up to 3 levels).
///
/// - Parameters:
///   - windowElement: The window to inspect.
///   - bundleId: Bundle identifier of the owning application, used to
///     choose the lookup order.
/// - Returns: The first non-empty URL string found, or `nil`.
static func getBrowserURL(windowElement: AXUIElement, bundleId: String?) -> String? {
    // Reads a window attribute as a non-empty URL string. Both string and
    // URL values are accepted, since `AXURL` is documented as a CFURL.
    func nonEmptyURLString(_ attribute: String) -> String? {
        var ref: CFTypeRef?
        guard AXUIElementCopyAttributeValue(windowElement, attribute as CFString, &ref) == .success else {
            return nil
        }
        if let text = ref as? String {
            return text.isEmpty ? nil : text
        }
        if let url = ref as? URL {
            let text = url.absoluteString
            return text.isEmpty ? nil : text
        }
        return nil
    }

    let loweredBundleId = bundleId?.lowercased()
    let isChromiumBrowser = loweredBundleId?.contains("chrome") == true ||
        loweredBundleId?.contains("chromium") == true ||
        bundleId == "com.microsoft.edgemac" ||
        bundleId == "com.brave.Browser" ||
        bundleId == "com.operasoftware.Opera" ||
        bundleId == "com.vivaldi.Vivaldi"
    let isFirefox = bundleId == "org.mozilla.firefox"
    let prefersTreeWalk = isChromiumBrowser || isFirefox

    // Chromium / Firefox: search the element tree first.
    if prefersTreeWalk,
       let liveURL = findURLInChildren(element: windowElement, depth: 0, maxDepth: 30) {
        return liveURL
    }

    // Window-level attributes: the primary source for other applications,
    // the fallback for Chromium / Firefox.
    if let windowURL = nonEmptyURLString(kAXDocumentAttribute) ?? nonEmptyURLString(kAXURLAttribute) {
        return windowURL
    }

    // Other applications with no window-level URL: shallow tree walk.
    if !prefersTreeWalk {
        return findURLInChildren(element: windowElement, depth: 0, maxDepth: 3)
    }

    return nil
}
|
||||
|
||||
/// Breadth-first search of an element's descendants for a URL.
///
/// For each child, in order:
/// - if its role is `AXTextField`, `AXComboBox` or
///   `AXSafariAddressAndSearchField` and its string value starts with
///   `http://` / `https://` or contains a `.`, that value is returned;
/// - if its role is `AXWebArea`, its `AXURL` attribute and then its
///   `AXDocument` attribute are returned when non-empty.
/// Children whose role cannot be read are skipped and not descended into.
///
/// - Parameters:
///   - element: Root of the search; its own attributes are not inspected.
///   - depth: Depth assigned to `element`.
///   - maxDepth: Elements at this depth or deeper are not expanded.
/// - Returns: The first matching URL string, or `nil`.
static func findURLInChildren(element: AXUIElement, depth: Int, maxDepth: Int) -> String? {
    guard depth < maxDepth else { return nil }

    // Reads an attribute as a non-empty URL string. Both string and URL
    // values are accepted, since `AXURL` is documented as a CFURL.
    func nonEmptyURLString(of node: AXUIElement, attribute: String) -> String? {
        var ref: CFTypeRef?
        guard AXUIElementCopyAttributeValue(node, attribute as CFString, &ref) == .success else {
            return nil
        }
        if let text = ref as? String {
            return text.isEmpty ? nil : text
        }
        if let url = ref as? URL {
            let text = url.absoluteString
            return text.isEmpty ? nil : text
        }
        return nil
    }

    // FIFO queue read through a moving head index, so dequeuing is O(1)
    // instead of shifting the whole array on every step.
    var queue: [(element: AXUIElement, depth: Int)] = [(element, depth)]
    var head = 0

    while head < queue.count {
        let (currentElement, currentDepth) = queue[head]
        head += 1

        // Do not expand elements at or beyond the depth limit.
        guard currentDepth < maxDepth else { continue }

        var childrenRef: CFTypeRef?
        guard AXUIElementCopyAttributeValue(currentElement,
                                            kAXChildrenAttribute as CFString,
                                            &childrenRef) == .success,
              let children = childrenRef as? [AXUIElement] else {
            continue
        }

        for child in children {
            var roleRef: CFTypeRef?
            guard AXUIElementCopyAttributeValue(child,
                                                kAXRoleAttribute as CFString,
                                                &roleRef) == .success,
                  let role = roleRef as? String else {
                continue
            }

            // Priority 1: address / search fields.
            if role == "AXTextField" || role == "AXComboBox" || role == "AXSafariAddressAndSearchField" {
                var valueRef: CFTypeRef?
                if AXUIElementCopyAttributeValue(child,
                                                 kAXValueAttribute as CFString,
                                                 &valueRef) == .success,
                   let value = valueRef as? String,
                   !value.isEmpty,
                   (value.hasPrefix("http://") || value.hasPrefix("https://") || value.contains(".")) {
                    return value
                }
            }

            // Priority 2: web areas.
            if role == "AXWebArea" {
                if let urlString = nonEmptyURLString(of: child, attribute: kAXURLAttribute) {
                    return urlString
                }
                if let urlString = nonEmptyURLString(of: child, attribute: kAXDocumentAttribute) {
                    return urlString
                }
            }

            // Expand this child after the rest of the current level.
            queue.append((child, currentDepth + 1))
        }
    }

    return nil
}
|
||||
|
||||
/// Describes the main window of the application with the given PID.
///
/// - Parameter pid: Process identifier of the target application.
/// - Returns: The window's title and the URL found by `getBrowserURL`, or
///   `nil` when the main window cannot be read or is not an `AXUIElement`.
static func getWindowInfo(pid: pid_t) -> WindowInfo? {
    let app = AXUIElementCreateApplication(pid)

    var mainWindowValue: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(app, kAXMainWindowAttribute as CFString, &mainWindowValue)
    guard status == .success, let candidate = mainWindowValue else {
        return nil
    }

    // The forced cast below is only valid for a real AXUIElement.
    if CFGetTypeID(candidate) != AXUIElementGetTypeID() {
        return nil
    }
    let window = candidate as! AXUIElement

    let windowTitle = getAttributeValue(element: window, attribute: kAXTitleAttribute)
    let ownerBundleId = getBundleIdentifier(pid: pid)
    let windowURL = getBrowserURL(windowElement: window, bundleId: ownerBundleId)

    return WindowInfo(title: windowTitle, url: windowURL)
}
|
||||
|
||||
/// Builds a snapshot of the frontmost application's accessibility context:
/// application identity, focused element, text selection and main-window
/// information, stamped with the current time.
///
/// - Parameter editableOnly: When `true`, the text selection is only
///   included if its element was classified as editable. Defaults to
///   `false`.
/// - Returns: The context, or `nil` (after writing a message to standard
///   error) when accessibility permission is missing or no frontmost
///   application PID is available.
static func getAccessibilityContext(editableOnly: Bool = false) -> Context? {
    if !checkAccessibilityPermissions() {
        FileHandle.standardError.write("❌ Accessibility permissions not granted\n".data(using: .utf8)!)
        return nil
    }

    let pid = getFrontProcessID()
    if pid <= 0 {
        FileHandle.standardError.write("❌ Could not get frontmost application PID\n".data(using: .utf8)!)
        return nil
    }

    // Application identity.
    let processName = getProcessName(pid: pid)
    let bundleId = getBundleIdentifier(pid: pid)
    let version = getApplicationVersion(pid: pid)
    let applicationInfo = Application(
        bundleIdentifier: bundleId,
        name: processName,
        version: version
    )

    // Focused element and, when allowed by `editableOnly`, its selection.
    var focusedElementInfo: FocusedElement? = nil
    var textSelectionInfo: TextSelection? = nil

    if let focused = _getFocusedElement(pid: pid) {
        // Read the children of the focused element a few levels deep
        // before querying its attributes.
        touchDescendantElements(focused, maxDepth: 3)

        let role = getAttributeValue(element: focused, attribute: kAXRoleAttribute)
        let title = getAttributeValue(element: focused, attribute: kAXTitleAttribute)
        let description = getAttributeValue(element: focused, attribute: kAXDescriptionAttribute)
        let value = getAttributeValue(element: focused, attribute: kAXValueAttribute)
        let isEditable = isElementEditable(element: focused)

        focusedElementInfo = FocusedElement(
            description: description,
            isEditable: isEditable,
            role: role,
            title: title,
            value: value
        )

        if let selection = getTextSelection(element: focused),
           !editableOnly || selection.isEditable {
            textSelectionInfo = selection
        }
    }

    let windowInfo = getWindowInfo(pid: pid)

    return Context(
        application: applicationInfo,
        focusedElement: focusedElementInfo,
        textSelection: textSelectionInfo,
        timestamp: Date().timeIntervalSince1970,
        windowInfo: windowInfo
    )
}
|
||||
}
|
||||
|
|
@ -48,7 +48,7 @@ struct AccessibilityElementNode: Codable {
|
|||
|
||||
class AccessibilityService {
|
||||
|
||||
private let maxDepth = 10 // To prevent excessively deep recursion and large payloads
|
||||
private let maxDepth = ACCESSIBILITY_TREE_MAX_DEPTH // To prevent excessively deep recursion and large payloads
|
||||
private let dateFormatter: DateFormatter
|
||||
|
||||
// Properties to store original audio states
|
||||
|
|
@ -478,23 +478,19 @@ class AccessibilityService {
|
|||
return false
|
||||
}
|
||||
|
||||
// Simulate Cmd+V
|
||||
// Using deprecated kVK_Command might still work but kCGEventFlagMaskCommand is preferred.
|
||||
// Virtual key code for 'v' is 9.
|
||||
let vKeyCode: CGKeyCode = 9
|
||||
|
||||
// Simulate Cmd+V using virtual key codes from Constants.swift
|
||||
let source = CGEventSource(stateID: .hidSystemState)
|
||||
|
||||
let cmdDown = CGEvent(keyboardEventSource: source, virtualKey: CGKeyCode(55), keyDown: true) // 55 is kVK_Command
|
||||
let cmdDown = CGEvent(keyboardEventSource: source, virtualKey: VK_COMMAND, keyDown: true)
|
||||
cmdDown?.flags = .maskCommand
|
||||
|
||||
let vDown = CGEvent(keyboardEventSource: source, virtualKey: vKeyCode, keyDown: true)
|
||||
let vDown = CGEvent(keyboardEventSource: source, virtualKey: VK_V, keyDown: true)
|
||||
vDown?.flags = .maskCommand // Keep command flag for the V press as well
|
||||
|
||||
let vUp = CGEvent(keyboardEventSource: source, virtualKey: vKeyCode, keyDown: false)
|
||||
let vUp = CGEvent(keyboardEventSource: source, virtualKey: VK_V, keyDown: false)
|
||||
vUp?.flags = .maskCommand
|
||||
|
||||
let cmdUp = CGEvent(keyboardEventSource: source, virtualKey: CGKeyCode(55), keyDown: false)
|
||||
let cmdUp = CGEvent(keyboardEventSource: source, virtualKey: VK_COMMAND, keyDown: false)
|
||||
// No flags needed for key up typically, or just .maskCommand if it was held
|
||||
|
||||
if cmdDown == nil || vDown == nil || vUp == nil || cmdUp == nil {
|
||||
|
|
@ -516,7 +512,7 @@ class AccessibilityService {
|
|||
|
||||
// Restore the original pasteboard content after a short delay
|
||||
// to allow the paste action to complete.
|
||||
DispatchQueue.main.asyncAfter(deadline: .now() + 0.2) { // 200ms delay
|
||||
DispatchQueue.main.asyncAfter(deadline: .now() + PASTE_RESTORE_DELAY_SECONDS) {
|
||||
self.restorePasteboard(
|
||||
pasteboard: pasteboard, items: originalPasteboardItems,
|
||||
originalChangeCount: originalChangeCount)
|
||||
|
|
@ -547,10 +543,6 @@ class AccessibilityService {
|
|||
}
|
||||
}
|
||||
|
||||
// Define kVK_Function if not available from a system framework directly in this context.
|
||||
// 0x3F is the virtual key code for the Fn key on Apple keyboards.
|
||||
private let kVK_Function: CGKeyCode = 0x3F
|
||||
|
||||
// Determines whether a keyboard event should be forwarded to the Electron application.
|
||||
// This method should be called from the CGEventTap callback in main.swift or RpcHandler.swift.
|
||||
public func shouldForwardKeyboardEvent(event: CGEvent) -> Bool {
|
||||
|
|
@ -570,7 +562,7 @@ class AccessibilityService {
|
|||
|
||||
if type == .keyDown || type == .keyUp {
|
||||
// For keyDown and keyUp events, only forward if the event is FOR THE Fn KEY ITSELF.
|
||||
if keyCode == kVK_Function {
|
||||
if keyCode == VK_FUNCTION {
|
||||
// logToStderr("[AccessibilityService] Forwarding \(type == .keyDown ? "keyDown" : "keyUp") event because it IS the Fn key (keyCode: \(keyCode)).")
|
||||
return true
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -1,12 +1,19 @@
|
|||
import Foundation
|
||||
import ObjCExceptionCatcher
|
||||
|
||||
/// An RPC request whose `method` is kept as a plain string, so a request
/// with any method name can be decoded.
struct FlexibleRPCRequest: Codable {
    /// Request identifier.
    let id: String
    /// Name of the requested method, as received.
    let method: String
    /// Untyped request parameters, if any were supplied.
    let params: JSONAny?
}
|
||||
|
||||
class IOBridge: NSObject {
|
||||
private let jsonEncoder: JSONEncoder
|
||||
private let jsonDecoder: JSONDecoder
|
||||
let jsonEncoder: JSONEncoder
|
||||
let jsonDecoder: JSONDecoder
|
||||
private let accessibilityService: AccessibilityService
|
||||
private let audioService: AudioService
|
||||
private let dateFormatter: DateFormatter
|
||||
let dateFormatter: DateFormatter
|
||||
|
||||
init(jsonEncoder: JSONEncoder, jsonDecoder: JSONDecoder) {
|
||||
self.jsonEncoder = jsonEncoder
|
||||
|
|
@ -38,13 +45,21 @@ class IOBridge: NSObject {
|
|||
return
|
||||
|
||||
case .getAccessibilityContext:
|
||||
// Process accessibility context requests on dedicated thread
|
||||
// Process accessibility context requests on dedicated thread (uses v2 service)
|
||||
AccessibilityQueue.shared.async { [weak self] in
|
||||
guard let self = self else { return }
|
||||
self.handleAccessibilityContext(request)
|
||||
self.handleGetAccessibilityContext(id: request.id, params: request.params)
|
||||
}
|
||||
return
|
||||
|
||||
case .getAccessibilityStatus:
|
||||
handleGetAccessibilityStatus(id: request.id)
|
||||
return
|
||||
|
||||
case .requestAccessibilityPermission:
|
||||
handleRequestAccessibilityPermission(id: request.id)
|
||||
return
|
||||
|
||||
case .pasteText:
|
||||
logToStderr("[IOBridge] Handling pasteText for ID: \(request.id)")
|
||||
guard let paramsAnyCodable = request.params else {
|
||||
|
|
@ -308,71 +323,70 @@ class IOBridge: NSObject {
|
|||
}
|
||||
}
|
||||
|
||||
private func handleAccessibilityContext(_ request: RPCRequestSchema) {
|
||||
var contextParams: GetAccessibilityContextParamsSchema? = nil
|
||||
logToStderr("[IOBridge] Handling getAccessibilityContext for ID: \(request.id)")
|
||||
// MARK: - Accessibility Handlers (using consolidated service)
|
||||
|
||||
if let paramsAnyCodable = request.params {
|
||||
private func handleGetAccessibilityContext(id: String, params: JSONAny?) {
|
||||
logToStderr("[IOBridge] Handling getAccessibilityContext for ID: \(id)")
|
||||
|
||||
// Parse params (default editableOnly = false per spec)
|
||||
var editableOnly = false
|
||||
if let paramsAnyCodable = params {
|
||||
do {
|
||||
let paramsData = try jsonEncoder.encode(paramsAnyCodable)
|
||||
contextParams = try jsonDecoder.decode(
|
||||
GetAccessibilityContextParamsSchema.self, from: paramsData)
|
||||
logToStderr(
|
||||
"[IOBridge] Decoded contextParams.editableOnly: \(contextParams?.editableOnly ?? false) for ID: \(request.id)"
|
||||
)
|
||||
let contextParams = try jsonDecoder.decode(GetAccessibilityContextParams.self, from: paramsData)
|
||||
editableOnly = contextParams.editableOnly ?? false
|
||||
} catch {
|
||||
logToStderr(
|
||||
"[IOBridge] Error decoding getAccessibilityContext params: \(error.localizedDescription)"
|
||||
)
|
||||
let errPayload = Error(
|
||||
code: -32602, data: request.params,
|
||||
message: "Invalid params: \(error.localizedDescription)")
|
||||
let rpcResponse = RPCResponseSchema(error: errPayload, id: request.id, result: nil)
|
||||
sendRpcResponse(rpcResponse)
|
||||
return
|
||||
logToStderr("[IOBridge] Error decoding params: \(error.localizedDescription)")
|
||||
}
|
||||
}
|
||||
|
||||
let editableOnly = contextParams?.editableOnly ?? false
|
||||
|
||||
// Call service with exception handling
|
||||
switch ExceptionCatcher.try({
|
||||
AccessibilityContextService.getAccessibilityContext(editableOnly: editableOnly)
|
||||
}) {
|
||||
case .success(let context):
|
||||
logToStderr("[IOBridge] Retrieved context for ID: \(request.id)")
|
||||
let resultPayload = GetAccessibilityContextResultSchema(context: context)
|
||||
do {
|
||||
let resultData = try jsonEncoder.encode(resultPayload)
|
||||
let resultAsJsonAny = try jsonDecoder.decode(JSONAny.self, from: resultData)
|
||||
let rpcResponse = RPCResponseSchema(error: nil, id: request.id, result: resultAsJsonAny)
|
||||
sendRpcResponse(rpcResponse)
|
||||
} catch {
|
||||
logToStderr("[IOBridge] Error encoding result: \(error.localizedDescription) for ID: \(request.id)")
|
||||
let errPayload = Error(code: -32603, data: nil, message: "Error encoding result: \(error.localizedDescription)")
|
||||
let rpcResponse = RPCResponseSchema(error: errPayload, id: request.id, result: nil)
|
||||
sendRpcResponse(rpcResponse)
|
||||
}
|
||||
logToStderr("[IOBridge] Retrieved context for ID: \(id)")
|
||||
let result = GetAccessibilityContextResult(context: context)
|
||||
sendResult(id: id, result: result)
|
||||
|
||||
case .exception(let exception):
|
||||
logToStderr("[IOBridge] NSException in getAccessibilityContext: \(exception.name) - \(exception.reason)")
|
||||
let exceptionData: [String: Any] = [
|
||||
"name": exception.name,
|
||||
"reason": exception.reason,
|
||||
"callStack": exception.callStack.prefix(10).joined(separator: "\n")
|
||||
]
|
||||
var exceptionJsonAny: JSONAny? = nil
|
||||
if let jsonData = try? JSONSerialization.data(withJSONObject: exceptionData),
|
||||
let decoded = try? jsonDecoder.decode(JSONAny.self, from: jsonData) {
|
||||
exceptionJsonAny = decoded
|
||||
}
|
||||
let errPayload = Error(
|
||||
code: -32603,
|
||||
data: exceptionJsonAny,
|
||||
message: "\(exception.name): \(exception.reason)"
|
||||
)
|
||||
let rpcResponse = RPCResponseSchema(error: errPayload, id: request.id, result: nil)
|
||||
sendRpcResponse(rpcResponse)
|
||||
sendError(id: id, code: -32603, message: "\(exception.name): \(exception.reason)")
|
||||
}
|
||||
}
|
||||
|
||||
/// Answers a `getAccessibilityStatus` RPC request.
/// - Parameter id: Request identifier echoed back in the response.
private func handleGetAccessibilityStatus(id: String) {
    logToStderr("[IOBridge] Handling getAccessibilityStatus for ID: \(id)")
    // The status lookup is delegated to the service; the value is forwarded unchanged.
    sendResult(id: id, result: AccessibilityContextService.getAccessibilityStatus())
}
|
||||
|
||||
/// Answers a `requestAccessibilityPermission` RPC request.
/// - Parameter id: Request identifier echoed back in the response.
private func handleRequestAccessibilityPermission(id: String) {
    logToStderr("[IOBridge] Handling requestAccessibilityPermission for ID: \(id)")
    // The permission request is delegated to the service; the value is forwarded unchanged.
    sendResult(id: id, result: AccessibilityContextService.requestAccessibilityPermission())
}
|
||||
|
||||
// MARK: - Response Helpers
|
||||
|
||||
/// Sends a successful RPC response carrying `result`.
///
/// The value is encoded to JSON and decoded back as `JSONAny`, the payload type
/// `RPCResponseSchema` is built with here. If either step throws, the failure is
/// logged and a -32603 error response is sent instead.
/// - Parameters:
///   - id: Request identifier echoed back in the response.
///   - result: Any `Encodable` value to return to the caller.
private func sendResult<T: Encodable>(id: String, result: T) {
    let payload: JSONAny
    do {
        let encoded = try jsonEncoder.encode(result)
        payload = try jsonDecoder.decode(JSONAny.self, from: encoded)
    } catch {
        logToStderr("[IOBridge] Error encoding result: \(error.localizedDescription)")
        sendError(id: id, code: -32603, message: "Error encoding result: \(error.localizedDescription)")
        return
    }

    sendRpcResponse(RPCResponseSchema(error: nil, id: id, result: payload))
}
|
||||
|
||||
/// Sends an RPC error response with no attached `data` payload.
/// - Parameters:
///   - id: Request identifier echoed back in the response.
///   - code: Error code placed in the error object (e.g. -32603).
///   - message: Human-readable description of the failure.
private func sendError(id: String, code: Int, message: String) {
    sendRpcResponse(
        RPCResponseSchema(
            error: Error(code: code, data: nil, message: message),
            id: id,
            result: nil
        )
    )
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,156 @@
|
|||
import Foundation
|
||||
|
||||
// =============================================================================
|
||||
// Accessibility Builders
|
||||
// =============================================================================
|
||||
// Builder pattern helpers for creating accessibility response types.
|
||||
// These use the auto-generated types from models/generated/models.swift.
|
||||
// =============================================================================
|
||||
|
||||
// MARK: - Type Aliases for Backward Compatibility
|
||||
|
||||
/// `AppContext` (TypeScript name) is generated in Swift as `Context`.
typealias AppContext = Context
/// `ApplicationInfo` (TypeScript name) is generated in Swift as `Application`.
typealias ApplicationInfo = Application
/// `AXElementInfo` (TypeScript name) is generated in Swift as `FocusedElement`.
typealias AXElementInfo = FocusedElement
/// `ExtractionMetrics` (TypeScript name) is generated in Swift as `Metrics`.
typealias ExtractionMetrics = Metrics
/// `ExtractionMethod` (TypeScript name) is generated in Swift as the `The0` enum.
typealias ExtractionMethod = The0

/// Prefixed alias for the generated `SelectionRange` (same name on both sides).
typealias AccessibilitySelectionRange = SelectionRange
/// Prefixed alias for the generated `TextSelection` (same name on both sides).
typealias AccessibilityTextSelection = TextSelection
/// Prefixed alias for the generated `WindowInfo` (same name on both sides).
typealias AccessibilityWindowInfo = WindowInfo

/// Schema-free name for the generated `getAccessibilityContext` params type.
typealias GetAccessibilityContextParams = GetAccessibilityContextParamsSchema
/// Schema-free name for the generated `getAccessibilityContext` result type.
typealias GetAccessibilityContextResult = GetAccessibilityContextResultSchema
|
||||
|
||||
// MARK: - Result Types for Other Methods
|
||||
|
||||
/// Payload returned by the `getAccessibilityStatus` RPC method.
struct GetAccessibilityStatusResult: Codable {
    /// `true` when this app has been granted accessibility permission.
    let hasPermission: Bool

    /// `true` when accessibility is enabled system-wide.
    let isEnabled: Bool
}
|
||||
|
||||
/// Payload returned by the `requestAccessibilityPermission` RPC method.
struct RequestAccessibilityPermissionResult: Codable {
    /// `true` when permission was granted.
    let granted: Bool
}
|
||||
|
||||
// MARK: - Builder for TextSelection
|
||||
|
||||
/// Mutable staging object for a `TextSelection`.
///
/// Every field starts at an "empty" default (nil / false / `.none`), so callers
/// only set what they actually extracted and then call `build()`.
class TextSelectionBuilder {
    // Text content
    var selectedText: String?
    var fullContent: String?
    var preSelectionText: String?
    var postSelectionText: String?
    var selectionRange: SelectionRange?

    // Flags and provenance
    var isEditable = false
    var extractionMethod: ExtractionMethod = .none
    var hasMultipleRanges = false
    var isPlaceholder = false
    var isSecure = false
    var fullContentTruncated = false

    /// Snapshots the current field values into an immutable `TextSelection`.
    /// - Returns: A `TextSelection` populated from this builder.
    func build() -> TextSelection {
        TextSelection(
            extractionMethod: extractionMethod,
            fullContent: fullContent,
            fullContentTruncated: fullContentTruncated,
            hasMultipleRanges: hasMultipleRanges,
            isEditable: isEditable,
            isPlaceholder: isPlaceholder,
            isSecure: isSecure,
            postSelectionText: postSelectionText,
            preSelectionText: preSelectionText,
            selectedText: selectedText,
            selectionRange: selectionRange
        )
    }

    /// Produces the result for a secure (password-style) field.
    ///
    /// All content fields stay at their nil defaults. `selectionRange` is left nil
    /// as well so that the password length cannot leak through the range.
    /// - Parameter isEditable: Editability of the secure field, reported as-is.
    /// - Returns: A `TextSelection` with `isSecure == true` and no content.
    static func secureField(isEditable: Bool) -> TextSelection {
        let secureBuilder = TextSelectionBuilder()
        secureBuilder.isEditable = isEditable
        secureBuilder.isSecure = true
        return secureBuilder.build()
    }
}
|
||||
|
||||
// MARK: - Builder for Metrics
|
||||
|
||||
/// Collects diagnostics for a single extraction pass and turns them into the
/// generated `Metrics` value.
///
/// The elapsed-time clock starts when the builder is created and stops when
/// `build()` is called.
class ExtractionMetricsBuilder {
    /// Reference time captured at construction; never changes afterwards.
    private let startTime: CFAbsoluteTime = CFAbsoluteTimeGetCurrent()

    var textMarkerAttempted: Bool = false
    var textMarkerSucceeded: Bool = false
    var fallbacksUsed: [ExtractionMethod] = []
    var errors: [String] = []

    /// Lets a caller flag a timeout explicitly. `build()` reports a timeout when
    /// this is set OR when the measured time exceeds `EXTRACTION_TIMEOUT_MS`.
    var timedOut: Bool = false

    // WebArea retry path metrics
    var webAreaRetryAttempted: Bool = false
    var webAreaFound: Bool = false
    var webAreaRetrySucceeded: Bool = false

    init() {}

    /// Records that a fallback extraction path was tried.
    /// - Parameter method: The fallback method that was attempted.
    func recordFallback(_ method: ExtractionMethod) {
        fallbacksUsed.append(method)
    }

    /// Records an error message for the metrics payload.
    ///
    /// The message is stored verbatim — no scrubbing happens here, so callers
    /// must make sure it contains no PII.
    /// - Parameter message: Description of what went wrong.
    func recordError(_ message: String) {
        errors.append(message)
    }

    /// Stops the clock and produces the `Metrics` value.
    /// - Returns: `Metrics` with the recorded flags, errors, fallbacks and the
    ///   elapsed time in milliseconds.
    func build() -> Metrics {
        let elapsedMs = (CFAbsoluteTimeGetCurrent() - startTime) * 1000

        // Best-effort timeout: honour an explicit caller-set flag as well as the
        // measured duration. Previously the stored flag was silently ignored.
        let didTimeout = timedOut || elapsedMs > EXTRACTION_TIMEOUT_MS

        return Metrics(
            errors: errors,
            fallbacksUsed: fallbacksUsed,
            textMarkerAttempted: textMarkerAttempted,
            textMarkerSucceeded: textMarkerSucceeded,
            timedOut: didTimeout,
            totalTimeMS: elapsedMs,
            webAreaFound: webAreaFound,
            webAreaRetryAttempted: webAreaRetryAttempted,
            webAreaRetrySucceeded: webAreaRetrySucceeded
        )
    }
}
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
import Foundation
|
||||
import ApplicationServices
|
||||
import AppKit
|
||||
|
||||
// =============================================================================
|
||||
// AccessibilityContextService - Main Entry Point for Accessibility API
|
||||
// =============================================================================
|
||||
// Coordinates all services to extract accessibility context.
|
||||
// This is the main entry point called from RpcHandler.
|
||||
// =============================================================================
|
||||
|
||||
/// Coordinates the permission, focus, selection and window services to produce
/// a single accessibility context snapshot for the frontmost application.
class AccessibilityContextService {

    // MARK: - Main API

    /// Builds the accessibility context for the frontmost application.
    ///
    /// - Parameter editableOnly: When `true`, the search for a text element is
    ///   restricted to editable elements and a non-editable selection is dropped
    ///   (default: `false` per spec).
    /// - Returns: The assembled `AppContext`, or `nil` when accessibility
    ///   permission is missing or no frontmost PID could be determined.
    static func getAccessibilityContext(editableOnly: Bool = false) -> AppContext? {
        // Created first so the elapsed time covers the whole call.
        let metricsBuilder = ExtractionMetricsBuilder()

        guard PermissionsService.checkPermissions() else {
            logError("Accessibility permissions not granted")
            return nil
        }

        let pid = AXHelpers.getFrontProcessID()
        guard pid > 0 else {
            logError("Could not get frontmost application PID")
            return nil
        }

        // Arguments are in alphabetical order, as required by the generated type.
        let applicationInfo = ApplicationInfo(
            bundleIdentifier: AXHelpers.getBundleIdentifier(pid: pid),
            name: AXHelpers.getProcessName(pid: pid),
            pid: Int(pid),
            version: AXHelpers.getApplicationVersion(pid: pid)
        )

        var focusedElementInfo: AXElementInfo?
        var textSelectionInfo: AccessibilityTextSelection?

        if let focusedElement = FocusService.getFocusedElement(pid: pid) {
            // Touch descendants to ensure they're accessible (triggers lazy loading).
            AXHelpers.touchDescendants(focusedElement, maxDepth: TOUCH_DESCENDANTS_MAX_DEPTH)

            if let focusResult = FocusService.findTextCapableElement(from: focusedElement, editableOnly: editableOnly) {
                focusedElementInfo = FocusService.getElementInfo(element: focusResult.element)

                let selection = SelectionExtractor.extract(from: focusResult.element, metricsBuilder: metricsBuilder)

                // With editableOnly, a selection that reports itself as
                // non-editable is discarded.
                if editableOnly, let selection = selection, !selection.isEditable {
                    textSelectionInfo = nil
                } else {
                    textSelectionInfo = selection
                }
            } else {
                // No text-capable element: still report basic info about the focused element.
                focusedElementInfo = FocusService.getElementInfo(element: focusedElement)
            }
        }

        let windowInfo = FocusService.getWindowInfo(pid: pid)
        let metrics = metricsBuilder.build()

        // Arguments are in alphabetical order, as required by the generated type.
        return AppContext(
            application: applicationInfo,
            focusedElement: focusedElementInfo,
            metrics: metrics,
            schemaVersion: .the20,
            textSelection: textSelectionInfo,
            timestamp: Date().timeIntervalSince1970,
            windowInfo: windowInfo
        )
    }

    // MARK: - Permission APIs

    /// Reports the current accessibility permission status.
    /// - Returns: The status produced by `PermissionsService.getStatus()`.
    static func getAccessibilityStatus() -> GetAccessibilityStatusResult {
        PermissionsService.getStatus()
    }

    /// Requests accessibility permission.
    /// - Returns: The outcome produced by `PermissionsService.requestPermission()`.
    static func requestAccessibilityPermission() -> RequestAccessibilityPermissionResult {
        PermissionsService.requestPermission()
    }

    // MARK: - Logging

    /// Writes an error line to stderr.
    /// - Parameter message: Text to log.
    private static func logError(_ message: String) {
        // Data(_.utf8) cannot fail, unlike data(using:) which needed a force-unwrap.
        FileHandle.standardError.write(Data("❌ \(message)\n".utf8))
    }

    /// Writes a debug line to stderr; compiled to a no-op outside DEBUG builds.
    /// - Parameter message: Text to log.
    private static func logDebug(_ message: String) {
        #if DEBUG
        FileHandle.standardError.write(Data("🔍 \(message)\n".utf8))
        #endif
    }
}
|
||||
|
|
@ -0,0 +1,306 @@
|
|||
import Foundation
|
||||
import ApplicationServices
|
||||
import AppKit
|
||||
|
||||
// =============================================================================
|
||||
// FocusService - Focus Resolution and Element Discovery
|
||||
// =============================================================================
|
||||
// Handles finding the focused element and searching for text-capable elements
|
||||
// when the focused element is not directly text-capable.
|
||||
// =============================================================================
|
||||
|
||||
/// Outcome of resolving which element text should be extracted from.
struct FocusResult {
    /// The element that was selected for extraction.
    let element: AXUIElement

    /// The element's role string, if one could be read.
    let role: String?

    /// `true` when the element was found by searching rather than being the
    /// starting element itself.
    let wasSearched: Bool
}
|
||||
|
||||
/// Resolves the focused element of an application, locates a text-capable
/// element near it, and extracts element / window metadata.
class FocusService {

    // MARK: - Get Focused Element

    /// Gets the focused element of the given application.
    ///
    /// Reads the focused UI element first and falls back to the focused window
    /// when that read fails.
    /// - Parameter pid: Process ID of the application.
    /// - Returns: The focused `AXUIElement`, or `nil` if neither attribute could
    ///   be read or the returned value is not an `AXUIElement`.
    static func getFocusedElement(pid: pid_t) -> AXUIElement? {
        let application = AXHelpers.createApplicationElement(pid: pid)

        // Enable manual accessibility for specific apps (Chrome, Firefox, etc.)
        let bundleId = AXHelpers.getBundleIdentifier(pid: pid)
        AXHelpers.enableManualAccessibilityIfNeeded(application: application, bundleId: bundleId)

        var focusedValue: CFTypeRef?
        var status = AXUIElementCopyAttributeValue(
            application,
            kAXFocusedUIElementAttribute as CFString,
            &focusedValue
        )

        // Fallback to focused window if focused element fails
        if status != .success {
            status = AXUIElementCopyAttributeValue(
                application,
                kAXFocusedWindowAttribute as CFString,
                &focusedValue
            )
        }

        guard status == .success else { return nil }

        // Verify the CF type before casting, as getWindowInfo does.
        return checkedAXUIElement(focusedValue)
    }

    /// Casts an attribute value to `AXUIElement` only after confirming its CF type ID.
    /// - Parameter value: Value returned by `AXUIElementCopyAttributeValue`.
    /// - Returns: The value as `AXUIElement`, or `nil` if it is nil or another CF type.
    private static func checkedAXUIElement(_ value: CFTypeRef?) -> AXUIElement? {
        guard let value = value, CFGetTypeID(value) == AXUIElementGetTypeID() else {
            return nil
        }
        return (value as! AXUIElement)
    }

    // MARK: - Find Text-Capable Element

    /// Finds a text-capable element starting from `element`.
    ///
    /// Order: the element itself, then its descendants, then its ancestors.
    /// When `editableOnly` is `false` and nothing qualified, the starting element
    /// is still returned if it exposes a value attribute.
    /// - Parameters:
    ///   - element: Starting element.
    ///   - editableOnly: If true, only return editable elements.
    /// - Returns: `FocusResult` with the found element, or `nil`.
    static func findTextCapableElement(from element: AXUIElement, editableOnly: Bool) -> FocusResult? {
        let role = AXHelpers.getStringAttribute(element, kAXRoleAttribute)

        if isAcceptableTextElement(element, editableOnly: editableOnly) {
            return FocusResult(element: element, role: role, wasSearched: false)
        }

        if let descendant = searchDescendantsForTextCapable(element: element, editableOnly: editableOnly) {
            let descendantRole = AXHelpers.getStringAttribute(descendant, kAXRoleAttribute)
            return FocusResult(element: descendant, role: descendantRole, wasSearched: true)
        }

        if let ancestor = searchAncestorsForTextCapable(element: element, editableOnly: editableOnly) {
            let ancestorRole = AXHelpers.getStringAttribute(ancestor, kAXRoleAttribute)
            return FocusResult(element: ancestor, role: ancestorRole, wasSearched: true)
        }

        // Last resort (non-editable mode only): accept the original element if it
        // exposes a value attribute.
        if !editableOnly && AXHelpers.hasAttribute(element, kAXValueAttribute) {
            return FocusResult(element: element, role: role, wasSearched: false)
        }

        return nil
    }

    /// Shared predicate: text-capable, and editable when `editableOnly` is set.
    private static func isAcceptableTextElement(_ element: AXUIElement, editableOnly: Bool) -> Bool {
        guard AXHelpers.isTextCapable(element) else { return false }
        return !editableOnly || AXHelpers.isElementEditable(element)
    }

    // MARK: - Descendant Search

    /// Breadth-first search of descendants for a text-capable element.
    ///
    /// - Parameters:
    ///   - element: Root of the search (not itself tested).
    ///   - editableOnly: If true, only editable elements qualify.
    ///   - maxDepth: Nodes at this depth or deeper are not expanded.
    ///   - maxElements: Upper bound on the number of nodes dequeued.
    /// - Returns: The first qualifying descendant in BFS order, or `nil`.
    private static func searchDescendantsForTextCapable(
        element: AXUIElement,
        editableOnly: Bool,
        maxDepth: Int = TREE_WALK_MAX_DEPTH,
        maxElements: Int = TREE_WALK_MAX_ELEMENTS
    ) -> AXUIElement? {
        var queue: [(element: AXUIElement, depth: Int)] = [(element, 0)]
        // Read position in `queue`; avoids the O(n) cost of Array.removeFirst().
        var head = 0
        var elementsSearched = 0

        while head < queue.count && elementsSearched < maxElements {
            let (current, currentDepth) = queue[head]
            head += 1
            elementsSearched += 1

            // Nodes at the depth limit count toward the budget but are not expanded.
            guard currentDepth < maxDepth else { continue }

            for child in AXHelpers.getChildren(current) {
                if isAcceptableTextElement(child, editableOnly: editableOnly) {
                    return child
                }
                queue.append((child, currentDepth + 1))
            }
        }

        return nil
    }

    // MARK: - Ancestor Search

    /// Walks up the parent chain looking for a text-capable element.
    ///
    /// - Parameters:
    ///   - element: Starting element (not itself tested).
    ///   - editableOnly: If true, only editable elements qualify.
    ///   - maxDepth: Maximum number of parent hops.
    /// - Returns: The nearest qualifying ancestor, or `nil`.
    private static func searchAncestorsForTextCapable(
        element: AXUIElement,
        editableOnly: Bool,
        maxDepth: Int = TREE_WALK_MAX_DEPTH
    ) -> AXUIElement? {
        var cursor = element

        for _ in 0..<maxDepth {
            guard let parent = AXHelpers.getParent(cursor) else { break }
            if isAcceptableTextElement(parent, editableOnly: editableOnly) {
                return parent
            }
            cursor = parent
        }

        return nil
    }

    // MARK: - Element Info Extraction

    /// Reads descriptive attributes of an element into an `AXElementInfo`.
    ///
    /// The value is suppressed (nil) for secure fields.
    /// - Parameter element: Element to describe.
    /// - Returns: The populated `AXElementInfo`.
    static func getElementInfo(element: AXUIElement) -> AXElementInfo {
        let role = AXHelpers.getStringAttribute(element, kAXRoleAttribute)
        let subrole = AXHelpers.getStringAttribute(element, kAXSubroleAttribute)
        let title = AXHelpers.getStringAttribute(element, kAXTitleAttribute)
        let description = AXHelpers.getStringAttribute(element, kAXDescriptionAttribute)
        let isEditable = AXHelpers.isElementEditable(element)
        let isSecure = AXHelpers.isSecureField(element)

        // Never read the value of a secure field.
        let value: String?
        if isSecure {
            value = nil
        } else {
            value = AXHelpers.getStringAttribute(element, kAXValueAttribute)
        }

        let isPlaceholder = AXHelpers.isPlaceholderShowing(element, selectionLength: nil)

        // When AXFocused cannot be read, the element is reported as focused.
        let isFocused = AXHelpers.getBoolAttribute(element, kAXFocusedAttribute) ?? true

        // Arguments in alphabetical order per generated types
        return AXElementInfo(
            description: description,
            isEditable: isEditable,
            isFocused: isFocused,
            isPlaceholder: isPlaceholder,
            isSecure: isSecure,
            role: role,
            subrole: subrole,
            title: title,
            value: value
        )
    }

    // MARK: - Window Info Extraction

    /// Reads title and (for browsers) URL of the application's main window.
    /// - Parameter pid: Process ID of the application.
    /// - Returns: Window info, or `nil` if the main window could not be read or
    ///   is not an `AXUIElement`.
    static func getWindowInfo(pid: pid_t) -> AccessibilityWindowInfo? {
        let application = AXHelpers.createApplicationElement(pid: pid)

        var mainWindowValue: CFTypeRef?
        let status = AXUIElementCopyAttributeValue(
            application,
            kAXMainWindowAttribute as CFString,
            &mainWindowValue
        )

        guard status == .success, let window = checkedAXUIElement(mainWindowValue) else {
            return nil
        }

        let title = AXHelpers.getStringAttribute(window, kAXTitleAttribute)

        // Get URL if this is a browser
        let bundleId = AXHelpers.getBundleIdentifier(pid: pid)
        let url = getBrowserURL(windowElement: window, bundleId: bundleId)

        return AccessibilityWindowInfo(title: title, url: url)
    }

    // MARK: - Browser URL Extraction

    /// Extracts the current URL from a browser window.
    ///
    /// Chromium-family browsers and Firefox: tree walk first, window attributes
    /// as fallback. Everything else: window attributes first, shallow tree walk
    /// as fallback.
    /// - Parameters:
    ///   - windowElement: The window to inspect.
    ///   - bundleId: Bundle identifier used to pick the strategy.
    /// - Returns: The URL string, or `nil` if none was found.
    private static func getBrowserURL(windowElement: AXUIElement, bundleId: String?) -> String? {
        let loweredId = bundleId?.lowercased()
        let isChromiumBrowser = loweredId?.contains("chrome") == true
            || loweredId?.contains("chromium") == true
            || bundleId == "com.microsoft.edgemac"
            || bundleId == "com.brave.Browser"
            || bundleId == "com.operasoftware.Opera"
            || bundleId == "com.vivaldi.Vivaldi"
        let isFirefox = bundleId == "org.mozilla.firefox"

        if isChromiumBrowser || isFirefox {
            // Prioritize the AXWebArea tree walk; fall back to window-level attributes.
            if let url = findURLInChildren(element: windowElement, maxDepth: CHROMIUM_URL_SEARCH_DEPTH) {
                return url
            }
            return windowLevelURL(windowElement)
        }

        // Other apps: window-level attributes first (more reliable), then a shallow walk.
        if let url = windowLevelURL(windowElement) {
            return url
        }
        return findURLInChildren(element: windowElement, maxDepth: NON_CHROMIUM_URL_SEARCH_DEPTH)
    }

    /// Reads a non-empty URL from the window's document attribute, then its URL attribute.
    private static func windowLevelURL(_ windowElement: AXUIElement) -> String? {
        if let url = AXHelpers.getStringAttribute(windowElement, kAXDocumentAttribute), !url.isEmpty {
            return url
        }
        if let url = AXHelpers.getStringAttribute(windowElement, kAXURLAttribute), !url.isEmpty {
            return url
        }
        return nil
    }

    /// Breadth-first search below `element` for something that carries a URL.
    ///
    /// Two kinds of node are checked: address-like fields (text field, combo box,
    /// Safari address field) whose value looks like a URL, and web areas exposing
    /// a URL or document attribute.
    /// - Parameters:
    ///   - element: Root of the search (not itself tested).
    ///   - maxDepth: Nodes at this depth or deeper are not expanded.
    /// - Returns: The first URL found in BFS order, or `nil`.
    private static func findURLInChildren(element: AXUIElement, maxDepth: Int) -> String? {
        var queue: [(element: AXUIElement, depth: Int)] = [(element, 0)]
        // Read position in `queue`; avoids the O(n) cost of Array.removeFirst().
        var head = 0

        while head < queue.count {
            let (currentElement, currentDepth) = queue[head]
            head += 1

            guard currentDepth < maxDepth else { continue }

            for child in AXHelpers.getChildren(currentElement) {
                let role = AXHelpers.getStringAttribute(child, kAXRoleAttribute)

                // Address fields: accept values with an http(s) scheme, or any value
                // containing a dot (loose heuristic for scheme-less hosts).
                if role == "AXTextField" || role == "AXComboBox" || role == "AXSafariAddressAndSearchField" {
                    if let value = AXHelpers.getStringAttribute(child, kAXValueAttribute),
                       !value.isEmpty,
                       (value.hasPrefix("http://") || value.hasPrefix("https://") || value.contains(".")) {
                        return value
                    }
                }

                // Web areas: URL attribute first, then document attribute.
                if role == "AXWebArea" {
                    if let url = AXHelpers.getStringAttribute(child, kAXURLAttribute), !url.isEmpty {
                        return url
                    }
                    if let url = AXHelpers.getStringAttribute(child, kAXDocumentAttribute), !url.isEmpty {
                        return url
                    }
                }

                queue.append((child, currentDepth + 1))
            }
        }

        return nil
    }
}
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
import Foundation
|
||||
import ApplicationServices
|
||||
|
||||
// =============================================================================
|
||||
// PermissionsService - Accessibility Permission Management
|
||||
// =============================================================================
|
||||
// Handles checking and requesting accessibility permissions.
|
||||
// =============================================================================
|
||||
|
||||
/// Thin facade over the accessibility permission checks.
class PermissionsService {

    // MARK: - Permission Check

    /// Checks whether accessibility permission has been granted.
    /// - Parameter prompt: If true, show the system prompt to request permissions.
    /// - Returns: True if permissions are granted.
    static func checkPermissions(prompt: Bool = false) -> Bool {
        AXHelpers.checkAccessibilityPermissions(prompt: prompt)
    }

    // MARK: - Permission Status

    /// Reports permission status without prompting the user.
    /// - Returns: `GetAccessibilityStatusResult` with permission details.
    static func getStatus() -> GetAccessibilityStatusResult {
        GetAccessibilityStatusResult(
            hasPermission: checkPermissions(prompt: false),
            // On macOS, accessibility is always "enabled" system-wide;
            // the only question is whether the app has permission.
            isEnabled: true
        )
    }

    // MARK: - Request Permission

    /// Requests accessibility permission (shows the system prompt).
    /// - Returns: `RequestAccessibilityPermissionResult` carrying the value the
    ///   prompting check returned.
    static func requestPermission() -> RequestAccessibilityPermissionResult {
        RequestAccessibilityPermissionResult(granted: checkPermissions(prompt: true))
    }
}
|
||||
|
|
@ -0,0 +1,792 @@
|
|||
import Foundation
|
||||
import ApplicationServices
|
||||
|
||||
// =============================================================================
|
||||
// SelectionExtractor - Multi-Path Text Selection Extraction
|
||||
// =============================================================================
|
||||
// Implements the Phase 1 extraction algorithm with TextMarker as primary path.
|
||||
// This enables text selection extraction in Electron/Chromium apps where
|
||||
// AXSelectedTextRange fails.
|
||||
// =============================================================================
|
||||
|
||||
/// Values produced by one TextMarker extraction attempt.
struct TextMarkerResult {
    /// The selected text, if any was obtained.
    let selectedText: String?

    /// The selection's range, if one was obtained.
    let selectionRange: SelectionRange?

    /// Whether the selection consists of more than one range.
    let hasMultipleRanges: Bool
}
|
||||
|
||||
/// Service for extracting text selection from focused elements
|
||||
class SelectionExtractor {
|
||||
|
||||
// MARK: - Main Extraction Entry Point
|
||||
|
||||
/// Extract text selection from an element using multi-path algorithm
|
||||
/// - Parameters:
|
||||
/// - element: The AXUIElement to extract from (focused element)
|
||||
/// - metricsBuilder: Builder to record extraction metrics
|
||||
/// - Returns: AccessibilityTextSelection or nil if no text selection available
|
||||
static func extract(from element: AXUIElement, metricsBuilder: ExtractionMetricsBuilder) -> AccessibilityTextSelection? {
|
||||
let builder = TextSelectionBuilder()
|
||||
|
||||
// Track both original focused element and the element we extract from
|
||||
let focusedElement = element
|
||||
var extractionElement = element
|
||||
|
||||
// Step 2: Check if element is editable (check original focused element)
|
||||
let focusedIsEditable = AXHelpers.isElementEditable(focusedElement)
|
||||
|
||||
// Step 2.1: SECURE FIELD CHECK - suppress all content if secure
|
||||
if AXHelpers.isSecureField(focusedElement) {
|
||||
return TextSelectionBuilder.secureField(isEditable: focusedIsEditable)
|
||||
}
|
||||
|
||||
// Variables to track extraction state
|
||||
var selectionRange: AccessibilitySelectionRange? = nil
|
||||
var selectedText: String? = nil
|
||||
var fullContent: String? = nil
|
||||
var hasMultipleRanges = false
|
||||
var extractionMethod: ExtractionMethod = .none
|
||||
|
||||
// Step 4: EXTRACTION (Priority Order)
|
||||
|
||||
// Path A: TextMarker (PRIMARY - works in Electron)
|
||||
metricsBuilder.textMarkerAttempted = true
|
||||
if let textMarkerResult = extractViaTextMarker(element: focusedElement, metricsBuilder: metricsBuilder) {
|
||||
metricsBuilder.textMarkerSucceeded = true
|
||||
selectedText = textMarkerResult.selectedText
|
||||
selectionRange = textMarkerResult.selectionRange
|
||||
hasMultipleRanges = textMarkerResult.hasMultipleRanges
|
||||
extractionMethod = .textMarkerRange
|
||||
}
|
||||
|
||||
// WebArea Retry Path: When TextMarker fails on focused element
|
||||
if extractionMethod == .none {
|
||||
// TextMarker failed - search for a better WebArea
|
||||
metricsBuilder.webAreaRetryAttempted = true
|
||||
|
||||
if let webArea = findWebArea(from: focusedElement) {
|
||||
metricsBuilder.webAreaFound = true
|
||||
|
||||
// Try TextMarker on WebArea
|
||||
if let webAreaTextMarkerResult = extractViaTextMarker(element: webArea, metricsBuilder: metricsBuilder) {
|
||||
// TextMarker SUCCEEDED on WebArea - now switch extraction element
|
||||
metricsBuilder.textMarkerSucceeded = true // Mark overall TextMarker as succeeded
|
||||
metricsBuilder.webAreaRetrySucceeded = true
|
||||
extractionElement = webArea
|
||||
selectedText = webAreaTextMarkerResult.selectedText
|
||||
selectionRange = webAreaTextMarkerResult.selectionRange
|
||||
hasMultipleRanges = webAreaTextMarkerResult.hasMultipleRanges
|
||||
extractionMethod = .textMarkerRange
|
||||
}
|
||||
// If TextMarker fails on WebArea, DON'T switch extractionElement
|
||||
// Keep using focusedElement for fallbacks (it has the content, even if noisy)
|
||||
}
|
||||
}
|
||||
|
||||
// Descendant Text Element Path: When both TextMarker attempts fail
|
||||
// Try to find the actual text element inside the container (e.g., in Notion)
|
||||
if extractionMethod == .none {
|
||||
if let deepTextElement = AXHelpers.findDeepestTextElement(from: focusedElement) {
|
||||
// Found a deeper text element - try extraction on it
|
||||
if let textMarkerResult = extractViaTextMarker(element: deepTextElement, metricsBuilder: metricsBuilder) {
|
||||
metricsBuilder.textMarkerSucceeded = true // Mark TextMarker as succeeded
|
||||
extractionElement = deepTextElement
|
||||
selectedText = textMarkerResult.selectedText
|
||||
selectionRange = textMarkerResult.selectionRange
|
||||
hasMultipleRanges = textMarkerResult.hasMultipleRanges
|
||||
extractionMethod = .textMarkerRange
|
||||
} else if let rangeResult = extractViaSelectedTextRange(element: deepTextElement) {
|
||||
// TextMarker failed but SelectedTextRange works - use this element
|
||||
// This should give us cleaner content without UI labels
|
||||
extractionElement = deepTextElement
|
||||
selectedText = rangeResult.selectedText
|
||||
selectionRange = rangeResult.selectionRange
|
||||
extractionMethod = .selectedTextRange
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Path B: SelectedTextRange (Fallback 1) - use extractionElement
|
||||
if extractionMethod == .none {
|
||||
metricsBuilder.recordFallback(.selectedTextRange)
|
||||
if let result = extractViaSelectedTextRange(element: extractionElement) {
|
||||
selectedText = result.selectedText
|
||||
selectionRange = result.selectionRange
|
||||
extractionMethod = .selectedTextRange
|
||||
}
|
||||
}
|
||||
|
||||
// Path C: SelectedTextRanges (Fallback 2 - Multi-select) - use extractionElement
|
||||
if extractionMethod == .none {
|
||||
metricsBuilder.recordFallback(.selectedTextRanges)
|
||||
if let result = extractViaSelectedTextRanges(element: extractionElement) {
|
||||
selectedText = result.selectedText
|
||||
selectionRange = result.selectionRange
|
||||
hasMultipleRanges = result.hasMultipleRanges
|
||||
extractionMethod = .selectedTextRanges
|
||||
}
|
||||
}
|
||||
|
||||
// Path D: Value Attribute (Fallback 3) - use extractionElement
|
||||
if extractionMethod == .none {
|
||||
metricsBuilder.recordFallback(.valueAttribute)
|
||||
if let value = AXHelpers.getStringAttribute(extractionElement, kAXValueAttribute) {
|
||||
fullContent = value
|
||||
extractionMethod = .valueAttribute
|
||||
// Note: No selectionRange available from this path
|
||||
}
|
||||
}
|
||||
|
||||
// Path E: StringForRange (Fallback 4) - use extractionElement
|
||||
if extractionMethod == .none {
|
||||
metricsBuilder.recordFallback(.stringForRange)
|
||||
if let charCount = AXHelpers.getNumberOfCharacters(extractionElement) {
|
||||
if charCount == 0 {
|
||||
fullContent = ""
|
||||
extractionMethod = .stringForRange
|
||||
} else if charCount > 0 {
|
||||
let range = CFRange(location: 0, length: charCount)
|
||||
if let content = AXHelpers.getStringForRange(extractionElement, range: range) {
|
||||
fullContent = content
|
||||
extractionMethod = .stringForRange
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If no extraction succeeded at all, return nil
|
||||
if extractionMethod == .none {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Step 5: FULL CONTENT RETRIEVAL (if not already obtained) - use extractionElement
|
||||
if fullContent == nil && selectionRange != nil {
|
||||
// Try AXValue first
|
||||
fullContent = AXHelpers.getStringAttribute(extractionElement, kAXValueAttribute)
|
||||
|
||||
// If fails, try AXStringForRange
|
||||
if fullContent == nil, let charCount = AXHelpers.getNumberOfCharacters(extractionElement), charCount > 0 {
|
||||
let range = CFRange(location: 0, length: charCount)
|
||||
fullContent = AXHelpers.getStringForRange(extractionElement, range: range)
|
||||
}
|
||||
}
|
||||
|
||||
// Step 3: PLACEHOLDER CHECK (non-blocking)
|
||||
// Use TextMarker-derived length if available, fall back to AXSelectedTextRange
|
||||
var selectionLength: Int? = selectionRange?.length
|
||||
if selectionLength == nil {
|
||||
if let cfRange = AXHelpers.getSelectedTextRange(extractionElement) {
|
||||
selectionLength = cfRange.length
|
||||
}
|
||||
}
|
||||
// OR logic: check placeholder on BOTH elements
|
||||
let focusedIsPlaceholder = AXHelpers.isPlaceholderShowing(focusedElement, selectionLength: nil)
|
||||
let extractionIsPlaceholder = AXHelpers.isPlaceholderShowing(extractionElement, selectionLength: selectionLength)
|
||||
builder.isPlaceholder = focusedIsPlaceholder || extractionIsPlaceholder
|
||||
|
||||
// OR logic for isEditable: editable if EITHER element is editable
|
||||
let extractionIsEditable = AXHelpers.isElementEditable(extractionElement)
|
||||
builder.isEditable = focusedIsEditable || extractionIsEditable
|
||||
|
||||
// Step 5.1: SELECTION RANGE VALIDATION
|
||||
if var range = selectionRange, let content = fullContent {
|
||||
let contentLength = content.utf16.count
|
||||
let originalLocation = range.location
|
||||
let originalLength = range.length
|
||||
|
||||
// Clamp to valid bounds
|
||||
let clampedLocation = AXHelpers.clamp(originalLocation, min: 0, max: contentLength)
|
||||
let maxLength = contentLength - clampedLocation
|
||||
let clampedLength = AXHelpers.clamp(originalLength, min: 0, max: maxLength)
|
||||
|
||||
// Log if clamping occurred (no PII)
|
||||
if originalLocation != clampedLocation || originalLength != clampedLength {
|
||||
metricsBuilder.recordError("SelectionRange clamped: original exceeded content bounds")
|
||||
}
|
||||
|
||||
selectionRange = SelectionRange(length: clampedLength, location: clampedLocation)
|
||||
|
||||
// Step 5.2: RE-DERIVE selectedText when no windowing needed
|
||||
if contentLength <= MAX_FULL_CONTENT_LENGTH {
|
||||
if clampedLength == 0 {
|
||||
selectedText = ""
|
||||
} else {
|
||||
selectedText = AXHelpers.substringUTF16(content, start: clampedLocation, length: clampedLength)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 6: CONTENT WINDOWING
|
||||
var fullContentTruncated = false
|
||||
if var content = fullContent, content.utf16.count > MAX_FULL_CONTENT_LENGTH {
|
||||
let result = windowContent(
|
||||
content: content,
|
||||
selectionRange: selectionRange,
|
||||
metricsBuilder: metricsBuilder
|
||||
)
|
||||
fullContent = result.windowedContent
|
||||
selectionRange = result.adjustedRange
|
||||
selectedText = result.selectedText
|
||||
fullContentTruncated = true
|
||||
}
|
||||
|
||||
// Step 7: CONTEXT COMPUTATION
|
||||
var preSelectionText: String? = nil
|
||||
var postSelectionText: String? = nil
|
||||
|
||||
if let range = selectionRange, let content = fullContent {
|
||||
let location = range.location
|
||||
let length = range.length
|
||||
let contentLength = content.utf16.count
|
||||
|
||||
// Pre-selection text
|
||||
if location == 0 {
|
||||
preSelectionText = ""
|
||||
} else {
|
||||
let preStart = max(0, location - MAX_CONTEXT_LENGTH)
|
||||
let preLength = location - preStart
|
||||
preSelectionText = AXHelpers.substringUTF16(content, start: preStart, length: preLength)
|
||||
}
|
||||
|
||||
// Post-selection text
|
||||
let postStart = location + length
|
||||
if postStart >= contentLength {
|
||||
postSelectionText = ""
|
||||
} else {
|
||||
let postLength = min(MAX_CONTEXT_LENGTH, contentLength - postStart)
|
||||
postSelectionText = AXHelpers.substringUTF16(content, start: postStart, length: postLength)
|
||||
}
|
||||
} else if let range = selectionRange, fullContent == nil {
|
||||
// Per spec: when selectionRange exists but fullContent is nil,
|
||||
// compute pre/post via AXStringForRange
|
||||
let location = range.location
|
||||
let length = range.length
|
||||
|
||||
// Pre-selection text via AXStringForRange
|
||||
if location == 0 {
|
||||
preSelectionText = ""
|
||||
} else {
|
||||
let preStart = max(0, location - MAX_CONTEXT_LENGTH)
|
||||
let preLength = location - preStart
|
||||
let preRange = CFRange(location: preStart, length: preLength)
|
||||
preSelectionText = AXHelpers.getStringForRange(extractionElement, range: preRange)
|
||||
}
|
||||
|
||||
// Post-selection text via AXStringForRange
|
||||
let postStart = location + length
|
||||
// We don't know total length, so just try to get MAX_CONTEXT_LENGTH
|
||||
let postRange = CFRange(location: postStart, length: MAX_CONTEXT_LENGTH)
|
||||
postSelectionText = AXHelpers.getStringForRange(extractionElement, range: postRange)
|
||||
}
|
||||
|
||||
// Build final result
|
||||
builder.selectedText = selectedText
|
||||
builder.fullContent = fullContent
|
||||
builder.preSelectionText = preSelectionText
|
||||
builder.postSelectionText = postSelectionText
|
||||
builder.selectionRange = selectionRange
|
||||
builder.extractionMethod = extractionMethod
|
||||
builder.hasMultipleRanges = hasMultipleRanges
|
||||
builder.fullContentTruncated = fullContentTruncated
|
||||
|
||||
return builder.build()
|
||||
}
|
||||
|
||||
// MARK: - Path A: TextMarker Extraction
|
||||
|
||||
/// Reads the current selection of `element` through the TextMarker attribute family.
///
/// The single-range attribute (`AXSelectedTextMarkerRange`) is tried first; the
/// multi-range attribute (`AXSelectedTextMarkerRanges`) is consulted only when the
/// single-range attempt returns nil. According to the original notes, this path is
/// the one that works in Electron/Chromium apps.
///
/// - Parameters:
///   - element: The accessibility element to query.
///   - metricsBuilder: Forwarded to both attempts so they can record their failures.
/// - Returns: The first non-nil result of the two attempts, or nil when both fail.
private static func extractViaTextMarker(element: AXUIElement, metricsBuilder: ExtractionMetricsBuilder) -> TextMarkerResult? {
    // `??` evaluates its right-hand side lazily, so the multi-range lookup
    // only runs after the single-range lookup has come back empty.
    return extractViaSingleTextMarkerRange(element: element, metricsBuilder: metricsBuilder)
        ?? extractViaMultiTextMarkerRanges(element: element, metricsBuilder: metricsBuilder)
}
|
||||
|
||||
/// Reads `AXSelectedTextMarkerRange` from `element` and turns it into a `TextMarkerResult`.
///
/// - Parameters:
///   - element: The accessibility element to query.
///   - metricsBuilder: Receives an error entry (including the raw `AXError` code)
///     when the attribute cannot be read.
/// - Returns: Whatever `extractFromMarkerRange` produces for the marker range
///   (with `hasMultipleRanges` set to false), or nil when the attribute read fails.
private static func extractViaSingleTextMarkerRange(element: AXUIElement, metricsBuilder: ExtractionMetricsBuilder) -> TextMarkerResult? {
    var rawRange: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, "AXSelectedTextMarkerRange" as CFString, &rawRange)

    // Happy path: the attribute exists and carries a value.
    if status == .success, let selectedRange = rawRange {
        return extractFromMarkerRange(
            selectedRange,
            element: element,
            metricsBuilder: metricsBuilder,
            hasMultipleRanges: false
        )
    }

    metricsBuilder.recordError("TextMarker: AXSelectedTextMarkerRange failed, AXError=\(status.rawValue)")
    return nil
}
|
||||
|
||||
/// Reads `AXSelectedTextMarkerRanges` from `element` and extracts the selection
/// described by the first entry of that array.
///
/// - Parameters:
///   - element: The accessibility element to query.
///   - metricsBuilder: Receives an error entry when the attribute read fails, the
///     value is not an array, or the array is empty.
/// - Returns: The result of `extractFromMarkerRange` for the first range, with
///   `hasMultipleRanges` true when the array holds more than one entry; nil on failure.
private static func extractViaMultiTextMarkerRanges(element: AXUIElement, metricsBuilder: ExtractionMetricsBuilder) -> TextMarkerResult? {
    var rawRanges: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, "AXSelectedTextMarkerRanges" as CFString, &rawRanges)

    // Binding `first` doubles as the non-empty check.
    guard status == .success,
          let markerRanges = rawRanges as? [AnyObject],
          let leadingRange = markerRanges.first else {
        metricsBuilder.recordError("TextMarker: AXSelectedTextMarkerRanges failed or empty, AXError=\(status.rawValue)")
        return nil
    }

    return extractFromMarkerRange(
        leadingRange as CFTypeRef,
        element: element,
        metricsBuilder: metricsBuilder,
        hasMultipleRanges: markerRanges.count > 1
    )
}
|
||||
|
||||
/// Converts a TextMarker range into integer indices plus the text it covers.
///
/// Queries, in this order: the start marker, the end marker, the index of each
/// marker, and finally the (attributed) string for the range. Any failure among
/// the first four queries, a negative index, or an end index below the start
/// index aborts with nil after recording an error. A failure of the text query
/// does not abort: the result is still returned, with `selectedText` set to ""
/// for a zero-length range and nil otherwise (an error is recorded in the latter case).
///
/// - Parameters:
///   - markerRange: The opaque marker-range value obtained from the element.
///   - element: The element the marker range belongs to; all queries go to it.
///   - metricsBuilder: Collects error descriptions for failed steps.
///   - hasMultipleRanges: Passed through unchanged into the result.
/// - Returns: A `TextMarkerResult` whose range has `location` = start index and
///   `length` = end index - start index, or nil on failure.
private static func extractFromMarkerRange(_ markerRange: CFTypeRef, element: AXUIElement, metricsBuilder: ExtractionMetricsBuilder, hasMultipleRanges: Bool) -> TextMarkerResult? {
    // One parameterized-attribute round trip; returns both the status and the raw value.
    func query(_ attribute: String, with parameter: CFTypeRef) -> (status: AXError, value: CFTypeRef?) {
        var output: CFTypeRef?
        let status = AXUIElementCopyParameterizedAttributeValue(element, attribute as CFString, parameter, &output)
        return (status, output)
    }

    // Boundary markers of the range.
    let startLookup = query("AXStartTextMarkerForTextMarkerRange", with: markerRange)
    guard startLookup.status == .success, let startMarker = startLookup.value else {
        metricsBuilder.recordError("TextMarker: AXStartTextMarkerForTextMarkerRange failed, AXError=\(startLookup.status.rawValue)")
        return nil
    }

    let endLookup = query("AXEndTextMarkerForTextMarkerRange", with: markerRange)
    guard endLookup.status == .success, let endMarker = endLookup.value else {
        metricsBuilder.recordError("TextMarker: AXEndTextMarkerForTextMarkerRange failed, AXError=\(endLookup.status.rawValue)")
        return nil
    }

    // Marker -> integer index conversion.
    let lowerLookup = query("AXIndexForTextMarker", with: startMarker)
    guard lowerLookup.status == .success, let lowerNumber = lowerLookup.value as? NSNumber else {
        metricsBuilder.recordError("TextMarker: AXIndexForTextMarker (start) failed, AXError=\(lowerLookup.status.rawValue)")
        return nil
    }

    let upperLookup = query("AXIndexForTextMarker", with: endMarker)
    guard upperLookup.status == .success, let upperNumber = upperLookup.value as? NSNumber else {
        metricsBuilder.recordError("TextMarker: AXIndexForTextMarker (end) failed, AXError=\(upperLookup.status.rawValue)")
        return nil
    }

    let lower = lowerNumber.intValue
    let upper = upperNumber.intValue

    // Reject indices that cannot describe a forward range.
    guard lower >= 0, upper >= 0 else {
        metricsBuilder.recordError("TextMarker: Invalid indices - negative values (start=\(lower), end=\(upper))")
        return nil
    }
    guard upper >= lower else {
        metricsBuilder.recordError("TextMarker: Invalid indices - end < start (start=\(lower), end=\(upper))")
        return nil
    }

    let span = upper - lower

    // Text covered by the range; the value may arrive attributed or plain.
    let textLookup = query("AXAttributedStringForTextMarkerRange", with: markerRange)
    let textReadOK = textLookup.status == .success

    let text: String?
    if textReadOK, let attributed = textLookup.value as? NSAttributedString {
        text = attributed.string
    } else if textReadOK, let plain = textLookup.value as? String {
        text = plain
    } else if span == 0 {
        // Caret only: an empty selection is a valid outcome, not an error.
        text = ""
    } else {
        metricsBuilder.recordError("TextMarker: AXAttributedStringForTextMarkerRange failed, AXError=\(textLookup.status.rawValue)")
        text = nil
    }

    return TextMarkerResult(
        selectedText: text,
        selectionRange: SelectionRange(length: span, location: lower),
        hasMultipleRanges: hasMultipleRanges
    )
}
|
||||
|
||||
// MARK: - Path B: SelectedTextRange Extraction
|
||||
|
||||
/// Builds a selection result from the range returned by `AXHelpers.getSelectedTextRange`.
///
/// For a non-empty range the text is fetched with `AXHelpers.getStringForRange`
/// first (the original notes call this more reliable for Chromium/Electron) and,
/// if that yields nil, from the `kAXSelectedTextAttribute` string attribute.
/// An empty range produces an empty string without any further queries.
///
/// - Parameter element: The accessibility element to query.
/// - Returns: A result carrying the range's location/length and the text (which
///   may be nil when both text lookups fail), or nil when no selected range is available.
private static func extractViaSelectedTextRange(element: AXUIElement) -> TextMarkerResult? {
    guard let selection = AXHelpers.getSelectedTextRange(element) else {
        return nil
    }

    let text: String?
    if selection.length == 0 {
        text = ""
    } else {
        // The fallback lookup is only evaluated when the primary one returns nil.
        text = AXHelpers.getStringForRange(element, range: selection)
            ?? AXHelpers.getStringAttribute(element, kAXSelectedTextAttribute)
    }

    return TextMarkerResult(
        selectedText: text,
        selectionRange: SelectionRange(length: selection.length, location: selection.location),
        hasMultipleRanges: false
    )
}
|
||||
|
||||
// MARK: - Path C: SelectedTextRanges Extraction
|
||||
|
||||
/// Builds a selection result from the `AXSelectedTextRanges` attribute (multi-select).
///
/// Every array entry that decodes as a `CFRange` is kept; the entry with the
/// lowest location (after sorting ascending by location) becomes the primary range.
///
/// - Parameter element: The accessibility element to query.
/// - Returns: A result for the primary range, with `hasMultipleRanges` true when
///   more than one range decoded successfully; nil when the attribute read fails,
///   the array is empty, or no entry decodes.
private static func extractViaSelectedTextRanges(element: AXUIElement) -> TextMarkerResult? {
    var rawRanges: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, "AXSelectedTextRanges" as CFString, &rawRanges)

    guard status == .success, let boxedRanges = rawRanges as? [AXValue], !boxedRanges.isEmpty else {
        return nil
    }

    // Unbox each AXValue; entries that are not CFRange values are dropped.
    let decoded: [CFRange] = boxedRanges.compactMap { boxed in
        var unboxed = CFRange()
        return AXValueGetValue(boxed, .cfRange, &unboxed) ? unboxed : nil
    }

    // Ascending by location; `first` is nil exactly when nothing decoded.
    let ordered = decoded.sorted { $0.location < $1.location }
    guard let primary = ordered.first else { return nil }

    let text: String? = primary.length == 0
        ? ""
        : AXHelpers.getStringForRange(element, range: primary)

    return TextMarkerResult(
        selectedText: text,
        selectionRange: SelectionRange(length: primary.length, location: primary.location),
        hasMultipleRanges: ordered.count > 1
    )
}
|
||||
|
||||
// MARK: - WebArea Search
|
||||
|
||||
/// One AXWebArea discovered while searching around the focused element.
private struct WebAreaCandidate {
    /// The web-area element itself.
    let element: AXUIElement

    /// Distance from the focused element as reported by the search helper
    /// (per the original note: positive = descendant, negative = ancestor).
    let depth: Int

    /// True when the element was found among the ancestors, false for descendants.
    let isAncestor: Bool
}
|
||||
|
||||
/// Locates the AXWebArea that should be used for extraction instead of `focusedElement`.
///
/// Candidates are gathered from the ancestors (skipped when the focused element
/// already has the role "AXWebArea") and from the descendants (skipped when the
/// focused element reports no children), then ranked by `selectBestWebArea`.
///
/// - Parameter focusedElement: The currently focused element.
/// - Returns: The preferred web area, or nil when no candidate was found.
private static func findWebArea(from focusedElement: AXUIElement) -> AXUIElement? {
    var pool: [WebAreaCandidate] = []

    // Ancestor pass: pointless if we are already standing on a web area.
    if AXHelpers.getRole(focusedElement) != "AXWebArea" {
        let above = AXHelpers.findWebAreasInAncestors(
            element: focusedElement,
            excludeElement: focusedElement,
            maxLevels: WEB_AREA_ANCESTOR_SEARCH_DEPTH
        )
        pool.append(contentsOf: above.map { area, level in
            WebAreaCandidate(element: area, depth: level, isAncestor: true)
        })
    }

    // Descendant pass: always attempted, but only worth a walk when children exist.
    if !AXHelpers.getChildren(focusedElement).isEmpty {
        let below = AXHelpers.findWebAreasInDescendants(
            element: focusedElement,
            excludeElement: focusedElement,
            maxDepth: FIND_WEB_AREAS_MAX_DEPTH,
            maxElements: FIND_WEB_AREAS_MAX_ELEMENTS
        )
        pool.append(contentsOf: below.map { area, level in
            WebAreaCandidate(element: area, depth: level, isAncestor: false)
        })
    }

    return selectBestWebArea(from: pool, focusedElement: focusedElement)
}
|
||||
|
||||
/// Picks the most suitable web area out of `candidates`.
///
/// Candidates are grouped into tiers, examined in this order:
///   1. has a text-marker range AND is related to the app's focused element
///   2. has a text-marker range, not related to focus
///   3. related to focus, no text-marker range
///   4. any candidate
/// "Related to focus" means the app-level focused element is a descendant of (or
/// equal to) the candidate, or the candidate is a descendant of (or equal to) the
/// focused element. Inside a tier, the descendant with the greatest `depth` wins;
/// if the tier has no descendants, the ancestor with the greatest `depth` wins
/// (the nearest one, assuming ancestor depths are negative as noted on
/// `WebAreaCandidate` - TODO confirm against the ancestor search helper).
///
/// - Parameters:
///   - candidates: Web areas collected around the focused element.
///   - focusedElement: Used to resolve the owning process and its focused element.
/// - Returns: The winning element, or nil when `candidates` is empty.
private static func selectBestWebArea(
    from candidates: [WebAreaCandidate],
    focusedElement: AXUIElement
) -> AXUIElement? {
    if candidates.isEmpty { return nil }

    // App-level focus, used to judge whether a candidate is related to the focus.
    let ownerPid = AXHelpers.getPid(focusedElement)
    let appFocus = AXHelpers.getAppFocusedElement(forPid: ownerPid)

    // A candidate annotated with the two ranking signals.
    struct Rated {
        let candidate: WebAreaCandidate
        let hasMarkerRange: Bool
        let containsFocus: Bool
    }

    // Rate every candidate up front; the focus relation is evaluated before the marker probe.
    var rated: [Rated] = []
    rated.reserveCapacity(candidates.count)
    for entry in candidates {
        var related = false
        if let focus = appFocus {
            related = AXHelpers.isDescendantOrEqual(focus, of: entry.element)
                || AXHelpers.isDescendantOrEqual(entry.element, of: focus)
        }
        let markers = AXHelpers.hasTextMarkerRange(entry.element)
        rated.append(Rated(candidate: entry, hasMarkerRange: markers, containsFocus: related))
    }

    // Within one tier: max-depth descendant first, otherwise max-depth ancestor.
    func preferred(in tier: [Rated]) -> AXUIElement? {
        let shallowerThan: (Rated, Rated) -> Bool = { $0.candidate.depth < $1.candidate.depth }
        if let inner = tier.filter({ !$0.candidate.isAncestor }).max(by: shallowerThan) {
            return inner.candidate.element
        }
        return tier.filter({ $0.candidate.isAncestor }).max(by: shallowerThan)?.candidate.element
    }

    // Tier membership tests, strongest tier first; the last one admits everything.
    let tiers: [(Rated) -> Bool] = [
        { $0.hasMarkerRange && $0.containsFocus },
        { $0.hasMarkerRange && !$0.containsFocus },
        { $0.containsFocus && !$0.hasMarkerRange },
        { _ in true }
    ]

    for belongsToTier in tiers {
        if let winner = preferred(in: rated.filter(belongsToTier)) {
            return winner
        }
    }

    return nil
}
|
||||
|
||||
// MARK: - Content Windowing
|
||||
|
||||
/// Outcome of `windowContent`: the shortened text plus the selection re-expressed
/// relative to that shortened text.
struct WindowResult {
    /// The content after windowing.
    let windowedContent: String

    /// The selection range translated into `windowedContent` coordinates;
    /// nil when windowing was done without a selection.
    let adjustedRange: SelectionRange?

    /// The text covered by `adjustedRange`; nil when there is no selection
    /// or the substring could not be produced.
    let selectedText: String?
}
|
||||
|
||||
/// Cuts `content` down to a window sized against `MAX_FULL_CONTENT_LENGTH`
/// (measured in UTF-16 code units) and re-bases the selection onto that window.
///
/// Three cases:
///   - A: no selection - keep a head and a tail piece joined by "\n...\n".
///   - B: the selection itself is longer than the limit - the window starts at
///        the selection start and spans at most the limit.
///   - C: otherwise - the window is the selection padded by `WINDOW_PADDING` on
///        each side, re-centred on the selection if that exceeds the limit.
/// Window edges are passed through `AXHelpers.adjustForSurrogatePairs` before slicing.
///
/// - Parameters:
///   - content: The full text to window.
///   - selectionRange: Selection in `content` coordinates, or nil.
///   - metricsBuilder: Accepted for signature compatibility; not used by this function.
/// - Returns: The windowed text with the adjusted range and selected text
///   (both nil in case A).
private static func windowContent(
    content: String,
    selectionRange: SelectionRange?,
    metricsBuilder: ExtractionMetricsBuilder
) -> WindowResult {
    let totalLength = content.utf16.count

    // CASE A: nothing to anchor on - head + delimiter + tail.
    guard let range = selectionRange else {
        let delimiter = "\n...\n"
        let budget = MAX_FULL_CONTENT_LENGTH - delimiter.utf16.count
        let rawHeadLength = budget / 2
        let rawTailLength = budget - rawHeadLength

        // Head end moves backward, tail start moves forward, per the surrogate-pair helper.
        let headLength = AXHelpers.adjustForSurrogatePairs(content, offset: rawHeadLength, direction: .backward)
        let tailStart = AXHelpers.adjustForSurrogatePairs(content, offset: totalLength - rawTailLength, direction: .forward)

        let head = AXHelpers.substringUTF16(content, start: 0, length: headLength) ?? ""
        let tail = AXHelpers.substringUTF16(content, start: tailStart, length: totalLength - tailStart) ?? ""

        return WindowResult(
            windowedContent: head + delimiter + tail,
            adjustedRange: nil,
            selectedText: nil
        )
    }

    let location = range.location
    let length = range.length

    // Shared tail of cases B and C: fix up the edges, slice, then translate the selection.
    func slice(from rawStart: Int, to rawEnd: Int) -> WindowResult {
        let start = AXHelpers.adjustForSurrogatePairs(content, offset: rawStart, direction: .forward)
        let end = AXHelpers.adjustForSurrogatePairs(content, offset: rawEnd, direction: .backward)

        let window = AXHelpers.substringUTF16(content, start: start, length: end - start) ?? ""
        let windowLength = window.utf16.count

        // Location is clamped first so the length clamp can rely on it.
        let newLocation = AXHelpers.clamp(location - start, min: 0, max: windowLength)
        let newLength = AXHelpers.clamp(length, min: 0, max: windowLength - newLocation)
        let newSelectedText = AXHelpers.substringUTF16(window, start: newLocation, length: newLength)

        return WindowResult(
            windowedContent: window,
            adjustedRange: SelectionRange(length: newLength, location: newLocation),
            selectedText: newSelectedText
        )
    }

    // CASE B: the selection alone overflows the limit - anchor at its start.
    if length > MAX_FULL_CONTENT_LENGTH {
        return slice(from: location, to: min(location + MAX_FULL_CONTENT_LENGTH, totalLength))
    }

    // CASE C: padded window around the selection.
    var lowerBound = max(0, location - WINDOW_PADDING)
    var upperBound = min(totalLength, location + length + WINDOW_PADDING)

    if upperBound - lowerBound > MAX_FULL_CONTENT_LENGTH {
        // Too wide: re-centre a limit-sized window on the middle of the selection,
        // then pull the start back if the end got pinned to the end of the content.
        let centre = location + length / 2
        lowerBound = max(0, centre - MAX_FULL_CONTENT_LENGTH / 2)
        upperBound = min(totalLength, lowerBound + MAX_FULL_CONTENT_LENGTH)
        lowerBound = max(0, upperBound - MAX_FULL_CONTENT_LENGTH)
    }

    return slice(from: lowerBound, to: upperBound)
}
|
||||
}
|
||||
|
|
@ -0,0 +1,656 @@
|
|||
import Foundation
|
||||
import ApplicationServices
|
||||
import AppKit
|
||||
|
||||
// =============================================================================
|
||||
// AXHelpers - Common Accessibility API Utilities
|
||||
// =============================================================================
|
||||
// Shared utilities for working with macOS Accessibility APIs.
|
||||
// Extracted from AccessibilityContextService for reuse in v2 implementation.
|
||||
// =============================================================================
|
||||
|
||||
// Note: Constants are defined in utils/Constants.swift
|
||||
|
||||
// MARK: - Surrogate Pair Direction
|
||||
|
||||
/// Which way an offset should be nudged when it needs surrogate-pair adjustment.
enum SurrogatePairDirection {
    /// Used for a window start: move into the content to include the complete character.
    case forward

    /// Used for a window end: move out of the content to exclude the incomplete character.
    case backward
}
|
||||
|
||||
// MARK: - AXHelpers
|
||||
|
||||
/// Utilities for working with macOS Accessibility APIs
|
||||
enum AXHelpers {
|
||||
|
||||
// MARK: - Attribute Access
|
||||
|
||||
/// Reads `attribute` from `element` and renders it as a string.
///
/// - String values are returned as-is.
/// - Boolean values (CFBoolean) are rendered as "true" / "false".
/// - Other numeric values are rendered via `NSNumber.stringValue`.
///
/// - Parameters:
///   - element: The accessibility element to query.
///   - attribute: The attribute name.
/// - Returns: The rendered value, or nil when the read fails, the attribute has
///   no value, or the value is of any other type.
static func getStringAttribute(_ element: AXUIElement, _ attribute: String) -> String? {
    var value: CFTypeRef?
    let error = AXUIElementCopyAttributeValue(element, attribute as CFString, &value)

    guard error == .success, let raw = value else { return nil }

    if let stringValue = raw as? String {
        return stringValue
    }

    // CFBoolean bridges to NSNumber, so an `as? NSNumber` test placed first would
    // swallow booleans and render them as "1"/"0". Identify real booleans by their
    // Core Foundation type ID before falling back to the generic number case.
    if CFGetTypeID(raw) == CFBooleanGetTypeID(), let boolValue = raw as? Bool {
        return boolValue ? "true" : "false"
    }

    if let numberValue = raw as? NSNumber {
        return numberValue.stringValue
    }

    return nil
}
|
||||
|
||||
/// Reads `attribute` from `element` and interprets it as a boolean.
///
/// - Parameters:
///   - element: The accessibility element to query.
///   - attribute: The attribute name.
/// - Returns: The value when it casts to `Bool`; otherwise `NSNumber.boolValue`
///   when it is a number; nil when the read fails or the value is neither.
static func getBoolAttribute(_ element: AXUIElement, _ attribute: String) -> Bool? {
    var raw: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, attribute as CFString, &raw)

    if status != .success {
        return nil
    }

    // The direct Bool cast is attempted before the looser NSNumber interpretation.
    switch raw {
    case let flag as Bool:
        return flag
    case let number as NSNumber:
        return number.boolValue
    default:
        return nil
    }
}
|
||||
|
||||
/// Reads `attribute` from `element` and interprets it as an integer.
///
/// - Parameters:
///   - element: The accessibility element to query.
///   - attribute: The attribute name.
/// - Returns: `NSNumber.intValue` of the attribute value, or nil when the read
///   fails or the value is not a number.
static func getIntAttribute(_ element: AXUIElement, _ attribute: String) -> Int? {
    var raw: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, attribute as CFString, &raw)

    if status != .success {
        return nil
    }

    return (raw as? NSNumber)?.intValue
}
|
||||
|
||||
/// Lists the names of every attribute the element reports.
///
/// - Parameter element: The accessibility element to query.
/// - Returns: The attribute names, or an empty array when the query fails or
///   the result is not an array of strings.
static func getAttributeNames(_ element: AXUIElement) -> [String] {
    var rawNames: CFArray?
    let status = AXUIElementCopyAttributeNames(element, &rawNames)
    guard status == .success, let names = rawNames as? [String] else {
        return []
    }
    return names
}
|
||||
|
||||
/// Tells whether the element lists `attribute` among its attribute names.
///
/// - Parameters:
///   - element: The accessibility element to query.
///   - attribute: The attribute name to look for.
/// - Returns: `true` when the name appears in `getAttributeNames(element)`.
static func hasAttribute(_ element: AXUIElement, _ attribute: String) -> Bool {
    let supportedNames = getAttributeNames(element)
    return supportedNames.contains(attribute)
}
|
||||
|
||||
/// Reads an accessibility attribute without converting its value.
///
/// - Parameters:
///   - element: The accessibility element to query.
///   - attribute: The attribute name.
/// - Returns: The untyped attribute value, or `nil` when the query does not
///   succeed.
static func getRawAttribute(_ element: AXUIElement, _ attribute: String) -> CFTypeRef? {
    var raw: CFTypeRef?
    guard AXUIElementCopyAttributeValue(element, attribute as CFString, &raw) == .success else {
        return nil
    }
    return raw
}
|
||||
|
||||
/// Reads a parameterized accessibility attribute without converting its value.
///
/// - Parameters:
///   - element: The accessibility element to query.
///   - attribute: The parameterized attribute name.
///   - parameter: The parameter passed along with the query.
/// - Returns: The untyped result, or `nil` when the query does not succeed.
static func getParameterizedAttribute(_ element: AXUIElement, _ attribute: String, parameter: CFTypeRef) -> CFTypeRef? {
    var raw: CFTypeRef?
    let status = AXUIElementCopyParameterizedAttributeValue(
        element,
        attribute as CFString,
        parameter,
        &raw
    )
    guard status == .success else {
        return nil
    }
    return raw
}
|
||||
|
||||
// MARK: - Element Type Detection
|
||||
|
||||
/// Accessibility roles treated as editable text fields by
/// `isElementEditable` and `isTextCapable`.
static let editableRoles: Set<String> = ["AXTextField", "AXTextArea", "AXComboBox"]
|
||||
|
||||
/// Accessibility subroles treated as editable text fields by
/// `isElementEditable`.
static let editableSubroles: Set<String> = ["AXSecureTextField", "AXSearchField"]
|
||||
|
||||
/// Heuristically decides whether an element accepts text editing.
///
/// An element counts as editable when its role is in `editableRoles`, its
/// subrole is in `editableSubroles`, or, failing both, it merely exposes an
/// `AXValue` attribute.
///
/// - Parameter element: The accessibility element to inspect.
/// - Returns: `true` when any of the three checks passes.
static func isElementEditable(_ element: AXUIElement) -> Bool {
    let role = getStringAttribute(element, kAXRoleAttribute)
    let subrole = getStringAttribute(element, kAXSubroleAttribute)

    let roleIsEditable = role.map { editableRoles.contains($0) } ?? false
    let subroleIsEditable = subrole.map { editableSubroles.contains($0) } ?? false

    // Last resort: the presence of AXValue is taken as a hint of editability.
    return roleIsEditable || subroleIsEditable || hasAttribute(element, kAXValueAttribute)
}
|
||||
|
||||
/// Decides whether an element is a secure (password) field.
///
/// - Parameter element: The accessibility element to inspect.
/// - Returns: `true` when the subrole is exactly `"AXSecureTextField"` or the
///   role string contains `"Secure"`.
static func isSecureField(_ element: AXUIElement) -> Bool {
    // The subrole is consulted first; the role is only read when it does not match.
    if getStringAttribute(element, kAXSubroleAttribute) == "AXSecureTextField" {
        return true
    }

    guard let role = getStringAttribute(element, kAXRoleAttribute) else {
        return false
    }
    return role.contains("Secure")
}
|
||||
|
||||
/// Decides whether a field is currently displaying its placeholder text.
///
/// The placeholder is considered visible when all of the following hold:
/// - `AXPlaceholderValue` exists and is non-empty,
/// - the current `AXValue` is missing, empty, or equal to the placeholder,
/// - `selectionLength` is `nil` (unknown) or `0`.
///
/// - Parameters:
///   - element: The accessibility element to inspect.
///   - selectionLength: Length of the current selection, if known.
/// - Returns: `true` when the placeholder appears to be showing.
static func isPlaceholderShowing(_ element: AXUIElement, selectionLength: Int?) -> Bool {
    let placeholderText = getStringAttribute(element, "AXPlaceholderValue")
    let fieldValue = getStringAttribute(element, kAXValueAttribute)

    guard let placeholder = placeholderText, !placeholder.isEmpty else {
        return false
    }

    // A known, non-zero selection rules the placeholder out.
    if let length = selectionLength, length != 0 {
        return false
    }

    // No value at all (or an empty one) means only the placeholder is visible.
    guard let current = fieldValue, !current.isEmpty else {
        return true
    }
    return current == placeholder
}
|
||||
|
||||
/// Decides whether an element can hold a text selection.
///
/// An element is text-capable when any of these is true:
/// - it exposes `AXSelectedTextMarkerRange`,
/// - it exposes `AXSelectedTextRange`,
/// - it exposes `AXValue` and its role is in `editableRoles`,
/// - its role is `"AXWebArea"`.
///
/// The attribute-name list is fetched once and reused for all three attribute
/// checks instead of being re-queried per check.
///
/// - Parameter element: The accessibility element to inspect.
/// - Returns: `true` when the element is text-capable.
static func isTextCapable(_ element: AXUIElement) -> Bool {
    let attributeNames = getAttributeNames(element)

    if attributeNames.contains("AXSelectedTextMarkerRange")
        || attributeNames.contains(kAXSelectedTextRangeAttribute) {
        return true
    }

    // Both remaining checks need a role; without one the element is not text-capable.
    guard let role = getStringAttribute(element, kAXRoleAttribute) else {
        return false
    }
    if attributeNames.contains(kAXValueAttribute) && editableRoles.contains(role) {
        return true
    }
    return role == "AXWebArea"
}
|
||||
|
||||
// MARK: - Element Tree Navigation
|
||||
|
||||
/// Returns the direct children of an element.
///
/// - Parameter element: The accessibility element to query.
/// - Returns: The value of `AXChildren`, or an empty array when the query
///   fails or the value is not an array of elements.
static func getChildren(_ element: AXUIElement) -> [AXUIElement] {
    var rawChildren: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, kAXChildrenAttribute as CFString, &rawChildren)

    if status == .success, let childElements = rawChildren as? [AXUIElement] {
        return childElements
    }
    return []
}
|
||||
|
||||
/// Returns the parent of an element.
///
/// - Parameter element: The accessibility element to query.
/// - Returns: The value of `AXParent`, or `nil` when the query fails, the
///   value is missing, or the value is not an `AXUIElement`.
static func getParent(_ element: AXUIElement) -> AXUIElement? {
    var rawParent: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, kAXParentAttribute as CFString, &rawParent)

    guard status == .success,
          let candidate = rawParent,
          // Confirm the CF type before the forced cast below.
          CFGetTypeID(candidate) == AXUIElementGetTypeID() else {
        return nil
    }
    return (candidate as! AXUIElement)
}
|
||||
|
||||
/// Collects the ancestors of an element, nearest first.
///
/// - Parameters:
///   - element: The element whose ancestors are collected.
///   - maxDepth: Maximum number of ancestors to return. Zero or a negative
///     value yields an empty chain.
/// - Returns: The parent, grandparent, and so on, stopping at `maxDepth`
///   entries or at the first element without a parent.
static func getParentChain(_ element: AXUIElement, maxDepth: Int = PARENT_CHAIN_MAX_DEPTH) -> [AXUIElement] {
    var ancestors: [AXUIElement] = []
    var node = element

    // A count-based loop (instead of `0..<maxDepth`) cannot trap on a negative depth.
    while ancestors.count < maxDepth, let parent = getParent(node) {
        ancestors.append(parent)
        node = parent
    }

    return ancestors
}
|
||||
|
||||
/// Walks the descendants of an element purely for the side effect of
/// querying them (intended to trigger lazy loading in the target app).
///
/// - Parameters:
///   - element: The root of the walk.
///   - maxDepth: Number of levels to descend; nothing happens when it is not
///     positive.
///
/// At most `TOUCH_DESCENDANTS_PREFIX_LIMIT` children are visited per element.
static func touchDescendants(_ element: AXUIElement, maxDepth: Int = TOUCH_DESCENDANTS_MAX_DEPTH) {
    if maxDepth <= 0 {
        return
    }

    let remainingDepth = maxDepth - 1
    let visitedChildren = getChildren(element).prefix(TOUCH_DESCENDANTS_PREFIX_LIMIT)
    for child in visitedChildren {
        touchDescendants(child, maxDepth: remainingDepth)
    }
}
|
||||
|
||||
// MARK: - Selection Range Helpers
|
||||
|
||||
/// Reads the `AXSelectedTextRange` attribute as a `CFRange`.
///
/// - Parameter element: The accessibility element to query.
/// - Returns: The selected range, or `nil` when the attribute cannot be read,
///   is not an `AXValue`, or does not wrap a `CFRange`.
static func getSelectedTextRange(_ element: AXUIElement) -> CFRange? {
    var rawValue: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(element, kAXSelectedTextRangeAttribute as CFString, &rawValue)

    guard status == .success,
          let raw = rawValue,
          // Forced casts between CF types are unchecked, so verify the type
          // ID before handing the object to the AXValue API.
          CFGetTypeID(raw) == AXValueGetTypeID() else {
        return nil
    }
    let axValue = raw as! AXValue

    var range = CFRange()
    // AXValueGetValue reports false when the wrapped value is not a CFRange.
    guard AXValueGetValue(axValue, .cfRange, &range) else {
        return nil
    }
    return range
}
|
||||
|
||||
/// Fetches the text covered by `range` through the `AXStringForRange`
/// parameterized attribute.
///
/// - Parameters:
///   - element: The accessibility element to query.
///   - range: The range of text to fetch.
/// - Returns: The text, or `nil` when the range cannot be wrapped in an
///   `AXValue`, the query fails, or the result is not a string.
static func getStringForRange(_ element: AXUIElement, range: CFRange) -> String? {
    // AXValueCreate needs a mutable copy to take a pointer to.
    var requestedRange = range
    guard let parameter = AXValueCreate(.cfRange, &requestedRange) else {
        return nil
    }

    var rawText: CFTypeRef?
    let status = AXUIElementCopyParameterizedAttributeValue(
        element,
        kAXStringForRangeParameterizedAttribute as CFString,
        parameter,
        &rawText
    )

    guard status == .success else {
        return nil
    }
    return rawText as? String
}
|
||||
|
||||
/// Reads the element's `AXNumberOfCharacters` attribute.
///
/// - Parameter element: The accessibility element to query.
/// - Returns: The character count, or `nil` when `getIntAttribute` cannot
///   read it.
static func getNumberOfCharacters(_ element: AXUIElement) -> Int? {
    let characterCount = getIntAttribute(element, kAXNumberOfCharactersAttribute)
    return characterCount
}
|
||||
|
||||
// MARK: - UTF-16 String Helpers
|
||||
|
||||
/// Moves a UTF-16 offset by one code unit when it would otherwise split a
/// surrogate pair.
///
/// The offset is shifted when the code unit at `offset` is a trail (low)
/// surrogate, or when the code unit just before `offset` is a lead (high)
/// surrogate. `.forward` shifts by `+1` (used for a window start), `.backward`
/// shifts by `-1` (used for a window end).
///
/// - Parameters:
///   - content: The string the offset refers to.
///   - offset: Offset in UTF-16 code units.
///   - direction: Which way to shift when an adjustment is needed.
/// - Returns: The adjusted offset. Offsets at or outside the string's bounds
///   (`offset <= 0` or `offset >= content.utf16.count`) are returned unchanged.
static func adjustForSurrogatePairs(_ content: String, offset: Int, direction: SurrogatePairDirection) -> Int {
    let units = content.utf16
    if offset <= 0 || offset >= units.count {
        return offset
    }

    let position = units.index(units.startIndex, offsetBy: offset)
    let landsOnTrail = UTF16.isTrailSurrogate(units[position])
    // Safe: offset > 0, so there is always a preceding code unit.
    let followsLead = UTF16.isLeadSurrogate(units[units.index(before: position)])

    guard landsOnTrail || followsLead else {
        return offset
    }

    switch direction {
    case .forward:
        return offset + 1
    case .backward:
        return offset - 1
    }
}
|
||||
|
||||
/// Restricts `value` to the closed interval `minValue...maxValue`.
///
/// - Parameters:
///   - value: The value to restrict.
///   - minValue: Lower bound.
///   - maxValue: Upper bound.
/// - Returns: `value` capped at `maxValue` and then raised to at least
///   `minValue`.
static func clamp<T: Comparable>(_ value: T, min minValue: T, max maxValue: T) -> T {
    let capped = min(maxValue, value)
    return max(minValue, capped)
}
|
||||
|
||||
/// Extracts a substring addressed in UTF-16 code units.
///
/// - Parameters:
///   - content: The source string.
///   - start: Start offset in UTF-16 code units; must be within
///     `0...content.utf16.count`.
///   - length: Number of code units requested; must be non-negative. A length
///     reaching past the end of the string is truncated to the end.
/// - Returns: The substring, or `nil` when `start` or `length` is out of range
///   or the selected code units cannot be turned into a `String`.
static func substringUTF16(_ content: String, start: Int, length: Int) -> String? {
    let utf16 = content.utf16
    let totalLength = utf16.count

    guard start >= 0, start <= totalLength, length >= 0 else {
        return nil
    }

    // Cap the length before adding it to `start`, so that a huge `length`
    // (for example `Int.max`) cannot overflow.
    let endOffset = start + min(length, totalLength - start)
    let startIdx = utf16.index(utf16.startIndex, offsetBy: start)
    let endIdx = utf16.index(utf16.startIndex, offsetBy: endOffset)

    return String(utf16[startIdx..<endIdx])
}
|
||||
|
||||
// MARK: - Process Helpers
|
||||
|
||||
/// Returns the process ID of the frontmost application.
///
/// - Returns: The PID reported by `NSWorkspace`, or `0` when there is no
///   frontmost application.
static func getFrontProcessID() -> pid_t {
    let frontmost = NSWorkspace.shared.frontmostApplication
    return frontmost?.processIdentifier ?? 0
}
|
||||
|
||||
/// Looks up the running application that owns a process ID.
///
/// - Parameter pid: The process ID.
/// - Returns: The result of `NSRunningApplication(processIdentifier:)`, which
///   is `nil` when no application matches.
static func getRunningApplication(pid: pid_t) -> NSRunningApplication? {
    let application = NSRunningApplication(processIdentifier: pid)
    return application
}
|
||||
|
||||
/// Returns the executable file name of the application owning a process ID.
///
/// - Parameter pid: The process ID.
/// - Returns: The last path component of the application's executable URL, or
///   `nil` when the application or its executable URL is unavailable.
static func getProcessName(pid: pid_t) -> String? {
    let executableURL = getRunningApplication(pid: pid)?.executableURL
    return executableURL?.lastPathComponent
}
|
||||
|
||||
/// Returns the bundle identifier of the application owning a process ID.
///
/// - Parameter pid: The process ID.
/// - Returns: The bundle identifier, or `nil` when the application is not
///   found or has none.
static func getBundleIdentifier(pid: pid_t) -> String? {
    guard let application = getRunningApplication(pid: pid) else {
        return nil
    }
    return application.bundleIdentifier
}
|
||||
|
||||
/// Returns the short version string of the application owning a process ID.
///
/// - Parameter pid: The process ID.
/// - Returns: The `CFBundleShortVersionString` entry of the application's
///   bundle info dictionary, or `nil` when the application, its bundle, or
///   that entry is unavailable.
static func getApplicationVersion(pid: pid_t) -> String? {
    guard let bundleURL = getRunningApplication(pid: pid)?.bundleURL else {
        return nil
    }
    guard let bundle = Bundle(url: bundleURL) else {
        return nil
    }
    return bundle.infoDictionary?["CFBundleShortVersionString"] as? String
}
|
||||
|
||||
/// Creates the top-level accessibility element for an application.
///
/// - Parameter pid: The application's process ID.
/// - Returns: The element produced by `AXUIElementCreateApplication`.
static func createApplicationElement(pid: pid_t) -> AXUIElement {
    let applicationElement = AXUIElementCreateApplication(pid)
    return applicationElement
}
|
||||
|
||||
/// Switches on accessibility support in apps that do not expose it by default.
///
/// Does nothing unless `bundleId` is listed in `appsRequiringManualAX`. For
/// listed apps, sets `AXManualAccessibility` and then `AXEnhancedUserInterface`
/// to true on the application element; the result of each call is ignored.
///
/// - Parameters:
///   - application: The application's top-level accessibility element.
///   - bundleId: The application's bundle identifier, if known.
static func enableManualAccessibilityIfNeeded(application: AXUIElement, bundleId: String?) {
    guard let bundleId = bundleId else { return }
    guard appsRequiringManualAX.contains(bundleId) else { return }

    let switches = ["AXManualAccessibility", "AXEnhancedUserInterface"]
    for attributeName in switches {
        AXUIElementSetAttributeValue(application, attributeName as CFString, kCFBooleanTrue)
    }
}
|
||||
|
||||
// MARK: - Permission Helpers
|
||||
|
||||
/// Reports whether this process is trusted for accessibility access.
///
/// - Parameter prompt: Value passed for `kAXTrustedCheckOptionPrompt`.
/// - Returns: The result of `AXIsProcessTrustedWithOptions`.
static func checkAccessibilityPermissions(prompt: Bool = false) -> Bool {
    let promptKey = kAXTrustedCheckOptionPrompt.takeUnretainedValue() as String
    let options: [String: Any] = [promptKey: prompt]
    let isTrusted = AXIsProcessTrustedWithOptions(options as CFDictionary)
    return isTrusted
}
|
||||
|
||||
// MARK: - WebArea Search Helpers
|
||||
|
||||
/// Reads the element's `AXRole` attribute.
///
/// - Parameter element: The accessibility element to query.
/// - Returns: The role string, or `nil` when `getStringAttribute` cannot read it.
static func getRole(_ element: AXUIElement) -> String? {
    let role = getStringAttribute(element, kAXRoleAttribute)
    return role
}
|
||||
|
||||
/// Returns the process ID that owns an accessibility element.
///
/// - Parameter element: The accessibility element to query.
/// - Returns: The PID, or `nil` when `AXUIElementGetPid` does not succeed.
static func getPid(_ element: AXUIElement) -> pid_t? {
    var owner: pid_t = 0
    guard AXUIElementGetPid(element, &owner) == .success else {
        return nil
    }
    return owner
}
|
||||
|
||||
/// Returns the focused UI element of the application with the given PID.
///
/// - Parameter pid: The application's process ID; `nil` and non-positive
///   values are rejected.
/// - Returns: The value of the application's `AXFocusedUIElement` attribute,
///   or `nil` when the PID is invalid, the query fails, or the value is not an
///   `AXUIElement`.
static func getAppFocusedElement(forPid pid: pid_t?) -> AXUIElement? {
    guard let pid = pid, pid > 0 else { return nil }

    let application = AXUIElementCreateApplication(pid)
    var focusedElement: CFTypeRef?
    let error = AXUIElementCopyAttributeValue(
        application,
        kAXFocusedUIElementAttribute as CFString,
        &focusedElement
    )

    guard error == .success,
          let element = focusedElement,
          // Forced casts between CF types are unchecked; verify the type ID
          // first, as getParent does.
          CFGetTypeID(element) == AXUIElementGetTypeID() else {
        return nil
    }

    return (element as! AXUIElement)
}
|
||||
|
||||
/// Tells whether `elementA` is `elementB` itself or sits below it in the
/// accessibility tree.
///
/// The check walks `elementA`'s parent chain, comparing each ancestor with
/// `elementB`, and gives up after `DESCENDANT_CHECK_MAX_DEPTH` levels.
///
/// - Parameters:
///   - elementA: The potential descendant.
///   - elementB: The potential ancestor.
/// - Returns: `true` when the elements are equal or `elementB` is found among
///   the inspected ancestors of `elementA`.
static func isDescendantOrEqual(_ elementA: AXUIElement, of elementB: AXUIElement) -> Bool {
    if CFEqual(elementA, elementB) {
        return true
    }

    var node = elementA
    // The level budget bounds the walk even if the parent chain is cyclic.
    var remainingLevels = DESCENDANT_CHECK_MAX_DEPTH

    while remainingLevels > 0 {
        guard let parent = getParent(node) else {
            return false
        }
        if CFEqual(parent, elementB) {
            return true
        }
        node = parent
        remainingLevels -= 1
    }

    return false
}
|
||||
|
||||
/// Tells whether an element exposes a text-marker selection.
///
/// - Parameter element: The accessibility element to query.
/// - Returns: `true` when `AXSelectedTextMarkerRange` can be read and is
///   non-nil, or when `AXSelectedTextMarkerRanges` can be read as a non-empty
///   array.
static func hasTextMarkerRange(_ element: AXUIElement) -> Bool {
    // Single range: any readable value counts, including a zero-length caret.
    var singleRange: CFTypeRef?
    let singleStatus = AXUIElementCopyAttributeValue(
        element,
        "AXSelectedTextMarkerRange" as CFString,
        &singleRange
    )
    if singleStatus == .success, singleRange != nil {
        return true
    }

    // Multiple ranges: at least one entry is required.
    var rangeList: CFTypeRef?
    let listStatus = AXUIElementCopyAttributeValue(
        element,
        "AXSelectedTextMarkerRanges" as CFString,
        &rangeList
    )
    guard listStatus == .success, let ranges = rangeList as? [Any] else {
        return false
    }
    return !ranges.isEmpty
}
|
||||
|
||||
/// Searches the descendants of `element` (breadth-first) for the text element
/// that most likely holds the cursor.
///
/// Only descendants are examined, never `element` itself. A descendant is a
/// candidate only when it has a non-empty `AXValue` string and a readable
/// `AXSelectedTextRange`. Candidates are ranked:
/// 1. a candidate whose `AXFocused` is true (the last one found wins),
/// 2. the first candidate whose selection has a non-zero location or length
///    and whose content is at least as long as that location,
/// 3. the candidate with the most content (first one found on ties).
///
/// - Parameters:
///   - element: Starting element (container).
///   - maxDepth: Elements at this depth or deeper are not expanded.
///   - maxElements: Maximum number of elements whose children are expanded.
/// - Returns: The best candidate by the ranking above, or `nil` if none found.
static func findDeepestTextElement(
    from element: AXUIElement,
    maxDepth: Int = FIND_TEXT_ELEMENT_MAX_DEPTH,
    maxElements: Int = FIND_TEXT_ELEMENT_MAX_ELEMENTS
) -> AXUIElement? {
    var focusedCandidate: AXUIElement?
    var selectionCandidate: AXUIElement?
    var fallbackCandidate: AXUIElement?
    var fallbackContentLength = 0
    var elementsVisited = 0

    // BFS queue of (element, depth). `head` marks the next entry to process,
    // which avoids the O(n) cost of removing from the front of an array.
    var queue: [(AXUIElement, Int)] = [(element, 0)]
    var head = 0

    while head < queue.count && elementsVisited < maxElements {
        let (currentElement, currentDepth) = queue[head]
        head += 1
        elementsVisited += 1

        guard currentDepth < maxDepth else { continue }

        for child in getChildren(currentElement) {
            queue.append((child, currentDepth + 1))

            // Every priority level needs non-empty content and a selection
            // range, so skip the remaining queries as soon as either is missing.
            guard let value = getStringAttribute(child, kAXValueAttribute), !value.isEmpty,
                  let range = getSelectedTextRange(child) else {
                continue
            }
            let contentLength = value.utf16.count

            // Priority 1: the element reports itself as focused.
            var focusedRef: CFTypeRef?
            let focusedError = AXUIElementCopyAttributeValue(child, kAXFocusedAttribute as CFString, &focusedRef)
            if focusedError == .success, let focused = focusedRef as? Bool, focused {
                focusedCandidate = child
            }

            // Priority 2: the selection indicates the cursor is here, and the
            // content is long enough to contain that location.
            if selectionCandidate == nil,
               range.location > 0 || range.length > 0,
               contentLength >= range.location {
                selectionCandidate = child
            }

            // Priority 3: remember the element with the most content.
            if contentLength > fallbackContentLength {
                fallbackContentLength = contentLength
                fallbackCandidate = child
            }
        }
    }

    return focusedCandidate ?? selectionCandidate ?? fallbackCandidate
}
|
||||
|
||||
/// Breadth-first search for `AXWebArea` elements below `element`.
///
/// Only descendants are examined, never `element` itself.
///
/// - Parameters:
///   - element: Starting element for the search.
///   - excludeElement: Element to leave out of the results (typically the
///     focused element).
///   - maxDepth: Elements at this depth or deeper are not expanded.
///   - maxElements: Maximum number of elements whose children are expanded.
/// - Returns: `(webArea, depth)` tuples in discovery order, where depth is the
///   number of levels below `element` (1 = direct child).
static func findWebAreasInDescendants(
    element: AXUIElement,
    excludeElement: AXUIElement,
    maxDepth: Int = FIND_WEB_AREAS_MAX_DEPTH,
    maxElements: Int = FIND_WEB_AREAS_MAX_ELEMENTS
) -> [(AXUIElement, Int)] {
    var results: [(AXUIElement, Int)] = []
    var elementsVisited = 0

    // BFS queue of (element, depth). `head` marks the next entry to process,
    // which avoids the O(n) cost of removing from the front of an array.
    var queue: [(AXUIElement, Int)] = [(element, 0)]
    var head = 0

    while head < queue.count && elementsVisited < maxElements {
        let (currentElement, currentDepth) = queue[head]
        head += 1
        elementsVisited += 1

        // Elements at the depth limit are counted but not expanded.
        guard currentDepth < maxDepth else { continue }

        let childDepth = currentDepth + 1
        for child in getChildren(currentElement) {
            if let role = getRole(child), role == "AXWebArea", !CFEqual(child, excludeElement) {
                results.append((child, childDepth))
            }
            queue.append((child, childDepth))
        }
    }

    return results
}
|
||||
|
||||
/// Collects the `AXWebArea` elements found among the ancestors of `element`.
///
/// - Parameters:
///   - element: Starting element; its ancestors are inspected, nearest first.
///   - excludeElement: Element to leave out of the results.
///   - maxLevels: Maximum number of ancestor levels to inspect (default 3).
/// - Returns: `(webArea, depth)` tuples where depth is negative: `-1` for the
///   parent, `-2` for the grandparent, and so on.
static func findWebAreasInAncestors(
    element: AXUIElement,
    excludeElement: AXUIElement,
    maxLevels: Int = 3
) -> [(AXUIElement, Int)] {
    var matches: [(AXUIElement, Int)] = []
    var node = element
    var level = 0

    // Stop at the level limit or at the first element without a parent.
    while level < maxLevels, let parent = getParent(node) {
        level += 1

        let isWebArea = getRole(parent) == "AXWebArea"
        if isWebArea && !CFEqual(parent, excludeElement) {
            matches.append((parent, -level))
        }

        node = parent
    }

    return matches
}
|
||||
}
|
||||
|
|
@ -0,0 +1,100 @@
|
|||
import Foundation
|
||||
import CoreGraphics
|
||||
|
||||
// =============================================================================
|
||||
// Constants - Centralized Configuration for Accessibility Extraction
|
||||
// =============================================================================
|
||||
// All magic numbers, timeouts, depths, and configuration values in one place.
|
||||
// This makes it easier to tune, document, and understand system behavior.
|
||||
// =============================================================================
|
||||
|
||||
// MARK: - Content Limits
|
||||
|
||||
/// Maximum UTF-16 code units for pre/post selection context
|
||||
let MAX_CONTEXT_LENGTH = 500
|
||||
|
||||
/// Maximum UTF-16 code units for full content before truncation
|
||||
let MAX_FULL_CONTENT_LENGTH = 50_000
|
||||
|
||||
/// Padding around selection when windowing content (UTF-16 code units)
|
||||
let WINDOW_PADDING = 25_000
|
||||
|
||||
// MARK: - Tree Traversal Limits
|
||||
|
||||
/// Default maximum depth for generic tree walks (BFS)
|
||||
let TREE_WALK_MAX_DEPTH = 8
|
||||
|
||||
/// Maximum elements to visit during tree searches
|
||||
let TREE_WALK_MAX_ELEMENTS = 100
|
||||
|
||||
/// Depth for touching descendants to trigger lazy loading
|
||||
let TOUCH_DESCENDANTS_MAX_DEPTH = 3
|
||||
|
||||
/// Maximum children to touch per level during lazy loading
|
||||
let TOUCH_DESCENDANTS_PREFIX_LIMIT = 8
|
||||
|
||||
/// Default depth for parent chain traversal
|
||||
let PARENT_CHAIN_MAX_DEPTH = 10
|
||||
|
||||
/// Depth limit for descendant-or-equal check (infinite loop guard)
|
||||
let DESCENDANT_CHECK_MAX_DEPTH = 20
|
||||
|
||||
/// Default depth for finding deepest text element
|
||||
let FIND_TEXT_ELEMENT_MAX_DEPTH = 10
|
||||
|
||||
/// Maximum elements to visit when finding text element
|
||||
let FIND_TEXT_ELEMENT_MAX_ELEMENTS = 200
|
||||
|
||||
/// Default depth for finding WebAreas in descendants
|
||||
let FIND_WEB_AREAS_MAX_DEPTH = 10
|
||||
|
||||
/// Maximum elements to visit when finding WebAreas
|
||||
let FIND_WEB_AREAS_MAX_ELEMENTS = 200
|
||||
|
||||
// MARK: - Browser-Specific Depths
|
||||
|
||||
/// Depth for Chromium browser URL search (deeper due to complex DOM)
|
||||
let CHROMIUM_URL_SEARCH_DEPTH = 30
|
||||
|
||||
/// Depth for non-Chromium browser URL search
|
||||
let NON_CHROMIUM_URL_SEARCH_DEPTH = 3
|
||||
|
||||
/// Depth for WebArea ancestor search (increased for deeply nested Electron apps like Notion)
|
||||
let WEB_AREA_ANCESTOR_SEARCH_DEPTH = 15
|
||||
|
||||
// MARK: - Timeouts
|
||||
|
||||
/// Best-effort timeout for extraction (milliseconds)
|
||||
let EXTRACTION_TIMEOUT_MS: Double = 600.0
|
||||
|
||||
/// Delay before restoring pasteboard after paste (seconds)
|
||||
let PASTE_RESTORE_DELAY_SECONDS: Double = 0.2
|
||||
|
||||
// MARK: - Virtual Key Codes (macOS)
|
||||
|
||||
/// Virtual key code for 'V' key
|
||||
let VK_V: CGKeyCode = 9
|
||||
|
||||
/// Virtual key code for Command key
|
||||
let VK_COMMAND: CGKeyCode = 55
|
||||
|
||||
/// Virtual key code for Function (Fn) key
|
||||
let VK_FUNCTION: CGKeyCode = 0x3F
|
||||
|
||||
// MARK: - Accessibility Tree Building
|
||||
|
||||
/// Maximum recursion depth for building accessibility tree
|
||||
let ACCESSIBILITY_TREE_MAX_DEPTH = 10
|
||||
|
||||
// MARK: - App Lists
|
||||
|
||||
/// Apps that need manual accessibility enabling (browsers)
|
||||
let appsRequiringManualAX: Set<String> = [
|
||||
"com.google.Chrome",
|
||||
"org.mozilla.firefox",
|
||||
"com.microsoft.edgemac",
|
||||
"com.apple.Safari",
|
||||
"com.brave.Browser",
|
||||
"com.operasoftware.Opera",
|
||||
"com.vivaldi.Vivaldi"
|
||||
]
|
||||
|
|
@ -111,6 +111,12 @@ namespace WindowsHelper.Models
|
|||
[JsonPropertyName("focusedElement")]
|
||||
public FocusedElement FocusedElement { get; set; }
|
||||
|
||||
[JsonPropertyName("metrics")]
|
||||
public Metrics Metrics { get; set; }
|
||||
|
||||
[JsonPropertyName("schemaVersion")]
|
||||
public SchemaVersion SchemaVersion { get; set; }
|
||||
|
||||
[JsonPropertyName("textSelection")]
|
||||
public TextSelection TextSelection { get; set; }
|
||||
|
||||
|
|
@ -129,6 +135,9 @@ namespace WindowsHelper.Models
|
|||
[JsonPropertyName("name")]
|
||||
public string Name { get; set; }
|
||||
|
||||
[JsonPropertyName("pid")]
|
||||
public long Pid { get; set; }
|
||||
|
||||
[JsonPropertyName("version")]
|
||||
public string Version { get; set; }
|
||||
}
|
||||
|
|
@ -141,9 +150,21 @@ namespace WindowsHelper.Models
|
|||
[JsonPropertyName("isEditable")]
|
||||
public bool IsEditable { get; set; }
|
||||
|
||||
[JsonPropertyName("isFocused")]
|
||||
public bool IsFocused { get; set; }
|
||||
|
||||
[JsonPropertyName("isPlaceholder")]
|
||||
public bool IsPlaceholder { get; set; }
|
||||
|
||||
[JsonPropertyName("isSecure")]
|
||||
public bool IsSecure { get; set; }
|
||||
|
||||
[JsonPropertyName("role")]
|
||||
public string Role { get; set; }
|
||||
|
||||
[JsonPropertyName("subrole")]
|
||||
public string Subrole { get; set; }
|
||||
|
||||
[JsonPropertyName("title")]
|
||||
public string Title { get; set; }
|
||||
|
||||
|
|
@ -151,14 +172,60 @@ namespace WindowsHelper.Models
|
|||
public string Value { get; set; }
|
||||
}
|
||||
|
||||
public partial class Metrics
|
||||
{
|
||||
[JsonPropertyName("errors")]
|
||||
public List<string> Errors { get; set; }
|
||||
|
||||
[JsonPropertyName("fallbacksUsed")]
|
||||
public List<The0> FallbacksUsed { get; set; }
|
||||
|
||||
[JsonPropertyName("textMarkerAttempted")]
|
||||
public bool TextMarkerAttempted { get; set; }
|
||||
|
||||
[JsonPropertyName("textMarkerSucceeded")]
|
||||
public bool TextMarkerSucceeded { get; set; }
|
||||
|
||||
[JsonPropertyName("timedOut")]
|
||||
public bool TimedOut { get; set; }
|
||||
|
||||
[JsonPropertyName("totalTimeMs")]
|
||||
[JsonConverter(typeof(MinMaxValueCheckConverter))]
|
||||
public double TotalTimeMs { get; set; }
|
||||
|
||||
[JsonPropertyName("webAreaFound")]
|
||||
public bool WebAreaFound { get; set; }
|
||||
|
||||
[JsonPropertyName("webAreaRetryAttempted")]
|
||||
public bool WebAreaRetryAttempted { get; set; }
|
||||
|
||||
[JsonPropertyName("webAreaRetrySucceeded")]
|
||||
public bool WebAreaRetrySucceeded { get; set; }
|
||||
}
|
||||
|
||||
public partial class TextSelection
|
||||
{
|
||||
[JsonPropertyName("extractionMethod")]
|
||||
public The0 ExtractionMethod { get; set; }
|
||||
|
||||
[JsonPropertyName("fullContent")]
|
||||
public string FullContent { get; set; }
|
||||
|
||||
[JsonPropertyName("fullContentTruncated")]
|
||||
public bool FullContentTruncated { get; set; }
|
||||
|
||||
[JsonPropertyName("hasMultipleRanges")]
|
||||
public bool HasMultipleRanges { get; set; }
|
||||
|
||||
[JsonPropertyName("isEditable")]
|
||||
public bool IsEditable { get; set; }
|
||||
|
||||
[JsonPropertyName("isPlaceholder")]
|
||||
public bool IsPlaceholder { get; set; }
|
||||
|
||||
[JsonPropertyName("isSecure")]
|
||||
public bool IsSecure { get; set; }
|
||||
|
||||
[JsonPropertyName("postSelectionText")]
|
||||
public string PostSelectionText { get; set; }
|
||||
|
||||
|
|
@ -457,7 +524,11 @@ namespace WindowsHelper.Models
|
|||
public bool? ShiftKey { get; set; }
|
||||
}
|
||||
|
||||
public enum Method { GetAccessibilityContext, GetAccessibilityTreeDetails, MuteSystemAudio, PasteText, RestoreSystemAudio, SetShortcuts };
|
||||
public enum Method { GetAccessibilityContext, GetAccessibilityStatus, GetAccessibilityTreeDetails, MuteSystemAudio, PasteText, RequestAccessibilityPermission, RestoreSystemAudio, SetShortcuts };
|
||||
|
||||
public enum The0 { ClipboardCopy, None, SelectedTextRange, SelectedTextRanges, StringForRange, TextMarkerRange, ValueAttribute };
|
||||
|
||||
public enum SchemaVersion { The20 };
|
||||
|
||||
public enum KeyDownEventType { KeyDown };
|
||||
|
||||
|
|
@ -585,6 +656,8 @@ namespace WindowsHelper.Models
|
|||
Converters =
|
||||
{
|
||||
MethodConverter.Singleton,
|
||||
The0Converter.Singleton,
|
||||
SchemaVersionConverter.Singleton,
|
||||
KeyDownEventTypeConverter.Singleton,
|
||||
KeyUpEventTypeConverter.Singleton,
|
||||
FlagsChangedEventTypeConverter.Singleton,
|
||||
|
|
@ -607,12 +680,16 @@ namespace WindowsHelper.Models
|
|||
{
|
||||
case "getAccessibilityContext":
|
||||
return Method.GetAccessibilityContext;
|
||||
case "getAccessibilityStatus":
|
||||
return Method.GetAccessibilityStatus;
|
||||
case "getAccessibilityTreeDetails":
|
||||
return Method.GetAccessibilityTreeDetails;
|
||||
case "muteSystemAudio":
|
||||
return Method.MuteSystemAudio;
|
||||
case "pasteText":
|
||||
return Method.PasteText;
|
||||
case "requestAccessibilityPermission":
|
||||
return Method.RequestAccessibilityPermission;
|
||||
case "restoreSystemAudio":
|
||||
return Method.RestoreSystemAudio;
|
||||
case "setShortcuts":
|
||||
|
|
@ -628,6 +705,9 @@ namespace WindowsHelper.Models
|
|||
case Method.GetAccessibilityContext:
|
||||
JsonSerializer.Serialize(writer, "getAccessibilityContext", options);
|
||||
return;
|
||||
case Method.GetAccessibilityStatus:
|
||||
JsonSerializer.Serialize(writer, "getAccessibilityStatus", options);
|
||||
return;
|
||||
case Method.GetAccessibilityTreeDetails:
|
||||
JsonSerializer.Serialize(writer, "getAccessibilityTreeDetails", options);
|
||||
return;
|
||||
|
|
@ -637,6 +717,9 @@ namespace WindowsHelper.Models
|
|||
case Method.PasteText:
|
||||
JsonSerializer.Serialize(writer, "pasteText", options);
|
||||
return;
|
||||
case Method.RequestAccessibilityPermission:
|
||||
JsonSerializer.Serialize(writer, "requestAccessibilityPermission", options);
|
||||
return;
|
||||
case Method.RestoreSystemAudio:
|
||||
JsonSerializer.Serialize(writer, "restoreSystemAudio", options);
|
||||
return;
|
||||
|
|
@ -650,6 +733,119 @@ namespace WindowsHelper.Models
|
|||
public static readonly MethodConverter Singleton = new MethodConverter();
|
||||
}
|
||||
|
||||
/// <summary>
/// JSON converter that translates <see cref="The0"/> members to and from their
/// camelCase string names.
/// </summary>
internal class The0Converter : JsonConverter<The0>
{
    /// <summary>Shared instance of the converter.</summary>
    public static readonly The0Converter Singleton = new The0Converter();

    /// <summary>Reports whether <paramref name="t"/> is exactly <see cref="The0"/>.</summary>
    public override bool CanConvert(Type t) => typeof(The0) == t;

    /// <summary>
    /// Reads the current token as a string and returns the enum member with that name.
    /// </summary>
    /// <exception cref="Exception">The string matches none of the known names.</exception>
    public override The0 Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        var name = reader.GetString();

        if (name == "clipboardCopy") { return The0.ClipboardCopy; }
        if (name == "none") { return The0.None; }
        if (name == "selectedTextRange") { return The0.SelectedTextRange; }
        if (name == "selectedTextRanges") { return The0.SelectedTextRanges; }
        if (name == "stringForRange") { return The0.StringForRange; }
        if (name == "textMarkerRange") { return The0.TextMarkerRange; }
        if (name == "valueAttribute") { return The0.ValueAttribute; }

        throw new Exception("Cannot unmarshal type The0");
    }

    /// <summary>
    /// Writes <paramref name="value"/> as its camelCase string name.
    /// </summary>
    /// <exception cref="Exception">The value is not a declared member.</exception>
    public override void Write(Utf8JsonWriter writer, The0 value, JsonSerializerOptions options)
    {
        // The name lookup throws before anything is written for an undeclared value.
        JsonSerializer.Serialize(writer, WireName(value), options);
    }

    /// <summary>Maps an enum member to the string used on the wire.</summary>
    private static string WireName(The0 value)
    {
        switch (value)
        {
            case The0.ClipboardCopy:
                return "clipboardCopy";
            case The0.None:
                return "none";
            case The0.SelectedTextRange:
                return "selectedTextRange";
            case The0.SelectedTextRanges:
                return "selectedTextRanges";
            case The0.StringForRange:
                return "stringForRange";
            case The0.TextMarkerRange:
                return "textMarkerRange";
            case The0.ValueAttribute:
                return "valueAttribute";
        }
        throw new Exception("Cannot marshal type The0");
    }
}
|
||||
|
||||
/// <summary>
/// JSON converter for <see cref="double"/> values that accepts only numbers
/// greater than or equal to zero, in both directions.
/// </summary>
internal class MinMaxValueCheckConverter : JsonConverter<double>
{
    /// <summary>Shared instance of the converter.</summary>
    public static readonly MinMaxValueCheckConverter Singleton = new MinMaxValueCheckConverter();

    /// <summary>Reports whether <paramref name="t"/> is exactly <see cref="double"/>.</summary>
    public override bool CanConvert(Type t) => typeof(double) == t;

    /// <summary>Reads a number and returns it when it is non-negative.</summary>
    /// <exception cref="Exception">The number fails the <c>&gt;= 0</c> check.</exception>
    public override double Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        var number = reader.GetDouble();

        // Written as a negated ">=" so that NaN is rejected exactly as a negative value is.
        if (!(number >= 0))
        {
            throw new Exception("Cannot unmarshal type double");
        }
        return number;
    }

    /// <summary>Writes <paramref name="value"/> when it is non-negative.</summary>
    /// <exception cref="Exception">The value fails the <c>&gt;= 0</c> check.</exception>
    public override void Write(Utf8JsonWriter writer, double value, JsonSerializerOptions options)
    {
        if (!(value >= 0))
        {
            throw new Exception("Cannot marshal type double");
        }
        JsonSerializer.Serialize(writer, value, options);
    }
}
|
||||
|
||||
/// <summary>
/// JSON converter for <see cref="SchemaVersion"/>: the only accepted wire value is the
/// string "2.0", which corresponds to <see cref="SchemaVersion.The20"/>.
/// </summary>
internal class SchemaVersionConverter : JsonConverter<SchemaVersion>
{
    /// <summary>Shared instance of the converter.</summary>
    public static readonly SchemaVersionConverter Singleton = new SchemaVersionConverter();

    /// <summary>Reports whether <paramref name="t"/> is exactly <see cref="SchemaVersion"/>.</summary>
    public override bool CanConvert(Type t) => typeof(SchemaVersion) == t;

    /// <summary>Reads the current token as a string and accepts only "2.0".</summary>
    /// <exception cref="Exception">The string is anything other than "2.0".</exception>
    public override SchemaVersion Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        var text = reader.GetString();
        if (text != "2.0")
        {
            throw new Exception("Cannot unmarshal type SchemaVersion");
        }
        return SchemaVersion.The20;
    }

    /// <summary>Writes <see cref="SchemaVersion.The20"/> as the string "2.0".</summary>
    /// <exception cref="Exception">The value is not <see cref="SchemaVersion.The20"/>.</exception>
    public override void Write(Utf8JsonWriter writer, SchemaVersion value, JsonSerializerOptions options)
    {
        if (value != SchemaVersion.The20)
        {
            throw new Exception("Cannot marshal type SchemaVersion");
        }
        JsonSerializer.Serialize(writer, "2.0", options);
    }
}
|
||||
|
||||
internal class KeyDownEventTypeConverter : JsonConverter<KeyDownEventType>
|
||||
{
|
||||
public override bool CanConvert(Type t) => t == typeof(KeyDownEventType);
|
||||
|
|
|
|||
|
|
@ -1,67 +1,316 @@
|
|||
import { z } from "zod";
|
||||
|
||||
// Request params
|
||||
// =============================================================================
|
||||
// Accessibility Context Schema
|
||||
// =============================================================================
|
||||
// Schema for the Swift helper accessibility layer.
|
||||
// Key features:
|
||||
// - TextMarker API support for Electron/Chromium apps
|
||||
// - Extraction method tracking for debugging
|
||||
// - Performance metrics
|
||||
// - Secure field and placeholder detection
|
||||
// - UTF-16 code unit semantics (documented)
|
||||
// =============================================================================
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Enums
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* How the text selection was extracted.
|
||||
* Priority order: textMarkerRange > selectedTextRange > selectedTextRanges > valueAttribute > stringForRange > clipboardCopy (Phase 2); "none" means no extraction was possible.
|
||||
*/
|
||||
export const ExtractionMethodSchema = z.enum([
|
||||
"textMarkerRange", // Primary - AXSelectedTextMarkerRange (works in Electron)
|
||||
"selectedTextRange", // Fallback 1 - AXSelectedTextRange
|
||||
"selectedTextRanges", // Fallback 2 - AXSelectedTextRanges (multi-select)
|
||||
"valueAttribute", // Fallback 3 - AXValue
|
||||
"stringForRange", // Fallback 4 - AXStringForRange
|
||||
"clipboardCopy", // Fallback 5 - Clipboard (Phase 2)
|
||||
"none", // No extraction possible (secure field, etc.)
|
||||
]);
|
||||
export type ExtractionMethod = z.infer<typeof ExtractionMethodSchema>;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Core Data Structures
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Character range for text selection.
|
||||
*
|
||||
* IMPORTANT: UTF-16 Code Unit Semantics
|
||||
* All `location` and `length` values are UTF-16 code unit offsets (equivalent to NSString indices),
|
||||
* NOT Unicode scalar or grapheme cluster counts.
|
||||
*
|
||||
* This matches macOS Accessibility API semantics where CFRange and NSRange use UTF-16 code units.
|
||||
* Characters outside the Basic Multilingual Plane (e.g., a single emoji like 😀) occupy 2 code units (surrogate pair).
|
||||
*
|
||||
* Examples:
|
||||
* - "a" (U+0061) = 1 code unit
|
||||
* - "😀" (U+1F600) = 2 code units
|
||||
* - "👨‍👩‍👧‍👦" = 11 code units (4 emoji x 2 code units + 3 zero-width joiners x 1 code unit)
|
||||
*
|
||||
* Implications:
|
||||
* - Swift: Use String.utf16 view for slicing
|
||||
* - TypeScript/JS: string.length counts code units, so indices align correctly
|
||||
*/
|
||||
export const SelectionRangeSchema = z.object({
|
||||
/** UTF-16 code unit offset from start (NOT grapheme count) */
|
||||
location: z.number().int().nonnegative(),
|
||||
/** UTF-16 code unit count (0 = cursor only, no selection) */
|
||||
length: z.number().int().nonnegative(),
|
||||
});
|
||||
export type SelectionRange = z.infer<typeof SelectionRangeSchema>;
|
||||
|
||||
/**
|
||||
* Text selection information.
|
||||
*
|
||||
* Null vs Empty String Semantics:
|
||||
* - null = unavailable/unknown (API failed, attribute doesn't exist, or suppressed for security)
|
||||
* - "" = available and empty (API succeeded, value exists, but is legitimately empty)
|
||||
*
|
||||
* Examples:
|
||||
* - Cursor-only: selectedText = "" (not null), selectionRange.length = 0
|
||||
* - Empty text field: fullContent = "" (not null)
|
||||
* - Secure field: all text fields are null (suppressed)
|
||||
*/
|
||||
export const TextSelectionSchema = z.object({
|
||||
// Core data
|
||||
/** Selected text ("" for cursor-only, null if unavailable/suppressed) */
|
||||
selectedText: z.string().nullable(),
|
||||
/** Full textbox content (window around selection if large, null if unavailable) */
|
||||
fullContent: z.string().nullable(),
|
||||
/** Up to 500 UTF-16 units before selection (null if unavailable) */
|
||||
preSelectionText: z.string().nullable(),
|
||||
/** Up to 500 UTF-16 units after selection (null if unavailable) */
|
||||
postSelectionText: z.string().nullable(),
|
||||
/** UTF-16 code unit range (null for secure fields or if unavailable) */
|
||||
selectionRange: SelectionRangeSchema.nullable(),
|
||||
|
||||
// Metadata
|
||||
/** Can user type in this element? */
|
||||
isEditable: z.boolean(),
|
||||
/** How was selection obtained? */
|
||||
extractionMethod: ExtractionMethodSchema,
|
||||
/** Multi-cursor/selection detected? */
|
||||
hasMultipleRanges: z.boolean(),
|
||||
|
||||
// Safety flags
|
||||
/** Is this showing placeholder text only (no user input)? */
|
||||
isPlaceholder: z.boolean(),
|
||||
/** Is this a password/secure field? (all content fields will be null) */
|
||||
isSecure: z.boolean(),
|
||||
|
||||
// Truncation info
|
||||
/** Was fullContent truncated/windowed due to size limits? */
|
||||
fullContentTruncated: z.boolean(),
|
||||
});
|
||||
export type TextSelection = z.infer<typeof TextSelectionSchema>;
|
||||
|
||||
/**
|
||||
* Focused element information.
|
||||
*/
|
||||
export const AXElementInfoSchema = z.object({
|
||||
/** AXRole (AXTextField, AXWebArea, etc.) */
|
||||
role: z.string().nullable(),
|
||||
/** AXSubrole if present */
|
||||
subrole: z.string().nullable(),
|
||||
/** AXTitle */
|
||||
title: z.string().nullable(),
|
||||
/** AXDescription */
|
||||
description: z.string().nullable(),
|
||||
/** AXValue (null for secure fields - suppressed for security) */
|
||||
value: z.string().nullable(),
|
||||
/** Can user type in this element? */
|
||||
isEditable: z.boolean(),
|
||||
/** Is this element focused? */
|
||||
isFocused: z.boolean(),
|
||||
/** Is this a secure/password field? */
|
||||
isSecure: z.boolean(),
|
||||
/** Is this showing placeholder text? */
|
||||
isPlaceholder: z.boolean(),
|
||||
});
|
||||
export type AXElementInfo = z.infer<typeof AXElementInfoSchema>;
|
||||
|
||||
/**
|
||||
* Application information.
|
||||
*/
|
||||
export const ApplicationInfoSchema = z.object({
|
||||
/** Application name */
|
||||
name: z.string().nullable(),
|
||||
/** Bundle identifier (e.g., com.apple.Safari) */
|
||||
bundleIdentifier: z.string().nullable(),
|
||||
/** Application version */
|
||||
version: z.string().nullable(),
|
||||
/** Process ID */
|
||||
pid: z.number().int(),
|
||||
});
|
||||
export type ApplicationInfo = z.infer<typeof ApplicationInfoSchema>;
|
||||
|
||||
/**
|
||||
* Window information.
|
||||
*/
|
||||
export const WindowInfoSchema = z.object({
|
||||
/** Window title */
|
||||
title: z.string().nullable(),
|
||||
/** Browser URL if detected */
|
||||
url: z.string().nullable(),
|
||||
});
|
||||
export type WindowInfo = z.infer<typeof WindowInfoSchema>;
|
||||
|
||||
/**
|
||||
* Extraction performance metrics.
|
||||
*
|
||||
* Note: Error strings must contain only technical error messages, never PII or content values.
|
||||
* Allowed: "TextMarker: AXError -25204", "Timeout exceeded"
|
||||
* Forbidden: "Failed to parse text: Hello World", "Value was: password123"
|
||||
*/
|
||||
export const ExtractionMetricsSchema = z.object({
|
||||
/** Total extraction time in milliseconds */
|
||||
totalTimeMs: z.number().nonnegative(),
|
||||
/** Did we attempt TextMarker extraction? */
|
||||
textMarkerAttempted: z.boolean(),
|
||||
/** Did TextMarker extraction succeed? */
|
||||
textMarkerSucceeded: z.boolean(),
|
||||
/** Which fallback methods were tried (in order) */
|
||||
fallbacksUsed: z.array(ExtractionMethodSchema),
|
||||
/** Technical error messages only - NO PII/content */
|
||||
errors: z.array(z.string()),
|
||||
/** Did extraction exceed best-effort time budget? */
|
||||
timedOut: z.boolean(),
|
||||
|
||||
// WebArea retry path metrics
|
||||
/** Did we search for WebArea candidates? (true when TextMarker fails on focused element) */
|
||||
webAreaRetryAttempted: z.boolean(),
|
||||
/** Did we find a different WebArea to switch to? */
|
||||
webAreaFound: z.boolean(),
|
||||
/** Did TextMarker work on the switched WebArea? */
|
||||
webAreaRetrySucceeded: z.boolean(),
|
||||
});
|
||||
export type ExtractionMetrics = z.infer<typeof ExtractionMetricsSchema>;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Main Response Schema
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Complete accessibility context response.
|
||||
*/
|
||||
export const AppContextSchema = z.object({
|
||||
/** Schema version for future evolution */
|
||||
schemaVersion: z.literal("2.0"),
|
||||
|
||||
// Application context
|
||||
/** Information about the frontmost application */
|
||||
application: ApplicationInfoSchema,
|
||||
/** Window information (may be null) */
|
||||
windowInfo: WindowInfoSchema.nullable(),
|
||||
|
||||
// Focus and selection
|
||||
/** Currently focused element (may be null if no focus) */
|
||||
focusedElement: AXElementInfoSchema.nullable(),
|
||||
/** Text selection information (may be null if no text field focused) */
|
||||
textSelection: TextSelectionSchema.nullable(),
|
||||
|
||||
// Timing
|
||||
/** Unix timestamp in seconds when context was captured */
|
||||
timestamp: z.number(),
|
||||
|
||||
// Debugging
|
||||
/** Performance metrics for this extraction */
|
||||
metrics: ExtractionMetricsSchema,
|
||||
});
|
||||
export type AppContext = z.infer<typeof AppContextSchema>;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// RPC Method Schemas
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Request params for getAccessibilityContext
|
||||
*/
|
||||
export const GetAccessibilityContextParamsSchema = z.object({
|
||||
editableOnly: z.boolean().optional().default(true), // Only return text selection if element is editable
|
||||
/**
|
||||
* Only return text selection if element is editable.
|
||||
* When true: searches for nearest editable element if current focus is not editable.
|
||||
* When false: returns whatever element is focused, editable or not.
|
||||
* Default: false
|
||||
*/
|
||||
editableOnly: z.boolean().optional().default(false),
|
||||
});
|
||||
export type GetAccessibilityContextParams = z.infer<
|
||||
typeof GetAccessibilityContextParamsSchema
|
||||
>;
|
||||
|
||||
// Data structures for the result
|
||||
const SelectionRangeSchema = z.object({
|
||||
location: z.number().int(),
|
||||
length: z.number().int(),
|
||||
});
|
||||
|
||||
const ApplicationInfoSchema = z.object({
|
||||
name: z.string().nullable(),
|
||||
bundleIdentifier: z.string().nullable(),
|
||||
version: z.string().nullable(),
|
||||
});
|
||||
|
||||
const FocusedElementInfoSchema = z.object({
|
||||
role: z.string().nullable(), // Main accessibility role (e.g., "AXTextField", "AXButton")
|
||||
isEditable: z.boolean(),
|
||||
title: z.string().nullable(),
|
||||
description: z.string().nullable(),
|
||||
value: z.string().nullable(),
|
||||
});
|
||||
|
||||
const TextSelectionInfoSchema = z.object({
|
||||
selectedText: z.string().nullable(), // Nullable when only cursor position is available (no selection)
|
||||
fullContent: z.string().nullable(),
|
||||
preSelectionText: z.string().nullable(), // Last 500 chars before cursor/selection (closest to cursor)
|
||||
postSelectionText: z.string().nullable(), // First 500 chars after cursor/selection (closest to cursor)
|
||||
selectionRange: SelectionRangeSchema.nullable(),
|
||||
isEditable: z.boolean(),
|
||||
});
|
||||
|
||||
const WindowInfoSchema = z.object({
|
||||
title: z.string().nullable(),
|
||||
url: z.string().nullable(), // Browser URL if available
|
||||
});
|
||||
|
||||
const AccessibilityContextSchema = z.object({
|
||||
application: ApplicationInfoSchema,
|
||||
focusedElement: FocusedElementInfoSchema.nullable(),
|
||||
textSelection: TextSelectionInfoSchema.nullable(),
|
||||
windowInfo: WindowInfoSchema.nullable(),
|
||||
timestamp: z.number(),
|
||||
});
|
||||
|
||||
// Response result
|
||||
/**
|
||||
* Response result for getAccessibilityContext
|
||||
*/
|
||||
export const GetAccessibilityContextResultSchema = z.object({
|
||||
context: AccessibilityContextSchema.nullable(),
|
||||
context: AppContextSchema.nullable(),
|
||||
});
|
||||
export type GetAccessibilityContextResult = z.infer<
|
||||
typeof GetAccessibilityContextResultSchema
|
||||
>;
|
||||
|
||||
// Export individual schemas for potential reuse
|
||||
export type ApplicationInfo = z.infer<typeof ApplicationInfoSchema>;
|
||||
export type FocusedElementInfo = z.infer<typeof FocusedElementInfoSchema>;
|
||||
export type TextSelectionInfo = z.infer<typeof TextSelectionInfoSchema>;
|
||||
export type WindowInfo = z.infer<typeof WindowInfoSchema>;
|
||||
export type AccessibilityContext = z.infer<typeof AccessibilityContextSchema>;
|
||||
export type SelectionRange = z.infer<typeof SelectionRangeSchema>;
|
||||
/**
|
||||
* Request params for getAccessibilityStatus
|
||||
*/
|
||||
export const GetAccessibilityStatusParamsSchema = z.object({});
|
||||
export type GetAccessibilityStatusParams = z.infer<
|
||||
typeof GetAccessibilityStatusParamsSchema
|
||||
>;
|
||||
|
||||
/**
|
||||
* Response result for getAccessibilityStatus
|
||||
*/
|
||||
export const GetAccessibilityStatusResultSchema = z.object({
|
||||
/** Does the app have accessibility permission? */
|
||||
hasPermission: z.boolean(),
|
||||
/** Is accessibility enabled system-wide? */
|
||||
isEnabled: z.boolean(),
|
||||
});
|
||||
export type GetAccessibilityStatusResult = z.infer<
|
||||
typeof GetAccessibilityStatusResultSchema
|
||||
>;
|
||||
|
||||
/**
|
||||
* Request params for requestAccessibilityPermission
|
||||
*/
|
||||
export const RequestAccessibilityPermissionParamsSchema = z.object({});
|
||||
export type RequestAccessibilityPermissionParams = z.infer<
|
||||
typeof RequestAccessibilityPermissionParamsSchema
|
||||
>;
|
||||
|
||||
/**
|
||||
* Response result for requestAccessibilityPermission
|
||||
*/
|
||||
export const RequestAccessibilityPermissionResultSchema = z.object({
|
||||
/** Was permission granted? */
|
||||
granted: z.boolean(),
|
||||
});
|
||||
export type RequestAccessibilityPermissionResult = z.infer<
|
||||
typeof RequestAccessibilityPermissionResultSchema
|
||||
>;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Constants (for reference - actual values defined in Swift)
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Context extraction limits (UTF-16 code units).
|
||||
* These are documented here for reference; actual enforcement is in Swift.
|
||||
*/
|
||||
export const ACCESSIBILITY_CONSTANTS = {
|
||||
/** Max UTF-16 units for pre/post selection context */
|
||||
MAX_CONTEXT_LENGTH: 500,
|
||||
/** Max UTF-16 units for fullContent window */
|
||||
MAX_FULL_CONTENT_LENGTH: 50000,
|
||||
/** UTF-16 units of padding around selection for windowing */
|
||||
WINDOW_PADDING: 25000,
|
||||
/** Best-effort timeout target in milliseconds */
|
||||
BEST_EFFORT_TIMEOUT_MS: 600,
|
||||
/** Max depth for element tree search */
|
||||
TREE_WALK_MAX_DEPTH: 8,
|
||||
/** Max elements to search in tree walk */
|
||||
TREE_WALK_MAX_ELEMENTS: 100,
|
||||
} as const;
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@ import { PasteTextParamsSchema } from "../methods/paste-text.js";
|
|||
const RPCMethodNameSchema = z.union([
|
||||
z.literal("getAccessibilityTreeDetails"),
|
||||
z.literal("getAccessibilityContext"),
|
||||
z.literal("getAccessibilityStatus"),
|
||||
z.literal("requestAccessibilityPermission"),
|
||||
z.literal("pasteText"),
|
||||
z.literal("muteSystemAudio"),
|
||||
z.literal("restoreSystemAudio"),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue