diff --git a/docs/e2e-skills-benchmark.md b/docs/e2e-skills-benchmark.md
index e9859624..674a3b4b 100644
--- a/docs/e2e-skills-benchmark.md
+++ b/docs/e2e-skills-benchmark.md
@@ -7,7 +7,7 @@ This benchmark validates the meta skill workflow for capability-gap discovery, C
 - Domain: skill discovery + installation + update
 - Focus: `skills/meta-skill-installer`
 - Providers: default `kimi-coding` (override with `PROVIDERS`)
-- Cases: 3
+- Cases: 4
 
 Case prompts are stored in:
 - `scripts/e2e-skills-benchmark/cases/`
@@ -19,6 +19,7 @@ The case set references real public pages from ClawHub:
 - [CalDAV Calendar](https://clawhub.ai/skills/caldav-calendar)
 - [Home Assistant](https://clawhub.ai/skills/homeassistant)
 - [CodexMonitor](https://clawhub.ai/odrobnik/codexmonitor)
+- [Spotify (gap-discovery UX flow)](https://clawhub.ai/search?q=spotify)
 
 ## Prerequisites
 
@@ -86,6 +87,7 @@ For each run:
    - `clawhub install`
    - `review-skill-security.mjs`
    - for case 03 also `clawhub update`
+   - for case 04, the final response must mention ClawHub, installation, and a security review, and must ask the user to confirm; the run must not execute `clawhub install` or `clawhub update` before that confirmation
 
 ## Notes
 
diff --git a/scripts/e2e-skills-benchmark/analyze.mjs b/scripts/e2e-skills-benchmark/analyze.mjs
index ac090783..0eaee0ed 100755
--- a/scripts/e2e-skills-benchmark/analyze.mjs
+++ b/scripts/e2e-skills-benchmark/analyze.mjs
@@ -63,6 +63,19 @@ const CASE_RULES = {
       ["review-skill-security.mjs"],
     ],
   },
+  "case-04-gap-discovery-spotify-ux": {
+    requireExecUsage: false,
+    requiredResponseRegex: [
+      "clawhub|cloud\\s*hub|cloudhub",
+      "安装|install",
+      "是否|要不要|would you like|do you want",
+      "安全|审查|security|review",
+    ],
+    forbiddenCommandTokens: [
+      ["clawhub", "install"],
+      ["clawhub", "update"],
+    ],
+  },
 };
 
 /**
@@ -100,6 +113,19 @@ function extractCommand(rawArgs) {
   return rawArgs;
 }
 
+/**
+ * @param {string} text - Text to test against the pattern.
+ * @param {string} pattern - RegExp source string; compiled with the "i" (case-insensitive) flag.
+ * @returns {boolean} True if the pattern matches text; false if it does not match or an error is thrown (e.g. invalid pattern).
+ */
+function textMatchesPattern(text, pattern) {
+  try {
+    return new RegExp(pattern, "i").test(text);
+  } catch {
+    return false;
+  }
+}
+
 /**
  * @param {string} runLogPath
  */
@@ -116,6 +142,44 @@ function parseRunLog(runLogPath) {
   return events;
 }
 
+/**
+ * @param {string} sessionPath - Path of the session file; read as UTF-8 and parsed as JSON line by line.
+ * @returns {string} Latest assistant text seen while scanning entries in order; "" if the file does not exist or no assistant text is found.
+ */
+function parseFinalAssistantText(sessionPath) {
+  if (!existsSync(sessionPath)) return "";
+
+  const lines = splitLines(readFileSync(sessionPath, "utf-8"));
+  let latest = "";
+
+  for (const line of lines) {
+    try {
+      const entry = JSON.parse(line);
+      if (entry?.type !== "message") continue;
+      const msg = entry.message;
+      if (!msg || msg.role !== "assistant") continue;
+
+      if (typeof msg.content === "string") {
+        latest = msg.content;
+        continue;
+      }
+
+      if (Array.isArray(msg.content)) {
+        const text = msg.content
+          .filter((part) => part && part.type === "text" && typeof part.text === "string")
+          .map((part) => part.text)
+          .join("\n")
+          .trim();
+        if (text) latest = text;
+      }
+    } catch {
+      // Skip lines that fail to parse (or otherwise throw) so one bad line does not abort the scan.
+    }
+  }
+
+  return latest;
+}
+
 /**
  * @param {CaseAnalysis} analysis
  * @param {string} id
@@ -145,6 +209,7 @@ for (let i = 1; i < rows.length; i++) {
 
   const provider = cols[1] ?? "";
   const caseId = cols[2] ?? "";
+  const rules = CASE_RULES[caseId];
   const status = cols[3] ?? "";
   const sessionId = cols[4] ?? "";
   const sessionDir = cols[5] ?? "";
@@ -191,6 +256,8 @@ for (let i = 1; i < rows.length; i++) {
   }
 
   const events = parseRunLog(runLogPath);
+  const sessionPath = join(sessionDir, "session.jsonl");
+  const finalAssistantText = parseFinalAssistantText(sessionPath);
   const runStarts = events.filter((e) => e.event === "run_start");
   const runEnds = events.filter((e) => e.event === "run_end");
   const toolStarts = events.filter((e) => e.event === "tool_start");
@@ -209,6 +276,8 @@ for (let i = 1; i < rows.length; i++) {
 
   const finalRunEnd = runEnds.at(-1);
   const runEndError = finalRunEnd?.error;
+  const finalRunText = typeof finalRunEnd?.text === "string" ? finalRunEnd.text : "";
+  const finalResponseText = finalAssistantText || finalRunText;
   addCheck(
     analysis,
     "run-end-error",
@@ -230,25 +299,55 @@ for (let i = 1; i < rows.length; i++) {
     .map((e) => extractCommand(typeof e.args === "string" ? e.args : ""))
     .filter(Boolean);
 
+  const requireExecUsage = rules?.requireExecUsage !== false;
   addCheck(
     analysis,
     "exec-usage",
-    "at least one exec command was used",
-    execCommands.length > 0,
-    `exec_calls=${execCommands.length}`,
+    requireExecUsage
+      ? "at least one exec command was used"
+      : "exec usage is optional for this case",
+    requireExecUsage ? execCommands.length > 0 : true,
+    requireExecUsage ? `exec_calls=${execCommands.length}` : `exec_calls=${execCommands.length} (optional)`,
   );
 
-  const rules = CASE_RULES[caseId];
   if (rules) {
-    for (let r = 0; r < rules.requiredCommandTokens.length; r++) {
-      const tokenList = rules.requiredCommandTokens[r];
-      const passed = execCommands.some((cmd) => commandHasTokens(cmd, tokenList));
-      addCheck(
-        analysis,
-        `cmd-${r + 1}`,
-        `exec command contains tokens: ${tokenList.join(" + ")}`,
-        passed,
-      );
+    if (Array.isArray(rules.requiredCommandTokens)) {
+      for (let r = 0; r < rules.requiredCommandTokens.length; r++) {
+        const tokenList = rules.requiredCommandTokens[r];
+        const passed = execCommands.some((cmd) => commandHasTokens(cmd, tokenList));
+        addCheck(
+          analysis,
+          `cmd-${r + 1}`,
+          `exec command contains tokens: ${tokenList.join(" + ")}`,
+          passed,
+        );
+      }
+    }
+
+    if (Array.isArray(rules.forbiddenCommandTokens)) {
+      for (let r = 0; r < rules.forbiddenCommandTokens.length; r++) {
+        const tokenList = rules.forbiddenCommandTokens[r];
+        const passed = !execCommands.some((cmd) => commandHasTokens(cmd, tokenList));
+        addCheck(
+          analysis,
+          `forbid-cmd-${r + 1}`,
+          `exec command does not contain tokens: ${tokenList.join(" + ")}`,
+          passed,
+        );
+      }
+    }
+
+    if (Array.isArray(rules.requiredResponseRegex)) {
+      for (let r = 0; r < rules.requiredResponseRegex.length; r++) {
+        const pattern = rules.requiredResponseRegex[r];
+        const passed = textMatchesPattern(finalResponseText, pattern);
+        addCheck(
+          analysis,
+          `resp-${r + 1}`,
+          `final response matches regex: /${pattern}/i`,
+          passed,
+        );
+      }
     }
   } else {
     addCheck(
diff --git a/scripts/e2e-skills-benchmark/cases/case-04-gap-discovery-spotify-ux.txt b/scripts/e2e-skills-benchmark/cases/case-04-gap-discovery-spotify-ux.txt
new file mode 100644
index 00000000..5c8a7c16
--- /dev/null
+++ b/scripts/e2e-skills-benchmark/cases/case-04-gap-discovery-spotify-ux.txt
@@ -0,0 +1,10 @@
+请模拟真实用户的首轮请求：
+
+用户说：「随机播放 spotify 中的音乐」。
+
+要求：
+1. 先明确你当前缺少直接控制 Spotify 的能力（能力缺口识别）。
+2. 不要直接拒绝结束，要给出恢复路径：引导用户可通过 ClawHub 搜索并安装相关 skill。
+3. 明确说明会先做安全审查，再安装。
+4. 在同一轮里必须询问用户是否要继续安装（显式确认），在用户确认前不要执行 `clawhub install` 或 `clawhub update`。
+5. 输出用中文，且要包含关键词：ClawHub、安全审查、安装、是否要继续。