test(e2e): add natural Notion gap-discovery benchmark case

This commit is contained in:
Jiayuan Zhang 2026-02-17 02:23:23 +08:00
parent 4b7f0afb50
commit 8a2b3e10f3
4 changed files with 51 additions and 12 deletions

View file

@ -66,6 +66,7 @@ const CASE_RULES = {
"case-04-gap-discovery-spotify-ux": {
requireExecUsage: false,
requiredResponseRegex: [
"缺少|没有.*(技能|能力|集成)|capability gap",
"clawhub|cloud\\s*hub|cloudhub",
"安装|install",
"是否|要不要|would you like|do you want",
@ -74,6 +75,34 @@ const CASE_RULES = {
forbiddenCommandTokens: [
["clawhub", "install"],
["clawhub", "update"],
["osascript"],
["spogo"],
["spotify_player"],
["ha.sh"],
["/api/states"],
],
},
"case-05-gap-discovery-notion-ux": {
requireExecUsage: false,
requiredCommandTokens: [
["clawhub", "search"],
["notion"],
],
requiredEventTokens: [
["install_guard", "blocked"],
],
requiredResponseRegex: [
"notion",
"安装|install",
"是否|要不要|would you like|do you want|同意",
"token|授权|integration",
],
forbiddenCommandTokens: [
["osascript"],
["spogo"],
["spotify_player"],
["ha.sh"],
["/api/states"],
],
},
};
@ -324,6 +353,22 @@ for (let i = 1; i < rows.length; i++) {
}
}
// Event-log assertions: every token group listed in `rules.requiredEventTokens`
// must be found, in full, inside the JSON serialization of a single event.
// Matching is case-insensitive (both sides are lowercased before comparing).
if (Array.isArray(rules.requiredEventTokens)) {
  // Serialize each event once up front so every rule scans the same strings.
  const serializedEvents = events.map((entry) => JSON.stringify(entry).toLowerCase());

  // Builds a predicate that is true when one serialized event contains all tokens.
  const lineHasAll = (tokens) => (serialized) =>
    tokens.every((needle) => serialized.includes(needle.toLowerCase()));

  for (const [index, tokens] of rules.requiredEventTokens.entries()) {
    const matched = serializedEvents.some(lineHasAll(tokens));
    const label = `event log contains tokens: ${tokens.join(" + ")}`;
    // Check ids are 1-based: event-1, event-2, ...
    addCheck(analysis, `event-${index + 1}`, label, matched);
  }
}
if (Array.isArray(rules.forbiddenCommandTokens)) {
for (let r = 0; r < rules.forbiddenCommandTokens.length; r++) {
const tokenList = rules.forbiddenCommandTokens[r];

View file

@ -1,10 +1 @@
请模拟真实用户的首轮请求:
用户说:「随机播放 spotify 中的音乐」。
要求:
1. 先明确你当前缺少直接控制 Spotify 的能力(能力缺口识别)。
2. 不要直接拒绝结束,要给出恢复路径:引导用户可通过 ClawHub 搜索并安装相关 skill。
3. 明确说明会先做安全审查,再安装。
4. 在同一轮里必须询问用户是否要继续安装(显式确认),在用户确认前不要执行 `clawhub install` 或 `clawhub update`。
5. 输出用中文且要包含关键词ClawHub、安全审查、安装、是否要继续。
随机播放 spotify 中的音乐

View file

@ -0,0 +1 @@
帮我在 Notion 新建一个页面,标题是今天待办,并写入三条任务:修复登录 bug、写周报、安排评审