fix(agent): enforce sufficient search-fetch evidence

2026-02-17 02:08:15 +08:00 · 2026-02-17 02:08:15 +08:00 · 850d55336a
commit 850d55336a
parent b5b65c6bae
3 changed files with 245 additions and 18 deletions
--- a/packages/core/src/agent/runner.ts
+++ b/packages/core/src/agent/runner.ts
@ -44,6 +44,7 @@ import {
 import type { AuthProfileFailureReason } from "./auth-profiles/index.js";
 import {
  analyzeCrossTurnWebFetchNeed,
+  resolveWebFetchRequirementFromPrompt,
  shouldEnforceWebFetchAfterSearch,
  summarizeWebToolUsage,
  type ToolExecutionRecord,
@ -133,15 +134,45 @@ function formatRunLogToolSummary(tool: string, details: Record<string, unknown>
  }
 }

-const WEB_SEARCH_FETCH_ENFORCEMENT_PROMPT = [
-  "You used web_search but did not complete a successful web_fetch in this turn.",
-  "Search snippets are incomplete previews and are not sufficient evidence for detailed claims.",
-  "Before finalizing your answer, you MUST:",
-  "1) Pick the 1-3 most relevant URLs from the web_search results.",
-  "2) Call web_fetch on those URLs.",
-  "3) Revise your answer based on fetched content.",
-  "If all fetch attempts fail, explicitly say so and avoid relying on snippets for specific claims.",
-].join("\n");
+/**
+ * Builds the follow-up prompt sent when a turn used web_search but its
+ * web_fetch coverage is still incomplete.
+ *
+ * @param params.requiredMinFetchSuccess Minimum number of successful web_fetch calls required for the turn.
+ * @param params.fetchSuccess Number of successful web_fetch calls already made in the turn.
+ * @param params.needsFollowupForLatestSearch True when the latest successful search has no successful fetch after it.
+ * @returns The prompt text and the number of additional successful fetches it asks for (never below 1).
+ */
+function buildWebSearchFetchEnforcementPrompt(params: {
+  requiredMinFetchSuccess: number;
+  fetchSuccess: number;
+  needsFollowupForLatestSearch: boolean;
+}): { prompt: string; additionalFetchNeeded: number } {
+  // Floor of 1: the prompt always asks for at least one more successful fetch,
+  // even when the numeric requirement is already satisfied.
+  const additionalFetchNeeded = Math.max(
+    1,
+    params.requiredMinFetchSuccess - params.fetchSuccess,
+    params.needsFollowupForLatestSearch ? 1 : 0,
+  );
+
+  // Keep the suggested URL count consistent with the fetch count demanded in
+  // step 2. A fixed "1-3" contradicts a request for four fetches and suggests
+  // too few URLs whenever more than one additional fetch is needed.
+  const maxUrlsToPick = Math.max(3, additionalFetchNeeded);
+  const urlPickRange =
+    additionalFetchNeeded === maxUrlsToPick
+      ? `${additionalFetchNeeded}`
+      : `${additionalFetchNeeded}-${maxUrlsToPick}`;
+
+  const lines = [
+    "You used web_search, but web evidence coverage for this turn is still incomplete.",
+    "Search snippets are incomplete previews and are not sufficient evidence for detailed claims.",
+  ];
+
+  if (params.requiredMinFetchSuccess > 1) {
+    lines.push(
+      `This task currently requires at least ${params.requiredMinFetchSuccess} successful web_fetch calls.`,
+    );
+  }
+
+  if (params.needsFollowupForLatestSearch) {
+    lines.push(
+      "You performed another successful web_search after your last successful web_fetch. " +
+      "You must fetch URLs from the latest search results before finalizing.",
+    );
+  }
+
+  lines.push(
+    "Before finalizing your answer, you MUST:",
+    `1) Pick the ${urlPickRange} most relevant URLs from the latest successful web_search results.`,
+    `2) Complete at least ${additionalFetchNeeded} additional successful web_fetch call(s).`,
+    "3) Revise your answer based on fetched page content.",
+    "If all additional fetch attempts fail, explicitly say so and avoid relying on snippets for specific claims.",
+  );
+
+  return { prompt: lines.join("\n"), additionalFetchNeeded };
+}

 const CROSS_TURN_WEB_FETCH_ENFORCEMENT_PROMPT = [
  "You are about to finalize a web-dependent answer, but no successful web_fetch happened in this turn.",
@ -590,7 +621,10 @@ export class Agent {
            messages: this.agent.state.messages.length,
          });
          await this.agent.prompt(prompt);
-          await this.enforceWebFetchAfterSearchIfNeeded(toolExecutionStartIndex);
+          await this.enforceWebFetchAfterSearchIfNeeded({
+            toolExecutionStartIndex,
+            userPrompt: prompt,
+          });
          await this.enforceCrossTurnWebFetchIfNeeded({
            toolExecutionStartIndex,
            userPrompt: prompt,
@ -816,9 +850,10 @@ export class Agent {
    this.session.setApiKey(this.currentApiKey);
  }

-  private async enforceWebFetchAfterSearchIfNeeded(
-    toolExecutionStartIndex: number,
-  ): Promise<void> {
+  private async enforceWebFetchAfterSearchIfNeeded(params: {
+    toolExecutionStartIndex: number;
+    userPrompt: string;
+  }): Promise<void> {
    if (this._internalRun) return;

    const activeTools = new Set(
@ -828,32 +863,48 @@ export class Agent {
    const webFetchAvailable = activeTools.has("web_fetch");

    const currentTurnExecutions = this.currentRunToolExecutions.slice(
-      toolExecutionStartIndex,
+      params.toolExecutionStartIndex,
    );
    const usage = summarizeWebToolUsage(currentTurnExecutions);
+    const requirement = resolveWebFetchRequirementFromPrompt(params.userPrompt);

    if (
      !shouldEnforceWebFetchAfterSearch({
        usage,
        webSearchAvailable,
        webFetchAvailable,
+        requiredMinFetchSuccess: requirement.requiredMinFetchSuccess,
      })
    ) {
      return;
    }

+    const { prompt, additionalFetchNeeded } = buildWebSearchFetchEnforcementPrompt({
+      requiredMinFetchSuccess: requirement.requiredMinFetchSuccess,
+      fetchSuccess: usage.fetchSuccess,
+      needsFollowupForLatestSearch: usage.searchNeedsFollowupFetch,
+    });
+
    this.runLog.log("web_search_fetch_guard", {
      search_calls: usage.searchCalls,
      search_success: usage.searchSuccess,
      search_with_results: usage.searchSuccessWithResults,
+      search_needs_followup_fetch: usage.searchNeedsFollowupFetch,
      fetch_calls: usage.fetchCalls,
      fetch_success: usage.fetchSuccess,
+      required_min_fetch_success: requirement.requiredMinFetchSuccess,
+      prompt_suggests_research_depth: requirement.promptSuggestsResearchDepth,
+      prompt_multi_source_cue: requirement.multiSourceCue,
+      prompt_explicit_min_fetch: requirement.explicitMinFetchFromPrompt,
    });

    try {
-      await this.agent.prompt(WEB_SEARCH_FETCH_ENFORCEMENT_PROMPT);
+      await this.agent.prompt(prompt);
      this.runLog.log("web_search_fetch_guard_applied", {
        search_with_results: usage.searchSuccessWithResults,
+        search_needs_followup_fetch: usage.searchNeedsFollowupFetch,
+        required_min_fetch_success: requirement.requiredMinFetchSuccess,
+        additional_fetch_needed: additionalFetchNeeded,
      });
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
--- a/packages/core/src/agent/web-tools-policy.test.ts
+++ b/packages/core/src/agent/web-tools-policy.test.ts
@ -1,6 +1,7 @@
 import { describe, expect, it } from "vitest";
 import {
  analyzeCrossTurnWebFetchNeed,
+  resolveWebFetchRequirementFromPrompt,
  shouldEnforceWebFetchAfterSearch,
  summarizeWebToolUsage,
  type ToolExecutionRecord,
@ -31,6 +32,7 @@ describe("web-tools-policy", () => {
      expect(usage.searchCalls).toBe(1);
      expect(usage.searchSuccess).toBe(1);
      expect(usage.searchSuccessWithResults).toBe(1);
+      expect(usage.searchNeedsFollowupFetch).toBe(true);
      expect(usage.fetchCalls).toBe(0);
      expect(usage.fetchSuccess).toBe(0);
    });
@ -46,6 +48,22 @@ describe("web-tools-policy", () => {
      expect(usage.searchCalls).toBe(1);
      expect(usage.searchSuccess).toBe(0);
      expect(usage.searchSuccessWithResults).toBe(0);
+      expect(usage.searchNeedsFollowupFetch).toBe(false);
+    });
+
+    it("marks latest search as covered when successful fetch follows", () => {
+      const usage = summarizeWebToolUsage([
+        buildRecord({
+          toolName: "web_search",
+          details: { count: 1, results: [{}] },
+        }),
+        buildRecord({
+          toolName: "web_fetch",
+          details: { status: 200, length: 1024 },
+        }),
+      ]);
+
+      expect(usage.searchNeedsFollowupFetch).toBe(false);
    });
  });

@ -88,6 +106,53 @@ describe("web-tools-policy", () => {
      ).toBe(false);
    });

+    it("enforces when the latest successful search has no follow-up fetch", () => {
+      const usage = summarizeWebToolUsage([
+        buildRecord({
+          toolName: "web_search",
+          details: { count: 2, results: [{}, {}] },
+        }),
+        buildRecord({
+          toolName: "web_fetch",
+          details: { status: 200, length: 1200 },
+        }),
+        buildRecord({
+          toolName: "web_search",
+          details: { count: 3, results: [{}, {}, {}] },
+        }),
+      ]);
+
+      expect(
+        shouldEnforceWebFetchAfterSearch({
+          usage,
+          webSearchAvailable: true,
+          webFetchAvailable: true,
+        }),
+      ).toBe(true);
+    });
+
+    it("enforces when prompt requires deeper evidence coverage", () => {
+      const usage = summarizeWebToolUsage([
+        buildRecord({
+          toolName: "web_search",
+          details: { count: 6, results: [{}, {}, {}] },
+        }),
+        buildRecord({
+          toolName: "web_fetch",
+          details: { status: 200, length: 2200 },
+        }),
+      ]);
+
+      expect(
+        shouldEnforceWebFetchAfterSearch({
+          usage,
+          webSearchAvailable: true,
+          webFetchAvailable: true,
+          requiredMinFetchSuccess: 2,
+        }),
+      ).toBe(true);
+    });
+
    it("does not enforce when search returns no results", () => {
      const usage = summarizeWebToolUsage([
        buildRecord({
@ -230,4 +295,33 @@ describe("web-tools-policy", () => {
      expect(analysis.webCue).toBe(false);
    });
  });
+
+  describe("resolveWebFetchRequirementFromPrompt", () => {
+    it("requires deeper fetch coverage for research-style prompts", () => {
+      const result = resolveWebFetchRequirementFromPrompt(
+        "帮我调研一下 APPLE 最近的产品信息，并做分析。",
+      );
+
+      expect(result.requiredMinFetchSuccess).toBe(2);
+      expect(result.promptSuggestsResearchDepth).toBe(true);
+    });
+
+    it("uses explicit minimum source count when present", () => {
+      const result = resolveWebFetchRequirementFromPrompt(
+        "Please use at least 3 sources and summarize the latest updates.",
+      );
+
+      expect(result.requiredMinFetchSuccess).toBe(3);
+      expect(result.explicitMinFetchFromPrompt).toBe(3);
+    });
+
+    it("falls back to 1 for simple prompts", () => {
+      const result = resolveWebFetchRequirementFromPrompt(
+        "What is OpenAI's CEO?",
+      );
+
+      expect(result.requiredMinFetchSuccess).toBe(1);
+      expect(result.promptSuggestsResearchDepth).toBe(false);
+    });
+  });
 });
--- a/packages/core/src/agent/web-tools-policy.ts
+++ b/packages/core/src/agent/web-tools-policy.ts
@ -8,10 +8,19 @@ export type WebToolUsage = {
  searchCalls: number;
  searchSuccess: number;
  searchSuccessWithResults: number;
+  /** True when the latest successful search (with results) has no later successful fetch. */
+  searchNeedsFollowupFetch: boolean;
  fetchCalls: number;
  fetchSuccess: number;
 };

+/** Fetch-coverage requirement derived from a user prompt by resolveWebFetchRequirementFromPrompt. */
+export type WebFetchRequirement = {
+  /** Minimum number of successful web_fetch calls the prompt calls for. */
+  requiredMinFetchSuccess: number;
+  /** True when the prompt matched one of USER_RESEARCH_DEPTH_PATTERNS. */
+  promptSuggestsResearchDepth: boolean;
+  /** True when the prompt matched one of USER_MULTI_SOURCE_PATTERNS. */
+  multiSourceCue: boolean;
+  /** Explicit source count parsed from the prompt, or null when none was found. */
+  explicitMinFetchFromPrompt: number | null;
+};
+
 export type CrossTurnWebFetchGuardAnalysis = {
  shouldEnforce: boolean;
  explicitFetchRequest: boolean;
@ -45,6 +54,17 @@ const USER_WEB_CONTEXT_PATTERNS: RegExp[] = [
  /(?:\u7f51\u9875|\u7f51\u7ad9|\u7f51\u7edc|\u4e92\u8054\u7f51|\u94fe\u63a5|\u6765\u6e90|\u65b0\u95fb|\u62a5\u9053|\u6587\u7ae0)/,
 ];

+/** English and Chinese cues that a prompt asks for research-style depth (research, analysis, comparison, report, ...). */
+const USER_RESEARCH_DEPTH_PATTERNS: RegExp[] = [
+  /\b(research|investigate|analysis|analyze|compare|comparison|deep[-\s]?dive|survey|report|review)\b/i,
+  /(?:\u8c03\u7814|\u7814\u7a76|\u5206\u6790|\u6df1\u5ea6|\u5bf9\u6bd4|\u5bf9\u7167|\u6c47\u603b|\u76d8\u70b9|\u62a5\u544a|\u8bc4\u4f30|\u8bc4\u6d4b)/,
+];
+
+/**
+ * English and Chinese cues that a prompt wants more than one source, including
+ * Chinese "at least N sources/links/URLs/pages/articles" phrasing.
+ */
+const USER_MULTI_SOURCE_PATTERNS: RegExp[] = [
+  /\b(multiple|multi-source|across sources|different sources)\b/i,
+  /(?:\u591a\u6765\u6e90|\u591a\u4e2a\u6765\u6e90|\u4e0d\u540c\u6765\u6e90|\u591a\u7f51\u7ad9)/,
+  /(?:\u81f3\u5c11|\u4e0d\u5c11\u4e8e|\u6700\u5c11)\s*\d+\s*(?:\u4e2a|\u6761)?(?:\u6765\u6e90|\u94fe\u63a5|\u7f51\u5740|\u7f51\u9875|\u6587\u7ae0)/,
+];
+
 const USER_WEB_BLOCK_PATTERNS: RegExp[] = [
  /\b(do not|don't|no|without)\s+(browse|web|internet|web_search|web_fetch|fetch)\b/i,
  /\bonly\b.*\b(snippet|snippets)\b/i,
@ -63,6 +83,28 @@ function hasAnyPattern(text: string, patterns: RegExp[]): boolean {
  return patterns.some((pattern) => pattern.test(text));
 }

+/**
+ * Clamps a requested minimum fetch count to a whole number between 1 and 4.
+ * Non-finite input (NaN or +/-Infinity) falls back to 1.
+ */
+function normalizeMinFetchSuccess(raw: number): number {
+  const lowerBound = 1;
+  const upperBound = 4;
+
+  if (Number.isFinite(raw)) {
+    const whole = Math.floor(raw);
+    if (whole < lowerBound) return lowerBound;
+    if (whole > upperBound) return upperBound;
+    return whole;
+  }
+
+  return lowerBound;
+}
+
+/**
+ * Looks for an explicit "at least N sources"-style count in the prompt,
+ * trying the English phrasing first and the Chinese phrasing second.
+ *
+ * @returns The first usable count, passed through normalizeMinFetchSuccess, or null when nothing matches.
+ */
+function extractExplicitMinFetchFromPrompt(prompt: string): number | null {
+  const englishCount =
+    /\b(?:at least|minimum of|no less than)\s*(\d+)\s*(?:sources?|links?|urls?|articles?|pages?)\b/i;
+  const chineseCount =
+    /(?:\u81f3\u5c11|\u4e0d\u5c11\u4e8e|\u6700\u5c11)\s*(\d+)\s*(?:\u4e2a|\u6761)?(?:\u6765\u6e90|\u94fe\u63a5|\u7f51\u5740|\u7f51\u9875|\u6587\u7ae0)/;
+
+  for (const matcher of [englishCount, chineseCount]) {
+    const captured = prompt.match(matcher);
+    if (captured) {
+      const count = Number(captured[1]);
+      // A non-finite parse is skipped so the next pattern still gets a chance.
+      if (Number.isFinite(count)) return normalizeMinFetchSuccess(count);
+    }
+  }
+
+  return null;
+}
+
 function hasToolError(details: Record<string, unknown> | null): boolean {
  return details?.error === true;
 }
@ -93,9 +135,11 @@ export function summarizeWebToolUsage(records: ToolExecutionRecord[]): WebToolUs
    searchCalls: 0,
    searchSuccess: 0,
    searchSuccessWithResults: 0,
+    searchNeedsFollowupFetch: false,
    fetchCalls: 0,
    fetchSuccess: 0,
  };
+  let pendingSearchWithResults = false;

  for (const record of records) {
    const toolName = record.toolName.trim().toLowerCase();
@ -106,6 +150,7 @@ export function summarizeWebToolUsage(records: ToolExecutionRecord[]): WebToolUs
        usage.searchSuccess += 1;
        if (getSearchResultCount(record.details) > 0) {
          usage.searchSuccessWithResults += 1;
+          pendingSearchWithResults = true;
        }
      }
      continue;
@ -115,10 +160,12 @@ export function summarizeWebToolUsage(records: ToolExecutionRecord[]): WebToolUs
      usage.fetchCalls += 1;
      if (isSuccessfulExecution(record)) {
        usage.fetchSuccess += 1;
+        pendingSearchWithResults = false;
      }
    }
  }

+  usage.searchNeedsFollowupFetch = pendingSearchWithResults;
  return usage;
 }

@ -126,14 +173,49 @@ export function shouldEnforceWebFetchAfterSearch(params: {
  usage: WebToolUsage;
  webSearchAvailable: boolean;
  webFetchAvailable: boolean;
+  requiredMinFetchSuccess?: number;
 }): boolean {
-  const { usage, webSearchAvailable, webFetchAvailable } = params;
+  const {
+    usage,
+    webSearchAvailable,
+    webFetchAvailable,
+    requiredMinFetchSuccess = 1,
+  } = params;

  if (!webSearchAvailable || !webFetchAvailable) return false;
  if (usage.searchSuccessWithResults <= 0) return false;
-  if (usage.fetchSuccess > 0) return false;
+  if (usage.fetchSuccess <= 0) return true;
+  if (usage.searchNeedsFollowupFetch) return true;
+  if (usage.fetchSuccess < normalizeMinFetchSuccess(requiredMinFetchSuccess)) return true;

-  return true;
+  return false;
+}
+
+/**
+ * Derives how many successful web_fetch calls a user prompt calls for.
+ *
+ * The baseline is 1. A research-depth or multi-source cue raises it to 2, and
+ * an explicit source count in the prompt can raise it further. The result is
+ * passed through normalizeMinFetchSuccess before being returned.
+ *
+ * @param prompt The user prompt; a nullish value is treated as an empty string.
+ * @returns The resolved minimum together with the individual signals that produced it.
+ */
+export function resolveWebFetchRequirementFromPrompt(prompt: string): WebFetchRequirement {
+  const text = prompt ?? "";
+
+  const promptSuggestsResearchDepth = hasAnyPattern(text, USER_RESEARCH_DEPTH_PATTERNS);
+  const multiSourceCue = hasAnyPattern(text, USER_MULTI_SOURCE_PATTERNS);
+  const explicitMinFetchFromPrompt = extractExplicitMinFetchFromPrompt(text);
+
+  // Either heuristic cue alone is enough to ask for two fetched sources.
+  const heuristicMinimum = promptSuggestsResearchDepth || multiSourceCue ? 2 : 1;
+  // An explicit count can only raise the minimum, never lower it.
+  const strictestMinimum =
+    explicitMinFetchFromPrompt === null
+      ? heuristicMinimum
+      : Math.max(heuristicMinimum, explicitMinFetchFromPrompt);
+
+  return {
+    requiredMinFetchSuccess: normalizeMinFetchSuccess(strictestMinimum),
+    promptSuggestsResearchDepth,
+    multiSourceCue,
+    explicitMinFetchFromPrompt,
+  };
 }

 export function analyzeCrossTurnWebFetchNeed(params: {