From ce6291e9eb4ab9f04d64d299c65800756d265db5 Mon Sep 17 00:00:00 2001
From: Jiayuan Zhang <forrestchang7@gmail.com>
Date: Tue, 17 Feb 2026 00:49:57 +0800
Subject: [PATCH] fix(agent): enforce web_fetch after successful web_search

---
 packages/core/src/agent/runner.ts             |  79 +++++++++-
 .../src/agent/system-prompt/sections.test.ts  |   2 +-
 .../core/src/agent/system-prompt/sections.ts  |  10 +-
 .../core/src/agent/web-tools-policy.test.ts   | 145 ++++++++++++++++++
 packages/core/src/agent/web-tools-policy.ts   |  86 +++++++++++
 5 files changed, 314 insertions(+), 8 deletions(-)
 create mode 100644 packages/core/src/agent/web-tools-policy.test.ts
 create mode 100644 packages/core/src/agent/web-tools-policy.ts
diff --git a/packages/core/src/agent/runner.ts b/packages/core/src/agent/runner.ts
index c812b4a1..1f137f94 100644
--- a/packages/core/src/agent/runner.ts
+++ b/packages/core/src/agent/runner.ts
@@ -42,6 +42,11 @@ import {
   type SystemPromptMode,
 } from "./system-prompt/index.js";
 import type { AuthProfileFailureReason } from "./auth-profiles/index.js";
+import {
+  shouldEnforceWebFetchAfterSearch,
+  summarizeWebToolUsage,
+  type ToolExecutionRecord,
+} from "./web-tools-policy.js";
 import {
   sanitizeToolCallInputs,
   sanitizeToolUseResultPairing,
@@ -127,6 +132,16 @@ function formatRunLogToolSummary(tool: string, details: Record<string, unknown>
   }
 }
 
+// Follow-up instruction sent when web_search returned results but no web_fetch succeeded.
+const WEB_SEARCH_FETCH_ENFORCEMENT_PROMPT =
+  "You used web_search but did not complete a successful web_fetch in this turn.\n" +
+  "Search snippets are incomplete previews and are not sufficient evidence for detailed claims.\n" +
+  "Before finalizing your answer, you MUST:\n" +
+  "1) Pick the 1-3 most relevant URLs from the web_search results.\n" +
+  "2) Call web_fetch on those URLs.\n" +
+  "3) Revise your answer based on fetched content.\n" +
+  "If all fetch attempts fail, explicitly say so and avoid relying on snippets for specific claims.";
+
 export class Agent {
   private readonly agent: PiAgentCore;
   private output;
@@ -141,6 +156,7 @@ export class Agent {
   private readonly stderr: NodeJS.WritableStream;
   private readonly runLog: RunLog;
   private readonly toolStartTimes = new Map<string, number>();
+  private currentRunToolExecutions: ToolExecutionRecord[] = [];
   private initialized = false;
 
   // Context window settings (for pre-flight compaction)
@@ -524,6 +540,7 @@ export class Agent {
     this.currentUserSource = options?.source;
     this._isRunning = true;
     this._aborted = false;
+    this.currentRunToolExecutions = [];
 
     const runStart = Date.now();
     this.runLog.log("run_start", {
@@ -552,6 +569,7 @@ export class Agent {
 
       // Loop to exhaust all candidate profiles on rotatable errors
       while (true) {
+        const toolExecutionStartIndex = this.currentRunToolExecutions.length;
         try {
           const llmStart = Date.now();
           this.runLog.log("llm_call", {
@@ -561,6 +579,7 @@ export class Agent {
             messages: this.agent.state.messages.length,
           });
           await this.agent.prompt(prompt);
+          await this.enforceWebFetchAfterSearchIfNeeded(toolExecutionStartIndex);
           this.runLog.log("llm_result", {
             duration_ms: Date.now() - llmStart,
           });
@@ -692,6 +711,7 @@ export class Agent {
       this._lastEventSavedAssistant = undefined;
       this.currentUserDisplayPrompt = undefined;
       this.currentUserSource = undefined;
+      this.currentRunToolExecutions = [];
       this.runLog.flush().catch(() => {});
     }
   }
@@ -781,6 +801,56 @@ export class Agent {
     this.session.setApiKey(this.currentApiKey);
   }
 
+  /**
+   * Issues one follow-up prompt when this attempt's tool calls include a
+   * web_search with results but no successful web_fetch. Best-effort: a failing
+   * follow-up prompt is logged and never rethrown.
+   *
+   * @param toolExecutionStartIndex Index into currentRunToolExecutions where this attempt began.
+   */
+  private async enforceWebFetchAfterSearchIfNeeded(
+    toolExecutionStartIndex: number,
+  ): Promise<void> {
+    // Skip internal runs, and never start another model call once the run is flagged as aborted.
+    if (this._internalRun || this._aborted) return;
+
+    const activeTools = new Set(
+      (this.agent.state.tools ?? []).map((tool) => tool.name.toLowerCase()),
+    );
+    const webSearchAvailable = activeTools.has("web_search");
+    const webFetchAvailable = activeTools.has("web_fetch");
+
+    // Only tool calls recorded since the start index belong to this attempt.
+    const currentTurnExecutions = this.currentRunToolExecutions.slice(toolExecutionStartIndex);
+    const usage = summarizeWebToolUsage(currentTurnExecutions);
+    if (!shouldEnforceWebFetchAfterSearch({ usage, webSearchAvailable, webFetchAvailable })) {
+      return;
+    }
+
+    this.runLog.log("web_search_fetch_guard", {
+      search_calls: usage.searchCalls,
+      search_success: usage.searchSuccess,
+      search_with_results: usage.searchSuccessWithResults,
+      fetch_calls: usage.fetchCalls,
+      fetch_success: usage.fetchSuccess,
+    });
+
+    try {
+      await this.agent.prompt(WEB_SEARCH_FETCH_ENFORCEMENT_PROMPT);
+      this.runLog.log("web_search_fetch_guard_applied", {
+        search_with_results: usage.searchSuccessWithResults,
+      });
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      this.runLog.log("web_search_fetch_guard_failed", {
+        error: message.slice(0, 200),
+      });
+      if (this.debug) {
+        this.stderr.write(`[web-guard] Failed to enforce search->fetch: ${message}\n`);
+      }
+    }
+  }
+
   private handleRunLogEvent(event: AgentEvent) {
     if (event.type === "tool_execution_start") {
       const toolName = (event as any).toolName ?? "unknown";
@@ -800,11 +870,18 @@ export class Agent {
       const resultText = extractRunLogResultText(result);
       const resultChars = resultText?.length ?? 0;
       const details = extractRunLogResultDetails(result);
+      const isError = Boolean((event as any).isError ?? false);
+
+      this.currentRunToolExecutions.push({
+        toolName,
+        isError,
+        details,
+      });
 
       const toolEndData: Record<string, unknown> = {
         tool: toolName,
         duration_ms,
-        is_error: (event as any).isError ?? false,
+        is_error: isError,
         result_chars: resultChars,
         result_summary: formatRunLogToolSummary(toolName, details),
       };
diff --git a/packages/core/src/agent/system-prompt/sections.test.ts b/packages/core/src/agent/system-prompt/sections.test.ts
index a1d2dd5a..75763e1e 100644
--- a/packages/core/src/agent/system-prompt/sections.test.ts
+++ b/packages/core/src/agent/system-prompt/sections.test.ts
@@ -181,7 +181,7 @@ describe("buildConditionalToolSections", () => {
     const result = buildConditionalToolSections(["web_search"], "full");
     const text = result.join("\n");
     expect(text).toContain("## Web Access");
-    expect(text).toContain("Web usage is conditional, not mandatory");
+    expect(text).toContain("you MUST call web_fetch");
   });
 
   it("adds dynamic evidence decision guidance when data tool is present", () => {
diff --git a/packages/core/src/agent/system-prompt/sections.ts b/packages/core/src/agent/system-prompt/sections.ts
index bf935fce..9fdb2591 100644
--- a/packages/core/src/agent/system-prompt/sections.ts
+++ b/packages/core/src/agent/system-prompt/sections.ts
@@ -364,14 +364,12 @@ export function buildConditionalToolSections(
       "## Web Access",
       "You have web access. Use it when the user asks about current events, needs up-to-date information, or requests content from URLs.",
       "Prefer web_search for discovery and web_fetch for specific URLs.",
-      "Web usage is conditional, not mandatory: call web tools when they materially improve evidence quality.",
+      "When web_search is used, treat snippets as incomplete previews rather than final evidence.",
       "",
       "### Search-then-Fetch",
-      "After web_search, evaluate whether the snippets contain enough detail to answer accurately.",
-      "If not, use web_fetch on the 1-3 most relevant URLs to get full content before answering.",
-      "Always fetch when the user asks for detailed explanations, comparisons, or analysis;",
-      "when snippets are vague or contradictory; or when the question requires specific data points.",
-      "Skip fetch when the answer is a simple fact clearly stated in the snippet or the user only wants a quick overview.",
+      "After every successful web_search, you MUST call web_fetch on 1-3 relevant URLs before detailed reasoning or factual claims.",
+      "Use fetched page content (not snippets) as the primary evidence for analysis and synthesis.",
+      "If all fetch attempts fail, explicitly report that limitation and avoid specific claims derived only from snippets.",
       "",
     );
   }
diff --git a/packages/core/src/agent/web-tools-policy.test.ts b/packages/core/src/agent/web-tools-policy.test.ts
new file mode 100644
index 00000000..bb613161
--- /dev/null
+++ b/packages/core/src/agent/web-tools-policy.test.ts
@@ -0,0 +1,145 @@
+import { describe, expect, it } from "vitest";
+import {
+  shouldEnforceWebFetchAfterSearch,
+  summarizeWebToolUsage,
+  type ToolExecutionRecord,
+} from "./web-tools-policy.js";
+
+/**
+ * Test helper: builds a ToolExecutionRecord, defaulting isError to false and details to null.
+ */
+function buildRecord(params: {
+  toolName: string;
+  isError?: boolean;
+  details?: Record<string, unknown> | null;
+}): ToolExecutionRecord {
+  const { toolName, isError, details } = params;
+  return { toolName, isError: isError ?? false, details: details ?? null };
+}
+
+describe("web-tools-policy", () => {
+  describe("summarizeWebToolUsage", () => {
+    it("counts successful web_search calls with results", () => {
+      const usage = summarizeWebToolUsage([
+        buildRecord({
+          toolName: "web_search",
+          details: { count: 3, results: [{}, {}, {}] },
+        }),
+      ]);
+      // A clean record with count > 0 bumps all three search counters; fetch counters stay 0.
+      expect(usage.searchCalls).toBe(1);
+      expect(usage.searchSuccess).toBe(1);
+      expect(usage.searchSuccessWithResults).toBe(1);
+      expect(usage.fetchCalls).toBe(0);
+      expect(usage.fetchSuccess).toBe(0);
+    });
+
+    it("does not count tool-level error payload as success", () => {
+      const usage = summarizeWebToolUsage([
+        buildRecord({
+          toolName: "web_search",
+          details: { error: true, code: "search_failed" },
+        }),
+      ]);
+      // details.error === true marks the call as failed even though isError is false.
+      expect(usage.searchCalls).toBe(1);
+      expect(usage.searchSuccess).toBe(0);
+      expect(usage.searchSuccessWithResults).toBe(0);
+    });
+  });
+
+  describe("shouldEnforceWebFetchAfterSearch", () => {
+    it("enforces when search has results but fetch never succeeded", () => {
+      const usage = summarizeWebToolUsage([
+        buildRecord({
+          toolName: "web_search",
+          details: { count: 2, results: [{}, {}] },
+        }),
+      ]);
+      // Results were found and no fetch succeeded, so the guard must fire.
+      expect(
+        shouldEnforceWebFetchAfterSearch({
+          usage,
+          webSearchAvailable: true,
+          webFetchAvailable: true,
+        }),
+      ).toBe(true);
+    });
+
+    it("does not enforce after a successful web_fetch", () => {
+      const usage = summarizeWebToolUsage([
+        buildRecord({
+          toolName: "web_search",
+          details: { count: 2, results: [{}, {}] },
+        }),
+        buildRecord({
+          toolName: "web_fetch",
+          details: { status: 200, length: 1024 },
+        }),
+      ]);
+      // One successful fetch in the same batch is enough to satisfy the policy.
+      expect(
+        shouldEnforceWebFetchAfterSearch({
+          usage,
+          webSearchAvailable: true,
+          webFetchAvailable: true,
+        }),
+      ).toBe(false);
+    });
+
+    it("does not enforce when search returns no results", () => {
+      const usage = summarizeWebToolUsage([
+        buildRecord({
+          toolName: "web_search",
+          details: { count: 0, results: [] },
+        }),
+      ]);
+      // count is 0, so there is nothing worth fetching.
+      expect(
+        shouldEnforceWebFetchAfterSearch({
+          usage,
+          webSearchAvailable: true,
+          webFetchAvailable: true,
+        }),
+      ).toBe(false);
+    });
+
+    it("does not enforce when web_fetch is unavailable", () => {
+      const usage = summarizeWebToolUsage([
+        buildRecord({
+          toolName: "web_search",
+          details: { count: 1, results: [{}] },
+        }),
+      ]);
+      // The guard cannot demand a tool that is not available.
+      expect(
+        shouldEnforceWebFetchAfterSearch({
+          usage,
+          webSearchAvailable: true,
+          webFetchAvailable: false,
+        }),
+      ).toBe(false);
+    });
+
+    it("enforces when fetch was attempted but failed", () => {
+      const usage = summarizeWebToolUsage([
+        buildRecord({
+          toolName: "web_search",
+          details: { count: 1, results: [{}] },
+        }),
+        buildRecord({
+          toolName: "web_fetch",
+          details: { error: true, code: "fetch_failed" },
+        }),
+      ]);
+      // A failed fetch counts as a call but not a success, so the guard still fires.
+      expect(
+        shouldEnforceWebFetchAfterSearch({
+          usage,
+          webSearchAvailable: true,
+          webFetchAvailable: true,
+        }),
+      ).toBe(true);
+    });
+  });
+});
diff --git a/packages/core/src/agent/web-tools-policy.ts b/packages/core/src/agent/web-tools-policy.ts
new file mode 100644
index 00000000..df6b999f
--- /dev/null
+++ b/packages/core/src/agent/web-tools-policy.ts
@@ -0,0 +1,86 @@
+export type ToolExecutionRecord = { // one recorded tool call, as consumed by summarizeWebToolUsage
+  toolName: string; // matched after trim + lower-case against "web_search" / "web_fetch"
+  isError: boolean; // true marks the call as failed regardless of the payload
+  details: Record<string, unknown> | null; // result payload; `error`, `count`, `results` are read
+};
+
+export type WebToolUsage = { // counters produced by summarizeWebToolUsage
+  searchCalls: number; // every web_search record, successful or not
+  searchSuccess: number; // web_search records with no error flag and no error payload
+  searchSuccessWithResults: number; // successful searches reporting at least one result
+  fetchCalls: number; // every web_fetch record, successful or not
+  fetchSuccess: number; // web_fetch records with no error flag and no error payload
+};
+
+function hasToolError(details: Record<string, unknown> | null): boolean {
+  return details != null && details.error === true; // only a literal `true` counts as an error
+}
+
+/**
+ * Reads how many results a web_search payload reported. A finite numeric
+ * `count` wins (floored, never negative); otherwise the length of a
+ * `results` array is used; anything else yields 0.
+ */
+function getSearchResultCount(details: Record<string, unknown> | null): number {
+  if (!details) return 0;
+  const declared = details.count;
+  if (typeof declared === "number" && Number.isFinite(declared)) {
+    return Math.max(0, Math.floor(declared));
+  }
+  const list = details.results;
+  return Array.isArray(list) ? list.length : 0;
+}
+
+// A call succeeded only if neither the record's flag nor its payload reports an error.
+function isSuccessfulExecution(record: ToolExecutionRecord): boolean {
+  const failed = record.isError || hasToolError(record.details);
+  return !failed;
+}
+
+/**
+ * Tallies web_search / web_fetch activity across the given tool executions.
+ * Tool names are matched after trimming and lower-casing; other tools are ignored.
+ *
+ * @param records - Tool executions to inspect.
+ * @returns Call and success counters for both web tools.
+ */
+export function summarizeWebToolUsage(records: ToolExecutionRecord[]): WebToolUsage {
+  let searchCalls = 0;
+  let searchSuccess = 0;
+  let searchSuccessWithResults = 0;
+  let fetchCalls = 0;
+  let fetchSuccess = 0;
+
+  for (const record of records) {
+    switch (record.toolName.trim().toLowerCase()) {
+      case "web_search": {
+        searchCalls += 1;
+        if (!isSuccessfulExecution(record)) break;
+        searchSuccess += 1;
+        // Searches that reported at least one result are tracked separately.
+        if (getSearchResultCount(record.details) > 0) searchSuccessWithResults += 1;
+        break;
+      }
+      case "web_fetch":
+        fetchCalls += 1;
+        if (isSuccessfulExecution(record)) fetchSuccess += 1;
+        break;
+    }
+  }
+
+  return { searchCalls, searchSuccess, searchSuccessWithResults, fetchCalls, fetchSuccess };
+}
+
+/**
+ * True only when both web tools are available, at least one search succeeded
+ * with results, and no fetch has succeeded yet.
+ */
+export function shouldEnforceWebFetchAfterSearch(params: {
+  usage: WebToolUsage;
+  webSearchAvailable: boolean;
+  webFetchAvailable: boolean;
+}): boolean {
+  const { usage, webSearchAvailable, webFetchAvailable } = params;
+  if (!(webSearchAvailable && webFetchAvailable)) return false;
+  return !(usage.searchSuccessWithResults <= 0 || usage.fetchSuccess > 0);
+}