fix(agent): enforce web_fetch after successful web_search

This commit is contained in:
Jiayuan Zhang 2026-02-17 00:49:57 +08:00
parent 292e2b9454
commit ce6291e9eb
5 changed files with 314 additions and 8 deletions

View file

@ -42,6 +42,11 @@ import {
type SystemPromptMode,
} from "./system-prompt/index.js";
import type { AuthProfileFailureReason } from "./auth-profiles/index.js";
import {
shouldEnforceWebFetchAfterSearch,
summarizeWebToolUsage,
type ToolExecutionRecord,
} from "./web-tools-policy.js";
import {
sanitizeToolCallInputs,
sanitizeToolUseResultPairing,
@ -127,6 +132,16 @@ function formatRunLogToolSummary(tool: string, details: Record<string, unknown>
}
}
/**
 * Follow-up prompt sent to the model when a turn used web_search but did not
 * complete a successful web_fetch. One instruction per line.
 */
const WEB_SEARCH_FETCH_ENFORCEMENT_PROMPT =
  "You used web_search but did not complete a successful web_fetch in this turn.\n" +
  "Search snippets are incomplete previews and are not sufficient evidence for detailed claims.\n" +
  "Before finalizing your answer, you MUST:\n" +
  "1) Pick the 1-3 most relevant URLs from the web_search results.\n" +
  "2) Call web_fetch on those URLs.\n" +
  "3) Revise your answer based on fetched content.\n" +
  "If all fetch attempts fail, explicitly say so and avoid relying on snippets for specific claims.";
export class Agent {
private readonly agent: PiAgentCore;
private output;
@ -141,6 +156,7 @@ export class Agent {
private readonly stderr: NodeJS.WritableStream;
private readonly runLog: RunLog;
private readonly toolStartTimes = new Map<string, number>();
private currentRunToolExecutions: ToolExecutionRecord[] = [];
private initialized = false;
// Context window settings (for pre-flight compaction)
@ -524,6 +540,7 @@ export class Agent {
this.currentUserSource = options?.source;
this._isRunning = true;
this._aborted = false;
this.currentRunToolExecutions = [];
const runStart = Date.now();
this.runLog.log("run_start", {
@ -552,6 +569,7 @@ export class Agent {
// Loop to exhaust all candidate profiles on rotatable errors
while (true) {
const toolExecutionStartIndex = this.currentRunToolExecutions.length;
try {
const llmStart = Date.now();
this.runLog.log("llm_call", {
@ -561,6 +579,7 @@ export class Agent {
messages: this.agent.state.messages.length,
});
await this.agent.prompt(prompt);
await this.enforceWebFetchAfterSearchIfNeeded(toolExecutionStartIndex);
this.runLog.log("llm_result", {
duration_ms: Date.now() - llmStart,
});
@ -692,6 +711,7 @@ export class Agent {
this._lastEventSavedAssistant = undefined;
this.currentUserDisplayPrompt = undefined;
this.currentUserSource = undefined;
this.currentRunToolExecutions = [];
this.runLog.flush().catch(() => {});
}
}
@ -781,6 +801,56 @@ export class Agent {
this.session.setApiKey(this.currentApiKey);
}
/**
 * Re-prompts the model to call web_fetch when the turn that just finished ran
 * web_search with results but never completed a successful web_fetch.
 *
 * Nothing happens for internal runs, for aborted runs, or when
 * `shouldEnforceWebFetchAfterSearch` reports that no enforcement is needed.
 * The follow-up prompt is best-effort: a failure is written to the run log
 * (and to stderr when debug is on) and is not rethrown.
 *
 * @param toolExecutionStartIndex Index into `currentRunToolExecutions` where
 *   this turn's records begin; earlier entries are ignored.
 */
private async enforceWebFetchAfterSearchIfNeeded(
  toolExecutionStartIndex: number,
): Promise<void> {
  if (this._internalRun) return;
  // A cancelled run must not trigger another model call. `_aborted` is reset
  // to false at run start; NOTE(review): presumably set by the abort path — confirm.
  if (this._aborted) return;

  // Normalize names the same way summarizeWebToolUsage does (trim + lowercase)
  // so availability checks and usage counting agree on what a tool is called.
  const activeTools = new Set(
    (this.agent.state.tools ?? []).map((tool) => tool.name.trim().toLowerCase()),
  );
  const webSearchAvailable = activeTools.has("web_search");
  const webFetchAvailable = activeTools.has("web_fetch");

  // Only executions recorded since this turn started are considered.
  const currentTurnExecutions = this.currentRunToolExecutions.slice(
    toolExecutionStartIndex,
  );
  const usage = summarizeWebToolUsage(currentTurnExecutions);
  if (
    !shouldEnforceWebFetchAfterSearch({
      usage,
      webSearchAvailable,
      webFetchAvailable,
    })
  ) {
    return;
  }

  this.runLog.log("web_search_fetch_guard", {
    search_calls: usage.searchCalls,
    search_success: usage.searchSuccess,
    search_with_results: usage.searchSuccessWithResults,
    fetch_calls: usage.fetchCalls,
    fetch_success: usage.fetchSuccess,
  });

  try {
    await this.agent.prompt(WEB_SEARCH_FETCH_ENFORCEMENT_PROMPT);
    this.runLog.log("web_search_fetch_guard_applied", {
      search_with_results: usage.searchSuccessWithResults,
    });
  } catch (error) {
    // Deliberately swallowed: the guard is an add-on and must not fail the run.
    const message = error instanceof Error ? error.message : String(error);
    this.runLog.log("web_search_fetch_guard_failed", {
      // Cap the logged message at 200 characters.
      error: message.slice(0, 200),
    });
    if (this.debug) {
      this.stderr.write(`[web-guard] Failed to enforce search->fetch: ${message}\n`);
    }
  }
}
private handleRunLogEvent(event: AgentEvent) {
if (event.type === "tool_execution_start") {
const toolName = (event as any).toolName ?? "unknown";
@ -800,11 +870,18 @@ export class Agent {
const resultText = extractRunLogResultText(result);
const resultChars = resultText?.length ?? 0;
const details = extractRunLogResultDetails(result);
const isError = Boolean((event as any).isError ?? false);
this.currentRunToolExecutions.push({
toolName,
isError,
details,
});
const toolEndData: Record<string, unknown> = {
tool: toolName,
duration_ms,
is_error: (event as any).isError ?? false,
is_error: isError,
result_chars: resultChars,
result_summary: formatRunLogToolSummary(toolName, details),
};

View file

@ -181,7 +181,7 @@ describe("buildConditionalToolSections", () => {
const result = buildConditionalToolSections(["web_search"], "full");
const text = result.join("\n");
expect(text).toContain("## Web Access");
expect(text).toContain("Web usage is conditional, not mandatory");
expect(text).toContain("you MUST call web_fetch");
});
it("adds dynamic evidence decision guidance when data tool is present", () => {

View file

@ -364,14 +364,12 @@ export function buildConditionalToolSections(
"## Web Access",
"You have web access. Use it when the user asks about current events, needs up-to-date information, or requests content from URLs.",
"Prefer web_search for discovery and web_fetch for specific URLs.",
"Web usage is conditional, not mandatory: call web tools when they materially improve evidence quality.",
"When web_search is used, treat snippets as incomplete previews rather than final evidence.",
"",
"### Search-then-Fetch",
"After web_search, evaluate whether the snippets contain enough detail to answer accurately.",
"If not, use web_fetch on the 1-3 most relevant URLs to get full content before answering.",
"Always fetch when the user asks for detailed explanations, comparisons, or analysis;",
"when snippets are vague or contradictory; or when the question requires specific data points.",
"Skip fetch when the answer is a simple fact clearly stated in the snippet or the user only wants a quick overview.",
"After every successful web_search, you MUST call web_fetch on 1-3 relevant URLs before detailed reasoning or factual claims.",
"Use fetched page content (not snippets) as the primary evidence for analysis and synthesis.",
"If all fetch attempts fail, explicitly report that limitation and avoid specific claims derived only from snippets.",
"",
);
}

View file

@ -0,0 +1,145 @@
import { describe, expect, it } from "vitest";
import {
shouldEnforceWebFetchAfterSearch,
summarizeWebToolUsage,
type ToolExecutionRecord,
} from "./web-tools-policy.js";
/**
 * Test helper: builds a ToolExecutionRecord, defaulting `isError` to false
 * and `details` to null when they are omitted.
 */
function buildRecord(params: {
  toolName: string;
  isError?: boolean;
  details?: Record<string, unknown> | null;
}): ToolExecutionRecord {
  const { toolName, isError, details } = params;
  return {
    toolName,
    isError: isError ?? false,
    details: details ?? null,
  };
}
// Unit tests for the web tool policy helpers: usage summarization and the
// decision whether a web_fetch must be enforced after web_search.
describe("web-tools-policy", () => {
// --- summarizeWebToolUsage: counting of search/fetch calls and successes ---
describe("summarizeWebToolUsage", () => {
// A non-error web_search whose details report results counts as a call,
// a success, and a success-with-results; fetch counters stay at zero.
it("counts successful web_search calls with results", () => {
const usage = summarizeWebToolUsage([
buildRecord({
toolName: "web_search",
details: { count: 3, results: [{}, {}, {}] },
}),
]);
expect(usage.searchCalls).toBe(1);
expect(usage.searchSuccess).toBe(1);
expect(usage.searchSuccessWithResults).toBe(1);
expect(usage.fetchCalls).toBe(0);
expect(usage.fetchSuccess).toBe(0);
});
// isError is false here (buildRecord default), but details.error === true
// must still mark the execution as failed.
it("does not count tool-level error payload as success", () => {
const usage = summarizeWebToolUsage([
buildRecord({
toolName: "web_search",
details: { error: true, code: "search_failed" },
}),
]);
expect(usage.searchCalls).toBe(1);
expect(usage.searchSuccess).toBe(0);
expect(usage.searchSuccessWithResults).toBe(0);
});
});
// --- shouldEnforceWebFetchAfterSearch: the enforcement decision itself ---
describe("shouldEnforceWebFetchAfterSearch", () => {
// Search produced results and no fetch was made at all -> enforce.
it("enforces when search has results but fetch never succeeded", () => {
const usage = summarizeWebToolUsage([
buildRecord({
toolName: "web_search",
details: { count: 2, results: [{}, {}] },
}),
]);
expect(
shouldEnforceWebFetchAfterSearch({
usage,
webSearchAvailable: true,
webFetchAvailable: true,
}),
).toBe(true);
});
// One successful fetch in the same record set is enough to skip enforcement.
it("does not enforce after a successful web_fetch", () => {
const usage = summarizeWebToolUsage([
buildRecord({
toolName: "web_search",
details: { count: 2, results: [{}, {}] },
}),
buildRecord({
toolName: "web_fetch",
details: { status: 200, length: 1024 },
}),
]);
expect(
shouldEnforceWebFetchAfterSearch({
usage,
webSearchAvailable: true,
webFetchAvailable: true,
}),
).toBe(false);
});
// A successful search with zero results gives nothing to fetch -> no enforcement.
it("does not enforce when search returns no results", () => {
const usage = summarizeWebToolUsage([
buildRecord({
toolName: "web_search",
details: { count: 0, results: [] },
}),
]);
expect(
shouldEnforceWebFetchAfterSearch({
usage,
webSearchAvailable: true,
webFetchAvailable: true,
}),
).toBe(false);
});
// Enforcement requires both tools to be available; without web_fetch it is skipped.
it("does not enforce when web_fetch is unavailable", () => {
const usage = summarizeWebToolUsage([
buildRecord({
toolName: "web_search",
details: { count: 1, results: [{}] },
}),
]);
expect(
shouldEnforceWebFetchAfterSearch({
usage,
webSearchAvailable: true,
webFetchAvailable: false,
}),
).toBe(false);
});
// A fetch that failed (details.error === true) does not satisfy the policy,
// so enforcement still applies.
it("enforces when fetch was attempted but failed", () => {
const usage = summarizeWebToolUsage([
buildRecord({
toolName: "web_search",
details: { count: 1, results: [{}] },
}),
buildRecord({
toolName: "web_fetch",
details: { error: true, code: "fetch_failed" },
}),
]);
expect(
shouldEnforceWebFetchAfterSearch({
usage,
webSearchAvailable: true,
webFetchAvailable: true,
}),
).toBe(true);
});
});
});

View file

@ -0,0 +1,86 @@
/**
 * One finished tool call, in the shape consumed by `summarizeWebToolUsage`.
 */
export type ToolExecutionRecord = {
// Tool name; summarizeWebToolUsage trims and lowercases it before matching
// against "web_search" / "web_fetch".
toolName: string;
// When true, the execution is never counted as successful.
isError: boolean;
// Result details, or null when there are none. `error === true` marks a
// tool-level failure; for web_search, `count` (finite number) or `results`
// (array) is used to determine how many results were returned.
details: Record<string, unknown> | null;
};
/**
 * Aggregated web_search / web_fetch counters produced by `summarizeWebToolUsage`.
 */
export type WebToolUsage = {
// Number of web_search records seen, successful or not.
searchCalls: number;
// web_search records that were not errors (neither isError nor details.error === true).
searchSuccess: number;
// Successful web_search records that reported a result count greater than zero.
searchSuccessWithResults: number;
// Number of web_fetch records seen, successful or not.
fetchCalls: number;
// web_fetch records that were not errors.
fetchSuccess: number;
};
/**
 * Reports whether a tool's result details carry an explicit `error: true` flag.
 * Any other value of `error` (including truthy non-boolean values) is not
 * treated as an error, and missing details yield false.
 */
function hasToolError(details: Record<string, unknown> | null): boolean {
  if (details == null) {
    return false;
  }
  return details.error === true;
}
/**
 * Determines how many results a web_search execution reported.
 *
 * A finite numeric `count` wins and is floored and clamped to be non-negative;
 * otherwise the length of a `results` array is used; otherwise 0.
 *
 * @param details Result details of the search execution, or null.
 * @returns A non-negative integer result count.
 */
function getSearchResultCount(details: Record<string, unknown> | null): number {
  if (!details) return 0;

  const declared = details.count;
  if (typeof declared !== "number" || !Number.isFinite(declared)) {
    // No usable explicit count: fall back to the size of the results list.
    const listed = details.results;
    return Array.isArray(listed) ? listed.length : 0;
  }
  return Math.max(0, Math.floor(declared));
}
/**
 * An execution is successful only when it is neither flagged with `isError`
 * nor carries a tool-level error in its details.
 */
function isSuccessfulExecution(record: ToolExecutionRecord): boolean {
  return !record.isError && !hasToolError(record.details);
}
/**
 * Tallies web_search and web_fetch activity across a list of tool executions.
 *
 * Tool names are compared after trimming and lowercasing; records for any
 * other tool are ignored.
 *
 * @param records Tool executions to inspect.
 * @returns Call and success counters for both web tools.
 */
export function summarizeWebToolUsage(records: ToolExecutionRecord[]): WebToolUsage {
  let searchCalls = 0;
  let searchSuccess = 0;
  let searchSuccessWithResults = 0;
  let fetchCalls = 0;
  let fetchSuccess = 0;

  for (const entry of records) {
    switch (entry.toolName.trim().toLowerCase()) {
      case "web_search": {
        searchCalls += 1;
        if (!isSuccessfulExecution(entry)) break;
        searchSuccess += 1;
        // Only a successful search can contribute to the "with results" tally.
        if (getSearchResultCount(entry.details) > 0) {
          searchSuccessWithResults += 1;
        }
        break;
      }
      case "web_fetch": {
        fetchCalls += 1;
        if (isSuccessfulExecution(entry)) {
          fetchSuccess += 1;
        }
        break;
      }
      default:
        break;
    }
  }

  return {
    searchCalls,
    searchSuccess,
    searchSuccessWithResults,
    fetchCalls,
    fetchSuccess,
  };
}
/**
 * Decides whether the agent must be pushed to call web_fetch.
 *
 * Enforcement applies only when both web tools are available, at least one
 * successful search returned results, and no fetch has succeeded.
 *
 * @param params.usage Web tool counters for the records under consideration.
 * @param params.webSearchAvailable Whether web_search is available.
 * @param params.webFetchAvailable Whether web_fetch is available.
 * @returns true when a follow-up web_fetch should be enforced.
 */
export function shouldEnforceWebFetchAfterSearch(params: {
  usage: WebToolUsage;
  webSearchAvailable: boolean;
  webFetchAvailable: boolean;
}): boolean {
  const bothToolsAvailable = params.webSearchAvailable && params.webFetchAvailable;
  if (!bothToolsAvailable) return false;

  const { searchSuccessWithResults, fetchSuccess } = params.usage;
  const searchProducedResults = !(searchSuccessWithResults <= 0);
  const fetchAlreadySucceeded = fetchSuccess > 0;
  return searchProducedResults && !fetchAlreadySucceeded;
}