Merge pull request #218 from multica-ai/codex/web-fetch-evidence-coverage

fix(agent): enforce web search fetch evidence coverage
This commit is contained in:
Jiayuan Zhang 2026-02-17 02:45:12 +08:00 committed by GitHub
commit 39fde8e4b0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 790 additions and 8 deletions

View file

@ -43,6 +43,13 @@ import {
type SystemPromptMode,
} from "./system-prompt/index.js";
import type { AuthProfileFailureReason } from "./auth-profiles/index.js";
import {
analyzeCrossTurnWebFetchNeed,
resolveWebFetchRequirementFromPrompt,
shouldEnforceWebFetchAfterSearch,
summarizeWebToolUsage,
type ToolExecutionRecord,
} from "./web-tools-policy.js";
import {
sanitizeToolCallInputs,
sanitizeToolUseResultPairing,
@ -128,6 +135,56 @@ function formatRunLogToolSummary(tool: string, details: Record<string, unknown>
}
}
/**
 * Builds the follow-up prompt sent to the model when a turn used web_search
 * but did not gather enough web_fetch evidence.
 *
 * @param params.requiredMinFetchSuccess Minimum number of successful web_fetch
 *   calls the task requires.
 * @param params.fetchSuccess Number of successful web_fetch calls so far.
 * @param params.needsFollowupForLatestSearch True when the latest successful
 *   search has not been followed by a successful fetch.
 * @returns The prompt text and the number of additional successful fetches it
 *   asks for (always at least 1).
 */
function buildWebSearchFetchEnforcementPrompt(params: {
  requiredMinFetchSuccess: number;
  fetchSuccess: number;
  needsFollowupForLatestSearch: boolean;
}): { prompt: string; additionalFetchNeeded: number } {
  const additionalFetchNeeded = Math.max(
    1,
    params.requiredMinFetchSuccess - params.fetchSuccess,
    params.needsFollowupForLatestSearch ? 1 : 0,
  );

  // Keep step 1 (how many URLs to pick) consistent with step 2 (how many
  // fetches must succeed). A fixed "1-3" contradicts step 2 whenever more
  // than three additional fetches are required, and understates the lower
  // bound when two or three are required.
  const maxUrlsToPick = Math.max(3, additionalFetchNeeded);
  const urlRange =
    additionalFetchNeeded === maxUrlsToPick
      ? `${additionalFetchNeeded}`
      : `${additionalFetchNeeded}-${maxUrlsToPick}`;

  const lines = [
    "You used web_search, but web evidence coverage for this turn is still incomplete.",
    "Search snippets are incomplete previews and are not sufficient evidence for detailed claims.",
  ];
  if (params.requiredMinFetchSuccess > 1) {
    lines.push(
      `This task currently requires at least ${params.requiredMinFetchSuccess} successful web_fetch calls.`,
    );
  }
  if (params.needsFollowupForLatestSearch) {
    lines.push(
      "You performed another successful web_search after your last successful web_fetch. " +
        "You must fetch URLs from the latest search results before finalizing.",
    );
  }
  lines.push(
    "Before finalizing your answer, you MUST:",
    `1) Pick the ${urlRange} most relevant URLs from the latest successful web_search results.`,
    `2) Complete at least ${additionalFetchNeeded} additional successful web_fetch call(s).`,
    "3) Revise your answer based on fetched page content.",
    "If all additional fetch attempts fail, explicitly say so and avoid relying on snippets for specific claims.",
  );
  return { prompt: lines.join("\n"), additionalFetchNeeded };
}
/**
 * Fixed follow-up prompt (lines joined with "\n") sent by the cross-turn guard
 * when a web-dependent answer is about to be finalized and no web_fetch call
 * was made in the turn. It tells the model to fetch (or search, then fetch)
 * before answering and to report the limitation if every fetch fails.
 */
const CROSS_TURN_WEB_FETCH_ENFORCEMENT_PROMPT = [
"You are about to finalize a web-dependent answer, but no successful web_fetch happened in this turn.",
"Do not rely only on snippets or prior-turn memory for fresh factual claims.",
"Before finalizing your answer, you MUST:",
"1) If relevant URLs are already available in this conversation, call web_fetch on 1-3 of them.",
"2) If no URLs are available, call web_search to find candidates, then web_fetch on 1-3 relevant URLs.",
"3) Revise your answer using fetched page content as primary evidence.",
"If all fetch attempts fail, explicitly report that limitation and avoid specific claims not backed by fetched content.",
].join("\n");
export class Agent {
private readonly agent: PiAgentCore;
private output;
@ -142,6 +199,7 @@ export class Agent {
private readonly stderr: NodeJS.WritableStream;
private readonly runLog: RunLog;
private readonly toolStartTimes = new Map<string, number>();
private currentRunToolExecutions: ToolExecutionRecord[] = [];
private initialized = false;
// Context window settings (for pre-flight compaction)
@ -525,6 +583,7 @@ export class Agent {
this.currentUserSource = options?.source;
this._isRunning = true;
this._aborted = false;
this.currentRunToolExecutions = [];
const runStart = Date.now();
this.runLog.log("run_start", {
@ -553,6 +612,7 @@ export class Agent {
// Loop to exhaust all candidate profiles on rotatable errors
while (true) {
const toolExecutionStartIndex = this.currentRunToolExecutions.length;
try {
const llmStart = Date.now();
this.runLog.log("llm_call", {
@ -562,6 +622,14 @@ export class Agent {
messages: this.agent.state.messages.length,
});
await this.agent.prompt(prompt);
await this.enforceWebFetchAfterSearchIfNeeded({
toolExecutionStartIndex,
userPrompt: prompt,
});
await this.enforceCrossTurnWebFetchIfNeeded({
toolExecutionStartIndex,
userPrompt: prompt,
});
this.runLog.log("llm_result", {
duration_ms: Date.now() - llmStart,
});
@ -693,6 +761,7 @@ export class Agent {
this._lastEventSavedAssistant = undefined;
this.currentUserDisplayPrompt = undefined;
this.currentUserSource = undefined;
this.currentRunToolExecutions = [];
this.runLog.flush().catch(() => {});
}
}
@ -782,6 +851,125 @@ export class Agent {
this.session.setApiKey(this.currentApiKey);
}
/**
 * Search-then-fetch guard: after the main prompt has run, checks whether the
 * tool executions recorded since `toolExecutionStartIndex` include a
 * web_search with results that still lacks sufficient successful web_fetch
 * coverage, and if so sends one follow-up prompt asking for more fetches.
 *
 * Does nothing for internal runs. A failure of the follow-up prompt is logged
 * (and written to stderr in debug mode) but never rethrown.
 *
 * @param params.toolExecutionStartIndex Index into the run's execution list
 *   where the executions to inspect begin.
 * @param params.userPrompt The user's prompt, used to derive how many
 *   successful fetches are required.
 */
private async enforceWebFetchAfterSearchIfNeeded(params: {
  toolExecutionStartIndex: number;
  userPrompt: string;
}): Promise<void> {
  if (this._internalRun) {
    return;
  }

  // Lower-cased names of the tools currently registered on the agent.
  const registeredNames = (this.agent.state.tools ?? []).map((tool) =>
    tool.name.toLowerCase(),
  );
  const registered = new Set(registeredNames);

  const executionsInScope = this.currentRunToolExecutions.slice(
    params.toolExecutionStartIndex,
  );
  const usage = summarizeWebToolUsage(executionsInScope);
  const requirement = resolveWebFetchRequirementFromPrompt(params.userPrompt);

  const mustEnforce = shouldEnforceWebFetchAfterSearch({
    usage,
    webSearchAvailable: registered.has("web_search"),
    webFetchAvailable: registered.has("web_fetch"),
    requiredMinFetchSuccess: requirement.requiredMinFetchSuccess,
  });
  if (!mustEnforce) {
    return;
  }

  const enforcement = buildWebSearchFetchEnforcementPrompt({
    requiredMinFetchSuccess: requirement.requiredMinFetchSuccess,
    fetchSuccess: usage.fetchSuccess,
    needsFollowupForLatestSearch: usage.searchNeedsFollowupFetch,
  });

  this.runLog.log("web_search_fetch_guard", {
    search_calls: usage.searchCalls,
    search_success: usage.searchSuccess,
    search_with_results: usage.searchSuccessWithResults,
    search_needs_followup_fetch: usage.searchNeedsFollowupFetch,
    fetch_calls: usage.fetchCalls,
    fetch_success: usage.fetchSuccess,
    required_min_fetch_success: requirement.requiredMinFetchSuccess,
    prompt_suggests_research_depth: requirement.promptSuggestsResearchDepth,
    prompt_multi_source_cue: requirement.multiSourceCue,
    prompt_explicit_min_fetch: requirement.explicitMinFetchFromPrompt,
  });

  try {
    await this.agent.prompt(enforcement.prompt);
    this.runLog.log("web_search_fetch_guard_applied", {
      search_with_results: usage.searchSuccessWithResults,
      search_needs_followup_fetch: usage.searchNeedsFollowupFetch,
      required_min_fetch_success: requirement.requiredMinFetchSuccess,
      additional_fetch_needed: enforcement.additionalFetchNeeded,
    });
  } catch (error) {
    // Best effort: record the failure and let the run continue.
    let message: string;
    if (error instanceof Error) {
      message = error.message;
    } else {
      message = String(error);
    }
    this.runLog.log("web_search_fetch_guard_failed", {
      error: message.slice(0, 200),
    });
    if (this.debug) {
      this.stderr.write(`[web-guard] Failed to enforce search->fetch: ${message}\n`);
    }
  }
}
/**
 * Cross-turn guard: asks `analyzeCrossTurnWebFetchNeed` whether the answer
 * needs fetched web evidence, based on the user prompt, the last assistant
 * text and the web tool usage recorded since `toolExecutionStartIndex`. When
 * it does, sends the fixed cross-turn enforcement prompt once.
 *
 * Does nothing for internal runs. A failure of the follow-up prompt is logged
 * (and written to stderr in debug mode) but never rethrown.
 *
 * @param params.toolExecutionStartIndex Index into the run's execution list
 *   where the executions to inspect begin.
 * @param params.userPrompt The user's prompt for this run.
 */
private async enforceCrossTurnWebFetchIfNeeded(params: {
  toolExecutionStartIndex: number;
  userPrompt: string;
}): Promise<void> {
  if (this._internalRun) {
    return;
  }

  // Lower-cased names of the tools currently registered on the agent.
  const registeredNames = (this.agent.state.tools ?? []).map((tool) =>
    tool.name.toLowerCase(),
  );
  const registered = new Set(registeredNames);

  const executionsInScope = this.currentRunToolExecutions.slice(
    params.toolExecutionStartIndex,
  );
  const usage = summarizeWebToolUsage(executionsInScope);

  const analysis = analyzeCrossTurnWebFetchNeed({
    usage,
    webFetchAvailable: registered.has("web_fetch"),
    userPrompt: params.userPrompt,
    assistantText: this.output.state.lastAssistantText ?? "",
  });
  if (!analysis.shouldEnforce) {
    return;
  }

  this.runLog.log("web_cross_turn_fetch_guard", {
    fetch_calls: usage.fetchCalls,
    fetch_success: usage.fetchSuccess,
    explicit_fetch_request: analysis.explicitFetchRequest,
    user_provides_url: analysis.userProvidesUrl,
    freshness_cue: analysis.freshnessCue,
    web_cue: analysis.webCue,
    user_needs_fresh_web_evidence: analysis.userNeedsFreshWebEvidence,
    user_blocks_web_fetch: analysis.userBlocksWebFetch,
    assistant_web_claim_signal: analysis.assistantHasWebClaimSignal,
  });

  try {
    await this.agent.prompt(CROSS_TURN_WEB_FETCH_ENFORCEMENT_PROMPT);
    this.runLog.log("web_cross_turn_fetch_guard_applied", {
      explicit_fetch_request: analysis.explicitFetchRequest,
      user_provides_url: analysis.userProvidesUrl,
    });
  } catch (error) {
    // Best effort: record the failure and let the run continue.
    let message: string;
    if (error instanceof Error) {
      message = error.message;
    } else {
      message = String(error);
    }
    this.runLog.log("web_cross_turn_fetch_guard_failed", {
      error: message.slice(0, 200),
    });
    if (this.debug) {
      this.stderr.write(`[web-cross-turn-guard] Failed to enforce fetch: ${message}\n`);
    }
  }
}
private handleRunLogEvent(event: AgentEvent) {
if (event.type === "tool_execution_start") {
const toolName = (event as any).toolName ?? "unknown";
@ -801,11 +989,18 @@ export class Agent {
const resultText = extractRunLogResultText(result);
const resultChars = resultText?.length ?? 0;
const details = extractRunLogResultDetails(result);
const isError = Boolean((event as any).isError ?? false);
this.currentRunToolExecutions.push({
toolName,
isError,
details,
});
const toolEndData: Record<string, unknown> = {
tool: toolName,
duration_ms,
is_error: (event as any).isError ?? false,
is_error: isError,
result_chars: resultChars,
result_summary: formatRunLogToolSummary(toolName, details),
};

View file

@ -181,7 +181,7 @@ describe("buildConditionalToolSections", () => {
const result = buildConditionalToolSections(["web_search"], "full");
const text = result.join("\n");
expect(text).toContain("## Web Access");
expect(text).toContain("Web usage is conditional, not mandatory");
expect(text).toContain("you MUST call web_fetch");
});
it("adds dynamic evidence decision guidance when data tool is present", () => {

View file

@ -364,14 +364,12 @@ export function buildConditionalToolSections(
"## Web Access",
"You have web access. Use it when the user asks about current events, needs up-to-date information, or requests content from URLs.",
"Prefer web_search for discovery and web_fetch for specific URLs.",
"Web usage is conditional, not mandatory: call web tools when they materially improve evidence quality.",
"When web_search is used, treat snippets as incomplete previews rather than final evidence.",
"",
"### Search-then-Fetch",
"After web_search, evaluate whether the snippets contain enough detail to answer accurately.",
"If not, use web_fetch on the 1-3 most relevant URLs to get full content before answering.",
"Always fetch when the user asks for detailed explanations, comparisons, or analysis;",
"when snippets are vague or contradictory; or when the question requires specific data points.",
"Skip fetch when the answer is a simple fact clearly stated in the snippet or the user only wants a quick overview.",
"After every successful web_search, you MUST call web_fetch on 1-3 relevant URLs before detailed reasoning or factual claims.",
"Use fetched page content (not snippets) as the primary evidence for analysis and synthesis.",
"If all fetch attempts fail, explicitly report that limitation and avoid specific claims derived only from snippets.",
"",
);
}

View file

@ -0,0 +1,327 @@
import { describe, expect, it } from "vitest";
import {
analyzeCrossTurnWebFetchNeed,
resolveWebFetchRequirementFromPrompt,
shouldEnforceWebFetchAfterSearch,
summarizeWebToolUsage,
type ToolExecutionRecord,
} from "./web-tools-policy.js";
/**
 * Test helper: creates a ToolExecutionRecord, filling in `isError: false` and
 * `details: null` when the caller leaves them out.
 */
function buildRecord(params: {
  toolName: string;
  isError?: boolean;
  details?: Record<string, unknown> | null;
}): ToolExecutionRecord {
  const isError = params.isError ?? false;
  const details = params.details ?? null;
  return { toolName: params.toolName, isError, details };
}
describe("web-tools-policy", () => {
// summarizeWebToolUsage: counting of search/fetch calls and the
// "latest search still needs a fetch" flag.
describe("summarizeWebToolUsage", () => {
// A lone successful search with results leaves a follow-up fetch pending.
it("counts successful web_search calls with results", () => {
const usage = summarizeWebToolUsage([
buildRecord({
toolName: "web_search",
details: { count: 3, results: [{}, {}, {}] },
}),
]);
expect(usage.searchCalls).toBe(1);
expect(usage.searchSuccess).toBe(1);
expect(usage.searchSuccessWithResults).toBe(1);
expect(usage.searchNeedsFollowupFetch).toBe(true);
expect(usage.fetchCalls).toBe(0);
expect(usage.fetchSuccess).toBe(0);
});
// isError is false here; the failure is signalled only by details.error === true.
it("does not count tool-level error payload as success", () => {
const usage = summarizeWebToolUsage([
buildRecord({
toolName: "web_search",
details: { error: true, code: "search_failed" },
}),
]);
expect(usage.searchCalls).toBe(1);
expect(usage.searchSuccess).toBe(0);
expect(usage.searchSuccessWithResults).toBe(0);
expect(usage.searchNeedsFollowupFetch).toBe(false);
});
// A successful fetch after the search clears the pending flag.
it("marks latest search as covered when successful fetch follows", () => {
const usage = summarizeWebToolUsage([
buildRecord({
toolName: "web_search",
details: { count: 1, results: [{}] },
}),
buildRecord({
toolName: "web_fetch",
details: { status: 200, length: 1024 },
}),
]);
expect(usage.searchNeedsFollowupFetch).toBe(false);
});
});
// shouldEnforceWebFetchAfterSearch: decision table for the search-then-fetch guard.
// Unless stated otherwise, requiredMinFetchSuccess is omitted and defaults to 1.
describe("shouldEnforceWebFetchAfterSearch", () => {
it("enforces when search has results but fetch never succeeded", () => {
const usage = summarizeWebToolUsage([
buildRecord({
toolName: "web_search",
details: { count: 2, results: [{}, {}] },
}),
]);
expect(
shouldEnforceWebFetchAfterSearch({
usage,
webSearchAvailable: true,
webFetchAvailable: true,
}),
).toBe(true);
});
it("does not enforce after a successful web_fetch", () => {
const usage = summarizeWebToolUsage([
buildRecord({
toolName: "web_search",
details: { count: 2, results: [{}, {}] },
}),
buildRecord({
toolName: "web_fetch",
details: { status: 200, length: 1024 },
}),
]);
expect(
shouldEnforceWebFetchAfterSearch({
usage,
webSearchAvailable: true,
webFetchAvailable: true,
}),
).toBe(false);
});
// search -> fetch -> search: the second search has no fetch after it.
it("enforces when the latest successful search has no follow-up fetch", () => {
const usage = summarizeWebToolUsage([
buildRecord({
toolName: "web_search",
details: { count: 2, results: [{}, {}] },
}),
buildRecord({
toolName: "web_fetch",
details: { status: 200, length: 1200 },
}),
buildRecord({
toolName: "web_search",
details: { count: 3, results: [{}, {}, {}] },
}),
]);
expect(
shouldEnforceWebFetchAfterSearch({
usage,
webSearchAvailable: true,
webFetchAvailable: true,
}),
).toBe(true);
});
// One successful fetch is not enough when two are required.
it("enforces when prompt requires deeper evidence coverage", () => {
const usage = summarizeWebToolUsage([
buildRecord({
toolName: "web_search",
details: { count: 6, results: [{}, {}, {}] },
}),
buildRecord({
toolName: "web_fetch",
details: { status: 200, length: 2200 },
}),
]);
expect(
shouldEnforceWebFetchAfterSearch({
usage,
webSearchAvailable: true,
webFetchAvailable: true,
requiredMinFetchSuccess: 2,
}),
).toBe(true);
});
it("does not enforce when search returns no results", () => {
const usage = summarizeWebToolUsage([
buildRecord({
toolName: "web_search",
details: { count: 0, results: [] },
}),
]);
expect(
shouldEnforceWebFetchAfterSearch({
usage,
webSearchAvailable: true,
webFetchAvailable: true,
}),
).toBe(false);
});
it("does not enforce when web_fetch is unavailable", () => {
const usage = summarizeWebToolUsage([
buildRecord({
toolName: "web_search",
details: { count: 1, results: [{}] },
}),
]);
expect(
shouldEnforceWebFetchAfterSearch({
usage,
webSearchAvailable: true,
webFetchAvailable: false,
}),
).toBe(false);
});
// A failed fetch attempt does not count as coverage.
it("enforces when fetch was attempted but failed", () => {
const usage = summarizeWebToolUsage([
buildRecord({
toolName: "web_search",
details: { count: 1, results: [{}] },
}),
buildRecord({
toolName: "web_fetch",
details: { error: true, code: "fetch_failed" },
}),
]);
expect(
shouldEnforceWebFetchAfterSearch({
usage,
webSearchAvailable: true,
webFetchAvailable: true,
}),
).toBe(true);
});
});
// analyzeCrossTurnWebFetchNeed: cross-turn guard decisions driven by the user
// prompt, the assistant text and the turn's fetch usage.
describe("analyzeCrossTurnWebFetchNeed", () => {
it("enforces when user explicitly asks to refetch page content", () => {
const usage = summarizeWebToolUsage([]);
const analysis = analyzeCrossTurnWebFetchNeed({
usage,
webFetchAvailable: true,
userPrompt: "Please refetch the page body this turn and verify with sources.",
assistantText: "Here is a quick summary.",
});
expect(analysis.shouldEnforce).toBe(true);
expect(analysis.explicitFetchRequest).toBe(true);
});
it("enforces for freshness requests when assistant makes web-style claims", () => {
const usage = summarizeWebToolUsage([]);
const analysis = analyzeCrossTurnWebFetchNeed({
usage,
webFetchAvailable: true,
userPrompt: "Give me the latest web news about OpenAI with sources.",
assistantText: "According to Reuters, OpenAI announced a new release.",
});
expect(analysis.shouldEnforce).toBe(true);
expect(analysis.freshnessCue).toBe(true);
expect(analysis.webCue).toBe(true);
expect(analysis.assistantHasWebClaimSignal).toBe(true);
});
// Any fetch attempt, even a failed one, disables the cross-turn guard.
it("does not enforce when a fetch was already attempted in this turn", () => {
const usage = summarizeWebToolUsage([
buildRecord({
toolName: "web_fetch",
details: { error: true, code: "fetch_failed" },
}),
]);
const analysis = analyzeCrossTurnWebFetchNeed({
usage,
webFetchAvailable: true,
userPrompt: "Please verify with the latest web sources.",
assistantText: "According to Reuters, ...",
});
expect(analysis.shouldEnforce).toBe(false);
});
it("does not enforce when user explicitly blocks web fetch", () => {
const usage = summarizeWebToolUsage([]);
const analysis = analyzeCrossTurnWebFetchNeed({
usage,
webFetchAvailable: true,
userPrompt: "Do not browse the web, only use snippets.",
assistantText: "According to Reuters, ...",
});
expect(analysis.shouldEnforce).toBe(false);
expect(analysis.userBlocksWebFetch).toBe(true);
});
it("enforces when user provides a direct URL but no fetch happened", () => {
const usage = summarizeWebToolUsage([]);
const analysis = analyzeCrossTurnWebFetchNeed({
usage,
webFetchAvailable: true,
userPrompt: "Summarize https://example.com/article and include key takeaways.",
assistantText: "I can summarize it for you.",
});
expect(analysis.shouldEnforce).toBe(true);
expect(analysis.userProvidesUrl).toBe(true);
});
// "latest" is a freshness cue, but nothing in the prompt refers to the web.
it("does not enforce for non-web freshness requests", () => {
const usage = summarizeWebToolUsage([]);
const analysis = analyzeCrossTurnWebFetchNeed({
usage,
webFetchAvailable: true,
userPrompt: "What is the latest version in this repository?",
assistantText: "The latest version is 1.2.3.",
});
expect(analysis.shouldEnforce).toBe(false);
expect(analysis.freshnessCue).toBe(true);
expect(analysis.webCue).toBe(false);
});
});
// resolveWebFetchRequirementFromPrompt: how prompt wording maps to the
// minimum number of successful fetches.
describe("resolveWebFetchRequirementFromPrompt", () => {
// Chinese-language prompt; expected to trigger the research-depth cue.
it("requires deeper fetch coverage for research-style prompts", () => {
const result = resolveWebFetchRequirementFromPrompt(
"帮我调研一下 APPLE 最近的产品信息,并做分析。",
);
expect(result.requiredMinFetchSuccess).toBe(2);
expect(result.promptSuggestsResearchDepth).toBe(true);
});
it("uses explicit minimum source count when present", () => {
const result = resolveWebFetchRequirementFromPrompt(
"Please use at least 3 sources and summarize the latest updates.",
);
expect(result.requiredMinFetchSuccess).toBe(3);
expect(result.explicitMinFetchFromPrompt).toBe(3);
});
it("falls back to 1 for simple prompts", () => {
const result = resolveWebFetchRequirementFromPrompt(
"What is OpenAI's CEO?",
);
expect(result.requiredMinFetchSuccess).toBe(1);
expect(result.promptSuggestsResearchDepth).toBe(false);
});
});
});

View file

@ -0,0 +1,262 @@
/** One finished tool call, reduced to the fields the web-evidence policy inspects. */
export type ToolExecutionRecord = {
/** Tool name; summarizeWebToolUsage compares it after trimming and lower-casing. */
toolName: string;
/** True when the caller flagged the execution as failed. */
isError: boolean;
/** Result details, or null. A value of `error === true` here also counts as a failure. */
details: Record<string, unknown> | null;
};
/** Aggregate counts of web_search / web_fetch activity over a list of executions. */
export type WebToolUsage = {
/** Number of web_search executions, successful or not. */
searchCalls: number;
/** web_search executions that were not flagged as errors. */
searchSuccess: number;
/** Successful web_search executions whose result count was greater than zero. */
searchSuccessWithResults: number;
/** True when the latest successful search (with results) has no later successful fetch. */
searchNeedsFollowupFetch: boolean;
/** Number of web_fetch executions, successful or not. */
fetchCalls: number;
/** web_fetch executions that were not flagged as errors. */
fetchSuccess: number;
};
/** Fetch-coverage requirement derived from the wording of a user prompt. */
export type WebFetchRequirement = {
/** Minimum number of successful web_fetch calls required; clamped to 1..4. */
requiredMinFetchSuccess: number;
/** True when the prompt matched a research/analysis-style pattern. */
promptSuggestsResearchDepth: boolean;
/** True when the prompt matched a multiple-sources pattern. */
multiSourceCue: boolean;
/** Explicit "at least N sources" count found in the prompt (clamped to 1..4), or null. */
explicitMinFetchFromPrompt: number | null;
};
/** Result of analyzeCrossTurnWebFetchNeed: the decision plus the signals behind it. */
export type CrossTurnWebFetchGuardAnalysis = {
/** Final decision: true when the cross-turn fetch guard should fire. */
shouldEnforce: boolean;
/** User prompt matched an explicit fetch / verify / cite-sources pattern. */
explicitFetchRequest: boolean;
/** User prompt contains an http(s) URL. */
userProvidesUrl: boolean;
/** User prompt matched a freshness pattern (e.g. "latest", "news"). */
freshnessCue: boolean;
/** User prompt contains a URL or matched a web-context pattern. */
webCue: boolean;
/** explicitFetchRequest, or userProvidesUrl, or both freshnessCue and webCue. */
userNeedsFreshWebEvidence: boolean;
/** User prompt matched a pattern that forbids web access or asks for snippets only. */
userBlocksWebFetch: boolean;
/** Assistant text contains a URL or matched a sourced-claim pattern. */
assistantHasWebClaimSignal: boolean;
};
// Heuristic patterns used by the policy functions below. None of them carries
// the `g` flag, so `.test()` keeps no `lastIndex` state between calls.
// Several arrays pair an English pattern with CJK (Chinese) phrasings written
// as \u escapes.

// Matches an http(s) URL up to the next whitespace or ")".
const URL_PATTERN = /https?:\/\/[^\s)]+/i;
// User explicitly asks to (re)fetch, verify against sources, or provide sources/links.
const USER_EXPLICIT_FETCH_PATTERNS: RegExp[] = [
/\b(re[-\s]?fetch|fetch (again|fresh)|verify with sources?|cite sources?|provide (sources?|links?))\b/i,
/\b(revisit|revalidate|double-check)\b.*\b(source|link|url|web|website)\b/i,
/(?:\u672c\u8f6e|\u8fd9\u4e00\u8f6e).*(?:\u91cd\u65b0|\u518d\u6b21).*(?:\u6293\u53d6|\u83b7\u53d6|\u62c9\u53d6)/,
/(?:\u91cd\u65b0|\u518d\u6b21).*(?:\u6293\u53d6|\u83b7\u53d6).*(?:\u7f51\u9875|\u6b63\u6587|\u539f\u6587|\u94fe\u63a5)/,
/(?:\u7ed9\u51fa|\u63d0\u4f9b).*(?:\u6765\u6e90|\u94fe\u63a5|\u5f15\u7528)/,
/(?:\u6838\u5b9e|\u67e5\u8bc1|\u9a8c\u8bc1).*(?:\u6765\u6e90|\u7f51\u9875)/,
];
// User wants recent / up-to-date information.
const USER_FRESHNESS_PATTERNS: RegExp[] = [
/\b(latest|most recent|recent|today|current|up-to-date|newest|breaking)\b/i,
/\b(news|update|updates)\b/i,
/(?:\u6700\u65b0|\u6700\u8fd1|\u4eca\u5929|\u5f53\u524d|\u8fd1\u671f|\u52a8\u6001|\u65b0\u95fb|\u8d44\u8baf)/,
];
// User prompt refers to the web, links, articles, sources or news.
const USER_WEB_CONTEXT_PATTERNS: RegExp[] = [
/\b(web|internet|online|url|urls|link|links|website|article|source|sources|news)\b/i,
/(?:\u7f51\u9875|\u7f51\u7ad9|\u7f51\u7edc|\u4e92\u8054\u7f51|\u94fe\u63a5|\u6765\u6e90|\u65b0\u95fb|\u62a5\u9053|\u6587\u7ae0)/,
];
// Research / analysis / comparison style requests.
const USER_RESEARCH_DEPTH_PATTERNS: RegExp[] = [
/\b(research|investigate|analysis|analyze|compare|comparison|deep[-\s]?dive|survey|report|review)\b/i,
/(?:\u8c03\u7814|\u7814\u7a76|\u5206\u6790|\u6df1\u5ea6|\u5bf9\u6bd4|\u5bf9\u7167|\u6c47\u603b|\u76d8\u70b9|\u62a5\u544a|\u8bc4\u4f30|\u8bc4\u6d4b)/,
];
// User asks for multiple or different sources.
const USER_MULTI_SOURCE_PATTERNS: RegExp[] = [
/\b(multiple|multi-source|across sources|different sources)\b/i,
/(?:\u591a\u6765\u6e90|\u591a\u4e2a\u6765\u6e90|\u4e0d\u540c\u6765\u6e90|\u591a\u7f51\u7ad9)/,
/(?:\u81f3\u5c11|\u4e0d\u5c11\u4e8e|\u6700\u5c11)\s*\d+\s*(?:\u4e2a|\u6761)?(?:\u6765\u6e90|\u94fe\u63a5|\u7f51\u5740|\u7f51\u9875|\u6587\u7ae0)/,
];
// User forbids browsing/fetching or asks to rely on snippets only.
const USER_WEB_BLOCK_PATTERNS: RegExp[] = [
/\b(do not|don't|no|without)\s+(browse|web|internet|web_search|web_fetch|fetch)\b/i,
/\bonly\b.*\b(snippet|snippets)\b/i,
/(?:\u4e0d\u8981|\u4e0d\u9700)\s*(?:\u8054\u7f51|\u6293\u53d6|\u641c\u7d22|\u83b7\u53d6\u7f51\u9875|web_fetch|web_search)/,
/(?:\u4ec5|\u53ea).*(?:snippet|\u6458\u8981)/i,
];
// Assistant text that looks like a sourced claim (attribution phrases, news outlet names).
const ASSISTANT_WEB_CLAIM_PATTERNS: RegExp[] = [
/\b(according to|reported by|as reported|source|sources|citation|cited|press release)\b/i,
/\b(reuters|bloomberg|associated press|ap news|financial times|wall street journal)\b/i,
/(?:\u636e[^。\n]{0,24}(?:\u62a5\u9053|\u663e\u793a|\u79f0)|\u6765\u6e90|\u62a5\u9053\u79f0|\u516c\u544a|\u53d1\u5e03|\u5ba3\u5e03)/,
];
/**
 * Reports whether at least one of `patterns` matches `text`.
 * Empty or whitespace-only text never matches.
 */
function hasAnyPattern(text: string, patterns: RegExp[]): boolean {
  if (text.trim().length === 0) {
    return false;
  }
  for (const candidate of patterns) {
    if (candidate.test(text)) {
      return true;
    }
  }
  return false;
}
/**
 * Clamps a requested minimum fetch count to an integer between 1 and 4.
 * Non-finite input (NaN, ±Infinity) yields 1; fractions are rounded down first.
 */
function normalizeMinFetchSuccess(raw: number): number {
  if (!Number.isFinite(raw)) {
    return 1;
  }
  const whole = Math.floor(raw);
  if (whole < 1) {
    return 1;
  }
  if (whole > 4) {
    return 4;
  }
  return whole;
}
/**
 * Looks for an explicit "at least N sources/links/..." request in the prompt,
 * first in English and then in Chinese phrasing.
 *
 * @returns N clamped by normalizeMinFetchSuccess, or null when no such request
 *   is found.
 */
function extractExplicitMinFetchFromPrompt(prompt: string): number | null {
  const englishMinimum =
    /\b(?:at least|minimum of|no less than)\s*(\d+)\s*(?:sources?|links?|urls?|articles?|pages?)\b/i;
  const chineseMinimum =
    /(?:\u81f3\u5c11|\u4e0d\u5c11\u4e8e|\u6700\u5c11)\s*(\d+)\s*(?:\u4e2a|\u6761)?(?:\u6765\u6e90|\u94fe\u63a5|\u7f51\u5740|\u7f51\u9875|\u6587\u7ae0)/;

  for (const matcher of [englishMinimum, chineseMinimum]) {
    const hit = prompt.match(matcher);
    if (hit) {
      const requested = Number(hit[1]);
      // A digit run too long to be finite is ignored; the next pattern is tried.
      if (Number.isFinite(requested)) {
        return normalizeMinFetchSuccess(requested);
      }
    }
  }
  return null;
}
/** True only when `details` is present and its `error` field is exactly `true`. */
function hasToolError(details: Record<string, unknown> | null): boolean {
  if (details == null) {
    return false;
  }
  return details.error === true;
}
/**
 * Reads the number of search results from a tool's `details`.
 * A finite numeric `count` takes precedence (rounded down, never negative);
 * otherwise the length of a `results` array is used; otherwise 0.
 */
function getSearchResultCount(details: Record<string, unknown> | null): number {
  if (!details) {
    return 0;
  }

  const declared = details.count;
  if (typeof declared === "number" && Number.isFinite(declared)) {
    const whole = Math.floor(declared);
    return whole > 0 ? whole : 0;
  }

  const listed = details.results;
  return Array.isArray(listed) ? listed.length : 0;
}
/**
 * An execution counts as successful when it is not flagged with `isError`
 * and its details do not carry `error === true`.
 */
function isSuccessfulExecution(record: ToolExecutionRecord): boolean {
  return !record.isError && !hasToolError(record.details);
}
/**
 * Walks the execution records in order and tallies web_search / web_fetch
 * activity. Tool names are compared after trimming and lower-casing; records
 * for any other tool are ignored.
 *
 * `searchNeedsFollowupFetch` is set by each successful search that returned
 * results and cleared by each later successful fetch, so it reflects the state
 * after the last record.
 */
export function summarizeWebToolUsage(records: ToolExecutionRecord[]): WebToolUsage {
  let searchCalls = 0;
  let searchSuccess = 0;
  let searchSuccessWithResults = 0;
  let fetchCalls = 0;
  let fetchSuccess = 0;
  let awaitingFetch = false;

  for (const entry of records) {
    switch (entry.toolName.trim().toLowerCase()) {
      case "web_search": {
        searchCalls += 1;
        if (!isSuccessfulExecution(entry)) {
          break;
        }
        searchSuccess += 1;
        if (getSearchResultCount(entry.details) > 0) {
          searchSuccessWithResults += 1;
          awaitingFetch = true;
        }
        break;
      }
      case "web_fetch": {
        fetchCalls += 1;
        if (isSuccessfulExecution(entry)) {
          fetchSuccess += 1;
          awaitingFetch = false;
        }
        break;
      }
      default:
        break;
    }
  }

  return {
    searchCalls,
    searchSuccess,
    searchSuccessWithResults,
    searchNeedsFollowupFetch: awaitingFetch,
    fetchCalls,
    fetchSuccess,
  };
}
/**
 * Decides whether the search-then-fetch guard should fire.
 *
 * Never enforces unless both web tools are available and at least one search
 * returned results. Otherwise enforces when no fetch succeeded, when the
 * latest search still lacks a follow-up fetch, or when fewer fetches succeeded
 * than `requiredMinFetchSuccess` (default 1, clamped to 1..4).
 */
export function shouldEnforceWebFetchAfterSearch(params: {
  usage: WebToolUsage;
  webSearchAvailable: boolean;
  webFetchAvailable: boolean;
  requiredMinFetchSuccess?: number;
}): boolean {
  const { usage, requiredMinFetchSuccess = 1 } = params;

  const bothToolsAvailable = params.webSearchAvailable && params.webFetchAvailable;
  if (!bothToolsAvailable || usage.searchSuccessWithResults <= 0) {
    return false;
  }
  if (usage.fetchSuccess <= 0 || usage.searchNeedsFollowupFetch) {
    return true;
  }
  return usage.fetchSuccess < normalizeMinFetchSuccess(requiredMinFetchSuccess);
}
/**
 * Derives the minimum number of successful fetches a prompt calls for.
 *
 * The baseline is 1. A research-depth or multi-source cue raises it to 2, and
 * an explicit "at least N sources" request raises it to N when N is larger.
 * The result is clamped to 1..4. The individual signals are returned as well.
 */
export function resolveWebFetchRequirementFromPrompt(prompt: string): WebFetchRequirement {
  const text = prompt ?? "";
  const promptSuggestsResearchDepth = hasAnyPattern(text, USER_RESEARCH_DEPTH_PATTERNS);
  const multiSourceCue = hasAnyPattern(text, USER_MULTI_SOURCE_PATTERNS);
  const explicitMinFetchFromPrompt = extractExplicitMinFetchFromPrompt(text);

  // Collect every applicable lower bound; the requirement is the largest one.
  const lowerBounds = [1];
  if (promptSuggestsResearchDepth || multiSourceCue) {
    lowerBounds.push(2);
  }
  if (explicitMinFetchFromPrompt !== null) {
    lowerBounds.push(explicitMinFetchFromPrompt);
  }

  return {
    requiredMinFetchSuccess: normalizeMinFetchSuccess(Math.max(...lowerBounds)),
    promptSuggestsResearchDepth,
    multiSourceCue,
    explicitMinFetchFromPrompt,
  };
}
/**
 * Evaluates whether a turn that made no web_fetch call should be pushed to
 * fetch before the answer is finalized.
 *
 * Enforcement requires all of the following: web_fetch is available, no fetch
 * was attempted, the user did not block web access, the user needs fresh web
 * evidence (explicit fetch request, a URL in the prompt, or freshness and web
 * cues together), and there is a trigger (explicit fetch request, a URL in the
 * prompt, or assistant text that looks like a sourced claim).
 *
 * @returns The decision together with every intermediate signal.
 */
export function analyzeCrossTurnWebFetchNeed(params: {
  usage: WebToolUsage;
  webFetchAvailable: boolean;
  userPrompt: string;
  assistantText: string;
}): CrossTurnWebFetchGuardAnalysis {
  const promptText = params.userPrompt ?? "";
  const replyText = params.assistantText ?? "";

  // Signals taken from the user's prompt.
  const explicitFetchRequest = hasAnyPattern(promptText, USER_EXPLICIT_FETCH_PATTERNS);
  const userProvidesUrl = URL_PATTERN.test(promptText);
  const freshnessCue = hasAnyPattern(promptText, USER_FRESHNESS_PATTERNS);
  const webCue = userProvidesUrl || hasAnyPattern(promptText, USER_WEB_CONTEXT_PATTERNS);
  const userNeedsFreshWebEvidence =
    explicitFetchRequest || userProvidesUrl || (freshnessCue && webCue);
  const userBlocksWebFetch = hasAnyPattern(promptText, USER_WEB_BLOCK_PATTERNS);

  // Signal taken from the assistant's draft answer.
  const assistantHasWebClaimSignal =
    URL_PATTERN.test(replyText) || hasAnyPattern(replyText, ASSISTANT_WEB_CLAIM_PATTERNS);

  const noFetchAttempted =
    params.usage.fetchCalls === 0 && params.usage.fetchSuccess === 0;
  const hasTrigger =
    explicitFetchRequest || userProvidesUrl || assistantHasWebClaimSignal;
  const shouldEnforce =
    params.webFetchAvailable &&
    noFetchAttempted &&
    !userBlocksWebFetch &&
    userNeedsFreshWebEvidence &&
    hasTrigger;

  return {
    shouldEnforce,
    explicitFetchRequest,
    userProvidesUrl,
    freshnessCue,
    webCue,
    userNeedsFreshWebEvidence,
    userBlocksWebFetch,
    assistantHasWebClaimSignal,
  };
}