fix: improve fallback error handling and short-circuit on total failure

- executeFallbackForUrl() now throws on failure instead of returning
  error strings, giving callers proper error semantics to distinguish
  fetch failures from successful content.
- executeFallback() catches per-URL errors, tracks them separately from
  successful results, and short-circuits with an error result when all
  URLs fail — avoiding a wasted LLM call on empty content.
- When some URLs succeed and others fail, error notes are appended to
  the fallback prompt so the LLM is aware of partial fetch failures.
This commit is contained in:
Bryan Morgan
2026-03-12 16:39:28 -04:00
parent f12556de3c
commit 57d999f10c

View File

@@ -268,64 +268,59 @@ class WebFetchToolInvocation extends BaseToolInvocation<
const url = convertGithubUrlToRaw(urlStr);
if (this.isBlockedHost(url)) {
debugLogger.warn(`[WebFetchTool] Blocked access to host: ${url}`);
return `Error fetching ${url}: Access to blocked or private host is not allowed.`;
throw new Error(
`Access to blocked or private host ${url} is not allowed.`,
);
}
try {
const response = await retryWithBackoff(
async () => {
const res = await fetchWithTimeout(url, URL_FETCH_TIMEOUT_MS, {
signal,
headers: {
'User-Agent': USER_AGENT,
},
});
if (!res.ok) {
const error = new Error(
`Request failed with status code ${res.status} ${res.statusText}`,
);
(error as ErrorWithStatus).status = res.status;
throw error;
}
return res;
},
{
retryFetchErrors: this.config.getRetryFetchErrors(),
onRetry: (attempt, error, delayMs) =>
this.handleRetry(attempt, error, delayMs),
const response = await retryWithBackoff(
async () => {
const res = await fetchWithTimeout(url, URL_FETCH_TIMEOUT_MS, {
signal,
},
);
const bodyBuffer = await this.readResponseWithLimit(
response,
MAX_EXPERIMENTAL_FETCH_SIZE,
);
const rawContent = bodyBuffer.toString('utf8');
const contentType = response.headers.get('content-type') || '';
let textContent: string;
// Only use html-to-text if content type is HTML, or if no content type is provided (assume HTML)
if (
contentType.toLowerCase().includes('text/html') ||
contentType === ''
) {
textContent = convert(rawContent, {
wordwrap: false,
selectors: [
{ selector: 'a', options: { ignoreHref: true } },
{ selector: 'img', format: 'skip' },
],
headers: {
'User-Agent': USER_AGENT,
},
});
} else {
// For other content types (text/plain, application/json, etc.), use raw text
textContent = rawContent;
}
if (!res.ok) {
const error = new Error(
`Request failed with status code ${res.status} ${res.statusText}`,
);
(error as ErrorWithStatus).status = res.status;
throw error;
}
return res;
},
{
retryFetchErrors: this.config.getRetryFetchErrors(),
onRetry: (attempt, error, delayMs) =>
this.handleRetry(attempt, error, delayMs),
signal,
},
);
return truncateString(textContent, contentBudget, TRUNCATION_WARNING);
} catch (e) {
return `Error fetching ${url}: ${getErrorMessage(e)}`;
const bodyBuffer = await this.readResponseWithLimit(
response,
MAX_EXPERIMENTAL_FETCH_SIZE,
);
const rawContent = bodyBuffer.toString('utf8');
const contentType = response.headers.get('content-type') || '';
let textContent: string;
// Only use html-to-text if content type is HTML, or if no content type is provided (assume HTML)
if (contentType.toLowerCase().includes('text/html') || contentType === '') {
textContent = convert(rawContent, {
wordwrap: false,
selectors: [
{ selector: 'a', options: { ignoreHref: true } },
{ selector: 'img', format: 'skip' },
],
});
} else {
// For other content types (text/plain, application/json, etc.), use raw text
textContent = rawContent;
}
return truncateString(textContent, contentBudget, TRUNCATION_WARNING);
}
private filterAndValidateUrls(urls: string[]): {
@@ -366,18 +361,51 @@ class WebFetchToolInvocation extends BaseToolInvocation<
const contentBudget = Math.floor(
MAX_CONTENT_LENGTH / (uniqueUrls.length || 1),
);
const results: string[] = [];
const successes: Array<{ url: string; content: string }> = [];
const errors: Array<{ url: string; error: string }> = [];
for (const url of uniqueUrls) {
results.push(
await this.executeFallbackForUrl(url, signal, contentBudget),
);
try {
const content = await this.executeFallbackForUrl(
url,
signal,
contentBudget,
);
successes.push({ url, content });
} catch (e) {
const msg = getErrorMessage(e);
debugLogger.warn(
`[WebFetchTool] Failed to fetch fallback for ${url}: ${msg}`,
);
errors.push({ url, error: msg });
}
}
const aggregatedContent = results
.map((content, i) => `URL: ${uniqueUrls[i]}\nContent:\n${content}`)
// Short-circuit: if every URL failed, return an error immediately
// without wasting an LLM call on empty content.
if (successes.length === 0) {
const errorSummary = errors.map((e) => `${e.url}: ${e.error}`).join('\n');
const errorMessage = `All fallback URLs failed:\n${errorSummary}`;
debugLogger.error(`[WebFetchTool] ${errorMessage}`);
return {
llmContent: `Error: ${errorMessage}`,
returnDisplay: `Error: ${errorMessage}`,
error: {
message: errorMessage,
type: ToolErrorType.WEB_FETCH_FALLBACK_FAILED,
},
};
}
const aggregatedContent = successes
.map((s) => `URL: ${s.url}\nContent:\n${s.content}`)
.join('\n\n---\n\n');
const errorNotes =
errors.length > 0
? `\n\nNote: The following URL(s) could not be fetched:\n${errors.map((e) => `- ${e.url}: ${e.error}`).join('\n')}`
: '';
try {
const geminiClient = this.config.getGeminiClient();
const fallbackPrompt = `The user requested the following: "${this.params.prompt}".
@@ -387,7 +415,7 @@ I was unable to access the URL(s) directly using the primary fetch tool. Instead
---
${aggregatedContent}
---
`;
${errorNotes}`;
const result = await geminiClient.generateContent(
{ model: 'web-fetch-fallback' },
[{ role: 'user', parts: [{ text: fallbackPrompt }] }],