feat(core): implement Stage 1 improvements for webfetch tool (#21313)

This commit is contained in:
Aishanee Shah
2026-03-12 16:13:00 -04:00
committed by GitHub
parent 4863816b81
commit 663d9c0537
3 changed files with 416 additions and 163 deletions

View File

@@ -2129,12 +2129,17 @@ export class RecoveryAttemptEvent extends BaseAgentEvent {
/** Fully-qualified telemetry event name for a web-fetch fallback attempt. */
export const EVENT_WEB_FETCH_FALLBACK_ATTEMPT =
'gemini_cli.web_fetch_fallback_attempt';
/**
 * Reason values accepted by the WebFetchFallbackAttemptEvent constructor.
 *
 * - `private_ip`: NOTE(review): presumably the target resolved to a private
 *   address and the fallback path was used; confirm against remaining callers.
 * - `primary_failed`: the primary fetch path failed and the fallback was used.
 * - `private_ip_skipped`: a blocked/private host was dropped from the request
 *   instead of being fetched.
 */
export type WebFetchFallbackReason =
| 'private_ip'
| 'primary_failed'
| 'private_ip_skipped';
export class WebFetchFallbackAttemptEvent implements BaseTelemetryEvent {
'event.name': 'web_fetch_fallback_attempt';
'event.timestamp': string;
reason: 'private_ip' | 'primary_failed';
reason: WebFetchFallbackReason;
constructor(reason: 'private_ip' | 'primary_failed') {
constructor(reason: WebFetchFallbackReason) {
this['event.name'] = 'web_fetch_fallback_attempt';
this['event.timestamp'] = new Date().toISOString();
this.reason = reason;

View File

@@ -9,6 +9,7 @@ import {
WebFetchTool,
parsePrompt,
convertGithubUrlToRaw,
normalizeUrl,
} from './web-fetch.js';
import type { Config } from '../config/config.js';
import { ApprovalMode } from '../policy/types.js';
@@ -43,7 +44,7 @@ vi.mock('html-to-text', () => ({
vi.mock('../telemetry/index.js', () => ({
logWebFetchFallbackAttempt: vi.fn(),
WebFetchFallbackAttemptEvent: vi.fn(),
WebFetchFallbackAttemptEvent: vi.fn((reason) => ({ reason })),
}));
vi.mock('../utils/fetch.js', async (importOriginal) => {
@@ -125,6 +126,35 @@ const mockFetch = (url: string, response: Partial<Response> | Error) =>
} as unknown as Response;
});
describe('normalizeUrl', () => {
  // Each pair is [raw input, exact string normalizeUrl must return].
  const expectAll = (
    cases: ReadonlyArray<readonly [string, string]>,
  ): void => {
    for (const [input, expected] of cases) {
      expect(normalizeUrl(input)).toBe(expected);
    }
  };

  it('should lowercase hostname', () => {
    // Only the host is case-folded; the path keeps its original casing.
    expectAll([['https://EXAMPLE.com/Path', 'https://example.com/Path']]);
  });

  it('should remove trailing slash except for root', () => {
    expectAll([
      ['https://example.com/path/', 'https://example.com/path'],
      ['https://example.com/', 'https://example.com/'],
    ]);
  });

  it('should remove default ports', () => {
    expectAll([
      ['http://example.com:80/', 'http://example.com/'],
      ['https://example.com:443/', 'https://example.com/'],
      // A non-default port must survive normalization.
      ['https://example.com:8443/', 'https://example.com:8443/'],
    ]);
  });

  it('should handle invalid URLs gracefully', () => {
    // Unparseable input is returned untouched rather than throwing.
    expectAll([['not-a-url', 'not-a-url']]);
  });
});
describe('parsePrompt', () => {
it('should extract valid URLs separated by whitespace', () => {
const prompt = 'Go to https://example.com and http://google.com';
@@ -355,49 +385,164 @@ describe('WebFetchTool', () => {
// The 11th time should fail due to rate limit
const result = await invocation.execute(new AbortController().signal);
expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_PROCESSING_ERROR);
expect(result.error?.message).toContain('Rate limit exceeded for host');
expect(result.error?.message).toContain(
'All requested URLs were skipped',
);
});
// Verifies partial success: when one of two requested hosts has exhausted its
// rate limit, the other is still fetched and the skipped URL is reported in a
// warning prepended to the response.
it('should skip rate-limited URLs but fetch others', async () => {
vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false);
const tool = new WebFetchTool(mockConfig, bus);
const params = {
prompt: 'fetch https://ratelimit-multi.com and https://healthy.com',
};
// Built up front, but only executed after the limit has been exhausted below.
const invocation = tool.build(params);
// Hit rate limit for one host
for (let i = 0; i < 10; i++) {
// One queued model response per call; order of the Once-mocks matters.
mockGenerateContent.mockResolvedValueOnce({
candidates: [{ content: { parts: [{ text: 'response' }] } }],
});
await tool
.build({ prompt: 'fetch https://ratelimit-multi.com' })
.execute(new AbortController().signal);
}
// 11th call - should be rate limited and not use a mock
await tool
.build({ prompt: 'fetch https://ratelimit-multi.com' })
.execute(new AbortController().signal);
// Response consumed by the two-URL invocation under test.
mockGenerateContent.mockResolvedValueOnce({
candidates: [{ content: { parts: [{ text: 'healthy response' }] } }],
});
const result = await invocation.execute(new AbortController().signal);
expect(result.llmContent).toContain('healthy response');
expect(result.llmContent).toContain(
'[Warning] The following URLs were skipped:',
);
// The skipped entry carries the normalized URL (note the trailing slash).
expect(result.llmContent).toContain(
'[Rate limit exceeded] https://ratelimit-multi.com/',
);
});
// Verifies that blocked hosts are dropped from a multi-URL prompt, that one
// telemetry event is logged per dropped host, and that the remaining public
// URL is still processed.
it('should skip private or local URLs but fetch others and log telemetry', async () => {
// Only the normalized private.com URL is reported as private by the mock;
// http://localhost is expected to be blocked without relying on this mock.
vi.mocked(fetchUtils.isPrivateIp).mockImplementation(
(url) => url === 'https://private.com/',
);
const tool = new WebFetchTool(mockConfig, bus);
const params = {
prompt:
'fetch https://private.com and https://healthy.com and http://localhost',
};
const invocation = tool.build(params);
mockGenerateContent.mockResolvedValueOnce({
candidates: [{ content: { parts: [{ text: 'healthy response' }] } }],
});
const result = await invocation.execute(new AbortController().signal);
// Two blocked hosts (private.com and localhost) => two telemetry events.
expect(logWebFetchFallbackAttempt).toHaveBeenCalledTimes(2);
expect(logWebFetchFallbackAttempt).toHaveBeenCalledWith(
expect.anything(),
expect.objectContaining({ reason: 'private_ip_skipped' }),
);
expect(result.llmContent).toContain('healthy response');
expect(result.llmContent).toContain(
'[Warning] The following URLs were skipped:',
);
expect(result.llmContent).toContain(
'[Blocked Host] https://private.com/',
);
expect(result.llmContent).toContain('[Blocked Host] http://localhost');
});
// Verifies that when the primary model-based fetch rejects, every public URL
// in the prompt is fetched through the fallback path and summarized by a
// second model call.
it('should fallback to all public URLs if primary fails', async () => {
vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false);
// Primary fetch fails
mockGenerateContent.mockRejectedValueOnce(new Error('primary fail'));
// Mock fallback fetch for BOTH URLs
mockFetch('https://url1.com/', {
text: () => Promise.resolve('content 1'),
});
mockFetch('https://url2.com/', {
text: () => Promise.resolve('content 2'),
});
// Mock fallback LLM call
mockGenerateContent.mockResolvedValueOnce({
candidates: [
{ content: { parts: [{ text: 'fallback processed response' }] } },
],
});
const tool = new WebFetchTool(mockConfig, bus);
// One URL is written without a trailing slash and one with; both mocks above
// are registered with the trailing-slash form.
const params = {
prompt: 'fetch https://url1.com and https://url2.com/',
};
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
expect(result.llmContent).toBe('fallback processed response');
expect(result.returnDisplay).toContain(
'2 URL(s) processed using fallback fetch',
);
});
// Verifies that a URL reported as private is not part of the fallback fetch
// after the primary path fails; only the public URL has a fetch mock.
it('should NOT include private URLs in fallback', async () => {
vi.mocked(fetchUtils.isPrivateIp).mockImplementation(
(url) => url === 'https://private.com/',
);
// Primary fetch fails
mockGenerateContent.mockRejectedValueOnce(new Error('primary fail'));
// Mock fallback fetch only for public URL
mockFetch('https://public.com/', {
text: () => Promise.resolve('public content'),
});
// Mock fallback LLM call
mockGenerateContent.mockResolvedValueOnce({
candidates: [{ content: { parts: [{ text: 'fallback response' }] } }],
});
const tool = new WebFetchTool(mockConfig, bus);
const params = {
prompt: 'fetch https://public.com/ and https://private.com',
};
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
expect(result.llmContent).toBe('fallback response');
// Verify private URL was NOT fetched (mockFetch would throw if it was called for private.com)
// NOTE(review): there is no explicit assertion for this; it relies on the
// mockFetch helper rejecting unregistered URLs — confirm in the helper.
});
it('should return WEB_FETCH_FALLBACK_FAILED on fallback fetch failure', async () => {
vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(true);
mockFetch('https://private.ip/', new Error('fetch failed'));
vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false);
mockGenerateContent.mockRejectedValue(new Error('primary fail'));
mockFetch('https://public.ip/', new Error('fallback fetch failed'));
const tool = new WebFetchTool(mockConfig, bus);
const params = { prompt: 'fetch https://private.ip' };
const params = { prompt: 'fetch https://public.ip' };
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_FALLBACK_FAILED);
});
it('should return WEB_FETCH_PROCESSING_ERROR on general processing failure', async () => {
it('should return WEB_FETCH_FALLBACK_FAILED on general processing failure (when fallback also fails)', async () => {
vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false);
mockGenerateContent.mockRejectedValue(new Error('API error'));
const tool = new WebFetchTool(mockConfig, bus);
const params = { prompt: 'fetch https://public.ip' };
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_PROCESSING_ERROR);
});
it('should log telemetry when falling back due to private IP', async () => {
vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(true);
// Mock fetchWithTimeout to succeed so fallback proceeds
mockFetch('https://private.ip/', {
text: () => Promise.resolve('some content'),
});
mockGenerateContent.mockResolvedValue({
candidates: [{ content: { parts: [{ text: 'fallback response' }] } }],
});
const tool = new WebFetchTool(mockConfig, bus);
const params = { prompt: 'fetch https://private.ip' };
const invocation = tool.build(params);
await invocation.execute(new AbortController().signal);
expect(logWebFetchFallbackAttempt).toHaveBeenCalledWith(
mockConfig,
expect.any(WebFetchFallbackAttemptEvent),
);
expect(WebFetchFallbackAttemptEvent).toHaveBeenCalledWith('private_ip');
expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_FALLBACK_FAILED);
});
it('should log telemetry when falling back due to primary fetch failure', async () => {
@@ -422,7 +567,7 @@ describe('WebFetchTool', () => {
expect(logWebFetchFallbackAttempt).toHaveBeenCalledWith(
mockConfig,
expect.any(WebFetchFallbackAttemptEvent),
expect.objectContaining({ reason: 'primary_failed' }),
);
expect(WebFetchFallbackAttemptEvent).toHaveBeenCalledWith(
'primary_failed',
@@ -891,13 +1036,13 @@ describe('WebFetchTool', () => {
});
it('should throw error if stream exceeds limit', async () => {
const largeChunk = new Uint8Array(11 * 1024 * 1024);
const large_chunk = new Uint8Array(11 * 1024 * 1024);
mockFetch('https://example.com/large-stream', {
body: {
getReader: () => ({
read: vi
.fn()
.mockResolvedValueOnce({ done: false, value: largeChunk })
.mockResolvedValueOnce({ done: false, value: large_chunk })
.mockResolvedValueOnce({ done: true }),
releaseLock: vi.fn(),
cancel: vi.fn().mockResolvedValue(undefined),
@@ -934,5 +1079,20 @@ describe('WebFetchTool', () => {
expect(result.llmContent).toContain('Error: Invalid URL "not-a-url"');
expect(result.error?.type).toBe(ToolErrorType.INVALID_TOOL_PARAMS);
});
// Verifies that an invocation created directly with a `url` parameter refuses
// a blocked host and returns a processing error instead of fetching it.
it('should block private IP (experimental)', async () => {
vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(true);
const tool = new WebFetchTool(mockConfig, bus);
// Bracket access reaches the non-public createInvocation method so the
// `url` parameter form can be exercised directly.
const invocation = tool['createInvocation'](
{ url: 'http://localhost' },
bus,
);
const result = await invocation.execute(new AbortController().signal);
// The message contains the parsed form of the URL (with trailing slash).
expect(result.llmContent).toContain(
'Error: Access to blocked or private host http://localhost/ is not allowed.',
);
expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_PROCESSING_ERROR);
});
});
});

View File

@@ -81,6 +81,31 @@ function checkRateLimit(url: string): {
}
}
/**
 * Returns a canonical string form of `urlStr` so that equivalent URLs compare
 * equal.
 *
 * The hostname is lowercased, a single trailing slash is dropped from any
 * non-root path, and an explicit default port (80 for http, 443 for https) is
 * removed. Query string and fragment are left as parsed.
 *
 * @param urlStr The URL text to canonicalize.
 * @returns The canonical `href`, or `urlStr` unchanged when it cannot be
 *   parsed as a URL.
 */
export function normalizeUrl(urlStr: string): string {
  try {
    const parsed = new URL(urlStr);
    parsed.hostname = parsed.hostname.toLowerCase();

    // Keep the root path '/' intact; only longer paths lose their last slash.
    const path = parsed.pathname;
    if (path.length > 1 && path.endsWith('/')) {
      parsed.pathname = path.slice(0, -1);
    }

    // Work out which port is the implicit one for this scheme, if any.
    let defaultPort: string | undefined;
    if (parsed.protocol === 'http:') {
      defaultPort = '80';
    } else if (parsed.protocol === 'https:') {
      defaultPort = '443';
    }
    if (defaultPort !== undefined && parsed.port === defaultPort) {
      parsed.port = '';
    }

    return parsed.href;
  } catch {
    // Not a parseable URL: hand the input back untouched.
    return urlStr;
  }
}
/**
* Parses a prompt to extract valid URLs and identify malformed ones.
*/
@@ -146,6 +171,10 @@ interface GroundingChunkItem {
web?: GroundingChunkWeb;
}
/**
 * Type guard used to filter grounding chunks.
 *
 * Accepts any non-null value whose `typeof` is `'object'`; it does not inspect
 * the value's properties.
 *
 * @param item Candidate value.
 * @returns `true` when `item` is a non-null object.
 */
function isGroundingChunkItem(item: unknown): item is GroundingChunkItem {
  if (item === null) {
    return false;
  }
  return typeof item === 'object';
}
interface GroundingSupportSegment {
startIndex: number;
endIndex: number;
@@ -157,6 +186,10 @@ interface GroundingSupportItem {
groundingChunkIndices?: number[];
}
/**
 * Type guard used to filter grounding supports before citation markers are
 * built from them.
 *
 * Beyond requiring a non-null object, the optional fields are validated so
 * the predicate does not vouch for values that would break the consumer:
 * `segment`, when present, must be a non-null object, and
 * `groundingChunkIndices`, when present, must be an array of numbers. Items
 * that fail are meant to be filtered out rather than cause a throw later.
 *
 * @param item Candidate value.
 * @returns `true` when `item` is structurally usable as a support item.
 */
function isGroundingSupportItem(item: unknown): item is GroundingSupportItem {
  if (typeof item !== 'object' || item === null) {
    return false;
  }

  const segment: unknown = 'segment' in item ? item.segment : undefined;
  if (
    segment !== undefined &&
    (typeof segment !== 'object' || segment === null)
  ) {
    return false;
  }

  const indices: unknown =
    'groundingChunkIndices' in item ? item.groundingChunkIndices : undefined;
  if (indices === undefined) {
    return true;
  }
  // A non-array here would throw on `.map`; non-numeric entries would yield
  // string-concatenated citation numbers.
  return (
    Array.isArray(indices) &&
    indices.every((entry: unknown) => typeof entry === 'number')
  );
}
/**
* Parameters for the WebFetch tool
*/
@@ -214,13 +247,29 @@ class WebFetchToolInvocation extends BaseToolInvocation<
);
}
private async executeFallback(signal: AbortSignal): Promise<ToolResult> {
const { validUrls: urls } = parsePrompt(this.params.prompt!);
// For now, we only support one URL for fallback
let url = urls[0];
/**
 * Reports whether `urlStr` points at a host this tool must not fetch.
 *
 * A URL is blocked when its hostname is `localhost` or `127.0.0.1`, when
 * `isPrivateIp` reports it as private, or when it cannot be evaluated at all
 * (any error while parsing or checking is treated as blocked).
 *
 * @param urlStr The URL to check.
 * @returns `true` when the URL must not be fetched.
 */
private isBlockedHost(urlStr: string): boolean {
  const loopbackNames = ['localhost', '127.0.0.1'];
  try {
    const host = new URL(urlStr).hostname.toLowerCase();
    if (loopbackNames.includes(host)) {
      return true;
    }
    return isPrivateIp(urlStr);
  } catch {
    // Fail closed: anything that cannot be evaluated is considered blocked.
    return true;
  }
}
// Convert GitHub blob URL to raw URL
url = convertGithubUrlToRaw(url);
private async executeFallbackForUrl(
urlStr: string,
signal: AbortSignal,
contentBudget: number,
): Promise<string> {
const url = convertGithubUrlToRaw(urlStr);
if (this.isBlockedHost(url)) {
debugLogger.warn(`[WebFetchTool] Blocked access to host: ${url}`);
return `Error fetching ${url}: Access to blocked or private host is not allowed.`;
}
try {
const response = await retryWithBackoff(
@@ -244,6 +293,7 @@ class WebFetchToolInvocation extends BaseToolInvocation<
retryFetchErrors: this.config.getRetryFetchErrors(),
onRetry: (attempt, error, delayMs) =>
this.handleRetry(attempt, error, delayMs),
signal,
},
);
@@ -272,19 +322,70 @@ class WebFetchToolInvocation extends BaseToolInvocation<
textContent = rawContent;
}
textContent = truncateString(
textContent,
MAX_CONTENT_LENGTH,
TRUNCATION_WARNING,
);
return truncateString(textContent, contentBudget, TRUNCATION_WARNING);
} catch (e) {
return `Error fetching ${url}: ${getErrorMessage(e)}`;
}
}
/**
 * Splits the requested URLs into those that may be fetched and those that
 * must be skipped.
 *
 * URLs are normalized and de-duplicated first. Each remaining URL is then
 * checked in order: a blocked host is logged, reported to telemetry with
 * reason `private_ip_skipped`, and skipped; otherwise the rate limiter is
 * consulted and a disallowed URL is logged and skipped.
 *
 * @param urls Raw URLs extracted from the prompt.
 * @returns `toFetch` holds the normalized URLs cleared for fetching;
 *   `skipped` holds one human-readable entry per rejected URL, prefixed with
 *   the rejection reason.
 */
private filterAndValidateUrls(urls: string[]): {
  toFetch: string[];
  skipped: string[];
} {
  const toFetch: string[] = [];
  const skipped: string[] = [];

  // Set iteration preserves first-seen order of the normalized URLs.
  const candidates = new Set(urls.map(normalizeUrl));
  candidates.forEach((candidate) => {
    if (this.isBlockedHost(candidate)) {
      debugLogger.warn(
        `[WebFetchTool] Skipped private or local host: ${candidate}`,
      );
      logWebFetchFallbackAttempt(
        this.config,
        new WebFetchFallbackAttemptEvent('private_ip_skipped'),
      );
      skipped.push(`[Blocked Host] ${candidate}`);
      return;
    }

    // The rate limiter is only consulted for hosts that passed the block check.
    const { allowed } = checkRateLimit(candidate);
    if (!allowed) {
      debugLogger.warn(
        `[WebFetchTool] Rate limit exceeded for host: ${candidate}`,
      );
      skipped.push(`[Rate limit exceeded] ${candidate}`);
      return;
    }

    toFetch.push(candidate);
  });

  return { toFetch, skipped };
}
private async executeFallback(
urls: string[],
signal: AbortSignal,
): Promise<ToolResult> {
const uniqueUrls = [...new Set(urls)];
const contentBudget = Math.floor(
MAX_CONTENT_LENGTH / (uniqueUrls.length || 1),
);
const results: string[] = [];
for (const url of uniqueUrls) {
results.push(
await this.executeFallbackForUrl(url, signal, contentBudget),
);
}
const aggregatedContent = results
.map((content, i) => `URL: ${uniqueUrls[i]}\nContent:\n${content}`)
.join('\n\n---\n\n');
try {
const geminiClient = this.config.getGeminiClient();
const fallbackPrompt = `The user requested the following: "${this.params.prompt}".
I was unable to access the URL directly. Instead, I have fetched the raw content of the page. Please use the following content to answer the request. Do not attempt to access the URL again.
I was unable to access the URL(s) directly using the primary fetch tool. Instead, I have fetched the raw content of the page(s). Please use the following content to answer the request. Do not attempt to access the URL(s) again.
---
${textContent}
${aggregatedContent}
---
`;
const result = await geminiClient.generateContent(
@@ -293,15 +394,29 @@ ${textContent}
signal,
LlmRole.UTILITY_TOOL,
);
debugLogger.debug(
`[WebFetchTool] Fallback response for prompt "${this.params.prompt?.substring(
0,
50,
)}...":`,
JSON.stringify(result, null, 2),
);
const resultText = getResponseText(result) || '';
debugLogger.debug(
`[WebFetchTool] Formatted fallback tool response for prompt "${this.params.prompt}":\n\n`,
resultText,
);
return {
llmContent: resultText,
returnDisplay: `Content for ${url} processed using fallback fetch.`,
returnDisplay: `Content for ${urls.length} URL(s) processed using fallback fetch.`,
};
} catch (e) {
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const error = e as Error;
const errorMessage = `Error during fallback fetch for ${url}: ${error.message}`;
const errorMessage = `Error during fallback processing: ${getErrorMessage(e)}`;
debugLogger.error(`[WebFetchTool] Fallback failed: ${errorMessage}`);
return {
llmContent: `Error: ${errorMessage}`,
returnDisplay: `Error: ${errorMessage}`,
@@ -437,6 +552,21 @@ ${textContent}
// Convert GitHub blob URL to raw URL
url = convertGithubUrlToRaw(url);
if (this.isBlockedHost(url)) {
const errorMessage = `Access to blocked or private host ${url} is not allowed.`;
debugLogger.warn(
`[WebFetchTool] Blocked experimental fetch to host: ${url}`,
);
return {
llmContent: `Error: ${errorMessage}`,
returnDisplay: `Error: ${errorMessage}`,
error: {
message: errorMessage,
type: ToolErrorType.WEB_FETCH_PROCESSING_ERROR,
},
};
}
try {
const response = await retryWithBackoff(
async () => {
@@ -454,6 +584,7 @@ ${textContent}
retryFetchErrors: this.config.getRetryFetchErrors(),
onRetry: (attempt, error, delayMs) =>
this.handleRetry(attempt, error, delayMs),
signal,
},
);
@@ -473,6 +604,9 @@ ${textContent}
const errorContent = `Request failed with status ${status}
Headers: ${JSON.stringify(headers, null, 2)}
Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response truncated] ...')}`;
debugLogger.error(
`[WebFetchTool] Experimental fetch failed with status ${status} for ${url}`,
);
return {
llmContent: errorContent,
returnDisplay: `Failed to fetch ${url} (Status: ${status})`,
@@ -543,6 +677,9 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun
};
} catch (e) {
const errorMessage = `Error during experimental fetch for ${url}: ${getErrorMessage(e)}`;
debugLogger.error(
`[WebFetchTool] Experimental fetch error: ${errorMessage}`,
);
return {
llmContent: `Error: ${errorMessage}`,
returnDisplay: `Error: ${errorMessage}`,
@@ -559,15 +696,14 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun
return this.executeExperimental(signal);
}
const userPrompt = this.params.prompt!;
const { validUrls: urls } = parsePrompt(userPrompt);
const url = urls[0];
const { validUrls } = parsePrompt(userPrompt);
// Enforce rate limiting
const rateLimitResult = checkRateLimit(url);
if (!rateLimitResult.allowed) {
const waitTimeSecs = Math.ceil((rateLimitResult.waitTimeMs || 0) / 1000);
const errorMessage = `Rate limit exceeded for host. Please wait ${waitTimeSecs} seconds before trying again.`;
debugLogger.warn(`[WebFetchTool] Rate limit exceeded for ${url}`);
const { toFetch, skipped } = this.filterAndValidateUrls(validUrls);
// If everything was skipped, fail early
if (toFetch.length === 0 && skipped.length > 0) {
const errorMessage = `All requested URLs were skipped: ${skipped.join(', ')}`;
debugLogger.error(`[WebFetchTool] ${errorMessage}`);
return {
llmContent: `Error: ${errorMessage}`,
returnDisplay: `Error: ${errorMessage}`,
@@ -578,23 +714,12 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun
};
}
const isPrivate = isPrivateIp(url);
if (isPrivate) {
logWebFetchFallbackAttempt(
this.config,
new WebFetchFallbackAttemptEvent('private_ip'),
);
return this.executeFallback(signal);
}
const geminiClient = this.config.getGeminiClient();
try {
const geminiClient = this.config.getGeminiClient();
const response = await geminiClient.generateContent(
{ model: 'web-fetch' },
[{ role: 'user', parts: [{ text: userPrompt }] }],
signal, // Pass signal
signal,
LlmRole.UTILITY_TOOL,
);
@@ -607,113 +732,76 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun
);
let responseText = getResponseText(response) || '';
const urlContextMeta = response.candidates?.[0]?.urlContextMetadata;
const groundingMetadata = response.candidates?.[0]?.groundingMetadata;
const sources = groundingMetadata?.groundingChunks as
| GroundingChunkItem[]
| undefined;
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const groundingSupports = groundingMetadata?.groundingSupports as
| GroundingSupportItem[]
| undefined;
// Error Handling
let processingError = false;
if (
urlContextMeta?.urlMetadata &&
urlContextMeta.urlMetadata.length > 0
) {
const allStatuses = urlContextMeta.urlMetadata.map(
(m) => m.urlRetrievalStatus,
);
if (allStatuses.every((s) => s !== 'URL_RETRIEVAL_STATUS_SUCCESS')) {
processingError = true;
}
} else if (!responseText.trim() && !sources?.length) {
// No URL metadata and no content/sources
processingError = true;
// Simple primary success check: we need some text or grounding data
if (!responseText.trim() && !groundingMetadata?.groundingChunks?.length) {
throw new Error('Primary fetch returned no content');
}
if (
!processingError &&
!responseText.trim() &&
(!sources || sources.length === 0)
) {
// Successfully retrieved some URL (or no specific error from urlContextMeta), but no usable text or grounding data.
processingError = true;
}
if (processingError) {
logWebFetchFallbackAttempt(
this.config,
new WebFetchFallbackAttemptEvent('primary_failed'),
);
return await this.executeFallback(signal);
}
const sourceListFormatted: string[] = [];
if (sources && sources.length > 0) {
sources.forEach((source: GroundingChunkItem, index: number) => {
const title = source.web?.title || 'Untitled';
const uri = source.web?.uri || 'Unknown URI'; // Fallback if URI is missing
sourceListFormatted.push(`[${index + 1}] ${title} (${uri})`);
// 1. Apply Grounding Supports (Citations)
const groundingSupports = groundingMetadata?.groundingSupports?.filter(
isGroundingSupportItem,
);
if (groundingSupports && groundingSupports.length > 0) {
const insertions: Array<{ index: number; marker: string }> = [];
groundingSupports.forEach((support) => {
if (support.segment && support.groundingChunkIndices) {
const citationMarker = support.groundingChunkIndices
.map((chunkIndex: number) => `[${chunkIndex + 1}]`)
.join('');
insertions.push({
index: support.segment.endIndex,
marker: citationMarker,
});
}
});
if (groundingSupports && groundingSupports.length > 0) {
const insertions: Array<{ index: number; marker: string }> = [];
groundingSupports.forEach((support: GroundingSupportItem) => {
if (support.segment && support.groundingChunkIndices) {
const citationMarker = support.groundingChunkIndices
.map((chunkIndex: number) => `[${chunkIndex + 1}]`)
.join('');
insertions.push({
index: support.segment.endIndex,
marker: citationMarker,
});
}
});
insertions.sort((a, b) => b.index - a.index);
const responseChars = responseText.split('');
insertions.forEach((insertion) => {
responseChars.splice(insertion.index, 0, insertion.marker);
});
responseText = responseChars.join('');
}
if (sourceListFormatted.length > 0) {
responseText += `
Sources:
${sourceListFormatted.join('\n')}`;
}
insertions.sort((a, b) => b.index - a.index);
const responseChars = responseText.split('');
insertions.forEach((insertion) => {
responseChars.splice(insertion.index, 0, insertion.marker);
});
responseText = responseChars.join('');
}
const llmContent = responseText;
// 2. Append Source List
const sources =
groundingMetadata?.groundingChunks?.filter(isGroundingChunkItem);
if (sources && sources.length > 0) {
const sourceListFormatted: string[] = [];
sources.forEach((source, index) => {
const title = source.web?.title || 'Untitled';
const uri = source.web?.uri || 'Unknown URI';
sourceListFormatted.push(`[${index + 1}] ${title} (${uri})`);
});
responseText += `\n\nSources:\n${sourceListFormatted.join('\n')}`;
}
// 3. Prepend Warnings for skipped URLs
if (skipped.length > 0) {
responseText = `[Warning] The following URLs were skipped:\n${skipped.join('\n')}\n\n${responseText}`;
}
debugLogger.debug(
`[WebFetchTool] Formatted tool response for prompt "${userPrompt}:\n\n":`,
llmContent,
`[WebFetchTool] Formatted tool response for prompt "${userPrompt}":\n\n`,
responseText,
);
return {
llmContent,
llmContent: responseText,
returnDisplay: `Content processed from prompt.`,
};
} catch (error: unknown) {
const errorMessage = `Error processing web content for prompt "${userPrompt.substring(
0,
50,
)}...": ${getErrorMessage(error)}`;
return {
llmContent: `Error: ${errorMessage}`,
returnDisplay: `Error: ${errorMessage}`,
error: {
message: errorMessage,
type: ToolErrorType.WEB_FETCH_PROCESSING_ERROR,
},
};
debugLogger.warn(
`[WebFetchTool] Primary fetch failed, falling back: ${getErrorMessage(error)}`,
);
logWebFetchFallbackAttempt(
this.config,
new WebFetchFallbackAttemptEvent('primary_failed'),
);
// Simple All-or-Nothing Fallback
return this.executeFallback(toFetch, signal);
}
}
}