From 663d9c0537728facea26a1f2c4ffa8f97ad0da5c Mon Sep 17 00:00:00 2001 From: Aishanee Shah Date: Thu, 12 Mar 2026 16:13:00 -0400 Subject: [PATCH] feat(core): implement Stage 1 improvements for webfetch tool (#21313) --- packages/core/src/telemetry/types.ts | 9 +- packages/core/src/tools/web-fetch.test.ts | 224 ++++++++++++-- packages/core/src/tools/web-fetch.ts | 346 ++++++++++++++-------- 3 files changed, 416 insertions(+), 163 deletions(-) diff --git a/packages/core/src/telemetry/types.ts b/packages/core/src/telemetry/types.ts index 6669628220..0ee6e63503 100644 --- a/packages/core/src/telemetry/types.ts +++ b/packages/core/src/telemetry/types.ts @@ -2129,12 +2129,17 @@ export class RecoveryAttemptEvent extends BaseAgentEvent { export const EVENT_WEB_FETCH_FALLBACK_ATTEMPT = 'gemini_cli.web_fetch_fallback_attempt'; +export type WebFetchFallbackReason = + | 'private_ip' + | 'primary_failed' + | 'private_ip_skipped'; + export class WebFetchFallbackAttemptEvent implements BaseTelemetryEvent { 'event.name': 'web_fetch_fallback_attempt'; 'event.timestamp': string; - reason: 'private_ip' | 'primary_failed'; + reason: WebFetchFallbackReason; - constructor(reason: 'private_ip' | 'primary_failed') { + constructor(reason: WebFetchFallbackReason) { this['event.name'] = 'web_fetch_fallback_attempt'; this['event.timestamp'] = new Date().toISOString(); this.reason = reason; diff --git a/packages/core/src/tools/web-fetch.test.ts b/packages/core/src/tools/web-fetch.test.ts index 0db08c43e0..103138e487 100644 --- a/packages/core/src/tools/web-fetch.test.ts +++ b/packages/core/src/tools/web-fetch.test.ts @@ -9,6 +9,7 @@ import { WebFetchTool, parsePrompt, convertGithubUrlToRaw, + normalizeUrl, } from './web-fetch.js'; import type { Config } from '../config/config.js'; import { ApprovalMode } from '../policy/types.js'; @@ -43,7 +44,7 @@ vi.mock('html-to-text', () => ({ vi.mock('../telemetry/index.js', () => ({ logWebFetchFallbackAttempt: vi.fn(), - WebFetchFallbackAttemptEvent: vi.fn(), + WebFetchFallbackAttemptEvent: vi.fn((reason) => ({ reason })), })); vi.mock('../utils/fetch.js', async (importOriginal) => { @@ -125,6 +126,35 @@ const mockFetch = (url: string, response: Partial | Error) => } as unknown as Response; }); +describe('normalizeUrl', () => { + it('should lowercase hostname', () => { + expect(normalizeUrl('https://EXAMPLE.com/Path')).toBe( + 'https://example.com/Path', + ); + }); + + it('should remove trailing slash except for root', () => { + expect(normalizeUrl('https://example.com/path/')).toBe( + 'https://example.com/path', + ); + expect(normalizeUrl('https://example.com/')).toBe('https://example.com/'); + }); + + it('should remove default ports', () => { + expect(normalizeUrl('http://example.com:80/')).toBe('http://example.com/'); + expect(normalizeUrl('https://example.com:443/')).toBe( + 'https://example.com/', + ); + expect(normalizeUrl('https://example.com:8443/')).toBe( + 'https://example.com:8443/', + ); + }); + + it('should handle invalid URLs gracefully', () => { + expect(normalizeUrl('not-a-url')).toBe('not-a-url'); + }); +}); + describe('parsePrompt', () => { it('should extract valid URLs separated by whitespace', () => { const prompt = 'Go to https://example.com and http://google.com'; @@ -355,49 +385,164 @@ describe('WebFetchTool', () => { // The 11th time should fail due to rate limit const result = await invocation.execute(new AbortController().signal); expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_PROCESSING_ERROR); - expect(result.error?.message).toContain('Rate limit exceeded for host'); + expect(result.error?.message).toContain( + 'All requested URLs were skipped', + ); + }); + + it('should skip rate-limited URLs but fetch others', async () => { + vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false); + + const tool = new WebFetchTool(mockConfig, bus); + const params = { + prompt: 'fetch https://ratelimit-multi.com and https://healthy.com', + }; + const invocation = tool.build(params); + + // Hit rate limit for one host + for (let i = 0; i < 10; i++) { + mockGenerateContent.mockResolvedValueOnce({ + candidates: [{ content: { parts: [{ text: 'response' }] } }], + }); + await tool + .build({ prompt: 'fetch https://ratelimit-multi.com' }) + .execute(new AbortController().signal); + } + // 11th call - should be rate limited and not use a mock + await tool + .build({ prompt: 'fetch https://ratelimit-multi.com' }) + .execute(new AbortController().signal); + + mockGenerateContent.mockResolvedValueOnce({ + candidates: [{ content: { parts: [{ text: 'healthy response' }] } }], + }); + + const result = await invocation.execute(new AbortController().signal); + expect(result.llmContent).toContain('healthy response'); + expect(result.llmContent).toContain( + '[Warning] The following URLs were skipped:', + ); + expect(result.llmContent).toContain( + '[Rate limit exceeded] https://ratelimit-multi.com/', + ); + }); + + it('should skip private or local URLs but fetch others and log telemetry', async () => { + vi.mocked(fetchUtils.isPrivateIp).mockImplementation( + (url) => url === 'https://private.com/', + ); + + const tool = new WebFetchTool(mockConfig, bus); + const params = { + prompt: + 'fetch https://private.com and https://healthy.com and http://localhost', + }; + const invocation = tool.build(params); + + mockGenerateContent.mockResolvedValueOnce({ + candidates: [{ content: { parts: [{ text: 'healthy response' }] } }], + }); + + const result = await invocation.execute(new AbortController().signal); + + expect(logWebFetchFallbackAttempt).toHaveBeenCalledTimes(2); + expect(logWebFetchFallbackAttempt).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ reason: 'private_ip_skipped' }), + ); + + expect(result.llmContent).toContain('healthy response'); + expect(result.llmContent).toContain( + '[Warning] The following URLs were skipped:', + ); + expect(result.llmContent).toContain( + '[Blocked Host] https://private.com/', + ); + expect(result.llmContent).toContain('[Blocked Host] http://localhost'); + }); + + it('should fallback to all public URLs if primary fails', async () => { + vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false); + + // Primary fetch fails + mockGenerateContent.mockRejectedValueOnce(new Error('primary fail')); + + // Mock fallback fetch for BOTH URLs + mockFetch('https://url1.com/', { + text: () => Promise.resolve('content 1'), + }); + mockFetch('https://url2.com/', { + text: () => Promise.resolve('content 2'), + }); + + // Mock fallback LLM call + mockGenerateContent.mockResolvedValueOnce({ + candidates: [ + { content: { parts: [{ text: 'fallback processed response' }] } }, + ], + }); + + const tool = new WebFetchTool(mockConfig, bus); + const params = { + prompt: 'fetch https://url1.com and https://url2.com/', + }; + const invocation = tool.build(params); + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toBe('fallback processed response'); + expect(result.returnDisplay).toContain( + '2 URL(s) processed using fallback fetch', + ); + }); + + it('should NOT include private URLs in fallback', async () => { + vi.mocked(fetchUtils.isPrivateIp).mockImplementation( + (url) => url === 'https://private.com/', + ); + + // Primary fetch fails + mockGenerateContent.mockRejectedValueOnce(new Error('primary fail')); + + // Mock fallback fetch only for public URL + mockFetch('https://public.com/', { + text: () => Promise.resolve('public content'), + }); + + // Mock fallback LLM call + mockGenerateContent.mockResolvedValueOnce({ + candidates: [{ content: { parts: [{ text: 'fallback response' }] } }], + }); + + const tool = new WebFetchTool(mockConfig, bus); + const params = { + prompt: 'fetch https://public.com/ and https://private.com', + }; + const invocation = tool.build(params); + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toBe('fallback response'); + // Verify private URL was NOT fetched (mockFetch would throw if it was called for private.com) }); it('should return WEB_FETCH_FALLBACK_FAILED on fallback fetch failure', async () => { - vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(true); - mockFetch('https://private.ip/', new Error('fetch failed')); + vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false); + mockGenerateContent.mockRejectedValue(new Error('primary fail')); + mockFetch('https://public.ip/', new Error('fallback fetch failed')); const tool = new WebFetchTool(mockConfig, bus); - const params = { prompt: 'fetch https://private.ip' }; + const params = { prompt: 'fetch https://public.ip' }; const invocation = tool.build(params); const result = await invocation.execute(new AbortController().signal); expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_FALLBACK_FAILED); }); - it('should return WEB_FETCH_PROCESSING_ERROR on general processing failure', async () => { + it('should return WEB_FETCH_FALLBACK_FAILED on general processing failure (when fallback also fails)', async () => { vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false); mockGenerateContent.mockRejectedValue(new Error('API error')); const tool = new WebFetchTool(mockConfig, bus); const params = { prompt: 'fetch https://public.ip' }; const invocation = tool.build(params); const result = await invocation.execute(new AbortController().signal); - expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_PROCESSING_ERROR); - }); - - it('should log telemetry when falling back due to private IP', async () => { - vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(true); - // Mock fetchWithTimeout to succeed so fallback proceeds - mockFetch('https://private.ip/', { - text: () => Promise.resolve('some content'), - }); - mockGenerateContent.mockResolvedValue({ - candidates: [{ content: { parts: [{ text: 'fallback response' }] } }], - }); - - const tool = new WebFetchTool(mockConfig, bus); - const params = { prompt: 'fetch https://private.ip' }; - const invocation = tool.build(params); - await invocation.execute(new AbortController().signal); - - expect(logWebFetchFallbackAttempt).toHaveBeenCalledWith( - mockConfig, - expect.any(WebFetchFallbackAttemptEvent), - ); - expect(WebFetchFallbackAttemptEvent).toHaveBeenCalledWith('private_ip'); + expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_FALLBACK_FAILED); }); it('should log telemetry when falling back due to primary fetch failure', async () => { @@ -422,7 +567,7 @@ describe('WebFetchTool', () => { expect(logWebFetchFallbackAttempt).toHaveBeenCalledWith( mockConfig, - expect.any(WebFetchFallbackAttemptEvent), + expect.objectContaining({ reason: 'primary_failed' }), ); expect(WebFetchFallbackAttemptEvent).toHaveBeenCalledWith( 'primary_failed', @@ -891,13 +1036,13 @@ describe('WebFetchTool', () => { }); it('should throw error if stream exceeds limit', async () => { - const largeChunk = new Uint8Array(11 * 1024 * 1024); + const large_chunk = new Uint8Array(11 * 1024 * 1024); mockFetch('https://example.com/large-stream', { body: { getReader: () => ({ read: vi .fn() - .mockResolvedValueOnce({ done: false, value: largeChunk }) + .mockResolvedValueOnce({ done: false, value: large_chunk }) .mockResolvedValueOnce({ done: true }), releaseLock: vi.fn(), cancel: vi.fn().mockResolvedValue(undefined), @@ -934,5 +1079,20 @@ describe('WebFetchTool', () => { expect(result.llmContent).toContain('Error: Invalid URL "not-a-url"'); expect(result.error?.type).toBe(ToolErrorType.INVALID_TOOL_PARAMS); }); + + it('should block private IP (experimental)', async () => { + vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(true); + const tool = new WebFetchTool(mockConfig, bus); + const invocation = tool['createInvocation']( + { url: 'http://localhost' }, + bus, + ); + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toContain( + 'Error: Access to blocked or private host http://localhost/ is not allowed.', + ); + expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_PROCESSING_ERROR); + }); }); }); diff --git a/packages/core/src/tools/web-fetch.ts b/packages/core/src/tools/web-fetch.ts index 7d16fb1d76..1bb244f21d 100644 --- a/packages/core/src/tools/web-fetch.ts +++ b/packages/core/src/tools/web-fetch.ts @@ -81,6 +81,31 @@ function checkRateLimit(url: string): { } } +/** + * Normalizes a URL by converting hostname to lowercase, removing trailing slashes, + * and removing default ports. + */ +export function normalizeUrl(urlStr: string): string { + try { + const url = new URL(urlStr); + url.hostname = url.hostname.toLowerCase(); + // Remove trailing slash if present in pathname (except for root '/') + if (url.pathname.endsWith('/') && url.pathname.length > 1) { + url.pathname = url.pathname.slice(0, -1); + } + // Remove default ports + if ( + (url.protocol === 'http:' && url.port === '80') || + (url.protocol === 'https:' && url.port === '443') + ) { + url.port = ''; + } + return url.href; + } catch { + return urlStr; + } +} + /** * Parses a prompt to extract valid URLs and identify malformed ones. */ @@ -146,6 +171,10 @@ interface GroundingChunkItem { web?: GroundingChunkWeb; } +function isGroundingChunkItem(item: unknown): item is GroundingChunkItem { + return typeof item === 'object' && item !== null; +} + interface GroundingSupportSegment { startIndex: number; endIndex: number; @@ -157,6 +186,10 @@ interface GroundingSupportItem { groundingChunkIndices?: number[]; } +function isGroundingSupportItem(item: unknown): item is GroundingSupportItem { + return typeof item === 'object' && item !== null; +} + /** * Parameters for the WebFetch tool */ @@ -214,13 +247,29 @@ class WebFetchToolInvocation extends BaseToolInvocation< ); } - private async executeFallback(signal: AbortSignal): Promise { - const { validUrls: urls } = parsePrompt(this.params.prompt!); - // For now, we only support one URL for fallback - let url = urls[0]; + private isBlockedHost(urlStr: string): boolean { + try { + const url = new URL(urlStr); + const hostname = url.hostname.toLowerCase(); + if (hostname === 'localhost' || hostname === '127.0.0.1') { + return true; + } + return isPrivateIp(urlStr); + } catch { + return true; + } + } - // Convert GitHub blob URL to raw URL - url = convertGithubUrlToRaw(url); + private async executeFallbackForUrl( + urlStr: string, + signal: AbortSignal, + contentBudget: number, + ): Promise { + const url = convertGithubUrlToRaw(urlStr); + if (this.isBlockedHost(url)) { + debugLogger.warn(`[WebFetchTool] Blocked access to host: ${url}`); + return `Error fetching ${url}: Access to blocked or private host is not allowed.`; + } try { const response = await retryWithBackoff( @@ -244,6 +293,7 @@ class WebFetchToolInvocation extends BaseToolInvocation< retryFetchErrors: this.config.getRetryFetchErrors(), onRetry: (attempt, error, delayMs) => this.handleRetry(attempt, error, delayMs), + signal, }, ); @@ -272,19 +322,70 @@ class WebFetchToolInvocation extends BaseToolInvocation< textContent = rawContent; } - textContent = truncateString( - textContent, - MAX_CONTENT_LENGTH, - TRUNCATION_WARNING, - ); + return truncateString(textContent, contentBudget, TRUNCATION_WARNING); + } catch (e) { + return `Error fetching ${url}: ${getErrorMessage(e)}`; + } + } + private filterAndValidateUrls(urls: string[]): { + toFetch: string[]; + skipped: string[]; + } { + const uniqueUrls = [...new Set(urls.map(normalizeUrl))]; + const toFetch: string[] = []; + const skipped: string[] = []; + + for (const url of uniqueUrls) { + if (this.isBlockedHost(url)) { + debugLogger.warn( + `[WebFetchTool] Skipped private or local host: ${url}`, + ); + logWebFetchFallbackAttempt( + this.config, + new WebFetchFallbackAttemptEvent('private_ip_skipped'), + ); + skipped.push(`[Blocked Host] ${url}`); + continue; + } + if (!checkRateLimit(url).allowed) { + debugLogger.warn(`[WebFetchTool] Rate limit exceeded for host: ${url}`); + skipped.push(`[Rate limit exceeded] ${url}`); + continue; + } + toFetch.push(url); + } + return { toFetch, skipped }; + } + + private async executeFallback( + urls: string[], + signal: AbortSignal, + ): Promise { + const uniqueUrls = [...new Set(urls)]; + const contentBudget = Math.floor( + MAX_CONTENT_LENGTH / (uniqueUrls.length || 1), + ); + const results: string[] = []; + + for (const url of uniqueUrls) { + results.push( + await this.executeFallbackForUrl(url, signal, contentBudget), + ); + } + + const aggregatedContent = results + .map((content, i) => `URL: ${uniqueUrls[i]}\nContent:\n${content}`) + .join('\n\n---\n\n'); + + try { const geminiClient = this.config.getGeminiClient(); const fallbackPrompt = `The user requested the following: "${this.params.prompt}". -I was unable to access the URL directly. Instead, I have fetched the raw content of the page. Please use the following content to answer the request. Do not attempt to access the URL again. +I was unable to access the URL(s) directly using the primary fetch tool. Instead, I have fetched the raw content of the page(s). Please use the following content to answer the request. Do not attempt to access the URL(s) again. --- -${textContent} +${aggregatedContent} --- `; const result = await geminiClient.generateContent( @@ -293,15 +394,29 @@ ${textContent} signal, LlmRole.UTILITY_TOOL, ); + + debugLogger.debug( + `[WebFetchTool] Fallback response for prompt "${this.params.prompt?.substring( + 0, + 50, + )}...":`, + JSON.stringify(result, null, 2), + ); + const resultText = getResponseText(result) || ''; + + debugLogger.debug( + `[WebFetchTool] Formatted fallback tool response for prompt "${this.params.prompt}":\n\n`, + resultText, + ); + return { llmContent: resultText, - returnDisplay: `Content for ${url} processed using fallback fetch.`, + returnDisplay: `Content for ${urls.length} URL(s) processed using fallback fetch.`, }; } catch (e) { - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - const error = e as Error; - const errorMessage = `Error during fallback fetch for ${url}: ${error.message}`; + const errorMessage = `Error during fallback processing: ${getErrorMessage(e)}`; + debugLogger.error(`[WebFetchTool] Fallback failed: ${errorMessage}`); return { llmContent: `Error: ${errorMessage}`, returnDisplay: `Error: ${errorMessage}`, @@ -437,6 +552,21 @@ ${textContent} // Convert GitHub blob URL to raw URL url = convertGithubUrlToRaw(url); + if (this.isBlockedHost(url)) { + const errorMessage = `Access to blocked or private host ${url} is not allowed.`; + debugLogger.warn( + `[WebFetchTool] Blocked experimental fetch to host: ${url}`, + ); + return { + llmContent: `Error: ${errorMessage}`, + returnDisplay: `Error: ${errorMessage}`, + error: { + message: errorMessage, + type: ToolErrorType.WEB_FETCH_PROCESSING_ERROR, + }, + }; + } + try { const response = await retryWithBackoff( async () => { @@ -454,6 +584,7 @@ ${textContent} retryFetchErrors: this.config.getRetryFetchErrors(), onRetry: (attempt, error, delayMs) => this.handleRetry(attempt, error, delayMs), + signal, }, ); @@ -473,6 +604,9 @@ ${textContent} const errorContent = `Request failed with status ${status} Headers: ${JSON.stringify(headers, null, 2)} Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response truncated] ...')}`; + debugLogger.error( + `[WebFetchTool] Experimental fetch failed with status ${status} for ${url}`, + ); return { llmContent: errorContent, returnDisplay: `Failed to fetch ${url} (Status: ${status})`, @@ -543,6 +677,9 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun }; } catch (e) { const errorMessage = `Error during experimental fetch for ${url}: ${getErrorMessage(e)}`; + debugLogger.error( + `[WebFetchTool] Experimental fetch error: ${errorMessage}`, + ); return { llmContent: `Error: ${errorMessage}`, returnDisplay: `Error: ${errorMessage}`, @@ -559,15 +696,14 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun return this.executeExperimental(signal); } const userPrompt = this.params.prompt!; - const { validUrls: urls } = parsePrompt(userPrompt); - const url = urls[0]; + const { validUrls } = parsePrompt(userPrompt); - // Enforce rate limiting - const rateLimitResult = checkRateLimit(url); - if (!rateLimitResult.allowed) { - const waitTimeSecs = Math.ceil((rateLimitResult.waitTimeMs || 0) / 1000); - const errorMessage = `Rate limit exceeded for host. Please wait ${waitTimeSecs} seconds before trying again.`; - debugLogger.warn(`[WebFetchTool] Rate limit exceeded for ${url}`); + const { toFetch, skipped } = this.filterAndValidateUrls(validUrls); + + // If everything was skipped, fail early + if (toFetch.length === 0 && skipped.length > 0) { + const errorMessage = `All requested URLs were skipped: ${skipped.join(', ')}`; + debugLogger.error(`[WebFetchTool] ${errorMessage}`); return { llmContent: `Error: ${errorMessage}`, returnDisplay: `Error: ${errorMessage}`, @@ -578,23 +714,12 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun }; } - const isPrivate = isPrivateIp(url); - - if (isPrivate) { - logWebFetchFallbackAttempt( - this.config, - new WebFetchFallbackAttemptEvent('private_ip'), - ); - return this.executeFallback(signal); - } - - const geminiClient = this.config.getGeminiClient(); - try { + const geminiClient = this.config.getGeminiClient(); const response = await geminiClient.generateContent( { model: 'web-fetch' }, [{ role: 'user', parts: [{ text: userPrompt }] }], - signal, // Pass signal + signal, LlmRole.UTILITY_TOOL, ); @@ -607,113 +732,76 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun ); let responseText = getResponseText(response) || ''; - const urlContextMeta = response.candidates?.[0]?.urlContextMetadata; const groundingMetadata = response.candidates?.[0]?.groundingMetadata; - const sources = groundingMetadata?.groundingChunks as - | GroundingChunkItem[] - | undefined; - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - const groundingSupports = groundingMetadata?.groundingSupports as - | GroundingSupportItem[] - | undefined; - // Error Handling - let processingError = false; - - if ( - urlContextMeta?.urlMetadata && - urlContextMeta.urlMetadata.length > 0 - ) { - const allStatuses = urlContextMeta.urlMetadata.map( - (m) => m.urlRetrievalStatus, - ); - if (allStatuses.every((s) => s !== 'URL_RETRIEVAL_STATUS_SUCCESS')) { - processingError = true; - } - } else if (!responseText.trim() && !sources?.length) { - // No URL metadata and no content/sources - processingError = true; + // Simple primary success check: we need some text or grounding data + if (!responseText.trim() && !groundingMetadata?.groundingChunks?.length) { + throw new Error('Primary fetch returned no content'); } - if ( - !processingError && - !responseText.trim() && - (!sources || sources.length === 0) - ) { - // Successfully retrieved some URL (or no specific error from urlContextMeta), but no usable text or grounding data. - processingError = true; - } - - if (processingError) { - logWebFetchFallbackAttempt( - this.config, - new WebFetchFallbackAttemptEvent('primary_failed'), - ); - return await this.executeFallback(signal); - } - - const sourceListFormatted: string[] = []; - if (sources && sources.length > 0) { - sources.forEach((source: GroundingChunkItem, index: number) => { - const title = source.web?.title || 'Untitled'; - const uri = source.web?.uri || 'Unknown URI'; // Fallback if URI is missing - sourceListFormatted.push(`[${index + 1}] ${title} (${uri})`); + // 1. Apply Grounding Supports (Citations) + const groundingSupports = groundingMetadata?.groundingSupports?.filter( + isGroundingSupportItem, + ); + if (groundingSupports && groundingSupports.length > 0) { + const insertions: Array<{ index: number; marker: string }> = []; + groundingSupports.forEach((support) => { + if (support.segment && support.groundingChunkIndices) { + const citationMarker = support.groundingChunkIndices + .map((chunkIndex: number) => `[${chunkIndex + 1}]`) + .join(''); + insertions.push({ + index: support.segment.endIndex, + marker: citationMarker, + }); + } }); - if (groundingSupports && groundingSupports.length > 0) { - const insertions: Array<{ index: number; marker: string }> = []; - groundingSupports.forEach((support: GroundingSupportItem) => { - if (support.segment && support.groundingChunkIndices) { - const citationMarker = support.groundingChunkIndices - .map((chunkIndex: number) => `[${chunkIndex + 1}]`) - .join(''); - insertions.push({ - index: support.segment.endIndex, - marker: citationMarker, - }); - } - }); - - insertions.sort((a, b) => b.index - a.index); - const responseChars = responseText.split(''); - insertions.forEach((insertion) => { - responseChars.splice(insertion.index, 0, insertion.marker); - }); - responseText = responseChars.join(''); - } - - if (sourceListFormatted.length > 0) { - responseText += ` - -Sources: -${sourceListFormatted.join('\n')}`; - } + insertions.sort((a, b) => b.index - a.index); + const responseChars = responseText.split(''); + insertions.forEach((insertion) => { + responseChars.splice(insertion.index, 0, insertion.marker); + }); + responseText = responseChars.join(''); } - const llmContent = responseText; + // 2. Append Source List + const sources = + groundingMetadata?.groundingChunks?.filter(isGroundingChunkItem); + if (sources && sources.length > 0) { + const sourceListFormatted: string[] = []; + sources.forEach((source, index) => { + const title = source.web?.title || 'Untitled'; + const uri = source.web?.uri || 'Unknown URI'; + sourceListFormatted.push(`[${index + 1}] ${title} (${uri})`); + }); + responseText += `\n\nSources:\n${sourceListFormatted.join('\n')}`; + } + + // 3. Prepend Warnings for skipped URLs + if (skipped.length > 0) { + responseText = `[Warning] The following URLs were skipped:\n${skipped.join('\n')}\n\n${responseText}`; + } debugLogger.debug( - `[WebFetchTool] Formatted tool response for prompt "${userPrompt}:\n\n":`, - llmContent, + `[WebFetchTool] Formatted tool response for prompt "${userPrompt}":\n\n`, + responseText, ); return { - llmContent, + llmContent: responseText, returnDisplay: `Content processed from prompt.`, }; } catch (error: unknown) { - const errorMessage = `Error processing web content for prompt "${userPrompt.substring( - 0, - 50, - )}...": ${getErrorMessage(error)}`; - return { - llmContent: `Error: ${errorMessage}`, - returnDisplay: `Error: ${errorMessage}`, - error: { - message: errorMessage, - type: ToolErrorType.WEB_FETCH_PROCESSING_ERROR, - }, - }; + debugLogger.warn( + `[WebFetchTool] Primary fetch failed, falling back: ${getErrorMessage(error)}`, + ); + logWebFetchFallbackAttempt( + this.config, + new WebFetchFallbackAttemptEvent('primary_failed'), + ); + // Simple All-or-Nothing Fallback + return this.executeFallback(toFetch, signal); } } }