diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index b1d1f7f021..1758184555 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -146,7 +146,7 @@ their corresponding top-level category object in your `settings.json` file. - **`general.retryFetchErrors`** (boolean): - **Description:** Retry on "exception TypeError: fetch failed sending request" errors. - - **Default:** `false` + - **Default:** `true` - **`general.maxAttempts`** (number): - **Description:** Maximum number of attempts for requests to the main chat diff --git a/package-lock.json b/package-lock.json index 85448711c7..a5437ac5c5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -2292,6 +2292,7 @@ "integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@octokit/auth-token": "^6.0.0", "@octokit/graphql": "^9.0.2", @@ -2472,6 +2473,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz", "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", "license": "Apache-2.0", + "peer": true, "engines": { "node": ">=8.0.0" } @@ -2521,6 +2523,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.5.0.tgz", "integrity": "sha512-ka4H8OM6+DlUhSAZpONu0cPBtPPTQKxbxVzC4CzVx5+K4JnroJVBtDzLAMx4/3CDTJXRvVFhpFjtl4SaiTNoyQ==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/semantic-conventions": "^1.29.0" }, @@ -2895,6 +2898,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.5.0.tgz", "integrity": "sha512-F8W52ApePshpoSrfsSk1H2yJn9aKjCrbpQF1M9Qii0GHzbfVeFUB+rc3X4aggyZD8x9Gu3Slua+s6krmq6Dt8g==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/semantic-conventions": "^1.29.0" @@ -2928,6 +2932,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.5.0.tgz", "integrity": "sha512-BeJLtU+f5Gf905cJX9vXFQorAr6TAfK3SPvTFqP+scfIpDQEJfRaGJWta7sJgP+m4dNtBf9y3yvBKVAZZtJQVA==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/resources": "2.5.0" @@ -2982,6 +2987,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.5.0.tgz", "integrity": "sha512-VzRf8LzotASEyNDUxTdaJ9IRJ1/h692WyArDBInf5puLCjxbICD6XkHgpuudis56EndyS7LYFmtTMny6UABNdQ==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/core": "2.5.0", "@opentelemetry/resources": "2.5.0", @@ -4178,6 +4184,7 @@ "integrity": "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.0.2" } @@ -4451,6 +4458,7 @@ "integrity": "sha512-klQbnPAAiGYFyI02+znpBRLyjL4/BrBd0nyWkdC0s/6xFLkXYQ8OoRrSkqacS1ddVxf/LDyODIKbQ5TgKAf/Fg==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.56.1", "@typescript-eslint/types": "8.56.1", @@ -5298,6 +5306,7 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -7901,6 +7910,7 @@ "integrity": "sha512-VmQ+sifHUbI/IcSopBCF/HO3YiHQx/AVd3UVyYL6weuwW+HvON9VYn5l6Zl1WZzPWXPNZrSQpxwkkZ/VuvJZzg==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -8533,6 +8543,7 @@ "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", "license": "MIT", + "peer": true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", @@ -9847,6 +9858,7 @@ "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.2.tgz", "integrity": "sha512-gJnaDHXKDayjt8ue0n8Gs0A007yKXj4Xzb8+cNjZeYsSzzwKc0Lr+OZgYwVfB0pHfUs17EPoLvrOsEaJ9mj+Tg==", "license": "MIT", + "peer": true, "engines": { "node": ">=16.9.0" } @@ -10126,6 +10138,7 @@ "resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.4.11.tgz", "integrity": "sha512-93LQlzT7vvZ1XJcmOMwN4s+6W334QegendeHOMnEJBlhnpIzr8bws6/aOEHG8ZCuVD/vNeeea5m1msHIdAY6ig==", "license": "MIT", + "peer": true, "dependencies": { "@alcalzone/ansi-tokenize": "^0.2.1", "ansi-escapes": "^7.0.0", @@ -13808,6 +13821,7 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -13818,6 +13832,7 @@ "integrity": "sha512-ePrwPfxAnB+7hgnEr8vpKxL9cmnp7F322t8oqcPshbIQQhDKgFDW4tjhF2wjVbdXF9O/nyuy3sQWd9JGpiLPvA==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "shell-quote": "^1.6.1", "ws": "^7" @@ -15906,6 +15921,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -16129,7 +16145,8 @@ "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", "dev": true, - "license": "0BSD" + "license": "0BSD", + "peer": true }, "node_modules/tsx": { "version": "4.20.3", @@ -16137,6 +16154,7 @@ "integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "~0.25.0", "get-tsconfig": "^4.7.5" @@ -16296,6 +16314,7 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "devOptional": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -16519,6 +16538,7 @@ "resolved": "https://registry.npmjs.org/vite/-/vite-7.2.2.tgz", "integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==", "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.5.0", @@ -16632,6 +16652,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -16644,6 +16665,7 @@ "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz", "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==", "license": "MIT", + "peer": true, "dependencies": { "@types/chai": "^5.2.2", "@vitest/expect": "3.2.4", @@ -17288,6 +17310,7 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } @@ -17687,6 +17710,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index bd1f9d82a4..0db84dbfd1 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -306,7 +306,7 @@ const SETTINGS_SCHEMA = { label: 'Retry Fetch Errors', category: 'General', requiresRestart: false, - default: false, + default: true, description: 'Retry on "exception TypeError: fetch failed sending request" errors.', showInDialog: false, diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index f615564533..a1bb93d27d 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -1014,7 +1014,7 @@ export class Config implements McpContext, AgentLoopContext { params.gemmaModelRouter?.classifier?.model ?? 'gemma3-1b-gpu-custom', }, }; - this.retryFetchErrors = params.retryFetchErrors ?? false; + this.retryFetchErrors = params.retryFetchErrors ?? true; this.maxAttempts = Math.min( params.maxAttempts ?? DEFAULT_MAX_ATTEMPTS, DEFAULT_MAX_ATTEMPTS, diff --git a/packages/core/src/tools/web-fetch.test.ts b/packages/core/src/tools/web-fetch.test.ts index 92ba4076b2..c33bebb3c3 100644 --- a/packages/core/src/tools/web-fetch.test.ts +++ b/packages/core/src/tools/web-fetch.test.ts @@ -9,6 +9,7 @@ import { WebFetchTool, parsePrompt, convertGithubUrlToRaw, + normalizeUrl, } from './web-fetch.js'; import type { Config } from '../config/config.js'; import { ApprovalMode } from '../policy/types.js'; @@ -198,6 +199,35 @@ describe('parsePrompt', () => { }); }); +describe('normalizeUrl', () => { + it('should lowercase hostname', () => { + expect(normalizeUrl('https://EXAMPLE.COM')).toBe('https://example.com/'); + }); + + it('should remove trailing slashes from path', () => { + expect(normalizeUrl('https://example.com/path/')).toBe( + 'https://example.com/path', + ); + }); + + it('should not remove trailing slash from root', () => { + expect(normalizeUrl('https://example.com/')).toBe('https://example.com/'); + }); + + it('should remove default ports', () => { + expect(normalizeUrl('http://example.com:80/')).toBe('http://example.com/'); + expect(normalizeUrl('https://example.com:443/')).toBe( + 'https://example.com/', + ); + }); + + it('should keep non-default ports', () => { + expect(normalizeUrl('http://example.com:8080/')).toBe( + 'http://example.com:8080/', + ); + }); +}); + describe('convertGithubUrlToRaw', () => { it('should convert valid github blob urls', () => { expect( @@ -354,7 +384,123 @@ describe('WebFetchTool', () => { // The 11th time should fail due to rate limit const result = await invocation.execute(new AbortController().signal); expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_PROCESSING_ERROR); - expect(result.error?.message).toContain('Rate limit exceeded for host'); + expect(result.error?.message).toContain('Rate limit exceeded'); + }); + + it('should skip rate-limited URLs but fetch others', async () => { + vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false); + + const tool = new WebFetchTool(mockConfig, bus); + const params = { + prompt: 'fetch https://ratelimit-multi.com and https://healthy.com', + }; + const invocation = tool.build(params); + + // Hit rate limit for one host + for (let i = 0; i < 10; i++) { + mockGenerateContent.mockResolvedValueOnce({ + candidates: [{ content: { parts: [{ text: 'response' }] } }], + }); + await tool + .build({ prompt: 'fetch https://ratelimit-multi.com' }) + .execute(new AbortController().signal); + } + // 11th call - should be rate limited and not use a mock + await tool + .build({ prompt: 'fetch https://ratelimit-multi.com' }) + .execute(new AbortController().signal); + + mockGenerateContent.mockResolvedValueOnce({ + candidates: [{ content: { parts: [{ text: 'healthy response' }] } }], + }); + + const result = await invocation.execute(new AbortController().signal); + expect(result.llmContent).toContain('healthy response'); + expect(result.llmContent).toContain( + '[Warning] The following URLs were skipped due to rate limiting: https://ratelimit-multi.com/', + ); + }); + + it('should rescue failed public URLs via fallback', async () => { + vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false); + + // Primary fetch fails for one URL + mockGenerateContent.mockResolvedValueOnce({ + candidates: [ + { + content: { parts: [{ text: 'Only partial info' }] }, + urlContextMetadata: { + urlMetadata: [ + { + url: 'https://success.com/', + urlRetrievalStatus: 'URL_RETRIEVAL_STATUS_SUCCESS', + }, + { + url: 'https://fail.com/', + urlRetrievalStatus: 'URL_RETRIEVAL_STATUS_FAILED', + }, + ], + }, + }, + ], + }); + + // Mock fallback fetch for the failed URL + mockFetch('https://fail.com/', { + text: () => Promise.resolve('rescued content'), + }); + + const tool = new WebFetchTool(mockConfig, bus); + const params = { + prompt: 'fetch https://success.com and https://fail.com', + }; + const invocation = tool.build(params); + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toContain('Only partial info'); + expect(result.llmContent).toContain('--- Rescued Content ---'); + expect(result.llmContent).toContain('URL: https://fail.com/'); + expect(result.llmContent).toContain('rescued content'); + }); + + it('should rescue private URLs via fallback and merge with public results', async () => { + vi.mocked(fetchUtils.isPrivateIp).mockImplementation( + (url) => url === 'https://private.com/', + ); + + // Primary fetch for public URL + mockGenerateContent.mockResolvedValueOnce({ + candidates: [ + { + content: { parts: [{ text: 'public content' }] }, + urlContextMetadata: { + urlMetadata: [ + { + url: 'https://public.com/', + urlRetrievalStatus: 'URL_RETRIEVAL_STATUS_SUCCESS', + }, + ], + }, + }, + ], + }); + + // Mock fallback fetch for the private URL + mockFetch('https://private.com/', { + text: () => Promise.resolve('private rescued content'), + }); + + const tool = new WebFetchTool(mockConfig, bus); + const params = { + prompt: 'fetch https://public.com and https://private.com', + }; + const invocation = tool.build(params); + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toContain('public content'); + expect(result.llmContent).toContain('--- Rescued Content ---'); + expect(result.llmContent).toContain('URL: https://private.com/'); + expect(result.llmContent).toContain('private rescued content'); }); it('should return WEB_FETCH_FALLBACK_FAILED on fallback fetch failure', async () => { @@ -367,14 +513,14 @@ describe('WebFetchTool', () => { expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_FALLBACK_FAILED); }); - it('should return WEB_FETCH_PROCESSING_ERROR on general processing failure', async () => { + it('should return WEB_FETCH_FALLBACK_FAILED on general processing failure (when fallback also fails)', async () => { vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false); mockGenerateContent.mockRejectedValue(new Error('API error')); const tool = new WebFetchTool(mockConfig, bus); const params = { prompt: 'fetch https://public.ip' }; const invocation = tool.build(params); const result = await invocation.execute(new AbortController().signal); - expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_PROCESSING_ERROR); + expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_FALLBACK_FAILED); }); it('should log telemetry when falling back due to private IP', async () => { diff --git a/packages/core/src/tools/web-fetch.ts b/packages/core/src/tools/web-fetch.ts index 3170227188..57e3b4573d 100644 --- a/packages/core/src/tools/web-fetch.ts +++ b/packages/core/src/tools/web-fetch.ts @@ -34,8 +34,8 @@ import { WEB_FETCH_DEFINITION } from './definitions/coreTools.js'; import { resolveToolDeclaration } from './definitions/resolver.js'; import { LRUCache } from 'mnemonist'; -const URL_FETCH_TIMEOUT_MS = 10000; -const MAX_CONTENT_LENGTH = 100000; +const URL_FETCH_TIMEOUT_MS = 30000; +const MAX_CONTENT_LENGTH = 200000; const MAX_EXPERIMENTAL_FETCH_SIZE = 10 * 1024 * 1024; // 10MB const USER_AGENT = 'Mozilla/5.0 (compatible; Google-Gemini-CLI/1.0; +https://github.com/google-gemini/gemini-cli)'; @@ -76,6 +76,31 @@ function checkRateLimit(url: string): { } } +/** + * Normalizes a URL by converting hostname to lowercase, removing trailing slashes, + * and removing default ports. + */ +export function normalizeUrl(urlStr: string): string { + try { + const url = new URL(urlStr); + url.hostname = url.hostname.toLowerCase(); + // Remove trailing slash if present in pathname (except for root '/') + if (url.pathname.endsWith('/') && url.pathname.length > 1) { + url.pathname = url.pathname.slice(0, -1); + } + // Remove default ports + if ( + (url.protocol === 'http:' && url.port === '80') || + (url.protocol === 'https:' && url.port === '443') + ) { + url.port = ''; + } + return url.href; + } catch { + return urlStr; + } +} + /** * Parses a prompt to extract valid URLs and identify malformed ones. */ @@ -152,6 +177,15 @@ interface GroundingSupportItem { groundingChunkIndices?: number[]; } +interface UrlMetadata { + url?: string; + urlRetrievalStatus?: string; +} + +interface UrlContextMetadata { + urlMetadata?: UrlMetadata[]; +} + /** * Parameters for the WebFetch tool */ @@ -184,13 +218,12 @@ class WebFetchToolInvocation extends BaseToolInvocation< super(params, messageBus, _toolName, _toolDisplayName); } - private async executeFallback(signal: AbortSignal): Promise { - const { validUrls: urls } = parsePrompt(this.params.prompt!); - // For now, we only support one URL for fallback - let url = urls[0]; - - // Convert GitHub blob URL to raw URL - url = convertGithubUrlToRaw(url); + private async executeFallbackForUrl( + urlStr: string, + signal: AbortSignal, + contentBudget: number, + ): Promise { + const url = convertGithubUrlToRaw(urlStr); try { const response = await retryWithBackoff( @@ -212,6 +245,7 @@ class WebFetchToolInvocation extends BaseToolInvocation< }, { retryFetchErrors: this.config.getRetryFetchErrors(), + signal, }, ); @@ -240,19 +274,39 @@ class WebFetchToolInvocation extends BaseToolInvocation< textContent = rawContent; } - textContent = truncateString( - textContent, - MAX_CONTENT_LENGTH, - TRUNCATION_WARNING, - ); + return truncateString(textContent, contentBudget, TRUNCATION_WARNING); + } catch (e) { + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + const error = e as Error; + return `Error fetching ${url}: ${error.message}`; + } + } + private async executeFallback( + urls: string[], + signal: AbortSignal, + ): Promise { + const contentBudget = Math.floor(MAX_CONTENT_LENGTH / urls.length); + const results: string[] = []; + + for (const url of urls) { + results.push( + await this.executeFallbackForUrl(url, signal, contentBudget), + ); + } + + const aggregatedContent = results + .map((content, i) => `URL: ${urls[i]}\nContent:\n${content}`) + .join('\n\n---\n\n'); + + try { const geminiClient = this.config.getGeminiClient(); const fallbackPrompt = `The user requested the following: "${this.params.prompt}". -I was unable to access the URL directly. Instead, I have fetched the raw content of the page. Please use the following content to answer the request. Do not attempt to access the URL again. +I was unable to access the URL(s) directly using the primary fetch tool. Instead, I have fetched the raw content of the page(s). Please use the following content to answer the request. Do not attempt to access the URL(s) again. --- -${textContent} +${aggregatedContent} --- `; const result = await geminiClient.generateContent( @@ -264,12 +318,12 @@ ${textContent} const resultText = getResponseText(result) || ''; return { llmContent: resultText, - returnDisplay: `Content for ${url} processed using fallback fetch.`, + returnDisplay: `Content for ${urls.length} URL(s) processed using fallback fetch.`, }; } catch (e) { // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion const error = e as Error; - const errorMessage = `Error during fallback fetch for ${url}: ${error.message}`; + const errorMessage = `Error during fallback processing: ${error.message}`; return { llmContent: `Error: ${errorMessage}`, returnDisplay: `Error: ${errorMessage}`, @@ -405,6 +459,7 @@ ${textContent} }, { retryFetchErrors: this.config.getRetryFetchErrors(), + signal, }, ); @@ -510,15 +565,25 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun return this.executeExperimental(signal); } const userPrompt = this.params.prompt!; - const { validUrls: urls } = parsePrompt(userPrompt); - const url = urls[0]; + const { validUrls } = parsePrompt(userPrompt); - // Enforce rate limiting - const rateLimitResult = checkRateLimit(url); - if (!rateLimitResult.allowed) { - const waitTimeSecs = Math.ceil((rateLimitResult.waitTimeMs || 0) / 1000); - const errorMessage = `Rate limit exceeded for host. Please wait ${waitTimeSecs} seconds before trying again.`; - debugLogger.warn(`[WebFetchTool] Rate limit exceeded for ${url}`); + // Unit 1: Normalization & Deduplication + const allUrls = [...new Set(validUrls.map(normalizeUrl))]; + + // Unit 2: Isolated Rate Limiting + const toFetch: string[] = []; + const rateLimited: string[] = []; + for (const url of allUrls) { + const rateLimitResult = checkRateLimit(url); + if (rateLimitResult.allowed) { + toFetch.push(url); + } else { + rateLimited.push(url); + } + } + + if (toFetch.length === 0 && rateLimited.length > 0) { + const errorMessage = `Rate limit exceeded for all requested hosts.`; return { llmContent: `Error: ${errorMessage}`, returnDisplay: `Error: ${errorMessage}`, @@ -529,143 +594,175 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun }; } - const isPrivate = isPrivateIp(url); + const publicUrls = toFetch.filter((url) => !isPrivateIp(url)); + const privateUrls = toFetch.filter((url) => isPrivateIp(url)); - if (isPrivate) { + if (privateUrls.length > 0) { logWebFetchFallbackAttempt( this.config, new WebFetchFallbackAttemptEvent('private_ip'), ); - return this.executeFallback(signal); } - const geminiClient = this.config.getGeminiClient(); + let llmContent = ''; + let returnDisplay = ''; + const needsRescue: string[] = [...privateUrls]; - try { - const response = await geminiClient.generateContent( - { model: 'web-fetch' }, - [{ role: 'user', parts: [{ text: userPrompt }] }], - signal, // Pass signal - LlmRole.UTILITY_TOOL, - ); - - debugLogger.debug( - `[WebFetchTool] Full response for prompt "${userPrompt.substring( - 0, - 50, - )}...":`, - JSON.stringify(response, null, 2), - ); - - let responseText = getResponseText(response) || ''; - const urlContextMeta = response.candidates?.[0]?.urlContextMetadata; - const groundingMetadata = response.candidates?.[0]?.groundingMetadata; - const sources = groundingMetadata?.groundingChunks as - | GroundingChunkItem[] - | undefined; - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - const groundingSupports = groundingMetadata?.groundingSupports as - | GroundingSupportItem[] - | undefined; - - // Error Handling - let processingError = false; - - if ( - urlContextMeta?.urlMetadata && - urlContextMeta.urlMetadata.length > 0 - ) { - const allStatuses = urlContextMeta.urlMetadata.map( - (m) => m.urlRetrievalStatus, + if (publicUrls.length > 0) { + const geminiClient = this.config.getGeminiClient(); + try { + const response = await geminiClient.generateContent( + { model: 'web-fetch' }, + [{ role: 'user', parts: [{ text: userPrompt }] }], + signal, + LlmRole.UTILITY_TOOL, ); - if (allStatuses.every((s) => s !== 'URL_RETRIEVAL_STATUS_SUCCESS')) { + + let responseText = getResponseText(response) || ''; + + const urlContextMeta = response.candidates?.[0]?.urlContextMetadata as + | UrlContextMetadata + | undefined; + const groundingMetadata = response.candidates?.[0]?.groundingMetadata; + const sources = groundingMetadata?.groundingChunks as + | GroundingChunkItem[] + | undefined; + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + const groundingSupports = groundingMetadata?.groundingSupports as + | GroundingSupportItem[] + | undefined; + + // Error Handling & Rescue identification + let processingError = false; + + if ( + urlContextMeta?.urlMetadata && + urlContextMeta.urlMetadata.length > 0 + ) { + const allStatuses = urlContextMeta.urlMetadata.map( + (m) => m.urlRetrievalStatus, + ); + if (allStatuses.every((s) => s !== 'URL_RETRIEVAL_STATUS_SUCCESS')) { + processingError = true; + } + + // Unit 3: Identify specific URLs that need rescue + for (const meta of urlContextMeta.urlMetadata) { + if ( + meta.urlRetrievalStatus !== 'URL_RETRIEVAL_STATUS_SUCCESS' && + meta.url + ) { + needsRescue.push(meta.url); + } + } + } else if (!responseText.trim() && !sources?.length) { processingError = true; } - } else if (!responseText.trim() && !sources?.length) { - // No URL metadata and no content/sources - processingError = true; - } - if ( - !processingError && - !responseText.trim() && - (!sources || sources.length === 0) - ) { - // Successfully retrieved some URL (or no specific error from urlContextMeta), but no usable text or grounding data. - processingError = true; - } + if ( + !processingError && + !responseText.trim() && + (!sources || sources.length === 0) + ) { + processingError = true; + } - if (processingError) { - logWebFetchFallbackAttempt( - this.config, - new WebFetchFallbackAttemptEvent('primary_failed'), - ); - return await this.executeFallback(signal); - } + if (processingError) { + logWebFetchFallbackAttempt( + this.config, + new WebFetchFallbackAttemptEvent('primary_failed'), + ); + // If primary failed completely, rescue all public URLs that were supposed to be fetched + needsRescue.push(...publicUrls); + } else { + // Process grounding if successful + const sourceListFormatted: string[] = []; + if (sources && sources.length > 0) { + sources.forEach((source: GroundingChunkItem, index: number) => { + const title = source.web?.title || 'Untitled'; + const uri = source.web?.uri || 'Unknown URI'; + sourceListFormatted.push(`[${index + 1}] ${title} (${uri})`); + }); - const sourceListFormatted: string[] = []; - if (sources && sources.length > 0) { - sources.forEach((source: GroundingChunkItem, index: number) => { - const title = source.web?.title || 'Untitled'; - const uri = source.web?.uri || 'Unknown URI'; // Fallback if URI is missing - sourceListFormatted.push(`[${index + 1}] ${title} (${uri})`); - }); - - if (groundingSupports && groundingSupports.length > 0) { - const insertions: Array<{ index: number; marker: string }> = []; - groundingSupports.forEach((support: GroundingSupportItem) => { - if (support.segment && support.groundingChunkIndices) { - const citationMarker = support.groundingChunkIndices - .map((chunkIndex: number) => `[${chunkIndex + 1}]`) - .join(''); - insertions.push({ - index: support.segment.endIndex, - marker: citationMarker, + if (groundingSupports && groundingSupports.length > 0) { + const insertions: Array<{ index: number; marker: string }> = []; + groundingSupports.forEach((support: GroundingSupportItem) => { + if (support.segment && support.groundingChunkIndices) { + const citationMarker = support.groundingChunkIndices + .map((chunkIndex: number) => `[${chunkIndex + 1}]`) + .join(''); + insertions.push({ + index: support.segment.endIndex, + marker: citationMarker, + }); + } }); + + insertions.sort((a, b) => b.index - a.index); + const responseChars = responseText.split(''); + insertions.forEach((insertion) => { + responseChars.splice(insertion.index, 0, insertion.marker); + }); + responseText = responseChars.join(''); } - }); - insertions.sort((a, b) => b.index - a.index); - const responseChars = responseText.split(''); - insertions.forEach((insertion) => { - responseChars.splice(insertion.index, 0, insertion.marker); - }); - responseText = responseChars.join(''); + if (sourceListFormatted.length > 0) { + responseText += `\n\nSources:\n${sourceListFormatted.join('\n')}`; + } + } + llmContent = responseText; + returnDisplay = `Content processed from prompt.`; } + } catch (error: unknown) { + debugLogger.error( + `[WebFetchTool] Primary fetch failed: ${getErrorMessage(error)}`, + ); + needsRescue.push(...publicUrls); + } + } - if (sourceListFormatted.length > 0) { - responseText += ` + // Unit 3: Surgical Fallback ("The Rescue") + if (needsRescue.length > 0) { + // Deduplicate needsRescue (public failures might overlap with private) + const uniqueRescue = [...new Set(needsRescue)]; + const contentBudget = Math.floor( + MAX_CONTENT_LENGTH / uniqueRescue.length, + ); + const rescuedResults: string[] = []; -Sources: -${sourceListFormatted.join('\n')}`; - } + for (const url of uniqueRescue) { + rescuedResults.push( + await this.executeFallbackForUrl(url, signal, contentBudget), + ); } - const llmContent = responseText; + const aggregatedRescuedContent = rescuedResults + .map((content, i) => `URL: ${uniqueRescue[i]}\nContent:\n${content}`) + .join('\n\n---\n\n'); - debugLogger.debug( - `[WebFetchTool] Formatted tool response for prompt "${userPrompt}:\n\n":`, - llmContent, - ); - - return { - llmContent, - returnDisplay: `Content processed from prompt.`, - }; - } catch (error: unknown) { - const errorMessage = `Error processing web content for prompt "${userPrompt.substring( - 0, - 50, - )}...": ${getErrorMessage(error)}`; - return { - llmContent: `Error: ${errorMessage}`, - returnDisplay: `Error: ${errorMessage}`, - error: { - message: errorMessage, - type: ToolErrorType.WEB_FETCH_PROCESSING_ERROR, - }, - }; + if (!llmContent) { + // If no primary content, use executeFallback logic to process all rescued content via Gemini + return this.executeFallback(uniqueRescue, signal); + } else { + // If we have some primary content, append the rescued content as additional information + llmContent += `\n\n--- Rescued Content ---\n${aggregatedRescuedContent}`; + returnDisplay += ` (with ${uniqueRescue.length} rescued URL(s))`; + } } + + // Unit 2: Append rate limiting warning + if (rateLimited.length > 0) { + const warning = `[Warning] The following URLs were skipped due to rate limiting: ${rateLimited.join( + ', ', + )}`; + llmContent = `${warning}\n\n${llmContent}`; + returnDisplay = `${returnDisplay} (Warning: ${rateLimited.length} URL(s) rate-limited)`; + } + + return { + llmContent, + returnDisplay, + }; } } diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json index 280ad18db5..456fe5518b 100644 --- a/schemas/settings.schema.json +++ b/schemas/settings.schema.json @@ -131,8 +131,8 @@ "retryFetchErrors": { "title": "Retry Fetch Errors", "description": "Retry on \"exception TypeError: fetch failed sending request\" errors.", - "markdownDescription": "Retry on \"exception TypeError: fetch failed sending request\" errors.\n\n- Category: `General`\n- Requires restart: `no`\n- Default: `false`", - "default": false, + "markdownDescription": "Retry on \"exception TypeError: fetch failed sending request\" errors.\n\n- Category: `General`\n- Requires restart: `no`\n- Default: `true`", + "default": true, "type": "boolean" }, "maxAttempts": {