2025-04-19 19:45:42 +01:00
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
2026-03-04 05:42:59 +05:30
import {
2026-03-21 10:32:07 -07:00
type ToolConfirmationOutcome ,
2026-03-04 05:42:59 +05:30
BaseDeclarativeTool ,
BaseToolInvocation ,
Kind ,
type ToolCallConfirmationDetails ,
type ToolInvocation ,
type ToolResult ,
2026-03-10 13:01:41 -04:00
type PolicyUpdateOptions ,
2025-08-26 00:04:53 +02:00
} from './tools.js' ;
2025-10-15 09:39:41 -07:00
import type { MessageBus } from '../confirmation-bus/message-bus.js' ;
2025-08-21 14:40:18 -07:00
import { ToolErrorType } from './tool-error.js' ;
2025-04-19 19:45:42 +01:00
import { getErrorMessage } from '../utils/errors.js' ;
2025-08-27 23:22:21 -04:00
import { getResponseText } from '../utils/partUtils.js' ;
2025-11-03 10:13:52 -08:00
import { fetchWithTimeout , isPrivateIp } from '../utils/fetch.js' ;
2026-02-23 11:50:14 -08:00
import { truncateString } from '../utils/textUtils.js' ;
2025-06-13 17:44:14 -07:00
import { convert } from 'html-to-text' ;
2025-10-09 13:01:17 -04:00
import {
logWebFetchFallbackAttempt ,
WebFetchFallbackAttemptEvent ,
2026-03-11 14:55:48 -04:00
logNetworkRetryAttempt ,
NetworkRetryAttemptEvent ,
2025-10-09 13:01:17 -04:00
} from '../telemetry/index.js' ;
2026-02-17 12:32:30 -05:00
import { LlmRole } from '../telemetry/llmRole.js' ;
2026-03-23 18:49:51 -07:00
import { WEB_FETCH_TOOL_NAME , WEB_FETCH_DISPLAY_NAME } from './tool-names.js' ;
2025-10-21 16:35:22 -04:00
import { debugLogger } from '../utils/debugLogger.js' ;
2026-03-10 23:33:50 -04:00
import { coreEvents } from '../utils/events.js' ;
2026-03-11 14:55:48 -04:00
import { retryWithBackoff , getRetryErrorType } from '../utils/retry.js' ;
2026-02-13 23:55:02 -05:00
import { WEB_FETCH_DEFINITION } from './definitions/coreTools.js' ;
import { resolveToolDeclaration } from './definitions/resolver.js' ;
2026-02-20 11:18:07 -06:00
import { LRUCache } from 'mnemonist' ;
2026-03-12 18:56:31 -07:00
import type { AgentLoopContext } from '../config/agent-loop-context.js' ;
2025-06-13 17:44:14 -07:00
/** Timeout, in milliseconds, passed to `fetchWithTimeout` for each direct request. */
const URL_FETCH_TIMEOUT_MS = 10000;
/** Length limit passed to `truncateString` for fetched text content. */
const MAX_CONTENT_LENGTH = 250000;
/** Largest response body, in bytes, that `readResponseWithLimit` is asked to accept. */
const MAX_EXPERIMENTAL_FETCH_SIZE = 10 * 1024 * 1024; // 10MB
/** `User-Agent` header value sent with direct fetches. */
const USER_AGENT =
  'Mozilla/5.0 (compatible; Google-Gemini-CLI/1.0; +https://github.com/google-gemini/gemini-cli)';
/** Suffix passed to `truncateString` to mark content that was cut off. */
const TRUNCATION_WARNING = '\n\n... [Content truncated due to size limit] ...';
2025-06-13 17:44:14 -07:00
2026-02-20 11:18:07 -06:00
// Rate limiting configuration: checkRateLimit() admits at most
// MAX_REQUESTS_PER_WINDOW requests per hostname within any window of
// RATE_LIMIT_WINDOW_MS milliseconds.
const RATE_LIMIT_WINDOW_MS = 60000 ; // 1 minute
const MAX_REQUESTS_PER_WINDOW = 10 ;
// Request timestamps (Date.now() values) keyed by hostname. The cache is
// constructed with 1000 as its size argument.
const hostRequestHistory = new LRUCache < string , number [ ] > ( 1000 ) ;
/**
 * Sliding-window rate limiter keyed by the URL's hostname.
 *
 * Timestamps older than `RATE_LIMIT_WINDOW_MS` are discarded on every call.
 * When the request is allowed, the current time is recorded for the host.
 *
 * @param url The URL about to be requested.
 * @returns `{ allowed: true }` when the request may proceed, otherwise
 *   `{ allowed: false, waitTimeMs }` where `waitTimeMs` is the time until the
 *   oldest recorded request leaves the window (never negative).
 */
function checkRateLimit(url: string): {
  allowed: boolean;
  waitTimeMs?: number;
} {
  try {
    const hostname = new URL(url).hostname;
    const now = Date.now();
    const windowStart = now - RATE_LIMIT_WINDOW_MS;

    // Keep only the timestamps that still fall inside the current window.
    const history = (hostRequestHistory.get(hostname) ?? []).filter(
      (timestamp) => timestamp > windowStart,
    );

    if (history.length >= MAX_REQUESTS_PER_WINDOW) {
      // Timestamps are appended in call order, so index 0 is the oldest one.
      const oldestTimestamp = history[0];
      const waitTimeMs = oldestTimestamp + RATE_LIMIT_WINDOW_MS - now;
      hostRequestHistory.set(hostname, history); // Persist the cleaned history
      return { allowed: false, waitTimeMs: Math.max(0, waitTimeMs) };
    }

    history.push(now);
    hostRequestHistory.set(hostname, history);
    return { allowed: true };
  } catch {
    // If URL parsing fails, fall back to allowed (should be caught by parsePrompt anyway)
    return { allowed: true };
  }
}
2026-03-12 16:13:00 -04:00
/**
 * Returns a canonical spelling of a URL so that equivalent URLs compare equal.
 *
 * The hostname is lowercased, one trailing slash is dropped from any path
 * other than the root `/`, and an explicit default port (80 for http, 443 for
 * https) is cleared. Input that cannot be parsed as a URL is returned
 * unchanged.
 *
 * @param urlStr The URL text to normalize.
 * @returns The normalized `href`, or `urlStr` itself when it is not a valid URL.
 */
export function normalizeUrl(urlStr: string): string {
  let parsed: URL;
  try {
    parsed = new URL(urlStr);
  } catch {
    return urlStr;
  }

  parsed.hostname = parsed.hostname.toLowerCase();

  // Strip a trailing slash, but leave the root path '/' alone.
  const path = parsed.pathname;
  if (path.length > 1 && path.endsWith('/')) {
    parsed.pathname = path.slice(0, -1);
  }

  // Clear the port when it is the default one for the scheme.
  const isDefaultHttpPort = parsed.protocol === 'http:' && parsed.port === '80';
  const isDefaultHttpsPort =
    parsed.protocol === 'https:' && parsed.port === '443';
  if (isDefaultHttpPort || isDefaultHttpsPort) {
    parsed.port = '';
  }

  return parsed.href;
}
2025-10-14 16:53:22 -04:00
/**
 * Parses a prompt to extract valid URLs and identify malformed ones.
 *
 * The text is split on whitespace. Only tokens containing `://` are treated
 * as URL candidates; every other token is ignored.
 *
 * @param text The prompt text to scan.
 * @returns `validUrls` holds the normalized `href` of each http/https URL in
 *   order of appearance; `errors` holds one message per candidate that either
 *   failed to parse or uses another protocol.
 */
export function parsePrompt(text: string): {
  validUrls: string[];
  errors: string[];
} {
  const tokens = text.split(/\s+/);
  const validUrls: string[] = [];
  const errors: string[] = [];

  for (const token of tokens) {
    if (!token) continue;

    // Heuristic to check if the token appears to contain URL-like chars.
    if (!token.includes('://')) continue;

    try {
      // Validate with new URL()
      const url = new URL(token);

      // Allowlist protocols
      if (['http:', 'https:'].includes(url.protocol)) {
        validUrls.push(url.href);
      } else {
        errors.push(
          `Unsupported protocol in URL: "${token}". Only http and https are supported.`,
        );
      }
    } catch {
      // new URL() threw, so it's malformed according to WHATWG standard
      errors.push(`Malformed URL detected: "${token}".`);
    }
  }

  return { validUrls, errors };
}
2025-05-29 15:02:31 -07:00
2026-02-23 11:50:14 -08:00
/**
 * Safely converts a GitHub blob URL to a raw content URL.
 *
 * Only URLs on `github.com` whose path has the shape
 * `/<owner>/<repo>/blob/...` are rewritten, to
 * `https://raw.githubusercontent.com/<owner>/<repo>/...`. Everything else,
 * including unparsable input and paths that merely contain `/blob/` at some
 * other position, is returned unchanged.
 *
 * @param urlStr The URL text to convert.
 * @returns The raw-content URL, or `urlStr` when no conversion applies.
 */
export function convertGithubUrlToRaw(urlStr: string): string {
  // Anchored so that '/blob/' only counts directly after '/<owner>/<repo>'.
  const blobPathPattern = /^\/([^/]+\/[^/]+)\/blob\//;
  try {
    const url = new URL(urlStr);
    // Check the full path shape BEFORE touching the host; otherwise a path
    // such as '/o/r/tree/main/blob/x' would get the raw host but keep a path
    // that does not exist there.
    if (url.hostname === 'github.com' && blobPathPattern.test(url.pathname)) {
      url.hostname = 'raw.githubusercontent.com';
      url.pathname = url.pathname.replace(blobPathPattern, '/$1/');
      return url.href;
    }
  } catch {
    // Ignore invalid URLs
  }
  return urlStr;
}
2025-05-29 15:02:31 -07:00
// Interfaces for grounding metadata (similar to web-search.ts)
/** Web source attached to a grounding chunk; both fields are optional. */
interface GroundingChunkWeb {
uri? : string ;
title? : string ;
}
/** One grounding chunk; its `web` source is used to build the "Sources" list. */
interface GroundingChunkItem {
web? : GroundingChunkWeb ;
}
2026-03-12 16:13:00 -04:00
/**
 * Type guard for grounding chunks: accepts any non-null object.
 * No property of the object is inspected.
 */
function isGroundingChunkItem(item: unknown): item is GroundingChunkItem {
  if (item === null) {
    return false;
  }
  return typeof item === 'object';
}
2025-05-29 15:02:31 -07:00
/** Span of the response text that a grounding support refers to. */
interface GroundingSupportSegment {
startIndex : number ;
endIndex : number ;
text? : string ;
}
/**
 * Associates a text segment with the grounding chunks that support it.
 * `groundingChunkIndices` are rendered as 1-based `[n]` citation markers.
 */
interface GroundingSupportItem {
segment? : GroundingSupportSegment ;
groundingChunkIndices? : number [ ] ;
}
2025-04-19 19:45:42 +01:00
2026-03-12 16:13:00 -04:00
/**
 * Type guard for grounding supports: accepts any non-null object.
 * No property of the object is inspected.
 */
function isGroundingSupportItem(item: unknown): item is GroundingSupportItem {
  if (item === null) {
    return false;
  }
  return typeof item === 'object';
}
2026-03-16 17:38:53 -04:00
/**
 * Sanitizes text for safe embedding in XML tags.
 *
 * Replaces the five XML special characters with their predefined entities.
 * `&` is handled first so the ampersands introduced by the later
 * replacements are not escaped a second time.
 *
 * @param text Arbitrary text (user prompt, URL, fetched content).
 * @returns The text with `&`, `<`, `>`, `"` and `'` escaped.
 */
function sanitizeXml(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
}
2025-04-19 19:45:42 +01:00
/**
 * Parameters for the WebFetch tool
 */
export interface WebFetchToolParams {
  /**
   * The prompt containing URL(s) (up to 20) and instructions for processing their content.
   */
  prompt?: string;
  /**
   * Direct URL to fetch (experimental mode).
   */
  url?: string;
}
2025-12-05 09:49:08 -08:00
/**
 * An Error that can also carry an HTTP status code. The fallback fetch sets
 * `status` from the response before throwing on a non-OK response.
 */
interface ErrorWithStatus extends Error {
status? : number ;
}
2025-08-13 12:27:09 -07:00
class WebFetchToolInvocation extends BaseToolInvocation <
WebFetchToolParams ,
ToolResult
> {
/**
 * @param context Agent loop context; provides `config` and `geminiClient`.
 * @param params Tool parameters for this invocation.
 * @param messageBus Message bus forwarded to the base invocation.
 * @param _toolName Optional tool name forwarded to the base invocation.
 * @param _toolDisplayName Optional display name forwarded to the base invocation.
 */
constructor(
  private readonly context: AgentLoopContext,
  params: WebFetchToolParams,
  messageBus: MessageBus,
  _toolName?: string,
  _toolDisplayName?: string,
) {
  super(
    params,
    messageBus,
    _toolName,
    _toolDisplayName,
    // NOTE(review): the meaning of the next three positional arguments is
    // defined by BaseToolInvocation, which is not visible here.
    undefined,
    undefined,
    true,
    // Evaluated lazily, so the approval mode is read from config at call time.
    () => this.context.config.getApprovalMode(),
  );
}
2026-03-10 23:33:50 -04:00
/**
 * Reports a retry of a network request: emits a retry-attempt event on
 * `coreEvents` and logs a `NetworkRetryAttemptEvent` to telemetry.
 *
 * @param attempt Attempt number supplied by the retry helper.
 * @param error The error that triggered the retry.
 * @param delayMs Delay before the next attempt, in milliseconds.
 */
private handleRetry(attempt: number, error: unknown, delayMs: number): void {
  const maxAttempts = this.context.config.getMaxAttempts();
  // Both sinks take a "model" field; this tool reports a fixed label there.
  const modelName = 'Web Fetch';
  const errorType = getRetryErrorType(error);

  coreEvents.emitRetryAttempt({
    attempt,
    maxAttempts,
    delayMs,
    error: errorType,
    model: modelName,
  });

  logNetworkRetryAttempt(
    this.context.config,
    new NetworkRetryAttemptEvent(
      attempt,
      maxAttempts,
      errorType,
      delayMs,
      modelName,
    ),
  );
}
2026-03-12 16:13:00 -04:00
/**
 * Decides whether a URL points at a host this tool must not contact.
 *
 * Loopback spellings are rejected explicitly: `localhost` (with or without a
 * trailing dot), any `*.localhost` name, `127.0.0.1`, `0.0.0.0` and the IPv6
 * loopback `[::1]`. Everything else is delegated to `isPrivateIp`.
 *
 * @param urlStr The URL to check.
 * @returns `true` when the host is blocked or the URL cannot be parsed.
 */
private isBlockedHost(urlStr: string): boolean {
  try {
    // A single trailing dot is the fully-qualified spelling of the same
    // name, so drop it before comparing.
    const hostname = new URL(urlStr).hostname.toLowerCase().replace(/\.$/, '');
    if (
      hostname === 'localhost' ||
      hostname.endsWith('.localhost') ||
      hostname === '127.0.0.1' ||
      hostname === '0.0.0.0' ||
      hostname === '[::1]'
    ) {
      return true;
    }
    return isPrivateIp(urlStr);
  } catch {
    // Fail closed: a URL that cannot be parsed is treated as blocked.
    return true;
  }
}
2025-06-13 17:44:14 -07:00
2026-03-12 16:13:00 -04:00
/**
 * Fetches one URL directly and returns its content as text.
 *
 * GitHub blob URLs are converted to raw URLs first. The request is retried
 * through `retryWithBackoff`; a non-OK response is turned into an error that
 * carries the HTTP status. HTML (or a response without a content type) is
 * converted to plain text, any other content type is returned as-is.
 *
 * @param urlStr The URL to fetch.
 * @param signal Abort signal forwarded to the fetch and the retry helper.
 * @returns The text content, truncated to `MAX_CONTENT_LENGTH` unless context
 *   management is enabled.
 * @throws Error when the host is blocked, the request fails, or the body
 *   exceeds `MAX_EXPERIMENTAL_FETCH_SIZE`.
 */
private async executeFallbackForUrl(
  urlStr: string,
  signal: AbortSignal,
): Promise<string> {
  const url = convertGithubUrlToRaw(urlStr);
  if (this.isBlockedHost(url)) {
    debugLogger.warn(`[WebFetchTool] Blocked access to host: ${url}`);
    throw new Error(
      `Access to blocked or private host ${url} is not allowed.`,
    );
  }

  const response = await retryWithBackoff(
    async () => {
      const res = await fetchWithTimeout(url, URL_FETCH_TIMEOUT_MS, {
        signal,
        headers: {
          'User-Agent': USER_AGENT,
        },
      });
      if (!res.ok) {
        // Attach the status so the retry helper can inspect it.
        const error: ErrorWithStatus = new Error(
          `Request failed with status code ${res.status} ${res.statusText}`,
        );
        error.status = res.status;
        throw error;
      }
      return res;
    },
    {
      retryFetchErrors: this.context.config.getRetryFetchErrors(),
      onRetry: (attempt, error, delayMs) =>
        this.handleRetry(attempt, error, delayMs),
      signal,
    },
  );

  const bodyBuffer = await this.readResponseWithLimit(
    response,
    MAX_EXPERIMENTAL_FETCH_SIZE,
  );
  const rawContent = bodyBuffer.toString('utf8');
  const contentType = response.headers.get('content-type') || '';

  let textContent: string;
  // Only use html-to-text if content type is HTML, or if no content type is provided (assume HTML)
  if (contentType.toLowerCase().includes('text/html') || contentType === '') {
    textContent = convert(rawContent, {
      wordwrap: false,
      selectors: [
        { selector: 'a', options: { ignoreHref: true } },
        { selector: 'img', format: 'skip' },
      ],
    });
  } else {
    // For other content types (text/plain, application/json, etc.), use raw text
    textContent = rawContent;
  }

  if (!this.context.config.isContextManagementEnabled()) {
    return truncateString(
      textContent,
      MAX_CONTENT_LENGTH,
      TRUNCATION_WARNING,
    );
  }
  return textContent;
}
/**
 * De-duplicates URLs (after `normalizeUrl`) and splits them into those that
 * may be fetched and those that must be skipped.
 *
 * A URL is skipped when its host is blocked (also logged to telemetry as
 * `private_ip_skipped`) or when the per-host rate limit is exceeded. Note
 * that `checkRateLimit` records a request for every URL it allows.
 *
 * @param urls Candidate URLs.
 * @returns `toFetch` with the accepted normalized URLs and `skipped` with one
 *   human-readable entry per rejected URL.
 */
private filterAndValidateUrls(urls: string[]): {
  toFetch: string[];
  skipped: string[];
} {
  const uniqueUrls = [...new Set(urls.map(normalizeUrl))];
  const toFetch: string[] = [];
  const skipped: string[] = [];

  for (const url of uniqueUrls) {
    if (this.isBlockedHost(url)) {
      debugLogger.warn(
        `[WebFetchTool] Skipped private or local host: ${url}`,
      );
      logWebFetchFallbackAttempt(
        this.context.config,
        new WebFetchFallbackAttemptEvent('private_ip_skipped'),
      );
      skipped.push(`[Blocked Host] ${url}`);
      continue;
    }

    if (!checkRateLimit(url).allowed) {
      debugLogger.warn(`[WebFetchTool] Rate limit exceeded for host: ${url}`);
      skipped.push(`[Rate limit exceeded] ${url}`);
      continue;
    }

    toFetch.push(url);
  }

  return { toFetch, skipped };
}
/**
 * Fallback path: fetches every URL directly, then asks the model to follow
 * the user's instructions using the fetched content.
 *
 * URLs are fetched one after another. If none succeeds an error result is
 * returned without calling the model. Otherwise each URL's content (or its
 * error message) is wrapped in a `<source>` element and embedded in the
 * prompt. Unless context management is enabled, the total content is kept
 * within `MAX_CONTENT_LENGTH` by giving each source a fair share of the
 * remaining budget, smallest content first.
 *
 * @param urls URLs to fetch; duplicates are ignored.
 * @param signal Abort signal forwarded to the fetches and the model call.
 * @returns The model's answer, or an error result of type
 *   `WEB_FETCH_FALLBACK_FAILED`.
 */
private async executeFallback(
  urls: string[],
  signal: AbortSignal,
): Promise<ToolResult> {
  const uniqueUrls = [...new Set(urls)];
  const successes: Array<{ url: string; content: string }> = [];
  const errors: Array<{ url: string; message: string }> = [];

  for (const url of uniqueUrls) {
    try {
      const content = await this.executeFallbackForUrl(url, signal);
      successes.push({ url, content });
    } catch (e) {
      errors.push({ url, message: getErrorMessage(e) });
    }
  }

  // Short-circuit on total failure
  if (successes.length === 0) {
    const errorMessage = `All fallback fetch attempts failed: ${errors
      .map((e) => `${e.url}: ${e.message}`)
      .join(', ')}`;
    debugLogger.error(`[WebFetchTool] ${errorMessage}`);
    return {
      llmContent: `Error: ${errorMessage}`,
      returnDisplay: `Error: ${errorMessage}`,
      error: {
        message: errorMessage,
        type: ToolErrorType.WEB_FETCH_FALLBACK_FAILED,
      },
    };
  }

  const finalContentsByUrl = new Map<string, string>();
  if (this.context.config.isContextManagementEnabled()) {
    for (const success of successes) {
      finalContentsByUrl.set(success.url, success.content);
    }
  } else {
    // Smart Budget Allocation (Water-filling algorithm) for successes:
    // handling the shortest content first lets sources that need less than
    // their share hand the remainder on to the longer ones.
    const sortedSuccesses = [...successes].sort(
      (a, b) => a.content.length - b.content.length,
    );
    let remainingBudget = MAX_CONTENT_LENGTH;
    let remainingUrls = sortedSuccesses.length;
    for (const success of sortedSuccesses) {
      const fairShare = Math.floor(remainingBudget / remainingUrls);
      const allocated = Math.min(success.content.length, fairShare);
      const truncated = truncateString(
        success.content,
        allocated,
        TRUNCATION_WARNING,
      );

      finalContentsByUrl.set(success.url, truncated);
      remainingBudget -= truncated.length;
      remainingUrls--;
    }
  }

  // URLs are unique here, so each one has at most one recorded error.
  const errorMessagesByUrl = new Map<string, string>();
  for (const e of errors) {
    errorMessagesByUrl.set(e.url, e.message);
  }

  const aggregatedContent = uniqueUrls
    .map((url) => {
      const content = finalContentsByUrl.get(url);
      if (content !== undefined) {
        return `<source url="${sanitizeXml(url)}">\n${sanitizeXml(content)}\n</source>`;
      }
      const message = errorMessagesByUrl.get(url);
      return `<source url="${sanitizeXml(url)}">\nError: ${sanitizeXml(message || 'Unknown error')}\n</source>`;
    })
    .join('\n');

  try {
    const geminiClient = this.context.geminiClient;
    const fallbackPrompt = `Follow the user's instructions below using the provided webpage content.
<user_instructions>
${sanitizeXml(this.params.prompt ?? '')}
</user_instructions>

I was unable to access the URL(s) directly using the primary fetch tool. Instead, I have fetched the raw content of the page(s). Please use the following content to answer the request. Do not attempt to access the URL(s) again.

<content>
${aggregatedContent}
</content>
`;
    const result = await geminiClient.generateContent(
      { model: 'web-fetch-fallback' },
      [{ role: 'user', parts: [{ text: fallbackPrompt }] }],
      signal,
      LlmRole.UTILITY_TOOL,
    );

    debugLogger.debug(
      `[WebFetchTool] Fallback response for prompt "${this.params.prompt?.substring(
        0,
        50,
      )}...":`,
      JSON.stringify(result, null, 2),
    );

    const resultText = getResponseText(result) || '';

    debugLogger.debug(
      `[WebFetchTool] Formatted fallback tool response for prompt "${this.params.prompt}":\n\n`,
      resultText,
    );

    return {
      llmContent: resultText,
      returnDisplay: `Content for ${urls.length} URL(s) processed using fallback fetch.`,
    };
  } catch (e) {
    const errorMessage = `Error during fallback processing: ${getErrorMessage(e)}`;
    debugLogger.error(`[WebFetchTool] Fallback failed: ${errorMessage}`);
    return {
      llmContent: `Error: ${errorMessage}`,
      returnDisplay: `Error: ${errorMessage}`,
      error: {
        message: errorMessage,
        type: ToolErrorType.WEB_FETCH_FALLBACK_FAILED,
      },
    };
  }
}
2025-08-13 12:27:09 -07:00
/**
 * Builds the one-line description of this invocation.
 *
 * @returns A "Fetching content from" line when `url` is set; otherwise a line
 *   quoting the prompt, shortened to 97 characters plus `...` when the prompt
 *   is longer than 100 characters.
 */
getDescription(): string {
  if (this.params.url) {
    return `Fetching content from: ${this.params.url}`;
  }
  const prompt = this.params.prompt || '';
  const displayPrompt =
    prompt.length > 100 ? prompt.substring(0, 97) + '...' : prompt;
  return `Processing URLs and instructions from prompt: "${displayPrompt}"`;
}
2026-03-10 13:01:41 -04:00
/**
 * Returns the policy-update options for a confirmation outcome.
 *
 * @param _outcome The confirmation outcome; not used.
 * @returns Always an empty options object.
 */
override getPolicyUpdateOptions(
  _outcome: ToolConfirmationOutcome,
): PolicyUpdateOptions | undefined {
  return {};
}
2025-10-24 13:04:40 -07:00
/**
 * Builds the confirmation details for this invocation.
 *
 * With `url` set, that single URL is listed and the prompt shown is
 * `Fetch <url>`; otherwise the valid URLs found in `prompt` are listed.
 * GitHub blob URLs are shown in their raw-content form.
 *
 * @param _abortSignal Not used.
 * @returns Confirmation details of type `info`.
 */
protected override async getConfirmationDetails(
  _abortSignal: AbortSignal,
): Promise<ToolCallConfirmationDetails | false> {
  let urls: string[] = [];
  let prompt = this.params.prompt || '';

  if (this.params.url) {
    urls = [this.params.url];
    prompt = `Fetch ${this.params.url}`;
  } else if (this.params.prompt) {
    const { validUrls } = parsePrompt(this.params.prompt);
    urls = validUrls;
  }

  // Perform GitHub URL conversion here
  urls = urls.map((url) => convertGithubUrlToRaw(url));

  const confirmationDetails: ToolCallConfirmationDetails = {
    type: 'info',
    title: `Confirm Web Fetch`,
    prompt,
    urls,
    onConfirm: async (_outcome: ToolConfirmationOutcome) => {
      // Mode transitions (e.g. AUTO_EDIT) and policy updates are now
      // handled centrally by the scheduler.
    },
  };
  return confirmationDetails;
}
2026-02-23 11:50:14 -08:00
/**
 * Reads a response body into a Buffer, refusing bodies larger than `limit`.
 *
 * The declared `Content-Length` is checked first; the streamed byte count is
 * then enforced as well, since the header may be absent or wrong. In both
 * rejection cases the body stream is cancelled so it is not left unconsumed.
 *
 * @param response The response whose body should be read.
 * @param limit Maximum number of bytes to accept.
 * @returns The full body, or an empty Buffer when the response has no body.
 * @throws Error when the body exceeds `limit` bytes.
 */
private async readResponseWithLimit(
  response: Response,
  limit: number,
): Promise<Buffer> {
  const contentLength = response.headers.get('content-length');
  if (contentLength && parseInt(contentLength, 10) > limit) {
    // Best-effort: release the unread body before rejecting.
    await response.body?.cancel().catch(() => {});
    throw new Error(`Content exceeds size limit of ${limit} bytes`);
  }

  if (!response.body) {
    return Buffer.alloc(0);
  }

  const reader = response.body.getReader();
  const chunks: Uint8Array[] = [];
  let totalLength = 0;

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      totalLength += value.length;
      if (totalLength > limit) {
        // Attempt to cancel the reader to stop the stream
        await reader.cancel().catch(() => {});
        throw new Error(`Content exceeds size limit of ${limit} bytes`);
      }
      chunks.push(value);
    }
  } finally {
    reader.releaseLock();
  }

  // The total is already known, so concat does not need to recompute it.
  return Buffer.concat(chunks, totalLength);
}
/**
 * Direct-fetch path, used by `execute` when `getDirectWebFetch()` is true.
 *
 * Fetches `params.url` (GitHub blob URLs are converted to raw URLs) and
 * shapes the result by content type:
 * - markdown, plain text and JSON are returned as text;
 * - HTML is converted to text with link targets kept;
 * - images, video and PDF are returned as base64 `inlineData`;
 * - anything else is decoded as UTF-8 text.
 * Text results are truncated unless context management is enabled. A
 * response with status >= 400 is returned as content (status, headers, body)
 * without an `error` field.
 *
 * @param signal Abort signal forwarded to the fetch and the retry helper.
 * @returns The tool result; failures are reported in the result, not thrown.
 */
private async executeExperimental(signal: AbortSignal): Promise<ToolResult> {
  if (!this.params.url) {
    return {
      llmContent: 'Error: No URL provided.',
      returnDisplay: 'Error: No URL provided.',
      error: {
        message: 'No URL provided.',
        type: ToolErrorType.INVALID_TOOL_PARAMS,
      },
    };
  }

  let url: string;
  try {
    url = new URL(this.params.url).href;
  } catch {
    return {
      llmContent: `Error: Invalid URL "${this.params.url}"`,
      returnDisplay: `Error: Invalid URL "${this.params.url}"`,
      error: {
        message: `Invalid URL "${this.params.url}"`,
        type: ToolErrorType.INVALID_TOOL_PARAMS,
      },
    };
  }

  // Convert GitHub blob URL to raw URL
  url = convertGithubUrlToRaw(url);

  if (this.isBlockedHost(url)) {
    const errorMessage = `Access to blocked or private host ${url} is not allowed.`;
    debugLogger.warn(
      `[WebFetchTool] Blocked experimental fetch to host: ${url}`,
    );
    return {
      llmContent: `Error: ${errorMessage}`,
      returnDisplay: `Error: ${errorMessage}`,
      error: {
        message: errorMessage,
        type: ToolErrorType.WEB_FETCH_PROCESSING_ERROR,
      },
    };
  }

  try {
    const response = await retryWithBackoff(
      async () => {
        const res = await fetchWithTimeout(url, URL_FETCH_TIMEOUT_MS, {
          signal,
          headers: {
            Accept:
              'text/markdown, text/plain;q=0.9, application/json;q=0.9, text/html;q=0.8, application/pdf;q=0.7, video/*;q=0.7, */*;q=0.5',
            'User-Agent': USER_AGENT,
          },
        });
        return res;
      },
      {
        retryFetchErrors: this.context.config.getRetryFetchErrors(),
        onRetry: (attempt, error, delayMs) =>
          this.handleRetry(attempt, error, delayMs),
        signal,
      },
    );

    const contentType = response.headers.get('content-type') || '';
    const status = response.status;
    const bodyBuffer = await this.readResponseWithLimit(
      response,
      MAX_EXPERIMENTAL_FETCH_SIZE,
    );
    const truncationDisabled =
      this.context.config.isContextManagementEnabled();

    if (status >= 400) {
      let rawResponseText = bodyBuffer.toString('utf8');
      if (!truncationDisabled) {
        rawResponseText = truncateString(
          rawResponseText,
          10000,
          '\n\n... [Error response truncated] ...',
        );
      }
      const headers: Record<string, string> = {};
      response.headers.forEach((value, key) => {
        headers[key] = value;
      });
      const errorContent = `Request failed with status ${status}
Headers: ${JSON.stringify(headers, null, 2)}
Response: ${rawResponseText}`;
      debugLogger.error(
        `[WebFetchTool] Experimental fetch failed with status ${status} for ${url}`,
      );
      return {
        llmContent: errorContent,
        returnDisplay: `Failed to fetch ${url} (Status: ${status})`,
      };
    }

    const lowContentType = contentType.toLowerCase();
    // Media type without parameters such as "; charset=...".
    const mediaType = (contentType.split(';')[0] ?? '').trim();

    if (
      lowContentType.includes('text/markdown') ||
      lowContentType.includes('text/plain') ||
      lowContentType.includes('application/json')
    ) {
      let text = bodyBuffer.toString('utf8');
      if (!truncationDisabled) {
        text = truncateString(text, MAX_CONTENT_LENGTH, TRUNCATION_WARNING);
      }
      return {
        llmContent: text,
        returnDisplay: `Fetched ${contentType} content from ${url}`,
      };
    }

    if (lowContentType.includes('text/html')) {
      const html = bodyBuffer.toString('utf8');
      let textContent = convert(html, {
        wordwrap: false,
        selectors: [
          { selector: 'a', options: { ignoreHref: false, baseUrl: url } },
        ],
      });
      if (!truncationDisabled) {
        textContent = truncateString(
          textContent,
          MAX_CONTENT_LENGTH,
          TRUNCATION_WARNING,
        );
      }
      return {
        llmContent: textContent,
        returnDisplay: `Fetched and converted HTML content from ${url}`,
      };
    }

    if (
      lowContentType.startsWith('image/') ||
      lowContentType.startsWith('video/') ||
      // Compare the bare media type so a header with parameters still matches.
      mediaType.toLowerCase() === 'application/pdf'
    ) {
      const base64Data = bodyBuffer.toString('base64');
      return {
        llmContent: {
          inlineData: {
            data: base64Data,
            mimeType: mediaType,
          },
        },
        returnDisplay: `Fetched ${contentType} from ${url}`,
      };
    }

    // Fallback for unknown types - try as text
    let text = bodyBuffer.toString('utf8');
    if (!truncationDisabled) {
      text = truncateString(text, MAX_CONTENT_LENGTH, TRUNCATION_WARNING);
    }
    return {
      llmContent: text,
      returnDisplay: `Fetched ${contentType || 'unknown'} content from ${url}`,
    };
  } catch (e) {
    const errorMessage = `Error during experimental fetch for ${url}: ${getErrorMessage(e)}`;
    debugLogger.error(
      `[WebFetchTool] Experimental fetch error: ${errorMessage}`,
    );
    return {
      llmContent: `Error: ${errorMessage}`,
      returnDisplay: `Error: ${errorMessage}`,
      error: {
        message: errorMessage,
        type: ToolErrorType.WEB_FETCH_FALLBACK_FAILED,
      },
    };
  }
}
2025-08-13 12:27:09 -07:00
/**
 * Runs the tool.
 *
 * In direct mode (`getDirectWebFetch()`), delegates to
 * `executeExperimental`. Otherwise the URLs in the prompt are filtered and
 * the model is asked to process them; citation markers and a source list are
 * added from the grounding metadata, and skipped URLs are reported in a
 * leading warning. If the model call throws or returns neither text nor
 * grounding chunks, the direct-fetch fallback is used for all accepted URLs.
 *
 * @param signal Abort signal forwarded to all network and model calls.
 * @returns The tool result; failures are reported in the result, not thrown.
 */
async execute(signal: AbortSignal): Promise<ToolResult> {
  if (this.context.config.getDirectWebFetch()) {
    return this.executeExperimental(signal);
  }

  // In this mode validateToolParamValues rejects an empty prompt.
  const userPrompt = this.params.prompt!;
  const { validUrls } = parsePrompt(userPrompt);
  const { toFetch, skipped } = this.filterAndValidateUrls(validUrls);

  // If everything was skipped, fail early
  if (toFetch.length === 0 && skipped.length > 0) {
    const errorMessage = `All requested URLs were skipped: ${skipped.join(', ')}`;
    debugLogger.error(`[WebFetchTool] ${errorMessage}`);
    return {
      llmContent: `Error: ${errorMessage}`,
      returnDisplay: `Error: ${errorMessage}`,
      error: {
        message: errorMessage,
        type: ToolErrorType.WEB_FETCH_PROCESSING_ERROR,
      },
    };
  }

  try {
    const geminiClient = this.context.geminiClient;
    const sanitizedPrompt = `Follow the user's instructions to process the authorized URLs.
<user_instructions>
${sanitizeXml(userPrompt)}
</user_instructions>
<authorized_urls>
${toFetch.join('\n')}
</authorized_urls>
`;
    const response = await geminiClient.generateContent(
      { model: 'web-fetch' },
      [{ role: 'user', parts: [{ text: sanitizedPrompt }] }],
      signal,
      LlmRole.UTILITY_TOOL,
    );

    debugLogger.debug(
      `[WebFetchTool] Full response for prompt "${userPrompt.substring(
        0,
        50,
      )}...":`,
      JSON.stringify(response, null, 2),
    );

    let responseText = getResponseText(response) || '';
    const groundingMetadata = response.candidates?.[0]?.groundingMetadata;

    // Simple primary success check: we need some text or grounding data
    if (!responseText.trim() && !groundingMetadata?.groundingChunks?.length) {
      throw new Error('Primary fetch returned no content');
    }

    // 1. Apply Grounding Supports (Citations)
    const groundingSupports = groundingMetadata?.groundingSupports?.filter(
      isGroundingSupportItem,
    );
    if (groundingSupports && groundingSupports.length > 0) {
      responseText = this.insertCitationMarkers(
        responseText,
        groundingSupports,
      );
    }

    // 2. Append Source List
    const sources =
      groundingMetadata?.groundingChunks?.filter(isGroundingChunkItem);
    if (sources && sources.length > 0) {
      const sourceListFormatted = sources.map((source, index) => {
        const title = source.web?.title || 'Untitled';
        const uri = source.web?.uri || 'Unknown URI';
        return `[${index + 1}] ${title} (${uri})`;
      });
      responseText += `\n\nSources:\n${sourceListFormatted.join('\n')}`;
    }

    // 3. Prepend Warnings for skipped URLs
    if (skipped.length > 0) {
      responseText = `[Warning] The following URLs were skipped:\n${skipped.join('\n')}\n\n${responseText}`;
    }

    debugLogger.debug(
      `[WebFetchTool] Formatted tool response for prompt "${userPrompt}":\n\n`,
      responseText,
    );

    return {
      llmContent: responseText,
      returnDisplay: `Content processed from prompt.`,
    };
  } catch (error: unknown) {
    debugLogger.warn(
      `[WebFetchTool] Primary fetch failed, falling back: ${getErrorMessage(error)}`,
    );
    logWebFetchFallbackAttempt(
      this.context.config,
      new WebFetchFallbackAttemptEvent('primary_failed'),
    );
    // Simple All-or-Nothing Fallback
    return this.executeFallback(toFetch, signal);
  }
}

/**
 * Inserts `[n]` citation markers into `text` at the end of each supported
 * segment.
 *
 * Segment indices are treated as offsets into the UTF-8 encoding of the
 * text, so the insertion is done on bytes; splitting the string into UTF-16
 * code units would misplace markers after any non-ASCII character.
 * Insertions are applied from the end of the text backwards so earlier
 * offsets stay valid.
 */
private insertCitationMarkers(
  text: string,
  supports: GroundingSupportItem[],
): string {
  const insertions: Array<{ index: number; marker: string }> = [];
  for (const support of supports) {
    if (support.segment && support.groundingChunkIndices) {
      insertions.push({
        index: support.segment.endIndex,
        marker: support.groundingChunkIndices
          .map((chunkIndex: number) => `[${chunkIndex + 1}]`)
          .join(''),
      });
    }
  }
  if (insertions.length === 0) {
    return text;
  }

  insertions.sort((a, b) => b.index - a.index);

  const bytes = Buffer.from(text, 'utf8');
  const parts: Buffer[] = [];
  let upper = bytes.length;
  for (const { index, marker } of insertions) {
    // Clamp so an out-of-range offset cannot reorder or duplicate content.
    const position = Math.max(0, Math.min(index, upper));
    parts.unshift(bytes.subarray(position, upper));
    parts.unshift(Buffer.from(marker, 'utf8'));
    upper = position;
  }
  parts.unshift(bytes.subarray(0, upper));
  return Buffer.concat(parts).toString('utf8');
}
}
2025-08-13 12:27:09 -07:00
/**
 * Implementation of the WebFetch tool logic
 *
 * Declares the tool (name, display name, description, schema), validates
 * parameters and creates `WebFetchToolInvocation` instances. When
 * `getDirectWebFetch()` is true the tool takes a single `url` parameter;
 * otherwise it takes a `prompt` that contains URL(s) and instructions.
 */
export class WebFetchTool extends BaseDeclarativeTool<
  WebFetchToolParams,
  ToolResult
> {
  static readonly Name = WEB_FETCH_TOOL_NAME;

  /**
   * @param context Agent loop context; provides `config`.
   * @param messageBus Message bus forwarded to the base tool and invocations.
   */
  constructor(
    private readonly context: AgentLoopContext,
    messageBus: MessageBus,
  ) {
    super(
      WebFetchTool.Name,
      WEB_FETCH_DISPLAY_NAME,
      WEB_FETCH_DEFINITION.base.description!,
      Kind.Fetch,
      WEB_FETCH_DEFINITION.base.parametersJsonSchema,
      messageBus,
      true, // isOutputMarkdown
      false, // canUpdateOutput
    );
  }

  /**
   * Validates the parameter values for the active mode.
   *
   * @returns An error message, or `null` when the parameters are valid.
   */
  protected override validateToolParamValues(
    params: WebFetchToolParams,
  ): string | null {
    if (this.context.config.getDirectWebFetch()) {
      if (!params.url) {
        return "The 'url' parameter is required.";
      }
      let parsed: URL;
      try {
        parsed = new URL(params.url);
      } catch {
        return `Invalid URL: "${params.url}"`;
      }
      // The direct-mode schema promises http/https only; enforce it here so
      // other schemes are rejected before any fetch is attempted.
      if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
        return `Unsupported protocol in URL: "${params.url}". Only http and https are supported.`;
      }
      return null;
    }

    if (!params.prompt || params.prompt.trim() === '') {
      return "The 'prompt' parameter cannot be empty and must contain URL(s) and instructions.";
    }

    const { validUrls, errors } = parsePrompt(params.prompt);

    if (errors.length > 0) {
      return `Error(s) in prompt URLs:\n- ${errors.join('\n- ')}`;
    }

    if (validUrls.length === 0) {
      return "The 'prompt' must contain at least one valid URL (starting with http:// or https://).";
    }

    return null;
  }

  /** Creates the invocation that carries out a single tool call. */
  protected createInvocation(
    params: WebFetchToolParams,
    messageBus: MessageBus,
    _toolName?: string,
    _toolDisplayName?: string,
  ): ToolInvocation<WebFetchToolParams, ToolResult> {
    return new WebFetchToolInvocation(
      this.context,
      params,
      messageBus,
      _toolName,
      _toolDisplayName,
    );
  }

  /**
   * Returns the tool declaration for a model. In direct mode the description
   * and parameter schema are replaced with the single-`url` variant.
   */
  override getSchema(modelId?: string) {
    const schema = resolveToolDeclaration(WEB_FETCH_DEFINITION, modelId);
    if (this.context.config.getDirectWebFetch()) {
      return {
        ...schema,
        description:
          'Fetch content from a URL directly. Send multiple requests for this tool if multiple URL fetches are needed.',
        parametersJsonSchema: {
          type: 'object',
          properties: {
            url: {
              type: 'string',
              description:
                'The URL to fetch. Must be a valid http or https URL.',
            },
          },
          required: ['url'],
        },
      };
    }
    return schema;
  }
}