2025-04-19 19:45:42 +01:00
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
2026-03-04 05:42:59 +05:30
import {
BaseDeclarativeTool ,
BaseToolInvocation ,
Kind ,
type ToolCallConfirmationDetails ,
type ToolInvocation ,
type ToolResult ,
type ToolConfirmationOutcome ,
2025-08-26 00:04:53 +02:00
} from './tools.js' ;
2025-10-15 09:39:41 -07:00
import type { MessageBus } from '../confirmation-bus/message-bus.js' ;
2025-08-21 14:40:18 -07:00
import { ToolErrorType } from './tool-error.js' ;
2025-04-19 19:45:42 +01:00
import { getErrorMessage } from '../utils/errors.js' ;
2025-08-26 00:04:53 +02:00
import type { Config } from '../config/config.js' ;
2025-11-03 15:41:00 -08:00
import { ApprovalMode } from '../policy/types.js' ;
2025-08-27 23:22:21 -04:00
import { getResponseText } from '../utils/partUtils.js' ;
2025-11-03 10:13:52 -08:00
import { fetchWithTimeout , isPrivateIp } from '../utils/fetch.js' ;
2026-02-23 11:50:14 -08:00
import { truncateString } from '../utils/textUtils.js' ;
2025-06-13 17:44:14 -07:00
import { convert } from 'html-to-text' ;
2025-10-09 13:01:17 -04:00
import {
logWebFetchFallbackAttempt ,
WebFetchFallbackAttemptEvent ,
} from '../telemetry/index.js' ;
2026-02-17 12:32:30 -05:00
import { LlmRole } from '../telemetry/llmRole.js' ;
2025-10-15 22:48:12 -04:00
import { WEB_FETCH_TOOL_NAME } from './tool-names.js' ;
2025-10-21 16:35:22 -04:00
import { debugLogger } from '../utils/debugLogger.js' ;
2025-12-05 09:49:08 -08:00
import { retryWithBackoff } from '../utils/retry.js' ;
2026-02-13 23:55:02 -05:00
import { WEB_FETCH_DEFINITION } from './definitions/coreTools.js' ;
import { resolveToolDeclaration } from './definitions/resolver.js' ;
2026-02-20 11:18:07 -06:00
import { LRUCache } from 'mnemonist' ;
2025-06-13 17:44:14 -07:00
/** Timeout, in milliseconds, handed to fetchWithTimeout for each direct fetch. */
const URL_FETCH_TIMEOUT_MS = 10 * 1000;

/** Length limit handed to truncateString for text content returned by this tool. */
const MAX_CONTENT_LENGTH = 100 * 1000;

/** Largest response body, in bytes, that readResponseWithLimit will buffer. */
const MAX_EXPERIMENTAL_FETCH_SIZE = 10 * 1024 * 1024; // 10MB

/** User-Agent header value sent with direct HTTP fetches. */
const USER_AGENT =
  'Mozilla/5.0 (compatible; Google-Gemini-CLI/1.0; +https://github.com/google-gemini/gemini-cli)';

/** Suffix handed to truncateString when content is cut at MAX_CONTENT_LENGTH. */
const TRUNCATION_WARNING = '\n\n... [Content truncated due to size limit] ...';
2025-06-13 17:44:14 -07:00
2026-02-20 11:18:07 -06:00
// Rate limiting configuration
const RATE_LIMIT_WINDOW_MS = 60000; // 1 minute
const MAX_REQUESTS_PER_WINDOW = 10;
// Request timestamps (ms since epoch) per hostname; the LRU cache holds at most 1000 hosts.
const hostRequestHistory = new LRUCache<string, number[]>(1000);

/**
 * Sliding-window rate limiter keyed by the URL's hostname.
 *
 * Each allowed call records the current time for the host. Once
 * MAX_REQUESTS_PER_WINDOW timestamps fall inside the last
 * RATE_LIMIT_WINDOW_MS, further calls are rejected until the oldest one
 * leaves the window.
 *
 * @param url The URL about to be fetched.
 * @returns `{ allowed: true }` when the request may proceed, otherwise
 *   `{ allowed: false, waitTimeMs }` where `waitTimeMs` is the (non-negative)
 *   time until the oldest recorded request leaves the window.
 */
function checkRateLimit(url: string): {
  allowed: boolean;
  waitTimeMs?: number;
} {
  try {
    const { hostname } = new URL(url);
    const now = Date.now();
    const cutoff = now - RATE_LIMIT_WINDOW_MS;

    // Keep only the timestamps that are still inside the window.
    const recent = (hostRequestHistory.get(hostname) || []).filter(
      (timestamp) => timestamp > cutoff,
    );

    const underLimit = recent.length < MAX_REQUESTS_PER_WINDOW;
    if (underLimit) {
      recent.push(now);
    }
    // Store the pruned history in both outcomes.
    hostRequestHistory.set(hostname, recent);

    if (underLimit) {
      return { allowed: true };
    }

    // The wait is measured from the oldest timestamp still in the window.
    const waitTimeMs = recent[0] + RATE_LIMIT_WINDOW_MS - now;
    return { allowed: false, waitTimeMs: Math.max(0, waitTimeMs) };
  } catch {
    // If URL parsing fails, we fallback to allowed (should be caught by parsePrompt anyway)
    return { allowed: true };
  }
}
2025-10-14 16:53:22 -04:00
/**
 * Extracts URLs from a free-form prompt.
 *
 * The text is split on whitespace and only tokens containing "://" are
 * considered URL candidates. Each candidate is parsed with `new URL()`:
 * http/https URLs are collected (in normalized `href` form), while other
 * protocols and unparsable candidates produce an error message.
 *
 * @param text The prompt to scan.
 * @returns The valid URLs found and a list of human-readable errors.
 */
export function parsePrompt(text: string): {
  validUrls: string[];
  errors: string[];
} {
  const supportedProtocols = ['http:', 'https:'];
  const validUrls: string[] = [];
  const errors: string[] = [];

  // Heuristic: only tokens that contain "://" look URL-like enough to check.
  const candidates = text
    .split(/\s+/)
    .filter((token) => token.includes('://'));

  for (const candidate of candidates) {
    let parsed: URL;
    try {
      parsed = new URL(candidate);
    } catch {
      // new URL() threw, so it's malformed according to WHATWG standard
      errors.push(`Malformed URL detected: "${candidate}".`);
      continue;
    }

    if (supportedProtocols.includes(parsed.protocol)) {
      validUrls.push(parsed.href);
    } else {
      errors.push(
        `Unsupported protocol in URL: "${candidate}". Only http and https are supported.`,
      );
    }
  }

  return { validUrls, errors };
}
2025-05-29 15:02:31 -07:00
2026-02-23 11:50:14 -08:00
/**
 * Safely converts a GitHub blob URL to a raw content URL.
 *
 * Only URLs on `github.com` whose path starts with `/<owner>/<repo>/blob/`
 * are rewritten (to `raw.githubusercontent.com/<owner>/<repo>/...`). Any
 * other input, including strings that are not valid URLs, is returned
 * unchanged.
 *
 * @param urlStr The URL to convert.
 * @returns The raw-content URL, or `urlStr` itself when no conversion applies.
 */
export function convertGithubUrlToRaw(urlStr: string): string {
  // Anchored at the start of the path so that a "/blob/" segment appearing
  // deeper in the path (e.g. a directory named "blob") is not mistaken for
  // GitHub's blob view.
  const blobPathPrefix = /^\/([^/]+\/[^/]+)\/blob\//;
  try {
    const url = new URL(urlStr);
    // Use the same pattern for the check and the rewrite: the hostname must
    // only be switched when the path is actually converted.
    if (url.hostname === 'github.com' && blobPathPrefix.test(url.pathname)) {
      url.hostname = 'raw.githubusercontent.com';
      url.pathname = url.pathname.replace(blobPathPrefix, '/$1/');
      return url.href;
    }
  } catch {
    // Ignore invalid URLs
  }
  return urlStr;
}
2025-05-29 15:02:31 -07:00
// Interfaces for grounding metadata (similar to web-search.ts)
// These describe only the fields this file reads from the model response's
// groundingMetadata; the response values are cast to these shapes in execute().

/** Web source attached to a grounding chunk; both fields may be absent. */
interface GroundingChunkWeb {
// Source address; execute() prints 'Unknown URI' when it is missing.
uri? : string ;
// Source title; execute() prints 'Untitled' when it is missing.
title? : string ;
}
/** Element type that `groundingMetadata.groundingChunks` is cast to. */
interface GroundingChunkItem {
web? : GroundingChunkWeb ;
}
/** Span of the response text that a grounding support refers to. */
interface GroundingSupportSegment {
// Not read in this file.
startIndex : number ;
// Position in the response text where execute() inserts the citation markers.
// NOTE(review): the unit (characters vs. bytes) is defined by the API — confirm.
endIndex : number ;
// Not read in this file.
text? : string ;
}
/** Element type that `groundingMetadata.groundingSupports` is cast to. */
interface GroundingSupportItem {
segment? : GroundingSupportSegment ;
// Zero-based indices into the grounding chunks; rendered as "[n+1]" markers.
groundingChunkIndices? : number [ ] ;
}
2025-04-19 19:45:42 +01:00
/**
 * Input accepted by the WebFetch tool.
 *
 * Both fields are optional at the type level: `prompt` is used by the
 * prompt-based flow and `url` by the direct (experimental) fetch flow.
 */
export interface WebFetchToolParams {
  /**
   * Prompt text that contains the URL(s) (up to 20) together with the
   * instructions for processing their content.
   */
  prompt?: string;

  /**
   * Direct URL to fetch (experimental mode).
   */
  url?: string;
}
2025-12-05 09:49:08 -08:00
/**
 * An Error that may carry an HTTP status code.
 *
 * executeFallback() sets `status` from the response on the error it throws
 * for non-OK responses inside the retryWithBackoff callback.
 * NOTE(review): presumably retryWithBackoff inspects `status` to decide
 * whether to retry — confirm against ../utils/retry.js.
 */
interface ErrorWithStatus extends Error {
status? : number ;
}
2025-08-13 12:27:09 -07:00
class WebFetchToolInvocation extends BaseToolInvocation <
WebFetchToolParams ,
ToolResult
> {
/**
 * Creates one invocation of the web fetch tool.
 *
 * `config` is kept on the instance for use by the execution methods; every
 * other argument is passed straight through to the base invocation.
 */
constructor(
  private readonly config: Config,
  params: WebFetchToolParams,
  messageBus: MessageBus,
  _toolName?: string,
  _toolDisplayName?: string,
) {
  super(params, messageBus, _toolName, _toolDisplayName);
}
2025-08-13 12:27:09 -07:00
/**
 * Fallback path: fetches the first URL of the prompt directly over HTTP and
 * asks the 'web-fetch-fallback' model to answer the prompt from that content.
 *
 * Steps:
 *  1. Take the first valid URL from the prompt (GitHub blob URLs are
 *     converted to raw URLs).
 *  2. Fetch it with retryWithBackoff; non-OK responses throw an error that
 *     carries the HTTP status.
 *  3. Read at most MAX_EXPERIMENTAL_FETCH_SIZE bytes, convert HTML to text
 *     (other content types are used verbatim) and truncate to
 *     MAX_CONTENT_LENGTH.
 *  4. Send the text plus the original prompt to the model.
 *
 * @param signal Abort signal forwarded to the fetch and the model call.
 * @returns The model's answer, or an error result of type
 *   WEB_FETCH_FALLBACK_FAILED when any step throws. This method does not
 *   reject for errors raised inside its try block.
 */
private async executeFallback(signal: AbortSignal): Promise<ToolResult> {
  // NOTE(review): `prompt` is presumably validated (non-empty, at least one
  // valid URL) by WebFetchTool.validateToolParamValues before this runs.
  const { validUrls: urls } = parsePrompt(this.params.prompt!);
  // For now, we only support one URL for fallback
  let url = urls[0];

  // Convert GitHub blob URL to raw URL
  url = convertGithubUrlToRaw(url);

  try {
    const response = await retryWithBackoff(
      async () => {
        const res = await fetchWithTimeout(url, URL_FETCH_TIMEOUT_MS, {
          signal,
          headers: {
            'User-Agent': USER_AGENT,
          },
        });
        if (!res.ok) {
          // Attach the HTTP status to the thrown error.
          const error: ErrorWithStatus = new Error(
            `Request failed with status code ${res.status} ${res.statusText}`,
          );
          error.status = res.status;
          throw error;
        }
        return res;
      },
      {
        retryFetchErrors: this.config.getRetryFetchErrors(),
      },
    );

    const bodyBuffer = await this.readResponseWithLimit(
      response,
      MAX_EXPERIMENTAL_FETCH_SIZE,
    );
    const rawContent = bodyBuffer.toString('utf8');
    const contentType = response.headers.get('content-type') || '';
    let textContent: string;

    // Only use html-to-text if content type is HTML, or if no content type is provided (assume HTML)
    if (
      contentType.toLowerCase().includes('text/html') ||
      contentType === ''
    ) {
      textContent = convert(rawContent, {
        wordwrap: false,
        selectors: [
          { selector: 'a', options: { ignoreHref: true } },
          { selector: 'img', format: 'skip' },
        ],
      });
    } else {
      // For other content types (text/plain, application/json, etc.), use raw text
      textContent = rawContent;
    }

    textContent = truncateString(
      textContent,
      MAX_CONTENT_LENGTH,
      TRUNCATION_WARNING,
    );

    const geminiClient = this.config.getGeminiClient();
    const fallbackPrompt = `The user requested the following: "${this.params.prompt}".

I was unable to access the URL directly. Instead, I have fetched the raw content of the page. Please use the following content to answer the request. Do not attempt to access the URL again.

---
${textContent}
---
`;
    const result = await geminiClient.generateContent(
      { model: 'web-fetch-fallback' },
      [{ role: 'user', parts: [{ text: fallbackPrompt }] }],
      signal,
      LlmRole.UTILITY_TOOL,
    );
    const resultText = getResponseText(result) || '';
    return {
      llmContent: resultText,
      returnDisplay: `Content for ${url} processed using fallback fetch.`,
    };
  } catch (e: unknown) {
    // getErrorMessage copes with non-Error throwables, where reading
    // `.message` after a cast would produce "undefined". It is also what the
    // other execution paths in this class use.
    const errorMessage = `Error during fallback fetch for ${url}: ${getErrorMessage(e)}`;
    return {
      llmContent: `Error: ${errorMessage}`,
      returnDisplay: `Error: ${errorMessage}`,
      error: {
        message: errorMessage,
        type: ToolErrorType.WEB_FETCH_FALLBACK_FAILED,
      },
    };
  }
}
2025-08-13 12:27:09 -07:00
/**
 * One-line description of this invocation.
 *
 * When a direct `url` is given it is shown as-is; otherwise the prompt is
 * shown, shortened to 97 characters plus "..." when longer than 100.
 */
getDescription(): string {
  const directUrl = this.params.url;
  if (directUrl) {
    return `Fetching content from: ${directUrl}`;
  }

  const fullPrompt = this.params.prompt || '';
  let displayPrompt = fullPrompt;
  if (fullPrompt.length > 100) {
    displayPrompt = `${fullPrompt.substring(0, 97)}...`;
  }
  return `Processing URLs and instructions from prompt: "${displayPrompt}"`;
}
2025-10-24 13:04:40 -07:00
/**
 * Builds the confirmation shown before the fetch runs.
 *
 * @returns `false` when the approval mode is AUTO_EDIT (no confirmation),
 *   otherwise an 'info' confirmation listing the URLs that will be fetched
 *   (GitHub blob URLs already converted to raw URLs).
 */
protected override async getConfirmationDetails(
  _abortSignal: AbortSignal,
): Promise<ToolCallConfirmationDetails | false> {
  // Check for AUTO_EDIT approval mode. This tool has a specific behavior
  // where ProceedAlways switches the entire session to AUTO_EDIT.
  if (this.config.getApprovalMode() === ApprovalMode.AUTO_EDIT) {
    return false;
  }

  const { url: directUrl, prompt: rawPrompt } = this.params;

  let prompt = rawPrompt || '';
  let urls: string[] = [];
  if (directUrl) {
    // Direct mode: a single URL and a synthesized prompt.
    urls = [directUrl];
    prompt = `Fetch ${directUrl}`;
  } else if (rawPrompt) {
    urls = parsePrompt(rawPrompt).validUrls;
  }

  const confirmationDetails: ToolCallConfirmationDetails = {
    type: 'info',
    title: `Confirm Web Fetch`,
    prompt,
    // Perform GitHub URL conversion here
    urls: urls.map((candidate) => convertGithubUrlToRaw(candidate)),
    onConfirm: async (_outcome: ToolConfirmationOutcome) => {
      // Mode transitions (e.g. AUTO_EDIT) and policy updates are now
      // handled centrally by the scheduler.
    },
  };
  return confirmationDetails;
}
2026-02-23 11:50:14 -08:00
/**
 * Reads a response body into a Buffer, refusing to buffer more than `limit`
 * bytes.
 *
 * The declared Content-Length is checked first; the streamed size is then
 * enforced chunk by chunk, since the header may be missing.
 *
 * @param response The fetch response to read.
 * @param limit Maximum number of bytes to accept.
 * @returns The whole body, or an empty Buffer when the response has no body.
 * @throws Error when the declared or actual size exceeds `limit`.
 */
private async readResponseWithLimit(
  response: Response,
  limit: number,
): Promise<Buffer> {
  const sizeExceeded = (): Error =>
    new Error(`Content exceeds size limit of ${limit} bytes`);

  const declaredLength = response.headers.get('content-length');
  if (declaredLength && parseInt(declaredLength, 10) > limit) {
    throw sizeExceeded();
  }

  const body = response.body;
  if (!body) {
    return Buffer.alloc(0);
  }

  const reader = body.getReader();
  const chunks: Uint8Array[] = [];
  let received = 0;
  try {
    for (
      let next = await reader.read();
      !next.done;
      next = await reader.read()
    ) {
      received += next.value.length;
      if (received > limit) {
        // Attempt to cancel the reader to stop the stream
        await reader.cancel().catch(() => {});
        throw sizeExceeded();
      }
      chunks.push(next.value);
    }
  } finally {
    reader.releaseLock();
  }

  return Buffer.concat(chunks);
}
/**
 * Direct ("experimental") fetch: retrieves `params.url` over HTTP and returns
 * its content without a model call.
 *
 * Result by response kind:
 *  - status >= 400: status, headers and (truncated) body as text; no `error`
 *    field is set, so the caller sees the server's response.
 *  - text/markdown, text/plain, application/json: truncated text.
 *  - text/html: converted to text (links kept, resolved against the URL).
 *  - image/*, video/*, application/pdf: base64 inline data.
 *  - anything else: decoded as UTF-8 text and truncated.
 *
 * @param signal Abort signal forwarded to the fetch.
 * @returns The fetched content, or an error result when the URL is missing
 *   or invalid (INVALID_TOOL_PARAMS) or when fetching/reading throws
 *   (WEB_FETCH_FALLBACK_FAILED).
 */
private async executeExperimental(signal: AbortSignal): Promise<ToolResult> {
  if (!this.params.url) {
    return {
      llmContent: 'Error: No URL provided.',
      returnDisplay: 'Error: No URL provided.',
      error: {
        message: 'No URL provided.',
        type: ToolErrorType.INVALID_TOOL_PARAMS,
      },
    };
  }

  let url: string;
  try {
    url = new URL(this.params.url).href;
  } catch {
    return {
      llmContent: `Error: Invalid URL "${this.params.url}"`,
      returnDisplay: `Error: Invalid URL "${this.params.url}"`,
      error: {
        message: `Invalid URL "${this.params.url}"`,
        type: ToolErrorType.INVALID_TOOL_PARAMS,
      },
    };
  }

  // Convert GitHub blob URL to raw URL
  url = convertGithubUrlToRaw(url);

  try {
    const response = await retryWithBackoff(
      async () => {
        const res = await fetchWithTimeout(url, URL_FETCH_TIMEOUT_MS, {
          signal,
          headers: {
            Accept:
              'text/markdown, text/plain;q=0.9, application/json;q=0.9, text/html;q=0.8, application/pdf;q=0.7, video/*;q=0.7, */*;q=0.5',
            'User-Agent': USER_AGENT,
          },
        });
        return res;
      },
      {
        retryFetchErrors: this.config.getRetryFetchErrors(),
      },
    );

    const contentType = response.headers.get('content-type') || '';
    const status = response.status;
    const bodyBuffer = await this.readResponseWithLimit(
      response,
      MAX_EXPERIMENTAL_FETCH_SIZE,
    );

    if (status >= 400) {
      const rawResponseText = bodyBuffer.toString('utf8');
      const headers: Record<string, string> = {};
      response.headers.forEach((value, key) => {
        headers[key] = value;
      });
      const errorContent = `Request failed with status ${status}
Headers: ${JSON.stringify(headers, null, 2)}
Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response truncated] ...')}`;
      return {
        llmContent: errorContent,
        returnDisplay: `Failed to fetch ${url} (Status: ${status})`,
      };
    }

    const lowContentType = contentType.toLowerCase();
    // Media type without parameters ("application/pdf; charset=binary" ->
    // "application/pdf"). Binary types are matched on this value, so a
    // header with parameters is not mistaken for an unknown type and decoded
    // as text.
    const mimeType = (lowContentType.split(';')[0] ?? '').trim();

    if (
      lowContentType.includes('text/markdown') ||
      lowContentType.includes('text/plain') ||
      lowContentType.includes('application/json')
    ) {
      const text = truncateString(
        bodyBuffer.toString('utf8'),
        MAX_CONTENT_LENGTH,
        TRUNCATION_WARNING,
      );
      return {
        llmContent: text,
        returnDisplay: `Fetched ${contentType} content from ${url}`,
      };
    }

    if (lowContentType.includes('text/html')) {
      const html = bodyBuffer.toString('utf8');
      const textContent = truncateString(
        convert(html, {
          wordwrap: false,
          selectors: [
            { selector: 'a', options: { ignoreHref: false, baseUrl: url } },
          ],
        }),
        MAX_CONTENT_LENGTH,
        TRUNCATION_WARNING,
      );
      return {
        llmContent: textContent,
        returnDisplay: `Fetched and converted HTML content from ${url}`,
      };
    }

    if (
      mimeType.startsWith('image/') ||
      mimeType.startsWith('video/') ||
      mimeType === 'application/pdf'
    ) {
      const base64Data = bodyBuffer.toString('base64');
      return {
        llmContent: {
          inlineData: {
            data: base64Data,
            mimeType,
          },
        },
        returnDisplay: `Fetched ${contentType} from ${url}`,
      };
    }

    // Fallback for unknown types - try as text
    const text = truncateString(
      bodyBuffer.toString('utf8'),
      MAX_CONTENT_LENGTH,
      TRUNCATION_WARNING,
    );
    return {
      llmContent: text,
      returnDisplay: `Fetched ${contentType || 'unknown'} content from ${url}`,
    };
  } catch (e: unknown) {
    const errorMessage = `Error during experimental fetch for ${url}: ${getErrorMessage(e)}`;
    return {
      llmContent: `Error: ${errorMessage}`,
      returnDisplay: `Error: ${errorMessage}`,
      error: {
        message: errorMessage,
        type: ToolErrorType.WEB_FETCH_FALLBACK_FAILED,
      },
    };
  }
}
2025-08-13 12:27:09 -07:00
/**
 * Runs the tool.
 *
 * In direct-fetch mode this delegates to executeExperimental(). Otherwise:
 *  1. the first URL of the prompt is rate-limited per host;
 *  2. URLs that isPrivateIp() flags go straight to executeFallback();
 *  3. the prompt is sent to the 'web-fetch' model; when every URL retrieval
 *     failed, or the answer has neither text nor sources, executeFallback()
 *     is used instead;
 *  4. citation markers and a "Sources:" list are added from the grounding
 *     metadata.
 *
 * @param signal Abort signal forwarded to model calls and fetches.
 * @returns The processed content, or an error result of type
 *   WEB_FETCH_PROCESSING_ERROR (rate limit hit, or the model call threw).
 */
async execute(signal: AbortSignal): Promise<ToolResult> {
  if (this.config.getDirectWebFetch()) {
    return this.executeExperimental(signal);
  }

  // NOTE(review): `prompt` is presumably validated (non-empty, at least one
  // valid URL) by WebFetchTool.validateToolParamValues before this runs.
  const userPrompt = this.params.prompt!;
  const { validUrls: urls } = parsePrompt(userPrompt);
  const url = urls[0];

  // Enforce rate limiting
  const rateLimitResult = checkRateLimit(url);
  if (!rateLimitResult.allowed) {
    const waitTimeSecs = Math.ceil((rateLimitResult.waitTimeMs || 0) / 1000);
    const errorMessage = `Rate limit exceeded for host. Please wait ${waitTimeSecs} seconds before trying again.`;
    debugLogger.warn(`[WebFetchTool] Rate limit exceeded for ${url}`);
    return {
      llmContent: `Error: ${errorMessage}`,
      returnDisplay: `Error: ${errorMessage}`,
      error: {
        message: errorMessage,
        type: ToolErrorType.WEB_FETCH_PROCESSING_ERROR,
      },
    };
  }

  if (isPrivateIp(url)) {
    logWebFetchFallbackAttempt(
      this.config,
      new WebFetchFallbackAttemptEvent('private_ip'),
    );
    return this.executeFallback(signal);
  }

  const geminiClient = this.config.getGeminiClient();

  try {
    const response = await geminiClient.generateContent(
      { model: 'web-fetch' },
      [{ role: 'user', parts: [{ text: userPrompt }] }],
      signal, // Pass signal
      LlmRole.UTILITY_TOOL,
    );

    debugLogger.debug(
      `[WebFetchTool] Full response for prompt "${userPrompt.substring(
        0,
        50,
      )}...":`,
      JSON.stringify(response, null, 2),
    );

    let responseText = getResponseText(response) || '';
    const urlContextMeta = response.candidates?.[0]?.urlContextMetadata;
    const groundingMetadata = response.candidates?.[0]?.groundingMetadata;
    const sources = groundingMetadata?.groundingChunks as
      | GroundingChunkItem[]
      | undefined;
    // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
    const groundingSupports = groundingMetadata?.groundingSupports as
      | GroundingSupportItem[]
      | undefined;

    // Error Handling: fall back when every reported URL retrieval failed, or
    // when the response carries neither text nor grounding sources.
    const urlMetadata = urlContextMeta?.urlMetadata;
    const allRetrievalsFailed =
      !!urlMetadata &&
      urlMetadata.length > 0 &&
      urlMetadata.every(
        (m) => m.urlRetrievalStatus !== 'URL_RETRIEVAL_STATUS_SUCCESS',
      );
    const hasNoUsableContent = !responseText.trim() && !sources?.length;

    if (allRetrievalsFailed || hasNoUsableContent) {
      logWebFetchFallbackAttempt(
        this.config,
        new WebFetchFallbackAttemptEvent('primary_failed'),
      );
      return await this.executeFallback(signal);
    }

    if (sources && sources.length > 0) {
      const sourceListFormatted = sources.map(
        (source: GroundingChunkItem, index: number) => {
          const title = source.web?.title || 'Untitled';
          const uri = source.web?.uri || 'Unknown URI'; // Fallback if URI is missing
          return `[${index + 1}] ${title} (${uri})`;
        },
      );

      if (groundingSupports && groundingSupports.length > 0) {
        responseText = this.insertCitationMarkers(
          responseText,
          groundingSupports,
        );
      }

      responseText += `\n\nSources:\n${sourceListFormatted.join('\n')}`;
    }

    const llmContent = responseText;

    debugLogger.debug(
      `[WebFetchTool] Formatted tool response for prompt "${userPrompt}:\n\n":`,
      llmContent,
    );

    return {
      llmContent,
      returnDisplay: `Content processed from prompt.`,
    };
  } catch (error: unknown) {
    const errorMessage = `Error processing web content for prompt "${userPrompt.substring(
      0,
      50,
    )}...": ${getErrorMessage(error)}`;
    return {
      llmContent: `Error: ${errorMessage}`,
      returnDisplay: `Error: ${errorMessage}`,
      error: {
        message: errorMessage,
        type: ToolErrorType.WEB_FETCH_PROCESSING_ERROR,
      },
    };
  }
}

/**
 * Inserts "[n]" citation markers into `text` at the end of each supported
 * segment.
 *
 * Segment indices in the Gemini API are measured in UTF-8 bytes, not in
 * JavaScript string (UTF-16) positions, so the insertion is done on the
 * encoded bytes. For ASCII-only text both positions coincide.
 *
 * @param text The response text the segments refer to.
 * @param supports Grounding supports; entries without a segment or without
 *   chunk indices are skipped.
 * @returns `text` with the markers inserted.
 */
private insertCitationMarkers(
  text: string,
  supports: GroundingSupportItem[],
): string {
  const insertions: Array<{ index: number; marker: string }> = [];
  for (const support of supports) {
    if (support.segment && support.groundingChunkIndices) {
      insertions.push({
        index: support.segment.endIndex,
        marker: support.groundingChunkIndices
          .map((chunkIndex: number) => `[${chunkIndex + 1}]`)
          .join(''),
      });
    }
  }
  if (insertions.length === 0) {
    return text;
  }

  // Work from the end of the text backwards so that earlier offsets stay
  // valid while markers are being inserted.
  insertions.sort((a, b) => b.index - a.index);

  const encoder = new TextEncoder();
  const textBytes = encoder.encode(text);
  const pieces: Uint8Array[] = [];
  let tail = textBytes.length;
  for (const { index, marker } of insertions) {
    // Clamp out-of-range offsets into the not-yet-consumed part of the text.
    const at = Math.max(0, Math.min(index, tail));
    pieces.unshift(encoder.encode(marker), textBytes.subarray(at, tail));
    tail = at;
  }
  pieces.unshift(textBytes.subarray(0, tail));

  return Buffer.concat(pieces).toString('utf8');
}
}
2025-08-13 12:27:09 -07:00
/**
 * Implementation of the WebFetch tool logic
 *
 * Declares the tool (name, description, parameter schema), validates its
 * parameters and creates WebFetchToolInvocation instances. The accepted
 * parameters depend on `config.getDirectWebFetch()`: direct mode takes a
 * single `url`, the default mode takes a `prompt` containing URL(s).
 */
export class WebFetchTool extends BaseDeclarativeTool<
  WebFetchToolParams,
  ToolResult
> {
  static readonly Name = WEB_FETCH_TOOL_NAME;

  constructor(
    private readonly config: Config,
    messageBus: MessageBus,
  ) {
    super(
      WebFetchTool.Name,
      'WebFetch',
      WEB_FETCH_DEFINITION.base.description!,
      Kind.Fetch,
      WEB_FETCH_DEFINITION.base.parametersJsonSchema,
      messageBus,
      true, // isOutputMarkdown
      false, // canUpdateOutput
    );
  }

  /**
   * Validates the tool parameters for the active mode.
   *
   * Direct mode: `url` must be present, parseable and use http or https.
   * Default mode: `prompt` must be non-empty, contain no malformed or
   * unsupported URLs, and contain at least one valid URL.
   *
   * @returns An error message, or `null` when the parameters are valid.
   */
  protected override validateToolParamValues(
    params: WebFetchToolParams,
  ): string | null {
    if (this.config.getDirectWebFetch()) {
      if (!params.url) {
        return "The 'url' parameter is required.";
      }
      let parsed: URL;
      try {
        parsed = new URL(params.url);
      } catch {
        return `Invalid URL: "${params.url}"`;
      }
      // Same allow-list as parsePrompt(): the schema promises http/https
      // only, so reject other schemes (file:, ftp:, ...) up front.
      if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
        return `Unsupported protocol in URL: "${params.url}". Only http and https are supported.`;
      }
      return null;
    }

    if (!params.prompt || params.prompt.trim() === '') {
      return "The 'prompt' parameter cannot be empty and must contain URL(s) and instructions.";
    }

    const { validUrls, errors } = parsePrompt(params.prompt);

    if (errors.length > 0) {
      return `Error(s) in prompt URLs:\n- ${errors.join('\n- ')}`;
    }

    if (validUrls.length === 0) {
      return "The 'prompt' must contain at least one valid URL (starting with http:// or https://).";
    }

    return null;
  }

  /** Creates the invocation that will execute with the given parameters. */
  protected createInvocation(
    params: WebFetchToolParams,
    messageBus: MessageBus,
    _toolName?: string,
    _toolDisplayName?: string,
  ): ToolInvocation<WebFetchToolParams, ToolResult> {
    return new WebFetchToolInvocation(
      this.config,
      params,
      messageBus,
      _toolName,
      _toolDisplayName,
    );
  }

  /**
   * Returns the tool declaration for `modelId`. In direct mode the
   * description and parameter schema are replaced by a single required `url`.
   */
  override getSchema(modelId?: string) {
    const schema = resolveToolDeclaration(WEB_FETCH_DEFINITION, modelId);
    if (this.config.getDirectWebFetch()) {
      return {
        ...schema,
        description:
          'Fetch content from a URL directly. Send multiple requests for this tool if multiple URL fetches are needed.',
        parametersJsonSchema: {
          type: 'object',
          properties: {
            url: {
              type: 'string',
              description:
                'The URL to fetch. Must be a valid http or https URL.',
            },
          },
          required: ['url'],
        },
      };
    }
    return schema;
  }
}