2025-04-19 19:45:42 +01:00
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
2025-08-26 00:04:53 +02:00
import type {
ToolCallConfirmationDetails ,
ToolInvocation ,
ToolResult ,
} from './tools.js' ;
2025-06-13 17:44:14 -07:00
import {
2025-08-13 12:27:09 -07:00
BaseDeclarativeTool ,
BaseToolInvocation ,
Kind ,
2025-06-13 17:44:14 -07:00
ToolConfirmationOutcome ,
} from './tools.js' ;
2025-10-15 09:39:41 -07:00
import type { MessageBus } from '../confirmation-bus/message-bus.js' ;
2025-08-21 14:40:18 -07:00
import { ToolErrorType } from './tool-error.js' ;
2025-04-19 19:45:42 +01:00
import { getErrorMessage } from '../utils/errors.js' ;
2025-08-26 00:04:53 +02:00
import type { Config } from '../config/config.js' ;
2025-11-03 15:41:00 -08:00
import { ApprovalMode } from '../policy/types.js' ;
2025-08-27 23:22:21 -04:00
import { getResponseText } from '../utils/partUtils.js' ;
2025-11-03 10:13:52 -08:00
import { fetchWithTimeout , isPrivateIp } from '../utils/fetch.js' ;
2025-06-13 17:44:14 -07:00
import { convert } from 'html-to-text' ;
2025-10-09 13:01:17 -04:00
import {
logWebFetchFallbackAttempt ,
WebFetchFallbackAttemptEvent ,
} from '../telemetry/index.js' ;
2025-10-15 22:48:12 -04:00
import { WEB_FETCH_TOOL_NAME } from './tool-names.js' ;
2025-10-21 16:35:22 -04:00
import { debugLogger } from '../utils/debugLogger.js' ;
2025-12-05 09:49:08 -08:00
import { retryWithBackoff } from '../utils/retry.js' ;
2025-06-13 17:44:14 -07:00
/**
 * Timeout handed to `fetchWithTimeout` when the fallback path downloads a
 * page directly (presumably milliseconds, going by the name).
 */
const URL_FETCH_TIMEOUT_MS = 10_000;

/**
 * Maximum number of characters of fetched page text that the fallback path
 * forwards to the model; longer content is truncated with `substring`.
 */
const MAX_CONTENT_LENGTH = 100_000;
2025-10-14 16:53:22 -04:00
/**
 * Parses a prompt to extract valid URLs and identify malformed ones.
 *
 * The prompt is split on whitespace. Only tokens containing `://` are treated
 * as URL candidates; every other token is ignored. Each candidate is parsed
 * with the WHATWG `URL` constructor:
 * - http/https URLs are collected in their normalized `href` form,
 * - URLs with any other protocol produce an "Unsupported protocol" error,
 * - candidates that fail to parse produce a "Malformed URL" error.
 *
 * @param text The free-form prompt that may embed URLs.
 * @returns The normalized valid URLs and the error messages, both in the
 *   order the corresponding tokens appear in the prompt.
 */
export function parsePrompt(text: string): {
  validUrls: string[];
  errors: string[];
} {
  const tokens = text.split(/\s+/);
  const validUrls: string[] = [];
  const errors: string[] = [];

  for (const token of tokens) {
    if (!token) continue;

    // Heuristic to check if the token appears to contain URL-like chars.
    if (!token.includes('://')) continue;

    let url: URL;
    try {
      // Validate with new URL()
      url = new URL(token);
    } catch (_) {
      // new URL() threw, so it's malformed according to WHATWG standard
      errors.push(`Malformed URL detected: "${token}".`);
      continue;
    }

    // Allowlist protocols
    if (['http:', 'https:'].includes(url.protocol)) {
      validUrls.push(url.href);
    } else {
      errors.push(
        `Unsupported protocol in URL: "${token}". Only http and https are supported.`,
      );
    }
  }

  return { validUrls, errors };
}
2025-05-29 15:02:31 -07:00
// Interfaces for grounding metadata (similar to web-search.ts)
/**
 * Web source attached to a grounding chunk. Both fields are optional; when
 * formatting the source list, callers in this file substitute placeholder
 * text for whichever one is missing.
 */
interface GroundingChunkWeb {
  /** Address of the source page. */
  uri?: string;
  /** Human-readable title of the source page. */
  title?: string;
}
/**
 * One entry of the grounding chunk list read from the model response's
 * grounding metadata.
 */
interface GroundingChunkItem {
  /** Web source details for this chunk, when present. */
  web?: GroundingChunkWeb;
}
/**
 * Span of the response text that a grounding support refers to.
 */
interface GroundingSupportSegment {
  /** Offset at which the span begins. */
  startIndex: number;
  /** Offset at which the span ends; citation markers are inserted here. */
  endIndex: number;
  /** Text of the span, when provided. */
  text?: string;
}
/**
 * Associates a span of the response text with the grounding chunks that
 * back it. An entry is only used for citations when both fields are present.
 */
interface GroundingSupportItem {
  /** The supported span of the response text. */
  segment?: GroundingSupportSegment;
  /** Zero-based positions into the grounding chunk list. */
  groundingChunkIndices?: number[];
}
2025-04-19 19:45:42 +01:00
/**
 * Parameters for the WebFetch tool
 */
export interface WebFetchToolParams {
  /**
   * The prompt containing URL(s) (up to 20) and instructions for processing their content.
   */
  prompt: string;
}
2025-12-05 09:49:08 -08:00
/**
 * An `Error` that may additionally carry the HTTP status code of the
 * response that caused it. The fallback fetch attaches `status` to the error
 * it throws for non-OK responses before handing control back to
 * `retryWithBackoff`.
 */
interface ErrorWithStatus extends Error {
  /** HTTP status code of the failed response, when known. */
  status?: number;
}
2025-08-13 12:27:09 -07:00
/**
 * Rewrites a GitHub "blob" page URL to the matching raw.githubusercontent.com
 * URL so the file content (not the HTML page) is fetched.
 *
 * Only URLs whose host is exactly `github.com` or `www.github.com` and whose
 * path contains `/blob/` are rewritten; anything else (including strings that
 * fail to parse) is returned unchanged.
 */
function toRawGitHubUrl(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch (_) {
    return url;
  }
  const isGitHubHost =
    parsed.hostname === 'github.com' || parsed.hostname === 'www.github.com';
  if (!isGitHubHost || !parsed.pathname.includes('/blob/')) {
    return url;
  }
  parsed.hostname = 'raw.githubusercontent.com';
  parsed.pathname = parsed.pathname.replace('/blob/', '/');
  return parsed.href;
}

/**
 * Inserts citation markers into `text` at the given offsets.
 *
 * Offsets are interpreted as UTF-8 byte positions, which is how the Gemini
 * API reference describes grounding segment indices; for ASCII-only text this
 * is identical to character positions. Offsets past the end of the text are
 * clamped to the end. Markers sharing an offset keep the same relative order
 * as the previous splice-based implementation (later entries come first).
 */
function insertCitationMarkers(
  text: string,
  insertions: ReadonlyArray<{ index: number; marker: string }>,
): string {
  const encoder = new TextEncoder();
  const bytes = encoder.encode(text);
  // Work from the end of the text backwards so earlier offsets stay valid.
  const ordered = [...insertions].sort((a, b) => b.index - a.index);

  // Pieces are collected back-to-front and reversed once at the end.
  const reversedPieces: Uint8Array[] = [];
  let end = bytes.length;
  for (const { index, marker } of ordered) {
    const position = Math.max(0, Math.min(index, end));
    reversedPieces.push(bytes.subarray(position, end));
    reversedPieces.push(encoder.encode(marker));
    end = position;
  }
  reversedPieces.push(bytes.subarray(0, end));

  const pieces = reversedPieces.reverse();
  const totalLength = pieces.reduce((sum, piece) => sum + piece.length, 0);
  const merged = new Uint8Array(totalLength);
  let offset = 0;
  for (const piece of pieces) {
    merged.set(piece, offset);
    offset += piece.length;
  }
  return new TextDecoder().decode(merged);
}

/**
 * A single invocation of the WebFetch tool for one prompt.
 *
 * `execute` first asks the model (model config `web-fetch`) to process the
 * prompt. If the first URL is classified as private by `isPrivateIp`, or the
 * primary attempt yields nothing usable, it falls back to downloading the
 * first URL directly and passing the page text to the model
 * (model config `web-fetch-fallback`).
 */
class WebFetchToolInvocation extends BaseToolInvocation<
  WebFetchToolParams,
  ToolResult
> {
  constructor(
    private readonly config: Config,
    params: WebFetchToolParams,
    messageBus: MessageBus,
    _toolName?: string,
    _toolDisplayName?: string,
  ) {
    super(params, messageBus, _toolName, _toolDisplayName);
  }

  /**
   * Fetches the first URL of the prompt directly, converts the body to text
   * and asks the model to answer the prompt from that text.
   *
   * @param signal Abort signal forwarded to the model call.
   * @returns The model's answer, or an error result of type
   *   `WEB_FETCH_FALLBACK_FAILED` if fetching or processing fails.
   */
  private async executeFallback(signal: AbortSignal): Promise<ToolResult> {
    const { validUrls: urls } = parsePrompt(this.params.prompt);
    // For now, we only support one URL for fallback
    const firstUrl = urls[0];
    if (firstUrl === undefined) {
      // Report a missing URL as a tool error instead of throwing a TypeError.
      const errorMessage =
        'Error during fallback fetch: no valid URL found in prompt.';
      return {
        llmContent: `Error: ${errorMessage}`,
        returnDisplay: `Error: ${errorMessage}`,
        error: {
          message: errorMessage,
          type: ToolErrorType.WEB_FETCH_FALLBACK_FAILED,
        },
      };
    }

    // Convert GitHub blob URL to raw URL
    const url = toRawGitHubUrl(firstUrl);

    try {
      const response = await retryWithBackoff(
        async () => {
          const res = await fetchWithTimeout(url, URL_FETCH_TIMEOUT_MS);
          if (!res.ok) {
            // Attach the HTTP status so the retry helper can inspect it.
            const error: ErrorWithStatus = new Error(
              `Request failed with status code ${res.status} ${res.statusText}`,
            );
            error.status = res.status;
            throw error;
          }
          return res;
        },
        {
          retryFetchErrors: this.config.getRetryFetchErrors(),
        },
      );

      const rawContent = await response.text();
      const contentType = response.headers.get('content-type') || '';
      let textContent: string;

      // Only use html-to-text if content type is HTML, or if no content type is provided (assume HTML)
      if (
        contentType.toLowerCase().includes('text/html') ||
        contentType === ''
      ) {
        textContent = convert(rawContent, {
          wordwrap: false,
          selectors: [
            { selector: 'a', options: { ignoreHref: true } },
            { selector: 'img', format: 'skip' },
          ],
        });
      } else {
        // For other content types (text/plain, application/json, etc.), use raw text
        textContent = rawContent;
      }

      textContent = textContent.substring(0, MAX_CONTENT_LENGTH);

      const geminiClient = this.config.getGeminiClient();
      const fallbackPrompt = `The user requested the following: "${this.params.prompt}".

I was unable to access the URL directly. Instead, I have fetched the raw content of the page. Please use the following content to answer the request. Do not attempt to access the URL again.

---
${textContent}
---
`;
      const result = await geminiClient.generateContent(
        { model: 'web-fetch-fallback' },
        [{ role: 'user', parts: [{ text: fallbackPrompt }] }],
        signal,
      );
      const resultText = getResponseText(result) || '';
      return {
        llmContent: resultText,
        returnDisplay: `Content for ${url} processed using fallback fetch.`,
      };
    } catch (e: unknown) {
      const errorMessage = `Error during fallback fetch for ${url}: ${getErrorMessage(e)}`;
      return {
        llmContent: `Error: ${errorMessage}`,
        returnDisplay: `Error: ${errorMessage}`,
        error: {
          message: errorMessage,
          type: ToolErrorType.WEB_FETCH_FALLBACK_FAILED,
        },
      };
    }
  }

  /**
   * Returns a one-line description of this invocation, showing the prompt
   * truncated to at most 100 characters.
   */
  getDescription(): string {
    const displayPrompt =
      this.params.prompt.length > 100
        ? this.params.prompt.substring(0, 97) + '...'
        : this.params.prompt;
    return `Processing URLs and instructions from prompt: "${displayPrompt}"`;
  }

  /**
   * Builds the confirmation details shown before the fetch runs.
   *
   * @returns `false` when the approval mode is already AUTO_EDIT (no
   *   confirmation needed); otherwise an `info` confirmation listing the
   *   URLs that will actually be fetched.
   */
  protected override async getConfirmationDetails(
    _abortSignal: AbortSignal,
  ): Promise<ToolCallConfirmationDetails | false> {
    // Check for AUTO_EDIT approval mode. This tool has a specific behavior
    // where ProceedAlways switches the entire session to AUTO_EDIT.
    if (this.config.getApprovalMode() === ApprovalMode.AUTO_EDIT) {
      return false;
    }

    // Perform GitHub URL conversion here to differentiate between user-provided
    // URL and the actual URL to be fetched.
    const { validUrls } = parsePrompt(this.params.prompt);
    const urls = validUrls.map((url) => toRawGitHubUrl(url));

    const confirmationDetails: ToolCallConfirmationDetails = {
      type: 'info',
      title: `Confirm Web Fetch`,
      prompt: this.params.prompt,
      urls,
      onConfirm: async (outcome: ToolConfirmationOutcome) => {
        if (outcome === ToolConfirmationOutcome.ProceedAlways) {
          // No need to publish a policy update as the default policy for
          // AUTO_EDIT already reflects always approving web-fetch.
          this.config.setApprovalMode(ApprovalMode.AUTO_EDIT);
        } else {
          await this.publishPolicyUpdate(outcome);
        }
      },
    };
    return confirmationDetails;
  }

  /**
   * Runs the tool: asks the model to process the prompt, annotates the answer
   * with citation markers and a source list when grounding data is present,
   * and falls back to a direct fetch when the primary path cannot be used.
   *
   * @param signal Abort signal forwarded to the model calls.
   * @returns The processed content, or an error result of type
   *   `WEB_FETCH_PROCESSING_ERROR` if the primary path throws.
   */
  async execute(signal: AbortSignal): Promise<ToolResult> {
    const userPrompt = this.params.prompt;
    const { validUrls: urls } = parsePrompt(userPrompt);
    const url = urls[0];

    // URLs flagged by isPrivateIp skip the primary path and are fetched
    // locally instead.
    if (url !== undefined && isPrivateIp(url)) {
      logWebFetchFallbackAttempt(
        this.config,
        new WebFetchFallbackAttemptEvent('private_ip'),
      );
      return this.executeFallback(signal);
    }

    const geminiClient = this.config.getGeminiClient();

    try {
      const response = await geminiClient.generateContent(
        { model: 'web-fetch' },
        [{ role: 'user', parts: [{ text: userPrompt }] }],
        signal, // Pass signal
      );

      debugLogger.debug(
        `[WebFetchTool] Full response for prompt "${userPrompt.substring(
          0,
          50,
        )}...":`,
        JSON.stringify(response, null, 2),
      );

      let responseText = getResponseText(response) || '';
      const urlContextMeta = response.candidates?.[0]?.urlContextMetadata;
      const groundingMetadata = response.candidates?.[0]?.groundingMetadata;
      const sources = groundingMetadata?.groundingChunks as
        | GroundingChunkItem[]
        | undefined;
      const groundingSupports = groundingMetadata?.groundingSupports as
        | GroundingSupportItem[]
        | undefined;

      // Error Handling: fall back when URL metadata is present and no URL
      // was retrieved successfully, or when the response carries neither
      // text nor grounding sources.
      const urlMetadata = urlContextMeta?.urlMetadata ?? [];
      const allRetrievalsFailed =
        urlMetadata.length > 0 &&
        urlMetadata.every(
          (m) => m.urlRetrievalStatus !== 'URL_RETRIEVAL_STATUS_SUCCESS',
        );
      const hasNoUsableOutput = !responseText.trim() && !sources?.length;

      if (allRetrievalsFailed || hasNoUsableOutput) {
        logWebFetchFallbackAttempt(
          this.config,
          new WebFetchFallbackAttemptEvent('primary_failed'),
        );
        return await this.executeFallback(signal);
      }

      const sourceListFormatted: string[] = [];

      if (sources && sources.length > 0) {
        sources.forEach((source: GroundingChunkItem, index: number) => {
          const title = source.web?.title || 'Untitled';
          const uri = source.web?.uri || 'Unknown URI'; // Fallback if URI is missing
          sourceListFormatted.push(`[${index + 1}] ${title} (${uri})`);
        });

        if (groundingSupports && groundingSupports.length > 0) {
          const insertions: Array<{ index: number; marker: string }> = [];
          for (const support of groundingSupports) {
            if (support.segment && support.groundingChunkIndices) {
              // Markers are 1-based to line up with the source list below.
              const citationMarker = support.groundingChunkIndices
                .map((chunkIndex: number) => `[${chunkIndex + 1}]`)
                .join('');
              insertions.push({
                index: support.segment.endIndex,
                marker: citationMarker,
              });
            }
          }
          responseText = insertCitationMarkers(responseText, insertions);
        }

        if (sourceListFormatted.length > 0) {
          responseText += `

Sources:
${sourceListFormatted.join('\n')}`;
        }
      }

      const llmContent = responseText;

      debugLogger.debug(
        `[WebFetchTool] Formatted tool response for prompt "${userPrompt}:\n\n":`,
        llmContent,
      );

      return {
        llmContent,
        returnDisplay: `Content processed from prompt.`,
      };
    } catch (error: unknown) {
      const errorMessage = `Error processing web content for prompt "${userPrompt.substring(
        0,
        50,
      )}...": ${getErrorMessage(error)}`;
      return {
        llmContent: `Error: ${errorMessage}`,
        returnDisplay: `Error: ${errorMessage}`,
        error: {
          message: errorMessage,
          type: ToolErrorType.WEB_FETCH_PROCESSING_ERROR,
        },
      };
    }
  }
}
2025-08-13 12:27:09 -07:00
/**
 * Implementation of the WebFetch tool logic
 *
 * Declares the tool (name, display name, description, and a parameter schema
 * with a single required `prompt` string), validates incoming parameters,
 * and creates a `WebFetchToolInvocation` for each call.
 */
export class WebFetchTool extends BaseDeclarativeTool<
  WebFetchToolParams,
  ToolResult
> {
  static readonly Name = WEB_FETCH_TOOL_NAME;

  constructor(
    private readonly config: Config,
    messageBus: MessageBus,
  ) {
    super(
      WebFetchTool.Name,
      'WebFetch',
      "Processes content from URL(s), including local and private network addresses (e.g., localhost), embedded in a prompt. Include up to 20 URLs and instructions (e.g., summarize, extract specific data) directly in the 'prompt' parameter.",
      Kind.Fetch,
      {
        properties: {
          prompt: {
            description:
              'A comprehensive prompt that includes the URL(s) (up to 20) to fetch and specific instructions on how to process their content (e.g., "Summarize https://example.com/article and extract key points from https://another.com/data"). All URLs to be fetched must be valid and complete, starting with "http://" or "https://", and be fully-formed with a valid hostname (e.g., a domain name like "example.com" or an IP address). For example, "https://example.com" is valid, but "example.com" is not.',
            type: 'string',
          },
        },
        required: ['prompt'],
        type: 'object',
      },
      messageBus,
      true, // isOutputMarkdown
      false, // canUpdateOutput
    );
  }

  /**
   * Checks the prompt beyond what the schema expresses.
   *
   * @param params The parameters supplied for the tool call.
   * @returns An error message when the prompt is empty or whitespace-only,
   *   contains a malformed or non-http(s) URL, or contains no valid URL at
   *   all; `null` when the parameters are acceptable.
   */
  protected override validateToolParamValues(
    params: WebFetchToolParams,
  ): string | null {
    if (!params.prompt || params.prompt.trim() === '') {
      return "The 'prompt' parameter cannot be empty and must contain URL(s) and instructions.";
    }

    const { validUrls, errors } = parsePrompt(params.prompt);

    // Any URL-like token that failed to parse rejects the whole prompt.
    if (errors.length > 0) {
      return `Error(s) in prompt URLs:\n- ${errors.join('\n- ')}`;
    }

    if (validUrls.length === 0) {
      return "The 'prompt' must contain at least one valid URL (starting with http:// or https://).";
    }

    return null;
  }

  /**
   * Creates the invocation object that will execute a single tool call,
   * passing along this tool's config and the supplied arguments.
   */
  protected createInvocation(
    params: WebFetchToolParams,
    messageBus: MessageBus,
    _toolName?: string,
    _toolDisplayName?: string,
  ): ToolInvocation<WebFetchToolParams, ToolResult> {
    return new WebFetchToolInvocation(
      this.config,
      params,
      messageBus,
      _toolName,
      _toolDisplayName,
    );
  }
}