mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-13 15:40:57 -07:00
feat(core): implement Stage 1 improvements for webfetch tool (#21313)
This commit is contained in:
@@ -2129,12 +2129,17 @@ export class RecoveryAttemptEvent extends BaseAgentEvent {
|
||||
|
||||
export const EVENT_WEB_FETCH_FALLBACK_ATTEMPT =
|
||||
'gemini_cli.web_fetch_fallback_attempt';
|
||||
export type WebFetchFallbackReason =
|
||||
| 'private_ip'
|
||||
| 'primary_failed'
|
||||
| 'private_ip_skipped';
|
||||
|
||||
export class WebFetchFallbackAttemptEvent implements BaseTelemetryEvent {
|
||||
'event.name': 'web_fetch_fallback_attempt';
|
||||
'event.timestamp': string;
|
||||
reason: 'private_ip' | 'primary_failed';
|
||||
reason: WebFetchFallbackReason;
|
||||
|
||||
constructor(reason: 'private_ip' | 'primary_failed') {
|
||||
constructor(reason: WebFetchFallbackReason) {
|
||||
this['event.name'] = 'web_fetch_fallback_attempt';
|
||||
this['event.timestamp'] = new Date().toISOString();
|
||||
this.reason = reason;
|
||||
|
||||
@@ -9,6 +9,7 @@ import {
|
||||
WebFetchTool,
|
||||
parsePrompt,
|
||||
convertGithubUrlToRaw,
|
||||
normalizeUrl,
|
||||
} from './web-fetch.js';
|
||||
import type { Config } from '../config/config.js';
|
||||
import { ApprovalMode } from '../policy/types.js';
|
||||
@@ -43,7 +44,7 @@ vi.mock('html-to-text', () => ({
|
||||
|
||||
vi.mock('../telemetry/index.js', () => ({
|
||||
logWebFetchFallbackAttempt: vi.fn(),
|
||||
WebFetchFallbackAttemptEvent: vi.fn(),
|
||||
WebFetchFallbackAttemptEvent: vi.fn((reason) => ({ reason })),
|
||||
}));
|
||||
|
||||
vi.mock('../utils/fetch.js', async (importOriginal) => {
|
||||
@@ -125,6 +126,35 @@ const mockFetch = (url: string, response: Partial<Response> | Error) =>
|
||||
} as unknown as Response;
|
||||
});
|
||||
|
||||
// Unit tests for normalizeUrl: hostname casing, trailing-slash handling,
// default-port removal, and pass-through of unparseable input.
describe('normalizeUrl', () => {
  it('should lowercase hostname', () => {
    const normalized = normalizeUrl('https://EXAMPLE.com/Path');
    // Only the host is case-folded; the path keeps its original casing.
    expect(normalized).toBe('https://example.com/Path');
  });

  it('should remove trailing slash except for root', () => {
    const nestedPath = normalizeUrl('https://example.com/path/');
    expect(nestedPath).toBe('https://example.com/path');

    const rootPath = normalizeUrl('https://example.com/');
    expect(rootPath).toBe('https://example.com/');
  });

  it('should remove default ports', () => {
    const httpDefault = normalizeUrl('http://example.com:80/');
    expect(httpDefault).toBe('http://example.com/');

    const httpsDefault = normalizeUrl('https://example.com:443/');
    expect(httpsDefault).toBe('https://example.com/');

    // A non-default port is part of the origin and must survive.
    const customPort = normalizeUrl('https://example.com:8443/');
    expect(customPort).toBe('https://example.com:8443/');
  });

  it('should handle invalid URLs gracefully', () => {
    const untouched = normalizeUrl('not-a-url');
    expect(untouched).toBe('not-a-url');
  });
});
|
||||
|
||||
describe('parsePrompt', () => {
|
||||
it('should extract valid URLs separated by whitespace', () => {
|
||||
const prompt = 'Go to https://example.com and http://google.com';
|
||||
@@ -355,49 +385,164 @@ describe('WebFetchTool', () => {
|
||||
// The 11th time should fail due to rate limit
|
||||
const result = await invocation.execute(new AbortController().signal);
|
||||
expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_PROCESSING_ERROR);
|
||||
expect(result.error?.message).toContain('Rate limit exceeded for host');
|
||||
expect(result.error?.message).toContain(
|
||||
'All requested URLs were skipped',
|
||||
);
|
||||
});
|
||||
|
||||
it('should skip rate-limited URLs but fetch others', async () => {
|
||||
vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false);
|
||||
|
||||
const tool = new WebFetchTool(mockConfig, bus);
|
||||
const params = {
|
||||
prompt: 'fetch https://ratelimit-multi.com and https://healthy.com',
|
||||
};
|
||||
const invocation = tool.build(params);
|
||||
|
||||
// Hit rate limit for one host
|
||||
for (let i = 0; i < 10; i++) {
|
||||
mockGenerateContent.mockResolvedValueOnce({
|
||||
candidates: [{ content: { parts: [{ text: 'response' }] } }],
|
||||
});
|
||||
await tool
|
||||
.build({ prompt: 'fetch https://ratelimit-multi.com' })
|
||||
.execute(new AbortController().signal);
|
||||
}
|
||||
// 11th call - should be rate limited and not use a mock
|
||||
await tool
|
||||
.build({ prompt: 'fetch https://ratelimit-multi.com' })
|
||||
.execute(new AbortController().signal);
|
||||
|
||||
mockGenerateContent.mockResolvedValueOnce({
|
||||
candidates: [{ content: { parts: [{ text: 'healthy response' }] } }],
|
||||
});
|
||||
|
||||
const result = await invocation.execute(new AbortController().signal);
|
||||
expect(result.llmContent).toContain('healthy response');
|
||||
expect(result.llmContent).toContain(
|
||||
'[Warning] The following URLs were skipped:',
|
||||
);
|
||||
expect(result.llmContent).toContain(
|
||||
'[Rate limit exceeded] https://ratelimit-multi.com/',
|
||||
);
|
||||
});
|
||||
|
||||
it('should skip private or local URLs but fetch others and log telemetry', async () => {
|
||||
vi.mocked(fetchUtils.isPrivateIp).mockImplementation(
|
||||
(url) => url === 'https://private.com/',
|
||||
);
|
||||
|
||||
const tool = new WebFetchTool(mockConfig, bus);
|
||||
const params = {
|
||||
prompt:
|
||||
'fetch https://private.com and https://healthy.com and http://localhost',
|
||||
};
|
||||
const invocation = tool.build(params);
|
||||
|
||||
mockGenerateContent.mockResolvedValueOnce({
|
||||
candidates: [{ content: { parts: [{ text: 'healthy response' }] } }],
|
||||
});
|
||||
|
||||
const result = await invocation.execute(new AbortController().signal);
|
||||
|
||||
expect(logWebFetchFallbackAttempt).toHaveBeenCalledTimes(2);
|
||||
expect(logWebFetchFallbackAttempt).toHaveBeenCalledWith(
|
||||
expect.anything(),
|
||||
expect.objectContaining({ reason: 'private_ip_skipped' }),
|
||||
);
|
||||
|
||||
expect(result.llmContent).toContain('healthy response');
|
||||
expect(result.llmContent).toContain(
|
||||
'[Warning] The following URLs were skipped:',
|
||||
);
|
||||
expect(result.llmContent).toContain(
|
||||
'[Blocked Host] https://private.com/',
|
||||
);
|
||||
expect(result.llmContent).toContain('[Blocked Host] http://localhost');
|
||||
});
|
||||
|
||||
it('should fallback to all public URLs if primary fails', async () => {
|
||||
vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false);
|
||||
|
||||
// Primary fetch fails
|
||||
mockGenerateContent.mockRejectedValueOnce(new Error('primary fail'));
|
||||
|
||||
// Mock fallback fetch for BOTH URLs
|
||||
mockFetch('https://url1.com/', {
|
||||
text: () => Promise.resolve('content 1'),
|
||||
});
|
||||
mockFetch('https://url2.com/', {
|
||||
text: () => Promise.resolve('content 2'),
|
||||
});
|
||||
|
||||
// Mock fallback LLM call
|
||||
mockGenerateContent.mockResolvedValueOnce({
|
||||
candidates: [
|
||||
{ content: { parts: [{ text: 'fallback processed response' }] } },
|
||||
],
|
||||
});
|
||||
|
||||
const tool = new WebFetchTool(mockConfig, bus);
|
||||
const params = {
|
||||
prompt: 'fetch https://url1.com and https://url2.com/',
|
||||
};
|
||||
const invocation = tool.build(params);
|
||||
const result = await invocation.execute(new AbortController().signal);
|
||||
|
||||
expect(result.llmContent).toBe('fallback processed response');
|
||||
expect(result.returnDisplay).toContain(
|
||||
'2 URL(s) processed using fallback fetch',
|
||||
);
|
||||
});
|
||||
|
||||
it('should NOT include private URLs in fallback', async () => {
|
||||
vi.mocked(fetchUtils.isPrivateIp).mockImplementation(
|
||||
(url) => url === 'https://private.com/',
|
||||
);
|
||||
|
||||
// Primary fetch fails
|
||||
mockGenerateContent.mockRejectedValueOnce(new Error('primary fail'));
|
||||
|
||||
// Mock fallback fetch only for public URL
|
||||
mockFetch('https://public.com/', {
|
||||
text: () => Promise.resolve('public content'),
|
||||
});
|
||||
|
||||
// Mock fallback LLM call
|
||||
mockGenerateContent.mockResolvedValueOnce({
|
||||
candidates: [{ content: { parts: [{ text: 'fallback response' }] } }],
|
||||
});
|
||||
|
||||
const tool = new WebFetchTool(mockConfig, bus);
|
||||
const params = {
|
||||
prompt: 'fetch https://public.com/ and https://private.com',
|
||||
};
|
||||
const invocation = tool.build(params);
|
||||
const result = await invocation.execute(new AbortController().signal);
|
||||
|
||||
expect(result.llmContent).toBe('fallback response');
|
||||
// Verify private URL was NOT fetched (mockFetch would throw if it was called for private.com)
|
||||
});
|
||||
|
||||
it('should return WEB_FETCH_FALLBACK_FAILED on fallback fetch failure', async () => {
|
||||
vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(true);
|
||||
mockFetch('https://private.ip/', new Error('fetch failed'));
|
||||
vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false);
|
||||
mockGenerateContent.mockRejectedValue(new Error('primary fail'));
|
||||
mockFetch('https://public.ip/', new Error('fallback fetch failed'));
|
||||
const tool = new WebFetchTool(mockConfig, bus);
|
||||
const params = { prompt: 'fetch https://private.ip' };
|
||||
const params = { prompt: 'fetch https://public.ip' };
|
||||
const invocation = tool.build(params);
|
||||
const result = await invocation.execute(new AbortController().signal);
|
||||
expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_FALLBACK_FAILED);
|
||||
});
|
||||
|
||||
it('should return WEB_FETCH_PROCESSING_ERROR on general processing failure', async () => {
|
||||
it('should return WEB_FETCH_FALLBACK_FAILED on general processing failure (when fallback also fails)', async () => {
|
||||
vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false);
|
||||
mockGenerateContent.mockRejectedValue(new Error('API error'));
|
||||
const tool = new WebFetchTool(mockConfig, bus);
|
||||
const params = { prompt: 'fetch https://public.ip' };
|
||||
const invocation = tool.build(params);
|
||||
const result = await invocation.execute(new AbortController().signal);
|
||||
expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_PROCESSING_ERROR);
|
||||
});
|
||||
|
||||
it('should log telemetry when falling back due to private IP', async () => {
|
||||
vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(true);
|
||||
// Mock fetchWithTimeout to succeed so fallback proceeds
|
||||
mockFetch('https://private.ip/', {
|
||||
text: () => Promise.resolve('some content'),
|
||||
});
|
||||
mockGenerateContent.mockResolvedValue({
|
||||
candidates: [{ content: { parts: [{ text: 'fallback response' }] } }],
|
||||
});
|
||||
|
||||
const tool = new WebFetchTool(mockConfig, bus);
|
||||
const params = { prompt: 'fetch https://private.ip' };
|
||||
const invocation = tool.build(params);
|
||||
await invocation.execute(new AbortController().signal);
|
||||
|
||||
expect(logWebFetchFallbackAttempt).toHaveBeenCalledWith(
|
||||
mockConfig,
|
||||
expect.any(WebFetchFallbackAttemptEvent),
|
||||
);
|
||||
expect(WebFetchFallbackAttemptEvent).toHaveBeenCalledWith('private_ip');
|
||||
expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_FALLBACK_FAILED);
|
||||
});
|
||||
|
||||
it('should log telemetry when falling back due to primary fetch failure', async () => {
|
||||
@@ -422,7 +567,7 @@ describe('WebFetchTool', () => {
|
||||
|
||||
expect(logWebFetchFallbackAttempt).toHaveBeenCalledWith(
|
||||
mockConfig,
|
||||
expect.any(WebFetchFallbackAttemptEvent),
|
||||
expect.objectContaining({ reason: 'primary_failed' }),
|
||||
);
|
||||
expect(WebFetchFallbackAttemptEvent).toHaveBeenCalledWith(
|
||||
'primary_failed',
|
||||
@@ -891,13 +1036,13 @@ describe('WebFetchTool', () => {
|
||||
});
|
||||
|
||||
it('should throw error if stream exceeds limit', async () => {
|
||||
const largeChunk = new Uint8Array(11 * 1024 * 1024);
|
||||
const large_chunk = new Uint8Array(11 * 1024 * 1024);
|
||||
mockFetch('https://example.com/large-stream', {
|
||||
body: {
|
||||
getReader: () => ({
|
||||
read: vi
|
||||
.fn()
|
||||
.mockResolvedValueOnce({ done: false, value: largeChunk })
|
||||
.mockResolvedValueOnce({ done: false, value: large_chunk })
|
||||
.mockResolvedValueOnce({ done: true }),
|
||||
releaseLock: vi.fn(),
|
||||
cancel: vi.fn().mockResolvedValue(undefined),
|
||||
@@ -934,5 +1079,20 @@ describe('WebFetchTool', () => {
|
||||
expect(result.llmContent).toContain('Error: Invalid URL "not-a-url"');
|
||||
expect(result.error?.type).toBe(ToolErrorType.INVALID_TOOL_PARAMS);
|
||||
});
|
||||
|
||||
it('should block private IP (experimental)', async () => {
|
||||
vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(true);
|
||||
const tool = new WebFetchTool(mockConfig, bus);
|
||||
const invocation = tool['createInvocation'](
|
||||
{ url: 'http://localhost' },
|
||||
bus,
|
||||
);
|
||||
const result = await invocation.execute(new AbortController().signal);
|
||||
|
||||
expect(result.llmContent).toContain(
|
||||
'Error: Access to blocked or private host http://localhost/ is not allowed.',
|
||||
);
|
||||
expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_PROCESSING_ERROR);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -81,6 +81,31 @@ function checkRateLimit(url: string): {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Normalizes a URL so that equivalent spellings compare equal.
 *
 * The hostname is lowercased, every trailing slash is removed from the path
 * (the root path `/` is kept), and an explicit default port (`:80` for
 * `http:`, `:443` for `https:`) is dropped. The result is a fixed point:
 * normalizing an already-normalized URL returns it unchanged, which is what a
 * de-duplication key needs.
 *
 * @param urlStr The URL text to normalize.
 * @returns The normalized `href`, or `urlStr` unchanged when it cannot be
 *   parsed as a URL.
 */
export function normalizeUrl(urlStr: string): string {
  try {
    const url = new URL(urlStr);
    url.hostname = url.hostname.toLowerCase();

    // Strip all trailing slashes, not just one: removing a single slash made
    // "/a//" normalize to "/a/" and then to "/a" on a second pass. Index 0 is
    // never stripped, so the root path "/" is preserved.
    const path = url.pathname;
    let end = path.length;
    while (end > 1 && path[end - 1] === '/') {
      end--;
    }
    if (end < path.length) {
      url.pathname = path.slice(0, end);
    }

    // Remove default ports
    const isDefaultPort =
      (url.protocol === 'http:' && url.port === '80') ||
      (url.protocol === 'https:' && url.port === '443');
    if (isDefaultPort) {
      url.port = '';
    }

    return url.href;
  } catch {
    // Not parseable as a URL: hand the input back untouched.
    return urlStr;
  }
}
|
||||
|
||||
/**
|
||||
* Parses a prompt to extract valid URLs and identify malformed ones.
|
||||
*/
|
||||
@@ -146,6 +171,10 @@ interface GroundingChunkItem {
|
||||
web?: GroundingChunkWeb;
|
||||
}
|
||||
|
||||
/**
 * Type guard used to narrow an unknown value to GroundingChunkItem.
 *
 * Accepts any non-null value whose `typeof` is 'object' (arrays included);
 * individual fields are not inspected.
 */
function isGroundingChunkItem(item: unknown): item is GroundingChunkItem {
  if (item === null) {
    return false;
  }
  return typeof item === 'object';
}
|
||||
|
||||
interface GroundingSupportSegment {
|
||||
startIndex: number;
|
||||
endIndex: number;
|
||||
@@ -157,6 +186,10 @@ interface GroundingSupportItem {
|
||||
groundingChunkIndices?: number[];
|
||||
}
|
||||
|
||||
/**
 * Type guard used to narrow an unknown value to GroundingSupportItem.
 *
 * Accepts any non-null value whose `typeof` is 'object' (arrays included);
 * individual fields are not inspected.
 */
function isGroundingSupportItem(item: unknown): item is GroundingSupportItem {
  if (item === null) {
    return false;
  }
  return typeof item === 'object';
}
|
||||
|
||||
/**
|
||||
* Parameters for the WebFetch tool
|
||||
*/
|
||||
@@ -214,13 +247,29 @@ class WebFetchToolInvocation extends BaseToolInvocation<
|
||||
);
|
||||
}
|
||||
|
||||
private async executeFallback(signal: AbortSignal): Promise<ToolResult> {
|
||||
const { validUrls: urls } = parsePrompt(this.params.prompt!);
|
||||
// For now, we only support one URL for fallback
|
||||
let url = urls[0];
|
||||
/**
 * Reports whether a URL points at a host this tool refuses to fetch.
 *
 * Blocked: loopback names and literals (`localhost`, any `*.localhost`
 * name, `127.0.0.1`, `[::1]`), anything `isPrivateIp` flags, and any input
 * that cannot be parsed as a URL (fail closed).
 *
 * NOTE(review): `isPrivateIp` is defined outside this block; which ranges it
 * covers is not visible here, so the explicit loopback checks below are kept
 * as a first line of defense rather than relied on it.
 *
 * @param urlStr The URL to check.
 * @returns `true` when the host must not be contacted.
 */
private isBlockedHost(urlStr: string): boolean {
  try {
    let hostname = new URL(urlStr).hostname.toLowerCase();

    // A fully-qualified name may carry a trailing dot ("localhost."), which
    // resolves exactly like the dotless form and would otherwise slip past
    // the literal comparisons below.
    if (hostname.endsWith('.')) {
      hostname = hostname.slice(0, -1);
    }

    const isLoopbackName =
      hostname === 'localhost' || hostname.endsWith('.localhost');
    // URL.hostname keeps the square brackets around IPv6 literals.
    const isLoopbackLiteral = hostname === '127.0.0.1' || hostname === '[::1]';
    if (isLoopbackName || isLoopbackLiteral) {
      return true;
    }

    return isPrivateIp(urlStr);
  } catch {
    // Unparseable URL (or a failing private-IP check): treat as blocked.
    return true;
  }
}
|
||||
|
||||
// Convert GitHub blob URL to raw URL
|
||||
url = convertGithubUrlToRaw(url);
|
||||
private async executeFallbackForUrl(
|
||||
urlStr: string,
|
||||
signal: AbortSignal,
|
||||
contentBudget: number,
|
||||
): Promise<string> {
|
||||
const url = convertGithubUrlToRaw(urlStr);
|
||||
if (this.isBlockedHost(url)) {
|
||||
debugLogger.warn(`[WebFetchTool] Blocked access to host: ${url}`);
|
||||
return `Error fetching ${url}: Access to blocked or private host is not allowed.`;
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await retryWithBackoff(
|
||||
@@ -244,6 +293,7 @@ class WebFetchToolInvocation extends BaseToolInvocation<
|
||||
retryFetchErrors: this.config.getRetryFetchErrors(),
|
||||
onRetry: (attempt, error, delayMs) =>
|
||||
this.handleRetry(attempt, error, delayMs),
|
||||
signal,
|
||||
},
|
||||
);
|
||||
|
||||
@@ -272,19 +322,70 @@ class WebFetchToolInvocation extends BaseToolInvocation<
|
||||
textContent = rawContent;
|
||||
}
|
||||
|
||||
textContent = truncateString(
|
||||
textContent,
|
||||
MAX_CONTENT_LENGTH,
|
||||
TRUNCATION_WARNING,
|
||||
);
|
||||
return truncateString(textContent, contentBudget, TRUNCATION_WARNING);
|
||||
} catch (e) {
|
||||
return `Error fetching ${url}: ${getErrorMessage(e)}`;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Splits the requested URLs into those that may be fetched and those that
 * are skipped.
 *
 * Each URL is normalized and de-duplicated first (first occurrence wins,
 * input order is kept). A URL on a blocked host is skipped with a warning
 * and a 'private_ip_skipped' telemetry event; otherwise a URL that fails
 * `checkRateLimit` is skipped with a warning; everything else is returned in
 * `toFetch`.
 *
 * @param urls URLs extracted from the prompt.
 * @returns `toFetch` holds normalized URLs that passed both checks;
 *   `skipped` holds human-readable entries prefixed with the skip reason.
 */
private filterAndValidateUrls(urls: string[]): {
  toFetch: string[];
  skipped: string[];
} {
  const accepted: string[] = [];
  const rejected: string[] = [];
  const seen = new Set<string>();

  for (const rawUrl of urls) {
    const candidate = normalizeUrl(rawUrl);
    if (seen.has(candidate)) {
      continue;
    }
    seen.add(candidate);

    // The host check runs first, so a blocked URL never reaches the
    // rate-limit check.
    if (this.isBlockedHost(candidate)) {
      debugLogger.warn(
        `[WebFetchTool] Skipped private or local host: ${candidate}`,
      );
      logWebFetchFallbackAttempt(
        this.config,
        new WebFetchFallbackAttemptEvent('private_ip_skipped'),
      );
      rejected.push(`[Blocked Host] ${candidate}`);
    } else if (!checkRateLimit(candidate).allowed) {
      debugLogger.warn(
        `[WebFetchTool] Rate limit exceeded for host: ${candidate}`,
      );
      rejected.push(`[Rate limit exceeded] ${candidate}`);
    } else {
      accepted.push(candidate);
    }
  }

  return { toFetch: accepted, skipped: rejected };
}
|
||||
|
||||
private async executeFallback(
|
||||
urls: string[],
|
||||
signal: AbortSignal,
|
||||
): Promise<ToolResult> {
|
||||
const uniqueUrls = [...new Set(urls)];
|
||||
const contentBudget = Math.floor(
|
||||
MAX_CONTENT_LENGTH / (uniqueUrls.length || 1),
|
||||
);
|
||||
const results: string[] = [];
|
||||
|
||||
for (const url of uniqueUrls) {
|
||||
results.push(
|
||||
await this.executeFallbackForUrl(url, signal, contentBudget),
|
||||
);
|
||||
}
|
||||
|
||||
const aggregatedContent = results
|
||||
.map((content, i) => `URL: ${uniqueUrls[i]}\nContent:\n${content}`)
|
||||
.join('\n\n---\n\n');
|
||||
|
||||
try {
|
||||
const geminiClient = this.config.getGeminiClient();
|
||||
const fallbackPrompt = `The user requested the following: "${this.params.prompt}".
|
||||
|
||||
I was unable to access the URL directly. Instead, I have fetched the raw content of the page. Please use the following content to answer the request. Do not attempt to access the URL again.
|
||||
I was unable to access the URL(s) directly using the primary fetch tool. Instead, I have fetched the raw content of the page(s). Please use the following content to answer the request. Do not attempt to access the URL(s) again.
|
||||
|
||||
---
|
||||
${textContent}
|
||||
${aggregatedContent}
|
||||
---
|
||||
`;
|
||||
const result = await geminiClient.generateContent(
|
||||
@@ -293,15 +394,29 @@ ${textContent}
|
||||
signal,
|
||||
LlmRole.UTILITY_TOOL,
|
||||
);
|
||||
|
||||
debugLogger.debug(
|
||||
`[WebFetchTool] Fallback response for prompt "${this.params.prompt?.substring(
|
||||
0,
|
||||
50,
|
||||
)}...":`,
|
||||
JSON.stringify(result, null, 2),
|
||||
);
|
||||
|
||||
const resultText = getResponseText(result) || '';
|
||||
|
||||
debugLogger.debug(
|
||||
`[WebFetchTool] Formatted fallback tool response for prompt "${this.params.prompt}":\n\n`,
|
||||
resultText,
|
||||
);
|
||||
|
||||
return {
|
||||
llmContent: resultText,
|
||||
returnDisplay: `Content for ${url} processed using fallback fetch.`,
|
||||
returnDisplay: `Content for ${urls.length} URL(s) processed using fallback fetch.`,
|
||||
};
|
||||
} catch (e) {
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
const error = e as Error;
|
||||
const errorMessage = `Error during fallback fetch for ${url}: ${error.message}`;
|
||||
const errorMessage = `Error during fallback processing: ${getErrorMessage(e)}`;
|
||||
debugLogger.error(`[WebFetchTool] Fallback failed: ${errorMessage}`);
|
||||
return {
|
||||
llmContent: `Error: ${errorMessage}`,
|
||||
returnDisplay: `Error: ${errorMessage}`,
|
||||
@@ -437,6 +552,21 @@ ${textContent}
|
||||
// Convert GitHub blob URL to raw URL
|
||||
url = convertGithubUrlToRaw(url);
|
||||
|
||||
if (this.isBlockedHost(url)) {
|
||||
const errorMessage = `Access to blocked or private host ${url} is not allowed.`;
|
||||
debugLogger.warn(
|
||||
`[WebFetchTool] Blocked experimental fetch to host: ${url}`,
|
||||
);
|
||||
return {
|
||||
llmContent: `Error: ${errorMessage}`,
|
||||
returnDisplay: `Error: ${errorMessage}`,
|
||||
error: {
|
||||
message: errorMessage,
|
||||
type: ToolErrorType.WEB_FETCH_PROCESSING_ERROR,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await retryWithBackoff(
|
||||
async () => {
|
||||
@@ -454,6 +584,7 @@ ${textContent}
|
||||
retryFetchErrors: this.config.getRetryFetchErrors(),
|
||||
onRetry: (attempt, error, delayMs) =>
|
||||
this.handleRetry(attempt, error, delayMs),
|
||||
signal,
|
||||
},
|
||||
);
|
||||
|
||||
@@ -473,6 +604,9 @@ ${textContent}
|
||||
const errorContent = `Request failed with status ${status}
|
||||
Headers: ${JSON.stringify(headers, null, 2)}
|
||||
Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response truncated] ...')}`;
|
||||
debugLogger.error(
|
||||
`[WebFetchTool] Experimental fetch failed with status ${status} for ${url}`,
|
||||
);
|
||||
return {
|
||||
llmContent: errorContent,
|
||||
returnDisplay: `Failed to fetch ${url} (Status: ${status})`,
|
||||
@@ -543,6 +677,9 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun
|
||||
};
|
||||
} catch (e) {
|
||||
const errorMessage = `Error during experimental fetch for ${url}: ${getErrorMessage(e)}`;
|
||||
debugLogger.error(
|
||||
`[WebFetchTool] Experimental fetch error: ${errorMessage}`,
|
||||
);
|
||||
return {
|
||||
llmContent: `Error: ${errorMessage}`,
|
||||
returnDisplay: `Error: ${errorMessage}`,
|
||||
@@ -559,15 +696,14 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun
|
||||
return this.executeExperimental(signal);
|
||||
}
|
||||
const userPrompt = this.params.prompt!;
|
||||
const { validUrls: urls } = parsePrompt(userPrompt);
|
||||
const url = urls[0];
|
||||
const { validUrls } = parsePrompt(userPrompt);
|
||||
|
||||
// Enforce rate limiting
|
||||
const rateLimitResult = checkRateLimit(url);
|
||||
if (!rateLimitResult.allowed) {
|
||||
const waitTimeSecs = Math.ceil((rateLimitResult.waitTimeMs || 0) / 1000);
|
||||
const errorMessage = `Rate limit exceeded for host. Please wait ${waitTimeSecs} seconds before trying again.`;
|
||||
debugLogger.warn(`[WebFetchTool] Rate limit exceeded for ${url}`);
|
||||
const { toFetch, skipped } = this.filterAndValidateUrls(validUrls);
|
||||
|
||||
// If everything was skipped, fail early
|
||||
if (toFetch.length === 0 && skipped.length > 0) {
|
||||
const errorMessage = `All requested URLs were skipped: ${skipped.join(', ')}`;
|
||||
debugLogger.error(`[WebFetchTool] ${errorMessage}`);
|
||||
return {
|
||||
llmContent: `Error: ${errorMessage}`,
|
||||
returnDisplay: `Error: ${errorMessage}`,
|
||||
@@ -578,23 +714,12 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun
|
||||
};
|
||||
}
|
||||
|
||||
const isPrivate = isPrivateIp(url);
|
||||
|
||||
if (isPrivate) {
|
||||
logWebFetchFallbackAttempt(
|
||||
this.config,
|
||||
new WebFetchFallbackAttemptEvent('private_ip'),
|
||||
);
|
||||
return this.executeFallback(signal);
|
||||
}
|
||||
|
||||
const geminiClient = this.config.getGeminiClient();
|
||||
|
||||
try {
|
||||
const geminiClient = this.config.getGeminiClient();
|
||||
const response = await geminiClient.generateContent(
|
||||
{ model: 'web-fetch' },
|
||||
[{ role: 'user', parts: [{ text: userPrompt }] }],
|
||||
signal, // Pass signal
|
||||
signal,
|
||||
LlmRole.UTILITY_TOOL,
|
||||
);
|
||||
|
||||
@@ -607,113 +732,76 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun
|
||||
);
|
||||
|
||||
let responseText = getResponseText(response) || '';
|
||||
const urlContextMeta = response.candidates?.[0]?.urlContextMetadata;
|
||||
const groundingMetadata = response.candidates?.[0]?.groundingMetadata;
|
||||
const sources = groundingMetadata?.groundingChunks as
|
||||
| GroundingChunkItem[]
|
||||
| undefined;
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
const groundingSupports = groundingMetadata?.groundingSupports as
|
||||
| GroundingSupportItem[]
|
||||
| undefined;
|
||||
|
||||
// Error Handling
|
||||
let processingError = false;
|
||||
|
||||
if (
|
||||
urlContextMeta?.urlMetadata &&
|
||||
urlContextMeta.urlMetadata.length > 0
|
||||
) {
|
||||
const allStatuses = urlContextMeta.urlMetadata.map(
|
||||
(m) => m.urlRetrievalStatus,
|
||||
);
|
||||
if (allStatuses.every((s) => s !== 'URL_RETRIEVAL_STATUS_SUCCESS')) {
|
||||
processingError = true;
|
||||
}
|
||||
} else if (!responseText.trim() && !sources?.length) {
|
||||
// No URL metadata and no content/sources
|
||||
processingError = true;
|
||||
// Simple primary success check: we need some text or grounding data
|
||||
if (!responseText.trim() && !groundingMetadata?.groundingChunks?.length) {
|
||||
throw new Error('Primary fetch returned no content');
|
||||
}
|
||||
|
||||
if (
|
||||
!processingError &&
|
||||
!responseText.trim() &&
|
||||
(!sources || sources.length === 0)
|
||||
) {
|
||||
// Successfully retrieved some URL (or no specific error from urlContextMeta), but no usable text or grounding data.
|
||||
processingError = true;
|
||||
}
|
||||
|
||||
if (processingError) {
|
||||
logWebFetchFallbackAttempt(
|
||||
this.config,
|
||||
new WebFetchFallbackAttemptEvent('primary_failed'),
|
||||
);
|
||||
return await this.executeFallback(signal);
|
||||
}
|
||||
|
||||
const sourceListFormatted: string[] = [];
|
||||
if (sources && sources.length > 0) {
|
||||
sources.forEach((source: GroundingChunkItem, index: number) => {
|
||||
const title = source.web?.title || 'Untitled';
|
||||
const uri = source.web?.uri || 'Unknown URI'; // Fallback if URI is missing
|
||||
sourceListFormatted.push(`[${index + 1}] ${title} (${uri})`);
|
||||
// 1. Apply Grounding Supports (Citations)
|
||||
const groundingSupports = groundingMetadata?.groundingSupports?.filter(
|
||||
isGroundingSupportItem,
|
||||
);
|
||||
if (groundingSupports && groundingSupports.length > 0) {
|
||||
const insertions: Array<{ index: number; marker: string }> = [];
|
||||
groundingSupports.forEach((support) => {
|
||||
if (support.segment && support.groundingChunkIndices) {
|
||||
const citationMarker = support.groundingChunkIndices
|
||||
.map((chunkIndex: number) => `[${chunkIndex + 1}]`)
|
||||
.join('');
|
||||
insertions.push({
|
||||
index: support.segment.endIndex,
|
||||
marker: citationMarker,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
if (groundingSupports && groundingSupports.length > 0) {
|
||||
const insertions: Array<{ index: number; marker: string }> = [];
|
||||
groundingSupports.forEach((support: GroundingSupportItem) => {
|
||||
if (support.segment && support.groundingChunkIndices) {
|
||||
const citationMarker = support.groundingChunkIndices
|
||||
.map((chunkIndex: number) => `[${chunkIndex + 1}]`)
|
||||
.join('');
|
||||
insertions.push({
|
||||
index: support.segment.endIndex,
|
||||
marker: citationMarker,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
insertions.sort((a, b) => b.index - a.index);
|
||||
const responseChars = responseText.split('');
|
||||
insertions.forEach((insertion) => {
|
||||
responseChars.splice(insertion.index, 0, insertion.marker);
|
||||
});
|
||||
responseText = responseChars.join('');
|
||||
}
|
||||
|
||||
if (sourceListFormatted.length > 0) {
|
||||
responseText += `
|
||||
|
||||
Sources:
|
||||
${sourceListFormatted.join('\n')}`;
|
||||
}
|
||||
insertions.sort((a, b) => b.index - a.index);
|
||||
const responseChars = responseText.split('');
|
||||
insertions.forEach((insertion) => {
|
||||
responseChars.splice(insertion.index, 0, insertion.marker);
|
||||
});
|
||||
responseText = responseChars.join('');
|
||||
}
|
||||
|
||||
const llmContent = responseText;
|
||||
// 2. Append Source List
|
||||
const sources =
|
||||
groundingMetadata?.groundingChunks?.filter(isGroundingChunkItem);
|
||||
if (sources && sources.length > 0) {
|
||||
const sourceListFormatted: string[] = [];
|
||||
sources.forEach((source, index) => {
|
||||
const title = source.web?.title || 'Untitled';
|
||||
const uri = source.web?.uri || 'Unknown URI';
|
||||
sourceListFormatted.push(`[${index + 1}] ${title} (${uri})`);
|
||||
});
|
||||
responseText += `\n\nSources:\n${sourceListFormatted.join('\n')}`;
|
||||
}
|
||||
|
||||
// 3. Prepend Warnings for skipped URLs
|
||||
if (skipped.length > 0) {
|
||||
responseText = `[Warning] The following URLs were skipped:\n${skipped.join('\n')}\n\n${responseText}`;
|
||||
}
|
||||
|
||||
debugLogger.debug(
|
||||
`[WebFetchTool] Formatted tool response for prompt "${userPrompt}:\n\n":`,
|
||||
llmContent,
|
||||
`[WebFetchTool] Formatted tool response for prompt "${userPrompt}":\n\n`,
|
||||
responseText,
|
||||
);
|
||||
|
||||
return {
|
||||
llmContent,
|
||||
llmContent: responseText,
|
||||
returnDisplay: `Content processed from prompt.`,
|
||||
};
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = `Error processing web content for prompt "${userPrompt.substring(
|
||||
0,
|
||||
50,
|
||||
)}...": ${getErrorMessage(error)}`;
|
||||
return {
|
||||
llmContent: `Error: ${errorMessage}`,
|
||||
returnDisplay: `Error: ${errorMessage}`,
|
||||
error: {
|
||||
message: errorMessage,
|
||||
type: ToolErrorType.WEB_FETCH_PROCESSING_ERROR,
|
||||
},
|
||||
};
|
||||
debugLogger.warn(
|
||||
`[WebFetchTool] Primary fetch failed, falling back: ${getErrorMessage(error)}`,
|
||||
);
|
||||
logWebFetchFallbackAttempt(
|
||||
this.config,
|
||||
new WebFetchFallbackAttemptEvent('primary_failed'),
|
||||
);
|
||||
// Simple All-or-Nothing Fallback
|
||||
return this.executeFallback(toFetch, signal);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user