Files
gemini-cli/packages/core/src/tools/web-fetch.test.ts
2025-11-11 10:03:32 +00:00

669 lines
23 KiB
TypeScript

/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi, beforeEach, type Mock } from 'vitest';
import { WebFetchTool, parsePrompt } from './web-fetch.js';
import type { Config } from '../config/config.js';
import { ApprovalMode } from '../policy/types.js';
import { ToolConfirmationOutcome } from './tools.js';
import { ToolErrorType } from './tool-error.js';
import * as fetchUtils from '../utils/fetch.js';
import { MessageBus } from '../confirmation-bus/message-bus.js';
import { PolicyEngine } from '../policy/policy-engine.js';
import {
MessageBusType,
type ToolConfirmationResponse,
} from '../confirmation-bus/types.js';
import { randomUUID } from 'node:crypto';
import {
logWebFetchFallbackAttempt,
WebFetchFallbackAttemptEvent,
} from '../telemetry/index.js';
import { convert } from 'html-to-text';
// Shared stub standing in for the Gemini client's `generateContent`; each test
// programs the value it should resolve or reject with.
const mockGenerateContent = vi.fn();
// Stub used as `Config.getGeminiClient`; the client it returns exposes only
// the `generateContent` stub above.
const mockGetGeminiClient = vi.fn(() => ({
generateContent: mockGenerateContent,
}));
// Replace html-to-text's `convert` with a stub that prefixes its input with
// "Converted: ", so a test can tell from the output whether conversion ran.
vi.mock('html-to-text', () => ({
convert: vi.fn((text) => `Converted: ${text}`),
}));
// Replace the telemetry module with two bare mocks so the fallback logging
// call and the event construction can both be asserted on.
vi.mock('../telemetry/index.js', () => ({
logWebFetchFallbackAttempt: vi.fn(),
WebFetchFallbackAttemptEvent: vi.fn(),
}));
// Keep every real export of the fetch utilities except `fetchWithTimeout` and
// `isPrivateIp`, which become bare mocks that the tests program via `vi.spyOn`.
vi.mock('../utils/fetch.js', async (importOriginal) => {
const actual = await importOriginal<typeof fetchUtils>();
return {
...actual,
fetchWithTimeout: vi.fn(),
isPrivateIp: vi.fn(),
};
});
// Mock `randomUUID` so tests can pin its return value (the Message Bus tests
// set it to 'test-correlation-id').
vi.mock('node:crypto', () => ({
randomUUID: vi.fn(),
}));
/**
 * Tests for `parsePrompt`, which splits a free-text prompt into the URLs it
 * accepts (`validUrls`) and human-readable problems (`errors`).
 */
describe('parsePrompt', () => {
  it('should extract valid URLs separated by whitespace', () => {
    const parsed = parsePrompt(
      'Go to https://example.com and http://google.com',
    );

    expect(parsed.errors).toHaveLength(0);
    expect(parsed.validUrls).toHaveLength(2);
    // Accepted URLs come back normalised with a trailing slash.
    expect(parsed.validUrls[0]).toBe('https://example.com/');
    expect(parsed.validUrls[1]).toBe('http://google.com/');
  });

  it('should accept URLs with trailing punctuation', () => {
    const parsed = parsePrompt('Check https://example.com.');

    expect(parsed.errors).toHaveLength(0);
    expect(parsed.validUrls).toHaveLength(1);
    // The trailing dot is kept as part of the host rather than stripped.
    expect(parsed.validUrls[0]).toBe('https://example.com./');
  });

  it('should detect URLs wrapped in punctuation as malformed', () => {
    const parsed = parsePrompt('Read (https://example.com)');

    expect(parsed.validUrls).toHaveLength(0);
    expect(parsed.errors).toHaveLength(1);
    const [message] = parsed.errors;
    expect(message).toContain('Malformed URL detected');
    expect(message).toContain('(https://example.com)');
  });

  it('should detect unsupported protocols (httpshttps://)', () => {
    const parsed = parsePrompt(
      'Summarize httpshttps://github.com/JuliaLang/julia/issues/58346',
    );

    expect(parsed.validUrls).toHaveLength(0);
    expect(parsed.errors).toHaveLength(1);
    const [message] = parsed.errors;
    expect(message).toContain('Unsupported protocol');
    expect(message).toContain(
      'httpshttps://github.com/JuliaLang/julia/issues/58346',
    );
  });

  it('should detect unsupported protocols (ftp://)', () => {
    const parsed = parsePrompt('ftp://example.com/file.txt');

    expect(parsed.validUrls).toHaveLength(0);
    expect(parsed.errors).toHaveLength(1);
    expect(parsed.errors[0]).toContain('Unsupported protocol');
  });

  it('should detect malformed URLs', () => {
    // A bare "http://" has no host, so `new URL()` rejects it.
    const parsed = parsePrompt('http://');

    expect(parsed.validUrls).toHaveLength(0);
    expect(parsed.errors).toHaveLength(1);
    expect(parsed.errors[0]).toContain('Malformed URL detected');
  });

  it('should handle prompts with no URLs', () => {
    const parsed = parsePrompt('hello world');

    expect(parsed.validUrls).toHaveLength(0);
    expect(parsed.errors).toHaveLength(0);
  });

  it('should handle mixed valid and invalid URLs', () => {
    const parsed = parsePrompt(
      'Valid: https://google.com, Invalid: ftp://bad.com',
    );

    expect(parsed.validUrls).toHaveLength(1);
    // The comma directly after the URL is kept as part of the host.
    expect(parsed.validUrls[0]).toBe('https://google.com,/');
    expect(parsed.errors).toHaveLength(1);
    expect(parsed.errors[0]).toContain('ftp://bad.com');
  });
});
describe('WebFetchTool', () => {
// Config double shared by every WebFetchTool test; rebuilt before each test.
let mockConfig: Config;
beforeEach(() => {
  // Reset all mocks first so state programmed by one test cannot leak into
  // the next.
  vi.resetAllMocks();
  // Only the members these tests touch are provided, hence the double cast.
  const configStub = {
    getApprovalMode: vi.fn(),
    setApprovalMode: vi.fn(),
    getProxy: vi.fn(),
    getGeminiClient: mockGetGeminiClient,
    isInteractive: () => false,
  };
  mockConfig = configStub as unknown as Config;
});
/**
 * Parameter validation performed by `WebFetchTool.build`: an empty prompt, a
 * prompt without URLs and a prompt with bad URLs must each be rejected.
 */
describe('validateToolParamValues', () => {
  // Creates the tool eagerly and returns a thunk that builds an invocation
  // for `prompt`, so only `build` runs inside `expect(...)`.
  const attemptBuild = (prompt: string) => {
    const webFetch = new WebFetchTool(mockConfig);
    return () => webFetch.build({ prompt });
  };

  it('should throw if prompt is empty', () => {
    expect(attemptBuild('')).toThrow("The 'prompt' parameter cannot be empty");
  });

  it('should throw if prompt contains no URLs', () => {
    expect(attemptBuild('hello world')).toThrow(
      "The 'prompt' must contain at least one valid URL",
    );
  });

  it('should throw if prompt contains malformed URLs (httpshttps://)', () => {
    expect(attemptBuild('fetch httpshttps://example.com')).toThrow(
      'Error(s) in prompt URLs:',
    );
  });

  it('should pass if prompt contains at least one valid URL', () => {
    expect(attemptBuild('fetch https://example.com')).not.toThrow();
  });
});
/**
 * Error classification and telemetry of `execute`.
 *
 * `isPrivateIp` selects the path under test: `true` sends the URL straight to
 * the fallback fetch, `false` goes through the primary Gemini call first.
 */
describe('execute', () => {
  it('should return WEB_FETCH_FALLBACK_FAILED on fallback fetch failure', async () => {
    vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(true);
    vi.spyOn(fetchUtils, 'fetchWithTimeout').mockRejectedValue(
      new Error('fetch failed'),
    );
    const tool = new WebFetchTool(mockConfig);
    const params = { prompt: 'fetch https://private.ip' };
    const invocation = tool.build(params);
    const result = await invocation.execute(new AbortController().signal);
    expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_FALLBACK_FAILED);
  });

  it('should return WEB_FETCH_PROCESSING_ERROR on general processing failure', async () => {
    vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false);
    mockGenerateContent.mockRejectedValue(new Error('API error'));
    const tool = new WebFetchTool(mockConfig);
    const params = { prompt: 'fetch https://public.ip' };
    const invocation = tool.build(params);
    const result = await invocation.execute(new AbortController().signal);
    expect(result.error?.type).toBe(ToolErrorType.WEB_FETCH_PROCESSING_ERROR);
  });

  it('should log telemetry when falling back due to private IP', async () => {
    vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(true);
    // Mock fetchWithTimeout to succeed so fallback proceeds. The double
    // carries a Content-Type header, like the Response doubles in the
    // 'execute (fallback)' suite, so it stays usable if the fallback path
    // reads `response.headers`.
    vi.spyOn(fetchUtils, 'fetchWithTimeout').mockResolvedValue({
      ok: true,
      headers: new Headers({ 'content-type': 'text/plain' }),
      text: () => Promise.resolve('some content'),
    } as Response);
    mockGenerateContent.mockResolvedValue({
      candidates: [{ content: { parts: [{ text: 'fallback response' }] } }],
    });
    const tool = new WebFetchTool(mockConfig);
    const params = { prompt: 'fetch https://private.ip' };
    const invocation = tool.build(params);
    await invocation.execute(new AbortController().signal);
    expect(logWebFetchFallbackAttempt).toHaveBeenCalledWith(
      mockConfig,
      expect.any(WebFetchFallbackAttemptEvent),
    );
    expect(WebFetchFallbackAttemptEvent).toHaveBeenCalledWith('private_ip');
  });

  it('should log telemetry when falling back due to primary fetch failure', async () => {
    vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false);
    // Mock primary fetch to return empty response, triggering fallback
    mockGenerateContent.mockResolvedValueOnce({
      candidates: [],
    });
    // Mock fetchWithTimeout to succeed so fallback proceeds. As above, the
    // double includes a Content-Type header for consistency with the
    // 'execute (fallback)' suite.
    vi.spyOn(fetchUtils, 'fetchWithTimeout').mockResolvedValue({
      ok: true,
      headers: new Headers({ 'content-type': 'text/plain' }),
      text: () => Promise.resolve('some content'),
    } as Response);
    // Mock fallback LLM call
    mockGenerateContent.mockResolvedValueOnce({
      candidates: [{ content: { parts: [{ text: 'fallback response' }] } }],
    });
    const tool = new WebFetchTool(mockConfig);
    const params = { prompt: 'fetch https://public.ip' };
    const invocation = tool.build(params);
    await invocation.execute(new AbortController().signal);
    expect(logWebFetchFallbackAttempt).toHaveBeenCalledWith(
      mockConfig,
      expect.any(WebFetchFallbackAttemptEvent),
    );
    expect(WebFetchFallbackAttemptEvent).toHaveBeenCalledWith(
      'primary_failed',
    );
  });
});
/**
 * Content handling on the fallback path: bodies served as HTML, or without a
 * Content-Type header, go through html-to-text's `convert`; JSON and plain
 * text are passed on unchanged.
 */
describe('execute (fallback)', () => {
  // Options the tests expect `convert` to be called with.
  const expectedConvertOptions = {
    wordwrap: false,
    selectors: [
      { selector: 'a', options: { ignoreHref: true } },
      { selector: 'img', format: 'skip' },
    ],
  };

  /**
   * Serves `body` with `headers` from the mocked fetch, makes the fallback
   * LLM call echo back the text it was sent, and runs the tool.
   */
  const runFallback = async (body: string, headers: Headers) => {
    vi.spyOn(fetchUtils, 'fetchWithTimeout').mockResolvedValue({
      ok: true,
      headers,
      text: () => Promise.resolve(body),
    } as Response);
    // Echo the content sent to the LLM so it shows up in `llmContent`.
    mockGenerateContent.mockImplementationOnce(async (req) => ({
      candidates: [{ content: { parts: [{ text: req[0].parts[0].text }] } }],
    }));
    const webFetch = new WebFetchTool(mockConfig);
    const invocation = webFetch.build({ prompt: 'fetch https://example.com' });
    return invocation.execute(new AbortController().signal);
  };

  beforeEach(() => {
    // Force the fallback: the URL is public, and the primary call returns no
    // candidates.
    vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false);
    mockGenerateContent.mockResolvedValueOnce({
      candidates: [],
    });
  });

  it('should convert HTML content using html-to-text', async () => {
    const htmlContent = '<html><body><h1>Hello</h1></body></html>';
    const outcome = await runFallback(
      htmlContent,
      new Headers({ 'content-type': 'text/html; charset=utf-8' }),
    );

    expect(convert).toHaveBeenCalledWith(htmlContent, expectedConvertOptions);
    expect(outcome.llmContent).toContain(`Converted: ${htmlContent}`);
  });

  it('should return raw text for JSON content', async () => {
    const jsonContent = '{"key": "value"}';
    const outcome = await runFallback(
      jsonContent,
      new Headers({ 'content-type': 'application/json' }),
    );

    expect(convert).not.toHaveBeenCalled();
    expect(outcome.llmContent).toContain(jsonContent);
  });

  it('should return raw text for plain text content', async () => {
    const textContent = 'Just some text.';
    const outcome = await runFallback(
      textContent,
      new Headers({ 'content-type': 'text/plain' }),
    );

    expect(convert).not.toHaveBeenCalled();
    expect(outcome.llmContent).toContain(textContent);
  });

  it('should treat content with no Content-Type header as HTML', async () => {
    const content = '<p>No header</p>';
    const outcome = await runFallback(content, new Headers());

    expect(convert).toHaveBeenCalledWith(content, expectedConvertOptions);
    expect(outcome.llmContent).toContain(`Converted: ${content}`);
  });
});
/**
 * Legacy (no message bus) confirmation flow: the shape of the confirmation
 * details, GitHub blob-to-raw URL rewriting, and the approval-mode
 * interactions.
 */
describe('shouldConfirmExecute', () => {
  // Builds an invocation for `prompt` and asks whether it needs confirmation.
  const requestConfirmation = (prompt: string) => {
    const webFetch = new WebFetchTool(mockConfig);
    const invocation = webFetch.build({ prompt });
    return invocation.shouldConfirmExecute(new AbortController().signal);
  };

  it('should return confirmation details with the correct prompt and parsed urls', async () => {
    const details = await requestConfirmation('fetch https://example.com');

    expect(details).toEqual({
      type: 'info',
      title: 'Confirm Web Fetch',
      prompt: 'fetch https://example.com',
      urls: ['https://example.com/'],
      onConfirm: expect.any(Function),
    });
  });

  it('should convert github urls to raw format', async () => {
    const githubPrompt =
      'fetch https://github.com/google/gemini-react/blob/main/README.md';
    const details = await requestConfirmation(githubPrompt);

    // The prompt is echoed untouched; only the listed URL is rewritten.
    expect(details).toEqual({
      type: 'info',
      title: 'Confirm Web Fetch',
      prompt: githubPrompt,
      urls: [
        'https://raw.githubusercontent.com/google/gemini-react/main/README.md',
      ],
      onConfirm: expect.any(Function),
    });
  });

  it('should return false if approval mode is AUTO_EDIT', async () => {
    vi.spyOn(mockConfig, 'getApprovalMode').mockReturnValue(
      ApprovalMode.AUTO_EDIT,
    );

    const details = await requestConfirmation('fetch https://example.com');

    expect(details).toBe(false);
  });

  it('should call setApprovalMode when onConfirm is called with ProceedAlways', async () => {
    const details = await requestConfirmation('fetch https://example.com');

    // Narrow to the details object before invoking its callback; if this
    // guard fails, the assertion below fails as well.
    if (details && typeof details === 'object' && 'onConfirm' in details) {
      await details.onConfirm(ToolConfirmationOutcome.ProceedAlways);
    }

    expect(mockConfig.setApprovalMode).toHaveBeenCalledWith(
      ApprovalMode.AUTO_EDIT,
    );
  });
});
/**
 * Confirmation flow when the tool is constructed with a `MessageBus`.
 *
 * `randomUUID` is pinned to 'test-correlation-id' so simulated responses can
 * be matched to (or deliberately mismatched with) the published request.
 *
 * Tests that install fake timers restore real timers in a `finally` block, so
 * a failing assertion cannot leave fake timers active for later tests.
 */
describe('Message Bus Integration', () => {
  let policyEngine: PolicyEngine;
  let messageBus: MessageBus;
  let mockUUID: Mock;

  beforeEach(() => {
    policyEngine = new PolicyEngine();
    messageBus = new MessageBus(policyEngine);
    mockUUID = vi.mocked(randomUUID);
    mockUUID.mockReturnValue('test-correlation-id');
  });

  it('should use message bus for confirmation when available', async () => {
    const tool = new WebFetchTool(mockConfig, messageBus);
    const params = { prompt: 'fetch https://example.com' };
    const invocation = tool.build(params);
    // Mock message bus publish and subscribe
    const publishSpy = vi.spyOn(messageBus, 'publish');
    const subscribeSpy = vi.spyOn(messageBus, 'subscribe');
    const unsubscribeSpy = vi.spyOn(messageBus, 'unsubscribe');
    // Start confirmation process
    const confirmationPromise = invocation.shouldConfirmExecute(
      new AbortController().signal,
    );
    // Verify confirmation request was published
    expect(publishSpy).toHaveBeenCalledWith({
      type: MessageBusType.TOOL_CONFIRMATION_REQUEST,
      toolCall: {
        name: 'web_fetch',
        args: { prompt: 'fetch https://example.com' },
      },
      correlationId: 'test-correlation-id',
    });
    // Verify subscription to response
    expect(subscribeSpy).toHaveBeenCalledWith(
      MessageBusType.TOOL_CONFIRMATION_RESPONSE,
      expect.any(Function),
    );
    // Simulate confirmation response by invoking the subscribed handler
    const responseHandler = subscribeSpy.mock.calls[0][1];
    const response: ToolConfirmationResponse = {
      type: MessageBusType.TOOL_CONFIRMATION_RESPONSE,
      correlationId: 'test-correlation-id',
      confirmed: true,
    };
    responseHandler(response);
    const result = await confirmationPromise;
    expect(result).toBe(false); // No further confirmation needed
    expect(unsubscribeSpy).toHaveBeenCalled();
  });

  it('should reject promise when confirmation is denied via message bus', async () => {
    const tool = new WebFetchTool(mockConfig, messageBus);
    const params = { prompt: 'fetch https://example.com' };
    const invocation = tool.build(params);
    const subscribeSpy = vi.spyOn(messageBus, 'subscribe');
    const confirmationPromise = invocation.shouldConfirmExecute(
      new AbortController().signal,
    );
    // Simulate denial response
    const responseHandler = subscribeSpy.mock.calls[0][1];
    const response: ToolConfirmationResponse = {
      type: MessageBusType.TOOL_CONFIRMATION_RESPONSE,
      correlationId: 'test-correlation-id',
      confirmed: false,
    };
    responseHandler(response);
    // Should reject with error when denied
    await expect(confirmationPromise).rejects.toThrow(
      'Tool execution for "WebFetch" denied by policy.',
    );
  });

  it('should handle timeout gracefully', async () => {
    vi.useFakeTimers();
    try {
      const tool = new WebFetchTool(mockConfig, messageBus);
      const params = { prompt: 'fetch https://example.com' };
      const invocation = tool.build(params);
      const confirmationPromise = invocation.shouldConfirmExecute(
        new AbortController().signal,
      );
      // Fast-forward past timeout
      await vi.advanceTimersByTimeAsync(30000);
      const result = await confirmationPromise;
      // With no response on the bus, the legacy confirmation details are
      // returned instead of `false`.
      expect(result).not.toBe(false);
      expect(result).toHaveProperty('type', 'info');
    } finally {
      // Always restore real timers, even when an assertion above throws.
      vi.useRealTimers();
    }
  });

  it('should handle abort signal during confirmation', async () => {
    const tool = new WebFetchTool(mockConfig, messageBus);
    const params = { prompt: 'fetch https://example.com' };
    const invocation = tool.build(params);
    const abortController = new AbortController();
    const confirmationPromise = invocation.shouldConfirmExecute(
      abortController.signal,
    );
    // Abort the operation
    abortController.abort();
    await expect(confirmationPromise).rejects.toThrow(
      'Tool execution for "WebFetch" denied by policy.',
    );
  });

  it('should fall back to legacy confirmation when no message bus', async () => {
    const tool = new WebFetchTool(mockConfig); // No message bus
    const params = { prompt: 'fetch https://example.com' };
    const invocation = tool.build(params);
    const result = await invocation.shouldConfirmExecute(
      new AbortController().signal,
    );
    // Should use legacy confirmation flow (returns confirmation details, not false)
    expect(result).not.toBe(false);
    expect(result).toHaveProperty('type', 'info');
  });

  it('should ignore responses with wrong correlation ID', async () => {
    vi.useFakeTimers();
    try {
      const tool = new WebFetchTool(mockConfig, messageBus);
      const params = { prompt: 'fetch https://example.com' };
      const invocation = tool.build(params);
      const subscribeSpy = vi.spyOn(messageBus, 'subscribe');
      const confirmationPromise = invocation.shouldConfirmExecute(
        new AbortController().signal,
      );
      // Send response with wrong correlation ID
      const responseHandler = subscribeSpy.mock.calls[0][1];
      const wrongResponse: ToolConfirmationResponse = {
        type: MessageBusType.TOOL_CONFIRMATION_RESPONSE,
        correlationId: 'wrong-id',
        confirmed: true,
      };
      responseHandler(wrongResponse);
      // Should timeout since correct response wasn't received
      await vi.advanceTimersByTimeAsync(30000);
      const result = await confirmationPromise;
      expect(result).not.toBe(false);
      expect(result).toHaveProperty('type', 'info');
    } finally {
      // Always restore real timers, even when an assertion above throws.
      vi.useRealTimers();
    }
  });

  it('should handle message bus publish errors gracefully', async () => {
    const tool = new WebFetchTool(mockConfig, messageBus);
    const params = { prompt: 'fetch https://example.com' };
    const invocation = tool.build(params);
    // Mock publish to throw error
    vi.spyOn(messageBus, 'publish').mockImplementation(() => {
      throw new Error('Message bus error');
    });
    const result = await invocation.shouldConfirmExecute(
      new AbortController().signal,
    );
    expect(result).toBe(false); // Should gracefully fall back
  });

  it('should execute normally after confirmation approval', async () => {
    vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false);
    mockGenerateContent.mockResolvedValue({
      candidates: [
        {
          content: {
            parts: [{ text: 'Fetched content from https://example.com' }],
            role: 'model',
          },
        },
      ],
    });
    const tool = new WebFetchTool(mockConfig, messageBus);
    const params = { prompt: 'fetch https://example.com' };
    const invocation = tool.build(params);
    const subscribeSpy = vi.spyOn(messageBus, 'subscribe');
    // Start confirmation
    const confirmationPromise = invocation.shouldConfirmExecute(
      new AbortController().signal,
    );
    // Approve via message bus
    const responseHandler = subscribeSpy.mock.calls[0][1];
    responseHandler({
      type: MessageBusType.TOOL_CONFIRMATION_RESPONSE,
      correlationId: 'test-correlation-id',
      confirmed: true,
    });
    await confirmationPromise;
    // Execute the tool
    const result = await invocation.execute(new AbortController().signal);
    expect(result.error).toBeUndefined();
    expect(result.llmContent).toContain('Fetched content');
  });
});
});