mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-13 06:40:33 -07:00
feat(core): implement experimental direct web fetch (#19557)
This commit is contained in:
@@ -71,6 +71,7 @@ describe('Tool Confirmation Policy Updates', () => {
|
||||
isPathWithinWorkspace: () => true,
|
||||
getDirectories: () => [rootDir],
|
||||
}),
|
||||
getDirectWebFetch: () => false,
|
||||
storage: {
|
||||
getProjectTempDir: () => path.join(os.tmpdir(), 'gemini-cli-temp'),
|
||||
},
|
||||
|
||||
@@ -37,6 +37,11 @@ vi.mock('node:fs/promises', async (importOriginal) => {
|
||||
|
||||
vi.mock('fs', () => ({
|
||||
mkdirSync: vi.fn(),
|
||||
createWriteStream: vi.fn(() => ({
|
||||
on: vi.fn(),
|
||||
write: vi.fn(),
|
||||
end: vi.fn(),
|
||||
})),
|
||||
}));
|
||||
|
||||
vi.mock('os');
|
||||
|
||||
@@ -5,7 +5,11 @@
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach, type Mock } from 'vitest';
|
||||
import { WebFetchTool, parsePrompt } from './web-fetch.js';
|
||||
import {
|
||||
WebFetchTool,
|
||||
parsePrompt,
|
||||
convertGithubUrlToRaw,
|
||||
} from './web-fetch.js';
|
||||
import type { Config } from '../config/config.js';
|
||||
import { ApprovalMode } from '../policy/types.js';
|
||||
import { ToolConfirmationOutcome } from './tools.js';
|
||||
@@ -55,6 +59,72 @@ vi.mock('node:crypto', () => ({
|
||||
randomUUID: vi.fn(),
|
||||
}));
|
||||
|
||||
/**
 * Test helper: replaces `fetchUtils.fetchWithTimeout` with a spy that serves a
 * single canned response for a single exact URL.
 *
 * - Any call with a URL other than `url` rejects with an "Unexpected fetch URL"
 *   error so a test cannot silently hit the wrong endpoint.
 * - Passing an `Error` as `response` makes the mocked fetch reject with it.
 * - When `response` has no `body` but has `text` or `arrayBuffer`, a one-chunk
 *   reader-based body is synthesized from it so code that streams
 *   `response.body` sees the same bytes.
 * - `ok` follows the Fetch definition (status 200-299); `status` defaults to
 *   200. Any field present on `response` overrides the computed defaults.
 *
 * @param url The exact URL the code under test is expected to request.
 * @param response Partial response fields to serve, or an error to reject with.
 * @returns The spy installed on `fetchUtils.fetchWithTimeout`.
 */
const mockFetch = (url: string, response: Partial<Response> | Error) => {
  // Minimal ReadableStream stand-in: yields `chunk` once (if given), then done.
  const streamOf = (chunk?: Uint8Array): ReadableStream =>
    ({
      getReader: () => {
        let pending = chunk !== undefined;
        return {
          read: async () => {
            if (!pending) return { done: true, value: undefined };
            pending = false;
            return { done: false, value: chunk };
          },
          releaseLock: () => {},
          cancel: async () => {},
        };
      },
    }) as unknown as ReadableStream;

  return vi
    .spyOn(fetchUtils, 'fetchWithTimeout')
    .mockImplementation(async (actualUrl) => {
      if (actualUrl !== url) {
        throw new Error(
          `Unexpected fetch URL: expected "${url}", got "${actualUrl}"`,
        );
      }
      if (response instanceof Error) {
        throw response;
      }

      // If we have text/arrayBuffer but no body, create a body mock
      let body = response.body;
      if (!body) {
        let content: Uint8Array | undefined;
        if (response.text) {
          content = new TextEncoder().encode(await response.text());
        } else if (response.arrayBuffer) {
          content = new Uint8Array(await response.arrayBuffer());
        }
        body = streamOf(content);
      }

      const status = response.status ?? 200;

      return {
        // Real fetch only reports ok for 2xx; mirror that so 3xx/4xx/5xx mocks
        // behave like production responses.
        ok: status >= 200 && status < 300,
        status,
        headers: response.headers ?? new Headers(),
        text: response.text ?? (() => Promise.resolve('')),
        arrayBuffer:
          response.arrayBuffer ?? (() => Promise.resolve(new ArrayBuffer(0))),
        body,
        // Explicit fields supplied by the test always win over the defaults.
        ...response,
      } as unknown as Response;
    });
};
|
||||
|
||||
describe('parsePrompt', () => {
|
||||
it('should extract valid URLs separated by whitespace', () => {
|
||||
const prompt = 'Go to https://example.com and http://google.com';
|
||||
@@ -128,6 +198,42 @@ describe('parsePrompt', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('convertGithubUrlToRaw', () => {
  // One row per scenario: test title, URL given to the converter, URL expected back.
  const scenarios = [
    {
      name: 'should convert valid github blob urls',
      input: 'https://github.com/user/repo/blob/main/README.md',
      expected: 'https://raw.githubusercontent.com/user/repo/main/README.md',
    },
    {
      name: 'should not convert non-blob github urls',
      input: 'https://github.com/user/repo',
      expected: 'https://github.com/user/repo',
    },
    {
      name: 'should not convert urls with similar domain names',
      input: 'https://mygithub.com/user/repo/blob/main',
      expected: 'https://mygithub.com/user/repo/blob/main',
    },
    {
      name: 'should only replace the /blob/ that separates repo from branch',
      input: 'https://github.com/blob/repo/blob/main/test.ts',
      expected: 'https://raw.githubusercontent.com/blob/repo/main/test.ts',
    },
    {
      name: 'should not convert urls if blob is not in path',
      input: 'https://github.com/user/repo/tree/main',
      expected: 'https://github.com/user/repo/tree/main',
    },
    {
      name: 'should handle invalid urls gracefully',
      input: 'not-a-url',
      expected: 'not-a-url',
    },
  ];

  it.each(scenarios)('$name', ({ input, expected }) => {
    expect(convertGithubUrlToRaw(input)).toBe(expected);
  });
});
|
||||
|
||||
describe('WebFetchTool', () => {
|
||||
let mockConfig: Config;
|
||||
let bus: MessageBus;
|
||||
@@ -142,6 +248,7 @@ describe('WebFetchTool', () => {
|
||||
getProxy: vi.fn(),
|
||||
getGeminiClient: mockGetGeminiClient,
|
||||
getRetryFetchErrors: vi.fn().mockReturnValue(false),
|
||||
getDirectWebFetch: vi.fn().mockReturnValue(false),
|
||||
modelConfigService: {
|
||||
getResolvedConfig: vi.fn().mockImplementation(({ model }) => ({
|
||||
model,
|
||||
@@ -153,32 +260,79 @@ describe('WebFetchTool', () => {
|
||||
});
|
||||
|
||||
describe('validateToolParamValues', () => {
|
||||
it.each([
|
||||
{
|
||||
name: 'empty prompt',
|
||||
prompt: '',
|
||||
expectedError: "The 'prompt' parameter cannot be empty",
|
||||
},
|
||||
{
|
||||
name: 'prompt with no URLs',
|
||||
prompt: 'hello world',
|
||||
expectedError: "The 'prompt' must contain at least one valid URL",
|
||||
},
|
||||
{
|
||||
name: 'prompt with malformed URLs',
|
||||
prompt: 'fetch httpshttps://example.com',
|
||||
expectedError: 'Error(s) in prompt URLs:',
|
||||
},
|
||||
])('should throw if $name', ({ prompt, expectedError }) => {
|
||||
const tool = new WebFetchTool(mockConfig, bus);
|
||||
expect(() => tool.build({ prompt })).toThrow(expectedError);
|
||||
describe('standard mode', () => {
  // Prompts that must be rejected, paired with the message fragment expected
  // in the thrown error.
  const rejectedPrompts = [
    {
      name: 'empty prompt',
      prompt: '',
      expectedError: "The 'prompt' parameter cannot be empty",
    },
    {
      name: 'prompt with no URLs',
      prompt: 'hello world',
      expectedError: "The 'prompt' must contain at least one valid URL",
    },
    {
      name: 'prompt with malformed URLs',
      prompt: 'fetch httpshttps://example.com',
      expectedError: 'Error(s) in prompt URLs:',
    },
  ];

  it.each(rejectedPrompts)(
    'should throw if $name',
    ({ prompt, expectedError }) => {
      const tool = new WebFetchTool(mockConfig, bus);
      const attemptBuild = () => tool.build({ prompt });
      expect(attemptBuild).toThrow(expectedError);
    },
  );

  it('should pass if prompt contains at least one valid URL', () => {
    const tool = new WebFetchTool(mockConfig, bus);
    const attemptBuild = () =>
      tool.build({ prompt: 'fetch https://example.com' });
    expect(attemptBuild).not.toThrow();
  });
});
|
||||
|
||||
it('should pass if prompt contains at least one valid URL', () => {
|
||||
describe('experimental mode', () => {
  // Every test in this group runs with the direct-web-fetch flag switched on.
  beforeEach(() => {
    vi.spyOn(mockConfig, 'getDirectWebFetch').mockReturnValue(true);
  });

  it('should throw if url is missing', () => {
    const tool = new WebFetchTool(mockConfig, bus);
    const attemptBuild = () => tool.build({ prompt: 'foo' });
    expect(attemptBuild).toThrow("params must have required property 'url'");
  });

  it('should throw if url is invalid', () => {
    const tool = new WebFetchTool(mockConfig, bus);
    const attemptBuild = () => tool.build({ url: 'not-a-url' });
    expect(attemptBuild).toThrow('Invalid URL: "not-a-url"');
  });

  it('should pass if url is valid', () => {
    const tool = new WebFetchTool(mockConfig, bus);
    const attemptBuild = () => tool.build({ url: 'https://example.com' });
    expect(attemptBuild).not.toThrow();
  });
});
|
||||
});
|
||||
|
||||
describe('getSchema', () => {
|
||||
it('should return standard schema by default', () => {
|
||||
const tool = new WebFetchTool(mockConfig, bus);
|
||||
expect(() =>
|
||||
tool.build({ prompt: 'fetch https://example.com' }),
|
||||
).not.toThrow();
|
||||
const schema = tool.getSchema();
|
||||
expect(schema.parametersJsonSchema).toHaveProperty('properties.prompt');
|
||||
expect(schema.parametersJsonSchema).not.toHaveProperty('properties.url');
|
||||
});
|
||||
|
||||
it('should return experimental schema when enabled', () => {
|
||||
vi.spyOn(mockConfig, 'getDirectWebFetch').mockReturnValue(true);
|
||||
const tool = new WebFetchTool(mockConfig, bus);
|
||||
const schema = tool.getSchema();
|
||||
expect(schema.parametersJsonSchema).toHaveProperty('properties.url');
|
||||
expect(schema.parametersJsonSchema).not.toHaveProperty(
|
||||
'properties.prompt',
|
||||
);
|
||||
expect(schema.parametersJsonSchema).toHaveProperty('required', ['url']);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -205,9 +359,7 @@ describe('WebFetchTool', () => {
|
||||
|
||||
it('should return WEB_FETCH_FALLBACK_FAILED on fallback fetch failure', async () => {
|
||||
vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(true);
|
||||
vi.spyOn(fetchUtils, 'fetchWithTimeout').mockRejectedValue(
|
||||
new Error('fetch failed'),
|
||||
);
|
||||
mockFetch('https://private.ip/', new Error('fetch failed'));
|
||||
const tool = new WebFetchTool(mockConfig, bus);
|
||||
const params = { prompt: 'fetch https://private.ip' };
|
||||
const invocation = tool.build(params);
|
||||
@@ -228,10 +380,9 @@ describe('WebFetchTool', () => {
|
||||
it('should log telemetry when falling back due to private IP', async () => {
|
||||
vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(true);
|
||||
// Mock fetchWithTimeout to succeed so fallback proceeds
|
||||
vi.spyOn(fetchUtils, 'fetchWithTimeout').mockResolvedValue({
|
||||
ok: true,
|
||||
mockFetch('https://private.ip/', {
|
||||
text: () => Promise.resolve('some content'),
|
||||
} as Response);
|
||||
});
|
||||
mockGenerateContent.mockResolvedValue({
|
||||
candidates: [{ content: { parts: [{ text: 'fallback response' }] } }],
|
||||
});
|
||||
@@ -255,10 +406,9 @@ describe('WebFetchTool', () => {
|
||||
candidates: [],
|
||||
});
|
||||
// Mock fetchWithTimeout to succeed so fallback proceeds
|
||||
vi.spyOn(fetchUtils, 'fetchWithTimeout').mockResolvedValue({
|
||||
ok: true,
|
||||
mockFetch('https://public.ip/', {
|
||||
text: () => Promise.resolve('some content'),
|
||||
} as Response);
|
||||
});
|
||||
// Mock fallback LLM call
|
||||
mockGenerateContent.mockResolvedValueOnce({
|
||||
candidates: [{ content: { parts: [{ text: 'fallback response' }] } }],
|
||||
@@ -320,11 +470,10 @@ describe('WebFetchTool', () => {
|
||||
? new Headers({ 'content-type': contentType })
|
||||
: new Headers();
|
||||
|
||||
vi.spyOn(fetchUtils, 'fetchWithTimeout').mockResolvedValue({
|
||||
ok: true,
|
||||
mockFetch('https://example.com/', {
|
||||
headers,
|
||||
text: () => Promise.resolve(content),
|
||||
} as Response);
|
||||
});
|
||||
|
||||
// Mock fallback LLM call to return the content passed to it
|
||||
mockGenerateContent.mockImplementationOnce(async (_, req) => ({
|
||||
@@ -373,6 +522,24 @@ describe('WebFetchTool', () => {
|
||||
});
|
||||
});
|
||||
|
||||
it('should handle URL param in confirmation details', async () => {
  // Direct-fetch mode: the invocation is built from `url`, not from `prompt`.
  vi.spyOn(mockConfig, 'getDirectWebFetch').mockReturnValue(true);

  const { signal } = new AbortController();
  const invocation = new WebFetchTool(mockConfig, bus).build({
    url: 'https://example.com',
  });

  const expectedDetails = {
    type: 'info',
    title: 'Confirm Web Fetch',
    prompt: 'Fetch https://example.com',
    urls: ['https://example.com'],
    onConfirm: expect.any(Function),
  };

  expect(await invocation.shouldConfirmExecute(signal)).toEqual(
    expectedDetails,
  );
});
|
||||
|
||||
it('should convert github urls to raw format', async () => {
|
||||
const tool = new WebFetchTool(mockConfig, bus);
|
||||
const params = {
|
||||
@@ -601,4 +768,170 @@ describe('WebFetchTool', () => {
|
||||
expect(result.llmContent).toContain('Fetched content');
|
||||
});
|
||||
});
|
||||
|
||||
describe('execute (experimental)', () => {
  beforeEach(() => {
    vi.spyOn(mockConfig, 'getDirectWebFetch').mockReturnValue(true);
    vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false);
  });

  // Builds an invocation for `url` through the validated build() path and runs it.
  const fetchDirect = (url: string) => {
    const tool = new WebFetchTool(mockConfig, bus);
    return tool.build({ url }).execute(new AbortController().signal);
  };

  // Runs an invocation created without build() validation, so the defensive
  // checks inside the experimental execute path are what gets exercised.
  const executeUnvalidated = (params: { url?: string }) => {
    const tool = new WebFetchTool(mockConfig, bus);
    const invocation = tool['createInvocation'](params, bus);
    return invocation.execute(new AbortController().signal);
  };

  it('should perform direct fetch and return text for plain text content', async () => {
    const content = 'Plain text content';
    mockFetch('https://example.com/', {
      status: 200,
      headers: new Headers({ 'content-type': 'text/plain' }),
      text: () => Promise.resolve(content),
    });

    const result = await fetchDirect('https://example.com');

    expect(result.llmContent).toBe(content);
    expect(result.returnDisplay).toContain('Fetched text/plain content');

    const expectedRequestInit = expect.objectContaining({
      headers: expect.objectContaining({
        Accept: expect.stringContaining('text/plain'),
      }),
    });
    expect(fetchUtils.fetchWithTimeout).toHaveBeenCalledWith(
      'https://example.com/',
      expect.any(Number),
      expectedRequestInit,
    );
  });

  it('should use html-to-text and preserve links for HTML content', async () => {
    const content =
      '<html><body><a href="https://link.com">Link</a></body></html>';
    mockFetch('https://example.com/', {
      status: 200,
      headers: new Headers({ 'content-type': 'text/html' }),
      text: () => Promise.resolve(content),
    });

    await fetchDirect('https://example.com');

    const anchorSelector = expect.objectContaining({
      selector: 'a',
      options: { ignoreHref: false, baseUrl: 'https://example.com/' },
    });
    expect(convert).toHaveBeenCalledWith(
      content,
      expect.objectContaining({ selectors: [anchorSelector] }),
    );
  });

  it('should return base64 for image content', async () => {
    const buffer = Buffer.from('fake-image-data');
    // Slice out exactly this Buffer's bytes from its backing ArrayBuffer.
    const imageBytes = buffer.buffer.slice(
      buffer.byteOffset,
      buffer.byteOffset + buffer.byteLength,
    );
    mockFetch('https://example.com/image.png', {
      status: 200,
      headers: new Headers({ 'content-type': 'image/png' }),
      arrayBuffer: () => Promise.resolve(imageBytes),
    });

    const result = await fetchDirect('https://example.com/image.png');

    expect(result.llmContent).toEqual({
      inlineData: {
        data: buffer.toString('base64'),
        mimeType: 'image/png',
      },
    });
  });

  it('should return raw response info for 4xx/5xx errors', async () => {
    const errorBody = 'Not Found';
    mockFetch('https://example.com/404', {
      status: 404,
      headers: new Headers({ 'x-test': 'val' }),
      text: () => Promise.resolve(errorBody),
    });

    const result = await fetchDirect('https://example.com/404');

    for (const fragment of [
      'Request failed with status 404',
      'val',
      errorBody,
    ]) {
      expect(result.llmContent).toContain(fragment);
    }
    expect(result.returnDisplay).toContain('Failed to fetch');
  });

  it('should throw error if Content-Length exceeds limit', async () => {
    const declaredBytes = 11 * 1024 * 1024;
    mockFetch('https://example.com/large', {
      headers: new Headers({ 'content-length': declaredBytes.toString() }),
    });

    const result = await fetchDirect('https://example.com/large');

    expect(result.llmContent).toContain('Error');
    expect(result.llmContent).toContain('exceeds size limit');
  });

  it('should throw error if stream exceeds limit', async () => {
    const largeChunk = new Uint8Array(11 * 1024 * 1024);
    // Reader that hands out one oversized chunk and then reports completion.
    const oversizedReader = {
      read: vi
        .fn()
        .mockResolvedValueOnce({ done: false, value: largeChunk })
        .mockResolvedValueOnce({ done: true }),
      releaseLock: vi.fn(),
      cancel: vi.fn().mockResolvedValue(undefined),
    };
    mockFetch('https://example.com/large-stream', {
      body: {
        getReader: () => oversizedReader,
      } as unknown as ReadableStream,
    });

    const result = await fetchDirect('https://example.com/large-stream');

    expect(result.llmContent).toContain('Error');
    expect(result.llmContent).toContain('exceeds size limit');
  });

  it('should return error if url is missing (experimental)', async () => {
    const result = await executeUnvalidated({});

    expect(result.llmContent).toContain('Error: No URL provided.');
    expect(result.error?.type).toBe(ToolErrorType.INVALID_TOOL_PARAMS);
  });

  it('should return error if url is invalid (experimental)', async () => {
    const result = await executeUnvalidated({ url: 'not-a-url' });

    expect(result.llmContent).toContain('Error: Invalid URL "not-a-url"');
    expect(result.error?.type).toBe(ToolErrorType.INVALID_TOOL_PARAMS);
  });
});
|
||||
});
|
||||
|
||||
@@ -18,6 +18,7 @@ import type { Config } from '../config/config.js';
|
||||
import { ApprovalMode } from '../policy/types.js';
|
||||
import { getResponseText } from '../utils/partUtils.js';
|
||||
import { fetchWithTimeout, isPrivateIp } from '../utils/fetch.js';
|
||||
import { truncateString } from '../utils/textUtils.js';
|
||||
import { convert } from 'html-to-text';
|
||||
import {
|
||||
logWebFetchFallbackAttempt,
|
||||
@@ -33,6 +34,10 @@ import { LRUCache } from 'mnemonist';
|
||||
|
||||
const URL_FETCH_TIMEOUT_MS = 10000;
|
||||
const MAX_CONTENT_LENGTH = 100000;
|
||||
const MAX_EXPERIMENTAL_FETCH_SIZE = 10 * 1024 * 1024; // 10MB
|
||||
const USER_AGENT =
|
||||
'Mozilla/5.0 (compatible; Google-Gemini-CLI/1.0; +https://github.com/google-gemini/gemini-cli)';
|
||||
const TRUNCATION_WARNING = '\n\n... [Content truncated due to size limit] ...';
|
||||
|
||||
// Rate limiting configuration
|
||||
const RATE_LIMIT_WINDOW_MS = 60000; // 1 minute
|
||||
@@ -107,6 +112,23 @@ export function parsePrompt(text: string): {
|
||||
return { validUrls, errors };
|
||||
}
|
||||
|
||||
/**
 * Safely converts a GitHub blob URL to a raw content URL.
 *
 * A URL is rewritten only when its host is exactly `github.com` and its path
 * has the shape `/<owner>/<repo>/blob/<rest>`; the result is
 * `https://raw.githubusercontent.com/<owner>/<repo>/<rest>` with any query
 * string and fragment carried over. Only the `/blob/` segment that directly
 * follows `<owner>/<repo>` is removed, so owners, repos or files that are
 * themselves named `blob` are left intact.
 *
 * Every other input is returned unchanged: strings that do not parse as a URL,
 * other hosts, and github.com paths where `/blob/` appears elsewhere (for
 * example under `/tree/...`). Rewriting the host for those would point at a
 * raw URL that does not correspond to the original page.
 *
 * @param urlStr The URL string to convert.
 * @returns The raw-content URL, or `urlStr` unchanged when it does not apply.
 */
export function convertGithubUrlToRaw(urlStr: string): string {
  let url: URL;
  try {
    url = new URL(urlStr);
  } catch {
    // Not a parseable URL; hand it back untouched.
    return urlStr;
  }

  if (url.hostname !== 'github.com') {
    return urlStr;
  }

  const rawPath = url.pathname.replace(
    /^\/([^/]+\/[^/]+)\/blob\//,
    '/$1/',
  );
  if (rawPath === url.pathname) {
    // The path is not /<owner>/<repo>/blob/...; switching hosts without
    // rewriting the path would yield a broken raw URL.
    return urlStr;
  }

  url.hostname = 'raw.githubusercontent.com';
  url.pathname = rawPath;
  return url.href;
}
|
||||
|
||||
// Interfaces for grounding metadata (similar to web-search.ts)
|
||||
interface GroundingChunkWeb {
|
||||
uri?: string;
|
||||
@@ -135,7 +157,11 @@ export interface WebFetchToolParams {
|
||||
/**
|
||||
* The prompt containing URL(s) (up to 20) and instructions for processing their content.
|
||||
*/
|
||||
prompt: string;
|
||||
prompt?: string;
|
||||
/**
|
||||
* Direct URL to fetch (experimental mode).
|
||||
*/
|
||||
url?: string;
|
||||
}
|
||||
|
||||
interface ErrorWithStatus extends Error {
|
||||
@@ -157,21 +183,22 @@ class WebFetchToolInvocation extends BaseToolInvocation<
|
||||
}
|
||||
|
||||
private async executeFallback(signal: AbortSignal): Promise<ToolResult> {
|
||||
const { validUrls: urls } = parsePrompt(this.params.prompt);
|
||||
const { validUrls: urls } = parsePrompt(this.params.prompt!);
|
||||
// For now, we only support one URL for fallback
|
||||
let url = urls[0];
|
||||
|
||||
// Convert GitHub blob URL to raw URL
|
||||
if (url.includes('github.com') && url.includes('/blob/')) {
|
||||
url = url
|
||||
.replace('github.com', 'raw.githubusercontent.com')
|
||||
.replace('/blob/', '/');
|
||||
}
|
||||
url = convertGithubUrlToRaw(url);
|
||||
|
||||
try {
|
||||
const response = await retryWithBackoff(
|
||||
async () => {
|
||||
const res = await fetchWithTimeout(url, URL_FETCH_TIMEOUT_MS);
|
||||
const res = await fetchWithTimeout(url, URL_FETCH_TIMEOUT_MS, {
|
||||
signal,
|
||||
headers: {
|
||||
'User-Agent': USER_AGENT,
|
||||
},
|
||||
});
|
||||
if (!res.ok) {
|
||||
const error = new Error(
|
||||
`Request failed with status code ${res.status} ${res.statusText}`,
|
||||
@@ -186,7 +213,11 @@ class WebFetchToolInvocation extends BaseToolInvocation<
|
||||
},
|
||||
);
|
||||
|
||||
const rawContent = await response.text();
|
||||
const bodyBuffer = await this.readResponseWithLimit(
|
||||
response,
|
||||
MAX_EXPERIMENTAL_FETCH_SIZE,
|
||||
);
|
||||
const rawContent = bodyBuffer.toString('utf8');
|
||||
const contentType = response.headers.get('content-type') || '';
|
||||
let textContent: string;
|
||||
|
||||
@@ -207,7 +238,11 @@ class WebFetchToolInvocation extends BaseToolInvocation<
|
||||
textContent = rawContent;
|
||||
}
|
||||
|
||||
textContent = textContent.substring(0, MAX_CONTENT_LENGTH);
|
||||
textContent = truncateString(
|
||||
textContent,
|
||||
MAX_CONTENT_LENGTH,
|
||||
TRUNCATION_WARNING,
|
||||
);
|
||||
|
||||
const geminiClient = this.config.getGeminiClient();
|
||||
const fallbackPrompt = `The user requested the following: "${this.params.prompt}".
|
||||
@@ -245,10 +280,12 @@ ${textContent}
|
||||
}
|
||||
|
||||
getDescription(): string {
|
||||
if (this.params.url) {
|
||||
return `Fetching content from: ${this.params.url}`;
|
||||
}
|
||||
const prompt = this.params.prompt || '';
|
||||
const displayPrompt =
|
||||
this.params.prompt.length > 100
|
||||
? this.params.prompt.substring(0, 97) + '...'
|
||||
: this.params.prompt;
|
||||
prompt.length > 100 ? prompt.substring(0, 97) + '...' : prompt;
|
||||
return `Processing URLs and instructions from prompt: "${displayPrompt}"`;
|
||||
}
|
||||
|
||||
@@ -261,22 +298,24 @@ ${textContent}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Perform GitHub URL conversion here to differentiate between user-provided
|
||||
// URL and the actual URL to be fetched.
|
||||
const { validUrls } = parsePrompt(this.params.prompt);
|
||||
const urls = validUrls.map((url) => {
|
||||
if (url.includes('github.com') && url.includes('/blob/')) {
|
||||
return url
|
||||
.replace('github.com', 'raw.githubusercontent.com')
|
||||
.replace('/blob/', '/');
|
||||
}
|
||||
return url;
|
||||
});
|
||||
let urls: string[] = [];
|
||||
let prompt = this.params.prompt || '';
|
||||
|
||||
if (this.params.url) {
|
||||
urls = [this.params.url];
|
||||
prompt = `Fetch ${this.params.url}`;
|
||||
} else if (this.params.prompt) {
|
||||
const { validUrls } = parsePrompt(this.params.prompt);
|
||||
urls = validUrls;
|
||||
}
|
||||
|
||||
// Perform GitHub URL conversion here
|
||||
urls = urls.map((url) => convertGithubUrlToRaw(url));
|
||||
|
||||
const confirmationDetails: ToolCallConfirmationDetails = {
|
||||
type: 'info',
|
||||
title: `Confirm Web Fetch`,
|
||||
prompt: this.params.prompt,
|
||||
prompt,
|
||||
urls,
|
||||
onConfirm: async (_outcome: ToolConfirmationOutcome) => {
|
||||
// Mode transitions (e.g. AUTO_EDIT) and policy updates are now
|
||||
@@ -286,8 +325,189 @@ ${textContent}
|
||||
return confirmationDetails;
|
||||
}
|
||||
|
||||
/**
 * Reads a response body into a single Buffer, refusing bodies larger than
 * `limit` bytes.
 *
 * The declared `content-length` header is checked first so an oversized
 * response is rejected before any bytes are read. The body is then consumed
 * chunk by chunk and the running byte count is checked after every chunk,
 * which also covers responses with a missing or understated header.
 *
 * @param response The fetch response whose body should be read.
 * @param limit Maximum number of body bytes to accept.
 * @returns The full body, or an empty Buffer when the response has no body.
 * @throws Error when the declared or actually received size exceeds `limit`.
 */
private async readResponseWithLimit(
  response: Response,
  limit: number,
): Promise<Buffer> {
  const declaredSize = response.headers.get('content-length');
  if (declaredSize && parseInt(declaredSize, 10) > limit) {
    throw new Error(`Content exceeds size limit of ${limit} bytes`);
  }

  const stream = response.body;
  if (!stream) {
    return Buffer.alloc(0);
  }

  const reader = stream.getReader();
  const received: Uint8Array[] = [];
  let bytesSoFar = 0;
  try {
    for (
      let next = await reader.read();
      !next.done;
      next = await reader.read()
    ) {
      bytesSoFar += next.value.length;
      if (bytesSoFar > limit) {
        // Best-effort attempt to stop the stream; a failing cancel is ignored
        // because the size error below is what the caller needs to see.
        await reader.cancel().catch(() => {});
        throw new Error(`Content exceeds size limit of ${limit} bytes`);
      }
      received.push(next.value);
    }
  } finally {
    // Release the reader's lock on every exit path, including errors.
    reader.releaseLock();
  }
  return Buffer.concat(received);
}
|
||||
|
||||
/**
 * Experimental direct-fetch path: fetches `params.url` and returns its content
 * without the prompt-based flow.
 *
 * Steps:
 * 1. Re-validates `params.url` (missing or unparseable -> INVALID_TOOL_PARAMS),
 *    as a safety net for invocations created without build() validation.
 * 2. Rewrites GitHub blob URLs via `convertGithubUrlToRaw`.
 * 3. Fetches with `fetchWithTimeout` wrapped in `retryWithBackoff`, sending an
 *    `Accept` header that prefers markdown/plain text/JSON, and reads the body
 *    capped at MAX_EXPERIMENTAL_FETCH_SIZE.
 * 4. Shapes the result by status and media type:
 *    - status >= 400: status, response headers and the response body (passed
 *      through `truncateString` with a 10000 limit) as text; no `error` field
 *      is set.
 *    - markdown / plain text / JSON: body text, passed through `truncateString`
 *      with MAX_CONTENT_LENGTH.
 *    - HTML: converted with html-to-text, keeping link hrefs resolved against
 *      the fetched URL.
 *    - image/*, video/*, application/pdf: base64 `inlineData` with the bare
 *      media type (parameters such as `; charset=...` stripped).
 *    - anything else: treated as UTF-8 text.
 *
 * Failures during fetch or read (including the size limit being exceeded) are
 * caught and reported as a WEB_FETCH_FALLBACK_FAILED error result instead of
 * being thrown.
 *
 * @param signal Abort signal forwarded to the fetch call.
 * @returns The tool result for the fetched URL.
 */
private async executeExperimental(signal: AbortSignal): Promise<ToolResult> {
  if (!this.params.url) {
    return {
      llmContent: 'Error: No URL provided.',
      returnDisplay: 'Error: No URL provided.',
      error: {
        message: 'No URL provided.',
        type: ToolErrorType.INVALID_TOOL_PARAMS,
      },
    };
  }

  let url: string;
  try {
    // Normalizes the URL (e.g. adds the trailing slash for a bare origin).
    url = new URL(this.params.url).href;
  } catch {
    return {
      llmContent: `Error: Invalid URL "${this.params.url}"`,
      returnDisplay: `Error: Invalid URL "${this.params.url}"`,
      error: {
        message: `Invalid URL "${this.params.url}"`,
        type: ToolErrorType.INVALID_TOOL_PARAMS,
      },
    };
  }

  // Convert GitHub blob URL to raw URL
  url = convertGithubUrlToRaw(url);

  try {
    const response = await retryWithBackoff(
      () =>
        fetchWithTimeout(url, URL_FETCH_TIMEOUT_MS, {
          signal,
          headers: {
            Accept:
              'text/markdown, text/plain;q=0.9, application/json;q=0.9, text/html;q=0.8, application/pdf;q=0.7, video/*;q=0.7, */*;q=0.5',
            'User-Agent': USER_AGENT,
          },
        }),
      {
        retryFetchErrors: this.config.getRetryFetchErrors(),
      },
    );

    const contentType = response.headers.get('content-type') || '';
    const status = response.status;
    const bodyBuffer = await this.readResponseWithLimit(
      response,
      MAX_EXPERIMENTAL_FETCH_SIZE,
    );

    if (status >= 400) {
      // Surface the raw failure (status, headers, body) so the model can see
      // why the request was rejected.
      const rawResponseText = bodyBuffer.toString('utf8');
      const headers: Record<string, string> = {};
      response.headers.forEach((value, key) => {
        headers[key] = value;
      });
      const errorContent = `Request failed with status ${status}
Headers: ${JSON.stringify(headers, null, 2)}
Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response truncated] ...')}`;
      return {
        llmContent: errorContent,
        returnDisplay: `Failed to fetch ${url} (Status: ${status})`,
      };
    }

    const lowContentType = contentType.toLowerCase();
    // Bare media type without parameters, e.g. "application/pdf" for
    // "Application/PDF; charset=binary". Used for exact/prefix matching and as
    // the inline-data mimeType.
    const [rawMimeType = ''] = lowContentType.split(';');
    const mimeType = rawMimeType.trim();

    if (
      lowContentType.includes('text/markdown') ||
      lowContentType.includes('text/plain') ||
      lowContentType.includes('application/json')
    ) {
      const text = truncateString(
        bodyBuffer.toString('utf8'),
        MAX_CONTENT_LENGTH,
        TRUNCATION_WARNING,
      );
      return {
        llmContent: text,
        returnDisplay: `Fetched ${contentType} content from ${url}`,
      };
    }

    if (lowContentType.includes('text/html')) {
      const html = bodyBuffer.toString('utf8');
      const textContent = truncateString(
        convert(html, {
          wordwrap: false,
          selectors: [
            // Keep hrefs and resolve relative links against the fetched URL.
            { selector: 'a', options: { ignoreHref: false, baseUrl: url } },
          ],
        }),
        MAX_CONTENT_LENGTH,
        TRUNCATION_WARNING,
      );
      return {
        llmContent: textContent,
        returnDisplay: `Fetched and converted HTML content from ${url}`,
      };
    }

    if (
      mimeType.startsWith('image/') ||
      mimeType.startsWith('video/') ||
      mimeType === 'application/pdf'
    ) {
      // Binary media is returned as base64 inline data, not decoded as text.
      const base64Data = bodyBuffer.toString('base64');
      return {
        llmContent: {
          inlineData: {
            data: base64Data,
            mimeType,
          },
        },
        returnDisplay: `Fetched ${contentType} from ${url}`,
      };
    }

    // Fallback for unknown types - try as text
    const text = truncateString(
      bodyBuffer.toString('utf8'),
      MAX_CONTENT_LENGTH,
      TRUNCATION_WARNING,
    );
    return {
      llmContent: text,
      returnDisplay: `Fetched ${contentType || 'unknown'} content from ${url}`,
    };
  } catch (e) {
    const errorMessage = `Error during experimental fetch for ${url}: ${getErrorMessage(e)}`;
    return {
      llmContent: `Error: ${errorMessage}`,
      returnDisplay: `Error: ${errorMessage}`,
      error: {
        message: errorMessage,
        type: ToolErrorType.WEB_FETCH_FALLBACK_FAILED,
      },
    };
  }
}
|
||||
|
||||
async execute(signal: AbortSignal): Promise<ToolResult> {
|
||||
const userPrompt = this.params.prompt;
|
||||
if (this.config.getDirectWebFetch()) {
|
||||
return this.executeExperimental(signal);
|
||||
}
|
||||
const userPrompt = this.params.prompt!;
|
||||
const { validUrls: urls } = parsePrompt(userPrompt);
|
||||
const url = urls[0];
|
||||
|
||||
@@ -475,6 +695,18 @@ export class WebFetchTool extends BaseDeclarativeTool<
|
||||
protected override validateToolParamValues(
|
||||
params: WebFetchToolParams,
|
||||
): string | null {
|
||||
if (this.config.getDirectWebFetch()) {
|
||||
if (!params.url) {
|
||||
return "The 'url' parameter is required.";
|
||||
}
|
||||
try {
|
||||
new URL(params.url);
|
||||
} catch {
|
||||
return `Invalid URL: "${params.url}"`;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!params.prompt || params.prompt.trim() === '') {
|
||||
return "The 'prompt' parameter cannot be empty and must contain URL(s) and instructions.";
|
||||
}
|
||||
@@ -508,6 +740,25 @@ export class WebFetchTool extends BaseDeclarativeTool<
|
||||
}
|
||||
|
||||
override getSchema(modelId?: string) {
|
||||
return resolveToolDeclaration(WEB_FETCH_DEFINITION, modelId);
|
||||
const schema = resolveToolDeclaration(WEB_FETCH_DEFINITION, modelId);
|
||||
if (this.config.getDirectWebFetch()) {
|
||||
return {
|
||||
...schema,
|
||||
description:
|
||||
'Fetch content from a URL directly. Send multiple requests for this tool if multiple URL fetches are needed.',
|
||||
parametersJsonSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
url: {
|
||||
type: 'string',
|
||||
description:
|
||||
'The URL to fetch. Must be a valid http or https URL.',
|
||||
},
|
||||
},
|
||||
required: ['url'],
|
||||
},
|
||||
};
|
||||
}
|
||||
return schema;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user