mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-26 04:54:25 -07:00
refactor(core): address PR feedback on web_fetch nits and refactoring
This commit is contained in:
@@ -2129,12 +2129,17 @@ export class RecoveryAttemptEvent extends BaseAgentEvent {
|
||||
|
||||
export const EVENT_WEB_FETCH_FALLBACK_ATTEMPT =
|
||||
'gemini_cli.web_fetch_fallback_attempt';
|
||||
export type WebFetchFallbackReason =
|
||||
| 'private_ip'
|
||||
| 'primary_failed'
|
||||
| 'private_ip_skipped';
|
||||
|
||||
export class WebFetchFallbackAttemptEvent implements BaseTelemetryEvent {
|
||||
'event.name': 'web_fetch_fallback_attempt';
|
||||
'event.timestamp': string;
|
||||
reason: 'private_ip' | 'primary_failed' | 'private_ip_skipped';
|
||||
reason: WebFetchFallbackReason;
|
||||
|
||||
constructor(reason: 'private_ip' | 'primary_failed' | 'private_ip_skipped') {
|
||||
constructor(reason: WebFetchFallbackReason) {
|
||||
this['event.name'] = 'web_fetch_fallback_attempt';
|
||||
this['event.timestamp'] = new Date().toISOString();
|
||||
this.reason = reason;
|
||||
|
||||
@@ -171,6 +171,10 @@ interface GroundingChunkItem {
|
||||
web?: GroundingChunkWeb;
|
||||
}
|
||||
|
||||
function isGroundingChunkItem(item: unknown): item is GroundingChunkItem {
|
||||
return typeof item === 'object' && item !== null;
|
||||
}
|
||||
|
||||
interface GroundingSupportSegment {
|
||||
startIndex: number;
|
||||
endIndex: number;
|
||||
@@ -182,6 +186,10 @@ interface GroundingSupportItem {
|
||||
groundingChunkIndices?: number[];
|
||||
}
|
||||
|
||||
function isGroundingSupportItem(item: unknown): item is GroundingSupportItem {
|
||||
return typeof item === 'object' && item !== null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parameters for the WebFetch tool
|
||||
*/
|
||||
@@ -320,22 +328,57 @@ class WebFetchToolInvocation extends BaseToolInvocation<
|
||||
}
|
||||
}
|
||||
|
||||
private filterAndValidateUrls(urls: string[]): {
|
||||
toFetch: string[];
|
||||
skipped: string[];
|
||||
} {
|
||||
const uniqueUrls = [...new Set(urls.map(normalizeUrl))];
|
||||
const toFetch: string[] = [];
|
||||
const skipped: string[] = [];
|
||||
|
||||
for (const url of uniqueUrls) {
|
||||
if (this.isBlockedHost(url)) {
|
||||
debugLogger.warn(
|
||||
`[WebFetchTool] Skipped private or local host: ${url}`,
|
||||
);
|
||||
logWebFetchFallbackAttempt(
|
||||
this.config,
|
||||
new WebFetchFallbackAttemptEvent('private_ip_skipped'),
|
||||
);
|
||||
skipped.push(`[Blocked Host] ${url}`);
|
||||
continue;
|
||||
}
|
||||
if (!checkRateLimit(url).allowed) {
|
||||
debugLogger.warn(`[WebFetchTool] Rate limit exceeded for host: ${url}`);
|
||||
skipped.push(`[Rate limit exceeded] ${url}`);
|
||||
continue;
|
||||
}
|
||||
toFetch.push(url);
|
||||
}
|
||||
return { toFetch, skipped };
|
||||
}
|
||||
|
||||
private async executeFallback(
|
||||
urls: string[],
|
||||
signal: AbortSignal,
|
||||
): Promise<ToolResult> {
|
||||
const contentBudget = Math.floor(MAX_CONTENT_LENGTH / urls.length);
|
||||
const uniqueUrls = [...new Set(urls)];
|
||||
const contentBudget = Math.floor(
|
||||
MAX_CONTENT_LENGTH / (uniqueUrls.length || 1),
|
||||
);
|
||||
const results: string[] = [];
|
||||
|
||||
for (const url of urls) {
|
||||
for (const url of uniqueUrls) {
|
||||
results.push(
|
||||
await this.executeFallbackForUrl(url, signal, contentBudget),
|
||||
);
|
||||
}
|
||||
|
||||
const randomDelimiter = `---${Math.random().toString(36).substring(2, 15)}---`;
|
||||
|
||||
const aggregatedContent = results
|
||||
.map((content, i) => `URL: ${urls[i]}\nContent:\n${content}`)
|
||||
.join('\n\n---\n\n');
|
||||
.map((content, i) => `URL: ${uniqueUrls[i]}\nContent:\n${content}`)
|
||||
.join(`\n\n${randomDelimiter}\n\n`);
|
||||
|
||||
try {
|
||||
const geminiClient = this.config.getGeminiClient();
|
||||
@@ -343,9 +386,9 @@ class WebFetchToolInvocation extends BaseToolInvocation<
|
||||
|
||||
I was unable to access the URL(s) directly using the primary fetch tool. Instead, I have fetched the raw content of the page(s). Please use the following content to answer the request. Do not attempt to access the URL(s) again.
|
||||
|
||||
---
|
||||
${randomDelimiter}
|
||||
${aggregatedContent}
|
||||
---
|
||||
${randomDelimiter}
|
||||
`;
|
||||
const result = await geminiClient.generateContent(
|
||||
{ model: 'web-fetch-fallback' },
|
||||
@@ -658,30 +701,7 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun
|
||||
const userPrompt = this.params.prompt!;
|
||||
const { validUrls } = parsePrompt(userPrompt);
|
||||
|
||||
// Unit 1: Normalization & Deduplication
|
||||
const uniqueUrls = [...new Set(validUrls.map(normalizeUrl))];
|
||||
const toFetch: string[] = [];
|
||||
const skipped: string[] = [];
|
||||
|
||||
for (const url of uniqueUrls) {
|
||||
if (this.isBlockedHost(url)) {
|
||||
debugLogger.warn(
|
||||
`[WebFetchTool] Skipped private or local host: ${url}`,
|
||||
);
|
||||
logWebFetchFallbackAttempt(
|
||||
this.config,
|
||||
new WebFetchFallbackAttemptEvent('private_ip_skipped'),
|
||||
);
|
||||
skipped.push(`[Blocked Host] ${url}`);
|
||||
continue;
|
||||
}
|
||||
if (!checkRateLimit(url).allowed) {
|
||||
debugLogger.warn(`[WebFetchTool] Rate limit exceeded for host: ${url}`);
|
||||
skipped.push(`[Rate limit exceeded] ${url}`);
|
||||
continue;
|
||||
}
|
||||
toFetch.push(url);
|
||||
}
|
||||
const { toFetch, skipped } = this.filterAndValidateUrls(validUrls);
|
||||
|
||||
// If everything was skipped, fail early
|
||||
if (toFetch.length === 0 && skipped.length > 0) {
|
||||
@@ -723,13 +743,9 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun
|
||||
}
|
||||
|
||||
// 1. Apply Grounding Supports (Citations)
|
||||
const rawGroundingSupports = groundingMetadata?.groundingSupports;
|
||||
const groundingSupports = Array.isArray(rawGroundingSupports)
|
||||
? rawGroundingSupports.filter(
|
||||
(item): item is GroundingSupportItem =>
|
||||
typeof item === 'object' && item !== null,
|
||||
)
|
||||
: undefined;
|
||||
const groundingSupports = groundingMetadata?.groundingSupports?.filter(
|
||||
isGroundingSupportItem,
|
||||
);
|
||||
if (groundingSupports && groundingSupports.length > 0) {
|
||||
const insertions: Array<{ index: number; marker: string }> = [];
|
||||
groundingSupports.forEach((support) => {
|
||||
@@ -753,13 +769,8 @@ Response: ${truncateString(rawResponseText, 10000, '\n\n... [Error response trun
|
||||
}
|
||||
|
||||
// 2. Append Source List
|
||||
const rawSources = groundingMetadata?.groundingChunks;
|
||||
const sources = Array.isArray(rawSources)
|
||||
? rawSources.filter(
|
||||
(item): item is GroundingChunkItem =>
|
||||
typeof item === 'object' && item !== null,
|
||||
)
|
||||
: undefined;
|
||||
const sources =
|
||||
groundingMetadata?.groundingChunks?.filter(isGroundingChunkItem);
|
||||
if (sources && sources.length > 0) {
|
||||
const sourceListFormatted: string[] = [];
|
||||
sources.forEach((source, index) => {
|
||||
|
||||
Reference in New Issue
Block a user