mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-25 12:34:38 -07:00
feat(telemetry): implement retry attempt telemetry for network related retries (#22027)
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -18,9 +18,16 @@ import { handleFallback } from '../fallback/handler.js';
|
||||
import { getResponseText } from '../utils/partUtils.js';
|
||||
import { reportError } from '../utils/errorReporting.js';
|
||||
import { getErrorMessage } from '../utils/errors.js';
|
||||
import { logMalformedJsonResponse } from '../telemetry/loggers.js';
|
||||
import { MalformedJsonResponseEvent, LlmRole } from '../telemetry/types.js';
|
||||
import { retryWithBackoff } from '../utils/retry.js';
|
||||
import {
|
||||
logMalformedJsonResponse,
|
||||
logNetworkRetryAttempt,
|
||||
} from '../telemetry/loggers.js';
|
||||
import {
|
||||
MalformedJsonResponseEvent,
|
||||
LlmRole,
|
||||
NetworkRetryAttemptEvent,
|
||||
} from '../telemetry/types.js';
|
||||
import { retryWithBackoff, getRetryErrorType } from '../utils/retry.js';
|
||||
import { coreEvents } from '../utils/events.js';
|
||||
import { getDisplayString } from '../config/models.js';
|
||||
import type { ModelConfigKey } from '../services/modelConfigService.js';
|
||||
@@ -331,14 +338,29 @@ export class BaseLlmClient {
|
||||
this.authType ?? this.config.getContentGeneratorConfig()?.authType,
|
||||
retryFetchErrors: this.config.getRetryFetchErrors(),
|
||||
onRetry: (attempt, error, delayMs) => {
|
||||
const actualMaxAttempts =
|
||||
availabilityMaxAttempts ?? maxAttempts ?? DEFAULT_MAX_ATTEMPTS;
|
||||
const modelName = getDisplayString(currentModel);
|
||||
const errorType = getRetryErrorType(error);
|
||||
|
||||
coreEvents.emitRetryAttempt({
|
||||
attempt,
|
||||
maxAttempts:
|
||||
availabilityMaxAttempts ?? maxAttempts ?? DEFAULT_MAX_ATTEMPTS,
|
||||
maxAttempts: actualMaxAttempts,
|
||||
delayMs,
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
model: getDisplayString(currentModel),
|
||||
error: errorType,
|
||||
model: modelName,
|
||||
});
|
||||
|
||||
logNetworkRetryAttempt(
|
||||
this.config,
|
||||
new NetworkRetryAttemptEvent(
|
||||
attempt,
|
||||
actualMaxAttempts,
|
||||
errorType,
|
||||
delayMs,
|
||||
modelName,
|
||||
),
|
||||
);
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
|
||||
@@ -85,14 +85,20 @@ vi.mock('../fallback/handler.js', () => ({
|
||||
handleFallback: mockHandleFallback,
|
||||
}));
|
||||
|
||||
const { mockLogContentRetry, mockLogContentRetryFailure } = vi.hoisted(() => ({
|
||||
const {
|
||||
mockLogContentRetry,
|
||||
mockLogContentRetryFailure,
|
||||
mockLogNetworkRetryAttempt,
|
||||
} = vi.hoisted(() => ({
|
||||
mockLogContentRetry: vi.fn(),
|
||||
mockLogContentRetryFailure: vi.fn(),
|
||||
mockLogNetworkRetryAttempt: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock('../telemetry/loggers.js', () => ({
|
||||
logContentRetry: mockLogContentRetry,
|
||||
logContentRetryFailure: mockLogContentRetryFailure,
|
||||
logNetworkRetryAttempt: mockLogNetworkRetryAttempt,
|
||||
}));
|
||||
|
||||
vi.mock('../telemetry/uiTelemetry.js', () => ({
|
||||
|
||||
@@ -19,7 +19,11 @@ import {
|
||||
type GenerateContentParameters,
|
||||
} from '@google/genai';
|
||||
import { toParts } from '../code_assist/converter.js';
|
||||
import { retryWithBackoff, isRetryableError } from '../utils/retry.js';
|
||||
import {
|
||||
retryWithBackoff,
|
||||
isRetryableError,
|
||||
getRetryErrorType,
|
||||
} from '../utils/retry.js';
|
||||
import type { ValidationRequiredError } from '../utils/googleQuotaErrors.js';
|
||||
import type { Config } from '../config/config.js';
|
||||
import {
|
||||
@@ -33,6 +37,7 @@ import type { CompletedToolCall } from './coreToolScheduler.js';
|
||||
import {
|
||||
logContentRetry,
|
||||
logContentRetryFailure,
|
||||
logNetworkRetryAttempt,
|
||||
} from '../telemetry/loggers.js';
|
||||
import {
|
||||
ChatRecordingService,
|
||||
@@ -41,6 +46,7 @@ import {
|
||||
import {
|
||||
ContentRetryEvent,
|
||||
ContentRetryFailureEvent,
|
||||
NetworkRetryAttemptEvent,
|
||||
type LlmRole,
|
||||
} from '../telemetry/types.js';
|
||||
import { handleFallback } from '../fallback/handler.js';
|
||||
@@ -412,7 +418,9 @@ export class GeminiChat {
|
||||
}
|
||||
|
||||
const isContentError = error instanceof InvalidStreamError;
|
||||
const errorType = isContentError ? error.type : 'NETWORK_ERROR';
|
||||
const errorType = isContentError
|
||||
? error.type
|
||||
: getRetryErrorType(error);
|
||||
|
||||
if (
|
||||
(isContentError && isGemini2Model(model)) ||
|
||||
@@ -422,15 +430,28 @@ export class GeminiChat {
|
||||
if (attempt < maxAttempts - 1) {
|
||||
const delayMs = INVALID_CONTENT_RETRY_OPTIONS.initialDelayMs;
|
||||
|
||||
logContentRetry(
|
||||
this.config,
|
||||
new ContentRetryEvent(attempt, errorType, delayMs, model),
|
||||
);
|
||||
if (isContentError) {
|
||||
logContentRetry(
|
||||
this.config,
|
||||
new ContentRetryEvent(attempt, errorType, delayMs, model),
|
||||
);
|
||||
} else {
|
||||
logNetworkRetryAttempt(
|
||||
this.config,
|
||||
new NetworkRetryAttemptEvent(
|
||||
attempt + 1,
|
||||
maxAttempts,
|
||||
errorType,
|
||||
delayMs * (attempt + 1),
|
||||
model,
|
||||
),
|
||||
);
|
||||
}
|
||||
coreEvents.emitRetryAttempt({
|
||||
attempt: attempt + 1,
|
||||
maxAttempts,
|
||||
delayMs: delayMs * (attempt + 1),
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
error: errorType,
|
||||
model,
|
||||
});
|
||||
await new Promise((res) =>
|
||||
|
||||
@@ -47,14 +47,20 @@ vi.mock('../utils/retry.js', async (importOriginal) => {
|
||||
});
|
||||
|
||||
// Mock loggers
|
||||
const { mockLogContentRetry, mockLogContentRetryFailure } = vi.hoisted(() => ({
|
||||
const {
|
||||
mockLogContentRetry,
|
||||
mockLogContentRetryFailure,
|
||||
mockLogNetworkRetryAttempt,
|
||||
} = vi.hoisted(() => ({
|
||||
mockLogContentRetry: vi.fn(),
|
||||
mockLogContentRetryFailure: vi.fn(),
|
||||
mockLogNetworkRetryAttempt: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock('../telemetry/loggers.js', () => ({
|
||||
logContentRetry: mockLogContentRetry,
|
||||
logContentRetryFailure: mockLogContentRetryFailure,
|
||||
logNetworkRetryAttempt: mockLogNetworkRetryAttempt,
|
||||
}));
|
||||
|
||||
describe('GeminiChat Network Retries', () => {
|
||||
@@ -185,10 +191,10 @@ describe('GeminiChat Network Retries', () => {
|
||||
expect(successChunk).toBeDefined();
|
||||
|
||||
// Verify retry logging
|
||||
expect(mockLogContentRetry).toHaveBeenCalledWith(
|
||||
expect(mockLogNetworkRetryAttempt).toHaveBeenCalledWith(
|
||||
expect.anything(),
|
||||
expect.objectContaining({
|
||||
error_type: 'NETWORK_ERROR',
|
||||
error_type: 'SERVER_ERROR',
|
||||
}),
|
||||
);
|
||||
});
|
||||
@@ -474,11 +480,11 @@ describe('GeminiChat Network Retries', () => {
|
||||
);
|
||||
expect(successChunk).toBeDefined();
|
||||
|
||||
// Verify retry logging was called with NETWORK_ERROR type
|
||||
expect(mockLogContentRetry).toHaveBeenCalledWith(
|
||||
// Verify retry logging was called with network error type
|
||||
expect(mockLogNetworkRetryAttempt).toHaveBeenCalledWith(
|
||||
expect.anything(),
|
||||
expect.objectContaining({
|
||||
error_type: 'NETWORK_ERROR',
|
||||
error_type: 'ERR_SSL_SSLV3_ALERT_BAD_RECORD_MAC',
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
@@ -27,6 +27,7 @@ import type {
|
||||
InvalidChunkEvent,
|
||||
ContentRetryEvent,
|
||||
ContentRetryFailureEvent,
|
||||
NetworkRetryAttemptEvent,
|
||||
ExtensionInstallEvent,
|
||||
ToolOutputTruncatedEvent,
|
||||
ExtensionUninstallEvent,
|
||||
@@ -94,6 +95,7 @@ export enum EventNames {
|
||||
INVALID_CHUNK = 'invalid_chunk',
|
||||
CONTENT_RETRY = 'content_retry',
|
||||
CONTENT_RETRY_FAILURE = 'content_retry_failure',
|
||||
RETRY_ATTEMPT = 'retry_attempt',
|
||||
EXTENSION_ENABLE = 'extension_enable',
|
||||
EXTENSION_DISABLE = 'extension_disable',
|
||||
EXTENSION_INSTALL = 'extension_install',
|
||||
@@ -1231,6 +1233,32 @@ export class ClearcutLogger {
|
||||
this.flushIfNeeded();
|
||||
}
|
||||
|
||||
logNetworkRetryAttemptEvent(event: NetworkRetryAttemptEvent): void {
|
||||
// This event is generic for any retry attempt (Gemini, WebFetch, etc.)
|
||||
const data: EventValue[] = [
|
||||
{
|
||||
gemini_cli_key:
|
||||
EventMetadataKey.GEMINI_CLI_NETWORK_RETRY_ATTEMPT_NUMBER,
|
||||
value: String(event.attempt),
|
||||
},
|
||||
{
|
||||
gemini_cli_key: EventMetadataKey.GEMINI_CLI_NETWORK_RETRY_DELAY_MS,
|
||||
value: String(event.delay_ms),
|
||||
},
|
||||
{
|
||||
gemini_cli_key: EventMetadataKey.GEMINI_CLI_NETWORK_RETRY_ERROR_TYPE,
|
||||
value: event.error_type,
|
||||
},
|
||||
{
|
||||
gemini_cli_key: EventMetadataKey.GEMINI_CLI_API_REQUEST_MODEL,
|
||||
value: event.model,
|
||||
},
|
||||
];
|
||||
|
||||
this.enqueueLogEvent(this.createLogEvent(EventNames.RETRY_ATTEMPT, data));
|
||||
this.flushIfNeeded();
|
||||
}
|
||||
|
||||
async logExtensionInstallEvent(event: ExtensionInstallEvent): Promise<void> {
|
||||
const data: EventValue[] = [
|
||||
{
|
||||
|
||||
@@ -674,4 +674,17 @@ export enum EventMetadataKey {
|
||||
|
||||
// Logs the error message for Conseca events.
|
||||
CONSECA_ERROR = 166,
|
||||
|
||||
// ==========================================================================
|
||||
// Network Retry Event Keys
|
||||
// ==========================================================================
|
||||
|
||||
// Logs the attempt number for a network retry.
|
||||
GEMINI_CLI_NETWORK_RETRY_ATTEMPT_NUMBER = 180,
|
||||
|
||||
// Logs the delay in milliseconds for a network retry.
|
||||
GEMINI_CLI_NETWORK_RETRY_DELAY_MS = 181,
|
||||
|
||||
// Logs the error type for a network retry.
|
||||
GEMINI_CLI_NETWORK_RETRY_ERROR_TYPE = 182,
|
||||
}
|
||||
|
||||
@@ -46,6 +46,7 @@ export {
|
||||
logExtensionUninstall,
|
||||
logExtensionUpdateEvent,
|
||||
logWebFetchFallbackAttempt,
|
||||
logNetworkRetryAttempt,
|
||||
logRewind,
|
||||
} from './loggers.js';
|
||||
export {
|
||||
@@ -66,6 +67,7 @@ export {
|
||||
ConversationFinishedEvent,
|
||||
ToolOutputTruncatedEvent,
|
||||
WebFetchFallbackAttemptEvent,
|
||||
NetworkRetryAttemptEvent,
|
||||
ToolCallDecision,
|
||||
RewindEvent,
|
||||
ConsecaPolicyGenerationEvent,
|
||||
@@ -111,6 +113,7 @@ export {
|
||||
recordApiErrorMetrics,
|
||||
recordFileOperationMetric,
|
||||
recordInvalidChunk,
|
||||
recordRetryAttemptMetrics,
|
||||
recordContentRetry,
|
||||
recordContentRetryFailure,
|
||||
recordModelRoutingMetrics,
|
||||
|
||||
@@ -45,6 +45,7 @@ import {
|
||||
logAgentStart,
|
||||
logAgentFinish,
|
||||
logWebFetchFallbackAttempt,
|
||||
logNetworkRetryAttempt,
|
||||
logExtensionUpdateEvent,
|
||||
logHookCall,
|
||||
} from './loggers.js';
|
||||
@@ -70,6 +71,7 @@ import {
|
||||
EVENT_AGENT_FINISH,
|
||||
EVENT_WEB_FETCH_FALLBACK_ATTEMPT,
|
||||
EVENT_INVALID_CHUNK,
|
||||
EVENT_NETWORK_RETRY_ATTEMPT,
|
||||
ApiErrorEvent,
|
||||
ApiRequestEvent,
|
||||
ApiResponseEvent,
|
||||
@@ -91,6 +93,7 @@ import {
|
||||
AgentStartEvent,
|
||||
AgentFinishEvent,
|
||||
WebFetchFallbackAttemptEvent,
|
||||
NetworkRetryAttemptEvent,
|
||||
ExtensionUpdateEvent,
|
||||
EVENT_EXTENSION_UPDATE,
|
||||
HookCallEvent,
|
||||
@@ -2429,6 +2432,56 @@ describe('loggers', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('logNetworkRetryAttempt', () => {
|
||||
const mockConfig = makeFakeConfig();
|
||||
|
||||
beforeEach(() => {
|
||||
vi.spyOn(ClearcutLogger.prototype, 'logNetworkRetryAttemptEvent');
|
||||
vi.spyOn(metrics, 'recordRetryAttemptMetrics');
|
||||
});
|
||||
|
||||
it('logs the network retry attempt event to Clearcut and OTEL', () => {
|
||||
const event = new NetworkRetryAttemptEvent(
|
||||
2,
|
||||
5,
|
||||
'Overloaded',
|
||||
1000,
|
||||
'test-model',
|
||||
);
|
||||
|
||||
logNetworkRetryAttempt(mockConfig, event);
|
||||
|
||||
expect(
|
||||
ClearcutLogger.prototype.logNetworkRetryAttemptEvent,
|
||||
).toHaveBeenCalledWith(event);
|
||||
|
||||
expect(mockLogger.emit).toHaveBeenCalledWith({
|
||||
body: 'Network retry attempt 2/5 for test-model. Delay: 1000ms. Error type: Overloaded',
|
||||
attributes: {
|
||||
'session.id': 'test-session-id',
|
||||
'user.email': 'test-user@example.com',
|
||||
'installation.id': 'test-installation-id',
|
||||
'event.name': EVENT_NETWORK_RETRY_ATTEMPT,
|
||||
'event.timestamp': '2025-01-01T00:00:00.000Z',
|
||||
interactive: false,
|
||||
attempt: 2,
|
||||
max_attempts: 5,
|
||||
error_type: 'Overloaded',
|
||||
delay_ms: 1000,
|
||||
model: 'test-model',
|
||||
},
|
||||
});
|
||||
|
||||
expect(metrics.recordRetryAttemptMetrics).toHaveBeenCalledWith(
|
||||
mockConfig,
|
||||
{
|
||||
model: 'test-model',
|
||||
attempt: 2,
|
||||
},
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Telemetry Buffering', () => {
|
||||
it('should buffer events when SDK is not initialized', async () => {
|
||||
vi.spyOn(sdk, 'isTelemetrySdkInitialized').mockReturnValue(false);
|
||||
|
||||
@@ -32,6 +32,7 @@ import {
|
||||
type InvalidChunkEvent,
|
||||
type ContentRetryEvent,
|
||||
type ContentRetryFailureEvent,
|
||||
type NetworkRetryAttemptEvent,
|
||||
type RipgrepFallbackEvent,
|
||||
type ToolOutputTruncatedEvent,
|
||||
type ModelRoutingEvent,
|
||||
@@ -62,6 +63,7 @@ import {
|
||||
recordToolCallMetrics,
|
||||
recordChatCompressionMetrics,
|
||||
recordFileOperationMetric,
|
||||
recordRetryAttemptMetrics,
|
||||
recordContentRetry,
|
||||
recordContentRetryFailure,
|
||||
recordModelRoutingMetrics,
|
||||
@@ -485,6 +487,25 @@ export function logInvalidChunk(
|
||||
});
|
||||
}
|
||||
|
||||
export function logNetworkRetryAttempt(
|
||||
config: Config,
|
||||
event: NetworkRetryAttemptEvent,
|
||||
): void {
|
||||
ClearcutLogger.getInstance(config)?.logNetworkRetryAttemptEvent(event);
|
||||
bufferTelemetryEvent(() => {
|
||||
const logger = logs.getLogger(SERVICE_NAME);
|
||||
const logRecord: LogRecord = {
|
||||
body: event.toLogBody(),
|
||||
attributes: event.toOpenTelemetryAttributes(config),
|
||||
};
|
||||
logger.emit(logRecord);
|
||||
recordRetryAttemptMetrics(config, {
|
||||
model: event.model,
|
||||
attempt: event.attempt,
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
export function logContentRetry(
|
||||
config: Config,
|
||||
event: ContentRetryEvent,
|
||||
|
||||
@@ -40,6 +40,7 @@ const INVALID_CHUNK_COUNT = 'gemini_cli.chat.invalid_chunk.count';
|
||||
const CONTENT_RETRY_COUNT = 'gemini_cli.chat.content_retry.count';
|
||||
const CONTENT_RETRY_FAILURE_COUNT =
|
||||
'gemini_cli.chat.content_retry_failure.count';
|
||||
const NETWORK_RETRY_COUNT = 'gemini_cli.network_retry.count';
|
||||
const MODEL_ROUTING_LATENCY = 'gemini_cli.model_routing.latency';
|
||||
const MODEL_ROUTING_FAILURE_COUNT = 'gemini_cli.model_routing.failure.count';
|
||||
const MODEL_SLASH_COMMAND_CALL_COUNT =
|
||||
@@ -166,6 +167,16 @@ const COUNTER_DEFINITIONS = {
|
||||
assign: (c: Counter) => (contentRetryFailureCounter = c),
|
||||
attributes: {} as Record<string, never>,
|
||||
},
|
||||
[NETWORK_RETRY_COUNT]: {
|
||||
description: 'Counts network retries.',
|
||||
valueType: ValueType.INT,
|
||||
assign: (c: Counter) => (networkRetryCounter = c),
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
attributes: {} as {
|
||||
model: string;
|
||||
attempt: number;
|
||||
},
|
||||
},
|
||||
[MODEL_ROUTING_FAILURE_COUNT]: {
|
||||
description: 'Counts model routing failures.',
|
||||
valueType: ValueType.INT,
|
||||
@@ -610,6 +621,7 @@ let chatCompressionCounter: Counter | undefined;
|
||||
let invalidChunkCounter: Counter | undefined;
|
||||
let contentRetryCounter: Counter | undefined;
|
||||
let contentRetryFailureCounter: Counter | undefined;
|
||||
let networkRetryCounter: Counter | undefined;
|
||||
let modelRoutingLatencyHistogram: Histogram | undefined;
|
||||
let modelRoutingFailureCounter: Counter | undefined;
|
||||
let modelSlashCommandCallCounter: Counter | undefined;
|
||||
@@ -848,6 +860,20 @@ export function recordInvalidChunk(config: Config): void {
|
||||
invalidChunkCounter.add(1, baseMetricDefinition.getCommonAttributes(config));
|
||||
}
|
||||
|
||||
export function recordRetryAttemptMetrics(
|
||||
config: Config,
|
||||
attributes: {
|
||||
model: string;
|
||||
attempt: number;
|
||||
},
|
||||
): void {
|
||||
if (!networkRetryCounter || !isMetricsInitialized) return;
|
||||
networkRetryCounter.add(1, {
|
||||
...baseMetricDefinition.getCommonAttributes(config),
|
||||
...attributes,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Records a metric for when a retry is triggered due to a content error.
|
||||
*/
|
||||
|
||||
@@ -1341,6 +1341,51 @@ export class ContentRetryEvent implements BaseTelemetryEvent {
|
||||
|
||||
export const EVENT_CONTENT_RETRY_FAILURE =
|
||||
'gemini_cli.chat.content_retry_failure';
|
||||
|
||||
export const EVENT_NETWORK_RETRY_ATTEMPT = 'gemini_cli.network_retry_attempt';
|
||||
export class NetworkRetryAttemptEvent implements BaseTelemetryEvent {
|
||||
'event.name': 'network_retry_attempt';
|
||||
'event.timestamp': string;
|
||||
attempt: number;
|
||||
max_attempts: number;
|
||||
error_type: string;
|
||||
delay_ms: number;
|
||||
model: string;
|
||||
|
||||
constructor(
|
||||
attempt: number,
|
||||
max_attempts: number,
|
||||
error_type: string,
|
||||
delay_ms: number,
|
||||
model: string,
|
||||
) {
|
||||
this['event.name'] = 'network_retry_attempt';
|
||||
this['event.timestamp'] = new Date().toISOString();
|
||||
this.attempt = attempt;
|
||||
this.max_attempts = max_attempts;
|
||||
this.error_type = error_type;
|
||||
this.delay_ms = delay_ms;
|
||||
this.model = model;
|
||||
}
|
||||
|
||||
toOpenTelemetryAttributes(config: Config): LogAttributes {
|
||||
return {
|
||||
...getCommonAttributes(config),
|
||||
'event.name': EVENT_NETWORK_RETRY_ATTEMPT,
|
||||
'event.timestamp': this['event.timestamp'],
|
||||
attempt: this.attempt,
|
||||
max_attempts: this.max_attempts,
|
||||
error_type: this.error_type,
|
||||
delay_ms: this.delay_ms,
|
||||
model: this.model,
|
||||
};
|
||||
}
|
||||
|
||||
toLogBody(): string {
|
||||
return `Network retry attempt ${this.attempt}/${this.max_attempts} for ${this.model}. Delay: ${this.delay_ms}ms. Error type: ${this.error_type}`;
|
||||
}
|
||||
}
|
||||
|
||||
export class ContentRetryFailureEvent implements BaseTelemetryEvent {
|
||||
'event.name': 'content_retry_failure';
|
||||
'event.timestamp': string;
|
||||
|
||||
@@ -27,12 +27,14 @@ import { convert } from 'html-to-text';
|
||||
import {
|
||||
logWebFetchFallbackAttempt,
|
||||
WebFetchFallbackAttemptEvent,
|
||||
logNetworkRetryAttempt,
|
||||
NetworkRetryAttemptEvent,
|
||||
} from '../telemetry/index.js';
|
||||
import { LlmRole } from '../telemetry/llmRole.js';
|
||||
import { WEB_FETCH_TOOL_NAME } from './tool-names.js';
|
||||
import { debugLogger } from '../utils/debugLogger.js';
|
||||
import { coreEvents } from '../utils/events.js';
|
||||
import { retryWithBackoff } from '../utils/retry.js';
|
||||
import { retryWithBackoff, getRetryErrorType } from '../utils/retry.js';
|
||||
import { WEB_FETCH_DEFINITION } from './definitions/coreTools.js';
|
||||
import { resolveToolDeclaration } from './definitions/resolver.js';
|
||||
import { LRUCache } from 'mnemonist';
|
||||
@@ -188,13 +190,28 @@ class WebFetchToolInvocation extends BaseToolInvocation<
|
||||
}
|
||||
|
||||
private handleRetry(attempt: number, error: unknown, delayMs: number): void {
|
||||
const maxAttempts = this.config.getMaxAttempts();
|
||||
const modelName = 'Web Fetch';
|
||||
const errorType = getRetryErrorType(error);
|
||||
|
||||
coreEvents.emitRetryAttempt({
|
||||
attempt,
|
||||
maxAttempts: this.config.getMaxAttempts(),
|
||||
maxAttempts,
|
||||
delayMs,
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
model: 'Web Fetch',
|
||||
error: errorType,
|
||||
model: modelName,
|
||||
});
|
||||
|
||||
logNetworkRetryAttempt(
|
||||
this.config,
|
||||
new NetworkRetryAttemptEvent(
|
||||
attempt,
|
||||
maxAttempts,
|
||||
errorType,
|
||||
delayMs,
|
||||
modelName,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
private async executeFallback(signal: AbortSignal): Promise<ToolResult> {
|
||||
|
||||
@@ -99,8 +99,46 @@ function getNetworkErrorCode(error: unknown): string | undefined {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const FETCH_FAILED_MESSAGE = 'fetch failed';
|
||||
const INCOMPLETE_JSON_MESSAGE = 'incomplete json segment';
|
||||
export const FETCH_FAILED_MESSAGE = 'fetch failed';
|
||||
export const INCOMPLETE_JSON_MESSAGE = 'incomplete json segment';
|
||||
|
||||
/**
|
||||
* Categorizes an error for retry telemetry purposes.
|
||||
* Returns a safe string without PII.
|
||||
*/
|
||||
export function getRetryErrorType(error: unknown): string {
|
||||
if (error === 'Invalid content') {
|
||||
return 'INVALID_CONTENT';
|
||||
}
|
||||
|
||||
const errorCode = getNetworkErrorCode(error);
|
||||
if (errorCode && RETRYABLE_NETWORK_CODES.includes(errorCode)) {
|
||||
return errorCode;
|
||||
}
|
||||
|
||||
if (error instanceof Error) {
|
||||
const lowerMessage = error.message.toLowerCase();
|
||||
if (lowerMessage.includes(FETCH_FAILED_MESSAGE)) {
|
||||
return 'FETCH_FAILED';
|
||||
}
|
||||
if (lowerMessage.includes(INCOMPLETE_JSON_MESSAGE)) {
|
||||
return 'INCOMPLETE_JSON';
|
||||
}
|
||||
}
|
||||
|
||||
const status = getErrorStatus(error);
|
||||
if (status !== undefined) {
|
||||
if (status === 429) return 'QUOTA_EXCEEDED';
|
||||
if (status >= 500 && status < 600) return 'SERVER_ERROR';
|
||||
return `HTTP_${status}`;
|
||||
}
|
||||
|
||||
if (error instanceof Error) {
|
||||
return error.name;
|
||||
}
|
||||
|
||||
return 'UNKNOWN';
|
||||
}
|
||||
|
||||
/**
|
||||
* Default predicate function to determine if a retry should be attempted.
|
||||
|
||||
Reference in New Issue
Block a user