fix(core): silently retry API errors up to 3 times before halting session (#21989)

This commit is contained in:
Spencer
2026-03-11 16:40:06 -04:00
committed by GitHub
parent be16caece2
commit 775bcbf3a6
3 changed files with 50 additions and 28 deletions

View File

@@ -1380,11 +1380,11 @@ describe('GeminiChat', () => {
}
}).rejects.toThrow(InvalidStreamError);
// Should be called 2 times (initial + 1 retry)
// Should be called 4 times (initial + 3 retries)
expect(mockContentGenerator.generateContentStream).toHaveBeenCalledTimes(
2,
4,
);
expect(mockLogContentRetry).toHaveBeenCalledTimes(1);
expect(mockLogContentRetry).toHaveBeenCalledTimes(3);
expect(mockLogContentRetryFailure).toHaveBeenCalledTimes(1);
// History should still contain the user message.

View File

@@ -79,17 +79,17 @@ export type StreamEvent =
| { type: StreamEventType.AGENT_EXECUTION_BLOCKED; reason: string };
/**
* Options for retrying due to invalid content from the model.
* Options for retrying mid-stream errors (e.g. invalid content or API disconnects).
*/
interface ContentRetryOptions {
interface MidStreamRetryOptions {
/** Total number of attempts to make (1 initial + N retries). */
maxAttempts: number;
/** The base delay in milliseconds for linear backoff. */
initialDelayMs: number;
}
const INVALID_CONTENT_RETRY_OPTIONS: ContentRetryOptions = {
maxAttempts: 2, // 1 initial call + 1 retry
const MID_STREAM_RETRY_OPTIONS: MidStreamRetryOptions = {
maxAttempts: 4, // 1 initial call + 3 retries mid-stream
initialDelayMs: 500,
};
@@ -350,7 +350,7 @@ export class GeminiChat {
this: GeminiChat,
): AsyncGenerator<StreamEvent, void, void> {
try {
const maxAttempts = INVALID_CONTENT_RETRY_OPTIONS.maxAttempts;
const maxAttempts = this.config.getMaxAttempts();
for (let attempt = 0; attempt < maxAttempts; attempt++) {
let isConnectionPhase = true;
@@ -402,21 +402,19 @@ export class GeminiChat {
return; // Stop the generator
}
if (isConnectionPhase) {
// Connection phase errors have already been retried by retryWithBackoff.
// If they bubble up here, they are exhausted or fatal.
throw error;
}
// Check if the error is retryable (e.g., transient SSL errors
// like ERR_SSL_SSLV3_ALERT_BAD_RECORD_MAC)
// like ERR_SSL_SSLV3_ALERT_BAD_RECORD_MAC or ApiError)
const isRetryable = isRetryableError(
error,
this.config.getRetryFetchErrors(),
);
// For connection phase errors, only retryable errors should continue
if (isConnectionPhase) {
if (!isRetryable || signal.aborted) {
throw error;
}
// Fall through to retry logic for retryable connection errors
}
const isContentError = error instanceof InvalidStreamError;
const errorType = isContentError
? error.type
@@ -426,9 +424,16 @@ export class GeminiChat {
(isContentError && isGemini2Model(model)) ||
(isRetryable && !signal.aborted)
) {
// Check if we have more attempts left.
if (attempt < maxAttempts - 1) {
const delayMs = INVALID_CONTENT_RETRY_OPTIONS.initialDelayMs;
// The issue requests exactly 3 retries (4 attempts) for API errors during stream iteration.
// Regardless of the global maxAttempts (e.g. 10), we only want to retry these mid-stream API errors
// up to 3 times before finally throwing the error to the user.
const maxMidStreamAttempts = MID_STREAM_RETRY_OPTIONS.maxAttempts;
if (
attempt < maxAttempts - 1 &&
attempt < maxMidStreamAttempts - 1
) {
const delayMs = MID_STREAM_RETRY_OPTIONS.initialDelayMs;
if (isContentError) {
logContentRetry(
@@ -449,7 +454,7 @@ export class GeminiChat {
}
coreEvents.emitRetryAttempt({
attempt: attempt + 1,
maxAttempts,
maxAttempts: Math.min(maxAttempts, maxMidStreamAttempts),
delayMs: delayMs * (attempt + 1),
error: errorType,
model,

View File

@@ -292,6 +292,14 @@ describe('GeminiChat Network Retries', () => {
(sslError as NodeJS.ErrnoException).code =
'ERR_SSL_SSLV3_ALERT_BAD_RECORD_MAC';
// Instead of outer loop, connection retries are handled by retryWithBackoff.
// Simulate retryWithBackoff attempting it twice: first throws, second succeeds.
mockRetryWithBackoff.mockImplementation(
async (apiCall) =>
// Execute the apiCall to trigger mockContentGenerator
await apiCall(),
);
vi.mocked(mockContentGenerator.generateContentStream)
// First call: throw SSL error immediately (connection phase)
.mockRejectedValueOnce(sslError)
@@ -309,6 +317,15 @@ describe('GeminiChat Network Retries', () => {
})(),
);
// Because retryWithBackoff is mocked and we just want to test GeminiChat's integration,
// we need to actually execute the real retryWithBackoff logic for this test to see it work.
// So let's restore the real retryWithBackoff for this test.
const { retryWithBackoff } =
await vi.importActual<typeof import('../utils/retry.js')>(
'../utils/retry.js',
);
mockRetryWithBackoff.mockImplementation(retryWithBackoff);
const stream = await chat.sendMessageStream(
{ model: 'test-model' },
'test message',
@@ -322,10 +339,6 @@ describe('GeminiChat Network Retries', () => {
events.push(event);
}
// Should have retried and succeeded
const retryEvent = events.find((e) => e.type === StreamEventType.RETRY);
expect(retryEvent).toBeDefined();
const successChunk = events.find(
(e) =>
e.type === StreamEventType.CHUNK &&
@@ -342,6 +355,12 @@ describe('GeminiChat Network Retries', () => {
const connectionError = new Error('read ECONNRESET');
(connectionError as NodeJS.ErrnoException).code = 'ECONNRESET';
const { retryWithBackoff } =
await vi.importActual<typeof import('../utils/retry.js')>(
'../utils/retry.js',
);
mockRetryWithBackoff.mockImplementation(retryWithBackoff);
vi.mocked(mockContentGenerator.generateContentStream)
.mockRejectedValueOnce(connectionError)
.mockImplementationOnce(async () =>
@@ -372,9 +391,6 @@ describe('GeminiChat Network Retries', () => {
events.push(event);
}
const retryEvent = events.find((e) => e.type === StreamEventType.RETRY);
expect(retryEvent).toBeDefined();
const successChunk = events.find(
(e) =>
e.type === StreamEventType.CHUNK &&
@@ -382,6 +398,7 @@ describe('GeminiChat Network Retries', () => {
'Success after connection retry',
);
expect(successChunk).toBeDefined();
expect(mockContentGenerator.generateContentStream).toHaveBeenCalledTimes(2);
});
it('should NOT retry on non-retryable error during connection phase', async () => {