diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index 49c1ebf169..69c494a4e0 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -394,16 +394,23 @@ export class GeminiChat { return; // Stop the generator } - if (isConnectionPhase) { - throw error; - } - lastError = error; - const isContentError = error instanceof InvalidStreamError; + // Check if the error is retryable (e.g., transient SSL errors + // like ERR_SSL_SSLV3_ALERT_BAD_RECORD_MAC) const isRetryable = isRetryableError( error, this.config.getRetryFetchErrors(), ); + // For connection phase errors, only retryable errors should continue + if (isConnectionPhase) { + if (!isRetryable || signal.aborted) { + throw error; + } + // Fall through to retry logic for retryable connection errors + } + lastError = error; + const isContentError = error instanceof InvalidStreamError; + if ( (isContentError && isGemini2Model(model)) || (isRetryable && !signal.aborted) diff --git a/packages/core/src/core/geminiChat_network_retry.test.ts b/packages/core/src/core/geminiChat_network_retry.test.ts index 9a41c04a82..3dafc081d3 100644 --- a/packages/core/src/core/geminiChat_network_retry.test.ts +++ b/packages/core/src/core/geminiChat_network_retry.test.ts @@ -274,4 +274,204 @@ describe('GeminiChat Network Retries', () => { expect(mockLogContentRetry).not.toHaveBeenCalled(); }); + + it('should retry on SSL error during connection phase (ERR_SSL_SSLV3_ALERT_BAD_RECORD_MAC)', async () => { + // Create an SSL error that occurs during connection (before any yield) + const sslError = new Error( + 'SSL routines:ssl3_read_bytes:sslv3 alert bad record mac', + ); + (sslError as NodeJS.ErrnoException).code = + 'ERR_SSL_SSLV3_ALERT_BAD_RECORD_MAC'; + + vi.mocked(mockContentGenerator.generateContentStream) + // First call: throw SSL error immediately (connection phase) + .mockRejectedValueOnce(sslError) + // Second call: succeed + .mockImplementationOnce(async () => + (async function* () { + yield { + candidates: [ + { + content: { parts: [{ text: 'Success after SSL retry' }] }, + finishReason: 'STOP', + }, + ], + } as unknown as GenerateContentResponse; + })(), + ); + + const stream = await chat.sendMessageStream( + { model: 'test-model' }, + 'test message', + 'prompt-id-ssl-retry', + new AbortController().signal, + ); + + const events: StreamEvent[] = []; + for await (const event of stream) { + events.push(event); + } + + // Should have retried and succeeded + const retryEvent = events.find((e) => e.type === StreamEventType.RETRY); + expect(retryEvent).toBeDefined(); + + const successChunk = events.find( + (e) => + e.type === StreamEventType.CHUNK && + e.value.candidates?.[0]?.content?.parts?.[0]?.text === + 'Success after SSL retry', + ); + expect(successChunk).toBeDefined(); + + // Verify the API was called twice (initial + retry) + expect(mockContentGenerator.generateContentStream).toHaveBeenCalledTimes(2); + }); + + it('should retry on ECONNRESET error during connection phase', async () => { + const connectionError = new Error('read ECONNRESET'); + (connectionError as NodeJS.ErrnoException).code = 'ECONNRESET'; + + vi.mocked(mockContentGenerator.generateContentStream) + .mockRejectedValueOnce(connectionError) + .mockImplementationOnce(async () => + (async function* () { + yield { + candidates: [ + { + content: { + parts: [{ text: 'Success after connection retry' }], + }, + finishReason: 'STOP', + }, + ], + } as unknown as GenerateContentResponse; + })(), + ); + + const stream = await chat.sendMessageStream( + { model: 'test-model' }, + 'test message', + 'prompt-id-connection-retry', + new AbortController().signal, + ); + + const events: StreamEvent[] = []; + for await (const event of stream) { + events.push(event); + } + + const retryEvent = events.find((e) => e.type === StreamEventType.RETRY); + expect(retryEvent).toBeDefined(); + + const successChunk = events.find( + (e) => + e.type === StreamEventType.CHUNK && + e.value.candidates?.[0]?.content?.parts?.[0]?.text === + 'Success after connection retry', + ); + expect(successChunk).toBeDefined(); + }); + + it('should NOT retry on non-retryable error during connection phase', async () => { + const nonRetryableError = new Error('Some non-retryable error'); + + vi.mocked(mockContentGenerator.generateContentStream).mockRejectedValueOnce( + nonRetryableError, + ); + + const stream = await chat.sendMessageStream( + { model: 'test-model' }, + 'test message', + 'prompt-id-no-connection-retry', + new AbortController().signal, + ); + + await expect(async () => { + for await (const _ of stream) { + // consume + } + }).rejects.toThrow(nonRetryableError); + + // Should only be called once (no retry) + expect(mockContentGenerator.generateContentStream).toHaveBeenCalledTimes(1); + }); + + it('should retry on SSL error during stream iteration (mid-stream failure)', async () => { + // This simulates the exact scenario from issue #17318 where the error + // occurs during a long session while streaming content + const sslError = new Error( + 'request to https://cloudcode-pa.googleapis.com/v1internal:streamGenerateContent failed', + ) as NodeJS.ErrnoException & { type?: string }; + sslError.type = 'system'; + sslError.errno = 'ERR_SSL_SSLV3_ALERT_BAD_RECORD_MAC' as unknown as number; + sslError.code = 'ERR_SSL_SSLV3_ALERT_BAD_RECORD_MAC'; + + vi.mocked(mockContentGenerator.generateContentStream) + // First call: yield some content, then throw SSL error mid-stream + .mockImplementationOnce(async () => + (async function* () { + yield { + candidates: [ + { content: { parts: [{ text: 'Partial response...' }] } }, + ], + } as unknown as GenerateContentResponse; + // SSL error occurs while waiting for more data + throw sslError; + })(), + ) + // Second call: succeed + .mockImplementationOnce(async () => + (async function* () { + yield { + candidates: [ + { + content: { parts: [{ text: 'Complete response after retry' }] }, + finishReason: 'STOP', + }, + ], + } as unknown as GenerateContentResponse; + })(), + ); + + const stream = await chat.sendMessageStream( + { model: 'test-model' }, + 'test message', + 'prompt-id-ssl-mid-stream', + new AbortController().signal, + ); + + const events: StreamEvent[] = []; + for await (const event of stream) { + events.push(event); + } + + // Should have received partial content, then retry, then success + const partialChunk = events.find( + (e) => + e.type === StreamEventType.CHUNK && + e.value.candidates?.[0]?.content?.parts?.[0]?.text === + 'Partial response...', + ); + expect(partialChunk).toBeDefined(); + + const retryEvent = events.find((e) => e.type === StreamEventType.RETRY); + expect(retryEvent).toBeDefined(); + + const successChunk = events.find( + (e) => + e.type === StreamEventType.CHUNK && + e.value.candidates?.[0]?.content?.parts?.[0]?.text === + 'Complete response after retry', + ); + expect(successChunk).toBeDefined(); + + // Verify retry logging was called with NETWORK_ERROR type + expect(mockLogContentRetry).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ + error_type: 'NETWORK_ERROR', + }), + ); + }); }); diff --git a/packages/core/src/utils/retry.test.ts b/packages/core/src/utils/retry.test.ts index ff295d2028..8ab6500259 100644 --- a/packages/core/src/utils/retry.test.ts +++ b/packages/core/src/utils/retry.test.ts @@ -409,6 +409,87 @@ describe('retryWithBackoff', () => { await vi.runAllTimersAsync(); await expect(promise).resolves.toBe('success'); }); + + it('should retry on SSL error code (ERR_SSL_SSLV3_ALERT_BAD_RECORD_MAC)', async () => { + const error = new Error('SSL error'); + (error as any).code = 'ERR_SSL_SSLV3_ALERT_BAD_RECORD_MAC'; + const mockFn = vi + .fn() + .mockRejectedValueOnce(error) + .mockResolvedValue('success'); + + const promise = retryWithBackoff(mockFn, { + initialDelayMs: 1, + maxDelayMs: 1, + }); + await vi.runAllTimersAsync(); + await expect(promise).resolves.toBe('success'); + expect(mockFn).toHaveBeenCalledTimes(2); + }); + + it('should retry on SSL error code in deeply nested cause chain', async () => { + const deepCause = new Error('OpenSSL error'); + (deepCause as any).code = 'ERR_SSL_BAD_RECORD_MAC'; + + const middleCause = new Error('TLS handshake failed'); + (middleCause as any).cause = deepCause; + + const outerError = new Error('fetch failed'); + (outerError as any).cause = middleCause; + + const mockFn = vi + .fn() + .mockRejectedValueOnce(outerError) + .mockResolvedValue('success'); + + const promise = retryWithBackoff(mockFn, { + initialDelayMs: 1, + maxDelayMs: 1, + }); + await vi.runAllTimersAsync(); + await expect(promise).resolves.toBe('success'); + expect(mockFn).toHaveBeenCalledTimes(2); + }); + + it('should retry on EPROTO error (generic protocol/SSL error)', async () => { + const error = new Error('Protocol error'); + (error as any).code = 'EPROTO'; + const mockFn = vi + .fn() + .mockRejectedValueOnce(error) + .mockResolvedValue('success'); + + const promise = retryWithBackoff(mockFn, { + initialDelayMs: 1, + maxDelayMs: 1, + }); + await vi.runAllTimersAsync(); + await expect(promise).resolves.toBe('success'); + expect(mockFn).toHaveBeenCalledTimes(2); + }); + + it('should retry on gaxios-style SSL error with code property', async () => { + // This matches the exact structure from issue #17318 + const error = new Error( + 'request to https://cloudcode-pa.googleapis.com/v1internal:streamGenerateContent failed', + ); + (error as any).type = 'system'; + (error as any).errno = 'ERR_SSL_SSLV3_ALERT_BAD_RECORD_MAC'; + (error as any).code = 'ERR_SSL_SSLV3_ALERT_BAD_RECORD_MAC'; + + const mockFn = vi + .fn() + .mockRejectedValueOnce(error) + .mockResolvedValue('success'); + + const promise = retryWithBackoff(mockFn, { + initialDelayMs: 1, + maxDelayMs: 1, + }); + await vi.runAllTimersAsync(); + await expect(promise).resolves.toBe('success'); + expect(mockFn).toHaveBeenCalledTimes(2); + }); }); describe('Flash model fallback for OAuth users', () => { diff --git a/packages/core/src/utils/retry.ts b/packages/core/src/utils/retry.ts index f78bef5bd1..8e9454e496 100644 --- a/packages/core/src/utils/retry.ts +++ b/packages/core/src/utils/retry.ts @@ -54,6 +54,12 @@ const RETRYABLE_NETWORK_CODES = [ 'ENOTFOUND', 'EAI_AGAIN', 'ECONNREFUSED', + // SSL/TLS transient errors + 'ERR_SSL_SSLV3_ALERT_BAD_RECORD_MAC', + 'ERR_SSL_WRONG_VERSION_NUMBER', + 'ERR_SSL_DECRYPTION_FAILED_OR_BAD_RECORD_MAC', + 'ERR_SSL_BAD_RECORD_MAC', + 'EPROTO', // Generic protocol error (often SSL-related) ]; function getNetworkErrorCode(error: unknown): string | undefined { @@ -72,8 +78,22 @@ function getNetworkErrorCode(error: unknown): string | undefined { return directCode; } - if (typeof error === 'object' && error !== null && 'cause' in error) { - return getCode((error as { cause: unknown }).cause); + // Traverse the cause chain to find error codes (SSL errors are often nested) + let current: unknown = error; + const maxDepth = 5; // Prevent infinite loops in case of circular references + for (let depth = 0; depth < maxDepth; depth++) { + if ( + typeof current !== 'object' || + current === null || + !('cause' in current) + ) { + break; + } + current = (current as { cause: unknown }).cause; + const code = getCode(current); + if (code) { + return code; + } } return undefined;