fix(core): remove "System: Please continue." injection on InvalidStream events (#26340)

This commit is contained in:
Sandy Tao
2026-05-01 12:45:31 -07:00
committed by GitHub
parent 363854172f
commit 9380e13f6d
14 changed files with 53 additions and 298 deletions
@@ -200,7 +200,6 @@ describe('LegacyAgentSession', () => {
expect.any(AbortSignal),
'test-prompt',
undefined,
false,
'raw input',
);
@@ -196,7 +196,6 @@ export class LegacyAgentProtocol implements AgentProtocol {
this._abortController.signal,
this._promptId,
undefined,
false,
currentDisplayContent,
);
currentDisplayContent = undefined;
-25
View File
@@ -1437,31 +1437,6 @@ describe('Server Config (config.ts)', () => {
});
});
describe('ContinueOnFailedApiCall Configuration', () => {
it('should default continueOnFailedApiCall to true when not provided', () => {
const config = new Config(baseParams);
expect(config.getContinueOnFailedApiCall()).toBe(true);
});
it('should set continueOnFailedApiCall to true when provided as true', () => {
const paramsWithContinueOnFailedApiCall: ConfigParameters = {
...baseParams,
continueOnFailedApiCall: true,
};
const config = new Config(paramsWithContinueOnFailedApiCall);
expect(config.getContinueOnFailedApiCall()).toBe(true);
});
it('should set continueOnFailedApiCall to false when explicitly provided as false', () => {
const paramsWithContinueOnFailedApiCall: ConfigParameters = {
...baseParams,
continueOnFailedApiCall: false,
};
const config = new Config(paramsWithContinueOnFailedApiCall);
expect(config.getContinueOnFailedApiCall()).toBe(false);
});
});
describe('createToolRegistry', () => {
it('should register a tool if coreTools contains an argument-specific pattern', async () => {
const params: ConfigParameters = {
-7
View File
@@ -681,7 +681,6 @@ export interface ConfigParameters {
gemmaModelRouter?: GemmaModelRouterSettings;
adk?: ADKSettings;
disableModelRouterForAuth?: AuthType[];
continueOnFailedApiCall?: boolean;
retryFetchErrors?: boolean;
maxAttempts?: number;
enableShellOutputEfficiency?: boolean;
@@ -911,7 +910,6 @@ export class Config implements McpContext, AgentLoopContext {
private readonly agentSessionNoninteractiveEnabled: boolean;
private readonly agentSessionInteractiveEnabled: boolean;
private readonly continueOnFailedApiCall: boolean;
private readonly retryFetchErrors: boolean;
private readonly maxAttempts: number;
private readonly enableShellOutputEfficiency: boolean;
@@ -1288,7 +1286,6 @@ export class Config implements McpContext, AgentLoopContext {
this.enableHooks = params.enableHooks ?? true;
this.disabledHooks = params.disabledHooks ?? [];
this.continueOnFailedApiCall = params.continueOnFailedApiCall ?? true;
this.enableShellOutputEfficiency =
params.enableShellOutputEfficiency ?? true;
this.shellToolInactivityTimeout =
@@ -3449,10 +3446,6 @@ export class Config implements McpContext, AgentLoopContext {
return this.skipNextSpeakerCheck;
}
getContinueOnFailedApiCall(): boolean {
return this.continueOnFailedApiCall;
}
getRetryFetchErrors(): boolean {
return this.retryFetchErrors;
}
+7 -161
View File
@@ -259,7 +259,6 @@ describe('Gemini Client (client.ts)', () => {
getCompressionThreshold: vi.fn().mockReturnValue(undefined),
getSkipNextSpeakerCheck: vi.fn().mockReturnValue(false),
getShowModelInfoInChat: vi.fn().mockReturnValue(false),
getContinueOnFailedApiCall: vi.fn(),
getProjectRoot: vi.fn().mockReturnValue('/test/project/root'),
getIncludeDirectoryTree: vi.fn().mockReturnValue(true),
storage: {
@@ -1304,9 +1303,6 @@ ${JSON.stringify(
});
it('should stop infinite loop after MAX_TURNS when nextSpeaker always returns model', async () => {
vi.spyOn(client['config'], 'getContinueOnFailedApiCall').mockReturnValue(
true,
);
// Get the mocked checkNextSpeaker function and configure it to trigger infinite loop
const { checkNextSpeaker } = await import(
'../utils/nextSpeakerChecker.js'
@@ -2059,26 +2055,13 @@ ${JSON.stringify(
);
});
it('should recursively call sendMessageStream with "Please continue." when InvalidStream event is received for Gemini 2 models', async () => {
vi.spyOn(client['config'], 'getContinueOnFailedApiCall').mockReturnValue(
true,
);
// Arrange - router must return a Gemini 2 model for retry to trigger
mockRouterService.route.mockResolvedValue({
model: 'gemini-2.0-flash',
reason: 'test',
});
const mockStream1 = (async function* () {
it('should propagate InvalidStream events without injecting "Please continue." or recursing', async () => {
// Arrange: a single turn that yields an InvalidStream event.
const mockStream = (async function* () {
yield { type: GeminiEventType.InvalidStream };
})();
const mockStream2 = (async function* () {
yield { type: GeminiEventType.Content, value: 'Continued content' };
})();
mockTurnRunFn
.mockReturnValueOnce(mockStream1)
.mockReturnValueOnce(mockStream2);
mockTurnRunFn.mockReturnValueOnce(mockStream);
const mockChat: Partial<GeminiChat> = {
addHistory: vi.fn(),
@@ -2096,117 +2079,16 @@ ${JSON.stringify(
const stream = client.sendMessageStream(initialRequest, signal, promptId);
const events = await fromAsync(stream);
// Assert
expect(events).toEqual([
{ type: GeminiEventType.ModelInfo, value: 'gemini-2.0-flash' },
{ type: GeminiEventType.InvalidStream },
{ type: GeminiEventType.Content, value: 'Continued content' },
]);
// Verify that turn.run was called twice
expect(mockTurnRunFn).toHaveBeenCalledTimes(2);
// First call with original request
expect(mockTurnRunFn).toHaveBeenNthCalledWith(
1,
{ model: 'gemini-2.0-flash', isChatModel: true },
initialRequest,
expect.any(AbortSignal),
undefined,
);
// Second call with "Please continue."
expect(mockTurnRunFn).toHaveBeenNthCalledWith(
2,
{ model: 'gemini-2.0-flash', isChatModel: true },
[{ text: 'System: Please continue.' }],
expect.any(AbortSignal),
undefined,
);
});
it('should not recursively call sendMessageStream with "Please continue." when InvalidStream event is received and flag is false', async () => {
vi.spyOn(client['config'], 'getContinueOnFailedApiCall').mockReturnValue(
false,
);
// Arrange
const mockStream1 = (async function* () {
yield { type: GeminiEventType.InvalidStream };
})();
mockTurnRunFn.mockReturnValueOnce(mockStream1);
const mockChat: Partial<GeminiChat> = {
addHistory: vi.fn(),
setTools: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
getLastPromptTokenCount: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
const initialRequest = [{ text: 'Hi' }];
const promptId = 'prompt-id-invalid-stream';
const signal = new AbortController().signal;
// Act
const stream = client.sendMessageStream(initialRequest, signal, promptId);
const events = await fromAsync(stream);
// Assert
// Assert: the InvalidStream event is forwarded to the consumer and the
// turn ends. No "System: Please continue." is injected and turn.run is
// not called a second time.
expect(events).toEqual([
{ type: GeminiEventType.ModelInfo, value: 'default-routed-model' },
{ type: GeminiEventType.InvalidStream },
]);
// Verify that turn.run was called only once
expect(mockTurnRunFn).toHaveBeenCalledTimes(1);
});
it('should stop recursing after one retry when InvalidStream events are repeatedly received', async () => {
vi.spyOn(client['config'], 'getContinueOnFailedApiCall').mockReturnValue(
true,
);
// Arrange - router must return a Gemini 2 model for retry to trigger
mockRouterService.route.mockResolvedValue({
model: 'gemini-2.0-flash',
reason: 'test',
});
// Always return a new invalid stream
mockTurnRunFn.mockImplementation(() =>
(async function* () {
yield { type: GeminiEventType.InvalidStream };
})(),
);
const mockChat: Partial<GeminiChat> = {
addHistory: vi.fn(),
setTools: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
getLastPromptTokenCount: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
const initialRequest = [{ text: 'Hi' }];
const promptId = 'prompt-id-infinite-invalid-stream';
const signal = new AbortController().signal;
// Act
const stream = client.sendMessageStream(initialRequest, signal, promptId);
const events = await fromAsync(stream);
// Assert
// We expect 3 events (model_info + original + 1 retry)
expect(events.length).toBe(3);
expect(
events
.filter((e) => e.type === GeminiEventType.ModelInfo)
.map((e) => e.value),
).toEqual(['gemini-2.0-flash']);
// Verify that turn.run was called twice
expect(mockTurnRunFn).toHaveBeenCalledTimes(2);
});
describe('Editor context delta', () => {
const mockStream = (async function* () {
yield { type: 'content', value: 'Hello' };
@@ -2584,42 +2466,6 @@ ${JSON.stringify(
expect(mockConfig.resetTurn).toHaveBeenCalled();
});
it('should NOT reset turn on invalid stream retry', async () => {
vi.mocked(mockAvailabilityService.selectFirstAvailable).mockReturnValue(
{
selectedModel: 'model-a',
skipped: [],
},
);
// We simulate a retry by calling sendMessageStream with isInvalidStreamRetry=true
// But the public API doesn't expose that argument directly unless we use the private method or simulate the recursion.
// We can simulate recursion by mocking turn run to return invalid stream once.
vi.spyOn(
client['config'],
'getContinueOnFailedApiCall',
).mockReturnValue(true);
const mockStream1 = (async function* () {
yield { type: GeminiEventType.InvalidStream };
})();
const mockStream2 = (async function* () {
yield { type: 'content', value: 'ok' };
})();
mockTurnRunFn
.mockReturnValueOnce(mockStream1)
.mockReturnValueOnce(mockStream2);
const stream = client.sendMessageStream(
[{ text: 'Hi' }],
new AbortController().signal,
'prompt-retry',
);
await fromAsync(stream);
// resetTurn should be called once (for the initial call) but NOT for the recursive call
expect(mockConfig.resetTurn).toHaveBeenCalledTimes(1);
});
});
describe('IDE context with pending tool calls', () => {
+3 -52
View File
@@ -47,19 +47,12 @@ import { ChatCompressionService } from '../context/chatCompressionService.js';
import { AgentHistoryProvider } from '../context/agentHistoryProvider.js';
import type { ContextManager } from '../context/contextManager.js';
import { ideContextStore } from '../ide/ideContext.js';
import {
logContentRetryFailure,
logNextSpeakerCheck,
} from '../telemetry/loggers.js';
import { logNextSpeakerCheck } from '../telemetry/loggers.js';
import type {
DefaultHookOutput,
AfterAgentHookOutput,
} from '../hooks/types.js';
import {
ContentRetryFailureEvent,
NextSpeakerCheckEvent,
type LlmRole,
} from '../telemetry/types.js';
import { NextSpeakerCheckEvent, type LlmRole } from '../telemetry/types.js';
import { uiTelemetryService } from '../telemetry/uiTelemetry.js';
import type { IdeContext, File } from '../ide/types.js';
import { handleFallback } from '../fallback/handler.js';
@@ -603,7 +596,6 @@ export class GeminiClient {
signal: AbortSignal,
prompt_id: string,
boundedTurns: number,
isInvalidStreamRetry: boolean,
displayContent?: PartListUnion,
): AsyncGenerator<ServerGeminiStreamEvent, Turn> {
// Re-initialize turn (it was empty before if in loop, or new instance)
@@ -708,7 +700,6 @@ export class GeminiClient {
signal,
prompt_id,
boundedTurns,
isInvalidStreamRetry,
displayContent,
);
}
@@ -758,7 +749,6 @@ export class GeminiClient {
displayContent,
);
let isError = false;
let isInvalidStream = false;
let loopDetectedAbort = false;
let loopRecoverResult: { detail?: string } | undefined;
@@ -781,9 +771,6 @@ export class GeminiClient {
this.updateTelemetryTokenCount();
if (event.type === GeminiEventType.InvalidStream) {
isInvalidStream = true;
}
if (event.type === GeminiEventType.Error) {
isError = true;
}
@@ -799,7 +786,6 @@ export class GeminiClient {
signal,
prompt_id,
boundedTurns,
isInvalidStreamRetry,
displayContent,
);
}
@@ -821,33 +807,6 @@ export class GeminiClient {
}
}
if (isInvalidStream) {
if (this.config.getContinueOnFailedApiCall()) {
if (isInvalidStreamRetry) {
logContentRetryFailure(
this.config,
new ContentRetryFailureEvent(
4,
'FAILED_AFTER_PROMPT_INJECTION',
modelToUse,
),
);
return turn;
}
const nextRequest = [{ text: 'System: Please continue.' }];
// Recursive call - update turn with result
turn = yield* this.sendMessageStream(
nextRequest,
signal,
prompt_id,
boundedTurns - 1,
true,
displayContent,
);
return turn;
}
}
if (!turn.pendingToolCalls.length && signal && !signal.aborted) {
if (
!this.config.getQuotaErrorOccurred() &&
@@ -874,7 +833,6 @@ export class GeminiClient {
signal,
prompt_id,
boundedTurns - 1,
false, // isInvalidStreamRetry is false
displayContent,
);
return turn;
@@ -889,13 +847,10 @@ export class GeminiClient {
signal: AbortSignal,
prompt_id: string,
turns: number = MAX_TURNS,
isInvalidStreamRetry: boolean = false,
displayContent?: PartListUnion,
stopHookActive: boolean = false,
): AsyncGenerator<ServerGeminiStreamEvent, Turn> {
if (!isInvalidStreamRetry) {
this.config.resetTurn();
}
this.config.resetTurn();
const hooksEnabled = this.config.getEnableHooks();
const messageBus = this.context.messageBus;
@@ -947,7 +902,6 @@ export class GeminiClient {
signal,
prompt_id,
boundedTurns,
isInvalidStreamRetry,
displayContent,
);
@@ -1009,7 +963,6 @@ export class GeminiClient {
signal,
prompt_id,
boundedTurns - 1,
false,
displayContent,
true, // stopHookActive: signal retry to AfterAgent hooks
);
@@ -1254,7 +1207,6 @@ export class GeminiClient {
signal: AbortSignal,
prompt_id: string,
boundedTurns: number,
isInvalidStreamRetry: boolean,
displayContent?: PartListUnion,
): AsyncGenerator<ServerGeminiStreamEvent, Turn> {
// Clear the detection flag so the recursive turn can proceed, but the count remains 1.
@@ -1276,7 +1228,6 @@ export class GeminiClient {
signal,
prompt_id,
boundedTurns - 1,
isInvalidStreamRetry,
displayContent,
);
}
+40 -19
View File
@@ -744,25 +744,41 @@ describe('GeminiChat', () => {
).rejects.toThrow(InvalidStreamError);
});
it('should throw InvalidStreamError when no tool call and empty response text', async () => {
// Setup: Stream with finish reason but empty response (only thoughts)
const streamWithEmptyResponse = (async function* () {
yield {
candidates: [
{
content: {
role: 'model',
parts: [{ thought: 'thinking...' }],
},
finishReason: 'STOP',
},
],
} as unknown as GenerateContentResponse;
})();
vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue(
streamWithEmptyResponse,
);
it('should throw InvalidStreamError without retrying when no tool call and empty response text', async () => {
vi.mocked(mockContentGenerator.generateContentStream)
.mockImplementationOnce(async () =>
// First attempt: finish reason is present, but the stream has no
// non-thought text, which is NO_RESPONSE_TEXT.
(async function* () {
yield {
candidates: [
{
content: {
role: 'model',
parts: [{ thought: true, text: 'thinking...' }],
},
finishReason: 'STOP',
},
],
} as unknown as GenerateContentResponse;
})(),
)
.mockImplementationOnce(async () =>
// This would succeed if NO_RESPONSE_TEXT were retried.
(async function* () {
yield {
candidates: [
{
content: {
role: 'model',
parts: [{ text: 'valid response after retry' }],
},
finishReason: 'STOP',
},
],
} as unknown as GenerateContentResponse;
})(),
);
const stream = await chat.sendMessageStream(
{ model: 'gemini-2.0-flash' },
@@ -779,6 +795,11 @@ describe('GeminiChat', () => {
}
})(),
).rejects.toThrow(InvalidStreamError);
expect(mockContentGenerator.generateContentStream).toHaveBeenCalledTimes(
1,
);
expect(mockLogContentRetry).not.toHaveBeenCalled();
expect(mockLogContentRetryFailure).toHaveBeenCalledTimes(1);
});
it('should succeed when there is finish reason and response text', async () => {
+3 -1
View File
@@ -424,11 +424,13 @@ export class GeminiChat {
);
const isContentError = error instanceof InvalidStreamError;
const isRetryableContentError =
isContentError && error.type !== 'NO_RESPONSE_TEXT';
const errorType = isContentError
? error.type
: getRetryErrorType(error);
if (isContentError || (isRetryable && !signal.aborted)) {
if (isRetryableContentError || (isRetryable && !signal.aborted)) {
// The issue requests exactly 3 retries (4 attempts) for API errors during stream iteration.
// Regardless of the global maxAttempts (e.g. 10), we only want to retry these mid-stream API errors
// up to 3 times before finally throwing the error to the user.