fix(core): remove "System: Please continue." injection on InvalidStream events (#26340)

This commit is contained in:
Sandy Tao
2026-05-01 12:45:31 -07:00
committed by GitHub
parent 363854172f
commit 9380e13f6d
14 changed files with 53 additions and 298 deletions
@@ -263,7 +263,6 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
'prompt-id-1',
undefined,
false,
'Test input',
);
expect(getWrittenOutput()).toBe('Hello World\n');
@@ -382,7 +381,6 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
'prompt-id-2',
undefined,
false,
undefined,
);
expect(getWrittenOutput()).toBe('Final answer\n');
@@ -542,7 +540,6 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
'prompt-id-3',
undefined,
false,
undefined,
);
expect(getWrittenOutput()).toBe('Sorry, let me try again.\n');
@@ -684,7 +681,6 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
'prompt-id-7',
undefined,
false,
rawInput,
);
@@ -720,7 +716,6 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
'prompt-id-1',
undefined,
false,
'Test input',
);
expect(processStdoutSpy).toHaveBeenCalledWith(
@@ -853,7 +848,6 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
'prompt-id-empty',
undefined,
false,
'Empty response test',
);
@@ -990,7 +984,6 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
'prompt-id-slash',
undefined,
false,
'/testcommand',
);
@@ -1036,7 +1029,6 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
'prompt-id-slash',
undefined,
false,
'/help',
);
expect(getWrittenOutput()).toBe('Response to slash command\n');
@@ -1214,7 +1206,6 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
'prompt-id-unknown',
undefined,
false,
'/unknowncommand',
);
-1
View File
@@ -319,7 +319,6 @@ export async function runNonInteractive(
abortController.signal,
prompt_id,
undefined,
false,
turnCount === 1 ? input : undefined,
);
@@ -269,7 +269,6 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
'prompt-id-1',
undefined,
false,
'Test input',
);
expect(getWrittenOutput()).toBe('Hello World\n');
@@ -436,7 +435,6 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
'prompt-id-2',
undefined,
false,
undefined,
);
expect(getWrittenOutput()).toBe('Final answer\n');
@@ -596,7 +594,6 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
'prompt-id-3',
undefined,
false,
undefined,
);
expect(getWrittenOutput()).toBe('Sorry, let me try again.\n');
@@ -738,7 +735,6 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
'prompt-id-7',
undefined,
false,
rawInput,
);
@@ -774,7 +770,6 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
'prompt-id-1',
undefined,
false,
'Test input',
);
expect(processStdoutSpy).toHaveBeenCalledWith(
@@ -980,7 +975,6 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
'prompt-id-empty',
undefined,
false,
'Empty response test',
);
@@ -1117,7 +1111,6 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
'prompt-id-slash',
undefined,
false,
'/testcommand',
);
@@ -1163,7 +1156,6 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
'prompt-id-slash',
undefined,
false,
'/help',
);
expect(getWrittenOutput()).toBe('Response to slash command\n');
@@ -1383,7 +1375,6 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
'prompt-id-unknown',
undefined,
false,
'/unknowncommand',
);
@@ -135,7 +135,6 @@ export const createMockConfig = (overrides: Partial<Config> = {}): Config =>
getUseRipgrep: vi.fn().mockReturnValue(false),
getEnableInteractiveShell: vi.fn().mockReturnValue(false),
getSkipNextSpeakerCheck: vi.fn().mockReturnValue(false),
getContinueOnFailedApiCall: vi.fn().mockReturnValue(false),
getRetryFetchErrors: vi.fn().mockReturnValue(true),
getEnableShellOutputEfficiency: vi.fn().mockReturnValue(true),
getShellToolInactivityTimeout: vi.fn().mockReturnValue(300000),
@@ -805,7 +805,6 @@ describe('useGeminiStream', () => {
expect.any(AbortSignal),
'prompt-id-2',
undefined,
false,
expectedMergedResponse,
);
});
@@ -1532,7 +1531,6 @@ describe('useGeminiStream', () => {
expect.any(AbortSignal),
'prompt-id-4',
undefined,
false,
toolCallResponseParts,
);
});
@@ -2027,7 +2025,6 @@ describe('useGeminiStream', () => {
expect.any(AbortSignal),
expect.any(String),
undefined,
false,
'/my-custom-command',
);
@@ -2056,7 +2053,6 @@ describe('useGeminiStream', () => {
expect.any(AbortSignal),
expect.any(String),
undefined,
false,
'/emptycmd',
);
});
@@ -2077,7 +2073,6 @@ describe('useGeminiStream', () => {
expect.any(AbortSignal),
expect.any(String),
undefined,
false,
'// This is a line comment',
);
});
@@ -2098,7 +2093,6 @@ describe('useGeminiStream', () => {
expect.any(AbortSignal),
expect.any(String),
undefined,
false,
'/* This is a block comment */',
);
});
@@ -3058,7 +3052,6 @@ describe('useGeminiStream', () => {
expect.any(AbortSignal), // Argument 2: An AbortSignal
expect.any(String), // Argument 3: The prompt_id string
undefined,
false,
rawQuery,
);
});
@@ -3709,7 +3702,6 @@ describe('useGeminiStream', () => {
expect.any(AbortSignal),
expect.any(String),
undefined,
false,
'test query',
);
});
@@ -3859,7 +3851,6 @@ describe('useGeminiStream', () => {
expect.any(AbortSignal),
expect.any(String),
undefined,
false,
'second query',
);
});
@@ -4004,7 +3995,6 @@ describe('useGeminiStream', () => {
expect.any(AbortSignal),
expect.any(String),
undefined,
false,
'test query',
);
});
@@ -1670,7 +1670,6 @@ export const useGeminiStream = (
abortSignal,
prompt_id!,
undefined,
false,
query,
);
const processingStatus = await processGeminiStreamEvents(
@@ -200,7 +200,6 @@ describe('LegacyAgentSession', () => {
expect.any(AbortSignal),
'test-prompt',
undefined,
false,
'raw input',
);
@@ -196,7 +196,6 @@ export class LegacyAgentProtocol implements AgentProtocol {
this._abortController.signal,
this._promptId,
undefined,
false,
currentDisplayContent,
);
currentDisplayContent = undefined;
-25
View File
@@ -1437,31 +1437,6 @@ describe('Server Config (config.ts)', () => {
});
});
describe('ContinueOnFailedApiCall Configuration', () => {
it('should default continueOnFailedApiCall to false when not provided', () => {
const config = new Config(baseParams);
expect(config.getContinueOnFailedApiCall()).toBe(true);
});
it('should set continueOnFailedApiCall to true when provided as true', () => {
const paramsWithContinueOnFailedApiCall: ConfigParameters = {
...baseParams,
continueOnFailedApiCall: true,
};
const config = new Config(paramsWithContinueOnFailedApiCall);
expect(config.getContinueOnFailedApiCall()).toBe(true);
});
it('should set continueOnFailedApiCall to false when explicitly provided as false', () => {
const paramsWithContinueOnFailedApiCall: ConfigParameters = {
...baseParams,
continueOnFailedApiCall: false,
};
const config = new Config(paramsWithContinueOnFailedApiCall);
expect(config.getContinueOnFailedApiCall()).toBe(false);
});
});
describe('createToolRegistry', () => {
it('should register a tool if coreTools contains an argument-specific pattern', async () => {
const params: ConfigParameters = {
-7
View File
@@ -681,7 +681,6 @@ export interface ConfigParameters {
gemmaModelRouter?: GemmaModelRouterSettings;
adk?: ADKSettings;
disableModelRouterForAuth?: AuthType[];
continueOnFailedApiCall?: boolean;
retryFetchErrors?: boolean;
maxAttempts?: number;
enableShellOutputEfficiency?: boolean;
@@ -911,7 +910,6 @@ export class Config implements McpContext, AgentLoopContext {
private readonly agentSessionNoninteractiveEnabled: boolean;
private readonly agentSessionInteractiveEnabled: boolean;
private readonly continueOnFailedApiCall: boolean;
private readonly retryFetchErrors: boolean;
private readonly maxAttempts: number;
private readonly enableShellOutputEfficiency: boolean;
@@ -1288,7 +1286,6 @@ export class Config implements McpContext, AgentLoopContext {
this.enableHooks = params.enableHooks ?? true;
this.disabledHooks = params.disabledHooks ?? [];
this.continueOnFailedApiCall = params.continueOnFailedApiCall ?? true;
this.enableShellOutputEfficiency =
params.enableShellOutputEfficiency ?? true;
this.shellToolInactivityTimeout =
@@ -3449,10 +3446,6 @@ export class Config implements McpContext, AgentLoopContext {
return this.skipNextSpeakerCheck;
}
getContinueOnFailedApiCall(): boolean {
return this.continueOnFailedApiCall;
}
getRetryFetchErrors(): boolean {
return this.retryFetchErrors;
}
+7 -161
View File
@@ -259,7 +259,6 @@ describe('Gemini Client (client.ts)', () => {
getCompressionThreshold: vi.fn().mockReturnValue(undefined),
getSkipNextSpeakerCheck: vi.fn().mockReturnValue(false),
getShowModelInfoInChat: vi.fn().mockReturnValue(false),
getContinueOnFailedApiCall: vi.fn(),
getProjectRoot: vi.fn().mockReturnValue('/test/project/root'),
getIncludeDirectoryTree: vi.fn().mockReturnValue(true),
storage: {
@@ -1304,9 +1303,6 @@ ${JSON.stringify(
});
it('should stop infinite loop after MAX_TURNS when nextSpeaker always returns model', async () => {
vi.spyOn(client['config'], 'getContinueOnFailedApiCall').mockReturnValue(
true,
);
// Get the mocked checkNextSpeaker function and configure it to trigger infinite loop
const { checkNextSpeaker } = await import(
'../utils/nextSpeakerChecker.js'
@@ -2059,26 +2055,13 @@ ${JSON.stringify(
);
});
it('should recursively call sendMessageStream with "Please continue." when InvalidStream event is received for Gemini 2 models', async () => {
vi.spyOn(client['config'], 'getContinueOnFailedApiCall').mockReturnValue(
true,
);
// Arrange - router must return a Gemini 2 model for retry to trigger
mockRouterService.route.mockResolvedValue({
model: 'gemini-2.0-flash',
reason: 'test',
});
const mockStream1 = (async function* () {
it('should propagate InvalidStream events without injecting "Please continue." or recursing', async () => {
// Arrange: a single turn that yields an InvalidStream event.
const mockStream = (async function* () {
yield { type: GeminiEventType.InvalidStream };
})();
const mockStream2 = (async function* () {
yield { type: GeminiEventType.Content, value: 'Continued content' };
})();
mockTurnRunFn
.mockReturnValueOnce(mockStream1)
.mockReturnValueOnce(mockStream2);
mockTurnRunFn.mockReturnValueOnce(mockStream);
const mockChat: Partial<GeminiChat> = {
addHistory: vi.fn(),
@@ -2096,117 +2079,16 @@ ${JSON.stringify(
const stream = client.sendMessageStream(initialRequest, signal, promptId);
const events = await fromAsync(stream);
// Assert
expect(events).toEqual([
{ type: GeminiEventType.ModelInfo, value: 'gemini-2.0-flash' },
{ type: GeminiEventType.InvalidStream },
{ type: GeminiEventType.Content, value: 'Continued content' },
]);
// Verify that turn.run was called twice
expect(mockTurnRunFn).toHaveBeenCalledTimes(2);
// First call with original request
expect(mockTurnRunFn).toHaveBeenNthCalledWith(
1,
{ model: 'gemini-2.0-flash', isChatModel: true },
initialRequest,
expect.any(AbortSignal),
undefined,
);
// Second call with "Please continue."
expect(mockTurnRunFn).toHaveBeenNthCalledWith(
2,
{ model: 'gemini-2.0-flash', isChatModel: true },
[{ text: 'System: Please continue.' }],
expect.any(AbortSignal),
undefined,
);
});
it('should not recursively call sendMessageStream with "Please continue." when InvalidStream event is received and flag is false', async () => {
vi.spyOn(client['config'], 'getContinueOnFailedApiCall').mockReturnValue(
false,
);
// Arrange
const mockStream1 = (async function* () {
yield { type: GeminiEventType.InvalidStream };
})();
mockTurnRunFn.mockReturnValueOnce(mockStream1);
const mockChat: Partial<GeminiChat> = {
addHistory: vi.fn(),
setTools: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
getLastPromptTokenCount: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
const initialRequest = [{ text: 'Hi' }];
const promptId = 'prompt-id-invalid-stream';
const signal = new AbortController().signal;
// Act
const stream = client.sendMessageStream(initialRequest, signal, promptId);
const events = await fromAsync(stream);
// Assert
// Assert: the InvalidStream event is forwarded to the consumer and the
// turn ends. No "System: Please continue." is injected and turn.run is
// not called a second time.
expect(events).toEqual([
{ type: GeminiEventType.ModelInfo, value: 'default-routed-model' },
{ type: GeminiEventType.InvalidStream },
]);
// Verify that turn.run was called only once
expect(mockTurnRunFn).toHaveBeenCalledTimes(1);
});
it('should stop recursing after one retry when InvalidStream events are repeatedly received', async () => {
vi.spyOn(client['config'], 'getContinueOnFailedApiCall').mockReturnValue(
true,
);
// Arrange - router must return a Gemini 2 model for retry to trigger
mockRouterService.route.mockResolvedValue({
model: 'gemini-2.0-flash',
reason: 'test',
});
// Always return a new invalid stream
mockTurnRunFn.mockImplementation(() =>
(async function* () {
yield { type: GeminiEventType.InvalidStream };
})(),
);
const mockChat: Partial<GeminiChat> = {
addHistory: vi.fn(),
setTools: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
getLastPromptTokenCount: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
const initialRequest = [{ text: 'Hi' }];
const promptId = 'prompt-id-infinite-invalid-stream';
const signal = new AbortController().signal;
// Act
const stream = client.sendMessageStream(initialRequest, signal, promptId);
const events = await fromAsync(stream);
// Assert
// We expect 3 events (model_info + original + 1 retry)
expect(events.length).toBe(3);
expect(
events
.filter((e) => e.type === GeminiEventType.ModelInfo)
.map((e) => e.value),
).toEqual(['gemini-2.0-flash']);
// Verify that turn.run was called twice
expect(mockTurnRunFn).toHaveBeenCalledTimes(2);
});
describe('Editor context delta', () => {
const mockStream = (async function* () {
yield { type: 'content', value: 'Hello' };
@@ -2584,42 +2466,6 @@ ${JSON.stringify(
expect(mockConfig.resetTurn).toHaveBeenCalled();
});
it('should NOT reset turn on invalid stream retry', async () => {
vi.mocked(mockAvailabilityService.selectFirstAvailable).mockReturnValue(
{
selectedModel: 'model-a',
skipped: [],
},
);
// We simulate a retry by calling sendMessageStream with isInvalidStreamRetry=true
// But the public API doesn't expose that argument directly unless we use the private method or simulate the recursion.
// We can simulate recursion by mocking turn run to return invalid stream once.
vi.spyOn(
client['config'],
'getContinueOnFailedApiCall',
).mockReturnValue(true);
const mockStream1 = (async function* () {
yield { type: GeminiEventType.InvalidStream };
})();
const mockStream2 = (async function* () {
yield { type: 'content', value: 'ok' };
})();
mockTurnRunFn
.mockReturnValueOnce(mockStream1)
.mockReturnValueOnce(mockStream2);
const stream = client.sendMessageStream(
[{ text: 'Hi' }],
new AbortController().signal,
'prompt-retry',
);
await fromAsync(stream);
// resetTurn should be called once (for the initial call) but NOT for the recursive call
expect(mockConfig.resetTurn).toHaveBeenCalledTimes(1);
});
});
describe('IDE context with pending tool calls', () => {
+3 -52
View File
@@ -47,19 +47,12 @@ import { ChatCompressionService } from '../context/chatCompressionService.js';
import { AgentHistoryProvider } from '../context/agentHistoryProvider.js';
import type { ContextManager } from '../context/contextManager.js';
import { ideContextStore } from '../ide/ideContext.js';
import {
logContentRetryFailure,
logNextSpeakerCheck,
} from '../telemetry/loggers.js';
import { logNextSpeakerCheck } from '../telemetry/loggers.js';
import type {
DefaultHookOutput,
AfterAgentHookOutput,
} from '../hooks/types.js';
import {
ContentRetryFailureEvent,
NextSpeakerCheckEvent,
type LlmRole,
} from '../telemetry/types.js';
import { NextSpeakerCheckEvent, type LlmRole } from '../telemetry/types.js';
import { uiTelemetryService } from '../telemetry/uiTelemetry.js';
import type { IdeContext, File } from '../ide/types.js';
import { handleFallback } from '../fallback/handler.js';
@@ -603,7 +596,6 @@ export class GeminiClient {
signal: AbortSignal,
prompt_id: string,
boundedTurns: number,
isInvalidStreamRetry: boolean,
displayContent?: PartListUnion,
): AsyncGenerator<ServerGeminiStreamEvent, Turn> {
// Re-initialize turn (it was empty before if in loop, or new instance)
@@ -708,7 +700,6 @@ export class GeminiClient {
signal,
prompt_id,
boundedTurns,
isInvalidStreamRetry,
displayContent,
);
}
@@ -758,7 +749,6 @@ export class GeminiClient {
displayContent,
);
let isError = false;
let isInvalidStream = false;
let loopDetectedAbort = false;
let loopRecoverResult: { detail?: string } | undefined;
@@ -781,9 +771,6 @@ export class GeminiClient {
this.updateTelemetryTokenCount();
if (event.type === GeminiEventType.InvalidStream) {
isInvalidStream = true;
}
if (event.type === GeminiEventType.Error) {
isError = true;
}
@@ -799,7 +786,6 @@ export class GeminiClient {
signal,
prompt_id,
boundedTurns,
isInvalidStreamRetry,
displayContent,
);
}
@@ -821,33 +807,6 @@ export class GeminiClient {
}
}
if (isInvalidStream) {
if (this.config.getContinueOnFailedApiCall()) {
if (isInvalidStreamRetry) {
logContentRetryFailure(
this.config,
new ContentRetryFailureEvent(
4,
'FAILED_AFTER_PROMPT_INJECTION',
modelToUse,
),
);
return turn;
}
const nextRequest = [{ text: 'System: Please continue.' }];
// Recursive call - update turn with result
turn = yield* this.sendMessageStream(
nextRequest,
signal,
prompt_id,
boundedTurns - 1,
true,
displayContent,
);
return turn;
}
}
if (!turn.pendingToolCalls.length && signal && !signal.aborted) {
if (
!this.config.getQuotaErrorOccurred() &&
@@ -874,7 +833,6 @@ export class GeminiClient {
signal,
prompt_id,
boundedTurns - 1,
false, // isInvalidStreamRetry is false
displayContent,
);
return turn;
@@ -889,13 +847,10 @@ export class GeminiClient {
signal: AbortSignal,
prompt_id: string,
turns: number = MAX_TURNS,
isInvalidStreamRetry: boolean = false,
displayContent?: PartListUnion,
stopHookActive: boolean = false,
): AsyncGenerator<ServerGeminiStreamEvent, Turn> {
if (!isInvalidStreamRetry) {
this.config.resetTurn();
}
this.config.resetTurn();
const hooksEnabled = this.config.getEnableHooks();
const messageBus = this.context.messageBus;
@@ -947,7 +902,6 @@ export class GeminiClient {
signal,
prompt_id,
boundedTurns,
isInvalidStreamRetry,
displayContent,
);
@@ -1009,7 +963,6 @@ export class GeminiClient {
signal,
prompt_id,
boundedTurns - 1,
false,
displayContent,
true, // stopHookActive: signal retry to AfterAgent hooks
);
@@ -1254,7 +1207,6 @@ export class GeminiClient {
signal: AbortSignal,
prompt_id: string,
boundedTurns: number,
isInvalidStreamRetry: boolean,
displayContent?: PartListUnion,
): AsyncGenerator<ServerGeminiStreamEvent, Turn> {
// Clear the detection flag so the recursive turn can proceed, but the count remains 1.
@@ -1276,7 +1228,6 @@ export class GeminiClient {
signal,
prompt_id,
boundedTurns - 1,
isInvalidStreamRetry,
displayContent,
);
}
+40 -19
View File
@@ -744,25 +744,41 @@ describe('GeminiChat', () => {
).rejects.toThrow(InvalidStreamError);
});
it('should throw InvalidStreamError when no tool call and empty response text', async () => {
// Setup: Stream with finish reason but empty response (only thoughts)
const streamWithEmptyResponse = (async function* () {
yield {
candidates: [
{
content: {
role: 'model',
parts: [{ thought: 'thinking...' }],
},
finishReason: 'STOP',
},
],
} as unknown as GenerateContentResponse;
})();
vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue(
streamWithEmptyResponse,
);
it('should throw InvalidStreamError without retrying when no tool call and empty response text', async () => {
vi.mocked(mockContentGenerator.generateContentStream)
.mockImplementationOnce(async () =>
// First attempt: finish reason is present, but the stream has no
// non-thought text, which is NO_RESPONSE_TEXT.
(async function* () {
yield {
candidates: [
{
content: {
role: 'model',
parts: [{ thought: true, text: 'thinking...' }],
},
finishReason: 'STOP',
},
],
} as unknown as GenerateContentResponse;
})(),
)
.mockImplementationOnce(async () =>
// This would succeed if NO_RESPONSE_TEXT were retried.
(async function* () {
yield {
candidates: [
{
content: {
role: 'model',
parts: [{ text: 'valid response after retry' }],
},
finishReason: 'STOP',
},
],
} as unknown as GenerateContentResponse;
})(),
);
const stream = await chat.sendMessageStream(
{ model: 'gemini-2.0-flash' },
@@ -779,6 +795,11 @@ describe('GeminiChat', () => {
}
})(),
).rejects.toThrow(InvalidStreamError);
expect(mockContentGenerator.generateContentStream).toHaveBeenCalledTimes(
1,
);
expect(mockLogContentRetry).not.toHaveBeenCalled();
expect(mockLogContentRetryFailure).toHaveBeenCalledTimes(1);
});
it('should succeed when there is finish reason and response text', async () => {
+3 -1
View File
@@ -424,11 +424,13 @@ export class GeminiChat {
);
const isContentError = error instanceof InvalidStreamError;
const isRetryableContentError =
isContentError && error.type !== 'NO_RESPONSE_TEXT';
const errorType = isContentError
? error.type
: getRetryErrorType(error);
if (isContentError || (isRetryable && !signal.aborted)) {
if (isRetryableContentError || (isRetryable && !signal.aborted)) {
// The issue requests exactly 3 retries (4 attempts) for API errors during stream iteration.
// Regardless of the global maxAttempts (e.g. 10), we only want to retry these mid-stream API errors
// up to 3 times before finally throwing the error to the user.