From 7c7150f4876d870bbb8c886ea2e12fd5537fa722 Mon Sep 17 00:00:00 2001 From: Adam Weidman Date: Fri, 20 Mar 2026 14:33:57 -0400 Subject: [PATCH] !feat(cli): harden non-interactive agent session handling --- .../nonInteractiveCli.test.ts.snap | 8 - packages/cli/src/nonInteractiveCli.test.ts | 266 ++++++++++++++++-- packages/cli/src/nonInteractiveCli.ts | 75 +++-- packages/core/src/output/json-formatter.ts | 3 +- 4 files changed, 300 insertions(+), 52 deletions(-) diff --git a/packages/cli/src/__snapshots__/nonInteractiveCli.test.ts.snap b/packages/cli/src/__snapshots__/nonInteractiveCli.test.ts.snap index 92f396a59c..463ce3b144 100644 --- a/packages/cli/src/__snapshots__/nonInteractiveCli.test.ts.snap +++ b/packages/cli/src/__snapshots__/nonInteractiveCli.test.ts.snap @@ -8,14 +8,6 @@ exports[`runNonInteractive > should emit appropriate error event in streaming JS " `; -exports[`runNonInteractive > should emit appropriate error event in streaming JSON mode: 'max session turns' 1`] = ` -"{"type":"init","timestamp":"","session_id":"test-session-id","model":"test-model"} -{"type":"message","timestamp":"","role":"user","content":"Max turns test"} -{"type":"error","timestamp":"","severity":"error","message":"Maximum session turns exceeded"} -{"type":"result","timestamp":"","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":,"tool_calls":0,"models":{}}} -" -`; - exports[`runNonInteractive > should emit appropriate events for streaming JSON output 1`] = ` "{"type":"init","timestamp":"","session_id":"test-session-id","model":"test-model"} {"type":"message","timestamp":"","role":"user","content":"Stream test"} diff --git a/packages/cli/src/nonInteractiveCli.test.ts b/packages/cli/src/nonInteractiveCli.test.ts index e8ce1d6d3e..15b51d188c 100644 --- a/packages/cli/src/nonInteractiveCli.test.ts +++ b/packages/cli/src/nonInteractiveCli.test.ts @@ -295,6 +295,30 @@ describe('runNonInteractive', () => { expect(streamSpy).toHaveBeenCalledWith({ streamId: expect.any(String) }); }); + it('fails fast if the session acknowledges a message send without a stream', async () => { + const { LegacyAgentSession } = await import('@google/gemini-cli-core'); + const sendSpy = vi + .spyOn(LegacyAgentSession.prototype, 'send') + .mockResolvedValue({ streamId: null }); + const streamSpy = vi.spyOn(LegacyAgentSession.prototype, 'stream'); + + await expect( + runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-null-stream', + }), + ).rejects.toThrow( + 'LegacyAgentSession.send() unexpectedly returned no stream for a message send.', + ); + + expect(streamSpy).not.toHaveBeenCalled(); + + sendSpy.mockRestore(); + streamSpy.mockRestore(); + }); + it('should register activity logger when GEMINI_CLI_ACTIVITY_LOG_TARGET is set', async () => { vi.stubEnv('GEMINI_CLI_ACTIVITY_LOG_TARGET', '/tmp/test.jsonl'); const events: ServerGeminiStreamEvent[] = [ @@ -655,6 +679,21 @@ describe('runNonInteractive', () => { ).rejects.toThrow('process.exit(53) called'); }); + it('should exit when the session reports max turns through agent_end', async () => { + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents([{ type: GeminiEventType.MaxSessionTurns }]), + ); + + await expect( + runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Trigger max turns event', + prompt_id: 'prompt-id-max-turns-event', + }), + ).rejects.toThrow('process.exit(53) called'); + }); + it('should preprocess @include commands before sending to the model', async () => { // 1. Mock the imported atCommandProcessor const { handleAtCommand } = await import( @@ -837,6 +876,79 @@ describe('runNonInteractive', () => { ); }); + it('should keep only the final post-tool assistant text in JSON output', async () => { + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-1', + name: 'testTool', + args: { arg1: 'value1' }, + isClientInitiated: false, + prompt_id: 'prompt-id-json-tool-text', + }, + }; + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Success, + request: toolCallEvent.value, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + responseParts: [{ text: 'Tool executed successfully' }], + callId: 'tool-1', + error: undefined, + errorType: undefined, + contentLength: undefined, + }, + }, + ]); + + mockGeminiClient.sendMessageStream + .mockReturnValueOnce( + createStreamFromEvents([ + { type: GeminiEventType.Content, value: 'Let me check that...' }, + toolCallEvent, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } }, + }, + ]), + ) + .mockReturnValueOnce( + createStreamFromEvents([ + { type: GeminiEventType.Content, value: 'Final answer' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 3 } }, + }, + ]), + ); + + vi.mocked(mockConfig.getOutputFormat).mockReturnValue(OutputFormat.JSON); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Use a tool', + prompt_id: 'prompt-id-json-tool-text', + }); + + expect(processStdoutSpy).toHaveBeenCalledWith( + JSON.stringify( + { + session_id: 'test-session-id', + response: 'Final answer', + stats: MOCK_SESSION_METRICS, + }, + null, + 2, + ), + ); + }); + it('should write JSON output with stats for empty response commands', async () => { // Test the scenario where a command completes but produces no content at all const events: ServerGeminiStreamEvent[] = [ @@ -953,7 +1065,7 @@ describe('runNonInteractive', () => { { session_id: 'test-session-id', error: { - type: 'Error', + type: 'FatalInputError', message: 'Invalid command syntax provided', code: 42, }, @@ -1074,10 +1186,11 @@ describe('runNonInteractive', () => { // Spy on handleCancellationError to verify it's called const errors = await import('./utils/errors.js'); + const cancellationSentinel = new Error('Cancelled'); const handleCancellationErrorSpy = vi .spyOn(errors, 'handleCancellationError') .mockImplementation(() => { - throw new Error('Cancelled'); + throw cancellationSentinel; }); const events: ServerGeminiStreamEvent[] = [ @@ -1093,7 +1206,7 @@ describe('runNonInteractive', () => { signal.addEventListener('abort', () => { clearTimeout(timeout); setTimeout(() => { - reject(new Error('Aborted')); // This will be caught by nonInteractiveCli and passed to handleError + reject(new Error('Aborted')); }, 300); }); }); @@ -1126,20 +1239,10 @@ describe('runNonInteractive', () => { keypressHandler('\u0003', { ctrl: true, name: 'c' }); } - // The promise should reject with 'Aborted' because our mock stream throws it, - // and nonInteractiveCli catches it and calls handleError, which doesn't necessarily throw. - // Wait, if handleError is called, we should check that. - // But here we want to check if Ctrl+C works. - - // In our current setup, Ctrl+C aborts the signal. The stream throws 'Aborted'. - // nonInteractiveCli catches 'Aborted' and calls handleError. - - // If we want to test that handleCancellationError is called, we need the loop to detect abortion. - // But our stream throws before the loop can detect it. - - // Let's just check that the promise rejects with 'Aborted' for now, - // which proves the abortion signal reached the stream. - await expect(runPromise).rejects.toThrow('Aborted'); + // The Ctrl+C path should route through handleCancellationError rather than + // surfacing the raw stream abort. + await expect(runPromise).rejects.toBe(cancellationSentinel); + expect(handleCancellationErrorSpy).toHaveBeenCalledTimes(1); expect( processStderrSpy.mock.calls.some( @@ -1166,6 +1269,79 @@ describe('runNonInteractive', () => { // but we can also do it manually if needed. }); + it('should honor cancellation that happens before session.send()', async () => { + const originalIsTTY = process.stdin.isTTY; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const originalSetRawMode = (process.stdin as any).setRawMode; + + Object.defineProperty(process.stdin, 'isTTY', { + value: true, + configurable: true, + }); + if (!originalSetRawMode) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (process.stdin as any).setRawMode = vi.fn(); + } + + const stdinOnSpy = vi.spyOn(process.stdin, 'on').mockImplementation( + ( + event: string | symbol, + listener: (...args: unknown[]) => void, + ) => { + if (event === 'keypress') { + listener('\u0003', { ctrl: true, name: 'c' }); + } + return process.stdin; + }, + ); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + vi.spyOn(process.stdin as any, 'setRawMode').mockImplementation(() => true); + vi.spyOn(process.stdin, 'resume').mockImplementation(() => process.stdin); + vi.spyOn(process.stdin, 'pause').mockImplementation(() => process.stdin); + vi.spyOn(process.stdin, 'removeAllListeners').mockImplementation( + () => process.stdin, + ); + + const errors = await import('./utils/errors.js'); + const cancellationSentinel = new Error('Cancelled before send'); + const handleCancellationErrorSpy = vi + .spyOn(errors, 'handleCancellationError') + .mockImplementation(() => { + throw cancellationSentinel; + }); + + const { LegacyAgentSession } = await import('@google/gemini-cli-core'); + const sendSpy = vi.spyOn(LegacyAgentSession.prototype, 'send'); + + await expect( + runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Cancelled query', + prompt_id: 'prompt-id-pre-send-cancel', + }), + ).rejects.toBe(cancellationSentinel); + + expect(handleCancellationErrorSpy).toHaveBeenCalledTimes(1); + expect(sendSpy).not.toHaveBeenCalled(); + expect(stdinOnSpy).toHaveBeenCalled(); + + handleCancellationErrorSpy.mockRestore(); + sendSpy.mockRestore(); + + Object.defineProperty(process.stdin, 'isTTY', { + value: originalIsTTY, + configurable: true, + }); + if (originalSetRawMode) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (process.stdin as any).setRawMode = originalSetRawMode; + } else { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + delete (process.stdin as any).setRawMode; + } + }); + it('should throw FatalInputError if a command requires confirmation', async () => { const mockCommand = { name: 'confirm', @@ -1679,17 +1855,9 @@ describe('runNonInteractive', () => { input: 'Loop test', promptId: 'prompt-id-loop', }, - { - name: 'max session turns', - events: [ - { type: GeminiEventType.MaxSessionTurns }, - ] as ServerGeminiStreamEvent[], - input: 'Max turns test', - promptId: 'prompt-id-max-turns', - }, ])( 'should emit appropriate error event in streaming JSON mode: $name', - async ({ events, input, promptId }) => { + async ({ name, events, input, promptId }) => { vi.mocked(mockConfig.getOutputFormat).mockReturnValue( OutputFormat.STREAM_JSON, ); @@ -1727,6 +1895,52 @@ describe('runNonInteractive', () => { }, ); + it('should emit a terminal max-turns error event in streaming JSON mode', async () => { + vi.mocked(mockConfig.getOutputFormat).mockReturnValue( + OutputFormat.STREAM_JSON, + ); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents([ + { type: GeminiEventType.MaxSessionTurns }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } }, + }, + ]), + ); + + try { + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Max turns test', + prompt_id: 'prompt-id-max-turns', + }); + } catch (_error) { + // Expected exit + } + + const streamEvents = getWrittenOutput() + .trim() + .split('\n') + .map((line) => JSON.parse(line) as Record); + + expect(streamEvents).toHaveLength(3); + expect(streamEvents[2]).toMatchObject({ + type: 'result', + status: 'error', + error: { + type: 'FatalTurnLimitedError', + message: + 'Reached max session turns for this session. Increase the number of turns by specifying maxSessionTurns in settings.json.', + }, + }); + }); + it('should log error when tool recording fails', async () => { const toolCallEvent: ServerGeminiStreamEvent = { type: GeminiEventType.ToolCallRequest, diff --git a/packages/cli/src/nonInteractiveCli.ts b/packages/cli/src/nonInteractiveCli.ts index c3fe3fbd39..557ce3577c 100644 --- a/packages/cli/src/nonInteractiveCli.ts +++ b/packages/cli/src/nonInteractiveCli.ts @@ -181,6 +181,8 @@ export async function runNonInteractive({ }; let errorToHandle: unknown | undefined; + let terminalProcessExitHandled = false; + let abortSession = () => {}; try { consolePatcher.patch(); @@ -289,14 +291,23 @@ export async function runNonInteractive({ }); // Wire Ctrl+C to session abort - abortController.signal.addEventListener('abort', () => { + abortSession = () => { void session.abort(); - }); + }; + abortController.signal.addEventListener('abort', abortSession); + if (abortController.signal.aborted) { + return handleCancellationError(config); + } // Start the agentic loop (runs in background) const { streamId } = await session.send({ message: geminiPartsToContentParts(query), }); + if (streamId === null) { + throw new Error( + 'LegacyAgentSession.send() unexpectedly returned no stream for a message send.', + ); + } const getTextContent = (parts?: ContentPart[]): string | undefined => { const text = parts @@ -347,11 +358,23 @@ export async function runNonInteractive({ enumerable: true, }); } + if (errorMeta?.['status'] !== undefined) { + Object.defineProperty(errToThrow, 'status', { + value: errorMeta['status'], + enumerable: true, + }); + } return errToThrow; }; + const runTerminalExitHandler = (handler: () => never): never => { + terminalProcessExitHandled = true; + return handler(); + }; + // Consume AgentEvents for output formatting let responseText = ''; + let preToolResponseText: string | undefined; let streamEnded = false; for await (const event of session.stream({ streamId })) { if (streamEnded) break; @@ -384,6 +407,12 @@ export async function runNonInteractive({ break; } case 'tool_request': { + if (config.getOutputFormat() === OutputFormat.JSON) { + // Final JSON output should reflect the last assistant answer after + // any tool orchestration, not intermediate pre-tool text. + preToolResponseText = responseText || preToolResponseText; + responseText = ''; + } if (streamFormatter) { streamFormatter.emitEvent({ type: JsonStreamEventType.TOOL_USE, @@ -422,12 +451,33 @@ export async function runNonInteractive({ const errorMsg = getTextContent(event.content) ?? 'Tool error'; if (event.data?.['errorType'] === ToolErrorType.STOP_EXECUTION) { + if ( + config.getOutputFormat() === OutputFormat.JSON && + !responseText && + preToolResponseText + ) { + responseText = preToolResponseText; + } const stopMessage = `Agent execution stopped: ${errorMsg}`; if (config.getOutputFormat() === OutputFormat.TEXT) { process.stderr.write(`${stopMessage}\n`); } } + if (event.data?.['errorType'] === ToolErrorType.NO_SPACE_LEFT) { + runTerminalExitHandler(() => + handleToolError( + event.name, + new Error(errorMsg), + config, + typeof event.data?.['errorType'] === 'string' + ? event.data['errorType'] + : undefined, + displayText, + ), + ); + break; + } handleToolError( event.name, new Error(errorMsg), @@ -471,22 +521,9 @@ export async function runNonInteractive({ } case 'agent_end': { if (event.reason === 'aborted') { - handleCancellationError(config); + runTerminalExitHandler(() => handleCancellationError(config)); } else if (event.reason === 'max_turns') { - const isSessionLimit = - typeof event.data?.['maxTurns'] === 'number' && - typeof event.data?.['turnCount'] === 'number'; - if (isSessionLimit) { - handleMaxTurnsExceededError(config); - } - if (streamFormatter) { - streamFormatter.emitEvent({ - type: JsonStreamEventType.ERROR, - timestamp: new Date().toISOString(), - severity: 'error', - message: 'Maximum session turns exceeded', - }); - } + runTerminalExitHandler(() => handleMaxTurnsExceededError(config)); } const stopMessage = @@ -523,12 +560,16 @@ export async function runNonInteractive({ } finally { // Cleanup stdin cancellation before other cleanup cleanupStdinCancellation(); + abortController.signal.removeEventListener('abort', abortSession); consolePatcher.cleanup(); coreEvents.off(CoreEvent.UserFeedback, handleUserFeedback); } if (errorToHandle) { + if (terminalProcessExitHandled) { + throw errorToHandle; + } handleError(errorToHandle, config); } }); diff --git a/packages/core/src/output/json-formatter.ts b/packages/core/src/output/json-formatter.ts index dd3e558a6f..bce5055a6b 100644 --- a/packages/core/src/output/json-formatter.ts +++ b/packages/core/src/output/json-formatter.ts @@ -6,6 +6,7 @@ import stripAnsi from 'strip-ansi'; import type { SessionMetrics } from '../telemetry/uiTelemetry.js'; +import { getErrorType } from '../utils/errors.js'; import type { JsonError, JsonOutput } from './types.js'; export class JsonFormatter { @@ -42,7 +43,7 @@ export class JsonFormatter { sessionId?: string, ): string { const jsonError: JsonError = { - type: error.constructor.name, + type: getErrorType(error), message: stripAnsi(error.message), ...(code && { code }), };