diff --git a/packages/cli/src/interactiveCli.tsx b/packages/cli/src/interactiveCli.tsx index 1b1e5bae12..1f9467a1d0 100644 --- a/packages/cli/src/interactiveCli.tsx +++ b/packages/cli/src/interactiveCli.tsx @@ -94,11 +94,14 @@ export async function startInteractiveUI( const version = await getVersion(); setWindowTitle(basename(workspaceRoot), settings); + const simulateUser = config.getSimulateUser(); + const consolePatcher = new ConsolePatcher({ onNewMessage: (msg) => { coreEvents.emitConsoleLog(msg.type, msg.content); }, debugMode: config.getDebugMode(), + interactive: !simulateUser, }); consolePatcher.patch(); @@ -144,7 +147,6 @@ export async function startInteractiveUI( await new Promise((resolve) => setTimeout(resolve, 100)); } - const simulateUser = config.getSimulateUser(); const simulatedStdin = new PassThrough({ encoding: 'utf8' }); let lastFrame: string | undefined; @@ -199,16 +201,18 @@ export async function startInteractiveUI( registerCleanup(cleanupLineWrapping); } - checkForUpdates(settings) - .then((info) => { - handleAutoUpdate(info, settings, config.getProjectRoot()); - }) - .catch((err) => { - // Silently ignore update check errors. - if (config.getDebugMode()) { - debugLogger.warn('Update check failed:', err); - } - }); + if (!simulateUser) { + checkForUpdates(settings) + .then((info) => { + handleAutoUpdate(info, settings, config.getProjectRoot()); + }) + .catch((err) => { + // Silently ignore update check errors. 
+ if (config.getDebugMode()) { + debugLogger.warn('Update check failed:', err); + } + }); + } if (simulateUser) { const simulator = new UserSimulator( diff --git a/packages/cli/src/services/UserSimulator.test.ts b/packages/cli/src/services/UserSimulator.test.ts new file mode 100644 index 0000000000..7e55326b71 --- /dev/null +++ b/packages/cli/src/services/UserSimulator.test.ts @@ -0,0 +1,135 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { UserSimulator } from './UserSimulator.js'; +import { Writable } from 'node:stream'; +import type { Config } from '@google/gemini-cli-core'; + +describe('UserSimulator', () => { + let mockConfig: Config; + let mockGetScreen: vi.Mock<() => string | undefined>; + let mockStdinBuffer: Writable; + let mockContentGenerator: { + generateContent: vi.Mock; + }; + + beforeEach(() => { + mockContentGenerator = { + generateContent: vi + .fn() + .mockResolvedValue({ text: JSON.stringify({ action: 'y\r' }) }), + }; + + mockConfig = { + getContentGenerator: () => mockContentGenerator, + getSimulateUser: () => true, + getQuestion: () => 'test goal', + getKnowledgeSource: () => undefined, + getHasAccessToPreviewModel: () => true, + } as unknown as Config; + + mockGetScreen = vi.fn(); + mockStdinBuffer = new Writable({ + write(chunk, encoding, callback) { + callback(); + }, + }); + vi.spyOn(mockStdinBuffer, 'write'); + }); + + it('should include interactive prompts in its vision even when timers are present', async () => { + const simulator = new UserSimulator( + mockConfig, + mockGetScreen, + mockStdinBuffer, + ); + + // Mock a screen with a timer and a confirmation prompt + mockGetScreen.mockReturnValue( + 'Thinking... (0s)\n\nAction Required: Allow pip execution? [Y/n]', + ); + + // We need to trigger the private tick method. 
Since it is private and only runs on an interval, + // we drive it indirectly: install fake timers, start the simulator, + // and advance the clock far enough for the interval to fire. + + vi.useFakeTimers(); + simulator.start(); + + // Trigger the interval + await vi.advanceTimersByTimeAsync(2000); + + expect(mockContentGenerator.generateContent).toHaveBeenCalled(); + const lastCall = mockContentGenerator.generateContent.mock.calls[0]; + const prompt = lastCall[0].contents[0].parts[0].text; + + expect(prompt).toContain( + 'STATE 2: The agent is waiting for you to authorize a tool', + ); + expect(prompt).toContain('[Y/n]'); + expect(prompt).toContain('RULE 1: If there is a clear confirmation prompt'); + + simulator.stop(); + vi.useRealTimers(); + }); + + it('should not wait if a prompt is visible even if a spinner is present', async () => { + const simulator = new UserSimulator( + mockConfig, + mockGetScreen, + mockStdinBuffer, + ); + + // Mock a screen with a spinner and a prompt + mockGetScreen.mockReturnValue('⠋ Working...\n> Type your message'); + + vi.useFakeTimers(); + simulator.start(); + + await vi.advanceTimersByTimeAsync(2000); + + expect(mockContentGenerator.generateContent).toHaveBeenCalled(); + const lastCall = mockContentGenerator.generateContent.mock.calls[0]; + const prompt = lastCall[0].contents[0].parts[0].text; + + expect(prompt).toContain( + 'Only (Rule 1 fallback) if the agent is truly mid-process', + ); + + simulator.stop(); + vi.useRealTimers(); + }); + + it('should submit keys with reliable delays', async () => { + const simulator = new UserSimulator( + mockConfig, + mockGetScreen, + mockStdinBuffer, + ); + mockGetScreen.mockReturnValue('> Prompt'); + mockContentGenerator.generateContent.mockResolvedValue({ + text: JSON.stringify({ action: 'abc' }), + }); + + vi.useFakeTimers(); + simulator.start(); + + // Trigger tick + await vi.advanceTimersByTimeAsync(2000); + + // Wait for the async key submission loop to finish + // Initial delay 100ms + (3 chars * 10ms) = 
130ms minimum + await vi.advanceTimersByTimeAsync(500); + + expect(mockStdinBuffer.write).toHaveBeenCalledWith('a'); + expect(mockStdinBuffer.write).toHaveBeenCalledWith('b'); + expect(mockStdinBuffer.write).toHaveBeenCalledWith('c'); + + simulator.stop(); + vi.useRealTimers(); + }); +}); diff --git a/packages/cli/src/services/UserSimulator.ts b/packages/cli/src/services/UserSimulator.ts index 0b1a9d5d9d..d2adae697d 100644 --- a/packages/cli/src/services/UserSimulator.ts +++ b/packages/cli/src/services/UserSimulator.ts @@ -143,11 +143,11 @@ export class UserSimulator { const prompt = `You are evaluating a CLI agent by simulating a user sitting at the terminal. Look carefully at the screen and determine the CLI's current state: -STATE 1: The agent is busy (e.g., streaming a response, showing a spinner, running a tool, or displaying a timer like "7s"). It is actively working and NOT waiting for text input. +STATE 1: The agent is busy (e.g., streaming a response, executing a tool, or showing a progress message). It is actively working and NOT waiting for text input or user approval. - In this case, your action MUST be exactly: -STATE 2: The agent is waiting for you to authorize a tool, confirm an action, or answer a specific multi-choice question (e.g., "Action Required", "Allow execution", numbered options). -- In this case, your action MUST be the exact raw characters to select the option and submit it (e.g., 1\\r, 2\\r, y\\r, n\\r, or just \\r if the default option is acceptable). Do NOT output or "Thank you". You must unblock the agent and allow it to run the tool. +STATE 2: The agent is waiting for you to authorize a tool, confirm an action, or answer a specific multi-choice question (e.g., "Action Required", "Allow execution", numbered options, "[Y/n]"). +- In this case, your action MUST be the exact raw characters to select the option and submit it (e.g., 1\\r, 2\\r, y\\r, n\\r, or just \\r if the default option is acceptable). Do NOT output or "Thank you". 
You must unblock the agent and allow it to run the tool. This state takes precedence even if timers or background messages are visible. STATE 3: The agent has finished its current thought process AND is idle, waiting for a NEW general text prompt (usually indicated by a "> Type your message" prompt). - First, verify that the ACTUAL task is fully complete based on your original goal. Do not stop at intermediate steps like planning or syntax checking. @@ -159,7 +159,7 @@ STATE 4: Any other situation where the agent is waiting for text input or needs - Your action should be the raw characters you would type, followed by \\r. For just an Enter key press, output \\r. CRITICAL RULES: -- RULE 1: If there is ANY active spinner (e.g., ⠋, ⠙, ⠹, ⠸, ⠼, ⠴, ⠧) or an elapsed time indicator (e.g., "0s", "7s") anywhere on the screen, the agent is STILL WORKING. Your action MUST be . Do NOT issue commands, even if a text prompt is visible below it. +- RULE 1: If there is a clear confirmation prompt (e.g. "[Y/n]", "1) Allow Once") or an input cursor (">"), YOU MUST RESPOND (State 2 or 3). Detect these states aggressively. Only (Rule 1 fallback) if the agent is truly mid-process with no interactive markers visible. - RULE 2: If there is an "Action Required" or confirmation prompt on the screen, YOU MUST HANDLE IT (State 2). This takes precedence over everything else. - RULE 3: If prompted to allow execution of a command with options like 'Allow once' and 'Allow for this session', you MUST choose the option for 'Allow for this session' (typically by sending '2\\r'). - RULE 4: You MUST output a strictly formatted JSON object with no markdown wrappers or extra text. 
@@ -319,6 +319,9 @@ ${strippedScreen} } } + // Wait a bit to ensure the terminal is ready for input + await new Promise((resolve) => setTimeout(resolve, 100)); + for (const char of keys) { if (char === '\r') { // Wait a bit to ensure the previous character is rendered before submitting @@ -327,7 +330,7 @@ ${strippedScreen} this.stdinBuffer.write(char); // Small delay to ensure Ink processes each keypress event individually // while preventing UI state collisions during long simulated inputs. - await new Promise((resolve) => setTimeout(resolve, 5)); + await new Promise((resolve) => setTimeout(resolve, 10)); } this.lastScreenContent = normalizedScreen; } else {