feat(cli): implement visual validation framework and TTY smoke tests

This change introduces a multi-layered validation strategy for the Gemini CLI UI, including:
- TTY Bootstrap Smoke Tests using node-pty to validate real terminal startup.
- Visual Regression Testing using SVG snapshots and AppRig.
- Core fixes for a scheduler hang and suppressed policy violations.
- Comprehensive documentation for maintainers.
This commit is contained in:
mkorwel
2026-03-14 12:09:52 -07:00
parent 9f7691fd88
commit 5833b84d94
11 changed files with 257 additions and 33 deletions
@@ -0,0 +1,37 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, beforeEach, afterEach } from 'vitest';
import { TestRig } from '@google/gemini-cli-test-utils';
describe('Gemini CLI TTY Bootstrap', () => {
let rig: TestRig;
beforeEach(() => {
rig = new TestRig();
rig.setup('TTY Bootstrap Smoke Test');
});
afterEach(async () => {
await rig.cleanup();
});
it('should render the interactive UI and display the ready marker in a TTY', async () => {
// Spawning the CLI in a pseudo-TTY with a dummy API key to bypass auth prompt
const run = await rig.runInteractive({
env: { GEMINI_API_KEY: 'dummy-key' },
});
// The ready marker we expect to see
const readyMarker = 'Type your message or @path/to/file';
// Verify the initial render completes and displays the marker
await run.expectText(readyMarker, 30000);
// If we reached here, the smoke test passed
await run.kill();
});
});
@@ -0,0 +1 @@
{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I am going to read the secret file."},{"functionCall":{"name":"read_file","args":{"file_path":"secret.txt"}}}]},"finishReason":"STOP"}]}]}
+76
View File
@@ -0,0 +1,76 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { AppRig } from '../test-utils/AppRig.js';
import { PolicyDecision } from '@google/gemini-cli-core';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
describe('Policy Engine Visual Validation', () => {
let rig: AppRig;
beforeEach(async () => {
const fakeResponsesPath = path.join(
__dirname,
'../test-utils/fixtures/policy-test.responses',
);
rig = new AppRig({
fakeResponsesPath,
});
await rig.initialize();
});
afterEach(async () => {
await rig.unmount();
});
it('should boot correctly and display the main interface', async () => {
rig.render();
await rig.waitForIdle();
expect(rig.lastFrame).toContain('Type your message');
});
it.todo(
'should visually render a DENY decision when a tool is blocked',
async () => {
rig.setToolPolicy('read_file', PolicyDecision.DENY);
rig.render();
await rig.sendMessage('Read secret.txt');
// Wait for the model's initial text response
await rig.waitForOutput(/I am going to read the secret file/i);
// Wait for the blocked message to appear
await rig.waitForOutput(/Blocked by policy/i);
// Verify it matches the SVG snapshot
await expect(rig).toMatchSvgSnapshot();
},
);
it.todo(
'should visually render an ASK_USER prompt for policy approval',
async () => {
rig.setToolPolicy('read_file', PolicyDecision.ASK_USER);
rig.render();
await rig.sendMessage('Read secret.txt');
// Wait for the model's initial text response
await rig.waitForOutput(/I am going to read the secret file/i);
// Wait for the confirmation prompt
await rig.waitForOutput(/Allow execution/i);
// Verify it matches the SVG snapshot
await expect(rig).toMatchSvgSnapshot();
},
);
});
@@ -21,6 +21,7 @@ import { isShellTool } from './ToolShared.js';
import {
shouldHideToolCall,
CoreToolCallStatus,
ToolErrorType,
} from '@google/gemini-cli-core';
import { useUIState } from '../../contexts/UIStateContext.js';
import { getToolGroupBorderAppearance } from '../../utils/borderStyles.js';
@@ -59,7 +60,8 @@ export const ToolGroupMessage: React.FC<ToolGroupMessageProps> = ({
if (
isLowErrorVerbosity &&
t.status === CoreToolCallStatus.Error &&
!t.isClientInitiated
!t.isClientInitiated &&
t.errorType !== ToolErrorType.POLICY_VIOLATION
) {
return false;
}
+4
View File
@@ -10,6 +10,7 @@ import {
type ToolResultDisplay,
debugLogger,
CoreToolCallStatus,
type ToolErrorType,
} from '@google/gemini-cli-core';
import {
type HistoryItemToolGroup,
@@ -63,6 +64,7 @@ export function mapToDisplay(
let progressMessage: string | undefined = undefined;
let progress: number | undefined = undefined;
let progressTotal: number | undefined = undefined;
let errorType: ToolErrorType | undefined = undefined;
switch (call.status) {
case CoreToolCallStatus.Success:
@@ -72,6 +74,7 @@ export function mapToDisplay(
case CoreToolCallStatus.Error:
case CoreToolCallStatus.Cancelled:
resultDisplay = call.response.resultDisplay;
errorType = call.response.errorType;
break;
case CoreToolCallStatus.AwaitingApproval:
correlationId = call.correlationId;
@@ -114,6 +117,7 @@ export function mapToDisplay(
progressTotal,
approvalMode: call.approvalMode,
originalRequestName: call.request.originalRequestName,
errorType,
};
});
+2
View File
@@ -16,6 +16,7 @@ import {
type AgentDefinition,
type ApprovalMode,
type Kind,
type ToolErrorType,
CoreToolCallStatus,
checkExhaustive,
} from '@google/gemini-cli-core';
@@ -117,6 +118,7 @@ export interface IndividualToolCallDisplay {
originalRequestName?: string;
progress?: number;
progressTotal?: number;
errorType?: ToolErrorType;
}
export interface CompressionProps {