mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-07-03 22:56:48 -07:00
feat(cli): implement visual validation framework and TTY smoke tests
This change introduces a multi-layered validation strategy for the Gemini CLI UI, including:
- TTY Bootstrap Smoke Tests using node-pty to validate real terminal startup.
- Visual Regression Testing using SVG snapshots and AppRig.
- Core fixes for a scheduler hang and suppressed policy violations.
- Comprehensive documentation for maintainers.
This commit is contained in:
@@ -0,0 +1,37 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, beforeEach, afterEach } from 'vitest';
|
||||
import { TestRig } from '@google/gemini-cli-test-utils';
|
||||
|
||||
/**
 * Smoke test for the interactive CLI: launches it through the test rig's
 * interactive runner and checks that the input prompt text shows up.
 */
describe('Gemini CLI TTY Bootstrap', () => {
  // Text the test waits for; seeing it means the initial render finished.
  const expectedPrompt = 'Type your message or @path/to/file';
  // Second argument handed to `expectText` — presumably a timeout in
  // milliseconds (TODO confirm against TestRig).
  const promptWaitLimit = 30000;

  let testRig: TestRig;

  // Each test gets a freshly constructed and set-up rig.
  beforeEach(() => {
    testRig = new TestRig();
    testRig.setup('TTY Bootstrap Smoke Test');
  });

  // Release whatever the rig allocated once the test is over.
  afterEach(async () => {
    await testRig.cleanup();
  });

  it('should render the interactive UI and display the ready marker in a TTY', async () => {
    // A dummy API key is supplied so the CLI does not stop at the auth prompt.
    const session = await testRig.runInteractive({
      env: { GEMINI_API_KEY: 'dummy-key' },
    });

    // The smoke test passes as soon as the prompt text has been observed.
    await session.expectText(expectedPrompt, promptWaitLimit);

    // Shut the spawned CLI down explicitly now that the check succeeded.
    await session.kill();
  });
});
|
||||
@@ -0,0 +1 @@
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"role":"model","parts":[{"text":"I am going to read the secret file."},{"functionCall":{"name":"read_file","args":{"file_path":"secret.txt"}}}]},"finishReason":"STOP"}]}]}
|
||||
@@ -0,0 +1,76 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import { AppRig } from '../test-utils/AppRig.js';
|
||||
import { PolicyDecision } from '@google/gemini-cli-core';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

/**
 * Visual validation of policy-engine outcomes rendered by the app.
 *
 * Every test runs against an AppRig that is given the canned responses in
 * `policy-test.responses`. The two policy scenarios are registered with
 * `it.todo`, so they are listed by the runner but not executed yet.
 */
describe('Policy Engine Visual Validation', () => {
  // Location of the canned model responses handed to the rig.
  const cannedResponsesFile = path.join(
    __dirname,
    '../test-utils/fixtures/policy-test.responses',
  );
  // Text the canned model response starts with, before the tool call.
  const modelPreamble = /I am going to read the secret file/i;

  let appRig: AppRig;

  // Build and initialize a fresh rig for every test.
  beforeEach(async () => {
    appRig = new AppRig({ fakeResponsesPath: cannedResponsesFile });
    await appRig.initialize();
  });

  // Tear the rendered app down after every test.
  afterEach(async () => {
    await appRig.unmount();
  });

  it('should boot correctly and display the main interface', async () => {
    appRig.render();
    await appRig.waitForIdle();

    const frame = appRig.lastFrame;
    expect(frame).toContain('Type your message');
  });

  it.todo(
    'should visually render a DENY decision when a tool is blocked',
    async () => {
      appRig.setToolPolicy('read_file', PolicyDecision.DENY);
      appRig.render();

      await appRig.sendMessage('Read secret.txt');

      // First the model's text, then the notice that the call was blocked.
      await appRig.waitForOutput(modelPreamble);
      await appRig.waitForOutput(/Blocked by policy/i);

      // Compare the rendered state with the stored SVG snapshot.
      await expect(appRig).toMatchSvgSnapshot();
    },
  );

  it.todo(
    'should visually render an ASK_USER prompt for policy approval',
    async () => {
      appRig.setToolPolicy('read_file', PolicyDecision.ASK_USER);
      appRig.render();

      await appRig.sendMessage('Read secret.txt');

      // First the model's text, then the confirmation prompt.
      await appRig.waitForOutput(modelPreamble);
      await appRig.waitForOutput(/Allow execution/i);

      // Compare the rendered state with the stored SVG snapshot.
      await expect(appRig).toMatchSvgSnapshot();
    },
  );
});
|
||||
@@ -21,6 +21,7 @@ import { isShellTool } from './ToolShared.js';
|
||||
import {
|
||||
shouldHideToolCall,
|
||||
CoreToolCallStatus,
|
||||
ToolErrorType,
|
||||
} from '@google/gemini-cli-core';
|
||||
import { useUIState } from '../../contexts/UIStateContext.js';
|
||||
import { getToolGroupBorderAppearance } from '../../utils/borderStyles.js';
|
||||
@@ -59,7 +60,8 @@ export const ToolGroupMessage: React.FC<ToolGroupMessageProps> = ({
|
||||
if (
|
||||
isLowErrorVerbosity &&
|
||||
t.status === CoreToolCallStatus.Error &&
|
||||
!t.isClientInitiated
|
||||
!t.isClientInitiated &&
|
||||
t.errorType !== ToolErrorType.POLICY_VIOLATION
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ import {
|
||||
type ToolResultDisplay,
|
||||
debugLogger,
|
||||
CoreToolCallStatus,
|
||||
type ToolErrorType,
|
||||
} from '@google/gemini-cli-core';
|
||||
import {
|
||||
type HistoryItemToolGroup,
|
||||
@@ -63,6 +64,7 @@ export function mapToDisplay(
|
||||
let progressMessage: string | undefined = undefined;
|
||||
let progress: number | undefined = undefined;
|
||||
let progressTotal: number | undefined = undefined;
|
||||
let errorType: ToolErrorType | undefined = undefined;
|
||||
|
||||
switch (call.status) {
|
||||
case CoreToolCallStatus.Success:
|
||||
@@ -72,6 +74,7 @@ export function mapToDisplay(
|
||||
case CoreToolCallStatus.Error:
|
||||
case CoreToolCallStatus.Cancelled:
|
||||
resultDisplay = call.response.resultDisplay;
|
||||
errorType = call.response.errorType;
|
||||
break;
|
||||
case CoreToolCallStatus.AwaitingApproval:
|
||||
correlationId = call.correlationId;
|
||||
@@ -114,6 +117,7 @@ export function mapToDisplay(
|
||||
progressTotal,
|
||||
approvalMode: call.approvalMode,
|
||||
originalRequestName: call.request.originalRequestName,
|
||||
errorType,
|
||||
};
|
||||
});
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ import {
|
||||
type AgentDefinition,
|
||||
type ApprovalMode,
|
||||
type Kind,
|
||||
type ToolErrorType,
|
||||
CoreToolCallStatus,
|
||||
checkExhaustive,
|
||||
} from '@google/gemini-cli-core';
|
||||
@@ -117,6 +118,7 @@ export interface IndividualToolCallDisplay {
|
||||
originalRequestName?: string;
|
||||
progress?: number;
|
||||
progressTotal?: number;
|
||||
errorType?: ToolErrorType;
|
||||
}
|
||||
|
||||
export interface CompressionProps {
|
||||
|
||||
@@ -11,6 +11,7 @@ import { PolicyDecision } from '../policy/types.js';
|
||||
import { MessageBusType, type Message } from './types.js';
|
||||
import { safeJsonStringify } from '../utils/safeJsonStringify.js';
|
||||
import { debugLogger } from '../utils/debugLogger.js';
|
||||
import { coreEvents } from '../utils/events.js';
|
||||
|
||||
export class MessageBus extends EventEmitter {
|
||||
constructor(
|
||||
@@ -70,6 +71,10 @@ export class MessageBus extends EventEmitter {
|
||||
break;
|
||||
case PolicyDecision.DENY:
|
||||
// Emit both rejection and response messages
|
||||
coreEvents.emitFeedback(
|
||||
'error',
|
||||
`Tool call "${message.toolCall.name}" was blocked by policy.`,
|
||||
);
|
||||
this.emitMessage({
|
||||
type: MessageBusType.TOOL_POLICY_REJECTION,
|
||||
toolCall: message.toolCall,
|
||||
|
||||
@@ -35,11 +35,7 @@ import { runInDevTraceSpan } from '../telemetry/trace.js';
|
||||
import { logToolCall } from '../telemetry/loggers.js';
|
||||
import { ToolCallEvent } from '../telemetry/types.js';
|
||||
import type { EditorType } from '../utils/editor.js';
|
||||
import {
|
||||
MessageBusType,
|
||||
type SerializableConfirmationDetails,
|
||||
type ToolConfirmationRequest,
|
||||
} from '../confirmation-bus/types.js';
|
||||
import { type SerializableConfirmationDetails } from '../confirmation-bus/types.js';
|
||||
import { runWithToolCallContext } from '../utils/toolCallContext.js';
|
||||
import {
|
||||
coreEvents,
|
||||
@@ -91,9 +87,6 @@ const createErrorResponse = (
|
||||
* Coordinates execution via state updates and event listening.
|
||||
*/
|
||||
export class Scheduler {
|
||||
// Tracks which MessageBus instances have the legacy listener attached to prevent duplicates.
|
||||
private static subscribedMessageBuses = new WeakSet<MessageBus>();
|
||||
|
||||
private readonly state: SchedulerStateManager;
|
||||
private readonly executor: ToolExecutor;
|
||||
private readonly modifier: ToolModificationHandler;
|
||||
@@ -127,8 +120,6 @@ export class Scheduler {
|
||||
this.executor = new ToolExecutor(this.context);
|
||||
this.modifier = new ToolModificationHandler();
|
||||
|
||||
this.setupMessageBusListener(this.messageBus);
|
||||
|
||||
coreEvents.on(CoreEvent.McpProgress, this.handleMcpProgress);
|
||||
}
|
||||
|
||||
@@ -161,28 +152,6 @@ export class Scheduler {
|
||||
});
|
||||
};
|
||||
|
||||
private setupMessageBusListener(messageBus: MessageBus): void {
|
||||
if (Scheduler.subscribedMessageBuses.has(messageBus)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: Optimize policy checks. Currently, tools check policy via
|
||||
// MessageBus even though the Scheduler already checked it.
|
||||
messageBus.subscribe(
|
||||
MessageBusType.TOOL_CONFIRMATION_REQUEST,
|
||||
async (request: ToolConfirmationRequest) => {
|
||||
await messageBus.publish({
|
||||
type: MessageBusType.TOOL_CONFIRMATION_RESPONSE,
|
||||
correlationId: request.correlationId,
|
||||
confirmed: false,
|
||||
requiresUserConfirmation: true,
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
Scheduler.subscribedMessageBuses.add(messageBus);
|
||||
}
|
||||
|
||||
/**
|
||||
* Schedules a batch of tool calls.
|
||||
* @returns A promise that resolves with the results of the completed batch.
|
||||
|
||||
Reference in New Issue
Block a user