mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-23 19:44:30 -07:00
feat(core, cli): Implement sequential approval. (#11593)
This commit is contained in:
@@ -4,11 +4,12 @@
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi } from 'vitest';
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
import { Task } from './task.js';
|
||||
import type { Config, ToolCallRequestInfo } from '@google/gemini-cli-core';
|
||||
import { createMockConfig } from '../utils/testing_utils.js';
|
||||
import type { ExecutionEventBus } from '@a2a-js/sdk/server';
|
||||
import type { ToolCall } from '@google/gemini-cli-core';
|
||||
|
||||
describe('Task', () => {
|
||||
it('scheduleToolCalls should not modify the input requests array', async () => {
|
||||
@@ -94,4 +95,122 @@ describe('Task', () => {
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('_schedulerToolCallsUpdate', () => {
|
||||
let task: Task;
|
||||
type SpyInstance = ReturnType<typeof vi.spyOn>;
|
||||
let setTaskStateAndPublishUpdateSpy: SpyInstance;
|
||||
|
||||
beforeEach(() => {
|
||||
const mockConfig = createMockConfig();
|
||||
const mockEventBus: ExecutionEventBus = {
|
||||
publish: vi.fn(),
|
||||
on: vi.fn(),
|
||||
off: vi.fn(),
|
||||
once: vi.fn(),
|
||||
removeAllListeners: vi.fn(),
|
||||
finished: vi.fn(),
|
||||
};
|
||||
|
||||
// @ts-expect-error - Calling private constructor
|
||||
task = new Task(
|
||||
'task-id',
|
||||
'context-id',
|
||||
mockConfig as Config,
|
||||
mockEventBus,
|
||||
);
|
||||
|
||||
// Spy on the method we want to check calls for
|
||||
setTaskStateAndPublishUpdateSpy = vi.spyOn(
|
||||
task,
|
||||
'setTaskStateAndPublishUpdate',
|
||||
);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
it('should set state to input-required when a tool is awaiting approval and none are executing', () => {
|
||||
const toolCalls = [
|
||||
{ request: { callId: '1' }, status: 'awaiting_approval' },
|
||||
] as ToolCall[];
|
||||
|
||||
// @ts-expect-error - Calling private method
|
||||
task._schedulerToolCallsUpdate(toolCalls);
|
||||
|
||||
// The last call should be the final state update
|
||||
expect(setTaskStateAndPublishUpdateSpy).toHaveBeenLastCalledWith(
|
||||
'input-required',
|
||||
{ kind: 'state-change' },
|
||||
undefined,
|
||||
undefined,
|
||||
true, // final: true
|
||||
);
|
||||
});
|
||||
|
||||
it('should NOT set state to input-required if a tool is awaiting approval but another is executing', () => {
|
||||
const toolCalls = [
|
||||
{ request: { callId: '1' }, status: 'awaiting_approval' },
|
||||
{ request: { callId: '2' }, status: 'executing' },
|
||||
] as ToolCall[];
|
||||
|
||||
// @ts-expect-error - Calling private method
|
||||
task._schedulerToolCallsUpdate(toolCalls);
|
||||
|
||||
// It will be called for status updates, but not with final: true
|
||||
const finalCall = setTaskStateAndPublishUpdateSpy.mock.calls.find(
|
||||
(call) => call[4] === true,
|
||||
);
|
||||
expect(finalCall).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should set state to input-required once an executing tool finishes, leaving one awaiting approval', () => {
|
||||
const initialToolCalls = [
|
||||
{ request: { callId: '1' }, status: 'awaiting_approval' },
|
||||
{ request: { callId: '2' }, status: 'executing' },
|
||||
] as ToolCall[];
|
||||
// @ts-expect-error - Calling private method
|
||||
task._schedulerToolCallsUpdate(initialToolCalls);
|
||||
|
||||
// No final call yet
|
||||
let finalCall = setTaskStateAndPublishUpdateSpy.mock.calls.find(
|
||||
(call) => call[4] === true,
|
||||
);
|
||||
expect(finalCall).toBeUndefined();
|
||||
|
||||
// Now, the executing tool finishes. The scheduler would call _resolveToolCall for it.
|
||||
// @ts-expect-error - Calling private method
|
||||
task._resolveToolCall('2');
|
||||
|
||||
// Then another update comes in for the awaiting tool (e.g., a re-check)
|
||||
const subsequentToolCalls = [
|
||||
{ request: { callId: '1' }, status: 'awaiting_approval' },
|
||||
] as ToolCall[];
|
||||
// @ts-expect-error - Calling private method
|
||||
task._schedulerToolCallsUpdate(subsequentToolCalls);
|
||||
|
||||
// NOW we should get the final call
|
||||
finalCall = setTaskStateAndPublishUpdateSpy.mock.calls.find(
|
||||
(call) => call[4] === true,
|
||||
);
|
||||
expect(finalCall).toBeDefined();
|
||||
expect(finalCall?.[0]).toBe('input-required');
|
||||
});
|
||||
|
||||
it('should NOT set state to input-required if skipFinalTrueAfterInlineEdit is true', () => {
|
||||
task.skipFinalTrueAfterInlineEdit = true;
|
||||
const toolCalls = [
|
||||
{ request: { callId: '1' }, status: 'awaiting_approval' },
|
||||
] as ToolCall[];
|
||||
|
||||
// @ts-expect-error - Calling private method
|
||||
task._schedulerToolCallsUpdate(toolCalls);
|
||||
|
||||
const finalCall = setTaskStateAndPublishUpdateSpy.mock.calls.find(
|
||||
(call) => call[4] === true,
|
||||
);
|
||||
expect(finalCall).toBeUndefined();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -40,7 +40,6 @@ import type {
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
import { logger } from '../utils/logger.js';
|
||||
import * as fs from 'node:fs';
|
||||
|
||||
import { CoderAgentEvent } from '../types.js';
|
||||
import type {
|
||||
CoderAgentMessage,
|
||||
@@ -373,11 +372,11 @@ export class Task {
|
||||
|
||||
// Only send an update if the status has actually changed.
|
||||
if (hasChanged) {
|
||||
const message = this.toolStatusMessage(tc, this.id, this.contextId);
|
||||
const coderAgentMessage: CoderAgentMessage =
|
||||
tc.status === 'awaiting_approval'
|
||||
? { kind: CoderAgentEvent.ToolCallConfirmationEvent }
|
||||
: { kind: CoderAgentEvent.ToolCallUpdateEvent };
|
||||
const message = this.toolStatusMessage(tc, this.id, this.contextId);
|
||||
|
||||
const event = this._createStatusUpdateEvent(
|
||||
this.taskState,
|
||||
@@ -404,20 +403,16 @@ export class Task {
|
||||
const isAwaitingApproval = allPendingStatuses.some(
|
||||
(status) => status === 'awaiting_approval',
|
||||
);
|
||||
const allPendingAreStable = allPendingStatuses.every(
|
||||
(status) =>
|
||||
status === 'awaiting_approval' ||
|
||||
status === 'success' ||
|
||||
status === 'error' ||
|
||||
status === 'cancelled',
|
||||
const isExecuting = allPendingStatuses.some(
|
||||
(status) => status === 'executing',
|
||||
);
|
||||
|
||||
// 1. Are any pending tool calls awaiting_approval
|
||||
// 2. Are all pending tool calls in a stable state (i.e. not in validing or executing)
|
||||
// 3. After an inline edit, the edited tool call will send awaiting_approval THEN scheduled. We wait for the next update in this case.
|
||||
// The turn is complete and requires user input if at least one tool
|
||||
// is waiting for the user's decision, and no other tool is actively
|
||||
// running in the background.
|
||||
if (
|
||||
isAwaitingApproval &&
|
||||
allPendingAreStable &&
|
||||
!isExecuting &&
|
||||
!this.skipFinalTrueAfterInlineEdit
|
||||
) {
|
||||
this.skipFinalTrueAfterInlineEdit = false;
|
||||
|
||||
@@ -313,7 +313,7 @@ describe('E2E Tests', () => {
|
||||
expect(workingEvent.kind).toBe('status-update');
|
||||
expect(workingEvent.status.state).toBe('working');
|
||||
|
||||
// State Update: Validate each tool call
|
||||
// State Update: Validate the first tool call
|
||||
const toolCallValidateEvent1 = events[3].result as TaskStatusUpdateEvent;
|
||||
expect(toolCallValidateEvent1.metadata?.['coderAgent']).toMatchObject({
|
||||
kind: 'tool-call-update',
|
||||
@@ -326,47 +326,218 @@ describe('E2E Tests', () => {
|
||||
},
|
||||
},
|
||||
]);
|
||||
const toolCallValidateEvent2 = events[4].result as TaskStatusUpdateEvent;
|
||||
expect(toolCallValidateEvent2.metadata?.['coderAgent']).toMatchObject({
|
||||
|
||||
// --- Assert the event stream ---
|
||||
// 1. Initial "submitted" status.
|
||||
expect((events[0].result as TaskStatusUpdateEvent).status.state).toBe(
|
||||
'submitted',
|
||||
);
|
||||
|
||||
// 2. "working" status after receiving the user prompt.
|
||||
expect((events[1].result as TaskStatusUpdateEvent).status.state).toBe(
|
||||
'working',
|
||||
);
|
||||
|
||||
// 3. A "state-change" event from the agent.
|
||||
expect(events[2].result.metadata?.['coderAgent']).toMatchObject({
|
||||
kind: 'state-change',
|
||||
});
|
||||
|
||||
// 4. Tool 1 is validating.
|
||||
const toolCallUpdate1 = events[3].result as TaskStatusUpdateEvent;
|
||||
expect(toolCallUpdate1.metadata?.['coderAgent']).toMatchObject({
|
||||
kind: 'tool-call-update',
|
||||
});
|
||||
expect(toolCallValidateEvent2.status.message?.parts).toMatchObject([
|
||||
expect(toolCallUpdate1.status.message?.parts).toMatchObject([
|
||||
{
|
||||
data: {
|
||||
request: { callId: 'test-call-id-1' },
|
||||
status: 'validating',
|
||||
request: { callId: 'test-call-id-2' },
|
||||
},
|
||||
},
|
||||
]);
|
||||
|
||||
// State Update: Set each tool call to awaiting
|
||||
const toolCallAwaitEvent1 = events[5].result as TaskStatusUpdateEvent;
|
||||
expect(toolCallAwaitEvent1.metadata?.['coderAgent']).toMatchObject({
|
||||
kind: 'tool-call-confirmation',
|
||||
// 5. Tool 2 is validating.
|
||||
const toolCallUpdate2 = events[4].result as TaskStatusUpdateEvent;
|
||||
expect(toolCallUpdate2.metadata?.['coderAgent']).toMatchObject({
|
||||
kind: 'tool-call-update',
|
||||
});
|
||||
expect(toolCallAwaitEvent1.status.message?.parts).toMatchObject([
|
||||
expect(toolCallUpdate2.status.message?.parts).toMatchObject([
|
||||
{
|
||||
data: {
|
||||
status: 'awaiting_approval',
|
||||
request: { callId: 'test-call-id-1' },
|
||||
},
|
||||
},
|
||||
]);
|
||||
const toolCallAwaitEvent2 = events[6].result as TaskStatusUpdateEvent;
|
||||
expect(toolCallAwaitEvent2.metadata?.['coderAgent']).toMatchObject({
|
||||
kind: 'tool-call-confirmation',
|
||||
});
|
||||
expect(toolCallAwaitEvent2.status.message?.parts).toMatchObject([
|
||||
{
|
||||
data: {
|
||||
status: 'awaiting_approval',
|
||||
request: { callId: 'test-call-id-2' },
|
||||
status: 'validating',
|
||||
},
|
||||
},
|
||||
]);
|
||||
|
||||
// 6. Tool 1 is awaiting approval.
|
||||
const toolCallAwaitEvent = events[5].result as TaskStatusUpdateEvent;
|
||||
expect(toolCallAwaitEvent.metadata?.['coderAgent']).toMatchObject({
|
||||
kind: 'tool-call-confirmation',
|
||||
});
|
||||
expect(toolCallAwaitEvent.status.message?.parts).toMatchObject([
|
||||
{
|
||||
data: {
|
||||
request: { callId: 'test-call-id-1' },
|
||||
status: 'awaiting_approval',
|
||||
},
|
||||
},
|
||||
]);
|
||||
|
||||
// 7. The final event is "input-required".
|
||||
const finalEvent = events[6].result as TaskStatusUpdateEvent;
|
||||
expect(finalEvent.final).toBe(true);
|
||||
expect(finalEvent.status.state).toBe('input-required');
|
||||
|
||||
// The scheduler now waits for approval, so no more events are sent.
|
||||
assertUniqueFinalEventIsLast(events);
|
||||
expect(events.length).toBe(7);
|
||||
});
|
||||
|
||||
it('should handle multiple tool calls sequentially in YOLO mode', async () => {
|
||||
// Set YOLO mode to auto-approve tools and test sequential execution.
|
||||
getApprovalModeSpy.mockReturnValue(ApprovalMode.YOLO);
|
||||
|
||||
// First call yields the tool request
|
||||
sendMessageStreamSpy.mockImplementationOnce(async function* () {
|
||||
yield* [
|
||||
{
|
||||
type: GeminiEventType.ToolCallRequest,
|
||||
value: {
|
||||
callId: 'test-call-id-1',
|
||||
name: 'test-tool-1',
|
||||
args: {},
|
||||
},
|
||||
},
|
||||
{
|
||||
type: GeminiEventType.ToolCallRequest,
|
||||
value: {
|
||||
callId: 'test-call-id-2',
|
||||
name: 'test-tool-2',
|
||||
args: {},
|
||||
},
|
||||
},
|
||||
];
|
||||
});
|
||||
// Subsequent calls yield nothing, as the tools will "succeed".
|
||||
sendMessageStreamSpy.mockImplementation(async function* () {
|
||||
yield* [{ type: 'content', value: 'All tools executed.' }];
|
||||
});
|
||||
|
||||
const mockTool1 = new MockTool({
|
||||
name: 'test-tool-1',
|
||||
displayName: 'Test Tool 1',
|
||||
shouldConfirmExecute: vi.fn(mockToolConfirmationFn),
|
||||
execute: vi
|
||||
.fn()
|
||||
.mockResolvedValue({ llmContent: 'tool 1 done', returnDisplay: '' }),
|
||||
});
|
||||
const mockTool2 = new MockTool({
|
||||
name: 'test-tool-2',
|
||||
displayName: 'Test Tool 2',
|
||||
shouldConfirmExecute: vi.fn(mockToolConfirmationFn),
|
||||
execute: vi
|
||||
.fn()
|
||||
.mockResolvedValue({ llmContent: 'tool 2 done', returnDisplay: '' }),
|
||||
});
|
||||
|
||||
getToolRegistrySpy.mockReturnValue({
|
||||
getAllTools: vi.fn().mockReturnValue([mockTool1, mockTool2]),
|
||||
getToolsByServer: vi.fn().mockReturnValue([]),
|
||||
getTool: vi.fn().mockImplementation((name: string) => {
|
||||
if (name === 'test-tool-1') return mockTool1;
|
||||
if (name === 'test-tool-2') return mockTool2;
|
||||
return undefined;
|
||||
}),
|
||||
});
|
||||
|
||||
const agent = request.agent(app);
|
||||
const res = await agent
|
||||
.post('/')
|
||||
.send(
|
||||
createStreamMessageRequest(
|
||||
'run two tools',
|
||||
'a2a-multi-tool-test-message',
|
||||
),
|
||||
)
|
||||
.set('Content-Type', 'application/json')
|
||||
.expect(200);
|
||||
|
||||
const events = streamToSSEEvents(res.text);
|
||||
assertTaskCreationAndWorkingStatus(events);
|
||||
|
||||
// --- Assert the sequential execution flow ---
|
||||
const eventStream = events.slice(2).map((e) => {
|
||||
const update = e.result as TaskStatusUpdateEvent;
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const agentData = update.metadata?.['coderAgent'] as any;
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const toolData = update.status.message?.parts[0] as any;
|
||||
if (!toolData) {
|
||||
return { kind: agentData.kind };
|
||||
}
|
||||
return {
|
||||
kind: agentData.kind,
|
||||
status: toolData.data?.status,
|
||||
callId: toolData.data?.request.callId,
|
||||
};
|
||||
});
|
||||
|
||||
const expectedFlow = [
|
||||
// Initial state change
|
||||
{ kind: 'state-change', status: undefined, callId: undefined },
|
||||
// Tool 1 Lifecycle
|
||||
{
|
||||
kind: 'tool-call-update',
|
||||
status: 'validating',
|
||||
callId: 'test-call-id-1',
|
||||
},
|
||||
{
|
||||
kind: 'tool-call-update',
|
||||
status: 'scheduled',
|
||||
callId: 'test-call-id-1',
|
||||
},
|
||||
{
|
||||
kind: 'tool-call-update',
|
||||
status: 'executing',
|
||||
callId: 'test-call-id-1',
|
||||
},
|
||||
{
|
||||
kind: 'tool-call-update',
|
||||
status: 'success',
|
||||
callId: 'test-call-id-1',
|
||||
},
|
||||
// Tool 2 Lifecycle
|
||||
{
|
||||
kind: 'tool-call-update',
|
||||
status: 'validating',
|
||||
callId: 'test-call-id-2',
|
||||
},
|
||||
{
|
||||
kind: 'tool-call-update',
|
||||
status: 'scheduled',
|
||||
callId: 'test-call-id-2',
|
||||
},
|
||||
{
|
||||
kind: 'tool-call-update',
|
||||
status: 'executing',
|
||||
callId: 'test-call-id-2',
|
||||
},
|
||||
{
|
||||
kind: 'tool-call-update',
|
||||
status: 'success',
|
||||
callId: 'test-call-id-2',
|
||||
},
|
||||
// Final updates
|
||||
{ kind: 'state-change', status: undefined, callId: undefined },
|
||||
{ kind: 'text-content', status: undefined, callId: undefined },
|
||||
];
|
||||
|
||||
// Use `toContainEqual` for flexibility if other events are interspersed.
|
||||
expect(eventStream).toEqual(expect.arrayContaining(expectedFlow));
|
||||
|
||||
assertUniqueFinalEventIsLast(events);
|
||||
expect(events.length).toBe(8);
|
||||
});
|
||||
|
||||
it('should handle tool calls that do not require approval', async () => {
|
||||
|
||||
Reference in New Issue
Block a user