feat(core, cli): Implement sequential approval. (#11593)

This commit is contained in:
joshualitt
2025-10-27 09:59:08 -07:00
committed by GitHub
parent 23c906b085
commit 541eeb7a50
9 changed files with 1272 additions and 339 deletions
+120 -1
View File
@@ -4,11 +4,12 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi } from 'vitest';
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { Task } from './task.js';
import type { Config, ToolCallRequestInfo } from '@google/gemini-cli-core';
import { createMockConfig } from '../utils/testing_utils.js';
import type { ExecutionEventBus } from '@a2a-js/sdk/server';
import type { ToolCall } from '@google/gemini-cli-core';
describe('Task', () => {
it('scheduleToolCalls should not modify the input requests array', async () => {
@@ -94,4 +95,122 @@ describe('Task', () => {
);
});
});
// Exercises Task._schedulerToolCallsUpdate: checks when a batch of tool-call
// statuses causes setTaskStateAndPublishUpdate to be invoked with its fifth
// positional argument (treated by these tests as the `final` flag) set to true.
describe('_schedulerToolCallsUpdate', () => {
  type SpyInstance = ReturnType<typeof vi.spyOn>;

  let task: Task;
  let publishSpy: SpyInstance;

  // Returns the first recorded spy call whose fifth argument is `true`,
  // or undefined when no such call has been made.
  const findFinalCall = () =>
    publishSpy.mock.calls.find((args) => args[4] === true);

  // Feeds a batch of tool calls into the (private) scheduler update hook.
  const pushSchedulerUpdate = (calls: ToolCall[]): void => {
    // @ts-expect-error - Calling private method
    task._schedulerToolCallsUpdate(calls);
  };

  beforeEach(() => {
    // Every method on the event bus is a bare mock; the assertions below
    // only inspect the spy installed on the task itself.
    const eventBus: ExecutionEventBus = {
      publish: vi.fn(),
      on: vi.fn(),
      off: vi.fn(),
      once: vi.fn(),
      removeAllListeners: vi.fn(),
      finished: vi.fn(),
    };
    const config = createMockConfig();

    // @ts-expect-error - Calling private constructor
    task = new Task(
      'task-id',
      'context-id',
      config as Config,
      eventBus,
    );

    // Observe (without replacing) the method whose calls are asserted on.
    publishSpy = vi.spyOn(task, 'setTaskStateAndPublishUpdate');
  });

  afterEach(() => {
    vi.restoreAllMocks();
  });

  it('should set state to input-required when a tool is awaiting approval and none are executing', () => {
    const awaitingOnly = [
      { request: { callId: '1' }, status: 'awaiting_approval' },
    ] as ToolCall[];

    pushSchedulerUpdate(awaitingOnly);

    // The most recent call is expected to be the final state update.
    expect(publishSpy).toHaveBeenLastCalledWith(
      'input-required',
      { kind: 'state-change' },
      undefined,
      undefined,
      true, // final: true
    );
  });

  it('should NOT set state to input-required if a tool is awaiting approval but another is executing', () => {
    const awaitingAndExecuting = [
      { request: { callId: '1' }, status: 'awaiting_approval' },
      { request: { callId: '2' }, status: 'executing' },
    ] as ToolCall[];

    pushSchedulerUpdate(awaitingAndExecuting);

    // Status updates may have been published, but none flagged as final.
    expect(findFinalCall()).toBeUndefined();
  });

  it('should set state to input-required once an executing tool finishes, leaving one awaiting approval', () => {
    const firstBatch = [
      { request: { callId: '1' }, status: 'awaiting_approval' },
      { request: { callId: '2' }, status: 'executing' },
    ] as ToolCall[];

    pushSchedulerUpdate(firstBatch);

    // While call '2' is still executing there must be no final update.
    expect(findFinalCall()).toBeUndefined();

    // Simulate the executing tool finishing, as the scheduler would report it.
    // @ts-expect-error - Calling private method
    task._resolveToolCall('2');

    // A follow-up update arrives that only contains the awaiting tool.
    const secondBatch = [
      { request: { callId: '1' }, status: 'awaiting_approval' },
    ] as ToolCall[];

    pushSchedulerUpdate(secondBatch);

    // Only now should a final update have been published.
    const finalCall = findFinalCall();
    expect(finalCall).toBeDefined();
    expect(finalCall?.[0]).toBe('input-required');
  });

  it('should NOT set state to input-required if skipFinalTrueAfterInlineEdit is true', () => {
    task.skipFinalTrueAfterInlineEdit = true;

    const awaitingOnly = [
      { request: { callId: '1' }, status: 'awaiting_approval' },
    ] as ToolCall[];

    pushSchedulerUpdate(awaitingOnly);

    expect(findFinalCall()).toBeUndefined();
  });
});
});
+7 -12
View File
@@ -40,7 +40,6 @@ import type {
import { v4 as uuidv4 } from 'uuid';
import { logger } from '../utils/logger.js';
import * as fs from 'node:fs';
import { CoderAgentEvent } from '../types.js';
import type {
CoderAgentMessage,
@@ -373,11 +372,11 @@ export class Task {
// Only send an update if the status has actually changed.
if (hasChanged) {
const message = this.toolStatusMessage(tc, this.id, this.contextId);
const coderAgentMessage: CoderAgentMessage =
tc.status === 'awaiting_approval'
? { kind: CoderAgentEvent.ToolCallConfirmationEvent }
: { kind: CoderAgentEvent.ToolCallUpdateEvent };
const message = this.toolStatusMessage(tc, this.id, this.contextId);
const event = this._createStatusUpdateEvent(
this.taskState,
@@ -404,20 +403,16 @@ export class Task {
const isAwaitingApproval = allPendingStatuses.some(
(status) => status === 'awaiting_approval',
);
const allPendingAreStable = allPendingStatuses.every(
(status) =>
status === 'awaiting_approval' ||
status === 'success' ||
status === 'error' ||
status === 'cancelled',
const isExecuting = allPendingStatuses.some(
(status) => status === 'executing',
);
// 1. Are any pending tool calls awaiting_approval
// 2. Are all pending tool calls in a stable state (i.e. not in validating or executing)
// 3. After an inline edit, the edited tool call will send awaiting_approval THEN scheduled. We wait for the next update in this case.
// The turn is complete and requires user input if at least one tool
// is waiting for the user's decision, and no other tool is actively
// running in the background.
if (
isAwaitingApproval &&
allPendingAreStable &&
!isExecuting &&
!this.skipFinalTrueAfterInlineEdit
) {
this.skipFinalTrueAfterInlineEdit = false;