feat(core, cli): Implement sequential approval. (#11593)

2026-04-23 19:44:30 -07:00 · 2025-10-27 09:59:08 -07:00
parent 23c906b085
commit 541eeb7a50
9 changed files with 1272 additions and 339 deletions
@@ -4,11 +4,12 @@
 * SPDX-License-Identifier: Apache-2.0
 */

-import { describe, it, expect, vi } from 'vitest';
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import { Task } from './task.js';
 import type { Config, ToolCallRequestInfo } from '@google/gemini-cli-core';
 import { createMockConfig } from '../utils/testing_utils.js';
 import type { ExecutionEventBus } from '@a2a-js/sdk/server';
+import type { ToolCall } from '@google/gemini-cli-core';

 describe('Task', () => {
  it('scheduleToolCalls should not modify the input requests array', async () => {
@@ -94,4 +95,122 @@ describe('Task', () => {
      );
    });
  });
+
+  describe('_schedulerToolCallsUpdate', () => {
+    let task: Task;
+    type SpyInstance = ReturnType<typeof vi.spyOn>;
+    let setTaskStateAndPublishUpdateSpy: SpyInstance;
+
+    beforeEach(() => {
+      const mockConfig = createMockConfig();
+      const mockEventBus: ExecutionEventBus = {
+        publish: vi.fn(),
+        on: vi.fn(),
+        off: vi.fn(),
+        once: vi.fn(),
+        removeAllListeners: vi.fn(),
+        finished: vi.fn(),
+      };
+
+      // @ts-expect-error - Calling private constructor
+      task = new Task(
+        'task-id',
+        'context-id',
+        mockConfig as Config,
+        mockEventBus,
+      );
+
+      // Spy on the method we want to check calls for
+      setTaskStateAndPublishUpdateSpy = vi.spyOn(
+        task,
+        'setTaskStateAndPublishUpdate',
+      );
+    });
+
+    afterEach(() => {
+      vi.restoreAllMocks();
+    });
+
+    it('should set state to input-required when a tool is awaiting approval and none are executing', () => {
+      const toolCalls = [
+        { request: { callId: '1' }, status: 'awaiting_approval' },
+      ] as ToolCall[];
+
+      // @ts-expect-error - Calling private method
+      task._schedulerToolCallsUpdate(toolCalls);
+
+      // The last call should be the final state update
+      expect(setTaskStateAndPublishUpdateSpy).toHaveBeenLastCalledWith(
+        'input-required',
+        { kind: 'state-change' },
+        undefined,
+        undefined,
+        true, // final: true
+      );
+    });
+
+    it('should NOT set state to input-required if a tool is awaiting approval but another is executing', () => {
+      const toolCalls = [
+        { request: { callId: '1' }, status: 'awaiting_approval' },
+        { request: { callId: '2' }, status: 'executing' },
+      ] as ToolCall[];
+
+      // @ts-expect-error - Calling private method
+      task._schedulerToolCallsUpdate(toolCalls);
+
+      // It will be called for status updates, but not with final: true
+      const finalCall = setTaskStateAndPublishUpdateSpy.mock.calls.find(
+        (call) => call[4] === true,
+      );
+      expect(finalCall).toBeUndefined();
+    });
+
+    it('should set state to input-required once an executing tool finishes, leaving one awaiting approval', () => {
+      const initialToolCalls = [
+        { request: { callId: '1' }, status: 'awaiting_approval' },
+        { request: { callId: '2' }, status: 'executing' },
+      ] as ToolCall[];
+      // @ts-expect-error - Calling private method
+      task._schedulerToolCallsUpdate(initialToolCalls);
+
+      // No final call yet
+      let finalCall = setTaskStateAndPublishUpdateSpy.mock.calls.find(
+        (call) => call[4] === true,
+      );
+      expect(finalCall).toBeUndefined();
+
+      // Now, the executing tool finishes. The scheduler would call _resolveToolCall for it.
+      // @ts-expect-error - Calling private method
+      task._resolveToolCall('2');
+
+      // Then another update comes in for the awaiting tool (e.g., a re-check)
+      const subsequentToolCalls = [
+        { request: { callId: '1' }, status: 'awaiting_approval' },
+      ] as ToolCall[];
+      // @ts-expect-error - Calling private method
+      task._schedulerToolCallsUpdate(subsequentToolCalls);
+
+      // NOW we should get the final call
+      finalCall = setTaskStateAndPublishUpdateSpy.mock.calls.find(
+        (call) => call[4] === true,
+      );
+      expect(finalCall).toBeDefined();
+      expect(finalCall?.[0]).toBe('input-required');
+    });
+
+    it('should NOT set state to input-required if skipFinalTrueAfterInlineEdit is true', () => {
+      task.skipFinalTrueAfterInlineEdit = true;
+      const toolCalls = [
+        { request: { callId: '1' }, status: 'awaiting_approval' },
+      ] as ToolCall[];
+
+      // @ts-expect-error - Calling private method
+      task._schedulerToolCallsUpdate(toolCalls);
+
+      const finalCall = setTaskStateAndPublishUpdateSpy.mock.calls.find(
+        (call) => call[4] === true,
+      );
+      expect(finalCall).toBeUndefined();
+    });
+  });
 });
@@ -40,7 +40,6 @@ import type {
 import { v4 as uuidv4 } from 'uuid';
 import { logger } from '../utils/logger.js';
 import * as fs from 'node:fs';
-
 import { CoderAgentEvent } from '../types.js';
 import type {
  CoderAgentMessage,
@@ -373,11 +372,11 @@ export class Task {

      // Only send an update if the status has actually changed.
      if (hasChanged) {
-        const message = this.toolStatusMessage(tc, this.id, this.contextId);
        const coderAgentMessage: CoderAgentMessage =
          tc.status === 'awaiting_approval'
            ? { kind: CoderAgentEvent.ToolCallConfirmationEvent }
            : { kind: CoderAgentEvent.ToolCallUpdateEvent };
+        const message = this.toolStatusMessage(tc, this.id, this.contextId);

        const event = this._createStatusUpdateEvent(
          this.taskState,
@@ -404,20 +403,16 @@ export class Task {
    const isAwaitingApproval = allPendingStatuses.some(
      (status) => status === 'awaiting_approval',
    );
-    const allPendingAreStable = allPendingStatuses.every(
-      (status) =>
-        status === 'awaiting_approval' ||
-        status === 'success' ||
-        status === 'error' ||
-        status === 'cancelled',
+    const isExecuting = allPendingStatuses.some(
+      (status) => status === 'executing',
    );

-    // 1. Are any pending tool calls awaiting_approval
-    // 2. Are all pending tool calls in a stable state (i.e. not in validing or executing)
-    // 3. After an inline edit, the edited tool call will send awaiting_approval THEN scheduled. We wait for the next update in this case.
+    // The turn is complete and requires user input if at least one tool
+    // is waiting for the user's decision, and no other tool is actively
+    // running in the background.
    if (
      isAwaitingApproval &&
-      allPendingAreStable &&
+      !isExecuting &&
      !this.skipFinalTrueAfterInlineEdit
    ) {
      this.skipFinalTrueAfterInlineEdit = false;
@@ -313,7 +313,7 @@ describe('E2E Tests', () => {
    expect(workingEvent.kind).toBe('status-update');
    expect(workingEvent.status.state).toBe('working');

-    // State Update: Validate each tool call
+    // State Update: Validate the first tool call
    const toolCallValidateEvent1 = events[3].result as TaskStatusUpdateEvent;
    expect(toolCallValidateEvent1.metadata?.['coderAgent']).toMatchObject({
      kind: 'tool-call-update',
@@ -326,47 +326,218 @@ describe('E2E Tests', () => {
        },
      },
    ]);
-    const toolCallValidateEvent2 = events[4].result as TaskStatusUpdateEvent;
-    expect(toolCallValidateEvent2.metadata?.['coderAgent']).toMatchObject({
+
+    // --- Assert the event stream ---
+    // 1. Initial "submitted" status.
+    expect((events[0].result as TaskStatusUpdateEvent).status.state).toBe(
+      'submitted',
+    );
+
+    // 2. "working" status after receiving the user prompt.
+    expect((events[1].result as TaskStatusUpdateEvent).status.state).toBe(
+      'working',
+    );
+
+    // 3. A "state-change" event from the agent.
+    expect(events[2].result.metadata?.['coderAgent']).toMatchObject({
+      kind: 'state-change',
+    });
+
+    // 4. Tool 1 is validating.
+    const toolCallUpdate1 = events[3].result as TaskStatusUpdateEvent;
+    expect(toolCallUpdate1.metadata?.['coderAgent']).toMatchObject({
      kind: 'tool-call-update',
    });
-    expect(toolCallValidateEvent2.status.message?.parts).toMatchObject([
+    expect(toolCallUpdate1.status.message?.parts).toMatchObject([
      {
        data: {
+          request: { callId: 'test-call-id-1' },
          status: 'validating',
-          request: { callId: 'test-call-id-2' },
        },
      },
    ]);

-    // State Update: Set each tool call to awaiting
-    const toolCallAwaitEvent1 = events[5].result as TaskStatusUpdateEvent;
-    expect(toolCallAwaitEvent1.metadata?.['coderAgent']).toMatchObject({
-      kind: 'tool-call-confirmation',
+    // 5. Tool 2 is validating.
+    const toolCallUpdate2 = events[4].result as TaskStatusUpdateEvent;
+    expect(toolCallUpdate2.metadata?.['coderAgent']).toMatchObject({
+      kind: 'tool-call-update',
    });
-    expect(toolCallAwaitEvent1.status.message?.parts).toMatchObject([
+    expect(toolCallUpdate2.status.message?.parts).toMatchObject([
      {
        data: {
-          status: 'awaiting_approval',
-          request: { callId: 'test-call-id-1' },
-        },
-      },
-    ]);
-    const toolCallAwaitEvent2 = events[6].result as TaskStatusUpdateEvent;
-    expect(toolCallAwaitEvent2.metadata?.['coderAgent']).toMatchObject({
-      kind: 'tool-call-confirmation',
-    });
-    expect(toolCallAwaitEvent2.status.message?.parts).toMatchObject([
-      {
-        data: {
-          status: 'awaiting_approval',
          request: { callId: 'test-call-id-2' },
+          status: 'validating',
        },
      },
    ]);

+    // 6. Tool 1 is awaiting approval.
+    const toolCallAwaitEvent = events[5].result as TaskStatusUpdateEvent;
+    expect(toolCallAwaitEvent.metadata?.['coderAgent']).toMatchObject({
+      kind: 'tool-call-confirmation',
+    });
+    expect(toolCallAwaitEvent.status.message?.parts).toMatchObject([
+      {
+        data: {
+          request: { callId: 'test-call-id-1' },
+          status: 'awaiting_approval',
+        },
+      },
+    ]);
+
+    // 7. The final event is "input-required".
+    const finalEvent = events[6].result as TaskStatusUpdateEvent;
+    expect(finalEvent.final).toBe(true);
+    expect(finalEvent.status.state).toBe('input-required');
+
+    // The scheduler now waits for approval, so no more events are sent.
+    assertUniqueFinalEventIsLast(events);
+    expect(events.length).toBe(7);
+  });
+
+  it('should handle multiple tool calls sequentially in YOLO mode', async () => {
+    // Set YOLO mode to auto-approve tools and test sequential execution.
+    getApprovalModeSpy.mockReturnValue(ApprovalMode.YOLO);
+
+    // First call yields the tool request
+    sendMessageStreamSpy.mockImplementationOnce(async function* () {
+      yield* [
+        {
+          type: GeminiEventType.ToolCallRequest,
+          value: {
+            callId: 'test-call-id-1',
+            name: 'test-tool-1',
+            args: {},
+          },
+        },
+        {
+          type: GeminiEventType.ToolCallRequest,
+          value: {
+            callId: 'test-call-id-2',
+            name: 'test-tool-2',
+            args: {},
+          },
+        },
+      ];
+    });
+    // Subsequent calls yield nothing, as the tools will "succeed".
+    sendMessageStreamSpy.mockImplementation(async function* () {
+      yield* [{ type: 'content', value: 'All tools executed.' }];
+    });
+
+    const mockTool1 = new MockTool({
+      name: 'test-tool-1',
+      displayName: 'Test Tool 1',
+      shouldConfirmExecute: vi.fn(mockToolConfirmationFn),
+      execute: vi
+        .fn()
+        .mockResolvedValue({ llmContent: 'tool 1 done', returnDisplay: '' }),
+    });
+    const mockTool2 = new MockTool({
+      name: 'test-tool-2',
+      displayName: 'Test Tool 2',
+      shouldConfirmExecute: vi.fn(mockToolConfirmationFn),
+      execute: vi
+        .fn()
+        .mockResolvedValue({ llmContent: 'tool 2 done', returnDisplay: '' }),
+    });
+
+    getToolRegistrySpy.mockReturnValue({
+      getAllTools: vi.fn().mockReturnValue([mockTool1, mockTool2]),
+      getToolsByServer: vi.fn().mockReturnValue([]),
+      getTool: vi.fn().mockImplementation((name: string) => {
+        if (name === 'test-tool-1') return mockTool1;
+        if (name === 'test-tool-2') return mockTool2;
+        return undefined;
+      }),
+    });
+
+    const agent = request.agent(app);
+    const res = await agent
+      .post('/')
+      .send(
+        createStreamMessageRequest(
+          'run two tools',
+          'a2a-multi-tool-test-message',
+        ),
+      )
+      .set('Content-Type', 'application/json')
+      .expect(200);
+
+    const events = streamToSSEEvents(res.text);
+    assertTaskCreationAndWorkingStatus(events);
+
+    // --- Assert the sequential execution flow ---
+    const eventStream = events.slice(2).map((e) => {
+      const update = e.result as TaskStatusUpdateEvent;
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const agentData = update.metadata?.['coderAgent'] as any;
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const toolData = update.status.message?.parts[0] as any;
+      if (!toolData) {
+        return { kind: agentData.kind };
+      }
+      return {
+        kind: agentData.kind,
+        status: toolData.data?.status,
+        callId: toolData.data?.request.callId,
+      };
+    });
+
+    const expectedFlow = [
+      // Initial state change
+      { kind: 'state-change', status: undefined, callId: undefined },
+      // Tool 1 Lifecycle
+      {
+        kind: 'tool-call-update',
+        status: 'validating',
+        callId: 'test-call-id-1',
+      },
+      {
+        kind: 'tool-call-update',
+        status: 'scheduled',
+        callId: 'test-call-id-1',
+      },
+      {
+        kind: 'tool-call-update',
+        status: 'executing',
+        callId: 'test-call-id-1',
+      },
+      {
+        kind: 'tool-call-update',
+        status: 'success',
+        callId: 'test-call-id-1',
+      },
+      // Tool 2 Lifecycle
+      {
+        kind: 'tool-call-update',
+        status: 'validating',
+        callId: 'test-call-id-2',
+      },
+      {
+        kind: 'tool-call-update',
+        status: 'scheduled',
+        callId: 'test-call-id-2',
+      },
+      {
+        kind: 'tool-call-update',
+        status: 'executing',
+        callId: 'test-call-id-2',
+      },
+      {
+        kind: 'tool-call-update',
+        status: 'success',
+        callId: 'test-call-id-2',
+      },
+      // Final updates
+      { kind: 'state-change', status: undefined, callId: undefined },
+      { kind: 'text-content', status: undefined, callId: undefined },
+    ];
+
+    // Use `toContainEqual` for flexibility if other events are interspersed.
+    expect(eventStream).toEqual(expect.arrayContaining(expectedFlow));
+
    assertUniqueFinalEventIsLast(events);
-    expect(events.length).toBe(8);
  });

  it('should handle tool calls that do not require approval', async () => {