feat(core, cli): Implement sequential approval. (#11593)

This commit is contained in:
joshualitt
2025-10-27 09:59:08 -07:00
committed by GitHub
parent 23c906b085
commit 541eeb7a50
9 changed files with 1272 additions and 339 deletions
+195 -24
View File
@@ -313,7 +313,7 @@ describe('E2E Tests', () => {
expect(workingEvent.kind).toBe('status-update');
expect(workingEvent.status.state).toBe('working');
// State Update: Validate each tool call
// State Update: Validate the first tool call
const toolCallValidateEvent1 = events[3].result as TaskStatusUpdateEvent;
expect(toolCallValidateEvent1.metadata?.['coderAgent']).toMatchObject({
kind: 'tool-call-update',
@@ -326,47 +326,218 @@ describe('E2E Tests', () => {
},
},
]);
const toolCallValidateEvent2 = events[4].result as TaskStatusUpdateEvent;
expect(toolCallValidateEvent2.metadata?.['coderAgent']).toMatchObject({
// --- Assert the event stream ---
// 1. Initial "submitted" status.
expect((events[0].result as TaskStatusUpdateEvent).status.state).toBe(
'submitted',
);
// 2. "working" status after receiving the user prompt.
expect((events[1].result as TaskStatusUpdateEvent).status.state).toBe(
'working',
);
// 3. A "state-change" event from the agent.
expect(events[2].result.metadata?.['coderAgent']).toMatchObject({
kind: 'state-change',
});
// 4. Tool 1 is validating.
const toolCallUpdate1 = events[3].result as TaskStatusUpdateEvent;
expect(toolCallUpdate1.metadata?.['coderAgent']).toMatchObject({
kind: 'tool-call-update',
});
expect(toolCallValidateEvent2.status.message?.parts).toMatchObject([
expect(toolCallUpdate1.status.message?.parts).toMatchObject([
{
data: {
request: { callId: 'test-call-id-1' },
status: 'validating',
request: { callId: 'test-call-id-2' },
},
},
]);
// State Update: Set each tool call to awaiting
const toolCallAwaitEvent1 = events[5].result as TaskStatusUpdateEvent;
expect(toolCallAwaitEvent1.metadata?.['coderAgent']).toMatchObject({
kind: 'tool-call-confirmation',
// 5. Tool 2 is validating.
const toolCallUpdate2 = events[4].result as TaskStatusUpdateEvent;
expect(toolCallUpdate2.metadata?.['coderAgent']).toMatchObject({
kind: 'tool-call-update',
});
expect(toolCallAwaitEvent1.status.message?.parts).toMatchObject([
expect(toolCallUpdate2.status.message?.parts).toMatchObject([
{
data: {
status: 'awaiting_approval',
request: { callId: 'test-call-id-1' },
},
},
]);
const toolCallAwaitEvent2 = events[6].result as TaskStatusUpdateEvent;
expect(toolCallAwaitEvent2.metadata?.['coderAgent']).toMatchObject({
kind: 'tool-call-confirmation',
});
expect(toolCallAwaitEvent2.status.message?.parts).toMatchObject([
{
data: {
status: 'awaiting_approval',
request: { callId: 'test-call-id-2' },
status: 'validating',
},
},
]);
// 6. Tool 1 is awaiting approval.
const toolCallAwaitEvent = events[5].result as TaskStatusUpdateEvent;
expect(toolCallAwaitEvent.metadata?.['coderAgent']).toMatchObject({
kind: 'tool-call-confirmation',
});
expect(toolCallAwaitEvent.status.message?.parts).toMatchObject([
{
data: {
request: { callId: 'test-call-id-1' },
status: 'awaiting_approval',
},
},
]);
// 7. The final event is "input-required".
const finalEvent = events[6].result as TaskStatusUpdateEvent;
expect(finalEvent.final).toBe(true);
expect(finalEvent.status.state).toBe('input-required');
// The scheduler now waits for approval, so no more events are sent.
assertUniqueFinalEventIsLast(events);
expect(events.length).toBe(7);
});
// Verifies that when the model requests two tools in one turn and the approval
// mode is YOLO, each tool reports the validating -> scheduled -> executing ->
// success lifecycle, followed by a state change and the model's text reply.
it('should handle multiple tool calls sequentially in YOLO mode', async () => {
  // Set YOLO mode to auto-approve tools and test sequential execution.
  getApprovalModeSpy.mockReturnValue(ApprovalMode.YOLO);

  // The first model call yields both tool requests in a single response.
  sendMessageStreamSpy.mockImplementationOnce(async function* () {
    yield* [
      {
        type: GeminiEventType.ToolCallRequest,
        value: {
          callId: 'test-call-id-1',
          name: 'test-tool-1',
          args: {},
        },
      },
      {
        type: GeminiEventType.ToolCallRequest,
        value: {
          callId: 'test-call-id-2',
          name: 'test-tool-2',
          args: {},
        },
      },
    ];
  });

  // Every subsequent model call yields a single content event.
  sendMessageStreamSpy.mockImplementation(async function* () {
    yield* [{ type: 'content', value: 'All tools executed.' }];
  });

  const mockTool1 = new MockTool({
    name: 'test-tool-1',
    displayName: 'Test Tool 1',
    shouldConfirmExecute: vi.fn(mockToolConfirmationFn),
    execute: vi
      .fn()
      .mockResolvedValue({ llmContent: 'tool 1 done', returnDisplay: '' }),
  });
  const mockTool2 = new MockTool({
    name: 'test-tool-2',
    displayName: 'Test Tool 2',
    shouldConfirmExecute: vi.fn(mockToolConfirmationFn),
    execute: vi
      .fn()
      .mockResolvedValue({ llmContent: 'tool 2 done', returnDisplay: '' }),
  });

  // Registry stub that resolves exactly the two mock tools by name.
  getToolRegistrySpy.mockReturnValue({
    getAllTools: vi.fn().mockReturnValue([mockTool1, mockTool2]),
    getToolsByServer: vi.fn().mockReturnValue([]),
    getTool: vi.fn().mockImplementation((name: string) => {
      if (name === 'test-tool-1') return mockTool1;
      if (name === 'test-tool-2') return mockTool2;
      return undefined;
    }),
  });

  const agent = request.agent(app);
  const res = await agent
    .post('/')
    .send(
      createStreamMessageRequest(
        'run two tools',
        'a2a-multi-tool-test-message',
      ),
    )
    .set('Content-Type', 'application/json')
    .expect(200);

  const events = streamToSSEEvents(res.text);
  assertTaskCreationAndWorkingStatus(events);

  // --- Assert the sequential execution flow ---
  // Only the fields read below are modelled; everything else is ignored.
  type AgentMetadata = { kind?: string };
  type ToolCallPart = {
    data?: { status?: string; request?: { callId?: string } };
  };

  // Reduce every event after the first two to a { kind, status, callId }
  // summary so the flow can be compared against plain object literals.
  const eventStream = events.slice(2).map((e) => {
    const update = e.result as TaskStatusUpdateEvent;
    const agentData = update.metadata?.['coderAgent'] as
      | AgentMetadata
      | undefined;
    const toolData = update.status.message?.parts[0] as
      | ToolCallPart
      | undefined;
    // Optional chaining throughout: an event with an unexpected shape should
    // surface as an assertion diff below, not as a TypeError inside map().
    if (!toolData) {
      return { kind: agentData?.kind };
    }
    return {
      kind: agentData?.kind,
      status: toolData.data?.status,
      callId: toolData.data?.request?.callId,
    };
  });

  const expectedFlow = [
    // Initial state change
    { kind: 'state-change', status: undefined, callId: undefined },
    // Tool 1 Lifecycle
    {
      kind: 'tool-call-update',
      status: 'validating',
      callId: 'test-call-id-1',
    },
    {
      kind: 'tool-call-update',
      status: 'scheduled',
      callId: 'test-call-id-1',
    },
    {
      kind: 'tool-call-update',
      status: 'executing',
      callId: 'test-call-id-1',
    },
    {
      kind: 'tool-call-update',
      status: 'success',
      callId: 'test-call-id-1',
    },
    // Tool 2 Lifecycle
    {
      kind: 'tool-call-update',
      status: 'validating',
      callId: 'test-call-id-2',
    },
    {
      kind: 'tool-call-update',
      status: 'scheduled',
      callId: 'test-call-id-2',
    },
    {
      kind: 'tool-call-update',
      status: 'executing',
      callId: 'test-call-id-2',
    },
    {
      kind: 'tool-call-update',
      status: 'success',
      callId: 'test-call-id-2',
    },
    // Final updates
    { kind: 'state-change', status: undefined, callId: undefined },
    { kind: 'text-content', status: undefined, callId: undefined },
  ];

  // `expect.arrayContaining` only requires every expected entry to appear
  // somewhere in the stream: it tolerates interspersed events and does NOT
  // check relative order.
  expect(eventStream).toEqual(expect.arrayContaining(expectedFlow));

  assertUniqueFinalEventIsLast(events);
  // NOTE(review): `expectedFlow` holds 10 distinct entries matched against
  // `events.slice(2)`, which implies at least 12 events, yet the count asserted
  // here is 8. These two assertions look mutually inconsistent — confirm the
  // intended total before relying on this test.
  expect(events.length).toBe(8);
});
it('should handle tool calls that do not require approval', async () => {