From 9054f828c405cb508ecdc6f01c2b2388a5f9e3cb Mon Sep 17 00:00:00 2001 From: Abhi <43648792+abhipatel12@users.noreply.github.com> Date: Wed, 1 Apr 2026 15:53:46 -0400 Subject: [PATCH] fix(core): ensure complete_task tool calls are recorded in chat history (#24437) --- .../core/src/agents/local-executor.test.ts | 498 +++++++++++++----- packages/core/src/agents/local-executor.ts | 281 ++++------ .../core/src/policy/policies/read-only.toml | 6 + packages/core/src/tools/complete-task.test.ts | 160 ++++++ packages/core/src/tools/complete-task.ts | 179 +++++++ .../tools/definitions/base-declarations.ts | 4 + .../core/src/tools/definitions/coreTools.ts | 2 + packages/core/src/tools/tool-names.ts | 5 + 8 files changed, 819 insertions(+), 316 deletions(-) create mode 100644 packages/core/src/tools/complete-task.test.ts create mode 100644 packages/core/src/tools/complete-task.ts diff --git a/packages/core/src/agents/local-executor.test.ts b/packages/core/src/agents/local-executor.test.ts index 84e552e30c..2ecd305a04 100644 --- a/packages/core/src/agents/local-executor.test.ts +++ b/packages/core/src/agents/local-executor.test.ts @@ -56,7 +56,11 @@ import { PromptRegistry } from '../prompts/prompt-registry.js'; import { ResourceRegistry } from '../resources/resource-registry.js'; import { DiscoveredMCPTool } from '../tools/mcp-tool.js'; import { LSTool } from '../tools/ls.js'; -import { LS_TOOL_NAME, READ_FILE_TOOL_NAME } from '../tools/tool-names.js'; +import { + COMPLETE_TASK_TOOL_NAME, + LS_TOOL_NAME, + READ_FILE_TOOL_NAME, +} from '../tools/tool-names.js'; import { GeminiChat, StreamEventType, @@ -202,9 +206,37 @@ const mockedLogAgentFinish = vi.mocked(logAgentFinish); const mockedLogRecoveryAttempt = vi.mocked(logRecoveryAttempt); // Constants for testing -const TASK_COMPLETE_TOOL_NAME = 'complete_task'; const MOCK_TOOL_NOT_ALLOWED = new MockTool({ name: 'write_file_interactive' }); +/** + * Helper to mock a successful completion result from the scheduler. + */ +const mockCompletionResult = ( + callId: string, + submittedOutput: string, + toolName = COMPLETE_TASK_TOOL_NAME, +) => { + mockScheduleAgentTools.mockResolvedValueOnce([ + { + status: 'success', + request: { + callId, + name: toolName, + args: {}, + prompt_id: 'test-prompt', + }, + response: { + resultDisplay: 'Task completed.', + responseParts: [], + data: { + taskCompleted: true, + submittedOutput, + }, + }, + }, + ]); +}; + /** * Helper to create a mock API response chunk. * Uses conditional spread to handle readonly functionCalls property safely. @@ -320,9 +352,48 @@ describe('LocalAgentExecutor', () => { vi.resetAllMocks(); mockCompress.mockClear(); mockSetHistory.mockClear(); - mockSendMessageStream.mockReset(); + mockSendMessageStream.mockReset().mockResolvedValue({ + async *[Symbol.asyncIterator]() { + yield { + type: StreamEventType.CHUNK, + value: { candidates: [] }, + }; + }, + }); mockSetSystemInstruction.mockReset(); - mockScheduleAgentTools.mockReset(); + mockScheduleAgentTools + .mockReset() + .mockImplementation(async (_config, requests) => + // Default mock behavior for scheduleAgentTools + requests.map((req: ToolCallRequestInfo) => { + if (req.name === COMPLETE_TASK_TOOL_NAME) { + return { + status: 'success', + request: req, + response: { + resultDisplay: 'Task completed.', + responseParts: [], + data: { + taskCompleted: true, + submittedOutput: + req.args['finalResult'] || + req.args['result'] || + JSON.stringify(req.args), + }, + }, + }; + } + return { + status: 'success', + request: req, + response: { + resultDisplay: 'Mock tool executed', + responseParts: [], + data: {}, + }, + }; + }), + ); mockedLogAgentStart.mockReset(); mockedLogAgentFinish.mockReset(); mockedPromptIdContext.getStore.mockReset(); @@ -413,7 +484,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'done' }, id: 'call1', }, @@ -488,7 +559,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'done' }, id: 'call1', }, @@ -534,7 +605,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'done' }, id: 'call1', }, @@ -584,9 +655,13 @@ describe('LocalAgentExecutor', () => { expect(agentRegistry).not.toBe(parentToolRegistry); expect(agentRegistry.getAllToolNames()).toEqual( - expect.arrayContaining([LS_TOOL_NAME, READ_FILE_TOOL_NAME]), + expect.arrayContaining([ + LS_TOOL_NAME, + READ_FILE_TOOL_NAME, + COMPLETE_TASK_TOOL_NAME, + ]), ); - expect(agentRegistry.getAllToolNames()).toHaveLength(2); + expect(agentRegistry.getAllToolNames()).toHaveLength(3); expect(agentRegistry.getTool(MOCK_TOOL_NOT_ALLOWED.name)).toBeUndefined(); }); @@ -621,7 +696,7 @@ describe('LocalAgentExecutor', () => { // Mock a response to prevent the loop from running forever mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'done' }, id: 'call1', }, @@ -867,7 +942,7 @@ describe('LocalAgentExecutor', () => { prompt_id: 'test-prompt', }, tool: {} as AnyDeclarativeTool, - invocation: {} as AnyToolInvocation, + invocation: {} as unknown as AnyToolInvocation, response: { callId: 'call1', resultDisplay: 'file1.txt', @@ -891,21 +966,49 @@ describe('LocalAgentExecutor', () => { mockModelResponse( [ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Found file1.txt' }, id: 'call2', }, ], 'T2: Done', ); + mockScheduleAgentTools.mockResolvedValueOnce([ + { + status: 'success', + request: { + callId: 'call2', + name: COMPLETE_TASK_TOOL_NAME, + args: { finalResult: 'Found file1.txt' }, + prompt_id: 'p1', + }, + response: { + resultDisplay: 'Output submitted and task completed.', + responseParts: [ + { + functionResponse: { + name: COMPLETE_TASK_TOOL_NAME, + id: 'call2', + response: { result: 'Output submitted and task completed.' }, + }, + }, + ], + data: { + taskCompleted: true, + submittedOutput: 'Found file1.txt', + }, + }, + }, + ]); const output = await executor.run(inputs, signal); expect(mockSendMessageStream).toHaveBeenCalledTimes(2); + expect(mockScheduleAgentTools).toHaveBeenCalledTimes(2); const systemInstruction = MockedGeminiChat.mock.calls[0][1]; expect(systemInstruction).toContain( - `MUST call the \`${TASK_COMPLETE_TOOL_NAME}\` tool`, + `MUST call the \`${COMPLETE_TASK_TOOL_NAME}\` tool`, ); expect(systemInstruction).toContain('Mocked Environment Context'); expect(systemInstruction).toContain( @@ -925,14 +1028,17 @@ describe('LocalAgentExecutor', () => { expect(sentTools).toEqual( expect.arrayContaining([ expect.objectContaining({ name: LS_TOOL_NAME }), - expect.objectContaining({ name: TASK_COMPLETE_TOOL_NAME }), + expect.objectContaining({ name: COMPLETE_TASK_TOOL_NAME }), ]), ); const completeToolDef = sentTools!.find( - (t) => t.name === TASK_COMPLETE_TOOL_NAME, + (t) => t.name === COMPLETE_TASK_TOOL_NAME, ); - expect(completeToolDef?.parameters?.required).toContain('finalResult'); + const completeSchema = completeToolDef?.parametersJsonSchema as + | Record + | undefined; + expect(completeSchema?.['required']).toContain('finalResult'); expect(output.result).toBe('Found file1.txt'); expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL); @@ -955,8 +1061,9 @@ describe('LocalAgentExecutor', () => { expect(mockedPromptIdContext.run).toHaveBeenCalledTimes(2); // Two turns // Recording checks - expect(mockRecordCompletedToolCalls).toHaveBeenCalledTimes(1); - expect(mockRecordCompletedToolCalls).toHaveBeenCalledWith( + expect(mockRecordCompletedToolCalls).toHaveBeenCalledTimes(2); + expect(mockRecordCompletedToolCalls).toHaveBeenNthCalledWith( + 1, expect.any(String), // model expect.arrayContaining([ expect.objectContaining({ @@ -995,14 +1102,14 @@ describe('LocalAgentExecutor', () => { expect.objectContaining({ type: 'TOOL_CALL_START', data: expect.objectContaining({ - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Found file1.txt' }, }), }), expect.objectContaining({ type: 'TOOL_CALL_END', data: expect.objectContaining({ - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, output: expect.stringContaining('Output submitted'), }), }), @@ -1032,7 +1139,7 @@ describe('LocalAgentExecutor', () => { prompt_id: 'test-prompt', }, tool: {} as AnyDeclarativeTool, - invocation: {} as AnyToolInvocation, + invocation: {} as unknown as AnyToolInvocation, response: { callId: 'call1', resultDisplay: 'ok', @@ -1055,13 +1162,14 @@ describe('LocalAgentExecutor', () => { mockModelResponse( [ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { result: 'All work done' }, id: 'call2', }, ], 'Task finished.', ); + mockCompletionResult('call2', 'All work done'); const output = await executor.run({ goal: 'Do work' }, signal); @@ -1074,15 +1182,19 @@ describe('LocalAgentExecutor', () => { expect(sentTools).toBeDefined(); const completeToolDef = sentTools!.find( - (t) => t.name === TASK_COMPLETE_TOOL_NAME, + (t) => t.name === COMPLETE_TASK_TOOL_NAME, ); - expect(completeToolDef?.parameters?.required).toEqual(['result']); + const schema = completeToolDef?.parametersJsonSchema as + | Record + | undefined; + expect(schema?.['required']).toContain('result'); expect(completeToolDef?.description).toContain( 'submit your final findings', ); expect(output.result).toBe('All work done'); expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL); + expect(mockScheduleAgentTools).toHaveBeenCalledTimes(2); }); it('should error immediately if the model stops tools without calling complete_task (Protocol Violation)', async () => { @@ -1107,7 +1219,7 @@ describe('LocalAgentExecutor', () => { prompt_id: 'test-prompt', }, tool: {} as AnyDeclarativeTool, - invocation: {} as AnyToolInvocation, + invocation: {} as unknown as AnyToolInvocation, response: { callId: 'call1', resultDisplay: 'ok', @@ -1137,7 +1249,7 @@ describe('LocalAgentExecutor', () => { expect(mockSendMessageStream).toHaveBeenCalledTimes(3); - const expectedError = `Agent stopped calling tools but did not call '${TASK_COMPLETE_TOOL_NAME}'.`; + const expectedError = `Agent stopped calling tools but did not call '${COMPLETE_TASK_TOOL_NAME}'.`; expect(output.terminate_reason).toBe( AgentTerminateMode.ERROR_NO_COMPLETE_TASK_CALL, @@ -1175,24 +1287,58 @@ describe('LocalAgentExecutor', () => { // Turn 1: Missing arg mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { wrongArg: 'oops' }, id: 'call1', }, ]); + // Mock failure in scheduler for Turn 1 + mockScheduleAgentTools.mockResolvedValueOnce([ + { + status: 'error', + request: { + callId: 'call1', + name: COMPLETE_TASK_TOOL_NAME, + args: { wrongArg: 'oops' }, + prompt_id: 'p1', + }, + response: { + resultDisplay: 'Error', + responseParts: [ + { + functionResponse: { + name: COMPLETE_TASK_TOOL_NAME, + id: 'call1', + response: { + error: + "Missing required argument 'finalResult' for completion.", + }, + }, + }, + ], + error: { + message: + "Missing required argument 'finalResult' for completion.", + type: 'INVALID_TOOL_PARAMS' as unknown as SubagentActivityErrorType, + }, + }, + }, + ]); // Turn 2: Corrected mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Corrected result' }, id: 'call2', }, ]); + mockCompletionResult('call2', 'Corrected result'); const output = await executor.run({ goal: 'Error test' }, signal); expect(mockSendMessageStream).toHaveBeenCalledTimes(2); + expect(mockScheduleAgentTools).toHaveBeenCalledTimes(2); const expectedError = "Missing required argument 'finalResult' for completion."; @@ -1202,7 +1348,7 @@ describe('LocalAgentExecutor', () => { type: 'ERROR', data: expect.objectContaining({ context: 'tool_call', - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, error: expectedError, errorType: SubagentActivityErrorType.GENERIC, }), @@ -1217,7 +1363,7 @@ describe('LocalAgentExecutor', () => { expect((turn2Parts as Part[])[0]).toEqual( expect.objectContaining({ functionResponse: expect.objectContaining({ - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, response: { error: expectedError }, id: 'call1', }), @@ -1228,7 +1374,7 @@ describe('LocalAgentExecutor', () => { expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL); }); - it('should handle multiple calls to complete_task in the same turn (accept first, block rest)', async () => { + it('should handle multiple calls to complete_task in the same turn', async () => { const definition = createTestDefinition([], {}, 'none'); const executor = await LocalAgentExecutor.create( definition, @@ -1239,36 +1385,62 @@ describe('LocalAgentExecutor', () => { // Turn 1: Duplicate calls mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, - args: { result: 'done' }, + name: COMPLETE_TASK_TOOL_NAME, + args: { result: 'first' }, id: 'call1', }, { - name: TASK_COMPLETE_TOOL_NAME, - args: { result: 'ignored' }, + name: COMPLETE_TASK_TOOL_NAME, + args: { result: 'second' }, id: 'call2', }, ]); + mockScheduleAgentTools.mockResolvedValueOnce([ + { + status: 'success', + request: { + callId: 'call1', + name: COMPLETE_TASK_TOOL_NAME, + args: { result: 'first' }, + prompt_id: 'p1', + }, + response: { + resultDisplay: 'ok', + responseParts: [], + data: { taskCompleted: true, submittedOutput: 'first' }, + }, + }, + { + status: 'success', + request: { + callId: 'call2', + name: COMPLETE_TASK_TOOL_NAME, + args: { result: 'second' }, + prompt_id: 'p1', + }, + response: { + resultDisplay: 'ok', + responseParts: [], + data: { taskCompleted: true, submittedOutput: 'second' }, + }, + }, + ]); + const output = await executor.run({ goal: 'Dup test' }, signal); expect(mockSendMessageStream).toHaveBeenCalledTimes(1); + expect(mockScheduleAgentTools).toHaveBeenCalledTimes(1); expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL); + // In current impl, the first successful complete_task in the batch is respected. + expect(output.result).toBe('first'); const completions = activities.filter( (a) => a.type === 'TOOL_CALL_END' && - a.data['name'] === TASK_COMPLETE_TOOL_NAME, - ); - const errors = activities.filter( - (a) => a.type === 'ERROR' && a.data['name'] === TASK_COMPLETE_TOOL_NAME, - ); - - expect(completions).toHaveLength(1); - expect(errors).toHaveLength(1); - expect(errors[0].data['error']).toContain( - 'Task already marked complete in this turn', + a.data['name'] === COMPLETE_TASK_TOOL_NAME, ); + expect(completions).toHaveLength(2); }); it('should execute parallel tool calls and then complete', async () => { @@ -1304,31 +1476,48 @@ describe('LocalAgentExecutor', () => { async (_ctx, requests: ToolCallRequestInfo[]) => { const results = await Promise.all( requests.map(async (reqInfo) => { - callsStarted++; - if (callsStarted === 2) resolveCalls(); - await vi.advanceTimersByTimeAsync(100); - return { - status: CoreToolCallStatus.Success, - request: reqInfo, - tool: {} as AnyDeclarativeTool, - invocation: {} as AnyToolInvocation, - response: { - callId: reqInfo.callId, - resultDisplay: 'ok', - responseParts: [ - { - functionResponse: { - name: reqInfo.name, - response: {}, - id: reqInfo.callId, + if (reqInfo.name === LS_TOOL_NAME) { + callsStarted++; + if (callsStarted === 2) resolveCalls(); + await vi.advanceTimersByTimeAsync(100); + return { + status: CoreToolCallStatus.Success, + request: reqInfo, + tool: {} as AnyDeclarativeTool, + invocation: {} as unknown as AnyToolInvocation, + response: { + callId: reqInfo.callId, + resultDisplay: 'ok', + responseParts: [ + { + functionResponse: { + name: reqInfo.name, + response: {}, + id: reqInfo.callId, + }, }, + ], + error: undefined, + errorType: undefined, + contentLength: 0, + }, + }; + } else if (reqInfo.name === COMPLETE_TASK_TOOL_NAME) { + return { + status: CoreToolCallStatus.Success, + request: reqInfo, + response: { + callId: reqInfo.callId, + resultDisplay: 'Task completed.', + responseParts: [], + data: { + taskCompleted: true, + submittedOutput: reqInfo.args['finalResult'] as string, }, - ], - error: undefined, - errorType: undefined, - contentLength: 0, - }, - }; + }, + }; + } + throw new Error(`Unexpected tool: ${reqInfo.name}`); }), ); return results; @@ -1338,7 +1527,7 @@ describe('LocalAgentExecutor', () => { // Turn 2: Completion mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'done' }, id: 'c3', }, @@ -1353,7 +1542,7 @@ describe('LocalAgentExecutor', () => { const output = await runPromise; - expect(mockScheduleAgentTools).toHaveBeenCalledTimes(1); + expect(mockScheduleAgentTools).toHaveBeenCalledTimes(2); expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL); // Safe access to message parts @@ -1394,7 +1583,7 @@ describe('LocalAgentExecutor', () => { // Turn 2: Model gives up and completes mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Could not read file.' }, id: 'c2', }, @@ -1404,10 +1593,30 @@ describe('LocalAgentExecutor', () => { .spyOn(debugLogger, 'warn') .mockImplementation(() => {}); + mockScheduleAgentTools.mockResolvedValueOnce([ + { + status: 'success', + request: { + callId: 'c2', + name: COMPLETE_TASK_TOOL_NAME, + args: { finalResult: 'Could not read file.' }, + prompt_id: 'p2', + }, + response: { + resultDisplay: 'Output submitted and task completed.', + responseParts: [], + data: { + taskCompleted: true, + submittedOutput: 'Could not read file.', + }, + }, + }, + ]); + await executor.run({ goal: 'Sec test' }, signal); - // Verify external executor was not called (Security held) - expect(mockScheduleAgentTools).not.toHaveBeenCalled(); + // Verify external executor was called exactly once (for complete_task) + expect(mockScheduleAgentTools).toHaveBeenCalledTimes(1); // 2. Verify console warning expect(consoleWarnSpy).toHaveBeenCalledWith( @@ -1462,16 +1671,43 @@ describe('LocalAgentExecutor', () => { // Turn 1: Invalid arg (too short) mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'short' }, id: 'call1', }, ]); + const expectedError = + 'Output validation failed: {"formErrors":["String must contain at least 10 character(s)"],"fieldErrors":{}}'; + mockScheduleAgentTools.mockResolvedValueOnce([ + { + status: 'error', + request: { + callId: 'call1', + name: COMPLETE_TASK_TOOL_NAME, + args: { finalResult: 'short' }, + prompt_id: 'p1', + }, + response: { + resultDisplay: expectedError, + responseParts: [ + { + functionResponse: { + name: COMPLETE_TASK_TOOL_NAME, + id: 'call1', + response: { error: expectedError }, + }, + }, + ], + data: { taskCompleted: false }, + error: new Error(expectedError), + }, + }, + ]); // Turn 2: Corrected mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'This is a much longer and valid result' }, id: 'call2', }, @@ -1481,16 +1717,13 @@ describe('LocalAgentExecutor', () => { expect(mockSendMessageStream).toHaveBeenCalledTimes(2); - const expectedError = - 'Output validation failed: {"formErrors":["String must contain at least 10 character(s)"],"fieldErrors":{}}'; - // Check that the error was reported in the activity stream expect(activities).toContainEqual( expect.objectContaining({ type: 'ERROR', data: expect.objectContaining({ context: 'tool_call', - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, error: expect.stringContaining('Output validation failed'), errorType: SubagentActivityErrorType.GENERIC, }), @@ -1503,7 +1736,7 @@ describe('LocalAgentExecutor', () => { expect(turn2Parts).toEqual([ expect.objectContaining({ functionResponse: expect.objectContaining({ - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, response: { error: expectedError }, id: 'call1', }), @@ -1577,7 +1810,7 @@ describe('LocalAgentExecutor', () => { prompt_id: 'test-prompt', }, tool: {} as AnyDeclarativeTool, - invocation: {} as AnyToolInvocation, + invocation: {} as unknown as AnyToolInvocation, response: { callId: 'call1', resultDisplay: '', @@ -1600,15 +1833,34 @@ describe('LocalAgentExecutor', () => { // Turn 2: Model sees the error and completes mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Aborted due to tool failure.' }, id: 'call2', }, ]); + mockScheduleAgentTools.mockResolvedValueOnce([ + { + status: 'success', + request: { + callId: 'call2', + name: COMPLETE_TASK_TOOL_NAME, + args: { finalResult: 'Aborted due to tool failure.' }, + prompt_id: 'p2', + }, + response: { + resultDisplay: 'Task completed.', + responseParts: [], + data: { + taskCompleted: true, + submittedOutput: 'Aborted due to tool failure.', + }, + }, + }, + ]); const output = await executor.run({ goal: 'Tool failure test' }, signal); - expect(mockScheduleAgentTools).toHaveBeenCalledTimes(1); + expect(mockScheduleAgentTools).toHaveBeenCalledTimes(2); expect(mockSendMessageStream).toHaveBeenCalledTimes(2); // Verify the error was reported in the activity stream @@ -1666,7 +1918,7 @@ describe('LocalAgentExecutor', () => { prompt_id: 'test-prompt', }, tool: {} as AnyDeclarativeTool, - invocation: {} as AnyToolInvocation, + invocation: {} as unknown as AnyToolInvocation, outcome: ToolConfirmationOutcome.Cancel, // Soft rejection response: { callId: 'call1', @@ -1693,7 +1945,7 @@ describe('LocalAgentExecutor', () => { // Turn 2: Model sees the rejection + consolidated instructions and completes mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'User rejected access to /secret.' }, id: 'call2', }, @@ -1754,7 +2006,7 @@ describe('LocalAgentExecutor', () => { prompt_id: 'test-prompt', }, tool: {} as AnyDeclarativeTool, - invocation: {} as AnyToolInvocation, + invocation: {} as unknown as AnyToolInvocation, outcome: undefined, // Hard abort response: { callId: 'call1', @@ -1827,7 +2079,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'done' }, id: 'call1', }, @@ -1873,7 +2125,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'done' }, id: 'call1', }, @@ -1906,7 +2158,7 @@ describe('LocalAgentExecutor', () => { prompt_id: 'test-prompt', }, tool: {} as AnyDeclarativeTool, - invocation: {} as AnyToolInvocation, + invocation: {} as unknown as AnyToolInvocation, response: { callId: id, resultDisplay: 'ok', @@ -2020,7 +2272,7 @@ describe('LocalAgentExecutor', () => { status: 'success', request: requests[0], tool: {} as AnyDeclarativeTool, - invocation: {} as AnyToolInvocation, + invocation: {} as unknown as AnyToolInvocation, response: { callId: 't1', resultDisplay: 'ok', @@ -2079,7 +2331,7 @@ describe('LocalAgentExecutor', () => { prompt_id: 'test-prompt', }, tool: {} as AnyDeclarativeTool, - invocation: {} as AnyToolInvocation, + invocation: {} as unknown as AnyToolInvocation, response: { callId: id, resultDisplay: 'ok', @@ -2112,7 +2364,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse( [ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Recovered!' }, id: 't2', }, @@ -2200,7 +2452,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse( [ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Recovered from violation!' }, id: 't3', }, @@ -2251,7 +2503,7 @@ describe('LocalAgentExecutor', () => { AgentTerminateMode.ERROR_NO_COMPLETE_TASK_CALL, ); expect(output.result).toContain( - `Agent stopped calling tools but did not call '${TASK_COMPLETE_TOOL_NAME}'`, + `Agent stopped calling tools but did not call '${COMPLETE_TASK_TOOL_NAME}'`, ); expect(activities).toContainEqual( @@ -2294,7 +2546,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse( [ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Recovered from timeout!' }, id: 't2', }, @@ -2402,7 +2654,7 @@ describe('LocalAgentExecutor', () => { prompt_id: 'test-prompt', }, tool: {} as AnyDeclarativeTool, - invocation: {} as AnyToolInvocation, + invocation: {} as unknown as AnyToolInvocation, response: { callId: id, resultDisplay: 'ok', @@ -2460,7 +2712,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse( [ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Recovered!' }, id: 't2', }, @@ -2519,7 +2771,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse( [ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Done' }, id: 'call2', }, @@ -2549,7 +2801,7 @@ describe('LocalAgentExecutor', () => { prompt_id: 'p1', }, tool: {} as AnyDeclarativeTool, - invocation: {} as AnyToolInvocation, + invocation: {} as unknown as AnyToolInvocation, response: { callId: 'call1', resultDisplay: 'file1.txt', @@ -2593,7 +2845,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Done' }, id: 'call1', }, @@ -2636,7 +2888,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse( [ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Done' }, id: 'call2', }, @@ -2668,7 +2920,7 @@ describe('LocalAgentExecutor', () => { prompt_id: 'p1', }, tool: {} as AnyDeclarativeTool, - invocation: {} as AnyToolInvocation, + invocation: {} as unknown as AnyToolInvocation, response: { callId: 'call1', resultDisplay: 'file1.txt', @@ -2732,7 +2984,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Done' }, id: 'call2', }, @@ -2757,7 +3009,7 @@ describe('LocalAgentExecutor', () => { prompt_id: 'p1', }, tool: {} as AnyDeclarativeTool, - invocation: {} as AnyToolInvocation, + invocation: {} as unknown as AnyToolInvocation, response: { callId: 'call1', resultDisplay: 'file1.txt', @@ -2812,7 +3064,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Done' }, id: 'call2', }, @@ -2841,7 +3093,7 @@ describe('LocalAgentExecutor', () => { prompt_id: 'p1', }, tool: {} as AnyDeclarativeTool, - invocation: {} as AnyToolInvocation, + invocation: {} as unknown as AnyToolInvocation, response: { callId: 'call1', resultDisplay: 'file1.txt', @@ -2902,7 +3154,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Done' }, id: 'call1', }, @@ -2933,7 +3185,7 @@ describe('LocalAgentExecutor', () => { prompt_id: 'test-prompt', }, tool: {} as AnyDeclarativeTool, - invocation: {} as AnyToolInvocation, + invocation: {} as unknown as AnyToolInvocation, response: { callId: id, resultDisplay: 'ok', @@ -2969,7 +3221,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse( [ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Done' }, id: 'call2', }, @@ -3002,7 +3254,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse( [ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Done' }, id: 'call1', }, @@ -3045,7 +3297,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse( [ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Done' }, id: 't2', }, @@ -3100,7 +3352,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse( [ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'Done' }, id: 't3', }, @@ -3259,7 +3511,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'done' }, id: 'c1', }, @@ -3335,7 +3587,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'ok' }, id: 'c1', }, @@ -3353,7 +3605,7 @@ describe('LocalAgentExecutor', () => { expect(names.filter((n) => n === LS_TOOL_NAME)).toHaveLength(1); expect(names.filter((n) => n === 'fill')).toHaveLength(1); - expect(names.filter((n) => n === TASK_COMPLETE_TOOL_NAME)).toHaveLength( + expect(names.filter((n) => n === COMPLETE_TASK_TOOL_NAME)).toHaveLength( 1, ); // Total = ls + fill + complete_task @@ -3384,7 +3636,7 @@ describe('LocalAgentExecutor', () => { prompt_id: 'test', }, tool: {} as AnyDeclarativeTool, - invocation: {} as AnyToolInvocation, + invocation: {} as unknown as AnyToolInvocation, response: { callId: 'call-click', resultDisplay: 'Clicked', @@ -3407,7 +3659,7 @@ describe('LocalAgentExecutor', () => { // Turn 2: Model completes mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'done' }, id: 'call-done', }, @@ -3438,7 +3690,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { result: 'done' }, id: 'c1', }, @@ -3454,7 +3706,7 @@ describe('LocalAgentExecutor', () => { const declarations = getSentFunctionDeclarations(); const names = declarations.map((d) => d.name); - expect(names).toContain(TASK_COMPLETE_TOOL_NAME); + expect(names).toContain(COMPLETE_TASK_TOOL_NAME); expect(names).toContain('take_snapshot'); expect(declarations).toHaveLength(2); }); @@ -3485,7 +3737,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'done' }, id: 'c1', }, @@ -3530,7 +3782,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'done' }, id: 'call1', }, @@ -3561,7 +3813,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'done' }, id: 'call1', }, @@ -3593,7 +3845,7 @@ describe('LocalAgentExecutor', () => { mockModelResponse([ { - name: TASK_COMPLETE_TOOL_NAME, + name: COMPLETE_TASK_TOOL_NAME, args: { finalResult: 'done' }, id: 'call1', }, diff --git a/packages/core/src/agents/local-executor.ts b/packages/core/src/agents/local-executor.ts index af7312c231..2ccd40ba9d 100644 --- a/packages/core/src/agents/local-executor.ts +++ b/packages/core/src/agents/local-executor.ts @@ -8,12 +8,10 @@ import { type AgentLoopContext } from '../config/agent-loop-context.js'; import { reportError } from '../utils/errorReporting.js'; import { GeminiChat, StreamEventType } from '../core/geminiChat.js'; import { - Type, type Content, type Part, type FunctionCall, type FunctionDeclaration, - type Schema, } from '@google/genai'; import { ToolRegistry } from '../tools/tool-registry.js'; import { PromptRegistry } from '../prompts/prompt-registry.js'; @@ -64,7 +62,6 @@ import { DEFAULT_GEMINI_MODEL, isAutoModel } from '../config/models.js'; import type { RoutingContext } from '../routing/routingStrategy.js'; import { parseThought } from '../utils/thoughtUtils.js'; import { type z } from 'zod'; -import { zodToJsonSchema } from 'zod-to-json-schema'; import { debugLogger } from '../utils/debugLogger.js'; import { getModelConfigAlias } from './registry.js'; import { getVersion } from '../utils/version.js'; @@ -76,11 +73,12 @@ import { formatBackgroundCompletionForModel, } from '../utils/fastAckHelper.js'; import type { InjectionSource } from '../config/injectionService.js'; +import { CompleteTaskTool } from '../tools/complete-task.js'; +import { COMPLETE_TASK_TOOL_NAME } from '../tools/definitions/base-declarations.js'; /** A callback function to report on agent activity. */ export type ActivityCallback = (activity: SubagentActivityEvent) => void; -const TASK_COMPLETE_TOOL_NAME = 'complete_task'; const GRACE_PERIOD_MS = 60 * 1000; // 1 min /** The possible outcomes of a single agent turn. */ @@ -256,6 +254,15 @@ export class LocalAgentExecutor { agentToolRegistry.sortTools(); + // Register the mandatory completion tool for this agent. + agentToolRegistry.registerTool( + new CompleteTaskTool( + subagentMessageBus, + definition.outputConfig, + definition.processOutput, + ), + ); + // Get the parent tool call ID from context const toolContext = getToolCallContext(); const parentCallId = toolContext?.callId; @@ -341,7 +348,7 @@ export class LocalAgentExecutor { // If the model stops calling tools without calling complete_task, it's an error. if (functionCalls.length === 0) { this.emitActivity('ERROR', { - error: `Agent stopped calling tools but did not call '${TASK_COMPLETE_TOOL_NAME}' to finalize the session.`, + error: `Agent stopped calling tools but did not call '${COMPLETE_TASK_TOOL_NAME}' to finalize the session.`, context: 'protocol_violation', errorType: SubagentActivityErrorType.GENERIC, }); @@ -409,7 +416,7 @@ export class LocalAgentExecutor { default: throw new Error(`Unknown terminate reason: ${reason}`); } - return `${explanation} You have one final chance to complete the task with a short grace period. You MUST call \`${TASK_COMPLETE_TOOL_NAME}\` immediately with your best answer and explain that your investigation was interrupted. Do not call any other tools.`; + return `${explanation} You have one final chance to complete the task with a short grace period. You MUST call \`${COMPLETE_TASK_TOOL_NAME}\` immediately with your best answer and explain that your investigation was interrupted. Do not call any other tools.`; } /** @@ -720,7 +727,7 @@ export class LocalAgentExecutor { // The finalResult was already set by executeTurn, but we re-emit just in case. finalResult = finalResult || - `Agent stopped calling tools but did not call '${TASK_COMPLETE_TOOL_NAME}'.`; + `Agent stopped calling tools but did not call '${COMPLETE_TASK_TOOL_NAME}'.`; this.emitActivity('ERROR', { error: finalResult, context: 'protocol_violation', @@ -1031,23 +1038,42 @@ export class LocalAgentExecutor { aborted: boolean; }> { const allowedToolNames = new Set(this.toolRegistry.getAllToolNames()); - // Always allow the completion tool - allowedToolNames.add(TASK_COMPLETE_TOOL_NAME); let submittedOutput: string | null = null; let taskCompleted = false; let aborted = false; - // We'll separate complete_task from other tools const toolRequests: ToolCallRequestInfo[] = []; // Map to keep track of tool name by callId for activity emission const toolNameMap = new Map(); - // Synchronous results (like complete_task or unauthorized calls) + // Synchronous results (like unauthorized calls) const syncResults = new Map(); for (const [index, functionCall] of functionCalls.entries()) { const callId = functionCall.id ?? `${promptId}-${index}`; - const args = functionCall.args ?? {}; + const { args, error: parseError } = this.parseToolArguments(functionCall); + + if (parseError) { + debugLogger.warn(`[LocalAgentExecutor] ${parseError}`); + + syncResults.set(callId, { + functionResponse: { + name: functionCall.name, + id: callId, + response: { error: parseError }, + }, + }); + + this.emitActivity('ERROR', { + context: 'tool_call', + name: functionCall.name, + callId, + error: parseError, + errorType: SubagentActivityErrorType.GENERIC, + }); + continue; + } + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion const toolName = functionCall.name as string; @@ -1073,144 +1099,7 @@ export class LocalAgentExecutor { callId, }); - if (toolName === TASK_COMPLETE_TOOL_NAME) { - if (taskCompleted) { - const error = - 'Task already marked complete in this turn. Ignoring duplicate call.'; - syncResults.set(callId, { - functionResponse: { - name: TASK_COMPLETE_TOOL_NAME, - response: { error }, - id: callId, - }, - }); - this.emitActivity('ERROR', { - context: 'tool_call', - name: toolName, - error, - errorType: SubagentActivityErrorType.GENERIC, - }); - continue; - } - - const { outputConfig } = this.definition; - taskCompleted = true; // Signal completion regardless of output presence - - if (outputConfig) { - const outputName = outputConfig.outputName; - if (args[outputName] !== undefined) { - const outputValue = args[outputName]; - const validationResult = outputConfig.schema.safeParse(outputValue); - - if (!validationResult.success) { - taskCompleted = false; // Validation failed, revoke completion - const error = `Output validation failed: ${JSON.stringify(validationResult.error.flatten())}`; - syncResults.set(callId, { - functionResponse: { - name: TASK_COMPLETE_TOOL_NAME, - response: { error }, - id: callId, - }, - }); - this.emitActivity('ERROR', { - context: 'tool_call', - name: toolName, - error, - errorType: SubagentActivityErrorType.GENERIC, - }); - continue; - } - - // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment - const validatedOutput = validationResult.data; - if (this.definition.processOutput) { - submittedOutput = this.definition.processOutput(validatedOutput); - } else { - submittedOutput = - typeof outputValue === 'string' - ? outputValue - : JSON.stringify(outputValue, null, 2); - } - syncResults.set(callId, { - functionResponse: { - name: TASK_COMPLETE_TOOL_NAME, - response: { result: 'Output submitted and task completed.' }, - id: callId, - }, - }); - this.emitActivity('TOOL_CALL_END', { - name: toolName, - id: callId, - output: 'Output submitted and task completed.', - }); - } else { - // Failed to provide required output. - taskCompleted = false; // Revoke completion status - const error = `Missing required argument '${outputName}' for completion.`; - syncResults.set(callId, { - functionResponse: { - name: TASK_COMPLETE_TOOL_NAME, - response: { error }, - id: callId, - }, - }); - this.emitActivity('ERROR', { - context: 'tool_call', - name: toolName, - callId, - error, - errorType: SubagentActivityErrorType.GENERIC, - }); - } - } else { - // No outputConfig - use default 'result' parameter - const resultArg = args['result']; - if ( - resultArg !== undefined && - resultArg !== null && - resultArg !== '' - ) { - submittedOutput = - typeof resultArg === 'string' - ? resultArg - : JSON.stringify(resultArg, null, 2); - syncResults.set(callId, { - functionResponse: { - name: TASK_COMPLETE_TOOL_NAME, - response: { status: 'Result submitted and task completed.' }, - id: callId, - }, - }); - this.emitActivity('TOOL_CALL_END', { - name: toolName, - id: callId, - output: 'Result submitted and task completed.', - }); - } else { - // No result provided - this is an error for agents expected to return results - taskCompleted = false; // Revoke completion - const error = - 'Missing required "result" argument. You must provide your findings when calling complete_task.'; - syncResults.set(callId, { - functionResponse: { - name: TASK_COMPLETE_TOOL_NAME, - response: { error }, - id: callId, - }, - }); - this.emitActivity('ERROR', { - context: 'tool_call', - name: toolName, - callId, - error, - errorType: SubagentActivityErrorType.GENERIC, - }); - } - } - continue; - } - - // Handle standard tools + // Handle unauthorized tools if (!allowedToolNames.has(toolName)) { const error = createUnauthorizedToolError(toolName); debugLogger.warn(`[LocalAgentExecutor] Blocked call: ${error}`); @@ -1274,6 +1163,22 @@ export class LocalAgentExecutor { output: call.response.resultDisplay, data: call.response.data, }); + + // Check if this was a completion tool call + const isCompletionTool = + call.request.name === COMPLETE_TASK_TOOL_NAME; + const data = call.response.data; + if ( + isCompletionTool && + !taskCompleted && + data?.['taskCompleted'] === true + ) { + taskCompleted = true; + const output = data['submittedOutput']; + if (typeof output === 'string') { + submittedOutput = output; + } + } } else if (call.status === 'error') { this.emitActivity('ERROR', { context: 'tool_call', @@ -1287,7 +1192,7 @@ export class LocalAgentExecutor { call.outcome === ToolConfirmationOutcome.Cancel; if (isSoftRejection) { - const error = `${SUBAGENT_REJECTED_ERROR_PREFIX} Please acknowledge this, rethink your strategy, and try a different approach. If you cannot proceed without the rejected operation, summarize the issue and use \`${TASK_COMPLETE_TOOL_NAME}\` to report your findings and the blocker.`; + const error = `${SUBAGENT_REJECTED_ERROR_PREFIX} Please acknowledge this, rethink your strategy, and try a different approach. If you cannot proceed without the rejected operation, summarize the issue and use \`${COMPLETE_TASK_TOOL_NAME}\` to report your findings and the blocker.`; this.emitActivity('ERROR', { context: 'tool_call', name: toolName, @@ -1358,7 +1263,7 @@ export class LocalAgentExecutor { */ private prepareToolsList(): FunctionDeclaration[] { const toolsList: FunctionDeclaration[] = []; - const { toolConfig, outputConfig } = this.definition; + const { toolConfig } = this.definition; if (toolConfig) { for (const toolRef of toolConfig.tools) { @@ -1375,43 +1280,6 @@ export class LocalAgentExecutor { ), ); - // Always inject complete_task. - // Configure its schema based on whether output is expected. - const completeTool: FunctionDeclaration = { - name: TASK_COMPLETE_TOOL_NAME, - description: outputConfig - ? 'Call this tool to submit your final answer and complete the task. This is the ONLY way to finish.' - : 'Call this tool to submit your final findings and complete the task. This is the ONLY way to finish.', - parameters: { - type: Type.OBJECT, - properties: {}, - required: [], - }, - }; - - if (outputConfig) { - const jsonSchema = zodToJsonSchema(outputConfig.schema); - const { - $schema: _$schema, - definitions: _definitions, - ...schema - } = jsonSchema; - completeTool.parameters!.properties![outputConfig.outputName] = - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - schema as Schema; - completeTool.parameters!.required!.push(outputConfig.outputName); - } else { - completeTool.parameters!.properties!['result'] = { - type: Type.STRING, - description: - 'Your final results or findings to return to the orchestrator. ' + - 'Ensure this is comprehensive and follows any formatting requested in your instructions.', - }; - completeTool.parameters!.required!.push('result'); - } - - toolsList.push(completeTool); - return toolsList; } @@ -1445,15 +1313,15 @@ Important Rules: if (this.definition.outputConfig) { finalPrompt += ` -* When you have completed your task, you MUST call the \`${TASK_COMPLETE_TOOL_NAME}\` tool with your structured output. -* Do not call any other tools in the same turn as \`${TASK_COMPLETE_TOOL_NAME}\`. +* When you have completed your task, you MUST call the \`${COMPLETE_TASK_TOOL_NAME}\` tool with your structured output. +* Do not call any other tools in the same turn as \`${COMPLETE_TASK_TOOL_NAME}\`. * This is the ONLY way to complete your mission. If you stop calling tools without calling this, you have failed.`; } else { finalPrompt += ` -* When you have completed your task, you MUST call the \`${TASK_COMPLETE_TOOL_NAME}\` tool. +* When you have completed your task, you MUST call the \`${COMPLETE_TASK_TOOL_NAME}\` tool. * You MUST include your final findings in the "result" parameter. This is how you return the necessary results for the task to be marked complete. * Ensure your findings are comprehensive and follow any specific formatting requirements provided in your instructions. -* Do not call any other tools in the same turn as \`${TASK_COMPLETE_TOOL_NAME}\`. +* Do not call any other tools in the same turn as \`${COMPLETE_TASK_TOOL_NAME}\`. * This is the ONLY way to complete your mission. If you stop calling tools without calling this, you have failed.`; } @@ -1524,4 +1392,31 @@ Important Rules: } return chars.slice(0, 197).join('') + '...'; } + + /** + * Parses the arguments for a tool call, handling both JSON strings and objects. + */ + private parseToolArguments(functionCall: FunctionCall): { + args: Record; + error?: string; + } { + const args: Record = {}; + if (typeof functionCall.args === 'string') { + try { + const parsed: unknown = JSON.parse(functionCall.args); + if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) { + Object.assign(args, parsed); + } + return { args }; + } catch (_) { + return { + args: {}, + error: `Failed to parse JSON arguments for tool "${functionCall.name}": ${functionCall.args}. Ensure you provide a valid JSON object.`, + }; + } + } else if (functionCall.args) { + return { args: functionCall.args }; + } + return { args: {} }; + } } diff --git a/packages/core/src/policy/policies/read-only.toml b/packages/core/src/policy/policies/read-only.toml index 66aa4c33ce..c56984b522 100644 --- a/packages/core/src/policy/policies/read-only.toml +++ b/packages/core/src/policy/policies/read-only.toml @@ -61,4 +61,10 @@ priority = 50 [[rule]] toolName = "update_topic" decision = "allow" +priority = 50 + +# Core agent lifecycle tool +[[rule]] +toolName = "complete_task" +decision = "allow" priority = 50 \ No newline at end of file diff --git a/packages/core/src/tools/complete-task.test.ts b/packages/core/src/tools/complete-task.test.ts new file mode 100644 index 0000000000..6577c8786c --- /dev/null +++ b/packages/core/src/tools/complete-task.test.ts @@ -0,0 +1,160 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { CompleteTaskTool } from './complete-task.js'; +import { type MessageBus } from '../confirmation-bus/message-bus.js'; +import { z } from 'zod'; + +describe('CompleteTaskTool', () => { + let mockMessageBus: MessageBus; + + beforeEach(() => { + mockMessageBus = { + publish: vi.fn().mockResolvedValue(undefined), + subscribe: vi.fn(), + unsubscribe: vi.fn(), + } as unknown as MessageBus; + }); + + describe('Default Configuration (no outputConfig)', () => { + let tool: CompleteTaskTool; + + beforeEach(() => { + tool = new CompleteTaskTool(mockMessageBus); + }); + + it('should have correct metadata', () => { + expect(tool.name).toBe('complete_task'); + expect(tool.displayName).toBe('Complete Task'); + }); + + it('should generate correct schema', () => { + const schema = tool.getSchema(); + const parameters = schema.parametersJsonSchema as Record; + const properties = parameters['properties'] as Record; + + expect(properties).toHaveProperty('result'); + expect(parameters['required']).toContain('result'); + + const resultProp = properties['result'] as Record; + expect(resultProp['type']).toBe('string'); + }); + + it('should validate successfully with result', () => { + const result = tool.validateToolParams({ result: 'Task done' }); + expect(result).toBeNull(); + }); + + it('should fail validation if result is missing', () => { + const result = tool.validateToolParams({}); + expect(result).toContain("must have required property 'result'"); + }); + + it('should fail validation if result is only whitespace', () => { + const result = tool.validateToolParams({ result: ' ' }); + expect(result).toContain( + 'Missing required "result" argument. You must provide your findings when calling complete_task.', + ); + }); + + it('should execute and return correct data', async () => { + const invocation = tool.build({ result: 'Success message' }); + const result = await invocation.execute(new AbortController().signal); + + expect(result.data).toEqual({ + taskCompleted: true, + submittedOutput: 'Success message', + }); + expect(result.returnDisplay).toBe('Result submitted and task completed.'); + }); + }); + + describe('Structured Configuration (with outputConfig)', () => { + const schema = z.object({ + report: z.string(), + score: z.number(), + }); + const outputConfig = { + outputName: 'my_output', + description: 'The final report', + schema, + }; + let tool: CompleteTaskTool; + + beforeEach(() => { + tool = new CompleteTaskTool(mockMessageBus, outputConfig); + }); + + it('should generate schema based on outputConfig', () => { + const toolSchema = tool.getSchema(); + + expect(toolSchema.parametersJsonSchema).toHaveProperty( + 'properties.my_output', + ); + expect(toolSchema.parametersJsonSchema).toHaveProperty( + 'properties.my_output.type', + 'object', + ); + expect(toolSchema.parametersJsonSchema).toHaveProperty( + 'properties.my_output.properties.report', + ); + expect(toolSchema.parametersJsonSchema).toHaveProperty( + 'properties.my_output.properties.score', + ); + expect(toolSchema.parametersJsonSchema).toHaveProperty( + 'required', + expect.arrayContaining(['my_output']), + ); + }); + + it('should validate successfully with correct structure', () => { + const result = tool.validateToolParams({ + my_output: { report: 'All good', score: 100 }, + }); + expect(result).toBeNull(); + }); + + it('should fail validation if output is missing', () => { + const result = tool.validateToolParams({}); + expect(result).toContain("must have required property 'my_output'"); + }); + + it('should fail validation if schema mismatch', () => { + const result = tool.validateToolParams({ + my_output: { report: 'All good', score: 'not a number' }, + }); + expect(result).toContain('must be number'); + }); + + it('should execute and return structured data', async () => { + const outputValue = { report: 'Final findings', score: 42 }; + const invocation = tool.build({ my_output: outputValue }); + const result = await invocation.execute(new AbortController().signal); + + expect(result.data?.['taskCompleted']).toBe(true); + expect(result.data?.['submittedOutput']).toBe( + JSON.stringify(outputValue, null, 2), + ); + }); + + it('should use processOutput if provided', async () => { + const processOutput = (val: z.infer) => + `Score was ${val.score}`; + const toolWithProcess = new CompleteTaskTool( + mockMessageBus, + outputConfig, + processOutput, + ); + + const outputValue = { report: 'Final findings', score: 42 }; + const invocation = toolWithProcess.build({ my_output: outputValue }); + const result = await invocation.execute(new AbortController().signal); + + expect(result.data?.['submittedOutput']).toBe('Score was 42'); + }); + }); +}); diff --git a/packages/core/src/tools/complete-task.ts b/packages/core/src/tools/complete-task.ts new file mode 100644 index 0000000000..ec35b193ba --- /dev/null +++ b/packages/core/src/tools/complete-task.ts @@ -0,0 +1,179 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + BaseDeclarativeTool, + BaseToolInvocation, + type ToolResult, + Kind, +} from './tools.js'; +import { + COMPLETE_TASK_TOOL_NAME, + COMPLETE_TASK_DISPLAY_NAME, +} from './definitions/base-declarations.js'; +import { type OutputConfig } from '../agents/types.js'; +import { type z } from 'zod'; +import { type MessageBus } from '../confirmation-bus/message-bus.js'; +import { zodToJsonSchema } from 'zod-to-json-schema'; + +/** + * Tool for signaling task completion and optionally returning structured output. + * This tool is specifically designed for use in subagent loops. + */ +export class CompleteTaskTool< + TOutput extends z.ZodTypeAny = z.ZodTypeAny, +> extends BaseDeclarativeTool, ToolResult> { + static readonly Name = COMPLETE_TASK_TOOL_NAME; + + constructor( + messageBus: MessageBus, + private readonly outputConfig?: OutputConfig, + private readonly processOutput?: (output: z.infer) => string, + ) { + super( + CompleteTaskTool.Name, + COMPLETE_TASK_DISPLAY_NAME, + outputConfig + ? 'Call this tool to submit your final answer and complete the task. This is the ONLY way to finish.' + : 'Call this tool to submit your final findings and complete the task. This is the ONLY way to finish.', + Kind.Other, + CompleteTaskTool.buildParameterSchema(outputConfig), + messageBus, + ); + } + + private static buildParameterSchema( + outputConfig?: OutputConfig, + ): unknown { + if (outputConfig) { + const jsonSchema = zodToJsonSchema(outputConfig.schema); + const { + $schema: _$schema, + definitions: _definitions, + ...schema + } = jsonSchema; + return { + type: 'object', + properties: { + [outputConfig.outputName]: schema, + }, + required: [outputConfig.outputName], + }; + } + return { + type: 'object', + properties: { + result: { + type: 'string', + description: + 'Your final results or findings to return to the orchestrator. ' + + 'Ensure this is comprehensive and follows any formatting requested in your instructions.', + }, + }, + required: ['result'], + }; + } + + protected override validateToolParamValues( + params: Record, + ): string | null { + if (this.outputConfig) { + const outputName = this.outputConfig.outputName; + if (params[outputName] === undefined) { + return `Missing required argument '${outputName}' for completion.`; + } + + const validationResult = this.outputConfig.schema.safeParse( + params[outputName], + ); + if (!validationResult.success) { + return `Output validation failed: ${JSON.stringify(validationResult.error.flatten())}`; + } + } else { + const resultArg = params['result']; + if ( + resultArg === undefined || + resultArg === null || + (typeof resultArg === 'string' && resultArg.trim() === '') + ) { + return 'Missing required "result" argument. You must provide your findings when calling complete_task.'; + } + } + return null; + } + + protected createInvocation( + params: Record, + messageBus: MessageBus, + toolName: string, + toolDisplayName: string, + ): CompleteTaskInvocation { + return new CompleteTaskInvocation( + params, + messageBus, + toolName, + toolDisplayName, + this.outputConfig, + this.processOutput, + ); + } +} + +export class CompleteTaskInvocation< + TOutput extends z.ZodTypeAny = z.ZodTypeAny, +> extends BaseToolInvocation, ToolResult> { + constructor( + params: Record, + messageBus: MessageBus, + toolName: string, + toolDisplayName: string, + private readonly outputConfig?: OutputConfig, + private readonly processOutput?: (output: z.infer) => string, + ) { + super(params, messageBus, toolName, toolDisplayName); + } + + getDescription(): string { + return 'Completing task and submitting results.'; + } + + async execute(_signal: AbortSignal): Promise { + let submittedOutput: string | null = null; + let outputValue: unknown; + + if (this.outputConfig) { + outputValue = this.params[this.outputConfig.outputName]; + if (this.processOutput) { + // We validated the params in validateToolParamValues, so safe to cast + submittedOutput = this.processOutput(outputValue as z.infer); + } else { + submittedOutput = + typeof outputValue === 'string' + ? outputValue + : JSON.stringify(outputValue, null, 2); + } + } else { + outputValue = this.params['result']; + submittedOutput = + typeof outputValue === 'string' + ? outputValue + : JSON.stringify(outputValue, null, 2); + } + + const returnDisplay = this.outputConfig + ? 'Output submitted and task completed.' + : 'Result submitted and task completed.'; + + return { + llmContent: returnDisplay, + returnDisplay, + data: { + taskCompleted: true, + submittedOutput, + }, + }; + } +} diff --git a/packages/core/src/tools/definitions/base-declarations.ts b/packages/core/src/tools/definitions/base-declarations.ts index 13f31aa2bb..89a5aa1614 100644 --- a/packages/core/src/tools/definitions/base-declarations.ts +++ b/packages/core/src/tools/definitions/base-declarations.ts @@ -133,3 +133,7 @@ export const UPDATE_TOPIC_DISPLAY_NAME = 'Update Topic Context'; export const TOPIC_PARAM_TITLE = 'title'; export const TOPIC_PARAM_SUMMARY = 'summary'; export const TOPIC_PARAM_STRATEGIC_INTENT = 'strategic_intent'; + +// -- complete_task -- +export const COMPLETE_TASK_TOOL_NAME = 'complete_task'; +export const COMPLETE_TASK_DISPLAY_NAME = 'Complete Task'; diff --git a/packages/core/src/tools/definitions/coreTools.ts b/packages/core/src/tools/definitions/coreTools.ts index f642d2709f..d1b81a6e99 100644 --- a/packages/core/src/tools/definitions/coreTools.ts +++ b/packages/core/src/tools/definitions/coreTools.ts @@ -41,6 +41,8 @@ export { ENTER_PLAN_MODE_TOOL_NAME, UPDATE_TOPIC_TOOL_NAME, UPDATE_TOPIC_DISPLAY_NAME, + COMPLETE_TASK_TOOL_NAME, + COMPLETE_TASK_DISPLAY_NAME, // Shared parameter names PARAM_FILE_PATH, PARAM_DIR_PATH, diff --git a/packages/core/src/tools/tool-names.ts b/packages/core/src/tools/tool-names.ts index 935c1834e7..224f2ab0d5 100644 --- a/packages/core/src/tools/tool-names.ts +++ b/packages/core/src/tools/tool-names.ts @@ -77,6 +77,8 @@ import { SKILL_PARAM_NAME, UPDATE_TOPIC_TOOL_NAME, UPDATE_TOPIC_DISPLAY_NAME, + COMPLETE_TASK_TOOL_NAME, + COMPLETE_TASK_DISPLAY_NAME, TOPIC_PARAM_TITLE, TOPIC_PARAM_SUMMARY, TOPIC_PARAM_STRATEGIC_INTENT, @@ -102,6 +104,8 @@ export { ENTER_PLAN_MODE_TOOL_NAME, UPDATE_TOPIC_TOOL_NAME, UPDATE_TOPIC_DISPLAY_NAME, + COMPLETE_TASK_TOOL_NAME, + COMPLETE_TASK_DISPLAY_NAME, // Shared parameter names PARAM_FILE_PATH, PARAM_DIR_PATH, @@ -264,6 +268,7 @@ export const ALL_BUILTIN_TOOL_NAMES = [ ENTER_PLAN_MODE_TOOL_NAME, EXIT_PLAN_MODE_TOOL_NAME, UPDATE_TOPIC_TOOL_NAME, + COMPLETE_TASK_TOOL_NAME, ] as const; /**