fix(core): intercept complete_task calls in AgentHarness to prevent scheduler crash

Fixes a bug where the virtual complete_task tool call was passed to the tool scheduler, which failed with a 'Tool not found' error because complete_task is not registered in the ToolRegistry. The harness now intercepts complete_task calls and answers them itself, forwarding only the remaining tool calls to the scheduler.
2026-05-17 23:32:43 -07:00 · 2026-02-11 20:20:39 -06:00
parent ac8f6f6e7e
commit 3faa6f2056
2 changed files with 153 additions and 6 deletions
@@ -161,4 +161,124 @@ describe('AgentHarness', () => {
      }),
    );
  });
+
+  it('intercepts complete_task and does not schedule it, but schedules other tools', async () => {
+    const definition: LocalAgentDefinition<z.ZodString> = {
+      kind: 'local',
+      name: 'test-agent-mixed',
+      displayName: 'Test Agent Mixed',
+      description: 'A test agent with mixed tools',
+      inputConfig: {
+        inputSchema: { type: 'object', properties: {}, required: [] },
+      },
+      modelConfig: {
+        model: 'gemini-test-model',
+      },
+      runConfig: { maxTurns: 5, maxTimeMinutes: 5 },
+      promptConfig: { systemPrompt: 'You are a test agent.' },
+      outputConfig: {
+        outputName: 'result',
+        description: 'The final result.',
+        schema: z.string(),
+      },
+      // Expose one regular tool so a single turn can mix it with complete_task
+      toolConfig: { tools: ['other_tool'] },
+    };
+
+    const harness = new AgentHarness({
+      config: mockConfig,
+      definition: definition as unknown as AgentDefinition,
+      inputs: {},
+    });
+
+    const mockChat = {
+      sendMessageStream: vi.fn(),
+      setTools: vi.fn(),
+      getHistory: vi.fn().mockReturnValue([]),
+      addHistory: vi.fn(),
+      setSystemInstruction: vi.fn(),
+      maybeIncludeSchemaDepthContext: vi.fn(),
+      getLastPromptTokenCount: vi.fn().mockReturnValue(0),
+    } as unknown as GeminiChat;
+    (GeminiChat as unknown as Mock).mockReturnValue(mockChat);
+
+    // The model streams one chunk that requests both 'other_tool' and 'complete_task'
+    (mockChat.sendMessageStream as Mock).mockResolvedValue(
+      (async function* () {
+        yield {
+          type: StreamEventType.CHUNK,
+          value: {
+            candidates: [
+              {
+                content: { parts: [{ text: 'Calling tools...' }] },
+                finishReason: 'STOP',
+              },
+            ],
+            functionCalls: [
+              {
+                name: 'other_tool',
+                args: { key: 'value' },
+                id: 'call_1',
+              },
+              {
+                name: 'complete_task',
+                args: { result: 'Final Answer' },
+                id: 'call_2',
+              },
+            ],
+          },
+        };
+      })(),
+    );
+
+    // The mocked scheduler resolves with a successful result for 'other_tool' only
+    (scheduleAgentTools as unknown as Mock).mockResolvedValue([
+      {
+        request: {
+          name: 'other_tool',
+          args: { key: 'value' },
+          callId: 'call_1',
+        },
+        status: 'success',
+        response: {
+          responseParts: [
+            {
+              functionResponse: {
+                name: 'other_tool',
+                response: { output: 'tool_output' },
+                id: 'call_1',
+              },
+            },
+          ],
+        },
+      },
+    ]);
+
+    const run = harness.run([{ text: 'Start' }], new AbortController().signal);
+
+    // Drain the generator until it reports done; yielded values are not inspected
+    while (true) {
+      const { done } = await run.next();
+      if (done) break;
+    }
+
+    // VERIFICATION:
+    // 1. The scheduler must still be invoked for the regular tool...
+    expect(scheduleAgentTools).toHaveBeenCalled();
+
+    // 2. ...and its request list must hold only 'other_tool', never 'complete_task'
+    const calledCalls = (scheduleAgentTools as unknown as Mock).mock
+      .calls[0][1]; // first invocation, 2nd argument: the tool call requests
+    expect(calledCalls).toHaveLength(1);
+    expect(calledCalls[0].name).toBe('other_tool');
+    expect(calledCalls[0].name).not.toBe('complete_task');
+
+    // 3. The finish log must report GOAL (meaning complete_task was processed internally)
+    expect(vi.mocked(logAgentFinish)).toHaveBeenCalledWith(
+      expect.anything(),
+      expect.objectContaining({
+        terminate_reason: AgentTerminateMode.GOAL,
+      }),
+    );
+  });
 });
@@ -443,15 +443,42 @@ export class AgentHarness {
    calls: ToolCallRequestInfo[],
    signal: AbortSignal,
  ): Promise<Array<{ name: string; part: Part }>> {
-    const completedCalls = await scheduleAgentTools(this.config, calls, {
-      schedulerId: this.agentId,
-      toolRegistry: this.toolRegistry,
-      signal,
-    });
+    const taskCompleteCalls = calls.filter(
+      (c) => c.name === TASK_COMPLETE_TOOL_NAME,
+    );
+    const otherCalls = calls.filter((c) => c.name !== TASK_COMPLETE_TOOL_NAME);

-    return completedCalls.map((call) => ({
+    let completedCalls: Array<{
+      request: ToolCallRequestInfo;
+      response: { responseParts: Part[] };
+    }> = [];
+
+    if (otherCalls.length > 0) {
+      completedCalls = await scheduleAgentTools(this.config, otherCalls, {
+        schedulerId: this.agentId,
+        toolRegistry: this.toolRegistry,
+        signal,
+      });
+    }
+
+    const results = completedCalls.map((call) => ({
      name: call.request.name,
      part: call.response.responseParts[0],
    }));
+
+    for (const call of taskCompleteCalls) {
+      results.push({
+        name: TASK_COMPLETE_TOOL_NAME,
+        part: {
+          functionResponse: {
+            name: TASK_COMPLETE_TOOL_NAME,
+            response: { result: 'Task completed locally' },
+            id: call.callId,
+          },
+        },
+      });
+    }
+
+    return results;
  }
 }