This commit is contained in:
mkorwel
2026-02-18 15:45:46 -06:00
parent fa0e6e252f
commit 456f8be568
18 changed files with 2465 additions and 1024 deletions
+1117 -806
View File
File diff suppressed because it is too large Load Diff
+87
View File
@@ -0,0 +1,87 @@
# Agent harness architecture
This document provides a detailed walkthrough of the architectural shift from
linear turn-based execution to the unified hierarchical loop model used by the
Agent Harness.
> **Note:** This is a preview feature currently under active development.
## Overview
The Agent Harness represents a fundamental evolution in how Gemini CLI manages
interactions with Large Language Models (LLMs) and tools. It unifies the
execution logic for both the main CLI agent and subagents, providing parity in
features like model routing, history management, and tool execution.
## Legacy architecture: Linear turns
The legacy system operates on a "Stop-and-Go" model where the UI manages the
execution turn-by-turn.
In this model, when you send a prompt, the system follows these steps:
1. **Orchestration:** The `GeminiClient` and the `useGeminiStream` hook manage
the flow.
2. **Execution:** Gemini returns a single response containing text or tool
calls.
3. **UI Interruption:** The execution stops at the UI layer. If Gemini calls
tools, the UI schedules them, waits for results, and then re-submits the
entire history as a brand-new turn.
4. **Subagents:** Subagents are treated as "Black Box" tools. The main agent
calls a subagent (for example, `codebase_investigator`), waits for it to
complete its private loop using `LocalAgentExecutor`, and receives a single
string result.
This model results in duplicated logic for subagents and prevents them from
using advanced features available to the main agent.
## New architecture: Unified agent harness
The Agent Harness treats the ReAct (Reasoning and Acting) loop as a first-class,
autonomous process.
The new model introduces several key improvements:
1. **Continuous Loop:** The `AgentHarness` manages the entire lifecycle
internally. It handles LLM calls, tool execution, and reasoning without
relinquishing control to the UI until it reaches the final goal.
2. **Event Stream:** The harness yields a continuous stream of events
(`GeminiEvent`) that the UI listens to and renders in real time.
3. **Hierarchical Delegation:** Because the harness is unified, a subagent is
simply another instance of `AgentHarness` running inside a tool call of the
parent harness.
4. **Feature Parity:** Subagents can now use the same features as the main
agent, including dynamic model routing, history compression, and complex
interactive tools.
## UI synchronization challenges
Moving to a hierarchical model introduces complexity in how the UI maintains a
consistent history.
The `HistoryManager` expects a flat list of messages, but the harness provides a
nested, multi-turn stream. This creates two primary challenges:
1. **History Persistence:** Legacy code may clear the "active" turn state
prematurely when a turn boundary is crossed. The harness uses a
`TurnFinished` event to signal when to "lock in" reasoning without ending
the overall session.
2. **Hierarchical Boxes:** In a hierarchical model, internal subagent tool
calls (for example, reading a file) shouldn't clutter the main history. The
UI uses `SubagentActivity` events to update a single, persistent subagent
box rather than rendering every internal step as a top-level item.
## Isolation strategy
To ensure stability during this transition, the project uses a "Dual
Implementation" strategy.
This strategy isolates the experimental logic from the stable codebase:
- **Hook Isolation:** `useAgentHarness.ts` provides a dedicated hook for the new
event model, leaving the stable `useGeminiStream` untouched.
- **Logic Isolation:** The `HarnessSubagentInvocation` class
(`harness-invocation.ts`) manages subagent execution specifically for the
harness, while `LocalSubagentInvocation` continues to serve the legacy path.
- **Conditional Forking:** The system switches between these paths based on the
`experimental-agent-harness` configuration flag.
+4
View File
@@ -88,6 +88,10 @@
"label": "Sub-agents (experimental)",
"slug": "docs/core/subagents"
},
{
"label": "Agent harness architecture (experimental)",
"slug": "docs/core/agent-harness-architecture"
},
{
"label": "Remote subagents (experimental)",
"slug": "docs/core/remote-agents"
@@ -6,6 +6,8 @@
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { TestRig } from './test-helper.js';
import * as fs from 'node:fs';
import * as path from 'node:path';
describe('Agent Harness E2E', () => {
let rig: TestRig;
@@ -41,4 +43,44 @@ describe('Agent Harness E2E', () => {
expect(result2).toContain('GeminiUser');
}, 30000);
it('should delegate to codebase_investigator and synthesize results', async () => {
  await rig.setup('agent-harness-delegation');

  // Seed the test workspace with a small source file the investigator can discover.
  const sourceDir = path.join(rig.testDir!, 'packages/core/src');
  const sourceFile = path.join(sourceDir, 'history.ts');
  fs.mkdirSync(sourceDir, { recursive: true });
  fs.writeFileSync(sourceFile, `
/** ChatHistory maintains the message history for the session. */
export class ChatHistory {
private messages: any[] = [];
addMessage(msg: any) { this.messages.push(msg); }
}
`);

  // Run the CLI with the harness enabled through the environment.
  const harnessEnv = {
    ...process.env,
    GEMINI_ENABLE_AGENT_HARNESS: 'true',
  };
  const result = await rig.run({
    args: ['chat', 'use @codebase_investigator to tell me about how chat history is maintained'],
    env: harnessEnv,
  });

  // Synthesis check: the answer should mention ChatHistory or history.ts.
  expect(result.toLowerCase()).toMatch(/history|chat/);

  // Single-delegation check: exactly one tool-log entry for the investigator.
  const toolLogs = rig.readToolLogs();
  const delegationCount = toolLogs.filter(
    (entry) => entry.toolRequest?.name === 'codebase_investigator',
  ).length;

  // Dump diagnostics before the assertion so a failure is debuggable.
  if (delegationCount !== 1) {
    console.log('DEBUG: Full tool logs:', JSON.stringify(toolLogs, null, 2));
    if (rig._lastRunStdout) {
      console.log('DEBUG: Full stdout length:', rig._lastRunStdout.length);
    }
  }
  expect(delegationCount).toBe(1);
}, 120000);
});
+3 -3
View File
@@ -89,12 +89,12 @@ export function mapToDisplay(
switch (call.status) {
case 'success':
resultDisplay = call.response.resultDisplay;
outputFile = call.response.outputFile;
resultDisplay = call.response?.resultDisplay;
outputFile = call.response?.outputFile;
break;
case 'error':
case 'cancelled':
resultDisplay = call.response.resultDisplay;
resultDisplay = call.response?.resultDisplay;
break;
case 'awaiting_approval':
correlationId = call.correlationId;
@@ -0,0 +1,163 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi, beforeEach, type Mock } from 'vitest';
import { act } from 'react';
import { renderHookWithProviders } from '../../test-utils/render.js';
import { useAgentHarness } from './useAgentHarness.js';
import {
GeminiEventType as ServerGeminiEventType,
ROOT_SCHEDULER_ID,
} from '@google/gemini-cli-core';
import { StreamingState, MessageType } from '../types.js';
import { makeFakeConfig } from '@google/gemini-cli-core/dist/src/test-utils/config.js';
// Partially mock the core package: every real export is passed through via
// `...actual`, and only `AgentFactory` is replaced.
vi.mock('@google/gemini-cli-core', async (importOriginal) => {
const actual = await importOriginal<typeof import('@google/gemini-cli-core')>();
return {
...actual,
// `createHarness` is a bare vi.fn(), so it returns undefined unless a test
// configures a return value.
AgentFactory: {
createHarness: vi.fn(),
},
};
});
describe('useAgentHarness', () => {
  let mockAddItem: Mock;
  let mockConfig: any;
  let mockOnCancelSubmit: Mock;

  beforeEach(() => {
    mockAddItem = vi.fn();
    mockConfig = makeFakeConfig();
    mockOnCancelSubmit = vi.fn();

    // Every tool lookup resolves to the same fake tool, so any tool name maps
    // to the display name 'TestTool'.
    mockConfig.getToolRegistry = vi.fn().mockReturnValue({
      getTool: vi.fn().mockReturnValue({
        displayName: 'TestTool',
        createInvocation: vi.fn().mockReturnValue({
          getDescription: () => 'Test Tool Description',
        }),
      }),
    });

    mockConfig.getMessageBus = vi.fn().mockReturnValue({
      subscribe: vi.fn().mockReturnValue(vi.fn()),
      // The hook's effect cleanups call `bus.unsubscribe(...)`; without this
      // stub, unmounting the hook throws a TypeError.
      unsubscribe: vi.fn(),
      publish: vi.fn(),
    });

    // Resets recorded calls only; the mock implementations above are kept.
    vi.clearAllMocks();
  });

  it('initializes in Idle state', () => {
    const { result } = renderHookWithProviders(() =>
      useAgentHarness(mockAddItem, mockConfig, mockOnCancelSubmit),
    );

    expect(result.current.streamingState).toBe(StreamingState.Idle);
    expect(result.current.isResponding).toBe(false);
  });

  it('updates state live during processEvent', async () => {
    const { result } = renderHookWithProviders(() =>
      useAgentHarness(mockAddItem, mockConfig, mockOnCancelSubmit),
    );

    // 1. Send content
    await act(async () => {
      (result.current as any).processEvent({
        type: ServerGeminiEventType.Content,
        value: 'Hello',
      });
    });
    expect(result.current.streamingContent).toBe('Hello');
    expect(result.current.streamingState).toBe(StreamingState.Responding);

    // 2. Send thought
    await act(async () => {
      (result.current as any).processEvent({
        type: ServerGeminiEventType.Thought,
        value: { subject: 'Thinking' },
      });
    });
    expect(result.current.thought?.subject).toBe('Thinking');

    // 3. Send tool request
    await act(async () => {
      (result.current as any).processEvent({
        type: ServerGeminiEventType.ToolCallRequest,
        value: { name: 'tool_1', callId: 'c1', args: {}, schedulerId: ROOT_SCHEDULER_ID },
      });
    });
    expect(result.current.toolCalls).toHaveLength(1);
    expect(result.current.toolCalls[0].request.name).toBe('tool_1');
  });

  it('merges subagent activity into active tool calls', async () => {
    const { result } = renderHookWithProviders(() =>
      useAgentHarness(mockAddItem, mockConfig, mockOnCancelSubmit),
    );

    // Start a delegation tool. The hook attaches subagent activity to the tool
    // call whose request name (or tool display name) equals the activity's
    // `agentName`, so the request must be named after the agent.
    await act(async () => {
      (result.current as any).processEvent({
        type: ServerGeminiEventType.ToolCallRequest,
        value: {
          name: 'codebase_investigator',
          callId: 'c1',
          args: {},
          schedulerId: ROOT_SCHEDULER_ID,
        },
      });
    });

    // Send subagent activity
    await act(async () => {
      (result.current as any).processEvent({
        type: ServerGeminiEventType.SubagentActivity,
        value: {
          agentName: 'codebase_investigator',
          type: 'THOUGHT',
          data: { subject: 'Analyzing logs' },
        },
      });
    });

    // Verify the tool box resultDisplay was updated with the thought
    expect((result.current.toolCalls[0] as any).response?.resultDisplay).toContain(
      '🤖💭 Analyzing logs',
    );

    // Send another activity
    await act(async () => {
      result.current.processEvent({
        type: ServerGeminiEventType.SubagentActivity,
        value: {
          agentName: 'codebase_investigator',
          type: 'TOOL_CALL_START',
          data: { name: 'list_directory' },
        },
      });
    });

    // 'list_directory' resolves to the fake tool, hence the 'TestTool' label.
    expect((result.current.toolCalls[0] as any).response?.resultDisplay).toContain(
      '🛠️ Calling TestTool...',
    );
  });

  it('flushes to history on TurnFinished', async () => {
    const { result } = renderHookWithProviders(() =>
      useAgentHarness(mockAddItem, mockConfig, mockOnCancelSubmit),
    );

    // Stream some content, then finish the turn within the same act() batch.
    await act(async () => {
      (result.current as any).processEvent({ type: ServerGeminiEventType.Content, value: 'Done' });
      (result.current as any).processEvent({ type: ServerGeminiEventType.TurnFinished });
    });

    expect(mockAddItem).toHaveBeenCalledWith(
      expect.objectContaining({
        type: MessageType.GEMINI,
        text: 'Done',
      }),
    );
    expect(result.current.streamingContent).toBe(''); // Should be cleared
  });
});
+152 -67
View File
@@ -36,6 +36,7 @@ export interface UseAgentHarnessReturn {
streamingContent: string;
toolCalls: TrackedToolCall[];
submitQuery: (query: PartListUnion) => Promise<void>;
processEvent: (event: GeminiEvent) => void;
cancelOngoingRequest: () => void;
reset: () => void;
// Legacy compatibility properties
@@ -72,10 +73,6 @@ export const useAgentHarness = (
const [thought, thoughtRef, setThought] =
useStateAndRef<ThoughtSummary | null>(null);
// Track subagent status and output
const [subagentStatus, setSubagentStatus] = useState<string | null>(null);
const [subagentOutput, setSubagentOutput] = useState<string | null>(null);
// Tools for the CURRENT turn of the main agent
const [toolCalls, setToolCalls] = useState<TrackedToolCall[]>([]);
const toolCallsRef = useRef<TrackedToolCall[]>([]);
@@ -87,9 +84,39 @@ export const useAgentHarness = (
const pushedToolCallIdsRef = useRef<Set<string>>(new Set());
// Listen to the MessageBus for live tool updates (e.g. from subagents or long-running tools)
useEffect(() => {
  const bus = config.getMessageBus();

  // Merges each incoming core tool call into the tracked call with the same
  // callId. Updates for unknown callIds are ignored.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const handler = (event: any) => {
    const updates: unknown = event?.toolCalls;
    if (!Array.isArray(updates)) {
      return;
    }

    // Work from the ref and assign it outside of any state updater, matching
    // the other event handlers in this hook and keeping React updaters pure.
    const next = [...toolCallsRef.current];
    let changed = false;
    for (const coreCall of updates) {
      const callId = coreCall?.request?.callId;
      const index = next.findIndex((tc) => tc.request.callId === callId);
      if (index !== -1) {
        next[index] = {
          ...next[index],
          ...coreCall,
        } as TrackedToolCall;
        changed = true;
      }
    }

    // Avoid a re-render when the update did not touch any tracked call.
    if (!changed) {
      return;
    }
    toolCallsRef.current = next;
    setToolCalls(next);
  };

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  bus.subscribe('tool-calls-update' as any, handler);
  return () => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    bus.unsubscribe('tool-calls-update' as any, handler);
  };
}, [config]);
const pendingHistoryItems = useMemo(() => {
const items: HistoryItemWithoutId[] = [];
if (thought) {
// Only show the top-level thought if we aren't currently executing tools (delegations)
// Subagent internal thoughts are merged into the tool box via SubagentActivity handler.
if (thought && toolCalls.length === 0) {
items.push({
type: MessageType.THINKING,
thought,
@@ -101,34 +128,18 @@ export const useAgentHarness = (
);
if (unpushed.length > 0) {
items.push(
mapTrackedToolCallsToDisplay(unpushed as TrackedToolCall[], {
mapToDisplayInternal(unpushed as TrackedToolCall[], {
borderBottom: true,
}),
);
}
}
if (streamingContent) {
items.push({ type: MessageType.GEMINI, text: streamingContent });
}
if (subagentStatus) {
items.push({
type: 'tool_group',
tools: [
{
displayName: subagentStatus.split(' is ')[0] || 'Subagent',
status: 'validating',
description: subagentStatus,
resultDisplay: subagentOutput || undefined,
},
],
borderBottom: true,
} as any as HistoryItemWithoutId);
}
return items;
}, [thought, toolCalls, streamingContent, subagentStatus, subagentOutput]);
if (streamingContent) {
items.push({ type: MessageType.GEMINI, text: streamingContent });
}
return items;
}, [thought, toolCalls, streamingContent]);
const abortControllerRef = useRef<AbortController | null>(null);
const reset = useCallback(() => {
@@ -139,7 +150,6 @@ export const useAgentHarness = (
setToolCalls([]);
toolCallsRef.current = [];
pushedToolCallIdsRef.current.clear();
setSubagentStatus(null);
}, [setThought]);
const cancelOngoingRequest = useCallback(() => {
@@ -155,11 +165,12 @@ export const useAgentHarness = (
switch (event.type) {
case ServerGeminiEventType.Content:
setStreamingState(StreamingState.Responding);
setStreamingContent((prev) => {
const next = prev + (event.value || '');
streamingContentRef.current = next;
return next;
});
{
const nextContent =
streamingContentRef.current + (event.value || '');
streamingContentRef.current = nextContent;
setStreamingContent(nextContent);
}
break;
case ServerGeminiEventType.Thought:
@@ -168,6 +179,7 @@ export const useAgentHarness = (
case ServerGeminiEventType.ToolCallRequest:
{
setThought(null);
const tool = config.getToolRegistry().getTool(event.value.name);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const invocation = (tool as any)?.createInvocation?.(
@@ -175,44 +187,44 @@ export const useAgentHarness = (
config.getMessageBus(),
);
// In Harness mode, top-level calls might not have schedulerId set yet.
// We default to ROOT_SCHEDULER_ID to ensure they are visible.
const newCall = {
request: event.value,
request: {
...event.value,
schedulerId: event.value.schedulerId || ROOT_SCHEDULER_ID,
},
status: 'validating',
schedulerId: event.value.schedulerId || ROOT_SCHEDULER_ID,
tool,
invocation,
} as TrackedToolCall;
setToolCalls((prev) => {
const next = [...prev, newCall];
toolCallsRef.current = next;
return next;
});
const nextCalls = [...toolCallsRef.current, newCall];
toolCallsRef.current = nextCalls;
setToolCalls(nextCalls);
}
break;
case ServerGeminiEventType.ToolCallResponse:
{
const response = event.value;
setToolCalls((prev) => {
const next = prev.map((tc) =>
tc.request.callId === response.callId
? ({
...tc,
status: 'success',
result: response,
} as unknown as TrackedToolCall)
: tc,
);
toolCallsRef.current = next;
return next;
});
const nextCalls = toolCallsRef.current.map((tc) =>
tc.request.callId === response.callId
? ({
...tc,
status: 'success',
response: response,
} as unknown as TrackedToolCall)
: tc,
);
toolCallsRef.current = nextCalls;
setToolCalls(nextCalls);
}
break;
case ServerGeminiEventType.TurnFinished:
// MAIN AGENT turn finished. Flush current state to history.
setSubagentStatus(null);
if (thoughtRef.current) {
addItem({
type: MessageType.THINKING,
@@ -227,7 +239,7 @@ export const useAgentHarness = (
);
if (unpushed.length > 0) {
addItem(
mapTrackedToolCallsToDisplay(unpushed as TrackedToolCall[], {
mapToDisplayInternal(unpushed as TrackedToolCall[], {
borderBottom: true,
}),
);
@@ -238,29 +250,67 @@ export const useAgentHarness = (
}
if (streamingContentRef.current) {
addItem({ type: MessageType.GEMINI, text: streamingContentRef.current });
addItem({
type: MessageType.GEMINI,
text: streamingContentRef.current,
});
setStreamingContent('');
streamingContentRef.current = '';
}
setToolCalls([]);
toolCallsRef.current = [];
setToolCalls([]);
break;
case ServerGeminiEventType.SubagentActivity:
{
const activity = event.value;
const name =
activity.agentName.charAt(0).toUpperCase() +
activity.agentName.slice(1);
let matched = false;
if (activity.type === 'TOOL_CALL_START') {
setSubagentStatus(`${name} is calling ${activity.data['name']}...`);
setSubagentOutput((prev) => (prev || '') + `🛠️ Calling ${activity.data['name']}...\n`);
} else if (activity.type === 'THOUGHT') {
setSubagentStatus(`${name} is thinking...`);
} else if (activity.type === 'TOOL_CALL_END') {
// Just a status update, the tool result will come via TOOL_CALL_RESPONSE eventually
const nextCalls = toolCallsRef.current.map((tc) => {
// Try to find the tool box that belongs to this agent.
// Note: We search ALL tool calls, not just 'executing', in case of race conditions.
if (
tc.request.name === activity.agentName ||
(tc.tool?.displayName || tc.request.name) === activity.agentName
) {
matched = true;
const currentCall = tc as any;
let output = currentCall.response?.resultDisplay || '';
if (typeof output !== 'string') output = '';
if (activity.type === 'TOOL_CALL_START') {
const rawName = String(activity.data['name'] || 'a tool');
const tool = config.getToolRegistry().getTool(rawName);
const displayName = tool?.displayName || rawName;
output += `🛠️ Calling ${displayName}...\n`;
} else if (activity.type === 'THOUGHT') {
const subject = String(activity.data['subject'] || 'Thinking');
output += `🤖💭 ${subject}\n`;
}
return {
...tc,
response: {
...(currentCall.response || {}),
resultDisplay: output,
},
} as unknown as TrackedToolCall;
}
return tc;
});
if (matched) {
toolCallsRef.current = nextCalls;
setToolCalls(nextCalls);
} else {
// Fallback: If no tool box matches, show it as a standalone item
if (activity.type === 'THOUGHT') {
addItem({
type: MessageType.GEMINI,
text: `🤖💭 [${activity.agentName}] ${activity.data['subject']}`,
});
}
}
}
break;
@@ -276,6 +326,21 @@ export const useAgentHarness = (
[addItem, config, setThought, thoughtRef],
);
// Bridge nested subagent activity published on the MessageBus into processEvent.
useEffect(() => {
  const messageBus = config.getMessageBus();

  // Re-wraps the bus payload as a SubagentActivity event for the shared handler.
  const forwardSubagentActivity = (message: any): void => {
    const activity = message.activity;
    processEvent({ type: ServerGeminiEventType.SubagentActivity, value: activity });
  };

  messageBus.subscribe('subagent-activity' as any, forwardSubagentActivity);

  // Cleanup: detach the same listener when config/processEvent change or on unmount.
  const stopForwarding = () => {
    messageBus.unsubscribe('subagent-activity' as any, forwardSubagentActivity);
  };
  return stopForwarding;
}, [config, processEvent]);
const submitQuery = useCallback(
async (parts: PartListUnion) => {
reset();
@@ -316,6 +381,7 @@ export const useAgentHarness = (
streamingContent,
toolCalls,
submitQuery,
processEvent,
cancelOngoingRequest,
reset,
initError: null,
@@ -333,3 +399,22 @@ export const useAgentHarness = (
retryStatus: null,
};
};
/**
 * Harness-mode wrapper around `mapTrackedToolCallsToDisplay`.
 *
 * Only calls whose `schedulerId` is `ROOT_SCHEDULER_ID` are forwarded, so tool
 * calls made inside a subagent (for example `list_directory`) do not show up
 * as separate tool boxes in the main chat.
 *
 * @param calls Tracked tool calls to render.
 * @param options Display options passed through unchanged to the mapper.
 * @returns The history item produced by the mapper for the root-level calls.
 */
function mapToDisplayInternal(
  calls: TrackedToolCall[],
  options: any,
): HistoryItemWithoutId {
  const rootLevelCalls: TrackedToolCall[] = [];
  for (const call of calls) {
    // Anything scheduled below the root harness belongs to a subagent.
    if (call.schedulerId === ROOT_SCHEDULER_ID) {
      rootLevelCalls.push(call);
    }
  }
  return mapTrackedToolCallsToDisplay(rootLevelCalls as any, options);
}
@@ -106,7 +106,7 @@ export class A2AClientManager {
clearCache(): void {
this.clients.clear();
this.agentCards.clear();
debugLogger.debug('[A2AClientManager] Cache cleared.');
debugLogger.debug('[AgentHarness] [A2AClientManager] Cache cleared.');
}
/**
@@ -40,7 +40,7 @@ export async function resolveAuthValue(value: string): Promise<string> {
`Please set it before using this agent.`,
);
}
debugLogger.debug(`[AuthValueResolver] Resolved env var: ${envVar}`);
debugLogger.debug(`[AgentHarness] [AuthValueResolver] Resolved env var: ${envVar}`);
return resolved;
}
@@ -51,7 +51,7 @@ export async function resolveAuthValue(value: string): Promise<string> {
throw new Error('Empty command in auth value. Expected format: !command');
}
debugLogger.debug(`[AuthValueResolver] Executing command for auth value`);
debugLogger.debug(`[AgentHarness] [AuthValueResolver] Executing command for auth value`);
const shellConfig = getShellConfiguration();
try {
+25 -10
View File
@@ -168,7 +168,7 @@ export class MainAgentBehavior implements AgentBehavior {
async initialize(_toolRegistry: ToolRegistry) {
debugLogger.debug(
`[MainAgentBehavior] [${this.name}:${this.agentId}] Initialized`,
`[AgentHarness] [${this.name}:${this.agentId}] Initialized`,
);
}
@@ -341,7 +341,7 @@ export class SubagentBehavior implements AgentBehavior {
constructor(
private readonly config: Config,
private readonly definition: LocalAgentDefinition,
public readonly definition: LocalAgentDefinition,
private readonly inputs?: AgentInputs,
parentPromptId?: string,
) {
@@ -353,7 +353,7 @@ export class SubagentBehavior implements AgentBehavior {
async initialize(toolRegistry: ToolRegistry) {
debugLogger.debug(
`[SubagentBehavior] [${this.name}:${this.agentId}] Initializing tool registry`,
`[AgentHarness] [${this.name}:${this.agentId}] Initializing tool registry`,
);
const parentToolRegistry = this.config.getToolRegistry();
if (this.definition.toolConfig) {
@@ -501,7 +501,7 @@ export class SubagentBehavior implements AgentBehavior {
signal: AbortSignal,
): AsyncGenerator<ServerGeminiStreamEvent, boolean> {
debugLogger.debug(
`[SubagentBehavior] [${this.name}:${this.agentId}] Entering recovery mode. Reason: ${reason}`,
`[AgentHarness] [${this.name}:${this.agentId}] Entering recovery mode. Reason: ${reason}`,
);
const recoveryStartTime = Date.now();
let success = false;
@@ -532,11 +532,26 @@ export class SubagentBehavior implements AgentBehavior {
}
// Check if they called complete_task in the recovery turn
if (turn.pendingToolCalls.length > 0) {
if (
turn.pendingToolCalls.some((c) => c.name === TASK_COMPLETE_TOOL_NAME)
) {
success = true;
const completeCall = turn.pendingToolCalls.find(
(c) => c.name === TASK_COMPLETE_TOOL_NAME,
);
if (completeCall) {
success = true;
// Capture the result in the turn object explicitly
const outputName = this.definition.outputConfig?.outputName || 'result';
const rawFindings =
completeCall.args[outputName] || completeCall.args['result'];
if (rawFindings) {
turn.submittedOutput =
typeof rawFindings === 'object'
? JSON.stringify(rawFindings, null, 2)
: String(rawFindings);
debugLogger.debug(
`[AgentHarness] [${this.name}:${this.agentId}] Captured findings from recovery complete_task. Length: ${turn.submittedOutput.length}`,
);
}
}
} finally {
@@ -571,7 +586,7 @@ export class SubagentBehavior implements AgentBehavior {
default:
explanation = 'Execution was interrupted.';
}
return `${explanation} You have one final chance to complete the task with a short grace period. You MUST call \`${TASK_COMPLETE_TOOL_NAME}\` immediately with your best answer and explain that your investigation was interrupted. Do not call any other tools.`;
return `${explanation} You have one final chance to provide your findings. You MUST call \`${TASK_COMPLETE_TOOL_NAME}\` immediately with your best synthesis and conclusion for the main agent. Do not call any other tools.`;
}
getFinalFailureMessage(
@@ -109,8 +109,8 @@ export const CodebaseInvestigatorAgent = (
},
runConfig: {
maxTimeMinutes: 3,
maxTurns: 10,
maxTimeMinutes: 10,
maxTurns: 50,
},
toolConfig: {
@@ -0,0 +1,376 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi, beforeEach, type Mock } from 'vitest';
import { HarnessSubagentInvocation } from './harness-invocation.js';
import { makeFakeConfig } from '../test-utils/config.js';
import { AgentFactory } from './agent-factory.js';
import { type Turn } from '../core/turn.js';
import { type Config } from '../config/config.js';
import { type MessageBus } from '../confirmation-bus/message-bus.js';
import { z } from 'zod';
import { type LocalAgentDefinition } from './types.js';
// Replace GeminiChat with a bare mock constructor for this test file.
vi.mock('../core/geminiChat.js', () => ({
GeminiChat: vi.fn(),
}));
// Replace AgentFactory so each test can supply its own fake harness through
// `AgentFactory.createHarness`.
vi.mock('./agent-factory.js', () => ({
AgentFactory: {
createHarness: vi.fn(),
},
}));
describe('HarnessSubagentInvocation', () => {
  let mockConfig: Config;
  let mockMessageBus: MessageBus;
  let definition: LocalAgentDefinition<z.ZodUnknown>;

  beforeEach(() => {
    mockConfig = makeFakeConfig();
    mockMessageBus = {
      publish: vi.fn(),
      subscribe: vi.fn(),
    } as unknown as MessageBus;

    definition = {
      kind: 'local',
      name: 'test-agent',
      displayName: 'Test Agent',
      description: 'A test agent',
      inputConfig: {
        inputSchema: { type: 'object', properties: {}, required: [] },
      },
      modelConfig: { model: 'test-model' },
      runConfig: { maxTurns: 5 },
      promptConfig: { systemPrompt: 'Test' },
    };

    vi.clearAllMocks();
  });

  /** Builds a model history entry containing a single `complete_task` call. */
  const completeTaskMessage = (args: Record<string, unknown>) => ({
    role: 'model',
    parts: [{ functionCall: { name: 'complete_task', args } }],
  });

  /**
   * Executes a fresh invocation against a stubbed harness.
   *
   * The stub's `run()` generator yields no events and returns a final turn
   * whose chat history and response text are the given values.
   *
   * @param history Chat history returned by the final turn's `chat.getHistory()`.
   * @param responseText Value returned by the final turn's `getResponseText()`.
   * @param agentDefinition Definition to invoke; defaults to the shared fixture.
   * @returns The tool result produced by `invocation.execute()`.
   */
  async function executeWithFinalTurn(
    history: unknown[],
    responseText = '',
    agentDefinition: LocalAgentDefinition = definition,
  ) {
    const invocation = new HarnessSubagentInvocation(
      agentDefinition,
      mockConfig,
      {},
      mockMessageBus,
    );

    const finalTurn = {
      getResponseText: vi.fn().mockReturnValue(responseText),
      chat: { getHistory: vi.fn().mockReturnValue(history) },
    } as unknown as Turn;

    const mockHarness = {
      run: vi.fn().mockReturnValue(
        (async function* () {
          // No intermediate events; the generator only returns the final turn.
          return finalTurn;
        })(),
      ),
    };
    (AgentFactory.createHarness as Mock).mockReturnValue(mockHarness);

    return invocation.execute(new AbortController().signal);
  }

  it('extracts result from complete_task tool call arguments', async () => {
    const result = await executeWithFinalTurn([
      completeTaskMessage({ result: 'Extracted Finding' }),
    ]);

    expect(result.data?.['result']).toBe('Extracted Finding');
    expect((result.llmContent as any)?.[0]).toEqual({ text: 'Extracted Finding' });
    expect(result.returnDisplay).toContain('Extracted Finding');
  });

  it('prefers direct text response over complete_task arguments if available', async () => {
    const result = await executeWithFinalTurn(
      [{ role: 'model', parts: [{ text: 'Textual Result' }] }],
      'Textual Result',
    );

    expect(result.data?.['result']).toBe('Textual Result');
    expect((result.llmContent as any)?.[0]).toEqual({ text: 'Textual Result' });
    expect(result.returnDisplay).toContain('Textual Result');
  });

  it('falls back to a default message if no result is found', async () => {
    const result = await executeWithFinalTurn([]);

    expect(result.data?.['result']).toBe('Task completed.');
    expect(result.returnDisplay).toContain('Task completed.');
  });

  it('finds the LAST relevant model message if multiple exist', async () => {
    const result = await executeWithFinalTurn([
      { role: 'model', parts: [{ text: 'Old Result' }] },
      { role: 'user', parts: [{ text: 'Keep going' }] },
      completeTaskMessage({ result: 'Newest Result' }),
    ]);

    expect(result.data?.['result']).toBe('Newest Result');
    expect(result.returnDisplay).toContain('Newest Result');
  });

  it('handles model messages with only thoughts and no result-bearing parts', async () => {
    const result = await executeWithFinalTurn([
      {
        role: 'model',
        parts: [{ thought: true, text: 'Thinking about finishing...' }],
      },
    ]);

    expect(result.data?.['result']).toBe('Task completed.');
    expect(result.returnDisplay).toContain('Task completed.');
  });

  it('extracts result using the custom outputName from outputConfig', async () => {
    const customDefinition: LocalAgentDefinition = {
      ...definition,
      outputConfig: {
        outputName: 'report',
        description: 'A custom report',
        schema: { type: 'string' } as any,
      },
    };

    const result = await executeWithFinalTurn(
      [completeTaskMessage({ report: 'The custom report content' })],
      '',
      customDefinition,
    );

    expect(result.data?.['result']).toBe('The custom report content');
    expect(result.returnDisplay).toContain('The custom report content');
  });

  it('prioritizes complete_task args over whitespace-only text', async () => {
    const result = await executeWithFinalTurn(
      [
        {
          role: 'model',
          parts: [
            { text: ' \n ' },
            {
              functionCall: {
                name: 'complete_task',
                args: { result: 'Actual Result' },
              },
            },
          ],
        },
      ],
      ' \n ',
    );

    expect(result.data?.['result']).toBe('Actual Result');
    expect(result.returnDisplay).toContain('Actual Result');
  });
});
@@ -0,0 +1,277 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { Config } from '../config/config.js';
import type { AnsiOutput } from '../utils/terminalSerializer.js';
import { BaseToolInvocation, type ToolResult } from '../tools/tools.js';
import { ToolErrorType } from '../tools/tool-error.js';
import { debugLogger } from '../utils/debugLogger.js';
import type { LocalAgentDefinition, AgentInputs } from './types.js';
import type { MessageBus } from '../confirmation-bus/message-bus.js';
import { AgentFactory } from './agent-factory.js';
import { type Turn, GeminiEventType } from '../core/turn.js';
import { promptIdContext } from '../utils/promptIdContext.js';
const INPUT_PREVIEW_MAX_LENGTH = 50;
const DESCRIPTION_MAX_LENGTH = 200;
/**
 * Tool invocation that runs a local subagent through the AgentHarness.
 *
 * Forked from LocalSubagentInvocation so that the harness code path stays
 * isolated from the legacy executor path.
 *
 * `execute` drives the harness stream to completion, relays thought and tool
 * activity to `updateOutput` and the message bus, and then resolves the
 * subagent's final result from the first source that provides one:
 *   1. `turn.submittedOutput`
 *   2. the turn's response text, when it is not blank
 *   3. the arguments of a pending `complete_task` tool call
 *   4. the last model message in chat history that carries visible text or a
 *      `complete_task` call
 * If none of them yields a value the literal 'Task completed.' is used.
 */
export class HarnessSubagentInvocation extends BaseToolInvocation<
  AgentInputs,
  ToolResult
> {
  /**
   * @param definition Definition of the subagent to run.
   * @param config Runtime configuration handed to the harness factory.
   * @param params Inputs forwarded to the subagent.
   * @param messageBus Bus used to publish subagent activity.
   * @param _toolName Optional tool name; defaults to `definition.name`.
   * @param _toolDisplayName Optional display name; defaults to
   *   `definition.displayName`.
   */
  constructor(
    private readonly definition: LocalAgentDefinition,
    private readonly config: Config,
    params: AgentInputs,
    messageBus: MessageBus,
    _toolName?: string,
    _toolDisplayName?: string,
  ) {
    super(
      params,
      messageBus,
      _toolName ?? definition.name,
      _toolDisplayName ?? definition.displayName,
    );
  }

  /**
   * Builds a one-line summary of the invocation. Each input value is
   * truncated to INPUT_PREVIEW_MAX_LENGTH characters and the whole summary to
   * DESCRIPTION_MAX_LENGTH characters.
   */
  getDescription(): string {
    const inputSummary = Object.entries(this.params)
      .map(
        ([key, value]) =>
          `${key}: ${String(value).slice(0, INPUT_PREVIEW_MAX_LENGTH)}`,
      )
      .join(', ');
    return `Running harness subagent '${this.definition.name}' with inputs: { ${inputSummary} }`.slice(
      0,
      DESCRIPTION_MAX_LENGTH,
    );
  }

  /**
   * Runs the subagent to completion and packages its result.
   *
   * @param signal Abort signal forwarded to the harness run.
   * @param updateOutput Optional sink for live progress text.
   * @returns On success: the result as text in `llmContent`, a display block
   *   in `returnDisplay`, and the (possibly JSON-parsed) result in `data`
   *   keyed by the configured output name. On failure: an
   *   EXECUTION_FAILED error result; this method does not throw.
   */
  async execute(
    signal: AbortSignal,
    updateOutput?: (output: string | AnsiOutput) => void,
  ): Promise<ToolResult> {
    try {
      if (updateOutput) {
        updateOutput(
          `Subagent ${this.definition.name} starting (Harness Mode)...\n`,
        );
      }
      const harness = AgentFactory.createHarness(this.config, this.definition, {
        inputs: this.params,
        parentPromptId: promptIdContext.getStore(),
      });
      const initialRequest = [{ text: 'Start' }];
      const stream = harness.run(
        initialRequest,
        signal,
        this.definition.runConfig?.maxTurns,
      );

      // Drain the generator by hand: its *return* value (the final Turn) is
      // only observable on the `done` result, which `for await` discards.
      let turn: Turn | undefined;
      while (true) {
        const { value, done } = await stream.next();
        if (done) {
          turn = value;
          break;
        }
        const event = value;
        // NOTE(review): activity is forwarded to the message bus only when
        // `updateOutput` is supplied — confirm this gating is intended.
        if (!updateOutput) {
          continue;
        }
        if (event.type === GeminiEventType.Thought && 'value' in event) {
          const subject = event.value.subject;
          updateOutput(`🤖💭 ${subject}\n`);
          // Also publish to message bus so UI hooks can see it regardless of where they listen
          this.messageBus.publish({
            type: 'subagent-activity',
            activity: {
              agentName: this.definition.name,
              type: 'THOUGHT',
              data: { subject },
            },
          } as any);
        } else if (
          event.type === GeminiEventType.SubagentActivity &&
          'value' in event
        ) {
          if (event.value.type === 'TOOL_CALL_START') {
            const toolName = String(event.value.data['name'] || 'a tool');
            updateOutput(`🛠️ Calling ${toolName}...\n`);
          }
          // Forward the core activity to the global bus
          this.messageBus.publish({
            type: 'subagent-activity',
            activity: event.value,
          } as any);
        }
      }
      if (!turn) {
        throw new Error('Agent failed to return a valid turn.');
      }

      // Computed once and shared by result extraction and the returned data.
      const outputName = this.definition.outputConfig?.outputName || 'result';
      const rawResult = this.resolveRawResult(turn, outputName);

      // Apply the default in one place so the text and data views agree
      // (including when the raw result is null).
      const resolvedResult = rawResult ?? 'Task completed.';
      const finalResultString =
        typeof resolvedResult === 'object'
          ? JSON.stringify(resolvedResult, null, 2)
          : String(resolvedResult);

      const displayContent = [
        '',
        `Subagent ${this.definition.name} Finished (Harness Mode)`,
        'Result:',
        finalResultString,
        '',
      ].join('\n');
      if (updateOutput) {
        updateOutput(displayContent);
      }

      // Parse as JSON if it's a string that looks like an object, to satisfy schema requirements
      let finalResultData: unknown = resolvedResult;
      if (
        typeof finalResultData === 'string' &&
        finalResultData.trim().startsWith('{')
      ) {
        try {
          finalResultData = JSON.parse(finalResultData);
          debugLogger.debug(
            `[AgentHarness] [Invocation:${this.definition.name}] Parsed string result into JSON object.`,
          );
        } catch {
          // Not valid JSON, keep as string
        }
      }

      return {
        llmContent: [{ text: finalResultString }],
        returnDisplay: displayContent,
        data: { [outputName]: finalResultData },
      };
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error);
      return {
        llmContent: [],
        returnDisplay: `Subagent Failed: ${this.definition.name}\nError: ${errorMessage}`,
        error: {
          message: errorMessage,
          type: ToolErrorType.EXECUTION_FAILED,
        },
      };
    }
  }

  /**
   * Returns the subagent's raw result from the first source that has one:
   * submitted output, non-blank response text, pending `complete_task` args,
   * then chat history. Returns undefined when nothing is found.
   */
  private resolveRawResult(turn: Turn, outputName: string): unknown {
    // 1. Explicit submitted output, if available
    if (turn.submittedOutput !== undefined) {
      return turn.submittedOutput;
    }

    // 2. Fallback: textual response (blank text does not count)
    const responseText = turn.getResponseText();
    debugLogger.debug(
      `[AgentHarness] [Invocation:${this.definition.name}] Initial response text: "${responseText}"`,
    );
    if (responseText.trim()) {
      return responseText;
    }

    // 3. Fallback: 'complete_task' tool call arguments (directly from the turn)
    const completeCall = turn.pendingToolCalls?.find(
      (c) => c.name === 'complete_task',
    );
    if (completeCall) {
      debugLogger.debug(
        `[AgentHarness] [Invocation:${this.definition.name}] Found 'complete_task' call in pending tool calls.`,
      );
      const fromArgs =
        completeCall.args[outputName] || completeCall.args['result'];
      if (fromArgs !== undefined) {
        debugLogger.debug(
          `[AgentHarness] [Invocation:${this.definition.name}] Extracted raw result from complete_task args (${outputName}).`,
        );
        return fromArgs;
      }
    }

    // 4. Fallback: look for any definitive findings in the history
    return this.findResultInHistory(turn, outputName);
  }

  /**
   * Scans chat history for the last model message that has either non-thought
   * text or a `complete_task` call, preferring the text part when both exist.
   * Returns undefined when no such message or value is found.
   */
  private findResultInHistory(turn: Turn, outputName: string): unknown {
    debugLogger.debug(
      `[AgentHarness] [Invocation:${this.definition.name}] No direct result found, checking history...`,
    );
    const history = turn.chat.getHistory();
    const lastMsgWithResult = history.findLast(
      (m) =>
        m.role === 'model' &&
        m.parts &&
        (m.parts.some(
          (p) =>
            !('thought' in p && p.thought) && 'text' in p && p.text?.trim(),
        ) ||
          m.parts.some(
            (p) =>
              'functionCall' in p &&
              p.functionCall &&
              p.functionCall.name === 'complete_task',
          )),
    );
    if (!lastMsgWithResult?.parts) {
      return undefined;
    }

    // Check for text part first (likely injected by Harness)
    const textPart = lastMsgWithResult.parts.find(
      (p) => !('thought' in p && p.thought) && 'text' in p && p.text?.trim(),
    );
    if (textPart && 'text' in textPart && textPart.text) {
      debugLogger.debug(
        `[AgentHarness] [Invocation:${this.definition.name}] Extracted result from history text part.`,
      );
      return textPart.text;
    }

    // Otherwise fall back to a complete_task call recorded in history
    const callPart = lastMsgWithResult.parts.find(
      (p) => 'functionCall' in p && p.functionCall?.name === 'complete_task',
    );
    if (!callPart || !('functionCall' in callPart) || !callPart.functionCall) {
      return undefined;
    }
    const callArgs = callPart.functionCall.args;
    const fromHistory = callArgs?.[outputName] || callArgs?.['result'];
    if (fromHistory !== undefined) {
      debugLogger.debug(
        `[AgentHarness] [Invocation:${this.definition.name}] Extracted result from history function call.`,
      );
    }
    return fromHistory;
  }
}
+195 -61
View File
@@ -23,6 +23,7 @@ import {
DEFAULT_MAX_TURNS,
DEFAULT_MAX_TIME_MINUTES,
} from './types.js';
import { SubagentBehavior } from './behavior.js';
import { LoopDetectionService } from '../services/loopDetectionService.js';
import { ChatCompressionService } from '../services/chatCompressionService.js';
import { ToolOutputMaskingService } from '../services/toolOutputMaskingService.js';
@@ -34,6 +35,7 @@ import { scheduleAgentTools } from './agent-scheduler.js';
import {
type ToolCallRequestInfo,
type ToolCallResponseInfo,
ROOT_SCHEDULER_ID,
} from '../scheduler/types.js';
import { promptIdContext } from '../utils/promptIdContext.js';
import { logAgentStart, logAgentFinish } from '../telemetry/loggers.js';
@@ -128,13 +130,13 @@ export class AgentHarness {
maxTurns?: number,
): AsyncGenerator<ServerGeminiStreamEvent, Turn> {
const startTime = Date.now();
debugLogger.debug(
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Starting unified ReAct loop`,
);
const maxTurnsLimit = maxTurns ?? DEFAULT_MAX_TURNS;
const maxTimeMinutes = DEFAULT_MAX_TIME_MINUTES;
debugLogger.debug(
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Starting unified ReAct loop. maxTurns: ${maxTurnsLimit}, maxTime: ${maxTimeMinutes}m`,
);
const deadlineTimer = new DeadlineTimer(
maxTimeMinutes * 60 * 1000,
'Agent timed out.',
@@ -163,7 +165,7 @@ export class AgentHarness {
let turn = new Turn(this.chat!, this.behavior.agentId);
let currentRequest = await this.behavior.transformRequest(request);
let terminateReason = AgentTerminateMode.GOAL;
let terminateReason = AgentTerminateMode.ABORTED;
try {
while (this.turnCounter < maxTurnsLimit) {
@@ -261,13 +263,28 @@ export class AgentHarness {
cumulativeResponse += event.value;
}
if (event.type === GeminiEventType.ToolCallRequest) {
const tool = this.toolRegistry.getTool(event.value.name);
if (tool instanceof SubagentTool) {
// Subagent activity reporting
if (this.behavior.name !== 'main') {
const displayName =
(this.behavior as any).definition?.displayName ||
this.behavior.name;
if (event.type === GeminiEventType.Thought) {
yield {
type: GeminiEventType.SubagentActivity,
value: {
agentName: this.behavior.name,
agentName: displayName,
type: 'THOUGHT',
data: { subject: event.value.subject },
},
};
}
if (event.type === GeminiEventType.ToolCallRequest) {
yield {
type: GeminiEventType.SubagentActivity,
value: {
agentName: displayName,
type: 'TOOL_CALL_START',
data: { name: event.value.name, args: event.value.args },
},
@@ -297,9 +314,7 @@ export class AgentHarness {
if (afterResult.shouldContinue) {
currentRequest = [{ text: afterResult.reason || 'Continue' }];
this.turnCounter++;
if (this.behavior.name === 'main') {
yield { type: GeminiEventType.TurnFinished };
}
turn = new Turn(this.chat!, this.behavior.agentId);
continue;
}
@@ -310,58 +325,156 @@ export class AgentHarness {
break;
}
// 9. Handle tool calls or termination
if (turn.pendingToolCalls.length > 0) {
const toolResults = await this.executeTools(
turn.pendingToolCalls,
combinedSignal,
onWaitingForConfirmation,
);
// 9. Handle tool calls or termination
if (turn.pendingToolCalls.length > 0) {
const toolResults = await this.executeTools(
turn.pendingToolCalls,
combinedSignal,
onWaitingForConfirmation,
);
debugLogger.debug(
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Received ${toolResults.length} tool results. Names: ${toolResults.map((tr) => tr.name).join(', ')}`,
);
// Yield responses so UI knows they are done
for (const result of toolResults) {
debugLogger.debug(
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Tool ${result.name} finished. Display length: ${String(result.result?.resultDisplay).length}`,
);
if (result.result) {
yield {
type: GeminiEventType.ToolCallResponse,
value: result.result,
};
// Subagent activity reporting
if (this.behavior.name !== 'main') {
yield {
type: GeminiEventType.SubagentActivity,
value: {
agentName: this.behavior.name,
type: 'TOOL_CALL_END',
data: {
name: result.name,
output: result.result.resultDisplay,
},
},
};
}
const tool = this.toolRegistry.getTool(result.name);
if (tool instanceof SubagentTool) {
yield {
type: GeminiEventType.SubagentActivity,
value: {
agentName: this.behavior.name,
type: 'TOOL_CALL_END',
data: {
name: result.name,
output: result.result.resultDisplay,
},
},
};
}
}
}
const goalReached = this.behavior.isGoalReached(toolResults);
debugLogger.debug(
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] isGoalReached check: ${goalReached}`,
);
if (goalReached) {
terminateReason = AgentTerminateMode.GOAL;
debugLogger.debug(
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Goal reached. Processing findings for ${toolResults.length} tool results.`,
);
// Extract results from the 'complete_task' tool call arguments
for (const r of toolResults) {
const completeCall = turn.pendingToolCalls.find(
(c) => c.name === TASK_COMPLETE_TOOL_NAME,
);
let findingsText: string | undefined;
if (r.name === TASK_COMPLETE_TOOL_NAME && completeCall) {
const outputName =
(this.behavior as SubagentBehavior).definition?.outputConfig
?.outputName || 'result';
const rawFindings =
completeCall.args[outputName] || completeCall.args['result'];
debugLogger.debug(
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Extracting from complete_task args (${outputName}). Found: ${!!rawFindings}`,
);
if (rawFindings !== undefined) {
// CAPTURE RAW DATA: Don't stringify if it's an object/array,
// we need to preserve structure for the parent model.
turn.submittedOutput = rawFindings as any;
findingsText =
typeof rawFindings === 'object'
? JSON.stringify(rawFindings, null, 2)
: String(rawFindings);
}
} else {
const findings =
(r.result?.data as any)?.result || r.result?.resultDisplay;
if (findings !== undefined) {
findingsText = String(findings);
// Also capture as raw if not already set
if (turn.submittedOutput === undefined) {
turn.submittedOutput = findings;
}
}
}
if (findingsText) {
debugLogger.debug(
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Captured findings text. Length: ${findingsText.length}`,
);
if (this.chat) {
// Ensure the chat session records the final text result so future turns or getResponseText() can see it
this.chat.addHistory({
role: 'model',
parts: [{ text: findingsText }],
});
}
}
}
return turn;
}
currentRequest = toolResults.map((r) => {
// For subagents, we want to return the raw result to the LLM, not the human-friendly display.
const tool = this.toolRegistry.getTool(r.name);
if (tool instanceof SubagentTool) {
const outputName =
(tool as any).definition?.outputConfig?.outputName || 'result';
const findings = (r.result?.data as any)?.[outputName] || (r.result?.data as any)?.['result'];
// Yield responses so UI knows they are done
for (const result of toolResults) {
if (result.result) {
yield {
type: GeminiEventType.ToolCallResponse,
value: result.result,
};
debugLogger.debug(`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Subagent tool ${r.name} findings type: ${typeof findings}. Using outputName: ${outputName}`);
const tool = this.toolRegistry.getTool(result.name);
if (tool instanceof SubagentTool) {
yield {
type: GeminiEventType.SubagentActivity,
value: {
agentName: this.behavior.name,
type: 'TOOL_CALL_END',
data: {
name: result.name,
output: result.result.resultDisplay,
},
if (findings !== undefined && 'functionResponse' in r.part && r.part.functionResponse) {
const responsePayload = { [outputName]: findings };
debugLogger.debug(`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Sending tool response keys: ${Object.keys(responsePayload).join(', ')}`);
return {
functionResponse: {
...r.part.functionResponse,
response: responsePayload,
},
};
}
}
}
if (this.behavior.isGoalReached(toolResults)) {
terminateReason = AgentTerminateMode.GOAL;
// If it's a subagent, find the complete_task call and extract the result string
const goalCall = toolResults.find((r) => r.name === 'complete_task');
if (goalCall?.result?.resultDisplay && this.chat) {
// Ensure the chat session records the final text result so future turns or getResponseText() can see it
this.chat.addHistory({
role: 'model',
parts: [{ text: String(goalCall.result.resultDisplay) }],
});
}
return turn;
}
currentRequest = toolResults.map((r) => {
// Ensure the LLM "sees" the rich result display if it's available.
// We use the resultDisplay text as the definitive function response.
// Fallback for other tools: Ensure the LLM "sees" the rich result display if it's available.
if (
r.result?.resultDisplay &&
'functionResponse' in r.part &&
@@ -377,6 +490,14 @@ export class AgentHarness {
return r.part;
});
this.turnCounter++;
if (this.turnCounter >= maxTurnsLimit) {
terminateReason = AgentTerminateMode.MAX_TURNS;
debugLogger.debug(
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Reached turn limit (${maxTurnsLimit}).`,
);
break;
}
turn = new Turn(this.chat!, this.behavior.agentId);
// Only yield TurnFinished if we are the main agent.
// Nested subagent turns should be internal and not trigger UI flushes in the parent.
@@ -392,6 +513,14 @@ export class AgentHarness {
if (nextParts) {
currentRequest = nextParts;
this.turnCounter++;
if (this.turnCounter >= maxTurnsLimit) {
terminateReason = AgentTerminateMode.MAX_TURNS;
debugLogger.debug(
`[AgentHarness] [${this.behavior.name}:${this.behavior.agentId}] Reached turn limit (${maxTurnsLimit}) during continuation.`,
);
break;
}
turn = new Turn(this.chat!, this.behavior.agentId);
if (this.behavior.name === 'main') {
yield { type: GeminiEventType.TurnFinished };
}
@@ -496,7 +625,9 @@ export class AgentHarness {
calls: ToolCallRequestInfo[],
signal: AbortSignal,
onWaitingForConfirmation?: (waiting: boolean) => void,
): Promise<Array<{ name: string; part: Part; result: ToolCallResponseInfo }>> {
): Promise<
Array<{ name: string; part: Part; result: ToolCallResponseInfo }>
> {
const taskCompleteCalls = calls.filter(
(c) => c.name === TASK_COMPLETE_TOOL_NAME,
);
@@ -512,13 +643,16 @@ export class AgentHarness {
}> = [];
if (otherCalls.length > 0) {
const schedulerId =
this.behavior.name === 'main'
? ROOT_SCHEDULER_ID
: this.behavior.agentId;
completedCalls = await scheduleAgentTools(this.config, otherCalls, {
schedulerId: this.behavior.agentId,
schedulerId,
toolRegistry: this.toolRegistry,
signal,
onWaitingForConfirmation,
// Only broadcast to global UI if we are the main top-level agent
messageBus: this.behavior.name === 'main' ? undefined : null,
});
}
+5 -71
View File
@@ -7,10 +7,7 @@
import type { Config } from '../config/config.js';
import { LocalAgentExecutor } from './local-executor.js';
import type { AnsiOutput } from '../utils/terminalSerializer.js';
import {
BaseToolInvocation,
type ToolResult,
} from '../tools/tools.js';
import { BaseToolInvocation, type ToolResult } from '../tools/tools.js';
import { ToolErrorType } from '../tools/tool-error.js';
import type {
LocalAgentDefinition,
@@ -18,9 +15,6 @@ import type {
SubagentActivityEvent,
} from './types.js';
import type { MessageBus } from '../confirmation-bus/message-bus.js';
import { AgentFactory } from './agent-factory.js';
import type { Turn } from '../core/turn.js';
import { promptIdContext } from '../utils/promptIdContext.js';
const INPUT_PREVIEW_MAX_LENGTH = 50;
const DESCRIPTION_MAX_LENGTH = 200;
@@ -89,10 +83,6 @@ export class LocalSubagentInvocation extends BaseToolInvocation<
signal: AbortSignal,
updateOutput?: (output: string | AnsiOutput) => void,
): Promise<ToolResult> {
if (this.config.isAgentHarnessEnabled()) {
return this.executeWithHarness(signal, updateOutput);
}
try {
if (updateOutput) {
updateOutput('Subagent starting...\n');
@@ -136,6 +126,10 @@ ${output.result}
return {
llmContent: [{ text: resultContent }],
returnDisplay: displayContent,
data: {
result: output.result,
terminate_reason: output.terminate_reason,
},
};
} catch (error) {
const errorMessage =
@@ -151,64 +145,4 @@ ${output.result}
};
}
}
/**
 * Runs the subagent through the AgentHarness and returns its final response
 * text as the tool result.
 *
 * @param signal Abort signal forwarded to the harness run.
 * @param updateOutput Optional sink that receives a single "starting" notice.
 * @returns A ToolResult with the response text on success, or an
 *   EXECUTION_FAILED error result if anything in the run throws.
 */
private async executeWithHarness(
signal: AbortSignal,
updateOutput?: (output: string | AnsiOutput) => void,
): Promise<ToolResult> {
try {
if (updateOutput) {
updateOutput('Subagent starting (Harness Mode)...\n');
}
const harness = AgentFactory.createHarness(this.config, this.definition, {
inputs: this.params,
parentPromptId: promptIdContext.getStore(),
});
const initialRequest = [{ text: 'Start' }];
const stream = harness.run(initialRequest, signal);
let turn: Turn | undefined;
// Drain the stream manually: the final Turn is the generator's return
// value, which is only delivered on the `done` result.
while (true) {
const { value, done } = await stream.next();
if (done) {
turn = value;
break;
}
// For the subagent box, we don't want to stream internal thoughts or tool calls
// to the persistent history. We just wait for the final result.
}
if (!turn) {
throw new Error('Agent failed to return a valid turn.');
}
// The result is whatever text the final turn produced.
const output = turn.getResponseText();
const displayContent = `
Subagent ${this.definition.name} Finished (Harness Mode)
Result:
${output}
`;
return {
llmContent: [{ text: output }],
returnDisplay: displayContent,
};
} catch (error) {
// Convert any failure into an error ToolResult instead of rethrowing.
const errorMessage =
error instanceof Error ? error.message : String(error);
return {
llmContent: `Subagent '${this.definition.name}' failed (Harness Mode). Error: ${errorMessage}`,
returnDisplay: `Subagent Failed: ${this.definition.name}\nError: ${errorMessage}`,
error: {
message: errorMessage,
type: ToolErrorType.EXECUTION_FAILED,
},
};
}
}
}
@@ -13,6 +13,7 @@ import {
import type { Config } from '../config/config.js';
import type { AgentDefinition, AgentInputs } from './types.js';
import { LocalSubagentInvocation } from './local-invocation.js';
import { HarnessSubagentInvocation } from './harness-invocation.js';
import { RemoteAgentInvocation } from './remote-invocation.js';
import type { MessageBus } from '../confirmation-bus/message-bus.js';
@@ -79,6 +80,17 @@ export class SubagentToolWrapper extends BaseDeclarativeTool<
);
}
if (this.config.isAgentHarnessEnabled()) {
return new HarnessSubagentInvocation(
definition,
this.config,
params,
effectiveMessageBus,
_toolName,
_toolDisplayName,
);
}
return new LocalSubagentInvocation(
definition,
this.config,
+1 -1
View File
@@ -44,7 +44,7 @@ export const DEFAULT_QUERY_STRING = 'Get Started!';
/**
* The default maximum number of conversational turns for an agent.
*/
export const DEFAULT_MAX_TURNS = 15;
export const DEFAULT_MAX_TURNS = 40;
/**
* The default maximum execution time for an agent in minutes.
+1
View File
@@ -256,6 +256,7 @@ export class Turn {
private debugResponses: GenerateContentResponse[] = [];
private pendingCitations = new Set<string>();
finishReason: FinishReason | undefined = undefined;
submittedOutput: string | undefined;
constructor(
readonly chat: GeminiChat,