fix(core): ensure subagents have access to tools and skills in new harness

This commit is contained in:
mkorwel
2026-02-11 21:53:12 -06:00
parent c989087ba5
commit 67819bf5ae
4 changed files with 313 additions and 71 deletions
+25 -6
View File
@@ -39,6 +39,8 @@ import { logRecoveryAttempt } from '../telemetry/loggers.js';
import { RecoveryAttemptEvent } from '../telemetry/types.js';
import { DeadlineTimer } from '../utils/deadlineTimer.js';
import type { ToolRegistry } from '../tools/tool-registry.js';
const TASK_COMPLETE_TOOL_NAME = 'complete_task';
const GRACE_PERIOD_MS = 60 * 1000;
@@ -53,9 +55,10 @@ export interface AgentBehavior {
readonly name: string;
/** Initializes any state needed for the agent. */
initialize(): Promise<void>;
initialize(toolRegistry: ToolRegistry): Promise<void>;
/** Returns the system instruction for the chat. */
getSystemInstruction(): Promise<string | undefined>;
/** Returns the initial chat history. */
@@ -75,9 +78,7 @@ export interface AgentBehavior {
/**
* Fires the "Before Agent" hooks if applicable.
*/
fireBeforeAgent(
request: Part[],
): Promise<{
fireBeforeAgent(request: Part[]): Promise<{
stop?: boolean;
reason?: string;
systemMessage?: string;
@@ -157,7 +158,7 @@ export class MainAgentBehavior implements AgentBehavior {
this.agentId = `${parentPrefix}main-${randomIdPart}`;
}
async initialize() {}
/**
 * No-op implementation of the behavior initialization hook.
 *
 * The registry argument is accepted but never read (hence the `_` prefix);
 * this method registers nothing into it.
 */
async initialize(_toolRegistry: ToolRegistry) {}
async getSystemInstruction() {
const systemMemory = this.config.getUserMemory();
@@ -338,7 +339,25 @@ export class SubagentBehavior implements AgentBehavior {
this.agentId = `${parentPrefix}${this.name}-${randomIdPart}`;
}
async initialize() {}
/**
 * Fills the supplied `toolRegistry` with the tools this subagent may use.
 *
 * With a `toolConfig` on the definition, only its entries are registered:
 * string entries are looked up by name in the parent registry (names that
 * resolve to nothing are skipped), and object entries exposing `build` are
 * registered as-is. Without a `toolConfig`, every tool name reported by the
 * parent registry is copied over. The registry is sorted at the end.
 *
 * @param toolRegistry Registry to populate.
 */
async initialize(toolRegistry: ToolRegistry) {
  const parentRegistry = this.config.getToolRegistry();

  // Copies a single tool from the parent registry; unknown names are ignored.
  const copyFromParent = (name: string): void => {
    const found = parentRegistry.getTool(name);
    if (found) {
      toolRegistry.registerTool(found);
    }
  };

  const toolConfig = this.definition.toolConfig;
  if (!toolConfig) {
    // No explicit configuration: mirror everything the parent registry lists.
    for (const name of parentRegistry.getAllToolNames()) {
      copyFromParent(name);
    }
  } else {
    for (const toolRef of toolConfig.tools) {
      if (typeof toolRef === 'string') {
        copyFromParent(toolRef);
      } else if (typeof toolRef === 'object' && 'build' in toolRef) {
        // Entry is already a tool object; register it directly.
        toolRegistry.registerTool(toolRef);
      }
    }
  }

  toolRegistry.sortTools();
}
async getSystemInstruction() {
const augmentedInputs = {
+183 -6
View File
@@ -57,12 +57,10 @@ describe('AgentHarness', () => {
mockConfig.getIdeMode = vi.fn().mockReturnValue(false);
mockConfig.getBaseLlmClient = vi.fn().mockReturnValue({});
mockConfig.getModelRouterService = vi.fn().mockReturnValue({
route: vi
.fn()
.mockResolvedValue({
model: 'gemini-test-model',
metadata: { source: 'test' },
}),
route: vi.fn().mockResolvedValue({
model: 'gemini-test-model',
metadata: { source: 'test' },
}),
});
vi.clearAllMocks();
@@ -177,6 +175,185 @@ describe('AgentHarness', () => {
expect.objectContaining({ terminate_reason: AgentTerminateMode.GOAL }),
);
});
it('handles multiple turns and model routing', async () => {
  // Wraps one response payload in a single-chunk async stream.
  const singleChunkStream = (value: unknown) =>
    (async function* () {
      yield { type: StreamEventType.CHUNK, value };
    })();

  const agentDefinition: LocalAgentDefinition<z.ZodUnknown> = {
    kind: 'local',
    name: 'multi-turn-agent',
    description: 'Testing multiple turns',
    inputConfig: {
      inputSchema: { type: 'object', properties: {}, required: [] },
    },
    modelConfig: { model: 'initial-model' },
    runConfig: { maxTurns: 5 },
    promptConfig: { systemPrompt: 'Test' },
  };

  const subagentBehavior = new SubagentBehavior(mockConfig, agentDefinition);
  const harness = new AgentHarness({
    config: mockConfig,
    behavior: subagentBehavior,
  });

  const chatStub = {
    sendMessageStream: vi.fn(),
    setTools: vi.fn(),
    getHistory: vi.fn().mockReturnValue([]),
    addHistory: vi.fn(),
    setSystemInstruction: vi.fn(),
    maybeIncludeSchemaDepthContext: vi.fn(),
    getLastPromptTokenCount: vi.fn().mockReturnValue(0),
  } as unknown as GeminiChat;
  const chatConstructor = GeminiChat as unknown as Mock;
  chatConstructor.mockReturnValue(chatStub);

  const sendMessageStream = chatStub.sendMessageStream as Mock;

  // First turn: the model requests a regular tool.
  sendMessageStream.mockResolvedValueOnce(
    singleChunkStream({
      candidates: [
        {
          content: { parts: [{ text: 'Thinking...' }] },
          finishReason: 'STOP',
        },
      ],
      functionCalls: [{ name: 'tool_1', args: {}, id: 'c1' }],
    }),
  );

  // Second turn: the model ends the run through complete_task.
  sendMessageStream.mockResolvedValueOnce(
    singleChunkStream({
      candidates: [
        {
          content: { parts: [{ text: 'Done' }] },
          finishReason: 'STOP',
        },
      ],
      functionCalls: [
        {
          name: 'complete_task',
          args: { result: 'Success' },
          id: 'c2',
        },
      ],
    }),
  );

  // The scheduler reports a successful execution of tool_1.
  const schedulerMock = scheduleAgentTools as unknown as Mock;
  schedulerMock.mockResolvedValue([
    {
      request: { name: 'tool_1', callId: 'c1' },
      status: 'success',
      response: {
        responseParts: [
          { functionResponse: { name: 'tool_1', response: {}, id: 'c1' } },
        ],
      },
    },
  ]);

  // Drain the generator until the harness reports completion.
  const run = harness.run([{ text: 'Start' }], new AbortController().signal);
  let step = await run.next();
  while (!step.done) {
    step = await run.next();
  }

  // One LLM call per turn, and the router must have been consulted.
  expect(chatStub.sendMessageStream).toHaveBeenCalledTimes(2);
  expect(mockConfig.getModelRouterService().route).toHaveBeenCalled();
});
it('attempts recovery when max turns is reached', async () => {
  // Wraps one response payload in a single-chunk async stream.
  const singleChunkStream = (value: unknown) =>
    (async function* () {
      yield { type: StreamEventType.CHUNK, value };
    })();

  const agentDefinition: LocalAgentDefinition<z.ZodUnknown> = {
    kind: 'local',
    name: 'unproductive-agent',
    description: 'Reaches max turns',
    inputConfig: {
      inputSchema: { type: 'object', properties: {}, required: [] },
    },
    modelConfig: { model: 'test' },
    runConfig: { maxTurns: 1 },
    promptConfig: { systemPrompt: 'Test' },
  };

  const subagentBehavior = new SubagentBehavior(mockConfig, agentDefinition);
  const harness = new AgentHarness({
    config: mockConfig,
    behavior: subagentBehavior,
  });

  const chatStub = {
    sendMessageStream: vi.fn(),
    setTools: vi.fn(),
    getHistory: vi.fn().mockReturnValue([]),
    addHistory: vi.fn(),
    setSystemInstruction: vi.fn(),
    maybeIncludeSchemaDepthContext: vi.fn(),
    getLastPromptTokenCount: vi.fn().mockReturnValue(0),
  } as unknown as GeminiChat;
  const chatConstructor = GeminiChat as unknown as Mock;
  chatConstructor.mockReturnValue(chatStub);

  const sendMessageStream = chatStub.sendMessageStream as Mock;

  // First turn: text only, no tool call, so the single allowed turn is used up.
  sendMessageStream.mockResolvedValueOnce(
    singleChunkStream({
      candidates: [
        {
          content: { parts: [{ text: 'Thinking...' }] },
          finishReason: 'STOP',
        },
      ],
    }),
  );

  // Recovery turn: the model finally calls complete_task.
  sendMessageStream.mockResolvedValueOnce(
    singleChunkStream({
      candidates: [
        {
          content: { parts: [{ text: 'Final Answer' }] },
          finishReason: 'STOP',
        },
      ],
      functionCalls: [
        {
          name: 'complete_task',
          args: { result: 'Recovered' },
          id: 'rec',
        },
      ],
    }),
  );

  // Drain the generator until the harness reports completion.
  const run = harness.run([{ text: 'Start' }], new AbortController().signal);
  let step = await run.next();
  while (!step.done) {
    step = await run.next();
  }

  // Recovery should turn the run into a GOAL termination.
  expect(vi.mocked(logAgentFinish)).toHaveBeenCalledWith(
    expect.anything(),
    expect.objectContaining({ terminate_reason: AgentTerminateMode.GOAL }),
  );
});
});
describe('MainAgentBehavior', () => {
+102 -59
View File
@@ -4,10 +4,7 @@
* SPDX-License-Identifier: Apache-2.0
*/
import {
type Part,
type FunctionDeclaration,
} from '@google/genai';
import { type Part, type FunctionDeclaration } from '@google/genai';
import { type Config } from '../config/config.js';
import { GeminiChat } from '../core/geminiChat.js';
import {
@@ -31,16 +28,11 @@ import { ToolRegistry } from '../tools/tool-registry.js';
import { scheduleAgentTools } from './agent-scheduler.js';
import { type ToolCallRequestInfo } from '../scheduler/types.js';
import { promptIdContext } from '../utils/promptIdContext.js';
import {
logAgentStart,
logAgentFinish,
} from '../telemetry/loggers.js';
import {
AgentStartEvent,
AgentFinishEvent,
} from '../telemetry/types.js';
import { logAgentStart, logAgentFinish } from '../telemetry/loggers.js';
import { AgentStartEvent, AgentFinishEvent } from '../telemetry/types.js';
import { DeadlineTimer } from '../utils/deadlineTimer.js';
import { type AgentBehavior } from './behavior.js';
import { debugLogger } from '../utils/debugLogger.js';
const TASK_COMPLETE_TOOL_NAME = 'complete_task';
@@ -58,7 +50,7 @@ export interface AgentHarnessOptions {
/**
* A unified harness for executing agents (both main CLI and subagents).
* Consolidates ReAct loop logic, tool scheduling, and state management.
*
*
* Uses an AgentBehavior plugin to handle specific personality differences.
*/
export class AgentHarness {
@@ -91,7 +83,7 @@ export class AgentHarness {
* Initializes the harness, creating the underlying chat object.
*/
async initialize(): Promise<void> {
await this.behavior.initialize();
await this.behavior.initialize(this.toolRegistry);
this.chat = await this.createChat();
}
@@ -123,6 +115,9 @@ export class AgentHarness {
maxTurns?: number,
): AsyncGenerator<ServerGeminiStreamEvent, Turn> {
const startTime = Date.now();
debugLogger.debug(
`[AgentHarness] Starting unified ReAct loop for agent: ${this.behavior.name}`,
);
const maxTurnsLimit = maxTurns ?? DEFAULT_MAX_TURNS;
const maxTimeMinutes = DEFAULT_MAX_TIME_MINUTES;
@@ -171,31 +166,47 @@ export class AgentHarness {
}
// 1. Hook: Before Agent
const beforeResult = await this.behavior.fireBeforeAgent(currentRequest);
const beforeResult =
await this.behavior.fireBeforeAgent(currentRequest);
if (beforeResult.stop) {
terminateReason = AgentTerminateMode.ABORTED;
terminateReason = AgentTerminateMode.ABORTED;
if (beforeResult.systemMessage) {
yield { type: GeminiEventType.Error, value: { error: { message: beforeResult.systemMessage } } };
yield {
type: GeminiEventType.Error,
value: { error: { message: beforeResult.systemMessage } },
};
}
break;
}
if (beforeResult.additionalContext) {
currentRequest.push({ text: `<hook_context>${beforeResult.additionalContext}</hook_context>` });
currentRequest.push({
text: `<hook_context>${beforeResult.additionalContext}</hook_context>`,
});
}
// 2. Sync Environment (IDE Context etc)
const envSync = await this.behavior.syncEnvironment(this.chat!.getHistory());
const envSync = await this.behavior.syncEnvironment(
this.chat!.getHistory(),
);
if (envSync.additionalParts) {
currentRequest.push(...envSync.additionalParts);
currentRequest.push(...envSync.additionalParts);
}
// 3. Compression
const compressionResult = await this.tryCompressChat(promptId);
if (compressionResult.compressionStatus === CompressionStatus.COMPRESSED) {
yield { type: GeminiEventType.ChatCompressed, value: compressionResult };
if (
compressionResult.compressionStatus === CompressionStatus.COMPRESSED
) {
yield {
type: GeminiEventType.ChatCompressed,
value: compressionResult,
};
}
await this.toolOutputMaskingService.mask(this.chat!.getHistory(), this.config);
await this.toolOutputMaskingService.mask(
this.chat!.getHistory(),
this.config,
);
// 4. Loop Detection
if (await this.loopDetector.turnStarted(combinedSignal)) {
@@ -205,14 +216,19 @@ export class AgentHarness {
}
// 5. Model Selection/Routing
const modelToUse = await this.selectModel(currentRequest, combinedSignal);
const modelToUse = await this.selectModel(
currentRequest,
combinedSignal,
);
if (!this.currentSequenceModel) {
yield { type: GeminiEventType.ModelInfo, value: modelToUse };
this.currentSequenceModel = modelToUse;
}
// 6. Update tools for this model
this.chat!.setTools([{ functionDeclarations: this.prepareToolsList() }]);
this.chat!.setTools([
{ functionDeclarations: this.prepareToolsList() },
]);
// 7. Run the turn
const turnStream = promptIdContext.run(promptId, () =>
@@ -225,7 +241,7 @@ export class AgentHarness {
yield event;
if (event.type === GeminiEventType.Error) hasError = true;
if (event.type === GeminiEventType.Content && event.value) {
cumulativeResponse += event.value;
cumulativeResponse += event.value;
}
if (event.type === GeminiEventType.ToolCallRequest) {
@@ -246,19 +262,23 @@ export class AgentHarness {
}
// 8. Hook: After Agent
const afterResult = await this.behavior.fireAfterAgent(currentRequest, cumulativeResponse, turn);
const afterResult = await this.behavior.fireAfterAgent(
currentRequest,
cumulativeResponse,
turn,
);
if (afterResult.stop) {
terminateReason = AgentTerminateMode.GOAL;
if (afterResult.contextCleared) {
await this.initialize();
}
break;
terminateReason = AgentTerminateMode.GOAL;
if (afterResult.contextCleared) {
await this.initialize();
}
break;
}
if (afterResult.shouldContinue) {
currentRequest = [{ text: afterResult.reason || 'Continue' }];
this.turnCounter++;
turn = new Turn(this.chat!, this.behavior.agentId);
continue;
currentRequest = [{ text: afterResult.reason || 'Continue' }];
this.turnCounter++;
turn = new Turn(this.chat!, this.behavior.agentId);
continue;
}
if (combinedSignal.aborted) {
@@ -277,8 +297,8 @@ export class AgentHarness {
);
if (this.behavior.isGoalReached(toolResults)) {
terminateReason = AgentTerminateMode.GOAL;
return turn;
terminateReason = AgentTerminateMode.GOAL;
return turn;
}
currentRequest = toolResults.map((r) => r.part);
@@ -286,41 +306,59 @@ export class AgentHarness {
turn = new Turn(this.chat!, this.behavior.agentId);
} else {
// No tool calls. Check for continuation.
const nextParts = await this.behavior.getContinuationRequest(turn, combinedSignal);
const nextParts = await this.behavior.getContinuationRequest(
turn,
combinedSignal,
);
if (nextParts) {
currentRequest = nextParts;
this.turnCounter++;
turn = new Turn(this.chat!, this.behavior.agentId);
continue;
}
if (this.behavior.name !== 'main') {
terminateReason = AgentTerminateMode.ERROR_NO_COMPLETE_TASK_CALL;
terminateReason = AgentTerminateMode.ERROR_NO_COMPLETE_TASK_CALL;
} else {
terminateReason = AgentTerminateMode.GOAL;
terminateReason = AgentTerminateMode.GOAL;
}
break;
}
}
// FINALIZATION & RECOVERY
if (terminateReason !== AgentTerminateMode.GOAL && terminateReason !== AgentTerminateMode.ABORTED) {
if (this.turnCounter >= maxTurnsLimit) terminateReason = AgentTerminateMode.MAX_TURNS;
if (
terminateReason !== AgentTerminateMode.GOAL &&
terminateReason !== AgentTerminateMode.ABORTED
) {
if (this.turnCounter >= maxTurnsLimit)
terminateReason = AgentTerminateMode.MAX_TURNS;
const recoverySuccess = yield* this.behavior.executeRecovery(turn, terminateReason, signal);
if (recoverySuccess) {
terminateReason = AgentTerminateMode.GOAL;
return turn;
}
const recoverySuccess = yield* this.behavior.executeRecovery(
turn,
terminateReason,
signal,
);
if (recoverySuccess) {
terminateReason = AgentTerminateMode.GOAL;
return turn;
}
if (this.behavior.name !== 'main') {
yield {
type: GeminiEventType.Error,
value: { error: { message: this.behavior.getFinalFailureMessage(terminateReason, maxTurnsLimit, maxTimeMinutes) } }
};
}
if (this.behavior.name !== 'main') {
yield {
type: GeminiEventType.Error,
value: {
error: {
message: this.behavior.getFinalFailureMessage(
terminateReason,
maxTurnsLimit,
maxTimeMinutes,
),
},
},
};
}
}
} finally {
deadlineTimer.abort();
logAgentFinish(
@@ -339,7 +377,8 @@ export class AgentHarness {
}
private async tryCompressChat(promptId: string) {
const model = this.currentSequenceModel ?? resolveModel(this.config.getActiveModel());
const model =
this.currentSequenceModel ?? resolveModel(this.config.getActiveModel());
const { info } = await this.compressionService.compress(
this.chat!,
promptId,
@@ -362,7 +401,9 @@ export class AgentHarness {
signal,
requestedModel: this.config.getModel(),
};
const decision = await this.config.getModelRouterService().route(routingContext);
const decision = await this.config
.getModelRouterService()
.route(routingContext);
return decision.model;
}
@@ -371,7 +412,9 @@ export class AgentHarness {
signal: AbortSignal,
onWaitingForConfirmation?: (waiting: boolean) => void,
): Promise<Array<{ name: string; part: Part }>> {
const taskCompleteCalls = calls.filter((c) => c.name === TASK_COMPLETE_TOOL_NAME);
const taskCompleteCalls = calls.filter(
(c) => c.name === TASK_COMPLETE_TOOL_NAME,
);
const otherCalls = calls.filter((c) => c.name !== TASK_COMPLETE_TOOL_NAME);
let completedCalls: Array<{
+3
View File
@@ -797,6 +797,9 @@ export class GeminiClient {
}
if (this.config.isAgentHarnessEnabled()) {
debugLogger.debug(
'[GeminiClient] Using AgentHarness for message execution.',
);
this.sessionTurnCount++;
if (
this.config.getMaxSessionTurns() > 0 &&