feat(core): implement unified AgentHarness and AgentFactory #18267

This commit is contained in:
mkorwel
2026-02-11 17:24:20 -06:00
parent 6c1773170e
commit e650c10cf5
8 changed files with 735 additions and 1 deletions
+1
View File
@@ -788,6 +788,7 @@ export async function loadCliConfig(
extensionLoader: extensionManager,
enableExtensionReloading: settings.experimental?.extensionReloading,
enableAgents: settings.experimental?.enableAgents,
enableAgentHarness: settings.experimental?.enableAgentHarness,
plan: settings.experimental?.plan,
enableEventDrivenScheduler: true,
skillsSupport: settings.skills?.enabled ?? true,
+27
View File
@@ -0,0 +1,27 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { type Config } from '../config/config.js';
import { AgentHarness, type AgentHarnessOptions } from './harness.js';
import { type AgentDefinition } from './types.js';
/**
* Factory for creating agent executors/harnesses.
* Respects experimental flags to determine which implementation to use.
*/
export class AgentFactory {
static createHarness(
config: Config,
definition?: AgentDefinition,
options: Partial<AgentHarnessOptions> = {},
): AgentHarness {
return new AgentHarness({
config,
definition,
...options,
});
}
}
+152
View File
@@ -0,0 +1,152 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi, beforeEach, type Mock } from 'vitest';
import { AgentHarness } from './harness.js';
import { makeFakeConfig } from '../test-utils/config.js';
import { GeminiChat, StreamEventType } from '../core/geminiChat.js';
import { GeminiEventType, type ServerGeminiStreamEvent } from '../core/turn.js';
import { z } from 'zod';
import { type LocalAgentDefinition, AgentTerminateMode } from './types.js';
import { scheduleAgentTools } from './agent-scheduler.js';
import { logAgentFinish } from '../telemetry/loggers.js';
import { type Config } from '../config/config.js';
vi.mock('../telemetry/loggers.js', async (importOriginal) => {
const actual =
await importOriginal<typeof import('../telemetry/loggers.js')>();
return {
...actual,
logAgentStart: vi.fn(),
logAgentFinish: vi.fn(),
};
});
vi.mock('../core/geminiChat.js', () => ({
GeminiChat: vi.fn(),
StreamEventType: {
CHUNK: 'chunk',
},
}));
vi.mock('./agent-scheduler.js', () => ({
scheduleAgentTools: vi.fn(),
}));
describe('AgentHarness', () => {
let mockConfig: Config;
beforeEach(() => {
mockConfig = makeFakeConfig();
mockConfig.getToolRegistry = vi.fn().mockReturnValue({
getTool: vi.fn(),
getAllToolNames: vi.fn().mockReturnValue([]),
getFunctionDeclarations: vi.fn().mockReturnValue([]),
});
vi.clearAllMocks();
});
it('executes a subagent and finishes when complete_task is called', async () => {
const definition: LocalAgentDefinition<z.ZodString> = {
kind: 'local',
name: 'test-agent',
displayName: 'Test Agent',
description: 'A test agent',
runConfig: { maxTurns: 5, maxTimeMinutes: 5 },
promptConfig: { systemPrompt: 'You are a test agent.' },
outputConfig: {
outputName: 'result',
schema: z.string(),
},
};
const harness = new AgentHarness({
config: mockConfig,
definition,
inputs: {},
});
const mockChat = {
sendMessageStream: vi.fn(),
setTools: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
addHistory: vi.fn(),
setSystemInstruction: vi.fn(),
maybeIncludeSchemaDepthContext: vi.fn(),
getLastPromptTokenCount: vi.fn().mockReturnValue(0),
} as unknown as GeminiChat;
(GeminiChat as unknown as Mock).mockReturnValue(mockChat);
// Mock model response with complete_task call
(mockChat.sendMessageStream as Mock).mockResolvedValue(
(async function* () {
yield {
type: StreamEventType.CHUNK,
value: {
candidates: [
{ content: { parts: [{ text: 'Done!' }] }, finishReason: 'STOP' },
],
functionCalls: [
{
name: 'complete_task',
args: { result: 'Success' },
id: 'call_1',
},
],
},
};
})(),
);
// Mock tool execution
(scheduleAgentTools as unknown as Mock).mockResolvedValue([
{
request: {
name: 'complete_task',
args: { result: 'Success' },
callId: 'call_1',
},
status: 'success',
response: {
responseParts: [
{
functionResponse: {
name: 'complete_task',
response: { status: 'OK' },
id: 'call_1',
},
},
],
},
},
]);
const events: ServerGeminiStreamEvent[] = [];
const run = harness.run([{ text: 'Start' }], new AbortController().signal);
while (true) {
const { value, done } = await run.next();
if (done) break;
events.push(value);
}
expect(
events.some(
(e) =>
e.type === GeminiEventType.ToolCallRequest &&
e.value.name === 'complete_task',
),
).toBe(true);
expect(mockChat.sendMessageStream).toHaveBeenCalled();
expect(vi.mocked(logAgentFinish)).toHaveBeenCalledWith(
expect.anything(),
expect.objectContaining({
terminate_reason: AgentTerminateMode.GOAL,
}),
);
});
});
+453
View File
@@ -0,0 +1,453 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import {
type Content,
type Part,
type FunctionDeclaration,
Type,
} from '@google/genai';
import { type Config } from '../config/config.js';
import { GeminiChat } from '../core/geminiChat.js';
import {
Turn,
GeminiEventType,
type ServerGeminiStreamEvent,
CompressionStatus,
} from '../core/turn.js';
import {
type AgentDefinition,
AgentTerminateMode,
type LocalAgentDefinition,
type AgentInputs,
} from './types.js';
import { LoopDetectionService } from '../services/loopDetectionService.js';
import { ChatCompressionService } from '../services/chatCompressionService.js';
import { ToolOutputMaskingService } from '../services/toolOutputMaskingService.js';
import {
getDirectoryContextString,
getInitialChatHistory,
} from '../utils/environmentContext.js';
import { templateString } from './utils.js';
import { getVersion } from '../utils/version.js';
import { resolveModel } from '../config/models.js';
import { type RoutingContext } from '../routing/routingStrategy.js';
import { getCoreSystemPrompt } from '../core/prompts.js';
import { ToolRegistry } from '../tools/tool-registry.js';
import { zodToJsonSchema } from 'zod-to-json-schema';
import type { Schema } from '@google/genai';
import { checkNextSpeaker } from '../utils/nextSpeakerChecker.js';
import { scheduleAgentTools } from './agent-scheduler.js';
import { type ToolCallRequestInfo } from '../scheduler/types.js';
import { promptIdContext } from '../utils/promptIdContext.js';
import { logAgentStart, logAgentFinish } from '../telemetry/loggers.js';
import { AgentStartEvent, AgentFinishEvent } from '../telemetry/types.js';
const TASK_COMPLETE_TOOL_NAME = 'complete_task';
export interface AgentHarnessOptions {
config: Config;
definition?: AgentDefinition;
/** If provided, this prompt_id will be used as a prefix. */
parentPromptId?: string;
/** Initial history to start the agent with. */
initialHistory?: Content[];
/** Inputs for subagent templating. */
inputs?: AgentInputs;
}
/**
* A unified harness for executing agents (both main CLI and subagents).
* Consolidates ReAct loop logic, tool scheduling, and state management.
*/
export class AgentHarness {
private readonly config: Config;
private readonly definition?: AgentDefinition;
private readonly loopDetector: LoopDetectionService;
private readonly compressionService: ChatCompressionService;
private readonly toolOutputMaskingService: ToolOutputMaskingService;
private readonly toolRegistry: ToolRegistry;
private chat?: GeminiChat;
private readonly agentId: string;
private currentSequenceModel: string | null = null;
private turnCounter = 0;
private inputs?: AgentInputs;
constructor(options: AgentHarnessOptions) {
this.config = options.config;
this.definition = options.definition;
this.inputs = options.inputs;
const randomIdPart = Math.random().toString(36).slice(2, 8);
const parentPrefix = options.parentPromptId
? `${options.parentPromptId}-`
: '';
const name = this.definition?.name ?? 'main';
this.agentId = `${parentPrefix}${name}-${randomIdPart}`;
this.loopDetector = new LoopDetectionService(this.config);
this.compressionService = new ChatCompressionService();
this.toolOutputMaskingService = new ToolOutputMaskingService();
// Use an isolated tool registry for subagents, or the global one for the main agent.
this.toolRegistry = this.definition
? new ToolRegistry(this.config, this.config.getMessageBus())
: this.config.getToolRegistry();
}
/**
* Initializes the harness, creating the underlying chat object.
*/
async initialize(): Promise<void> {
if (this.definition) {
await this.setupSubagentTools();
}
this.chat = await this.createChat();
}
private async setupSubagentTools(): Promise<void> {
if (!this.definition) return;
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const def = this.definition as LocalAgentDefinition;
const parentToolRegistry = this.config.getToolRegistry();
if (def.toolConfig) {
for (const toolRef of def.toolConfig.tools) {
if (typeof toolRef === 'string') {
const tool = parentToolRegistry.getTool(toolRef);
if (tool) this.toolRegistry.registerTool(tool);
} else if (typeof toolRef === 'object' && 'build' in toolRef) {
this.toolRegistry.registerTool(toolRef);
}
}
} else {
for (const toolName of parentToolRegistry.getAllToolNames()) {
const tool = parentToolRegistry.getTool(toolName);
if (tool) this.toolRegistry.registerTool(tool);
}
}
this.toolRegistry.sortTools();
}
private async createChat(): Promise<GeminiChat> {
const systemInstruction = await this.getSystemInstruction();
const history = await this.getInitialHistory();
const tools = this.prepareToolsList();
return new GeminiChat(
this.config,
systemInstruction,
[{ functionDeclarations: tools }],
history,
);
}
private async getInitialHistory(): Promise<Content[]> {
if (this.definition) {
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const def = this.definition as LocalAgentDefinition;
const initialMessages = def.promptConfig.initialMessages ?? [];
if (this.inputs) {
return initialMessages.map((content) => ({
...content,
parts: (content.parts ?? []).map((part) =>
'text' in part && part.text
? { text: templateString(part.text, this.inputs!) }
: part,
),
}));
}
return initialMessages;
}
return getInitialChatHistory(this.config);
}
private async getSystemInstruction(): Promise<string | undefined> {
if (this.definition) {
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const def = this.definition as LocalAgentDefinition;
if (!def.promptConfig.systemPrompt) return undefined;
const augmentedInputs = {
...this.inputs,
cliVersion: await getVersion(),
today: new Date().toLocaleDateString(),
};
let prompt = templateString(
def.promptConfig.systemPrompt,
augmentedInputs,
);
const dirContext = await getDirectoryContextString(this.config);
prompt += `\n\n# Environment Context\n${dirContext}`;
prompt += `\n\nImportant Rules:\n* You are running in a non-interactive mode. You CANNOT ask the user for input or clarification.\n* Work systematically using available tools to complete your task.\n* Always use absolute paths for file operations.`;
const hasOutput = !!def.outputConfig;
prompt += `\n* When you have completed your task, you MUST call the \`${TASK_COMPLETE_TOOL_NAME}\` tool${hasOutput ? ' with your structured output' : ''}.`;
return prompt;
}
const systemMemory = this.config.getUserMemory();
return getCoreSystemPrompt(this.config, systemMemory);
}
private prepareToolsList(): FunctionDeclaration[] {
const modelId = this.currentSequenceModel ?? undefined;
const tools = this.toolRegistry.getFunctionDeclarations(modelId);
if (this.definition) {
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const def = this.definition as LocalAgentDefinition;
const completeTool: FunctionDeclaration = {
name: TASK_COMPLETE_TOOL_NAME,
description:
'Call this tool to submit your final answer and complete the task.',
parameters: { type: Type.OBJECT, properties: {}, required: [] },
};
if (def.outputConfig) {
const schema = zodToJsonSchema(def.outputConfig.schema);
const {
$schema: _,
definitions: __,
...cleanSchema
} = schema as Record<string, unknown>;
completeTool.parameters!.properties![def.outputConfig.outputName] =
cleanSchema as Schema;
completeTool.parameters!.required!.push(def.outputConfig.outputName);
} else {
completeTool.parameters!.properties!['result'] = {
type: Type.STRING,
description: 'Your final results or findings.',
};
completeTool.parameters!.required!.push('result');
}
tools.push(completeTool);
}
return tools;
}
/**
* Runs the agent with the given request.
*/
async *run(
request: Part[],
signal: AbortSignal,
maxTurns = 100,
): AsyncGenerator<ServerGeminiStreamEvent, Turn> {
const startTime = Date.now();
logAgentStart(
this.config,
new AgentStartEvent(this.agentId, this.definition?.name ?? 'main'),
);
if (!this.chat) {
await this.initialize();
}
let turn = new Turn(this.chat!, this.agentId);
let currentRequest = request;
let terminateReason = AgentTerminateMode.GOAL;
try {
while (this.turnCounter < maxTurns) {
const promptId = `${this.agentId}#${this.turnCounter}`;
if (signal.aborted) {
terminateReason = AgentTerminateMode.ABORTED;
yield { type: GeminiEventType.UserCancelled };
return turn;
}
// 1. Compression and Token Limit checks
const compressionResult = await this.tryCompressChat(promptId);
if (
compressionResult.compressionStatus === CompressionStatus.COMPRESSED
) {
yield {
type: GeminiEventType.ChatCompressed,
value: compressionResult,
};
}
// 2. Loop Detection
if (await this.loopDetector.turnStarted(signal)) {
terminateReason = AgentTerminateMode.ERROR;
yield { type: GeminiEventType.LoopDetected };
return turn;
}
// 3. Model Selection/Routing
const modelToUse = await this.selectModel(currentRequest, signal);
if (!this.currentSequenceModel) {
yield { type: GeminiEventType.ModelInfo, value: modelToUse };
this.currentSequenceModel = modelToUse;
}
// 4. Update tools for this model
this.chat!.setTools([
{ functionDeclarations: this.prepareToolsList() },
]);
// 5. Run the turn
const turnStream = promptIdContext.run(promptId, () =>
turn.run({ model: modelToUse }, currentRequest, signal),
);
let hasError = false;
for await (const event of turnStream) {
yield event;
if (event.type === GeminiEventType.Error) hasError = true;
// Subagent activity reporting
if (
this.definition &&
event.type === GeminiEventType.ToolCallRequest
) {
yield {
type: GeminiEventType.SubagentActivity,
value: {
agentName: this.definition.name,
type: 'TOOL_CALL_START',
data: { name: event.value.name, args: event.value.args },
},
};
}
}
if (hasError) {
terminateReason = AgentTerminateMode.ERROR;
return turn;
}
if (signal.aborted) {
terminateReason = AgentTerminateMode.ABORTED;
return turn;
}
// 6. Handle tool calls or termination
if (turn.pendingToolCalls.length > 0) {
const toolResults = await this.executeTools(
turn.pendingToolCalls,
signal,
);
// Check if subagent called complete_task
if (this.definition) {
const completeCall = toolResults.find(
(r) => r.name === TASK_COMPLETE_TOOL_NAME,
);
if (completeCall) {
// Check for validation errors in complete_task
if (completeCall.part.functionResponse?.response?.error) {
// The model messed up complete_task, it will receive the error as currentRequest and try again
currentRequest = [completeCall.part];
} else {
terminateReason = AgentTerminateMode.GOAL;
return turn;
}
} else {
currentRequest = toolResults.map((r) => r.part);
}
} else {
currentRequest = toolResults.map((r) => r.part);
}
this.turnCounter++;
// Create new turn for next iteration
turn = new Turn(this.chat!, this.agentId);
} else {
// No more tool calls. Check if we should continue (main agent only)
if (!this.definition) {
const nextSpeaker = await checkNextSpeaker(
this.chat!,
this.config.getBaseLlmClient(),
signal,
this.agentId,
);
if (nextSpeaker?.next_speaker === 'model') {
currentRequest = [{ text: 'Please continue.' }];
this.turnCounter++;
turn = new Turn(this.chat!, this.agentId);
continue;
}
} else {
// Subagent stopped without complete_task
terminateReason = AgentTerminateMode.ERROR_NO_COMPLETE_TASK_CALL;
yield {
type: GeminiEventType.Error,
value: {
error: {
message: `Agent stopped calling tools but did not call '${TASK_COMPLETE_TOOL_NAME}'`,
},
},
};
}
break; // Finished
}
}
} finally {
logAgentFinish(
this.config,
new AgentFinishEvent(
this.agentId,
this.definition?.name ?? 'main',
Date.now() - startTime,
this.turnCounter,
terminateReason,
),
);
}
return turn;
}
private async tryCompressChat(promptId: string) {
const model =
this.currentSequenceModel ?? resolveModel(this.config.getActiveModel());
const { info } = await this.compressionService.compress(
this.chat!,
promptId,
false,
model,
this.config,
false,
);
return info;
}
private async selectModel(
request: Part[],
signal: AbortSignal,
): Promise<string> {
if (this.currentSequenceModel) return this.currentSequenceModel;
const routingContext: RoutingContext = {
history: this.chat!.getHistory(true),
request,
signal,
requestedModel: this.config.getModel(),
};
const decision = await this.config
.getModelRouterService()
.route(routingContext);
return decision.model;
}
private async executeTools(
calls: ToolCallRequestInfo[],
signal: AbortSignal,
): Promise<Array<{ name: string; part: Part }>> {
const completedCalls = await scheduleAgentTools(this.config, calls, {
schedulerId: this.agentId,
toolRegistry: this.toolRegistry,
signal,
});
return completedCalls.map((call) => ({
name: call.request.name,
part: call.response.responseParts[0],
}));
}
}
@@ -15,6 +15,8 @@ import type {
SubagentActivityEvent,
} from './types.js';
import type { MessageBus } from '../confirmation-bus/message-bus.js';
import { AgentFactory } from './agent-factory.js';
import { GeminiEventType } from '../core/turn.js';
const INPUT_PREVIEW_MAX_LENGTH = 50;
const DESCRIPTION_MAX_LENGTH = 200;
@@ -83,6 +85,10 @@ export class LocalSubagentInvocation extends BaseToolInvocation<
signal: AbortSignal,
updateOutput?: (output: string | AnsiOutput) => void,
): Promise<ToolResult> {
if (this.config.isAgentHarnessEnabled()) {
return this.executeWithHarness(signal, updateOutput);
}
try {
if (updateOutput) {
updateOutput('Subagent starting...\n');
@@ -141,4 +147,74 @@ ${output.result}
};
}
}
private async executeWithHarness(
signal: AbortSignal,
updateOutput?: (output: string | AnsiOutput) => void,
): Promise<ToolResult> {
try {
if (updateOutput) {
updateOutput('Subagent starting (Harness Mode)...\n');
}
const harness = AgentFactory.createHarness(this.config, this.definition, {
inputs: this.params,
parentPromptId: promptIdContext.getStore(),
});
const initialRequest = [{ text: 'Start' }]; // Placeholder for subagent start
const stream = harness.run(initialRequest, signal);
let turn: Turn | undefined;
while (true) {
const { value, done } = await stream.next();
if (done) {
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
turn = value as Turn;
break;
}
const event = value;
if (updateOutput) {
if (event.type === GeminiEventType.Thought) {
updateOutput(`🤖💭 ${event.value.subject}`);
} else if (event.type === GeminiEventType.SubagentActivity) {
if (event.value.type === 'TOOL_CALL_START') {
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const toolName = event.value.data['name'] as string;
updateOutput(`🛠️ Calling tool: ${toolName}...`);
}
}
}
}
if (!turn) {
throw new Error('Agent failed to return a valid turn.');
}
const output = turn.getResponseText();
const displayContent = `
Subagent ${this.definition.name} Finished (Harness Mode)
Result:
${output}
`;
return {
llmContent: [{ text: output }],
returnDisplay: displayContent,
};
} catch (error) {
const errorMessage =
error instanceof Error ? error.message : String(error);
return {
llmContent: `Subagent '${this.definition.name}' failed (Harness Mode). Error: ${errorMessage}`,
returnDisplay: `Subagent Failed: ${this.definition.name}\nError: ${errorMessage}`,
error: {
message: errorMessage,
type: ToolErrorType.EXECUTION_FAILED,
},
};
}
}
}
+7
View File
@@ -470,6 +470,7 @@ export interface ConfigParameters {
disabledHooks?: string[];
projectHooks?: { [K in HookEventName]?: HookDefinition[] };
enableAgents?: boolean;
enableAgentHarness?: boolean;
enableEventDrivenScheduler?: boolean;
skillsSupport?: boolean;
disabledSkills?: string[];
@@ -654,6 +655,7 @@ export class Config {
| undefined;
private readonly enableAgents: boolean;
private readonly enableAgentHarness: boolean;
private agents: AgentSettings;
private readonly enableEventDrivenScheduler: boolean;
private readonly skillsSupport: boolean;
@@ -748,6 +750,7 @@ export class Config {
this.disableLoopDetection = params.disableLoopDetection ?? false;
this._activeModel = params.model;
this.enableAgents = params.enableAgents ?? false;
this.enableAgentHarness = params.enableAgentHarness ?? false;
this.agents = params.agents ?? {};
this.disableLLMCorrection = params.disableLLMCorrection ?? true;
this.planEnabled = params.plan ?? false;
@@ -1969,6 +1972,10 @@ export class Config {
return this.enableAgents;
}
isAgentHarnessEnabled(): boolean {
return this.enableAgentHarness;
}
isEventDrivenSchedulerEnabled(): boolean {
return this.enableEventDrivenScheduler;
}
+12 -1
View File
@@ -68,8 +68,18 @@ export enum GeminiEventType {
ModelInfo = 'model_info',
AgentExecutionStopped = 'agent_execution_stopped',
AgentExecutionBlocked = 'agent_execution_blocked',
SubagentActivity = 'subagent_activity',
}
export type ServerGeminiSubagentActivityEvent = {
type: GeminiEventType.SubagentActivity;
value: {
agentName: string;
type: string;
data: Record<string, unknown>;
};
};
export type ServerGeminiRetryEvent = {
type: GeminiEventType.Retry;
};
@@ -229,7 +239,8 @@ export type ServerGeminiStreamEvent =
| ServerGeminiInvalidStreamEvent
| ServerGeminiModelInfoEvent
| ServerGeminiAgentExecutionStoppedEvent
| ServerGeminiAgentExecutionBlockedEvent;
| ServerGeminiAgentExecutionBlockedEvent
| ServerGeminiSubagentActivityEvent;
// A turn manages the agentic loop turn within the server context.
export class Turn {
+7
View File
@@ -1486,6 +1486,13 @@
"default": false,
"type": "boolean"
},
"enableAgentHarness": {
"title": "Enable Agent Harness",
"description": "Enable the new unified agent harness (experimental).",
"markdownDescription": "Enable the new unified agent harness (experimental).\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`",
"default": false,
"type": "boolean"
},
"extensionManagement": {
"title": "Extension Management",
"description": "Enable extension management features.",