Files
gemini-cli/packages/core/src/agents/executor.ts

771 lines
25 KiB
TypeScript

/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { Config } from '../config/config.js';
import { reportError } from '../utils/errorReporting.js';
import { GeminiChat, StreamEventType } from '../core/geminiChat.js';
import { Type } from '@google/genai';
import type {
Content,
Part,
FunctionCall,
GenerateContentConfig,
FunctionDeclaration,
Schema,
} from '@google/genai';
import { executeToolCall } from '../core/nonInteractiveToolExecutor.js';
import { ToolRegistry } from '../tools/tool-registry.js';
import type { ToolCallRequestInfo } from '../core/turn.js';
import { getDirectoryContextString } from '../utils/environmentContext.js';
import { GlobTool } from '../tools/glob.js';
import { GrepTool } from '../tools/grep.js';
import { RipGrepTool } from '../tools/ripGrep.js';
import { LSTool } from '../tools/ls.js';
import { MemoryTool } from '../tools/memoryTool.js';
import { ReadFileTool } from '../tools/read-file.js';
import { ReadManyFilesTool } from '../tools/read-many-files.js';
import { WebSearchTool } from '../tools/web-search.js';
import { promptIdContext } from '../utils/promptIdContext.js';
import { logAgentStart, logAgentFinish } from '../telemetry/loggers.js';
import { AgentStartEvent, AgentFinishEvent } from '../telemetry/types.js';
import type {
AgentDefinition,
AgentInputs,
OutputObject,
SubagentActivityEvent,
} from './types.js';
import { AgentTerminateMode } from './types.js';
import { templateString } from './utils.js';
import { parseThought } from '../utils/thoughtUtils.js';
import { type z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
/** A callback function to report on agent activity. */
export type ActivityCallback = (activity: SubagentActivityEvent) => void;
const TASK_COMPLETE_TOOL_NAME = 'complete_task';
/**
* Executes an agent loop based on an {@link AgentDefinition}.
*
* This executor runs the agent in a loop, calling tools until it calls the
* mandatory `complete_task` tool to signal completion.
*/
export class AgentExecutor<TOutput extends z.ZodTypeAny> {
readonly definition: AgentDefinition<TOutput>;
private readonly agentId: string;
private readonly toolRegistry: ToolRegistry;
private readonly runtimeContext: Config;
private readonly onActivity?: ActivityCallback;
/**
* Creates and validates a new `AgentExecutor` instance.
*
* This method ensures that all tools specified in the agent's definition are
* safe for non-interactive use before creating the executor.
*
* @param definition The definition object for the agent.
* @param runtimeContext The global runtime configuration.
* @param onActivity An optional callback to receive activity events.
* @returns A promise that resolves to a new `AgentExecutor` instance.
*/
static async create<TOutput extends z.ZodTypeAny>(
definition: AgentDefinition<TOutput>,
runtimeContext: Config,
onActivity?: ActivityCallback,
): Promise<AgentExecutor<TOutput>> {
// Create an isolated tool registry for this agent instance.
const agentToolRegistry = new ToolRegistry(runtimeContext);
const parentToolRegistry = await runtimeContext.getToolRegistry();
if (definition.toolConfig) {
for (const toolRef of definition.toolConfig.tools) {
if (typeof toolRef === 'string') {
// If the tool is referenced by name, retrieve it from the parent
// registry and register it with the agent's isolated registry.
const toolFromParent = parentToolRegistry.getTool(toolRef);
if (toolFromParent) {
agentToolRegistry.registerTool(toolFromParent);
}
} else if (
typeof toolRef === 'object' &&
'name' in toolRef &&
'build' in toolRef
) {
agentToolRegistry.registerTool(toolRef);
}
// Note: Raw `FunctionDeclaration` objects in the config don't need to be
// registered; their schemas are passed directly to the model later.
}
// Validate that all registered tools are safe for non-interactive
// execution.
await AgentExecutor.validateTools(agentToolRegistry, definition.name);
}
// Get the parent prompt ID from context
const parentPromptId = promptIdContext.getStore();
return new AgentExecutor(
definition,
runtimeContext,
agentToolRegistry,
parentPromptId,
onActivity,
);
}
/**
* Constructs a new AgentExecutor instance.
*
* @private This constructor is private. Use the static `create` method to
* instantiate the class.
*/
private constructor(
definition: AgentDefinition<TOutput>,
runtimeContext: Config,
toolRegistry: ToolRegistry,
parentPromptId: string | undefined,
onActivity?: ActivityCallback,
) {
this.definition = definition;
this.runtimeContext = runtimeContext;
this.toolRegistry = toolRegistry;
this.onActivity = onActivity;
const randomIdPart = Math.random().toString(36).slice(2, 8);
// parentPromptId will be undefined if this agent is invoked directly
// (top-level), rather than as a sub-agent.
const parentPrefix = parentPromptId ? `${parentPromptId}-` : '';
this.agentId = `${parentPrefix}${this.definition.name}-${randomIdPart}`;
}
/**
* Runs the agent.
*
* @param inputs The validated input parameters for this invocation.
* @param signal An `AbortSignal` for cancellation.
* @returns A promise that resolves to the agent's final output.
*/
async run(inputs: AgentInputs, signal: AbortSignal): Promise<OutputObject> {
const startTime = Date.now();
let turnCounter = 0;
let terminateReason: AgentTerminateMode = AgentTerminateMode.ERROR;
let finalResult: string | null = null;
logAgentStart(
this.runtimeContext,
new AgentStartEvent(this.agentId, this.definition.name),
);
try {
const chat = await this.createChatObject(inputs);
const tools = this.prepareToolsList();
const query = this.definition.promptConfig.query
? templateString(this.definition.promptConfig.query, inputs)
: 'Get Started!';
let currentMessage: Content = { role: 'user', parts: [{ text: query }] };
while (true) {
// Check for termination conditions like max turns or timeout.
const reason = this.checkTermination(startTime, turnCounter);
if (reason) {
terminateReason = reason;
break;
}
if (signal.aborted) {
terminateReason = AgentTerminateMode.ABORTED;
break;
}
const promptId = `${this.agentId}#${turnCounter++}`;
const { functionCalls } = await promptIdContext.run(
promptId,
async () =>
this.callModel(chat, currentMessage, tools, signal, promptId),
);
if (signal.aborted) {
terminateReason = AgentTerminateMode.ABORTED;
break;
}
// If the model stops calling tools without calling complete_task, it's an error.
if (functionCalls.length === 0) {
terminateReason = AgentTerminateMode.ERROR;
finalResult = `Agent stopped calling tools but did not call '${TASK_COMPLETE_TOOL_NAME}' to finalize the session.`;
this.emitActivity('ERROR', {
error: finalResult,
context: 'protocol_violation',
});
break;
}
const { nextMessage, submittedOutput, taskCompleted } =
await this.processFunctionCalls(functionCalls, signal, promptId);
if (taskCompleted) {
finalResult = submittedOutput ?? 'Task completed successfully.';
terminateReason = AgentTerminateMode.GOAL;
break;
}
currentMessage = nextMessage;
}
if (terminateReason === AgentTerminateMode.GOAL) {
return {
result: finalResult || 'Task completed.',
terminate_reason: terminateReason,
};
}
return {
result:
finalResult || 'Agent execution was terminated before completion.',
terminate_reason: terminateReason,
};
} catch (error) {
this.emitActivity('ERROR', { error: String(error) });
throw error; // Re-throw the error for the parent context to handle.
} finally {
logAgentFinish(
this.runtimeContext,
new AgentFinishEvent(
this.agentId,
this.definition.name,
Date.now() - startTime,
turnCounter,
terminateReason,
),
);
}
}
/**
* Calls the generative model with the current context and tools.
*
* @returns The model's response, including any tool calls or text.
*/
private async callModel(
chat: GeminiChat,
message: Content,
tools: FunctionDeclaration[],
signal: AbortSignal,
promptId: string,
): Promise<{ functionCalls: FunctionCall[]; textResponse: string }> {
const messageParams = {
message: message.parts || [],
config: {
abortSignal: signal,
tools: tools.length > 0 ? [{ functionDeclarations: tools }] : undefined,
},
};
const responseStream = await chat.sendMessageStream(
this.definition.modelConfig.model,
messageParams,
promptId,
);
const functionCalls: FunctionCall[] = [];
let textResponse = '';
for await (const resp of responseStream) {
if (signal.aborted) break;
if (resp.type === StreamEventType.CHUNK) {
const chunk = resp.value;
const parts = chunk.candidates?.[0]?.content?.parts;
// Extract and emit any subject "thought" content from the model.
const { subject } = parseThought(
parts?.find((p) => p.thought)?.text || '',
);
if (subject) {
this.emitActivity('THOUGHT_CHUNK', { text: subject });
}
// Collect any function calls requested by the model.
if (chunk.functionCalls) {
functionCalls.push(...chunk.functionCalls);
}
// Handle text response (non-thought text)
const text =
parts
?.filter((p) => !p.thought && p.text)
.map((p) => p.text)
.join('') || '';
if (text) {
textResponse += text;
}
}
}
return { functionCalls, textResponse };
}
/** Initializes a `GeminiChat` instance for the agent run. */
private async createChatObject(inputs: AgentInputs): Promise<GeminiChat> {
const { promptConfig, modelConfig } = this.definition;
if (!promptConfig.systemPrompt && !promptConfig.initialMessages) {
throw new Error(
'PromptConfig must define either `systemPrompt` or `initialMessages`.',
);
}
const startHistory = this.applyTemplateToInitialMessages(
promptConfig.initialMessages ?? [],
inputs,
);
// Build system instruction from the templated prompt string.
const systemInstruction = promptConfig.systemPrompt
? await this.buildSystemPrompt(inputs)
: undefined;
try {
const generationConfig: GenerateContentConfig = {
temperature: modelConfig.temp,
topP: modelConfig.top_p,
thinkingConfig: {
includeThoughts: true,
thinkingBudget: modelConfig.thinkingBudget ?? -1,
},
};
if (systemInstruction) {
generationConfig.systemInstruction = systemInstruction;
}
return new GeminiChat(
this.runtimeContext,
generationConfig,
startHistory,
);
} catch (error) {
await reportError(
error,
`Error initializing Gemini chat for agent ${this.definition.name}.`,
startHistory,
'startChat',
);
// Re-throw as a more specific error after reporting.
throw new Error(`Failed to create chat object: ${error}`);
}
}
/**
* Executes function calls requested by the model and returns the results.
*
* @returns A new `Content` object for history, any submitted output, and completion status.
*/
private async processFunctionCalls(
functionCalls: FunctionCall[],
signal: AbortSignal,
promptId: string,
): Promise<{
nextMessage: Content;
submittedOutput: string | null;
taskCompleted: boolean;
}> {
const allowedToolNames = new Set(this.toolRegistry.getAllToolNames());
// Always allow the completion tool
allowedToolNames.add(TASK_COMPLETE_TOOL_NAME);
let submittedOutput: string | null = null;
let taskCompleted = false;
// We'll collect promises for the tool executions
const toolExecutionPromises: Array<Promise<Part[] | void>> = [];
// And we'll need a place to store the synchronous results (like complete_task or blocked calls)
const syncResponseParts: Part[] = [];
for (const [index, functionCall] of functionCalls.entries()) {
const callId = functionCall.id ?? `${promptId}-${index}`;
const args = (functionCall.args ?? {}) as Record<string, unknown>;
this.emitActivity('TOOL_CALL_START', {
name: functionCall.name,
args,
});
if (functionCall.name === TASK_COMPLETE_TOOL_NAME) {
if (taskCompleted) {
// We already have a completion from this turn. Ignore subsequent ones.
const error =
'Task already marked complete in this turn. Ignoring duplicate call.';
syncResponseParts.push({
functionResponse: {
name: TASK_COMPLETE_TOOL_NAME,
response: { error },
id: callId,
},
});
this.emitActivity('ERROR', {
context: 'tool_call',
name: functionCall.name,
error,
});
continue;
}
const { outputConfig } = this.definition;
taskCompleted = true; // Signal completion regardless of output presence
if (outputConfig) {
const outputName = outputConfig.outputName;
if (args[outputName] !== undefined) {
const outputValue = args[outputName];
const validationResult = outputConfig.schema.safeParse(outputValue);
if (!validationResult.success) {
taskCompleted = false; // Validation failed, revoke completion
const error = `Output validation failed: ${JSON.stringify(validationResult.error.flatten())}`;
syncResponseParts.push({
functionResponse: {
name: TASK_COMPLETE_TOOL_NAME,
response: { error },
id: callId,
},
});
this.emitActivity('ERROR', {
context: 'tool_call',
name: functionCall.name,
error,
});
continue;
}
const validatedOutput = validationResult.data;
if (this.definition.processOutput) {
submittedOutput = this.definition.processOutput(validatedOutput);
} else {
submittedOutput =
typeof outputValue === 'string'
? outputValue
: JSON.stringify(outputValue, null, 2);
}
syncResponseParts.push({
functionResponse: {
name: TASK_COMPLETE_TOOL_NAME,
response: { result: 'Output submitted and task completed.' },
id: callId,
},
});
this.emitActivity('TOOL_CALL_END', {
name: functionCall.name,
output: 'Output submitted and task completed.',
});
} else {
// Failed to provide required output.
taskCompleted = false; // Revoke completion status
const error = `Missing required argument '${outputName}' for completion.`;
syncResponseParts.push({
functionResponse: {
name: TASK_COMPLETE_TOOL_NAME,
response: { error },
id: callId,
},
});
this.emitActivity('ERROR', {
context: 'tool_call',
name: functionCall.name,
error,
});
}
} else {
// No output expected. Just signal completion.
submittedOutput = 'Task completed successfully.';
syncResponseParts.push({
functionResponse: {
name: TASK_COMPLETE_TOOL_NAME,
response: { status: 'Task marked complete.' },
id: callId,
},
});
this.emitActivity('TOOL_CALL_END', {
name: functionCall.name,
output: 'Task marked complete.',
});
}
continue;
}
// Handle standard tools
if (!allowedToolNames.has(functionCall.name as string)) {
const error = `Unauthorized tool call: '${functionCall.name}' is not available to this agent.`;
console.warn(`[AgentExecutor] Blocked call: ${error}`);
syncResponseParts.push({
functionResponse: {
name: functionCall.name as string,
id: callId,
response: { error },
},
});
this.emitActivity('ERROR', {
context: 'tool_call_unauthorized',
name: functionCall.name,
callId,
error,
});
continue;
}
const requestInfo: ToolCallRequestInfo = {
callId,
name: functionCall.name as string,
args,
isClientInitiated: true,
prompt_id: promptId,
};
// Create a promise for the tool execution
const executionPromise = (async () => {
const toolResponse = await executeToolCall(
this.runtimeContext,
requestInfo,
signal,
);
if (toolResponse.error) {
this.emitActivity('ERROR', {
context: 'tool_call',
name: functionCall.name,
error: toolResponse.error.message,
});
} else {
this.emitActivity('TOOL_CALL_END', {
name: functionCall.name,
output: toolResponse.resultDisplay,
});
}
return toolResponse.responseParts;
})();
toolExecutionPromises.push(executionPromise);
}
// Wait for all tool executions to complete
const asyncResults = await Promise.all(toolExecutionPromises);
// Combine all response parts
const toolResponseParts: Part[] = [...syncResponseParts];
for (const result of asyncResults) {
if (result) {
toolResponseParts.push(...result);
}
}
// If all authorized tool calls failed (and task isn't complete), provide a generic error.
if (
functionCalls.length > 0 &&
toolResponseParts.length === 0 &&
!taskCompleted
) {
toolResponseParts.push({
text: 'All tool calls failed or were unauthorized. Please analyze the errors and try an alternative approach.',
});
}
return {
nextMessage: { role: 'user', parts: toolResponseParts },
submittedOutput,
taskCompleted,
};
}
/**
* Prepares the list of tool function declarations to be sent to the model.
*/
private prepareToolsList(): FunctionDeclaration[] {
const toolsList: FunctionDeclaration[] = [];
const { toolConfig, outputConfig } = this.definition;
if (toolConfig) {
const toolNamesToLoad: string[] = [];
for (const toolRef of toolConfig.tools) {
if (typeof toolRef === 'string') {
toolNamesToLoad.push(toolRef);
} else if (typeof toolRef === 'object' && 'schema' in toolRef) {
// Tool instance with an explicit schema property.
toolsList.push(toolRef.schema as FunctionDeclaration);
} else {
// Raw `FunctionDeclaration` object.
toolsList.push(toolRef as FunctionDeclaration);
}
}
// Add schemas from tools that were registered by name.
toolsList.push(
...this.toolRegistry.getFunctionDeclarationsFiltered(toolNamesToLoad),
);
}
// Always inject complete_task.
// Configure its schema based on whether output is expected.
const completeTool: FunctionDeclaration = {
name: TASK_COMPLETE_TOOL_NAME,
description: outputConfig
? 'Call this tool to submit your final answer and complete the task. This is the ONLY way to finish.'
: 'Call this tool to signal that you have completed your task. This is the ONLY way to finish.',
parameters: {
type: Type.OBJECT,
properties: {},
required: [],
},
};
if (outputConfig) {
const jsonSchema = zodToJsonSchema(outputConfig.schema);
const {
$schema: _$schema,
definitions: _definitions,
...schema
} = jsonSchema;
completeTool.parameters!.properties![outputConfig.outputName] =
schema as Schema;
completeTool.parameters!.required!.push(outputConfig.outputName);
}
toolsList.push(completeTool);
return toolsList;
}
/** Builds the system prompt from the agent definition and inputs. */
private async buildSystemPrompt(inputs: AgentInputs): Promise<string> {
const { promptConfig } = this.definition;
if (!promptConfig.systemPrompt) {
return '';
}
// Inject user inputs into the prompt template.
let finalPrompt = templateString(promptConfig.systemPrompt, inputs);
// Append environment context (CWD and folder structure).
const dirContext = await getDirectoryContextString(this.runtimeContext);
finalPrompt += `\n\n# Environment Context\n${dirContext}`;
// Append standard rules for non-interactive execution.
finalPrompt += `
Important Rules:
* You are running in a non-interactive mode. You CANNOT ask the user for input or clarification.
* Work systematically using available tools to complete your task.
* Always use absolute paths for file operations. Construct them using the provided "Environment Context".`;
finalPrompt += `
* When you have completed your task, you MUST call the \`${TASK_COMPLETE_TOOL_NAME}\` tool.
* Do not call any other tools in the same turn as \`${TASK_COMPLETE_TOOL_NAME}\`.
* This is the ONLY way to complete your mission. If you stop calling tools without calling this, you have failed.`;
return finalPrompt;
}
/**
* Applies template strings to initial messages.
*
* @param initialMessages The initial messages from the prompt config.
* @param inputs The validated input parameters for this invocation.
* @returns A new array of `Content` with templated strings.
*/
private applyTemplateToInitialMessages(
initialMessages: Content[],
inputs: AgentInputs,
): Content[] {
return initialMessages.map((content) => {
const newParts = (content.parts ?? []).map((part) => {
if ('text' in part && part.text !== undefined) {
return { text: templateString(part.text, inputs) };
}
return part;
});
return { ...content, parts: newParts };
});
}
/**
* Validates that all tools in a registry are safe for non-interactive use.
*
* @throws An error if a tool is not on the allow-list for non-interactive execution.
*/
private static async validateTools(
toolRegistry: ToolRegistry,
agentName: string,
): Promise<void> {
// Tools that are non-interactive. This is temporary until we have tool
// confirmations for subagents.
const allowlist = new Set([
LSTool.Name,
ReadFileTool.Name,
GrepTool.Name,
RipGrepTool.Name,
GlobTool.Name,
ReadManyFilesTool.Name,
MemoryTool.Name,
WebSearchTool.Name,
]);
for (const tool of toolRegistry.getAllTools()) {
if (!allowlist.has(tool.name)) {
throw new Error(
`Tool "${tool.name}" is not on the allow-list for non-interactive ` +
`execution in agent "${agentName}". Only tools that do not require user ` +
`confirmation can be used in subagents.`,
);
}
}
}
/**
* Checks if the agent should terminate due to exceeding configured limits.
*
* @returns The reason for termination, or `null` if execution can continue.
*/
private checkTermination(
startTime: number,
turnCounter: number,
): AgentTerminateMode | null {
const { runConfig } = this.definition;
if (runConfig.max_turns && turnCounter >= runConfig.max_turns) {
return AgentTerminateMode.MAX_TURNS;
}
const elapsedMinutes = (Date.now() - startTime) / (1000 * 60);
if (elapsedMinutes >= runConfig.max_time_minutes) {
return AgentTerminateMode.TIMEOUT;
}
return null;
}
/** Emits an activity event to the configured callback. */
private emitActivity(
type: SubagentActivityEvent['type'],
data: Record<string, unknown>,
): void {
if (this.onActivity) {
const event: SubagentActivityEvent = {
isSubagentActivityEvent: true,
agentName: this.definition.name,
type,
data,
};
this.onActivity(event);
}
}
}