refactor(agents): implement submit_final_output tool for agent completion (#10377)

This commit is contained in:
Abhi
2025-10-02 14:07:58 -04:00
committed by GitHub
parent 4a70d6f22f
commit a6af7bbb46
4 changed files with 696 additions and 548 deletions
@@ -10,6 +10,7 @@ import { ReadFileTool } from '../tools/read-file.js';
import { GLOB_TOOL_NAME } from '../tools/tool-names.js'; import { GLOB_TOOL_NAME } from '../tools/tool-names.js';
import { GrepTool } from '../tools/grep.js'; import { GrepTool } from '../tools/grep.js';
import { DEFAULT_GEMINI_MODEL } from '../config/models.js'; import { DEFAULT_GEMINI_MODEL } from '../config/models.js';
import { Type } from '@google/genai';
const CODEBASE_REPORT_MARKDOWN = `<CodebaseReport> const CODEBASE_REPORT_MARKDOWN = `<CodebaseReport>
<SummaryOfFindings> <SummaryOfFindings>
@@ -77,17 +78,22 @@ export const CodebaseInvestigatorAgent: AgentDefinition = {
}, },
}, },
outputConfig: { outputConfig: {
description: `A detailed markdown report summarizing the findings of the codebase investigation and insights that are the foundation for planning and executing any code modification related to the objective. outputName: 'report',
description: 'The final investigation report.',
schema: {
type: Type.STRING,
description: `A detailed markdown report summarizing the findings of the codebase investigation and insights that are the foundation for planning and executing any code modification related to the objective.
# Report Format # Report Format
The final report should be structured markdown, clearly answering the investigation focus, citing the files, symbols, architectural patterns and how they relate to the given investigation focus. The final report should be structured markdown, clearly answering the investigation focus, citing the files, symbols, architectural patterns and how they relate to the given investigation focus.
The report should strictly follow a format like this example: The report should strictly follow a format like this example:
${CODEBASE_REPORT_MARKDOWN} ${CODEBASE_REPORT_MARKDOWN}
Completion Criteria:
- The report must directly address the initial \`objective\`.
- Cite specific files, functions, or configuration snippets and symbols as evidence for your findings.
- Conclude with a xml markdown summary of the key files, symbols, technologies, architectural patterns, and conventions discovered.
`, `,
completion_criteria: [ },
'The report must directly address the initial `objective`.',
'Cite specific files, functions, or configuration snippets and symbols as evidence for your findings.',
'Conclude with a xml markdown summary of the key files, symbols, technologies, architectural patterns, and conventions discovered.',
],
}, },
modelConfig: { modelConfig: {
File diff suppressed because it is too large Load Diff
+216 -111
View File
@@ -7,6 +7,7 @@
import type { Config } from '../config/config.js'; import type { Config } from '../config/config.js';
import { reportError } from '../utils/errorReporting.js'; import { reportError } from '../utils/errorReporting.js';
import { GeminiChat, StreamEventType } from '../core/geminiChat.js'; import { GeminiChat, StreamEventType } from '../core/geminiChat.js';
import { Type } from '@google/genai';
import type { import type {
Content, Content,
Part, Part,
@@ -39,14 +40,13 @@ import { parseThought } from '../utils/thoughtUtils.js';
/** A callback function to report on agent activity. */ /** A callback function to report on agent activity. */
export type ActivityCallback = (activity: SubagentActivityEvent) => void; export type ActivityCallback = (activity: SubagentActivityEvent) => void;
const TASK_COMPLETE_TOOL_NAME = 'complete_task';
/** /**
* Executes an agent loop based on an {@link AgentDefinition}. * Executes an agent loop based on an {@link AgentDefinition}.
* *
* This executor uses a simplified two-phase approach: * This executor runs the agent in a loop, calling tools until it calls the
* 1. **Work Phase:** The agent runs in a loop, calling tools until it has * mandatory `complete_task` tool to signal completion.
* gathered all necessary information to fulfill its goal.
* 2. **Extraction Phase:** A final prompt is sent to the model to summarize
* the work and extract the final result in the desired format.
*/ */
export class AgentExecutor { export class AgentExecutor {
readonly definition: AgentDefinition; readonly definition: AgentDefinition;
@@ -144,16 +144,13 @@ export class AgentExecutor {
try { try {
const chat = await this.createChatObject(inputs); const chat = await this.createChatObject(inputs);
const tools = this.prepareToolsList(); const tools = this.prepareToolsList();
let terminateReason = AgentTerminateMode.GOAL; let terminateReason = AgentTerminateMode.ERROR;
let finalResult: string | null = null;
// Phase 1: Work Phase
// The agent works in a loop until it stops calling tools.
const query = this.definition.promptConfig.query const query = this.definition.promptConfig.query
? templateString(this.definition.promptConfig.query, inputs) ? templateString(this.definition.promptConfig.query, inputs)
: 'Get Started!'; : 'Get Started!';
let currentMessages: Content[] = [ let currentMessage: Content = { role: 'user', parts: [{ text: query }] };
{ role: 'user', parts: [{ text: query }] },
];
while (true) { while (true) {
// Check for termination conditions like max turns or timeout. // Check for termination conditions like max turns or timeout.
@@ -171,7 +168,7 @@ export class AgentExecutor {
const promptId = `${this.runtimeContext.getSessionId()}#${this.agentId}#${turnCounter++}`; const promptId = `${this.runtimeContext.getSessionId()}#${this.agentId}#${turnCounter++}`;
const { functionCalls } = await this.callModel( const { functionCalls } = await this.callModel(
chat, chat,
currentMessages, currentMessage,
tools, tools,
signal, signal,
promptId, promptId,
@@ -182,46 +179,39 @@ export class AgentExecutor {
break; break;
} }
// If the model stops calling tools, the work phase is complete. // If the model stops calling tools without calling complete_task, it's an error.
if (functionCalls.length === 0) { if (functionCalls.length === 0) {
terminateReason = AgentTerminateMode.ERROR;
finalResult = `Agent stopped calling tools but did not call '${TASK_COMPLETE_TOOL_NAME}' to finalize the session.`;
this.emitActivity('ERROR', {
error: finalResult,
context: 'protocol_violation',
});
break; break;
} }
currentMessages = await this.processFunctionCalls( const { nextMessage, submittedOutput, taskCompleted } =
functionCalls, await this.processFunctionCalls(functionCalls, signal, promptId);
signal,
promptId, if (taskCompleted) {
); finalResult = submittedOutput ?? 'Task completed successfully.';
terminateReason = AgentTerminateMode.GOAL;
break;
}
currentMessage = nextMessage;
} }
// If the work phase was terminated early, skip extraction and return. if (terminateReason === AgentTerminateMode.GOAL) {
if (terminateReason !== AgentTerminateMode.GOAL) {
return { return {
result: 'Agent execution was terminated before completion.', result: finalResult || 'Task completed.',
terminate_reason: terminateReason, terminate_reason: terminateReason,
}; };
} }
// Phase 2: Extraction Phase
// A final message is sent to summarize findings and produce the output.
const extractionMessage = this.buildExtractionMessage();
const extractionMessages: Content[] = [
{ role: 'user', parts: [{ text: extractionMessage }] },
];
const extractionPromptId = `${this.runtimeContext.getSessionId()}#${this.agentId}#extraction`;
// TODO: Consider if we should keep tools to avoid cache reset.
const { textResponse } = await this.callModel(
chat,
extractionMessages,
[], // No tools are available in the extraction phase.
signal,
extractionPromptId,
);
return { return {
result: textResponse || 'No response generated', result:
finalResult || 'Agent execution was terminated before completion.',
terminate_reason: terminateReason, terminate_reason: terminateReason,
}; };
} catch (error) { } catch (error) {
@@ -237,13 +227,13 @@ export class AgentExecutor {
*/ */
private async callModel( private async callModel(
chat: GeminiChat, chat: GeminiChat,
messages: Content[], message: Content,
tools: FunctionDeclaration[], tools: FunctionDeclaration[],
signal: AbortSignal, signal: AbortSignal,
promptId: string, promptId: string,
): Promise<{ functionCalls: FunctionCall[]; textResponse: string }> { ): Promise<{ functionCalls: FunctionCall[]; textResponse: string }> {
const messageParams = { const messageParams = {
message: messages[0]?.parts || [], message: message.parts || [],
config: { config: {
abortSignal: signal, abortSignal: signal,
tools: tools.length > 0 ? [{ functionDeclarations: tools }] : undefined, tools: tools.length > 0 ? [{ functionDeclarations: tools }] : undefined,
@@ -349,45 +339,145 @@ export class AgentExecutor {
/** /**
* Executes function calls requested by the model and returns the results. * Executes function calls requested by the model and returns the results.
* *
* @returns A new `Content` object to be added to the chat history. * @returns A new `Content` object for history, any submitted output, and completion status.
*/ */
private async processFunctionCalls( private async processFunctionCalls(
functionCalls: FunctionCall[], functionCalls: FunctionCall[],
signal: AbortSignal, signal: AbortSignal,
promptId: string, promptId: string,
): Promise<Content[]> { ): Promise<{
nextMessage: Content;
submittedOutput: string | null;
taskCompleted: boolean;
}> {
const allowedToolNames = new Set(this.toolRegistry.getAllToolNames()); const allowedToolNames = new Set(this.toolRegistry.getAllToolNames());
// Always allow the completion tool
allowedToolNames.add(TASK_COMPLETE_TOOL_NAME);
// Filter out any tool calls that are not in the agent's allowed list. let submittedOutput: string | null = null;
const validatedFunctionCalls = functionCalls.filter((call) => { let taskCompleted = false;
if (!allowedToolNames.has(call.name as string)) {
console.warn( // We'll collect promises for the tool executions
`[AgentExecutor] Agent '${this.definition.name}' attempted to call ` + const toolExecutionPromises: Array<Promise<Part[] | void>> = [];
`unauthorized tool '${call.name}'. This call has been blocked.`, // And we'll need a place to store the synchronous results (like complete_task or blocked calls)
); const syncResponseParts: Part[] = [];
return false;
for (const [index, functionCall] of functionCalls.entries()) {
const callId = functionCall.id ?? `${promptId}-${index}`;
const args = (functionCall.args ?? {}) as Record<string, unknown>;
this.emitActivity('TOOL_CALL_START', {
name: functionCall.name,
args,
});
if (functionCall.name === TASK_COMPLETE_TOOL_NAME) {
if (taskCompleted) {
// We already have a completion from this turn. Ignore subsequent ones.
const error =
'Task already marked complete in this turn. Ignoring duplicate call.';
syncResponseParts.push({
functionResponse: {
name: TASK_COMPLETE_TOOL_NAME,
response: { error },
id: callId,
},
});
this.emitActivity('ERROR', {
context: 'tool_call',
name: functionCall.name,
error,
});
continue;
}
const { outputConfig } = this.definition;
taskCompleted = true; // Signal completion regardless of output presence
if (outputConfig) {
const outputName = outputConfig.outputName;
if (args[outputName] !== undefined) {
submittedOutput = String(args[outputName]);
syncResponseParts.push({
functionResponse: {
name: TASK_COMPLETE_TOOL_NAME,
response: { result: 'Output submitted and task completed.' },
id: callId,
},
});
this.emitActivity('TOOL_CALL_END', {
name: functionCall.name,
output: 'Output submitted and task completed.',
});
} else {
// Failed to provide required output.
taskCompleted = false; // Revoke completion status
const error = `Missing required argument '${outputName}' for completion.`;
syncResponseParts.push({
functionResponse: {
name: TASK_COMPLETE_TOOL_NAME,
response: { error },
id: callId,
},
});
this.emitActivity('ERROR', {
context: 'tool_call',
name: functionCall.name,
error,
});
}
} else {
// No output expected. Just signal completion.
submittedOutput = 'Task completed successfully.';
syncResponseParts.push({
functionResponse: {
name: TASK_COMPLETE_TOOL_NAME,
response: { status: 'Task marked complete.' },
id: callId,
},
});
this.emitActivity('TOOL_CALL_END', {
name: functionCall.name,
output: 'Task marked complete.',
});
}
continue;
} }
return true;
});
const toolPromises = validatedFunctionCalls.map( // Handle standard tools
async (functionCall, index) => { if (!allowedToolNames.has(functionCall.name as string)) {
const callId = functionCall.id ?? `${promptId}-${index}`; const error = `Unauthorized tool call: '${functionCall.name}' is not available to this agent.`;
const args = functionCall.args ?? {};
this.emitActivity('TOOL_CALL_START', { console.warn(`[AgentExecutor] Blocked call: ${error}`);
name: functionCall.name,
args, syncResponseParts.push({
functionResponse: {
name: functionCall.name as string,
id: callId,
response: { error },
},
}); });
const requestInfo: ToolCallRequestInfo = { this.emitActivity('ERROR', {
context: 'tool_call_unauthorized',
name: functionCall.name,
callId, callId,
name: functionCall.name as string, error,
args: args as Record<string, unknown>, });
isClientInitiated: true,
prompt_id: promptId,
};
continue;
}
const requestInfo: ToolCallRequestInfo = {
callId,
name: functionCall.name as string,
args,
isClientInitiated: true,
prompt_id: promptId,
};
// Create a promise for the tool execution
const executionPromise = (async () => {
const toolResponse = await executeToolCall( const toolResponse = await executeToolCall(
this.runtimeContext, this.runtimeContext,
requestInfo, requestInfo,
@@ -407,24 +497,39 @@ export class AgentExecutor {
}); });
} }
return toolResponse; return toolResponse.responseParts;
}, })();
);
const toolResponses = await Promise.all(toolPromises); toolExecutionPromises.push(executionPromise);
const toolResponseParts: Part[] = toolResponses }
.flatMap((response) => response.responseParts)
.filter((part): part is Part => part !== undefined);
// If all authorized tool calls failed, provide a generic error message // Wait for all tool executions to complete
// to the model so it can try a different approach. const asyncResults = await Promise.all(toolExecutionPromises);
if (functionCalls.length > 0 && toolResponseParts.length === 0) {
// Combine all response parts
const toolResponseParts: Part[] = [...syncResponseParts];
for (const result of asyncResults) {
if (result) {
toolResponseParts.push(...result);
}
}
// If all authorized tool calls failed (and task isn't complete), provide a generic error.
if (
functionCalls.length > 0 &&
toolResponseParts.length === 0 &&
!taskCompleted
) {
toolResponseParts.push({ toolResponseParts.push({
text: 'All tool calls failed. Please analyze the errors and try an alternative approach.', text: 'All tool calls failed or were unauthorized. Please analyze the errors and try an alternative approach.',
}); });
} }
return [{ role: 'user', parts: toolResponseParts }]; return {
nextMessage: { role: 'user', parts: toolResponseParts },
submittedOutput,
taskCompleted,
};
} }
/** /**
@@ -432,7 +537,7 @@ export class AgentExecutor {
*/ */
private prepareToolsList(): FunctionDeclaration[] { private prepareToolsList(): FunctionDeclaration[] {
const toolsList: FunctionDeclaration[] = []; const toolsList: FunctionDeclaration[] = [];
const { toolConfig } = this.definition; const { toolConfig, outputConfig } = this.definition;
if (toolConfig) { if (toolConfig) {
const toolNamesToLoad: string[] = []; const toolNamesToLoad: string[] = [];
@@ -453,12 +558,36 @@ export class AgentExecutor {
); );
} }
// Always inject complete_task.
// Configure its schema based on whether output is expected.
const completeTool: FunctionDeclaration = {
name: TASK_COMPLETE_TOOL_NAME,
description: outputConfig
? 'Call this tool to submit your final answer and complete the task. This is the ONLY way to finish.'
: 'Call this tool to signal that you have completed your task. This is the ONLY way to finish.',
parameters: {
type: Type.OBJECT,
properties: {},
required: [],
},
};
if (outputConfig) {
completeTool.parameters!.properties![outputConfig.outputName] = {
description: outputConfig.description,
...(outputConfig.schema ?? { type: Type.STRING }),
};
completeTool.parameters!.required!.push(outputConfig.outputName);
}
toolsList.push(completeTool);
return toolsList; return toolsList;
} }
/** Builds the system prompt from the agent definition and inputs. */ /** Builds the system prompt from the agent definition and inputs. */
private async buildSystemPrompt(inputs: AgentInputs): Promise<string> { private async buildSystemPrompt(inputs: AgentInputs): Promise<string> {
const { promptConfig, outputConfig } = this.definition; const { promptConfig } = this.definition;
if (!promptConfig.systemPrompt) { if (!promptConfig.systemPrompt) {
return ''; return '';
} }
@@ -470,45 +599,21 @@ export class AgentExecutor {
const dirContext = await getDirectoryContextString(this.runtimeContext); const dirContext = await getDirectoryContextString(this.runtimeContext);
finalPrompt += `\n\n# Environment Context\n${dirContext}`; finalPrompt += `\n\n# Environment Context\n${dirContext}`;
// Append completion criteria to guide the model's output.
if (outputConfig?.completion_criteria) {
finalPrompt += '\n\nEnsure you complete the following:\n';
for (const criteria of outputConfig.completion_criteria) {
finalPrompt += `- ${criteria}\n`;
}
}
// Append standard rules for non-interactive execution. // Append standard rules for non-interactive execution.
finalPrompt += ` finalPrompt += `
Important Rules: Important Rules:
* You are running in a non-interactive mode. You CANNOT ask the user for input or clarification. * You are running in a non-interactive mode. You CANNOT ask the user for input or clarification.
* Work systematically using available tools to complete your task. * Work systematically using available tools to complete your task.
* Always use absolute paths for file operations. Construct them using the provided "Environment Context". * Always use absolute paths for file operations. Construct them using the provided "Environment Context".`;
* When you have completed your analysis and are ready to produce the final answer, stop calling tools.`;
finalPrompt += `
* When you have completed your task, you MUST call the \`${TASK_COMPLETE_TOOL_NAME}\` tool.
* Do not call any other tools in the same turn as \`${TASK_COMPLETE_TOOL_NAME}\`.
* This is the ONLY way to complete your mission. If you stop calling tools without calling this, you have failed.`;
return finalPrompt; return finalPrompt;
} }
/** Builds the final message for the extraction phase. */
private buildExtractionMessage(): string {
const { outputConfig } = this.definition;
if (outputConfig?.description) {
let message = `Based on your work so far, provide: ${outputConfig.description}`;
if (outputConfig.completion_criteria?.length) {
message += `\n\nBe sure you have addressed:\n`;
for (const criteria of outputConfig.completion_criteria) {
message += `- ${criteria}\n`;
}
}
return message;
}
// Fallback to a generic extraction message if no description is provided.
return 'Based on your work so far, provide a comprehensive summary of your analysis and findings. Do not perform any more function calls.';
}
/** /**
* Applies template strings to initial messages. * Applies template strings to initial messages.
* *
+16 -5
View File
@@ -8,7 +8,7 @@
* @fileoverview Defines the core configuration interfaces and types for the agent architecture. * @fileoverview Defines the core configuration interfaces and types for the agent architecture.
*/ */
import type { Content, FunctionDeclaration } from '@google/genai'; import type { Content, FunctionDeclaration, Schema } from '@google/genai';
import type { AnyDeclarativeTool } from '../tools/tools.js'; import type { AnyDeclarativeTool } from '../tools/tools.js';
/** /**
@@ -119,11 +119,22 @@ export interface InputConfig {
* Configures the expected outputs for the agent. * Configures the expected outputs for the agent.
*/ */
export interface OutputConfig { export interface OutputConfig {
/** Description of what the agent should return when finished. */ /**
* The name of the final result parameter. This will be the name of the
* argument in the `submit_final_output` tool (e.g., "report", "answer").
*/
outputName: string;
/**
* A description of the expected output. This will be used as the description
* for the tool argument.
*/
description: string; description: string;
/** Optional criteria that must be completed before the agent finishes. */ /**
completion_criteria?: string[]; * Optional JSON schema for the output. If provided, it will be used as the
// TODO(abhipatel12): Add required_outputs if natural completion insufficient * schema for the tool's argument, allowing for structured output enforcement.
* Defaults to { type: 'string' }.
*/
schema?: Schema;
} }
/** /**