From ee3e4017c34da6180a70e6698746cecd500b930c Mon Sep 17 00:00:00 2001 From: Silvio Junior Date: Fri, 3 Oct 2025 13:21:08 -0400 Subject: [PATCH] Add function processOutput to AgentDefinition and typing for an agent's output (#10447) --- .../core/src/agents/codebase-investigator.ts | 154 ++++++++---------- packages/core/src/agents/executor.test.ts | 24 +-- packages/core/src/agents/executor.ts | 58 +++++-- packages/core/src/agents/invocation.test.ts | 27 +-- packages/core/src/agents/invocation.ts | 10 +- packages/core/src/agents/registry.ts | 8 +- packages/core/src/agents/types.ts | 20 ++- 7 files changed, 162 insertions(+), 139 deletions(-) diff --git a/packages/core/src/agents/codebase-investigator.ts b/packages/core/src/agents/codebase-investigator.ts index 497c6eac8d..7632a6e75d 100644 --- a/packages/core/src/agents/codebase-investigator.ts +++ b/packages/core/src/agents/codebase-investigator.ts @@ -10,64 +10,44 @@ import { ReadFileTool } from '../tools/read-file.js'; import { GLOB_TOOL_NAME } from '../tools/tool-names.js'; import { GrepTool } from '../tools/grep.js'; import { DEFAULT_GEMINI_MODEL } from '../config/models.js'; -import { Type } from '@google/genai'; +import { z } from 'zod'; -const CODEBASE_REPORT_MARKDOWN = ` - - The user's objective is to remove an optional \`config\` property from the command context. The investigation identified that the \`CommandContext\` interface, defined in \`packages/cli/src/ui/commands/types.ts\`, contains a nullable \`config: Config | null\` property. A \`TODO\` comment confirms the intent to make this property non-nullable. - The primary challenge is that numerous tests throughout the codebase rely on this property being optional, often setting it to \`null\` during the creation of mock \`CommandContext\` objects. The key to resolving this is to update the mock creation utility, \`createMockCommandContext\`, to provide a default mock \`Config\` object instead of \`null\`. - The \`Config\` class, defined in \`packages/core/src/config/config.ts\`, is the type of the \`config\` property. To facilitate the required changes, a mock \`Config\` object can be created based on the \`ConfigParameters\` interface from the same file. - The plan involves three main steps: - 1. Update the \`CommandContext\` interface to make the \`config\` property non-nullable. - 2. Modify \`createMockCommandContext\` to use a default mock \`Config\` object. - 3. Update all test files that currently rely on a null \`config\` to use the updated mock creation utility. - - - 1. Searched for "CommandContext" to locate its definition. - 2. Read \`packages/cli/src/ui/commands/types.ts\` and identified the \`config: Config | null\` property. - 3. Searched for \`config: null\` to find all instances where the config is explicitly set to null. - 4. Read \`packages/cli/src/test-utils/mockCommandContext.ts\` to understand how mock contexts are created. - 5. Searched for \`createMockCommandContext\` to find all its usages. - 6. Searched for the definition of the \`Config\` interface to understand how to create a mock object. - 7. Read \`packages/core/src/config/config.ts\` to understand the \`Config\` class and \`ConfigParameters\` interface. - - - - packages/cli/src/ui/commands/types.ts - This file contains the definition of the \`CommandContext\` interface, which is the central piece of this investigation. The property \`config: Config | null\` needs to be changed to \`config: Config\` here. - - CommandContext - - - - packages/cli/src/test-utils/mockCommandContext.ts - This file contains the \`createMockCommandContext\` function, which is used in many tests to create mock \`CommandContext\` objects. This function needs to be updated to provide a default mock \`Config\` object instead of \`null\`. - - createMockCommandContext - - - - packages/core/src/config/config.ts - This file defines the \`Config\` class and the \`ConfigParameters\` interface. This information is needed to create a proper mock \`Config\` object to be used in the updated \`createMockCommandContext\` function. - - Config - ConfigParameters - - - -`; +// Define a type that matches the outputConfig schema for type safety. +const CodebaseInvestigationReportSchema = z.object({ + SummaryOfFindings: z + .string() + .describe( + "A summary of the investigation's conclusions and insights for the main agent.", + ), + ExplorationTrace: z + .array(z.string()) + .describe( + 'A step-by-step list of actions and tools used during the investigation.', + ), + RelevantLocations: z + .array( + z.object({ + FilePath: z.string(), + Reasoning: z.string(), + KeySymbols: z.array(z.string()), + }), + ) + .describe('A list of relevant files and the key symbols within them.'), +}); /** * A Proof-of-Concept subagent specialized in analyzing codebase structure, * dependencies, and technologies. */ -export const CodebaseInvestigatorAgent: AgentDefinition = { +export const CodebaseInvestigatorAgent: AgentDefinition< + typeof CodebaseInvestigationReportSchema +> = { name: 'codebase_investigator', displayName: 'Codebase Investigator Agent', description: `Your primary tool for multifile search tasks and codebase exploration. Invoke this tool to delegate search tasks to an autonomous subagent. Use this to find features, understand context, or locate specific files, functions, or symbols. - Returns a structured xml report with key file paths, symbols, architectural map and insights to solve a task or answer questions.`, + Returns a structured Json report with key file paths, symbols, architectural map and insights to solve a task or answer questions`, inputConfig: { inputs: { objective: { @@ -80,23 +60,13 @@ export const CodebaseInvestigatorAgent: AgentDefinition = { }, outputConfig: { outputName: 'report', - description: 'The final investigation report.', - schema: { - type: Type.STRING, - description: `A detailed markdown report summarizing the findings of the codebase investigation and insights that are the foundation for planning and executing any code modification related to the objective. - # Report Format -The final report should be structured markdown, clearly answering the investigation focus, citing the files, symbols, architectural patterns and how they relate to the given investigation focus. -The report should strictly follow a format like this example: -${CODEBASE_REPORT_MARKDOWN} - -Completion Criteria: -- The report must directly address the initial \`objective\`. -- Cite specific files, functions, or configuration snippets and symbols as evidence for your findings. -- Conclude with a xml markdown summary of the key files, symbols, technologies, architectural patterns, and conventions discovered. -`, - }, + description: 'The final investigation report as a JSON object.', + schema: CodebaseInvestigationReportSchema, }, + // The 'output' parameter is now strongly typed as CodebaseInvestigationReportSchema + processOutput: (output) => JSON.stringify(output, null, 2), + modelConfig: { model: DEFAULT_GEMINI_MODEL, temp: 0.1, @@ -142,37 +112,41 @@ You operate in a non-interactive loop and must reason based on the information p **This is your most critical function. Your scratchpad is your memory and your plan.** 1. **Initialization:** On your very first turn, you **MUST** create the \`\` section. Analyze the \`task\` and create an initial \`Checklist\` of investigation goals and a \`Questions to Resolve\` section for any initial uncertainties. 2. **Constant Updates:** After **every** \`\`, you **MUST** update the scratchpad. - * Mark checklist items as complete: \`[x]\`. - * Add new checklist items as you trace the architecture. - * **Explicitly log questions in \`Questions to Resolve\`** (e.g., \`[ ] What is the purpose of the 'None' element in this list?\`). Do not consider your investigation complete until this list is empty. - * Record \`Key Findings\` with file paths and notes about their purpose and relevance. - * Update \`Irrelevant Paths to Ignore\` to avoid re-investigating dead ends. + * Mark checklist items as complete: \`[x]\`. + * Add new checklist items as you trace the architecture. + * **Explicitly log questions in \`Questions to Resolve\`** (e.g., \`[ ] What is the purpose of the 'None' element in this list?\`). Do not consider your investigation complete until this list is empty. + * Record \`Key Findings\` with file paths and notes about their purpose and relevance. + * Update \`Irrelevant Paths to Ignore\` to avoid re-investigating dead ends. 3. **Thinking on Paper:** The scratchpad must show your reasoning process, including how you resolve your questions. --- -## Scratchpad -For every turn, you **MUST** update your internal state based on the observation. -Scratchpad example: - -**Checklist:** -- [x] Find the main translation loading logic. -- [ ] **(New)** Investigate the \`gettext.translation\` function to understand its arguments. -- [ ] **(New)** Check the signature of \`locale.init\` and its callers for type consistency. -**Questions to Resolve:** -- [x] ~~What is the purpose of the 'None' element in the \`locale_dirs\` list?~~ **Finding:** It's for system-wide gettext catalogs. -**Key Findings:** -- \`sphinx/application.py\`: Assembles the \`locale_dirs\` list. The order is critical. -- \`sphinx/locale/__init__.py\`: Consumes \`locale_dirs\`. Its \`init\` function signature might need a type hint update if \`None\` is passed. -**Irrelevant Paths to Ignore:** -- \`README.md\` -**Next Step:** -- I will use \`web_fetch\` to search for "python gettext translation localedir None" to resolve my open question. - ## Termination -Your mission is complete **ONLY** when your \`Questions to Resolve\` list is empty and you are confident you have identified all files and necessary change *considerations*. -# Report Format -The final report should be structured markdown, clearly answering the investigation focus, citing the files, symbols, architectural patterns and how they relate to the given investigation focus. -The report should strictly follow a format like this example: -${CODEBASE_REPORT_MARKDOWN} +Your mission is complete **ONLY** when your \`Questions to Resolve\` list is empty and you have identified all files and necessary change *considerations*. +When you are finished, you **MUST** call the \`complete_task\` tool. The \`report\` argument for this tool **MUST** be a valid JSON object containing your findings. + +**Example of the final report** +\`\`\`json +{ + "SummaryOfFindings": "The core issue is a race condition in the \`updateUser\` function. The function reads the user's state, performs an asynchronous operation, and then writes the state back. If another request modifies the user state during the async operation, that change will be overwritten. The fix requires implementing a transactional read-modify-write pattern, potentially using a database lock or a versioning system.", + "ExplorationTrace": [ + "Used \`grep\` to search for \`updateUser\` to locate the primary function.", + "Read the file \`src/controllers/userController.js\` to understand the function's logic.", + "Used \`ls -R\` to look for related files, such as services or database models.", + "Read \`src/services/userService.js\` and \`src/models/User.js\` to understand the data flow and how state is managed." + ], + "RelevantLocations": [ + { + "FilePath": "src/controllers/userController.js", + "Reasoning": "This file contains the \`updateUser\` function which has the race condition. It's the entry point for the problematic logic.", + "KeySymbols": ["updateUser", "getUser", "saveUser"] + }, + { + "FilePath": "src/services/userService.js", + "Reasoning": "This service is called by the controller and handles the direct interaction with the data layer. Any locking mechanism would likely be implemented here.", + "KeySymbols": ["updateUserData"] + } + ] +} +\`\`\` `, }, }; diff --git a/packages/core/src/agents/executor.test.ts b/packages/core/src/agents/executor.test.ts index dbe1d9cdde..ba186e5bb5 100644 --- a/packages/core/src/agents/executor.test.ts +++ b/packages/core/src/agents/executor.test.ts @@ -23,7 +23,6 @@ import { type StreamEvent, } from '../core/geminiChat.js'; import { - Type, type FunctionCall, type Part, type GenerateContentResponse, @@ -32,6 +31,7 @@ import { import type { Config } from '../config/config.js'; import { MockTool } from '../test-utils/mock-tool.js'; import { getDirectoryContextString } from '../utils/environmentContext.js'; +import { z } from 'zod'; const { mockSendMessageStream, mockExecuteToolCall } = vi.hoisted(() => ({ mockSendMessageStream: vi.fn(), @@ -120,27 +120,19 @@ let parentToolRegistry: ToolRegistry; /** * Type-safe helper to create agent definitions for tests. */ -type OutputConfigMode = 'default' | 'none' | Partial; - -const createTestDefinition = ( +const createTestDefinition = ( tools: Array = [LSTool.Name], - runConfigOverrides: Partial = {}, - outputConfigMode: OutputConfigMode = 'default', -): AgentDefinition => { - let outputConfig: OutputConfig | undefined; + runConfigOverrides: Partial['runConfig']> = {}, + outputConfigMode: 'default' | 'none' = 'default', + schema: TOutput = z.string() as unknown as TOutput, +): AgentDefinition => { + let outputConfig: OutputConfig | undefined; if (outputConfigMode === 'default') { outputConfig = { outputName: 'finalResult', description: 'The final result.', - schema: { type: Type.STRING }, - }; - } else if (outputConfigMode !== 'none') { - outputConfig = { - outputName: 'finalResult', - description: 'The final result.', - schema: { type: Type.STRING }, - ...outputConfigMode, + schema, }; } diff --git a/packages/core/src/agents/executor.ts b/packages/core/src/agents/executor.ts index 8c5624ce33..2ba2dc88c2 100644 --- a/packages/core/src/agents/executor.ts +++ b/packages/core/src/agents/executor.ts @@ -14,6 +14,7 @@ import type { FunctionCall, GenerateContentConfig, FunctionDeclaration, + Schema, } from '@google/genai'; import { executeToolCall } from '../core/nonInteractiveToolExecutor.js'; import { ToolRegistry } from '../tools/tool-registry.js'; @@ -36,6 +37,8 @@ import type { import { AgentTerminateMode } from './types.js'; import { templateString } from './utils.js'; import { parseThought } from '../utils/thoughtUtils.js'; +import { type z } from 'zod'; +import { zodToJsonSchema } from 'zod-to-json-schema'; /** A callback function to report on agent activity. */ export type ActivityCallback = (activity: SubagentActivityEvent) => void; @@ -48,8 +51,8 @@ const TASK_COMPLETE_TOOL_NAME = 'complete_task'; * This executor runs the agent in a loop, calling tools until it calls the * mandatory `complete_task` tool to signal completion. */ -export class AgentExecutor { - readonly definition: AgentDefinition; +export class AgentExecutor { + readonly definition: AgentDefinition; private readonly agentId: string; private readonly toolRegistry: ToolRegistry; @@ -67,11 +70,11 @@ export class AgentExecutor { * @param onActivity An optional callback to receive activity events. * @returns A promise that resolves to a new `AgentExecutor` instance. */ - static async create( - definition: AgentDefinition, + static async create( + definition: AgentDefinition, runtimeContext: Config, onActivity?: ActivityCallback, - ): Promise { + ): Promise> { // Create an isolated tool registry for this agent instance. const agentToolRegistry = new ToolRegistry(runtimeContext); const parentToolRegistry = await runtimeContext.getToolRegistry(); @@ -116,7 +119,7 @@ export class AgentExecutor { * instantiate the class. */ private constructor( - definition: AgentDefinition, + definition: AgentDefinition, runtimeContext: Config, toolRegistry: ToolRegistry, onActivity?: ActivityCallback, @@ -397,7 +400,36 @@ export class AgentExecutor { if (outputConfig) { const outputName = outputConfig.outputName; if (args[outputName] !== undefined) { - submittedOutput = String(args[outputName]); + const outputValue = args[outputName]; + const validationResult = outputConfig.schema.safeParse(outputValue); + + if (!validationResult.success) { + taskCompleted = false; // Validation failed, revoke completion + const error = `Output validation failed: ${JSON.stringify(validationResult.error.flatten())}`; + syncResponseParts.push({ + functionResponse: { + name: TASK_COMPLETE_TOOL_NAME, + response: { error }, + id: callId, + }, + }); + this.emitActivity('ERROR', { + context: 'tool_call', + name: functionCall.name, + error, + }); + continue; + } + + const validatedOutput = validationResult.data; + if (this.definition.processOutput) { + submittedOutput = this.definition.processOutput(validatedOutput); + } else { + submittedOutput = + typeof outputValue === 'string' + ? outputValue + : JSON.stringify(outputValue, null, 2); + } syncResponseParts.push({ functionResponse: { name: TASK_COMPLETE_TOOL_NAME, @@ -573,10 +605,14 @@ export class AgentExecutor { }; if (outputConfig) { - completeTool.parameters!.properties![outputConfig.outputName] = { - description: outputConfig.description, - ...(outputConfig.schema ?? { type: Type.STRING }), - }; + const jsonSchema = zodToJsonSchema(outputConfig.schema); + const { + $schema: _$schema, + definitions: _definitions, + ...schema + } = jsonSchema; + completeTool.parameters!.properties![outputConfig.outputName] = + schema as Schema; completeTool.parameters!.required!.push(outputConfig.outputName); } diff --git a/packages/core/src/agents/invocation.test.ts b/packages/core/src/agents/invocation.test.ts index d2de395e60..a91f5ee80e 100644 --- a/packages/core/src/agents/invocation.test.ts +++ b/packages/core/src/agents/invocation.test.ts @@ -17,6 +17,7 @@ import { makeFakeConfig } from '../test-utils/config.js'; import { ToolErrorType } from '../tools/tool-error.js'; import type { Config } from '../config/config.js'; import type { MessageBus } from '../confirmation-bus/message-bus.js'; +import { type z } from 'zod'; vi.mock('./executor.js'); @@ -24,7 +25,7 @@ const MockAgentExecutor = vi.mocked(AgentExecutor); let mockConfig: Config; -const testDefinition: AgentDefinition = { +const testDefinition: AgentDefinition = { name: 'MockAgent', description: 'A mock agent.', inputConfig: { @@ -39,7 +40,7 @@ const testDefinition: AgentDefinition = { }; describe('SubagentInvocation', () => { - let mockExecutorInstance: Mocked; + let mockExecutorInstance: Mocked>; beforeEach(() => { vi.clearAllMocks(); @@ -48,15 +49,17 @@ describe('SubagentInvocation', () => { mockExecutorInstance = { run: vi.fn(), definition: testDefinition, - } as unknown as Mocked; + } as unknown as Mocked>; - MockAgentExecutor.create.mockResolvedValue(mockExecutorInstance); + MockAgentExecutor.create.mockResolvedValue( + mockExecutorInstance as unknown as AgentExecutor, + ); }); it('should pass the messageBus to the parent constructor', () => { const mockMessageBus = {} as MessageBus; const params = { task: 'Analyze data' }; - const invocation = new SubagentInvocation( + const invocation = new SubagentInvocation( params, testDefinition, mockConfig, @@ -71,7 +74,7 @@ describe('SubagentInvocation', () => { describe('getDescription', () => { it('should format the description with inputs', () => { const params = { task: 'Analyze data', priority: 5 }; - const invocation = new SubagentInvocation( + const invocation = new SubagentInvocation( params, testDefinition, mockConfig, @@ -85,7 +88,7 @@ describe('SubagentInvocation', () => { it('should truncate long input values', () => { const longTask = 'A'.repeat(100); const params = { task: longTask }; - const invocation = new SubagentInvocation( + const invocation = new SubagentInvocation( params, testDefinition, mockConfig, @@ -107,7 +110,7 @@ describe('SubagentInvocation', () => { for (let i = 0; i < 20; i++) { params[`input${i}`] = `value${i}`; } - const invocation = new SubagentInvocation( + const invocation = new SubagentInvocation( params, longNameDef, mockConfig, @@ -127,12 +130,16 @@ describe('SubagentInvocation', () => { let signal: AbortSignal; let updateOutput: ReturnType; const params = { task: 'Execute task' }; - let invocation: SubagentInvocation; + let invocation: SubagentInvocation; beforeEach(() => { signal = new AbortController().signal; updateOutput = vi.fn(); - invocation = new SubagentInvocation(params, testDefinition, mockConfig); + invocation = new SubagentInvocation( + params, + testDefinition, + mockConfig, + ); }); it('should initialize and run the executor successfully', async () => { diff --git a/packages/core/src/agents/invocation.ts b/packages/core/src/agents/invocation.ts index f18d2b02be..cb49095cb5 100644 --- a/packages/core/src/agents/invocation.ts +++ b/packages/core/src/agents/invocation.ts @@ -15,6 +15,7 @@ import type { SubagentActivityEvent, } from './types.js'; import type { MessageBus } from '../confirmation-bus/message-bus.js'; +import { type z } from 'zod'; const INPUT_PREVIEW_MAX_LENGTH = 50; const DESCRIPTION_MAX_LENGTH = 200; @@ -29,10 +30,9 @@ const DESCRIPTION_MAX_LENGTH = 200; * live output stream. * 4. Formatting the final result into a {@link ToolResult}. */ -export class SubagentInvocation extends BaseToolInvocation< - AgentInputs, - ToolResult -> { +export class SubagentInvocation< + TOutput extends z.ZodTypeAny, +> extends BaseToolInvocation { /** * @param params The validated input parameters for the agent. * @param definition The definition object that configures the agent. @@ -41,7 +41,7 @@ export class SubagentInvocation extends BaseToolInvocation< */ constructor( params: AgentInputs, - private readonly definition: AgentDefinition, + private readonly definition: AgentDefinition, private readonly config: Config, messageBus?: MessageBus, ) { diff --git a/packages/core/src/agents/registry.ts b/packages/core/src/agents/registry.ts index a42b30b212..dec30fdc05 100644 --- a/packages/core/src/agents/registry.ts +++ b/packages/core/src/agents/registry.ts @@ -7,13 +7,15 @@ import type { Config } from '../config/config.js'; import type { AgentDefinition } from './types.js'; import { CodebaseInvestigatorAgent } from './codebase-investigator.js'; +import { type z } from 'zod'; /** * Manages the discovery, loading, validation, and registration of * AgentDefinitions. */ export class AgentRegistry { - private readonly agents = new Map(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + private readonly agents = new Map>(); constructor(private readonly config: Config) {} @@ -39,7 +41,9 @@ export class AgentRegistry { * it will be overwritten, respecting the precedence established by the * initialization order. */ - protected registerAgent(definition: AgentDefinition): void { + protected registerAgent( + definition: AgentDefinition, + ): void { // Basic validation if (!definition.name || !definition.description) { console.warn( diff --git a/packages/core/src/agents/types.ts b/packages/core/src/agents/types.ts index 9cd61409f6..015d6ca9da 100644 --- a/packages/core/src/agents/types.ts +++ b/packages/core/src/agents/types.ts @@ -8,8 +8,9 @@ * @fileoverview Defines the core configuration interfaces and types for the agent architecture. */ -import type { Content, FunctionDeclaration, Schema } from '@google/genai'; +import type { Content, FunctionDeclaration } from '@google/genai'; import type { AnyDeclarativeTool } from '../tools/tools.js'; +import { type z } from 'zod'; /** * Describes the possible termination modes for an agent. @@ -48,8 +49,9 @@ export interface SubagentActivityEvent { /** * The definition for an agent. + * @template TOutput The specific Zod schema for the agent's final output object. */ -export interface AgentDefinition { +export interface AgentDefinition { /** Unique identifier for the agent. */ name: string; displayName?: string; @@ -58,8 +60,16 @@ export interface AgentDefinition { modelConfig: ModelConfig; runConfig: RunConfig; toolConfig?: ToolConfig; - outputConfig?: OutputConfig; + outputConfig?: OutputConfig; inputConfig: InputConfig; + /** + * An optional function to process the raw output from the agent's final tool + * call into a string format. + * + * @param output The raw output value from the `complete_task` tool, now strongly typed with TOutput. + * @returns A string representation of the final output. + */ + processOutput?: (output: z.infer) => string; } /** @@ -118,7 +128,7 @@ export interface InputConfig { /** * Configures the expected outputs for the agent. */ -export interface OutputConfig { +export interface OutputConfig { /** * The name of the final result parameter. This will be the name of the * argument in the `submit_final_output` tool (e.g., "report", "answer"). @@ -134,7 +144,7 @@ export interface OutputConfig { * schema for the tool's argument, allowing for structured output enforcement. * Defaults to { type: 'string' }. */ - schema?: Schema; + schema: T; } /**