From b61a123da861f14a0d83739fe9190a287b4181ca Mon Sep 17 00:00:00 2001 From: Michael Bleigh Date: Thu, 12 Feb 2026 23:28:48 -0800 Subject: [PATCH] feat(sdk): implements SessionContext for SDK tool calls (#18862) --- .prettierignore | 1 + packages/core/src/index.ts | 3 + packages/sdk/SDK_DESIGN.md | 279 ++++++++++++++++++ packages/sdk/examples/session-context.ts | 73 +++++ packages/sdk/src/agent.ts | 55 +++- packages/sdk/src/fs.ts | 35 +++ packages/sdk/src/index.ts | 1 + packages/sdk/src/shell.ts | 69 +++++ packages/sdk/src/tool.integration.test.ts | 147 +++++++++ packages/sdk/src/tool.test.ts | 143 +++++++++ packages/sdk/src/tool.ts | 77 +++-- packages/sdk/src/types.ts | 41 +++ .../sdk/test-data/tool-catchall-error.json | 2 + .../sdk/test-data/tool-error-recovery.json | 2 + packages/sdk/test-data/tool-success.json | 2 + 15 files changed, 903 insertions(+), 27 deletions(-) create mode 100644 packages/sdk/SDK_DESIGN.md create mode 100644 packages/sdk/examples/session-context.ts create mode 100644 packages/sdk/src/fs.ts create mode 100644 packages/sdk/src/shell.ts create mode 100644 packages/sdk/src/tool.integration.test.ts create mode 100644 packages/sdk/src/tool.test.ts create mode 100644 packages/sdk/src/types.ts create mode 100644 packages/sdk/test-data/tool-catchall-error.json create mode 100644 packages/sdk/test-data/tool-error-recovery.json create mode 100644 packages/sdk/test-data/tool-success.json diff --git a/.prettierignore b/.prettierignore index e8f035ad74..9009498d8d 100644 --- a/.prettierignore +++ b/.prettierignore @@ -21,3 +21,4 @@ junit.xml Thumbs.db .pytest_cache **/SKILL.md +packages/sdk/test-data/*.json diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index c8ba601cbb..1802e590cd 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -192,3 +192,6 @@ export * from './agents/types.js'; // Export stdio utils export * from './utils/stdio.js'; export * from './utils/terminal.js'; + +// Export types from 
@google/genai +export type { Content, Part, FunctionCall } from '@google/genai'; diff --git a/packages/sdk/SDK_DESIGN.md b/packages/sdk/SDK_DESIGN.md new file mode 100644 index 0000000000..8daf6a4bb7 --- /dev/null +++ b/packages/sdk/SDK_DESIGN.md @@ -0,0 +1,279 @@ +# `Gemini CLI SDK` + +# `Examples` + +## `Simple Example` + +Equivalent to `gemini -p "what does this project do?"`. Loads all workspace and +user settings. + +```ts +import { GeminiCliAgent } from '@google/gemini-cli-sdk'; + +const simpleAgent = new GeminiCliAgent({ + cwd: '/path/to/some/dir', +}); + +for await (const chunk of simpleAgent.sendStream( + 'what does this project do?', +)) { + console.log(chunk); // equivalent to JSON streaming chunks (probably?) for now +} +``` + +Validation: + +- Model receives call containing "what does this project do?" text. + +## `System Instructions` + +System instructions can be provided by a static string OR dynamically via a +function: + +```ts +import { GeminiCliAgent } from "@google/gemini-cli-sdk"; + +const agent = new GeminiCliAgent({ + instructions: "This is a static string instruction", // this is valid, OR: + instructions: (ctx) => `The current time is ${new Date().toISOString()} in session ${ctx.sessionId}.` +}); +``` + +Validation: + +- Static string instructions show up where GEMINI.md content normally would in + model call +- Dynamic instructions show up and contain dynamic content.
+ +## `Custom Tools` + +```ts +import { GeminiCliAgent, tool, z } from "@google/gemini-cli-sdk"; + +const addTool = tool({ + name: 'add', + description: 'add two numbers', + inputSchema: z.object({ + a: z.number().describe('first number to add'), + b: z.number().describe('second number to add'), + }), +}, async ({a, b}) => ({result: a + b})); + +const toolAgent = new GeminiCliAgent({ + tools: [addTool], +}); + +const result = await toolAgent.send("what is 23 + 79?"); +console.log(result.text); +``` + +Validation: + +- Model receives tool definition in prompt +- Model receives tool response after returning a tool call + +## `Custom Hooks` + +SDK users can provide programmatic custom hooks. + +```ts +import { GeminiCliAgent, hook, z } from '@google/gemini-cli-sdk'; +import { reformat } from './reformat.js'; + +const myHook = hook( + { + event: 'AfterTool', + name: 'reformat', + matcher: 'write_file', + }, + async (hook, ctx) => { + const filePath = hook.toolInput.path; + + // void return is a no-op + if (!filePath.endsWith('.ts')) return; + + // ctx.fs gives us a filesystem interface that obeys Gemini CLI permissions/sandbox + const reformatted = await reformat(await ctx.fs.readFile(filePath)); + await ctx.fs.writeFile(filePath, reformatted); + + // hooks return a payload instructing the agent how to proceed + return { + hookSpecificOutput: { + additionalContext: `Reformatted file ${filePath}, read again before modifying further.`, + }, + }; + }, +); +``` + +SDK Hooks can also run as standalone scripts to implement userland "command" +style hooks: + +```ts +import { hook } from "@google/gemini-cli-sdk"; + +// define a hook as above +const myHook = hook({...}, (hook) => {...}); +// calling runAsCommand parses stdin, calls action, uses appropriate exit code +// with output, but you get nice strong typings to guide your impl +myHook.runAsCommand(); +``` + +Validation (these are probably hardest to validate): + +- Test each type of hook and check that model API receives injected content +- Check
global halt scenarios +- Check specific return types for each type of hook + +## `Custom Skills` + +Custom skills can be referenced by individual directories or by "skill roots" +(directories containing many skills). + +```ts +import { GeminiCliAgent, skillDir, skillRoot } from '@google/gemini-cli-sdk'; + +const agent = new GeminiCliAgent({ + skills: [skillDir('/path/to/single/skill'), skillRoot('/path/to/skills/dir')], +}); +``` + +**NOTE:** I would like to support fully in-memory skills (including reference +files); however, it seems like that would currently require a pretty significant +refactor so we'll focus on filesystem skills for now. In an ideal future state, +we could do something like: + +```ts +import { GeminiCliAgent, skill } from '@google/gemini-cli-sdk'; + +const mySkill = skill({ + name: 'my-skill', + description: 'description of when my skill should be used', + content: 'This is the SKILL.md content', + // it can also be a function + content: (ctx) => `This is dynamic content.`, +}); +``` + +## `Subagents` + +```ts +import { GeminiCliAgent, subagent } from "@google/gemini-cli-sdk"; + +const mySubagent = subagent({ + name: "my-subagent", + description: "when the subagent should be used", + + // simple prompt agent with static string or dynamic string + instructions: "the instructions", + instructions: (prompt, ctx) => `can also be dynamic with context`, + + // OR (in an ideal world)...
+ + // pass a full standalone agent + agent: new GeminiCliAgent(...), +}); + +const agent = new GeminiCliAgent({ + subagents: [mySubagent] +}); +``` + +## `Extensions` + +Potentially the most important feature of the Gemini CLI SDK is support for +extensions, which modularly encapsulate all of the primitives listed above: + +```ts +import { GeminiCliAgent, extension } from "@google/gemini-cli-sdk"; + +const myExtension = extension({ + name: "my-extension", + description: "...", + instructions: "THESE ARE CONCATENATED WITH OTHER AGENT +INSTRUCTIONS", + tools: [...], + skills: [...], + hooks: [...], + subagents: [...], +}); +``` + +## `ACP Mode` + +The SDK will include a wrapper utility to interact with the agent via ACP +instead of the SDK's natural API. + +```ts +import { GeminiCliAgent } from "@google/gemini-cli-sdk"; +import { GeminiCliAcpServer } from "@google/gemini-cli-sdk/acp"; + +const server = new GeminiCliAcpServer(new GeminiCliAgent({...})); +server.start(); // calling start runs a stdio ACP server + +const client = server.connect({ + onMessage: (message) => { /* updates etc received here */ }, +}); +client.send({...clientMessage}); // e.g. a "session/prompt" message +``` + +## `Approvals / Policies` + +TODO + +# `Implementation Guidance` + +## `Session Context` + +Whenever executing a tool, hook, command, or skill, a SessionContext object +should be passed as an additional argument after the arguments/payload.
The +interface should look something like: + +```ts +export interface SessionContext { + // translations of existing common hook payload info + sessionId: string; + transcript: Message[]; + cwd: string; + timestamp: string; + + // helpers to access files and run shell commands while adhering to policies/validation + fs: AgentFilesystem; + shell: AgentShell; + // the agent itself is passed as context + agent: GeminiCliAgent; +} + +export interface AgentFilesystem { + readFile(path: string): Promise<string | null>; + writeFile(path: string, content: string): Promise<void>; + // consider others including delete, globbing, etc but read/write are bare minimum +} + +export interface AgentShell { + // simple promise-based execution that blocks until complete + exec(cmd: string, options?: AgentShellOptions): Promise<{exitCode: number, output: string, stdout: string, stderr: string}>; + start(cmd: string, options?: AgentShellOptions): AgentShellProcess; +} + +export interface AgentShellOptions { + env?: Record<string, string>; + timeoutSeconds?: number; +} + +export interface AgentShellProcess { + // figure out how to have a streaming shell process here that supports stdin too + // investigate how Gemini CLI already does this +} +``` + +# `Notes` + +- To validate the SDK, it would be useful to have a robust way to mock the + underlying model API so that the tests could be closer to end-to-end but still + deterministic. +- Need to work in both Gemini-CLI-triggered approvals and optional + developer-initiated user prompts / HITL stuff. +- Need to think about how subagents inherit message context \- e.g. do they have + the same session id? +- Presumably the transcript is kept updated in memory and also persisted to disk + by default?
diff --git a/packages/sdk/examples/session-context.ts b/packages/sdk/examples/session-context.ts new file mode 100644 index 0000000000..704353efe0 --- /dev/null +++ b/packages/sdk/examples/session-context.ts @@ -0,0 +1,73 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { GeminiCliAgent, tool, z } from '../src/index.js'; + +async function main() { + const getContextTool = tool( + { + name: 'get_context', + description: 'Get information about the current session context.', + inputSchema: z.object({}), + }, + async (_params, context) => { + if (!context) { + return { error: 'Context not available' }; + } + + console.log('Session Context Accessed:'); + console.log(`- Session ID: ${context.sessionId}`); + console.log(`- CWD: ${context.cwd}`); + console.log(`- Timestamp: ${context.timestamp}`); + + let fileContent = null; + try { + // Try to read a file (e.g., package.json in the CWD) + // Note: This relies on the agent running in a directory with package.json + fileContent = await context.fs.readFile('package.json'); + } catch (e) { + console.log(`- Could not read package.json: ${e}`); + } + + let shellOutput = null; + try { + // Try to run a simple shell command + const result = await context.shell.exec('echo "Hello from SDK Shell"'); + shellOutput = result.output.trim(); + } catch (e) { + console.log(`- Could not run shell command: ${e}`); + } + + return { + sessionId: context.sessionId, + cwd: context.cwd, + hasFsAccess: !!context.fs, + hasShellAccess: !!context.shell, + packageJsonExists: !!fileContent, + shellEcho: shellOutput, + }; + }, + ); + + const agent = new GeminiCliAgent({ + instructions: + 'You are a helpful assistant. 
Use the get_context tool to tell me about my environment.', + tools: [getContextTool], + // Set CWD to the package root so package.json exists + cwd: process.cwd(), + }); + + console.log("Sending prompt: 'What is my current session context?'"); + for await (const chunk of agent.sendStream( + 'What is my current session context?', + )) { + if (chunk.type === 'content') { + process.stdout.write(chunk.value || ''); + } + } +} + +main().catch(console.error); diff --git a/packages/sdk/src/agent.ts b/packages/sdk/src/agent.ts index b2ac5a1872..21defe1ab6 100644 --- a/packages/sdk/src/agent.ts +++ b/packages/sdk/src/agent.ts @@ -7,29 +7,38 @@ import { Config, type ConfigParameters, + AuthType, PREVIEW_GEMINI_MODEL_AUTO, GeminiEventType, type ToolCallRequestInfo, type ServerGeminiStreamEvent, type GeminiClient, + type Content, scheduleAgentTools, getAuthTypeFromEnv, - AuthType, + type ToolRegistry, } from '@google/gemini-cli-core'; -import { type Tool, SdkTool, type z } from './tool.js'; +import { type Tool, SdkTool } from './tool.js'; +import { SdkAgentFilesystem } from './fs.js'; +import { SdkAgentShell } from './shell.js'; +import type { SessionContext } from './types.js'; export interface GeminiCliAgentOptions { instructions: string; - tools?: Array>; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + tools?: Array>; model?: string; cwd?: string; debug?: boolean; + recordResponses?: string; + fakeResponses?: string; } export class GeminiCliAgent { - private readonly config: Config; - private readonly tools: Array>; + private config: Config; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + private tools: Array>; constructor(options: GeminiCliAgentOptions) { const cwd = options.cwd || process.cwd(); @@ -46,6 +55,8 @@ export class GeminiCliAgent { enableHooks: false, mcpEnabled: false, extensionsEnabled: false, + recordResponses: options.recordResponses, + fakeResponses: options.fakeResponses, }; this.config = new Config(configParams); @@ 
-67,18 +78,21 @@ export class GeminiCliAgent { const messageBus = this.config.getMessageBus(); for (const toolDef of this.tools) { - const sdkTool = new SdkTool(toolDef, messageBus); + const sdkTool = new SdkTool(toolDef, messageBus, this); registry.registerTool(sdkTool); } } const client = this.config.getGeminiClient(); + const abortSignal = signal ?? new AbortController().signal; + const sessionId = this.config.getSessionId(); + + const fs = new SdkAgentFilesystem(this.config); + const shell = new SdkAgentShell(this.config); let request: Parameters[0] = [ { text: prompt }, ]; - const abortSignal = signal ?? new AbortController().signal; - const sessionId = this.config.getSessionId(); while (true) { // sendMessageStream returns AsyncGenerator @@ -107,12 +121,35 @@ export class GeminiCliAgent { break; } + // Prepare SessionContext + const transcript: Content[] = client.getHistory(); + const context: SessionContext = { + sessionId, + transcript, + cwd: this.config.getWorkingDir(), + timestamp: new Date().toISOString(), + fs, + shell, + agent: this, + }; + + // Create a scoped registry for this turn to bind context safely + const originalRegistry = this.config.getToolRegistry(); + const scopedRegistry: ToolRegistry = Object.create(originalRegistry); + scopedRegistry.getTool = (name: string) => { + const tool = originalRegistry.getTool(name); + if (tool instanceof SdkTool) { + return tool.bindContext(context); + } + return tool; + }; + const completedCalls = await scheduleAgentTools( this.config, toolCallsToSchedule, { schedulerId: sessionId, - toolRegistry: this.config.getToolRegistry(), + toolRegistry: scopedRegistry, signal: abortSignal, }, ); diff --git a/packages/sdk/src/fs.ts b/packages/sdk/src/fs.ts new file mode 100644 index 0000000000..afdb92acff --- /dev/null +++ b/packages/sdk/src/fs.ts @@ -0,0 +1,35 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Config as CoreConfig } from 
'@google/gemini-cli-core'; +import type { AgentFilesystem } from './types.js'; +import fs from 'node:fs/promises'; + +export class SdkAgentFilesystem implements AgentFilesystem { + constructor(private readonly config: CoreConfig) {} + + async readFile(path: string): Promise { + const error = this.config.validatePathAccess(path, 'read'); + if (error) { + // For now, if access is denied, we can either throw or return null. + // Returning null makes sense for "file not found or readable". + return null; + } + try { + return await fs.readFile(path, 'utf-8'); + } catch { + return null; + } + } + + async writeFile(path: string, content: string): Promise { + const error = this.config.validatePathAccess(path, 'write'); + if (error) { + throw new Error(error); + } + await fs.writeFile(path, content, 'utf-8'); + } +} diff --git a/packages/sdk/src/index.ts b/packages/sdk/src/index.ts index 0ad940d7b2..36a4c7711d 100644 --- a/packages/sdk/src/index.ts +++ b/packages/sdk/src/index.ts @@ -6,3 +6,4 @@ export * from './agent.js'; export * from './tool.js'; +export * from './types.js'; diff --git a/packages/sdk/src/shell.ts b/packages/sdk/src/shell.ts new file mode 100644 index 0000000000..30b9979594 --- /dev/null +++ b/packages/sdk/src/shell.ts @@ -0,0 +1,69 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Config as CoreConfig } from '@google/gemini-cli-core'; +import { ShellExecutionService, ShellTool } from '@google/gemini-cli-core'; +import type { + AgentShell, + AgentShellResult, + AgentShellOptions, +} from './types.js'; + +export class SdkAgentShell implements AgentShell { + constructor(private readonly config: CoreConfig) {} + + async exec( + command: string, + options?: AgentShellOptions, + ): Promise { + const cwd = options?.cwd || this.config.getWorkingDir(); + const abortController = new AbortController(); + + // Use ShellTool to check policy + const shellTool = new ShellTool(this.config, 
this.config.getMessageBus()); + try { + const invocation = shellTool.build({ + command, + dir_path: cwd, + }); + + const confirmation = await invocation.shouldConfirmExecute( + abortController.signal, + ); + if (confirmation) { + throw new Error( + 'Command execution requires confirmation but no interactive session is available.', + ); + } + } catch (error) { + return { + output: '', + stdout: '', + stderr: '', + exitCode: 1, + error: error instanceof Error ? error : new Error(String(error)), + }; + } + + const handle = await ShellExecutionService.execute( + command, + cwd, + () => {}, // No-op output event handler for now + abortController.signal, + false, // shouldUseNodePty: false for headless execution + this.config.getShellExecutionConfig(), + ); + + const result = await handle.result; + + return { + output: result.output, + stdout: result.output, // ShellExecutionService combines stdout/stderr usually + stderr: '', // ShellExecutionService currently combines, so stderr is empty or mixed + exitCode: result.exitCode, + }; + } +} diff --git a/packages/sdk/src/tool.integration.test.ts b/packages/sdk/src/tool.integration.test.ts new file mode 100644 index 0000000000..1ec9d73abd --- /dev/null +++ b/packages/sdk/src/tool.integration.test.ts @@ -0,0 +1,147 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { GeminiCliAgent } from './agent.js'; +import * as path from 'node:path'; +import { z } from 'zod'; +import { tool, ModelVisibleError } from './tool.js'; +import { fileURLToPath } from 'node:url'; +import { dirname } from 'node:path'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +// Set this to true locally when you need to update snapshots +const RECORD_MODE = process.env['RECORD_NEW_RESPONSES'] === 'true'; + +const getGoldenPath = (name: string) => + path.resolve(__dirname, '../test-data', `${name}.json`); + 
+describe('GeminiCliAgent Tool Integration', () => { + it('handles tool execution success', async () => { + const goldenFile = getGoldenPath('tool-success'); + + const agent = new GeminiCliAgent({ + instructions: 'You are a helpful assistant.', + // If recording, use real model + record path. + // If testing, use auto model + fake path. + model: RECORD_MODE ? 'gemini-2.0-flash' : undefined, + recordResponses: RECORD_MODE ? goldenFile : undefined, + fakeResponses: RECORD_MODE ? undefined : goldenFile, + tools: [ + tool( + { + name: 'add', + description: 'Adds two numbers', + inputSchema: z.object({ a: z.number(), b: z.number() }), + }, + async ({ a, b }) => a + b, + ), + ], + }); + + const events = []; + const stream = agent.sendStream('What is 5 + 3?'); + + for await (const event of stream) { + events.push(event); + } + + const textEvents = events.filter((e) => e.type === 'content'); + const responseText = textEvents + .map((e) => (typeof e.value === 'string' ? e.value : '')) + .join(''); + + expect(responseText).toContain('8'); + }); + + it('handles ModelVisibleError correctly', async () => { + const goldenFile = getGoldenPath('tool-error-recovery'); + + const agent = new GeminiCliAgent({ + instructions: 'You are a helpful assistant.', + model: RECORD_MODE ? 'gemini-2.0-flash' : undefined, + recordResponses: RECORD_MODE ? goldenFile : undefined, + fakeResponses: RECORD_MODE ? undefined : goldenFile, + tools: [ + tool( + { + name: 'failVisible', + description: 'Fails with a visible error if input is "fail"', + inputSchema: z.object({ input: z.string() }), + }, + async ({ input }) => { + if (input === 'fail') { + throw new ModelVisibleError('Tool failed visibly'); + } + return 'Success'; + }, + ), + ], + }); + + const events = []; + // Force the model to trigger the error first, then hopefully recover or at least acknowledge it. + // The prompt is crafted to make the model try 'fail' first. + const stream = agent.sendStream( + 'Call the tool with "fail". 
If it fails, tell me the error message.', + ); + + for await (const event of stream) { + events.push(event); + } + + const textEvents = events.filter((e) => e.type === 'content'); + const responseText = textEvents + .map((e) => (typeof e.value === 'string' ? e.value : '')) + .join(''); + + // The model should see the error "Tool failed visibly" and report it back. + expect(responseText).toContain('Tool failed visibly'); + }); + + it('handles sendErrorsToModel: true correctly', async () => { + const goldenFile = getGoldenPath('tool-catchall-error'); + + const agent = new GeminiCliAgent({ + instructions: 'You are a helpful assistant.', + model: RECORD_MODE ? 'gemini-2.0-flash' : undefined, + recordResponses: RECORD_MODE ? goldenFile : undefined, + fakeResponses: RECORD_MODE ? undefined : goldenFile, + tools: [ + tool( + { + name: 'checkSystemStatus', + description: 'Checks the current system status', + inputSchema: z.object({}), + sendErrorsToModel: true, + }, + async () => { + throw new Error('Standard error caught'); + }, + ), + ], + }); + + const events = []; + const stream = agent.sendStream( + 'Check the system status and report any errors.', + ); + + for await (const event of stream) { + events.push(event); + } + + const textEvents = events.filter((e) => e.type === 'content'); + const responseText = textEvents + .map((e) => (typeof e.value === 'string' ? e.value : '')) + .join(''); + + // The model should report the caught standard error. 
+ expect(responseText.toLowerCase()).toContain('error'); + }); +}); diff --git a/packages/sdk/src/tool.test.ts b/packages/sdk/src/tool.test.ts new file mode 100644 index 0000000000..819177c3b9 --- /dev/null +++ b/packages/sdk/src/tool.test.ts @@ -0,0 +1,143 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { z } from 'zod'; +import { SdkTool, tool, ModelVisibleError } from './tool.js'; +import type { MessageBus } from '@google/gemini-cli-core'; + +// Mock MessageBus +const mockMessageBus = {} as unknown as MessageBus; + +describe('tool()', () => { + it('creates a tool definition with defaults', () => { + const definition = tool( + { + name: 'testTool', + description: 'A test tool', + inputSchema: z.object({ foo: z.string() }), + }, + async () => 'result', + ); + + expect(definition.name).toBe('testTool'); + expect(definition.description).toBe('A test tool'); + expect(definition.sendErrorsToModel).toBeUndefined(); + }); + + it('creates a tool definition with explicit configuration', () => { + const definition = tool( + { + name: 'testTool', + description: 'A test tool', + inputSchema: z.object({ foo: z.string() }), + sendErrorsToModel: true, + }, + async () => 'result', + ); + + expect(definition.sendErrorsToModel).toBe(true); + }); +}); + +describe('SdkTool Execution', () => { + it('executes successfully', async () => { + const definition = tool( + { + name: 'successTool', + description: 'Always succeeds', + inputSchema: z.object({ val: z.string() }), + }, + async ({ val }) => `Success: ${val}`, + ); + + const sdkTool = new SdkTool(definition, mockMessageBus); + const invocation = sdkTool.createInvocationWithContext( + { val: 'test' }, + mockMessageBus, + undefined, + ); + const result = await invocation.execute(new AbortController().signal); + + expect(result.llmContent).toBe('Success: test'); + expect(result.error).toBeUndefined(); + }); + + it('throws standard 
Error by default', async () => { + const definition = tool( + { + name: 'failTool', + description: 'Always fails', + inputSchema: z.object({}), + }, + async () => { + throw new Error('Standard error'); + }, + ); + + const sdkTool = new SdkTool(definition, mockMessageBus); + const invocation = sdkTool.createInvocationWithContext( + {}, + mockMessageBus, + undefined, + ); + + await expect( + invocation.execute(new AbortController().signal), + ).rejects.toThrow('Standard error'); + }); + + it('catches ModelVisibleError and returns ToolResult error', async () => { + const definition = tool( + { + name: 'visibleErrorTool', + description: 'Fails with visible error', + inputSchema: z.object({}), + }, + async () => { + throw new ModelVisibleError('Visible error'); + }, + ); + + const sdkTool = new SdkTool(definition, mockMessageBus); + const invocation = sdkTool.createInvocationWithContext( + {}, + mockMessageBus, + undefined, + ); + const result = await invocation.execute(new AbortController().signal); + + expect(result.error).toBeDefined(); + expect(result.error?.message).toBe('Visible error'); + expect(result.llmContent).toContain('Error: Visible error'); + }); + + it('catches standard Error when sendErrorsToModel is true', async () => { + const definition = tool( + { + name: 'catchAllTool', + description: 'Catches all errors', + inputSchema: z.object({}), + sendErrorsToModel: true, + }, + async () => { + throw new Error('Standard error'); + }, + ); + + const sdkTool = new SdkTool(definition, mockMessageBus); + const invocation = sdkTool.createInvocationWithContext( + {}, + mockMessageBus, + undefined, + ); + const result = await invocation.execute(new AbortController().signal); + + expect(result.error).toBeDefined(); + expect(result.error?.message).toBe('Standard error'); + expect(result.llmContent).toContain('Error: Standard error'); + }); +}); diff --git a/packages/sdk/src/tool.ts b/packages/sdk/src/tool.ts index 00cd3802de..ce6bbfc05b 100644 --- 
a/packages/sdk/src/tool.ts +++ b/packages/sdk/src/tool.ts @@ -14,28 +14,42 @@ import { Kind, type MessageBus, } from '@google/gemini-cli-core'; +import type { SessionContext } from './types.js'; export { z }; -export interface ToolDefinition { +export class ModelVisibleError extends Error { + constructor(message: string | Error) { + super(message instanceof Error ? message.message : message); + this.name = 'ModelVisibleError'; + } +} + +export interface ToolDefinition { name: string; description: string; inputSchema: T; + sendErrorsToModel?: boolean; } -export interface Tool extends ToolDefinition { - action: (params: z.infer) => Promise; +export interface Tool extends ToolDefinition { + action: (params: z.infer, context?: SessionContext) => Promise; } -class SdkToolInvocation extends BaseToolInvocation< +class SdkToolInvocation extends BaseToolInvocation< z.infer, ToolResult > { constructor( params: z.infer, messageBus: MessageBus, - private readonly action: (params: z.infer) => Promise, + private readonly action: ( + params: z.infer, + context?: SessionContext, + ) => Promise, + private readonly context: SessionContext | undefined, toolName: string, + private readonly sendErrorsToModel: boolean = false, ) { super(params, messageBus, toolName); } @@ -49,7 +63,7 @@ class SdkToolInvocation extends BaseToolInvocation< _updateOutput?: (output: string) => void, ): Promise { try { - const result = await this.action(this.params); + const result = await this.action(this.params, this.context); const output = typeof result === 'string' ? result : JSON.stringify(result, null, 2); return { @@ -57,26 +71,31 @@ class SdkToolInvocation extends BaseToolInvocation< returnDisplay: output, }; } catch (error) { - const errorMessage = - error instanceof Error ? 
error.message : String(error); - return { - llmContent: `Error: ${errorMessage}`, - returnDisplay: `Error: ${errorMessage}`, - error: { - message: errorMessage, - }, - }; + if (this.sendErrorsToModel || error instanceof ModelVisibleError) { + const errorMessage = + error instanceof Error ? error.message : String(error); + return { + llmContent: `Error: ${errorMessage}`, + returnDisplay: `Error: ${errorMessage}`, + error: { + message: errorMessage, + }, + }; + } + throw error; } } } -export class SdkTool extends BaseDeclarativeTool< +export class SdkTool extends BaseDeclarativeTool< z.infer, ToolResult > { constructor( private readonly definition: Tool, messageBus: MessageBus, + _agent?: unknown, + private readonly context?: SessionContext, ) { super( definition.name, @@ -88,6 +107,26 @@ export class SdkTool extends BaseDeclarativeTool< ); } + bindContext(context: SessionContext): SdkTool { + return new SdkTool(this.definition, this.messageBus, undefined, context); + } + + createInvocationWithContext( + params: z.infer, + messageBus: MessageBus, + context: SessionContext | undefined, + toolName?: string, + ): ToolInvocation, ToolResult> { + return new SdkToolInvocation( + params, + messageBus, + this.definition.action, + context || this.context, + toolName || this.name, + this.definition.sendErrorsToModel, + ); + } + protected createInvocation( params: z.infer, messageBus: MessageBus, @@ -97,14 +136,16 @@ export class SdkTool extends BaseDeclarativeTool< params, messageBus, this.definition.action, + this.context, toolName || this.name, + this.definition.sendErrorsToModel, ); } } -export function tool( +export function tool( definition: ToolDefinition, - action: (params: z.infer) => Promise, + action: (params: z.infer, context?: SessionContext) => Promise, ): Tool { return { ...definition, diff --git a/packages/sdk/src/types.ts b/packages/sdk/src/types.ts new file mode 100644 index 0000000000..d7e013d66c --- /dev/null +++ b/packages/sdk/src/types.ts @@ -0,0 +1,41 @@ 
+/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Content } from '@google/gemini-cli-core'; +import type { GeminiCliAgent } from './agent.js'; + +export interface AgentFilesystem { + readFile(path: string): Promise; + writeFile(path: string, content: string): Promise; +} + +export interface AgentShellOptions { + env?: Record; + timeoutSeconds?: number; + cwd?: string; +} + +export interface AgentShellResult { + exitCode: number | null; + output: string; + stdout: string; + stderr: string; + error?: Error; +} + +export interface AgentShell { + exec(cmd: string, options?: AgentShellOptions): Promise; +} + +export interface SessionContext { + sessionId: string; + transcript: Content[]; + cwd: string; + timestamp: string; + fs: AgentFilesystem; + shell: AgentShell; + agent: GeminiCliAgent; +} diff --git a/packages/sdk/test-data/tool-catchall-error.json b/packages/sdk/test-data/tool-catchall-error.json new file mode 100644 index 0000000000..43c3b44d8b --- /dev/null +++ b/packages/sdk/test-data/tool-catchall-error.json @@ -0,0 +1,2 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"checkSystemStatus","args":{}}}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7070,"candidatesTokenCount":3,"totalTokenCount":7073,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7070}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":3}]}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The system status check"}],"role":"model"}}],"usageMetadata":{"promptTokenCount":9850,"totalTokenCount":9850,"promptTokensDetails":[{"modality":"TEXT","tokenCount":9850}]}},{"candidates":[{"content":{"parts":[{"text":" returned an error. 
It says `Error: Standard error caught`."}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7082,"candidatesTokenCount":17,"totalTokenCount":7099,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7082}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":17}]}}]} diff --git a/packages/sdk/test-data/tool-error-recovery.json b/packages/sdk/test-data/tool-error-recovery.json new file mode 100644 index 0000000000..4e36d24aa7 --- /dev/null +++ b/packages/sdk/test-data/tool-error-recovery.json @@ -0,0 +1,2 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"failVisible","args":{"input":"fail"}}}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7073,"candidatesTokenCount":4,"totalTokenCount":7077,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7073}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":4}]}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The"}],"role":"model"}}],"usageMetadata":{"promptTokenCount":9867,"totalTokenCount":9867,"promptTokensDetails":[{"modality":"TEXT","tokenCount":9867}]}},{"candidates":[{"content":{"parts":[{"text":" tool failed visibly with the error message: \"Error: Tool failed visibly\"."}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7085,"candidatesTokenCount":16,"totalTokenCount":7101,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7085}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":16}]}}]} diff --git a/packages/sdk/test-data/tool-success.json b/packages/sdk/test-data/tool-success.json new file mode 100644 index 0000000000..1b17993fe4 --- /dev/null +++ b/packages/sdk/test-data/tool-success.json @@ -0,0 +1,2 @@ 
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"add","args":{"a":5,"b":3}}}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7045,"candidatesTokenCount":5,"totalTokenCount":7050,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7045}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"8"}],"role":"model"}}],"usageMetadata":{"promptTokenCount":9849,"totalTokenCount":9849,"promptTokensDetails":[{"modality":"TEXT","tokenCount":9849}]}},{"candidates":[{"content":{"parts":[{"text":""}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7053,"candidatesTokenCount":1,"totalTokenCount":7054,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7053}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":1}]}}]}