feat(sdk): implements SessionContext for SDK tool calls (#18862)

2026-05-13 05:12:55 -07:00 · 2026-02-12 23:28:48 -08:00
parent bed3eae0e1
commit b61a123da8
15 changed files with 903 additions and 27 deletions
@@ -21,3 +21,4 @@ junit.xml
 Thumbs.db
 .pytest_cache
 **/SKILL.md
+packages/sdk/test-data/*.json
@@ -192,3 +192,6 @@ export * from './agents/types.js';
 // Export stdio utils
 export * from './utils/stdio.js';
 export * from './utils/terminal.js';
+
+// Export types from @google/genai
+export type { Content, Part, FunctionCall } from '@google/genai';
@@ -0,0 +1,279 @@
+# `Gemini CLI SDK`
+
+# `Examples`
+
+## `Simple Example`
+
+Equivalent to `gemini -p "what does this project do?"`. Loads all workspace and
+user settings.
+
+```ts
+import { GeminiCliAgent } from '@google/gemini-cli-sdk';
+
+const simpleAgent = new GeminiCliAgent({
+  cwd: '/path/to/some/dir',
+});
+
+for await (const chunk of simpleAgent.sendStream(
+  'what does this project do?',
+)) {
+  console.log(chunk); // equivalent to JSON streaming chunks (probably?) for now
+}
+```
+
+Validation:
+
+- Model receives call containing "what does this project do?" text.
+
+## `System Instructions`
+
+System instructions can be provided by a static string OR dynamically via a
+function:
+
+```ts
+import { GeminiCliAgent } from "@google/gemini-cli-sdk";
+
+const agent = new GeminiCliAgent({
+  instructions: "This is a static string instruction", // this is valid
+  instructions: (ctx) => `The current time is ${new Date().toISOString()} in session ${ctx.sessionId}.`
+});
+```
+
+Validation:
+
+- Static string instructions show up where GEMINI.md content normally would in
+  model call
+- Dynamic instructions show up and contain dynamic content.
+
+## `Custom Tools`
+
+```ts
+import { GeminiCliAgent, tool, z } from "@google/gemini-cli-sdk";
+
+const addTool = tool({
+  name: 'add',
+  description: 'add two numbers',
+  inputSchema: z.object({
+    a: z.number().describe('first number to add'),
+    b: z.number().describe('second number to add'),
+  }),
+}, async ({a, b}) => ({result: a + b}));
+
+const toolAgent = new GeminiCliAgent({
+  tools: [addTool],
+});
+
+const result = await toolAgent.send("what is 23 + 79?");
+console.log(result.text);
+```
+
+Validation:
+
+- Model receives tool definition in prompt
+- Model receives the tool response after it returns a tool call
+
+## `Custom Hooks`
+
+SDK users can provide programmatic custom hooks:
+
+```ts
+import { GeminiCliAgent, hook, z } from '@google/gemini-cli-sdk';
+import { reformat } from './reformat.js';
+
+const myHook = hook(
+  {
+    event: 'AfterTool',
+    name: 'reformat',
+    matcher: 'write_file',
+  },
+  async (hook, ctx) => {
+    const filePath = hook.toolInput.path;
+
+    // void return is a no-op
+    if (!filePath.endsWith('.ts')) return;
+
+    // ctx.fs gives us a filesystem interface that obeys Gemini CLI permissions/sandbox
+    const reformatted = await reformat(await ctx.fs.readFile(filePath));
+    await ctx.fs.writeFile(filePath, reformatted);
+
+    // hooks return a payload instructing the agent how to proceed
+    return {
+      hookSpecificOutput: {
+        additionalContext: `Reformatted file ${filePath}, read again before modifying further.`,
+      },
+    };
+  },
+);
+```
+
+SDK Hooks can also run as standalone scripts to implement userland "command"
+style hooks:
+
+```ts
+import { hook } from "@google/gemini-cli-sdk";
+
+// define a hook as above
+const myHook = hook({...}, (hook) => {...});
+// calling runAsCommand parses stdin, calls action, uses appropriate exit code
+// with output, but you get nice strong typings to guide your impl
+myHook.runAsCommand();
+```
+
+Validation (these are probably hardest to validate):
+
+- Test each type of hook and check that model api receives injected content
+- Check global halt scenarios
+- Check specific return types for each type of hook
+
+## `Custom Skills`
+
+Custom skills can be referenced by individual directories or by "skill roots"
+(directories containing many skills).
+
+```ts
+import { GeminiCliAgent, skillDir, skillRoot } from '@google/gemini-cli-sdk';
+
+const agent = new GeminiCliAgent({
+  skills: [skillDir('/path/to/single/skill'), skillRoot('/path/to/skills/dir')],
+});
+```
+
+**NOTE:** I would like to support fully in-memory skills (including reference
+files); however, it seems like that would currently require a pretty significant
+refactor so we'll focus on filesystem skills for now. In an ideal future state,
+we could do something like:
+
+```ts
+import { GeminiCliAgent, skill } from '@google/gemini-cli-sdk';
+
+const mySkill = skill({
+  name: 'my-skill',
+  description: 'description of when my skill should be used',
+  content: 'This is the SKILL.md content',
+  // it can also be a function
+  content: (ctx) => `This is dynamic content.`,
+});
+```
+
+## `Subagents`
+
+```ts
+import { GeminiCliAgent, subagent } from "@google/gemini-cli-sdk";
+
+const mySubagent = subagent({
+  name: "my-subagent",
+  description: "when the subagent should be used",
+
+  // simple prompt agent with static string or dynamic string
+  instructions: "the instructions",
+  instructions: (prompt, ctx) => `can also be dynamic with context`,
+
+  // OR (in an ideal world)...
+
+  // pass a full standalone agent
+  agent: new GeminiCliAgent(...),
+});
+
+const agent = new GeminiCliAgent({
+  subagents: [mySubagent]
+});
+```
+
+## `Extensions`
+
+Potentially the most important feature of the Gemini CLI SDK is support for
+extensions, which modularly encapsulate all of the primitives listed above:
+
+```ts
+import { GeminiCliAgent, extension } from "@google/gemini-cli-sdk";
+
+const myExtension = extension({
+  name: "my-extension",
+  description: "...",
+  instructions: "THESE ARE CONCATENATED WITH OTHER AGENT INSTRUCTIONS",
+  tools: [...],
+  skills: [...],
+  hooks: [...],
+  subagents: [...],
+});
+```
+
+## `ACP Mode`
+
+The SDK will include a wrapper utility to interact with the agent via ACP
+instead of the SDK's natural API.
+
+```ts
+import { GeminiCliAgent } from "@google/gemini-cli-sdk";
+import { GeminiCliAcpServer } from "@google/gemini-cli-sdk/acp";
+
+const server = new GeminiCliAcpServer(new GeminiCliAgent({...}));
+server.start(); // calling start runs a stdio ACP server
+
+const client = server.connect({
+  onMessage: (message) => { /* updates etc received here */ },
+});
+client.send({...clientMessage}); // e.g. a "session/prompt" message
+```
+
+## `Approvals / Policies`
+
+TODO
+
+# `Implementation Guidance`
+
+## `Session Context`
+
+Whenever executing a tool, hook, command, or skill, a SessionContext object
+should be passed as an additional argument after the arguments/payload. The
+interface should look something like:
+
+```ts
+export interface SessionContext {
+  // translations of existing common hook payload info
+  sessionId: string;
+  transcript: Message[];
+  cwd: string;
+  timestamp: string;
+
+  // helpers to access files and run shell commands while adhering to policies/validation
+  fs: AgentFilesystem;
+  shell: AgentShell;
+  // the agent itself is passed as context
+  agent: GeminiCliAgent;
+}
+
+export interface AgentFilesystem {
+  readFile(path: string): Promise<string | null>
+  writeFile(path: string, content: string): Promise<void>
+  // consider others including delete, globbing, etc but read/write are bare minimum
+}
+
+export interface AgentShell {
+  // simple promise-based execution that blocks until complete
+  exec(cmd: string, options?: AgentShellOptions): Promise<{exitCode: number, output: string, stdout: string, stderr: string}>
+  start(cmd: string, options?: AgentShellOptions): AgentShellProcess;
+}
+
+export interface AgentShellOptions {
+  env?: Record<string,string>;
+  timeoutSeconds?: number;
+}
+
+export interface AgentShellProcess {
+  // figure out how to have a streaming shell process here that supports stdin too
+  // investigate how Gemini CLI already does this
+}
+```
+
+# `Notes`
+
+- To validate the SDK, it would be useful to have a robust way to mock the
+  underlying model API so that the tests could be closer to end-to-end but still
+  deterministic.
+- Need to work in both Gemini-CLI-triggered approvals and optional
+  developer-initiated user prompts / HITL stuff.
+- Need to think about how subagents inherit message context \- e.g. do they have
+  the same session id?
+- Presumably the transcript is kept updated in memory and also persisted to disk
+  by default?
@@ -0,0 +1,73 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { GeminiCliAgent, tool, z } from '../src/index.js';
+
+/**
+ * Demo entry point: registers a `get_context` tool that inspects the
+ * SessionContext handed to SDK tool actions, then streams a single prompt
+ * through a GeminiCliAgent and writes the model's content chunks to stdout.
+ */
+async function main() {
+  const getContextTool = tool(
+    {
+      name: 'get_context',
+      description: 'Get information about the current session context.',
+      inputSchema: z.object({}),
+    },
+    async (_params, context) => {
+      // The context argument is optional in the action signature, so guard it.
+      if (!context) {
+        return { error: 'Context not available' };
+      }
+
+      const { sessionId, cwd, timestamp, fs, shell } = context;
+
+      console.log('Session Context Accessed:');
+      console.log(`- Session ID: ${sessionId}`);
+      console.log(`- CWD: ${cwd}`);
+      console.log(`- Timestamp: ${timestamp}`);
+
+      // Filesystem probe: package.json is only found when the agent runs in a
+      // directory that contains one.
+      const readPackageJson = async () => {
+        try {
+          return await fs.readFile('package.json');
+        } catch (e) {
+          console.log(`- Could not read package.json: ${e}`);
+          return null;
+        }
+      };
+
+      // Shell probe: run a trivial echo and keep its trimmed output.
+      const runEcho = async () => {
+        try {
+          const echoed = await shell.exec('echo "Hello from SDK Shell"');
+          return echoed.output.trim();
+        } catch (e) {
+          console.log(`- Could not run shell command: ${e}`);
+          return null;
+        }
+      };
+
+      const packageJson = await readPackageJson();
+      const shellEcho = await runEcho();
+
+      return {
+        sessionId,
+        cwd,
+        hasFsAccess: Boolean(fs),
+        hasShellAccess: Boolean(shell),
+        packageJsonExists: Boolean(packageJson),
+        shellEcho,
+      };
+    },
+  );
+
+  const agent = new GeminiCliAgent({
+    instructions:
+      'You are a helpful assistant. Use the get_context tool to tell me about my environment.',
+    tools: [getContextTool],
+    // Use the process CWD (expected to be the package root) so package.json exists
+    cwd: process.cwd(),
+  });
+
+  const prompt = 'What is my current session context?';
+  console.log(`Sending prompt: '${prompt}'`);
+
+  for await (const chunk of agent.sendStream(prompt)) {
+    // Only content chunks carry text to print.
+    if (chunk.type !== 'content') {
+      continue;
+    }
+    process.stdout.write(chunk.value || '');
+  }
+}
+
+main().catch(console.error);
@@ -7,29 +7,38 @@
 import {
  Config,
  type ConfigParameters,
+  AuthType,
  PREVIEW_GEMINI_MODEL_AUTO,
  GeminiEventType,
  type ToolCallRequestInfo,
  type ServerGeminiStreamEvent,
  type GeminiClient,
+  type Content,
  scheduleAgentTools,
  getAuthTypeFromEnv,
-  AuthType,
+  type ToolRegistry,
 } from '@google/gemini-cli-core';

-import { type Tool, SdkTool, type z } from './tool.js';
+import { type Tool, SdkTool } from './tool.js';
+import { SdkAgentFilesystem } from './fs.js';
+import { SdkAgentShell } from './shell.js';
+import type { SessionContext } from './types.js';

 export interface GeminiCliAgentOptions {
  instructions: string;
-  tools?: Array<Tool<z.ZodType>>;
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  tools?: Array<Tool<any>>;
  model?: string;
  cwd?: string;
  debug?: boolean;
+  recordResponses?: string;
+  fakeResponses?: string;
 }

 export class GeminiCliAgent {
-  private readonly config: Config;
-  private readonly tools: Array<Tool<z.ZodType>>;
+  private config: Config;
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  private tools: Array<Tool<any>>;

  constructor(options: GeminiCliAgentOptions) {
    const cwd = options.cwd || process.cwd();
@@ -46,6 +55,8 @@ export class GeminiCliAgent {
      enableHooks: false,
      mcpEnabled: false,
      extensionsEnabled: false,
+      recordResponses: options.recordResponses,
+      fakeResponses: options.fakeResponses,
    };

    this.config = new Config(configParams);
@@ -67,18 +78,21 @@ export class GeminiCliAgent {
      const messageBus = this.config.getMessageBus();

      for (const toolDef of this.tools) {
-        const sdkTool = new SdkTool(toolDef, messageBus);
+        const sdkTool = new SdkTool(toolDef, messageBus, this);
        registry.registerTool(sdkTool);
      }
    }

    const client = this.config.getGeminiClient();
+    const abortSignal = signal ?? new AbortController().signal;
+    const sessionId = this.config.getSessionId();
+
+    const fs = new SdkAgentFilesystem(this.config);
+    const shell = new SdkAgentShell(this.config);

    let request: Parameters<GeminiClient['sendMessageStream']>[0] = [
      { text: prompt },
    ];
-    const abortSignal = signal ?? new AbortController().signal;
-    const sessionId = this.config.getSessionId();

    while (true) {
      // sendMessageStream returns AsyncGenerator<ServerGeminiStreamEvent, Turn>
@@ -107,12 +121,35 @@ export class GeminiCliAgent {
        break;
      }

+      // Prepare SessionContext
+      const transcript: Content[] = client.getHistory();
+      const context: SessionContext = {
+        sessionId,
+        transcript,
+        cwd: this.config.getWorkingDir(),
+        timestamp: new Date().toISOString(),
+        fs,
+        shell,
+        agent: this,
+      };
+
+      // Create a scoped registry for this turn to bind context safely
+      const originalRegistry = this.config.getToolRegistry();
+      const scopedRegistry: ToolRegistry = Object.create(originalRegistry);
+      scopedRegistry.getTool = (name: string) => {
+        const tool = originalRegistry.getTool(name);
+        if (tool instanceof SdkTool) {
+          return tool.bindContext(context);
+        }
+        return tool;
+      };
+
      const completedCalls = await scheduleAgentTools(
        this.config,
        toolCallsToSchedule,
        {
          schedulerId: sessionId,
-          toolRegistry: this.config.getToolRegistry(),
+          toolRegistry: scopedRegistry,
          signal: abortSignal,
        },
      );
@@ -0,0 +1,35 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { Config as CoreConfig } from '@google/gemini-cli-core';
+import type { AgentFilesystem } from './types.js';
+import fs from 'node:fs/promises';
+
+/**
+ * AgentFilesystem implementation backed by `node:fs/promises`. Each call is
+ * first checked with the core Config's `validatePathAccess`.
+ */
+export class SdkAgentFilesystem implements AgentFilesystem {
+  constructor(private readonly config: CoreConfig) {}
+
+  /**
+   * Reads `path` as UTF-8 text.
+   *
+   * @returns The file contents, or `null` when access validation reports an
+   *   error or the read itself fails (both cases are deliberately folded into
+   *   "not found or not readable").
+   */
+  async readFile(path: string): Promise<string | null> {
+    const denied = this.config.validatePathAccess(path, 'read');
+    if (!denied) {
+      try {
+        return await fs.readFile(path, 'utf-8');
+      } catch {
+        // Fall through to the shared null result below.
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Writes `content` to `path` as UTF-8 text.
+   *
+   * @throws Error carrying the validation message when write access to `path`
+   *   is rejected; errors from the underlying write propagate unchanged.
+   */
+  async writeFile(path: string, content: string): Promise<void> {
+    const denied = this.config.validatePathAccess(path, 'write');
+    if (denied) {
+      throw new Error(denied);
+    }
+    await fs.writeFile(path, content, 'utf-8');
+  }
+}
@@ -6,3 +6,4 @@

 export * from './agent.js';
 export * from './tool.js';
+export * from './types.js';
@@ -0,0 +1,69 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { Config as CoreConfig } from '@google/gemini-cli-core';
+import { ShellExecutionService, ShellTool } from '@google/gemini-cli-core';
+import type {
+  AgentShell,
+  AgentShellResult,
+  AgentShellOptions,
+} from './types.js';
+
+/**
+ * AgentShell implementation that runs commands through the core
+ * ShellExecutionService after checking them against the ShellTool policy.
+ */
+export class SdkAgentShell implements AgentShell {
+  constructor(private readonly config: CoreConfig) {}
+
+  /**
+   * Runs `command` and resolves once it has finished.
+   *
+   * The command is first built as a ShellTool invocation. If building throws,
+   * or the invocation asks for confirmation, nothing is executed and a result
+   * with `exitCode: 1` and `error` set is returned instead of throwing.
+   *
+   * @param command - Shell command line to execute.
+   * @param options - `cwd` overrides the config working directory;
+   *   `timeoutSeconds` (when a positive finite number) aborts the command
+   *   after that many seconds. `env` is not forwarded by this implementation.
+   * @returns The combined output (mirrored into `stdout`), an empty `stderr`,
+   *   and the exit code reported by the execution service. When the timeout
+   *   fired, `error` describes it.
+   */
+  async exec(
+    command: string,
+    options?: AgentShellOptions,
+  ): Promise<AgentShellResult> {
+    const cwd = options?.cwd || this.config.getWorkingDir();
+    const abortController = new AbortController();
+
+    // Use ShellTool to check policy
+    const shellTool = new ShellTool(this.config, this.config.getMessageBus());
+    try {
+      const invocation = shellTool.build({
+        command,
+        dir_path: cwd,
+      });
+
+      const confirmation = await invocation.shouldConfirmExecute(
+        abortController.signal,
+      );
+      if (confirmation) {
+        throw new Error(
+          'Command execution requires confirmation but no interactive session is available.',
+        );
+      }
+    } catch (error) {
+      return {
+        output: '',
+        stdout: '',
+        stderr: '',
+        exitCode: 1,
+        error: error instanceof Error ? error : new Error(String(error)),
+      };
+    }
+
+    // Honor options.timeoutSeconds by aborting the signal handed to the
+    // execution service. The controller is local to this call, so the timer is
+    // the only thing that can abort it.
+    const timeoutSeconds = options?.timeoutSeconds;
+    let timer: ReturnType<typeof setTimeout> | undefined;
+    if (
+      timeoutSeconds !== undefined &&
+      Number.isFinite(timeoutSeconds) &&
+      timeoutSeconds > 0
+    ) {
+      // Timer delays above 2^31 - 1 ms overflow and fire almost immediately,
+      // so clamp very large timeouts to the maximum supported delay.
+      const maxTimerDelayMs = 2_147_483_647;
+      const delayMs = Math.min(timeoutSeconds * 1000, maxTimerDelayMs);
+      timer = setTimeout(() => abortController.abort(), delayMs);
+    }
+
+    try {
+      // NOTE(review): options.env is not passed on because the execute() call
+      // used here takes no environment argument - confirm how overrides
+      // should be supplied.
+      const handle = await ShellExecutionService.execute(
+        command,
+        cwd,
+        () => {}, // No-op output event handler for now
+        abortController.signal,
+        false, // shouldUseNodePty: false for headless execution
+        this.config.getShellExecutionConfig(),
+      );
+
+      const result = await handle.result;
+      const timedOut = abortController.signal.aborted;
+
+      return {
+        output: result.output,
+        stdout: result.output, // ShellExecutionService combines stdout/stderr usually
+        stderr: '', // ShellExecutionService currently combines, so stderr is empty or mixed
+        exitCode: result.exitCode,
+        ...(timedOut
+          ? {
+              error: new Error(
+                `Command timed out after ${timeoutSeconds} seconds.`,
+              ),
+            }
+          : {}),
+      };
+    } finally {
+      // Always release the timer so a finished command cannot be aborted late
+      // and the pending timer does not keep the process alive.
+      clearTimeout(timer);
+    }
+  }
+}
@@ -0,0 +1,147 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { GeminiCliAgent } from './agent.js';
+import * as path from 'node:path';
+import { z } from 'zod';
+import { tool, ModelVisibleError } from './tool.js';
+import { fileURLToPath } from 'node:url';
+import { dirname } from 'node:path';
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+
+// Run with RECORD_NEW_RESPONSES=true locally to re-record the golden response files
+const RECORD_MODE = process.env['RECORD_NEW_RESPONSES'] === 'true';
+
+/** Absolute path of the golden response file `<name>.json` under ../test-data. */
+const getGoldenPath = (name: string) => {
+  const fileName = `${name}.json`;
+  return path.resolve(__dirname, '../test-data', fileName);
+};
+
+describe('GeminiCliAgent Tool Integration', () => {
+  /**
+   * Agent options shared by every test. In record mode a real model is used
+   * and responses are written to the golden file; otherwise the default model
+   * is used and the golden file is replayed as fake responses.
+   */
+  const replayOptions = (goldenName: string) => {
+    const goldenFile = getGoldenPath(goldenName);
+    return {
+      instructions: 'You are a helpful assistant.',
+      model: RECORD_MODE ? 'gemini-2.0-flash' : undefined,
+      recordResponses: RECORD_MODE ? goldenFile : undefined,
+      fakeResponses: RECORD_MODE ? undefined : goldenFile,
+    };
+  };
+
+  /** Drains the stream for `prompt` and concatenates its string content chunks. */
+  const collectText = async (
+    agent: GeminiCliAgent,
+    prompt: string,
+  ): Promise<string> => {
+    let text = '';
+    for await (const event of agent.sendStream(prompt)) {
+      if (event.type === 'content' && typeof event.value === 'string') {
+        text += event.value;
+      }
+    }
+    return text;
+  };
+
+  it('handles tool execution success', async () => {
+    const addTool = tool(
+      {
+        name: 'add',
+        description: 'Adds two numbers',
+        inputSchema: z.object({ a: z.number(), b: z.number() }),
+      },
+      async ({ a, b }) => a + b,
+    );
+    const agent = new GeminiCliAgent({
+      ...replayOptions('tool-success'),
+      tools: [addTool],
+    });
+
+    const responseText = await collectText(agent, 'What is 5 + 3?');
+
+    expect(responseText).toContain('8');
+  });
+
+  it('handles ModelVisibleError correctly', async () => {
+    const failVisibleTool = tool(
+      {
+        name: 'failVisible',
+        description: 'Fails with a visible error if input is "fail"',
+        inputSchema: z.object({ input: z.string() }),
+      },
+      async ({ input }) => {
+        if (input === 'fail') {
+          throw new ModelVisibleError('Tool failed visibly');
+        }
+        return 'Success';
+      },
+    );
+    const agent = new GeminiCliAgent({
+      ...replayOptions('tool-error-recovery'),
+      tools: [failVisibleTool],
+    });
+
+    // The prompt steers the model into calling the tool with 'fail' first so
+    // the visible error path is exercised.
+    const responseText = await collectText(
+      agent,
+      'Call the tool with "fail". If it fails, tell me the error message.',
+    );
+
+    // The model should see the error "Tool failed visibly" and report it back.
+    expect(responseText).toContain('Tool failed visibly');
+  });
+
+  it('handles sendErrorsToModel: true correctly', async () => {
+    const statusTool = tool(
+      {
+        name: 'checkSystemStatus',
+        description: 'Checks the current system status',
+        inputSchema: z.object({}),
+        sendErrorsToModel: true,
+      },
+      async () => {
+        throw new Error('Standard error caught');
+      },
+    );
+    const agent = new GeminiCliAgent({
+      ...replayOptions('tool-catchall-error'),
+      tools: [statusTool],
+    });
+
+    const responseText = await collectText(
+      agent,
+      'Check the system status and report any errors.',
+    );
+
+    // The model should report the caught standard error.
+    expect(responseText.toLowerCase()).toContain('error');
+  });
+});
@@ -0,0 +1,143 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { z } from 'zod';
+import { SdkTool, tool, ModelVisibleError } from './tool.js';
+import type { MessageBus } from '@google/gemini-cli-core';
+
+// Mock MessageBus
+const mockMessageBus = {} as unknown as MessageBus;
+
+// Checks that tool() copies the definition fields onto the returned Tool.
+describe('tool()', () => {
+  it('creates a tool definition with defaults', () => {
+    const { name, description, sendErrorsToModel } = tool(
+      {
+        name: 'testTool',
+        description: 'A test tool',
+        inputSchema: z.object({ foo: z.string() }),
+      },
+      async () => 'result',
+    );
+
+    expect(name).toBe('testTool');
+    expect(description).toBe('A test tool');
+    // The flag is optional and stays unset unless the caller provides it.
+    expect(sendErrorsToModel).toBeUndefined();
+  });
+
+  it('creates a tool definition with explicit configuration', () => {
+    const { sendErrorsToModel } = tool(
+      {
+        name: 'testTool',
+        description: 'A test tool',
+        inputSchema: z.object({ foo: z.string() }),
+        sendErrorsToModel: true,
+      },
+      async () => 'result',
+    );
+
+    expect(sendErrorsToModel).toBe(true);
+  });
+});
+
+// Exercises SdkTool invocations created without a SessionContext and checks
+// how results and thrown errors are surfaced.
+describe('SdkTool Execution', () => {
+  it('executes successfully', async () => {
+    const toolDef = tool(
+      {
+        name: 'successTool',
+        description: 'Always succeeds',
+        inputSchema: z.object({ val: z.string() }),
+      },
+      async ({ val }) => `Success: ${val}`,
+    );
+
+    const { llmContent, error } = await new SdkTool(toolDef, mockMessageBus)
+      .createInvocationWithContext({ val: 'test' }, mockMessageBus, undefined)
+      .execute(new AbortController().signal);
+
+    expect(llmContent).toBe('Success: test');
+    expect(error).toBeUndefined();
+  });
+
+  it('throws standard Error by default', async () => {
+    const toolDef = tool(
+      {
+        name: 'failTool',
+        description: 'Always fails',
+        inputSchema: z.object({}),
+      },
+      async () => {
+        throw new Error('Standard error');
+      },
+    );
+
+    const pending = new SdkTool(toolDef, mockMessageBus)
+      .createInvocationWithContext({}, mockMessageBus, undefined)
+      .execute(new AbortController().signal);
+
+    // Without sendErrorsToModel a plain Error is rethrown to the caller.
+    await expect(pending).rejects.toThrow('Standard error');
+  });
+
+  it('catches ModelVisibleError and returns ToolResult error', async () => {
+    const toolDef = tool(
+      {
+        name: 'visibleErrorTool',
+        description: 'Fails with visible error',
+        inputSchema: z.object({}),
+      },
+      async () => {
+        throw new ModelVisibleError('Visible error');
+      },
+    );
+
+    const { llmContent, error } = await new SdkTool(toolDef, mockMessageBus)
+      .createInvocationWithContext({}, mockMessageBus, undefined)
+      .execute(new AbortController().signal);
+
+    expect(error).toBeDefined();
+    expect(error?.message).toBe('Visible error');
+    expect(llmContent).toContain('Error: Visible error');
+  });
+
+  it('catches standard Error when sendErrorsToModel is true', async () => {
+    const toolDef = tool(
+      {
+        name: 'catchAllTool',
+        description: 'Catches all errors',
+        inputSchema: z.object({}),
+        sendErrorsToModel: true,
+      },
+      async () => {
+        throw new Error('Standard error');
+      },
+    );
+
+    const { llmContent, error } = await new SdkTool(toolDef, mockMessageBus)
+      .createInvocationWithContext({}, mockMessageBus, undefined)
+      .execute(new AbortController().signal);
+
+    expect(error).toBeDefined();
+    expect(error?.message).toBe('Standard error');
+    expect(llmContent).toContain('Error: Standard error');
+  });
+});
@@ -14,28 +14,42 @@ import {
  Kind,
  type MessageBus,
 } from '@google/gemini-cli-core';
+import type { SessionContext } from './types.js';

 export { z };

-export interface ToolDefinition<T extends z.ZodType> {
+/**
+ * Error a tool action can throw to have its message returned to the model as
+ * an `Error: ...` tool result instead of being rethrown. Accepts either a
+ * message string or an existing Error, whose message is reused.
+ */
+export class ModelVisibleError extends Error {
+  constructor(message: string | Error) {
+    let text: string;
+    if (message instanceof Error) {
+      text = message.message;
+    } else {
+      text = message;
+    }
+    super(text);
+    this.name = 'ModelVisibleError';
+  }
+}
+
+export interface ToolDefinition<T extends z.ZodTypeAny> {
  name: string;
  description: string;
  inputSchema: T;
+  sendErrorsToModel?: boolean;
 }

-export interface Tool<T extends z.ZodType> extends ToolDefinition<T> {
-  action: (params: z.infer<T>) => Promise<unknown>;
+export interface Tool<T extends z.ZodTypeAny> extends ToolDefinition<T> {
+  action: (params: z.infer<T>, context?: SessionContext) => Promise<unknown>;
 }

-class SdkToolInvocation<T extends z.ZodType> extends BaseToolInvocation<
+class SdkToolInvocation<T extends z.ZodTypeAny> extends BaseToolInvocation<
  z.infer<T>,
  ToolResult
 > {
  constructor(
    params: z.infer<T>,
    messageBus: MessageBus,
-    private readonly action: (params: z.infer<T>) => Promise<unknown>,
+    private readonly action: (
+      params: z.infer<T>,
+      context?: SessionContext,
+    ) => Promise<unknown>,
+    private readonly context: SessionContext | undefined,
    toolName: string,
+    private readonly sendErrorsToModel: boolean = false,
  ) {
    super(params, messageBus, toolName);
  }
@@ -49,7 +63,7 @@ class SdkToolInvocation<T extends z.ZodType> extends BaseToolInvocation<
    _updateOutput?: (output: string) => void,
  ): Promise<ToolResult> {
    try {
-      const result = await this.action(this.params);
+      const result = await this.action(this.params, this.context);
      const output =
        typeof result === 'string' ? result : JSON.stringify(result, null, 2);
      return {
@@ -57,26 +71,31 @@ class SdkToolInvocation<T extends z.ZodType> extends BaseToolInvocation<
        returnDisplay: output,
      };
    } catch (error) {
-      const errorMessage =
-        error instanceof Error ? error.message : String(error);
-      return {
-        llmContent: `Error: ${errorMessage}`,
-        returnDisplay: `Error: ${errorMessage}`,
-        error: {
-          message: errorMessage,
-        },
-      };
+      if (this.sendErrorsToModel || error instanceof ModelVisibleError) {
+        const errorMessage =
+          error instanceof Error ? error.message : String(error);
+        return {
+          llmContent: `Error: ${errorMessage}`,
+          returnDisplay: `Error: ${errorMessage}`,
+          error: {
+            message: errorMessage,
+          },
+        };
+      }
+      throw error;
    }
  }
 }

-export class SdkTool<T extends z.ZodType> extends BaseDeclarativeTool<
+export class SdkTool<T extends z.ZodTypeAny> extends BaseDeclarativeTool<
  z.infer<T>,
  ToolResult
 > {
  constructor(
    private readonly definition: Tool<T>,
    messageBus: MessageBus,
+    _agent?: unknown,
+    private readonly context?: SessionContext,
  ) {
    super(
      definition.name,
@@ -88,6 +107,26 @@ export class SdkTool<T extends z.ZodType> extends BaseDeclarativeTool<
    );
  }

+  /**
+   * Returns a new SdkTool wrapping the same definition and message bus with
+   * `context` attached; this instance is left unchanged.
+   */
+  bindContext(context: SessionContext): SdkTool<T> {
+    const { definition, messageBus } = this;
+    return new SdkTool(definition, messageBus, undefined, context);
+  }
+
+  /**
+   * Builds an invocation of this tool's action for `params`.
+   *
+   * @param context - Context passed to the action; when falsy, the context
+   *   bound to this tool (if any) is used instead.
+   * @param toolName - Optional name override; defaults to this tool's name.
+   */
+  createInvocationWithContext(
+    params: z.infer<T>,
+    messageBus: MessageBus,
+    context: SessionContext | undefined,
+    toolName?: string,
+  ): ToolInvocation<z.infer<T>, ToolResult> {
+    const { action, sendErrorsToModel } = this.definition;
+    const effectiveContext = context || this.context;
+    const effectiveName = toolName || this.name;
+
+    return new SdkToolInvocation(
+      params,
+      messageBus,
+      action,
+      effectiveContext,
+      effectiveName,
+      sendErrorsToModel,
+    );
+  }
+
  protected createInvocation(
    params: z.infer<T>,
    messageBus: MessageBus,
@@ -97,14 +136,16 @@ export class SdkTool<T extends z.ZodType> extends BaseDeclarativeTool<
      params,
      messageBus,
      this.definition.action,
+      this.context,
      toolName || this.name,
+      this.definition.sendErrorsToModel,
    );
  }
 }

-export function tool<T extends z.ZodType>(
+export function tool<T extends z.ZodTypeAny>(
  definition: ToolDefinition<T>,
-  action: (params: z.infer<T>) => Promise<unknown>,
+  action: (params: z.infer<T>, context?: SessionContext) => Promise<unknown>,
 ): Tool<T> {
  return {
    ...definition,
@@ -0,0 +1,41 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { Content } from '@google/gemini-cli-core';
+import type { GeminiCliAgent } from './agent.js';
+
+/** File access helper exposed to tool actions through SessionContext. */
+export interface AgentFilesystem {
+  /**
+   * Reads a file as text. Resolves to `null` rather than rejecting when the
+   * file cannot be provided (the SDK implementation returns `null` when path
+   * validation fails or the read throws).
+   */
+  readFile(path: string): Promise<string | null>;
+  /**
+   * Writes text to a file. The SDK implementation rejects with an Error when
+   * path validation fails.
+   */
+  writeFile(path: string, content: string): Promise<void>;
+}
+
+/** Optional settings accepted by AgentShell.exec. */
+export interface AgentShellOptions {
+  /** Environment variables for the command. NOTE(review): confirm which implementations honor this. */
+  env?: Record<string, string>;
+  /** Time limit for the command, in seconds. NOTE(review): confirm which implementations honor this. */
+  timeoutSeconds?: number;
+  /** Directory to run the command in; the SDK shell falls back to the config working directory when omitted. */
+  cwd?: string;
+}
+
+/** Outcome of a command run through AgentShell.exec. */
+export interface AgentShellResult {
+  /** Exit code of the command; may be `null`. */
+  exitCode: number | null;
+  /** Output text of the command. */
+  output: string;
+  /** Standard output; the SDK shell fills this with the same text as `output`. */
+  stdout: string;
+  /** Standard error; the SDK shell currently always sets this to an empty string. */
+  stderr: string;
+  /** Set when the command could not be run normally, e.g. the SDK shell sets it (with `exitCode: 1`) when its pre-execution policy check fails. */
+  error?: Error;
+}
+
+/** Shell helper exposed to tool actions through SessionContext. */
+export interface AgentShell {
+  /** Runs `cmd` and resolves with its result once the returned promise settles. */
+  exec(cmd: string, options?: AgentShellOptions): Promise<AgentShellResult>;
+}
+
+/**
+ * Session information and helpers passed as the optional second argument to
+ * SDK tool actions.
+ */
+export interface SessionContext {
+  /** Identifier of the current session. */
+  sessionId: string;
+  /** Conversation history as a list of Content entries. */
+  transcript: Content[];
+  /** Working directory associated with the session. */
+  cwd: string;
+  /** Time the context was created; the agent fills this with `new Date().toISOString()`. */
+  timestamp: string;
+  /** Helper for reading and writing files. */
+  fs: AgentFilesystem;
+  /** Helper for running shell commands. */
+  shell: AgentShell;
+  /** The agent that created this context. */
+  agent: GeminiCliAgent;
+}
@@ -0,0 +1,2 @@
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"checkSystemStatus","args":{}}}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7070,"candidatesTokenCount":3,"totalTokenCount":7073,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7070}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":3}]}}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The system status check"}],"role":"model"}}],"usageMetadata":{"promptTokenCount":9850,"totalTokenCount":9850,"promptTokensDetails":[{"modality":"TEXT","tokenCount":9850}]}},{"candidates":[{"content":{"parts":[{"text":" returned an error. It says `Error: Standard error caught`."}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7082,"candidatesTokenCount":17,"totalTokenCount":7099,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7082}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":17}]}}]}
@@ -0,0 +1,2 @@
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"failVisible","args":{"input":"fail"}}}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7073,"candidatesTokenCount":4,"totalTokenCount":7077,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7073}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":4}]}}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The"}],"role":"model"}}],"usageMetadata":{"promptTokenCount":9867,"totalTokenCount":9867,"promptTokensDetails":[{"modality":"TEXT","tokenCount":9867}]}},{"candidates":[{"content":{"parts":[{"text":" tool failed visibly with the error message: \"Error: Tool failed visibly\"."}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7085,"candidatesTokenCount":16,"totalTokenCount":7101,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7085}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":16}]}}]}
@@ -0,0 +1,2 @@
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"add","args":{"a":5,"b":3}}}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7045,"candidatesTokenCount":5,"totalTokenCount":7050,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7045}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"8"}],"role":"model"}}],"usageMetadata":{"promptTokenCount":9849,"totalTokenCount":9849,"promptTokensDetails":[{"modality":"TEXT","tokenCount":9849}]}},{"candidates":[{"content":{"parts":[{"text":""}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7053,"candidatesTokenCount":1,"totalTokenCount":7054,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7053}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":1}]}}]}