mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-10 14:10:37 -07:00
feat(sdk): implements SessionContext for SDK tool calls (#18862)
This commit is contained in:
@@ -21,3 +21,4 @@ junit.xml
|
||||
Thumbs.db
|
||||
.pytest_cache
|
||||
**/SKILL.md
|
||||
packages/sdk/test-data/*.json
|
||||
|
||||
@@ -192,3 +192,6 @@ export * from './agents/types.js';
|
||||
// Export stdio utils
|
||||
export * from './utils/stdio.js';
|
||||
export * from './utils/terminal.js';
|
||||
|
||||
// Export types from @google/genai
|
||||
export type { Content, Part, FunctionCall } from '@google/genai';
|
||||
|
||||
279
packages/sdk/SDK_DESIGN.md
Normal file
279
packages/sdk/SDK_DESIGN.md
Normal file
@@ -0,0 +1,279 @@
|
||||
# `Gemini CLI SDK`
|
||||
|
||||
# `Examples`
|
||||
|
||||
## `Simple Example`
|
||||
|
||||
Equivalent to `gemini -p "what does this project do?"`. Loads all workspace and
|
||||
user settings.
|
||||
|
||||
```ts
|
||||
import { GeminiCliAgent } from '@google/gemini-cli-sdk';
|
||||
|
||||
const simpleAgent = new GeminiCliAgent({
|
||||
cwd: '/path/to/some/dir',
|
||||
});
|
||||
|
||||
for await (const chunk of simpleAgent.sendStream(
|
||||
'what does this project do?',
|
||||
)) {
|
||||
console.log(chunk); // equivalent to JSON streaming chunks (probably?) for now
|
||||
}
|
||||
```
|
||||
|
||||
Validation:
|
||||
|
||||
- Model receives call containing "what does this project do?" text.
|
||||
|
||||
## `System Instructions`
|
||||
|
||||
System instructions can be provided by a static string OR dynamically via a
|
||||
function:
|
||||
|
||||
```ts
|
||||
import { GeminiCliAgent } from "@google/gemini-cli-sdk";
|
||||
|
||||
const agent = new GeminiCliAgent({
|
||||
instructions: "This is a static string instruction", // this is valid
|
||||
instructions: (ctx) => `The current time is ${new Date().toISOString()} in session ${ctx.sessionId}.`
|
||||
});
|
||||
```
|
||||
|
||||
Validation:
|
||||
|
||||
- Static string instructions show up where GEMINI.md content normally would in
|
||||
model call
|
||||
- Dynamic instructions show up and contain dynamic content.
|
||||
|
||||
## `Custom Tools`
|
||||
|
||||
```ts
|
||||
import { GeminiCliAgent, tool, z } from "@google/gemini-cli-sdk";
|
||||
|
||||
const addTool = tool({
|
||||
name: 'add',
|
||||
description: 'add two numbers',
|
||||
inputSchema: z.object({
|
||||
a: z.number().describe('first number to add'),
|
||||
b: z.number().describe('second number to add'),
|
||||
}),
|
||||
}, ({a, b}) => ({result: a + b}));
|
||||
|
||||
const toolAgent = new GeminiCliAgent({
|
||||
tools: [addTool],
|
||||
});
|
||||
|
||||
const result = await toolAgent.send("what is 23 + 79?");
|
||||
console.log(result.text);
|
||||
```
|
||||
|
||||
Validation:
|
||||
|
||||
- Model receives tool definition in prompt
|
||||
- Model receives the tool response after it returns a tool call
|
||||
|
||||
## `Custom Hooks`
|
||||
|
||||
SDK users can provide programmatic custom hooks
|
||||
|
||||
```ts
|
||||
import { GeminiCliAgent, hook, z } from '@google/gemini-cli-sdk';
|
||||
import { reformat } from './reformat.js';
|
||||
|
||||
const myHook = hook(
|
||||
{
|
||||
event: 'AfterTool',
|
||||
name: 'reformat',
|
||||
matcher: 'write_file',
|
||||
},
|
||||
async (hook, ctx) => {
|
||||
const filePath = hook.toolInput.path;
|
||||
|
||||
// void return is a no-op
|
||||
if (!filePath.endsWith('.ts')) return;
|
||||
|
||||
// ctx.fs gives us a filesystem interface that obeys Gemini CLI permissions/sandbox
|
||||
const reformatted = await reformat(await ctx.fs.readFile(filePath));
|
||||
await ctx.fs.writeFile(filePath, reformatted);
|
||||
|
||||
// hooks return a payload instructing the agent how to proceed
|
||||
return {
|
||||
hookSpecificOutput: {
|
||||
additionalContext: `Reformatted file ${filePath}, read again before modifying further.`,
|
||||
},
|
||||
};
|
||||
},
|
||||
);
|
||||
```
|
||||
|
||||
SDK Hooks can also run as standalone scripts to implement userland "command"
|
||||
style hooks:
|
||||
|
||||
```ts
|
||||
import { hook } from "@google/gemini-cli-sdk";
|
||||
|
||||
// define a hook as above
|
||||
const myHook = hook({...}, (hook) => {...});
|
||||
// calling runAsCommand parses stdin, calls action, uses appropriate exit code
|
||||
// with output, but you get nice strong typings to guide your impl
|
||||
myHook.runAsCommand();
|
||||
```
|
||||
|
||||
Validation (these are probably hardest to validate):
|
||||
|
||||
- Test each type of hook and check that model api receives injected content
|
||||
- Check global halt scenarios
|
||||
- Check specific return types for each type of hook
|
||||
|
||||
## `Custom Skills`
|
||||
|
||||
Custom skills can be referenced by individual directories or by "skill roots"
|
||||
(directories containing many skills).
|
||||
|
||||
```ts
|
||||
import { GeminiCliAgent, skillDir, skillRoot } from '@google/gemini-cli-sdk';
|
||||
|
||||
const agent = new GeminiCliAgent({
|
||||
skills: [skillDir('/path/to/single/skill'), skillRoot('/path/to/skills/dir')],
|
||||
});
|
||||
```
|
||||
|
||||
**NOTE:** I would like to support fully in-memory skills (including reference
|
||||
files); however, it seems like that would currently require a pretty significant
|
||||
refactor so we'll focus on filesystem skills for now. In an ideal future state,
|
||||
we could do something like:
|
||||
|
||||
```ts
|
||||
import { GeminiCliAgent, skill } from '@google/gemini-cli-sdk';
|
||||
|
||||
const mySkill = skill({
|
||||
name: 'my-skill',
|
||||
description: 'description of when my skill should be used',
|
||||
content: 'This is the SKILL.md content',
|
||||
// it can also be a function
|
||||
content: (ctx) => `This is dynamic content.`,
|
||||
});
|
||||
```
|
||||
|
||||
## `Subagents`
|
||||
|
||||
```ts
|
||||
import { GeminiCliAgent, subagent } from "@google/gemini-cli-sdk";
|
||||
|
||||
const mySubagent = subagent({
|
||||
name: "my-subagent",
|
||||
description: "when the subagent should be used",
|
||||
|
||||
// simple prompt agent with static string or dynamic string
|
||||
instructions: "the instructions",
|
||||
instructions: (prompt, ctx) => `can also be dynamic with context`,
|
||||
|
||||
// OR (in an ideal world)...
|
||||
|
||||
// pass a full standalone agent
|
||||
agent: new GeminiCliAgent(...),
|
||||
});
|
||||
|
||||
const agent = new GeminiCliAgent({
|
||||
subagents: [mySubagent]
|
||||
});
|
||||
```
|
||||
|
||||
## `Extensions`
|
||||
|
||||
Potentially the most important feature of the Gemini CLI SDK is support for
|
||||
extensions, which modularly encapsulate all of the primitives listed above:
|
||||
|
||||
```ts
|
||||
import { GeminiCliAgent, extension } from "@google/gemini-cli-sdk";
|
||||
|
||||
const myExtension = extension({
|
||||
name: "my-extension",
|
||||
description: "...",
|
||||
instructions: "THESE ARE CONCATENATED WITH OTHER AGENT INSTRUCTIONS",
|
||||
tools: [...],
|
||||
skills: [...],
|
||||
hooks: [...],
|
||||
subagents: [...],
|
||||
});
|
||||
```
|
||||
|
||||
## `ACP Mode`
|
||||
|
||||
The SDK will include a wrapper utility to interact with the agent via ACP
|
||||
instead of the SDK's natural API.
|
||||
|
||||
```ts
|
||||
import { GeminiCliAgent } from "@google/gemini-cli-sdk";
|
||||
import { GeminiCliAcpServer } from "@google/gemini-cli-sdk/acp";
|
||||
|
||||
const server = new GeminiCliAcpServer(new GeminiCliAgent({...}));
|
||||
server.start(); // calling start runs a stdio ACP server
|
||||
|
||||
const client = server.connect({
|
||||
onMessage: (message) => { /* updates etc received here */ },
|
||||
});
|
||||
client.send({...clientMessage}); // e.g. a "session/prompt" message
|
||||
```
|
||||
|
||||
## `Approvals / Policies`
|
||||
|
||||
TODO
|
||||
|
||||
# `Implementation Guidance`
|
||||
|
||||
## `Session Context`
|
||||
|
||||
Whenever executing a tool, hook, command, or skill, a SessionContext object
|
||||
should be passed as an additional argument after the arguments/payload. The
|
||||
interface should look something like:
|
||||
|
||||
```ts
|
||||
export interface SessionContext {
|
||||
// translations of existing common hook payload info
|
||||
sessionId: string;
|
||||
transcript: Message[];
|
||||
cwd: string;
|
||||
timestamp: string;
|
||||
|
||||
// helpers to access files and run shell commands while adhering to policies/validation
|
||||
fs: AgentFilesystem;
|
||||
shell: AgentShell;
|
||||
// the agent itself is passed as context
|
||||
agent: GeminiCliAgent;
|
||||
}
|
||||
|
||||
export interface AgentFilesystem {
|
||||
readFile(path: string): Promise<string | null>
|
||||
writeFile(path: string, content: string): Promise<void>
|
||||
// consider others including delete, globbing, etc but read/write are bare minimum
}
|
||||
|
||||
export interface AgentShell {
|
||||
// simple promise-based execution that blocks until complete
|
||||
exec(cmd: string, options?: AgentShellOptions): Promise<{exitCode: number, output: string, stdout: string, stderr: string}>
|
||||
start(cmd: string, options?: AgentShellOptions): AgentShellProcess;
|
||||
}
|
||||
|
||||
export interface AgentShellOptions {
|
||||
env?: Record<string,string>;
|
||||
timeoutSeconds?: number;
|
||||
}
|
||||
|
||||
export interface AgentShellProcess {
|
||||
// figure out how to have a streaming shell process here that supports stdin too
|
||||
// investigate how Gemini CLI already does this
|
||||
}
|
||||
```
|
||||
|
||||
# `Notes`
|
||||
|
||||
- To validate the SDK, it would be useful to have a robust way to mock the
|
||||
underlying model API so that the tests could be closer to end-to-end but still
|
||||
deterministic.
|
||||
- Need to work in both Gemini-CLI-triggered approvals and optional
|
||||
developer-initiated user prompts / HITL stuff.
|
||||
- Need to think about how subagents inherit message context \- e.g. do they have
|
||||
the same session id?
|
||||
- Presumably the transcript is kept updated in memory and also persisted to disk
|
||||
by default?
|
||||
73
packages/sdk/examples/session-context.ts
Normal file
73
packages/sdk/examples/session-context.ts
Normal file
@@ -0,0 +1,73 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { GeminiCliAgent, tool, z } from '../src/index.js';
|
||||
|
||||
/**
 * Example entry point: registers a `get_context` tool that inspects the
 * SessionContext passed to SDK tool actions, then streams the agent's answer
 * to stdout.
 */
async function main() {
  const getContextTool = tool(
    {
      name: 'get_context',
      description: 'Get information about the current session context.',
      inputSchema: z.object({}),
    },
    async (_params, context) => {
      if (!context) {
        return { error: 'Context not available' };
      }

      const { sessionId, cwd, timestamp } = context;
      console.log('Session Context Accessed:');
      console.log(`- Session ID: ${sessionId}`);
      console.log(`- CWD: ${cwd}`);
      console.log(`- Timestamp: ${timestamp}`);

      // Try to read a file (e.g., package.json in the CWD).
      // Note: This relies on the agent running in a directory with package.json.
      const readPackageJson = async () => {
        try {
          return await context.fs.readFile('package.json');
        } catch (e) {
          console.log(`- Could not read package.json: ${e}`);
          return null;
        }
      };

      // Try to run a simple shell command and keep its trimmed output.
      const runEcho = async () => {
        try {
          const { output } = await context.shell.exec(
            'echo "Hello from SDK Shell"',
          );
          return output.trim();
        } catch (e) {
          console.log(`- Could not run shell command: ${e}`);
          return null;
        }
      };

      // Run the probes one after the other so any log lines keep their order.
      const fileContent = await readPackageJson();
      const shellOutput = await runEcho();

      return {
        sessionId,
        cwd,
        hasFsAccess: Boolean(context.fs),
        hasShellAccess: Boolean(context.shell),
        packageJsonExists: Boolean(fileContent),
        shellEcho: shellOutput,
      };
    },
  );

  const agent = new GeminiCliAgent({
    instructions:
      'You are a helpful assistant. Use the get_context tool to tell me about my environment.',
    tools: [getContextTool],
    // Set CWD to the package root so package.json exists
    cwd: process.cwd(),
  });

  console.log("Sending prompt: 'What is my current session context?'");
  const stream = agent.sendStream('What is my current session context?');
  for await (const chunk of stream) {
    // Only content chunks carry text to print; skip everything else.
    if (chunk.type !== 'content') {
      continue;
    }
    process.stdout.write(chunk.value || '');
  }
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -7,29 +7,38 @@
|
||||
import {
|
||||
Config,
|
||||
type ConfigParameters,
|
||||
AuthType,
|
||||
PREVIEW_GEMINI_MODEL_AUTO,
|
||||
GeminiEventType,
|
||||
type ToolCallRequestInfo,
|
||||
type ServerGeminiStreamEvent,
|
||||
type GeminiClient,
|
||||
type Content,
|
||||
scheduleAgentTools,
|
||||
getAuthTypeFromEnv,
|
||||
AuthType,
|
||||
type ToolRegistry,
|
||||
} from '@google/gemini-cli-core';
|
||||
|
||||
import { type Tool, SdkTool, type z } from './tool.js';
|
||||
import { type Tool, SdkTool } from './tool.js';
|
||||
import { SdkAgentFilesystem } from './fs.js';
|
||||
import { SdkAgentShell } from './shell.js';
|
||||
import type { SessionContext } from './types.js';
|
||||
|
||||
export interface GeminiCliAgentOptions {
|
||||
instructions: string;
|
||||
tools?: Array<Tool<z.ZodType>>;
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
tools?: Array<Tool<any>>;
|
||||
model?: string;
|
||||
cwd?: string;
|
||||
debug?: boolean;
|
||||
recordResponses?: string;
|
||||
fakeResponses?: string;
|
||||
}
|
||||
|
||||
export class GeminiCliAgent {
|
||||
private readonly config: Config;
|
||||
private readonly tools: Array<Tool<z.ZodType>>;
|
||||
private config: Config;
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
private tools: Array<Tool<any>>;
|
||||
|
||||
constructor(options: GeminiCliAgentOptions) {
|
||||
const cwd = options.cwd || process.cwd();
|
||||
@@ -46,6 +55,8 @@ export class GeminiCliAgent {
|
||||
enableHooks: false,
|
||||
mcpEnabled: false,
|
||||
extensionsEnabled: false,
|
||||
recordResponses: options.recordResponses,
|
||||
fakeResponses: options.fakeResponses,
|
||||
};
|
||||
|
||||
this.config = new Config(configParams);
|
||||
@@ -67,18 +78,21 @@ export class GeminiCliAgent {
|
||||
const messageBus = this.config.getMessageBus();
|
||||
|
||||
for (const toolDef of this.tools) {
|
||||
const sdkTool = new SdkTool(toolDef, messageBus);
|
||||
const sdkTool = new SdkTool(toolDef, messageBus, this);
|
||||
registry.registerTool(sdkTool);
|
||||
}
|
||||
}
|
||||
|
||||
const client = this.config.getGeminiClient();
|
||||
const abortSignal = signal ?? new AbortController().signal;
|
||||
const sessionId = this.config.getSessionId();
|
||||
|
||||
const fs = new SdkAgentFilesystem(this.config);
|
||||
const shell = new SdkAgentShell(this.config);
|
||||
|
||||
let request: Parameters<GeminiClient['sendMessageStream']>[0] = [
|
||||
{ text: prompt },
|
||||
];
|
||||
const abortSignal = signal ?? new AbortController().signal;
|
||||
const sessionId = this.config.getSessionId();
|
||||
|
||||
while (true) {
|
||||
// sendMessageStream returns AsyncGenerator<ServerGeminiStreamEvent, Turn>
|
||||
@@ -107,12 +121,35 @@ export class GeminiCliAgent {
|
||||
break;
|
||||
}
|
||||
|
||||
// Prepare SessionContext
|
||||
const transcript: Content[] = client.getHistory();
|
||||
const context: SessionContext = {
|
||||
sessionId,
|
||||
transcript,
|
||||
cwd: this.config.getWorkingDir(),
|
||||
timestamp: new Date().toISOString(),
|
||||
fs,
|
||||
shell,
|
||||
agent: this,
|
||||
};
|
||||
|
||||
// Create a scoped registry for this turn to bind context safely
|
||||
const originalRegistry = this.config.getToolRegistry();
|
||||
const scopedRegistry: ToolRegistry = Object.create(originalRegistry);
|
||||
scopedRegistry.getTool = (name: string) => {
|
||||
const tool = originalRegistry.getTool(name);
|
||||
if (tool instanceof SdkTool) {
|
||||
return tool.bindContext(context);
|
||||
}
|
||||
return tool;
|
||||
};
|
||||
|
||||
const completedCalls = await scheduleAgentTools(
|
||||
this.config,
|
||||
toolCallsToSchedule,
|
||||
{
|
||||
schedulerId: sessionId,
|
||||
toolRegistry: this.config.getToolRegistry(),
|
||||
toolRegistry: scopedRegistry,
|
||||
signal: abortSignal,
|
||||
},
|
||||
);
|
||||
|
||||
35
packages/sdk/src/fs.ts
Normal file
35
packages/sdk/src/fs.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import type { Config as CoreConfig } from '@google/gemini-cli-core';
import type { AgentFilesystem } from './types.js';
import fs from 'node:fs/promises';
import { resolve as resolvePath } from 'node:path';
|
||||
|
||||
/**
 * Filesystem helper exposed to SDK tools.
 *
 * Each path is first resolved against the agent's working directory
 * (`config.getWorkingDir()`), and that same absolute path is then both checked
 * with `config.validatePathAccess` and handed to `node:fs`. Resolving up front
 * keeps the validated path and the accessed path identical even when the
 * agent's working directory differs from `process.cwd()`.
 */
export class SdkAgentFilesystem implements AgentFilesystem {
  constructor(private readonly config: CoreConfig) {}

  /**
   * Reads a UTF-8 text file.
   *
   * @param path Absolute path, or a path relative to the agent's working directory.
   * @returns The file content, or `null` when access validation reports an
   *   error or the read itself fails (for example, the file does not exist).
   */
  async readFile(path: string): Promise<string | null> {
    const target = this.resolveInWorkingDir(path);
    // Denied access is deliberately reported the same way as an unreadable
    // file: callers only learn that no content is available.
    if (this.config.validatePathAccess(target, 'read')) {
      return null;
    }
    try {
      return await fs.readFile(target, 'utf-8');
    } catch {
      return null;
    }
  }

  /**
   * Writes a UTF-8 text file.
   *
   * @param path Absolute path, or a path relative to the agent's working directory.
   * @param content Text to write.
   * @throws Error carrying the validation message when access is denied;
   *   errors from the underlying write are propagated unchanged.
   */
  async writeFile(path: string, content: string): Promise<void> {
    const target = this.resolveInWorkingDir(path);
    const denial = this.config.validatePathAccess(target, 'write');
    if (denial) {
      throw new Error(denial);
    }
    await fs.writeFile(target, content, 'utf-8');
  }

  /** Turns a caller-supplied path into an absolute one rooted at the agent's working directory. */
  private resolveInWorkingDir(path: string): string {
    return resolvePath(this.config.getWorkingDir(), path);
  }
}
|
||||
@@ -6,3 +6,4 @@
|
||||
|
||||
export * from './agent.js';
|
||||
export * from './tool.js';
|
||||
export * from './types.js';
|
||||
|
||||
69
packages/sdk/src/shell.ts
Normal file
69
packages/sdk/src/shell.ts
Normal file
@@ -0,0 +1,69 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import type { Config as CoreConfig } from '@google/gemini-cli-core';
|
||||
import { ShellExecutionService, ShellTool } from '@google/gemini-cli-core';
|
||||
import type {
|
||||
AgentShell,
|
||||
AgentShellResult,
|
||||
AgentShellOptions,
|
||||
} from './types.js';
|
||||
|
||||
/**
 * Shell helper exposed to SDK tools. Commands are first checked through a
 * `ShellTool` invocation and then run via `ShellExecutionService`.
 */
export class SdkAgentShell implements AgentShell {
  constructor(private readonly config: CoreConfig) {}

  /**
   * Runs `command` and resolves once it has finished.
   *
   * Only `options.cwd` is consulted here; when it is absent or empty the
   * config's working directory is used.
   *
   * @returns The combined output and exit code on completion. When building
   *   the invocation fails, or the command would require confirmation, a
   *   result with empty output, `exitCode: 1` and the `error` is returned
   *   instead of throwing.
   */
  async exec(
    command: string,
    options?: AgentShellOptions,
  ): Promise<AgentShellResult> {
    const workingDir = options?.cwd || this.config.getWorkingDir();
    const { signal } = new AbortController();

    // Use ShellTool to check policy
    const policyTool = new ShellTool(this.config, this.config.getMessageBus());
    try {
      const needsConfirmation = await policyTool
        .build({ command, dir_path: workingDir })
        .shouldConfirmExecute(signal);
      if (needsConfirmation) {
        throw new Error(
          'Command execution requires confirmation but no interactive session is available.',
        );
      }
    } catch (error) {
      // Policy/build failures are reported as a failed result, not a rejection.
      let failure: Error;
      if (error instanceof Error) {
        failure = error;
      } else {
        failure = new Error(String(error));
      }
      return {
        output: '',
        stdout: '',
        stderr: '',
        exitCode: 1,
        error: failure,
      };
    }

    const noopOutputHandler = () => {}; // No-op output event handler for now
    const handle = await ShellExecutionService.execute(
      command,
      workingDir,
      noopOutputHandler,
      signal,
      false, // shouldUseNodePty: false for headless execution
      this.config.getShellExecutionConfig(),
    );
    const { output, exitCode } = await handle.result;

    return {
      output,
      stdout: output, // ShellExecutionService combines stdout/stderr usually
      stderr: '', // ShellExecutionService currently combines, so stderr is empty or mixed
      exitCode,
    };
  }
}
|
||||
147
packages/sdk/src/tool.integration.test.ts
Normal file
147
packages/sdk/src/tool.integration.test.ts
Normal file
@@ -0,0 +1,147 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { GeminiCliAgent } from './agent.js';
|
||||
import * as path from 'node:path';
|
||||
import { z } from 'zod';
|
||||
import { tool, ModelVisibleError } from './tool.js';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { dirname } from 'node:path';
|
||||
|
||||
// ES modules have no built-in __dirname; derive it from this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Set RECORD_NEW_RESPONSES=true locally when you need to update snapshots.
const RECORD_MODE = process.env['RECORD_NEW_RESPONSES'] === 'true';

/** Absolute path of the golden-response file `<name>.json` under ../test-data. */
function getGoldenPath(name: string) {
  const fileName = `${name}.json`;
  return path.resolve(__dirname, '../test-data', fileName);
}
|
||||
|
||||
describe('GeminiCliAgent Tool Integration', () => {
  /**
   * Agent options shared by every scenario.
   * If recording, use real model + record path.
   * If testing, use auto model + fake path.
   */
  const replayOptions = (goldenName: string) => {
    const goldenFile = getGoldenPath(goldenName);
    return {
      instructions: 'You are a helpful assistant.',
      model: RECORD_MODE ? 'gemini-2.0-flash' : undefined,
      recordResponses: RECORD_MODE ? goldenFile : undefined,
      fakeResponses: RECORD_MODE ? undefined : goldenFile,
    };
  };

  /** Drains the stream for `prompt` and concatenates the text of all content events. */
  const collectResponseText = async (agent: GeminiCliAgent, prompt: string) => {
    const events = [];
    for await (const event of agent.sendStream(prompt)) {
      events.push(event);
    }

    let responseText = '';
    for (const e of events) {
      if (e.type === 'content') {
        responseText += typeof e.value === 'string' ? e.value : '';
      }
    }
    return responseText;
  };

  it('handles tool execution success', async () => {
    const addTool = tool(
      {
        name: 'add',
        description: 'Adds two numbers',
        inputSchema: z.object({ a: z.number(), b: z.number() }),
      },
      async ({ a, b }) => a + b,
    );
    const agent = new GeminiCliAgent({
      ...replayOptions('tool-success'),
      tools: [addTool],
    });

    const responseText = await collectResponseText(agent, 'What is 5 + 3?');

    expect(responseText).toContain('8');
  });

  it('handles ModelVisibleError correctly', async () => {
    const failVisibleTool = tool(
      {
        name: 'failVisible',
        description: 'Fails with a visible error if input is "fail"',
        inputSchema: z.object({ input: z.string() }),
      },
      async ({ input }) => {
        if (input === 'fail') {
          throw new ModelVisibleError('Tool failed visibly');
        }
        return 'Success';
      },
    );
    const agent = new GeminiCliAgent({
      ...replayOptions('tool-error-recovery'),
      tools: [failVisibleTool],
    });

    // Force the model to trigger the error first, then hopefully recover or at least acknowledge it.
    // The prompt is crafted to make the model try 'fail' first.
    const responseText = await collectResponseText(
      agent,
      'Call the tool with "fail". If it fails, tell me the error message.',
    );

    // The model should see the error "Tool failed visibly" and report it back.
    expect(responseText).toContain('Tool failed visibly');
  });

  it('handles sendErrorsToModel: true correctly', async () => {
    const statusTool = tool(
      {
        name: 'checkSystemStatus',
        description: 'Checks the current system status',
        inputSchema: z.object({}),
        sendErrorsToModel: true,
      },
      async () => {
        throw new Error('Standard error caught');
      },
    );
    const agent = new GeminiCliAgent({
      ...replayOptions('tool-catchall-error'),
      tools: [statusTool],
    });

    const responseText = await collectResponseText(
      agent,
      'Check the system status and report any errors.',
    );

    // The model should report the caught standard error.
    expect(responseText.toLowerCase()).toContain('error');
  });
});
|
||||
143
packages/sdk/src/tool.test.ts
Normal file
143
packages/sdk/src/tool.test.ts
Normal file
@@ -0,0 +1,143 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
import { SdkTool, tool, ModelVisibleError } from './tool.js';
|
||||
import type { MessageBus } from '@google/gemini-cli-core';
|
||||
|
||||
// Mock MessageBus
|
||||
const mockMessageBus = {} as unknown as MessageBus;
|
||||
|
||||
describe('tool()', () => {
  // Both cases use the same schema and a trivial action; only the
  // sendErrorsToModel flag differs between them.
  const inputSchema = z.object({ foo: z.string() });
  const action = async () => 'result';

  it('creates a tool definition with defaults', () => {
    const { name, description, sendErrorsToModel } = tool(
      { name: 'testTool', description: 'A test tool', inputSchema },
      action,
    );

    expect(name).toBe('testTool');
    expect(description).toBe('A test tool');
    // The flag is left unset rather than defaulted to false.
    expect(sendErrorsToModel).toBeUndefined();
  });

  it('creates a tool definition with explicit configuration', () => {
    const { sendErrorsToModel } = tool(
      {
        name: 'testTool',
        description: 'A test tool',
        inputSchema,
        sendErrorsToModel: true,
      },
      action,
    );

    expect(sendErrorsToModel).toBe(true);
  });
});
|
||||
|
||||
describe('SdkTool Execution', () => {
  // Every invocation gets its own, never-aborted signal.
  const freshSignal = () => new AbortController().signal;

  it('executes successfully', async () => {
    const toolDef = tool(
      {
        name: 'successTool',
        description: 'Always succeeds',
        inputSchema: z.object({ val: z.string() }),
      },
      async ({ val }) => `Success: ${val}`,
    );

    const wrapped = new SdkTool(toolDef, mockMessageBus);
    const call = wrapped.createInvocationWithContext(
      { val: 'test' },
      mockMessageBus,
      undefined,
    );
    const outcome = await call.execute(freshSignal());

    expect(outcome.llmContent).toBe('Success: test');
    expect(outcome.error).toBeUndefined();
  });

  it('throws standard Error by default', async () => {
    const toolDef = tool(
      {
        name: 'failTool',
        description: 'Always fails',
        inputSchema: z.object({}),
      },
      async () => {
        throw new Error('Standard error');
      },
    );

    const wrapped = new SdkTool(toolDef, mockMessageBus);
    const call = wrapped.createInvocationWithContext(
      {},
      mockMessageBus,
      undefined,
    );

    // Without sendErrorsToModel, a plain Error escapes execute().
    const pending = call.execute(freshSignal());
    await expect(pending).rejects.toThrow('Standard error');
  });

  it('catches ModelVisibleError and returns ToolResult error', async () => {
    const toolDef = tool(
      {
        name: 'visibleErrorTool',
        description: 'Fails with visible error',
        inputSchema: z.object({}),
      },
      async () => {
        throw new ModelVisibleError('Visible error');
      },
    );

    const wrapped = new SdkTool(toolDef, mockMessageBus);
    const call = wrapped.createInvocationWithContext(
      {},
      mockMessageBus,
      undefined,
    );
    const outcome = await call.execute(freshSignal());

    expect(outcome.error).toBeDefined();
    expect(outcome.error?.message).toBe('Visible error');
    expect(outcome.llmContent).toContain('Error: Visible error');
  });

  it('catches standard Error when sendErrorsToModel is true', async () => {
    const toolDef = tool(
      {
        name: 'catchAllTool',
        description: 'Catches all errors',
        inputSchema: z.object({}),
        sendErrorsToModel: true,
      },
      async () => {
        throw new Error('Standard error');
      },
    );

    const wrapped = new SdkTool(toolDef, mockMessageBus);
    const call = wrapped.createInvocationWithContext(
      {},
      mockMessageBus,
      undefined,
    );
    const outcome = await call.execute(freshSignal());

    expect(outcome.error).toBeDefined();
    expect(outcome.error?.message).toBe('Standard error');
    expect(outcome.llmContent).toContain('Error: Standard error');
  });
});
|
||||
@@ -14,28 +14,42 @@ import {
|
||||
Kind,
|
||||
type MessageBus,
|
||||
} from '@google/gemini-cli-core';
|
||||
import type { SessionContext } from './types.js';
|
||||
|
||||
export { z };
|
||||
|
||||
export interface ToolDefinition<T extends z.ZodType> {
|
||||
/**
 * Error type for tool actions that want to raise a ModelVisibleError.
 *
 * @param message Either the message text, or an existing Error to wrap. When
 *   an Error is wrapped, its message is reused and the original error is kept
 *   as `cause`, so its stack and type are not lost.
 */
export class ModelVisibleError extends Error {
  constructor(message: string | Error) {
    const wrapped = message instanceof Error ? message : undefined;
    super(
      wrapped ? wrapped.message : (message as string),
      // Only attach `cause` when there is an underlying error to preserve.
      wrapped ? { cause: wrapped } : undefined,
    );
    this.name = 'ModelVisibleError';
  }
}
|
||||
|
||||
export interface ToolDefinition<T extends z.ZodTypeAny> {
|
||||
name: string;
|
||||
description: string;
|
||||
inputSchema: T;
|
||||
sendErrorsToModel?: boolean;
|
||||
}
|
||||
|
||||
export interface Tool<T extends z.ZodType> extends ToolDefinition<T> {
|
||||
action: (params: z.infer<T>) => Promise<unknown>;
|
||||
export interface Tool<T extends z.ZodTypeAny> extends ToolDefinition<T> {
|
||||
action: (params: z.infer<T>, context?: SessionContext) => Promise<unknown>;
|
||||
}
|
||||
|
||||
class SdkToolInvocation<T extends z.ZodType> extends BaseToolInvocation<
|
||||
class SdkToolInvocation<T extends z.ZodTypeAny> extends BaseToolInvocation<
|
||||
z.infer<T>,
|
||||
ToolResult
|
||||
> {
|
||||
constructor(
|
||||
params: z.infer<T>,
|
||||
messageBus: MessageBus,
|
||||
private readonly action: (params: z.infer<T>) => Promise<unknown>,
|
||||
private readonly action: (
|
||||
params: z.infer<T>,
|
||||
context?: SessionContext,
|
||||
) => Promise<unknown>,
|
||||
private readonly context: SessionContext | undefined,
|
||||
toolName: string,
|
||||
private readonly sendErrorsToModel: boolean = false,
|
||||
) {
|
||||
super(params, messageBus, toolName);
|
||||
}
|
||||
@@ -49,7 +63,7 @@ class SdkToolInvocation<T extends z.ZodType> extends BaseToolInvocation<
|
||||
_updateOutput?: (output: string) => void,
|
||||
): Promise<ToolResult> {
|
||||
try {
|
||||
const result = await this.action(this.params);
|
||||
const result = await this.action(this.params, this.context);
|
||||
const output =
|
||||
typeof result === 'string' ? result : JSON.stringify(result, null, 2);
|
||||
return {
|
||||
@@ -57,26 +71,31 @@ class SdkToolInvocation<T extends z.ZodType> extends BaseToolInvocation<
|
||||
returnDisplay: output,
|
||||
};
|
||||
} catch (error) {
|
||||
const errorMessage =
|
||||
error instanceof Error ? error.message : String(error);
|
||||
return {
|
||||
llmContent: `Error: ${errorMessage}`,
|
||||
returnDisplay: `Error: ${errorMessage}`,
|
||||
error: {
|
||||
message: errorMessage,
|
||||
},
|
||||
};
|
||||
if (this.sendErrorsToModel || error instanceof ModelVisibleError) {
|
||||
const errorMessage =
|
||||
error instanceof Error ? error.message : String(error);
|
||||
return {
|
||||
llmContent: `Error: ${errorMessage}`,
|
||||
returnDisplay: `Error: ${errorMessage}`,
|
||||
error: {
|
||||
message: errorMessage,
|
||||
},
|
||||
};
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export class SdkTool<T extends z.ZodType> extends BaseDeclarativeTool<
|
||||
export class SdkTool<T extends z.ZodTypeAny> extends BaseDeclarativeTool<
|
||||
z.infer<T>,
|
||||
ToolResult
|
||||
> {
|
||||
constructor(
|
||||
private readonly definition: Tool<T>,
|
||||
messageBus: MessageBus,
|
||||
_agent?: unknown,
|
||||
private readonly context?: SessionContext,
|
||||
) {
|
||||
super(
|
||||
definition.name,
|
||||
@@ -88,6 +107,26 @@ export class SdkTool<T extends z.ZodType> extends BaseDeclarativeTool<
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Builds a new `SdkTool` from this tool's definition and message bus with the
 * given session context attached. This instance is left untouched.
 *
 * @param context Session context the returned tool hands to its action.
 * @returns A fresh `SdkTool` carrying `context`.
 */
bindContext(context: SessionContext): SdkTool<T> {
  const { definition, messageBus } = this;
  return new SdkTool(definition, messageBus, undefined, context);
}
|
||||
|
||||
/**
 * Creates an invocation of this tool's action with an explicit session
 * context.
 *
 * @param params Input for the action, typed as `z.infer<T>`.
 * @param messageBus Message bus handed to the invocation.
 * @param context Context for this call; when falsy, the context stored on
 *   this tool (if any) is used instead.
 * @param toolName Name for the invocation; when falsy, this tool's own name
 *   is used.
 * @returns The invocation wrapping the definition's action.
 */
createInvocationWithContext(
  params: z.infer<T>,
  messageBus: MessageBus,
  context: SessionContext | undefined,
  toolName?: string,
): ToolInvocation<z.infer<T>, ToolResult> {
  const { action, sendErrorsToModel } = this.definition;
  const effectiveContext = context || this.context;
  const effectiveName = toolName || this.name;

  return new SdkToolInvocation(
    params,
    messageBus,
    action,
    effectiveContext,
    effectiveName,
    sendErrorsToModel,
  );
}
|
||||
|
||||
protected createInvocation(
|
||||
params: z.infer<T>,
|
||||
messageBus: MessageBus,
|
||||
@@ -97,14 +136,16 @@ export class SdkTool<T extends z.ZodType> extends BaseDeclarativeTool<
|
||||
params,
|
||||
messageBus,
|
||||
this.definition.action,
|
||||
this.context,
|
||||
toolName || this.name,
|
||||
this.definition.sendErrorsToModel,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
export function tool<T extends z.ZodType>(
|
||||
export function tool<T extends z.ZodTypeAny>(
|
||||
definition: ToolDefinition<T>,
|
||||
action: (params: z.infer<T>) => Promise<unknown>,
|
||||
action: (params: z.infer<T>, context?: SessionContext) => Promise<unknown>,
|
||||
): Tool<T> {
|
||||
return {
|
||||
...definition,
|
||||
|
||||
41
packages/sdk/src/types.ts
Normal file
41
packages/sdk/src/types.ts
Normal file
@@ -0,0 +1,41 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import type { Content } from '@google/gemini-cli-core';
|
||||
import type { GeminiCliAgent } from './agent.js';
|
||||
|
||||
/** File access surface exposed to tool actions through `SessionContext.fs`. */
export interface AgentFilesystem {
  /**
   * Reads the file at `path`.
   *
   * @returns The file contents, or `null`.
   *   NOTE(review): presumably `null` means the file is missing or unreadable;
   *   confirm against the implementation.
   */
  readFile(path: string): Promise<string | null>;

  /** Writes `content` to the file at `path`. */
  writeFile(path: string, content: string): Promise<void>;
}
|
||||
|
||||
/** Optional settings accepted by `AgentShell.exec`. */
export interface AgentShellOptions {
  /** Environment variables for the command, as name/value pairs. */
  env?: Record<string, string>;

  /** Time limit for the command, in seconds. */
  timeoutSeconds?: number;

  /** Working directory for the command. */
  cwd?: string;
}
|
||||
|
||||
/** Outcome of a command run through `AgentShell.exec`. */
export interface AgentShellResult {
  /**
   * Exit code of the command, or `null`.
   * NOTE(review): presumably `null` when the process did not exit normally;
   * confirm against the implementation.
   */
  exitCode: number | null;

  /**
   * Command output.
   * NOTE(review): relationship to `stdout`/`stderr` (e.g. combined stream) is
   * not visible here; confirm against the implementation.
   */
  output: string;

  /** Text captured from standard output. */
  stdout: string;

  /** Text captured from standard error. */
  stderr: string;

  /** Error associated with the run, if any. */
  error?: Error;
}
|
||||
|
||||
/** Shell access surface exposed to tool actions through `SessionContext.shell`. */
export interface AgentShell {
  /**
   * Runs a shell command.
   *
   * @param cmd Command to run.
   * @param options Optional environment, timeout and working directory.
   * @returns A promise of the command's result.
   */
  exec(cmd: string, options?: AgentShellOptions): Promise<AgentShellResult>;
}
|
||||
|
||||
/**
 * Per-session information and capabilities passed to SDK tool actions as
 * their optional second argument.
 */
export interface SessionContext {
  /** Identifier of the session. */
  sessionId: string;

  /** Conversation contents for the session. */
  transcript: Content[];

  /** Working directory of the session. */
  cwd: string;

  /**
   * Timestamp as a string.
   * NOTE(review): format (e.g. ISO 8601) is not fixed by this type; confirm
   * against the code that builds the context.
   */
  timestamp: string;

  /** File read/write access. */
  fs: AgentFilesystem;

  /** Shell command execution. */
  shell: AgentShell;

  /** The agent this session belongs to. */
  agent: GeminiCliAgent;
}
|
||||
2
packages/sdk/test-data/tool-catchall-error.json
Normal file
2
packages/sdk/test-data/tool-catchall-error.json
Normal file
@@ -0,0 +1,2 @@
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"checkSystemStatus","args":{}}}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7070,"candidatesTokenCount":3,"totalTokenCount":7073,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7070}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":3}]}}]}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The system status check"}],"role":"model"}}],"usageMetadata":{"promptTokenCount":9850,"totalTokenCount":9850,"promptTokensDetails":[{"modality":"TEXT","tokenCount":9850}]}},{"candidates":[{"content":{"parts":[{"text":" returned an error. It says `Error: Standard error caught`."}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7082,"candidatesTokenCount":17,"totalTokenCount":7099,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7082}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":17}]}}]}
|
||||
2
packages/sdk/test-data/tool-error-recovery.json
Normal file
2
packages/sdk/test-data/tool-error-recovery.json
Normal file
@@ -0,0 +1,2 @@
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"failVisible","args":{"input":"fail"}}}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7073,"candidatesTokenCount":4,"totalTokenCount":7077,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7073}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":4}]}}]}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The"}],"role":"model"}}],"usageMetadata":{"promptTokenCount":9867,"totalTokenCount":9867,"promptTokensDetails":[{"modality":"TEXT","tokenCount":9867}]}},{"candidates":[{"content":{"parts":[{"text":" tool failed visibly with the error message: \"Error: Tool failed visibly\"."}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7085,"candidatesTokenCount":16,"totalTokenCount":7101,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7085}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":16}]}}]}
|
||||
2
packages/sdk/test-data/tool-success.json
Normal file
2
packages/sdk/test-data/tool-success.json
Normal file
@@ -0,0 +1,2 @@
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"add","args":{"a":5,"b":3}}}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7045,"candidatesTokenCount":5,"totalTokenCount":7050,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7045}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"8"}],"role":"model"}}],"usageMetadata":{"promptTokenCount":9849,"totalTokenCount":9849,"promptTokensDetails":[{"modality":"TEXT","tokenCount":9849}]}},{"candidates":[{"content":{"parts":[{"text":""}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7053,"candidatesTokenCount":1,"totalTokenCount":7054,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7053}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":1}]}}]}
|
||||
Reference in New Issue
Block a user