From f76e24c00f53e86cf39b6865ce28ead9cf9d345c Mon Sep 17 00:00:00 2001 From: Michael Bleigh Date: Fri, 13 Feb 2026 12:48:35 -0800 Subject: [PATCH] feat(sdk): Implement dynamic system instructions (#18863) Co-authored-by: N. Taylor Mullen --- packages/sdk/SDK_DESIGN.md | 64 +++++++- packages/sdk/src/agent.integration.test.ts | 154 ++++++++++++++++++ packages/sdk/src/agent.ts | 38 ++++- .../test-data/agent-async-instructions.json | 4 + .../test-data/agent-dynamic-instructions.json | 4 + .../test-data/agent-static-instructions.json | 1 + 6 files changed, 258 insertions(+), 7 deletions(-) create mode 100644 packages/sdk/src/agent.integration.test.ts create mode 100644 packages/sdk/test-data/agent-async-instructions.json create mode 100644 packages/sdk/test-data/agent-dynamic-instructions.json create mode 100644 packages/sdk/test-data/agent-static-instructions.json diff --git a/packages/sdk/SDK_DESIGN.md b/packages/sdk/SDK_DESIGN.md index 8daf6a4bb7..de0db24100 100644 --- a/packages/sdk/SDK_DESIGN.md +++ b/packages/sdk/SDK_DESIGN.md @@ -1,9 +1,15 @@ # `Gemini CLI SDK` +> **Implementation Status:** Core agent loop, tool execution, and session +> context are implemented. Advanced features like hooks, skills, subagents, and +> ACP are currently missing. + # `Examples` ## `Simple Example` +> **Status:** Implemented. `GeminiCliAgent` supports `cwd` and `sendStream`. + Equivalent to `gemini -p "what does this project do?"`. Loads all workspace and user settings. @@ -27,6 +33,9 @@ Validation: ## `System Instructions` +> **Status:** Implemented. Both static string instructions and dynamic functions +> (receiving `SessionContext`) are supported. + System instructions can be provided by a static string OR dynamically via a function: @@ -47,6 +56,9 @@ Validation: ## `Custom Tools` +> **Status:** Implemented. `tool()` helper and `GeminiCliAgent` support custom +> tool definitions and execution. 
+ ```ts import { GeminiCliAgent, tool, z } from "@google/gemini-cli-sdk"; @@ -74,6 +86,8 @@ Validation: ## `Custom Hooks` +> **Status:** Not Implemented. + SDK users can provide programmatic custom hooks ```ts @@ -127,6 +141,8 @@ Validation (these are probably hardest to validate): ## `Custom Skills` +> **Status:** Not Implemented. + Custom skills can be referenced by individual directories or by "skill roots" (directories containing many skills). @@ -157,6 +173,8 @@ const mySkill = skill({ ## `Subagents` +> **Status:** Not Implemented. + ```ts import { GeminiCliAgent, subagent } from "@google/gemini-cli"; @@ -181,6 +199,8 @@ const agent = new GeminiCliAgent({ ## `Extensions` +> **Status:** Not Implemented. + Potentially the most important feature of the Gemini CLI SDK is support for extensions, which modularly encapsulate all of the primitives listed above: @@ -201,6 +221,8 @@ INSTRUCTIONS", ## `ACP Mode` +> **Status:** Not Implemented. + The SDK will include a wrapper utility to interact with the agent via ACP instead of the SDK's natural API. @@ -219,12 +241,17 @@ client.send({...clientMessage}); // e.g. a "session/prompt" message ## `Approvals / Policies` +> **Status:** Not Implemented. + TODO # `Implementation Guidance` ## `Session Context` +> **Status:** Implemented. `SessionContext` interface exists and is passed to +> tools. + Whenever executing a tool, hook, command, or skill, a SessionContext object should be passed as an additional argument after the arguments/payload. 
The interface should look something like: @@ -245,18 +272,27 @@ export interface SessionContext { } export interface AgentFilesystem { - readFile(path: string): Promise<string | null> - writeFile(path: string, content: string): Promise<void> - // consider others including delete, globbing, etc but read/write are bare minimum } + readFile(path: string): Promise<string | null>; + writeFile(path: string, content: string): Promise<void>; + // consider others including delete, globbing, etc but read/write are bare minimum +} export interface AgentShell { // simple promise-based execution that blocks until complete - exec(cmd: string, options?: AgentShellOptions): Promise<{exitCode: number, output: string, stdout: string, stderr: string}> + exec( + cmd: string, + options?: AgentShellOptions, + ): Promise<{ + exitCode: number; + output: string; + stdout: string; + stderr: string; + }>; start(cmd: string, options?: AgentShellOptions): AgentShellProcess; } export interface AgentShellOptions { - env?: Record<string, string>; + env?: Record<string, string>; timeoutSeconds?: number; } @@ -277,3 +313,21 @@ export interface AgentShellProcess { the same session id? - Presumably the transcript is kept updated in memory and also persisted to disk by default? + +# `Next Steps` + +Based on the current implementation status, we can proceed with: + +## Feature 2: Custom Skills Support + +Implement support for loading and registering custom skills. This involves +adding a `skills` option to `GeminiCliAgentOptions` and implementing the logic +to read skill definitions from directories. + +**Tasks:** + +1. Add `skills` option to `GeminiCliAgentOptions`. +2. Implement `skillDir` and `skillRoot` helpers to load skills from the + filesystem. +3. Update `GeminiCliAgent` to register loaded skills with the internal tool + registry.
diff --git a/packages/sdk/src/agent.integration.test.ts b/packages/sdk/src/agent.integration.test.ts new file mode 100644 index 0000000000..5226e30e06 --- /dev/null +++ b/packages/sdk/src/agent.integration.test.ts @@ -0,0 +1,154 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { GeminiCliAgent } from './agent.js'; +import * as path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { dirname } from 'node:path'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +// Set this to true locally when you need to update snapshots +const RECORD_MODE = process.env['RECORD_NEW_RESPONSES'] === 'true'; + +const getGoldenPath = (name: string) => + path.resolve(__dirname, '../test-data', `${name}.json`); + +describe('GeminiCliAgent Integration', () => { + it('handles static instructions', async () => { + const goldenFile = getGoldenPath('agent-static-instructions'); + + const agent = new GeminiCliAgent({ + instructions: 'You are a pirate. Respond in pirate speak.', + model: 'gemini-2.0-flash', + recordResponses: RECORD_MODE ? goldenFile : undefined, + fakeResponses: RECORD_MODE ? undefined : goldenFile, + }); + + const events = []; + const stream = agent.sendStream('Say hello.'); + + for await (const event of stream) { + events.push(event); + } + + const textEvents = events.filter((e) => e.type === 'content'); + const responseText = textEvents + .map((e) => (typeof e.value === 'string' ? e.value : '')) + .join(''); + + // Expect pirate speak + expect(responseText.toLowerCase()).toMatch(/ahoy|matey|arrr/); + }, 30000); + + it('handles dynamic instructions', async () => { + const goldenFile = getGoldenPath('agent-dynamic-instructions'); + + let callCount = 0; + const agent = new GeminiCliAgent({ + instructions: (_ctx) => { + callCount++; + return `You are a helpful assistant. The secret number is ${callCount}. 
Always mention the secret number when asked.`; + }, + model: 'gemini-2.0-flash', + recordResponses: RECORD_MODE ? goldenFile : undefined, + fakeResponses: RECORD_MODE ? undefined : goldenFile, + }); + + // First turn + const stream1 = agent.sendStream('What is the secret number?'); + const events1 = []; + for await (const event of stream1) { + events1.push(event); + } + const responseText1 = events1 + .filter((e) => e.type === 'content') + .map((e) => (typeof e.value === 'string' ? e.value : '')) + .join(''); + + expect(responseText1).toContain('1'); + expect(callCount).toBe(1); + + // Second turn + const stream2 = agent.sendStream('What is the secret number now?'); + const events2 = []; + for await (const event of stream2) { + events2.push(event); + } + const responseText2 = events2 + .filter((e) => e.type === 'content') + .map((e) => (typeof e.value === 'string' ? e.value : '')) + .join(''); + + // Should still be 1 because instructions are only loaded once per session + expect(responseText2).toContain('1'); + expect(callCount).toBe(1); + }, 30000); + + it('handles async dynamic instructions', async () => { + const goldenFile = getGoldenPath('agent-async-instructions'); + + let callCount = 0; + const agent = new GeminiCliAgent({ + instructions: async (_ctx) => { + await new Promise((resolve) => setTimeout(resolve, 10)); // Simulate async work + callCount++; + return `You are a helpful assistant. The secret number is ${callCount}. Always mention the secret number when asked.`; + }, + model: 'gemini-2.0-flash', + recordResponses: RECORD_MODE ? goldenFile : undefined, + fakeResponses: RECORD_MODE ? undefined : goldenFile, + }); + + // First turn + const stream1 = agent.sendStream('What is the secret number?'); + const events1 = []; + for await (const event of stream1) { + events1.push(event); + } + const responseText1 = events1 + .filter((e) => e.type === 'content') + .map((e) => (typeof e.value === 'string' ? 
e.value : '')) + .join(''); + + expect(responseText1).toContain('1'); + expect(callCount).toBe(1); + + // Second turn + const stream2 = agent.sendStream('What is the secret number now?'); + const events2 = []; + for await (const event of stream2) { + events2.push(event); + } + const responseText2 = events2 + .filter((e) => e.type === 'content') + .map((e) => (typeof e.value === 'string' ? e.value : '')) + .join(''); + + // Should still be 1 because instructions are only loaded once per session + expect(responseText2).toContain('1'); + expect(callCount).toBe(1); + }, 30000); + + it('throws when dynamic instructions fail', async () => { + const agent = new GeminiCliAgent({ + instructions: () => { + throw new Error('Dynamic instruction failure'); + }, + model: 'gemini-2.0-flash', + }); + + const stream = agent.sendStream('Say hello.'); + + await expect(async () => { + for await (const _event of stream) { + // Just consume the stream + } + }).rejects.toThrow('Dynamic instruction failure'); + }); +}); diff --git a/packages/sdk/src/agent.ts b/packages/sdk/src/agent.ts index 21defe1ab6..a63414bddd 100644 --- a/packages/sdk/src/agent.ts +++ b/packages/sdk/src/agent.ts @@ -24,8 +24,12 @@ import { SdkAgentFilesystem } from './fs.js'; import { SdkAgentShell } from './shell.js'; import type { SessionContext } from './types.js'; +export type SystemInstructions = + | string + | ((context: SessionContext) => string | Promise<string>); + export interface GeminiCliAgentOptions { - instructions: string; + instructions: SystemInstructions; // eslint-disable-next-line @typescript-eslint/no-explicit-any tools?: Array>; model?: string; @@ -39,18 +43,24 @@ export class GeminiCliAgent { private config: Config; // eslint-disable-next-line @typescript-eslint/no-explicit-any private tools: Array>; + private readonly instructions: SystemInstructions; + private instructionsLoaded = false; constructor(options: GeminiCliAgentOptions) { + this.instructions = options.instructions; const cwd = options.cwd
|| process.cwd(); this.tools = options.tools || []; + const initialMemory = + typeof this.instructions === 'string' ? this.instructions : ''; + const configParams: ConfigParameters = { sessionId: `sdk-${Date.now()}`, targetDir: cwd, cwd, debugMode: options.debug ?? false, model: options.model || PREVIEW_GEMINI_MODEL_AUTO, - userMemory: options.instructions, + userMemory: initialMemory, // Minimal config enableHooks: false, mcpEnabled: false, @@ -94,6 +104,30 @@ export class GeminiCliAgent { { text: prompt }, ]; + if (!this.instructionsLoaded && typeof this.instructions === 'function') { + const context: SessionContext = { + sessionId, + transcript: client.getHistory(), + cwd: this.config.getWorkingDir(), + timestamp: new Date().toISOString(), + fs, + shell, + agent: this, + }; + try { + const newInstructions = await this.instructions(context); + this.config.setUserMemory(newInstructions); + client.updateSystemInstruction(); + this.instructionsLoaded = true; + } catch (e) { + const error = + e instanceof Error + ? 
e + : new Error(`Error resolving dynamic instructions: ${String(e)}`); + throw error; + } + } + while (true) { // sendMessageStream returns AsyncGenerator const stream = client.sendMessageStream(request, abortSignal, sessionId); diff --git a/packages/sdk/test-data/agent-async-instructions.json b/packages/sdk/test-data/agent-async-instructions.json new file mode 100644 index 0000000000..833467ad84 --- /dev/null +++ b/packages/sdk/test-data/agent-async-instructions.json @@ -0,0 +1,4 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The secret number is"}],"role":"model"}}],"usageMetadata":{"promptTokenCount":9831,"totalTokenCount":9831,"promptTokensDetails":[{"modality":"TEXT","tokenCount":9831}]}},{"candidates":[{"content":{"parts":[{"text":" 1.\n"}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7098,"candidatesTokenCount":8,"totalTokenCount":7106,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7098}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":8}]}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The secret number is"}],"role":"model"}}],"usageMetadata":{"promptTokenCount":9848,"totalTokenCount":9848,"promptTokensDetails":[{"modality":"TEXT","tokenCount":9848}]}},{"candidates":[{"content":{"parts":[{"text":" 1.\n"}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7113,"candidatesTokenCount":8,"totalTokenCount":7121,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7113}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":8}]}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The secret number is"}],"role":"model"}}],"usageMetadata":{"promptTokenCount":9853,"totalTokenCount":9853,"promptTokensDetails":[{"modality":"TEXT","tokenCount":9853}]}},{"candidates":[{"content":{"parts":[{"text":" 
1.\n"}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7120,"candidatesTokenCount":8,"totalTokenCount":7128,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7120}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":8}]}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The secret number is"}],"role":"model"}}],"usageMetadata":{"promptTokenCount":9870,"totalTokenCount":9870,"promptTokensDetails":[{"modality":"TEXT","tokenCount":9870}]}},{"candidates":[{"content":{"parts":[{"text":" 1.\n"}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7135,"candidatesTokenCount":8,"totalTokenCount":7143,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7135}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":8}]}}]} diff --git a/packages/sdk/test-data/agent-dynamic-instructions.json b/packages/sdk/test-data/agent-dynamic-instructions.json new file mode 100644 index 0000000000..833467ad84 --- /dev/null +++ b/packages/sdk/test-data/agent-dynamic-instructions.json @@ -0,0 +1,4 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The secret number is"}],"role":"model"}}],"usageMetadata":{"promptTokenCount":9831,"totalTokenCount":9831,"promptTokensDetails":[{"modality":"TEXT","tokenCount":9831}]}},{"candidates":[{"content":{"parts":[{"text":" 1.\n"}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7098,"candidatesTokenCount":8,"totalTokenCount":7106,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7098}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":8}]}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The secret number is"}],"role":"model"}}],"usageMetadata":{"promptTokenCount":9848,"totalTokenCount":9848,"promptTokensDetails":[{"modality":"TEXT","tokenCount":9848}]}},{"candidates":[{"content":{"parts":[{"text":" 
1.\n"}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7113,"candidatesTokenCount":8,"totalTokenCount":7121,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7113}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":8}]}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The secret number is"}],"role":"model"}}],"usageMetadata":{"promptTokenCount":9853,"totalTokenCount":9853,"promptTokensDetails":[{"modality":"TEXT","tokenCount":9853}]}},{"candidates":[{"content":{"parts":[{"text":" 1.\n"}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7120,"candidatesTokenCount":8,"totalTokenCount":7128,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7120}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":8}]}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The secret number is"}],"role":"model"}}],"usageMetadata":{"promptTokenCount":9870,"totalTokenCount":9870,"promptTokensDetails":[{"modality":"TEXT","tokenCount":9870}]}},{"candidates":[{"content":{"parts":[{"text":" 1.\n"}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7135,"candidatesTokenCount":8,"totalTokenCount":7143,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7135}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":8}]}}]} diff --git a/packages/sdk/test-data/agent-static-instructions.json b/packages/sdk/test-data/agent-static-instructions.json new file mode 100644 index 0000000000..733c1915e7 --- /dev/null +++ b/packages/sdk/test-data/agent-static-instructions.json @@ -0,0 +1 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Ah"}],"role":"model"}}],"usageMetadata":{"promptTokenCount":9828,"totalTokenCount":9828,"promptTokensDetails":[{"modality":"TEXT","tokenCount":9828}]}},{"candidates":[{"content":{"parts":[{"text":"oy, matey! 
Ready to chart a course through the code?"}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":7095,"candidatesTokenCount":15,"totalTokenCount":7110,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7095}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":15}]}}]}