fix: separate instructions from API declaration to prevent API error

This commit is contained in:
Aishanee Shah
2026-02-17 14:14:27 +00:00
parent 0b2fcb84f4
commit 7550fc3d93
9 changed files with 1318 additions and 1094 deletions
+116 -38
View File
@@ -1,64 +1,142 @@
# Gemini CLI Strict Development Rules
These rules apply strictly to all code modifications and additions within the Gemini CLI project.
These rules apply strictly to all code modifications and additions within the
Gemini CLI project.
## Testing Guidelines
* **Async/Await**: Always use `waitFor` from `packages/cli/src/test-utils/async.ts` instead of `vi.waitFor` for all `waitFor` calls within `packages/cli`. NEVER use fixed waits (e.g., `await delay(100)`). Always use `waitFor` with a predicate to ensure tests are stable and fast. Using the wrong `waitFor` can result in flaky tests and `act` warnings.
* **React Testing**: Use `act` to wrap all blocks in tests that change component state. Use `render` or `renderWithProviders` from `packages/cli/src/test-utils/render.tsx` instead of `render` from `ink-testing-library` directly. This prevents spurious `act` warnings. If test cases specify providers directly, consider whether the existing `renderWithProviders` should be modified.
* **Snapshots**: Use `toMatchSnapshot` to verify that rendering works as expected rather than matching against the raw content of the output. When modifying snapshots, verify the changes are intentional and do not hide underlying bugs.
* **Parameterized Tests**: Use parameterized tests where it reduces duplicated lines. Give the parameters explicit types to ensure the tests are type-safe.
* **Mocks Management**:
* Mock critical dependencies (`fs`, `os`, `child_process`) ONLY at the top of the file. Ideally, avoid mocking these dependencies altogether.
* Reuse existing mocks and fakes rather than creating new ones.
* Avoid mocking the file system whenever possible. If using the real file system is too difficult, consider writing an integration test instead.
* Always call `vi.restoreAllMocks()` in `afterEach` to prevent test pollution.
* Use `vi.useFakeTimers()` for tests involving time-based logic to avoid flakiness.
* **Typing in Tests**: Avoid using `any` in tests; prefer proper types or `unknown` with narrowing.
- **Async/Await**: Always use `waitFor` from
`packages/cli/src/test-utils/async.ts` instead of `vi.waitFor` for all
`waitFor` calls within `packages/cli`. NEVER use fixed waits (e.g.,
`await delay(100)`). Always use `waitFor` with a predicate to ensure tests are
stable and fast. Using the wrong `waitFor` can result in flaky tests and `act`
warnings.
- **React Testing**: Use `act` to wrap all blocks in tests that change component
state. Use `render` or `renderWithProviders` from
`packages/cli/src/test-utils/render.tsx` instead of `render` from
`ink-testing-library` directly. This prevents spurious `act` warnings. If test
cases specify providers directly, consider whether the existing
`renderWithProviders` should be modified.
- **Snapshots**: Use `toMatchSnapshot` to verify that rendering works as
expected rather than matching against the raw content of the output. When
modifying snapshots, verify the changes are intentional and do not hide
underlying bugs.
- **Parameterized Tests**: Use parameterized tests where it reduces duplicated
lines. Give the parameters explicit types to ensure the tests are type-safe.
- **Mocks Management**:
- Mock critical dependencies (`fs`, `os`, `child_process`) ONLY at the top of
the file. Ideally, avoid mocking these dependencies altogether.
- Reuse existing mocks and fakes rather than creating new ones.
- Avoid mocking the file system whenever possible. If using the real file
system is too difficult, consider writing an integration test instead.
- Always call `vi.restoreAllMocks()` in `afterEach` to prevent test pollution.
- Use `vi.useFakeTimers()` for tests involving time-based logic to avoid
flakiness.
- **Typing in Tests**: Avoid using `any` in tests; prefer proper types or
`unknown` with narrowing.
## React Guidelines (`packages/cli`)
* **`setState` and Side Effects**: NEVER trigger side effects from within the body of a `setState` callback. Use a reducer or `useRef` if necessary. These cases have historically introduced multiple bugs; typically, they should be resolved using a reducer.
* **Rendering**: Do not introduce infinite rendering loops. Avoid synchronous file I/O in React components as it will hang the UI. Do not implement new logic for custom string measurement or string truncation. Use Ink layout instead, leveraging `ResizeObserver` as needed.
* **Keyboard Handling**: Keyboard handling MUST go through `useKeyPress.ts` from the Gemini CLI package rather than the standard ink library. This library supports reporting multiple keyboard events sequentially in the same React frame (critical for slow terminals). Handling this correctly often requires reducers to ensure multiple state updates are handled gracefully without overriding values. Refer to `text-buffer.ts` for a canonical example.
* **Logging**: Do not leave `console.log`, `console.warn`, or `console.error` in the code.
* **State & Effects**: Ensure state initialization is explicit (e.g., use `undefined` rather than `true` as a default if the state is truly unknown). Carefully manage `useEffect` dependencies. Prefer a reducer whenever practical. NEVER disable `react-hooks/exhaustive-deps`; fix the code to correctly declare dependencies instead.
* **Context & Props**: Avoid excessive property drilling. Leverage existing providers, extend them, or propose a new one if necessary. Only use providers for properties that are consistent across the entire application.
* **Code Structure**: Avoid complex `if` statements where `switch` statements could be used. Keep `AppContainer` minimal; refactor complex logic into React hooks. Evaluate whether business logic should be added to `hookSystem.ts` or integrated into `packages/core` rather than `packages/cli`.
- **`setState` and Side Effects**: NEVER trigger side effects from within the
body of a `setState` callback. Use a reducer or `useRef` if necessary. These
cases have historically introduced multiple bugs; typically, they should be
resolved using a reducer.
- **Rendering**: Do not introduce infinite rendering loops. Avoid synchronous
file I/O in React components as it will hang the UI. Do not implement new
logic for custom string measurement or string truncation. Use Ink layout
instead, leveraging `ResizeObserver` as needed.
- **Keyboard Handling**: Keyboard handling MUST go through `useKeyPress.ts` from
the Gemini CLI package rather than the standard Ink library. `useKeyPress.ts`
supports reporting multiple keyboard events sequentially in the same React
frame (critical for slow terminals). Handling this correctly often requires
reducers to ensure multiple state updates are handled gracefully without
overriding values. Refer to `text-buffer.ts` for a canonical example.
- **Logging**: Do not leave `console.log`, `console.warn`, or `console.error` in
the code.
- **State & Effects**: Ensure state initialization is explicit (e.g., use
`undefined` rather than `true` as a default if the state is truly unknown).
Carefully manage `useEffect` dependencies. Prefer a reducer whenever
practical. NEVER disable `react-hooks/exhaustive-deps`; fix the code to
correctly declare dependencies instead.
- **Context & Props**: Avoid excessive property drilling. Leverage existing
providers, extend them, or propose a new one if necessary. Only use providers
for properties that are consistent across the entire application.
- **Code Structure**: Avoid complex `if` statements where `switch` statements
could be used. Keep `AppContainer` minimal; refactor complex logic into React
hooks. Evaluate whether business logic should be added to `hookSystem.ts` or
integrated into `packages/core` rather than `packages/cli`.
## Core Guidelines (`packages/core`)
* **Services**: Implement services as classes with clear lifecycle management (e.g., `initialize()` methods). Services should be stateless where possible, or use the centralized `Storage` service for persistence.
* **Cross-Service Communication**: Prefer using the `coreEvents` bus (from `packages/core/src/utils/events.ts`) for asynchronous communication between services or to notify the UI of state changes. Avoid tight coupling between services.
* **Utilities**: Use `debugLogger` from `packages/core/src/utils/debugLogger.ts` for internal logging instead of `console`. Ensure all shell operations use `spawnAsync` from `packages/core/src/utils/shell-utils.ts` for consistent error handling and promise management. Handle filesystem errors gracefully using `isNodeError` from `packages/core/src/utils/errors.ts`.
* **Exports & Tooling**: Add new tools to `packages/core/src/tools/` and register them in `packages/core/src/tools/tool-registry.ts`. Export all new public services, utilities, and types from `packages/core/src/index.ts`.
- **Services**: Implement services as classes with clear lifecycle management
(e.g., `initialize()` methods). Services should be stateless where possible,
or use the centralized `Storage` service for persistence.
- **Cross-Service Communication**: Prefer using the `coreEvents` bus (from
`packages/core/src/utils/events.ts`) for asynchronous communication between
services or to notify the UI of state changes. Avoid tight coupling between
services.
- **Utilities**: Use `debugLogger` from `packages/core/src/utils/debugLogger.ts`
for internal logging instead of `console`. Ensure all shell operations use
`spawnAsync` from `packages/core/src/utils/shell-utils.ts` for consistent
error handling and promise management. Handle filesystem errors gracefully
using `isNodeError` from `packages/core/src/utils/errors.ts`.
- **Exports & Tooling**: Add new tools to `packages/core/src/tools/` and
register them in `packages/core/src/tools/tool-registry.ts`. Export all new
public services, utilities, and types from `packages/core/src/index.ts`.
## Architectural Audit (Package Boundaries)
* **Logic Placement**: Non-UI logic (e.g., model orchestration, tool implementation, git/filesystem operations) MUST reside in `packages/core`. `packages/cli` should ONLY contain UI/Ink components, command-line argument parsing, and user interaction logic.
* **Environment Isolation**: Core logic must not assume a TUI environment. Use the `ConfirmationBus` or `Output` abstractions for communicating with the user from Core.
* **Decoupling**: Actively look for opportunities to decouple services using `coreEvents`. If a service imports another just to notify it of a change, use an event instead.
- **Logic Placement**: Non-UI logic (e.g., model orchestration, tool
implementation, git/filesystem operations) MUST reside in `packages/core`.
`packages/cli` should ONLY contain UI/Ink components, command-line argument
parsing, and user interaction logic.
- **Environment Isolation**: Core logic must not assume a TUI environment. Use
the `ConfirmationBus` or `Output` abstractions for communicating with the user
from Core.
- **Decoupling**: Actively look for opportunities to decouple services using
`coreEvents`. If a service imports another just to notify it of a change, use
an event instead.
## General Gemini CLI Design Principles
* **Settings**: Use settings for user-configurable options rather than adding new command line arguments. Add new settings to `packages/cli/src/config/settingsSchema.ts`. If a setting has `showInDialog: true`, it MUST be documented in `docs/get-started/configuration.md`. Ensure `requiresRestart` is correctly set.
* **Logging**: Use `debugLogger` for rethrown errors to avoid duplicate logging.
* **Keyboard Shortcuts**: Define all new keyboard shortcuts in `packages/cli/src/config/keyBindings.ts` and document them in `docs/cli/keyboard-shortcuts.md`. Be careful of keybindings that require the `Meta` key, as only certain meta key shortcuts are supported on Mac. Avoid function keys and shortcuts commonly bound in VSCode.
- **Settings**: Use settings for user-configurable options rather than adding
new command line arguments. Add new settings to
`packages/cli/src/config/settingsSchema.ts`. If a setting has
`showInDialog: true`, it MUST be documented in
`docs/get-started/configuration.md`. Ensure `requiresRestart` is correctly
set.
- **Logging**: Use `debugLogger` for rethrown errors to avoid duplicate logging.
- **Keyboard Shortcuts**: Define all new keyboard shortcuts in
`packages/cli/src/config/keyBindings.ts` and document them in
`docs/cli/keyboard-shortcuts.md`. Be careful of keybindings that require the
`Meta` key, as only certain meta key shortcuts are supported on Mac. Avoid
function keys and shortcuts commonly bound in VSCode.
## TypeScript Best Practices
* Use `checkExhaustive` in the `default` clause of `switch` statements to ensure all cases are handled.
* Avoid using the non-null assertion operator (`!`) unless absolutely necessary.
* **STRICT TYPING**: Strictly forbid `any` and `unknown` in both CLI and Core packages. `unknown` is only allowed if it is immediately narrowed using type guards or Zod validation.
* NEVER disable `@typescript-eslint/no-floating-promises`.
* Avoid making types nullable unless strictly necessary, as it hurts readability.
- Use `checkExhaustive` in the `default` clause of `switch` statements to ensure
all cases are handled.
- Avoid using the non-null assertion operator (`!`) unless absolutely necessary.
- **STRICT TYPING**: Strictly forbid `any` in both CLI and Core packages.
`unknown` is only allowed if it is immediately narrowed using type guards or
Zod validation.
- NEVER disable `@typescript-eslint/no-floating-promises`.
- Avoid making types nullable unless strictly necessary, as it hurts
readability.
## TUI Best Practices
* **Terminal Compatibility**: Consider how changes might behave differently across terminals (e.g., VSCode terminal, SSH, Kitty, default Mac terminal, iTerm2, Windows terminal). If modifying keyboard handling, integrate deeply with existing files like `KeypressContext.tsx` and `terminalCapabilityManager.ts`.
* **iTerm**: Be aware that `ITERM_SESSION_ID` may be present when users run VSCode from within iTerm, even if the terminal is not iTerm.
- **Terminal Compatibility**: Consider how changes might behave differently
across terminals (e.g., VSCode terminal, SSH, Kitty, default Mac terminal,
iTerm2, Windows terminal). If modifying keyboard handling, integrate deeply
with existing files like `KeypressContext.tsx` and
`terminalCapabilityManager.ts`.
- **iTerm**: Be aware that `ITERM_SESSION_ID` may be present when users run
VSCode from within iTerm, even if the terminal is not iTerm.
## Code Cleanup
* **Refactoring**: Actively clean up code duplication, technical debt, and boilerplate ("AI Slop") when working in the codebase.
* **Prompts**: Be aware that changes can impact the prompts sent to Gemini CLI and affect overall quality.
- **Refactoring**: Actively clean up code duplication, technical debt, and
boilerplate ("AI Slop") when working in the codebase.
- **Prompts**: Be aware that changes can impact the prompts sent to Gemini CLI
and affect overall quality.
@@ -2,6 +2,7 @@
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: activate_skill 1`] = `
{
"declaration": {
"description": "Activates a specialized agent skill by name (Available: 'skill1', 'skill2'). Returns the skill's instructions wrapped in \`<activated_skill>\` tags. These provide specialized guidance for the current task. Use this when you identify a task that matches a skill's description. ONLY use names exactly as they appear in the \`<available_skills>\` section.",
"name": "activate_skill",
"parametersJsonSchema": {
@@ -22,11 +23,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: activate_skill_empty 1`] = `
{
"declaration": {
"description": "Activates a specialized agent skill by name. Returns the skill's instructions wrapped in \`<activated_skill>\` tags. These provide specialized guidance for the current task. Use this when you identify a task that matches a skill's description. ONLY use names exactly as they appear in the \`<available_skills>\` section.",
"name": "activate_skill",
"parametersJsonSchema": {
@@ -43,11 +47,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: activate_skill_single 1`] = `
{
"declaration": {
"description": "Activates a specialized agent skill by name (Available: 'skill1'). Returns the skill's instructions wrapped in \`<activated_skill>\` tags. These provide specialized guidance for the current task. Use this when you identify a task that matches a skill's description. ONLY use names exactly as they appear in the \`<available_skills>\` section.",
"name": "activate_skill",
"parametersJsonSchema": {
@@ -67,11 +74,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: ask_user 1`] = `
{
"declaration": {
"description": "Ask the user one or more questions to gather preferences, clarify requirements, or make decisions.",
"name": "ask_user",
"parametersJsonSchema": {
@@ -145,11 +155,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: enter_plan_mode 1`] = `
{
"declaration": {
"description": "Switch to Plan Mode to safely research, design, and plan complex changes using read-only tools.",
"name": "enter_plan_mode",
"parametersJsonSchema": {
@@ -161,11 +174,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
},
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: exit_plan_mode 1`] = `
{
"declaration": {
"description": "Signals that the planning phase is complete and requests user approval to start implementation.",
"name": "exit_plan_mode",
"parametersJsonSchema": {
@@ -180,11 +196,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: get_internal_docs 1`] = `
{
"declaration": {
"description": "Returns the content of Gemini CLI internal documentation files. If no path is provided, returns a list of all available documentation paths.",
"name": "get_internal_docs",
"parametersJsonSchema": {
@@ -196,11 +215,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
},
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: glob 1`] = `
{
"declaration": {
"description": "Efficiently finds files matching specific glob patterns (e.g., \`src/**/*.ts\`, \`**/*.md\`), returning absolute paths sorted by modification time (newest first). Ideal for quickly locating files based on their name or path structure, especially in large codebases.",
"name": "glob",
"parametersJsonSchema": {
@@ -231,11 +253,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: google_web_search 1`] = `
{
"declaration": {
"description": "Performs a web search using Google Search (via the Gemini API) and returns the results. This tool is useful for finding information on the internet based on a query.",
"name": "google_web_search",
"parametersJsonSchema": {
@@ -250,11 +275,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: grep_search 1`] = `
{
"declaration": {
"description": "Searches for a regular expression pattern within file contents. Max 100 matches.",
"name": "grep_search",
"parametersJsonSchema": {
@@ -295,11 +323,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: grep_search_ripgrep 1`] = `
{
"declaration": {
"description": "Searches for a regular expression pattern within file contents.",
"name": "grep_search",
"parametersJsonSchema": {
@@ -366,11 +397,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: list_directory 1`] = `
{
"declaration": {
"description": "Lists the names of files and subdirectories directly within a specified directory path. Can optionally ignore entries matching provided glob patterns.",
"name": "list_directory",
"parametersJsonSchema": {
@@ -406,11 +440,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: read_file 1`] = `
{
"declaration": {
"description": "Reads and returns the content of a specified file. If the file is large, the content will be truncated. The tool's response will clearly indicate if truncation has occurred and will provide details on how to read more of the file using the 'offset' and 'limit' parameters. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files. For text files, it can read specific line ranges.",
"name": "read_file",
"parametersJsonSchema": {
@@ -433,11 +470,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: read_many_files 1`] = `
{
"declaration": {
"description": "Reads content from multiple files specified by glob patterns within a configured target directory. For text files, it concatenates their content into a single string. It is primarily designed for text-based files. However, it can also process image (e.g., .png, .jpg), audio (e.g., .mp3, .wav), and PDF (.pdf) files if their file names or extensions are explicitly included in the 'include' argument. For these explicitly requested non-text files, their data is read and included in a format suitable for model consumption (e.g., base64 encoded).
This tool is useful when you need to understand or analyze a collection of files, such as:
@@ -499,23 +539,17 @@ Use this tool when the user's query implies needing the content of several files
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: replace 1`] = `
{
"declaration": {
"description": "Replaces text within a file. By default, replaces a single occurrence, but can replace multiple occurrences when \`expected_replacements\` is specified. This tool requires providing significant context around the change to ensure precise targeting. Always use the read_file tool to examine the file's current content before attempting a text replacement.
The user has the ability to modify the \`new_string\` content. If modified, this will be stated in the response.",
"instructions": "
Expectation for required parameters:
1. \`old_string\` MUST be the exact literal text to replace (including all whitespace, indentation, newlines, and surrounding code etc.).
2. \`new_string\` MUST be the exact literal text to replace \`old_string\` with (also including all whitespace, indentation, newlines, and surrounding code etc.). Ensure the resulting code is correct and idiomatic and that \`old_string\` and \`new_string\` are different.
3. \`instruction\` is the detailed instruction of what needs to be changed. It is important to Make it specific and detailed so developers or large language models can understand what needs to be changed and perform the changes on their own if necessary.
4. NEVER escape \`old_string\` or \`new_string\`, that would break the exact literal text requirement.
**Important:** If ANY of the above are not satisfied, the tool will fail. CRITICAL for \`old_string\`: Must uniquely identify the single instance to change. Include at least 3 lines of context BEFORE and AFTER the target text, matching whitespace and indentation precisely. If this string matches multiple locations, or does not match exactly, the tool will fail.
5. Prefer to break down complex and long changes into multiple smaller atomic calls to this tool. Always check the content of the file after changes or not finding a string to match.
**Multiple replacements:** Set \`expected_replacements\` to the number of occurrences you want to replace. The tool will replace ALL occurrences that match \`old_string\` exactly. Ensure the number of replacements matches your expectation.",
"name": "replace",
"parametersJsonSchema": {
"properties": {
@@ -563,26 +597,23 @@ A good instruction should concisely answer:
],
"type": "object",
},
},
"instructions": "
Expectation for required parameters:
1. \`old_string\` MUST be the exact literal text to replace (including all whitespace, indentation, newlines, and surrounding code etc.).
2. \`new_string\` MUST be the exact literal text to replace \`old_string\` with (also including all whitespace, indentation, newlines, and surrounding code etc.). Ensure the resulting code is correct and idiomatic and that \`old_string\` and \`new_string\` are different.
3. \`instruction\` is the detailed instruction of what needs to be changed. It is important to Make it specific and detailed so developers or large language models can understand what needs to be changed and perform the changes on their own if necessary.
4. NEVER escape \`old_string\` or \`new_string\`, that would break the exact literal text requirement.
**Important:** If ANY of the above are not satisfied, the tool will fail. CRITICAL for \`old_string\`: Must uniquely identify the single instance to change. Include at least 3 lines of context BEFORE and AFTER the target text, matching whitespace and indentation precisely. If this string matches multiple locations, or does not match exactly, the tool will fail.
5. Prefer to break down complex and long changes into multiple smaller atomic calls to this tool. Always check the content of the file after changes or not finding a string to match.
**Multiple replacements:** Set \`expected_replacements\` to the number of occurrences you want to replace. The tool will replace ALL occurrences that match \`old_string\` exactly. Ensure the number of replacements matches your expectation.",
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: run_shell_command 1`] = `
{
"declaration": {
"description": "This tool executes a given shell command as \`bash -c <command>\`.",
"instructions": "To run a command in the background, set the \`is_background\` parameter to true. Do NOT use \`&\` to background commands. Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`.
Efficiency Guidelines:
- Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
The following information is returned:
Output: Combined stdout/stderr. Can be \`(empty)\` or partial on error and for any unwaited background processes.
Exit Code: Only included if non-zero (command failed).
Error: Only included if a process-level error occurred (e.g., spawn failure).
Signal: Only included if process was terminated by a signal.
Background PIDs: Only included if background processes were started.
Process Group PGID: Only included if available.",
"name": "run_shell_command",
"parametersJsonSchema": {
"properties": {
@@ -608,11 +639,27 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
],
"type": "object",
},
},
"instructions": "To run a command in the background, set the \`is_background\` parameter to true. Do NOT use \`&\` to background commands. Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`.
Efficiency Guidelines:
- Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
The following information is returned:
Output: Combined stdout/stderr. Can be \`(empty)\` or partial on error and for any unwaited background processes.
Exit Code: Only included if non-zero (command failed).
Error: Only included if a process-level error occurred (e.g., spawn failure).
Signal: Only included if process was terminated by a signal.
Background PIDs: Only included if background processes were started.
Process Group PGID: Only included if available.",
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: save_memory 1`] = `
{
"declaration": {
"description": "
Saves concise global user context (preferences, facts) for use across ALL workspaces.
@@ -635,11 +682,14 @@ NEVER save workspace-specific context, local paths, or commands (e.g. "The entry
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: web_fetch 1`] = `
{
"declaration": {
"description": "Processes content from URL(s), including local and private network addresses (e.g., localhost), embedded in a prompt. Include up to 20 URLs and instructions (e.g., summarize, extract specific data) directly in the 'prompt' parameter.",
"name": "web_fetch",
"parametersJsonSchema": {
@@ -654,11 +704,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: write_file 1`] = `
{
"declaration": {
"description": "Writes content to a specified file in the local filesystem.
The user has the ability to modify \`content\`. If modified, this will be stated in the response.",
@@ -680,11 +733,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snaps
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-2.5-pro > snapshot for tool: write_todos 1`] = `
{
"declaration": {
"description": "This tool can help you list out the current subtasks that are required to be completed for a given user request. The list of subtasks helps you keep track of the current task, organize complex queries and help ensure that you don't miss any steps. With this list, the user can also see the current progress you are making in executing a given task.
Depending on the task complexity, you should first divide a given task into subtasks and then use this tool to list out the subtasks that are required to be completed for a given user request.
@@ -731,23 +787,6 @@ Agent:
The agent did not use the todo list because this task could be completed by a tight loop of execute test->edit->execute test.
</reasoning>
</example>",
"instructions": "
## Task state definitions
- pending: Work has not begun on a given subtask.
- in_progress: Marked just prior to beginning work on a given subtask. You should only have one subtask as in_progress at a time.
- completed: Subtask was successfully completed with no errors or issues. If the subtask required more steps to complete, update the todo list with the subtasks. All steps should be identified as completed only when they are completed.
- cancelled: As you update the todo list, some tasks are not required anymore due to the dynamic nature of the task. In this case, mark the subtasks as cancelled.
## Methodology for using this tool
1. Use this todo list as soon as you receive a user request based on the complexity of the task.
2. Keep track of every subtask that you update the list with.
3. Mark a subtask as in_progress before you begin working on it. You should only have one subtask as in_progress at a time.
4. Update the subtask list as you proceed in executing the task. The subtask list is not static and should reflect your progress and current plans, which may evolve as you acquire new information.
5. Mark a subtask as completed when you have completed it.
6. Mark a subtask as cancelled if the subtask is no longer needed.
7. You must update the todo list as soon as you start, stop or cancel a subtask. Don't batch or wait to update the todo list.",
"name": "write_todos",
"parametersJsonSchema": {
"additionalProperties": false,
@@ -787,11 +826,30 @@ The agent did not use the todo list because this task could be completed by a ti
],
"type": "object",
},
},
"instructions": "
## Task state definitions
- pending: Work has not begun on a given subtask.
- in_progress: Marked just prior to beginning work on a given subtask. You should only have one subtask as in_progress at a time.
- completed: Subtask was successfully completed with no errors or issues. If the subtask required more steps to complete, update the todo list with the subtasks. All steps should be identified as completed only when they are completed.
- cancelled: As you update the todo list, some tasks are not required anymore due to the dynamic nature of the task. In this case, mark the subtasks as cancelled.
## Methodology for using this tool
1. Use this todo list as soon as you receive a user request based on the complexity of the task.
2. Keep track of every subtask that you update the list with.
3. Mark a subtask as in_progress before you begin working on it. You should only have one subtask as in_progress at a time.
4. Update the subtask list as you proceed in executing the task. The subtask list is not static and should reflect your progress and current plans, which may evolve as you acquire new information.
5. Mark a subtask as completed when you have completed it.
6. Mark a subtask as cancelled if the subtask is no longer needed.
7. You must update the todo list as soon as you start, stop or cancel a subtask. Don't batch or wait to update the todo list.",
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: activate_skill 1`] = `
{
"declaration": {
"description": "Activates a specialized agent skill by name (Available: 'skill1', 'skill2'). Returns the skill's instructions wrapped in \`<activated_skill>\` tags. These provide specialized guidance for the current task. Use this when you identify a task that matches a skill's description. ONLY use names exactly as they appear in the \`<available_skills>\` section.",
"name": "activate_skill",
"parametersJsonSchema": {
@@ -812,11 +870,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: activate_skill_empty 1`] = `
{
"declaration": {
"description": "Activates a specialized agent skill by name. Returns the skill's instructions wrapped in \`<activated_skill>\` tags. These provide specialized guidance for the current task. Use this when you identify a task that matches a skill's description. ONLY use names exactly as they appear in the \`<available_skills>\` section.",
"name": "activate_skill",
"parametersJsonSchema": {
@@ -833,11 +894,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: activate_skill_single 1`] = `
{
"declaration": {
"description": "Activates a specialized agent skill by name (Available: 'skill1'). Returns the skill's instructions wrapped in \`<activated_skill>\` tags. These provide specialized guidance for the current task. Use this when you identify a task that matches a skill's description. ONLY use names exactly as they appear in the \`<available_skills>\` section.",
"name": "activate_skill",
"parametersJsonSchema": {
@@ -857,11 +921,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: ask_user 1`] = `
{
"declaration": {
"description": "Ask the user one or more questions to gather preferences, clarify requirements, or make decisions.",
"name": "ask_user",
"parametersJsonSchema": {
@@ -935,11 +1002,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: enter_plan_mode 1`] = `
{
"declaration": {
"description": "Switch to Plan Mode to safely research, design, and plan complex changes using read-only tools.",
"name": "enter_plan_mode",
"parametersJsonSchema": {
@@ -951,11 +1021,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
},
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: exit_plan_mode 1`] = `
{
"declaration": {
"description": "Signals that the planning phase is complete and requests user approval to start implementation.",
"name": "exit_plan_mode",
"parametersJsonSchema": {
@@ -970,11 +1043,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: get_internal_docs 1`] = `
{
"declaration": {
"description": "Returns the content of Gemini CLI internal documentation files. If no path is provided, returns a list of all available documentation paths.",
"name": "get_internal_docs",
"parametersJsonSchema": {
@@ -986,11 +1062,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
},
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: glob 1`] = `
{
"declaration": {
"description": "Efficiently finds files matching specific glob patterns (e.g., \`src/**/*.ts\`, \`**/*.md\`), returning absolute paths sorted by modification time (newest first). Ideal for quickly locating files based on their name or path structure, especially in large codebases.",
"name": "glob",
"parametersJsonSchema": {
@@ -1021,11 +1100,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: google_web_search 1`] = `
{
"declaration": {
"description": "Performs a web search using Google Search (via the Gemini API) and returns the results. This tool is useful for finding information on the internet based on a query.",
"name": "google_web_search",
"parametersJsonSchema": {
@@ -1040,11 +1122,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: grep_search 1`] = `
{
"declaration": {
"description": "Searches for a regular expression pattern within file contents. Max 100 matches.",
"name": "grep_search",
"parametersJsonSchema": {
@@ -1085,11 +1170,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: grep_search_ripgrep 1`] = `
{
"declaration": {
"description": "Searches for a regular expression pattern within file contents.",
"name": "grep_search",
"parametersJsonSchema": {
@@ -1156,11 +1244,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: list_directory 1`] = `
{
"declaration": {
"description": "Lists the names of files and subdirectories directly within a specified directory path. Can optionally ignore entries matching provided glob patterns.",
"name": "list_directory",
"parametersJsonSchema": {
@@ -1196,11 +1287,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: read_file 1`] = `
{
"declaration": {
"description": "Reads and returns the content of a specified file. If the file is large, the content will be truncated. The tool's response will clearly indicate if truncation has occurred and will provide details on how to read more of the file using the 'offset' and 'limit' parameters. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files. For text files, it can read specific line ranges.",
"name": "read_file",
"parametersJsonSchema": {
@@ -1223,11 +1317,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: read_many_files 1`] = `
{
"declaration": {
"description": "Reads content from multiple files specified by glob patterns within a configured target directory. For text files, it concatenates their content into a single string. It is primarily designed for text-based files. However, it can also process image (e.g., .png, .jpg), audio (e.g., .mp3, .wav), and PDF (.pdf) files if their file names or extensions are explicitly included in the 'include' argument. For these explicitly requested non-text files, their data is read and included in a format suitable for model consumption (e.g., base64 encoded).
This tool is useful when you need to understand or analyze a collection of files, such as:
@@ -1289,23 +1386,17 @@ Use this tool when the user's query implies needing the content of several files
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: replace 1`] = `
{
"declaration": {
"description": "Replaces text within a file. By default, replaces a single occurrence, but can replace multiple occurrences when \`expected_replacements\` is specified. This tool requires providing significant context around the change to ensure precise targeting. Always use the read_file tool to examine the file's current content before attempting a text replacement.
The user has the ability to modify the \`new_string\` content. If modified, this will be stated in the response.",
"instructions": "
Expectation for required parameters:
1. \`old_string\` MUST be the exact literal text to replace (including all whitespace, indentation, newlines, and surrounding code etc.).
2. \`new_string\` MUST be the exact literal text to replace \`old_string\` with (also including all whitespace, indentation, newlines, and surrounding code etc.). Ensure the resulting code is correct and idiomatic and that \`old_string\` and \`new_string\` are different.
3. \`instruction\` is the detailed instruction of what needs to be changed. It is important to Make it specific and detailed so developers or large language models can understand what needs to be changed and perform the changes on their own if necessary.
4. NEVER escape \`old_string\` or \`new_string\`, that would break the exact literal text requirement.
**Important:** If ANY of the above are not satisfied, the tool will fail. CRITICAL for \`old_string\`: Must uniquely identify the single instance to change. Include at least 3 lines of context BEFORE and AFTER the target text, matching whitespace and indentation precisely. If this string matches multiple locations, or does not match exactly, the tool will fail.
5. Prefer to break down complex and long changes into multiple smaller atomic calls to this tool. Always check the content of the file after changes or not finding a string to match.
**Multiple replacements:** Set \`expected_replacements\` to the number of occurrences you want to replace. The tool will replace ALL occurrences that match \`old_string\` exactly. Ensure the number of replacements matches your expectation.",
"name": "replace",
"parametersJsonSchema": {
"properties": {
@@ -1353,26 +1444,23 @@ A good instruction should concisely answer:
],
"type": "object",
},
},
"instructions": "
Expectation for required parameters:
1. \`old_string\` MUST be the exact literal text to replace (including all whitespace, indentation, newlines, and surrounding code etc.).
2. \`new_string\` MUST be the exact literal text to replace \`old_string\` with (also including all whitespace, indentation, newlines, and surrounding code etc.). Ensure the resulting code is correct and idiomatic and that \`old_string\` and \`new_string\` are different.
3. \`instruction\` is the detailed instruction of what needs to be changed. It is important to Make it specific and detailed so developers or large language models can understand what needs to be changed and perform the changes on their own if necessary.
4. NEVER escape \`old_string\` or \`new_string\`, that would break the exact literal text requirement.
**Important:** If ANY of the above are not satisfied, the tool will fail. CRITICAL for \`old_string\`: Must uniquely identify the single instance to change. Include at least 3 lines of context BEFORE and AFTER the target text, matching whitespace and indentation precisely. If this string matches multiple locations, or does not match exactly, the tool will fail.
5. Prefer to break down complex and long changes into multiple smaller atomic calls to this tool. Always check the content of the file after changes or not finding a string to match.
**Multiple replacements:** Set \`expected_replacements\` to the number of occurrences you want to replace. The tool will replace ALL occurrences that match \`old_string\` exactly. Ensure the number of replacements matches your expectation.",
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: run_shell_command 1`] = `
{
"declaration": {
"description": "This tool executes a given shell command as \`bash -c <command>\`.",
"instructions": "To run a command in the background, set the \`is_background\` parameter to true. Do NOT use \`&\` to background commands. Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`.
Efficiency Guidelines:
- Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
The following information is returned:
Output: Combined stdout/stderr. Can be \`(empty)\` or partial on error and for any unwaited background processes.
Exit Code: Only included if non-zero (command failed).
Error: Only included if a process-level error occurred (e.g., spawn failure).
Signal: Only included if process was terminated by a signal.
Background PIDs: Only included if background processes were started.
Process Group PGID: Only included if available.",
"name": "run_shell_command",
"parametersJsonSchema": {
"properties": {
@@ -1398,11 +1486,27 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
],
"type": "object",
},
},
"instructions": "To run a command in the background, set the \`is_background\` parameter to true. Do NOT use \`&\` to background commands. Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`.
Efficiency Guidelines:
- Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
- Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
The following information is returned:
Output: Combined stdout/stderr. Can be \`(empty)\` or partial on error and for any unwaited background processes.
Exit Code: Only included if non-zero (command failed).
Error: Only included if a process-level error occurred (e.g., spawn failure).
Signal: Only included if process was terminated by a signal.
Background PIDs: Only included if background processes were started.
Process Group PGID: Only included if available.",
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: save_memory 1`] = `
{
"declaration": {
"description": "
Saves concise global user context (preferences, facts) for use across ALL workspaces.
@@ -1425,11 +1529,14 @@ NEVER save workspace-specific context, local paths, or commands (e.g. "The entry
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: web_fetch 1`] = `
{
"declaration": {
"description": "Processes content from URL(s), including local and private network addresses (e.g., localhost), embedded in a prompt. Include up to 20 URLs and instructions (e.g., summarize, extract specific data) directly in the 'prompt' parameter.",
"name": "web_fetch",
"parametersJsonSchema": {
@@ -1444,11 +1551,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: write_file 1`] = `
{
"declaration": {
"description": "Writes content to a specified file in the local filesystem.
The user has the ability to modify \`content\`. If modified, this will be stated in the response.",
@@ -1470,11 +1580,14 @@ exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview >
],
"type": "object",
},
},
"instructions": undefined,
}
`;
exports[`coreTools snapshots for specific models > Model: gemini-3-pro-preview > snapshot for tool: write_todos 1`] = `
{
"declaration": {
"description": "This tool can help you list out the current subtasks that are required to be completed for a given user request. The list of subtasks helps you keep track of the current task, organize complex queries and help ensure that you don't miss any steps. With this list, the user can also see the current progress you are making in executing a given task.
Depending on the task complexity, you should first divide a given task into subtasks and then use this tool to list out the subtasks that are required to be completed for a given user request.
@@ -1521,23 +1634,6 @@ Agent:
The agent did not use the todo list because this task could be completed by a tight loop of execute test->edit->execute test.
</reasoning>
</example>",
"instructions": "
## Task state definitions
- pending: Work has not begun on a given subtask.
- in_progress: Marked just prior to beginning work on a given subtask. You should only have one subtask as in_progress at a time.
- completed: Subtask was successfully completed with no errors or issues. If the subtask required more steps to complete, update the todo list with the subtasks. All steps should be identified as completed only when they are completed.
- cancelled: As you update the todo list, some tasks are not required anymore due to the dynamic nature of the task. In this case, mark the subtasks as cancelled.
## Methodology for using this tool
1. Use this todo list as soon as you receive a user request based on the complexity of the task.
2. Keep track of every subtask that you update the list with.
3. Mark a subtask as in_progress before you begin working on it. You should only have one subtask as in_progress at a time.
4. Update the subtask list as you proceed in executing the task. The subtask list is not static and should reflect your progress and current plans, which may evolve as you acquire new information.
5. Mark a subtask as completed when you have completed it.
6. Mark a subtask as cancelled if the subtask is no longer needed.
7. You must update the todo list as soon as you start, stop or cancel a subtask. Don't batch or wait to update the todo list.",
"name": "write_todos",
"parametersJsonSchema": {
"additionalProperties": false,
@@ -1577,5 +1673,23 @@ The agent did not use the todo list because this task could be completed by a ti
],
"type": "object",
},
},
"instructions": "
## Task state definitions
- pending: Work has not begun on a given subtask.
- in_progress: Marked just prior to beginning work on a given subtask. You should only have one subtask as in_progress at a time.
- completed: Subtask was successfully completed with no errors or issues. If the subtask required more steps to complete, update the todo list with the subtasks. All steps should be identified as completed only when they are completed.
- cancelled: As you update the todo list, some tasks are not required anymore due to the dynamic nature of the task. In this case, mark the subtasks as cancelled.
## Methodology for using this tool
1. Use this todo list as soon as you receive a user request based on the complexity of the task.
2. Keep track of every subtask that you update the list with.
3. Mark a subtask as in_progress before you begin working on it. You should only have one subtask as in_progress at a time.
4. Update the subtask list as you proceed in executing the task. The subtask list is not static and should reflect your progress and current plans, which may evolve as you acquire new information.
5. Mark a subtask as completed when you have completed it.
6. Mark a subtask as cancelled if the subtask is no longer needed.
7. You must update the todo list as soon as you start, stop or cancel a subtask. Don't batch or wait to update the todo list.",
}
`;
@@ -12,23 +12,38 @@ import type { ToolDefinition } from './types.js';
*
* @param definition The tool definition containing the base declaration and optional overrides.
* @param modelId Optional model identifier to apply specific overrides.
* @returns The FunctionDeclaration to be sent to the API.
* @returns An object containing the FunctionDeclaration for the API and optional instructions for the system prompt.
*/
export function resolveToolDeclaration(
definition: ToolDefinition,
modelId?: string,
): FunctionDeclaration & { instructions?: string } {
): { declaration: FunctionDeclaration; instructions?: string } {
const { instructions: baseInstructions, ...baseDeclaration } =
definition.base;
if (!modelId || !definition.overrides) {
return definition.base;
return {
declaration: baseDeclaration,
instructions: baseInstructions,
};
}
const override = definition.overrides(modelId);
if (!override) {
return definition.base;
return {
declaration: baseDeclaration,
instructions: baseInstructions,
};
}
const { instructions: overrideInstructions, ...overrideDeclaration } =
override;
return {
...definition.base,
...override,
declaration: {
...baseDeclaration,
...overrideDeclaration,
},
instructions: overrideInstructions ?? baseInstructions,
};
}
+8 -5
View File
@@ -914,19 +914,22 @@ export class EditTool
typeof config.getActiveModel === 'function'
? config.getActiveModel()
: undefined;
const resolved = resolveToolDeclaration(EDIT_DEFINITION, modelId);
const { declaration, instructions } = resolveToolDeclaration(
EDIT_DEFINITION,
modelId,
);
super(
EditTool.Name,
EDIT_DISPLAY_NAME,
resolved.description!,
declaration.description!,
Kind.Edit,
resolved.parametersJsonSchema,
declaration.parametersJsonSchema,
messageBus,
true, // isOutputMarkdown
false, // canUpdateOutput
undefined, // extensionName
undefined, // extensionId
resolved.instructions,
instructions,
);
}
@@ -970,7 +973,7 @@ export class EditTool
}
override getSchema(modelId?: string) {
return resolveToolDeclaration(EDIT_DEFINITION, modelId);
return resolveToolDeclaration(EDIT_DEFINITION, modelId).declaration;
}
getModifyContext(_: AbortSignal): ModifyContext<EditToolParams> {
+1 -1
View File
@@ -243,6 +243,6 @@ export class ReadFileTool extends BaseDeclarativeTool<
}
override getSchema(modelId?: string) {
return resolveToolDeclaration(READ_FILE_DEFINITION, modelId);
return resolveToolDeclaration(READ_FILE_DEFINITION, modelId).declaration;
}
}
+2 -1
View File
@@ -493,6 +493,7 @@ export class ReadManyFilesTool extends BaseDeclarativeTool<
}
override getSchema(modelId?: string) {
return resolveToolDeclaration(READ_MANY_FILES_DEFINITION, modelId);
return resolveToolDeclaration(READ_MANY_FILES_DEFINITION, modelId)
.declaration;
}
}
+8 -5
View File
@@ -474,19 +474,22 @@ export class ShellTool extends BaseDeclarativeTool<
config.getEnableInteractiveShell(),
config.getEnableShellOutputEfficiency(),
);
const resolved = resolveToolDeclaration(definition, modelId);
const { declaration, instructions } = resolveToolDeclaration(
definition,
modelId,
);
super(
ShellTool.Name,
'Shell',
resolved.description!,
declaration.description!,
Kind.Execute,
resolved.parametersJsonSchema,
declaration.parametersJsonSchema,
messageBus,
false, // output is not markdown
true, // output can be updated
undefined, // extensionName
undefined, // extensionId
resolved.instructions,
instructions,
);
}
@@ -527,6 +530,6 @@ export class ShellTool extends BaseDeclarativeTool<
this.config.getEnableInteractiveShell(),
this.config.getEnableShellOutputEfficiency(),
);
return resolveToolDeclaration(definition, modelId);
return resolveToolDeclaration(definition, modelId).declaration;
}
}
+2 -1
View File
@@ -7,9 +7,10 @@
import { describe, expect, it } from 'vitest';
import { WriteTodosTool, type WriteTodosToolParams } from './write-todos.js';
import { createMockMessageBus } from '../test-utils/mock-message-bus.js';
import { makeFakeConfig } from '../test-utils/config.js';
describe('WriteTodosTool', () => {
const tool = new WriteTodosTool(createMockMessageBus());
const tool = new WriteTodosTool(makeFakeConfig(), createMockMessageBus());
const signal = new AbortController().signal;
describe('validation', () => {
+14 -5
View File
@@ -90,24 +90,33 @@ export class WriteTodosTool extends BaseDeclarativeTool<
typeof config.getActiveModel === 'function'
? config.getActiveModel()
: undefined;
const resolved = resolveToolDeclaration(WRITE_TODOS_DEFINITION, modelId);
const { declaration, instructions } = resolveToolDeclaration(
WRITE_TODOS_DEFINITION,
modelId,
);
super(
WriteTodosTool.Name,
'WriteTodos',
resolved.description!,
declaration.description!,
Kind.Other,
resolved.parametersJsonSchema,
declaration.parametersJsonSchema,
messageBus,
true, // isOutputMarkdown
false, // canUpdateOutput
undefined, // extensionName
undefined, // extensionId
resolved.instructions,
instructions,
);
}
override getSchema(modelId?: string) {
return resolveToolDeclaration(WRITE_TODOS_DEFINITION, modelId);
const activeModel =
modelId ??
(typeof this.config.getActiveModel === 'function'
? this.config.getActiveModel()
: undefined);
return resolveToolDeclaration(WRITE_TODOS_DEFINITION, activeModel)
.declaration;
}
protected override validateToolParamValues(