diff --git a/.gemini/settings.json b/.gemini/settings.json index 9051dc78de..eb7741997b 100644 --- a/.gemini/settings.json +++ b/.gemini/settings.json @@ -1,9 +1,9 @@ { "experimental": { - "plan": true, "extensionReloading": true, "modelSteering": true, - "memoryManager": true + "memoryManager": false, + "topicUpdateNarration": true }, "general": { "devtools": true diff --git a/.github/actions/publish-release/action.yml b/.github/actions/publish-release/action.yml index a7df2039d5..4d33edffee 100644 --- a/.github/actions/publish-release/action.yml +++ b/.github/actions/publish-release/action.yml @@ -175,7 +175,9 @@ runs: --dry-run="${INPUTS_DRY_RUN}" \ --workspace="${INPUTS_CORE_PACKAGE_NAME}" \ --no-tag - npm dist-tag rm ${INPUTS_CORE_PACKAGE_NAME} false + if [[ "${INPUTS_DRY_RUN}" == "false" ]]; then + npm dist-tag rm ${INPUTS_CORE_PACKAGE_NAME} false + fi - name: '🔗 Install latest core package' working-directory: '${{ inputs.working-directory }}' @@ -193,7 +195,7 @@ runs: INPUTS_A2A_PACKAGE_NAME: '${{ inputs.a2a-package-name }}' - name: '📦 Prepare bundled CLI for npm release' - if: "inputs.npm-registry-url != 'https://npm.pkg.github.com/' && inputs.npm-tag != 'latest'" + if: "inputs.npm-registry-url != 'https://npm.pkg.github.com/'" working-directory: '${{ inputs.working-directory }}' shell: 'bash' run: | @@ -248,7 +250,9 @@ runs: --dry-run="${INPUTS_DRY_RUN}" \ --workspace="${INPUTS_A2A_PACKAGE_NAME}" \ --no-tag - npm dist-tag rm ${INPUTS_A2A_PACKAGE_NAME} false + if [[ "${INPUTS_DRY_RUN}" == "false" ]]; then + npm dist-tag rm ${INPUTS_A2A_PACKAGE_NAME} false + fi - name: '🔬 Verify NPM release by version' uses: './.github/actions/verify-release' @@ -287,8 +291,25 @@ runs: INPUTS_PREVIOUS_TAG: '${{ inputs.previous-tag }}' shell: 'bash' run: | + rm -f gemini-cli-bundle.zip + (cd bundle && chmod +x gemini.js && zip -r ../gemini-cli-bundle.zip .) + + echo "Testing the generated bundle archive..." 
+ rm -rf test-bundle + mkdir -p test-bundle + unzip -q gemini-cli-bundle.zip -d test-bundle + + # Verify it runs and outputs a version + BUNDLE_VERSION=$(node test-bundle/gemini.js --version | xargs) + echo "Bundle version output: ${BUNDLE_VERSION}" + if [[ -z "${BUNDLE_VERSION}" ]]; then + echo "Error: Bundle failed to execute or return version." + exit 1 + fi + rm -rf test-bundle + gh release create "${INPUTS_RELEASE_TAG}" \ - bundle/gemini.js \ + gemini-cli-bundle.zip \ --target "${STEPS_RELEASE_BRANCH_OUTPUTS_BRANCH_NAME}" \ --title "Release ${INPUTS_RELEASE_TAG}" \ --notes-start-tag "${INPUTS_PREVIOUS_TAG}" \ diff --git a/.github/actions/run-tests/action.yml b/.github/actions/run-tests/action.yml index 2f5c0de140..42fd78d7e9 100644 --- a/.github/actions/run-tests/action.yml +++ b/.github/actions/run-tests/action.yml @@ -18,6 +18,13 @@ runs: env: JSON_INPUTS: '${{ toJSON(inputs) }}' run: 'echo "$JSON_INPUTS"' + - name: 'Install system dependencies' + if: "runner.os == 'Linux'" + run: | + sudo apt-get update -qq && sudo DEBIAN_FRONTEND=noninteractive apt-get install -y -qq bubblewrap + # Ubuntu 24.04+ requires this to allow bwrap to function in CI + sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 || true + shell: 'bash' - name: 'Run Tests' env: GEMINI_API_KEY: '${{ inputs.gemini_api_key }}' diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9b3e18d6af..ccc2ad70ce 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -346,9 +346,11 @@ npm run lint - Please adhere to the coding style, patterns, and conventions used throughout the existing codebase. -- Consult [GEMINI.md](../GEMINI.md) (typically found in the project root) for - specific instructions related to AI-assisted development, including - conventions for React, comments, and Git usage. 
+- Consult + [GEMINI.md](https://github.com/google-gemini/gemini-cli/blob/main/GEMINI.md) + (typically found in the project root) for specific instructions related to + AI-assisted development, including conventions for React, comments, and Git + usage. - **Imports:** Pay special attention to import paths. The project uses ESLint to enforce restrictions on relative imports between packages. @@ -505,8 +507,9 @@ code. ### Documentation structure -Our documentation is organized using [sidebar.json](/docs/sidebar.json) as the -table of contents. When adding new documentation: +Our documentation is organized using +[sidebar.json](https://github.com/google-gemini/gemini-cli/blob/main/docs/sidebar.json) +as the table of contents. When adding new documentation: 1. Create your markdown file **in the appropriate directory** under `/docs`. 2. Add an entry to `sidebar.json` in the relevant section. diff --git a/README.md b/README.md index 03a7be1296..10458b2126 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Learn all about Gemini CLI in our [documentation](https://geminicli.com/docs/). ## 📦 Installation See -[Gemini CLI installation, execution, and releases](./docs/get-started/installation.md) +[Gemini CLI installation, execution, and releases](https://www.geminicli.com/docs/get-started/installation) for recommended system specifications and a detailed installation guide. ### Quick Install @@ -71,9 +71,9 @@ conda activate gemini_env npm install -g @google/gemini-cli ``` -## Release Cadence and Tags +## Release Channels -See [Releases](./docs/releases.md) for more details. +See [Releases](https://www.geminicli.com/docs/changelogs) for more details. ### Preview @@ -209,7 +209,7 @@ gemini ``` For Google Workspace accounts and other authentication methods, see the -[authentication guide](./docs/get-started/authentication.md). +[authentication guide](https://www.geminicli.com/docs/get-started/authentication). 
## 🚀 Getting Started @@ -278,59 +278,64 @@ gemini ### Getting Started -- [**Quickstart Guide**](./docs/get-started/index.md) - Get up and running - quickly. -- [**Authentication Setup**](./docs/get-started/authentication.md) - Detailed - auth configuration. -- [**Configuration Guide**](./docs/reference/configuration.md) - Settings and - customization. -- [**Keyboard Shortcuts**](./docs/reference/keyboard-shortcuts.md) - +- [**Quickstart Guide**](https://www.geminicli.com/docs/get-started) - Get up + and running quickly. +- [**Authentication Setup**](https://www.geminicli.com/docs/get-started/authentication) - + Detailed auth configuration. +- [**Configuration Guide**](https://www.geminicli.com/docs/reference/configuration) - + Settings and customization. +- [**Keyboard Shortcuts**](https://www.geminicli.com/docs/reference/keyboard-shortcuts) - Productivity tips. ### Core Features -- [**Commands Reference**](./docs/reference/commands.md) - All slash commands - (`/help`, `/chat`, etc). -- [**Custom Commands**](./docs/cli/custom-commands.md) - Create your own - reusable commands. -- [**Context Files (GEMINI.md)**](./docs/cli/gemini-md.md) - Provide persistent - context to Gemini CLI. -- [**Checkpointing**](./docs/cli/checkpointing.md) - Save and resume - conversations. -- [**Token Caching**](./docs/cli/token-caching.md) - Optimize token usage. +- [**Commands Reference**](https://www.geminicli.com/docs/reference/commands) - + All slash commands (`/help`, `/chat`, etc). +- [**Custom Commands**](https://www.geminicli.com/docs/cli/custom-commands) - + Create your own reusable commands. +- [**Context Files (GEMINI.md)**](https://www.geminicli.com/docs/cli/gemini-md) - + Provide persistent context to Gemini CLI. +- [**Checkpointing**](https://www.geminicli.com/docs/cli/checkpointing) - Save + and resume conversations. +- [**Token Caching**](https://www.geminicli.com/docs/cli/token-caching) - + Optimize token usage. 
### Tools & Extensions -- [**Built-in Tools Overview**](./docs/reference/tools.md) - - [File System Operations](./docs/tools/file-system.md) - - [Shell Commands](./docs/tools/shell.md) - - [Web Fetch & Search](./docs/tools/web-fetch.md) -- [**MCP Server Integration**](./docs/tools/mcp-server.md) - Extend with custom - tools. -- [**Custom Extensions**](./docs/extensions/index.md) - Build and share your own - commands. +- [**Built-in Tools Overview**](https://www.geminicli.com/docs/reference/tools) + - [File System Operations](https://www.geminicli.com/docs/tools/file-system) + - [Shell Commands](https://www.geminicli.com/docs/tools/shell) + - [Web Fetch & Search](https://www.geminicli.com/docs/tools/web-fetch) +- [**MCP Server Integration**](https://www.geminicli.com/docs/tools/mcp-server) - + Extend with custom tools. +- [**Custom Extensions**](https://geminicli.com/docs/extensions/writing-extensions) - + Build and share your own commands. ### Advanced Topics -- [**Headless Mode (Scripting)**](./docs/cli/headless.md) - Use Gemini CLI in - automated workflows. -- [**IDE Integration**](./docs/ide-integration/index.md) - VS Code companion. -- [**Sandboxing & Security**](./docs/cli/sandbox.md) - Safe execution - environments. -- [**Trusted Folders**](./docs/cli/trusted-folders.md) - Control execution - policies by folder. -- [**Enterprise Guide**](./docs/cli/enterprise.md) - Deploy and manage in a - corporate environment. -- [**Telemetry & Monitoring**](./docs/cli/telemetry.md) - Usage tracking. -- [**Tools reference**](./docs/reference/tools.md) - Built-in tools overview. -- [**Local development**](./docs/local-development.md) - Local development - tooling. +- [**Headless Mode (Scripting)**](https://www.geminicli.com/docs/cli/headless) - + Use Gemini CLI in automated workflows. +- [**IDE Integration**](https://www.geminicli.com/docs/ide-integration) - VS + Code companion. 
+- [**Sandboxing & Security**](https://www.geminicli.com/docs/cli/sandbox) - Safe + execution environments. +- [**Trusted Folders**](https://www.geminicli.com/docs/cli/trusted-folders) - + Control execution policies by folder. +- [**Enterprise Guide**](https://www.geminicli.com/docs/cli/enterprise) - Deploy + and manage in a corporate environment. +- [**Telemetry & Monitoring**](https://www.geminicli.com/docs/cli/telemetry) - + Usage tracking. +- [**Tools reference**](https://www.geminicli.com/docs/reference/tools) - + Built-in tools overview. +- [**Local development**](https://www.geminicli.com/docs/local-development) - + Local development tooling. ### Troubleshooting & Support -- [**Troubleshooting Guide**](./docs/resources/troubleshooting.md) - Common - issues and solutions. -- [**FAQ**](./docs/resources/faq.md) - Frequently asked questions. +- [**Troubleshooting Guide**](https://www.geminicli.com/docs/resources/troubleshooting) - + Common issues and solutions. +- [**FAQ**](https://www.geminicli.com/docs/resources/faq) - Frequently asked + questions. - Use `/bug` command to report issues directly from the CLI. ### Using MCP Servers @@ -344,8 +349,9 @@ custom tools: > @database Run a query to find inactive users ``` -See the [MCP Server Integration guide](./docs/tools/mcp-server.md) for setup -instructions. +See the +[MCP Server Integration guide](https://www.geminicli.com/docs/tools/mcp-server) +for setup instructions. ## 🤝 Contributing @@ -366,7 +372,8 @@ for planned features and priorities. ## 📖 Resources - **[Official Roadmap](./ROADMAP.md)** - See what's coming next. -- **[Changelog](./docs/changelogs/index.md)** - See recent notable updates. +- **[Changelog](https://www.geminicli.com/docs/changelogs)** - See recent + notable updates. - **[NPM Package](https://www.npmjs.com/package/@google/gemini-cli)** - Package registry. - **[GitHub Issues](https://github.com/google-gemini/gemini-cli/issues)** - @@ -376,13 +383,14 @@ for planned features and priorities. 
### Uninstall -See the [Uninstall Guide](./docs/resources/uninstall.md) for removal -instructions. +See the [Uninstall Guide](https://www.geminicli.com/docs/resources/uninstall) +for removal instructions. ## 📄 Legal - **License**: [Apache License 2.0](LICENSE) -- **Terms of Service**: [Terms & Privacy](./docs/resources/tos-privacy.md) +- **Terms of Service**: + [Terms & Privacy](https://www.geminicli.com/docs/resources/tos-privacy) - **Security**: [Security Policy](SECURITY.md) --- diff --git a/docs/assets/theme-tokyonight-dark.png b/docs/assets/theme-tokyonight-dark.png new file mode 100644 index 0000000000..ebeec93548 Binary files /dev/null and b/docs/assets/theme-tokyonight-dark.png differ diff --git a/docs/changelogs/latest.md b/docs/changelogs/latest.md index 6df33c78d6..3d3cf07f7a 100644 --- a/docs/changelogs/latest.md +++ b/docs/changelogs/latest.md @@ -1,6 +1,6 @@ -# Latest stable release: v0.35.2 +# Latest stable release: v0.35.3 -Released: March 26, 2026 +Released: March 28, 2026 For most users, our latest stable release is the recommended release. 
Install the latest stable version with: @@ -29,6 +29,9 @@ npm install -g @google/gemini-cli ## What's Changed +- fix(patch): cherry-pick 765fb67 to release/v0.35.2-pr-24055 [CONFLICTS] by + @gemini-cli-robot in + [#24063](https://github.com/google-gemini/gemini-cli/pull/24063) - fix(core): allow disabling environment variable redaction by @galz10 in [#23927](https://github.com/google-gemini/gemini-cli/pull/23927) - fix(a2a-server): A2A server should execute ask policies in interactive mode by @@ -385,4 +388,4 @@ npm install -g @google/gemini-cli [#23585](https://github.com/google-gemini/gemini-cli/pull/23585) **Full Changelog**: -https://github.com/google-gemini/gemini-cli/compare/v0.34.0...v0.35.2 +https://github.com/google-gemini/gemini-cli/compare/v0.34.0...v0.35.3 diff --git a/docs/changelogs/preview.md b/docs/changelogs/preview.md index 6c31a64679..e2ec2c41c0 100644 --- a/docs/changelogs/preview.md +++ b/docs/changelogs/preview.md @@ -1,6 +1,6 @@ -# Preview release: v0.36.0-preview.5 +# Preview release: v0.36.0-preview.8 -Released: March 27, 2026 +Released: April 01, 2026 Our preview release includes the latest, new, and experimental features. This release may not be as stable as our [latest weekly release](latest.md). 
@@ -31,6 +31,10 @@ npm install -g @google/gemini-cli@preview ## What's Changed +- fix(patch): cherry-pick 765fb67 to release/v0.36.0-preview.5-pr-24055 to patch + version v0.36.0-preview.5 and create version 0.36.0-preview.6 by + @gemini-cli-robot in + [#24061](https://github.com/google-gemini/gemini-cli/pull/24061) - fix(a2a-server): A2A server should execute ask policies in interactive mode by @kschaab in [#23831](https://github.com/google-gemini/gemini-cli/pull/23831) - docs(core): document agent_card_json string literal options for remote agents @@ -386,4 +390,4 @@ npm install -g @google/gemini-cli@preview [#23666](https://github.com/google-gemini/gemini-cli/pull/23666) **Full Changelog**: -https://github.com/google-gemini/gemini-cli/compare/v0.35.0-preview.5...v0.36.0-preview.5 +https://github.com/google-gemini/gemini-cli/compare/v0.35.0-preview.5...v0.36.0-preview.8 diff --git a/docs/cli/cli-reference.md b/docs/cli/cli-reference.md index bc8f8b44ce..39d98f60e9 100644 --- a/docs/cli/cli-reference.md +++ b/docs/cli/cli-reference.md @@ -52,7 +52,7 @@ These commands are available within the interactive REPL. | `--prompt-interactive` | `-i` | string | - | Execute prompt and continue in interactive mode | | `--worktree` | `-w` | string | - | Start Gemini in a new git worktree. If no name is provided, one is generated automatically. Requires `experimental.worktrees: true` in settings. | | `--sandbox` | `-s` | boolean | `false` | Run in a sandboxed environment for safer execution | -| `--approval-mode` | - | string | `default` | Approval mode for tool execution. Choices: `default`, `auto_edit`, `yolo` | +| `--approval-mode` | - | string | `default` | Approval mode for tool execution. Choices: `default`, `auto_edit`, `yolo`, `plan` | | `--yolo` | `-y` | boolean | `false` | **Deprecated.** Auto-approve all actions. Use `--approval-mode=yolo` instead. | | `--experimental-acp` | - | boolean | - | Start in ACP (Agent Code Pilot) mode. 
**Experimental feature.** | | `--experimental-zed-integration` | - | boolean | - | Run in Zed editor integration mode. **Experimental feature.** | diff --git a/docs/cli/plan-mode.md b/docs/cli/plan-mode.md index ad87bc591b..d60d5e6f6f 100644 --- a/docs/cli/plan-mode.md +++ b/docs/cli/plan-mode.md @@ -56,19 +56,21 @@ Gemini CLI takes action. 1. **Provide a goal:** Start by describing what you want to achieve. Gemini CLI will then enter Plan Mode (if it's not already) to research the task. -2. **Review research and provide input:** As Gemini CLI analyzes your codebase, - it may ask you questions or present different implementation options using - [`ask_user`](../tools/ask-user.md). Provide your preferences to help guide - the design. -3. **Review the plan:** Once Gemini CLI has a proposed strategy, it creates a - detailed implementation plan as a Markdown file in your plans directory. +2. **Discuss and agree on strategy:** As Gemini CLI analyzes your codebase, it + will discuss its findings and proposed strategy with you to ensure + alignment. It may ask you questions or present different implementation + options using [`ask_user`](../tools/ask-user.md). **Gemini CLI will stop and + wait for your confirmation** before drafting the formal plan. You should + reach an informal agreement on the approach before proceeding. +3. **Review the plan:** Once you've agreed on the strategy, Gemini CLI creates + a detailed implementation plan as a Markdown file in your plans directory. - **View:** You can open and read this file to understand the proposed changes. - **Edit:** Press `Ctrl+X` to open the plan directly in your configured external editor. 4. **Approve or iterate:** Gemini CLI will present the finalized plan for your - approval. + formal approval. - **Approve:** If you're satisfied with the plan, approve it to start the implementation immediately: **Yes, automatically accept edits** or **Yes, manually accept edits**. 
@@ -121,6 +123,7 @@ These are the only allowed tools: [`glob`](../tools/file-system.md#4-glob-findfiles) - **Search:** [`grep_search`](../tools/file-system.md#5-grep_search-searchtext), [`google_web_search`](../tools/web-search.md), + [`web_fetch`](../tools/web-fetch.md) (requires explicit confirmation), [`get_internal_docs`](../tools/internal-docs.md) - **Research Subagents:** [`codebase_investigator`](../core/subagents.md#codebase-investigator), diff --git a/docs/cli/settings.md b/docs/cli/settings.md index ac1fdc98fc..0f01558d2e 100644 --- a/docs/cli/settings.md +++ b/docs/cli/settings.md @@ -30,6 +30,7 @@ they appear in the UI. | Default Approval Mode | `general.defaultApprovalMode` | The default approval mode for tool execution. 'default' prompts for approval, 'auto_edit' auto-approves edit tools, and 'plan' is read-only mode. YOLO mode (auto-approve all actions) can only be enabled via command line (--yolo or --approval-mode=yolo). | `"default"` | | Enable Auto Update | `general.enableAutoUpdate` | Enable automatic updates. | `true` | | Enable Notifications | `general.enableNotifications` | Enable run-event notifications for action-required prompts and session completion. | `false` | +| Enable Plan Mode | `general.plan.enabled` | Enable Plan Mode for read-only safety during planning. | `true` | | Plan Directory | `general.plan.directory` | The directory where planning artifacts are stored. If not specified, defaults to the system temporary directory. A custom directory requires a policy to allow write access in Plan Mode. | `undefined` | | Plan Model Routing | `general.plan.modelRouting` | Automatically switch between Pro and Flash models based on Plan Mode status. Uses Pro for the planning phase and Flash for the implementation phase. | `true` | | Retry Fetch Errors | `general.retryFetchErrors` | Retry on "exception TypeError: fetch failed sending request" errors. | `true` | @@ -59,6 +60,7 @@ they appear in the UI. 
| Hide Tips | `ui.hideTips` | Hide helpful tips in the UI | `false` | | Escape Pasted @ Symbols | `ui.escapePastedAtSymbols` | When enabled, @ symbols in pasted text are escaped to prevent unintended @path expansion. | `false` | | Show Shortcuts Hint | `ui.showShortcutsHint` | Show the "? for shortcuts" hint above the input. | `true` | +| Compact Tool Output | `ui.compactToolOutput` | Display tool outputs (like directory listings and file reads) in a compact, structured format. | `false` | | Hide Banner | `ui.hideBanner` | Hide the application banner | `false` | | Hide Context Summary | `ui.hideContextSummary` | Hide the context summary (GEMINI.md, MCP servers) above the input. | `false` | | Hide CWD | `ui.footer.hideCWD` | Hide the current working directory in the footer. | `false` | @@ -155,21 +157,16 @@ they appear in the UI. ### Experimental -| UI Label | Setting | Description | Default | -| ---------------------------------- | ---------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | -| Enable Tool Output Masking | `experimental.toolOutputMasking.enabled` | Enables tool output masking to save tokens. | `true` | -| Enable Git Worktrees | `experimental.worktrees` | Enable automated Git worktree management for parallel work. | `false` | -| Use OSC 52 Paste | `experimental.useOSC52Paste` | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | -| Use OSC 52 Copy | `experimental.useOSC52Copy` | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | -| Plan | `experimental.plan` | Enable Plan Mode. 
| `true` | -| Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` | -| Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` | -| Memory Manager Agent | `experimental.memoryManager` | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories. | `false` | -| Agent History Truncation | `experimental.agentHistoryTruncation` | Enable truncation window logic for the Agent History Provider. | `false` | -| Agent History Truncation Threshold | `experimental.agentHistoryTruncationThreshold` | The maximum number of messages before history is truncated. | `30` | -| Agent History Retained Messages | `experimental.agentHistoryRetainedMessages` | The number of recent messages to retain after truncation. | `15` | -| Agent History Summarization | `experimental.agentHistorySummarization` | Enable summarization of truncated content via a small model for the Agent History Provider. | `false` | -| Topic & Update Narration | `experimental.topicUpdateNarration` | Enable the experimental Topic & Update communication model for reduced chattiness and structured progress reporting. | `false` | +| UI Label | Setting | Description | Default | +| ------------------------- | ----------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | +| Enable Git Worktrees | `experimental.worktrees` | Enable automated Git worktree management for parallel work. | `false` | +| Use OSC 52 Paste | `experimental.useOSC52Paste` | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). 
| `false` | +| Use OSC 52 Copy | `experimental.useOSC52Copy` | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | +| Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` | +| Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` | +| Memory Manager Agent | `experimental.memoryManager` | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories. | `false` | +| Enable Context Management | `experimental.contextManagement` | Enable logic for context management. | `false` | +| Topic & Update Narration | `experimental.topicUpdateNarration` | Enable the experimental Topic & Update communication model for reduced chattiness and structured progress reporting. | `false` | ### Skills diff --git a/docs/cli/themes.md b/docs/cli/themes.md index 55acc75625..93912032c0 100644 --- a/docs/cli/themes.md +++ b/docs/cli/themes.md @@ -19,6 +19,7 @@ using the `/theme` command within Gemini CLI: - `Holiday` - `Shades Of Purple` - `Solarized Dark` + - `Tokyo Night` - **Light themes:** - `ANSI Light` - `Ayu Light` @@ -252,6 +253,10 @@ identify their source, for example: `shades-of-green (green-extension)`. Solarized Dark theme +### Tokyo Night + +Tokyo Night theme + ## Light themes ### ANSI Light diff --git a/docs/core/index.md b/docs/core/index.md index afa13787b8..ae5a6794fe 100644 --- a/docs/core/index.md +++ b/docs/core/index.md @@ -7,8 +7,8 @@ requests sent from `packages/cli`. For a general overview of Gemini CLI, see the ## Navigating this section -- **[Sub-agents (experimental)](./subagents.md):** Learn how to create and use - specialized sub-agents for complex tasks. 
+- **[Sub-agents](./subagents.md):** Learn how to create and use specialized + sub-agents for complex tasks. - **[Core tools reference](../reference/tools.md):** Information on how tools are defined, registered, and used by the core. - **[Memory Import Processor](../reference/memport.md):** Documentation for the diff --git a/docs/core/subagents.md b/docs/core/subagents.md index b0cffca3b5..a789e0f741 100644 --- a/docs/core/subagents.md +++ b/docs/core/subagents.md @@ -1,20 +1,16 @@ -# Subagents (experimental) +# Subagents Subagents are specialized agents that operate within your main Gemini CLI session. They are designed to handle specific, complex tasks—like deep codebase analysis, documentation lookup, or domain-specific reasoning—without cluttering the main agent's context or toolset. - -> [!NOTE] -> Subagents are currently an experimental feature. -> -To use custom subagents, you must ensure they are enabled in your -`settings.json` (enabled by default): +Subagents are enabled by default. To disable them, set `experimental.enableAgents` to `false` +in your `settings.json`: ```json { - "experimental": { "enableAgents": true } + "experimental": { "enableAgents": false } } ``` @@ -226,19 +222,65 @@ the `click_at` tool for precise, coordinate-based interactions. > The visual agent requires API key or Vertex AI authentication. It is > not available when using "Sign in with Google". +#### Sandbox support + +The browser agent adjusts its behavior automatically when running inside a +sandbox. + +##### macOS seatbelt (`sandbox-exec`) + +When the CLI runs under the macOS seatbelt sandbox, `persistent` and `isolated` +session modes are forced to `isolated` with `headless` enabled. This avoids +permission errors caused by seatbelt file-system restrictions on persistent +browser profiles. If `sessionMode` is set to `existing`, no override is applied. 
+ +##### Container sandboxes (Docker / Podman) + +Chrome is not available inside the container, so the browser agent is +**disabled** unless `sessionMode` is set to `"existing"`. When enabled with +`existing` mode, the agent automatically connects to Chrome on the host via the +resolved IP of `host.docker.internal:9222` instead of using local pipe +discovery. Port `9222` is currently hardcoded and cannot be customized. + +To use the browser agent in a Docker sandbox: + +1. Start Chrome on the host with remote debugging enabled: + + ```bash + # Option A: Launch Chrome from the command line + google-chrome --remote-debugging-port=9222 + + # Option B: Enable in Chrome settings + # Navigate to chrome://inspect/#remote-debugging and enable remote debugging + ``` + +2. Configure `sessionMode` and allowed domains in your project's + `.gemini/settings.json`: + + ```json + { + "agents": { + "overrides": { + "browser_agent": { "enabled": true } + }, + "browser": { + "sessionMode": "existing", + "allowedDomains": ["example.com"] + } + } + } + ``` + +3. Launch the CLI with port forwarding: + + ```bash + GEMINI_SANDBOX=docker SANDBOX_PORTS=9222 gemini + ``` + ## Creating custom subagents You can create your own subagents to automate specific workflows or enforce -specific personas. To use custom subagents, you must enable them in your -`settings.json`: - -```json -{ - "experimental": { - "enableAgents": true - } -} -``` +specific personas. ### Agent definition files @@ -290,6 +332,7 @@ it yourself; just report it. | `description` | string | Yes | Short description of what the agent does. This is visible to the main agent to help it decide when to call this subagent. | | `kind` | string | No | `local` (default) or `remote`. | | `tools` | array | No | List of tool names this agent can use. Supports wildcards: `*` (all tools), `mcp_*` (all MCP tools), `mcp_server_*` (all tools from a server). 
**If omitted, it inherits all tools from the parent session.** | +| `mcpServers` | object | No | Configuration for inline Model Context Protocol (MCP) servers isolated to this specific agent. | | `model` | string | No | Specific model to use (e.g., `gemini-3-preview`). Defaults to `inherit` (uses the main session model). | | `temperature` | number | No | Model temperature (0.0 - 2.0). Defaults to `1`. | | `max_turns` | number | No | Maximum number of conversation turns allowed for this agent before it must return. Defaults to `30`. | @@ -317,6 +360,78 @@ Each subagent runs in its own isolated context loop. This means: subagents **cannot** call other subagents. If a subagent is granted the `*` tool wildcard, it will still be unable to see or invoke other agents. +## Subagent tool isolation + +Subagent tool isolation moves Gemini CLI away from a single global tool +registry. By providing isolated execution environments, you can ensure that +subagents only interact with the parts of the system they are designed for. This +prevents unintended side effects, improves reliability by avoiding state +contamination, and enables fine-grained permission control. + +With this feature, you can: + +- **Specify tool access:** Define exactly which tools an agent can access using + a `tools` list in the agent definition. +- **Define inline MCP servers:** Configure Model Context Protocol (MCP) servers + (which provide a standardized way to connect AI models to external tools and + data sources) directly in the subagent's markdown frontmatter, isolating them + to that specific agent. +- **Maintain state isolation:** Ensure that subagents only interact with their + own set of tools and servers, preventing side effects and state contamination. +- **Apply subagent-specific policies:** Enforce granular rules in your + [Policy Engine](../reference/policy-engine.md) TOML configuration based on the + executing subagent's name. 
+ +### Configuring isolated tools and servers + +You can configure tool isolation for a subagent by updating its markdown +frontmatter. This allows you to explicitly state which tools the subagent can +use, rather than relying on the global registry. + +Add an `mcpServers` object to define inline MCP servers that are unique to the +agent. + +**Example:** + +```yaml +--- +name: my-isolated-agent +tools: + - grep_search + - read_file +mcpServers: + my-custom-server: + command: 'node' + args: ['path/to/server.js'] +--- +``` + +### Subagent-specific policies + +You can enforce fine-grained control over subagents using the +[Policy Engine's](../reference/policy-engine.md) TOML configuration. This allows +you to grant or restrict permissions specifically for an agent, without +affecting the rest of your CLI session. + +To restrict a policy rule to a specific subagent, add the `subagent` property to +the `[[rules]]` block in your `policy.toml` file. + +**Example:** + +```toml +[[rules]] +name = "Allow pr-creator to push code" +subagent = "pr-creator" +description = "Permit pr-creator to push branches automatically." +action = "allow" +toolName = "run_shell_command" +commandPrefix = "git push" +``` + +In this configuration, the policy rule only triggers if the executing subagent's +name matches `pr-creator`. Rules without the `subagent` property apply +universally to all agents. + ## Managing subagents You can manage subagents interactively using the `/agents` command or @@ -406,15 +521,11 @@ If you need to further tune your subagent, you can do so by selecting the model to optimize for with `/model` and then asking the model why it does not think that your subagent was called with a specific prompt and the given description. -## Remote subagents (Agent2Agent) (experimental) +## Remote subagents (Agent2Agent) Gemini CLI can also delegate tasks to remote subagents using the Agent-to-Agent (A2A) protocol. 
- -> [!NOTE] -> Remote subagents are currently an experimental feature. - See the [Remote Subagents documentation](remote-agents) for detailed configuration, authentication, and usage instructions. diff --git a/docs/reference/commands.md b/docs/reference/commands.md index 4dd7e367e5..67690f6ba2 100644 --- a/docs/reference/commands.md +++ b/docs/reference/commands.md @@ -17,8 +17,6 @@ Slash commands provide meta-level control over the CLI itself. ### `/agents` - **Description:** Manage local and remote subagents. -- **Note:** This command is experimental and requires - `experimental.enableAgents: true` in your `settings.json`. - **Sub-commands:** - **`list`**: - **Description:** Lists all discovered agents, including built-in, local, @@ -305,7 +303,7 @@ Slash commands provide meta-level control over the CLI itself. - **Description:** Switch to Plan Mode (read-only) and view the current plan if one has been generated. - **Note:** This feature is enabled by default. It can be disabled via the - `experimental.plan` setting in your configuration. + `general.plan.enabled` setting in your configuration. - **Sub-commands:** - **`copy`**: - **Description:** Copy the currently approved plan to your clipboard. diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index acfb272754..0804fcc463 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -141,6 +141,11 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `false` - **Requires restart:** Yes +- **`general.plan.enabled`** (boolean): + - **Description:** Enable Plan Mode for read-only safety during planning. + - **Default:** `true` + - **Requires restart:** Yes + - **`general.plan.directory`** (string): - **Description:** The directory where planning artifacts are stored. If not specified, defaults to the system temporary directory. 
A custom directory @@ -257,6 +262,11 @@ their corresponding top-level category object in your `settings.json` file. - **Description:** Show the "? for shortcuts" hint above the input. - **Default:** `true` +- **`ui.compactToolOutput`** (boolean): + - **Description:** Display tool outputs (like directory listings and file + reads) in a compact, structured format. + - **Default:** `false` + - **`ui.hideBanner`** (boolean): - **Description:** Hide the application banner - **Default:** `false` @@ -1577,26 +1587,9 @@ their corresponding top-level category object in your `settings.json` file. #### `experimental` -- **`experimental.toolOutputMasking.enabled`** (boolean): - - **Description:** Enables tool output masking to save tokens. - - **Default:** `true` - - **Requires restart:** Yes - -- **`experimental.toolOutputMasking.toolProtectionThreshold`** (number): - - **Description:** Minimum number of tokens to protect from masking (most - recent tool outputs). - - **Default:** `50000` - - **Requires restart:** Yes - -- **`experimental.toolOutputMasking.minPrunableTokensThreshold`** (number): - - **Description:** Minimum prunable tokens required to trigger a masking pass. - - **Default:** `30000` - - **Requires restart:** Yes - -- **`experimental.toolOutputMasking.protectLatestTurn`** (boolean): - - **Description:** Ensures the absolute latest turn is never masked, - regardless of token count. - - **Default:** `true` +- **`experimental.adk.agentSessionNoninteractiveEnabled`** (boolean): + - **Description:** Enable non-interactive agent sessions. + - **Default:** `false` - **Requires restart:** Yes - **`experimental.enableAgents`** (boolean): @@ -1637,7 +1630,7 @@ their corresponding top-level category object in your `settings.json` file. - **`experimental.jitContext`** (boolean): - **Description:** Enable Just-In-Time (JIT) context loading. 
- - **Default:** `true` + - **Default:** `false` - **Requires restart:** Yes - **`experimental.useOSC52Paste`** (boolean): @@ -1652,11 +1645,6 @@ their corresponding top-level category object in your `settings.json` file. configured to allow it). - **Default:** `false` -- **`experimental.plan`** (boolean): - - **Description:** Enable Plan Mode. - - **Default:** `true` - - **Requires restart:** Yes - - **`experimental.taskTracker`** (boolean): - **Description:** Enable task tracker tools. - **Default:** `false` @@ -1702,25 +1690,8 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `false` - **Requires restart:** Yes -- **`experimental.agentHistoryTruncation`** (boolean): - - **Description:** Enable truncation window logic for the Agent History - Provider. - - **Default:** `false` - - **Requires restart:** Yes - -- **`experimental.agentHistoryTruncationThreshold`** (number): - - **Description:** The maximum number of messages before history is truncated. - - **Default:** `30` - - **Requires restart:** Yes - -- **`experimental.agentHistoryRetainedMessages`** (number): - - **Description:** The number of recent messages to retain after truncation. - - **Default:** `15` - - **Requires restart:** Yes - -- **`experimental.agentHistorySummarization`** (boolean): - - **Description:** Enable summarization of truncated content via a small model - for the Agent History Provider. +- **`experimental.contextManagement`** (boolean): + - **Description:** Enable logic for context management. - **Default:** `false` - **Requires restart:** Yes @@ -1815,6 +1786,69 @@ their corresponding top-level category object in your `settings.json` file. prioritize available tools dynamically. - **Default:** `[]` +#### `contextManagement` + +- **`contextManagement.historyWindow.maxTokens`** (number): + - **Description:** The number of tokens to allow before triggering + compression. 
+ - **Default:** `150000` + - **Requires restart:** Yes + +- **`contextManagement.historyWindow.retainedTokens`** (number): + - **Description:** The number of tokens to always retain. + - **Default:** `40000` + - **Requires restart:** Yes + +- **`contextManagement.messageLimits.normalMaxTokens`** (number): + - **Description:** The target number of tokens to budget for a normal + conversation turn. + - **Default:** `2500` + - **Requires restart:** Yes + +- **`contextManagement.messageLimits.retainedMaxTokens`** (number): + - **Description:** The maximum number of tokens a single conversation turn can + consume before truncation. + - **Default:** `12000` + - **Requires restart:** Yes + +- **`contextManagement.messageLimits.normalizationHeadRatio`** (number): + - **Description:** The ratio of tokens to retain from the beginning of a + truncated message (0.0 to 1.0). + - **Default:** `0.25` + - **Requires restart:** Yes + +- **`contextManagement.tools.distillation.maxOutputTokens`** (number): + - **Description:** Maximum tokens to show to the model when truncating large + tool outputs. + - **Default:** `10000` + - **Requires restart:** Yes + +- **`contextManagement.tools.distillation.summarizationThresholdTokens`** + (number): + - **Description:** Threshold above which truncated tool outputs will be + summarized by an LLM. + - **Default:** `20000` + - **Requires restart:** Yes + +- **`contextManagement.tools.outputMasking.protectionThresholdTokens`** + (number): + - **Description:** Minimum number of tokens to protect from masking (most + recent tool outputs). + - **Default:** `50000` + - **Requires restart:** Yes + +- **`contextManagement.tools.outputMasking.minPrunableThresholdTokens`** + (number): + - **Description:** Minimum prunable tokens required to trigger a masking pass. 
+ - **Default:** `30000` + - **Requires restart:** Yes + +- **`contextManagement.tools.outputMasking.protectLatestTurn`** (boolean): + - **Description:** Ensures the absolute latest turn is never masked, + regardless of token count. + - **Default:** `true` + - **Requires restart:** Yes + #### `admin` - **`admin.secureModeEnabled`** (boolean): diff --git a/docs/reference/keyboard-shortcuts.md b/docs/reference/keyboard-shortcuts.md index 58edd797c6..e87c8682df 100644 --- a/docs/reference/keyboard-shortcuts.md +++ b/docs/reference/keyboard-shortcuts.md @@ -127,6 +127,13 @@ available combinations. | `background.unfocusList` | Move focus from background shell list to Gemini. | `Tab` | | `background.unfocusWarning` | Show warning when trying to move focus away from background shell. | `Tab` | +#### Extension Controls + +| Command | Action | Keys | +| ------------------ | ------------------------------------------- | ---- | +| `extension.update` | Update the current extension if available. | `I` | +| `extension.link` | Link the current extension to a local path. | `L` | + ## Customizing Keybindings diff --git a/docs/reference/policy-engine.md b/docs/reference/policy-engine.md index c9fc482ea7..597e74f111 100644 --- a/docs/reference/policy-engine.md +++ b/docs/reference/policy-engine.md @@ -29,13 +29,12 @@ To create your first policy: ```toml [[rule]] toolName = "run_shell_command" - commandPrefix = "git status" - decision = "allow" + commandPrefix = "rm -rf" + decision = "deny" priority = 100 ``` 3. **Run a command** that triggers the policy (e.g., ask Gemini CLI to - `git status`). The tool will now execute automatically without prompting for - confirmation. + `rm -rf /`). The tool will now be blocked automatically. ## Core concepts @@ -143,25 +142,26 @@ engine transforms this into a final priority using the following formula: This system guarantees that: -- Admin policies always override User, Workspace, and Default policies. 
+- Admin policies always override User, Workspace, and Default policies (defined + in policy TOML files). - User policies override Workspace and Default policies. - Workspace policies override Default policies. - You can still order rules within a single tier with fine-grained control. For example: -- A `priority: 50` rule in a Default policy file becomes `1.050`. -- A `priority: 10` rule in a Workspace policy policy file becomes `2.010`. -- A `priority: 100` rule in a User policy file becomes `3.100`. -- A `priority: 20` rule in an Admin policy file becomes `4.020`. +- A `priority: 50` rule in a Default policy TOML becomes `1.050`. +- A `priority: 10` rule in a Workspace policy TOML becomes `2.010`. +- A `priority: 100` rule in a User policy TOML becomes `3.100`. +- A `priority: 20` rule in an Admin policy TOML becomes `4.020`. ### Approval modes Approval modes allow the policy engine to apply different sets of rules based on -the CLI's operational mode. A rule can be associated with one or more modes -(e.g., `yolo`, `autoEdit`, `plan`). The rule will only be active if the CLI is -running in one of its specified modes. If a rule has no modes specified, it is -always active. +the CLI's operational mode. A rule in a TOML policy file can be associated with +one or more modes (e.g., `yolo`, `autoEdit`, `plan`). The rule will only be +active if the CLI is running in one of its specified modes. If a rule has no +modes specified, it is always active. - `default`: The standard interactive mode where most write tools require confirmation. @@ -179,8 +179,8 @@ outcome. A rule matches a tool call if all of its conditions are met: -1. **Tool name**: The `toolName` in the rule must match the name of the tool - being called. +1. **Tool name**: The `toolName` in the TOML rule must match the name of the + tool being called. - **Wildcards**: You can use wildcards like `*`, `mcp_server_*`, or `mcp_*_toolName` to match multiple tools. See [Tool Name](#tool-name) for details. 
@@ -264,7 +264,7 @@ toolName = "run_shell_command" # (Optional) The name of a subagent. If provided, the rule only applies to tool # calls made by this specific subagent. -subagent = "generalist" +subagent = "codebase_investigator" # (Optional) The name of an MCP server. Can be combined with toolName # to form a composite FQN internally like "mcp_mcpName_toolName". @@ -419,20 +419,6 @@ decision = "ask_user" priority = 10 ``` -**4. Targeting a tool name across all servers** - -Use `mcpName = "*"` with a specific `toolName` to target that operation -regardless of which server provides it. - -```toml -# Allow the `search` tool across all connected MCP servers -[[rule]] -mcpName = "*" -toolName = "search" -decision = "allow" -priority = 50 -``` - ## Default policies The Gemini CLI ships with a set of default policies to provide a safe diff --git a/docs/reference/tools.md b/docs/reference/tools.md index 09f0518c07..91c626fa69 100644 --- a/docs/reference/tools.md +++ b/docs/reference/tools.md @@ -115,10 +115,10 @@ each tool. ### Web -| Tool | Kind | Description | -| :-------------------------------------------- | :------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| [`google_web_search`](../tools/web-search.md) | `Search` | Performs a Google Search to find up-to-date information. | -| [`web_fetch`](../tools/web-fetch.md) | `Fetch` | Retrieves and processes content from specific URLs. **Warning:** This tool can access local and private network addresses (e.g., localhost), which may pose a security risk if used with untrusted prompts. 
| +| Tool | Kind | Description | +| :-------------------------------------------- | :------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [`google_web_search`](../tools/web-search.md) | `Search` | Performs a Google Search to find up-to-date information. | +| [`web_fetch`](../tools/web-fetch.md) | `Fetch` | Retrieves and processes content from specific URLs. **Warning:** This tool can access local and private network addresses (e.g., localhost), which may pose a security risk if used with untrusted prompts. In Plan Mode, this tool requires explicit user confirmation. | ## Under the hood diff --git a/docs/sidebar.json b/docs/sidebar.json index ea82a64481..ad5741699e 100644 --- a/docs/sidebar.json +++ b/docs/sidebar.json @@ -138,12 +138,10 @@ { "label": "Plan mode", "slug": "docs/cli/plan-mode" }, { "label": "Subagents", - "badge": "🔬", "slug": "docs/core/subagents" }, { "label": "Remote subagents", - "badge": "🔬", "slug": "docs/core/remote-agents" }, { "label": "Rewind", "slug": "docs/cli/rewind" }, diff --git a/docs/tools/planning.md b/docs/tools/planning.md index e554e47a34..13e9cd4fd8 100644 --- a/docs/tools/planning.md +++ b/docs/tools/planning.md @@ -32,7 +32,9 @@ and planning. ## 2. `exit_plan_mode` (ExitPlanMode) `exit_plan_mode` signals that the planning phase is complete. It presents the -finalized plan to the user and requests approval to start the implementation. +finalized plan to the user and requests formal approval to start the +implementation. The agent MUST reach an informal agreement with the user in the +chat regarding the proposed strategy BEFORE calling this tool. 
- **Tool name:** `exit_plan_mode` - **Display name:** Exit Plan Mode @@ -44,7 +46,7 @@ finalized plan to the user and requests approval to start the implementation. - **Behavior:** - Validates that the `plan_path` is within the allowed directory and that the file exists and has content. - - Presents the plan to the user for review. + - Presents the plan to the user for formal review. - If the user approves the plan: - Switches the CLI's approval mode to the user's chosen approval mode ( `DEFAULT` or `AUTO_EDIT`). @@ -56,5 +58,5 @@ finalized plan to the user and requests approval to start the implementation. - On approval: A message indicating the plan was approved and the new approval mode. - On rejection: A message containing the user's feedback. -- **Confirmation:** Yes. Shows the finalized plan and asks for user approval to - proceed with implementation. +- **Confirmation:** Yes. Shows the finalized plan and asks for the user's + formal approval to proceed with implementation. diff --git a/docs/tools/web-fetch.md b/docs/tools/web-fetch.md index bde0232abc..66d8f4a570 100644 --- a/docs/tools/web-fetch.md +++ b/docs/tools/web-fetch.md @@ -17,6 +17,9 @@ specific operations like summarization or extraction. ## Technical behavior - **Confirmation:** Triggers a confirmation dialog showing the converted URLs. +- **Plan Mode:** In [Plan Mode](../cli/plan-mode.md), `web_fetch` is available + but always requires explicit user confirmation (`ask_user`) due to security + implications of accessing external or private network addresses. - **Processing:** Uses the Gemini API's `urlContext` for retrieval. - **Fallback:** If API access fails, the tool attempts to fetch raw content directly from your local machine. 
diff --git a/evals/plan_mode.eval.ts b/evals/plan_mode.eval.ts index 8b01f68155..6eea0c62ba 100644 --- a/evals/plan_mode.eval.ts +++ b/evals/plan_mode.eval.ts @@ -15,7 +15,9 @@ import { describe('plan_mode', () => { const TEST_PREFIX = 'Plan Mode: '; const settings = { - experimental: { plan: true }, + general: { + plan: { enabled: true }, + }, }; const getWriteTargets = (logs: any[]) => @@ -172,7 +174,8 @@ describe('plan_mode', () => { params: { settings, }, - prompt: 'Create a plan for a new login feature.', + prompt: + 'I agree with the strategy to use a JWT-based login. Create a plan for a new login feature.', assert: async (rig, result) => { await rig.waitForTelemetryReady(); const toolLogs = rig.readToolLogs(); @@ -209,7 +212,7 @@ describe('plan_mode', () => { 'import { sum } from "./mathUtils";\nconsole.log(sum(1, 2));', }, prompt: - 'I want to refactor our math utilities. Move the `sum` function from `src/mathUtils.ts` to a new file `src/basicMath.ts` and update `src/main.ts` to use the new file. Please create a detailed implementation plan first, then execute it.', + 'I want to refactor our math utilities. I agree with the strategy to move the `sum` function from `src/mathUtils.ts` to a new file `src/basicMath.ts` and update `src/main.ts`. Please create a detailed implementation plan first, then execute it.', assert: async (rig, result) => { const enterPlanCalled = await rig.waitForToolCall('enter_plan_mode'); expect( @@ -281,4 +284,80 @@ describe('plan_mode', () => { assertModelHasOutput(result); }, }); + + evalTest('ALWAYS_PASSES', { + name: 'should transition from plan mode to normal execution and create a plan file from scratch', + params: { + settings, + }, + prompt: + 'Enter plan mode and plan to create a new module called foo. The plan should be saved as foo-plan.md. 
Then, exit plan mode.', + assert: async (rig, result) => { + const enterPlanCalled = await rig.waitForToolCall('enter_plan_mode'); + expect( + enterPlanCalled, + 'Expected enter_plan_mode tool to be called', + ).toBe(true); + + const exitPlanCalled = await rig.waitForToolCall('exit_plan_mode'); + expect(exitPlanCalled, 'Expected exit_plan_mode tool to be called').toBe( + true, + ); + + await rig.waitForTelemetryReady(); + const toolLogs = rig.readToolLogs(); + + // Check if the plan file was written successfully + const planWrite = toolLogs.find( + (log) => + log.toolRequest.name === 'write_file' && + log.toolRequest.args.includes('foo-plan.md'), + ); + + expect( + planWrite, + 'Expected write_file to be called for foo-plan.md', + ).toBeDefined(); + + expect( + planWrite?.toolRequest.success, + `Expected write_file to succeed, but got error: ${planWrite?.toolRequest.error}`, + ).toBe(true); + + assertModelHasOutput(result); + }, + }); + + evalTest('USUALLY_PASSES', { + name: 'should not exit plan mode or draft before informal agreement', + approvalMode: ApprovalMode.PLAN, + params: { + settings, + }, + prompt: 'I need to build a new login feature. 
Please plan it.', + assert: async (rig, result) => { + await rig.waitForTelemetryReady(); + const toolLogs = rig.readToolLogs(); + + const exitPlanCall = toolLogs.find( + (log) => log.toolRequest.name === 'exit_plan_mode', + ); + expect( + exitPlanCall, + 'Should NOT call exit_plan_mode before informal agreement', + ).toBeUndefined(); + + const planWrite = toolLogs.find( + (log) => + log.toolRequest.name === 'write_file' && + log.toolRequest.args.includes('/plans/'), + ); + expect( + planWrite, + 'Should NOT draft the plan file before informal agreement', + ).toBeUndefined(); + + assertModelHasOutput(result); + }, + }); }); diff --git a/evals/tracker.eval.ts b/evals/tracker.eval.ts index 7afb41dbec..49bc903b0a 100644 --- a/evals/tracker.eval.ts +++ b/evals/tracker.eval.ts @@ -113,4 +113,21 @@ describe('tracker_mode', () => { assertModelHasOutput(result); }, }); + + evalTest('USUALLY_PASSES', { + name: 'should correctly identify the task tracker storage location from the system prompt', + params: { + settings: { experimental: { taskTracker: true } }, + }, + prompt: + 'Where is my task tracker storage located? Please provide the absolute path in your response.', + assert: async (rig, result) => { + // The rig sets GEMINI_CLI_HOME to rig.homeDir + const homeDir = rig.homeDir!; + // The response should contain the dynamic path which includes the home directory + // and follows the .gemini/tmp/.../tracker structure. 
+ expect(result).toContain(homeDir); + expect(result).toMatch(/\.gemini\/tmp\/.*\/tracker/); + }, + }); }); diff --git a/evals/update_topic.eval.ts b/evals/update_topic.eval.ts new file mode 100644 index 0000000000..1836e7f61b --- /dev/null +++ b/evals/update_topic.eval.ts @@ -0,0 +1,116 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import { evalTest } from './test-helper.js'; + +describe('update_topic_behavior', () => { + // Constants for tool names and params for robustness + const UPDATE_TOPIC_TOOL_NAME = 'update_topic'; + + /** + * Verifies the desired behavior of the update_topic tool. update_topic is used by the + * agent to share periodic, concise updates about what the agent is working on, independent + * of the regular model output and/or thoughts. This tool is expected to be called at least + * at the start and end of the session, and typically at least once in the middle, but no + * more than ~1/4 of turns (the ratio assertion below only fails above 1/2). + */ + evalTest('USUALLY_PASSES', { + name: 'update_topic should be used at start, end and middle for complex tasks', + prompt: `Create a simple users REST API using Express. +1. Initialize a new npm project and install express. +2. Create src/app.ts as the main entry point. +3. Create src/routes/userRoutes.ts for user routes. +4. Create src/controllers/userController.ts for user logic. +5. Implement GET /users, POST /users, and GET /users/:id using an in-memory array. +6. Add a 'start' script to package.json. +7. 
Finally, run a quick grep to verify the routes are in src/app.ts.`, + files: { + 'package.json': JSON.stringify( + { + name: 'users-api', + version: '1.0.0', + private: true, + }, + null, + 2, + ), + '.gemini/settings.json': JSON.stringify({ + experimental: { + topicUpdateNarration: true, + }, + }), + }, + assert: async (rig, result) => { + const toolLogs = rig.readToolLogs(); + const topicCalls = toolLogs.filter( + (l) => l.toolRequest.name === UPDATE_TOPIC_TOOL_NAME, + ); + + // 1. Assert that update_topic is called at least 3 times (start, middle, end) + expect( + topicCalls.length, + `Expected at least 3 update_topic calls, but found ${topicCalls.length}`, + ).toBeGreaterThanOrEqual(3); + + // 2. Assert update_topic is called at the very beginning (first tool call) + expect( + toolLogs[0].toolRequest.name, + 'First tool call should be update_topic', + ).toBe(UPDATE_TOPIC_TOOL_NAME); + + // 3. Assert update_topic is called near the end + const lastTopicCallIndex = toolLogs + .map((l) => l.toolRequest.name) + .lastIndexOf(UPDATE_TOPIC_TOOL_NAME); + expect( + lastTopicCallIndex, + 'Expected update_topic to be used near the end of the task', + ).toBeGreaterThanOrEqual(toolLogs.length * 0.7); + + // 4. Assert there is at least one update_topic call in the middle (between start and end phases) + const middleTopicCalls = topicCalls.slice(1, -1); + + expect( + middleTopicCalls.length, + 'Expected at least one update_topic call in the middle of the task', + ).toBeGreaterThanOrEqual(1); + + // 5. Turn Ratio Assertion: update_topic should be <= 1/2 of total turns. + // We only enforce this for tasks that take more than 5 turns, as shorter tasks + // naturally have a higher ratio when following the "start, middle, end" rule. 
+ const uniquePromptIds = new Set( + toolLogs + .map((l) => l.toolRequest.prompt_id) + .filter((id) => id !== undefined), + ); + const totalTurns = uniquePromptIds.size; + + if (totalTurns > 5) { + const topicTurns = new Set( + topicCalls + .map((l) => l.toolRequest.prompt_id) + .filter((id) => id !== undefined), + ); + const topicTurnCount = topicTurns.size; + + const ratio = topicTurnCount / totalTurns; + + expect( + ratio, + `update_topic was used in ${topicTurnCount} out of ${totalTurns} turns (${(ratio * 100).toFixed(1)}%). Expected <= 50%.`, + ).toBeLessThanOrEqual(0.5); + + // Ideal ratio is closer to 1/5 (20%). We log high usage as a warning. + if (ratio > 0.25) { + console.warn( + `[Efficiency Warning] update_topic usage is high: ${(ratio * 100).toFixed(1)}% (Goal: ~20%)`, + ); + } + } + }, + }); +}); diff --git a/integration-tests/api-resilience.responses b/integration-tests/api-resilience.responses index d30d29906e..d0520047f7 100644 --- a/integration-tests/api-resilience.responses +++ b/integration-tests/api-resilience.responses @@ -1 +1 @@ -{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Part 1. "}],"role":"model"},"index":0}]},{"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":10,"totalTokenCount":110}},{"candidates":[{"content":{"parts":[{"text":"Part 2."}],"role":"model"},"index":0}],"finishReason":"STOP"}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Part 1. 
"}],"role":"model"},"index":0}]},{"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":10,"totalTokenCount":110}},{"candidates":[{"content":{"parts":[{"text":"Part 2."}],"role":"model"},"index":0,"finishReason":"STOP"}]}]} diff --git a/integration-tests/browser-policy.test.ts b/integration-tests/browser-policy.test.ts index f533cb3f5e..4fbfc5db01 100644 --- a/integration-tests/browser-policy.test.ts +++ b/integration-tests/browser-policy.test.ts @@ -10,8 +10,13 @@ import { dirname, join } from 'node:path'; import { fileURLToPath } from 'node:url'; import { execSync } from 'node:child_process'; import { existsSync, writeFileSync, readFileSync, mkdirSync } from 'node:fs'; +import { env } from 'node:process'; import stripAnsi from 'strip-ansi'; +// Browser agent Chrome DevTools MCP connection is flaky in Docker sandbox. +// See: https://github.com/google-gemini/gemini-cli/issues/24382 +const isDockerSandbox = env['GEMINI_SANDBOX'] === 'docker'; + const __filename = fileURLToPath(import.meta.url); const __dirname = dirname(__filename); @@ -59,122 +64,146 @@ describe.skipIf(!chromeAvailable)('browser-policy', () => { await rig.cleanup(); }); - it('should skip confirmation when "Allow all server tools for this session" is chosen', async () => { - rig.setup('browser-policy-skip-confirmation', { - fakeResponsesPath: join(__dirname, 'browser-policy.responses'), - settings: { - agents: { - overrides: { - browser_agent: { - enabled: true, + it.skipIf(isDockerSandbox)( + 'should skip confirmation when "Allow all server tools for this session" is chosen', + async () => { + rig.setup('browser-policy-skip-confirmation', { + fakeResponsesPath: join(__dirname, 'browser-policy.responses'), + settings: { + agents: { + overrides: { + browser_agent: { + enabled: true, + }, + }, + browser: { + headless: true, + sessionMode: 'isolated', + allowedDomains: ['example.com'], }, }, - browser: { - headless: true, - sessionMode: 'isolated', - allowedDomains: ['example.com'], - }, }, - 
}, - }); + }); - // Manually trust the folder to avoid the dialog and enable option 3 - const geminiDir = join(rig.homeDir!, '.gemini'); - mkdirSync(geminiDir, { recursive: true }); + // Manually trust the folder to avoid the dialog and enable option 3 + const geminiDir = join(rig.homeDir!, '.gemini'); + mkdirSync(geminiDir, { recursive: true }); - // Write to trustedFolders.json - const trustedFoldersPath = join(geminiDir, 'trustedFolders.json'); - const trustedFolders = { - [rig.testDir!]: 'TRUST_FOLDER', - }; - writeFileSync(trustedFoldersPath, JSON.stringify(trustedFolders, null, 2)); + // Write to trustedFolders.json + const trustedFoldersPath = join(geminiDir, 'trustedFolders.json'); + const trustedFolders = { + [rig.testDir!]: 'TRUST_FOLDER', + }; + writeFileSync( + trustedFoldersPath, + JSON.stringify(trustedFolders, null, 2), + ); - // Force confirmation for browser agent. - // NOTE: We don't force confirm browser tools here because "Allow all server tools" - // adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by - // a rule in the user tier (4.x) like the one from this TOML. - // By removing the explicit mcp rule, the first MCP tool will still prompt - // due to default approvalMode = 'default', and then "Allow all" will correctly - // bypass subsequent tools. - const policyFile = join(rig.testDir!, 'force-confirm.toml'); - writeFileSync( - policyFile, - ` + // Force confirmation for browser agent. + // NOTE: We don't force confirm browser tools here because "Allow all server tools" + // adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by + // a rule in the user tier (4.x) like the one from this TOML. + // By removing the explicit mcp rule, the first MCP tool will still prompt + // due to default approvalMode = 'default', and then "Allow all" will correctly + // bypass subsequent tools. 
+ const policyFile = join(rig.testDir!, 'force-confirm.toml'); + writeFileSync( + policyFile, + ` [[rule]] name = "Force confirm browser_agent" toolName = "browser_agent" decision = "ask_user" priority = 200 `, - ); + ); - // Update settings.json in both project and home directories to point to the policy file - for (const baseDir of [rig.testDir!, rig.homeDir!]) { - const settingsPath = join(baseDir, '.gemini', 'settings.json'); - if (existsSync(settingsPath)) { - const settings = JSON.parse(readFileSync(settingsPath, 'utf-8')); - settings.policyPaths = [policyFile]; - // Ensure folder trust is enabled - settings.security = settings.security || {}; - settings.security.folderTrust = settings.security.folderTrust || {}; - settings.security.folderTrust.enabled = true; - writeFileSync(settingsPath, JSON.stringify(settings, null, 2)); + // Update settings.json in both project and home directories to point to the policy file + for (const baseDir of [rig.testDir!, rig.homeDir!]) { + const settingsPath = join(baseDir, '.gemini', 'settings.json'); + if (existsSync(settingsPath)) { + const settings = JSON.parse(readFileSync(settingsPath, 'utf-8')); + settings.policyPaths = [policyFile]; + // Ensure folder trust is enabled + settings.security = settings.security || {}; + settings.security.folderTrust = settings.security.folderTrust || {}; + settings.security.folderTrust.enabled = true; + writeFileSync(settingsPath, JSON.stringify(settings, null, 2)); + } } - } - const run = await rig.runInteractive({ - approvalMode: 'default', - env: { - GEMINI_CLI_INTEGRATION_TEST: 'true', - }, - }); + const run = await rig.runInteractive({ + approvalMode: 'default', + env: { + GEMINI_CLI_INTEGRATION_TEST: 'true', + }, + }); - await run.sendKeys( - 'Open https://example.com and check if there is a heading\r', - ); - await run.sendKeys('\r'); + await run.sendKeys( + 'Open https://example.com and check if there is a heading\r', + ); + await run.sendKeys('\r'); - // Handle confirmations. 
- // 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once) - await poll( - () => stripAnsi(run.output).toLowerCase().includes('action required'), - 60000, - 1000, - ); - await run.sendKeys('1\r'); - await new Promise((r) => setTimeout(r, 2000)); + // Handle confirmations. + // 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once) + await poll( + () => stripAnsi(run.output).toLowerCase().includes('action required'), + 60000, + 1000, + ); + await run.sendKeys('1\r'); + await new Promise((r) => setTimeout(r, 2000)); - // Handle privacy notice - await poll( - () => stripAnsi(run.output).toLowerCase().includes('privacy notice'), - 5000, - 100, - ); - await run.sendKeys('1\r'); - await new Promise((r) => setTimeout(r, 5000)); + // Handle privacy notice + await poll( + () => stripAnsi(run.output).toLowerCase().includes('privacy notice'), + 5000, + 100, + ); + await run.sendKeys('1\r'); + await new Promise((r) => setTimeout(r, 5000)); - // new_page (MCP tool, should have 4 options, use option 3: Allow all server tools) - await poll( - () => { - const stripped = stripAnsi(run.output).toLowerCase(); - return ( - stripped.includes('new_page') && - stripped.includes('allow all server tools for this session') - ); - }, - 60000, - 1000, - ); + // new_page (MCP tool, should have 4 options, use option 3: Allow all server tools) + await poll( + () => { + const stripped = stripAnsi(run.output).toLowerCase(); + return ( + stripped.includes('new_page') && + stripped.includes('allow all server tools for this session') + ); + }, + 60000, + 1000, + ); - // Select "Allow all server tools for this session" (option 3) - await run.sendKeys('3\r'); - await new Promise((r) => setTimeout(r, 30000)); + // Select "Allow all server tools for this session" (option 3) + await run.sendKeys('3\r'); - const output = stripAnsi(run.output).toLowerCase(); + // Wait for the browser agent to finish (success or failure) + await poll( + () 
=> { + const stripped = stripAnsi(run.output).toLowerCase(); + return ( + stripped.includes('completed successfully') || + stripped.includes('agent error') + ); + }, + 120000, + 1000, + ); - expect(output).toContain('browser_agent'); - expect(output).toContain('completed successfully'); - }); + const output = stripAnsi(run.output).toLowerCase(); + + expect(output).toContain('browser_agent'); + // The test validates that "Allow all server tools" skips subsequent + // tool confirmations — the browser agent may still fail due to + // Chrome/MCP issues in CI, which is acceptable for this policy test. + expect( + output.includes('completed successfully') || + output.includes('agent error'), + ).toBe(true); + }, + ); it('should show the visible warning when browser agent starts in existing session mode', async () => { rig.setup('browser-session-warning', { diff --git a/integration-tests/file-system.test.ts b/integration-tests/file-system.test.ts index 64481068c2..80552cfd68 100644 --- a/integration-tests/file-system.test.ts +++ b/integration-tests/file-system.test.ts @@ -121,6 +121,7 @@ describe('file-system', () => { const result = await rig.run({ args: `write "hello" to "${fileName}" and then stop. 
Do not perform any other actions.`, + timeout: 600000, // 10 min — real LLM can be slow in Docker sandbox }); const foundToolCall = await rig.waitForToolCall('write_file'); diff --git a/integration-tests/plan-mode.test.ts b/integration-tests/plan-mode.test.ts index d8d297c460..94ed65f1fe 100644 --- a/integration-tests/plan-mode.test.ts +++ b/integration-tests/plan-mode.test.ts @@ -23,7 +23,9 @@ describe('Plan Mode', () => { 'should allow read-only tools but deny write tools in plan mode', { settings: { - experimental: { plan: true }, + general: { + plan: { enabled: true }, + }, tools: { core: [ 'run_shell_command', @@ -67,15 +69,12 @@ describe('Plan Mode', () => { await rig.setup(testName, { settings: { - experimental: { plan: true }, tools: { core: ['write_file', 'read_file', 'list_directory'], }, general: { + plan: { enabled: true, directory: plansDir }, defaultApprovalMode: 'plan', - plan: { - directory: plansDir, - }, }, }, }); @@ -120,22 +119,19 @@ describe('Plan Mode', () => { await rig.setup(testName, { settings: { - experimental: { plan: true }, tools: { core: ['write_file', 'read_file', 'list_directory'], }, general: { + plan: { enabled: true, directory: plansDir }, defaultApprovalMode: 'plan', - plan: { - directory: plansDir, - }, }, }, }); await rig.run({ approvalMode: 'plan', - args: 'Create a file called hello.txt in the current directory.', + args: 'Attempt to create a file named "hello.txt" in the current directory. 
Do not create a plan file, try to write hello.txt directly.', }); const toolLogs = rig.readToolLogs(); @@ -156,7 +152,9 @@ describe('Plan Mode', () => { it('should be able to enter plan mode from default mode', async () => { await rig.setup('should be able to enter plan mode from default mode', { settings: { - experimental: { plan: true }, + general: { + plan: { enabled: true }, + }, tools: { core: ['enter_plan_mode'], allowed: ['enter_plan_mode'], @@ -184,15 +182,12 @@ describe('Plan Mode', () => { await rig.setup(testName, { settings: { - experimental: { plan: true }, tools: { core: ['write_file', 'read_file', 'list_directory'], }, general: { + plan: { enabled: true, directory: plansDir }, defaultApprovalMode: 'plan', - plan: { - directory: plansDir, - }, }, }, }); diff --git a/package-lock.json b/package-lock.json index f3bf8fa616..ea985a7be0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,7 +11,7 @@ "packages/*" ], "dependencies": { - "ink": "npm:@jrichman/ink@6.5.0", + "ink": "npm:@jrichman/ink@6.6.3", "latest-version": "^9.0.0", "node-fetch-native": "^1.6.7", "proper-lockfile": "^4.1.2", @@ -92,46 +92,6 @@ "zod": "^3.25.0 || ^4.0.0" } }, - "node_modules/@alcalzone/ansi-tokenize": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@alcalzone/ansi-tokenize/-/ansi-tokenize-0.2.2.tgz", - "integrity": "sha512-mkOh+Wwawzuf5wa30bvc4nA+Qb6DIrGWgBhRR/Pw4T9nsgYait8izvXkNyU78D6Wcu3Z+KUdwCmLCxlWjEotYA==", - "license": "MIT", - "dependencies": { - "ansi-styles": "^6.2.1", - "is-fullwidth-code-point": "^5.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/@alcalzone/ansi-tokenize/node_modules/ansi-styles": { - "version": "6.2.3", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.3.tgz", - "integrity": "sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": 
"https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/@alcalzone/ansi-tokenize/node_modules/is-fullwidth-code-point": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-5.1.0.tgz", - "integrity": "sha512-5XHYaSyiqADb4RnZ1Bdad6cPp8Toise4TzEjcOYDHZkTCbKgiUl7WTUCpNWHuxmDt91wnsZBc9xinNzopv3JMQ==", - "license": "MIT", - "dependencies": { - "get-east-asian-width": "^1.3.1" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/@ampproject/remapping": { "version": "2.3.0", "resolved": "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.3.0.tgz", @@ -10089,14 +10049,13 @@ }, "node_modules/ink": { "name": "@jrichman/ink", - "version": "6.5.0", - "resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.5.0.tgz", - "integrity": "sha512-S4g/ng7fPZmFwclO82iWkOce8vDLy/FIDgHIfkCWGOehqHe6dexHsmq3kNQD21okh198pA5SAQTCqNQJb/svRQ==", + "version": "6.6.3", + "resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.6.3.tgz", + "integrity": "sha512-0v4S7TbbF2tpQrfqH1btwLgTgH+K0vY2BJbokTE5Lk1KBr4TqZ+Pyo+geSD5F+zytX6G2ajGHBQyHk8yGK4C7A==", "license": "MIT", "dependencies": { - "@alcalzone/ansi-tokenize": "^0.2.1", "ansi-escapes": "^7.0.0", - "ansi-styles": "^6.2.1", + "ansi-styles": "^6.2.3", "auto-bind": "^5.0.1", "chalk": "^5.6.0", "cli-boxes": "^3.0.0", @@ -10105,6 +10064,7 @@ "code-excerpt": "^4.0.0", "es-toolkit": "^1.39.10", "indent-string": "^5.0.0", + "is-fullwidth-code-point": "^5.0.0", "is-in-ci": "^2.0.0", "mnemonist": "^0.40.3", "patch-console": "^2.0.0", @@ -10174,9 +10134,9 @@ } }, "node_modules/ink/node_modules/ansi-styles": { - "version": "6.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz", - "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==", + "version": "6.2.3", + "resolved": 
"https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.3.tgz", + "integrity": "sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==", "license": "MIT", "engines": { "node": ">=12" @@ -10197,6 +10157,21 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, + "node_modules/ink/node_modules/is-fullwidth-code-point": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-5.1.0.tgz", + "integrity": "sha512-5XHYaSyiqADb4RnZ1Bdad6cPp8Toise4TzEjcOYDHZkTCbKgiUl7WTUCpNWHuxmDt91wnsZBc9xinNzopv3JMQ==", + "license": "MIT", + "dependencies": { + "get-east-asian-width": "^1.3.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/ink/node_modules/is-in-ci": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/is-in-ci/-/is-in-ci-2.0.0.tgz", @@ -17551,7 +17526,7 @@ "fzf": "^0.5.2", "glob": "^12.0.0", "highlight.js": "^11.11.1", - "ink": "npm:@jrichman/ink@6.5.0", + "ink": "npm:@jrichman/ink@6.6.3", "ink-gradient": "^3.0.0", "ink-spinner": "^5.0.0", "latest-version": "^9.0.0", diff --git a/package.json b/package.json index 8bb5f25e20..5a6a2981cd 100644 --- a/package.json +++ b/package.json @@ -38,7 +38,7 @@ "build:packages": "npm run build --workspaces", "build:sandbox": "node scripts/build_sandbox.js", "build:binary": "node scripts/build_binary.js", - "bundle": "npm run generate && npm run build --workspace=@google/gemini-cli-devtools && node esbuild.config.js && node scripts/copy_bundle_assets.js", + "bundle": "npm run generate && npm run build --workspace=@google/gemini-cli-devtools && npm run bundle:browser-mcp -w @google/gemini-cli-core && node esbuild.config.js && node scripts/copy_bundle_assets.js", "test": "npm run test --workspaces --if-present && npm run test:sea-launch", "test:ci": "npm run test:ci --workspaces --if-present && npm run test:scripts && npm run 
test:sea-launch", "test:scripts": "vitest run --config ./scripts/tests/vitest.config.ts", @@ -68,7 +68,7 @@ "pre-commit": "node scripts/pre-commit.js" }, "overrides": { - "ink": "npm:@jrichman/ink@6.5.0", + "ink": "npm:@jrichman/ink@6.6.3", "wrap-ansi": "9.0.2", "cliui": { "wrap-ansi": "7.0.0" @@ -136,7 +136,7 @@ "yargs": "^17.7.2" }, "dependencies": { - "ink": "npm:@jrichman/ink@6.5.0", + "ink": "npm:@jrichman/ink@6.6.3", "latest-version": "^9.0.0", "node-fetch-native": "^1.6.7", "proper-lockfile": "^4.1.2", diff --git a/packages/a2a-server/src/utils/testing_utils.ts b/packages/a2a-server/src/utils/testing_utils.ts index f7f1645f8c..38b914e840 100644 --- a/packages/a2a-server/src/utils/testing_utils.ts +++ b/packages/a2a-server/src/utils/testing_utils.ts @@ -109,12 +109,8 @@ export function createMockConfig( enableEnvironmentVariableRedaction: false, }, }), - isExperimentalAgentHistoryTruncationEnabled: vi.fn().mockReturnValue(false), - getExperimentalAgentHistoryTruncationThreshold: vi.fn().mockReturnValue(50), - getExperimentalAgentHistoryRetainedMessages: vi.fn().mockReturnValue(30), - isExperimentalAgentHistorySummarizationEnabled: vi - .fn() - .mockReturnValue(false), + isAutoDistillationEnabled: vi.fn().mockReturnValue(false), + getContextManagementConfig: vi.fn().mockReturnValue({ enabled: false }), ...overrides, } as unknown as Config; diff --git a/packages/cli/package.json b/packages/cli/package.json index 072f2b8a72..95a570bf58 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -49,7 +49,7 @@ "fzf": "^0.5.2", "glob": "^12.0.0", "highlight.js": "^11.11.1", - "ink": "npm:@jrichman/ink@6.5.0", + "ink": "npm:@jrichman/ink@6.6.3", "ink-gradient": "^3.0.0", "ink-spinner": "^5.0.0", "latest-version": "^9.0.0", diff --git a/packages/cli/src/__snapshots__/nonInteractiveCliAgentSession.test.ts.snap b/packages/cli/src/__snapshots__/nonInteractiveCliAgentSession.test.ts.snap new file mode 100644 index 0000000000..92f396a59c --- /dev/null +++ 
b/packages/cli/src/__snapshots__/nonInteractiveCliAgentSession.test.ts.snap @@ -0,0 +1,35 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`runNonInteractive > should emit appropriate error event in streaming JSON mode: 'loop detected' 1`] = ` +"{"type":"init","timestamp":"","session_id":"test-session-id","model":"test-model"} +{"type":"message","timestamp":"","role":"user","content":"Loop test"} +{"type":"error","timestamp":"","severity":"warning","message":"Loop detected, stopping execution"} +{"type":"result","timestamp":"","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":,"tool_calls":0,"models":{}}} +" +`; + +exports[`runNonInteractive > should emit appropriate error event in streaming JSON mode: 'max session turns' 1`] = ` +"{"type":"init","timestamp":"","session_id":"test-session-id","model":"test-model"} +{"type":"message","timestamp":"","role":"user","content":"Max turns test"} +{"type":"error","timestamp":"","severity":"error","message":"Maximum session turns exceeded"} +{"type":"result","timestamp":"","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":,"tool_calls":0,"models":{}}} +" +`; + +exports[`runNonInteractive > should emit appropriate events for streaming JSON output 1`] = ` +"{"type":"init","timestamp":"","session_id":"test-session-id","model":"test-model"} +{"type":"message","timestamp":"","role":"user","content":"Stream test"} +{"type":"message","timestamp":"","role":"assistant","content":"Thinking...","delta":true} +{"type":"tool_use","timestamp":"","tool_name":"testTool","tool_id":"tool-1","parameters":{"arg1":"value1"}} +{"type":"tool_result","timestamp":"","tool_id":"tool-1","status":"success","output":"Tool executed successfully"} +{"type":"message","timestamp":"","role":"assistant","content":"Final answer","delta":true} 
+{"type":"result","timestamp":"","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":,"tool_calls":0,"models":{}}} +" +`; + +exports[`runNonInteractive > should write a single newline between sequential text outputs from the model 1`] = ` +"Use mock tool +Use mock tool again +Finished. +" +`; diff --git a/packages/cli/src/acp/acpClient.test.ts b/packages/cli/src/acp/acpClient.test.ts index 14295954dd..f077b0ef4b 100644 --- a/packages/cli/src/acp/acpClient.test.ts +++ b/packages/cli/src/acp/acpClient.test.ts @@ -27,6 +27,7 @@ import { type MessageBus, LlmRole, type GitService, + type ModelRouterService, processSingleFileContent, InvalidStreamError, } from '@google/gemini-cli-core'; @@ -102,17 +103,7 @@ vi.mock( ...actual, updatePolicy: vi.fn(), createPolicyUpdater: vi.fn(), - ReadManyFilesTool: vi.fn().mockImplementation(() => ({ - name: 'read_many_files', - kind: 'read', - build: vi.fn().mockReturnValue({ - getDescription: () => 'Read files', - toolLocations: () => [], - execute: vi.fn().mockResolvedValue({ - llmContent: ['--- file.txt ---\n\nFile content\n\n'], - }), - }), - })), + ReadManyFilesTool: vi.fn(), logToolCall: vi.fn(), LlmRole: { MAIN: 'main', @@ -421,6 +412,26 @@ describe('GeminiAgent', () => { ); }); + it('should include gemini-3.1-flash-lite when useGemini31FlashLite is true', async () => { + mockConfig.getHasAccessToPreviewModel = vi.fn().mockReturnValue(true); + mockConfig.getGemini31LaunchedSync = vi.fn().mockReturnValue(true); + mockConfig.getGemini31FlashLiteLaunchedSync = vi.fn().mockReturnValue(true); + + const response = await agent.newSession({ + cwd: '/tmp', + mcpServers: [], + }); + + expect(response.models?.availableModels).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + modelId: 'gemini-3.1-flash-lite-preview', + name: 'gemini-3.1-flash-lite-preview', + }), + ]), + ); + }); + it('should return modes with plan mode when plan is enabled', async () => { 
mockConfig.getContentGeneratorConfig = vi.fn().mockReturnValue({ apiKey: 'test-key', @@ -646,6 +657,7 @@ describe('Session', () => { sendMessageStream: vi.fn(), addHistory: vi.fn(), recordCompletedToolCalls: vi.fn(), + getHistory: vi.fn().mockReturnValue([]), } as unknown as Mocked; mockTool = { kind: 'read', @@ -667,6 +679,9 @@ describe('Session', () => { mockConfig = { getModel: vi.fn().mockReturnValue('gemini-pro'), getActiveModel: vi.fn().mockReturnValue('gemini-pro'), + getModelRouterService: vi.fn().mockReturnValue({ + route: vi.fn().mockResolvedValue({ model: 'resolved-model' }), + }), getToolRegistry: vi.fn().mockReturnValue(mockToolRegistry), getMcpServers: vi.fn(), getFileService: vi.fn().mockReturnValue({ @@ -713,10 +728,22 @@ describe('Session', () => { }, errors: [], } as unknown as LoadedSettings); + + (ReadManyFilesTool as unknown as Mock).mockImplementation(() => ({ + name: 'read_many_files', + kind: 'read', + build: vi.fn().mockReturnValue({ + getDescription: () => 'Read files', + toolLocations: () => [], + execute: vi.fn().mockResolvedValue({ + llmContent: ['--- file.txt ---\n\nFile content\n\n'], + }), + }), + })); }); afterEach(() => { - vi.clearAllMocks(); + vi.restoreAllMocks(); }); it('should send available commands', async () => { @@ -786,6 +813,42 @@ describe('Session', () => { expect(result).toMatchObject({ stopReason: 'end_turn' }); }); + it('should use model router to determine model', async () => { + const mockRouter = { + route: vi.fn().mockResolvedValue({ model: 'routed-model' }), + } as unknown as ModelRouterService; + mockConfig.getModelRouterService.mockReturnValue(mockRouter); + + const stream = createMockStream([ + { + type: StreamEventType.CHUNK, + value: { + candidates: [{ content: { parts: [{ text: 'Hello' }] } }], + }, + }, + ]); + mockChat.sendMessageStream.mockResolvedValue(stream); + + await session.prompt({ + sessionId: 'session-1', + prompt: [{ type: 'text', text: 'Hi' }], + }); + + 
expect(mockRouter.route).toHaveBeenCalledWith( + expect.objectContaining({ + requestedModel: 'gemini-pro', + request: [{ text: 'Hi' }], + }), + ); + expect(mockChat.sendMessageStream).toHaveBeenCalledWith( + expect.objectContaining({ model: 'routed-model' }), + expect.any(Array), + expect.any(String), + expect.any(Object), + expect.any(String), + ); + }); + it('should handle prompt with empty response (InvalidStreamError)', async () => { mockChat.sendMessageStream.mockRejectedValue( new InvalidStreamError('Empty response', 'NO_RESPONSE_TEXT'), diff --git a/packages/cli/src/acp/acpClient.ts b/packages/cli/src/acp/acpClient.ts index 6b76ffdc7a..14761d7162 100644 --- a/packages/cli/src/acp/acpClient.ts +++ b/packages/cli/src/acp/acpClient.ts @@ -28,7 +28,7 @@ import { debugLogger, ReadManyFilesTool, REFERENCE_CONTENT_START, - resolveModel, + type RoutingContext, createWorkingStdio, startupProfiler, Kind, @@ -42,6 +42,7 @@ import { DEFAULT_GEMINI_FLASH_LITE_MODEL, PREVIEW_GEMINI_MODEL, PREVIEW_GEMINI_3_1_MODEL, + PREVIEW_GEMINI_3_1_FLASH_LITE_MODEL, PREVIEW_GEMINI_3_1_CUSTOM_TOOLS_MODEL, PREVIEW_GEMINI_FLASH_MODEL, DEFAULT_GEMINI_MODEL_AUTO, @@ -758,10 +759,15 @@ export class Session { const functionCalls: FunctionCall[] = []; try { - const model = resolveModel( - this.context.config.getModel(), - (await this.context.config.getGemini31Launched?.()) ?? false, - ); + const routingContext: RoutingContext = { + history: chat.getHistory(/*curated=*/ true), + request: nextMessage?.parts ?? [], + signal: pendingSend.signal, + requestedModel: this.context.config.getModel(), + }; + + const router = this.context.config.getModelRouterService(); + const { model } = await router.route(routingContext); const responseStream = await chat.sendMessageStream( { model }, nextMessage?.parts ?? 
[], @@ -2009,10 +2015,31 @@ function buildAvailableModels( const preferredModel = config.getModel() || DEFAULT_GEMINI_MODEL_AUTO; const shouldShowPreviewModels = config.getHasAccessToPreviewModel(); const useGemini31 = config.getGemini31LaunchedSync?.() ?? false; + const useGemini31FlashLite = + config.getGemini31FlashLiteLaunchedSync?.() ?? false; const selectedAuthType = settings.merged.security.auth.selectedType; const useCustomToolModel = useGemini31 && selectedAuthType === AuthType.USE_GEMINI; + // --- DYNAMIC PATH --- + if ( + config.getExperimentalDynamicModelConfiguration?.() === true && + config.getModelConfigService + ) { + const options = config.getModelConfigService().getAvailableModelOptions({ + useGemini3_1: useGemini31, + useGemini3_1FlashLite: useGemini31FlashLite, + useCustomTools: useCustomToolModel, + hasAccessToPreview: shouldShowPreviewModels, + }); + + return { + availableModels: options, + currentModelId: preferredModel, + }; + } + + // --- LEGACY PATH --- const mainOptions = [ { value: DEFAULT_GEMINI_MODEL_AUTO, @@ -2056,7 +2083,7 @@ function buildAvailableModels( ? 
PREVIEW_GEMINI_3_1_CUSTOM_TOOLS_MODEL : previewProModel; - manualOptions.unshift( + const previewOptions = [ { value: previewProValue, title: getDisplayString(previewProModel), @@ -2065,7 +2092,16 @@ function buildAvailableModels( value: PREVIEW_GEMINI_FLASH_MODEL, title: getDisplayString(PREVIEW_GEMINI_FLASH_MODEL), }, - ); + ]; + + if (useGemini31FlashLite) { + previewOptions.push({ + value: PREVIEW_GEMINI_3_1_FLASH_LITE_MODEL, + title: getDisplayString(PREVIEW_GEMINI_3_1_FLASH_LITE_MODEL), + }); + } + + manualOptions.unshift(...previewOptions); } const scaleOptions = ( diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts index b9401ed5eb..6d4a75bbb0 100644 --- a/packages/cli/src/config/config.test.ts +++ b/packages/cli/src/config/config.test.ts @@ -1364,8 +1364,8 @@ describe('Approval mode tool exclusion logic', () => { 'test', ]; const settings = createTestMergedSettings({ - experimental: { - plan: true, + general: { + plan: { enabled: true }, }, }); const argv = await parseArguments(createTestMergedSettings()); @@ -1479,9 +1479,7 @@ describe('Approval mode tool exclusion logic', () => { const settings = createTestMergedSettings({ general: { defaultApprovalMode: 'plan', - }, - experimental: { - plan: false, + plan: { enabled: false }, }, }); const argv = await parseArguments(settings); @@ -1489,14 +1487,12 @@ describe('Approval mode tool exclusion logic', () => { expect(config.getApprovalMode()).toBe(ApprovalMode.DEFAULT); }); - it('should allow plan approval mode if experimental plan is enabled', async () => { + it('should allow plan approval mode if plan is enabled', async () => { process.argv = ['node', 'script.js']; const settings = createTestMergedSettings({ general: { defaultApprovalMode: 'plan', - }, - experimental: { - plan: true, + plan: { enabled: true }, }, }); const argv = await parseArguments(settings); @@ -2742,12 +2738,12 @@ describe('loadCliConfig approval mode', () => { 
expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.YOLO); }); - it('should set Plan approval mode when --approval-mode=plan is used and experimental.plan is enabled', async () => { + it('should set Plan approval mode when --approval-mode=plan is used and plan is enabled', async () => { process.argv = ['node', 'script.js', '--approval-mode', 'plan']; const argv = await parseArguments(createTestMergedSettings()); const settings = createTestMergedSettings({ - experimental: { - plan: true, + general: { + plan: { enabled: true }, }, }); const config = await loadCliConfig(settings, 'test-session', argv); @@ -2767,12 +2763,12 @@ describe('loadCliConfig approval mode', () => { expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.DEFAULT); }); - it('should throw error when --approval-mode=plan is used but experimental.plan is disabled', async () => { + it('should throw error when --approval-mode=plan is used but plan is disabled', async () => { process.argv = ['node', 'script.js', '--approval-mode', 'plan']; const argv = await parseArguments(createTestMergedSettings()); const settings = createTestMergedSettings({ - experimental: { - plan: false, + general: { + plan: { enabled: false }, }, }); @@ -2893,22 +2889,26 @@ describe('loadCliConfig approval mode', () => { expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.YOLO); }); - it('should respect plan mode from settings when experimental.plan is enabled', async () => { + it('should respect plan mode from settings when plan is enabled', async () => { process.argv = ['node', 'script.js']; const settings = createTestMergedSettings({ - general: { defaultApprovalMode: 'plan' }, - experimental: { plan: true }, + general: { + defaultApprovalMode: 'plan', + plan: { enabled: true }, + }, }); const argv = await parseArguments(settings); const config = await loadCliConfig(settings, 'test-session', argv); expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.PLAN); }); - it('should throw 
error if plan mode is in settings but experimental.plan is disabled', async () => { + it('should fall back to default if plan mode is in settings but disabled', async () => { process.argv = ['node', 'script.js']; const settings = createTestMergedSettings({ - general: { defaultApprovalMode: 'plan' }, - experimental: { plan: false }, + general: { + defaultApprovalMode: 'plan', + plan: { enabled: false }, + }, }); const argv = await parseArguments(settings); const config = await loadCliConfig(settings, 'test-session', argv); @@ -3696,7 +3696,9 @@ describe('loadCliConfig mcpEnabled', () => { it('should use plan directory from active extension when user has not specified one', async () => { process.argv = ['node', 'script.js']; const settings = createTestMergedSettings({ - experimental: { plan: true }, + general: { + plan: { enabled: true }, + }, }); const argv = await parseArguments(settings); @@ -3715,9 +3717,11 @@ describe('loadCliConfig mcpEnabled', () => { it('should NOT use plan directory from active extension when user has specified one', async () => { process.argv = ['node', 'script.js']; const settings = createTestMergedSettings({ - experimental: { plan: true }, general: { - plan: { directory: 'user-plans-dir' }, + plan: { + enabled: true, + directory: 'user-plans-dir', + }, }, }); const argv = await parseArguments(settings); @@ -3738,7 +3742,9 @@ describe('loadCliConfig mcpEnabled', () => { it('should NOT use plan directory from inactive extension', async () => { process.argv = ['node', 'script.js']; const settings = createTestMergedSettings({ - experimental: { plan: true }, + general: { + plan: { enabled: true }, + }, }); const argv = await parseArguments(settings); @@ -3759,7 +3765,9 @@ describe('loadCliConfig mcpEnabled', () => { it('should use default path if neither user nor extension settings provide a plan directory', async () => { process.argv = ['node', 'script.js']; const settings = createTestMergedSettings({ - experimental: { plan: true }, + 
general: { + plan: { enabled: true }, + }, }); const argv = await parseArguments(settings); diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 25419a2d6c..27953c60a9 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -669,9 +669,9 @@ export async function loadCliConfig( approvalMode = ApprovalMode.AUTO_EDIT; break; case 'plan': - if (!(settings.experimental?.plan ?? false)) { + if (!(settings.general?.plan?.enabled ?? true)) { debugLogger.warn( - 'Approval mode "plan" is only available when experimental.plan is enabled. Falling back to "default".', + 'Approval mode "plan" is disabled in your settings. Falling back to "default".', ); approvalMode = ApprovalMode.DEFAULT; } else { @@ -966,7 +966,7 @@ export async function loadCliConfig( extensionRegistryURI, enableExtensionReloading: settings.experimental?.extensionReloading, enableAgents: settings.experimental?.enableAgents, - plan: settings.experimental?.plan, + plan: settings.general?.plan?.enabled ?? 
true, tracker: settings.experimental?.taskTracker, directWebFetch: settings.experimental?.directWebFetch, planSettings: settings.general?.plan?.directory @@ -977,17 +977,12 @@ export async function loadCliConfig( disabledSkills: settings.skills?.disabled, experimentalJitContext: settings.experimental?.jitContext, experimentalMemoryManager: settings.experimental?.memoryManager, - experimentalAgentHistoryTruncation: - settings.experimental?.agentHistoryTruncation, - experimentalAgentHistoryTruncationThreshold: - settings.experimental?.agentHistoryTruncationThreshold, - experimentalAgentHistoryRetainedMessages: - settings.experimental?.agentHistoryRetainedMessages, - experimentalAgentHistorySummarization: - settings.experimental?.agentHistorySummarization, + contextManagement: { + enabled: settings.experimental?.contextManagement, + ...settings?.contextManagement, + }, modelSteering: settings.experimental?.modelSteering, topicUpdateNarration: settings.experimental?.topicUpdateNarration, - toolOutputMasking: settings.experimental?.toolOutputMasking, noBrowser: !!process.env['NO_BROWSER'], summarizeToolOutput: settings.model?.summarizeToolOutput, ideMode, @@ -1014,6 +1009,7 @@ export async function loadCliConfig( format: (argv.outputFormat ?? 
settings.output?.format) as OutputFormat, }, gemmaModelRouter: settings.experimental?.gemmaModelRouter, + adk: settings.experimental?.adk, fakeResponses: argv.fakeResponses, recordResponses: argv.recordResponses, retryFetchErrors: settings.general?.retryFetchErrors, diff --git a/packages/cli/src/config/footerItems.test.ts b/packages/cli/src/config/footerItems.test.ts index 420246811b..d9ef9bc3f2 100644 --- a/packages/cli/src/config/footerItems.test.ts +++ b/packages/cli/src/config/footerItems.test.ts @@ -5,87 +5,153 @@ */ import { describe, it, expect } from 'vitest'; -import { deriveItemsFromLegacySettings } from './footerItems.js'; +import { + deriveItemsFromLegacySettings, + resolveFooterState, +} from './footerItems.js'; import { createMockSettings } from '../test-utils/settings.js'; -describe('deriveItemsFromLegacySettings', () => { - it('returns defaults when no legacy settings are customized', () => { - const settings = createMockSettings({ - ui: { footer: { hideContextPercentage: true } }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).toEqual([ - 'workspace', - 'git-branch', - 'sandbox', - 'model-name', - 'quota', - ]); - }); +describe('footerItems', () => { + describe('deriveItemsFromLegacySettings', () => { + it('returns defaults when no legacy settings are customized', () => { + const settings = createMockSettings({ + ui: { footer: { hideContextPercentage: true } }, + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).toEqual([ + 'workspace', + 'git-branch', + 'sandbox', + 'model-name', + 'quota', + ]); + }); - it('removes workspace when hideCWD is true', () => { - const settings = createMockSettings({ - ui: { footer: { hideCWD: true, hideContextPercentage: true } }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).not.toContain('workspace'); - }); + it('removes workspace when hideCWD is true', () => { + const settings = createMockSettings({ + 
ui: { footer: { hideCWD: true, hideContextPercentage: true } }, + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).not.toContain('workspace'); + }); - it('removes sandbox when hideSandboxStatus is true', () => { - const settings = createMockSettings({ - ui: { footer: { hideSandboxStatus: true, hideContextPercentage: true } }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).not.toContain('sandbox'); - }); - - it('removes model-name, context-used, and quota when hideModelInfo is true', () => { - const settings = createMockSettings({ - ui: { footer: { hideModelInfo: true, hideContextPercentage: true } }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).not.toContain('model-name'); - expect(items).not.toContain('context-used'); - expect(items).not.toContain('quota'); - }); - - it('includes context-used when hideContextPercentage is false', () => { - const settings = createMockSettings({ - ui: { footer: { hideContextPercentage: false } }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).toContain('context-used'); - // Should be after model-name - const modelIdx = items.indexOf('model-name'); - const contextIdx = items.indexOf('context-used'); - expect(contextIdx).toBe(modelIdx + 1); - }); - - it('includes memory-usage when showMemoryUsage is true', () => { - const settings = createMockSettings({ - ui: { showMemoryUsage: true, footer: { hideContextPercentage: true } }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).toContain('memory-usage'); - }); - - it('handles combination of settings', () => { - const settings = createMockSettings({ - ui: { - showMemoryUsage: true, - footer: { - hideCWD: true, - hideModelInfo: true, - hideContextPercentage: false, + it('removes sandbox when hideSandboxStatus is true', () => { + const settings = createMockSettings({ + ui: { + footer: { 
hideSandboxStatus: true, hideContextPercentage: true }, }, - }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).toEqual([ - 'git-branch', - 'sandbox', - 'context-used', - 'memory-usage', - ]); + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).not.toContain('sandbox'); + }); + + it('removes model-name, context-used, and quota when hideModelInfo is true', () => { + const settings = createMockSettings({ + ui: { footer: { hideModelInfo: true, hideContextPercentage: true } }, + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).not.toContain('model-name'); + expect(items).not.toContain('context-used'); + expect(items).not.toContain('quota'); + }); + + it('includes context-used when hideContextPercentage is false', () => { + const settings = createMockSettings({ + ui: { footer: { hideContextPercentage: false } }, + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).toContain('context-used'); + // Should be after model-name + const modelIdx = items.indexOf('model-name'); + const contextIdx = items.indexOf('context-used'); + expect(contextIdx).toBe(modelIdx + 1); + }); + + it('includes memory-usage when showMemoryUsage is true', () => { + const settings = createMockSettings({ + ui: { showMemoryUsage: true, footer: { hideContextPercentage: true } }, + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).toContain('memory-usage'); + }); + + it('handles combination of settings', () => { + const settings = createMockSettings({ + ui: { + showMemoryUsage: true, + footer: { + hideCWD: true, + hideModelInfo: true, + hideContextPercentage: false, + }, + }, + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).toEqual([ + 'git-branch', + 'sandbox', + 'context-used', + 'memory-usage', + ]); + }); + }); + + describe('resolveFooterState', () => { + it('filters out auth item when 
showUserIdentity is false', () => { + const settings = createMockSettings({ + ui: { + showUserIdentity: false, + footer: { + items: ['workspace', 'auth', 'model-name'], + }, + }, + }).merged; + + const state = resolveFooterState(settings); + expect(state.orderedIds).not.toContain('auth'); + expect(state.selectedIds.has('auth')).toBe(false); + // It should also not be in the 'others' part of orderedIds + expect(state.orderedIds).toEqual([ + 'workspace', + 'model-name', + 'git-branch', + 'sandbox', + 'context-used', + 'quota', + 'memory-usage', + 'session-id', + 'code-changes', + 'token-count', + ]); + }); + + it('includes auth item when showUserIdentity is true', () => { + const settings = createMockSettings({ + ui: { + showUserIdentity: true, + footer: { + items: ['workspace', 'auth', 'model-name'], + }, + }, + }).merged; + + const state = resolveFooterState(settings); + expect(state.orderedIds).toContain('auth'); + expect(state.selectedIds.has('auth')).toBe(true); + }); + + it('includes auth item by default when showUserIdentity is undefined (defaults to true)', () => { + const settings = createMockSettings({ + ui: { + footer: { + items: ['workspace', 'auth', 'model-name'], + }, + }, + }).merged; + + const state = resolveFooterState(settings); + expect(state.orderedIds).toContain('auth'); + expect(state.selectedIds.has('auth')).toBe(true); + }); }); }); diff --git a/packages/cli/src/config/footerItems.ts b/packages/cli/src/config/footerItems.ts index 8410d0b5ec..9f3943b692 100644 --- a/packages/cli/src/config/footerItems.ts +++ b/packages/cli/src/config/footerItems.ts @@ -47,6 +47,11 @@ export const ALL_ITEMS = [ header: 'session', description: 'Unique identifier for the current session', }, + { + id: 'auth', + header: '/auth', + description: 'Current authentication info', + }, { id: 'code-changes', header: 'diff', @@ -70,6 +75,7 @@ export const DEFAULT_ORDER = [ 'quota', 'memory-usage', 'session-id', + 'auth', 'code-changes', 'token-count', ]; @@ -121,10 +127,19 
@@ export function resolveFooterState(settings: MergedSettings): { orderedIds: string[]; selectedIds: Set; } { + const showUserIdentity = settings.ui?.showUserIdentity !== false; + const filteredValidIds = showUserIdentity + ? VALID_IDS + : new Set([...VALID_IDS].filter((id) => id !== 'auth')); + const source = ( settings.ui?.footer?.items ?? deriveItemsFromLegacySettings(settings) - ).filter((id: string) => VALID_IDS.has(id)); - const others = DEFAULT_ORDER.filter((id) => !source.includes(id)); + ).filter((id: string) => filteredValidIds.has(id)); + + const others = DEFAULT_ORDER.filter( + (id) => !source.includes(id) && filteredValidIds.has(id), + ); + return { orderedIds: [...source, ...others], selectedIds: new Set(source), diff --git a/packages/cli/src/config/settings.ts b/packages/cli/src/config/settings.ts index 984bdb8d60..7eec1c61b8 100644 --- a/packages/cli/src/config/settings.ts +++ b/packages/cli/src/config/settings.ts @@ -1124,15 +1124,15 @@ function migrateExperimentalSettings( }; let modified = false; - const migrateExperimental = ( + const migrateExperimental = >( oldKey: string, - migrateFn: (oldValue: Record) => void, + migrateFn: (oldValue: T) => void, ) => { const old = experimentalSettings[oldKey]; - if (old) { + if (old !== undefined) { foundDeprecated?.push(`experimental.${oldKey}`); // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - migrateFn(old as Record); + migrateFn(old as T); modified = true; } }; @@ -1197,6 +1197,24 @@ function migrateExperimentalSettings( agentsOverrides['cli_help'] = override; }); + // Migrate experimental.plan -> general.plan.enabled + migrateExperimental('plan', (planValue) => { + const generalSettings = + (settings.general as Record | undefined) || {}; + const newGeneral = { ...generalSettings }; + const planSettings = + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + (newGeneral['plan'] as Record | undefined) || {}; + const newPlan = { ...planSettings }; + + if 
(newPlan['enabled'] === undefined) { + newPlan['enabled'] = planValue; + newGeneral['plan'] = newPlan; + loadedSettings.setValue(scope, 'general', newGeneral); + modified = true; + } + }); + if (modified) { agentsSettings['overrides'] = agentsOverrides; loadedSettings.setValue(scope, 'agents', agentsSettings); @@ -1205,6 +1223,7 @@ function migrateExperimentalSettings( const newExperimental = { ...experimentalSettings }; delete newExperimental['codebaseInvestigatorSettings']; delete newExperimental['cliHelpAgentSettings']; + delete newExperimental['plan']; loadedSettings.setValue(scope, 'experimental', newExperimental); } return true; diff --git a/packages/cli/src/config/settingsSchema.test.ts b/packages/cli/src/config/settingsSchema.test.ts index c358cd65aa..8bda41d55b 100644 --- a/packages/cli/src/config/settingsSchema.test.ts +++ b/packages/cli/src/config/settingsSchema.test.ts @@ -418,14 +418,17 @@ describe('SettingsSchema', () => { }); it('should have plan setting in schema', () => { - const setting = getSettingsSchema().experimental.properties.plan; + const setting = + getSettingsSchema().general.properties.plan.properties.enabled; expect(setting).toBeDefined(); expect(setting.type).toBe('boolean'); - expect(setting.category).toBe('Experimental'); + expect(setting.category).toBe('General'); expect(setting.default).toBe(true); expect(setting.requiresRestart).toBe(true); expect(setting.showInDialog).toBe(true); - expect(setting.description).toBe('Enable Plan Mode.'); + expect(setting.description).toBe( + 'Enable Plan Mode for read-only safety during planning.', + ); }); it('should have hooksConfig.notifications setting in schema', () => { @@ -502,6 +505,31 @@ describe('SettingsSchema', () => { 'The model to use for the classifier. 
Only tested on `gemma3-1b-gpu-custom`.', ); }); + + it('should have adk setting in schema', () => { + const adk = getSettingsSchema().experimental.properties.adk; + expect(adk).toBeDefined(); + expect(adk.type).toBe('object'); + expect(adk.category).toBe('Experimental'); + expect(adk.default).toEqual({}); + expect(adk.requiresRestart).toBe(true); + expect(adk.showInDialog).toBe(false); + expect(adk.description).toBe( + 'Settings for the Agent Development Kit (ADK).', + ); + + const agentSessionNoninteractiveEnabled = + adk.properties.agentSessionNoninteractiveEnabled; + expect(agentSessionNoninteractiveEnabled).toBeDefined(); + expect(agentSessionNoninteractiveEnabled.type).toBe('boolean'); + expect(agentSessionNoninteractiveEnabled.category).toBe('Experimental'); + expect(agentSessionNoninteractiveEnabled.default).toBe(false); + expect(agentSessionNoninteractiveEnabled.requiresRestart).toBe(true); + expect(agentSessionNoninteractiveEnabled.showInDialog).toBe(false); + expect(agentSessionNoninteractiveEnabled.description).toBe( + 'Enable non-interactive agent sessions.', + ); + }); }); it('has JSON schema definitions for every referenced ref', () => { diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index c40e87db18..1578b920ef 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -293,6 +293,16 @@ const SETTINGS_SCHEMA = { description: 'Planning features configuration.', showInDialog: false, properties: { + enabled: { + type: 'boolean', + label: 'Enable Plan Mode', + category: 'General', + requiresRestart: true, + default: true, + description: + 'Enable Plan Mode for read-only safety during planning.', + showInDialog: true, + }, directory: { type: 'string', label: 'Plan Directory', @@ -561,6 +571,16 @@ const SETTINGS_SCHEMA = { description: 'Show the "? 
for shortcuts" hint above the input.', showInDialog: true, }, + compactToolOutput: { + type: 'boolean', + label: 'Compact Tool Output', + category: 'UI', + requiresRestart: false, + default: false, + description: + 'Display tool outputs (like directory listings and file reads) in a compact, structured format.', + showInDialog: true, + }, hideBanner: { type: 'boolean', label: 'Hide Banner', @@ -1913,54 +1933,22 @@ const SETTINGS_SCHEMA = { description: 'Setting to enable experimental features', showInDialog: false, properties: { - toolOutputMasking: { + adk: { type: 'object', - label: 'Tool Output Masking', + label: 'ADK', category: 'Experimental', requiresRestart: true, - ignoreInDocs: false, default: {}, - description: - 'Advanced settings for tool output masking to manage context window efficiency.', + description: 'Settings for the Agent Development Kit (ADK).', showInDialog: false, properties: { - enabled: { + agentSessionNoninteractiveEnabled: { type: 'boolean', - label: 'Enable Tool Output Masking', + label: 'Agent Session Non-interactive Enabled', category: 'Experimental', requiresRestart: true, - default: true, - description: 'Enables tool output masking to save tokens.', - showInDialog: true, - }, - toolProtectionThreshold: { - type: 'number', - label: 'Tool Protection Threshold', - category: 'Experimental', - requiresRestart: true, - default: 50000, - description: - 'Minimum number of tokens to protect from masking (most recent tool outputs).', - showInDialog: false, - }, - minPrunableTokensThreshold: { - type: 'number', - label: 'Min Prunable Tokens Threshold', - category: 'Experimental', - requiresRestart: true, - default: 30000, - description: - 'Minimum prunable tokens required to trigger a masking pass.', - showInDialog: false, - }, - protectLatestTurn: { - type: 'boolean', - label: 'Protect Latest Turn', - category: 'Experimental', - requiresRestart: true, - default: true, - description: - 'Ensures the absolute latest turn is never masked, 
regardless of token count.', + default: false, + description: 'Enable non-interactive agent sessions.', showInDialog: false, }, }, @@ -2036,7 +2024,7 @@ const SETTINGS_SCHEMA = { label: 'JIT Context Loading', category: 'Experimental', requiresRestart: true, - default: true, + default: false, description: 'Enable Just-In-Time (JIT) context loading.', showInDialog: false, }, @@ -2060,15 +2048,6 @@ const SETTINGS_SCHEMA = { 'Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it).', showInDialog: true, }, - plan: { - type: 'boolean', - label: 'Plan', - category: 'Experimental', - requiresRestart: true, - default: true, - description: 'Enable Plan Mode.', - showInDialog: true, - }, taskTracker: { type: 'boolean', label: 'Task Tracker', @@ -2169,44 +2148,13 @@ const SETTINGS_SCHEMA = { 'Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories.', showInDialog: true, }, - agentHistoryTruncation: { + contextManagement: { type: 'boolean', - label: 'Agent History Truncation', + label: 'Enable Context Management', category: 'Experimental', requiresRestart: true, default: false, - description: - 'Enable truncation window logic for the Agent History Provider.', - showInDialog: true, - }, - agentHistoryTruncationThreshold: { - type: 'number', - label: 'Agent History Truncation Threshold', - category: 'Experimental', - requiresRestart: true, - default: 30, - description: - 'The maximum number of messages before history is truncated.', - showInDialog: true, - }, - agentHistoryRetainedMessages: { - type: 'number', - label: 'Agent History Retained Messages', - category: 'Experimental', - requiresRestart: true, - default: 15, - description: - 'The number of recent messages to retain after truncation.', - showInDialog: true, - }, - agentHistorySummarization: { - type: 'boolean', - label: 'Agent History 
Summarization', - category: 'Experimental', - requiresRestart: true, - default: false, - description: - 'Enable summarization of truncated content via a small model for the Agent History Provider.', + description: 'Enable logic for context management.', showInDialog: true, }, topicUpdateNarration: { @@ -2485,6 +2433,171 @@ const SETTINGS_SCHEMA = { }, }, + contextManagement: { + type: 'object', + label: 'Context Management', + category: 'Experimental', + requiresRestart: true, + default: {}, + description: + 'Settings for agent history and tool distillation context management.', + showInDialog: false, + properties: { + historyWindow: { + type: 'object', + label: 'History Window Settings', + category: 'Context Management', + requiresRestart: true, + default: {}, + showInDialog: false, + properties: { + maxTokens: { + type: 'number', + label: 'Max Tokens', + category: 'Context Management', + requiresRestart: true, + default: 150_000, + description: + 'The number of tokens to allow before triggering compression.', + showInDialog: false, + }, + retainedTokens: { + type: 'number', + label: 'Retained Tokens', + category: 'Context Management', + requiresRestart: true, + default: 40_000, + description: 'The number of tokens to always retain.', + showInDialog: false, + }, + }, + }, + messageLimits: { + type: 'object', + label: 'Message Limits', + category: 'Context Management', + requiresRestart: true, + default: {}, + showInDialog: false, + properties: { + normalMaxTokens: { + type: 'number', + label: 'Normal Maximum Tokens', + category: 'Context Management', + requiresRestart: true, + default: 2500, + description: + 'The target number of tokens to budget for a normal conversation turn.', + showInDialog: false, + }, + retainedMaxTokens: { + type: 'number', + label: 'Retained Maximum Tokens', + category: 'Context Management', + requiresRestart: true, + default: 12000, + description: + 'The maximum number of tokens a single conversation turn can consume before truncation.', 
+ showInDialog: false, + }, + normalizationHeadRatio: { + type: 'number', + label: 'Normalization Head Ratio', + category: 'Context Management', + requiresRestart: true, + default: 0.25, + description: + 'The ratio of tokens to retain from the beginning of a truncated message (0.0 to 1.0).', + showInDialog: false, + }, + }, + }, + tools: { + type: 'object', + label: 'Context Management Tools', + category: 'Context Management', + requiresRestart: true, + default: {}, + showInDialog: false, + properties: { + distillation: { + type: 'object', + label: 'Tool Distillation', + category: 'Context Management', + requiresRestart: true, + default: {}, + showInDialog: false, + properties: { + maxOutputTokens: { + type: 'number', + label: 'Max Output Tokens', + category: 'Context Management', + requiresRestart: true, + default: 10_000, + description: + 'Maximum tokens to show to the model when truncating large tool outputs.', + showInDialog: false, + }, + summarizationThresholdTokens: { + type: 'number', + label: 'Tool Summarization Threshold', + category: 'Context Management', + requiresRestart: true, + default: 20_000, + description: + 'Threshold above which truncated tool outputs will be summarized by an LLM.', + showInDialog: false, + }, + }, + }, + outputMasking: { + type: 'object', + label: 'Tool Output Masking', + category: 'Context Management', + requiresRestart: true, + ignoreInDocs: false, + default: {}, + description: + 'Advanced settings for tool output masking to manage context window efficiency.', + showInDialog: false, + properties: { + protectionThresholdTokens: { + type: 'number', + label: 'Tool Protection Threshold (Tokens)', + category: 'Context Management', + requiresRestart: true, + default: 50_000, + description: + 'Minimum number of tokens to protect from masking (most recent tool outputs).', + showInDialog: false, + }, + minPrunableThresholdTokens: { + type: 'number', + label: 'Min Prunable Tokens Threshold', + category: 'Context Management', + 
requiresRestart: true, + default: 30_000, + description: + 'Minimum prunable tokens required to trigger a masking pass.', + showInDialog: false, + }, + protectLatestTurn: { + type: 'boolean', + label: 'Protect Latest Turn', + category: 'Context Management', + requiresRestart: true, + default: true, + description: + 'Ensures the absolute latest turn is never masked, regardless of token count.', + showInDialog: false, + }, + }, + }, + }, + }, + }, + }, + admin: { type: 'object', label: 'Admin', diff --git a/packages/cli/src/interactiveCli.tsx b/packages/cli/src/interactiveCli.tsx index a6337ef29c..2e0cd25619 100644 --- a/packages/cli/src/interactiveCli.tsx +++ b/packages/cli/src/interactiveCli.tsx @@ -46,6 +46,7 @@ import { TerminalProvider } from './ui/contexts/TerminalContext.js'; import { isAlternateBufferEnabled } from './ui/hooks/useAlternateBuffer.js'; import { OverflowProvider } from './ui/contexts/OverflowContext.js'; import { profiler } from './ui/components/DebugProfiler.js'; +import { initializeConsoleStore } from './ui/hooks/useConsoleMessages.js'; const SLOW_RENDER_MS = 200; @@ -57,6 +58,7 @@ export async function startInteractiveUI( resumedSessionData: ResumedSessionData | undefined, initializationResult: InitializationResult, ) { + initializeConsoleStore(); // Never enter Ink alternate buffer mode when screen reader mode is enabled // as there is no benefit of alternate buffer mode when using a screen reader // and the Ink alternate buffer mode requires line wrapping harmful to diff --git a/packages/cli/src/nonInteractiveCli.test.ts b/packages/cli/src/nonInteractiveCli.test.ts index 0b3a61e416..fb803d4272 100644 --- a/packages/cli/src/nonInteractiveCli.test.ts +++ b/packages/cli/src/nonInteractiveCli.test.ts @@ -172,7 +172,7 @@ describe('runNonInteractive', () => { }; mockConfig = { - initialize: vi.fn().mockResolvedValue(undefined), + initialize: vi.fn().mockReturnValue(Promise.resolve(undefined)), getMessageBus: vi.fn().mockReturnValue({ subscribe: 
vi.fn(), unsubscribe: vi.fn(), @@ -196,6 +196,7 @@ describe('runNonInteractive', () => { isTrustedFolder: vi.fn().mockReturnValue(false), getRawOutput: vi.fn().mockReturnValue(false), getAcceptRawOutputRisk: vi.fn().mockReturnValue(false), + getAgentSessionNoninteractiveEnabled: vi.fn().mockReturnValue(false), } as unknown as Config; mockSettings = { diff --git a/packages/cli/src/nonInteractiveCli.ts b/packages/cli/src/nonInteractiveCli.ts index c7e5d1c828..ad26f9eb5f 100644 --- a/packages/cli/src/nonInteractiveCli.ts +++ b/packages/cli/src/nonInteractiveCli.ts @@ -53,6 +53,7 @@ import { handleMaxTurnsExceededError, } from './utils/errors.js'; import { TextOutput } from './ui/utils/textOutput.js'; +import { runNonInteractive as runNonInteractiveAgentSession } from './nonInteractiveCliAgentSession.js'; interface RunNonInteractiveParams { config: Config; @@ -62,13 +63,16 @@ interface RunNonInteractiveParams { resumedSessionData?: ResumedSessionData; } -export async function runNonInteractive({ - config, - settings, - input, - prompt_id, - resumedSessionData, -}: RunNonInteractiveParams): Promise { +export async function runNonInteractive( + params: RunNonInteractiveParams, +): Promise { + const useAgentSession = params.config.getAgentSessionNoninteractiveEnabled(); + if (useAgentSession) { + return runNonInteractiveAgentSession(params); + } + + const { config, settings, input, prompt_id, resumedSessionData } = params; + return promptIdContext.run(prompt_id, async () => { const consolePatcher = new ConsolePatcher({ stderr: true, diff --git a/packages/cli/src/nonInteractiveCliAgentSession.test.ts b/packages/cli/src/nonInteractiveCliAgentSession.test.ts new file mode 100644 index 0000000000..1059223b60 --- /dev/null +++ b/packages/cli/src/nonInteractiveCliAgentSession.test.ts @@ -0,0 +1,2436 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { + Config, + ToolRegistry, + ServerGeminiStreamEvent, + 
SessionMetrics, + AnyDeclarativeTool, + AnyToolInvocation, + UserFeedbackPayload, +} from '@google/gemini-cli-core'; +import { + ToolErrorType, + GeminiEventType, + OutputFormat, + uiTelemetryService, + FatalInputError, + CoreEvent, + CoreToolCallStatus, +} from '@google/gemini-cli-core'; +import type { Part } from '@google/genai'; +import { runNonInteractive } from './nonInteractiveCliAgentSession.js'; +import { + describe, + it, + expect, + beforeEach, + afterEach, + vi, + type Mock, + type MockInstance, +} from 'vitest'; +import type { LoadedSettings } from './config/settings.js'; + +// Mock core modules +vi.mock('./ui/hooks/atCommandProcessor.js'); + +const mockSetupInitialActivityLogger = vi.hoisted(() => vi.fn()); +vi.mock('./utils/devtoolsService.js', () => ({ + setupInitialActivityLogger: mockSetupInitialActivityLogger, +})); + +const mockCoreEvents = vi.hoisted(() => ({ + on: vi.fn(), + off: vi.fn(), + emit: vi.fn(), + emitConsoleLog: vi.fn(), + emitFeedback: vi.fn(), + drainBacklogs: vi.fn(), +})); + +const mockSchedulerSchedule = vi.hoisted(() => vi.fn()); + +vi.mock('@google/gemini-cli-core', async (importOriginal) => { + const original = + await importOriginal(); + + class MockChatRecordingService { + initialize = vi.fn(); + recordMessage = vi.fn(); + recordMessageTokens = vi.fn(); + recordToolCalls = vi.fn(); + } + + return { + ...original, + Scheduler: class { + schedule = mockSchedulerSchedule; + cancelAll = vi.fn(); + }, + isTelemetrySdkInitialized: vi.fn().mockReturnValue(true), + ChatRecordingService: MockChatRecordingService, + uiTelemetryService: { + getMetrics: vi.fn(), + }, + LegacyAgentSession: original.LegacyAgentSession, + geminiPartsToContentParts: original.geminiPartsToContentParts, + coreEvents: mockCoreEvents, + createWorkingStdio: vi.fn(() => ({ + stdout: process.stdout, + stderr: process.stderr, + })), + }; +}); + +const mockGetCommands = vi.hoisted(() => vi.fn()); +const mockCommandServiceCreate = vi.hoisted(() => vi.fn()); 
+vi.mock('./services/CommandService.js', () => ({ + CommandService: { + create: mockCommandServiceCreate, + }, +})); + +vi.mock('./services/FileCommandLoader.js'); +vi.mock('./services/McpPromptLoader.js'); +vi.mock('./services/BuiltinCommandLoader.js'); + +describe('runNonInteractive', () => { + let mockConfig: Config; + let mockSettings: LoadedSettings; + let mockToolRegistry: ToolRegistry; + let consoleErrorSpy: MockInstance; + let processStdoutSpy: MockInstance; + let processStderrSpy: MockInstance; + let mockGeminiClient: { + sendMessageStream: Mock; + resumeChat: Mock; + getChatRecordingService: Mock; + getChat: Mock; + getCurrentSequenceModel: Mock; + }; + const MOCK_SESSION_METRICS: SessionMetrics = { + models: {}, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { + accept: 0, + reject: 0, + modify: 0, + auto_accept: 0, + }, + byName: {}, + }, + files: { + totalLinesAdded: 0, + totalLinesRemoved: 0, + }, + }; + + beforeEach(async () => { + mockSchedulerSchedule.mockReset(); + + mockCommandServiceCreate.mockResolvedValue({ + getCommands: mockGetCommands, + }); + + consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + processStdoutSpy = vi + .spyOn(process.stdout, 'write') + .mockImplementation(() => true); + vi.spyOn(process.stdout, 'on').mockImplementation(() => process.stdout); + processStderrSpy = vi + .spyOn(process.stderr, 'write') + .mockImplementation(() => true); + vi.spyOn(process, 'exit').mockImplementation((code) => { + throw new Error(`process.exit(${code}) called`); + }); + + mockToolRegistry = { + getTool: vi.fn(), + getFunctionDeclarations: vi.fn().mockReturnValue([]), + } as unknown as ToolRegistry; + + mockGeminiClient = { + sendMessageStream: vi.fn(), + resumeChat: vi.fn().mockResolvedValue(undefined), + getChatRecordingService: vi.fn(() => ({ + initialize: vi.fn(), + recordMessage: vi.fn(), + recordMessageTokens: vi.fn(), + recordToolCalls: vi.fn(), + })), + 
getChat: vi.fn(() => ({ recordCompletedToolCalls: vi.fn() })), + getCurrentSequenceModel: vi.fn().mockReturnValue(null), + }; + + mockConfig = { + initialize: vi.fn().mockReturnValue(Promise.resolve(undefined)), + getMessageBus: vi.fn().mockReturnValue({ + subscribe: vi.fn(), + unsubscribe: vi.fn(), + publish: vi.fn(), + }), + getGeminiClient: vi.fn().mockReturnValue(mockGeminiClient), + getToolRegistry: vi.fn().mockReturnValue(mockToolRegistry), + getMaxSessionTurns: vi.fn().mockReturnValue(10), + getSessionId: vi.fn().mockReturnValue('test-session-id'), + getProjectRoot: vi.fn().mockReturnValue('/test/project'), + storage: { + getProjectTempDir: vi.fn().mockReturnValue('/test/project/.gemini/tmp'), + }, + getIdeMode: vi.fn().mockReturnValue(false), + + getContentGeneratorConfig: vi.fn().mockReturnValue({}), + getDebugMode: vi.fn().mockReturnValue(false), + getOutputFormat: vi.fn().mockReturnValue('text'), + getModel: vi.fn().mockReturnValue('test-model'), + getFolderTrust: vi.fn().mockReturnValue(false), + isTrustedFolder: vi.fn().mockReturnValue(false), + getRawOutput: vi.fn().mockReturnValue(false), + getAcceptRawOutputRisk: vi.fn().mockReturnValue(false), + getAgentSessionNoninteractiveEnabled: vi.fn().mockReturnValue(false), + } as unknown as Config; + + mockSettings = { + system: { path: '', settings: {} }, + systemDefaults: { path: '', settings: {} }, + user: { path: '', settings: {} }, + workspace: { path: '', settings: {} }, + errors: [], + setValue: vi.fn(), + merged: { + security: { + auth: { + enforcedType: undefined, + }, + }, + }, + isTrusted: true, + migratedInMemoryScopes: new Set(), + forScope: vi.fn(), + computeMergedSettings: vi.fn(), + } as unknown as LoadedSettings; + + const { handleAtCommand } = await import( + './ui/hooks/atCommandProcessor.js' + ); + vi.mocked(handleAtCommand).mockImplementation(async ({ query }) => ({ + processedQuery: [{ text: query }], + })); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + async function* 
createStreamFromEvents( + events: ServerGeminiStreamEvent[], + ): AsyncGenerator { + for (const event of events) { + yield event; + } + } + + const getWrittenOutput = () => + processStdoutSpy.mock.calls.map((c) => c[0]).join(''); + + it('should process input and write text output', async () => { + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Hello' }, + { type: GeminiEventType.Content, value: ' World' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-1', + }); + + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledWith( + [{ text: 'Test input' }], + expect.any(AbortSignal), + 'prompt-id-1', + undefined, + false, + 'Test input', + ); + expect(getWrittenOutput()).toBe('Hello World\n'); + // Note: Telemetry shutdown is now handled in runExitCleanup() in cleanup.ts + // so we no longer expect shutdownTelemetry to be called directly here + }); + + it('should stream the specific stream started by send', async () => { + const { LegacyAgentSession } = await import('@google/gemini-cli-core'); + const streamSpy = vi.spyOn(LegacyAgentSession.prototype, 'stream'); + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Hello again' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-stream', + }); + + expect(streamSpy).toHaveBeenCalledWith({ streamId: expect.any(String) }); + }); + + it('fails fast if the session acknowledges 
a message send without a stream', async () => { + const { LegacyAgentSession } = await import('@google/gemini-cli-core'); + const sendSpy = vi + .spyOn(LegacyAgentSession.prototype, 'send') + .mockResolvedValue({ streamId: null }); + const streamSpy = vi.spyOn(LegacyAgentSession.prototype, 'stream'); + + await expect( + runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-null-stream', + }), + ).rejects.toThrow( + 'LegacyAgentSession.send() unexpectedly returned no stream for a message send.', + ); + + expect(streamSpy).not.toHaveBeenCalled(); + + sendSpy.mockRestore(); + streamSpy.mockRestore(); + }); + + it('should register activity logger when GEMINI_CLI_ACTIVITY_LOG_TARGET is set', async () => { + vi.stubEnv('GEMINI_CLI_ACTIVITY_LOG_TARGET', '/tmp/test.jsonl'); + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'test', + prompt_id: 'prompt-id-activity-logger', + }); + + expect(mockSetupInitialActivityLogger).toHaveBeenCalledWith(mockConfig); + vi.unstubAllEnvs(); + }); + + it('should not register activity logger when GEMINI_CLI_ACTIVITY_LOG_TARGET is not set', async () => { + vi.stubEnv('GEMINI_CLI_ACTIVITY_LOG_TARGET', ''); + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'test', + prompt_id: 'prompt-id-activity-logger-off', + }); + + expect(mockSetupInitialActivityLogger).not.toHaveBeenCalled(); + vi.unstubAllEnvs(); + 
}); + + it('should handle a single tool call and respond', async () => { + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-1', + name: 'testTool', + args: { arg1: 'value1' }, + isClientInitiated: false, + prompt_id: 'prompt-id-2', + }, + }; + const toolResponse: Part[] = [{ text: 'Tool response' }]; + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Success, + request: { + callId: 'tool-1', + name: 'testTool', + args: { arg1: 'value1' }, + isClientInitiated: false, + prompt_id: 'prompt-id-2', + }, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + responseParts: toolResponse, + callId: 'tool-1', + error: undefined, + errorType: undefined, + contentLength: undefined, + }, + }, + ]); + + const firstCallEvents: ServerGeminiStreamEvent[] = [toolCallEvent]; + const secondCallEvents: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Final answer' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + + mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents(firstCallEvents)) + .mockReturnValueOnce(createStreamFromEvents(secondCallEvents)); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Use a tool', + prompt_id: 'prompt-id-2', + }); + + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledTimes(2); + expect(mockSchedulerSchedule).toHaveBeenCalledWith( + [expect.objectContaining({ name: 'testTool' })], + expect.any(AbortSignal), + ); + expect(mockGeminiClient.sendMessageStream).toHaveBeenNthCalledWith( + 2, + [{ text: 'Tool response' }], + expect.any(AbortSignal), + 'prompt-id-2', + undefined, + false, + undefined, + ); + expect(getWrittenOutput()).toBe('Final answer\n'); + }); + + it('should write a single newline between sequential text outputs from the model', async () => { + 
// This test simulates a multi-turn conversation to ensure that a single newline + // is printed between each block of text output from the model. + + // 1. Define the tool requests that the model will ask the CLI to run. + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'mock-tool', + name: 'mockTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-multi', + }, + }; + + // 2. Mock the execution of the tools. We just need them to succeed. + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Success, + request: toolCallEvent.value, // This is generic enough for both calls + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + responseParts: [], + callId: 'mock-tool', + }, + }, + ]); + + // 3. Define the sequence of events streamed from the mock model. + // Turn 1: Model outputs text, then requests a tool call. + const modelTurn1: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Use mock tool' }, + toolCallEvent, + ]; + // Turn 2: Model outputs more text, then requests another tool call. + const modelTurn2: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Use mock tool again' }, + toolCallEvent, + ]; + // Turn 3: Model outputs a final answer. + const modelTurn3: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Finished.' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + + mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents(modelTurn1)) + .mockReturnValueOnce(createStreamFromEvents(modelTurn2)) + .mockReturnValueOnce(createStreamFromEvents(modelTurn3)); + + // 4. Run the command. + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Use mock tool multiple times', + prompt_id: 'prompt-id-multi', + }); + + // 5. Verify the output. 
+ // The rendered output should contain the text from each turn, separated by a + // single newline, with a final newline at the end. + expect(getWrittenOutput()).toMatchSnapshot(); + + // Also verify the tools were called as expected. + expect(mockSchedulerSchedule).toHaveBeenCalledTimes(2); + }); + + it('should handle error during tool execution and should send error back to the model', async () => { + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-1', + name: 'errorTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-3', + }, + }; + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Error, + request: { + callId: 'tool-1', + name: 'errorTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-3', + }, + tool: {} as AnyDeclarativeTool, + response: { + callId: 'tool-1', + error: new Error('Execution failed'), + errorType: ToolErrorType.EXECUTION_FAILED, + responseParts: [ + { + functionResponse: { + name: 'errorTool', + response: { + output: 'Error: Execution failed', + }, + }, + }, + ], + resultDisplay: 'Execution failed', + contentLength: undefined, + }, + }, + ]); + const finalResponse: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Content, + value: 'Sorry, let me try again.', + }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents([toolCallEvent])) + .mockReturnValueOnce(createStreamFromEvents(finalResponse)); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Trigger tool error', + prompt_id: 'prompt-id-3', + }); + + expect(mockSchedulerSchedule).toHaveBeenCalled(); + expect(consoleErrorSpy).toHaveBeenCalledWith( + 'Error executing tool errorTool: Execution failed', + ); + 
expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledTimes(2); + expect(mockGeminiClient.sendMessageStream).toHaveBeenNthCalledWith( + 2, + [ + { + functionResponse: { + name: 'errorTool', + response: { + output: 'Error: Execution failed', + }, + }, + }, + ], + expect.any(AbortSignal), + 'prompt-id-3', + undefined, + false, + undefined, + ); + expect(getWrittenOutput()).toBe('Sorry, let me try again.\n'); + }); + + it('should exit with error if sendMessageStream throws initially', async () => { + const apiError = new Error('API connection failed'); + mockGeminiClient.sendMessageStream.mockImplementation(() => { + throw apiError; + }); + + await expect( + runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Initial fail', + prompt_id: 'prompt-id-4', + }), + ).rejects.toThrow('API connection failed'); + }); + + it('should not exit if a tool is not found, and should send error back to model', async () => { + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-1', + name: 'nonexistentTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-5', + }, + }; + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Error, + request: { + callId: 'tool-1', + name: 'nonexistentTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-5', + }, + response: { + callId: 'tool-1', + error: new Error('Tool "nonexistentTool" not found in registry.'), + resultDisplay: 'Tool "nonexistentTool" not found in registry.', + responseParts: [], + errorType: undefined, + contentLength: undefined, + }, + }, + ]); + const finalResponse: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Content, + value: "Sorry, I can't find that tool.", + }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + + mockGeminiClient.sendMessageStream + 
.mockReturnValueOnce(createStreamFromEvents([toolCallEvent])) + .mockReturnValueOnce(createStreamFromEvents(finalResponse)); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Trigger tool not found', + prompt_id: 'prompt-id-5', + }); + + expect(mockSchedulerSchedule).toHaveBeenCalled(); + expect(consoleErrorSpy).toHaveBeenCalledWith( + 'Error executing tool nonexistentTool: Tool "nonexistentTool" not found in registry.', + ); + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledTimes(2); + expect(getWrittenOutput()).toBe("Sorry, I can't find that tool.\n"); + }); + + it('should exit when max session turns are exceeded', async () => { + vi.mocked(mockConfig.getMaxSessionTurns).mockReturnValue(0); + await expect( + runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Trigger loop', + prompt_id: 'prompt-id-6', + }), + ).rejects.toThrow('Reached max session turns for this session'); + }); + + it('should preprocess @include commands before sending to the model', async () => { + // 1. Mock the imported atCommandProcessor + const { handleAtCommand } = await import( + './ui/hooks/atCommandProcessor.js' + ); + const mockHandleAtCommand = vi.mocked(handleAtCommand); + + // 2. Define the raw input and the expected processed output + const rawInput = 'Summarize @file.txt'; + const processedParts: Part[] = [ + { text: 'Summarize @file.txt' }, + { text: '\n--- Content from referenced files ---\n' }, + { text: 'This is the content of the file.' }, + { text: '\n--- End of content ---' }, + ]; + + // 3. Setup the mock to return the processed parts + mockHandleAtCommand.mockResolvedValue({ + processedQuery: processedParts, + }); + + // Mock a simple stream response from the Gemini client + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Summary complete.' 
}, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + // 4. Run the non-interactive mode with the raw input + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: rawInput, + prompt_id: 'prompt-id-7', + }); + + // 5. Assert that sendMessageStream was called with the PROCESSED parts, not the raw input + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledWith( + processedParts, + expect.any(AbortSignal), + 'prompt-id-7', + undefined, + false, + rawInput, + ); + + // 6. Assert the final output is correct + expect(getWrittenOutput()).toBe('Summary complete.\n'); + }); + + it('should process input and write JSON output with stats', async () => { + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Hello World' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + vi.mocked(mockConfig.getOutputFormat).mockReturnValue(OutputFormat.JSON); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-1', + }); + + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledWith( + [{ text: 'Test input' }], + expect.any(AbortSignal), + 'prompt-id-1', + undefined, + false, + 'Test input', + ); + expect(processStdoutSpy).toHaveBeenCalledWith( + JSON.stringify( + { + session_id: 'test-session-id', + response: 'Hello World', + stats: MOCK_SESSION_METRICS, + }, + null, + 2, + ), + ); + }); + + it('should write JSON output with stats for tool-only commands (no text response)', async () => { + // Test the scenario where a command 
completes successfully with only tool calls + // but no text response - this would have caught the original bug + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-1', + name: 'testTool', + args: { arg1: 'value1' }, + isClientInitiated: false, + prompt_id: 'prompt-id-tool-only', + }, + }; + const toolResponse: Part[] = [{ text: 'Tool executed successfully' }]; + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Success, + request: { + callId: 'tool-1', + name: 'testTool', + args: { arg1: 'value1' }, + isClientInitiated: false, + prompt_id: 'prompt-id-tool-only', + }, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + responseParts: toolResponse, + callId: 'tool-1', + error: undefined, + errorType: undefined, + contentLength: undefined, + }, + }, + ]); + + // First call returns only tool call, no content + const firstCallEvents: ServerGeminiStreamEvent[] = [ + toolCallEvent, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } }, + }, + ]; + + // Second call returns no content (tool-only completion) + const secondCallEvents: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 3 } }, + }, + ]; + + mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents(firstCallEvents)) + .mockReturnValueOnce(createStreamFromEvents(secondCallEvents)); + + vi.mocked(mockConfig.getOutputFormat).mockReturnValue(OutputFormat.JSON); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Execute tool only', + prompt_id: 'prompt-id-tool-only', + }); + + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledTimes(2); + expect(mockSchedulerSchedule).toHaveBeenCalledWith( + 
[expect.objectContaining({ name: 'testTool' })], + expect.any(AbortSignal), + ); + + // This should output JSON with empty response but include stats + expect(processStdoutSpy).toHaveBeenCalledWith( + JSON.stringify( + { + session_id: 'test-session-id', + response: '', + stats: MOCK_SESSION_METRICS, + }, + null, + 2, + ), + ); + }); + + it('should keep only the final post-tool assistant text in JSON output', async () => { + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-1', + name: 'testTool', + args: { arg1: 'value1' }, + isClientInitiated: false, + prompt_id: 'prompt-id-json-tool-text', + }, + }; + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Success, + request: toolCallEvent.value, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + responseParts: [{ text: 'Tool executed successfully' }], + callId: 'tool-1', + error: undefined, + errorType: undefined, + contentLength: undefined, + }, + }, + ]); + + mockGeminiClient.sendMessageStream + .mockReturnValueOnce( + createStreamFromEvents([ + { type: GeminiEventType.Content, value: 'Let me check that...' 
}, + toolCallEvent, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } }, + }, + ]), + ) + .mockReturnValueOnce( + createStreamFromEvents([ + { type: GeminiEventType.Content, value: 'Final answer' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 3 } }, + }, + ]), + ); + + vi.mocked(mockConfig.getOutputFormat).mockReturnValue(OutputFormat.JSON); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Use a tool', + prompt_id: 'prompt-id-json-tool-text', + }); + + expect(processStdoutSpy).toHaveBeenCalledWith( + JSON.stringify( + { + session_id: 'test-session-id', + response: 'Final answer', + stats: MOCK_SESSION_METRICS, + }, + null, + 2, + ), + ); + }); + + it('should write JSON output with stats for empty response commands', async () => { + // Test the scenario where a command completes but produces no content at all + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 1 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + vi.mocked(mockConfig.getOutputFormat).mockReturnValue(OutputFormat.JSON); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Empty response test', + prompt_id: 'prompt-id-empty', + }); + + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledWith( + [{ text: 'Empty response test' }], + expect.any(AbortSignal), + 'prompt-id-empty', + undefined, + false, + 'Empty response test', + ); + + // This should output JSON with empty response but include stats + expect(processStdoutSpy).toHaveBeenCalledWith( + JSON.stringify( + { + session_id: 
'test-session-id',
+          response: '',
+          stats: MOCK_SESSION_METRICS,
+        },
+        null,
+        2,
+      ),
+    );
+  });
+
+  it('should handle errors in JSON format', async () => {
+    vi.mocked(mockConfig.getOutputFormat).mockReturnValue(OutputFormat.JSON);
+    const testError = new Error('Invalid input provided');
+
+    mockGeminiClient.sendMessageStream.mockImplementation(() => {
+      throw testError;
+    });
+
+    // Should throw because of mocked process.exit
+    await expect(
+      runNonInteractive({
+        config: mockConfig,
+        settings: mockSettings,
+        input: 'Test input',
+        prompt_id: 'prompt-id-error',
+      }),
+    ).rejects.toThrow('process.exit(1) called');
+
+    expect(mockCoreEvents.emitFeedback).toHaveBeenCalledWith(
+      'error',
+      JSON.stringify(
+        {
+          session_id: 'test-session-id',
+          error: {
+            type: 'Error',
+            message: 'Invalid input provided',
+            code: 1,
+          },
+        },
+        null,
+        2,
+      ),
+    );
+  });
+
+  it('should handle FatalInputError with custom exit code in JSON format', async () => {
+    vi.mocked(mockConfig.getOutputFormat).mockReturnValue(OutputFormat.JSON);
+    const fatalError = new FatalInputError('Invalid command syntax provided');
+
+    mockGeminiClient.sendMessageStream.mockImplementation(() => {
+      throw fatalError;
+    });
+
+    // Should throw because of mocked process.exit with custom exit code
+    await expect(
+      runNonInteractive({
+        config: mockConfig,
+        settings: mockSettings,
+        input: 'Invalid syntax',
+        prompt_id: 'prompt-id-fatal',
+      }),
+    ).rejects.toThrow('process.exit(42) called');
+
+    expect(mockCoreEvents.emitFeedback).toHaveBeenCalledWith(
+      'error',
+      JSON.stringify(
+        {
+          session_id: 'test-session-id',
+          
error: { + type: 'FatalInputError', + message: 'Invalid command syntax provided', + code: 42, + }, + }, + null, + 2, + ), + ); + }); + + it('should execute a slash command that returns a prompt', async () => { + const mockCommand = { + name: 'testcommand', + description: 'a test command', + action: vi.fn().mockResolvedValue({ + type: 'submit_prompt', + content: [{ text: 'Prompt from command' }], + }), + }; + mockGetCommands.mockReturnValue([mockCommand]); + + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Response from command' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: '/testcommand', + prompt_id: 'prompt-id-slash', + }); + + // Ensure the prompt sent to the model is from the command, not the raw input + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledWith( + [{ text: 'Prompt from command' }], + expect.any(AbortSignal), + 'prompt-id-slash', + undefined, + false, + '/testcommand', + ); + + expect(getWrittenOutput()).toBe('Response from command\n'); + }); + + it('should handle slash commands', async () => { + const nonInteractiveCliCommands = await import( + './nonInteractiveCliCommands.js' + ); + const handleSlashCommandSpy = vi.spyOn( + nonInteractiveCliCommands, + 'handleSlashCommand', + ); + handleSlashCommandSpy.mockResolvedValue([{ text: 'Slash command output' }]); + + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Response to slash command' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: 
mockSettings, + input: '/help', + prompt_id: 'prompt-id-slash', + }); + + expect(handleSlashCommandSpy).toHaveBeenCalledWith( + '/help', + expect.any(AbortController), + mockConfig, + mockSettings, + ); + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledWith( + [{ text: 'Slash command output' }], + expect.any(AbortSignal), + 'prompt-id-slash', + undefined, + false, + '/help', + ); + expect(getWrittenOutput()).toBe('Response to slash command\n'); + handleSlashCommandSpy.mockRestore(); + }); + + it('should handle cancellation (Ctrl+C)', async () => { + // Mock isTTY and setRawMode safely + const originalIsTTY = process.stdin.isTTY; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const originalSetRawMode = (process.stdin as any).setRawMode; + + Object.defineProperty(process.stdin, 'isTTY', { + value: true, + configurable: true, + }); + if (!originalSetRawMode) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (process.stdin as any).setRawMode = vi.fn(); + } + + const stdinOnSpy = vi + .spyOn(process.stdin, 'on') + .mockImplementation(() => process.stdin); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + vi.spyOn(process.stdin as any, 'setRawMode').mockImplementation(() => true); + vi.spyOn(process.stdin, 'resume').mockImplementation(() => process.stdin); + vi.spyOn(process.stdin, 'pause').mockImplementation(() => process.stdin); + vi.spyOn(process.stdin, 'removeAllListeners').mockImplementation( + () => process.stdin, + ); + + // Cancellation will throw FatalCancellationError directly + + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Thinking...' 
},
+    ];
+    // Create a stream that responds to abortion
+    mockGeminiClient.sendMessageStream.mockImplementation(
+      (_messages, signal: AbortSignal) =>
+        (async function* () {
+          yield events[0];
+          await new Promise((resolve, reject) => {
+            const timeout = setTimeout(resolve, 1000);
+            signal.addEventListener('abort', () => {
+              clearTimeout(timeout);
+              setTimeout(() => {
+                reject(new Error('Aborted'));
+              }, 300);
+            });
+          });
+        })(),
+    );
+
+    // Run the scenario inside try/finally so process.stdin is put back even
+    // when one of the assertions below fails.
+    try {
+      const runPromise = runNonInteractive({
+        config: mockConfig,
+        settings: mockSettings,
+        input: 'Long running query',
+        prompt_id: 'prompt-id-cancel',
+      });
+
+      // Wait a bit for setup to complete and listeners to be registered
+      await new Promise((resolve) => setTimeout(resolve, 100));
+
+      // Find the keypress handler registered by runNonInteractive
+      const keypressCall = stdinOnSpy.mock.calls.find(
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        (call) => (call[0] as any) === 'keypress',
+      );
+      expect(keypressCall).toBeDefined();
+      const keypressHandler = keypressCall?.[1] as (
+        str: string,
+        key: { name?: string; ctrl?: boolean },
+      ) => void;
+
+      if (keypressHandler) {
+        // Simulate Ctrl+C
+        keypressHandler('\u0003', { ctrl: true, name: 'c' });
+      }
+
+      await expect(runPromise).rejects.toThrow('Operation cancelled.');
+
+      expect(
+        processStderrSpy.mock.calls.some(
+          (call) =>
+            // eslint-disable-next-line no-restricted-syntax
+            typeof call[0] === 'string' && call[0].includes('Cancelling'),
+        ),
+      ).toBe(true);
+    } finally {
+      // Restore original values
+      Object.defineProperty(process.stdin, 'isTTY', {
+        value: originalIsTTY,
+        configurable: true,
+      });
+      if (originalSetRawMode) {
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        (process.stdin as any).setRawMode = originalSetRawMode;
+      } else {
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        delete (process.stdin as any).setRawMode;
+      }
+    }
+    // Spies are automatically restored by vi.restoreAllMocks() in afterEach,
+    // but we can also do 
it manually if needed.
+  });
+
+  it('should honor cancellation that happens before session.send()', async () => {
+    const originalIsTTY = process.stdin.isTTY;
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const originalSetRawMode = (process.stdin as any).setRawMode;
+
+    Object.defineProperty(process.stdin, 'isTTY', {
+      value: true,
+      configurable: true,
+    });
+    if (!originalSetRawMode) {
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      (process.stdin as any).setRawMode = vi.fn();
+    }
+
+    const stdinOnSpy = vi
+      .spyOn(process.stdin, 'on')
+      .mockImplementation(
+        (event: string | symbol, listener: (...args: unknown[]) => void) => {
+          if (event === 'keypress') {
+            listener('\u0003', { ctrl: true, name: 'c' });
+          }
+          return process.stdin;
+        },
+      );
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    vi.spyOn(process.stdin as any, 'setRawMode').mockImplementation(() => true);
+    vi.spyOn(process.stdin, 'resume').mockImplementation(() => process.stdin);
+    vi.spyOn(process.stdin, 'pause').mockImplementation(() => process.stdin);
+    vi.spyOn(process.stdin, 'removeAllListeners').mockImplementation(
+      () => process.stdin,
+    );
+
+    // Cancellation will throw FatalCancellationError directly
+
+    const { LegacyAgentSession } = await import('@google/gemini-cli-core');
+    const sendSpy = vi.spyOn(LegacyAgentSession.prototype, 'send');
+
+    // Restore the spy and process.stdin in `finally` so a failed assertion
+    // cannot leak the patched TTY state into later tests.
+    try {
+      await expect(
+        runNonInteractive({
+          config: mockConfig,
+          settings: mockSettings,
+          input: 'Cancelled query',
+          prompt_id: 'prompt-id-pre-send-cancel',
+        }),
+      ).rejects.toThrow('Operation cancelled.');
+
+      expect(sendSpy).not.toHaveBeenCalled();
+      expect(stdinOnSpy).toHaveBeenCalled();
+    } finally {
+      sendSpy.mockRestore();
+
+      Object.defineProperty(process.stdin, 'isTTY', {
+        value: originalIsTTY,
+        configurable: true,
+      });
+      if (originalSetRawMode) {
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        (process.stdin as any).setRawMode = originalSetRawMode;
+      } else {
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        delete (process.stdin as any).setRawMode;
+      }
+    }
+  });
+
+  it('should throw FatalInputError if a command requires confirmation', async () => {
+    const mockCommand = {
+      name: 'confirm',
+      description: 'a command that needs confirmation',
+      action: vi.fn().mockResolvedValue({
+        type: 'confirm_shell_commands',
+        commands: ['rm -rf /'],
+      }),
+    };
+    mockGetCommands.mockReturnValue([mockCommand]);
+
+    await expect(
+      runNonInteractive({
+        config: mockConfig,
+        settings: mockSettings,
+        input: '/confirm',
+        prompt_id: 'prompt-id-confirm',
+      }),
+    ).rejects.toThrow(
+      'Exiting due to a confirmation prompt requested by the command.',
+    );
+  });
+
+  it('should treat an unknown slash command as a regular prompt', async () => {
+    // No commands are mocked, so any slash command is "unknown"
+    mockGetCommands.mockReturnValue([]);
+
+    const events: ServerGeminiStreamEvent[] = [
+      { type: GeminiEventType.Content, value: 'Response to unknown' },
+      {
+        type: GeminiEventType.Finished,
+        value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } },
+      },
+    ];
+    mockGeminiClient.sendMessageStream.mockReturnValue(
+      createStreamFromEvents(events),
+    );
+
+    await runNonInteractive({
+      config: mockConfig,
+      settings: mockSettings,
+      input: '/unknowncommand',
+      prompt_id: 'prompt-id-unknown',
+    });
+
+    // Ensure the raw input is sent to the model
+    expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledWith(
+      [{ text: '/unknowncommand' }],
+      expect.any(AbortSignal),
+      'prompt-id-unknown',
+      undefined,
+      false,
+      '/unknowncommand',
+    );
+
+    expect(getWrittenOutput()).toBe('Response to unknown\n');
+  });
+
+  it('should throw for unhandled command result types', async () => {
+    const mockCommand = {
+      name: 'noaction',
+      description: 'unhandled type',
+      action: vi.fn().mockResolvedValue({
+        type: 'unhandled',
+      }),
+    };
+    mockGetCommands.mockReturnValue([mockCommand]);
+
+    await expect(
+      runNonInteractive({
+        config: 
mockConfig, + settings: mockSettings, + input: '/noaction', + prompt_id: 'prompt-id-unhandled', + }), + ).rejects.toThrow( + 'Exiting due to command result that is not supported in non-interactive mode.', + ); + }); + + it('should pass arguments to the slash command action', async () => { + const mockAction = vi.fn().mockResolvedValue({ + type: 'submit_prompt', + content: [{ text: 'Prompt from command' }], + }); + const mockCommand = { + name: 'testargs', + description: 'a test command', + action: mockAction, + }; + mockGetCommands.mockReturnValue([mockCommand]); + + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Acknowledged' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 1 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: '/testargs arg1 arg2', + prompt_id: 'prompt-id-args', + }); + + expect(mockAction).toHaveBeenCalledWith(expect.any(Object), 'arg1 arg2'); + + expect(getWrittenOutput()).toBe('Acknowledged\n'); + }); + + it('should instantiate CommandService with correct loaders for slash commands', async () => { + // This test indirectly checks that handleSlashCommand is using the right loaders. 
+ const { FileCommandLoader } = await import( + './services/FileCommandLoader.js' + ); + const { McpPromptLoader } = await import('./services/McpPromptLoader.js'); + const { BuiltinCommandLoader } = await import( + './services/BuiltinCommandLoader.js' + ); + mockGetCommands.mockReturnValue([]); // No commands found, so it will fall through + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Acknowledged' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 1 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: '/mycommand', + prompt_id: 'prompt-id-loaders', + }); + + // Check that loaders were instantiated with the config + expect(FileCommandLoader).toHaveBeenCalledTimes(1); + expect(FileCommandLoader).toHaveBeenCalledWith(mockConfig); + expect(McpPromptLoader).toHaveBeenCalledTimes(1); + expect(McpPromptLoader).toHaveBeenCalledWith(mockConfig); + expect(BuiltinCommandLoader).toHaveBeenCalledWith(mockConfig); + + // Check that instances were passed to CommandService.create + expect(mockCommandServiceCreate).toHaveBeenCalledTimes(1); + const loadersArg = mockCommandServiceCreate.mock.calls[0][0]; + expect(loadersArg).toHaveLength(3); + expect(loadersArg[0]).toBe( + vi.mocked(BuiltinCommandLoader).mock.instances[0], + ); + expect(loadersArg[1]).toBe(vi.mocked(McpPromptLoader).mock.instances[0]); + expect(loadersArg[2]).toBe(vi.mocked(FileCommandLoader).mock.instances[0]); + }); + + it('should allow a normally-excluded tool when --allowed-tools is set', async () => { + // By default, ShellTool is excluded in non-interactive mode. + // This test ensures that --allowed-tools overrides this exclusion. 
+ vi.mocked(mockConfig.getToolRegistry).mockReturnValue({ + getTool: vi.fn().mockReturnValue({ + name: 'ShellTool', + description: 'A shell tool', + run: vi.fn(), + }), + getFunctionDeclarations: vi.fn().mockReturnValue([{ name: 'ShellTool' }]), + } as unknown as ToolRegistry); + + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-shell-1', + name: 'ShellTool', + args: { command: 'ls' }, + isClientInitiated: false, + prompt_id: 'prompt-id-allowed', + }, + }; + const toolResponse: Part[] = [{ text: 'file.txt' }]; + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Success, + request: { + callId: 'tool-shell-1', + name: 'ShellTool', + args: { command: 'ls' }, + isClientInitiated: false, + prompt_id: 'prompt-id-allowed', + }, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + responseParts: toolResponse, + callId: 'tool-shell-1', + error: undefined, + errorType: undefined, + contentLength: undefined, + }, + }, + ]); + + const firstCallEvents: ServerGeminiStreamEvent[] = [toolCallEvent]; + const secondCallEvents: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'file.txt' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + + mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents(firstCallEvents)) + .mockReturnValueOnce(createStreamFromEvents(secondCallEvents)); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'List the files', + prompt_id: 'prompt-id-allowed', + }); + + expect(mockSchedulerSchedule).toHaveBeenCalledWith( + [expect.objectContaining({ name: 'ShellTool' })], + expect.any(AbortSignal), + ); + expect(getWrittenOutput()).toBe('file.txt\n'); + }); + + describe('CoreEvents Integration', () => { + it('subscribes to UserFeedback and drains backlog on start', async () => { + 
const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'test', + prompt_id: 'prompt-id-events', + }); + + expect(mockCoreEvents.on).toHaveBeenCalledWith( + CoreEvent.UserFeedback, + expect.any(Function), + ); + expect(mockCoreEvents.drainBacklogs).toHaveBeenCalledTimes(1); + }); + + it('unsubscribes from UserFeedback on finish', async () => { + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'test', + prompt_id: 'prompt-id-events', + }); + + expect(mockCoreEvents.off).toHaveBeenCalledWith( + CoreEvent.UserFeedback, + expect.any(Function), + ); + }); + + it('logs to process.stderr when UserFeedback event is received', async () => { + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'test', + prompt_id: 'prompt-id-events', + }); + + // Get the registered handler + const handler = mockCoreEvents.on.mock.calls.find( + (call: unknown[]) => call[0] === CoreEvent.UserFeedback, + )?.[1]; + expect(handler).toBeDefined(); + + // Simulate an event + const payload: UserFeedbackPayload = { + severity: 'error', + message: 'Test error message', + }; + handler(payload); + + expect(processStderrSpy).toHaveBeenCalledWith( 
+ '[ERROR] Test error message\n', + ); + }); + + it('logs optional error object to process.stderr in debug mode', async () => { + vi.mocked(mockConfig.getDebugMode).mockReturnValue(true); + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'test', + prompt_id: 'prompt-id-events', + }); + + // Get the registered handler + const handler = mockCoreEvents.on.mock.calls.find( + (call: unknown[]) => call[0] === CoreEvent.UserFeedback, + )?.[1]; + expect(handler).toBeDefined(); + + // Simulate an event with error object + const errorObj = new Error('Original error'); + // Mock stack for deterministic testing + errorObj.stack = 'Error: Original error\n at test'; + const payload: UserFeedbackPayload = { + severity: 'warning', + message: 'Test warning message', + error: errorObj, + }; + handler(payload); + + expect(processStderrSpy).toHaveBeenCalledWith( + '[WARNING] Test warning message\n', + ); + expect(processStderrSpy).toHaveBeenCalledWith( + 'Error: Original error\n at test\n', + ); + }); + }); + + it('should emit appropriate events for streaming JSON output', async () => { + vi.mocked(mockConfig.getOutputFormat).mockReturnValue( + OutputFormat.STREAM_JSON, + ); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-1', + name: 'testTool', + args: { arg1: 'value1' }, + isClientInitiated: false, + prompt_id: 'prompt-id-stream', + }, + }; + + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Success, + request: toolCallEvent.value, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + 
response: { + responseParts: [{ text: 'Tool response' }], + callId: 'tool-1', + error: undefined, + errorType: undefined, + contentLength: undefined, + resultDisplay: 'Tool executed successfully', + }, + }, + ]); + + const firstCallEvents: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Thinking...' }, + toolCallEvent, + ]; + const secondCallEvents: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Final answer' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + + mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents(firstCallEvents)) + .mockReturnValueOnce(createStreamFromEvents(secondCallEvents)); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Stream test', + prompt_id: 'prompt-id-stream', + }); + + const output = getWrittenOutput(); + const sanitizedOutput = output + .replace(/"timestamp":"[^"]+"/g, '"timestamp":""') + .replace(/"duration_ms":\d+/g, '"duration_ms":'); + expect(sanitizedOutput).toMatchSnapshot(); + }); + + it('should handle EPIPE error gracefully', async () => { + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Hello' }, + { type: GeminiEventType.Content, value: ' World' }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + // Mock process.exit to track calls without throwing + vi.spyOn(process, 'exit').mockImplementation((_code) => undefined as never); + + // Simulate EPIPE error on stdout + const stdoutErrorCallback = (process.stdout.on as Mock).mock.calls.find( + (call) => call[0] === 'error', + )?.[1]; + + if (stdoutErrorCallback) { + stdoutErrorCallback({ code: 'EPIPE' }); + } + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'EPIPE test', + prompt_id: 'prompt-id-epipe', + }); + + // Since EPIPE is simulated, it might exit early 
or continue depending on timing, + // but our main goal is to verify the handler is registered and handles EPIPE. + expect(process.stdout.on).toHaveBeenCalledWith( + 'error', + expect.any(Function), + ); + }); + + it('should resume chat when resumedSessionData is provided', async () => { + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Resumed' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + const resumedSessionData = { + conversation: { + sessionId: 'resumed-session-id', + messages: [ + { role: 'user', parts: [{ text: 'Previous message' }] }, + ] as any, // eslint-disable-line @typescript-eslint/no-explicit-any + startTime: new Date().toISOString(), + lastUpdated: new Date().toISOString(), + firstUserMessage: 'Previous message', + projectHash: 'test-hash', + }, + filePath: '/path/to/session.json', + }; + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Continue', + prompt_id: 'prompt-id-resume', + resumedSessionData, + }); + + expect(mockGeminiClient.resumeChat).toHaveBeenCalledWith( + expect.any(Array), + resumedSessionData, + ); + expect(getWrittenOutput()).toBe('Resumed\n'); + }); + + it.each([ + { + name: 'loop detected', + events: [ + { type: GeminiEventType.LoopDetected }, + ] as ServerGeminiStreamEvent[], + input: 'Loop test', + promptId: 'prompt-id-loop', + }, + { + name: 'max session turns', + events: [ + { type: GeminiEventType.MaxSessionTurns }, + ] as ServerGeminiStreamEvent[], + input: 'Max turns test', + promptId: 'prompt-id-max-turns', + }, + ])( + 'should emit appropriate error event in streaming JSON mode: $name', + async ({ events, input, promptId }) => { + vi.mocked(mockConfig.getOutputFormat).mockReturnValue( + OutputFormat.STREAM_JSON, + ); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( 
+ MOCK_SESSION_METRICS, + ); + + const streamEvents: ServerGeminiStreamEvent[] = [ + ...events, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(streamEvents), + ); + + try { + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input, + prompt_id: promptId, + }); + } catch (_error) { + // Expected exit + } + + const output = getWrittenOutput(); + const sanitizedOutput = output + .replace(/"timestamp":"[^"]+"/g, '"timestamp":""') + .replace(/"duration_ms":\d+/g, '"duration_ms":'); + expect(sanitizedOutput).toMatchSnapshot(); + }, + ); + + it('should log error when tool recording fails', async () => { + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-1', + name: 'testTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-tool-error', + }, + }; + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Success, + request: toolCallEvent.value, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + responseParts: [], + callId: 'tool-1', + error: undefined, + errorType: undefined, + contentLength: undefined, + }, + }, + ]); + + const events: ServerGeminiStreamEvent[] = [ + toolCallEvent, + { type: GeminiEventType.Content, value: 'Done' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } }, + }, + ]; + mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents(events)) + .mockReturnValueOnce( + createStreamFromEvents([ + { type: GeminiEventType.Content, value: 'Done' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } }, + }, + ]), + ); + + // Mock getChat to throw when recording tool calls + const mockChat = { + 
recordCompletedToolCalls: vi.fn().mockImplementation(() => { + throw new Error('Recording failed'); + }), + }; + mockGeminiClient.getChat = vi.fn().mockReturnValue(mockChat); + mockGeminiClient.getCurrentSequenceModel = vi + .fn() + .mockReturnValue('model-1'); + + // Mock debugLogger.error + const { debugLogger } = await import('@google/gemini-cli-core'); + const debugLoggerErrorSpy = vi + .spyOn(debugLogger, 'error') + .mockImplementation(() => {}); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Tool recording error test', + prompt_id: 'prompt-id-tool-error', + }); + + expect(debugLoggerErrorSpy).toHaveBeenCalledWith( + expect.stringContaining( + 'Error recording completed tool call information: Error: Recording failed', + ), + ); + expect(getWrittenOutput()).toContain('Done'); + }); + + it('should stop agent execution immediately when a tool call returns STOP_EXECUTION error', async () => { + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'stop-call', + name: 'stopTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-stop', + }, + }; + + // Mock tool execution returning STOP_EXECUTION + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Error, + request: toolCallEvent.value, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + callId: 'stop-call', + responseParts: [{ text: 'error occurred' }], + errorType: ToolErrorType.STOP_EXECUTION, + error: new Error('Stop reason from hook'), + resultDisplay: undefined, + }, + }, + ]); + + const firstCallEvents: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Executing tool...' }, + toolCallEvent, + ]; + + // Setup the mock to return events for the first call. + // We expect the loop to terminate after the tool execution. + // If it doesn't, it might call sendMessageStream again, which we'll assert against. 
+ mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents(firstCallEvents)) + .mockReturnValueOnce(createStreamFromEvents([])); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Run stop tool', + prompt_id: 'prompt-id-stop', + }); + + expect(mockSchedulerSchedule).toHaveBeenCalled(); + + // The key assertion: sendMessageStream should have been called ONLY ONCE (initial user input). + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledTimes(1); + + expect(processStderrSpy).toHaveBeenCalledWith( + 'Agent execution stopped: Stop reason from hook\n', + ); + }); + + it('should write JSON output when a tool call returns STOP_EXECUTION error', async () => { + vi.mocked(mockConfig.getOutputFormat).mockReturnValue(OutputFormat.JSON); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'stop-call', + name: 'stopTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-stop-json', + }, + }; + + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Error, + request: toolCallEvent.value, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + callId: 'stop-call', + responseParts: [{ text: 'error occurred' }], + errorType: ToolErrorType.STOP_EXECUTION, + error: new Error('Stop reason'), + resultDisplay: undefined, + }, + }, + ]); + + const firstCallEvents: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Partial content' }, + toolCallEvent, + ]; + + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(firstCallEvents), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Run stop tool', + prompt_id: 'prompt-id-stop-json', + }); + + expect(processStdoutSpy).toHaveBeenCalledWith( + JSON.stringify( + { + 
session_id: 'test-session-id', + response: 'Partial content', + stats: MOCK_SESSION_METRICS, + }, + null, + 2, + ), + ); + }); + + it('should emit result event when a tool call returns STOP_EXECUTION error in streaming JSON mode', async () => { + vi.mocked(mockConfig.getOutputFormat).mockReturnValue( + OutputFormat.STREAM_JSON, + ); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'stop-call', + name: 'stopTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-stop-stream', + }, + }; + + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Error, + request: toolCallEvent.value, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + callId: 'stop-call', + responseParts: [{ text: 'error occurred' }], + errorType: ToolErrorType.STOP_EXECUTION, + error: new Error('Stop reason'), + resultDisplay: undefined, + }, + }, + ]); + + const firstCallEvents: ServerGeminiStreamEvent[] = [toolCallEvent]; + + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(firstCallEvents), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Run stop tool', + prompt_id: 'prompt-id-stop-stream', + }); + + const output = getWrittenOutput(); + expect(output).toContain('"type":"result"'); + expect(output).toContain('"status":"success"'); + }); + + describe('Agent Execution Events', () => { + it('should handle AgentExecutionStopped event', async () => { + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.AgentExecutionStopped, + value: { reason: 'Stopped by hook' }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'test stop', + prompt_id: 
'prompt-id-stop', + }); + + expect(processStderrSpy).toHaveBeenCalledWith( + 'Agent execution stopped: Stopped by hook\n', + ); + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledTimes(1); + }); + + it('should handle AgentExecutionBlocked event', async () => { + const allEvents: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.AgentExecutionBlocked, + value: { reason: 'Blocked by hook' }, + }, + { type: GeminiEventType.Content, value: 'Final answer' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(allEvents), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'test block', + prompt_id: 'prompt-id-block', + }); + + expect(processStderrSpy).toHaveBeenCalledWith( + '[WARNING] Agent execution blocked: Blocked by hook\n', + ); + // Stream continues after blocked event — content should be output + expect(getWrittenOutput()).toBe('Final answer\n'); + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledTimes(1); + }); + }); + + describe('Output Sanitization', () => { + const ANSI_SEQUENCE = '\u001B[31mRed Text\u001B[0m'; + const OSC_HYPERLINK = + '\u001B]8;;http://example.com\u001B\\Link\u001B]8;;\u001B\\'; + const PLAIN_TEXT_RED = 'Red Text'; + const PLAIN_TEXT_LINK = 'Link'; + + it('should sanitize ANSI output by default', async () => { + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: ANSI_SEQUENCE }, + { type: GeminiEventType.Content, value: ' ' }, + { type: GeminiEventType.Content, value: OSC_HYPERLINK }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + vi.mocked(mockConfig.getRawOutput).mockReturnValue(false); + + await runNonInteractive({ + 
config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-sanitization', + }); + + expect(getWrittenOutput()).toBe(`${PLAIN_TEXT_RED} ${PLAIN_TEXT_LINK}\n`); + }); + + it('should allow ANSI output when rawOutput is true', async () => { + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: ANSI_SEQUENCE }, + { type: GeminiEventType.Content, value: ' ' }, + { type: GeminiEventType.Content, value: OSC_HYPERLINK }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + vi.mocked(mockConfig.getRawOutput).mockReturnValue(true); + vi.mocked(mockConfig.getAcceptRawOutputRisk).mockReturnValue(true); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-raw', + }); + + expect(getWrittenOutput()).toBe(`${ANSI_SEQUENCE} ${OSC_HYPERLINK}\n`); + }); + + it('should allow ANSI output when only acceptRawOutputRisk is true', async () => { + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: ANSI_SEQUENCE }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + vi.mocked(mockConfig.getRawOutput).mockReturnValue(false); + vi.mocked(mockConfig.getAcceptRawOutputRisk).mockReturnValue(true); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-accept-only', + }); + + expect(getWrittenOutput()).toBe(`${ANSI_SEQUENCE}\n`); + }); + + it('should warn when rawOutput is true and acceptRisk is false', async () => { + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, 
usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + vi.mocked(mockConfig.getRawOutput).mockReturnValue(true); + vi.mocked(mockConfig.getAcceptRawOutputRisk).mockReturnValue(false); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-warn', + }); + + expect(processStderrSpy).toHaveBeenCalledWith( + expect.stringContaining('[WARNING] --raw-output is enabled'), + ); + }); + + it('should not warn when rawOutput is true and acceptRisk is true', async () => { + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + vi.mocked(mockConfig.getRawOutput).mockReturnValue(true); + vi.mocked(mockConfig.getAcceptRawOutputRisk).mockReturnValue(true); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-no-warn', + }); + + expect(processStderrSpy).not.toHaveBeenCalledWith( + expect.stringContaining('[WARNING] --raw-output is enabled'), + ); + }); + + it('should emit warning event for loop_detected in streaming JSON mode', async () => { + vi.mocked(mockConfig.getOutputFormat).mockReturnValue( + OutputFormat.STREAM_JSON, + ); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + const streamEvents: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.LoopDetected } as ServerGeminiStreamEvent, + { type: GeminiEventType.Content, value: 'Continuing after loop' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(streamEvents), + ); + + await runNonInteractive({ + 
config: mockConfig, + settings: mockSettings, + input: 'Loop test explicit', + prompt_id: 'prompt-id-loop-explicit', + }); + + const output = getWrittenOutput(); + // The STREAM_JSON output should contain an error event with warning severity + expect(output).toContain('"type":"error"'); + expect(output).toContain('"severity":"warning"'); + expect(output).toContain('Loop detected'); + }); + + it('should report cancelled tool calls as success in stream-json mode (legacy parity)', async () => { + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-1', + name: 'testTool', + args: { arg1: 'value1' }, + isClientInitiated: false, + prompt_id: 'prompt-id-cancel', + }, + }; + + // Mock the scheduler to return a cancelled status + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Cancelled, + request: toolCallEvent.value, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + callId: 'tool-1', + responseParts: [{ text: 'Operation cancelled' }], + resultDisplay: 'Cancelled', + }, + }, + ]); + + const events: ServerGeminiStreamEvent[] = [ + toolCallEvent, + { + type: GeminiEventType.Content, + value: 'Model continues...', + }, + ]; + + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + vi.mocked(mockConfig.getOutputFormat).mockReturnValue( + OutputFormat.STREAM_JSON, + ); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-cancel', + }); + + const output = getWrittenOutput(); + expect(output).toContain('"type":"tool_result"'); + expect(output).toContain('"status":"success"'); + }); + }); +}); diff --git a/packages/cli/src/nonInteractiveCliAgentSession.ts b/packages/cli/src/nonInteractiveCliAgentSession.ts new file mode 100644 index 
0000000000..78fc18be4e --- /dev/null +++ b/packages/cli/src/nonInteractiveCliAgentSession.ts @@ -0,0 +1,621 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { + Config, + ResumedSessionData, + UserFeedbackPayload, + AgentEvent, + ContentPart, +} from '@google/gemini-cli-core'; +import { isSlashCommand } from './ui/utils/commandUtils.js'; +import type { LoadedSettings } from './config/settings.js'; +import { + convertSessionToClientHistory, + FatalError, + FatalAuthenticationError, + FatalInputError, + FatalSandboxError, + FatalConfigError, + FatalTurnLimitedError, + FatalToolExecutionError, + FatalCancellationError, + promptIdContext, + OutputFormat, + JsonFormatter, + StreamJsonFormatter, + JsonStreamEventType, + uiTelemetryService, + coreEvents, + CoreEvent, + createWorkingStdio, + Scheduler, + ROOT_SCHEDULER_ID, + LegacyAgentSession, + ToolErrorType, + geminiPartsToContentParts, +} from '@google/gemini-cli-core'; + +import type { Part } from '@google/genai'; +import readline from 'node:readline'; +import stripAnsi from 'strip-ansi'; + +import { handleSlashCommand } from './nonInteractiveCliCommands.js'; +import { ConsolePatcher } from './ui/utils/ConsolePatcher.js'; +import { handleAtCommand } from './ui/hooks/atCommandProcessor.js'; +import { handleError, handleToolError } from './utils/errors.js'; +import { TextOutput } from './ui/utils/textOutput.js'; + +interface RunNonInteractiveParams { + config: Config; + settings: LoadedSettings; + input: string; + prompt_id: string; + resumedSessionData?: ResumedSessionData; +} + +export async function runNonInteractive({ + config, + settings, + input, + prompt_id, + resumedSessionData, +}: RunNonInteractiveParams): Promise { + return promptIdContext.run(prompt_id, async () => { + const consolePatcher = new ConsolePatcher({ + stderr: true, + interactive: false, + debugMode: config.getDebugMode(), + onNewMessage: (msg) => { + 
coreEvents.emitConsoleLog(msg.type, msg.content); + }, + }); + + if (process.env['GEMINI_CLI_ACTIVITY_LOG_TARGET']) { + const { setupInitialActivityLogger } = await import( + './utils/devtoolsService.js' + ); + await setupInitialActivityLogger(config); + } + + const { stdout: workingStdout } = createWorkingStdio(); + const textOutput = new TextOutput(workingStdout); + + const handleUserFeedback = (payload: UserFeedbackPayload) => { + const prefix = payload.severity.toUpperCase(); + process.stderr.write(`[${prefix}] ${payload.message}\n`); + if (payload.error && config.getDebugMode()) { + const errorToLog = + payload.error instanceof Error + ? payload.error.stack || payload.error.message + : String(payload.error); + process.stderr.write(`${errorToLog}\n`); + } + }; + + const startTime = Date.now(); + const streamFormatter = + config.getOutputFormat() === OutputFormat.STREAM_JSON + ? new StreamJsonFormatter() + : null; + + const abortController = new AbortController(); + + // Track cancellation state + let isAborting = false; + let cancelMessageTimer: NodeJS.Timeout | null = null; + + // Setup stdin listener for Ctrl+C detection + let stdinWasRaw = false; + let rl: readline.Interface | null = null; + + const setupStdinCancellation = () => { + // Only setup if stdin is a TTY (user can interact) + if (!process.stdin.isTTY) { + return; + } + + // Save original raw mode state + stdinWasRaw = process.stdin.isRaw || false; + + // Enable raw mode to capture individual keypresses + process.stdin.setRawMode(true); + process.stdin.resume(); + + // Setup readline to emit keypress events + rl = readline.createInterface({ + input: process.stdin, + escapeCodeTimeout: 0, + }); + readline.emitKeypressEvents(process.stdin, rl); + + // Listen for Ctrl+C + const keypressHandler = ( + str: string, + key: { name?: string; ctrl?: boolean }, + ) => { + // Detect Ctrl+C: either ctrl+c key combo or raw character code 3 + if ((key && key.ctrl && key.name === 'c') || str === '\u0003') { + // 
Only handle once + if (isAborting) { + return; + } + + isAborting = true; + + // Only show message if cancellation takes longer than 200ms + // This reduces verbosity for fast cancellations + cancelMessageTimer = setTimeout(() => { + process.stderr.write('\nCancelling...\n'); + }, 200); + + abortController.abort(); + } + }; + + process.stdin.on('keypress', keypressHandler); + }; + + const cleanupStdinCancellation = () => { + // Clear any pending cancel message timer + if (cancelMessageTimer) { + clearTimeout(cancelMessageTimer); + cancelMessageTimer = null; + } + + // Cleanup readline and stdin listeners + if (rl) { + rl.close(); + rl = null; + } + + // Remove keypress listener + process.stdin.removeAllListeners('keypress'); + + // Restore stdin to original state + if (process.stdin.isTTY) { + process.stdin.setRawMode(stdinWasRaw); + process.stdin.pause(); + } + }; + + let errorToHandle: unknown | undefined; + let abortSession = () => {}; + try { + consolePatcher.patch(); + + if ( + config.getRawOutput() && + !config.getAcceptRawOutputRisk() && + config.getOutputFormat() === OutputFormat.TEXT + ) { + process.stderr.write( + '[WARNING] --raw-output is enabled. Model output is not sanitized and may contain harmful ANSI sequences (e.g. for phishing or command injection). Use --accept-raw-output-risk to suppress this warning.\n', + ); + } + + // Setup stdin cancellation listener + setupStdinCancellation(); + + coreEvents.on(CoreEvent.UserFeedback, handleUserFeedback); + coreEvents.drainBacklogs(); + + // Handle EPIPE errors when the output is piped to a command that closes early. + process.stdout.on('error', (err: NodeJS.ErrnoException) => { + if (err.code === 'EPIPE') { + // Exit gracefully if the pipe is closed. 
+ cleanupStdinCancellation(); + consolePatcher.cleanup(); + process.exit(0); + } + }); + + const geminiClient = config.getGeminiClient(); + const scheduler = new Scheduler({ + context: config, + messageBus: config.getMessageBus(), + getPreferredEditor: () => undefined, + schedulerId: ROOT_SCHEDULER_ID, + }); + + // Initialize chat. Resume if resume data is passed. + if (resumedSessionData) { + await geminiClient.resumeChat( + convertSessionToClientHistory( + resumedSessionData.conversation.messages, + ), + resumedSessionData, + ); + } + + // Emit init event for streaming JSON + if (streamFormatter) { + streamFormatter.emitEvent({ + type: JsonStreamEventType.INIT, + timestamp: new Date().toISOString(), + session_id: config.getSessionId(), + model: config.getModel(), + }); + } + + let query: Part[] | undefined; + + if (isSlashCommand(input)) { + const slashCommandResult = await handleSlashCommand( + input, + abortController, + config, + settings, + ); + if (slashCommandResult) { + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + query = slashCommandResult as Part[]; + } + } + + if (!query) { + const { processedQuery, error } = await handleAtCommand({ + query: input, + config, + addItem: (_item, _timestamp) => 0, + onDebugMessage: () => {}, + messageId: Date.now(), + signal: abortController.signal, + escapePastedAtSymbols: false, + }); + if (error || !processedQuery) { + throw new FatalInputError( + error || 'Exiting due to an error processing the @ command.', + ); + } + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + query = processedQuery as Part[]; + } + + // Emit user message event for streaming JSON + if (streamFormatter) { + streamFormatter.emitEvent({ + type: JsonStreamEventType.MESSAGE, + timestamp: new Date().toISOString(), + role: 'user', + content: input, + }); + } + + // Create LegacyAgentSession — owns the agentic loop + const session = new LegacyAgentSession({ + client: geminiClient, + scheduler, + 
config, + promptId: prompt_id, + }); + + // Wire Ctrl+C to session abort + abortSession = () => { + void session.abort(); + }; + abortController.signal.addEventListener('abort', abortSession); + if (abortController.signal.aborted) { + throw new FatalCancellationError('Operation cancelled.'); + } + + // Start the agentic loop (runs in background) + const { streamId } = await session.send({ + message: { + content: geminiPartsToContentParts(query), + displayContent: input, + }, + }); + if (streamId === null) { + throw new Error( + 'LegacyAgentSession.send() unexpectedly returned no stream for a message send.', + ); + } + + const getTextContent = (parts?: ContentPart[]): string | undefined => { + const text = parts + ?.map((part) => (part.type === 'text' ? part.text : '')) + .join(''); + return text ? text : undefined; + }; + + const emitFinalSuccessResult = (): void => { + if (streamFormatter) { + const metrics = uiTelemetryService.getMetrics(); + const durationMs = Date.now() - startTime; + streamFormatter.emitEvent({ + type: JsonStreamEventType.RESULT, + timestamp: new Date().toISOString(), + status: 'success', + stats: streamFormatter.convertToStreamStats(metrics, durationMs), + }); + } else if (config.getOutputFormat() === OutputFormat.JSON) { + const formatter = new JsonFormatter(); + const stats = uiTelemetryService.getMetrics(); + textOutput.write( + formatter.format(config.getSessionId(), responseText, stats), + ); + } else { + textOutput.ensureTrailingNewline(); + } + }; + + const reconstructFatalError = (event: AgentEvent<'error'>): Error => { + const errorMeta = event._meta; + const name = + typeof errorMeta?.['errorName'] === 'string' + ? 
errorMeta['errorName'] + : undefined; + + let errToThrow: Error; + switch (name) { + case 'FatalAuthenticationError': + errToThrow = new FatalAuthenticationError(event.message); + break; + case 'FatalInputError': + errToThrow = new FatalInputError(event.message); + break; + case 'FatalSandboxError': + errToThrow = new FatalSandboxError(event.message); + break; + case 'FatalConfigError': + errToThrow = new FatalConfigError(event.message); + break; + case 'FatalTurnLimitedError': + errToThrow = new FatalTurnLimitedError(event.message); + break; + case 'FatalToolExecutionError': + errToThrow = new FatalToolExecutionError(event.message); + break; + case 'FatalCancellationError': + errToThrow = new FatalCancellationError(event.message); + break; + case 'FatalError': + errToThrow = new FatalError( + event.message, + typeof errorMeta?.['exitCode'] === 'number' + ? errorMeta['exitCode'] + : 1, + ); + break; + default: + errToThrow = new Error(event.message); + if (name) { + Object.defineProperty(errToThrow, 'name', { + value: name, + enumerable: true, + }); + } + break; + } + + if (errorMeta?.['exitCode'] !== undefined) { + Object.defineProperty(errToThrow, 'exitCode', { + value: errorMeta['exitCode'], + enumerable: true, + }); + } + if (errorMeta?.['code'] !== undefined) { + Object.defineProperty(errToThrow, 'code', { + value: errorMeta['code'], + enumerable: true, + }); + } + if (errorMeta?.['status'] !== undefined) { + Object.defineProperty(errToThrow, 'status', { + value: errorMeta['status'], + enumerable: true, + }); + } + return errToThrow; + }; + + // Consume AgentEvents for output formatting + let responseText = ''; + let preToolResponseText: string | undefined; + let streamEnded = false; + for await (const event of session.stream({ streamId })) { + if (streamEnded) break; + switch (event.type) { + case 'message': { + if (event.role === 'agent') { + for (const part of event.content) { + if (part.type === 'text') { + const isRaw = + config.getRawOutput() || 
config.getAcceptRawOutputRisk(); + const output = isRaw ? part.text : stripAnsi(part.text); + if (streamFormatter) { + streamFormatter.emitEvent({ + type: JsonStreamEventType.MESSAGE, + timestamp: new Date().toISOString(), + role: 'assistant', + content: output, + delta: true, + }); + } else if (config.getOutputFormat() === OutputFormat.JSON) { + responseText += output; + } else { + if (part.text) { + textOutput.write(output); + } + } + } + } + } + break; + } + case 'tool_request': { + if (config.getOutputFormat() === OutputFormat.JSON) { + // Final JSON output should reflect the last assistant answer after + // any tool orchestration, not intermediate pre-tool text. + preToolResponseText = responseText || preToolResponseText; + responseText = ''; + } + if (streamFormatter) { + streamFormatter.emitEvent({ + type: JsonStreamEventType.TOOL_USE, + timestamp: new Date().toISOString(), + tool_name: event.name, + tool_id: event.requestId, + parameters: event.args, + }); + } + break; + } + case 'tool_response': { + textOutput.ensureTrailingNewline(); + if (streamFormatter) { + const displayText = getTextContent(event.displayContent); + const errorMsg = getTextContent(event.content) ?? 'Tool error'; + streamFormatter.emitEvent({ + type: JsonStreamEventType.TOOL_RESULT, + timestamp: new Date().toISOString(), + tool_id: event.requestId, + status: event.isError ? 'error' : 'success', + output: displayText, + error: event.isError + ? { + type: + typeof event.data?.['errorType'] === 'string' + ? event.data['errorType'] + : 'TOOL_EXECUTION_ERROR', + message: errorMsg, + } + : undefined, + }); + } + if (event.isError) { + const displayText = getTextContent(event.displayContent); + const errorMsg = getTextContent(event.content) ?? 
'Tool error'; + + if (event.data?.['errorType'] === ToolErrorType.STOP_EXECUTION) { + if ( + config.getOutputFormat() === OutputFormat.JSON && + !responseText && + preToolResponseText + ) { + responseText = preToolResponseText; + } + const stopMessage = `Agent execution stopped: ${errorMsg}`; + if (config.getOutputFormat() === OutputFormat.TEXT) { + process.stderr.write(`${stopMessage}\n`); + } + } + + if (event.data?.['errorType'] === ToolErrorType.NO_SPACE_LEFT) { + throw new FatalToolExecutionError( + 'Error executing tool ' + + event.name + + ': ' + + (displayText || errorMsg), + ); + } + handleToolError( + event.name, + new Error(errorMsg), + config, + typeof event.data?.['errorType'] === 'string' + ? event.data['errorType'] + : undefined, + displayText, + ); + } + break; + } + case 'error': { + if (event.fatal) { + throw reconstructFatalError(event); + } + + const errorCode = event._meta?.['code']; + + if (errorCode === 'AGENT_EXECUTION_BLOCKED') { + if (config.getOutputFormat() === OutputFormat.TEXT) { + process.stderr.write(`[WARNING] ${event.message}\n`); + } + break; + } + + const severity = + event.status === 'RESOURCE_EXHAUSTED' ? 'error' : 'warning'; + if (config.getOutputFormat() === OutputFormat.TEXT) { + process.stderr.write(`[WARNING] ${event.message}\n`); + } + if (streamFormatter) { + streamFormatter.emitEvent({ + type: JsonStreamEventType.ERROR, + timestamp: new Date().toISOString(), + severity, + message: event.message, + }); + } + break; + } + case 'agent_end': { + if (event.reason === 'aborted') { + throw new FatalCancellationError('Operation cancelled.'); + } else if (event.reason === 'max_turns') { + const isConfiguredTurnLimit = + typeof event.data?.['maxTurns'] === 'number' || + typeof event.data?.['turnCount'] === 'number'; + + if (isConfiguredTurnLimit) { + throw new FatalTurnLimitedError( + 'Reached max session turns for this session. 
Increase the number of turns by specifying maxSessionTurns in settings.json.', + ); + } else if (streamFormatter) { + streamFormatter.emitEvent({ + type: JsonStreamEventType.ERROR, + timestamp: new Date().toISOString(), + severity: 'error', + message: 'Maximum session turns exceeded', + }); + } + } + + const stopMessage = + typeof event.data?.['message'] === 'string' + ? event.data['message'] + : ''; + if (stopMessage && config.getOutputFormat() === OutputFormat.TEXT) { + process.stderr.write(`Agent execution stopped: ${stopMessage}\n`); + } + + emitFinalSuccessResult(); + streamEnded = true; + break; + } + case 'initialize': + case 'session_update': + case 'agent_start': + case 'tool_update': + case 'elicitation_request': + case 'elicitation_response': + case 'usage': + case 'custom': + // Explicitly ignore these non-interactive events + break; + default: + event satisfies never; + break; + } + } + } catch (error) { + errorToHandle = error; + } finally { + // Cleanup stdin cancellation before other cleanup + cleanupStdinCancellation(); + abortController.signal.removeEventListener('abort', abortSession); + + consolePatcher.cleanup(); + coreEvents.off(CoreEvent.UserFeedback, handleUserFeedback); + } + + if (errorToHandle) { + handleError(errorToHandle, config); + } + }); +} diff --git a/packages/cli/src/test-utils/mockCommandContext.ts b/packages/cli/src/test-utils/mockCommandContext.ts index 6eda7f3109..9a1156e5cb 100644 --- a/packages/cli/src/test-utils/mockCommandContext.ts +++ b/packages/cli/src/test-utils/mockCommandContext.ts @@ -61,6 +61,7 @@ export const createMockCommandContext = ( toggleCorgiMode: vi.fn(), toggleShortcutsHelp: vi.fn(), toggleVimEnabled: vi.fn(), + reloadCommands: vi.fn(), openAgentConfigDialog: vi.fn(), closeAgentConfigDialog: vi.fn(), extensionsUpdateState: new Map(), diff --git a/packages/cli/src/test-utils/mockConfig.ts b/packages/cli/src/test-utils/mockConfig.ts index 260bafdf2b..daf109d928 100644 --- 
a/packages/cli/src/test-utils/mockConfig.ts +++ b/packages/cli/src/test-utils/mockConfig.ts @@ -194,6 +194,17 @@ export function createMockSettings( user: { settings: {} }, workspace: { settings: {} }, errors: [], + subscribe: vi.fn().mockReturnValue(() => {}), + getSnapshot: vi.fn().mockReturnValue({ + system: { settings: {} }, + systemDefaults: { settings: {} }, + user: { settings: {} }, + workspace: { settings: {} }, + isTrusted: true, + errors: [], + merged, + }), + setValue: vi.fn(), ...overrides, merged, } as unknown as LoadedSettings; diff --git a/packages/cli/src/test-utils/render.tsx b/packages/cli/src/test-utils/render.tsx index 6ca30dd8b9..817921e83a 100644 --- a/packages/cli/src/test-utils/render.tsx +++ b/packages/cli/src/test-utils/render.tsx @@ -42,6 +42,7 @@ import { type OverflowState, } from '../ui/contexts/OverflowContext.js'; +import { makeFakeConfig } from '@google/gemini-cli-core'; import { type Config } from '@google/gemini-cli-core'; import { FakePersistentState } from './persistentStateFake.js'; import { AppContext, type AppState } from '../ui/contexts/AppContext.js'; @@ -51,7 +52,6 @@ import { themeManager, DEFAULT_THEME } from '../ui/themes/theme-manager.js'; import { DefaultLight } from '../ui/themes/builtin/light/default-light.js'; import { pickDefaultThemeName } from '../ui/themes/theme.js'; import { generateSvgForTerminal } from './svg.js'; -import { loadCliConfig, type CliArgs } from '../config/config.js'; export const persistentStateMock = new FakePersistentState(); @@ -613,6 +613,7 @@ export const renderWithProviders = async ( mouseEventsEnabled = false, config, uiActions, + toolActions, persistentState, appState = mockAppState, }: { @@ -623,6 +624,11 @@ export const renderWithProviders = async ( mouseEventsEnabled?: boolean; config?: Config; uiActions?: Partial; + toolActions?: Partial<{ + isExpanded: (callId: string) => boolean; + toggleExpansion: (callId: string) => void; + toggleAllExpansion: (callIds: string[]) => void; + }>; 
persistentState?: { get?: typeof persistentStateMock.get; set?: typeof persistentStateMock.set; @@ -660,12 +666,11 @@ export const renderWithProviders = async ( const terminalWidth = width ?? baseState.terminalWidth; if (!config) { - config = await loadCliConfig( - settings.merged, - 'random-session-id', - {} as unknown as CliArgs, - { cwd: '/' }, - ); + config = makeFakeConfig({ + useAlternateBuffer: settings.merged.ui?.useAlternateBuffer, + showMemoryUsage: settings.merged.ui?.showMemoryUsage, + accessibility: settings.merged.ui?.accessibility, + }); } const mainAreaWidth = providedUiState?.mainAreaWidth ?? terminalWidth; @@ -710,6 +715,16 @@ export const renderWithProviders = async ( { const [adminSettingsChanged, setAdminSettingsChanged] = useState(false); + const [expandedTools, setExpandedTools] = useState>(new Set()); + + const toggleExpansion = useCallback((callId: string) => { + setExpandedTools((prev) => { + const next = new Set(prev); + if (next.has(callId)) { + next.delete(callId); + } else { + next.add(callId); + } + return next; + }); + }, []); + + const toggleAllExpansion = useCallback((callIds: string[]) => { + setExpandedTools((prev) => { + const next = new Set(prev); + const anyCollapsed = callIds.some((id) => !next.has(id)); + + if (anyCollapsed) { + callIds.forEach((id) => next.add(id)); + } else { + callIds.forEach((id) => next.delete(id)); + } + return next; + }); + }, []); + + const isExpanded = useCallback( + (callId: string) => expandedTools.has(callId), + [expandedTools], + ); + const [shellModeActive, setShellModeActive] = useState(false); const [modelSwitchedFromQuotaError, setModelSwitchedFromQuotaError] = useState(false); @@ -993,6 +1027,7 @@ Logging in with Google... Restarting Gemini CLI to continue. 
if (config.isJitContextEnabled()) { await config.getContextManager()?.refresh(); + config.updateSystemInstructionIfInitialized(); flattenedMemory = flattenMemory(config.getUserMemory()); fileCount = config.getGeminiMdFileCount(); } else { @@ -1137,11 +1172,6 @@ Logging in with Google... Restarting Gemini CLI to continue. [pendingSlashCommandHistoryItems, pendingGeminiHistoryItems], ); - const hasPendingToolConfirmation = useMemo( - () => isToolAwaitingConfirmation(pendingHistoryItems), - [pendingHistoryItems], - ); - toggleBackgroundTasksRef.current = toggleBackgroundTasks; isBackgroundTaskVisibleRef.current = isBackgroundTaskVisible; backgroundTasksRef.current = backgroundTasks; @@ -1727,13 +1757,25 @@ Logging in with Google... Restarting Gemini CLI to continue. return true; } + const toggleLastTurnTools = () => { + triggerExpandHint(true); + + const targetToolCallIds = getLastTurnToolCallIds( + historyManager.history, + pendingHistoryItems, + ); + + if (targetToolCallIds.length > 0) { + toggleAllExpansion(targetToolCallIds); + } + }; + let enteringConstrainHeightMode = false; if (!constrainHeight) { enteringConstrainHeightMode = true; setConstrainHeight(true); if (keyMatchers[Command.SHOW_MORE_LINES](key)) { - // If the user manually collapses the view, show the hint and reset the x-second timer. - triggerExpandHint(true); + toggleLastTurnTools(); } if (!isAlternateBuffer) { refreshStatic(); @@ -1781,11 +1823,8 @@ Logging in with Google... Restarting Gemini CLI to continue. !enteringConstrainHeightMode ) { setConstrainHeight(false); - // If the user manually expands the view, show the hint and reset the x-second timer. - triggerExpandHint(true); - if (!isAlternateBuffer) { - refreshStatic(); - } + toggleLastTurnTools(); + refreshStatic(); return true; } else if ( (keyMatchers[Command.FOCUS_SHELL_INPUT](key) || @@ -1890,6 +1929,9 @@ Logging in with Google... Restarting Gemini CLI to continue. 
triggerExpandHint, keyMatchers, isHelpDismissKey, + historyManager.history, + pendingHistoryItems, + toggleAllExpansion, ], ); @@ -2033,6 +2075,11 @@ Logging in with Google... Restarting Gemini CLI to continue. authState === AuthState.AwaitingApiKeyInput || !!newAgents; + const hasPendingToolConfirmation = useMemo( + () => isToolAwaitingConfirmation(pendingHistoryItems), + [pendingHistoryItems], + ); + const hasConfirmUpdateExtensionRequests = confirmUpdateExtensionRequests.length > 0; const hasLoopDetectionConfirmationRequest = @@ -2639,7 +2686,13 @@ Logging in with Google... Restarting Gemini CLI to continue. startupWarnings: props.startupWarnings || [], }} > - + diff --git a/packages/cli/src/ui/commands/skillsCommand.test.ts b/packages/cli/src/ui/commands/skillsCommand.test.ts index 120ba01ed7..438f09b182 100644 --- a/packages/cli/src/ui/commands/skillsCommand.test.ts +++ b/packages/cli/src/ui/commands/skillsCommand.test.ts @@ -528,6 +528,7 @@ describe('skillsCommand', () => { await actionPromise; expect(reloadSkillsMock).toHaveBeenCalled(); + expect(context.ui.reloadCommands).toHaveBeenCalled(); expect(context.ui.setPendingItem).toHaveBeenCalledWith(null); expect(context.ui.addItem).toHaveBeenCalledWith( expect.objectContaining({ diff --git a/packages/cli/src/ui/commands/skillsCommand.ts b/packages/cli/src/ui/commands/skillsCommand.ts index 8c8db2fca5..ea1888db40 100644 --- a/packages/cli/src/ui/commands/skillsCommand.ts +++ b/packages/cli/src/ui/commands/skillsCommand.ts @@ -285,6 +285,8 @@ async function reloadAction( context.ui.setPendingItem(null); } + context.ui.reloadCommands(); + const afterSkills = skillManager.getSkills(); const afterNames = new Set(afterSkills.map((s) => s.name)); diff --git a/packages/cli/src/ui/components/DetailedMessagesDisplay.test.tsx b/packages/cli/src/ui/components/DetailedMessagesDisplay.test.tsx index 30f98a6eda..6cb61ea95c 100644 --- a/packages/cli/src/ui/components/DetailedMessagesDisplay.test.tsx +++ 
b/packages/cli/src/ui/components/DetailedMessagesDisplay.test.tsx @@ -35,10 +35,7 @@ vi.mock('./shared/ScrollableList.js', () => ({ describe('DetailedMessagesDisplay', () => { beforeEach(() => { - vi.mocked(useConsoleMessages).mockReturnValue({ - consoleMessages: [], - clearConsoleMessages: vi.fn(), - }); + vi.mocked(useConsoleMessages).mockReturnValue([]); }); it('renders nothing when messages are empty', async () => { const { lastFrame, unmount } = await renderWithProviders( @@ -58,10 +55,7 @@ describe('DetailedMessagesDisplay', () => { { type: 'error', content: 'Error message', count: 1 }, { type: 'debug', content: 'Debug message', count: 1 }, ]; - vi.mocked(useConsoleMessages).mockReturnValue({ - consoleMessages: messages, - clearConsoleMessages: vi.fn(), - }); + vi.mocked(useConsoleMessages).mockReturnValue(messages); const { lastFrame, unmount } = await renderWithProviders( , @@ -79,10 +73,7 @@ describe('DetailedMessagesDisplay', () => { const messages: ConsoleMessageItem[] = [ { type: 'error', content: 'Error message', count: 1 }, ]; - vi.mocked(useConsoleMessages).mockReturnValue({ - consoleMessages: messages, - clearConsoleMessages: vi.fn(), - }); + vi.mocked(useConsoleMessages).mockReturnValue(messages); const { lastFrame, unmount } = await renderWithProviders( , @@ -98,10 +89,7 @@ describe('DetailedMessagesDisplay', () => { const messages: ConsoleMessageItem[] = [ { type: 'error', content: 'Error message', count: 1 }, ]; - vi.mocked(useConsoleMessages).mockReturnValue({ - consoleMessages: messages, - clearConsoleMessages: vi.fn(), - }); + vi.mocked(useConsoleMessages).mockReturnValue(messages); const { lastFrame, unmount } = await renderWithProviders( , @@ -117,10 +105,7 @@ describe('DetailedMessagesDisplay', () => { const messages: ConsoleMessageItem[] = [ { type: 'log', content: 'Repeated message', count: 5 }, ]; - vi.mocked(useConsoleMessages).mockReturnValue({ - consoleMessages: messages, - clearConsoleMessages: vi.fn(), - }); + 
vi.mocked(useConsoleMessages).mockReturnValue(messages); const { lastFrame, unmount } = await renderWithProviders( , diff --git a/packages/cli/src/ui/components/DetailedMessagesDisplay.tsx b/packages/cli/src/ui/components/DetailedMessagesDisplay.tsx index 2daa1c39e3..97e456eb99 100644 --- a/packages/cli/src/ui/components/DetailedMessagesDisplay.tsx +++ b/packages/cli/src/ui/components/DetailedMessagesDisplay.tsx @@ -29,7 +29,7 @@ export const DetailedMessagesDisplay: React.FC< > = ({ maxHeight, width, hasFocus }) => { const scrollableListRef = useRef>(null); - const { consoleMessages } = useConsoleMessages(); + const consoleMessages = useConsoleMessages(); const config = useConfig(); const messages = useMemo(() => { diff --git a/packages/cli/src/ui/components/Footer.test.tsx b/packages/cli/src/ui/components/Footer.test.tsx index c0a52af868..e21db7940b 100644 --- a/packages/cli/src/ui/components/Footer.test.tsx +++ b/packages/cli/src/ui/components/Footer.test.tsx @@ -8,7 +8,11 @@ import { describe, it, expect, vi, afterEach, beforeEach } from 'vitest'; import { renderWithProviders } from '../../test-utils/render.js'; import { Footer } from './Footer.js'; import { createMockSettings } from '../../test-utils/settings.js'; -import { type Config } from '@google/gemini-cli-core'; +import { + type Config, + UserAccountManager, + AuthType, +} from '@google/gemini-cli-core'; import path from 'node:path'; // Normalize paths to POSIX slashes for stable cross-platform snapshots. 
@@ -69,14 +73,17 @@ const defaultProps = { branchName: 'main', }; -const mockConfig = { +const mockConfigPlain = { getTargetDir: () => defaultProps.targetDir, getDebugMode: () => false, getModel: () => defaultProps.model, getIdeMode: () => false, isTrustedFolder: () => true, getExtensionRegistryURI: () => undefined, -} as unknown as Config; + getContentGeneratorConfig: () => ({ authType: undefined }), +}; + +const mockConfig = mockConfigPlain as unknown as Config; const mockSessionStats = { sessionId: 'test-session-id', @@ -434,6 +441,7 @@ describe('