diff --git a/.gemini/settings.json b/.gemini/settings.json index 9051dc78de..eb7741997b 100644 --- a/.gemini/settings.json +++ b/.gemini/settings.json @@ -1,9 +1,9 @@ { "experimental": { - "plan": true, "extensionReloading": true, "modelSteering": true, - "memoryManager": true + "memoryManager": false, + "topicUpdateNarration": true }, "general": { "devtools": true diff --git a/.gemini/skills/behavioral-evals/references/fixing.md b/.gemini/skills/behavioral-evals/references/fixing.md index fc78870515..6cbdee36e1 100644 --- a/.gemini/skills/behavioral-evals/references/fixing.md +++ b/.gemini/skills/behavioral-evals/references/fixing.md @@ -33,6 +33,16 @@ evaluation. - **Warning**: Do not lose test fidelity by making prompts too direct/easy. - **Primary Fix Trigger**: Adjust tool descriptions, system prompts (`snippets.ts`), or **modules that contribute to the prompt template**. + - Fixes should generally try to improve the prompt `@packages/core/src/prompts/snippets.ts` first. + - **Instructional Generality**: Changes to the system prompt should aim to be as general as possible while still accomplishing the goal. Specificity should be added only as needed. + - **Principle**: Instead of creating "forbidden lists" for specific syntax (e.g., "Don't use `Object.create()`"), formulate a broader engineering principle that covers the underlying issue (e.g., "Prioritize explicit composition over hidden prototype manipulation"). This improves steerability across a wider range of similar scenarios. + - *Low Specificity*: "Follow ecosystem best practices" + - *Medium Specificity*: "Utilize OOP and functional best practices, as applicable" + - *High Specificity*: Provide ecosystem-specific hints as examples of a broader principle rather than direct instructions. e.g., "NEVER use hacks like bypassing the type system or employing 'hidden' logic (e.g.: reflection, prototype manipulation). 
Instead, use explicit and idiomatic language features (e.g.: type guards, explicit class instantiation, or object spread) that maintain structural integrity." + - **Prompt Simplification**: Once the test is passing, use `ask_user` to determine if prompt simplification is desired. + - **Criteria**: Simplification should be attempted only if there are related clauses that can be de-duplicated or reparented under a single heading. + - **Verification**: As part of simplification, you MUST identify and run any behavioral eval tests that might be affected by the changes to ensure no regressions are introduced. + - Test fixes should not "cheat" by changing a test's `GEMINI.md` file or by updating the test's prompt to instruct it to not repro the bug. - **Warning**: Prompts have multiple configurations; ensure your fix targets the correct config for the model in question. 4. **Architecture Options**: If prompt or instruction tuning triggers no diff --git a/.github/actions/publish-release/action.yml b/.github/actions/publish-release/action.yml index a7df2039d5..4d33edffee 100644 --- a/.github/actions/publish-release/action.yml +++ b/.github/actions/publish-release/action.yml @@ -175,7 +175,9 @@ runs: --dry-run="${INPUTS_DRY_RUN}" \ --workspace="${INPUTS_CORE_PACKAGE_NAME}" \ --no-tag - npm dist-tag rm ${INPUTS_CORE_PACKAGE_NAME} false + if [[ "${INPUTS_DRY_RUN}" == "false" ]]; then + npm dist-tag rm ${INPUTS_CORE_PACKAGE_NAME} false + fi - name: '🔗 Install latest core package' working-directory: '${{ inputs.working-directory }}' @@ -193,7 +195,7 @@ runs: INPUTS_A2A_PACKAGE_NAME: '${{ inputs.a2a-package-name }}' - name: '📦 Prepare bundled CLI for npm release' - if: "inputs.npm-registry-url != 'https://npm.pkg.github.com/' && inputs.npm-tag != 'latest'" + if: "inputs.npm-registry-url != 'https://npm.pkg.github.com/'" working-directory: '${{ inputs.working-directory }}' shell: 'bash' run: | @@ -248,7 +250,9 @@ runs: --dry-run="${INPUTS_DRY_RUN}" \ 
--workspace="${INPUTS_A2A_PACKAGE_NAME}" \ --no-tag - npm dist-tag rm ${INPUTS_A2A_PACKAGE_NAME} false + if [[ "${INPUTS_DRY_RUN}" == "false" ]]; then + npm dist-tag rm ${INPUTS_A2A_PACKAGE_NAME} false + fi - name: '🔬 Verify NPM release by version' uses: './.github/actions/verify-release' @@ -287,8 +291,25 @@ runs: INPUTS_PREVIOUS_TAG: '${{ inputs.previous-tag }}' shell: 'bash' run: | + rm -f gemini-cli-bundle.zip + (cd bundle && chmod +x gemini.js && zip -r ../gemini-cli-bundle.zip .) + + echo "Testing the generated bundle archive..." + rm -rf test-bundle + mkdir -p test-bundle + unzip -q gemini-cli-bundle.zip -d test-bundle + + # Verify it runs and outputs a version + BUNDLE_VERSION=$(node test-bundle/gemini.js --version | xargs) + echo "Bundle version output: ${BUNDLE_VERSION}" + if [[ -z "${BUNDLE_VERSION}" ]]; then + echo "Error: Bundle failed to execute or return version." + exit 1 + fi + rm -rf test-bundle + gh release create "${INPUTS_RELEASE_TAG}" \ - bundle/gemini.js \ + gemini-cli-bundle.zip \ --target "${STEPS_RELEASE_BRANCH_OUTPUTS_BRANCH_NAME}" \ --title "Release ${INPUTS_RELEASE_TAG}" \ --notes-start-tag "${INPUTS_PREVIOUS_TAG}" \ diff --git a/.github/actions/run-tests/action.yml b/.github/actions/run-tests/action.yml index 2f5c0de140..42fd78d7e9 100644 --- a/.github/actions/run-tests/action.yml +++ b/.github/actions/run-tests/action.yml @@ -18,6 +18,13 @@ runs: env: JSON_INPUTS: '${{ toJSON(inputs) }}' run: 'echo "$JSON_INPUTS"' + - name: 'Install system dependencies' + if: "runner.os == 'Linux'" + run: | + sudo apt-get update -qq && sudo DEBIAN_FRONTEND=noninteractive apt-get install -y -qq bubblewrap + # Ubuntu 24.04+ requires this to allow bwrap to function in CI + sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 || true + shell: 'bash' - name: 'Run Tests' env: GEMINI_API_KEY: '${{ inputs.gemini_api_key }}' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d40b49bb69..82e9194a02 100644 --- 
a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -175,10 +175,10 @@ jobs: NO_COLOR: true run: | if [[ "${{ matrix.shard }}" == "cli" ]]; then - npm run test:ci --workspace @google/gemini-cli + npm run test:ci --workspace "@google/gemini-cli" else # Explicitly list non-cli packages to ensure they are sharded correctly - npm run test:ci --workspace @google/gemini-cli-core --workspace @google/gemini-cli-a2a-server --workspace gemini-cli-vscode-ide-companion --workspace @google/gemini-cli-test-utils --if-present -- --coverage.enabled=false + npm run test:ci --workspace "@google/gemini-cli-core" --workspace "@google/gemini-cli-a2a-server" --workspace "gemini-cli-vscode-ide-companion" --workspace "@google/gemini-cli-test-utils" --if-present -- --coverage.enabled=false npm run test:scripts fi @@ -263,10 +263,10 @@ jobs: NO_COLOR: true run: | if [[ "${{ matrix.shard }}" == "cli" ]]; then - npm run test:ci --workspace @google/gemini-cli -- --coverage.enabled=false + npm run test:ci --workspace "@google/gemini-cli" -- --coverage.enabled=false else # Explicitly list non-cli packages to ensure they are sharded correctly - npm run test:ci --workspace @google/gemini-cli-core --workspace @google/gemini-cli-a2a-server --workspace gemini-cli-vscode-ide-companion --workspace @google/gemini-cli-test-utils --if-present -- --coverage.enabled=false + npm run test:ci --workspace "@google/gemini-cli-core" --workspace "@google/gemini-cli-a2a-server" --workspace "gemini-cli-vscode-ide-companion" --workspace "@google/gemini-cli-test-utils" --if-present -- --coverage.enabled=false npm run test:scripts fi @@ -429,11 +429,14 @@ jobs: NODE_ENV: 'test' run: | if ("${{ matrix.shard }}" -eq "cli") { - npm run test:ci --workspace @google/gemini-cli -- --coverage.enabled=false + npm run test:ci --workspace "@google/gemini-cli" -- --coverage.enabled=false + if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } } else { # Explicitly list non-cli packages to ensure they are sharded correctly - npm 
run test:ci --workspace @google/gemini-cli-core --workspace @google/gemini-cli-a2a-server --workspace gemini-cli-vscode-ide-companion --workspace @google/gemini-cli-test-utils --if-present -- --coverage.enabled=false + npm run test:ci --workspace "@google/gemini-cli-core" --workspace "@google/gemini-cli-a2a-server" --workspace "gemini-cli-vscode-ide-companion" --workspace "@google/gemini-cli-test-utils" --if-present -- --coverage.enabled=false + if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } npm run test:scripts + if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } } shell: 'pwsh' diff --git a/.github/workflows/eval-guidance.yml b/.github/workflows/eval-guidance.yml deleted file mode 100644 index e1f1ab3168..0000000000 --- a/.github/workflows/eval-guidance.yml +++ /dev/null @@ -1,69 +0,0 @@ -name: 'Evals: PR Guidance' - -on: - pull_request: - paths: - - 'packages/core/src/**/*.ts' - - '!**/*.test.ts' - - '!**/*.test.tsx' - -permissions: - pull-requests: 'write' - contents: 'read' - -jobs: - provide-guidance: - name: 'Model Steering Guidance' - runs-on: 'ubuntu-latest' - if: "github.repository == 'google-gemini/gemini-cli'" - steps: - - name: 'Checkout' - uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v4 - with: - fetch-depth: 0 - - - name: 'Set up Node.js' - uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4.4.0 - with: - node-version-file: '.nvmrc' - cache: 'npm' - - - name: 'Detect Steering Changes' - id: 'detect' - run: | - STEERING_DETECTED=$(node scripts/changed_prompt.js --steering-only) - echo "STEERING_DETECTED=$STEERING_DETECTED" >> "$GITHUB_OUTPUT" - - - name: 'Analyze PR Content' - if: "steps.detect.outputs.STEERING_DETECTED == 'true'" - id: 'analysis' - env: - GH_TOKEN: '${{ secrets.GITHUB_TOKEN }}' - run: | - # Check for behavioral eval changes - EVAL_CHANGES=$(git diff --name-only origin/${{ github.base_ref }}...HEAD | grep "^evals/" || true) - if [ -z 
"$EVAL_CHANGES" ]; then - echo "MISSING_EVALS=true" >> "$GITHUB_OUTPUT" - fi - - # Check if user is a maintainer (has write/admin access) - USER_PERMISSION=$(gh api repos/${{ github.repository }}/collaborators/${{ github.actor }}/permission --jq '.permission') - if [[ "$USER_PERMISSION" == "admin" || "$USER_PERMISSION" == "write" ]]; then - echo "IS_MAINTAINER=true" >> "$GITHUB_OUTPUT" - fi - - - name: 'Post Guidance Comment' - if: "steps.detect.outputs.STEERING_DETECTED == 'true'" - uses: 'thollander/actions-comment-pull-request@65f9e5c9a1f2cd378bd74b2e057c9736982a8e74' # ratchet:thollander/actions-comment-pull-request@v3 - with: - comment-tag: 'eval-guidance-bot' - message: | - ### 🧠 Model Steering Guidance - - This PR modifies files that affect the model's behavior (prompts, tools, or instructions). - - ${{ steps.analysis.outputs.MISSING_EVALS == 'true' && '- ⚠️ **Consider adding Evals:** No behavioral evaluations (`evals/*.eval.ts`) were added or updated in this PR. Consider adding a test case to verify the new behavior and prevent regressions.' || '' }} - ${{ steps.analysis.outputs.IS_MAINTAINER == 'true' && '- 🚀 **Maintainer Reminder:** Please ensure that these changes do not regress results on benchmark evals before merging.' 
|| '' }} - - --- - *This is an automated guidance message triggered by steering logic signatures.* diff --git a/.github/workflows/eval-pr.yml b/.github/workflows/eval-pr.yml new file mode 100644 index 0000000000..9da0fc8511 --- /dev/null +++ b/.github/workflows/eval-pr.yml @@ -0,0 +1,142 @@ +name: 'Evals: PR Evaluation & Regression' + +on: + pull_request: + types: ['opened', 'synchronize', 'reopened', 'ready_for_review'] + paths: + - 'packages/core/src/prompts/**' + - 'packages/core/src/tools/**' + - 'packages/core/src/agents/**' + - 'evals/**' + - '!**/*.test.ts' + - '!**/*.test.tsx' + workflow_dispatch: + +# Prevents multiple runs for the same PR simultaneously (saves tokens) +concurrency: + group: '${{ github.workflow }}-${{ github.head_ref || github.ref }}' + cancel-in-progress: true + +permissions: + pull-requests: 'write' + contents: 'read' + actions: 'read' + +jobs: + pr-evaluation: + name: 'Evaluate Steering & Regressions' + runs-on: 'gemini-cli-ubuntu-16-core' + if: "github.repository == 'google-gemini/gemini-cli' && (github.event_name != 'pull_request' || (github.event.pull_request.draft == false && github.event.pull_request.head.repo.full_name == github.repository))" + # External contributors' PRs will wait for approval in this environment + environment: |- + ${{ (github.event.pull_request.head.repo.full_name == github.repository) && 'internal' || 'external-evals' }} + env: + # CENTRALIZED MODEL LIST + MODEL_LIST: 'gemini-3-flash-preview' + + steps: + - name: 'Checkout' + uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v5 + with: + fetch-depth: 0 + + - name: 'Set up Node.js' + uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4.4.0 + with: + node-version-file: '.nvmrc' + cache: 'npm' + + - name: 'Detect Steering Changes' + id: 'detect' + run: | + SHOULD_RUN=$(node scripts/changed_prompt.js) + STEERING_DETECTED=$(node scripts/changed_prompt.js --steering-only) + 
echo "SHOULD_RUN=$SHOULD_RUN" >> "$GITHUB_OUTPUT" + echo "STEERING_DETECTED=$STEERING_DETECTED" >> "$GITHUB_OUTPUT" + + - name: 'Install dependencies' + if: "steps.detect.outputs.SHOULD_RUN == 'true'" + run: 'npm ci' + + - name: 'Build project' + if: "steps.detect.outputs.SHOULD_RUN == 'true'" + run: 'npm run build' + + - name: 'Analyze PR Content (Guidance)' + if: "steps.detect.outputs.STEERING_DETECTED == 'true'" + id: 'analysis' + env: + GH_TOKEN: '${{ secrets.GITHUB_TOKEN }}' + run: | + # Check for behavioral eval changes + EVAL_CHANGES=$(git diff --name-only origin/${{ github.base_ref }}...HEAD | grep "^evals/" || true) + if [ -z "$EVAL_CHANGES" ]; then + echo "MISSING_EVALS=true" >> "$GITHUB_OUTPUT" + fi + + # Check if user is a maintainer + USER_PERMISSION=$(gh api repos/${{ github.repository }}/collaborators/${{ github.actor }}/permission --jq '.permission') + if [[ "$USER_PERMISSION" == "admin" || "$USER_PERMISSION" == "write" ]]; then + echo "IS_MAINTAINER=true" >> "$GITHUB_OUTPUT" + fi + + - name: 'Execute Regression Check' + if: "steps.detect.outputs.SHOULD_RUN == 'true'" + env: + GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}' + GH_TOKEN: '${{ secrets.GITHUB_TOKEN }}' + MODEL_LIST: '${{ env.MODEL_LIST }}' + run: | + # Run the regression check loop. The script saves the report to a file. + node scripts/run_eval_regression.js + + # Use the generated report file if it exists + if [[ -f eval_regression_report.md ]]; then + echo "REPORT_FILE=eval_regression_report.md" >> "$GITHUB_ENV" + fi + + - name: 'Post or Update PR Comment' + if: "always() && (steps.detect.outputs.STEERING_DETECTED == 'true' || env.REPORT_FILE != '')" + env: + GH_TOKEN: '${{ secrets.GITHUB_TOKEN }}' + run: | + # 1. 
Build the full comment body + { + if [[ -f eval_regression_report.md ]]; then + cat eval_regression_report.md + echo "" + fi + + if [[ "${{ steps.detect.outputs.STEERING_DETECTED }}" == "true" ]]; then + echo "### 🧠 Model Steering Guidance" + echo "" + echo "This PR modifies files that affect the model's behavior (prompts, tools, or instructions)." + echo "" + + if [[ "${{ steps.analysis.outputs.MISSING_EVALS }}" == "true" ]]; then + echo "- ⚠️ **Consider adding Evals:** No behavioral evaluations (\`evals/*.eval.ts\`) were added or updated in this PR. Consider [adding a test case](https://github.com/google-gemini/gemini-cli/blob/main/evals/README.md#creating-an-evaluation) to verify the new behavior and prevent regressions." + fi + + if [[ "${{ steps.analysis.outputs.IS_MAINTAINER }}" == "true" ]]; then + echo "- 🚀 **Maintainer Reminder:** Please ensure that these changes do not regress results on benchmark evals before merging." + fi + fi + + echo "" + echo "---" + echo "*This is an automated guidance message triggered by steering logic signatures.*" + echo "" + } > full_comment.md + + # 2. Find if a comment with our unique tag already exists + # We extract the numeric ID from the URL to ensure compatibility with the REST API + COMMENT_ID=$(gh pr view ${{ github.event.pull_request.number }} --json comments --jq '.comments[] | select(.body | contains("")) | .url' | grep -oE "[0-9]+$" | head -n 1) + + # 3. Update or Create the comment + if [ -n "$COMMENT_ID" ]; then + echo "Updating existing comment $COMMENT_ID via API..." + gh api -X PATCH "repos/${{ github.repository }}/issues/comments/$COMMENT_ID" -F body=@full_comment.md + else + echo "Creating new PR comment..." 
+ gh pr comment ${{ github.event.pull_request.number }} --body-file full_comment.md + fi diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9b3e18d6af..ccc2ad70ce 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -346,9 +346,11 @@ npm run lint - Please adhere to the coding style, patterns, and conventions used throughout the existing codebase. -- Consult [GEMINI.md](../GEMINI.md) (typically found in the project root) for - specific instructions related to AI-assisted development, including - conventions for React, comments, and Git usage. +- Consult + [GEMINI.md](https://github.com/google-gemini/gemini-cli/blob/main/GEMINI.md) + (typically found in the project root) for specific instructions related to + AI-assisted development, including conventions for React, comments, and Git + usage. - **Imports:** Pay special attention to import paths. The project uses ESLint to enforce restrictions on relative imports between packages. @@ -505,8 +507,9 @@ code. ### Documentation structure -Our documentation is organized using [sidebar.json](/docs/sidebar.json) as the -table of contents. When adding new documentation: +Our documentation is organized using +[sidebar.json](https://github.com/google-gemini/gemini-cli/blob/main/docs/sidebar.json) +as the table of contents. When adding new documentation: 1. Create your markdown file **in the appropriate directory** under `/docs`. 2. Add an entry to `sidebar.json` in the relevant section. diff --git a/README.md b/README.md index 03a7be1296..10458b2126 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Learn all about Gemini CLI in our [documentation](https://geminicli.com/docs/). ## 📦 Installation See -[Gemini CLI installation, execution, and releases](./docs/get-started/installation.md) +[Gemini CLI installation, execution, and releases](https://www.geminicli.com/docs/get-started/installation) for recommended system specifications and a detailed installation guide. 
### Quick Install @@ -71,9 +71,9 @@ conda activate gemini_env npm install -g @google/gemini-cli ``` -## Release Cadence and Tags +## Release Channels -See [Releases](./docs/releases.md) for more details. +See [Releases](https://www.geminicli.com/docs/changelogs) for more details. ### Preview @@ -209,7 +209,7 @@ gemini ``` For Google Workspace accounts and other authentication methods, see the -[authentication guide](./docs/get-started/authentication.md). +[authentication guide](https://www.geminicli.com/docs/get-started/authentication). ## 🚀 Getting Started @@ -278,59 +278,64 @@ gemini ### Getting Started -- [**Quickstart Guide**](./docs/get-started/index.md) - Get up and running - quickly. -- [**Authentication Setup**](./docs/get-started/authentication.md) - Detailed - auth configuration. -- [**Configuration Guide**](./docs/reference/configuration.md) - Settings and - customization. -- [**Keyboard Shortcuts**](./docs/reference/keyboard-shortcuts.md) - +- [**Quickstart Guide**](https://www.geminicli.com/docs/get-started) - Get up + and running quickly. +- [**Authentication Setup**](https://www.geminicli.com/docs/get-started/authentication) - + Detailed auth configuration. +- [**Configuration Guide**](https://www.geminicli.com/docs/reference/configuration) - + Settings and customization. +- [**Keyboard Shortcuts**](https://www.geminicli.com/docs/reference/keyboard-shortcuts) - Productivity tips. ### Core Features -- [**Commands Reference**](./docs/reference/commands.md) - All slash commands - (`/help`, `/chat`, etc). -- [**Custom Commands**](./docs/cli/custom-commands.md) - Create your own - reusable commands. -- [**Context Files (GEMINI.md)**](./docs/cli/gemini-md.md) - Provide persistent - context to Gemini CLI. -- [**Checkpointing**](./docs/cli/checkpointing.md) - Save and resume - conversations. -- [**Token Caching**](./docs/cli/token-caching.md) - Optimize token usage. 
+- [**Commands Reference**](https://www.geminicli.com/docs/reference/commands) - + All slash commands (`/help`, `/chat`, etc). +- [**Custom Commands**](https://www.geminicli.com/docs/cli/custom-commands) - + Create your own reusable commands. +- [**Context Files (GEMINI.md)**](https://www.geminicli.com/docs/cli/gemini-md) - + Provide persistent context to Gemini CLI. +- [**Checkpointing**](https://www.geminicli.com/docs/cli/checkpointing) - Save + and resume conversations. +- [**Token Caching**](https://www.geminicli.com/docs/cli/token-caching) - + Optimize token usage. ### Tools & Extensions -- [**Built-in Tools Overview**](./docs/reference/tools.md) - - [File System Operations](./docs/tools/file-system.md) - - [Shell Commands](./docs/tools/shell.md) - - [Web Fetch & Search](./docs/tools/web-fetch.md) -- [**MCP Server Integration**](./docs/tools/mcp-server.md) - Extend with custom - tools. -- [**Custom Extensions**](./docs/extensions/index.md) - Build and share your own - commands. +- [**Built-in Tools Overview**](https://www.geminicli.com/docs/reference/tools) + - [File System Operations](https://www.geminicli.com/docs/tools/file-system) + - [Shell Commands](https://www.geminicli.com/docs/tools/shell) + - [Web Fetch & Search](https://www.geminicli.com/docs/tools/web-fetch) +- [**MCP Server Integration**](https://www.geminicli.com/docs/tools/mcp-server) - + Extend with custom tools. +- [**Custom Extensions**](https://geminicli.com/docs/extensions/writing-extensions) - + Build and share your own commands. ### Advanced Topics -- [**Headless Mode (Scripting)**](./docs/cli/headless.md) - Use Gemini CLI in - automated workflows. -- [**IDE Integration**](./docs/ide-integration/index.md) - VS Code companion. -- [**Sandboxing & Security**](./docs/cli/sandbox.md) - Safe execution - environments. -- [**Trusted Folders**](./docs/cli/trusted-folders.md) - Control execution - policies by folder. 
-- [**Enterprise Guide**](./docs/cli/enterprise.md) - Deploy and manage in a - corporate environment. -- [**Telemetry & Monitoring**](./docs/cli/telemetry.md) - Usage tracking. -- [**Tools reference**](./docs/reference/tools.md) - Built-in tools overview. -- [**Local development**](./docs/local-development.md) - Local development - tooling. +- [**Headless Mode (Scripting)**](https://www.geminicli.com/docs/cli/headless) - + Use Gemini CLI in automated workflows. +- [**IDE Integration**](https://www.geminicli.com/docs/ide-integration) - VS + Code companion. +- [**Sandboxing & Security**](https://www.geminicli.com/docs/cli/sandbox) - Safe + execution environments. +- [**Trusted Folders**](https://www.geminicli.com/docs/cli/trusted-folders) - + Control execution policies by folder. +- [**Enterprise Guide**](https://www.geminicli.com/docs/cli/enterprise) - Deploy + and manage in a corporate environment. +- [**Telemetry & Monitoring**](https://www.geminicli.com/docs/cli/telemetry) - + Usage tracking. +- [**Tools reference**](https://www.geminicli.com/docs/reference/tools) - + Built-in tools overview. +- [**Local development**](https://www.geminicli.com/docs/local-development) - + Local development tooling. ### Troubleshooting & Support -- [**Troubleshooting Guide**](./docs/resources/troubleshooting.md) - Common - issues and solutions. -- [**FAQ**](./docs/resources/faq.md) - Frequently asked questions. +- [**Troubleshooting Guide**](https://www.geminicli.com/docs/resources/troubleshooting) - + Common issues and solutions. +- [**FAQ**](https://www.geminicli.com/docs/resources/faq) - Frequently asked + questions. - Use `/bug` command to report issues directly from the CLI. ### Using MCP Servers @@ -344,8 +349,9 @@ custom tools: > @database Run a query to find inactive users ``` -See the [MCP Server Integration guide](./docs/tools/mcp-server.md) for setup -instructions. 
+See the +[MCP Server Integration guide](https://www.geminicli.com/docs/tools/mcp-server) +for setup instructions. ## 🤝 Contributing @@ -366,7 +372,8 @@ for planned features and priorities. ## 📖 Resources - **[Official Roadmap](./ROADMAP.md)** - See what's coming next. -- **[Changelog](./docs/changelogs/index.md)** - See recent notable updates. +- **[Changelog](https://www.geminicli.com/docs/changelogs)** - See recent + notable updates. - **[NPM Package](https://www.npmjs.com/package/@google/gemini-cli)** - Package registry. - **[GitHub Issues](https://github.com/google-gemini/gemini-cli/issues)** - @@ -376,13 +383,14 @@ for planned features and priorities. ### Uninstall -See the [Uninstall Guide](./docs/resources/uninstall.md) for removal -instructions. +See the [Uninstall Guide](https://www.geminicli.com/docs/resources/uninstall) +for removal instructions. ## 📄 Legal - **License**: [Apache License 2.0](LICENSE) -- **Terms of Service**: [Terms & Privacy](./docs/resources/tos-privacy.md) +- **Terms of Service**: + [Terms & Privacy](https://www.geminicli.com/docs/resources/tos-privacy) - **Security**: [Security Policy](SECURITY.md) --- diff --git a/docs/assets/theme-tokyonight-dark.png b/docs/assets/theme-tokyonight-dark.png new file mode 100644 index 0000000000..ebeec93548 Binary files /dev/null and b/docs/assets/theme-tokyonight-dark.png differ diff --git a/docs/changelogs/index.md b/docs/changelogs/index.md index 84a0daa3b2..ac3a433d0e 100644 --- a/docs/changelogs/index.md +++ b/docs/changelogs/index.md @@ -18,6 +18,31 @@ on GitHub. | [Preview](preview.md) | Experimental features ready for early feedback. | | [Stable](latest.md) | Stable, recommended for general use. 
| +## Announcements: v0.36.0 - 2026-04-01 + +- **Multi-Registry Architecture and Sandboxing:** Introduced a multi-registry + architecture and implemented native macOS Seatbelt and Windows sandboxing for + enhanced subagent security + ([#22712](https://github.com/google-gemini/gemini-cli/pull/22712), + [#22718](https://github.com/google-gemini/gemini-cli/pull/22718) by @akh64bit, + [#22832](https://github.com/google-gemini/gemini-cli/pull/22832) by @ehedlund, + [#21807](https://github.com/google-gemini/gemini-cli/pull/21807) by + @mattKorwel). +- **Refreshed Composer UX:** Implemented a refreshed user experience for the + Composer layout and improved terminal interaction robustness + ([#21212](https://github.com/google-gemini/gemini-cli/pull/21212), + [#23286](https://github.com/google-gemini/gemini-cli/pull/23286) by + @jwhelangoog). +- **Git Worktree Support:** Added native support for Git worktrees, allowing for + isolated parallel sessions + ([#22973](https://github.com/google-gemini/gemini-cli/pull/22973), + [#23265](https://github.com/google-gemini/gemini-cli/pull/23265) by @jerop). +- **Subagent Context and Feedback:** Enhanced subagents with JIT context + injection and resilient tool rejection with contextual feedback + ([#23032](https://github.com/google-gemini/gemini-cli/pull/23032), + [#22951](https://github.com/google-gemini/gemini-cli/pull/22951) by + @abhipatel12). + ## Announcements: v0.35.0 - 2026-03-24 - **Customizable Keyboard Shortcuts:** Users can now customize their keyboard diff --git a/docs/changelogs/latest.md b/docs/changelogs/latest.md index 6df33c78d6..d776a43135 100644 --- a/docs/changelogs/latest.md +++ b/docs/changelogs/latest.md @@ -1,6 +1,6 @@ -# Latest stable release: v0.35.2 +# Latest stable release: v0.36.0 -Released: March 26, 2026 +Released: April 1, 2026 For most users, our latest stable release is the recommended release. 
Install the latest stable version with: @@ -11,378 +11,372 @@ npm install -g @google/gemini-cli ## Highlights -- **Customizable Keyboard Shortcuts:** Significant improvements to input - flexibility with support for custom keybindings, literal character bindings, - and extended terminal protocol keys. -- **Vim Mode Enhancements:** Further refinement of the Vim modal editing - experience, adding common motions like \`X\`, \`~\`, \`r\`, and \`f/F/t/T\`, - along with yank and paste support. -- **Enhanced Security through Sandboxing:** Introduction of a unified - \`SandboxManager\` and integration of Linux-native sandboxing (bubblewrap and - seccomp) to isolate tool execution and improve system security. -- **JIT Context Discovery:** Improved performance and accuracy by enabling - Just-In-Time context loading for file system tools, ensuring the model has the - most relevant information without overwhelming the context. -- **Subagent & Performance Updates:** Subagents are now enabled by default, - supported by a model-driven parallel tool scheduler and code splitting for - faster startup and more efficient task execution. +- **Multi-Registry Architecture and Tool Isolation:** Introduced a + multi-registry architecture for subagents and implemented strict sandboxing + for macOS (Seatbelt) and Windows to enhance security and isolation. +- **Improved Subagent Coordination:** Enhanced subagents with local execution + capabilities, JIT context injection (upward traversal capped at git root), and + resilient tool rejection with contextual feedback. +- **Enhanced UI and UX:** Implemented a refreshed UX for the Composer layout, + improved terminal fallback warnings, and resolved various UI flickering and + state persistence issues. +- **Git Worktree Support:** Added support for Git worktrees to enable isolated + parallel sessions within the same repository. 
+- **Plan Mode Improvements:** Plan mode now supports non-interactive execution + and includes hardened sandbox path resolution to prevent hallucinations. ## What's Changed -- fix(core): allow disabling environment variable redaction by @galz10 in - [#23927](https://github.com/google-gemini/gemini-cli/pull/23927) -- fix(a2a-server): A2A server should execute ask policies in interactive mode by - @keith.schaab in - [#23831](https://github.com/google-gemini/gemini-cli/pull/23831) -- feat(cli): customizable keyboard shortcuts by @scidomino in - [#21945](https://github.com/google-gemini/gemini-cli/pull/21945) -- feat(core): Thread `AgentLoopContext` through core. by @joshualitt in - [#21944](https://github.com/google-gemini/gemini-cli/pull/21944) -- chore(release): bump version to 0.35.0-nightly.20260311.657f19c1f by - @gemini-cli-robot in - [#21966](https://github.com/google-gemini/gemini-cli/pull/21966) -- refactor(a2a): remove legacy CoreToolScheduler by @adamfweidman in - [#21955](https://github.com/google-gemini/gemini-cli/pull/21955) -- feat(ui): add missing vim mode motions (X, ~, r, f/F/t/T, df/dt and friends) - by @aanari in [#21932](https://github.com/google-gemini/gemini-cli/pull/21932) -- Feat/retry fetch notifications by @aishaneeshah in - [#21813](https://github.com/google-gemini/gemini-cli/pull/21813) -- fix(core): remove OAuth check from handle fallback and clean up stray file by - @sehoon38 in [#21962](https://github.com/google-gemini/gemini-cli/pull/21962) -- feat(cli): support literal character keybindings and extended Kitty protocol - keys by @scidomino in - [#21972](https://github.com/google-gemini/gemini-cli/pull/21972) -- fix(ui): clamp cursor to last char after all NORMAL mode deletes by @aanari in - [#21973](https://github.com/google-gemini/gemini-cli/pull/21973) -- test(core): add missing tests for prompts/utils.ts by @krrishverma1805-web in - [#19941](https://github.com/google-gemini/gemini-cli/pull/19941) -- fix(cli): allow scrolling keys in 
copy mode (Ctrl+S selection mode) by - @nsalerni in [#19933](https://github.com/google-gemini/gemini-cli/pull/19933) -- docs(cli): add custom keybinding documentation by @scidomino in - [#21980](https://github.com/google-gemini/gemini-cli/pull/21980) -- docs: fix misleading YOLO mode description in defaultApprovalMode by - @Gyanranjan-Priyam in - [#21878](https://github.com/google-gemini/gemini-cli/pull/21878) -- fix: clean up /clear and /resume by @jackwotherspoon in - [#22007](https://github.com/google-gemini/gemini-cli/pull/22007) -- fix(core)#20941: reap orphaned descendant processes on PTY abort by @manavmax - in [#21124](https://github.com/google-gemini/gemini-cli/pull/21124) -- fix(core): update language detection to use LSP 3.18 identifiers by @yunaseoul - in [#21931](https://github.com/google-gemini/gemini-cli/pull/21931) -- feat(cli): support removing keybindings via '-' prefix by @scidomino in - [#22042](https://github.com/google-gemini/gemini-cli/pull/22042) -- feat(policy): add --admin-policy flag for supplemental admin policies by - @galz10 in [#20360](https://github.com/google-gemini/gemini-cli/pull/20360) -- merge duplicate imports packages/cli/src subtask1 by @Nixxx19 in - [#22040](https://github.com/google-gemini/gemini-cli/pull/22040) -- perf(core): parallelize user quota and experiments fetching in refreshAuth by - @sehoon38 in [#21648](https://github.com/google-gemini/gemini-cli/pull/21648) -- Changelog for v0.34.0-preview.0 by @gemini-cli-robot in - [#21965](https://github.com/google-gemini/gemini-cli/pull/21965) -- Changelog for v0.33.0 by @gemini-cli-robot in - [#21967](https://github.com/google-gemini/gemini-cli/pull/21967) -- fix(core): handle EISDIR in robustRealpath on Windows by @sehoon38 in - [#21984](https://github.com/google-gemini/gemini-cli/pull/21984) -- feat(core): include initiationMethod in conversation interaction telemetry by - @yunaseoul in [#22054](https://github.com/google-gemini/gemini-cli/pull/22054) -- feat(ui): add vim 
yank/paste (y/p/P) with unnamed register by @aanari in - [#22026](https://github.com/google-gemini/gemini-cli/pull/22026) -- fix(core): enable numerical routing for api key users by @sehoon38 in - [#21977](https://github.com/google-gemini/gemini-cli/pull/21977) -- feat(telemetry): implement retry attempt telemetry for network related retries - by @aishaneeshah in - [#22027](https://github.com/google-gemini/gemini-cli/pull/22027) -- fix(policy): remove unnecessary escapeRegex from pattern builders by - @spencer426 in - [#21921](https://github.com/google-gemini/gemini-cli/pull/21921) -- fix(core): preserve dynamic tool descriptions on session resume by @sehoon38 - in [#18835](https://github.com/google-gemini/gemini-cli/pull/18835) -- chore: allow 'gemini-3.1' in sensitive keyword linter by @scidomino in - [#22065](https://github.com/google-gemini/gemini-cli/pull/22065) -- feat(core): support custom base URL via env vars by @junaiddshaukat in - [#21561](https://github.com/google-gemini/gemini-cli/pull/21561) -- merge duplicate imports packages/cli/src subtask2 by @Nixxx19 in - [#22051](https://github.com/google-gemini/gemini-cli/pull/22051) -- fix(core): silently retry API errors up to 3 times before halting session by - @spencer426 in - [#21989](https://github.com/google-gemini/gemini-cli/pull/21989) -- feat(core): simplify subagent success UI and improve early termination display - by @abhipatel12 in - [#21917](https://github.com/google-gemini/gemini-cli/pull/21917) -- merge duplicate imports packages/cli/src subtask3 by @Nixxx19 in - [#22056](https://github.com/google-gemini/gemini-cli/pull/22056) -- fix(hooks): fix BeforeAgent/AfterAgent inconsistencies (#18514) by @krishdef7 - in [#21383](https://github.com/google-gemini/gemini-cli/pull/21383) -- feat(core): implement SandboxManager interface and config schema by @galz10 in - [#21774](https://github.com/google-gemini/gemini-cli/pull/21774) -- docs: document npm deprecation warnings as safe to ignore by @h30s in - 
[#20692](https://github.com/google-gemini/gemini-cli/pull/20692) -- fix: remove status/need-triage from maintainer-only issues by @SandyTao520 in - [#22044](https://github.com/google-gemini/gemini-cli/pull/22044) -- fix(core): propagate subagent context to policy engine by @NTaylorMullen in - [#22086](https://github.com/google-gemini/gemini-cli/pull/22086) -- fix(cli): resolve skill uninstall failure when skill name is updated by - @NTaylorMullen in - [#22085](https://github.com/google-gemini/gemini-cli/pull/22085) -- docs(plan): clarify interactive plan editing with Ctrl+X by @Adib234 in - [#22076](https://github.com/google-gemini/gemini-cli/pull/22076) -- fix(policy): ensure user policies are loaded when policyPaths is empty by - @NTaylorMullen in - [#22090](https://github.com/google-gemini/gemini-cli/pull/22090) -- Docs: Add documentation for model steering (experimental). by @jkcinouye in - [#21154](https://github.com/google-gemini/gemini-cli/pull/21154) -- Add issue for automated changelogs by @g-samroberts in - [#21912](https://github.com/google-gemini/gemini-cli/pull/21912) -- fix(core): secure argsPattern and revert WEB_FETCH_TOOL_NAME escalation by - @spencer426 in - [#22104](https://github.com/google-gemini/gemini-cli/pull/22104) -- feat(core): differentiate User-Agent for a2a-server and ACP clients by - @bdmorgan in [#22059](https://github.com/google-gemini/gemini-cli/pull/22059) -- refactor(core): extract ExecutionLifecycleService for tool backgrounding by - @adamfweidman in - [#21717](https://github.com/google-gemini/gemini-cli/pull/21717) -- feat: Display pending and confirming tool calls by @sripasg in - [#22106](https://github.com/google-gemini/gemini-cli/pull/22106) -- feat(browser): implement input blocker overlay during automation by - @kunal-10-cloud in - [#21132](https://github.com/google-gemini/gemini-cli/pull/21132) -- fix: register themes on extension load not start by @jackwotherspoon in - 
[#22148](https://github.com/google-gemini/gemini-cli/pull/22148) -- feat(ui): Do not show Ultra users /upgrade hint (#22154) by @sehoon38 in - [#22156](https://github.com/google-gemini/gemini-cli/pull/22156) -- chore: remove unnecessary log for themes by @jackwotherspoon in - [#22165](https://github.com/google-gemini/gemini-cli/pull/22165) -- fix(core): resolve MCP tool FQN validation, schema export, and wildcards in - subagents by @abhipatel12 in - [#22069](https://github.com/google-gemini/gemini-cli/pull/22069) -- fix(cli): validate --model argument at startup by @JaisalJain in - [#21393](https://github.com/google-gemini/gemini-cli/pull/21393) -- fix(core): handle policy ALLOW for exit_plan_mode by @backnotprop in - [#21802](https://github.com/google-gemini/gemini-cli/pull/21802) -- feat(telemetry): add Clearcut instrumentation for AI credits billing events by - @gsquared94 in - [#22153](https://github.com/google-gemini/gemini-cli/pull/22153) -- feat(core): add google credentials provider for remote agents by @adamfweidman - in [#21024](https://github.com/google-gemini/gemini-cli/pull/21024) -- test(cli): add integration test for node deprecation warnings by @Nixxx19 in - [#20215](https://github.com/google-gemini/gemini-cli/pull/20215) -- feat(cli): allow safe tools to execute concurrently while agent is busy by - @spencer426 in - [#21988](https://github.com/google-gemini/gemini-cli/pull/21988) -- feat(core): implement model-driven parallel tool scheduler by @abhipatel12 in - [#21933](https://github.com/google-gemini/gemini-cli/pull/21933) -- update vulnerable deps by @scidomino in - [#22180](https://github.com/google-gemini/gemini-cli/pull/22180) -- fix(core): fix startup stats to use int values for timestamps and durations by - @yunaseoul in [#22201](https://github.com/google-gemini/gemini-cli/pull/22201) -- fix(core): prevent duplicate tool schemas for instantiated tools by - @abhipatel12 in - [#22204](https://github.com/google-gemini/gemini-cli/pull/22204) -- 
fix(core): add proxy routing support for remote A2A subagents by @adamfweidman - in [#22199](https://github.com/google-gemini/gemini-cli/pull/22199) -- fix(core/ide): add Antigravity CLI fallbacks by @apfine in - [#22030](https://github.com/google-gemini/gemini-cli/pull/22030) -- fix(browser): fix duplicate function declaration error in browser agent by - @gsquared94 in - [#22207](https://github.com/google-gemini/gemini-cli/pull/22207) -- feat(core): implement Stage 1 improvements for webfetch tool by @aishaneeshah - in [#21313](https://github.com/google-gemini/gemini-cli/pull/21313) -- Changelog for v0.34.0-preview.1 by @gemini-cli-robot in - [#22194](https://github.com/google-gemini/gemini-cli/pull/22194) -- perf(cli): enable code splitting and deferred UI loading by @sehoon38 in - [#22117](https://github.com/google-gemini/gemini-cli/pull/22117) -- fix: remove unused img.png from project root by @SandyTao520 in - [#22222](https://github.com/google-gemini/gemini-cli/pull/22222) -- docs(local model routing): add docs on how to use Gemma for local model - routing by @douglas-reid in - [#21365](https://github.com/google-gemini/gemini-cli/pull/21365) -- feat(a2a): enable native gRPC support and protocol routing by @alisa-alisa in - [#21403](https://github.com/google-gemini/gemini-cli/pull/21403) -- fix(cli): escape @ symbols on paste to prevent unintended file expansion by - @krishdef7 in [#21239](https://github.com/google-gemini/gemini-cli/pull/21239) -- feat(core): add trajectoryId to ConversationOffered telemetry by @yunaseoul in - [#22214](https://github.com/google-gemini/gemini-cli/pull/22214) -- docs: clarify that tools.core is an allowlist for ALL built-in tools by - @hobostay in [#18813](https://github.com/google-gemini/gemini-cli/pull/18813) -- docs(plan): document hooks with plan mode by @ruomengz in - [#22197](https://github.com/google-gemini/gemini-cli/pull/22197) -- Changelog for v0.33.1 by @gemini-cli-robot in - 
[#22235](https://github.com/google-gemini/gemini-cli/pull/22235) -- build(ci): fix false positive evals trigger on merge commits by @gundermanc in - [#22237](https://github.com/google-gemini/gemini-cli/pull/22237) -- fix(core): explicitly pass messageBus to policy engine for MCP tool saves by - @abhipatel12 in - [#22255](https://github.com/google-gemini/gemini-cli/pull/22255) -- feat(core): Fully migrate packages/core to AgentLoopContext. by @joshualitt in - [#22115](https://github.com/google-gemini/gemini-cli/pull/22115) -- feat(core): increase sub-agent turn and time limits by @bdmorgan in - [#22196](https://github.com/google-gemini/gemini-cli/pull/22196) -- feat(core): instrument file system tools for JIT context discovery by +- Changelog for v0.33.2 by @gemini-cli-robot in + [#22730](https://github.com/google-gemini/gemini-cli/pull/22730) +- feat(core): multi-registry architecture and tool filtering for subagents by + @akh64bit in [#22712](https://github.com/google-gemini/gemini-cli/pull/22712) +- Changelog for v0.34.0-preview.4 by @gemini-cli-robot in + [#22752](https://github.com/google-gemini/gemini-cli/pull/22752) +- fix(devtools): use theme-aware text colors for console warnings and errors by @SandyTao520 in - [#22082](https://github.com/google-gemini/gemini-cli/pull/22082) -- refactor(ui): extract pure session browser utilities by @abhipatel12 in - [#22256](https://github.com/google-gemini/gemini-cli/pull/22256) -- fix(plan): Fix AskUser evals by @Adib234 in - [#22074](https://github.com/google-gemini/gemini-cli/pull/22074) -- fix(settings): prevent j/k navigation keys from intercepting edit buffer input - by @student-ankitpandit in - [#21865](https://github.com/google-gemini/gemini-cli/pull/21865) -- feat(skills): improve async-pr-review workflow and logging by @mattKorwel in - [#21790](https://github.com/google-gemini/gemini-cli/pull/21790) -- refactor(cli): consolidate getErrorMessage utility to core by @scidomino in - 
[#22190](https://github.com/google-gemini/gemini-cli/pull/22190) -- fix(core): show descriptive error messages when saving settings fails by - @afarber in [#18095](https://github.com/google-gemini/gemini-cli/pull/18095) -- docs(core): add authentication guide for remote subagents by @adamfweidman in - [#22178](https://github.com/google-gemini/gemini-cli/pull/22178) -- docs: overhaul subagents documentation and add /agents command by @abhipatel12 - in [#22345](https://github.com/google-gemini/gemini-cli/pull/22345) -- refactor(ui): extract SessionBrowser static ui components by @abhipatel12 in - [#22348](https://github.com/google-gemini/gemini-cli/pull/22348) -- test: add Object.create context regression test and tool confirmation - integration test by @gsquared94 in - [#22356](https://github.com/google-gemini/gemini-cli/pull/22356) -- feat(tracker): return TodoList display for tracker tools by @anj-s in - [#22060](https://github.com/google-gemini/gemini-cli/pull/22060) -- feat(agent): add allowed domain restrictions for browser agent by + [#22181](https://github.com/google-gemini/gemini-cli/pull/22181) +- Add support for dynamic model Resolution to ModelConfigService by @kevinjwang1 + in [#22578](https://github.com/google-gemini/gemini-cli/pull/22578) +- chore(release): bump version to 0.36.0-nightly.20260317.2f90b4653 by + @gemini-cli-robot in + [#22858](https://github.com/google-gemini/gemini-cli/pull/22858) +- fix(cli): use active sessionId in useLogger and improve resume robustness by + @mattKorwel in + [#22606](https://github.com/google-gemini/gemini-cli/pull/22606) +- fix(cli): expand tilde in policy paths from settings.json by @abhipatel12 in + [#22772](https://github.com/google-gemini/gemini-cli/pull/22772) +- fix(core): add actionable warnings for terminal fallbacks (#14426) by + @spencer426 in + [#22211](https://github.com/google-gemini/gemini-cli/pull/22211) +- feat(tracker): integrate task tracker protocol into core system prompt by + @anj-s in 
[#22442](https://github.com/google-gemini/gemini-cli/pull/22442) +- chore: add posttest build hooks and fix missing dependencies by @NTaylorMullen + in [#22865](https://github.com/google-gemini/gemini-cli/pull/22865) +- feat(a2a): add agent acknowledgment command and enhance registry discovery by + @alisa-alisa in + [#22389](https://github.com/google-gemini/gemini-cli/pull/22389) +- fix(cli): automatically add all VSCode workspace folders to Gemini context by + @sakshisemalti in + [#21380](https://github.com/google-gemini/gemini-cli/pull/21380) +- feat: add 'blocked' status to tasks and todos by @anj-s in + [#22735](https://github.com/google-gemini/gemini-cli/pull/22735) +- refactor(cli): remove extra newlines in ShellToolMessage.tsx by @NTaylorMullen + in [#22868](https://github.com/google-gemini/gemini-cli/pull/22868) +- fix(cli): lazily load settings in onModelChange to prevent stale closure data + loss by @KumarADITHYA123 in + [#20403](https://github.com/google-gemini/gemini-cli/pull/20403) +- feat(core): subagent local execution and tool isolation by @akh64bit in + [#22718](https://github.com/google-gemini/gemini-cli/pull/22718) +- fix(cli): resolve subagent grouping and UI state persistence by @abhipatel12 + in [#22252](https://github.com/google-gemini/gemini-cli/pull/22252) +- refactor(ui): extract SessionBrowser search and navigation components by + @abhipatel12 in + [#22377](https://github.com/google-gemini/gemini-cli/pull/22377) +- fix: updates Docker image reference for GitHub MCP server by @jhhornn in + [#22938](https://github.com/google-gemini/gemini-cli/pull/22938) +- refactor(cli): group subagent trajectory deletion and use native filesystem + testing by @abhipatel12 in + [#22890](https://github.com/google-gemini/gemini-cli/pull/22890) +- refactor(cli): simplify keypress and mouse providers and update tests by + @scidomino in [#22853](https://github.com/google-gemini/gemini-cli/pull/22853) +- Changelog for v0.34.0 by @gemini-cli-robot in + 
[#22860](https://github.com/google-gemini/gemini-cli/pull/22860) +- test(cli): simplify createMockSettings calls by @scidomino in + [#22952](https://github.com/google-gemini/gemini-cli/pull/22952) +- feat(ui): format multi-line banner warnings with a bold title by @keithguerin + in [#22955](https://github.com/google-gemini/gemini-cli/pull/22955) +- Docs: Remove references to stale Gemini CLI file structure info by + @g-samroberts in + [#22976](https://github.com/google-gemini/gemini-cli/pull/22976) +- feat(ui): remove write todo list tool from UI tips by @aniruddhaadak80 in + [#22281](https://github.com/google-gemini/gemini-cli/pull/22281) +- Fix issue where subagent thoughts are appended. by @gundermanc in + [#22975](https://github.com/google-gemini/gemini-cli/pull/22975) +- Feat/browser privacy consent by @kunal-10-cloud in + [#21119](https://github.com/google-gemini/gemini-cli/pull/21119) +- fix(core): explicitly map execution context in LocalAgentExecutor by @akh64bit + in [#22949](https://github.com/google-gemini/gemini-cli/pull/22949) +- feat(plan): support plan mode in non-interactive mode by @ruomengz in + [#22670](https://github.com/google-gemini/gemini-cli/pull/22670) +- feat(core): implement strict macOS sandboxing using Seatbelt allowlist by + @ehedlund in [#22832](https://github.com/google-gemini/gemini-cli/pull/22832) +- docs: add additional notes by @abhipatel12 in + [#23008](https://github.com/google-gemini/gemini-cli/pull/23008) +- fix(cli): resolve duplicate footer on tool cancel via ESC (#21743) by + @ruomengz in [#21781](https://github.com/google-gemini/gemini-cli/pull/21781) +- Changelog for v0.35.0-preview.1 by @gemini-cli-robot in + [#23012](https://github.com/google-gemini/gemini-cli/pull/23012) +- fix(ui): fix flickering on small terminal heights by @devr0306 in + [#21416](https://github.com/google-gemini/gemini-cli/pull/21416) +- fix(acp): provide more meta in tool_call_update by @Mervap in + 
[#22663](https://github.com/google-gemini/gemini-cli/pull/22663) +- docs: add FAQ entry for checking Gemini CLI version by @surajsahani in + [#21271](https://github.com/google-gemini/gemini-cli/pull/21271) +- feat(core): resilient subagent tool rejection with contextual feedback by + @abhipatel12 in + [#22951](https://github.com/google-gemini/gemini-cli/pull/22951) +- fix(cli): correctly handle auto-update for standalone binaries by @bdmorgan in + [#23038](https://github.com/google-gemini/gemini-cli/pull/23038) +- feat(core): add content-utils by @adamfweidman in + [#22984](https://github.com/google-gemini/gemini-cli/pull/22984) +- fix: circumvent genai sdk requirement for api key when using gateway auth via + ACP by @sripasg in + [#23042](https://github.com/google-gemini/gemini-cli/pull/23042) +- fix(core): don't persist browser consent sentinel in non-interactive mode by + @jasonmatthewsuhari in + [#23073](https://github.com/google-gemini/gemini-cli/pull/23073) +- fix(core): narrow browser agent description to prevent stealing URL tasks from + web_fetch by @gsquared94 in + [#23086](https://github.com/google-gemini/gemini-cli/pull/23086) +- feat(cli): Partial threading of AgentLoopContext. 
by @joshualitt in + [#22978](https://github.com/google-gemini/gemini-cli/pull/22978) +- fix(browser-agent): enable "Allow all server tools" session policy by @cynthialong0-0 in - [#21775](https://github.com/google-gemini/gemini-cli/pull/21775) -- chore/release: bump version to 0.35.0-nightly.20260313.bb060d7a9 by - @gemini-cli-robot in - [#22251](https://github.com/google-gemini/gemini-cli/pull/22251) -- Move keychain fallback to keychain service by @chrstnb in - [#22332](https://github.com/google-gemini/gemini-cli/pull/22332) -- feat(core): integrate SandboxManager to sandbox all process-spawning tools by - @galz10 in [#22231](https://github.com/google-gemini/gemini-cli/pull/22231) -- fix(cli): support CJK input and full Unicode scalar values in terminal - protocols by @scidomino in - [#22353](https://github.com/google-gemini/gemini-cli/pull/22353) -- Promote stable tests. by @gundermanc in - [#22253](https://github.com/google-gemini/gemini-cli/pull/22253) -- feat(tracker): add tracker policy by @anj-s in - [#22379](https://github.com/google-gemini/gemini-cli/pull/22379) -- feat(security): add disableAlwaysAllow setting to disable auto-approvals by - @galz10 in [#21941](https://github.com/google-gemini/gemini-cli/pull/21941) -- Revert "fix(cli): validate --model argument at startup" by @sehoon38 in - [#22378](https://github.com/google-gemini/gemini-cli/pull/22378) -- fix(mcp): handle equivalent root resource URLs in OAuth validation by @galz10 - in [#20231](https://github.com/google-gemini/gemini-cli/pull/20231) -- fix(core): use session-specific temp directory for task tracker by @anj-s in - [#22382](https://github.com/google-gemini/gemini-cli/pull/22382) -- Fix issue where config was undefined. 
by @gundermanc in - [#22397](https://github.com/google-gemini/gemini-cli/pull/22397) -- fix(core): deduplicate project memory when JIT context is enabled by + [#22343](https://github.com/google-gemini/gemini-cli/pull/22343) +- refactor(cli): integrate real config loading into async test utils by + @scidomino in [#23040](https://github.com/google-gemini/gemini-cli/pull/23040) +- feat(core): inject memory and JIT context into subagents by @abhipatel12 in + [#23032](https://github.com/google-gemini/gemini-cli/pull/23032) +- Fix logging and virtual list. by @jacob314 in + [#23080](https://github.com/google-gemini/gemini-cli/pull/23080) +- feat(core): cap JIT context upward traversal at git root by @SandyTao520 in + [#23074](https://github.com/google-gemini/gemini-cli/pull/23074) +- Docs: Minor style updates from initial docs audit. by @g-samroberts in + [#22872](https://github.com/google-gemini/gemini-cli/pull/22872) +- feat(core): add experimental memory manager agent to replace save_memory tool + by @SandyTao520 in + [#22726](https://github.com/google-gemini/gemini-cli/pull/22726) +- Changelog for v0.35.0-preview.2 by @gemini-cli-robot in + [#23142](https://github.com/google-gemini/gemini-cli/pull/23142) +- Update website issue template for label and title by @g-samroberts in + [#23036](https://github.com/google-gemini/gemini-cli/pull/23036) +- fix: upgrade ACP SDK from 0.12 to 0.16.1 by @sripasg in + [#23132](https://github.com/google-gemini/gemini-cli/pull/23132) +- Update callouts to work on github. 
by @g-samroberts in + [#22245](https://github.com/google-gemini/gemini-cli/pull/22245) +- feat: ACP: Add token usage metadata to the `send` method's return value by + @sripasg in [#23148](https://github.com/google-gemini/gemini-cli/pull/23148) +- fix(plan): clarify that plan mode policies are combined with normal mode by + @ruomengz in [#23158](https://github.com/google-gemini/gemini-cli/pull/23158) +- Add ModelChain support to ModelConfigService and make ModelDialog dynamic by + @kevinjwang1 in + [#22914](https://github.com/google-gemini/gemini-cli/pull/22914) +- Ensure that copied extensions are writable in the user's local directory by + @kevinjwang1 in + [#23016](https://github.com/google-gemini/gemini-cli/pull/23016) +- feat(core): implement native Windows sandboxing by @mattKorwel in + [#21807](https://github.com/google-gemini/gemini-cli/pull/21807) +- feat(core): add support for admin-forced MCP server installations by + @gsquared94 in + [#23163](https://github.com/google-gemini/gemini-cli/pull/23163) +- chore(lint): ignore .gemini directory and recursive node_modules by + @mattKorwel in + [#23211](https://github.com/google-gemini/gemini-cli/pull/23211) +- feat(cli): conditionally exclude ask_user tool in ACP mode by @nmcnamara-eng + in [#23045](https://github.com/google-gemini/gemini-cli/pull/23045) +- feat(core): introduce AgentSession and rename stream events to agent events by + @mbleigh in [#23159](https://github.com/google-gemini/gemini-cli/pull/23159) +- feat(worktree): add Git worktree support for isolated parallel sessions by + @jerop in [#22973](https://github.com/google-gemini/gemini-cli/pull/22973) +- Add support for linking in the extension registry by @kevinjwang1 in + [#23153](https://github.com/google-gemini/gemini-cli/pull/23153) +- feat(extensions): add --skip-settings flag to install command by @Ratish1 in + [#17212](https://github.com/google-gemini/gemini-cli/pull/17212) +- feat(telemetry): track if session is running in a Git worktree by 
@jerop in + [#23265](https://github.com/google-gemini/gemini-cli/pull/23265) +- refactor(core): use absolute paths in GEMINI.md context markers by @SandyTao520 in - [#22234](https://github.com/google-gemini/gemini-cli/pull/22234) -- feat(prompts): implement Topic-Action-Summary model for verbosity reduction by - @Abhijit-2592 in - [#21503](https://github.com/google-gemini/gemini-cli/pull/21503) -- fix(core): fix manual deletion of subagent histories by @abhipatel12 in - [#22407](https://github.com/google-gemini/gemini-cli/pull/22407) -- Add registry var by @kevinjwang1 in - [#22224](https://github.com/google-gemini/gemini-cli/pull/22224) -- Add ModelDefinitions to ModelConfigService by @kevinjwang1 in - [#22302](https://github.com/google-gemini/gemini-cli/pull/22302) -- fix(cli): improve command conflict handling for skills by @NTaylorMullen in - [#21942](https://github.com/google-gemini/gemini-cli/pull/21942) -- fix(core): merge user settings with extension-provided MCP servers by + [#23135](https://github.com/google-gemini/gemini-cli/pull/23135) +- fix(core): add sanitization to sub agent thoughts and centralize utilities by + @devr0306 in [#22828](https://github.com/google-gemini/gemini-cli/pull/22828) +- feat(core): refine User-Agent for VS Code traffic (unified format) by + @sehoon38 in [#23256](https://github.com/google-gemini/gemini-cli/pull/23256) +- Fix schema for ModelChains by @kevinjwang1 in + [#23284](https://github.com/google-gemini/gemini-cli/pull/23284) +- test(cli): refactor tests for async render utilities by @scidomino in + [#23252](https://github.com/google-gemini/gemini-cli/pull/23252) +- feat(core): add security prompt for browser agent by @cynthialong0-0 in + [#23241](https://github.com/google-gemini/gemini-cli/pull/23241) +- refactor(ide): replace dynamic undici import with static fetch import by + @cocosheng-g in + [#23268](https://github.com/google-gemini/gemini-cli/pull/23268) +- test(cli): address unresolved feedback from PR #23252 by 
@scidomino in + [#23303](https://github.com/google-gemini/gemini-cli/pull/23303) +- feat(browser): add sensitive action controls and read-only noise reduction by + @cynthialong0-0 in + [#22867](https://github.com/google-gemini/gemini-cli/pull/22867) +- Disabling failing test while investigating by @alisa-alisa in + [#23311](https://github.com/google-gemini/gemini-cli/pull/23311) +- fix broken extension link in hooks guide by @Indrapal-70 in + [#21728](https://github.com/google-gemini/gemini-cli/pull/21728) +- fix(core): fix agent description indentation by @abhipatel12 in + [#23315](https://github.com/google-gemini/gemini-cli/pull/23315) +- Wrap the text under TOML rule for easier readability in policy-engine.md… by + @CogitationOps in + [#23076](https://github.com/google-gemini/gemini-cli/pull/23076) +- fix(extensions): revert broken extension removal behavior by @ehedlund in + [#23317](https://github.com/google-gemini/gemini-cli/pull/23317) +- feat(core): set up onboarding telemetry by @yunaseoul in + [#23118](https://github.com/google-gemini/gemini-cli/pull/23118) +- Retry evals on API error. 
by @gundermanc in + [#23322](https://github.com/google-gemini/gemini-cli/pull/23322) +- fix(evals): remove tool restrictions and add compile-time guards by + @SandyTao520 in + [#23312](https://github.com/google-gemini/gemini-cli/pull/23312) +- fix(hooks): support 'ask' decision for BeforeTool hooks by @gundermanc in + [#21146](https://github.com/google-gemini/gemini-cli/pull/21146) +- feat(browser): add warning message for session mode 'existing' by + @cynthialong0-0 in + [#23288](https://github.com/google-gemini/gemini-cli/pull/23288) +- chore(lint): enforce zero warnings and cleanup syntax restrictions by + @alisa-alisa in + [#22902](https://github.com/google-gemini/gemini-cli/pull/22902) +- fix(cli): add Esc instruction to HooksDialog footer by @abhipatel12 in + [#23258](https://github.com/google-gemini/gemini-cli/pull/23258) +- Disallow and suppress misused spread operator. by @gundermanc in + [#23294](https://github.com/google-gemini/gemini-cli/pull/23294) +- fix(core): refine CliHelpAgent description for better delegation by @abhipatel12 in - [#22484](https://github.com/google-gemini/gemini-cli/pull/22484) -- fix(core): skip discovery for incomplete MCP configs and resolve merge race - condition by @abhipatel12 in - [#22494](https://github.com/google-gemini/gemini-cli/pull/22494) -- fix(automation): harden stale PR closer permissions and maintainer detection - by @bdmorgan in - [#22558](https://github.com/google-gemini/gemini-cli/pull/22558) -- fix(automation): evaluate staleness before checking protected labels by - @bdmorgan in [#22561](https://github.com/google-gemini/gemini-cli/pull/22561) -- feat(agent): replace the runtime npx for browser agent chrome devtool mcp with - pre-built bundle by @cynthialong0-0 in - [#22213](https://github.com/google-gemini/gemini-cli/pull/22213) -- perf: optimize TrackerService dependency checks by @anj-s in - [#22384](https://github.com/google-gemini/gemini-cli/pull/22384) -- docs(policy): remove trailing space from 
commandPrefix examples by @kawasin73 - in [#22264](https://github.com/google-gemini/gemini-cli/pull/22264) -- fix(a2a-server): resolve unsafe assignment lint errors by @ehedlund in - [#22661](https://github.com/google-gemini/gemini-cli/pull/22661) -- fix: Adjust ToolGroupMessage filtering to hide Confirming and show Canceled - tool calls. by @sripasg in - [#22230](https://github.com/google-gemini/gemini-cli/pull/22230) -- Disallow Object.create() and reflect. by @gundermanc in - [#22408](https://github.com/google-gemini/gemini-cli/pull/22408) -- Guard pro model usage by @sehoon38 in - [#22665](https://github.com/google-gemini/gemini-cli/pull/22665) -- refactor(core): Creates AgentSession abstraction for consolidated agent - interface. by @mbleigh in - [#22270](https://github.com/google-gemini/gemini-cli/pull/22270) -- docs(changelog): remove internal commands from release notes by + [#23310](https://github.com/google-gemini/gemini-cli/pull/23310) +- fix(core): enable global session and persistent approval for web_fetch by + @NTaylorMullen in + [#23295](https://github.com/google-gemini/gemini-cli/pull/23295) +- fix(plan): add state transition override to prevent plan mode freeze by + @Adib234 in [#23020](https://github.com/google-gemini/gemini-cli/pull/23020) +- fix(cli): record skill activation tool calls in chat history by @NTaylorMullen + in [#23203](https://github.com/google-gemini/gemini-cli/pull/23203) +- fix(core): ensure subagent tool updates apply configuration overrides + immediately by @abhipatel12 in + [#23161](https://github.com/google-gemini/gemini-cli/pull/23161) +- fix(cli): resolve flicker at boundaries of list in BaseSelectionList by @jackwotherspoon in - [#22529](https://github.com/google-gemini/gemini-cli/pull/22529) -- feat: enable subagents by @abhipatel12 in - [#22386](https://github.com/google-gemini/gemini-cli/pull/22386) -- feat(extensions): implement cryptographic integrity verification for extension - updates by @ehedlund in - 
[#21772](https://github.com/google-gemini/gemini-cli/pull/21772) -- feat(tracker): polish UI sorting and formatting by @anj-s in - [#22437](https://github.com/google-gemini/gemini-cli/pull/22437) -- Changelog for v0.34.0-preview.2 by @gemini-cli-robot in - [#22220](https://github.com/google-gemini/gemini-cli/pull/22220) -- fix(core): fix three JIT context bugs in read_file, read_many_files, and - memoryDiscovery by @SandyTao520 in - [#22679](https://github.com/google-gemini/gemini-cli/pull/22679) -- refactor(core): introduce InjectionService with source-aware injection and - backend-native background completions by @adamfweidman in - [#22544](https://github.com/google-gemini/gemini-cli/pull/22544) -- Linux sandbox bubblewrap by @DavidAPierce in - [#22680](https://github.com/google-gemini/gemini-cli/pull/22680) -- feat(core): increase thought signature retry resilience by @bdmorgan in - [#22202](https://github.com/google-gemini/gemini-cli/pull/22202) -- feat(core): implement Stage 2 security and consistency improvements for - web_fetch by @aishaneeshah in - [#22217](https://github.com/google-gemini/gemini-cli/pull/22217) -- refactor(core): replace positional execute params with ExecuteOptions bag by + [#23298](https://github.com/google-gemini/gemini-cli/pull/23298) +- test(cli): force generic terminal in tests to fix snapshot failures by + @abhipatel12 in + [#23499](https://github.com/google-gemini/gemini-cli/pull/23499) +- Evals: PR Guidance adding workflow by @alisa-alisa in + [#23164](https://github.com/google-gemini/gemini-cli/pull/23164) +- feat(core): refactor SandboxManager to a stateless architecture and introduce + explicit Deny interface by @ehedlund in + [#23141](https://github.com/google-gemini/gemini-cli/pull/23141) +- feat(core): add event-translator and update agent types by @adamfweidman in + [#22985](https://github.com/google-gemini/gemini-cli/pull/22985) +- perf(cli): parallelize and background startup cleanup tasks by @sehoon38 in + 
[#23545](https://github.com/google-gemini/gemini-cli/pull/23545) +- fix: "allow always" for commands with paths by @scidomino in + [#23558](https://github.com/google-gemini/gemini-cli/pull/23558) +- fix(cli): prevent terminal escape sequences from leaking on exit by + @mattKorwel in + [#22682](https://github.com/google-gemini/gemini-cli/pull/22682) +- feat(cli): implement full "GEMINI CLI" logo for logged-out state by + @keithguerin in + [#22412](https://github.com/google-gemini/gemini-cli/pull/22412) +- fix(plan): reserve minimum height for selection list in AskUserDialog by + @ruomengz in [#23280](https://github.com/google-gemini/gemini-cli/pull/23280) +- fix(core): harden AgentSession replay semantics by @adamfweidman in + [#23548](https://github.com/google-gemini/gemini-cli/pull/23548) +- test(core): migrate hook tests to scheduler by @abhipatel12 in + [#23496](https://github.com/google-gemini/gemini-cli/pull/23496) +- chore(config): disable agents by default by @abhipatel12 in + [#23546](https://github.com/google-gemini/gemini-cli/pull/23546) +- fix(ui): make tool confirmations take up entire terminal height by @devr0306 + in [#22366](https://github.com/google-gemini/gemini-cli/pull/22366) +- fix(core): prevent redundant remote agent loading on model switch by @adamfweidman in - [#22674](https://github.com/google-gemini/gemini-cli/pull/22674) -- feat(config): enable JIT context loading by default by @SandyTao520 in - [#22736](https://github.com/google-gemini/gemini-cli/pull/22736) -- fix(config): ensure discoveryMaxDirs is passed to global config during - initialization by @kevin-ramdass in - [#22744](https://github.com/google-gemini/gemini-cli/pull/22744) -- fix(plan): allowlist get_internal_docs in Plan Mode by @Adib234 in - [#22668](https://github.com/google-gemini/gemini-cli/pull/22668) -- Changelog for v0.34.0-preview.3 by @gemini-cli-robot in - [#22393](https://github.com/google-gemini/gemini-cli/pull/22393) -- feat(core): add foundation for subagent 
tool isolation by @akh64bit in - [#22708](https://github.com/google-gemini/gemini-cli/pull/22708) -- fix(core): handle surrogate pairs in truncateString by @sehoon38 in - [#22754](https://github.com/google-gemini/gemini-cli/pull/22754) -- fix(cli): override j/k navigation in settings dialog to fix search input - conflict by @sehoon38 in - [#22800](https://github.com/google-gemini/gemini-cli/pull/22800) -- feat(plan): add 'All the above' option to multi-select AskUser questions by - @Adib234 in [#22365](https://github.com/google-gemini/gemini-cli/pull/22365) -- docs: distribute package-specific GEMINI.md context to each package by + [#23576](https://github.com/google-gemini/gemini-cli/pull/23576) +- refactor(core): update production type imports from coreToolScheduler by + @abhipatel12 in + [#23498](https://github.com/google-gemini/gemini-cli/pull/23498) +- feat(cli): always prefix extension skills with colon separator by + @NTaylorMullen in + [#23566](https://github.com/google-gemini/gemini-cli/pull/23566) +- fix(core): properly support allowRedirect in policy engine by @scidomino in + [#23579](https://github.com/google-gemini/gemini-cli/pull/23579) +- fix(cli): prevent subcommand shadowing and skip auth for commands by + @mattKorwel in + [#23177](https://github.com/google-gemini/gemini-cli/pull/23177) +- fix(test): move flaky tests to non-blocking suite by @mattKorwel in + [#23259](https://github.com/google-gemini/gemini-cli/pull/23259) +- Changelog for v0.35.0-preview.3 by @gemini-cli-robot in + [#23574](https://github.com/google-gemini/gemini-cli/pull/23574) +- feat(skills): add behavioral-evals skill with fixing and promoting guides by + @abhipatel12 in + [#23349](https://github.com/google-gemini/gemini-cli/pull/23349) +- refactor(core): delete obsolete coreToolScheduler by @abhipatel12 in + [#23502](https://github.com/google-gemini/gemini-cli/pull/23502) +- Changelog for v0.35.0-preview.4 by @gemini-cli-robot in + 
[#23581](https://github.com/google-gemini/gemini-cli/pull/23581) +- feat(core): add LegacyAgentSession by @adamfweidman in + [#22986](https://github.com/google-gemini/gemini-cli/pull/22986) +- feat(test-utils): add TestMcpServerBuilder and support in TestRig by + @abhipatel12 in + [#23491](https://github.com/google-gemini/gemini-cli/pull/23491) +- fix(core)!: Force policy config to specify toolName by @kschaab in + [#23330](https://github.com/google-gemini/gemini-cli/pull/23330) +- eval(save_memory): add multi-turn interactive evals for memoryManager by @SandyTao520 in - [#22734](https://github.com/google-gemini/gemini-cli/pull/22734) -- fix(cli): clean up stale pasted placeholder metadata after word/line deletions - by @Jomak-x in - [#20375](https://github.com/google-gemini/gemini-cli/pull/20375) -- refactor(core): align JIT memory placement with tiered context model by - @SandyTao520 in - [#22766](https://github.com/google-gemini/gemini-cli/pull/22766) -- Linux sandbox seccomp by @DavidAPierce in - [#22815](https://github.com/google-gemini/gemini-cli/pull/22815) -- fix(patch): cherry-pick 4e5dfd0 to release/v0.35.0-preview.1-pr-23074 to patch - version v0.35.0-preview.1 and create version 0.35.0-preview.2 by + [#23572](https://github.com/google-gemini/gemini-cli/pull/23572) +- fix(telemetry): patch memory leak and enforce logPrompts privacy by + @spencer426 in + [#23281](https://github.com/google-gemini/gemini-cli/pull/23281) +- perf(cli): background IDE client to speed up initialization by @sehoon38 in + [#23603](https://github.com/google-gemini/gemini-cli/pull/23603) +- fix(cli): prevent Ctrl+D exit when input buffer is not empty by @wtanaka in + [#23306](https://github.com/google-gemini/gemini-cli/pull/23306) +- fix: ACP: separate conversational text from execute tool command title by + @sripasg in [#23179](https://github.com/google-gemini/gemini-cli/pull/23179) +- feat(evals): add behavioral evaluations for subagent routing by @Samee24 in + 
[#23272](https://github.com/google-gemini/gemini-cli/pull/23272) +- refactor(cli,core): foundational layout, identity management, and type safety + by @jwhelangoog in + [#23286](https://github.com/google-gemini/gemini-cli/pull/23286) +- fix(core): accurately reflect subagent tool failure in UI by @abhipatel12 in + [#23187](https://github.com/google-gemini/gemini-cli/pull/23187) +- Changelog for v0.35.0-preview.5 by @gemini-cli-robot in + [#23606](https://github.com/google-gemini/gemini-cli/pull/23606) +- feat(ui): implement refreshed UX for Composer layout by @jwhelangoog in + [#21212](https://github.com/google-gemini/gemini-cli/pull/21212) +- fix: API key input dialog user interaction when selected Gemini API Key by + @kartikangiras in + [#21057](https://github.com/google-gemini/gemini-cli/pull/21057) +- docs: update `/mcp refresh` to `/mcp reload` by @adamfweidman in + [#23631](https://github.com/google-gemini/gemini-cli/pull/23631) +- Implementation of sandbox "Write-Protected" Governance Files by @DavidAPierce + in [#23139](https://github.com/google-gemini/gemini-cli/pull/23139) +- feat(sandbox): dynamic macOS sandbox expansion and worktree support by @galz10 + in [#23301](https://github.com/google-gemini/gemini-cli/pull/23301) +- fix(acp): Pass the cwd to `AcpFileSystemService` to avoid looping failures in + asking for perms to write plan md file by @sripasg in + [#23612](https://github.com/google-gemini/gemini-cli/pull/23612) +- fix(plan): sandbox path resolution in Plan Mode to prevent hallucinations by + @Adib234 in [#22737](https://github.com/google-gemini/gemini-cli/pull/22737) +- feat(ui): allow immediate user input during startup by @sehoon38 in + [#23661](https://github.com/google-gemini/gemini-cli/pull/23661) +- refactor(sandbox): reorganize Windows sandbox files by @galz10 in + [#23645](https://github.com/google-gemini/gemini-cli/pull/23645) +- fix(core): improve remote agent streaming UI and UX by @adamfweidman in + 
[#23633](https://github.com/google-gemini/gemini-cli/pull/23633) +- perf(cli): optimize --version startup time by @sehoon38 in + [#23671](https://github.com/google-gemini/gemini-cli/pull/23671) +- refactor(core): stop gemini CLI from producing unsafe casts by @gundermanc in + [#23611](https://github.com/google-gemini/gemini-cli/pull/23611) +- use enableAutoUpdate in test rig by @scidomino in + [#23681](https://github.com/google-gemini/gemini-cli/pull/23681) +- feat(core): change user-facing auth type from oauth2 to oauth by @adamfweidman + in [#23639](https://github.com/google-gemini/gemini-cli/pull/23639) +- chore(deps): fix npm audit vulnerabilities by @scidomino in + [#23679](https://github.com/google-gemini/gemini-cli/pull/23679) +- test(evals): fix overlapping act() deadlock in app-test-helper by @Adib234 in + [#23666](https://github.com/google-gemini/gemini-cli/pull/23666) +- fix(patch): cherry-pick 055ff92 to release/v0.36.0-preview.0-pr-23672 to patch + version v0.36.0-preview.0 and create version 0.36.0-preview.1 by @gemini-cli-robot in - [#23134](https://github.com/google-gemini/gemini-cli/pull/23134) -- fix(patch): cherry-pick daf3691 to release/v0.35.0-preview.2-pr-23558 to patch - version v0.35.0-preview.2 and create version 0.35.0-preview.3 by + [#23723](https://github.com/google-gemini/gemini-cli/pull/23723) +- fix(patch): cherry-pick 765fb67 to release/v0.36.0-preview.5-pr-24055 to patch + version v0.36.0-preview.5 and create version 0.36.0-preview.6 by @gemini-cli-robot in - [#23565](https://github.com/google-gemini/gemini-cli/pull/23565) -- fix(patch): cherry-pick b2d6dc4 to release/v0.35.0-preview.4-pr-23546 - [CONFLICTS] by @gemini-cli-robot in - [#23585](https://github.com/google-gemini/gemini-cli/pull/23585) + [#24061](https://github.com/google-gemini/gemini-cli/pull/24061) **Full Changelog**: -https://github.com/google-gemini/gemini-cli/compare/v0.34.0...v0.35.2 +https://github.com/google-gemini/gemini-cli/compare/v0.35.3...v0.36.0 diff --git 
a/docs/changelogs/preview.md b/docs/changelogs/preview.md index 6c31a64679..5bb8d5b575 100644 --- a/docs/changelogs/preview.md +++ b/docs/changelogs/preview.md @@ -1,6 +1,6 @@ -# Preview release: v0.36.0-preview.5 +# Preview release: v0.37.0-preview.1 -Released: March 27, 2026 +Released: April 02, 2026 Our preview release includes the latest, new, and experimental features. This release may not be as stable as our [latest weekly release](latest.md). @@ -13,377 +13,410 @@ npm install -g @google/gemini-cli@preview ## Highlights -- **Subagent Architecture Enhancements:** Significant updates to subagents, - including local execution, tool isolation, multi-registry discovery, dynamic - tool filtering, and JIT context injection. -- **Enhanced Security & Sandboxing:** Implemented strict macOS sandboxing using - Seatbelt allowlist, native Windows sandboxing, and support for - "Write-Protected" governance files. -- **Agent Context & State Management:** Introduced task tracker protocol - integration, 'blocked' statuses for tasks/todos, and `AgentSession` for - improved state management and replay semantics. -- **Browser & ACP Capabilities:** Added privacy consent for the browser agent, - sensitive action controls, improved API token usage metadata, and gateway auth - support via ACP. -- **CLI & UX Improvements:** Implemented a refreshed Composer layout, expanded - terminal fallback warnings, dynamic model resolution, and Git worktree support - for isolated parallel sessions. +- **Plan Mode Enhancements**: Plan Mode now includes support for untrusted folders, + prioritized pre-approval discussions, and a fix for sandbox-related + deadlocks during file creation. +- **Browser Agent Evolved**: Significant updates to the browser agent, including + persistent session management, dynamic discovery of read-only tools, + sandbox-aware initialization, and automated reclamation of stale snapshots to + optimize context window usage. 
+- **Advanced Sandbox Security**: Implementation of dynamic sandbox expansion for + both Linux and Windows, alongside secret visibility lockdown for environment + files and OS-specific forbidden path support. +- **Unified Core Architecture**: Centralized context management and a new + `ModelConfigService` for unified model discovery, complemented by the + introduction of `AgentHistoryProvider` and tool-based topic grouping + (Chapters). +- **UI/UX & Performance Improvements**: New Tokyo Night theme, "tab to queue" + message support, and compact tool output formatting, plus optimized build + scripts and improved layout stability for TUI components. ## What's Changed -- fix(a2a-server): A2A server should execute ask policies in interactive mode by - @kschaab in [#23831](https://github.com/google-gemini/gemini-cli/pull/23831) +- fix(patch): cherry-pick 64c928f to release/v0.37.0-preview.0-pr-23257 to patch + version v0.37.0-preview.0 and create version 0.37.0-preview.1 by + @gemini-cli-robot in + [#24561](https://github.com/google-gemini/gemini-cli/pull/24561) +- feat(evals): centralize test agents into test-utils for reuse by @Samee24 in + [#23616](https://github.com/google-gemini/gemini-cli/pull/23616) +- revert: chore(config): disable agents by default by @abhipatel12 in + [#23672](https://github.com/google-gemini/gemini-cli/pull/23672) +- fix(plan): update telemetry attribute keys and add timestamp by @Adib234 in + [#23685](https://github.com/google-gemini/gemini-cli/pull/23685) +- fix(core): prevent premature MCP discovery completion by @jackwotherspoon in + [#23637](https://github.com/google-gemini/gemini-cli/pull/23637) +- feat(browser): add maxActionsPerTask for browser agent setting by + @cynthialong0-0 in + [#23216](https://github.com/google-gemini/gemini-cli/pull/23216) +- fix(core): improve agent loader error formatting for empty paths by + @adamfweidman in + [#23690](https://github.com/google-gemini/gemini-cli/pull/23690) +- fix(cli): only show updating 
spinner when auto-update is in progress by + @scidomino in [#23709](https://github.com/google-gemini/gemini-cli/pull/23709) +- Refine onboarding metrics to log the duration explicitly and use the tier + name. by @yunaseoul in + [#23678](https://github.com/google-gemini/gemini-cli/pull/23678) +- chore(tools): add toJSON to tools and invocations to reduce logging verbosity + by @alisa-alisa in + [#22899](https://github.com/google-gemini/gemini-cli/pull/22899) +- fix(cli): stabilize copy mode to prevent flickering and cursor resets by + @mattKorwel in + [#22584](https://github.com/google-gemini/gemini-cli/pull/22584) +- fix(test): move flaky ctrl-c-exit test to non-blocking suite by @mattKorwel in + [#23732](https://github.com/google-gemini/gemini-cli/pull/23732) +- feat(skills): add ci skill for automated failure replication by @mattKorwel in + [#23720](https://github.com/google-gemini/gemini-cli/pull/23720) +- feat(sandbox): implement forbiddenPaths for OS-specific sandbox managers by + @ehedlund in [#23282](https://github.com/google-gemini/gemini-cli/pull/23282) +- fix(core): conditionally expose additional_permissions in shell tool by + @galz10 in [#23729](https://github.com/google-gemini/gemini-cli/pull/23729) +- refactor(core): standardize OS-specific sandbox tests and extract linux helper + methods by @ehedlund in + [#23715](https://github.com/google-gemini/gemini-cli/pull/23715) +- format recently added script by @scidomino in + [#23739](https://github.com/google-gemini/gemini-cli/pull/23739) +- fix(ui): prevent over-eager slash subcommand completion by @keithguerin in + [#20136](https://github.com/google-gemini/gemini-cli/pull/20136) +- Fix dynamic model routing for gemini 3.1 pro to customtools model by + @kevinjwang1 in + [#23641](https://github.com/google-gemini/gemini-cli/pull/23641) +- feat(core): support inline agentCardJson for remote agents by @adamfweidman in + [#23743](https://github.com/google-gemini/gemini-cli/pull/23743) +- fix(cli): skip console 
log/info in headless mode by @cynthialong0-0 in + [#22739](https://github.com/google-gemini/gemini-cli/pull/22739) +- test(core): install bubblewrap on Linux CI for sandbox integration tests by + @ehedlund in [#23583](https://github.com/google-gemini/gemini-cli/pull/23583) +- docs(reference): split tools table into category sections by @sheikhlimon in + [#21516](https://github.com/google-gemini/gemini-cli/pull/21516) +- fix(browser): detect embedded URLs in query params to prevent allowedDomains + bypass by @tony-shi in + [#23225](https://github.com/google-gemini/gemini-cli/pull/23225) +- fix(browser): add proxy bypass constraint to domain restriction system prompt + by @tony-shi in + [#23229](https://github.com/google-gemini/gemini-cli/pull/23229) +- fix(policy): relax write_file argsPattern in plan mode to allow paths without + session ID by @Adib234 in + [#23695](https://github.com/google-gemini/gemini-cli/pull/23695) +- docs: fix grammar in CONTRIBUTING and numbering in sandbox docs by + @splint-disk-8i in + [#23448](https://github.com/google-gemini/gemini-cli/pull/23448) +- fix(acp): allow attachments by adding a permission prompt by @sripasg in + [#23680](https://github.com/google-gemini/gemini-cli/pull/23680) +- fix(core): thread AbortSignal to chat compression requests (#20405) by + @SH20RAJ in [#20778](https://github.com/google-gemini/gemini-cli/pull/20778) +- feat(core): implement Windows sandbox dynamic expansion Phase 1 and 2.1 by + @scidomino in [#23691](https://github.com/google-gemini/gemini-cli/pull/23691) +- Add note about root privileges in sandbox docs by @diodesign in + [#23314](https://github.com/google-gemini/gemini-cli/pull/23314) - docs(core): document agent_card_json string literal options for remote agents by @adamfweidman in [#23797](https://github.com/google-gemini/gemini-cli/pull/23797) -- feat(core): support inline agentCardJson for remote agents by @adamfweidman in - [#23743](https://github.com/google-gemini/gemini-cli/pull/23743) -- 
fix(patch): cherry-pick 055ff92 to release/v0.36.0-preview.0-pr-23672 to patch - version v0.36.0-preview.0 and create version 0.36.0-preview.1 by - @gemini-cli-robot in - [#23723](https://github.com/google-gemini/gemini-cli/pull/23723) -- Changelog for v0.33.2 by @gemini-cli-robot in - [#22730](https://github.com/google-gemini/gemini-cli/pull/22730) -- feat(core): multi-registry architecture and tool filtering for subagents by - @akh64bit in [#22712](https://github.com/google-gemini/gemini-cli/pull/22712) -- Changelog for v0.34.0-preview.4 by @gemini-cli-robot in - [#22752](https://github.com/google-gemini/gemini-cli/pull/22752) -- fix(devtools): use theme-aware text colors for console warnings and errors by - @SandyTao520 in - [#22181](https://github.com/google-gemini/gemini-cli/pull/22181) -- Add support for dynamic model Resolution to ModelConfigService by @kevinjwang1 - in [#22578](https://github.com/google-gemini/gemini-cli/pull/22578) -- chore(release): bump version to 0.36.0-nightly.20260317.2f90b4653 by - @gemini-cli-robot in - [#22858](https://github.com/google-gemini/gemini-cli/pull/22858) -- fix(cli): use active sessionId in useLogger and improve resume robustness by - @mattKorwel in - [#22606](https://github.com/google-gemini/gemini-cli/pull/22606) -- fix(cli): expand tilde in policy paths from settings.json by @abhipatel12 in - [#22772](https://github.com/google-gemini/gemini-cli/pull/22772) -- fix(core): add actionable warnings for terminal fallbacks (#14426) by - @spencer426 in - [#22211](https://github.com/google-gemini/gemini-cli/pull/22211) -- feat(tracker): integrate task tracker protocol into core system prompt by - @anj-s in [#22442](https://github.com/google-gemini/gemini-cli/pull/22442) -- chore: add posttest build hooks and fix missing dependencies by @NTaylorMullen - in [#22865](https://github.com/google-gemini/gemini-cli/pull/22865) -- feat(a2a): add agent acknowledgment command and enhance registry discovery by +- fix(cli): resolve TTY 
hang on headless environments by unconditionally + resuming process.stdin before React Ink launch by @cocosheng-g in + [#23673](https://github.com/google-gemini/gemini-cli/pull/23673) +- fix(ui): cleanup estimated string length hacks in composer by @keithguerin in + [#23694](https://github.com/google-gemini/gemini-cli/pull/23694) +- feat(browser): dynamically discover read-only tools by @cynthialong0-0 in + [#23805](https://github.com/google-gemini/gemini-cli/pull/23805) +- docs: clarify policy requirement for `general.plan.directory` in settings + schema by @jerop in + [#23784](https://github.com/google-gemini/gemini-cli/pull/23784) +- Revert "perf(cli): optimize --version startup time (#23671)" by @scidomino in + [#23812](https://github.com/google-gemini/gemini-cli/pull/23812) +- don't silence errors from wombat by @scidomino in + [#23822](https://github.com/google-gemini/gemini-cli/pull/23822) +- fix(ui): prevent escape key from cancelling requests in shell mode by + @PrasannaPal21 in + [#21245](https://github.com/google-gemini/gemini-cli/pull/21245) +- Changelog for v0.36.0-preview.0 by @gemini-cli-robot in + [#23702](https://github.com/google-gemini/gemini-cli/pull/23702) +- feat(core,ui): Add experiment-gated support for gemini flash 3.1 lite by + @chrstnb in [#23794](https://github.com/google-gemini/gemini-cli/pull/23794) +- Changelog for v0.36.0-preview.3 by @gemini-cli-robot in + [#23827](https://github.com/google-gemini/gemini-cli/pull/23827) +- new linting check: github-actions-pinning by @alisa-alisa in + [#23808](https://github.com/google-gemini/gemini-cli/pull/23808) +- fix(cli): show helpful guidance when no skills are available by @Niralisj in + [#23785](https://github.com/google-gemini/gemini-cli/pull/23785) +- fix: Chat logs and errors handle tail tool calls correctly by @googlestrobe in + [#22460](https://github.com/google-gemini/gemini-cli/pull/22460) +- Don't try removing a tag from a non-existent release. 
by @scidomino in + [#23830](https://github.com/google-gemini/gemini-cli/pull/23830) +- fix(cli): allow ask question dialog to take full window height by @jacob314 in + [#23693](https://github.com/google-gemini/gemini-cli/pull/23693) +- fix(core): strip leading underscores from error types in telemetry by + @yunaseoul in [#23824](https://github.com/google-gemini/gemini-cli/pull/23824) +- Changelog for v0.35.0 by @gemini-cli-robot in + [#23819](https://github.com/google-gemini/gemini-cli/pull/23819) +- feat(evals): add reliability harvester and 500/503 retry support by @alisa-alisa in - [#22389](https://github.com/google-gemini/gemini-cli/pull/22389) -- fix(cli): automatically add all VSCode workspace folders to Gemini context by - @sakshisemalti in - [#21380](https://github.com/google-gemini/gemini-cli/pull/21380) -- feat: add 'blocked' status to tasks and todos by @anj-s in - [#22735](https://github.com/google-gemini/gemini-cli/pull/22735) -- refactor(cli): remove extra newlines in ShellToolMessage.tsx by @NTaylorMullen - in [#22868](https://github.com/google-gemini/gemini-cli/pull/22868) -- fix(cli): lazily load settings in onModelChange to prevent stale closure data - loss by @KumarADITHYA123 in - [#20403](https://github.com/google-gemini/gemini-cli/pull/20403) -- feat(core): subagent local execution and tool isolation by @akh64bit in - [#22718](https://github.com/google-gemini/gemini-cli/pull/22718) -- fix(cli): resolve subagent grouping and UI state persistence by @abhipatel12 - in [#22252](https://github.com/google-gemini/gemini-cli/pull/22252) -- refactor(ui): extract SessionBrowser search and navigation components by - @abhipatel12 in - [#22377](https://github.com/google-gemini/gemini-cli/pull/22377) -- fix: updates Docker image reference for GitHub MCP server by @jhhornn in - [#22938](https://github.com/google-gemini/gemini-cli/pull/22938) -- refactor(cli): group subagent trajectory deletion and use native filesystem - testing by @abhipatel12 in - 
[#22890](https://github.com/google-gemini/gemini-cli/pull/22890) -- refactor(cli): simplify keypress and mouse providers and update tests by - @scidomino in [#22853](https://github.com/google-gemini/gemini-cli/pull/22853) -- Changelog for v0.34.0 by @gemini-cli-robot in - [#22860](https://github.com/google-gemini/gemini-cli/pull/22860) -- test(cli): simplify createMockSettings calls by @scidomino in - [#22952](https://github.com/google-gemini/gemini-cli/pull/22952) -- feat(ui): format multi-line banner warnings with a bold title by @keithguerin - in [#22955](https://github.com/google-gemini/gemini-cli/pull/22955) -- Docs: Remove references to stale Gemini CLI file structure info by - @g-samroberts in - [#22976](https://github.com/google-gemini/gemini-cli/pull/22976) -- feat(ui): remove write todo list tool from UI tips by @aniruddhaadak80 in - [#22281](https://github.com/google-gemini/gemini-cli/pull/22281) -- Fix issue where subagent thoughts are appended. by @gundermanc in - [#22975](https://github.com/google-gemini/gemini-cli/pull/22975) -- Feat/browser privacy consent by @kunal-10-cloud in - [#21119](https://github.com/google-gemini/gemini-cli/pull/21119) -- fix(core): explicitly map execution context in LocalAgentExecutor by @akh64bit - in [#22949](https://github.com/google-gemini/gemini-cli/pull/22949) -- feat(plan): support plan mode in non-interactive mode by @ruomengz in - [#22670](https://github.com/google-gemini/gemini-cli/pull/22670) -- feat(core): implement strict macOS sandboxing using Seatbelt allowlist by - @ehedlund in [#22832](https://github.com/google-gemini/gemini-cli/pull/22832) -- docs: add additional notes by @abhipatel12 in - [#23008](https://github.com/google-gemini/gemini-cli/pull/23008) -- fix(cli): resolve duplicate footer on tool cancel via ESC (#21743) by - @ruomengz in [#21781](https://github.com/google-gemini/gemini-cli/pull/21781) -- Changelog for v0.35.0-preview.1 by @gemini-cli-robot in - 
[#23012](https://github.com/google-gemini/gemini-cli/pull/23012) -- fix(ui): fix flickering on small terminal heights by @devr0306 in - [#21416](https://github.com/google-gemini/gemini-cli/pull/21416) -- fix(acp): provide more meta in tool_call_update by @Mervap in - [#22663](https://github.com/google-gemini/gemini-cli/pull/22663) -- docs: add FAQ entry for checking Gemini CLI version by @surajsahani in - [#21271](https://github.com/google-gemini/gemini-cli/pull/21271) -- feat(core): resilient subagent tool rejection with contextual feedback by - @abhipatel12 in - [#22951](https://github.com/google-gemini/gemini-cli/pull/22951) -- fix(cli): correctly handle auto-update for standalone binaries by @bdmorgan in - [#23038](https://github.com/google-gemini/gemini-cli/pull/23038) -- feat(core): add content-utils by @adamfweidman in - [#22984](https://github.com/google-gemini/gemini-cli/pull/22984) -- fix: circumvent genai sdk requirement for api key when using gateway auth via - ACP by @sripasg in - [#23042](https://github.com/google-gemini/gemini-cli/pull/23042) -- fix(core): don't persist browser consent sentinel in non-interactive mode by - @jasonmatthewsuhari in - [#23073](https://github.com/google-gemini/gemini-cli/pull/23073) -- fix(core): narrow browser agent description to prevent stealing URL tasks from - web_fetch by @gsquared94 in - [#23086](https://github.com/google-gemini/gemini-cli/pull/23086) -- feat(cli): Partial threading of AgentLoopContext. 
by @joshualitt in - [#22978](https://github.com/google-gemini/gemini-cli/pull/22978) -- fix(browser-agent): enable "Allow all server tools" session policy by - @cynthialong0-0 in - [#22343](https://github.com/google-gemini/gemini-cli/pull/22343) -- refactor(cli): integrate real config loading into async test utils by - @scidomino in [#23040](https://github.com/google-gemini/gemini-cli/pull/23040) -- feat(core): inject memory and JIT context into subagents by @abhipatel12 in - [#23032](https://github.com/google-gemini/gemini-cli/pull/23032) -- Fix logging and virtual list. by @jacob314 in - [#23080](https://github.com/google-gemini/gemini-cli/pull/23080) -- feat(core): cap JIT context upward traversal at git root by @SandyTao520 in - [#23074](https://github.com/google-gemini/gemini-cli/pull/23074) -- Docs: Minor style updates from initial docs audit. by @g-samroberts in - [#22872](https://github.com/google-gemini/gemini-cli/pull/22872) -- feat(core): add experimental memory manager agent to replace save_memory tool - by @SandyTao520 in - [#22726](https://github.com/google-gemini/gemini-cli/pull/22726) -- Changelog for v0.35.0-preview.2 by @gemini-cli-robot in - [#23142](https://github.com/google-gemini/gemini-cli/pull/23142) -- Update website issue template for label and title by @g-samroberts in - [#23036](https://github.com/google-gemini/gemini-cli/pull/23036) -- fix: upgrade ACP SDK from 0.12 to 0.16.1 by @sripasg in - [#23132](https://github.com/google-gemini/gemini-cli/pull/23132) -- Update callouts to work on github. 
by @g-samroberts in - [#22245](https://github.com/google-gemini/gemini-cli/pull/22245) -- feat: ACP: Add token usage metadata to the `send` method's return value by - @sripasg in [#23148](https://github.com/google-gemini/gemini-cli/pull/23148) -- fix(plan): clarify that plan mode policies are combined with normal mode by - @ruomengz in [#23158](https://github.com/google-gemini/gemini-cli/pull/23158) -- Add ModelChain support to ModelConfigService and make ModelDialog dynamic by - @kevinjwang1 in - [#22914](https://github.com/google-gemini/gemini-cli/pull/22914) -- Ensure that copied extensions are writable in the user's local directory by - @kevinjwang1 in - [#23016](https://github.com/google-gemini/gemini-cli/pull/23016) -- feat(core): implement native Windows sandboxing by @mattKorwel in - [#21807](https://github.com/google-gemini/gemini-cli/pull/21807) -- feat(core): add support for admin-forced MCP server installations by - @gsquared94 in - [#23163](https://github.com/google-gemini/gemini-cli/pull/23163) -- chore(lint): ignore .gemini directory and recursive node_modules by - @mattKorwel in - [#23211](https://github.com/google-gemini/gemini-cli/pull/23211) -- feat(cli): conditionally exclude ask_user tool in ACP mode by @nmcnamara-eng - in [#23045](https://github.com/google-gemini/gemini-cli/pull/23045) -- feat(core): introduce AgentSession and rename stream events to agent events by - @mbleigh in [#23159](https://github.com/google-gemini/gemini-cli/pull/23159) -- feat(worktree): add Git worktree support for isolated parallel sessions by - @jerop in [#22973](https://github.com/google-gemini/gemini-cli/pull/22973) -- Add support for linking in the extension registry by @kevinjwang1 in - [#23153](https://github.com/google-gemini/gemini-cli/pull/23153) -- feat(extensions): add --skip-settings flag to install command by @Ratish1 in - [#17212](https://github.com/google-gemini/gemini-cli/pull/17212) -- feat(telemetry): track if session is running in a Git worktree by 
@jerop in - [#23265](https://github.com/google-gemini/gemini-cli/pull/23265) -- refactor(core): use absolute paths in GEMINI.md context markers by - @SandyTao520 in - [#23135](https://github.com/google-gemini/gemini-cli/pull/23135) -- fix(core): add sanitization to sub agent thoughts and centralize utilities by - @devr0306 in [#22828](https://github.com/google-gemini/gemini-cli/pull/22828) -- feat(core): refine User-Agent for VS Code traffic (unified format) by - @sehoon38 in [#23256](https://github.com/google-gemini/gemini-cli/pull/23256) -- Fix schema for ModelChains by @kevinjwang1 in - [#23284](https://github.com/google-gemini/gemini-cli/pull/23284) -- test(cli): refactor tests for async render utilities by @scidomino in - [#23252](https://github.com/google-gemini/gemini-cli/pull/23252) -- feat(core): add security prompt for browser agent by @cynthialong0-0 in - [#23241](https://github.com/google-gemini/gemini-cli/pull/23241) -- refactor(ide): replace dynamic undici import with static fetch import by - @cocosheng-g in - [#23268](https://github.com/google-gemini/gemini-cli/pull/23268) -- test(cli): address unresolved feedback from PR #23252 by @scidomino in - [#23303](https://github.com/google-gemini/gemini-cli/pull/23303) -- feat(browser): add sensitive action controls and read-only noise reduction by - @cynthialong0-0 in - [#22867](https://github.com/google-gemini/gemini-cli/pull/22867) -- Disabling failing test while investigating by @alisa-alisa in - [#23311](https://github.com/google-gemini/gemini-cli/pull/23311) -- fix broken extension link in hooks guide by @Indrapal-70 in - [#21728](https://github.com/google-gemini/gemini-cli/pull/21728) -- fix(core): fix agent description indentation by @abhipatel12 in - [#23315](https://github.com/google-gemini/gemini-cli/pull/23315) -- Wrap the text under TOML rule for easier readability in policy-engine.md… by - @CogitationOps in - [#23076](https://github.com/google-gemini/gemini-cli/pull/23076) -- fix(extensions):
revert broken extension removal behavior by @ehedlund in - [#23317](https://github.com/google-gemini/gemini-cli/pull/23317) -- feat(core): set up onboarding telemetry by @yunaseoul in - [#23118](https://github.com/google-gemini/gemini-cli/pull/23118) -- Retry evals on API error. by @gundermanc in - [#23322](https://github.com/google-gemini/gemini-cli/pull/23322) -- fix(evals): remove tool restrictions and add compile-time guards by - @SandyTao520 in - [#23312](https://github.com/google-gemini/gemini-cli/pull/23312) -- fix(hooks): support 'ask' decision for BeforeTool hooks by @gundermanc in - [#21146](https://github.com/google-gemini/gemini-cli/pull/21146) -- feat(browser): add warning message for session mode 'existing' by - @cynthialong0-0 in - [#23288](https://github.com/google-gemini/gemini-cli/pull/23288) -- chore(lint): enforce zero warnings and cleanup syntax restrictions by - @alisa-alisa in - [#22902](https://github.com/google-gemini/gemini-cli/pull/22902) -- fix(cli): add Esc instruction to HooksDialog footer by @abhipatel12 in - [#23258](https://github.com/google-gemini/gemini-cli/pull/23258) -- Disallow and suppress misused spread operator. 
by @gundermanc in - [#23294](https://github.com/google-gemini/gemini-cli/pull/23294) -- fix(core): refine CliHelpAgent description for better delegation by - @abhipatel12 in - [#23310](https://github.com/google-gemini/gemini-cli/pull/23310) -- fix(core): enable global session and persistent approval for web_fetch by - @NTaylorMullen in - [#23295](https://github.com/google-gemini/gemini-cli/pull/23295) -- fix(plan): add state transition override to prevent plan mode freeze by - @Adib234 in [#23020](https://github.com/google-gemini/gemini-cli/pull/23020) -- fix(cli): record skill activation tool calls in chat history by @NTaylorMullen - in [#23203](https://github.com/google-gemini/gemini-cli/pull/23203) -- fix(core): ensure subagent tool updates apply configuration overrides - immediately by @abhipatel12 in - [#23161](https://github.com/google-gemini/gemini-cli/pull/23161) -- fix(cli): resolve flicker at boundaries of list in BaseSelectionList by - @jackwotherspoon in - [#23298](https://github.com/google-gemini/gemini-cli/pull/23298) -- test(cli): force generic terminal in tests to fix snapshot failures by - @abhipatel12 in - [#23499](https://github.com/google-gemini/gemini-cli/pull/23499) -- Evals: PR Guidance adding workflow by @alisa-alisa in - [#23164](https://github.com/google-gemini/gemini-cli/pull/23164) -- feat(core): refactor SandboxManager to a stateless architecture and introduce - explicit Deny interface by @ehedlund in - [#23141](https://github.com/google-gemini/gemini-cli/pull/23141) -- feat(core): add event-translator and update agent types by @adamfweidman in - [#22985](https://github.com/google-gemini/gemini-cli/pull/22985) -- perf(cli): parallelize and background startup cleanup tasks by @sehoon38 in - [#23545](https://github.com/google-gemini/gemini-cli/pull/23545) -- fix: "allow always" for commands with paths by @scidomino in - [#23558](https://github.com/google-gemini/gemini-cli/pull/23558) -- fix(cli): prevent terminal escape sequences from 
leaking on exit by - @mattKorwel in - [#22682](https://github.com/google-gemini/gemini-cli/pull/22682) -- feat(cli): implement full "GEMINI CLI" logo for logged-out state by + [#23626](https://github.com/google-gemini/gemini-cli/pull/23626) +- feat(sandbox): dynamic Linux sandbox expansion and worktree support by @galz10 + in [#23692](https://github.com/google-gemini/gemini-cli/pull/23692) +- Merge examples of use into quickstart documentation by @diodesign in + [#23319](https://github.com/google-gemini/gemini-cli/pull/23319) +- fix(cli): prioritize primary name matches in slash command search by @sehoon38 + in [#23850](https://github.com/google-gemini/gemini-cli/pull/23850) +- Changelog for v0.35.1 by @gemini-cli-robot in + [#23840](https://github.com/google-gemini/gemini-cli/pull/23840) +- fix(browser): keep input blocker active across navigations by @kunal-10-cloud + in [#22562](https://github.com/google-gemini/gemini-cli/pull/22562) +- feat(core): new skill to look for duplicated code while reviewing PRs by + @devr0306 in [#23704](https://github.com/google-gemini/gemini-cli/pull/23704) +- fix(core): replace hardcoded non-interactive ASK_USER denial with explicit + policy rules by @ruomengz in + [#23668](https://github.com/google-gemini/gemini-cli/pull/23668) +- fix(plan): after exiting plan mode switches model to a flash model by @Adib234 + in [#23885](https://github.com/google-gemini/gemini-cli/pull/23885) +- feat(gcp): add development worker infrastructure by @mattKorwel in + [#23814](https://github.com/google-gemini/gemini-cli/pull/23814) +- fix(a2a-server): A2A server should execute ask policies in interactive mode by + @kschaab in [#23831](https://github.com/google-gemini/gemini-cli/pull/23831) +- feat(core): define TrajectoryProvider interface by @sehoon38 in + [#23050](https://github.com/google-gemini/gemini-cli/pull/23050) +- Docs: Update quotas and pricing by @jkcinouye in + [#23835](https://github.com/google-gemini/gemini-cli/pull/23835) +- fix(core): 
allow disabling environment variable redaction by @galz10 in + [#23927](https://github.com/google-gemini/gemini-cli/pull/23927) +- feat(cli): enable notifications cross-platform via terminal bell fallback by + @genneth in [#21618](https://github.com/google-gemini/gemini-cli/pull/21618) +- feat(sandbox): implement secret visibility lockdown for env files by + @DavidAPierce in + [#23712](https://github.com/google-gemini/gemini-cli/pull/23712) +- fix(core): remove shell outputChunks buffer caching to prevent memory bloat + and sanitize prompt input by @spencer426 in + [#23751](https://github.com/google-gemini/gemini-cli/pull/23751) +- feat(core): implement persistent browser session management by @kunal-10-cloud + in [#21306](https://github.com/google-gemini/gemini-cli/pull/21306) +- refactor(core): delegate sandbox denial parsing to SandboxManager by + @scidomino in [#23928](https://github.com/google-gemini/gemini-cli/pull/23928) +- dep(update) Update Ink version to 6.5.0 by @jacob314 in + [#23843](https://github.com/google-gemini/gemini-cli/pull/23843) +- Docs: Update 'docs-writer' skill for relative links by @jkcinouye in + [#21463](https://github.com/google-gemini/gemini-cli/pull/21463) +- Changelog for v0.36.0-preview.4 by @gemini-cli-robot in + [#23935](https://github.com/google-gemini/gemini-cli/pull/23935) +- fix(acp): Update allow approval policy flow for ACP clients to fix config + persistence and compatible with TUI by @sripasg in + [#23818](https://github.com/google-gemini/gemini-cli/pull/23818) +- Changelog for v0.35.2 by @gemini-cli-robot in + [#23960](https://github.com/google-gemini/gemini-cli/pull/23960) +- ACP integration documents by @g-samroberts in + [#22254](https://github.com/google-gemini/gemini-cli/pull/22254) +- fix(core): explicitly set error names to avoid bundling renaming issues by + @yunaseoul in [#23913](https://github.com/google-gemini/gemini-cli/pull/23913) +- feat(core): subagent isolation and cleanup hardening by @abhipatel12 in + 
[#23903](https://github.com/google-gemini/gemini-cli/pull/23903) +- disable extension-reload test by @scidomino in + [#24018](https://github.com/google-gemini/gemini-cli/pull/24018) +- feat(core): add forbiddenPaths to GlobalSandboxOptions and refactor + createSandboxManager by @ehedlund in + [#23936](https://github.com/google-gemini/gemini-cli/pull/23936) +- refactor(core): improve ignore resolution and fix directory-matching bug by + @ehedlund in [#23816](https://github.com/google-gemini/gemini-cli/pull/23816) +- revert(core): support custom base URL via env vars by @spencer426 in + [#23976](https://github.com/google-gemini/gemini-cli/pull/23976) +- Increase memory limited for eslint. by @jacob314 in + [#24022](https://github.com/google-gemini/gemini-cli/pull/24022) +- fix(acp): prevent crash on empty response in ACP mode by @sripasg in + [#23952](https://github.com/google-gemini/gemini-cli/pull/23952) +- feat(core): Land `AgentHistoryProvider`. by @joshualitt in + [#23978](https://github.com/google-gemini/gemini-cli/pull/23978) +- fix(core): switch to subshells for shell tool wrapping to fix heredocs and + edge cases by @abhipatel12 in + [#24024](https://github.com/google-gemini/gemini-cli/pull/24024) +- Debug command. 
by @jacob314 in + [#23851](https://github.com/google-gemini/gemini-cli/pull/23851) +- Changelog for v0.36.0-preview.5 by @gemini-cli-robot in + [#24046](https://github.com/google-gemini/gemini-cli/pull/24046) +- Fix test flakes by globally mocking ink-spinner by @jacob314 in + [#24044](https://github.com/google-gemini/gemini-cli/pull/24044) +- Enable network access in sandbox configuration by @galz10 in + [#24055](https://github.com/google-gemini/gemini-cli/pull/24055) +- feat(context): add configurable memoryBoundaryMarkers setting by @SandyTao520 + in [#24020](https://github.com/google-gemini/gemini-cli/pull/24020) +- feat(core): implement windows sandbox expansion and denial detection by + @scidomino in [#24027](https://github.com/google-gemini/gemini-cli/pull/24027) +- fix(core): resolve ACP Operation Aborted Errors in grep_search by @ivanporty + in [#23821](https://github.com/google-gemini/gemini-cli/pull/23821) +- fix(hooks): prevent SessionEnd from firing twice in non-interactive mode by + @krishdef7 in [#22139](https://github.com/google-gemini/gemini-cli/pull/22139) +- Re-word intro to Gemini 3 page. 
by @g-samroberts in + [#24069](https://github.com/google-gemini/gemini-cli/pull/24069) +- fix(cli): resolve layout contention and flashing loop in StatusRow by @keithguerin in - [#22412](https://github.com/google-gemini/gemini-cli/pull/22412) -- fix(plan): reserve minimum height for selection list in AskUserDialog by - @ruomengz in [#23280](https://github.com/google-gemini/gemini-cli/pull/23280) -- fix(core): harden AgentSession replay semantics by @adamfweidman in - [#23548](https://github.com/google-gemini/gemini-cli/pull/23548) -- test(core): migrate hook tests to scheduler by @abhipatel12 in - [#23496](https://github.com/google-gemini/gemini-cli/pull/23496) -- chore(config): disable agents by default by @abhipatel12 in - [#23546](https://github.com/google-gemini/gemini-cli/pull/23546) -- fix(ui): make tool confirmations take up entire terminal height by @devr0306 - in [#22366](https://github.com/google-gemini/gemini-cli/pull/22366) -- fix(core): prevent redundant remote agent loading on model switch by + [#24065](https://github.com/google-gemini/gemini-cli/pull/24065) +- fix(sandbox): implement Windows Mandatory Integrity Control for GeminiSandbox + by @galz10 in [#24057](https://github.com/google-gemini/gemini-cli/pull/24057) +- feat(core): implement tool-based topic grouping (Chapters) by @Abhijit-2592 in + [#23150](https://github.com/google-gemini/gemini-cli/pull/23150) +- feat(cli): support 'tab to queue' for messages while generating by @gundermanc + in [#24052](https://github.com/google-gemini/gemini-cli/pull/24052) +- feat(core): agnostic background task UI with CompletionBehavior by @adamfweidman in - [#23576](https://github.com/google-gemini/gemini-cli/pull/23576) -- refactor(core): update production type imports from coreToolScheduler by - @abhipatel12 in - [#23498](https://github.com/google-gemini/gemini-cli/pull/23498) -- feat(cli): always prefix extension skills with colon separator by - @NTaylorMullen in - 
[#23566](https://github.com/google-gemini/gemini-cli/pull/23566) -- fix(core): properly support allowRedirect in policy engine by @scidomino in - [#23579](https://github.com/google-gemini/gemini-cli/pull/23579) -- fix(cli): prevent subcommand shadowing and skip auth for commands by + [#22740](https://github.com/google-gemini/gemini-cli/pull/22740) +- UX for topic narration tool by @gundermanc in + [#24079](https://github.com/google-gemini/gemini-cli/pull/24079) +- fix: shellcheck warnings in scripts by @scidomino in + [#24035](https://github.com/google-gemini/gemini-cli/pull/24035) +- test(evals): add comprehensive subagent delegation evaluations by @abhipatel12 + in [#24132](https://github.com/google-gemini/gemini-cli/pull/24132) +- fix(a2a-server): prioritize ADC before evaluating headless constraints for + auth initialization by @spencer426 in + [#23614](https://github.com/google-gemini/gemini-cli/pull/23614) +- Text can be added after /plan command by @rambleraptor in + [#22833](https://github.com/google-gemini/gemini-cli/pull/22833) +- fix(cli): resolve missing F12 logs via global console store by @scidomino in + [#24235](https://github.com/google-gemini/gemini-cli/pull/24235) +- fix broken tests by @scidomino in + [#24279](https://github.com/google-gemini/gemini-cli/pull/24279) +- fix(evals): add update_topic behavioral eval by @gundermanc in + [#24223](https://github.com/google-gemini/gemini-cli/pull/24223) +- feat(core): Unified Context Management and Tool Distillation. by @joshualitt + in [#24157](https://github.com/google-gemini/gemini-cli/pull/24157) +- Default enable narration for the team. 
by @gundermanc in + [#24224](https://github.com/google-gemini/gemini-cli/pull/24224) +- fix(core): ensure default agents provide tools and use model-specific schemas + by @abhipatel12 in + [#24268](https://github.com/google-gemini/gemini-cli/pull/24268) +- feat(cli): show Flash Lite Preview model regardless of user tier by @sehoon38 + in [#23904](https://github.com/google-gemini/gemini-cli/pull/23904) +- feat(cli): implement compact tool output by @jwhelangoog in + [#20974](https://github.com/google-gemini/gemini-cli/pull/20974) +- Add security settings for tool sandboxing by @galz10 in + [#23923](https://github.com/google-gemini/gemini-cli/pull/23923) +- chore(test-utils): switch integration tests to use PREVIEW_GEMINI_MODEL by + @sehoon38 in [#24276](https://github.com/google-gemini/gemini-cli/pull/24276) +- feat(core): enable topic update narration for legacy models by @Abhijit-2592 + in [#24241](https://github.com/google-gemini/gemini-cli/pull/24241) +- feat(core): add project-level memory scope to save_memory tool by @SandyTao520 + in [#24161](https://github.com/google-gemini/gemini-cli/pull/24161) +- test(integration): fix plan mode write denial test false positive by @sehoon38 + in [#24299](https://github.com/google-gemini/gemini-cli/pull/24299) +- feat(plan): support `Plan` mode in untrusted folders by @Adib234 in + [#17586](https://github.com/google-gemini/gemini-cli/pull/17586) +- fix(core): enable mid-stream retries for all models and re-enable compression + test by @sehoon38 in + [#24302](https://github.com/google-gemini/gemini-cli/pull/24302) +- Changelog for v0.36.0-preview.6 by @gemini-cli-robot in + [#24082](https://github.com/google-gemini/gemini-cli/pull/24082) +- Changelog for v0.35.3 by @gemini-cli-robot in + [#24083](https://github.com/google-gemini/gemini-cli/pull/24083) +- feat(cli): add auth info to footer by @sehoon38 in + [#24042](https://github.com/google-gemini/gemini-cli/pull/24042) +- fix(browser): reset action counter for each agent 
session and let it ignore + internal actions by @cynthialong0-0 in + [#24228](https://github.com/google-gemini/gemini-cli/pull/24228) +- feat(plan): promote planning feature to stable by @ruomengz in + [#24282](https://github.com/google-gemini/gemini-cli/pull/24282) +- fix(browser): terminate subagent immediately on domain restriction violations + by @gsquared94 in + [#24313](https://github.com/google-gemini/gemini-cli/pull/24313) +- feat(cli): add UI to update extensions by @ruomengz in + [#23682](https://github.com/google-gemini/gemini-cli/pull/23682) +- Fix(browser): terminate immediately for "browser is already running" error by + @cynthialong0-0 in + [#24233](https://github.com/google-gemini/gemini-cli/pull/24233) +- docs: Add 'plan' option to approval mode in CLI reference by @YifanRuan in + [#24134](https://github.com/google-gemini/gemini-cli/pull/24134) +- fix(core): batch macOS seatbelt rules into a profile file to prevent ARG_MAX + errors by @ehedlund in + [#24255](https://github.com/google-gemini/gemini-cli/pull/24255) +- fix(core): fix race condition between browser agent and main closing process + by @cynthialong0-0 in + [#24340](https://github.com/google-gemini/gemini-cli/pull/24340) +- perf(build): optimize build scripts for parallel execution and remove + redundant checks by @sehoon38 in + [#24307](https://github.com/google-gemini/gemini-cli/pull/24307) +- ci: install bubblewrap on Linux for release workflows by @ehedlund in + [#24347](https://github.com/google-gemini/gemini-cli/pull/24347) +- chore(release): allow bundling for all builds, including stable by @sehoon38 + in [#24305](https://github.com/google-gemini/gemini-cli/pull/24305) +- Revert "Add security settings for tool sandboxing" by @jerop in + [#24357](https://github.com/google-gemini/gemini-cli/pull/24357) +- docs: update subagents docs to not be experimental by @abhipatel12 in + [#24343](https://github.com/google-gemini/gemini-cli/pull/24343) +- fix(core): implement **read and **write 
commands in sandbox managers by + @galz10 in [#24283](https://github.com/google-gemini/gemini-cli/pull/24283) +- don't try to remove tags in dry run by @scidomino in + [#24356](https://github.com/google-gemini/gemini-cli/pull/24356) +- fix(config): disable JIT context loading by default by @SandyTao520 in + [#24364](https://github.com/google-gemini/gemini-cli/pull/24364) +- test(sandbox): add integration test for dynamic permission expansion by + @galz10 in [#24359](https://github.com/google-gemini/gemini-cli/pull/24359) +- docs(policy): remove unsupported mcpName wildcard edge case by @abhipatel12 in + [#24133](https://github.com/google-gemini/gemini-cli/pull/24133) +- docs: fix broken GEMINI.md link in CONTRIBUTING.md by @Panchal-Tirth in + [#24182](https://github.com/google-gemini/gemini-cli/pull/24182) +- feat(core): infrastructure for event-driven subagent history by @abhipatel12 + in [#23914](https://github.com/google-gemini/gemini-cli/pull/23914) +- fix(core): resolve Plan Mode deadlock during plan file creation due to sandbox + restrictions by @DavidAPierce in + [#24047](https://github.com/google-gemini/gemini-cli/pull/24047) +- fix(core): fix browser agent UX issues and improve E2E test reliability by + @gsquared94 in + [#24312](https://github.com/google-gemini/gemini-cli/pull/24312) +- fix(ui): wrap topic and intent fields in TopicMessage by @jwhelangoog in + [#24386](https://github.com/google-gemini/gemini-cli/pull/24386) +- refactor(core): Centralize context management logic into src/context by + @joshualitt in + [#24380](https://github.com/google-gemini/gemini-cli/pull/24380) +- fix(core): pin AuthType.GATEWAY to use Gemini 3.1 Pro/Flash Lite by default by + @sripasg in [#24375](https://github.com/google-gemini/gemini-cli/pull/24375) +- feat(ui): add Tokyo Night theme by @danrneal in + [#24054](https://github.com/google-gemini/gemini-cli/pull/24054) +- fix(cli): refactor test config loading and mock debugLogger in test-setup by @mattKorwel in - 
[#23177](https://github.com/google-gemini/gemini-cli/pull/23177) -- fix(test): move flaky tests to non-blocking suite by @mattKorwel in - [#23259](https://github.com/google-gemini/gemini-cli/pull/23259) -- Changelog for v0.35.0-preview.3 by @gemini-cli-robot in - [#23574](https://github.com/google-gemini/gemini-cli/pull/23574) -- feat(skills): add behavioral-evals skill with fixing and promoting guides by + [#24389](https://github.com/google-gemini/gemini-cli/pull/24389) +- Set memoryManager to false in settings.json by @mattKorwel in + [#24393](https://github.com/google-gemini/gemini-cli/pull/24393) +- ink 6.6.3 by @jacob314 in + [#24372](https://github.com/google-gemini/gemini-cli/pull/24372) +- fix(core): resolve subagent chat recording gaps and directory inheritance by @abhipatel12 in - [#23349](https://github.com/google-gemini/gemini-cli/pull/23349) -- refactor(core): delete obsolete coreToolScheduler by @abhipatel12 in - [#23502](https://github.com/google-gemini/gemini-cli/pull/23502) -- Changelog for v0.35.0-preview.4 by @gemini-cli-robot in - [#23581](https://github.com/google-gemini/gemini-cli/pull/23581) -- feat(core): add LegacyAgentSession by @adamfweidman in - [#22986](https://github.com/google-gemini/gemini-cli/pull/22986) -- feat(test-utils): add TestMcpServerBuilder and support in TestRig by - @abhipatel12 in - [#23491](https://github.com/google-gemini/gemini-cli/pull/23491) -- fix(core)!: Force policy config to specify toolName by @kschaab in - [#23330](https://github.com/google-gemini/gemini-cli/pull/23330) -- eval(save_memory): add multi-turn interactive evals for memoryManager by - @SandyTao520 in - [#23572](https://github.com/google-gemini/gemini-cli/pull/23572) -- fix(telemetry): patch memory leak and enforce logPrompts privacy by - @spencer426 in - [#23281](https://github.com/google-gemini/gemini-cli/pull/23281) -- perf(cli): background IDE client to speed up initialization by @sehoon38 in - 
[#23603](https://github.com/google-gemini/gemini-cli/pull/23603) -- fix(cli): prevent Ctrl+D exit when input buffer is not empty by @wtanaka in - [#23306](https://github.com/google-gemini/gemini-cli/pull/23306) -- fix: ACP: separate conversational text from execute tool command title by - @sripasg in [#23179](https://github.com/google-gemini/gemini-cli/pull/23179) -- feat(evals): add behavioral evaluations for subagent routing by @Samee24 in - [#23272](https://github.com/google-gemini/gemini-cli/pull/23272) -- refactor(cli,core): foundational layout, identity management, and type safety - by @jwhelangoog in - [#23286](https://github.com/google-gemini/gemini-cli/pull/23286) -- fix(core): accurately reflect subagent tool failure in UI by @abhipatel12 in - [#23187](https://github.com/google-gemini/gemini-cli/pull/23187) -- Changelog for v0.35.0-preview.5 by @gemini-cli-robot in - [#23606](https://github.com/google-gemini/gemini-cli/pull/23606) -- feat(ui): implement refreshed UX for Composer layout by @jwhelangoog in - [#21212](https://github.com/google-gemini/gemini-cli/pull/21212) -- fix: API key input dialog user interaction when selected Gemini API Key by - @kartikangiras in - [#21057](https://github.com/google-gemini/gemini-cli/pull/21057) -- docs: update `/mcp refresh` to `/mcp reload` by @adamfweidman in - [#23631](https://github.com/google-gemini/gemini-cli/pull/23631) -- Implementation of sandbox "Write-Protected" Governance Files by @DavidAPierce - in [#23139](https://github.com/google-gemini/gemini-cli/pull/23139) -- feat(sandbox): dynamic macOS sandbox expansion and worktree support by @galz10 - in [#23301](https://github.com/google-gemini/gemini-cli/pull/23301) -- fix(acp): Pass the cwd to `AcpFileSystemService` to avoid looping failures in - asking for perms to write plan md file by @sripasg in - [#23612](https://github.com/google-gemini/gemini-cli/pull/23612) -- fix(plan): sandbox path resolution in Plan Mode to prevent hallucinations by - @Adib234 in 
[#22737](https://github.com/google-gemini/gemini-cli/pull/22737) -- feat(ui): allow immediate user input during startup by @sehoon38 in - [#23661](https://github.com/google-gemini/gemini-cli/pull/23661) -- refactor(sandbox): reorganize Windows sandbox files by @galz10 in - [#23645](https://github.com/google-gemini/gemini-cli/pull/23645) -- fix(core): improve remote agent streaming UI and UX by @adamfweidman in - [#23633](https://github.com/google-gemini/gemini-cli/pull/23633) -- perf(cli): optimize --version startup time by @sehoon38 in - [#23671](https://github.com/google-gemini/gemini-cli/pull/23671) -- refactor(core): stop gemini CLI from producing unsafe casts by @gundermanc in - [#23611](https://github.com/google-gemini/gemini-cli/pull/23611) -- use enableAutoUpdate in test rig by @scidomino in - [#23681](https://github.com/google-gemini/gemini-cli/pull/23681) -- feat(core): change user-facing auth type from oauth2 to oauth by @adamfweidman - in [#23639](https://github.com/google-gemini/gemini-cli/pull/23639) -- chore(deps): fix npm audit vulnerabilities by @scidomino in - [#23679](https://github.com/google-gemini/gemini-cli/pull/23679) -- test(evals): fix overlapping act() deadlock in app-test-helper by @Adib234 in - [#23666](https://github.com/google-gemini/gemini-cli/pull/23666) + [#24368](https://github.com/google-gemini/gemini-cli/pull/24368) +- fix(cli): cap shell output at 10 MB to prevent RangeError crash by @ProthamD + in [#24168](https://github.com/google-gemini/gemini-cli/pull/24168) +- feat(plan): conditionally add enter/exit plan mode tools based on current mode + by @ruomengz in + [#24378](https://github.com/google-gemini/gemini-cli/pull/24378) +- feat(core): prioritize discussion before formal plan approval by @jerop in + [#24423](https://github.com/google-gemini/gemini-cli/pull/24423) +- fix(ui): add accelerated scrolling on alternate buffer mode by @devr0306 in + [#23940](https://github.com/google-gemini/gemini-cli/pull/23940) +- feat(core): 
populate sandbox forbidden paths with project ignore file contents + by @ehedlund in + [#24038](https://github.com/google-gemini/gemini-cli/pull/24038) +- fix(core): ensure blue border overlay and input blocker to act correctly + depending on browser agent activities by @cynthialong0-0 in + [#24385](https://github.com/google-gemini/gemini-cli/pull/24385) +- fix(ui): removed additional vertical padding for tables by @devr0306 in + [#24381](https://github.com/google-gemini/gemini-cli/pull/24381) +- fix(build): upload full bundle directory archive to GitHub releases by + @sehoon38 in [#24403](https://github.com/google-gemini/gemini-cli/pull/24403) +- fix(build): wire bundle:browser-mcp into bundle pipeline by @gsquared94 in + [#24424](https://github.com/google-gemini/gemini-cli/pull/24424) +- feat(browser): add sandbox-aware browser agent initialization by @gsquared94 + in [#24419](https://github.com/google-gemini/gemini-cli/pull/24419) +- feat(core): enhance tracker task schemas for detailed titles and descriptions + by @anj-s in [#23902](https://github.com/google-gemini/gemini-cli/pull/23902) +- refactor(core): Unified context management settings schema by @joshualitt in + [#24391](https://github.com/google-gemini/gemini-cli/pull/24391) +- feat(core): update browser agent prompt to check open pages first when + bringing up by @cynthialong0-0 in + [#24431](https://github.com/google-gemini/gemini-cli/pull/24431) +- fix(acp) refactor(core,cli): centralize model discovery logic in + ModelConfigService by @sripasg in + [#24392](https://github.com/google-gemini/gemini-cli/pull/24392) +- Changelog for v0.36.0-preview.7 by @gemini-cli-robot in + [#24346](https://github.com/google-gemini/gemini-cli/pull/24346) +- fix: update task tracker storage location in system prompt by @anj-s in + [#24034](https://github.com/google-gemini/gemini-cli/pull/24034) +- feat(browser): supersede stale snapshots to reclaim context-window tokens by + @gsquared94 in + 
[#24440](https://github.com/google-gemini/gemini-cli/pull/24440) +- docs(core): add subagent tool isolation draft doc by @akh64bit in + [#23275](https://github.com/google-gemini/gemini-cli/pull/23275) **Full Changelog**: -https://github.com/google-gemini/gemini-cli/compare/v0.35.0-preview.5...v0.36.0-preview.5 +https://github.com/google-gemini/gemini-cli/compare/v0.36.0-preview.8...v0.37.0-preview.1 diff --git a/docs/cli/cli-reference.md b/docs/cli/cli-reference.md index bc8f8b44ce..39d98f60e9 100644 --- a/docs/cli/cli-reference.md +++ b/docs/cli/cli-reference.md @@ -52,7 +52,7 @@ These commands are available within the interactive REPL. | `--prompt-interactive` | `-i` | string | - | Execute prompt and continue in interactive mode | | `--worktree` | `-w` | string | - | Start Gemini in a new git worktree. If no name is provided, one is generated automatically. Requires `experimental.worktrees: true` in settings. | | `--sandbox` | `-s` | boolean | `false` | Run in a sandboxed environment for safer execution | -| `--approval-mode` | - | string | `default` | Approval mode for tool execution. Choices: `default`, `auto_edit`, `yolo` | +| `--approval-mode` | - | string | `default` | Approval mode for tool execution. Choices: `default`, `auto_edit`, `yolo`, `plan` | | `--yolo` | `-y` | boolean | `false` | **Deprecated.** Auto-approve all actions. Use `--approval-mode=yolo` instead. | | `--experimental-acp` | - | boolean | - | Start in ACP (Agent Code Pilot) mode. **Experimental feature.** | | `--experimental-zed-integration` | - | boolean | - | Run in Zed editor integration mode. **Experimental feature.** | diff --git a/docs/cli/plan-mode.md b/docs/cli/plan-mode.md index ad87bc591b..11f7a9e521 100644 --- a/docs/cli/plan-mode.md +++ b/docs/cli/plan-mode.md @@ -56,19 +56,21 @@ Gemini CLI takes action. 1. **Provide a goal:** Start by describing what you want to achieve. Gemini CLI will then enter Plan Mode (if it's not already) to research the task. -2. 
**Review research and provide input:** As Gemini CLI analyzes your codebase, - it may ask you questions or present different implementation options using - [`ask_user`](../tools/ask-user.md). Provide your preferences to help guide - the design. -3. **Review the plan:** Once Gemini CLI has a proposed strategy, it creates a - detailed implementation plan as a Markdown file in your plans directory. +2. **Discuss and agree on strategy:** As Gemini CLI analyzes your codebase, it + will discuss its findings and proposed strategy with you to ensure + alignment. It may ask you questions or present different implementation + options using [`ask_user`](../tools/ask-user.md). **Gemini CLI will stop and + wait for your confirmation** before drafting the formal plan. You should + reach an informal agreement on the approach before proceeding. +3. **Review the plan:** Once you've agreed on the strategy, Gemini CLI creates + a detailed implementation plan as a Markdown file in your plans directory. - **View:** You can open and read this file to understand the proposed changes. - **Edit:** Press `Ctrl+X` to open the plan directly in your configured external editor. 4. **Approve or iterate:** Gemini CLI will present the finalized plan for your - approval. + formal approval. - **Approve:** If you're satisfied with the plan, approve it to start the implementation immediately: **Yes, automatically accept edits** or **Yes, manually accept edits**. 
@@ -121,6 +123,7 @@ These are the only allowed tools: [`glob`](../tools/file-system.md#4-glob-findfiles) - **Search:** [`grep_search`](../tools/file-system.md#5-grep_search-searchtext), [`google_web_search`](../tools/web-search.md), + [`web_fetch`](../tools/web-fetch.md) (requires explicit confirmation), [`get_internal_docs`](../tools/internal-docs.md) - **Research Subagents:** [`codebase_investigator`](../core/subagents.md#codebase-investigator), @@ -178,9 +181,16 @@ As described in the rule that does not explicitly specify `modes` is considered "always active" and will apply to Plan Mode as well. -If you want a rule to apply to other modes but _not_ to Plan Mode, you must -explicitly specify the target modes. For example, to allow `npm test` in default -and Auto-Edit modes but not in Plan Mode: +To maintain the integrity of Plan Mode as a safe research environment, +persistent tool approvals are context-aware. Approvals granted in modes like +Default or Auto-Edit do not apply to Plan Mode, ensuring that tools trusted for +implementation don't automatically execute while you're researching. However, +approvals granted while in Plan Mode are treated as intentional choices for +global trust and apply to all modes. + +If you want to manually restrict a rule to other modes but _not_ to Plan Mode, +you must explicitly specify the target modes. For example, to allow `npm test` +in default and Auto-Edit modes but not in Plan Mode: ```toml [[rule]] diff --git a/docs/cli/sandbox.md b/docs/cli/sandbox.md index e27587abf0..f81b561e0a 100644 --- a/docs/cli/sandbox.md +++ b/docs/cli/sandbox.md @@ -136,6 +136,58 @@ gemini -p "build the snap" absolute path โ€” the path must be writable inside the container. - Used with tools like Snapcraft or Rockcraft that require a full system. +## Tool sandboxing + +Tool-level sandboxing provides granular isolation for individual tool executions +(like `shell_exec` and `write_file`) instead of sandboxing the entire Gemini CLI +process. 
+ +This approach offers better integration with your local environment for non-tool +tasks (like UI rendering and configuration loading) while still providing +security for tool-driven operations. + +### How to turn off tool sandboxing + +If you experience issues with tool sandboxing or prefer full-process isolation, +you can disable it by setting `security.toolSandboxing` to `false` in your +`settings.json` file. + +```json +{ + "security": { + "toolSandboxing": false + } +} +``` + + +> [!NOTE] +> Changing the `security.toolSandboxing` setting requires a restart of Gemini +> CLI to take effect. + +## Sandbox expansion + +Sandbox expansion is a dynamic permission system that lets Gemini CLI request +additional permissions for a command when needed. + +When a sandboxed command fails due to permission restrictions (like restricted +file paths or network access), or when a command is proactively identified as +requiring extra permissions (like `npm install`), Gemini CLI will present you +with a "Sandbox Expansion Request." + +### How sandbox expansion works + +1. **Detection**: Gemini CLI detects a sandbox denial or proactively identifies + a command that requires extra permissions. +2. **Request**: A modal dialog is shown, explaining which additional + permissions (e.g., specific directories or network access) are required. +3. **Approval**: If you approve the expansion, the command is executed with the + extended permissions for that specific run. + +This mechanism ensures you don't have to manually re-run commands with more +permissive sandbox settings, while still maintaining control over what the AI +can access. + ## Quickstart ```bash diff --git a/docs/cli/settings.md b/docs/cli/settings.md index ac1fdc98fc..4a6b9a77b7 100644 --- a/docs/cli/settings.md +++ b/docs/cli/settings.md @@ -30,6 +30,7 @@ they appear in the UI. | Default Approval Mode | `general.defaultApprovalMode` | The default approval mode for tool execution. 
'default' prompts for approval, 'auto_edit' auto-approves edit tools, and 'plan' is read-only mode. YOLO mode (auto-approve all actions) can only be enabled via command line (--yolo or --approval-mode=yolo). | `"default"` | | Enable Auto Update | `general.enableAutoUpdate` | Enable automatic updates. | `true` | | Enable Notifications | `general.enableNotifications` | Enable run-event notifications for action-required prompts and session completion. | `false` | +| Enable Plan Mode | `general.plan.enabled` | Enable Plan Mode for read-only safety during planning. | `true` | | Plan Directory | `general.plan.directory` | The directory where planning artifacts are stored. If not specified, defaults to the system temporary directory. A custom directory requires a policy to allow write access in Plan Mode. | `undefined` | | Plan Model Routing | `general.plan.modelRouting` | Automatically switch between Pro and Flash models based on Plan Mode status. Uses Pro for the planning phase and Flash for the implementation phase. | `true` | | Retry Fetch Errors | `general.retryFetchErrors` | Retry on "exception TypeError: fetch failed sending request" errors. | `true` | @@ -46,38 +47,41 @@ they appear in the UI. ### UI -| UI Label | Setting | Description | Default | -| ------------------------------------ | -------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | -| Auto Theme Switching | `ui.autoThemeSwitching` | Automatically switch between default light and dark themes based on terminal background color. | `true` | -| Terminal Background Polling Interval | `ui.terminalBackgroundPollingInterval` | Interval in seconds to poll the terminal background color. 
| `60` | -| Hide Window Title | `ui.hideWindowTitle` | Hide the window title bar | `false` | -| Inline Thinking | `ui.inlineThinkingMode` | Display model thinking inline: off or full. | `"off"` | -| Show Thoughts in Title | `ui.showStatusInTitle` | Show Gemini CLI model thoughts in the terminal window title during the working phase | `false` | -| Dynamic Window Title | `ui.dynamicWindowTitle` | Update the terminal window title with current status icons (Ready: โ—‡, Action Required: โœ‹, Working: โœฆ) | `true` | -| Show Home Directory Warning | `ui.showHomeDirectoryWarning` | Show a warning when running Gemini CLI in the home directory. | `true` | -| Show Compatibility Warnings | `ui.showCompatibilityWarnings` | Show warnings about terminal or OS compatibility issues. | `true` | -| Hide Tips | `ui.hideTips` | Hide helpful tips in the UI | `false` | -| Escape Pasted @ Symbols | `ui.escapePastedAtSymbols` | When enabled, @ symbols in pasted text are escaped to prevent unintended @path expansion. | `false` | -| Show Shortcuts Hint | `ui.showShortcutsHint` | Show the "? for shortcuts" hint above the input. | `true` | -| Hide Banner | `ui.hideBanner` | Hide the application banner | `false` | -| Hide Context Summary | `ui.hideContextSummary` | Hide the context summary (GEMINI.md, MCP servers) above the input. | `false` | -| Hide CWD | `ui.footer.hideCWD` | Hide the current working directory in the footer. | `false` | -| Hide Sandbox Status | `ui.footer.hideSandboxStatus` | Hide the sandbox status indicator in the footer. | `false` | -| Hide Model Info | `ui.footer.hideModelInfo` | Hide the model name and context usage in the footer. | `false` | -| Hide Context Window Percentage | `ui.footer.hideContextPercentage` | Hides the context window usage percentage. 
| `true` | -| Hide Footer | `ui.hideFooter` | Hide the footer from the UI | `false` | -| Show Memory Usage | `ui.showMemoryUsage` | Display memory usage information in the UI | `false` | -| Show Line Numbers | `ui.showLineNumbers` | Show line numbers in the chat. | `true` | -| Show Citations | `ui.showCitations` | Show citations for generated text in the chat. | `false` | -| Show Model Info In Chat | `ui.showModelInfoInChat` | Show the model name in the chat for each model turn. | `false` | -| Show User Identity | `ui.showUserIdentity` | Show the signed-in user's identity (e.g. email) in the UI. | `true` | -| Use Alternate Screen Buffer | `ui.useAlternateBuffer` | Use an alternate screen buffer for the UI, preserving shell history. | `false` | -| Use Background Color | `ui.useBackgroundColor` | Whether to use background colors in the UI. | `true` | -| Incremental Rendering | `ui.incrementalRendering` | Enable incremental rendering for the UI. This option will reduce flickering but may cause rendering artifacts. Only supported when useAlternateBuffer is enabled. | `true` | -| Show Spinner | `ui.showSpinner` | Show the spinner during operations. | `true` | -| Loading Phrases | `ui.loadingPhrases` | What to show while the model is working: tips, witty comments, both, or nothing. | `"tips"` | -| Error Verbosity | `ui.errorVerbosity` | Controls whether recoverable errors are hidden (low) or fully shown (full). 
| `"low"` | -| Screen Reader Mode | `ui.accessibility.screenReader` | Render output in plain-text to be more screen reader accessible | `false` | +| UI Label | Setting | Description | Default | +| ------------------------------------ | -------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | +| Auto Theme Switching | `ui.autoThemeSwitching` | Automatically switch between default light and dark themes based on terminal background color. | `true` | +| Terminal Background Polling Interval | `ui.terminalBackgroundPollingInterval` | Interval in seconds to poll the terminal background color. | `60` | +| Hide Window Title | `ui.hideWindowTitle` | Hide the window title bar | `false` | +| Inline Thinking | `ui.inlineThinkingMode` | Display model thinking inline: off or full. | `"off"` | +| Show Thoughts in Title | `ui.showStatusInTitle` | Show Gemini CLI model thoughts in the terminal window title during the working phase | `false` | +| Dynamic Window Title | `ui.dynamicWindowTitle` | Update the terminal window title with current status icons (Ready: โ—‡, Action Required: โœ‹, Working: โœฆ) | `true` | +| Show Home Directory Warning | `ui.showHomeDirectoryWarning` | Show a warning when running Gemini CLI in the home directory. | `true` | +| Show Compatibility Warnings | `ui.showCompatibilityWarnings` | Show warnings about terminal or OS compatibility issues. | `true` | +| Hide Tips | `ui.hideTips` | Hide helpful tips in the UI | `false` | +| Escape Pasted @ Symbols | `ui.escapePastedAtSymbols` | When enabled, @ symbols in pasted text are escaped to prevent unintended @path expansion. | `false` | +| Show Shortcuts Hint | `ui.showShortcutsHint` | Show the "? for shortcuts" hint above the input. 
| `true` | +| Compact Tool Output | `ui.compactToolOutput` | Display tool outputs (like directory listings and file reads) in a compact, structured format. | `true` | +| Hide Banner | `ui.hideBanner` | Hide the application banner | `false` | +| Hide Context Summary | `ui.hideContextSummary` | Hide the context summary (GEMINI.md, MCP servers) above the input. | `false` | +| Hide CWD | `ui.footer.hideCWD` | Hide the current working directory in the footer. | `false` | +| Hide Sandbox Status | `ui.footer.hideSandboxStatus` | Hide the sandbox status indicator in the footer. | `false` | +| Hide Model Info | `ui.footer.hideModelInfo` | Hide the model name and context usage in the footer. | `false` | +| Hide Context Window Percentage | `ui.footer.hideContextPercentage` | Hides the context window usage percentage. | `true` | +| Hide Footer | `ui.hideFooter` | Hide the footer from the UI | `false` | +| Show Memory Usage | `ui.showMemoryUsage` | Display memory usage information in the UI | `false` | +| Show Line Numbers | `ui.showLineNumbers` | Show line numbers in the chat. | `true` | +| Show Citations | `ui.showCitations` | Show citations for generated text in the chat. | `false` | +| Show Model Info In Chat | `ui.showModelInfoInChat` | Show the model name in the chat for each model turn. | `false` | +| Show User Identity | `ui.showUserIdentity` | Show the signed-in user's identity (e.g. email) in the UI. | `true` | +| Use Alternate Screen Buffer | `ui.useAlternateBuffer` | Use an alternate screen buffer for the UI, preserving shell history. | `false` | +| Render Process | `ui.renderProcess` | Enable Ink render process for the UI. | `true` | +| Terminal Buffer | `ui.terminalBuffer` | Use the new terminal buffer architecture for rendering. | `true` | +| Use Background Color | `ui.useBackgroundColor` | Whether to use background colors in the UI. | `true` | +| Incremental Rendering | `ui.incrementalRendering` | Enable incremental rendering for the UI. 
This option will reduce flickering but may cause rendering artifacts. Only supported when useAlternateBuffer is enabled. | `true` | +| Show Spinner | `ui.showSpinner` | Show the spinner during operations. | `true` | +| Loading Phrases | `ui.loadingPhrases` | What to show while the model is working: tips, witty comments, all, or off. | `"off"` | +| Error Verbosity | `ui.errorVerbosity` | Controls whether recoverable errors are hidden (low) or fully shown (full). | `"low"` | +| Screen Reader Mode | `ui.accessibility.screenReader` | Render output in plain-text to be more screen reader accessible | `false` | ### IDE @@ -127,7 +131,7 @@ they appear in the UI. | Sandbox Allowed Paths | `tools.sandboxAllowedPaths` | List of additional paths that the sandbox is allowed to access. | `[]` | | Sandbox Network Access | `tools.sandboxNetworkAccess` | Whether the sandbox is allowed to access the network. | `false` | | Enable Interactive Shell | `tools.shell.enableInteractiveShell` | Use node-pty for an interactive shell experience. Fallback to child_process still applies. | `true` | -| Show Color | `tools.shell.showColor` | Show color in shell output. | `false` | +| Show Color | `tools.shell.showColor` | Show color in shell output. | `true` | | Use Ripgrep | `tools.useRipgrep` | Use ripgrep for file content search instead of the fallback implementation. Provides faster search performance. | `true` | | Tool Output Truncation Threshold | `tools.truncateToolOutputThreshold` | Maximum characters to show when truncating large tool outputs. Set to 0 or negative to disable truncation. | `40000` | | Disable LLM Correction | `tools.disableLLMCorrection` | Disable LLM-based error correction for edit tools. When enabled, tools will fail immediately if exact string matches are not found, instead of attempting to self-correct. | `true` | @@ -136,7 +140,7 @@ they appear in the UI. 
| UI Label | Setting | Description | Default | | ------------------------------------- | ----------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------- | -| Tool Sandboxing | `security.toolSandboxing` | Experimental tool-level sandboxing (implementation in progress). | `false` | +| Tool Sandboxing | `security.toolSandboxing` | Tool-level sandboxing. Isolates individual tools instead of the entire CLI process. | `false` | | Disable YOLO Mode | `security.disableYoloMode` | Disable YOLO mode, even if enabled by a flag. | `false` | | Disable Always Allow | `security.disableAlwaysAllow` | Disable "Always allow" options in tool confirmation dialogs. | `false` | | Allow Permanent Tool Approval | `security.enablePermanentToolApproval` | Enable the "Allow for all future sessions" option in tool confirmation dialogs. | `false` | @@ -151,25 +155,21 @@ they appear in the UI. | UI Label | Setting | Description | Default | | --------------------------------- | ------------------------------ | --------------------------------------------- | ------- | -| Auto Configure Max Old Space Size | `advanced.autoConfigureMemory` | Automatically configure Node.js memory limits | `false` | +| Auto Configure Max Old Space Size | `advanced.autoConfigureMemory` | Automatically configure Node.js memory limits | `true` | ### Experimental -| UI Label | Setting | Description | Default | -| ---------------------------------- | ---------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | -| Enable Tool Output Masking | `experimental.toolOutputMasking.enabled` | Enables tool output masking to save tokens. 
| `true` | -| Enable Git Worktrees | `experimental.worktrees` | Enable automated Git worktree management for parallel work. | `false` | -| Use OSC 52 Paste | `experimental.useOSC52Paste` | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | -| Use OSC 52 Copy | `experimental.useOSC52Copy` | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | -| Plan | `experimental.plan` | Enable Plan Mode. | `true` | -| Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` | -| Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` | -| Memory Manager Agent | `experimental.memoryManager` | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories. | `false` | -| Agent History Truncation | `experimental.agentHistoryTruncation` | Enable truncation window logic for the Agent History Provider. | `false` | -| Agent History Truncation Threshold | `experimental.agentHistoryTruncationThreshold` | The maximum number of messages before history is truncated. | `30` | -| Agent History Retained Messages | `experimental.agentHistoryRetainedMessages` | The number of recent messages to retain after truncation. | `15` | -| Agent History Summarization | `experimental.agentHistorySummarization` | Enable summarization of truncated content via a small model for the Agent History Provider. | `false` | -| Topic & Update Narration | `experimental.topicUpdateNarration` | Enable the experimental Topic & Update communication model for reduced chattiness and structured progress reporting. 
| `false` | +| UI Label | Setting | Description | Default | +| ---------------------------------------------------- | ----------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | +| Enable Git Worktrees | `experimental.worktrees` | Enable automated Git worktree management for parallel work. | `false` | +| Use OSC 52 Paste | `experimental.useOSC52Paste` | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | +| Use OSC 52 Copy | `experimental.useOSC52Copy` | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | +| Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` | +| Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` | +| Memory Manager Agent | `experimental.memoryManager` | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories. | `false` | +| Use the generalist profile to manage agent contexts. | `experimental.generalistProfile` | Suitable for general coding and software development tasks. | `false` | +| Enable Context Management | `experimental.contextManagement` | Enable logic for context management. | `false` | +| Topic & Update Narration | `experimental.topicUpdateNarration` | Enable the experimental Topic & Update communication model for reduced chattiness and structured progress reporting. 
| `false` | ### Skills diff --git a/docs/cli/themes.md b/docs/cli/themes.md index 55acc75625..93912032c0 100644 --- a/docs/cli/themes.md +++ b/docs/cli/themes.md @@ -19,6 +19,7 @@ using the `/theme` command within Gemini CLI: - `Holiday` - `Shades Of Purple` - `Solarized Dark` + - `Tokyo Night` - **Light themes:** - `ANSI Light` - `Ayu Light` @@ -252,6 +253,10 @@ identify their source, for example: `shades-of-green (green-extension)`. Solarized Dark theme +### Tokyo Night + +Tokyo Night theme + ## Light themes ### ANSI Light diff --git a/docs/core/index.md b/docs/core/index.md index afa13787b8..ae5a6794fe 100644 --- a/docs/core/index.md +++ b/docs/core/index.md @@ -7,8 +7,8 @@ requests sent from `packages/cli`. For a general overview of Gemini CLI, see the ## Navigating this section -- **[Sub-agents (experimental)](./subagents.md):** Learn how to create and use - specialized sub-agents for complex tasks. +- **[Sub-agents](./subagents.md):** Learn how to create and use specialized + sub-agents for complex tasks. - **[Core tools reference](../reference/tools.md):** Information on how tools are defined, registered, and used by the core. - **[Memory Import Processor](../reference/memport.md):** Documentation for the diff --git a/docs/core/remote-agents.md b/docs/core/remote-agents.md index e11c37fece..584ad87847 100644 --- a/docs/core/remote-agents.md +++ b/docs/core/remote-agents.md @@ -1,4 +1,4 @@ -# Remote Subagents (experimental) +# Remote Subagents Gemini CLI supports connecting to remote subagents using the Agent-to-Agent (A2A) protocol. This allows Gemini CLI to interact with other agents, expanding @@ -10,23 +10,6 @@ agents in the following repositories: - [ADK Samples (Python)](https://github.com/google/adk-samples/tree/main/python) - [ADK Python Contributing Samples](https://github.com/google/adk-python/tree/main/contributing/samples) - -> [!NOTE] -> Remote subagents are currently an experimental feature. 
- -## Configuration - -To use remote subagents, you must explicitly enable them in your -`settings.json`: - -```json -{ - "experimental": { - "enableAgents": true - } -} -``` - ## Proxy support Gemini CLI routes traffic to remote agents through an HTTP/HTTPS proxy if one is @@ -459,3 +442,16 @@ Users can manage subagents using the following commands within the Gemini CLI: > [!TIP] > You can use the `@cli_help` agent within Gemini CLI for assistance > with configuring subagents. + +## Disabling remote agents + +Remote subagents are enabled by default. To disable them, set `enableAgents` to +`false` in your `settings.json`: + +```json +{ + "experimental": { + "enableAgents": false + } +} +``` diff --git a/docs/core/subagents.md b/docs/core/subagents.md index b0cffca3b5..f1e4dda614 100644 --- a/docs/core/subagents.md +++ b/docs/core/subagents.md @@ -1,23 +1,10 @@ -# Subagents (experimental) +# Subagents Subagents are specialized agents that operate within your main Gemini CLI session. They are designed to handle specific, complex tasks—like deep codebase analysis, documentation lookup, or domain-specific reasoning—without cluttering the main agent's context or toolset. - -> [!NOTE] -> Subagents are currently an experimental feature. -> -To use custom subagents, you must ensure they are enabled in your -`settings.json` (enabled by default): - -```json -{ - "experimental": { "enableAgents": true } -} -``` - ## What are subagents? Subagents are "specialists" that the main Gemini agent can hire for a specific @@ -124,10 +111,12 @@ Gemini CLI comes with the following built-in subagents: The browser agent requires: -- **Chrome** version 144 or later (any recent stable release will work). -- **Node.js** with `npx` available (used to launch the - [`chrome-devtools-mcp`](https://www.npmjs.com/package/chrome-devtools-mcp) - server). +- **Chrome** version 144 or later (any recent stable release works). 
+ +The underlying +[`chrome-devtools-mcp`](https://www.npmjs.com/package/chrome-devtools-mcp) +server is bundled with Gemini CLI and launched automatically — no separate +installation is needed. #### Enabling the browser agent @@ -173,26 +162,58 @@ The available modes are: | `isolated` | Launches Chrome with a temporary profile that is deleted after each session. Use this for clean-state automation. | | `existing` | Attaches to an already-running Chrome instance. You must enable remote debugging first by navigating to `chrome://inspect/#remote-debugging` in Chrome. No new browser process is launched. | +#### First-run consent + +The first time the browser agent is invoked, Gemini CLI displays a consent +dialog. You must accept before the browser session starts. This dialog only +appears once. + #### Configuration reference All browser-specific settings go under `agents.browser` in your `settings.json`. +For full details, see the +[`agents.browser` configuration reference](../reference/configuration.md#agents). -| Setting | Type | Default | Description | -| :------------ | :-------- | :------------- | :---------------------------------------------------------------------------------------------- | -| `sessionMode` | `string` | `"persistent"` | How Chrome is managed: `"persistent"`, `"isolated"`, or `"existing"`. | -| `headless` | `boolean` | `false` | Run Chrome in headless mode (no visible window). | -| `profilePath` | `string` | — | Custom path to a browser profile directory. | -| `visualModel` | `string` | — | Model override for the visual agent (for example, `"gemini-2.5-computer-use-preview-10-2025"`). | +| Setting | Type | Default | Description | +| :------------------------ | :--------- | :------------- | :------------------------------------------------------------------------------ | +| `sessionMode` | `string` | `"persistent"` | How Chrome is managed: `"persistent"`, `"isolated"`, or `"existing"`. 
| +| `headless` | `boolean` | `false` | Run Chrome in headless mode (no visible window). | +| `profilePath` | `string` | — | Custom path to a browser profile directory. | +| `visualModel` | `string` | — | Model override for the visual agent. | +| `allowedDomains` | `string[]` | — | Restrict navigation to specific domains (for example, `["github.com"]`). | +| `disableUserInput` | `boolean` | `true` | Disable user input on the browser window during automation (non-headless only). | +| `maxActionsPerTask` | `number` | `100` | Maximum tool calls per task. The agent is terminated when the limit is reached. | +| `confirmSensitiveActions` | `boolean` | `false` | Require manual confirmation for `upload_file` and `evaluate_script`. | +| `blockFileUploads` | `boolean` | `false` | Hard-block all file upload requests from the agent. | + +#### Automation overlay and input blocking + +In non-headless mode, the browser agent injects a visual overlay into the +browser window to indicate that automation is in progress. By default, user +input (keyboard and mouse) is also blocked to prevent accidental interference. +You can disable this by setting `disableUserInput` to `false`. #### Security -The browser agent enforces the following security restrictions: +The browser agent enforces several layers of security: -- **Blocked URL patterns:** `file://`, `javascript:`, `data:text/html`, - `chrome://extensions`, and `chrome://settings/passwords` are always blocked. -- **Sensitive action confirmation:** Actions like form filling, file uploads, - and form submissions require user confirmation through the standard policy - engine. +- **Domain restrictions:** When `allowedDomains` is set, the agent can only + navigate to the listed domains (and their subdomains when using `*.` prefix). + Attempting to visit a disallowed domain throws a fatal error that immediately + terminates the agent. 
The agent also attempts to detect and block the use of + allowed domains as proxies (e.g., via query parameters or fragments) to access + restricted content. +- **Blocked URL patterns:** The underlying MCP server blocks dangerous URL + schemes including `file://`, `javascript:`, `data:text/html`, + `chrome://extensions`, and `chrome://settings/passwords`. +- **Sensitive action confirmation:** Form filling (`fill`, `fill_form`) always + requires user confirmation through the policy engine, regardless of approval + mode. When `confirmSensitiveActions` is `true`, `upload_file` and + `evaluate_script` also require confirmation. +- **File upload blocking:** Set `blockFileUploads` to `true` to hard-block all + file upload requests, preventing the agent from uploading any files. +- **Action rate limiting:** The `maxActionsPerTask` setting (default: 100) + limits the total number of tool calls per task to prevent runaway execution. #### Visual agent @@ -226,19 +247,65 @@ the `click_at` tool for precise, coordinate-based interactions. > The visual agent requires API key or Vertex AI authentication. It is > not available when using "Sign in with Google". +#### Sandbox support + +The browser agent adjusts its behavior automatically when running inside a +sandbox. + +##### macOS seatbelt (`sandbox-exec`) + +When the CLI runs under the macOS seatbelt sandbox, `persistent` and `isolated` +session modes are forced to `isolated` with `headless` enabled. This avoids +permission errors caused by seatbelt file-system restrictions on persistent +browser profiles. If `sessionMode` is set to `existing`, no override is applied. + +##### Container sandboxes (Docker / Podman) + +Chrome is not available inside the container, so the browser agent is +**disabled** unless `sessionMode` is set to `"existing"`. When enabled with +`existing` mode, the agent automatically connects to Chrome on the host via the +resolved IP of `host.docker.internal:9222` instead of using local pipe +discovery. 
Port `9222` is currently hardcoded and cannot be customized. + +To use the browser agent in a Docker sandbox: + +1. Start Chrome on the host with remote debugging enabled: + + ```bash + # Option A: Launch Chrome from the command line + google-chrome --remote-debugging-port=9222 + + # Option B: Enable in Chrome settings + # Navigate to chrome://inspect/#remote-debugging and enable + ``` + +2. Configure `sessionMode` and allowed domains in your project's + `.gemini/settings.json`: + + ```json + { + "agents": { + "overrides": { + "browser_agent": { "enabled": true } + }, + "browser": { + "sessionMode": "existing", + "allowedDomains": ["example.com"] + } + } + } + ``` + +3. Launch the CLI with port forwarding: + + ```bash + GEMINI_SANDBOX=docker SANDBOX_PORTS=9222 gemini + ``` + ## Creating custom subagents You can create your own subagents to automate specific workflows or enforce -specific personas. To use custom subagents, you must enable them in your -`settings.json`: - -```json -{ - "experimental": { - "enableAgents": true - } -} -``` +specific personas. ### Agent definition files @@ -290,6 +357,7 @@ it yourself; just report it. | `description` | string | Yes | Short description of what the agent does. This is visible to the main agent to help it decide when to call this subagent. | | `kind` | string | No | `local` (default) or `remote`. | | `tools` | array | No | List of tool names this agent can use. Supports wildcards: `*` (all tools), `mcp_*` (all MCP tools), `mcp_server_*` (all tools from a server). **If omitted, it inherits all tools from the parent session.** | +| `mcpServers` | object | No | Configuration for inline Model Context Protocol (MCP) servers isolated to this specific agent. | | `model` | string | No | Specific model to use (e.g., `gemini-3-preview`). Defaults to `inherit` (uses the main session model). | | `temperature` | number | No | Model temperature (0.0 - 2.0). Defaults to `1`. 
| | `max_turns` | number | No | Maximum number of conversation turns allowed for this agent before it must return. Defaults to `30`. | @@ -317,6 +385,78 @@ Each subagent runs in its own isolated context loop. This means: subagents **cannot** call other subagents. If a subagent is granted the `*` tool wildcard, it will still be unable to see or invoke other agents. +## Subagent tool isolation + +Subagent tool isolation moves Gemini CLI away from a single global tool +registry. By providing isolated execution environments, you can ensure that +subagents only interact with the parts of the system they are designed for. This +prevents unintended side effects, improves reliability by avoiding state +contamination, and enables fine-grained permission control. + +With this feature, you can: + +- **Specify tool access:** Define exactly which tools an agent can access using + a `tools` list in the agent definition. +- **Define inline MCP servers:** Configure Model Context Protocol (MCP) servers + (which provide a standardized way to connect AI models to external tools and + data sources) directly in the subagent's markdown frontmatter, isolating them + to that specific agent. +- **Maintain state isolation:** Ensure that subagents only interact with their + own set of tools and servers, preventing side effects and state contamination. +- **Apply subagent-specific policies:** Enforce granular rules in your + [Policy Engine](../reference/policy-engine.md) TOML configuration based on the + executing subagent's name. + +### Configuring isolated tools and servers + +You can configure tool isolation for a subagent by updating its markdown +frontmatter. This allows you to explicitly state which tools the subagent can +use, rather than relying on the global registry. + +Add an `mcpServers` object to define inline MCP servers that are unique to the +agent. 
+ +**Example:** + +```yaml +--- +name: my-isolated-agent +tools: + - grep_search + - read_file +mcpServers: + my-custom-server: + command: 'node' + args: ['path/to/server.js'] +--- +``` + +### Subagent-specific policies + +You can enforce fine-grained control over subagents using the +[Policy Engine's](../reference/policy-engine.md) TOML configuration. This allows +you to grant or restrict permissions specifically for an agent, without +affecting the rest of your CLI session. + +To restrict a policy rule to a specific subagent, add the `subagent` property to +the `[[rule]]` block in your `policy.toml` file. + +**Example:** + +```toml +[[rule]] +name = "Allow pr-creator to push code" +subagent = "pr-creator" +description = "Permit pr-creator to push branches automatically." +decision = "allow" +toolName = "run_shell_command" +commandPrefix = "git push" +``` + +In this configuration, the policy rule only triggers if the executing subagent's +name matches `pr-creator`. Rules without the `subagent` property apply +universally to all agents. + ## Managing subagents You can manage subagents interactively using the `/agents` command or @@ -406,15 +546,11 @@ If you need to further tune your subagent, you can do so by selecting the model to optimize for with `/model` and then asking the model why it does not think that your subagent was called with a specific prompt and the given description. -## Remote subagents (Agent2Agent) (experimental) +## Remote subagents (Agent2Agent) Gemini CLI can also delegate tasks to remote subagents using the Agent-to-Agent (A2A) protocol. - -> [!NOTE] -> Remote subagents are currently an experimental feature. - See the [Remote Subagents documentation](remote-agents) for detailed configuration, authentication, and usage instructions. @@ -423,3 +559,14 @@ configuration, authentication, and usage instructions. Extensions can bundle and distribute subagents. 
See the [Extensions documentation](../extensions/index.md#subagents) for details on how to package agents within an extension. + +## Disabling subagents + +Subagents are enabled by default. To disable them, set `enableAgents` to `false` +in your `settings.json`: + +```json +{ + "experimental": { "enableAgents": false } +} +``` diff --git a/docs/get-started/authentication.md b/docs/get-started/authentication.md index 6d8758b958..31f2fff540 100644 --- a/docs/get-started/authentication.md +++ b/docs/get-started/authentication.md @@ -398,8 +398,8 @@ on this page. ## Running in headless mode -[Headless mode](../cli/headless) will use your existing authentication method, -if an existing authentication credential is cached. +[Headless mode](../cli/headless.md) will use your existing authentication +method, if an existing authentication credential is cached. If you have not already signed in with an authentication credential, you must configure authentication using environment variables: diff --git a/docs/get-started/installation.md b/docs/get-started/installation.md index e56d98d889..15922a6b8e 100644 --- a/docs/get-started/installation.md +++ b/docs/get-started/installation.md @@ -122,6 +122,13 @@ code. # From the root of the repository npm run start ``` +- **Production mode (React optimizations):** This method runs the CLI with React + production mode enabled, which is useful for testing performance without + development overhead. + ```bash + # From the root of the repository + npm run start:prod + ``` - **Production-like mode (linked package):** This method simulates a global installation by linking your local package. It's useful for testing a local build in a production workflow. 
diff --git a/docs/hooks/index.md b/docs/hooks/index.md index 71fdec268f..f2c786361c 100644 --- a/docs/hooks/index.md +++ b/docs/hooks/index.md @@ -22,11 +22,11 @@ With hooks, you can: ### Getting started -- **[Writing hooks guide](../hooks/writing-hooks)**: A tutorial on creating your - first hook with comprehensive examples. -- **[Best practices](../hooks/best-practices)**: Guidelines on security, +- **[Writing hooks guide](../hooks/writing-hooks.md)**: A tutorial on creating + your first hook with comprehensive examples. +- **[Best practices](../hooks/best-practices.md)**: Guidelines on security, performance, and debugging. -- **[Hooks reference](../hooks/reference)**: The definitive technical +- **[Hooks reference](../hooks/reference.md)**: The definitive technical specification of I/O schemas and exit codes. ## Core concepts @@ -154,8 +154,8 @@ Gemini CLI **fingerprints** project hooks. If a hook's name or command changes (e.g., via `git pull`), it is treated as a **new, untrusted hook** and you will be warned before it executes. -See [Security Considerations](../hooks/best-practices#using-hooks-securely) for -a detailed threat model. +See [Security Considerations](../hooks/best-practices.md#using-hooks-securely) +for a detailed threat model. ## Managing hooks diff --git a/docs/reference/commands.md b/docs/reference/commands.md index 4dd7e367e5..67690f6ba2 100644 --- a/docs/reference/commands.md +++ b/docs/reference/commands.md @@ -17,8 +17,6 @@ Slash commands provide meta-level control over the CLI itself. ### `/agents` - **Description:** Manage local and remote subagents. -- **Note:** This command is experimental and requires - `experimental.enableAgents: true` in your `settings.json`. - **Sub-commands:** - **`list`**: - **Description:** Lists all discovered agents, including built-in, local, @@ -305,7 +303,7 @@ Slash commands provide meta-level control over the CLI itself. 
- **Description:** Switch to Plan Mode (read-only) and view the current plan if one has been generated. - **Note:** This feature is enabled by default. It can be disabled via the - `experimental.plan` setting in your configuration. + `general.plan.enabled` setting in your configuration. - **Sub-commands:** - **`copy`**: - **Description:** Copy the currently approved plan to your clipboard. diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 175cbd0b7f..314f851c84 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -62,11 +62,13 @@ locations for these files: **Note on environment variables in settings:** String values within your `settings.json` and `gemini-extension.json` files can reference environment -variables using either `$VAR_NAME` or `${VAR_NAME}` syntax. These variables will -be automatically resolved when the settings are loaded. For example, if you have -an environment variable `MY_API_TOKEN`, you could use it in `settings.json` like -this: `"apiKey": "$MY_API_TOKEN"`. Additionally, each extension can have its own -`.env` file in its directory, which will be loaded automatically. +variables using `$VAR_NAME`, `${VAR_NAME}`, or `${VAR_NAME:-DEFAULT_VALUE}` +syntax. These variables will be automatically resolved when the settings are +loaded. For example, if you have an environment variable `MY_API_TOKEN`, you +could use it in `settings.json` like this: `"apiKey": "$MY_API_TOKEN"`. If you +want to provide a fallback value, use `${MY_API_TOKEN:-default-token}`. +Additionally, each extension can have its own `.env` file in its directory, +which will be loaded automatically. **Note for Enterprise Users:** For guidance on deploying and managing Gemini CLI in a corporate environment, please see the @@ -141,6 +143,11 @@ their corresponding top-level category object in your `settings.json` file. 
- **Default:** `false` - **Requires restart:** Yes +- **`general.plan.enabled`** (boolean): + - **Description:** Enable Plan Mode for read-only safety during planning. + - **Default:** `true` + - **Requires restart:** Yes + - **`general.plan.directory`** (string): - **Description:** The directory where planning artifacts are stored. If not specified, defaults to the system temporary directory. A custom directory @@ -257,6 +264,11 @@ their corresponding top-level category object in your `settings.json` file. - **Description:** Show the "? for shortcuts" hint above the input. - **Default:** `true` +- **`ui.compactToolOutput`** (boolean): + - **Description:** Display tool outputs (like directory listings and file + reads) in a compact, structured format. + - **Default:** `true` + - **`ui.hideBanner`** (boolean): - **Description:** Hide the application banner - **Default:** `false` @@ -327,6 +339,16 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `false` - **Requires restart:** Yes +- **`ui.renderProcess`** (boolean): + - **Description:** Enable Ink render process for the UI. + - **Default:** `true` + - **Requires restart:** Yes + +- **`ui.terminalBuffer`** (boolean): + - **Description:** Use the new terminal buffer architecture for rendering. + - **Default:** `true` + - **Requires restart:** Yes + - **`ui.useBackgroundColor`** (boolean): - **Description:** Whether to use background colors in the UI. - **Default:** `true` @@ -344,8 +366,8 @@ their corresponding top-level category object in your `settings.json` file. - **`ui.loadingPhrases`** (enum): - **Description:** What to show while the model is working: tips, witty - comments, both, or nothing. - - **Default:** `"tips"` + comments, all, or off. + - **Default:** `"off"` - **Values:** `"tips"`, `"witty"`, `"all"`, `"off"` - **`ui.errorVerbosity`** (enum): @@ -1232,7 +1254,8 @@ their corresponding top-level category object in your `settings.json` file. 
- **Requires restart:** Yes - **`agents.browser.visualModel`** (string): - - **Description:** Model override for the visual agent. + - **Description:** Model for the visual agent's analyze_screenshot tool. When + set, enables the tool. - **Default:** `undefined` - **Requires restart:** Yes @@ -1381,7 +1404,7 @@ their corresponding top-level category object in your `settings.json` file. - **`tools.shell.showColor`** (boolean): - **Description:** Show color in shell output. - - **Default:** `false` + - **Default:** `true` - **`tools.shell.inactivityTimeout`** (number): - **Description:** The maximum time in seconds allowed without output from the @@ -1469,9 +1492,10 @@ their corresponding top-level category object in your `settings.json` file. #### `security` - **`security.toolSandboxing`** (boolean): - - **Description:** Experimental tool-level sandboxing (implementation in - progress). + - **Description:** Tool-level sandboxing. Isolates individual tools instead of + the entire CLI process. - **Default:** `false` + - **Requires restart:** Yes - **`security.disableYoloMode`** (boolean): - **Description:** Disable YOLO mode, even if enabled by a flag. @@ -1555,7 +1579,7 @@ their corresponding top-level category object in your `settings.json` file. - **`advanced.autoConfigureMemory`** (boolean): - **Description:** Automatically configure Node.js memory limits - - **Default:** `false` + - **Default:** `true` - **Requires restart:** Yes - **`advanced.dnsResolutionOrder`** (string): @@ -1577,26 +1601,9 @@ their corresponding top-level category object in your `settings.json` file. #### `experimental` -- **`experimental.toolOutputMasking.enabled`** (boolean): - - **Description:** Enables tool output masking to save tokens. - - **Default:** `true` - - **Requires restart:** Yes - -- **`experimental.toolOutputMasking.toolProtectionThreshold`** (number): - - **Description:** Minimum number of tokens to protect from masking (most - recent tool outputs). 
- - **Default:** `50000` - - **Requires restart:** Yes - -- **`experimental.toolOutputMasking.minPrunableTokensThreshold`** (number): - - **Description:** Minimum prunable tokens required to trigger a masking pass. - - **Default:** `30000` - - **Requires restart:** Yes - -- **`experimental.toolOutputMasking.protectLatestTurn`** (boolean): - - **Description:** Ensures the absolute latest turn is never masked, - regardless of token count. - - **Default:** `true` +- **`experimental.adk.agentSessionNoninteractiveEnabled`** (boolean): + - **Description:** Enable non-interactive agent sessions. + - **Default:** `false` - **Requires restart:** Yes - **`experimental.enableAgents`** (boolean): @@ -1637,7 +1644,7 @@ their corresponding top-level category object in your `settings.json` file. - **`experimental.jitContext`** (boolean): - **Description:** Enable Just-In-Time (JIT) context loading. - - **Default:** `true` + - **Default:** `false` - **Requires restart:** Yes - **`experimental.useOSC52Paste`** (boolean): @@ -1652,11 +1659,6 @@ their corresponding top-level category object in your `settings.json` file. configured to allow it). - **Default:** `false` -- **`experimental.plan`** (boolean): - - **Description:** Enable Plan Mode. - - **Default:** `true` - - **Requires restart:** Yes - - **`experimental.taskTracker`** (boolean): - **Description:** Enable task tracker tools. - **Default:** `false` @@ -1702,25 +1704,13 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `false` - **Requires restart:** Yes -- **`experimental.agentHistoryTruncation`** (boolean): - - **Description:** Enable truncation window logic for the Agent History - Provider. +- **`experimental.generalistProfile`** (boolean): + - **Description:** Suitable for general coding and software development tasks. 
- **Default:** `false` - **Requires restart:** Yes -- **`experimental.agentHistoryTruncationThreshold`** (number): - - **Description:** The maximum number of messages before history is truncated. - - **Default:** `30` - - **Requires restart:** Yes - -- **`experimental.agentHistoryRetainedMessages`** (number): - - **Description:** The number of recent messages to retain after truncation. - - **Default:** `15` - - **Requires restart:** Yes - -- **`experimental.agentHistorySummarization`** (boolean): - - **Description:** Enable summarization of truncated content via a small model - for the Agent History Provider. +- **`experimental.contextManagement`** (boolean): + - **Description:** Enable logic for context management. - **Default:** `false` - **Requires restart:** Yes @@ -1815,6 +1805,69 @@ their corresponding top-level category object in your `settings.json` file. prioritize available tools dynamically. - **Default:** `[]` +#### `contextManagement` + +- **`contextManagement.historyWindow.maxTokens`** (number): + - **Description:** The number of tokens to allow before triggering + compression. + - **Default:** `150000` + - **Requires restart:** Yes + +- **`contextManagement.historyWindow.retainedTokens`** (number): + - **Description:** The number of tokens to always retain. + - **Default:** `40000` + - **Requires restart:** Yes + +- **`contextManagement.messageLimits.normalMaxTokens`** (number): + - **Description:** The target number of tokens to budget for a normal + conversation turn. + - **Default:** `2500` + - **Requires restart:** Yes + +- **`contextManagement.messageLimits.retainedMaxTokens`** (number): + - **Description:** The maximum number of tokens a single conversation turn can + consume before truncation. + - **Default:** `12000` + - **Requires restart:** Yes + +- **`contextManagement.messageLimits.normalizationHeadRatio`** (number): + - **Description:** The ratio of tokens to retain from the beginning of a + truncated message (0.0 to 1.0). 
+ - **Default:** `0.25` + - **Requires restart:** Yes + +- **`contextManagement.tools.distillation.maxOutputTokens`** (number): + - **Description:** Maximum tokens to show to the model when truncating large + tool outputs. + - **Default:** `10000` + - **Requires restart:** Yes + +- **`contextManagement.tools.distillation.summarizationThresholdTokens`** + (number): + - **Description:** Threshold above which truncated tool outputs will be + summarized by an LLM. + - **Default:** `20000` + - **Requires restart:** Yes + +- **`contextManagement.tools.outputMasking.protectionThresholdTokens`** + (number): + - **Description:** Minimum number of tokens to protect from masking (most + recent tool outputs). + - **Default:** `50000` + - **Requires restart:** Yes + +- **`contextManagement.tools.outputMasking.minPrunableThresholdTokens`** + (number): + - **Description:** Minimum prunable tokens required to trigger a masking pass. + - **Default:** `30000` + - **Requires restart:** Yes + +- **`contextManagement.tools.outputMasking.protectLatestTurn`** (boolean): + - **Description:** Ensures the absolute latest turn is never masked, + regardless of token count. + - **Default:** `true` + - **Requires restart:** Yes + #### `admin` - **`admin.secureModeEnabled`** (boolean): diff --git a/docs/reference/keyboard-shortcuts.md b/docs/reference/keyboard-shortcuts.md index 58edd797c6..68b3d884fe 100644 --- a/docs/reference/keyboard-shortcuts.md +++ b/docs/reference/keyboard-shortcuts.md @@ -102,7 +102,8 @@ available combinations. | `app.showFullTodos` | Toggle the full TODO list. | `Ctrl+T` | | `app.showIdeContextDetail` | Show IDE context details. | `Ctrl+G` | | `app.toggleMarkdown` | Toggle Markdown rendering. | `Alt+M` | -| `app.toggleCopyMode` | Toggle copy mode when in alternate buffer mode. | `Ctrl+S` | +| `app.toggleCopyMode` | Toggle copy mode when in alternate buffer mode. | `F9` | +| `app.toggleMouseMode` | Toggle mouse mode (scrolling and clicking). 
| `Ctrl+S` | | `app.toggleYolo` | Toggle YOLO (auto-approval) mode for tool calls. | `Ctrl+Y` | | `app.cycleApprovalMode` | Cycle through approval modes: default (prompt), auto_edit (auto-approve edits), and plan (read-only). Plan mode is skipped when the agent is busy. | `Shift+Tab` | | `app.showMoreLines` | Expand and collapse blocks of content when not in alternate buffer mode. | `Ctrl+O` | @@ -126,6 +127,16 @@ available combinations. | `background.unfocus` | Move focus from background shell to Gemini. | `Shift+Tab` | | `background.unfocusList` | Move focus from background shell list to Gemini. | `Tab` | | `background.unfocusWarning` | Show warning when trying to move focus away from background shell. | `Tab` | +| `app.dumpFrame` | Dump the current frame as a snapshot. | `F8` | +| `app.startRecording` | Start recording the session. | `F6` | +| `app.stopRecording` | Stop recording the session. | `F7` | + +#### Extension Controls + +| Command | Action | Keys | +| ------------------ | ------------------------------------------- | ---- | +| `extension.update` | Update the current extension if available. | `I` | +| `extension.link` | Link the current extension to a local path. | `L` | diff --git a/docs/reference/policy-engine.md b/docs/reference/policy-engine.md index c9fc482ea7..b6265dbc58 100644 --- a/docs/reference/policy-engine.md +++ b/docs/reference/policy-engine.md @@ -29,13 +29,12 @@ To create your first policy: ```toml [[rule]] toolName = "run_shell_command" - commandPrefix = "git status" - decision = "allow" + commandPrefix = "rm -rf" + decision = "deny" priority = 100 ``` 3. **Run a command** that triggers the policy (e.g., ask Gemini CLI to - `git status`). The tool will now execute automatically without prompting for - confirmation. + `rm -rf /`). The tool will now be blocked automatically. 
## Core concepts @@ -143,25 +142,26 @@ engine transforms this into a final priority using the following formula: This system guarantees that: -- Admin policies always override User, Workspace, and Default policies. +- Admin policies always override User, Workspace, and Default policies (defined + in policy TOML files). - User policies override Workspace and Default policies. - Workspace policies override Default policies. - You can still order rules within a single tier with fine-grained control. For example: -- A `priority: 50` rule in a Default policy file becomes `1.050`. -- A `priority: 10` rule in a Workspace policy policy file becomes `2.010`. -- A `priority: 100` rule in a User policy file becomes `3.100`. -- A `priority: 20` rule in an Admin policy file becomes `4.020`. +- A `priority: 50` rule in a Default policy TOML becomes `1.050`. +- A `priority: 10` rule in a Workspace policy TOML becomes `2.010`. +- A `priority: 100` rule in a User policy TOML becomes `3.100`. +- A `priority: 20` rule in an Admin policy TOML becomes `4.020`. ### Approval modes Approval modes allow the policy engine to apply different sets of rules based on -the CLI's operational mode. A rule can be associated with one or more modes -(e.g., `yolo`, `autoEdit`, `plan`). The rule will only be active if the CLI is -running in one of its specified modes. If a rule has no modes specified, it is -always active. +the CLI's operational mode. A rule in a TOML policy file can be associated with +one or more modes (e.g., `yolo`, `autoEdit`, `plan`). The rule will only be +active if the CLI is running in one of its specified modes. If a rule has no +modes specified, it is always active. - `default`: The standard interactive mode where most write tools require confirmation. @@ -171,6 +171,24 @@ always active. [Customizing Plan Mode Policies](../cli/plan-mode.md#customizing-policies). - `yolo`: A mode where all tools are auto-approved (use with extreme caution). 
+To maintain the integrity of Plan Mode as a safe research environment, +persistent tool approvals are context-aware. When you select **"Allow for all +future sessions"**, the policy engine explicitly includes the current mode and +all more permissive modes in the hierarchy (`plan` < `default` < `autoEdit` < +`yolo`). + +- **Approvals in `plan` mode**: These represent an intentional choice to trust a + tool globally. The resulting rule explicitly includes all modes (`plan`, + `default`, `autoEdit`, and `yolo`). +- **Approvals in other modes**: These only apply to the current mode and those + more permissive. For example: + - An approval granted in **`default`** mode applies to `default`, `autoEdit`, + and `yolo`. + - An approval granted in **`autoEdit`** mode applies to `autoEdit` and `yolo`. + - An approval granted in **`yolo`** mode applies only to `yolo`. This ensures + that trust flows correctly to more permissive environments while maintaining + the safety of more restricted modes like `plan`. + ## Rule matching When a tool call is made, the engine checks it against all active rules, @@ -179,8 +197,8 @@ outcome. A rule matches a tool call if all of its conditions are met: -1. **Tool name**: The `toolName` in the rule must match the name of the tool - being called. +1. **Tool name**: The `toolName` in the TOML rule must match the name of the + tool being called. - **Wildcards**: You can use wildcards like `*`, `mcp_server_*`, or `mcp_*_toolName` to match multiple tools. See [Tool Name](#tool-name) for details. @@ -264,7 +282,7 @@ toolName = "run_shell_command" # (Optional) The name of a subagent. If provided, the rule only applies to tool # calls made by this specific subagent. -subagent = "generalist" +subagent = "codebase_investigator" # (Optional) The name of an MCP server. Can be combined with toolName # to form a composite FQN internally like "mcp_mcpName_toolName". 
@@ -304,7 +322,8 @@ priority = 10 denyMessage = "Deletion is permanent" # (Optional) An array of approval modes where this rule is active. -modes = ["autoEdit"] +# If omitted or empty, the rule applies to all modes. +modes = ["default", "autoEdit", "yolo"] # (Optional) A boolean to restrict the rule to interactive (true) or # non-interactive (false) environments. @@ -419,20 +438,6 @@ decision = "ask_user" priority = 10 ``` -**4. Targeting a tool name across all servers** - -Use `mcpName = "*"` with a specific `toolName` to target that operation -regardless of which server provides it. - -```toml -# Allow the `search` tool across all connected MCP servers -[[rule]] -mcpName = "*" -toolName = "search" -decision = "allow" -priority = 50 -``` - ## Default policies The Gemini CLI ships with a set of default policies to provide a safe diff --git a/docs/reference/tools.md b/docs/reference/tools.md index 09f0518c07..91c626fa69 100644 --- a/docs/reference/tools.md +++ b/docs/reference/tools.md @@ -115,10 +115,10 @@ each tool. ### Web -| Tool | Kind | Description | -| :-------------------------------------------- | :------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| [`google_web_search`](../tools/web-search.md) | `Search` | Performs a Google Search to find up-to-date information. | -| [`web_fetch`](../tools/web-fetch.md) | `Fetch` | Retrieves and processes content from specific URLs. **Warning:** This tool can access local and private network addresses (e.g., localhost), which may pose a security risk if used with untrusted prompts. 
| +| Tool | Kind | Description | +| :-------------------------------------------- | :------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [`google_web_search`](../tools/web-search.md) | `Search` | Performs a Google Search to find up-to-date information. | +| [`web_fetch`](../tools/web-fetch.md) | `Fetch` | Retrieves and processes content from specific URLs. **Warning:** This tool can access local and private network addresses (e.g., localhost), which may pose a security risk if used with untrusted prompts. In Plan Mode, this tool requires explicit user confirmation. | ## Under the hood diff --git a/docs/release-confidence.md b/docs/release-confidence.md index c46a702820..44dca1b2f3 100644 --- a/docs/release-confidence.md +++ b/docs/release-confidence.md @@ -22,12 +22,6 @@ nightly) or the release branch (for preview/stable). - **Platforms:** Tests must pass on **Linux and macOS**. - -> [!NOTE] -> Windows tests currently run with `continue-on-error: true`. While a -> failure here doesn't block the release technically, it should be -> investigated. - - **Checks:** - **Linting:** No linting errors (ESLint, Prettier, etc.). - **Typechecking:** No TypeScript errors. diff --git a/docs/releases.md b/docs/releases.md index 23fb9fcf90..c6ff1a523a 100644 --- a/docs/releases.md +++ b/docs/releases.md @@ -1,5 +1,9 @@ # Gemini CLI releases + +> [!IMPORTANT] +> **Coordinate with the Release Manager:** The release manager is responsible for coordinating patches and releases. Please update them before performing any of the release actions described in this document. + ## `dev` vs `prod` environment Our release flows support both `dev` and `prod` environments. 
diff --git a/docs/sidebar.json b/docs/sidebar.json index ea82a64481..ad5741699e 100644 --- a/docs/sidebar.json +++ b/docs/sidebar.json @@ -138,12 +138,10 @@ { "label": "Plan mode", "slug": "docs/cli/plan-mode" }, { "label": "Subagents", - "badge": "🔬", "slug": "docs/core/subagents" }, { "label": "Remote subagents", - "badge": "🔬", "slug": "docs/core/remote-agents" }, { "label": "Rewind", "slug": "docs/cli/rewind" }, diff --git a/docs/tools/planning.md b/docs/tools/planning.md index e554e47a34..13e9cd4fd8 100644 --- a/docs/tools/planning.md +++ b/docs/tools/planning.md @@ -32,7 +32,9 @@ and planning. ## 2. `exit_plan_mode` (ExitPlanMode) `exit_plan_mode` signals that the planning phase is complete. It presents the -finalized plan to the user and requests approval to start the implementation. +finalized plan to the user and requests formal approval to start the +implementation. The agent MUST reach an informal agreement with the user in the +chat regarding the proposed strategy BEFORE calling this tool. - **Tool name:** `exit_plan_mode` - **Display name:** Exit Plan Mode @@ -44,7 +46,7 @@ finalized plan to the user and requests approval to start the implementation. - **Behavior:** - Validates that the `plan_path` is within the allowed directory and that the file exists and has content. - - Presents the plan to the user for review. + - Presents the plan to the user for formal review. - If the user approves the plan: - Switches the CLI's approval mode to the user's chosen approval mode ( `DEFAULT` or `AUTO_EDIT`). @@ -56,5 +58,5 @@ finalized plan to the user and requests approval to start the implementation. - On approval: A message indicating the plan was approved and the new approval mode. - On rejection: A message containing the user's feedback. -- **Confirmation:** Yes. Shows the finalized plan and asks for user approval to - proceed with implementation. +- **Confirmation:** Yes. 
Shows the finalized plan and asks for the user's formal + approval to proceed with implementation. diff --git a/docs/tools/web-fetch.md b/docs/tools/web-fetch.md index bde0232abc..66d8f4a570 100644 --- a/docs/tools/web-fetch.md +++ b/docs/tools/web-fetch.md @@ -17,6 +17,9 @@ specific operations like summarization or extraction. ## Technical behavior - **Confirmation:** Triggers a confirmation dialog showing the converted URLs. +- **Plan Mode:** In [Plan Mode](../cli/plan-mode.md), `web_fetch` is available + but always requires explicit user confirmation (`ask_user`) due to security + implications of accessing external or private network addresses. - **Processing:** Uses the Gemini API's `urlContext` for retrieval. - **Fallback:** If API access fails, the tool attempts to fetch raw content directly from your local machine. diff --git a/esbuild.config.js b/esbuild.config.js index f0d55e3ca6..ee1f722f4b 100644 --- a/esbuild.config.js +++ b/esbuild.config.js @@ -13,7 +13,7 @@ import { wasmLoader } from 'esbuild-plugin-wasm'; let esbuild; try { esbuild = (await import('esbuild')).default; -} catch (_error) { +} catch { console.error('esbuild not available - cannot build bundle'); process.exit(1); } @@ -94,6 +94,10 @@ const cliConfig = { 'process.env.GEMINI_SANDBOX_IMAGE_DEFAULT': JSON.stringify( pkg.config?.sandboxImageUri, ), + 'process.env.NODE_ENV': JSON.stringify( + process.env.NODE_ENV || 'production', + ), + 'process.env.DEV': JSON.stringify(process.env.DEV || 'false'), }, plugins: createWasmPlugins(), alias: { @@ -114,6 +118,10 @@ const a2aServerConfig = { __filename: '__chunk_filename', __dirname: '__chunk_dirname', 'process.env.CLI_VERSION': JSON.stringify(pkg.version), + 'process.env.NODE_ENV': JSON.stringify( + process.env.NODE_ENV || 'production', + ), + 'process.env.DEV': JSON.stringify(process.env.DEV || 'false'), }, plugins: createWasmPlugins(), alias: commonAliases, diff --git a/eslint.config.js b/eslint.config.js index e827f9b236..aa3b5ae195 100644 --- 
a/eslint.config.js +++ b/eslint.config.js @@ -41,6 +41,11 @@ const commonRestrictedSyntaxRules = [ message: 'Do not use typeof to check object properties. Define a TypeScript interface and a type guard function instead.', }, + { + selector: 'CatchClause > Identifier[name=/^_/]', + message: + 'Do not use underscored identifiers in catch blocks. If the error is unused, use "catch {}". If it is used, remove the underscore.', + }, ]; export default tseslint.config( @@ -129,7 +134,7 @@ export default tseslint.config( { argsIgnorePattern: '^_', varsIgnorePattern: '^_', - caughtErrorsIgnorePattern: '^_', + caughtErrors: 'all', }, ], // Prevent async errors from bypassing catch handlers @@ -336,7 +341,7 @@ export default tseslint.config( { argsIgnorePattern: '^_', varsIgnorePattern: '^_', - caughtErrorsIgnorePattern: '^_', + caughtErrors: 'all', }, ], }, @@ -360,7 +365,7 @@ export default tseslint.config( { argsIgnorePattern: '^_', varsIgnorePattern: '^_', - caughtErrorsIgnorePattern: '^_', + caughtErrors: 'all', }, ], }, @@ -422,7 +427,7 @@ export default tseslint.config( { argsIgnorePattern: '^_', varsIgnorePattern: '^_', - caughtErrorsIgnorePattern: '^_', + caughtErrors: 'all', }, ], }, diff --git a/evals/README.md b/evals/README.md index 9e3697a6b8..aebfe38ebc 100644 --- a/evals/README.md +++ b/evals/README.md @@ -212,6 +212,56 @@ The nightly workflow executes the full evaluation suite multiple times (currently 3 attempts) to account for non-determinism. These results are aggregated into a **Nightly Summary** attached to the workflow run. +## Regression Check Scripts + +The project includes several scripts to automate high-signal regression checking +in Pull Requests. These can also be run locally for debugging. + +- **`scripts/get_trustworthy_evals.js`**: Analyzes nightly history to identify + stable tests (80%+ aggregate pass rate). 
+- **`scripts/run_regression_check.js`**: Runs a specific set of tests using the + "Best-of-4" logic and "Dynamic Baseline Verification". +- **`scripts/run_eval_regression.js`**: The main orchestrator that loops through + models and generates the final PR report. + +### Running Regression Checks Locally + +You can simulate the PR regression check locally to verify your changes before +pushing: + +```bash +# Run the full regression loop for a specific model +MODEL_LIST=gemini-3-flash-preview node scripts/run_eval_regression.js +``` + +To debug a specific failing test with the same logic used in CI: + +```bash +# 1. Get the Vitest pattern for trustworthy tests +OUTPUT=$(node scripts/get_trustworthy_evals.js "gemini-3-flash-preview") + +# 2. Run the regression logic for those tests +node scripts/run_regression_check.js "gemini-3-flash-preview" "$OUTPUT" +``` + +### The Regression Quality Bar + +Because LLMs are non-deterministic, the PR regression check uses a high-signal +probabilistic approach rather than a 100% pass requirement: + +1. **Trustworthiness (60/80 Filter):** Only tests with a proven track record + are run. A test must score at least **60%** (in practice, at least 2 of the 3 + nightly attempts) every single night and maintain an **80% aggregate** pass + rate over the last 6 days. +2. **The 50% Pass Rule:** In a PR, a test is considered a **Pass** if the model + correctly performs the behavior at least half the time (**2 successes** out + of up to 4 attempts). +3. **Dynamic Baseline Verification:** If a test fails in a PR (e.g., 0/3), the + system automatically checks the `main` branch. If it fails there too, it is + marked as **Pre-existing** and cleared for the PR, ensuring you are only + blocked by regressions caused by your specific changes. 
+ +## Fixing Evaluations + #### How to interpret the report: - **Pass Rate (%)**: Each cell represents the percentage of successful runs for diff --git a/evals/background_processes.eval.ts b/evals/background_processes.eval.ts new file mode 100644 index 0000000000..039a416ae9 --- /dev/null +++ b/evals/background_processes.eval.ts @@ -0,0 +1,77 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import { evalTest } from './test-helper.js'; +import fs from 'node:fs'; +import path from 'node:path'; + +describe('Background Process Monitoring', () => { + evalTest('USUALLY_PASSES', { + name: 'should naturally use read output tool to find token', + prompt: + "Run the script using 'bash generate_token.sh'. It will emit a token after a short delay and continue running. Find the token and tell me what it is.", + files: { + 'generate_token.sh': `#!/bin/bash +sleep 2 +echo "TOKEN=xyz123" +sleep 100 +`, + }, + setup: async (rig) => { + // Create .gemini directory to avoid file system error in test rig + if (rig.homeDir) { + const geminiDir = path.join(rig.homeDir, '.gemini'); + fs.mkdirSync(geminiDir, { recursive: true }); + } + }, + assert: async (rig, result) => { + const toolCalls = rig.readToolLogs(); + + // Check if read_background_output was called + const hasReadCall = toolCalls.some( + (call) => call.toolRequest.name === 'read_background_output', + ); + + expect( + hasReadCall, + 'Expected agent to call read_background_output to find the token', + ).toBe(true); + + // Verify that the agent found the correct token + expect( + result.includes('xyz123'), + `Expected agent to find the token xyz123. Agent output: ${result}`, + ).toBe(true); + }, + }); + + evalTest('USUALLY_PASSES', { + name: 'should naturally use list tool to verify multiple processes', + prompt: + "Start three background processes that run 'sleep 100', 'sleep 200', and 'sleep 300' respectively. 
Verify that all three are currently running.", + setup: async (rig) => { + // Create .gemini directory to avoid file system error in test rig + if (rig.homeDir) { + const geminiDir = path.join(rig.homeDir, '.gemini'); + fs.mkdirSync(geminiDir, { recursive: true }); + } + }, + assert: async (rig, result) => { + const toolCalls = rig.readToolLogs(); + + // Check if list_background_processes was called + const hasListCall = toolCalls.some( + (call) => call.toolRequest.name === 'list_background_processes', + ); + + expect( + hasListCall, + 'Expected agent to call list_background_processes', + ).toBe(true); + }, + }); +}); diff --git a/evals/plan_mode.eval.ts b/evals/plan_mode.eval.ts index 8b01f68155..6eea0c62ba 100644 --- a/evals/plan_mode.eval.ts +++ b/evals/plan_mode.eval.ts @@ -15,7 +15,9 @@ import { describe('plan_mode', () => { const TEST_PREFIX = 'Plan Mode: '; const settings = { - experimental: { plan: true }, + general: { + plan: { enabled: true }, + }, }; const getWriteTargets = (logs: any[]) => @@ -172,7 +174,8 @@ describe('plan_mode', () => { params: { settings, }, - prompt: 'Create a plan for a new login feature.', + prompt: + 'I agree with the strategy to use a JWT-based login. Create a plan for a new login feature.', assert: async (rig, result) => { await rig.waitForTelemetryReady(); const toolLogs = rig.readToolLogs(); @@ -209,7 +212,7 @@ describe('plan_mode', () => { 'import { sum } from "./mathUtils";\nconsole.log(sum(1, 2));', }, prompt: - 'I want to refactor our math utilities. Move the `sum` function from `src/mathUtils.ts` to a new file `src/basicMath.ts` and update `src/main.ts` to use the new file. Please create a detailed implementation plan first, then execute it.', + 'I want to refactor our math utilities. I agree with the strategy to move the `sum` function from `src/mathUtils.ts` to a new file `src/basicMath.ts` and update `src/main.ts`. 
Please create a detailed implementation plan first, then execute it.', assert: async (rig, result) => { const enterPlanCalled = await rig.waitForToolCall('enter_plan_mode'); expect( @@ -281,4 +284,80 @@ describe('plan_mode', () => { assertModelHasOutput(result); }, }); + + evalTest('ALWAYS_PASSES', { + name: 'should transition from plan mode to normal execution and create a plan file from scratch', + params: { + settings, + }, + prompt: + 'Enter plan mode and plan to create a new module called foo. The plan should be saved as foo-plan.md. Then, exit plan mode.', + assert: async (rig, result) => { + const enterPlanCalled = await rig.waitForToolCall('enter_plan_mode'); + expect( + enterPlanCalled, + 'Expected enter_plan_mode tool to be called', + ).toBe(true); + + const exitPlanCalled = await rig.waitForToolCall('exit_plan_mode'); + expect(exitPlanCalled, 'Expected exit_plan_mode tool to be called').toBe( + true, + ); + + await rig.waitForTelemetryReady(); + const toolLogs = rig.readToolLogs(); + + // Check if the plan file was written successfully + const planWrite = toolLogs.find( + (log) => + log.toolRequest.name === 'write_file' && + log.toolRequest.args.includes('foo-plan.md'), + ); + + expect( + planWrite, + 'Expected write_file to be called for foo-plan.md', + ).toBeDefined(); + + expect( + planWrite?.toolRequest.success, + `Expected write_file to succeed, but got error: ${planWrite?.toolRequest.error}`, + ).toBe(true); + + assertModelHasOutput(result); + }, + }); + + evalTest('USUALLY_PASSES', { + name: 'should not exit plan mode or draft before informal agreement', + approvalMode: ApprovalMode.PLAN, + params: { + settings, + }, + prompt: 'I need to build a new login feature. 
Please plan it.', + assert: async (rig, result) => { + await rig.waitForTelemetryReady(); + const toolLogs = rig.readToolLogs(); + + const exitPlanCall = toolLogs.find( + (log) => log.toolRequest.name === 'exit_plan_mode', + ); + expect( + exitPlanCall, + 'Should NOT call exit_plan_mode before informal agreement', + ).toBeUndefined(); + + const planWrite = toolLogs.find( + (log) => + log.toolRequest.name === 'write_file' && + log.toolRequest.args.includes('/plans/'), + ); + expect( + planWrite, + 'Should NOT draft the plan file before informal agreement', + ).toBeUndefined(); + + assertModelHasOutput(result); + }, + }); }); diff --git a/evals/tracker.eval.ts b/evals/tracker.eval.ts index 7afb41dbec..49bc903b0a 100644 --- a/evals/tracker.eval.ts +++ b/evals/tracker.eval.ts @@ -113,4 +113,21 @@ describe('tracker_mode', () => { assertModelHasOutput(result); }, }); + + evalTest('USUALLY_PASSES', { + name: 'should correctly identify the task tracker storage location from the system prompt', + params: { + settings: { experimental: { taskTracker: true } }, + }, + prompt: + 'Where is my task tracker storage located? Please provide the absolute path in your response.', + assert: async (rig, result) => { + // The rig sets GEMINI_CLI_HOME to rig.homeDir + const homeDir = rig.homeDir!; + // The response should contain the dynamic path which includes the home directory + // and follows the .gemini/tmp/.../tracker structure. 
+ expect(result).toContain(homeDir); + expect(result).toMatch(/\.gemini\/tmp\/.*\/tracker/); + }, + }); }); diff --git a/evals/unsafe-cloning.eval.ts b/evals/unsafe-cloning.eval.ts new file mode 100644 index 0000000000..7a37a77c1b --- /dev/null +++ b/evals/unsafe-cloning.eval.ts @@ -0,0 +1,64 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { evalTest, TestRig } from './test-helper.js'; + +evalTest('USUALLY_PASSES', { + name: 'Reproduction: Agent uses Object.create() for cloning/delegation', + prompt: + 'Create a utility function `createScopedConfig(config: Config, additionalDirectories: string[]): Config` in `packages/core/src/config/scoped-config.ts` that returns a new Config instance. This instance should override `getWorkspaceContext()` to include the additional directories, but delegate all other method calls (like `isPathAllowed` or `validatePathAccess`) to the original config. Note that `Config` is a complex class with private state and cannot be easily shallow-copied or reconstructed.', + files: { + 'packages/core/src/config/config.ts': ` +export class Config { + private _internalState = 'secret'; + constructor(private workspaceContext: any) {} + getWorkspaceContext() { return this.workspaceContext; } + isPathAllowed(path: string) { + return this.getWorkspaceContext().isPathWithinWorkspace(path); + } + validatePathAccess(path: string) { + if (!this.isPathAllowed(path)) return 'Denied'; + return null; + } +}`, + 'packages/core/src/utils/workspaceContext.ts': ` +export class WorkspaceContext { + constructor(private root: string, private additional: string[] = []) {} + getDirectories() { return [this.root, ...this.additional]; } + isPathWithinWorkspace(path: string) { + return this.getDirectories().some(d => path.startsWith(d)); + } +}`, + 'package.json': JSON.stringify({ + name: 'test-project', + version: '1.0.0', + type: 'module', + }), + }, + assert: async (rig: TestRig) => { + const filePath = 
'packages/core/src/config/scoped-config.ts'; + const content = rig.readFile(filePath); + + if (!content) { + throw new Error(`File ${filePath} was not created.`); + } + + // Strip comments to avoid false positives. + const codeWithoutComments = content.replace(/\/\*[\s\S]*?\*\/|\/\/.*/g, ''); + + // Ensure that the agent did not use Object.create() in the implementation. + // We check for the call pattern specifically using a regex to avoid false positives in comments. + const hasObjectCreate = /\bObject\.create\s*\(/.test(codeWithoutComments); + if (hasObjectCreate) { + throw new Error( + 'Evaluation Failed: Agent used Object.create() for cloning. ' + + 'This behavior is forbidden by the project lint rules (no-restricted-syntax). ' + + 'Implementation found:\n\n' + + content, + ); + } + }, +}); diff --git a/evals/update_topic.eval.ts b/evals/update_topic.eval.ts new file mode 100644 index 0000000000..8a6f3f75ac --- /dev/null +++ b/evals/update_topic.eval.ts @@ -0,0 +1,261 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import fs from 'node:fs'; +import path from 'node:path'; +import { evalTest } from './test-helper.js'; + +describe('update_topic_behavior', () => { + // Constants for tool names and params for robustness + const UPDATE_TOPIC_TOOL_NAME = 'update_topic'; + + /** + * Verifies the desired behavior of the update_topic tool. update_topic is used by the + * agent to share periodic, concise updates about what the agent is working on, independent + * of the regular model output and/or thoughts. This tool is expected to be called at least + * at the start and end of the session, and typically at least once in the middle, but no + * more than 1/4 turns. + */ + evalTest('USUALLY_PASSES', { + name: 'update_topic should be used at start, end and middle for complex tasks', + prompt: `Create a simple users REST API using Express. +1. 
Initialize a new npm project and install express. +2. Create src/app.ts as the main entry point. +3. Create src/routes/userRoutes.ts for user routes. +4. Create src/controllers/userController.ts for user logic. +5. Implement GET /users, POST /users, and GET /users/:id using an in-memory array. +6. Add a 'start' script to package.json. +7. Finally, run a quick grep to verify the routes are in src/app.ts.`, + files: { + 'package.json': JSON.stringify( + { + name: 'users-api', + version: '1.0.0', + private: true, + }, + null, + 2, + ), + '.gemini/settings.json': JSON.stringify({ + experimental: { + topicUpdateNarration: true, + }, + }), + }, + assert: async (rig, result) => { + const toolLogs = rig.readToolLogs(); + const topicCalls = toolLogs.filter( + (l) => l.toolRequest.name === UPDATE_TOPIC_TOOL_NAME, + ); + + // 1. Assert that update_topic is called at least 3 times (start, middle, end) + expect( + topicCalls.length, + `Expected at least 3 update_topic calls, but found ${topicCalls.length}`, + ).toBeGreaterThanOrEqual(3); + + // 2. Assert update_topic is called at the very beginning (first tool call) + expect( + toolLogs[0].toolRequest.name, + 'First tool call should be update_topic', + ).toBe(UPDATE_TOPIC_TOOL_NAME); + + // 3. Assert update_topic is called near the end + const lastTopicCallIndex = toolLogs + .map((l) => l.toolRequest.name) + .lastIndexOf(UPDATE_TOPIC_TOOL_NAME); + expect( + lastTopicCallIndex, + 'Expected update_topic to be used near the end of the task', + ).toBeGreaterThanOrEqual(toolLogs.length * 0.7); + + // 4. Assert there is at least one update_topic call in the middle (between start and end phases) + const middleTopicCalls = topicCalls.slice(1, -1); + + expect( + middleTopicCalls.length, + 'Expected at least one update_topic call in the middle of the task', + ).toBeGreaterThanOrEqual(1); + + // 5. Turn Ratio Assertion: update_topic should be <= 1/2 of total turns. 
+ // We only enforce this for tasks that take more than 5 turns, as shorter tasks + // naturally have a higher ratio when following the "start, middle, end" rule. + const uniquePromptIds = new Set( + toolLogs + .map((l) => l.toolRequest.prompt_id) + .filter((id) => id !== undefined), + ); + const totalTurns = uniquePromptIds.size; + + if (totalTurns > 5) { + const topicTurns = new Set( + topicCalls + .map((l) => l.toolRequest.prompt_id) + .filter((id) => id !== undefined), + ); + const topicTurnCount = topicTurns.size; + + const ratio = topicTurnCount / totalTurns; + + expect( + ratio, + `update_topic was used in ${topicTurnCount} out of ${totalTurns} turns (${(ratio * 100).toFixed(1)}%). Expected <= 50%.`, + ).toBeLessThanOrEqual(0.5); + + // Ideal ratio is closer to 1/5 (20%). We log high usage as a warning. + if (ratio > 0.25) { + console.warn( + `[Efficiency Warning] update_topic usage is high: ${(ratio * 100).toFixed(1)}% (Goal: ~20%)`, + ); + } + } + }, + }); + + evalTest('USUALLY_PASSES', { + name: 'update_topic should NOT be used for informational coding tasks (Obvious)', + approvalMode: 'default', + prompt: + 'Explain the difference between Map and Object in JavaScript and provide a performance-focused code snippet for each.', + files: { + '.gemini/settings.json': JSON.stringify({ + experimental: { + topicUpdateNarration: true, + }, + }), + }, + assert: async (rig) => { + const toolLogs = rig.readToolLogs(); + const topicCalls = toolLogs.filter( + (l) => l.toolRequest.name === UPDATE_TOPIC_TOOL_NAME, + ); + + expect( + topicCalls.length, + `Expected 0 update_topic calls for an informational task, but found ${topicCalls.length}`, + ).toBe(0); + }, + }); + + evalTest('USUALLY_PASSES', { + name: 'update_topic should NOT be used for surgical symbol searches (Grey Area)', + approvalMode: 'default', + prompt: + "Find the file where the 'UPDATE_TOPIC_TOOL_NAME' constant is defined.", + files: { + 'packages/core/src/tools/tool-names.ts': + "export const 
UPDATE_TOPIC_TOOL_NAME = 'update_topic';", + '.gemini/settings.json': JSON.stringify({ + experimental: { + topicUpdateNarration: true, + }, + }), + }, + assert: async (rig) => { + const toolLogs = rig.readToolLogs(); + const topicCalls = toolLogs.filter( + (l) => l.toolRequest.name === UPDATE_TOPIC_TOOL_NAME, + ); + + expect( + topicCalls.length, + `Expected 0 update_topic calls for a surgical symbol search, but found ${topicCalls.length}`, + ).toBe(0); + }, + }); + + evalTest('USUALLY_PASSES', { + name: 'update_topic should be used for medium complexity multi-step tasks', + prompt: + 'Refactor the `users-api` project. Move the routing logic from src/app.ts into a new file src/routes.ts, and update app.ts to use the new routes file.', + files: { + 'package.json': JSON.stringify( + { + name: 'users-api', + version: '1.0.0', + }, + null, + 2, + ), + 'src/app.ts': ` +import express from 'express'; +const app = express(); + +app.get('/users', (req, res) => { + res.json([{id: 1, name: 'Alice'}]); +}); + +app.post('/users', (req, res) => { + res.status(201).send(); +}); + +export default app; + `, + '.gemini/settings.json': JSON.stringify({ + experimental: { + topicUpdateNarration: true, + }, + }), + }, + assert: async (rig) => { + const toolLogs = rig.readToolLogs(); + const topicCalls = toolLogs.filter( + (l) => l.toolRequest.name === UPDATE_TOPIC_TOOL_NAME, + ); + + // This is a multi-step task (read, create new file, edit old file). + // It should clear the bar and use update_topic at least at the start and end. + expect(topicCalls.length).toBeGreaterThanOrEqual(2); + + // Verify it actually did the refactoring to ensure it didn't just fail immediately + expect(fs.existsSync(path.join(rig.testDir, 'src/routes.ts'))).toBe(true); + }, + }); + + /** + * Regression test for a bug where update_topic was called multiple times in a + * row. 
We have seen cases of this occurring in earlier versions of the update_topic + * system instruction, prior to https://github.com/google-gemini/gemini-cli/pull/24640. + * This test demonstrated that there are cases where it can still occur and validates + * the prompt change that improves the behavior. + */ + evalTest('USUALLY_PASSES', { + name: 'update_topic should not be called twice in a row', + prompt: ` + We need to build a C compiler. + + Before you write any code, you must formally declare your strategy. + First, declare that you will build a Lexer. + Then, immediately realize that is wrong and declare that you will actually build a Parser instead. + + Finally, create 'parser.c'. + `, + files: { + 'package.json': JSON.stringify({ name: 'test-project' }), + '.gemini/settings.json': JSON.stringify({ + experimental: { + topicUpdateNarration: true, + }, + }), + }, + assert: async (rig) => { + const toolLogs = rig.readToolLogs(); + + // Check for back-to-back update_topic calls + for (let i = 1; i < toolLogs.length; i++) { + if ( + toolLogs[i - 1].toolRequest.name === UPDATE_TOPIC_TOOL_NAME && + toolLogs[i].toolRequest.name === UPDATE_TOPIC_TOOL_NAME + ) { + throw new Error( + `Detected back-to-back ${UPDATE_TOPIC_TOOL_NAME} calls at index ${i - 1} and ${i}`, + ); + } + } + }, + }); +}); diff --git a/integration-tests/api-resilience.responses b/integration-tests/api-resilience.responses index d30d29906e..d0520047f7 100644 --- a/integration-tests/api-resilience.responses +++ b/integration-tests/api-resilience.responses @@ -1 +1 @@ -{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Part 1. "}],"role":"model"},"index":0}]},{"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":10,"totalTokenCount":110}},{"candidates":[{"content":{"parts":[{"text":"Part 2."}],"role":"model"},"index":0}],"finishReason":"STOP"}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Part 1. 
"}],"role":"model"},"index":0}]},{"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":10,"totalTokenCount":110}},{"candidates":[{"content":{"parts":[{"text":"Part 2."}],"role":"model"},"index":0,"finishReason":"STOP"}]}]} diff --git a/integration-tests/browser-agent-localhost.dynamic.responses b/integration-tests/browser-agent-localhost.dynamic.responses new file mode 100644 index 0000000000..bade94af88 --- /dev/null +++ b/integration-tests/browser-agent-localhost.dynamic.responses @@ -0,0 +1,6 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll check the dynamic content page on the localhost server."},{"functionCall":{"name":"browser_agent","args":{"task":"Navigate to http://127.0.0.1:18923/dynamic.html, wait for the dynamic content to load, then capture the accessibility tree and report what content appeared"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":40,"totalTokenCount":140}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"navigate_page","args":{"url":"http://127.0.0.1:18923/dynamic.html"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":20,"totalTokenCount":120}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"wait_for","args":{"selector":"#dynamic-content","state":"visible","timeout":5000}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":150,"candidatesTokenCount":25,"totalTokenCount":175}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"take_snapshot","args":{}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":180,"candidatesTokenCount":15,"totalTokenCount":195}}]} 
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"complete_task","args":{"result":{"success":true,"summary":"Dynamic content loaded successfully. Found heading 'Content Loaded', message 'This content was loaded dynamically via JavaScript.', and a list with items: Item Alpha, Item Beta, Item Gamma."}}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":40,"totalTokenCount":240}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The dynamic content page loaded successfully:\n\n- **Heading**: Content Loaded\n- **Message**: This content was loaded dynamically via JavaScript\n- **Items**: Item Alpha, Item Beta, Item Gamma\n\nThe JavaScript-rendered content appeared after the initial page load, replacing the 'Loading...' indicator."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":60,"totalTokenCount":260}}]} diff --git a/integration-tests/browser-agent-localhost.form.responses b/integration-tests/browser-agent-localhost.form.responses new file mode 100644 index 0000000000..119d1ff46f --- /dev/null +++ b/integration-tests/browser-agent-localhost.form.responses @@ -0,0 +1,9 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll fill out the contact form on the localhost server."},{"functionCall":{"name":"browser_agent","args":{"task":"Navigate to http://127.0.0.1:18923/form.html, fill in the name field with 'Test User', the email field with 'test@example.com', the message field with 'Hello World', and submit the form"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":50,"totalTokenCount":150}}]} 
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"navigate_page","args":{"url":"http://127.0.0.1:18923/form.html"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":20,"totalTokenCount":120}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"fill","args":{"selector":"#name","value":"Test User"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":150,"candidatesTokenCount":25,"totalTokenCount":175}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"fill","args":{"selector":"#email","value":"test@example.com"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":180,"candidatesTokenCount":25,"totalTokenCount":205}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"fill","args":{"selector":"#message","value":"Hello World"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":210,"candidatesTokenCount":25,"totalTokenCount":235}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"click","args":{"selector":"#submit-btn"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":240,"candidatesTokenCount":20,"totalTokenCount":260}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"take_snapshot","args":{}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":270,"candidatesTokenCount":15,"totalTokenCount":285}}]} 
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"complete_task","args":{"result":{"success":true,"summary":"Successfully filled and submitted the contact form. Navigated to form-result.html which shows 'Form Submitted Successfully'."}}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":300,"candidatesTokenCount":40,"totalTokenCount":340}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I've successfully filled out and submitted the contact form:\n\n- **Name**: Test User\n- **Email**: test@example.com\n- **Message**: Hello World\n\nAfter submission, the page navigated to the result page showing 'Form Submitted Successfully'."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":300,"candidatesTokenCount":60,"totalTokenCount":360}}]} diff --git a/integration-tests/browser-agent-localhost.multistep.responses b/integration-tests/browser-agent-localhost.multistep.responses new file mode 100644 index 0000000000..37fc8d438c --- /dev/null +++ b/integration-tests/browser-agent-localhost.multistep.responses @@ -0,0 +1,9 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll go through the multi-step flow on the localhost server."},{"functionCall":{"name":"browser_agent","args":{"task":"Navigate to http://127.0.0.1:18923/multi-step/step1.html, fill in 'testuser' as the username, click Next, then on step 2 select 'Option B' and click Finish. 
Report the final result page content."}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":50,"totalTokenCount":150}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"navigate_page","args":{"url":"http://127.0.0.1:18923/multi-step/step1.html"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":20,"totalTokenCount":120}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"fill","args":{"selector":"#username","value":"testuser"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":150,"candidatesTokenCount":25,"totalTokenCount":175}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"click","args":{"selector":"#next-btn"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":180,"candidatesTokenCount":20,"totalTokenCount":200}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"take_snapshot","args":{}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":210,"candidatesTokenCount":15,"totalTokenCount":225}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"click","args":{"selector":"#finish-btn"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":240,"candidatesTokenCount":20,"totalTokenCount":260}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"take_snapshot","args":{}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":270,"candidatesTokenCount":15,"totalTokenCount":285}}]} 
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"complete_task","args":{"result":{"success":true,"summary":"Completed all steps. Step 1: entered username 'testuser'. Step 2: selected default option. Final result page shows 'Multi-Step Complete' with '✓ Complete' status badge."}}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":300,"candidatesTokenCount":40,"totalTokenCount":340}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I've completed the multi-step flow:\n\n1. **Step 1**: Entered 'testuser' as username and clicked Next\n2. **Step 2**: Confirmed selection and clicked Finish\n3. **Result**: Final page shows 'Multi-Step Complete' with a '✓ Complete' status badge\n\nAll steps were successfully navigated."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":300,"candidatesTokenCount":60,"totalTokenCount":360}}]} diff --git a/integration-tests/browser-agent-localhost.navigate.responses b/integration-tests/browser-agent-localhost.navigate.responses new file mode 100644 index 0000000000..676696bf6b --- /dev/null +++ b/integration-tests/browser-agent-localhost.navigate.responses @@ -0,0 +1,5 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll navigate to the localhost page and read its content using the browser agent."},{"functionCall":{"name":"browser_agent","args":{"task":"Navigate to http://127.0.0.1:18923/index.html and tell me the page title and list all links on the page"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":40,"totalTokenCount":140}}]} 
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"navigate_page","args":{"url":"http://127.0.0.1:18923/index.html"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":20,"totalTokenCount":120}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"take_snapshot","args":{}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":150,"candidatesTokenCount":20,"totalTokenCount":170}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"complete_task","args":{"result":{"success":true,"summary":"Page title is 'Test Fixture - Home'. Found 3 links: Contact Form (/form.html), Multi-Step Flow (/multi-step/step1.html), Dynamic Content (/dynamic.html)."}}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":40,"totalTokenCount":240}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The localhost test fixture page has:\n\n**Title**: Test Fixture - Home\n\n**Links**:\n1. Contact Form (form.html)\n2. Multi-Step Flow (multi-step/step1.html)\n3. 
Dynamic Content (dynamic.html)\n\nThe page also has a heading 'Test Fixture Home Page' and footer content."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":60,"totalTokenCount":260}}]} diff --git a/integration-tests/browser-agent-localhost.screenshot.responses b/integration-tests/browser-agent-localhost.screenshot.responses new file mode 100644 index 0000000000..762b560697 --- /dev/null +++ b/integration-tests/browser-agent-localhost.screenshot.responses @@ -0,0 +1,5 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll take a screenshot of the localhost test page."},{"functionCall":{"name":"browser_agent","args":{"task":"Navigate to http://127.0.0.1:18923/index.html and take a screenshot of the page"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":35,"totalTokenCount":135}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"navigate_page","args":{"url":"http://127.0.0.1:18923/index.html"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":20,"totalTokenCount":120}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"take_screenshot","args":{}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":150,"candidatesTokenCount":15,"totalTokenCount":165}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"complete_task","args":{"result":{"success":true,"summary":"Screenshot captured of the localhost test fixture home page showing the heading, navigation links, and 
footer.","data":{"screenshotTaken":true}}}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":40,"totalTokenCount":240}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I've captured a screenshot of the localhost test fixture page. The screenshot shows the 'Test Fixture Home Page' heading with navigation links to the Contact Form, Multi-Step Flow, and Dynamic Content pages, along with the footer section."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":50,"totalTokenCount":250}}]} diff --git a/integration-tests/browser-agent-localhost.test.ts b/integration-tests/browser-agent-localhost.test.ts new file mode 100644 index 0000000000..2de37ba7a9 --- /dev/null +++ b/integration-tests/browser-agent-localhost.test.ts @@ -0,0 +1,161 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { TestRig, assertModelHasOutput } from './test-helper.js'; +import { dirname, join } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +describe('browser-agent-localhost', () => { + let rig: TestRig; + + const browserSettings = { + agents: { + overrides: { + browser_agent: { + enabled: true, + }, + }, + browser: { + headless: true, + sessionMode: 'isolated' as const, + }, + }, + }; + + beforeEach(() => { + rig = new TestRig(); + }); + + afterEach(async () => { + await rig.cleanup(); + }); + + it('should navigate to localhost fixture and read page content', async () => { + rig.setup('localhost-navigate', { + fakeResponsesPath: join( + __dirname, + 'browser-agent-localhost.navigate.responses', + ), + settings: browserSettings, + }); + + const result = await rig.run({ + args: 'Navigate to 
http://127.0.0.1:18923/index.html and tell me the page title and list all links.', + }); + + assertModelHasOutput(result); + + const toolLogs = rig.readToolLogs(); + const browserAgentCall = toolLogs.find( + (t) => t.toolRequest.name === 'browser_agent', + ); + expect( + browserAgentCall, + 'Expected browser_agent to be called', + ).toBeDefined(); + }); + + it('should fill out and submit a form on localhost', async () => { + rig.setup('localhost-form', { + fakeResponsesPath: join( + __dirname, + 'browser-agent-localhost.form.responses', + ), + settings: browserSettings, + }); + + const result = await rig.run({ + args: "Navigate to http://127.0.0.1:18923/form.html, fill in name='Test User', email='test@example.com', message='Hello World', and submit the form.", + }); + + assertModelHasOutput(result); + + const toolLogs = rig.readToolLogs(); + const browserAgentCall = toolLogs.find( + (t) => t.toolRequest.name === 'browser_agent', + ); + expect( + browserAgentCall, + 'Expected browser_agent to be called', + ).toBeDefined(); + }); + + it('should navigate through a multi-step flow', async () => { + rig.setup('localhost-multistep', { + fakeResponsesPath: join( + __dirname, + 'browser-agent-localhost.multistep.responses', + ), + settings: browserSettings, + }); + + const result = await rig.run({ + args: "Go to http://127.0.0.1:18923/multi-step/step1.html, fill in 'testuser' as username, click Next, then click Finish on step 2. 
Report the result.", + }); + + assertModelHasOutput(result); + + const toolLogs = rig.readToolLogs(); + const browserAgentCall = toolLogs.find( + (t) => t.toolRequest.name === 'browser_agent', + ); + expect( + browserAgentCall, + 'Expected browser_agent to be called', + ).toBeDefined(); + }); + + it('should handle dynamically loaded content', async () => { + rig.setup('localhost-dynamic', { + fakeResponsesPath: join( + __dirname, + 'browser-agent-localhost.dynamic.responses', + ), + settings: browserSettings, + }); + + const result = await rig.run({ + args: 'Navigate to http://127.0.0.1:18923/dynamic.html, wait for content to load, and tell me what items appear.', + }); + + assertModelHasOutput(result); + + const toolLogs = rig.readToolLogs(); + const browserAgentCall = toolLogs.find( + (t) => t.toolRequest.name === 'browser_agent', + ); + expect( + browserAgentCall, + 'Expected browser_agent to be called', + ).toBeDefined(); + }); + + it('should take a screenshot of localhost page', async () => { + rig.setup('localhost-screenshot', { + fakeResponsesPath: join( + __dirname, + 'browser-agent-localhost.screenshot.responses', + ), + settings: browserSettings, + }); + + const result = await rig.run({ + args: 'Navigate to http://127.0.0.1:18923/index.html and take a screenshot.', + }); + + assertModelHasOutput(result); + + const toolLogs = rig.readToolLogs(); + const browserCalls = toolLogs.filter( + (t) => t.toolRequest.name === 'browser_agent', + ); + expect(browserCalls.length).toBeGreaterThan(0); + }); +}); diff --git a/integration-tests/browser-agent.cleanup.responses b/integration-tests/browser-agent.cleanup.responses index 9cf7a7b356..e99c757793 100644 --- a/integration-tests/browser-agent.cleanup.responses +++ b/integration-tests/browser-agent.cleanup.responses @@ -1,4 +1,5 @@ {"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll open https://example.com and check the page title for 
you."},{"functionCall":{"name":"browser_agent","args":{"task":"Open https://example.com and get the page title"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":35,"totalTokenCount":135}}]} -{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I have opened the page and the title is 'Example Domain'."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":30,"totalTokenCount":230}}]} -{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The task is complete. The page title is 'Example Domain'."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":300,"candidatesTokenCount":20,"totalTokenCount":320}}]} -{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Done."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":400,"candidatesTokenCount":5,"totalTokenCount":405}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"navigate_page","args":{"url":"https://example.com"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":20,"totalTokenCount":120}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"take_snapshot","args":{}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":150,"candidatesTokenCount":20,"totalTokenCount":170}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"complete_task","args":{"result":{"success":true,"summary":"The page title is 'Example 
Domain'."}}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":30,"totalTokenCount":230}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I have opened the page and the title is 'Example Domain'. The browser session has been cleaned up successfully."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":30,"totalTokenCount":230}}]} diff --git a/integration-tests/browser-agent.interaction.responses b/integration-tests/browser-agent.interaction.responses index 98474d6b59..0b4a1d84f7 100644 --- a/integration-tests/browser-agent.interaction.responses +++ b/integration-tests/browser-agent.interaction.responses @@ -1,2 +1,5 @@ {"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll navigate to https://example.com and analyze the links on the page."},{"functionCall":{"name":"browser_agent","args":{"task":"Go to https://example.com and find all links on the page, then describe them"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":40,"totalTokenCount":140}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"navigate_page","args":{"url":"https://example.com"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":20,"totalTokenCount":120}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"take_snapshot","args":{}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":150,"candidatesTokenCount":20,"totalTokenCount":170}}]} 
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"complete_task","args":{"result":{"success":true,"summary":"Found one link on https://example.com: 'More information...' linking to the IANA website for details about reserved domains."}}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":40,"totalTokenCount":240}}]} {"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"After analyzing https://example.com, I found the following links:\n\n1. **\"More information...\"** - This is the main link on the page that points to the IANA (Internet Assigned Numbers Authority) website for more details about reserved domains.\n\nThe page is quite minimal with just this single informational link, which is typical for example domains used in documentation."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":70,"totalTokenCount":270}}]} diff --git a/integration-tests/browser-agent.navigate-snapshot.responses b/integration-tests/browser-agent.navigate-snapshot.responses index 481520234d..e9c9490a21 100644 --- a/integration-tests/browser-agent.navigate-snapshot.responses +++ b/integration-tests/browser-agent.navigate-snapshot.responses @@ -1,2 +1,5 @@ {"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll help you open https://example.com and analyze the page. 
Let me use the browser agent to navigate and capture the page information."},{"functionCall":{"name":"browser_agent","args":{"task":"Navigate to https://example.com and capture the accessibility tree to get the page title and main content"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":50,"totalTokenCount":150}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"navigate_page","args":{"url":"https://example.com"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":30,"totalTokenCount":130}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"take_snapshot","args":{}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":150,"candidatesTokenCount":20,"totalTokenCount":170}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"complete_task","args":{"result":{"success":true,"summary":"Successfully navigated to https://example.com. The page title is 'Example Domain' and the main content states: 'This domain is for use in illustrative examples in documents.'"}}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":40,"totalTokenCount":240}}]} {"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Based on the browser analysis of https://example.com:\n\n**Page Title**: Example Domain\n\n**Main Content**: The page contains a simple heading \"Example Domain\" and explanatory text stating \"This domain is for use in illustrative examples in documents. 
You may use this domain in literature without prior coordination or asking for permission.\"\n\nThe page has a clean, minimal layout typical of placeholder domains used for documentation and examples."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":80,"totalTokenCount":280}}]} diff --git a/integration-tests/browser-agent.persistent-session.responses b/integration-tests/browser-agent.persistent-session.responses new file mode 100644 index 0000000000..ee224858f1 --- /dev/null +++ b/integration-tests/browser-agent.persistent-session.responses @@ -0,0 +1,8 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll browse to example.com twice to verify the content. Let me first check the page title, then check the links on the page."},{"functionCall":{"name":"browser_agent","args":{"task":"Navigate to https://example.com and tell me the page title using the accessibility tree"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":50,"totalTokenCount":150}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"navigate_page","args":{"url":"https://example.com"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":30,"totalTokenCount":130}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"take_snapshot","args":{}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":150,"candidatesTokenCount":20,"totalTokenCount":170}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"complete_task","args":{"result":{"success":true,"summary":"Page title is 'Example 
Domain'."}}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":40,"totalTokenCount":240}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The page title is 'Example Domain'. Now let me check the links on the page."},{"functionCall":{"name":"browser_agent","args":{"task":"Take a snapshot of the accessibility tree on the currently open page and tell me about any links"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":50,"totalTokenCount":250}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"take_snapshot","args":{}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":150,"candidatesTokenCount":20,"totalTokenCount":170}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"complete_task","args":{"result":{"success":true,"summary":"Found a link 'More information...' pointing to iana.org."}}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":40,"totalTokenCount":240}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I browsed example.com twice using persistent browser sessions:\n\n1. **First visit**: Page title is 'Example Domain'\n2. **Second visit**: Found a link 'More information...' 
pointing to iana.org\n\nThe browser stayed open between both visits, confirming persistent session management works correctly."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":300,"candidatesTokenCount":60,"totalTokenCount":360}}]} diff --git a/integration-tests/browser-agent.screenshot.responses b/integration-tests/browser-agent.screenshot.responses index c9abaa4268..84467f517a 100644 --- a/integration-tests/browser-agent.screenshot.responses +++ b/integration-tests/browser-agent.screenshot.responses @@ -1,2 +1,5 @@ {"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll navigate to https://example.com and take a screenshot for you."},{"functionCall":{"name":"browser_agent","args":{"task":"Navigate to https://example.com and take a screenshot of the page"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":40,"totalTokenCount":140}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"navigate_page","args":{"url":"https://example.com"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":20,"totalTokenCount":120}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"take_screenshot","args":{}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":150,"candidatesTokenCount":20,"totalTokenCount":170}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"complete_task","args":{"result":{"success":true,"summary":"Successfully navigated to https://example.com and captured a screenshot. 
The page shows the Example Domain header with explanatory text."}}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":40,"totalTokenCount":240}}]} {"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I've successfully navigated to https://example.com and captured a screenshot. The page shows the familiar \"Example Domain\" header with explanatory text below it. The screenshot captures the clean, minimal layout of this demonstration website."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":50,"totalTokenCount":250}}]} diff --git a/integration-tests/browser-agent.sequential.responses b/integration-tests/browser-agent.sequential.responses index 74732998aa..6dc1e96268 100644 --- a/integration-tests/browser-agent.sequential.responses +++ b/integration-tests/browser-agent.sequential.responses @@ -1,2 +1,6 @@ {"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll help you navigate to https://example.com and perform both operations - capturing the accessibility tree and taking a screenshot."},{"functionCall":{"name":"browser_agent","args":{"task":"Navigate to https://example.com, take a snapshot of the accessibility tree, then take a screenshot"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":45,"totalTokenCount":145}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"navigate_page","args":{"url":"https://example.com"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":20,"totalTokenCount":120}}]} 
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"take_snapshot","args":{}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":150,"candidatesTokenCount":20,"totalTokenCount":170}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"take_screenshot","args":{}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":20,"totalTokenCount":220}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"complete_task","args":{"result":{"success":true,"summary":"Successfully navigated to https://example.com, captured accessibility tree snapshot and took a screenshot. Page title is 'Example Domain'."}}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":250,"candidatesTokenCount":40,"totalTokenCount":290}}]} {"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I've successfully completed both operations on https://example.com:\n\n**Accessibility Tree Snapshot**: The page has a clear structure with the main heading \"Example Domain\" and descriptive text about the domain's purpose for documentation examples.\n\n**Screenshot**: Captured a visual representation of the page showing the clean, minimal layout with the heading and explanatory text.\n\nBoth the accessibility data and visual screenshot confirm this is the standard example domain page used for documentation purposes."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":80,"totalTokenCount":280}}]} diff --git a/integration-tests/browser-agent.test.ts b/integration-tests/browser-agent.test.ts index f9f07d4c9e..09e20bcb26 100644 --- a/integration-tests/browser-agent.test.ts +++ b/integration-tests/browser-agent.test.ts @@ 
-77,7 +77,12 @@ describe.skipIf(!chromeAvailable)('browser-agent', () => { ), settings: { agents: { - browser_agent: { + overrides: { + browser_agent: { + enabled: true, + }, + }, + browser: { headless: true, sessionMode: 'isolated', }, @@ -106,7 +111,12 @@ describe.skipIf(!chromeAvailable)('browser-agent', () => { fakeResponsesPath: join(__dirname, 'browser-agent.screenshot.responses'), settings: { agents: { - browser_agent: { + overrides: { + browser_agent: { + enabled: true, + }, + }, + browser: { headless: true, sessionMode: 'isolated', }, @@ -132,7 +142,12 @@ describe.skipIf(!chromeAvailable)('browser-agent', () => { fakeResponsesPath: join(__dirname, 'browser-agent.interaction.responses'), settings: { agents: { - browser_agent: { + overrides: { + browser_agent: { + enabled: true, + }, + }, + browser: { headless: true, sessionMode: 'isolated', }, @@ -161,7 +176,12 @@ describe.skipIf(!chromeAvailable)('browser-agent', () => { fakeResponsesPath: join(__dirname, 'browser-agent.cleanup.responses'), settings: { agents: { - browser_agent: { + overrides: { + browser_agent: { + enabled: true, + }, + }, + browser: { headless: true, sessionMode: 'isolated', }, @@ -182,7 +202,12 @@ describe.skipIf(!chromeAvailable)('browser-agent', () => { fakeResponsesPath: join(__dirname, 'browser-agent.sequential.responses'), settings: { agents: { - browser_agent: { + overrides: { + browser_agent: { + enabled: true, + }, + }, + browser: { headless: true, sessionMode: 'isolated', }, @@ -204,6 +229,51 @@ describe.skipIf(!chromeAvailable)('browser-agent', () => { assertModelHasOutput(result); }); + it('should keep browser open across multiple browser_agent invocations', async () => { + rig.setup('browser-persistent-session', { + fakeResponsesPath: join( + __dirname, + 'browser-agent.persistent-session.responses', + ), + settings: { + agents: { + overrides: { + browser_agent: { + enabled: true, + }, + }, + browser: { + headless: true, + sessionMode: 'isolated', + }, + }, + }, + }); + + 
const result = await rig.run({ + args: 'Browse to example.com twice: first get the page title, then check for links.', + }); + + const toolLogs = rig.readToolLogs(); + const browserCalls = toolLogs.filter( + (t) => t.toolRequest.name === 'browser_agent', + ); + + // Both browser_agent invocations must succeed — if the browser was + // incorrectly closed after the first call (regression #24210), + // the second call would fail. + expect( + browserCalls.length, + 'Expected browser_agent to be called twice', + ).toBe(2); + expect( + browserCalls.every((c) => c.toolRequest.success), + 'Both browser_agent calls should succeed', + ).toBe(true); + + assertModelHasOutput(result); + }); + it('should handle tool confirmation for write_file without crashing', async () => { rig.setup('tool-confirmation', { fakeResponsesPath: join( @@ -212,7 +282,12 @@ describe.skipIf(!chromeAvailable)('browser-agent', () => { ), settings: { agents: { - browser_agent: { + overrides: { + browser_agent: { + enabled: true, + }, + }, + browser: { headless: true, sessionMode: 'isolated', }, diff --git a/integration-tests/browser-policy.test.ts b/integration-tests/browser-policy.test.ts index f533cb3f5e..4fbfc5db01 100644 --- a/integration-tests/browser-policy.test.ts +++ b/integration-tests/browser-policy.test.ts @@ -10,8 +10,13 @@ import { dirname, join } from 'node:path'; import { fileURLToPath } from 'node:url'; import { execSync } from 'node:child_process'; import { existsSync, writeFileSync, readFileSync, mkdirSync } from 'node:fs'; +import { env } from 'node:process'; import stripAnsi from 'strip-ansi'; +// Browser agent Chrome DevTools MCP connection is flaky in Docker sandbox. 
+// See: https://github.com/google-gemini/gemini-cli/issues/24382 +const isDockerSandbox = env['GEMINI_SANDBOX'] === 'docker'; + const __filename = fileURLToPath(import.meta.url); const __dirname = dirname(__filename); @@ -59,122 +64,146 @@ describe.skipIf(!chromeAvailable)('browser-policy', () => { await rig.cleanup(); }); - it('should skip confirmation when "Allow all server tools for this session" is chosen', async () => { - rig.setup('browser-policy-skip-confirmation', { - fakeResponsesPath: join(__dirname, 'browser-policy.responses'), - settings: { - agents: { - overrides: { - browser_agent: { - enabled: true, + it.skipIf(isDockerSandbox)( + 'should skip confirmation when "Allow all server tools for this session" is chosen', + async () => { + rig.setup('browser-policy-skip-confirmation', { + fakeResponsesPath: join(__dirname, 'browser-policy.responses'), + settings: { + agents: { + overrides: { + browser_agent: { + enabled: true, + }, + }, + browser: { + headless: true, + sessionMode: 'isolated', + allowedDomains: ['example.com'], }, }, - browser: { - headless: true, - sessionMode: 'isolated', - allowedDomains: ['example.com'], - }, }, - }, - }); + }); - // Manually trust the folder to avoid the dialog and enable option 3 - const geminiDir = join(rig.homeDir!, '.gemini'); - mkdirSync(geminiDir, { recursive: true }); + // Manually trust the folder to avoid the dialog and enable option 3 + const geminiDir = join(rig.homeDir!, '.gemini'); + mkdirSync(geminiDir, { recursive: true }); - // Write to trustedFolders.json - const trustedFoldersPath = join(geminiDir, 'trustedFolders.json'); - const trustedFolders = { - [rig.testDir!]: 'TRUST_FOLDER', - }; - writeFileSync(trustedFoldersPath, JSON.stringify(trustedFolders, null, 2)); + // Write to trustedFolders.json + const trustedFoldersPath = join(geminiDir, 'trustedFolders.json'); + const trustedFolders = { + [rig.testDir!]: 'TRUST_FOLDER', + }; + writeFileSync( + trustedFoldersPath, + JSON.stringify(trustedFolders, 
null, 2), + ); - // Force confirmation for browser agent. - // NOTE: We don't force confirm browser tools here because "Allow all server tools" - // adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by - // a rule in the user tier (4.x) like the one from this TOML. - // By removing the explicit mcp rule, the first MCP tool will still prompt - // due to default approvalMode = 'default', and then "Allow all" will correctly - // bypass subsequent tools. - const policyFile = join(rig.testDir!, 'force-confirm.toml'); - writeFileSync( - policyFile, - ` + // Force confirmation for browser agent. + // NOTE: We don't force confirm browser tools here because "Allow all server tools" + // adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by + // a rule in the user tier (4.x) like the one from this TOML. + // By removing the explicit mcp rule, the first MCP tool will still prompt + // due to default approvalMode = 'default', and then "Allow all" will correctly + // bypass subsequent tools. 
+ const policyFile = join(rig.testDir!, 'force-confirm.toml'); + writeFileSync( + policyFile, + ` [[rule]] name = "Force confirm browser_agent" toolName = "browser_agent" decision = "ask_user" priority = 200 `, - ); + ); - // Update settings.json in both project and home directories to point to the policy file - for (const baseDir of [rig.testDir!, rig.homeDir!]) { - const settingsPath = join(baseDir, '.gemini', 'settings.json'); - if (existsSync(settingsPath)) { - const settings = JSON.parse(readFileSync(settingsPath, 'utf-8')); - settings.policyPaths = [policyFile]; - // Ensure folder trust is enabled - settings.security = settings.security || {}; - settings.security.folderTrust = settings.security.folderTrust || {}; - settings.security.folderTrust.enabled = true; - writeFileSync(settingsPath, JSON.stringify(settings, null, 2)); + // Update settings.json in both project and home directories to point to the policy file + for (const baseDir of [rig.testDir!, rig.homeDir!]) { + const settingsPath = join(baseDir, '.gemini', 'settings.json'); + if (existsSync(settingsPath)) { + const settings = JSON.parse(readFileSync(settingsPath, 'utf-8')); + settings.policyPaths = [policyFile]; + // Ensure folder trust is enabled + settings.security = settings.security || {}; + settings.security.folderTrust = settings.security.folderTrust || {}; + settings.security.folderTrust.enabled = true; + writeFileSync(settingsPath, JSON.stringify(settings, null, 2)); + } } - } - const run = await rig.runInteractive({ - approvalMode: 'default', - env: { - GEMINI_CLI_INTEGRATION_TEST: 'true', - }, - }); + const run = await rig.runInteractive({ + approvalMode: 'default', + env: { + GEMINI_CLI_INTEGRATION_TEST: 'true', + }, + }); - await run.sendKeys( - 'Open https://example.com and check if there is a heading\r', - ); - await run.sendKeys('\r'); + await run.sendKeys( + 'Open https://example.com and check if there is a heading\r', + ); + await run.sendKeys('\r'); - // Handle confirmations. 
- // 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once) - await poll( - () => stripAnsi(run.output).toLowerCase().includes('action required'), - 60000, - 1000, - ); - await run.sendKeys('1\r'); - await new Promise((r) => setTimeout(r, 2000)); + // Handle confirmations. + // 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once) + await poll( + () => stripAnsi(run.output).toLowerCase().includes('action required'), + 60000, + 1000, + ); + await run.sendKeys('1\r'); + await new Promise((r) => setTimeout(r, 2000)); - // Handle privacy notice - await poll( - () => stripAnsi(run.output).toLowerCase().includes('privacy notice'), - 5000, - 100, - ); - await run.sendKeys('1\r'); - await new Promise((r) => setTimeout(r, 5000)); + // Handle privacy notice + await poll( + () => stripAnsi(run.output).toLowerCase().includes('privacy notice'), + 5000, + 100, + ); + await run.sendKeys('1\r'); + await new Promise((r) => setTimeout(r, 5000)); - // new_page (MCP tool, should have 4 options, use option 3: Allow all server tools) - await poll( - () => { - const stripped = stripAnsi(run.output).toLowerCase(); - return ( - stripped.includes('new_page') && - stripped.includes('allow all server tools for this session') - ); - }, - 60000, - 1000, - ); + // new_page (MCP tool, should have 4 options, use option 3: Allow all server tools) + await poll( + () => { + const stripped = stripAnsi(run.output).toLowerCase(); + return ( + stripped.includes('new_page') && + stripped.includes('allow all server tools for this session') + ); + }, + 60000, + 1000, + ); - // Select "Allow all server tools for this session" (option 3) - await run.sendKeys('3\r'); - await new Promise((r) => setTimeout(r, 30000)); + // Select "Allow all server tools for this session" (option 3) + await run.sendKeys('3\r'); - const output = stripAnsi(run.output).toLowerCase(); + // Wait for the browser agent to finish (success or failure) + await poll( + () 
=> { + const stripped = stripAnsi(run.output).toLowerCase(); + return ( + stripped.includes('completed successfully') || + stripped.includes('agent error') + ); + }, + 120000, + 1000, + ); - expect(output).toContain('browser_agent'); - expect(output).toContain('completed successfully'); - }); + const output = stripAnsi(run.output).toLowerCase(); + + expect(output).toContain('browser_agent'); + // The test validates that "Allow all server tools" skips subsequent + // tool confirmations — the browser agent may still fail due to + // Chrome/MCP issues in CI, which is acceptable for this policy test. + expect( + output.includes('completed successfully') || + output.includes('agent error'), + ).toBe(true); + }, + ); it('should show the visible warning when browser agent starts in existing session mode', async () => { rig.setup('browser-session-warning', { diff --git a/integration-tests/file-system.test.ts b/integration-tests/file-system.test.ts index 64481068c2..80552cfd68 100644 --- a/integration-tests/file-system.test.ts +++ b/integration-tests/file-system.test.ts @@ -121,6 +121,7 @@ describe('file-system', () => { const result = await rig.run({ args: `write "hello" to "${fileName}" and then stop. 
Do not perform any other actions.`, + timeout: 600000, // 10 min — real LLM can be slow in Docker sandbox }); const foundToolCall = await rig.waitForToolCall('write_file'); diff --git a/integration-tests/globalSetup.ts b/integration-tests/globalSetup.ts index 5f963f7459..9dad51f9b3 100644 --- a/integration-tests/globalSetup.ts +++ b/integration-tests/globalSetup.ts @@ -9,16 +9,80 @@ if (process.env['NO_COLOR'] !== undefined) { delete process.env['NO_COLOR']; } -import { mkdir, readdir, rm } from 'node:fs/promises'; -import { join, dirname } from 'node:path'; +import { mkdir, readdir, rm, readFile } from 'node:fs/promises'; +import { join, dirname, extname } from 'node:path'; import { fileURLToPath } from 'node:url'; import { canUseRipgrep } from '../packages/core/src/tools/ripGrep.js'; import { disableMouseTracking } from '@google/gemini-cli-core'; +import { createServer, type Server } from 'node:http'; const __dirname = dirname(fileURLToPath(import.meta.url)); const rootDir = join(__dirname, '..'); const integrationTestsDir = join(rootDir, '.integration-tests'); let runDir = ''; // Make runDir accessible in teardown +let fixtureServer: Server | undefined; + +const FIXTURE_PORT = 18923; +const FIXTURE_DIR = join(__dirname, 'test-fixtures'); + +const MIME_TYPES: Record = { + '.html': 'text/html', + '.css': 'text/css', + '.js': 'application/javascript', + '.json': 'application/json', + '.png': 'image/png', + '.jpg': 'image/jpeg', + '.svg': 'image/svg+xml', +}; + +async function startFixtureServer(): Promise { + return new Promise((resolve, reject) => { + const server = createServer(async (req, res) => { + const urlPath = req.url?.split('?')[0] || '/'; + const relativePath = urlPath === '/' ? 'index.html' : urlPath; + const filePath = join(FIXTURE_DIR, relativePath); + + if (!filePath.startsWith(FIXTURE_DIR)) { + res.writeHead(403, { 'Content-Type': 'text/html' }); + res.end('

403 Forbidden

'); + return; + } + + try { + const content = await readFile(filePath); + const ext = extname(filePath); + res.writeHead(200, { + 'Content-Type': MIME_TYPES[ext] || 'application/octet-stream', + }); + res.end(content); + } catch { + res.writeHead(404, { 'Content-Type': 'text/html' }); + res.end('

404 Not Found

'); + } + }); + + server.on('error', (err: NodeJS.ErrnoException) => { + if (err.code === 'EADDRINUSE') { + console.warn( + `Port ${FIXTURE_PORT} in use, trying ${FIXTURE_PORT + 1}...`, + ); + server.listen(FIXTURE_PORT + 1, '127.0.0.1'); + } else { + reject(err); + } + }); + + server.on('listening', () => { + const addr = server.address(); + const port = typeof addr === 'object' && addr ? addr.port : FIXTURE_PORT; + fixtureServer = server; + console.log(`Test fixture server listening on http://127.0.0.1:${port}`); + resolve(port); + }); + + server.listen(FIXTURE_PORT, '127.0.0.1'); + }); +} export async function setup() { runDir = join(integrationTestsDir, `${Date.now()}`); @@ -40,6 +104,10 @@ export async function setup() { throw new Error('Failed to download ripgrep binary'); } + // Start the test fixture server + const port = await startFixtureServer(); + process.env['TEST_FIXTURE_PORT'] = String(port); + // Clean up old test runs, but keep the latest few for debugging try { const testRuns = await readdir(integrationTestsDir); @@ -73,6 +141,14 @@ export async function setup() { } export async function teardown() { + // Stop the fixture server + if (fixtureServer) { + await new Promise((resolve) => { + fixtureServer!.close(() => resolve()); + }); + fixtureServer = undefined; + } + // Disable mouse tracking if (process.stdout.isTTY) { disableMouseTracking(); diff --git a/integration-tests/plan-mode.test.ts b/integration-tests/plan-mode.test.ts index d8d297c460..94ed65f1fe 100644 --- a/integration-tests/plan-mode.test.ts +++ b/integration-tests/plan-mode.test.ts @@ -23,7 +23,9 @@ describe('Plan Mode', () => { 'should allow read-only tools but deny write tools in plan mode', { settings: { - experimental: { plan: true }, + general: { + plan: { enabled: true }, + }, tools: { core: [ 'run_shell_command', @@ -67,15 +69,12 @@ describe('Plan Mode', () => { await rig.setup(testName, { settings: { - experimental: { plan: true }, tools: { core: ['write_file', 'read_file', 
'list_directory'], }, general: { + plan: { enabled: true, directory: plansDir }, defaultApprovalMode: 'plan', - plan: { - directory: plansDir, - }, }, }, }); @@ -120,22 +119,19 @@ describe('Plan Mode', () => { await rig.setup(testName, { settings: { - experimental: { plan: true }, tools: { core: ['write_file', 'read_file', 'list_directory'], }, general: { + plan: { enabled: true, directory: plansDir }, defaultApprovalMode: 'plan', - plan: { - directory: plansDir, - }, }, }, }); await rig.run({ approvalMode: 'plan', - args: 'Create a file called hello.txt in the current directory.', + args: 'Attempt to create a file named "hello.txt" in the current directory. Do not create a plan file, try to write hello.txt directly.', }); const toolLogs = rig.readToolLogs(); @@ -156,7 +152,9 @@ describe('Plan Mode', () => { it('should be able to enter plan mode from default mode', async () => { await rig.setup('should be able to enter plan mode from default mode', { settings: { - experimental: { plan: true }, + general: { + plan: { enabled: true }, + }, tools: { core: ['enter_plan_mode'], allowed: ['enter_plan_mode'], @@ -184,15 +182,12 @@ describe('Plan Mode', () => { await rig.setup(testName, { settings: { - experimental: { plan: true }, tools: { core: ['write_file', 'read_file', 'list_directory'], }, general: { + plan: { enabled: true, directory: plansDir }, defaultApprovalMode: 'plan', - plan: { - directory: plansDir, - }, }, }, }); diff --git a/integration-tests/shell-background.responses b/integration-tests/shell-background.responses new file mode 100644 index 0000000000..652b82a8e0 --- /dev/null +++ b/integration-tests/shell-background.responses @@ -0,0 +1,5 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I will run the command in the background for you."},{"functionCall":{"name":"run_shell_command","args":{"command":"sleep 10 && echo 
hello-from-background","is_background":true}}}],"role":"model"},"finishReason":"STOP","index":0}]}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The background process has been started. Now I will list the background processes to verify."},{"functionCall":{"name":"list_background_processes","args":{}}}],"role":"model"},"finishReason":"STOP","index":0}]}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I see the background process 'sleep 10 && echo hello-from-background' is running. Would you like me to read its output?"}],"role":"model"},"finishReason":"STOP","index":0}]}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I will read the output for you."},{"functionCall":{"name":"read_background_output","args":{"pid":12345}}}],"role":"model"},"finishReason":"STOP","index":0}]}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The output of the background process is:\nhello-from-background"}],"role":"model"},"finishReason":"STOP","index":0}]}]} diff --git a/integration-tests/shell-background.test.ts b/integration-tests/shell-background.test.ts new file mode 100644 index 0000000000..f28120e7e4 --- /dev/null +++ b/integration-tests/shell-background.test.ts @@ -0,0 +1,105 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, beforeEach, afterEach } from 'vitest'; +import { TestRig } from './test-helper.js'; +import { join, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +describe('shell-background-tools', () => { + let rig: TestRig; + + beforeEach(() => { + rig = new TestRig(); + }); + + afterEach(async () => await rig.cleanup()); + + it('should run a command in the background, list it, and read its output', 
async () => { + // We use a fake responses file to make the test deterministic and run in CI. + rig.setup('shell-background-workflow', { + fakeResponsesPath: join(__dirname, 'shell-background.responses'), + settings: { + tools: { + core: [ + 'run_shell_command', + 'list_background_processes', + 'read_background_output', + ], + }, + hooksConfig: { + enabled: true, + }, + hooks: { + BeforeTool: [ + { + matcher: 'run_shell_command', + hooks: [ + { + type: 'command', + // This hook intercepts run_shell_command. + // If is_background is true, it returns a mock result with PID 12345. + // It also creates the mock log file that read_background_output expects. + command: `node -e " + const fs = require('fs'); + const path = require('path'); + const input = JSON.parse(fs.readFileSync(0, 'utf-8')); + const args = JSON.parse(input.tool_call.args); + + if (args.is_background) { + const logDir = path.join(process.env.GEMINI_CLI_HOME, 'background-processes'); + if (!fs.existsSync(logDir)) fs.mkdirSync(logDir, { recursive: true }); + fs.writeFileSync(path.join(logDir, 'background-12345.log'), 'hello-from-background\\n'); + + console.log(JSON.stringify({ + decision: 'replace', + hookSpecificOutput: { + result: { + llmContent: 'Command moved to background (PID: 12345). Output hidden. Press Ctrl+B to view.', + data: { pid: 12345, command: args.command } + } + } + })); + } else { + console.log(JSON.stringify({ decision: 'allow' })); + } + "`, + }, + ], + }, + ], + }, + }, + }); + + const run = await rig.runInteractive({ approvalMode: 'yolo' }); + + // 1. Start a background process + // We use a command that stays alive for a bit to ensure it shows up in lists + await run.type( + "Run 'sleep 10 && echo hello-from-background' in the background.", + ); + await run.type('\r'); + + // Wait for the model's canned response acknowledging the start + await run.expectText('background', 30000); + + // 2. 
List background processes + await run.type('List my background processes.'); + await run.type('\r'); + // Wait for the model's canned response showing the list + await run.expectText('hello-from-background', 30000); + + // 3. Read the output + await run.type('Read the output of that process.'); + await run.type('\r'); + // Wait for the model's canned response showing the output + await run.expectText('hello-from-background', 30000); + }, 60000); +}); diff --git a/integration-tests/test-fixtures/dynamic.html b/integration-tests/test-fixtures/dynamic.html new file mode 100644 index 0000000000..73a99b56e4 --- /dev/null +++ b/integration-tests/test-fixtures/dynamic.html @@ -0,0 +1,29 @@ + + + + + Test Fixture - Dynamic Content + + +

Dynamic Content Page

+
Loading...
+ + + + diff --git a/integration-tests/test-fixtures/form-result.html b/integration-tests/test-fixtures/form-result.html new file mode 100644 index 0000000000..182ed70128 --- /dev/null +++ b/integration-tests/test-fixtures/form-result.html @@ -0,0 +1,15 @@ + + + + + Test Fixture - Form Result + + +

Form Submitted Successfully

+

Thank you for your submission.

+
+

Your form data has been received.

+
+ Back to Home + + diff --git a/integration-tests/test-fixtures/form.html b/integration-tests/test-fixtures/form.html new file mode 100644 index 0000000000..848cbe47e8 --- /dev/null +++ b/integration-tests/test-fixtures/form.html @@ -0,0 +1,37 @@ + + + + + Test Fixture - Contact Form + + +

Contact Form

+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+ +
+ + diff --git a/integration-tests/test-fixtures/index.html b/integration-tests/test-fixtures/index.html new file mode 100644 index 0000000000..0298ab929d --- /dev/null +++ b/integration-tests/test-fixtures/index.html @@ -0,0 +1,27 @@ + + + + + Test Fixture - Home + + +

Test Fixture Home Page

+

+ This is a test fixture page for browser agent integration tests. +

+ +
+

Footer content for testing.

+
+ + diff --git a/integration-tests/test-fixtures/multi-step/result.html b/integration-tests/test-fixtures/multi-step/result.html new file mode 100644 index 0000000000..f2386215d5 --- /dev/null +++ b/integration-tests/test-fixtures/multi-step/result.html @@ -0,0 +1,15 @@ + + + + + Test Fixture - Result + + +

Multi-Step Complete

+

You have completed all steps successfully.

+
+ ✓ Complete +
+ Back to Home + + diff --git a/integration-tests/test-fixtures/multi-step/step1.html b/integration-tests/test-fixtures/multi-step/step1.html new file mode 100644 index 0000000000..d6d620d4a0 --- /dev/null +++ b/integration-tests/test-fixtures/multi-step/step1.html @@ -0,0 +1,16 @@ + + + + + Test Fixture - Step 1 + + +

Step 1: Enter Your Details

+

Please provide your name to continue.

+
+ + + +
+ + diff --git a/integration-tests/test-fixtures/multi-step/step2.html b/integration-tests/test-fixtures/multi-step/step2.html new file mode 100644 index 0000000000..f0571a7a8e --- /dev/null +++ b/integration-tests/test-fixtures/multi-step/step2.html @@ -0,0 +1,22 @@ + + + + + Test Fixture - Step 2 + + +

Step 2: Confirm Your Selection

+

Choose your preference below.

+
+
+ + +
+ +
+ + diff --git a/package-lock.json b/package-lock.json index f3bf8fa616..2c8a4b64b8 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,7 +11,7 @@ "packages/*" ], "dependencies": { - "ink": "npm:@jrichman/ink@6.5.0", + "ink": "npm:@jrichman/ink@6.6.7", "latest-version": "^9.0.0", "node-fetch-native": "^1.6.7", "proper-lockfile": "^4.1.2", @@ -92,46 +92,6 @@ "zod": "^3.25.0 || ^4.0.0" } }, - "node_modules/@alcalzone/ansi-tokenize": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@alcalzone/ansi-tokenize/-/ansi-tokenize-0.2.2.tgz", - "integrity": "sha512-mkOh+Wwawzuf5wa30bvc4nA+Qb6DIrGWgBhRR/Pw4T9nsgYait8izvXkNyU78D6Wcu3Z+KUdwCmLCxlWjEotYA==", - "license": "MIT", - "dependencies": { - "ansi-styles": "^6.2.1", - "is-fullwidth-code-point": "^5.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/@alcalzone/ansi-tokenize/node_modules/ansi-styles": { - "version": "6.2.3", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.3.tgz", - "integrity": "sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/@alcalzone/ansi-tokenize/node_modules/is-fullwidth-code-point": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-5.1.0.tgz", - "integrity": "sha512-5XHYaSyiqADb4RnZ1Bdad6cPp8Toise4TzEjcOYDHZkTCbKgiUl7WTUCpNWHuxmDt91wnsZBc9xinNzopv3JMQ==", - "license": "MIT", - "dependencies": { - "get-east-asian-width": "^1.3.1" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/@ampproject/remapping": { "version": "2.3.0", "resolved": "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.3.0.tgz", @@ -10089,14 +10049,13 @@ }, "node_modules/ink": { "name": "@jrichman/ink", - 
"version": "6.5.0", - "resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.5.0.tgz", - "integrity": "sha512-S4g/ng7fPZmFwclO82iWkOce8vDLy/FIDgHIfkCWGOehqHe6dexHsmq3kNQD21okh198pA5SAQTCqNQJb/svRQ==", + "version": "6.6.7", + "resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.6.7.tgz", + "integrity": "sha512-bDzQLpLzK/dn9Ur/Ku88ZZR9totVcMGrGYAgPHidsAAbe9NKztU1fggj/iu0wRp5g1kBeALb3cfagFGdDxAU1w==", "license": "MIT", "dependencies": { - "@alcalzone/ansi-tokenize": "^0.2.1", "ansi-escapes": "^7.0.0", - "ansi-styles": "^6.2.1", + "ansi-styles": "^6.2.3", "auto-bind": "^5.0.1", "chalk": "^5.6.0", "cli-boxes": "^3.0.0", @@ -10105,6 +10064,7 @@ "code-excerpt": "^4.0.0", "es-toolkit": "^1.39.10", "indent-string": "^5.0.0", + "is-fullwidth-code-point": "^5.0.0", "is-in-ci": "^2.0.0", "mnemonist": "^0.40.3", "patch-console": "^2.0.0", @@ -10174,9 +10134,9 @@ } }, "node_modules/ink/node_modules/ansi-styles": { - "version": "6.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz", - "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==", + "version": "6.2.3", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.3.tgz", + "integrity": "sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==", "license": "MIT", "engines": { "node": ">=12" @@ -10197,6 +10157,21 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, + "node_modules/ink/node_modules/is-fullwidth-code-point": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-5.1.0.tgz", + "integrity": "sha512-5XHYaSyiqADb4RnZ1Bdad6cPp8Toise4TzEjcOYDHZkTCbKgiUl7WTUCpNWHuxmDt91wnsZBc9xinNzopv3JMQ==", + "license": "MIT", + "dependencies": { + "get-east-asian-width": "^1.3.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, 
"node_modules/ink/node_modules/is-in-ci": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/is-in-ci/-/is-in-ci-2.0.0.tgz", @@ -17551,7 +17526,7 @@ "fzf": "^0.5.2", "glob": "^12.0.0", "highlight.js": "^11.11.1", - "ink": "npm:@jrichman/ink@6.5.0", + "ink": "npm:@jrichman/ink@6.6.7", "ink-gradient": "^3.0.0", "ink-spinner": "^5.0.0", "latest-version": "^9.0.0", diff --git a/package.json b/package.json index 8bb5f25e20..e24f6a20b5 100644 --- a/package.json +++ b/package.json @@ -18,6 +18,7 @@ }, "scripts": { "start": "cross-env NODE_ENV=development node scripts/start.js", + "start:prod": "cross-env NODE_ENV=production node scripts/start.js", "start:a2a-server": "CODER_AGENT_PORT=41242 npm run start --workspace @google/gemini-cli-a2a-server", "debug": "cross-env DEBUG=1 node --inspect-brk scripts/start.js", "deflake": "node scripts/deflake.js", @@ -38,7 +39,7 @@ "build:packages": "npm run build --workspaces", "build:sandbox": "node scripts/build_sandbox.js", "build:binary": "node scripts/build_binary.js", - "bundle": "npm run generate && npm run build --workspace=@google/gemini-cli-devtools && node esbuild.config.js && node scripts/copy_bundle_assets.js", + "bundle": "npm run generate && npm run build --workspace=@google/gemini-cli-devtools && npm run bundle:browser-mcp -w @google/gemini-cli-core && node esbuild.config.js && node scripts/copy_bundle_assets.js", "test": "npm run test --workspaces --if-present && npm run test:sea-launch", "test:ci": "npm run test:ci --workspaces --if-present && npm run test:scripts && npm run test:sea-launch", "test:scripts": "vitest run --config ./scripts/tests/vitest.config.ts", @@ -68,7 +69,7 @@ "pre-commit": "node scripts/pre-commit.js" }, "overrides": { - "ink": "npm:@jrichman/ink@6.5.0", + "ink": "npm:@jrichman/ink@6.6.7", "wrap-ansi": "9.0.2", "cliui": { "wrap-ansi": "7.0.0" @@ -136,7 +137,7 @@ "yargs": "^17.7.2" }, "dependencies": { - "ink": "npm:@jrichman/ink@6.5.0", + "ink": "npm:@jrichman/ink@6.6.7", 
"latest-version": "^9.0.0", "node-fetch-native": "^1.6.7", "proper-lockfile": "^4.1.2", diff --git a/packages/a2a-server/src/commands/restore.ts b/packages/a2a-server/src/commands/restore.ts index c7567a3b24..7a5205c66b 100644 --- a/packages/a2a-server/src/commands/restore.ts +++ b/packages/a2a-server/src/commands/restore.ts @@ -98,7 +98,7 @@ export class RestoreCommand implements Command { name: this.name, data: restoreResult, }; - } catch (_error) { + } catch { return { name: this.name, data: { @@ -142,7 +142,7 @@ export class ListCheckpointsCommand implements Command { content: JSON.stringify(checkpointInfoList), }, }; - } catch (_error) { + } catch { return { name: this.name, data: { diff --git a/packages/a2a-server/src/http/server.ts b/packages/a2a-server/src/http/server.ts index 1bfb29c081..c22be49331 100644 --- a/packages/a2a-server/src/http/server.ts +++ b/packages/a2a-server/src/http/server.ts @@ -1,4 +1,4 @@ -#!/usr/bin/env -S node --no-warnings=DEP0040 +#!/usr/bin/env node /** * @license diff --git a/packages/a2a-server/src/utils/testing_utils.ts b/packages/a2a-server/src/utils/testing_utils.ts index f7f1645f8c..4265805e09 100644 --- a/packages/a2a-server/src/utils/testing_utils.ts +++ b/packages/a2a-server/src/utils/testing_utils.ts @@ -109,12 +109,8 @@ export function createMockConfig( enableEnvironmentVariableRedaction: false, }, }), - isExperimentalAgentHistoryTruncationEnabled: vi.fn().mockReturnValue(false), - getExperimentalAgentHistoryTruncationThreshold: vi.fn().mockReturnValue(50), - getExperimentalAgentHistoryRetainedMessages: vi.fn().mockReturnValue(30), - isExperimentalAgentHistorySummarizationEnabled: vi - .fn() - .mockReturnValue(false), + isContextManagementEnabled: vi.fn().mockReturnValue(false), + getContextManagementConfig: vi.fn().mockReturnValue({ enabled: false }), ...overrides, } as unknown as Config; diff --git a/packages/cli/index.ts b/packages/cli/index.ts index 5444fe1b74..d94a2dd191 100644 --- a/packages/cli/index.ts +++ 
b/packages/cli/index.ts @@ -1,4 +1,4 @@ -#!/usr/bin/env -S node --no-warnings=DEP0040 +#!/usr/bin/env node /** * @license diff --git a/packages/cli/package.json b/packages/cli/package.json index 072f2b8a72..52ae182dca 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -49,7 +49,7 @@ "fzf": "^0.5.2", "glob": "^12.0.0", "highlight.js": "^11.11.1", - "ink": "npm:@jrichman/ink@6.5.0", + "ink": "npm:@jrichman/ink@6.6.7", "ink-gradient": "^3.0.0", "ink-spinner": "^5.0.0", "latest-version": "^9.0.0", diff --git a/packages/cli/src/__snapshots__/nonInteractiveCliAgentSession.test.ts.snap b/packages/cli/src/__snapshots__/nonInteractiveCliAgentSession.test.ts.snap new file mode 100644 index 0000000000..92f396a59c --- /dev/null +++ b/packages/cli/src/__snapshots__/nonInteractiveCliAgentSession.test.ts.snap @@ -0,0 +1,35 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`runNonInteractive > should emit appropriate error event in streaming JSON mode: 'loop detected' 1`] = ` +"{"type":"init","timestamp":"","session_id":"test-session-id","model":"test-model"} +{"type":"message","timestamp":"","role":"user","content":"Loop test"} +{"type":"error","timestamp":"","severity":"warning","message":"Loop detected, stopping execution"} +{"type":"result","timestamp":"","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":,"tool_calls":0,"models":{}}} +" +`; + +exports[`runNonInteractive > should emit appropriate error event in streaming JSON mode: 'max session turns' 1`] = ` +"{"type":"init","timestamp":"","session_id":"test-session-id","model":"test-model"} +{"type":"message","timestamp":"","role":"user","content":"Max turns test"} +{"type":"error","timestamp":"","severity":"error","message":"Maximum session turns exceeded"} 
+{"type":"result","timestamp":"","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":,"tool_calls":0,"models":{}}} +" +`; + +exports[`runNonInteractive > should emit appropriate events for streaming JSON output 1`] = ` +"{"type":"init","timestamp":"","session_id":"test-session-id","model":"test-model"} +{"type":"message","timestamp":"","role":"user","content":"Stream test"} +{"type":"message","timestamp":"","role":"assistant","content":"Thinking...","delta":true} +{"type":"tool_use","timestamp":"","tool_name":"testTool","tool_id":"tool-1","parameters":{"arg1":"value1"}} +{"type":"tool_result","timestamp":"","tool_id":"tool-1","status":"success","output":"Tool executed successfully"} +{"type":"message","timestamp":"","role":"assistant","content":"Final answer","delta":true} +{"type":"result","timestamp":"","status":"success","stats":{"total_tokens":0,"input_tokens":0,"output_tokens":0,"cached":0,"input":0,"duration_ms":,"tool_calls":0,"models":{}}} +" +`; + +exports[`runNonInteractive > should write a single newline between sequential text outputs from the model 1`] = ` +"Use mock tool +Use mock tool again +Finished. 
+" +`; diff --git a/packages/cli/src/acp/acpClient.test.ts b/packages/cli/src/acp/acpClient.test.ts index 14295954dd..470ff38351 100644 --- a/packages/cli/src/acp/acpClient.test.ts +++ b/packages/cli/src/acp/acpClient.test.ts @@ -27,6 +27,7 @@ import { type MessageBus, LlmRole, type GitService, + type ModelRouterService, processSingleFileContent, InvalidStreamError, } from '@google/gemini-cli-core'; @@ -102,17 +103,7 @@ vi.mock( ...actual, updatePolicy: vi.fn(), createPolicyUpdater: vi.fn(), - ReadManyFilesTool: vi.fn().mockImplementation(() => ({ - name: 'read_many_files', - kind: 'read', - build: vi.fn().mockReturnValue({ - getDescription: () => 'Read files', - toolLocations: () => [], - execute: vi.fn().mockResolvedValue({ - llmContent: ['--- file.txt ---\n\nFile content\n\n'], - }), - }), - })), + ReadManyFilesTool: vi.fn(), logToolCall: vi.fn(), LlmRole: { MAIN: 'main', @@ -421,6 +412,26 @@ describe('GeminiAgent', () => { ); }); + it('should include gemini-3.1-flash-lite when useGemini31FlashLite is true', async () => { + mockConfig.getHasAccessToPreviewModel = vi.fn().mockReturnValue(true); + mockConfig.getGemini31LaunchedSync = vi.fn().mockReturnValue(true); + mockConfig.getGemini31FlashLiteLaunchedSync = vi.fn().mockReturnValue(true); + + const response = await agent.newSession({ + cwd: '/tmp', + mcpServers: [], + }); + + expect(response.models?.availableModels).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + modelId: 'gemini-3.1-flash-lite-preview', + name: 'gemini-3.1-flash-lite-preview', + }), + ]), + ); + }); + it('should return modes with plan mode when plan is enabled', async () => { mockConfig.getContentGeneratorConfig = vi.fn().mockReturnValue({ apiKey: 'test-key', @@ -646,6 +657,7 @@ describe('Session', () => { sendMessageStream: vi.fn(), addHistory: vi.fn(), recordCompletedToolCalls: vi.fn(), + getHistory: vi.fn().mockReturnValue([]), } as unknown as Mocked; mockTool = { kind: 'read', @@ -667,6 +679,9 @@ describe('Session', () => 
{ mockConfig = { getModel: vi.fn().mockReturnValue('gemini-pro'), getActiveModel: vi.fn().mockReturnValue('gemini-pro'), + getModelRouterService: vi.fn().mockReturnValue({ + route: vi.fn().mockResolvedValue({ model: 'resolved-model' }), + }), getToolRegistry: vi.fn().mockReturnValue(mockToolRegistry), getMcpServers: vi.fn(), getFileService: vi.fn().mockReturnValue({ @@ -713,10 +728,22 @@ describe('Session', () => { }, errors: [], } as unknown as LoadedSettings); + + (ReadManyFilesTool as unknown as Mock).mockImplementation(() => ({ + name: 'read_many_files', + kind: 'read', + build: vi.fn().mockReturnValue({ + getDescription: () => 'Read files', + toolLocations: () => [], + execute: vi.fn().mockResolvedValue({ + llmContent: ['--- file.txt ---\n\nFile content\n\n'], + }), + }), + })); }); afterEach(() => { - vi.clearAllMocks(); + vi.restoreAllMocks(); }); it('should send available commands', async () => { @@ -786,6 +813,42 @@ describe('Session', () => { expect(result).toMatchObject({ stopReason: 'end_turn' }); }); + it('should use model router to determine model', async () => { + const mockRouter = { + route: vi.fn().mockResolvedValue({ model: 'routed-model' }), + } as unknown as ModelRouterService; + mockConfig.getModelRouterService.mockReturnValue(mockRouter); + + const stream = createMockStream([ + { + type: StreamEventType.CHUNK, + value: { + candidates: [{ content: { parts: [{ text: 'Hello' }] } }], + }, + }, + ]); + mockChat.sendMessageStream.mockResolvedValue(stream); + + await session.prompt({ + sessionId: 'session-1', + prompt: [{ type: 'text', text: 'Hi' }], + }); + + expect(mockRouter.route).toHaveBeenCalledWith( + expect.objectContaining({ + requestedModel: 'gemini-pro', + request: [{ text: 'Hi' }], + }), + ); + expect(mockChat.sendMessageStream).toHaveBeenCalledWith( + expect.objectContaining({ model: 'routed-model' }), + expect.any(Array), + expect.any(String), + expect.any(Object), + expect.any(String), + ); + }); + it('should handle prompt with empty 
response (InvalidStreamError)', async () => { mockChat.sendMessageStream.mockRejectedValue( new InvalidStreamError('Empty response', 'NO_RESPONSE_TEXT'), @@ -812,6 +875,32 @@ describe('Session', () => { expect(result).toMatchObject({ stopReason: 'end_turn' }); }); + it('should handle prompt with no finish reason (InvalidStreamError)', async () => { + mockChat.sendMessageStream.mockRejectedValue( + new InvalidStreamError('No finish reason', 'NO_FINISH_REASON'), + ); + + const result = await session.prompt({ + sessionId: 'session-1', + prompt: [{ type: 'text', text: 'Hi' }], + }); + + expect(mockChat.sendMessageStream).toHaveBeenCalled(); + expect(result).toMatchObject({ stopReason: 'end_turn' }); + }); + + it('should handle prompt with no finish reason (NO_FINISH_REASON anomaly)', async () => { + mockChat.sendMessageStream.mockRejectedValue({ type: 'NO_FINISH_REASON' }); + + const result = await session.prompt({ + sessionId: 'session-1', + prompt: [{ type: 'text', text: 'Hi' }], + }); + + expect(mockChat.sendMessageStream).toHaveBeenCalled(); + expect(result).toMatchObject({ stopReason: 'end_turn' }); + }); + it('should handle /memory command', async () => { const handleCommandSpy = vi .spyOn( diff --git a/packages/cli/src/acp/acpClient.ts b/packages/cli/src/acp/acpClient.ts index 6b76ffdc7a..e0a352e0d1 100644 --- a/packages/cli/src/acp/acpClient.ts +++ b/packages/cli/src/acp/acpClient.ts @@ -28,7 +28,7 @@ import { debugLogger, ReadManyFilesTool, REFERENCE_CONTENT_START, - resolveModel, + type RoutingContext, createWorkingStdio, startupProfiler, Kind, @@ -42,6 +42,7 @@ import { DEFAULT_GEMINI_FLASH_LITE_MODEL, PREVIEW_GEMINI_MODEL, PREVIEW_GEMINI_3_1_MODEL, + PREVIEW_GEMINI_3_1_FLASH_LITE_MODEL, PREVIEW_GEMINI_3_1_CUSTOM_TOOLS_MODEL, PREVIEW_GEMINI_FLASH_MODEL, DEFAULT_GEMINI_MODEL_AUTO, @@ -758,10 +759,15 @@ export class Session { const functionCalls: FunctionCall[] = []; try { - const model = resolveModel( - this.context.config.getModel(), - (await 
this.context.config.getGemini31Launched?.()) ?? false, - ); + const routingContext: RoutingContext = { + history: chat.getHistory(/*curated=*/ true), + request: nextMessage?.parts ?? [], + signal: pendingSend.signal, + requestedModel: this.context.config.getModel(), + }; + + const router = this.context.config.getModelRouterService(); + const { model } = await router.route(routingContext); const responseStream = await chat.sendMessageStream( { model }, nextMessage?.parts ?? [], @@ -857,7 +863,10 @@ export class Session { (error && typeof error === 'object' && 'type' in error && - error.type === 'NO_RESPONSE_TEXT') + (error.type === 'NO_RESPONSE_TEXT' || + error.type === 'NO_FINISH_REASON' || + error.type === 'MALFORMED_FUNCTION_CALL' || + error.type === 'UNEXPECTED_TOOL_CALL')) ) { // The stream ended with an empty response or malformed tool call. // Treat this as a graceful end to the model's turn rather than a crash. @@ -2009,10 +2018,31 @@ function buildAvailableModels( const preferredModel = config.getModel() || DEFAULT_GEMINI_MODEL_AUTO; const shouldShowPreviewModels = config.getHasAccessToPreviewModel(); const useGemini31 = config.getGemini31LaunchedSync?.() ?? false; + const useGemini31FlashLite = + config.getGemini31FlashLiteLaunchedSync?.() ?? 
false; const selectedAuthType = settings.merged.security.auth.selectedType; const useCustomToolModel = useGemini31 && selectedAuthType === AuthType.USE_GEMINI; + // --- DYNAMIC PATH --- + if ( + config.getExperimentalDynamicModelConfiguration?.() === true && + config.getModelConfigService + ) { + const options = config.getModelConfigService().getAvailableModelOptions({ + useGemini3_1: useGemini31, + useGemini3_1FlashLite: useGemini31FlashLite, + useCustomTools: useCustomToolModel, + hasAccessToPreview: shouldShowPreviewModels, + }); + + return { + availableModels: options, + currentModelId: preferredModel, + }; + } + + // --- LEGACY PATH --- const mainOptions = [ { value: DEFAULT_GEMINI_MODEL_AUTO, @@ -2056,7 +2086,7 @@ function buildAvailableModels( ? PREVIEW_GEMINI_3_1_CUSTOM_TOOLS_MODEL : previewProModel; - manualOptions.unshift( + const previewOptions = [ { value: previewProValue, title: getDisplayString(previewProModel), @@ -2065,7 +2095,16 @@ function buildAvailableModels( value: PREVIEW_GEMINI_FLASH_MODEL, title: getDisplayString(PREVIEW_GEMINI_FLASH_MODEL), }, - ); + ]; + + if (useGemini31FlashLite) { + previewOptions.push({ + value: PREVIEW_GEMINI_3_1_FLASH_LITE_MODEL, + title: getDisplayString(PREVIEW_GEMINI_3_1_FLASH_LITE_MODEL), + }); + } + + manualOptions.unshift(...previewOptions); } const scaleOptions = ( diff --git a/packages/cli/src/acp/commandHandler.test.ts b/packages/cli/src/acp/commandHandler.test.ts index 8e04f014f3..23bf907ec3 100644 --- a/packages/cli/src/acp/commandHandler.test.ts +++ b/packages/cli/src/acp/commandHandler.test.ts @@ -26,5 +26,8 @@ describe('CommandHandler', () => { const init = parse('/init'); expect(init.commandToExecute?.name).toBe('init'); + + const about = parse('/about'); + expect(about.commandToExecute?.name).toBe('about'); }); }); diff --git a/packages/cli/src/acp/commandHandler.ts b/packages/cli/src/acp/commandHandler.ts index 836cdf7736..4ed846188e 100644 --- a/packages/cli/src/acp/commandHandler.ts +++ 
b/packages/cli/src/acp/commandHandler.ts @@ -10,6 +10,7 @@ import { MemoryCommand } from './commands/memory.js'; import { ExtensionsCommand } from './commands/extensions.js'; import { InitCommand } from './commands/init.js'; import { RestoreCommand } from './commands/restore.js'; +import { AboutCommand } from './commands/about.js'; export class CommandHandler { private registry: CommandRegistry; @@ -24,6 +25,7 @@ export class CommandHandler { registry.register(new ExtensionsCommand()); registry.register(new InitCommand()); registry.register(new RestoreCommand()); + registry.register(new AboutCommand()); return registry; } diff --git a/packages/cli/src/acp/commands/about.ts b/packages/cli/src/acp/commands/about.ts new file mode 100644 index 0000000000..06349e88d7 --- /dev/null +++ b/packages/cli/src/acp/commands/about.ts @@ -0,0 +1,74 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + IdeClient, + UserAccountManager, + getVersion, +} from '@google/gemini-cli-core'; +import type { + Command, + CommandContext, + CommandExecutionResponse, +} from './types.js'; +import process from 'node:process'; + +export class AboutCommand implements Command { + readonly name = 'about'; + readonly description = 'Show version and environment info'; + + async execute( + context: CommandContext, + _args: string[] = [], + ): Promise { + const osVersion = process.platform; + let sandboxEnv = 'no sandbox'; + if (process.env['SANDBOX'] && process.env['SANDBOX'] !== 'sandbox-exec') { + sandboxEnv = process.env['SANDBOX']; + } else if (process.env['SANDBOX'] === 'sandbox-exec') { + sandboxEnv = `sandbox-exec (${ + process.env['SEATBELT_PROFILE'] || 'unknown' + })`; + } + const modelVersion = context.agentContext.config.getModel() || 'Unknown'; + const cliVersion = await getVersion(); + const selectedAuthType = + context.settings.merged?.security?.auth?.selectedType ?? 
''; + const gcpProject = process.env['GOOGLE_CLOUD_PROJECT'] || ''; + const ideClient = await getIdeClientName(context); + + const userAccountManager = new UserAccountManager(); + const cachedAccount = userAccountManager.getCachedGoogleAccount(); + const userEmail = cachedAccount ?? 'Unknown'; + + const tier = context.agentContext.config.getUserTierName() || 'Unknown'; + + const info = [ + `- Version: ${cliVersion}`, + `- OS: ${osVersion}`, + `- Sandbox: ${sandboxEnv}`, + `- Model: ${modelVersion}`, + `- Auth Type: ${selectedAuthType}`, + `- GCP Project: ${gcpProject}`, + `- IDE Client: ${ideClient}`, + `- User Email: ${userEmail}`, + `- Tier: ${tier}`, + ].join('\n'); + + return { + name: this.name, + data: `Gemini CLI Info:\n${info}`, + }; + } +} + +async function getIdeClientName(context: CommandContext) { + if (!context.agentContext.config.getIdeMode()) { + return ''; + } + const ideClient = await IdeClient.getInstance(); + return ideClient?.getDetectedIdeDisplayName() ?? ''; +} diff --git a/packages/cli/src/acp/commands/extensions.ts b/packages/cli/src/acp/commands/extensions.ts index a6e08f9bbc..7ebe922402 100644 --- a/packages/cli/src/acp/commands/extensions.ts +++ b/packages/cli/src/acp/commands/extensions.ts @@ -284,7 +284,7 @@ export class LinkExtensionCommand implements Command { try { await stat(sourceFilepath); - } catch (_error) { + } catch { return { name: this.name, data: `Invalid source: ${sourceFilepath}` }; } diff --git a/packages/cli/src/acp/commands/restore.ts b/packages/cli/src/acp/commands/restore.ts index 6898cff2e1..4ffc5dfba2 100644 --- a/packages/cli/src/acp/commands/restore.ts +++ b/packages/cli/src/acp/commands/restore.ts @@ -130,7 +130,7 @@ export class ListCheckpointsCommand implements Command { const checkpointDir = config.storage.getProjectTempCheckpointsDir(); try { await fs.mkdir(checkpointDir, { recursive: true }); - } catch (_e) { + } catch { // Ignore } @@ -169,7 +169,7 @@ export class ListCheckpointsCommand implements Command 
{ name: this.name, data: `Available Checkpoints:\n${formatted}`, }; - } catch (_error) { + } catch { return { name: this.name, data: 'An unexpected error occurred while listing checkpoints.', diff --git a/packages/cli/src/commands/extensions/new.ts b/packages/cli/src/commands/extensions/new.ts index e5507194d0..2ff97834c3 100644 --- a/packages/cli/src/commands/extensions/new.ts +++ b/packages/cli/src/commands/extensions/new.ts @@ -25,7 +25,7 @@ async function pathExists(path: string) { try { await access(path); return true; - } catch (_e) { + } catch { return false; } } diff --git a/packages/cli/src/commands/mcp.test.ts b/packages/cli/src/commands/mcp.test.ts index 715786859b..eae9614cf3 100644 --- a/packages/cli/src/commands/mcp.test.ts +++ b/packages/cli/src/commands/mcp.test.ts @@ -32,7 +32,7 @@ describe('mcp command', () => { try { await parser.parse('mcp'); - } catch (_error) { + } catch { // yargs might throw an error when demandCommand is not met } diff --git a/packages/cli/src/commands/mcp/list.ts b/packages/cli/src/commands/mcp/list.ts index 1a59dc8dc2..be255c8659 100644 --- a/packages/cli/src/commands/mcp/list.ts +++ b/packages/cli/src/commands/mcp/list.ts @@ -122,7 +122,7 @@ async function testMCPConnection( try { // Use the same transport creation logic as core transport = await createTransport(serverName, config, false, mcpContext); - } catch (_error) { + } catch { await client.close(); return MCPServerStatus.DISCONNECTED; } @@ -136,7 +136,7 @@ async function testMCPConnection( await client.close(); return MCPServerStatus.CONNECTED; - } catch (_error) { + } catch { await transport.close(); return MCPServerStatus.DISCONNECTED; } diff --git a/packages/cli/src/commands/skills/list.test.ts b/packages/cli/src/commands/skills/list.test.ts index 391749242b..37f6b26613 100644 --- a/packages/cli/src/commands/skills/list.test.ts +++ b/packages/cli/src/commands/skills/list.test.ts @@ -4,8 +4,16 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { vi, describe, 
it, expect, beforeEach, afterEach } from 'vitest'; -import { coreEvents, type Config } from '@google/gemini-cli-core'; +import { + vi, + describe, + it, + expect, + beforeEach, + afterEach, + type MockInstance, +} from 'vitest'; +import { type Config } from '@google/gemini-cli-core'; import { handleList, listCommand } from './list.js'; import { loadSettings, type LoadedSettings } from '../../config/settings.js'; import { loadCliConfig } from '../../config/config.js'; @@ -32,12 +40,16 @@ vi.mock('../utils.js', () => ({ describe('skills list command', () => { const mockLoadSettings = vi.mocked(loadSettings); const mockLoadCliConfig = vi.mocked(loadCliConfig); + let stdoutWriteSpy: MockInstance; beforeEach(async () => { vi.clearAllMocks(); mockLoadSettings.mockReturnValue({ merged: {}, } as unknown as LoadedSettings); + stdoutWriteSpy = vi + .spyOn(process.stdout, 'write') + .mockImplementation(() => true); }); afterEach(() => { @@ -56,10 +68,7 @@ describe('skills list command', () => { await handleList({}); - expect(coreEvents.emitConsoleLog).toHaveBeenCalledWith( - 'log', - 'No skills discovered.', - ); + expect(stdoutWriteSpy).toHaveBeenCalledWith('No skills discovered.\n'); }); it('should list all discovered skills', async () => { @@ -87,24 +96,19 @@ describe('skills list command', () => { await handleList({}); - expect(coreEvents.emitConsoleLog).toHaveBeenCalledWith( - 'log', - chalk.bold('Discovered Agent Skills:'), + expect(stdoutWriteSpy).toHaveBeenCalledWith( + chalk.bold('Discovered Agent Skills:') + '\n\n', ); - expect(coreEvents.emitConsoleLog).toHaveBeenCalledWith( - 'log', + expect(stdoutWriteSpy).toHaveBeenCalledWith( expect.stringContaining('skill1'), ); - expect(coreEvents.emitConsoleLog).toHaveBeenCalledWith( - 'log', + expect(stdoutWriteSpy).toHaveBeenCalledWith( expect.stringContaining(chalk.green('[Enabled]')), ); - expect(coreEvents.emitConsoleLog).toHaveBeenCalledWith( - 'log', + expect(stdoutWriteSpy).toHaveBeenCalledWith( 
expect.stringContaining('skill2'), ); - expect(coreEvents.emitConsoleLog).toHaveBeenCalledWith( - 'log', + expect(stdoutWriteSpy).toHaveBeenCalledWith( expect.stringContaining(chalk.red('[Disabled]')), ); }); @@ -135,12 +139,10 @@ describe('skills list command', () => { // Default await handleList({ all: false }); - expect(coreEvents.emitConsoleLog).toHaveBeenCalledWith( - 'log', + expect(stdoutWriteSpy).toHaveBeenCalledWith( expect.stringContaining('regular'), ); - expect(coreEvents.emitConsoleLog).not.toHaveBeenCalledWith( - 'log', + expect(stdoutWriteSpy).not.toHaveBeenCalledWith( expect.stringContaining('builtin'), ); @@ -148,16 +150,13 @@ describe('skills list command', () => { // With all: true await handleList({ all: true }); - expect(coreEvents.emitConsoleLog).toHaveBeenCalledWith( - 'log', + expect(stdoutWriteSpy).toHaveBeenCalledWith( expect.stringContaining('regular'), ); - expect(coreEvents.emitConsoleLog).toHaveBeenCalledWith( - 'log', + expect(stdoutWriteSpy).toHaveBeenCalledWith( expect.stringContaining('builtin'), ); - expect(coreEvents.emitConsoleLog).toHaveBeenCalledWith( - 'log', + expect(stdoutWriteSpy).toHaveBeenCalledWith( expect.stringContaining(chalk.gray(' [Built-in]')), ); }); diff --git a/packages/cli/src/commands/skills/list.ts b/packages/cli/src/commands/skills/list.ts index 49fc3a54f1..bc05c6c2af 100644 --- a/packages/cli/src/commands/skills/list.ts +++ b/packages/cli/src/commands/skills/list.ts @@ -5,7 +5,6 @@ */ import type { CommandModule } from 'yargs'; -import { debugLogger } from '@google/gemini-cli-core'; import { loadSettings } from '../../config/settings.js'; import { loadCliConfig, type CliArgs } from '../../config/config.js'; import { exitCli } from '../utils.js'; @@ -42,12 +41,11 @@ export async function handleList(args: { all?: boolean }) { }); if (skills.length === 0) { - debugLogger.log('No skills discovered.'); + process.stdout.write('No skills discovered.\n'); return; } - debugLogger.log(chalk.bold('Discovered Agent 
Skills:')); - debugLogger.log(''); + process.stdout.write(chalk.bold('Discovered Agent Skills:') + '\n\n'); for (const skill of skills) { const status = skill.disabled @@ -56,10 +54,11 @@ export async function handleList(args: { all?: boolean }) { const builtinSuffix = skill.isBuiltin ? chalk.gray(' [Built-in]') : ''; - debugLogger.log(`${chalk.bold(skill.name)} ${status}${builtinSuffix}`); - debugLogger.log(` Description: ${skill.description}`); - debugLogger.log(` Location: ${skill.location}`); - debugLogger.log(''); + process.stdout.write( + `${chalk.bold(skill.name)} ${status}${builtinSuffix}\n`, + ); + process.stdout.write(` Description: ${skill.description}\n`); + process.stdout.write(` Location: ${skill.location}\n\n`); } } diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts index b9401ed5eb..04df366a98 100644 --- a/packages/cli/src/config/config.test.ts +++ b/packages/cli/src/config/config.test.ts @@ -21,6 +21,8 @@ import { type MCPServerConfig, type GeminiCLIExtension, Storage, + generalistProfile, + type ContextManagementConfig, } from '@google/gemini-cli-core'; import { loadCliConfig, parseArguments, type CliArgs } from './config.js'; import { @@ -1364,8 +1366,8 @@ describe('Approval mode tool exclusion logic', () => { 'test', ]; const settings = createTestMergedSettings({ - experimental: { - plan: true, + general: { + plan: { enabled: true }, }, }); const argv = await parseArguments(createTestMergedSettings()); @@ -1479,9 +1481,7 @@ describe('Approval mode tool exclusion logic', () => { const settings = createTestMergedSettings({ general: { defaultApprovalMode: 'plan', - }, - experimental: { - plan: false, + plan: { enabled: false }, }, }); const argv = await parseArguments(settings); @@ -1489,14 +1489,12 @@ describe('Approval mode tool exclusion logic', () => { expect(config.getApprovalMode()).toBe(ApprovalMode.DEFAULT); }); - it('should allow plan approval mode if experimental plan is enabled', async () => { + 
it('should allow plan approval mode if plan is enabled', async () => { process.argv = ['node', 'script.js']; const settings = createTestMergedSettings({ general: { defaultApprovalMode: 'plan', - }, - experimental: { - plan: true, + plan: { enabled: true }, }, }); const argv = await parseArguments(settings); @@ -2178,6 +2176,89 @@ describe('loadCliConfig directWebFetch', () => { }); }); +describe('loadCliConfig context management', () => { + beforeEach(() => { + vi.resetAllMocks(); + vi.mocked(os.homedir).mockReturnValue('/mock/home/user'); + vi.stubEnv('GEMINI_API_KEY', 'test-api-key'); + vi.spyOn(ExtensionManager.prototype, 'getExtensions').mockReturnValue([]); + }); + + afterEach(() => { + vi.unstubAllEnvs(); + vi.restoreAllMocks(); + }); + + it('should be false by default when generalistProfile / context management is not set in settings', async () => { + process.argv = ['node', 'script.js']; + const argv = await parseArguments(createTestMergedSettings()); + const settings = createTestMergedSettings(); + const config = await loadCliConfig(settings, 'test-session', argv); + expect(config.getContextManagementConfig()).haveOwnProperty( + 'enabled', + false, + ); + expect(config.isContextManagementEnabled()).toBe(false); + }); + + it('should be true when generalistProfile is set to true in settings', async () => { + process.argv = ['node', 'script.js']; + const argv = await parseArguments(createTestMergedSettings()); + const settings = createTestMergedSettings({ + experimental: { + generalistProfile: true, + }, + }); + const config = await loadCliConfig(settings, 'test-session', argv); + expect(config.getContextManagementConfig()).toStrictEqual( + generalistProfile, + ); + expect(config.isContextManagementEnabled()).toBe(true); + }); + + it('should be true when contextManagement is set to true in settings', async () => { + process.argv = ['node', 'script.js']; + const argv = await parseArguments(createTestMergedSettings()); + const contextManagementConfig: Partial = 
{ + historyWindow: { + maxTokens: 100_000, + retainedTokens: 50_000, + }, + messageLimits: { + normalMaxTokens: 1000, + retainedMaxTokens: 10_000, + normalizationHeadRatio: 0.25, + }, + tools: { + distillation: { + maxOutputTokens: 10_000, + summarizationThresholdTokens: 15_000, + }, + outputMasking: { + protectionThresholdTokens: 30_000, + minPrunableThresholdTokens: 10_000, + protectLatestTurn: false, + }, + }, + }; + const settings = createTestMergedSettings({ + experimental: { + contextManagement: true, + }, + // The type of numbers is being inferred strangely, and so we have to cast + // to `any` here. + // eslint-disable-next-line @typescript-eslint/no-explicit-any + contextManagement: contextManagementConfig as any, + }); + const config = await loadCliConfig(settings, 'test-session', argv); + expect(config.getContextManagementConfig()).toStrictEqual({ + enabled: true, + ...contextManagementConfig, + }); + expect(config.isContextManagementEnabled()).toBe(true); + }); +}); + describe('screenReader configuration', () => { beforeEach(() => { vi.resetAllMocks(); @@ -2742,12 +2823,12 @@ describe('loadCliConfig approval mode', () => { expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.YOLO); }); - it('should set Plan approval mode when --approval-mode=plan is used and experimental.plan is enabled', async () => { + it('should set Plan approval mode when --approval-mode=plan is used and plan is enabled', async () => { process.argv = ['node', 'script.js', '--approval-mode', 'plan']; const argv = await parseArguments(createTestMergedSettings()); const settings = createTestMergedSettings({ - experimental: { - plan: true, + general: { + plan: { enabled: true }, }, }); const config = await loadCliConfig(settings, 'test-session', argv); @@ -2767,12 +2848,12 @@ describe('loadCliConfig approval mode', () => { expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.DEFAULT); }); - it('should throw error when --approval-mode=plan is used but 
experimental.plan is disabled', async () => { + it('should throw error when --approval-mode=plan is used but plan is disabled', async () => { process.argv = ['node', 'script.js', '--approval-mode', 'plan']; const argv = await parseArguments(createTestMergedSettings()); const settings = createTestMergedSettings({ - experimental: { - plan: false, + general: { + plan: { enabled: false }, }, }); @@ -2893,22 +2974,26 @@ describe('loadCliConfig approval mode', () => { expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.YOLO); }); - it('should respect plan mode from settings when experimental.plan is enabled', async () => { + it('should respect plan mode from settings when plan is enabled', async () => { process.argv = ['node', 'script.js']; const settings = createTestMergedSettings({ - general: { defaultApprovalMode: 'plan' }, - experimental: { plan: true }, + general: { + defaultApprovalMode: 'plan', + plan: { enabled: true }, + }, }); const argv = await parseArguments(settings); const config = await loadCliConfig(settings, 'test-session', argv); expect(config.getApprovalMode()).toBe(ServerConfig.ApprovalMode.PLAN); }); - it('should throw error if plan mode is in settings but experimental.plan is disabled', async () => { + it('should fall back to default if plan mode is in settings but disabled', async () => { process.argv = ['node', 'script.js']; const settings = createTestMergedSettings({ - general: { defaultApprovalMode: 'plan' }, - experimental: { plan: false }, + general: { + defaultApprovalMode: 'plan', + plan: { enabled: false }, + }, }); const argv = await parseArguments(settings); const config = await loadCliConfig(settings, 'test-session', argv); @@ -3696,7 +3781,9 @@ describe('loadCliConfig mcpEnabled', () => { it('should use plan directory from active extension when user has not specified one', async () => { process.argv = ['node', 'script.js']; const settings = createTestMergedSettings({ - experimental: { plan: true }, + general: { + plan: { 
enabled: true }, + }, }); const argv = await parseArguments(settings); @@ -3715,9 +3802,11 @@ describe('loadCliConfig mcpEnabled', () => { it('should NOT use plan directory from active extension when user has specified one', async () => { process.argv = ['node', 'script.js']; const settings = createTestMergedSettings({ - experimental: { plan: true }, general: { - plan: { directory: 'user-plans-dir' }, + plan: { + enabled: true, + directory: 'user-plans-dir', + }, }, }); const argv = await parseArguments(settings); @@ -3738,7 +3827,9 @@ describe('loadCliConfig mcpEnabled', () => { it('should NOT use plan directory from inactive extension', async () => { process.argv = ['node', 'script.js']; const settings = createTestMergedSettings({ - experimental: { plan: true }, + general: { + plan: { enabled: true }, + }, }); const argv = await parseArguments(settings); @@ -3759,7 +3850,9 @@ describe('loadCliConfig mcpEnabled', () => { it('should use default path if neither user nor extension settings provide a plan directory', async () => { process.argv = ['node', 'script.js']; const settings = createTestMergedSettings({ - experimental: { plan: true }, + general: { + plan: { enabled: true }, + }, }); const argv = await parseArguments(settings); diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 98bd8b3e42..b96bf34f4b 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -46,6 +46,7 @@ import { type HookEventName, type OutputFormat, detectIdeFromEnv, + generalistProfile, } from '@google/gemini-cli-core'; import { type Settings, @@ -676,9 +677,9 @@ export async function loadCliConfig( approvalMode = ApprovalMode.AUTO_EDIT; break; case 'plan': - if (!(settings.experimental?.plan ?? false)) { + if (!(settings.general?.plan?.enabled ?? true)) { debugLogger.warn( - 'Approval mode "plan" is only available when experimental.plan is enabled. 
Falling back to "default".', + 'Approval mode "plan" is disabled in your settings. Falling back to "default".', ); approvalMode = ApprovalMode.DEFAULT; } else { @@ -890,6 +891,16 @@ export async function loadCliConfig( } } + const useGeneralistProfile = + settings.experimental?.generalistProfile ?? false; + const useContextManagement = + settings.experimental?.contextManagement ?? false; + const contextManagement = { + ...(useGeneralistProfile ? generalistProfile : {}), + ...(useContextManagement ? settings?.contextManagement : {}), + enabled: useContextManagement || useGeneralistProfile, + }; + return new Config({ acpMode: isAcpMode, clientName, @@ -934,6 +945,8 @@ export async function loadCliConfig( : undefined, blockedEnvironmentVariables: settings.security?.environmentVariableRedaction?.blocked, + allowedEnvironmentVariables: + settings.security?.environmentVariableRedaction?.allowed, enableEnvironmentVariableRedaction: settings.security?.environmentVariableRedaction?.enabled, userMemory: memoryContent, @@ -973,7 +986,7 @@ export async function loadCliConfig( extensionRegistryURI, enableExtensionReloading: settings.experimental?.extensionReloading, enableAgents: settings.experimental?.enableAgents, - plan: settings.experimental?.plan, + plan: settings.general?.plan?.enabled ?? 
true, tracker: settings.experimental?.taskTracker, directWebFetch: settings.experimental?.directWebFetch, planSettings: settings.general?.plan?.directory @@ -984,17 +997,9 @@ export async function loadCliConfig( disabledSkills: settings.skills?.disabled, experimentalJitContext: settings.experimental?.jitContext, experimentalMemoryManager: settings.experimental?.memoryManager, - experimentalAgentHistoryTruncation: - settings.experimental?.agentHistoryTruncation, - experimentalAgentHistoryTruncationThreshold: - settings.experimental?.agentHistoryTruncationThreshold, - experimentalAgentHistoryRetainedMessages: - settings.experimental?.agentHistoryRetainedMessages, - experimentalAgentHistorySummarization: - settings.experimental?.agentHistorySummarization, + contextManagement, modelSteering: settings.experimental?.modelSteering, topicUpdateNarration: settings.experimental?.topicUpdateNarration, - toolOutputMasking: settings.experimental?.toolOutputMasking, noBrowser: !!process.env['NO_BROWSER'], summarizeToolOutput: settings.model?.summarizeToolOutput, ideMode, @@ -1005,6 +1010,8 @@ export async function loadCliConfig( trustedFolder, useBackgroundColor: settings.ui?.useBackgroundColor, useAlternateBuffer: settings.ui?.useAlternateBuffer, + useTerminalBuffer: settings.ui?.terminalBuffer, + useRenderProcess: settings.ui?.renderProcess, useRipgrep: settings.tools?.useRipgrep, enableInteractiveShell: settings.tools?.shell?.enableInteractiveShell, shellBackgroundCompletionBehavior: settings.tools?.shell @@ -1021,6 +1028,7 @@ export async function loadCliConfig( format: (argv.outputFormat ?? 
settings.output?.format) as OutputFormat, }, gemmaModelRouter: settings.experimental?.gemmaModelRouter, + adk: settings.experimental?.adk, fakeResponses: argv.fakeResponses, recordResponses: argv.recordResponses, retryFetchErrors: settings.general?.retryFetchErrors, @@ -1076,7 +1084,7 @@ async function resolveWorktreeSettings( if (isGeminiWorktree(toplevel, projectRoot)) { worktreePath = toplevel; } - } catch (_e) { + } catch { return undefined; } diff --git a/packages/cli/src/config/extension-manager-permissions.test.ts b/packages/cli/src/config/extension-manager-permissions.test.ts index 662f30d430..6d6e848fef 100644 --- a/packages/cli/src/config/extension-manager-permissions.test.ts +++ b/packages/cli/src/config/extension-manager-permissions.test.ts @@ -33,7 +33,7 @@ describe('copyExtension permissions', () => { makeWritableSync(path.join(p, child)), ); } - } catch (_e) { + } catch { // Ignore errors during cleanup } }; diff --git a/packages/cli/src/config/extension-manager.test.ts b/packages/cli/src/config/extension-manager.test.ts index 6c20737be9..33c335c16b 100644 --- a/packages/cli/src/config/extension-manager.test.ts +++ b/packages/cli/src/config/extension-manager.test.ts @@ -101,7 +101,7 @@ describe('ExtensionManager', () => { themeManager.clearExtensionThemes(); try { fs.rmSync(tempHomeDir, { recursive: true, force: true }); - } catch (_e) { + } catch { // Ignore } }); diff --git a/packages/cli/src/config/extension.ts b/packages/cli/src/config/extension.ts index 564c4fbb6f..20a7073464 100644 --- a/packages/cli/src/config/extension.ts +++ b/packages/cli/src/config/extension.ts @@ -63,7 +63,7 @@ export function loadInstallMetadata( // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion const metadata = JSON.parse(configContent) as ExtensionInstallMetadata; return metadata; - } catch (_e) { + } catch { return undefined; } } diff --git a/packages/cli/src/config/extensions/github.ts b/packages/cli/src/config/extensions/github.ts index 
156fe78309..06cf344a0d 100644 --- a/packages/cli/src/config/extensions/github.ts +++ b/packages/cli/src/config/extensions/github.ts @@ -151,7 +151,7 @@ export async function fetchReleaseFromGithub( return await fetchJson( `https://api.github.com/repos/${owner}/${repo}/releases/latest`, ); - } catch (_) { + } catch { // This can fail if there is no release marked latest. In that case // we want to just try the pre-release logic below. } diff --git a/packages/cli/src/config/footerItems.test.ts b/packages/cli/src/config/footerItems.test.ts index 420246811b..d9ef9bc3f2 100644 --- a/packages/cli/src/config/footerItems.test.ts +++ b/packages/cli/src/config/footerItems.test.ts @@ -5,87 +5,153 @@ */ import { describe, it, expect } from 'vitest'; -import { deriveItemsFromLegacySettings } from './footerItems.js'; +import { + deriveItemsFromLegacySettings, + resolveFooterState, +} from './footerItems.js'; import { createMockSettings } from '../test-utils/settings.js'; -describe('deriveItemsFromLegacySettings', () => { - it('returns defaults when no legacy settings are customized', () => { - const settings = createMockSettings({ - ui: { footer: { hideContextPercentage: true } }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).toEqual([ - 'workspace', - 'git-branch', - 'sandbox', - 'model-name', - 'quota', - ]); - }); +describe('footerItems', () => { + describe('deriveItemsFromLegacySettings', () => { + it('returns defaults when no legacy settings are customized', () => { + const settings = createMockSettings({ + ui: { footer: { hideContextPercentage: true } }, + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).toEqual([ + 'workspace', + 'git-branch', + 'sandbox', + 'model-name', + 'quota', + ]); + }); - it('removes workspace when hideCWD is true', () => { - const settings = createMockSettings({ - ui: { footer: { hideCWD: true, hideContextPercentage: true } }, - }).merged; - const items = 
deriveItemsFromLegacySettings(settings); - expect(items).not.toContain('workspace'); - }); + it('removes workspace when hideCWD is true', () => { + const settings = createMockSettings({ + ui: { footer: { hideCWD: true, hideContextPercentage: true } }, + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).not.toContain('workspace'); + }); - it('removes sandbox when hideSandboxStatus is true', () => { - const settings = createMockSettings({ - ui: { footer: { hideSandboxStatus: true, hideContextPercentage: true } }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).not.toContain('sandbox'); - }); - - it('removes model-name, context-used, and quota when hideModelInfo is true', () => { - const settings = createMockSettings({ - ui: { footer: { hideModelInfo: true, hideContextPercentage: true } }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).not.toContain('model-name'); - expect(items).not.toContain('context-used'); - expect(items).not.toContain('quota'); - }); - - it('includes context-used when hideContextPercentage is false', () => { - const settings = createMockSettings({ - ui: { footer: { hideContextPercentage: false } }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).toContain('context-used'); - // Should be after model-name - const modelIdx = items.indexOf('model-name'); - const contextIdx = items.indexOf('context-used'); - expect(contextIdx).toBe(modelIdx + 1); - }); - - it('includes memory-usage when showMemoryUsage is true', () => { - const settings = createMockSettings({ - ui: { showMemoryUsage: true, footer: { hideContextPercentage: true } }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).toContain('memory-usage'); - }); - - it('handles combination of settings', () => { - const settings = createMockSettings({ - ui: { - showMemoryUsage: true, - footer: { - hideCWD: true, - 
hideModelInfo: true, - hideContextPercentage: false, + it('removes sandbox when hideSandboxStatus is true', () => { + const settings = createMockSettings({ + ui: { + footer: { hideSandboxStatus: true, hideContextPercentage: true }, }, - }, - }).merged; - const items = deriveItemsFromLegacySettings(settings); - expect(items).toEqual([ - 'git-branch', - 'sandbox', - 'context-used', - 'memory-usage', - ]); + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).not.toContain('sandbox'); + }); + + it('removes model-name, context-used, and quota when hideModelInfo is true', () => { + const settings = createMockSettings({ + ui: { footer: { hideModelInfo: true, hideContextPercentage: true } }, + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).not.toContain('model-name'); + expect(items).not.toContain('context-used'); + expect(items).not.toContain('quota'); + }); + + it('includes context-used when hideContextPercentage is false', () => { + const settings = createMockSettings({ + ui: { footer: { hideContextPercentage: false } }, + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).toContain('context-used'); + // Should be after model-name + const modelIdx = items.indexOf('model-name'); + const contextIdx = items.indexOf('context-used'); + expect(contextIdx).toBe(modelIdx + 1); + }); + + it('includes memory-usage when showMemoryUsage is true', () => { + const settings = createMockSettings({ + ui: { showMemoryUsage: true, footer: { hideContextPercentage: true } }, + }).merged; + const items = deriveItemsFromLegacySettings(settings); + expect(items).toContain('memory-usage'); + }); + + it('handles combination of settings', () => { + const settings = createMockSettings({ + ui: { + showMemoryUsage: true, + footer: { + hideCWD: true, + hideModelInfo: true, + hideContextPercentage: false, + }, + }, + }).merged; + const items = deriveItemsFromLegacySettings(settings); + 
expect(items).toEqual([ + 'git-branch', + 'sandbox', + 'context-used', + 'memory-usage', + ]); + }); + }); + + describe('resolveFooterState', () => { + it('filters out auth item when showUserIdentity is false', () => { + const settings = createMockSettings({ + ui: { + showUserIdentity: false, + footer: { + items: ['workspace', 'auth', 'model-name'], + }, + }, + }).merged; + + const state = resolveFooterState(settings); + expect(state.orderedIds).not.toContain('auth'); + expect(state.selectedIds.has('auth')).toBe(false); + // It should also not be in the 'others' part of orderedIds + expect(state.orderedIds).toEqual([ + 'workspace', + 'model-name', + 'git-branch', + 'sandbox', + 'context-used', + 'quota', + 'memory-usage', + 'session-id', + 'code-changes', + 'token-count', + ]); + }); + + it('includes auth item when showUserIdentity is true', () => { + const settings = createMockSettings({ + ui: { + showUserIdentity: true, + footer: { + items: ['workspace', 'auth', 'model-name'], + }, + }, + }).merged; + + const state = resolveFooterState(settings); + expect(state.orderedIds).toContain('auth'); + expect(state.selectedIds.has('auth')).toBe(true); + }); + + it('includes auth item by default when showUserIdentity is undefined (defaults to true)', () => { + const settings = createMockSettings({ + ui: { + footer: { + items: ['workspace', 'auth', 'model-name'], + }, + }, + }).merged; + + const state = resolveFooterState(settings); + expect(state.orderedIds).toContain('auth'); + expect(state.selectedIds.has('auth')).toBe(true); + }); }); }); diff --git a/packages/cli/src/config/footerItems.ts b/packages/cli/src/config/footerItems.ts index 8410d0b5ec..9f3943b692 100644 --- a/packages/cli/src/config/footerItems.ts +++ b/packages/cli/src/config/footerItems.ts @@ -47,6 +47,11 @@ export const ALL_ITEMS = [ header: 'session', description: 'Unique identifier for the current session', }, + { + id: 'auth', + header: '/auth', + description: 'Current authentication info', + }, { id: 
'code-changes', header: 'diff', @@ -70,6 +75,7 @@ export const DEFAULT_ORDER = [ 'quota', 'memory-usage', 'session-id', + 'auth', 'code-changes', 'token-count', ]; @@ -121,10 +127,19 @@ export function resolveFooterState(settings: MergedSettings): { orderedIds: string[]; selectedIds: Set; } { + const showUserIdentity = settings.ui?.showUserIdentity !== false; + const filteredValidIds = showUserIdentity + ? VALID_IDS + : new Set([...VALID_IDS].filter((id) => id !== 'auth')); + const source = ( settings.ui?.footer?.items ?? deriveItemsFromLegacySettings(settings) - ).filter((id: string) => VALID_IDS.has(id)); - const others = DEFAULT_ORDER.filter((id) => !source.includes(id)); + ).filter((id: string) => filteredValidIds.has(id)); + + const others = DEFAULT_ORDER.filter( + (id) => !source.includes(id) && filteredValidIds.has(id), + ); + return { orderedIds: [...source, ...others], selectedIds: new Set(source), diff --git a/packages/cli/src/config/settings.ts b/packages/cli/src/config/settings.ts index 984bdb8d60..40d275e79e 100644 --- a/packages/cli/src/config/settings.ts +++ b/packages/cli/src/config/settings.ts @@ -612,7 +612,7 @@ export function loadEnvironment( } } } - } catch (_e) { + } catch { // Errors are ignored to match the behavior of `dotenv.config({ quiet: true })`. 
} } @@ -1124,15 +1124,15 @@ function migrateExperimentalSettings( }; let modified = false; - const migrateExperimental = ( + const migrateExperimental = >( oldKey: string, - migrateFn: (oldValue: Record) => void, + migrateFn: (oldValue: T) => void, ) => { const old = experimentalSettings[oldKey]; - if (old) { + if (old !== undefined) { foundDeprecated?.push(`experimental.${oldKey}`); // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - migrateFn(old as Record); + migrateFn(old as T); modified = true; } }; @@ -1197,6 +1197,24 @@ function migrateExperimentalSettings( agentsOverrides['cli_help'] = override; }); + // Migrate experimental.plan -> general.plan.enabled + migrateExperimental('plan', (planValue) => { + const generalSettings = + (settings.general as Record | undefined) || {}; + const newGeneral = { ...generalSettings }; + const planSettings = + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + (newGeneral['plan'] as Record | undefined) || {}; + const newPlan = { ...planSettings }; + + if (newPlan['enabled'] === undefined) { + newPlan['enabled'] = planValue; + newGeneral['plan'] = newPlan; + loadedSettings.setValue(scope, 'general', newGeneral); + modified = true; + } + }); + if (modified) { agentsSettings['overrides'] = agentsOverrides; loadedSettings.setValue(scope, 'agents', agentsSettings); @@ -1205,6 +1223,7 @@ function migrateExperimentalSettings( const newExperimental = { ...experimentalSettings }; delete newExperimental['codebaseInvestigatorSettings']; delete newExperimental['cliHelpAgentSettings']; + delete newExperimental['plan']; loadedSettings.setValue(scope, 'experimental', newExperimental); } return true; diff --git a/packages/cli/src/config/settingsSchema.test.ts b/packages/cli/src/config/settingsSchema.test.ts index c358cd65aa..27639fa031 100644 --- a/packages/cli/src/config/settingsSchema.test.ts +++ b/packages/cli/src/config/settingsSchema.test.ts @@ -87,7 +87,7 @@ describe('SettingsSchema', () => 
{ const definition = getSettingsSchema().ui?.properties?.loadingPhrases; expect(definition).toBeDefined(); expect(definition?.type).toBe('enum'); - expect(definition?.default).toBe('tips'); + expect(definition?.default).toBe('off'); expect(definition?.options?.map((o) => o.value)).toEqual([ 'tips', 'witty', @@ -418,14 +418,17 @@ describe('SettingsSchema', () => { }); it('should have plan setting in schema', () => { - const setting = getSettingsSchema().experimental.properties.plan; + const setting = + getSettingsSchema().general.properties.plan.properties.enabled; expect(setting).toBeDefined(); expect(setting.type).toBe('boolean'); - expect(setting.category).toBe('Experimental'); + expect(setting.category).toBe('General'); expect(setting.default).toBe(true); expect(setting.requiresRestart).toBe(true); expect(setting.showInDialog).toBe(true); - expect(setting.description).toBe('Enable Plan Mode.'); + expect(setting.description).toBe( + 'Enable Plan Mode for read-only safety during planning.', + ); }); it('should have hooksConfig.notifications setting in schema', () => { @@ -502,6 +505,31 @@ describe('SettingsSchema', () => { 'The model to use for the classifier. 
Only tested on `gemma3-1b-gpu-custom`.', ); }); + + it('should have adk setting in schema', () => { + const adk = getSettingsSchema().experimental.properties.adk; + expect(adk).toBeDefined(); + expect(adk.type).toBe('object'); + expect(adk.category).toBe('Experimental'); + expect(adk.default).toEqual({}); + expect(adk.requiresRestart).toBe(true); + expect(adk.showInDialog).toBe(false); + expect(adk.description).toBe( + 'Settings for the Agent Development Kit (ADK).', + ); + + const agentSessionNoninteractiveEnabled = + adk.properties.agentSessionNoninteractiveEnabled; + expect(agentSessionNoninteractiveEnabled).toBeDefined(); + expect(agentSessionNoninteractiveEnabled.type).toBe('boolean'); + expect(agentSessionNoninteractiveEnabled.category).toBe('Experimental'); + expect(agentSessionNoninteractiveEnabled.default).toBe(false); + expect(agentSessionNoninteractiveEnabled.requiresRestart).toBe(true); + expect(agentSessionNoninteractiveEnabled.showInDialog).toBe(false); + expect(agentSessionNoninteractiveEnabled.description).toBe( + 'Enable non-interactive agent sessions.', + ); + }); }); it('has JSON schema definitions for every referenced ref', () => { diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index c40e87db18..9343be6b02 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -293,6 +293,16 @@ const SETTINGS_SCHEMA = { description: 'Planning features configuration.', showInDialog: false, properties: { + enabled: { + type: 'boolean', + label: 'Enable Plan Mode', + category: 'General', + requiresRestart: true, + default: true, + description: + 'Enable Plan Mode for read-only safety during planning.', + showInDialog: true, + }, directory: { type: 'string', label: 'Plan Directory', @@ -561,6 +571,16 @@ const SETTINGS_SCHEMA = { description: 'Show the "? 
for shortcuts" hint above the input.', showInDialog: true, }, + compactToolOutput: { + type: 'boolean', + label: 'Compact Tool Output', + category: 'UI', + requiresRestart: false, + default: true, + description: + 'Display tool outputs (like directory listings and file reads) in a compact, structured format.', + showInDialog: true, + }, hideBanner: { type: 'boolean', label: 'Hide Banner', @@ -723,6 +743,24 @@ const SETTINGS_SCHEMA = { 'Use an alternate screen buffer for the UI, preserving shell history.', showInDialog: true, }, + renderProcess: { + type: 'boolean', + label: 'Render Process', + category: 'UI', + requiresRestart: true, + default: true, + description: 'Enable Ink render process for the UI.', + showInDialog: true, + }, + terminalBuffer: { + type: 'boolean', + label: 'Terminal Buffer', + category: 'UI', + requiresRestart: true, + default: true, + description: 'Use the new terminal buffer architecture for rendering.', + showInDialog: true, + }, useBackgroundColor: { type: 'boolean', label: 'Use Background Color', @@ -756,9 +794,9 @@ const SETTINGS_SCHEMA = { label: 'Loading Phrases', category: 'UI', requiresRestart: false, - default: 'tips', + default: 'off', description: - 'What to show while the model is working: tips, witty comments, both, or nothing.', + 'What to show while the model is working: tips, witty comments, all, or off.', showInDialog: true, options: [ { value: 'tips', label: 'Tips' }, @@ -1182,7 +1220,8 @@ const SETTINGS_SCHEMA = { category: 'Advanced', requiresRestart: true, default: undefined as string | undefined, - description: 'Model override for the visual agent.', + description: + "Model for the visual agent's analyze_screenshot tool. 
When set, enables the tool.", showInDialog: false, }, allowedDomains: { @@ -1488,7 +1527,7 @@ const SETTINGS_SCHEMA = { label: 'Show Color', category: 'Tools', requiresRestart: false, - default: false, + default: true, description: 'Show color in shell output.', showInDialog: true, }, @@ -1672,10 +1711,10 @@ const SETTINGS_SCHEMA = { type: 'boolean', label: 'Tool Sandboxing', category: 'Security', - requiresRestart: false, + requiresRestart: true, default: false, description: - 'Experimental tool-level sandboxing (implementation in progress).', + 'Tool-level sandboxing. Isolates individual tools instead of the entire CLI process.', showInDialog: true, }, disableYoloMode: { @@ -1867,7 +1906,7 @@ const SETTINGS_SCHEMA = { label: 'Auto Configure Max Old Space Size', category: 'Advanced', requiresRestart: true, - default: false, + default: true, description: 'Automatically configure Node.js memory limits', showInDialog: true, }, @@ -1913,54 +1952,22 @@ const SETTINGS_SCHEMA = { description: 'Setting to enable experimental features', showInDialog: false, properties: { - toolOutputMasking: { + adk: { type: 'object', - label: 'Tool Output Masking', + label: 'ADK', category: 'Experimental', requiresRestart: true, - ignoreInDocs: false, default: {}, - description: - 'Advanced settings for tool output masking to manage context window efficiency.', + description: 'Settings for the Agent Development Kit (ADK).', showInDialog: false, properties: { - enabled: { + agentSessionNoninteractiveEnabled: { type: 'boolean', - label: 'Enable Tool Output Masking', + label: 'Agent Session Non-interactive Enabled', category: 'Experimental', requiresRestart: true, - default: true, - description: 'Enables tool output masking to save tokens.', - showInDialog: true, - }, - toolProtectionThreshold: { - type: 'number', - label: 'Tool Protection Threshold', - category: 'Experimental', - requiresRestart: true, - default: 50000, - description: - 'Minimum number of tokens to protect from masking 
(most recent tool outputs).', - showInDialog: false, - }, - minPrunableTokensThreshold: { - type: 'number', - label: 'Min Prunable Tokens Threshold', - category: 'Experimental', - requiresRestart: true, - default: 30000, - description: - 'Minimum prunable tokens required to trigger a masking pass.', - showInDialog: false, - }, - protectLatestTurn: { - type: 'boolean', - label: 'Protect Latest Turn', - category: 'Experimental', - requiresRestart: true, - default: true, - description: - 'Ensures the absolute latest turn is never masked, regardless of token count.', + default: false, + description: 'Enable non-interactive agent sessions.', showInDialog: false, }, }, @@ -2036,7 +2043,7 @@ const SETTINGS_SCHEMA = { label: 'JIT Context Loading', category: 'Experimental', requiresRestart: true, - default: true, + default: false, description: 'Enable Just-In-Time (JIT) context loading.', showInDialog: false, }, @@ -2060,15 +2067,6 @@ const SETTINGS_SCHEMA = { 'Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it).', showInDialog: true, }, - plan: { - type: 'boolean', - label: 'Plan', - category: 'Experimental', - requiresRestart: true, - default: true, - description: 'Enable Plan Mode.', - showInDialog: true, - }, taskTracker: { type: 'boolean', label: 'Task Tracker', @@ -2169,44 +2167,23 @@ const SETTINGS_SCHEMA = { 'Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories.', showInDialog: true, }, - agentHistoryTruncation: { + generalistProfile: { type: 'boolean', - label: 'Agent History Truncation', + label: 'Use the generalist profile to manage agent contexts.', category: 'Experimental', requiresRestart: true, default: false, description: - 'Enable truncation window logic for the Agent History Provider.', + 'Suitable for general coding and software development tasks.', showInDialog: true, 
}, - agentHistoryTruncationThreshold: { - type: 'number', - label: 'Agent History Truncation Threshold', - category: 'Experimental', - requiresRestart: true, - default: 30, - description: - 'The maximum number of messages before history is truncated.', - showInDialog: true, - }, - agentHistoryRetainedMessages: { - type: 'number', - label: 'Agent History Retained Messages', - category: 'Experimental', - requiresRestart: true, - default: 15, - description: - 'The number of recent messages to retain after truncation.', - showInDialog: true, - }, - agentHistorySummarization: { + contextManagement: { type: 'boolean', - label: 'Agent History Summarization', + label: 'Enable Context Management', category: 'Experimental', requiresRestart: true, default: false, - description: - 'Enable summarization of truncated content via a small model for the Agent History Provider.', + description: 'Enable logic for context management.', showInDialog: true, }, topicUpdateNarration: { @@ -2485,6 +2462,171 @@ const SETTINGS_SCHEMA = { }, }, + contextManagement: { + type: 'object', + label: 'Context Management', + category: 'Experimental', + requiresRestart: true, + default: {}, + description: + 'Settings for agent history and tool distillation context management.', + showInDialog: false, + properties: { + historyWindow: { + type: 'object', + label: 'History Window Settings', + category: 'Context Management', + requiresRestart: true, + default: {}, + showInDialog: false, + properties: { + maxTokens: { + type: 'number', + label: 'Max Tokens', + category: 'Context Management', + requiresRestart: true, + default: 150_000, + description: + 'The number of tokens to allow before triggering compression.', + showInDialog: false, + }, + retainedTokens: { + type: 'number', + label: 'Retained Tokens', + category: 'Context Management', + requiresRestart: true, + default: 40_000, + description: 'The number of tokens to always retain.', + showInDialog: false, + }, + }, + }, + messageLimits: { + type: 
'object', + label: 'Message Limits', + category: 'Context Management', + requiresRestart: true, + default: {}, + showInDialog: false, + properties: { + normalMaxTokens: { + type: 'number', + label: 'Normal Maximum Tokens', + category: 'Context Management', + requiresRestart: true, + default: 2500, + description: + 'The target number of tokens to budget for a normal conversation turn.', + showInDialog: false, + }, + retainedMaxTokens: { + type: 'number', + label: 'Retained Maximum Tokens', + category: 'Context Management', + requiresRestart: true, + default: 12000, + description: + 'The maximum number of tokens a single conversation turn can consume before truncation.', + showInDialog: false, + }, + normalizationHeadRatio: { + type: 'number', + label: 'Normalization Head Ratio', + category: 'Context Management', + requiresRestart: true, + default: 0.25, + description: + 'The ratio of tokens to retain from the beginning of a truncated message (0.0 to 1.0).', + showInDialog: false, + }, + }, + }, + tools: { + type: 'object', + label: 'Context Management Tools', + category: 'Context Management', + requiresRestart: true, + default: {}, + showInDialog: false, + properties: { + distillation: { + type: 'object', + label: 'Tool Distillation', + category: 'Context Management', + requiresRestart: true, + default: {}, + showInDialog: false, + properties: { + maxOutputTokens: { + type: 'number', + label: 'Max Output Tokens', + category: 'Context Management', + requiresRestart: true, + default: 10_000, + description: + 'Maximum tokens to show to the model when truncating large tool outputs.', + showInDialog: false, + }, + summarizationThresholdTokens: { + type: 'number', + label: 'Tool Summarization Threshold', + category: 'Context Management', + requiresRestart: true, + default: 20_000, + description: + 'Threshold above which truncated tool outputs will be summarized by an LLM.', + showInDialog: false, + }, + }, + }, + outputMasking: { + type: 'object', + label: 'Tool Output 
Masking', + category: 'Context Management', + requiresRestart: true, + ignoreInDocs: false, + default: {}, + description: + 'Advanced settings for tool output masking to manage context window efficiency.', + showInDialog: false, + properties: { + protectionThresholdTokens: { + type: 'number', + label: 'Tool Protection Threshold (Tokens)', + category: 'Context Management', + requiresRestart: true, + default: 50_000, + description: + 'Minimum number of tokens to protect from masking (most recent tool outputs).', + showInDialog: false, + }, + minPrunableThresholdTokens: { + type: 'number', + label: 'Min Prunable Tokens Threshold', + category: 'Context Management', + requiresRestart: true, + default: 30_000, + description: + 'Minimum prunable tokens required to trigger a masking pass.', + showInDialog: false, + }, + protectLatestTurn: { + type: 'boolean', + label: 'Protect Latest Turn', + category: 'Context Management', + requiresRestart: true, + default: true, + description: + 'Ensures the absolute latest turn is never masked, regardless of token count.', + showInDialog: false, + }, + }, + }, + }, + }, + }, + }, + admin: { type: 'object', label: 'Admin', diff --git a/packages/cli/src/gemini_cleanup.test.tsx b/packages/cli/src/gemini_cleanup.test.tsx index b2fa2139fd..4bbc7e7648 100644 --- a/packages/cli/src/gemini_cleanup.test.tsx +++ b/packages/cli/src/gemini_cleanup.test.tsx @@ -327,6 +327,7 @@ describe('gemini.tsx main function cleanup', () => { refreshAuth: vi.fn(), getRemoteAdminSettings: vi.fn(() => undefined), getUseAlternateBuffer: vi.fn(() => false), + getUseTerminalBuffer: vi.fn(() => false), ...overrides, } as unknown as Config; } diff --git a/packages/cli/src/integration-tests/modelSteering.test.tsx b/packages/cli/src/integration-tests/modelSteering.test.tsx index bada268329..80640045a0 100644 --- a/packages/cli/src/integration-tests/modelSteering.test.tsx +++ b/packages/cli/src/integration-tests/modelSteering.test.tsx @@ -67,7 +67,7 @@ describe('Model 
Steering Integration', () => { // Then it should proceed with the next action await rig.waitForOutput( - /Since you want me to focus on .txt files,[\s\S]*I will read file1.txt/, + /Since you want me to focus on \.txt[\s\S]*files,[\s\S]*I will read file1\.txt/, ); await rig.waitForOutput('ReadFile'); diff --git a/packages/cli/src/interactiveCli.tsx b/packages/cli/src/interactiveCli.tsx index a6337ef29c..418f58b193 100644 --- a/packages/cli/src/interactiveCli.tsx +++ b/packages/cli/src/interactiveCli.tsx @@ -43,9 +43,9 @@ import { KeypressProvider } from './ui/contexts/KeypressContext.js'; import { useKittyKeyboardProtocol } from './ui/hooks/useKittyKeyboardProtocol.js'; import { ScrollProvider } from './ui/contexts/ScrollProvider.js'; import { TerminalProvider } from './ui/contexts/TerminalContext.js'; -import { isAlternateBufferEnabled } from './ui/hooks/useAlternateBuffer.js'; import { OverflowProvider } from './ui/contexts/OverflowContext.js'; import { profiler } from './ui/components/DebugProfiler.js'; +import { initializeConsoleStore } from './ui/hooks/useConsoleMessages.js'; const SLOW_RENDER_MS = 200; @@ -57,12 +57,13 @@ export async function startInteractiveUI( resumedSessionData: ResumedSessionData | undefined, initializationResult: InitializationResult, ) { + initializeConsoleStore(); // Never enter Ink alternate buffer mode when screen reader mode is enabled // as there is no benefit of alternate buffer mode when using a screen reader // and the Ink alternate buffer mode requires line wrapping harmful to // screen readers. const useAlternateBuffer = shouldEnterAlternateScreen( - isAlternateBufferEnabled(config), + config.getUseAlternateBuffer(), config.getScreenReader(), ); const mouseEventsEnabled = useAlternateBuffer; @@ -131,7 +132,6 @@ export async function startInteractiveUI( // Wait a moment for shpool to stabilize terminal size and state. await new Promise((resolve) => setTimeout(resolve, 100)); } - const instance = render( process.env['DEBUG'] ? 
( @@ -152,8 +152,12 @@ export async function startInteractiveUI( } profiler.reportFrameRendered(); }, + standardReactLayoutTiming: + useAlternateBuffer || config.getUseTerminalBuffer(), patchConsole: false, alternateBuffer: useAlternateBuffer, + renderProcess: config.getUseRenderProcess(), + terminalBuffer: config.getUseTerminalBuffer(), incrementalRendering: settings.merged.ui.incrementalRendering !== false && useAlternateBuffer && diff --git a/packages/cli/src/nonInteractiveCli.test.ts b/packages/cli/src/nonInteractiveCli.test.ts index 4e45b0f188..855707de9e 100644 --- a/packages/cli/src/nonInteractiveCli.test.ts +++ b/packages/cli/src/nonInteractiveCli.test.ts @@ -166,7 +166,7 @@ describe('runNonInteractive', () => { }; mockConfig = { - initialize: vi.fn().mockResolvedValue(undefined), + initialize: vi.fn().mockReturnValue(Promise.resolve(undefined)), getMessageBus: vi.fn().mockReturnValue({ subscribe: vi.fn(), unsubscribe: vi.fn(), @@ -190,6 +190,7 @@ describe('runNonInteractive', () => { isTrustedFolder: vi.fn().mockReturnValue(false), getRawOutput: vi.fn().mockReturnValue(false), getAcceptRawOutputRisk: vi.fn().mockReturnValue(false), + getAgentSessionNoninteractiveEnabled: vi.fn().mockReturnValue(false), } as unknown as Config; mockSettings = { @@ -1712,7 +1713,7 @@ describe('runNonInteractive', () => { input, prompt_id: promptId, }); - } catch (_error) { + } catch { // Expected exit } diff --git a/packages/cli/src/nonInteractiveCli.ts b/packages/cli/src/nonInteractiveCli.ts index 4f9d817204..26daaf66a1 100644 --- a/packages/cli/src/nonInteractiveCli.ts +++ b/packages/cli/src/nonInteractiveCli.ts @@ -46,6 +46,7 @@ import { handleMaxTurnsExceededError, } from './utils/errors.js'; import { TextOutput } from './ui/utils/textOutput.js'; +import { runNonInteractive as runNonInteractiveAgentSession } from './nonInteractiveCliAgentSession.js'; interface RunNonInteractiveParams { config: Config; @@ -55,13 +56,16 @@ interface RunNonInteractiveParams { 
resumedSessionData?: ResumedSessionData; } -export async function runNonInteractive({ - config, - settings, - input, - prompt_id, - resumedSessionData, -}: RunNonInteractiveParams): Promise { +export async function runNonInteractive( + params: RunNonInteractiveParams, +): Promise { + const useAgentSession = params.config.getAgentSessionNoninteractiveEnabled(); + if (useAgentSession) { + return runNonInteractiveAgentSession(params); + } + + const { config, settings, input, prompt_id, resumedSessionData } = params; + return promptIdContext.run(prompt_id, async () => { const consolePatcher = new ConsolePatcher({ stderr: true, diff --git a/packages/cli/src/nonInteractiveCliAgentSession.test.ts b/packages/cli/src/nonInteractiveCliAgentSession.test.ts new file mode 100644 index 0000000000..617f80aca6 --- /dev/null +++ b/packages/cli/src/nonInteractiveCliAgentSession.test.ts @@ -0,0 +1,2436 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { + Config, + ToolRegistry, + ServerGeminiStreamEvent, + SessionMetrics, + AnyDeclarativeTool, + AnyToolInvocation, + UserFeedbackPayload, +} from '@google/gemini-cli-core'; +import { + ToolErrorType, + GeminiEventType, + OutputFormat, + uiTelemetryService, + FatalInputError, + CoreEvent, + CoreToolCallStatus, +} from '@google/gemini-cli-core'; +import type { Part } from '@google/genai'; +import { runNonInteractive } from './nonInteractiveCliAgentSession.js'; +import { + describe, + it, + expect, + beforeEach, + afterEach, + vi, + type Mock, + type MockInstance, +} from 'vitest'; +import type { LoadedSettings } from './config/settings.js'; + +// Mock core modules +vi.mock('./ui/hooks/atCommandProcessor.js'); + +const mockSetupInitialActivityLogger = vi.hoisted(() => vi.fn()); +vi.mock('./utils/devtoolsService.js', () => ({ + setupInitialActivityLogger: mockSetupInitialActivityLogger, +})); + +const mockCoreEvents = vi.hoisted(() => ({ + on: vi.fn(), + off: vi.fn(), + emit: 
vi.fn(), + emitConsoleLog: vi.fn(), + emitFeedback: vi.fn(), + drainBacklogs: vi.fn(), +})); + +const mockSchedulerSchedule = vi.hoisted(() => vi.fn()); + +vi.mock('@google/gemini-cli-core', async (importOriginal) => { + const original = + await importOriginal(); + + class MockChatRecordingService { + initialize = vi.fn(); + recordMessage = vi.fn(); + recordMessageTokens = vi.fn(); + recordToolCalls = vi.fn(); + } + + return { + ...original, + Scheduler: class { + schedule = mockSchedulerSchedule; + cancelAll = vi.fn(); + }, + isTelemetrySdkInitialized: vi.fn().mockReturnValue(true), + ChatRecordingService: MockChatRecordingService, + uiTelemetryService: { + getMetrics: vi.fn(), + }, + LegacyAgentSession: original.LegacyAgentSession, + geminiPartsToContentParts: original.geminiPartsToContentParts, + coreEvents: mockCoreEvents, + createWorkingStdio: vi.fn(() => ({ + stdout: process.stdout, + stderr: process.stderr, + })), + }; +}); + +const mockGetCommands = vi.hoisted(() => vi.fn()); +const mockCommandServiceCreate = vi.hoisted(() => vi.fn()); +vi.mock('./services/CommandService.js', () => ({ + CommandService: { + create: mockCommandServiceCreate, + }, +})); + +vi.mock('./services/FileCommandLoader.js'); +vi.mock('./services/McpPromptLoader.js'); +vi.mock('./services/BuiltinCommandLoader.js'); + +describe('runNonInteractive', () => { + let mockConfig: Config; + let mockSettings: LoadedSettings; + let mockToolRegistry: ToolRegistry; + let consoleErrorSpy: MockInstance; + let processStdoutSpy: MockInstance; + let processStderrSpy: MockInstance; + let mockGeminiClient: { + sendMessageStream: Mock; + resumeChat: Mock; + getChatRecordingService: Mock; + getChat: Mock; + getCurrentSequenceModel: Mock; + }; + const MOCK_SESSION_METRICS: SessionMetrics = { + models: {}, + tools: { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { + accept: 0, + reject: 0, + modify: 0, + auto_accept: 0, + }, + byName: {}, + }, + files: { + 
totalLinesAdded: 0, + totalLinesRemoved: 0, + }, + }; + + beforeEach(async () => { + mockSchedulerSchedule.mockReset(); + + mockCommandServiceCreate.mockResolvedValue({ + getCommands: mockGetCommands, + }); + + consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + processStdoutSpy = vi + .spyOn(process.stdout, 'write') + .mockImplementation(() => true); + vi.spyOn(process.stdout, 'on').mockImplementation(() => process.stdout); + processStderrSpy = vi + .spyOn(process.stderr, 'write') + .mockImplementation(() => true); + vi.spyOn(process, 'exit').mockImplementation((code) => { + throw new Error(`process.exit(${code}) called`); + }); + + mockToolRegistry = { + getTool: vi.fn(), + getFunctionDeclarations: vi.fn().mockReturnValue([]), + } as unknown as ToolRegistry; + + mockGeminiClient = { + sendMessageStream: vi.fn(), + resumeChat: vi.fn().mockResolvedValue(undefined), + getChatRecordingService: vi.fn(() => ({ + initialize: vi.fn(), + recordMessage: vi.fn(), + recordMessageTokens: vi.fn(), + recordToolCalls: vi.fn(), + })), + getChat: vi.fn(() => ({ recordCompletedToolCalls: vi.fn() })), + getCurrentSequenceModel: vi.fn().mockReturnValue(null), + }; + + mockConfig = { + initialize: vi.fn().mockReturnValue(Promise.resolve(undefined)), + getMessageBus: vi.fn().mockReturnValue({ + subscribe: vi.fn(), + unsubscribe: vi.fn(), + publish: vi.fn(), + }), + getGeminiClient: vi.fn().mockReturnValue(mockGeminiClient), + getToolRegistry: vi.fn().mockReturnValue(mockToolRegistry), + getMaxSessionTurns: vi.fn().mockReturnValue(10), + getSessionId: vi.fn().mockReturnValue('test-session-id'), + getProjectRoot: vi.fn().mockReturnValue('/test/project'), + storage: { + getProjectTempDir: vi.fn().mockReturnValue('/test/project/.gemini/tmp'), + }, + getIdeMode: vi.fn().mockReturnValue(false), + + getContentGeneratorConfig: vi.fn().mockReturnValue({}), + getDebugMode: vi.fn().mockReturnValue(false), + getOutputFormat: vi.fn().mockReturnValue('text'), + getModel: 
vi.fn().mockReturnValue('test-model'), + getFolderTrust: vi.fn().mockReturnValue(false), + isTrustedFolder: vi.fn().mockReturnValue(false), + getRawOutput: vi.fn().mockReturnValue(false), + getAcceptRawOutputRisk: vi.fn().mockReturnValue(false), + getAgentSessionNoninteractiveEnabled: vi.fn().mockReturnValue(false), + } as unknown as Config; + + mockSettings = { + system: { path: '', settings: {} }, + systemDefaults: { path: '', settings: {} }, + user: { path: '', settings: {} }, + workspace: { path: '', settings: {} }, + errors: [], + setValue: vi.fn(), + merged: { + security: { + auth: { + enforcedType: undefined, + }, + }, + }, + isTrusted: true, + migratedInMemoryScopes: new Set(), + forScope: vi.fn(), + computeMergedSettings: vi.fn(), + } as unknown as LoadedSettings; + + const { handleAtCommand } = await import( + './ui/hooks/atCommandProcessor.js' + ); + vi.mocked(handleAtCommand).mockImplementation(async ({ query }) => ({ + processedQuery: [{ text: query }], + })); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + async function* createStreamFromEvents( + events: ServerGeminiStreamEvent[], + ): AsyncGenerator { + for (const event of events) { + yield event; + } + } + + const getWrittenOutput = () => + processStdoutSpy.mock.calls.map((c) => c[0]).join(''); + + it('should process input and write text output', async () => { + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Hello' }, + { type: GeminiEventType.Content, value: ' World' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-1', + }); + + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledWith( + [{ text: 'Test input' }], + expect.any(AbortSignal), + 
'prompt-id-1', + undefined, + false, + 'Test input', + ); + expect(getWrittenOutput()).toBe('Hello World\n'); + // Note: Telemetry shutdown is now handled in runExitCleanup() in cleanup.ts + // so we no longer expect shutdownTelemetry to be called directly here + }); + + it('should stream the specific stream started by send', async () => { + const { LegacyAgentSession } = await import('@google/gemini-cli-core'); + const streamSpy = vi.spyOn(LegacyAgentSession.prototype, 'stream'); + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Hello again' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-stream', + }); + + expect(streamSpy).toHaveBeenCalledWith({ streamId: expect.any(String) }); + }); + + it('fails fast if the session acknowledges a message send without a stream', async () => { + const { LegacyAgentSession } = await import('@google/gemini-cli-core'); + const sendSpy = vi + .spyOn(LegacyAgentSession.prototype, 'send') + .mockResolvedValue({ streamId: null }); + const streamSpy = vi.spyOn(LegacyAgentSession.prototype, 'stream'); + + await expect( + runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-null-stream', + }), + ).rejects.toThrow( + 'LegacyAgentSession.send() unexpectedly returned no stream for a message send.', + ); + + expect(streamSpy).not.toHaveBeenCalled(); + + sendSpy.mockRestore(); + streamSpy.mockRestore(); + }); + + it('should register activity logger when GEMINI_CLI_ACTIVITY_LOG_TARGET is set', async () => { + vi.stubEnv('GEMINI_CLI_ACTIVITY_LOG_TARGET', '/tmp/test.jsonl'); + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + 
value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'test', + prompt_id: 'prompt-id-activity-logger', + }); + + expect(mockSetupInitialActivityLogger).toHaveBeenCalledWith(mockConfig); + vi.unstubAllEnvs(); + }); + + it('should not register activity logger when GEMINI_CLI_ACTIVITY_LOG_TARGET is not set', async () => { + vi.stubEnv('GEMINI_CLI_ACTIVITY_LOG_TARGET', ''); + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'test', + prompt_id: 'prompt-id-activity-logger-off', + }); + + expect(mockSetupInitialActivityLogger).not.toHaveBeenCalled(); + vi.unstubAllEnvs(); + }); + + it('should handle a single tool call and respond', async () => { + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-1', + name: 'testTool', + args: { arg1: 'value1' }, + isClientInitiated: false, + prompt_id: 'prompt-id-2', + }, + }; + const toolResponse: Part[] = [{ text: 'Tool response' }]; + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Success, + request: { + callId: 'tool-1', + name: 'testTool', + args: { arg1: 'value1' }, + isClientInitiated: false, + prompt_id: 'prompt-id-2', + }, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + responseParts: toolResponse, + callId: 'tool-1', + error: undefined, + errorType: undefined, + contentLength: undefined, + }, + }, + ]); + + const firstCallEvents: ServerGeminiStreamEvent[] = [toolCallEvent]; + const secondCallEvents: 
ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Final answer' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + + mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents(firstCallEvents)) + .mockReturnValueOnce(createStreamFromEvents(secondCallEvents)); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Use a tool', + prompt_id: 'prompt-id-2', + }); + + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledTimes(2); + expect(mockSchedulerSchedule).toHaveBeenCalledWith( + [expect.objectContaining({ name: 'testTool' })], + expect.any(AbortSignal), + ); + expect(mockGeminiClient.sendMessageStream).toHaveBeenNthCalledWith( + 2, + [{ text: 'Tool response' }], + expect.any(AbortSignal), + 'prompt-id-2', + undefined, + false, + undefined, + ); + expect(getWrittenOutput()).toBe('Final answer\n'); + }); + + it('should write a single newline between sequential text outputs from the model', async () => { + // This test simulates a multi-turn conversation to ensure that a single newline + // is printed between each block of text output from the model. + + // 1. Define the tool requests that the model will ask the CLI to run. + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'mock-tool', + name: 'mockTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-multi', + }, + }; + + // 2. Mock the execution of the tools. We just need them to succeed. + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Success, + request: toolCallEvent.value, // This is generic enough for both calls + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + responseParts: [], + callId: 'mock-tool', + }, + }, + ]); + + // 3. Define the sequence of events streamed from the mock model. 
+ // Turn 1: Model outputs text, then requests a tool call. + const modelTurn1: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Use mock tool' }, + toolCallEvent, + ]; + // Turn 2: Model outputs more text, then requests another tool call. + const modelTurn2: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Use mock tool again' }, + toolCallEvent, + ]; + // Turn 3: Model outputs a final answer. + const modelTurn3: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Finished.' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + + mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents(modelTurn1)) + .mockReturnValueOnce(createStreamFromEvents(modelTurn2)) + .mockReturnValueOnce(createStreamFromEvents(modelTurn3)); + + // 4. Run the command. + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Use mock tool multiple times', + prompt_id: 'prompt-id-multi', + }); + + // 5. Verify the output. + // The rendered output should contain the text from each turn, separated by a + // single newline, with a final newline at the end. + expect(getWrittenOutput()).toMatchSnapshot(); + + // Also verify the tools were called as expected. 
+ expect(mockSchedulerSchedule).toHaveBeenCalledTimes(2); + }); + + it('should handle error during tool execution and should send error back to the model', async () => { + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-1', + name: 'errorTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-3', + }, + }; + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Error, + request: { + callId: 'tool-1', + name: 'errorTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-3', + }, + tool: {} as AnyDeclarativeTool, + response: { + callId: 'tool-1', + error: new Error('Execution failed'), + errorType: ToolErrorType.EXECUTION_FAILED, + responseParts: [ + { + functionResponse: { + name: 'errorTool', + response: { + output: 'Error: Execution failed', + }, + }, + }, + ], + resultDisplay: 'Execution failed', + contentLength: undefined, + }, + }, + ]); + const finalResponse: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Content, + value: 'Sorry, let me try again.', + }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents([toolCallEvent])) + .mockReturnValueOnce(createStreamFromEvents(finalResponse)); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Trigger tool error', + prompt_id: 'prompt-id-3', + }); + + expect(mockSchedulerSchedule).toHaveBeenCalled(); + expect(consoleErrorSpy).toHaveBeenCalledWith( + 'Error executing tool errorTool: Execution failed', + ); + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledTimes(2); + expect(mockGeminiClient.sendMessageStream).toHaveBeenNthCalledWith( + 2, + [ + { + functionResponse: { + name: 'errorTool', + response: { + output: 'Error: Execution failed', + }, + }, + }, + ], + expect.any(AbortSignal), + 
'prompt-id-3', + undefined, + false, + undefined, + ); + expect(getWrittenOutput()).toBe('Sorry, let me try again.\n'); + }); + + it('should exit with error if sendMessageStream throws initially', async () => { + const apiError = new Error('API connection failed'); + mockGeminiClient.sendMessageStream.mockImplementation(() => { + throw apiError; + }); + + await expect( + runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Initial fail', + prompt_id: 'prompt-id-4', + }), + ).rejects.toThrow('API connection failed'); + }); + + it('should not exit if a tool is not found, and should send error back to model', async () => { + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-1', + name: 'nonexistentTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-5', + }, + }; + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Error, + request: { + callId: 'tool-1', + name: 'nonexistentTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-5', + }, + response: { + callId: 'tool-1', + error: new Error('Tool "nonexistentTool" not found in registry.'), + resultDisplay: 'Tool "nonexistentTool" not found in registry.', + responseParts: [], + errorType: undefined, + contentLength: undefined, + }, + }, + ]); + const finalResponse: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Content, + value: "Sorry, I can't find that tool.", + }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + + mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents([toolCallEvent])) + .mockReturnValueOnce(createStreamFromEvents(finalResponse)); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Trigger tool not found', + prompt_id: 'prompt-id-5', + }); + + expect(mockSchedulerSchedule).toHaveBeenCalled(); + 
expect(consoleErrorSpy).toHaveBeenCalledWith( + 'Error executing tool nonexistentTool: Tool "nonexistentTool" not found in registry.', + ); + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledTimes(2); + expect(getWrittenOutput()).toBe("Sorry, I can't find that tool.\n"); + }); + + it('should exit when max session turns are exceeded', async () => { + vi.mocked(mockConfig.getMaxSessionTurns).mockReturnValue(0); + await expect( + runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Trigger loop', + prompt_id: 'prompt-id-6', + }), + ).rejects.toThrow('Reached max session turns for this session'); + }); + + it('should preprocess @include commands before sending to the model', async () => { + // 1. Mock the imported atCommandProcessor + const { handleAtCommand } = await import( + './ui/hooks/atCommandProcessor.js' + ); + const mockHandleAtCommand = vi.mocked(handleAtCommand); + + // 2. Define the raw input and the expected processed output + const rawInput = 'Summarize @file.txt'; + const processedParts: Part[] = [ + { text: 'Summarize @file.txt' }, + { text: '\n--- Content from referenced files ---\n' }, + { text: 'This is the content of the file.' }, + { text: '\n--- End of content ---' }, + ]; + + // 3. Setup the mock to return the processed parts + mockHandleAtCommand.mockResolvedValue({ + processedQuery: processedParts, + }); + + // Mock a simple stream response from the Gemini client + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Summary complete.' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + // 4. Run the non-interactive mode with the raw input + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: rawInput, + prompt_id: 'prompt-id-7', + }); + + // 5. 
Assert that sendMessageStream was called with the PROCESSED parts, not the raw input + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledWith( + processedParts, + expect.any(AbortSignal), + 'prompt-id-7', + undefined, + false, + rawInput, + ); + + // 6. Assert the final output is correct + expect(getWrittenOutput()).toBe('Summary complete.\n'); + }); + + it('should process input and write JSON output with stats', async () => { + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Hello World' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + vi.mocked(mockConfig.getOutputFormat).mockReturnValue(OutputFormat.JSON); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-1', + }); + + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledWith( + [{ text: 'Test input' }], + expect.any(AbortSignal), + 'prompt-id-1', + undefined, + false, + 'Test input', + ); + expect(processStdoutSpy).toHaveBeenCalledWith( + JSON.stringify( + { + session_id: 'test-session-id', + response: 'Hello World', + stats: MOCK_SESSION_METRICS, + }, + null, + 2, + ), + ); + }); + + it('should write JSON output with stats for tool-only commands (no text response)', async () => { + // Test the scenario where a command completes successfully with only tool calls + // but no text response - this would have caught the original bug + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-1', + name: 'testTool', + args: { arg1: 'value1' }, + isClientInitiated: false, + prompt_id: 'prompt-id-tool-only', + }, + }; + const toolResponse: Part[] = [{ text: 'Tool executed 
successfully' }]; + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Success, + request: { + callId: 'tool-1', + name: 'testTool', + args: { arg1: 'value1' }, + isClientInitiated: false, + prompt_id: 'prompt-id-tool-only', + }, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + responseParts: toolResponse, + callId: 'tool-1', + error: undefined, + errorType: undefined, + contentLength: undefined, + }, + }, + ]); + + // First call returns only tool call, no content + const firstCallEvents: ServerGeminiStreamEvent[] = [ + toolCallEvent, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } }, + }, + ]; + + // Second call returns no content (tool-only completion) + const secondCallEvents: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 3 } }, + }, + ]; + + mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents(firstCallEvents)) + .mockReturnValueOnce(createStreamFromEvents(secondCallEvents)); + + vi.mocked(mockConfig.getOutputFormat).mockReturnValue(OutputFormat.JSON); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Execute tool only', + prompt_id: 'prompt-id-tool-only', + }); + + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledTimes(2); + expect(mockSchedulerSchedule).toHaveBeenCalledWith( + [expect.objectContaining({ name: 'testTool' })], + expect.any(AbortSignal), + ); + + // This should output JSON with empty response but include stats + expect(processStdoutSpy).toHaveBeenCalledWith( + JSON.stringify( + { + session_id: 'test-session-id', + response: '', + stats: MOCK_SESSION_METRICS, + }, + null, + 2, + ), + ); + }); + + it('should keep only the final post-tool assistant text in JSON output', async () => 
{ + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-1', + name: 'testTool', + args: { arg1: 'value1' }, + isClientInitiated: false, + prompt_id: 'prompt-id-json-tool-text', + }, + }; + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Success, + request: toolCallEvent.value, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + responseParts: [{ text: 'Tool executed successfully' }], + callId: 'tool-1', + error: undefined, + errorType: undefined, + contentLength: undefined, + }, + }, + ]); + + mockGeminiClient.sendMessageStream + .mockReturnValueOnce( + createStreamFromEvents([ + { type: GeminiEventType.Content, value: 'Let me check that...' }, + toolCallEvent, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } }, + }, + ]), + ) + .mockReturnValueOnce( + createStreamFromEvents([ + { type: GeminiEventType.Content, value: 'Final answer' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 3 } }, + }, + ]), + ); + + vi.mocked(mockConfig.getOutputFormat).mockReturnValue(OutputFormat.JSON); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Use a tool', + prompt_id: 'prompt-id-json-tool-text', + }); + + expect(processStdoutSpy).toHaveBeenCalledWith( + JSON.stringify( + { + session_id: 'test-session-id', + response: 'Final answer', + stats: MOCK_SESSION_METRICS, + }, + null, + 2, + ), + ); + }); + + it('should write JSON output with stats for empty response commands', async () => { + // Test the scenario where a command completes but produces no content at all + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 1 } }, + 
}, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + vi.mocked(mockConfig.getOutputFormat).mockReturnValue(OutputFormat.JSON); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Empty response test', + prompt_id: 'prompt-id-empty', + }); + + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledWith( + [{ text: 'Empty response test' }], + expect.any(AbortSignal), + 'prompt-id-empty', + undefined, + false, + 'Empty response test', + ); + + // This should output JSON with empty response but include stats + expect(processStdoutSpy).toHaveBeenCalledWith( + JSON.stringify( + { + session_id: 'test-session-id', + response: '', + stats: MOCK_SESSION_METRICS, + }, + null, + 2, + ), + ); + }); + + it('should handle errors in JSON format', async () => { + vi.mocked(mockConfig.getOutputFormat).mockReturnValue(OutputFormat.JSON); + const testError = new Error('Invalid input provided'); + + mockGeminiClient.sendMessageStream.mockImplementation(() => { + throw testError; + }); + + let thrownError: Error | null = null; + try { + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-error', + }); + // Should not reach here + expect.fail('Expected process.exit to be called'); + } catch (error) { + thrownError = error as Error; + } + + // Should throw because of mocked process.exit + expect(thrownError?.message).toBe('process.exit(1) called'); + + expect(mockCoreEvents.emitFeedback).toHaveBeenCalledWith( + 'error', + JSON.stringify( + { + session_id: 'test-session-id', + error: { + type: 'Error', + message: 'Invalid input provided', + code: 1, + }, + }, + null, + 2, + ), + ); + }); + + it('should handle FatalInputError with custom exit code in JSON format', async () => { + 
vi.mocked(mockConfig.getOutputFormat).mockReturnValue(OutputFormat.JSON); + const fatalError = new FatalInputError('Invalid command syntax provided'); + + mockGeminiClient.sendMessageStream.mockImplementation(() => { + throw fatalError; + }); + + let thrownError: Error | null = null; + try { + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Invalid syntax', + prompt_id: 'prompt-id-fatal', + }); + // Should not reach here + expect.fail('Expected process.exit to be called'); + } catch (error) { + thrownError = error as Error; + } + + // Should throw because of mocked process.exit with custom exit code + expect(thrownError?.message).toBe('process.exit(42) called'); + + expect(mockCoreEvents.emitFeedback).toHaveBeenCalledWith( + 'error', + JSON.stringify( + { + session_id: 'test-session-id', + error: { + type: 'FatalInputError', + message: 'Invalid command syntax provided', + code: 42, + }, + }, + null, + 2, + ), + ); + }); + + it('should execute a slash command that returns a prompt', async () => { + const mockCommand = { + name: 'testcommand', + description: 'a test command', + action: vi.fn().mockResolvedValue({ + type: 'submit_prompt', + content: [{ text: 'Prompt from command' }], + }), + }; + mockGetCommands.mockReturnValue([mockCommand]); + + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Response from command' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: '/testcommand', + prompt_id: 'prompt-id-slash', + }); + + // Ensure the prompt sent to the model is from the command, not the raw input + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledWith( + [{ text: 'Prompt from command' }], + expect.any(AbortSignal), + 
'prompt-id-slash', + undefined, + false, + '/testcommand', + ); + + expect(getWrittenOutput()).toBe('Response from command\n'); + }); + + it('should handle slash commands', async () => { + const nonInteractiveCliCommands = await import( + './nonInteractiveCliCommands.js' + ); + const handleSlashCommandSpy = vi.spyOn( + nonInteractiveCliCommands, + 'handleSlashCommand', + ); + handleSlashCommandSpy.mockResolvedValue([{ text: 'Slash command output' }]); + + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Response to slash command' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: '/help', + prompt_id: 'prompt-id-slash', + }); + + expect(handleSlashCommandSpy).toHaveBeenCalledWith( + '/help', + expect.any(AbortController), + mockConfig, + mockSettings, + ); + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledWith( + [{ text: 'Slash command output' }], + expect.any(AbortSignal), + 'prompt-id-slash', + undefined, + false, + '/help', + ); + expect(getWrittenOutput()).toBe('Response to slash command\n'); + handleSlashCommandSpy.mockRestore(); + }); + + it('should handle cancellation (Ctrl+C)', async () => { + // Mock isTTY and setRawMode safely + const originalIsTTY = process.stdin.isTTY; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const originalSetRawMode = (process.stdin as any).setRawMode; + + Object.defineProperty(process.stdin, 'isTTY', { + value: true, + configurable: true, + }); + if (!originalSetRawMode) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (process.stdin as any).setRawMode = vi.fn(); + } + + const stdinOnSpy = vi + .spyOn(process.stdin, 'on') + .mockImplementation(() => process.stdin); + // 
eslint-disable-next-line @typescript-eslint/no-explicit-any + vi.spyOn(process.stdin as any, 'setRawMode').mockImplementation(() => true); + vi.spyOn(process.stdin, 'resume').mockImplementation(() => process.stdin); + vi.spyOn(process.stdin, 'pause').mockImplementation(() => process.stdin); + vi.spyOn(process.stdin, 'removeAllListeners').mockImplementation( + () => process.stdin, + ); + + // Cancellation will throw FatalCancellationError directly + + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Thinking...' }, + ]; + // Create a stream that responds to abortion + mockGeminiClient.sendMessageStream.mockImplementation( + (_messages, signal: AbortSignal) => + (async function* () { + yield events[0]; + await new Promise((resolve, reject) => { + const timeout = setTimeout(resolve, 1000); + signal.addEventListener('abort', () => { + clearTimeout(timeout); + setTimeout(() => { + reject(new Error('Aborted')); + }, 300); + }); + }); + })(), + ); + + const runPromise = runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Long running query', + prompt_id: 'prompt-id-cancel', + }); + + // Wait a bit for setup to complete and listeners to be registered + await new Promise((resolve) => setTimeout(resolve, 100)); + + // Find the keypress handler registered by runNonInteractive + const keypressCall = stdinOnSpy.mock.calls.find( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (call) => (call[0] as any) === 'keypress', + ); + expect(keypressCall).toBeDefined(); + const keypressHandler = keypressCall?.[1] as ( + str: string, + key: { name?: string; ctrl?: boolean }, + ) => void; + + if (keypressHandler) { + // Simulate Ctrl+C + keypressHandler('\u0003', { ctrl: true, name: 'c' }); + } + + await expect(runPromise).rejects.toThrow('Operation cancelled.'); + + expect( + processStderrSpy.mock.calls.some( + // eslint-disable-next-line no-restricted-syntax + (call) => typeof call[0] === 'string' && 
call[0].includes('Cancelling'), + ), + ).toBe(true); + + // Restore original values + Object.defineProperty(process.stdin, 'isTTY', { + value: originalIsTTY, + configurable: true, + }); + if (originalSetRawMode) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (process.stdin as any).setRawMode = originalSetRawMode; + } else { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + delete (process.stdin as any).setRawMode; + } + // Spies are automatically restored by vi.restoreAllMocks() in afterEach, + // but we can also do it manually if needed. + }); + + it('should honor cancellation that happens before session.send()', async () => { + const originalIsTTY = process.stdin.isTTY; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const originalSetRawMode = (process.stdin as any).setRawMode; + + Object.defineProperty(process.stdin, 'isTTY', { + value: true, + configurable: true, + }); + if (!originalSetRawMode) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (process.stdin as any).setRawMode = vi.fn(); + } + + const stdinOnSpy = vi + .spyOn(process.stdin, 'on') + .mockImplementation( + (event: string | symbol, listener: (...args: unknown[]) => void) => { + if (event === 'keypress') { + listener('\u0003', { ctrl: true, name: 'c' }); + } + return process.stdin; + }, + ); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + vi.spyOn(process.stdin as any, 'setRawMode').mockImplementation(() => true); + vi.spyOn(process.stdin, 'resume').mockImplementation(() => process.stdin); + vi.spyOn(process.stdin, 'pause').mockImplementation(() => process.stdin); + vi.spyOn(process.stdin, 'removeAllListeners').mockImplementation( + () => process.stdin, + ); + + // Cancellation will throw FatalCancellationError directly + + const { LegacyAgentSession } = await import('@google/gemini-cli-core'); + const sendSpy = vi.spyOn(LegacyAgentSession.prototype, 'send'); + + await expect( + runNonInteractive({ + 
config: mockConfig, + settings: mockSettings, + input: 'Cancelled query', + prompt_id: 'prompt-id-pre-send-cancel', + }), + ).rejects.toThrow('Operation cancelled.'); + + expect(sendSpy).not.toHaveBeenCalled(); + expect(stdinOnSpy).toHaveBeenCalled(); + sendSpy.mockRestore(); + + Object.defineProperty(process.stdin, 'isTTY', { + value: originalIsTTY, + configurable: true, + }); + if (originalSetRawMode) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (process.stdin as any).setRawMode = originalSetRawMode; + } else { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + delete (process.stdin as any).setRawMode; + } + }); + + it('should throw FatalInputError if a command requires confirmation', async () => { + const mockCommand = { + name: 'confirm', + description: 'a command that needs confirmation', + action: vi.fn().mockResolvedValue({ + type: 'confirm_shell_commands', + commands: ['rm -rf /'], + }), + }; + mockGetCommands.mockReturnValue([mockCommand]); + + await expect( + runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: '/confirm', + prompt_id: 'prompt-id-confirm', + }), + ).rejects.toThrow( + 'Exiting due to a confirmation prompt requested by the command.', + ); + }); + + it('should treat an unknown slash command as a regular prompt', async () => { + // No commands are mocked, so any slash command is "unknown" + mockGetCommands.mockReturnValue([]); + + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Response to unknown' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: '/unknowncommand', + prompt_id: 'prompt-id-unknown', + }); + + // Ensure the raw input is sent to the model + 
expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledWith( + [{ text: '/unknowncommand' }], + expect.any(AbortSignal), + 'prompt-id-unknown', + undefined, + false, + '/unknowncommand', + ); + + expect(getWrittenOutput()).toBe('Response to unknown\n'); + }); + + it('should throw for unhandled command result types', async () => { + const mockCommand = { + name: 'noaction', + description: 'unhandled type', + action: vi.fn().mockResolvedValue({ + type: 'unhandled', + }), + }; + mockGetCommands.mockReturnValue([mockCommand]); + + await expect( + runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: '/noaction', + prompt_id: 'prompt-id-unhandled', + }), + ).rejects.toThrow( + 'Exiting due to command result that is not supported in non-interactive mode.', + ); + }); + + it('should pass arguments to the slash command action', async () => { + const mockAction = vi.fn().mockResolvedValue({ + type: 'submit_prompt', + content: [{ text: 'Prompt from command' }], + }); + const mockCommand = { + name: 'testargs', + description: 'a test command', + action: mockAction, + }; + mockGetCommands.mockReturnValue([mockCommand]); + + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Acknowledged' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 1 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: '/testargs arg1 arg2', + prompt_id: 'prompt-id-args', + }); + + expect(mockAction).toHaveBeenCalledWith(expect.any(Object), 'arg1 arg2'); + + expect(getWrittenOutput()).toBe('Acknowledged\n'); + }); + + it('should instantiate CommandService with correct loaders for slash commands', async () => { + // This test indirectly checks that handleSlashCommand is using the right loaders. 
+ const { FileCommandLoader } = await import( + './services/FileCommandLoader.js' + ); + const { McpPromptLoader } = await import('./services/McpPromptLoader.js'); + const { BuiltinCommandLoader } = await import( + './services/BuiltinCommandLoader.js' + ); + mockGetCommands.mockReturnValue([]); // No commands found, so it will fall through + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Acknowledged' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 1 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: '/mycommand', + prompt_id: 'prompt-id-loaders', + }); + + // Check that loaders were instantiated with the config + expect(FileCommandLoader).toHaveBeenCalledTimes(1); + expect(FileCommandLoader).toHaveBeenCalledWith(mockConfig); + expect(McpPromptLoader).toHaveBeenCalledTimes(1); + expect(McpPromptLoader).toHaveBeenCalledWith(mockConfig); + expect(BuiltinCommandLoader).toHaveBeenCalledWith(mockConfig); + + // Check that instances were passed to CommandService.create + expect(mockCommandServiceCreate).toHaveBeenCalledTimes(1); + const loadersArg = mockCommandServiceCreate.mock.calls[0][0]; + expect(loadersArg).toHaveLength(3); + expect(loadersArg[0]).toBe( + vi.mocked(BuiltinCommandLoader).mock.instances[0], + ); + expect(loadersArg[1]).toBe(vi.mocked(McpPromptLoader).mock.instances[0]); + expect(loadersArg[2]).toBe(vi.mocked(FileCommandLoader).mock.instances[0]); + }); + + it('should allow a normally-excluded tool when --allowed-tools is set', async () => { + // By default, ShellTool is excluded in non-interactive mode. + // This test ensures that --allowed-tools overrides this exclusion. 
+ vi.mocked(mockConfig.getToolRegistry).mockReturnValue({ + getTool: vi.fn().mockReturnValue({ + name: 'ShellTool', + description: 'A shell tool', + run: vi.fn(), + }), + getFunctionDeclarations: vi.fn().mockReturnValue([{ name: 'ShellTool' }]), + } as unknown as ToolRegistry); + + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-shell-1', + name: 'ShellTool', + args: { command: 'ls' }, + isClientInitiated: false, + prompt_id: 'prompt-id-allowed', + }, + }; + const toolResponse: Part[] = [{ text: 'file.txt' }]; + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Success, + request: { + callId: 'tool-shell-1', + name: 'ShellTool', + args: { command: 'ls' }, + isClientInitiated: false, + prompt_id: 'prompt-id-allowed', + }, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + responseParts: toolResponse, + callId: 'tool-shell-1', + error: undefined, + errorType: undefined, + contentLength: undefined, + }, + }, + ]); + + const firstCallEvents: ServerGeminiStreamEvent[] = [toolCallEvent]; + const secondCallEvents: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'file.txt' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + + mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents(firstCallEvents)) + .mockReturnValueOnce(createStreamFromEvents(secondCallEvents)); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'List the files', + prompt_id: 'prompt-id-allowed', + }); + + expect(mockSchedulerSchedule).toHaveBeenCalledWith( + [expect.objectContaining({ name: 'ShellTool' })], + expect.any(AbortSignal), + ); + expect(getWrittenOutput()).toBe('file.txt\n'); + }); + + describe('CoreEvents Integration', () => { + it('subscribes to UserFeedback and drains backlog on start', async () => { + 
const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'test', + prompt_id: 'prompt-id-events', + }); + + expect(mockCoreEvents.on).toHaveBeenCalledWith( + CoreEvent.UserFeedback, + expect.any(Function), + ); + expect(mockCoreEvents.drainBacklogs).toHaveBeenCalledTimes(1); + }); + + it('unsubscribes from UserFeedback on finish', async () => { + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'test', + prompt_id: 'prompt-id-events', + }); + + expect(mockCoreEvents.off).toHaveBeenCalledWith( + CoreEvent.UserFeedback, + expect.any(Function), + ); + }); + + it('logs to process.stderr when UserFeedback event is received', async () => { + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'test', + prompt_id: 'prompt-id-events', + }); + + // Get the registered handler + const handler = mockCoreEvents.on.mock.calls.find( + (call: unknown[]) => call[0] === CoreEvent.UserFeedback, + )?.[1]; + expect(handler).toBeDefined(); + + // Simulate an event + const payload: UserFeedbackPayload = { + severity: 'error', + message: 'Test error message', + }; + handler(payload); + + expect(processStderrSpy).toHaveBeenCalledWith( 
+ '[ERROR] Test error message\n', + ); + }); + + it('logs optional error object to process.stderr in debug mode', async () => { + vi.mocked(mockConfig.getDebugMode).mockReturnValue(true); + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'test', + prompt_id: 'prompt-id-events', + }); + + // Get the registered handler + const handler = mockCoreEvents.on.mock.calls.find( + (call: unknown[]) => call[0] === CoreEvent.UserFeedback, + )?.[1]; + expect(handler).toBeDefined(); + + // Simulate an event with error object + const errorObj = new Error('Original error'); + // Mock stack for deterministic testing + errorObj.stack = 'Error: Original error\n at test'; + const payload: UserFeedbackPayload = { + severity: 'warning', + message: 'Test warning message', + error: errorObj, + }; + handler(payload); + + expect(processStderrSpy).toHaveBeenCalledWith( + '[WARNING] Test warning message\n', + ); + expect(processStderrSpy).toHaveBeenCalledWith( + 'Error: Original error\n at test\n', + ); + }); + }); + + it('should emit appropriate events for streaming JSON output', async () => { + vi.mocked(mockConfig.getOutputFormat).mockReturnValue( + OutputFormat.STREAM_JSON, + ); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-1', + name: 'testTool', + args: { arg1: 'value1' }, + isClientInitiated: false, + prompt_id: 'prompt-id-stream', + }, + }; + + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Success, + request: toolCallEvent.value, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + 
response: { + responseParts: [{ text: 'Tool response' }], + callId: 'tool-1', + error: undefined, + errorType: undefined, + contentLength: undefined, + resultDisplay: 'Tool executed successfully', + }, + }, + ]); + + const firstCallEvents: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Thinking...' }, + toolCallEvent, + ]; + const secondCallEvents: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Final answer' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + + mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents(firstCallEvents)) + .mockReturnValueOnce(createStreamFromEvents(secondCallEvents)); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Stream test', + prompt_id: 'prompt-id-stream', + }); + + const output = getWrittenOutput(); + const sanitizedOutput = output + .replace(/"timestamp":"[^"]+"/g, '"timestamp":""') + .replace(/"duration_ms":\d+/g, '"duration_ms":'); + expect(sanitizedOutput).toMatchSnapshot(); + }); + + it('should handle EPIPE error gracefully', async () => { + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Hello' }, + { type: GeminiEventType.Content, value: ' World' }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + // Mock process.exit to track calls without throwing + vi.spyOn(process, 'exit').mockImplementation((_code) => undefined as never); + + // Simulate EPIPE error on stdout + const stdoutErrorCallback = (process.stdout.on as Mock).mock.calls.find( + (call) => call[0] === 'error', + )?.[1]; + + if (stdoutErrorCallback) { + stdoutErrorCallback({ code: 'EPIPE' }); + } + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'EPIPE test', + prompt_id: 'prompt-id-epipe', + }); + + // Since EPIPE is simulated, it might exit early 
or continue depending on timing, + // but our main goal is to verify the handler is registered and handles EPIPE. + expect(process.stdout.on).toHaveBeenCalledWith( + 'error', + expect.any(Function), + ); + }); + + it('should resume chat when resumedSessionData is provided', async () => { + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Resumed' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + const resumedSessionData = { + conversation: { + sessionId: 'resumed-session-id', + messages: [ + { role: 'user', parts: [{ text: 'Previous message' }] }, + ] as any, // eslint-disable-line @typescript-eslint/no-explicit-any + startTime: new Date().toISOString(), + lastUpdated: new Date().toISOString(), + firstUserMessage: 'Previous message', + projectHash: 'test-hash', + }, + filePath: '/path/to/session.json', + }; + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Continue', + prompt_id: 'prompt-id-resume', + resumedSessionData, + }); + + expect(mockGeminiClient.resumeChat).toHaveBeenCalledWith( + expect.any(Array), + resumedSessionData, + ); + expect(getWrittenOutput()).toBe('Resumed\n'); + }); + + it.each([ + { + name: 'loop detected', + events: [ + { type: GeminiEventType.LoopDetected }, + ] as ServerGeminiStreamEvent[], + input: 'Loop test', + promptId: 'prompt-id-loop', + }, + { + name: 'max session turns', + events: [ + { type: GeminiEventType.MaxSessionTurns }, + ] as ServerGeminiStreamEvent[], + input: 'Max turns test', + promptId: 'prompt-id-max-turns', + }, + ])( + 'should emit appropriate error event in streaming JSON mode: $name', + async ({ events, input, promptId }) => { + vi.mocked(mockConfig.getOutputFormat).mockReturnValue( + OutputFormat.STREAM_JSON, + ); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( 
+ MOCK_SESSION_METRICS, + ); + + const streamEvents: ServerGeminiStreamEvent[] = [ + ...events, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(streamEvents), + ); + + try { + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input, + prompt_id: promptId, + }); + } catch { + // Expected exit + } + + const output = getWrittenOutput(); + const sanitizedOutput = output + .replace(/"timestamp":"[^"]+"/g, '"timestamp":""') + .replace(/"duration_ms":\d+/g, '"duration_ms":'); + expect(sanitizedOutput).toMatchSnapshot(); + }, + ); + + it('should log error when tool recording fails', async () => { + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-1', + name: 'testTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-tool-error', + }, + }; + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Success, + request: toolCallEvent.value, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + responseParts: [], + callId: 'tool-1', + error: undefined, + errorType: undefined, + contentLength: undefined, + }, + }, + ]); + + const events: ServerGeminiStreamEvent[] = [ + toolCallEvent, + { type: GeminiEventType.Content, value: 'Done' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } }, + }, + ]; + mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents(events)) + .mockReturnValueOnce( + createStreamFromEvents([ + { type: GeminiEventType.Content, value: 'Done' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } }, + }, + ]), + ); + + // Mock getChat to throw when recording tool calls + const mockChat = { + recordCompletedToolCalls: 
vi.fn().mockImplementation(() => { + throw new Error('Recording failed'); + }), + }; + mockGeminiClient.getChat = vi.fn().mockReturnValue(mockChat); + mockGeminiClient.getCurrentSequenceModel = vi + .fn() + .mockReturnValue('model-1'); + + // Mock debugLogger.error + const { debugLogger } = await import('@google/gemini-cli-core'); + const debugLoggerErrorSpy = vi + .spyOn(debugLogger, 'error') + .mockImplementation(() => {}); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Tool recording error test', + prompt_id: 'prompt-id-tool-error', + }); + + expect(debugLoggerErrorSpy).toHaveBeenCalledWith( + expect.stringContaining( + 'Error recording completed tool call information: Error: Recording failed', + ), + ); + expect(getWrittenOutput()).toContain('Done'); + }); + + it('should stop agent execution immediately when a tool call returns STOP_EXECUTION error', async () => { + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'stop-call', + name: 'stopTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-stop', + }, + }; + + // Mock tool execution returning STOP_EXECUTION + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Error, + request: toolCallEvent.value, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + callId: 'stop-call', + responseParts: [{ text: 'error occurred' }], + errorType: ToolErrorType.STOP_EXECUTION, + error: new Error('Stop reason from hook'), + resultDisplay: undefined, + }, + }, + ]); + + const firstCallEvents: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Executing tool...' }, + toolCallEvent, + ]; + + // Setup the mock to return events for the first call. + // We expect the loop to terminate after the tool execution. + // If it doesn't, it might call sendMessageStream again, which we'll assert against. 
+ mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents(firstCallEvents)) + .mockReturnValueOnce(createStreamFromEvents([])); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Run stop tool', + prompt_id: 'prompt-id-stop', + }); + + expect(mockSchedulerSchedule).toHaveBeenCalled(); + + // The key assertion: sendMessageStream should have been called ONLY ONCE (initial user input). + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledTimes(1); + + expect(processStderrSpy).toHaveBeenCalledWith( + 'Agent execution stopped: Stop reason from hook\n', + ); + }); + + it('should write JSON output when a tool call returns STOP_EXECUTION error', async () => { + vi.mocked(mockConfig.getOutputFormat).mockReturnValue(OutputFormat.JSON); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'stop-call', + name: 'stopTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-stop-json', + }, + }; + + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Error, + request: toolCallEvent.value, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + callId: 'stop-call', + responseParts: [{ text: 'error occurred' }], + errorType: ToolErrorType.STOP_EXECUTION, + error: new Error('Stop reason'), + resultDisplay: undefined, + }, + }, + ]); + + const firstCallEvents: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Partial content' }, + toolCallEvent, + ]; + + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(firstCallEvents), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Run stop tool', + prompt_id: 'prompt-id-stop-json', + }); + + expect(processStdoutSpy).toHaveBeenCalledWith( + JSON.stringify( + { + 
session_id: 'test-session-id', + response: 'Partial content', + stats: MOCK_SESSION_METRICS, + }, + null, + 2, + ), + ); + }); + + it('should emit result event when a tool call returns STOP_EXECUTION error in streaming JSON mode', async () => { + vi.mocked(mockConfig.getOutputFormat).mockReturnValue( + OutputFormat.STREAM_JSON, + ); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'stop-call', + name: 'stopTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-stop-stream', + }, + }; + + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Error, + request: toolCallEvent.value, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + callId: 'stop-call', + responseParts: [{ text: 'error occurred' }], + errorType: ToolErrorType.STOP_EXECUTION, + error: new Error('Stop reason'), + resultDisplay: undefined, + }, + }, + ]); + + const firstCallEvents: ServerGeminiStreamEvent[] = [toolCallEvent]; + + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(firstCallEvents), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Run stop tool', + prompt_id: 'prompt-id-stop-stream', + }); + + const output = getWrittenOutput(); + expect(output).toContain('"type":"result"'); + expect(output).toContain('"status":"success"'); + }); + + describe('Agent Execution Events', () => { + it('should handle AgentExecutionStopped event', async () => { + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.AgentExecutionStopped, + value: { reason: 'Stopped by hook' }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'test stop', + prompt_id: 
'prompt-id-stop', + }); + + expect(processStderrSpy).toHaveBeenCalledWith( + 'Agent execution stopped: Stopped by hook\n', + ); + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledTimes(1); + }); + + it('should handle AgentExecutionBlocked event', async () => { + const allEvents: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.AgentExecutionBlocked, + value: { reason: 'Blocked by hook' }, + }, + { type: GeminiEventType.Content, value: 'Final answer' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(allEvents), + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'test block', + prompt_id: 'prompt-id-block', + }); + + expect(processStderrSpy).toHaveBeenCalledWith( + '[WARNING] Agent execution blocked: Blocked by hook\n', + ); + // Stream continues after blocked event — content should be output + expect(getWrittenOutput()).toBe('Final answer\n'); + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledTimes(1); + }); + }); + + describe('Output Sanitization', () => { + const ANSI_SEQUENCE = '\u001B[31mRed Text\u001B[0m'; + const OSC_HYPERLINK = + '\u001B]8;;http://example.com\u001B\\Link\u001B]8;;\u001B\\'; + const PLAIN_TEXT_RED = 'Red Text'; + const PLAIN_TEXT_LINK = 'Link'; + + it('should sanitize ANSI output by default', async () => { + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: ANSI_SEQUENCE }, + { type: GeminiEventType.Content, value: ' ' }, + { type: GeminiEventType.Content, value: OSC_HYPERLINK }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + vi.mocked(mockConfig.getRawOutput).mockReturnValue(false); + + await runNonInteractive({ + 
config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-sanitization', + }); + + expect(getWrittenOutput()).toBe(`${PLAIN_TEXT_RED} ${PLAIN_TEXT_LINK}\n`); + }); + + it('should allow ANSI output when rawOutput is true', async () => { + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: ANSI_SEQUENCE }, + { type: GeminiEventType.Content, value: ' ' }, + { type: GeminiEventType.Content, value: OSC_HYPERLINK }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 10 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + vi.mocked(mockConfig.getRawOutput).mockReturnValue(true); + vi.mocked(mockConfig.getAcceptRawOutputRisk).mockReturnValue(true); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-raw', + }); + + expect(getWrittenOutput()).toBe(`${ANSI_SEQUENCE} ${OSC_HYPERLINK}\n`); + }); + + it('should allow ANSI output when only acceptRawOutputRisk is true', async () => { + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: ANSI_SEQUENCE }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + vi.mocked(mockConfig.getRawOutput).mockReturnValue(false); + vi.mocked(mockConfig.getAcceptRawOutputRisk).mockReturnValue(true); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-accept-only', + }); + + expect(getWrittenOutput()).toBe(`${ANSI_SEQUENCE}\n`); + }); + + it('should warn when rawOutput is true and acceptRisk is false', async () => { + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, 
usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + vi.mocked(mockConfig.getRawOutput).mockReturnValue(true); + vi.mocked(mockConfig.getAcceptRawOutputRisk).mockReturnValue(false); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-warn', + }); + + expect(processStderrSpy).toHaveBeenCalledWith( + expect.stringContaining('[WARNING] --raw-output is enabled'), + ); + }); + + it('should not warn when rawOutput is true and acceptRisk is true', async () => { + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 0 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + vi.mocked(mockConfig.getRawOutput).mockReturnValue(true); + vi.mocked(mockConfig.getAcceptRawOutputRisk).mockReturnValue(true); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-no-warn', + }); + + expect(processStderrSpy).not.toHaveBeenCalledWith( + expect.stringContaining('[WARNING] --raw-output is enabled'), + ); + }); + + it('should emit warning event for loop_detected in streaming JSON mode', async () => { + vi.mocked(mockConfig.getOutputFormat).mockReturnValue( + OutputFormat.STREAM_JSON, + ); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + const streamEvents: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.LoopDetected } as ServerGeminiStreamEvent, + { type: GeminiEventType.Content, value: 'Continuing after loop' }, + { + type: GeminiEventType.Finished, + value: { reason: undefined, usageMetadata: { totalTokenCount: 5 } }, + }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(streamEvents), + ); + + await runNonInteractive({ + 
config: mockConfig, + settings: mockSettings, + input: 'Loop test explicit', + prompt_id: 'prompt-id-loop-explicit', + }); + + const output = getWrittenOutput(); + // The STREAM_JSON output should contain an error event with warning severity + expect(output).toContain('"type":"error"'); + expect(output).toContain('"severity":"warning"'); + expect(output).toContain('Loop detected'); + }); + + it('should report cancelled tool calls as success in stream-json mode (legacy parity)', async () => { + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-1', + name: 'testTool', + args: { arg1: 'value1' }, + isClientInitiated: false, + prompt_id: 'prompt-id-cancel', + }, + }; + + // Mock the scheduler to return a cancelled status + mockSchedulerSchedule.mockResolvedValue([ + { + status: CoreToolCallStatus.Cancelled, + request: toolCallEvent.value, + tool: {} as AnyDeclarativeTool, + invocation: {} as AnyToolInvocation, + response: { + callId: 'tool-1', + responseParts: [{ text: 'Operation cancelled' }], + resultDisplay: 'Cancelled', + }, + }, + ]); + + const events: ServerGeminiStreamEvent[] = [ + toolCallEvent, + { + type: GeminiEventType.Content, + value: 'Model continues...', + }, + ]; + + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + vi.mocked(mockConfig.getOutputFormat).mockReturnValue( + OutputFormat.STREAM_JSON, + ); + vi.mocked(uiTelemetryService.getMetrics).mockReturnValue( + MOCK_SESSION_METRICS, + ); + + await runNonInteractive({ + config: mockConfig, + settings: mockSettings, + input: 'Test input', + prompt_id: 'prompt-id-cancel', + }); + + const output = getWrittenOutput(); + expect(output).toContain('"type":"tool_result"'); + expect(output).toContain('"status":"success"'); + }); + }); +}); diff --git a/packages/cli/src/nonInteractiveCliAgentSession.ts b/packages/cli/src/nonInteractiveCliAgentSession.ts new file mode 100644 index 
/** Arguments accepted by {@link runNonInteractive}. */
interface RunNonInteractiveParams {
  /** Runtime configuration (output format, debug mode, client, session id, ...). */
  config: Config;
  /** Loaded settings, forwarded to slash-command handling. */
  settings: LoadedSettings;
  /** Raw prompt text as supplied by the caller. */
  input: string;
  /** Identifier bound to the prompt-id context for the duration of the run. */
  prompt_id: string;
  /** When present, the chat is resumed from this data before the prompt is sent. */
  resumedSessionData?: ResumedSessionData;
}

/**
 * Runs a single prompt without the interactive UI.
 *
 * The input is resolved either as a slash command or through the `@` command
 * processor, sent through a `LegacyAgentSession`, and the resulting agent
 * events are rendered according to `config.getOutputFormat()` (plain text,
 * a single JSON document, or streaming JSON events).
 *
 * While the session runs, Ctrl+C on a TTY stdin aborts it. Any error thrown
 * inside the run is captured, cleanup is performed, and the error is then
 * passed to `handleError`.
 *
 * @param params - See {@link RunNonInteractiveParams}.
 * @returns A promise that settles once the run and its cleanup have finished.
 */
export async function runNonInteractive({
  config,
  settings,
  input,
  prompt_id,
  resumedSessionData,
}: RunNonInteractiveParams): Promise<void> {
  return promptIdContext.run(prompt_id, async () => {
    const consolePatcher = new ConsolePatcher({
      stderr: true,
      interactive: false,
      debugMode: config.getDebugMode(),
      onNewMessage: (msg) => {
        coreEvents.emitConsoleLog(msg.type, msg.content);
      },
    });

    if (process.env['GEMINI_CLI_ACTIVITY_LOG_TARGET']) {
      const { setupInitialActivityLogger } = await import(
        './utils/devtoolsService.js'
      );
      await setupInitialActivityLogger(config);
    }

    const { stdout: workingStdout } = createWorkingStdio();
    const textOutput = new TextOutput(workingStdout);

    // Mirrors user-feedback events to stderr; the underlying error is only
    // printed in debug mode.
    const handleUserFeedback = (payload: UserFeedbackPayload) => {
      const prefix = payload.severity.toUpperCase();
      process.stderr.write(`[${prefix}] ${payload.message}\n`);
      if (payload.error && config.getDebugMode()) {
        const errorToLog =
          payload.error instanceof Error
            ? payload.error.stack || payload.error.message
            : String(payload.error);
        process.stderr.write(`${errorToLog}\n`);
      }
    };

    const startTime = Date.now();
    const streamFormatter =
      config.getOutputFormat() === OutputFormat.STREAM_JSON
        ? new StreamJsonFormatter()
        : null;

    const abortController = new AbortController();

    // Track cancellation state
    let isAborting = false;
    let cancelMessageTimer: NodeJS.Timeout | null = null;

    // Setup stdin listener for Ctrl+C detection
    let stdinWasRaw = false;
    let rl: readline.Interface | null = null;
    // Kept so cleanup can detach exactly the listener registered here instead
    // of wiping every 'keypress' listener on the shared process.stdin.
    let keypressHandler:
      | ((str: string, key: { name?: string; ctrl?: boolean }) => void)
      | null = null;

    const setupStdinCancellation = () => {
      // Only setup if stdin is a TTY (user can interact)
      if (!process.stdin.isTTY) {
        return;
      }

      // Save original raw mode state
      stdinWasRaw = process.stdin.isRaw || false;

      // Enable raw mode to capture individual keypresses
      process.stdin.setRawMode(true);
      process.stdin.resume();

      // Setup readline to emit keypress events
      rl = readline.createInterface({
        input: process.stdin,
        escapeCodeTimeout: 0,
      });
      readline.emitKeypressEvents(process.stdin, rl);

      // Listen for Ctrl+C
      keypressHandler = (str, key) => {
        // Detect Ctrl+C: either ctrl+c key combo or raw character code 3
        const isCtrlC =
          (key && key.ctrl && key.name === 'c') || str === '\u0003';
        // Only handle once
        if (!isCtrlC || isAborting) {
          return;
        }

        isAborting = true;

        // Only show message if cancellation takes longer than 200ms
        // This reduces verbosity for fast cancellations
        cancelMessageTimer = setTimeout(() => {
          process.stderr.write('\nCancelling...\n');
        }, 200);

        abortController.abort();
      };

      process.stdin.on('keypress', keypressHandler);
    };

    const cleanupStdinCancellation = () => {
      // Clear any pending cancel message timer
      if (cancelMessageTimer) {
        clearTimeout(cancelMessageTimer);
        cancelMessageTimer = null;
      }

      // Cleanup readline and stdin listeners
      if (rl) {
        rl.close();
        rl = null;
      }

      // Remove only our own keypress listener (no-op if setup never ran).
      if (keypressHandler) {
        process.stdin.off('keypress', keypressHandler);
        keypressHandler = null;
      }

      // Restore stdin to original state
      if (process.stdin.isTTY) {
        process.stdin.setRawMode(stdinWasRaw);
        process.stdin.pause();
      }
    };

    let errorToHandle: unknown | undefined;
    let abortSession = () => {};
    try {
      consolePatcher.patch();

      if (
        config.getRawOutput() &&
        !config.getAcceptRawOutputRisk() &&
        config.getOutputFormat() === OutputFormat.TEXT
      ) {
        process.stderr.write(
          '[WARNING] --raw-output is enabled. Model output is not sanitized and may contain harmful ANSI sequences (e.g. for phishing or command injection). Use --accept-raw-output-risk to suppress this warning.\n',
        );
      }

      // Setup stdin cancellation listener
      setupStdinCancellation();

      coreEvents.on(CoreEvent.UserFeedback, handleUserFeedback);
      coreEvents.drainBacklogs();

      // Handle EPIPE errors when the output is piped to a command that closes early.
      // NOTE(review): this listener is never detached from process.stdout;
      // confirm that keeping it registered after the run is intended.
      process.stdout.on('error', (err: NodeJS.ErrnoException) => {
        if (err.code === 'EPIPE') {
          // Exit gracefully if the pipe is closed.
          cleanupStdinCancellation();
          consolePatcher.cleanup();
          process.exit(0);
        }
      });

      const geminiClient = config.getGeminiClient();
      const scheduler = new Scheduler({
        context: config,
        messageBus: config.getMessageBus(),
        getPreferredEditor: () => undefined,
        schedulerId: ROOT_SCHEDULER_ID,
      });

      // Initialize chat. Resume if resume data is passed.
      if (resumedSessionData) {
        await geminiClient.resumeChat(
          convertSessionToClientHistory(
            resumedSessionData.conversation.messages,
          ),
          resumedSessionData,
        );
      }

      // Emit init event for streaming JSON
      if (streamFormatter) {
        streamFormatter.emitEvent({
          type: JsonStreamEventType.INIT,
          timestamp: new Date().toISOString(),
          session_id: config.getSessionId(),
          model: config.getModel(),
        });
      }

      let query: Part[] | undefined;

      if (isSlashCommand(input)) {
        const slashCommandResult = await handleSlashCommand(
          input,
          abortController,
          config,
          settings,
        );
        if (slashCommandResult) {
          // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
          query = slashCommandResult as Part[];
        }
      }

      // Not a slash command (or it produced no query): run @-command processing.
      if (!query) {
        const { processedQuery, error } = await handleAtCommand({
          query: input,
          config,
          addItem: (_item, _timestamp) => 0,
          onDebugMessage: () => {},
          messageId: Date.now(),
          signal: abortController.signal,
          escapePastedAtSymbols: false,
        });
        if (error || !processedQuery) {
          throw new FatalInputError(
            error || 'Exiting due to an error processing the @ command.',
          );
        }
        // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
        query = processedQuery as Part[];
      }

      // Emit user message event for streaming JSON
      if (streamFormatter) {
        streamFormatter.emitEvent({
          type: JsonStreamEventType.MESSAGE,
          timestamp: new Date().toISOString(),
          role: 'user',
          content: input,
        });
      }

      // Create LegacyAgentSession — owns the agentic loop
      const session = new LegacyAgentSession({
        client: geminiClient,
        scheduler,
        config,
        promptId: prompt_id,
      });

      // Wire Ctrl+C to session abort
      abortSession = () => {
        void session.abort();
      };
      abortController.signal.addEventListener('abort', abortSession);
      // An abort that fired before the listener was attached would otherwise
      // go unnoticed, so check the flag explicitly.
      if (abortController.signal.aborted) {
        throw new FatalCancellationError('Operation cancelled.');
      }

      // Start the agentic loop (runs in background)
      const { streamId } = await session.send({
        message: {
          content: geminiPartsToContentParts(query),
          displayContent: input,
        },
      });
      if (streamId === null) {
        throw new Error(
          'LegacyAgentSession.send() unexpectedly returned no stream for a message send.',
        );
      }

      // Concatenates the text parts; returns undefined when the result is empty.
      const getTextContent = (parts?: ContentPart[]): string | undefined => {
        const text = parts
          ?.map((part) => (part.type === 'text' ? part.text : ''))
          .join('');
        return text ? text : undefined;
      };

      // Writes the end-of-run output appropriate for the active output format.
      const emitFinalSuccessResult = (): void => {
        if (streamFormatter) {
          const metrics = uiTelemetryService.getMetrics();
          const durationMs = Date.now() - startTime;
          streamFormatter.emitEvent({
            type: JsonStreamEventType.RESULT,
            timestamp: new Date().toISOString(),
            status: 'success',
            stats: streamFormatter.convertToStreamStats(metrics, durationMs),
          });
        } else if (config.getOutputFormat() === OutputFormat.JSON) {
          const formatter = new JsonFormatter();
          const stats = uiTelemetryService.getMetrics();
          textOutput.write(
            formatter.format(config.getSessionId(), responseText, stats),
          );
        } else {
          textOutput.ensureTrailingNewline();
        }
      };

      // Rebuilds an Error from a fatal 'error' event, choosing the class from
      // `_meta.errorName` and copying exitCode / code / status when present.
      const reconstructFatalError = (event: AgentEvent<'error'>): Error => {
        const errorMeta = event._meta;
        const name =
          typeof errorMeta?.['errorName'] === 'string'
            ? errorMeta['errorName']
            : undefined;

        let errToThrow: Error;
        switch (name) {
          case 'FatalAuthenticationError':
            errToThrow = new FatalAuthenticationError(event.message);
            break;
          case 'FatalInputError':
            errToThrow = new FatalInputError(event.message);
            break;
          case 'FatalSandboxError':
            errToThrow = new FatalSandboxError(event.message);
            break;
          case 'FatalConfigError':
            errToThrow = new FatalConfigError(event.message);
            break;
          case 'FatalTurnLimitedError':
            errToThrow = new FatalTurnLimitedError(event.message);
            break;
          case 'FatalToolExecutionError':
            errToThrow = new FatalToolExecutionError(event.message);
            break;
          case 'FatalCancellationError':
            errToThrow = new FatalCancellationError(event.message);
            break;
          case 'FatalError':
            errToThrow = new FatalError(
              event.message,
              typeof errorMeta?.['exitCode'] === 'number'
                ? errorMeta['exitCode']
                : 1,
            );
            break;
          default:
            errToThrow = new Error(event.message);
            if (name) {
              Object.defineProperty(errToThrow, 'name', {
                value: name,
                enumerable: true,
              });
            }
            break;
        }

        // Same keys, same order as before; just without three copies of the stanza.
        for (const key of ['exitCode', 'code', 'status'] as const) {
          if (errorMeta?.[key] !== undefined) {
            Object.defineProperty(errToThrow, key, {
              value: errorMeta[key],
              enumerable: true,
            });
          }
        }
        return errToThrow;
      };

      // Consume AgentEvents for output formatting
      let responseText = '';
      let preToolResponseText: string | undefined;
      let streamEnded = false;
      for await (const event of session.stream({ streamId })) {
        if (streamEnded) break;
        switch (event.type) {
          case 'message': {
            if (event.role === 'agent') {
              for (const part of event.content) {
                if (part.type === 'text') {
                  const isRaw =
                    config.getRawOutput() || config.getAcceptRawOutputRisk();
                  const output = isRaw ? part.text : stripAnsi(part.text);
                  if (streamFormatter) {
                    streamFormatter.emitEvent({
                      type: JsonStreamEventType.MESSAGE,
                      timestamp: new Date().toISOString(),
                      role: 'assistant',
                      content: output,
                      delta: true,
                    });
                  } else if (config.getOutputFormat() === OutputFormat.JSON) {
                    responseText += output;
                  } else {
                    if (part.text) {
                      textOutput.write(output);
                    }
                  }
                }
              }
            }
            break;
          }
          case 'tool_request': {
            if (config.getOutputFormat() === OutputFormat.JSON) {
              // Final JSON output should reflect the last assistant answer after
              // any tool orchestration, not intermediate pre-tool text.
              preToolResponseText = responseText || preToolResponseText;
              responseText = '';
            }
            if (streamFormatter) {
              streamFormatter.emitEvent({
                type: JsonStreamEventType.TOOL_USE,
                timestamp: new Date().toISOString(),
                tool_name: event.name,
                tool_id: event.requestId,
                parameters: event.args,
              });
            }
            break;
          }
          case 'tool_response': {
            textOutput.ensureTrailingNewline();

            // Derived once and shared by the stream event and the error path.
            const displayText = getTextContent(event.displayContent);
            const errorMsg = getTextContent(event.content) ?? 'Tool error';
            const errorType =
              typeof event.data?.['errorType'] === 'string'
                ? event.data['errorType']
                : undefined;

            if (streamFormatter) {
              streamFormatter.emitEvent({
                type: JsonStreamEventType.TOOL_RESULT,
                timestamp: new Date().toISOString(),
                tool_id: event.requestId,
                status: event.isError ? 'error' : 'success',
                output: displayText,
                error: event.isError
                  ? {
                      type: errorType ?? 'TOOL_EXECUTION_ERROR',
                      message: errorMsg,
                    }
                  : undefined,
              });
            }
            if (event.isError) {
              if (event.data?.['errorType'] === ToolErrorType.STOP_EXECUTION) {
                // Nothing was produced after the tool call: fall back to the
                // text captured before it so JSON output is not empty.
                if (
                  config.getOutputFormat() === OutputFormat.JSON &&
                  !responseText &&
                  preToolResponseText
                ) {
                  responseText = preToolResponseText;
                }
                const stopMessage = `Agent execution stopped: ${errorMsg}`;
                if (config.getOutputFormat() === OutputFormat.TEXT) {
                  process.stderr.write(`${stopMessage}\n`);
                }
              }

              if (event.data?.['errorType'] === ToolErrorType.NO_SPACE_LEFT) {
                throw new FatalToolExecutionError(
                  'Error executing tool ' +
                    event.name +
                    ': ' +
                    (displayText || errorMsg),
                );
              }
              handleToolError(
                event.name,
                new Error(errorMsg),
                config,
                errorType,
                displayText,
              );
            }
            break;
          }
          case 'error': {
            if (event.fatal) {
              throw reconstructFatalError(event);
            }

            const errorCode = event._meta?.['code'];

            if (errorCode === 'AGENT_EXECUTION_BLOCKED') {
              if (config.getOutputFormat() === OutputFormat.TEXT) {
                process.stderr.write(`[WARNING] ${event.message}\n`);
              }
              break;
            }

            const severity =
              event.status === 'RESOURCE_EXHAUSTED' ? 'error' : 'warning';
            if (config.getOutputFormat() === OutputFormat.TEXT) {
              process.stderr.write(`[WARNING] ${event.message}\n`);
            }
            if (streamFormatter) {
              streamFormatter.emitEvent({
                type: JsonStreamEventType.ERROR,
                timestamp: new Date().toISOString(),
                severity,
                message: event.message,
              });
            }
            break;
          }
          case 'agent_end': {
            if (event.reason === 'aborted') {
              throw new FatalCancellationError('Operation cancelled.');
            } else if (event.reason === 'max_turns') {
              const isConfiguredTurnLimit =
                typeof event.data?.['maxTurns'] === 'number' ||
                typeof event.data?.['turnCount'] === 'number';

              if (isConfiguredTurnLimit) {
                throw new FatalTurnLimitedError(
                  'Reached max session turns for this session. Increase the number of turns by specifying maxSessionTurns in settings.json.',
                );
              } else if (streamFormatter) {
                streamFormatter.emitEvent({
                  type: JsonStreamEventType.ERROR,
                  timestamp: new Date().toISOString(),
                  severity: 'error',
                  message: 'Maximum session turns exceeded',
                });
              }
            }

            const stopMessage =
              typeof event.data?.['message'] === 'string'
                ? event.data['message']
                : '';
            if (stopMessage && config.getOutputFormat() === OutputFormat.TEXT) {
              process.stderr.write(`Agent execution stopped: ${stopMessage}\n`);
            }

            emitFinalSuccessResult();
            streamEnded = true;
            break;
          }
          case 'initialize':
          case 'session_update':
          case 'agent_start':
          case 'tool_update':
          case 'elicitation_request':
          case 'elicitation_response':
          case 'usage':
          case 'custom':
            // Explicitly ignore these non-interactive events
            break;
          default:
            // Compile-time exhaustiveness check over the event union.
            event satisfies never;
            break;
        }
      }
    } catch (error) {
      // Deferred so that the cleanup in `finally` runs before error handling.
      errorToHandle = error;
    } finally {
      // Cleanup stdin cancellation before other cleanup
      cleanupStdinCancellation();
      abortController.signal.removeEventListener('abort', abortSession);

      consolePatcher.cleanup();
      coreEvents.off(CoreEvent.UserFeedback, handleUserFeedback);
    }

    if (errorToHandle) {
      handleError(errorToHandle, config);
    }
  });
}
a/packages/cli/src/test-utils/mockConfig.ts +++ b/packages/cli/src/test-utils/mockConfig.ts @@ -38,6 +38,7 @@ export const createMockConfig = (overrides: Partial = {}): Config => fireSessionEndEvent: vi.fn().mockResolvedValue(undefined), fireSessionStartEvent: vi.fn().mockResolvedValue(undefined), })), + isMemoryManagerEnabled: vi.fn(() => false), getListExtensions: vi.fn(() => false), getExtensions: vi.fn(() => []), getListSessions: vi.fn(() => false), @@ -175,6 +176,8 @@ export const createMockConfig = (overrides: Partial = {}): Config => getHasAccessToPreviewModel: vi.fn().mockReturnValue(false), validatePathAccess: vi.fn().mockReturnValue(null), getUseAlternateBuffer: vi.fn().mockReturnValue(false), + getUseTerminalBuffer: vi.fn().mockReturnValue(false), + getUseRenderProcess: vi.fn().mockReturnValue(false), ...overrides, }) as unknown as Config; @@ -194,6 +197,17 @@ export function createMockSettings( user: { settings: {} }, workspace: { settings: {} }, errors: [], + subscribe: vi.fn().mockReturnValue(() => {}), + getSnapshot: vi.fn().mockReturnValue({ + system: { settings: {} }, + systemDefaults: { settings: {} }, + user: { settings: {} }, + workspace: { settings: {} }, + isTrusted: true, + errors: [], + merged, + }), + setValue: vi.fn(), ...overrides, merged, } as unknown as LoadedSettings; diff --git a/packages/cli/src/test-utils/render.tsx b/packages/cli/src/test-utils/render.tsx index 6ca30dd8b9..bf8ca468eb 100644 --- a/packages/cli/src/test-utils/render.tsx +++ b/packages/cli/src/test-utils/render.tsx @@ -42,6 +42,7 @@ import { type OverflowState, } from '../ui/contexts/OverflowContext.js'; +import { makeFakeConfig } from '@google/gemini-cli-core'; import { type Config } from '@google/gemini-cli-core'; import { FakePersistentState } from './persistentStateFake.js'; import { AppContext, type AppState } from '../ui/contexts/AppContext.js'; @@ -51,7 +52,6 @@ import { themeManager, DEFAULT_THEME } from '../ui/themes/theme-manager.js'; import { DefaultLight } 
from '../ui/themes/builtin/light/default-light.js'; import { pickDefaultThemeName } from '../ui/themes/theme.js'; import { generateSvgForTerminal } from './svg.js'; -import { loadCliConfig, type CliArgs } from '../config/config.js'; export const persistentStateMock = new FakePersistentState(); @@ -223,7 +223,7 @@ class XtermStdout extends EventEmitter { this.once('render', resolve), ); const timeoutPromise = new Promise((resolve) => - setTimeout(resolve, 50), + setTimeout(resolve, 1000), ); await Promise.race([renderPromise, timeoutPromise]); } @@ -254,7 +254,12 @@ class XtermStdout extends EventEmitter { const isMatch = () => { if (expectedFrame === '...') { - return currentFrame !== ''; + // '...' is our fallback when output isn't in metrics, meaning Ink rendered *something* + // but we don't know what it is. If terminal has content, we consider it a match. + // However, if the component rendered null, both would be empty, but our fallback + // made expectedFrame '...'. In that case, we can't easily know if it's ready, + // but we can assume if there are no pending writes, it's ready. 
+ return currentFrame !== '' || this.pendingWrites === 0; } // If Ink expects nothing (no new static content and no dynamic output), @@ -499,6 +504,8 @@ const baseMockUiState = { history: [], renderMarkdown: true, streamingState: StreamingState.Idle, + isConfigInitialized: true, + isAuthenticating: false, terminalWidth: 100, terminalHeight: 40, currentModel: 'gemini-pro', @@ -593,6 +600,9 @@ const mockUIActions: UIActions = { clearAccountSuspension: vi.fn(), }; +import { type TextBuffer } from '../ui/components/shared/text-buffer.js'; +import { InputContext, type InputState } from '../ui/contexts/InputContext.js'; + let capturedOverflowState: OverflowState | undefined; let capturedOverflowActions: OverflowActions | undefined; const ContextCapture: React.FC<{ children: React.ReactNode }> = ({ @@ -609,20 +619,28 @@ export const renderWithProviders = async ( shellFocus = true, settings = mockSettings, uiState: providedUiState, + inputState: providedInputState, width, mouseEventsEnabled = false, config, uiActions, + toolActions, persistentState, appState = mockAppState, }: { shellFocus?: boolean; settings?: LoadedSettings; uiState?: Partial; + inputState?: Partial; width?: number; mouseEventsEnabled?: boolean; config?: Config; uiActions?: Partial; + toolActions?: Partial<{ + isExpanded: (callId: string) => boolean; + toggleExpansion: (callId: string) => void; + toggleAllExpansion: (callIds: string[]) => void; + }>; persistentState?: { get?: typeof persistentStateMock.get; set?: typeof persistentStateMock.set; @@ -648,6 +666,18 @@ export const renderWithProviders = async ( }, ) as UIState; + const inputState = { + buffer: { text: '' } as unknown as TextBuffer, + userMessages: [], + shellModeActive: false, + showEscapePrompt: false, + copyModeEnabled: false, + inputWidth: 80, + suggestionsWidth: 40, + ...(providedUiState as unknown as Partial), + ...providedInputState, + }; + if (persistentState?.get) { persistentStateMock.get.mockImplementation(persistentState.get); } 
@@ -660,12 +690,11 @@ export const renderWithProviders = async ( const terminalWidth = width ?? baseState.terminalWidth; if (!config) { - config = await loadCliConfig( - settings.merged, - 'random-session-id', - {} as unknown as CliArgs, - { cwd: '/' }, - ); + config = makeFakeConfig({ + useAlternateBuffer: settings.merged.ui?.useAlternateBuffer, + showMemoryUsage: settings.merged.ui?.showMemoryUsage, + accessibility: settings.merged.ui?.accessibility, + }); } const mainAreaWidth = providedUiState?.mainAreaWidth ?? terminalWidth; @@ -698,53 +727,65 @@ export const renderWithProviders = async ( - - - - - - - - - + + + + + + + + - - - - - - - {comp} - - - - - - - - - - - - - - - + + + + + + + + {comp} + + + + + + + + + + + + + + + + diff --git a/packages/cli/src/ui/AppContainer.test.tsx b/packages/cli/src/ui/AppContainer.test.tsx index 0e436cc645..d78b56e11d 100644 --- a/packages/cli/src/ui/AppContainer.test.tsx +++ b/packages/cli/src/ui/AppContainer.test.tsx @@ -122,13 +122,17 @@ vi.mock('ink', async (importOriginal) => { }; }); +import { InputContext, type InputState } from './contexts/InputContext.js'; + // Helper component will read the context values provided by AppContainer // so we can assert against them in our tests. 
let capturedUIState: UIState; +let capturedInputState: InputState; let capturedUIActions: UIActions; let capturedOverflowActions: OverflowActions; function TestContextConsumer() { capturedUIState = useContext(UIStateContext)!; + capturedInputState = useContext(InputContext)!; capturedUIActions = useContext(UIActionsContext)!; capturedOverflowActions = useOverflowActions()!; return null; @@ -346,6 +350,7 @@ describe('AppContainer State Management', () => { // Initialize mock stdout for terminal title tests mocks.mockStdout.write.mockClear(); + (disableMouseEvents as import('vitest').Mock).mockClear(); capturedUIState = null!; @@ -470,6 +475,7 @@ describe('AppContainer State Management', () => { // Mock Config mockConfig = makeFakeConfig(); + vi.spyOn(mockConfig, 'getUseRenderProcess').mockReturnValue(false); // Mock config's getTargetDir to return consistent workspace directory vi.spyOn(mockConfig, 'getTargetDir').mockReturnValue('/test/workspace'); @@ -1356,6 +1362,7 @@ describe('AppContainer State Management', () => { beforeEach(() => { // Reset mock stdout for each test mocks.mockStdout.write.mockClear(); + (disableMouseEvents as import('vitest').Mock).mockClear(); }); it('verifies useStdout is mocked', async () => { @@ -2459,7 +2466,7 @@ describe('AppContainer State Management', () => { }); }); - describe('Copy Mode (CTRL+S)', () => { + describe('Copy Mode (F9)', () => { let rerender: () => void; let unmount: () => void; let stdin: Awaited>['stdin']; @@ -2468,6 +2475,8 @@ describe('AppContainer State Management', () => { isAlternateMode = false, childHandler?: Mock, ) => { + vi.spyOn(mockConfig, 'getUseTerminalBuffer').mockReturnValue(false); + vi.spyOn(mockConfig, 'getUseAlternateBuffer').mockReturnValue( isAlternateMode, ); @@ -2512,6 +2521,8 @@ describe('AppContainer State Management', () => { beforeEach(() => { mocks.mockStdout.write.mockClear(); + (disableMouseEvents as import('vitest').Mock).mockClear(); + vi.useFakeTimers(); }); @@ -2532,12 +2543,13 @@ 
describe('AppContainer State Management', () => { modeName: 'Alternate Buffer Mode', }, ])('$modeName', ({ isAlternateMode, shouldEnable }) => { - it(`should ${shouldEnable ? 'toggle' : 'NOT toggle'} mouse off when Ctrl+S is pressed`, async () => { + it(`should ${shouldEnable ? 'toggle' : 'NOT toggle'} mouse off when F9 is pressed`, async () => { await setupCopyModeTest(isAlternateMode); mocks.mockStdout.write.mockClear(); // Clear initial enable call + (disableMouseEvents as import('vitest').Mock).mockClear(); act(() => { - stdin.write('\x13'); // Ctrl+S + stdin.write('\x1b[20~'); // F9 }); rerender(); @@ -2550,13 +2562,13 @@ describe('AppContainer State Management', () => { }); if (shouldEnable) { - it('should toggle mouse back on when Ctrl+S is pressed again', async () => { + it('should toggle mouse back on when F9 is pressed again', async () => { await setupCopyModeTest(isAlternateMode); (writeToStdout as Mock).mockClear(); // Turn it on (disable mouse) act(() => { - stdin.write('\x13'); // Ctrl+S + stdin.write('\x1b[20~'); // F9 }); rerender(); expect(disableMouseEvents).toHaveBeenCalled(); @@ -2576,7 +2588,7 @@ describe('AppContainer State Management', () => { // Enter copy mode act(() => { - stdin.write('\x13'); // Ctrl+S + stdin.write('\x1b[20~'); // F9 }); rerender(); @@ -2656,7 +2668,7 @@ describe('AppContainer State Management', () => { // 2. 
Enter copy mode act(() => { - stdin.write('\x13'); // Ctrl+S + stdin.write('\x1b[20~'); // F9 }); rerender(); @@ -3028,7 +3040,7 @@ describe('AppContainer State Management', () => { }); const { unmount } = await act(async () => renderAppContainer()); - expect(capturedUIState.userMessages).toContain('previous message'); + expect(capturedInputState.userMessages).toContain('previous message'); const { onCancelSubmit } = extractUseGeminiStreamArgs( mockedUseGeminiStream.mock.lastCall!, @@ -3056,8 +3068,8 @@ describe('AppContainer State Management', () => { const { rerender, unmount } = await act(async () => renderAppContainer()); // Verify userMessages is populated from inputHistory - expect(capturedUIState.userMessages).toContain('first prompt'); - expect(capturedUIState.userMessages).toContain('second prompt'); + expect(capturedInputState.userMessages).toContain('first prompt'); + expect(capturedInputState.userMessages).toContain('second prompt'); // Clear the conversation history (simulating /clear command) const mockClearItems = vi.fn(); @@ -3076,8 +3088,8 @@ describe('AppContainer State Management', () => { // Verify that userMessages still contains the input history // (it should not be affected by clearing conversation history) - expect(capturedUIState.userMessages).toContain('first prompt'); - expect(capturedUIState.userMessages).toContain('second prompt'); + expect(capturedInputState.userMessages).toContain('first prompt'); + expect(capturedInputState.userMessages).toContain('second prompt'); unmount(); }); @@ -3093,6 +3105,7 @@ describe('AppContainer State Management', () => { // Clear previous calls mocks.mockStdout.write.mockClear(); + (disableMouseEvents as import('vitest').Mock).mockClear(); const { unmount } = await act(async () => renderAppContainer()); @@ -3135,16 +3148,13 @@ describe('AppContainer State Management', () => { // Reset mock stdout to clear any initial writes mocks.mockStdout.write.mockClear(); + (disableMouseEvents as 
import('vitest').Mock).mockClear(); // Submit await act(async () => capturedUIActions.handleFinalSubmit('test prompt')); // Should be reset expect(capturedUIState.constrainHeight).toBe(true); - // Should refresh static (which clears terminal in non-alternate buffer) - expect(mocks.mockStdout.write).toHaveBeenCalledWith( - ansiEscapes.clearTerminal, - ); unmount(); }); @@ -3154,6 +3164,8 @@ describe('AppContainer State Management', () => { ); vi.mocked(checkPermissions).mockResolvedValue([]); + vi.spyOn(mockConfig, 'getUseTerminalBuffer').mockReturnValue(false); + vi.spyOn(mockConfig, 'getUseAlternateBuffer').mockReturnValue(true); const { unmount } = await act(async () => @@ -3170,6 +3182,7 @@ describe('AppContainer State Management', () => { // Reset mock stdout mocks.mockStdout.write.mockClear(); + (disableMouseEvents as import('vitest').Mock).mockClear(); // Submit await act(async () => capturedUIActions.handleFinalSubmit('test prompt')); @@ -3403,6 +3416,8 @@ describe('AppContainer State Management', () => { ui: { useAlternateBuffer: true }, }); + vi.spyOn(mockConfig, 'getUseTerminalBuffer').mockReturnValue(false); + vi.spyOn(mockConfig, 'getUseAlternateBuffer').mockReturnValue(true); const { unmount } = await act(async () => diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index b523cbc792..006ec6cc1e 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -11,6 +11,7 @@ import { useEffect, useRef, useLayoutEffect, + useContext, } from 'react'; import { type DOMElement, @@ -19,6 +20,7 @@ import { useStdout, useStdin, type AppProps, + AppContext as InkAppContext, } from 'ink'; import { App } from './App.js'; import { AppContext } from './contexts/AppContext.js'; @@ -39,6 +41,8 @@ import { checkPermissions } from './hooks/atCommandProcessor.js'; import { MessageType, StreamingState } from './types.js'; import { theme } from './semantic-colors.js'; import { ToolActionsProvider } from 
'./contexts/ToolActionsContext.js'; +import { MouseProvider } from './contexts/MouseContext.js'; +import { ScrollProvider } from './contexts/ScrollProvider.js'; import { type StartupWarning, type EditorType, @@ -85,6 +89,7 @@ import { logBillingEvent, ApiKeyUpdatedEvent, type InjectionSource, + startMemoryService, } from '@google/gemini-cli-core'; import { validateAuthMethod } from '../config/auth.js'; import process from 'node:process'; @@ -170,6 +175,7 @@ import { useSuspend } from './hooks/useSuspend.js'; import { useRunEventNotifications } from './hooks/useRunEventNotifications.js'; import { isNotificationsEnabled } from '../utils/terminalNotifications.js'; import { + getLastTurnToolCallIds, isToolExecuting, isToolAwaitingConfirmation, getAllToolCalls, @@ -190,6 +196,8 @@ import { } from './hooks/useVisibilityToggle.js'; import { useKeyMatchers } from './hooks/useKeyMatchers.js'; +import { InputContext } from './contexts/InputContext.js'; + /** * The fraction of the terminal width to allocate to the shell. * This provides horizontal padding. 
@@ -210,12 +218,30 @@ export const AppContainer = (props: AppContainerProps) => { const { reset } = useOverflowActions()!; const notificationsEnabled = isNotificationsEnabled(settings); + const { setOptions, dumpCurrentFrame, startRecording, stopRecording } = + useContext(InkAppContext); + const recordingFilenameRef = useRef(null); const historyManager = useHistory({ chatRecordingService: config.getGeminiClient()?.getChatRecordingService(), }); useMemoryMonitor(historyManager); const isAlternateBuffer = config.getUseAlternateBuffer(); + const [mouseMode, setMouseMode] = useState(() => + config.getUseAlternateBuffer(), + ); + + useEffect(() => { + setOptions({ + stickyHeadersInBackbuffer: mouseMode, + }); + if (mouseMode) { + enableMouseEvents(); + } else { + disableMouseEvents(); + } + }, [mouseMode, setOptions]); + const [corgiMode, setCorgiMode] = useState(false); const [forceRerenderKey, setForceRerenderKey] = useState(0); const [debugMessage, setDebugMessage] = useState(''); @@ -240,6 +266,39 @@ export const AppContainer = (props: AppContainerProps) => { const [adminSettingsChanged, setAdminSettingsChanged] = useState(false); + const [expandedTools, setExpandedTools] = useState>(new Set()); + + const toggleExpansion = useCallback((callId: string) => { + setExpandedTools((prev) => { + const next = new Set(prev); + if (next.has(callId)) { + next.delete(callId); + } else { + next.add(callId); + } + return next; + }); + }, []); + + const toggleAllExpansion = useCallback((callIds: string[]) => { + setExpandedTools((prev) => { + const next = new Set(prev); + const anyCollapsed = callIds.some((id) => !next.has(id)); + + if (anyCollapsed) { + callIds.forEach((id) => next.add(id)); + } else { + callIds.forEach((id) => next.delete(id)); + } + return next; + }); + }, []); + + const isExpanded = useCallback( + (callId: string) => expandedTools.has(callId), + [expandedTools], + ); + const [shellModeActive, setShellModeActive] = useState(false); const 
[modelSwitchedFromQuotaError, setModelSwitchedFromQuotaError] = useState(false); @@ -415,6 +474,13 @@ export const AppContainer = (props: AppContainerProps) => { setConfigInitialized(true); startupProfiler.flush(config); + // Fire-and-forget memory service (skill extraction from past sessions) + if (config.isMemoryManagerEnabled()) { + startMemoryService(config).catch((e) => { + debugLogger.error('Failed to start memory service:', e); + }); + } + const sessionStartSource = resumedSessionData ? SessionStartSource.Resume : SessionStartSource.Startup; @@ -581,11 +647,11 @@ export const AppContainer = (props: AppContainerProps) => { }); const refreshStatic = useCallback(() => { - if (!isAlternateBuffer) { + if (!isAlternateBuffer && !config.getUseTerminalBuffer()) { stdout.write(ansiEscapes.clearTerminal); + setHistoryRemountKey((prev) => prev + 1); } - setHistoryRemountKey((prev) => prev + 1); - }, [setHistoryRemountKey, isAlternateBuffer, stdout]); + }, [setHistoryRemountKey, isAlternateBuffer, stdout, config]); const shouldUseAlternateScreen = shouldEnterAlternateScreen( isAlternateBuffer, @@ -994,7 +1060,8 @@ Logging in with Google... Restarting Gemini CLI to continue. let fileCount: number; if (config.isJitContextEnabled()) { - await config.getContextManager()?.refresh(); + await config.getMemoryContextManager()?.refresh(); + config.updateSystemInstructionIfInitialized(); flattenedMemory = flattenMemory(config.getUserMemory()); fileCount = config.getGeminiMdFileCount(); } else { @@ -1139,11 +1206,6 @@ Logging in with Google... Restarting Gemini CLI to continue. 
[pendingSlashCommandHistoryItems, pendingGeminiHistoryItems], ); - const hasPendingToolConfirmation = useMemo( - () => isToolAwaitingConfirmation(pendingHistoryItems), - [pendingHistoryItems], - ); - toggleBackgroundTasksRef.current = toggleBackgroundTasks; isBackgroundTaskVisibleRef.current = isBackgroundTaskVisible; backgroundTasksRef.current = backgroundTasks; @@ -1408,10 +1470,17 @@ Logging in with Google... Restarting Gemini CLI to continue. (streamingState === StreamingState.Idle || streamingState === StreamingState.Responding || streamingState === StreamingState.WaitingForConfirmation) && - !proQuotaRequest && - !copyModeEnabled; + !proQuotaRequest; const observerRef = useRef(null); + + useEffect( + () => () => { + observerRef.current?.disconnect(); + }, + [], + ); + const [controlsHeight, setControlsHeight] = useState(0); const [lastNonCopyControlsHeight, setLastNonCopyControlsHeight] = useState(0); @@ -1710,6 +1779,14 @@ Logging in with Google... Restarting Gemini CLI to continue. setShortcutsHelpVisible(false); } + if (keyMatchers[Command.TOGGLE_MOUSE_MODE](key)) { + setMouseMode((prev) => !prev); + if (mouseMode && !isAlternateBuffer) { + appEvents.emit(AppEvent.ScrollToBottom); + } + return true; + } + if (isAlternateBuffer && keyMatchers[Command.TOGGLE_COPY_MODE](key)) { setCopyModeEnabled(true); disableMouseEvents(); @@ -1732,6 +1809,32 @@ Logging in with Google... Restarting Gemini CLI to continue. 
return true; } else if (keyMatchers[Command.SUSPEND_APP](key)) { handleSuspend(); + } else if (keyMatchers[Command.DUMP_FRAME](key)) { + const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); + const filename = `snapshot-${timestamp}.json`; + if (dumpCurrentFrame) { + dumpCurrentFrame(filename); + debugLogger.log(`Dumped frame to: ${filename}`); + } + return true; + } else if (keyMatchers[Command.START_RECORDING](key)) { + const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); + const filename = `recording-${timestamp}.json`; + if (startRecording) { + startRecording(filename); + recordingFilenameRef.current = filename; + debugLogger.log(`Started recording to: ${filename}`); + } + return true; + } else if (keyMatchers[Command.STOP_RECORDING](key)) { + if (stopRecording) { + stopRecording(); + debugLogger.log( + `Stopped recording, saved to: ${recordingFilenameRef.current ?? 'unknown'}`, + ); + recordingFilenameRef.current = null; + } + return true; } else if ( keyMatchers[Command.TOGGLE_COPY_MODE](key) && !isAlternateBuffer @@ -1743,13 +1846,25 @@ Logging in with Google... Restarting Gemini CLI to continue. return true; } + const toggleLastTurnTools = () => { + triggerExpandHint(true); + + const targetToolCallIds = getLastTurnToolCallIds( + historyManager.history, + pendingHistoryItems, + ); + + if (targetToolCallIds.length > 0) { + toggleAllExpansion(targetToolCallIds); + } + }; + let enteringConstrainHeightMode = false; if (!constrainHeight) { enteringConstrainHeightMode = true; setConstrainHeight(true); if (keyMatchers[Command.SHOW_MORE_LINES](key)) { - // If the user manually collapses the view, show the hint and reset the x-second timer. - triggerExpandHint(true); + toggleLastTurnTools(); } if (!isAlternateBuffer) { refreshStatic(); @@ -1797,11 +1912,8 @@ Logging in with Google... Restarting Gemini CLI to continue. 
!enteringConstrainHeightMode ) { setConstrainHeight(false); - // If the user manually expands the view, show the hint and reset the x-second timer. - triggerExpandHint(true); - if (!isAlternateBuffer) { - refreshStatic(); - } + toggleLastTurnTools(); + refreshStatic(); return true; } else if ( (keyMatchers[Command.FOCUS_SHELL_INPUT](key) || @@ -1906,6 +2018,13 @@ Logging in with Google... Restarting Gemini CLI to continue. triggerExpandHint, keyMatchers, isHelpDismissKey, + historyManager.history, + pendingHistoryItems, + toggleAllExpansion, + dumpCurrentFrame, + startRecording, + stopRecording, + mouseMode, ], ); @@ -1925,7 +2044,9 @@ Logging in with Google... Restarting Gemini CLI to continue. } setCopyModeEnabled(false); - enableMouseEvents(); + if (mouseMode) { + enableMouseEvents(); + } return true; }, { @@ -2055,6 +2176,11 @@ Logging in with Google... Restarting Gemini CLI to continue. authState === AuthState.AwaitingApiKeyInput || !!newAgents; + const hasPendingToolConfirmation = useMemo( + () => isToolAwaitingConfirmation(pendingHistoryItems), + [pendingHistoryItems], + ); + const hasConfirmUpdateExtensionRequests = confirmUpdateExtensionRequests.length > 0; const hasLoopDetectionConfirmationRequest = @@ -2226,6 +2352,27 @@ Logging in with Google... Restarting Gemini CLI to continue. }; }, [config, refreshStatic]); + const inputState = useMemo( + () => ({ + buffer, + userMessages: inputHistory, + shellModeActive, + showEscapePrompt, + copyModeEnabled, + inputWidth, + suggestionsWidth, + }), + [ + buffer, + inputHistory, + shellModeActive, + showEscapePrompt, + copyModeEnabled, + inputWidth, + suggestionsWidth, + ], + ); + const uiState: UIState = useMemo( () => ({ history: historyManager.history, @@ -2243,6 +2390,7 @@ Logging in with Google... Restarting Gemini CLI to continue. editorError, isEditorDialogOpen, showPrivacyNotice, + mouseMode, corgiMode, debugMessage, quittingMessages, @@ -2268,11 +2416,6 @@ Logging in with Google... 
Restarting Gemini CLI to continue. initError, pendingGeminiHistoryItems, thought, - shellModeActive, - userMessages: inputHistory, - buffer, - inputWidth, - suggestionsWidth, isInputActive, isResuming, shouldShowIdePrompt, @@ -2288,7 +2431,6 @@ Logging in with Google... Restarting Gemini CLI to continue. renderMarkdown, ctrlCPressedOnce: ctrlCPressCount >= 1, ctrlDPressedOnce: ctrlDPressCount >= 1, - showEscapePrompt, shortcutsHelpVisible, cleanUiDetailsVisible, isFocused, @@ -2340,7 +2482,6 @@ Logging in with Google... Restarting Gemini CLI to continue. embeddedShellFocused, showDebugProfiler, customDialog, - copyModeEnabled, transientMessage, bannerData, bannerVisible, @@ -2369,6 +2510,7 @@ Logging in with Google... Restarting Gemini CLI to continue. editorError, isEditorDialogOpen, showPrivacyNotice, + mouseMode, corgiMode, debugMessage, quittingMessages, @@ -2394,11 +2536,6 @@ Logging in with Google... Restarting Gemini CLI to continue. initError, pendingGeminiHistoryItems, thought, - shellModeActive, - inputHistory, - buffer, - inputWidth, - suggestionsWidth, isInputActive, isResuming, shouldShowIdePrompt, @@ -2414,7 +2551,6 @@ Logging in with Google... Restarting Gemini CLI to continue. renderMarkdown, ctrlCPressCount, ctrlDPressCount, - showEscapePrompt, shortcutsHelpVisible, cleanUiDetailsVisible, isFocused, @@ -2466,7 +2602,6 @@ Logging in with Google... Restarting Gemini CLI to continue. customDialog, apiKeyDefaultValue, authState, - copyModeEnabled, transientMessage, bannerData, bannerVisible, @@ -2653,22 +2788,34 @@ Logging in with Google... Restarting Gemini CLI to continue. 
return ( - - - - - - - - - - - + + + + + + + + + + + + + + + + + ); }; diff --git a/packages/cli/src/ui/__snapshots__/App.test.tsx.snap b/packages/cli/src/ui/__snapshots__/App.test.tsx.snap index f145eadfff..94b1f9b1a4 100644 --- a/packages/cli/src/ui/__snapshots__/App.test.tsx.snap +++ b/packages/cli/src/ui/__snapshots__/App.test.tsx.snap @@ -55,12 +55,6 @@ Footer Gemini CLI v1.2.3 - -Tips for getting started: -1. Create GEMINI.md files to customize your interactions -2. /help for more information -3. Ask coding questions, edit code or run commands -4. Be specific for the best results Composer " `; @@ -130,13 +124,14 @@ HistoryItemDisplay โ”‚ โ”‚ โ”‚ ? ls list directory โ”‚ โ”‚ โ”‚ -โ”‚ ls โ”‚ -โ”‚ Allow execution of: 'ls'? โ”‚ +โ”‚ โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ โ”‚ +โ”‚ โ”‚ ls โ”‚ โ”‚ +โ”‚ โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ โ”‚ +โ”‚ Allow execution of [ls]? โ”‚ โ”‚ โ”‚ โ”‚ โ— 1. Allow once โ”‚ โ”‚ 2. Allow for this session โ”‚ โ”‚ 3. 
No, suggest changes (esc) โ”‚ -โ”‚ โ”‚ โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ @@ -144,7 +139,6 @@ HistoryItemDisplay - Notifications Composer diff --git a/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame-Full-Terminal-Tool-Confirmation-Snapshot-renders-tool-confirmation-box-in-the-frame-of-the-entire-terminal.snap.svg b/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame-Full-Terminal-Tool-Confirmation-Snapshot-renders-tool-confirmation-box-in-the-frame-of-the-entire-terminal.snap.svg index 97b01f3025..7565185d93 100644 --- a/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame-Full-Terminal-Tool-Confirmation-Snapshot-renders-tool-confirmation-box-in-the-frame-of-the-entire-terminal.snap.svg +++ b/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame-Full-Terminal-Tool-Confirmation-Snapshot-renders-tool-confirmation-box-in-the-frame-of-the-entire-terminal.snap.svg @@ -4,263 +4,291 @@ - - โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€ - - - > - - Can you edit InputPrompt.tsx for me? 
- - - โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„ - โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ - โ”‚ - Action Required - โ”‚ - โ”‚ - โ”‚ - โ”‚ - ? - Edit - packages/.../InputPrompt.tsx: return kittyProtocolSupporte... => return kittyProto - โ€ฆ - โ”‚ - โ”‚ - โ”‚ - โ”‚ - ... first 44 lines hidden (Ctrl+O to show) ... - โ”‚ - โ”‚ - 45 - const - line45 - = - true - ; - โ”‚ - โ”‚ - 46 - const - line46 - = - true - ; - โ”‚ - โ”‚ - 47 - const - line47 - = - true - ; - โ”‚ - โ–ˆ - โ”‚ - 48 - const - line48 - = - true - ; - โ”‚ - โ–ˆ - โ”‚ - 49 - const - line49 - = - true - ; - โ”‚ - โ–ˆ - โ”‚ - 50 - const - line50 - = - true - ; - โ”‚ - โ–ˆ - โ”‚ - 51 - const - line51 - = - true - ; - โ”‚ - โ–ˆ - โ”‚ - 52 - const - line52 - = - true - ; - โ”‚ - โ–ˆ - โ”‚ - 53 - const - line53 - = - true - ; - โ”‚ - โ–ˆ - โ”‚ - 54 - const - line54 - = - true - ; - โ”‚ - โ–ˆ - โ”‚ - 55 - const - line55 - = - true - ; - โ”‚ - โ–ˆ - โ”‚ - 56 - const - line56 - = - true - ; - โ”‚ - โ–ˆ - โ”‚ - 57 - const - line57 - = - true - ; - โ”‚ - โ–ˆ - โ”‚ - 58 - const - line58 - = - true - ; - โ”‚ - โ–ˆ - โ”‚ - 59 - const - line59 - = - true - ; - โ”‚ - โ–ˆ - โ”‚ - 60 - const - line60 - = - true - ; - โ”‚ - โ–ˆ - โ”‚ - - 61 - - - - + + + > + + Can you edit InputPrompt.tsx for me? 
+ + + โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„ + โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ + โ”‚ + ? Edit + โ”‚ + โ”‚ + โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ + โ”‚ + โ”‚ + โ”‚ + ... first 42 lines hidden (Ctrl+O to show) ... + โ”‚ + โ”‚ + โ”‚ + โ”‚ + 43 + const + line43 + = + true + ; + โ”‚ + โ”‚ + โ”‚ + โ”‚ + 44 + const + line44 + = + true + ; + โ”‚ + โ”‚ + โ”‚ + โ”‚ + 45 + const + line45 + = + true + ; + โ”‚ + โ”‚ + โ”‚ + โ”‚ + 46 + const + line46 + = + true + ; + โ”‚ + โ”‚ + โ”‚ + โ”‚ + 47 + const + line47 + = + true + ; + โ”‚ + โ”‚โ–„ + โ”‚ + โ”‚ + 48 + const + line48 + = + true + ; + โ”‚ + โ”‚โ–ˆ + โ”‚ + โ”‚ + 49 + const + line49 + = + true + ; + โ”‚ + โ”‚โ–ˆ + โ”‚ + โ”‚ + 50 + const + line50 + = + true + ; + โ”‚ + โ”‚โ–ˆ + โ”‚ + โ”‚ + 51 + const + line51 + = + true + ; + โ”‚ + โ”‚โ–ˆ + โ”‚ + โ”‚ + 52 + const + line52 + = + true + ; + โ”‚ + โ”‚โ–ˆ + โ”‚ + โ”‚ + 53 + const + line53 + = + true + ; + โ”‚ + โ”‚โ–ˆ + โ”‚ + โ”‚ + 54 + const + line54 + = + true + ; + โ”‚ + โ”‚โ–ˆ + โ”‚ + โ”‚ + 55 + const + line55 + = + true + ; + โ”‚ + โ”‚โ–ˆ + โ”‚ + โ”‚ + 56 + const + line56 + = + true + ; + โ”‚ + โ”‚โ–ˆ + โ”‚ + โ”‚ + 57 + const + line57 + = + true + ; + โ”‚ + โ”‚โ–ˆ + โ”‚ + โ”‚ + 58 + const + line58 + = + true + ; + 
โ”‚ + โ”‚โ–ˆ + โ”‚ + โ”‚ + 59 + const + line59 + = + true + ; + โ”‚ + โ”‚โ–ˆ + โ”‚ + โ”‚ + 60 + const + line60 + = + true + ; + โ”‚ + โ”‚โ–ˆ + โ”‚ + โ”‚ + + 61 - - return - - kittyProtocolSupporte...; - โ”‚ - โ–ˆ - โ”‚ - - 61 - - - + + - + + + + return + + kittyProtocolSupporte...; + โ”‚ + โ”‚โ–ˆ + โ”‚ + โ”‚ + + 61 - - return - - kittyProtocolSupporte...; - โ”‚ - โ–ˆ - โ”‚ - 62 - buffer: TextBuffer; - โ”‚ - โ–ˆ - โ”‚ - 63 - onSubmit - : ( - value - : - string - ) => - void - ; - โ”‚ - โ–ˆ - โ”‚ - Apply this change? - โ”‚ - โ–ˆ - โ”‚ - โ”‚ - โ–ˆ - โ”‚ - - โ— - - - 1. - - - Allow once - - โ”‚ - โ–ˆ - โ”‚ - 2. - Allow for this session - โ”‚ - โ–ˆ - โ”‚ - 3. - Allow for this file in all future sessions - โ”‚ - โ–ˆ - โ”‚ - 4. - Modify with external editor - โ”‚ - โ–ˆ - โ”‚ - 5. - No, suggest changes (esc) - โ”‚ - โ–ˆ - โ”‚ - โ”‚ - โ–ˆ - โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ - โ–ˆ + + + + + + return + + kittyProtocolSupporte...; + โ”‚ + โ”‚โ–ˆ + โ”‚ + โ”‚ + 62 + buffer: TextBuffer; + โ”‚ + โ”‚โ–ˆ + โ”‚ + โ”‚ + 63 + onSubmit + : ( + value + : + string + ) => + void + ; + โ”‚ + โ”‚โ–ˆ + โ”‚ + โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ + โ”‚โ–ˆ + โ”‚ + Apply this change? + โ”‚โ–ˆ + โ”‚ + โ”‚โ–ˆ + โ”‚ + + โ— + + + 1. + + + Allow once + + โ”‚โ–ˆ + โ”‚ + 2. + Allow for this session + โ”‚โ–ˆ + โ”‚ + 3. + Allow for this file in all future sessions + ~/.gemini/policies/auto-saved.toml + โ”‚โ–ˆ + โ”‚ + 4. + Modify with external editor + โ”‚โ–ˆ + โ”‚ + 5. 
+ No, suggest changes (esc) + โ”‚โ–ˆ + โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏโ–ˆ \ No newline at end of file diff --git a/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame.test.tsx.snap b/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame.test.tsx.snap index 98853434df..d9cc9f7ce3 100644 --- a/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame.test.tsx.snap +++ b/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame.test.tsx.snap @@ -1,43 +1,43 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html exports[`Full Terminal Tool Confirmation Snapshot > renders tool confirmation box in the frame of the entire terminal 1`] = ` -"โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€โ–€ - > Can you edit InputPrompt.tsx for me? +" > Can you edit InputPrompt.tsx for me? โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„โ–„ + โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ -โ”‚ Action Required โ”‚ -โ”‚ โ”‚ -โ”‚ ? 
Edit packages/.../InputPrompt.tsx: return kittyProtocolSupporte... => return kittyProtoโ€ฆ โ”‚ -โ”‚ โ”‚ -โ”‚ ... first 44 lines hidden (Ctrl+O to show) ... โ”‚ -โ”‚ 45 const line45 = true; โ”‚ -โ”‚ 46 const line46 = true; โ”‚ -โ”‚ 47 const line47 = true; โ”‚โ–ˆ -โ”‚ 48 const line48 = true; โ”‚โ–ˆ -โ”‚ 49 const line49 = true; โ”‚โ–ˆ -โ”‚ 50 const line50 = true; โ”‚โ–ˆ -โ”‚ 51 const line51 = true; โ”‚โ–ˆ -โ”‚ 52 const line52 = true; โ”‚โ–ˆ -โ”‚ 53 const line53 = true; โ”‚โ–ˆ -โ”‚ 54 const line54 = true; โ”‚โ–ˆ -โ”‚ 55 const line55 = true; โ”‚โ–ˆ -โ”‚ 56 const line56 = true; โ”‚โ–ˆ -โ”‚ 57 const line57 = true; โ”‚โ–ˆ -โ”‚ 58 const line58 = true; โ”‚โ–ˆ -โ”‚ 59 const line59 = true; โ”‚โ–ˆ -โ”‚ 60 const line60 = true; โ”‚โ–ˆ -โ”‚ 61 - return kittyProtocolSupporte...; โ”‚โ–ˆ -โ”‚ 61 + return kittyProtocolSupporte...; โ”‚โ–ˆ -โ”‚ 62 buffer: TextBuffer; โ”‚โ–ˆ -โ”‚ 63 onSubmit: (value: string) => void; โ”‚โ–ˆ +โ”‚ ? Edit โ”‚ +โ”‚ โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ โ”‚ +โ”‚ โ”‚ ... first 42 lines hidden (Ctrl+O to show) ... 
โ”‚ โ”‚ +โ”‚ โ”‚ 43 const line43 = true; โ”‚ โ”‚ +โ”‚ โ”‚ 44 const line44 = true; โ”‚ โ”‚ +โ”‚ โ”‚ 45 const line45 = true; โ”‚ โ”‚ +โ”‚ โ”‚ 46 const line46 = true; โ”‚ โ”‚ +โ”‚ โ”‚ 47 const line47 = true; โ”‚ โ”‚โ–„ +โ”‚ โ”‚ 48 const line48 = true; โ”‚ โ”‚โ–ˆ +โ”‚ โ”‚ 49 const line49 = true; โ”‚ โ”‚โ–ˆ +โ”‚ โ”‚ 50 const line50 = true; โ”‚ โ”‚โ–ˆ +โ”‚ โ”‚ 51 const line51 = true; โ”‚ โ”‚โ–ˆ +โ”‚ โ”‚ 52 const line52 = true; โ”‚ โ”‚โ–ˆ +โ”‚ โ”‚ 53 const line53 = true; โ”‚ โ”‚โ–ˆ +โ”‚ โ”‚ 54 const line54 = true; โ”‚ โ”‚โ–ˆ +โ”‚ โ”‚ 55 const line55 = true; โ”‚ โ”‚โ–ˆ +โ”‚ โ”‚ 56 const line56 = true; โ”‚ โ”‚โ–ˆ +โ”‚ โ”‚ 57 const line57 = true; โ”‚ โ”‚โ–ˆ +โ”‚ โ”‚ 58 const line58 = true; โ”‚ โ”‚โ–ˆ +โ”‚ โ”‚ 59 const line59 = true; โ”‚ โ”‚โ–ˆ +โ”‚ โ”‚ 60 const line60 = true; โ”‚ โ”‚โ–ˆ +โ”‚ โ”‚ 61 - return kittyProtocolSupporte...; โ”‚ โ”‚โ–ˆ +โ”‚ โ”‚ 61 + return kittyProtocolSupporte...; โ”‚ โ”‚โ–ˆ +โ”‚ โ”‚ 62 buffer: TextBuffer; โ”‚ โ”‚โ–ˆ +โ”‚ โ”‚ 63 onSubmit: (value: string) => void; โ”‚ โ”‚โ–ˆ +โ”‚ โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ โ”‚โ–ˆ โ”‚ Apply this change? โ”‚โ–ˆ โ”‚ โ”‚โ–ˆ โ”‚ โ— 1. Allow once โ”‚โ–ˆ โ”‚ 2. Allow for this session โ”‚โ–ˆ -โ”‚ 3. Allow for this file in all future sessions โ”‚โ–ˆ +โ”‚ 3. Allow for this file in all future sessions ~/.gemini/policies/auto-saved.toml โ”‚โ–ˆ โ”‚ 4. Modify with external editor โ”‚โ–ˆ โ”‚ 5. 
No, suggest changes (esc) โ”‚โ–ˆ -โ”‚ โ”‚โ–ˆ โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏโ–ˆ " `; diff --git a/packages/cli/src/ui/commands/chatCommand.ts b/packages/cli/src/ui/commands/chatCommand.ts index e7a33672f3..05fd081dfb 100644 --- a/packages/cli/src/ui/commands/chatCommand.ts +++ b/packages/cli/src/ui/commands/chatCommand.ts @@ -65,7 +65,7 @@ const getSavedChatTags = async ( ); return chatDetails; - } catch (_err) { + } catch { return []; } }; diff --git a/packages/cli/src/ui/commands/directoryCommand.tsx b/packages/cli/src/ui/commands/directoryCommand.tsx index 4106efa97b..718012c494 100644 --- a/packages/cli/src/ui/commands/directoryCommand.tsx +++ b/packages/cli/src/ui/commands/directoryCommand.tsx @@ -198,7 +198,7 @@ export const directoryCommand: SlashCommand = { alreadyAdded.push(trimmedPath); continue; } - } catch (_e) { + } catch { // Path might not exist or be inaccessible. // We'll let batchAddDirectories handle it later. } diff --git a/packages/cli/src/ui/commands/extensionsCommand.ts b/packages/cli/src/ui/commands/extensionsCommand.ts index 7a3ada83e0..6c0f3529a2 100644 --- a/packages/cli/src/ui/commands/extensionsCommand.ts +++ b/packages/cli/src/ui/commands/extensionsCommand.ts @@ -321,7 +321,7 @@ async function exploreAction( }); try { await open(extensionsUrl); - } catch (_error) { + } catch { context.ui.addItem({ type: MessageType.ERROR, text: `Failed to open browser. 
Check out the extensions gallery at ${extensionsUrl}`, diff --git a/packages/cli/src/ui/commands/restoreCommand.ts b/packages/cli/src/ui/commands/restoreCommand.ts index cf18836c20..3796456ff8 100644 --- a/packages/cli/src/ui/commands/restoreCommand.ts +++ b/packages/cli/src/ui/commands/restoreCommand.ts @@ -151,7 +151,7 @@ async function completion( const files = await fs.readdir(checkpointDir); const jsonFiles = files.filter((file) => file.endsWith('.json')); return getTruncatedCheckpointNames(jsonFiles); - } catch (_err) { + } catch { return []; } } diff --git a/packages/cli/src/ui/commands/rewindCommand.test.tsx b/packages/cli/src/ui/commands/rewindCommand.test.tsx index f878091a45..aa5e6cfa6f 100644 --- a/packages/cli/src/ui/commands/rewindCommand.test.tsx +++ b/packages/cli/src/ui/commands/rewindCommand.test.tsx @@ -106,7 +106,7 @@ describe('rewindCommand', () => { }, config: { getSessionId: () => 'test-session-id', - getContextManager: () => ({ refresh: mockResetContext }), + getMemoryContextManager: () => ({ refresh: mockResetContext }), getProjectRoot: mockGetProjectRoot, }, }, diff --git a/packages/cli/src/ui/commands/rewindCommand.tsx b/packages/cli/src/ui/commands/rewindCommand.tsx index c4e0284d0f..f703323c1b 100644 --- a/packages/cli/src/ui/commands/rewindCommand.tsx +++ b/packages/cli/src/ui/commands/rewindCommand.tsx @@ -61,7 +61,9 @@ async function rewindConversation( client.setHistory(clientHistory as Content[]); // Reset context manager as we are rewinding history - await context.services.agentContext?.config.getContextManager()?.refresh(); + await context.services.agentContext?.config + .getMemoryContextManager() + ?.refresh(); // Update UI History // We generate IDs based on index for the rewind history diff --git a/packages/cli/src/ui/commands/setupGithubCommand.ts b/packages/cli/src/ui/commands/setupGithubCommand.ts index afc9b7210e..ff290c27fb 100644 --- a/packages/cli/src/ui/commands/setupGithubCommand.ts +++ 
b/packages/cli/src/ui/commands/setupGithubCommand.ts @@ -76,7 +76,7 @@ export async function updateGitignore(gitRepoRoot: string): Promise { let fileExists = true; try { existingContent = await fs.promises.readFile(gitignorePath, 'utf8'); - } catch (_error) { + } catch { // File doesn't exist fileExists = false; } @@ -168,8 +168,8 @@ async function downloadFiles({ async function createDirectory(dirPath: string): Promise { try { await fs.promises.mkdir(dirPath, { recursive: true }); - } catch (_error) { - debugLogger.debug(`Failed to create ${dirPath} directory:`, _error); + } catch (error) { + debugLogger.debug(`Failed to create ${dirPath} directory:`, error); throw new Error( `Unable to create ${dirPath} directory. Do you have file permissions in the current directory?`, ); @@ -222,8 +222,8 @@ export const setupGithubCommand: SlashCommand = { let gitRepoRoot: string; try { gitRepoRoot = getGitRepoRoot(); - } catch (_error) { - debugLogger.debug(`Failed to get git repo root:`, _error); + } catch (error) { + debugLogger.debug(`Failed to get git repo root:`, error); throw new Error( 'Unable to determine the GitHub repository. 
/setup-github must be run from a git repository.', ); diff --git a/packages/cli/src/ui/commands/skillsCommand.test.ts b/packages/cli/src/ui/commands/skillsCommand.test.ts index 120ba01ed7..438f09b182 100644 --- a/packages/cli/src/ui/commands/skillsCommand.test.ts +++ b/packages/cli/src/ui/commands/skillsCommand.test.ts @@ -528,6 +528,7 @@ describe('skillsCommand', () => { await actionPromise; expect(reloadSkillsMock).toHaveBeenCalled(); + expect(context.ui.reloadCommands).toHaveBeenCalled(); expect(context.ui.setPendingItem).toHaveBeenCalledWith(null); expect(context.ui.addItem).toHaveBeenCalledWith( expect.objectContaining({ diff --git a/packages/cli/src/ui/commands/skillsCommand.ts b/packages/cli/src/ui/commands/skillsCommand.ts index 8c8db2fca5..ea1888db40 100644 --- a/packages/cli/src/ui/commands/skillsCommand.ts +++ b/packages/cli/src/ui/commands/skillsCommand.ts @@ -285,6 +285,8 @@ async function reloadAction( context.ui.setPendingItem(null); } + context.ui.reloadCommands(); + const afterSkills = skillManager.getSkills(); const afterNames = new Set(afterSkills.map((s) => s.name)); diff --git a/packages/cli/src/ui/components/AnsiOutput.test.tsx b/packages/cli/src/ui/components/AnsiOutput.test.tsx index 758361be0a..04d6ccb0d9 100644 --- a/packages/cli/src/ui/components/AnsiOutput.test.tsx +++ b/packages/cli/src/ui/components/AnsiOutput.test.tsx @@ -16,6 +16,7 @@ const createAnsiToken = (overrides: Partial): AnsiToken => ({ underline: false, dim: false, inverse: false, + isUninitialized: false, fg: '#ffffff', bg: '#000000', ...overrides, @@ -156,4 +157,30 @@ describe('', () => { expect(lastFrame()).toBeDefined(); unmount(); }); + + describe('robustness', () => { + it('does NOT crash when data is undefined', async () => { + const { lastFrame, unmount } = await render( + , + ); + expect(lastFrame({ allowEmpty: true }).trim()).toBe(''); + unmount(); + }); + + it('does NOT crash when data is an object but not an array', async () => { + const { lastFrame, unmount } = 
await render( + , + ); + expect(lastFrame({ allowEmpty: true }).trim()).toBe(''); + unmount(); + }); + }); }); diff --git a/packages/cli/src/ui/components/AnsiOutput.tsx b/packages/cli/src/ui/components/AnsiOutput.tsx index a1b30b0856..617740d4ad 100644 --- a/packages/cli/src/ui/components/AnsiOutput.tsx +++ b/packages/cli/src/ui/components/AnsiOutput.tsx @@ -35,14 +35,16 @@ export const AnsiOutputText: React.FC = ({ ? Math.min(availableHeightLimit, maxLines) : (availableHeightLimit ?? maxLines ?? DEFAULT_HEIGHT); - const lastLines = disableTruncation - ? data - : numLinesRetained === 0 - ? [] - : data.slice(-numLinesRetained); + const lastLines = Array.isArray(data) + ? disableTruncation + ? data + : numLinesRetained === 0 + ? [] + : data.slice(-numLinesRetained) + : []; return ( - {lastLines.map((line: AnsiLine, lineIndex: number) => ( + {(lastLines as AnsiLine[]).map((line: AnsiLine, lineIndex: number) => ( diff --git a/packages/cli/src/ui/components/AppHeader.tsx b/packages/cli/src/ui/components/AppHeader.tsx index 7d0ef75a36..1b3d9b2cfa 100644 --- a/packages/cli/src/ui/components/AppHeader.tsx +++ b/packages/cli/src/ui/components/AppHeader.tsx @@ -59,13 +59,20 @@ const NARROW_TERMINAL_BREAKPOINT = 60; export const AppHeader = ({ version, showDetails = true }: AppHeaderProps) => { const settings = useSettings(); const config = useConfig(); - const { terminalWidth, bannerData, bannerVisible, updateInfo } = useUIState(); + const { + terminalWidth, + bannerData, + bannerVisible, + updateInfo, + isConfigInitialized, + isAuthenticating, + } = useUIState(); const { bannerText } = useBanner(bannerData); const { showTips } = useTips(); const authType = config.getContentGeneratorConfig()?.authType; - const loggedOut = !authType; + const loggedOut = isConfigInitialized && !isAuthenticating && !authType; const showHeader = !( settings.merged.ui.hideBanner || config.getScreenReader() diff --git a/packages/cli/src/ui/components/AskUserDialog.test.tsx 
b/packages/cli/src/ui/components/AskUserDialog.test.tsx index 4f1cca7d8c..5217455358 100644 --- a/packages/cli/src/ui/components/AskUserDialog.test.tsx +++ b/packages/cli/src/ui/components/AskUserDialog.test.tsx @@ -1409,6 +1409,53 @@ describe('AskUserDialog', () => { expect(lastFrame()).toMatchSnapshot(); }); }); + + it('supports "Other" option for yesno questions', async () => { + const questions: Question[] = [ + { + question: 'Is this correct?', + header: 'Confirm', + type: QuestionType.YESNO, + }, + ]; + + const onSubmit = vi.fn(); + const { stdin, lastFrame, waitUntilReady } = await renderWithProviders( + , + { width: 80 }, + ); + + // Navigate to "Other" (3rd option: 1. Yes, 2. No, 3. Other) + writeKey(stdin, '\x1b[B'); // Down to No + writeKey(stdin, '\x1b[B'); // Down to Other + + await waitFor(async () => { + await waitUntilReady(); + expect(lastFrame()).toContain('Enter a custom value'); + }); + + // Type feedback + for (const char of 'Yes, but with caveats') { + writeKey(stdin, char); + } + + await waitFor(async () => { + await waitUntilReady(); + expect(lastFrame()).toContain('Yes, but with caveats'); + }); + + // Submit + writeKey(stdin, '\r'); + + await waitFor(async () => { + expect(onSubmit).toHaveBeenCalledWith({ '0': 'Yes, but with caveats' }); + }); + }); }); it('expands paste placeholders in multi-select custom option via Done', async () => { diff --git a/packages/cli/src/ui/components/AskUserDialog.tsx b/packages/cli/src/ui/components/AskUserDialog.tsx index 483fcb5055..295d54eb73 100644 --- a/packages/cli/src/ui/components/AskUserDialog.tsx +++ b/packages/cli/src/ui/components/AskUserDialog.tsx @@ -511,8 +511,9 @@ const ChoiceQuestionView: React.FC = ({ }) => { const keyMatchers = useKeyMatchers(); const isAlternateBuffer = useAlternateBuffer(); - const numOptions = - (question.options?.length ?? 0) + (question.type !== 'yesno' ? 1 : 0); + const hasAll = question.multiSelect && (question.options?.length ?? 
0) > 1; + // Calculate total options including 'All' and 'Other' to ensure consistent numbering column width + const numOptions = (question.options?.length ?? 0) + (hasAll ? 1 : 0) + 1; const numLen = String(numOptions).length; const radioWidth = 2; // "โ— " const numberWidth = numLen + 2; // e.g., "1. " @@ -735,17 +736,15 @@ const ChoiceQuestionView: React.FC = ({ list.push({ key: 'all', value: allItem }); } - // Only add custom option for choice type, not yesno - if (question.type !== 'yesno') { - const otherItem: OptionItem = { - key: 'other', - label: customOptionText || '', - description: '', - type: 'other', - index: list.length, - }; - list.push({ key: 'other', value: otherItem }); - } + // Add custom option for choice and yesno types + const otherItem: OptionItem = { + key: 'other', + label: customOptionText || '', + description: '', + type: 'other', + index: list.length, + }; + list.push({ key: 'other', value: otherItem }); if (question.multiSelect) { const doneItem: OptionItem = { @@ -759,7 +758,7 @@ const ChoiceQuestionView: React.FC = ({ } return list; - }, [questionOptions, question.multiSelect, question.type, customOptionText]); + }, [questionOptions, question.multiSelect, customOptionText]); const handleHighlight = useCallback( (itemValue: OptionItem) => { diff --git a/packages/cli/src/ui/components/Composer.test.tsx b/packages/cli/src/ui/components/Composer.test.tsx index 1750536dbe..316b9a1780 100644 --- a/packages/cli/src/ui/components/Composer.test.tsx +++ b/packages/cli/src/ui/components/Composer.test.tsx @@ -245,20 +245,37 @@ const createMockConfig = (overrides = {}): Config => ...overrides, }) as unknown as Config; +import { InputContext, type InputState } from '../contexts/InputContext.js'; + const renderComposer = async ( uiState: UIState, settings = createMockSettings({ ui: {} }), config = createMockConfig(), uiActions = createMockUIActions(), + inputStateOverrides: Partial = {}, ) => { + const inputState = { + buffer: { text: '' } as 
unknown as TextBuffer, + userMessages: [], + shellModeActive: false, + showEscapePrompt: false, + copyModeEnabled: false, + inputWidth: 80, + suggestionsWidth: 40, + ...(uiState as unknown as Partial), + ...inputStateOverrides, + }; + const result = await render( - - - - - + + + + + + + , ); @@ -541,7 +558,6 @@ describe('Composer', () => { const uiState = createMockUIState({ ctrlCPressedOnce: false, ctrlDPressedOnce: false, - showEscapePrompt: false, }); const { lastFrame } = await renderComposer(uiState); @@ -631,7 +647,6 @@ describe('Composer', () => { async (mode) => { const uiState = createMockUIState({ showApprovalModeIndicator: mode, - shellModeActive: false, }); const { lastFrame } = await renderComposer(uiState); @@ -641,11 +656,15 @@ describe('Composer', () => { ); it('shows ShellModeIndicator when shell mode is active', async () => { - const uiState = createMockUIState({ - shellModeActive: true, - }); + const uiState = createMockUIState(); - const { lastFrame } = await renderComposer(uiState); + const { lastFrame } = await renderComposer( + uiState, + undefined, + undefined, + undefined, + { shellModeActive: true }, + ); expect(lastFrame()).toMatch(/ShellModeIndic[\s\S]*tor/); }); @@ -724,11 +743,16 @@ describe('Composer', () => { it('shows Esc rewind prompt in minimal mode without showing full UI', async () => { const uiState = createMockUIState({ cleanUiDetailsVisible: false, - showEscapePrompt: true, history: [{ id: 1, type: 'user', text: 'msg' }], }); - const { lastFrame } = await renderComposer(uiState); + const { lastFrame } = await renderComposer( + uiState, + undefined, + undefined, + undefined, + { showEscapePrompt: true }, + ); const output = lastFrame(); expect(output).toContain('Press Esc again to rewind.'); expect(output).not.toContain('ContextSummaryDisplay'); @@ -828,11 +852,16 @@ describe('Composer', () => { describe('Shortcuts Hint', () => { it('restores shortcuts hint after 200ms debounce when buffer is empty', async () => { const 
uiState = createMockUIState({ - buffer: { text: '' } as unknown as TextBuffer, cleanUiDetailsVisible: false, }); - const { lastFrame } = await renderComposer(uiState); + const { lastFrame } = await renderComposer( + uiState, + undefined, + undefined, + undefined, + { buffer: { text: '' } as unknown as TextBuffer }, + ); await act(async () => { await vi.advanceTimersByTimeAsync(250); @@ -845,11 +874,16 @@ describe('Composer', () => { it('hides shortcuts hint when text is typed in buffer', async () => { const uiState = createMockUIState({ - buffer: { text: 'hello' } as unknown as TextBuffer, cleanUiDetailsVisible: false, }); - const { lastFrame } = await renderComposer(uiState); + const { lastFrame } = await renderComposer( + uiState, + undefined, + undefined, + undefined, + { buffer: { text: 'hello' } as unknown as TextBuffer }, + ); expect(lastFrame()).not.toContain('press tab twice for more'); expect(lastFrame()).not.toContain('? for shortcuts'); diff --git a/packages/cli/src/ui/components/Composer.tsx b/packages/cli/src/ui/components/Composer.tsx index 590d1e9c6b..52bb2b294f 100644 --- a/packages/cli/src/ui/components/Composer.tsx +++ b/packages/cli/src/ui/components/Composer.tsx @@ -9,6 +9,7 @@ import { useState, useEffect } from 'react'; import { useConfig } from '../contexts/ConfigContext.js'; import { useSettings } from '../contexts/SettingsContext.js'; import { useUIState } from '../contexts/UIStateContext.js'; +import { useInputState } from '../contexts/InputContext.js'; import { useUIActions } from '../contexts/UIActionsContext.js'; import { useVimMode } from '../contexts/VimModeContext.js'; import { useAlternateBuffer } from '../hooks/useAlternateBuffer.js'; @@ -26,9 +27,11 @@ import { OverflowProvider } from '../contexts/OverflowContext.js'; import { ConfigInitDisplay } from './ConfigInitDisplay.js'; import { TodoTray } from './messages/Todo.js'; import { useComposerStatus } from '../hooks/useComposerStatus.js'; +import { appEvents, AppEvent } from 
'../../utils/events.js'; export const Composer = ({ isFocused = true }: { isFocused?: boolean }) => { const uiState = useUIState(); + const inputState = useInputState(); const uiActions = useUIActions(); const settings = useSettings(); const config = useConfig(); @@ -55,6 +58,12 @@ export const Composer = ({ isFocused = true }: { isFocused?: boolean }) => { const { setShortcutsHelpVisible } = uiActions; + useEffect(() => { + if (hasPendingActionRequired) { + appEvents.emit(AppEvent.ScrollToBottom); + } + }, [hasPendingActionRequired]); + useEffect(() => { if (uiState.shortcutsHelpVisible && !isPassiveShortcutsHelpState) { setShortcutsHelpVisible(false); @@ -74,12 +83,12 @@ export const Composer = ({ isFocused = true }: { isFocused?: boolean }) => { return null; } - const hasToast = shouldShowToast(uiState); + const showToast = shouldShowToast(uiState, inputState); const hideUiDetailsForSuggestions = suggestionsVisible && suggestionsPosition === 'above'; // Mini Mode VIP Flags (Pure Content Triggers) - const showMinimalToast = hasToast; + const showMinimalToast = showToast; return ( { {uiState.isInputActive && ( { ? vimMode === 'INSERT' ? " Press 'Esc' for NORMAL mode." : " Press 'i' for INSERT mode." - : uiState.shellModeActive + : inputState.shellModeActive ? ' Type your shell command' : ' Type your message or @path/to/file' } @@ -166,15 +170,12 @@ export const Composer = ({ isFocused = true }: { isFocused?: boolean }) => { streamingState={uiState.streamingState} suggestionsPosition={suggestionsPosition} onSuggestionsVisibilityChange={setSuggestionsVisible} - copyModeEnabled={uiState.copyModeEnabled} /> )} {showUiDetails && !settings.merged.ui.hideFooter && - !isScreenReaderEnabled && ( -