diff --git a/.gcp/Dockerfile.development b/.gcp/Dockerfile.development new file mode 100644 index 0000000000..fb572c3783 --- /dev/null +++ b/.gcp/Dockerfile.development @@ -0,0 +1,89 @@ +# --- STAGE 1: Base Runtime --- +FROM docker.io/library/node:20-slim AS base + +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 \ + python3-pip \ + python3-venv \ + curl \ + dnsutils \ + less \ + jq \ + ca-certificates \ + git \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# --- STAGE 2: Builder (Compile Main) --- +FROM base AS builder +WORKDIR /build +COPY . . +RUN npm ci --ignore-scripts +RUN npm run bundle +# Run the official release preparation script to move the bundle and assets into packages/cli +RUN node scripts/prepare-npm-release.js + +# --- STAGE 3: Development Environment --- +FROM base AS development + +WORKDIR /home/node/dev/main + +# Set up npm global package folder +RUN mkdir -p /usr/local/share/npm-global \ + && chown -R node:node /usr/local/share/npm-global +ENV NPM_CONFIG_PREFIX=/usr/local/share/npm-global +ENV PATH=$PATH:/usr/local/share/npm-global/bin + +# Copy package.json to extract versions for global tools +COPY package.json /tmp/package.json + +# Install Build Tools, Global Dev Tools (pinned), and Linters. Downloads use curl -f so HTTP errors fail the build; the linter tarballs are linux amd64/x86_64 only. Each gemini-cli channel is installed under its own npm prefix so g-nightly/g-preview/g-stable do not all resolve to the last-installed version. +ARG ACTIONLINT_VER=1.7.7 +ARG SHELLCHECK_VER=0.11.0 +ARG YAMLLINT_VER=1.35.1 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + make \ + g++ \ + gh \ + git \ + unzip \ + rsync \ + ripgrep \ + procps \ + psmisc \ + lsof \ + socat \ + tmux \ + docker.io \ + build-essential \ + libsecret-1-dev \ + libkrb5-dev \ + file \ + && curl -fsSLo /tmp/actionlint.tar.gz https://github.com/rhysd/actionlint/releases/download/v${ACTIONLINT_VER}/actionlint_${ACTIONLINT_VER}_linux_amd64.tar.gz \ + && tar -xzf /tmp/actionlint.tar.gz -C /usr/local/bin actionlint \ + && curl -fsSLo /tmp/shellcheck.tar.xz 
https://github.com/koalaman/shellcheck/releases/download/v${SHELLCHECK_VER}/shellcheck-v${SHELLCHECK_VER}.linux.x86_64.tar.xz \ + && tar -xf /tmp/shellcheck.tar.xz -C /usr/local/bin --strip-components=1 shellcheck-v${SHELLCHECK_VER}/shellcheck \ + && pip3 install --no-cache-dir --break-system-packages yamllint==${YAMLLINT_VER} \ + && export TSX_VER=$(node -p "require('/tmp/package.json').devDependencies.tsx") \ + && export VITEST_VER=$(node -p "require('/tmp/package.json').devDependencies.vitest") \ + && export PRETTIER_VER=$(node -p "require('/tmp/package.json').devDependencies.prettier") \ + && export ESLINT_VER=$(node -p "require('/tmp/package.json').devDependencies.eslint") \ + && export CROSS_ENV_VER=$(node -p "require('/tmp/package.json').devDependencies['cross-env']") \ + && npm install -g "tsx@$TSX_VER" "vitest@$VITEST_VER" "prettier@$PRETTIER_VER" "eslint@$ESLINT_VER" "cross-env@$CROSS_ENV_VER" typescript@5.3.3 \ + && npm install -g --prefix /usr/local/share/gemini-nightly @google/gemini-cli@nightly && ln -s /usr/local/share/gemini-nightly/bin/gemini /usr/local/share/npm-global/bin/g-nightly \ + && npm install -g --prefix /usr/local/share/gemini-preview @google/gemini-cli@preview && ln -s /usr/local/share/gemini-preview/bin/gemini /usr/local/share/npm-global/bin/g-preview \ + && npm install -g --prefix /usr/local/share/gemini-stable @google/gemini-cli@latest && ln -s /usr/local/share/gemini-stable/bin/gemini /usr/local/share/npm-global/bin/g-stable \ + && apt-get purge -y build-essential libsecret-1-dev libkrb5-dev \ + && apt-get autoremove -y \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* /tmp/* /root/.npm + +# Copy the bundled CLI package to a permanent location and install it +# We MUST not delete this source folder as 'npm install -g <folder>' +# often symlinks to it for local folder installs. 
+COPY --from=builder /build/packages/cli /usr/local/lib/gemini-cli +RUN npm install -g /usr/local/lib/gemini-cli + +USER node +CMD ["/bin/bash"] diff --git a/.gcp/Dockerfile.development.dockerignore b/.gcp/Dockerfile.development.dockerignore new file mode 100644 index 0000000000..3e48beb792 --- /dev/null +++ b/.gcp/Dockerfile.development.dockerignore @@ -0,0 +1,10 @@ +node_modules +.git +.gemini/workspaces +dist +!packages/*/dist/*.tgz +bundle +out +*.log +.env +.DS_Store diff --git a/.gcp/development-worker.yml b/.gcp/development-worker.yml new file mode 100644 index 0000000000..1ef1346eda --- /dev/null +++ b/.gcp/development-worker.yml @@ -0,0 +1,58 @@ +substitutions: + _IMAGE_NAME: 'development' + _ARTIFACT_REGISTRY_REPO: 'us-docker.pkg.dev/gemini-code-dev/gemini-cli' + +steps: + # Step 1: Install root dependencies + - name: 'us-west1-docker.pkg.dev/gemini-code-dev/gemini-code-containers/gemini-code-builder' + id: 'Install Dependencies' + entrypoint: 'npm' + args: ['install'] + + # Step 2: Authenticate for Docker + - name: 'us-west1-docker.pkg.dev/gemini-code-dev/gemini-code-containers/gemini-code-builder' + id: 'Authenticate docker' + entrypoint: 'npm' + args: ['run', 'auth'] + + # Step 3: Build workspace packages + - name: 'us-west1-docker.pkg.dev/gemini-code-dev/gemini-code-containers/gemini-code-builder' + id: 'Build packages' + entrypoint: 'npm' + args: ['run', 'build:packages'] + + # Step 4: Build Development Image + - name: 'us-west1-docker.pkg.dev/gemini-code-dev/gemini-code-containers/gemini-code-builder' + id: 'Build Development Image' + entrypoint: 'bash' + env: + - 'RAW_BRANCH_VALUE=${BRANCH_NAME}' + args: + - '-c' + - |- + IMAGE_BASE="${_ARTIFACT_REGISTRY_REPO}/${_IMAGE_NAME}" + + # Determine the primary tag (branch name or 'latest' for main) + # Use $$ for shell variables to avoid Cloud Build attempting premature substitution + RAW_BRANCH="$$RAW_BRANCH_VALUE" + if [ "$${RAW_BRANCH}" == "main" ]; then + TAG_PRIMARY="latest" + else + 
TAG_PRIMARY=$$(echo "$${RAW_BRANCH}" | sed 's/[^a-zA-Z0-9]/-/g' | tr '[:upper:]' '[:lower:]') + fi + + # Use SHORT_SHA if available (Cloud Build) or fallback to latest-dev + TAG_SHA="$${SHORT_SHA:-latest-dev}" + + echo "📦 Building Development Image for: $${RAW_BRANCH} -> $${TAG_PRIMARY} ($${TAG_SHA})" + + docker build -f .gcp/Dockerfile.development \ + -t "$${IMAGE_BASE}:$${TAG_SHA}" \ + -t "$${IMAGE_BASE}:$${TAG_PRIMARY}" . + + docker push "$${IMAGE_BASE}:$${TAG_SHA}" + docker push "$${IMAGE_BASE}:$${TAG_PRIMARY}" + +options: + defaultLogsBucketBehavior: 'REGIONAL_USER_OWNED_BUCKET' + dynamicSubstitutions: true diff --git a/.gemini/commands/fix-behavioral-eval.toml b/.gemini/commands/fix-behavioral-eval.toml deleted file mode 100644 index d2f1c5b3ed..0000000000 --- a/.gemini/commands/fix-behavioral-eval.toml +++ /dev/null @@ -1,60 +0,0 @@ -description = "Check status of nightly evals, fix failures for key models, and re-run." -prompt = """ -You are an expert at fixing behavioral evaluations. - -1. **Investigate**: - - Use 'gh' cli to fetch the results from the latest run from the main branch: https://github.com/google-gemini/gemini-cli/actions/workflows/evals-nightly.yml. - - DO NOT push any changes or start any runs. The rest of your evaluation will be local. - - Evals are in evals/ directory and are documented by evals/README.md. - - The test case trajectory logs will be logged to evals/logs. - - You should also enable and review the verbose agent logs by setting the GEMINI_DEBUG_LOG_FILE environment variable. - - Identify the relevant test. Confine your investigation and validation to just this test. - - Proactively add logging that will aid in gathering information or validating your hypotheses. - -2. **Fix**: - - If a relevant test is failing, locate the test file and the corresponding prompt/code. - - It's often helpful to make an extreme, brute force change to see if you are changing the right place to make an improvement and then scope it back iteratively. 
- - Your **final** change should be **minimal and targeted**. - - Keep in mind the following: - - The prompt has multiple configurations and pieces. Take care that your changes - end up in the final prompt for the selected model and configuration. - - The prompt chosen for the eval is intentional. It's often vague or indirect - to see how the agent performs with ambiguous instructions. Changing it should - be a last resort. - - When changing the test prompt, carefully consider whether the prompt still tests - the same scenario. We don't want to lose test fidelity by making the prompts too - direct (i.e.: easy). - - Your primary mechanism for improving the agent's behavior is to make changes to - tool instructions, system prompt (snippets.ts), and/or modules that contribute to the prompt. - - If prompt and description changes are unsuccessful, use logs and debugging to - confirm that everything is working as expected. - - If unable to fix the test, you can make recommendations for architecture changes - that might help stablize the test. Be sure to THINK DEEPLY if offering architecture guidance. - Some facts that might help with this are: - - Agents may be composed of one or more agent loops. - - AgentLoop == 'context + toolset + prompt'. Subagents are one type of agent loop. - - Agent loops perform better when: - - They have direct, unambiguous, and non-contradictory prompts. - - They have fewer irrelevant tools. - - They have fewer goals or steps to perform. - - They have less low value or irrelevant context. - - You may suggest compositions of existing primitives, like subagents, or - propose a new one. - - These recommendations should be high confidence and should be grounded - in observed deficient behaviors rather than just parroting the facts above. - Investigate as needed to ground your recommendations. - -3. **Verify**: - - Run just that one test if needed to validate that it is fixed. Be sure to run vitest in non-interactive mode. 
- - Running the tests can take a long time, so consider whether you can diagnose via other means or log diagnostics before committing the time. You must minimize the number of test runs needed to diagnose the failure. - - After the test completes, check whether it seems to have improved. - - You will need to run the test 3 times for Gemini 3.0, Gemini 3 flash, and Gemini 2.5 pro to ensure that it is truly stable. Run these runs in parallel, using scripts if needed. - - Some flakiness is expected; if it looks like a transient issue or the test is inherently unstable but passes 2/3 times, you might decide it cannot be improved. - -4. **Report**: - - Provide a summary of the test success rate for each of the tested models. - - Success rate is calculated based on 3 runs per model (e.g., 3/3 = 100%). - - If you couldn't fix it due to persistent flakiness, explain why. - -{{args}} -""" \ No newline at end of file diff --git a/.gemini/commands/promote-behavioral-eval.toml b/.gemini/commands/promote-behavioral-eval.toml deleted file mode 100644 index 9893e9b02b..0000000000 --- a/.gemini/commands/promote-behavioral-eval.toml +++ /dev/null @@ -1,29 +0,0 @@ -description = "Promote behavioral evals that have a 100% success rate over the last 7 nightly runs." -prompt = """ -You are an expert at analyzing and promoting behavioral evaluations. - -1. **Investigate**: - - Use 'gh' cli to fetch the results from the most recent run from the main branch: https://github.com/google-gemini/gemini-cli/actions/workflows/evals-nightly.yml. - - DO NOT push any changes or start any runs. The rest of your evaluation will be local. - - Evals are in evals/ directory and are documented by evals/README.md. - - Identify tests that have passed 100% of the time for ALL enabled models across the past 7 runs in a row. - - NOTE: the results summary from the most recent run contains the last 7 runs test results. 100% means the test passed 3/3 times for that model and run. 
- - If a test meets this criteria, it is a candidate for promotion. - -2. **Promote**: - - For each candidate test, locate the test file in the evals/ directory. - - Promote the test according to the project's standard promotion process (e.g., moving it to a stable suite, updating its tags, or removing skip/flaky annotations). - - Ensure you follow any guidelines in evals/README.md for stable tests. - - Your **final** change should be **minimal and targeted** to just promoting the test status. - -3. **Verify**: - - Run the promoted tests locally to validate that they still execute correctly. Be sure to run vitest in non-interactive mode. - - Check that the test is now part of the expected standard or stable test suites. - -4. **Report**: - - Provide a summary of the tests that were promoted. - - Include the success rate evidence (7/7 runs passed for all models) for each promoted test. - - If no tests met the criteria for promotion, clearly state that and summarize the closest candidates. - -{{args}} -""" diff --git a/.gemini/settings.json b/.gemini/settings.json index 1a4c889066..9051dc78de 100644 --- a/.gemini/settings.json +++ b/.gemini/settings.json @@ -2,7 +2,8 @@ "experimental": { "plan": true, "extensionReloading": true, - "modelSteering": true + "modelSteering": true, + "memoryManager": true }, "general": { "devtools": true diff --git a/.gemini/skills/behavioral-evals/SKILL.md b/.gemini/skills/behavioral-evals/SKILL.md new file mode 100644 index 0000000000..f60fb04832 --- /dev/null +++ b/.gemini/skills/behavioral-evals/SKILL.md @@ -0,0 +1,56 @@ +--- +name: behavioral-evals +description: Guidance for creating, running, fixing, and promoting behavioral evaluations. Use when verifying agent decision logic, debugging failures, debugging prompt steering, or adding workspace regression tests. 
+--- + +# Behavioral Evals + +## Overview + +Behavioral evaluations (evals) are tests that validate the **agent's decision-making** (e.g., tool choice) rather than pure functionality. They are critical for verifying prompt changes, debugging steerability, and preventing regressions. + +> [!NOTE] +> **Single Source of Truth**: For core concepts, policies, running tests, and general best practices, always refer to **[evals/README.md](../../../evals/README.md)**. + +--- + +## 🔄 Workflow Decision Tree + +1. **Does a prompt/tool change need validation?** + * *No* -> Normal integration tests. + * *Yes* -> Continue below. +2. **Is it UI/Interaction heavy?** + * *Yes* -> Use `appEvalTest` (`AppRig`). See **[creating.md](references/creating.md)**. + * *No* -> Use `evalTest` (`TestRig`). See **[creating.md](references/creating.md)**. +3. **Is it a new test?** + * *Yes* -> Set policy to `USUALLY_PASSES`. + * *No* -> `ALWAYS_PASSES` (locks in regression). +4. **Are you fixing a failure or promoting a test?** + * *Fixing* -> See **[fixing.md](references/fixing.md)**. + * *Promoting* -> See **[promoting.md](references/promoting.md)**. + +--- + +## 📋 Quick Checklist + +### 1. Setup Workspace +Seed the workspace with necessary files using the `files` object to simulate a realistic scenario (e.g., NodeJS project with `package.json`). +* *Details in **[creating.md](references/creating.md)*** + +### 2. Write Assertions +Audit agent decisions using `rig.setBreakpoint()` (AppRig only) or index verification on `rig.readToolLogs()`. +* *Details in **[creating.md](references/creating.md)*** + +### 3. Verify +Run single tests locally with Vitest. Confirm stability locally before relying on CI workflows. 
+* *See **[evals/README.md](../../../evals/README.md)** for running commands.* + +--- + +## 📦 Bundled Resources + +Detailed procedural guides: +* **[creating.md](references/creating.md)**: Assertion strategies, Rig selection, Mock MCPs. +* **[fixing.md](references/fixing.md)**: Step-by-step automated investigation, architecture diagnosis guidelines. +* **[promoting.md](references/promoting.md)**: Candidate identification criteria and threshold guidelines. + diff --git a/.gemini/skills/behavioral-evals/assets/interactive_eval.ts.txt b/.gemini/skills/behavioral-evals/assets/interactive_eval.ts.txt new file mode 100644 index 0000000000..2d2b7433dc --- /dev/null +++ b/.gemini/skills/behavioral-evals/assets/interactive_eval.ts.txt @@ -0,0 +1,27 @@ +import { describe, expect } from 'vitest'; +import { appEvalTest } from './app-test-helper.js'; + +describe('interactive_feature', () => { + // New tests MUST start as USUALLY_PASSES + appEvalTest('USUALLY_PASSES', { + name: 'should pause for user confirmation', + files: { + 'package.json': JSON.stringify({ name: 'app' }) + }, + prompt: 'Task description here requiring approval', + timeout: 60000, + setup: async (rig) => { + // ⚠️ Breakpoints are ONLY safe in appEvalTest + rig.setBreakpoint(['ask_user']); + }, + assert: async (rig) => { + // 1. Wait for the breakpoint to trigger + const confirmation = await rig.waitForPendingConfirmation('ask_user'); + expect(confirmation).toBeDefined(); + + // 2. 
Resolve it so the test can finish + await rig.resolveTool(confirmation); + await rig.waitForIdle(); + }, + }); +}); diff --git a/.gemini/skills/behavioral-evals/assets/standard_eval.ts.txt b/.gemini/skills/behavioral-evals/assets/standard_eval.ts.txt new file mode 100644 index 0000000000..3e666dfc37 --- /dev/null +++ b/.gemini/skills/behavioral-evals/assets/standard_eval.ts.txt @@ -0,0 +1,30 @@ +import { describe, expect } from 'vitest'; +import { evalTest } from './test-helper.js'; + +describe('core_feature', () => { + // New tests MUST start as USUALLY_PASSES + evalTest('USUALLY_PASSES', { + name: 'should perform expected agent action', + setup: async (rig) => { + // For mocking offline MCP: + // rig.addMockMcpServer('workspace-server', 'google-workspace'); + }, + files: { + 'src/app.ts': '// some code', + }, + prompt: 'Task description here', + timeout: 60000, // 1 minute safety limit + assert: async (rig, result) => { + // 1. Audit the trajectory (Safe for standard evalTest) + const logs = rig.readToolLogs(); + const hasTool = logs.some((l) => l.toolRequest.name === 'read_file'); + expect(hasTool, 'Agent should have read the file').toBe(true); + + // 2. Assert efficiency (Cost/Turn) + expect(logs.length).toBeLessThan(5); + + // 3. Assert final output + expect(result).toContain('Expected Keyword'); + }, + }); +}); diff --git a/.gemini/skills/behavioral-evals/references/creating.md b/.gemini/skills/behavioral-evals/references/creating.md new file mode 100644 index 0000000000..bcc1baff06 --- /dev/null +++ b/.gemini/skills/behavioral-evals/references/creating.md @@ -0,0 +1,151 @@ +# Creating Behavioral Evals + +## 🔬 Rig Selection + +| Rig Type | Import From | Architecture | Use When | +| :---------------- | :--------------------- | :------------------------------------------------------------------- | :---------------------------------------------------------------------------------------------------- | +| **`evalTest`** | `./test-helper.js` | **Subprocess**. 
Runs the CLI in a separate process + waits for exit. | Standard workspace tests. **Do not use `setBreakpoint`**; auditing history (`readToolLogs`) is safer. | +| **`appEvalTest`** | `./app-test-helper.js` | **In-Process**. Runs directly inside the runner loop. | UI/Ink rendering. Safe for `setBreakpoint` triggers. | + +--- + +## 🏗️ Scenario Design + +Evals must simulate realistic agent environments to effectively test +decision-making. + +- **Workspace State**: Seed with standard project anchors if testing general + capabilities: + - `package.json` for NodeJS environments. + - Minimal configuration files (`tsconfig.json`, `GEMINI.md`). +- **Structural Complexity**: Provide enough files to force the agent to _search_ + or _navigate_, rather than giving the answer directly. Avoid trivial one-file + tests unless testing exact prompt steering. + +--- + +## ❌ Fail First Principle + +Before asserting a new capability or locking in a fix, **verify that the test +fails first**. + +- It is easy to accidentally write an eval that asserts behaviors that are + already met or pass by default. +- **Process**: reproduce failure with test -> apply fix (prompt/tool) -> verify + test passes. + +--- + +## ✋ Testing Patterns + +### 1. Breakpoints + +Verifies the agent _intends_ to use a tool BEFORE executing it. Useful for +interactive prompts or safety checks. + +```typescript +// ⚠️ Only works with appEvalTest (AppRig) +setup: async (rig) => { + rig.setBreakpoint(['ask_user']); +}, +assert: async (rig) => { + const confirmation = await rig.waitForPendingConfirmation('ask_user'); + expect(confirmation).toBeDefined(); +} +``` + +### 2. 
Tool Confirmation Race + +When asserting multiple triggers (e.g., "enters plan mode then asks question"): + +```typescript +assert: async (rig) => { + let confirmation = await rig.waitForPendingConfirmation([ + 'enter_plan_mode', + 'ask_user', + ]); + + if (confirmation?.name === 'enter_plan_mode') { + rig.acceptConfirmation('enter_plan_mode'); + confirmation = await rig.waitForPendingConfirmation('ask_user'); + } + expect(confirmation?.toolName).toBe('ask_user'); +}; +``` + +### 3. Audit Tool Logs + +Audit exact operations to ensure efficiency (e.g., no redundant reads). + +```typescript +assert: async (rig, result) => { + await rig.waitForTelemetryReady(); + const toolLogs = rig.readToolLogs(); + + const writeCall = toolLogs.find( + (log) => log.toolRequest.name === 'write_file', + ); + expect(writeCall).toBeDefined(); +}; +``` + +### 4. Mock MCP Facades + +To evaluate tools connected via MCP without hitting live endpoints, load a mock +server configuration in the `setup` hook. + +```typescript +setup: async (rig) => { + rig.addMockMcpServer('workspace-server', 'google-workspace'); +}, +assert: async (rig) => { + await rig.waitForTelemetryReady(); + const toolLogs = rig.readToolLogs(); + const workspaceCall = toolLogs.find( + (log) => log.toolRequest.name === 'mcp_workspace-server_docs.getText' + ); + expect(workspaceCall).toBeDefined(); +}; +``` + +--- + +## ⚠️ Safety & Efficiency Guardrails + +### 1. Breakpoint Deadlocks + +Breakpoints (`setBreakpoint`) pause execution. In standard `evalTest`, +`rig.run()` waits for the process to exit _before_ assertions run. **This will +hang indefinitely.** + +- **Use Breakpoints** for `appEvalTest` or interactive simulations. +- **Use Audit Tool Logs** (above) for standard trajectory tests. + +### 2. Runaway Timeout + +Always set a budget boundary in the `EvalCase` to prevent runaway loops on +quota: + +```typescript +evalTest('USUALLY_PASSES', { + name: '...', + timeout: 60000, // 1 minute safety limit + // ... 
+}); +``` + +### 3. Efficiency Assertion (Turn limits) + +Check if a tool is called _early_ using index checks: + +```typescript +assert: async (rig) => { + const toolLogs = rig.readToolLogs(); + const toolCallIndex = toolLogs.findIndex( + (log) => log.toolRequest.name === 'cli_help', + ); + + expect(toolCallIndex).toBeGreaterThan(-1); + expect(toolCallIndex).toBeLessThan(5); // Called within first 5 turns +}; +``` diff --git a/.gemini/skills/behavioral-evals/references/fixing.md b/.gemini/skills/behavioral-evals/references/fixing.md new file mode 100644 index 0000000000..fc78870515 --- /dev/null +++ b/.gemini/skills/behavioral-evals/references/fixing.md @@ -0,0 +1,71 @@ +# Fixing Behavioral Evals + +Use this guide when asked to debug, troubleshoot, or fix a failing behavioral +evaluation. + +--- + +## 1. 🔍 Investigate + +1. **Fetch Nightly Results**: Use the `gh` CLI to inspect the latest run from + `evals-nightly.yml` if applicable. + - _Example view URL_: + `https://github.com/google-gemini/gemini-cli/actions/workflows/evals-nightly.yml` +2. **Isolate**: DO NOT push changes or start remote runs. Confine investigation + to the local workspace. +3. **Read Logs**: + - Eval logs live in `evals/logs/.log`. + - Enable verbose debugging via `export GEMINI_DEBUG_LOG_FILE="debug.log"`. +4. **Diagnose**: Audit tool logs and telemetry. Note if due to setup/assert. + - **Tip**: Proactively add custom logging/diagnostics to check hypotheses. + +--- + +## 2. 🛠️ Fix Strategy + +1. **Targeted Location**: Locate the test case and the corresponding + prompt/code. +2. **Iterative Scope**: Make extreme change first to verify scope, then refine + to a minimal, targeted change. +3. **Assertion Fidelity**: + - Changing the test prompt is a **last resort** (prompts are often vague by + design). + - **Warning**: Do not lose test fidelity by making prompts too direct/easy. 
+ - **Primary Fix Trigger**: Adjust tool descriptions, system prompts + (`snippets.ts`), or **modules that contribute to the prompt template**. + - **Warning**: Prompts have multiple configurations; ensure your fix targets + the correct config for the model in question. +4. **Architecture Options**: If prompt or instruction tuning triggers no + improvement, analyze loop composition. + - **AgentLoop**: Defined by `context + toolset + prompt`. + - **Enhancements**: Loops perform best with direct prompts, fewer irrelevant + tools, low goal density, and minimal low-value/irrelevant context. + - **Modifications**: Compose subagents or isolate tools. Ground in observed + traces. + - **Warning**: Think deeply before offering recommendations; avoid parroting + abstract design guidelines. + +--- + +## 3. ✅ Verify + +1. **Run Local**: Run Vitest in non-interactive mode on just the file. +2. **Log Audit**: Prioritize diagnosing failures via log comparison before + triggering heavy test runs. +3. **Stability Limit**: Run the test **3 times** locally on key models (can use + scripts to run in parallel for speed): + - **Gemini 3.0** + - **Gemini 3 Flash** + - **Gemini 2.5 Pro** +4. **Flakiness Rule**: If it passes 2/3 times, it may be inherent noise + difficult to improve without a structural split. + +--- + +## 4. 📊 Report + +Provide a summary of: + +- Test success rate for each tested model (e.g., 3/3 = 100%). +- Root cause identification and fix explanation. +- If unfixed, provide high-confidence architecture recommendations. diff --git a/.gemini/skills/behavioral-evals/references/promoting.md b/.gemini/skills/behavioral-evals/references/promoting.md new file mode 100644 index 0000000000..d3d3eaf88f --- /dev/null +++ b/.gemini/skills/behavioral-evals/references/promoting.md @@ -0,0 +1,55 @@ +# Promoting Behavioral Evals + +Use this guide when asked to analyze nightly results and promote incubated tests +to stable suites. + +--- + +## 1. 🔍 Investigate candidates + +1. 
**Audit Nightly Logs**: Use the `gh` CLI to fetch results from + `evals-nightly.yml` (Direct URL: + `https://github.com/google-gemini/gemini-cli/actions/workflows/evals-nightly.yml`). + - **Tip**: The aggregate summary from the most recent run integrates the + last 7 runs of history automatically. + - **Safety**: DO NOT push changes or start remote runs. All verification is + local. +2. **Assess Stability**: Identify tests that pass **100% of the time** across + ALL enabled models over the **last 7 nightly runs** in a row. + - _100% means the test passed 3/3 times for every model and run._ +3. **Promotion Targets**: Tests meeting these criteria are candidates for + promotion from `USUALLY_PASSES` to `ALWAYS_PASSES`. + +--- + +## 2. 🚥 Promotion Steps + +1. **Locate File**: Locate the eval file in the `evals/` directory. +2. **Update Policy**: Modify the policy argument to `ALWAYS_PASSES`. + ```typescript + evalTest('ALWAYS_PASSES', { ... }) + ``` +3. **Targeting**: Follow guidelines in `evals/README.md` regarding stable suite + organization. +4. **Constraint**: Your final change must be **minimal and targeted** strictly + to promoting the test status. Do not refactor the test or setup fixtures. + +--- + +## 3. ✅ Verify + +1. **Run Promoted Tests**: Run the promoted test locally using non-interactive + Vitest to confirm structure validity. +2. **Verify Suite Inclusion**: Check that the test is successfully picked up by + standard runnable ranges. + +--- + +## 4. 📊 Report + +Provide a summary of: + +- Which tests were promoted. +- The success rate evidence (e.g., 7/7 runs passed for all models). +- If no candidates qualified, list the next closest candidates and their current + pass rate. 
diff --git a/.gemini/skills/behavioral-evals/references/running.md b/.gemini/skills/behavioral-evals/references/running.md new file mode 100644 index 0000000000..cf8c46a8d6 --- /dev/null +++ b/.gemini/skills/behavioral-evals/references/running.md @@ -0,0 +1,95 @@ +# Running & Promoting Evals + +## 🛠️ Prerequisites + +Behavioral evals run against the compiled binary. You **must** build and bundle +the project first after making changes: + +```bash +npm run build && npm run bundle +``` + +--- + +## 🏃‍♂️ Running Tests + +### 1. Configure Environment Variables + +Evals require a standard API key. If your `.env` file has multiple keys or +comments, use this precise extraction setup: + +```bash +export GEMINI_API_KEY=$(grep '^GEMINI_API_KEY=' .env | cut -d '=' -f2) && RUN_EVALS=1 npx vitest run --config evals/vitest.config.ts +``` + +### 2. Commands + +| Command | Scope | Description | +| :---------------------------------- | :-------------- | :------------------------------------------------- | +| `npm run test:always_passing_evals` | `ALWAYS_PASSES` | Fast feedback, runs in CI. | +| `npm run test:all_evals` | All | Runs nightly incubation tests. Sets `RUN_EVALS=1`. | + +### Target Specific File + +_Note: `RUN_EVALS=1` is required for incubated (`USUALLY_PASSES`) tests._ + +```bash +RUN_EVALS=1 npx vitest run --config evals/vitest.config.ts my_feature.eval.ts +``` + +--- + +## 🐞 Debugging and Logs + +If a test fails, verify: + +- **Tool Trajectory Logs**: Sequence of calls in `evals/logs/<name>.log`. +- **Verbose Reasoning**: Capture raw buffer traces by setting + `GEMINI_DEBUG_LOG_FILE`: + ```bash + export GEMINI_DEBUG_LOG_FILE="debug.log" + ``` + +--- + +### 🎯 Verify Model Targeting + +- **Tip:** Standard evals benchmark against model variations. If a test passes + on Flash but fails on Pro (or vice versa), the issue is usually in the **tool + description**, not the prompt definition. Flash is sensitive to "instruction + bloat," while Pro is sensitive to "ambiguous intent." 
+ +--- + +## 🚥 Deflaking & Promotion + +To maintain CI stability, all new evals follow a strict incubation period. + +### 1. Incubation (`USUALLY_PASSES`) + +New tests must be created with the `USUALLY_PASSES` policy. + +```typescript +evalTest('USUALLY_PASSES', { ... }) +``` + +They run in **Evals: Nightly** workflows and do not block PR merges. + +### 2. Investigate Failures + +If a nightly eval regresses, investigate via agent: + +```bash +gemini /fix-behavioral-eval [optional-run-uri] +``` + +### 3. Promotion (`ALWAYS_PASSES`) + +Once a test scores 100% consistency over multiple nightly cycles: + +```bash +gemini /promote-behavioral-eval +``` + +_Do not promote manually._ The command verifies trajectory logs before updating +the file policy. diff --git a/.gemini/skills/ci/SKILL.md b/.gemini/skills/ci/SKILL.md new file mode 100644 index 0000000000..b55aa4d233 --- /dev/null +++ b/.gemini/skills/ci/SKILL.md @@ -0,0 +1,66 @@ +--- +name: ci +description: + A specialized skill for Gemini CLI that provides high-performance, fail-fast + monitoring of GitHub Actions workflows and automated local verification of CI + failures. It handles run discovery automatically—simply provide the branch name. +--- + +# CI Replicate & Status + +This skill enables the agent to efficiently monitor GitHub Actions, triage +failures, and bridge remote CI errors to local development. It defaults to +**automatic replication** of failures to streamline the fix cycle. + +## Core Capabilities + +- **Automatic Replication**: Automatically monitors CI and immediately executes + suggested test or lint commands locally upon failure. +- **Real-time Monitoring**: Aggregated status line for all concurrent workflows + on the current branch. +- **Fail-Fast Triage**: Immediately stops on the first job failure to provide a + structured report. + +## Workflow + +### 1. 
CI Replicate (`replicate`) - DEFAULT +Use this as the primary path to monitor CI and **automatically** replicate +failures locally for immediate triage and fixing. +- **Behavior**: When this workflow is triggered, the agent will monitor the CI + and **immediately and automatically execute** all suggested test or lint + commands (marked with 🚀) as soon as a failure is detected. +- **Tool**: `node .gemini/skills/ci/scripts/ci.mjs [branch]` +- **Discovery**: The script **automatically** finds the latest active or recent + run for the branch. Do NOT manually search for run IDs. +- **Goal**: Reproduce the failure locally without manual intervention, then + proceed to analyze and fix the code. + +### 2. CI Status (`status`) +Use this when you have pushed changes and need to monitor the CI and reproduce +any failures locally. +- **Tool**: `node .gemini/skills/ci/scripts/ci.mjs [branch] [run_id]` +- **Discovery**: The script **automatically** finds the latest active or recent + run for the branch. You should NOT manually search for `run_id` using `gh run list` + unless a specific historical run is requested. Simply provide the branch name. +- **Step 1 (Monitor)**: Execute the tool with the branch name. +- **Step 2 (Extract)**: Extract suggested `npm test` or `npm run lint` commands + from the output (marked with 🚀). +- **Step 3 (Reproduce)**: Execute those commands locally to confirm the failure. +- **Behavior**: It will poll every 15 seconds. If it detects a failure, it will + exit with a structured report and provide the exact commands to run locally. + +## Failure Categories & Actions + +- **Test Failures**: Agent should run the specific `npm test -w <workspace> -- <test-file>` + command suggested. +- **Lint Errors**: Agent should run `npm run lint:all` or the specific package + lint command. +- **Build Errors**: Agent should check `tsc` output or build logs to resolve + compilation issues. 
#!/usr/bin/env node

/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * Fail-fast monitor for the GitHub Actions runs of a branch.
 *
 * Usage: node ci.mjs [branch] [run_id]
 *
 * Polls the runs until they all finish. On the first failed job it prints a
 * noise-filtered "Structured Failure Report" plus the local commands that
 * reproduce the failure, then exits with code 1. Exits with code 0 only when
 * every monitored run finished successfully (or no run was found).
 *
 * All external commands are spawned WITHOUT a shell (execFileSync + argument
 * arrays): branch names may legally contain shell metacharacters, so they
 * must never be interpolated into a shell command line.
 */

import { execFileSync } from 'node:child_process';

/** Delay between two polling cycles. */
const POLL_INTERVAL_MS = 15000;

/** Upper bound for a downloaded job log held in memory. */
const MAX_LOG_BYTES = 256 * 1024 * 1024;

/** Consecutive polls without any readable run before giving up. */
const MAX_EMPTY_POLLS = 3;

/** Log lines worth reporting (same alternatives the former `grep -iE` used). */
const FAILURE_LINE =
  / FAIL |❌|ERROR|Lint failed|Build failed|Exception|failed with exit code/i;

/** Case-insensitive substrings that mark a log line as noise. */
const NOISE_MARKERS = [
  '* [new branch]',
  'npm warn',
  'fetching updates',
  'node:internal/errors',
  'checkexecsyncerror',
  'node_modules',
];

/**
 * A JavaScript stack frame: "at fn (file:line:col)", "at fn (<anonymous>)" or
 * "at file:line:col" at the end of the line. Deliberately narrower than a
 * plain "at " substring test, which also hits "that ", "format ", etc.
 */
const STACK_FRAME =
  /(?:^|\s)at\s+(?:.*\((?:<anonymous>|native|[^()]*:\d+:\d+)\)|[^\s()]*[A-Za-z_][^\s()]*:\d+:\d+)\s*$/;

/** Report keys that are categories rather than test file paths. */
const NON_FILE_CATEGORIES = [
  'Job Error',
  'Unknown File',
  'Build Error',
  'Lint Error',
];

/** Run conclusions that may be reported as "passed". */
const SUCCESSFUL_CONCLUSIONS = new Set(['success', 'skipped', 'neutral']);

const BRANCH =
  process.argv[2] ||
  execFileSync('git', ['branch', '--show-current']).toString().trim();
const RUN_ID_OVERRIDE = process.argv[3];

let REPO;
try {
  const remoteUrl = execFileSync('git', ['remote', 'get-url', 'origin'])
    .toString()
    .trim();
  REPO = remoteUrl
    .replace(/.*github\.com[\/:]/, '')
    .replace(/\.git$/, '')
    .trim();
} catch {
  // Not a git checkout or no "origin" remote: fall back to the upstream repo.
  REPO = 'google-gemini/gemini-cli';
}

/**
 * Runs the GitHub CLI without a shell.
 *
 * @param {string[]} args Arguments passed verbatim to `gh`.
 * @param {number} [maxBuffer] Optional stdout size limit in bytes.
 * @returns {string|null} stdout, or null when `gh` failed for any reason.
 */
function runGh(args, maxBuffer) {
  try {
    return execFileSync('gh', args, {
      stdio: ['ignore', 'pipe', 'ignore'],
      ...(maxBuffer ? { maxBuffer } : {}),
    }).toString();
  } catch {
    return null;
  }
}

/**
 * Downloads the log of a job and keeps only the lines that look like failures.
 * Filtering happens in-process, so no `grep` binary or shell pipe is needed.
 *
 * @param {number|string} jobId Database id of the job.
 * @returns {string} Matching lines joined with "\n"; '' when the log is
 *   unavailable or nothing matched.
 */
function fetchFailuresViaApi(jobId) {
  const log = runGh(
    ['api', `repos/${REPO}/actions/jobs/${jobId}/logs`],
    MAX_LOG_BYTES,
  );
  if (!log) return '';
  return log
    .split(/\r?\n/)
    .filter((line) => FAILURE_LINE.test(line))
    .join('\n');
}

/**
 * Tells whether a log line is noise (git/npm chatter, dependency paths or
 * stack frames) and must be left out of the report.
 *
 * @param {string} line A single log line.
 * @returns {boolean} True when the line should be dropped.
 */
function isNoise(line) {
  const lower = line.toLowerCase();
  return (
    NOISE_MARKERS.some((marker) => lower.includes(marker)) ||
    STACK_FRAME.test(line)
  );
}

/**
 * Extracts the first `*.test.{js,jsx,ts,tsx}` path mentioned in a log line.
 *
 * @param {string} failureText A single log line.
 * @returns {string|null} The path, or null when the line names no test file.
 */
function extractTestFile(failureText) {
  const cleanLine = failureText
    .replace(/[|#\[\]()]/g, ' ')
    .replace(/<[^>]*>/g, ' ')
    .trim();
  const fileMatch = cleanLine.match(/([\w\/._-]+\.test\.[jt]sx?)/);
  return fileMatch ? fileMatch[1] : null;
}

/**
 * Builds the `npm test` command line(s) that re-run the failed test files.
 *
 * Files are grouped per workspace. The workspace comes from the `packages/<x>/`
 * segment wherever it appears in the path, so absolute runner paths such as
 * `/home/runner/.../packages/core/src/a.test.ts` are attributed correctly.
 *
 * @param {Map<string, Set<string>>} failedFilesMap Report map; only its keys
 *   are used, and category keys (e.g. "Lint Error") are ignored.
 * @returns {string} Commands joined with " && "; '' when no file failed.
 */
function generateTestCommand(failedFilesMap) {
  const workspaceToFiles = new Map();
  for (const file of failedFilesMap.keys()) {
    if (NON_FILE_CATEGORIES.includes(file)) continue;
    const pkg = file.match(/(?:^|\/)packages\/([^\/]+)\//)?.[1];
    const workspace =
      pkg === 'core' ? '@google/gemini-cli-core' : '@google/gemini-cli';
    // Path relative to the package root, which is what `npm test -w` expects.
    const relPath = file.replace(/^.*packages\/[^\/]+\//, '');
    if (!workspaceToFiles.has(workspace)) {
      workspaceToFiles.set(workspace, new Set());
    }
    workspaceToFiles.get(workspace).add(relPath);
  }
  const commands = [];
  for (const [workspace, files] of workspaceToFiles.entries()) {
    commands.push(`npm test -w ${workspace} -- ${Array.from(files).join(' ')}`);
  }
  return commands.join(' && ');
}

/**
 * Adds one entry to the report map.
 *
 * @param {Map<string, Set<string>>} fileToTests Report map, mutated in place.
 * @param {string} key Test file path or category name.
 * @param {string} detail Failure description.
 */
function addFailure(fileToTests, key, detail) {
  if (!fileToTests.has(key)) fileToTests.set(key, new Set());
  fileToTests.get(key).add(detail);
}

/**
 * Records the failures of one failed job into the report map.
 *
 * Uses the job log when it yields at least one non-noise line; otherwise
 * falls back to the name of the failed step so the job is never silently
 * missing from the report.
 *
 * @param {object} job Job object as returned by `gh run view --json jobs`.
 * @param {Map<string, Set<string>>} fileToTests Report map, mutated in place.
 */
function recordJobFailures(job, fileToTests) {
  let recorded = 0;
  for (const line of fetchFailuresViaApi(job.databaseId).split('\n')) {
    if (!line.trim() || isNoise(line)) continue;
    const lower = line.toLowerCase();
    const key =
      extractTestFile(line) ||
      (lower.includes('lint')
        ? 'Lint Error'
        : lower.includes('build')
          ? 'Build Error'
          : 'Unknown File');
    const testName = line.includes(' > ')
      ? line.split(' > ').slice(1).join(' > ').trim()
      : line;
    addFailure(fileToTests, key, testName);
    recorded++;
  }
  if (recorded > 0) return;

  const step =
    job.steps?.find((s) => s.conclusion === 'failure')?.name || 'unknown';
  const stepLower = step.toLowerCase();
  const category = stepLower.includes('lint')
    ? 'Lint Error'
    : stepLower.includes('build')
      ? 'Build Error'
      : 'Job Error';
  addFailure(fileToTests, category, `${job.name}: Failed at step "${step}"`);
}

/**
 * Prints the structured failure report and the suggested local commands.
 *
 * @param {Map<string, Set<string>>} fileToTests Report map.
 * @param {number} failedJobCount Number of failed jobs.
 */
function printFailureReport(fileToTests, failedJobCount) {
  console.log(
    `\n\n❌ Failures detected across ${failedJobCount} job(s). Stopping monitor...`,
  );
  console.log('\n--- Structured Failure Report (Noise Filtered) ---');
  for (const [file, tests] of fileToTests.entries()) {
    console.log(`\nCategory/File: ${file}`);
    // Limit output per file if it's too large
    const testsArr = Array.from(tests).map((t) =>
      t.length > 500 ? t.substring(0, 500) + '... [TRUNCATED]' : t,
    );
    testsArr.slice(0, 10).forEach((t) => console.log(`  - ${t}`));
    if (testsArr.length > 10) {
      console.log(`  ... and ${testsArr.length - 10} more`);
    }
  }
  const testCmd = generateTestCommand(fileToTests);
  if (testCmd) {
    console.log('\n🚀 Run this to verify fixes:');
    console.log(testCmd);
  }
  // Printed independently of the test command: a run can fail both ways.
  if (Array.from(fileToTests.keys()).some((k) => k.includes('Lint'))) {
    console.log('\n🚀 Run this to verify lint fixes:\nnpm run lint:all');
  }
  console.log('---------------------------------');
}

/**
 * Finds the runs to monitor for BRANCH: active runs (or, failing that, the
 * runs created within a minute of the newest one) plus the runs referenced by
 * commit statuses of the branch head (chained/indirect workflows).
 *
 * @returns {Array<number>} Run ids, without duplicates.
 */
function discoverRunIds() {
  const runIds = [];

  // 1. Get runs directly associated with the branch
  const runListOutput = runGh([
    'run',
    'list',
    '--branch',
    BRANCH,
    '--limit',
    '10',
    '--json',
    'databaseId,status,workflowName,createdAt',
  ]);
  if (runListOutput) {
    const runs = JSON.parse(runListOutput);
    const activeRuns = runs.filter((r) => r.status !== 'completed');
    if (activeRuns.length > 0) {
      runIds.push(...activeRuns.map((r) => r.databaseId));
    } else if (runs.length > 0) {
      const latestTime = new Date(runs[0].createdAt).getTime();
      runIds.push(
        ...runs
          .filter((r) => latestTime - new Date(r.createdAt).getTime() < 60000)
          .map((r) => r.databaseId),
      );
    }
  }

  // 2. Get runs associated with commit statuses (handles chained/indirect runs)
  try {
    const headSha = execFileSync('git', ['rev-parse', BRANCH])
      .toString()
      .trim();
    const statusOutput = runGh([
      'api',
      `repos/${REPO}/commits/${headSha}/status`,
      '-q',
      '.statuses[] | select(.target_url | contains("actions/runs/")) | .target_url',
    ]);
    if (statusOutput) {
      for (const url of statusOutput.split('\n')) {
        const match = url.match(/actions\/runs\/(\d+)/);
        if (!match) continue;
        const runId = parseInt(match[1], 10);
        if (!runIds.includes(runId)) runIds.push(runId);
      }
    }
  } catch {
    // Ignore if branch/SHA not found or API fails
  }

  if (runIds.length > 0) {
    const runNames = [];
    for (const runId of runIds) {
      const runInfo = runGh(['run', 'view', String(runId), '--json', 'workflowName']);
      if (runInfo) runNames.push(JSON.parse(runInfo).workflowName);
    }
    console.log(`Monitoring workflows: ${[...new Set(runNames)].join(', ')}`);
  }
  return runIds;
}

/**
 * Polls the target runs until one job fails or every run has completed.
 * Never returns normally: the process exits with 0 (success / nothing to
 * monitor) or 1 (failure, non-successful conclusion, or CI state unreadable).
 */
async function monitor() {
  const targetRunIds = RUN_ID_OVERRIDE ? [RUN_ID_OVERRIDE] : discoverRunIds();

  if (targetRunIds.length === 0) {
    console.log(`No runs found for branch ${BRANCH}.`);
    process.exit(0);
  }

  let emptyPolls = 0;
  while (true) {
    let allPassed = 0,
      allFailed = 0,
      allRunning = 0,
      allQueued = 0,
      totalJobs = 0;
    let anyRunInProgress = false;
    let failuresFoundInLoop = false;
    let queriedRuns = 0;
    const unsuccessfulRuns = [];
    const fileToTests = new Map();

    for (const runId of targetRunIds) {
      const runOutput = runGh([
        'run',
        'view',
        String(runId),
        '--json',
        'databaseId,status,conclusion,workflowName,jobs',
      ]);
      if (!runOutput) continue;
      queriedRuns++;
      const run = JSON.parse(runOutput);
      if (run.status !== 'completed') {
        anyRunInProgress = true;
      } else if (!SUCCESSFUL_CONCLUSIONS.has(run.conclusion)) {
        // cancelled, timed_out, startup_failure, ...: must not count as passed.
        unsuccessfulRuns.push(
          `${run.workflowName} (${run.conclusion || 'unknown'})`,
        );
      }

      const jobs = run.jobs ?? [];
      totalJobs += jobs.length;
      for (const job of jobs) {
        if (job.conclusion === 'failure') {
          failuresFoundInLoop = true;
          recordJobFailures(job, fileToTests);
        }
        if (job.status === 'in_progress') allRunning++;
        else if (job.status === 'queued') allQueued++;
        else if (job.conclusion === 'success') allPassed++;
        else if (job.conclusion === 'failure') allFailed++;
      }
    }

    if (failuresFoundInLoop) {
      printFailureReport(fileToTests, allFailed);
      process.exit(1);
    }

    if (queriedRuns === 0) {
      // `gh` could not read any run (auth, network, bad run id). Reporting
      // success here would be a false positive, so retry and then give up.
      emptyPolls++;
      if (emptyPolls >= MAX_EMPTY_POLLS) {
        console.error(
          `\n❌ Unable to query any of the ${targetRunIds.length} run(s) via gh. Check "gh auth status" and the run id.`,
        );
        process.exit(1);
      }
      await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
      continue;
    }
    emptyPolls = 0;

    const completed = allPassed + allFailed;
    process.stdout.write(
      `\r⏳ Monitoring ${targetRunIds.length} runs... ${completed}/${totalJobs} jobs (${allPassed} passed, ${allFailed} failed, ${allRunning} running, ${allQueued} queued) `,
    );
    if (!anyRunInProgress) {
      if (unsuccessfulRuns.length > 0) {
        console.log(
          `\n⚠️ Workflows finished without success: ${unsuccessfulRuns.join(', ')}`,
        );
        process.exit(1);
      }
      console.log('\n✅ All workflows passed!');
      process.exit(0);
    }
    await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
  }
}

monitor().catch((err) => {
  console.error('\nMonitor error:', err.message);
  process.exit(1);
});
<details>` tag to create a collapsible section. + This is useful for supplementary or data-heavy information that isn't critical + to the main flow. + + Example: + + <details>
+ <summary>Title</summary> + + - First entry + - Second entry + + </details>
+ +- **Callouts**: Use GitHub-flavored markdown alerts to highlight important + information. To ensure the formatting is preserved by `npm run format`, place + an empty line, then the `<!-- prettier-ignore -->` comment directly before + the callout block. The callout type (`[!TYPE]`) should be on the first line, + followed by a newline, and then the content, with each subsequent line of + content starting with `>`. Available types are `NOTE`, `TIP`, `IMPORTANT`, + `WARNING`, and `CAUTION`. + + Example: + +<!-- prettier-ignore --> +> [!NOTE] +> This is an example of a multi-line note that will be preserved +> by Prettier. + +### Links +- **Accessibility:** Use descriptive anchor text; avoid "click here." Ensure the + link makes sense out of context, such as when being read by a screen reader. +- **Use relative links in docs:** Use relative links in documentation (`/docs/`) + to ensure portability. Use paths relative to the current file's directory + (for example, `../tools/` from `docs/cli/`). Do not include the `/docs/` + section of a path, but do verify that the resulting relative link exists. This + does not apply to meta files such as README.MD and CONTRIBUTING.MD. +- **When changing headings, check for deep links:** If a user is changing a + heading, check for deep links to that heading in other pages and update + accordingly. ### Structure - **BLUF:** Start with an introduction explaining what to expect. - **Experimental features:** If a feature is clearly noted as experimental, -add the following note immediately after the introductory paragraph: - `> **Note:** This is a preview feature currently under active development.` + add the following note immediately after the introductory paragraph: + +<!-- prettier-ignore --> +> [!NOTE] +> This is an experimental feature currently under active development. + - **Headings:** Use hierarchical headings to support the user journey. - **Procedures:** - Introduce lists of steps with a complete sentence. 
@@ -85,8 +127,7 @@ add the following note immediately after the introductory paragraph: - Put conditions before instructions (e.g., "On the Settings page, click..."). - Provide clear context for where the action takes place. - Indicate optional steps clearly (e.g., "Optional: ..."). -- **Elements:** Use bullet lists, tables, notes (`> **Note:**`), and warnings - (`> **Warning:**`). +- **Elements:** Use bullet lists, tables, details, and callouts. - **Avoid using a table of contents:** If a table of contents is present, remove it. - **Next steps:** Conclude with a "Next steps" section if applicable. @@ -126,7 +167,6 @@ documentation. - **Consistency:** Check for consistent terminology and style across all edited documents. - ## Phase 4: Verification and finalization Perform a final quality check to ensure that all changes are correctly formatted and that all links are functional. diff --git a/.gemini/skills/review-duplication/SKILL.md b/.gemini/skills/review-duplication/SKILL.md new file mode 100644 index 0000000000..966505bdf3 --- /dev/null +++ b/.gemini/skills/review-duplication/SKILL.md @@ -0,0 +1,69 @@ +--- +name: review-duplication +description: Use this skill during code reviews to proactively investigate the codebase for duplicated functionality, reinvented wheels, or failure to reuse existing project best practices and shared utilities. +--- + +# Review Duplication + +## Overview + +This skill provides a structured workflow for investigating a codebase during a code review to identify duplicated logic, reinvented utilities, and missed opportunities to reuse established patterns. By executing this workflow, you ensure that new code integrates seamlessly with the existing project architecture. + +## Workflow: Investigating for Duplication + +When reviewing code, perform the following steps before finalizing your review: + +### 1. 
Extract Core Logic +Analyze the new code to identify the core algorithms, utility functions, generic data structures, or UI components being introduced. Look beyond the specific business logic to see the underlying mechanics. + +### 2. Hypothesize Existing Locations & Trace Dependencies +Think about where this type of code *would* live if it already existed in the project. Provide absolute paths from the repo root to disambiguate. +- **Utilities:** `packages/core/src/utils/`, `packages/cli/src/utils/` +- **UI Components:** `packages/cli/src/ui/components/`, `packages/cli/src/ui/` +- **Services:** `packages/core/src/services/`, `packages/cli/src/services/` +- **Configuration:** `packages/core/src/config/`, `packages/cli/src/config/` +- **Core Logic:** Call out `packages/core/` if functionality does not appear React UI specific. + +**Trace Third-Party Dependencies:** If the PR introduces a new import for a utility library (e.g., `lodash.merge`, `date-fns`), trace how and where the project currently uses that library. There is likely an existing wrapper or shared utility. + +**Check Package Files:** Before flagging a custom implementation of a complex algorithm, check `package.json` to see if a standard library (like `lodash` or `uuid`) is already installed that provides this functionality. + +### 3. Investigate the Codebase (Sub-Agent Delegation) +Delegate the heavy lifting of codebase investigation to specialized sub-agents. They are optimized to perform deep searches and semantic mapping without bloating your session history. + +To ensure a comprehensive review, you MUST formulate highly specific objectives for the sub-agents, providing them with the "scents" you discovered in Step 1. + +- **Codebase Investigator:** Use the `codebase_investigator` as your primary researcher. 
When delegating, formulate an objective that asks specific, investigative questions about the codebase, explicitly including these search vectors: + - **Structural Similarity:** Ask if existing code uses the same underlying APIs (e.g., "Does any existing code use `Intl.DateTimeFormat` or `setTimeout` for similar purposes?"). + - **Naming Conventions:** Ask if there are existing symbols with similar naming patterns (e.g., "Are there existing symbols with naming patterns like `*Format*` or `*Debounce*`?"). + - **Comments & Documentation:** Ask if keywords from the PR's comments or JSDoc exist in describing similar behavior elsewhere. + - **Architectural Fit:** Ask where this type of logic is currently centralized (e.g., "Where is centralized date formatting logic located?"). + - **Refactoring Guidance:** Crucially, ask the sub-agent to explain *how* the new code could be refactored to use any existing logic it finds. +- **Generalist Agent:** Use the `generalist` for detailed, turn-intensive comparisons. For example: "Review the implementation of `MyNewComponent` in the PR and compare it semantically against all components in `packages/ui/src`. Are there any existing components that could be extended or used instead?" +- **Retain Fast Path for Simple Searches:** For extremely simple, unambiguous checks (e.g., "Does `package.json` include `lodash`?"), perform a direct search to save time. Default to delegation for any open-ended "investigations." + +### 4. Evaluate Best Practices +Check if the new code aligns with the project's established conventions. +- **Error Handling:** Does it use the project's standard error classes or logging mechanisms? +- **State Management:** Does it bypass established stores or contexts? +- **Styling:** Does it hardcode colors or spacing instead of using theme variables? +If the PR introduces a new pattern, compare it against the documented standards and explicitly confirm if an existing project pattern should have been used instead. 
+ +### 5. Formulate Constructive Feedback +If you discover that the PR duplicates existing functionality or ignores a best practice: +- Provide a clear review comment. +- **Identify the Source:** Explicitly mention the absolute or project-relative file path and the specific symbol (function, component, class) that should be reused. +- **Implementation Guidance:** Provide a brief code snippet or a clear explanation showing **how** to integrate the existing code to fulfill the task's requirements. +- **Explain the Value:** Briefly explain why reusing the existing code is beneficial (e.g., maintainability, consistency, built-in edge case handling). + +Example comment: +> "It looks like this PR introduces a new `formatDate` utility. We already have a robust, tested `formatDate` function in `src/utils/dateHelpers.ts`. +> +> You can replace your implementation by importing it like this: +> ```typescript +> import { formatDate } from '../utils/dateHelpers'; +> +> // Then use it here: +> const displayDate = formatDate(userDate, 'MMM Do, YYYY'); +> ``` +> Reusing this ensures that the date formatting remains consistent with the rest of the application and handles timezone conversions correctly." 
diff --git a/.geminiignore b/.geminiignore new file mode 100644 index 0000000000..e40b6ba36e --- /dev/null +++ b/.geminiignore @@ -0,0 +1 @@ +packages/core/src/services/scripts/*.exe diff --git a/.github/ISSUE_TEMPLATE/website_issue.yml b/.github/ISSUE_TEMPLATE/website_issue.yml index 02146381ab..d9b30e1127 100644 --- a/.github/ISSUE_TEMPLATE/website_issue.yml +++ b/.github/ISSUE_TEMPLATE/website_issue.yml @@ -1,7 +1,9 @@ name: 'Website issue' description: 'Report an issue with the Gemini CLI Website and Gemini CLI Extensions Gallery' +title: 'GeminiCLI.com Feedback: [ISSUE]' labels: - 'area/extensions' + - 'area/documentation' body: - type: 'markdown' attributes: diff --git a/.github/actions/publish-release/action.yml b/.github/actions/publish-release/action.yml index 54c404c7c1..a7df2039d5 100644 --- a/.github/actions/publish-release/action.yml +++ b/.github/actions/publish-release/action.yml @@ -175,7 +175,7 @@ runs: --dry-run="${INPUTS_DRY_RUN}" \ --workspace="${INPUTS_CORE_PACKAGE_NAME}" \ --no-tag - npm dist-tag rm ${INPUTS_CORE_PACKAGE_NAME} false --silent + npm dist-tag rm ${INPUTS_CORE_PACKAGE_NAME} false - name: '🔗 Install latest core package' working-directory: '${{ inputs.working-directory }}' @@ -221,7 +221,9 @@ runs: --dry-run="${INPUTS_DRY_RUN}" \ --workspace="${INPUTS_CLI_PACKAGE_NAME}" \ --no-tag - npm dist-tag rm ${INPUTS_CLI_PACKAGE_NAME} false --silent + if [[ "${INPUTS_DRY_RUN}" == "false" ]]; then + npm dist-tag rm ${INPUTS_CLI_PACKAGE_NAME} false + fi - name: 'Get a2a-server Token' uses: './.github/actions/npm-auth-token' @@ -246,7 +248,7 @@ runs: --dry-run="${INPUTS_DRY_RUN}" \ --workspace="${INPUTS_A2A_PACKAGE_NAME}" \ --no-tag - npm dist-tag rm ${INPUTS_A2A_PACKAGE_NAME} false --silent + npm dist-tag rm ${INPUTS_A2A_PACKAGE_NAME} false - name: '🔬 Verify NPM release by version' uses: './.github/actions/verify-release' diff --git a/.github/actions/push-sandbox/action.yml b/.github/actions/push-sandbox/action.yml index bab85af453..dd2d96c4a1 
100644 --- a/.github/actions/push-sandbox/action.yml +++ b/.github/actions/push-sandbox/action.yml @@ -34,7 +34,7 @@ runs: JSON_INPUTS: '${{ toJSON(inputs) }}' run: 'echo "$JSON_INPUTS"' - name: 'Checkout' - uses: 'actions/checkout@v4' + uses: 'actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5' # ratchet:actions/checkout@v4 with: ref: '${{ inputs.github-sha }}' fetch-depth: 0 @@ -45,11 +45,11 @@ runs: shell: 'bash' run: 'npm run build' - name: 'Set up QEMU' - uses: 'docker/setup-qemu-action@v3' + uses: 'docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130' # ratchet:docker/setup-qemu-action@v3 - name: 'Set up Docker Buildx' - uses: 'docker/setup-buildx-action@v3' + uses: 'docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f' # ratchet:docker/setup-buildx-action@v3 - name: 'Log in to GitHub Container Registry' - uses: 'docker/login-action@v3' + uses: 'docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9' # ratchet:docker/login-action@v3 with: registry: 'docker.io' username: '${{ inputs.dockerhub-username }}' diff --git a/.github/actions/verify-release/action.yml b/.github/actions/verify-release/action.yml index 261715c1b9..4e0c6c6f72 100644 --- a/.github/actions/verify-release/action.yml +++ b/.github/actions/verify-release/action.yml @@ -36,7 +36,7 @@ runs: run: 'echo "$JSON_INPUTS"' - name: 'setup node' - uses: 'actions/setup-node@v4' + uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4 with: node-version: '20' diff --git a/.github/workflows/chained_e2e.yml b/.github/workflows/chained_e2e.yml index 8d714b34b0..fe87fb1d5d 100644 --- a/.github/workflows/chained_e2e.yml +++ b/.github/workflows/chained_e2e.yml @@ -334,8 +334,20 @@ jobs: if: "${{ steps.check_evals.outputs.should_run == 'true' }}" env: GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}' + GEMINI_MODEL: 'gemini-3-pro-preview' + # Disable Vitest internal retries to avoid double-retrying; + # custom retry logic is 
handled in evals/test-helper.ts + VITEST_RETRY: 0 run: 'npm run test:always_passing_evals' + - name: 'Upload Reliability Logs' + if: "always() && steps.check_evals.outputs.should_run == 'true'" + uses: 'actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02' # ratchet:actions/upload-artifact@v4 + with: + name: 'eval-logs-${{ github.run_id }}-${{ github.run_attempt }}' + path: 'evals/logs/api-reliability.jsonl' + retention-days: 7 + e2e: name: 'E2E' if: | diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 973d88f5f8..d40b49bb69 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -67,7 +67,7 @@ jobs: cache: 'npm' - name: 'Cache Linters' - uses: 'actions/cache@v4' + uses: 'actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830' # ratchet:actions/cache@v4 with: path: '${{ env.GEMINI_LINT_TEMP_DIR }}' key: "${{ runner.os }}-${{ runner.arch }}-linters-${{ hashFiles('scripts/lint.js') }}" @@ -76,7 +76,7 @@ jobs: run: 'npm ci' - name: 'Cache ESLint' - uses: 'actions/cache@v4' + uses: 'actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830' # ratchet:actions/cache@v4 with: path: '.eslintcache' key: "${{ runner.os }}-eslint-${{ hashFiles('package-lock.json', 'eslint.config.js') }}" @@ -114,6 +114,9 @@ jobs: - name: 'Run sensitive keyword linter' run: 'node scripts/lint.js --sensitive-keywords' + - name: 'Run GitHub Actions pinning linter' + run: 'node scripts/lint.js --check-github-actions-pinning' + link_checker: name: 'Link Checker' runs-on: 'ubuntu-latest' @@ -158,6 +161,12 @@ jobs: - name: 'Build project' run: 'npm run build' + - name: 'Install system dependencies' + run: | + sudo apt-get update -qq && sudo DEBIAN_FRONTEND=noninteractive apt-get install -y -qq bubblewrap + # Ubuntu 24.04+ requires this to allow bwrap to function in CI + sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 || true + - name: 'Install dependencies for testing' run: 'npm ci' diff --git a/.github/workflows/eval-guidance.yml 
b/.github/workflows/eval-guidance.yml new file mode 100644 index 0000000000..e1f1ab3168 --- /dev/null +++ b/.github/workflows/eval-guidance.yml @@ -0,0 +1,69 @@ +name: 'Evals: PR Guidance' + +on: + pull_request: + paths: + - 'packages/core/src/**/*.ts' + - '!**/*.test.ts' + - '!**/*.test.tsx' + +permissions: + pull-requests: 'write' + contents: 'read' + +jobs: + provide-guidance: + name: 'Model Steering Guidance' + runs-on: 'ubuntu-latest' + if: "github.repository == 'google-gemini/gemini-cli'" + steps: + - name: 'Checkout' + uses: 'actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955' # ratchet:actions/checkout@v4 + with: + fetch-depth: 0 + + - name: 'Set up Node.js' + uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4.4.0 + with: + node-version-file: '.nvmrc' + cache: 'npm' + + - name: 'Detect Steering Changes' + id: 'detect' + run: | + STEERING_DETECTED=$(node scripts/changed_prompt.js --steering-only) + echo "STEERING_DETECTED=$STEERING_DETECTED" >> "$GITHUB_OUTPUT" + + - name: 'Analyze PR Content' + if: "steps.detect.outputs.STEERING_DETECTED == 'true'" + id: 'analysis' + env: + GH_TOKEN: '${{ secrets.GITHUB_TOKEN }}' + run: | + # Check for behavioral eval changes + EVAL_CHANGES=$(git diff --name-only origin/${{ github.base_ref }}...HEAD | grep "^evals/" || true) + if [ -z "$EVAL_CHANGES" ]; then + echo "MISSING_EVALS=true" >> "$GITHUB_OUTPUT" + fi + + # Check if user is a maintainer (has write/admin access) + USER_PERMISSION=$(gh api repos/${{ github.repository }}/collaborators/${{ github.actor }}/permission --jq '.permission') + if [[ "$USER_PERMISSION" == "admin" || "$USER_PERMISSION" == "write" ]]; then + echo "IS_MAINTAINER=true" >> "$GITHUB_OUTPUT" + fi + + - name: 'Post Guidance Comment' + if: "steps.detect.outputs.STEERING_DETECTED == 'true'" + uses: 'thollander/actions-comment-pull-request@65f9e5c9a1f2cd378bd74b2e057c9736982a8e74' # ratchet:thollander/actions-comment-pull-request@v3 + with: + 
comment-tag: 'eval-guidance-bot' + message: | + ### 🧠 Model Steering Guidance + + This PR modifies files that affect the model's behavior (prompts, tools, or instructions). + + ${{ steps.analysis.outputs.MISSING_EVALS == 'true' && '- ⚠️ **Consider adding Evals:** No behavioral evaluations (`evals/*.eval.ts`) were added or updated in this PR. Consider adding a test case to verify the new behavior and prevent regressions.' || '' }} + ${{ steps.analysis.outputs.IS_MAINTAINER == 'true' && '- 🚀 **Maintainer Reminder:** Please ensure that these changes do not regress results on benchmark evals before merging.' || '' }} + + --- + *This is an automated guidance message triggered by steering logic signatures.* diff --git a/.github/workflows/evals-nightly.yml b/.github/workflows/evals-nightly.yml index c5b3709c75..9acc1de050 100644 --- a/.github/workflows/evals-nightly.yml +++ b/.github/workflows/evals-nightly.yml @@ -61,6 +61,9 @@ jobs: GEMINI_MODEL: '${{ matrix.model }}' RUN_EVALS: "${{ github.event.inputs.run_all != 'false' }}" TEST_NAME_PATTERN: '${{ github.event.inputs.test_name_pattern }}' + # Disable Vitest internal retries to avoid double-retrying; + # custom retry logic is handled in evals/test-helper.ts + VITEST_RETRY: 0 run: | CMD="npm run test:all_evals" PATTERN="${TEST_NAME_PATTERN}" diff --git a/.github/workflows/gemini-scheduled-stale-issue-closer.yml b/.github/workflows/gemini-scheduled-stale-issue-closer.yml index 2b7b163d88..cfbecd6490 100644 --- a/.github/workflows/gemini-scheduled-stale-issue-closer.yml +++ b/.github/workflows/gemini-scheduled-stale-issue-closer.yml @@ -28,14 +28,14 @@ jobs: steps: - name: 'Generate GitHub App Token' id: 'generate_token' - uses: 'actions/create-github-app-token@v2' + uses: 'actions/create-github-app-token@fee1f7d63c2ff003460e3d139729b119787bc349' # ratchet:actions/create-github-app-token@v2 with: app-id: '${{ secrets.APP_ID }}' private-key: '${{ secrets.PRIVATE_KEY }}' permission-issues: 'write' - name: 'Process Stale 
Issues' - uses: 'actions/github-script@v7' + uses: 'actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b' # ratchet:actions/github-script@v7 env: DRY_RUN: '${{ inputs.dry_run }}' with: diff --git a/.github/workflows/gemini-scheduled-stale-pr-closer.yml b/.github/workflows/gemini-scheduled-stale-pr-closer.yml index cc33848941..7a8e3c1fd5 100644 --- a/.github/workflows/gemini-scheduled-stale-pr-closer.yml +++ b/.github/workflows/gemini-scheduled-stale-pr-closer.yml @@ -27,13 +27,13 @@ jobs: APP_ID: '${{ secrets.APP_ID }}' if: |- ${{ env.APP_ID != '' }} - uses: 'actions/create-github-app-token@v2' + uses: 'actions/create-github-app-token@fee1f7d63c2ff003460e3d139729b119787bc349' # ratchet:actions/create-github-app-token@v2 with: app-id: '${{ secrets.APP_ID }}' private-key: '${{ secrets.PRIVATE_KEY }}' - name: 'Process Stale PRs' - uses: 'actions/github-script@v7' + uses: 'actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b' # ratchet:actions/github-script@v7 env: DRY_RUN: '${{ inputs.dry_run }}' with: diff --git a/.github/workflows/label-backlog-child-issues.yml b/.github/workflows/label-backlog-child-issues.yml index a819bf4e71..697e605d51 100644 --- a/.github/workflows/label-backlog-child-issues.yml +++ b/.github/workflows/label-backlog-child-issues.yml @@ -18,10 +18,10 @@ jobs: runs-on: 'ubuntu-latest' steps: - name: 'Checkout' - uses: 'actions/checkout@v4' + uses: 'actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5' # ratchet:actions/checkout@v4 - name: 'Setup Node.js' - uses: 'actions/setup-node@v4' + uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4 with: node-version: '20' cache: 'npm' @@ -40,10 +40,10 @@ jobs: runs-on: 'ubuntu-latest' steps: - name: 'Checkout' - uses: 'actions/checkout@v4' + uses: 'actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5' # ratchet:actions/checkout@v4 - name: 'Setup Node.js' - uses: 'actions/setup-node@v4' + uses: 
'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4 with: node-version: '20' cache: 'npm' diff --git a/.github/workflows/label-workstream-rollup.yml b/.github/workflows/label-workstream-rollup.yml index 97d699d09b..9a44a9c25d 100644 --- a/.github/workflows/label-workstream-rollup.yml +++ b/.github/workflows/label-workstream-rollup.yml @@ -15,7 +15,7 @@ jobs: issues: 'write' steps: - name: 'Check for Parent Workstream and Apply Label' - uses: 'actions/github-script@v7' + uses: 'actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b' # ratchet:actions/github-script@v7 with: script: | const labelToAdd = 'workstream-rollup'; diff --git a/.github/workflows/pr-contribution-guidelines-notifier.yml b/.github/workflows/pr-contribution-guidelines-notifier.yml index 5ee1b37f57..bd08aac0ce 100644 --- a/.github/workflows/pr-contribution-guidelines-notifier.yml +++ b/.github/workflows/pr-contribution-guidelines-notifier.yml @@ -19,7 +19,7 @@ jobs: APP_ID: '${{ secrets.APP_ID }}' if: |- ${{ env.APP_ID != '' }} - uses: 'actions/create-github-app-token@v2' + uses: 'actions/create-github-app-token@fee1f7d63c2ff003460e3d139729b119787bc349' # ratchet:actions/create-github-app-token@v2 with: app-id: '${{ secrets.APP_ID }}' private-key: '${{ secrets.PRIVATE_KEY }}' diff --git a/.github/workflows/release-change-tags.yml b/.github/workflows/release-change-tags.yml index c7c3f3f2d2..3a7c5648f8 100644 --- a/.github/workflows/release-change-tags.yml +++ b/.github/workflows/release-change-tags.yml @@ -40,7 +40,7 @@ jobs: issues: 'write' steps: - name: 'Checkout repository' - uses: 'actions/checkout@v4' + uses: 'actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5' # ratchet:actions/checkout@v4 with: ref: '${{ github.ref }}' fetch-depth: 0 diff --git a/.github/workflows/release-notes.yml b/.github/workflows/release-notes.yml index 13bb2c2ca8..a5a2f90db8 100644 --- a/.github/workflows/release-notes.yml +++ 
b/.github/workflows/release-notes.yml @@ -29,14 +29,14 @@ jobs: pull-requests: 'write' steps: - name: 'Checkout repository' - uses: 'actions/checkout@v4' + uses: 'actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5' # ratchet:actions/checkout@v4 with: # The user-level skills need to be available to the workflow fetch-depth: 0 ref: 'main' - name: 'Set up Node.js' - uses: 'actions/setup-node@v4' + uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4 with: node-version: '20' @@ -86,7 +86,7 @@ jobs: - name: 'Create Pull Request' if: "steps.validate_version.outputs.CONTINUE == 'true'" - uses: 'peter-evans/create-pull-request@v6' + uses: 'peter-evans/create-pull-request@c5a7806660adbe173f04e3e038b0ccdcd758773c' # ratchet:peter-evans/create-pull-request@v6 with: token: '${{ secrets.GEMINI_CLI_ROBOT_GITHUB_PAT }}' commit-message: 'docs(changelog): update for ${{ steps.release_info.outputs.VERSION }}' diff --git a/.github/workflows/test-build-binary.yml b/.github/workflows/test-build-binary.yml index f11181a9f0..d0069b8b15 100644 --- a/.github/workflows/test-build-binary.yml +++ b/.github/workflows/test-build-binary.yml @@ -33,7 +33,7 @@ jobs: steps: - name: 'Checkout' - uses: 'actions/checkout@v4' + uses: 'actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5' # ratchet:actions/checkout@v4 - name: 'Optimize Windows Performance' if: "matrix.os == 'windows-latest'" @@ -46,7 +46,7 @@ jobs: shell: 'powershell' - name: 'Set up Node.js' - uses: 'actions/setup-node@v4' + uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4 with: node-version-file: '.nvmrc' architecture: '${{ matrix.arch }}' @@ -63,7 +63,7 @@ jobs: - name: 'Setup Windows SDK (Windows)' if: "matrix.os == 'windows-latest'" - uses: 'microsoft/setup-msbuild@v2' + uses: 'microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce' # ratchet:microsoft/setup-msbuild@v2 - name: 'Add Signtool to Path (Windows)' if: 
"matrix.os == 'windows-latest'" @@ -153,7 +153,7 @@ jobs: npm run test:integration:sandbox:none -- --testTimeout=600000 - name: 'Upload Artifact' - uses: 'actions/upload-artifact@v4' + uses: 'actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02' # ratchet:actions/upload-artifact@v4 with: name: 'gemini-cli-${{ matrix.platform_name }}' path: 'dist/${{ matrix.platform_name }}/' diff --git a/.github/workflows/unassign-inactive-assignees.yml b/.github/workflows/unassign-inactive-assignees.yml index dd09f0feaf..e3b9905b5d 100644 --- a/.github/workflows/unassign-inactive-assignees.yml +++ b/.github/workflows/unassign-inactive-assignees.yml @@ -40,13 +40,13 @@ jobs: steps: - name: 'Generate GitHub App Token' id: 'generate_token' - uses: 'actions/create-github-app-token@v2' + uses: 'actions/create-github-app-token@fee1f7d63c2ff003460e3d139729b119787bc349' # ratchet:actions/create-github-app-token@v2 with: app-id: '${{ secrets.APP_ID }}' private-key: '${{ secrets.PRIVATE_KEY }}' - name: 'Unassign inactive assignees' - uses: 'actions/github-script@v7' + uses: 'actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b' # ratchet:actions/github-script@v7 env: DRY_RUN: '${{ inputs.dry_run }}' with: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c6c619219c..9b3e18d6af 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -323,8 +323,8 @@ fi #### Formatting -To separately format the code in this project by running the following command -from the root directory: +To separately format the code in this project, run the following command from +the root directory: ```bash npm run format diff --git a/docs/admin/enterprise-controls.md b/docs/admin/enterprise-controls.md index 8c9ba60a13..5792a6c5bc 100644 --- a/docs/admin/enterprise-controls.md +++ b/docs/admin/enterprise-controls.md @@ -106,6 +106,67 @@ organization. ensures users maintain final control over which permitted servers are actually active in their environment. 
+#### Required MCP Servers (preview) + +**Default**: empty + +Allows administrators to define MCP servers that are **always injected** into +the user's environment. Unlike the allowlist (which filters user-configured +servers), required servers are automatically added regardless of the user's +local configuration. + +**Required Servers Format:** + +```json +{ + "requiredMcpServers": { + "corp-compliance-tool": { + "url": "https://mcp.corp/compliance", + "type": "http", + "trust": true, + "description": "Corporate compliance tool" + }, + "internal-registry": { + "url": "https://registry.corp/mcp", + "type": "sse", + "authProviderType": "google_credentials", + "oauth": { + "scopes": ["https://www.googleapis.com/auth/scope"] + } + } + } +} +``` + +**Supported Fields:** + +- `url`: (Required) The full URL of the MCP server endpoint. +- `type`: (Required) The connection type (`sse` or `http`). +- `trust`: (Optional) If set to `true`, tool execution will not require user + approval. Defaults to `true` for required servers. +- `description`: (Optional) Human-readable description of the server. +- `authProviderType`: (Optional) Authentication provider (`dynamic_discovery`, + `google_credentials`, or `service_account_impersonation`). +- `oauth`: (Optional) OAuth configuration including `scopes`, `clientId`, and + `clientSecret`. +- `targetAudience`: (Optional) OAuth target audience for service-to-service + auth. +- `targetServiceAccount`: (Optional) Service account email to impersonate. +- `headers`: (Optional) Additional HTTP headers to send with requests. +- `includeTools` / `excludeTools`: (Optional) Tool filtering lists. +- `timeout`: (Optional) Timeout in milliseconds for MCP requests. + +**Client Enforcement Logic:** + +- Required servers are injected **after** allowlist filtering, so they are + always available even if the allowlist is active. 
+- If a required server has the **same name** as a locally configured server, the + admin configuration **completely overrides** the local one. +- Required servers only support remote transports (`sse`, `http`). Local + execution fields (`command`, `args`, `env`, `cwd`) are not supported. +- Required servers can coexist with allowlisted servers — both features work + independently. + ### Unmanaged Capabilities **Enabled/Disabled** | Default: disabled diff --git a/docs/changelogs/index.md b/docs/changelogs/index.md index d79bd910d1..84a0daa3b2 100644 --- a/docs/changelogs/index.md +++ b/docs/changelogs/index.md @@ -18,6 +18,30 @@ on GitHub. | [Preview](preview.md) | Experimental features ready for early feedback. | | [Stable](latest.md) | Stable, recommended for general use. | +## Announcements: v0.35.0 - 2026-03-24 + +- **Customizable Keyboard Shortcuts:** Users can now customize their keyboard + shortcuts, including support for literal character keybindings and the + extended Kitty protocol + ([#21945](https://github.com/google-gemini/gemini-cli/pull/21945), + [#21972](https://github.com/google-gemini/gemini-cli/pull/21972) by + @scidomino). +- **Vim Mode Improvements:** Added missing motions (X, ~, r, f/F/t/T) and + yank/paste support with the unnamed register + ([#21932](https://github.com/google-gemini/gemini-cli/pull/21932), + [#22026](https://github.com/google-gemini/gemini-cli/pull/22026) by @aanari). +- **Tool Isolation and Sandboxing:** Introduced `SandboxManager` to isolate + process-spawning tools and added Linux bubblewrap/seccomp sandboxing support + ([#21774](https://github.com/google-gemini/gemini-cli/pull/21774), + [#22231](https://github.com/google-gemini/gemini-cli/pull/22231) by @galz10, + [#22680](https://github.com/google-gemini/gemini-cli/pull/22680) by + @DavidAPierce). 
+- **JIT Context Discovery:** Implemented Just-In-Time context discovery for file + system tools to improve model performance and accuracy + ([#22082](https://github.com/google-gemini/gemini-cli/pull/22082), + [#22736](https://github.com/google-gemini/gemini-cli/pull/22736) by + @SandyTao520). + ## Announcements: v0.34.0 - 2026-03-17 - **Plan Mode Enabled by Default:** Plan Mode is now enabled by default to help diff --git a/docs/changelogs/latest.md b/docs/changelogs/latest.md index e49ef1c652..6df33c78d6 100644 --- a/docs/changelogs/latest.md +++ b/docs/changelogs/latest.md @@ -1,6 +1,6 @@ -# Latest stable release: v0.34.0 +# Latest stable release: v0.35.2 -Released: March 17, 2026 +Released: March 26, 2026 For most users, our latest stable release is the recommended release. Install the latest stable version with: @@ -11,474 +11,378 @@ npm install -g @google/gemini-cli ## Highlights -- **Plan Mode Enabled by Default**: The comprehensive planning capability is now - enabled by default, allowing for better structured task management and - execution. -- **Enhanced Sandboxing Capabilities**: Added support for native gVisor (runsc) - sandboxing as well as experimental LXC container sandboxing to provide more - robust and isolated execution environments. -- **Improved Loop Detection & Recovery**: Implemented iterative loop detection - and model feedback mechanisms to prevent the CLI from getting stuck in - repetitive actions. -- **Customizable UI Elements**: You can now configure a custom footer using the - new `/footer` command, and enjoy standardized semantic focus colors for better - history visibility. -- **Extensive Subagent Updates**: Refinements across the tracker visualization - tools, background process logging, and broader fallback support for models in - tool execution scenarios. 
+- **Customizable Keyboard Shortcuts:** Significant improvements to input + flexibility with support for custom keybindings, literal character bindings, + and extended terminal protocol keys. +- **Vim Mode Enhancements:** Further refinement of the Vim modal editing + experience, adding common motions like `X`, `~`, `r`, and `f/F/t/T`, + along with yank and paste support. +- **Enhanced Security through Sandboxing:** Introduction of a unified + `SandboxManager` and integration of Linux-native sandboxing (bubblewrap and + seccomp) to isolate tool execution and improve system security. +- **JIT Context Discovery:** Improved performance and accuracy by enabling + Just-In-Time context loading for file system tools, ensuring the model has the + most relevant information without overwhelming the context. +- **Subagent & Performance Updates:** Subagents are now enabled by default, + supported by a model-driven parallel tool scheduler and code splitting for + faster startup and more efficient task execution. ## What's Changed -- feat(cli): add chat resume footer on session quit by @lordshashank in - [#20667](https://github.com/google-gemini/gemini-cli/pull/20667) -- Support bold and other styles in svg snapshots by @jacob314 in - [#20937](https://github.com/google-gemini/gemini-cli/pull/20937) -- fix(core): increase A2A agent timeout to 30 minutes by @adamfweidman in - [#21028](https://github.com/google-gemini/gemini-cli/pull/21028) -- Cleanup old branches. 
by @jacob314 in - [#19354](https://github.com/google-gemini/gemini-cli/pull/19354) -- chore(release): bump version to 0.34.0-nightly.20260303.34f0c1538 by +- fix(core): allow disabling environment variable redaction by @galz10 in + [#23927](https://github.com/google-gemini/gemini-cli/pull/23927) +- fix(a2a-server): A2A server should execute ask policies in interactive mode by + @keith.schaab in + [#23831](https://github.com/google-gemini/gemini-cli/pull/23831) +- feat(cli): customizable keyboard shortcuts by @scidomino in + [#21945](https://github.com/google-gemini/gemini-cli/pull/21945) +- feat(core): Thread `AgentLoopContext` through core. by @joshualitt in + [#21944](https://github.com/google-gemini/gemini-cli/pull/21944) +- chore(release): bump version to 0.35.0-nightly.20260311.657f19c1f by @gemini-cli-robot in - [#21034](https://github.com/google-gemini/gemini-cli/pull/21034) -- feat(ui): standardize semantic focus colors and enhance history visibility by - @keithguerin in - [#20745](https://github.com/google-gemini/gemini-cli/pull/20745) -- fix: merge duplicate imports in packages/core (3/4) by @Nixxx19 in - [#20928](https://github.com/google-gemini/gemini-cli/pull/20928) -- Add extra safety checks for proto pollution by @jacob314 in - [#20396](https://github.com/google-gemini/gemini-cli/pull/20396) -- feat(core): Add tracker CRUD tools & visualization by @anj-s in - [#19489](https://github.com/google-gemini/gemini-cli/pull/19489) -- Revert "fix(ui): persist expansion in AskUser dialog when navigating options" - by @jacob314 in - [#21042](https://github.com/google-gemini/gemini-cli/pull/21042) -- Changelog for v0.33.0-preview.0 by @gemini-cli-robot in - [#21030](https://github.com/google-gemini/gemini-cli/pull/21030) -- fix: model persistence for all scenarios by @sripasg in - [#21051](https://github.com/google-gemini/gemini-cli/pull/21051) -- chore/release: bump version to 0.34.0-nightly.20260304.28af4e127 by - @gemini-cli-robot in - 
[#21054](https://github.com/google-gemini/gemini-cli/pull/21054) -- Consistently guard restarts against concurrent auto updates by @scidomino in - [#21016](https://github.com/google-gemini/gemini-cli/pull/21016) -- Defensive coding to reduce the risk of Maximum update depth errors by - @jacob314 in [#20940](https://github.com/google-gemini/gemini-cli/pull/20940) -- fix(cli): Polish shell autocomplete rendering to be a little more shell native - feeling. by @jacob314 in - [#20931](https://github.com/google-gemini/gemini-cli/pull/20931) -- Docs: Update plan mode docs by @jkcinouye in - [#19682](https://github.com/google-gemini/gemini-cli/pull/19682) -- fix(mcp): Notifications/tools/list_changed support not working by @jacob314 in - [#21050](https://github.com/google-gemini/gemini-cli/pull/21050) -- fix(cli): register extension lifecycle events in DebugProfiler by - @fayerman-source in - [#20101](https://github.com/google-gemini/gemini-cli/pull/20101) -- chore(dev): update vscode settings for typescriptreact by @rohit-4321 in - [#19907](https://github.com/google-gemini/gemini-cli/pull/19907) -- fix(cli): enable multi-arch docker builds for sandbox by @ru-aish in - [#19821](https://github.com/google-gemini/gemini-cli/pull/19821) -- Changelog for v0.32.0 by @gemini-cli-robot in - [#21033](https://github.com/google-gemini/gemini-cli/pull/21033) -- Changelog for v0.33.0-preview.1 by @gemini-cli-robot in - [#21058](https://github.com/google-gemini/gemini-cli/pull/21058) -- feat(core): improve @scripts/copy_files.js autocomplete to prioritize - filenames by @sehoon38 in - [#21064](https://github.com/google-gemini/gemini-cli/pull/21064) -- feat(sandbox): add experimental LXC container sandbox support by @h30s in - [#20735](https://github.com/google-gemini/gemini-cli/pull/20735) -- feat(evals): add overall pass rate row to eval nightly summary table by - @gundermanc in - [#20905](https://github.com/google-gemini/gemini-cli/pull/20905) -- feat(telemetry): include language in 
telemetry and fix accepted lines - computation by @gundermanc in - [#21126](https://github.com/google-gemini/gemini-cli/pull/21126) -- Changelog for v0.32.1 by @gemini-cli-robot in - [#21055](https://github.com/google-gemini/gemini-cli/pull/21055) -- feat(core): add robustness tests, logging, and metrics for CodeAssistServer - SSE parsing by @yunaseoul in - [#21013](https://github.com/google-gemini/gemini-cli/pull/21013) -- feat: add issue assignee workflow by @kartikangiras in - [#21003](https://github.com/google-gemini/gemini-cli/pull/21003) -- fix: improve error message when OAuth succeeds but project ID is required by - @Nixxx19 in [#21070](https://github.com/google-gemini/gemini-cli/pull/21070) -- feat(loop-reduction): implement iterative loop detection and model feedback by - @aishaneeshah in - [#20763](https://github.com/google-gemini/gemini-cli/pull/20763) -- chore(github): require prompt approvers for agent prompt files by @gundermanc - in [#20896](https://github.com/google-gemini/gemini-cli/pull/20896) -- Docs: Create tools reference by @jkcinouye in - [#19470](https://github.com/google-gemini/gemini-cli/pull/19470) -- fix(core, a2a-server): prevent hang during OAuth in non-interactive sessions - by @spencer426 in - [#21045](https://github.com/google-gemini/gemini-cli/pull/21045) -- chore(cli): enable deprecated settings removal by default by @yashodipmore in - [#20682](https://github.com/google-gemini/gemini-cli/pull/20682) -- feat(core): Disable fast ack helper for hints. 
by @joshualitt in - [#21011](https://github.com/google-gemini/gemini-cli/pull/21011) -- fix(ui): suppress redundant failure note when tool error note is shown by - @NTaylorMullen in - [#21078](https://github.com/google-gemini/gemini-cli/pull/21078) -- docs: document planning workflows with Conductor example by @jerop in - [#21166](https://github.com/google-gemini/gemini-cli/pull/21166) -- feat(release): ship esbuild bundle in npm package by @genneth in - [#19171](https://github.com/google-gemini/gemini-cli/pull/19171) -- fix(extensions): preserve symlinks in extension source path while enforcing - folder trust by @galz10 in - [#20867](https://github.com/google-gemini/gemini-cli/pull/20867) -- fix(cli): defer tool exclusions to policy engine in non-interactive mode by - @EricRahm in [#20639](https://github.com/google-gemini/gemini-cli/pull/20639) -- fix(ui): removed double padding on rendered content by @devr0306 in - [#21029](https://github.com/google-gemini/gemini-cli/pull/21029) -- fix(core): truncate excessively long lines in grep search output by - @gundermanc in - [#21147](https://github.com/google-gemini/gemini-cli/pull/21147) -- feat: add custom footer configuration via `/footer` by @jackwotherspoon in - [#19001](https://github.com/google-gemini/gemini-cli/pull/19001) -- perf(core): fix OOM crash in long-running sessions by @WizardsForgeGames in - [#19608](https://github.com/google-gemini/gemini-cli/pull/19608) -- refactor(cli): categorize built-in themes into dark/ and light/ directories by - @JayadityaGit in - [#18634](https://github.com/google-gemini/gemini-cli/pull/18634) -- fix(core): explicitly allow codebase_investigator and cli_help in read-only - mode by @Adib234 in - [#21157](https://github.com/google-gemini/gemini-cli/pull/21157) -- test: add browser agent integration tests by @kunal-10-cloud in - [#21151](https://github.com/google-gemini/gemini-cli/pull/21151) -- fix(cli): fix enabling kitty codes on Windows Terminal by @scidomino in - 
[#21136](https://github.com/google-gemini/gemini-cli/pull/21136) -- refactor(core): extract shared OAuth flow primitives from MCPOAuthProvider by - @SandyTao520 in - [#20895](https://github.com/google-gemini/gemini-cli/pull/20895) -- fix(ui): add partial output to cancelled shell UI by @devr0306 in - [#21178](https://github.com/google-gemini/gemini-cli/pull/21178) -- fix(cli): replace hardcoded keybinding strings with dynamic formatters by - @scidomino in [#21159](https://github.com/google-gemini/gemini-cli/pull/21159) -- DOCS: Update quota and pricing page by @g-samroberts in - [#21194](https://github.com/google-gemini/gemini-cli/pull/21194) -- feat(telemetry): implement Clearcut logging for startup statistics by - @yunaseoul in [#21172](https://github.com/google-gemini/gemini-cli/pull/21172) -- feat(triage): add area/documentation to issue triage by @g-samroberts in - [#21222](https://github.com/google-gemini/gemini-cli/pull/21222) -- Fix so shell calls are formatted by @jacob314 in - [#21237](https://github.com/google-gemini/gemini-cli/pull/21237) -- feat(cli): add native gVisor (runsc) sandboxing support by @Zheyuan-Lin in - [#21062](https://github.com/google-gemini/gemini-cli/pull/21062) -- docs: use absolute paths for internal links in plan-mode.md by @jerop in - [#21299](https://github.com/google-gemini/gemini-cli/pull/21299) -- fix(core): prevent unhandled AbortError crash during stream loop detection by - @7hokerz in [#21123](https://github.com/google-gemini/gemini-cli/pull/21123) -- fix:reorder env var redaction checks to scan values first by @kartikangiras in - [#21059](https://github.com/google-gemini/gemini-cli/pull/21059) -- fix(acp): rename --experimental-acp to --acp & remove Zed-specific refrences - by @skeshive in - [#21171](https://github.com/google-gemini/gemini-cli/pull/21171) -- feat(core): fallback to 2.5 models with no access for toolcalls by @sehoon38 - in [#21283](https://github.com/google-gemini/gemini-cli/pull/21283) -- test(core): 
improve testing for API request/response parsing by @sehoon38 in - [#21227](https://github.com/google-gemini/gemini-cli/pull/21227) -- docs(links): update docs-writer skill and fix broken link by @g-samroberts in - [#21314](https://github.com/google-gemini/gemini-cli/pull/21314) -- Fix code colorizer ansi escape bug. by @jacob314 in - [#21321](https://github.com/google-gemini/gemini-cli/pull/21321) -- remove wildcard behavior on keybindings by @scidomino in - [#21315](https://github.com/google-gemini/gemini-cli/pull/21315) -- feat(acp): Add support for AI Gateway auth by @skeshive in - [#21305](https://github.com/google-gemini/gemini-cli/pull/21305) -- fix(theme): improve theme color contrast for macOS Terminal.app by @clocky in - [#21175](https://github.com/google-gemini/gemini-cli/pull/21175) -- feat (core): Implement tracker related SI changes by @anj-s in - [#19964](https://github.com/google-gemini/gemini-cli/pull/19964) -- Changelog for v0.33.0-preview.2 by @gemini-cli-robot in - [#21333](https://github.com/google-gemini/gemini-cli/pull/21333) -- Changelog for v0.33.0-preview.3 by @gemini-cli-robot in - [#21347](https://github.com/google-gemini/gemini-cli/pull/21347) -- docs: format release times as HH:MM UTC by @pavan-sh in - [#20726](https://github.com/google-gemini/gemini-cli/pull/20726) -- fix(cli): implement --all flag for extensions uninstall by @sehoon38 in - [#21319](https://github.com/google-gemini/gemini-cli/pull/21319) -- docs: fix incorrect relative links to command reference by @kanywst in - [#20964](https://github.com/google-gemini/gemini-cli/pull/20964) -- documentiong ensures ripgrep by @Jatin24062005 in - [#21298](https://github.com/google-gemini/gemini-cli/pull/21298) -- fix(core): handle AbortError thrown during processTurn by @MumuTW in - [#21296](https://github.com/google-gemini/gemini-cli/pull/21296) -- docs(cli): clarify ! 
command output visibility in shell commands tutorial by - @MohammedADev in - [#21041](https://github.com/google-gemini/gemini-cli/pull/21041) -- fix: logic for task tracker strategy and remove tracker tools by @anj-s in - [#21355](https://github.com/google-gemini/gemini-cli/pull/21355) -- fix(partUtils): display media type and size for inline data parts by @Aboudjem - in [#21358](https://github.com/google-gemini/gemini-cli/pull/21358) -- Fix(accessibility): add screen reader support to RewindViewer by @Famous077 in - [#20750](https://github.com/google-gemini/gemini-cli/pull/20750) -- fix(hooks): propagate stopHookActive in AfterAgent retry path (#20426) by - @Aarchi-07 in [#20439](https://github.com/google-gemini/gemini-cli/pull/20439) -- fix(core): deduplicate GEMINI.md files by device/inode on case-insensitive - filesystems (#19904) by @Nixxx19 in - [#19915](https://github.com/google-gemini/gemini-cli/pull/19915) -- feat(core): add concurrency safety guidance for subagent delegation (#17753) - by @abhipatel12 in - [#21278](https://github.com/google-gemini/gemini-cli/pull/21278) -- feat(ui): dynamically generate all keybinding hints by @scidomino in - [#21346](https://github.com/google-gemini/gemini-cli/pull/21346) -- feat(core): implement unified KeychainService and migrate token storage by - @ehedlund in [#21344](https://github.com/google-gemini/gemini-cli/pull/21344) -- fix(cli): gracefully handle --resume when no sessions exist by @SandyTao520 in - [#21429](https://github.com/google-gemini/gemini-cli/pull/21429) -- fix(plan): keep approved plan during chat compression by @ruomengz in - [#21284](https://github.com/google-gemini/gemini-cli/pull/21284) -- feat(core): implement generic CacheService and optimize setupUser by @sehoon38 - in [#21374](https://github.com/google-gemini/gemini-cli/pull/21374) -- Update quota and pricing documentation with subscription tiers by @srithreepo - in [#21351](https://github.com/google-gemini/gemini-cli/pull/21351) -- fix(core): 
append correct OTLP paths for HTTP exporters by - @sebastien-prudhomme in - [#16836](https://github.com/google-gemini/gemini-cli/pull/16836) -- Changelog for v0.33.0-preview.4 by @gemini-cli-robot in - [#21354](https://github.com/google-gemini/gemini-cli/pull/21354) -- feat(cli): implement dot-prefixing for slash command conflicts by @ehedlund in - [#20979](https://github.com/google-gemini/gemini-cli/pull/20979) -- refactor(core): standardize MCP tool naming to mcp\_ FQN format by - @abhipatel12 in - [#21425](https://github.com/google-gemini/gemini-cli/pull/21425) -- feat(cli): hide gemma settings from display and mark as experimental by - @abhipatel12 in - [#21471](https://github.com/google-gemini/gemini-cli/pull/21471) -- feat(skills): refine string-reviewer guidelines and description by @clocky in - [#20368](https://github.com/google-gemini/gemini-cli/pull/20368) -- fix(core): whitelist TERM and COLORTERM in environment sanitization by - @deadsmash07 in - [#20514](https://github.com/google-gemini/gemini-cli/pull/20514) -- fix(billing): fix overage strategy lifecycle and settings integration by - @gsquared94 in - [#21236](https://github.com/google-gemini/gemini-cli/pull/21236) -- fix: expand paste placeholders in TextInput on submit by @Jefftree in - [#19946](https://github.com/google-gemini/gemini-cli/pull/19946) -- fix(core): add in-memory cache to ChatRecordingService to prevent OOM by - @SandyTao520 in - [#21502](https://github.com/google-gemini/gemini-cli/pull/21502) -- feat(cli): overhaul thinking UI by @keithguerin in - [#18725](https://github.com/google-gemini/gemini-cli/pull/18725) -- fix(ui): unify Ctrl+O expansion hint experience across buffer modes by - @jwhelangoog in - [#21474](https://github.com/google-gemini/gemini-cli/pull/21474) -- fix(cli): correct shell height reporting by @jacob314 in - [#21492](https://github.com/google-gemini/gemini-cli/pull/21492) -- Make test suite pass when the GEMINI_SYSTEM_MD env variable or - GEMINI_WRITE_SYSTEM_MD 
variable happens to be set locally/ by @jacob314 in - [#21480](https://github.com/google-gemini/gemini-cli/pull/21480) -- Disallow underspecified types by @gundermanc in - [#21485](https://github.com/google-gemini/gemini-cli/pull/21485) -- refactor(cli): standardize on 'reload' verb for all components by @keithguerin - in [#20654](https://github.com/google-gemini/gemini-cli/pull/20654) -- feat(cli): Invert quota language to 'percent used' by @keithguerin in - [#20100](https://github.com/google-gemini/gemini-cli/pull/20100) -- Docs: Add documentation for notifications (experimental)(macOS) by @jkcinouye - in [#21163](https://github.com/google-gemini/gemini-cli/pull/21163) -- Code review comments as a pr by @jacob314 in - [#21209](https://github.com/google-gemini/gemini-cli/pull/21209) -- feat(cli): unify /chat and /resume command UX by @LyalinDotCom in - [#20256](https://github.com/google-gemini/gemini-cli/pull/20256) -- docs: fix typo 'allowslisted' -> 'allowlisted' in mcp-server.md by + [#21966](https://github.com/google-gemini/gemini-cli/pull/21966) +- refactor(a2a): remove legacy CoreToolScheduler by @adamfweidman in + [#21955](https://github.com/google-gemini/gemini-cli/pull/21955) +- feat(ui): add missing vim mode motions (X, ~, r, f/F/t/T, df/dt and friends) + by @aanari in [#21932](https://github.com/google-gemini/gemini-cli/pull/21932) +- Feat/retry fetch notifications by @aishaneeshah in + [#21813](https://github.com/google-gemini/gemini-cli/pull/21813) +- fix(core): remove OAuth check from handle fallback and clean up stray file by + @sehoon38 in [#21962](https://github.com/google-gemini/gemini-cli/pull/21962) +- feat(cli): support literal character keybindings and extended Kitty protocol + keys by @scidomino in + [#21972](https://github.com/google-gemini/gemini-cli/pull/21972) +- fix(ui): clamp cursor to last char after all NORMAL mode deletes by @aanari in + [#21973](https://github.com/google-gemini/gemini-cli/pull/21973) +- test(core): add missing 
tests for prompts/utils.ts by @krrishverma1805-web in + [#19941](https://github.com/google-gemini/gemini-cli/pull/19941) +- fix(cli): allow scrolling keys in copy mode (Ctrl+S selection mode) by + @nsalerni in [#19933](https://github.com/google-gemini/gemini-cli/pull/19933) +- docs(cli): add custom keybinding documentation by @scidomino in + [#21980](https://github.com/google-gemini/gemini-cli/pull/21980) +- docs: fix misleading YOLO mode description in defaultApprovalMode by @Gyanranjan-Priyam in - [#21665](https://github.com/google-gemini/gemini-cli/pull/21665) -- fix(core): display actual graph output in tracker_visualize tool by @anj-s in - [#21455](https://github.com/google-gemini/gemini-cli/pull/21455) -- fix(core): sanitize SSE-corrupted JSON and domain strings in error - classification by @gsquared94 in - [#21702](https://github.com/google-gemini/gemini-cli/pull/21702) -- Docs: Make documentation links relative by @diodesign in - [#21490](https://github.com/google-gemini/gemini-cli/pull/21490) -- feat(cli): expose /tools desc as explicit subcommand for discoverability by - @aworki in [#21241](https://github.com/google-gemini/gemini-cli/pull/21241) -- feat(cli): add /compact alias for /compress command by @jackwotherspoon in - [#21711](https://github.com/google-gemini/gemini-cli/pull/21711) -- feat(plan): enable Plan Mode by default by @jerop in - [#21713](https://github.com/google-gemini/gemini-cli/pull/21713) -- feat(core): Introduce `AgentLoopContext`. 
by @joshualitt in - [#21198](https://github.com/google-gemini/gemini-cli/pull/21198) -- fix(core): resolve symlinks for non-existent paths during validation by - @Adib234 in [#21487](https://github.com/google-gemini/gemini-cli/pull/21487) -- docs: document tool exclusion from memory via deny policy by @Abhijit-2592 in - [#21428](https://github.com/google-gemini/gemini-cli/pull/21428) -- perf(core): cache loadApiKey to reduce redundant keychain access by @sehoon38 - in [#21520](https://github.com/google-gemini/gemini-cli/pull/21520) -- feat(cli): implement /upgrade command by @sehoon38 in - [#21511](https://github.com/google-gemini/gemini-cli/pull/21511) -- Feat/browser agent progress emission by @kunal-10-cloud in - [#21218](https://github.com/google-gemini/gemini-cli/pull/21218) -- fix(settings): display objects as JSON instead of [object Object] by - @Zheyuan-Lin in - [#21458](https://github.com/google-gemini/gemini-cli/pull/21458) -- Unmarshall update by @DavidAPierce in - [#21721](https://github.com/google-gemini/gemini-cli/pull/21721) -- Update mcp's list function to check for disablement. 
by @DavidAPierce in - [#21148](https://github.com/google-gemini/gemini-cli/pull/21148) -- robustness(core): static checks to validate history is immutable by @jacob314 - in [#21228](https://github.com/google-gemini/gemini-cli/pull/21228) -- refactor(cli): better react patterns for BaseSettingsDialog by @psinha40898 in - [#21206](https://github.com/google-gemini/gemini-cli/pull/21206) -- feat(security): implement robust IP validation and safeFetch foundation by - @alisa-alisa in - [#21401](https://github.com/google-gemini/gemini-cli/pull/21401) -- feat(core): improve subagent result display by @joshualitt in - [#20378](https://github.com/google-gemini/gemini-cli/pull/20378) -- docs: fix broken markdown syntax and anchor links in /tools by @campox747 in - [#20902](https://github.com/google-gemini/gemini-cli/pull/20902) -- feat(policy): support subagent-specific policies in TOML by @akh64bit in - [#21431](https://github.com/google-gemini/gemini-cli/pull/21431) -- Add script to speed up reviewing PRs adding a worktree. 
by @jacob314 in - [#21748](https://github.com/google-gemini/gemini-cli/pull/21748) -- fix(core): prevent infinite recursion in symlink resolution by @Adib234 in - [#21750](https://github.com/google-gemini/gemini-cli/pull/21750) -- fix(docs): fix headless mode docs by @ame2en in - [#21287](https://github.com/google-gemini/gemini-cli/pull/21287) -- feat/redesign header compact by @jacob314 in - [#20922](https://github.com/google-gemini/gemini-cli/pull/20922) -- refactor: migrate to useKeyMatchers hook by @scidomino in - [#21753](https://github.com/google-gemini/gemini-cli/pull/21753) -- perf(cli): cache loadSettings to reduce redundant disk I/O at startup by - @sehoon38 in [#21521](https://github.com/google-gemini/gemini-cli/pull/21521) -- fix(core): resolve Windows line ending and path separation bugs across CLI by - @muhammadusman586 in - [#21068](https://github.com/google-gemini/gemini-cli/pull/21068) -- docs: fix heading formatting in commands.md and phrasing in tools-api.md by - @campox747 in [#20679](https://github.com/google-gemini/gemini-cli/pull/20679) -- refactor(ui): unify keybinding infrastructure and support string - initialization by @scidomino in - [#21776](https://github.com/google-gemini/gemini-cli/pull/21776) -- Add support for updating extension sources and names by @chrstnb in - [#21715](https://github.com/google-gemini/gemini-cli/pull/21715) -- fix(core): handle GUI editor non-zero exit codes gracefully by @reyyanxahmed - in [#20376](https://github.com/google-gemini/gemini-cli/pull/20376) -- fix(core): destroy PTY on kill() and exception to prevent fd leak by @nbardy - in [#21693](https://github.com/google-gemini/gemini-cli/pull/21693) -- fix(docs): update theme screenshots and add missing themes by @ashmod in - [#20689](https://github.com/google-gemini/gemini-cli/pull/20689) -- refactor(cli): rename 'return' key to 'enter' internally by @scidomino in - [#21796](https://github.com/google-gemini/gemini-cli/pull/21796) -- build(release): restrict 
npm bundling to non-stable tags by @sehoon38 in - [#21821](https://github.com/google-gemini/gemini-cli/pull/21821) -- fix(core): override toolRegistry property for sub-agent schedulers by - @gsquared94 in - [#21766](https://github.com/google-gemini/gemini-cli/pull/21766) -- fix(cli): make footer items equally spaced by @jacob314 in - [#21843](https://github.com/google-gemini/gemini-cli/pull/21843) -- docs: clarify global policy rules application in plan mode by @jerop in - [#21864](https://github.com/google-gemini/gemini-cli/pull/21864) -- fix(core): ensure correct flash model steering in plan mode implementation - phase by @jerop in - [#21871](https://github.com/google-gemini/gemini-cli/pull/21871) -- fix(core): update @a2a-js/sdk to 0.3.11 by @adamfweidman in - [#21875](https://github.com/google-gemini/gemini-cli/pull/21875) -- refactor(core): improve API response error logging when retry by @yunaseoul in - [#21784](https://github.com/google-gemini/gemini-cli/pull/21784) -- fix(ui): handle headless execution in credits and upgrade dialogs by - @gsquared94 in - [#21850](https://github.com/google-gemini/gemini-cli/pull/21850) -- fix(core): treat retryable errors with >5 min delay as terminal quota errors - by @gsquared94 in - [#21881](https://github.com/google-gemini/gemini-cli/pull/21881) -- feat(telemetry): add specific PR, issue, and custom tracking IDs for GitHub - Actions by @cocosheng-g in - [#21129](https://github.com/google-gemini/gemini-cli/pull/21129) -- feat(core): add OAuth2 Authorization Code auth provider for A2A agents by - @SandyTao520 in - [#21496](https://github.com/google-gemini/gemini-cli/pull/21496) -- feat(cli): give visibility to /tools list command in the TUI and follow the - subcommand pattern of other commands by @JayadityaGit in - [#21213](https://github.com/google-gemini/gemini-cli/pull/21213) -- Handle dirty worktrees better and warn about running scripts/review.sh on - untrusted code. 
by @jacob314 in - [#21791](https://github.com/google-gemini/gemini-cli/pull/21791) -- feat(policy): support auto-add to policy by default and scoped persistence by + [#21878](https://github.com/google-gemini/gemini-cli/pull/21878) +- fix: clean up /clear and /resume by @jackwotherspoon in + [#22007](https://github.com/google-gemini/gemini-cli/pull/22007) +- fix(core)#20941: reap orphaned descendant processes on PTY abort by @manavmax + in [#21124](https://github.com/google-gemini/gemini-cli/pull/21124) +- fix(core): update language detection to use LSP 3.18 identifiers by @yunaseoul + in [#21931](https://github.com/google-gemini/gemini-cli/pull/21931) +- feat(cli): support removing keybindings via '-' prefix by @scidomino in + [#22042](https://github.com/google-gemini/gemini-cli/pull/22042) +- feat(policy): add --admin-policy flag for supplemental admin policies by + @galz10 in [#20360](https://github.com/google-gemini/gemini-cli/pull/20360) +- merge duplicate imports packages/cli/src subtask1 by @Nixxx19 in + [#22040](https://github.com/google-gemini/gemini-cli/pull/22040) +- perf(core): parallelize user quota and experiments fetching in refreshAuth by + @sehoon38 in [#21648](https://github.com/google-gemini/gemini-cli/pull/21648) +- Changelog for v0.34.0-preview.0 by @gemini-cli-robot in + [#21965](https://github.com/google-gemini/gemini-cli/pull/21965) +- Changelog for v0.33.0 by @gemini-cli-robot in + [#21967](https://github.com/google-gemini/gemini-cli/pull/21967) +- fix(core): handle EISDIR in robustRealpath on Windows by @sehoon38 in + [#21984](https://github.com/google-gemini/gemini-cli/pull/21984) +- feat(core): include initiationMethod in conversation interaction telemetry by + @yunaseoul in [#22054](https://github.com/google-gemini/gemini-cli/pull/22054) +- feat(ui): add vim yank/paste (y/p/P) with unnamed register by @aanari in + [#22026](https://github.com/google-gemini/gemini-cli/pull/22026) +- fix(core): enable numerical routing for api key users by 
@sehoon38 in + [#21977](https://github.com/google-gemini/gemini-cli/pull/21977) +- feat(telemetry): implement retry attempt telemetry for network related retries + by @aishaneeshah in + [#22027](https://github.com/google-gemini/gemini-cli/pull/22027) +- fix(policy): remove unnecessary escapeRegex from pattern builders by @spencer426 in - [#20361](https://github.com/google-gemini/gemini-cli/pull/20361) -- fix(core): handle AbortError when ESC cancels tool execution by @PrasannaPal21 - in [#20863](https://github.com/google-gemini/gemini-cli/pull/20863) -- fix(release): Improve Patch Release Workflow Comments: Clearer Approval - Guidance by @jerop in - [#21894](https://github.com/google-gemini/gemini-cli/pull/21894) -- docs: clarify telemetry setup and comprehensive data map by @jerop in - [#21879](https://github.com/google-gemini/gemini-cli/pull/21879) -- feat(core): add per-model token usage to stream-json output by @yongruilin in - [#21839](https://github.com/google-gemini/gemini-cli/pull/21839) -- docs: remove experimental badge from plan mode in sidebar by @jerop in - [#21906](https://github.com/google-gemini/gemini-cli/pull/21906) -- fix(cli): prevent race condition in loop detection retry by @skyvanguard in - [#17916](https://github.com/google-gemini/gemini-cli/pull/17916) -- Add behavioral evals for tracker by @anj-s in - [#20069](https://github.com/google-gemini/gemini-cli/pull/20069) -- fix(auth): update terminology to 'sign in' and 'sign out' by @clocky in - [#20892](https://github.com/google-gemini/gemini-cli/pull/20892) -- docs(mcp): standardize mcp tool fqn documentation by @abhipatel12 in - [#21664](https://github.com/google-gemini/gemini-cli/pull/21664) -- fix(ui): prevent empty tool-group border stubs after filtering by @Aaxhirrr in - [#21852](https://github.com/google-gemini/gemini-cli/pull/21852) -- make command names consistent by @scidomino in - [#21907](https://github.com/google-gemini/gemini-cli/pull/21907) -- refactor: remove 
agent_card_requires_auth config flag by @adamfweidman in - [#21914](https://github.com/google-gemini/gemini-cli/pull/21914) -- feat(a2a): implement standardized normalization and streaming reassembly by - @alisa-alisa in - [#21402](https://github.com/google-gemini/gemini-cli/pull/21402) -- feat(cli): enable skill activation via slash commands by @NTaylorMullen in - [#21758](https://github.com/google-gemini/gemini-cli/pull/21758) -- docs(cli): mention per-model token usage in stream-json result event by - @yongruilin in - [#21908](https://github.com/google-gemini/gemini-cli/pull/21908) -- fix(plan): prevent plan truncation in approval dialog by supporting - unconstrained heights by @Adib234 in - [#21037](https://github.com/google-gemini/gemini-cli/pull/21037) -- feat(a2a): switch from callback-based to event-driven tool scheduler by - @cocosheng-g in - [#21467](https://github.com/google-gemini/gemini-cli/pull/21467) -- feat(voice): implement speech-friendly response formatter by @ayush31010 in - [#20989](https://github.com/google-gemini/gemini-cli/pull/20989) -- feat: add pulsating blue border automation overlay to browser agent by - @kunal-10-cloud in - [#21173](https://github.com/google-gemini/gemini-cli/pull/21173) -- Add extensionRegistryURI setting to change where the registry is read from by - @kevinjwang1 in - [#20463](https://github.com/google-gemini/gemini-cli/pull/20463) -- fix: patch gaxios v7 Array.toString() stream corruption by @gsquared94 in - [#21884](https://github.com/google-gemini/gemini-cli/pull/21884) -- fix: prevent hangs in non-interactive mode and improve agent guidance by - @cocosheng-g in - [#20893](https://github.com/google-gemini/gemini-cli/pull/20893) -- Add ExtensionDetails dialog and support install by @chrstnb in - [#20845](https://github.com/google-gemini/gemini-cli/pull/20845) -- chore/release: bump version to 0.34.0-nightly.20260310.4653b126f by - @gemini-cli-robot in - 
[#21816](https://github.com/google-gemini/gemini-cli/pull/21816) -- Changelog for v0.33.0-preview.13 by @gemini-cli-robot in - [#21927](https://github.com/google-gemini/gemini-cli/pull/21927) -- fix(cli): stabilize prompt layout to prevent jumping when typing by + [#21921](https://github.com/google-gemini/gemini-cli/pull/21921) +- fix(core): preserve dynamic tool descriptions on session resume by @sehoon38 + in [#18835](https://github.com/google-gemini/gemini-cli/pull/18835) +- chore: allow 'gemini-3.1' in sensitive keyword linter by @scidomino in + [#22065](https://github.com/google-gemini/gemini-cli/pull/22065) +- feat(core): support custom base URL via env vars by @junaiddshaukat in + [#21561](https://github.com/google-gemini/gemini-cli/pull/21561) +- merge duplicate imports packages/cli/src subtask2 by @Nixxx19 in + [#22051](https://github.com/google-gemini/gemini-cli/pull/22051) +- fix(core): silently retry API errors up to 3 times before halting session by + @spencer426 in + [#21989](https://github.com/google-gemini/gemini-cli/pull/21989) +- feat(core): simplify subagent success UI and improve early termination display + by @abhipatel12 in + [#21917](https://github.com/google-gemini/gemini-cli/pull/21917) +- merge duplicate imports packages/cli/src subtask3 by @Nixxx19 in + [#22056](https://github.com/google-gemini/gemini-cli/pull/22056) +- fix(hooks): fix BeforeAgent/AfterAgent inconsistencies (#18514) by @krishdef7 + in [#21383](https://github.com/google-gemini/gemini-cli/pull/21383) +- feat(core): implement SandboxManager interface and config schema by @galz10 in + [#21774](https://github.com/google-gemini/gemini-cli/pull/21774) +- docs: document npm deprecation warnings as safe to ignore by @h30s in + [#20692](https://github.com/google-gemini/gemini-cli/pull/20692) +- fix: remove status/need-triage from maintainer-only issues by @SandyTao520 in + [#22044](https://github.com/google-gemini/gemini-cli/pull/22044) +- fix(core): propagate subagent context to 
policy engine by @NTaylorMullen in + [#22086](https://github.com/google-gemini/gemini-cli/pull/22086) +- fix(cli): resolve skill uninstall failure when skill name is updated by @NTaylorMullen in - [#21081](https://github.com/google-gemini/gemini-cli/pull/21081) -- fix: preserve prompt text when cancelling streaming by @Nixxx19 in - [#21103](https://github.com/google-gemini/gemini-cli/pull/21103) -- fix: robust UX for remote agent errors by @Shyam-Raghuwanshi in - [#20307](https://github.com/google-gemini/gemini-cli/pull/20307) -- feat: implement background process logging and cleanup by @galz10 in - [#21189](https://github.com/google-gemini/gemini-cli/pull/21189) -- Changelog for v0.33.0-preview.14 by @gemini-cli-robot in - [#21938](https://github.com/google-gemini/gemini-cli/pull/21938) -- fix(patch): cherry-pick 45faf4d to release/v0.34.0-preview.0-pr-22148 + [#22085](https://github.com/google-gemini/gemini-cli/pull/22085) +- docs(plan): clarify interactive plan editing with Ctrl+X by @Adib234 in + [#22076](https://github.com/google-gemini/gemini-cli/pull/22076) +- fix(policy): ensure user policies are loaded when policyPaths is empty by + @NTaylorMullen in + [#22090](https://github.com/google-gemini/gemini-cli/pull/22090) +- Docs: Add documentation for model steering (experimental). 
by @jkcinouye in + [#21154](https://github.com/google-gemini/gemini-cli/pull/21154) +- Add issue for automated changelogs by @g-samroberts in + [#21912](https://github.com/google-gemini/gemini-cli/pull/21912) +- fix(core): secure argsPattern and revert WEB_FETCH_TOOL_NAME escalation by + @spencer426 in + [#22104](https://github.com/google-gemini/gemini-cli/pull/22104) +- feat(core): differentiate User-Agent for a2a-server and ACP clients by + @bdmorgan in [#22059](https://github.com/google-gemini/gemini-cli/pull/22059) +- refactor(core): extract ExecutionLifecycleService for tool backgrounding by + @adamfweidman in + [#21717](https://github.com/google-gemini/gemini-cli/pull/21717) +- feat: Display pending and confirming tool calls by @sripasg in + [#22106](https://github.com/google-gemini/gemini-cli/pull/22106) +- feat(browser): implement input blocker overlay during automation by + @kunal-10-cloud in + [#21132](https://github.com/google-gemini/gemini-cli/pull/21132) +- fix: register themes on extension load not start by @jackwotherspoon in + [#22148](https://github.com/google-gemini/gemini-cli/pull/22148) +- feat(ui): Do not show Ultra users /upgrade hint (#22154) by @sehoon38 in + [#22156](https://github.com/google-gemini/gemini-cli/pull/22156) +- chore: remove unnecessary log for themes by @jackwotherspoon in + [#22165](https://github.com/google-gemini/gemini-cli/pull/22165) +- fix(core): resolve MCP tool FQN validation, schema export, and wildcards in + subagents by @abhipatel12 in + [#22069](https://github.com/google-gemini/gemini-cli/pull/22069) +- fix(cli): validate --model argument at startup by @JaisalJain in + [#21393](https://github.com/google-gemini/gemini-cli/pull/21393) +- fix(core): handle policy ALLOW for exit_plan_mode by @backnotprop in + [#21802](https://github.com/google-gemini/gemini-cli/pull/21802) +- feat(telemetry): add Clearcut instrumentation for AI credits billing events by + @gsquared94 in + 
[#22153](https://github.com/google-gemini/gemini-cli/pull/22153) +- feat(core): add google credentials provider for remote agents by @adamfweidman + in [#21024](https://github.com/google-gemini/gemini-cli/pull/21024) +- test(cli): add integration test for node deprecation warnings by @Nixxx19 in + [#20215](https://github.com/google-gemini/gemini-cli/pull/20215) +- feat(cli): allow safe tools to execute concurrently while agent is busy by + @spencer426 in + [#21988](https://github.com/google-gemini/gemini-cli/pull/21988) +- feat(core): implement model-driven parallel tool scheduler by @abhipatel12 in + [#21933](https://github.com/google-gemini/gemini-cli/pull/21933) +- update vulnerable deps by @scidomino in + [#22180](https://github.com/google-gemini/gemini-cli/pull/22180) +- fix(core): fix startup stats to use int values for timestamps and durations by + @yunaseoul in [#22201](https://github.com/google-gemini/gemini-cli/pull/22201) +- fix(core): prevent duplicate tool schemas for instantiated tools by + @abhipatel12 in + [#22204](https://github.com/google-gemini/gemini-cli/pull/22204) +- fix(core): add proxy routing support for remote A2A subagents by @adamfweidman + in [#22199](https://github.com/google-gemini/gemini-cli/pull/22199) +- fix(core/ide): add Antigravity CLI fallbacks by @apfine in + [#22030](https://github.com/google-gemini/gemini-cli/pull/22030) +- fix(browser): fix duplicate function declaration error in browser agent by + @gsquared94 in + [#22207](https://github.com/google-gemini/gemini-cli/pull/22207) +- feat(core): implement Stage 1 improvements for webfetch tool by @aishaneeshah + in [#21313](https://github.com/google-gemini/gemini-cli/pull/21313) +- Changelog for v0.34.0-preview.1 by @gemini-cli-robot in + [#22194](https://github.com/google-gemini/gemini-cli/pull/22194) +- perf(cli): enable code splitting and deferred UI loading by @sehoon38 in + [#22117](https://github.com/google-gemini/gemini-cli/pull/22117) +- fix: remove unused img.png 
from project root by @SandyTao520 in + [#22222](https://github.com/google-gemini/gemini-cli/pull/22222) +- docs(local model routing): add docs on how to use Gemma for local model + routing by @douglas-reid in + [#21365](https://github.com/google-gemini/gemini-cli/pull/21365) +- feat(a2a): enable native gRPC support and protocol routing by @alisa-alisa in + [#21403](https://github.com/google-gemini/gemini-cli/pull/21403) +- fix(cli): escape @ symbols on paste to prevent unintended file expansion by + @krishdef7 in [#21239](https://github.com/google-gemini/gemini-cli/pull/21239) +- feat(core): add trajectoryId to ConversationOffered telemetry by @yunaseoul in + [#22214](https://github.com/google-gemini/gemini-cli/pull/22214) +- docs: clarify that tools.core is an allowlist for ALL built-in tools by + @hobostay in [#18813](https://github.com/google-gemini/gemini-cli/pull/18813) +- docs(plan): document hooks with plan mode by @ruomengz in + [#22197](https://github.com/google-gemini/gemini-cli/pull/22197) +- Changelog for v0.33.1 by @gemini-cli-robot in + [#22235](https://github.com/google-gemini/gemini-cli/pull/22235) +- build(ci): fix false positive evals trigger on merge commits by @gundermanc in + [#22237](https://github.com/google-gemini/gemini-cli/pull/22237) +- fix(core): explicitly pass messageBus to policy engine for MCP tool saves by + @abhipatel12 in + [#22255](https://github.com/google-gemini/gemini-cli/pull/22255) +- feat(core): Fully migrate packages/core to AgentLoopContext. 
by @joshualitt in + [#22115](https://github.com/google-gemini/gemini-cli/pull/22115) +- feat(core): increase sub-agent turn and time limits by @bdmorgan in + [#22196](https://github.com/google-gemini/gemini-cli/pull/22196) +- feat(core): instrument file system tools for JIT context discovery by + @SandyTao520 in + [#22082](https://github.com/google-gemini/gemini-cli/pull/22082) +- refactor(ui): extract pure session browser utilities by @abhipatel12 in + [#22256](https://github.com/google-gemini/gemini-cli/pull/22256) +- fix(plan): Fix AskUser evals by @Adib234 in + [#22074](https://github.com/google-gemini/gemini-cli/pull/22074) +- fix(settings): prevent j/k navigation keys from intercepting edit buffer input + by @student-ankitpandit in + [#21865](https://github.com/google-gemini/gemini-cli/pull/21865) +- feat(skills): improve async-pr-review workflow and logging by @mattKorwel in + [#21790](https://github.com/google-gemini/gemini-cli/pull/21790) +- refactor(cli): consolidate getErrorMessage utility to core by @scidomino in + [#22190](https://github.com/google-gemini/gemini-cli/pull/22190) +- fix(core): show descriptive error messages when saving settings fails by + @afarber in [#18095](https://github.com/google-gemini/gemini-cli/pull/18095) +- docs(core): add authentication guide for remote subagents by @adamfweidman in + [#22178](https://github.com/google-gemini/gemini-cli/pull/22178) +- docs: overhaul subagents documentation and add /agents command by @abhipatel12 + in [#22345](https://github.com/google-gemini/gemini-cli/pull/22345) +- refactor(ui): extract SessionBrowser static ui components by @abhipatel12 in + [#22348](https://github.com/google-gemini/gemini-cli/pull/22348) +- test: add Object.create context regression test and tool confirmation + integration test by @gsquared94 in + [#22356](https://github.com/google-gemini/gemini-cli/pull/22356) +- feat(tracker): return TodoList display for tracker tools by @anj-s in + 
[#22060](https://github.com/google-gemini/gemini-cli/pull/22060) +- feat(agent): add allowed domain restrictions for browser agent by + @cynthialong0-0 in + [#21775](https://github.com/google-gemini/gemini-cli/pull/21775) +- chore/release: bump version to 0.35.0-nightly.20260313.bb060d7a9 by + @gemini-cli-robot in + [#22251](https://github.com/google-gemini/gemini-cli/pull/22251) +- Move keychain fallback to keychain service by @chrstnb in + [#22332](https://github.com/google-gemini/gemini-cli/pull/22332) +- feat(core): integrate SandboxManager to sandbox all process-spawning tools by + @galz10 in [#22231](https://github.com/google-gemini/gemini-cli/pull/22231) +- fix(cli): support CJK input and full Unicode scalar values in terminal + protocols by @scidomino in + [#22353](https://github.com/google-gemini/gemini-cli/pull/22353) +- Promote stable tests. by @gundermanc in + [#22253](https://github.com/google-gemini/gemini-cli/pull/22253) +- feat(tracker): add tracker policy by @anj-s in + [#22379](https://github.com/google-gemini/gemini-cli/pull/22379) +- feat(security): add disableAlwaysAllow setting to disable auto-approvals by + @galz10 in [#21941](https://github.com/google-gemini/gemini-cli/pull/21941) +- Revert "fix(cli): validate --model argument at startup" by @sehoon38 in + [#22378](https://github.com/google-gemini/gemini-cli/pull/22378) +- fix(mcp): handle equivalent root resource URLs in OAuth validation by @galz10 + in [#20231](https://github.com/google-gemini/gemini-cli/pull/20231) +- fix(core): use session-specific temp directory for task tracker by @anj-s in + [#22382](https://github.com/google-gemini/gemini-cli/pull/22382) +- Fix issue where config was undefined. 
by @gundermanc in + [#22397](https://github.com/google-gemini/gemini-cli/pull/22397) +- fix(core): deduplicate project memory when JIT context is enabled by + @SandyTao520 in + [#22234](https://github.com/google-gemini/gemini-cli/pull/22234) +- feat(prompts): implement Topic-Action-Summary model for verbosity reduction by + @Abhijit-2592 in + [#21503](https://github.com/google-gemini/gemini-cli/pull/21503) +- fix(core): fix manual deletion of subagent histories by @abhipatel12 in + [#22407](https://github.com/google-gemini/gemini-cli/pull/22407) +- Add registry var by @kevinjwang1 in + [#22224](https://github.com/google-gemini/gemini-cli/pull/22224) +- Add ModelDefinitions to ModelConfigService by @kevinjwang1 in + [#22302](https://github.com/google-gemini/gemini-cli/pull/22302) +- fix(cli): improve command conflict handling for skills by @NTaylorMullen in + [#21942](https://github.com/google-gemini/gemini-cli/pull/21942) +- fix(core): merge user settings with extension-provided MCP servers by + @abhipatel12 in + [#22484](https://github.com/google-gemini/gemini-cli/pull/22484) +- fix(core): skip discovery for incomplete MCP configs and resolve merge race + condition by @abhipatel12 in + [#22494](https://github.com/google-gemini/gemini-cli/pull/22494) +- fix(automation): harden stale PR closer permissions and maintainer detection + by @bdmorgan in + [#22558](https://github.com/google-gemini/gemini-cli/pull/22558) +- fix(automation): evaluate staleness before checking protected labels by + @bdmorgan in [#22561](https://github.com/google-gemini/gemini-cli/pull/22561) +- feat(agent): replace the runtime npx for browser agent chrome devtool mcp with + pre-built bundle by @cynthialong0-0 in + [#22213](https://github.com/google-gemini/gemini-cli/pull/22213) +- perf: optimize TrackerService dependency checks by @anj-s in + [#22384](https://github.com/google-gemini/gemini-cli/pull/22384) +- docs(policy): remove trailing space from commandPrefix examples by @kawasin73 + in 
[#22264](https://github.com/google-gemini/gemini-cli/pull/22264) +- fix(a2a-server): resolve unsafe assignment lint errors by @ehedlund in + [#22661](https://github.com/google-gemini/gemini-cli/pull/22661) +- fix: Adjust ToolGroupMessage filtering to hide Confirming and show Canceled + tool calls. by @sripasg in + [#22230](https://github.com/google-gemini/gemini-cli/pull/22230) +- Disallow Object.create() and reflect. by @gundermanc in + [#22408](https://github.com/google-gemini/gemini-cli/pull/22408) +- Guard pro model usage by @sehoon38 in + [#22665](https://github.com/google-gemini/gemini-cli/pull/22665) +- refactor(core): Creates AgentSession abstraction for consolidated agent + interface. by @mbleigh in + [#22270](https://github.com/google-gemini/gemini-cli/pull/22270) +- docs(changelog): remove internal commands from release notes by + @jackwotherspoon in + [#22529](https://github.com/google-gemini/gemini-cli/pull/22529) +- feat: enable subagents by @abhipatel12 in + [#22386](https://github.com/google-gemini/gemini-cli/pull/22386) +- feat(extensions): implement cryptographic integrity verification for extension + updates by @ehedlund in + [#21772](https://github.com/google-gemini/gemini-cli/pull/21772) +- feat(tracker): polish UI sorting and formatting by @anj-s in + [#22437](https://github.com/google-gemini/gemini-cli/pull/22437) +- Changelog for v0.34.0-preview.2 by @gemini-cli-robot in + [#22220](https://github.com/google-gemini/gemini-cli/pull/22220) +- fix(core): fix three JIT context bugs in read_file, read_many_files, and + memoryDiscovery by @SandyTao520 in + [#22679](https://github.com/google-gemini/gemini-cli/pull/22679) +- refactor(core): introduce InjectionService with source-aware injection and + backend-native background completions by @adamfweidman in + [#22544](https://github.com/google-gemini/gemini-cli/pull/22544) +- Linux sandbox bubblewrap by @DavidAPierce in + [#22680](https://github.com/google-gemini/gemini-cli/pull/22680) +- feat(core): 
increase thought signature retry resilience by @bdmorgan in + [#22202](https://github.com/google-gemini/gemini-cli/pull/22202) +- feat(core): implement Stage 2 security and consistency improvements for + web_fetch by @aishaneeshah in + [#22217](https://github.com/google-gemini/gemini-cli/pull/22217) +- refactor(core): replace positional execute params with ExecuteOptions bag by + @adamfweidman in + [#22674](https://github.com/google-gemini/gemini-cli/pull/22674) +- feat(config): enable JIT context loading by default by @SandyTao520 in + [#22736](https://github.com/google-gemini/gemini-cli/pull/22736) +- fix(config): ensure discoveryMaxDirs is passed to global config during + initialization by @kevin-ramdass in + [#22744](https://github.com/google-gemini/gemini-cli/pull/22744) +- fix(plan): allowlist get_internal_docs in Plan Mode by @Adib234 in + [#22668](https://github.com/google-gemini/gemini-cli/pull/22668) +- Changelog for v0.34.0-preview.3 by @gemini-cli-robot in + [#22393](https://github.com/google-gemini/gemini-cli/pull/22393) +- feat(core): add foundation for subagent tool isolation by @akh64bit in + [#22708](https://github.com/google-gemini/gemini-cli/pull/22708) +- fix(core): handle surrogate pairs in truncateString by @sehoon38 in + [#22754](https://github.com/google-gemini/gemini-cli/pull/22754) +- fix(cli): override j/k navigation in settings dialog to fix search input + conflict by @sehoon38 in + [#22800](https://github.com/google-gemini/gemini-cli/pull/22800) +- feat(plan): add 'All the above' option to multi-select AskUser questions by + @Adib234 in [#22365](https://github.com/google-gemini/gemini-cli/pull/22365) +- docs: distribute package-specific GEMINI.md context to each package by + @SandyTao520 in + [#22734](https://github.com/google-gemini/gemini-cli/pull/22734) +- fix(cli): clean up stale pasted placeholder metadata after word/line deletions + by @Jomak-x in + [#20375](https://github.com/google-gemini/gemini-cli/pull/20375) +- 
refactor(core): align JIT memory placement with tiered context model by + @SandyTao520 in + [#22766](https://github.com/google-gemini/gemini-cli/pull/22766) +- Linux sandbox seccomp by @DavidAPierce in + [#22815](https://github.com/google-gemini/gemini-cli/pull/22815) +- fix(patch): cherry-pick 4e5dfd0 to release/v0.35.0-preview.1-pr-23074 to patch + version v0.35.0-preview.1 and create version 0.35.0-preview.2 by + @gemini-cli-robot in + [#23134](https://github.com/google-gemini/gemini-cli/pull/23134) +- fix(patch): cherry-pick daf3691 to release/v0.35.0-preview.2-pr-23558 to patch + version v0.35.0-preview.2 and create version 0.35.0-preview.3 by + @gemini-cli-robot in + [#23565](https://github.com/google-gemini/gemini-cli/pull/23565) +- fix(patch): cherry-pick b2d6dc4 to release/v0.35.0-preview.4-pr-23546 [CONFLICTS] by @gemini-cli-robot in - [#22174](https://github.com/google-gemini/gemini-cli/pull/22174) -- fix(patch): cherry-pick 8432bce to release/v0.34.0-preview.1-pr-22069 to patch - version v0.34.0-preview.1 and create version 0.34.0-preview.2 by - @gemini-cli-robot in - [#22205](https://github.com/google-gemini/gemini-cli/pull/22205) -- fix(patch): cherry-pick 24adacd to release/v0.34.0-preview.2-pr-22332 to patch - version v0.34.0-preview.2 and create version 0.34.0-preview.3 by - @gemini-cli-robot in - [#22391](https://github.com/google-gemini/gemini-cli/pull/22391) -- fix(patch): cherry-pick 48130eb to release/v0.34.0-preview.3-pr-22665 to patch - version v0.34.0-preview.3 and create version 0.34.0-preview.4 by - @gemini-cli-robot in - [#22719](https://github.com/google-gemini/gemini-cli/pull/22719) + [#23585](https://github.com/google-gemini/gemini-cli/pull/23585) **Full Changelog**: -https://github.com/google-gemini/gemini-cli/compare/v0.33.2...v0.34.0 +https://github.com/google-gemini/gemini-cli/compare/v0.34.0...v0.35.2 diff --git a/docs/changelogs/preview.md b/docs/changelogs/preview.md index 91d0c09a0b..541c881ed2 100644 --- 
a/docs/changelogs/preview.md +++ b/docs/changelogs/preview.md @@ -1,6 +1,6 @@ -# Preview release: v0.35.0-preview.1 +# Preview release: v0.36.0-preview.4 -Released: March 17, 2026 +Released: March 26, 2026 Our preview release includes the latest, new, and experimental features. This release may not be as stable as our [latest weekly release](latest.md). @@ -13,364 +13,372 @@ npm install -g @google/gemini-cli@preview ## Highlights -- **Subagents & Architecture Enhancements**: Enabled subagents and laid the - foundation for subagent tool isolation. Added proxy routing support for remote - A2A subagents and integrated `SandboxManager` to sandbox all process-spawning - tools. -- **CLI & UI Improvements**: Introduced customizable keyboard shortcuts and - support for literal character keybindings. Added missing vim mode motions and - CJK input support. Enabled code splitting and deferred UI loading for improved - performance. -- **Context & Tools Optimization**: JIT context loading is now enabled by - default with deduplication for project memory. Introduced a model-driven - parallel tool scheduler and allowed safe tools to execute concurrently. -- **Security & Extensions**: Implemented cryptographic integrity verification - for extension updates and added a `disableAlwaysAllow` setting to prevent - auto-approvals for enhanced security. -- **Plan Mode & Web Fetch Updates**: Added an 'All the above' option for - multi-select AskUser questions in Plan Mode. Rolled out Stage 1 and Stage 2 - security and consistency improvements for the `web_fetch` tool. +- **Subagent Architecture Enhancements:** Significant updates to subagents, + including local execution, tool isolation, multi-registry discovery, dynamic + tool filtering, and JIT context injection. +- **Enhanced Security & Sandboxing:** Implemented strict macOS sandboxing using + Seatbelt allowlist, native Windows sandboxing, and support for + "Write-Protected" governance files. 
+- **Agent Context & State Management:** Introduced task tracker protocol + integration, 'blocked' statuses for tasks/todos, and `AgentSession` for + improved state management and replay semantics. +- **Browser & ACP Capabilities:** Added privacy consent for the browser agent, + sensitive action controls, improved API token usage metadata, and gateway auth + support via ACP. +- **CLI & UX Improvements:** Implemented a refreshed Composer layout, expanded + terminal fallback warnings, dynamic model resolution, and Git worktree support + for isolated parallel sessions. ## What's Changed -- feat(cli): customizable keyboard shortcuts by @scidomino in - [#21945](https://github.com/google-gemini/gemini-cli/pull/21945) -- feat(core): Thread `AgentLoopContext` through core. by @joshualitt in - [#21944](https://github.com/google-gemini/gemini-cli/pull/21944) -- chore(release): bump version to 0.35.0-nightly.20260311.657f19c1f by +- feat(core): support inline agentCardJson for remote agents by @adamfweidman in + [#23743](https://github.com/google-gemini/gemini-cli/pull/23743) +- fix(patch): cherry-pick 055ff92 to release/v0.36.0-preview.0-pr-23672 to patch + version v0.36.0-preview.0 and create version 0.36.0-preview.1 by @gemini-cli-robot in - [#21966](https://github.com/google-gemini/gemini-cli/pull/21966) -- refactor(a2a): remove legacy CoreToolScheduler by @adamfweidman in - [#21955](https://github.com/google-gemini/gemini-cli/pull/21955) -- feat(ui): add missing vim mode motions (X, ~, r, f/F/t/T, df/dt and friends) - by @aanari in [#21932](https://github.com/google-gemini/gemini-cli/pull/21932) -- Feat/retry fetch notifications by @aishaneeshah in - [#21813](https://github.com/google-gemini/gemini-cli/pull/21813) -- fix(core): remove OAuth check from handleFallback and clean up stray file by - @sehoon38 in [#21962](https://github.com/google-gemini/gemini-cli/pull/21962) -- feat(cli): support literal character keybindings and extended Kitty protocol - keys by @scidomino 
in - [#21972](https://github.com/google-gemini/gemini-cli/pull/21972) -- fix(ui): clamp cursor to last char after all NORMAL mode deletes by @aanari in - [#21973](https://github.com/google-gemini/gemini-cli/pull/21973) -- test(core): add missing tests for prompts/utils.ts by @krrishverma1805-web in - [#19941](https://github.com/google-gemini/gemini-cli/pull/19941) -- fix(cli): allow scrolling keys in copy mode (Ctrl+S selection mode) by - @nsalerni in [#19933](https://github.com/google-gemini/gemini-cli/pull/19933) -- docs(cli): add custom keybinding documentation by @scidomino in - [#21980](https://github.com/google-gemini/gemini-cli/pull/21980) -- docs: fix misleading YOLO mode description in defaultApprovalMode by - @Gyanranjan-Priyam in - [#21878](https://github.com/google-gemini/gemini-cli/pull/21878) -- fix: clean up /clear and /resume by @jackwotherspoon in - [#22007](https://github.com/google-gemini/gemini-cli/pull/22007) -- fix(core)#20941: reap orphaned descendant processes on PTY abort by @manavmax - in [#21124](https://github.com/google-gemini/gemini-cli/pull/21124) -- fix(core): update language detection to use LSP 3.18 identifiers by @yunaseoul - in [#21931](https://github.com/google-gemini/gemini-cli/pull/21931) -- feat(cli): support removing keybindings via '-' prefix by @scidomino in - [#22042](https://github.com/google-gemini/gemini-cli/pull/22042) -- feat(policy): add --admin-policy flag for supplemental admin policies by - @galz10 in [#20360](https://github.com/google-gemini/gemini-cli/pull/20360) -- merge duplicate imports packages/cli/src subtask1 by @Nixxx19 in - [#22040](https://github.com/google-gemini/gemini-cli/pull/22040) -- perf(core): parallelize user quota and experiments fetching in refreshAuth by - @sehoon38 in [#21648](https://github.com/google-gemini/gemini-cli/pull/21648) -- Changelog for v0.34.0-preview.0 by @gemini-cli-robot in - [#21965](https://github.com/google-gemini/gemini-cli/pull/21965) -- Changelog for v0.33.0 by 
@gemini-cli-robot in - [#21967](https://github.com/google-gemini/gemini-cli/pull/21967) -- fix(core): handle EISDIR in robustRealpath on Windows by @sehoon38 in - [#21984](https://github.com/google-gemini/gemini-cli/pull/21984) -- feat(core): include initiationMethod in conversation interaction telemetry by - @yunaseoul in [#22054](https://github.com/google-gemini/gemini-cli/pull/22054) -- feat(ui): add vim yank/paste (y/p/P) with unnamed register by @aanari in - [#22026](https://github.com/google-gemini/gemini-cli/pull/22026) -- fix(core): enable numerical routing for api key users by @sehoon38 in - [#21977](https://github.com/google-gemini/gemini-cli/pull/21977) -- feat(telemetry): implement retry attempt telemetry for network related retries - by @aishaneeshah in - [#22027](https://github.com/google-gemini/gemini-cli/pull/22027) -- fix(policy): remove unnecessary escapeRegex from pattern builders by - @spencer426 in - [#21921](https://github.com/google-gemini/gemini-cli/pull/21921) -- fix(core): preserve dynamic tool descriptions on session resume by @sehoon38 - in [#18835](https://github.com/google-gemini/gemini-cli/pull/18835) -- chore: allow 'gemini-3.1' in sensitive keyword linter by @scidomino in - [#22065](https://github.com/google-gemini/gemini-cli/pull/22065) -- feat(core): support custom base URL via env vars by @junaiddshaukat in - [#21561](https://github.com/google-gemini/gemini-cli/pull/21561) -- merge duplicate imports packages/cli/src subtask2 by @Nixxx19 in - [#22051](https://github.com/google-gemini/gemini-cli/pull/22051) -- fix(core): silently retry API errors up to 3 times before halting session by - @spencer426 in - [#21989](https://github.com/google-gemini/gemini-cli/pull/21989) -- feat(core): simplify subagent success UI and improve early termination display - by @abhipatel12 in - [#21917](https://github.com/google-gemini/gemini-cli/pull/21917) -- merge duplicate imports packages/cli/src subtask3 by @Nixxx19 in - 
[#22056](https://github.com/google-gemini/gemini-cli/pull/22056) -- fix(hooks): fix BeforeAgent/AfterAgent inconsistencies (#18514) by @krishdef7 - in [#21383](https://github.com/google-gemini/gemini-cli/pull/21383) -- feat(core): implement SandboxManager interface and config schema by @galz10 in - [#21774](https://github.com/google-gemini/gemini-cli/pull/21774) -- docs: document npm deprecation warnings as safe to ignore by @h30s in - [#20692](https://github.com/google-gemini/gemini-cli/pull/20692) -- fix: remove status/need-triage from maintainer-only issues by @SandyTao520 in - [#22044](https://github.com/google-gemini/gemini-cli/pull/22044) -- fix(core): propagate subagent context to policy engine by @NTaylorMullen in - [#22086](https://github.com/google-gemini/gemini-cli/pull/22086) -- fix(cli): resolve skill uninstall failure when skill name is updated by - @NTaylorMullen in - [#22085](https://github.com/google-gemini/gemini-cli/pull/22085) -- docs(plan): clarify interactive plan editing with Ctrl+X by @Adib234 in - [#22076](https://github.com/google-gemini/gemini-cli/pull/22076) -- fix(policy): ensure user policies are loaded when policyPaths is empty by - @NTaylorMullen in - [#22090](https://github.com/google-gemini/gemini-cli/pull/22090) -- Docs: Add documentation for model steering (experimental). 
by @jkcinouye in - [#21154](https://github.com/google-gemini/gemini-cli/pull/21154) -- Add issue for automated changelogs by @g-samroberts in - [#21912](https://github.com/google-gemini/gemini-cli/pull/21912) -- fix(core): secure argsPattern and revert WEB_FETCH_TOOL_NAME escalation by - @spencer426 in - [#22104](https://github.com/google-gemini/gemini-cli/pull/22104) -- feat(core): differentiate User-Agent for a2a-server and ACP clients by - @bdmorgan in [#22059](https://github.com/google-gemini/gemini-cli/pull/22059) -- refactor(core): extract ExecutionLifecycleService for tool backgrounding by - @adamfweidman in - [#21717](https://github.com/google-gemini/gemini-cli/pull/21717) -- feat: Display pending and confirming tool calls by @sripasg in - [#22106](https://github.com/google-gemini/gemini-cli/pull/22106) -- feat(browser): implement input blocker overlay during automation by - @kunal-10-cloud in - [#21132](https://github.com/google-gemini/gemini-cli/pull/21132) -- fix: register themes on extension load not start by @jackwotherspoon in - [#22148](https://github.com/google-gemini/gemini-cli/pull/22148) -- feat(ui): Do not show Ultra users /upgrade hint (#22154) by @sehoon38 in - [#22156](https://github.com/google-gemini/gemini-cli/pull/22156) -- chore: remove unnecessary log for themes by @jackwotherspoon in - [#22165](https://github.com/google-gemini/gemini-cli/pull/22165) -- fix(core): resolve MCP tool FQN validation, schema export, and wildcards in - subagents by @abhipatel12 in - [#22069](https://github.com/google-gemini/gemini-cli/pull/22069) -- fix(cli): validate --model argument at startup by @JaisalJain in - [#21393](https://github.com/google-gemini/gemini-cli/pull/21393) -- fix(core): handle policy ALLOW for exit_plan_mode by @backnotprop in - [#21802](https://github.com/google-gemini/gemini-cli/pull/21802) -- feat(telemetry): add Clearcut instrumentation for AI credits billing events by - @gsquared94 in - 
[#22153](https://github.com/google-gemini/gemini-cli/pull/22153) -- feat(core): add google credentials provider for remote agents by @adamfweidman - in [#21024](https://github.com/google-gemini/gemini-cli/pull/21024) -- test(cli): add integration test for node deprecation warnings by @Nixxx19 in - [#20215](https://github.com/google-gemini/gemini-cli/pull/20215) -- feat(cli): allow safe tools to execute concurrently while agent is busy by - @spencer426 in - [#21988](https://github.com/google-gemini/gemini-cli/pull/21988) -- feat(core): implement model-driven parallel tool scheduler by @abhipatel12 in - [#21933](https://github.com/google-gemini/gemini-cli/pull/21933) -- update vulnerable deps by @scidomino in - [#22180](https://github.com/google-gemini/gemini-cli/pull/22180) -- fix(core): fix startup stats to use int values for timestamps and durations by - @yunaseoul in [#22201](https://github.com/google-gemini/gemini-cli/pull/22201) -- fix(core): prevent duplicate tool schemas for instantiated tools by - @abhipatel12 in - [#22204](https://github.com/google-gemini/gemini-cli/pull/22204) -- fix(core): add proxy routing support for remote A2A subagents by @adamfweidman - in [#22199](https://github.com/google-gemini/gemini-cli/pull/22199) -- fix(core/ide): add Antigravity CLI fallbacks by @apfine in - [#22030](https://github.com/google-gemini/gemini-cli/pull/22030) -- fix(browser): fix duplicate function declaration error in browser agent by - @gsquared94 in - [#22207](https://github.com/google-gemini/gemini-cli/pull/22207) -- feat(core): implement Stage 1 improvements for webfetch tool by @aishaneeshah - in [#21313](https://github.com/google-gemini/gemini-cli/pull/21313) -- Changelog for v0.34.0-preview.1 by @gemini-cli-robot in - [#22194](https://github.com/google-gemini/gemini-cli/pull/22194) -- perf(cli): enable code splitting and deferred UI loading by @sehoon38 in - [#22117](https://github.com/google-gemini/gemini-cli/pull/22117) -- fix: remove unused img.png 
from project root by @SandyTao520 in - [#22222](https://github.com/google-gemini/gemini-cli/pull/22222) -- docs(local model routing): add docs on how to use Gemma for local model - routing by @douglas-reid in - [#21365](https://github.com/google-gemini/gemini-cli/pull/21365) -- feat(a2a): enable native gRPC support and protocol routing by @alisa-alisa in - [#21403](https://github.com/google-gemini/gemini-cli/pull/21403) -- fix(cli): escape @ symbols on paste to prevent unintended file expansion by - @krishdef7 in [#21239](https://github.com/google-gemini/gemini-cli/pull/21239) -- feat(core): add trajectoryId to ConversationOffered telemetry by @yunaseoul in - [#22214](https://github.com/google-gemini/gemini-cli/pull/22214) -- docs: clarify that tools.core is an allowlist for ALL built-in tools by - @hobostay in [#18813](https://github.com/google-gemini/gemini-cli/pull/18813) -- docs(plan): document hooks with plan mode by @ruomengz in - [#22197](https://github.com/google-gemini/gemini-cli/pull/22197) -- Changelog for v0.33.1 by @gemini-cli-robot in - [#22235](https://github.com/google-gemini/gemini-cli/pull/22235) -- build(ci): fix false positive evals trigger on merge commits by @gundermanc in - [#22237](https://github.com/google-gemini/gemini-cli/pull/22237) -- fix(core): explicitly pass messageBus to policy engine for MCP tool saves by - @abhipatel12 in - [#22255](https://github.com/google-gemini/gemini-cli/pull/22255) -- feat(core): Fully migrate packages/core to AgentLoopContext. 
by @joshualitt in - [#22115](https://github.com/google-gemini/gemini-cli/pull/22115) -- feat(core): increase sub-agent turn and time limits by @bdmorgan in - [#22196](https://github.com/google-gemini/gemini-cli/pull/22196) -- feat(core): instrument file system tools for JIT context discovery by + [#23723](https://github.com/google-gemini/gemini-cli/pull/23723) +- Changelog for v0.33.2 by @gemini-cli-robot in + [#22730](https://github.com/google-gemini/gemini-cli/pull/22730) +- feat(core): multi-registry architecture and tool filtering for subagents by + @akh64bit in [#22712](https://github.com/google-gemini/gemini-cli/pull/22712) +- Changelog for v0.34.0-preview.4 by @gemini-cli-robot in + [#22752](https://github.com/google-gemini/gemini-cli/pull/22752) +- fix(devtools): use theme-aware text colors for console warnings and errors by @SandyTao520 in - [#22082](https://github.com/google-gemini/gemini-cli/pull/22082) -- refactor(ui): extract pure session browser utilities by @abhipatel12 in - [#22256](https://github.com/google-gemini/gemini-cli/pull/22256) -- fix(plan): Fix AskUser evals by @Adib234 in - [#22074](https://github.com/google-gemini/gemini-cli/pull/22074) -- fix(settings): prevent j/k navigation keys from intercepting edit buffer input - by @student-ankitpandit in - [#21865](https://github.com/google-gemini/gemini-cli/pull/21865) -- feat(skills): improve async-pr-review workflow and logging by @mattKorwel in - [#21790](https://github.com/google-gemini/gemini-cli/pull/21790) -- refactor(cli): consolidate getErrorMessage utility to core by @scidomino in - [#22190](https://github.com/google-gemini/gemini-cli/pull/22190) -- fix(core): show descriptive error messages when saving settings fails by - @afarber in [#18095](https://github.com/google-gemini/gemini-cli/pull/18095) -- docs(core): add authentication guide for remote subagents by @adamfweidman in - [#22178](https://github.com/google-gemini/gemini-cli/pull/22178) -- docs: overhaul subagents documentation 
and add /agents command by @abhipatel12 - in [#22345](https://github.com/google-gemini/gemini-cli/pull/22345) -- refactor(ui): extract SessionBrowser static ui components by @abhipatel12 in - [#22348](https://github.com/google-gemini/gemini-cli/pull/22348) -- test: add Object.create context regression test and tool confirmation - integration test by @gsquared94 in - [#22356](https://github.com/google-gemini/gemini-cli/pull/22356) -- feat(tracker): return TodoList display for tracker tools by @anj-s in - [#22060](https://github.com/google-gemini/gemini-cli/pull/22060) -- feat(agent): add allowed domain restrictions for browser agent by + [#22181](https://github.com/google-gemini/gemini-cli/pull/22181) +- Add support for dynamic model Resolution to ModelConfigService by @kevinjwang1 + in [#22578](https://github.com/google-gemini/gemini-cli/pull/22578) +- chore(release): bump version to 0.36.0-nightly.20260317.2f90b4653 by + @gemini-cli-robot in + [#22858](https://github.com/google-gemini/gemini-cli/pull/22858) +- fix(cli): use active sessionId in useLogger and improve resume robustness by + @mattKorwel in + [#22606](https://github.com/google-gemini/gemini-cli/pull/22606) +- fix(cli): expand tilde in policy paths from settings.json by @abhipatel12 in + [#22772](https://github.com/google-gemini/gemini-cli/pull/22772) +- fix(core): add actionable warnings for terminal fallbacks (#14426) by + @spencer426 in + [#22211](https://github.com/google-gemini/gemini-cli/pull/22211) +- feat(tracker): integrate task tracker protocol into core system prompt by + @anj-s in [#22442](https://github.com/google-gemini/gemini-cli/pull/22442) +- chore: add posttest build hooks and fix missing dependencies by @NTaylorMullen + in [#22865](https://github.com/google-gemini/gemini-cli/pull/22865) +- feat(a2a): add agent acknowledgment command and enhance registry discovery by + @alisa-alisa in + [#22389](https://github.com/google-gemini/gemini-cli/pull/22389) +- fix(cli): automatically add all 
VSCode workspace folders to Gemini context by + @sakshisemalti in + [#21380](https://github.com/google-gemini/gemini-cli/pull/21380) +- feat: add 'blocked' status to tasks and todos by @anj-s in + [#22735](https://github.com/google-gemini/gemini-cli/pull/22735) +- refactor(cli): remove extra newlines in ShellToolMessage.tsx by @NTaylorMullen + in [#22868](https://github.com/google-gemini/gemini-cli/pull/22868) +- fix(cli): lazily load settings in onModelChange to prevent stale closure data + loss by @KumarADITHYA123 in + [#20403](https://github.com/google-gemini/gemini-cli/pull/20403) +- feat(core): subagent local execution and tool isolation by @akh64bit in + [#22718](https://github.com/google-gemini/gemini-cli/pull/22718) +- fix(cli): resolve subagent grouping and UI state persistence by @abhipatel12 + in [#22252](https://github.com/google-gemini/gemini-cli/pull/22252) +- refactor(ui): extract SessionBrowser search and navigation components by + @abhipatel12 in + [#22377](https://github.com/google-gemini/gemini-cli/pull/22377) +- fix: updates Docker image reference for GitHub MCP server by @jhhornn in + [#22938](https://github.com/google-gemini/gemini-cli/pull/22938) +- refactor(cli): group subagent trajectory deletion and use native filesystem + testing by @abhipatel12 in + [#22890](https://github.com/google-gemini/gemini-cli/pull/22890) +- refactor(cli): simplify keypress and mouse providers and update tests by + @scidomino in [#22853](https://github.com/google-gemini/gemini-cli/pull/22853) +- Changelog for v0.34.0 by @gemini-cli-robot in + [#22860](https://github.com/google-gemini/gemini-cli/pull/22860) +- test(cli): simplify createMockSettings calls by @scidomino in + [#22952](https://github.com/google-gemini/gemini-cli/pull/22952) +- feat(ui): format multi-line banner warnings with a bold title by @keithguerin + in [#22955](https://github.com/google-gemini/gemini-cli/pull/22955) +- Docs: Remove references to stale Gemini CLI file structure info by + 
@g-samroberts in + [#22976](https://github.com/google-gemini/gemini-cli/pull/22976) +- feat(ui): remove write todo list tool from UI tips by @aniruddhaadak80 in + [#22281](https://github.com/google-gemini/gemini-cli/pull/22281) +- Fix issue where subagent thoughts are appended. by @gundermanc in + [#22975](https://github.com/google-gemini/gemini-cli/pull/22975) +- Feat/browser privacy consent by @kunal-10-cloud in + [#21119](https://github.com/google-gemini/gemini-cli/pull/21119) +- fix(core): explicitly map execution context in LocalAgentExecutor by @akh64bit + in [#22949](https://github.com/google-gemini/gemini-cli/pull/22949) +- feat(plan): support plan mode in non-interactive mode by @ruomengz in + [#22670](https://github.com/google-gemini/gemini-cli/pull/22670) +- feat(core): implement strict macOS sandboxing using Seatbelt allowlist by + @ehedlund in [#22832](https://github.com/google-gemini/gemini-cli/pull/22832) +- docs: add additional notes by @abhipatel12 in + [#23008](https://github.com/google-gemini/gemini-cli/pull/23008) +- fix(cli): resolve duplicate footer on tool cancel via ESC (#21743) by + @ruomengz in [#21781](https://github.com/google-gemini/gemini-cli/pull/21781) +- Changelog for v0.35.0-preview.1 by @gemini-cli-robot in + [#23012](https://github.com/google-gemini/gemini-cli/pull/23012) +- fix(ui): fix flickering on small terminal heights by @devr0306 in + [#21416](https://github.com/google-gemini/gemini-cli/pull/21416) +- fix(acp): provide more meta in tool_call_update by @Mervap in + [#22663](https://github.com/google-gemini/gemini-cli/pull/22663) +- docs: add FAQ entry for checking Gemini CLI version by @surajsahani in + [#21271](https://github.com/google-gemini/gemini-cli/pull/21271) +- feat(core): resilient subagent tool rejection with contextual feedback by + @abhipatel12 in + [#22951](https://github.com/google-gemini/gemini-cli/pull/22951) +- fix(cli): correctly handle auto-update for standalone binaries by @bdmorgan in + 
[#23038](https://github.com/google-gemini/gemini-cli/pull/23038) +- feat(core): add content-utils by @adamfweidman in + [#22984](https://github.com/google-gemini/gemini-cli/pull/22984) +- fix: circumvent genai sdk requirement for api key when using gateway auth via + ACP by @sripasg in + [#23042](https://github.com/google-gemini/gemini-cli/pull/23042) +- fix(core): don't persist browser consent sentinel in non-interactive mode by + @jasonmatthewsuhari in + [#23073](https://github.com/google-gemini/gemini-cli/pull/23073) +- fix(core): narrow browser agent description to prevent stealing URL tasks from + web_fetch by @gsquared94 in + [#23086](https://github.com/google-gemini/gemini-cli/pull/23086) +- feat(cli): Partial threading of AgentLoopContext. by @joshualitt in + [#22978](https://github.com/google-gemini/gemini-cli/pull/22978) +- fix(browser-agent): enable "Allow all server tools" session policy by @cynthialong0-0 in - [#21775](https://github.com/google-gemini/gemini-cli/pull/21775) -- chore/release: bump version to 0.35.0-nightly.20260313.bb060d7a9 by - @gemini-cli-robot in - [#22251](https://github.com/google-gemini/gemini-cli/pull/22251) -- Move keychain fallback to keychain service by @chrstnb in - [#22332](https://github.com/google-gemini/gemini-cli/pull/22332) -- feat(core): integrate SandboxManager to sandbox all process-spawning tools by - @galz10 in [#22231](https://github.com/google-gemini/gemini-cli/pull/22231) -- fix(cli): support CJK input and full Unicode scalar values in terminal - protocols by @scidomino in - [#22353](https://github.com/google-gemini/gemini-cli/pull/22353) -- Promote stable tests. 
by @gundermanc in - [#22253](https://github.com/google-gemini/gemini-cli/pull/22253) -- feat(tracker): add tracker policy by @anj-s in - [#22379](https://github.com/google-gemini/gemini-cli/pull/22379) -- feat(security): add disableAlwaysAllow setting to disable auto-approvals by - @galz10 in [#21941](https://github.com/google-gemini/gemini-cli/pull/21941) -- Revert "fix(cli): validate --model argument at startup" by @sehoon38 in - [#22378](https://github.com/google-gemini/gemini-cli/pull/22378) -- fix(mcp): handle equivalent root resource URLs in OAuth validation by @galz10 - in [#20231](https://github.com/google-gemini/gemini-cli/pull/20231) -- fix(core): use session-specific temp directory for task tracker by @anj-s in - [#22382](https://github.com/google-gemini/gemini-cli/pull/22382) -- Fix issue where config was undefined. by @gundermanc in - [#22397](https://github.com/google-gemini/gemini-cli/pull/22397) -- fix(core): deduplicate project memory when JIT context is enabled by + [#22343](https://github.com/google-gemini/gemini-cli/pull/22343) +- refactor(cli): integrate real config loading into async test utils by + @scidomino in [#23040](https://github.com/google-gemini/gemini-cli/pull/23040) +- feat(core): inject memory and JIT context into subagents by @abhipatel12 in + [#23032](https://github.com/google-gemini/gemini-cli/pull/23032) +- Fix logging and virtual list. by @jacob314 in + [#23080](https://github.com/google-gemini/gemini-cli/pull/23080) +- feat(core): cap JIT context upward traversal at git root by @SandyTao520 in + [#23074](https://github.com/google-gemini/gemini-cli/pull/23074) +- Docs: Minor style updates from initial docs audit. 
by @g-samroberts in + [#22872](https://github.com/google-gemini/gemini-cli/pull/22872) +- feat(core): add experimental memory manager agent to replace save_memory tool + by @SandyTao520 in + [#22726](https://github.com/google-gemini/gemini-cli/pull/22726) +- Changelog for v0.35.0-preview.2 by @gemini-cli-robot in + [#23142](https://github.com/google-gemini/gemini-cli/pull/23142) +- Update website issue template for label and title by @g-samroberts in + [#23036](https://github.com/google-gemini/gemini-cli/pull/23036) +- fix: upgrade ACP SDK from 0.12 to 0.16.1 by @sripasg in + [#23132](https://github.com/google-gemini/gemini-cli/pull/23132) +- Update callouts to work on github. by @g-samroberts in + [#22245](https://github.com/google-gemini/gemini-cli/pull/22245) +- feat: ACP: Add token usage metadata to the `send` method's return value by + @sripasg in [#23148](https://github.com/google-gemini/gemini-cli/pull/23148) +- fix(plan): clarify that plan mode policies are combined with normal mode by + @ruomengz in [#23158](https://github.com/google-gemini/gemini-cli/pull/23158) +- Add ModelChain support to ModelConfigService and make ModelDialog dynamic by + @kevinjwang1 in + [#22914](https://github.com/google-gemini/gemini-cli/pull/22914) +- Ensure that copied extensions are writable in the user's local directory by + @kevinjwang1 in + [#23016](https://github.com/google-gemini/gemini-cli/pull/23016) +- feat(core): implement native Windows sandboxing by @mattKorwel in + [#21807](https://github.com/google-gemini/gemini-cli/pull/21807) +- feat(core): add support for admin-forced MCP server installations by + @gsquared94 in + [#23163](https://github.com/google-gemini/gemini-cli/pull/23163) +- chore(lint): ignore .gemini directory and recursive node_modules by + @mattKorwel in + [#23211](https://github.com/google-gemini/gemini-cli/pull/23211) +- feat(cli): conditionally exclude ask_user tool in ACP mode by @nmcnamara-eng + in 
[#23045](https://github.com/google-gemini/gemini-cli/pull/23045) +- feat(core): introduce AgentSession and rename stream events to agent events by + @mbleigh in [#23159](https://github.com/google-gemini/gemini-cli/pull/23159) +- feat(worktree): add Git worktree support for isolated parallel sessions by + @jerop in [#22973](https://github.com/google-gemini/gemini-cli/pull/22973) +- Add support for linking in the extension registry by @kevinjwang1 in + [#23153](https://github.com/google-gemini/gemini-cli/pull/23153) +- feat(extensions): add --skip-settings flag to install command by @Ratish1 in + [#17212](https://github.com/google-gemini/gemini-cli/pull/17212) +- feat(telemetry): track if session is running in a Git worktree by @jerop in + [#23265](https://github.com/google-gemini/gemini-cli/pull/23265) +- refactor(core): use absolute paths in GEMINI.md context markers by @SandyTao520 in - [#22234](https://github.com/google-gemini/gemini-cli/pull/22234) -- feat(prompts): implement Topic-Action-Summary model for verbosity reduction by - @Abhijit-2592 in - [#21503](https://github.com/google-gemini/gemini-cli/pull/21503) -- fix(core): fix manual deletion of subagent histories by @abhipatel12 in - [#22407](https://github.com/google-gemini/gemini-cli/pull/22407) -- Add registry var by @kevinjwang1 in - [#22224](https://github.com/google-gemini/gemini-cli/pull/22224) -- Add ModelDefinitions to ModelConfigService by @kevinjwang1 in - [#22302](https://github.com/google-gemini/gemini-cli/pull/22302) -- fix(cli): improve command conflict handling for skills by @NTaylorMullen in - [#21942](https://github.com/google-gemini/gemini-cli/pull/21942) -- fix(core): merge user settings with extension-provided MCP servers by + [#23135](https://github.com/google-gemini/gemini-cli/pull/23135) +- fix(core): add sanitization to sub agent thoughts and centralize utilities by + @devr0306 in [#22828](https://github.com/google-gemini/gemini-cli/pull/22828) +- feat(core): refine User-Agent for 
VS Code traffic (unified format) by + @sehoon38 in [#23256](https://github.com/google-gemini/gemini-cli/pull/23256) +- Fix schema for ModelChains by @kevinjwang1 in + [#23284](https://github.com/google-gemini/gemini-cli/pull/23284) +- test(cli): refactor tests for async render utilities by @scidomino in + [#23252](https://github.com/google-gemini/gemini-cli/pull/23252) +- feat(core): add security prompt for browser agent by @cynthialong0-0 in + [#23241](https://github.com/google-gemini/gemini-cli/pull/23241) +- refactor(ide): replace dynamic undici import with static fetch import by + @cocosheng-g in + [#23268](https://github.com/google-gemini/gemini-cli/pull/23268) +- test(cli): address unresolved feedback from PR #23252 by @scidomino in + [#23303](https://github.com/google-gemini/gemini-cli/pull/23303) +- feat(browser): add sensitive action controls and read-only noise reduction by + @cynthialong0-0 in + [#22867](https://github.com/google-gemini/gemini-cli/pull/22867) +- Disabling failing test while investigating by @alisa-alisa in + [#23311](https://github.com/google-gemini/gemini-cli/pull/23311) +- fix broken extension link in hooks guide by @Indrapal-70 in + [#21728](https://github.com/google-gemini/gemini-cli/pull/21728) +- fix(core): fix agent description indentation by @abhipatel12 in + [#23315](https://github.com/google-gemini/gemini-cli/pull/23315) +- Wrap the text under TOML rule for easier readability in policy-engine.md… by + @CogitationOps in + [#23076](https://github.com/google-gemini/gemini-cli/pull/23076) +- fix(extensions): revert broken extension removal behavior by @ehedlund in + [#23317](https://github.com/google-gemini/gemini-cli/pull/23317) +- feat(core): set up onboarding telemetry by @yunaseoul in + [#23118](https://github.com/google-gemini/gemini-cli/pull/23118) +- Retry evals on API error. 
by @gundermanc in + [#23322](https://github.com/google-gemini/gemini-cli/pull/23322) +- fix(evals): remove tool restrictions and add compile-time guards by + @SandyTao520 in + [#23312](https://github.com/google-gemini/gemini-cli/pull/23312) +- fix(hooks): support 'ask' decision for BeforeTool hooks by @gundermanc in + [#21146](https://github.com/google-gemini/gemini-cli/pull/21146) +- feat(browser): add warning message for session mode 'existing' by + @cynthialong0-0 in + [#23288](https://github.com/google-gemini/gemini-cli/pull/23288) +- chore(lint): enforce zero warnings and cleanup syntax restrictions by + @alisa-alisa in + [#22902](https://github.com/google-gemini/gemini-cli/pull/22902) +- fix(cli): add Esc instruction to HooksDialog footer by @abhipatel12 in + [#23258](https://github.com/google-gemini/gemini-cli/pull/23258) +- Disallow and suppress misused spread operator. by @gundermanc in + [#23294](https://github.com/google-gemini/gemini-cli/pull/23294) +- fix(core): refine CliHelpAgent description for better delegation by @abhipatel12 in - [#22484](https://github.com/google-gemini/gemini-cli/pull/22484) -- fix(core): skip discovery for incomplete MCP configs and resolve merge race - condition by @abhipatel12 in - [#22494](https://github.com/google-gemini/gemini-cli/pull/22494) -- fix(automation): harden stale PR closer permissions and maintainer detection - by @bdmorgan in - [#22558](https://github.com/google-gemini/gemini-cli/pull/22558) -- fix(automation): evaluate staleness before checking protected labels by - @bdmorgan in [#22561](https://github.com/google-gemini/gemini-cli/pull/22561) -- feat(agent): replace the runtime npx for browser agent chrome devtool mcp with - pre-built bundle by @cynthialong0-0 in - [#22213](https://github.com/google-gemini/gemini-cli/pull/22213) -- perf: optimize TrackerService dependency checks by @anj-s in - [#22384](https://github.com/google-gemini/gemini-cli/pull/22384) -- docs(policy): remove trailing space from 
commandPrefix examples by @kawasin73 - in [#22264](https://github.com/google-gemini/gemini-cli/pull/22264) -- fix(a2a-server): resolve unsafe assignment lint errors by @ehedlund in - [#22661](https://github.com/google-gemini/gemini-cli/pull/22661) -- fix: Adjust ToolGroupMessage filtering to hide Confirming and show Canceled - tool calls. by @sripasg in - [#22230](https://github.com/google-gemini/gemini-cli/pull/22230) -- Disallow Object.create() and reflect. by @gundermanc in - [#22408](https://github.com/google-gemini/gemini-cli/pull/22408) -- Guard pro model usage by @sehoon38 in - [#22665](https://github.com/google-gemini/gemini-cli/pull/22665) -- refactor(core): Creates AgentSession abstraction for consolidated agent - interface. by @mbleigh in - [#22270](https://github.com/google-gemini/gemini-cli/pull/22270) -- docs(changelog): remove internal commands from release notes by + [#23310](https://github.com/google-gemini/gemini-cli/pull/23310) +- fix(core): enable global session and persistent approval for web_fetch by + @NTaylorMullen in + [#23295](https://github.com/google-gemini/gemini-cli/pull/23295) +- fix(plan): add state transition override to prevent plan mode freeze by + @Adib234 in [#23020](https://github.com/google-gemini/gemini-cli/pull/23020) +- fix(cli): record skill activation tool calls in chat history by @NTaylorMullen + in [#23203](https://github.com/google-gemini/gemini-cli/pull/23203) +- fix(core): ensure subagent tool updates apply configuration overrides + immediately by @abhipatel12 in + [#23161](https://github.com/google-gemini/gemini-cli/pull/23161) +- fix(cli): resolve flicker at boundaries of list in BaseSelectionList by @jackwotherspoon in - [#22529](https://github.com/google-gemini/gemini-cli/pull/22529) -- feat: enable subagents by @abhipatel12 in - [#22386](https://github.com/google-gemini/gemini-cli/pull/22386) -- feat(extensions): implement cryptographic integrity verification for extension - updates by @ehedlund in - 
[#21772](https://github.com/google-gemini/gemini-cli/pull/21772) -- feat(tracker): polish UI sorting and formatting by @anj-s in - [#22437](https://github.com/google-gemini/gemini-cli/pull/22437) -- Changelog for v0.34.0-preview.2 by @gemini-cli-robot in - [#22220](https://github.com/google-gemini/gemini-cli/pull/22220) -- fix(core): fix three JIT context bugs in read_file, read_many_files, and - memoryDiscovery by @SandyTao520 in - [#22679](https://github.com/google-gemini/gemini-cli/pull/22679) -- refactor(core): introduce InjectionService with source-aware injection and - backend-native background completions by @adamfweidman in - [#22544](https://github.com/google-gemini/gemini-cli/pull/22544) -- Linux sandbox bubblewrap by @DavidAPierce in - [#22680](https://github.com/google-gemini/gemini-cli/pull/22680) -- feat(core): increase thought signature retry resilience by @bdmorgan in - [#22202](https://github.com/google-gemini/gemini-cli/pull/22202) -- feat(core): implement Stage 2 security and consistency improvements for - web_fetch by @aishaneeshah in - [#22217](https://github.com/google-gemini/gemini-cli/pull/22217) -- refactor(core): replace positional execute params with ExecuteOptions bag by + [#23298](https://github.com/google-gemini/gemini-cli/pull/23298) +- test(cli): force generic terminal in tests to fix snapshot failures by + @abhipatel12 in + [#23499](https://github.com/google-gemini/gemini-cli/pull/23499) +- Evals: PR Guidance adding workflow by @alisa-alisa in + [#23164](https://github.com/google-gemini/gemini-cli/pull/23164) +- feat(core): refactor SandboxManager to a stateless architecture and introduce + explicit Deny interface by @ehedlund in + [#23141](https://github.com/google-gemini/gemini-cli/pull/23141) +- feat(core): add event-translator and update agent types by @adamfweidman in + [#22985](https://github.com/google-gemini/gemini-cli/pull/22985) +- perf(cli): parallelize and background startup cleanup tasks by @sehoon38 in + 
[#23545](https://github.com/google-gemini/gemini-cli/pull/23545) +- fix: "allow always" for commands with paths by @scidomino in + [#23558](https://github.com/google-gemini/gemini-cli/pull/23558) +- fix(cli): prevent terminal escape sequences from leaking on exit by + @mattKorwel in + [#22682](https://github.com/google-gemini/gemini-cli/pull/22682) +- feat(cli): implement full "GEMINI CLI" logo for logged-out state by + @keithguerin in + [#22412](https://github.com/google-gemini/gemini-cli/pull/22412) +- fix(plan): reserve minimum height for selection list in AskUserDialog by + @ruomengz in [#23280](https://github.com/google-gemini/gemini-cli/pull/23280) +- fix(core): harden AgentSession replay semantics by @adamfweidman in + [#23548](https://github.com/google-gemini/gemini-cli/pull/23548) +- test(core): migrate hook tests to scheduler by @abhipatel12 in + [#23496](https://github.com/google-gemini/gemini-cli/pull/23496) +- chore(config): disable agents by default by @abhipatel12 in + [#23546](https://github.com/google-gemini/gemini-cli/pull/23546) +- fix(ui): make tool confirmations take up entire terminal height by @devr0306 + in [#22366](https://github.com/google-gemini/gemini-cli/pull/22366) +- fix(core): prevent redundant remote agent loading on model switch by @adamfweidman in - [#22674](https://github.com/google-gemini/gemini-cli/pull/22674) -- feat(config): enable JIT context loading by default by @SandyTao520 in - [#22736](https://github.com/google-gemini/gemini-cli/pull/22736) -- fix(config): ensure discoveryMaxDirs is passed to global config during - initialization by @kevin-ramdass in - [#22744](https://github.com/google-gemini/gemini-cli/pull/22744) -- fix(plan): allowlist get_internal_docs in Plan Mode by @Adib234 in - [#22668](https://github.com/google-gemini/gemini-cli/pull/22668) -- Changelog for v0.34.0-preview.3 by @gemini-cli-robot in - [#22393](https://github.com/google-gemini/gemini-cli/pull/22393) -- feat(core): add foundation for subagent 
tool isolation by @akh64bit in - [#22708](https://github.com/google-gemini/gemini-cli/pull/22708) -- fix(core): handle surrogate pairs in truncateString by @sehoon38 in - [#22754](https://github.com/google-gemini/gemini-cli/pull/22754) -- fix(cli): override j/k navigation in settings dialog to fix search input - conflict by @sehoon38 in - [#22800](https://github.com/google-gemini/gemini-cli/pull/22800) -- feat(plan): add 'All the above' option to multi-select AskUser questions by - @Adib234 in [#22365](https://github.com/google-gemini/gemini-cli/pull/22365) -- docs: distribute package-specific GEMINI.md context to each package by + [#23576](https://github.com/google-gemini/gemini-cli/pull/23576) +- refactor(core): update production type imports from coreToolScheduler by + @abhipatel12 in + [#23498](https://github.com/google-gemini/gemini-cli/pull/23498) +- feat(cli): always prefix extension skills with colon separator by + @NTaylorMullen in + [#23566](https://github.com/google-gemini/gemini-cli/pull/23566) +- fix(core): properly support allowRedirect in policy engine by @scidomino in + [#23579](https://github.com/google-gemini/gemini-cli/pull/23579) +- fix(cli): prevent subcommand shadowing and skip auth for commands by + @mattKorwel in + [#23177](https://github.com/google-gemini/gemini-cli/pull/23177) +- fix(test): move flaky tests to non-blocking suite by @mattKorwel in + [#23259](https://github.com/google-gemini/gemini-cli/pull/23259) +- Changelog for v0.35.0-preview.3 by @gemini-cli-robot in + [#23574](https://github.com/google-gemini/gemini-cli/pull/23574) +- feat(skills): add behavioral-evals skill with fixing and promoting guides by + @abhipatel12 in + [#23349](https://github.com/google-gemini/gemini-cli/pull/23349) +- refactor(core): delete obsolete coreToolScheduler by @abhipatel12 in + [#23502](https://github.com/google-gemini/gemini-cli/pull/23502) +- Changelog for v0.35.0-preview.4 by @gemini-cli-robot in + 
[#23581](https://github.com/google-gemini/gemini-cli/pull/23581) +- feat(core): add LegacyAgentSession by @adamfweidman in + [#22986](https://github.com/google-gemini/gemini-cli/pull/22986) +- feat(test-utils): add TestMcpServerBuilder and support in TestRig by + @abhipatel12 in + [#23491](https://github.com/google-gemini/gemini-cli/pull/23491) +- fix(core)!: Force policy config to specify toolName by @kschaab in + [#23330](https://github.com/google-gemini/gemini-cli/pull/23330) +- eval(save_memory): add multi-turn interactive evals for memoryManager by @SandyTao520 in - [#22734](https://github.com/google-gemini/gemini-cli/pull/22734) -- fix(cli): clean up stale pasted placeholder metadata after word/line deletions - by @Jomak-x in - [#20375](https://github.com/google-gemini/gemini-cli/pull/20375) -- refactor(core): align JIT memory placement with tiered context model by - @SandyTao520 in - [#22766](https://github.com/google-gemini/gemini-cli/pull/22766) -- Linux sandbox seccomp by @DavidAPierce in - [#22815](https://github.com/google-gemini/gemini-cli/pull/22815) + [#23572](https://github.com/google-gemini/gemini-cli/pull/23572) +- fix(telemetry): patch memory leak and enforce logPrompts privacy by + @spencer426 in + [#23281](https://github.com/google-gemini/gemini-cli/pull/23281) +- perf(cli): background IDE client to speed up initialization by @sehoon38 in + [#23603](https://github.com/google-gemini/gemini-cli/pull/23603) +- fix(cli): prevent Ctrl+D exit when input buffer is not empty by @wtanaka in + [#23306](https://github.com/google-gemini/gemini-cli/pull/23306) +- fix: ACP: separate conversational text from execute tool command title by + @sripasg in [#23179](https://github.com/google-gemini/gemini-cli/pull/23179) +- feat(evals): add behavioral evaluations for subagent routing by @Samee24 in + [#23272](https://github.com/google-gemini/gemini-cli/pull/23272) +- refactor(cli,core): foundational layout, identity management, and type safety + by @jwhelangoog in 
+ [#23286](https://github.com/google-gemini/gemini-cli/pull/23286) +- fix(core): accurately reflect subagent tool failure in UI by @abhipatel12 in + [#23187](https://github.com/google-gemini/gemini-cli/pull/23187) +- Changelog for v0.35.0-preview.5 by @gemini-cli-robot in + [#23606](https://github.com/google-gemini/gemini-cli/pull/23606) +- feat(ui): implement refreshed UX for Composer layout by @jwhelangoog in + [#21212](https://github.com/google-gemini/gemini-cli/pull/21212) +- fix: API key input dialog user interaction when selected Gemini API Key by + @kartikangiras in + [#21057](https://github.com/google-gemini/gemini-cli/pull/21057) +- docs: update `/mcp refresh` to `/mcp reload` by @adamfweidman in + [#23631](https://github.com/google-gemini/gemini-cli/pull/23631) +- Implementation of sandbox "Write-Protected" Governance Files by @DavidAPierce + in [#23139](https://github.com/google-gemini/gemini-cli/pull/23139) +- feat(sandbox): dynamic macOS sandbox expansion and worktree support by @galz10 + in [#23301](https://github.com/google-gemini/gemini-cli/pull/23301) +- fix(acp): Pass the cwd to `AcpFileSystemService` to avoid looping failures in + asking for perms to write plan md file by @sripasg in + [#23612](https://github.com/google-gemini/gemini-cli/pull/23612) +- fix(plan): sandbox path resolution in Plan Mode to prevent hallucinations by + @Adib234 in [#22737](https://github.com/google-gemini/gemini-cli/pull/22737) +- feat(ui): allow immediate user input during startup by @sehoon38 in + [#23661](https://github.com/google-gemini/gemini-cli/pull/23661) +- refactor(sandbox): reorganize Windows sandbox files by @galz10 in + [#23645](https://github.com/google-gemini/gemini-cli/pull/23645) +- fix(core): improve remote agent streaming UI and UX by @adamfweidman in + [#23633](https://github.com/google-gemini/gemini-cli/pull/23633) +- perf(cli): optimize --version startup time by @sehoon38 in + [#23671](https://github.com/google-gemini/gemini-cli/pull/23671) +- 
refactor(core): stop gemini CLI from producing unsafe casts by @gundermanc in + [#23611](https://github.com/google-gemini/gemini-cli/pull/23611) +- use enableAutoUpdate in test rig by @scidomino in + [#23681](https://github.com/google-gemini/gemini-cli/pull/23681) +- feat(core): change user-facing auth type from oauth2 to oauth by @adamfweidman + in [#23639](https://github.com/google-gemini/gemini-cli/pull/23639) +- chore(deps): fix npm audit vulnerabilities by @scidomino in + [#23679](https://github.com/google-gemini/gemini-cli/pull/23679) +- test(evals): fix overlapping act() deadlock in app-test-helper by @Adib234 in + [#23666](https://github.com/google-gemini/gemini-cli/pull/23666) **Full Changelog**: -https://github.com/google-gemini/gemini-cli/compare/v0.34.0-preview.4...v0.35.0-preview.1 +https://github.com/google-gemini/gemini-cli/compare/v0.35.0-preview.5...v0.36.0-preview.4 diff --git a/docs/cli/acp-mode.md b/docs/cli/acp-mode.md new file mode 100644 index 0000000000..16ff3b9a15 --- /dev/null +++ b/docs/cli/acp-mode.md @@ -0,0 +1,126 @@ +# ACP Mode + +ACP (Agent Client Protocol) mode is a special operational mode of Gemini CLI +designed for programmatic control, primarily for IDE and other developer tool +integrations. It uses a JSON-RPC protocol over stdio to communicate between +Gemini CLI agent and a client. + +To start Gemini CLI in ACP mode, use the `--acp` flag: + +```bash +gemini --acp +``` + +## Agent Client Protocol (ACP) + +ACP is an open protocol that standardizes how AI coding agents communicate with +code editors and IDEs. It addresses the challenge of fragmented distribution, +where agents traditionally needed custom integrations for each client. With ACP, +developers can implement their agent once, and it becomes compatible with any +ACP-compliant editor. 
+ +For a comprehensive introduction to ACP, including its architecture and +benefits, refer to the official +[ACP Introduction](https://agentclientprotocol.com/get-started/introduction) +documentation. + +### Existing integrations using ACP + +The ACP Agent Registry simplifies the distribution and management of +ACP-compatible agents across various IDEs. Gemini CLI is an ACP-compatible agent +and can be found in this registry. + +For more general information about the registry, and how to use it with specific +IDEs like JetBrains and Zed, refer to the +[IDE Integration](../ide-integration/index.md) documentation. + +You can also find more information on the official +[ACP Agent Registry](https://agentclientprotocol.com/get-started/registry) page. + +## Architecture and protocol basics + +ACP mode establishes a client-server relationship between your tool (the client) +and Gemini CLI (the server). + +- **Communication:** The entire communication happens over standard input/output + (stdio) using the JSON-RPC 2.0 protocol. +- **Client's role:** The client is responsible for sending requests (e.g., + prompts) and handling responses and notifications from Gemini CLI. +- **Gemini CLI's role:** In ACP mode, Gemini CLI listens for incoming JSON-RPC + requests, processes them, and sends back responses. + +The core of the ACP implementation can be found in +`packages/cli/src/acp/acpClient.ts`. + +### Extending with MCP + +ACP can be used with the Model Context Protocol (MCP). This lets an ACP client +(like an IDE) expose its own functionality as "tools" that the Gemini model can +use. + +1. The client implements an **MCP server** that advertises its tools. +2. During the ACP `initialize` handshake, the client provides the connection + details for its MCP server. +3. Gemini CLI connects to the MCP server, discovers the available tools, and + makes them available to the AI model. +4. 
When the model decides to use one of these tools, Gemini CLI sends a tool + call request to the MCP server. + +This mechanism allows for a powerful, two-way integration where the agent can +leverage the IDE's capabilities to perform tasks. The MCP client logic is in +`packages/core/src/tools/mcp-client.ts`. + +## Capabilities and supported methods + +The ACP protocol exposes a number of methods for ACP clients (e.g. IDEs) to +control Gemini CLI. + +### Core methods + +- `initialize`: Establishes the initial connection and lets the client + register its MCP server. +- `authenticate`: Authenticates the user. +- `newSession`: Starts a new chat session. +- `loadSession`: Loads a previous session. +- `prompt`: Sends a prompt to the agent. +- `cancel`: Cancels an ongoing prompt. + +### Session control + +- `setSessionMode`: Allows changing the approval level for tool calls (e.g., to + `auto-approve`). +- `unstable_setSessionModel`: Changes the model for the current session. + +### File system proxy + +ACP includes a proxied file system service. This means that when the agent needs +to read or write files, it does so through the ACP client. This is a security +feature that ensures the agent only has access to the files that the client (and +by extension, the user) has explicitly allowed. + +## Debugging and telemetry + +You can get insights into the ACP communication and the agent's behavior through +debugging logs and telemetry. 
+ +### Debugging logs + +To enable general debugging logs, start Gemini CLI with the `--debug` flag: + +```bash +gemini --acp --debug +``` + +### Telemetry + +For more detailed telemetry, you can use the following environment variables to +capture telemetry data to a file: + +- `GEMINI_TELEMETRY_ENABLED=true` +- `GEMINI_TELEMETRY_TARGET=local` +- `GEMINI_TELEMETRY_OUTFILE=/path/to/your/log.json` + +This will write a JSON log file containing detailed information about all the +events happening within the agent, including ACP requests and responses. The +integration test `integration-tests/acp-telemetry.test.ts` provides a working +example of how to set this up. diff --git a/docs/cli/checkpointing.md b/docs/cli/checkpointing.md index 0be8bd9508..3a4a690cea 100644 --- a/docs/cli/checkpointing.md +++ b/docs/cli/checkpointing.md @@ -39,7 +39,9 @@ file in your project's temporary directory, typically located at The Checkpointing feature is disabled by default. To enable it, you need to edit your `settings.json` file. -> **Note:** The `--checkpointing` command-line flag was removed in version + +> [!CAUTION] +> The `--checkpointing` command-line flag was removed in version > 0.11.0. Checkpointing can now only be enabled through the `settings.json` > configuration file. diff --git a/docs/cli/cli-reference.md b/docs/cli/cli-reference.md index 167801ca05..bc8f8b44ce 100644 --- a/docs/cli/cli-reference.md +++ b/docs/cli/cli-reference.md @@ -50,6 +50,7 @@ These commands are available within the interactive REPL. | `--model` | `-m` | string | `auto` | Model to use. See [Model Selection](#model-selection) for available values. | | `--prompt` | `-p` | string | - | Prompt text. Appended to stdin input if provided. Forces non-interactive mode. | | `--prompt-interactive` | `-i` | string | - | Execute prompt and continue in interactive mode | +| `--worktree` | `-w` | string | - | Start Gemini in a new git worktree. If no name is provided, one is generated automatically. 
Requires `experimental.worktrees: true` in settings. | | `--sandbox` | `-s` | boolean | `false` | Run in a sandboxed environment for safer execution | | `--approval-mode` | - | string | `default` | Approval mode for tool execution. Choices: `default`, `auto_edit`, `yolo` | | `--yolo` | `-y` | boolean | `false` | **Deprecated.** Auto-approve all actions. Use `--approval-mode=yolo` instead. | diff --git a/docs/cli/custom-commands.md b/docs/cli/custom-commands.md index dd2698290e..6fcce4e825 100644 --- a/docs/cli/custom-commands.md +++ b/docs/cli/custom-commands.md @@ -30,7 +30,9 @@ separator (`/` or `\`) being converted to a colon (`:`). - A file at `/.gemini/commands/git/commit.toml` becomes the namespaced command `/git:commit`. -> [!TIP] After creating or modifying `.toml` command files, run + +> [!TIP] +> After creating or modifying `.toml` command files, run > `/commands reload` to pick up your changes without restarting the CLI. ## TOML file format (v1) @@ -177,10 +179,10 @@ ensure that only intended commands can be run. automatically shell-escaped (see [Context-Aware Injection](#1-context-aware-injection-with-args) above). 3. **Robust parsing:** The parser correctly handles complex shell commands that - include nested braces, such as JSON payloads. **Note:** The content inside - `!{...}` must have balanced braces (`{` and `}`). If you need to execute a - command containing unbalanced braces, consider wrapping it in an external - script file and calling the script within the `!{...}` block. + include nested braces, such as JSON payloads. The content inside `!{...}` + must have balanced braces (`{` and `}`). If you need to execute a command + containing unbalanced braces, consider wrapping it in an external script + file and calling the script within the `!{...}` block. 4. **Security check and confirmation:** The CLI performs a security check on the final, resolved command (after arguments are escaped and substituted). 
A dialog will appear showing the exact command(s) to be executed. diff --git a/docs/cli/enterprise.md b/docs/cli/enterprise.md index 39c0f7c5c1..5e9cede33a 100644 --- a/docs/cli/enterprise.md +++ b/docs/cli/enterprise.md @@ -5,9 +5,11 @@ and managing Gemini CLI in an enterprise environment. By leveraging system-level settings, administrators can enforce security policies, manage tool access, and ensure a consistent experience for all users. -> **A note on security:** The patterns described in this document are intended -> to help administrators create a more controlled and secure environment for -> using Gemini CLI. However, they should not be considered a foolproof security + +> [!WARNING] +> The patterns described in this document are intended to help +> administrators create a more controlled and secure environment for using +> Gemini CLI. However, they should not be considered a foolproof security > boundary. A determined user with sufficient privileges on their local machine > may still be able to circumvent these configurations. These measures are > designed to prevent accidental misuse and enforce corporate policy in a @@ -280,10 +282,12 @@ environment to a blocklist. } ``` -**Security note:** Blocklisting with `excludeTools` is less secure than -allowlisting with `coreTools`, as it relies on blocking known-bad commands, and -clever users may find ways to bypass simple string-based blocks. **Allowlisting -is the recommended approach.** + +> [!WARNING] +> Blocklisting with `excludeTools` is less secure than +> allowlisting with `coreTools`, as it relies on blocking known-bad commands, +> and clever users may find ways to bypass simple string-based blocks. +> **Allowlisting is the recommended approach.** ### Disabling YOLO mode @@ -494,8 +498,10 @@ other events. For more information, see the } ``` -**Note:** Ensure that `logPrompts` is set to `false` in an enterprise setting to -avoid collecting potentially sensitive information from user prompts. 
+ +> [!NOTE] +> Ensure that `logPrompts` is set to `false` in an enterprise setting to +> avoid collecting potentially sensitive information from user prompts. ## Authentication diff --git a/docs/cli/git-worktrees.md b/docs/cli/git-worktrees.md new file mode 100644 index 0000000000..5020b3fa9a --- /dev/null +++ b/docs/cli/git-worktrees.md @@ -0,0 +1,107 @@ +# Git Worktrees (experimental) + +When working on multiple tasks at once, you can use Git worktrees to give each +Gemini session its own copy of the codebase. Git worktrees create separate +working directories that each have their own files and branch while sharing the +same repository history. This prevents changes in one session from colliding +with another. + +Learn more about [session management](./session-management.md). + + +> [!NOTE] +> This is an experimental feature currently under active development. Your +> feedback is invaluable as we refine this feature. If you have ideas, +> suggestions, or encounter issues: +> +> - [Open an issue](https://github.com/google-gemini/gemini-cli/issues/new?template=bug_report.yml) on GitHub. +> - Use the **/bug** command within Gemini CLI to file an issue. + +Learn more in the official Git worktree +[documentation](https://git-scm.com/docs/git-worktree). + +## How to enable Git worktrees + +Git worktrees are an experimental feature. You must enable them in your settings +using the `/settings` command or by manually editing your `settings.json` file. + +1. Use the `/settings` command. +2. Search for and set **Enable Git Worktrees** to `true`. + +Alternatively, add the following to your `settings.json`: + +```json +{ + "experimental": { + "worktrees": true + } +} +``` + +## How to use Git worktrees + +Use the `--worktree` (`-w`) flag to create an isolated worktree and start Gemini +CLI in it. + +- **Start with a specific name:** The value you pass becomes both the directory + name (within `.gemini/worktrees/`) and the branch name. 
+ + ```bash + gemini --worktree feature-search + ``` + +- **Start with a random name:** If you omit the name, Gemini generates a random + one automatically (for example, `worktree-a1b2c3d4`). + + ```bash + gemini --worktree + ``` + + +> [!NOTE] +> Remember to initialize your development environment in each new +> worktree according to your project's setup. Depending on your stack, this +> might include running dependency installation (`npm install`, `yarn`), setting +> up virtual environments, or following your project's standard build process. + +## How to exit a Git worktree session + +When you exit a worktree session (using `/quit` or `Ctrl+C`), Gemini leaves the +worktree intact so your work is not lost. This includes your uncommitted changes +(modified files, staged changes, or untracked files) and any new commits you +have made. + +Gemini prioritizes a fast and safe exit: it **does not automatically delete** +your worktree or branch. You are responsible for cleaning up your worktrees +manually once you are finished with them. + +When you exit, Gemini displays instructions on how to resume your work or how to +manually remove the worktree if you no longer need it. 
+ +## Resuming work in a Git worktree + +To resume a session in a worktree, navigate to the worktree directory and start +Gemini CLI with the `--resume` flag and the session ID: + +```bash +cd .gemini/worktrees/feature-search +gemini --resume <session-id> +``` + +## Managing Git worktrees manually + +For more control over worktree location and branch configuration, or to clean up +a preserved worktree, you can use Git directly: + +- **Clean up a preserved Git worktree:** + ```bash + git worktree remove .gemini/worktrees/feature-search --force + git branch -D worktree-feature-search + ``` +- **Create a Git worktree manually:** + ```bash + git worktree add ../project-feature-search -b feature-search + cd ../project-feature-search && gemini + ``` + +[Open an issue]: https://github.com/google-gemini/gemini-cli/issues diff --git a/docs/cli/model-steering.md b/docs/cli/model-steering.md index 12b581c530..26ff4e1209 100644 --- a/docs/cli/model-steering.md +++ b/docs/cli/model-steering.md @@ -4,9 +4,10 @@ Model steering lets you provide real-time guidance and feedback to Gemini CLI while it is actively executing a task. This lets you correct course, add missing context, or skip unnecessary steps without having to stop and restart the agent. -> **Note:** This is a preview feature under active development. Preview features -> may only be available in the **Preview** channel or may need to be enabled -> under `/settings`. + +> [!NOTE] +> This is an experimental feature currently under active development and +> may need to be enabled under `/settings`. Model steering is particularly useful during complex [Plan Mode](./plan-mode.md) workflows or long-running subagent executions where you want to ensure the agent diff --git a/docs/cli/model.md b/docs/cli/model.md index 3da5ea4cbc..b85f597e08 100644 --- a/docs/cli/model.md +++ b/docs/cli/model.md @@ -5,7 +5,9 @@ used by Gemini CLI, giving you more control over your results. 
Use **Pro** models for complex tasks and reasoning, **Flash** models for high speed results, or the (recommended) **Auto** setting to choose the best model for your tasks. -> **Note:** The `/model` command (and the `--model` flag) does not override the + +> [!NOTE] +> The `/model` command (and the `--model` flag) does not override the > model used by sub-agents. Consequently, even when using the `/model` flag you > may see other models used in your model usage reports. diff --git a/docs/cli/notifications.md b/docs/cli/notifications.md index 8326a1efb2..abe6743c56 100644 --- a/docs/cli/notifications.md +++ b/docs/cli/notifications.md @@ -4,9 +4,10 @@ Gemini CLI can send system notifications to alert you when a session completes or when it needs your attention, such as when it's waiting for you to approve a tool call. -> **Note:** This is a preview feature currently under active development. -> Preview features may be available on the **Preview** channel or may need to be -> enabled under `/settings`. + +> [!NOTE] +> This is an experimental feature currently under active development and +> may need to be enabled under `/settings`. Notifications are particularly useful when running long-running tasks or using [Plan Mode](./plan-mode.md), letting you switch to other windows while Gemini @@ -14,14 +15,14 @@ CLI works in the background. ## Requirements -Currently, system notifications are only supported on macOS. - ### Terminal support The CLI uses the OSC 9 terminal escape sequence to trigger system notifications. -This is supported by several modern terminal emulators. If your terminal does -not support OSC 9 notifications, Gemini CLI falls back to a system alert sound -to get your attention. +This is supported by several modern terminal emulators including iTerm2, +WezTerm, Ghostty, and Kitty. If your terminal does not support OSC 9 +notifications, Gemini CLI falls back to a terminal bell (BEL) to get your +attention. 
Most terminals respond to BEL with a taskbar flash or system alert +sound. ## Enable notifications diff --git a/docs/cli/plan-mode.md b/docs/cli/plan-mode.md index 9550e2a918..2163e4fcd1 100644 --- a/docs/cli/plan-mode.md +++ b/docs/cli/plan-mode.md @@ -35,19 +35,17 @@ To launch Gemini CLI in Plan Mode once: To start Plan Mode while using Gemini CLI: - **Keyboard shortcut:** Press `Shift+Tab` to cycle through approval modes - (`Default` -> `Auto-Edit` -> `Plan`). - - > **Note:** Plan Mode is automatically removed from the rotation when Gemini - > CLI is actively processing or showing confirmation dialogs. + (`Default` -> `Auto-Edit` -> `Plan`). Plan Mode is automatically removed from + the rotation when Gemini CLI is actively processing or showing confirmation + dialogs. - **Command:** Type `/plan` in the input box. - **Natural Language:** Ask Gemini CLI to "start a plan for...". Gemini CLI calls the [`enter_plan_mode`](../tools/planning.md#1-enter_plan_mode-enterplanmode) tool - to switch modes. - > **Note:** This tool is not available when Gemini CLI is in - > [YOLO mode](../reference/configuration.md#command-line-arguments). + to switch modes. This tool is not available when Gemini CLI is in + [YOLO mode](../reference/configuration.md#command-line-arguments). ## How to use Plan Mode @@ -202,6 +200,7 @@ your specific environment. ```toml [[rule]] +toolName = "*" mcpName = "*" toolAnnotations = { readOnlyHint = true } decision = "allow" @@ -407,7 +406,9 @@ To build a custom planning workflow, you can use: [custom plan directories](#custom-plan-directory-and-policies) and [custom policies](#custom-policies). -> **Note:** Use [Conductor] as a reference when building your own custom + +> [!TIP] +> Use [Conductor] as a reference when building your own custom > planning workflow. 
By using Plan Mode as its execution environment, your custom methodology can diff --git a/docs/cli/sandbox.md b/docs/cli/sandbox.md index ec7e88f624..e27587abf0 100644 --- a/docs/cli/sandbox.md +++ b/docs/cli/sandbox.md @@ -50,7 +50,25 @@ Cross-platform sandboxing with complete process isolation. **Note**: Requires building the sandbox image locally or using a published image from your organization's registry. -### 3. gVisor / runsc (Linux only) +### 3. Windows Native Sandbox (Windows only) + +... **Troubleshooting and Side Effects:** + +The Windows Native sandbox uses the `icacls` command to set a "Low Mandatory +Level" on files and directories it needs to write to. + +- **Persistence**: These integrity level changes are persistent on the + filesystem. Even after the sandbox session ends, files created or modified by + the sandbox will retain their "Low" integrity level. +- **Manual Reset**: If you need to reset the integrity level of a file or + directory, you can use: + ```powershell + icacls "C:\path\to\dir" /setintegritylevel Medium + ``` +- **System Folders**: The sandbox manager automatically skips setting integrity + levels on system folders (like `C:\Windows`) for safety. + +### 4. gVisor / runsc (Linux only) Strongest isolation available: runs containers inside a user-space kernel via [gVisor](https://github.com/google/gvisor). gVisor intercepts all container @@ -74,7 +92,7 @@ To set up runsc: 2. Configure the Docker daemon to use the runsc runtime. 3. Verify the installation. -### 4. LXC/LXD (Linux only, experimental) +### 5. LXC/LXD (Linux only, experimental) Full-system container sandboxing using LXC/LXD. Unlike Docker/Podman, LXC containers run a complete Linux system with `systemd`, `snapd`, and other system @@ -253,9 +271,11 @@ $env:SANDBOX_SET_UID_GID="false" # Disable UID/GID mapping DEBUG=1 gemini -s -p "debug command" ``` -**Note:** If you have `DEBUG=true` in a project's `.env` file, it won't affect -gemini-cli due to automatic exclusion. 
Use `.gemini/.env` files for gemini-cli -specific debug settings. + +> [!NOTE] +> If you have `DEBUG=true` in a project's `.env` file, it won't affect +> gemini-cli due to automatic exclusion. Use `.gemini/.env` files for +> gemini-cli specific debug settings. ### Inspect sandbox diff --git a/docs/cli/session-management.md b/docs/cli/session-management.md index 8e60f61630..74bc4a4337 100644 --- a/docs/cli/session-management.md +++ b/docs/cli/session-management.md @@ -96,6 +96,12 @@ Compatibility aliases: - `/chat ...` works for the same commands. - `/resume checkpoints ...` also remains supported during migration. +## Parallel sessions with Git worktrees + +When working on multiple tasks at once, you can use +[Git worktrees](./git-worktrees.md) to give each Gemini session its own copy of +the codebase. This prevents changes in one session from colliding with another. + ## Managing sessions You can list and delete sessions to keep your history organized and manage disk diff --git a/docs/cli/settings.md b/docs/cli/settings.md index 900038df8b..da1ebbda3d 100644 --- a/docs/cli/settings.md +++ b/docs/cli/settings.md @@ -11,7 +11,9 @@ locations: - **User settings**: `~/.gemini/settings.json` - **Workspace settings**: `your-project/.gemini/settings.json` -Note: Workspace settings override user settings. + +> [!IMPORTANT] +> Workspace settings override user settings. ## Settings reference @@ -27,8 +29,8 @@ they appear in the UI. | Vim Mode | `general.vimMode` | Enable Vim keybindings | `false` | | Default Approval Mode | `general.defaultApprovalMode` | The default approval mode for tool execution. 'default' prompts for approval, 'auto_edit' auto-approves edit tools, and 'plan' is read-only mode. YOLO mode (auto-approve all actions) can only be enabled via command line (--yolo or --approval-mode=yolo). | `"default"` | | Enable Auto Update | `general.enableAutoUpdate` | Enable automatic updates. 
| `true` | -| Enable Notifications | `general.enableNotifications` | Enable run-event notifications for action-required prompts and session completion. Currently macOS only. | `false` | -| Plan Directory | `general.plan.directory` | The directory where planning artifacts are stored. If not specified, defaults to the system temporary directory. | `undefined` | +| Enable Notifications | `general.enableNotifications` | Enable run-event notifications for action-required prompts and session completion. | `false` | +| Plan Directory | `general.plan.directory` | The directory where planning artifacts are stored. If not specified, defaults to the system temporary directory. A custom directory requires a policy to allow write access in Plan Mode. | `undefined` | | Plan Model Routing | `general.plan.modelRouting` | Automatically switch between Pro and Flash models based on Plan Mode status. Uses Pro for the planning phase and Flash for the implementation phase. | `true` | | Retry Fetch Errors | `general.retryFetchErrors` | Retry on "exception TypeError: fetch failed sending request" errors. | `true` | | Max Chat Model Attempts | `general.maxAttempts` | Maximum number of attempts for requests to the main chat model. Cannot exceed 10. | `10` | @@ -99,6 +101,13 @@ they appear in the UI. | Disable Loop Detection | `model.disableLoopDetection` | Disable automatic detection and prevention of infinite loops. | `false` | | Skip Next Speaker Check | `model.skipNextSpeakerCheck` | Skip the next speaker check. | `true` | +### Agents + +| UI Label | Setting | Description | Default | +| ------------------------- | ---------------------------------------- | --------------------------------------------------------------------------------------------- | ------- | +| Confirm Sensitive Actions | `agents.browser.confirmSensitiveActions` | Require manual confirmation for sensitive browser actions (e.g., fill_form, evaluate_script). 
| `false` | +| Block File Uploads | `agents.browser.blockFileUploads` | Hard-block file upload requests from the browser agent. | `false` | + ### Context | UI Label | Setting | Description | Default | @@ -115,6 +124,8 @@ they appear in the UI. | UI Label | Setting | Description | Default | | -------------------------------- | ------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | +| Sandbox Allowed Paths | `tools.sandboxAllowedPaths` | List of additional paths that the sandbox is allowed to access. | `[]` | +| Sandbox Network Access | `tools.sandboxNetworkAccess` | Whether the sandbox is allowed to access the network. | `false` | | Enable Interactive Shell | `tools.shell.enableInteractiveShell` | Use node-pty for an interactive shell experience. Fallback to child_process still applies. | `true` | | Show Color | `tools.shell.showColor` | Show color in shell output. | `false` | | Use Ripgrep | `tools.useRipgrep` | Use ripgrep for file content search instead of the fallback implementation. Provides faster search performance. | `true` | @@ -147,11 +158,13 @@ they appear in the UI. | UI Label | Setting | Description | Default | | -------------------------- | ---------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | | Enable Tool Output Masking | `experimental.toolOutputMasking.enabled` | Enables tool output masking to save tokens. | `true` | +| Enable Git Worktrees | `experimental.worktrees` | Enable automated Git worktree management for parallel work. | `false` | | Use OSC 52 Paste | `experimental.useOSC52Paste` | Use OSC 52 for pasting. 
This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | | Use OSC 52 Copy | `experimental.useOSC52Copy` | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` | | Plan | `experimental.plan` | Enable Plan Mode. | `true` | | Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `true` | | Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` | +| Memory Manager Agent | `experimental.memoryManager` | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories. | `false` | | Topic & Update Narration | `experimental.topicUpdateNarration` | Enable the experimental Topic & Update communication model for reduced chattiness and structured progress reporting. | `false` | ### Skills diff --git a/docs/cli/skills.md b/docs/cli/skills.md index d3e8d4e84f..73e5eb66eb 100644 --- a/docs/cli/skills.md +++ b/docs/cli/skills.md @@ -63,8 +63,10 @@ Use the `/skills` slash command to view and manage available expertise: - `/skills enable `: Re-enables a disabled skill. - `/skills reload`: Refreshes the list of discovered skills from all tiers. -_Note: `/skills disable` and `/skills enable` default to the `user` scope. Use -`--scope workspace` to manage workspace-specific settings._ + +> [!NOTE] +> `/skills disable` and `/skills enable` default to the `user` scope. Use +> `--scope workspace` to manage workspace-specific settings. 
### From the Terminal diff --git a/docs/cli/system-prompt.md b/docs/cli/system-prompt.md index b1ff43e3fd..c249d55cec 100644 --- a/docs/cli/system-prompt.md +++ b/docs/cli/system-prompt.md @@ -14,7 +14,9 @@ core instructions will apply unless you include them yourself. This feature is intended for advanced users who need to enforce strict, project-specific behavior or create a customized persona. -> Tip: You can export the current default system prompt to a file first, review + +> [!TIP] +> You can export the current default system prompt to a file first, review > it, and then selectively modify or replace it (see > [“Export the default prompt”](#export-the-default-prompt-recommended)). diff --git a/docs/cli/telemetry.md b/docs/cli/telemetry.md index 211d877071..dd13d5eb82 100644 --- a/docs/cli/telemetry.md +++ b/docs/cli/telemetry.md @@ -125,9 +125,11 @@ You must complete several setup steps before enabling Google Cloud telemetry. } ``` - > **Note:** This setting requires **Direct export** (in-process exporters) - > and cannot be used when `useCollector` is `true`. If both are enabled, - > telemetry will be disabled. + +> [!NOTE] +> This setting requires **Direct export** (in-process exporters) +> and cannot be used when `useCollector` is `true`. If both are enabled, +> telemetry will be disabled. 3. Ensure your account or service account has these IAM roles: - Cloud Trace Agent @@ -304,6 +306,7 @@ Emitted at startup with the CLI configuration. - `extension_ids` (string) - `extensions_count` (int) - `auth_type` (string) +- `worktree_active` (boolean) - `github_workflow_name` (string, optional) - `github_repository_hash` (string, optional) - `github_event_name` (string, optional) @@ -901,6 +904,20 @@ Logs keychain availability checks. - `available` (boolean) +##### `gemini_cli.startup_stats` + +Logs detailed startup performance statistics. + +
+Attributes + +- `phases` (json array of startup phases) +- `os_platform` (string) +- `os_release` (string) +- `is_docker` (boolean) + +
+
### Metrics @@ -917,6 +934,20 @@ Gemini CLI exports several custom metrics. Incremented once per CLI startup. +##### Onboarding + +Tracks the onboarding flow from authentication to successful user onboarding. + +- `gemini_cli.onboarding.start` (Counter, Int): Incremented when the + authentication flow begins. + +- `gemini_cli.onboarding.success` (Counter, Int): Incremented when the user + onboarding flow completes successfully. +
+Attributes (Success) + +- `user_tier` (string) + ##### Tools ##### `gemini_cli.tool.call.count` diff --git a/docs/cli/themes.md b/docs/cli/themes.md index adfe64d081..55acc75625 100644 --- a/docs/cli/themes.md +++ b/docs/cli/themes.md @@ -36,9 +36,11 @@ using the `/theme` command within Gemini CLI: preview or highlight as you select. 4. Confirm your selection to apply the theme. -**Note:** If a theme is defined in your `settings.json` file (either by name or -by a file path), you must remove the `"theme"` setting from the file before you -can change the theme using the `/theme` command. + +> [!NOTE] +> If a theme is defined in your `settings.json` file (either by name or +> by a file path), you must remove the `"theme"` setting from the file before +> you can change the theme using the `/theme` command. ### Theme persistence @@ -179,11 +181,13 @@ custom theme defined in `settings.json`. } ``` -**Security note:** For your safety, Gemini CLI will only load theme files that -are located within your home directory. If you attempt to load a theme from -outside your home directory, a warning will be displayed and the theme will not -be loaded. This is to prevent loading potentially malicious theme files from -untrusted sources. + +> [!WARNING] +> For your safety, Gemini CLI will only load theme files that +> are located within your home directory. If you attempt to load a theme from +> outside your home directory, a warning will be displayed and the theme will +> not be loaded. This is to prevent loading potentially malicious theme files +> from untrusted sources. ### Example custom theme diff --git a/docs/cli/tutorials/file-management.md b/docs/cli/tutorials/file-management.md index 0f4fa09575..37112d3bc7 100644 --- a/docs/cli/tutorials/file-management.md +++ b/docs/cli/tutorials/file-management.md @@ -7,9 +7,9 @@ create files, and control what Gemini CLI can see. ## Prerequisites - Gemini CLI installed and authenticated. 
-- A project directory to work with (e.g., a git repository). +- A project directory to work with (for example, a git repository). -## How to give the agent context (Reading files) +## Providing context by reading files Gemini CLI will generally try to read relevant files, sometimes prompting you for access (depending on your settings). To ensure that Gemini CLI uses a file, @@ -58,11 +58,13 @@ You know there's a `UserProfile` component, but you don't know where it lives. ``` Gemini uses the `glob` or `list_directory` tools to search your project -structure. It will return the specific path (e.g., +structure. It will return the specific path (for example, `src/components/UserProfile.tsx`), which you can then use with `@` in your next turn. -> **Tip:** You can also ask for lists of files, like "Show me all the TypeScript + +> [!TIP] +> You can also ask for lists of files, like "Show me all the TypeScript > configuration files in the root directory." ## How to modify code @@ -111,8 +113,8 @@ or, better yet, run your project's tests. `Run the tests for the UserProfile component.` ``` -Gemini CLI uses the `run_shell_command` tool to execute your test runner (e.g., -`npm test` or `jest`). This ensures the changes didn't break existing +Gemini CLI uses the `run_shell_command` tool to execute your test runner (for +example, `npm test` or `jest`). This ensures the changes didn't break existing functionality. ## Advanced: Controlling what Gemini sees diff --git a/docs/cli/tutorials/mcp-setup.md b/docs/cli/tutorials/mcp-setup.md index 1f3edf716a..1eff7452ab 100644 --- a/docs/cli/tutorials/mcp-setup.md +++ b/docs/cli/tutorials/mcp-setup.md @@ -62,8 +62,10 @@ You tell Gemini about new servers by editing your `settings.json`. } ``` -> **Note:** The `command` is `docker`, and the rest are arguments passed to it. -> We map the local environment variable into the container so your secret isn't + +> [!NOTE] +> The `command` is `docker`, and the rest are arguments passed to it. 
We +> map the local environment variable into the container so your secret isn't > hardcoded in the config file. ## How to verify the connection diff --git a/docs/cli/tutorials/memory-management.md b/docs/cli/tutorials/memory-management.md index 4cbca4bda9..2268ebd923 100644 --- a/docs/cli/tutorials/memory-management.md +++ b/docs/cli/tutorials/memory-management.md @@ -11,8 +11,8 @@ persistent facts, and inspect the active context. ## Why manage context? -Out of the box, Gemini CLI is smart but generic. It doesn't know your preferred -testing framework, your indentation style, or that you hate using `any` in +Gemini CLI is powerful but general. It doesn't know your preferred testing +framework, your indentation style, or your preference against `any` in TypeScript. Context management solves this by giving the agent persistent memory. @@ -109,11 +109,11 @@ immediately. Force a reload with: ## Best practices -- **Keep it focused:** Don't dump your entire internal wiki into `GEMINI.md`. - Keep instructions actionable and relevant to code generation. +- **Keep it focused:** Avoid adding excessive content to `GEMINI.md`. Keep + instructions actionable and relevant to code generation. - **Use negative constraints:** Explicitly telling the agent what _not_ to do - (e.g., "Do not use class components") is often more effective than vague - positive instructions. + (for example, "Do not use class components") is often more effective than + vague positive instructions. - **Review often:** Periodically check your `GEMINI.md` files to remove outdated rules. 
diff --git a/docs/cli/tutorials/plan-mode-steering.md b/docs/cli/tutorials/plan-mode-steering.md index 86bc63edac..0384425848 100644 --- a/docs/cli/tutorials/plan-mode-steering.md +++ b/docs/cli/tutorials/plan-mode-steering.md @@ -5,9 +5,10 @@ structured environment with model steering's real-time feedback, you can guide Gemini CLI through the research and design phases to ensure the final implementation plan is exactly what you need. -> **Note:** This is a preview feature under active development. Preview features -> may only be available in the **Preview** channel or may need to be enabled -> under `/settings`. + +> [!NOTE] +> This is an experimental feature currently under active development and +> may need to be enabled under `/settings`. ## Prerequisites diff --git a/docs/cli/tutorials/shell-commands.md b/docs/cli/tutorials/shell-commands.md index 3eaaf2049e..390c8acab9 100644 --- a/docs/cli/tutorials/shell-commands.md +++ b/docs/cli/tutorials/shell-commands.md @@ -7,7 +7,7 @@ automate complex workflows, and manage background processes safely. ## Prerequisites - Gemini CLI installed and authenticated. -- Basic familiarity with your system's shell (Bash, Zsh, PowerShell, etc.). +- Basic familiarity with your system's shell (Bash, Zsh, PowerShell, and so on). ## How to run commands directly (`!`) @@ -49,7 +49,7 @@ You want to run tests and fix any failures. 6. Gemini uses `replace` to fix the bug. 7. Gemini runs `npm test` again to verify the fix. -This loop turns Gemini into an autonomous engineer. +This loop lets Gemini work autonomously. ## How to manage background processes @@ -75,7 +75,7 @@ confirmation prompts) by streaming the output to you. However, for highly interactive tools (like `vim` or `top`), it's often better to run them yourself in a separate terminal window or use the `!` prefix. -## Safety first +## Safety features Giving an AI access to your shell is powerful but risky. Gemini CLI includes several safety layers. 
diff --git a/docs/core/remote-agents.md b/docs/core/remote-agents.md index 1c48df00a3..e11c37fece 100644 --- a/docs/core/remote-agents.md +++ b/docs/core/remote-agents.md @@ -10,7 +10,9 @@ agents in the following repositories: - [ADK Samples (Python)](https://github.com/google/adk-samples/tree/main/python) - [ADK Python Contributing Samples](https://github.com/google/adk-python/tree/main/contributing/samples) -> **Note: Remote subagents are currently an experimental feature.** + +> [!NOTE] +> Remote subagents are currently an experimental feature. ## Configuration @@ -49,12 +51,13 @@ You can place them in: ### Configuration schema -| Field | Type | Required | Description | -| :--------------- | :----- | :------- | :------------------------------------------------------------------------------------------------------------- | -| `kind` | string | Yes | Must be `remote`. | -| `name` | string | Yes | A unique name for the agent. Must be a valid slug (lowercase letters, numbers, hyphens, and underscores only). | -| `agent_card_url` | string | Yes | The URL to the agent's A2A card endpoint. | -| `auth` | object | No | Authentication configuration. See [Authentication](#authentication). | +| Field | Type | Required | Description | +| :---------------- | :----- | :------- | :------------------------------------------------------------------------------------------------------------- | +| `kind` | string | Yes | Must be `remote`. | +| `name` | string | Yes | A unique name for the agent. Must be a valid slug (lowercase letters, numbers, hyphens, and underscores only). | +| `agent_card_url` | string | Yes\* | The URL to the agent's A2A card endpoint. Required if `agent_card_json` is not provided. | +| `agent_card_json` | string | Yes\* | The inline JSON string of the agent's A2A card. Required if `agent_card_url` is not provided. | +| `auth` | object | No | Authentication configuration. See [Authentication](#authentication). 
| ### Single-subagent example @@ -82,9 +85,100 @@ Markdown file. --- ``` -> **Note:** Mixed local and remote agents, or multiple local agents, are not + +> [!NOTE] +> Mixed local and remote agents, or multiple local agents, are not > supported in a single file; the list format is currently remote-only. +### Inline Agent Card JSON + +
+View formatting options for JSON strings + +If you don't have an endpoint serving the agent card, you can provide the A2A +card directly as a JSON string using `agent_card_json`. + +When providing a JSON string in YAML, you must properly format it as a string +scalar. You can use single quotes, a block scalar, or double quotes (which +require escaping internal double quotes). + +#### Using single quotes + +Single quotes allow you to embed unescaped double quotes inside the JSON string. +This format is useful for shorter, single-line JSON strings. + +```markdown +--- +kind: remote +name: single-quotes-agent +agent_card_json: + '{ "protocolVersion": "0.3.0", "name": "Example Agent", "version": "1.0.0", + "url": "dummy-url" }' +--- +``` + +#### Using a block scalar + +The literal block scalar (`|`) preserves line breaks and is highly recommended +for multiline JSON strings as it avoids quote escaping entirely. The following +is a complete, valid Agent Card configuration using dummy values. + +```markdown +--- +kind: remote +name: block-scalar-agent +agent_card_json: | + { + "protocolVersion": "0.3.0", + "name": "Example Agent Name", + "description": "An example agent description for documentation purposes.", + "version": "1.0.0", + "url": "dummy-url", + "preferredTransport": "HTTP+JSON", + "capabilities": { + "streaming": true, + "extendedAgentCard": false + }, + "defaultInputModes": [ + "text/plain" + ], + "defaultOutputModes": [ + "application/json" + ], + "skills": [ + { + "id": "ExampleSkill", + "name": "Example Skill Assistant", + "description": "A description of what this example skill does.", + "tags": [ + "example-tag" + ], + "examples": [ + "Show me an example." + ] + } + ] + } +--- +``` + +#### Using double quotes + +Double quotes are also supported, but any internal double quotes in your JSON +must be escaped with a backslash. 
+ +```markdown +--- +kind: remote +name: double-quotes-agent +agent_card_json: + "{ \"protocolVersion\": \"0.3.0\", \"name\": \"Example Agent\", + \"version\": \"1.0.0\", \"url\": \"dummy-url\" }" +--- +``` + +
+ ## Authentication Many remote agents require authentication. Gemini CLI supports several @@ -101,7 +194,7 @@ Gemini CLI supports the following authentication types: | `apiKey` | Send a static API key as an HTTP header. | | `http` | HTTP authentication (Bearer token, Basic credentials, or any IANA-registered scheme). | | `google-credentials` | Google Application Default Credentials (ADC). Automatically selects access or identity tokens. | -| `oauth2` | OAuth 2.0 Authorization Code flow with PKCE. Opens a browser for interactive sign-in. | +| `oauth` | OAuth 2.0 Authorization Code flow with PKCE. Opens a browser for interactive sign-in. | ### Dynamic values @@ -260,7 +353,7 @@ hosts: Requests to any other host will be rejected with an error. If your agent is hosted on a different domain, use one of the other auth types (`apiKey`, `http`, -or `oauth2`). +or `oauth`). #### Examples @@ -294,7 +387,7 @@ auth: --- ``` -### OAuth 2.0 (`oauth2`) +### OAuth 2.0 (`oauth`) Performs an interactive OAuth 2.0 Authorization Code flow with PKCE. On first use, Gemini CLI opens your browser for sign-in and persists the resulting tokens @@ -302,7 +395,7 @@ for subsequent requests. | Field | Type | Required | Description | | :------------------ | :------- | :------- | :------------------------------------------------------------------------------------------------------------------------------------------------- | -| `type` | string | Yes | Must be `oauth2`. | +| `type` | string | Yes | Must be `oauth`. | | `client_id` | string | Yes\* | OAuth client ID. Required for interactive auth. | | `client_secret` | string | No\* | OAuth client secret. Required by most authorization servers (confidential clients). Can be omitted for public clients that don't require a secret. | | `scopes` | string[] | No | Requested scopes. Can also be discovered from the agent card. 
| @@ -315,7 +408,7 @@ kind: remote name: oauth-agent agent_card_url: https://example.com/.well-known/agent.json auth: - type: oauth2 + type: oauth client_id: my-client-id.apps.example.com --- ``` @@ -362,5 +455,7 @@ Users can manage subagents using the following commands within the Gemini CLI: - `/agents enable `: Enables a specific subagent. - `/agents disable `: Disables a specific subagent. -> **Tip:** You can use the `@cli_help` agent within Gemini CLI for assistance + +> [!TIP] +> You can use the `@cli_help` agent within Gemini CLI for assistance > with configuring subagents. diff --git a/docs/core/subagents.md b/docs/core/subagents.md index 6d863f489e..b0cffca3b5 100644 --- a/docs/core/subagents.md +++ b/docs/core/subagents.md @@ -5,16 +5,18 @@ session. They are designed to handle specific, complex tasks—like deep codebas analysis, documentation lookup, or domain-specific reasoning—without cluttering the main agent's context or toolset. -> **Note: Subagents are currently an experimental feature.** -> -> To use custom subagents, you must ensure they are enabled in your -> `settings.json` (enabled by default): -> -> ```json -> { -> "experimental": { "enableAgents": true } -> } -> ``` + +> [!NOTE] +> Subagents are currently an experimental feature. +> +To use custom subagents, you must ensure they are enabled in your +`settings.json` (enabled by default): + +```json +{ + "experimental": { "enableAgents": true } +} +``` ## What are subagents? @@ -114,7 +116,9 @@ Gemini CLI comes with the following built-in subagents: the pricing table from this page," "Click the login button and enter my credentials." -> **Note:** This is a preview feature currently under active development. + +> [!NOTE] +> This is a preview feature currently under active development. #### Prerequisites @@ -217,7 +221,9 @@ captures a screenshot and sends it to the vision model for analysis. 
The model returns coordinates and element descriptions that the browser agent uses with the `click_at` tool for precise, coordinate-based interactions. -> **Note:** The visual agent requires API key or Vertex AI authentication. It is + +> [!NOTE] +> The visual agent requires API key or Vertex AI authentication. It is > not available when using "Sign in with Google". ## Creating custom subagents @@ -405,7 +411,9 @@ that your subagent was called with a specific prompt and the given description. Gemini CLI can also delegate tasks to remote subagents using the Agent-to-Agent (A2A) protocol. -> **Note: Remote subagents are currently an experimental feature.** + +> [!NOTE] +> Remote subagents are currently an experimental feature. See the [Remote Subagents documentation](remote-agents) for detailed configuration, authentication, and usage instructions. diff --git a/docs/extensions/reference.md b/docs/extensions/reference.md index e6012f4d33..56c51d30df 100644 --- a/docs/extensions/reference.md +++ b/docs/extensions/reference.md @@ -23,7 +23,7 @@ Gemini CLI creates a copy of the extension during installation. You must run GitHub, you must have `git` installed on your machine. ```bash -gemini extensions install [--ref ] [--auto-update] [--pre-release] [--consent] +gemini extensions install [--ref ] [--auto-update] [--pre-release] [--consent] [--skip-settings] ``` - ``: The GitHub URL or local path of the extension. @@ -31,6 +31,7 @@ gemini extensions install [--ref ] [--auto-update] [--pre-release] - `--auto-update`: Enable automatic updates for this extension. - `--pre-release`: Enable installation of pre-release versions. - `--consent`: Acknowledge security risks and skip the confirmation prompt. +- `--skip-settings`: Skip the configuration on install process. ### Uninstall an extension @@ -234,7 +235,9 @@ skill definitions in a `skills/` directory. For example, ### Sub-agents -> **Note:** Sub-agents are a preview feature currently under active development. 
+ +> [!NOTE] +> Sub-agents are a preview feature currently under active development. Provide [sub-agents](../core/subagents.md) that users can delegate tasks to. Add agent definition files (`.md`) to an `agents/` directory in your extension root. @@ -253,7 +256,9 @@ Rules contributed by extensions run in their own tier (tier 2), alongside workspace-defined policies. This tier has higher priority than the default rules but lower priority than user or admin policies. -> **Warning:** For security, Gemini CLI ignores any `allow` decisions or `yolo` + +> [!WARNING] +> For security, Gemini CLI ignores any `allow` decisions or `yolo` > mode configurations in extension policies. This ensures that an extension > cannot automatically approve tool calls or bypass security measures without > your confirmation. diff --git a/docs/get-started/authentication.md b/docs/get-started/authentication.md index 964e776567..6d8758b958 100644 --- a/docs/get-started/authentication.md +++ b/docs/get-started/authentication.md @@ -4,7 +4,9 @@ To use Gemini CLI, you'll need to authenticate with Google. This guide helps you quickly find the best way to sign in based on your account type and how you're using the CLI. -> **Note:** Looking for a high-level comparison of all available subscriptions? + +> [!TIP] +> Looking for a high-level comparison of all available subscriptions? > To compare features and find the right quota for your needs, see our > [Plans page](https://geminicli.com/plans/). @@ -40,11 +42,11 @@ Select the authentication method that matches your situation in the table below: If you run Gemini CLI on your local machine, the simplest authentication method is logging in with your Google account. This method requires a web browser on a -machine that can communicate with the terminal running Gemini CLI (e.g., your -local machine). +machine that can communicate with the terminal running Gemini CLI (for example, +your local machine). 
-> **Important:** If you are a **Google AI Pro** or **Google AI Ultra** -> subscriber, use the Google account associated with your subscription. +If you are a **Google AI Pro** or **Google AI Ultra** subscriber, use the Google +account associated with your subscription. To authenticate and use Gemini CLI: @@ -107,7 +109,9 @@ To authenticate and use Gemini CLI with a Gemini API key: 4. Select **Use Gemini API key**. -> **Warning:** Treat API keys, especially for services like Gemini, as sensitive + +> [!WARNING] +> Treat API keys, especially for services like Gemini, as sensitive > credentials. Protect them to prevent unauthorized access and potential misuse > of the service under your account. @@ -130,7 +134,7 @@ For example: **macOS/Linux** ```bash -# Replace with your project ID and desired location (e.g., us-central1) +# Replace with your project ID and desired location (for example, us-central1) export GOOGLE_CLOUD_PROJECT="YOUR_PROJECT_ID" export GOOGLE_CLOUD_LOCATION="YOUR_PROJECT_LOCATION" ``` @@ -138,7 +142,7 @@ export GOOGLE_CLOUD_LOCATION="YOUR_PROJECT_LOCATION" **Windows (PowerShell)** ```powershell -# Replace with your project ID and desired location (e.g., us-central1) +# Replace with your project ID and desired location (for example, us-central1) $env:GOOGLE_CLOUD_PROJECT="YOUR_PROJECT_ID" $env:GOOGLE_CLOUD_LOCATION="YOUR_PROJECT_LOCATION" ``` @@ -150,20 +154,20 @@ To make any Vertex AI environment variable settings persistent, see Consider this authentication method if you have Google Cloud CLI installed. -> **Note:** If you have previously set `GOOGLE_API_KEY` or `GEMINI_API_KEY`, you -> must unset them to use ADC: -> -> **macOS/Linux** -> -> ```bash -> unset GOOGLE_API_KEY GEMINI_API_KEY -> ``` -> -> **Windows (PowerShell)** -> -> ```powershell -> Remove-Item Env:\GOOGLE_API_KEY, Env:\GEMINI_API_KEY -ErrorAction Ignore -> ``` +If you have previously set `GOOGLE_API_KEY` or `GEMINI_API_KEY`, you must unset +them to use ADC. 
+ +**macOS/Linux** + +```bash +unset GOOGLE_API_KEY GEMINI_API_KEY +``` + +**Windows (PowerShell)** + +```powershell +Remove-Item Env:\GOOGLE_API_KEY, Env:\GEMINI_API_KEY -ErrorAction Ignore +``` 1. Verify you have a Google Cloud project and Vertex AI API is enabled. @@ -188,20 +192,20 @@ Consider this authentication method if you have Google Cloud CLI installed. Consider this method of authentication in non-interactive environments, CI/CD pipelines, or if your organization restricts user-based ADC or API key creation. -> **Note:** If you have previously set `GOOGLE_API_KEY` or `GEMINI_API_KEY`, you -> must unset them: -> -> **macOS/Linux** -> -> ```bash -> unset GOOGLE_API_KEY GEMINI_API_KEY -> ``` -> -> **Windows (PowerShell)** -> -> ```powershell -> Remove-Item Env:\GOOGLE_API_KEY, Env:\GEMINI_API_KEY -ErrorAction Ignore -> ``` +If you have previously set `GOOGLE_API_KEY` or `GEMINI_API_KEY`, you must unset +them: + +**macOS/Linux** + +```bash +unset GOOGLE_API_KEY GEMINI_API_KEY +``` + +**Windows (PowerShell)** + +```powershell +Remove-Item Env:\GOOGLE_API_KEY, Env:\GEMINI_API_KEY -ErrorAction Ignore +``` 1. [Create a service account and key](https://cloud.google.com/iam/docs/keys-create-delete) and download the provided JSON file. Assign the "Vertex AI User" role to the @@ -233,8 +237,11 @@ pipelines, or if your organization restricts user-based ADC or API key creation. ``` 5. Select **Vertex AI**. - > **Warning:** Protect your service account key file as it gives access to - > your resources. + + +> [!WARNING] +> Protect your service account key file as it gives access to +> your resources. #### C. Vertex AI - Google Cloud API key @@ -257,10 +264,9 @@ pipelines, or if your organization restricts user-based ADC or API key creation. $env:GOOGLE_API_KEY="YOUR_GOOGLE_API_KEY" ``` - > **Note:** If you see errors like - > `"API keys are not supported by this API..."`, your organization might - > restrict API key usage for this service. 
Try the other Vertex AI - > authentication methods instead. + If you see errors like `"API keys are not supported by this API..."`, your + organization might restrict API key usage for this service. Try the other + Vertex AI authentication methods instead. 3. [Configure your Google Cloud Project](#set-gcp). @@ -274,7 +280,9 @@ pipelines, or if your organization restricts user-based ADC or API key creation. ## Set your Google Cloud project -> **Important:** Most individual Google accounts (free and paid) don't require a + +> [!IMPORTANT] +> Most individual Google accounts (free and paid) don't require a > Google Cloud project for authentication. When you sign in using your Google account, you may need to configure a Google @@ -325,29 +333,31 @@ persist them with the following methods: 1. **Add your environment variables to your shell configuration file:** Append the environment variable commands to your shell's startup file. - **macOS/Linux** (e.g., `~/.bashrc`, `~/.zshrc`, or `~/.profile`): + **macOS/Linux** (for example, `~/.bashrc`, `~/.zshrc`, or `~/.profile`): ```bash echo 'export GOOGLE_CLOUD_PROJECT="YOUR_PROJECT_ID"' >> ~/.bashrc source ~/.bashrc ``` - **Windows (PowerShell)** (e.g., `$PROFILE`): + **Windows (PowerShell)** (for example, `$PROFILE`): ```powershell Add-Content -Path $PROFILE -Value '$env:GOOGLE_CLOUD_PROJECT="YOUR_PROJECT_ID"' . $PROFILE ``` - > **Warning:** Be aware that when you export API keys or service account - > paths in your shell configuration file, any process launched from that - > shell can read them. + +> [!WARNING] +> Be aware that when you export API keys or service account +> paths in your shell configuration file, any process launched from that +> shell can read them. 2. **Use a `.env` file:** Create a `.gemini/.env` file in your project directory or home directory. 
Gemini CLI automatically loads variables from the first `.env` file it finds, searching up from the current directory, - then in your home directory's `.gemini/.env` (e.g., `~/.gemini/.env` or - `%USERPROFILE%\.gemini\.env`). + then in your home directory's `.gemini/.env` (for example, `~/.gemini/.env` + or `%USERPROFILE%\.gemini\.env`). Example for user-wide settings: diff --git a/docs/get-started/examples.md b/docs/get-started/examples.md deleted file mode 100644 index 5d31ddedb8..0000000000 --- a/docs/get-started/examples.md +++ /dev/null @@ -1,139 +0,0 @@ -# Gemini CLI examples - -Gemini CLI helps you automate common engineering tasks by combining AI reasoning -with local system tools. This document provides examples of how to use the CLI -for file management, code analysis, and data transformation. - -> **Note:** These examples demonstrate potential capabilities. Your actual -> results can vary based on the model used and your project environment. - -## Rename your photographs based on content - -You can use Gemini CLI to automate file management tasks that require visual -analysis. In this example, Gemini CLI renames images based on their actual -subject matter. - -Scenario: You have a folder containing the following files: - -```bash -photos/photo1.png -photos/photo2.png -photos/photo3.png -``` - -Give Gemini the following prompt: - -```cli -Rename the photos in my "photos" directory based on their contents. -``` - -Result: Gemini asks for permission to rename your files. - -Select **Allow once** and your files are renamed: - -```bash -photos/yellow_flowers.png -photos/antique_dresser.png -photos/green_android_robot.png -``` - -## Explain a repository by reading its code - -Gemini CLI is effective for rapid codebase exploration. The following example -shows how to ask Gemini CLI to fetch, analyze, and summarize a remote project. - -Scenario: You want to understand how a popular open-source utility works by -inspecting its code, not just its README. 
- -Give Gemini CLI the following prompt: - -```cli -Clone the 'chalk' repository from https://github.com/chalk/chalk, read its key source files, and explain how it works. -``` - -Result: Gemini performs a sequence of actions to answer your request. - -1. First, it asks for permission to run `git clone` to download the repository. -2. Next, it finds the important source files and asks for permission to read - them. -3. Finally, after analyzing the code, it provides a summary. - -Gemini CLI returns an explanation based on the actual source code: - -```markdown -The `chalk` library is a popular npm package for styling terminal output with -colors. After analyzing the source code, here's how it works: - -- **Core functionality:** The main file sets up a chainable API. Each color or - modifier (like `bold` or `italic`) is a getter that appends the corresponding - ANSI escape code to an internal stack. - -- **Chainable API:** When you call `chalk.red.bold('Hello')`, you are accessing - getters. The `red` getter adds the red color code, and the `bold` getter adds - the bold code. - -- **Output generation:** When the chain is treated as a string (e.g., in - `console.log`), a final `toString()` method is called. This method joins all - the stored ANSI codes, wraps them around the input string ('Hello'), and adds - a reset code at the end. This produces the final, styled string that the - terminal can render. -``` - -## Combine two spreadsheets into one spreadsheet - -Gemini CLI can process and transform data across multiple files. Use this -capability to merge reports or reformat data sets without manual copying. - -Scenario: You have two .csv files: `Revenue - 2023.csv` and -`Revenue - 2024.csv`. Each file contains monthly revenue figures. - -Give Gemini CLI the following prompt: - -```cli -Combine the two .csv files into a single .csv file, with each year a different column. -``` - -Result: Gemini CLI reads each file and then asks for permission to write a new -file. 
Provide your permission and Gemini CLI provides the combined data: - -```csv -Month,2023,2024 -January,0,1000 -February,0,1200 -March,0,2400 -April,900,500 -May,1000,800 -June,1000,900 -July,1200,1000 -August,1800,400 -September,2000,2000 -October,2400,3400 -November,3400,1800 -December,2100,9000 -``` - -## Run unit tests - -Gemini CLI can generate boilerplate code and tests based on your existing -implementation. This example demonstrates how to request code coverage for a -JavaScript component. - -Scenario: You've written a simple login page. You wish to write unit tests to -ensure that your login page has code coverage. - -Give Gemini CLI the following prompt: - -```cli -Write unit tests for Login.js. -``` - -Result: Gemini CLI asks for permission to write a new file and creates a test -for your login page. - -## Next steps - -- Follow the [File management](../cli/tutorials/file-management.md) guide to - start working with your codebase. -- Follow the [Quickstart](./index.md) to start your first session. -- See the [Cheatsheet](../cli/cli-reference.md) for a quick reference of - available commands. diff --git a/docs/get-started/gemini-3.md b/docs/get-started/gemini-3.md index d22baaa0c0..8e0af1a9ce 100644 --- a/docs/get-started/gemini-3.md +++ b/docs/get-started/gemini-3.md @@ -2,7 +2,9 @@ Gemini 3 Pro and Gemini 3 Flash are available on Gemini CLI for all users! -> **Note:** Gemini 3.1 Pro Preview is rolling out. To determine whether you have + +> [!NOTE] +> Gemini 3.1 Pro Preview is rolling out. To determine whether you have > access to Gemini 3.1, use the `/model` command and select **Manual**. If you > have access, you will see `gemini-3.1-pro-preview`. > @@ -25,7 +27,7 @@ Get started by upgrading Gemini CLI to the latest version: npm install -g @google/gemini-cli@latest ``` -After you’ve confirmed your version is 0.21.1 or later: +If your version is 0.21.1 or later: 1. Run `/model`. 2. Select **Auto (Gemini 3)**. 
@@ -39,7 +41,9 @@ When you encounter that limit, you’ll be given the option to switch to Gemini 2.5 Pro, upgrade for higher limits, or stop. You’ll also be told when your usage limit resets and Gemini 3 Pro can be used again. -> **Note:** Looking to upgrade for higher limits? To compare subscription + +> [!TIP] +> Looking to upgrade for higher limits? To compare subscription > options and find the right quota for your needs, see our > [Plans page](https://geminicli.com/plans/). @@ -52,7 +56,9 @@ There may be times when the Gemini 3 Pro model is overloaded. When that happens, Gemini CLI will ask you to decide whether you want to keep trying Gemini 3 Pro or fallback to Gemini 2.5 Pro. -> **Note:** The **Keep trying** option uses exponential backoff, in which Gemini + +> [!NOTE] +> The **Keep trying** option uses exponential backoff, in which Gemini > CLI waits longer between each retry, when the system is busy. If the retry > doesn't happen immediately, please wait a few minutes for the request to > process. @@ -109,7 +115,7 @@ then: Restart Gemini CLI and you should have access to Gemini 3. -## Need help? +## Next steps If you need help, we recommend searching for an existing [GitHub issue](https://github.com/google-gemini/gemini-cli/issues). If you diff --git a/docs/get-started/index.md b/docs/get-started/index.md index 566ac6e9df..906998ab48 100644 --- a/docs/get-started/index.md +++ b/docs/get-started/index.md @@ -62,7 +62,133 @@ Once installed and authenticated, you can start using Gemini CLI by issuing commands and prompts in your terminal. Ask it to generate code, explain files, and more. -To explore the power of Gemini CLI, see [Gemini CLI examples](./examples.md). + +> [!NOTE] +> These examples demonstrate potential capabilities. Your actual +> results can vary based on the model used and your project environment. + +### Rename your photographs based on content + +You can use Gemini CLI to automate file management tasks that require visual +analysis. 
In this example, Gemini CLI renames images based on their actual +subject matter. + +Scenario: You have a folder containing the following files: + +```bash +photos/photo1.png +photos/photo2.png +photos/photo3.png +``` + +Give Gemini the following prompt: + +```cli +Rename the photos in my "photos" directory based on their contents. +``` + +Result: Gemini asks for permission to rename your files. + +Select **Allow once** and your files are renamed: + +```bash +photos/yellow_flowers.png +photos/antique_dresser.png +photos/green_android_robot.png +``` + +### Explain a repository by reading its code + +Gemini CLI is effective for rapid codebase exploration. The following example +shows how to ask Gemini CLI to fetch, analyze, and summarize a remote project. + +Scenario: You want to understand how a popular open-source utility works by +inspecting its code, not just its README. + +Give Gemini CLI the following prompt: + +```cli +Clone the 'chalk' repository from https://github.com/chalk/chalk, read its key source files, and explain how it works. +``` + +Result: Gemini performs a sequence of actions to answer your request. + +1. First, it asks for permission to run `git clone` to download the repository. +2. Next, it finds the important source files and asks for permission to read + them. +3. Finally, after analyzing the code, it provides a summary. + +Gemini CLI returns an explanation based on the actual source code: + +```markdown +The `chalk` library is a popular npm package for styling terminal output with +colors. After analyzing the source code, here's how it works: + +- **Core functionality:** The main file sets up a chainable API. Each color or + modifier (like `bold` or `italic`) is a getter that appends the corresponding + ANSI escape code to an internal stack. + +- **Chainable API:** When you call `chalk.red.bold('Hello')`, you are accessing + getters. The `red` getter adds the red color code, and the `bold` getter adds + the bold code. 
+ +- **Output generation:** When the chain is treated as a string (e.g., in + `console.log`), a final `toString()` method is called. This method joins all + the stored ANSI codes, wraps them around the input string ('Hello'), and adds + a reset code at the end. This produces the final, styled string that the + terminal can render. +``` + +### Combine two spreadsheets into one spreadsheet + +Gemini CLI can process and transform data across multiple files. Use this +capability to merge reports or reformat data sets without manual copying. + +Scenario: You have two .csv files: `Revenue - 2023.csv` and +`Revenue - 2024.csv`. Each file contains monthly revenue figures. + +Give Gemini CLI the following prompt: + +```cli +Combine the two .csv files into a single .csv file, with each year a different column. +``` + +Result: Gemini CLI reads each file and then asks for permission to write a new +file. Provide your permission and Gemini CLI provides the combined data: + +```csv +Month,2023,2024 +January,0,1000 +February,0,1200 +March,0,2400 +April,900,500 +May,1000,800 +June,1000,900 +July,1200,1000 +August,1800,400 +September,2000,2000 +October,2400,3400 +November,3400,1800 +December,2100,9000 +``` + +### Run unit tests + +Gemini CLI can generate boilerplate code and tests based on your existing +implementation. This example demonstrates how to request code coverage for a +JavaScript component. + +Scenario: You've written a simple login page. You wish to write unit tests to +ensure that your login page has code coverage. + +Give Gemini CLI the following prompt: + +```cli +Write unit tests for Login.js. +``` + +Result: Gemini CLI asks for permission to write a new file and creates a test +for your login page. ## Check usage and quota diff --git a/docs/hooks/index.md b/docs/hooks/index.md index 7d526dd885..71fdec268f 100644 --- a/docs/hooks/index.md +++ b/docs/hooks/index.md @@ -143,7 +143,9 @@ Hooks are executed with a sanitized environment. 
## Security and risks -> **Warning: Hooks execute arbitrary code with your user privileges.** By + +> [!WARNING] +> Hooks execute arbitrary code with your user privileges. By > configuring hooks, you are allowing scripts to run shell commands on your > machine. diff --git a/docs/hooks/writing-hooks.md b/docs/hooks/writing-hooks.md index ca40d1976c..f4f156776f 100644 --- a/docs/hooks/writing-hooks.md +++ b/docs/hooks/writing-hooks.md @@ -470,5 +470,5 @@ console.error('Consolidating memories for session end...'); While project-level hooks are great for specific repositories, you can share your hooks across multiple projects by packaging them as a -[Gemini CLI extension](https://www.google.com/search?q=../extensions/index.md). -This provides version control, easy distribution, and centralized management. +[Gemini CLI extension](../extensions/index.md). This provides version control, +easy distribution, and centralized management. diff --git a/docs/ide-integration/ide-companion-spec.md b/docs/ide-integration/ide-companion-spec.md index 8f17cd896e..7ae22b7eb5 100644 --- a/docs/ide-integration/ide-companion-spec.md +++ b/docs/ide-integration/ide-companion-spec.md @@ -132,9 +132,11 @@ to the CLI whenever the user's context changes. } ``` - **Note:** The `openFiles` list should only include files that exist on disk. - Virtual files (e.g., unsaved files without a path, editor settings pages) - **MUST** be excluded. + +> [!NOTE] +> The `openFiles` list should only include files that exist on disk. +> Virtual files (e.g., unsaved files without a path, editor settings pages) +> **MUST** be excluded. ### How the CLI uses this context diff --git a/docs/ide-integration/index.md b/docs/ide-integration/index.md index 6686421ca4..00b5ad846d 100644 --- a/docs/ide-integration/index.md +++ b/docs/ide-integration/index.md @@ -1,15 +1,29 @@ -# IDE integration +# IDE Integration Gemini CLI can integrate with your IDE to provide a more seamless and context-aware experience. 
This integration allows the CLI to understand your workspace better and enables powerful features like native in-editor diffing. -Currently, the supported IDEs are [Antigravity](https://antigravity.google), -[Visual Studio Code](https://code.visualstudio.com/), and other editors that -support VS Code extensions. To build support for other editors, see the -[IDE Companion Extension Spec](./ide-companion-spec.md). +There are two primary ways to integrate Gemini CLI with an IDE: -## Features +1. **VS Code companion extension**: Install the "Gemini CLI Companion" + extension on [Antigravity](https://antigravity.google), + [Visual Studio Code](https://code.visualstudio.com/), or other VS Code + compatible editors. +2. **Agent Client Protocol (ACP)**: An open protocol for interoperability + between AI coding agents and IDEs. This method is used for integrations with + tools like JetBrains and Zed, which leverage the ACP Agent Registry for easy + discovery and installation of compatible agents like Gemini CLI. + +## VS Code companion extension + +The **Gemini CLI Companion extension** grants Gemini CLI direct access to your +VS Code compatible IDEs and improves your experience by providing real-time +context such as open files, cursor positions, and text selection. The extension +also enables a native diffing interface so you can seamlessly review and apply +AI-generated code changes directly within your editor. + +### Features - **Workspace context:** The CLI automatically gains awareness of your workspace to provide more relevant and accurate responses. This context includes: @@ -19,8 +33,8 @@ support VS Code extensions. To build support for other editors, see the truncated). - **Native diffing:** When Gemini suggests code modifications, you can view the - changes directly within your IDE's native diff viewer. This allows you to - review, edit, and accept or reject the suggested changes seamlessly. + changes directly within your IDE's native diff viewer. 
This lets you review, + edit, and accept or reject the suggested changes seamlessly. - **VS Code commands:** You can access Gemini CLI features directly from the VS Code Command Palette (`Cmd+Shift+P` or `Ctrl+Shift+P`): @@ -32,18 +46,18 @@ support VS Code extensions. To build support for other editors, see the - `Gemini CLI: View Third-Party Notices`: Displays the third-party notices for the extension. -## Installation and setup +### Installation and setup There are three ways to set up the IDE integration: -### 1. Automatic nudge (recommended) +#### 1. Automatic nudge (recommended) When you run Gemini CLI inside a supported editor, it will automatically detect your environment and prompt you to connect. Answering "Yes" will automatically run the necessary setup, which includes installing the companion extension and enabling the connection. -### 2. Manual installation from CLI +#### 2. Manual installation from CLI If you previously dismissed the prompt or want to install the extension manually, you can run the following command inside Gemini CLI: @@ -54,7 +68,7 @@ manually, you can run the following command inside Gemini CLI: This will find the correct extension for your IDE and install it. -### 3. Manual installation from a marketplace +#### 3. Manual installation from a marketplace You can also install the extension directly from a marketplace. @@ -66,16 +80,18 @@ You can also install the extension directly from a marketplace. Follow your editor's instructions for installing extensions from this registry. -> NOTE: The "Gemini CLI Companion" extension may appear towards the bottom of -> search results. If you don't see it immediately, try scrolling down or sorting -> by "Newly Published". + +> [!NOTE] +> The "Gemini CLI Companion" extension may appear towards the bottom of +> search results. If you don't see it immediately, try scrolling down or +> sorting by "Newly Published". 
> > After manually installing the extension, you must run `/ide enable` in the CLI > to activate the integration. -## Usage +### Usage -### Enabling and disabling +#### Enabling and disabling You can control the IDE integration from within the CLI: @@ -91,7 +107,7 @@ You can control the IDE integration from within the CLI: When enabled, Gemini CLI will automatically attempt to connect to the IDE companion extension. -### Checking the status +#### Checking the status To check the connection status and see the context the CLI has received from the IDE, run: @@ -103,10 +119,12 @@ IDE, run: If connected, this command will show the IDE it's connected to and a list of recently opened files it is aware of. -> [!NOTE] The file list is limited to 10 recently accessed files within your -> workspace and only includes local files on disk.) + +> [!NOTE] +> The file list is limited to 10 recently accessed files within your +> workspace and only includes local files on disk. -### Working with diffs +#### Working with diffs When you ask Gemini to modify a file, it can open a diff view directly in your editor. @@ -131,6 +149,63 @@ accepting them. If you select ‘Allow for this session’ in the CLI, changes will no longer show up in the IDE as they will be auto-accepted. +## Agent Client Protocol (ACP) + +ACP is an open protocol that standardizes how AI coding agents communicate with +code editors and IDEs. It addresses the challenge of fragmented distribution, +where agents traditionally needed custom integrations for each client. With ACP, +developers can implement their agent once, and it becomes compatible with any +ACP-compliant editor. + +For a comprehensive introduction to ACP, including its architecture and +benefits, refer to the official +[ACP Introduction](https://agentclientprotocol.com/get-started/introduction) +documentation. + +### The ACP Agent Registry + +Gemini CLI is officially available in the **ACP Agent Registry**. 
This allows +you to install and update Gemini CLI directly within supporting IDEs and +eliminates the need for manual downloads or IDE-specific extensions. + +Using the registry ensures: + +- **Ease of use**: Discover and install agents directly within your IDE + settings. +- **Latest versions**: Ensures users always have access to the most up-to-date + agent implementations. + +For more details on how the registry works, visit the official +[ACP Agent Registry](https://agentclientprotocol.com/get-started/registry) page. +You can learn about how specific IDEs leverage this integration in the following +section. + +### IDE-specific integration + +Gemini CLI is an ACP-compatible agent available in the ACP Agent Registry. +Here’s how different IDEs leverage the ACP and the registry: + +#### JetBrains IDEs + +JetBrains IDEs (like IntelliJ IDEA, PyCharm, or GoLand) offer built-in registry +support, allowing users to find and install ACP-compatible agents directly. + +For more details, refer to the official +[JetBrains AI Blog announcement](https://blog.jetbrains.com/ai/2026/01/acp-agent-registry/). + +#### Zed + +Zed, a modern code editor, also integrates with the ACP Agent Registry. This +allows Zed users to easily browse, install, and manage ACP agents. + +Learn more about Zed's integration with the ACP Registry in their +[blog post](https://zed.dev/blog/acp-registry). + +#### Other ACP-compatible IDEs + +Any other IDE that supports the ACP Agent Registry can install Gemini CLI +directly through its built-in registry features. + ## Using with sandboxing If you are using Gemini CLI within a sandbox, please be aware of the following: @@ -147,10 +222,9 @@ If you are using Gemini CLI within a sandbox, please be aware of the following: ## Troubleshooting -If you encounter issues with IDE integration, here are some common error -messages and how to resolve them.
+### VS Code companion extension errors -### Connection errors +#### Connection errors - **Message:** `🔴 Disconnected: Failed to connect to IDE companion extension in [IDE Name]. Please ensure the extension is running. To install the extension, run /ide install.` @@ -170,7 +244,7 @@ messages and how to resolve them. - **Solution:** Run `/ide enable` to try and reconnect. If the issue continues, open a new terminal window or restart your IDE. -### Manual PID override +#### Manual PID override If automatic IDE detection fails, or if you are running Gemini CLI in a standalone terminal and want to manually associate it with a specific IDE @@ -192,7 +266,7 @@ $env:GEMINI_CLI_IDE_PID=12345 When this variable is set, Gemini CLI will skip automatic detection and attempt to connect using the provided PID. -### Configuration errors +#### Configuration errors - **Message:** `🔴 Disconnected: Directory mismatch. Gemini CLI is running in a different location than the open workspace in [IDE Name]. Please run the CLI from one of the following directories: [List of directories]` @@ -206,7 +280,7 @@ to connect using the provided PID. - **Cause:** You have no workspace open in your IDE. - **Solution:** Open a workspace in your IDE and restart the CLI. -### General errors +#### General errors - **Message:** `IDE integration is not supported in your current environment. To use this feature, run Gemini CLI in one of these supported IDEs: [List of IDEs]` @@ -216,9 +290,14 @@ to connect using the provided PID. IDE, like Antigravity or VS Code. - **Message:** - `No installer is available for IDE. Please install the Gemini CLI Companion extension manually from the marketplace.` + `No installer is available for IDE. Please install Gemini CLI Companion extension manually from the marketplace.` - **Cause:** You ran `/ide install`, but the CLI does not have an automated installer for your specific IDE. 
- **Solution:** Open your IDE's extension marketplace, search for "Gemini CLI Companion", and [install it manually](#3-manual-installation-from-a-marketplace). + +### ACP integration errors + +For issues related to ACP integration, please refer to the debugging and +telemetry section in the [ACP Mode](../cli/acp-mode.md) documentation. diff --git a/docs/index.md b/docs/index.md index af1915bb8f..d1c1febf55 100644 --- a/docs/index.md +++ b/docs/index.md @@ -19,8 +19,6 @@ Jump in to Gemini CLI. on your system. - **[Authentication](./get-started/authentication.md):** Setup instructions for personal and enterprise accounts. -- **[Examples](./get-started/examples.md):** Practical examples of Gemini CLI in - action. - **[CLI cheatsheet](./cli/cli-reference.md):** A quick reference for common commands and options. - **[Gemini 3 on Gemini CLI](./get-started/gemini-3.md):** Learn about Gemini 3 diff --git a/docs/issue-and-pr-automation.md b/docs/issue-and-pr-automation.md index 6c023b651b..6f27592833 100644 --- a/docs/issue-and-pr-automation.md +++ b/docs/issue-and-pr-automation.md @@ -14,7 +14,9 @@ feature), while the PR is the "how" (the implementation). This separation helps us track work, prioritize features, and maintain clear historical context. Our automation is built around this principle. -> **Note:** Issues tagged as "🔒Maintainers only" are reserved for project + +> [!NOTE] +> Issues tagged as "🔒Maintainers only" are reserved for project > maintainers. We will not accept pull requests related to these issues. --- diff --git a/docs/local-development.md b/docs/local-development.md index a31fa4aa11..83520c7506 100644 --- a/docs/local-development.md +++ b/docs/local-development.md @@ -79,7 +79,9 @@ You can view traces in the Jaeger UI for local development. You can use an OpenTelemetry collector to forward telemetry data to Google Cloud Trace for custom processing or routing. 
-> **Warning:** Ensure you complete the + +> [!WARNING] +> Ensure you complete the > [Google Cloud telemetry prerequisites](./cli/telemetry.md#prerequisites) > (Project ID, authentication, IAM roles, and APIs) before using this method. diff --git a/docs/redirects.json b/docs/redirects.json index 598f42cccf..db2dae4333 100644 --- a/docs/redirects.json +++ b/docs/redirects.json @@ -13,6 +13,7 @@ "/docs/faq": "/docs/resources/faq", "/docs/get-started/configuration": "/docs/reference/configuration", "/docs/get-started/configuration-v1": "/docs/reference/configuration", + "/docs/get-started/examples": "/docs/get-started/index", "/docs/index": "/docs", "/docs/quota-and-pricing": "/docs/resources/quota-and-pricing", "/docs/tos-privacy": "/docs/resources/tos-privacy", diff --git a/docs/reference/commands.md b/docs/reference/commands.md index e9383152d2..4dd7e367e5 100644 --- a/docs/reference/commands.md +++ b/docs/reference/commands.md @@ -60,8 +60,8 @@ Slash commands provide meta-level control over the CLI itself. - `list` (selecting this opens the auto-saved session browser) - `-- checkpoints --` - `list`, `save`, `resume`, `delete`, `share` (manual tagged checkpoints) - - **Note:** Unique prefixes (for example `/cha` or `/resum`) resolve to the - same grouped menu. + - Unique prefixes (for example `/cha` or `/resu`) resolve to the same grouped + menu. - **Sub-commands:** - **`debug`** - **Description:** Export the most recent API request as a JSON payload. @@ -250,8 +250,8 @@ Slash commands provide meta-level control over the CLI itself. - **`list`** or **`ls`**: - **Description:** List configured MCP servers and tools. This is the default action if no subcommand is specified. - - **`refresh`**: - - **Description:** Restarts all MCP servers and re-discovers their available + - **`reload`**: + - **Description:** Reloads all MCP servers and re-discovers their available tools. 
- **`schema`**: - **Description:** List configured MCP servers and tools with descriptions diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 34a2e84edd..04feb2df38 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -25,7 +25,9 @@ overridden by higher numbers): Gemini CLI uses JSON settings files for persistent configuration. There are four locations for these files: -> **Tip:** JSON-aware editors can use autocomplete and validation by pointing to + +> [!TIP] +> JSON-aware editors can use autocomplete and validation by pointing to > the generated schema at `schemas/settings.schema.json` in this repository. > When working outside the repo, reference the hosted schema at > `https://raw.githubusercontent.com/google-gemini/gemini-cli/main/schemas/settings.schema.json`. @@ -66,9 +68,9 @@ an environment variable `MY_API_TOKEN`, you could use it in `settings.json` like this: `"apiKey": "$MY_API_TOKEN"`. Additionally, each extension can have its own `.env` file in its directory, which will be loaded automatically. -> **Note for Enterprise Users:** For guidance on deploying and managing Gemini -> CLI in a corporate environment, please see the -> [Enterprise Configuration](../cli/enterprise.md) documentation. +**Note for Enterprise Users:** For guidance on deploying and managing Gemini CLI +in a corporate environment, please see the +[Enterprise Configuration](../cli/enterprise.md) documentation. ### The `.gemini` directory in your project @@ -131,7 +133,7 @@ their corresponding top-level category object in your `settings.json` file. - **`general.enableNotifications`** (boolean): - **Description:** Enable run-event notifications for action-required prompts - and session completion. Currently macOS only. + and session completion. - **Default:** `false` - **`general.checkpointing.enabled`** (boolean): @@ -141,7 +143,8 @@ their corresponding top-level category object in your `settings.json` file. 
- **`general.plan.directory`** (string): - **Description:** The directory where planning artifacts are stored. If not - specified, defaults to the system temporary directory. + specified, defaults to the system temporary directory. A custom directory + requires a policy to allow write access in Plan Mode. - **Default:** `undefined` - **Requires restart:** Yes @@ -293,6 +296,11 @@ their corresponding top-level category object in your `settings.json` file. - **Description:** Hide the footer from the UI - **Default:** `false` +- **`ui.collapseDrawerDuringApproval`** (boolean): + - **Description:** Whether to collapse the UI drawer when a tool is awaiting + confirmation. + - **Default:** `true` + - **`ui.showMemoryUsage`** (boolean): - **Description:** Display memory usage information in the UI - **Default:** `false` @@ -638,6 +646,11 @@ their corresponding top-level category object in your `settings.json` file. "model": "gemini-3-flash-preview" } }, + "chat-compression-3.1-flash-lite": { + "modelConfig": { + "model": "gemini-3.1-flash-lite-preview" + } + }, "chat-compression-2.5-pro": { "modelConfig": { "model": "gemini-2.5-pro" @@ -684,6 +697,16 @@ their corresponding top-level category object in your `settings.json` file. ```json { + "gemini-3.1-flash-lite-preview": { + "tier": "flash-lite", + "family": "gemini-3", + "isPreview": true, + "isVisible": true, + "features": { + "thinking": false, + "multimodalToolUse": true + } + }, "gemini-3.1-pro-preview": { "tier": "pro", "family": "gemini-3", @@ -795,7 +818,7 @@ their corresponding top-level category object in your `settings.json` file. 
"tier": "auto", "isPreview": true, "isVisible": true, - "dialogDescription": "Let Gemini CLI decide the best model for the task: gemini-3.1-pro, gemini-3-flash", + "dialogDescription": "Let Gemini CLI decide the best model for the task: gemini-3-pro, gemini-3-flash", "features": { "thinking": true, "multimodalToolUse": false @@ -824,6 +847,45 @@ their corresponding top-level category object in your `settings.json` file. ```json { + "gemini-3.1-pro-preview": { + "default": "gemini-3.1-pro-preview", + "contexts": [ + { + "condition": { + "hasAccessToPreview": false + }, + "target": "gemini-2.5-pro" + }, + { + "condition": { + "useCustomTools": true + }, + "target": "gemini-3.1-pro-preview-customtools" + } + ] + }, + "gemini-3.1-pro-preview-customtools": { + "default": "gemini-3.1-pro-preview-customtools", + "contexts": [ + { + "condition": { + "hasAccessToPreview": false + }, + "target": "gemini-2.5-pro" + } + ] + }, + "gemini-3-flash-preview": { + "default": "gemini-3-flash-preview", + "contexts": [ + { + "condition": { + "hasAccessToPreview": false + }, + "target": "gemini-2.5-flash" + } + ] + }, "gemini-3-pro-preview": { "default": "gemini-3-pro-preview", "contexts": [ @@ -923,6 +985,17 @@ their corresponding top-level category object in your `settings.json` file. "auto-gemini-2.5": { "default": "gemini-2.5-pro" }, + "gemini-3.1-flash-lite-preview": { + "default": "gemini-3.1-flash-lite-preview", + "contexts": [ + { + "condition": { + "useGemini3_1FlashLite": false + }, + "target": "gemini-2.5-flash-lite" + } + ] + }, "flash": { "default": "gemini-3-flash-preview", "contexts": [ @@ -935,7 +1008,15 @@ their corresponding top-level category object in your `settings.json` file. 
] }, "flash-lite": { - "default": "gemini-2.5-flash-lite" + "default": "gemini-2.5-flash-lite", + "contexts": [ + { + "condition": { + "useGemini3_1FlashLite": true + }, + "target": "gemini-3.1-flash-lite-preview" + } + ] } } ``` @@ -995,6 +1076,132 @@ their corresponding top-level category object in your `settings.json` file. - **Requires restart:** Yes +- **`modelConfigs.modelChains`** (object): + - **Description:** Availability policy chains defining fallback behavior for + models. + - **Default:** + + ```json + { + "preview": [ + { + "model": "gemini-3-pro-preview", + "actions": { + "terminal": "prompt", + "transient": "prompt", + "not_found": "prompt", + "unknown": "prompt" + }, + "stateTransitions": { + "terminal": "terminal", + "transient": "terminal", + "not_found": "terminal", + "unknown": "terminal" + } + }, + { + "model": "gemini-3-flash-preview", + "isLastResort": true, + "actions": { + "terminal": "prompt", + "transient": "prompt", + "not_found": "prompt", + "unknown": "prompt" + }, + "stateTransitions": { + "terminal": "terminal", + "transient": "terminal", + "not_found": "terminal", + "unknown": "terminal" + } + } + ], + "default": [ + { + "model": "gemini-2.5-pro", + "actions": { + "terminal": "prompt", + "transient": "prompt", + "not_found": "prompt", + "unknown": "prompt" + }, + "stateTransitions": { + "terminal": "terminal", + "transient": "terminal", + "not_found": "terminal", + "unknown": "terminal" + } + }, + { + "model": "gemini-2.5-flash", + "isLastResort": true, + "actions": { + "terminal": "prompt", + "transient": "prompt", + "not_found": "prompt", + "unknown": "prompt" + }, + "stateTransitions": { + "terminal": "terminal", + "transient": "terminal", + "not_found": "terminal", + "unknown": "terminal" + } + } + ], + "lite": [ + { + "model": "gemini-2.5-flash-lite", + "actions": { + "terminal": "silent", + "transient": "silent", + "not_found": "silent", + "unknown": "silent" + }, + "stateTransitions": { + "terminal": "terminal", + 
"transient": "terminal", + "not_found": "terminal", + "unknown": "terminal" + } + }, + { + "model": "gemini-2.5-flash", + "actions": { + "terminal": "silent", + "transient": "silent", + "not_found": "silent", + "unknown": "silent" + }, + "stateTransitions": { + "terminal": "terminal", + "transient": "terminal", + "not_found": "terminal", + "unknown": "terminal" + } + }, + { + "model": "gemini-2.5-pro", + "isLastResort": true, + "actions": { + "terminal": "silent", + "transient": "silent", + "not_found": "silent", + "unknown": "silent" + }, + "stateTransitions": { + "terminal": "terminal", + "transient": "terminal", + "not_found": "terminal", + "unknown": "terminal" + } + } + ] + } + ``` + + - **Requires restart:** Yes + #### `agents` - **`agents.overrides`** (object): @@ -1039,6 +1246,22 @@ their corresponding top-level category object in your `settings.json` file. - **Description:** Disable user input on browser window during automation. - **Default:** `true` +- **`agents.browser.maxActionsPerTask`** (number): + - **Description:** The maximum number of tool calls allowed per browser task. + Enforcement is hard: the agent will be terminated when the limit is reached. + - **Default:** `100` + +- **`agents.browser.confirmSensitiveActions`** (boolean): + - **Description:** Require manual confirmation for sensitive browser actions + (e.g., fill_form, evaluate_script). + - **Default:** `false` + - **Requires restart:** Yes + +- **`agents.browser.blockFileUploads`** (boolean): + - **Description:** Hard-block file upload requests from the browser agent. + - **Default:** `false` + - **Requires restart:** Yes + #### `context` - **`context.fileName`** (string | string[]): @@ -1105,10 +1328,21 @@ their corresponding top-level category object in your `settings.json` file. - **Description:** Legacy full-process sandbox execution environment. 
Set to a boolean to enable or disable the sandbox, provide a string path to a sandbox profile, or specify an explicit sandbox command (e.g., "docker", "podman", - "lxc"). + "lxc", "windows-native"). - **Default:** `undefined` - **Requires restart:** Yes +- **`tools.sandboxAllowedPaths`** (array): + - **Description:** List of additional paths that the sandbox is allowed to + access. + - **Default:** `[]` + - **Requires restart:** Yes + +- **`tools.sandboxNetworkAccess`** (boolean): + - **Description:** Whether the sandbox is allowed to access the network. + - **Default:** `false` + - **Requires restart:** Yes + - **`tools.shell.enableInteractiveShell`** (boolean): - **Description:** Use node-pty for an interactive shell experience. Fallback to child_process still applies. @@ -1345,6 +1579,11 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `true` - **Requires restart:** Yes +- **`experimental.worktrees`** (boolean): + - **Description:** Enable automated Git worktree management for parallel work. + - **Default:** `false` + - **Requires restart:** Yes + - **`experimental.extensionManagement`** (boolean): - **Description:** Enable extension management features. - **Default:** `true` @@ -1431,6 +1670,13 @@ their corresponding top-level category object in your `settings.json` file. - **Default:** `"gemma3-1b-gpu-custom"` - **Requires restart:** Yes +- **`experimental.memoryManager`** (boolean): + - **Description:** Replace the built-in save_memory tool with a memory manager + subagent that supports adding, removing, de-duplicating, and organizing + memories. + - **Default:** `false` + - **Requires restart:** Yes + - **`experimental.topicUpdateNarration`** (boolean): - **Description:** Enable the experimental Topic & Update communication model for reduced chattiness and structured progress reporting. @@ -1539,7 +1785,11 @@ their corresponding top-level category object in your `settings.json` file. 
- **Default:** `true` - **`admin.mcp.config`** (object): - - **Description:** Admin-configured MCP servers. + - **Description:** Admin-configured MCP servers (allowlist). + - **Default:** `{}` + +- **`admin.mcp.requiredConfig`** (object): + - **Description:** Admin-required MCP servers that are always injected. - **Default:** `{}` - **`admin.skills.enabled`** (boolean): @@ -1559,7 +1809,9 @@ for compatibility. At least one of `command`, `url`, or `httpUrl` must be provided. If multiple are specified, the order of precedence is `httpUrl`, then `url`, then `command`. -> **Warning:** Avoid using underscores (`_`) in your server aliases (e.g., use + +> [!WARNING] +> Avoid using underscores (`_`) in your server aliases (e.g., use > `my-server` instead of `my_server`). The underlying policy engine parses Fully > Qualified Names (`mcp_server_tool`) using the first underscore after the > `mcp_` prefix. An underscore in your server alias will cause the parser to @@ -1908,37 +2160,14 @@ You can customize this behavior in your `settings.json` file: Arguments passed directly when running the CLI can override other configurations for that specific session. -- **`--model `** (**`-m `**): - - Specifies the Gemini model to use for this session. - - Example: `npm start -- --model gemini-3-pro-preview` -- **`--prompt `** (**`-p `**): - - **Deprecated:** Use positional arguments instead. - - Used to pass a prompt directly to the command. This invokes Gemini CLI in a - non-interactive mode. -- **`--prompt-interactive `** (**`-i `**): - - Starts an interactive session with the provided prompt as the initial input. - - The prompt is processed within the interactive session, not before it. - - Cannot be used when piping input from stdin. - - Example: `gemini -i "explain this code"` -- **`--output-format `**: - - **Description:** Specifies the format of the CLI output for non-interactive - mode. - - **Values:** - - `text`: (Default) The standard human-readable output. 
- - `json`: A machine-readable JSON output. - - `stream-json`: A streaming JSON output that emits real-time events. - - **Note:** For structured output and scripting, use the - `--output-format json` or `--output-format stream-json` flag. -- **`--sandbox`** (**`-s`**): - - Enables sandbox mode for this session. -- **`--debug`** (**`-d`**): - - Enables debug mode for this session, providing more verbose output. Open the - debug console with F12 to see the additional logging. - -- **`--help`** (or **`-h`**): - - Displays help information about command-line arguments. -- **`--yolo`**: - - Enables YOLO mode, which automatically approves all tool calls. +- **`--acp`**: + - Starts the agent in Agent Communication Protocol (ACP) mode. +- **`--allowed-mcp-server-names`**: + - A comma-separated list of MCP server names to allow for the session. +- **`--allowed-tools `**: + - A comma-separated list of tool names that will bypass the confirmation + dialog. + - Example: `gemini --allowed-tools "ShellTool(git status)"` - **`--approval-mode `**: - Sets the approval mode for tool calls. Available modes: - `default`: Prompt for approval on each tool call (default behavior) @@ -1952,35 +2181,24 @@ for that specific session. - Cannot be used together with `--yolo`. Use `--approval-mode=yolo` instead of `--yolo` for the new unified approach. - Example: `gemini --approval-mode auto_edit` -- **`--allowed-tools `**: - - A comma-separated list of tool names that will bypass the confirmation - dialog. - - Example: `gemini --allowed-tools "ShellTool(git status)"` -- **`--extensions `** (**`-e `**): - - Specifies a list of extensions to use for the session. If not provided, all - available extensions are used. - - Use the special term `gemini -e none` to disable all extensions. - - Example: `gemini -e my-extension -e my-other-extension` -- **`--list-extensions`** (**`-l`**): - - Lists all available extensions and exits. 
-- **`--resume [session_id]`** (**`-r [session_id]`**): - - Resume a previous chat session. Use "latest" for the most recent session, - provide a session index number, or provide a full session UUID. - - If no session_id is provided, defaults to "latest". - - Example: `gemini --resume 5` or `gemini --resume latest` or - `gemini --resume a1b2c3d4-e5f6-7890-abcd-ef1234567890` or `gemini --resume` - - See [Session Management](../cli/session-management.md) for more details. -- **`--list-sessions`**: - - List all available chat sessions for the current project and exit. - - Shows session indices, dates, message counts, and preview of first user - message. - - Example: `gemini --list-sessions` +- **`--debug`** (**`-d`**): + - Enables debug mode for this session, providing more verbose output. Open the + debug console with F12 to see the additional logging. - **`--delete-session `**: - Delete a specific chat session by its index number or full session UUID. - Use `--list-sessions` first to see available sessions, their indices, and UUIDs. - Example: `gemini --delete-session 3` or `gemini --delete-session a1b2c3d4-e5f6-7890-abcd-ef1234567890` +- **`--extensions `** (**`-e `**): + - Specifies a list of extensions to use for the session. If not provided, all + available extensions are used. + - Use the special term `gemini -e none` to disable all extensions. + - Example: `gemini -e my-extension -e my-other-extension` +- **`--fake-responses`**: + - Path to a file with fake model responses for testing. +- **`--help`** (or **`-h`**): + - Displays help information about command-line arguments. - **`--include-directories `**: - Includes additional directories in the workspace for multi-directory support. @@ -1988,19 +2206,52 @@ for that specific session. - 5 directories can be added at maximum. 
- Example: `--include-directories /path/to/project1,/path/to/project2` or `--include-directories /path/to/project1 --include-directories /path/to/project2` +- **`--list-extensions`** (**`-l`**): + - Lists all available extensions and exits. +- **`--list-sessions`**: + - List all available chat sessions for the current project and exit. + - Shows session indices, dates, message counts, and preview of first user + message. + - Example: `gemini --list-sessions` +- **`--model `** (**`-m `**): + - Specifies the Gemini model to use for this session. + - Example: `npm start -- --model gemini-3-pro-preview` +- **`--output-format `**: + - **Description:** Specifies the format of the CLI output for non-interactive + mode. + - **Values:** + - `text`: (Default) The standard human-readable output. + - `json`: A machine-readable JSON output. + - `stream-json`: A streaming JSON output that emits real-time events. + - **Note:** For structured output and scripting, use the + `--output-format json` or `--output-format stream-json` flag. +- **`--prompt `** (**`-p `**): + - **Deprecated:** Use positional arguments instead. + - Used to pass a prompt directly to the command. This invokes Gemini CLI in a + non-interactive mode. +- **`--prompt-interactive `** (**`-i `**): + - Starts an interactive session with the provided prompt as the initial input. + - The prompt is processed within the interactive session, not before it. + - Cannot be used when piping input from stdin. + - Example: `gemini -i "explain this code"` +- **`--record-responses`**: + - Path to a file to record model responses for testing. +- **`--resume [session_id]`** (**`-r [session_id]`**): + - Resume a previous chat session. Use "latest" for the most recent session, + provide a session index number, or provide a full session UUID. + - If no session_id is provided, defaults to "latest". 
+ - Example: `gemini --resume 5` or `gemini --resume latest` or + `gemini --resume a1b2c3d4-e5f6-7890-abcd-ef1234567890` or `gemini --resume` + - See [Session Management](../cli/session-management.md) for more details. +- **`--sandbox`** (**`-s`**): + - Enables sandbox mode for this session. - **`--screen-reader`**: - Enables screen reader mode, which adjusts the TUI for better compatibility with screen readers. - **`--version`**: - Displays the version of the CLI. -- **`--experimental-acp`**: - - Starts the agent in ACP mode. -- **`--allowed-mcp-server-names`**: - - Allowed MCP server names. -- **`--fake-responses`**: - - Path to a file with fake model responses for testing. -- **`--record-responses`**: - - Path to a file to record model responses for testing. +- **`--yolo`**: + - Enables YOLO mode, which automatically approves all tool calls. ## Context files (hierarchical instructional context) @@ -2125,9 +2376,13 @@ can be based on the base sandbox image: ```dockerfile FROM gemini-cli-sandbox -# Add your custom dependencies or configurations here +# Add your custom dependencies or configurations here. +# Note: The base image runs as the non-root 'node' user. +# You must switch to 'root' to install system packages. # For example: +# USER root # RUN apt-get update && apt-get install -y some-package +# USER node # COPY ./my-config /app/my-config ``` diff --git a/docs/reference/policy-engine.md b/docs/reference/policy-engine.md index fb97b5e071..c9fc482ea7 100644 --- a/docs/reference/policy-engine.md +++ b/docs/reference/policy-engine.md @@ -113,7 +113,9 @@ There are three possible decisions a rule can enforce: - `ask_user`: The user is prompted to approve or deny the tool call. (In non-interactive mode, this is treated as `deny`.) -> **Note:** The `deny` decision is the recommended way to exclude tools. The + +> [!NOTE] +> The `deny` decision is the recommended way to exclude tools. 
The > legacy `tools.exclude` setting in `settings.json` is deprecated in favor of > policy rules with a `deny` decision. @@ -239,15 +241,17 @@ directory are **ignored**. - **Linux / macOS:** Must be owned by `root` (UID 0) and NOT writable by group or others (e.g., `chmod 755`). - **Windows:** Must be in `C:\ProgramData`. Standard users (`Users`, `Everyone`) - must NOT have `Write`, `Modify`, or `Full Control` permissions. _Tip: If you - see a security warning, use the folder properties to remove write permissions - for non-admin groups. You may need to "Disable inheritance" in Advanced - Security Settings._ + must NOT have `Write`, `Modify`, or `Full Control` permissions. If you see a + security warning, use the folder properties to remove write permissions for + non-admin groups. You may need to "Disable inheritance" in Advanced Security + Settings. -**Note:** Supplemental admin policies (provided via `--admin-policy` or -`adminPolicyPaths` settings) are **NOT** subject to these strict ownership -checks, as they are explicitly provided by the user or administrator in their -current execution context. + +> [!NOTE] +> Supplemental admin policies (provided via `--admin-policy` or +> `adminPolicyPaths` settings) are **NOT** subject to these strict ownership +> checks, as they are explicitly provided by the user or administrator in their +> current execution context. ### TOML rule schema @@ -258,8 +262,8 @@ Here is a breakdown of the fields available in a TOML policy rule: # A unique name for the tool, or an array of names. toolName = "run_shell_command" -# (Optional) The name of a subagent. If provided, the rule only applies to tool calls -# made by this specific subagent. +# (Optional) The name of a subagent. If provided, the rule only applies to tool +# calls made by this specific subagent. subagent = "generalist" # (Optional) The name of an MCP server. 
Can be combined with toolName @@ -274,14 +278,17 @@ toolAnnotations = { readOnlyHint = true } argsPattern = '"command":"(git|npm)' # (Optional) A string or array of strings that a shell command must start with. -# This is syntactic sugar for `toolName = "run_shell_command"` and an `argsPattern`. +# This is syntactic sugar for `toolName = "run_shell_command"` and an +# `argsPattern`. commandPrefix = "git" # (Optional) A regex to match against the entire shell command. # This is also syntactic sugar for `toolName = "run_shell_command"`. -# Note: This pattern is tested against the JSON representation of the arguments (e.g., `{"command":""}`). -# Because it prepends `"command":"`, it effectively matches from the start of the command. -# Anchors like `^` or `$` apply to the full JSON string, so `^` should usually be avoided here. +# Note: This pattern is tested against the JSON representation of the arguments +# (e.g., `{"command":""}`). Because it prepends `"command":"`, +# it effectively matches from the start of the command. +# Anchors like `^` or `$` apply to the full JSON string, +# so `^` should usually be avoided here. # You cannot use commandPrefix and commandRegex in the same rule. commandRegex = "git (commit|push)" @@ -291,16 +298,26 @@ decision = "ask_user" # The priority of the rule, from 0 to 999. priority = 10 -# (Optional) A custom message to display when a tool call is denied by this rule. -# This message is returned to the model and user, useful for explaining *why* it was denied. -deny_message = "Deletion is permanent" +# (Optional) A custom message to display when a tool call is denied by this +# rule. This message is returned to the model and user, +# useful for explaining *why* it was denied. +denyMessage = "Deletion is permanent" # (Optional) An array of approval modes where this rule is active. modes = ["autoEdit"] -# (Optional) A boolean to restrict the rule to interactive (true) or non-interactive (false) environments. 
+# (Optional) A boolean to restrict the rule to interactive (true) or +# non-interactive (false) environments. # If omitted, the rule applies to both. interactive = true + +# (Optional) If true, lets shell commands use redirection operators +# (>, >>, <, <<, <<<). By default, the policy engine asks for confirmation +# when redirection is detected, even if a rule matches the command. +# This permission is granular; it only applies to the specific rule it's +# defined in. In chained commands (e.g., cmd1 > file && cmd2), each +# individual command rule must permit redirection if it's used. +allowRedirection = true ``` ### Using arrays (lists) @@ -348,7 +365,9 @@ using the `mcpName` field. **This is the recommended approach** for defining MCP policies, as it is much more robust than manually writing Fully Qualified Names (FQNs) or string wildcards. -> **Warning:** Do not use underscores (`_`) in your MCP server names (e.g., use + +> [!WARNING] +> Do not use underscores (`_`) in your MCP server names (e.g., use > `my-server` rather than `my_server`). The policy parser splits Fully Qualified > Names (`mcp_server_tool`) on the _first_ underscore following the `mcp_` > prefix. If your server name contains an underscore, the parser will @@ -383,7 +402,7 @@ server. mcpName = "untrusted-server" decision = "deny" priority = 500 -deny_message = "This server is not trusted by the admin." +denyMessage = "This server is not trusted by the admin." ``` **3. Targeting all MCP servers** @@ -394,6 +413,7 @@ registered MCP server. This is useful for setting category-wide defaults. ```toml # Ask user for any tool call from any MCP server [[rule]] +toolName = "*" mcpName = "*" decision = "ask_user" priority = 10 diff --git a/docs/reference/tools.md b/docs/reference/tools.md index e1a0958866..09f0518c07 100644 --- a/docs/reference/tools.md +++ b/docs/reference/tools.md @@ -63,29 +63,62 @@ details. 
## Available tools -The following table lists all available tools, categorized by their primary -function. +The following sections list all available tools, categorized by their primary +function. For detailed parameter information, see the linked documentation for +each tool. -| Category | Tool | Kind | Description | -| :---------- | :----------------------------------------------- | :------------ | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| Execution | [`run_shell_command`](../tools/shell.md) | `Execute` | Executes arbitrary shell commands. Supports interactive sessions and background processes. Requires manual confirmation.

**Parameters:** `command`, `description`, `dir_path`, `is_background` | -| File System | [`glob`](../tools/file-system.md) | `Search` | Finds files matching specific glob patterns across the workspace.

**Parameters:** `pattern`, `dir_path`, `case_sensitive`, `respect_git_ignore`, `respect_gemini_ignore` | -| File System | [`grep_search`](../tools/file-system.md) | `Search` | Searches for a regular expression pattern within file contents. Legacy alias: `search_file_content`.

**Parameters:** `pattern`, `dir_path`, `include`, `exclude_pattern`, `names_only`, `max_matches_per_file`, `total_max_matches` | -| File System | [`list_directory`](../tools/file-system.md) | `Read` | Lists the names of files and subdirectories within a specified path.

**Parameters:** `dir_path`, `ignore`, `file_filtering_options` | -| File System | [`read_file`](../tools/file-system.md) | `Read` | Reads the content of a specific file. Supports text, images, audio, and PDF.

**Parameters:** `file_path`, `start_line`, `end_line` | -| File System | [`read_many_files`](../tools/file-system.md) | `Read` | Reads and concatenates content from multiple files. Often triggered by the `@` symbol in your prompt.

**Parameters:** `include`, `exclude`, `recursive`, `useDefaultExcludes`, `file_filtering_options` | -| File System | [`replace`](../tools/file-system.md) | `Edit` | Performs precise text replacement within a file. Requires manual confirmation.

**Parameters:** `file_path`, `instruction`, `old_string`, `new_string`, `allow_multiple` | -| File System | [`write_file`](../tools/file-system.md) | `Edit` | Creates or overwrites a file with new content. Requires manual confirmation.

**Parameters:** `file_path`, `content` | -| Interaction | [`ask_user`](../tools/ask-user.md) | `Communicate` | Requests clarification or missing information via an interactive dialog.

**Parameters:** `questions` | -| Interaction | [`write_todos`](../tools/todos.md) | `Other` | Maintains an internal list of subtasks. The model uses this to track its own progress and display it to you.

**Parameters:** `todos` | -| Memory | [`activate_skill`](../tools/activate-skill.md) | `Other` | Loads specialized procedural expertise for specific tasks from the `.gemini/skills` directory.

**Parameters:** `name` | -| Memory | [`get_internal_docs`](../tools/internal-docs.md) | `Think` | Accesses Gemini CLI's own documentation to provide more accurate answers about its capabilities.

**Parameters:** `path` | -| Memory | [`save_memory`](../tools/memory.md) | `Think` | Persists specific facts and project details to your `GEMINI.md` file to retain context.

**Parameters:** `fact` | -| Planning | [`enter_plan_mode`](../tools/planning.md) | `Plan` | Switches the CLI to a safe, read-only "Plan Mode" for researching complex changes.

**Parameters:** `reason` | -| Planning | [`exit_plan_mode`](../tools/planning.md) | `Plan` | Finalizes a plan, presents it for review, and requests approval to start implementation.

**Parameters:** `plan` | -| System | `complete_task` | `Other` | Finalizes a subagent's mission and returns the result to the parent agent. This tool is not available to the user.

**Parameters:** `result` | -| Web | [`google_web_search`](../tools/web-search.md) | `Search` | Performs a Google Search to find up-to-date information.

**Parameters:** `query` | -| Web | [`web_fetch`](../tools/web-fetch.md) | `Fetch` | Retrieves and processes content from specific URLs. **Warning:** This tool can access local and private network addresses (e.g., localhost), which may pose a security risk if used with untrusted prompts.

**Parameters:** `prompt` | +### Execution + +| Tool | Kind | Description | +| :--------------------------------------- | :-------- | :----------------------------------------------------------------------------------------------------------------------- | +| [`run_shell_command`](../tools/shell.md) | `Execute` | Executes arbitrary shell commands. Supports interactive sessions and background processes. Requires manual confirmation. | + +### File System + +| Tool | Kind | Description | +| :------------------------------------------- | :------- | :---------------------------------------------------------------------------------------------------- | +| [`glob`](../tools/file-system.md) | `Search` | Finds files matching specific glob patterns across the workspace. | +| [`grep_search`](../tools/file-system.md) | `Search` | Searches for a regular expression pattern within file contents. Legacy alias: `search_file_content`. | +| [`list_directory`](../tools/file-system.md) | `Read` | Lists the names of files and subdirectories within a specified path. | +| [`read_file`](../tools/file-system.md) | `Read` | Reads the content of a specific file. Supports text, images, audio, and PDF. | +| [`read_many_files`](../tools/file-system.md) | `Read` | Reads and concatenates content from multiple files. Often triggered by the `@` symbol in your prompt. | +| [`replace`](../tools/file-system.md) | `Edit` | Performs precise text replacement within a file. Requires manual confirmation. | +| [`write_file`](../tools/file-system.md) | `Edit` | Creates or overwrites a file with new content. Requires manual confirmation. | + +### Interaction + +| Tool | Kind | Description | +| :--------------------------------- | :------------ | :------------------------------------------------------------------------------------- | +| [`ask_user`](../tools/ask-user.md) | `Communicate` | Requests clarification or missing information via an interactive dialog. 
| +| [`write_todos`](../tools/todos.md) | `Other` | Maintains an internal list of subtasks. The model uses this to track its own progress. | + +### Memory + +| Tool | Kind | Description | +| :----------------------------------------------- | :------ | :----------------------------------------------------------------------------------- | +| [`activate_skill`](../tools/activate-skill.md) | `Other` | Loads specialized procedural expertise from the `.gemini/skills` directory. | +| [`get_internal_docs`](../tools/internal-docs.md) | `Think` | Accesses Gemini CLI's own documentation for accurate answers about its capabilities. | +| [`save_memory`](../tools/memory.md) | `Think` | Persists specific facts and project details to your `GEMINI.md` file. | + +### Planning + +| Tool | Kind | Description | +| :---------------------------------------- | :----- | :--------------------------------------------------------------------------------------- | +| [`enter_plan_mode`](../tools/planning.md) | `Plan` | Switches the CLI to a safe, read-only "Plan Mode" for researching complex changes. | +| [`exit_plan_mode`](../tools/planning.md) | `Plan` | Finalizes a plan, presents it for review, and requests approval to start implementation. | + +### System + +| Tool | Kind | Description | +| :-------------- | :------ | :----------------------------------------------------------------------------------------------------------------- | +| `complete_task` | `Other` | Finalizes a subagent's mission and returns the result to the parent agent. This tool is not available to the user. 
| + +### Web + +| Tool | Kind | Description | +| :-------------------------------------------- | :------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [`google_web_search`](../tools/web-search.md) | `Search` | Performs a Google Search to find up-to-date information. | +| [`web_fetch`](../tools/web-fetch.md) | `Fetch` | Retrieves and processes content from specific URLs. **Warning:** This tool can access local and private network addresses (e.g., localhost), which may pose a security risk if used with untrusted prompts. | ## Under the hood @@ -95,7 +128,9 @@ For developers, the tool system is designed to be extensible and robust. The You can extend Gemini CLI with custom tools by configuring `tools.discoveryCommand` in your settings or by connecting to MCP servers. -> **Note:** For a deep dive into the internal Tool API and how to implement your + +> [!NOTE] +> For a deep dive into the internal Tool API and how to implement your > own tools in the codebase, see the `packages/core/src/tools/` directory in > GitHub. diff --git a/docs/release-confidence.md b/docs/release-confidence.md index 536e49772c..c46a702820 100644 --- a/docs/release-confidence.md +++ b/docs/release-confidence.md @@ -21,9 +21,13 @@ All workflows in `.github/workflows/ci.yml` must pass on the `main` branch (for nightly) or the release branch (for preview/stable). - **Platforms:** Tests must pass on **Linux and macOS**. - - _Note:_ Windows tests currently run with `continue-on-error: true`. While a - failure here doesn't block the release technically, it should be - investigated. + + +> [!NOTE] +> Windows tests currently run with `continue-on-error: true`. While a +> failure here doesn't block the release technically, it should be +> investigated. + - **Checks:** - **Linting:** No linting errors (ESLint, Prettier, etc.). 
- **Typechecking:** No TypeScript errors. diff --git a/docs/releases.md b/docs/releases.md index 8b506d45a8..23fb9fcf90 100644 --- a/docs/releases.md +++ b/docs/releases.md @@ -234,10 +234,12 @@ This workflow will automatically: Review the automatically created pull request(s) to ensure the cherry-pick was successful and the changes are correct. Once approved, merge the pull request. -**Security note:** The `release/*` branches are protected by branch protection -rules. A pull request to one of these branches requires at least one review from -a code owner before it can be merged. This ensures that no unauthorized code is -released. + +> [!WARNING] +> The `release/*` branches are protected by branch protection +> rules. A pull request to one of these branches requires at least one review from +> a code owner before it can be merged. This ensures that no unauthorized code is +> released. #### 2.5. Adding multiple commits to a hotfix (advanced) @@ -524,9 +526,11 @@ Notifications use [GitHub for Google Chat](https://workspace.google.com/marketplace/app/github_for_google_chat/536184076190). To modify the notifications, use `/github-settings` within the chat space. -> [!WARNING] The following instructions describe a fragile workaround that -> depends on the internal structure of the chat application's UI. It is likely -> to break with future updates. + +> [!WARNING] +> The following instructions describe a fragile workaround that depends on the +> internal structure of the chat application's UI. It is likely to break with +> future updates. The list of available labels is not currently populated correctly. 
If you want to add a label that does not appear alphabetically in the first 30 labels in the diff --git a/docs/resources/quota-and-pricing.md b/docs/resources/quota-and-pricing.md index 16d6b407b8..18beb7c761 100644 --- a/docs/resources/quota-and-pricing.md +++ b/docs/resources/quota-and-pricing.md @@ -12,6 +12,21 @@ quota for your needs, see the [Plans page](https://geminicli.com/plans/). This article outlines the specific quotas and pricing applicable to Gemini CLI when using different authentication methods. +The following table summarizes the available quotas and their respective limits: + +| Authentication method | Tier / Subscription | Maximum requests per user per day | +| :-------------------- | :------------------------------ | :-------------------------------- | +| **Google account** | Gemini Code Assist (Individual) | 1,000 requests | +| | Google AI Pro | 1,500 requests | +| | Google AI Ultra | 2,000 requests | +| **Gemini API key** | Free tier (Unpaid) | 250 requests | +| | Pay-as-you-go (Paid) | Varies | +| **Vertex AI** | Express mode (Free) | Varies | +| | Pay-as-you-go (Paid) | Varies | +| **Google Workspace** | Code Assist Standard | 1,500 requests | +| | Code Assist Enterprise | 2,000 requests | +| | Workspace AI Ultra | 2,000 requests | + Generally, there are three categories to choose from: - Free Usage: Ideal for experimentation and light use. @@ -20,6 +35,9 @@ Generally, there are three categories to choose from: - Pay-As-You-Go: The most flexible option for professional use, long-running tasks, or when you need full control over your usage. +Requests are limited per user per minute and are subject to the availability of +the service in times of high demand. + ## Free usage Access to Gemini CLI begins with a generous free tier, perfect for @@ -33,8 +51,7 @@ authorization type. For users who authenticate by using their Google account to access Gemini Code Assist for individuals. 
This includes: -- 1000 model requests / user / day -- 60 model requests / user / minute +- 1000 maximum model requests / user / day - Model requests will be made across the Gemini model family as determined by Gemini CLI. @@ -46,8 +63,7 @@ Learn more at If you are using a Gemini API key, you can also benefit from a free tier. This includes: -- 250 model requests / user / day -- 10 model requests / user / minute +- 250 maximum model requests / user / day - Model requests to Flash model only. Learn more at @@ -59,7 +75,7 @@ Vertex AI offers an Express Mode without the need to enable billing. This includes: - 90 days before you need to enable billing. -- Quotas and models are variable and specific to your account. +- Quotas and models are specific to your account and their limits vary. Learn more at [Vertex AI Express Mode Limits](https://cloud.google.com/vertex-ai/generative-ai/docs/start/express-mode/overview#quotas). @@ -112,11 +128,9 @@ Standard/Plus and AI Expanded, are not supported._ This includes the following request limits: - Gemini Code Assist Standard edition: - - 1500 model requests / user / day - - 120 model requests / user / minute + - 1500 maximum model requests / user / day - Gemini Code Assist Enterprise edition: - - 2000 model requests / user / day - - 120 model requests / user / minute + - 2000 maximum model requests / user / day - Model requests will be made across the Gemini model family as determined by Gemini CLI. diff --git a/docs/resources/tos-privacy.md b/docs/resources/tos-privacy.md index 00de950e74..2aaa14cb90 100644 --- a/docs/resources/tos-privacy.md +++ b/docs/resources/tos-privacy.md @@ -16,8 +16,10 @@ account. Your Gemini CLI Usage Statistics are handled in accordance with Google's Privacy Policy. -**Note:** See [quotas and pricing](quota-and-pricing.md) for the quota and -pricing details that apply to your usage of the Gemini CLI. 
+ +> [!NOTE] +> See [quotas and pricing](quota-and-pricing.md) for the quota and +> pricing details that apply to your usage of the Gemini CLI. ## Supported authentication methods diff --git a/docs/resources/troubleshooting.md b/docs/resources/troubleshooting.md index 53b0262d36..f490d41ffe 100644 --- a/docs/resources/troubleshooting.md +++ b/docs/resources/troubleshooting.md @@ -187,5 +187,7 @@ guide_, consider searching the Gemini CLI If you can't find an issue similar to yours, consider creating a new GitHub Issue with a detailed description. Pull requests are also welcome! -> **Note:** Issues tagged as "🔒Maintainers only" are reserved for project + +> [!NOTE] +> Issues tagged as "🔒Maintainers only" are reserved for project > maintainers. We will not accept pull requests related to these issues. diff --git a/docs/sidebar.json b/docs/sidebar.json index 6cac5ec9fd..ea82a64481 100644 --- a/docs/sidebar.json +++ b/docs/sidebar.json @@ -12,7 +12,6 @@ "label": "Authentication", "slug": "docs/get-started/authentication" }, - { "label": "Examples", "slug": "docs/get-started/examples" }, { "label": "CLI cheatsheet", "slug": "docs/cli/cli-reference" }, { "label": "Gemini 3 on Gemini CLI", @@ -99,6 +98,11 @@ { "label": "Agent Skills", "slug": "docs/cli/skills" }, { "label": "Checkpointing", "slug": "docs/cli/checkpointing" }, { "label": "Headless mode", "slug": "docs/cli/headless" }, + { + "label": "Git worktrees", + "badge": "🔬", + "slug": "docs/cli/git-worktrees" + }, { "label": "Hooks", "collapsed": true, @@ -107,7 +111,17 @@ { "label": "Reference", "slug": "docs/hooks/reference" } ] }, - { "label": "IDE integration", "slug": "docs/ide-integration" }, + { + "label": "IDE integration", + "collapsed": true, + "items": [ + { "label": "Overview", "slug": "docs/ide-integration" }, + { + "label": "Developer guide: ACP mode", + "slug": "docs/cli/acp-mode" + } + ] + }, { "label": "MCP servers", "slug": "docs/tools/mcp-server" }, { "label": "Model routing", "slug": 
"docs/cli/model-routing" }, { "label": "Model selection", "slug": "docs/cli/model" }, diff --git a/docs/tools/mcp-server.md b/docs/tools/mcp-server.md index 5cdbbacf1c..9fc84d54c0 100644 --- a/docs/tools/mcp-server.md +++ b/docs/tools/mcp-server.md @@ -176,8 +176,8 @@ Each server configuration supports the following properties: enabled by default. - **`excludeTools`** (string[]): List of tool names to exclude from this MCP server. Tools listed here will not be available to the model, even if they are - exposed by the server. **Note:** `excludeTools` takes precedence over - `includeTools` - if a tool is in both lists, it will be excluded. + exposed by the server. `excludeTools` takes precedence over `includeTools`. If + a tool is in both lists, it will be excluded. - **`targetAudience`** (string): The OAuth Client ID allowlisted on the IAP-protected application you are trying to access. Used with `authProviderType: 'service_account_impersonation'`. @@ -238,7 +238,9 @@ This follows the security principle that if a variable is explicitly configured by the user for a specific server, it constitutes informed consent to share that specific data with that server. -> **Note:** Even when explicitly defined, you should avoid hardcoding secrets. + +> [!NOTE] +> Even when explicitly defined, you should avoid hardcoding secrets. > Instead, use environment variable expansion (e.g., `"MY_KEY": "$MY_KEY"`) to > securely pull the value from your host environment at runtime. 
@@ -283,10 +285,12 @@ When connecting to an OAuth-enabled server: #### Browser redirect requirements -**Important:** OAuth authentication requires that your local machine can: - -- Open a web browser for authentication -- Receive redirects on `http://localhost:7777/oauth/callback` + +> [!IMPORTANT] +> OAuth authentication requires that your local machine can: +> +> - Open a web browser for authentication +> - Receive redirects on `http://localhost:7777/oauth/callback` This feature will not work in: @@ -577,7 +581,9 @@ every discovered MCP tool is assigned a strict namespace. [Special syntax for MCP tools](../reference/policy-engine.md#special-syntax-for-mcp-tools) in the Policy Engine documentation. -> **Warning:** Do not use underscores (`_`) in your MCP server names (e.g., use + +> [!WARNING] +> Do not use underscores (`_`) in your MCP server names (e.g., use > `my-server` rather than `my_server`). The policy parser splits Fully Qualified > Names (`mcp_server_tool`) on the _first_ underscore following the `mcp_` > prefix. If your server name contains an underscore, the parser will @@ -1116,7 +1122,9 @@ command has no flags. gemini mcp list ``` -> **Note on Trust:** For security, `stdio` MCP servers (those using the + +> [!NOTE] +> For security, `stdio` MCP servers (those using the > `command` property) are only tested and displayed as "Connected" if the > current folder is trusted. If the folder is untrusted, they will show as > "Disconnected". Use `gemini trust` to trust the current folder. diff --git a/docs/tools/planning.md b/docs/tools/planning.md index 9e9ab3d044..e554e47a34 100644 --- a/docs/tools/planning.md +++ b/docs/tools/planning.md @@ -11,7 +11,9 @@ by the agent when you ask it to "start a plan" using natural language. In this mode, the agent is restricted to read-only tools to allow for safe exploration and planning. -> **Note:** This tool is not available when the CLI is in YOLO mode. 
+ +> [!NOTE] +> This tool is not available when the CLI is in YOLO mode. - **Tool name:** `enter_plan_mode` - **Display name:** Enter Plan Mode diff --git a/docs/tools/shell.md b/docs/tools/shell.md index f31f571eca..26f0769e98 100644 --- a/docs/tools/shell.md +++ b/docs/tools/shell.md @@ -57,8 +57,8 @@ implementation, which does not support interactive commands. ### Showing color in output To show color in the shell output, you need to set the `tools.shell.showColor` -setting to `true`. **Note: This setting only applies when -`tools.shell.enableInteractiveShell` is enabled.** +setting to `true`. This setting only applies when +`tools.shell.enableInteractiveShell` is enabled. **Example `settings.json`:** @@ -75,8 +75,8 @@ setting to `true`. **Note: This setting only applies when ### Setting the pager You can set a custom pager for the shell output by setting the -`tools.shell.pager` setting. The default pager is `cat`. **Note: This setting -only applies when `tools.shell.enableInteractiveShell` is enabled.** +`tools.shell.pager` setting. The default pager is `cat`. This setting only +applies when `tools.shell.enableInteractiveShell` is enabled. **Example `settings.json`:** diff --git a/eslint.config.js b/eslint.config.js index 99b1b28f4b..e827f9b236 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -35,13 +35,19 @@ const commonRestrictedSyntaxRules = [ message: 'Do not throw string literals or non-Error objects. Throw new Error("...") instead.', }, + { + selector: + 'UnaryExpression[operator="typeof"] > MemberExpression[computed=true][property.type="Literal"]', + message: + 'Do not use typeof to check object properties. 
Define a TypeScript interface and a type guard function instead.', + }, ]; export default tseslint.config( { // Global ignores ignores: [ - 'node_modules/*', + '**/node_modules/**', 'eslint.config.js', 'packages/**/dist/**', 'bundle/**', @@ -50,7 +56,7 @@ export default tseslint.config( 'dist/**', 'evals/**', 'packages/test-utils/**', - '.gemini/skills/**', + '.gemini/**', '**/*.d.ts', ], }, @@ -133,16 +139,7 @@ export default tseslint.config( 'no-cond-assign': 'error', 'no-debugger': 'error', 'no-duplicate-case': 'error', - 'no-restricted-syntax': [ - 'error', - ...commonRestrictedSyntaxRules, - { - selector: - 'UnaryExpression[operator="typeof"] > MemberExpression[computed=true][property.type="Literal"]', - message: - 'Do not use typeof to check object properties. Define a TypeScript interface and a type guard function instead.', - }, - ], + 'no-restricted-syntax': ['error', ...commonRestrictedSyntaxRules], 'no-unsafe-finally': 'error', 'no-unused-expressions': 'off', // Disable base rule '@typescript-eslint/no-unused-expressions': [ @@ -161,6 +158,7 @@ export default tseslint.config( '@typescript-eslint/await-thenable': ['error'], '@typescript-eslint/no-floating-promises': ['error'], '@typescript-eslint/no-unnecessary-type-assertion': ['error'], + '@typescript-eslint/no-misused-spread': ['error'], 'no-restricted-imports': [ 'error', { @@ -319,7 +317,12 @@ export default tseslint.config( }, }, { - files: ['./scripts/**/*.js', 'esbuild.config.js', 'packages/core/scripts/**/*.{js,mjs}'], + files: [ + './scripts/**/*.js', + 'packages/*/scripts/**/*.js', + 'esbuild.config.js', + 'packages/core/scripts/**/*.{js,mjs}', + ], languageOptions: { globals: { ...globals.node, diff --git a/evals/README.md b/evals/README.md index 6cfecbad07..9e3697a6b8 100644 --- a/evals/README.md +++ b/evals/README.md @@ -6,6 +6,10 @@ for changes to system prompts, tool definitions, and other model-steering mechanisms, and as a tool for assessing feature reliability by model, and preventing 
regressions. +> [!TIP] **Agent Automation**: If you are pair-programming with Gemini CLI, you +> can leverage the **behavioral-evals skill** to automate fixing failing tests +> or promoting incubation candidates. + ## Why Behavioral Evals? Unlike traditional **integration tests** which verify that the system functions @@ -121,7 +125,7 @@ import { describe, expect } from 'vitest'; import { evalTest } from './test-helper.js'; describe('my_feature', () => { - // New tests MUST start as USUALLY_PASSES and be promoted via /promote-behavioral-eval + // New tests MUST start as USUALLY_PASSES and be promoted based on consistency metrics evalTest('USUALLY_PASSES', { name: 'should do something', prompt: 'do it', @@ -183,12 +187,10 @@ mandatory deflaking process. 1. **Incubation**: You must create all new tests with the `USUALLY_PASSES` policy. This lets them be monitored in the nightly runs without blocking PRs. -2. **Monitoring**: The test must complete at least 10 nightly runs across all +2. **Monitoring**: The test must complete at least 7 nightly runs across all supported models. -3. **Promotion**: Promotion to `ALWAYS_PASSES` happens exclusively through the - `/promote-behavioral-eval` slash command. This command verifies the 100% - success rate requirement is met across many runs before updating the test - policy. +3. **Promotion**: Promotion to `ALWAYS_PASSES` is conducted by the agent after + verifying the 100% success rate requirement is met across many runs. This promotion process is essential for preventing the introduction of flaky evaluations into the CI. @@ -225,42 +227,21 @@ tool definition has made the model's behavior less reliable. ## Fixing Evaluations -If an evaluation is failing or has a regressed pass rate, you can use the -`/fix-behavioral-eval` command within Gemini CLI to help investigate and fix the -issue. - -### `/fix-behavioral-eval` - -This command is designed to automate the investigation and fixing process for -failing evaluations. 
It will: +If an evaluation is failing or has a regressed pass rate, ask the agent to +investigate and fix the issue using the **behavioral-evals skill**. The agent +will automate the following process: 1. **Investigate**: Fetch the latest results from the nightly workflow using the `gh` CLI, identify the failing test, and review test trajectory logs in `evals/logs`. 2. **Fix**: Suggest and apply targeted fixes to the prompt or tool definitions. - It prioritizes minimal changes to `prompt.ts`, tool instructions, and - modules that contribute to the prompt. It generally tries to avoid changing - the test itself. -3. **Verify**: Re-run the test 3 times across multiple models (e.g., Gemini - 3.0, Gemini 3 Flash, Gemini 2.5 Pro) to ensure stability and calculate a - success rate. -4. **Report**: Provide a summary of the success rate for each model and details - on the applied fixes. + It prioritizes minimal changes to `prompt.ts` and tool instructions, + avoiding changing the test itself unless necessary. +3. **Verify**: Re-run the test locally across multiple models to ensure + stability. +4. **Report**: Provide a summary of the success rate. -To use it, run: - -```bash -gemini /fix-behavioral-eval -``` - -You can also provide a link to a specific GitHub Action run or the name of a -specific test to focus the investigation: - -```bash -gemini /fix-behavioral-eval https://github.com/google-gemini/gemini-cli/actions/runs/123456789 -``` - -When investigating failures manually, you can also enable verbose agent logs by +When investigating failures manually, you can enable verbose agent logs by setting the `GEMINI_DEBUG_LOG_FILE` environment variable. ### Best practices @@ -273,25 +254,14 @@ instrospecting on its prompt when asked the right questions. ## Promoting evaluations -Evaluations must be promoted from `USUALLY_PASSES` to `ALWAYS_PASSES` -exclusively using the `/promote-behavioral-eval` slash command. 
Manual promotion -is not allowed to ensure that the 100% success rate requirement is empirically -met. +Evaluations must be promoted from `USUALLY_PASSES` to `ALWAYS_PASSES` by the +agent to ensure that the 100% success rate requirement is empirically met. -### `/promote-behavioral-eval` - -This command automates the promotion of stable tests by: +The agent automates the promotion by: 1. **Investigating**: Analyzing the results of the last 7 nightly runs on the - `main` branch using the `gh` CLI. -2. **Criteria Check**: Identifying tests that have passed 100% of the time for - ALL enabled models across the entire 7-run history. -3. **Promotion**: Updating the test file's policy from `USUALLY_PASSES` to - `ALWAYS_PASSES`. + `main` branch. +2. **Criteria Check**: Ensuring tests passed 100% of the time for ALL enabled + models. +3. **Promotion**: Updating the test file's policy to `ALWAYS_PASSES`. 4. **Verification**: Running the promoted test locally to ensure correctness. - -To run it: - -```bash -gemini /promote-behavioral-eval -``` diff --git a/evals/app-test-helper.ts b/evals/app-test-helper.ts index 89f1582bdc..8ea842aa38 100644 --- a/evals/app-test-helper.ts +++ b/evals/app-test-helper.ts @@ -15,9 +15,26 @@ import fs from 'node:fs'; import path from 'node:path'; import { DEFAULT_GEMINI_MODEL } from '@google/gemini-cli-core'; +/** + * Config overrides for evals, with tool-restriction fields explicitly + * forbidden. Evals must test against the full, default tool set to ensure + * realistic behavior. + */ +interface EvalConfigOverrides { + /** Restricting tools via excludeTools in evals is forbidden. */ + excludeTools?: never; + /** Restricting tools via coreTools in evals is forbidden. */ + coreTools?: never; + /** Restricting tools via allowedTools in evals is forbidden. */ + allowedTools?: never; + /** Restricting tools via mainAgentTools in evals is forbidden. 
*/ + mainAgentTools?: never; + [key: string]: unknown; +} + export interface AppEvalCase { name: string; - configOverrides?: any; + configOverrides?: EvalConfigOverrides; prompt: string; timeout?: number; files?: Record; @@ -62,7 +79,7 @@ export function appEvalTest(policy: EvalPolicy, evalCase: AppEvalCase) { } // Render the app! - rig.render(); + await rig.render(); // Wait for initial ready state await rig.waitForIdle(); diff --git a/evals/cli_help_delegation.eval.ts b/evals/cli_help_delegation.eval.ts new file mode 100644 index 0000000000..8be3bf1c51 --- /dev/null +++ b/evals/cli_help_delegation.eval.ts @@ -0,0 +1,25 @@ +import { describe, expect } from 'vitest'; +import { evalTest } from './test-helper.js'; + +describe('CliHelpAgent Delegation', () => { + evalTest('USUALLY_PASSES', { + name: 'should delegate to cli_help agent for subagent creation questions', + params: { + settings: { + experimental: { + enableAgents: true, + }, + }, + }, + prompt: 'Help me create a subagent in this project', + timeout: 60000, + assert: async (rig, _result) => { + const toolLogs = rig.readToolLogs(); + const toolCallIndex = toolLogs.findIndex( + (log) => log.toolRequest.name === 'cli_help', + ); + expect(toolCallIndex).toBeGreaterThan(-1); + expect(toolCallIndex).toBeLessThan(5); // Called within first 5 turns + }, + }); +}); diff --git a/evals/generalist_delegation.eval.ts b/evals/generalist_delegation.eval.ts index 7e6358ae1f..81252880eb 100644 --- a/evals/generalist_delegation.eval.ts +++ b/evals/generalist_delegation.eval.ts @@ -21,7 +21,6 @@ describe('generalist_delegation', () => { experimental: { enableAgents: true, }, - excludeTools: ['run_shell_command'], }, files: { 'file1.ts': 'console.log("no semi")', @@ -65,7 +64,6 @@ describe('generalist_delegation', () => { experimental: { enableAgents: true, }, - excludeTools: ['run_shell_command'], }, files: { 'src/a.ts': 'export const a = 1;', @@ -106,7 +104,6 @@ describe('generalist_delegation', () => { experimental: { 
enableAgents: true, }, - excludeTools: ['run_shell_command'], }, files: { 'README.md': 'This is a proyect.', @@ -141,7 +138,6 @@ describe('generalist_delegation', () => { experimental: { enableAgents: true, }, - excludeTools: ['run_shell_command'], }, files: { 'src/VERSION': '1.2.3', diff --git a/evals/model_steering.eval.ts b/evals/model_steering.eval.ts index 87109c1225..2cb87edcc2 100644 --- a/evals/model_steering.eval.ts +++ b/evals/model_steering.eval.ts @@ -12,10 +12,9 @@ import { appEvalTest } from './app-test-helper.js'; import { PolicyDecision } from '@google/gemini-cli-core'; describe('Model Steering Behavioral Evals', () => { - appEvalTest('ALWAYS_PASSES', { + appEvalTest('USUALLY_PASSES', { name: 'Corrective Hint: Model switches task based on hint during tool turn', configOverrides: { - excludeTools: ['run_shell_command', 'ls', 'google_web_search'], modelSteering: true, }, files: { @@ -52,10 +51,9 @@ describe('Model Steering Behavioral Evals', () => { }, }); - appEvalTest('ALWAYS_PASSES', { + appEvalTest('USUALLY_PASSES', { name: 'Suggestive Hint: Model incorporates user guidance mid-stream', configOverrides: { - excludeTools: ['run_shell_command', 'ls', 'google_web_search'], modelSteering: true, }, files: {}, diff --git a/evals/plan_mode.eval.ts b/evals/plan_mode.eval.ts index a37e5f91b4..8b01f68155 100644 --- a/evals/plan_mode.eval.ts +++ b/evals/plan_mode.eval.ts @@ -136,6 +136,32 @@ describe('plan_mode', () => { expect(wasToolCalled, 'Expected exit_plan_mode tool to be called').toBe( true, ); + + const toolLogs = rig.readToolLogs(); + const exitPlanCall = toolLogs.find( + (log) => log.toolRequest.name === 'exit_plan_mode', + ); + expect( + exitPlanCall, + 'Expected to find exit_plan_mode in tool logs', + ).toBeDefined(); + + const args = JSON.parse(exitPlanCall!.toolRequest.args); + expect(args.plan_filename, 'plan_filename should be a string').toBeTypeOf( + 'string', + ); + expect(args.plan_filename, 'plan_filename should end with .md').toMatch( + 
/\.md$/, + ); + expect( + args.plan_filename, + 'plan_filename should not be a path', + ).not.toContain('/'); + expect( + args.plan_filename, + 'plan_filename should not be a path', + ).not.toContain('\\'); + assertModelHasOutput(result); }, }); @@ -199,6 +225,30 @@ describe('plan_mode', () => { await rig.waitForTelemetryReady(); const toolLogs = rig.readToolLogs(); + const exitPlanCall = toolLogs.find( + (log) => log.toolRequest.name === 'exit_plan_mode', + ); + expect( + exitPlanCall, + 'Expected to find exit_plan_mode in tool logs', + ).toBeDefined(); + + const args = JSON.parse(exitPlanCall!.toolRequest.args); + expect(args.plan_filename, 'plan_filename should be a string').toBeTypeOf( + 'string', + ); + expect(args.plan_filename, 'plan_filename should end with .md').toMatch( + /\.md$/, + ); + expect( + args.plan_filename, + 'plan_filename should not be a path', + ).not.toContain('/'); + expect( + args.plan_filename, + 'plan_filename should not be a path', + ).not.toContain('\\'); + // Check if plan was written const planWrite = toolLogs.find( (log) => diff --git a/evals/redundant_casts.eval.ts b/evals/redundant_casts.eval.ts new file mode 100644 index 0000000000..83750e44d4 --- /dev/null +++ b/evals/redundant_casts.eval.ts @@ -0,0 +1,82 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import { evalTest } from './test-helper.js'; +import path from 'node:path'; +import fs from 'node:fs/promises'; + +describe('redundant_casts', () => { + evalTest('USUALLY_PASSES', { + name: 'should not add redundant or unsafe casts when modifying typescript code', + files: { + 'src/cast_example.ts': ` +export interface User { + id: string; + name: string; +} + +export function processUser(user: User) { + // Narrowed check + console.log("Processing user: " + user.name); +} + +export function handleUnknown(data: unknown) { + // Goal: log data.id if it exists + console.log("Handling data"); 
+} + +export function handleError() { + try { + throw new Error("fail"); + } catch (err) { + // Goal: log err.message + console.error("Error happened"); + } +} +`, + }, + prompt: ` +1. In src/cast_example.ts, update processUser to return the name in uppercase. +2. In handleUnknown, log the "id" property if "data" is an object that contains it. +3. In handleError, log the error message from "err". +`, + assert: async (rig) => { + const filePath = path.join(rig.testDir!, 'src/cast_example.ts'); + const content = await fs.readFile(filePath, 'utf-8'); + + // 1. Redundant Cast Check (Same type) + // Bad: (user.name as string).toUpperCase() + expect(content, 'Should not cast a known string to string').not.toContain( + 'as string', + ); + + // 2. Unsafe Cast Check (Unknown object) + // Bad: (data as any).id or (data as {id: string}).id + expect( + content, + 'Should not use unsafe casts for unknown property access', + ).not.toContain('as any'); + expect( + content, + 'Should not use unsafe casts for unknown property access', + ).not.toContain('as {'); + + // 3. Unsafe Cast Check (Error handling) + // Bad: (err as Error).message + // Good: if (err instanceof Error) { ... } + expect( + content, + 'Should prefer instanceof over casting for errors', + ).not.toContain('as Error'); + + // Verify implementation + expect(content).toContain('toUpperCase()'); + expect(content).toContain('message'); + expect(content).toContain('id'); + }, + }); +}); diff --git a/evals/sandbox_recovery.eval.ts b/evals/sandbox_recovery.eval.ts new file mode 100755 index 0000000000..ad6b630236 --- /dev/null +++ b/evals/sandbox_recovery.eval.ts @@ -0,0 +1,42 @@ +import { describe, expect } from 'vitest'; +import { evalTest } from './test-helper.js'; + +describe('Sandbox recovery', () => { + evalTest('USUALLY_PASSES', { + name: 'attempts to use additional_permissions when operation not permitted', + prompt: + 'Run ./script.sh. It will fail with "Operation not permitted". 
When it does, you must retry running it by passing the appropriate additional_permissions.', + files: { + 'script.sh': + '#!/bin/bash\necho "cat: /etc/shadow: Operation not permitted" >&2\nexit 1\n', + }, + assert: async (rig) => { + const toolLogs = rig.readToolLogs(); + const shellCalls = toolLogs.filter( + (log) => + log.toolRequest?.name === 'run_shell_command' && + log.toolRequest?.args?.includes('script.sh'), + ); + + // The agent should have tried running the command. + expect( + shellCalls.length, + 'Agent should have called run_shell_command', + ).toBeGreaterThan(0); + + // Look for a call that includes additional_permissions. + const hasAdditionalPermissions = shellCalls.some((call) => { + const args = + typeof call.toolRequest.args === 'string' + ? JSON.parse(call.toolRequest.args) + : call.toolRequest.args; + return args.additional_permissions !== undefined; + }); + + expect( + hasAdditionalPermissions, + 'Agent should have retried with additional_permissions', + ).toBe(true); + }, + }); +}); diff --git a/evals/save_memory.eval.ts b/evals/save_memory.eval.ts index 901cbf3c17..25e081a819 100644 --- a/evals/save_memory.eval.ts +++ b/evals/save_memory.eval.ts @@ -16,9 +16,7 @@ describe('save_memory', () => { const rememberingFavoriteColor = "Agent remembers user's favorite color"; evalTest('ALWAYS_PASSES', { name: rememberingFavoriteColor, - params: { - settings: { tools: { core: ['save_memory'] } }, - }, + prompt: `remember that my favorite color is blue. what is my favorite color? 
tell me that and surround it with $ symbol`, @@ -38,9 +36,7 @@ describe('save_memory', () => { const rememberingCommandRestrictions = 'Agent remembers command restrictions'; evalTest('USUALLY_PASSES', { name: rememberingCommandRestrictions, - params: { - settings: { tools: { core: ['save_memory'] } }, - }, + prompt: `I don't want you to ever run npm commands.`, assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall('save_memory'); @@ -59,9 +55,7 @@ describe('save_memory', () => { const rememberingWorkflow = 'Agent remembers workflow preferences'; evalTest('USUALLY_PASSES', { name: rememberingWorkflow, - params: { - settings: { tools: { core: ['save_memory'] } }, - }, + prompt: `I want you to always lint after building.`, assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall('save_memory'); @@ -81,9 +75,7 @@ describe('save_memory', () => { 'Agent ignores temporary conversation details'; evalTest('ALWAYS_PASSES', { name: ignoringTemporaryInformation, - params: { - settings: { tools: { core: ['save_memory'] } }, - }, + prompt: `I'm going to get a coffee.`, assert: async (rig, result) => { await rig.waitForTelemetryReady(); @@ -106,9 +98,7 @@ describe('save_memory', () => { const rememberingPetName = "Agent remembers user's pet's name"; evalTest('ALWAYS_PASSES', { name: rememberingPetName, - params: { - settings: { tools: { core: ['save_memory'] } }, - }, + prompt: `Please remember that my dog's name is Buddy.`, assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall('save_memory'); @@ -127,9 +117,7 @@ describe('save_memory', () => { const rememberingCommandAlias = 'Agent remembers custom command aliases'; evalTest('ALWAYS_PASSES', { name: rememberingCommandAlias, - params: { - settings: { tools: { core: ['save_memory'] } }, - }, + prompt: `When I say 'start server', you should run 'npm run dev'.`, assert: async (rig, result) => { const wasToolCalled = await 
rig.waitForToolCall('save_memory'); @@ -149,18 +137,6 @@ describe('save_memory', () => { "Agent ignores workspace's database schema location"; evalTest('USUALLY_PASSES', { name: ignoringDbSchemaLocation, - params: { - settings: { - tools: { - core: [ - 'save_memory', - 'list_directory', - 'read_file', - 'run_shell_command', - ], - }, - }, - }, prompt: `The database schema for this workspace is located in \`db/schema.sql\`.`, assert: async (rig, result) => { await rig.waitForTelemetryReady(); @@ -180,9 +156,7 @@ describe('save_memory', () => { "Agent remembers user's coding style preference"; evalTest('ALWAYS_PASSES', { name: rememberingCodingStyle, - params: { - settings: { tools: { core: ['save_memory'] } }, - }, + prompt: `I prefer to use tabs instead of spaces for indentation.`, assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall('save_memory'); @@ -202,18 +176,6 @@ describe('save_memory', () => { 'Agent ignores workspace build artifact location'; evalTest('USUALLY_PASSES', { name: ignoringBuildArtifactLocation, - params: { - settings: { - tools: { - core: [ - 'save_memory', - 'list_directory', - 'read_file', - 'run_shell_command', - ], - }, - }, - }, prompt: `In this workspace, build artifacts are stored in the \`dist/artifacts\` directory.`, assert: async (rig, result) => { await rig.waitForTelemetryReady(); @@ -232,18 +194,6 @@ describe('save_memory', () => { const ignoringMainEntryPoint = "Agent ignores workspace's main entry point"; evalTest('USUALLY_PASSES', { name: ignoringMainEntryPoint, - params: { - settings: { - tools: { - core: [ - 'save_memory', - 'list_directory', - 'read_file', - 'run_shell_command', - ], - }, - }, - }, prompt: `The main entry point for this workspace is \`src/index.js\`.`, assert: async (rig, result) => { await rig.waitForTelemetryReady(); @@ -262,9 +212,7 @@ describe('save_memory', () => { const rememberingBirthday = "Agent remembers user's birthday"; evalTest('ALWAYS_PASSES', { name: 
rememberingBirthday, - params: { - settings: { tools: { core: ['save_memory'] } }, - }, + prompt: `My birthday is on June 15th.`, assert: async (rig, result) => { const wasToolCalled = await rig.waitForToolCall('save_memory'); @@ -279,4 +227,136 @@ describe('save_memory', () => { }); }, }); + + const proactiveMemoryFromLongSession = + 'Agent saves preference from earlier in conversation history'; + evalTest('USUALLY_PASSES', { + name: proactiveMemoryFromLongSession, + params: { + settings: { + experimental: { memoryManager: true }, + }, + }, + messages: [ + { + id: 'msg-1', + type: 'user', + content: [ + { + text: 'By the way, I always prefer Vitest over Jest for testing in all my projects.', + }, + ], + timestamp: '2026-01-01T00:00:00Z', + }, + { + id: 'msg-2', + type: 'gemini', + content: [{ text: 'Noted! What are you working on today?' }], + timestamp: '2026-01-01T00:00:05Z', + }, + { + id: 'msg-3', + type: 'user', + content: [ + { + text: "I'm debugging a failing API endpoint. The /users route returns a 500 error.", + }, + ], + timestamp: '2026-01-01T00:01:00Z', + }, + { + id: 'msg-4', + type: 'gemini', + content: [ + { + text: 'It looks like the database connection might not be initialized before the query runs.', + }, + ], + timestamp: '2026-01-01T00:01:10Z', + }, + { + id: 'msg-5', + type: 'user', + content: [ + { text: 'Good catch — I fixed the import and the route works now.' }, + ], + timestamp: '2026-01-01T00:02:00Z', + }, + { + id: 'msg-6', + type: 'gemini', + content: [{ text: 'Great! Anything else you would like to work on?' 
}], + timestamp: '2026-01-01T00:02:05Z', + }, + ], + prompt: + 'Please save any persistent preferences or facts about me from our conversation to memory.', + assert: async (rig, result) => { + const wasToolCalled = await rig.waitForToolCall( + 'save_memory', + undefined, + (args) => /vitest/i.test(args), + ); + expect( + wasToolCalled, + 'Expected save_memory to be called with the Vitest preference from the conversation history', + ).toBe(true); + + assertModelHasOutput(result); + }, + }); + + const memoryManagerRoutingPreferences = + 'Agent routes global and project preferences to memory'; + evalTest('USUALLY_PASSES', { + name: memoryManagerRoutingPreferences, + params: { + settings: { + experimental: { memoryManager: true }, + }, + }, + messages: [ + { + id: 'msg-1', + type: 'user', + content: [ + { + text: 'I always use dark mode in all my editors and terminals.', + }, + ], + timestamp: '2026-01-01T00:00:00Z', + }, + { + id: 'msg-2', + type: 'gemini', + content: [{ text: 'Got it, I will keep that in mind!' }], + timestamp: '2026-01-01T00:00:05Z', + }, + { + id: 'msg-3', + type: 'user', + content: [ + { + text: 'For this project specifically, we use 2-space indentation.', + }, + ], + timestamp: '2026-01-01T00:01:00Z', + }, + { + id: 'msg-4', + type: 'gemini', + content: [ + { text: 'Understood, 2-space indentation for this project.' 
}, + ], + timestamp: '2026-01-01T00:01:05Z', + }, + ], + prompt: 'Please save the preferences I mentioned earlier to memory.', + assert: async (rig, result) => { + const wasToolCalled = await rig.waitForToolCall('save_memory'); + expect(wasToolCalled, 'Expected save_memory to be called').toBe(true); + + assertModelHasOutput(result); + }, + }); }); diff --git a/evals/subagents.eval.ts b/evals/subagents.eval.ts index 7e9b3cd808..140925964b 100644 --- a/evals/subagents.eval.ts +++ b/evals/subagents.eval.ts @@ -4,21 +4,21 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe } from 'vitest'; -import { evalTest } from './test-helper.js'; +import fs from 'node:fs'; +import path from 'node:path'; -const AGENT_DEFINITION = `--- -name: docs-agent -description: An agent with expertise in updating documentation. -tools: - - read_file - - write_file ---- +import { describe, expect } from 'vitest'; -You are the docs agent. Update the documentation. -`; +import { evalTest, TEST_AGENTS } from './test-helper.js'; -const INDEX_TS = 'export const add = (a: number, b: number) => a + b;'; +const INDEX_TS = 'export const add = (a: number, b: number) => a + b;\n'; + +function readProjectFile( + rig: { testDir?: string }, + relativePath: string, +): string { + return fs.readFileSync(path.join(rig.testDir!, relativePath), 'utf8'); +} describe('subagent eval test cases', () => { /** @@ -42,12 +42,152 @@ describe('subagent eval test cases', () => { }, prompt: 'Please update README.md with a description of this library.', files: { - '.gemini/agents/test-agent.md': AGENT_DEFINITION, + ...TEST_AGENTS.DOCS_AGENT.asFile(), 'index.ts': INDEX_TS, - 'README.md': 'TODO: update the README.', + 'README.md': 'TODO: update the README.\n', }, assert: async (rig, _result) => { - await rig.expectToolCallSuccess(['docs-agent']); + await rig.expectToolCallSuccess([TEST_AGENTS.DOCS_AGENT.name]); + }, + }); + + /** + * Checks that the outer agent does not over-delegate trivial work when + * subagents 
are available. This helps catch orchestration overuse. + */ + evalTest('USUALLY_PASSES', { + name: 'should avoid delegating trivial direct edit work', + params: { + settings: { + experimental: { + enableAgents: true, + agents: { + overrides: { + generalist: { enabled: true }, + }, + }, + }, + }, + }, + prompt: + 'Rename the exported function in index.ts from add to sum and update the file directly.', + files: { + ...TEST_AGENTS.DOCS_AGENT.asFile(), + 'index.ts': INDEX_TS, + }, + assert: async (rig, _result) => { + const updatedIndex = readProjectFile(rig, 'index.ts'); + const toolLogs = rig.readToolLogs() as Array<{ + toolRequest: { name: string }; + }>; + + expect(updatedIndex).toContain('export const sum ='); + expect( + toolLogs.some( + (l) => l.toolRequest.name === TEST_AGENTS.DOCS_AGENT.name, + ), + ).toBe(false); + expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe( + false, + ); + }, + }); + + /** + * Checks that the outer agent prefers a more relevant specialist over a + * broad generalist when both are available. + * + * This is meant to codify the "overusing Generalist" failure mode. 
+ */ + evalTest('USUALLY_PASSES', { + name: 'should prefer relevant specialist over generalist', + params: { + settings: { + experimental: { + enableAgents: true, + agents: { + overrides: { + generalist: { enabled: true }, + }, + }, + }, + }, + }, + prompt: 'Please add a small test file that verifies add(1, 2) returns 3.', + files: { + ...TEST_AGENTS.TESTING_AGENT.asFile(), + 'index.ts': INDEX_TS, + 'package.json': JSON.stringify( + { + name: 'subagent-eval-project', + version: '1.0.0', + type: 'module', + }, + null, + 2, + ), + }, + assert: async (rig, _result) => { + const toolLogs = rig.readToolLogs() as Array<{ + toolRequest: { name: string }; + }>; + + await rig.expectToolCallSuccess([TEST_AGENTS.TESTING_AGENT.name]); + expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe( + false, + ); + }, + }); + + /** + * Checks cardinality and decomposition for a multi-surface task. The task + * naturally spans docs and tests, so multiple specialists should be used. + */ + evalTest('USUALLY_PASSES', { + name: 'should use multiple relevant specialists for multi-surface task', + params: { + settings: { + experimental: { + enableAgents: true, + agents: { + overrides: { + generalist: { enabled: true }, + }, + }, + }, + }, + }, + prompt: + 'Add a short README description for this library and also add a test file that verifies add(1, 2) returns 3.', + files: { + ...TEST_AGENTS.DOCS_AGENT.asFile(), + ...TEST_AGENTS.TESTING_AGENT.asFile(), + 'index.ts': INDEX_TS, + 'README.md': 'TODO: update the README.\n', + 'package.json': JSON.stringify( + { + name: 'subagent-eval-project', + version: '1.0.0', + type: 'module', + }, + null, + 2, + ), + }, + assert: async (rig, _result) => { + const toolLogs = rig.readToolLogs() as Array<{ + toolRequest: { name: string }; + }>; + const readme = readProjectFile(rig, 'README.md'); + + await rig.expectToolCallSuccess([ + TEST_AGENTS.DOCS_AGENT.name, + TEST_AGENTS.TESTING_AGENT.name, + ]); + expect(readme).not.toContain('TODO: 
update the README.'); + expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe( + false, + ); }, }); }); diff --git a/evals/test-helper.test.ts b/evals/test-helper.test.ts new file mode 100644 index 0000000000..c0147cda75 --- /dev/null +++ b/evals/test-helper.test.ts @@ -0,0 +1,207 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import fs from 'node:fs'; +import path from 'node:path'; +import { internalEvalTest } from './test-helper.js'; +import { TestRig } from '@google/gemini-cli-test-utils'; + +// Mock TestRig to control API success/failure +vi.mock('@google/gemini-cli-test-utils', () => { + return { + TestRig: vi.fn().mockImplementation(() => ({ + setup: vi.fn(), + run: vi.fn(), + cleanup: vi.fn(), + readToolLogs: vi.fn().mockReturnValue([]), + _lastRunStderr: '', + })), + }; +}); + +describe('evalTest reliability logic', () => { + const LOG_DIR = path.resolve(process.cwd(), 'evals/logs'); + const RELIABILITY_LOG = path.join(LOG_DIR, 'api-reliability.jsonl'); + + beforeEach(() => { + vi.clearAllMocks(); + if (fs.existsSync(RELIABILITY_LOG)) { + fs.unlinkSync(RELIABILITY_LOG); + } + }); + + afterEach(() => { + if (fs.existsSync(RELIABILITY_LOG)) { + fs.unlinkSync(RELIABILITY_LOG); + } + }); + + it('should retry 3 times on 500 INTERNAL error and then SKIP', async () => { + const mockRig = new TestRig() as any; + (TestRig as any).mockReturnValue(mockRig); + + // Simulate permanent 500 error + mockRig.run.mockRejectedValue(new Error('status: INTERNAL - API Down')); + + // Execute the test function directly + await internalEvalTest({ + name: 'test-api-failure', + prompt: 'do something', + assert: async () => {}, + }); + + // Verify retries: 1 initial + 3 retries = 4 setups/runs + expect(mockRig.run).toHaveBeenCalledTimes(4); + + // Verify log content + const logContent = fs + .readFileSync(RELIABILITY_LOG, 'utf-8') + .trim() + 
.split('\n'); + expect(logContent.length).toBe(4); + + const entries = logContent.map((line) => JSON.parse(line)); + expect(entries[0].status).toBe('RETRY'); + expect(entries[0].attempt).toBe(0); + expect(entries[3].status).toBe('SKIP'); + expect(entries[3].attempt).toBe(3); + expect(entries[3].testName).toBe('test-api-failure'); + }); + + it('should fail immediately on non-500 errors (like assertion failures)', async () => { + const mockRig = new TestRig() as any; + (TestRig as any).mockReturnValue(mockRig); + + // Simulate a real logic error/bug + mockRig.run.mockResolvedValue('Success'); + const assertError = new Error('Assertion failed: expected foo to be bar'); + + // Expect the test function to throw immediately + await expect( + internalEvalTest({ + name: 'test-logic-failure', + prompt: 'do something', + assert: async () => { + throw assertError; + }, + }), + ).rejects.toThrow('Assertion failed'); + + // Verify NO retries: only 1 attempt + expect(mockRig.run).toHaveBeenCalledTimes(1); + + // Verify NO reliability log was created (it's not an API error) + expect(fs.existsSync(RELIABILITY_LOG)).toBe(false); + }); + + it('should recover if a retry succeeds', async () => { + const mockRig = new TestRig() as any; + (TestRig as any).mockReturnValue(mockRig); + + // Fail once, then succeed + mockRig.run + .mockRejectedValueOnce(new Error('status: INTERNAL')) + .mockResolvedValueOnce('Success'); + + await internalEvalTest({ + name: 'test-recovery', + prompt: 'do something', + assert: async () => {}, + }); + + // Ran twice: initial (fail) + retry 1 (success) + expect(mockRig.run).toHaveBeenCalledTimes(2); + + // Log should only have the one RETRY entry + const logContent = fs + .readFileSync(RELIABILITY_LOG, 'utf-8') + .trim() + .split('\n'); + expect(logContent.length).toBe(1); + expect(JSON.parse(logContent[0]).status).toBe('RETRY'); + }); + + it('should retry 3 times on 503 UNAVAILABLE error and then SKIP', async () => { + const mockRig = new TestRig() as any; + 
(TestRig as any).mockReturnValue(mockRig); + + // Simulate permanent 503 error + mockRig.run.mockRejectedValue( + new Error('status: UNAVAILABLE - Service Busy'), + ); + + await internalEvalTest({ + name: 'test-api-503', + prompt: 'do something', + assert: async () => {}, + }); + + expect(mockRig.run).toHaveBeenCalledTimes(4); + + const logContent = fs + .readFileSync(RELIABILITY_LOG, 'utf-8') + .trim() + .split('\n'); + const entries = logContent.map((line) => JSON.parse(line)); + expect(entries[0].errorCode).toBe('503'); + expect(entries[3].status).toBe('SKIP'); + }); + + it('should throw if an absolute path is used in files', async () => { + const mockRig = new TestRig() as any; + (TestRig as any).mockReturnValue(mockRig); + mockRig.testDir = path.resolve(process.cwd(), 'test-dir-tmp'); + if (!fs.existsSync(mockRig.testDir)) { + fs.mkdirSync(mockRig.testDir, { recursive: true }); + } + + try { + await expect( + internalEvalTest({ + name: 'test-absolute-path', + prompt: 'do something', + files: { + '/etc/passwd': 'hacked', + }, + assert: async () => {}, + }), + ).rejects.toThrow('Invalid file path in test case: /etc/passwd'); + } finally { + if (fs.existsSync(mockRig.testDir)) { + fs.rmSync(mockRig.testDir, { recursive: true, force: true }); + } + } + }); + + it('should throw if directory traversal is detected in files', async () => { + const mockRig = new TestRig() as any; + (TestRig as any).mockReturnValue(mockRig); + mockRig.testDir = path.resolve(process.cwd(), 'test-dir-tmp'); + + // Create a mock test-dir + if (!fs.existsSync(mockRig.testDir)) { + fs.mkdirSync(mockRig.testDir, { recursive: true }); + } + + try { + await expect( + internalEvalTest({ + name: 'test-traversal', + prompt: 'do something', + files: { + '../sensitive.txt': 'hacked', + }, + assert: async () => {}, + }), + ).rejects.toThrow('Invalid file path in test case: ../sensitive.txt'); + } finally { + if (fs.existsSync(mockRig.testDir)) { + fs.rmSync(mockRig.testDir, { recursive: true, force: 
true }); + } + } + }); +}); diff --git a/evals/test-helper.ts b/evals/test-helper.ts index 786ec0e418..f79a78779a 100644 --- a/evals/test-helper.ts +++ b/evals/test-helper.ts @@ -13,6 +13,9 @@ import { TestRig } from '@google/gemini-cli-test-utils'; import { createUnauthorizedToolError, parseAgentMarkdown, + Storage, + getProjectHash, + SESSION_FILE_PREFIX, } from '@google/gemini-cli-core'; export * from '@google/gemini-cli-test-utils'; @@ -36,89 +39,85 @@ export * from '@google/gemini-cli-test-utils'; export type EvalPolicy = 'ALWAYS_PASSES' | 'USUALLY_PASSES'; export function evalTest(policy: EvalPolicy, evalCase: EvalCase) { - const fn = async () => { + runEval( + policy, + evalCase.name, + () => internalEvalTest(evalCase), + evalCase.timeout, + ); +} + +export async function internalEvalTest(evalCase: EvalCase) { + const maxRetries = 3; + let attempt = 0; + + while (attempt <= maxRetries) { const rig = new TestRig(); const { logDir, sanitizedName } = await prepareLogDir(evalCase.name); const activityLogFile = path.join(logDir, `${sanitizedName}.jsonl`); const logFile = path.join(logDir, `${sanitizedName}.log`); let isSuccess = false; + try { rig.setup(evalCase.name, evalCase.params); - // Symlink node modules to reduce the amount of time needed to - // bootstrap test projects. + if (evalCase.files) { + await setupTestFiles(rig, evalCase.files); + } + symlinkNodeModules(rig.testDir || ''); - if (evalCase.files) { - const acknowledgedAgents: Record> = {}; - const projectRoot = fs.realpathSync(rig.testDir!); + // If messages are provided, write a session file so --resume can load it. 
+ let sessionId: string | undefined; + if (evalCase.messages) { + sessionId = + evalCase.sessionId || + `test-session-${crypto.randomUUID().slice(0, 8)}`; - for (const [filePath, content] of Object.entries(evalCase.files)) { - const fullPath = path.join(rig.testDir!, filePath); - fs.mkdirSync(path.dirname(fullPath), { recursive: true }); - fs.writeFileSync(fullPath, content); + // Temporarily set GEMINI_CLI_HOME so Storage writes to the same + // directory the CLI subprocess will use (rig.homeDir). + const originalGeminiHome = process.env['GEMINI_CLI_HOME']; + process.env['GEMINI_CLI_HOME'] = rig.homeDir!; + try { + const storage = new Storage(fs.realpathSync(rig.testDir!)); + await storage.initialize(); + const chatsDir = path.join(storage.getProjectTempDir(), 'chats'); + fs.mkdirSync(chatsDir, { recursive: true }); - // If it's an agent file, calculate hash for acknowledgement - if ( - filePath.startsWith('.gemini/agents/') && - filePath.endsWith('.md') - ) { - const hash = crypto - .createHash('sha256') - .update(content) - .digest('hex'); + const conversation = { + sessionId, + projectHash: getProjectHash(fs.realpathSync(rig.testDir!)), + startTime: new Date().toISOString(), + lastUpdated: new Date().toISOString(), + messages: evalCase.messages, + }; - try { - const agentDefs = await parseAgentMarkdown(fullPath, content); - if (agentDefs.length > 0) { - const agentName = agentDefs[0].name; - if (!acknowledgedAgents[projectRoot]) { - acknowledgedAgents[projectRoot] = {}; - } - acknowledgedAgents[projectRoot][agentName] = hash; - } - } catch (error) { - console.warn( - `Failed to parse agent for test acknowledgement: ${filePath}`, - error, - ); - } + const timestamp = new Date() + .toISOString() + .slice(0, 16) + .replace(/:/g, '-'); + const filename = `${SESSION_FILE_PREFIX}${timestamp}-${sessionId.slice(0, 8)}.json`; + fs.writeFileSync( + path.join(chatsDir, filename), + JSON.stringify(conversation, null, 2), + ); + } catch (e) { + // Storage initialization may 
fail in some environments; log and continue. + console.warn('Failed to write session history:', e); + } finally { + // Restore original GEMINI_CLI_HOME. + if (originalGeminiHome === undefined) { + delete process.env['GEMINI_CLI_HOME']; + } else { + process.env['GEMINI_CLI_HOME'] = originalGeminiHome; } } - - // Write acknowledged_agents.json to the home directory - if (Object.keys(acknowledgedAgents).length > 0) { - const ackPath = path.join( - rig.homeDir!, - '.gemini', - 'acknowledgments', - 'agents.json', - ); - fs.mkdirSync(path.dirname(ackPath), { recursive: true }); - fs.writeFileSync( - ackPath, - JSON.stringify(acknowledgedAgents, null, 2), - ); - } - - const execOptions = { cwd: rig.testDir!, stdio: 'inherit' as const }; - execSync('git init', execOptions); - execSync('git config user.email "test@example.com"', execOptions); - execSync('git config user.name "Test User"', execOptions); - - // Temporarily disable the interactive editor and git pager - // to avoid hanging the tests. It seems the the agent isn't - // consistently honoring the instructions to avoid interactive - // commands. - execSync('git config core.editor "true"', execOptions); - execSync('git config core.pager "cat"', execOptions); - execSync('git config commit.gpgsign false', execOptions); - execSync('git add .', execOptions); - execSync('git commit --allow-empty -m "Initial commit"', execOptions); } const result = await rig.run({ - args: evalCase.prompt, + args: sessionId + ? ['--resume', sessionId, evalCase.prompt] + : evalCase.prompt, approvalMode: evalCase.approvalMode ?? 'yolo', timeout: evalCase.timeout, env: { @@ -136,6 +135,37 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) { await evalCase.assert(rig, result); isSuccess = true; + return; // Success! Exit the retry loop. + } catch (error: unknown) { + const errorMessage = + error instanceof Error ? 
error.message : String(error); + const errorCode = getApiErrorCode(errorMessage); + + if (errorCode) { + const status = attempt < maxRetries ? 'RETRY' : 'SKIP'; + logReliabilityEvent( + evalCase.name, + attempt, + status, + errorCode, + errorMessage, + ); + + if (attempt < maxRetries) { + attempt++; + console.warn( + `[Eval] Attempt ${attempt} failed with ${errorCode} Error. Retrying...`, + ); + continue; // Retry + } + + console.warn( + `[Eval] '${evalCase.name}' failed after ${maxRetries} retries due to persistent API errors. Skipping failure to avoid blocking PR.`, + ); + return; // Gracefully exit without failing the test + } + + throw error; // Real failure } finally { if (isSuccess) { await fs.promises.unlink(activityLogFile).catch((err) => { @@ -154,9 +184,131 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) { ); await rig.cleanup(); } + } +} + +function getApiErrorCode(message: string): '500' | '503' | undefined { + if ( + message.includes('status: UNAVAILABLE') || + message.includes('code: 503') || + message.includes('Service Unavailable') + ) { + return '503'; + } + if ( + message.includes('status: INTERNAL') || + message.includes('code: 500') || + message.includes('Internal error encountered') + ) { + return '500'; + } + return undefined; +} + +/** + * Log reliability event for later harvesting. + * + * Note: Uses synchronous file I/O to ensure the log is persisted even if the + * test process is abruptly terminated by a timeout or CI crash. Performance + * impact is negligible compared to long-running evaluation tests. 
+ */ +function logReliabilityEvent( + testName: string, + attempt: number, + status: 'RETRY' | 'SKIP', + errorCode: '500' | '503', + errorMessage: string, +) { + const reliabilityLog = { + timestamp: new Date().toISOString(), + testName, + model: process.env.GEMINI_MODEL || 'unknown', + attempt, + status, + errorCode, + error: errorMessage, }; - runEval(policy, evalCase.name, fn, evalCase.timeout); + try { + const relDir = path.resolve(process.cwd(), 'evals/logs'); + fs.mkdirSync(relDir, { recursive: true }); + fs.appendFileSync( + path.join(relDir, 'api-reliability.jsonl'), + JSON.stringify(reliabilityLog) + '\n', + ); + } catch (logError) { + console.error('Failed to write reliability log:', logError); + } +} + +/** + * Helper to setup test files and git repository. + * + * Note: While this is an async function (due to parseAgentMarkdown), it + * intentionally uses synchronous filesystem and child_process operations + * for simplicity and to ensure sequential environment preparation. + */ +async function setupTestFiles(rig: TestRig, files: Record) { + const acknowledgedAgents: Record> = {}; + const projectRoot = fs.realpathSync(rig.testDir!); + + for (const [filePath, content] of Object.entries(files)) { + if (filePath.includes('..') || path.isAbsolute(filePath)) { + throw new Error(`Invalid file path in test case: ${filePath}`); + } + const fullPath = path.join(projectRoot, filePath); + if (!fullPath.startsWith(projectRoot)) { + throw new Error(`Path traversal detected: ${filePath}`); + } + + fs.mkdirSync(path.dirname(fullPath), { recursive: true }); + fs.writeFileSync(fullPath, content); + + if (filePath.startsWith('.gemini/agents/') && filePath.endsWith('.md')) { + const hash = crypto.createHash('sha256').update(content).digest('hex'); + try { + const agentDefs = await parseAgentMarkdown(fullPath, content); + if (agentDefs.length > 0) { + const agentName = agentDefs[0].name; + if (!acknowledgedAgents[projectRoot]) { + acknowledgedAgents[projectRoot] = {}; + } 
+ acknowledgedAgents[projectRoot][agentName] = hash; + } + } catch (error) { + console.warn( + `Failed to parse agent for test acknowledgement: ${filePath}`, + error, + ); + } + } + } + + if (Object.keys(acknowledgedAgents).length > 0) { + const ackPath = path.join( + rig.homeDir!, + '.gemini', + 'acknowledgments', + 'agents.json', + ); + fs.mkdirSync(path.dirname(ackPath), { recursive: true }); + fs.writeFileSync(ackPath, JSON.stringify(acknowledgedAgents, null, 2)); + } + + const execOptions = { cwd: rig.testDir!, stdio: 'inherit' as const }; + execSync('git init --initial-branch=main', execOptions); + execSync('git config user.email "test@example.com"', execOptions); + execSync('git config user.name "Test User"', execOptions); + + // Temporarily disable the interactive editor and git pager + // to avoid hanging the tests. It seems the the agent isn't + // consistently honoring the instructions to avoid interactive + // commands. + execSync('git config core.editor "true"', execOptions); + execSync('git config core.pager "cat"', execOptions); + execSync('git config commit.gpgsign false', execOptions); + execSync('git add .', execOptions); + execSync('git commit --allow-empty -m "Initial commit"', execOptions); } /** @@ -197,12 +349,32 @@ export function symlinkNodeModules(testDir: string) { } } +/** + * Settings that are forbidden in evals. Evals should never restrict which + * tools are available — they must test against the full, default tool set + * to ensure realistic behavior. + */ +interface ForbiddenToolSettings { + tools?: { + /** Restricting core tools in evals is forbidden. */ + core?: never; + [key: string]: unknown; + }; +} + export interface EvalCase { name: string; - params?: Record; + params?: { + settings?: ForbiddenToolSettings & Record; + [key: string]: unknown; + }; prompt: string; timeout?: number; files?: Record; + /** Conversation history to pre-load via --resume. Each entry is a message object with type, content, etc. 
*/ + messages?: Record[]; + /** Session ID for the resumed session. Auto-generated if not provided. */ + sessionId?: string; approvalMode?: 'default' | 'auto_edit' | 'yolo' | 'plan'; assert: (rig: TestRig, result: string) => Promise; } diff --git a/integration-tests/browser-agent.cleanup.responses b/integration-tests/browser-agent.cleanup.responses index 988f2fa456..9cf7a7b356 100644 --- a/integration-tests/browser-agent.cleanup.responses +++ b/integration-tests/browser-agent.cleanup.responses @@ -1,2 +1,4 @@ {"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll open https://example.com and check the page title for you."},{"functionCall":{"name":"browser_agent","args":{"task":"Open https://example.com and get the page title"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":35,"totalTokenCount":135}}]} -{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The page title of https://example.com is \"Example Domain\". The browser session has been completed and cleaned up successfully."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":30,"totalTokenCount":230}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I have opened the page and the title is 'Example Domain'."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":200,"candidatesTokenCount":30,"totalTokenCount":230}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The task is complete. 
The page title is 'Example Domain'."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":300,"candidatesTokenCount":20,"totalTokenCount":320}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Done."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":400,"candidatesTokenCount":5,"totalTokenCount":405}}]} diff --git a/integration-tests/browser-policy.test.ts b/integration-tests/browser-policy.test.ts index 1bfdc27415..f533cb3f5e 100644 --- a/integration-tests/browser-policy.test.ts +++ b/integration-tests/browser-policy.test.ts @@ -175,4 +175,36 @@ priority = 200 expect(output).toContain('browser_agent'); expect(output).toContain('completed successfully'); }); + + it('should show the visible warning when browser agent starts in existing session mode', async () => { + rig.setup('browser-session-warning', { + fakeResponsesPath: join(__dirname, 'browser-agent.cleanup.responses'), + settings: { + general: { + enableAutoUpdateNotification: false, + }, + agents: { + overrides: { + browser_agent: { + enabled: true, + }, + }, + browser: { + sessionMode: 'existing', + headless: true, + }, + }, + }, + }); + + const stdout = await rig.runCommand(['Open https://example.com'], { + env: { + GEMINI_API_KEY: 'fake-key', + GEMINI_TELEMETRY_DISABLED: 'true', + DEV: 'true', + }, + }); + + expect(stdout).toContain('saved logins will be visible'); + }); }); diff --git a/integration-tests/ctrl-c-exit.test.ts b/integration-tests/ctrl-c-exit.test.ts index f3f3a74504..74bd28a440 100644 --- a/integration-tests/ctrl-c-exit.test.ts +++ b/integration-tests/ctrl-c-exit.test.ts @@ -6,9 +6,9 @@ import { describe, it, expect, beforeEach, afterEach } from 'vitest'; import * as os from 'node:os'; -import { TestRig } from './test-helper.js'; +import { TestRig, skipFlaky } from './test-helper.js'; -describe('Ctrl+C exit', () => { +describe.skipIf(skipFlaky)('Ctrl+C exit', () => { let 
rig: TestRig; beforeEach(() => { diff --git a/integration-tests/extensions-install.test.ts b/integration-tests/extensions-install.test.ts index 90dbf1ab0d..e9f1cdbf49 100644 --- a/integration-tests/extensions-install.test.ts +++ b/integration-tests/extensions-install.test.ts @@ -34,16 +34,20 @@ describe('extension install', () => { writeFileSync(testServerPath, extension); try { const result = await rig.runCommand( - ['extensions', 'install', `${rig.testDir!}`], + ['--debug', 'extensions', 'install', `${rig.testDir!}`], { stdin: 'y\n' }, ); expect(result).toContain('test-extension-install'); - const listResult = await rig.runCommand(['extensions', 'list']); + const listResult = await rig.runCommand([ + '--debug', + 'extensions', + 'list', + ]); expect(listResult).toContain('test-extension-install'); writeFileSync(testServerPath, extensionUpdate); const updateResult = await rig.runCommand( - ['extensions', 'update', `test-extension-install`], + ['--debug', 'extensions', 'update', `test-extension-install`], { stdin: 'y\n' }, ); expect(updateResult).toContain('0.0.2'); diff --git a/integration-tests/extensions-reload.test.ts b/integration-tests/extensions-reload.test.ts index 9d451cedcf..4a1250fd00 100644 --- a/integration-tests/extensions-reload.test.ts +++ b/integration-tests/extensions-reload.test.ts @@ -10,13 +10,9 @@ import { TestMcpServer } from './test-mcp-server.js'; import { writeFileSync } from 'node:fs'; import { join } from 'node:path'; import { safeJsonStringify } from '@google/gemini-cli-core/src/utils/safeJsonStringify.js'; -import { env } from 'node:process'; -import { platform } from 'node:os'; import stripAnsi from 'strip-ansi'; -const itIf = (condition: boolean) => (condition ? 
it : it.skip); - describe('extension reloading', () => { let rig: TestRig; @@ -26,141 +22,130 @@ describe('extension reloading', () => { afterEach(async () => await rig.cleanup()); - const sandboxEnv = env['GEMINI_SANDBOX']; - // Fails in linux non-sandbox e2e tests + // always fails // TODO(#14527): Re-enable this once fixed - // Fails in sandbox mode, can't check for local extension updates. - itIf( - (!sandboxEnv || sandboxEnv === 'false') && - platform() !== 'win32' && - platform() !== 'linux', - )( - 'installs a local extension, updates it, checks it was reloaded properly', - async () => { - const serverA = new TestMcpServer(); - const portA = await serverA.start({ - hello: () => ({ content: [{ type: 'text', text: 'world' }] }), - }); - const extension = { - name: 'test-extension', - version: '0.0.1', - mcpServers: { - 'test-server': { - httpUrl: `http://localhost:${portA}/mcp`, - }, + it.skip('installs a local extension, updates it, checks it was reloaded properly', async () => { + const serverA = new TestMcpServer(); + const portA = await serverA.start({ + hello: () => ({ content: [{ type: 'text', text: 'world' }] }), + }); + const extension = { + name: 'test-extension', + version: '0.0.1', + mcpServers: { + 'test-server': { + httpUrl: `http://localhost:${portA}/mcp`, }, - }; + }, + }; - rig.setup('extension reload test', { - settings: { - experimental: { extensionReloading: true }, - }, - }); - const testServerPath = join(rig.testDir!, 'gemini-extension.json'); - writeFileSync(testServerPath, safeJsonStringify(extension, 2)); - // defensive cleanup from previous tests. 
- try { - await rig.runCommand(['extensions', 'uninstall', 'test-extension']); - } catch { - /* empty */ - } - - const result = await rig.runCommand( - ['extensions', 'install', `${rig.testDir!}`], - { stdin: 'y\n' }, - ); - expect(result).toContain('test-extension'); - - // Now create the update, but its not installed yet - const serverB = new TestMcpServer(); - const portB = await serverB.start({ - goodbye: () => ({ content: [{ type: 'text', text: 'world' }] }), - }); - extension.version = '0.0.2'; - extension.mcpServers['test-server'].httpUrl = - `http://localhost:${portB}/mcp`; - writeFileSync(testServerPath, safeJsonStringify(extension, 2)); - - // Start the CLI. - const run = await rig.runInteractive({ args: '--debug' }); - await run.expectText('You have 1 extension with an update available'); - // See the outdated extension - await run.sendText('/extensions list'); - await run.type('\r'); - await run.expectText( - 'test-extension (v0.0.1) - active (update available)', - ); - // Wait for the UI to settle and retry the command until we see the update - await new Promise((resolve) => setTimeout(resolve, 1000)); - - // Poll for the updated list - await rig.pollCommand( - async () => { - await run.sendText('/mcp list'); - await run.type('\r'); - }, - () => { - const output = stripAnsi(run.output); - return ( - output.includes( - 'test-server (from test-extension) - Ready (1 tool)', - ) && output.includes('- mcp_test-server_hello') - ); - }, - 30000, // 30s timeout - ); - - // Update the extension, expect the list to update, and mcp servers as well. 
- await run.sendKeys('\u0015/extensions update test-extension'); - await run.expectText('/extensions update test-extension'); - await run.type('\r'); - await new Promise((resolve) => setTimeout(resolve, 500)); - await run.type('\r'); - await run.expectText( - ` * test-server (remote): http://localhost:${portB}/mcp`, - ); - await run.type('\r'); // consent - await run.expectText( - 'Extension "test-extension" successfully updated: 0.0.1 → 0.0.2', - ); - - // Poll for the updated extension version - await rig.pollCommand( - async () => { - await run.sendText('/extensions list'); - await run.type('\r'); - }, - () => - stripAnsi(run.output).includes( - 'test-extension (v0.0.2) - active (updated)', - ), - 30000, - ); - - // Poll for the updated mcp tool - await rig.pollCommand( - async () => { - await run.sendText('/mcp list'); - await run.type('\r'); - }, - () => { - const output = stripAnsi(run.output); - return ( - output.includes( - 'test-server (from test-extension) - Ready (1 tool)', - ) && output.includes('- mcp_test-server_goodbye') - ); - }, - 30000, - ); - - await run.sendText('/quit'); - await run.type('\r'); - - // Clean things up. - await serverA.stop(); - await serverB.stop(); + rig.setup('extension reload test', { + settings: { + experimental: { extensionReloading: true }, + }, + }); + const testServerPath = join(rig.testDir!, 'gemini-extension.json'); + writeFileSync(testServerPath, safeJsonStringify(extension, 2)); + // defensive cleanup from previous tests. 
+ try { await rig.runCommand(['extensions', 'uninstall', 'test-extension']); - }, - ); + } catch { + /* empty */ + } + + const result = await rig.runCommand( + ['--debug', 'extensions', 'install', `${rig.testDir!}`], + { stdin: 'y\n' }, + ); + expect(result).toContain('test-extension'); + + // Now create the update, but its not installed yet + const serverB = new TestMcpServer(); + const portB = await serverB.start({ + goodbye: () => ({ content: [{ type: 'text', text: 'world' }] }), + }); + extension.version = '0.0.2'; + extension.mcpServers['test-server'].httpUrl = + `http://localhost:${portB}/mcp`; + writeFileSync(testServerPath, safeJsonStringify(extension, 2)); + + // Start the CLI. + const run = await rig.runInteractive({ args: '--debug' }); + await run.expectText('You have 1 extension with an update available'); + // See the outdated extension + await run.sendText('/extensions list'); + await run.type('\r'); + await run.expectText('test-extension (v0.0.1) - active (update available)'); + // Wait for the UI to settle and retry the command until we see the update + await new Promise((resolve) => setTimeout(resolve, 1000)); + + // Poll for the updated list + await rig.pollCommand( + async () => { + await run.sendText('/mcp list'); + await run.type('\r'); + }, + () => { + const output = stripAnsi(run.output); + return ( + output.includes( + 'test-server (from test-extension) - Ready (1 tool)', + ) && output.includes('- mcp_test-server_hello') + ); + }, + 30000, // 30s timeout + ); + + // Update the extension, expect the list to update, and mcp servers as well. 
+ await run.sendKeys('\u0015/extensions update test-extension'); + await run.expectText('/extensions update test-extension'); + await run.type('\r'); + await new Promise((resolve) => setTimeout(resolve, 500)); + await run.type('\r'); + await run.expectText( + ` * test-server (remote): http://localhost:${portB}/mcp`, + ); + await run.type('\r'); // consent + await run.expectText( + 'Extension "test-extension" successfully updated: 0.0.1 → 0.0.2', + ); + + // Poll for the updated extension version + await rig.pollCommand( + async () => { + await run.sendText('/extensions list'); + await run.type('\r'); + }, + () => + stripAnsi(run.output).includes( + 'test-extension (v0.0.2) - active (updated)', + ), + 30000, + ); + + // Poll for the updated mcp tool + await rig.pollCommand( + async () => { + await run.sendText('/mcp list'); + await run.type('\r'); + }, + () => { + const output = stripAnsi(run.output); + return ( + output.includes( + 'test-server (from test-extension) - Ready (1 tool)', + ) && output.includes('- mcp_test-server_goodbye') + ); + }, + 30000, + ); + + await run.sendText('/quit'); + await run.type('\r'); + + // Clean things up. 
+ await serverA.stop(); + await serverB.stop(); + await rig.runCommand(['extensions', 'uninstall', 'test-extension']); + }); }); diff --git a/integration-tests/hooks-system.test.ts b/integration-tests/hooks-system.test.ts index 479851957b..73a7ca03ab 100644 --- a/integration-tests/hooks-system.test.ts +++ b/integration-tests/hooks-system.test.ts @@ -5,405 +5,413 @@ */ import { describe, it, expect, beforeEach, afterEach } from 'vitest'; -import { TestRig, poll, normalizePath } from './test-helper.js'; +import { TestRig, poll, normalizePath, skipFlaky } from './test-helper.js'; import { join } from 'node:path'; -import { writeFileSync } from 'node:fs'; +import { writeFileSync, existsSync, mkdirSync } from 'node:fs'; +import os from 'node:os'; -describe('Hooks System Integration', () => { - let rig: TestRig; +describe.skipIf(skipFlaky)( + 'Hooks System Integration', + { timeout: 120000 }, + () => { + let rig: TestRig; - beforeEach(() => { - rig = new TestRig(); - }); - - afterEach(async () => { - if (rig) { - await rig.cleanup(); - } - }); - - describe('Command Hooks - Blocking Behavior', () => { - it('should block tool execution when hook returns block decision', async () => { - rig.setup( - 'should block tool execution when hook returns block decision', - { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.block-tool.responses', - ), - }, - ); - - const scriptPath = rig.createScript( - 'block_hook.cjs', - "console.log(JSON.stringify({decision: 'block', reason: 'File writing blocked by security policy'}));", - ); - - rig.setup( - 'should block tool execution when hook returns block decision', - { - settings: { - hooksConfig: { - enabled: true, - }, - hooks: { - BeforeTool: [ - { - matcher: 'write_file', - sequential: true, - hooks: [ - { - type: 'command', - command: normalizePath(`node "${scriptPath}"`), - timeout: 5000, - }, - ], - }, - ], - }, - }, - }, - ); - - const result = await rig.run({ - args: 'Create a file called test.txt with content 
"Hello World"', - }); - - // The hook should block the write_file tool - const toolLogs = rig.readToolLogs(); - const writeFileCalls = toolLogs.filter( - (t) => - t.toolRequest.name === 'write_file' && t.toolRequest.success === true, - ); - - // Tool should not be called due to blocking hook - expect(writeFileCalls).toHaveLength(0); - - // Result should mention the blocking reason - expect(result).toContain('File writing blocked by security policy'); - - // Should generate hook telemetry - const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call'); - expect(hookTelemetryFound).toBeTruthy(); + beforeEach(() => { + rig = new TestRig(); }); - it('should block tool execution and use stderr as reason when hook exits with code 2', async () => { - rig.setup( - 'should block tool execution and use stderr as reason when hook exits with code 2', - { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.block-tool.responses', - ), - }, - ); - - const blockMsg = 'File writing blocked by security policy'; - - const scriptPath = rig.createScript( - 'stderr_block_hook.cjs', - `process.stderr.write(JSON.stringify({ decision: 'deny', reason: '${blockMsg}' })); process.exit(2);`, - ); - - rig.setup( - 'should block tool execution and use stderr as reason when hook exits with code 2', - { - settings: { - hooksConfig: { - enabled: true, - }, - hooks: { - BeforeTool: [ - { - matcher: 'write_file', - sequential: true, - hooks: [ - { - type: 'command', - command: normalizePath(`node "${scriptPath}"`)!, - timeout: 5000, - }, - ], - }, - ], - }, - }, - }, - ); - - const result = await rig.run({ - args: 'Create a file called test.txt with content "Hello World"', - }); - - // The hook should block the write_file tool - const toolLogs = rig.readToolLogs(); - const writeFileCalls = toolLogs.filter( - (t) => - t.toolRequest.name === 'write_file' && t.toolRequest.success === true, - ); - - // Tool should not be called due to blocking hook - 
expect(writeFileCalls).toHaveLength(0); - - // Result should mention the blocking reason - expect(result).toContain(blockMsg); - - // Verify hook telemetry shows the deny decision - const hookLogs = rig.readHookLogs(); - const blockHook = hookLogs.find( - (log) => - log.hookCall.hook_event_name === 'BeforeTool' && - (log.hookCall.stdout.includes('"decision":"deny"') || - log.hookCall.stderr.includes('"decision":"deny"')), - ); - expect(blockHook).toBeDefined(); - expect(blockHook?.hookCall.stdout + blockHook?.hookCall.stderr).toContain( - blockMsg, - ); + afterEach(async () => { + if (rig) { + await rig.cleanup(); + } }); - it('should allow tool execution when hook returns allow decision', async () => { - rig.setup( - 'should allow tool execution when hook returns allow decision', - { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.allow-tool.responses', - ), - }, - ); - - const scriptPath = rig.createScript( - 'allow_hook.cjs', - "console.log(JSON.stringify({decision: 'allow', reason: 'File writing approved'}));", - ); - - rig.setup( - 'should allow tool execution when hook returns allow decision', - { - settings: { - hooksConfig: { - enabled: true, - }, - hooks: { - BeforeTool: [ - { - matcher: 'write_file', - sequential: true, - hooks: [ - { - type: 'command', - command: normalizePath(`node "${scriptPath}"`), - timeout: 5000, - }, - ], - }, - ], - }, + describe('Command Hooks - Blocking Behavior', () => { + it('should block tool execution when hook returns block decision', async () => { + rig.setup( + 'should block tool execution when hook returns block decision', + { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.block-tool.responses', + ), }, - }, - ); + ); - await rig.run({ - args: 'Create a file called approved.txt with content "Approved content"', - }); + const scriptPath = rig.createScript( + 'block_hook.cjs', + "console.log(JSON.stringify({decision: 'block', reason: 'File writing blocked by security policy'}));", + ); 
- // The hook should allow the write_file tool - const foundWriteFile = await rig.waitForToolCall('write_file'); - expect(foundWriteFile).toBeTruthy(); - - // File should be created - const fileContent = rig.readFile('approved.txt'); - expect(fileContent).toContain('Approved content'); - - // Should generate hook telemetry - const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call'); - expect(hookTelemetryFound).toBeTruthy(); - }); - }); - - describe('Command Hooks - Additional Context', () => { - it('should add additional context from AfterTool hooks', async () => { - rig.setup('should add additional context from AfterTool hooks', { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.after-tool-context.responses', - ), - }); - - const scriptPath = rig.createScript( - 'after_tool_context.cjs', - "console.log(JSON.stringify({hookSpecificOutput: {hookEventName: 'AfterTool', additionalContext: 'Security scan: File content appears safe'}}));", - ); - - const command = `node "${scriptPath}"`; - rig.setup('should add additional context from AfterTool hooks', { - settings: { - hooksConfig: { - enabled: true, - }, - hooks: { - AfterTool: [ - { - matcher: 'read_file', - sequential: true, - hooks: [ + rig.setup( + 'should block tool execution when hook returns block decision', + { + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + BeforeTool: [ { - type: 'command', - command: normalizePath(command), - timeout: 5000, + matcher: 'write_file', + sequential: true, + hooks: [ + { + type: 'command', + command: normalizePath(`node "${scriptPath}"`), + timeout: 5000, + }, + ], }, ], }, - ], - }, - }, - }); - - // Create a test file to read - rig.createFile('test-file.txt', 'This is test content'); - - await rig.run({ - args: 'Read the contents of test-file.txt and tell me what it contains', - }); - - // Should find read_file tool call - const foundReadFile = await rig.waitForToolCall('read_file'); - expect(foundReadFile).toBeTruthy(); - - // 
Should generate hook telemetry - const hookTelemetryFound = rig.readHookLogs(); - expect(hookTelemetryFound.length).toBeGreaterThan(0); - expect(hookTelemetryFound[0].hookCall.hook_event_name).toBe('AfterTool'); - expect(hookTelemetryFound[0].hookCall.hook_name).toBe( - normalizePath(command), - ); - expect(hookTelemetryFound[0].hookCall.hook_input).toBeDefined(); - expect(hookTelemetryFound[0].hookCall.hook_output).toBeDefined(); - expect(hookTelemetryFound[0].hookCall.exit_code).toBe(0); - expect(hookTelemetryFound[0].hookCall.stdout).toBeDefined(); - expect(hookTelemetryFound[0].hookCall.stderr).toBeDefined(); - }); - }); - - describe('Command Hooks - Tail Tool Calls', () => { - it('should execute a tail tool call from AfterTool hooks and replace original response', async () => { - // Create a script that acts as the hook. - // It will trigger on "read_file" and issue a tail call to "write_file". - rig.setup('should execute a tail tool call from AfterTool hooks', { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.tail-tool-call.responses', - ), - }); - - const hookOutput = { - decision: 'allow', - hookSpecificOutput: { - hookEventName: 'AfterTool', - tailToolCallRequest: { - name: 'write_file', - args: { - file_path: 'tail-called-file.txt', - content: 'Content from tail call', }, }, - }, - }; + ); - const hookScript = `console.log(JSON.stringify(${JSON.stringify( - hookOutput, - )})); process.exit(0);`; + const result = await rig.run({ + args: 'Create a file called test.txt with content "Hello World"', + }); - const scriptPath = join(rig.testDir!, 'tail_call_hook.js'); - writeFileSync(scriptPath, hookScript); - const commandPath = scriptPath.replace(/\\/g, '/'); + // The hook should block the write_file tool + const toolLogs = rig.readToolLogs(); + const writeFileCalls = toolLogs.filter( + (t) => + t.toolRequest.name === 'write_file' && + t.toolRequest.success === true, + ); - rig.setup('should execute a tail tool call from AfterTool hooks', { - 
fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.tail-tool-call.responses', - ), - settings: { - hooksConfig: { - enabled: true, + // Tool should not be called due to blocking hook + expect(writeFileCalls).toHaveLength(0); + + // Result should mention the blocking reason + expect(result).toContain('File writing blocked by security policy'); + + // Should generate hook telemetry + const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call'); + expect(hookTelemetryFound).toBeTruthy(); + }); + + it('should block tool execution and use stderr as reason when hook exits with code 2', async () => { + rig.setup( + 'should block tool execution and use stderr as reason when hook exits with code 2', + { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.block-tool.responses', + ), }, - hooks: { - AfterTool: [ - { - matcher: 'read_file', - hooks: [ + ); + + const blockMsg = 'File writing blocked by security policy'; + + const scriptPath = rig.createScript( + 'stderr_block_hook.cjs', + `process.stderr.write(JSON.stringify({ decision: 'deny', reason: '${blockMsg}' })); process.exit(2);`, + ); + + rig.setup( + 'should block tool execution and use stderr as reason when hook exits with code 2', + { + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + BeforeTool: [ { - type: 'command', - command: `node "${commandPath}"`, - timeout: 5000, + matcher: 'write_file', + sequential: true, + hooks: [ + { + type: 'command', + command: normalizePath(`node "${scriptPath}"`)!, + timeout: 5000, + }, + ], }, ], }, - ], + }, }, - }, + ); + + const result = await rig.run({ + args: 'Create a file called test.txt with content "Hello World"', + }); + + // The hook should block the write_file tool + const toolLogs = rig.readToolLogs(); + const writeFileCalls = toolLogs.filter( + (t) => + t.toolRequest.name === 'write_file' && + t.toolRequest.success === true, + ); + + // Tool should not be called due to blocking hook + 
expect(writeFileCalls).toHaveLength(0); + + // Result should mention the blocking reason + expect(result).toContain(blockMsg); + + // Verify hook telemetry shows the deny decision + const hookLogs = rig.readHookLogs(); + const blockHook = hookLogs.find( + (log) => + log.hookCall.hook_event_name === 'BeforeTool' && + (log.hookCall.stdout.includes('"decision":"deny"') || + log.hookCall.stderr.includes('"decision":"deny"')), + ); + expect(blockHook).toBeDefined(); + expect( + blockHook?.hookCall.stdout + blockHook?.hookCall.stderr, + ).toContain(blockMsg); }); - // Create a test file to trigger the read_file tool - rig.createFile('original.txt', 'Original content'); + it('should allow tool execution when hook returns allow decision', async () => { + rig.setup( + 'should allow tool execution when hook returns allow decision', + { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.allow-tool.responses', + ), + }, + ); - const cliOutput = await rig.run({ - args: 'Read original.txt', // Fake responses should trigger read_file on this + const scriptPath = rig.createScript( + 'allow_hook.cjs', + "console.log(JSON.stringify({decision: 'allow', reason: 'File writing approved'}));", + ); + + rig.setup( + 'should allow tool execution when hook returns allow decision', + { + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + BeforeTool: [ + { + matcher: 'write_file', + sequential: true, + hooks: [ + { + type: 'command', + command: normalizePath(`node "${scriptPath}"`), + timeout: 5000, + }, + ], + }, + ], + }, + }, + }, + ); + + await rig.run({ + args: 'Create a file called approved.txt with content "Approved content"', + }); + + // The hook should allow the write_file tool + const foundWriteFile = await rig.waitForToolCall('write_file'); + expect(foundWriteFile).toBeTruthy(); + + // File should be created + const fileContent = rig.readFile('approved.txt'); + expect(fileContent).toContain('Approved content'); + + // Should generate hook telemetry + 
const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call'); + expect(hookTelemetryFound).toBeTruthy(); }); - - // 1. Verify that write_file was called (as a tail call replacing read_file) - // Since read_file was replaced before finalizing, it will not appear in the tool logs. - const foundWriteFile = await rig.waitForToolCall('write_file'); - expect(foundWriteFile).toBeTruthy(); - - // Ensure hook logs are flushed and the final LLM response is received. - // The mock LLM is configured to respond with "Tail call completed successfully." - expect(cliOutput).toContain('Tail call completed successfully.'); - - // Ensure telemetry is written to disk - await rig.waitForTelemetryReady(); - - // Read hook logs to debug - const hookLogs = rig.readHookLogs(); - const relevantHookLog = hookLogs.find( - (l) => l.hookCall.hook_event_name === 'AfterTool', - ); - - expect(relevantHookLog).toBeDefined(); - - // 2. Verify write_file was executed. - // In non-interactive mode, the CLI deduplicates tool execution logs by callId. - // Since a tail call reuses the original callId, "Tool: write_file" is not printed. - // Instead, we verify the side-effect (file creation) and the telemetry log. - - // 3. Verify the tail-called tool actually wrote the file - const modifiedContent = rig.readFile('tail-called-file.txt'); - expect(modifiedContent).toBe('Content from tail call'); - - // 4. Verify telemetry for the final tool call. - // The original 'read_file' call is replaced, so only 'write_file' is finalized and logged. - const toolLogs = rig.readToolLogs(); - const successfulTools = toolLogs.filter((t) => t.toolRequest.success); - expect( - successfulTools.some((t) => t.toolRequest.name === 'write_file'), - ).toBeTruthy(); - // The original request name should be preserved in the log payload if possible, - // but the executed tool name is 'write_file'. 
}); - }); - describe('BeforeModel Hooks - LLM Request Modification', () => { - it('should modify LLM requests with BeforeModel hooks', async () => { - // Create a hook script that replaces the LLM request with a modified version - // Note: Providing messages in the hook output REPLACES the entire conversation - rig.setup('should modify LLM requests with BeforeModel hooks', { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.before-model.responses', - ), + describe('Command Hooks - Additional Context', () => { + it('should add additional context from AfterTool hooks', async () => { + rig.setup('should add additional context from AfterTool hooks', { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.after-tool-context.responses', + ), + }); + + const scriptPath = rig.createScript( + 'after_tool_context.cjs', + "console.log(JSON.stringify({hookSpecificOutput: {hookEventName: 'AfterTool', additionalContext: 'Security scan: File content appears safe'}}));", + ); + + const command = `node "${scriptPath}"`; + rig.setup('should add additional context from AfterTool hooks', { + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + AfterTool: [ + { + matcher: 'read_file', + sequential: true, + hooks: [ + { + type: 'command', + command: normalizePath(command), + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Create a test file to read + rig.createFile('test-file.txt', 'This is test content'); + + await rig.run({ + args: 'Read the contents of test-file.txt and tell me what it contains', + }); + + // Should find read_file tool call + const foundReadFile = await rig.waitForToolCall('read_file'); + expect(foundReadFile).toBeTruthy(); + + // Should generate hook telemetry + const hookTelemetryFound = rig.readHookLogs(); + expect(hookTelemetryFound.length).toBeGreaterThan(0); + expect(hookTelemetryFound[0].hookCall.hook_event_name).toBe( + 'AfterTool', + ); + expect(hookTelemetryFound[0].hookCall.hook_name).toBe( + 
normalizePath(command), + ); + expect(hookTelemetryFound[0].hookCall.hook_input).toBeDefined(); + expect(hookTelemetryFound[0].hookCall.hook_output).toBeDefined(); + expect(hookTelemetryFound[0].hookCall.exit_code).toBe(0); + expect(hookTelemetryFound[0].hookCall.stdout).toBeDefined(); + expect(hookTelemetryFound[0].hookCall.stderr).toBeDefined(); }); - const hookScript = `const fs = require('fs'); + }); + + describe('Command Hooks - Tail Tool Calls', () => { + it('should execute a tail tool call from AfterTool hooks and replace original response', async () => { + // Create a script that acts as the hook. + // It will trigger on "read_file" and issue a tail call to "write_file". + rig.setup('should execute a tail tool call from AfterTool hooks', { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.tail-tool-call.responses', + ), + }); + + const hookOutput = { + decision: 'allow', + hookSpecificOutput: { + hookEventName: 'AfterTool', + tailToolCallRequest: { + name: 'write_file', + args: { + file_path: 'tail-called-file.txt', + content: 'Content from tail call', + }, + }, + }, + }; + + const hookScript = `console.log(JSON.stringify(${JSON.stringify( + hookOutput, + )})); process.exit(0);`; + + const scriptPath = join(rig.testDir!, 'tail_call_hook.js'); + writeFileSync(scriptPath, hookScript); + const commandPath = scriptPath.replace(/\\/g, '/'); + + rig.setup('should execute a tail tool call from AfterTool hooks', { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.tail-tool-call.responses', + ), + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + AfterTool: [ + { + matcher: 'read_file', + hooks: [ + { + type: 'command', + command: `node "${commandPath}"`, + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Create a test file to trigger the read_file tool + rig.createFile('original.txt', 'Original content'); + + const cliOutput = await rig.run({ + args: 'Read original.txt', // Fake responses should trigger 
read_file on this + }); + + // 1. Verify that write_file was called (as a tail call replacing read_file) + // Since read_file was replaced before finalizing, it will not appear in the tool logs. + const foundWriteFile = await rig.waitForToolCall('write_file'); + expect(foundWriteFile).toBeTruthy(); + + // Ensure hook logs are flushed and the final LLM response is received. + // The mock LLM is configured to respond with "Tail call completed successfully." + expect(cliOutput).toContain('Tail call completed successfully.'); + + // Ensure telemetry is written to disk + await rig.waitForTelemetryReady(); + + // Read hook logs to debug + const hookLogs = rig.readHookLogs(); + const relevantHookLog = hookLogs.find( + (l) => l.hookCall.hook_event_name === 'AfterTool', + ); + + expect(relevantHookLog).toBeDefined(); + + // 2. Verify write_file was executed. + // In non-interactive mode, the CLI deduplicates tool execution logs by callId. + // Since a tail call reuses the original callId, "Tool: write_file" is not printed. + // Instead, we verify the side-effect (file creation) and the telemetry log. + + // 3. Verify the tail-called tool actually wrote the file + const modifiedContent = rig.readFile('tail-called-file.txt'); + expect(modifiedContent).toBe('Content from tail call'); + + // 4. Verify telemetry for the final tool call. + // The original 'read_file' call is replaced, so only 'write_file' is finalized and logged. + const toolLogs = rig.readToolLogs(); + const successfulTools = toolLogs.filter((t) => t.toolRequest.success); + expect( + successfulTools.some((t) => t.toolRequest.name === 'write_file'), + ).toBeTruthy(); + // The original request name should be preserved in the log payload if possible, + // but the executed tool name is 'write_file'. 
+ }); + }); + + describe('BeforeModel Hooks - LLM Request Modification', () => { + it('should modify LLM requests with BeforeModel hooks', async () => { + // Create a hook script that replaces the LLM request with a modified version + // Note: Providing messages in the hook output REPLACES the entire conversation + rig.setup('should modify LLM requests with BeforeModel hooks', { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.before-model.responses', + ), + }); + const hookScript = `const fs = require('fs'); console.log(JSON.stringify({ decision: "allow", hookSpecificOutput: { @@ -419,166 +427,169 @@ console.log(JSON.stringify({ } }));`; - const scriptPath = rig.createScript('before_model_hook.cjs', hookScript); + const scriptPath = rig.createScript( + 'before_model_hook.cjs', + hookScript, + ); - rig.setup('should modify LLM requests with BeforeModel hooks', { - settings: { - hooksConfig: { - enabled: true, + rig.setup('should modify LLM requests with BeforeModel hooks', { + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + BeforeModel: [ + { + sequential: true, + hooks: [ + { + type: 'command', + command: normalizePath(`node "${scriptPath}"`), + timeout: 5000, + }, + ], + }, + ], + }, }, - hooks: { - BeforeModel: [ - { - sequential: true, - hooks: [ - { - type: 'command', - command: normalizePath(`node "${scriptPath}"`), - timeout: 5000, - }, - ], - }, - ], - }, - }, + }); + + const result = await rig.run({ args: 'Tell me a story' }); + + // The hook should have replaced the request entirely + // Verify that the model responded to the modified request, not the original + expect(result).toBeDefined(); + expect(result.length).toBeGreaterThan(0); + // The response should contain the expected text from the modified request + expect(result.toLowerCase()).toContain('security hook modified'); + + // Should generate hook telemetry + + // Should generate hook telemetry + const hookTelemetryFound = rig.readHookLogs(); + 
expect(hookTelemetryFound.length).toBeGreaterThan(0); + expect(hookTelemetryFound[0].hookCall.hook_event_name).toBe( + 'BeforeModel', + ); + expect(hookTelemetryFound[0].hookCall.hook_name).toBe( + `node "${scriptPath}"`, + ); + expect(hookTelemetryFound[0].hookCall.hook_input).toBeDefined(); + expect(hookTelemetryFound[0].hookCall.hook_output).toBeDefined(); + expect(hookTelemetryFound[0].hookCall.exit_code).toBe(0); + expect(hookTelemetryFound[0].hookCall.stdout).toBeDefined(); + expect(hookTelemetryFound[0].hookCall.stderr).toBeDefined(); }); - const result = await rig.run({ args: 'Tell me a story' }); - - // The hook should have replaced the request entirely - // Verify that the model responded to the modified request, not the original - expect(result).toBeDefined(); - expect(result.length).toBeGreaterThan(0); - // The response should contain the expected text from the modified request - expect(result.toLowerCase()).toContain('security hook modified'); - - // Should generate hook telemetry - - // Should generate hook telemetry - const hookTelemetryFound = rig.readHookLogs(); - expect(hookTelemetryFound.length).toBeGreaterThan(0); - expect(hookTelemetryFound[0].hookCall.hook_event_name).toBe( - 'BeforeModel', - ); - expect(hookTelemetryFound[0].hookCall.hook_name).toBe( - `node "${scriptPath}"`, - ); - expect(hookTelemetryFound[0].hookCall.hook_input).toBeDefined(); - expect(hookTelemetryFound[0].hookCall.hook_output).toBeDefined(); - expect(hookTelemetryFound[0].hookCall.exit_code).toBe(0); - expect(hookTelemetryFound[0].hookCall.stdout).toBeDefined(); - expect(hookTelemetryFound[0].hookCall.stderr).toBeDefined(); - }); - - it('should block model execution when BeforeModel hook returns deny decision', async () => { - rig.setup( - 'should block model execution when BeforeModel hook returns deny decision', - ); - const hookScript = `console.log(JSON.stringify({ + it('should block model execution when BeforeModel hook returns deny decision', async () => { + 
rig.setup( + 'should block model execution when BeforeModel hook returns deny decision', + ); + const hookScript = `console.log(JSON.stringify({ decision: "deny", reason: "Model execution blocked by security policy" }));`; - const scriptPath = rig.createScript( - 'before_model_deny_hook.cjs', - hookScript, - ); + const scriptPath = rig.createScript( + 'before_model_deny_hook.cjs', + hookScript, + ); - rig.setup( - 'should block model execution when BeforeModel hook returns deny decision', - { - settings: { - hooksConfig: { - enabled: true, - }, - hooks: { - BeforeModel: [ - { - sequential: true, - hooks: [ - { - type: 'command', - command: normalizePath(`node "${scriptPath}"`), - timeout: 5000, - }, - ], - }, - ], + rig.setup( + 'should block model execution when BeforeModel hook returns deny decision', + { + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + BeforeModel: [ + { + sequential: true, + hooks: [ + { + type: 'command', + command: normalizePath(`node "${scriptPath}"`), + timeout: 5000, + }, + ], + }, + ], + }, }, }, - }, - ); + ); - const result = await rig.run({ args: 'Hello' }); + const result = await rig.run({ args: 'Hello' }); - // The hook should have blocked the request - expect(result).toContain('Model execution blocked by security policy'); + // The hook should have blocked the request + expect(result).toContain('Model execution blocked by security policy'); - // Verify no API requests were made to the LLM - const apiRequests = rig.readAllApiRequest(); - expect(apiRequests).toHaveLength(0); - }); + // Verify no API requests were made to the LLM + const apiRequests = rig.readAllApiRequest(); + expect(apiRequests).toHaveLength(0); + }); - it('should block model execution when BeforeModel hook returns block decision', async () => { - rig.setup( - 'should block model execution when BeforeModel hook returns block decision', - ); - const hookScript = `console.log(JSON.stringify({ + it('should block model execution when BeforeModel hook 
returns block decision', async () => { + rig.setup( + 'should block model execution when BeforeModel hook returns block decision', + ); + const hookScript = `console.log(JSON.stringify({ decision: "block", reason: "Model execution blocked by security policy" }));`; - const scriptPath = rig.createScript( - 'before_model_block_hook.cjs', - hookScript, - ); + const scriptPath = rig.createScript( + 'before_model_block_hook.cjs', + hookScript, + ); - rig.setup( - 'should block model execution when BeforeModel hook returns block decision', - { - settings: { - hooksConfig: { - enabled: true, - }, - hooks: { - BeforeModel: [ - { - sequential: true, - hooks: [ - { - type: 'command', - command: normalizePath(`node "${scriptPath}"`), - timeout: 5000, - }, - ], - }, - ], + rig.setup( + 'should block model execution when BeforeModel hook returns block decision', + { + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + BeforeModel: [ + { + sequential: true, + hooks: [ + { + type: 'command', + command: normalizePath(`node "${scriptPath}"`), + timeout: 5000, + }, + ], + }, + ], + }, }, }, - }, - ); + ); - const result = await rig.run({ args: 'Hello' }); + const result = await rig.run({ args: 'Hello' }); - // The hook should have blocked the request - expect(result).toContain('Model execution blocked by security policy'); + // The hook should have blocked the request + expect(result).toContain('Model execution blocked by security policy'); - // Verify no API requests were made to the LLM - const apiRequests = rig.readAllApiRequest(); - expect(apiRequests).toHaveLength(0); + // Verify no API requests were made to the LLM + const apiRequests = rig.readAllApiRequest(); + expect(apiRequests).toHaveLength(0); + }); }); - }); - describe('AfterModel Hooks - LLM Response Modification', () => { - it.skipIf(process.platform === 'win32')( - 'should modify LLM responses with AfterModel hooks', - async () => { - rig.setup('should modify LLM responses with AfterModel hooks', { - 
fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.after-model.responses', - ), - }); - // Create a hook script that modifies the LLM response - const hookScript = `const fs = require('fs'); + describe('AfterModel Hooks - LLM Response Modification', () => { + it.skipIf(process.platform === 'win32')( + 'should modify LLM responses with AfterModel hooks', + async () => { + rig.setup('should modify LLM responses with AfterModel hooks', { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.after-model.responses', + ), + }); + // Create a hook script that modifies the LLM response + const hookScript = `const fs = require('fs'); console.log(JSON.stringify({ hookSpecificOutput: { hookEventName: "AfterModel", @@ -598,15 +609,148 @@ console.log(JSON.stringify({ } }));`; - const scriptPath = rig.createScript('after_model_hook.cjs', hookScript); + const scriptPath = rig.createScript( + 'after_model_hook.cjs', + hookScript, + ); - rig.setup('should modify LLM responses with AfterModel hooks', { + rig.setup('should modify LLM responses with AfterModel hooks', { + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + AfterModel: [ + { + hooks: [ + { + type: 'command', + command: normalizePath(`node "${scriptPath}"`), + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run({ args: 'What is 2 + 2?' }); + + // The hook should have replaced the model response + expect(result).toContain( + '[FILTERED] Response has been filtered for security compliance', + ); + + // Should generate hook telemetry + const hookTelemetryFound = + await rig.waitForTelemetryEvent('hook_call'); + expect(hookTelemetryFound).toBeTruthy(); + }, + ); + }); + + describe('BeforeToolSelection Hooks - Tool Configuration', () => { + it('should modify tool selection with BeforeToolSelection hooks', async () => { + // 1. 
Initial setup to establish test directory + rig.setup('BeforeToolSelection Hooks'); + + const toolConfigJson = JSON.stringify({ + decision: 'allow', + hookSpecificOutput: { + hookEventName: 'BeforeToolSelection', + toolConfig: { + mode: 'ANY', + allowedFunctionNames: ['read_file'], + }, + }, + }); + + // Use file-based hook to avoid quoting issues + const hookScript = `console.log(JSON.stringify(${toolConfigJson}));`; + const hookFilename = 'before_tool_selection_hook.js'; + const scriptPath = rig.createScript(hookFilename, hookScript); + + // 2. Final setup with script path + rig.setup('BeforeToolSelection Hooks', { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.before-tool-selection.responses', + ), + settings: { + debugMode: true, + hooksConfig: { + enabled: true, + }, + hooks: { + BeforeToolSelection: [ + { + hooks: [ + { + type: 'command', + command: normalizePath(`node "${scriptPath}"`), + timeout: 60000, + }, + ], + }, + ], + }, + }, + }); + + // Create a test file + rig.createFile('new_file_data.txt', 'test data'); + + await rig.run({ + args: 'Check the content of new_file_data.txt', + }); + + // Verify the hook was called for BeforeToolSelection event + const hookLogs = rig.readHookLogs(); + const beforeToolSelectionHook = hookLogs.find( + (log) => log.hookCall.hook_event_name === 'BeforeToolSelection', + ); + expect(beforeToolSelectionHook).toBeDefined(); + expect(beforeToolSelectionHook?.hookCall.success).toBe(true); + + // Verify hook telemetry shows it modified the config + expect( + JSON.stringify(beforeToolSelectionHook?.hookCall.hook_output), + ).toContain('read_file'); + }); + }); + + describe('BeforeAgent Hooks - Prompt Augmentation', () => { + it('should augment prompts with BeforeAgent hooks', async () => { + // Create a hook script that adds context to the prompt + const hookScript = `const fs = require('fs'); +console.log(JSON.stringify({ + decision: "allow", + hookSpecificOutput: { + hookEventName: "BeforeAgent", + 
additionalContext: "SYSTEM INSTRUCTION: You are in a secure environment. Always mention security compliance in your responses." + } +}));`; + + rig.setup('should augment prompts with BeforeAgent hooks', { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.before-agent.responses', + ), + }); + + const scriptPath = rig.createScript( + 'before_agent_hook.cjs', + hookScript, + ); + + rig.setup('should augment prompts with BeforeAgent hooks', { settings: { hooksConfig: { enabled: true, }, hooks: { - AfterModel: [ + BeforeAgent: [ { hooks: [ { @@ -621,335 +765,210 @@ console.log(JSON.stringify({ }, }); - const result = await rig.run({ args: 'What is 2 + 2?' }); + const result = await rig.run({ args: 'Hello, how are you?' }); - // The hook should have replaced the model response - expect(result).toContain( - '[FILTERED] Response has been filtered for security compliance', - ); + // The hook should have added security context, which should influence the response + expect(result).toContain('security'); // Should generate hook telemetry const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call'); expect(hookTelemetryFound).toBeTruthy(); - }, - ); - }); - - describe('BeforeToolSelection Hooks - Tool Configuration', () => { - it('should modify tool selection with BeforeToolSelection hooks', async () => { - // 1. Initial setup to establish test directory - rig.setup('BeforeToolSelection Hooks'); - - const toolConfigJson = JSON.stringify({ - decision: 'allow', - hookSpecificOutput: { - hookEventName: 'BeforeToolSelection', - toolConfig: { - mode: 'ANY', - allowedFunctionNames: ['read_file'], - }, - }, }); - - // Use file-based hook to avoid quoting issues - const hookScript = `console.log(JSON.stringify(${toolConfigJson}));`; - const hookFilename = 'before_tool_selection_hook.js'; - const scriptPath = rig.createScript(hookFilename, hookScript); - - // 2. 
Final setup with script path - rig.setup('BeforeToolSelection Hooks', { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.before-tool-selection.responses', - ), - settings: { - debugMode: true, - hooksConfig: { - enabled: true, - }, - hooks: { - BeforeToolSelection: [ - { - hooks: [ - { - type: 'command', - command: normalizePath(`node "${scriptPath}"`), - timeout: 60000, - }, - ], - }, - ], - }, - }, - }); - - // Create a test file - rig.createFile('new_file_data.txt', 'test data'); - - await rig.run({ - args: 'Check the content of new_file_data.txt', - }); - - // Verify the hook was called for BeforeToolSelection event - const hookLogs = rig.readHookLogs(); - const beforeToolSelectionHook = hookLogs.find( - (log) => log.hookCall.hook_event_name === 'BeforeToolSelection', - ); - expect(beforeToolSelectionHook).toBeDefined(); - expect(beforeToolSelectionHook?.hookCall.success).toBe(true); - - // Verify hook telemetry shows it modified the config - expect( - JSON.stringify(beforeToolSelectionHook?.hookCall.hook_output), - ).toContain('read_file'); }); - }); - describe('BeforeAgent Hooks - Prompt Augmentation', () => { - it('should augment prompts with BeforeAgent hooks', async () => { - // Create a hook script that adds context to the prompt - const hookScript = `const fs = require('fs'); -console.log(JSON.stringify({ - decision: "allow", - hookSpecificOutput: { - hookEventName: "BeforeAgent", - additionalContext: "SYSTEM INSTRUCTION: You are in a secure environment. Always mention security compliance in your responses." 
- } -}));`; + describe('Notification Hooks - Permission Handling', () => { + it('should handle notification hooks for tool permissions', async () => { + rig.setup('should handle notification hooks for tool permissions', { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.notification.responses', + ), + }); - rig.setup('should augment prompts with BeforeAgent hooks', { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.before-agent.responses', - ), - }); - - const scriptPath = rig.createScript('before_agent_hook.cjs', hookScript); - - rig.setup('should augment prompts with BeforeAgent hooks', { - settings: { - hooksConfig: { - enabled: true, - }, - hooks: { - BeforeAgent: [ - { - hooks: [ - { - type: 'command', - command: normalizePath(`node "${scriptPath}"`), - timeout: 5000, - }, - ], - }, - ], - }, - }, - }); - - const result = await rig.run({ args: 'Hello, how are you?' }); - - // The hook should have added security context, which should influence the response - expect(result).toContain('security'); - - // Should generate hook telemetry - const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call'); - expect(hookTelemetryFound).toBeTruthy(); - }); - }); - - describe('Notification Hooks - Permission Handling', () => { - it('should handle notification hooks for tool permissions', async () => { - rig.setup('should handle notification hooks for tool permissions', { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.notification.responses', - ), - }); - - // Create script file for hook - const scriptPath = rig.createScript( - 'notification_hook.cjs', - "console.log(JSON.stringify({suppressOutput: false, systemMessage: 'Permission request logged by security hook'}));", - ); - - const hookCommand = `node "${scriptPath}"`; - - rig.setup('should handle notification hooks for tool permissions', { - settings: { - // Configure tools to enable hooks and require confirmation to trigger notifications - tools: { - 
approval: 'ASK', // Disable YOLO mode to show permission prompts - confirmationRequired: ['run_shell_command'], - }, - hooksConfig: { - enabled: true, - }, - hooks: { - Notification: [ - { - matcher: 'ToolPermission', - sequential: true, - hooks: [ - { - type: 'command', - command: normalizePath(hookCommand), - timeout: 5000, - }, - ], - }, - ], - }, - }, - }); - - const run = await rig.runInteractive({ approvalMode: 'default' }); - - // Send prompt that will trigger a permission request - await run.type('Run the command "echo test"'); - await run.type('\r'); - - // Wait for permission prompt to appear - await run.expectText('Allow', 10000); - - // Approve the permission - await run.type('y'); - await run.type('\r'); - - // Wait for command to execute - await run.expectText('test', 10000); - - // Should find the shell command execution - const foundShellCommand = await rig.waitForToolCall('run_shell_command'); - expect(foundShellCommand).toBeTruthy(); - - // Verify Notification hook executed - const hookLogs = rig.readHookLogs(); - const notificationLog = hookLogs.find( - (log) => - log.hookCall.hook_event_name === 'Notification' && - log.hookCall.hook_name === normalizePath(hookCommand), - ); - - expect(notificationLog).toBeDefined(); - if (notificationLog) { - expect(notificationLog.hookCall.exit_code).toBe(0); - expect(notificationLog.hookCall.stdout).toContain( - 'Permission request logged by security hook', + // Create script file for hook + const scriptPath = rig.createScript( + 'notification_hook.cjs', + "console.log(JSON.stringify({suppressOutput: false, systemMessage: 'Permission request logged by security hook'}));", ); - // Verify hook input contains notification details - const hookInputStr = - typeof notificationLog.hookCall.hook_input === 'string' - ? 
notificationLog.hookCall.hook_input - : JSON.stringify(notificationLog.hookCall.hook_input); - const hookInput = JSON.parse(hookInputStr) as Record; + const hookCommand = `node "${scriptPath}"`; - // Should have notification type (uses snake_case) - expect(hookInput['notification_type']).toBe('ToolPermission'); - - // Should have message - expect(hookInput['message']).toBeDefined(); - - // Should have details with tool info - expect(hookInput['details']).toBeDefined(); - const details = hookInput['details'] as Record; - // For 'exec' type confirmations, details contains: type, title, command, rootCommand - expect(details['type']).toBe('exec'); - expect(details['command']).toBeDefined(); - expect(details['title']).toBeDefined(); - } - }); - }); - - describe('Sequential Hook Execution', () => { - it('should execute hooks sequentially when configured', async () => { - rig.setup('should execute hooks sequentially when configured', { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.sequential-execution.responses', - ), - }); - - // Create script files for hooks - const hook1Path = rig.createScript( - 'seq_hook1.cjs', - "console.log(JSON.stringify({decision: 'allow', hookSpecificOutput: {hookEventName: 'BeforeAgent', additionalContext: 'Step 1: Initial validation passed.'}}));", - ); - const hook2Path = rig.createScript( - 'seq_hook2.cjs', - "console.log(JSON.stringify({decision: 'allow', hookSpecificOutput: {hookEventName: 'BeforeAgent', additionalContext: 'Step 2: Security check completed.'}}));", - ); - - const hook1Command = `node "${hook1Path}"`; - const hook2Command = `node "${hook2Path}"`; - - rig.setup('should execute hooks sequentially when configured', { - settings: { - hooksConfig: { - enabled: true, + rig.setup('should handle notification hooks for tool permissions', { + settings: { + // Configure tools to enable hooks and require confirmation to trigger notifications + tools: { + approval: 'ASK', // Disable YOLO mode to show permission 
prompts + confirmationRequired: ['run_shell_command'], + }, + hooksConfig: { + enabled: true, + }, + hooks: { + Notification: [ + { + matcher: 'ToolPermission', + sequential: true, + hooks: [ + { + type: 'command', + command: normalizePath(hookCommand), + timeout: 5000, + }, + ], + }, + ], + }, }, - hooks: { - BeforeAgent: [ - { - sequential: true, - hooks: [ - { - type: 'command', - command: normalizePath(hook1Command), - timeout: 5000, - }, - { - type: 'command', - command: normalizePath(hook2Command), - timeout: 5000, - }, - ], - }, - ], - }, - }, + }); + + const run = await rig.runInteractive({ approvalMode: 'default' }); + + // Send prompt that will trigger a permission request + await run.type('Run the command "echo test"'); + await run.type('\r'); + + // Wait for permission prompt to appear + await run.expectText('Allow', 10000); + + // Approve the permission + await run.type('y'); + await run.type('\r'); + + // Wait for command to execute + await run.expectText('test', 10000); + + // Should find the shell command execution + const foundShellCommand = + await rig.waitForToolCall('run_shell_command'); + expect(foundShellCommand).toBeTruthy(); + + // Verify Notification hook executed + const hookLogs = rig.readHookLogs(); + const notificationLog = hookLogs.find( + (log) => + log.hookCall.hook_event_name === 'Notification' && + log.hookCall.hook_name === normalizePath(hookCommand), + ); + + expect(notificationLog).toBeDefined(); + if (notificationLog) { + expect(notificationLog.hookCall.exit_code).toBe(0); + expect(notificationLog.hookCall.stdout).toContain( + 'Permission request logged by security hook', + ); + + // Verify hook input contains notification details + const hookInputStr = + typeof notificationLog.hookCall.hook_input === 'string' + ? 
notificationLog.hookCall.hook_input + : JSON.stringify(notificationLog.hookCall.hook_input); + const hookInput = JSON.parse(hookInputStr) as Record; + + // Should have notification type (uses snake_case) + expect(hookInput['notification_type']).toBe('ToolPermission'); + + // Should have message + expect(hookInput['message']).toBeDefined(); + + // Should have details with tool info + expect(hookInput['details']).toBeDefined(); + const details = hookInput['details'] as Record; + // For 'exec' type confirmations, details contains: type, title, command, rootCommand + expect(details['type']).toBe('exec'); + expect(details['command']).toBeDefined(); + expect(details['title']).toBeDefined(); + } }); - - await rig.run({ args: 'Hello, please help me with a task' }); - - // Should generate hook telemetry - const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call'); - expect(hookTelemetryFound).toBeTruthy(); - - // Verify both hooks executed - const hookLogs = rig.readHookLogs(); - const hook1Log = hookLogs.find( - (log) => log.hookCall.hook_name === normalizePath(hook1Command), - ); - const hook2Log = hookLogs.find( - (log) => log.hookCall.hook_name === normalizePath(hook2Command), - ); - - expect(hook1Log).toBeDefined(); - expect(hook1Log?.hookCall.exit_code).toBe(0); - expect(hook1Log?.hookCall.stdout).toContain( - 'Step 1: Initial validation passed', - ); - - expect(hook2Log).toBeDefined(); - expect(hook2Log?.hookCall.exit_code).toBe(0); - expect(hook2Log?.hookCall.stdout).toContain( - 'Step 2: Security check completed', - ); }); - }); - describe('Hook Input/Output Validation', () => { - it('should provide correct input format to hooks', async () => { - rig.setup('should provide correct input format to hooks', { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.input-validation.responses', - ), + describe('Sequential Hook Execution', () => { + it('should execute hooks sequentially when configured', async () => { + rig.setup('should execute 
hooks sequentially when configured', { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.sequential-execution.responses', + ), + }); + + // Create script files for hooks + const hook1Path = rig.createScript( + 'seq_hook1.cjs', + "console.log(JSON.stringify({decision: 'allow', hookSpecificOutput: {hookEventName: 'BeforeAgent', additionalContext: 'Step 1: Initial validation passed.'}}));", + ); + const hook2Path = rig.createScript( + 'seq_hook2.cjs', + "console.log(JSON.stringify({decision: 'allow', hookSpecificOutput: {hookEventName: 'BeforeAgent', additionalContext: 'Step 2: Security check completed.'}}));", + ); + + const hook1Command = `node "${hook1Path}"`; + const hook2Command = `node "${hook2Path}"`; + + rig.setup('should execute hooks sequentially when configured', { + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + BeforeAgent: [ + { + sequential: true, + hooks: [ + { + type: 'command', + command: normalizePath(hook1Command), + timeout: 5000, + }, + { + type: 'command', + command: normalizePath(hook2Command), + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + await rig.run({ args: 'Hello, please help me with a task' }); + + // Should generate hook telemetry + const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call'); + expect(hookTelemetryFound).toBeTruthy(); + + // Verify both hooks executed + const hookLogs = rig.readHookLogs(); + const hook1Log = hookLogs.find( + (log) => log.hookCall.hook_name === normalizePath(hook1Command), + ); + const hook2Log = hookLogs.find( + (log) => log.hookCall.hook_name === normalizePath(hook2Command), + ); + + expect(hook1Log).toBeDefined(); + expect(hook1Log?.hookCall.exit_code).toBe(0); + expect(hook1Log?.hookCall.stdout).toContain( + 'Step 1: Initial validation passed', + ); + + expect(hook2Log).toBeDefined(); + expect(hook2Log?.hookCall.exit_code).toBe(0); + expect(hook2Log?.hookCall.stdout).toContain( + 'Step 2: Security check completed', + ); }); - // Create a hook 
script that validates the input format - const hookScript = `const fs = require('fs'); + }); + + describe('Hook Input/Output Validation', () => { + it('should provide correct input format to hooks', async () => { + rig.setup('should provide correct input format to hooks', { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.input-validation.responses', + ), + }); + // Create a hook script that validates the input format + const hookScript = `const fs = require('fs'); const input = fs.readFileSync(0, 'utf-8'); try { const json = JSON.parse(input); @@ -963,69 +982,12 @@ try { console.log(JSON.stringify({decision: "block", reason: "Invalid JSON"})); }`; - const scriptPath = rig.createScript( - 'input_validation_hook.cjs', - hookScript, - ); + const scriptPath = rig.createScript( + 'input_validation_hook.cjs', + hookScript, + ); - rig.setup('should provide correct input format to hooks', { - settings: { - hooksConfig: { - enabled: true, - }, - hooks: { - BeforeTool: [ - { - hooks: [ - { - type: 'command', - command: normalizePath(`node "${scriptPath}"`), - timeout: 5000, - }, - ], - }, - ], - }, - }, - }); - - await rig.run({ - args: 'Create a file called input-test.txt with content "test"', - }); - - // Hook should validate input format successfully - const foundWriteFile = await rig.waitForToolCall('write_file'); - expect(foundWriteFile).toBeTruthy(); - - // Check that the file was created (hook allowed it) - const fileContent = rig.readFile('input-test.txt'); - expect(fileContent).toContain('test'); - - // Should generate hook telemetry - const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call'); - expect(hookTelemetryFound).toBeTruthy(); - }); - - it('should treat mixed stdout (text + JSON) as system message and allow execution when exit code is 0', async () => { - rig.setup( - 'should treat mixed stdout (text + JSON) as system message and allow execution when exit code is 0', - { - fakeResponsesPath: join( - import.meta.dirname, - 
'hooks-system.allow-tool.responses', - ), - }, - ); - - // Create script file for hook - const scriptPath = rig.createScript( - 'pollution_hook.cjs', - "console.log('Pollution'); console.log(JSON.stringify({decision: 'deny', reason: 'Should be ignored'}));", - ); - - rig.setup( - 'should treat mixed stdout (text + JSON) as system message and allow execution when exit code is 0', - { + rig.setup('should provide correct input format to hooks', { settings: { hooksConfig: { enabled: true, @@ -1033,13 +995,9 @@ try { hooks: { BeforeTool: [ { - matcher: 'write_file', - sequential: true, hooks: [ { type: 'command', - // Output plain text then JSON. - // This breaks JSON parsing, so it falls back to 'allow' with the whole stdout as systemMessage. command: normalizePath(`node "${scriptPath}"`), timeout: 5000, }, @@ -1048,341 +1006,402 @@ try { ], }, }, - }, - ); + }); - const result = await rig.run({ - args: 'Create a file called approved.txt with content "Approved content"', + await rig.run({ + args: 'Create a file called input-test.txt with content "test"', + }); + + // Hook should validate input format successfully + const foundWriteFile = await rig.waitForToolCall('write_file'); + expect(foundWriteFile).toBeTruthy(); + + // Check that the file was created (hook allowed it) + const fileContent = rig.readFile('input-test.txt'); + expect(fileContent).toContain('test'); + + // Should generate hook telemetry + const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call'); + expect(hookTelemetryFound).toBeTruthy(); }); - // The hook logic fails to parse JSON, so it allows the tool. 
- const foundWriteFile = await rig.waitForToolCall('write_file'); - expect(foundWriteFile).toBeTruthy(); - - // The entire stdout (including the JSON part) becomes the systemMessage - expect(result).toContain('Pollution'); - expect(result).toContain('Should be ignored'); - }); - }); - - describe('Multiple Event Types', () => { - it('should handle hooks for all major event types', async () => { - rig.setup('should handle hooks for all major event types', { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.multiple-events.responses', - ), - }); - - // Create script files for hooks - const btPath = rig.createScript( - 'bt_hook.cjs', - "console.log(JSON.stringify({decision: 'allow', systemMessage: 'BeforeTool: File operation logged'}));", - ); - const atPath = rig.createScript( - 'at_hook.cjs', - "console.log(JSON.stringify({hookSpecificOutput: {hookEventName: 'AfterTool', additionalContext: 'AfterTool: Operation completed successfully'}}));", - ); - const baPath = rig.createScript( - 'ba_hook.cjs', - "console.log(JSON.stringify({decision: 'allow', hookSpecificOutput: {hookEventName: 'BeforeAgent', additionalContext: 'BeforeAgent: User request processed'}}));", - ); - - const beforeToolCommand = `node "${btPath}"`; - const afterToolCommand = `node "${atPath}"`; - const beforeAgentCommand = `node "${baPath}"`; - - rig.setup('should handle hooks for all major event types', { - settings: { - hooksConfig: { - enabled: true, + it('should treat mixed stdout (text + JSON) as system message and allow execution when exit code is 0', async () => { + rig.setup( + 'should treat mixed stdout (text + JSON) as system message and allow execution when exit code is 0', + { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.allow-tool.responses', + ), }, - hooks: { - BeforeAgent: [ - { - hooks: [ - { - type: 'command', - command: normalizePath(beforeAgentCommand), - timeout: 5000, - }, - ], - }, - ], - BeforeTool: [ - { - matcher: 'write_file', - 
sequential: true, - hooks: [ - { - type: 'command', - command: normalizePath(beforeToolCommand), - timeout: 5000, - }, - ], - }, - ], - AfterTool: [ - { - matcher: 'write_file', - sequential: true, - hooks: [ - { - type: 'command', - command: normalizePath(afterToolCommand), - timeout: 5000, - }, - ], - }, - ], - }, - }, - }); - - const result = await rig.run({ - args: - 'Create a file called multi-event-test.txt with content ' + - '"testing multiple events", and then please reply with ' + - 'everything I say just after this:"', - }); - - // Should execute write_file tool - const foundWriteFile = await rig.waitForToolCall('write_file'); - expect(foundWriteFile).toBeTruthy(); - - // File should be created - const fileContent = rig.readFile('multi-event-test.txt'); - expect(fileContent).toContain('testing multiple events'); - - // Result should contain context from all hooks - expect(result).toContain('BeforeTool: File operation logged'); - - // Should generate hook telemetry - const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call'); - expect(hookTelemetryFound).toBeTruthy(); - - // Verify all three hooks executed - const hookLogs = rig.readHookLogs(); - const beforeAgentLog = hookLogs.find( - (log) => log.hookCall.hook_name === normalizePath(beforeAgentCommand), - ); - const beforeToolLog = hookLogs.find( - (log) => log.hookCall.hook_name === normalizePath(beforeToolCommand), - ); - const afterToolLog = hookLogs.find( - (log) => log.hookCall.hook_name === normalizePath(afterToolCommand), - ); - - expect(beforeAgentLog).toBeDefined(); - expect(beforeAgentLog?.hookCall.exit_code).toBe(0); - expect(beforeAgentLog?.hookCall.stdout).toContain( - 'BeforeAgent: User request processed', - ); - - expect(beforeToolLog).toBeDefined(); - expect(beforeToolLog?.hookCall.exit_code).toBe(0); - expect(beforeToolLog?.hookCall.stdout).toContain( - 'BeforeTool: File operation logged', - ); - - expect(afterToolLog).toBeDefined(); - 
expect(afterToolLog?.hookCall.exit_code).toBe(0); - expect(afterToolLog?.hookCall.stdout).toContain( - 'AfterTool: Operation completed successfully', - ); - }); - }); - - describe('Hook Error Handling', () => { - it('should handle hook failures gracefully', async () => { - rig.setup('should handle hook failures gracefully', { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.error-handling.responses', - ), - }); - // Create script files for hooks - const failingPath = join(rig.testDir!, 'fail_hook.cjs'); - writeFileSync(failingPath, 'process.exit(1);'); - const workingPath = join(rig.testDir!, 'work_hook.cjs'); - writeFileSync( - workingPath, - "console.log(JSON.stringify({decision: 'allow', reason: 'Working hook succeeded'}));", - ); - - // Failing hook: exits with non-zero code - const failingCommand = `node "${failingPath}"`; - // Working hook: returns success with JSON - const workingCommand = `node "${workingPath}"`; - - rig.setup('should handle hook failures gracefully', { - settings: { - hooksConfig: { - enabled: true, - }, - hooks: { - BeforeTool: [ - { - hooks: [ - { - type: 'command', - command: normalizePath(failingCommand), - timeout: 5000, - }, - { - type: 'command', - command: normalizePath(workingCommand), - timeout: 5000, - }, - ], - }, - ], - }, - }, - }); - - await rig.run({ - args: 'Create a file called error-test.txt with content "testing error handling"', - }); - - // Despite one hook failing, the working hook should still allow the operation - const foundWriteFile = await rig.waitForToolCall('write_file'); - expect(foundWriteFile).toBeTruthy(); - - // File should be created - const fileContent = rig.readFile('error-test.txt'); - expect(fileContent).toContain('testing error handling'); - - // Should generate hook telemetry - const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call'); - expect(hookTelemetryFound).toBeTruthy(); - }); - }); - - describe('Hook Telemetry and Observability', () => { - it('should generate 
telemetry events for hook executions', async () => { - rig.setup('should generate telemetry events for hook executions', { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.telemetry.responses', - ), - }); - - // Create script file for hook - const scriptPath = rig.createScript( - 'telemetry_hook.cjs', - "console.log(JSON.stringify({decision: 'allow', reason: 'Telemetry test hook'}));", - ); - - const hookCommand = `node "${scriptPath}"`; - - rig.setup('should generate telemetry events for hook executions', { - settings: { - hooksConfig: { - enabled: true, - }, - hooks: { - BeforeTool: [ - { - hooks: [ - { - type: 'command', - command: normalizePath(hookCommand), - timeout: 5000, - }, - ], - }, - ], - }, - }, - }); - - await rig.run({ args: 'Create a file called telemetry-test.txt' }); - - // Should execute the tool - const foundWriteFile = await rig.waitForToolCall('write_file'); - expect(foundWriteFile).toBeTruthy(); - - // Should generate hook telemetry - const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call'); - expect(hookTelemetryFound).toBeTruthy(); - }); - }); - - describe('Session Lifecycle Hooks', () => { - it('should fire SessionStart hook on app startup', async () => { - rig.setup('should fire SessionStart hook on app startup', { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.session-startup.responses', - ), - }); - - // Create script file for hook - const scriptPath = rig.createScript( - 'session_start_hook.cjs', - "console.log(JSON.stringify({decision: 'allow', systemMessage: 'Session starting on startup'}));", - ); - - const sessionStartCommand = `node "${scriptPath}"`; - - rig.setup('should fire SessionStart hook on app startup', { - settings: { - hooksConfig: { - enabled: true, - }, - hooks: { - SessionStart: [ - { - matcher: 'startup', - sequential: true, - hooks: [ - { - type: 'command', - command: normalizePath(sessionStartCommand), - timeout: 5000, - }, - ], - }, - ], - }, - }, - }); - - // Run 
a simple query - the SessionStart hook will fire during app initialization - await rig.run({ args: 'Say hello' }); - - // Verify hook executed with correct parameters - const hookLogs = rig.readHookLogs(); - const sessionStartLog = hookLogs.find( - (log) => log.hookCall.hook_event_name === 'SessionStart', - ); - - expect(sessionStartLog).toBeDefined(); - if (sessionStartLog) { - expect(sessionStartLog.hookCall.hook_name).toBe( - normalizePath(sessionStartCommand), ); - expect(sessionStartLog.hookCall.exit_code).toBe(0); - expect(sessionStartLog.hookCall.hook_input).toBeDefined(); - // hook_input is a string that needs to be parsed - const hookInputStr = - typeof sessionStartLog.hookCall.hook_input === 'string' - ? sessionStartLog.hookCall.hook_input - : JSON.stringify(sessionStartLog.hookCall.hook_input); - const hookInput = JSON.parse(hookInputStr) as Record; - - expect(hookInput['source']).toBe('startup'); - expect(sessionStartLog.hookCall.stdout).toContain( - 'Session starting on startup', + // Create script file for hook + const scriptPath = rig.createScript( + 'pollution_hook.cjs', + "console.log('Pollution'); console.log(JSON.stringify({decision: 'deny', reason: 'Should be ignored'}));", ); - } + + rig.setup( + 'should treat mixed stdout (text + JSON) as system message and allow execution when exit code is 0', + { + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + BeforeTool: [ + { + matcher: 'write_file', + sequential: true, + hooks: [ + { + type: 'command', + // Output plain text then JSON. + // This breaks JSON parsing, so it falls back to 'allow' with the whole stdout as systemMessage. + command: normalizePath(`node "${scriptPath}"`), + timeout: 5000, + }, + ], + }, + ], + }, + }, + }, + ); + + const result = await rig.run({ + args: 'Create a file called approved.txt with content "Approved content"', + }); + + // The hook logic fails to parse JSON, so it allows the tool. 
+ const foundWriteFile = await rig.waitForToolCall('write_file'); + expect(foundWriteFile).toBeTruthy(); + + // The entire stdout (including the JSON part) becomes the systemMessage + expect(result).toContain('Pollution'); + expect(result).toContain('Should be ignored'); + }); }); - it('should fire SessionStart hook and inject context', async () => { - // Create hook script that outputs JSON with additionalContext - const hookScript = `const fs = require('fs'); + describe('Multiple Event Types', () => { + it('should handle hooks for all major event types', async () => { + rig.setup('should handle hooks for all major event types', { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.multiple-events.responses', + ), + }); + + // Create script files for hooks + const btPath = rig.createScript( + 'bt_hook.cjs', + "console.log(JSON.stringify({decision: 'allow', systemMessage: 'BeforeTool: File operation logged'}));", + ); + const atPath = rig.createScript( + 'at_hook.cjs', + "console.log(JSON.stringify({hookSpecificOutput: {hookEventName: 'AfterTool', additionalContext: 'AfterTool: Operation completed successfully'}}));", + ); + const baPath = rig.createScript( + 'ba_hook.cjs', + "console.log(JSON.stringify({decision: 'allow', hookSpecificOutput: {hookEventName: 'BeforeAgent', additionalContext: 'BeforeAgent: User request processed'}}));", + ); + + const beforeToolCommand = `node "${btPath}"`; + const afterToolCommand = `node "${atPath}"`; + const beforeAgentCommand = `node "${baPath}"`; + + rig.setup('should handle hooks for all major event types', { + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + BeforeAgent: [ + { + hooks: [ + { + type: 'command', + command: normalizePath(beforeAgentCommand), + timeout: 5000, + }, + ], + }, + ], + BeforeTool: [ + { + matcher: 'write_file', + sequential: true, + hooks: [ + { + type: 'command', + command: normalizePath(beforeToolCommand), + timeout: 5000, + }, + ], + }, + ], + AfterTool: [ + { + 
matcher: 'write_file', + sequential: true, + hooks: [ + { + type: 'command', + command: normalizePath(afterToolCommand), + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run({ + args: + 'Create a file called multi-event-test.txt with content ' + + '"testing multiple events", and then please reply with ' + + 'everything I say just after this:"', + }); + + // Should execute write_file tool + const foundWriteFile = await rig.waitForToolCall('write_file'); + expect(foundWriteFile).toBeTruthy(); + + // File should be created + const fileContent = rig.readFile('multi-event-test.txt'); + expect(fileContent).toContain('testing multiple events'); + + // Result should contain context from all hooks + expect(result).toContain('BeforeTool: File operation logged'); + + // Should generate hook telemetry + const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call'); + expect(hookTelemetryFound).toBeTruthy(); + + // Verify all three hooks executed + const hookLogs = rig.readHookLogs(); + const beforeAgentLog = hookLogs.find( + (log) => log.hookCall.hook_name === normalizePath(beforeAgentCommand), + ); + const beforeToolLog = hookLogs.find( + (log) => log.hookCall.hook_name === normalizePath(beforeToolCommand), + ); + const afterToolLog = hookLogs.find( + (log) => log.hookCall.hook_name === normalizePath(afterToolCommand), + ); + + expect(beforeAgentLog).toBeDefined(); + expect(beforeAgentLog?.hookCall.exit_code).toBe(0); + expect(beforeAgentLog?.hookCall.stdout).toContain( + 'BeforeAgent: User request processed', + ); + + expect(beforeToolLog).toBeDefined(); + expect(beforeToolLog?.hookCall.exit_code).toBe(0); + expect(beforeToolLog?.hookCall.stdout).toContain( + 'BeforeTool: File operation logged', + ); + + expect(afterToolLog).toBeDefined(); + expect(afterToolLog?.hookCall.exit_code).toBe(0); + expect(afterToolLog?.hookCall.stdout).toContain( + 'AfterTool: Operation completed successfully', + ); + }); + }); + + describe('Hook Error 
Handling', () => { + it('should handle hook failures gracefully', async () => { + rig.setup('should handle hook failures gracefully', { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.error-handling.responses', + ), + }); + // Create script files for hooks + const failingPath = join(rig.testDir!, 'fail_hook.cjs'); + writeFileSync(failingPath, 'process.exit(1);'); + const workingPath = join(rig.testDir!, 'work_hook.cjs'); + writeFileSync( + workingPath, + "console.log(JSON.stringify({decision: 'allow', reason: 'Working hook succeeded'}));", + ); + + // Failing hook: exits with non-zero code + const failingCommand = `node "${failingPath}"`; + // Working hook: returns success with JSON + const workingCommand = `node "${workingPath}"`; + + rig.setup('should handle hook failures gracefully', { + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + BeforeTool: [ + { + hooks: [ + { + type: 'command', + command: normalizePath(failingCommand), + timeout: 5000, + }, + { + type: 'command', + command: normalizePath(workingCommand), + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + await rig.run({ + args: 'Create a file called error-test.txt with content "testing error handling"', + }); + + // Despite one hook failing, the working hook should still allow the operation + const foundWriteFile = await rig.waitForToolCall('write_file'); + expect(foundWriteFile).toBeTruthy(); + + // File should be created + const fileContent = rig.readFile('error-test.txt'); + expect(fileContent).toContain('testing error handling'); + + // Should generate hook telemetry + const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call'); + expect(hookTelemetryFound).toBeTruthy(); + }); + }); + + describe('Hook Telemetry and Observability', () => { + it('should generate telemetry events for hook executions', async () => { + rig.setup('should generate telemetry events for hook executions', { + fakeResponsesPath: join( + import.meta.dirname, + 
'hooks-system.telemetry.responses', + ), + }); + + // Create script file for hook + const scriptPath = rig.createScript( + 'telemetry_hook.cjs', + "console.log(JSON.stringify({decision: 'allow', reason: 'Telemetry test hook'}));", + ); + + const hookCommand = `node "${scriptPath}"`; + + rig.setup('should generate telemetry events for hook executions', { + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + BeforeTool: [ + { + hooks: [ + { + type: 'command', + command: normalizePath(hookCommand), + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + await rig.run({ args: 'Create a file called telemetry-test.txt' }); + + // Should execute the tool + const foundWriteFile = await rig.waitForToolCall('write_file'); + expect(foundWriteFile).toBeTruthy(); + + // Should generate hook telemetry + const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call'); + expect(hookTelemetryFound).toBeTruthy(); + }); + }); + + describe('Session Lifecycle Hooks', () => { + it('should fire SessionStart hook on app startup', async () => { + rig.setup('should fire SessionStart hook on app startup', { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.session-startup.responses', + ), + }); + + // Create script file for hook + const scriptPath = rig.createScript( + 'session_start_hook.cjs', + "console.log(JSON.stringify({decision: 'allow', systemMessage: 'Session starting on startup'}));", + ); + + const sessionStartCommand = `node "${scriptPath}"`; + + rig.setup('should fire SessionStart hook on app startup', { + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + SessionStart: [ + { + matcher: 'startup', + sequential: true, + hooks: [ + { + type: 'command', + command: normalizePath(sessionStartCommand), + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + // Run a simple query - the SessionStart hook will fire during app initialization + await rig.run({ args: 'Say hello' }); + + // Verify hook executed with correct parameters + const 
hookLogs = rig.readHookLogs(); + const sessionStartLog = hookLogs.find( + (log) => log.hookCall.hook_event_name === 'SessionStart', + ); + + expect(sessionStartLog).toBeDefined(); + if (sessionStartLog) { + expect(sessionStartLog.hookCall.hook_name).toBe( + normalizePath(sessionStartCommand), + ); + expect(sessionStartLog.hookCall.exit_code).toBe(0); + expect(sessionStartLog.hookCall.hook_input).toBeDefined(); + + // hook_input is a string that needs to be parsed + const hookInputStr = + typeof sessionStartLog.hookCall.hook_input === 'string' + ? sessionStartLog.hookCall.hook_input + : JSON.stringify(sessionStartLog.hookCall.hook_input); + const hookInput = JSON.parse(hookInputStr) as Record; + + expect(hookInput['source']).toBe('startup'); + expect(sessionStartLog.hookCall.stdout).toContain( + 'Session starting on startup', + ); + } + }); + + it('should fire SessionStart hook and inject context', async () => { + // Create hook script that outputs JSON with additionalContext + const hookScript = `const fs = require('fs'); console.log(JSON.stringify({ decision: 'allow', systemMessage: 'Context injected via SessionStart hook', @@ -1392,104 +1411,19 @@ console.log(JSON.stringify({ } }));`; - rig.setup('should fire SessionStart hook and inject context', { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.session-startup.responses', - ), - }); - - const scriptPath = rig.createScript( - 'session_start_context_hook.cjs', - hookScript, - ); - - rig.setup('should fire SessionStart hook and inject context', { - settings: { - hooksConfig: { - enabled: true, - }, - hooks: { - SessionStart: [ - { - matcher: 'startup', - sequential: true, - hooks: [ - { - type: 'command', - command: normalizePath(`node "${scriptPath}"`), - timeout: 5000, - }, - ], - }, - ], - }, - }, - }); - - // Run a query - the SessionStart hook will fire during app initialization - const result = await rig.run({ args: 'Who are you?' 
}); - - // Check if systemMessage was displayed (in stderr, which rig.run captures) - expect(result).toContain('Context injected via SessionStart hook'); - - // Check if additionalContext influenced the model response - // Note: We use fake responses, but the rig records interactions. - // If we are using fake responses, the model won't actually respond unless we provide a fake response for the injected context. - // But the test rig setup uses 'hooks-system.session-startup.responses'. - // If I'm adding a new test, I might need to generate new fake responses or expect the context to be sent to the model (verify API logs). - - // Verify hook executed - const hookLogs = rig.readHookLogs(); - const sessionStartLog = hookLogs.find( - (log) => log.hookCall.hook_event_name === 'SessionStart', - ); - - expect(sessionStartLog).toBeDefined(); - - // Verify the API request contained the injected context - // rig.readAllApiRequest() gives us telemetry on API requests. - const apiRequests = rig.readAllApiRequest(); - // We expect at least one API request - expect(apiRequests.length).toBeGreaterThan(0); - - // The injected context should be in the request text - // For non-interactive mode, I prepended it to input: "context\n\ninput" - // The telemetry `request_text` should contain it. - const requestText = apiRequests[0].attributes?.request_text || ''; - expect(requestText).toContain('protocol droid'); - }); - - it('should fire SessionStart hook and display systemMessage in interactive mode', async () => { - // Create hook script that outputs JSON with systemMessage and additionalContext - const hookScript = `const fs = require('fs'); -console.log(JSON.stringify({ - decision: 'allow', - systemMessage: 'Interactive Session Start Message', - hookSpecificOutput: { - hookEventName: 'SessionStart', - additionalContext: 'The user is a Jedi Master.' 
- } -}));`; - - rig.setup( - 'should fire SessionStart hook and display systemMessage in interactive mode', - { + rig.setup('should fire SessionStart hook and inject context', { fakeResponsesPath: join( import.meta.dirname, 'hooks-system.session-startup.responses', ), - }, - ); + }); - const scriptPath = rig.createScript( - 'session_start_interactive_hook.cjs', - hookScript, - ); + const scriptPath = rig.createScript( + 'session_start_context_hook.cjs', + hookScript, + ); - rig.setup( - 'should fire SessionStart hook and display systemMessage in interactive mode', - { + rig.setup('should fire SessionStart hook and inject context', { settings: { hooksConfig: { enabled: true, @@ -1510,70 +1444,418 @@ console.log(JSON.stringify({ ], }, }, - }, - ); + }); - const run = await rig.runInteractive(); + // Run a query - the SessionStart hook will fire during app initialization + const result = await rig.run({ args: 'Who are you?' }); - // Verify systemMessage is displayed - await run.expectText('Interactive Session Start Message', 10000); + // Check if systemMessage was displayed (in stderr, which rig.run captures) + expect(result).toContain('Context injected via SessionStart hook'); - // Send a prompt to establish a session and trigger an API call - await run.sendKeys('Hello'); - await run.type('\r'); + // Check if additionalContext influenced the model response + // Note: We use fake responses, but the rig records interactions. + // If we are using fake responses, the model won't actually respond unless we provide a fake response for the injected context. + // But the test rig setup uses 'hooks-system.session-startup.responses'. + // If I'm adding a new test, I might need to generate new fake responses or expect the context to be sent to the model (verify API logs). 
- // Wait for response to ensure API call happened - await run.expectText('Hello', 15000); + // Verify hook executed + const hookLogs = rig.readHookLogs(); + const sessionStartLog = hookLogs.find( + (log) => log.hookCall.hook_event_name === 'SessionStart', + ); - // Wait for telemetry to be written to disk - await rig.waitForTelemetryReady(); + expect(sessionStartLog).toBeDefined(); - // Verify the API request contained the injected context - // We may need to poll for API requests as they are written asynchronously - const pollResult = await poll( - () => { - const apiRequests = rig.readAllApiRequest(); - return apiRequests.length > 0; - }, - 15000, - 500, - ); + // Verify the API request contained the injected context + // rig.readAllApiRequest() gives us telemetry on API requests. + const apiRequests = rig.readAllApiRequest(); + // We expect at least one API request + expect(apiRequests.length).toBeGreaterThan(0); - expect(pollResult).toBe(true); + // The injected context should be in the request text + // For non-interactive mode, I prepended it to input: "context\n\ninput" + // The telemetry `request_text` should contain it. + const requestText = apiRequests[0].attributes?.request_text || ''; + expect(requestText).toContain('protocol droid'); + }); - const apiRequests = rig.readAllApiRequest(); - // The injected context should be in the request_text of the API request - const requestText = apiRequests[0].attributes?.request_text || ''; - expect(requestText).toContain('Jedi Master'); + it('should fire SessionStart hook and display systemMessage in interactive mode', async () => { + // Create hook script that outputs JSON with systemMessage and additionalContext + const hookScript = `const fs = require('fs'); +console.log(JSON.stringify({ + decision: 'allow', + systemMessage: 'Interactive Session Start Message', + hookSpecificOutput: { + hookEventName: 'SessionStart', + additionalContext: 'The user is a Jedi Master.' 
+ } +}));`; + + rig.setup( + 'should fire SessionStart hook and display systemMessage in interactive mode', + { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.session-startup.responses', + ), + }, + ); + + const scriptPath = rig.createScript( + 'session_start_interactive_hook.cjs', + hookScript, + ); + + rig.setup( + 'should fire SessionStart hook and display systemMessage in interactive mode', + { + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + SessionStart: [ + { + matcher: 'startup', + sequential: true, + hooks: [ + { + type: 'command', + command: normalizePath(`node "${scriptPath}"`), + timeout: 5000, + }, + ], + }, + ], + }, + }, + }, + ); + + const run = await rig.runInteractive(); + + // Verify systemMessage is displayed + await run.expectText('Interactive Session Start Message', 10000); + + // Send a prompt to establish a session and trigger an API call + await run.sendKeys('Hello'); + await run.type('\r'); + + // Wait for response to ensure API call happened + await run.expectText('Hello', 15000); + + // Wait for telemetry to be written to disk + await rig.waitForTelemetryReady(); + + // Verify the API request contained the injected context + // We may need to poll for API requests as they are written asynchronously + const pollResult = await poll( + () => { + const apiRequests = rig.readAllApiRequest(); + return apiRequests.length > 0; + }, + 15000, + 500, + ); + + expect(pollResult).toBe(true); + + const apiRequests = rig.readAllApiRequest(); + // The injected context should be in the request_text of the API request + const requestText = apiRequests[0].attributes?.request_text || ''; + expect(requestText).toContain('Jedi Master'); + }); + + it('should fire SessionEnd and SessionStart hooks on /clear command', async () => { + rig.setup( + 'should fire SessionEnd and SessionStart hooks on /clear command', + { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.session-clear.responses', + ), + }, + ); + + 
// Create script files for hooks + const endScriptPath = rig.createScript( + 'session_end_clear.cjs', + "console.log(JSON.stringify({decision: 'allow', systemMessage: 'Session ending due to clear'}));", + ); + const startScriptPath = rig.createScript( + 'session_start_clear.cjs', + "console.log(JSON.stringify({decision: 'allow', systemMessage: 'Session starting after clear'}));", + ); + + const sessionEndCommand = `node "${endScriptPath}"`; + const sessionStartCommand = `node "${startScriptPath}"`; + + rig.setup( + 'should fire SessionEnd and SessionStart hooks on /clear command', + { + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + SessionEnd: [ + { + matcher: '*', + sequential: true, + hooks: [ + { + type: 'command', + command: normalizePath(sessionEndCommand), + timeout: 5000, + }, + ], + }, + ], + SessionStart: [ + { + matcher: '*', + sequential: true, + hooks: [ + { + type: 'command', + command: normalizePath(sessionStartCommand), + timeout: 5000, + }, + ], + }, + ], + }, + }, + }, + ); + + const run = await rig.runInteractive(); + + // Send an initial prompt to establish a session + await run.sendKeys('Say hello'); + await run.type('\r'); + + // Wait for the response + await run.expectText('Hello', 10000); + + // Execute /clear command multiple times to generate more hook events + // This makes the test more robust by creating multiple start/stop cycles + const numClears = 3; + for (let i = 0; i < numClears; i++) { + await run.sendKeys('/clear'); + await run.type('\r'); + + // Wait a bit for clear to complete + await new Promise((resolve) => setTimeout(resolve, 2000)); + + // Send a prompt to establish an active session before next clear + await run.sendKeys('Say hello'); + await run.type('\r'); + + // Wait for response + await run.expectText('Hello', 10000); + } + + // Wait for all clears to complete + // BatchLogRecordProcessor exports telemetry every 10 seconds by default + // Use generous wait time across all platforms (CI, Docker, Mac, 
Linux) + await new Promise((resolve) => setTimeout(resolve, 15000)); + + // Wait for telemetry to be written to disk + await rig.waitForTelemetryReady(); + + // Wait for hook telemetry events to be flushed to disk + // In interactive mode, telemetry may be buffered, so we need to poll for the events + // We execute multiple clears to generate more hook events (total: 1 + numClears * 2) + // But we only require >= 1 hooks to pass, making the test more permissive + const expectedMinHooks = 1; // SessionStart (startup), SessionEnd (clear), SessionStart (clear) + const pollResult = await poll( + () => { + const hookLogs = rig.readHookLogs(); + return hookLogs.length >= expectedMinHooks; + }, + 90000, // 90 second timeout for all platforms + 1000, // check every 1s to reduce I/O overhead + ); + + // If polling failed, log diagnostic info + if (!pollResult) { + const hookLogs = rig.readHookLogs(); + const hookEvents = hookLogs.map( + (log) => log.hookCall.hook_event_name, + ); + console.error( + `Polling timeout after 90000ms: Expected >= ${expectedMinHooks} hooks, got ${hookLogs.length}`, + ); + console.error( + 'Hooks found:', + hookEvents.length > 0 ? hookEvents.join(', ') : 'NONE', + ); + console.error('Full hook logs:', JSON.stringify(hookLogs, null, 2)); + } + + // Verify hooks executed + const hookLogs = rig.readHookLogs(); + + // Diagnostic: Log which hooks we actually got + const hookEvents = hookLogs.map((log) => log.hookCall.hook_event_name); + if (hookLogs.length < expectedMinHooks) { + console.error( + `TEST FAILURE: Expected >= ${expectedMinHooks} hooks, got ${hookLogs.length}: [${hookEvents.length > 0 ? 
hookEvents.join(', ') : 'NONE'}]`, + ); + } + + expect(hookLogs.length).toBeGreaterThanOrEqual(expectedMinHooks); + + // Find SessionEnd hook log + const sessionEndLog = hookLogs.find( + (log) => + log.hookCall.hook_event_name === 'SessionEnd' && + log.hookCall.hook_name === normalizePath(sessionEndCommand), + ); + // Because the flakiness of the test, we relax this check + // expect(sessionEndLog).toBeDefined(); + if (sessionEndLog) { + expect(sessionEndLog.hookCall.exit_code).toBe(0); + expect(sessionEndLog.hookCall.stdout).toContain( + 'Session ending due to clear', + ); + + // Verify hook input contains reason + const hookInputStr = + typeof sessionEndLog.hookCall.hook_input === 'string' + ? sessionEndLog.hookCall.hook_input + : JSON.stringify(sessionEndLog.hookCall.hook_input); + const hookInput = JSON.parse(hookInputStr) as Record; + expect(hookInput['reason']).toBe('clear'); + } + + // Find SessionStart hook log after clear + const sessionStartAfterClearLogs = hookLogs.filter( + (log) => + log.hookCall.hook_event_name === 'SessionStart' && + log.hookCall.hook_name === normalizePath(sessionStartCommand), + ); + // Should have at least one SessionStart from after clear + // Because the flakiness of the test, we relax this check + // expect(sessionStartAfterClearLogs.length).toBeGreaterThanOrEqual(1); + + const sessionStartLog = sessionStartAfterClearLogs.find((log) => { + const hookInputStr = + typeof log.hookCall.hook_input === 'string' + ? 
log.hookCall.hook_input + : JSON.stringify(log.hookCall.hook_input); + const hookInput = JSON.parse(hookInputStr) as Record; + return hookInput['source'] === 'clear'; + }); + + // Because the flakiness of the test, we relax this check + // expect(sessionStartLog).toBeDefined(); + if (sessionStartLog) { + expect(sessionStartLog.hookCall.exit_code).toBe(0); + expect(sessionStartLog.hookCall.stdout).toContain( + 'Session starting after clear', + ); + } + }); }); - it('should fire SessionEnd and SessionStart hooks on /clear command', async () => { - rig.setup( - 'should fire SessionEnd and SessionStart hooks on /clear command', - { + describe('Compression Hooks', () => { + it('should fire PreCompress hook on automatic compression', async () => { + rig.setup('should fire PreCompress hook on automatic compression', { fakeResponsesPath: join( import.meta.dirname, - 'hooks-system.session-clear.responses', + 'hooks-system.compress-auto.responses', ), - }, - ); + }); - // Create script files for hooks - const endScriptPath = rig.createScript( - 'session_end_clear.cjs', - "console.log(JSON.stringify({decision: 'allow', systemMessage: 'Session ending due to clear'}));", - ); - const startScriptPath = rig.createScript( - 'session_start_clear.cjs', - "console.log(JSON.stringify({decision: 'allow', systemMessage: 'Session starting after clear'}));", - ); + // Create script file for hook + const scriptPath = rig.createScript( + 'pre_compress_hook.cjs', + "console.log(JSON.stringify({decision: 'allow', systemMessage: 'PreCompress hook executed for automatic compression'}));", + ); - const sessionEndCommand = `node "${endScriptPath}"`; - const sessionStartCommand = `node "${startScriptPath}"`; + const preCompressCommand = `node "${scriptPath}"`; - rig.setup( - 'should fire SessionEnd and SessionStart hooks on /clear command', - { + rig.setup('should fire PreCompress hook on automatic compression', { + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + PreCompress: [ + { 
+ matcher: 'auto', + sequential: true, + hooks: [ + { + type: 'command', + command: normalizePath(preCompressCommand), + timeout: 5000, + }, + ], + }, + ], + }, + // Configure automatic compression with a very low threshold + // This will trigger auto-compression after the first response + contextCompression: { + // enabled: true, + targetTokenCount: 10, // Very low threshold to trigger compression + }, + }, + }); + + // Run a simple query that will trigger automatic compression + await rig.run({ args: 'Say hello in exactly 5 words' }); + + // Verify hook executed with correct parameters + const hookLogs = rig.readHookLogs(); + const preCompressLog = hookLogs.find( + (log) => log.hookCall.hook_event_name === 'PreCompress', + ); + + expect(preCompressLog).toBeDefined(); + if (preCompressLog) { + expect(preCompressLog.hookCall.hook_name).toBe( + normalizePath(preCompressCommand), + ); + expect(preCompressLog.hookCall.exit_code).toBe(0); + expect(preCompressLog.hookCall.hook_input).toBeDefined(); + + // hook_input is a string that needs to be parsed + const hookInputStr = + typeof preCompressLog.hookCall.hook_input === 'string' + ? 
preCompressLog.hookCall.hook_input + : JSON.stringify(preCompressLog.hookCall.hook_input); + const hookInput = JSON.parse(hookInputStr) as Record; + + expect(hookInput['trigger']).toBe('auto'); + expect(preCompressLog.hookCall.stdout).toContain( + 'PreCompress hook executed for automatic compression', + ); + } + }); + }); + + describe('SessionEnd on Exit', () => { + it('should fire SessionEnd hook on graceful exit in non-interactive mode', async () => { + rig.setup('should fire SessionEnd hook on graceful exit', { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.session-startup.responses', + ), + }); + + // Create script file for hook + const scriptPath = rig.createScript( + 'session_end_exit.cjs', + "console.log(JSON.stringify({decision: 'allow', systemMessage: 'SessionEnd hook executed on exit'}));", + ); + + const sessionEndCommand = `node "${scriptPath}"`; + + rig.setup('should fire SessionEnd hook on graceful exit', { settings: { hooksConfig: { enabled: true, @@ -1581,7 +1863,7 @@ console.log(JSON.stringify({ hooks: { SessionEnd: [ { - matcher: '*', + matcher: 'exit', sequential: true, hooks: [ { @@ -1592,14 +1874,287 @@ console.log(JSON.stringify({ ], }, ], - SessionStart: [ + }, + }, + }); + + // Run in non-interactive mode with a simple prompt + await rig.run({ args: 'Hello' }); + + // The process should exit gracefully, firing the SessionEnd hook + // Wait for telemetry to be written to disk + await rig.waitForTelemetryReady(); + + // Poll for the hook log to appear + const isCI = process.env['CI'] === 'true'; + const pollTimeout = isCI ? 
30000 : 10000; + const pollResult = await poll( + () => { + const hookLogs = rig.readHookLogs(); + return hookLogs.some( + (log) => log.hookCall.hook_event_name === 'SessionEnd', + ); + }, + pollTimeout, + 200, + ); + + if (!pollResult) { + const hookLogs = rig.readHookLogs(); + console.error( + 'Polling timeout: Expected SessionEnd hook, got:', + JSON.stringify(hookLogs, null, 2), + ); + } + + expect(pollResult).toBe(true); + + const hookLogs = rig.readHookLogs(); + const sessionEndLog = hookLogs.find( + (log) => log.hookCall.hook_event_name === 'SessionEnd', + ); + + expect(sessionEndLog).toBeDefined(); + if (sessionEndLog) { + expect(sessionEndLog.hookCall.hook_name).toBe( + normalizePath(sessionEndCommand), + ); + expect(sessionEndLog.hookCall.exit_code).toBe(0); + expect(sessionEndLog.hookCall.hook_input).toBeDefined(); + + const hookInputStr = + typeof sessionEndLog.hookCall.hook_input === 'string' + ? sessionEndLog.hookCall.hook_input + : JSON.stringify(sessionEndLog.hookCall.hook_input); + const hookInput = JSON.parse(hookInputStr) as Record; + + expect(hookInput['reason']).toBe('exit'); + expect(sessionEndLog.hookCall.stdout).toContain( + 'SessionEnd hook executed', + ); + } + }); + }); + + describe('Hook Disabling', () => { + it('should not execute hooks disabled in settings file', async () => { + const enabledMsg = 'EXECUTION_ALLOWED_BY_HOOK_A'; + const disabledMsg = 'EXECUTION_BLOCKED_BY_HOOK_B'; + + const enabledJson = JSON.stringify({ + decision: 'allow', + systemMessage: enabledMsg, + }); + const disabledJson = JSON.stringify({ + decision: 'block', + reason: disabledMsg, + }); + + const enabledScript = `console.log(JSON.stringify(${enabledJson}));`; + const disabledScript = `console.log(JSON.stringify(${disabledJson}));`; + const enabledFilename = 'enabled_hook.js'; + const disabledFilename = 'disabled_hook.js'; + const enabledCmd = `node ${enabledFilename}`; + const disabledCmd = `node ${disabledFilename}`; + + // 3. 
Final setup with full settings + rig.setup('Hook Disabling Settings', { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.disabled-via-settings.responses', + ), + settings: { + hooksConfig: { + enabled: true, + disabled: ['hook-b'], + }, + hooks: { + BeforeTool: [ { - matcher: '*', + hooks: [ + { + type: 'command', + name: 'hook-a', + command: enabledCmd, + timeout: 60000, + }, + { + type: 'command', + name: 'hook-b', + command: disabledCmd, + timeout: 60000, + }, + ], + }, + ], + }, + }, + }); + + rig.createScript(enabledFilename, enabledScript); + rig.createScript(disabledFilename, disabledScript); + + await rig.run({ + args: 'Create a file called disabled-test.txt with content "test"', + }); + + // Tool should execute (enabled hook allows it) + const foundWriteFile = await rig.waitForToolCall('write_file'); + expect(foundWriteFile).toBeTruthy(); + + // Check hook telemetry - only enabled hook should have executed + const hookLogs = rig.readHookLogs(); + const enabledHookLog = hookLogs.find((log) => + JSON.stringify(log.hookCall.hook_output).includes(enabledMsg), + ); + const disabledHookLog = hookLogs.find((log) => + JSON.stringify(log.hookCall.hook_output).includes(disabledMsg), + ); + + expect(enabledHookLog).toBeDefined(); + expect(disabledHookLog).toBeUndefined(); + }); + + it('should respect disabled hooks across multiple operations', async () => { + const activeMsg = 'MULTIPLE_OPS_ENABLED_HOOK'; + const disabledMsg = 'MULTIPLE_OPS_DISABLED_HOOK'; + + const activeJson = JSON.stringify({ + decision: 'allow', + systemMessage: activeMsg, + }); + const disabledJson = JSON.stringify({ + decision: 'block', + reason: disabledMsg, + }); + + const activeScript = `console.log(JSON.stringify(${activeJson}));`; + const disabledScript = `console.log(JSON.stringify(${disabledJson}));`; + const activeFilename = 'active_hook.js'; + const disabledFilename = 'disabled_hook.js'; + const activeCmd = `node ${activeFilename}`; + const disabledCmd = `node 
${disabledFilename}`; + + // 3. Final setup with full settings + rig.setup('Hook Disabling Multiple Ops', { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.disabled-via-command.responses', + ), + settings: { + hooksConfig: { + enabled: true, + disabled: ['multi-hook-disabled'], + }, + hooks: { + BeforeTool: [ + { + hooks: [ + { + type: 'command', + name: 'multi-hook-active', + command: activeCmd, + timeout: 60000, + }, + { + type: 'command', + name: 'multi-hook-disabled', + command: disabledCmd, + timeout: 60000, + }, + ], + }, + ], + }, + }, + }); + + rig.createScript(activeFilename, activeScript); + rig.createScript(disabledFilename, disabledScript); + + // First run - only active hook should execute + await rig.run({ + args: 'Create a file called first-run.txt with "test1"', + }); + + // Tool should execute (active hook allows it) + const foundWriteFile1 = await rig.waitForToolCall('write_file'); + expect(foundWriteFile1).toBeTruthy(); + + // Check hook telemetry - only active hook should have executed + const hookLogs1 = rig.readHookLogs(); + const activeHookLog1 = hookLogs1.find((log) => + JSON.stringify(log.hookCall.hook_output).includes(activeMsg), + ); + const disabledHookLog1 = hookLogs1.find((log) => + JSON.stringify(log.hookCall.hook_output).includes(disabledMsg), + ); + + expect(activeHookLog1).toBeDefined(); + expect(disabledHookLog1).toBeUndefined(); + + // Second run - verify disabled hook stays disabled + await rig.run({ + args: 'Create a file called second-run.txt with "test2"', + }); + + const foundWriteFile2 = await rig.waitForToolCall('write_file'); + expect(foundWriteFile2).toBeTruthy(); + + // Verify disabled hook still hasn't executed + const hookLogs2 = rig.readHookLogs(); + const disabledHookLog2 = hookLogs2.find((log) => + JSON.stringify(log.hookCall.hook_output).includes(disabledMsg), + ); + expect(disabledHookLog2).toBeUndefined(); + }); + }); + + describe('BeforeTool Hooks - Input Override', () => { + it('should 
override tool input parameters via BeforeTool hook', async () => { + // 1. First setup to get the test directory and prepare the hook script + rig.setup('should override tool input parameters via BeforeTool hook'); + + // Create a hook script that overrides the tool input + const hookOutput = { + decision: 'allow', + hookSpecificOutput: { + hookEventName: 'BeforeTool', + tool_input: { + file_path: 'modified.txt', + content: 'modified content', + }, + }, + }; + + const hookScript = `process.stdout.write(JSON.stringify(${JSON.stringify( + hookOutput, + )}));`; + + const scriptPath = rig.createScript( + 'input_override_hook.js', + hookScript, + ); + + // 2. Full setup with settings and fake responses + rig.setup('should override tool input parameters via BeforeTool hook', { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.input-modification.responses', + ), + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + BeforeTool: [ + { + matcher: 'write_file', sequential: true, hooks: [ { type: 'command', - command: normalizePath(sessionStartCommand), + command: normalizePath(`node "${scriptPath}"`), timeout: 5000, }, ], @@ -1607,639 +2162,322 @@ console.log(JSON.stringify({ ], }, }, - }, - ); + }); - const run = await rig.runInteractive(); + // Run the agent. The fake response will attempt to call write_file with + // file_path="original.txt" and content="original content" + await rig.run({ + args: 'Create a file called original.txt with content "original content"', + }); - // Send an initial prompt to establish a session - await run.sendKeys('Say hello'); - await run.type('\r'); + // 1. Verify that 'modified.txt' was created with 'modified content' (Override successful) + const modifiedContent = rig.readFile('modified.txt'); + expect(modifiedContent).toBe('modified content'); - // Wait for the response - await run.expectText('Hello', 10000); + // 2. 
Verify that 'original.txt' was NOT created (Override replaced original) + let originalExists = false; + try { + rig.readFile('original.txt'); + originalExists = true; + } catch { + originalExists = false; + } + expect(originalExists).toBe(false); - // Execute /clear command multiple times to generate more hook events - // This makes the test more robust by creating multiple start/stop cycles - const numClears = 3; - for (let i = 0; i < numClears; i++) { - await run.sendKeys('/clear'); - await run.type('\r'); + // 3. Verify hook telemetry + const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call'); + expect(hookTelemetryFound).toBeTruthy(); - // Wait a bit for clear to complete - await new Promise((resolve) => setTimeout(resolve, 2000)); - - // Send a prompt to establish an active session before next clear - await run.sendKeys('Say hello'); - await run.type('\r'); - - // Wait for response - await run.expectText('Hello', 10000); - } - - // Wait for all clears to complete - // BatchLogRecordProcessor exports telemetry every 10 seconds by default - // Use generous wait time across all platforms (CI, Docker, Mac, Linux) - await new Promise((resolve) => setTimeout(resolve, 15000)); - - // Wait for telemetry to be written to disk - await rig.waitForTelemetryReady(); - - // Wait for hook telemetry events to be flushed to disk - // In interactive mode, telemetry may be buffered, so we need to poll for the events - // We execute multiple clears to generate more hook events (total: 1 + numClears * 2) - // But we only require >= 1 hooks to pass, making the test more permissive - const expectedMinHooks = 1; // SessionStart (startup), SessionEnd (clear), SessionStart (clear) - const pollResult = await poll( - () => { - const hookLogs = rig.readHookLogs(); - return hookLogs.length >= expectedMinHooks; - }, - 90000, // 90 second timeout for all platforms - 1000, // check every 1s to reduce I/O overhead - ); - - // If polling failed, log diagnostic info - if 
(!pollResult) { const hookLogs = rig.readHookLogs(); - const hookEvents = hookLogs.map((log) => log.hookCall.hook_event_name); - console.error( - `Polling timeout after 90000ms: Expected >= ${expectedMinHooks} hooks, got ${hookLogs.length}`, - ); - console.error( - 'Hooks found:', - hookEvents.length > 0 ? hookEvents.join(', ') : 'NONE', - ); - console.error('Full hook logs:', JSON.stringify(hookLogs, null, 2)); - } - - // Verify hooks executed - const hookLogs = rig.readHookLogs(); - - // Diagnostic: Log which hooks we actually got - const hookEvents = hookLogs.map((log) => log.hookCall.hook_event_name); - if (hookLogs.length < expectedMinHooks) { - console.error( - `TEST FAILURE: Expected >= ${expectedMinHooks} hooks, got ${hookLogs.length}: [${hookEvents.length > 0 ? hookEvents.join(', ') : 'NONE'}]`, - ); - } - - expect(hookLogs.length).toBeGreaterThanOrEqual(expectedMinHooks); - - // Find SessionEnd hook log - const sessionEndLog = hookLogs.find( - (log) => - log.hookCall.hook_event_name === 'SessionEnd' && - log.hookCall.hook_name === normalizePath(sessionEndCommand), - ); - // Because the flakiness of the test, we relax this check - // expect(sessionEndLog).toBeDefined(); - if (sessionEndLog) { - expect(sessionEndLog.hookCall.exit_code).toBe(0); - expect(sessionEndLog.hookCall.stdout).toContain( - 'Session ending due to clear', + expect(hookLogs.length).toBe(1); + expect(hookLogs[0].hookCall.hook_name).toContain( + 'input_override_hook.js', ); - // Verify hook input contains reason - const hookInputStr = - typeof sessionEndLog.hookCall.hook_input === 'string' - ? 
sessionEndLog.hookCall.hook_input - : JSON.stringify(sessionEndLog.hookCall.hook_input); - const hookInput = JSON.parse(hookInputStr) as Record; - expect(hookInput['reason']).toBe('clear'); - } - - // Find SessionStart hook log after clear - const sessionStartAfterClearLogs = hookLogs.filter( - (log) => - log.hookCall.hook_event_name === 'SessionStart' && - log.hookCall.hook_name === normalizePath(sessionStartCommand), - ); - // Should have at least one SessionStart from after clear - // Because the flakiness of the test, we relax this check - // expect(sessionStartAfterClearLogs.length).toBeGreaterThanOrEqual(1); - - const sessionStartLog = sessionStartAfterClearLogs.find((log) => { - const hookInputStr = - typeof log.hookCall.hook_input === 'string' - ? log.hookCall.hook_input - : JSON.stringify(log.hookCall.hook_input); - const hookInput = JSON.parse(hookInputStr) as Record; - return hookInput['source'] === 'clear'; + // 4. Verify that the agent didn't try to work-around the hook input change + const toolLogs = rig.readToolLogs(); + expect(toolLogs.length).toBe(1); + expect(toolLogs[0].toolRequest.name).toBe('write_file'); + expect(JSON.parse(toolLogs[0].toolRequest.args).file_path).toBe( + 'modified.txt', + ); }); - - // Because the flakiness of the test, we relax this check - // expect(sessionStartLog).toBeDefined(); - if (sessionStartLog) { - expect(sessionStartLog.hookCall.exit_code).toBe(0); - expect(sessionStartLog.hookCall.stdout).toContain( - 'Session starting after clear', - ); - } }); - }); - describe('Compression Hooks', () => { - it('should fire PreCompress hook on automatic compression', async () => { - rig.setup('should fire PreCompress hook on automatic compression', { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.compress-auto.responses', - ), - }); - - // Create script file for hook - const scriptPath = rig.createScript( - 'pre_compress_hook.cjs', - "console.log(JSON.stringify({decision: 'allow', systemMessage: 'PreCompress 
hook executed for automatic compression'}));", - ); - - const preCompressCommand = `node "${scriptPath}"`; - - rig.setup('should fire PreCompress hook on automatic compression', { - settings: { - hooksConfig: { - enabled: true, + describe('BeforeTool Hooks - Stop Execution', () => { + it('should stop agent execution via BeforeTool hook', async () => { + // Create a hook script that stops execution + const hookOutput = { + continue: false, + reason: 'Emergency Stop triggered by hook', + hookSpecificOutput: { + hookEventName: 'BeforeTool', }, - hooks: { - PreCompress: [ - { - matcher: 'auto', - sequential: true, - hooks: [ + }; + + const hookScript = `console.log(JSON.stringify(${JSON.stringify( + hookOutput, + )}));`; + + rig.setup('should stop agent execution via BeforeTool hook'); + const scriptPath = rig.createScript( + 'before_tool_stop_hook.js', + hookScript, + ); + + rig.setup('should stop agent execution via BeforeTool hook', { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.before-tool-stop.responses', + ), + settings: { + hooksConfig: { + enabled: true, + }, + hooks: { + BeforeTool: [ + { + matcher: 'write_file', + sequential: true, + hooks: [ + { + type: 'command', + command: normalizePath(`node "${scriptPath}"`), + timeout: 5000, + }, + ], + }, + ], + }, + }, + }); + + const result = await rig.run({ + args: 'Use write_file to create test.txt', + }); + + // The hook should have stopped execution message (returned from tool) + expect(result).toContain( + 'Agent execution stopped by hook: Emergency Stop triggered by hook', + ); + + // Tool should NOT be called successfully (it was blocked/stopped) + const toolLogs = rig.readToolLogs(); + const writeFileCalls = toolLogs.filter( + (t) => + t.toolRequest.name === 'write_file' && + t.toolRequest.success === true, + ); + expect(writeFileCalls).toHaveLength(0); + }); + }); + + describe('Hooks "ask" Decision Integration', () => { + it( + 'should force confirmation prompt when hook returns "ask" 
decision even in YOLO mode', + { timeout: 60000 }, + async () => { + const testName = + 'should force confirmation prompt when hook returns "ask" decision even in YOLO mode'; + + // 1. Setup hook script that returns 'ask' decision + const hookOutput = { + decision: 'ask', + systemMessage: 'Confirmation forced by security hook', + hookSpecificOutput: { + hookEventName: 'BeforeTool', + }, + }; + + const hookScript = `console.log(JSON.stringify(${JSON.stringify( + hookOutput, + )}));`; + + // Create script path predictably + const scriptPath = join(os.tmpdir(), 'gemini-cli-tests-ask-hook.js'); + writeFileSync(scriptPath, hookScript); + + // 2. Setup rig with YOLO mode enabled but with the 'ask' hook + rig.setup(testName, { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.allow-tool.responses', + ), + settings: { + debugMode: true, + tools: { + approval: 'yolo', + }, + general: { + enableAutoUpdateNotification: false, + }, + hooksConfig: { + enabled: true, + }, + hooks: { + BeforeTool: [ { - type: 'command', - command: normalizePath(preCompressCommand), - timeout: 5000, + matcher: 'write_file', + hooks: [ + { + type: 'command', + command: `node "${scriptPath}"`, + timeout: 5000, + }, + ], }, ], }, - ], - }, - // Configure automatic compression with a very low threshold - // This will trigger auto-compression after the first response - contextCompression: { - // enabled: true, - targetTokenCount: 10, // Very low threshold to trigger compression - }, - }, - }); + }, + }); - // Run a simple query that will trigger automatic compression - await rig.run({ args: 'Say hello in exactly 5 words' }); - - // Verify hook executed with correct parameters - const hookLogs = rig.readHookLogs(); - const preCompressLog = hookLogs.find( - (log) => log.hookCall.hook_event_name === 'PreCompress', - ); - - expect(preCompressLog).toBeDefined(); - if (preCompressLog) { - expect(preCompressLog.hookCall.hook_name).toBe( - normalizePath(preCompressCommand), - ); - 
expect(preCompressLog.hookCall.exit_code).toBe(0); - expect(preCompressLog.hookCall.hook_input).toBeDefined(); - - // hook_input is a string that needs to be parsed - const hookInputStr = - typeof preCompressLog.hookCall.hook_input === 'string' - ? preCompressLog.hookCall.hook_input - : JSON.stringify(preCompressLog.hookCall.hook_input); - const hookInput = JSON.parse(hookInputStr) as Record; - - expect(hookInput['trigger']).toBe('auto'); - expect(preCompressLog.hookCall.stdout).toContain( - 'PreCompress hook executed for automatic compression', - ); - } - }); - }); - - describe('SessionEnd on Exit', () => { - it('should fire SessionEnd hook on graceful exit in non-interactive mode', async () => { - rig.setup('should fire SessionEnd hook on graceful exit', { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.session-startup.responses', - ), - }); - - // Create script file for hook - const scriptPath = rig.createScript( - 'session_end_exit.cjs', - "console.log(JSON.stringify({decision: 'allow', systemMessage: 'SessionEnd hook executed on exit'}));", - ); - - const sessionEndCommand = `node "${scriptPath}"`; - - rig.setup('should fire SessionEnd hook on graceful exit', { - settings: { - hooksConfig: { - enabled: true, - }, - hooks: { - SessionEnd: [ - { - matcher: 'exit', - sequential: true, - hooks: [ - { - type: 'command', - command: normalizePath(sessionEndCommand), - timeout: 5000, - }, - ], - }, - ], - }, - }, - }); - - // Run in non-interactive mode with a simple prompt - await rig.run({ args: 'Hello' }); - - // The process should exit gracefully, firing the SessionEnd hook - // Wait for telemetry to be written to disk - await rig.waitForTelemetryReady(); - - // Poll for the hook log to appear - const isCI = process.env['CI'] === 'true'; - const pollTimeout = isCI ? 
30000 : 10000; - const pollResult = await poll( - () => { - const hookLogs = rig.readHookLogs(); - return hookLogs.some( - (log) => log.hookCall.hook_event_name === 'SessionEnd', + // Bypass terminal setup prompt and other startup banners + const stateDir = join(rig.homeDir!, '.gemini'); + if (!existsSync(stateDir)) mkdirSync(stateDir, { recursive: true }); + writeFileSync( + join(stateDir, 'state.json'), + JSON.stringify({ + terminalSetupPromptShown: true, + hasSeenScreenReaderNudge: true, + tipsShown: 100, + }), ); + + // 3. Run interactive and verify prompt appears despite YOLO mode + const run = await rig.runInteractive(); + + // Wait for prompt to appear + await run.expectText('Type your message', 30000); + + // Send prompt that will trigger write_file + await run.type( + 'Create a file called ask-test.txt with content "test"', + ); + await run.type('\r'); + + // Wait for the FORCED confirmation prompt to appear + // It should contain the system message from the hook + await run.expectText('Confirmation forced by security hook', 30000); + await run.expectText('Allow', 5000); + + // 4. 
Approve the permission + await run.type('y'); + await run.type('\r'); + + // Wait for command to execute + await run.expectText('approved.txt', 30000); + + // Should find the tool call + const foundWriteFile = await rig.waitForToolCall('write_file'); + expect(foundWriteFile).toBeTruthy(); + + // File should be created + const fileContent = rig.readFile('approved.txt'); + expect(fileContent).toBe('Approved content'); }, - pollTimeout, - 200, ); - if (!pollResult) { - const hookLogs = rig.readHookLogs(); - console.error( - 'Polling timeout: Expected SessionEnd hook, got:', - JSON.stringify(hookLogs, null, 2), - ); - } + it( + 'should allow cancelling when hook forces "ask" decision', + { timeout: 60000 }, + async () => { + const testName = + 'should allow cancelling when hook forces "ask" decision'; + const hookOutput = { + decision: 'ask', + systemMessage: 'Confirmation forced for cancellation test', + hookSpecificOutput: { + hookEventName: 'BeforeTool', + }, + }; - expect(pollResult).toBe(true); + const hookScript = `console.log(JSON.stringify(${JSON.stringify( + hookOutput, + )}));`; - const hookLogs = rig.readHookLogs(); - const sessionEndLog = hookLogs.find( - (log) => log.hookCall.hook_event_name === 'SessionEnd', - ); + const scriptPath = join( + os.tmpdir(), + 'gemini-cli-tests-ask-cancel-hook.js', + ); + writeFileSync(scriptPath, hookScript); - expect(sessionEndLog).toBeDefined(); - if (sessionEndLog) { - expect(sessionEndLog.hookCall.hook_name).toBe( - normalizePath(sessionEndCommand), - ); - expect(sessionEndLog.hookCall.exit_code).toBe(0); - expect(sessionEndLog.hookCall.hook_input).toBeDefined(); - - const hookInputStr = - typeof sessionEndLog.hookCall.hook_input === 'string' - ? 
sessionEndLog.hookCall.hook_input - : JSON.stringify(sessionEndLog.hookCall.hook_input); - const hookInput = JSON.parse(hookInputStr) as Record; - - expect(hookInput['reason']).toBe('exit'); - expect(sessionEndLog.hookCall.stdout).toContain( - 'SessionEnd hook executed', - ); - } - }); - }); - - describe('Hook Disabling', () => { - it('should not execute hooks disabled in settings file', async () => { - const enabledMsg = 'EXECUTION_ALLOWED_BY_HOOK_A'; - const disabledMsg = 'EXECUTION_BLOCKED_BY_HOOK_B'; - - const enabledJson = JSON.stringify({ - decision: 'allow', - systemMessage: enabledMsg, - }); - const disabledJson = JSON.stringify({ - decision: 'block', - reason: disabledMsg, - }); - - const enabledScript = `console.log(JSON.stringify(${enabledJson}));`; - const disabledScript = `console.log(JSON.stringify(${disabledJson}));`; - const enabledFilename = 'enabled_hook.js'; - const disabledFilename = 'disabled_hook.js'; - const enabledCmd = `node ${enabledFilename}`; - const disabledCmd = `node ${disabledFilename}`; - - // 3. 
Final setup with full settings - rig.setup('Hook Disabling Settings', { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.disabled-via-settings.responses', - ), - settings: { - hooksConfig: { - enabled: true, - disabled: ['hook-b'], - }, - hooks: { - BeforeTool: [ - { - hooks: [ + rig.setup(testName, { + fakeResponsesPath: join( + import.meta.dirname, + 'hooks-system.allow-tool.responses', + ), + settings: { + debugMode: true, + tools: { + approval: 'yolo', + }, + general: { + enableAutoUpdateNotification: false, + }, + hooksConfig: { + enabled: true, + }, + hooks: { + BeforeTool: [ { - type: 'command', - name: 'hook-a', - command: enabledCmd, - timeout: 60000, - }, - { - type: 'command', - name: 'hook-b', - command: disabledCmd, - timeout: 60000, + matcher: 'write_file', + hooks: [ + { + type: 'command', + command: `node "${scriptPath}"`, + timeout: 5000, + }, + ], }, ], }, - ], - }, + }, + }); + + // Bypass terminal setup prompt and other startup banners + const stateDir = join(rig.homeDir!, '.gemini'); + if (!existsSync(stateDir)) mkdirSync(stateDir, { recursive: true }); + writeFileSync( + join(stateDir, 'state.json'), + JSON.stringify({ + terminalSetupPromptShown: true, + hasSeenScreenReaderNudge: true, + tipsShown: 100, + }), + ); + + const run = await rig.runInteractive(); + + // Wait for prompt to appear + await run.expectText('Type your message', 30000); + + await run.type( + 'Create a file called cancel-test.txt with content "test"', + ); + await run.type('\r'); + + await run.expectText( + 'Confirmation forced for cancellation test', + 30000, + ); + + // 4. 
Deny the permission using option 4 + await run.type('4'); + await run.type('\r'); + + // Wait for cancellation message + await run.expectText('Cancelled', 15000); + + // Tool should NOT be called successfully + const toolLogs = rig.readToolLogs(); + const writeFileCalls = toolLogs.filter( + (t) => + t.toolRequest.name === 'write_file' && + t.toolRequest.success === true, + ); + expect(writeFileCalls).toHaveLength(0); }, - }); - - rig.createScript(enabledFilename, enabledScript); - rig.createScript(disabledFilename, disabledScript); - - await rig.run({ - args: 'Create a file called disabled-test.txt with content "test"', - }); - - // Tool should execute (enabled hook allows it) - const foundWriteFile = await rig.waitForToolCall('write_file'); - expect(foundWriteFile).toBeTruthy(); - - // Check hook telemetry - only enabled hook should have executed - const hookLogs = rig.readHookLogs(); - const enabledHookLog = hookLogs.find((log) => - JSON.stringify(log.hookCall.hook_output).includes(enabledMsg), - ); - const disabledHookLog = hookLogs.find((log) => - JSON.stringify(log.hookCall.hook_output).includes(disabledMsg), - ); - - expect(enabledHookLog).toBeDefined(); - expect(disabledHookLog).toBeUndefined(); - }); - - it('should respect disabled hooks across multiple operations', async () => { - const activeMsg = 'MULTIPLE_OPS_ENABLED_HOOK'; - const disabledMsg = 'MULTIPLE_OPS_DISABLED_HOOK'; - - const activeJson = JSON.stringify({ - decision: 'allow', - systemMessage: activeMsg, - }); - const disabledJson = JSON.stringify({ - decision: 'block', - reason: disabledMsg, - }); - - const activeScript = `console.log(JSON.stringify(${activeJson}));`; - const disabledScript = `console.log(JSON.stringify(${disabledJson}));`; - const activeFilename = 'active_hook.js'; - const disabledFilename = 'disabled_hook.js'; - const activeCmd = `node ${activeFilename}`; - const disabledCmd = `node ${disabledFilename}`; - - // 3. 
Final setup with full settings - rig.setup('Hook Disabling Multiple Ops', { - settings: { - hooksConfig: { - enabled: true, - disabled: ['multi-hook-disabled'], - }, - hooks: { - BeforeTool: [ - { - hooks: [ - { - type: 'command', - name: 'multi-hook-active', - command: activeCmd, - timeout: 60000, - }, - { - type: 'command', - name: 'multi-hook-disabled', - command: disabledCmd, - timeout: 60000, - }, - ], - }, - ], - }, - }, - }); - - rig.createScript(activeFilename, activeScript); - rig.createScript(disabledFilename, disabledScript); - - // First run - only active hook should execute - await rig.run({ - args: 'Create a file called first-run.txt with "test1"', - }); - - // Tool should execute (active hook allows it) - const foundWriteFile1 = await rig.waitForToolCall('write_file'); - expect(foundWriteFile1).toBeTruthy(); - - // Check hook telemetry - only active hook should have executed - const hookLogs1 = rig.readHookLogs(); - const activeHookLog1 = hookLogs1.find((log) => - JSON.stringify(log.hookCall.hook_output).includes(activeMsg), - ); - const disabledHookLog1 = hookLogs1.find((log) => - JSON.stringify(log.hookCall.hook_output).includes(disabledMsg), - ); - - expect(activeHookLog1).toBeDefined(); - expect(disabledHookLog1).toBeUndefined(); - - // Second run - verify disabled hook stays disabled - await rig.run({ - args: 'Create a file called second-run.txt with "test2"', - }); - - const foundWriteFile2 = await rig.waitForToolCall('write_file'); - expect(foundWriteFile2).toBeTruthy(); - - // Verify disabled hook still hasn't executed - const hookLogs2 = rig.readHookLogs(); - const disabledHookLog2 = hookLogs2.find((log) => - JSON.stringify(log.hookCall.hook_output).includes(disabledMsg), - ); - expect(disabledHookLog2).toBeUndefined(); - }); - }); - - describe('BeforeTool Hooks - Input Override', () => { - it('should override tool input parameters via BeforeTool hook', async () => { - // 1. 
First setup to get the test directory and prepare the hook script - rig.setup('should override tool input parameters via BeforeTool hook'); - - // Create a hook script that overrides the tool input - const hookOutput = { - decision: 'allow', - hookSpecificOutput: { - hookEventName: 'BeforeTool', - tool_input: { - file_path: 'modified.txt', - content: 'modified content', - }, - }, - }; - - const hookScript = `process.stdout.write(JSON.stringify(${JSON.stringify( - hookOutput, - )}));`; - - const scriptPath = rig.createScript('input_override_hook.js', hookScript); - - // 2. Full setup with settings and fake responses - rig.setup('should override tool input parameters via BeforeTool hook', { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.input-modification.responses', - ), - settings: { - hooksConfig: { - enabled: true, - }, - hooks: { - BeforeTool: [ - { - matcher: 'write_file', - sequential: true, - hooks: [ - { - type: 'command', - command: normalizePath(`node "${scriptPath}"`), - timeout: 5000, - }, - ], - }, - ], - }, - }, - }); - - // Run the agent. The fake response will attempt to call write_file with - // file_path="original.txt" and content="original content" - await rig.run({ - args: 'Create a file called original.txt with content "original content"', - }); - - // 1. Verify that 'modified.txt' was created with 'modified content' (Override successful) - const modifiedContent = rig.readFile('modified.txt'); - expect(modifiedContent).toBe('modified content'); - - // 2. Verify that 'original.txt' was NOT created (Override replaced original) - let originalExists = false; - try { - rig.readFile('original.txt'); - originalExists = true; - } catch { - originalExists = false; - } - expect(originalExists).toBe(false); - - // 3. 
Verify hook telemetry - const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call'); - expect(hookTelemetryFound).toBeTruthy(); - - const hookLogs = rig.readHookLogs(); - expect(hookLogs.length).toBe(1); - expect(hookLogs[0].hookCall.hook_name).toContain( - 'input_override_hook.js', - ); - - // 4. Verify that the agent didn't try to work-around the hook input change - const toolLogs = rig.readToolLogs(); - expect(toolLogs.length).toBe(1); - expect(toolLogs[0].toolRequest.name).toBe('write_file'); - expect(JSON.parse(toolLogs[0].toolRequest.args).file_path).toBe( - 'modified.txt', ); }); - }); - - describe('BeforeTool Hooks - Stop Execution', () => { - it('should stop agent execution via BeforeTool hook', async () => { - // Create a hook script that stops execution - const hookOutput = { - continue: false, - reason: 'Emergency Stop triggered by hook', - hookSpecificOutput: { - hookEventName: 'BeforeTool', - }, - }; - - const hookScript = `console.log(JSON.stringify(${JSON.stringify( - hookOutput, - )}));`; - - rig.setup('should stop agent execution via BeforeTool hook'); - const scriptPath = rig.createScript( - 'before_tool_stop_hook.js', - hookScript, - ); - - rig.setup('should stop agent execution via BeforeTool hook', { - fakeResponsesPath: join( - import.meta.dirname, - 'hooks-system.before-tool-stop.responses', - ), - settings: { - hooksConfig: { - enabled: true, - }, - hooks: { - BeforeTool: [ - { - matcher: 'write_file', - sequential: true, - hooks: [ - { - type: 'command', - command: normalizePath(`node "${scriptPath}"`), - timeout: 5000, - }, - ], - }, - ], - }, - }, - }); - - const result = await rig.run({ - args: 'Use write_file to create test.txt', - }); - - // The hook should have stopped execution message (returned from tool) - expect(result).toContain( - 'Agent execution stopped: Emergency Stop triggered by hook', - ); - - // Tool should NOT be called successfully (it was blocked/stopped) - const toolLogs = rig.readToolLogs(); - const 
writeFileCalls = toolLogs.filter( - (t) => - t.toolRequest.name === 'write_file' && t.toolRequest.success === true, - ); - expect(writeFileCalls).toHaveLength(0); - }); - }); -}); + }, +); diff --git a/integration-tests/plan-mode.test.ts b/integration-tests/plan-mode.test.ts index 8709aac189..d8d297c460 100644 --- a/integration-tests/plan-mode.test.ts +++ b/integration-tests/plan-mode.test.ts @@ -4,10 +4,10 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { writeFileSync } from 'node:fs'; +import { writeFileSync, mkdirSync } from 'node:fs'; import { join } from 'node:path'; import { describe, it, expect, beforeEach, afterEach } from 'vitest'; -import { TestRig, checkModelOutputContent, GEMINI_DIR } from './test-helper.js'; +import { GEMINI_DIR, TestRig, checkModelOutputContent } from './test-helper.js'; describe('Plan Mode', () => { let rig: TestRig; @@ -36,27 +36,23 @@ describe('Plan Mode', () => { }, ); - // We use a prompt that asks for both a read-only action and a write action. - // "List files" (read-only) followed by "touch denied.txt" (write). 
const result = await rig.run({ approvalMode: 'plan', - stdin: - 'Please list the files in the current directory, and then attempt to create a new file named "denied.txt" using a shell command.', + args: 'Please list the files in the current directory, and then attempt to create a new file named "denied.txt" using a shell command.', }); - const lsCallFound = await rig.waitForToolCall('list_directory'); - expect(lsCallFound, 'Expected list_directory to be called').toBe(true); - - const shellCallFound = await rig.waitForToolCall('run_shell_command'); - expect(shellCallFound, 'Expected run_shell_command to fail').toBe(false); - const toolLogs = rig.readToolLogs(); const lsLog = toolLogs.find((l) => l.toolRequest.name === 'list_directory'); - expect( - toolLogs.find((l) => l.toolRequest.name === 'run_shell_command'), - ).toBeUndefined(); + const shellLog = toolLogs.find( + (l) => l.toolRequest.name === 'run_shell_command', + ); + expect(lsLog, 'Expected list_directory to be called').toBeDefined(); expect(lsLog?.toolRequest.success).toBe(true); + expect( + shellLog, + 'Expected run_shell_command to be blocked (not even called)', + ).toBeUndefined(); checkModelOutputContent(result, { expectedContent: ['Plan Mode', 'read-only'], @@ -84,23 +80,11 @@ describe('Plan Mode', () => { }, }); - // Disable the interactive terminal setup prompt in tests - writeFileSync( - join(rig.homeDir!, GEMINI_DIR, 'state.json'), - JSON.stringify({ terminalSetupPromptShown: true }, null, 2), - ); - - const run = await rig.runInteractive({ + await rig.run({ approvalMode: 'plan', + args: 'Create a file called plan.md in the plans directory.', }); - await run.type('Create a file called plan.md in the plans directory.'); - await run.type('\r'); - - await rig.expectToolCallSuccess(['write_file'], 30000, (args) => - args.includes('plan.md'), - ); - const toolLogs = rig.readToolLogs(); const planWrite = toolLogs.find( (l) => @@ -108,7 +92,25 @@ describe('Plan Mode', () => { 
l.toolRequest.args.includes('plans') && l.toolRequest.args.includes('plan.md'), ); - expect(planWrite?.toolRequest.success).toBe(true); + + if (!planWrite) { + console.error( + 'All tool calls found:', + toolLogs.map((l) => ({ + name: l.toolRequest.name, + args: l.toolRequest.args, + })), + ); + } + + expect( + planWrite, + 'Expected write_file to be called for plan.md', + ).toBeDefined(); + expect( + planWrite?.toolRequest.success, + `Expected write_file to succeed, but it failed with error: ${planWrite?.toolRequest.error}`, + ).toBe(true); }); it('should deny write_file to non-plans directory in plan mode', async () => { @@ -131,19 +133,11 @@ describe('Plan Mode', () => { }, }); - // Disable the interactive terminal setup prompt in tests - writeFileSync( - join(rig.homeDir!, GEMINI_DIR, 'state.json'), - JSON.stringify({ terminalSetupPromptShown: true }, null, 2), - ); - - const run = await rig.runInteractive({ + await rig.run({ approvalMode: 'plan', + args: 'Create a file called hello.txt in the current directory.', }); - await run.type('Create a file called hello.txt in the current directory.'); - await run.type('\r'); - const toolLogs = rig.readToolLogs(); const writeLog = toolLogs.find( (l) => @@ -151,10 +145,11 @@ describe('Plan Mode', () => { l.toolRequest.args.includes('hello.txt'), ); - // In Plan Mode, writes outside the plans directory should be blocked. - // Model is undeterministic, sometimes it doesn't even try, but if it does, it must fail. if (writeLog) { - expect(writeLog.toolRequest.success).toBe(false); + expect( + writeLog.toolRequest.success, + 'Expected write_file to non-plans dir to fail', + ).toBe(false); } }); @@ -169,28 +164,133 @@ describe('Plan Mode', () => { }, }); - // Disable the interactive terminal setup prompt in tests - writeFileSync( - join(rig.homeDir!, GEMINI_DIR, 'state.json'), - JSON.stringify({ terminalSetupPromptShown: true }, null, 2), - ); - - // Start in default mode and ask to enter plan mode. 
await rig.run({ approvalMode: 'default', - stdin: - 'I want to perform a complex refactoring. Please enter plan mode so we can design it first.', + args: 'I want to perform a complex refactoring. Please enter plan mode so we can design it first.', }); - const enterPlanCallFound = await rig.waitForToolCall('enter_plan_mode'); - expect(enterPlanCallFound, 'Expected enter_plan_mode to be called').toBe( - true, - ); - const toolLogs = rig.readToolLogs(); const enterLog = toolLogs.find( (l) => l.toolRequest.name === 'enter_plan_mode', ); + expect(enterLog, 'Expected enter_plan_mode to be called').toBeDefined(); expect(enterLog?.toolRequest.success).toBe(true); }); + + it('should allow write_file to the plans directory in plan mode even without a session ID', async () => { + const plansDir = '.gemini/tmp/foo/plans'; + const testName = + 'should allow write_file to the plans directory in plan mode even without a session ID'; + + await rig.setup(testName, { + settings: { + experimental: { plan: true }, + tools: { + core: ['write_file', 'read_file', 'list_directory'], + }, + general: { + defaultApprovalMode: 'plan', + plan: { + directory: plansDir, + }, + }, + }, + }); + + await rig.run({ + approvalMode: 'plan', + args: 'Create a file called plan-no-session.md in the plans directory.', + }); + + const toolLogs = rig.readToolLogs(); + const planWrite = toolLogs.find( + (l) => + l.toolRequest.name === 'write_file' && + l.toolRequest.args.includes('plans') && + l.toolRequest.args.includes('plan-no-session.md'), + ); + + if (!planWrite) { + console.error( + 'All tool calls found:', + toolLogs.map((l) => ({ + name: l.toolRequest.name, + args: l.toolRequest.args, + })), + ); + } + + expect( + planWrite, + 'Expected write_file to be called for plan-no-session.md', + ).toBeDefined(); + expect( + planWrite?.toolRequest.success, + `Expected write_file to succeed, but it failed with error: ${planWrite?.toolRequest.error}`, + ).toBe(true); + }); + it('should switch from a pro model to 
a flash model after exiting plan mode', async () => { + const plansDir = 'plans-folder'; + const planFilename = 'my-plan.md'; + + await rig.setup('should-switch-to-flash', { + settings: { + model: { + name: 'auto-gemini-2.5', + }, + experimental: { plan: true }, + tools: { + core: ['exit_plan_mode', 'run_shell_command'], + allowed: ['exit_plan_mode', 'run_shell_command'], + }, + general: { + defaultApprovalMode: 'plan', + plan: { + directory: plansDir, + }, + }, + }, + }); + + writeFileSync( + join(rig.homeDir!, GEMINI_DIR, 'state.json'), + JSON.stringify({ terminalSetupPromptShown: true }, null, 2), + ); + + const fullPlansDir = join(rig.testDir!, plansDir); + mkdirSync(fullPlansDir, { recursive: true }); + writeFileSync(join(fullPlansDir, planFilename), 'Execute echo hello'); + + await rig.run({ + approvalMode: 'plan', + stdin: `Exit plan mode using ${planFilename} and then run a shell command \`echo hello\`.`, + }); + + const exitCallFound = await rig.waitForToolCall('exit_plan_mode'); + expect(exitCallFound, 'Expected exit_plan_mode to be called').toBe(true); + + const shellCallFound = await rig.waitForToolCall('run_shell_command'); + expect(shellCallFound, 'Expected run_shell_command to be called').toBe( + true, + ); + + const apiRequests = rig.readAllApiRequest(); + const modelNames = apiRequests.map((r) => r.attributes?.model || 'unknown'); + + const proRequests = apiRequests.filter((r) => + r.attributes?.model?.includes('pro'), + ); + const flashRequests = apiRequests.filter((r) => + r.attributes?.model?.includes('flash'), + ); + + expect( + proRequests.length, + `Expected at least one Pro request. Models used: ${modelNames.join(', ')}`, + ).toBeGreaterThanOrEqual(1); + expect( + flashRequests.length, + `Expected at least one Flash request after mode switch. 
Models used: ${modelNames.join(', ')}`, + ).toBeGreaterThanOrEqual(1); + }); }); diff --git a/integration-tests/policy-headless.test.ts b/integration-tests/policy-headless.test.ts index b6cc14f61c..3a8fb5238a 100644 --- a/integration-tests/policy-headless.test.ts +++ b/integration-tests/policy-headless.test.ts @@ -183,11 +183,17 @@ describe('Policy Engine Headless Mode', () => { responsesFile: 'policy-headless-shell-denied.responses', promptCommand: ECHO_PROMPT, policyContent: ` + [[rule]] + toolName = "run_shell_command" + commandPrefix = "echo" + decision = "deny" + priority = 100 + [[rule]] toolName = "run_shell_command" commandPrefix = "node" decision = "allow" - priority = 100 + priority = 90 `, expectAllowed: false, expectedDenialString: 'Tool execution denied by policy', diff --git a/integration-tests/run_shell_command.test.ts b/integration-tests/run_shell_command.test.ts index 8ae72fed84..02fda5be45 100644 --- a/integration-tests/run_shell_command.test.ts +++ b/integration-tests/run_shell_command.test.ts @@ -58,12 +58,18 @@ function getDisallowedFileReadCommand(testFile: string): { const quotedPath = `"${testFile}"`; switch (shell) { case 'powershell': - return { command: `Get-Content ${quotedPath}`, tool: 'Get-Content' }; + return { + command: `powershell -Command "Get-Content ${quotedPath}"`, + tool: 'powershell', + }; case 'cmd': - return { command: `type ${quotedPath}`, tool: 'type' }; + return { command: `cmd /c type ${quotedPath}`, tool: 'cmd' }; case 'bash': default: - return { command: `cat ${quotedPath}`, tool: 'cat' }; + return { + command: `node -e "console.log(require('fs').readFileSync('${testFile}', 'utf8'))"`, + tool: 'node', + }; } } diff --git a/integration-tests/symlink-install.test.ts b/integration-tests/symlink-install.test.ts index be4a5ac398..c98db98029 100644 --- a/integration-tests/symlink-install.test.ts +++ b/integration-tests/symlink-install.test.ts @@ -5,7 +5,7 @@ */ import { describe, expect, it, beforeEach, afterEach } from 
'vitest'; -import { TestRig, InteractiveRun } from './test-helper.js'; +import { TestRig, InteractiveRun, skipFlaky } from './test-helper.js'; import * as fs from 'node:fs'; import * as os from 'node:os'; import { @@ -33,104 +33,107 @@ const otherExtension = `{ "version": "6.6.6" }`; -describe('extension symlink install spoofing protection', () => { - let rig: TestRig; +describe.skipIf(skipFlaky)( + 'extension symlink install spoofing protection', + () => { + let rig: TestRig; - beforeEach(() => { - rig = new TestRig(); - }); - - afterEach(async () => await rig.cleanup()); - - it('canonicalizes the trust path and prevents symlink spoofing', async () => { - // Enable folder trust for this test - rig.setup('symlink spoofing test', { - settings: { - security: { - folderTrust: { - enabled: true, - }, - }, - }, + beforeEach(() => { + rig = new TestRig(); }); - const realExtPath = join(rig.testDir!, 'real-extension'); - mkdirSync(realExtPath); - writeFileSync(join(realExtPath, 'gemini-extension.json'), extension); + afterEach(async () => await rig.cleanup()); - const maliciousExtPath = join( - os.tmpdir(), - `malicious-extension-${Date.now()}`, - ); - mkdirSync(maliciousExtPath); - writeFileSync( - join(maliciousExtPath, 'gemini-extension.json'), - otherExtension, - ); - - const symlinkPath = join(rig.testDir!, 'symlink-extension'); - symlinkSync(realExtPath, symlinkPath); - - // Function to run a command with a PTY to avoid headless mode - const runPty = (args: string[]) => { - const ptyProcess = pty.spawn(process.execPath, [BUNDLE_PATH, ...args], { - name: 'xterm-color', - cols: 80, - rows: 80, - cwd: rig.testDir!, - env: { - ...process.env, - GEMINI_CLI_HOME: rig.homeDir!, - GEMINI_CLI_INTEGRATION_TEST: 'true', - GEMINI_PTY_INFO: 'node-pty', + it('canonicalizes the trust path and prevents symlink spoofing', async () => { + // Enable folder trust for this test + rig.setup('symlink spoofing test', { + settings: { + security: { + folderTrust: { + enabled: true, + }, + }, 
}, }); - return new InteractiveRun(ptyProcess); - }; - // 1. Install via symlink, trust it - const run1 = runPty(['extensions', 'install', symlinkPath]); - await run1.expectText('Do you want to trust this folder', 30000); - await run1.type('y\r'); - await run1.expectText('trust this workspace', 30000); - await run1.type('y\r'); - await run1.expectText('Do you want to continue', 30000); - await run1.type('y\r'); - await run1.expectText('installed successfully', 30000); - await run1.kill(); + const realExtPath = join(rig.testDir!, 'real-extension'); + mkdirSync(realExtPath); + writeFileSync(join(realExtPath, 'gemini-extension.json'), extension); - // 2. Verify trustedFolders.json contains the REAL path, not the symlink path - const trustedFoldersPath = join( - rig.homeDir!, - GEMINI_DIR, - 'trustedFolders.json', - ); - // Wait for file to be written - let attempts = 0; - while (!fs.existsSync(trustedFoldersPath) && attempts < 50) { - await new Promise((resolve) => setTimeout(resolve, 100)); - attempts++; - } + const maliciousExtPath = join( + os.tmpdir(), + `malicious-extension-${Date.now()}`, + ); + mkdirSync(maliciousExtPath); + writeFileSync( + join(maliciousExtPath, 'gemini-extension.json'), + otherExtension, + ); - const trustedFolders = JSON.parse( - readFileSync(trustedFoldersPath, 'utf-8'), - ); - const trustedPaths = Object.keys(trustedFolders); - const canonicalRealExtPath = fs.realpathSync(realExtPath); + const symlinkPath = join(rig.testDir!, 'symlink-extension'); + symlinkSync(realExtPath, symlinkPath); - expect(trustedPaths).toContain(canonicalRealExtPath); - expect(trustedPaths).not.toContain(symlinkPath); + // Function to run a command with a PTY to avoid headless mode + const runPty = (args: string[]) => { + const ptyProcess = pty.spawn(process.execPath, [BUNDLE_PATH, ...args], { + name: 'xterm-color', + cols: 80, + rows: 80, + cwd: rig.testDir!, + env: { + ...process.env, + GEMINI_CLI_HOME: rig.homeDir!, + GEMINI_CLI_INTEGRATION_TEST: 'true', + 
GEMINI_PTY_INFO: 'node-pty', + }, + }); + return new InteractiveRun(ptyProcess); + }; - // 3. Swap the symlink to point to the malicious extension - unlinkSync(symlinkPath); - symlinkSync(maliciousExtPath, symlinkPath); + // 1. Install via symlink, trust it + const run1 = runPty(['extensions', 'install', symlinkPath]); + await run1.expectText('Do you want to trust this folder', 30000); + await run1.type('y\r'); + await run1.expectText('trust this workspace', 30000); + await run1.type('y\r'); + await run1.expectText('Do you want to continue', 30000); + await run1.type('y\r'); + await run1.expectText('installed successfully', 30000); + await run1.kill(); - // 4. Try to install again via the same symlink path. - // It should NOT be trusted because the real path changed. - const run2 = runPty(['extensions', 'install', symlinkPath]); - await run2.expectText('Do you want to trust this folder', 30000); - await run2.type('n\r'); - await run2.expectText('Installation aborted', 30000); - await run2.kill(); - }, 60000); -}); + // 2. Verify trustedFolders.json contains the REAL path, not the symlink path + const trustedFoldersPath = join( + rig.homeDir!, + GEMINI_DIR, + 'trustedFolders.json', + ); + // Wait for file to be written + let attempts = 0; + while (!fs.existsSync(trustedFoldersPath) && attempts < 50) { + await new Promise((resolve) => setTimeout(resolve, 100)); + attempts++; + } + + const trustedFolders = JSON.parse( + readFileSync(trustedFoldersPath, 'utf-8'), + ); + const trustedPaths = Object.keys(trustedFolders); + const canonicalRealExtPath = fs.realpathSync(realExtPath); + + expect(trustedPaths).toContain(canonicalRealExtPath); + expect(trustedPaths).not.toContain(symlinkPath); + + // 3. Swap the symlink to point to the malicious extension + unlinkSync(symlinkPath); + symlinkSync(maliciousExtPath, symlinkPath); + + // 4. Try to install again via the same symlink path. + // It should NOT be trusted because the real path changed. 
+ const run2 = runPty(['extensions', 'install', symlinkPath]); + await run2.expectText('Do you want to trust this folder', 30000); + await run2.type('n\r'); + await run2.expectText('Installation aborted', 30000); + await run2.kill(); + }, 60000); + }, +); diff --git a/integration-tests/test-helper.ts b/integration-tests/test-helper.ts index a4546a2cd3..5f205ae997 100644 --- a/integration-tests/test-helper.ts +++ b/integration-tests/test-helper.ts @@ -6,3 +6,5 @@ export * from '@google/gemini-cli-test-utils'; export { normalizePath } from '@google/gemini-cli-test-utils'; + +export const skipFlaky = !process.env['RUN_FLAKY_INTEGRATION']; diff --git a/integration-tests/test-mcp-support.responses b/integration-tests/test-mcp-support.responses new file mode 100644 index 0000000000..1db32fdc21 --- /dev/null +++ b/integration-tests/test-mcp-support.responses @@ -0,0 +1,2 @@ +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"mcp_weather-server_get_weather","args":{"location":"London"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":10,"candidatesTokenCount":10,"totalTokenCount":20}}]} +{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The weather in London is rainy."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":10,"candidatesTokenCount":10,"totalTokenCount":20}}]} diff --git a/integration-tests/test-mcp-support.test.ts b/integration-tests/test-mcp-support.test.ts new file mode 100644 index 0000000000..15266e6be9 --- /dev/null +++ b/integration-tests/test-mcp-support.test.ts @@ -0,0 +1,75 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { + TestRig, + assertModelHasOutput, + TestMcpServerBuilder, +} from './test-helper.js'; +import { join, dirname } from 'node:path'; 
+import { fileURLToPath } from 'node:url'; +import fs from 'node:fs'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +describe('test-mcp-support', () => { + let rig: TestRig; + + beforeEach(() => { + rig = new TestRig(); + }); + + afterEach(async () => await rig.cleanup()); + + it('should discover and call a tool on the test server', async () => { + await rig.setup('test-mcp-test', { + settings: { + tools: { core: [] }, // disable core tools to force using MCP + model: { + name: 'gemini-3-flash-preview', + }, + }, + fakeResponsesPath: join(__dirname, 'test-mcp-support.responses'), + }); + + // Workaround for ProjectRegistry save issue + const userGeminiDir = join(rig.homeDir!, '.gemini'); + fs.writeFileSync(join(userGeminiDir, 'projects.json'), '{"projects":{}}'); + + const builder = new TestMcpServerBuilder('weather-server').addTool( + 'get_weather', + 'Get the weather for a location', + 'The weather in London is always rainy.', + { + type: 'object', + properties: { + location: { type: 'string' }, + }, + }, + ); + + rig.addTestMcpServer('weather-server', builder.build()); + + // Run the CLI asking for weather + const output = await rig.run({ + args: 'What is the weather in London? 
Answer with the raw tool response snippet.', + env: { GEMINI_API_KEY: 'dummy' }, + }); + + // Assert tool call + const foundToolCall = await rig.waitForToolCall( + 'mcp_weather-server_get_weather', + ); + expect( + foundToolCall, + 'Expected to find a get_weather tool call', + ).toBeTruthy(); + + assertModelHasOutput(output); + expect(output.toLowerCase()).toContain('rainy'); + }, 30000); +}); diff --git a/package-lock.json b/package-lock.json index 914d66d3ac..f3bf8fa616 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,7 +11,7 @@ "packages/*" ], "dependencies": { - "ink": "npm:@jrichman/ink@6.4.11", + "ink": "npm:@jrichman/ink@6.5.0", "latest-version": "^9.0.0", "node-fetch-native": "^1.6.7", "proper-lockfile": "^4.1.2", @@ -22,7 +22,7 @@ "gemini": "bundle/gemini.js" }, "devDependencies": { - "@agentclientprotocol/sdk": "^0.12.0", + "@agentclientprotocol/sdk": "^0.16.1", "@octokit/rest": "^22.0.0", "@types/marked": "^5.0.2", "@types/mime-types": "^3.0.1", @@ -84,9 +84,9 @@ } }, "node_modules/@agentclientprotocol/sdk": { - "version": "0.12.0", - "resolved": "https://registry.npmjs.org/@agentclientprotocol/sdk/-/sdk-0.12.0.tgz", - "integrity": "sha512-V8uH/KK1t7utqyJmTA7y7DzKu6+jKFIXM+ZVouz8E55j8Ej2RV42rEvPKn3/PpBJlliI5crcGk1qQhZ7VwaepA==", + "version": "0.16.1", + "resolved": "https://registry.npmjs.org/@agentclientprotocol/sdk/-/sdk-0.16.1.tgz", + "integrity": "sha512-1ad+Sc/0sCtZGHthxxvgEUo5Wsbw16I+aF+YwdiLnPwkZG8KAGUEAPK6LM6Pf69lCyJPt1Aomk1d+8oE3C4ZEw==", "license": "Apache-2.0", "peerDependencies": { "zod": "^3.25.0 || ^4.0.0" @@ -8696,9 +8696,9 @@ "license": "BSD-3-Clause" }, "node_modules/fast-xml-builder": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/fast-xml-builder/-/fast-xml-builder-1.1.2.tgz", - "integrity": "sha512-NJAmiuVaJEjVa7TjLZKlYd7RqmzOC91EtPFXHvlTcqBVo50Qh7XV5IwvXi1c7NRz2Q/majGX9YLcwJtWgHjtkA==", + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/fast-xml-builder/-/fast-xml-builder-1.1.4.tgz", + 
"integrity": "sha512-f2jhpN4Eccy0/Uz9csxh3Nu6q4ErKxf0XIsasomfOihuSUa3/xw6w8dnOtCDgEItQFJG8KyXPzQXzcODDrrbOg==", "funding": [ { "type": "github", @@ -8711,9 +8711,9 @@ } }, "node_modules/fast-xml-parser": { - "version": "5.5.3", - "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-5.5.3.tgz", - "integrity": "sha512-Ymnuefk6VzAhT3SxLzVUw+nMio/wB1NGypHkgetwtXcK1JfryaHk4DWQFGVwQ9XgzyS5iRZ7C2ZGI4AMsdMZ6A==", + "version": "5.5.9", + "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-5.5.9.tgz", + "integrity": "sha512-jldvxr1MC6rtiZKgrFnDSvT8xuH+eJqxqOBThUVjYrxssYTo1avZLGql5l0a0BAERR01CadYzZ83kVEkbyDg+g==", "funding": [ { "type": "github", @@ -8722,9 +8722,9 @@ ], "license": "MIT", "dependencies": { - "fast-xml-builder": "^1.1.2", - "path-expression-matcher": "^1.1.3", - "strnum": "^2.1.2" + "fast-xml-builder": "^1.1.4", + "path-expression-matcher": "^1.2.0", + "strnum": "^2.2.2" }, "bin": { "fxparser": "src/cli/cli.js" @@ -8900,9 +8900,9 @@ } }, "node_modules/flatted": { - "version": "3.3.3", - "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.3.tgz", - "integrity": "sha512-GX+ysw4PBCz0PzosHDepZGANEuFCMLrnRTiEy9McGjmkCQYwRq4A/X786G/fjM/+OjsWSU1ZrY5qyARZmO/uwg==", + "version": "3.4.2", + "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.4.2.tgz", + "integrity": "sha512-PjDse7RzhcPkIJwy5t7KPWQSZ9cAbzQXcafsetQoD7sOJRQlGikNbx7yZp2OotDnJyrDcbyRq3Ttb18iYOqkxA==", "dev": true, "license": "ISC" }, @@ -10089,9 +10089,9 @@ }, "node_modules/ink": { "name": "@jrichman/ink", - "version": "6.4.11", - "resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.4.11.tgz", - "integrity": "sha512-93LQlzT7vvZ1XJcmOMwN4s+6W334QegendeHOMnEJBlhnpIzr8bws6/aOEHG8ZCuVD/vNeeea5m1msHIdAY6ig==", + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.5.0.tgz", + "integrity": "sha512-S4g/ng7fPZmFwclO82iWkOce8vDLy/FIDgHIfkCWGOehqHe6dexHsmq3kNQD21okh198pA5SAQTCqNQJb/svRQ==", "license": "MIT", 
"dependencies": { "@alcalzone/ansi-tokenize": "^0.2.1", @@ -10116,6 +10116,7 @@ "type-fest": "^4.27.0", "wrap-ansi": "^9.0.0", "ws": "^8.18.0", + "yargs": "^17.7.2", "yoga-layout": "~3.2.1" }, "engines": { @@ -13200,9 +13201,9 @@ } }, "node_modules/path-expression-matcher": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/path-expression-matcher/-/path-expression-matcher-1.1.3.tgz", - "integrity": "sha512-qdVgY8KXmVdJZRSS1JdEPOKPdTiEK/pi0RkcT2sw1RhXxohdujUlJFPuS1TSkevZ9vzd3ZlL7ULl1MHGTApKzQ==", + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/path-expression-matcher/-/path-expression-matcher-1.2.0.tgz", + "integrity": "sha512-DwmPWeFn+tq7TiyJ2CxezCAirXjFxvaiD03npak3cRjlP9+OjTmSy1EpIrEbh+l6JgUundniloMLDQ/6VTdhLQ==", "funding": [ { "type": "github", @@ -15465,9 +15466,9 @@ } }, "node_modules/strnum": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/strnum/-/strnum-2.2.0.tgz", - "integrity": "sha512-Y7Bj8XyJxnPAORMZj/xltsfo55uOiyHcU2tnAVzHUnSJR/KsEX+9RoDeXEnsXtl/CX4fAcrt64gZ13aGaWPeBg==", + "version": "2.2.2", + "resolved": "https://registry.npmjs.org/strnum/-/strnum-2.2.2.tgz", + "integrity": "sha512-DnR90I+jtXNSTXWdwrEy9FakW7UX+qUZg28gj5fk2vxxl7uS/3bpI4fjFYVmdK9etptYBPNkpahuQnEwhwECqA==", "funding": [ { "type": "github", @@ -16469,9 +16470,9 @@ "license": "MIT" }, "node_modules/undici": { - "version": "7.19.0", - "resolved": "https://registry.npmjs.org/undici/-/undici-7.19.0.tgz", - "integrity": "sha512-Heho1hJD81YChi+uS2RkSjcVO+EQLmLSyUlHyp7Y/wFbxQaGb4WXVKD073JytrjXJVkSZVzoE2MCSOKugFGtOQ==", + "version": "7.24.5", + "resolved": "https://registry.npmjs.org/undici/-/undici-7.24.5.tgz", + "integrity": "sha512-3IWdCpjgxp15CbJnsi/Y9TCDE7HWVN19j1hmzVhoAkY/+CJx449tVxT5wZc1Gwg8J+P0LWvzlBzxYRnHJ+1i7Q==", "license": "MIT", "engines": { "node": ">=20.18.1" @@ -17531,7 +17532,7 @@ "version": "0.36.0-nightly.20260317.2f90b4653", "license": "Apache-2.0", "dependencies": { - "@agentclientprotocol/sdk": "^0.12.0", + 
"@agentclientprotocol/sdk": "^0.16.1", "@google/gemini-cli-core": "file:../core", "@google/genai": "1.30.0", "@iarna/toml": "^2.2.5", @@ -17550,7 +17551,7 @@ "fzf": "^0.5.2", "glob": "^12.0.0", "highlight.js": "^11.11.1", - "ink": "npm:@jrichman/ink@6.4.11", + "ink": "npm:@jrichman/ink@6.5.0", "ink-gradient": "^3.0.0", "ink-spinner": "^5.0.0", "latest-version": "^9.0.0", diff --git a/package.json b/package.json index 531f9f75d9..73ebef63fd 100644 --- a/package.json +++ b/package.json @@ -48,10 +48,11 @@ "test:all_evals": "cross-env RUN_EVALS=1 vitest run --config evals/vitest.config.ts", "test:e2e": "cross-env VERBOSE=true KEEP_OUTPUT=true npm run test:integration:sandbox:none", "test:integration:all": "npm run test:integration:sandbox:none && npm run test:integration:sandbox:docker && npm run test:integration:sandbox:podman", + "test:integration:flaky": "cross-env RUN_FLAKY_INTEGRATION=1 npm run test:integration:sandbox:none", "test:integration:sandbox:none": "cross-env GEMINI_SANDBOX=false vitest run --root ./integration-tests", "test:integration:sandbox:docker": "cross-env GEMINI_SANDBOX=docker npm run build:sandbox && cross-env GEMINI_SANDBOX=docker vitest run --root ./integration-tests", "test:integration:sandbox:podman": "cross-env GEMINI_SANDBOX=podman vitest run --root ./integration-tests", - "lint": "eslint . --cache", + "lint": "eslint . --cache --max-warnings 0", "lint:fix": "eslint . 
--fix --ext .ts,.tsx && eslint integration-tests --fix && eslint scripts --fix && npm run format", "lint:ci": "npm run lint:all", "lint:all": "node scripts/lint.js", @@ -67,7 +68,7 @@ "pre-commit": "node scripts/pre-commit.js" }, "overrides": { - "ink": "npm:@jrichman/ink@6.4.11", + "ink": "npm:@jrichman/ink@6.5.0", "wrap-ansi": "9.0.2", "cliui": { "wrap-ansi": "7.0.0" @@ -87,7 +88,7 @@ "LICENSE" ], "devDependencies": { - "@agentclientprotocol/sdk": "^0.12.0", + "@agentclientprotocol/sdk": "^0.16.1", "@octokit/rest": "^22.0.0", "@types/marked": "^5.0.2", "@types/mime-types": "^3.0.1", @@ -135,7 +136,7 @@ "yargs": "^17.7.2" }, "dependencies": { - "ink": "npm:@jrichman/ink@6.4.11", + "ink": "npm:@jrichman/ink@6.5.0", "latest-version": "^9.0.0", "node-fetch-native": "^1.6.7", "proper-lockfile": "^4.1.2", diff --git a/packages/a2a-server/src/config/config.test.ts b/packages/a2a-server/src/config/config.test.ts index cfe77311ea..1c553d7539 100644 --- a/packages/a2a-server/src/config/config.test.ts +++ b/packages/a2a-server/src/config/config.test.ts @@ -29,6 +29,7 @@ vi.mock('@google/gemini-cli-core', async (importOriginal) => { await importOriginal(); return { ...actual, + PRIORITY_YOLO_ALLOW_ALL: 998, Config: vi.fn().mockImplementation((params) => { const mockConfig = { ...params, @@ -351,23 +352,37 @@ describe('loadConfig', () => { }); describe('interactivity', () => { - it('should set interactive true when not headless', async () => { + it('should always set interactive true', async () => { + vi.mocked(isHeadlessMode).mockReturnValue(true); + await loadConfig(mockSettings, mockExtensionLoader, taskId); + expect(Config).toHaveBeenCalledWith( + expect.objectContaining({ + interactive: true, + }), + ); + vi.mocked(isHeadlessMode).mockReturnValue(false); await loadConfig(mockSettings, mockExtensionLoader, taskId); expect(Config).toHaveBeenCalledWith( expect.objectContaining({ interactive: true, - enableInteractiveShell: true, }), ); }); - it('should set interactive false 
when headless', async () => { + it('should set enableInteractiveShell based on headless mode', async () => { + vi.mocked(isHeadlessMode).mockReturnValue(false); + await loadConfig(mockSettings, mockExtensionLoader, taskId); + expect(Config).toHaveBeenCalledWith( + expect.objectContaining({ + enableInteractiveShell: true, + }), + ); + vi.mocked(isHeadlessMode).mockReturnValue(true); await loadConfig(mockSettings, mockExtensionLoader, taskId); expect(Config).toHaveBeenCalledWith( expect.objectContaining({ - interactive: false, enableInteractiveShell: false, }), ); diff --git a/packages/a2a-server/src/config/config.ts b/packages/a2a-server/src/config/config.ts index 9474c4d9c5..cd4f5df25f 100644 --- a/packages/a2a-server/src/config/config.ts +++ b/packages/a2a-server/src/config/config.ts @@ -87,6 +87,7 @@ export async function loadConfig( approvalMode === ApprovalMode.YOLO ? [ { + toolName: '*', decision: PolicyDecision.ALLOW, priority: PRIORITY_YOLO_ALLOW_ALL, modes: [ApprovalMode.YOLO], @@ -124,7 +125,7 @@ export async function loadConfig( trustedFolder: true, extensionLoader, checkpointing, - interactive: !isHeadlessMode(), + interactive: true, enableInteractiveShell: !isHeadlessMode(), ptyInfo: 'auto', enableAgents: settings.experimental?.enableAgents ?? 
true, diff --git a/packages/a2a-server/src/utils/testing_utils.ts b/packages/a2a-server/src/utils/testing_utils.ts index fd4d721732..8181f702f1 100644 --- a/packages/a2a-server/src/utils/testing_utils.ts +++ b/packages/a2a-server/src/utils/testing_utils.ts @@ -97,6 +97,7 @@ export function createMockConfig( getMcpClientManager: vi.fn().mockReturnValue({ getMcpServers: vi.fn().mockReturnValue({}), }), + getTelemetryLogPromptsEnabled: vi.fn().mockReturnValue(false), getGitService: vi.fn(), validatePathAccess: vi.fn().mockReturnValue(undefined), getShellExecutionConfig: vi.fn().mockReturnValue({ diff --git a/packages/cli/GEMINI.md b/packages/cli/GEMINI.md index e98ca81376..8bad8f0721 100644 --- a/packages/cli/GEMINI.md +++ b/packages/cli/GEMINI.md @@ -7,7 +7,10 @@ - **Shortcuts**: only define keyboard shortcuts in `packages/cli/src/ui/key/keyBindings.ts` - Do not implement any logic performing custom string measurement or string - truncation. Use Ink layout instead leveraging ResizeObserver as needed. + truncation. Use Ink layout instead leveraging ResizeObserver as needed. When + using `ResizeObserver`, prefer the `useCallback` ref pattern (as seen in + `MaxSizedBox.tsx`) to ensure size measurements are captured as soon as the + element is available, avoiding potential rendering timing issues. - Avoid prop drilling when at all possible. 
## Testing diff --git a/packages/cli/package.json b/packages/cli/package.json index 79cb21307a..072f2b8a72 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -30,7 +30,7 @@ "sandboxImageUri": "us-docker.pkg.dev/gemini-code-dev/gemini-cli/sandbox:0.36.0-nightly.20260317.2f90b4653" }, "dependencies": { - "@agentclientprotocol/sdk": "^0.12.0", + "@agentclientprotocol/sdk": "^0.16.1", "@google/gemini-cli-core": "file:../core", "@google/genai": "1.30.0", "@iarna/toml": "^2.2.5", @@ -49,7 +49,7 @@ "fzf": "^0.5.2", "glob": "^12.0.0", "highlight.js": "^11.11.1", - "ink": "npm:@jrichman/ink@6.4.11", + "ink": "npm:@jrichman/ink@6.5.0", "ink-gradient": "^3.0.0", "ink-spinner": "^5.0.0", "latest-version": "^9.0.0", diff --git a/packages/cli/src/acp/acpClient.test.ts b/packages/cli/src/acp/acpClient.test.ts index ca525182b5..9e4b89ea20 100644 --- a/packages/cli/src/acp/acpClient.test.ts +++ b/packages/cli/src/acp/acpClient.test.ts @@ -21,13 +21,13 @@ import { AuthType, ToolConfirmationOutcome, StreamEventType, - isWithinRoot, ReadManyFilesTool, type GeminiChat, type Config, type MessageBus, LlmRole, type GitService, + processSingleFileContent, } from '@google/gemini-cli-core'; import { SettingScope, @@ -99,6 +99,8 @@ vi.mock( const actual = await importOriginal(); return { ...actual, + updatePolicy: vi.fn(), + createPolicyUpdater: vi.fn(), ReadManyFilesTool: vi.fn().mockImplementation(() => ({ name: 'read_many_files', kind: 'read', @@ -111,7 +113,6 @@ vi.mock( }), })), logToolCall: vi.fn(), - isWithinRoot: vi.fn().mockReturnValue(true), LlmRole: { MAIN: 'main', SUBAGENT: 'subagent', @@ -134,6 +135,7 @@ vi.mock( Cancelled: 'cancelled', AwaitingApproval: 'awaiting_approval', }, + processSingleFileContent: vi.fn(), }; }, ); @@ -177,6 +179,24 @@ describe('GeminiAgent', () => { getHasAccessToPreviewModel: vi.fn().mockReturnValue(false), getCheckpointingEnabled: vi.fn().mockReturnValue(false), getDisableAlwaysAllow: vi.fn().mockReturnValue(false), + 
validatePathAccess: vi.fn().mockReturnValue(null), + getWorkspaceContext: vi.fn().mockReturnValue({ + addReadOnlyPath: vi.fn(), + }), + getPolicyEngine: vi.fn().mockReturnValue({ + addRule: vi.fn(), + }), + messageBus: { + publish: vi.fn(), + subscribe: vi.fn(), + unsubscribe: vi.fn(), + }, + storage: { + getWorkspaceAutoSavedPolicyPath: vi.fn(), + getAutoSavedPolicyPath: vi.fn(), + setClientName: vi.fn(), + }, + setClientName: vi.fn(), get config() { return this; }, @@ -191,12 +211,16 @@ describe('GeminiAgent', () => { mockArgv = {} as unknown as CliArgs; mockConnection = { sessionUpdate: vi.fn(), + requestPermission: vi.fn(), } as unknown as Mocked; (loadCliConfig as unknown as Mock).mockResolvedValue(mockConfig); (loadSettings as unknown as Mock).mockImplementation(() => ({ merged: { - security: { auth: { selectedType: AuthType.LOGIN_WITH_GOOGLE } }, + security: { + auth: { selectedType: AuthType.LOGIN_WITH_GOOGLE }, + enablePermanentToolApproval: true, + }, mcpServers: {}, }, setValue: vi.fn(), @@ -551,7 +575,7 @@ describe('GeminiAgent', () => { }); expect(session.prompt).toHaveBeenCalled(); - expect(result).toEqual({ stopReason: 'end_turn' }); + expect(result).toMatchObject({ stopReason: 'end_turn' }); }); it('should delegate setMode to session', async () => { @@ -648,6 +672,7 @@ describe('Session', () => { shouldIgnoreFile: vi.fn().mockReturnValue(false), }), getFileFilteringOptions: vi.fn().mockReturnValue({}), + getFileSystemService: vi.fn().mockReturnValue({}), getTargetDir: vi.fn().mockReturnValue('/tmp'), getEnableRecursiveFileSearch: vi.fn().mockReturnValue(false), getDebugMode: vi.fn().mockReturnValue(false), @@ -657,6 +682,10 @@ describe('Session', () => { isPlanEnabled: vi.fn().mockReturnValue(true), getCheckpointingEnabled: vi.fn().mockReturnValue(false), getGitService: vi.fn().mockResolvedValue({} as GitService), + validatePathAccess: vi.fn().mockReturnValue(null), + getWorkspaceContext: vi.fn().mockReturnValue({ + addReadOnlyPath: vi.fn(), + }), 
waitForMcpInit: vi.fn(), getDisableAlwaysAllow: vi.fn().mockReturnValue(false), get config() { @@ -677,7 +706,10 @@ describe('Session', () => { systemDefaults: { settings: {} }, user: { settings: {} }, workspace: { settings: {} }, - merged: { settings: {} }, + merged: { + security: { enablePermanentToolApproval: true }, + mcpServers: {}, + }, errors: [], } as unknown as LoadedSettings); }); @@ -750,7 +782,7 @@ describe('Session', () => { content: { type: 'text', text: 'Hello' }, }, }); - expect(result).toEqual({ stopReason: 'end_turn' }); + expect(result).toMatchObject({ stopReason: 'end_turn' }); }); it('should handle /memory command', async () => { @@ -767,7 +799,7 @@ describe('Session', () => { prompt: [{ type: 'text', text: '/memory view' }], }); - expect(result).toEqual({ stopReason: 'end_turn' }); + expect(result).toMatchObject({ stopReason: 'end_turn' }); expect(handleCommandSpy).toHaveBeenCalledWith( '/memory view', expect.any(Object), @@ -789,7 +821,7 @@ describe('Session', () => { prompt: [{ type: 'text', text: '/extensions list' }], }); - expect(result).toEqual({ stopReason: 'end_turn' }); + expect(result).toMatchObject({ stopReason: 'end_turn' }); expect(handleCommandSpy).toHaveBeenCalledWith( '/extensions list', expect.any(Object), @@ -811,7 +843,7 @@ describe('Session', () => { prompt: [{ type: 'text', text: '/extensions explore' }], }); - expect(result).toEqual({ stopReason: 'end_turn' }); + expect(result).toMatchObject({ stopReason: 'end_turn' }); expect(handleCommandSpy).toHaveBeenCalledWith( '/extensions explore', expect.any(Object), @@ -833,7 +865,7 @@ describe('Session', () => { prompt: [{ type: 'text', text: '/restore' }], }); - expect(result).toEqual({ stopReason: 'end_turn' }); + expect(result).toMatchObject({ stopReason: 'end_turn' }); expect(handleCommandSpy).toHaveBeenCalledWith( '/restore', expect.any(Object), @@ -855,7 +887,7 @@ describe('Session', () => { prompt: [{ type: 'text', text: '/init' }], }); - expect(result).toEqual({ 
stopReason: 'end_turn' }); + expect(result).toMatchObject({ stopReason: 'end_turn' }); expect(handleCommandSpy).toHaveBeenCalledWith('/init', expect.any(Object)); expect(mockChat.sendMessageStream).not.toHaveBeenCalled(); }); @@ -909,7 +941,7 @@ describe('Session', () => { }), }), ); - expect(result).toEqual({ stopReason: 'end_turn' }); + expect(result).toMatchObject({ stopReason: 'end_turn' }); }); it('should handle tool call permission request', async () => { @@ -1016,6 +1048,166 @@ describe('Session', () => { ); }); + it('should exclude always allow and save permanent option when enablePermanentToolApproval is false', async () => { + mockConfig.getDisableAlwaysAllow = vi.fn().mockReturnValue(false); + const confirmationDetails = { + type: 'edit', + onConfirm: vi.fn(), + }; + mockTool.build.mockReturnValue({ + getDescription: () => 'Test Tool', + toolLocations: () => [], + shouldConfirmExecute: vi.fn().mockResolvedValue(confirmationDetails), + execute: vi.fn().mockResolvedValue({ llmContent: 'Tool Result' }), + }); + + const customSettings = { + system: { settings: {} }, + systemDefaults: { settings: {} }, + user: { settings: {} }, + workspace: { settings: {} }, + merged: { + security: { enablePermanentToolApproval: false }, + mcpServers: {}, + }, + errors: [], + } as unknown as LoadedSettings; + + const localSession = new Session( + 'session-2', + mockChat, + mockConfig, + mockConnection, + customSettings, + ); + + mockConnection.requestPermission.mockResolvedValueOnce({ + outcome: { + outcome: 'selected', + optionId: ToolConfirmationOutcome.ProceedOnce, + }, + }); + + const stream1 = createMockStream([ + { + type: StreamEventType.CHUNK, + value: { + functionCalls: [{ name: 'test_tool', args: {} }], + }, + }, + ]); + const stream2 = createMockStream([ + { + type: StreamEventType.CHUNK, + value: { candidates: [] }, + }, + ]); + + mockChat.sendMessageStream + .mockResolvedValueOnce(stream1) + .mockResolvedValueOnce(stream2); + + await localSession.prompt({ + 
sessionId: 'session-2', + prompt: [{ type: 'text', text: 'Call tool' }], + }); + + expect(mockConnection.requestPermission).toHaveBeenCalledWith( + expect.objectContaining({ + options: expect.not.arrayContaining([ + expect.objectContaining({ + optionId: ToolConfirmationOutcome.ProceedAlwaysAndSave, + }), + ]), + }), + ); + expect(mockConnection.requestPermission).toHaveBeenCalledWith( + expect.objectContaining({ + options: expect.arrayContaining([ + expect.objectContaining({ + optionId: ToolConfirmationOutcome.ProceedAlways, + }), + ]), + }), + ); + }); + + it('should include always allow and save permanent option when enablePermanentToolApproval is true', async () => { + mockConfig.getDisableAlwaysAllow = vi.fn().mockReturnValue(false); + const confirmationDetails = { + type: 'edit', + onConfirm: vi.fn(), + }; + mockTool.build.mockReturnValue({ + getDescription: () => 'Test Tool', + toolLocations: () => [], + shouldConfirmExecute: vi.fn().mockResolvedValue(confirmationDetails), + execute: vi.fn().mockResolvedValue({ llmContent: 'Tool Result' }), + }); + + const customSettings = { + system: { settings: {} }, + systemDefaults: { settings: {} }, + user: { settings: {} }, + workspace: { settings: {} }, + merged: { + security: { enablePermanentToolApproval: true }, + mcpServers: {}, + }, + errors: [], + } as unknown as LoadedSettings; + + const localSession = new Session( + 'session-2', + mockChat, + mockConfig, + mockConnection, + customSettings, + ); + + mockConnection.requestPermission.mockResolvedValueOnce({ + outcome: { + outcome: 'selected', + optionId: ToolConfirmationOutcome.ProceedOnce, + }, + }); + + const stream1 = createMockStream([ + { + type: StreamEventType.CHUNK, + value: { + functionCalls: [{ name: 'test_tool', args: {} }], + }, + }, + ]); + const stream2 = createMockStream([ + { + type: StreamEventType.CHUNK, + value: { candidates: [] }, + }, + ]); + + mockChat.sendMessageStream + .mockResolvedValueOnce(stream1) + .mockResolvedValueOnce(stream2); + + 
await localSession.prompt({ + sessionId: 'session-2', + prompt: [{ type: 'text', text: 'Call tool' }], + }); + + expect(mockConnection.requestPermission).toHaveBeenCalledWith( + expect.objectContaining({ + options: expect.arrayContaining([ + expect.objectContaining({ + optionId: ToolConfirmationOutcome.ProceedAlwaysAndSave, + name: 'Allow for this file in all future sessions', + }), + ]), + }), + ); + }); + it('should use filePath for ACP diff content in permission request', async () => { const confirmationDetails = { type: 'edit', @@ -1080,6 +1272,120 @@ describe('Session', () => { ); }); + it('should split getDisplayTitle and getExplanation for title and content in permission request', async () => { + const confirmationDetails = { + type: 'info', + onConfirm: vi.fn(), + }; + mockTool.build.mockReturnValue({ + getDescription: () => 'Original Description', + getDisplayTitle: () => 'Display Title Only', + getExplanation: () => 'A detailed explanation text', + toolLocations: () => [], + shouldConfirmExecute: vi.fn().mockResolvedValue(confirmationDetails), + execute: vi.fn().mockResolvedValue({ llmContent: 'Tool Result' }), + }); + + mockConnection.requestPermission.mockResolvedValue({ + outcome: { + outcome: 'selected', + optionId: ToolConfirmationOutcome.ProceedOnce, + }, + }); + + const stream1 = createMockStream([ + { + type: StreamEventType.CHUNK, + value: { + functionCalls: [{ name: 'test_tool', args: {} }], + }, + }, + ]); + const stream2 = createMockStream([ + { + type: StreamEventType.CHUNK, + value: { candidates: [] }, + }, + ]); + + mockChat.sendMessageStream + .mockResolvedValueOnce(stream1) + .mockResolvedValueOnce(stream2); + + await session.prompt({ + sessionId: 'session-1', + prompt: [{ type: 'text', text: 'Call tool' }], + }); + + expect(mockConnection.requestPermission).toHaveBeenCalledWith( + expect.objectContaining({ + toolCall: expect.objectContaining({ + title: 'Display Title Only', + content: [], + }), + }), + ); + + 
expect(mockConnection.sessionUpdate).toHaveBeenCalledWith( + expect.objectContaining({ + update: expect.objectContaining({ + sessionUpdate: 'agent_thought_chunk', + content: { type: 'text', text: 'A detailed explanation text' }, + }), + }), + ); + }); + + it('should call updatePolicy when tool permission triggers always allow', async () => { + const confirmationDetails = { + type: 'info', + onConfirm: vi.fn(), + }; + mockTool.build.mockReturnValue({ + getDescription: () => 'Test Tool', + toolLocations: () => [], + shouldConfirmExecute: vi.fn().mockResolvedValue(confirmationDetails), + execute: vi.fn().mockResolvedValue({ llmContent: 'Tool Result' }), + }); + + mockConnection.requestPermission.mockResolvedValue({ + outcome: { + outcome: 'selected', + optionId: ToolConfirmationOutcome.ProceedAlways, + }, + }); + + const stream1 = createMockStream([ + { + type: StreamEventType.CHUNK, + value: { + functionCalls: [{ name: 'test_tool', args: {} }], + }, + }, + ]); + const stream2 = createMockStream([ + { + type: StreamEventType.CHUNK, + value: { candidates: [] }, + }, + ]); + + mockChat.sendMessageStream + .mockResolvedValueOnce(stream1) + .mockResolvedValueOnce(stream2); + + const { updatePolicy } = await import('@google/gemini-cli-core'); + + await session.prompt({ + sessionId: 'session-1', + prompt: [{ type: 'text', text: 'Call tool' }], + }); + + expect(confirmationDetails.onConfirm).toHaveBeenCalled(); + + expect(updatePolicy).toHaveBeenCalled(); + }); + it('should use filePath for ACP diff content in tool result', async () => { mockTool.build.mockReturnValue({ getDescription: () => 'Test Tool', @@ -1292,7 +1598,6 @@ describe('Session', () => { (fs.stat as unknown as Mock).mockResolvedValue({ isDirectory: () => false, }); - (isWithinRoot as unknown as Mock).mockReturnValue(true); const stream = createMockStream([ { @@ -1350,7 +1655,6 @@ describe('Session', () => { (fs.stat as unknown as Mock).mockResolvedValue({ isDirectory: () => false, }); - (isWithinRoot as 
unknown as Mock).mockReturnValue(true); const MockReadManyFilesTool = ReadManyFilesTool as unknown as Mock; MockReadManyFilesTool.mockImplementationOnce(() => ({ @@ -1404,6 +1708,172 @@ describe('Session', () => { ); }); + it('should handle @path validation error and bubble it to user', async () => { + mockConfig.getTargetDir.mockReturnValue('/workspace'); + (path.resolve as unknown as Mock).mockReturnValue('/tmp/disallowed.txt'); + mockConfig.validatePathAccess.mockReturnValue('Path is outside workspace'); + + // Force fs.stat to fail to skip direct reading and triggers the warning + (fs.stat as unknown as Mock).mockRejectedValue(new Error('File not found')); + + const stream = createMockStream([ + { + type: StreamEventType.CHUNK, + value: { candidates: [] }, + }, + ]); + mockChat.sendMessageStream.mockResolvedValue(stream); + + await session.prompt({ + sessionId: 'session-1', + prompt: [ + { + type: 'resource_link', + uri: 'file://disallowed.txt', + mimeType: 'text/plain', + name: 'disallowed.txt', + }, + ], + }); + + // Verify warning sent via sendUpdate + expect(mockConnection.sessionUpdate).toHaveBeenCalledWith( + expect.objectContaining({ + update: expect.objectContaining({ + sessionUpdate: 'agent_thought_chunk', + content: expect.objectContaining({ + text: expect.stringContaining( + 'Warning: skipping access to `disallowed.txt`. 
Reason: Path is outside workspace', + ), + }), + }), + }), + ); + }); + + it('should read absolute file directly if outside workspace', async () => { + mockConfig.getTargetDir.mockReturnValue('/workspace'); + const testFilePath = '/tmp/custom.txt'; + (path.resolve as unknown as Mock).mockReturnValue(testFilePath); + mockConfig.validatePathAccess.mockReturnValue('Path is outside workspace'); + + mockConnection.requestPermission.mockResolvedValue({ + outcome: { + outcome: 'selected', + optionId: ToolConfirmationOutcome.ProceedOnce, + }, + } as unknown as acp.RequestPermissionResponse); + + const mockStats = { + isFile: () => true, + isDirectory: () => false, + }; + (fs.stat as unknown as Mock).mockResolvedValue(mockStats); + (processSingleFileContent as unknown as Mock).mockResolvedValue({ + llmContent: 'Absolute File Content', + }); + + const stream = createMockStream([ + { + type: StreamEventType.CHUNK, + value: { candidates: [] }, + }, + ]); + mockChat.sendMessageStream.mockResolvedValue(stream); + + await session.prompt({ + sessionId: 'session-1', + prompt: [ + { + type: 'resource_link', + uri: `file://${testFilePath}`, + mimeType: 'text/plain', + name: 'custom.txt', + }, + ], + }); + + expect(processSingleFileContent).toHaveBeenCalledWith( + testFilePath, + expect.anything(), + expect.anything(), + ); + + // Verify content appended to sendMessageStream parts + expect(mockChat.sendMessageStream).toHaveBeenCalledWith( + expect.anything(), + expect.arrayContaining([ + expect.objectContaining({ + text: 'Absolute File Content', + }), + ]), + expect.anything(), + expect.any(AbortSignal), + expect.anything(), + ); + }); + + it('should read escaping relative file directly if outside workspace', async () => { + mockConfig.getTargetDir.mockReturnValue('/workspace'); + const testFilePath = '../../custom.txt'; + (path.resolve as unknown as Mock).mockReturnValue('/custom.txt'); + mockConfig.validatePathAccess.mockReturnValue('Path is outside workspace'); + + 
mockConnection.requestPermission.mockResolvedValue({ + outcome: { + outcome: 'selected', + optionId: ToolConfirmationOutcome.ProceedOnce, + }, + } as unknown as acp.RequestPermissionResponse); + + const mockStats = { + isFile: () => true, + isDirectory: () => false, + }; + (fs.stat as unknown as Mock).mockResolvedValue(mockStats); + (processSingleFileContent as unknown as Mock).mockResolvedValue({ + llmContent: 'Escaping Relative File Content', + }); + + const stream = createMockStream([ + { + type: StreamEventType.CHUNK, + value: { candidates: [] }, + }, + ]); + mockChat.sendMessageStream.mockResolvedValue(stream); + + await session.prompt({ + sessionId: 'session-1', + prompt: [ + { + type: 'resource_link', + uri: `file://${testFilePath}`, + mimeType: 'text/plain', + name: 'custom.txt', + }, + ], + }); + + expect(processSingleFileContent).toHaveBeenCalledWith( + '/custom.txt', + expect.any(String), + expect.anything(), + ); + + expect(mockChat.sendMessageStream).toHaveBeenCalledWith( + expect.anything(), + expect.arrayContaining([ + expect.objectContaining({ + text: 'Escaping Relative File Content', + }), + ]), + expect.anything(), + expect.any(AbortSignal), + expect.anything(), + ); + }); + it('should handle cancellation during prompt', async () => { let streamController: ReadableStreamDefaultController; const stream = new ReadableStream({ @@ -1602,7 +2072,6 @@ describe('Session', () => { (fs.stat as unknown as Mock).mockResolvedValue({ isDirectory: () => true, }); - (isWithinRoot as unknown as Mock).mockReturnValue(true); const stream = createMockStream([ { diff --git a/packages/cli/src/acp/acpClient.ts b/packages/cli/src/acp/acpClient.ts index bd5a52f126..59c6cb2b3f 100644 --- a/packages/cli/src/acp/acpClient.ts +++ b/packages/cli/src/acp/acpClient.ts @@ -47,7 +47,9 @@ import { DEFAULT_GEMINI_MODEL_AUTO, PREVIEW_GEMINI_MODEL_AUTO, getDisplayString, + processSingleFileContent, type AgentLoopContext, + updatePolicy, } from '@google/gemini-cli-core'; import * as 
acp from '@agentclientprotocol/sdk'; import { AcpFileSystemService } from './fileSystemService.js'; @@ -63,6 +65,7 @@ import { loadSettings, type LoadedSettings, } from '../config/settings.js'; +import { createPolicyUpdater } from '../config/policy.js'; import * as fs from 'node:fs/promises'; import * as path from 'node:path'; import { z } from 'zod'; @@ -73,6 +76,17 @@ import { runExitCleanup } from '../utils/cleanup.js'; import { SessionSelector } from '../utils/sessionUtils.js'; import { CommandHandler } from './commandHandler.js'; + +const RequestPermissionResponseSchema = z.object({ + outcome: z.discriminatedUnion('outcome', [ + z.object({ outcome: z.literal('cancelled') }), + z.object({ + outcome: z.literal('selected'), + optionId: z.string(), + }), + ]), +}); + export async function runAcpClient( config: Config, settings: LoadedSettings, @@ -98,6 +112,12 @@ export async function runAcpClient( } export class GeminiAgent { + private static callIdCounter = 0; + + static generateCallId(name: string): string { + return `${name}-${Date.now()}-${++GeminiAgent.callIdCounter}`; + } + private sessions: Map = new Map(); private clientCapabilities: acp.ClientCapabilities | undefined; private apiKey: string | undefined; @@ -115,6 +135,7 @@ export class GeminiAgent { args: acp.InitializeRequest, ): Promise { this.clientCapabilities = args.clientCapabilities; + const authMethods = [ { id: AuthType.LOGIN_WITH_GOOGLE, @@ -294,6 +315,7 @@ export class GeminiAgent { sessionId, this.clientCapabilities.fs, config.getFileSystemService(), + cwd, ); config.setFileSystemService(acpFileSystemService); } @@ -303,6 +325,7 @@ export class GeminiAgent { const geminiClient = config.getGeminiClient(); const chat = await geminiClient.startChat(); + const session = new Session( sessionId, chat, @@ -351,16 +374,6 @@ export class GeminiAgent { const { sessionData, sessionPath } = await sessionSelector.resolveSession(sessionId); - if (this.clientCapabilities?.fs) { - const acpFileSystemService 
= new AcpFileSystemService( - this.connection, - sessionId, - this.clientCapabilities.fs, - config.getFileSystemService(), - ); - config.setFileSystemService(acpFileSystemService); - } - const clientHistory = convertSessionToClientHistory(sessionData.messages); const geminiClient = config.getGeminiClient(); @@ -434,7 +447,19 @@ export class GeminiAgent { throw acp.RequestError.authRequired(); } - // 3. Now that we are authenticated, it is safe to initialize the config + // 3. Set the ACP FileSystemService (if supported) before config initialization + if (this.clientCapabilities?.fs) { + const acpFileSystemService = new AcpFileSystemService( + this.connection, + sessionId, + this.clientCapabilities.fs, + config.getFileSystemService(), + cwd, + ); + config.setFileSystemService(acpFileSystemService); + } + + // 4. Now that we are authenticated, it is safe to initialize the config // which starts the MCP servers and other heavy resources. await config.initialize(); startupProfiler.flush(config); @@ -491,6 +516,12 @@ export class GeminiAgent { const config = await loadCliConfig(settings, sessionId, this.argv, { cwd }); + createPolicyUpdater( + config.getPolicyEngine(), + config.messageBus, + config.storage, + ); + return config; } @@ -699,10 +730,22 @@ export class Session { // It uses `parts` argument but effectively ignores it in current implementation const handled = await this.handleCommand(commandText, parts); if (handled) { - return { stopReason: 'end_turn' }; + return { + stopReason: 'end_turn', + _meta: { + quota: { + token_count: { input_tokens: 0, output_tokens: 0 }, + model_usage: [], + }, + }, + }; } } + let totalInputTokens = 0; + let totalOutputTokens = 0; + const modelUsageMap = new Map(); + let nextMessage: Content | null = { role: 'user', parts }; while (nextMessage !== null) { @@ -727,11 +770,25 @@ export class Session { ); nextMessage = null; + let turnInputTokens = 0; + let turnOutputTokens = 0; + let turnModelId = model; + for await (const resp of 
responseStream) { if (pendingSend.signal.aborted) { return { stopReason: CoreToolCallStatus.Cancelled }; } + if (resp.type === StreamEventType.CHUNK && resp.value.usageMetadata) { + turnInputTokens = + resp.value.usageMetadata.promptTokenCount ?? turnInputTokens; + turnOutputTokens = + resp.value.usageMetadata.candidatesTokenCount ?? turnOutputTokens; + if (resp.value.modelVersion) { + turnModelId = resp.value.modelVersion; + } + } + if ( resp.type === StreamEventType.CHUNK && resp.value.candidates && @@ -763,6 +820,19 @@ export class Session { } } + totalInputTokens += turnInputTokens; + totalOutputTokens += turnOutputTokens; + + if (turnInputTokens > 0 || turnOutputTokens > 0) { + const existing = modelUsageMap.get(turnModelId) ?? { + input: 0, + output: 0, + }; + existing.input += turnInputTokens; + existing.output += turnOutputTokens; + modelUsageMap.set(turnModelId, existing); + } + if (pendingSend.signal.aborted) { return { stopReason: CoreToolCallStatus.Cancelled }; } @@ -799,7 +869,28 @@ export class Session { } } - return { stopReason: 'end_turn' }; + const modelUsageArray = Array.from(modelUsageMap.entries()).map( + ([modelName, counts]) => ({ + model: modelName, + token_count: { + input_tokens: counts.input, + output_tokens: counts.output, + }, + }), + ); + + return { + stopReason: 'end_turn', + _meta: { + quota: { + token_count: { + input_tokens: totalInputTokens, + output_tokens: totalOutputTokens, + }, + model_usage: modelUsageArray, + }, + }, + }; } private async handleCommand( @@ -837,7 +928,7 @@ export class Session { promptId: string, fc: FunctionCall, ): Promise { - const callId = fc.id ?? `${fc.name}-${Date.now()}`; + const callId = fc.id ?? GeminiAgent.generateCallId(fc.name || 'unknown'); const args = fc.args ?? {}; const startTime = Date.now(); @@ -887,6 +978,23 @@ export class Session { try { const invocation = tool.build(args); + const displayTitle = + typeof invocation.getDisplayTitle === 'function' + ? 
invocation.getDisplayTitle() + : invocation.getDescription(); + + const explanation = + typeof invocation.getExplanation === 'function' + ? invocation.getExplanation() + : ''; + + if (explanation) { + await this.sendUpdate({ + sessionUpdate: 'agent_thought_chunk', + content: { type: 'text', text: explanation }, + }); + } + const confirmationDetails = await invocation.shouldConfirmExecute(abortSignal); @@ -914,21 +1022,24 @@ export class Session { options: toPermissionOptions( confirmationDetails, this.context.config, + this.settings.merged.security.enablePermanentToolApproval, ), toolCall: { toolCallId: callId, status: 'pending', - title: invocation.getDescription(), + title: displayTitle, content, locations: invocation.toolLocations(), kind: toAcpToolKind(tool.kind), }, }; - // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment - const output = await this.connection.requestPermission(params); + const output = RequestPermissionResponseSchema.parse( + await this.connection.requestPermission(params), + ); + const outcome = - output.outcome.outcome === CoreToolCallStatus.Cancelled + output.outcome.outcome === 'cancelled' ? 
ToolConfirmationOutcome.Cancel : z .nativeEnum(ToolConfirmationOutcome) @@ -936,6 +1047,16 @@ export class Session { await confirmationDetails.onConfirm(outcome); + // Update policy to enable Always Allow persistence + await updatePolicy( + tool, + outcome, + confirmationDetails, + this.context, + this.context.messageBus, + invocation, + ); + switch (outcome) { case ToolConfirmationOutcome.Cancel: return errorResponse( @@ -954,12 +1075,14 @@ export class Session { } } } else { + const content: acp.ToolCallContent[] = []; + await this.sendUpdate({ sessionUpdate: 'tool_call', toolCallId: callId, status: 'in_progress', - title: invocation.getDescription(), - content: [], + title: displayTitle, + content, locations: invocation.toolLocations(), kind: toAcpToolKind(tool.kind), }); @@ -968,12 +1091,14 @@ export class Session { const toolResult: ToolResult = await invocation.execute(abortSignal); const content = toToolCallContent(toolResult); + const updateContent: acp.ToolCallContent[] = content ? [content] : []; + await this.sendUpdate({ sessionUpdate: 'tool_call_update', toolCallId: callId, status: 'completed', - title: invocation.getDescription(), - content: content ? 
[content] : [], + title: displayTitle, + content: updateContent, locations: invocation.toolLocations(), kind: toAcpToolKind(tool.kind), }); @@ -1135,6 +1260,11 @@ export class Session { const pathSpecsToRead: string[] = []; const contentLabelsForDisplay: string[] = []; const ignoredPaths: string[] = []; + const directContents: Array<{ + spec: string; + content?: string; + part?: Part; + }> = []; const toolRegistry = this.context.toolRegistry; const readManyFilesTool = new ReadManyFilesTool( @@ -1157,28 +1287,197 @@ export class Session { } let currentPathSpec = pathName; let resolvedSuccessfully = false; + let readDirectly = false; try { const absolutePath = path.resolve( this.context.config.getTargetDir(), pathName, ); - if (isWithinRoot(absolutePath, this.context.config.getTargetDir())) { - const stats = await fs.stat(absolutePath); - if (stats.isDirectory()) { - currentPathSpec = pathName.endsWith('/') - ? `${pathName}**` - : `${pathName}/**`; + + let validationError = this.context.config.validatePathAccess( + absolutePath, + 'read', + ); + + // We ask the user for explicit permission to read them if outside sandboxed workspace boundaries (and not already authorized). 
+ if ( + validationError && + !isWithinRoot(absolutePath, this.context.config.getTargetDir()) + ) { + try { + const stats = await fs.stat(absolutePath); + if (stats.isFile()) { + const syntheticCallId = `resolve-prompt-${pathName}-${randomUUID()}`; + const params = { + sessionId: this.id, + options: [ + { + optionId: ToolConfirmationOutcome.ProceedOnce, + name: 'Allow once', + kind: 'allow_once', + }, + { + optionId: ToolConfirmationOutcome.Cancel, + name: 'Deny', + kind: 'reject_once', + }, + ] as acp.PermissionOption[], + toolCall: { + toolCallId: syntheticCallId, + status: 'pending', + title: `Allow access to absolute path: ${pathName}`, + content: [ + { + type: 'content', + content: { + type: 'text', + text: `The Agent needs access to read an attached file outside your workspace: ${pathName}`, + }, + }, + ], + locations: [], + kind: 'read', + }, + }; + + const output = RequestPermissionResponseSchema.parse( + await this.connection.requestPermission(params), + ); + + const outcome = + output.outcome.outcome === 'cancelled' + ? 
ToolConfirmationOutcome.Cancel + : z + .nativeEnum(ToolConfirmationOutcome) + .parse(output.outcome.optionId); + + if (outcome === ToolConfirmationOutcome.ProceedOnce) { + this.context.config + .getWorkspaceContext() + .addReadOnlyPath(absolutePath); + validationError = null; + } else { + this.debug( + `Direct read authorization denied for absolute path ${pathName}`, + ); + directContents.push({ + spec: pathName, + content: `[Warning: Access to absolute path \`${pathName}\` denied by user.]`, + }); + continue; + } + } + } catch (error) { this.debug( - `Path ${pathName} resolved to directory, using glob: ${currentPathSpec}`, + `Failed to request permission for absolute attachment ${pathName}: ${getErrorMessage(error)}`, ); - } else { - this.debug(`Path ${pathName} resolved to file: ${currentPathSpec}`); + await this.sendUpdate({ + sessionUpdate: 'agent_thought_chunk', + content: { + type: 'text', + text: `Warning: Failed to display permission dialog for \`${absolutePath}\`. Error: ${getErrorMessage(error)}`, + }, + }); + } + } + + if (!validationError) { + // If it's an absolute path that is authorized (e.g. added via readOnlyPaths), + // read it directly to avoid ReadManyFilesTool absolute path resolution issues. 
+ if ( + (path.isAbsolute(pathName) || + !isWithinRoot( + absolutePath, + this.context.config.getTargetDir(), + )) && + !readDirectly + ) { + try { + const stats = await fs.stat(absolutePath); + if (stats.isFile()) { + const fileReadResult = await processSingleFileContent( + absolutePath, + this.context.config.getTargetDir(), + this.context.config.getFileSystemService(), + ); + + if (!fileReadResult.error) { + if ( + typeof fileReadResult.llmContent === 'object' && + 'inlineData' in fileReadResult.llmContent + ) { + directContents.push({ + spec: pathName, + part: fileReadResult.llmContent, + }); + } else if (typeof fileReadResult.llmContent === 'string') { + let contentToPush = fileReadResult.llmContent; + if (fileReadResult.isTruncated) { + contentToPush = `[WARNING: This file was truncated]\n\n${contentToPush}`; + } + directContents.push({ + spec: pathName, + content: contentToPush, + }); + } + readDirectly = true; + resolvedSuccessfully = true; + } else { + this.debug( + `Direct read failed for absolute path ${pathName}: ${fileReadResult.error}`, + ); + await this.sendUpdate({ + sessionUpdate: 'agent_thought_chunk', + content: { + type: 'text', + text: `Warning: file read failed for \`${pathName}\`. Reason: ${fileReadResult.error}`, + }, + }); + continue; + } + } + } catch (error) { + this.debug( + `File stat/access error for absolute path ${pathName}: ${getErrorMessage(error)}`, + ); + await this.sendUpdate({ + sessionUpdate: 'agent_thought_chunk', + content: { + type: 'text', + text: `Warning: file access failed for \`${pathName}\`. Reason: ${getErrorMessage(error)}`, + }, + }); + continue; + } + } + + if (!readDirectly) { + const stats = await fs.stat(absolutePath); + if (stats.isDirectory()) { + currentPathSpec = pathName.endsWith('/') + ? 
`${pathName}**` + : `${pathName}/**`; + this.debug( + `Path ${pathName} resolved to directory, using glob: ${currentPathSpec}`, + ); + } else { + this.debug( + `Path ${pathName} resolved to file: ${currentPathSpec}`, + ); + } + resolvedSuccessfully = true; } - resolvedSuccessfully = true; } else { this.debug( - `Path ${pathName} is outside the project directory. Skipping.`, + `Path ${pathName} access disallowed: ${validationError}. Skipping.`, ); + await this.sendUpdate({ + sessionUpdate: 'agent_thought_chunk', + content: { + type: 'text', + text: `Warning: skipping access to \`${pathName}\`. Reason: ${validationError}`, + }, + }); } } catch (error) { if (isNodeError(error) && error.code === 'ENOENT') { @@ -1238,7 +1537,9 @@ export class Session { } } if (resolvedSuccessfully) { - pathSpecsToRead.push(currentPathSpec); + if (!readDirectly) { + pathSpecsToRead.push(currentPathSpec); + } atPathToResolvedSpecMap.set(pathName, currentPathSpec); contentLabelsForDisplay.push(pathName); } @@ -1299,7 +1600,11 @@ export class Session { const processedQueryParts: Part[] = [{ text: initialQueryText }]; - if (pathSpecsToRead.length === 0 && embeddedContext.length === 0) { + if ( + pathSpecsToRead.length === 0 && + embeddedContext.length === 0 && + directContents.length === 0 + ) { // Fallback for lone "@" or completely invalid @-commands resulting in empty initialQueryText debugLogger.warn('No valid file paths found in @ commands to read.'); return [{ text: initialQueryText }]; @@ -1310,7 +1615,7 @@ export class Session { include: pathSpecsToRead, }; - const callId = `${readManyFilesTool.name}-${Date.now()}`; + const callId = GeminiAgent.generateCallId(readManyFilesTool.name); try { const invocation = readManyFilesTool.build(toolArgs); @@ -1391,6 +1696,30 @@ export class Session { } } + if (directContents.length > 0) { + const hasReferenceStart = processedQueryParts.some( + (p) => + 'text' in p && + typeof p.text === 'string' && + p.text.includes(REFERENCE_CONTENT_START), + ); 
+ if (!hasReferenceStart) { + processedQueryParts.push({ + text: `\n${REFERENCE_CONTENT_START}`, + }); + } + for (const item of directContents) { + processedQueryParts.push({ + text: `\nContent from @${item.spec}:\n`, + }); + if (item.content) { + processedQueryParts.push({ text: item.content }); + } else if (item.part) { + processedQueryParts.push(item.part); + } + } + } + if (embeddedContext.length > 0) { processedQueryParts.push({ text: '\n--- Content from referenced context ---', @@ -1477,6 +1806,7 @@ const basicPermissionOptions = [ function toPermissionOptions( confirmation: ToolCallConfirmationDetails, config: Config, + enablePermanentToolApproval: boolean = false, ): acp.PermissionOption[] { const disableAlwaysAllow = config.getDisableAlwaysAllow(); const options: acp.PermissionOption[] = []; @@ -1486,37 +1816,65 @@ function toPermissionOptions( case 'edit': options.push({ optionId: ToolConfirmationOutcome.ProceedAlways, - name: 'Allow All Edits', + name: 'Allow for this session', kind: 'allow_always', }); + if (enablePermanentToolApproval) { + options.push({ + optionId: ToolConfirmationOutcome.ProceedAlwaysAndSave, + name: 'Allow for this file in all future sessions', + kind: 'allow_always', + }); + } break; case 'exec': options.push({ optionId: ToolConfirmationOutcome.ProceedAlways, - name: `Always Allow ${confirmation.rootCommand}`, + name: 'Allow for this session', kind: 'allow_always', }); + if (enablePermanentToolApproval) { + options.push({ + optionId: ToolConfirmationOutcome.ProceedAlwaysAndSave, + name: 'Allow this command for all future sessions', + kind: 'allow_always', + }); + } break; case 'mcp': options.push( { optionId: ToolConfirmationOutcome.ProceedAlwaysServer, - name: `Always Allow ${confirmation.serverName}`, + name: 'Allow all server tools for this session', kind: 'allow_always', }, { optionId: ToolConfirmationOutcome.ProceedAlwaysTool, - name: `Always Allow ${confirmation.toolName}`, + name: 'Allow tool for this session', kind: 
'allow_always', }, ); + if (enablePermanentToolApproval) { + options.push({ + optionId: ToolConfirmationOutcome.ProceedAlwaysAndSave, + name: 'Allow tool for all future sessions', + kind: 'allow_always', + }); + } break; case 'info': options.push({ optionId: ToolConfirmationOutcome.ProceedAlways, - name: `Always Allow`, + name: 'Allow for this session', kind: 'allow_always', }); + if (enablePermanentToolApproval) { + options.push({ + optionId: ToolConfirmationOutcome.ProceedAlwaysAndSave, + name: 'Allow for all future sessions', + kind: 'allow_always', + }); + } break; case 'ask_user': case 'exit_plan_mode': @@ -1538,6 +1896,7 @@ function toPermissionOptions( case 'info': case 'ask_user': case 'exit_plan_mode': + case 'sandbox_expansion': break; default: { const unreachable: never = confirmation; diff --git a/packages/cli/src/acp/acpResume.test.ts b/packages/cli/src/acp/acpResume.test.ts index 77021004ca..3f75119d0b 100644 --- a/packages/cli/src/acp/acpResume.test.ts +++ b/packages/cli/src/acp/acpResume.test.ts @@ -91,6 +91,14 @@ describe('GeminiAgent Session Resume', () => { storage: { getProjectTempDir: vi.fn().mockReturnValue('/tmp/project'), }, + getPolicyEngine: vi.fn().mockReturnValue({ + addRule: vi.fn(), + }), + messageBus: { + publish: vi.fn(), + subscribe: vi.fn(), + unsubscribe: vi.fn(), + }, getApprovalMode: vi.fn().mockReturnValue('default'), isPlanEnabled: vi.fn().mockReturnValue(true), getModel: vi.fn().mockReturnValue('gemini-pro'), diff --git a/packages/cli/src/acp/fileSystemService.test.ts b/packages/cli/src/acp/fileSystemService.test.ts index 66624d5449..188aadbc09 100644 --- a/packages/cli/src/acp/fileSystemService.test.ts +++ b/packages/cli/src/acp/fileSystemService.test.ts @@ -4,10 +4,25 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, expect, vi, beforeEach, type Mocked } from 'vitest'; +import { + describe, + it, + expect, + vi, + beforeEach, + afterEach, + type Mocked, +} from 'vitest'; import { AcpFileSystemService } 
from './fileSystemService.js'; import type { AgentSideConnection } from '@agentclientprotocol/sdk'; import type { FileSystemService } from '@google/gemini-cli-core'; +import os from 'node:os'; + +vi.mock('node:os', () => ({ + default: { + homedir: vi.fn(), + }, +})); describe('AcpFileSystemService', () => { let mockConnection: Mocked; @@ -25,13 +40,19 @@ describe('AcpFileSystemService', () => { readTextFile: vi.fn(), writeTextFile: vi.fn(), }; + vi.mocked(os.homedir).mockReturnValue('/home/user'); + }); + + afterEach(() => { + vi.restoreAllMocks(); }); describe('readTextFile', () => { it.each([ { capability: true, - desc: 'connection if capability exists', + path: '/path/to/file', + desc: 'connection if capability exists and file is inside root', setup: () => { mockConnection.readTextFile.mockResolvedValue({ content: 'content' }); }, @@ -45,6 +66,7 @@ describe('AcpFileSystemService', () => { }, { capability: false, + path: '/path/to/file', desc: 'fallback if capability missing', setup: () => { mockFallback.readTextFile.mockResolvedValue('content'); @@ -56,19 +78,72 @@ describe('AcpFileSystemService', () => { expect(mockConnection.readTextFile).not.toHaveBeenCalled(); }, }, - ])('should use $desc', async ({ capability, setup, verify }) => { + { + capability: true, + path: '/outside/file', + desc: 'fallback if capability exists but file is outside root', + setup: () => { + mockFallback.readTextFile.mockResolvedValue('content'); + }, + verify: () => { + expect(mockFallback.readTextFile).toHaveBeenCalledWith( + '/outside/file', + ); + expect(mockConnection.readTextFile).not.toHaveBeenCalled(); + }, + }, + { + capability: true, + path: '/home/user/.gemini/tmp/file.md', + root: '/home/user', + desc: 'fallback if file is inside global gemini dir, even if root overlaps', + setup: () => { + mockFallback.readTextFile.mockResolvedValue('content'); + }, + verify: () => { + expect(mockFallback.readTextFile).toHaveBeenCalledWith( + '/home/user/.gemini/tmp/file.md', + ); + 
expect(mockConnection.readTextFile).not.toHaveBeenCalled(); + }, + }, + ])( + 'should use $desc', + async ({ capability, path, root, setup, verify }) => { + service = new AcpFileSystemService( + mockConnection, + 'session-1', + { readTextFile: capability, writeTextFile: true }, + mockFallback, + root || '/path/to', + ); + setup(); + + const result = await service.readTextFile(path); + + expect(result).toBe('content'); + verify(); + }, + ); + + it('should throw normalized ENOENT error when readTextFile encounters "Resource not found"', async () => { service = new AcpFileSystemService( mockConnection, 'session-1', - { readTextFile: capability, writeTextFile: true }, + { readTextFile: true, writeTextFile: true }, mockFallback, + '/path/to', + ); + mockConnection.readTextFile.mockRejectedValue( + new Error('Resource not found for document'), ); - setup(); - const result = await service.readTextFile('/path/to/file'); - - expect(result).toBe('content'); - verify(); + await expect( + service.readTextFile('/path/to/missing'), + ).rejects.toMatchObject({ + code: 'ENOENT', + message: 'Resource not found for document', + }); }); }); @@ -76,7 +151,8 @@ describe('AcpFileSystemService', () => { it.each([ { capability: true, - desc: 'connection if capability exists', + path: '/path/to/file', + desc: 'connection if capability exists and file is inside root', verify: () => { expect(mockConnection.writeTextFile).toHaveBeenCalledWith({ path: '/path/to/file', @@ -88,6 +164,7 @@ describe('AcpFileSystemService', () => { }, { capability: false, + path: '/path/to/file', desc: 'fallback if capability missing', verify: () => { expect(mockFallback.writeTextFile).toHaveBeenCalledWith( @@ -97,17 +174,63 @@ describe('AcpFileSystemService', () => { expect(mockConnection.writeTextFile).not.toHaveBeenCalled(); }, }, - ])('should use $desc', async ({ capability, verify }) => { + { + capability: true, + path: '/outside/file', + desc: 'fallback if capability exists but file is outside root', + 
verify: () => { + expect(mockFallback.writeTextFile).toHaveBeenCalledWith( + '/outside/file', + 'content', + ); + expect(mockConnection.writeTextFile).not.toHaveBeenCalled(); + }, + }, + { + capability: true, + path: '/home/user/.gemini/tmp/file.md', + root: '/home/user', + desc: 'fallback if file is inside global gemini dir, even if root overlaps', + verify: () => { + expect(mockFallback.writeTextFile).toHaveBeenCalledWith( + '/home/user/.gemini/tmp/file.md', + 'content', + ); + expect(mockConnection.writeTextFile).not.toHaveBeenCalled(); + }, + }, + ])('should use $desc', async ({ capability, path, root, verify }) => { service = new AcpFileSystemService( mockConnection, 'session-1', { writeTextFile: capability, readTextFile: true }, mockFallback, + root || '/path/to', ); - await service.writeTextFile('/path/to/file', 'content'); + await service.writeTextFile(path, 'content'); verify(); }); + + it('should throw normalized ENOENT error when writeTextFile encounters "Resource not found"', async () => { + service = new AcpFileSystemService( + mockConnection, + 'session-1', + { readTextFile: true, writeTextFile: true }, + mockFallback, + '/path/to', + ); + mockConnection.writeTextFile.mockRejectedValue( + new Error('Resource not found for directory'), + ); + + await expect( + service.writeTextFile('/path/to/missing', 'content'), + ).rejects.toMatchObject({ + code: 'ENOENT', + message: 'Resource not found for directory', + }); + }); }); }); diff --git a/packages/cli/src/acp/fileSystemService.ts b/packages/cli/src/acp/fileSystemService.ts index 1d3c8ad0b8..b020cd27f2 100644 --- a/packages/cli/src/acp/fileSystemService.ts +++ b/packages/cli/src/acp/fileSystemService.ts @@ -4,44 +4,82 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type { FileSystemService } from '@google/gemini-cli-core'; +import { isWithinRoot, type FileSystemService } from '@google/gemini-cli-core'; import type * as acp from '@agentclientprotocol/sdk'; +import os from 'node:os'; +import path from 
'node:path'; /** * ACP client-based implementation of FileSystemService */ export class AcpFileSystemService implements FileSystemService { + private readonly geminiDir = path.join(os.homedir(), '.gemini'); + constructor( private readonly connection: acp.AgentSideConnection, private readonly sessionId: string, - private readonly capabilities: acp.FileSystemCapability, + private readonly capabilities: acp.FileSystemCapabilities, private readonly fallback: FileSystemService, + private readonly root: string, ) {} + private shouldUseFallback(filePath: string): boolean { + // Files inside the global CLI directory must always use the native file system, + // even if the user runs the CLI directly from their home directory (which + // would make the IDE's project root overlap with the global directory). + return ( + !isWithinRoot(filePath, this.root) || + isWithinRoot(filePath, this.geminiDir) + ); + } + + private normalizeFileSystemError(err: unknown): never { + const errorMessage = err instanceof Error ? 
err.message : String(err); + if ( + errorMessage.includes('Resource not found') || + errorMessage.includes('ENOENT') || + errorMessage.includes('does not exist') || + errorMessage.includes('No such file') + ) { + const newErr = new Error(errorMessage) as NodeJS.ErrnoException; + newErr.code = 'ENOENT'; + throw newErr; + } + throw err; + } + async readTextFile(filePath: string): Promise { - if (!this.capabilities.readTextFile) { + if (!this.capabilities.readTextFile || this.shouldUseFallback(filePath)) { return this.fallback.readTextFile(filePath); } - // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment - const response = await this.connection.readTextFile({ - path: filePath, - sessionId: this.sessionId, - }); + try { + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment + const response = await this.connection.readTextFile({ + path: filePath, + sessionId: this.sessionId, + }); - // eslint-disable-next-line @typescript-eslint/no-unsafe-return - return response.content; + // eslint-disable-next-line @typescript-eslint/no-unsafe-return + return response.content; + } catch (err: unknown) { + this.normalizeFileSystemError(err); + } } async writeTextFile(filePath: string, content: string): Promise { - if (!this.capabilities.writeTextFile) { + if (!this.capabilities.writeTextFile || this.shouldUseFallback(filePath)) { return this.fallback.writeTextFile(filePath, content); } - await this.connection.writeTextFile({ - path: filePath, - content, - sessionId: this.sessionId, - }); + try { + await this.connection.writeTextFile({ + path: filePath, + content, + sessionId: this.sessionId, + }); + } catch (err: unknown) { + this.normalizeFileSystemError(err); + } } } diff --git a/packages/cli/src/commands/extensions/examples/policies/policies/policies.toml b/packages/cli/src/commands/extensions/examples/policies/policies/policies.toml index d89d5e5737..225627c59b 100644 --- 
a/packages/cli/src/commands/extensions/examples/policies/policies/policies.toml +++ b/packages/cli/src/commands/extensions/examples/policies/policies/policies.toml @@ -16,7 +16,7 @@ toolName = "grep_search" argsPattern = "(\.env|id_rsa|passwd)" decision = "deny" priority = 200 -deny_message = "Access to sensitive credentials or system files is restricted by the policy-example extension." +denyMessage = "Access to sensitive credentials or system files is restricted by the policy-example extension." # Safety Checker: Apply path validation to all write operations. [[safety_checker]] diff --git a/packages/cli/src/commands/extensions/install.test.ts b/packages/cli/src/commands/extensions/install.test.ts index 417e750651..8b3f8c5807 100644 --- a/packages/cli/src/commands/extensions/install.test.ts +++ b/packages/cli/src/commands/extensions/install.test.ts @@ -12,48 +12,46 @@ import { beforeEach, afterEach, type MockInstance, - type Mock, } from 'vitest'; import { handleInstall, installCommand } from './install.js'; import yargs from 'yargs'; import * as core from '@google/gemini-cli-core'; -import { - ExtensionManager, - type inferInstallMetadata, -} from '../../config/extension-manager.js'; -import type { - promptForConsentNonInteractive, - requestConsentNonInteractive, -} from '../../config/extensions/consent.js'; -import type { - isWorkspaceTrusted, - loadTrustedFolders, -} from '../../config/trustedFolders.js'; -import type * as fs from 'node:fs/promises'; import type { Stats } from 'node:fs'; import * as path from 'node:path'; +import { promptForSetting } from '../../config/extensions/extensionSettings.js'; -const mockInstallOrUpdateExtension: Mock< - typeof ExtensionManager.prototype.installOrUpdateExtension -> = vi.hoisted(() => vi.fn()); -const mockRequestConsentNonInteractive: Mock< - typeof requestConsentNonInteractive -> = vi.hoisted(() => vi.fn()); -const mockPromptForConsentNonInteractive: Mock< - typeof promptForConsentNonInteractive -> = vi.hoisted(() => 
vi.fn()); -const mockStat: Mock = vi.hoisted(() => vi.fn()); -const mockInferInstallMetadata: Mock = vi.hoisted( - () => vi.fn(), -); -const mockIsWorkspaceTrusted: Mock = vi.hoisted(() => - vi.fn(), -); -const mockLoadTrustedFolders: Mock = vi.hoisted(() => - vi.fn(), -); -const mockDiscover: Mock = - vi.hoisted(() => vi.fn()); +const { + mockInstallOrUpdateExtension, + mockLoadExtensions, + mockExtensionManager, + mockRequestConsentNonInteractive, + mockPromptForConsentNonInteractive, + mockStat, + mockInferInstallMetadata, + mockIsWorkspaceTrusted, + mockLoadTrustedFolders, + mockDiscover, +} = vi.hoisted(() => { + const mockLoadExtensions = vi.fn(); + const mockInstallOrUpdateExtension = vi.fn(); + const mockExtensionManager = vi.fn().mockImplementation(() => ({ + loadExtensions: mockLoadExtensions, + installOrUpdateExtension: mockInstallOrUpdateExtension, + })); + + return { + mockLoadExtensions, + mockInstallOrUpdateExtension, + mockExtensionManager, + mockRequestConsentNonInteractive: vi.fn(), + mockPromptForConsentNonInteractive: vi.fn(), + mockStat: vi.fn(), + mockInferInstallMetadata: vi.fn(), + mockIsWorkspaceTrusted: vi.fn(), + mockLoadTrustedFolders: vi.fn(), + mockDiscover: vi.fn(), + }; +}); vi.mock('../../config/extensions/consent.js', () => ({ requestConsentNonInteractive: mockRequestConsentNonInteractive, @@ -84,6 +82,7 @@ vi.mock('../../config/extension-manager.js', async (importOriginal) => ({ ...(await importOriginal< typeof import('../../config/extension-manager.js') >()), + ExtensionManager: mockExtensionManager, inferInstallMetadata: mockInferInstallMetadata, })); @@ -117,19 +116,18 @@ describe('handleInstall', () => { let processSpy: MockInstance; beforeEach(() => { - debugLogSpy = vi.spyOn(core.debugLogger, 'log'); - debugErrorSpy = vi.spyOn(core.debugLogger, 'error'); + debugLogSpy = vi + .spyOn(core.debugLogger, 'log') + .mockImplementation(() => {}); + debugErrorSpy = vi + .spyOn(core.debugLogger, 'error') + .mockImplementation(() => 
{}); processSpy = vi .spyOn(process, 'exit') .mockImplementation(() => undefined as never); - vi.spyOn(ExtensionManager.prototype, 'loadExtensions').mockResolvedValue( - [], - ); - vi.spyOn( - ExtensionManager.prototype, - 'installOrUpdateExtension', - ).mockImplementation(mockInstallOrUpdateExtension); + mockLoadExtensions.mockResolvedValue([]); + mockInstallOrUpdateExtension.mockReset(); mockIsWorkspaceTrusted.mockReturnValue({ isTrusted: true, source: 'file' }); mockDiscover.mockResolvedValue({ @@ -163,12 +161,7 @@ describe('handleInstall', () => { }); afterEach(() => { - mockInstallOrUpdateExtension.mockClear(); - mockRequestConsentNonInteractive.mockClear(); - mockStat.mockClear(); - mockInferInstallMetadata.mockClear(); vi.clearAllMocks(); - vi.restoreAllMocks(); }); function createMockExtension( @@ -288,6 +281,39 @@ describe('handleInstall', () => { expect(processSpy).toHaveBeenCalledWith(1); }); + it('should pass promptForSetting when skipSettings is not provided', async () => { + mockInstallOrUpdateExtension.mockResolvedValue({ + name: 'test-extension', + } as unknown as core.GeminiCLIExtension); + + await handleInstall({ + source: 'http://google.com', + }); + + expect(mockExtensionManager).toHaveBeenCalledWith( + expect.objectContaining({ + requestSetting: promptForSetting, + }), + ); + }); + + it('should pass null for requestSetting when skipSettings is true', async () => { + mockInstallOrUpdateExtension.mockResolvedValue({ + name: 'test-extension', + } as unknown as core.GeminiCLIExtension); + + await handleInstall({ + source: 'http://google.com', + skipSettings: true, + }); + + expect(mockExtensionManager).toHaveBeenCalledWith( + expect.objectContaining({ + requestSetting: null, + }), + ); + }); + it('should proceed if local path is already trusted', async () => { mockInstallOrUpdateExtension.mockResolvedValue( createMockExtension({ diff --git a/packages/cli/src/commands/extensions/install.ts b/packages/cli/src/commands/extensions/install.ts index 
542d1240be..cf135a9366 100644 --- a/packages/cli/src/commands/extensions/install.ts +++ b/packages/cli/src/commands/extensions/install.ts @@ -37,6 +37,7 @@ interface InstallArgs { autoUpdate?: boolean; allowPreRelease?: boolean; consent?: boolean; + skipSettings?: boolean; } export async function handleInstall(args: InstallArgs) { @@ -153,7 +154,7 @@ export async function handleInstall(args: InstallArgs) { const extensionManager = new ExtensionManager({ workspaceDir, requestConsent, - requestSetting: promptForSetting, + requestSetting: args.skipSettings ? null : promptForSetting, settings, }); await extensionManager.loadExtensions(); @@ -196,6 +197,11 @@ export const installCommand: CommandModule = { type: 'boolean', default: false, }) + .option('skip-settings', { + describe: 'Skip the configuration on install process.', + type: 'boolean', + default: false, + }) .check((argv) => { if (!argv.source) { throw new Error('The source argument must be provided.'); @@ -214,6 +220,8 @@ export const installCommand: CommandModule = { allowPreRelease: argv['pre-release'] as boolean | undefined, // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion consent: argv['consent'] as boolean | undefined, + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + skipSettings: argv['skip-settings'] as boolean | undefined, }); await exitCli(); }, diff --git a/packages/cli/src/commands/mcp/list.test.ts b/packages/cli/src/commands/mcp/list.test.ts index 54534961dd..578894845e 100644 --- a/packages/cli/src/commands/mcp/list.test.ts +++ b/packages/cli/src/commands/mcp/list.test.ts @@ -264,6 +264,7 @@ describe('mcp list command', () => { config: { 'allowed-server': { url: 'http://allowed' }, }, + requiredConfig: {}, }, }; diff --git a/packages/cli/src/commands/mcp/list.ts b/packages/cli/src/commands/mcp/list.ts index a1df1a8027..8154e3b7bf 100644 --- a/packages/cli/src/commands/mcp/list.ts +++ b/packages/cli/src/commands/mcp/list.ts @@ -54,6 +54,7 @@ export 
async function getMcpServersFromConfig( return; } mcpServers[key] = { + // eslint-disable-next-line @typescript-eslint/no-misused-spread ...server, extension, }; diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts index a94d1f0a28..0d9fb8a9a0 100644 --- a/packages/cli/src/config/config.test.ts +++ b/packages/cli/src/config/config.test.ts @@ -143,12 +143,17 @@ vi.mock('@google/gemini-cli-core', async () => { respectGeminiIgnore: true, customIgnoreFilePaths: [], }, - createPolicyEngineConfig: vi.fn(async () => ({ - rules: [], - checkers: [], - defaultDecision: ServerConfig.PolicyDecision.ASK_USER, - approvalMode: ServerConfig.ApprovalMode.DEFAULT, - })), + createPolicyEngineConfig: vi.fn( + async (_settings, approvalMode, _workspacePoliciesDir, interactive) => ({ + rules: [], + checkers: [], + defaultDecision: interactive + ? ServerConfig.PolicyDecision.ASK_USER + : ServerConfig.PolicyDecision.DENY, + approvalMode: approvalMode ?? ServerConfig.ApprovalMode.DEFAULT, + nonInteractive: !interactive, + }), + ), getAdminErrorMessage: vi.fn( (_feature) => `YOLO mode is disabled by your administrator. 
To enable it, please request an update to the settings at: https://goo.gle/manage-gemini-cli`, @@ -226,6 +231,51 @@ afterEach(() => { }); describe('parseArguments', () => { + describe('worktree', () => { + it('should parse --worktree flag when provided with a name', async () => { + process.argv = ['node', 'script.js', '--worktree', 'my-feature']; + const settings = createTestMergedSettings(); + settings.experimental.worktrees = true; + const argv = await parseArguments(settings); + expect(argv.worktree).toBe('my-feature'); + }); + + it('should generate a random name when --worktree is provided without a name', async () => { + process.argv = ['node', 'script.js', '--worktree']; + const settings = createTestMergedSettings(); + settings.experimental.worktrees = true; + const argv = await parseArguments(settings); + expect(argv.worktree).toBeDefined(); + expect(argv.worktree).not.toBe(''); + expect(typeof argv.worktree).toBe('string'); + }); + + it('should throw an error when --worktree is used but experimental.worktrees is not enabled', async () => { + process.argv = ['node', 'script.js', '--worktree', 'feature']; + const settings = createTestMergedSettings(); + settings.experimental.worktrees = false; + + const mockExit = vi.spyOn(process, 'exit').mockImplementation(() => { + throw new Error('process.exit called'); + }); + const mockConsoleError = vi + .spyOn(console, 'error') + .mockImplementation(() => {}); + + await expect(parseArguments(settings)).rejects.toThrow( + 'process.exit called', + ); + expect(mockConsoleError).toHaveBeenCalledWith( + expect.stringContaining( + 'The --worktree flag is only available when experimental.worktrees is enabled in your settings.', + ), + ); + + mockExit.mockRestore(); + mockConsoleError.mockRestore(); + }); + }); + it.each([ { description: 'long flags', @@ -277,6 +327,41 @@ describe('parseArguments', () => { }, ); + describe('isCommand middleware', () => { + it.each([ + { cmd: 'mcp list', expected: true }, + { cmd: 'extensions 
list', expected: true }, + { cmd: 'extension list', expected: true }, + { cmd: 'skills list', expected: true }, + { cmd: 'skill list', expected: true }, + { cmd: 'hooks migrate', expected: true }, + { cmd: 'hook migrate', expected: true }, + { cmd: 'some query', expected: undefined }, + { cmd: 'hello world', expected: undefined }, + ])( + 'should set isCommand to $expected for "$cmd"', + async ({ cmd, expected }) => { + process.argv = ['node', 'script.js', ...cmd.split(' ')]; + const settings = createTestMergedSettings({ + admin: { + mcp: { enabled: true }, + }, + experimental: { + extensionManagement: true, + }, + skills: { + enabled: true, + }, + hooksConfig: { + enabled: true, + }, + }); + const parsedArgs = await parseArguments(settings); + expect(parsedArgs.isCommand).toBe(expected); + }, + ); + }); + it.each([ { description: 'should allow --prompt without --prompt-interactive', @@ -1671,6 +1756,7 @@ describe('loadCliConfig with admin.mcp.config', () => { const serverA = config.getMcpServers()?.['serverA']; expect(serverA).toEqual({ + // eslint-disable-next-line @typescript-eslint/no-misused-spread ...localMcpServers['serverA'], type: 'sse', url: 'https://admin-server-a.com/sse', @@ -1721,6 +1807,7 @@ describe('loadCliConfig with admin.mcp.config', () => { }; const localMcpServersWithTools: Record = { serverA: { + // eslint-disable-next-line @typescript-eslint/no-misused-spread ...localMcpServers['serverA'], includeTools: ['local_tool'], timeout: 1234, @@ -1763,6 +1850,7 @@ describe('loadCliConfig with admin.mcp.config', () => { }; const localMcpServersWithTools: Record = { serverA: { + // eslint-disable-next-line @typescript-eslint/no-misused-spread ...localMcpServers['serverA'], includeTools: ['local_tool'], }, @@ -2225,6 +2313,30 @@ describe('loadCliConfig tool exclusions', () => { expect(config.getExcludeTools()).toContain('ask_user'); }); + it('should exclude ask_user in interactive mode when --acp is provided', async () => { + process.stdin.isTTY = true; 
+ process.argv = ['node', 'script.js', '--acp']; + const argv = await parseArguments(createTestMergedSettings()); + const config = await loadCliConfig( + createTestMergedSettings(), + 'test-session', + argv, + ); + expect(config.getExcludeTools()).toContain('ask_user'); + }); + + it('should exclude ask_user in interactive mode when --experimental-acp is provided', async () => { + process.stdin.isTTY = true; + process.argv = ['node', 'script.js', '--experimental-acp']; + const argv = await parseArguments(createTestMergedSettings()); + const config = await loadCliConfig( + createTestMergedSettings(), + 'test-session', + argv, + ); + expect(config.getExcludeTools()).toContain('ask_user'); + }); + it('should not exclude shell tool in non-interactive mode when --allowed-tools="ShellTool" is set', async () => { process.stdin.isTTY = false; process.argv = [ @@ -3353,6 +3465,8 @@ describe('Policy Engine Integration in loadCliConfig', () => { }), }), expect.anything(), + undefined, + expect.anything(), ); }); @@ -3374,6 +3488,8 @@ describe('Policy Engine Integration in loadCliConfig', () => { }), }), expect.anything(), + undefined, + expect.anything(), ); }); @@ -3397,6 +3513,8 @@ describe('Policy Engine Integration in loadCliConfig', () => { ], }), expect.anything(), + undefined, + expect.anything(), ); }); }); diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 80c1e19443..af8c1ae0ac 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -4,10 +4,11 @@ * SPDX-License-Identifier: Apache-2.0 */ -import yargs from 'yargs/yargs'; +import yargs from 'yargs'; import { hideBin } from 'yargs/helpers'; import process from 'node:process'; import * as path from 'node:path'; +import { execa } from 'execa'; import { mcpCommand } from '../commands/mcp.js'; import { extensionsCommand } from '../commands/extensions.js'; import { skillsCommand } from '../commands/skills.js'; @@ -36,7 +37,11 @@ import { Config, 
resolveToRealPath, applyAdminAllowlist, + applyRequiredServers, getAdminBlockedMcpServersMessage, + getProjectRootForWorktree, + isGeminiWorktree, + type WorktreeSettings, type HookDefinition, type HookEventName, type OutputFormat, @@ -47,6 +52,8 @@ import { type MergedSettings, saveModelChange, loadSettings, + isWorktreeEnabled, + type LoadedSettings, } from './settings.js'; import { loadSandboxConfig } from './sandboxConfig.js'; @@ -73,6 +80,7 @@ export interface CliArgs { debug: boolean | undefined; prompt: string | undefined; promptInteractive: string | undefined; + worktree?: string; yolo: boolean | undefined; approvalMode: string | undefined; @@ -114,6 +122,36 @@ const coerceCommaSeparated = (values: string[]): string[] => { ); }; +/** + * Pre-parses the command line arguments to find the worktree flag. + * Used for early setup before full argument parsing with settings. + */ +export function getWorktreeArg(argv: string[]): string | undefined { + const result = yargs(hideBin(argv)) + .help(false) + .version(false) + .option('worktree', { alias: 'w', type: 'string' }) + .strict(false) + .exitProcess(false) + .parseSync(); + + if (result.worktree === undefined) return undefined; + return typeof result.worktree === 'string' ? result.worktree.trim() : ''; +} + +/** + * Checks if a worktree is requested via CLI and enabled in settings. + * Returns the requested name (can be empty string for auto-generated) or undefined. + */ +export function getRequestedWorktreeName( + settings: LoadedSettings, +): string | undefined { + if (!isWorktreeEnabled(settings)) { + return undefined; + } + return getWorktreeArg(process.argv); +} + export async function parseArguments( settings: MergedSettings, ): Promise { @@ -125,12 +163,104 @@ export async function parseArguments( .usage( 'Usage: gemini [options] [command]\n\nGemini CLI - Defaults to interactive mode. 
Use -p/--prompt for non-interactive (headless) mode.', ) + .option('isCommand', { + type: 'boolean', + hidden: true, + description: 'Internal flag to indicate if a subcommand is being run', + }) .option('debug', { alias: 'd', type: 'boolean', description: 'Run in debug mode (open debug console with F12)', default: false, }) + .middleware((argv) => { + const commandModules = [ + mcpCommand, + extensionsCommand, + skillsCommand, + hooksCommand, + ]; + + const subcommands = commandModules.flatMap((mod) => { + const names: string[] = []; + + const cmd = mod.command; + if (cmd) { + if (Array.isArray(cmd)) { + for (const c of cmd) { + names.push(String(c).split(' ')[0]); + } + } else { + names.push(String(cmd).split(' ')[0]); + } + } + + const aliases = mod.aliases; + if (aliases) { + if (Array.isArray(aliases)) { + for (const a of aliases) { + names.push(String(a).split(' ')[0]); + } + } else { + names.push(String(aliases).split(' ')[0]); + } + } + + return names; + }); + + const firstArg = argv._[0]; + if (typeof firstArg === 'string' && subcommands.includes(firstArg)) { + argv['isCommand'] = true; + } + }, true) + // Ensure validation flows through .fail() for clean UX + .fail((msg, err) => { + if (err) throw err; + throw new Error(msg); + }) + .check((argv) => { + // The 'query' positional can be a string (for one arg) or string[] (for multiple). + // This guard safely checks if any positional argument was provided. + const queryArg = argv['query']; + const query = + typeof queryArg === 'string' || Array.isArray(queryArg) + ? queryArg + : undefined; + const hasPositionalQuery = Array.isArray(query) + ? 
query.length > 0 + : !!query; + + if (argv['prompt'] && hasPositionalQuery) { + return 'Cannot use both a positional prompt and the --prompt (-p) flag together'; + } + if (argv['prompt'] && argv['promptInteractive']) { + return 'Cannot use both --prompt (-p) and --prompt-interactive (-i) together'; + } + if (argv['yolo'] && argv['approvalMode']) { + return 'Cannot use both --yolo (-y) and --approval-mode together. Use --approval-mode=yolo instead.'; + } + + const outputFormat = argv['outputFormat']; + if ( + typeof outputFormat === 'string' && + !['text', 'json', 'stream-json'].includes(outputFormat) + ) { + return `Invalid values:\n Argument: output-format, Given: "${outputFormat}", Choices: "text", "json", "stream-json"`; + } + if (argv['worktree'] && !settings.experimental?.worktrees) { + return 'The --worktree flag is only available when experimental.worktrees is enabled in your settings.'; + } + return true; + }); + + yargsInstance.command(mcpCommand); + yargsInstance.command(extensionsCommand); + yargsInstance.command(skillsCommand); + yargsInstance.command(hooksCommand); + + yargsInstance .command('$0 [query..]', 'Launch Gemini CLI', (yargsInstance) => yargsInstance .positional('query', { @@ -157,6 +287,20 @@ export async function parseArguments( description: 'Execute the provided prompt and continue in interactive mode', }) + .option('worktree', { + alias: 'w', + type: 'string', + skipValidation: true, + description: + 'Start Gemini in a new git worktree. If no name is provided, one is generated automatically.', + coerce: (value: unknown): string => { + const trimmed = typeof value === 'string' ? 
value.trim() : ''; + if (trimmed === '') { + return Math.random().toString(36).substring(2, 10); + } + return trimmed; + }, + }) .option('sandbox', { alias: 's', type: 'boolean', @@ -300,56 +444,6 @@ export async function parseArguments( description: 'Suppress the security warning when using --raw-output.', }), ) - // Register MCP subcommands - .command(mcpCommand) - // Ensure validation flows through .fail() for clean UX - .fail((msg, err) => { - if (err) throw err; - throw new Error(msg); - }) - .check((argv) => { - // The 'query' positional can be a string (for one arg) or string[] (for multiple). - // This guard safely checks if any positional argument was provided. - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - const query = argv['query'] as string | string[] | undefined; - const hasPositionalQuery = Array.isArray(query) - ? query.length > 0 - : !!query; - - if (argv['prompt'] && hasPositionalQuery) { - return 'Cannot use both a positional prompt and the --prompt (-p) flag together'; - } - if (argv['prompt'] && argv['promptInteractive']) { - return 'Cannot use both --prompt (-p) and --prompt-interactive (-i) together'; - } - if (argv['yolo'] && argv['approvalMode']) { - return 'Cannot use both --yolo (-y) and --approval-mode together. Use --approval-mode=yolo instead.'; - } - if ( - argv['outputFormat'] && - !['text', 'json', 'stream-json'].includes( - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - argv['outputFormat'] as string, - ) - ) { - return `Invalid values:\n Argument: output-format, Given: "${argv['outputFormat']}", Choices: "text", "json", "stream-json"`; - } - return true; - }); - - if (settings.experimental?.extensionManagement) { - yargsInstance.command(extensionsCommand); - } - - if (settings.skills?.enabled ?? 
true) { - yargsInstance.command(skillsCommand); - } - // Register hooks command if hooks are enabled - if (settings.hooksConfig.enabled) { - yargsInstance.command(hooksCommand); - } - - yargsInstance .version(await getVersion()) // This will enable the --version flag based on package.json .alias('v', 'version') .help() @@ -419,6 +513,7 @@ export interface LoadCliConfigOptions { projectHooks?: { [K in HookEventName]?: HookDefinition[] } & { disabled?: string[]; }; + worktreeSettings?: WorktreeSettings; } export async function loadCliConfig( @@ -430,6 +525,9 @@ export async function loadCliConfig( const { cwd = process.cwd(), projectHooks } = options; const debugMode = isDebugMode(argv); + const worktreeSettings = + options.worktreeSettings ?? (await resolveWorktreeSettings(cwd)); + if (argv.sandbox) { process.env['GEMINI_SANDBOX'] = 'true'; } @@ -648,12 +746,16 @@ export async function loadCliConfig( const allowedTools = argv.allowedTools || settings.tools?.allowed || []; + const isAcpMode = !!argv.acp || !!argv.experimentalAcp; + // In non-interactive mode, exclude tools that require a prompt. const extraExcludes: string[] = []; - if (!interactive) { + if (!interactive || isAcpMode) { // The Policy Engine natively handles headless safety by translating ASK_USER // decisions to DENY. However, we explicitly block ask_user here to guarantee // it can never be allowed via a high-priority policy rule when no human is present. + // We also exclude it in ACP mode as IDEs intercept tool calls and ask for permission, + // breaking conversational flows. extraExcludes.push(ASK_USER_TOOL_NAME); } @@ -690,8 +792,8 @@ export async function loadCliConfig( effectiveSettings, approvalMode, workspacePoliciesDir, + interactive, ); - policyEngineConfig.nonInteractive = !interactive; const defaultModel = PREVIEW_GEMINI_MODEL_AUTO; const specifiedModel = @@ -702,6 +804,19 @@ export async function loadCliConfig( ? 
defaultModel : specifiedModel || defaultModel; const sandboxConfig = await loadSandboxConfig(settings, argv); + if (sandboxConfig) { + const existingPaths = sandboxConfig.allowedPaths || []; + if (settings.tools.sandboxAllowedPaths?.length) { + sandboxConfig.allowedPaths = [ + ...new Set([...existingPaths, ...settings.tools.sandboxAllowedPaths]), + ]; + } + if (settings.tools.sandboxNetworkAccess !== undefined) { + sandboxConfig.networkAccess = + sandboxConfig.networkAccess || settings.tools.sandboxNetworkAccess; + } + } + const screenReader = argv.screenReader !== undefined ? argv.screenReader @@ -737,7 +852,25 @@ export async function loadCliConfig( } } - const isAcpMode = !!argv.acp || !!argv.experimentalAcp; + // Apply admin-required MCP servers (injected regardless of allowlist) + if (mcpEnabled) { + const requiredMcpConfig = settings.admin?.mcp?.requiredConfig; + if (requiredMcpConfig && Object.keys(requiredMcpConfig).length > 0) { + const requiredResult = applyRequiredServers( + mcpServers ?? {}, + requiredMcpConfig, + ); + mcpServers = requiredResult.mcpServers; + + if (requiredResult.requiredServerNames.length > 0) { + coreEvents.emitConsoleLog( + 'info', + `Admin-required MCP servers injected: ${requiredResult.requiredServerNames.join(', ')}`, + ); + } + } + } + let clientName: string | undefined = undefined; if (isAcpMode) { const ide = detectIdeFromEnv(); @@ -766,6 +899,7 @@ export async function loadCliConfig( importFormat: settings.context?.importFormat, debugMode, question, + worktreeSettings, coreTools: settings.tools?.core || undefined, allowedTools: allowedTools.length > 0 ? allowedTools : undefined, @@ -840,6 +974,7 @@ export async function loadCliConfig( skillsSupport: settings.skills?.enabled ?? 
true, disabledSkills: settings.skills?.disabled, experimentalJitContext: settings.experimental?.jitContext, + experimentalMemoryManager: settings.experimental?.memoryManager, modelSteering: settings.experimental?.modelSteering, topicUpdateNarration: settings.experimental?.topicUpdateNarration, toolOutputMasking: settings.experimental?.toolOutputMasking, @@ -906,3 +1041,48 @@ function mergeExcludeTools( ]); return Array.from(allExcludeTools); } + +async function resolveWorktreeSettings( + cwd: string, +): Promise { + let worktreePath: string | undefined; + try { + const { stdout } = await execa('git', ['rev-parse', '--show-toplevel'], { + cwd, + }); + const toplevel = stdout.trim(); + const projectRoot = await getProjectRootForWorktree(toplevel); + + if (isGeminiWorktree(toplevel, projectRoot)) { + worktreePath = toplevel; + } + } catch (_e) { + return undefined; + } + + if (!worktreePath) { + return undefined; + } + + let worktreeBaseSha: string | undefined; + try { + const { stdout } = await execa('git', ['rev-parse', 'HEAD'], { + cwd: worktreePath, + }); + worktreeBaseSha = stdout.trim(); + } catch (e: unknown) { + debugLogger.debug( + `Failed to resolve worktree base SHA at ${worktreePath}: ${e instanceof Error ? 
e.message : String(e)}`, + ); + } + + if (!worktreeBaseSha) { + return undefined; + } + + return { + name: path.basename(worktreePath), + path: worktreePath, + baseSha: worktreeBaseSha, + }; +} diff --git a/packages/cli/src/config/extension-manager-permissions.test.ts b/packages/cli/src/config/extension-manager-permissions.test.ts new file mode 100644 index 0000000000..662f30d430 --- /dev/null +++ b/packages/cli/src/config/extension-manager-permissions.test.ts @@ -0,0 +1,133 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import * as fs from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import { copyExtension } from './extension-manager.js'; + +describe('copyExtension permissions', () => { + let tempDir: string; + let sourceDir: string; + let destDir: string; + + beforeEach(() => { + tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gemini-permission-test-')); + sourceDir = path.join(tempDir, 'source'); + destDir = path.join(tempDir, 'dest'); + fs.mkdirSync(sourceDir); + }); + + afterEach(() => { + // Ensure we can delete the temp directory by making everything writable again + const makeWritableSync = (p: string) => { + try { + const stats = fs.lstatSync(p); + fs.chmodSync(p, stats.mode | 0o700); + if (stats.isDirectory()) { + fs.readdirSync(p).forEach((child) => + makeWritableSync(path.join(p, child)), + ); + } + } catch (_e) { + // Ignore errors during cleanup + } + }; + + if (fs.existsSync(tempDir)) { + makeWritableSync(tempDir); + fs.rmSync(tempDir, { recursive: true, force: true }); + } + }); + + it('should make destination writable even if source is read-only', async () => { + const fileName = 'test.txt'; + const filePath = path.join(sourceDir, fileName); + fs.writeFileSync(filePath, 'hello'); + + // Make source read-only: 0o555 for directory, 0o444 for file + fs.chmodSync(filePath, 0o444); + 
fs.chmodSync(sourceDir, 0o555); + + // Verify source is read-only + expect(() => fs.writeFileSync(filePath, 'fail')).toThrow(); + + // Perform copy + await copyExtension(sourceDir, destDir); + + // Verify destination is writable + const destFilePath = path.join(destDir, fileName); + const destFileStats = fs.statSync(destFilePath); + const destDirStats = fs.statSync(destDir); + + // Check that owner write bits are set (0o200) + expect(destFileStats.mode & 0o200).toBe(0o200); + expect(destDirStats.mode & 0o200).toBe(0o200); + + // Verify we can actually write to the destination file + fs.writeFileSync(destFilePath, 'writable'); + expect(fs.readFileSync(destFilePath, 'utf-8')).toBe('writable'); + + // Verify we can delete the destination (which requires write bit on destDir) + fs.rmSync(destFilePath); + expect(fs.existsSync(destFilePath)).toBe(false); + }); + + it('should handle nested directories with restrictive permissions', async () => { + const subDir = path.join(sourceDir, 'subdir'); + fs.mkdirSync(subDir); + const fileName = 'nested.txt'; + const filePath = path.join(subDir, fileName); + fs.writeFileSync(filePath, 'nested content'); + + // Make nested structure read-only + fs.chmodSync(filePath, 0o444); + fs.chmodSync(subDir, 0o555); + fs.chmodSync(sourceDir, 0o555); + + // Perform copy + await copyExtension(sourceDir, destDir); + + // Verify nested destination is writable + const destSubDir = path.join(destDir, 'subdir'); + const destFilePath = path.join(destSubDir, fileName); + + expect(fs.statSync(destSubDir).mode & 0o200).toBe(0o200); + expect(fs.statSync(destFilePath).mode & 0o200).toBe(0o200); + + // Verify we can delete the whole destination tree + await fs.promises.rm(destDir, { recursive: true, force: true }); + expect(fs.existsSync(destDir)).toBe(false); + }); + + it('should not follow symlinks or modify symlink targets', async () => { + const symlinkTarget = path.join(tempDir, 'external-target'); + fs.writeFileSync(symlinkTarget, 'external content'); 
+ // Target is read-only + fs.chmodSync(symlinkTarget, 0o444); + + const symlinkPath = path.join(sourceDir, 'symlink-file'); + fs.symlinkSync(symlinkTarget, symlinkPath); + + // Perform copy + await copyExtension(sourceDir, destDir); + + const destSymlinkPath = path.join(destDir, 'symlink-file'); + const destSymlinkStats = fs.lstatSync(destSymlinkPath); + + // Verify it is still a symlink in the destination + expect(destSymlinkStats.isSymbolicLink()).toBe(true); + + // Verify the target (external to the extension) was NOT modified + const targetStats = fs.statSync(symlinkTarget); + // Owner write bit should still NOT be set (0o200) + expect(targetStats.mode & 0o200).toBe(0o000); + + // Clean up + fs.chmodSync(symlinkTarget, 0o644); + }); +}); diff --git a/packages/cli/src/config/extension-manager-skills.test.ts b/packages/cli/src/config/extension-manager-skills.test.ts index a76d88482d..800417de36 100644 --- a/packages/cli/src/config/extension-manager-skills.test.ts +++ b/packages/cli/src/config/extension-manager-skills.test.ts @@ -15,6 +15,10 @@ import { createExtension } from '../test-utils/createExtension.js'; import { EXTENSIONS_DIRECTORY_NAME } from './extensions/variables.js'; const mockHomedir = vi.hoisted(() => vi.fn(() => '/tmp/mock-home')); +const mockIntegrityManager = vi.hoisted(() => ({ + verify: vi.fn().mockResolvedValue('verified'), + store: vi.fn().mockResolvedValue(undefined), +})); vi.mock('node:os', async (importOriginal) => { const actual = await importOriginal(); @@ -31,6 +35,9 @@ vi.mock('@google/gemini-cli-core', async (importOriginal) => { return { ...actual, homedir: mockHomedir, + ExtensionIntegrityManager: vi + .fn() + .mockImplementation(() => mockIntegrityManager), loadAgentsFromDirectory: vi .fn() .mockImplementation(async () => ({ agents: [], errors: [] })), @@ -64,6 +71,7 @@ describe('ExtensionManager skills validation', () => { requestConsent: vi.fn().mockResolvedValue(true), requestSetting: vi.fn(), workspaceDir: tempDir, + 
integrityManager: mockIntegrityManager, }); }); @@ -139,6 +147,7 @@ describe('ExtensionManager skills validation', () => { requestConsent: vi.fn().mockResolvedValue(true), requestSetting: vi.fn(), workspaceDir: tempDir, + integrityManager: mockIntegrityManager, }); // 4. Load extensions diff --git a/packages/cli/src/config/extension-manager.test.ts b/packages/cli/src/config/extension-manager.test.ts index 67636d922e..6c20737be9 100644 --- a/packages/cli/src/config/extension-manager.test.ts +++ b/packages/cli/src/config/extension-manager.test.ts @@ -637,64 +637,4 @@ describe('ExtensionManager', () => { ); }); }); - - describe('orphaned extension cleanup', () => { - it('should remove broken extension metadata on startup to allow re-installation', async () => { - const extName = 'orphaned-ext'; - const sourceDir = path.join(tempHomeDir, 'valid-source'); - fs.mkdirSync(sourceDir, { recursive: true }); - fs.writeFileSync( - path.join(sourceDir, 'gemini-extension.json'), - JSON.stringify({ name: extName, version: '1.0.0' }), - ); - - // Link an extension successfully. - await extensionManager.loadExtensions(); - await extensionManager.installOrUpdateExtension({ - source: sourceDir, - type: 'link', - }); - - const destinationPath = path.join(userExtensionsDir, extName); - const metadataPath = path.join( - destinationPath, - '.gemini-extension-install.json', - ); - expect(fs.existsSync(metadataPath)).toBe(true); - - // Simulate metadata corruption (e.g., pointing to a non-existent source). - fs.writeFileSync( - metadataPath, - JSON.stringify({ source: '/NON_EXISTENT_PATH', type: 'link' }), - ); - - // Simulate CLI startup. The manager should detect the broken link - // and proactively delete the orphaned metadata directory. 
- const newManager = new ExtensionManager({ - settings: createTestMergedSettings(), - workspaceDir: tempWorkspaceDir, - requestConsent: vi.fn().mockResolvedValue(true), - requestSetting: null, - integrityManager: mockIntegrityManager, - }); - - await newManager.loadExtensions(); - - // Verify the extension failed to load and was proactively cleaned up. - expect(newManager.getExtensions().some((e) => e.name === extName)).toBe( - false, - ); - expect(fs.existsSync(destinationPath)).toBe(false); - - // Verify the system is self-healed and allows re-linking to the valid source. - await newManager.installOrUpdateExtension({ - source: sourceDir, - type: 'link', - }); - - expect(newManager.getExtensions().some((e) => e.name === extName)).toBe( - true, - ); - }); - }); }); diff --git a/packages/cli/src/config/extension-manager.ts b/packages/cli/src/config/extension-manager.ts index 2c46a845e6..65b3539794 100644 --- a/packages/cli/src/config/extension-manager.ts +++ b/packages/cli/src/config/extension-manager.ts @@ -614,7 +614,7 @@ Would you like to attempt to install via "git clone" instead?`, this.loadingPromise = (async () => { try { - if (this.settings.admin.extensions.enabled === false) { + if (this.settings.admin?.extensions?.enabled === false) { this.loadedExtensions = []; return this.loadedExtensions; } @@ -824,11 +824,11 @@ Would you like to attempt to install via "git clone" instead?`, } if (config.mcpServers) { - if (this.settings.admin.mcp.enabled === false) { + if (this.settings.admin?.mcp?.enabled === false) { config.mcpServers = undefined; } else { // Apply admin allowlist if configured - const adminAllowlist = this.settings.admin.mcp.config; + const adminAllowlist = this.settings.admin?.mcp?.config; if (adminAllowlist && Object.keys(adminAllowlist).length > 0) { const result = applyAdminAllowlist( config.mcpServers, @@ -982,18 +982,11 @@ Would you like to attempt to install via "git clone" instead?`, plan: config.plan, }; } catch (e) { - const extName = 
path.basename(extensionDir); - debugLogger.warn( - `Warning: Removing broken extension ${extName}: ${getErrorMessage(e)}`, + debugLogger.error( + `Warning: Skipping extension in ${effectiveExtensionPath}: ${getErrorMessage( + e, + )}`, ); - try { - await fs.promises.rm(extensionDir, { recursive: true, force: true }); - } catch (rmError) { - debugLogger.error( - `Failed to remove broken extension directory ${extensionDir}:`, - rmError, - ); - } return null; } } @@ -1248,11 +1241,32 @@ function filterMcpConfig(original: MCPServerConfig): MCPServerConfig { return Object.freeze(rest); } +/** + * Recursively ensures that the owner has write permissions for all files + * and directories within the target path. + */ +async function makeWritableRecursive(targetPath: string): Promise { + const stats = await fs.promises.lstat(targetPath); + + if (stats.isDirectory()) { + // Ensure directory is rwx for the owner (0o700) + await fs.promises.chmod(targetPath, stats.mode | 0o700); + const children = await fs.promises.readdir(targetPath); + for (const child of children) { + await makeWritableRecursive(path.join(targetPath, child)); + } + } else if (stats.isFile()) { + // Ensure file is rw for the owner (0o600) + await fs.promises.chmod(targetPath, stats.mode | 0o600); + } +} + export async function copyExtension( source: string, destination: string, ): Promise { await fs.promises.cp(source, destination, { recursive: true }); + await makeWritableRecursive(destination); } function getContextFileNames(config: ExtensionConfig): string[] { @@ -1284,7 +1298,9 @@ export async function inferInstallMetadata( source.startsWith('http://') || source.startsWith('https://') || source.startsWith('git@') || - source.startsWith('sso://') + source.startsWith('sso://') || + source.startsWith('github:') || + source.startsWith('gitlab:') ) { return { source, diff --git a/packages/cli/src/config/extension.test.ts b/packages/cli/src/config/extension.test.ts index fa957d8f7f..ef7e61cf25 100644 --- 
a/packages/cli/src/config/extension.test.ts +++ b/packages/cli/src/config/extension.test.ts @@ -249,8 +249,10 @@ describe('extension tests', () => { expect(extensions[0].name).toBe('test-extension'); }); - it('should log a warning and remove the extension if a context file path is outside the extension directory', async () => { - const consoleSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + it('should skip the extension if a context file path is outside the extension directory and log an error', async () => { + const consoleSpy = vi + .spyOn(console, 'error') + .mockImplementation(() => {}); createExtension({ extensionsDir: userExtensionsDir, name: 'traversal-extension', @@ -660,8 +662,10 @@ name = "yolo-checker" expect(serverConfig.env!['MISSING_VAR_BRACES']).toBe('${ALSO_UNDEFINED}'); }); - it('should remove an extension with invalid JSON config and log a warning', async () => { - const consoleSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + it('should skip an extension with invalid JSON config and log an error', async () => { + const consoleSpy = vi + .spyOn(console, 'error') + .mockImplementation(() => {}); // Good extension createExtension({ @@ -682,15 +686,17 @@ name = "yolo-checker" expect(extensions[0].name).toBe('good-ext'); expect(consoleSpy).toHaveBeenCalledWith( expect.stringContaining( - `Warning: Removing broken extension bad-ext: Failed to load extension config from ${badConfigPath}`, + `Warning: Skipping extension in ${badExtDir}: Failed to load extension config from ${badConfigPath}`, ), ); consoleSpy.mockRestore(); }); - it('should remove an extension with missing "name" in config and log a warning', async () => { - const consoleSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + it('should skip an extension with missing "name" in config and log an error', async () => { + const consoleSpy = vi + .spyOn(console, 'error') + .mockImplementation(() => {}); // Good extension createExtension({ @@ -711,7 +717,7 
@@ name = "yolo-checker" expect(extensions[0].name).toBe('good-ext'); expect(consoleSpy).toHaveBeenCalledWith( expect.stringContaining( - `Warning: Removing broken extension bad-ext-no-name: Failed to load extension config from ${badConfigPath}: Invalid configuration in ${badConfigPath}: missing "name"`, + `Warning: Skipping extension in ${badExtDir}: Failed to load extension config from ${badConfigPath}: Invalid configuration in ${badConfigPath}: missing "name"`, ), ); @@ -737,8 +743,10 @@ name = "yolo-checker" expect(extensions[0].mcpServers?.['test-server'].trust).toBeUndefined(); }); - it('should log a warning for invalid extension names during loading', async () => { - const consoleSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + it('should log an error for invalid extension names during loading', async () => { + const consoleSpy = vi + .spyOn(console, 'error') + .mockImplementation(() => {}); createExtension({ extensionsDir: userExtensionsDir, name: 'bad_name', diff --git a/packages/cli/src/config/extensions/consent.test.ts b/packages/cli/src/config/extensions/consent.test.ts index 76d7227ab4..8de884cdd5 100644 --- a/packages/cli/src/config/extensions/consent.test.ts +++ b/packages/cli/src/config/extensions/consent.test.ts @@ -59,8 +59,9 @@ vi.mock('@google/gemini-cli-core', async (importOriginal) => { }); async function expectConsentSnapshot(consentString: string) { - const renderResult = render(React.createElement(Text, null, consentString)); - await renderResult.waitUntilReady(); + const renderResult = await render( + React.createElement(Text, null, consentString), + ); await expect(renderResult).toMatchSvgSnapshot(); } diff --git a/packages/cli/src/config/extensions/extensionUpdates.test.ts b/packages/cli/src/config/extensions/extensionUpdates.test.ts index 69339b4eeb..89282fcd8a 100644 --- a/packages/cli/src/config/extensions/extensionUpdates.test.ts +++ b/packages/cli/src/config/extensions/extensionUpdates.test.ts @@ -36,6 +36,8 @@ 
vi.mock('node:fs', async (importOriginal) => { rm: vi.fn(), cp: vi.fn(), readFile: vi.fn(), + lstat: vi.fn(), + chmod: vi.fn(), }, }; }); @@ -143,6 +145,11 @@ describe('extensionUpdates', () => { vi.mocked(fs.promises.rm).mockResolvedValue(undefined); vi.mocked(fs.promises.cp).mockResolvedValue(undefined); vi.mocked(fs.promises.readdir).mockResolvedValue([]); + vi.mocked(fs.promises.lstat).mockResolvedValue({ + isDirectory: () => true, + mode: 0o755, + } as unknown as fs.Stats); + vi.mocked(fs.promises.chmod).mockResolvedValue(undefined); vi.mocked(isWorkspaceTrusted).mockReturnValue({ isTrusted: true, source: 'file', diff --git a/packages/cli/src/config/mcp/mcpServerEnablement.test.ts b/packages/cli/src/config/mcp/mcpServerEnablement.test.ts index 8b41324790..12b483d59d 100644 --- a/packages/cli/src/config/mcp/mcpServerEnablement.test.ts +++ b/packages/cli/src/config/mcp/mcpServerEnablement.test.ts @@ -13,6 +13,7 @@ vi.mock('@google/gemini-cli-core', async (importOriginal) => { return { ...actual, Storage: { + // eslint-disable-next-line @typescript-eslint/no-misused-spread ...actual.Storage, getGlobalGeminiDir: () => '/virtual-home/.gemini', }, diff --git a/packages/cli/src/config/policy-engine.integration.test.ts b/packages/cli/src/config/policy-engine.integration.test.ts index 847b47bbe3..edc06bfbf0 100644 --- a/packages/cli/src/config/policy-engine.integration.test.ts +++ b/packages/cli/src/config/policy-engine.integration.test.ts @@ -381,6 +381,7 @@ describe('Policy Engine Integration Tests', () => { // Add a manual rule with annotations to the config config.rules = config.rules || []; config.rules.push({ + toolName: '*', toolAnnotations: { readOnlyHint: true }, decision: PolicyDecision.ALLOW, priority: 10, @@ -516,7 +517,9 @@ describe('Policy Engine Integration Tests', () => { ); expect(mcpServerRule?.priority).toBe(4.1); // MCP allowed server - const readOnlyToolRule = rules.find((r) => r.toolName === 'glob'); + const readOnlyToolRule = rules.find( + (r) => 
r.toolName === 'glob' && !r.subagent, + ); // Priority 70 in default tier → 1.07 (Overriding Plan Mode Deny) expect(readOnlyToolRule?.priority).toBeCloseTo(1.07, 5); @@ -602,12 +605,12 @@ describe('Policy Engine Integration Tests', () => { it('should verify non-interactive mode transformation', async () => { const settings: Settings = {}; - const config = await createPolicyEngineConfig( + const engineConfig = await createPolicyEngineConfig( settings, ApprovalMode.DEFAULT, + undefined, + false, ); - // Enable non-interactive mode - const engineConfig = { ...config, nonInteractive: true }; const engine = new PolicyEngine(engineConfig); // ASK_USER should become DENY in non-interactive mode @@ -673,7 +676,7 @@ describe('Policy Engine Integration Tests', () => { const server1Rule = rules.find((r) => r.toolName === 'mcp_server1_*'); expect(server1Rule?.priority).toBe(4.1); // Allowed servers (user tier) - const globRule = rules.find((r) => r.toolName === 'glob'); + const globRule = rules.find((r) => r.toolName === 'glob' && !r.subagent); // Priority 70 in default tier → 1.07 expect(globRule?.priority).toBeCloseTo(1.07, 5); // Auto-accept read-only diff --git a/packages/cli/src/config/policy.ts b/packages/cli/src/config/policy.ts index 9837c2c355..317d2e848d 100644 --- a/packages/cli/src/config/policy.ts +++ b/packages/cli/src/config/policy.ts @@ -53,6 +53,7 @@ export async function createPolicyEngineConfig( settings: Settings, approvalMode: ApprovalMode, workspacePoliciesDir?: string, + interactive: boolean = true, ): Promise { // Explicitly construct PolicySettings from Settings to ensure type safety // and avoid accidental leakage of other settings properties. 
@@ -68,7 +69,12 @@ export async function createPolicyEngineConfig( settings.admin?.secureModeEnabled, }; - return createCorePolicyEngineConfig(policySettings, approvalMode); + return createCorePolicyEngineConfig( + policySettings, + approvalMode, + undefined, + interactive, + ); } export function createPolicyUpdater( diff --git a/packages/cli/src/config/sandboxConfig.test.ts b/packages/cli/src/config/sandboxConfig.test.ts index cfe1fed660..3ec0e6a5bb 100644 --- a/packages/cli/src/config/sandboxConfig.test.ts +++ b/packages/cli/src/config/sandboxConfig.test.ts @@ -338,6 +338,8 @@ describe('loadSandboxConfig', () => { sandbox: { enabled: true, command: 'podman', + allowedPaths: [], + networkAccess: false, }, }, }, @@ -353,6 +355,8 @@ describe('loadSandboxConfig', () => { sandbox: { enabled: true, image: 'custom/image', + allowedPaths: [], + networkAccess: false, }, }, }, @@ -367,6 +371,8 @@ describe('loadSandboxConfig', () => { tools: { sandbox: { enabled: false, + allowedPaths: [], + networkAccess: false, }, }, }, @@ -382,6 +388,7 @@ describe('loadSandboxConfig', () => { sandbox: { enabled: true, allowedPaths: ['/settings-path'], + networkAccess: false, }, }, }, diff --git a/packages/cli/src/config/sandboxConfig.ts b/packages/cli/src/config/sandboxConfig.ts index 59a9685f70..1a047760d3 100644 --- a/packages/cli/src/config/sandboxConfig.ts +++ b/packages/cli/src/config/sandboxConfig.ts @@ -29,6 +29,7 @@ const VALID_SANDBOX_COMMANDS = [ 'sandbox-exec', 'runsc', 'lxc', + 'windows-native', ]; function isSandboxCommand( @@ -75,8 +76,15 @@ function getSandboxCommand( 'gVisor (runsc) sandboxing is only supported on Linux', ); } - // confirm that specified command exists - if (!commandExists.sync(sandbox)) { + // windows-native is only supported on Windows + if (sandbox === 'windows-native' && os.platform() !== 'win32') { + throw new FatalSandboxError( + 'Windows native sandboxing is only supported on Windows', + ); + } + + // confirm that specified command exists (unless 
it's built-in) + if (sandbox !== 'windows-native' && !commandExists.sync(sandbox)) { throw new FatalSandboxError( `Missing sandbox command '${sandbox}' (from GEMINI_SANDBOX)`, ); @@ -149,7 +157,12 @@ export async function loadSandboxConfig( customImage ?? packageJson?.config?.sandboxImageUri; - return command && image + const isNative = + command === 'windows-native' || + command === 'sandbox-exec' || + command === 'lxc'; + + return command && (image || isNative) ? { enabled: true, allowedPaths, networkAccess, command, image } : undefined; } diff --git a/packages/cli/src/config/settings.test.ts b/packages/cli/src/config/settings.test.ts index 06129a4760..a58b9889a2 100644 --- a/packages/cli/src/config/settings.test.ts +++ b/packages/cli/src/config/settings.test.ts @@ -2751,6 +2751,28 @@ describe('Settings Loading and Merging', () => { expect(loadedSettings.merged.admin?.mcp?.config).toEqual(mcpServers); }); + it('should map requiredMcpConfig from remote settings', () => { + const loadedSettings = loadSettings(MOCK_WORKSPACE_DIR); + const requiredMcpConfig = { + 'corp-tool': { + url: 'https://mcp.corp/tool', + type: 'http' as const, + trust: true, + }, + }; + + loadedSettings.setRemoteAdminSettings({ + mcpSetting: { + mcpEnabled: true, + requiredMcpConfig, + }, + }); + + expect(loadedSettings.merged.admin?.mcp?.requiredConfig).toEqual( + requiredMcpConfig, + ); + }); + it('should set skills based on unmanagedCapabilitiesEnabled', () => { const loadedSettings = loadSettings(); loadedSettings.setRemoteAdminSettings({ diff --git a/packages/cli/src/config/settings.ts b/packages/cli/src/config/settings.ts index 711ff93271..984bdb8d60 100644 --- a/packages/cli/src/config/settings.ts +++ b/packages/cli/src/config/settings.ts @@ -480,6 +480,7 @@ export class LoadedSettings { admin.mcp = { enabled: mcpSetting?.mcpEnabled, config: mcpSetting?.mcpConfig?.mcpServers, + requiredConfig: mcpSetting?.requiredMcpConfig, }; admin.extensions = { enabled: 
cliFeatureSetting?.extensionsSetting?.extensionsEnabled, @@ -631,6 +632,10 @@ export function resetSettingsCacheForTesting() { settingsCache.clear(); } +export function isWorktreeEnabled(settings: LoadedSettings): boolean { + return settings.merged.experimental.worktrees; +} + /** * Loads settings from user and workspace directories. * Project settings override user settings. diff --git a/packages/cli/src/config/settingsSchema.test.ts b/packages/cli/src/config/settingsSchema.test.ts index 37ddf87642..c358cd65aa 100644 --- a/packages/cli/src/config/settingsSchema.test.ts +++ b/packages/cli/src/config/settingsSchema.test.ts @@ -538,8 +538,32 @@ describe('SettingsSchema', () => { } }; + const visitJsonSchema = (jsonSchema: Record) => { + const ref = jsonSchema['ref']; + if (typeof ref === 'string') { + referenced.add(ref); + } + const properties = jsonSchema['properties']; + if ( + properties && + typeof properties === 'object' && + !Array.isArray(properties) + ) { + Object.values(properties as Record).forEach((prop) => + visitJsonSchema(prop as Record), + ); + } + const items = jsonSchema['items']; + if (items && typeof items === 'object' && !Array.isArray(items)) { + visitJsonSchema(items as Record); + } + }; + Object.values(schema).forEach(visitDefinition); + // Also visit all definitions to find nested references + Object.values(SETTINGS_SCHEMA_DEFINITIONS).forEach(visitJsonSchema); + // Ensure definitions map doesn't accumulate stale entries. 
Object.keys(SETTINGS_SCHEMA_DEFINITIONS).forEach((key) => { if (!referenced.has(key)) { diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index fed9a50131..880f2de156 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -12,7 +12,9 @@ import { DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD, DEFAULT_MODEL_CONFIGS, + AuthProviderType, type MCPServerConfig, + type RequiredMcpServerConfig, type BugCommandSettings, type TelemetrySettings, type AuthType, @@ -259,7 +261,7 @@ const SETTINGS_SCHEMA = { requiresRestart: false, default: false, description: - 'Enable run-event notifications for action-required prompts and session completion. Currently macOS only.', + 'Enable run-event notifications for action-required prompts and session completion.', showInDialog: true, }, checkpointing: { @@ -298,7 +300,7 @@ const SETTINGS_SCHEMA = { requiresRestart: true, default: undefined as string | undefined, description: - 'The directory where planning artifacts are stored. If not specified, defaults to the system temporary directory.', + 'The directory where planning artifacts are stored. If not specified, defaults to the system temporary directory. 
A custom directory requires a policy to allow write access in Plan Mode.', showInDialog: true, }, modelRouting: { @@ -655,6 +657,16 @@ const SETTINGS_SCHEMA = { description: 'Hide the footer from the UI', showInDialog: true, }, + collapseDrawerDuringApproval: { + type: 'boolean', + label: 'Collapse Drawer During Approval', + category: 'UI', + requiresRestart: false, + default: true, + description: + 'Whether to collapse the UI drawer when a tool is awaiting confirmation.', + showInDialog: false, + }, showMemoryUsage: { type: 'boolean', label: 'Show Memory Usage', @@ -1081,6 +1093,20 @@ const SETTINGS_SCHEMA = { ref: 'ModelResolution', }, }, + modelChains: { + type: 'object', + label: 'Model Chains', + category: 'Model', + requiresRestart: true, + default: DEFAULT_MODEL_CONFIGS.modelChains, + description: + 'Availability policy chains defining fallback behavior for models.', + showInDialog: false, + additionalProperties: { + type: 'array', + ref: 'ModelPolicyChain', + }, + }, }, }, @@ -1182,6 +1208,36 @@ const SETTINGS_SCHEMA = { 'Disable user input on browser window during automation.', showInDialog: false, }, + maxActionsPerTask: { + type: 'number', + label: 'Max Actions Per Task', + category: 'Advanced', + requiresRestart: false, + default: 100, + description: + 'The maximum number of tool calls allowed per browser task. 
Enforcement is hard: the agent will be terminated when the limit is reached.', + showInDialog: false, + }, + confirmSensitiveActions: { + type: 'boolean', + label: 'Confirm Sensitive Actions', + category: 'Advanced', + requiresRestart: true, + default: false, + description: + 'Require manual confirmation for sensitive browser actions (e.g., fill_form, evaluate_script).', + showInDialog: true, + }, + blockFileUploads: { + type: 'boolean', + label: 'Block File Uploads', + category: 'Advanced', + requiresRestart: true, + default: false, + description: + 'Hard-block file upload requests from the browser agent.', + showInDialog: true, + }, }, }, }, @@ -1344,10 +1400,30 @@ const SETTINGS_SCHEMA = { description: oneLine` Legacy full-process sandbox execution environment. Set to a boolean to enable or disable the sandbox, provide a string path to a sandbox profile, - or specify an explicit sandbox command (e.g., "docker", "podman", "lxc"). + or specify an explicit sandbox command (e.g., "docker", "podman", "lxc", "windows-native"). 
`, showInDialog: false, }, + sandboxAllowedPaths: { + type: 'array', + label: 'Sandbox Allowed Paths', + category: 'Tools', + requiresRestart: true, + default: [] as string[], + description: + 'List of additional paths that the sandbox is allowed to access.', + showInDialog: true, + items: { type: 'string' }, + }, + sandboxNetworkAccess: { + type: 'boolean', + label: 'Sandbox Network Access', + category: 'Tools', + requiresRestart: true, + default: false, + description: 'Whether the sandbox is allowed to access the network.', + showInDialog: true, + }, shell: { type: 'object', label: 'Shell', @@ -1870,6 +1946,16 @@ const SETTINGS_SCHEMA = { description: 'Enable local and remote subagents.', showInDialog: false, }, + worktrees: { + type: 'boolean', + label: 'Enable Git Worktrees', + category: 'Experimental', + requiresRestart: true, + default: false, + description: + 'Enable automated Git worktree management for parallel work.', + showInDialog: true, + }, extensionManagement: { type: 'boolean', label: 'Extension Management', @@ -2045,6 +2131,16 @@ const SETTINGS_SCHEMA = { }, }, }, + memoryManager: { + type: 'boolean', + label: 'Memory Manager Agent', + category: 'Experimental', + requiresRestart: true, + default: false, + description: + 'Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories.', + showInDialog: true, + }, topicUpdateNarration: { type: 'boolean', label: 'Topic & Update Narration', @@ -2391,7 +2487,7 @@ const SETTINGS_SCHEMA = { category: 'Admin', requiresRestart: false, default: {} as Record, - description: 'Admin-configured MCP servers.', + description: 'Admin-configured MCP servers (allowlist).', showInDialog: false, mergeStrategy: MergeStrategy.REPLACE, additionalProperties: { @@ -2399,6 +2495,20 @@ const SETTINGS_SCHEMA = { ref: 'MCPServerConfig', }, }, + requiredConfig: { + type: 'object', + label: 'Required MCP Config', + category: 'Admin', + requiresRestart: 
false, + default: {} as Record, + description: 'Admin-required MCP servers that are always injected.', + showInDialog: false, + mergeStrategy: MergeStrategy.REPLACE, + additionalProperties: { + type: 'object', + ref: 'RequiredMcpServerConfig', + }, + }, }, }, skills: { @@ -2523,11 +2633,72 @@ export const SETTINGS_SCHEMA_DEFINITIONS: Record< type: 'string', description: 'Authentication provider used for acquiring credentials (for example `dynamic_discovery`).', - enum: [ - 'dynamic_discovery', - 'google_credentials', - 'service_account_impersonation', - ], + enum: Object.values(AuthProviderType), + }, + targetAudience: { + type: 'string', + description: + 'OAuth target audience (CLIENT_ID.apps.googleusercontent.com).', + }, + targetServiceAccount: { + type: 'string', + description: + 'Service account email to impersonate (name@project.iam.gserviceaccount.com).', + }, + }, + }, + RequiredMcpServerConfig: { + type: 'object', + description: + 'Admin-required MCP server configuration (remote transports only).', + additionalProperties: false, + properties: { + url: { + type: 'string', + description: 'URL for the required MCP server.', + }, + type: { + type: 'string', + description: 'Transport type for the required server.', + enum: ['sse', 'http'], + }, + headers: { + type: 'object', + description: 'Additional HTTP headers sent to the server.', + additionalProperties: { type: 'string' }, + }, + timeout: { + type: 'number', + description: 'Timeout in milliseconds for MCP requests.', + }, + trust: { + type: 'boolean', + description: + 'Marks the server as trusted. 
Defaults to true for admin-required servers.', + }, + description: { + type: 'string', + description: 'Human-readable description of the server.', + }, + includeTools: { + type: 'array', + description: 'Subset of tools enabled for this server.', + items: { type: 'string' }, + }, + excludeTools: { + type: 'array', + description: 'Tools disabled for this server.', + items: { type: 'string' }, + }, + oauth: { + type: 'object', + description: 'OAuth configuration for authenticating with the server.', + additionalProperties: true, + }, + authProviderType: { + type: 'string', + description: 'Authentication provider used for acquiring credentials.', + enum: Object.values(AuthProviderType), }, targetAudience: { type: 'string', @@ -2853,6 +3024,7 @@ export const SETTINGS_SCHEMA_DEFINITIONS: Record< type: 'object', properties: { useGemini3_1: { type: 'boolean' }, + useGemini3_1FlashLite: { type: 'boolean' }, useCustomTools: { type: 'boolean' }, hasAccessToPreview: { type: 'boolean' }, requestedModels: { @@ -2867,6 +3039,42 @@ export const SETTINGS_SCHEMA_DEFINITIONS: Record< }, }, }, + ModelPolicyChain: { + type: 'array', + description: 'A chain of model policies for fallback behavior.', + items: { + type: 'object', + ref: 'ModelPolicy', + }, + }, + ModelPolicy: { + type: 'object', + description: + 'Defines the policy for a single model in the availability chain.', + properties: { + model: { type: 'string' }, + isLastResort: { type: 'boolean' }, + actions: { + type: 'object', + properties: { + terminal: { type: 'string', enum: ['silent', 'prompt'] }, + transient: { type: 'string', enum: ['silent', 'prompt'] }, + not_found: { type: 'string', enum: ['silent', 'prompt'] }, + unknown: { type: 'string', enum: ['silent', 'prompt'] }, + }, + }, + stateTransitions: { + type: 'object', + properties: { + terminal: { type: 'string', enum: ['terminal', 'sticky_retry'] }, + transient: { type: 'string', enum: ['terminal', 'sticky_retry'] }, + not_found: { type: 'string', enum: 
['terminal', 'sticky_retry'] }, + unknown: { type: 'string', enum: ['terminal', 'sticky_retry'] }, + }, + }, + }, + required: ['model'], + }, }; export function getSettingsSchema(): SettingsSchemaType { diff --git a/packages/cli/src/config/workspace-policy-cli.test.ts b/packages/cli/src/config/workspace-policy-cli.test.ts index d0d98a5a31..bd9bcd0105 100644 --- a/packages/cli/src/config/workspace-policy-cli.test.ts +++ b/packages/cli/src/config/workspace-policy-cli.test.ts @@ -88,6 +88,8 @@ describe('Workspace-Level Policy CLI Integration', () => { ), }), expect.anything(), + undefined, + expect.anything(), ); }); @@ -107,6 +109,8 @@ describe('Workspace-Level Policy CLI Integration', () => { workspacePoliciesDir: undefined, }), expect.anything(), + undefined, + expect.anything(), ); }); @@ -131,6 +135,8 @@ describe('Workspace-Level Policy CLI Integration', () => { workspacePoliciesDir: undefined, }), expect.anything(), + undefined, + expect.anything(), ); }); @@ -163,6 +169,8 @@ describe('Workspace-Level Policy CLI Integration', () => { ), }), expect.anything(), + undefined, + expect.anything(), ); }); @@ -201,6 +209,8 @@ describe('Workspace-Level Policy CLI Integration', () => { ), }), expect.anything(), + undefined, + expect.anything(), ); }); @@ -237,6 +247,8 @@ describe('Workspace-Level Policy CLI Integration', () => { ), }), expect.anything(), + undefined, + expect.anything(), ); }); @@ -278,6 +290,8 @@ describe('Workspace-Level Policy CLI Integration', () => { workspacePoliciesDir: undefined, }), expect.anything(), + undefined, + expect.anything(), ); } finally { // Restore for other tests diff --git a/packages/cli/src/core/initializer.test.ts b/packages/cli/src/core/initializer.test.ts index e4fdb2cba5..9093ad54ee 100644 --- a/packages/cli/src/core/initializer.test.ts +++ b/packages/cli/src/core/initializer.test.ts @@ -105,6 +105,9 @@ describe('initializer', () => { mockSettings, ); + // Wait for the background promise to resolve + await new 
Promise((resolve) => setTimeout(resolve, 0)); + expect(result).toEqual({ authError: null, accountSuspensionInfo: null, diff --git a/packages/cli/src/core/initializer.ts b/packages/cli/src/core/initializer.ts index f27e9a9511..607129ae3e 100644 --- a/packages/cli/src/core/initializer.ts +++ b/packages/cli/src/core/initializer.ts @@ -13,6 +13,7 @@ import { StartSessionEvent, logCliConfiguration, startupProfiler, + debugLogger, } from '@google/gemini-cli-core'; import { type LoadedSettings } from '../config/settings.js'; import { performInitialAuth } from './auth.js'; @@ -55,9 +56,18 @@ export async function initializeApp( ); if (config.getIdeMode()) { - const ideClient = await IdeClient.getInstance(); - await ideClient.connect(); - logIdeConnection(config, new IdeConnectionEvent(IdeConnectionType.START)); + IdeClient.getInstance() + .then(async (ideClient) => { + await ideClient.connect(); + logIdeConnection( + config, + new IdeConnectionEvent(IdeConnectionType.START), + ); + }) + .catch((e) => { + // We log locally if IDE connection setup fails in the background. 
+ debugLogger.error('Failed to initialize IDE client:', e); + }); } return { diff --git a/packages/cli/src/gemini.test.tsx b/packages/cli/src/gemini.test.tsx index 31fec36db0..fd19ffa79c 100644 --- a/packages/cli/src/gemini.test.tsx +++ b/packages/cli/src/gemini.test.tsx @@ -126,6 +126,7 @@ vi.mock('@google/gemini-cli-core', async (importOriginal) => { clearInstance: vi.fn(), }, coreEvents: { + // eslint-disable-next-line @typescript-eslint/no-misused-spread ...actual.coreEvents, emitFeedback: vi.fn(), emitConsoleLog: vi.fn(), @@ -199,6 +200,8 @@ vi.mock('./config/config.js', () => ({ networkAccess: false, }), isDebugMode: vi.fn(() => false), + getRequestedWorktreeName: vi.fn(() => undefined), + getWorktreeArg: vi.fn(() => undefined), })); vi.mock('read-package-up', () => ({ @@ -525,6 +528,62 @@ describe('gemini.tsx main function kitty protocol', () => { ); }); + it('should call process.stdin.resume when isInteractive is true to protect against implicit Node pause', async () => { + const resumeSpy = vi.spyOn(process.stdin, 'resume'); + vi.mocked(loadCliConfig).mockResolvedValue( + createMockConfig({ + isInteractive: () => true, + getQuestion: () => '', + getSandbox: () => undefined, + }), + ); + vi.mocked(loadSettings).mockReturnValue( + createMockSettings({ + merged: { + advanced: {}, + security: { auth: {} }, + ui: {}, + }, + }), + ); + vi.mocked(parseArguments).mockResolvedValue({ + model: undefined, + sandbox: undefined, + debug: undefined, + prompt: undefined, + promptInteractive: undefined, + query: undefined, + yolo: undefined, + approvalMode: undefined, + policy: undefined, + adminPolicy: undefined, + allowedMcpServerNames: undefined, + allowedTools: undefined, + experimentalAcp: undefined, + extensions: undefined, + listExtensions: undefined, + includeDirectories: undefined, + screenReader: undefined, + useWriteTodos: undefined, + resume: undefined, + listSessions: undefined, + deleteSession: undefined, + outputFormat: undefined, + fakeResponses: 
undefined, + recordResponses: undefined, + rawOutput: undefined, + acceptRawOutputRisk: undefined, + isCommand: undefined, + }); + + await act(async () => { + await main(); + }); + + expect(resumeSpy).toHaveBeenCalledTimes(1); + resumeSpy.mockRestore(); + }); + it.each([ { flag: 'listExtensions' }, { flag: 'listSessions' }, @@ -1506,6 +1565,7 @@ describe('startInteractiveUI', () => { .spyOn(process.stdout, 'write') .mockImplementation(() => true); const mockConfigWithScreenReader = { + // eslint-disable-next-line @typescript-eslint/no-misused-spread ...mockConfig, getScreenReader: () => screenReader, } as Config; diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index 4722bb73f3..4b43d7d81b 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -9,6 +9,7 @@ import { WarningPriority, type Config, type ResumedSessionData, + type WorktreeInfo, type OutputPayload, type ConsoleLogPayload, type UserFeedbackPayload, @@ -31,6 +32,7 @@ import { ValidationRequiredError, type AdminControlsSettings, debugLogger, + isHeadlessMode, } from '@google/gemini-cli-core'; import { loadCliConfig, parseArguments } from './config/config.js'; @@ -63,6 +65,7 @@ import { registerTelemetryConfig, setupSignalHandlers, } from './utils/cleanup.js'; +import { setupWorktree } from './utils/worktreeSetup.js'; import { cleanupToolOutputFiles, cleanupExpiredSessions, @@ -210,6 +213,37 @@ export async function main() { const settings = loadSettings(); loadSettingsHandle?.end(); + // If a worktree is requested and enabled, set it up early. + // This must be awaited before any other async tasks that depend on CWD (like loadCliConfig) + // because setupWorktree calls process.chdir(). 
+ const requestedWorktree = cliConfig.getRequestedWorktreeName(settings); + let worktreeInfo: WorktreeInfo | undefined; + if (requestedWorktree !== undefined) { + const worktreeHandle = startupProfiler.start('setup_worktree'); + worktreeInfo = await setupWorktree(requestedWorktree || undefined); + worktreeHandle?.end(); + } + + const cleanupOpsHandle = startupProfiler.start('cleanup_ops'); + Promise.all([ + cleanupCheckpoints(), + cleanupToolOutputFiles(settings.merged), + cleanupBackgroundLogs(), + ]) + .catch((e) => { + debugLogger.error('Early cleanup failed:', e); + }) + .finally(() => { + cleanupOpsHandle?.end(); + }); + + const parseArgsHandle = startupProfiler.start('parse_arguments'); + const argvPromise = parseArguments(settings.merged).finally(() => { + parseArgsHandle?.end(); + }); + + const rawStartupWarningsPromise = getStartupWarnings(); + // Report settings errors once during startup settings.errors.forEach((error) => { coreEvents.emitFeedback('warning', error.message); @@ -223,15 +257,7 @@ export async function main() { ); }); - await Promise.all([ - cleanupCheckpoints(), - cleanupToolOutputFiles(settings.merged), - cleanupBackgroundLogs(), - ]); - - const parseArgsHandle = startupProfiler.start('parse_arguments'); - const argv = await parseArguments(settings.merged); - parseArgsHandle?.end(); + const argv = await argvPromise; if ( (argv.allowedTools && argv.allowedTools.length > 0) || @@ -271,6 +297,7 @@ export async function main() { const isDebugMode = cliConfig.isDebugMode(argv); const consolePatcher = new ConsolePatcher({ stderr: true, + interactive: isHeadlessMode() ? false : true, debugMode: isDebugMode, onNewMessage: (msg) => { coreEvents.emitConsoleLog(msg.type, msg.content); @@ -309,7 +336,7 @@ export async function main() { // the sandbox because the sandbox will interfere with the Oauth2 web // redirect. 
let initialAuthFailed = false; - if (!settings.merged.security.auth.useExternal) { + if (!settings.merged.security.auth.useExternal && !argv.isCommand) { try { if ( partialConfig.isInteractive() && @@ -361,7 +388,7 @@ export async function main() { await runDeferredCommand(settings.merged); // hop into sandbox if we are outside and sandboxing is enabled - if (!process.env['SANDBOX']) { + if (!process.env['SANDBOX'] && !argv.isCommand) { const memoryArgs = settings.merged.advanced.autoConfigureMemory ? getNodeMemoryArgs(isDebugMode) : []; @@ -426,6 +453,7 @@ export async function main() { const loadConfigHandle = startupProfiler.start('load_cli_config'); const config = await loadCliConfig(settings.merged, sessionId, argv, { projectHooks: settings.workspace.settings.hooks, + worktreeSettings: worktreeInfo, }); loadConfigHandle?.end(); @@ -457,12 +485,10 @@ export async function main() { await config.getHookSystem()?.fireSessionEndEvent(SessionEndReason.Exit); }); - // Cleanup sessions after config initialization - try { - await cleanupExpiredSessions(config, settings.merged); - } catch (e) { + // Launch cleanup expired sessions as a background task + cleanupExpiredSessions(config, settings.merged).catch((e) => { debugLogger.error('Failed to cleanup expired sessions:', e); - } + }); if (config.getListExtensions()) { debugLogger.log('Installed extensions:'); @@ -514,7 +540,9 @@ export async function main() { }); } + const terminalHandle = startupProfiler.start('setup_terminal'); await setupTerminalAndTheme(config, settings); + terminalHandle?.end(); const initAppHandle = startupProfiler.start('initialize_app'); const initializationResult = await initializeApp(config, settings); @@ -538,7 +566,7 @@ export async function main() { isAlternateBufferEnabled(config), config.getScreenReader(), ); - const rawStartupWarnings = await getStartupWarnings(); + const rawStartupWarnings = await rawStartupWarningsPromise; const startupWarnings: StartupWarning[] = [ 
...rawStartupWarnings.map((message) => ({ id: `startup-${createHash('sha256').update(message).digest('hex').substring(0, 16)}`, @@ -585,8 +613,17 @@ export async function main() { } cliStartupHandle?.end(); + // Render UI, passing necessary config values. Check that there is no command line question. if (config.isInteractive()) { + // Earlier initialization phases (like TerminalCapabilityManager resolving + // or authWithWeb) may have added and removed 'data' listeners on process.stdin. + // When the listener count drops to 0, Node.js implicitly pauses the stream buffer. + // React Ink's useInput hooks will silently fail to receive keystrokes if the stream remains paused. + if (process.stdin.isTTY) { + process.stdin.resume(); + } + await startInteractiveUI( config, settings, diff --git a/packages/cli/src/gemini_cleanup.test.tsx b/packages/cli/src/gemini_cleanup.test.tsx index 9be9fc6194..382ad3f81f 100644 --- a/packages/cli/src/gemini_cleanup.test.tsx +++ b/packages/cli/src/gemini_cleanup.test.tsx @@ -72,6 +72,8 @@ vi.mock('./config/config.js', () => ({ } as unknown as Config), parseArguments: vi.fn().mockResolvedValue({}), isDebugMode: vi.fn(() => false), + getRequestedWorktreeName: vi.fn(() => undefined), + getWorktreeArg: vi.fn(() => undefined), })); vi.mock('read-package-up', () => ({ diff --git a/packages/cli/src/nonInteractiveCli.test.ts b/packages/cli/src/nonInteractiveCli.test.ts index 206d011e63..4e45b0f188 100644 --- a/packages/cli/src/nonInteractiveCli.test.ts +++ b/packages/cli/src/nonInteractiveCli.test.ts @@ -1137,6 +1137,7 @@ describe('runNonInteractive', () => { expect( processStderrSpy.mock.calls.some( + // eslint-disable-next-line no-restricted-syntax (call) => typeof call[0] === 'string' && call[0].includes('Cancelling'), ), ).toBe(true); diff --git a/packages/cli/src/nonInteractiveCli.ts b/packages/cli/src/nonInteractiveCli.ts index 891e3d0ee9..4f9d817204 100644 --- a/packages/cli/src/nonInteractiveCli.ts +++ 
b/packages/cli/src/nonInteractiveCli.ts @@ -65,6 +65,7 @@ export async function runNonInteractive({ return promptIdContext.run(prompt_id, async () => { const consolePatcher = new ConsolePatcher({ stderr: true, + interactive: false, debugMode: config.getDebugMode(), onNewMessage: (msg) => { coreEvents.emitConsoleLog(msg.type, msg.content); diff --git a/packages/cli/src/services/BuiltinCommandLoader.test.ts b/packages/cli/src/services/BuiltinCommandLoader.test.ts index b5e7856711..f166c161cd 100644 --- a/packages/cli/src/services/BuiltinCommandLoader.test.ts +++ b/packages/cli/src/services/BuiltinCommandLoader.test.ts @@ -266,6 +266,7 @@ describe('BuiltinCommandLoader', () => { it('should include policies command when message bus integration is enabled', async () => { const mockConfigWithMessageBus = { + // eslint-disable-next-line @typescript-eslint/no-misused-spread ...mockConfig, getEnableHooks: () => false, getMcpEnabled: () => true, diff --git a/packages/cli/src/services/SlashCommandResolver.test.ts b/packages/cli/src/services/SlashCommandResolver.test.ts index 43d1c310a8..40e3b6f1d5 100644 --- a/packages/cli/src/services/SlashCommandResolver.test.ts +++ b/packages/cli/src/services/SlashCommandResolver.test.ts @@ -43,7 +43,7 @@ describe('SlashCommandResolver', () => { ]); expect(finalCommands.map((c) => c.name)).toContain('deploy'); - expect(finalCommands.map((c) => c.name)).toContain('firebase.deploy'); + expect(finalCommands.map((c) => c.name)).toContain('firebase:deploy'); expect(conflicts).toHaveLength(1); }); @@ -159,7 +159,7 @@ describe('SlashCommandResolver', () => { it('should apply numeric suffixes when renames also conflict', () => { const user1 = createMockCommand('deploy', CommandKind.USER_FILE); - const user2 = createMockCommand('gcp.deploy', CommandKind.USER_FILE); + const user2 = createMockCommand('gcp:deploy', CommandKind.USER_FILE); const extension = { ...createMockCommand('deploy', CommandKind.EXTENSION_FILE), extensionName: 'gcp', @@ -171,7 
+171,7 @@ describe('SlashCommandResolver', () => { extension, ]); - expect(finalCommands.find((c) => c.name === 'gcp.deploy1')).toBeDefined(); + expect(finalCommands.find((c) => c.name === 'gcp:deploy1')).toBeDefined(); }); it('should prefix skills with extension name when they conflict with built-in', () => { @@ -185,7 +185,37 @@ describe('SlashCommandResolver', () => { const names = finalCommands.map((c) => c.name); expect(names).toContain('chat'); - expect(names).toContain('google-workspace.chat'); + expect(names).toContain('google-workspace:chat'); + }); + + it('should ALWAYS prefix extension skills even if no conflict exists', () => { + const skill = { + ...createMockCommand('chat', CommandKind.SKILL), + extensionName: 'google-workspace', + }; + + const { finalCommands } = SlashCommandResolver.resolve([skill]); + + const names = finalCommands.map((c) => c.name); + expect(names).toContain('google-workspace:chat'); + expect(names).not.toContain('chat'); + }); + + it('should use numeric suffixes if prefixed skill names collide', () => { + const skill1 = { + ...createMockCommand('chat', CommandKind.SKILL), + extensionName: 'google-workspace', + }; + const skill2 = { + ...createMockCommand('chat', CommandKind.SKILL), + extensionName: 'google-workspace', + }; + + const { finalCommands } = SlashCommandResolver.resolve([skill1, skill2]); + + const names = finalCommands.map((c) => c.name); + expect(names).toContain('google-workspace:chat'); + expect(names).toContain('google-workspace:chat1'); }); it('should NOT prefix skills with "skill" when extension name is missing', () => { diff --git a/packages/cli/src/services/SlashCommandResolver.ts b/packages/cli/src/services/SlashCommandResolver.ts index 4947e6545a..e956d6f566 100644 --- a/packages/cli/src/services/SlashCommandResolver.ts +++ b/packages/cli/src/services/SlashCommandResolver.ts @@ -47,7 +47,17 @@ export class SlashCommandResolver { const originalName = cmd.name; let finalName = originalName; - if 
(registry.firstEncounters.has(originalName)) { + const shouldAlwaysPrefix = + cmd.kind === CommandKind.SKILL && !!cmd.extensionName; + + if (shouldAlwaysPrefix) { + finalName = this.getRenamedName( + originalName, + this.getPrefix(cmd), + registry.commandMap, + cmd.kind, + ); + } else if (registry.firstEncounters.has(originalName)) { // We've already seen a command with this name, so resolve the conflict. finalName = this.handleConflict(cmd, registry); } else { @@ -93,6 +103,7 @@ export class SlashCommandResolver { incoming.name, this.getPrefix(incoming), registry.commandMap, + incoming.kind, ); this.trackConflict( registry.conflictsMap, @@ -132,6 +143,7 @@ export class SlashCommandResolver { currentOwner.name, this.getPrefix(currentOwner), registry.commandMap, + currentOwner.kind, ); // Update the registry: remove the old name and add the owner under the new name. @@ -156,8 +168,12 @@ export class SlashCommandResolver { name: string, prefix: string | undefined, commandMap: Map, + kind?: CommandKind, ): string { - const base = prefix ? `${prefix}.${name}` : name; + const isExtensionPrefix = + kind === CommandKind.SKILL || kind === CommandKind.EXTENSION_FILE; + const separator = isExtensionPrefix ? ':' : '.'; + const base = prefix ? 
`${prefix}${separator}${name}` : name; let renamedName = base; let suffix = 1; diff --git a/packages/cli/src/test-utils/AppRig.tsx b/packages/cli/src/test-utils/AppRig.tsx index 5ead5d615a..548372a139 100644 --- a/packages/cli/src/test-utils/AppRig.tsx +++ b/packages/cli/src/test-utils/AppRig.tsx @@ -11,7 +11,11 @@ import os from 'node:os'; import path from 'node:path'; import fs from 'node:fs'; import { AppContainer } from '../ui/AppContainer.js'; -import { renderWithProviders, type RenderInstance } from './render.js'; +import { + renderWithProviders, + type RenderInstance, + persistentStateMock, +} from './render.js'; import { makeFakeConfig, type Config, @@ -162,7 +166,7 @@ export class AppRig { private sessionId: string; private pendingConfirmations = new Map(); - private breakpointTools = new Set(); + private breakpointTools = new Set(); private lastAwaitedConfirmation: PendingConfirmation | undefined; /** @@ -177,9 +181,24 @@ export class AppRig { ); this.sessionId = `test-session-${uniqueId}`; activeRigs.set(this.sessionId, this); + + // Pre-create the persistent state file to bypass the terminal setup prompt + const geminiDir = path.join(this.testDir, '.gemini'); + if (!fs.existsSync(geminiDir)) { + fs.mkdirSync(geminiDir, { recursive: true }); + } + fs.writeFileSync( + path.join(geminiDir, 'state.json'), + JSON.stringify({ terminalSetupPromptShown: true }), + ); } async initialize() { + persistentStateMock.setData({ + terminalSetupPromptShown: true, + tipsShown: 10, + }); + this.setupEnvironment(); resetSettingsCacheForTesting(); this.settings = this.createRigSettings(); @@ -226,6 +245,8 @@ export class AppRig { private setupEnvironment() { // Stub environment variables to avoid interference from developer's machine vi.stubEnv('GEMINI_CLI_HOME', this.testDir); + vi.stubEnv('TERM_PROGRAM', 'other'); + vi.stubEnv('VSCODE_GIT_IPC_HANDLE', ''); if (this.options.fakeResponsesPath) { vi.stubEnv('GEMINI_API_KEY', 'test-api-key'); 
MockShellExecutionService.setPassthrough(false); @@ -291,7 +312,6 @@ export class AppRig { const newContentGeneratorConfig = { authType: authMethod, - proxy: gcConfig.getProxy(), apiKey: process.env['GEMINI_API_KEY'] || 'test-api-key', }; @@ -426,11 +446,7 @@ export class AppRig { MockShellExecutionService.setMockCommands(commands); } - setToolPolicy( - toolName: string | undefined, - decision: PolicyDecision, - priority = 10, - ) { + setToolPolicy(toolName: string, decision: PolicyDecision, priority = 10) { if (!this.config) throw new Error('AppRig not initialized'); this.config.getPolicyEngine().addRule({ toolName, @@ -440,27 +456,20 @@ export class AppRig { }); } - setBreakpoint(toolName: string | string[] | undefined) { + setBreakpoint(toolName: string | string[]) { if (Array.isArray(toolName)) { for (const name of toolName) { this.setBreakpoint(name); } } else { - // Use undefined toolName to create a global rule if '*' is provided - const actualToolName = toolName === '*' ? undefined : toolName; - this.setToolPolicy(actualToolName, PolicyDecision.ASK_USER, 100); + this.setToolPolicy(toolName, PolicyDecision.ASK_USER, 100); this.breakpointTools.add(toolName); } } - removeToolPolicy(toolName?: string, source = 'AppRig Override') { + removeToolPolicy(toolName: string, source = 'AppRig Override') { if (!this.config) throw new Error('AppRig not initialized'); - // Map '*' back to undefined for policy removal - const actualToolName = toolName === '*' ? 
undefined : toolName; - this.config - .getPolicyEngine() - - .removeRulesForTool(actualToolName as string, source); + this.config.getPolicyEngine().removeRulesForTool(toolName, source); this.breakpointTools.delete(toolName); } diff --git a/packages/cli/src/test-utils/customMatchers.ts b/packages/cli/src/test-utils/customMatchers.ts index ae9b44ee44..d34576cf3f 100644 --- a/packages/cli/src/test-utils/customMatchers.ts +++ b/packages/cli/src/test-utils/customMatchers.ts @@ -79,7 +79,7 @@ export async function toMatchSvgSnapshot( } function toHaveOnlyValidCharacters(this: Assertion, buffer: TextBuffer) { - // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-type-assertion, @typescript-eslint/no-unsafe-assignment + // eslint-disable-next-line @typescript-eslint/no-explicit-any const { isNot } = this as any; let pass = true; const invalidLines: Array<{ line: number; content: string }> = []; @@ -108,7 +108,6 @@ function toHaveOnlyValidCharacters(this: Assertion, buffer: TextBuffer) { }; } -// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion expect.extend({ toHaveOnlyValidCharacters, toMatchSvgSnapshot, diff --git a/packages/cli/src/test-utils/mockCommandContext.ts b/packages/cli/src/test-utils/mockCommandContext.ts index 15e6422e1a..6eda7f3109 100644 --- a/packages/cli/src/test-utils/mockCommandContext.ts +++ b/packages/cli/src/test-utils/mockCommandContext.ts @@ -37,14 +37,12 @@ export const createMockCommandContext = ( }, services: { agentContext: null, - settings: { merged: defaultMergedSettings, setValue: vi.fn(), forScope: vi.fn().mockReturnValue({ settings: {} }), } as unknown as LoadedSettings, git: undefined as GitService | undefined, - logger: { log: vi.fn(), logMessage: vi.fn(), @@ -53,7 +51,6 @@ export const createMockCommandContext = ( // eslint-disable-next-line @typescript-eslint/no-explicit-any } as any, // Cast because Logger is a class. 
}, - ui: { addItem: vi.fn(), clear: vi.fn(), @@ -72,7 +69,6 @@ export const createMockCommandContext = ( } as any, session: { sessionShellAllowlist: new Set(), - stats: { sessionStartTime: new Date(), lastPromptTokenCount: 0, @@ -98,7 +94,6 @@ export const createMockCommandContext = ( for (const key in source) { if (Object.prototype.hasOwnProperty.call(source, key)) { const sourceValue = source[key]; - const targetValue = output[key]; if ( @@ -109,7 +104,6 @@ export const createMockCommandContext = ( output[key] = merge(targetValue, sourceValue); } else { // If not, we do a direct assignment. This preserves Date objects and others. - output[key] = sourceValue; } } diff --git a/packages/cli/src/test-utils/mockConfig.ts b/packages/cli/src/test-utils/mockConfig.ts index d4f11212e3..e1505df970 100644 --- a/packages/cli/src/test-utils/mockConfig.ts +++ b/packages/cli/src/test-utils/mockConfig.ts @@ -44,6 +44,7 @@ export const createMockConfig = (overrides: Partial = {}): Config => getDeleteSession: vi.fn(() => undefined), setSessionId: vi.fn(), getSessionId: vi.fn().mockReturnValue('mock-session-id'), + getWorktreeSettings: vi.fn(() => undefined), getContentGeneratorConfig: vi.fn(() => ({ authType: 'google' })), getAcpMode: vi.fn(() => false), isBrowserLaunchSuppressed: vi.fn(() => false), diff --git a/packages/cli/src/test-utils/render.test.tsx b/packages/cli/src/test-utils/render.test.tsx index 7172a99119..3c3f4102a4 100644 --- a/packages/cli/src/test-utils/render.test.tsx +++ b/packages/cli/src/test-utils/render.test.tsx @@ -12,24 +12,18 @@ import { waitFor } from './async.js'; describe('render', () => { it('should render a component', async () => { - const { lastFrame, waitUntilReady, unmount } = render( - Hello World, - ); - await waitUntilReady(); + const { lastFrame, unmount } = await render(Hello World); expect(lastFrame()).toBe('Hello World\n'); unmount(); }); it('should support rerender', async () => { - const { lastFrame, rerender, waitUntilReady, unmount } = 
render( + const { lastFrame, rerender, waitUntilReady, unmount } = await render( Hello, ); - await waitUntilReady(); expect(lastFrame()).toBe('Hello\n'); - await act(async () => { - rerender(World); - }); + await act(async () => rerender(World)); await waitUntilReady(); expect(lastFrame()).toBe('World\n'); unmount(); @@ -42,10 +36,8 @@ describe('render', () => { return Hello; } - const { unmount, waitUntilReady } = render(); - await waitUntilReady(); + const { unmount } = await render(); unmount(); - expect(cleanupMock).toHaveBeenCalled(); }); }); @@ -54,36 +46,27 @@ describe('renderHook', () => { it('should rerender with previous props when called without arguments', async () => { const useTestHook = ({ value }: { value: number }) => { const [count, setCount] = useState(0); - useEffect(() => { - setCount((c) => c + 1); - }, [value]); + useEffect(() => setCount((c) => c + 1), [value]); return { count, value }; }; - const { result, rerender, waitUntilReady, unmount } = renderHook( + const { result, rerender, waitUntilReady, unmount } = await renderHook( useTestHook, - { - initialProps: { value: 1 }, - }, + { initialProps: { value: 1 } }, ); - await waitUntilReady(); expect(result.current.value).toBe(1); await waitFor(() => expect(result.current.count).toBe(1)); // Rerender with new props - await act(async () => { - rerender({ value: 2 }); - }); + await act(async () => rerender({ value: 2 })); await waitUntilReady(); expect(result.current.value).toBe(2); await waitFor(() => expect(result.current.count).toBe(2)); // Rerender without arguments should use previous props (value: 2) // This would previously crash or pass undefined if not fixed - await act(async () => { - rerender(); - }); + await act(async () => rerender()); await waitUntilReady(); expect(result.current.value).toBe(2); // Count should not increase because value didn't change @@ -98,14 +81,11 @@ describe('renderHook', () => { }; const { result, rerender, waitUntilReady, unmount } = - 
renderHook(useTestHook); - await waitUntilReady(); + await renderHook(useTestHook); expect(result.current.count).toBe(0); - await act(async () => { - rerender(); - }); + await act(async () => rerender()); await waitUntilReady(); expect(result.current.count).toBe(0); unmount(); @@ -113,19 +93,14 @@ describe('renderHook', () => { it('should update props if undefined is passed explicitly', async () => { const useTestHook = (val: string | undefined) => val; - const { result, rerender, waitUntilReady, unmount } = renderHook( + const { result, rerender, waitUntilReady, unmount } = await renderHook( useTestHook, - { - initialProps: 'initial' as string | undefined, - }, + { initialProps: 'initial' }, ); - await waitUntilReady(); expect(result.current).toBe('initial'); - await act(async () => { - rerender(undefined); - }); + await act(async () => rerender(undefined)); await waitUntilReady(); expect(result.current).toBeUndefined(); unmount(); diff --git a/packages/cli/src/test-utils/render.tsx b/packages/cli/src/test-utils/render.tsx index 7d298b120d..c4aec2e9cd 100644 --- a/packages/cli/src/test-utils/render.tsx +++ b/packages/cli/src/test-utils/render.tsx @@ -257,13 +257,9 @@ class XtermStdout extends EventEmitter { return currentFrame !== ''; } - // If both are empty, it's a match. - // We consider undefined lastRenderOutput as effectively empty for this check - // to support hook testing where Ink may skip rendering completely. - if ( - (this.lastRenderOutput === undefined || expectedFrame === '') && - currentFrame === '' - ) { + // If Ink expects nothing (no new static content and no dynamic output), + // we consider it a match because the terminal buffer will just hold the historical static content. + if (expectedFrame === '') { return true; } @@ -271,8 +267,8 @@ class XtermStdout extends EventEmitter { return false; } - // If Ink expects nothing but terminal has content, or vice-versa, it's NOT a match. 
- if (expectedFrame === '' || currentFrame === '') { + // If the terminal is empty but Ink expects something, it's not a match. + if (currentFrame === '') { return false; } @@ -380,15 +376,21 @@ export type RenderInstance = { capturedOverflowActions: OverflowActions | undefined; }; +export type RenderWithProvidersInstance = RenderInstance & { + simulateClick: ( + col: number, + row: number, + button?: 0 | 1 | 2, + ) => Promise; +}; + const instances: InkInstance[] = []; -// Wrapper around ink's render that ensures act() is called and uses Xterm for output -export const render = ( +export const render = async ( tree: React.ReactElement, terminalWidth?: number, -): Omit< - RenderInstance, - 'capturedOverflowState' | 'capturedOverflowActions' +): Promise< + Omit > => { const cols = terminalWidth ?? 100; // We use 1000 rows to avoid windows with incorrect snapshots if a correct @@ -437,6 +439,8 @@ export const render = ( instances.push(instance); + await stdout.waitUntilReady(); + return { rerender: (newTree: React.ReactElement) => { act(() => { @@ -520,6 +524,8 @@ const baseMockUiState = { nightly: false, updateInfo: null, pendingHistoryItems: [], + mainControlsRef: () => {}, + rootUiRef: { current: null }, }; export const mockAppState: AppState = { @@ -622,15 +628,7 @@ export const renderWithProviders = async ( }; appState?: AppState; } = {}, -): Promise< - RenderInstance & { - simulateClick: ( - col: number, - row: number, - button?: 0 | 1 | 2, - ) => Promise; - } -> => { +): Promise => { const baseState: UIState = new Proxy( { ...baseMockUiState, ...providedUiState }, { @@ -669,7 +667,7 @@ export const renderWithProviders = async ( ); } - const mainAreaWidth = terminalWidth; + const mainAreaWidth = providedUiState?.mainAreaWidth ?? 
terminalWidth; const finalUiState = { ...baseState, @@ -751,7 +749,10 @@ export const renderWithProviders = async ( ); - const renderResult = render(wrapWithProviders(component), terminalWidth); + const renderResult = await render( + wrapWithProviders(component), + terminalWidth, + ); return { ...renderResult, @@ -765,21 +766,20 @@ export const renderWithProviders = async ( }; }; -export function renderHook( +export async function renderHook( renderCallback: (props: Props) => Result, options?: { initialProps?: Props; wrapper?: React.ComponentType<{ children: React.ReactNode }>; }, -): { +): Promise<{ result: { current: Result }; rerender: (props?: Props) => void; unmount: () => void; waitUntilReady: () => Promise; generateSvg: () => string; -} { +}> { const result = { current: undefined as unknown as Result }; - let currentProps = options?.initialProps as Props; function TestComponent({ @@ -800,17 +800,15 @@ export function renderHook( let waitUntilReady: () => Promise = async () => {}; let generateSvg: () => string = () => ''; - act(() => { - const renderResult = render( - - - , - ); - inkRerender = renderResult.rerender; - unmount = renderResult.unmount; - waitUntilReady = renderResult.waitUntilReady; - generateSvg = renderResult.generateSvg; - }); + const renderResult = await render( + + + , + ); + inkRerender = renderResult.rerender; + unmount = renderResult.unmount; + waitUntilReady = renderResult.waitUntilReady; + generateSvg = renderResult.generateSvg; function rerender(props?: Props) { if (arguments.length > 0) { @@ -864,7 +862,7 @@ export async function renderHookWithProviders( const Wrapper = options.wrapper || (({ children }) => <>{children}); - let renderResult: ReturnType; + let renderResult: RenderWithProvidersInstance; await act(async () => { renderResult = await renderWithProviders( diff --git a/packages/cli/src/test-utils/settings.ts b/packages/cli/src/test-utils/settings.ts index ab2420849d..20d0613f83 100644 --- 
a/packages/cli/src/test-utils/settings.ts +++ b/packages/cli/src/test-utils/settings.ts @@ -46,7 +46,6 @@ export const createMockSettings = ( workspace, isTrusted, errors, - merged: mergedOverride, ...settingsOverrides } = overrides; @@ -61,7 +60,6 @@ export const createMockSettings = ( settings: settingsOverrides, originalSettings: settingsOverrides, }, - (workspace as any) || { path: '', settings: {}, originalSettings: {} }, isTrusted ?? true, errors || [], diff --git a/packages/cli/src/ui/App.test.tsx b/packages/cli/src/ui/App.test.tsx index 7f5e55c022..b836202eb7 100644 --- a/packages/cli/src/ui/App.test.tsx +++ b/packages/cli/src/ui/App.test.tsx @@ -70,9 +70,7 @@ describe('App', () => { cleanUiDetailsVisible: true, quittingMessages: null, dialogsVisible: false, - mainControlsRef: { - current: null, - } as unknown as React.MutableRefObject, + mainControlsRef: vi.fn(), rootUiRef: { current: null, } as unknown as React.MutableRefObject, @@ -94,14 +92,10 @@ describe('App', () => { }; it('should render main content and composer when not quitting', async () => { - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( - , - { - uiState: mockUIState, - settings: createMockSettings({ ui: { useAlternateBuffer: false } }), - }, - ); - await waitUntilReady(); + const { lastFrame, unmount } = await renderWithProviders(, { + uiState: mockUIState, + settings: createMockSettings({ ui: { useAlternateBuffer: false } }), + }); expect(lastFrame()).toContain('Tips for getting started'); expect(lastFrame()).toContain('Notifications'); @@ -115,14 +109,10 @@ describe('App', () => { quittingMessages: [{ id: 1, type: 'user', text: 'test' }], } as UIState; - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( - , - { - uiState: quittingUIState, - settings: createMockSettings({ ui: { useAlternateBuffer: false } }), - }, - ); - await waitUntilReady(); + const { lastFrame, unmount } = await renderWithProviders(, { + uiState: quittingUIState, + 
settings: createMockSettings({ ui: { useAlternateBuffer: false } }), + }); expect(lastFrame()).toContain('Quitting...'); unmount(); @@ -136,14 +126,10 @@ describe('App', () => { pendingHistoryItems: [{ type: 'user', text: 'pending item' }], } as UIState; - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( - , - { - uiState: quittingUIState, - settings: createMockSettings({ ui: { useAlternateBuffer: true } }), - }, - ); - await waitUntilReady(); + const { lastFrame, unmount } = await renderWithProviders(, { + uiState: quittingUIState, + settings: createMockSettings({ ui: { useAlternateBuffer: true } }), + }); expect(lastFrame()).toContain('HistoryItemDisplay'); expect(lastFrame()).toContain('Quitting...'); @@ -156,14 +142,10 @@ describe('App', () => { dialogsVisible: true, } as UIState; - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( - , - { - uiState: dialogUIState, - settings: createMockSettings({ ui: { useAlternateBuffer: true } }), - }, - ); - await waitUntilReady(); + const { lastFrame, unmount } = await renderWithProviders(, { + uiState: dialogUIState, + settings: createMockSettings({ ui: { useAlternateBuffer: true } }), + }); expect(lastFrame()).toContain('Tips for getting started'); expect(lastFrame()).toContain('Notifications'); @@ -183,14 +165,10 @@ describe('App', () => { [stateKey]: true, } as UIState; - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( - , - { - uiState, - settings: createMockSettings({ ui: { useAlternateBuffer: true } }), - }, - ); - await waitUntilReady(); + const { lastFrame, unmount } = await renderWithProviders(, { + uiState, + settings: createMockSettings({ ui: { useAlternateBuffer: true } }), + }); expect(lastFrame()).toContain(`Press Ctrl+${key} again to exit.`); unmount(); @@ -200,14 +178,10 @@ describe('App', () => { it('should render ScreenReaderAppLayout when screen reader is enabled', async () => { (useIsScreenReaderEnabled as 
Mock).mockReturnValue(true); - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( - , - { - uiState: mockUIState, - settings: createMockSettings({ ui: { useAlternateBuffer: true } }), - }, - ); - await waitUntilReady(); + const { lastFrame, unmount } = await renderWithProviders(, { + uiState: mockUIState, + settings: createMockSettings({ ui: { useAlternateBuffer: true } }), + }); expect(lastFrame()).toContain('Notifications'); expect(lastFrame()).toContain('Footer'); @@ -219,14 +193,10 @@ describe('App', () => { it('should render DefaultAppLayout when screen reader is not enabled', async () => { (useIsScreenReaderEnabled as Mock).mockReturnValue(false); - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( - , - { - uiState: mockUIState, - settings: createMockSettings({ ui: { useAlternateBuffer: true } }), - }, - ); - await waitUntilReady(); + const { lastFrame, unmount } = await renderWithProviders(, { + uiState: mockUIState, + settings: createMockSettings({ ui: { useAlternateBuffer: true } }), + }); expect(lastFrame()).toContain('Tips for getting started'); expect(lastFrame()).toContain('Notifications'); @@ -274,15 +244,11 @@ describe('App', () => { vi.spyOn(configWithExperiment, 'isTrustedFolder').mockReturnValue(true); vi.spyOn(configWithExperiment, 'getIdeMode').mockReturnValue(false); - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( - , - { - uiState: stateWithConfirmingTool, - config: configWithExperiment, - settings: createMockSettings({ ui: { useAlternateBuffer: true } }), - }, - ); - await waitUntilReady(); + const { lastFrame, unmount } = await renderWithProviders(, { + uiState: stateWithConfirmingTool, + config: configWithExperiment, + settings: createMockSettings({ ui: { useAlternateBuffer: true } }), + }); expect(lastFrame()).toContain('Tips for getting started'); expect(lastFrame()).toContain('Notifications'); @@ -295,28 +261,20 @@ describe('App', () => { describe('Snapshots', () 
=> { it('renders default layout correctly', async () => { (useIsScreenReaderEnabled as Mock).mockReturnValue(false); - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( - , - { - uiState: mockUIState, - settings: createMockSettings({ ui: { useAlternateBuffer: true } }), - }, - ); - await waitUntilReady(); + const { lastFrame, unmount } = await renderWithProviders(, { + uiState: mockUIState, + settings: createMockSettings({ ui: { useAlternateBuffer: true } }), + }); expect(lastFrame()).toMatchSnapshot(); unmount(); }); it('renders screen reader layout correctly', async () => { (useIsScreenReaderEnabled as Mock).mockReturnValue(true); - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( - , - { - uiState: mockUIState, - settings: createMockSettings({ ui: { useAlternateBuffer: true } }), - }, - ); - await waitUntilReady(); + const { lastFrame, unmount } = await renderWithProviders(, { + uiState: mockUIState, + settings: createMockSettings({ ui: { useAlternateBuffer: true } }), + }); expect(lastFrame()).toMatchSnapshot(); unmount(); }); @@ -326,14 +284,10 @@ describe('App', () => { ...mockUIState, dialogsVisible: true, } as UIState; - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( - , - { - uiState: dialogUIState, - settings: createMockSettings({ ui: { useAlternateBuffer: true } }), - }, - ); - await waitUntilReady(); + const { lastFrame, unmount } = await renderWithProviders(, { + uiState: dialogUIState, + settings: createMockSettings({ ui: { useAlternateBuffer: true } }), + }); expect(lastFrame()).toMatchSnapshot(); unmount(); }); diff --git a/packages/cli/src/ui/AppContainer.test.tsx b/packages/cli/src/ui/AppContainer.test.tsx index 650804025b..3324505778 100644 --- a/packages/cli/src/ui/AppContainer.test.tsx +++ b/packages/cli/src/ui/AppContainer.test.tsx @@ -16,7 +16,7 @@ import { } from 'vitest'; import { render, cleanup, persistentStateMock } from '../test-utils/render.js'; import { 
waitFor } from '../test-utils/async.js'; -import { act, useContext, type ReactElement } from 'react'; +import { act, useContext } from 'react'; import { AppContainer } from './AppContainer.js'; import { SettingsContext } from './contexts/SettingsContext.js'; import { type TrackedToolCall } from './hooks/useToolScheduler.js'; @@ -250,6 +250,15 @@ describe('AppContainer State Management', () => { let mockInitResult: InitializationResult; let mockExtensionManager: MockedObject; + type AppContainerProps = { + settings?: LoadedSettings; + config?: Config; + version?: string; + initResult?: InitializationResult; + startupWarnings?: StartupWarning[]; + resumedSessionData?: ResumedSessionData; + }; + // Helper to generate the AppContainer JSX for render and rerender const getAppContainer = ({ settings = mockSettings, @@ -258,14 +267,7 @@ describe('AppContainer State Management', () => { initResult = mockInitResult, startupWarnings, resumedSessionData, - }: { - settings?: LoadedSettings; - config?: Config; - version?: string; - initResult?: InitializationResult; - startupWarnings?: StartupWarning[]; - resumedSessionData?: ResumedSessionData; - } = {}) => ( + }: AppContainerProps = {}) => ( @@ -282,7 +284,7 @@ describe('AppContainer State Management', () => { ); // Helper to render the AppContainer - const renderAppContainer = (props?: Parameters[0]) => + const renderAppContainer = async (props?: AppContainerProps) => render(getAppContainer(props)); // Create typed mocks for all hooks @@ -487,8 +489,8 @@ describe('AppContainer State Management', () => { // Mock LoadedSettings mockSettings = createMockSettings({ hideBanner: false, - hideFooter: false, hideTips: false, + hideFooter: false, showMemoryUsage: false, theme: 'default', ui: { @@ -514,13 +516,9 @@ describe('AppContainer State Management', () => { describe('Basic Rendering', () => { it('renders without crashing with minimal props', async () => { - let unmount: () => void; - await act(async () => { - const result = 
renderAppContainer(); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount!(); + const { unmount } = await act(async () => renderAppContainer()); + expect(capturedUIState).toBeTruthy(); + unmount(); }); it('renders with startup warnings', async () => { @@ -537,44 +535,32 @@ describe('AppContainer State Management', () => { }, ]; - let unmount: () => void; - await act(async () => { - const result = renderAppContainer({ startupWarnings }); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount!(); + const { unmount } = await act(async () => + renderAppContainer({ startupWarnings }), + ); + expect(capturedUIState).toBeTruthy(); + unmount(); }); it('shows full UI details by default', async () => { - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); + const { unmount } = await act(async () => renderAppContainer()); - await waitFor(() => { - expect(capturedUIState.cleanUiDetailsVisible).toBe(true); - }); - unmount!(); + expect(capturedUIState.cleanUiDetailsVisible).toBe(true); + unmount(); }); it('starts in minimal UI mode when Focus UI preference is persisted', async () => { persistentStateMock.get.mockReturnValueOnce(true); - let unmount: () => void; - await act(async () => { - const result = renderAppContainer({ + const { unmount } = await act(async () => + renderAppContainer({ settings: mockSettings, - }); - unmount = result.unmount; - }); + }), + ); - await waitFor(() => { - expect(capturedUIState.cleanUiDetailsVisible).toBe(false); - }); + expect(capturedUIState.cleanUiDetailsVisible).toBe(false); expect(persistentStateMock.get).toHaveBeenCalledWith('focusUiEnabled'); - unmount!(); + unmount(); }); }); @@ -609,15 +595,9 @@ describe('AppContainer State Management', () => { ], }); - let unmount: (() => void) | undefined; - await act(async () => { - const rendered = renderAppContainer(); - 
unmount = rendered.unmount; - }); + const { unmount } = await act(async () => renderAppContainer()); - await waitFor(() => - expect(terminalNotificationsMocks.notifyViaTerminal).toHaveBeenCalled(), - ); + expect(terminalNotificationsMocks.notifyViaTerminal).toHaveBeenCalled(); expect( terminalNotificationsMocks.buildRunEventNotificationContent, ).toHaveBeenCalledWith( @@ -626,9 +606,7 @@ describe('AppContainer State Management', () => { }), ); - await act(async () => { - unmount?.(); - }); + unmount(); }); it('does not send attention notification when terminal is focused', async () => { @@ -661,19 +639,13 @@ describe('AppContainer State Management', () => { ], }); - let unmount: (() => void) | undefined; - await act(async () => { - const rendered = renderAppContainer(); - unmount = rendered.unmount; - }); + const { unmount } = await act(async () => renderAppContainer()); expect( terminalNotificationsMocks.notifyViaTerminal, ).not.toHaveBeenCalled(); - await act(async () => { - unmount?.(); - }); + unmount(); }); it('sends attention notification when focus reporting is unavailable', async () => { @@ -706,19 +678,11 @@ describe('AppContainer State Management', () => { ], }); - let unmount: (() => void) | undefined; - await act(async () => { - const rendered = renderAppContainer(); - unmount = rendered.unmount; - }); + const { unmount } = await act(async () => renderAppContainer()); - await waitFor(() => - expect(terminalNotificationsMocks.notifyViaTerminal).toHaveBeenCalled(), - ); + expect(terminalNotificationsMocks.notifyViaTerminal).toHaveBeenCalled(); - await act(async () => { - unmount?.(); - }); + unmount(); }); it('sends a macOS notification when a response completes while unfocused', async () => { @@ -732,35 +696,24 @@ describe('AppContainer State Management', () => { streamingState: currentStreamingState, })); - let unmount: (() => void) | undefined; - let rerender: ((tree: ReactElement) => void) | undefined; - - await act(async () => { - const rendered = 
renderAppContainer(); - unmount = rendered.unmount; - rerender = rendered.rerender; - }); + const { unmount, rerender } = await act(async () => renderAppContainer()); currentStreamingState = 'idle'; await act(async () => { - rerender?.(getAppContainer()); + rerender(getAppContainer()); }); - await waitFor(() => - expect( - terminalNotificationsMocks.buildRunEventNotificationContent, - ).toHaveBeenCalledWith( - expect.objectContaining({ - type: 'session_complete', - detail: 'Gemini CLI finished responding.', - }), - ), + expect( + terminalNotificationsMocks.buildRunEventNotificationContent, + ).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'session_complete', + detail: 'Gemini CLI finished responding.', + }), ); expect(terminalNotificationsMocks.notifyViaTerminal).toHaveBeenCalled(); - await act(async () => { - unmount?.(); - }); + unmount(); }); it('sends completion notification when focus reporting is unavailable', async () => { @@ -774,34 +727,23 @@ describe('AppContainer State Management', () => { streamingState: currentStreamingState, })); - let unmount: (() => void) | undefined; - let rerender: ((tree: ReactElement) => void) | undefined; - - await act(async () => { - const rendered = renderAppContainer(); - unmount = rendered.unmount; - rerender = rendered.rerender; - }); + const { unmount, rerender } = await act(async () => renderAppContainer()); currentStreamingState = 'idle'; await act(async () => { - rerender?.(getAppContainer()); + rerender(getAppContainer()); }); - await waitFor(() => - expect( - terminalNotificationsMocks.buildRunEventNotificationContent, - ).toHaveBeenCalledWith( - expect.objectContaining({ - type: 'session_complete', - detail: 'Gemini CLI finished responding.', - }), - ), + expect( + terminalNotificationsMocks.buildRunEventNotificationContent, + ).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'session_complete', + detail: 'Gemini CLI finished responding.', + }), ); - await act(async () => { - unmount?.(); - }); 
+ unmount(); }); it('does not send completion notification when another action-required dialog is pending', async () => { @@ -819,27 +761,18 @@ describe('AppContainer State Management', () => { streamingState: currentStreamingState, })); - let unmount: (() => void) | undefined; - let rerender: ((tree: ReactElement) => void) | undefined; - - await act(async () => { - const rendered = renderAppContainer(); - unmount = rendered.unmount; - rerender = rendered.rerender; - }); + const { unmount, rerender } = await act(async () => renderAppContainer()); currentStreamingState = 'idle'; await act(async () => { - rerender?.(getAppContainer()); + rerender(getAppContainer()); }); expect( terminalNotificationsMocks.notifyViaTerminal, ).not.toHaveBeenCalled(); - await act(async () => { - unmount?.(); - }); + unmount(); }); it('can send repeated attention notifications for the same key after pending state clears', async () => { @@ -875,24 +808,15 @@ describe('AppContainer State Management', () => { pendingHistoryItems, })); - let unmount: (() => void) | undefined; - let rerender: ((tree: ReactElement) => void) | undefined; + const { unmount, rerender } = await act(async () => renderAppContainer()); - await act(async () => { - const rendered = renderAppContainer(); - unmount = rendered.unmount; - rerender = rendered.rerender; - }); - - await waitFor(() => - expect( - terminalNotificationsMocks.notifyViaTerminal, - ).toHaveBeenCalledTimes(1), - ); + expect( + terminalNotificationsMocks.notifyViaTerminal, + ).toHaveBeenCalledTimes(1); pendingHistoryItems = []; await act(async () => { - rerender?.(getAppContainer()); + rerender(getAppContainer()); }); pendingHistoryItems = [ @@ -917,18 +841,14 @@ describe('AppContainer State Management', () => { }, ]; await act(async () => { - rerender?.(getAppContainer()); + rerender(getAppContainer()); }); - await waitFor(() => - expect( - terminalNotificationsMocks.notifyViaTerminal, - ).toHaveBeenCalledTimes(2), - ); + expect( + 
terminalNotificationsMocks.notifyViaTerminal, + ).toHaveBeenCalledTimes(2); - await act(async () => { - unmount?.(); - }); + unmount(); }); it('initializes with theme error from initialization result', async () => { @@ -937,68 +857,53 @@ describe('AppContainer State Management', () => { themeError: 'Failed to load theme', }; - let unmount: () => void; - await act(async () => { - const result = renderAppContainer({ + const { unmount } = await act(async () => + renderAppContainer({ initResult: initResultWithError, - }); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount!(); + }), + ); + expect(capturedUIState).toBeTruthy(); + unmount(); }); - it('handles debug mode state', () => { + it('handles debug mode state', async () => { const debugConfig = makeFakeConfig(); vi.spyOn(debugConfig, 'getDebugMode').mockReturnValue(true); - expect(() => { - renderAppContainer({ config: debugConfig }); - }).not.toThrow(); + const { unmount } = await act(async () => + renderAppContainer({ config: debugConfig }), + ); + unmount(); }); }); describe('Context Providers', () => { it('provides AppContext with correct values', async () => { - let unmount: () => void; - await act(async () => { - const result = renderAppContainer({ version: '2.0.0' }); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); + const { unmount } = await act(async () => + renderAppContainer({ version: '2.0.0' }), + ); + expect(capturedUIState).toBeTruthy(); // Should render and unmount cleanly - expect(() => unmount!()).not.toThrow(); + unmount(); }); it('provides UIStateContext with state management', async () => { - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount!(); + const { unmount } = await act(async () => renderAppContainer()); + expect(capturedUIState).toBeTruthy(); + 
unmount(); }); it('provides UIActionsContext with action handlers', async () => { - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount!(); + const { unmount } = await act(async () => renderAppContainer()); + expect(capturedUIState).toBeTruthy(); + unmount(); }); it('provides ConfigContext with config object', async () => { - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount!(); + const { unmount } = await act(async () => renderAppContainer()); + expect(capturedUIState).toBeTruthy(); + unmount(); }); }); @@ -1006,18 +911,16 @@ describe('AppContainer State Management', () => { it('handles settings with all display options disabled', async () => { const settingsAllHidden = createMockSettings({ hideBanner: true, - hideFooter: true, hideTips: true, + hideFooter: true, showMemoryUsage: false, }); - let unmount: () => void; - await act(async () => { - const result = renderAppContainer({ settings: settingsAllHidden }); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount!(); + const { unmount } = await act(async () => + renderAppContainer({ settings: settingsAllHidden }), + ); + expect(capturedUIState).toBeTruthy(); + unmount(); }); it('handles settings with memory usage enabled', async () => { @@ -1025,13 +928,11 @@ describe('AppContainer State Management', () => { showMemoryUsage: true, }); - let unmount: () => void; - await act(async () => { - const result = renderAppContainer({ settings: settingsWithMemory }); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount!(); + const { unmount } = await act(async () => + renderAppContainer({ settings: settingsWithMemory }), + ); + 
expect(capturedUIState).toBeTruthy(); + unmount(); }); }); @@ -1039,13 +940,11 @@ describe('AppContainer State Management', () => { it.each(['1.0.0', '2.1.3-beta', '3.0.0-nightly'])( 'handles version format: %s', async (version) => { - let unmount: () => void; - await act(async () => { - const result = renderAppContainer({ version }); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount!(); + const { unmount } = await act(async () => + renderAppContainer({ version }), + ); + expect(capturedUIState).toBeTruthy(); + unmount(); }, ); }); @@ -1058,30 +957,30 @@ describe('AppContainer State Management', () => { }); // Should still render without crashing - errors should be handled internally - const { unmount } = renderAppContainer({ config: errorConfig }); + const { unmount } = await act(async () => + renderAppContainer({ config: errorConfig }), + ); unmount(); }); it('handles undefined settings gracefully', async () => { const undefinedSettings = createMockSettings(); - let unmount: () => void; - await act(async () => { - const result = renderAppContainer({ settings: undefinedSettings }); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); - unmount!(); + const { unmount } = await act(async () => + renderAppContainer({ settings: undefinedSettings }), + ); + expect(capturedUIState).toBeTruthy(); + unmount(); }); }); describe('Provider Hierarchy', () => { - it('establishes correct provider nesting order', () => { + it('establishes correct provider nesting order', async () => { // This tests that all the context providers are properly nested // and that the component tree can be built without circular dependencies - const { unmount } = renderAppContainer(); + const { unmount } = await act(async () => renderAppContainer()); - expect(() => unmount()).not.toThrow(); + unmount(); }); }); @@ -1113,40 +1012,32 @@ describe('AppContainer State Management', () => { filePath: 
'/tmp/test-session.json', }; - let unmount: () => void; - await act(async () => { - const result = renderAppContainer({ + const { unmount } = await act(async () => + renderAppContainer({ config: mockConfig, settings: mockSettings, version: '1.0.0', initResult: mockInitResult, resumedSessionData: mockResumedSessionData, - }); - unmount = result.unmount; - }); - await act(async () => { - unmount(); - }); + }), + ); + unmount(); }); it('renders without resumed session data', async () => { - let unmount: () => void; - await act(async () => { - const result = renderAppContainer({ + const { unmount } = await act(async () => + renderAppContainer({ config: mockConfig, settings: mockSettings, version: '1.0.0', initResult: mockInitResult, resumedSessionData: undefined, - }); - unmount = result.unmount; - }); - await act(async () => { - unmount(); - }); + }), + ); + unmount(); }); - it('initializes chat recording service when config has it', () => { + it('initializes chat recording service when config has it', async () => { const mockChatRecordingService = { initialize: vi.fn(), recordMessage: vi.fn(), @@ -1166,18 +1057,19 @@ describe('AppContainer State Management', () => { mockGeminiClient as unknown as ReturnType, ); - expect(() => { + const { unmount } = await act(async () => renderAppContainer({ config: configWithRecording, settings: mockSettings, version: '1.0.0', initResult: mockInitResult, - }); - }).not.toThrow(); + }), + ); + unmount(); }); }); describe('Session Recording Integration', () => { - it('provides chat recording service configuration', () => { + it('provides chat recording service configuration', async () => { const mockChatRecordingService = { initialize: vi.fn(), recordMessage: vi.fn(), @@ -1203,23 +1095,24 @@ describe('AppContainer State Management', () => { 'test-session-123', ); - expect(() => { + const { unmount } = await act(async () => renderAppContainer({ config: configWithRecording, settings: mockSettings, version: '1.0.0', initResult: 
mockInitResult, - }); - }).not.toThrow(); + }), + ); // Verify the recording service structure is correct expect(configWithRecording.getGeminiClient).toBeDefined(); expect(mockGeminiClient.getChatRecordingService).toBeDefined(); expect(mockChatRecordingService.initialize).toBeDefined(); expect(mockChatRecordingService.recordMessage).toBeDefined(); + unmount(); }); - it('handles session recording when messages are added', () => { + it('handles session recording when messages are added', async () => { const mockRecordMessage = vi.fn(); const mockRecordMessageTokens = vi.fn(); @@ -1242,22 +1135,25 @@ describe('AppContainer State Management', () => { mockGeminiClient as unknown as ReturnType, ); - renderAppContainer({ - config: configWithRecording, - settings: mockSettings, - version: '1.0.0', - initResult: mockInitResult, - }); + const { unmount } = await act(async () => + renderAppContainer({ + config: configWithRecording, + settings: mockSettings, + version: '1.0.0', + initResult: mockInitResult, + }), + ); // The actual recording happens through the useHistory hook // which would be triggered by user interactions expect(mockChatRecordingService.initialize).toBeDefined(); expect(mockChatRecordingService.recordMessage).toBeDefined(); + unmount(); }); }); describe('Session Resume Flow', () => { - it('accepts resumed session data', () => { + it('accepts resumed session data', async () => { const mockResumeChat = vi.fn(); const mockGeminiClient = { isInitialized: vi.fn(() => true), @@ -1303,22 +1199,23 @@ describe('AppContainer State Management', () => { filePath: '/tmp/resumed-session.json', }; - expect(() => { + const { unmount } = await act(async () => renderAppContainer({ config: configWithClient, settings: mockSettings, version: '1.0.0', initResult: mockInitResult, resumedSessionData: resumedData, - }); - }).not.toThrow(); + }), + ); // Verify the resume functionality structure is in place expect(mockGeminiClient.resumeChat).toBeDefined(); 
expect(resumedData.conversation.messages).toHaveLength(2); + unmount(); }); - it('does not attempt resume when client is not initialized', () => { + it('does not attempt resume when client is not initialized', async () => { const mockResumeChat = vi.fn(); const mockGeminiClient = { isInitialized: vi.fn(() => false), // Not initialized @@ -1343,21 +1240,24 @@ describe('AppContainer State Management', () => { filePath: '/tmp/session.json', }; - renderAppContainer({ - config: configWithClient, - settings: mockSettings, - version: '1.0.0', - initResult: mockInitResult, - resumedSessionData: resumedData, - }); + const { unmount } = await act(async () => + renderAppContainer({ + config: configWithClient, + settings: mockSettings, + version: '1.0.0', + initResult: mockInitResult, + resumedSessionData: resumedData, + }), + ); // Should not call resumeChat when client is not initialized expect(mockResumeChat).not.toHaveBeenCalled(); + unmount(); }); }); describe('Token Counting from Session Stats', () => { - it('tracks token counts from session messages', () => { + it('tracks token counts from session messages', async () => { // Session stats are provided through the SessionStatsProvider context // in the real app, not through the config directly const mockChatRecordingService = { @@ -1385,33 +1285,30 @@ describe('AppContainer State Management', () => { mockGeminiClient as unknown as ReturnType, ); - renderAppContainer({ - config: configWithRecording, - settings: mockSettings, - version: '1.0.0', - initResult: mockInitResult, - }); + const { unmount } = await act(async () => + renderAppContainer({ + config: configWithRecording, + settings: mockSettings, + version: '1.0.0', + initResult: mockInitResult, + }), + ); // In the actual app, these stats would be displayed in components // and updated as messages are processed through the recording service expect(mockChatRecordingService.recordMessageTokens).toBeDefined(); 
expect(mockChatRecordingService.getCurrentConversation).toBeDefined(); + unmount(); }); }); describe('Quota and Fallback Integration', () => { it('passes a null proQuotaRequest to UIStateContext by default', async () => { // The default mock from beforeEach already sets proQuotaRequest to null - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); - await waitFor(() => { - // Assert that the context value is as expected - expect(capturedUIState.quota.proQuotaRequest).toBeNull(); - }); - unmount!(); + const { unmount } = await act(async () => renderAppContainer()); + // Assert that the context value is as expected + expect(capturedUIState.quota.proQuotaRequest).toBeNull(); + unmount(); }); it('passes a valid proQuotaRequest to UIStateContext when provided by the hook', async () => { @@ -1427,16 +1324,10 @@ describe('AppContainer State Management', () => { }); // Act: Render the container - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); - await waitFor(() => { - // Assert: The mock request is correctly passed through the context - expect(capturedUIState.quota.proQuotaRequest).toEqual(mockRequest); - }); - unmount!(); + const { unmount } = await act(async () => renderAppContainer()); + // Assert: The mock request is correctly passed through the context + expect(capturedUIState.quota.proQuotaRequest).toEqual(mockRequest); + unmount(); }); it('passes the handleProQuotaChoice function to UIActionsContext', async () => { @@ -1448,22 +1339,16 @@ describe('AppContainer State Management', () => { }); // Act: Render the container - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); - await waitFor(() => { - // Assert: The action in the context is the mock handler we provided - expect(capturedUIActions.handleProQuotaChoice).toBe(mockHandler); - }); + const { 
unmount } = await act(async () => renderAppContainer()); + // Assert: The action in the context is the mock handler we provided + expect(capturedUIActions.handleProQuotaChoice).toBe(mockHandler); // You can even verify that the plumbed function is callable act(() => { capturedUIActions.handleProQuotaChoice('retry_later'); }); expect(mockHandler).toHaveBeenCalledWith('retry_later'); - unmount!(); + unmount(); }); }); @@ -1479,7 +1364,7 @@ describe('AppContainer State Management', () => { expect(stdout).toBe(mocks.mockStdout); }); - it('should update terminal title with Working… when showStatusInTitle is false', () => { + it('should update terminal title with Working… when showStatusInTitle is false', async () => { // Arrange: Set up mock settings with showStatusInTitle disabled const mockSettingsWithShowStatusFalse = createMockSettings({ ui: { @@ -1496,9 +1381,11 @@ describe('AppContainer State Management', () => { }); // Act: Render the container - const { unmount } = renderAppContainer({ - settings: mockSettingsWithShowStatusFalse, - }); + const { unmount } = await act(async () => + renderAppContainer({ + settings: mockSettingsWithShowStatusFalse, + }), + ); // Assert: Check that title was updated with "Working…" const titleWrites = mocks.mockStdout.write.mock.calls.filter((call) => @@ -1512,7 +1399,7 @@ describe('AppContainer State Management', () => { unmount(); }); - it('should use legacy terminal title when dynamicWindowTitle is false', () => { + it('should use legacy terminal title when dynamicWindowTitle is false', async () => { // Arrange: Set up mock settings with dynamicWindowTitle disabled const mockSettingsWithDynamicTitleFalse = createMockSettings({ ui: { @@ -1529,9 +1416,11 @@ describe('AppContainer State Management', () => { }); // Act: Render the container - const { unmount } = renderAppContainer({ - settings: mockSettingsWithDynamicTitleFalse, - }); + const { unmount } = await act(async () => + renderAppContainer({ + settings: 
mockSettingsWithDynamicTitleFalse, + }), + ); // Assert: Check that legacy title was used const titleWrites = mocks.mockStdout.write.mock.calls.filter((call) => @@ -1545,7 +1434,7 @@ describe('AppContainer State Management', () => { unmount(); }); - it('should not update terminal title when hideWindowTitle is true', () => { + it('should not update terminal title when hideWindowTitle is true', async () => { // Arrange: Set up mock settings with hideWindowTitle enabled const mockSettingsWithHideTitleTrue = createMockSettings({ ui: { @@ -1555,9 +1444,11 @@ describe('AppContainer State Management', () => { }); // Act: Render the container - const { unmount } = renderAppContainer({ - settings: mockSettingsWithHideTitleTrue, - }); + const { unmount } = await act(async () => + renderAppContainer({ + settings: mockSettingsWithHideTitleTrue, + }), + ); // Assert: Check that no title-related writes occurred const titleWrites = mocks.mockStdout.write.mock.calls.filter((call) => @@ -1568,7 +1459,7 @@ describe('AppContainer State Management', () => { unmount(); }); - it('should update terminal title with thought subject when in active state', () => { + it('should update terminal title with thought subject when in active state', async () => { // Arrange: Set up mock settings with showStatusInTitle enabled const mockSettingsWithTitleEnabled = createMockSettings({ ui: { @@ -1586,9 +1477,11 @@ describe('AppContainer State Management', () => { }); // Act: Render the container - const { unmount } = renderAppContainer({ - settings: mockSettingsWithTitleEnabled, - }); + const { unmount } = await act(async () => + renderAppContainer({ + settings: mockSettingsWithTitleEnabled, + }), + ); // Assert: Check that title was updated with thought subject and suffix const titleWrites = mocks.mockStdout.write.mock.calls.filter((call) => @@ -1602,7 +1495,7 @@ describe('AppContainer State Management', () => { unmount(); }); - it('should update terminal title with default text when in Idle state and 
no thought subject', () => { + it('should update terminal title with default text when in Idle state and no thought subject', async () => { // Arrange: Set up mock settings with showStatusInTitle enabled const mockSettingsWithTitleEnabled = createMockSettings({ ui: { @@ -1615,9 +1508,11 @@ describe('AppContainer State Management', () => { mockedUseGeminiStream.mockReturnValue(DEFAULT_GEMINI_STREAM_MOCK); // Act: Render the container - const { unmount } = renderAppContainer({ - settings: mockSettingsWithTitleEnabled, - }); + const { unmount } = await act(async () => + renderAppContainer({ + settings: mockSettingsWithTitleEnabled, + }), + ); // Assert: Check that title was updated with default Idle text const titleWrites = mocks.mockStdout.write.mock.calls.filter((call) => @@ -1649,13 +1544,11 @@ describe('AppContainer State Management', () => { }); // Act: Render the container - let unmount: () => void; - await act(async () => { - const result = renderAppContainer({ + const { unmount } = await act(async () => + renderAppContainer({ settings: mockSettingsWithTitleEnabled, - }); - unmount = result.unmount; - }); + }), + ); // Assert: Check that title was updated with confirmation text const titleWrites = mocks.mockStdout.write.mock.calls.filter((call) => @@ -1666,7 +1559,7 @@ describe('AppContainer State Management', () => { expect(titleWrites[0][0]).toBe( `\x1b]0;${'✋ Action Required (workspace)'.padEnd(80, ' ')}\x07`, ); - unmount!(); + unmount(); }); describe('Shell Focus Action Required', () => { @@ -1712,9 +1605,11 @@ describe('AppContainer State Management', () => { vi.spyOn(mockConfig, 'isInteractiveShellEnabled').mockReturnValue(true); // Act: Render the container (embeddedShellFocused is false by default in state) - const { unmount } = renderAppContainer({ - settings: mockSettingsWithTitleEnabled, - }); + const { unmount } = await act(async () => + renderAppContainer({ + settings: mockSettingsWithTitleEnabled, + }), + ); // Initially it should show the 
working status const titleWrites = mocks.mockStdout.write.mock.calls.filter((call) => @@ -1773,9 +1668,11 @@ describe('AppContainer State Management', () => { vi.spyOn(mockConfig, 'isInteractive').mockReturnValue(true); vi.spyOn(mockConfig, 'isInteractiveShellEnabled').mockReturnValue(true); - const { unmount } = renderAppContainer({ - settings: mockSettingsWithTitleEnabled, - }); + const { unmount } = await act(async () => + renderAppContainer({ + settings: mockSettingsWithTitleEnabled, + }), + ); // Fast-forward time by 65 seconds - should still NOT be Action Required await act(async () => { @@ -1830,9 +1727,11 @@ describe('AppContainer State Management', () => { vi.spyOn(mockConfig, 'isInteractive').mockReturnValue(true); vi.spyOn(mockConfig, 'isInteractiveShellEnabled').mockReturnValue(true); - const { unmount } = renderAppContainer({ - settings: mockSettingsWithTitleEnabled, - }); + const { unmount } = await act(async () => + renderAppContainer({ + settings: mockSettingsWithTitleEnabled, + }), + ); // Fast-forward time by 65 seconds await act(async () => { @@ -1875,9 +1774,11 @@ describe('AppContainer State Management', () => { vi.spyOn(mockConfig, 'isInteractiveShellEnabled').mockReturnValue(true); // Act: Render the container - const { unmount, rerender } = renderAppContainer({ - settings: mockSettingsWithTitleEnabled, - }); + const { unmount, rerender } = await act(async () => + renderAppContainer({ + settings: mockSettingsWithTitleEnabled, + }), + ); // Fast-forward time by 20 seconds await act(async () => { @@ -1931,7 +1832,7 @@ describe('AppContainer State Management', () => { }); }); - it('should pad title to exactly 80 characters', () => { + it('should pad title to exactly 80 characters', async () => { // Arrange: Set up mock settings with showStatusInTitle enabled const mockSettingsWithTitleEnabled = createMockSettings({ ui: { @@ -1949,9 +1850,11 @@ describe('AppContainer State Management', () => { }); // Act: Render the container - const { unmount } 
= renderAppContainer({ - settings: mockSettingsWithTitleEnabled, - }); + const { unmount } = await act(async () => + renderAppContainer({ + settings: mockSettingsWithTitleEnabled, + }), + ); // Assert: Check that title is padded to exactly 80 characters const titleWrites = mocks.mockStdout.write.mock.calls.filter((call) => @@ -1966,7 +1869,7 @@ describe('AppContainer State Management', () => { unmount(); }); - it('should use correct ANSI escape code format', () => { + it('should use correct ANSI escape code format', async () => { // Arrange: Set up mock settings with showStatusInTitle enabled const mockSettingsWithTitleEnabled = createMockSettings({ ui: { @@ -1984,9 +1887,11 @@ describe('AppContainer State Management', () => { }); // Act: Render the container - const { unmount } = renderAppContainer({ - settings: mockSettingsWithTitleEnabled, - }); + const { unmount } = await act(async () => + renderAppContainer({ + settings: mockSettingsWithTitleEnabled, + }), + ); // Assert: Check that the correct ANSI escape sequence is used const titleWrites = mocks.mockStdout.write.mock.calls.filter((call) => @@ -1999,7 +1904,7 @@ describe('AppContainer State Management', () => { unmount(); }); - it('should use CLI_TITLE environment variable when set', () => { + it('should use CLI_TITLE environment variable when set', async () => { // Arrange: Set up mock settings with showStatusInTitle disabled (so it shows suffix) const mockSettingsWithTitleDisabled = createMockSettings({ ui: { @@ -2018,9 +1923,11 @@ describe('AppContainer State Management', () => { }); // Act: Render the container - const { unmount } = renderAppContainer({ - settings: mockSettingsWithTitleDisabled, - }); + const { unmount } = await act(async () => + renderAppContainer({ + settings: mockSettingsWithTitleDisabled, + }), + ); // Assert: Check that title was updated with CLI_TITLE value const titleWrites = mocks.mockStdout.write.mock.calls.filter((call) => @@ -2046,7 +1953,7 @@ describe('AppContainer State 
Management', () => { }); it('should set and clear the queue error message after a timeout', async () => { - const { rerender, unmount } = renderAppContainer(); + const { rerender, unmount } = await act(async () => renderAppContainer()); await act(async () => { vi.advanceTimersByTime(0); }); @@ -2068,7 +1975,7 @@ describe('AppContainer State Management', () => { }); it('should reset the timer if a new error message is set', async () => { - const { rerender, unmount } = renderAppContainer(); + const { rerender, unmount } = await act(async () => renderAppContainer()); await act(async () => { vi.advanceTimersByTime(0); }); @@ -2110,11 +2017,11 @@ describe('AppContainer State Management', () => { let mockCancelOngoingRequest: Mock; let rerender: () => void; let unmount: () => void; - let stdin: ReturnType['stdin']; + let stdin: Awaited>['stdin']; // Helper function to reduce boilerplate in tests const setupKeypressTest = async () => { - const renderResult = renderAppContainer(); + const renderResult = await act(async () => renderAppContainer()); stdin = renderResult.stdin; await act(async () => { vi.advanceTimersByTime(0); @@ -2250,13 +2157,8 @@ describe('AppContainer State Management', () => { expect(mockHandleSlashCommand).not.toHaveBeenCalled(); pressKey('\x04'); // Ctrl+D - // Now count is 2, it should quit. - expect(mockHandleSlashCommand).toHaveBeenCalledWith( - '/quit', - undefined, - undefined, - false, - ); + // It should still not quit because buffer is non-empty. 
+ expect(mockHandleSlashCommand).not.toHaveBeenCalled(); unmount(); }); @@ -2328,7 +2230,7 @@ describe('AppContainer State Management', () => { activePtyId: 1, }); - const renderResult = render(getAppContainer()); + const renderResult = await act(async () => render(getAppContainer())); await act(async () => { vi.advanceTimersByTime(0); }); @@ -2446,7 +2348,7 @@ describe('AppContainer State Management', () => { let unmount: () => void; const setupShortcutsVisibilityTest = async () => { - const renderResult = renderAppContainer(); + const renderResult = await act(async () => renderAppContainer()); await act(async () => { vi.advanceTimersByTime(0); }); @@ -2522,9 +2424,7 @@ describe('AppContainer State Management', () => { await act(async () => { rerender(); }); - await waitFor(() => { - expect(capturedUIState.shortcutsHelpVisible).toBe(false); - }); + expect(capturedUIState.shortcutsHelpVisible).toBe(false); unmount(); }); @@ -2553,9 +2453,7 @@ describe('AppContainer State Management', () => { await act(async () => { rerender(); }); - await waitFor(() => { - expect(capturedUIState.shortcutsHelpVisible).toBe(false); - }); + expect(capturedUIState.shortcutsHelpVisible).toBe(false); unmount(); }); @@ -2564,7 +2462,7 @@ describe('AppContainer State Management', () => { describe('Copy Mode (CTRL+S)', () => { let rerender: () => void; let unmount: () => void; - let stdin: ReturnType['stdin']; + let stdin: Awaited>['stdin']; const setupCopyModeTest = async ( isAlternateMode = false, @@ -2602,7 +2500,7 @@ describe('AppContainer State Management', () => { ); - const renderResult = render(getTree(testSettings)); + const renderResult = await act(async () => render(getTree(testSettings))); stdin = renderResult.stdin; await act(async () => { vi.advanceTimersByTime(0); @@ -2792,15 +2690,10 @@ describe('AppContainer State Management', () => { closeModelDialog: vi.fn(), }); - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = 
result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); + const { unmount } = await act(async () => renderAppContainer()); expect(capturedUIState.isModelDialogOpen).toBe(true); - unmount!(); + unmount(); }); it('should provide model dialog actions in the UIActionsContext', async () => { @@ -2812,45 +2705,29 @@ describe('AppContainer State Management', () => { closeModelDialog: mockCloseModelDialog, }); - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); + const { unmount } = await act(async () => renderAppContainer()); // Verify that the actions are correctly passed through context act(() => { capturedUIActions.closeModelDialog(); }); expect(mockCloseModelDialog).toHaveBeenCalled(); - unmount!(); + unmount(); }); }); describe('Agent Configuration Dialog Integration', () => { it('should initialize with dialog closed and no agent selected', async () => { - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); - + const { unmount } = await act(async () => renderAppContainer()); expect(capturedUIState.isAgentConfigDialogOpen).toBe(false); expect(capturedUIState.selectedAgentName).toBeUndefined(); expect(capturedUIState.selectedAgentDisplayName).toBeUndefined(); expect(capturedUIState.selectedAgentDefinition).toBeUndefined(); - unmount!(); + unmount(); }); it('should update state when openAgentConfigDialog is called', async () => { - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); + const { unmount } = await act(async () => renderAppContainer()); const agentDefinition = { name: 'test-agent' }; act(() => { @@ -2865,16 +2742,11 @@ 
describe('AppContainer State Management', () => { expect(capturedUIState.selectedAgentName).toBe('test-agent'); expect(capturedUIState.selectedAgentDisplayName).toBe('Test Agent'); expect(capturedUIState.selectedAgentDefinition).toEqual(agentDefinition); - unmount!(); + unmount(); }); it('should clear state when closeAgentConfigDialog is called', async () => { - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); + const { unmount } = await act(async () => renderAppContainer()); const agentDefinition = { name: 'test-agent' }; act(() => { @@ -2895,31 +2767,26 @@ describe('AppContainer State Management', () => { expect(capturedUIState.selectedAgentName).toBeUndefined(); expect(capturedUIState.selectedAgentDisplayName).toBeUndefined(); expect(capturedUIState.selectedAgentDefinition).toBeUndefined(); - unmount!(); + unmount(); }); }); describe('CoreEvents Integration', () => { it('subscribes to UserFeedback and drains backlog on mount', async () => { - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); + const { unmount } = await act(async () => renderAppContainer()); expect(mockCoreEvents.on).toHaveBeenCalledWith( CoreEvent.UserFeedback, expect.any(Function), ); expect(mockCoreEvents.drainBacklogs).toHaveBeenCalledTimes(1); - unmount!(); + unmount(); }); it('unsubscribes from UserFeedback on unmount', async () => { let unmount: () => void; await act(async () => { - const result = renderAppContainer(); + const result = await renderAppContainer(); unmount = result.unmount; }); await waitFor(() => expect(capturedUIState).toBeTruthy()); @@ -2935,7 +2802,7 @@ describe('AppContainer State Management', () => { it('adds history item when UserFeedback event is received', async () => { let unmount: () => void; await 
act(async () => { - const result = renderAppContainer(); + const result = await renderAppContainer(); unmount = result.unmount; }); await waitFor(() => expect(capturedUIState).toBeTruthy()); @@ -2971,7 +2838,7 @@ describe('AppContainer State Management', () => { let unmount: () => void; await act(async () => { - const result = renderAppContainer(); + const result = await renderAppContainer(); unmount = result.unmount; }); await waitFor(() => { @@ -3004,7 +2871,7 @@ describe('AppContainer State Management', () => { let unmount: () => void; await act(async () => { - const result = renderAppContainer(); + const result = await renderAppContainer(); unmount = result.unmount; }); await waitFor(() => expect(capturedUIState).toBeTruthy()); @@ -3016,7 +2883,7 @@ describe('AppContainer State Management', () => { it('handles consent request events', async () => { let unmount: () => void; await act(async () => { - const result = renderAppContainer(); + const result = await renderAppContainer(); unmount = result.unmount; }); await waitFor(() => expect(capturedUIState).toBeTruthy()); @@ -3053,7 +2920,7 @@ describe('AppContainer State Management', () => { it('unsubscribes from ConsentRequest on unmount', async () => { let unmount: () => void; await act(async () => { - const result = renderAppContainer(); + const result = await renderAppContainer(); unmount = result.unmount; }); await waitFor(() => expect(capturedUIState).toBeTruthy()); @@ -3076,7 +2943,7 @@ describe('AppContainer State Management', () => { }); let unmount: () => void; await act(async () => { - const result = renderAppContainer(); + const result = await renderAppContainer(); unmount = result.unmount; }); await waitFor(() => { @@ -3104,12 +2971,7 @@ describe('AppContainer State Management', () => { }); it('preserves buffer when cancelling, even if empty (user is in control)', async () => { - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); - 
await waitFor(() => expect(capturedUIState).toBeTruthy()); + const { unmount } = await act(async () => renderAppContainer()); const { onCancelSubmit } = extractUseGeminiStreamArgs( mockedUseGeminiStream.mock.lastCall!, @@ -3122,7 +2984,7 @@ describe('AppContainer State Management', () => { // Should NOT modify buffer when cancelling - user is in control expect(mockSetText).not.toHaveBeenCalled(); - unmount!(); + unmount(); }); it('preserves prompt text when cancelling streaming, even if same as last message (regression test for issue #13387)', async () => { @@ -3140,12 +3002,7 @@ describe('AppContainer State Management', () => { initializeFromLogger: vi.fn(), }); - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); + const { unmount } = await act(async () => renderAppContainer()); const { onCancelSubmit } = extractUseGeminiStreamArgs( mockedUseGeminiStream.mock.lastCall!, @@ -3159,7 +3016,7 @@ describe('AppContainer State Management', () => { // Should NOT call setText - prompt should be preserved regardless of content expect(mockSetText).not.toHaveBeenCalled(); - unmount!(); + unmount(); }); it('restores the prompt when onCancelSubmit is called with shouldRestorePrompt=true (or undefined)', async () => { @@ -3170,14 +3027,8 @@ describe('AppContainer State Management', () => { initializeFromLogger: vi.fn(), }); - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); - await waitFor(() => - expect(capturedUIState.userMessages).toContain('previous message'), - ); + const { unmount } = await act(async () => renderAppContainer()); + expect(capturedUIState.userMessages).toContain('previous message'); const { onCancelSubmit } = extractUseGeminiStreamArgs( mockedUseGeminiStream.mock.lastCall!, @@ -3187,11 +3038,9 @@ describe('AppContainer State Management', () => { 
onCancelSubmit(true); }); - await waitFor(() => { - expect(mockSetText).toHaveBeenCalledWith('previous message'); - }); + expect(mockSetText).toHaveBeenCalledWith('previous message'); - unmount!(); + unmount(); }); it('input history is independent from conversation history (survives /clear)', async () => { @@ -3204,18 +3053,10 @@ describe('AppContainer State Management', () => { initializeFromLogger: vi.fn(), }); - let rerender: (tree: ReactElement) => void; - let unmount; - await act(async () => { - const result = renderAppContainer(); - rerender = result.rerender; - unmount = result.unmount; - }); + const { rerender, unmount } = await act(async () => renderAppContainer()); // Verify userMessages is populated from inputHistory - await waitFor(() => - expect(capturedUIState.userMessages).toContain('first prompt'), - ); + expect(capturedUIState.userMessages).toContain('first prompt'); expect(capturedUIState.userMessages).toContain('second prompt'); // Clear the conversation history (simulating /clear command) @@ -3238,7 +3079,7 @@ describe('AppContainer State Management', () => { expect(capturedUIState.userMessages).toContain('first prompt'); expect(capturedUIState.userMessages).toContain('second prompt'); - unmount!(); + unmount(); }); }); @@ -3253,14 +3094,10 @@ describe('AppContainer State Management', () => { // Clear previous calls mocks.mockStdout.write.mockClear(); - let compUnmount: () => void = () => {}; - await act(async () => { - const { unmount } = renderAppContainer(); - compUnmount = unmount; - }); + const { unmount } = await act(async () => renderAppContainer()); // Allow async effects to run - await waitFor(() => expect(capturedUIState).toBeTruthy()); + expect(capturedUIState).toBeTruthy(); // Wait for fetchBannerTexts to complete await act(async () => { @@ -3273,7 +3110,7 @@ describe('AppContainer State Management', () => { ); expect(clearTerminalCalls).toHaveLength(0); - compUnmount(); + unmount(); }); }); @@ -3284,14 +3121,13 @@ 
describe('AppContainer State Management', () => { ); vi.mocked(checkPermissions).mockResolvedValue([]); - let unmount: () => void; - await act(async () => { - unmount = renderAppContainer({ + const { unmount } = await act(async () => + renderAppContainer({ settings: createMockSettings({ ui: { useAlternateBuffer: false } }), - }).unmount; - }); + }), + ); - await waitFor(() => expect(capturedUIActions).toBeTruthy()); + expect(capturedUIActions).toBeTruthy(); // Expand first act(() => capturedUIActions.setConstrainHeight(false)); @@ -3309,7 +3145,7 @@ describe('AppContainer State Management', () => { expect(mocks.mockStdout.write).toHaveBeenCalledWith( ansiEscapes.clearTerminal, ); - unmount!(); + unmount(); }); it('resets expansion state on submission when in alternate buffer without clearing terminal', async () => { @@ -3320,14 +3156,13 @@ describe('AppContainer State Management', () => { vi.spyOn(mockConfig, 'getUseAlternateBuffer').mockReturnValue(true); - let unmount: () => void; - await act(async () => { - unmount = renderAppContainer({ + const { unmount } = await act(async () => + renderAppContainer({ settings: createMockSettings({ ui: { useAlternateBuffer: true } }), - }).unmount; - }); + }), + ); - await waitFor(() => expect(capturedUIActions).toBeTruthy()); + expect(capturedUIActions).toBeTruthy(); // Expand first act(() => capturedUIActions.setConstrainHeight(false)); @@ -3345,7 +3180,7 @@ describe('AppContainer State Management', () => { expect(mocks.mockStdout.write).not.toHaveBeenCalledWith( ansiEscapes.clearTerminal, ); - unmount!(); + unmount(); }); }); @@ -3358,13 +3193,9 @@ describe('AppContainer State Management', () => { vi.useRealTimers(); }); - it('sets showIsExpandableHint when overflow occurs in Standard Mode and hides after 10s', async () => { - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); + it('should 
set showIsExpandableHint when overflow occurs in Standard Mode and hides after 10s', async () => { + const { unmount } = await act(async () => renderAppContainer()); + await waitFor(() => expect(capturedOverflowActions).toBeTruthy()); // Trigger overflow act(() => { @@ -3390,16 +3221,12 @@ describe('AppContainer State Management', () => { expect(capturedUIState.showIsExpandableHint).toBe(false); }); - unmount!(); + unmount(); }); it('resets the hint timer when a new component overflows (overflowingIdsSize increases)', async () => { - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); + const { unmount } = await act(async () => renderAppContainer()); + await waitFor(() => expect(capturedOverflowActions).toBeTruthy()); // 1. Trigger first overflow act(() => { @@ -3447,18 +3274,12 @@ describe('AppContainer State Management', () => { expect(capturedUIState.showIsExpandableHint).toBe(false); }); - unmount!(); + unmount(); }); it('toggles expansion state and resets the hint timer when Ctrl+O is pressed in Standard Mode', async () => { - let unmount: () => void; - let stdin: ReturnType['stdin']; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - stdin = result.stdin; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); + const { stdin, unmount } = await act(async () => renderAppContainer()); + await waitFor(() => expect(capturedOverflowActions).toBeTruthy()); // Initial state is constrainHeight = true expect(capturedUIState.constrainHeight).toBe(true); @@ -3483,10 +3304,8 @@ describe('AppContainer State Management', () => { stdin.write('\x0f'); // \x0f is Ctrl+O }); - await waitFor(() => { - // constrainHeight should toggle - expect(capturedUIState.constrainHeight).toBe(false); - }); + // constrainHeight should toggle + expect(capturedUIState.constrainHeight).toBe(false); // Advance 
enough that the original timer would have expired if it hadn't reset act(() => { @@ -3505,18 +3324,12 @@ describe('AppContainer State Management', () => { expect(capturedUIState.showIsExpandableHint).toBe(false); }); - unmount!(); + unmount(); }); it('toggles Ctrl+O multiple times and verifies the hint disappears exactly after the last toggle', async () => { - let unmount: () => void; - let stdin: ReturnType['stdin']; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - stdin = result.stdin; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); + const { stdin, unmount } = await act(async () => renderAppContainer()); + await waitFor(() => expect(capturedOverflowActions).toBeTruthy()); // Initial state is constrainHeight = true expect(capturedUIState.constrainHeight).toBe(true); @@ -3540,9 +3353,7 @@ describe('AppContainer State Management', () => { act(() => { stdin.write('\x0f'); // Ctrl+O }); - await waitFor(() => { - expect(capturedUIState.constrainHeight).toBe(false); - }); + expect(capturedUIState.constrainHeight).toBe(false); // Wait 1 second act(() => { @@ -3554,9 +3365,7 @@ describe('AppContainer State Management', () => { act(() => { stdin.write('\x0f'); // Ctrl+O }); - await waitFor(() => { - expect(capturedUIState.constrainHeight).toBe(true); - }); + expect(capturedUIState.constrainHeight).toBe(true); // Wait 1 second act(() => { @@ -3568,9 +3377,7 @@ describe('AppContainer State Management', () => { act(() => { stdin.write('\x0f'); // Ctrl+O }); - await waitFor(() => { - expect(capturedUIState.constrainHeight).toBe(false); - }); + expect(capturedUIState.constrainHeight).toBe(false); // Now we wait just before the timeout from the LAST toggle. // It should still be true. 
@@ -3588,7 +3395,7 @@ describe('AppContainer State Management', () => { expect(capturedUIState.showIsExpandableHint).toBe(false); }); - unmount!(); + unmount(); }); it('DOES set showIsExpandableHint when overflow occurs in Alternate Buffer Mode', async () => { @@ -3598,14 +3405,12 @@ describe('AppContainer State Management', () => { vi.spyOn(mockConfig, 'getUseAlternateBuffer').mockReturnValue(true); - let unmount: () => void; - await act(async () => { - const result = renderAppContainer({ + const { unmount } = await act(async () => + renderAppContainer({ settings: settingsWithAlternateBuffer, - }); - unmount = result.unmount; - }); - await waitFor(() => expect(capturedUIState).toBeTruthy()); + }), + ); + await waitFor(() => expect(capturedOverflowActions).toBeTruthy()); // Trigger overflow act(() => { @@ -3617,7 +3422,7 @@ describe('AppContainer State Management', () => { expect(capturedUIState.showIsExpandableHint).toBe(true); }); - unmount!(); + unmount(); }); }); @@ -3628,10 +3433,9 @@ describe('AppContainer State Management', () => { ); vi.mocked(checkPermissions).mockResolvedValue(['/test/file.txt']); - let unmount: () => void; - await act(async () => (unmount = renderAppContainer().unmount)); + const { unmount } = await act(async () => renderAppContainer()); - await waitFor(() => expect(capturedUIActions).toBeTruthy()); + expect(capturedUIActions).toBeTruthy(); await act(async () => capturedUIActions.handleFinalSubmit('read @file.txt'), @@ -3641,7 +3445,7 @@ describe('AppContainer State Management', () => { expect(capturedUIState.permissionConfirmationRequest?.files).toEqual([ '/test/file.txt', ]); - await act(async () => unmount!()); + unmount(); }); it.each([true, false])( @@ -3657,10 +3461,9 @@ describe('AppContainer State Management', () => { ); const { submitQuery } = mockedUseGeminiStream(); - let unmount: () => void; - await act(async () => (unmount = renderAppContainer().unmount)); + const { unmount } = await act(async () => renderAppContainer()); - 
await waitFor(() => expect(capturedUIActions).toBeTruthy()); + expect(capturedUIActions).toBeTruthy(); await act(async () => capturedUIActions.handleFinalSubmit('read @file.txt'), @@ -3679,7 +3482,7 @@ describe('AppContainer State Management', () => { } expect(submitQuery).toHaveBeenCalledWith('read @file.txt'); expect(capturedUIState.permissionConfirmationRequest).toBeNull(); - await act(async () => unmount!()); + unmount(); }, ); }); @@ -3692,17 +3495,11 @@ describe('AppContainer State Management', () => { pendingHistoryItems: [], }); - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); + const { unmount } = await act(async () => renderAppContainer()); - await waitFor(() => { - expect(capturedUIState).toBeTruthy(); - expect(capturedUIState.allowPlanMode).toBe(true); - }); - unmount!(); + expect(capturedUIState).toBeTruthy(); + expect(capturedUIState.allowPlanMode).toBe(true); + unmount(); }); it('should NOT allow plan mode when disabled in config', async () => { @@ -3712,17 +3509,11 @@ describe('AppContainer State Management', () => { pendingHistoryItems: [], }); - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); + const { unmount } = await act(async () => renderAppContainer()); - await waitFor(() => { - expect(capturedUIState).toBeTruthy(); - expect(capturedUIState.allowPlanMode).toBe(false); - }); - unmount!(); + expect(capturedUIState).toBeTruthy(); + expect(capturedUIState.allowPlanMode).toBe(false); + unmount(); }); it('should NOT allow plan mode when streaming', async () => { @@ -3733,17 +3524,11 @@ describe('AppContainer State Management', () => { pendingHistoryItems: [], }); - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); + const { unmount } = await act(async () => renderAppContainer()); - await waitFor(() => { - 
expect(capturedUIState).toBeTruthy(); - expect(capturedUIState.allowPlanMode).toBe(false); - }); - unmount!(); + expect(capturedUIState).toBeTruthy(); + expect(capturedUIState.allowPlanMode).toBe(false); + unmount(); }); it('should NOT allow plan mode when a tool is awaiting confirmation', async () => { @@ -3764,17 +3549,11 @@ describe('AppContainer State Management', () => { ], }); - let unmount: () => void; - await act(async () => { - const result = renderAppContainer(); - unmount = result.unmount; - }); + const { unmount } = await act(async () => renderAppContainer()); - await waitFor(() => { - expect(capturedUIState).toBeTruthy(); - expect(capturedUIState.allowPlanMode).toBe(false); - }); - unmount!(); + expect(capturedUIState).toBeTruthy(); + expect(capturedUIState.allowPlanMode).toBe(false); + unmount(); }); }); }); diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index 07edb72642..d5b34915bc 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -14,7 +14,7 @@ import { } from 'react'; import { type DOMElement, - measureElement, + ResizeObserver, useApp, useStdout, useStdin, @@ -30,8 +30,6 @@ import { import { ConfigContext } from './contexts/ConfigContext.js'; import { type HistoryItem, - type HistoryItemWithoutId, - type HistoryItemToolGroup, AuthState, type ConfirmationRequest, type PermissionConfirmationRequest, @@ -81,7 +79,6 @@ import { type AgentsDiscoveredPayload, ChangeAuthRequestedError, ProjectIdRequiredError, - CoreToolCallStatus, buildUserSteeringHintPrompt, logBillingEvent, ApiKeyUpdatedEvent, @@ -170,29 +167,11 @@ import { useIsHelpDismissKey } from './utils/shortcutsHelp.js'; import { useSuspend } from './hooks/useSuspend.js'; import { useRunEventNotifications } from './hooks/useRunEventNotifications.js'; import { isNotificationsEnabled } from '../utils/terminalNotifications.js'; - -function isToolExecuting(pendingHistoryItems: HistoryItemWithoutId[]) { - return 
pendingHistoryItems.some((item) => { - if (item && item.type === 'tool_group') { - return item.tools.some( - (tool) => CoreToolCallStatus.Executing === tool.status, - ); - } - return false; - }); -} - -function isToolAwaitingConfirmation( - pendingHistoryItems: HistoryItemWithoutId[], -) { - return pendingHistoryItems - .filter((item): item is HistoryItemToolGroup => item.type === 'tool_group') - .some((item) => - item.tools.some( - (tool) => CoreToolCallStatus.AwaitingApproval === tool.status, - ), - ); -} +import { + isToolExecuting, + isToolAwaitingConfirmation, + getAllToolCalls, +} from './utils/historyUtils.js'; interface AppContainerProps { config: Config; @@ -418,7 +397,6 @@ export const AppContainer = (props: AppContainerProps) => { const branchName = useGitBranchName(config.getTargetDir()); // Layout measurements - const mainControlsRef = useRef(null); // For performance profiling only const rootUiRef = useRef(null); const lastTitleRef = useRef(null); @@ -721,7 +699,10 @@ export const AppContainer = (props: AppContainerProps) => { // Derive auth state variables for backward compatibility with UIStateContext const isAuthDialogOpen = authState === AuthState.Updating; - const isAuthenticating = authState === AuthState.Unauthenticated; + // TODO: Consider handling other auth types that should also skip the blocking screen + const isAuthenticating = + authState === AuthState.Unauthenticated && + settings.merged.security.auth.selectedType !== AuthType.USE_GEMINI; // Session browser and resume functionality const isGeminiClientInitialized = config.getGeminiClient()?.isInitialized(); @@ -745,7 +726,7 @@ export const AppContainer = (props: AppContainerProps) => { // Wrap handleDeleteSession to return a Promise for UIActions interface const handleDeleteSession = useCallback( async (session: SessionInfo): Promise => { - handleDeleteSessionSync(session); + await handleDeleteSessionSync(session); }, [handleDeleteSessionSync], ); @@ -1007,10 +988,18 @@ Logging in with 
Google... Restarting Gemini CLI to continue. Date.now(), ); try { - const { memoryContent, fileCount } = - await refreshServerHierarchicalMemory(config); + let flattenedMemory: string; + let fileCount: number; - const flattenedMemory = flattenMemory(memoryContent); + if (config.isJitContextEnabled()) { + await config.getContextManager()?.refresh(); + flattenedMemory = flattenMemory(config.getUserMemory()); + fileCount = config.getGeminiMdFileCount(); + } else { + const result = await refreshServerHierarchicalMemory(config); + flattenedMemory = flattenMemory(result.memoryContent); + fileCount = result.fileCount; + } historyManager.addItem( { @@ -1143,6 +1132,16 @@ Logging in with Google... Restarting Gemini CLI to continue. consumePendingHints, ); + const pendingHistoryItems = useMemo( + () => [...pendingSlashCommandHistoryItems, ...pendingGeminiHistoryItems], + [pendingSlashCommandHistoryItems, pendingGeminiHistoryItems], + ); + + const hasPendingToolConfirmation = useMemo( + () => isToolAwaitingConfirmation(pendingHistoryItems), + [pendingHistoryItems], + ); + toggleBackgroundShellRef.current = toggleBackgroundShell; isBackgroundShellVisibleRef.current = isBackgroundShellVisible; backgroundShellsRef.current = backgroundShells; @@ -1214,10 +1213,6 @@ Logging in with Google... Restarting Gemini CLI to continue. cancelHandlerRef.current = useCallback( (shouldRestorePrompt: boolean = true) => { - const pendingHistoryItems = [ - ...pendingSlashCommandHistoryItems, - ...pendingGeminiHistoryItems, - ]; if (isToolAwaitingConfirmation(pendingHistoryItems)) { return; // Don't clear - user may be composing a follow-up message } @@ -1251,8 +1246,7 @@ Logging in with Google... Restarting Gemini CLI to continue. inputHistory, getQueuedMessagesText, clearQueue, - pendingSlashCommandHistoryItems, - pendingGeminiHistoryItems, + pendingHistoryItems, ], ); @@ -1288,10 +1282,7 @@ Logging in with Google... Restarting Gemini CLI to continue. 
const isIdle = streamingState === StreamingState.Idle; const isAgentRunning = streamingState === StreamingState.Responding || - isToolExecuting([ - ...pendingSlashCommandHistoryItems, - ...pendingGeminiHistoryItems, - ]); + isToolExecuting(pendingHistoryItems); if (isSlash && isAgentRunning) { const { commandToExecute } = parseSlashCommand( @@ -1311,7 +1302,8 @@ Logging in with Google... Restarting Gemini CLI to continue. return; } - if (isSlash || (isIdle && isMcpReady)) { + const isMcpOrConfigReady = isConfigInitialized && isMcpReady; + if ((isSlash && isConfigInitialized) || (isIdle && isMcpOrConfigReady)) { if (!isSlash) { const permissions = await checkPermissions(submittedValue, config); if (permissions.length > 0) { @@ -1334,10 +1326,12 @@ Logging in with Google... Restarting Gemini CLI to continue. void submitQuery(submittedValue); } else { // Check messageQueue.length === 0 to only notify on the first queued item - if (isIdle && !isMcpReady && messageQueue.length === 0) { + if (isIdle && !isMcpOrConfigReady && messageQueue.length === 0) { coreEvents.emitFeedback( 'info', - 'Waiting for MCP servers to initialize... Slash commands are still available and prompts will be queued.', + !isConfigInitialized + ? 'Initializing... Prompts will be queued.' + : 'Waiting for MCP servers to initialize... Slash commands are still available and prompts will be queued.', ); } addMessage(submittedValue); @@ -1353,8 +1347,7 @@ Logging in with Google... Restarting Gemini CLI to continue. isMcpReady, streamingState, messageQueue.length, - pendingSlashCommandHistoryItems, - pendingGeminiHistoryItems, + pendingHistoryItems, config, constrainHeight, setConstrainHeight, @@ -1362,6 +1355,7 @@ Logging in with Google... Restarting Gemini CLI to continue. refreshStatic, reset, handleHintSubmit, + isConfigInitialized, triggerExpandHint, ], ); @@ -1392,31 +1386,55 @@ Logging in with Google... Restarting Gemini CLI to continue. 
* - Any future streaming states not explicitly allowed */ const isInputActive = - isConfigInitialized && !initError && !isProcessing && !isResuming && - !!slashCommands && (streamingState === StreamingState.Idle || - streamingState === StreamingState.Responding) && - !proQuotaRequest; + streamingState === StreamingState.Responding || + streamingState === StreamingState.WaitingForConfirmation) && + !proQuotaRequest && + !copyModeEnabled; + const observerRef = useRef(null); const [controlsHeight, setControlsHeight] = useState(0); + const [lastNonCopyControlsHeight, setLastNonCopyControlsHeight] = useState(0); useLayoutEffect(() => { - if (mainControlsRef.current) { - const fullFooterMeasurement = measureElement(mainControlsRef.current); - const roundedHeight = Math.round(fullFooterMeasurement.height); - if (roundedHeight > 0 && roundedHeight !== controlsHeight) { - setControlsHeight(roundedHeight); - } + if (!copyModeEnabled && controlsHeight > 0) { + setLastNonCopyControlsHeight(controlsHeight); } - }, [buffer, terminalWidth, terminalHeight, controlsHeight]); + }, [copyModeEnabled, controlsHeight]); - // Compute available terminal height based on controls measurement + const stableControlsHeight = + copyModeEnabled && lastNonCopyControlsHeight > 0 + ? lastNonCopyControlsHeight + : controlsHeight; + + const mainControlsRef = useCallback((node: DOMElement | null) => { + if (observerRef.current) { + observerRef.current.disconnect(); + observerRef.current = null; + } + + if (node) { + const observer = new ResizeObserver((entries) => { + const entry = entries[0]; + if (entry) { + const roundedHeight = Math.round(entry.contentRect.height); + setControlsHeight((prev) => + roundedHeight !== prev ? 
roundedHeight : prev, + ); + } + }); + observer.observe(node); + observerRef.current = observer; + } + }, []); + + // Compute available terminal height based on stable controls measurement const availableTerminalHeight = Math.max( 0, - terminalHeight - controlsHeight - backgroundShellHeight - 1, + terminalHeight - stableControlsHeight - backgroundShellHeight - 1, ); config.setShellExecutionConfig({ @@ -1665,17 +1683,13 @@ Logging in with Google... Restarting Gemini CLI to continue. [handleSlashCommand, settings], ); - const { elapsedTime, currentLoadingPhrase } = useLoadingIndicator({ - streamingState, - shouldShowFocusHint, - retryStatus, - loadingPhrasesMode: settings.merged.ui.loadingPhrases, - customWittyPhrases: settings.merged.ui.customWittyPhrases, - errorVerbosity: settings.merged.ui.errorVerbosity, - }); - const handleGlobalKeypress = useCallback( (key: Key): boolean => { + // Debug log keystrokes if enabled + if (settings.merged.general.debugKeystrokeLogging) { + debugLogger.log('[DEBUG] Keystroke:', JSON.stringify(key)); + } + if (shortcutsHelpVisible && isHelpDismissKey(key)) { setShortcutsHelpVisible(false); } @@ -1694,6 +1708,10 @@ Logging in with Google... Restarting Gemini CLI to continue. handleCtrlCPress(); return true; } else if (keyMatchers[Command.EXIT](key)) { + // If the input field is non-empty, do not exit. + if (bufferRef.current.text.length > 0) { + return false; + } handleCtrlDPress(); return true; } else if (keyMatchers[Command.SUSPEND_APP](key)) { @@ -1854,6 +1872,7 @@ Logging in with Google... Restarting Gemini CLI to continue. activePtyId, handleSuspend, embeddedShellFocused, + settings.merged.general.debugKeystrokeLogging, refreshStatic, setCopyModeEnabled, tabFocusTimeoutRef, @@ -2014,16 +2033,6 @@ Logging in with Google... Restarting Gemini CLI to continue. 
authState === AuthState.AwaitingApiKeyInput || !!newAgents; - const pendingHistoryItems = useMemo( - () => [...pendingSlashCommandHistoryItems, ...pendingGeminiHistoryItems], - [pendingSlashCommandHistoryItems, pendingGeminiHistoryItems], - ); - - const hasPendingToolConfirmation = useMemo( - () => isToolAwaitingConfirmation(pendingHistoryItems), - [pendingHistoryItems], - ); - const hasConfirmUpdateExtensionRequests = confirmUpdateExtensionRequests.length > 0; const hasLoopDetectionConfirmationRequest = @@ -2041,6 +2050,48 @@ Logging in with Google... Restarting Gemini CLI to continue. !!emptyWalletRequest || !!customDialog; + const loadingPhrases = settings.merged.ui.loadingPhrases; + const showStatusTips = loadingPhrases === 'tips' || loadingPhrases === 'all'; + const showStatusWit = loadingPhrases === 'witty' || loadingPhrases === 'all'; + + const showLoadingIndicator = + (!embeddedShellFocused || isBackgroundShellVisible) && + streamingState === StreamingState.Responding && + !hasPendingActionRequired; + + let estimatedStatusLength = 0; + if (activeHooks.length > 0 && settings.merged.hooksConfig.notifications) { + const hookLabel = + activeHooks.length > 1 ? 'Executing Hooks' : 'Executing Hook'; + const hookNames = activeHooks + .map( + (h) => + h.name + + (h.index && h.total && h.total > 1 ? 
` (${h.index}/${h.total})` : ''), + ) + .join(', '); + estimatedStatusLength = hookLabel.length + hookNames.length + 10; + } else if (showLoadingIndicator) { + const thoughtText = thought?.subject || 'Waiting for model...'; + estimatedStatusLength = thoughtText.length + 25; + } else if (hasPendingActionRequired) { + estimatedStatusLength = 35; + } + + const maxLength = terminalWidth - estimatedStatusLength - 5; + + const { elapsedTime, currentLoadingPhrase, currentTip, currentWittyPhrase } = + useLoadingIndicator({ + streamingState, + shouldShowFocusHint, + retryStatus, + showTips: showStatusTips, + showWit: showStatusWit, + customWittyPhrases: settings.merged.ui.customWittyPhrases, + errorVerbosity: settings.merged.ui.errorVerbosity, + maxLength, + }); + const allowPlanMode = config.isPlanEnabled() && streamingState === StreamingState.Idle && @@ -2113,12 +2164,7 @@ Logging in with Google... Restarting Gemini CLI to continue. ]); const allToolCalls = useMemo( - () => - pendingHistoryItems - .filter( - (item): item is HistoryItemToolGroup => item.type === 'tool_group', - ) - .flatMap((item) => item.tools), + () => getAllToolCalls(pendingHistoryItems), [pendingHistoryItems], ); @@ -2226,6 +2272,8 @@ Logging in with Google... Restarting Gemini CLI to continue. isFocused, elapsedTime, currentLoadingPhrase, + currentTip, + currentWittyPhrase, historyRemountKey, activeHooks, messageQueue, @@ -2245,6 +2293,7 @@ Logging in with Google... Restarting Gemini CLI to continue. contextFileNames, errorCount, availableTerminalHeight, + stableControlsHeight, mainAreaWidth, staticAreaMaxItemHeight, staticExtraHeight, @@ -2283,11 +2332,7 @@ Logging in with Google... Restarting Gemini CLI to continue. 
newAgents, showIsExpandableHint, hintMode: - config.isModelSteeringEnabled() && - isToolExecuting([ - ...pendingSlashCommandHistoryItems, - ...pendingGeminiHistoryItems, - ]), + config.isModelSteeringEnabled() && isToolExecuting(pendingHistoryItems), hintBuffer: '', }), [ @@ -2353,6 +2398,8 @@ Logging in with Google... Restarting Gemini CLI to continue. isFocused, elapsedTime, currentLoadingPhrase, + currentTip, + currentWittyPhrase, historyRemountKey, activeHooks, messageQueue, @@ -2368,6 +2415,7 @@ Logging in with Google... Restarting Gemini CLI to continue. contextFileNames, errorCount, availableTerminalHeight, + stableControlsHeight, mainAreaWidth, staticAreaMaxItemHeight, staticExtraHeight, diff --git a/packages/cli/src/ui/IdeIntegrationNudge.test.tsx b/packages/cli/src/ui/IdeIntegrationNudge.test.tsx index 5df3534f12..d05a17dad8 100644 --- a/packages/cli/src/ui/IdeIntegrationNudge.test.tsx +++ b/packages/cli/src/ui/IdeIntegrationNudge.test.tsx @@ -42,6 +42,7 @@ describe('IdeIntegrationNudge', () => { beforeEach(() => { vi.mocked(debugLogger.warn).mockImplementation((...args) => { if ( + // eslint-disable-next-line no-restricted-syntax typeof args[0] === 'string' && /was not wrapped in act/.test(args[0]) ) { @@ -53,10 +54,9 @@ describe('IdeIntegrationNudge', () => { }); it('renders correctly with default options', async () => { - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , ); - await waitUntilReady(); const frame = lastFrame(); expect(frame).toContain('Do you want to connect VS Code to Gemini CLI?'); @@ -72,8 +72,6 @@ describe('IdeIntegrationNudge', () => { , ); - await waitUntilReady(); - // "Yes" is the first option and selected by default usually. 
await act(async () => { stdin.write('\r'); @@ -93,8 +91,6 @@ describe('IdeIntegrationNudge', () => { , ); - await waitUntilReady(); - // Navigate down to "No (esc)" await act(async () => { stdin.write('\u001B[B'); // Down arrow @@ -119,8 +115,6 @@ describe('IdeIntegrationNudge', () => { , ); - await waitUntilReady(); - // Navigate down to "No, don't ask again" await act(async () => { stdin.write('\u001B[B'); // Down arrow @@ -150,8 +144,6 @@ describe('IdeIntegrationNudge', () => { , ); - await waitUntilReady(); - // Press Escape await act(async () => { stdin.write('\u001B'); @@ -178,8 +170,6 @@ describe('IdeIntegrationNudge', () => { , ); - await waitUntilReady(); - const frame = lastFrame(); expect(frame).toContain( diff --git a/packages/cli/src/ui/ToolConfirmationFullFrame.test.tsx b/packages/cli/src/ui/ToolConfirmationFullFrame.test.tsx new file mode 100644 index 0000000000..c8456fb237 --- /dev/null +++ b/packages/cli/src/ui/ToolConfirmationFullFrame.test.tsx @@ -0,0 +1,179 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { cleanup, renderWithProviders } from '../test-utils/render.js'; +import { createMockSettings } from '../test-utils/settings.js'; +import { App } from './App.js'; +import { + CoreToolCallStatus, + ApprovalMode, + makeFakeConfig, +} from '@google/gemini-cli-core'; +import { type UIState } from './contexts/UIStateContext.js'; +import type { SerializableConfirmationDetails } from '@google/gemini-cli-core'; +import { act } from 'react'; +import { StreamingState } from './types.js'; + +vi.mock('ink', async (importOriginal) => { + const original = await importOriginal(); + return { + ...original, + useIsScreenReaderEnabled: vi.fn(() => false), + }; +}); + +vi.mock('./components/GeminiSpinner.js', () => ({ + GeminiSpinner: () => null, +})); + +vi.mock('./components/CliSpinner.js', () => ({ + CliSpinner: () => null, +})); + 
+// Mock hooks to align with codebase style, even if App uses UIState directly +vi.mock('./hooks/useGeminiStream.js'); +vi.mock('./hooks/useHistoryManager.js'); +vi.mock('./hooks/useQuotaAndFallback.js'); +vi.mock('./hooks/useThemeCommand.js'); +vi.mock('./auth/useAuth.js'); +vi.mock('./hooks/useEditorSettings.js'); +vi.mock('./hooks/useSettingsCommand.js'); +vi.mock('./hooks/useModelCommand.js'); +vi.mock('./hooks/slashCommandProcessor.js'); +vi.mock('./hooks/useConsoleMessages.js'); +vi.mock('./hooks/useTerminalSize.js', () => ({ + useTerminalSize: vi.fn(() => ({ columns: 100, rows: 30 })), +})); + +describe('Full Terminal Tool Confirmation Snapshot', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + afterEach(() => { + cleanup(); + vi.restoreAllMocks(); + }); + + it('renders tool confirmation box in the frame of the entire terminal', async () => { + // Generate a large diff to warrant truncation + let largeDiff = + '--- a/packages/cli/src/ui/components/InputPrompt.tsx\n+++ b/packages/cli/src/ui/components/InputPrompt.tsx\n@@ -1,100 +1,105 @@\n'; + for (let i = 1; i <= 60; i++) { + largeDiff += ` const line${i} = true;\n`; + } + largeDiff += '- return kittyProtocolSupporte...;\n'; + largeDiff += '+ return kittyProtocolSupporte...;\n'; + largeDiff += ' buffer: TextBuffer;\n'; + largeDiff += ' onSubmit: (value: string) => void;'; + + const confirmationDetails: SerializableConfirmationDetails = { + type: 'edit', + title: 'Edit packages/.../InputPrompt.tsx', + fileName: 'InputPrompt.tsx', + filePath: 'packages/.../InputPrompt.tsx', + fileDiff: largeDiff, + originalContent: 'old', + newContent: 'new', + isModifying: false, + }; + + const toolCalls = [ + { + callId: 'call-1-modify-selected', + name: 'Edit', + description: + 'packages/.../InputPrompt.tsx: return kittyProtocolSupporte... 
=> return kittyProtocolSupporte...', + status: CoreToolCallStatus.AwaitingApproval, + resultDisplay: '', + confirmationDetails, + }, + ]; + + const mockUIState = { + history: [ + { + id: 1, + type: 'user', + text: 'Can you edit InputPrompt.tsx for me?', + }, + ], + mainAreaWidth: 99, + availableTerminalHeight: 36, + streamingState: StreamingState.WaitingForConfirmation, + constrainHeight: true, + isConfigInitialized: true, + cleanUiDetailsVisible: true, + quota: { + userTier: 'PRO', + stats: { + limits: {}, + usage: {}, + }, + proQuotaRequest: null, + validationRequest: null, + }, + pendingHistoryItems: [ + { + id: 2, + type: 'tool_group', + tools: toolCalls, + }, + ], + showApprovalModeIndicator: ApprovalMode.DEFAULT, + sessionStats: { + lastPromptTokenCount: 175400, + contextPercentage: 3, + }, + buffer: { text: '' }, + messageQueue: [], + activeHooks: [], + contextFileNames: [], + rootUiRef: { current: null }, + } as unknown as UIState; + + const mockConfig = makeFakeConfig(); + mockConfig.getUseAlternateBuffer = () => true; + mockConfig.isTrustedFolder = () => true; + mockConfig.getDisableAlwaysAllow = () => false; + mockConfig.getIdeMode = () => false; + mockConfig.getTargetDir = () => '/directory'; + + const { waitUntilReady, lastFrame, generateSvg, unmount } = + await renderWithProviders(, { + uiState: mockUIState, + config: mockConfig, + settings: createMockSettings({ + merged: { + ui: { + useAlternateBuffer: true, + theme: 'default', + showUserIdentity: false, + showShortcutsHint: false, + footer: { + hideContextPercentage: false, + hideTokens: false, + hideModel: false, + }, + }, + security: { + enablePermanentToolApproval: true, + }, + }, + }), + }); + + await waitUntilReady(); + + // Give it a moment to render + await act(async () => { + await new Promise((resolve) => setTimeout(resolve, 500)); + }); + + await expect({ lastFrame, generateSvg }).toMatchSvgSnapshot(); + unmount(); + }); +}); diff --git a/packages/cli/src/ui/__snapshots__/App.test.tsx.snap 
b/packages/cli/src/ui/__snapshots__/App.test.tsx.snap index 9e1d66df01..f145eadfff 100644 --- a/packages/cli/src/ui/__snapshots__/App.test.tsx.snap +++ b/packages/cli/src/ui/__snapshots__/App.test.tsx.snap @@ -2,10 +2,13 @@ exports[`App > Snapshots > renders default layout correctly 1`] = ` " - ▝▜▄ Gemini CLI v1.2.3 - ▝▜▄ - ▗▟▀ - ▝▀ + ▝▜▄ ▗█▀▀▜▙▝█▛▀▀▌▜██▖▟██▘▜█▘▜██▖▝█▛▝█▛ + ▝▜▄ █▌ █▙▟ ▐█▝█▛▐█ ▐█ ▐█▝█▖█▌ █▌ + ▗▟▀ ▜▙ ▝█▛ █▌▝ ▖▐█ ▐█ ▐█ ▐█ ▝██▌ █▌ + ▝▀ ▀▀▀▀▘▝▀▀▀▀▘▀▀▘ ▀▀▘▀▀▘▀▀▘ ▝▀▀▝▀▀ + + Gemini CLI v1.2.3 + Tips for getting started: @@ -29,16 +32,13 @@ Tips for getting started: - - - - Notifications + Composer " `; @@ -47,10 +47,13 @@ exports[`App > Snapshots > renders screen reader layout correctly 1`] = ` "Notifications Footer - ▝▜▄ Gemini CLI v1.2.3 - ▝▜▄ - ▗▟▀ - ▝▀ + ▝▜▄ ▗█▀▀▜▙▝█▛▀▀▌▜██▖▟██▘▜█▘▜██▖▝█▛▝█▛ + ▝▜▄ █▌ █▙▟ ▐█▝█▛▐█ ▐█ ▐█▝█▖█▌ █▌ + ▗▟▀ ▜▙ ▝█▛ █▌▝ ▖▐█ ▐█ ▐█ ▐█ ▝██▌ █▌ + ▝▀ ▀▀▀▀▘▝▀▀▀▀▘▀▀▘ ▀▀▘▀▀▘▀▀▘ ▝▀▀▝▀▀ + + Gemini CLI v1.2.3 + Tips for getting started: @@ -64,13 +67,12 @@ Composer exports[`App > Snapshots > renders with dialogs visible 1`] = ` " - ▝▜▄ Gemini CLI v1.2.3 - ▝▜▄ - ▗▟▀ - ▝▀ - - + ▝▜▄ ▗█▀▀▜▙▝█▛▀▀▌▜██▖▟██▘▜█▘▜██▖▝█▛▝█▛ + ▝▜▄ █▌ █▙▟ ▐█▝█▛▐█ ▐█ ▐█▝█▖█▌ █▌ + ▗▟▀ ▜▙ ▝█▛ █▌▝ ▖▐█ ▐█ ▐█ ▐█ ▝██▌ █▌ + ▝▀ ▀▀▀▀▘▝▀▀▀▀▘▀▀▘ ▀▀▘▀▀▘▀▀▘ ▝▀▀▝▀▀ + Gemini CLI v1.2.3 @@ -101,16 +103,20 @@ exports[`App > Snapshots > renders with dialogs visible 1`] = ` Notifications + DialogManager " `; exports[`App > should render ToolConfirmationQueue along with Composer when tool is confirming and experiment is on 1`] = ` " - ▝▜▄ Gemini CLI v1.2.3 - ▝▜▄ - ▗▟▀ - ▝▀ + ▝▜▄ ▗█▀▀▜▙▝█▛▀▀▌▜██▖▟██▘▜█▘▜██▖▝█▛▝█▛ + ▝▜▄ █▌ █▙▟ ▐█▝█▛▐█ ▐█ ▐█▝█▖█▌ █▌ + ▗▟▀ ▜▙ ▝█▛ █▌▝ ▖▐█ ▐█ ▐█ ▐█ ▝██▌ █▌ + ▝▀ ▀▀▀▀▘▝▀▀▀▀▘▀▀▘ ▀▀▘▀▀▘▀▀▘ ▝▀▀▝▀▀ + + Gemini CLI v1.2.3 + Tips for getting started: @@ -139,11 +145,8 @@ HistoryItemDisplay - - - - Notifications + Composer " `; diff --git 
a/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame-Full-Terminal-Tool-Confirmation-Snapshot-renders-tool-confirmation-box-in-the-frame-of-the-entire-terminal.snap.svg b/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame-Full-Terminal-Tool-Confirmation-Snapshot-renders-tool-confirmation-box-in-the-frame-of-the-entire-terminal.snap.svg new file mode 100644 index 0000000000..97b01f3025 --- /dev/null +++ b/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame-Full-Terminal-Tool-Confirmation-Snapshot-renders-tool-confirmation-box-in-the-frame-of-the-entire-terminal.snap.svg @@ -0,0 +1,266 @@ + + + + + + ▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀ + + + > + + Can you edit InputPrompt.tsx for me? + + + ▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄ + ╭─────────────────────────────────────────────────────────────────────────────────────────────────╮ + + Action Required + + + + + ? + Edit + packages/.../InputPrompt.tsx: return kittyProtocolSupporte... => return kittyProto + + + + + + ... first 44 lines hidden (Ctrl+O to show) ... 
+ + + 45 + const + line45 + = + true + ; + + + 46 + const + line46 + = + true + ; + + + 47 + const + line47 + = + true + ; + + + + 48 + const + line48 + = + true + ; + + + + 49 + const + line49 + = + true + ; + + + + 50 + const + line50 + = + true + ; + + + + 51 + const + line51 + = + true + ; + + + + 52 + const + line52 + = + true + ; + + + + 53 + const + line53 + = + true + ; + + + + 54 + const + line54 + = + true + ; + + + + 55 + const + line55 + = + true + ; + + + + 56 + const + line56 + = + true + ; + + + + 57 + const + line57 + = + true + ; + + + + 58 + const + line58 + = + true + ; + + + + 59 + const + line59 + = + true + ; + + + + 60 + const + line60 + = + true + ; + + + + + 61 + + + - + + + + return + + kittyProtocolSupporte...; + + + + + 61 + + + + + + + + return + + kittyProtocolSupporte...; + + + + 62 + buffer: TextBuffer; + + + + 63 + onSubmit + : ( + value + : + string + ) => + void + ; + + + + Apply this change? + + + + + + + + + + + 1. + + + Allow once + + + + + 2. + Allow for this session + + + + 3. + Allow for this file in all future sessions + + + + 4. + Modify with external editor + + + + 5. + No, suggest changes (esc) + + + + + + ╰─────────────────────────────────────────────────────────────────────────────────────────────────╯ + + + \ No newline at end of file diff --git a/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame.test.tsx.snap b/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame.test.tsx.snap new file mode 100644 index 0000000000..98853434df --- /dev/null +++ b/packages/cli/src/ui/__snapshots__/ToolConfirmationFullFrame.test.tsx.snap @@ -0,0 +1,43 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`Full Terminal Tool Confirmation Snapshot > renders tool confirmation box in the frame of the entire terminal 1`] = ` +"▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀ + > Can you edit InputPrompt.tsx for me? 
+▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄ +╭─────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ Action Required │ +│ │ +│ ? Edit packages/.../InputPrompt.tsx: return kittyProtocolSupporte... => return kittyProto… │ +│ │ +│ ... first 44 lines hidden (Ctrl+O to show) ... │ +│ 45 const line45 = true; │ +│ 46 const line46 = true; │ +│ 47 const line47 = true; │█ +│ 48 const line48 = true; │█ +│ 49 const line49 = true; │█ +│ 50 const line50 = true; │█ +│ 51 const line51 = true; │█ +│ 52 const line52 = true; │█ +│ 53 const line53 = true; │█ +│ 54 const line54 = true; │█ +│ 55 const line55 = true; │█ +│ 56 const line56 = true; │█ +│ 57 const line57 = true; │█ +│ 58 const line58 = true; │█ +│ 59 const line59 = true; │█ +│ 60 const line60 = true; │█ +│ 61 - return kittyProtocolSupporte...; │█ +│ 61 + return kittyProtocolSupporte...; │█ +│ 62 buffer: TextBuffer; │█ +│ 63 onSubmit: (value: string) => void; │█ +│ Apply this change? │█ +│ │█ +│ ● 1. Allow once │█ +│ 2. Allow for this session │█ +│ 3. Allow for this file in all future sessions │█ +│ 4. Modify with external editor │█ +│ 5. 
No, suggest changes (esc) │█ +│ │█ +╰─────────────────────────────────────────────────────────────────────────────────────────────────╯█ +" +`; diff --git a/packages/cli/src/ui/auth/ApiAuthDialog.test.tsx b/packages/cli/src/ui/auth/ApiAuthDialog.test.tsx index b8de6adb0b..d46e0295a1 100644 --- a/packages/cli/src/ui/auth/ApiAuthDialog.test.tsx +++ b/packages/cli/src/ui/auth/ApiAuthDialog.test.tsx @@ -73,23 +73,21 @@ describe('ApiAuthDialog', () => { }); it('renders correctly', async () => { - const { lastFrame, waitUntilReady, unmount } = render( + const { lastFrame, unmount } = await render( , ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); unmount(); }); it('renders with a defaultValue', async () => { - const { waitUntilReady, unmount } = render( + const { unmount } = await render( , ); - await waitUntilReady(); expect(mockedUseTextBuffer).toHaveBeenCalledWith( expect.objectContaining({ initialText: 'test-key', @@ -113,10 +111,9 @@ describe('ApiAuthDialog', () => { 'calls $expectedCall.name when $keyName is pressed', async ({ keyName, sequence, expectedCall, args }) => { mockBuffer.text = 'submitted-key'; // Set for the onSubmit case - const { waitUntilReady, unmount } = render( + const { unmount } = await render( , ); - await waitUntilReady(); // calls[0] is the ApiAuthDialog's useKeypress (Ctrl+C handler) // calls[1] is the TextInput's useKeypress (typing handler) const keypressHandler = mockedUseKeypress.mock.calls[1][0]; @@ -136,24 +133,22 @@ describe('ApiAuthDialog', () => { ); it('displays an error message', async () => { - const { lastFrame, waitUntilReady, unmount } = render( + const { lastFrame, unmount } = await render( , ); - await waitUntilReady(); expect(lastFrame()).toContain('Invalid API Key'); unmount(); }); it('calls clearApiKey and clears buffer when Ctrl+C is pressed', async () => { - const { waitUntilReady, unmount } = render( + const { unmount } = await render( , ); - await waitUntilReady(); // Call 0 is ApiAuthDialog 
(isActive: true) // Call 1 is TextInput (isActive: true, priority: true) const keypressHandler = mockedUseKeypress.mock.calls[0][0]; diff --git a/packages/cli/src/ui/auth/AuthDialog.test.tsx b/packages/cli/src/ui/auth/AuthDialog.test.tsx index 878b2a8ee0..69593df076 100644 --- a/packages/cli/src/ui/auth/AuthDialog.test.tsx +++ b/packages/cli/src/ui/auth/AuthDialog.test.tsx @@ -143,10 +143,9 @@ describe('AuthDialog', () => { for (const [key, value] of Object.entries(env)) { vi.stubEnv(key, value as string); } - const { waitUntilReady, unmount } = await renderWithProviders( + const { unmount } = await renderWithProviders( , ); - await waitUntilReady(); const items = mockedRadioButtonSelect.mock.calls[0][0].items; for (const item of shouldContain) { expect(items).toContainEqual(item); @@ -161,10 +160,7 @@ describe('AuthDialog', () => { it('filters auth types when enforcedType is set', async () => { props.settings.merged.security.auth.enforcedType = AuthType.USE_GEMINI; - const { waitUntilReady, unmount } = await renderWithProviders( - , - ); - await waitUntilReady(); + const { unmount } = await renderWithProviders(); const items = mockedRadioButtonSelect.mock.calls[0][0].items; expect(items).toHaveLength(1); expect(items[0].value).toBe(AuthType.USE_GEMINI); @@ -173,10 +169,7 @@ describe('AuthDialog', () => { it('sets initial index to 0 when enforcedType is set', async () => { props.settings.merged.security.auth.enforcedType = AuthType.USE_GEMINI; - const { waitUntilReady, unmount } = await renderWithProviders( - , - ); - await waitUntilReady(); + const { unmount } = await renderWithProviders(); const { initialIndex } = mockedRadioButtonSelect.mock.calls[0][0]; expect(initialIndex).toBe(0); unmount(); @@ -213,10 +206,7 @@ describe('AuthDialog', () => { }, ])('selects initial auth type $desc', async ({ setup, expected }) => { setup(); - const { waitUntilReady, unmount } = await renderWithProviders( - , - ); - await waitUntilReady(); + const { unmount } = await 
renderWithProviders(); const { items, initialIndex } = mockedRadioButtonSelect.mock.calls[0][0]; expect(items[initialIndex].value).toBe(expected); unmount(); @@ -226,10 +216,7 @@ describe('AuthDialog', () => { describe('handleAuthSelect', () => { it('calls onAuthError if validation fails', async () => { mockedValidateAuthMethod.mockReturnValue('Invalid method'); - const { waitUntilReady, unmount } = await renderWithProviders( - , - ); - await waitUntilReady(); + const { unmount } = await renderWithProviders(); const { onSelect: handleAuthSelect } = mockedRadioButtonSelect.mock.calls[0][0]; handleAuthSelect(AuthType.USE_GEMINI); @@ -245,10 +232,7 @@ describe('AuthDialog', () => { it('sets auth context with requiresRestart: true for LOGIN_WITH_GOOGLE', async () => { mockedValidateAuthMethod.mockReturnValue(null); - const { waitUntilReady, unmount } = await renderWithProviders( - , - ); - await waitUntilReady(); + const { unmount } = await renderWithProviders(); const { onSelect: handleAuthSelect } = mockedRadioButtonSelect.mock.calls[0][0]; await handleAuthSelect(AuthType.LOGIN_WITH_GOOGLE); @@ -261,10 +245,7 @@ describe('AuthDialog', () => { it('sets auth context with empty object for other auth types', async () => { mockedValidateAuthMethod.mockReturnValue(null); - const { waitUntilReady, unmount } = await renderWithProviders( - , - ); - await waitUntilReady(); + const { unmount } = await renderWithProviders(); const { onSelect: handleAuthSelect } = mockedRadioButtonSelect.mock.calls[0][0]; await handleAuthSelect(AuthType.USE_GEMINI); @@ -273,53 +254,12 @@ describe('AuthDialog', () => { unmount(); }); - it('skips API key dialog on initial setup if env var is present', async () => { + it('always shows API key dialog even when env var is present', async () => { mockedValidateAuthMethod.mockReturnValue(null); vi.stubEnv('GEMINI_API_KEY', 'test-key-from-env'); // props.settings.merged.security.auth.selectedType is undefined here, simulating initial setup - const { 
waitUntilReady, unmount } = await renderWithProviders( - , - ); - await waitUntilReady(); - const { onSelect: handleAuthSelect } = - mockedRadioButtonSelect.mock.calls[0][0]; - await handleAuthSelect(AuthType.USE_GEMINI); - - expect(props.setAuthState).toHaveBeenCalledWith( - AuthState.Unauthenticated, - ); - unmount(); - }); - - it('skips API key dialog if env var is present but empty', async () => { - mockedValidateAuthMethod.mockReturnValue(null); - vi.stubEnv('GEMINI_API_KEY', ''); // Empty string - // props.settings.merged.security.auth.selectedType is undefined here - - const { waitUntilReady, unmount } = await renderWithProviders( - , - ); - await waitUntilReady(); - const { onSelect: handleAuthSelect } = - mockedRadioButtonSelect.mock.calls[0][0]; - await handleAuthSelect(AuthType.USE_GEMINI); - - expect(props.setAuthState).toHaveBeenCalledWith( - AuthState.Unauthenticated, - ); - unmount(); - }); - - it('shows API key dialog on initial setup if no env var is present', async () => { - mockedValidateAuthMethod.mockReturnValue(null); - // process.env['GEMINI_API_KEY'] is not set - // props.settings.merged.security.auth.selectedType is undefined here, simulating initial setup - - const { waitUntilReady, unmount } = await renderWithProviders( - , - ); - await waitUntilReady(); + const { unmount } = await renderWithProviders(); const { onSelect: handleAuthSelect } = mockedRadioButtonSelect.mock.calls[0][0]; await handleAuthSelect(AuthType.USE_GEMINI); @@ -330,23 +270,52 @@ describe('AuthDialog', () => { unmount(); }); - it('skips API key dialog on re-auth if env var is present (cannot edit)', async () => { + it('always shows API key dialog even when env var is empty string', async () => { mockedValidateAuthMethod.mockReturnValue(null); - vi.stubEnv('GEMINI_API_KEY', 'test-key-from-env'); - // Simulate that the user has already authenticated once - props.settings.merged.security.auth.selectedType = - AuthType.LOGIN_WITH_GOOGLE; + vi.stubEnv('GEMINI_API_KEY', ''); 
// Empty string + // props.settings.merged.security.auth.selectedType is undefined here - const { waitUntilReady, unmount } = await renderWithProviders( - , - ); - await waitUntilReady(); + const { unmount } = await renderWithProviders(); const { onSelect: handleAuthSelect } = mockedRadioButtonSelect.mock.calls[0][0]; await handleAuthSelect(AuthType.USE_GEMINI); expect(props.setAuthState).toHaveBeenCalledWith( - AuthState.Unauthenticated, + AuthState.AwaitingApiKeyInput, + ); + unmount(); + }); + + it('shows API key dialog on initial setup if no env var is present', async () => { + mockedValidateAuthMethod.mockReturnValue(null); + // process.env['GEMINI_API_KEY'] is not set + // props.settings.merged.security.auth.selectedType is undefined here, simulating initial setup + + const { unmount } = await renderWithProviders(); + const { onSelect: handleAuthSelect } = + mockedRadioButtonSelect.mock.calls[0][0]; + await handleAuthSelect(AuthType.USE_GEMINI); + + expect(props.setAuthState).toHaveBeenCalledWith( + AuthState.AwaitingApiKeyInput, + ); + unmount(); + }); + + it('always shows API key dialog on re-auth even if env var is present', async () => { + mockedValidateAuthMethod.mockReturnValue(null); + vi.stubEnv('GEMINI_API_KEY', 'test-key-from-env'); + // Simulate switching from a different auth method (e.g., Google Login → API key) + props.settings.merged.security.auth.selectedType = + AuthType.LOGIN_WITH_GOOGLE; + + const { unmount } = await renderWithProviders(); + const { onSelect: handleAuthSelect } = + mockedRadioButtonSelect.mock.calls[0][0]; + await handleAuthSelect(AuthType.USE_GEMINI); + + expect(props.setAuthState).toHaveBeenCalledWith( + AuthState.AwaitingApiKeyInput, ); unmount(); }); @@ -360,10 +329,7 @@ describe('AuthDialog', () => { vi.mocked(props.config.isBrowserLaunchSuppressed).mockReturnValue(true); mockedValidateAuthMethod.mockReturnValue(null); - const { waitUntilReady, unmount } = await renderWithProviders( - , - ); - await waitUntilReady(); + 
const { unmount } = await renderWithProviders(); const { onSelect: handleAuthSelect } = mockedRadioButtonSelect.mock.calls[0][0]; await act(async () => { @@ -383,10 +349,9 @@ describe('AuthDialog', () => { it('displays authError when provided', async () => { props.authError = 'Something went wrong'; - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , ); - await waitUntilReady(); expect(lastFrame()).toContain('Something went wrong'); unmount(); }); @@ -429,10 +394,7 @@ describe('AuthDialog', () => { }, ])('$desc', async ({ setup, expectations }) => { setup(); - const { waitUntilReady, unmount } = await renderWithProviders( - , - ); - await waitUntilReady(); + const { unmount } = await renderWithProviders(); const keypressHandler = mockedUseKeypress.mock.calls[0][0]; keypressHandler({ name: 'escape' }); expectations(props); @@ -442,30 +404,27 @@ describe('AuthDialog', () => { describe('Snapshots', () => { it('renders correctly with default props', async () => { - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); unmount(); }); it('renders correctly with auth error', async () => { props.authError = 'Something went wrong'; - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); unmount(); }); it('renders correctly with enforced auth type', async () => { props.settings.merged.security.auth.enforcedType = AuthType.USE_GEMINI; - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); unmount(); }); diff --git 
a/packages/cli/src/ui/auth/AuthDialog.tsx b/packages/cli/src/ui/auth/AuthDialog.tsx index c823f606c6..e73d380bf3 100644 --- a/packages/cli/src/ui/auth/AuthDialog.tsx +++ b/packages/cli/src/ui/auth/AuthDialog.tsx @@ -137,13 +137,11 @@ export function AuthDialog({ } if (authType === AuthType.USE_GEMINI) { - if (process.env['GEMINI_API_KEY'] !== undefined) { - setAuthState(AuthState.Unauthenticated); - return; - } else { - setAuthState(AuthState.AwaitingApiKeyInput); - return; - } + // Always show the API key input dialog so the user can + // explicitly enter or confirm their key, regardless of + // whether GEMINI_API_KEY env var or a stored key exists. + setAuthState(AuthState.AwaitingApiKeyInput); + return; } } setAuthState(AuthState.Unauthenticated); diff --git a/packages/cli/src/ui/auth/AuthInProgress.test.tsx b/packages/cli/src/ui/auth/AuthInProgress.test.tsx index bd6a3cb126..a387fcb6f3 100644 --- a/packages/cli/src/ui/auth/AuthInProgress.test.tsx +++ b/packages/cli/src/ui/auth/AuthInProgress.test.tsx @@ -42,6 +42,7 @@ describe('AuthInProgress', () => { vi.useFakeTimers(); vi.mocked(debugLogger.error).mockImplementation((...args) => { if ( + // eslint-disable-next-line no-restricted-syntax typeof args[0] === 'string' && args[0].includes('was not wrapped in act') ) { @@ -55,20 +56,18 @@ describe('AuthInProgress', () => { }); it('renders initial state with spinner', async () => { - const { lastFrame, waitUntilReady, unmount } = render( + const { lastFrame, unmount } = await render( , ); - await waitUntilReady(); expect(lastFrame()).toContain('[Spinner] Waiting for authentication...'); expect(lastFrame()).toContain('Press Esc or Ctrl+C to cancel'); unmount(); }); it('calls onTimeout when ESC is pressed', async () => { - const { waitUntilReady, unmount } = render( + const { waitUntilReady, unmount } = await render( , ); - await waitUntilReady(); const keypressHandler = vi.mocked(useKeypress).mock.calls[0][0]; await act(async () => { @@ -84,10 +83,9 @@ 
describe('AuthInProgress', () => { }); it('calls onTimeout when Ctrl+C is pressed', async () => { - const { waitUntilReady, unmount } = render( + const { waitUntilReady, unmount } = await render( , ); - await waitUntilReady(); const keypressHandler = vi.mocked(useKeypress).mock.calls[0][0]; await act(async () => { @@ -100,10 +98,9 @@ describe('AuthInProgress', () => { }); it('calls onTimeout and shows timeout message after 3 minutes', async () => { - const { lastFrame, waitUntilReady, unmount } = render( + const { lastFrame, waitUntilReady, unmount } = await render( , ); - await waitUntilReady(); await act(async () => { vi.advanceTimersByTime(180000); @@ -116,10 +113,7 @@ describe('AuthInProgress', () => { }); it('clears timer on unmount', async () => { - const { waitUntilReady, unmount } = render( - , - ); - await waitUntilReady(); + const { unmount } = await render(); await act(async () => { unmount(); diff --git a/packages/cli/src/ui/auth/BannedAccountDialog.test.tsx b/packages/cli/src/ui/auth/BannedAccountDialog.test.tsx index 0670c81bc9..4b5d44e6d5 100644 --- a/packages/cli/src/ui/auth/BannedAccountDialog.test.tsx +++ b/packages/cli/src/ui/auth/BannedAccountDialog.test.tsx @@ -73,14 +73,13 @@ describe('BannedAccountDialog', () => { }); it('renders the suspension message from accountSuspensionInfo', async () => { - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , ); - await waitUntilReady(); const frame = lastFrame(); expect(frame).toContain('Account Suspended'); expect(frame).toContain('violation of Terms of Service'); @@ -89,14 +88,13 @@ describe('BannedAccountDialog', () => { }); it('renders menu options with appeal link text from response', async () => { - const { waitUntilReady, unmount } = await renderWithProviders( + const { unmount } = await renderWithProviders( , ); - await waitUntilReady(); const items = mockedRadioButtonSelect.mock.calls[0][0].items; 
expect(items).toHaveLength(3); expect(items[0].label).toBe('Appeal Here'); @@ -109,14 +107,13 @@ describe('BannedAccountDialog', () => { const infoWithoutUrl: AccountSuspensionInfo = { message: 'Account suspended.', }; - const { waitUntilReady, unmount } = await renderWithProviders( + const { unmount } = await renderWithProviders( , ); - await waitUntilReady(); const items = mockedRadioButtonSelect.mock.calls[0][0].items; expect(items).toHaveLength(2); expect(items[0].label).toBe('Change authentication'); @@ -129,28 +126,26 @@ describe('BannedAccountDialog', () => { message: 'Account suspended.', appealUrl: 'https://example.com/appeal', }; - const { waitUntilReady, unmount } = await renderWithProviders( + const { unmount } = await renderWithProviders( , ); - await waitUntilReady(); const items = mockedRadioButtonSelect.mock.calls[0][0].items; expect(items[0].label).toBe('Open the Google Form'); unmount(); }); it('opens browser when appeal option is selected', async () => { - const { waitUntilReady, unmount } = await renderWithProviders( + const { unmount } = await renderWithProviders( , ); - await waitUntilReady(); const { onSelect } = mockedRadioButtonSelect.mock.calls[0][0]; await onSelect('open_form'); expect(mockedOpenBrowser).toHaveBeenCalledWith( @@ -162,14 +157,13 @@ describe('BannedAccountDialog', () => { it('shows URL when browser cannot be launched', async () => { mockedShouldLaunchBrowser.mockReturnValue(false); - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , ); - await waitUntilReady(); const { onSelect } = mockedRadioButtonSelect.mock.calls[0][0]; onSelect('open_form'); await waitFor(() => { @@ -180,14 +174,13 @@ describe('BannedAccountDialog', () => { }); it('calls onExit when "Exit" is selected', async () => { - const { waitUntilReady, unmount } = await renderWithProviders( + const { unmount } = await renderWithProviders( , ); - await waitUntilReady(); const { 
onSelect } = mockedRadioButtonSelect.mock.calls[0][0]; await onSelect('exit'); expect(mockedRunExitCleanup).toHaveBeenCalled(); @@ -196,14 +189,13 @@ describe('BannedAccountDialog', () => { }); it('calls onChangeAuth when "Change authentication" is selected', async () => { - const { waitUntilReady, unmount } = await renderWithProviders( + const { unmount } = await renderWithProviders( , ); - await waitUntilReady(); const { onSelect } = mockedRadioButtonSelect.mock.calls[0][0]; onSelect('change_auth'); expect(onChangeAuth).toHaveBeenCalled(); @@ -212,14 +204,13 @@ describe('BannedAccountDialog', () => { }); it('exits on escape key', async () => { - const { waitUntilReady, unmount } = await renderWithProviders( + const { unmount } = await renderWithProviders( , ); - await waitUntilReady(); const keypressHandler = mockedUseKeypress.mock.calls[0][0]; const result = keypressHandler({ name: 'escape' }); expect(result).toBe(true); @@ -227,14 +218,13 @@ describe('BannedAccountDialog', () => { }); it('renders snapshot correctly', async () => { - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); unmount(); }); diff --git a/packages/cli/src/ui/auth/LoginWithGoogleRestartDialog.test.tsx b/packages/cli/src/ui/auth/LoginWithGoogleRestartDialog.test.tsx index 77310e3069..4dd13a3334 100644 --- a/packages/cli/src/ui/auth/LoginWithGoogleRestartDialog.test.tsx +++ b/packages/cli/src/ui/auth/LoginWithGoogleRestartDialog.test.tsx @@ -45,25 +45,23 @@ describe('LoginWithGoogleRestartDialog', () => { }); it('renders correctly', async () => { - const { lastFrame, waitUntilReady, unmount } = render( + const { lastFrame, unmount } = await render( , ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); unmount(); }); it('calls onDismiss when escape is pressed', async () => { - const { waitUntilReady, unmount } = render( + const 
{ unmount } = await render( , ); - await waitUntilReady(); const keypressHandler = mockedUseKeypress.mock.calls[0][0]; keypressHandler({ @@ -83,13 +81,12 @@ describe('LoginWithGoogleRestartDialog', () => { async (keyName) => { vi.useFakeTimers(); - const { waitUntilReady, unmount } = render( + const { unmount } = await render( , ); - await waitUntilReady(); const keypressHandler = mockedUseKeypress.mock.calls[0][0]; keypressHandler({ diff --git a/packages/cli/src/ui/auth/useAuth.test.tsx b/packages/cli/src/ui/auth/useAuth.test.tsx index f236428ff1..8d51e46a64 100644 --- a/packages/cli/src/ui/auth/useAuth.test.tsx +++ b/packages/cli/src/ui/auth/useAuth.test.tsx @@ -4,15 +4,8 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { - describe, - it, - expect, - vi, - beforeEach, - afterEach, - type Mock, -} from 'vitest'; +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { act } from 'react'; import { renderHook } from '../../test-utils/render.js'; import { useAuthCommand, validateAuthMethodWithSettings } from './useAuth.js'; import { @@ -22,7 +15,6 @@ import { } from '@google/gemini-cli-core'; import { AuthState } from '../types.js'; import type { LoadedSettings } from '../../config/settings.js'; -import { waitFor } from '../../test-utils/async.js'; // Mock dependencies const mockLoadApiKey = vi.fn(); @@ -142,171 +134,202 @@ describe('useAuth', () => { }, }) as LoadedSettings; + let deferredRefreshAuth: { + resolve: () => void; + reject: (e: Error) => void; + }; + + beforeEach(() => { + vi.mocked(mockConfig.refreshAuth).mockImplementation( + () => + new Promise((resolve, reject) => { + deferredRefreshAuth = { resolve, reject }; + }), + ); + }); + it('should initialize with Unauthenticated state', async () => { - const { result } = renderHook(() => + const { result } = await renderHook(() => useAuthCommand(createSettings(AuthType.LOGIN_WITH_GOOGLE), mockConfig), ); + // Because we defer refreshAuth, the initial state is safely caught 
here expect(result.current.authState).toBe(AuthState.Unauthenticated); - await waitFor(() => { - expect(result.current.authState).toBe(AuthState.Authenticated); + await act(async () => { + deferredRefreshAuth.resolve(); }); + + expect(result.current.authState).toBe(AuthState.Authenticated); }); it('should set error if no auth type is selected and no env key', async () => { - const { result } = renderHook(() => + const { result } = await renderHook(() => useAuthCommand(createSettings(undefined), mockConfig), ); - await waitFor(() => { - expect(result.current.authError).toBe( - 'No authentication method selected.', - ); - expect(result.current.authState).toBe(AuthState.Updating); - }); + // This happens synchronously, no deferred promise + expect(result.current.authError).toBe( + 'No authentication method selected.', + ); + expect(result.current.authState).toBe(AuthState.Updating); }); it('should set error if no auth type is selected but env key exists', async () => { process.env['GEMINI_API_KEY'] = 'env-key'; - const { result } = renderHook(() => + const { result } = await renderHook(() => useAuthCommand(createSettings(undefined), mockConfig), ); - await waitFor(() => { - expect(result.current.authError).toContain( - 'Existing API key detected (GEMINI_API_KEY)', - ); - expect(result.current.authState).toBe(AuthState.Updating); - }); + expect(result.current.authError).toContain( + 'Existing API key detected (GEMINI_API_KEY)', + ); + expect(result.current.authState).toBe(AuthState.Updating); }); it('should transition to AwaitingApiKeyInput if USE_GEMINI and no key found', async () => { - mockLoadApiKey.mockResolvedValue(null); - const { result } = renderHook(() => + let deferredLoadKey: { resolve: (k: string | null) => void }; + mockLoadApiKey.mockImplementation( + () => + new Promise((resolve) => { + deferredLoadKey = { resolve }; + }), + ); + + const { result } = await renderHook(() => useAuthCommand(createSettings(AuthType.USE_GEMINI), mockConfig), ); - await 
waitFor(() => { - expect(result.current.authState).toBe(AuthState.AwaitingApiKeyInput); + await act(async () => { + deferredLoadKey.resolve(null); }); + + expect(result.current.authState).toBe(AuthState.AwaitingApiKeyInput); }); it('should authenticate if USE_GEMINI and key is found', async () => { - mockLoadApiKey.mockResolvedValue('stored-key'); - const { result } = renderHook(() => + let deferredLoadKey: { resolve: (k: string | null) => void }; + mockLoadApiKey.mockImplementation( + () => + new Promise((resolve) => { + deferredLoadKey = { resolve }; + }), + ); + + const { result } = await renderHook(() => useAuthCommand(createSettings(AuthType.USE_GEMINI), mockConfig), ); - await waitFor(() => { - expect(mockConfig.refreshAuth).toHaveBeenCalledWith( - AuthType.USE_GEMINI, - ); - expect(result.current.authState).toBe(AuthState.Authenticated); - expect(result.current.apiKeyDefaultValue).toBe('stored-key'); + await act(async () => { + deferredLoadKey.resolve('stored-key'); }); + + await act(async () => { + deferredRefreshAuth.resolve(); + }); + + expect(mockConfig.refreshAuth).toHaveBeenCalledWith(AuthType.USE_GEMINI); + expect(result.current.authState).toBe(AuthState.Authenticated); + expect(result.current.apiKeyDefaultValue).toBe('stored-key'); }); it('should authenticate if USE_GEMINI and env key is found', async () => { - mockLoadApiKey.mockResolvedValue(null); process.env['GEMINI_API_KEY'] = 'env-key'; - const { result } = renderHook(() => + + const { result } = await renderHook(() => useAuthCommand(createSettings(AuthType.USE_GEMINI), mockConfig), ); - await waitFor(() => { - expect(mockConfig.refreshAuth).toHaveBeenCalledWith( - AuthType.USE_GEMINI, - ); - expect(result.current.authState).toBe(AuthState.Authenticated); - expect(result.current.apiKeyDefaultValue).toBe('env-key'); + await act(async () => { + deferredRefreshAuth.resolve(); }); + + expect(mockConfig.refreshAuth).toHaveBeenCalledWith(AuthType.USE_GEMINI); + 
expect(result.current.authState).toBe(AuthState.Authenticated); + expect(result.current.apiKeyDefaultValue).toBe('env-key'); }); it('should prioritize env key over stored key when both are present', async () => { - mockLoadApiKey.mockResolvedValue('stored-key'); process.env['GEMINI_API_KEY'] = 'env-key'; - const { result } = renderHook(() => + + const { result } = await renderHook(() => useAuthCommand(createSettings(AuthType.USE_GEMINI), mockConfig), ); - await waitFor(() => { - expect(mockConfig.refreshAuth).toHaveBeenCalledWith( - AuthType.USE_GEMINI, - ); - expect(result.current.authState).toBe(AuthState.Authenticated); - // The environment key should take precedence - expect(result.current.apiKeyDefaultValue).toBe('env-key'); + await act(async () => { + deferredRefreshAuth.resolve(); }); + + expect(mockConfig.refreshAuth).toHaveBeenCalledWith(AuthType.USE_GEMINI); + expect(result.current.authState).toBe(AuthState.Authenticated); + expect(result.current.apiKeyDefaultValue).toBe('env-key'); }); it('should set error if validation fails', async () => { mockValidateAuthMethod.mockReturnValue('Validation Failed'); - const { result } = renderHook(() => + const { result } = await renderHook(() => useAuthCommand(createSettings(AuthType.LOGIN_WITH_GOOGLE), mockConfig), ); - await waitFor(() => { - expect(result.current.authError).toBe('Validation Failed'); - expect(result.current.authState).toBe(AuthState.Updating); - }); + expect(result.current.authError).toBe('Validation Failed'); + expect(result.current.authState).toBe(AuthState.Updating); }); it('should set error if GEMINI_DEFAULT_AUTH_TYPE is invalid', async () => { process.env['GEMINI_DEFAULT_AUTH_TYPE'] = 'INVALID_TYPE'; - const { result } = renderHook(() => + const { result } = await renderHook(() => useAuthCommand(createSettings(AuthType.LOGIN_WITH_GOOGLE), mockConfig), ); - await waitFor(() => { - expect(result.current.authError).toContain( - 'Invalid value for GEMINI_DEFAULT_AUTH_TYPE', - ); - 
expect(result.current.authState).toBe(AuthState.Updating); - }); + expect(result.current.authError).toContain( + 'Invalid value for GEMINI_DEFAULT_AUTH_TYPE', + ); + expect(result.current.authState).toBe(AuthState.Updating); }); it('should authenticate successfully for valid auth type', async () => { - const { result } = renderHook(() => + const { result } = await renderHook(() => useAuthCommand(createSettings(AuthType.LOGIN_WITH_GOOGLE), mockConfig), ); - await waitFor(() => { - expect(mockConfig.refreshAuth).toHaveBeenCalledWith( - AuthType.LOGIN_WITH_GOOGLE, - ); - expect(result.current.authState).toBe(AuthState.Authenticated); - expect(result.current.authError).toBeNull(); + await act(async () => { + deferredRefreshAuth.resolve(); }); + + expect(mockConfig.refreshAuth).toHaveBeenCalledWith( + AuthType.LOGIN_WITH_GOOGLE, + ); + expect(result.current.authState).toBe(AuthState.Authenticated); + expect(result.current.authError).toBeNull(); }); it('should handle refreshAuth failure', async () => { - (mockConfig.refreshAuth as Mock).mockRejectedValue( - new Error('Auth Failed'), - ); - const { result } = renderHook(() => + const { result } = await renderHook(() => useAuthCommand(createSettings(AuthType.LOGIN_WITH_GOOGLE), mockConfig), ); - await waitFor(() => { - expect(result.current.authError).toContain('Failed to sign in'); - expect(result.current.authState).toBe(AuthState.Updating); + await act(async () => { + deferredRefreshAuth.reject(new Error('Auth Failed')); }); + + expect(result.current.authError).toContain('Failed to sign in'); + expect(result.current.authState).toBe(AuthState.Updating); }); it('should handle ProjectIdRequiredError without "Failed to login" prefix', async () => { const projectIdError = new ProjectIdRequiredError(); - (mockConfig.refreshAuth as Mock).mockRejectedValue(projectIdError); - const { result } = renderHook(() => + const { result } = await renderHook(() => useAuthCommand(createSettings(AuthType.LOGIN_WITH_GOOGLE), mockConfig), ); - 
await waitFor(() => { - expect(result.current.authError).toBe( - 'This account requires setting the GOOGLE_CLOUD_PROJECT or GOOGLE_CLOUD_PROJECT_ID env var. See https://goo.gle/gemini-cli-auth-docs#workspace-gca', - ); - expect(result.current.authError).not.toContain('Failed to login'); - expect(result.current.authState).toBe(AuthState.Updating); + await act(async () => { + deferredRefreshAuth.reject(projectIdError); }); + + expect(result.current.authError).toBe( + 'This account requires setting the GOOGLE_CLOUD_PROJECT or GOOGLE_CLOUD_PROJECT_ID env var. See https://goo.gle/gemini-cli-auth-docs#workspace-gca', + ); + expect(result.current.authError).not.toContain('Failed to login'); + expect(result.current.authState).toBe(AuthState.Updating); }); }); }); diff --git a/packages/cli/src/ui/commands/clearCommand.ts b/packages/cli/src/ui/commands/clearCommand.ts index 061c4f9085..fb032da811 100644 --- a/packages/cli/src/ui/commands/clearCommand.ts +++ b/packages/cli/src/ui/commands/clearCommand.ts @@ -9,6 +9,7 @@ import { SessionEndReason, SessionStartSource, flushTelemetry, + resetBrowserSession, } from '@google/gemini-cli-core'; import { CommandKind, type SlashCommand } from './types.js'; import { MessageType } from '../types.js'; @@ -43,6 +44,10 @@ export const clearCommand: SlashCommand = { if (geminiClient) { context.ui.setDebugMessage('Clearing terminal and resetting chat.'); + + // Close persistent browser sessions before resetting chat + await resetBrowserSession(); + // If resetChat fails, the exception will propagate and halt the command, // which is the correct behavior to signal a failure to the user. 
await geminiClient.resetChat(); diff --git a/packages/cli/src/ui/commands/extensionsCommand.test.ts b/packages/cli/src/ui/commands/extensionsCommand.test.ts index dc49390c7e..8f065438e2 100644 --- a/packages/cli/src/ui/commands/extensionsCommand.test.ts +++ b/packages/cli/src/ui/commands/extensionsCommand.test.ts @@ -710,10 +710,14 @@ describe('extensionsCommand', () => { size: 100, } as Stats); await linkAction!(mockContext, packageName); - expect(mockInstallExtension).toHaveBeenCalledWith({ - source: packageName, - type: 'link', - }); + expect(mockInstallExtension).toHaveBeenCalledWith( + { + source: packageName, + type: 'link', + }, + undefined, + undefined, + ); expect(mockContext.ui.addItem).toHaveBeenCalledWith({ type: MessageType.INFO, text: `Linking extension from "${packageName}"...`, @@ -733,10 +737,14 @@ describe('extensionsCommand', () => { } as Stats); await linkAction!(mockContext, packageName); - expect(mockInstallExtension).toHaveBeenCalledWith({ - source: packageName, - type: 'link', - }); + expect(mockInstallExtension).toHaveBeenCalledWith( + { + source: packageName, + type: 'link', + }, + undefined, + undefined, + ); expect(mockContext.ui.addItem).toHaveBeenCalledWith({ type: MessageType.ERROR, text: `Failed to link extension from "${packageName}": ${errorMessage}`, diff --git a/packages/cli/src/ui/commands/extensionsCommand.ts b/packages/cli/src/ui/commands/extensionsCommand.ts index 8e988917e5..aed7595389 100644 --- a/packages/cli/src/ui/commands/extensionsCommand.ts +++ b/packages/cli/src/ui/commands/extensionsCommand.ts @@ -286,6 +286,11 @@ async function exploreAction( await installAction(context, extension.url, requestConsentOverride); context.ui.removeComponent(); }, + onLink: async (extension, requestConsentOverride) => { + debugLogger.log(`Linking extension: ${extension.extensionName}`); + await linkAction(context, extension.url, requestConsentOverride); + context.ui.removeComponent(); + }, onClose: () => context.ui.removeComponent(), 
extensionManager, }), @@ -533,7 +538,11 @@ async function installAction( } } -async function linkAction(context: CommandContext, args: string) { +async function linkAction( + context: CommandContext, + args: string, + requestConsentOverride?: (consent: string) => Promise, +) { const extensionLoader = context.services.agentContext?.config.getExtensionLoader(); if (!(extensionLoader instanceof ExtensionManager)) { @@ -582,8 +591,11 @@ async function linkAction(context: CommandContext, args: string) { source: sourceFilepath, type: 'link', }; - const extension = - await extensionLoader.installOrUpdateExtension(installMetadata); + const extension = await extensionLoader.installOrUpdateExtension( + installMetadata, + undefined, + requestConsentOverride, + ); context.ui.addItem({ type: MessageType.INFO, text: `Extension "${extension.name}" linked successfully.`, diff --git a/packages/cli/src/ui/commands/policiesCommand.test.ts b/packages/cli/src/ui/commands/policiesCommand.test.ts index c5baa89d5d..929b528290 100644 --- a/packages/cli/src/ui/commands/policiesCommand.test.ts +++ b/packages/cli/src/ui/commands/policiesCommand.test.ts @@ -116,7 +116,9 @@ describe('policiesCommand', () => { expect(content).toContain( '### Yolo Mode Policies (combined with normal mode policies)', ); - expect(content).toContain('### Plan Mode Policies'); + expect(content).toContain( + '### Plan Mode Policies (combined with normal mode policies)', + ); expect(content).toContain( '**DENY** tool: `dangerousTool` [Priority: 10]', ); @@ -162,7 +164,9 @@ describe('policiesCommand', () => { const content = (call[0] as { text: string }).text; // Plan-only rules appear under Plan Mode section - expect(content).toContain('### Plan Mode Policies'); + expect(content).toContain( + '### Plan Mode Policies (combined with normal mode policies)', + ); // glob ALLOW is plan-only, should appear in plan section expect(content).toContain('**ALLOW** tool: `glob` [Priority: 70]'); // shell ALLOW has no modes (applies 
to all), appears in normal section diff --git a/packages/cli/src/ui/commands/policiesCommand.ts b/packages/cli/src/ui/commands/policiesCommand.ts index 40ed56ae3b..c6f3b1e1e1 100644 --- a/packages/cli/src/ui/commands/policiesCommand.ts +++ b/packages/cli/src/ui/commands/policiesCommand.ts @@ -100,7 +100,10 @@ const listPoliciesCommand: SlashCommand = { 'Yolo Mode Policies (combined with normal mode policies)', uniqueYolo, ); - content += formatSection('Plan Mode Policies', uniquePlan); + content += formatSection( + 'Plan Mode Policies (combined with normal mode policies)', + uniquePlan, + ); context.ui.addItem( { diff --git a/packages/cli/src/ui/commands/rewindCommand.test.tsx b/packages/cli/src/ui/commands/rewindCommand.test.tsx index d93d365a3e..f878091a45 100644 --- a/packages/cli/src/ui/commands/rewindCommand.test.tsx +++ b/packages/cli/src/ui/commands/rewindCommand.test.tsx @@ -38,6 +38,7 @@ vi.mock('@google/gemini-cli-core', async (importOriginal) => { return { ...actual, coreEvents: { + // eslint-disable-next-line @typescript-eslint/no-misused-spread ...actual.coreEvents, emitFeedback: vi.fn(), }, diff --git a/packages/cli/src/ui/components/AboutBox.test.tsx b/packages/cli/src/ui/components/AboutBox.test.tsx index 1db36b1f60..9115ca31c1 100644 --- a/packages/cli/src/ui/components/AboutBox.test.tsx +++ b/packages/cli/src/ui/components/AboutBox.test.tsx @@ -25,10 +25,9 @@ describe('AboutBox', () => { }; it('renders with required props', async () => { - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , ); - await waitUntilReady(); const output = lastFrame(); expect(output).toContain('About Gemini CLI'); expect(output).toContain('1.0.0'); @@ -46,10 +45,9 @@ describe('AboutBox', () => { ['tier', 'Enterprise', 'Tier'], ])('renders optional prop %s', async (prop, value, label) => { const props = { ...defaultProps, [prop]: value }; - const { lastFrame, waitUntilReady, unmount } = 
await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , ); - await waitUntilReady(); const output = lastFrame(); expect(output).toContain(label); expect(output).toContain(value); @@ -58,10 +56,9 @@ describe('AboutBox', () => { it('renders Auth Method with email when userEmail is provided', async () => { const props = { ...defaultProps, userEmail: 'test@example.com' }; - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , ); - await waitUntilReady(); const output = lastFrame(); expect(output).toContain('Signed in with Google (test@example.com)'); unmount(); @@ -69,10 +66,9 @@ describe('AboutBox', () => { it('renders Auth Method correctly when not oauth', async () => { const props = { ...defaultProps, selectedAuthType: 'api-key' }; - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , ); - await waitUntilReady(); const output = lastFrame(); expect(output).toContain('api-key'); unmount(); diff --git a/packages/cli/src/ui/components/AdminSettingsChangedDialog.test.tsx b/packages/cli/src/ui/components/AdminSettingsChangedDialog.test.tsx index 19db058b87..76a36fe4dc 100644 --- a/packages/cli/src/ui/components/AdminSettingsChangedDialog.test.tsx +++ b/packages/cli/src/ui/components/AdminSettingsChangedDialog.test.tsx @@ -17,15 +17,14 @@ describe('AdminSettingsChangedDialog', () => { }); it('renders correctly', async () => { - const { lastFrame, waitUntilReady } = await renderWithProviders( + const { lastFrame } = await renderWithProviders( , ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); it('restarts on "r" key press', async () => { - const { stdin, waitUntilReady } = await renderWithProviders( + const { stdin } = await renderWithProviders( , { uiActions: { @@ -33,7 +32,6 @@ describe('AdminSettingsChangedDialog', () => { }, }, ); - await 
waitUntilReady(); act(() => { stdin.write('r'); @@ -43,7 +41,7 @@ describe('AdminSettingsChangedDialog', () => { }); it.each(['r', 'R'])('restarts on "%s" key press', async (key) => { - const { stdin, waitUntilReady } = await renderWithProviders( + const { stdin } = await renderWithProviders( , { uiActions: { @@ -51,7 +49,6 @@ describe('AdminSettingsChangedDialog', () => { }, }, ); - await waitUntilReady(); act(() => { stdin.write(key); diff --git a/packages/cli/src/ui/components/AgentConfigDialog.test.tsx b/packages/cli/src/ui/components/AgentConfigDialog.test.tsx index a2bfe052bb..2c6ea454db 100644 --- a/packages/cli/src/ui/components/AgentConfigDialog.test.tsx +++ b/packages/cli/src/ui/components/AgentConfigDialog.test.tsx @@ -126,7 +126,6 @@ describe('AgentConfigDialog', () => { />, { settings, uiState: { mainAreaWidth: 100 } }, ); - await result.waitUntilReady(); return result; }; diff --git a/packages/cli/src/ui/components/AlternateBufferQuittingDisplay.test.tsx b/packages/cli/src/ui/components/AlternateBufferQuittingDisplay.test.tsx index da71895485..571e0d36d3 100644 --- a/packages/cli/src/ui/components/AlternateBufferQuittingDisplay.test.tsx +++ b/packages/cli/src/ui/components/AlternateBufferQuittingDisplay.test.tsx @@ -108,7 +108,7 @@ describe('AlternateBufferQuittingDisplay', () => { it('renders with active and pending tool messages', async () => { persistentStateMock.setData({ tipsShown: 0 }); - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , { uiState: { @@ -118,14 +118,13 @@ describe('AlternateBufferQuittingDisplay', () => { }, }, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot('with_history_and_pending'); unmount(); }); it('renders with empty history and no pending items', async () => { persistentStateMock.setData({ tipsShown: 0 }); - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await 
renderWithProviders( , { uiState: { @@ -135,14 +134,13 @@ describe('AlternateBufferQuittingDisplay', () => { }, }, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot('empty'); unmount(); }); it('renders with history but no pending items', async () => { persistentStateMock.setData({ tipsShown: 0 }); - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , { uiState: { @@ -152,14 +150,13 @@ describe('AlternateBufferQuittingDisplay', () => { }, }, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot('with_history_no_pending'); unmount(); }); it('renders with pending items but no history', async () => { persistentStateMock.setData({ tipsShown: 0 }); - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , { uiState: { @@ -169,7 +166,6 @@ describe('AlternateBufferQuittingDisplay', () => { }, }, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot('with_pending_no_history'); unmount(); }); @@ -195,7 +191,7 @@ describe('AlternateBufferQuittingDisplay', () => { ], }, ]; - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , { uiState: { @@ -205,7 +201,6 @@ describe('AlternateBufferQuittingDisplay', () => { }, }, ); - await waitUntilReady(); const output = lastFrame(); expect(output).toContain('Action Required (was prompted):'); expect(output).toContain('confirming_tool'); @@ -220,7 +215,7 @@ describe('AlternateBufferQuittingDisplay', () => { { id: 1, type: 'user', text: 'Hello Gemini' }, { id: 2, type: 'gemini', text: 'Hello User!' 
}, ]; - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , { uiState: { @@ -230,7 +225,6 @@ describe('AlternateBufferQuittingDisplay', () => { }, }, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot('with_user_gemini_messages'); unmount(); }); diff --git a/packages/cli/src/ui/components/AnsiOutput.test.tsx b/packages/cli/src/ui/components/AnsiOutput.test.tsx index ac824fefe6..758361be0a 100644 --- a/packages/cli/src/ui/components/AnsiOutput.test.tsx +++ b/packages/cli/src/ui/components/AnsiOutput.test.tsx @@ -29,10 +29,9 @@ describe('', () => { createAnsiToken({ text: 'world!' }), ], ]; - const { lastFrame, waitUntilReady, unmount } = render( + const { lastFrame, unmount } = await render( , ); - await waitUntilReady(); expect(lastFrame().trim()).toBe('Hello, world!'); unmount(); }); @@ -47,10 +46,9 @@ describe('', () => { { style: { inverse: true }, text: 'Inverse' }, ])('correctly applies style $text', async ({ style, text }) => { const data: AnsiOutput = [[createAnsiToken({ text, ...style })]]; - const { lastFrame, waitUntilReady, unmount } = render( + const { lastFrame, unmount } = await render( , ); - await waitUntilReady(); expect(lastFrame().trim()).toBe(text); unmount(); }); @@ -61,10 +59,9 @@ describe('', () => { { color: { fg: '#00ff00', bg: '#ff00ff' }, text: 'Green FG Magenta BG' }, ])('correctly applies color $text', async ({ color, text }) => { const data: AnsiOutput = [[createAnsiToken({ text, ...color })]]; - const { lastFrame, waitUntilReady, unmount } = render( + const { lastFrame, unmount } = await render( , ); - await waitUntilReady(); expect(lastFrame().trim()).toBe(text); unmount(); }); @@ -76,10 +73,9 @@ describe('', () => { [createAnsiToken({ text: 'Third line' })], [createAnsiToken({ text: '' })], ]; - const { lastFrame, waitUntilReady, unmount } = render( + const { lastFrame, unmount } = await render( , ); - await waitUntilReady(); const 
output = lastFrame(); expect(output).toBeDefined(); const lines = output.split('\n'); @@ -96,10 +92,9 @@ describe('', () => { [createAnsiToken({ text: 'Line 3' })], [createAnsiToken({ text: 'Line 4' })], ]; - const { lastFrame, waitUntilReady, unmount } = render( + const { lastFrame, unmount } = await render( , ); - await waitUntilReady(); const output = lastFrame(); expect(output).not.toContain('Line 1'); expect(output).not.toContain('Line 2'); @@ -115,10 +110,9 @@ describe('', () => { [createAnsiToken({ text: 'Line 3' })], [createAnsiToken({ text: 'Line 4' })], ]; - const { lastFrame, waitUntilReady, unmount } = render( + const { lastFrame, unmount } = await render( , ); - await waitUntilReady(); const output = lastFrame(); expect(output).not.toContain('Line 1'); expect(output).not.toContain('Line 2'); @@ -135,7 +129,7 @@ describe('', () => { [createAnsiToken({ text: 'Line 4' })], ]; // availableTerminalHeight=3, maxLines=2 => show 2 lines - const { lastFrame, waitUntilReady, unmount } = render( + const { lastFrame, unmount } = await render( ', () => { width={80} />, ); - await waitUntilReady(); const output = lastFrame(); expect(output).not.toContain('Line 2'); expect(output).toContain('Line 3'); @@ -156,10 +149,9 @@ describe('', () => { for (let i = 0; i < 1000; i++) { largeData.push([createAnsiToken({ text: `Line ${i}` })]); } - const { lastFrame, waitUntilReady, unmount } = render( + const { lastFrame, unmount } = await render( , ); - await waitUntilReady(); // We are just checking that it renders something without crashing. 
expect(lastFrame()).toBeDefined(); unmount(); diff --git a/packages/cli/src/ui/components/AppHeader.test.tsx b/packages/cli/src/ui/components/AppHeader.test.tsx index 0d7e2b3a7b..4dbdbc0052 100644 --- a/packages/cli/src/ui/components/AppHeader.test.tsx +++ b/packages/cli/src/ui/components/AppHeader.test.tsx @@ -8,8 +8,10 @@ import { renderWithProviders, persistentStateMock, } from '../../test-utils/render.js'; +import type { LoadedSettings } from '../../config/settings.js'; import { AppHeader } from './AppHeader.js'; import { describe, it, expect, vi } from 'vitest'; +import { makeFakeConfig } from '@google/gemini-cli-core'; import crypto from 'node:crypto'; vi.mock('../utils/terminalSetup.js', () => ({ @@ -27,13 +29,12 @@ describe('', () => { bannerVisible: true, }; - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , { uiState, }, ); - await waitUntilReady(); expect(lastFrame()).toContain('This is the default banner'); expect(lastFrame()).toMatchSnapshot(); @@ -50,13 +51,12 @@ describe('', () => { bannerVisible: true, }; - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , { uiState, }, ); - await waitUntilReady(); expect(lastFrame()).toContain('There are capacity issues'); expect(lastFrame()).toMatchSnapshot(); @@ -72,13 +72,12 @@ describe('', () => { }, }; - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , { uiState, }, ); - await waitUntilReady(); expect(lastFrame()).not.toContain('Banner'); expect(lastFrame()).toMatchSnapshot(); @@ -103,13 +102,12 @@ describe('', () => { }, }); - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , { uiState, }, ); - await waitUntilReady(); expect(lastFrame()).not.toContain('This is the 
default banner'); expect(lastFrame()).toMatchSnapshot(); @@ -129,13 +127,12 @@ describe('', () => { // and interfering with the expected persistentState.set call. persistentStateMock.setData({ tipsShown: 10 }); - const { waitUntilReady, unmount } = await renderWithProviders( + const { unmount } = await renderWithProviders( , { uiState, }, ); - await waitUntilReady(); expect(persistentStateMock.set).toHaveBeenCalledWith( 'defaultBannerShownCount', @@ -159,13 +156,12 @@ describe('', () => { bannerVisible: true, }; - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , { uiState, }, ); - await waitUntilReady(); expect(lastFrame()).not.toContain('First line\\nSecond line'); unmount(); @@ -183,13 +179,12 @@ describe('', () => { persistentStateMock.setData({ tipsShown: 5 }); - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , { uiState, }, ); - await waitUntilReady(); expect(lastFrame()).toContain('Tips'); expect(persistentStateMock.set).toHaveBeenCalledWith('tipsShown', 6); @@ -206,13 +201,12 @@ describe('', () => { persistentStateMock.setData({ tipsShown: 10 }); - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , { uiState, }, ); - await waitUntilReady(); expect(lastFrame()).not.toContain('Tips'); unmount(); @@ -234,7 +228,6 @@ describe('', () => { const session1 = await renderWithProviders(, { uiState, }); - await session1.waitUntilReady(); expect(session1.lastFrame()).toContain('Tips'); expect(persistentStateMock.get('tipsShown')).toBe(10); @@ -245,9 +238,50 @@ describe('', () => { , {}, ); - await session2.waitUntilReady(); expect(session2.lastFrame()).not.toContain('Tips'); session2.unmount(); }); + + it('should render the full logo when logged out', async () => { + const mockConfig = makeFakeConfig(); + 
vi.spyOn(mockConfig, 'getContentGeneratorConfig').mockReturnValue({ + authType: undefined, + } as any); // eslint-disable-line @typescript-eslint/no-explicit-any + + const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + , + { + config: mockConfig, + uiState: { + terminalWidth: 120, + }, + }, + ); + await waitUntilReady(); + + // Check for block characters from the logo + expect(lastFrame()).toContain('▗█▀▀▜▙'); + expect(lastFrame()).toMatchSnapshot(); + unmount(); + }); + + it('should NOT render Tips when ui.hideTips is true', async () => { + const mockConfig = makeFakeConfig(); + const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + , + { + config: mockConfig, + settings: { + merged: { + ui: { hideTips: true }, + }, + } as unknown as LoadedSettings, + }, + ); + await waitUntilReady(); + + expect(lastFrame()).not.toContain('Tips'); + unmount(); + }); }); diff --git a/packages/cli/src/ui/components/AppHeader.tsx b/packages/cli/src/ui/components/AppHeader.tsx index 0b15f917a6..7d0ef75a36 100644 --- a/packages/cli/src/ui/components/AppHeader.tsx +++ b/packages/cli/src/ui/components/AppHeader.tsx @@ -19,6 +19,9 @@ import { CliSpinner } from './CliSpinner.js'; import { isAppleTerminal } from '@google/gemini-cli-core'; +import { longAsciiLogoCompactText } from './AsciiArt.js'; +import { getAsciiArtWidth } from '../utils/textUtils.js'; + interface AppHeaderProps { version: string; showDetails?: boolean; @@ -41,6 +44,18 @@ const MAC_TERMINAL_ICON = `▝▜▄ ▗▟▀ ▗▟▀ `; +/** + * The horizontal padding (in columns) required for metadata (version, identity, etc.) + * when rendered alongside the ASCII logo. + */ +const LOGO_METADATA_PADDING = 20; + +/** + * The terminal width below which we switch to a narrow/column layout to prevent + * UI elements from wrapping or overlapping. 
+ */ +const NARROW_TERMINAL_BREAKPOINT = 60; + export const AppHeader = ({ version, showDetails = true }: AppHeaderProps) => { const settings = useSettings(); const config = useConfig(); @@ -49,70 +64,90 @@ export const AppHeader = ({ version, showDetails = true }: AppHeaderProps) => { const { bannerText } = useBanner(bannerData); const { showTips } = useTips(); + const authType = config.getContentGeneratorConfig()?.authType; + const loggedOut = !authType; + const showHeader = !( settings.merged.ui.hideBanner || config.getScreenReader() ); const ICON = isAppleTerminal() ? MAC_TERMINAL_ICON : DEFAULT_ICON; - if (!showDetails) { - return ( - - {showHeader && ( - - - {ICON} - - - - - Gemini CLI - - v{version} - - + let logoTextArt = ''; + if (loggedOut) { + const widthOfLongLogo = + getAsciiArtWidth(longAsciiLogoCompactText) + LOGO_METADATA_PADDING; + + if (terminalWidth >= widthOfLongLogo) { + logoTextArt = longAsciiLogoCompactText.trim(); + } + } + + // If the terminal is too narrow to fit the icon and metadata (especially long nightly versions) + // side-by-side, we switch to column mode to prevent wrapping. 
+ const isNarrow = terminalWidth < NARROW_TERMINAL_BREAKPOINT; + + const renderLogo = () => ( + + + {ICON} + + {logoTextArt && ( + + {logoTextArt} + + )} + + ); + + const renderMetadata = (isBelow = false) => ( + + {/* Line 1: Gemini CLI vVersion [Updating] */} + + + Gemini CLI + + v{version} + {updateInfo?.isUpdating && ( + + + Updating + )} - ); - } + + {showDetails && ( + <> + {/* Line 2: Blank */} + + + {/* Lines 3 & 4: User Identity info (Email /auth and Plan /upgrade) */} + {settings.merged.ui.showUserIdentity !== false && ( + + )} + + )} + + ); + + const useColumnLayout = !!logoTextArt || isNarrow; return ( {showHeader && ( - - - {ICON} - - - {/* Line 1: Gemini CLI vVersion [Updating] */} - - - Gemini CLI - - v{version} - {updateInfo && ( - - - Updating - - - )} - - - {/* Line 2: Blank */} - - - {/* Lines 3 & 4: User Identity info (Email /auth and Plan /upgrade) */} - {settings.merged.ui.showUserIdentity !== false && ( - - )} - + + {renderLogo()} + {useColumnLayout ? ( + {renderMetadata(true)} + ) : ( + renderMetadata(false) + )} )} diff --git a/packages/cli/src/ui/components/ApprovalModeIndicator.test.tsx b/packages/cli/src/ui/components/ApprovalModeIndicator.test.tsx index 4386891c7a..1b2decbe16 100644 --- a/packages/cli/src/ui/components/ApprovalModeIndicator.test.tsx +++ b/packages/cli/src/ui/components/ApprovalModeIndicator.test.tsx @@ -11,56 +11,50 @@ import { ApprovalMode } from '@google/gemini-cli-core'; describe('ApprovalModeIndicator', () => { it('renders correctly for AUTO_EDIT mode', async () => { - const { lastFrame, waitUntilReady } = render( + const { lastFrame } = await render( , ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); it('renders correctly for AUTO_EDIT mode with plan enabled', async () => { - const { lastFrame, waitUntilReady } = render( + const { lastFrame } = await render( , ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); it('renders correctly for PLAN mode', async () => { - const { 
lastFrame, waitUntilReady } = render( + const { lastFrame } = await render( , ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); it('renders correctly for YOLO mode', async () => { - const { lastFrame, waitUntilReady } = render( + const { lastFrame } = await render( , ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); it('renders correctly for DEFAULT mode', async () => { - const { lastFrame, waitUntilReady } = render( + const { lastFrame } = await render( , ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); it('renders correctly for DEFAULT mode with plan enabled', async () => { - const { lastFrame, waitUntilReady } = render( + const { lastFrame } = await render( , ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); }); diff --git a/packages/cli/src/ui/components/AsciiArt.ts b/packages/cli/src/ui/components/AsciiArt.ts index 79eb522c80..40f0eb8296 100644 --- a/packages/cli/src/ui/components/AsciiArt.ts +++ b/packages/cli/src/ui/components/AsciiArt.ts @@ -16,14 +16,14 @@ export const shortAsciiLogo = ` `; export const longAsciiLogo = ` - ███ █████████ ██████████ ██████ ██████ █████ ██████ █████ █████ -░░░███ ███░░░░░███░░███░░░░░█░░██████ ██████ ░░███ ░░██████ ░░███ ░░███ - ░░░███ ███ ░░░ ░███ █ ░ ░███░█████░███ ░███ ░███░███ ░███ ░███ - ░░░███ ░███ ░██████ ░███░░███ ░███ ░███ ░███░░███░███ ░███ - ███░ ░███ █████ ░███░░█ ░███ ░░░ ░███ ░███ ░███ ░░██████ ░███ - ███░ ░░███ ░░███ ░███ ░ █ ░███ ░███ ░███ ░███ ░░█████ ░███ - ███░ ░░█████████ ██████████ █████ █████ █████ █████ ░░█████ █████ -░░░ ░░░░░░░░░ ░░░░░░░░░░ ░░░░░ ░░░░░ ░░░░░ ░░░░░ ░░░░░ ░░░░░ + █████████ ██████████ ██████ ██████ █████ ██████ █████ █████ +███░░░░░███░░███░░░░░█░░██████ █████ ░░███░░██████ ░░███ ░░███ +███ ░░░░░░░ ░███ █ ░ ░███░█████░███ ░███ ░███░███ ░███ ░███ +░███ ░██████ ░███░░███ ░███ ░███ ░███░░███░███ ░███ +░███ █████ ░███░░█ ░███ ░░░ ░███ ░███ ░███ ░░██████ ░███ +░░███ ░░███ ░███ ░ █ ░███ ░███ ░███ ░███ 
░░█████ ░███ + ░░█████████ ██████████ █████ █████ █████ █████ ░░████ █████ + ░░░░░░░░░ ░░░░░░░░░░ ░░░░░ ░░░░░ ░░░░░ ░░░░░ ░░░░ ░░░░░ `; export const tinyAsciiLogo = ` @@ -36,3 +36,24 @@ export const tinyAsciiLogo = ` ███░ ░░█████████ ░░░ ░░░░░░░░░ `; + +export const shortAsciiLogoCompactText = ` +▟▛▀▀█▖▜█▀▀▜▝██▙▗██▛▝█▛▝██▙ ▜█▘▜█▘ +▐█ ▐█▄▌ █▌▜█▘█▌ █▌ █▌▜▙▐█ ▐█ +▝█▖ ▜█▘▐█ ▘▗ █▌ █▌ █▌ █▌ ▜██ ▐█ + ▝▀▀▀▀ ▀▀▀▀▀▝▀▀ ▝▀▀▝▀▀▝▀▀ ▀▀▘▀▀▘ +`; + +export const longAsciiLogoCompactText = ` +▗█▀▀▜▙▝█▛▀▀▌▜██▖▟██▘▜█▘▜██▖▝█▛▝█▛ +█▌ █▙▟ ▐█▝█▛▐█ ▐█ ▐█▝█▖█▌ █▌ +▜▙ ▝█▛ █▌▝ ▖▐█ ▐█ ▐█ ▐█ ▝██▌ █▌ + ▀▀▀▀▘▝▀▀▀▀▘▀▀▘ ▀▀▘▀▀▘▀▀▘ ▝▀▀▝▀▀ +`; + +export const tinyAsciiLogoCompactText = ` +▟▛▀▀█▖ +▐█ +▝█▖ ▜█▘ + ▝▀▀▀▀ +`; diff --git a/packages/cli/src/ui/components/AskUserDialog.test.tsx b/packages/cli/src/ui/components/AskUserDialog.test.tsx index 8ed240389c..4f1cca7d8c 100644 --- a/packages/cli/src/ui/components/AskUserDialog.test.tsx +++ b/packages/cli/src/ui/components/AskUserDialog.test.tsx @@ -48,7 +48,7 @@ describe('AskUserDialog', () => { ]; it('renders question and options', async () => { - const { lastFrame, waitUntilReady } = await renderWithProviders( + const { lastFrame } = await renderWithProviders( { { width: 120 }, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); @@ -288,7 +287,7 @@ describe('AskUserDialog', () => { }); describe.each([ - { useAlternateBuffer: true, expectedArrows: false }, + { useAlternateBuffer: true, expectedArrows: true }, { useAlternateBuffer: false, expectedArrows: true }, ])( 'Scroll Arrows (useAlternateBuffer: $useAlternateBuffer)', @@ -397,7 +396,7 @@ describe('AskUserDialog', () => { }, ]; - const { lastFrame, waitUntilReady } = await renderWithProviders( + const { lastFrame } = await renderWithProviders( { { width: 120 }, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); it('hides progress header for single question', async () => { - const { lastFrame, waitUntilReady } = await renderWithProviders( + const { lastFrame 
} = await renderWithProviders( { { width: 120 }, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); it('shows keyboard hints', async () => { - const { lastFrame, waitUntilReady } = await renderWithProviders( + const { lastFrame } = await renderWithProviders( { { width: 120 }, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); @@ -471,7 +467,6 @@ describe('AskUserDialog', () => { { width: 120 }, ); - await waitUntilReady(); expect(lastFrame()).toContain('Which testing framework?'); writeKey(stdin, '\x1b[C'); // Right arrow @@ -582,7 +577,7 @@ describe('AskUserDialog', () => { }, ]; - const { lastFrame, waitUntilReady } = await renderWithProviders( + const { lastFrame } = await renderWithProviders( { { width: 120 }, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); @@ -736,7 +730,7 @@ describe('AskUserDialog', () => { }, ]; - const { lastFrame, waitUntilReady } = await renderWithProviders( + const { lastFrame } = await renderWithProviders( { { width: 120 }, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); @@ -759,7 +752,7 @@ describe('AskUserDialog', () => { }, ]; - const { lastFrame, waitUntilReady } = await renderWithProviders( + const { lastFrame } = await renderWithProviders( { { width: 120 }, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); @@ -820,7 +812,7 @@ describe('AskUserDialog', () => { }, ]; - const { lastFrame, waitUntilReady } = await renderWithProviders( + const { lastFrame } = await renderWithProviders( { { width: 120 }, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); @@ -1462,4 +1453,85 @@ describe('AskUserDialog', () => { }); }); }); + + it('shows at least 3 selection options even in small terminal heights', async () => { + const questions: Question[] = [ + { + question: + 'A very long question that would normally take up most of the space and squeeze the list if we did not have a heuristic to prevent it. 
This line is just to make it longer. And another one. Imagine this is a plan.', + header: 'Test', + type: QuestionType.CHOICE, + options: [ + { label: 'Option 1', description: 'Description 1' }, + { label: 'Option 2', description: 'Description 2' }, + { label: 'Option 3', description: 'Description 3' }, + { label: 'Option 4', description: 'Description 4' }, + ], + multiSelect: false, + }, + ]; + + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + { width: 80 }, + ); + + await waitFor(async () => { + await waitUntilReady(); + const frame = lastFrame(); + // Should show at least 3 options + expect(frame).toContain('1. Option 1'); + expect(frame).toContain('2. Option 2'); + expect(frame).toContain('3. Option 3'); + }); + }); + + it('allows the question to exceed 15 lines in a tall terminal', async () => { + const longQuestion = Array.from( + { length: 25 }, + (_, i) => `Line ${i + 1}`, + ).join('\n'); + const questions: Question[] = [ + { + question: longQuestion, + header: 'Tall Test', + type: QuestionType.CHOICE, + options: [ + { label: 'Option 1', description: 'D1' }, + { label: 'Option 2', description: 'D2' }, + { label: 'Option 3', description: 'D3' }, + ], + multiSelect: false, + unconstrainedHeight: false, + }, + ]; + + const { lastFrame, waitUntilReady } = await renderWithProviders( + , + { width: 80 }, + ); + + await waitFor(async () => { + await waitUntilReady(); + const frame = lastFrame(); + // Should show more than 15 lines of the question + // (The limit was previously 15, so showing Line 20 proves it's working) + expect(frame).toContain('Line 20'); + expect(frame).toContain('Line 25'); + // Should still show the options + expect(frame).toContain('1. 
Option 1'); + }); + }); }); diff --git a/packages/cli/src/ui/components/AskUserDialog.tsx b/packages/cli/src/ui/components/AskUserDialog.tsx index b1d23885e6..483fcb5055 100644 --- a/packages/cli/src/ui/components/AskUserDialog.tsx +++ b/packages/cli/src/ui/components/AskUserDialog.tsx @@ -849,16 +849,24 @@ const ChoiceQuestionView: React.FC = ({ ? Math.max(1, availableHeight - overhead) : undefined; + // Reserve space for at least 3 items if more selectionItems available. + const reservedListHeight = Math.min(selectionItems.length * 2, 6); const questionHeightLimit = listHeight && !isAlternateBuffer ? question.unconstrainedHeight ? Math.max(1, listHeight - selectionItems.length * 2) - : Math.min(15, Math.max(1, listHeight - DIALOG_PADDING)) + : Math.max(1, listHeight - Math.max(DIALOG_PADDING, reservedListHeight)) : undefined; const maxItemsToShow = - listHeight && questionHeightLimit - ? Math.max(1, Math.floor((listHeight - questionHeightLimit) / 2)) + listHeight && (!isAlternateBuffer || availableHeight !== undefined) + ? Math.min( + selectionItems.length, + Math.max( + 1, + Math.floor((listHeight - (questionHeightLimit ?? 
0)) / 2), + ), + ) : selectionItems.length; return ( diff --git a/packages/cli/src/ui/components/BackgroundShellDisplay.test.tsx b/packages/cli/src/ui/components/BackgroundShellDisplay.test.tsx index 847dcd9a87..c097028a0d 100644 --- a/packages/cli/src/ui/components/BackgroundShellDisplay.test.tsx +++ b/packages/cli/src/ui/components/BackgroundShellDisplay.test.tsx @@ -145,7 +145,7 @@ describe('', () => { it('renders the output of the active shell', async () => { const width = 80; - const { lastFrame, waitUntilReady, unmount } = render( + const { lastFrame, unmount } = await render( ', () => { , width, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); unmount(); @@ -166,7 +165,7 @@ describe('', () => { it('renders tabs for multiple shells', async () => { const width = 100; - const { lastFrame, waitUntilReady, unmount } = render( + const { lastFrame, unmount } = await render( ', () => { , width, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); unmount(); @@ -187,7 +185,7 @@ describe('', () => { it('highlights the focused state', async () => { const width = 80; - const { lastFrame, waitUntilReady, unmount } = render( + const { lastFrame, unmount } = await render( ', () => { , width, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); unmount(); @@ -208,7 +205,7 @@ describe('', () => { it('resizes the PTY on mount and when dimensions change', async () => { const width = 80; - const { rerender, waitUntilReady, unmount } = render( + const { rerender, unmount } = await render( ', () => { , width, ); - await waitUntilReady(); expect(ShellExecutionService.resizePty).toHaveBeenCalledWith( shell1.pid, @@ -241,7 +237,6 @@ describe('', () => { /> , ); - await waitUntilReady(); expect(ShellExecutionService.resizePty).toHaveBeenCalledWith( shell1.pid, @@ -253,7 +248,7 @@ describe('', () => { it('renders the process list when isListOpenProp is true', async () => { const width = 80; - const { lastFrame, waitUntilReady, unmount } 
= render( + const { lastFrame, unmount } = await render( ', () => { , width, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); unmount(); @@ -274,7 +268,7 @@ describe('', () => { it('selects the current process and closes the list when Ctrl+L is pressed in list view', async () => { const width = 80; - const { waitUntilReady, unmount } = render( + const { unmount } = await render( ', () => { , width, ); - await waitUntilReady(); // Simulate down arrow to select the second process (handled by RadioButtonSelect) await act(async () => { simulateKey({ name: 'down' }); }); - await waitUntilReady(); // Simulate Ctrl+L (handled by BackgroundShellDisplay) await act(async () => { simulateKey({ name: 'l', ctrl: true }); }); - await waitUntilReady(); expect(mockSetActiveBackgroundShellPid).toHaveBeenCalledWith(shell2.pid); expect(mockSetIsBackgroundShellListOpen).toHaveBeenCalledWith(false); @@ -308,7 +299,7 @@ describe('', () => { it('kills the highlighted process when Ctrl+K is pressed in list view', async () => { const width = 80; - const { waitUntilReady, unmount } = render( + const { unmount } = await render( ', () => { , width, ); - await waitUntilReady(); // Initial state: shell1 (active) is highlighted @@ -329,13 +319,11 @@ describe('', () => { await act(async () => { simulateKey({ name: 'down' }); }); - await waitUntilReady(); // Press Ctrl+K await act(async () => { simulateKey({ name: 'k', ctrl: true }); }); - await waitUntilReady(); expect(mockDismissBackgroundShell).toHaveBeenCalledWith(shell2.pid); unmount(); @@ -343,7 +331,7 @@ describe('', () => { it('kills the active process when Ctrl+K is pressed in output view', async () => { const width = 80; - const { waitUntilReady, unmount } = render( + const { unmount } = await render( ', () => { , width, ); - await waitUntilReady(); await act(async () => { simulateKey({ name: 'k', ctrl: true }); }); - await waitUntilReady(); expect(mockDismissBackgroundShell).toHaveBeenCalledWith(shell1.pid); 
unmount(); @@ -370,7 +356,7 @@ describe('', () => { it('scrolls to active shell when list opens', async () => { // shell2 is active const width = 80; - const { lastFrame, waitUntilReady, unmount } = render( + const { lastFrame, unmount } = await render( ', () => { , width, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); unmount(); @@ -402,7 +387,7 @@ describe('', () => { mockShells.set(exitedShell.pid, exitedShell); const width = 80; - const { lastFrame, waitUntilReady, unmount } = render( + const { lastFrame, unmount } = await render( ', () => { , width, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); unmount(); diff --git a/packages/cli/src/ui/components/Checklist.test.tsx b/packages/cli/src/ui/components/Checklist.test.tsx index 442ee0400f..329a560aec 100644 --- a/packages/cli/src/ui/components/Checklist.test.tsx +++ b/packages/cli/src/ui/components/Checklist.test.tsx @@ -18,10 +18,9 @@ describe('', () => { ]; it('renders nothing when list is empty', async () => { - const { lastFrame, waitUntilReady } = render( + const { lastFrame } = await render( , ); - await waitUntilReady(); expect(lastFrame({ allowEmpty: true })).toBe(''); }); @@ -30,15 +29,14 @@ describe('', () => { { status: 'completed', label: 'Task 1' }, { status: 'cancelled', label: 'Task 2' }, ]; - const { lastFrame, waitUntilReady } = render( + const { lastFrame } = await render( , ); - await waitUntilReady(); expect(lastFrame({ allowEmpty: true })).toBe(''); }); it('renders summary view correctly (collapsed)', async () => { - const { lastFrame, waitUntilReady } = render( + const { lastFrame } = await render( ', () => { toggleHint="toggle me" />, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); it('renders expanded view correctly', async () => { - const { lastFrame, waitUntilReady } = render( + const { lastFrame } = await render( ', () => { toggleHint="toggle me" />, ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); 
@@ -68,10 +64,9 @@ describe('', () => { { status: 'completed', label: 'Task 1' }, { status: 'pending', label: 'Task 2' }, ]; - const { lastFrame, waitUntilReady } = render( + const { lastFrame } = await render( , ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); }); diff --git a/packages/cli/src/ui/components/ChecklistItem.test.tsx b/packages/cli/src/ui/components/ChecklistItem.test.tsx index 4176f7914b..c71af523e1 100644 --- a/packages/cli/src/ui/components/ChecklistItem.test.tsx +++ b/packages/cli/src/ui/components/ChecklistItem.test.tsx @@ -17,8 +17,7 @@ describe('', () => { { status: 'cancelled', label: 'Skipped this' }, { status: 'blocked', label: 'Blocked this' }, ] as ChecklistItemData[])('renders %s item correctly', async (item) => { - const { lastFrame, waitUntilReady } = render(); - await waitUntilReady(); + const { lastFrame } = await render(); expect(lastFrame()).toMatchSnapshot(); }); @@ -28,12 +27,11 @@ describe('', () => { label: 'This is a very long text that should be truncated because the wrap prop is set to truncate', }; - const { lastFrame, waitUntilReady } = render( + const { lastFrame } = await render( , ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); @@ -43,12 +41,11 @@ describe('', () => { label: 'This is a very long text that should wrap because the default behavior is wrapping', }; - const { lastFrame, waitUntilReady } = render( + const { lastFrame } = await render( , ); - await waitUntilReady(); expect(lastFrame()).toMatchSnapshot(); }); }); diff --git a/packages/cli/src/ui/components/CliSpinner.test.tsx b/packages/cli/src/ui/components/CliSpinner.test.tsx index cca997f370..4da6abb199 100644 --- a/packages/cli/src/ui/components/CliSpinner.test.tsx +++ b/packages/cli/src/ui/components/CliSpinner.test.tsx @@ -17,10 +17,7 @@ describe('', () => { it('should increment debugNumAnimatedComponents on mount and decrement on unmount', async () => { expect(debugState.debugNumAnimatedComponents).toBe(0); 
- const { waitUntilReady, unmount } = await renderWithProviders( - , - ); - await waitUntilReady(); + const { unmount } = await renderWithProviders(); expect(debugState.debugNumAnimatedComponents).toBe(1); unmount(); expect(debugState.debugNumAnimatedComponents).toBe(0); @@ -28,11 +25,9 @@ describe('', () => { it('should not render when showSpinner is false', async () => { const settings = createMockSettings({ ui: { showSpinner: false } }); - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( - , - { settings }, - ); - await waitUntilReady(); + const { lastFrame, unmount } = await renderWithProviders(, { + settings, + }); expect(lastFrame({ allowEmpty: true })).toBe(''); unmount(); }); diff --git a/packages/cli/src/ui/components/ColorsDisplay.test.tsx b/packages/cli/src/ui/components/ColorsDisplay.test.tsx index fdd08fd653..d934831c0e 100644 --- a/packages/cli/src/ui/components/ColorsDisplay.test.tsx +++ b/packages/cli/src/ui/components/ColorsDisplay.test.tsx @@ -96,10 +96,9 @@ describe('ColorsDisplay', () => { it('renders correctly', async () => { const mockTheme = themeManager.getActiveTheme(); - const { lastFrame, waitUntilReady, unmount } = await renderWithProviders( + const { lastFrame, unmount } = await renderWithProviders( , ); - await waitUntilReady(); const output = lastFrame(); // Check for title and description diff --git a/packages/cli/src/ui/components/Composer.test.tsx b/packages/cli/src/ui/components/Composer.test.tsx index 641fc24810..1cbb29a06c 100644 --- a/packages/cli/src/ui/components/Composer.test.tsx +++ b/packages/cli/src/ui/components/Composer.test.tsx @@ -17,13 +17,6 @@ import { import { ConfigContext } from '../contexts/ConfigContext.js'; import { SettingsContext } from '../contexts/SettingsContext.js'; import { createMockSettings } from '../../test-utils/settings.js'; -// Mock VimModeContext hook -vi.mock('../contexts/VimModeContext.js', () => ({ - useVimMode: vi.fn(() => ({ - vimEnabled: false, - vimMode: 'INSERT', 
- })), -})); import { ApprovalMode, tokenLimit, @@ -36,6 +29,21 @@ import type { LoadedSettings } from '../../config/settings.js'; import type { SessionMetrics } from '../contexts/SessionContext.js'; import type { TextBuffer } from './shared/text-buffer.js'; +// Mock VimModeContext hook +vi.mock('../contexts/VimModeContext.js', () => ({ + useVimMode: vi.fn(() => ({ + vimEnabled: false, + vimMode: 'INSERT', + })), +})); + +vi.mock('../hooks/useTerminalSize.js', () => ({ + useTerminalSize: vi.fn(() => ({ + columns: 100, + rows: 24, + })), +})); + const composerTestControls = vi.hoisted(() => ({ suggestionsVisible: false, isAlternateBuffer: false, @@ -58,18 +66,9 @@ vi.mock('./LoadingIndicator.js', () => ({ })); vi.mock('./StatusDisplay.js', () => ({ - StatusDisplay: () => StatusDisplay, -})); - -vi.mock('./ToastDisplay.js', () => ({ - ToastDisplay: () => ToastDisplay, - shouldShowToast: (uiState: UIState) => - uiState.ctrlCPressedOnce || - Boolean(uiState.transientMessage) || - uiState.ctrlDPressedOnce || - (uiState.showEscapePrompt && - (uiState.buffer.text.length > 0 || uiState.history.length > 0)) || - Boolean(uiState.queueErrorMessage), + StatusDisplay: ({ hideContextSummary }: { hideContextSummary: boolean }) => ( + StatusDisplay{hideContextSummary ? 
' (hidden summary)' : ''} + ), })); vi.mock('./ContextSummaryDisplay.js', () => ({ @@ -81,17 +80,15 @@ vi.mock('./HookStatusDisplay.js', () => ({ })); vi.mock('./ApprovalModeIndicator.js', () => ({ - ApprovalModeIndicator: () => ApprovalModeIndicator, + ApprovalModeIndicator: ({ approvalMode }: { approvalMode: ApprovalMode }) => ( + ApprovalModeIndicator: {approvalMode} + ), })); vi.mock('./ShellModeIndicator.js', () => ({ ShellModeIndicator: () => ShellModeIndicator, })); -vi.mock('./ShortcutsHint.js', () => ({ - ShortcutsHint: () => ShortcutsHint, -})); - vi.mock('./ShortcutsHelp.js', () => ({ ShortcutsHelp: () => ShortcutsHelp, })); @@ -174,6 +171,8 @@ const createMockUIState = (overrides: Partial = {}): UIState => isFocused: true, thought: '', currentLoadingPhrase: '', + currentTip: '', + currentWittyPhrase: '', elapsedTime: 0, ctrlCPressedOnce: false, ctrlDPressedOnce: false, @@ -201,6 +200,7 @@ const createMockUIState = (overrides: Partial = {}): UIState => activeHooks: [], isBackgroundShellVisible: false, embeddedShellFocused: false, + showIsExpandableHint: false, quota: { userTier: undefined, stats: undefined, @@ -247,22 +247,21 @@ const createMockConfig = (overrides = {}): Config => const renderComposer = async ( uiState: UIState, - settings = createMockSettings(), + settings = createMockSettings({ ui: {} }), config = createMockConfig(), uiActions = createMockUIActions(), ) => { - const result = render( + const result = await render( - + , ); - await result.waitUntilReady(); // Wait for shortcuts hint debounce if using fake timers if (vi.isFakeTimers()) { @@ -384,10 +383,12 @@ describe('Composer', () => { const { lastFrame } = await renderComposer(uiState, settings); const output = lastFrame(); - expect(output).toContain('LoadingIndicator: Thinking...'); + // In Refreshed UX, we don't force 'Thinking...' 
label in renderStatusNode + // It uses the subject directly + expect(output).toContain('LoadingIndicator: Thinking about code'); }); - it('hides shortcuts hint while loading', async () => { + it('shows shortcuts hint while loading', async () => { const uiState = createMockUIState({ streamingState: StreamingState.Responding, elapsedTime: 1, @@ -398,7 +399,8 @@ describe('Composer', () => { const output = lastFrame(); expect(output).toContain('LoadingIndicator'); - expect(output).not.toContain('ShortcutsHint'); + expect(output).toContain('press tab twice for more'); + expect(output).not.toContain('? for shortcuts'); }); it('renders LoadingIndicator with thought when loadingPhrases is off', async () => { @@ -454,9 +456,8 @@ describe('Composer', () => { const { lastFrame } = await renderComposer(uiState); - const output = lastFrame(); - expect(output).not.toContain('LoadingIndicator'); - expect(output).not.toContain('esc to cancel'); + const output = lastFrame({ allowEmpty: true }); + expect(output).toBe(''); }); it('renders LoadingIndicator when embedded shell is focused but background shell is visible', async () => { @@ -559,8 +560,10 @@ describe('Composer', () => { const { lastFrame } = await renderComposer(uiState); const output = lastFrame(); - expect(output).toContain('ToastDisplay'); - expect(output).not.toContain('ApprovalModeIndicator'); + expect(output).toContain('Press Ctrl+C again to exit.'); + // In Refreshed UX, Row 1 shows toast, and Row 2 shows ApprovalModeIndicator/StatusDisplay + // They are no longer mutually exclusive. 
+ expect(output).toContain('ApprovalModeIndicator'); expect(output).toContain('StatusDisplay'); }); @@ -575,8 +578,8 @@ describe('Composer', () => { const { lastFrame } = await renderComposer(uiState); const output = lastFrame(); - expect(output).toContain('ToastDisplay'); - expect(output).not.toContain('ApprovalModeIndicator'); + expect(output).toContain('Warning'); + expect(output).toContain('ApprovalModeIndicator'); }); }); @@ -585,15 +588,17 @@ describe('Composer', () => { const uiState = createMockUIState({ cleanUiDetailsVisible: false, }); + const settings = createMockSettings({ + ui: { showShortcutsHint: false }, + }); - const { lastFrame } = await renderComposer(uiState); + const { lastFrame } = await renderComposer(uiState, settings); const output = lastFrame(); - expect(output).toContain('ShortcutsHint'); + expect(output).not.toContain('press tab twice for more'); + expect(output).not.toContain('? for shortcuts'); expect(output).toContain('InputPrompt'); expect(output).not.toContain('Footer'); - expect(output).not.toContain('ApprovalModeIndicator'); - expect(output).not.toContain('ContextSummaryDisplay'); }); it('renders InputPrompt when input is active', async () => { @@ -666,12 +671,15 @@ describe('Composer', () => { }); it.each([ - [ApprovalMode.YOLO, 'YOLO'], - [ApprovalMode.PLAN, 'plan'], - [ApprovalMode.AUTO_EDIT, 'auto edit'], + { mode: ApprovalMode.YOLO, label: '● YOLO' }, + { mode: ApprovalMode.PLAN, label: '● plan' }, + { + mode: ApprovalMode.AUTO_EDIT, + label: '● auto edit', + }, ])( - 'shows minimal mode badge "%s" when clean UI details are hidden', - async (mode, label) => { + 'shows minimal mode badge "$mode" when clean UI details are hidden', + async ({ mode, label }) => { const uiState = createMockUIState({ cleanUiDetailsVisible: false, showApprovalModeIndicator: mode, @@ -694,7 +702,8 @@ describe('Composer', () => { const output = lastFrame(); expect(output).toContain('LoadingIndicator'); expect(output).not.toContain('plan'); - 
expect(output).not.toContain('ShortcutsHint'); + expect(output).toContain('press tab twice for more'); + expect(output).not.toContain('? for shortcuts'); }); it('hides minimal mode badge while action-required state is active', async () => { @@ -709,9 +718,7 @@ describe('Composer', () => { }); const { lastFrame } = await renderComposer(uiState); - const output = lastFrame(); - expect(output).not.toContain('plan'); - expect(output).not.toContain('ShortcutsHint'); + expect(lastFrame({ allowEmpty: true })).toBe(''); }); it('shows Esc rewind prompt in minimal mode without showing full UI', async () => { @@ -723,7 +730,7 @@ describe('Composer', () => { const { lastFrame } = await renderComposer(uiState); const output = lastFrame(); - expect(output).toContain('ToastDisplay'); + expect(output).toContain('Press Esc again to rewind.'); expect(output).not.toContain('ContextSummaryDisplay'); }); @@ -748,7 +755,14 @@ describe('Composer', () => { }); const { lastFrame } = await renderComposer(uiState, settings); - expect(lastFrame()).toContain('%'); + + await act(async () => { + await vi.advanceTimersByTimeAsync(250); + }); + + // StatusDisplay (which contains ContextUsageDisplay) should bleed through in minimal mode + expect(lastFrame()).toContain('StatusDisplay'); + expect(lastFrame()).toContain('70% used'); }); }); @@ -813,14 +827,20 @@ describe('Composer', () => { describe('Shortcuts Hint', () => { it('restores shortcuts hint after 200ms debounce when buffer is empty', async () => { - const { lastFrame } = await renderComposer( - createMockUIState({ - buffer: { text: '' } as unknown as TextBuffer, - cleanUiDetailsVisible: false, - }), - ); + const uiState = createMockUIState({ + buffer: { text: '' } as unknown as TextBuffer, + cleanUiDetailsVisible: false, + }); - expect(lastFrame({ allowEmpty: true })).toContain('ShortcutsHint'); + const { lastFrame } = await renderComposer(uiState); + + await act(async () => { + await vi.advanceTimersByTimeAsync(250); + }); + + 
expect(lastFrame({ allowEmpty: true })).toContain( + 'press tab twice for more', + ); }); it('hides shortcuts hint when text is typed in buffer', async () => { @@ -831,7 +851,8 @@ describe('Composer', () => { const { lastFrame } = await renderComposer(uiState); - expect(lastFrame()).not.toContain('ShortcutsHint'); + expect(lastFrame()).not.toContain('press tab twice for more'); + expect(lastFrame()).not.toContain('? for shortcuts'); }); it('hides shortcuts hint when showShortcutsHint setting is false', async () => { @@ -844,7 +865,7 @@ describe('Composer', () => { const { lastFrame } = await renderComposer(uiState, settings); - expect(lastFrame()).not.toContain('ShortcutsHint'); + expect(lastFrame()).not.toContain('? for shortcuts'); }); it('hides shortcuts hint when a action is required (e.g. dialog is open)', async () => { @@ -857,9 +878,10 @@ describe('Composer', () => { ), }); - const { lastFrame } = await renderComposer(uiState); + const { lastFrame, unmount } = await renderComposer(uiState); - expect(lastFrame()).not.toContain('ShortcutsHint'); + expect(lastFrame({ allowEmpty: true })).toBe(''); + unmount(); }); it('keeps shortcuts hint visible when no action is required', async () => { @@ -869,7 +891,11 @@ describe('Composer', () => { const { lastFrame } = await renderComposer(uiState); - expect(lastFrame()).toContain('ShortcutsHint'); + await act(async () => { + await vi.advanceTimersByTimeAsync(250); + }); + + expect(lastFrame()).toContain('press tab twice for more'); }); it('shows shortcuts hint when full UI details are visible', async () => { @@ -879,10 +905,15 @@ describe('Composer', () => { const { lastFrame } = await renderComposer(uiState); - expect(lastFrame()).toContain('ShortcutsHint'); + await act(async () => { + await vi.advanceTimersByTimeAsync(250); + }); + + // In Refreshed UX, shortcuts hint is in the top multipurpose status row + expect(lastFrame()).toContain('? 
for shortcuts'); }); - it('hides shortcuts hint while loading when full UI details are visible', async () => { + it('shows shortcuts hint while loading when full UI details are visible', async () => { const uiState = createMockUIState({ cleanUiDetailsVisible: true, streamingState: StreamingState.Responding, @@ -890,10 +921,17 @@ describe('Composer', () => { const { lastFrame } = await renderComposer(uiState); - expect(lastFrame()).not.toContain('ShortcutsHint'); + await act(async () => { + await vi.advanceTimersByTimeAsync(250); + }); + + // In experimental layout, status row is visible during loading + expect(lastFrame()).toContain('LoadingIndicator'); + expect(lastFrame()).toContain('? for shortcuts'); + expect(lastFrame()).not.toContain('press tab twice for more'); }); - it('hides shortcuts hint while loading in minimal mode', async () => { + it('shows shortcuts hint while loading in minimal mode', async () => { const uiState = createMockUIState({ cleanUiDetailsVisible: false, streamingState: StreamingState.Responding, @@ -902,7 +940,14 @@ describe('Composer', () => { const { lastFrame } = await renderComposer(uiState); - expect(lastFrame()).not.toContain('ShortcutsHint'); + await act(async () => { + await vi.advanceTimersByTimeAsync(250); + }); + + // In experimental layout, status row is visible in clean mode while busy + expect(lastFrame()).toContain('LoadingIndicator'); + expect(lastFrame()).toContain('press tab twice for more'); + expect(lastFrame()).not.toContain('? for shortcuts'); }); it('shows shortcuts help in minimal mode when toggled on', async () => { @@ -927,7 +972,8 @@ describe('Composer', () => { const { lastFrame } = await renderComposer(uiState); - expect(lastFrame()).not.toContain('ShortcutsHint'); + expect(lastFrame()).not.toContain('press tab twice for more'); + expect(lastFrame()).not.toContain('? 
for shortcuts'); expect(lastFrame()).not.toContain('plan'); }); @@ -955,7 +1001,12 @@ describe('Composer', () => { const { lastFrame } = await renderComposer(uiState); - expect(lastFrame()).toContain('ShortcutsHint'); + await act(async () => { + await vi.advanceTimersByTimeAsync(250); + }); + + // In Refreshed UX, shortcuts hint is in the top status row and doesn't collide with suggestions below + expect(lastFrame()).toContain('press tab twice for more'); }); }); @@ -983,24 +1034,22 @@ describe('Composer', () => { expect(lastFrame()).not.toContain('ShortcutsHelp'); unmount(); }); - it('hides shortcuts help when action is required', async () => { const uiState = createMockUIState({ shortcutsHelpVisible: true, customDialog: ( - Dialog content + Test Dialog ), }); const { lastFrame, unmount } = await renderComposer(uiState); - expect(lastFrame()).not.toContain('ShortcutsHelp'); + expect(lastFrame({ allowEmpty: true })).toBe(''); unmount(); }); }); - describe('Snapshots', () => { it('matches snapshot in idle state', async () => { const uiState = createMockUIState(); diff --git a/packages/cli/src/ui/components/Composer.tsx b/packages/cli/src/ui/components/Composer.tsx index 89c9c9d3d6..5c9850bf92 100644 --- a/packages/cli/src/ui/components/Composer.tsx +++ b/packages/cli/src/ui/components/Composer.tsx @@ -4,89 +4,53 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { useState, useEffect, useMemo } from 'react'; -import { Box, Text, useIsScreenReaderEnabled } from 'ink'; -import { - ApprovalMode, - checkExhaustive, - CoreToolCallStatus, -} from '@google/gemini-cli-core'; -import { LoadingIndicator } from './LoadingIndicator.js'; -import { StatusDisplay } from './StatusDisplay.js'; -import { ToastDisplay, shouldShowToast } from './ToastDisplay.js'; -import { ApprovalModeIndicator } from './ApprovalModeIndicator.js'; -import { ShellModeIndicator } from './ShellModeIndicator.js'; -import { DetailedMessagesDisplay } from './DetailedMessagesDisplay.js'; -import { 
RawMarkdownIndicator } from './RawMarkdownIndicator.js'; -import { ShortcutsHint } from './ShortcutsHint.js'; -import { ShortcutsHelp } from './ShortcutsHelp.js'; -import { InputPrompt } from './InputPrompt.js'; -import { Footer } from './Footer.js'; -import { ShowMoreLines } from './ShowMoreLines.js'; -import { QueuedMessageDisplay } from './QueuedMessageDisplay.js'; -import { ContextUsageDisplay } from './ContextUsageDisplay.js'; -import { HorizontalLine } from './shared/HorizontalLine.js'; -import { OverflowProvider } from '../contexts/OverflowContext.js'; -import { isNarrowWidth } from '../utils/isNarrowWidth.js'; +import { Box, useIsScreenReaderEnabled } from 'ink'; +import { useState, useEffect } from 'react'; +import { useConfig } from '../contexts/ConfigContext.js'; +import { useSettings } from '../contexts/SettingsContext.js'; import { useUIState } from '../contexts/UIStateContext.js'; import { useUIActions } from '../contexts/UIActionsContext.js'; import { useVimMode } from '../contexts/VimModeContext.js'; -import { useConfig } from '../contexts/ConfigContext.js'; -import { useSettings } from '../contexts/SettingsContext.js'; import { useAlternateBuffer } from '../hooks/useAlternateBuffer.js'; -import { StreamingState, type HistoryItemToolGroup } from '../types.js'; -import { ConfigInitDisplay } from '../components/ConfigInitDisplay.js'; +import { useTerminalSize } from '../hooks/useTerminalSize.js'; +import { isNarrowWidth } from '../utils/isNarrowWidth.js'; +import { ToastDisplay, shouldShowToast } from './ToastDisplay.js'; +import { DetailedMessagesDisplay } from './DetailedMessagesDisplay.js'; +import { ShortcutsHelp } from './ShortcutsHelp.js'; +import { InputPrompt } from './InputPrompt.js'; +import { Footer } from './Footer.js'; +import { StatusRow } from './StatusRow.js'; +import { ShowMoreLines } from './ShowMoreLines.js'; +import { QueuedMessageDisplay } from './QueuedMessageDisplay.js'; +import { OverflowProvider } from 
'../contexts/OverflowContext.js'; +import { ConfigInitDisplay } from './ConfigInitDisplay.js'; import { TodoTray } from './messages/Todo.js'; -import { getInlineThinkingMode } from '../utils/inlineThinkingMode.js'; -import { isContextUsageHigh } from '../utils/contextUsage.js'; -import { theme } from '../semantic-colors.js'; +import { useComposerStatus } from '../hooks/useComposerStatus.js'; export const Composer = ({ isFocused = true }: { isFocused?: boolean }) => { - const config = useConfig(); - const settings = useSettings(); - const isScreenReaderEnabled = useIsScreenReaderEnabled(); const uiState = useUIState(); const uiActions = useUIActions(); + const settings = useSettings(); + const config = useConfig(); const { vimEnabled, vimMode } = useVimMode(); - const inlineThinkingMode = getInlineThinkingMode(settings); - const terminalWidth = uiState.terminalWidth; + const isScreenReaderEnabled = useIsScreenReaderEnabled(); + const { columns: terminalWidth } = useTerminalSize(); const isNarrow = isNarrowWidth(terminalWidth); const debugConsoleMaxHeight = Math.floor(Math.max(terminalWidth * 0.2, 5)); const [suggestionsVisible, setSuggestionsVisible] = useState(false); const isAlternateBuffer = useAlternateBuffer(); - const { showApprovalModeIndicator } = uiState; const showUiDetails = uiState.cleanUiDetailsVisible; const suggestionsPosition = isAlternateBuffer ? 'above' : 'below'; const hideContextSummary = suggestionsVisible && suggestionsPosition === 'above'; - const hasPendingToolConfirmation = useMemo( - () => - (uiState.pendingHistoryItems ?? 
[]) - .filter( - (item): item is HistoryItemToolGroup => item.type === 'tool_group', - ) - .some((item) => - item.tools.some( - (tool) => tool.status === CoreToolCallStatus.AwaitingApproval, - ), - ), - [uiState.pendingHistoryItems], - ); + const { hasPendingActionRequired, shouldCollapseDuringApproval } = + useComposerStatus(); - const hasPendingActionRequired = - hasPendingToolConfirmation || - Boolean(uiState.commandConfirmationRequest) || - Boolean(uiState.authConsentRequest) || - (uiState.confirmUpdateExtensionRequests?.length ?? 0) > 0 || - Boolean(uiState.loopDetectionConfirmationRequest) || - Boolean(uiState.quota.proQuotaRequest) || - Boolean(uiState.quota.validationRequest) || - Boolean(uiState.customDialog); const isPassiveShortcutsHelpState = uiState.isInputActive && - uiState.streamingState === StreamingState.Idle && + uiState.streamingState === 'idle' && !hasPendingActionRequired; const { setShortcutsHelpVisible } = uiActions; @@ -103,91 +67,19 @@ export const Composer = ({ isFocused = true }: { isFocused?: boolean }) => { const showShortcutsHelp = uiState.shortcutsHelpVisible && - uiState.streamingState === StreamingState.Idle && + uiState.streamingState === 'idle' && !hasPendingActionRequired; - const hasToast = shouldShowToast(uiState); - const showLoadingIndicator = - (!uiState.embeddedShellFocused || uiState.isBackgroundShellVisible) && - uiState.streamingState === StreamingState.Responding && - !hasPendingActionRequired; - const hideUiDetailsForSuggestions = - suggestionsVisible && suggestionsPosition === 'above'; - const showApprovalIndicator = - !uiState.shellModeActive && !hideUiDetailsForSuggestions; - const showRawMarkdownIndicator = !uiState.renderMarkdown; - let modeBleedThrough: { text: string; color: string } | null = null; - switch (showApprovalModeIndicator) { - case ApprovalMode.YOLO: - modeBleedThrough = { text: 'YOLO', color: theme.status.error }; - break; - case ApprovalMode.PLAN: - modeBleedThrough = { text: 'plan', color: 
theme.status.success }; - break; - case ApprovalMode.AUTO_EDIT: - modeBleedThrough = { text: 'auto edit', color: theme.status.warning }; - break; - case ApprovalMode.DEFAULT: - modeBleedThrough = null; - break; - default: - checkExhaustive(showApprovalModeIndicator); - modeBleedThrough = null; - break; + + if (hasPendingActionRequired && shouldCollapseDuringApproval) { + return null; } - const hideMinimalModeHintWhileBusy = - !showUiDetails && (showLoadingIndicator || hasPendingActionRequired); - const minimalModeBleedThrough = hideMinimalModeHintWhileBusy - ? null - : modeBleedThrough; - const hasMinimalStatusBleedThrough = shouldShowToast(uiState); + const hasToast = shouldShowToast(uiState); + const hideUiDetailsForSuggestions = + suggestionsVisible && suggestionsPosition === 'above'; - const showMinimalContextBleedThrough = - !settings.merged.ui.footer.hideContextPercentage && - isContextUsageHigh( - uiState.sessionStats.lastPromptTokenCount, - typeof uiState.currentModel === 'string' - ? 
uiState.currentModel - : undefined, - ); - const hideShortcutsHintForSuggestions = hideUiDetailsForSuggestions; - const isModelIdle = uiState.streamingState === StreamingState.Idle; - const isBufferEmpty = uiState.buffer.text.length === 0; - const canShowShortcutsHint = - isModelIdle && isBufferEmpty && !hasPendingActionRequired; - const [showShortcutsHintDebounced, setShowShortcutsHintDebounced] = - useState(canShowShortcutsHint); - - useEffect(() => { - if (!canShowShortcutsHint) { - setShowShortcutsHintDebounced(false); - return; - } - - const timeout = setTimeout(() => { - setShowShortcutsHintDebounced(true); - }, 200); - - return () => clearTimeout(timeout); - }, [canShowShortcutsHint]); - - const shouldReserveSpaceForShortcutsHint = - settings.merged.ui.showShortcutsHint && !hideShortcutsHintForSuggestions; - const showShortcutsHint = - shouldReserveSpaceForShortcutsHint && showShortcutsHintDebounced; - const showMinimalModeBleedThrough = - !hideUiDetailsForSuggestions && Boolean(minimalModeBleedThrough); - const showMinimalInlineLoading = !showUiDetails && showLoadingIndicator; - const showMinimalBleedThroughRow = - !showUiDetails && - (showMinimalModeBleedThrough || - hasMinimalStatusBleedThrough || - showMinimalContextBleedThrough); - const showMinimalMetaRow = - !showUiDetails && - (showMinimalInlineLoading || - showMinimalBleedThroughRow || - shouldReserveSpaceForShortcutsHint); + // Mini Mode VIP Flags (Pure Content Triggers) + const showMinimalToast = hasToast; return ( { flexGrow={0} flexShrink={0} > - {(!uiState.slashCommands || - !uiState.isConfigInitialized || - uiState.isResuming) && ( - + {uiState.isResuming && ( + )} {showUiDetails && ( @@ -210,212 +98,23 @@ export const Composer = ({ isFocused = true }: { isFocused?: boolean }) => { {showUiDetails && } - - - - {showUiDetails && showLoadingIndicator && ( - - )} - - - {showUiDetails && showShortcutsHint && } - - - {showMinimalMetaRow && ( - - - {showMinimalInlineLoading && ( - - )} - 
{showMinimalModeBleedThrough && minimalModeBleedThrough && ( - - ● {minimalModeBleedThrough.text} - - )} - {hasMinimalStatusBleedThrough && ( - - - - )} - - {(showMinimalContextBleedThrough || - shouldReserveSpaceForShortcutsHint) && ( - - {showMinimalContextBleedThrough && ( - - )} - - {showShortcutsHint && } - - - )} - - )} - {showShortcutsHelp && } - {showUiDetails && } - {showUiDetails && ( - - - {hasToast ? ( - - ) : ( - - {showApprovalIndicator && ( - - )} - {!showLoadingIndicator && ( - <> - {uiState.shellModeActive && ( - - - - )} - {showRawMarkdownIndicator && ( - - - - )} - - )} - - )} - + {showShortcutsHelp && } - - {!showLoadingIndicator && ( - - )} - - - )} + {(showUiDetails || showMinimalToast) && ( + + + + )} + + + {showUiDetails && uiState.showErrorDetails && ( @@ -447,7 +146,7 @@ export const Composer = ({ isFocused = true }: { isFocused?: boolean }) => { commandContext={uiState.commandContext} shellModeActive={uiState.shellModeActive} setShellModeActive={uiActions.setShellModeActive} - approvalMode={showApprovalModeIndicator} + approvalMode={uiState.showApprovalModeIndicator} onEscapePromptChange={uiActions.onEscapePromptChange} focus={isFocused} vimHandleInput={uiActions.vimHandleInput} @@ -466,12 +165,15 @@ export const Composer = ({ isFocused = true }: { isFocused?: boolean }) => { streamingState={uiState.streamingState} suggestionsPosition={suggestionsPosition} onSuggestionsVisibilityChange={setSuggestionsVisible} + copyModeEnabled={uiState.copyModeEnabled} /> )} {showUiDetails && !settings.merged.ui.hideFooter && - !isScreenReaderEnabled &&