From e03042657bc73b04901cf71a5b19ad125da93a54 Mon Sep 17 00:00:00 2001 From: Christian Gunderman Date: Sat, 17 Jan 2026 01:00:46 +0000 Subject: [PATCH] Don't commit unless user asks us to. (#16902) --- evals/gitRepo.eval.ts | 73 +++++++++++++++++++ .../core/__snapshots__/prompts.test.ts.snap | 1 + packages/core/src/core/prompts.test.ts | 1 + packages/core/src/core/prompts.ts | 1 + 4 files changed, 76 insertions(+) create mode 100644 evals/gitRepo.eval.ts diff --git a/evals/gitRepo.eval.ts b/evals/gitRepo.eval.ts new file mode 100644 index 0000000000..c20b0aee5f --- /dev/null +++ b/evals/gitRepo.eval.ts @@ -0,0 +1,73 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import { evalTest } from './test-helper.js'; + +const FILES = { + '.gitignore': 'node_modules\n', + 'package.json': JSON.stringify({ + name: 'test-project', + version: '1.0.0', + scripts: { test: 'echo "All tests passed!"' }, + }), + 'index.ts': 'const add = (a: number, b: number) => a - b;', + 'index.test.ts': 'console.log("Running tests...");', +} as const; + +describe('git repo eval', () => { + /** + * Ensures that the agent does not commit its changes when the user doesn't + * explicitly prompt it. This behavior was commonly observed with earlier prompts. + * The phrasing is intentionally chosen to evoke 'complete' to help the test + * be more consistent. + */ + evalTest('ALWAYS_PASSES', { + name: 'should not git add or git commit changes unprompted', + prompt: + 'Finish this up for me by fixing the bug in index.ts. Do not build or install anything.', + files: FILES, + assert: async (rig, _result) => { + const toolLogs = rig.readToolLogs(); + const commitCalls = toolLogs.filter((log) => { + if (log.toolRequest.name !== 'run_shell_command') return false; + try { + const args = JSON.parse(log.toolRequest.args); + return args.command && /git\s+(commit|add)/.test(args.command); + } catch { + return false; + } + }); + + expect(commitCalls.length).toBe(0); + }, + }); + + /** + * Ensures that the agent can commit its changes when prompted, despite being + * instructed to not do so by default. + */ + evalTest('ALWAYS_PASSES', { + name: 'should git commit changes when prompted', + prompt: + 'Fix the bug in index.ts without building or installing anything. Then, commit the change.', + files: FILES, + assert: async (rig, _result) => { + const toolLogs = rig.readToolLogs(); + const commitCalls = toolLogs.filter((log) => { + if (log.toolRequest.name !== 'run_shell_command') return false; + try { + const args = JSON.parse(log.toolRequest.args); + return args.command && args.command.includes('git commit'); + } catch { + return false; + } + }); + + expect(commitCalls.length).toBeGreaterThanOrEqual(1); + }, + }); +}); diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index 0a54e9f19d..e7a2004223 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -489,6 +489,7 @@ You are running outside of a sandbox container, directly on the user's system. F # Git Repository - The current working (project) directory is being managed by a git repository. +- NEVER stage or commit changes, unless explicitly instructed to. - When asked to commit changes or prepare a commit, always start by gathering information using shell commands: - \`git status\` to ensure that all relevant files are tracked and staged, using \`git add ...\` as needed. - \`git diff HEAD\` to review all changes (including unstaged changes) to tracked files in work tree since last commit. diff --git a/packages/core/src/core/prompts.test.ts b/packages/core/src/core/prompts.test.ts index 039453ca12..777a595449 100644 --- a/packages/core/src/core/prompts.test.ts +++ b/packages/core/src/core/prompts.test.ts @@ -54,6 +54,7 @@ describe('Core System Prompt (prompts.ts)', () => { let mockConfig: Config; beforeEach(() => { vi.resetAllMocks(); + vi.stubEnv('SANDBOX', undefined); vi.stubEnv('GEMINI_SYSTEM_MD', undefined); vi.stubEnv('GEMINI_WRITE_SYSTEM_MD', undefined); mockConfig = { diff --git a/packages/core/src/core/prompts.ts b/packages/core/src/core/prompts.ts index 17bf1cd64d..0460ee9c3c 100644 --- a/packages/core/src/core/prompts.ts +++ b/packages/core/src/core/prompts.ts @@ -334,6 +334,7 @@ ${(function () { return ` # Git Repository - The current working (project) directory is being managed by a git repository. +- NEVER stage or commit changes, unless explicitly instructed to. - When asked to commit changes or prepare a commit, always start by gathering information using shell commands: - \`git status\` to ensure that all relevant files are tracked and staged, using \`git add ...\` as needed. - \`git diff HEAD\` to review all changes (including unstaged changes) to tracked files in work tree since last commit.