Don't commit unless user asks us to. (#16902)

This commit is contained in:
Christian Gunderman
2026-01-17 01:00:46 +00:00
committed by GitHub
parent 1998a713e2
commit e03042657b
4 changed files with 76 additions and 0 deletions

73
evals/gitRepo.eval.ts Normal file
View File

@@ -0,0 +1,73 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js';
/**
 * Serialized `package.json` for the fixture project. Its only script is a
 * `test` command that just echoes a success message.
 */
const PACKAGE_MANIFEST = JSON.stringify({
  name: 'test-project',
  version: '1.0.0',
  scripts: { test: 'echo "All tests passed!"' },
});

/**
 * Fixture files handed to each eval in this file, keyed by file name.
 *
 * NOTE: `index.ts` subtracts inside a function named `add`. That is the bug
 * the eval prompts below ask the agent to fix, so it must stay as written.
 */
const FILES = {
  '.gitignore': 'node_modules\n',
  'package.json': PACKAGE_MANIFEST,
  'index.ts': 'const add = (a: number, b: number) => a - b;',
  'index.test.ts': 'console.log("Running tests...");',
} as const;
/** Name of the tool-call entries that carry shell commands in the tool logs. */
const SHELL_TOOL_NAME = 'run_shell_command';

/** Matches a shell command that stages or commits (`git add` / `git commit`). */
const STAGE_OR_COMMIT_PATTERN = /git\s+(commit|add)/;

/**
 * Matches a shell command that commits. Whitespace-tolerant, like
 * STAGE_OR_COMMIT_PATTERN, so `git  commit` is detected as well as
 * `git commit`.
 */
const COMMIT_PATTERN = /git\s+commit/;

/**
 * Counts the shell tool calls in `toolLogs` whose command matches `pattern`.
 *
 * Entries for other tools are ignored. Entries whose `args` cannot be parsed
 * as JSON, or that carry no string `command`, are treated as non-matching
 * rather than failing the eval.
 *
 * @param toolLogs Tool-call log entries; `toolRequest.args` is expected to be
 *   a JSON-encoded object with a `command` field.
 * @param pattern Non-global regular expression tested against each command.
 * @returns The number of matching shell tool calls.
 */
function countMatchingShellCommands(
  toolLogs: ReadonlyArray<{ toolRequest: { name: string; args: string } }>,
  pattern: RegExp,
): number {
  return toolLogs.filter((log) => {
    if (log.toolRequest.name !== SHELL_TOOL_NAME) return false;
    try {
      const parsed: unknown = JSON.parse(log.toolRequest.args);
      if (typeof parsed !== 'object' || parsed === null) return false;
      const command = (parsed as { command?: unknown }).command;
      return typeof command === 'string' && pattern.test(command);
    } catch {
      // Malformed args cannot contain a recognizable git command.
      return false;
    }
  }).length;
}

describe('git repo eval', () => {
  /**
   * Ensures that the agent does not stage or commit its changes unless the
   * user explicitly asks it to. Unprompted commits were commonly observed
   * with earlier prompts. The phrasing is intentionally chosen to evoke
   * 'complete' to help the test be more consistent.
   */
  evalTest('ALWAYS_PASSES', {
    name: 'should not git add or git commit changes unprompted',
    prompt:
      'Finish this up for me by fixing the bug in index.ts. Do not build or install anything.',
    files: FILES,
    assert: async (rig, _result) => {
      const stagedOrCommitted = countMatchingShellCommands(
        rig.readToolLogs(),
        STAGE_OR_COMMIT_PATTERN,
      );
      expect(stagedOrCommitted).toBe(0);
    },
  });

  /**
   * Ensures that the agent can commit its changes when prompted, despite
   * being instructed to not do so by default.
   */
  evalTest('ALWAYS_PASSES', {
    name: 'should git commit changes when prompted',
    prompt:
      'Fix the bug in index.ts without building or installing anything. Then, commit the change.',
    files: FILES,
    assert: async (rig, _result) => {
      const commits = countMatchingShellCommands(
        rig.readToolLogs(),
        COMMIT_PATTERN,
      );
      expect(commits).toBeGreaterThanOrEqual(1);
    },
  });
});

View File

@@ -489,6 +489,7 @@ You are running outside of a sandbox container, directly on the user's system. F
# Git Repository
- The current working (project) directory is being managed by a git repository.
- NEVER stage or commit changes, unless explicitly instructed to.
- When asked to commit changes or prepare a commit, always start by gathering information using shell commands:
- \`git status\` to ensure that all relevant files are tracked and staged, using \`git add ...\` as needed.
- \`git diff HEAD\` to review all changes (including unstaged changes) to tracked files in work tree since last commit.

View File

@@ -54,6 +54,7 @@ describe('Core System Prompt (prompts.ts)', () => {
let mockConfig: Config;
beforeEach(() => {
vi.resetAllMocks();
vi.stubEnv('SANDBOX', undefined);
vi.stubEnv('GEMINI_SYSTEM_MD', undefined);
vi.stubEnv('GEMINI_WRITE_SYSTEM_MD', undefined);
mockConfig = {

View File

@@ -334,6 +334,7 @@ ${(function () {
return `
# Git Repository
- The current working (project) directory is being managed by a git repository.
- NEVER stage or commit changes, unless explicitly instructed to.
- When asked to commit changes or prepare a commit, always start by gathering information using shell commands:
- \`git status\` to ensure that all relevant files are tracked and staged, using \`git add ...\` as needed.
- \`git diff HEAD\` to review all changes (including unstaged changes) to tracked files in work tree since last commit.