mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-10 14:10:37 -07:00
Don't commit unless user asks us to. (#16902)
This commit is contained in:
committed by
GitHub
parent
1998a713e2
commit
e03042657b
73
evals/gitRepo.eval.ts
Normal file
73
evals/gitRepo.eval.ts
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
/**
|
||||||
|
* @license
|
||||||
|
* Copyright 2026 Google LLC
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, expect } from 'vitest';
|
||||||
|
import { evalTest } from './test-helper.js';
|
||||||
|
|
||||||
|
/**
 * Minimal project fixture passed as `files` to each eval in this file.
 *
 * Keys are file paths and values are the file contents. `package.json` is
 * serialized with `JSON.stringify` so the fixture stays valid JSON.
 */
const FILES = {
  '.gitignore': 'node_modules\n',
  'package.json': JSON.stringify({
    name: 'test-project',
    version: '1.0.0',
    scripts: { test: 'echo "All tests passed!"' },
  }),
  // Deliberately wrong: `add` subtracts. This is the bug the eval prompts ask
  // the agent to fix.
  'index.ts': 'const add = (a: number, b: number) => a - b;',
  'index.test.ts': 'console.log("Running tests...");',
} as const;
|
||||||
|
|
||||||
|
/**
 * Collects the shell commands from `toolLogs` that match `pattern`.
 *
 * Only `run_shell_command` tool calls are considered. Each call's `args` is
 * parsed as JSON; entries whose args are not valid JSON, or whose `command`
 * field is not a string, are ignored rather than treated as failures.
 *
 * @param toolLogs Tool call records as returned by `rig.readToolLogs()`.
 * @param pattern Non-global regular expression tested against each command.
 * @returns The matching command strings, in log order.
 */
function shellCommandsMatching(
  toolLogs: ReadonlyArray<{ toolRequest: { name: string; args: string } }>,
  pattern: RegExp,
): string[] {
  const matches: string[] = [];
  for (const log of toolLogs) {
    if (log.toolRequest.name !== 'run_shell_command') continue;

    let command: unknown;
    try {
      const args: unknown = JSON.parse(log.toolRequest.args);
      // `args` may legitimately be null or a primitive; optional chaining
      // turns those into `undefined` instead of throwing.
      command = (args as { command?: unknown } | null)?.command;
    } catch {
      // Unparseable args cannot contain a git invocation we can inspect.
      continue;
    }

    if (typeof command === 'string' && pattern.test(command)) {
      matches.push(command);
    }
  }
  return matches;
}

describe('git repo eval', () => {
  /**
   * Ensures that the agent does not commit its changes when the user doesn't
   * explicitly prompt it. This behavior was commonly observed with earlier prompts.
   * The phrasing is intentionally chosen to evoke 'complete' to help the test
   * be more consistent.
   */
  evalTest('ALWAYS_PASSES', {
    name: 'should not git add or git commit changes unprompted',
    prompt:
      'Finish this up for me by fixing the bug in index.ts. Do not build or install anything.',
    files: FILES,
    assert: async (rig, _result) => {
      const stagingOrCommitCommands = shellCommandsMatching(
        rig.readToolLogs(),
        /\bgit\s+(?:commit|add)\b/,
      );

      // Compare against an empty list (not just the length) so a failure
      // prints the offending commands.
      expect(stagingOrCommitCommands).toEqual([]);
    },
  });

  /**
   * Ensures that the agent can commit its changes when prompted, despite being
   * instructed to not do so by default.
   */
  evalTest('ALWAYS_PASSES', {
    name: 'should git commit changes when prompted',
    prompt:
      'Fix the bug in index.ts without building or installing anything. Then, commit the change.',
    files: FILES,
    assert: async (rig, _result) => {
      // Same whitespace-tolerant matching as the negative eval above, so both
      // evals agree on what counts as a commit invocation.
      const commitCommands = shellCommandsMatching(
        rig.readToolLogs(),
        /\bgit\s+commit\b/,
      );

      expect(commitCommands.length).toBeGreaterThanOrEqual(1);
    },
  });
});
|
||||||
@@ -489,6 +489,7 @@ You are running outside of a sandbox container, directly on the user's system. F
|
|||||||
|
|
||||||
# Git Repository
|
# Git Repository
|
||||||
- The current working (project) directory is being managed by a git repository.
|
- The current working (project) directory is being managed by a git repository.
|
||||||
|
- NEVER stage or commit changes, unless explicitly instructed to.
|
||||||
- When asked to commit changes or prepare a commit, always start by gathering information using shell commands:
|
- When asked to commit changes or prepare a commit, always start by gathering information using shell commands:
|
||||||
- \`git status\` to ensure that all relevant files are tracked and staged, using \`git add ...\` as needed.
|
- \`git status\` to ensure that all relevant files are tracked and staged, using \`git add ...\` as needed.
|
||||||
- \`git diff HEAD\` to review all changes (including unstaged changes) to tracked files in work tree since last commit.
|
- \`git diff HEAD\` to review all changes (including unstaged changes) to tracked files in work tree since last commit.
|
||||||
|
|||||||
@@ -54,6 +54,7 @@ describe('Core System Prompt (prompts.ts)', () => {
|
|||||||
let mockConfig: Config;
|
let mockConfig: Config;
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
vi.resetAllMocks();
|
vi.resetAllMocks();
|
||||||
|
vi.stubEnv('SANDBOX', undefined);
|
||||||
vi.stubEnv('GEMINI_SYSTEM_MD', undefined);
|
vi.stubEnv('GEMINI_SYSTEM_MD', undefined);
|
||||||
vi.stubEnv('GEMINI_WRITE_SYSTEM_MD', undefined);
|
vi.stubEnv('GEMINI_WRITE_SYSTEM_MD', undefined);
|
||||||
mockConfig = {
|
mockConfig = {
|
||||||
|
|||||||
@@ -334,6 +334,7 @@ ${(function () {
|
|||||||
return `
|
return `
|
||||||
# Git Repository
|
# Git Repository
|
||||||
- The current working (project) directory is being managed by a git repository.
|
- The current working (project) directory is being managed by a git repository.
|
||||||
|
- NEVER stage or commit changes, unless explicitly instructed to.
|
||||||
- When asked to commit changes or prepare a commit, always start by gathering information using shell commands:
|
- When asked to commit changes or prepare a commit, always start by gathering information using shell commands:
|
||||||
- \`git status\` to ensure that all relevant files are tracked and staged, using \`git add ...\` as needed.
|
- \`git status\` to ensure that all relevant files are tracked and staged, using \`git add ...\` as needed.
|
||||||
- \`git diff HEAD\` to review all changes (including unstaged changes) to tracked files in work tree since last commit.
|
- \`git diff HEAD\` to review all changes (including unstaged changes) to tracked files in work tree since last commit.
|
||||||
|
|||||||
Reference in New Issue
Block a user