mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-16 09:01:17 -07:00
Merge branch 'main' into make_memory_usual
This commit is contained in:
110
evals/edit-locations-eval.eval.ts
Normal file
110
evals/edit-locations-eval.eval.ts
Normal file
@@ -0,0 +1,110 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, expect } from 'vitest';
|
||||
import { evalTest } from './test-helper.js';
|
||||
|
||||
describe('Edits location eval', () => {
  /**
   * Ensure that Gemini CLI always updates existing test files, if present,
   * instead of creating a new one.
   *
   * The workspace is seeded with two source modules, each with a sibling
   * `*.test.ts` file. The agent is asked to fix the bug in `src/math.ts`;
   * the assertions then inspect the recorded tool calls to confirm that:
   *   - at least one `replace` call was made,
   *   - no `write_file` call mentions a `.test.ts` path, and
   *   - both `src/math.ts` and `src/math.test.ts` were targets of `replace`.
   */
  evalTest('USUALLY_PASSES', {
    name: 'should update existing test file instead of creating a new one',
    files: {
      'package.json': JSON.stringify(
        {
          name: 'test-location-repro',
          version: '1.0.0',
          scripts: {
            test: 'vitest run',
          },
          devDependencies: {
            vitest: '^1.0.0',
            typescript: '^5.0.0',
          },
        },
        null,
        2,
      ),
      // NOTE: `multiply` intentionally returns `a + b`. This is the seeded bug
      // that the prompt below asks the agent to fix; do not "correct" it here.
      'src/math.ts': `
export function add(a: number, b: number): number {
  return a + b;
}

export function subtract(a: number, b: number): number {
  return a - b;
}

export function multiply(a: number, b: number): number {
  return a + b;
}
`,
      // Existing tests cover `add` and `subtract` only, leaving room for the
      // agent to extend this file with a `multiply` test.
      'src/math.test.ts': `
import { expect, test } from 'vitest';
import { add, subtract } from './math';

test('add adds two numbers', () => {
  expect(add(2, 3)).toBe(5);
});

test('subtract subtracts two numbers', () => {
  expect(subtract(5, 3)).toBe(2);
});
`,
      // Unrelated module and test file present alongside the math module.
      'src/utils.ts': `
export function capitalize(s: string): string {
  return s.charAt(0).toUpperCase() + s.slice(1);
}
`,
      'src/utils.test.ts': `
import { expect, test } from 'vitest';
import { capitalize } from './utils';

test('capitalize capitalizes the first letter', () => {
  expect(capitalize('hello')).toBe('Hello');
});
`,
    },
    prompt: 'Fix the bug in src/math.ts. Do not run the code.',
    timeout: 180000,
    assert: async (rig) => {
      const toolLogs = rig.readToolLogs();
      const replaceCalls = toolLogs.filter(
        (t) => t.toolRequest.name === 'replace',
      );
      const writeFileCalls = toolLogs.filter(
        (t) => t.toolRequest.name === 'write_file',
      );

      // The fix must be applied through in-place edits.
      expect(replaceCalls.length).toBeGreaterThan(0);

      // `args` is the serialized tool-call payload, so a substring match is
      // enough to detect any write_file call that mentions a test file.
      expect(
        writeFileCalls.some((file) =>
          file.toolRequest.args.includes('.test.ts'),
        ),
      ).toBe(false);

      // Extract the edited path from each replace call; a payload that is not
      // valid JSON yields null instead of aborting the whole assertion.
      const targetFiles = replaceCalls.map((t) => {
        try {
          return JSON.parse(t.toolRequest.args).file_path;
        } catch {
          return null;
        }
      });

      console.log('DEBUG: targetFiles', targetFiles);

      // The message matches the actual check (>= 2), so a failure report
      // describes the condition that was really violated.
      expect(
        new Set(targetFiles).size,
        'Expected at least two files changed',
      ).greaterThanOrEqual(2);
      expect(targetFiles.some((f) => f?.endsWith('src/math.ts'))).toBe(true);
      expect(targetFiles.some((f) => f?.endsWith('src/math.test.ts'))).toBe(
        true,
      );
    },
  });
});
|
||||
@@ -42,11 +42,12 @@ When asked for my favorite fruit, always say "Cherry".
|
||||
</project_context>
|
||||
|
||||
What is my favorite fruit? Tell me just the name of the fruit.`,
|
||||
assert: async (_rig, result) => {
|
||||
assertModelHasOutput(result);
|
||||
expect(result).toMatch(/Cherry/i);
|
||||
expect(result).not.toMatch(/Apple/i);
|
||||
expect(result).not.toMatch(/Banana/i);
|
||||
assert: async (rig) => {
|
||||
const stdout = rig._lastRunStdout!;
|
||||
assertModelHasOutput(stdout);
|
||||
expect(stdout).toMatch(/Cherry/i);
|
||||
expect(stdout).not.toMatch(/Apple/i);
|
||||
expect(stdout).not.toMatch(/Banana/i);
|
||||
},
|
||||
});
|
||||
|
||||
@@ -80,11 +81,12 @@ Provide the answer as an XML block like this:
|
||||
<extension>Instruction ...</extension>
|
||||
<project>Instruction ...</project>
|
||||
</results>`,
|
||||
assert: async (_rig, result) => {
|
||||
assertModelHasOutput(result);
|
||||
expect(result).toMatch(/<global>.*Instruction A/i);
|
||||
expect(result).toMatch(/<extension>.*Instruction B/i);
|
||||
expect(result).toMatch(/<project>.*Instruction C/i);
|
||||
assert: async (rig) => {
|
||||
const stdout = rig._lastRunStdout!;
|
||||
assertModelHasOutput(stdout);
|
||||
expect(stdout).toMatch(/<global>.*Instruction A/i);
|
||||
expect(stdout).toMatch(/<extension>.*Instruction B/i);
|
||||
expect(stdout).toMatch(/<project>.*Instruction C/i);
|
||||
},
|
||||
});
|
||||
|
||||
@@ -108,10 +110,11 @@ Set the theme to "Dark".
|
||||
</extension_context>
|
||||
|
||||
What theme should I use? Tell me just the name of the theme.`,
|
||||
assert: async (_rig, result) => {
|
||||
assertModelHasOutput(result);
|
||||
expect(result).toMatch(/Dark/i);
|
||||
expect(result).not.toMatch(/Light/i);
|
||||
assert: async (rig) => {
|
||||
const stdout = rig._lastRunStdout!;
|
||||
assertModelHasOutput(stdout);
|
||||
expect(stdout).toMatch(/Dark/i);
|
||||
expect(stdout).not.toMatch(/Light/i);
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user