mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-10 14:10:37 -07:00
Encourage agent to utilize ecosystem tools to perform work (#17881)
This commit is contained in:
committed by
GitHub
parent
aba8c5f662
commit
ed02b94570
170
evals/automated-tool-use.eval.ts
Normal file
170
evals/automated-tool-use.eval.ts
Normal file
@@ -0,0 +1,170 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, expect } from 'vitest';
|
||||
import { evalTest } from './test-helper.js';
|
||||
|
||||
describe('Automated tool use', () => {
|
||||
/**
|
||||
* Tests that the agent always utilizes --fix when calling eslint.
|
||||
* We provide a 'lint' script in the package.json, which helps elicit
|
||||
* a repro by guiding the agent into using the existing deficient script.
|
||||
*/
|
||||
evalTest('USUALLY_PASSES', {
|
||||
name: 'should use automated tools (eslint --fix) to fix code style issues',
|
||||
files: {
|
||||
'package.json': JSON.stringify(
|
||||
{
|
||||
name: 'typescript-project',
|
||||
version: '1.0.0',
|
||||
type: 'module',
|
||||
scripts: {
|
||||
lint: 'eslint .',
|
||||
},
|
||||
devDependencies: {
|
||||
eslint: '^9.0.0',
|
||||
globals: '^15.0.0',
|
||||
typescript: '^5.0.0',
|
||||
'typescript-eslint': '^8.0.0',
|
||||
'@eslint/js': '^9.0.0',
|
||||
},
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
'eslint.config.js': `
|
||||
import globals from "globals";
|
||||
import pluginJs from "@eslint/js";
|
||||
import tseslint from "typescript-eslint";
|
||||
|
||||
export default [
|
||||
{
|
||||
files: ["**/*.{js,mjs,cjs,ts}"],
|
||||
languageOptions: {
|
||||
globals: globals.node
|
||||
}
|
||||
},
|
||||
pluginJs.configs.recommended,
|
||||
...tseslint.configs.recommended,
|
||||
{
|
||||
rules: {
|
||||
"prefer-const": "error",
|
||||
"@typescript-eslint/no-unused-vars": "off"
|
||||
}
|
||||
}
|
||||
];
|
||||
`,
|
||||
'src/app.ts': `
|
||||
export function main() {
|
||||
let count = 10;
|
||||
console.log(count);
|
||||
}
|
||||
`,
|
||||
},
|
||||
prompt:
|
||||
'Fix the linter errors in this project. Make sure to avoid interactive commands.',
|
||||
assert: async (rig) => {
|
||||
// Check if run_shell_command was used with --fix
|
||||
const toolCalls = rig.readToolLogs();
|
||||
const shellCommands = toolCalls.filter(
|
||||
(call) => call.toolRequest.name === 'run_shell_command',
|
||||
);
|
||||
|
||||
const hasFixCommand = shellCommands.some((call) => {
|
||||
let args = call.toolRequest.args;
|
||||
if (typeof args === 'string') {
|
||||
try {
|
||||
args = JSON.parse(args);
|
||||
} catch (e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
const cmd = (args as any)['command'];
|
||||
return (
|
||||
cmd &&
|
||||
(cmd.includes('eslint') || cmd.includes('npm run lint')) &&
|
||||
cmd.includes('--fix')
|
||||
);
|
||||
});
|
||||
|
||||
expect(
|
||||
hasFixCommand,
|
||||
'Expected agent to use eslint --fix via run_shell_command',
|
||||
).toBe(true);
|
||||
},
|
||||
});
|
||||
|
||||
/**
|
||||
* Tests that the agent uses prettier --write to fix formatting issues in files
|
||||
* instead of trying to edit the files itself.
|
||||
*/
|
||||
evalTest('USUALLY_PASSES', {
|
||||
name: 'should use automated tools (prettier --write) to fix formatting issues',
|
||||
files: {
|
||||
'package.json': JSON.stringify(
|
||||
{
|
||||
name: 'typescript-project',
|
||||
version: '1.0.0',
|
||||
type: 'module',
|
||||
scripts: {},
|
||||
devDependencies: {
|
||||
prettier: '^3.0.0',
|
||||
typescript: '^5.0.0',
|
||||
},
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
'.prettierrc': JSON.stringify(
|
||||
{
|
||||
semi: true,
|
||||
singleQuote: true,
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
'src/app.ts': `
|
||||
export function main() {
|
||||
const data={ name:'test',
|
||||
val:123
|
||||
}
|
||||
console.log(data)
|
||||
}
|
||||
`,
|
||||
},
|
||||
prompt:
|
||||
'Fix the formatting errors in this project. Make sure to avoid interactive commands.',
|
||||
assert: async (rig) => {
|
||||
// Check if run_shell_command was used with --write
|
||||
const toolCalls = rig.readToolLogs();
|
||||
const shellCommands = toolCalls.filter(
|
||||
(call) => call.toolRequest.name === 'run_shell_command',
|
||||
);
|
||||
|
||||
const hasFixCommand = shellCommands.some((call) => {
|
||||
let args = call.toolRequest.args;
|
||||
if (typeof args === 'string') {
|
||||
try {
|
||||
args = JSON.parse(args);
|
||||
} catch (e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
const cmd = (args as any)['command'];
|
||||
return (
|
||||
cmd &&
|
||||
cmd.includes('prettier') &&
|
||||
(cmd.includes('--write') || cmd.includes('-w'))
|
||||
);
|
||||
});
|
||||
|
||||
expect(
|
||||
hasFixCommand,
|
||||
'Expected agent to use prettier --write via run_shell_command',
|
||||
).toBe(true);
|
||||
},
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user