mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-15 06:12:50 -07:00
feat(core): discourage update topic tool for simple tasks (#24640)
Co-authored-by: Samee Zahid <sameez@google.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -5,6 +5,8 @@
|
||||
*/
|
||||
|
||||
import { describe, expect } from 'vitest';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { evalTest } from './test-helper.js';
|
||||
|
||||
describe('update_topic_behavior', () => {
|
||||
@@ -113,4 +115,104 @@ describe('update_topic_behavior', () => {
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
evalTest('USUALLY_PASSES', {
|
||||
name: 'update_topic should NOT be used for informational coding tasks (Obvious)',
|
||||
approvalMode: 'default',
|
||||
prompt:
|
||||
'Explain the difference between Map and Object in JavaScript and provide a performance-focused code snippet for each.',
|
||||
files: {
|
||||
'.gemini/settings.json': JSON.stringify({
|
||||
experimental: {
|
||||
topicUpdateNarration: true,
|
||||
},
|
||||
}),
|
||||
},
|
||||
assert: async (rig) => {
|
||||
const toolLogs = rig.readToolLogs();
|
||||
const topicCalls = toolLogs.filter(
|
||||
(l) => l.toolRequest.name === UPDATE_TOPIC_TOOL_NAME,
|
||||
);
|
||||
|
||||
expect(
|
||||
topicCalls.length,
|
||||
`Expected 0 update_topic calls for an informational task, but found ${topicCalls.length}`,
|
||||
).toBe(0);
|
||||
},
|
||||
});
|
||||
|
||||
evalTest('USUALLY_PASSES', {
|
||||
name: 'update_topic should NOT be used for surgical symbol searches (Grey Area)',
|
||||
approvalMode: 'default',
|
||||
prompt:
|
||||
"Find the file where the 'UPDATE_TOPIC_TOOL_NAME' constant is defined.",
|
||||
files: {
|
||||
'packages/core/src/tools/tool-names.ts':
|
||||
"export const UPDATE_TOPIC_TOOL_NAME = 'update_topic';",
|
||||
'.gemini/settings.json': JSON.stringify({
|
||||
experimental: {
|
||||
topicUpdateNarration: true,
|
||||
},
|
||||
}),
|
||||
},
|
||||
assert: async (rig) => {
|
||||
const toolLogs = rig.readToolLogs();
|
||||
const topicCalls = toolLogs.filter(
|
||||
(l) => l.toolRequest.name === UPDATE_TOPIC_TOOL_NAME,
|
||||
);
|
||||
|
||||
expect(
|
||||
topicCalls.length,
|
||||
`Expected 0 update_topic calls for a surgical symbol search, but found ${topicCalls.length}`,
|
||||
).toBe(0);
|
||||
},
|
||||
});
|
||||
|
||||
evalTest('USUALLY_PASSES', {
|
||||
name: 'update_topic should be used for medium complexity multi-step tasks',
|
||||
prompt:
|
||||
'Refactor the `users-api` project. Move the routing logic from src/app.ts into a new file src/routes.ts, and update app.ts to use the new routes file.',
|
||||
files: {
|
||||
'package.json': JSON.stringify(
|
||||
{
|
||||
name: 'users-api',
|
||||
version: '1.0.0',
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
'src/app.ts': `
|
||||
import express from 'express';
|
||||
const app = express();
|
||||
|
||||
app.get('/users', (req, res) => {
|
||||
res.json([{id: 1, name: 'Alice'}]);
|
||||
});
|
||||
|
||||
app.post('/users', (req, res) => {
|
||||
res.status(201).send();
|
||||
});
|
||||
|
||||
export default app;
|
||||
`,
|
||||
'.gemini/settings.json': JSON.stringify({
|
||||
experimental: {
|
||||
topicUpdateNarration: true,
|
||||
},
|
||||
}),
|
||||
},
|
||||
assert: async (rig) => {
|
||||
const toolLogs = rig.readToolLogs();
|
||||
const topicCalls = toolLogs.filter(
|
||||
(l) => l.toolRequest.name === UPDATE_TOPIC_TOOL_NAME,
|
||||
);
|
||||
|
||||
// This is a multi-step task (read, create new file, edit old file).
|
||||
// It should clear the bar and use update_topic at least at the start and end.
|
||||
expect(topicCalls.length).toBeGreaterThanOrEqual(2);
|
||||
|
||||
// Verify it actually did the refactoring to ensure it didn't just fail immediately
|
||||
expect(fs.existsSync(path.join(rig.testDir, 'src/routes.ts'))).toBe(true);
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user