feat(core): discourage update topic tool for simple tasks (#24640)

Co-authored-by: Samee Zahid <sameez@google.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Samee Zahid
2026-04-03 16:52:24 -07:00
committed by GitHub
parent e671ffedbd
commit 9e21fad52e
3 changed files with 106 additions and 4 deletions
+102
View File
@@ -5,6 +5,8 @@
*/
import { describe, expect } from 'vitest';
import fs from 'node:fs';
import path from 'node:path';
import { evalTest } from './test-helper.js';
describe('update_topic_behavior', () => {
@@ -113,4 +115,104 @@ describe('update_topic_behavior', () => {
}
},
});
evalTest('USUALLY_PASSES', {
name: 'update_topic should NOT be used for informational coding tasks (Obvious)',
approvalMode: 'default',
prompt:
'Explain the difference between Map and Object in JavaScript and provide a performance-focused code snippet for each.',
files: {
'.gemini/settings.json': JSON.stringify({
experimental: {
topicUpdateNarration: true,
},
}),
},
assert: async (rig) => {
const toolLogs = rig.readToolLogs();
const topicCalls = toolLogs.filter(
(l) => l.toolRequest.name === UPDATE_TOPIC_TOOL_NAME,
);
expect(
topicCalls.length,
`Expected 0 update_topic calls for an informational task, but found ${topicCalls.length}`,
).toBe(0);
},
});
evalTest('USUALLY_PASSES', {
name: 'update_topic should NOT be used for surgical symbol searches (Grey Area)',
approvalMode: 'default',
prompt:
"Find the file where the 'UPDATE_TOPIC_TOOL_NAME' constant is defined.",
files: {
'packages/core/src/tools/tool-names.ts':
"export const UPDATE_TOPIC_TOOL_NAME = 'update_topic';",
'.gemini/settings.json': JSON.stringify({
experimental: {
topicUpdateNarration: true,
},
}),
},
assert: async (rig) => {
const toolLogs = rig.readToolLogs();
const topicCalls = toolLogs.filter(
(l) => l.toolRequest.name === UPDATE_TOPIC_TOOL_NAME,
);
expect(
topicCalls.length,
`Expected 0 update_topic calls for a surgical symbol search, but found ${topicCalls.length}`,
).toBe(0);
},
});
evalTest('USUALLY_PASSES', {
name: 'update_topic should be used for medium complexity multi-step tasks',
prompt:
'Refactor the `users-api` project. Move the routing logic from src/app.ts into a new file src/routes.ts, and update app.ts to use the new routes file.',
files: {
'package.json': JSON.stringify(
{
name: 'users-api',
version: '1.0.0',
},
null,
2,
),
'src/app.ts': `
import express from 'express';
const app = express();
app.get('/users', (req, res) => {
res.json([{id: 1, name: 'Alice'}]);
});
app.post('/users', (req, res) => {
res.status(201).send();
});
export default app;
`,
'.gemini/settings.json': JSON.stringify({
experimental: {
topicUpdateNarration: true,
},
}),
},
assert: async (rig) => {
const toolLogs = rig.readToolLogs();
const topicCalls = toolLogs.filter(
(l) => l.toolRequest.name === UPDATE_TOPIC_TOOL_NAME,
);
// This is a multi-step task (read, create new file, edit old file).
// It should clear the bar and use update_topic at least at the start and end.
expect(topicCalls.length).toBeGreaterThanOrEqual(2);
// Verify it actually did the refactoring to ensure it didn't just fail immediately
expect(fs.existsSync(path.join(rig.testDir, 'src/routes.ts'))).toBe(true);
},
});
});