mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-03-28 15:01:14 -07:00
Introduces a 'create_new_topic' tool to organize agent work into logical chapters in the UI. Key changes: - Added TopicManager singleton to track active session phase. - Updated Scheduler to prioritize topic markers at the start of batches. - Injected active topic context into prompt footers for model alignment. - Removed 'Explain Before Acting' mandate in favor of semantic headers. - Added behavioral eval to ensure model compliance with grouping.
86 lines
3.0 KiB
TypeScript
86 lines
3.0 KiB
TypeScript
/**
|
|
* @license
|
|
* Copyright 2026 Google LLC
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
import { describe, expect } from 'vitest';
|
|
import { CREATE_NEW_TOPIC_TOOL_NAME } from '@google/gemini-cli-core';
|
|
import { evalTest, assertModelHasOutput } from './test-helper.js';
|
|
|
|
describe('topic_grouping', () => {
|
|
evalTest('USUALLY_PASSES', {
  name: 'should use create_new_topic to mark major phase transitions',
  prompt:
    'I want you to fix a bug in src/utils.js. First, read the file to understand the bug, then research if there are any related tests, and finally fix it. Use create_new_topic to organize your work into logical chapters (e.g., Researching, Fixing).',
  files: {
    'src/utils.js':
      'export function add(a, b) { return a - b; } // BUG: should be +',
    'tests/utils.test.js': '// test file',
  },
  /**
   * Checks that the run produced at least two create_new_topic calls and
   * that their titles cover both an investigation phase and a fix phase.
   */
  assert: async (rig, result) => {
    // Keep only the tool log entries that are topic markers.
    const topicCalls = rig
      .readToolLogs()
      .filter(
        ({ toolRequest }) => toolRequest.name === CREATE_NEW_TOPIC_TOOL_NAME,
      );

    // One marker for research plus one for fixing is the minimum.
    expect(
      topicCalls.length,
      `Expected at least 2 topic calls, but got ${topicCalls.length}`,
    ).toBeGreaterThanOrEqual(2);

    // The args are parsed from JSON here; titles are lower-cased so the
    // keyword matching below is case-insensitive. A missing title becomes ''.
    const titles = topicCalls.map(({ toolRequest }) => {
      const parsedArgs = JSON.parse(toolRequest.args);
      console.log('Topic call args:', parsedArgs);
      return (parsedArgs.title || '').toLowerCase();
    });

    console.log('Observed topic titles:', titles);

    // Substrings (not whole words) so that e.g. 'analyz' matches both
    // "analyze" and "analyzing".
    const researchKeywords = ['research', 'analyz', 'understand'];
    const fixKeywords = ['fix', 'implement', 'apply'];

    const hasResearch = titles.some((title) =>
      researchKeywords.some((keyword) => title.includes(keyword)),
    );
    const hasFix = titles.some((title) =>
      fixKeywords.some((keyword) => title.includes(keyword)),
    );

    expect(
      hasResearch,
      'Should have a topic call for research/analysis',
    ).toBe(true);
    expect(hasFix, 'Should have a topic call for fixing/implementation').toBe(
      true,
    );

    assertModelHasOutput(result);
  },
});
|
|
|
|
evalTest('USUALLY_PASSES', {
  name: 'should sort create_new_topic to the top of the turn',
  prompt:
    'Immediately start by creating a new topic called "Deep Research" and then list the contents of the current directory. Do both in the same turn if possible.',
  /**
   * Checks that at least two tool calls were logged and that the first
   * logged call is create_new_topic.
   */
  assert: async (rig, result) => {
    const toolLogs = rig.readToolLogs();

    // The prompt asks for two actions (topic marker + directory listing),
    // so at least two tool calls are expected in the logs.
    expect(toolLogs.length).toBeGreaterThanOrEqual(2);

    // If both calls were executed in the same batch, the topic marker is
    // expected to appear first in the logs.
    const { name: firstToolName } = toolLogs[0].toolRequest;
    expect(firstToolName).toBe(CREATE_NEW_TOPIC_TOOL_NAME);

    assertModelHasOutput(result);
  },
});
|
|
});
|