mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-22 19:14:33 -07:00
feat(core): implement linear semantic topic grouping system
Introduces a 'create_new_topic' tool to organize agent work into logical chapters in the UI.

Key changes:
- Added TopicManager singleton to track active session phase.
- Updated Scheduler to prioritize topic markers at the start of batches.
- Injected active topic context into prompt footers for model alignment.
- Removed 'Explain Before Acting' mandate in favor of semantic headers.
- Added behavioral eval to ensure model compliance with grouping.
This commit is contained in:
@@ -0,0 +1,85 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, expect } from 'vitest';
|
||||
import { CREATE_NEW_TOPIC_TOOL_NAME } from '@google/gemini-cli-core';
|
||||
import { evalTest, assertModelHasOutput } from './test-helper.js';
|
||||
|
||||
/** Substrings that mark a lowercased topic title as a research/analysis chapter. */
const RESEARCH_KEYWORDS = ['research', 'analyz', 'understand'];

/** Substrings that mark a lowercased topic title as a fixing/implementation chapter. */
const FIX_KEYWORDS = ['fix', 'implement', 'apply'];

/**
 * Returns the lowercased `title` from parsed create_new_topic arguments.
 *
 * A missing or non-string title (or non-object args) yields '' so that a
 * malformed tool call surfaces through the keyword assertions below rather
 * than as an unrelated TypeError from `.toLowerCase()`.
 *
 * @param {unknown} args - Result of JSON.parse on the tool request args.
 * @returns {string} Lowercased title, or '' when none is usable.
 */
function topicTitleOf(args) {
  const title = args && typeof args.title === 'string' ? args.title : '';
  return title.toLowerCase();
}

/**
 * Reports whether at least one title contains at least one keyword.
 *
 * @param {string[]} titles - Lowercased topic titles.
 * @param {string[]} keywords - Lowercased substrings to look for.
 * @returns {boolean} True when any title includes any keyword.
 */
function mentionsAny(titles, keywords) {
  return titles.some((title) =>
    keywords.some((keyword) => title.includes(keyword)),
  );
}

/**
 * Behavioral evals for the create_new_topic tool: the model should open
 * topics for the major phases of a task, and a topic call should be the first
 * tool call recorded when it is requested together with other work.
 */
describe('topic_grouping', () => {
  evalTest('USUALLY_PASSES', {
    name: 'should use create_new_topic to mark major phase transitions',
    prompt:
      'I want you to fix a bug in src/utils.js. First, read the file to understand the bug, then research if there are any related tests, and finally fix it. Use create_new_topic to organize your work into logical chapters (e.g., Researching, Fixing).',
    files: {
      'src/utils.js':
        'export function add(a, b) { return a - b; } // BUG: should be +',
      'tests/utils.test.js': '// test file',
    },
    assert: async (rig, result) => {
      // Expect at least two topic changes: one for research, one for fixing.
      const topicCalls = rig
        .readToolLogs()
        .filter((log) => log.toolRequest.name === CREATE_NEW_TOPIC_TOOL_NAME);

      expect(
        topicCalls.length,
        `Expected at least 2 topic calls, but got ${topicCalls.length}`,
      ).toBeGreaterThanOrEqual(2);

      // The logged args are parsed as JSON text; a parse failure fails the eval.
      const parsedArgs = topicCalls.map((call) =>
        JSON.parse(call.toolRequest.args),
      );
      for (const args of parsedArgs) {
        console.log('Topic call args:', args);
      }

      const titles = parsedArgs.map(topicTitleOf);
      console.log('Observed topic titles:', titles);

      // Verify that the titles are descriptive: some title must mention a
      // research-like keyword and some title a fix-like keyword. This does
      // not require the two matches to come from different titles.
      expect(
        mentionsAny(titles, RESEARCH_KEYWORDS),
        'Should have a topic call for research/analysis',
      ).toBe(true);
      expect(
        mentionsAny(titles, FIX_KEYWORDS),
        'Should have a topic call for fixing/implementation',
      ).toBe(true);

      assertModelHasOutput(result);
    },
  });

  evalTest('USUALLY_PASSES', {
    name: 'should sort create_new_topic to the top of the turn',
    prompt:
      'Immediately start by creating a new topic called "Deep Research" and then list the contents of the current directory. Do both in the same turn if possible.',
    assert: async (rig, result) => {
      const toolLogs = rig.readToolLogs();

      // The prompt asks for a topic plus a directory listing, so at least two
      // tool calls are expected overall.
      expect(
        toolLogs.length,
        `Expected at least 2 tool calls, but got ${toolLogs.length}`,
      ).toBeGreaterThanOrEqual(2);

      // The first recorded tool call should be create_new_topic. This only
      // inspects the first log entry; it does not verify that both calls were
      // issued in the same turn.
      // NOTE(review): assumes readToolLogs() returns entries in execution
      // order - confirm against the test rig.
      expect(toolLogs[0].toolRequest.name).toBe(CREATE_NEW_TOPIC_TOOL_NAME);

      assertModelHasOutput(result);
    },
  });
});
|
||||
Reference in New Issue
Block a user