diff --git a/evals/topic_grouping.eval.ts b/evals/topic_grouping.eval.ts
new file mode 100644
index 0000000000..bdac0374f3
--- /dev/null
+++ b/evals/topic_grouping.eval.ts
@@ -0,0 +1,104 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, expect } from 'vitest';
+import { CREATE_NEW_TOPIC_TOOL_NAME } from '@google/gemini-cli-core';
+import { evalTest, assertModelHasOutput } from './test-helper.js';
+
+/**
+ * Behavioral evals for the create_new_topic tool: the model should open a
+ * topic per major phase of work, and a topic call should come first in a turn.
+ */
+describe('topic_grouping', () => {
+  evalTest('USUALLY_PASSES', {
+    name: 'should use create_new_topic to mark major phase transitions',
+    prompt:
+      'I want you to fix a bug in src/utils.js. First, read the file to understand the bug, then research if there are any related tests, and finally fix it. Use create_new_topic to organize your work into logical chapters (e.g., Researching, Fixing).',
+    files: {
+      'src/utils.js':
+        'export function add(a, b) { return a - b; } // BUG: should be +',
+      'tests/utils.test.js': '// test file',
+    },
+    assert: async (rig, result) => {
+      // Expect at least two topic changes: one for research, one for fixing
+      const toolLogs = rig.readToolLogs();
+      const topicCalls = toolLogs.filter(
+        (log) => log.toolRequest.name === CREATE_NEW_TOPIC_TOOL_NAME,
+      );
+
+      expect(
+        topicCalls.length,
+        `Expected at least 2 topic calls, but got ${topicCalls.length}`,
+      ).toBeGreaterThanOrEqual(2);
+
+      // Normalize titles for keyword matching. A missing or non-string title
+      // becomes '' so a malformed call fails an assertion below rather than
+      // throwing a TypeError here.
+      const titles = topicCalls.map((call) => {
+        const args = JSON.parse(call.toolRequest.args);
+        return typeof args?.title === 'string' ? args.title.toLowerCase() : '';
+      });
+      // Included in failure messages so a failing run shows what the model did.
+      const observed = JSON.stringify(titles);
+
+      // Topics must be distinct: repeating one title is not a phase change.
+      expect(
+        new Set(titles).size,
+        `Expected at least 2 distinct topic titles, but got ${observed}`,
+      ).toBeGreaterThanOrEqual(2);
+
+      const hasResearch = titles.some(
+        (t) =>
+          t.includes('research') ||
+          t.includes('analyz') ||
+          t.includes('understand'),
+      );
+      const hasFix = titles.some(
+        (t) =>
+          t.includes('fix') || t.includes('implement') || t.includes('apply'),
+      );
+
+      expect(
+        hasResearch,
+        `Should have a topic call for research/analysis, but got ${observed}`,
+      ).toBe(true);
+      expect(
+        hasFix,
+        `Should have a topic call for fixing/implementation, but got ${observed}`,
+      ).toBe(true);
+
+      assertModelHasOutput(result);
+    },
+  });
+
+  evalTest('USUALLY_PASSES', {
+    name: 'should sort create_new_topic to the top of the turn',
+    prompt:
+      'Immediately start by creating a new topic called "Deep Research" and then list the contents of the current directory. Do both in the same turn if possible.',
+    assert: async (rig, result) => {
+      // In the same turn, create_new_topic should be sorted to index 0
+      const toolLogs = rig.readToolLogs();
+      const toolNames = toolLogs.map((log) => log.toolRequest.name);
+      const observed = JSON.stringify(toolNames);
+
+      // We look for a turn where both were called
+      // Since it's a simple prompt, they should both be in the first turn
+      expect(
+        toolNames.length,
+        `Expected at least 2 tool calls, but got ${observed}`,
+      ).toBeGreaterThanOrEqual(2);
+
+      // The first tool call in the logs should be create_new_topic
+      // if they were executed in the same batch.
+      expect(
+        toolNames[0],
+        `Expected ${CREATE_NEW_TOPIC_TOOL_NAME} first, but got ${observed}`,
+      ).toBe(CREATE_NEW_TOPIC_TOOL_NAME);
+
+      assertModelHasOutput(result);
+    },
+  });
+});
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index 33839ff75f..3526e7e240 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -35,6 +35,7 @@ import { WebSearchTool } from '../tools/web-search.js';
 import { AskUserTool } from '../tools/ask-user.js';
 import { ExitPlanModeTool } from '../tools/exit-plan-mode.js';
 import { EnterPlanModeTool } from '../tools/enter-plan-mode.js';
+import { CreateNewTopicTool } from '../tools/topicTool.js';
 import { GeminiClient } from '../core/client.js';
 import { BaseLlmClient } from '../core/baseLlmClient.js';
 import { LocalLiteRtLmClient } from '../core/localLiteRtLmClient.js';
@@ -2987,6 +2988,10 @@ export class Config implements McpContext, AgentLoopContext {
       );
     }
 
+    maybeRegister(CreateNewTopicTool, () =>
+      registry.registerTool(new CreateNewTopicTool(this.messageBus)),
+    );
+
     if (this.isTrackerEnabled()) {
       maybeRegister(TrackerCreateTaskTool, () =>
registry.registerTool(new TrackerCreateTaskTool(this, this.messageBus)), diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index f11af69e7b..c0e92ee0e6 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -51,7 +51,7 @@ Use the following guidelines to optimize your search and read patterns. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents @@ -221,7 +221,7 @@ Use the following guidelines to optimize your search and read patterns. 
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents @@ -361,6 +361,7 @@ exports[`Core System Prompt (prompts.ts) > ApprovalMode in System Prompt > shoul - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. 
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents Sub-agents are specialized expert agents that you can use to assist you in the completion of all or part of a task. @@ -510,7 +511,7 @@ Use the following guidelines to optimize your search and read patterns. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. 
+- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents @@ -680,7 +681,7 @@ Use the following guidelines to optimize your search and read patterns. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents @@ -850,7 +851,7 @@ Use the following guidelines to optimize your search and read patterns. 
- **Handle Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, do not perform it automatically. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). - **Non-Interactive Environment:** You are running in a headless/CI environment and cannot interact with the user. Do not ask the user questions or request additional information, as the session will terminate. Use your best judgment to complete the task. If a tool fails because it requires user interaction, do not retry it indefinitely; instead, explain the limitation and suggest how the user can provide the required data (e.g., via environment variables). # Hook Context @@ -972,7 +973,7 @@ Use the following guidelines to optimize your search and read patterns. - **Handle Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request. 
If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, do not perform it automatically. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). - **Non-Interactive Environment:** You are running in a headless/CI environment and cannot interact with the user. Do not ask the user questions or request additional information, as the session will terminate. Use your best judgment to complete the task. If a tool fails because it requires user interaction, do not retry it indefinitely; instead, explain the limitation and suggest how the user can provide the required data (e.g., via environment variables). # Hook Context @@ -1058,6 +1059,7 @@ exports[`Core System Prompt (prompts.ts) > should handle git instructions when i - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. 
If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents Sub-agents are specialized expert agents that you can use to assist you in the completion of all or part of a task. @@ -1171,6 +1173,7 @@ exports[`Core System Prompt (prompts.ts) > should handle git instructions when i - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. 
Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents Sub-agents are specialized expert agents that you can use to assist you in the completion of all or part of a task. @@ -1302,6 +1305,7 @@ exports[`Core System Prompt (prompts.ts) > should include approved plan instruct - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents Sub-agents are specialized expert agents that you can use to assist you in the completion of all or part of a task. @@ -1406,6 +1410,7 @@ exports[`Core System Prompt (prompts.ts) > should include available_skills when - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. 
- **Skill Guidance:** Once a skill is activated via \`activate_skill\`, its instructions and resources are returned wrapped in \`<activated_skill>\` tags. You MUST treat the content within \`<instructions>\` as expert procedural guidance, prioritizing these specialized rules and workflows over your general defaults for the duration of the task. You may utilize any listed \`<available_resources>\` as needed. Follow this expert guidance strictly while continuing to uphold your core safety and security standards. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents Sub-agents are specialized expert agents that you can use to assist you in the completion of all or part of a task. @@ -1568,7 +1573,7 @@ Use the following guidelines to optimize your search and read patterns. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. - **Skill Guidance:** Once a skill is activated via \`activate_skill\`, its instructions and resources are returned wrapped in \`<activated_skill>\` tags. You MUST treat the content within \`<instructions>\` as expert procedural guidance, prioritizing these specialized rules and workflows over your general defaults for the duration of the task. You may utilize any listed \`<available_resources>\` as needed. Follow this expert guidance strictly while continuing to uphold your core safety and security standards. -- **Explain Before Acting:** Never call tools in silence.
You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents @@ -1733,7 +1738,7 @@ Use the following guidelines to optimize your search and read patterns. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. 
+- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents @@ -1890,7 +1895,7 @@ Use the following guidelines to optimize your search and read patterns. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents @@ -2047,7 +2052,7 @@ Use the following guidelines to optimize your search and read patterns. 
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents @@ -2200,7 +2205,7 @@ Use the following guidelines to optimize your search and read patterns. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. 
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents @@ -2353,7 +2358,7 @@ Use the following guidelines to optimize your search and read patterns. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. 
This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents @@ -2498,7 +2503,7 @@ Use the following guidelines to optimize your search and read patterns. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. 
Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents @@ -2650,7 +2655,7 @@ Use the following guidelines to optimize your search and read patterns. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents @@ -2803,7 +2808,7 @@ Use the following guidelines to optimize your search and read patterns. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. 
If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents @@ -2931,6 +2936,7 @@ exports[`Core System Prompt (prompts.ts) > should match snapshot on Windows 1`] - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. 
Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents Sub-agents are specialized expert agents that you can use to assist you in the completion of all or part of a task. @@ -3045,6 +3051,7 @@ exports[`Core System Prompt (prompts.ts) > should render hierarchical memory wit - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents Sub-agents are specialized expert agents that you can use to assist you in the completion of all or part of a task. @@ -3208,7 +3215,7 @@ Use the following guidelines to optimize your search and read patterns. 
- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents @@ -3361,7 +3368,7 @@ Use the following guidelines to optimize your search and read patterns. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. 
- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents @@ -3478,6 +3485,8 @@ exports[`Core System Prompt (prompts.ts) > should return the interactive avoidan - **Handle Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, do not perform it automatically. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). 
+ - **Continue the work** You are not to interact with the user. Do your best to complete the task at hand, using your best judgement and avoid asking user for any additional information. # Available Sub-Agents @@ -3626,7 +3635,7 @@ Use the following guidelines to optimize your search and read patterns. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents @@ -3779,7 +3788,7 @@ Use the following guidelines to optimize your search and read patterns. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. 
If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents @@ -3896,6 +3905,7 @@ exports[`Core System Prompt (prompts.ts) > should use legacy system prompt for n - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If the user implies a change (e.g., reports a bug) without explicitly asking for a fix, **ask for confirmation first**. If asked *how* to do something, explain first, don't just do it. - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. 
Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes. +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`create_new_topic\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). # Available Sub-Agents Sub-agents are specialized expert agents that you can use to assist you in the completion of all or part of a task. diff --git a/packages/core/src/core/logger.test.ts b/packages/core/src/core/logger.test.ts index a479654233..10b867cf54 100644 --- a/packages/core/src/core/logger.test.ts +++ b/packages/core/src/core/logger.test.ts @@ -1,6 +1,6 @@ /** * @license - * Copyright 2025 Google LLC + * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ diff --git a/packages/core/src/core/prompts-substitution.test.ts b/packages/core/src/core/prompts-substitution.test.ts index 388229d948..42e7656a61 100644 --- a/packages/core/src/core/prompts-substitution.test.ts +++ b/packages/core/src/core/prompts-substitution.test.ts @@ -1,6 +1,6 @@ /** * @license - * Copyright 2025 Google LLC + * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ diff --git a/packages/core/src/core/prompts.test.ts b/packages/core/src/core/prompts.test.ts index ba9b0ec93b..22516851fd 100644 --- a/packages/core/src/core/prompts.test.ts +++ b/packages/core/src/core/prompts.test.ts @@ -1,6 +1,6 @@ /** * @license - * Copyright 2025 Google LLC + * Copyright 2026 Google LLC * SPDX-License-Identifier: Apache-2.0 */ diff --git a/packages/core/src/core/prompts.ts b/packages/core/src/core/prompts.ts index b85c29494d..82e48c0a7f 100644 --- a/packages/core/src/core/prompts.ts +++ b/packages/core/src/core/prompts.ts @@ -1,6 +1,6 @@ /** * @license - * Copyright 2025 Google LLC + * Copyright 2026 Google LLC * 
SPDX-License-Identifier: Apache-2.0 */ diff --git a/packages/core/src/prompts/promptProvider.ts b/packages/core/src/prompts/promptProvider.ts index 01dbd8d4d4..f59a427687 100644 --- a/packages/core/src/prompts/promptProvider.ts +++ b/packages/core/src/prompts/promptProvider.ts @@ -31,6 +31,7 @@ import { import { resolveModel, supportsModernFeatures } from '../config/models.js'; import { DiscoveredMCPTool } from '../tools/mcp-tool.js'; import { getAllGeminiMdFilenames } from '../tools/memoryTool.js'; +import { TopicManager } from '../tools/topicTool.js'; /** * Orchestrates prompt generation by gathering context and building options. @@ -200,21 +201,35 @@ export class PromptProvider { })), } as snippets.SystemPromptOptions; - // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - const getCoreSystemPrompt = activeSnippets.getCoreSystemPrompt as ( + /* eslint-disable @typescript-eslint/no-unsafe-type-assertion, @typescript-eslint/no-explicit-any */ + const getCoreSystemPrompt = (activeSnippets as any) + .getCoreSystemPrompt as ( options: snippets.SystemPromptOptions, ) => string; basePrompt = getCoreSystemPrompt(options); + /* eslint-enable @typescript-eslint/no-unsafe-type-assertion, @typescript-eslint/no-explicit-any */ } // --- Finalization (Shell) --- - const finalPrompt = activeSnippets.renderFinalShell( - basePrompt, - userMemory, - contextFilenames, - ); + const activeTopic = TopicManager.getInstance().getTopic(); + + /* eslint-disable @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-type-assertion, @typescript-eslint/no-explicit-any */ + const finalPrompt = isModernModel + ? 
(activeSnippets.renderFinalShell as any)( + basePrompt, + userMemory, + contextFilenames, + activeTopic, + ) + : (activeSnippets.renderFinalShell as any)( + basePrompt, + userMemory, + activeTopic, + ); + /* eslint-enable @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-type-assertion, @typescript-eslint/no-explicit-any */ // Sanitize erratic newlines from composition + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment const sanitizedPrompt = finalPrompt.replace(/\n{3,}/g, '\n\n'); // Write back to file if requested @@ -224,6 +239,7 @@ export class PromptProvider { path.resolve(path.join(GEMINI_DIR, 'system.md')), ); + // eslint-disable-next-line @typescript-eslint/no-unsafe-return return sanitizedPrompt; } diff --git a/packages/core/src/prompts/snippets.legacy.ts b/packages/core/src/prompts/snippets.legacy.ts index 227b06be45..3e1b31e18c 100644 --- a/packages/core/src/prompts/snippets.legacy.ts +++ b/packages/core/src/prompts/snippets.legacy.ts @@ -8,6 +8,7 @@ import type { HierarchicalMemory } from '../config/memory.js'; import { ACTIVATE_SKILL_TOOL_NAME, ASK_USER_TOOL_NAME, + CREATE_NEW_TOPIC_TOOL_NAME, EDIT_TOOL_NAME, ENTER_PLAN_MODE_TOOL_NAME, EXIT_PLAN_MODE_TOOL_NAME, @@ -132,11 +133,16 @@ ${renderFinalReminder(options.finalReminder)} export function renderFinalShell( basePrompt: string, userMemory?: string | HierarchicalMemory, + activeTopic?: string, ): string { + const topicHeader = activeTopic + ? `\n---\n\n[Active Topic: ${activeTopic}]\n(If this phase is complete, use \`create_new_topic\` to start the next chapter.)` + : ''; + return ` ${basePrompt.trim()} -${renderUserMemory(userMemory)} +${renderUserMemory(userMemory)}${topicHeader} `.trim(); } @@ -163,7 +169,9 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { - **User Hints:** During execution, the user may provide real-time hints (marked as "User hint:" or "User hints:"). 
Treat these as high-priority but scope-preserving course corrections: apply the minimal plan change needed, keep unaffected user tasks active, and never cancel/skip tasks unless cancellation is explicit for those tasks. Hints may add new tasks, modify one or more tasks, cancel specific tasks, or provide extra context only. If scope is ambiguous, ask for clarification before dropping work. - ${mandateConfirm(options.interactive)} - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. -- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.${mandateSkillGuidance(options.hasSkills)}${mandateExplainBeforeActing(options.isGemini3)}${mandateContinueWork(options.interactive)} +- **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.${mandateSkillGuidance(options.hasSkills)} +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`${CREATE_NEW_TOPIC_TOOL_NAME}\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating). +${mandateContinueWork(options.interactive)} `.trim(); } @@ -474,12 +482,6 @@ function mandateConflictResolution(hasHierarchicalMemory: boolean): string { return '\n- **Conflict Resolution:** Instructions are provided in hierarchical context tags: ``, ``, and ``. 
In case of contradictory instructions, follow this priority: `` (highest) > `` > `` (lowest).'; } -function mandateExplainBeforeActing(isGemini3: boolean): string { - if (!isGemini3) return ''; - return ` -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy.`; -} - function mandateContinueWork(interactive: boolean): string { if (interactive) return ''; return ` diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index bad6827ae7..7ac12ff074 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -7,6 +7,7 @@ import { ACTIVATE_SKILL_TOOL_NAME, ASK_USER_TOOL_NAME, + CREATE_NEW_TOPIC_TOOL_NAME, EDIT_TOOL_NAME, ENTER_PLAN_MODE_TOOL_NAME, EXIT_PLAN_MODE_TOOL_NAME, @@ -145,11 +146,16 @@ export function renderFinalShell( basePrompt: string, userMemory?: string | HierarchicalMemory, contextFilenames?: string[], + activeTopic?: string, ): string { + const topicHeader = activeTopic + ? `\n---\n\n[Active Topic: ${activeTopic}]\n(If this phase is complete, use \`create_new_topic\` to start the next chapter.)` + : ''; + return ` ${basePrompt.trim()} -${renderUserMemory(userMemory, contextFilenames)} +${renderUserMemory(userMemory, contextFilenames)}${topicHeader} `.trim(); } @@ -226,7 +232,7 @@ Use the following guidelines to optimize your search and read patterns. - ${mandateConfirm(options.interactive)} - **Explaining Changes:** After completing a code modification or file operation *do not* provide summaries unless asked. - **Do Not revert changes:** Do not revert changes to the codebase unless asked to do so by the user. 
Only revert changes made by you if they have resulted in an error or if the user has explicitly asked you to revert the changes.${mandateSkillGuidance(options.hasSkills)} -- **Explain Before Acting:** Never call tools in silence. You MUST provide a concise, one-sentence explanation of your intent or strategy immediately before executing tool calls. This is essential for transparency, especially when confirming a request or answering a question. Silence is only acceptable for repetitive, low-level discovery operations (e.g., sequential file reads) where narration would be noisy.${mandateContinueWork(options.interactive)} +- **Topic Management:** You MUST organize your work into logical "Chapters" or "Topics" using the \`${CREATE_NEW_TOPIC_TOOL_NAME}\` tool (with the \`title\` parameter). This is critical for UI organization. Call this tool at the beginning of every major phase (e.g., Researching, Proposing Strategy, Implementing Fix, Validating).${mandateContinueWork(options.interactive)} `.trim(); } diff --git a/packages/core/src/scheduler/scheduler.ts b/packages/core/src/scheduler/scheduler.ts index ee8e9371e2..c98d3f5a10 100644 --- a/packages/core/src/scheduler/scheduler.ts +++ b/packages/core/src/scheduler/scheduler.ts @@ -48,6 +48,7 @@ import { type McpProgressPayload, } from '../utils/events.js'; import { GeminiCliOperation } from '../telemetry/constants.js'; +import { CREATE_NEW_TOPIC_TOOL_NAME } from '../tools/tool-names.js'; interface SchedulerQueueItem { requests: ToolCallRequestInfo[]; @@ -299,9 +300,16 @@ export class Scheduler { this.state.clearBatch(); const currentApprovalMode = this.config.getApprovalMode(); + // Sort requests to ensure create_new_topic runs first in a batch for proper UI grouping. 
+ const sortedRequests = [...requests].sort((a, b) => { + if (a.name === CREATE_NEW_TOPIC_TOOL_NAME) return -1; + if (b.name === CREATE_NEW_TOPIC_TOOL_NAME) return 1; + return 0; + }); + try { const toolRegistry = this.context.toolRegistry; - const newCalls: ToolCall[] = requests.map((request) => { + const newCalls: ToolCall[] = sortedRequests.map((request) => { const enrichedRequest: ToolCallRequestInfo = { ...request, schedulerId: this.schedulerId, diff --git a/packages/core/src/tools/definitions/base-declarations.ts b/packages/core/src/tools/definitions/base-declarations.ts index b39dc42286..c0a01ff6e0 100644 --- a/packages/core/src/tools/definitions/base-declarations.ts +++ b/packages/core/src/tools/definitions/base-declarations.ts @@ -122,3 +122,7 @@ export const EXIT_PLAN_PARAM_PLAN_PATH = 'plan_path'; // -- enter_plan_mode -- export const ENTER_PLAN_MODE_TOOL_NAME = 'enter_plan_mode'; export const PLAN_MODE_PARAM_REASON = 'reason'; + +// -- create_new_topic -- +export const CREATE_NEW_TOPIC_TOOL_NAME = 'create_new_topic'; +export const TOPIC_PARAM_TITLE = 'title'; diff --git a/packages/core/src/tools/definitions/coreTools.ts b/packages/core/src/tools/definitions/coreTools.ts index b5121ca5d2..2ebf772963 100644 --- a/packages/core/src/tools/definitions/coreTools.ts +++ b/packages/core/src/tools/definitions/coreTools.ts @@ -18,9 +18,7 @@ import { getExitPlanModeDeclaration, getActivateSkillDeclaration, } from './dynamic-declaration-helpers.js'; - -// Re-export names for compatibility -export { +import { GLOB_TOOL_NAME, GREP_TOOL_NAME, LS_TOOL_NAME, @@ -90,9 +88,87 @@ export { ASK_USER_OPTION_PARAM_DESCRIPTION, PLAN_MODE_PARAM_REASON, EXIT_PLAN_PARAM_PLAN_PATH, + CREATE_NEW_TOPIC_TOOL_NAME, + TOPIC_PARAM_TITLE, SKILL_PARAM_NAME, } from './base-declarations.js'; +// Re-export names for compatibility +export { + GLOB_TOOL_NAME, + GREP_TOOL_NAME, + LS_TOOL_NAME, + READ_FILE_TOOL_NAME, + SHELL_TOOL_NAME, + WRITE_FILE_TOOL_NAME, + EDIT_TOOL_NAME, + 
WEB_SEARCH_TOOL_NAME, + WRITE_TODOS_TOOL_NAME, + WEB_FETCH_TOOL_NAME, + READ_MANY_FILES_TOOL_NAME, + MEMORY_TOOL_NAME, + GET_INTERNAL_DOCS_TOOL_NAME, + ACTIVATE_SKILL_TOOL_NAME, + ASK_USER_TOOL_NAME, + EXIT_PLAN_MODE_TOOL_NAME, + ENTER_PLAN_MODE_TOOL_NAME, + CREATE_NEW_TOPIC_TOOL_NAME, + // Shared parameter names + PARAM_FILE_PATH, + PARAM_DIR_PATH, + PARAM_PATTERN, + PARAM_CASE_SENSITIVE, + PARAM_RESPECT_GIT_IGNORE, + PARAM_RESPECT_GEMINI_IGNORE, + PARAM_FILE_FILTERING_OPTIONS, + PARAM_DESCRIPTION, + // Tool-specific parameter names + READ_FILE_PARAM_START_LINE, + READ_FILE_PARAM_END_LINE, + WRITE_FILE_PARAM_CONTENT, + GREP_PARAM_INCLUDE_PATTERN, + GREP_PARAM_EXCLUDE_PATTERN, + GREP_PARAM_NAMES_ONLY, + GREP_PARAM_MAX_MATCHES_PER_FILE, + GREP_PARAM_TOTAL_MAX_MATCHES, + GREP_PARAM_FIXED_STRINGS, + GREP_PARAM_CONTEXT, + GREP_PARAM_AFTER, + GREP_PARAM_BEFORE, + GREP_PARAM_NO_IGNORE, + EDIT_PARAM_INSTRUCTION, + EDIT_PARAM_OLD_STRING, + EDIT_PARAM_NEW_STRING, + EDIT_PARAM_ALLOW_MULTIPLE, + LS_PARAM_IGNORE, + SHELL_PARAM_COMMAND, + SHELL_PARAM_IS_BACKGROUND, + WEB_SEARCH_PARAM_QUERY, + WEB_FETCH_PARAM_PROMPT, + READ_MANY_PARAM_INCLUDE, + READ_MANY_PARAM_EXCLUDE, + READ_MANY_PARAM_RECURSIVE, + READ_MANY_PARAM_USE_DEFAULT_EXCLUDES, + MEMORY_PARAM_FACT, + TODOS_PARAM_TODOS, + TODOS_ITEM_PARAM_DESCRIPTION, + TODOS_ITEM_PARAM_STATUS, + DOCS_PARAM_PATH, + ASK_USER_PARAM_QUESTIONS, + ASK_USER_QUESTION_PARAM_QUESTION, + ASK_USER_QUESTION_PARAM_HEADER, + ASK_USER_QUESTION_PARAM_TYPE, + ASK_USER_QUESTION_PARAM_OPTIONS, + ASK_USER_QUESTION_PARAM_MULTI_SELECT, + ASK_USER_QUESTION_PARAM_PLACEHOLDER, + ASK_USER_OPTION_PARAM_LABEL, + ASK_USER_OPTION_PARAM_DESCRIPTION, + PLAN_MODE_PARAM_REASON, + EXIT_PLAN_PARAM_PLAN_PATH, + TOPIC_PARAM_TITLE, + SKILL_PARAM_NAME, +}; + // Re-export sets for compatibility export { DEFAULT_LEGACY_SET } from './model-family-sets/default-legacy.js'; export { GEMINI_3_SET } from './model-family-sets/gemini-3.js'; diff --git 
a/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts b/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts index 3309fcc5ba..3cf1207dca 100644 --- a/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts +++ b/packages/core/src/tools/definitions/model-family-sets/default-legacy.ts @@ -73,6 +73,8 @@ import { ASK_USER_OPTION_PARAM_LABEL, ASK_USER_OPTION_PARAM_DESCRIPTION, PLAN_MODE_PARAM_REASON, + CREATE_NEW_TOPIC_TOOL_NAME, + TOPIC_PARAM_TITLE, } from '../base-declarations.js'; import { getShellDeclaration, @@ -732,6 +734,22 @@ The agent did not use the todo list because this task could be completed by a ti }, }, + [CREATE_NEW_TOPIC_TOOL_NAME]: { + name: CREATE_NEW_TOPIC_TOOL_NAME, + description: + 'Organizes work into a new "Chapter" or "Topic". Call this when transitioning between major phases (e.g., from Research to Implementation).', + parametersJsonSchema: { + type: 'object', + properties: { + [TOPIC_PARAM_TITLE]: { + type: 'string', + description: 'The title of the new topic or chapter.', + }, + }, + required: [TOPIC_PARAM_TITLE], + }, + }, + exit_plan_mode: (plansDir) => getExitPlanModeDeclaration(plansDir), activate_skill: (skillNames) => getActivateSkillDeclaration(skillNames), }; diff --git a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts index 2c0375baa3..757b2f19d8 100644 --- a/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts +++ b/packages/core/src/tools/definitions/model-family-sets/gemini-3.ts @@ -73,6 +73,8 @@ import { ASK_USER_OPTION_PARAM_LABEL, ASK_USER_OPTION_PARAM_DESCRIPTION, PLAN_MODE_PARAM_REASON, + CREATE_NEW_TOPIC_TOOL_NAME, + TOPIC_PARAM_TITLE, } from '../base-declarations.js'; import { getShellDeclaration, @@ -707,6 +709,22 @@ The agent did not use the todo list because this task could be completed by a ti }, }, + [CREATE_NEW_TOPIC_TOOL_NAME]: { + name: 
CREATE_NEW_TOPIC_TOOL_NAME, + description: + 'Organizes work into a new "Chapter" or "Topic". Call this when transitioning between major phases (e.g., from Research to Implementation).', + parametersJsonSchema: { + type: 'object', + properties: { + [TOPIC_PARAM_TITLE]: { + type: 'string', + description: 'The title of the new topic or chapter.', + }, + }, + required: [TOPIC_PARAM_TITLE], + }, + }, + exit_plan_mode: (plansDir) => getExitPlanModeDeclaration(plansDir), activate_skill: (skillNames) => getActivateSkillDeclaration(skillNames), }; diff --git a/packages/core/src/tools/definitions/types.ts b/packages/core/src/tools/definitions/types.ts index a9bd3d85d7..7939834124 100644 --- a/packages/core/src/tools/definitions/types.ts +++ b/packages/core/src/tools/definitions/types.ts @@ -47,6 +47,7 @@ export interface CoreToolSet { get_internal_docs: FunctionDeclaration; ask_user: FunctionDeclaration; enter_plan_mode: FunctionDeclaration; + create_new_topic: FunctionDeclaration; exit_plan_mode: (plansDir: string) => FunctionDeclaration; activate_skill: (skillNames: string[]) => FunctionDeclaration; } diff --git a/packages/core/src/tools/tool-names.ts b/packages/core/src/tools/tool-names.ts index 38a868d665..a6c579c305 100644 --- a/packages/core/src/tools/tool-names.ts +++ b/packages/core/src/tools/tool-names.ts @@ -22,6 +22,7 @@ import { ASK_USER_TOOL_NAME, EXIT_PLAN_MODE_TOOL_NAME, ENTER_PLAN_MODE_TOOL_NAME, + CREATE_NEW_TOPIC_TOOL_NAME, // Shared parameter names PARAM_FILE_PATH, PARAM_DIR_PATH, @@ -74,6 +75,7 @@ import { ASK_USER_OPTION_PARAM_DESCRIPTION, PLAN_MODE_PARAM_REASON, EXIT_PLAN_PARAM_PLAN_PATH, + TOPIC_PARAM_TITLE, SKILL_PARAM_NAME, } from './definitions/coreTools.js'; @@ -95,6 +97,7 @@ export { ASK_USER_TOOL_NAME, EXIT_PLAN_MODE_TOOL_NAME, ENTER_PLAN_MODE_TOOL_NAME, + CREATE_NEW_TOPIC_TOOL_NAME, // Shared parameter names PARAM_FILE_PATH, PARAM_DIR_PATH, @@ -147,6 +150,7 @@ export { ASK_USER_OPTION_PARAM_DESCRIPTION, PLAN_MODE_PARAM_REASON, 
EXIT_PLAN_PARAM_PLAN_PATH, + TOPIC_PARAM_TITLE, SKILL_PARAM_NAME, }; @@ -245,6 +249,7 @@ export const ALL_BUILTIN_TOOL_NAMES = [ GET_INTERNAL_DOCS_TOOL_NAME, ENTER_PLAN_MODE_TOOL_NAME, EXIT_PLAN_MODE_TOOL_NAME, + CREATE_NEW_TOPIC_TOOL_NAME, ] as const; /** @@ -260,6 +265,7 @@ export const PLAN_MODE_TOOLS = [ WEB_SEARCH_TOOL_NAME, ASK_USER_TOOL_NAME, ACTIVATE_SKILL_TOOL_NAME, + CREATE_NEW_TOPIC_TOOL_NAME, ] as const; /** diff --git a/packages/core/src/tools/topicTool.ts b/packages/core/src/tools/topicTool.ts new file mode 100644 index 0000000000..c4cdd28458 --- /dev/null +++ b/packages/core/src/tools/topicTool.ts @@ -0,0 +1,119 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + CREATE_NEW_TOPIC_TOOL_NAME, + TOPIC_PARAM_TITLE, +} from './definitions/coreTools.js'; +import { + BaseDeclarativeTool, + BaseToolInvocation, + Kind, + type ToolResult, +} from './tools.js'; +import { ToolErrorType } from './tool-error.js'; +import type { MessageBus } from '../confirmation-bus/message-bus.js'; +import { debugLogger } from '../utils/debugLogger.js'; + +/** + * Singleton to manage the current active topic title. 
+ */ +export class TopicManager { + private static instance: TopicManager; + private activeTopicTitle?: string; + + private constructor() {} + + static getInstance(): TopicManager { + if (!TopicManager.instance) { + TopicManager.instance = new TopicManager(); + } + return TopicManager.instance; + } + + setTopic(title: string): void { + this.activeTopicTitle = title; + } + + getTopic(): string | undefined { + return this.activeTopicTitle; + } + + reset(): void { + this.activeTopicTitle = undefined; + } +} + +interface CreateNewTopicParams { + [TOPIC_PARAM_TITLE]: string; +} + +class CreateNewTopicInvocation extends BaseToolInvocation< + CreateNewTopicParams, + ToolResult +> { + getDescription(): string { + return `Create new topic: "${this.params[TOPIC_PARAM_TITLE]}"`; + } + + async execute(): Promise<ToolResult> { + const title = this.params[TOPIC_PARAM_TITLE]; + + if (!title) { + return { + llmContent: 'Error: A valid topic title is required.', + returnDisplay: 'Error: A valid topic title is required.', + error: { + message: 'A valid topic title is required.', + type: ToolErrorType.INVALID_TOOL_PARAMS, + }, + }; + } + + debugLogger.log(`[TopicTool] Changing topic to: "${title.trim()}"`); + TopicManager.getInstance().setTopic(title.trim()); + + return { + llmContent: `Topic changed to: "${title.trim()}"`, + returnDisplay: `Topic changed to: **${title.trim()}**`, + }; + } +} + +/** + * Tool to create a new semantic topic (chapter) for UI grouping. + */ +export class CreateNewTopicTool extends BaseDeclarativeTool< + CreateNewTopicParams, + ToolResult +> { + constructor(messageBus: MessageBus) { + super( + CREATE_NEW_TOPIC_TOOL_NAME, + 'Create New Topic', + 'Organizes work into a new "Chapter" or "Topic".
Call this when transitioning between major phases (e.g., from Research to Implementation).', + Kind.Think, + { + type: 'object', + properties: { + [TOPIC_PARAM_TITLE]: { + type: 'string', + description: 'The title of the new topic or chapter.', + }, + }, + required: [TOPIC_PARAM_TITLE], + }, + messageBus, + ); + } + + protected createInvocation( + params: CreateNewTopicParams, + messageBus: MessageBus, + ): CreateNewTopicInvocation { + return new CreateNewTopicInvocation(params, messageBus, this.name); + } +} diff --git a/plans/topic-grouping.md b/plans/topic-grouping.md new file mode 100644 index 0000000000..f74ce891a6 --- /dev/null +++ b/plans/topic-grouping.md @@ -0,0 +1,62 @@ +# Plan: Linear Semantic Topic Grouping (Chapters) + +Implement a semantic topic grouping system to organize tool calls into +sequential "Chapters" in the UI using a dedicated tool and prompt feedback. + +## 1. Core Component: Topic Management + +Create a lightweight `TopicManager` to maintain the session's current semantic +state. + +- **Location:** `packages/core/src/tools/topicTool.ts` +- **Role:** A singleton that stores the `activeTopicTitle`. +- **Logic:** + - `setTopic(title: string)`: Updates the current title. + - `getTopic()`: Returns the current title (defaults to `undefined`). + +## 2. The `create_new_topic` Tool + +A nearly No-OP tool that acts as the trigger for UI "Chapter" breaks. + +- **Name:** `create_new_topic` +- **Parameters:** `title: string` (e.g., "Researching Codebase", "Implementing + Fix"). +- **Execution Logic:** + - Calls `TopicManager.setTopic(title)`. + - Returns a simple confirmation message: `Topic changed to: "${title}"`. +- **UI Impact:** The UI detects this tool name in the stream and renders a + visual divider/header. + +## 3. Scheduler Ordering (Turn-Ahead Alignment) + +Ensure the "Chapter Header" appears before actions in a single turn. 
+ +- **Location:** `packages/core/src/scheduler/scheduler.ts` +- **Change:** In `_startBatch`, sort incoming `toolCalls`. Move + `create_new_topic` to index `0`. +- **Reason:** Correct UI rendering order for simultaneous calls. + +## 4. Context Reinjection (Loop Feedback) + +Keep the model aware of its current "Chapter" to prevent redundant calls. + +- **Location:** `packages/core/src/prompts/promptProvider.ts` +- **Change:** Append the current topic to the system prompt footer (e.g., + `[Active Topic: Researching Auth Flow]`). +- **Instruction:** Add the mandate: _"If the current active topic no longer + describes your current phase of work, use `create_new_topic` to start a new + chapter."_ + +## 5. System Prompt Refinement + +Update `packages/core/src/prompts/snippets.ts` with guidance. + +- **Guidance:** "Use `create_new_topic` to organize your work into logical + chapters. Call it when transitioning between major steps (e.g., Research -> + Strategy -> Implementation)." +- **Constraint:** `create_new_topic` is a forward-only semantic marker. + +## 6. Verification & Evaluation + +- **Behavioral Eval:** Create `evals/topic_grouping.eval.ts`. +- **Validation:** Run `npm run preflight` to ensure monorepo-wide stability.