diff --git a/evals/generalist_delegation.eval.ts b/evals/generalist_delegation.eval.ts new file mode 100644 index 0000000000..7e6358ae1f --- /dev/null +++ b/evals/generalist_delegation.eval.ts @@ -0,0 +1,165 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import { appEvalTest } from './app-test-helper.js'; + +describe('generalist_delegation', () => { + // --- Positive Evals (Should Delegate) --- + + appEvalTest('USUALLY_PASSES', { + name: 'should delegate batch error fixing to generalist agent', + configOverrides: { + agents: { + overrides: { + generalist: { enabled: true }, + }, + }, + experimental: { + enableAgents: true, + }, + excludeTools: ['run_shell_command'], + }, + files: { + 'file1.ts': 'console.log("no semi")', + 'file2.ts': 'console.log("no semi")', + 'file3.ts': 'console.log("no semi")', + 'file4.ts': 'console.log("no semi")', + 'file5.ts': 'console.log("no semi")', + 'file6.ts': 'console.log("no semi")', + 'file7.ts': 'console.log("no semi")', + 'file8.ts': 'console.log("no semi")', + 'file9.ts': 'console.log("no semi")', + 'file10.ts': 'console.log("no semi")', + }, + prompt: + 'I have 10 files (file1.ts to file10.ts) that are missing semicolons. Can you fix them?', + setup: async (rig) => { + rig.setBreakpoint(['generalist']); + }, + assert: async (rig) => { + const confirmation = await rig.waitForPendingConfirmation( + 'generalist', + 60000, + ); + expect( + confirmation, + 'Expected a tool call for generalist agent', + ).toBeTruthy(); + await rig.resolveTool(confirmation); + await rig.waitForIdle(60000); + }, + }); + + appEvalTest('USUALLY_PASSES', { + name: 'should autonomously delegate complex batch task to generalist agent', + configOverrides: { + agents: { + overrides: { + generalist: { enabled: true }, + }, + }, + experimental: { + enableAgents: true, + }, + excludeTools: ['run_shell_command'], + }, + files: { + 'src/a.ts': 'export const a = 1;', + 'src/b.ts': 'export const b = 2;', + 'src/c.ts': 'export const c = 3;', + 'src/d.ts': 'export const d = 4;', + 'src/e.ts': 'export const e = 5;', + }, + prompt: + 'Please update all files in the src directory. For each file, add a comment at the top that says "Processed by Gemini".', + setup: async (rig) => { + rig.setBreakpoint(['generalist']); + }, + assert: async (rig) => { + const confirmation = await rig.waitForPendingConfirmation( + 'generalist', + 60000, + ); + expect( + confirmation, + 'Expected autonomously delegate to generalist for batch task', + ).toBeTruthy(); + await rig.resolveTool(confirmation); + await rig.waitForIdle(60000); + }, + }); + + // --- Negative Evals (Should NOT Delegate - Assertive Handling) --- + + appEvalTest('USUALLY_PASSES', { + name: 'should NOT delegate simple read and fix to generalist agent', + configOverrides: { + agents: { + overrides: { + generalist: { enabled: true }, + }, + }, + experimental: { + enableAgents: true, + }, + excludeTools: ['run_shell_command'], + }, + files: { + 'README.md': 'This is a proyect.', + }, + prompt: + 'There is a typo in README.md ("proyect"). Please fix it to "project".', + setup: async (rig) => { + // Break on everything to see what it calls + rig.setBreakpoint(['*']); + }, + assert: async (rig) => { + await rig.drainBreakpointsUntilIdle((confirmation) => { + expect( + confirmation.toolName, + `Agent should NOT have delegated to generalist.`, + ).not.toBe('generalist'); + }); + + const output = rig.getStaticOutput(); + expect(output).toMatch(/project/i); + }, + }); + + appEvalTest('USUALLY_PASSES', { + name: 'should NOT delegate simple direct question to generalist agent', + configOverrides: { + agents: { + overrides: { + generalist: { enabled: true }, + }, + }, + experimental: { + enableAgents: true, + }, + excludeTools: ['run_shell_command'], + }, + files: { + 'src/VERSION': '1.2.3', + }, + prompt: 'Can you tell me the version number in the src folder?', + setup: async (rig) => { + rig.setBreakpoint(['*']); + }, + assert: async (rig) => { + await rig.drainBreakpointsUntilIdle((confirmation) => { + expect( + confirmation.toolName, + `Agent should NOT have delegated to generalist.`, + ).not.toBe('generalist'); + }); + + const output = rig.getStaticOutput(); + expect(output).toMatch(/1\.2\.3/); + }, + }); +}); diff --git a/packages/cli/src/test-utils/AppRig.tsx b/packages/cli/src/test-utils/AppRig.tsx index 018ce1502b..d953be0ff6 100644 --- a/packages/cli/src/test-utils/AppRig.tsx +++ b/packages/cli/src/test-utils/AppRig.tsx @@ -29,6 +29,7 @@ import { createContentGenerator, IdeClient, debugLogger, + CoreToolCallStatus, } from '@google/gemini-cli-core'; import { type MockShellCommand, @@ -36,7 +37,47 @@ import { } from './MockShellExecutionService.js'; import { createMockSettings } from './settings.js'; import { type LoadedSettings } from '../config/settings.js'; -import { AuthState } from '../ui/types.js'; +import { AuthState, StreamingState } from '../ui/types.js'; +import { randomUUID } from 'node:crypto'; +import type { + TrackedCancelledToolCall, + TrackedCompletedToolCall, + TrackedToolCall, +} from '../ui/hooks/useToolScheduler.js'; + +// Global state observer for React-based signals +const sessionStateMap = new Map(); +const activeRigs = new Map(); + +// Mock StreamingContext to report state changes back to the observer +vi.mock('../ui/contexts/StreamingContext.js', async (importOriginal) => { + const original = + await importOriginal(); + const { useConfig } = await import('../ui/contexts/ConfigContext.js'); + const React = await import('react'); + + return { + ...original, + useStreamingContext: () => { + const state = original.useStreamingContext(); + const config = useConfig(); + const sessionId = config.getSessionId(); + + React.useEffect(() => { + sessionStateMap.set(sessionId, state); + // If we see activity, we are no longer "awaiting" the start of a response + if (state !== StreamingState.Idle) { + const rig = activeRigs.get(sessionId); + if (rig) { + rig.awaitingResponse = false; + } + } + }, [sessionId, state]); + + return state; + }, + }; +}); // Mock core functions globally for tests using AppRig. vi.mock('@google/gemini-cli-core', async (importOriginal) => { @@ -112,9 +153,18 @@ export class AppRig { private breakpointTools = new Set(); private lastAwaitedConfirmation: PendingConfirmation | undefined; + /** + * True if a message was just sent but React hasn't yet reported a non-idle state. + */ + awaitingResponse = false; + constructor(private options: AppRigOptions = {}) { - this.testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gemini-app-rig-')); - this.sessionId = `test-session-${Math.random().toString(36).slice(2, 9)}`; + const uniqueId = randomUUID(); + this.testDir = fs.mkdtempSync( + path.join(os.tmpdir(), `gemini-app-rig-${uniqueId.slice(0, 8)}-`), + ); + this.sessionId = `test-session-${uniqueId}`; + activeRigs.set(this.sessionId, this); } async initialize() { @@ -245,6 +295,8 @@ export class AppRig { }; } + private toolCalls: TrackedToolCall[] = []; + private setupMessageBusListeners() { if (!this.config) return; const messageBus = this.config.getMessageBus(); @@ -252,6 +304,7 @@ export class AppRig { messageBus.subscribe( MessageBusType.TOOL_CALLS_UPDATE, (message: ToolCallsUpdateMessage) => { + this.toolCalls = message.toolCalls; for (const call of message.toolCalls) { if (call.status === 'awaiting_approval' && call.correlationId) { const details = call.confirmationDetails; @@ -281,6 +334,48 @@ export class AppRig { ); } + /** + * Returns true if the agent is currently busy (responding or executing tools). + */ + isBusy(): boolean { + if (this.awaitingResponse) { + return true; + } + + const reactState = sessionStateMap.get(this.sessionId); + // If we have a React-based state, use it as the definitive signal. + // 'responding' and 'waiting-for-confirmation' both count as busy for the overall task. + if (reactState !== undefined) { + return reactState !== StreamingState.Idle; + } + + // Fallback to tool tracking if React hasn't reported yet + const isAnyToolActive = this.toolCalls.some((tc) => { + if ( + tc.status === CoreToolCallStatus.Executing || + tc.status === CoreToolCallStatus.Scheduled || + tc.status === CoreToolCallStatus.Validating + ) { + return true; + } + if ( + tc.status === CoreToolCallStatus.Success || + tc.status === CoreToolCallStatus.Error || + tc.status === CoreToolCallStatus.Cancelled + ) { + return !(tc as TrackedCompletedToolCall | TrackedCancelledToolCall) + .responseSubmittedToGemini; + } + return false; + }); + + const isAwaitingConfirmation = this.toolCalls.some( + (tc) => tc.status === CoreToolCallStatus.AwaitingApproval, + ); + + return isAnyToolActive || isAwaitingConfirmation; + } + render() { if (!this.config || !this.settings) throw new Error('AppRig not initialized'); @@ -334,17 +429,21 @@ export class AppRig { this.setBreakpoint(name); } } else { - this.setToolPolicy(toolName, PolicyDecision.ASK_USER, 100); + // Use undefined toolName to create a global rule if '*' is provided + const actualToolName = toolName === '*' ? undefined : toolName; + this.setToolPolicy(actualToolName, PolicyDecision.ASK_USER, 100); this.breakpointTools.add(toolName); } } removeToolPolicy(toolName?: string, source = 'AppRig Override') { if (!this.config) throw new Error('AppRig not initialized'); + // Map '*' back to undefined for policy removal + const actualToolName = toolName === '*' ? undefined : toolName; this.config .getPolicyEngine() // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion - .removeRulesForTool(toolName as string, source); + .removeRulesForTool(actualToolName as string, source); this.breakpointTools.delete(toolName); } @@ -416,6 +515,44 @@ export class AppRig { return matched!; } + /** + * Waits for either a tool confirmation request OR for the agent to go idle. + */ + async waitForNextEvent( + timeout = 60000, + ): Promise< + | { type: 'confirmation'; confirmation: PendingConfirmation } + | { type: 'idle' } + > { + let confirmation: PendingConfirmation | undefined; + let isIdle = false; + + await this.waitUntil( + async () => { + await act(async () => { + await new Promise((resolve) => setTimeout(resolve, 0)); + }); + confirmation = this.getPendingConfirmations()[0]; + // Now that we have a code-powered signal, this should be perfectly deterministic. + isIdle = !this.isBusy(); + return !!confirmation || isIdle; + }, + { + timeout, + message: 'Timed out waiting for next event (confirmation or idle).', + }, + ); + + if (confirmation) { + this.lastAwaitedConfirmation = confirmation; + return { type: 'confirmation', confirmation }; + } + + // Ensure all renders are flushed before returning 'idle' + await this.renderResult?.waitUntilReady(); + return { type: 'idle' }; + } + async resolveTool( toolNameOrDisplayName: string | RegExp | PendingConfirmation, outcome: ToolConfirmationOutcome = ToolConfirmationOutcome.ProceedOnce, @@ -471,6 +608,32 @@ export class AppRig { }); } + /** + * Drains all pending tool calls that hit a breakpoint until the agent is idle. + * Useful for negative tests to ensure no unwanted tools (like generalist) are called. + * + * @param onConfirmation Optional callback to inspect each confirmation before resolving. + * Return true to skip the default resolveTool call (e.g. if you handled it). + */ + async drainBreakpointsUntilIdle( + onConfirmation?: (confirmation: PendingConfirmation) => void | boolean, + timeout = 60000, + ) { + while (true) { + const event = await this.waitForNextEvent(timeout); + if (event.type === 'idle') { + break; + } + + const confirmation = event.confirmation; + const handled = onConfirmation?.(confirmation); + + if (!handled) { + await this.resolveTool(confirmation); + } + } + } + getConfig(): Config { if (!this.config) throw new Error('AppRig not initialized'); return this.config; @@ -530,11 +693,16 @@ export class AppRig { } async sendMessage(text: string) { + this.awaitingResponse = true; await this.type(text); await this.pressEnter(); } async unmount() { + // Clean up global state for this session + sessionStateMap.delete(this.sessionId); + activeRigs.delete(this.sessionId); + // Poison the chat recording service to prevent late writes to the test directory if (this.config) { const recordingService = this.config diff --git a/packages/core/src/agents/generalist-agent.ts b/packages/core/src/agents/generalist-agent.ts index 4f9040a7b0..412880b089 100644 --- a/packages/core/src/agents/generalist-agent.ts +++ b/packages/core/src/agents/generalist-agent.ts @@ -24,8 +24,7 @@ export const GeneralistAgent = ( name: 'generalist', displayName: 'Generalist Agent', description: - "A general-purpose AI agent with access to all tools. Use it for complex tasks that don't fit into other specialized agents.", - experimental: true, + 'A general-purpose AI agent with access to all tools. Highly recommended for tasks that are turn-intensive or involve processing large amounts of data. Use this to keep the main session history lean and efficient. Excellent for: batch refactoring/error fixing across multiple files, running commands with high-volume output, and speculative investigations.', inputConfig: { inputSchema: { type: 'object', diff --git a/packages/core/src/agents/registry.test.ts b/packages/core/src/agents/registry.test.ts index 8cc45a9a5a..c5f2faa06f 100644 --- a/packages/core/src/agents/registry.test.ts +++ b/packages/core/src/agents/registry.test.ts @@ -50,6 +50,7 @@ function makeMockedConfig(params?: Partial): Config { } as unknown as ToolRegistry); vi.spyOn(config, 'getAgentRegistry').mockReturnValue({ getDirectoryContext: () => 'mock directory context', + getAllDefinitions: () => [], } as unknown as AgentRegistry); return config; } @@ -262,6 +263,7 @@ describe('AgentRegistry', () => { overrides: { codebase_investigator: { enabled: false }, cli_help: { enabled: false }, + generalist: { enabled: false }, }, }, }); @@ -299,13 +301,13 @@ describe('AgentRegistry', () => { expect(registry.getDefinition('cli_help')).toBeUndefined(); }); - it('should NOT register generalist agent by default (because it is experimental)', async () => { + it('should register generalist agent by default', async () => { const config = makeMockedConfig(); const registry = new TestableAgentRegistry(config); await registry.initialize(); - expect(registry.getDefinition('generalist')).toBeUndefined(); + expect(registry.getDefinition('generalist')).toBeDefined(); }); it('should register generalist agent if explicitly enabled via override', async () => { diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index e789efa5d6..f1979ef00b 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -57,6 +57,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -209,6 +221,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -480,6 +504,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -632,6 +668,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -1506,6 +1554,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -1657,6 +1717,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -1800,6 +1872,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -1943,6 +2027,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -2082,6 +2178,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -2221,6 +2329,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -2352,6 +2472,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -2490,6 +2622,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + test-agent @@ -2870,6 +3014,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -3009,6 +3165,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -3260,6 +3428,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent @@ -3399,6 +3579,18 @@ Use the following guidelines to optimize your search and read patterns. Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + mock-agent diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index a96842c3f1..3ab3b68e0a 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -228,6 +228,18 @@ export function renderSubAgents(subAgents?: SubAgentOptions[]): string { Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise. +### Strategic Orchestration & Delegation +Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work. + +When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean. + +**High-Impact Delegation Candidates:** +- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project"). +- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches). +- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found. + +**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path. + ${subAgentsXml}