feat(core): Enable generalist agent (#19665)

2026-05-13 05:12:55 -07:00 · 2026-02-26 08:38:49 -08:00
parent 9c2fd5a7c6
commit 611d934829
6 changed files with 547 additions and 9 deletions
@@ -0,0 +1,165 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, expect } from 'vitest';
+import { appEvalTest } from './app-test-helper.js';
+
+describe('generalist_delegation', () => {
+  /**
+   * Builds the configuration overrides shared by every eval in this suite:
+   * the generalist agent override is enabled, experimental agents are turned
+   * on, and `run_shell_command` is excluded.
+   *
+   * A new object is returned on every call so that no eval can observe
+   * mutations made to another eval's overrides.
+   */
+  const generalistEnabledConfig = () => ({
+    agents: {
+      overrides: {
+        generalist: { enabled: true },
+      },
+    },
+    experimental: {
+      enableAgents: true,
+    },
+    excludeTools: ['run_shell_command'],
+  });
+
+  // Ten identical fixtures, file1.ts through file10.ts, each holding a single
+  // statement without a trailing semicolon.
+  const filesMissingSemicolons = Object.fromEntries(
+    Array.from({ length: 10 }, (_, index): [string, string] => [
+      `file${index + 1}.ts`,
+      'console.log("no semi")',
+    ]),
+  );
+
+  // --- Positive Evals (Should Delegate) ---
+
+  appEvalTest('USUALLY_PASSES', {
+    name: 'should delegate batch error fixing to generalist agent',
+    configOverrides: generalistEnabledConfig(),
+    files: filesMissingSemicolons,
+    prompt:
+      'I have 10 files (file1.ts to file10.ts) that are missing semicolons. Can you fix them?',
+    setup: async (rig) => {
+      // Pause only on the generalist tool so the delegation itself is observable.
+      rig.setBreakpoint(['generalist']);
+    },
+    assert: async (rig) => {
+      const confirmation = await rig.waitForPendingConfirmation(
+        'generalist',
+        60000,
+      );
+      expect(
+        confirmation,
+        'Expected a tool call for generalist agent',
+      ).toBeTruthy();
+      // Approve the delegation and let the run finish.
+      await rig.resolveTool(confirmation);
+      await rig.waitForIdle(60000);
+    },
+  });
+
+  appEvalTest('USUALLY_PASSES', {
+    name: 'should autonomously delegate complex batch task to generalist agent',
+    configOverrides: generalistEnabledConfig(),
+    files: {
+      'src/a.ts': 'export const a = 1;',
+      'src/b.ts': 'export const b = 2;',
+      'src/c.ts': 'export const c = 3;',
+      'src/d.ts': 'export const d = 4;',
+      'src/e.ts': 'export const e = 5;',
+    },
+    prompt:
+      'Please update all files in the src directory. For each file, add a comment at the top that says "Processed by Gemini".',
+    setup: async (rig) => {
+      rig.setBreakpoint(['generalist']);
+    },
+    assert: async (rig) => {
+      const confirmation = await rig.waitForPendingConfirmation(
+        'generalist',
+        60000,
+      );
+      expect(
+        confirmation,
+        'Expected autonomous delegation to generalist for batch task',
+      ).toBeTruthy();
+      await rig.resolveTool(confirmation);
+      await rig.waitForIdle(60000);
+    },
+  });
+
+  // --- Negative Evals (Should NOT Delegate - Assertive Handling) ---
+
+  appEvalTest('USUALLY_PASSES', {
+    name: 'should NOT delegate simple read and fix to generalist agent',
+    configOverrides: generalistEnabledConfig(),
+    files: {
+      'README.md': 'This is a proyect.',
+    },
+    prompt:
+      'There is a typo in README.md ("proyect"). Please fix it to "project".',
+    setup: async (rig) => {
+      // Break on every tool so each call can be inspected before it runs.
+      rig.setBreakpoint(['*']);
+    },
+    assert: async (rig) => {
+      // Every confirmation is checked, then resolved by the rig's default path.
+      await rig.drainBreakpointsUntilIdle((confirmation) => {
+        expect(
+          confirmation.toolName,
+          `Agent should NOT have delegated to generalist.`,
+        ).not.toBe('generalist');
+      });
+
+      const output = rig.getStaticOutput();
+      expect(output).toMatch(/project/i);
+    },
+  });
+
+  appEvalTest('USUALLY_PASSES', {
+    name: 'should NOT delegate simple direct question to generalist agent',
+    configOverrides: generalistEnabledConfig(),
+    files: {
+      'src/VERSION': '1.2.3',
+    },
+    prompt: 'Can you tell me the version number in the src folder?',
+    setup: async (rig) => {
+      // Break on every tool so each call can be inspected before it runs.
+      rig.setBreakpoint(['*']);
+    },
+    assert: async (rig) => {
+      await rig.drainBreakpointsUntilIdle((confirmation) => {
+        expect(
+          confirmation.toolName,
+          `Agent should NOT have delegated to generalist.`,
+        ).not.toBe('generalist');
+      });
+
+      const output = rig.getStaticOutput();
+      expect(output).toMatch(/1\.2\.3/);
+    },
+  });
+});
@@ -29,6 +29,7 @@ import {
  createContentGenerator,
  IdeClient,
  debugLogger,
+  CoreToolCallStatus,
 } from '@google/gemini-cli-core';
 import {
  type MockShellCommand,
@@ -36,7 +37,47 @@ import {
 } from './MockShellExecutionService.js';
 import { createMockSettings } from './settings.js';
 import { type LoadedSettings } from '../config/settings.js';
-import { AuthState } from '../ui/types.js';
+import { AuthState, StreamingState } from '../ui/types.js';
+import { randomUUID } from 'node:crypto';
+import type {
+  TrackedCancelledToolCall,
+  TrackedCompletedToolCall,
+  TrackedToolCall,
+} from '../ui/hooks/useToolScheduler.js';
+
+// Module-level observers that bridge React streaming state to AppRig
+// instances; both maps are keyed by session id.
+const sessionStateMap = new Map<string, StreamingState>();
+const activeRigs = new Map<string, AppRig>();
+
+// Wrap the real useStreamingContext so each state it yields is mirrored into
+// sessionStateMap for the session reported by the config context.
+vi.mock('../ui/contexts/StreamingContext.js', async (importOriginal) => {
+  const actual =
+    await importOriginal<typeof import('../ui/contexts/StreamingContext.js')>();
+  const { useConfig } = await import('../ui/contexts/ConfigContext.js');
+  const React = await import('react');
+
+  const useObservedStreamingContext = () => {
+    const streamingState = actual.useStreamingContext();
+    const sessionId = useConfig().getSessionId();
+
+    React.useEffect(() => {
+      sessionStateMap.set(sessionId, streamingState);
+      if (streamingState === StreamingState.Idle) {
+        return;
+      }
+      // Any non-idle state means the rig is no longer waiting for the
+      // response to start.
+      const rig = activeRigs.get(sessionId);
+      if (rig) {
+        rig.awaitingResponse = false;
+      }
+    }, [sessionId, streamingState]);
+
+    return streamingState;
+  };
+
+  return { ...actual, useStreamingContext: useObservedStreamingContext };
+});

 // Mock core functions globally for tests using AppRig.
 vi.mock('@google/gemini-cli-core', async (importOriginal) => {
@@ -112,9 +153,18 @@ export class AppRig {
  private breakpointTools = new Set<string | undefined>();
  private lastAwaitedConfirmation: PendingConfirmation | undefined;

+  /**
+   * True if a message was just sent but React hasn't yet reported a non-idle state.
+   *
+   * Set to true by `sendMessage()`, reset to false by the mocked
+   * `useStreamingContext` effect whenever it observes a non-idle state for
+   * this rig's session, and checked first by `isBusy()`.
+   */
+  awaitingResponse = false;
+
  constructor(private options: AppRigOptions = {}) {
-    this.testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gemini-app-rig-'));
-    this.sessionId = `test-session-${Math.random().toString(36).slice(2, 9)}`;
+    const uniqueId = randomUUID();
+    this.testDir = fs.mkdtempSync(
+      path.join(os.tmpdir(), `gemini-app-rig-${uniqueId.slice(0, 8)}-`),
+    );
+    this.sessionId = `test-session-${uniqueId}`;
+    activeRigs.set(this.sessionId, this);
  }

  async initialize() {
@@ -245,6 +295,8 @@ export class AppRig {
    };
  }

+  private toolCalls: TrackedToolCall[] = []; // Latest TOOL_CALLS_UPDATE payload; consulted by isBusy() as a fallback.
+
  private setupMessageBusListeners() {
    if (!this.config) return;
    const messageBus = this.config.getMessageBus();
@@ -252,6 +304,7 @@ export class AppRig {
    messageBus.subscribe(
      MessageBusType.TOOL_CALLS_UPDATE,
      (message: ToolCallsUpdateMessage) => {
+        this.toolCalls = message.toolCalls;
        for (const call of message.toolCalls) {
          if (call.status === 'awaiting_approval' && call.correlationId) {
            const details = call.confirmationDetails;
@@ -281,6 +334,48 @@ export class AppRig {
    );
  }

+  /**
+   * Reports whether the agent still has work in flight: a response that has
+   * not started yet, a non-idle streaming state, or an unfinished tool call.
+   *
+   * @returns true while the agent is busy, false once it is idle.
+   */
+  isBusy(): boolean {
+    // A message was sent and no non-idle state has been observed yet.
+    if (this.awaitingResponse) {
+      return true;
+    }
+
+    // Once React has reported a state for this session it is authoritative;
+    // every non-idle state counts as busy.
+    const streamingState = sessionStateMap.get(this.sessionId);
+    if (streamingState !== undefined) {
+      return streamingState !== StreamingState.Idle;
+    }
+
+    // No React report yet: fall back to the last tool-call snapshot.
+    return this.toolCalls.some((call) => {
+      switch (call.status) {
+        case CoreToolCallStatus.Executing:
+        case CoreToolCallStatus.Scheduled:
+        case CoreToolCallStatus.Validating:
+        case CoreToolCallStatus.AwaitingApproval:
+          return true;
+        case CoreToolCallStatus.Success:
+        case CoreToolCallStatus.Error:
+        case CoreToolCallStatus.Cancelled:
+          // A finished call still counts until its response has been submitted.
+          return !(call as TrackedCompletedToolCall | TrackedCancelledToolCall)
+            .responseSubmittedToGemini;
+        default:
+          return false;
+      }
+    });
+  }
+
  render() {
    if (!this.config || !this.settings)
      throw new Error('AppRig not initialized');
@@ -334,17 +429,21 @@ export class AppRig {
        this.setBreakpoint(name);
      }
    } else {
-      this.setToolPolicy(toolName, PolicyDecision.ASK_USER, 100);
+      // Use undefined toolName to create a global rule if '*' is provided
+      const actualToolName = toolName === '*' ? undefined : toolName;
+      this.setToolPolicy(actualToolName, PolicyDecision.ASK_USER, 100);
      this.breakpointTools.add(toolName);
    }
  }

  removeToolPolicy(toolName?: string, source = 'AppRig Override') {
    if (!this.config) throw new Error('AppRig not initialized');
+    // Map '*' back to undefined for policy removal
+    const actualToolName = toolName === '*' ? undefined : toolName;
    this.config
      .getPolicyEngine()
      // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
-      .removeRulesForTool(toolName as string, source);
+      .removeRulesForTool(actualToolName as string, source);
    this.breakpointTools.delete(toolName);
  }

@@ -416,6 +515,44 @@ export class AppRig {
    return matched!;
  }

+  /**
+   * Polls until a tool confirmation is pending or the agent is no longer
+   * busy, then reports which of the two was observed.
+   *
+   * @param timeout Timeout forwarded to `waitUntil` (defaults to 60000).
+   * @returns A `confirmation` event carrying the first pending confirmation,
+   *          or an `idle` event when none is pending.
+   */
+  async waitForNextEvent(
+    timeout = 60000,
+  ): Promise<
+    | { type: 'confirmation'; confirmation: PendingConfirmation }
+    | { type: 'idle' }
+  > {
+    let pending: PendingConfirmation | undefined;
+
+    const confirmationOrIdle = async () => {
+      // Yield one timer tick inside act() before sampling; presumably this
+      // lets queued React updates settle first.
+      await act(async () => {
+        await new Promise((resolve) => setTimeout(resolve, 0));
+      });
+      pending = this.getPendingConfirmations()[0];
+      const idle = !this.isBusy();
+      return Boolean(pending) || idle;
+    };
+
+    await this.waitUntil(confirmationOrIdle, {
+      timeout,
+      message: 'Timed out waiting for next event (confirmation or idle).',
+    });
+
+    // A pending confirmation takes precedence over idleness.
+    if (pending) {
+      this.lastAwaitedConfirmation = pending;
+      return { type: 'confirmation', confirmation: pending };
+    }
+
+    // Let any outstanding render finish before reporting idle.
+    await this.renderResult?.waitUntilReady();
+    return { type: 'idle' };
+  }
+
  async resolveTool(
    toolNameOrDisplayName: string | RegExp | PendingConfirmation,
    outcome: ToolConfirmationOutcome = ToolConfirmationOutcome.ProceedOnce,
@@ -471,6 +608,32 @@ export class AppRig {
    });
  }

+  /**
+   * Handles breakpoint confirmations one at a time until `waitForNextEvent`
+   * reports that the agent is idle. Handy for negative tests that must prove
+   * an unwanted tool (like generalist) was never called.
+   *
+   * @param onConfirmation Optional inspector invoked with each confirmation before it
+   *                       is resolved. A truthy return value skips the default
+   *                       `resolveTool` call (e.g. if the callback handled it).
+   * @param timeout Timeout passed to each individual `waitForNextEvent` call.
+   */
+  async drainBreakpointsUntilIdle(
+    onConfirmation?: (confirmation: PendingConfirmation) => void | boolean,
+    timeout = 60000,
+  ) {
+    let event = await this.waitForNextEvent(timeout);
+    while (event.type !== 'idle') {
+      const { confirmation } = event;
+      const alreadyHandled = onConfirmation?.(confirmation);
+      if (!alreadyHandled) {
+        await this.resolveTool(confirmation);
+      }
+      event = await this.waitForNextEvent(timeout);
+    }
+  }
+
  getConfig(): Config {
    if (!this.config) throw new Error('AppRig not initialized');
    return this.config;
@@ -530,11 +693,16 @@ export class AppRig {
  }

  async sendMessage(text: string) {
+    this.awaitingResponse = true;
    await this.type(text);
    await this.pressEnter();
  }

  async unmount() {
+    // Clean up global state for this session
+    sessionStateMap.delete(this.sessionId);
+    activeRigs.delete(this.sessionId);
+
    // Poison the chat recording service to prevent late writes to the test directory
    if (this.config) {
      const recordingService = this.config
@@ -24,8 +24,7 @@ export const GeneralistAgent = (
  name: 'generalist',
  displayName: 'Generalist Agent',
  description:
-    "A general-purpose AI agent with access to all tools. Use it for complex tasks that don't fit into other specialized agents.",
-  experimental: true,
+    'A general-purpose AI agent with access to all tools. Highly recommended for tasks that are turn-intensive or involve processing large amounts of data. Use this to keep the main session history lean and efficient. Excellent for: batch refactoring/error fixing across multiple files, running commands with high-volume output, and speculative investigations.',
  inputConfig: {
    inputSchema: {
      type: 'object',
@@ -50,6 +50,7 @@ function makeMockedConfig(params?: Partial<ConfigParameters>): Config {
  } as unknown as ToolRegistry);
  vi.spyOn(config, 'getAgentRegistry').mockReturnValue({
    getDirectoryContext: () => 'mock directory context',
+    getAllDefinitions: () => [],
  } as unknown as AgentRegistry);
  return config;
 }
@@ -262,6 +263,7 @@ describe('AgentRegistry', () => {
          overrides: {
            codebase_investigator: { enabled: false },
            cli_help: { enabled: false },
+            generalist: { enabled: false },
          },
        },
      });
@@ -299,13 +301,13 @@ describe('AgentRegistry', () => {
      expect(registry.getDefinition('cli_help')).toBeUndefined();
    });

-    it('should NOT register generalist agent by default (because it is experimental)', async () => {
+    it('should register generalist agent by default', async () => {
      const config = makeMockedConfig();
      const registry = new TestableAgentRegistry(config);

      await registry.initialize();

-      expect(registry.getDefinition('generalist')).toBeUndefined();
+      expect(registry.getDefinition('generalist')).toBeDefined();
    });

    it('should register generalist agent if explicitly enabled via override', async () => {
@@ -57,6 +57,18 @@ Use the following guidelines to optimize your search and read patterns.

 Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise.

+### Strategic Orchestration & Delegation
+Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work.
+
+When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean.
+
+**High-Impact Delegation Candidates:**
+- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project").
+- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches).
+- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found.
+
+**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path.
+
 <available_subagents>
  <subagent>
    <name>mock-agent</name>
@@ -209,6 +221,18 @@ Use the following guidelines to optimize your search and read patterns.

 Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise.

+### Strategic Orchestration & Delegation
+Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work.
+
+When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean.
+
+**High-Impact Delegation Candidates:**
+- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project").
+- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches).
+- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found.
+
+**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path.
+
 <available_subagents>
  <subagent>
    <name>mock-agent</name>
@@ -480,6 +504,18 @@ Use the following guidelines to optimize your search and read patterns.

 Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise.

+### Strategic Orchestration & Delegation
+Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work.
+
+When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean.
+
+**High-Impact Delegation Candidates:**
+- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project").
+- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches).
+- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found.
+
+**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path.
+
 <available_subagents>
  <subagent>
    <name>mock-agent</name>
@@ -632,6 +668,18 @@ Use the following guidelines to optimize your search and read patterns.

 Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise.

+### Strategic Orchestration & Delegation
+Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work.
+
+When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean.
+
+**High-Impact Delegation Candidates:**
+- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project").
+- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches).
+- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found.
+
+**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path.
+
 <available_subagents>
  <subagent>
    <name>mock-agent</name>
@@ -1506,6 +1554,18 @@ Use the following guidelines to optimize your search and read patterns.

 Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise.

+### Strategic Orchestration & Delegation
+Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work.
+
+When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean.
+
+**High-Impact Delegation Candidates:**
+- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project").
+- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches).
+- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found.
+
+**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path.
+
 <available_subagents>
  <subagent>
    <name>mock-agent</name>
@@ -1657,6 +1717,18 @@ Use the following guidelines to optimize your search and read patterns.

 Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise.

+### Strategic Orchestration & Delegation
+Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work.
+
+When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean.
+
+**High-Impact Delegation Candidates:**
+- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project").
+- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches).
+- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found.
+
+**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path.
+
 <available_subagents>
  <subagent>
    <name>mock-agent</name>
@@ -1800,6 +1872,18 @@ Use the following guidelines to optimize your search and read patterns.

 Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise.

+### Strategic Orchestration & Delegation
+Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work.
+
+When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean.
+
+**High-Impact Delegation Candidates:**
+- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project").
+- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches).
+- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found.
+
+**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path.
+
 <available_subagents>
  <subagent>
    <name>mock-agent</name>
@@ -1943,6 +2027,18 @@ Use the following guidelines to optimize your search and read patterns.

 Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise.

+### Strategic Orchestration & Delegation
+Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work.
+
+When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean.
+
+**High-Impact Delegation Candidates:**
+- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project").
+- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches).
+- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found.
+
+**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path.
+
 <available_subagents>
  <subagent>
    <name>mock-agent</name>
@@ -2082,6 +2178,18 @@ Use the following guidelines to optimize your search and read patterns.

 Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise.

+### Strategic Orchestration & Delegation
+Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work.
+
+When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean.
+
+**High-Impact Delegation Candidates:**
+- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project").
+- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches).
+- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found.
+
+**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path.
+
 <available_subagents>
  <subagent>
    <name>mock-agent</name>
@@ -2221,6 +2329,18 @@ Use the following guidelines to optimize your search and read patterns.

 Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise.

+### Strategic Orchestration & Delegation
+Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work.
+
+When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean.
+
+**High-Impact Delegation Candidates:**
+- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project").
+- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches).
+- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found.
+
+**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path.
+
 <available_subagents>
  <subagent>
    <name>mock-agent</name>
@@ -2352,6 +2472,18 @@ Use the following guidelines to optimize your search and read patterns.

 Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise.

+### Strategic Orchestration & Delegation
+Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work.
+
+When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean.
+
+**High-Impact Delegation Candidates:**
+- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project").
+- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches).
+- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found.
+
+**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path.
+
 <available_subagents>
  <subagent>
    <name>mock-agent</name>
@@ -2490,6 +2622,18 @@ Use the following guidelines to optimize your search and read patterns.

 Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise.

+### Strategic Orchestration & Delegation
+Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work.
+
+When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean.
+
+**High-Impact Delegation Candidates:**
+- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project").
+- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches).
+- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found.
+
+**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path.
+
 <available_subagents>
  <subagent>
    <name>test-agent</name>
@@ -2870,6 +3014,18 @@ Use the following guidelines to optimize your search and read patterns.

 Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise.

+### Strategic Orchestration & Delegation
+Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work.
+
+When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean.
+
+**High-Impact Delegation Candidates:**
+- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project").
+- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches).
+- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found.
+
+**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path.
+
 <available_subagents>
  <subagent>
    <name>mock-agent</name>
@@ -3009,6 +3165,18 @@ Use the following guidelines to optimize your search and read patterns.

 Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise.

+### Strategic Orchestration & Delegation
+Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work.
+
+When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean.
+
+**High-Impact Delegation Candidates:**
+- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project").
+- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches).
+- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found.
+
+**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path.
+
 <available_subagents>
  <subagent>
    <name>mock-agent</name>
@@ -3260,6 +3428,18 @@ Use the following guidelines to optimize your search and read patterns.

 Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise.

+### Strategic Orchestration & Delegation
+Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work.
+
+When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean.
+
+**High-Impact Delegation Candidates:**
+- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project").
+- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches).
+- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found.
+
+**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path.
+
 <available_subagents>
  <subagent>
    <name>mock-agent</name>
@@ -3399,6 +3579,18 @@ Use the following guidelines to optimize your search and read patterns.

 Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise.

+### Strategic Orchestration & Delegation
+Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work.
+
+When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean.
+
+**High-Impact Delegation Candidates:**
+- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project").
+- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches).
+- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found.
+
+**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path.
+
 <available_subagents>
  <subagent>
    <name>mock-agent</name>
@@ -228,6 +228,18 @@ export function renderSubAgents(subAgents?: SubAgentOptions[]): string {

 Sub-agents are specialized expert agents. Each sub-agent is available as a tool of the same name. You MUST delegate tasks to the sub-agent with the most relevant expertise.

+### Strategic Orchestration & Delegation
+Operate as a **strategic orchestrator**. Your own context window is your most precious resource. Every turn you take adds to the permanent session history. To keep the session fast and efficient, use sub-agents to "compress" complex or repetitive work.
+
+When you delegate, the sub-agent's entire execution is consolidated into a single summary in your history, keeping your main loop lean.
+
+**High-Impact Delegation Candidates:**
+- **Repetitive Batch Tasks:** Tasks involving more than 3 files or repeated steps (e.g., "Add license headers to all files in src/", "Fix all lint errors in the project").
+- **High-Volume Output:** Commands or tools expected to return large amounts of data (e.g., verbose builds, exhaustive file searches).
+- **Speculative Research:** Investigations that require many "trial and error" steps before a clear path is found.
+
+**Assertive Action:** Continue to handle "surgical" tasks directly—simple reads, single-file edits, or direct questions that can be resolved in 1-2 turns. Delegation is an efficiency tool, not a way to avoid direct action when it is the fastest path.
+
 <available_subagents>
 ${subAgentsXml}
 </available_subagents>