diff --git a/evals/subagents.eval.ts b/evals/subagents.eval.ts
index 7e9b3cd808..3a7d8fa44f 100644
--- a/evals/subagents.eval.ts
+++ b/evals/subagents.eval.ts
@@ -4,21 +4,41 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { describe } from 'vitest';
+import fs from 'node:fs';
+import path from 'node:path';
+
+import { describe, expect } from 'vitest';
+
 import { evalTest } from './test-helper.js';
 
-const AGENT_DEFINITION = `---
+const DOCS_AGENT_DEFINITION = `---
 name: docs-agent
 description: An agent with expertise in updating documentation.
 tools:
   - read_file
   - write_file
 ---
-
-You are the docs agent. Update the documentation.
+You are the docs agent. Update documentation clearly and accurately.
 `;
 
-const INDEX_TS = 'export const add = (a: number, b: number) => a + b;';
+const TEST_AGENT_DEFINITION = `---
+name: test-agent
+description: An agent with expertise in writing and updating tests.
+tools:
+  - read_file
+  - write_file
+---
+You are the test agent. Add or update tests.
+`;
+
+const INDEX_TS = 'export const add = (a: number, b: number) => a + b;\n';
+
+function readProjectFile(
+  rig: { testDir?: string },
+  relativePath: string,
+): string {
+  return fs.readFileSync(path.join(rig.testDir!, relativePath), 'utf8');
+}
 
 describe('subagent eval test cases', () => {
   /**
@@ -42,12 +62,147 @@ describe('subagent eval test cases', () => {
     },
     prompt: 'Please update README.md with a description of this library.',
     files: {
-      '.gemini/agents/test-agent.md': AGENT_DEFINITION,
+      '.gemini/agents/docs-agent.md': DOCS_AGENT_DEFINITION,
       'index.ts': INDEX_TS,
-      'README.md': 'TODO: update the README.',
+      'README.md': 'TODO: update the README.\n',
     },
     assert: async (rig, _result) => {
       await rig.expectToolCallSuccess(['docs-agent']);
     },
   });
+
+  /**
+   * Checks that the outer agent does not over-delegate trivial work when
+   * subagents are available. This helps catch orchestration overuse.
+   */
+  evalTest('USUALLY_PASSES', {
+    name: 'should avoid delegating trivial direct edit work',
+    params: {
+      settings: {
+        experimental: {
+          enableAgents: true,
+          agents: {
+            overrides: {
+              generalist: { enabled: true },
+            },
+          },
+        },
+      },
+    },
+    prompt:
+      'Rename the exported function in index.ts from add to sum and update the file directly.',
+    files: {
+      '.gemini/agents/docs-agent.md': DOCS_AGENT_DEFINITION,
+      'index.ts': INDEX_TS,
+    },
+    assert: async (rig, _result) => {
+      const updatedIndex = readProjectFile(rig, 'index.ts');
+      const toolLogs = rig.readToolLogs() as Array<{
+        toolRequest: { name: string };
+      }>;
+
+      expect(updatedIndex).toContain('export const sum =');
+      expect(toolLogs.some((l) => l.toolRequest.name === 'docs-agent')).toBe(
+        false,
+      );
+      expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe(
+        false,
+      );
+    },
+  });
+
+  /**
+   * Checks that the outer agent prefers a more relevant specialist over a
+   * broad generalist when both are available.
+   *
+   * This is meant to codify the "overusing Generalist" failure mode.
+   */
+  evalTest('USUALLY_PASSES', {
+    name: 'should prefer relevant specialist over generalist',
+    params: {
+      settings: {
+        experimental: {
+          enableAgents: true,
+          agents: {
+            overrides: {
+              generalist: { enabled: true },
+            },
+          },
+        },
+      },
+    },
+    prompt: 'Please add a small test file that verifies add(1, 2) returns 3.',
+    files: {
+      '.gemini/agents/test-agent.md': TEST_AGENT_DEFINITION,
+      'index.ts': INDEX_TS,
+      'package.json': JSON.stringify(
+        {
+          name: 'subagent-eval-project',
+          version: '1.0.0',
+          type: 'module',
+        },
+        null,
+        2,
+      ),
+    },
+    assert: async (rig, _result) => {
+      const toolLogs = rig.readToolLogs() as Array<{
+        toolRequest: { name: string };
+      }>;
+
+      await rig.expectToolCallSuccess(['test-agent']);
+      expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe(
+        false,
+      );
+    },
+  });
+
+  /**
+   * Checks cardinality and decomposition for a multi-surface task. The task
+   * naturally spans docs and tests, so multiple specialists should be used.
+   */
+  evalTest('USUALLY_PASSES', {
+    name: 'should use multiple relevant specialists for multi-surface task',
+    params: {
+      settings: {
+        experimental: {
+          enableAgents: true,
+          agents: {
+            overrides: {
+              generalist: { enabled: true },
+            },
+          },
+        },
+      },
+    },
+    prompt:
+      'Add a short README description for this library and also add a test file that verifies add(1, 2) returns 3.',
+    files: {
+      '.gemini/agents/docs-agent.md': DOCS_AGENT_DEFINITION,
+      '.gemini/agents/test-agent.md': TEST_AGENT_DEFINITION,
+      'index.ts': INDEX_TS,
+      'README.md': 'TODO: update the README.\n',
+      'package.json': JSON.stringify(
+        {
+          name: 'subagent-eval-project',
+          version: '1.0.0',
+          type: 'module',
+        },
+        null,
+        2,
+      ),
+    },
+    assert: async (rig, _result) => {
+      const toolLogs = rig.readToolLogs() as Array<{
+        toolRequest: { name: string };
+      }>;
+      const readme = readProjectFile(rig, 'README.md');
+
+      await rig.expectToolCallSuccess(['docs-agent', 'test-agent']);
+      expect(readme).not.toContain('TODO: update the README.');
+      expect(toolLogs.some((l) => l.toolRequest.name === 'generalist')).toBe(
+        false,
+      );
+    },
+  });
 });