diff --git a/evals/app-test-helper.ts b/evals/app-test-helper.ts
index e9d5d695b7..1794573fe1 100644
--- a/evals/app-test-helper.ts
+++ b/evals/app-test-helper.ts
@@ -12,7 +12,7 @@ import {
   symlinkNodeModules,
   withEvalRetries,
   prepareWorkspace,
-  BaseEvalCase,
+  type BaseEvalCase,
   EVAL_MODEL,
 } from './test-helper.js';
 import fs from 'node:fs';
@@ -32,6 +32,7 @@ interface EvalConfigOverrides {
   allowedTools?: never;
   /** Restricting tools via mainAgentTools in evals is forbidden. */
   mainAgentTools?: never;
+
   [key: string]: unknown;
 }
 
diff --git a/evals/ask_user.eval.ts b/evals/ask_user.eval.ts
index b31ef54ae5..580081108a 100644
--- a/evals/ask_user.eval.ts
+++ b/evals/ask_user.eval.ts
@@ -5,17 +5,21 @@
  */
 
 import { describe, expect } from 'vitest';
+import { ApprovalMode, isRecord } from '@google/gemini-cli-core';
 import { appEvalTest, type AppEvalCase } from './app-test-helper.js';
 import { type EvalPolicy } from './test-helper.js';
 
 function askUserEvalTest(policy: EvalPolicy, evalCase: AppEvalCase) {
+  const existingGeneral = evalCase.configOverrides?.['general'];
+  const generalBase = isRecord(existingGeneral) ? existingGeneral : {};
+
   return appEvalTest(policy, {
     ...evalCase,
     configOverrides: {
       ...evalCase.configOverrides,
+      approvalMode: ApprovalMode.DEFAULT,
       general: {
-        ...evalCase.configOverrides?.general,
-        approvalMode: 'default',
+        ...generalBase,
         enableAutoUpdate: false,
         enableAutoUpdateNotification: false,
       },
@@ -27,7 +31,7 @@ function askUserEvalTest(policy: EvalPolicy, evalCase: AppEvalCase) {
 }
 
 describe('ask_user', () => {
-  askUserEvalTest('USUALLY_PASSES', {
+  askUserEvalTest('ALWAYS_PASSES', {
     suiteName: 'default',
     suiteType: 'behavioral',
     name: 'Agent uses AskUser tool to present multiple choice options',
@@ -44,7 +48,7 @@ describe('ask_user', () => {
     },
   });
 
-  askUserEvalTest('USUALLY_PASSES', {
+  askUserEvalTest('ALWAYS_PASSES', {
     suiteName: 'default',
     suiteType: 'behavioral',
     name: 'Agent uses AskUser tool to clarify ambiguous requirements',
@@ -64,7 +68,7 @@ describe('ask_user', () => {
     },
   });
 
-  askUserEvalTest('USUALLY_PASSES', {
+  askUserEvalTest('ALWAYS_PASSES', {
     suiteName: 'default',
     suiteType: 'behavioral',
     name: 'Agent uses AskUser tool before performing significant ambiguous rework',
@@ -88,8 +92,8 @@ describe('ask_user', () => {
       ]);
       expect(confirmation, 'Expected a tool call confirmation').toBeDefined();
 
-      if (confirmation?.name === 'enter_plan_mode') {
-        rig.acceptConfirmation('enter_plan_mode');
+      if (confirmation?.toolName === 'enter_plan_mode') {
+        await rig.resolveTool('enter_plan_mode');
         confirmation = await rig.waitForPendingConfirmation('ask_user');
       }
 
@@ -106,7 +110,7 @@ describe('ask_user', () => {
   // confirm shell commands. Fixed via prompt refinements and tool definition
   // updates to clarify that shell command confirmation is handled by the UI.
   // See fix: https://github.com/google-gemini/gemini-cli/pull/20504
-  askUserEvalTest('USUALLY_PASSES', {
+  askUserEvalTest('ALWAYS_PASSES', {
     suiteName: 'default',
     suiteType: 'behavioral',
     name: 'Agent does NOT use AskUser to confirm shell commands',
diff --git a/evals/frugalSearch.eval.ts b/evals/frugalSearch.eval.ts
index 82438585e6..d5962b1534 100644
--- a/evals/frugalSearch.eval.ts
+++ b/evals/frugalSearch.eval.ts
@@ -13,18 +13,6 @@ import { evalTest } from './test-helper.js';
  * This ensures the agent doesn't flood the context window with unnecessary search results.
  */
 describe('Frugal Search', () => {
-  const getGrepParams = (call: any): any => {
-    let args = call.toolRequest.args;
-    if (typeof args === 'string') {
-      try {
-        args = JSON.parse(args);
-      } catch (e) {
-        // Ignore parse errors
-      }
-    }
-    return args;
-  };
-
   /**
    * Ensure that the agent makes use of either grep or ranged reads in fulfilling this task.
    * The task is specifically phrased to not evoke "view" or "search" specifically because
diff --git a/evals/hierarchical_memory.eval.ts b/evals/hierarchical_memory.eval.ts
index b7b58c79a1..7b673af6d6 100644
--- a/evals/hierarchical_memory.eval.ts
+++ b/evals/hierarchical_memory.eval.ts
@@ -5,8 +5,7 @@
  */
 
 import { describe, expect } from 'vitest';
-import { evalTest } from './test-helper.js';
-import { assertModelHasOutput } from '../integration-tests/test-helper.js';
+import { evalTest, assertModelHasOutput } from './test-helper.js';
 
 describe('Hierarchical Memory', () => {
   const conflictResolutionTest =
diff --git a/evals/save_memory.eval.ts b/evals/save_memory.eval.ts
index bbb13d1c44..5a228ed065 100644
--- a/evals/save_memory.eval.ts
+++ b/evals/save_memory.eval.ts
@@ -5,11 +5,11 @@
  */
 
 import { describe, expect } from 'vitest';
-import { evalTest } from './test-helper.js';
 import {
+  evalTest,
   assertModelHasOutput,
   checkModelOutputContent,
-} from '../integration-tests/test-helper.js';
+} from './test-helper.js';
 
 describe('save_memory', () => {
   const TEST_PREFIX = 'Save memory test: ';
diff --git a/evals/test-helper.test.ts b/evals/test-helper.test.ts
index c0147cda75..6be26e918a 100644
--- a/evals/test-helper.test.ts
+++ b/evals/test-helper.test.ts
@@ -49,6 +49,8 @@ describe('evalTest reliability logic', () => {
 
     // Execute the test function directly
     await internalEvalTest({
+      suiteName: 'test',
+      suiteType: 'behavioral',
       name: 'test-api-failure',
       prompt: 'do something',
       assert: async () => {},
@@ -83,6 +85,8 @@ describe('evalTest reliability logic', () => {
     // Expect the test function to throw immediately
     await expect(
       internalEvalTest({
+        suiteName: 'test',
+        suiteType: 'behavioral',
         name: 'test-logic-failure',
         prompt: 'do something',
         assert: async () => {
@@ -108,6 +112,8 @@ describe('evalTest reliability logic', () => {
       .mockResolvedValueOnce('Success');
 
     await internalEvalTest({
+      suiteName: 'test',
+      suiteType: 'behavioral',
       name: 'test-recovery',
       prompt: 'do something',
       assert: async () => {},
@@ -135,6 +141,8 @@ describe('evalTest reliability logic', () => {
     );
 
     await internalEvalTest({
+      suiteName: 'test',
+      suiteType: 'behavioral',
       name: 'test-api-503',
       prompt: 'do something',
       assert: async () => {},
@@ -162,6 +170,8 @@ describe('evalTest reliability logic', () => {
     try {
       await expect(
         internalEvalTest({
+          suiteName: 'test',
+          suiteType: 'behavioral',
           name: 'test-absolute-path',
           prompt: 'do something',
           files: {
@@ -190,6 +200,8 @@ describe('evalTest reliability logic', () => {
     try {
       await expect(
         internalEvalTest({
+          suiteName: 'test',
+          suiteType: 'behavioral',
           name: 'test-traversal',
           prompt: 'do something',
           files: {
diff --git a/evals/tsconfig.json b/evals/tsconfig.json
deleted file mode 100644
index edc9007206..0000000000
--- a/evals/tsconfig.json
+++ /dev/null
@@ -1,10 +0,0 @@
-{
-  "extends": "../tsconfig.json",
-  "compilerOptions": {
-    "jsx": "react-jsx",
-    "lib": ["DOM", "DOM.Iterable", "ES2023"],
-    "types": ["node", "vitest/globals"]
-  },
-  "include": ["**/*.ts", "**/*.tsx"],
-  "exclude": ["node_modules", "logs"]
-}