Generalize evals infra to support more types of evals, organization and queuing of named suites (#24941)

2026-04-24 20:14:44 -07:00 · 2026-04-08 23:57:26 +00:00
parent bc3ed61adb
commit f1bb2af6de
32 changed files with 475 additions and 133 deletions
@@ -5,14 +5,14 @@
 */

 import { describe, expect } from 'vitest';
-import { act } from 'react';
 import path from 'node:path';
 import fs from 'node:fs';
 import { appEvalTest } from './app-test-helper.js';
-import { PolicyDecision } from '@google/gemini-cli-core';

 describe('Model Steering Behavioral Evals', () => {
  appEvalTest('USUALLY_PASSES', {
+    suiteName: 'default',
+    suiteType: 'behavioral',
    name: 'Corrective Hint: Model switches task based on hint during tool turn',
    configOverrides: {
      modelSteering: true,
@@ -52,6 +52,8 @@ describe('Model Steering Behavioral Evals', () => {
  });

  appEvalTest('USUALLY_PASSES', {
+    suiteName: 'default',
+    suiteType: 'behavioral',
    name: 'Suggestive Hint: Model incorporates user guidance mid-stream',
    configOverrides: {
      modelSteering: true,