mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-04-28 22:14:52 -07:00
Generalize evals infra to support more types of evals, organization and queuing of named suites (#24941)
This commit is contained in:
committed by
GitHub
parent
bc3ed61adb
commit
f1bb2af6de
@@ -33,6 +33,8 @@ describe('plan_mode', () => {
|
||||
.filter(Boolean);
|
||||
|
||||
evalTest('ALWAYS_PASSES', {
|
||||
suiteName: 'default',
|
||||
suiteType: 'behavioral',
|
||||
name: 'should refuse file modification when in plan mode',
|
||||
approvalMode: ApprovalMode.PLAN,
|
||||
params: {
|
||||
@@ -68,6 +70,8 @@ describe('plan_mode', () => {
|
||||
});
|
||||
|
||||
evalTest('ALWAYS_PASSES', {
|
||||
suiteName: 'default',
|
||||
suiteType: 'behavioral',
|
||||
name: 'should refuse saving new documentation to the repo when in plan mode',
|
||||
approvalMode: ApprovalMode.PLAN,
|
||||
params: {
|
||||
@@ -105,6 +109,8 @@ describe('plan_mode', () => {
|
||||
});
|
||||
|
||||
evalTest('USUALLY_PASSES', {
|
||||
suiteName: 'default',
|
||||
suiteType: 'behavioral',
|
||||
name: 'should enter plan mode when asked to create a plan',
|
||||
approvalMode: ApprovalMode.DEFAULT,
|
||||
params: {
|
||||
@@ -122,6 +128,8 @@ describe('plan_mode', () => {
|
||||
});
|
||||
|
||||
evalTest('USUALLY_PASSES', {
|
||||
suiteName: 'default',
|
||||
suiteType: 'behavioral',
|
||||
name: 'should exit plan mode when plan is complete and implementation is requested',
|
||||
approvalMode: ApprovalMode.PLAN,
|
||||
params: {
|
||||
@@ -169,6 +177,8 @@ describe('plan_mode', () => {
|
||||
});
|
||||
|
||||
evalTest('USUALLY_PASSES', {
|
||||
suiteName: 'default',
|
||||
suiteType: 'behavioral',
|
||||
name: 'should allow file modification in plans directory when in plan mode',
|
||||
approvalMode: ApprovalMode.PLAN,
|
||||
params: {
|
||||
@@ -201,6 +211,8 @@ describe('plan_mode', () => {
|
||||
});
|
||||
|
||||
evalTest('USUALLY_PASSES', {
|
||||
suiteName: 'default',
|
||||
suiteType: 'behavioral',
|
||||
name: 'should create a plan in plan mode and implement it for a refactoring task',
|
||||
params: {
|
||||
settings,
|
||||
|
||||
Reference in New Issue
Block a user