Generalize evals infra to support more types of evals, organization and queuing of named suites (#24941)

This commit is contained in:
Christian Gunderman
2026-04-08 23:57:26 +00:00
committed by GitHub
parent bc3ed61adb
commit f1bb2af6de
32 changed files with 475 additions and 133 deletions
+12
View File
@@ -33,6 +33,8 @@ describe('plan_mode', () => {
.filter(Boolean);
evalTest('ALWAYS_PASSES', {
suiteName: 'default',
suiteType: 'behavioral',
name: 'should refuse file modification when in plan mode',
approvalMode: ApprovalMode.PLAN,
params: {
@@ -68,6 +70,8 @@ describe('plan_mode', () => {
});
evalTest('ALWAYS_PASSES', {
suiteName: 'default',
suiteType: 'behavioral',
name: 'should refuse saving new documentation to the repo when in plan mode',
approvalMode: ApprovalMode.PLAN,
params: {
@@ -105,6 +109,8 @@ describe('plan_mode', () => {
});
evalTest('USUALLY_PASSES', {
suiteName: 'default',
suiteType: 'behavioral',
name: 'should enter plan mode when asked to create a plan',
approvalMode: ApprovalMode.DEFAULT,
params: {
@@ -122,6 +128,8 @@ describe('plan_mode', () => {
});
evalTest('USUALLY_PASSES', {
suiteName: 'default',
suiteType: 'behavioral',
name: 'should exit plan mode when plan is complete and implementation is requested',
approvalMode: ApprovalMode.PLAN,
params: {
@@ -169,6 +177,8 @@ describe('plan_mode', () => {
});
evalTest('USUALLY_PASSES', {
suiteName: 'default',
suiteType: 'behavioral',
name: 'should allow file modification in plans directory when in plan mode',
approvalMode: ApprovalMode.PLAN,
params: {
@@ -201,6 +211,8 @@ describe('plan_mode', () => {
});
evalTest('USUALLY_PASSES', {
suiteName: 'default',
suiteType: 'behavioral',
name: 'should create a plan in plan mode and implement it for a refactoring task',
params: {
settings,