diff --git a/evals/chaos.eval.ts b/evals/chaos.eval.ts
new file mode 100644
index 0000000000..78899115cb
--- /dev/null
+++ b/evals/chaos.eval.ts
@@ -0,0 +1,36 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { evalTest } from './test-helper.js';
+
+/**
+ * These tests are designed to trigger the "Chaos Simulation" logic in evals/test-helper.ts.
+ * They simulate persistent 500 and 503 API errors to verify that the reliability
+ * pipeline correctly retries, logs the events, and eventually skips the tests
+ * instead of failing the CI.
+ *
+ * The helper matches on the test name: any name containing "Chaos" triggers the
+ * simulated failure, and a name that also contains "503" selects the 503 variant.
+ * The names below are therefore load-bearing; do not rename them casually.
+ */
+
+evalTest('ALWAYS_PASSES', {
+  name: 'Chaos 500 - API Internal Error Simulation',
+  prompt: 'Say hello',
+  assert: async () => {
+    // This assertion should never be reached because the chaos simulation
+    // throws an error before rig.run().
+    throw new Error('Should have been caught by chaos simulation');
+  },
+});
+
+evalTest('ALWAYS_PASSES', {
+  name: 'Chaos 503 - API Unavailable Simulation',
+  prompt: 'Say hello',
+  assert: async () => {
+    // This assertion should never be reached.
+    throw new Error('Should have been caught by chaos simulation');
+  },
+});
diff --git a/evals/test-helper.ts b/evals/test-helper.ts
index 9bd5e219d9..bed738ddb2 100644
--- a/evals/test-helper.ts
+++ b/evals/test-helper.ts
@@ -65,6 +65,19 @@ export async function internalEvalTest(evalCase: EvalCase) {
     await setupTestFiles(rig, evalCase.files);
   }
 
+  // --- CHAOS SIMULATION ---
+  // Evals whose name contains "Chaos" fail here on purpose, before any further
+  // setup runs, so the error-handling path can be exercised end to end.
+  // NOTE(review): the message reports `status: INTERNAL` for the 503 case as
+  // well; confirm the retry logic keys on the numeric code, not the status.
+  if (evalCase.name.includes('Chaos')) {
+    const errorCode = evalCase.name.includes('503') ? '503' : '500';
+    throw new Error(
+      `status: INTERNAL - Simulated ${errorCode} error for testing pipeline`,
+    );
+  }
+  // ------------------------
+
   symlinkNodeModules(rig.testDir || '');
 
   // If messages are provided, write a session file so --resume can load it.