diff --git a/evals/chaos.eval.ts b/evals/chaos.eval.ts
new file mode 100644
index 0000000000..78899115cb
--- /dev/null
+++ b/evals/chaos.eval.ts
@@ -0,0 +1,33 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { evalTest } from './test-helper.js';
+
+/**
+ * These tests are designed to trigger the "Chaos Simulation" logic in evals/test-helper.ts.
+ * They simulate persistent 500 and 503 API errors to verify that the reliability
+ * pipeline correctly retries, logs the events, and eventually skips the tests
+ * instead of failing the CI.
+ */
+
+evalTest('ALWAYS_PASSES', {
+  name: 'Chaos 500 - API Internal Error Simulation',
+  prompt: 'Say hello',
+  // Guard only: the chaos simulation in evals/test-helper.ts throws for any
+  // eval whose name contains "Chaos", so reaching this callback is a failure.
+  assert: async () => {
+    throw new Error('Should have been caught by chaos simulation');
+  },
+});
+
+evalTest('ALWAYS_PASSES', {
+  name: 'Chaos 503 - API Unavailable Simulation',
+  prompt: 'Say hello',
+  // "503" in the name selects the simulated code; this guard must not run.
+  assert: async () => {
+    throw new Error('Should have been caught by chaos simulation');
+  },
+});
diff --git a/evals/test-helper.ts b/evals/test-helper.ts
index 9bd5e219d9..bed738ddb2 100644
--- a/evals/test-helper.ts
+++ b/evals/test-helper.ts
@@ -65,6 +65,15 @@ export async function internalEvalTest(evalCase: EvalCase) {
         await setupTestFiles(rig, evalCase.files);
       }
 
+      // --- CHAOS SIMULATION ---
+      if (evalCase.name.includes('Chaos')) {
+        const errorCode = evalCase.name.includes('503') ? '503' : '500';
+        throw new Error(
+          `status: INTERNAL - Simulated ${errorCode} error for testing pipeline`,
+        );
+      }
+      // ------------------------
+
       symlinkNodeModules(rig.testDir || '');
 
       // If messages are provided, write a session file so --resume can load it.