chore(evals): add steering demonstration with deliberate failure

This commit is contained in:
Alisa Novikova
2026-04-08 21:25:12 -07:00
parent 6ca4147462
commit a591cd9f8f
5 changed files with 41 additions and 2 deletions
+12
View File
@@ -0,0 +1,12 @@
### 🧪 Related Evaluation Rationale
- Testing **glob** because **packages/core/src/tools/glob.ts** was modified.
- Testing **grep** because **packages/core/src/tools/grep.ts** was modified.
_Something missing?
[Update evals/suites.json](evals/README.md#related-testing-with-related) to
adjust detection logic._
---
**6** tests passed successfully on **gemini-3-flash-preview**.
+22
View File
@@ -0,0 +1,22 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect } from 'vitest';
import { evalTest, TestRig } from './test-helper.js';
describe('Related Eval Demonstration', () => {
  // Sentinel text the assertion looks for in the model's reply. The eval is
  // meant to fail, so this is deliberately something the reply will not contain.
  const unreachableSentence = 'THE MODEL WILL NEVER SAY THIS EXACT SENTENCE';

  /**
   * Intentionally failing eval used to demonstrate regression reporting.
   *
   * Seeds the workspace with a single `test.txt`, asks about its contents, and
   * then checks the reply for a sentinel sentence so that the assertion fails.
   *
   * NOTE(review): the eval is registered under the 'ALWAYS_PASSES' policy even
   * though it is designed to fail — presumably that mismatch is the point of
   * the demonstration; confirm before copying this pattern elsewhere.
   */
  evalTest('ALWAYS_PASSES', {
    name: 'should fail deliberately to demonstrate regression reporting',
    files: { 'test.txt': 'Hello world' },
    prompt: 'What is in test.txt?',
    // The rig is part of the callback signature but is not needed here.
    assert: async (_rig: TestRig, reply: string) => {
      expect(reply).toContain(unreachableSentence);
    },
  });
});
+4 -2
View File
@@ -104,13 +104,15 @@
"packages/core/src/prompts/**",
"evals/answer-vs-act.eval.ts",
"evals/model_steering.eval.ts",
"evals/redundant_casts.eval.ts"
"evals/redundant_casts.eval.ts",
"evals/failing_steering.eval.ts"
],
"evals": [
"ALL_ALWAYS_PASSING",
"evals/answer-vs-act.eval.ts",
"evals/model_steering.eval.ts",
"evals/redundant_casts.eval.ts"
"evals/redundant_casts.eval.ts",
"evals/failing_steering.eval.ts"
]
},
"edit_fidelity": {
Submodule
+1
Submodule nulllinter added at 7ef35ce7b2
+2
View File
@@ -4,6 +4,8 @@
* SPDX-License-Identifier: Apache-2.0
*/
// Related Eval Demo: modifying this file should trigger core_steering.
import {
ACTIVATE_SKILL_TOOL_NAME,
ASK_USER_TOOL_NAME,