diff --git a/eval_regression_report.md b/eval_regression_report.md new file mode 100644 index 0000000000..6c3b69e01c --- /dev/null +++ b/eval_regression_report.md @@ -0,0 +1,12 @@ +### 🧪 Related Evaluation Rationale + +- Testing **glob** because **packages/core/src/tools/glob.ts** was modified. +- Testing **grep** because **packages/core/src/tools/grep.ts** was modified. + +_Something missing? +[Update evals/suites.json](evals/README.md#related-testing-with-related) to +adjust detection logic._ + +--- + +✅ **6** tests passed successfully on **gemini-3-flash-preview**. diff --git a/evals/failing_steering.eval.ts b/evals/failing_steering.eval.ts new file mode 100644 index 0000000000..7d23234af7 --- /dev/null +++ b/evals/failing_steering.eval.ts @@ -0,0 +1,22 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect } from 'vitest'; +import { evalTest, TestRig } from './test-helper.js'; + +describe('Related Eval Demonstration', () => { + evalTest('ALWAYS_PASSES', { + name: 'should fail deliberately to demonstrate regression reporting', + files: { + 'test.txt': 'Hello world', + }, + prompt: 'What is in test.txt?', + assert: async (rig: TestRig, result: string) => { + // This assertion is designed to fail. + expect(result).toContain('THE MODEL WILL NEVER SAY THIS EXACT SENTENCE'); + }, + }); +}); diff --git a/evals/suites.json b/evals/suites.json index feedfce4b3..74c2a20257 100644 --- a/evals/suites.json +++ b/evals/suites.json @@ -104,13 +104,15 @@ "packages/core/src/prompts/**", "evals/answer-vs-act.eval.ts", "evals/model_steering.eval.ts", - "evals/redundant_casts.eval.ts" + "evals/redundant_casts.eval.ts", + "evals/failing_steering.eval.ts" ], "evals": [ "ALL_ALWAYS_PASSING", "evals/answer-vs-act.eval.ts", "evals/model_steering.eval.ts", - "evals/redundant_casts.eval.ts" + "evals/redundant_casts.eval.ts", + "evals/failing_steering.eval.ts" ] }, "edit_fidelity": { diff --git a/nulllinter b/nulllinter new file mode 160000 index 0000000000..7ef35ce7b2 --- /dev/null +++ b/nulllinter @@ -0,0 +1 @@ +Subproject commit 7ef35ce7b2d4b3d0d1c4a7033c2367649d2ff014 diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index 59315e1ca6..2477c8433e 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -4,6 +4,8 @@ * SPDX-License-Identifier: Apache-2.0 */ +// Related Eval Demo: Modification in this file should trigger core_steering. + import { ACTIVATE_SKILL_TOOL_NAME, ASK_USER_TOOL_NAME,