mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-18 15:52:53 -07:00
chore(evals): add steering demonstration with deliberate failure
This commit is contained in:
@@ -0,0 +1,12 @@
|
||||
### 🧪 Related Evaluation Rationale
|
||||
|
||||
- Testing **glob** because **packages/core/src/tools/glob.ts** was modified.
|
||||
- Testing **grep** because **packages/core/src/tools/grep.ts** was modified.
|
||||
|
||||
_Something missing?
|
||||
[Update evals/suites.json](evals/README.md#related-testing-with-related) to
|
||||
adjust detection logic._
|
||||
|
||||
---
|
||||
|
||||
✅ **6** tests passed successfully on **gemini-3-flash-preview**.
|
||||
@@ -0,0 +1,22 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, expect } from 'vitest';
|
||||
import { evalTest, TestRig } from './test-helper.js';
|
||||
|
||||
describe('Related Eval Demonstration', () => {
  // Sentinel text the response is not expected to contain; asserting on it
  // makes this eval fail on purpose.
  const unreachableSentence = 'THE MODEL WILL NEVER SAY THIS EXACT SENTENCE';

  /**
   * Deliberately failing eval used to demonstrate regression reporting.
   *
   * It supplies a `test.txt` entry containing "Hello world", asks what the
   * file contains, and then asserts on text the answer should never include.
   * NOTE(review): the first argument is 'ALWAYS_PASSES' even though the
   * assertion is designed to fail — presumably intentional for the demo;
   * confirm before relying on this suite as a health signal.
   */
  evalTest('ALWAYS_PASSES', {
    name: 'should fail deliberately to demonstrate regression reporting',
    files: { 'test.txt': 'Hello world' },
    prompt: 'What is in test.txt?',
    // The rig is not needed here; only the textual result is inspected.
    assert: async (_rig: TestRig, result: string) => {
      // This assertion is designed to fail.
      expect(result).toContain(unreachableSentence);
    },
  });
});
|
||||
+4
-2
@@ -104,13 +104,15 @@
|
||||
"packages/core/src/prompts/**",
|
||||
"evals/answer-vs-act.eval.ts",
|
||||
"evals/model_steering.eval.ts",
|
||||
"evals/redundant_casts.eval.ts"
|
||||
"evals/redundant_casts.eval.ts",
|
||||
"evals/failing_steering.eval.ts"
|
||||
],
|
||||
"evals": [
|
||||
"ALL_ALWAYS_PASSING",
|
||||
"evals/answer-vs-act.eval.ts",
|
||||
"evals/model_steering.eval.ts",
|
||||
"evals/redundant_casts.eval.ts"
|
||||
"evals/redundant_casts.eval.ts",
|
||||
"evals/failing_steering.eval.ts"
|
||||
]
|
||||
},
|
||||
"edit_fidelity": {
|
||||
|
||||
Submodule
+1
Submodule nulllinter added at 7ef35ce7b2
@@ -4,6 +4,8 @@
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
// Related Eval Demo: modifying this file should trigger core_steering.
|
||||
|
||||
import {
|
||||
ACTIVATE_SKILL_TOOL_NAME,
|
||||
ASK_USER_TOOL_NAME,
|
||||
|
||||
Reference in New Issue
Block a user